Index: projects/clang1000-import/bin/pwait/pwait.1 =================================================================== --- projects/clang1000-import/bin/pwait/pwait.1 (revision 357178) +++ projects/clang1000-import/bin/pwait/pwait.1 (revision 357179) @@ -1,101 +1,103 @@ .\" .\" Copyright (c) 2004-2009, Jilles Tjoelker .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with .\" or without modification, are permitted provided that the .\" following conditions are met: .\" .\" 1. Redistributions of source code must retain the above .\" copyright notice, this list of conditions and the .\" following disclaimer. .\" 2. Redistributions in binary form must reproduce the .\" above copyright notice, this list of conditions and .\" the following disclaimer in the documentation and/or .\" other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND .\" CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED .\" WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED .\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A .\" PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE .\" COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY .\" DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, .\" PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF .\" USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER .\" CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING .\" NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE .\" USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY .\" OF SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd March 7, 2017 +.Dd January 26, 2020 .Dt PWAIT 1 .Os .Sh NAME .Nm pwait .Nd wait for processes to terminate .Sh SYNOPSIS .Nm .Op Fl t Ar duration -.Op Fl v +.Op Fl ov .Ar pid \&... .Sh DESCRIPTION The .Nm utility will wait until each of the given processes has terminated. .Pp The following options are available: .Bl -tag -width indent +.It Fl o +Exit when any of the given processes has terminated. .It Fl t Ar duration If any process is still running after .Ar duration , .Nm will exit. The .Ar duration value can be an integer or a decimal number. Values without unit symbols are interpreted as seconds. .Pp Supported unit symbols are: .Bl -tag -width indent -compact .It s seconds .It m minutes .It h hours .El .It Fl v Print the exit status when each process terminates. .El .Sh EXIT STATUS The .Nm utility exits 0 on success, and >0 if an error occurs. .Pp If the .Fl t flag is specified and a timeout occurs, the exit status will be 124. .Pp Invalid pids elicit a warning message but are otherwise ignored. .Sh SEE ALSO .Xr kill 1 , .Xr pkill 1 , .Xr ps 1 , .Xr wait 1 , .Xr kqueue 2 .Sh NOTES .Nm is not a substitute for the .Xr wait 1 builtin as it will not clean up any zombies or state in the parent process. .Sh HISTORY A .Nm command first appeared in SunOS 5.8. Index: projects/clang1000-import/bin/pwait/pwait.c =================================================================== --- projects/clang1000-import/bin/pwait/pwait.c (revision 357178) +++ projects/clang1000-import/bin/pwait/pwait.c (revision 357179) @@ -1,195 +1,219 @@ /*- * Copyright (c) 2004-2009, Jilles Tjoelker * All rights reserved. * * Redistribution and use in source and binary forms, with * or without modification, are permitted provided that the * following conditions are met: * * 1.
Redistributions of source code must retain the above * copyright notice, this list of conditions and the * following disclaimer. * 2. Redistributions in binary form must reproduce the * above copyright notice, this list of conditions and * the following disclaimer in the documentation and/or * other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include static void usage(void) { - fprintf(stderr, "usage: pwait [-t timeout] [-v] pid ...\n"); - exit(EX_USAGE); + errx(EX_USAGE, "usage: pwait [-t timeout] [-ov] pid ..."); } /* * pwait - wait for processes to terminate */ int main(int argc, char *argv[]) { struct itimerval itv; - int kq; struct kevent *e; - int tflag, verbose; - int opt, nleft, n, i, duplicate, status; + int oflag, tflag, verbose; + int i, kq, n, nleft, opt, status; long pid; - char *s, *end; + char *end, *s; double timeout; - tflag = verbose = 0; + oflag = 0; + tflag = 0; + verbose = 0; memset(&itv, 0, sizeof(itv)); - while ((opt = getopt(argc, argv, "t:v")) != -1) { + + while ((opt = getopt(argc, argv, "ot:v")) != -1) { switch (opt) { + case 'o': + oflag = 1; + break; case 't': tflag = 1; errno = 0; timeout = strtod(optarg, &end); - if (end == optarg || errno == ERANGE || - timeout < 0) + if (end == optarg || errno == ERANGE || timeout < 0) { errx(EX_DATAERR, "timeout value"); + } switch(*end) { case 0: case 's': break; case 'h': timeout *= 60; /* FALLTHROUGH */ case 'm': timeout *= 60; break; default: errx(EX_DATAERR, "timeout unit"); } - if (timeout > 100000000L) + if (timeout > 100000000L) { errx(EX_DATAERR, "timeout value"); + } itv.it_value.tv_sec = (time_t)timeout; timeout -= (time_t)timeout; itv.it_value.tv_usec = (suseconds_t)(timeout * 1000000UL); break; case 'v': verbose = 1; break; default: usage(); /* NOTREACHED */ } } argc -= optind; argv += optind; - if (argc == 0) + if (argc == 0) { usage(); + } kq = kqueue(); - if (kq == -1) - err(1, "kqueue"); + if (kq == -1) { + err(EX_OSERR, "kqueue"); + } e = malloc((argc + tflag) * sizeof(struct kevent)); - if (e == NULL) - err(1, "malloc"); + if (e == NULL) { + err(EX_OSERR, "malloc"); + } nleft = 0; for (n = 0; n < argc; n++) { s = argv[n]; - if (!strncmp(s, "/proc/", 6)) /* Undocumented Solaris compat */ + /* Undocumented Solaris compat */ + if (!strncmp(s, "/proc/", 6)) { s += 6; + } errno = 0; pid = strtol(s, &end, 10); if (pid < 0 || *end != '\0' || errno != 0) { warnx("%s: bad process id", s); continue; } - duplicate = 0; - for (i = 0; i < nleft; i++) - if (e[i].ident == (uintptr_t)pid) - duplicate = 1; - if (!duplicate) { - EV_SET(e + nleft, pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, - 0, NULL); - if (kevent(kq, 
e + nleft, 1, NULL, 0, NULL) == -1) - warn("%ld", pid); - else - nleft++; + for (i = 0; i < nleft; i++) { + if (e[i].ident == (uintptr_t)pid) { + break; + } } + if (i < nleft) { + /* Duplicate. */ + continue; + } + EV_SET(e + nleft, pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, NULL); + if (kevent(kq, e + nleft, 1, NULL, 0, NULL) == -1) { + warn("%ld", pid); + if (oflag) { + exit(EX_OK); + } + } else { + nleft++; + } } - if (tflag) { + if (nleft > 0 && tflag) { /* * Explicitly detect SIGALRM so that an exit status of 124 * can be returned rather than 142. */ EV_SET(e + nleft, SIGALRM, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL); - if (kevent(kq, e + nleft, 1, NULL, 0, NULL) == -1) + if (kevent(kq, e + nleft, 1, NULL, 0, NULL) == -1) { err(EX_OSERR, "kevent"); + } /* Ignore SIGALRM to not interrupt kevent(2). */ signal(SIGALRM, SIG_IGN); - if (setitimer(ITIMER_REAL, &itv, NULL) == -1) + if (setitimer(ITIMER_REAL, &itv, NULL) == -1) { err(EX_OSERR, "setitimer"); + } } while (nleft > 0) { n = kevent(kq, NULL, 0, e, nleft + tflag, NULL); - if (n == -1) - err(1, "kevent"); + if (n == -1) { + err(EX_OSERR, "kevent"); + } for (i = 0; i < n; i++) { if (e[i].filter == EVFILT_SIGNAL) { - if (verbose) + if (verbose) { printf("timeout\n"); - return (124); + } + exit(124); } if (verbose) { status = e[i].data; - if (WIFEXITED(status)) + if (WIFEXITED(status)) { printf("%ld: exited with status %d.\n", (long)e[i].ident, WEXITSTATUS(status)); - else if (WIFSIGNALED(status)) + } else if (WIFSIGNALED(status)) { printf("%ld: killed by signal %d.\n", (long)e[i].ident, WTERMSIG(status)); - else + } else { printf("%ld: terminated.\n", (long)e[i].ident); + } + } + if (oflag) { + exit(EX_OK); } --nleft; } } exit(EX_OK); } Index: projects/clang1000-import/bin/pwait/tests/pwait_test.sh =================================================================== --- projects/clang1000-import/bin/pwait/tests/pwait_test.sh (revision 357178) +++ projects/clang1000-import/bin/pwait/tests/pwait_test.sh (revision 357179) @@ -1,242 +1,322 @@ # $FreeBSD$ atf_test_case basic basic_head() { atf_set "descr" "Basic tests on pwait(1) utility" } basic_body() { sleep 1 & p1=$! sleep 5 & p5=$! sleep 10 & p10=$! atf_check \ -o empty \ -e empty \ -s exit:0 \ timeout --preserve-status 15 pwait $p1 $p5 $p10 atf_check \ -o empty \ -e inline:"kill: $p1: No such process\n" \ -s exit:1 \ kill -0 $p1 atf_check \ -o empty \ -e inline:"kill: $p5: No such process\n" \ -s exit:1 \ kill -0 $p5 atf_check \ -o empty \ -e inline:"kill: $p10: No such process\n" \ -s exit:1 \ kill -0 $p10 } basic_cleanup() { kill $p1 $p5 $p10 >/dev/null 2>&1 wait $p1 $p5 $p10 >/dev/null 2>&1 } atf_test_case time_unit time_unit_head() { atf_set "descr" "Test parsing the timeout unit and value" } time_unit_body() { init=1 atf_check \ -o empty \ -e inline:"pwait: timeout unit\n" \ -s exit:65 \ timeout --preserve-status 2 pwait -t 1d $init atf_check \ -o empty \ -e inline:"pwait: timeout unit\n" \ -s exit:65 \ timeout --preserve-status 2 pwait -t 1d $init atf_check \ -o empty \ -e inline:"pwait: timeout value\n" \ -s exit:65 \ timeout --preserve-status 2 pwait -t -1 $init atf_check \ -o empty \ -e inline:"pwait: timeout value\n" \ -s exit:65 \ timeout --preserve-status 2 pwait -t 100000001 $init # These long duration cases are expected to timeout from the # timeout utility rather than pwait -t. 
atf_check \ -o empty \ -e empty \ -s exit:143 \ timeout --preserve-status 2 pwait -t 100000000 $init atf_check \ -o empty \ -e empty \ -s exit:143 \ timeout --preserve-status 2 pwait -t 1h $init atf_check \ -o empty \ -e empty \ -s exit:143 \ timeout --preserve-status 2 pwait -t 1.5h $init atf_check \ -o empty \ -e empty \ -s exit:143 \ timeout --preserve-status 2 pwait -t 1m $init atf_check \ -o empty \ -e empty \ -s exit:143 \ timeout --preserve-status 2 pwait -t 1.5m $init atf_check \ -o empty \ -e empty \ -s exit:143 \ timeout --preserve-status 2 pwait -t 0 $init # The rest are fast enough that pwait -t is expected to trigger # the timeout. atf_check \ -o empty \ -e empty \ -s exit:124 \ timeout --preserve-status 2 pwait -t 1s $init atf_check \ -o empty \ -e empty \ -s exit:124 \ timeout --preserve-status 2 pwait -t 1.5s $init atf_check \ -o empty \ -e empty \ -s exit:124 \ timeout --preserve-status 2 pwait -t 1 $init atf_check \ -o empty \ -e empty \ -s exit:124 \ timeout --preserve-status 2 pwait -t 1.5 $init atf_check \ -o empty \ -e empty \ -s exit:124 \ timeout --preserve-status 2 pwait -t 0.5 $init } atf_test_case timeout_trigger_timeout timeout_trigger_timeout_head() { atf_set "descr" "Test that exceeding the timeout is detected" } timeout_trigger_timeout_body() { sleep 10 & p10=$! atf_check \ -o empty \ -e empty \ -s exit:124 \ timeout --preserve-status 6.5 pwait -t 5 $p10 } timeout_trigger_timeout_cleanup() { kill $p10 >/dev/null 2>&1 wait $p10 >/dev/null 2>&1 } atf_test_case timeout_no_timeout timeout_no_timeout_head() { atf_set "descr" "Test that not exceeding the timeout continues to wait" } timeout_no_timeout_body() { sleep 10 & p10=$! atf_check \ -o empty \ -e empty \ -s exit:0 \ timeout --preserve-status 11.5 pwait -t 12 $p10 } timeout_no_timeout_cleanup() { kill $p10 >/dev/null 2>&1 wait $p10 >/dev/null 2>&1 } atf_test_case timeout_many timeout_many_head() { atf_set "descr" "Test timeout on many processes" } timeout_many_body() { sleep 1 & p1=$! sleep 5 & p5=$! sleep 10 & p10=$! atf_check \ -o empty \ -e empty \ -s exit:124 \ timeout --preserve-status 7.5 pwait -t 6 $p1 $p5 $p10 } timeout_many_cleanup() { kill $p1 $p5 $p10 >/dev/null 2>&1 wait $p1 $p5 $p10 >/dev/null 2>&1 } +atf_test_case or_flag +or_flag_head() +{ + atf_set "descr" "Test OR flag" +} + +or_flag_body() +{ + sleep 2 & + p2=$! + + sleep 4 & + p4=$! + + sleep 6 & + p6=$! 
+ + atf_check \ + -o inline:"$p2: exited with status 0.\n" \ + -e empty \ + -s exit:0 \ + timeout --preserve-status 15 pwait -o -v $p2 $p4 $p6 + + atf_check \ + -o empty \ + -e inline:"pwait: $p2: No such process\n" \ + -s exit:0 \ + timeout --preserve-status 15 pwait -o $p2 $p4 $p6 + + atf_check \ + -o empty \ + -e empty \ + -s exit:0 \ + timeout --preserve-status 15 pwait -o $p4 $p6 + + atf_check \ + -o empty \ + -e inline:"pwait: $p4: No such process\n" \ + -s exit:0 \ + timeout --preserve-status 15 pwait -o $p4 $p6 + + atf_check \ + -o inline:"$p6: exited with status 0.\n" \ + -e empty \ + -s exit:0 \ + timeout --preserve-status 15 pwait -o -v $p6 + + atf_check \ + -o empty \ + -e inline:"pwait: $p6: No such process\n" \ + -s exit:0 \ + timeout --preserve-status 15 pwait -o $p6 + + atf_check \ + -o empty \ + -e inline:"kill: $p2: No such process\n" \ + -s exit:1 \ + kill -0 $p2 + + atf_check \ + -o empty \ + -e inline:"kill: $p4: No such process\n" \ + -s exit:1 \ + kill -0 $p4 + + atf_check \ + -o empty \ + -e inline:"kill: $p6: No such process\n" \ + -s exit:1 \ + kill -0 $p6 + +} + +or_flag_cleanup() +{ + kill $p2 $p4 $p6 >/dev/null 2>&1 + wait $p2 $p4 $p6 >/dev/null 2>&1 +} + atf_init_test_cases() { atf_add_test_case basic atf_add_test_case time_unit atf_add_test_case timeout_trigger_timeout atf_add_test_case timeout_no_timeout atf_add_test_case timeout_many + atf_add_test_case or_flag } Index: projects/clang1000-import/contrib/libxo/configure.ac =================================================================== --- projects/clang1000-import/contrib/libxo/configure.ac (revision 357178) +++ projects/clang1000-import/contrib/libxo/configure.ac (revision 357179) @@ -1,500 +1,500 @@ # # $Id$ # # See ./INSTALL for more info # # # Release numbering: even numbered dot releases are official ones, and # odd numbers are development ones. The svn version of this file will # only (ONLY!) ever (EVER!) contain odd numbers, so I'll always know if # a particular user has the dist or svn release. # AC_PREREQ(2.2) -AC_INIT([libxo], [1.3.1], [phil@juniper.net]) +AC_INIT([libxo], [1.4.0], [phil@juniper.net]) AM_INIT_AUTOMAKE([-Wall -Werror foreign -Wno-portability]) # Support silent build rules. Requires at least automake-1.11. # Disable with "configure --disable-silent-rules" or "make V=1" m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) AC_PROG_CC AC_PROG_INSTALL AC_CONFIG_MACRO_DIR([m4]) AC_PROG_LN_S # Must be after AC_PROG_AR LT_INIT([dlopen shared]) AC_PATH_PROG(BASENAME, basename, /usr/bin/basename) AC_PATH_PROG(BISON, bison, /usr/bin/bison) AC_PATH_PROG(CAT, cat, /bin/cat) AC_PATH_PROG(CHMOD, chmod, /bin/chmod) AC_PATH_PROG(CP, cp, /bin/cp) AC_PATH_PROG(DIFF, diff, /usr/bin/diff) AC_PATH_PROG(MKDIR, mkdir, /bin/mkdir) AC_PATH_PROG(MV, mv, /bin/mv) AC_PATH_PROG(RM, rm, /bin/rm) AC_PATH_PROG(SED, sed, /bin/sed) AC_STDC_HEADERS # Checks for typedefs, structures, and compiler characteristics. AC_C_INLINE AC_TYPE_SIZE_T # Checks for library functions. 
AC_FUNC_ALLOCA AC_FUNC_MALLOC AC_FUNC_REALLOC AC_CHECK_FUNCS([bzero memmove strchr strcspn strerror strspn]) AC_CHECK_FUNCS([sranddev srand strlcpy]) AC_CHECK_FUNCS([fdopen getrusage]) AC_CHECK_FUNCS([gettimeofday ctime]) AC_CHECK_FUNCS([getpass]) AC_CHECK_FUNCS([getprogname]) AC_CHECK_FUNCS([sysctlbyname]) AC_CHECK_FUNCS([flock]) AC_CHECK_FUNCS([asprintf]) AC_CHECK_FUNCS([__flbf]) AC_CHECK_FUNCS([sysctlbyname]) AC_CHECK_HEADERS([dlfcn.h]) AC_CHECK_HEADERS([dlfcn.h]) AC_CHECK_HEADERS([stdio_ext.h]) AC_CHECK_HEADERS([tzfile.h]) AC_CHECK_HEADERS([stdtime/tzfile.h]) AC_CHECK_FUNCS([dlfunc]) AC_CHECK_HEADERS([sys/time.h]) AC_CHECK_HEADERS([ctype.h errno.h stdio.h stdlib.h]) AC_CHECK_HEADERS([string.h sys/param.h unistd.h ]) AC_CHECK_HEADERS([sys/sysctl.h]) AC_CHECK_HEADERS([threads.h]) AC_CHECK_HEADERS([monitor.h]) dnl humanize_number(3) is a great function, but it's not standard. dnl Note Macosx has the function in libutil.a but doesn't ship the dnl header file, so I'll need to carry my own implementation. See: dnl https://devforums.apple.com/thread/271121 AC_CHECK_HEADERS([libutil.h]) AC_CHECK_LIB([util], [humanize_number], [HAVE_HUMANIZE_NUMBER=$ac_cv_header_libutil_h], [HAVE_HUMANIZE_NUMBER=no]) AC_MSG_RESULT(humanize_number results: :${HAVE_HUMANIZE_NUMBER}:${ac_cv_header_libutil_h}:) if test "$HAVE_HUMANIZE_NUMBER" = "yes"; then AC_DEFINE([HAVE_HUMANIZE_NUMBER], [1], [humanize_number(3)]) fi AM_CONDITIONAL([HAVE_HUMANIZE_NUMBER], [test "$HAVE_HUMANIZE_NUMBER" = "yes"]) AC_ARG_ENABLE([gettext], [ --disable-gettext Turn off support for gettext], [GETTEXT_ENABLE=$enableval], [GETTEXT_ENABLE=yes]) dnl Looking for gettext(), assumably in libintl AC_ARG_WITH(gettext, [ --with-gettext=[PFX] Specify location of gettext installation], [GETTEXT_PREFIX=$withval], [GETTEXT_PREFIX=/usr], ) HAVE_GETTEXT=no if test "$GETTEXT_ENABLE" != "no"; then AC_MSG_CHECKING([gettext in ${GETTEXT_PREFIX}]) _save_cflags="$CFLAGS" CFLAGS="$CFLAGS -I${GETTEXT_PREFIX}/include -L${GETTEXT_PREFIX}/lib -Werror -lintl" AC_LINK_IFELSE([AC_LANG_SOURCE([[#include ] [int main() {char *cp = dgettext(NULL, "xx"); return 0; }]])], [HAVE_GETTEXT=yes], [HAVE_GETTEXT=no]) CFLAGS="$_save_cflags" AC_MSG_RESULT([$HAVE_GETTEXT]) if test "$HAVE_GETTEXT" != "yes"; then GETTEXT_PREFIX=/opt/local AC_MSG_CHECKING([gettext in ${GETTEXT_PREFIX}]) _save_cflags="$CFLAGS" CFLAGS="$CFLAGS -I${GETTEXT_PREFIX}/include -L${GETTEXT_PREFIX}/lib -Werror -lintl" AC_LINK_IFELSE([AC_LANG_SOURCE([[#include ] [int main() {char *cp = dgettext(NULL, "xx"); return 0; }]])], [HAVE_GETTEXT=yes], [HAVE_GETTEXT=no]) CFLAGS="$_save_cflags" AC_MSG_RESULT([$HAVE_GETTEXT]) fi if test "$HAVE_GETTEXT" != "yes"; then GETTEXT_PREFIX=/usr/local AC_MSG_CHECKING([gettext in ${GETTEXT_PREFIX}]) _save_cflags="$CFLAGS" CFLAGS="$CFLAGS -I${GETTEXT_PREFIX}/include -L${GETTEXT_PREFIX}/lib -Werror -lintl" AC_LINK_IFELSE([AC_LANG_SOURCE([[#include ] [int main() {char *cp = dgettext(NULL, "xx"); return 0; }]])], [HAVE_GETTEXT=yes], [HAVE_GETTEXT=no]) CFLAGS="$_save_cflags" AC_MSG_RESULT([$HAVE_GETTEXT]) fi fi if test "$HAVE_GETTEXT" = "yes"; then AC_DEFINE([HAVE_GETTEXT], [1], [gettext(3)]) GETTEXT_CFLAGS="-I${GETTEXT_PREFIX}/include" GETTEXT_LIBS="-L${GETTEXT_PREFIX}/lib -lintl" else GETTEXT_PREFIX=none GETTEXT_CFLAGS= GETTEXT_LIBS= fi AC_SUBST(GETTEXT_CFLAGS) AC_SUBST(GETTEXT_LIBS) GETTEXT_LIBDIR=${GETTEXT_PREFIX}/lib AC_SUBST(GETTEXT_LIBDIR) if test -x ${GETTEXT_PREFIX}/bin/msgfmt ; then GETTEXT_BINDIR=${GETTEXT_PREFIX}/bin elif test -x ${GETTEXT_PREFIX}/local/bin/msgfmt ; then 
GETTEXT_BINDIR=${GETTEXT_PREFIX}/local/bin else AC_MSG_NOTICE("could not find msgfmt tool") # Use a (bad) fall back value GETTEXT_BINDIR=${GETTEXT_PREFIX}/bin fi AC_SUBST(GETTEXT_BINDIR) AM_CONDITIONAL([HAVE_GETTEXT], [test "$HAVE_GETTEXT" = "yes"]) dnl Looking for how to do thread-local variables AC_ARG_WITH(threads, [ --with-threads=[STYLE] Specify style of thread-local support (none)], [THREAD_LOCAL=$withval], [THREAD_LOCAL=unknown], ) AC_MSG_CHECKING([thread-locals are ${THREAD_LOCAL}]) if test "$THREAD_LOCAL" = "unknown"; then AC_LINK_IFELSE([AC_LANG_SOURCE([[] [__thread int foo; int main() { foo++; return foo; }]])], [THREAD_LOCAL=before], [THREAD_LOCAL=unknown]) AC_MSG_RESULT([$THREAD_LOCAL]) fi if test "$THREAD_LOCAL" = "unknown"; then AC_LINK_IFELSE([AC_LANG_SOURCE([[] [int __thread foo; int main() { foo++; return foo; }]])], [THREAD_LOCAL=after], [THREAD_LOCAL=unknown]) AC_MSG_RESULT([$THREAD_LOCAL]) fi if test "$THREAD_LOCAL" = "unknown"; then AC_LINK_IFELSE([AC_LANG_SOURCE([[] [__declspec(int) foo; int main() { foo++; return foo; }]])], [THREAD_LOCAL=declspec], [THREAD_LOCAL=unknown]) AC_MSG_RESULT([$THREAD_LOCAL]) fi if test "$THREAD_LOCAL" != "unknown"; then AC_DEFINE_UNQUOTED([HAVE_THREAD_LOCAL], THREAD_LOCAL_${THREAD_LOCAL}, [thread-local setting]) fi dnl Looking for libcrypto.... AC_CHECK_LIB([crypto], [MD5_Init]) AM_CONDITIONAL([HAVE_LIBCRYPTO], [test "$HAVE_LIBCRYPTO" != "no"]) AC_CHECK_MEMBER([struct sockaddr_un.sun_len], [HAVE_SUN_LEN=yes ; AC_DEFINE([HAVE_SUN_LEN], [1], [Have struct sockaddr_un.sun_len])], [HAS_SUN_LEN=no], [[#include ]]) AC_CHECK_DECLS([__isthreaded], [], [], [#include ]) HAVE_ISTHREADED=${ac_cv_have_decl___isthreaded} dnl dnl Some packages need to be checked against version numbers so we dnl define a function here for later use dnl AC_DEFUN([VERSION_TO_NUMBER], [`$1 | sed -e 's/lib.* //' | awk 'BEGIN { FS = "."; } { printf "%d", ([$]1 * 1000 + [$]2) * 1000 + [$]3;}'`]) LIBSLAX_CONFIG_PREFIX="" LIBSLAX_SRC="" AC_ARG_WITH(libslax-prefix, [ --with-libslax-prefix=[PFX] Specify location of libslax config], LIBSLAX_CONFIG_PREFIX=$withval ) AC_MSG_CHECKING(for libslax) if test "x$LIBSLAX_CONFIG_PREFIX" != "x" then SLAX_CONFIG=${LIBSLAX_CONFIG_PREFIX}/bin/slax-config else SLAX_CONFIG=slax-config fi dnl dnl make sure slax-config is executable, dnl test version and init our variables dnl if ${SLAX_CONFIG} --libs > /dev/null 2>&1 then LIBSLAX_VERSION=`$SLAX_CONFIG --version` SLAX_BINDIR="`$SLAX_CONFIG --bindir | head -1`" SLAX_OXTRADOCDIR="`$SLAX_CONFIG --oxtradoc | head -1`" AC_MSG_RESULT($LIBSLAX_VERSION found) HAVE_OXTRADOC=yes else LIBSLAX_VERSION= SLAX_BINDIR= SLAX_OXTRADOCDIR= AC_MSG_RESULT([no]) HAVE_OXTRADOC=no fi AM_CONDITIONAL([HAVE_OXTRADOC], [test "$HAVE_OXTRADOC" != "no"]) AC_SUBST(SLAX_BINDIR) AC_SUBST(SLAX_OXTRADOCDIR) AC_MSG_CHECKING([whether to build with warnings]) AC_ARG_ENABLE([warnings], [ --enable-warnings Turn on compiler warnings], [LIBXO_WARNINGS=$enableval], [LIBXO_WARNINGS=no]) AC_MSG_RESULT([$LIBXO_WARNINGS]) AM_CONDITIONAL([LIBXO_WARNINGS_HIGH], [test "$LIBXO_WARNINGS" != "no"]) AC_MSG_CHECKING([whether to build with debugging]) AC_ARG_ENABLE([debug], [ --enable-debug Turn on debugging], [LIBXO_DEBUG=yes; AC_DEFINE([LIBXO_DEBUG], [1], [Enable debugging])], [LIBXO_DEBUG=no]) AC_MSG_RESULT([$LIBXO_DEBUG]) AM_CONDITIONAL([LIBXO_DEBUG], [test "$LIBXO_DEBUG" != "no"]) AC_MSG_CHECKING([whether to use int return codes]) AC_ARG_ENABLE([int-return-codes], [ --enable-int-return-codes Use int return codes (instead of ssize_t)], 
[USE_INT_RETURN_CODES=yes; AC_DEFINE([USE_INT_RETURN_CODES], [1], [Use int return codes])], [USE_INT_RETURN_CODES=no]) AC_MSG_RESULT([$USE_INT_RETURN_CODES]) AC_MSG_CHECKING([whether to build with text-only rendering]) AC_ARG_ENABLE([text-only], [ --enable-text-only Turn on text-only rendering], [LIBXO_TEXT_ONLY=yes; AC_DEFINE([LIBXO_TEXT_ONLY], [1], [Enable text-only rendering])], [LIBXO_TEXT_ONLY=no]) AC_MSG_RESULT([$LIBXO_TEXT_ONLY]) AM_CONDITIONAL([LIBXO_TEXT_ONLY], [test "$LIBXO_TEXT_ONLY" != "no"]) AC_MSG_CHECKING([whether to build with local wcwidth implementation]) AC_ARG_ENABLE([wcwidth], [ --disable-wcwidth Disable local wcwidth implementation], [LIBXO_WCWIDTH=$enableval], [LIBXO_WCWIDTH=yes]) AC_MSG_RESULT([$LIBXO_WCWIDTH]) if test "${LIBXO_WCWIDTH}" != "no"; then AC_DEFINE([LIBXO_WCWIDTH], [1], [Enable local wcwidth implementation]) fi AC_MSG_CHECKING([retain hash bucket size]) AC_ARG_WITH(retain-size, [ --with-retain-size=[DIR] Specify retain hash bucket size (in bits)], [XO_RETAIN_SIZE=$withval], [XO_RETAIN_SIZE=default] ) AC_MSG_RESULT([$XO_RETAIN_SIZE]) if test "${XO_RETAIN_SIZE}" != "default"; then AC_DEFINE_UNQUOTED([XO_RETAIN_SIZE], ${XO_RETAIN_SIZE}, [Retain hash bucket size]) fi AC_CHECK_LIB([m], [lrint]) AM_CONDITIONAL([HAVE_LIBM], [test "$HAVE_LIBM" != "no"]) AC_MSG_CHECKING([compiler for gcc]) HAVE_GCC=no if test "${CC}" != ""; then HAVE_GCC=`${CC} --version 2>&1 | grep GCC` if test "${HAVE_GCC}" != ""; then HAVE_GCC=yes else HAVE_GCC=no fi fi AC_MSG_RESULT([$HAVE_GCC]) AM_CONDITIONAL([HAVE_GCC], [test "$HAVE_GCC" = "yes"]) AC_MSG_CHECKING([whether to build with printflike]) AC_ARG_ENABLE([printflike], [ --enable-printflike Enable use of GCC __printflike attribute], [HAVE_PRINTFLIKE=yes; AC_DEFINE([HAVE_PRINTFLIKE], [1], [Support printflike])], [HAVE_PRINTFLIKE=no]) AC_MSG_RESULT([$HAVE_PRINTFLIKE]) AM_CONDITIONAL([HAVE_PRINTFLIKE], [test "$HAVE_PRINTFLIKE" != ""]) AC_MSG_CHECKING([whether to build with LIBXO_OPTIONS]) AC_ARG_ENABLE([libxo-options], [ --disable-libxo-options Turn off support for LIBXO_OPTIONS], [LIBXO_OPTS=$enableval], [LIBXO_OPTS=yes]) AC_MSG_RESULT([$LIBXO_OPTS]) AM_CONDITIONAL([NO_LIBXO_OPTIONS], [test "$LIBXO_OPTS" != "yes"]) case $host_os in darwin*) LIBTOOL=glibtool XO_LIBEXT=dylib ;; Linux*|linux*) CFLAGS="-D_GNU_SOURCE $CFLAGS" LDFLAGS=-ldl XO_LIBEXT=so ;; cygwin*|CYGWIN*) LDFLAGS=-no-undefined XO_LIBEXT=ddl ;; esac case $prefix in NONE) prefix=/usr/local ;; esac XO_LIBS=-lxo XO_SRCDIR=${srcdir} XO_LIBDIR=${libdir} XO_BINDIR=${bindir} XO_INCLUDEDIR=${includedir} XO_CFLAGS="${CFLAGS}" AC_SUBST(XO_LIBS) AC_SUBST(XO_SRCDIR) AC_SUBST(XO_LIBDIR) AC_SUBST(XO_BINDIR) AC_SUBST(XO_INCLUDEDIR) AC_SUBST(XO_LIBEXT) AC_SUBST(XO_CFLAGS) AC_ARG_WITH(encoder-dir, [ --with-encoder-dir=[DIR] Specify location of encoder libraries], [XO_ENCODERDIR=$withval], [XO_ENCODERDIR=$libdir/libxo/encoder] ) AC_SUBST(XO_ENCODERDIR) AC_ARG_WITH(share-dir, [ --with-share-dir=[DIR] Specify location of shared files], [XO_SHAREDIR=$withval], [XO_SHAREDIR=$datarootdir/libxo] ) XO_SHAREDIR=`echo $XO_SHAREDIR | sed "s;\\${prefix};$prefix;"` AC_SUBST(XO_SHAREDIR) dnl for the spec file RELDATE=`date +'%Y-%m-%d%n'` AC_SUBST(RELDATE) AC_MSG_RESULT(Using configure dir $ac_abs_confdir) if test -d $ac_abs_confdir/.git ; then extra=`git branch | awk '/\*/ { print $2 }'` if test "$extra" != "" -a "$extra" != "master" then LIBXO_VERSION_EXTRA="-git-$extra" fi fi LIBXO_VERSION=$PACKAGE_VERSION LIBXO_VERSION_NUMBER=VERSION_TO_NUMBER(echo $PACKAGE_VERSION) AC_SUBST(LIBXO_VERSION) 
AC_SUBST(LIBXO_VERSION_NUMBER) AC_SUBST(LIBXO_VERSION_EXTRA) AC_DEFINE_UNQUOTED(LIBXO_VERSION, ["$LIBXO_VERSION"], [Version number as dotted value]) AC_DEFINE_UNQUOTED(LIBXO_VERSION_NUMBER, [$LIBXO_VERSION_NUMBER], [Version number as a number]) AC_DEFINE_UNQUOTED(LIBXO_VERSION_STRING, ["$LIBXO_VERSION_NUMBER"], [Version number as string]) AC_DEFINE_UNQUOTED(LIBXO_VERSION_EXTRA, ["$LIBXO_VERSION_EXTRA"], [Version number extra information]) AC_CONFIG_HEADERS([libxo/xo_config.h]) AC_CONFIG_FILES([ Makefile libxo-config xohtml/xohtml.sh libxo/Makefile libxo/add.man encoder/Makefile encoder/cbor/Makefile encoder/csv/Makefile encoder/test/Makefile xo/Makefile xolint/Makefile xohtml/Makefile xopo/Makefile packaging/libxo.pc doc/Makefile doc/top-link.html tests/Makefile tests/core/Makefile tests/gettext/Makefile tests/xo/Makefile packaging/libxo.spec packaging/libxo.rb.base ]) AC_OUTPUT AC_MSG_NOTICE([summary of build options: libxo version: ${VERSION} ${LIBXO_VERSION_EXTRA} host type: ${host} / ${host_os} install prefix: ${prefix} srcdir: ${XO_SRCDIR} libdir: ${XO_LIBDIR} bindir: ${XO_BINDIR} includedir: ${XO_INCLUDEDIR} share dir: ${XO_SHAREDIR} extensions dir: ${XO_ENCODERDIR} oxtradoc dir: ${SLAX_OXTRADOCDIR} compiler: ${CC} (${HAVE_GCC:-no}) compiler flags: ${CFLAGS} library types: Shared=${enable_shared}, Static=${enable_static} warnings: ${LIBXO_WARNINGS:-no} debug: ${LIBXO_DEBUG:-no} printf-like: ${HAVE_PRINTFLIKE:-no} libxo-options: ${LIBXO_OPTS:-no} text-only: ${LIBXO_TEXT_ONLY:-no} gettext: ${HAVE_GETTEXT:-no} (${GETTEXT_PREFIX}) isthreaded: ${HAVE_ISTHREADED:-no} thread-local: ${THREAD_LOCAL:-no} local wcwidth: ${LIBXO_WCWIDTH:-no} retain size: ${XO_RETAIN_SIZE:-no} ]) Index: projects/clang1000-import/contrib/libxo/doc/api.rst =================================================================== --- projects/clang1000-import/contrib/libxo/doc/api.rst (revision 357178) +++ projects/clang1000-import/contrib/libxo/doc/api.rst (revision 357179) @@ -1,1620 +1,1679 @@ .. index:: API The libxo API ============= This section gives details about the functions in libxo, how to call them, and the actions they perform. .. index:: Handles .. _handles: Handles ------- libxo uses "handles" to control its rendering functionality. The handle contains state and buffered data, as well as callback functions to process data. Handles give an abstraction for libxo that encapsulates the state of a stream of output. Handles have the data type "`xo_handle_t`" and are opaque to the caller. The library has a default handle that is automatically initialized. By default, this handle will send text style output (`XO_STYLE_TEXT`) to standard output. The xo_set_style and xo_set_flags functions can be used to change this behavior. For the typical command that is generating output on standard output, there is no need to create an explicit handle, but they are available when needed, e.g., for daemons that generate multiple streams of output. Many libxo functions take a handle as their first parameter; most functions that do not take one use the default handle. Any function taking a handle can be passed NULL to access the default handle. For the convenience of callers, the libxo library includes handle-less functions that implicitly use the default handle. For example, the following are equivalent:: xo_emit("test"); xo_emit_h(NULL, "test"); Handles are created using `xo_create` and destroyed using `xo_destroy`. .. index:: xo_create xo_create ~~~~~~~~~ ..
c:function:: xo_handle_t *xo_create (xo_style_t style, xo_xof_flags_t flags) The `xo_create` function allocates a new handle which can be passed to further libxo function calls. The `xo_handle_t` structure is opaque. :param xo_style_t style: Output style (XO_STYLE\_*) :param xo_xof_flags_t flags: Flags for this handle (XOF\_*) :return: New libxo handle :rtype: xo_handle_t \* :: EXAMPLE: xo_handle_t *xop = xo_create(XO_STYLE_JSON, XOF_WARN | XOF_PRETTY); .... xo_emit_h(xop, "testing\n"); See also :ref:`output-styles` and :ref:`flags`. .. index:: xo_create_to_file .. index:: XOF_CLOSE_FP xo_create_to_file ~~~~~~~~~~~~~~~~~ .. c:function:: xo_handle_t *xo_create_to_file (FILE *fp, unsigned style, unsigned flags) The `xo_create_to_file` function is a convenience function provided for situations when output should be written to a different file rather than the default of standard output. The `XOF_CLOSE_FP` flag can be set on the returned handle to trigger a call to fclose() for the FILE pointer when the handle is destroyed, avoiding the need for the caller to perform this task. :param fp: FILE to use as base for this handle :type fp: FILE * :param xo_style_t style: Output style (XO_STYLE\_*) :param xo_xof_flags_t flags: Flags for this handle (XOF\_*) :return: New libxo handle :rtype: xo_handle_t \* .. index:: xo_set_writer .. index:: xo_write_func_t .. index:: xo_close_func_t .. index:: xo_flush_func_t xo_set_writer ~~~~~~~~~~~~~ .. c:function:: void xo_set_writer (xo_handle_t *xop, void *opaque, \ xo_write_func_t write_func, xo_close_func_t close_func, \ xo_flush_func_t flush_func) The `xo_set_writer` function allows custom functions which can tailor how libxo writes data. The `opaque` argument is recorded and passed back to the functions, allowing the function to acquire context information. The *write_func* function writes data to the output stream. The *close_func* function can release this opaque data and any other resources as needed. The *flush_func* function is called to flush buffered data associated with the opaque object. :param xop: Handle to modify (or NULL for default handle) :type xop: xo_handle_t * :param opaque: Pointer to opaque data passed to the given functions :type opaque: void * :param xo_write_func_t write_func: New write function :param xo_close_func_t close_func: New close function :param xo_flush_func_t flush_func: New flush function :returns: void .. index:: xo_get_style xo_get_style ~~~~~~~~~~~~ .. c:function:: xo_style_t xo_get_style(xo_handle_t *xop) Use the `xo_get_style` function to find the current output style for a given handle. To use the default handle, pass a `NULL` handle. :param xop: Handle to interrogate (or NULL for default handle) :type xop: xo_handle_t * :returns: Output style (XO_STYLE\_*) :rtype: xo_style_t :: EXAMPLE:: style = xo_get_style(NULL); .. index:: XO_STYLE_TEXT .. index:: XO_STYLE_XML .. index:: XO_STYLE_JSON .. index:: XO_STYLE_HTML .. _output-styles: Output Styles (XO_STYLE\_\*) ++++++++++++++++++++++++++++ The libxo functions accept a set of output styles: =============== ========================= Flag Description =============== ========================= XO_STYLE_TEXT Traditional text output XO_STYLE_XML XML encoded data XO_STYLE_JSON JSON encoded data XO_STYLE_HTML HTML encoded data =============== ========================= The "XML", "JSON", and "HTML" output styles all use the UTF-8 character encoding. "TEXT" output uses locale-based encoding. .. index:: xo_set_style xo_set_style ~~~~~~~~~~~~ ..
c:function:: void xo_set_style(xo_handle_t *xop, xo_style_t style) The `xo_set_style` function is used to change the output style setting for a handle. To use the default handle, pass a `NULL` handle. :param xop: Handle to modify :type xop: xo_handle_t * :param xo_style_t style: Output style (XO_STYLE\_*) :returns: void :: EXAMPLE: xo_set_style(NULL, XO_STYLE_XML); .. index:: xo_set_style_name xo_set_style_name ~~~~~~~~~~~~~~~~~ .. c:function:: int xo_set_style_name (xo_handle_t *xop, const char *style) The `xo_set_style_name` function can be used to set the style based on a name encoded as a string. The name can be any of the supported styles: "text", "xml", "json", or "html". :param xop: Handle to modify (or NULL for default handle) :type xop: xo_handle_t \* :param style: Text name of the style :type style: const char \* :returns: zero for success, non-zero for error :rtype: int :: EXAMPLE: xo_set_style_name(NULL, "html"); .. index:: xo_set_flags xo_set_flags ~~~~~~~~~~~~ .. c:function:: void xo_set_flags(xo_handle_t *xop, xo_xof_flags_t flags) :param xop: Handle to modify (or NULL for default handle) :type xop: xo_handle_t \* :param xo_xof_flags_t flags: Flags to add for the handle :returns: void Use the `xo_set_flags` function to turn on flags for a given libxo handle. To use the default handle, pass a `NULL` handle. :: EXAMPLE: xo_set_flags(NULL, XOF_PRETTY | XOF_WARN); .. index:: Flags; XOF_* .. index:: XOF_CLOSE_FP .. index:: XOF_COLOR .. index:: XOF_COLOR_ALLOWED .. index:: XOF_DTRT .. index:: XOF_INFO .. index:: XOF_KEYS .. index:: XOF_NO_ENV .. index:: XOF_NO_HUMANIZE .. index:: XOF_PRETTY .. index:: XOF_UNDERSCORES .. index:: XOF_UNITS .. index:: XOF_WARN .. index:: XOF_WARN_XML .. index:: XOF_XPATH .. index:: XOF_COLUMNS .. index:: XOF_FLUSH .. _flags: Flags (XOF\_\*) +++++++++++++++ The set of valid flags includes: =================== ========================================= Flag Description =================== ========================================= XOF_CLOSE_FP Close file pointer on `xo_destroy` XOF_COLOR Enable color and effects in output XOF_COLOR_ALLOWED Allow color/effect for terminal output XOF_DTRT Enable "do the right thing" mode XOF_INFO Display info data attributes (HTML) XOF_KEYS Emit the key attribute (XML) XOF_NO_ENV Do not use the :ref:`libxo-options` env var XOF_NO_HUMANIZE Do not humanize output (TEXT, HTML) XOF_PRETTY Make "pretty printed" output XOF_UNDERSCORES Replace hyphens with underscores XOF_UNITS Display units (XML, HTML) XOF_WARN Generate warnings for broken calls XOF_WARN_XML Generate warnings in XML on stdout XOF_XPATH Emit XPath expressions (HTML) XOF_COLUMNS Force xo_emit to return columns used XOF_FLUSH Flush output after each `xo_emit` call =================== ========================================= The `XOF_CLOSE_FP` flag will trigger the call of the *close_func* (provided via `xo_set_writer`) when the handle is destroyed. The `XOF_COLOR` flag enables color and effects in output regardless of output device, while the `XOF_COLOR_ALLOWED` flag allows color and effects only if the output device is a terminal. The `XOF_PRETTY` flag requests "pretty printing", which will trigger the addition of indentation and newlines to enhance the readability of XML, JSON, and HTML output. Text output is not affected. The `XOF_WARN` flag requests that warnings will trigger diagnostic output (on standard error) when the library notices errors during operations, or with arguments to functions. Without warnings enabled, such conditions are ignored.
Warnings allow developers to debug their interaction with libxo. The function `xo_failure` can be used as a breakpoint for a debugger, regardless of whether warnings are enabled. If the style is `XO_STYLE_HTML`, the following additional flags can be used: =============== ========================================= Flag Description =============== ========================================= XOF_XPATH Emit "data-xpath" attributes XOF_INFO Emit additional info fields =============== ========================================= The `XOF_XPATH` flag enables the emission of XPath expressions detailing the hierarchy of XML elements used to encode the data field, if the XML style of output were requested. The `XOF_INFO` flag encodes additional informational fields for HTML output. See :ref:`field-information` for details. If the style is `XO_STYLE_XML`, the following additional flags can be used: =============== ========================================= Flag Description =============== ========================================= XOF_KEYS Flag "key" fields for XML =============== ========================================= The `XOF_KEYS` flag adds a "key" attribute to the XML encoding for field definitions that use the "k" modifier. The key attribute has the value "key":: xo_emit("{k:name}", item); XML: <name key="key">truck</name> .. index:: xo_clear_flags xo_clear_flags ++++++++++++++ .. c:function:: void xo_clear_flags (xo_handle_t *xop, xo_xof_flags_t flags) :param xop: Handle to modify (or NULL for default handle) :type xop: xo_handle_t \* :param xo_xof_flags_t flags: Flags to clear for the handle :returns: void Use the `xo_clear_flags` function to turn off the given flags in a specific handle. To use the default handle, pass a `NULL` handle. .. index:: xo_set_options xo_set_options ++++++++++++++ .. c:function:: int xo_set_options (xo_handle_t *xop, const char *input) :param xop: Handle to modify (or NULL for default handle) :type xop: xo_handle_t \* :param input: String containing options to set :type input: const char * :returns: zero for success, non-zero for error :rtype: int The `xo_set_options` function accepts a comma-separated list of output styles and modifier flags and enables them for a specific handle. The options are identical to those listed in :ref:`options`. To use the default handle, pass a `NULL` handle. .. index:: xo_destroy xo_destroy ++++++++++ .. c:function:: void xo_destroy(xo_handle_t *xop) :param xop: Handle to modify (or NULL for default handle) :type xop: xo_handle_t \* :returns: void The `xo_destroy` function releases a handle and any resources it is using. Calling `xo_destroy` with a `NULL` handle will release any resources associated with the default handle. .. index:: xo_emit Emitting Content (xo_emit) -------------------------- The functions in this section are used to emit output. The "fmt" argument is a string containing field descriptors as specified in :ref:`format-strings`. The use of a handle is optional and `NULL` can be passed to access the internal "default" handle. See :ref:`handles`. The remaining arguments to `xo_emit` and `xo_emit_h` are a set of arguments corresponding to the fields in the format string. Care must be taken to ensure the argument types match the fields in the format string, since an inappropriate cast can ruin your day. The vap argument to `xo_emit_hv` points to a variable argument list that can be used to retrieve arguments via `va_arg`. .. c:function:: xo_ssize_t xo_emit (const char *fmt, ...)
:param fmt: The format string, followed by zero or more arguments :returns: If XOF_COLUMNS is set, the number of columns used; otherwise the number of bytes emitted :rtype: xo_ssize_t .. c:function:: xo_ssize_t xo_emit_h (xo_handle_t *xop, const char *fmt, ...) :param xop: Handle to modify (or NULL for default handle) :type xop: xo_handle_t \* :param fmt: The format string, followed by zero or more arguments :returns: If XOF_COLUMNS is set, the number of columns used; otherwise the number of bytes emitted :rtype: xo_ssize_t .. c:function:: xo_ssize_t xo_emit_hv (xo_handle_t *xop, const char *fmt, va_list vap) :param xop: Handle to modify (or NULL for default handle) :type xop: xo_handle_t \* :param fmt: The format string :param va_list vap: A set of variadic arguments :returns: If XOF_COLUMNS is set, the number of columns used; otherwise the number of bytes emitted :rtype: xo_ssize_t .. index:: xo_emit_field Single Field Emitting Functions (xo_emit_field) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The functions in this section also emit output, but only a single field at a time. These functions are intended to avoid the scenario where one would otherwise need to compose a format descriptor using `snprintf`. The individual parts of the format descriptor are passed in distinctly. .. c:function:: xo_ssize_t xo_emit_field (const char *rolmod, const char *contents, const char *fmt, const char *efmt, ...) :param rolmod: A comma-separated list of field roles and field modifiers :type rolmod: const char * :param contents: The "contents" portion of the field description string :type contents: const char * :param fmt: Content format string :type fmt: const char * :param efmt: Encoding format string, followed by additional arguments :type efmt: const char * :returns: If XOF_COLUMNS is set, the number of columns used; otherwise the number of bytes emitted :rtype: xo_ssize_t :: EXAMPLE:: xo_emit_field("T", "Host name is ", NULL, NULL); xo_emit_field("V", "host-name", NULL, NULL, host_name); .. c:function:: xo_ssize_t xo_emit_field_h (xo_handle_t *xop, const char *rolmod, const char *contents, const char *fmt, const char *efmt, ...) :param xop: Handle to modify (or NULL for default handle) :type xop: xo_handle_t \* :param rolmod: A comma-separated list of field roles and field modifiers :type rolmod: const char * :param contents: The "contents" portion of the field description string :type contents: const char * :param fmt: Content format string :type fmt: const char * :param efmt: Encoding format string, followed by additional arguments :type efmt: const char * :returns: If XOF_COLUMNS is set, the number of columns used; otherwise the number of bytes emitted :rtype: xo_ssize_t .. c:function:: xo_ssize_t xo_emit_field_hv (xo_handle_t *xop, const char *rolmod, const char *contents, const char *fmt, const char *efmt, va_list vap) :param xop: Handle to modify (or NULL for default handle) :type xop: xo_handle_t \* :param rolmod: A comma-separated list of field roles and field modifiers :type rolmod: const char * :param contents: The "contents" portion of the field description string :type contents: const char * :param fmt: Content format string :type fmt: const char * :param efmt: Encoding format string :type efmt: const char * :param va_list vap: A set of variadic arguments :returns: If XOF_COLUMNS is set, the number of columns used; otherwise the number of bytes emitted :rtype: xo_ssize_t .. index:: xo_attr ..
_xo_attr: Attributes (xo_attr) ~~~~~~~~~~~~~~~~~~~~ The functions in this section emit an XML attribute with the given name and value. This only affects the XML output style. The `name` parameter gives the name of the attribute to be encoded. The `fmt` parameter gives a printf-style format string used to format the value of the attribute using any remaining arguments, or the vap parameter passed to `xo_attr_hv`. All attributes recorded via `xo_attr` are placed on the next container, instance, leaf, or leaf list that is emitted. Since attributes are only emitted in XML, their use should be limited to meta-data and additional or redundant representations of data already emitted in other form. .. c:function:: xo_ssize_t xo_attr (const char *name, const char *fmt, ...) :param name: Attribute name :type name: const char * :param fmt: Attribute value, as variadic arguments :type fmt: const char * :returns: -1 for error, or the number of bytes in the formatted attribute value :rtype: xo_ssize_t :: EXAMPLE: xo_attr("seconds", "%ld", (unsigned long) login_time); struct tm *tmp = localtime(&login_time); strftime(buf, sizeof(buf), "%R", tmp); xo_emit("Logged in at {:login-time}\n", buf); XML: <login-time seconds="...">00:14</login-time> .. c:function:: xo_ssize_t xo_attr_h (xo_handle_t *xop, const char *name, const char *fmt, ...) :param xop: Handle to modify (or NULL for default handle) :type xop: xo_handle_t \* The `xo_attr_h` function follows the conventions of `xo_attr` but adds an explicit libxo handle. .. c:function:: xo_ssize_t xo_attr_hv (xo_handle_t *xop, const char *name, const char *fmt, va_list vap) The `xo_attr_hv` function follows the conventions of `xo_attr_h` but replaces the variadic list with a variadic pointer. .. index:: xo_flush Flushing Output (xo_flush) ~~~~~~~~~~~~~~~~~~~~~~~~~~ .. c:function:: xo_ssize_t xo_flush (void) :returns: -1 for error, or the number of bytes generated :rtype: xo_ssize_t libxo buffers data, both for performance and consistency, but also to allow for the proper function of various advanced features. At various times, the caller may wish to flush any data buffered within the library. The `xo_flush` call is used for this. Calling `xo_flush` also triggers the flush function associated with the handle. For the default handle, this is equivalent to "fflush(stdout);". .. c:function:: xo_ssize_t xo_flush_h (xo_handle_t *xop) :param xop: Handle to flush (or NULL for default handle) :type xop: xo_handle_t \* :returns: -1 for error, or the number of bytes generated :rtype: xo_ssize_t The `xo_flush_h` function follows the conventions of `xo_flush`, but adds an explicit libxo handle. .. index:: xo_finish .. index:: xo_finish_atexit .. index:: atexit Finishing Output (xo_finish) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ When the program is ready to exit or close a handle, a call to `xo_finish` or `xo_finish_h` is required. This flushes any buffered data, closes open libxo constructs, and completes any pending operations. Calling this function is vital to the proper operation of libxo, especially for the non-TEXT output styles. .. c:function:: xo_ssize_t xo_finish (void) :returns: -1 on error, or the number of bytes flushed :rtype: xo_ssize_t .. c:function:: xo_ssize_t xo_finish_h (xo_handle_t *xop) :param xop: Handle to finish (or NULL for default handle) :type xop: xo_handle_t \* :returns: -1 on error, or the number of bytes flushed :rtype: xo_ssize_t ..
c:function:: void xo_finish_atexit (void) The `xo_finish_atexit` function is suitable for use with :manpage:`atexit(3)` to ensure that `xo_finish` is called on the default handle when the application exits. .. index:: UTF-8 .. index:: xo_open_container .. index:: xo_close_container Emitting Hierarchy ------------------ libxo represents two types of hierarchy: containers and lists. A container appears once under a given parent, whereas a list consists of instances that can appear multiple times. A container is used to hold related fields and to give the data organization and scope. .. index:: YANG .. admonition:: YANG Terminology libxo uses terminology from YANG (:RFC:`7950`), the data modeling language for NETCONF: container, list, leaf, and leaf-list. For XML and JSON, individual fields appear inside hierarchies which provide context and meaning to the fields. Unfortunately, these encodings have a basic disconnect in how lists of similar objects are represented. XML encodes lists as a set of sequential elements:: <user>phil</user> <user>pallavi</user> <user>sjg</user> JSON encodes lists using a single name and square brackets:: "user": [ "phil", "pallavi", "sjg" ] This means libxo needs three distinct indications of hierarchy: one for containers, which appear only once for any specific parent, one for lists, and one for each item in a list. .. index:: Containers Containers ~~~~~~~~~~ A "*container*" is an element of a hierarchy that appears only once under any specific parent. The container has no value, but serves to contain and organize other nodes. To open a container, call xo_open_container() or xo_open_container_h(). The former uses the default handle and the latter accepts a specific handle. To close a level, use the xo_close_container() or xo_close_container_h() functions. Each open call must have a matching close call. If the XOF_WARN flag is set and the name given does not match the name of the currently open container, a warning will be generated. .. c:function:: xo_ssize_t xo_open_container (const char *name) :param name: Name of the container :type name: const char * :returns: -1 on error, or the number of bytes generated :rtype: xo_ssize_t The `name` parameter gives the name of the container, encoded in UTF-8. Since ASCII is a proper subset of UTF-8, traditional C strings can be used directly. .. c:function:: xo_ssize_t xo_open_container_h (xo_handle_t *xop, const char *name) :param xop: Handle to use (or NULL for default handle) :type xop: xo_handle_t * The `xo_open_container_h` function adds a `handle` parameter. .. c:function:: xo_ssize_t xo_close_container (const char *name) :param name: Name of the container :type name: const char * :returns: -1 on error, or the number of bytes generated :rtype: xo_ssize_t .. c:function:: xo_ssize_t xo_close_container_h (xo_handle_t *xop, const char *name) :param xop: Handle to use (or NULL for default handle) :type xop: xo_handle_t * The `xo_close_container_h` function adds a `handle` parameter. Use the :index:`XOF_WARN` flag to generate a warning if the name given on the close does not match the current open container. For TEXT and HTML output, containers are not rendered into output text, though for HTML they are used to record an XPath value when the :index:`XOF_XPATH` flag is set. :: EXAMPLE: xo_open_container("top"); xo_open_container("system"); xo_emit("{:host-name/%s%s%s}", hostname, domainname ? "."
: "", domainname ?: ""); xo_close_container("system"); xo_close_container("top"); TEXT: my-host.example.org XML: my-host.example.org JSON: "top" : { "system" : { "host-name": "my-host.example.org" } } HTML:
my-host.example.org
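The example above is a fragment. As a minimal, self-contained sketch of the same container pattern (the hard-coded `hostname` and `domainname` values are assumptions for illustration; `xo_parse_args` and `xo_finish` are covered under Support Functions below)::

    #include <libxo/xo.h>

    int
    main (int argc, char **argv)
    {
        const char *hostname = "my-host";        /* assumed value */
        const char *domainname = "example.org";  /* assumed value */

        argc = xo_parse_args(argc, argv);   /* consume --libxo options */
        if (argc < 0)
            return 1;

        xo_open_container("top");
        xo_open_container("system");
        xo_emit("{:host-name/%s%s%s}\n", hostname,
                domainname ? "." : "", domainname ?: "");
        xo_close_container("system");
        xo_close_container("top");

        xo_finish();    /* flush and close any open constructs */
        return 0;
    }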
.. index:: xo_open_instance .. index:: xo_close_instance .. index:: xo_open_list .. index:: xo_close_list Lists and Instances ~~~~~~~~~~~~~~~~~~~ A "*list*" is a set of one or more instances that appear under the same parent. The instances contain details about a specific object. One can think of instances as objects or records. A call is needed to open and close the list, while a distinct call is needed to open and close each instance of the list. The name given to all calls must be identical, and it is strongly suggested that the name be singular, not plural, as a matter of style and usage expectations:: EXAMPLE: xo_open_list("item"); for (ip = list; ip->i_title; ip++) { xo_open_instance("item"); xo_emit("{L:Item} '{:name/%s}':\n", ip->i_title); xo_close_instance("item"); } xo_close_list("item"); Getting the list and instance calls correct is critical to the proper generation of XML and JSON data. Opening Lists +++++++++++++ .. c:function:: xo_ssize_t xo_open_list (const char *name) :param name: Name of the list :type name: const char * :returns: -1 on error, or the number of bytes generated :rtype: xo_ssize_t The `xo_open_list` function opens a list of instances. .. c:function:: xo_ssize_t xo_open_list_h (xo_handle_t *xop, const char *name) :param xop: Handle to use (or NULL for default handle) :type xop: xo_handle_t * Closing Lists +++++++++++++ .. c:function:: xo_ssize_t xo_close_list (const char *name) :param name: Name of the list :type name: const char * :returns: -1 on error, or the number of bytes generated :rtype: xo_ssize_t The `xo_close_list` function closes a list of instances. .. c:function:: xo_ssize_t xo_close_list_h (xo_handle_t *xop, const char *name) :param xop: Handle to use (or NULL for default handle) :type xop: xo_handle_t * The `xo_close_list_h` function adds a `handle` parameter. Opening Instances +++++++++++++++++ .. c:function:: xo_ssize_t xo_open_instance (const char *name) :param name: Name of the instance (same as the list name) :type name: const char * :returns: -1 on error, or the number of bytes generated :rtype: xo_ssize_t The `xo_open_instance` function opens a single instance. .. c:function:: xo_ssize_t xo_open_instance_h (xo_handle_t *xop, const char *name) :param xop: Handle to use (or NULL for default handle) :type xop: xo_handle_t * The `xo_open_instance_h` function adds a `handle` parameter. Closing Instances +++++++++++++++++ .. c:function:: xo_ssize_t xo_close_instance (const char *name) :param name: Name of the instance :type name: const char * :returns: -1 on error, or the number of bytes generated :rtype: xo_ssize_t The `xo_close_instance` function closes an open instance. .. c:function:: xo_ssize_t xo_close_instance_h (xo_handle_t *xop, const char *name) :param xop: Handle to use (or NULL for default handle) :type xop: xo_handle_t * The `xo_close_instance_h` function adds a `handle` parameter.
:: EXAMPLE: xo_open_list("user"); for (i = 0; i < num_users; i++) { xo_open_instance("user"); xo_emit("{k:name}:{:uid/%u}:{:gid/%u}:{:home}\n", pw[i].pw_name, pw[i].pw_uid, pw[i].pw_gid, pw[i].pw_dir); xo_close_instance("user"); } xo_close_list("user"); TEXT: phil:1001:1001:/home/phil pallavi:1002:1002:/home/pallavi XML: <user> <name>phil</name> <uid>1001</uid> <gid>1001</gid> <home>/home/phil</home> </user> <user> <name>pallavi</name> <uid>1002</uid> <gid>1002</gid> <home>/home/pallavi</home> </user> JSON: user: [ { "name": "phil", "uid": 1001, "gid": 1001, "home": "/home/phil", }, { "name": "pallavi", "uid": 1002, "gid": 1002, "home": "/home/pallavi", } ] Markers ~~~~~~~ Markers are used to protect and restore the state of open hierarchy constructs (containers, lists, or instances). While a marker is open, no other open constructs can be closed. When a marker is closed, all constructs open since the marker was opened will be closed. Markers use names which are not user-visible, allowing the caller to choose appropriate internal names. In this example, the code whiffles through a list of fish, calling a function to emit details about each fish. The marker "fish-guts" is used to ensure that any constructs opened by the function are closed properly:: EXAMPLE: for (i = 0; fish[i]; i++) { xo_open_instance("fish"); xo_open_marker("fish-guts"); dump_fish_details(i); xo_close_marker("fish-guts"); } .. c:function:: xo_ssize_t xo_open_marker(const char *name) :param name: Name of the marker :type name: const char * :returns: -1 on error, or the number of bytes generated :rtype: xo_ssize_t The `xo_open_marker` function records the current state of open tags in order for `xo_close_marker` to close them at some later point. .. c:function:: xo_ssize_t xo_open_marker_h(xo_handle_t *xop, const char *name) :param xop: Handle to use (or NULL for default handle) :type xop: xo_handle_t * The `xo_open_marker_h` function adds a `handle` parameter. .. c:function:: xo_ssize_t xo_close_marker(const char *name) :param name: Name of the marker :type name: const char * :returns: -1 on error, or the number of bytes generated :rtype: xo_ssize_t The `xo_close_marker` function closes any open containers, lists, or instances as needed to return to the state recorded when `xo_open_marker` was called with the matching name. .. c:function:: xo_ssize_t xo_close_marker_h(xo_handle_t *xop, const char *name) :param xop: Handle to use (or NULL for default handle) :type xop: xo_handle_t * The `xo_close_marker_h` function adds a `handle` parameter. DTRT Mode ~~~~~~~~~ Some users may find tracking the names of open containers, lists, and instances inconvenient. libxo offers a "Do The Right Thing" mode, where libxo will track the names of open containers, lists, and instances so the close function can be called without a name. To enable DTRT mode, turn on the XOF_DTRT flag prior to making any other libxo output:: xo_set_flags(NULL, XOF_DTRT); .. index:: XOF_DTRT Each open and close function has a version with the suffix "_d", which will close the open container, list, or instance:: xo_open_container_d("top"); ... xo_close_container_d(); This also works for lists and instances:: xo_open_list_d("item"); for (...) { xo_open_instance_d("item"); xo_emit(...); xo_close_instance_d(); } xo_close_list_d(); .. index:: XOF_WARN Note that the XOF_WARN flag will also cause libxo to track open containers, lists, and instances. A warning is generated when the name given to the close function and the name recorded do not match. Support Functions ----------------- .. index:: xo_parse_args .. _xo_parse_args: Parsing Command-line Arguments (xo_parse_args) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ..

.. c:function:: int xo_parse_args (int argc, char **argv)

  :param int argc: Number of arguments
  :param argv: Array of argument strings
  :return: -1 on error, or the number of remaining arguments
  :rtype: int

The `xo_parse_args` function is used to process a program's
arguments.  libxo-specific options are processed and removed from the
argument list so the calling application does not need to process
them.  If successful, a new value for argc is returned.  On failure, a
message is emitted and -1 is returned::

    argc = xo_parse_args(argc, argv);
    if (argc < 0)
        exit(EXIT_FAILURE);

Following the call to xo_parse_args, the application can process the
remaining arguments in a normal manner.  See :ref:`options` for a
description of valid arguments.

.. index:: xo_set_program

xo_set_program
~~~~~~~~~~~~~~

.. c:function:: void xo_set_program (const char *name)

  :param name: Name to use as the program name
  :type name: const char *
  :returns: void

The `xo_set_program` function sets the name of the program as reported
by functions like `xo_failure`, `xo_warn`, `xo_err`, etc.  The program
name is initialized by `xo_parse_args`, but subsequent calls to
`xo_set_program` can override this value::

    EXAMPLE:
        xo_set_program(argv[0]);

Note that the value is not copied, so the memory passed to
`xo_set_program` (and `xo_parse_args`) must be maintained by the
caller.

.. index:: xo_set_version

xo_set_version
~~~~~~~~~~~~~~

.. c:function:: void xo_set_version (const char *version)

  :param version: Value to use as the version string
  :type version: const char *
  :returns: void

The `xo_set_version` function records a version number to be emitted
as part of the data for encoding styles (XML and JSON).  This version
number is suitable for tracking changes in the content, allowing a
user of the data to discern which version of the data model is in
use.

.. c:function:: void xo_set_version_h (xo_handle_t *xop, const char *version)

  :param xop: Handle to use (or NULL for default handle)
  :type xop: xo_handle_t *

The `xo_set_version_h` function adds a `handle` parameter.

.. index:: --libxo
.. index:: XOF_INFO
.. index:: xo_info_t
.. _field-information:

Field Information (xo_info_t)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

HTML data can include additional information in attributes that begin
with "data-".  To enable this, three things must occur:

First the application must build an array of xo_info_t structures, one
per tag.  The array must be sorted by name, since libxo uses a binary
search to find the entry that matches names from format instructions.

Second, the application must inform libxo about this information using
the `xo_set_info` call::

    typedef struct xo_info_s {
        const char *xi_name;    /* Name of the element */
        const char *xi_type;    /* Type of field */
        const char *xi_help;    /* Description of field */
    } xo_info_t;

    void xo_set_info (xo_handle_t *xop, xo_info_t *infop, int count);

Like other libxo calls, passing `NULL` for the handle tells libxo to
use the default handle.

If the count is -1, libxo will count the elements of infop, but there
must be an empty element at the end.  More typically, the number is
known to the application::

    xo_info_t info[] = {
        { "in-stock", "number", "Number of items in stock" },
        { "name", "string", "Name of the item" },
        { "on-order", "number", "Number of items on order" },
        { "sku", "string", "Stock Keeping Unit" },
        { "sold", "number", "Number of items sold" },
    };
    int info_count = (sizeof(info) / sizeof(info[0]));
    ...
    xo_set_info(NULL, info, info_count);

Third, the emission of info must be triggered with the `XOF_INFO` flag
using either the `xo_set_flags` function or the "`--libxo=info`"
command line argument.

The type and help values, if present, are emitted as the "data-type"
and "data-help" attributes::

    <div class="data" data-tag="sku" data-type="string"
         data-help="Stock Keeping Unit">GRO-000-533</div>
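
As noted above, the array must be sorted by name.  If it cannot
conveniently be kept sorted by hand, it can be sorted at runtime
before registration; a small sketch using :manpage:`qsort(3)` (the
comparison function is illustrative, not part of the libxo API)::

    #include <stdlib.h>
    #include <string.h>

    /* Compare two xo_info_t entries by name, for qsort(3) */
    static int
    info_compare (const void *lp, const void *rp)
    {
        const xo_info_t *lhs = lp;
        const xo_info_t *rhs = rp;

        return strcmp(lhs->xi_name, rhs->xi_name);
    }

    /* ... */
    qsort(info, info_count, sizeof(info[0]), info_compare);
    xo_set_info(NULL, info, info_count);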

.. c:function:: void xo_set_info (xo_handle_t *xop, xo_info_t *infop, int count)

  :param xop: Handle to use (or NULL for default handle)
  :type xop: xo_handle_t *
  :param infop: Array of information structures
  :type infop: xo_info_t *
  :returns: void

.. index:: xo_set_allocator
.. index:: xo_realloc_func_t
.. index:: xo_free_func_t

Memory Allocation
~~~~~~~~~~~~~~~~~

The `xo_set_allocator` function allows libxo to be used in
environments where the standard :manpage:`realloc(3)` and
:manpage:`free(3)` functions are not appropriate.

.. c:function:: void xo_set_allocator (xo_realloc_func_t realloc_func, xo_free_func_t free_func)

  :param xo_realloc_func_t realloc_func: Allocation function
  :param xo_free_func_t free_func: Free function

*realloc_func* should expect the same arguments as
:manpage:`realloc(3)` and return a pointer to memory following the
same convention.  *free_func* will receive the same argument as
:manpage:`free(3)` and should release it, as appropriate for the
environment.

By default, the standard :manpage:`realloc(3)` and :manpage:`free(3)`
functions are used.

.. index:: --libxo
.. _libxo-options:

LIBXO_OPTIONS
~~~~~~~~~~~~~

The environment variable "LIBXO_OPTIONS" can be set to a subset of
libxo options, including:

- color
- flush
- flush-line
- no-color
- no-humanize
- no-locale
- no-retain
- pretty
- retain
- underscores
- warn

For example, warnings can be enabled by::

    % env LIBXO_OPTIONS=warn my-app

Since environment variables are inherited, child processes will have
the same options, which may be undesirable, making the use of the
"`--libxo`" command-line option preferable in most situations.

.. index:: xo_warn
.. index:: xo_err
.. index:: xo_errx
.. index:: xo_message

Errors, Warnings, and Messages
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Many programs make use of the standard library functions
:manpage:`err(3)` and :manpage:`warn(3)` to generate errors and
warnings for the user.  libxo wants to pass that information via the
current output style, and provides compatible functions to allow
this::

    void xo_warn (const char *fmt, ...);
    void xo_warnx (const char *fmt, ...);
    void xo_warn_c (int code, const char *fmt, ...);
    void xo_warn_hc (xo_handle_t *xop, int code,
                     const char *fmt, ...);
    void xo_err (int eval, const char *fmt, ...);
    void xo_errc (int eval, int code, const char *fmt, ...);
    void xo_errx (int eval, const char *fmt, ...);

::

    void xo_message (const char *fmt, ...);
    void xo_message_c (int code, const char *fmt, ...);
    void xo_message_hc (xo_handle_t *xop, int code,
                        const char *fmt, ...);
    void xo_message_hcv (xo_handle_t *xop, int code,
                         const char *fmt, va_list vap);

These functions display the program name, a colon, a formatted message
based on the arguments, and then optionally a colon and an error
message associated with either *errno* or the *code* parameter::

    EXAMPLE:
        if (open(filename, O_RDONLY) < 0)
            xo_err(1, "cannot open file '%s'", filename);
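
The `xo_message` variants report informational messages in the same
style-aware way, without exiting.  A brief sketch (the file name and
*st* buffer are hypothetical), passing *errno* via the *code*
parameter::

    if (stat(filename, &st) < 0)
        xo_message_c(errno, "cannot stat file '%s'", filename);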

.. index:: xo_error
+.. index:: xo_error_h
+.. index:: xo_error_hv
+.. index:: xo_errorn
+.. index:: xo_errorn_h
+.. index:: xo_errorn_hv

xo_error
~~~~~~~~

.. c:function:: void xo_error (const char *fmt, ...)

  :param fmt: Format string
  :type fmt: const char *
  :returns: void

+.. c:function:: void xo_error_h (xo_handle_t *xop, const char *fmt, ...)
+
+  :param xop: libxo handle pointer
+  :type xop: xo_handle_t *
+  :param fmt: Format string
+  :type fmt: const char *
+  :returns: void
+
+.. c:function:: void xo_error_hv (xo_handle_t *xop, const char *fmt, va_list vap)
+
+  :param xop: libxo handle pointer
+  :type xop: xo_handle_t *
+  :param fmt: Format string
+  :type fmt: const char *
+  :param vap: variadic arguments
+  :type vap: va_list
+  :returns: void
+
+.. c:function:: void xo_errorn (const char *fmt, ...)
+
+  :param fmt: Format string
+  :type fmt: const char *
+  :returns: void
+
+.. c:function:: void xo_errorn_h (xo_handle_t *xop, const char *fmt, ...)
+
+  :param xop: libxo handle pointer
+  :type xop: xo_handle_t *
+  :param fmt: Format string
+  :type fmt: const char *
+  :returns: void
+
+.. c:function:: void xo_errorn_hv (xo_handle_t *xop, int need_newline, const char *fmt, va_list vap)
+
+  :param xop: libxo handle pointer
+  :type xop: xo_handle_t *
+  :param need_newline: boolean indicating need for trailing newline
+  :type need_newline: int
+  :param fmt: Format string
+  :type fmt: const char *
+  :param vap: variadic arguments
+  :type vap: va_list
+  :returns: void
+
The `xo_error` function can be used for generic errors that should be
reported over the handle, rather than to stderr.  The `xo_error`
function behaves like `xo_err` for TEXT and HTML output styles, but
puts the error into XML or JSON elements::

    EXAMPLE::
        xo_error("Does not %s", "compute");
    XML::
        <error><message>Does not compute</message></error>
    JSON::
        "error": { "message": "Does not compute" }
+
+The `xo_error_h` and `xo_error_hv` functions add a handle object and
+a variadic-ized parameter to the signature, respectively.
+
+The `xo_errorn` function supplies a newline at the end of the error
+message if the format string does not include one.  The `xo_errorn_h`
+and `xo_errorn_hv` functions add a handle object and a variadic-ized
+parameter to the signature, respectively.  The `xo_errorn_hv`
+function also adds a boolean to indicate the need for a trailing
+newline.

.. index:: xo_no_setlocale
.. index:: Locale

xo_no_setlocale
~~~~~~~~~~~~~~~

.. c:function:: void xo_no_setlocale (void)

libxo automatically initializes the locale based on the settings of
the environment variables LC_CTYPE, LANG, and LC_ALL.  The first of
these variables that is set is used; if none is set, the locale
defaults to "UTF-8".  The caller may wish to avoid this behavior, and
can do so by calling the `xo_no_setlocale` function.

Emitting syslog Messages
------------------------

syslog is the system logging facility used throughout the unix world.
Messages are sent from commands, applications, and daemons to a
hierarchy of servers, where they are filtered, saved, and forwarded
based on configuration behaviors.

syslog is an older protocol, originally documented only in source
code.  By the time :RFC:`3164` was published, variation and mutation
left the leading "<pri>" string as the only common content.
:RFC:`5424` defines a new version (version 1) of syslog and introduces
structured data into the messages.  Structured data is a set of
name/value pairs transmitted distinctly alongside the traditional text
message, allowing filtering on precise values instead of regular
expressions.

These name/value pairs are scoped by a two-part identifier: an
enterprise identifier names the party responsible for the message
catalog, and a name identifies that message.  `Enterprise IDs`_ are
defined by IANA, the Internet Assigned Numbers Authority.

.. _Enterprise IDs:
    https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers

Use the `xo_set_syslog_enterprise_id` function to set the Enterprise
ID, as needed.

The message name should follow the conventions in
:ref:`good-field-names`\ , as should the fields within the message::

    /* Both of these calls are optional */
    xo_set_syslog_enterprise_id(32473);
    xo_open_log("my-program", 0, LOG_DAEMON);

    /* Generate a syslog message */
    xo_syslog(LOG_ERR, "upload-failed",
              "error <%d> uploading file '{:filename}' "
              "as '{:target/%s:%s}'",
              code, filename, protocol, remote);

    xo_syslog(LOG_INFO, "poofd-invalid-state",
              "state {:current/%u} is invalid {:connection/%u}",
              state, conn);

The developer should be aware that the message name may be used in
the future to allow access to further information, including
documentation.  Care should be taken to choose quality, descriptive
names.

.. _syslog-details:

Priority, Facility, and Flags
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The `xo_syslog`, `xo_vsyslog`, and `xo_open_log` functions accept a
set of flags which provide the priority of the message, the source
facility, and some additional features.  These values are OR'd
together to create a single integer argument::

    xo_syslog(LOG_ERR | LOG_AUTH, "login-failed",
              "Login failed; user '{:user}' from host '{:address}'",
              user, addr);

These values are defined in <syslog.h>.

The priority value indicates the importance and potential impact of
each message:

============= =======================================================
Priority      Description
============= =======================================================
LOG_EMERG     A panic condition, normally broadcast to all users
LOG_ALERT     A condition that should be corrected immediately
LOG_CRIT      Critical conditions
LOG_ERR       Generic errors
LOG_WARNING   Warning messages
LOG_NOTICE    Non-error conditions that might need special handling
LOG_INFO      Informational messages
LOG_DEBUG     Developer-oriented messages
============= =======================================================

The facility value indicates the source of the message, in fairly
generic terms:

=============== =======================================================
Facility        Description
=============== =======================================================
LOG_AUTH        The authorization system (e.g. :manpage:`login(1)`)
LOG_AUTHPRIV    As LOG_AUTH, but logged to a privileged file
LOG_CRON        The cron daemon: :manpage:`cron(8)`
LOG_DAEMON      System daemons, not otherwise explicitly listed
LOG_FTP         The file transfer protocol daemons
LOG_KERN        Messages generated by the kernel
LOG_LPR         The line printer spooling system
LOG_MAIL        The mail system
LOG_NEWS        The network news system
LOG_SECURITY    Security subsystems, such as :manpage:`ipfw(4)`
LOG_SYSLOG      Messages generated internally by :manpage:`syslogd(8)`
LOG_USER        Messages generated by user processes (default)
LOG_UUCP        The uucp system
LOG_LOCAL0..7   Reserved for local use
=============== =======================================================

In addition to the values listed above, xo_open_log accepts a set of
additional flags requesting specific logging behaviors:

============ ====================================================
Flag         Description
============ ====================================================
LOG_CONS     If syslogd fails, attempt to write to /dev/console
LOG_NDELAY   Open the connection to :manpage:`syslogd(8)`
             immediately
LOG_PERROR   Write the message also to standard error output
LOG_PID      Log the process id with each message
============ ====================================================
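
Putting these together, a daemon might open its log once and then emit
messages that OR a priority and a facility.  A short sketch using only
the calls and flag values described in this section (the daemon name,
message name, and field are illustrative)::

    /* Log as "poofd", include the pid, connect immediately */
    xo_open_log("poofd", LOG_PID | LOG_NDELAY, LOG_DAEMON);

    xo_syslog(LOG_WARNING | LOG_DAEMON, "poofd-low-memory",
              "free memory is low: {:free-pages/%lu}", free_pages);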

.. index:: xo_syslog

xo_syslog
~~~~~~~~~

.. c:function:: void xo_syslog (int pri, const char *name, const char *fmt, ...)

  :param int pri: syslog priority
  :param name: Name of the syslog event
  :type name: const char *
  :param fmt: Format string, followed by arguments
  :type fmt: const char *
  :returns: void

Use the `xo_syslog` function to generate syslog messages by calling
it with a log priority and facility, a message name, a format string,
and a set of arguments.  The priority/facility argument is discussed
above, as is the message name.

The format string follows the same conventions as `xo_emit`'s format
string, with each field being rendered as an SD-PARAM pair::

    xo_syslog(LOG_ERR, "poofd-missing-file",
              "'{:filename}' not found: {:error/%m}", filename);

    ... [poofd-missing-file@32473 filename="/etc/poofd.conf"
        error="Permission denied"] '/etc/poofd.conf' not found:
        Permission denied

Support functions
~~~~~~~~~~~~~~~~~

.. index:: xo_vsyslog

xo_vsyslog
++++++++++

.. c:function:: void xo_vsyslog (int pri, const char *name, const char *fmt, va_list vap)

  :param int pri: syslog priority
  :param name: Name of the syslog event
  :type name: const char *
  :param fmt: Format string
  :type fmt: const char *
  :param va_list vap: Variadic argument list
  :returns: void

xo_vsyslog is identical in function to xo_syslog, but takes the set
of arguments using a va_list::

    EXAMPLE:
        void
        my_log (const char *name, const char *fmt, ...)
        {
            va_list vap;

            va_start(vap, fmt);
            xo_vsyslog(LOG_ERR, name, fmt, vap);
            va_end(vap);
        }

.. index:: xo_open_log

xo_open_log
+++++++++++

.. c:function:: void xo_open_log (const char *ident, int logopt, int facility)

  :param ident: Identity string prepended to each message
  :type ident: const char *
  :param int logopt: Bit field containing logging options
  :param int facility: Default facility for messages
  :returns: void

xo_open_log functions similarly to :manpage:`openlog(3)`, allowing
customization of the program name, the log facility number, and the
additional option flags described in :ref:`syslog-details`.

.. index:: xo_close_log

xo_close_log
++++++++++++

.. c:function:: void xo_close_log (void)

The `xo_close_log` function is similar to :manpage:`closelog(3)`,
closing the log file and releasing any associated resources.

.. index:: xo_set_logmask

xo_set_logmask
++++++++++++++

.. c:function:: int xo_set_logmask (int maskpri)

  :param int maskpri: the log priority mask
  :returns: The previous log priority mask

The `xo_set_logmask` function is similar to :manpage:`setlogmask(3)`,
restricting the set of generated log events to those whose associated
bit is set in maskpri.  Use `LOG_MASK(pri)` to find the appropriate
bit, or `LOG_UPTO(toppri)` to create a mask for all priorities up to
and including toppri::

    EXAMPLE:
        xo_set_logmask(LOG_UPTO(LOG_WARNING));

.. index:: xo_set_syslog_enterprise_id

xo_set_syslog_enterprise_id
+++++++++++++++++++++++++++

.. c:function:: void xo_set_syslog_enterprise_id (unsigned short eid)

Use the `xo_set_syslog_enterprise_id` to supply a platform- or
application-specific enterprise id.  This value is used in any future
syslog messages.

Ideally, the operating system should supply a default value via the
"kern.syslog.enterprise_id" sysctl value.  Lacking that, the
application should provide a suitable value.

Enterprise IDs are administered by IANA, the Internet Assigned
Numbers Authority.

The complete list of EIDs is on their web site::

    https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers

New EIDs can be requested from IANA using the following page::

    http://pen.iana.org/pen/PenApplication.page

Each software development organization that defines a set of syslog
messages should register their own EID and use that value in their
software to ensure that messages can be uniquely identified by the
combination of EID + message name.

Creating Custom Encoders
------------------------

The number of encoding schemes in current use is staggering, with new
and distinct schemes appearing daily.  While libxo provides XML, JSON,
HTML, and text natively, there are requirements for other encodings.

Rather than bake support for all possible encoders into libxo, the
API allows them to be defined externally.  libxo can then interface
with these encoding modules using a simplistic API.  libxo processes
all function calls, handles state transitions, performs all
formatting, and then passes the results as operations to a customized
encoding function, which implements specific encoding logic as
required.  This means your encoder doesn't need to detect errors with
unbalanced open/close operations but can rely on libxo to pass
correct data.

By making a simple API, libxo internals are not exposed, insulating
the encoder and the library from future or internal changes.

The three elements of the API are:

- loading
- initialization
- operations

The following sections provide details about these topics.

.. index:: CBOR

The libxo source contains an encoder for Concise Binary Object
Representation, aka CBOR (:RFC:`7049`), which can be used as an
example for the API for other encoders.

Loading Encoders
~~~~~~~~~~~~~~~~

Encoders can be registered statically or discovered dynamically.
Applications can choose to call the `xo_encoder_register` function to
explicitly register encoders, but more typically they are built as
shared libraries, placed in the libxo/extensions directory, and
loaded based on name.  libxo looks for a file with the name of the
encoder and an extension of ".enc".  This can be a file or a symlink
to the shared library file that supports the encoder::

    % ls -1 lib/libxo/extensions/*.enc
    lib/libxo/extensions/cbor.enc
    lib/libxo/extensions/test.enc

Encoder Initialization
~~~~~~~~~~~~~~~~~~~~~~

Each encoder must export a symbol used to access the library, which
must have the following signature::

    int xo_encoder_library_init (XO_ENCODER_INIT_ARGS);

`XO_ENCODER_INIT_ARGS` is a macro defined in "xo_encoder.h" that
defines an argument called "arg", a pointer of the type
`xo_encoder_init_args_t`.  This structure contains two fields:

- `xei_version` is the version number of the API as implemented
  within libxo.  This version is currently 1, defined as
  `XO_ENCODER_VERSION`.  This number can be checked to ensure
  compatibility.  The working assumption is that all versions should
  be backward compatible, but each side may need to accurately know
  the version supported by the other side.  `xo_encoder_library_init`
  can optionally check this value, and must then set it to the
  version number used by the encoder, allowing libxo to detect
  version differences and react accordingly.  For example, if version
  2 adds new operations, then libxo will know that an encoding
  library that set `xei_version` to 1 cannot be expected to handle
  those new operations.

- `xei_handler` must be set to a pointer to a function of type
  `xo_encoder_func_t`, as defined in "xo_encoder.h", as sketched
  below.
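
For illustration, a minimal initialization function might look like
the following sketch, modeled on the encoders shipped with libxo
(`my_handler` is a placeholder for the encoder's handler function)::

    int
    xo_encoder_library_init (XO_ENCODER_INIT_ARGS)
    {
        arg->xei_version = XO_ENCODER_VERSION; /* Declare API version */
        arg->xei_handler = my_handler;         /* Register the callback */

        return 0;   /* Zero means success; non-zero fails the handle */
    }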

The handler function takes a set of parameters:

- xop is a pointer to the opaque `xo_handle_t` structure
- op is an integer representing the current operation
- name is a string whose meaning differs by operation
- value is a string whose meaning differs by operation
- private is an opaque structure provided by the encoder

Additional arguments may be added in the future, so handler functions
should use the `XO_ENCODER_HANDLER_ARGS` macro.  An appropriate
"extern" declaration is provided to help catch errors.

Once the encoder initialization function has completed processing, it
should return zero to indicate that no error has occurred.  A
non-zero return code will cause the handle initialization to fail.

Operations
~~~~~~~~~~

The encoder API defines a set of operations representing the
processing model of libxo.  Content is formatted within libxo, and
callbacks are made to the encoder's handler function when data is
ready to be processed:

======================= =======================================
Operation               Meaning (Base function)
======================= =======================================
XO_OP_CREATE            Called when the handle is created
XO_OP_OPEN_CONTAINER    Container opened (xo_open_container)
XO_OP_CLOSE_CONTAINER   Container closed (xo_close_container)
XO_OP_OPEN_LIST         List opened (xo_open_list)
XO_OP_CLOSE_LIST        List closed (xo_close_list)
XO_OP_OPEN_LEAF_LIST    Leaf list opened (xo_open_leaf_list)
XO_OP_CLOSE_LEAF_LIST   Leaf list closed (xo_close_leaf_list)
XO_OP_OPEN_INSTANCE     Instance opened (xo_open_instance)
XO_OP_CLOSE_INSTANCE    Instance closed (xo_close_instance)
XO_OP_STRING            Field with Quoted UTF-8 string
XO_OP_CONTENT           Field with content
XO_OP_FINISH            Finish any pending output
XO_OP_FLUSH             Flush any buffered output
XO_OP_DESTROY           Clean up resources
XO_OP_ATTRIBUTE         An attribute name/value pair
XO_OP_VERSION           A version string
======================= =======================================

For all the open and close operations, the name parameter holds the
name of the construct.  For string, content, and attribute
operations, the name parameter is the name of the field and the value
parameter is the value.  "string" values are differentiated from
"content" values to allow differing treatment of true, false, null,
and numbers from real strings, though content values are formatted as
strings before the handler is called.  For version operations, the
value parameter contains the version.

All strings are encoded in UTF-8.

Index: projects/clang1000-import/contrib/libxo/doc/encoders.rst
===================================================================
--- projects/clang1000-import/contrib/libxo/doc/encoders.rst (revision 357178)
+++ projects/clang1000-import/contrib/libxo/doc/encoders.rst (revision 357179)
@@ -1,269 +1,274 @@
.. index:: encoder

Encoders
========

This section gives an overview of encoders, details on the encoders
that ship with libxo, and documentation for developers of future
encoders.

Overview
--------

The libxo library contains software to generate four "built-in"
formats: text, XML, JSON, and HTML.  These formats are common and
useful, but there are other common and useful formats that users will
want, and including them all in the libxo software would be difficult
and cumbersome.

To allow support for additional encodings, libxo includes a
"pluggable" extension mechanism for dynamically loading new encoders.
libxo-based applications can automatically use any installed encoder.

Use the "encoder=XXX" option to access encoders.

The following example uses the "cbor" encoder, saving the output into
a file::

    df --libxo encoder=cbor > df-output.cbor

Encoders can support specific options that can be accessed by
-following the encoder name with a colon (':') and one of more options,
-separated by a plus sign "+"::
+following the encoder name with a colon (':') or a plus sign ('+') and
+one or more options, separated by the same character::

-    df --libxo encoder=csv:path=filesystem+leaf=name+no-header
+    df --libxo encoder=csv+path=filesystem+leaf=name+no-header
+    df --libxo encoder=csv:path=filesystem:leaf=name:no-header

-This example instructs libxo to load the "csv" encoder and pass the
+These examples instruct libxo to load the "csv" encoder and pass the
following options::

    path=filesystem
    leaf=name
    no-header

Each of these options is interpreted by the encoder, and all such
option names and semantics are specific to the particular encoder.
Refer to the intended encoder for documentation on its options.
+
+The string "@" can be used in place of the string "encoder="::
+
+    df --libxo @csv:no-header

.. _csv_encoder:

CSV - Comma Separated Values
----------------------------

libxo ships with a custom encoder for "CSV" files, a common format
for comma separated values.  The output of the CSV encoder can be
loaded directly into spreadsheets or similar applications.

A standard for CSV files is provided in :RFC:`4180`, but since the
format predates that standard by decades, there are many minor
differences in CSV file consumers and their expectations.  The CSV
encoder has a number of options to tailor output to those
expectations.

Consider the following XML::

    % list-items --libxo xml,pretty
    <top>
      <data>
        <item>
          <sku>GRO-000-415</sku>
          <name>gum</name>
          <sold>1412</sold>
          <in-stock>54</in-stock>
          <on-order>10</on-order>
        </item>
        <item>
          <sku>HRD-000-212</sku>
          <name>rope</name>
          <sold>85</sold>
          <in-stock>4</in-stock>
          <on-order>2</on-order>
        </item>
        <item>
          <sku>HRD-000-517</sku>
          <name>ladder</name>
          <sold>0</sold>
          <in-stock>2</in-stock>
          <on-order>1</on-order>
        </item>
      </data>
    </top>

This output is a list of `instances` (named "item"), each containing
a set of `leafs` ("sku", "name", etc).  The CSV encoder will emit the
leaf values in this output as `fields` inside a CSV `record`, which
is a line containing a set of comma-separated values::

    % list-items --libxo encoder=csv
    sku,name,sold,in-stock,on-order
    GRO-000-415,gum,1412,54,10
    HRD-000-212,rope,85,4,2
    HRD-000-517,ladder,0,2,1

Be aware that since the CSV encoder looks for data instances, when
used with :ref:`xo`, the `--instance` option will be needed::

    % xo --libxo encoder=csv --instance foo 'The {:product} is {:status}\n' stereo "in route"
    product,status
    stereo,in route

.. _csv_path:

The `path` Option
~~~~~~~~~~~~~~~~~

By default, the CSV encoder will attempt to emit any list instance
generated by the application.  In some cases, this may be
unacceptable, and a specific list may be desired.

Use the "path" option to limit the processing of output to a specific
hierarchy.  The path should be one or more names of containers or
lists.

For example, if the "list-items" application generates other lists,
the user can give "path=top/data/item" as a path::

    % list-items --libxo encoder=csv:path=top/data/item
    sku,name,sold,in-stock,on-order
    GRO-000-415,gum,1412,54,10
    HRD-000-212,rope,85,4,2
    HRD-000-517,ladder,0,2,1

Paths are "relative", meaning they need not be a complete set of
names to the list.  This means that "path=item" may be sufficient for
the above example.

.. _csv_leafs:

The `leafs` Option
~~~~~~~~~~~~~~~~~~

The CSV encoding requires that all lines of output have the same
number of fields with the same order.  In contrast, XML and JSON
allow any order (though libxo forces key leafs to appear before other
leafs).

To maintain a consistent set of fields inside the CSV file, the same
set of leafs must be selected from each list item.  By default, the
CSV encoder records the set of leafs that appear in the first list
instance it processes, and extracts only those leafs from future
instances.  If the first instance is missing a leaf that is desired
by the consumer, the "leafs" option can be used to ensure that an
empty value is recorded for instances that lack a particular leaf.

The "leafs" option can also be used to exclude leafs, limiting the
output to only those leafs provided.

In addition, the order of the output fields follows the order in
which the leafs are listed.  "leafs=one.two" and "leafs=two.one" give
distinct output.

So the "leafs" option can be used to expand, limit, and order the set
of leafs.

The value of the leafs option should be one or more leaf names,
separated by a period (".")::

    % list-items --libxo encoder=csv:leafs=sku.on-order
    sku,on-order
    GRO-000-415,10
    HRD-000-212,2
    HRD-000-517,1
    % list-items --libxo encoder=csv:leafs=on-order.sku
    on-order,sku
    10,GRO-000-415
    2,HRD-000-212
    1,HRD-000-517

Note that libxo uses terminology from YANG (:RFC:`7950`), the data
modeling language for NETCONF (:RFC:`6241`), which uses "leafs" as
the plural form of "leaf"; libxo follows that convention.

.. _csv_no_header:

The `no-header` Option
~~~~~~~~~~~~~~~~~~~~~~

CSV files typically begin with a line that defines the fields
included in that file, in an attempt to make the contents
self-defining::

    sku,name,sold,in-stock,on-order
    GRO-000-415,gum,1412,54,10
    HRD-000-212,rope,85,4,2
    HRD-000-517,ladder,0,2,1

There is no reliable mechanism for determining whether this header
line is included, so the consumer must make an assumption.

The CSV encoder defaults to producing the header line, but the
"no-header" option can be included to avoid the header line.

.. _csv_no_quotes:

The `no-quotes` Option
~~~~~~~~~~~~~~~~~~~~~~

:RFC:`4180` specifies that fields containing spaces should be quoted,
but many CSV consumers do not handle quotes.  The "no-quotes" option
instructs the CSV encoder to avoid the use of quotes.

.. _csv_dos:

The `dos` Option
~~~~~~~~~~~~~~~~

:RFC:`4180` defines the end-of-line marker as a carriage return
followed by a newline.  This `CRLF` convention dates from the distant
past, but its use was anchored in the 1980s by the `DOS` operating
system.

The CSV encoder defaults to using the standard Unix end-of-line
marker, a simple newline.  Use the "dos" option to use the `CRLF`
convention.

The Encoder API
---------------

The encoder API consists of three distinct phases:

- loading the encoder
- initializing the encoder
- feeding operations to the encoder

To load the encoder, libxo will open a shared library named::

    ${prefix}/lib/libxo/encoder/${name}.enc

This file is typically a symbolic link to a dynamic library, suitable
for `dlopen`().  libxo looks for a symbol called
`xo_encoder_library_init` inside that library and calls it with the
arguments defined in the header file "xo_encoder.h".  This function
should look as follows::

    int
    xo_encoder_library_init (XO_ENCODER_INIT_ARGS)
    {
        arg->xei_version = XO_ENCODER_VERSION;
        arg->xei_handler = test_handler;

        return 0;
    }

Several features here allow for future compatibility: the macro
XO_ENCODER_INIT_ARGS allows the arguments to this function to change
over time, and the XO_ENCODER_VERSION allows the library to tell
libxo which version of the API it was compiled with.

The function placed in xei_handler should have the following
signature::

    static int
    test_handler (XO_ENCODER_HANDLER_ARGS)
    {
        ...

This function will be called with the "op" codes defined in
"xo_encoder.h".  Each op code represents a distinct event in the
libxo processing model.  For example XO_OP_OPEN_CONTAINER tells the
encoder that a new container has been opened, and the encoder can
behave in an appropriate manner.

Index: projects/clang1000-import/contrib/libxo/doc/options.rst
===================================================================
--- projects/clang1000-import/contrib/libxo/doc/options.rst (revision 357178)
+++ projects/clang1000-import/contrib/libxo/doc/options.rst (revision 357179)
@@ -1,164 +1,184 @@
.. index:: --libxo
.. index:: Options
.. _options:

Command-line Arguments
======================

libxo uses command line options to trigger rendering behavior.  There
are multiple conventions for passing options, all using the
"`--libxo`" option::

    --libxo <options>
    --libxo=<options>
    --libxo:<brief-options>

The *brief-options* is a series of single letter abbreviations, where
the *options* is a comma-separated list of words.  Both provide
access to identical functionality.  The following invocations are all
identical in outcome::

    my-app --libxo warn,pretty arg1
    my-app --libxo=warn,pretty arg1
    my-app --libxo:WP arg1

Programs using libxo are expected to call the xo_parse_args function
to parse these arguments.  See :ref:`xo_parse_args` for details.

Option Keywords
---------------

Options is a comma-separated list of tokens that correspond to output
styles, flags, or features:

=============== =======================================================
Token           Action
=============== =======================================================
color           Enable colors/effects for display styles (TEXT, HTML)
colors=xxxx     Adjust color output values
dtrt            Enable "Do The Right Thing" mode
flush           Flush after every libxo function call
flush-line      Flush after every line (line-buffered)
html            Emit HTML output
indent=xx       Set the indentation level
info            Add info attributes (HTML)
json            Emit JSON output
keys            Emit the key attribute for keys (XML)
log-gettext     Log (via stderr) each gettext(3) string lookup
log-syslog      Log (via stderr) each syslog message (via xo_syslog)
no-humanize     Ignore the {h:} modifier (TEXT, HTML)
no-locale       Do not initialize the locale setting
no-retain       Prevent retaining formatting information
no-top          Do not emit a top set of braces (JSON)
not-first       Pretend the 1st output item was not 1st (JSON)
pretty          Emit pretty-printed output
retain          Force retaining formatting information
text            Emit TEXT output
underscores     Replace XML-friendly "-"s with JSON friendly "_"s
units           Add the 'units' (XML) or 'data-units' (HTML) attribute
warn            Emit warnings when libxo detects bad calls
warn-xml        Emit warnings in XML
xml             Emit XML output
xpath           Add XPath expressions (HTML)
=============== =======================================================

Most of these options are simple and direct, but some require
additional details:

- "colors" is described in :ref:`color-mapping`.

- "flush-line" performs line buffering, even when the output is not
  directed to a TTY device.

- "info" generates additional data for HTML, encoded in attributes
  using names that start with "data-".

- "keys" adds a "key" attribute for XML output to indicate that a
  leaf is an identifier for the list member.

- "no-humanize" avoids "humanizing" numeric output (see
  :ref:`humanize-modifier` for details).

- "no-locale" instructs libxo to avoid translating output to the
  current locale.

- "no-retain" disables the ability of libxo to internally retain
  "compiled" information about formatting strings (see :ref:`retain`
  for details).

- "underscores" can be used with JSON output to change XML-friendly
  names with dashes into JSON-friendly names with underscores.

- "warn" allows libxo to emit warnings on stderr when application
  code makes incorrect calls.

- "warn-xml" causes those warnings to be placed in XML inside the
  output.

Brief Options
-------------

The brief options are simple single-letter aliases to the normal
keywords, as detailed below:

======== =============================================
Option   Action
======== =============================================
c        Enable color/effects for TEXT/HTML
F        Force line-buffered flushing
H        Enable HTML output (XO_STYLE_HTML)
I        Enable info output (XOF_INFO)
i<num>   Indent by <num>
J        Enable JSON output (XO_STYLE_JSON)
k        Add keys to XPATH expressions in HTML
n        Disable humanization (TEXT, HTML)
P        Enable pretty-printed output (XOF_PRETTY)
T        Enable text output (XO_STYLE_TEXT)
U        Add units to HTML output
u        Change "-"s to "_"s in element names (JSON)
W        Enable warnings (XOF_WARN)
X        Enable XML output (XO_STYLE_XML)
x        Enable XPath data (XOF_XPATH)
======== =============================================

.. index:: Colors
.. _color-mapping:

Color Mapping
-------------

The "colors" option takes a value that is a set of mappings from the
pre-defined set of colors to new foreground and background colors.
The value is a series of "fg/bg" values, separated by a "+".  Each
pair of "fg/bg" values gives the colors to which a basic color is
mapped when used as a foreground or background color.  The order of
the mappings is:

- black
- red
- green
- yellow
- blue
- magenta
- cyan
- white

Pairs may be skipped, leaving them mapped as normal, as are missing
pairs or single colors.

For example consider the following xo_emit call::

    xo_emit("{C:fg-red,bg-green}Merry XMas!!{C:}\n");

To turn all colored output to red-on-blue, use eight pairs of
"red/blue" mappings separated by plus signs ("+")::

    --libxo colors=red/blue+red/blue+red/blue+red/blue+\
    red/blue+red/blue+red/blue+red/blue

To turn the red-on-green text to magenta-on-cyan, give a "magenta"
foreground value for red (the second mapping) and a "cyan" background
to green (the third mapping)::

    --libxo colors=+magenta+/cyan

Consider the common situation where blue output looks unreadable on a
terminal session with a black background.  To turn both "blue"
foreground and background output to "yellow", give only the fifth
mapping, skipping the first four mappings with bare plus signs
("+")::

    --libxo colors=++++yellow/yellow
+
+Encoders
+--------
+
+In addition to the four "built-in" formats, libxo supports an
+extensible mechanism for adding encoders.  These are activated
+using the "encoder" keyword::
+
+    --libxo encoder=cbor
+
+The encoder can include encoder-specific options, separated by either
+colons (":") or plus signs ("+")::
+
+    --libxo encoder=csv+path=filesystem+leaf=name+no-header
+    --libxo encoder=csv:path=filesystem:leaf=name:no-header
+
+For brevity, the string "@" can be used in place of the string
+"encoder="::
+
+    df --libxo @csv:no-header

Index: projects/clang1000-import/contrib/libxo/encoder/csv/enc_csv.c
===================================================================
--- projects/clang1000-import/contrib/libxo/encoder/csv/enc_csv.c (revision 357178)
+++ projects/clang1000-import/contrib/libxo/encoder/csv/enc_csv.c (revision 357179)
@@ -1,826 +1,834 @@
/*
 * Copyright (c) 2015, Juniper Networks, Inc.
 * All rights reserved.
 * This SOFTWARE is licensed under the LICENSE provided in the
 * ../Copyright file.  By downloading, installing, copying, or otherwise
 * using the SOFTWARE, you agree to be bound by the terms of that
 * LICENSE.
 * Phil Shafer, August 2015
 */

/*
 * CSV encoder generates comma-separated value files for specific
 * subsets of data.  This is not (and cannot be) a generalized
 * facility, but for specific subsets of data, CSV data can be
 * reasonably generated.  For example, the df XML content:
 *
 *    <filesystem>
 *      <name>procfs</name>
 *      <total-blocks>4</total-blocks>
 *      <used-blocks>4</used-blocks>
 *      <available-blocks>0</available-blocks>
 *      <used-percent>100</used-percent>
 *      <mounted-on>/proc</mounted-on>
 *    </filesystem>
 *
 * could be represented as:
 *
 * #+name,total-blocks,used-blocks,available-blocks,used-percent,mounted-on
 * procfs,4,4,0,100,/proc
 *
 * Data is then constrained to be sibling leaf values.  In addition,
 * singular leafs can also be matched.  The costs include recording
 * the specific leaf names (to ensure consistency) and some
 * buffering.
 *
 * Some escaping is needed for CSV files, following the rules of RFC4180:
 *
 * - Fields containing a line-break, double-quote or commas should be
 *   quoted.  (If they are not, the file will likely be impossible to
 *   process correctly).
 * - A (double) quote character in a field must be represented by two
 *   (double) quote characters.
 * - Leading and trailing whitespace require fields be quoted.
 *
- * Cheesy, but simple.  The RFC also requires MS-DOS end-of-line, which
- * we only do with the "dos" option.  Strange that we still live in a
- * DOS-friendly world, but then again, we make spaceships based on the
- * horse butts (http://www.astrodigital.org/space/stshorse.html).
+ * Cheesy, but simple.  The RFC also requires MS-DOS end-of-line,
+ * which we only do with the "dos" option.  Strange that we still live
+ * in a DOS-friendly world, but then again, we make spaceships based
+ * on the horse butts (http://www.astrodigital.org/space/stshorse.html
+ * though the "built by English expatriates" bit is rubbish; better to
+ * say the first engines used in America were built by Englishmen.)
 */

#include
#include
#include
#include
#include
#include
#include

#include "xo.h"
#include "xo_encoder.h"
#include "xo_buf.h"

#ifndef UNUSED
#define UNUSED __attribute__ ((__unused__))
#endif /* UNUSED */

/*
 * The CSV encoder has three moving parts:
 *
 * - The path holds the path we are matching against
 *   - This is given as input via "options" and does not change
 *
 * - The stack holds the current names of the open elements
 *   - The "open" operations push, while the "close" pop
 *   - Turns out, at this point, the stack is unused, but I've
 *     left "drippings" in the code because I see this as useful
 *     for future features (under CSV_STACK_IS_NEEDED).
 *
 * - The leafs record the current set of leafs
 *   - A key from the parent list counts as a leaf (unless CF_NO_KEYS)
 *   - Once the path is matched, all other leafs at that level are leafs
 *   - Leafs are recorded to get the header comment accurately recorded
 *   - Once the first line is emitted, the set of leafs _cannot_ change
 *
 * We use offsets into the buffers, since we know they can be
 * realloc'd out from under us, as the size increases.  The 'path'
 * is fixed, we allocate it once, so it doesn't need offsets.
*/ typedef struct path_frame_s { char *pf_name; /* Path member name; points into c_path_buf */ uint32_t pf_flags; /* Flags for this path element (PFF_*) */ } path_frame_t; typedef struct stack_frame_s { ssize_t sf_off; /* Element name; offset in c_stack_buf */ uint32_t sf_flags; /* Flags for this frame (SFF_*) */ } stack_frame_t; /* Flags for sf_flags */ typedef struct leaf_s { ssize_t f_name; /* Name of leaf; offset in c_name_buf */ ssize_t f_value; /* Value of leaf; offset in c_value_buf */ uint32_t f_flags; /* Flags for this value (FF_*) */ #ifdef CSV_STACK_IS_NEEDED ssize_t f_depth; /* Depth of stack when leaf was recorded */ #endif /* CSV_STACK_IS_NEEDED */ } leaf_t; /* Flags for f_flags */ #define LF_KEY (1<<0) /* Leaf is a key */ #define LF_HAS_VALUE (1<<1) /* Value has been set */ typedef struct csv_private_s { uint32_t c_flags; /* Flags for this encoder */ /* The path for which we select leafs */ char *c_path_buf; /* Buffer containing path members */ path_frame_t *c_path; /* Array of path members */ ssize_t c_path_max; /* Depth of c_path[] */ ssize_t c_path_cur; /* Current depth in c_path[] */ /* A stack of open elements (xo_op_list, xo_op_container) */ #if CSV_STACK_IS_NEEDED xo_buffer_t c_stack_buf; /* Buffer used for stack content */ stack_frame_t *c_stack; /* Stack of open tags */ ssize_t c_stack_max; /* Maximum stack depth */ #endif /* CSV_STACK_IS_NEEDED */ ssize_t c_stack_depth; /* Current stack depth */ /* List of leafs we are emitting (to ensure consistency) */ xo_buffer_t c_name_buf; /* String buffer for leaf names */ xo_buffer_t c_value_buf; /* String buffer for leaf values */ leaf_t *c_leaf; /* List of leafs */ ssize_t c_leaf_depth; /* Current depth of c_leaf[] (next free) */ ssize_t c_leaf_max; /* Max depth of c_leaf[] */ xo_buffer_t c_data; /* Buffer for creating data */ } csv_private_t; #define C_STACK_MAX 32 /* default c_stack_max */ #define C_LEAF_MAX 32 /* default c_leaf_max */ /* Flags for this structure */ #define CF_HEADER_DONE (1<<0) /* Have already written the header */ #define CF_NO_HEADER (1<<1) /* Do not generate header */ #define CF_NO_KEYS (1<<2) /* Do not generate excess keys */ #define CF_VALUE_ONLY (1<<3) /* Only generate the value */ #define CF_DOS_NEWLINE (1<<4) /* Generate CR-NL, just like MS-DOS */ #define CF_LEAFS_DONE (1<<5) /* Leafs are already been recorded */ #define CF_NO_QUOTES (1<<6) /* Do not generate quotes */ #define CF_RECORD_DATA (1<<7) /* Record all sibling leafs */ #define CF_DEBUG (1<<8) /* Make debug output */ #define CF_HAS_PATH (1<<9) /* A "path" option was provided */ /* * A simple debugging print function, similar to psu_dbg. Controlled by * the undocumented "debug" option. */ static void csv_dbg (xo_handle_t *xop UNUSED, csv_private_t *csv UNUSED, const char *fmt, ...) { if (csv == NULL || !(csv->c_flags & CF_DEBUG)) return; va_list vap; va_start(vap, fmt); vfprintf(stderr, fmt, vap); va_end(vap); } /* * Create the private data for this handle, initialize it, and record * the pointer in the handle. 
*/ static int csv_create (xo_handle_t *xop) { csv_private_t *csv = xo_realloc(NULL, sizeof(*csv)); if (csv == NULL) return -1; bzero(csv, sizeof(*csv)); xo_buf_init(&csv->c_data); xo_buf_init(&csv->c_name_buf); xo_buf_init(&csv->c_value_buf); #ifdef CSV_STACK_IS_NEEDED xo_buf_init(&csv->c_stack_buf); #endif /* CSV_STACK_IS_NEEDED */ xo_set_private(xop, csv); return 0; } /* * Clean up and release any data in use by this handle */ static void csv_destroy (xo_handle_t *xop UNUSED, csv_private_t *csv) { /* Clean up */ xo_buf_cleanup(&csv->c_data); xo_buf_cleanup(&csv->c_name_buf); xo_buf_cleanup(&csv->c_value_buf); #ifdef CSV_STACK_IS_NEEDED xo_buf_cleanup(&csv->c_stack_buf); #endif /* CSV_STACK_IS_NEEDED */ if (csv->c_leaf) xo_free(csv->c_leaf); if (csv->c_path_buf) xo_free(csv->c_path_buf); } /* * Return the element name at the top of the path stack. This is the * item that we are currently trying to match on. */ static const char * csv_path_top (csv_private_t *csv, ssize_t delta) { if (!(csv->c_flags & CF_HAS_PATH) || csv->c_path == NULL) return NULL; ssize_t cur = csv->c_path_cur + delta; if (cur < 0) return NULL; return csv->c_path[cur].pf_name; } /* * Underimplemented stack functionality */ static inline void csv_stack_push (csv_private_t *csv UNUSED, const char *name UNUSED) { #ifdef CSV_STACK_IS_NEEDED csv->c_stack_depth += 1; #endif /* CSV_STACK_IS_NEEDED */ } /* * Underimplemented stack functionality */ static inline void csv_stack_pop (csv_private_t *csv UNUSED, const char *name UNUSED) { #ifdef CSV_STACK_IS_NEEDED csv->c_stack_depth -= 1; #endif /* CSV_STACK_IS_NEEDED */ } /* Flags for csv_quote_flags */ #define QF_NEEDS_QUOTES (1<<0) /* Needs to be quoted */ #define QF_NEEDS_ESCAPE (1<<1) /* Needs to be escaped */ /* * Determine how much quote processing is needed. The details of the * quoting rules are given at the top of this file. We return a set * of flags, indicating what's needed. */ static uint32_t csv_quote_flags (xo_handle_t *xop UNUSED, csv_private_t *csv UNUSED, const char *value) { static const char quoted[] = "\n\r\","; static const char escaped[] = "\""; if (csv->c_flags & CF_NO_QUOTES) /* User doesn't want quotes */ return 0; size_t len = strlen(value); uint32_t rc = 0; if (strcspn(value, quoted) != len) rc |= QF_NEEDS_QUOTES; else if (isspace((int) value[0])) /* Leading whitespace */ rc |= QF_NEEDS_QUOTES; else if (isspace((int) value[len - 1])) /* Trailing whitespace */ rc |= QF_NEEDS_QUOTES; if (strcspn(value, escaped) != len) rc |= QF_NEEDS_ESCAPE; csv_dbg(xop, csv, "csv: quote flags [%s] -> %x (%zu/%zu)\n", value, rc, len, strcspn(value, quoted)); return rc; } /* * Escape the string, following the rules in RFC4180 */ static void csv_escape (xo_buffer_t *xbp, const char *value, size_t len) { const char *cp, *ep, *np; for (cp = value, ep = value + len; cp && cp < ep; cp = np) { np = strchr(cp, '"'); if (np) { np += 1; xo_buf_append(xbp, cp, np - cp); xo_buf_append(xbp, "\"", 1); } else xo_buf_append(xbp, cp, ep - cp); } } /* * Append a newline to the buffer, following the settings of the "dos" * flag. */ static void csv_append_newline (xo_buffer_t *xbp, csv_private_t *csv) { if (csv->c_flags & CF_DOS_NEWLINE) xo_buf_append(xbp, "\r\n", 2); else xo_buf_append(xbp, "\n", 1); } /* * Create a 'record' of 'fields' from our recorded leaf values. If * this is the first line and "no-header" isn't given, make a record * containing the leaf names. 
*/ static void csv_emit_record (xo_handle_t *xop, csv_private_t *csv) { csv_dbg(xop, csv, "csv: emit: ...\n"); ssize_t fnum; uint32_t quote_flags; leaf_t *lp; /* If we have no data, then don't bother */ if (csv->c_leaf_depth == 0) return; if (!(csv->c_flags & (CF_HEADER_DONE | CF_NO_HEADER))) { csv->c_flags |= CF_HEADER_DONE; for (fnum = 0; fnum < csv->c_leaf_depth; fnum++) { lp = &csv->c_leaf[fnum]; const char *name = xo_buf_data(&csv->c_name_buf, lp->f_name); if (fnum != 0) xo_buf_append(&csv->c_data, ",", 1); xo_buf_append(&csv->c_data, name, strlen(name)); } csv_append_newline(&csv->c_data, csv); } for (fnum = 0; fnum < csv->c_leaf_depth; fnum++) { lp = &csv->c_leaf[fnum]; const char *value; if (lp->f_flags & LF_HAS_VALUE) { value = xo_buf_data(&csv->c_value_buf, lp->f_value); } else { value = ""; } quote_flags = csv_quote_flags(xop, csv, value); if (fnum != 0) xo_buf_append(&csv->c_data, ",", 1); if (quote_flags & QF_NEEDS_QUOTES) xo_buf_append(&csv->c_data, "\"", 1); if (quote_flags & QF_NEEDS_ESCAPE) csv_escape(&csv->c_data, value, strlen(value)); else xo_buf_append(&csv->c_data, value, strlen(value)); if (quote_flags & QF_NEEDS_QUOTES) xo_buf_append(&csv->c_data, "\"", 1); } csv_append_newline(&csv->c_data, csv); /* We flush if either flush flag is set */ if (xo_get_flags(xop) & (XOF_FLUSH | XOF_FLUSH_LINE)) xo_flush_h(xop); /* Clean out values from leafs */ for (fnum = 0; fnum < csv->c_leaf_depth; fnum++) { lp = &csv->c_leaf[fnum]; lp->f_flags &= ~LF_HAS_VALUE; lp->f_value = 0; } xo_buf_reset(&csv->c_value_buf); /* * Once we emit the first line, our set of leafs is locked and * cannot be changed. */ csv->c_flags |= CF_LEAFS_DONE; } /* * Open a "level" of hierarchy, either a container or an instance. Look * for a match in the path=x/y/z hierarchy, and ignore if not a match. * If we're at the end of the path, start recording leaf values. */ static int csv_open_level (xo_handle_t *xop UNUSED, csv_private_t *csv, const char *name, int instance) { /* An new "open" event means we stop recording */ if (csv->c_flags & CF_RECORD_DATA) { csv->c_flags &= ~CF_RECORD_DATA; csv_emit_record(xop, csv); return 0; } const char *path_top = csv_path_top(csv, 0); /* If the top of the stack does not match the name, then ignore */ if (path_top == NULL) { if (instance && !(csv->c_flags & CF_HAS_PATH)) { csv_dbg(xop, csv, "csv: recording (no-path) ...\n"); csv->c_flags |= CF_RECORD_DATA; } } else if (xo_streq(path_top, name)) { csv->c_path_cur += 1; /* Advance to next path member */ csv_dbg(xop, csv, "csv: match: [%s] (%zd/%zd)\n", name, csv->c_path_cur, csv->c_path_max); /* If we're all the way thru the path members, start recording */ if (csv->c_path_cur == csv->c_path_max) { csv_dbg(xop, csv, "csv: recording ...\n"); csv->c_flags |= CF_RECORD_DATA; } } /* Push the name on the stack */ csv_stack_push(csv, name); return 0; } /* * Close a "level", either a container or an instance. 
*/ static int csv_close_level (xo_handle_t *xop UNUSED, csv_private_t *csv, const char *name) { /* If we're recording, a close triggers an emit */ if (csv->c_flags & CF_RECORD_DATA) { csv->c_flags &= ~CF_RECORD_DATA; csv_emit_record(xop, csv); } const char *path_top = csv_path_top(csv, -1); csv_dbg(xop, csv, "csv: close: [%s] [%s] (%zd)\n", name, path_top ?: "", csv->c_path_cur); /* If the top of the stack does not match the name, then ignore */ if (path_top != NULL && xo_streq(path_top, name)) { csv->c_path_cur -= 1; return 0; } /* Pop the name off the stack */ csv_stack_pop(csv, name); return 0; } /* * Return the index of a given leaf in the c_leaf[] array, where we * record leaf values. If the leaf is new and we haven't stopped recording * leafs, then make a new slot for it and record the name. */ static int csv_leaf_num (xo_handle_t *xop UNUSED, csv_private_t *csv, const char *name, xo_xff_flags_t flags) { ssize_t fnum; leaf_t *lp; xo_buffer_t *xbp = &csv->c_name_buf; for (fnum = 0; fnum < csv->c_leaf_depth; fnum++) { lp = &csv->c_leaf[fnum]; const char *fname = xo_buf_data(xbp, lp->f_name); if (xo_streq(fname, name)) return fnum; } /* If we're done with adding new leafs, then bail */ if (csv->c_flags & CF_LEAFS_DONE) return -1; /* This leaf does not exist yet, so we need to create it */ /* Start by checking if there's enough room */ if (csv->c_leaf_depth + 1 >= csv->c_leaf_max) { /* Out of room; realloc it */ ssize_t new_max = csv->c_leaf_max * 2; if (new_max == 0) new_max = C_LEAF_MAX; lp = xo_realloc(csv->c_leaf, new_max * sizeof(*lp)); if (lp == NULL) return -1; /* No luck; bail */ /* Zero out the new portion */ bzero(&lp[csv->c_leaf_max], csv->c_leaf_max * sizeof(*lp)); /* Update csv data */ csv->c_leaf = lp; csv->c_leaf_max = new_max; } lp = &csv->c_leaf[csv->c_leaf_depth++]; #ifdef CSV_STACK_IS_NEEDED lp->f_depth = csv->c_stack_depth; #endif /* CSV_STACK_IS_NEEDED */ lp->f_name = xo_buf_offset(xbp); char *cp = xo_buf_cur(xbp); xo_buf_append(xbp, name, strlen(name) + 1); if (flags & XFF_KEY) lp->f_flags |= LF_KEY; csv_dbg(xop, csv, "csv: leaf: name: %zd [%s] [%s] %x\n", fnum, name, cp, lp->f_flags); return fnum; } /* * Record a new value for a leaf */ static void csv_leaf_set (xo_handle_t *xop UNUSED, csv_private_t *csv, leaf_t *lp, const char *value) { xo_buffer_t *xbp = &csv->c_value_buf; lp->f_value = xo_buf_offset(xbp); lp->f_flags |= LF_HAS_VALUE; char *cp = xo_buf_cur(xbp); xo_buf_append(xbp, value, strlen(value) + 1); csv_dbg(xop, csv, "csv: leaf: value: [%s] [%s] %x\n", value, cp, lp->f_flags); } /* * Record the requested set of leaf names. The input should be a set * of leaf names, separated by periods. */ static int csv_record_leafs (xo_handle_t *xop, csv_private_t *csv, const char *leafs_raw) { char *cp, *ep, *np; ssize_t len = strlen(leafs_raw); char *leafs_buf = alloca(len + 1); memcpy(leafs_buf, leafs_raw, len + 1); /* Make local copy */ for (cp = leafs_buf, ep = leafs_buf + len; cp && cp < ep; cp = np) { np = strchr(cp, '.'); if (np) *np++ = '\0'; if (*cp == '\0') /* Skip empty names */ continue; csv_dbg(xop, csv, "adding leaf: [%s]\n", cp); csv_leaf_num(xop, csv, cp, 0); } /* * Since we've been told explicitly what leafs matter, ignore the rest */ csv->c_flags |= CF_LEAFS_DONE; return 0; } /* * Record the requested path elements. The input should be a set of * container or instances names, separated by slashes. 
*/ static int csv_record_path (xo_handle_t *xop, csv_private_t *csv, const char *path_raw) { int count; char *cp, *ep, *np; ssize_t len = strlen(path_raw); char *path_buf = xo_realloc(NULL, len + 1); memcpy(path_buf, path_raw, len + 1); for (cp = path_buf, ep = path_buf + len, count = 2; cp && cp < ep; cp = np) { np = strchr(cp, '/'); if (np) { np += 1; count += 1; } } path_frame_t *path = xo_realloc(NULL, sizeof(path[0]) * count); if (path == NULL) { xo_failure(xop, "allocation failure for path '%s'", path_buf); return -1; } bzero(path, sizeof(path[0]) * count); for (count = 0, cp = path_buf; cp && cp < ep; cp = np) { path[count++].pf_name = cp; np = strchr(cp, '/'); if (np) *np++ = '\0'; csv_dbg(xop, csv, "path: [%s]\n", cp); } path[count].pf_name = NULL; if (csv->c_path) /* In case two paths are given */ xo_free(csv->c_path); if (csv->c_path_buf) /* In case two paths are given */ xo_free(csv->c_path_buf); csv->c_path_buf = path_buf; csv->c_path = path; csv->c_path_max = count; csv->c_path_cur = 0; return 0; } /* * Extract the option values. The format is: - * -libxo encoder=csv:kw=val+kw=val+kw=val,pretty,etc + * -libxo encoder=csv:kw=val:kw=val:kw=val,pretty + * -libxo encoder=csv+kw=val+kw=val+kw=val,pretty */ static int -csv_options (xo_handle_t *xop, csv_private_t *csv, const char *raw_opts) +csv_options (xo_handle_t *xop, csv_private_t *csv, + const char *raw_opts, char opts_char) { ssize_t len = strlen(raw_opts); char *options = alloca(len + 1); memcpy(options, raw_opts, len); options[len] = '\0'; char *cp, *ep, *np, *vp; for (cp = options, ep = options + len + 1; cp && cp < ep; cp = np) { - np = strchr(cp, '+'); + np = strchr(cp, opts_char); if (np) *np++ = '\0'; vp = strchr(cp, '='); if (vp) *vp++ = '\0'; if (xo_streq(cp, "path")) { /* Record the path */ if (vp != NULL && csv_record_path(xop, csv, vp)) return -1; csv->c_flags |= CF_HAS_PATH; /* Yup, we have an explicit path now */ } else if (xo_streq(cp, "leafs") || xo_streq(cp, "leaf") || xo_streq(cp, "leaves")) { /* Record the leafs */ if (vp != NULL && csv_record_leafs(xop, csv, vp)) return -1; } else if (xo_streq(cp, "no-keys")) { csv->c_flags |= CF_NO_KEYS; } else if (xo_streq(cp, "no-header")) { csv->c_flags |= CF_NO_HEADER; } else if (xo_streq(cp, "value-only")) { csv->c_flags |= CF_VALUE_ONLY; } else if (xo_streq(cp, "dos")) { csv->c_flags |= CF_DOS_NEWLINE; } else if (xo_streq(cp, "no-quotes")) { csv->c_flags |= CF_NO_QUOTES; } else if (xo_streq(cp, "debug")) { csv->c_flags |= CF_DEBUG; } else { xo_warn_hc(xop, -1, "unknown encoder option value: '%s'", cp); return -1; } } return 0; } /* * Handler for incoming data values. We just record each leaf name and * value. The values are emittd when the instance is closed. */ static int csv_data (xo_handle_t *xop UNUSED, csv_private_t *csv UNUSED, const char *name, const char *value, xo_xof_flags_t flags) { csv_dbg(xop, csv, "data: [%s]=[%s] %llx\n", name, value, (unsigned long long) flags); if (!(csv->c_flags & CF_RECORD_DATA)) return 0; /* Find the leaf number */ int fnum = csv_leaf_num(xop, csv, name, flags); if (fnum < 0) return 0; /* Don't bother recording */ leaf_t *lp = &csv->c_leaf[fnum]; csv_leaf_set(xop, csv, lp, value); return 0; } /* * The callback from libxo, passing us operations/events as they * happen. */ static int csv_handler (XO_ENCODER_HANDLER_ARGS) { int rc = 0; csv_private_t *csv = private; xo_buffer_t *xbp = csv ? 
&csv->c_data : NULL; csv_dbg(xop, csv, "op %s: [%s] [%s]\n", xo_encoder_op_name(op), name ?: "", value ?: ""); fflush(stdout); /* If we don't have private data, we're sunk */ if (csv == NULL && op != XO_OP_CREATE) return -1; switch (op) { case XO_OP_CREATE: /* Called when the handle is init'd */ rc = csv_create(xop); break; case XO_OP_OPTIONS: - rc = csv_options(xop, csv, value); + rc = csv_options(xop, csv, value, ':'); + break; + + case XO_OP_OPTIONS_PLUS: + rc = csv_options(xop, csv, value, '+'); break; case XO_OP_OPEN_LIST: case XO_OP_CLOSE_LIST: break; /* Ignore these ops */ case XO_OP_OPEN_CONTAINER: case XO_OP_OPEN_LEAF_LIST: rc = csv_open_level(xop, csv, name, 0); break; case XO_OP_OPEN_INSTANCE: rc = csv_open_level(xop, csv, name, 1); break; case XO_OP_CLOSE_CONTAINER: case XO_OP_CLOSE_LEAF_LIST: case XO_OP_CLOSE_INSTANCE: rc = csv_close_level(xop, csv, name); break; case XO_OP_STRING: /* Quoted UTF-8 string */ case XO_OP_CONTENT: /* Other content */ rc = csv_data(xop, csv, name, value, flags); break; case XO_OP_FINISH: /* Clean up function */ break; case XO_OP_FLUSH: /* Clean up function */ rc = write(1, xbp->xb_bufp, xbp->xb_curp - xbp->xb_bufp); if (rc > 0) rc = 0; xo_buf_reset(xbp); break; case XO_OP_DESTROY: /* Clean up function */ csv_destroy(xop, csv); break; case XO_OP_ATTRIBUTE: /* Attribute name/value */ break; case XO_OP_VERSION: /* Version string */ break; } return rc; } /* * Callback when our encoder is loaded. */ int xo_encoder_library_init (XO_ENCODER_INIT_ARGS) { arg->xei_handler = csv_handler; arg->xei_version = XO_ENCODER_VERSION; return 0; } Index: projects/clang1000-import/contrib/libxo/libxo/libxo.c =================================================================== --- projects/clang1000-import/contrib/libxo/libxo/libxo.c (revision 357178) +++ projects/clang1000-import/contrib/libxo/libxo/libxo.c (revision 357179) @@ -1,8465 +1,8518 @@ /* * Copyright (c) 2014-2019, Juniper Networks, Inc. * All rights reserved. * This SOFTWARE is licensed under the LICENSE provided in the * ../Copyright file. By downloading, installing, copying, or otherwise * using the SOFTWARE, you agree to be bound by the terms of that * LICENSE. * Phil Shafer, July 2014 * * This is the implementation of libxo, the formatting library that * generates multiple styles of output from a single code path. * Command line utilities can have their normal text output while * automation tools can see XML or JSON output, and web tools can use * HTML output that encodes the text output annotated with additional * information. Specialized encoders can be built that allow custom * encoding including binary ones like CBOR, thrift, protobufs, etc. * * Full documentation is available in ./doc/libxo.txt or online at: * http://juniper.github.io/libxo/libxo-manual.html * * For first time readers, the core bits of code to start looking at are: * - xo_do_emit() -- parse and emit a set of fields * - xo_do_emit_fields -- the central function of the library * - xo_do_format_field() -- handles formatting a single field * - xo_transiton() -- the state machine that keeps things sane * and of course the "xo_handle_t" data structure, which carries all * configuration and state. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "xo_config.h" #include "xo.h" #include "xo_encoder.h" #include "xo_buf.h" #include "xo_explicit.h" /* * We ask wcwidth() to do an impossible job, really. 
It's supposed to
 * tell us the number of columns consumed to display a unicode
 * character.  It returns that number without any sort of context, but
 * we know they are characters whose glyph differs based on placement
 * (end of word, middle of word, etc) and many that affect characters
 * previously emitted.  Without context, it can't hope to tell us.
 * But it's the only standard tool we've got, so we use it.  We would
 * use wcswidth() but it typically just loops through adding the results
 * of wcwidth() calls in an entirely unhelpful way.
 *
 * Even then, there are many poor implementations (macosx), so we have
 * to carry our own.  We could have configure.ac test this (with
 * something like 'assert(wcwidth(0x200d) == 0)'), but it would have
 * to run a binary, which breaks cross-compilation.  Hmm... I could
 * run this test at init time and make a warning for our dear user.
 *
 * Anyhow, it remains a best-effort sort of thing.  And it's all made
 * more hopeless because we assume the display code doing the rendering
 * is playing by the same rules we are.  If it displays 0x200d as a
 * square box or a funky question mark, the output will be hosed.
 */
#ifdef LIBXO_WCWIDTH
#include "xo_wcwidth.h"
#else /* LIBXO_WCWIDTH */
#define xo_wcwidth(_x) wcwidth(_x)
#endif /* LIBXO_WCWIDTH */

#ifdef HAVE_STDIO_EXT_H
#include <stdio_ext.h>
#endif /* HAVE_STDIO_EXT_H */

/*
 * humanize_number is a great function, unless you don't have it.  So
 * we carry one in our pocket.
 */
#ifdef HAVE_HUMANIZE_NUMBER
#include <libutil.h>
#define xo_humanize_number humanize_number
#else /* HAVE_HUMANIZE_NUMBER */
#include "xo_humanize.h"
#endif /* HAVE_HUMANIZE_NUMBER */

#ifdef HAVE_GETTEXT
#include <libintl.h>
#endif /* HAVE_GETTEXT */

/* Rather lame that we can't count on these... */
#ifndef FALSE
#define FALSE 0
#endif
#ifndef TRUE
#define TRUE 1
#endif

/*
 * Three styles of specifying thread-local variables are supported.
 * configure.ac has the brains to run each possibility through the
 * compiler and see what works; we are left to define the THREAD_LOCAL
 * macro to the right value.  Most toolchains (clang, gcc) use
 * "before", but some (borland) use "after" and I've heard of some
 * (ms) that use __declspec.  Any others out there?
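 *
 * For a declaration like "static THREAD_LOCAL(int) foo;", the first
 * two conventions expand roughly as (an illustrative sketch):
 *
 *     static __thread int foo;     ("before": gcc, clang)
 *     static int __thread foo;     ("after": borland-style)
 *
 * with the __declspec flavor spelled per that compiler's documentation.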
*/ #define THREAD_LOCAL_before 1 #define THREAD_LOCAL_after 2 #define THREAD_LOCAL_declspec 3 #ifndef HAVE_THREAD_LOCAL #define THREAD_LOCAL(_x) _x #elif HAVE_THREAD_LOCAL == THREAD_LOCAL_before #define THREAD_LOCAL(_x) __thread _x #elif HAVE_THREAD_LOCAL == THREAD_LOCAL_after #define THREAD_LOCAL(_x) _x __thread #elif HAVE_THREAD_LOCAL == THREAD_LOCAL_declspec #define THREAD_LOCAL(_x) __declspec(_x) #else #error unknown thread-local setting #endif /* HAVE_THREADS_H */ const char xo_version[] = LIBXO_VERSION; const char xo_version_extra[] = LIBXO_VERSION_EXTRA; static const char xo_default_format[] = "%s"; #ifndef UNUSED #define UNUSED __attribute__ ((__unused__)) #endif /* UNUSED */ #define XO_INDENT_BY 2 /* Amount to indent when pretty printing */ #define XO_DEPTH 128 /* Default stack depth */ #define XO_MAX_ANCHOR_WIDTH (8*1024) /* Anything wider is just silly */ #define XO_FAILURE_NAME "failure" /* Flags for the stack frame */ typedef unsigned xo_xsf_flags_t; /* XSF_* flags */ #define XSF_NOT_FIRST (1<<0) /* Not the first element */ #define XSF_LIST (1<<1) /* Frame is a list */ #define XSF_INSTANCE (1<<2) /* Frame is an instance */ #define XSF_DTRT (1<<3) /* Save the name for DTRT mode */ #define XSF_CONTENT (1<<4) /* Some content has been emitted */ #define XSF_EMIT (1<<5) /* Some field has been emitted */ #define XSF_EMIT_KEY (1<<6) /* A key has been emitted */ #define XSF_EMIT_LEAF_LIST (1<<7) /* A leaf-list field has been emitted */ /* These are the flags we propagate between markers and their parents */ #define XSF_MARKER_FLAGS \ (XSF_NOT_FIRST | XSF_CONTENT | XSF_EMIT | XSF_EMIT_KEY | XSF_EMIT_LEAF_LIST ) /* * Turn the transition between two states into a number suitable for * a "switch" statement. */ #define XSS_TRANSITION(_old, _new) ((_old) << 8 | (_new)) /* * xo_stack_t: As we open and close containers and levels, we * create a stack of frames to track them. This is needed for * XOF_WARN and XOF_XPATH. */ typedef struct xo_stack_s { xo_xsf_flags_t xs_flags; /* Flags for this frame */ xo_state_t xs_state; /* State for this stack frame */ char *xs_name; /* Name (for XPath value) */ char *xs_keys; /* XPath predicate for any key fields */ } xo_stack_t; /* * libxo supports colors and effects, for those who like them. * XO_COL_* ("colors") refers to fancy ansi codes, while X__EFF_* * ("effects") are bits since we need to maintain state. */ typedef uint8_t xo_color_t; #define XO_COL_DEFAULT 0 #define XO_COL_BLACK 1 #define XO_COL_RED 2 #define XO_COL_GREEN 3 #define XO_COL_YELLOW 4 #define XO_COL_BLUE 5 #define XO_COL_MAGENTA 6 #define XO_COL_CYAN 7 #define XO_COL_WHITE 8 #define XO_NUM_COLORS 9 /* * Yes, there's no blink. We're civilized. We like users. Blink * isn't something one does to someone you like. Friends don't let * friends use blink. On friends. You know what I mean. Blink is * like, well, it's like bursting into show tunes at a funeral. It's * just not done. Not something anyone wants. And on those rare * instances where it might actually be appropriate, it's still wrong, * since it's likely done by the wrong person for the wrong reason. * Just like blink. And if I implemented blink, I'd be like a funeral * director who adds "Would you like us to burst into show tunes?" on * the list of questions asked while making funeral arrangements. * It's formalizing wrongness in the wrong way. And we're just too * civilized to do that. Hhhmph! 
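 *
 * Effects are bits precisely so they can be combined; an illustrative
 * sketch, using the types and constants defined just below:
 *
 *     xo_colors_t xoc = { 0 };
 *     xoc.xoc_effects = XO_EFF_BOLD | XO_EFF_UNDERLINE;
 *     xoc.xoc_col_fg = XO_COL_RED;    /* bold red on default background */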
*/ #define XO_EFF_RESET (1<<0) #define XO_EFF_NORMAL (1<<1) #define XO_EFF_BOLD (1<<2) #define XO_EFF_UNDERLINE (1<<3) #define XO_EFF_INVERSE (1<<4) #define XO_EFF_CLEAR_BITS XO_EFF_RESET /* Reset gets reset, surprisingly */ typedef uint8_t xo_effect_t; typedef struct xo_colors_s { xo_effect_t xoc_effects; /* Current effect set */ xo_color_t xoc_col_fg; /* Foreground color */ xo_color_t xoc_col_bg; /* Background color */ } xo_colors_t; /* * xo_handle_t: this is the principle data structure for libxo. * It's used as a store for state, options, content, and all manor * of other information. */ struct xo_handle_s { xo_xof_flags_t xo_flags; /* Flags (XOF_*) from the user*/ xo_xof_flags_t xo_iflags; /* Internal flags (XOIF_*) */ xo_style_t xo_style; /* XO_STYLE_* value */ unsigned short xo_indent; /* Indent level (if pretty) */ unsigned short xo_indent_by; /* Indent amount (tab stop) */ xo_write_func_t xo_write; /* Write callback */ xo_close_func_t xo_close; /* Close callback */ xo_flush_func_t xo_flush; /* Flush callback */ xo_formatter_t xo_formatter; /* Custom formating function */ xo_checkpointer_t xo_checkpointer; /* Custom formating support function */ void *xo_opaque; /* Opaque data for write function */ xo_buffer_t xo_data; /* Output data */ xo_buffer_t xo_fmt; /* Work area for building format strings */ xo_buffer_t xo_attrs; /* Work area for building XML attributes */ xo_buffer_t xo_predicate; /* Work area for building XPath predicates */ xo_stack_t *xo_stack; /* Stack pointer */ int xo_depth; /* Depth of stack */ int xo_stack_size; /* Size of the stack */ xo_info_t *xo_info; /* Info fields for all elements */ int xo_info_count; /* Number of info entries */ va_list xo_vap; /* Variable arguments (stdargs) */ char *xo_leading_xpath; /* A leading XPath expression */ mbstate_t xo_mbstate; /* Multi-byte character conversion state */ ssize_t xo_anchor_offset; /* Start of anchored text */ ssize_t xo_anchor_columns; /* Number of columns since the start anchor */ ssize_t xo_anchor_min_width; /* Desired width of anchored text */ ssize_t xo_units_offset; /* Start of units insertion point */ ssize_t xo_columns; /* Columns emitted during this xo_emit call */ #ifndef LIBXO_TEXT_ONLY xo_color_t xo_color_map_fg[XO_NUM_COLORS]; /* Foreground color mappings */ xo_color_t xo_color_map_bg[XO_NUM_COLORS]; /* Background color mappings */ #endif /* LIBXO_TEXT_ONLY */ xo_colors_t xo_colors; /* Current color and effect values */ xo_buffer_t xo_color_buf; /* HTML: buffer of colors and effects */ char *xo_version; /* Version string */ int xo_errno; /* Saved errno for "%m" */ char *xo_gt_domain; /* Gettext domain, suitable for dgettext(3) */ xo_encoder_func_t xo_encoder; /* Encoding function */ void *xo_private; /* Private data for external encoders */ }; /* Flag operations */ #define XOF_BIT_ISSET(_flag, _bit) (((_flag) & (_bit)) ? 1 : 0) #define XOF_BIT_SET(_flag, _bit) do { (_flag) |= (_bit); } while (0) #define XOF_BIT_CLEAR(_flag, _bit) do { (_flag) &= ~(_bit); } while (0) #define XOF_ISSET(_xop, _bit) XOF_BIT_ISSET(_xop->xo_flags, _bit) #define XOF_SET(_xop, _bit) XOF_BIT_SET(_xop->xo_flags, _bit) #define XOF_CLEAR(_xop, _bit) XOF_BIT_CLEAR(_xop->xo_flags, _bit) #define XOIF_ISSET(_xop, _bit) XOF_BIT_ISSET(_xop->xo_iflags, _bit) #define XOIF_SET(_xop, _bit) XOF_BIT_SET(_xop->xo_iflags, _bit) #define XOIF_CLEAR(_xop, _bit) XOF_BIT_CLEAR(_xop->xo_iflags, _bit) /* Internal flags */ #define XOIF_REORDER XOF_BIT(0) /* Reordering fields; record field info */ #define XOIF_DIV_OPEN XOF_BIT(1) /* A
<div> is open */
#define XOIF_TOP_EMITTED XOF_BIT(2) /* The top JSON braces have been emitted */
#define XOIF_ANCHOR XOF_BIT(3)	/* An anchor is in place */
#define XOIF_UNITS_PENDING XOF_BIT(4) /* We have a units-insertion pending */
#define XOIF_INIT_IN_PROGRESS XOF_BIT(5) /* Init of handle is in progress */
#define XOIF_MADE_OUTPUT XOF_BIT(6) /* Have already made output */

/*
 * Normal printf has width and precision, which for strings operate as
 * min and max number of columns.  But this depends on the idea that
 * one byte means one column, which UTF-8 and multi-byte characters
 * pitches on its ear.  It may take 40 bytes of data to populate 14
 * columns, but we can't go off looking at 40 bytes of data without the
 * caller's permission for fear/knowledge that we'll generate core files.
 *
 * So we make three values, distinguishing between "max column" and
 * "number of bytes that we will inspect safely".  We call the
 * latter "size", and make the format "%[[<min>].[[<size>].<max>]]s".
 *
 * Under the "first do no harm" theory, we default "max" to "size".
 * This is a reasonable assumption for folks that don't grok the
 * MBS/WCS/UTF-8 world, and while it will be annoying, it will never
 * be evil.
 *
 * For example, xo_emit("{:tag/%-14.14s}", buf) will make 14
 * columns of output, but will never look at more than 14 bytes of the
 * input buffer.  This is mostly compatible with printf and caller's
 * expectations.
 *
 * In contrast xo_emit("{:tag/%-14..14s}", buf) will look at however
 * many bytes (or until a NUL is seen) are needed to fill 14 columns
 * of output.  xo_emit("{:tag/%-14.*.14s}", xx, buf) will look at up
 * to xx bytes (or until a NUL is seen) in order to fill 14 columns
 * of output.
 *
 * It's fairly amazing how a good idea (handle all languages of the
 * world) blows such a big hole in the bottom of the fairly weak boat
 * that is C string handling.  The simplicity and completeness are
 * sunk in ways we haven't even begun to understand.
 */
#define XF_WIDTH_MIN 0		/* Minimal width */
#define XF_WIDTH_SIZE 1		/* Maximum number of bytes to examine */
#define XF_WIDTH_MAX 2		/* Maximum width */
#define XF_WIDTH_NUM 3		/* Numeric fields in printf (min.size.max) */

/* Input and output string encodings */
#define XF_ENC_WIDE 1		/* Wide characters (wchar_t) */
#define XF_ENC_UTF8 2		/* UTF-8 */
#define XF_ENC_LOCALE 3		/* Current locale */

/*
 * A place to parse printf-style format flags for each field
 */
typedef struct xo_format_s {
    unsigned char xf_fc;	/* Format character */
    unsigned char xf_enc;	/* Encoding of the string (XF_ENC_*) */
    unsigned char xf_skip;	/* Skip this field */
    unsigned char xf_lflag;	/* 'l' (long) */
    unsigned char xf_hflag;	/* 'h' (half) */
    unsigned char xf_jflag;	/* 'j' (intmax_t) */
    unsigned char xf_tflag;	/* 't' (ptrdiff_t) */
    unsigned char xf_zflag;	/* 'z' (size_t) */
    unsigned char xf_qflag;	/* 'q' (quad_t) */
    unsigned char xf_seen_minus; /* Seen a minus */
    int xf_leading_zero;	/* Seen a leading zero (zero fill) */
    unsigned xf_dots;		/* Seen one or more '.'s */
    int xf_width[XF_WIDTH_NUM];	/* Width/precision/size numeric fields */
    unsigned xf_stars;		/* Seen one or more '*'s */
    unsigned char xf_star[XF_WIDTH_NUM]; /* Seen one or more '*'s */
} xo_format_t;

/*
 * This structure represents the parsed field information, suitable for
 * processing by xo_do_emit and anything else that needs to parse fields.
 * Note that all pointers point to the main format string.
 *
 * XXX This is a first step toward compilable or cachable format
 * strings.
We can also cache the results of dgettext when no format
 * is used, assuming the 'p' modifier has _not_ been set.
 */
typedef struct xo_field_info_s {
    xo_xff_flags_t xfi_flags;	/* Flags for this field */
    unsigned xfi_ftype;		/* Field type, as character (e.g. 'V') */
    const char *xfi_start;	/* Start of field in the format string */
    const char *xfi_content;	/* Field's content */
    const char *xfi_format;	/* Field's format */
    const char *xfi_encoding;	/* Field's encoding format */
    const char *xfi_next;	/* Next character in format string */
    ssize_t xfi_len;		/* Length of field */
    ssize_t xfi_clen;		/* Content length */
    ssize_t xfi_flen;		/* Format length */
    ssize_t xfi_elen;		/* Encoding length */
    unsigned xfi_fnum;		/* Field number (if used; 0 otherwise) */
    unsigned xfi_renum;		/* Reordered number (0 == no renumbering) */
} xo_field_info_t;

/*
 * We keep a 'default' handle to allow callers to avoid having to
 * allocate one.  Passing NULL to any of our functions will use
 * this default handle.  Most functions have a variant that doesn't
 * require a handle at all, since most output is to stdout, which
 * the default handle handles handily.
 */
static THREAD_LOCAL(xo_handle_t) xo_default_handle;
static THREAD_LOCAL(int) xo_default_inited;
static int xo_locale_inited;
static const char *xo_program;

/*
 * To allow libxo to be used in diverse environments, we allow the
 * caller to give callbacks for memory allocation.
 */
xo_realloc_func_t xo_realloc = realloc;
xo_free_func_t xo_free = free;

/* Forward declarations */
static ssize_t
xo_transition (xo_handle_t *xop, xo_xof_flags_t flags, const char *name,
	       xo_state_t new_state);

static int
xo_set_options_simple (xo_handle_t *xop, const char *input);

static int
xo_color_find (const char *str);

static void
xo_buf_append_div (xo_handle_t *xop, const char *class, xo_xff_flags_t flags,
		   const char *name, ssize_t nlen,
		   const char *value, ssize_t vlen,
		   const char *fmt, ssize_t flen,
		   const char *encoding, ssize_t elen);

static void
xo_anchor_clear (xo_handle_t *xop);

/*
 * xo_style is used to retrieve the current style.  When we're built
 * for "text only" mode, we use this function to drive the removal
 * of most of the code in libxo.  We return a constant and the compiler
 * happily removes the non-text code that is no longer executed.  This
 * trims our code nicely without needing to trample perfectly readable
 * code with ifdefs.
 */
static inline xo_style_t
xo_style (xo_handle_t *xop UNUSED)
{
#ifdef LIBXO_TEXT_ONLY
    return XO_STYLE_TEXT;
#else /* LIBXO_TEXT_ONLY */
    return xop->xo_style;
#endif /* LIBXO_TEXT_ONLY */
}

/*
 * Allow the compiler to optimize out non-text-only code while
 * still compiling it.
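 *
 * For example (an illustrative sketch), callers can guard
 * style-specific work with:
 *
 *     if (!xo_text_only())
 *         ... non-text handling ...
 *
 * and a LIBXO_TEXT_ONLY build folds the test to a constant, letting
 * the compiler drop the guarded code entirely.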
*/ static inline int xo_text_only (void) { #ifdef LIBXO_TEXT_ONLY return TRUE; #else /* LIBXO_TEXT_ONLY */ return FALSE; #endif /* LIBXO_TEXT_ONLY */ } /* * Callback to write data to a FILE pointer */ static xo_ssize_t xo_write_to_file (void *opaque, const char *data) { FILE *fp = (FILE *) opaque; return fprintf(fp, "%s", data); } /* * Callback to close a file */ static void xo_close_file (void *opaque) { FILE *fp = (FILE *) opaque; fclose(fp); } /* * Callback to flush a FILE pointer */ static int xo_flush_file (void *opaque) { FILE *fp = (FILE *) opaque; return fflush(fp); } /* * Use a rotating stock of buffers to make a printable string */ #define XO_NUMBUFS 8 #define XO_SMBUFSZ 128 static const char * xo_printable (const char *str) { static THREAD_LOCAL(char) bufset[XO_NUMBUFS][XO_SMBUFSZ]; static THREAD_LOCAL(int) bufnum = 0; if (str == NULL) return ""; if (++bufnum == XO_NUMBUFS) bufnum = 0; char *res = bufset[bufnum], *cp, *ep; for (cp = res, ep = res + XO_SMBUFSZ - 1; *str && cp < ep; cp++, str++) { if (*str == '\n') { *cp++ = '\\'; *cp = 'n'; } else if (*str == '\r') { *cp++ = '\\'; *cp = 'r'; } else if (*str == '\"') { *cp++ = '\\'; *cp = '"'; } else *cp = *str; } *cp = '\0'; return res; } static int xo_depth_check (xo_handle_t *xop, int depth) { xo_stack_t *xsp; if (depth >= xop->xo_stack_size) { depth += XO_DEPTH; /* Extra room */ xsp = xo_realloc(xop->xo_stack, sizeof(xop->xo_stack[0]) * depth); if (xsp == NULL) { xo_failure(xop, "xo_depth_check: out of memory (%d)", depth); return -1; } int count = depth - xop->xo_stack_size; bzero(xsp + xop->xo_stack_size, count * sizeof(*xsp)); xop->xo_stack_size = depth; xop->xo_stack = xsp; } return 0; } void xo_no_setlocale (void) { xo_locale_inited = 1; /* Skip initialization */ } /* * For XML, the first character of a tag cannot be numeric, but people * will likely not notice. So we people-proof them by forcing a leading * underscore if they use invalid tags. Note that this doesn't cover * all broken tags, just this fairly specific case. */ static const char * xo_xml_leader_len (xo_handle_t *xop, const char *name, xo_ssize_t nlen) { if (name == NULL || isalpha(name[0]) || name[0] == '_') return ""; xo_failure(xop, "invalid XML tag name: '%.*s'", nlen, name); return "_"; } static const char * xo_xml_leader (xo_handle_t *xop, const char *name) { return xo_xml_leader_len(xop, name, strlen(name)); } /* * We need to decide if stdout is line buffered (_IOLBF). Lacking a * standard way to decide this (e.g. getlinebuf()), we have configure * look to find __flbf, which glibc supported. If not, we'll rely on * isatty, with the assumption that terminals are the only thing * that's line buffered. We _could_ test for "steam._flags & _IOLBF", * which is all __flbf does, but that's even tackier. Like a * bedazzled Elvis outfit on an ugly lap dog sort of tacky. Not * something we're willing to do. */ static int xo_is_line_buffered (FILE *stream) { #if HAVE___FLBF if (__flbf(stream)) return 1; #else /* HAVE___FLBF */ if (isatty(fileno(stream))) return 1; #endif /* HAVE___FLBF */ return 0; } /* * Initialize an xo_handle_t, using both static defaults and * the global settings from the LIBXO_OPTIONS environment * variable. */ static void xo_init_handle (xo_handle_t *xop) { xop->xo_opaque = stdout; xop->xo_write = xo_write_to_file; xop->xo_flush = xo_flush_file; if (xo_is_line_buffered(stdout)) XOF_SET(xop, XOF_FLUSH_LINE); /* * We need to initialize the locale, which isn't really pretty. 
* Libraries should depend on their caller to set up the * environment. But we really can't count on the caller to do * this, because well, they won't. Trust me. */ if (!xo_locale_inited) { xo_locale_inited = 1; /* Only do this once */ #ifdef __FreeBSD__ /* Who does The Right Thing */ const char *cp = ""; #else /* __FreeBSD__ */ const char *cp = getenv("LC_ALL"); if (cp == NULL) cp = getenv("LC_CTYPE"); if (cp == NULL) cp = getenv("LANG"); if (cp == NULL) cp = "C"; /* Default for C programs */ #endif /* __FreeBSD__ */ (void) setlocale(LC_CTYPE, cp); } /* * Initialize only the xo_buffers we know we'll need; the others * can be allocated as needed. */ xo_buf_init(&xop->xo_data); xo_buf_init(&xop->xo_fmt); if (XOIF_ISSET(xop, XOIF_INIT_IN_PROGRESS)) return; XOIF_SET(xop, XOIF_INIT_IN_PROGRESS); xop->xo_indent_by = XO_INDENT_BY; xo_depth_check(xop, XO_DEPTH); XOIF_CLEAR(xop, XOIF_INIT_IN_PROGRESS); } /* * Initialize the default handle. */ static void xo_default_init (void) { xo_handle_t *xop = &xo_default_handle; xo_init_handle(xop); #if !defined(NO_LIBXO_OPTIONS) if (!XOF_ISSET(xop, XOF_NO_ENV)) { char *env = getenv("LIBXO_OPTIONS"); if (env) xo_set_options_simple(xop, env); } #endif /* NO_LIBXO_OPTIONS */ xo_default_inited = 1; } /* * Cheap convenience function to return either the argument, or * the internal handle, after it has been initialized. The usage * is: * xop = xo_default(xop); */ static xo_handle_t * xo_default (xo_handle_t *xop) { if (xop == NULL) { if (xo_default_inited == 0) xo_default_init(); xop = &xo_default_handle; } return xop; } /* * Return the number of spaces we should be indenting. If * we are pretty-printing, this is indent * indent_by. */ static int xo_indent (xo_handle_t *xop) { int rc = 0; xop = xo_default(xop); if (XOF_ISSET(xop, XOF_PRETTY)) { rc = xop->xo_indent * xop->xo_indent_by; if (XOIF_ISSET(xop, XOIF_TOP_EMITTED)) rc += xop->xo_indent_by; } return (rc > 0) ? 
rc : 0; } static void xo_buf_indent (xo_handle_t *xop, int indent) { xo_buffer_t *xbp = &xop->xo_data; if (indent <= 0) indent = xo_indent(xop); if (!xo_buf_has_room(xbp, indent)) return; memset(xbp->xb_curp, ' ', indent); xbp->xb_curp += indent; } static char xo_xml_amp[] = "&"; static char xo_xml_lt[] = "<"; static char xo_xml_gt[] = ">"; static char xo_xml_quot[] = """; static ssize_t xo_escape_xml (xo_buffer_t *xbp, ssize_t len, xo_xff_flags_t flags) { ssize_t slen; ssize_t delta = 0; char *cp, *ep, *ip; const char *sp; int attr = XOF_BIT_ISSET(flags, XFF_ATTR); for (cp = xbp->xb_curp, ep = cp + len; cp < ep; cp++) { /* We're subtracting 2: 1 for the NUL, 1 for the char we replace */ if (*cp == '<') delta += sizeof(xo_xml_lt) - 2; else if (*cp == '>') delta += sizeof(xo_xml_gt) - 2; else if (*cp == '&') delta += sizeof(xo_xml_amp) - 2; else if (attr && *cp == '"') delta += sizeof(xo_xml_quot) - 2; } if (delta == 0) /* Nothing to escape; bail */ return len; if (!xo_buf_has_room(xbp, delta)) /* No room; bail, but don't append */ return 0; ep = xbp->xb_curp; cp = ep + len; ip = cp + delta; do { cp -= 1; ip -= 1; if (*cp == '<') sp = xo_xml_lt; else if (*cp == '>') sp = xo_xml_gt; else if (*cp == '&') sp = xo_xml_amp; else if (attr && *cp == '"') sp = xo_xml_quot; else { *ip = *cp; continue; } slen = strlen(sp); ip -= slen - 1; memcpy(ip, sp, slen); } while (cp > ep && cp != ip); return len + delta; } static ssize_t xo_escape_json (xo_buffer_t *xbp, ssize_t len, xo_xff_flags_t flags UNUSED) { ssize_t delta = 0; char *cp, *ep, *ip; for (cp = xbp->xb_curp, ep = cp + len; cp < ep; cp++) { if (*cp == '\\' || *cp == '"') delta += 1; else if (*cp == '\n' || *cp == '\r') delta += 1; } if (delta == 0) /* Nothing to escape; bail */ return len; if (!xo_buf_has_room(xbp, delta)) /* No room; bail, but don't append */ return 0; ep = xbp->xb_curp; cp = ep + len; ip = cp + delta; do { cp -= 1; ip -= 1; if (*cp == '\\' || *cp == '"') { *ip-- = *cp; *ip = '\\'; } else if (*cp == '\n') { *ip-- = 'n'; *ip = '\\'; } else if (*cp == '\r') { *ip-- = 'r'; *ip = '\\'; } else { *ip = *cp; } } while (cp > ep && cp != ip); return len + delta; } /* * PARAM-VALUE = UTF-8-STRING ; characters '"', '\' and * ; ']' MUST be escaped. */ static ssize_t xo_escape_sdparams (xo_buffer_t *xbp, ssize_t len, xo_xff_flags_t flags UNUSED) { ssize_t delta = 0; char *cp, *ep, *ip; for (cp = xbp->xb_curp, ep = cp + len; cp < ep; cp++) { if (*cp == '\\' || *cp == '"' || *cp == ']') delta += 1; } if (delta == 0) /* Nothing to escape; bail */ return len; if (!xo_buf_has_room(xbp, delta)) /* No room; bail, but don't append */ return 0; ep = xbp->xb_curp; cp = ep + len; ip = cp + delta; do { cp -= 1; ip -= 1; if (*cp == '\\' || *cp == '"' || *cp == ']') { *ip-- = *cp; *ip = '\\'; } else { *ip = *cp; } } while (cp > ep && cp != ip); return len + delta; } static void xo_buf_escape (xo_handle_t *xop, xo_buffer_t *xbp, const char *str, ssize_t len, xo_xff_flags_t flags) { if (!xo_buf_has_room(xbp, len)) return; memcpy(xbp->xb_curp, str, len); switch (xo_style(xop)) { case XO_STYLE_XML: case XO_STYLE_HTML: len = xo_escape_xml(xbp, len, flags); break; case XO_STYLE_JSON: len = xo_escape_json(xbp, len, flags); break; case XO_STYLE_SDPARAMS: len = xo_escape_sdparams(xbp, len, flags); break; } xbp->xb_curp += len; } /* * Write the current contents of the data buffer using the handle's * xo_write function. 
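 *
 * A custom writer has the same shape as xo_write_to_file() above; an
 * illustrative sketch ("my_stderr_write" is hypothetical):
 *
 *     static xo_ssize_t
 *     my_stderr_write (void *opaque UNUSED, const char *data)
 *     {
 *         return fprintf(stderr, "%s", data);
 *     }
 *
 * installed via xo_set_writer(xop, NULL, my_stderr_write, NULL, NULL).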
*/ static ssize_t xo_write (xo_handle_t *xop) { ssize_t rc = 0; xo_buffer_t *xbp = &xop->xo_data; if (xbp->xb_curp != xbp->xb_bufp) { xo_buf_append(xbp, "", 1); /* Append ending NUL */ xo_anchor_clear(xop); if (xop->xo_write) rc = xop->xo_write(xop->xo_opaque, xbp->xb_bufp); xbp->xb_curp = xbp->xb_bufp; } /* Turn off the flags that don't survive across writes */ XOIF_CLEAR(xop, XOIF_UNITS_PENDING); return rc; } /* * Format arguments into our buffer. If a custom formatter has been set, * we use that to do the work; otherwise we vsnprintf(). */ static ssize_t xo_vsnprintf (xo_handle_t *xop, xo_buffer_t *xbp, const char *fmt, va_list vap) { va_list va_local; ssize_t rc; ssize_t left = xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp); va_copy(va_local, vap); if (xop->xo_formatter) rc = xop->xo_formatter(xop, xbp->xb_curp, left, fmt, va_local); else rc = vsnprintf(xbp->xb_curp, left, fmt, va_local); if (rc >= left) { if (!xo_buf_has_room(xbp, rc)) { va_end(va_local); return -1; } /* * After we call vsnprintf(), the stage of vap is not defined. * We need to copy it before we pass. Then we have to do our * own logic below to move it along. This is because the * implementation can have va_list be a pointer (bsd) or a * structure (macosx) or anything in between. */ va_end(va_local); /* Reset vap to the start */ va_copy(va_local, vap); left = xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp); if (xop->xo_formatter) rc = xop->xo_formatter(xop, xbp->xb_curp, left, fmt, va_local); else rc = vsnprintf(xbp->xb_curp, left, fmt, va_local); } va_end(va_local); return rc; } /* * Print some data through the handle. */ static ssize_t xo_printf_v (xo_handle_t *xop, const char *fmt, va_list vap) { xo_buffer_t *xbp = &xop->xo_data; ssize_t left = xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp); ssize_t rc; va_list va_local; va_copy(va_local, vap); rc = vsnprintf(xbp->xb_curp, left, fmt, va_local); if (rc >= left) { if (!xo_buf_has_room(xbp, rc)) { va_end(va_local); return -1; } va_end(va_local); /* Reset vap to the start */ va_copy(va_local, vap); left = xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp); rc = vsnprintf(xbp->xb_curp, left, fmt, va_local); } va_end(va_local); if (rc > 0) xbp->xb_curp += rc; return rc; } static ssize_t xo_printf (xo_handle_t *xop, const char *fmt, ...) { ssize_t rc; va_list vap; va_start(vap, fmt); rc = xo_printf_v(xop, fmt, vap); va_end(vap); return rc; } /* * These next few function are make The Essential UTF-8 Ginsu Knife. * Identify an input and output character, and convert it. */ static uint8_t xo_utf8_data_bits[5] = { 0, 0x7f, 0x1f, 0x0f, 0x07 }; static uint8_t xo_utf8_len_bits[5] = { 0, 0x00, 0xc0, 0xe0, 0xf0 }; /* * If the byte has a high-bit set, it's UTF-8, not ASCII. */ static int xo_is_utf8 (char ch) { return (ch & 0x80); } /* * Look at the high bits of the first byte to determine the length * of the UTF-8 character. 
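 *
 * For example, reading the leading bits of the first byte:
 *
 *     0x41 ('A') -> 0xxxxxxx -> 1 byte
 *     0xC3       -> 110xxxxx -> 2 bytes
 *     0xE2       -> 1110xxxx -> 3 bytes
 *     0xF0       -> 11110xxx -> 4 bytes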
*/ static inline ssize_t xo_utf8_to_wc_len (const char *buf) { uint8_t bval = (uint8_t) *buf; ssize_t len; if ((bval & 0x80) == 0x0) len = 1; else if ((bval & 0xe0) == 0xc0) len = 2; else if ((bval & 0xf0) == 0xe0) len = 3; else if ((bval & 0xf8) == 0xf0) len = 4; else len = -1; return len; } static ssize_t xo_buf_utf8_len (xo_handle_t *xop, const char *buf, ssize_t bufsiz) { unsigned b = (unsigned char) *buf; ssize_t len, i; len = xo_utf8_to_wc_len(buf); if (len < 0) { xo_failure(xop, "invalid UTF-8 data: %02hhx", b); return -1; } if (len > bufsiz) { xo_failure(xop, "invalid UTF-8 data (short): %02hhx (%d/%d)", b, len, bufsiz); return -1; } for (i = 2; i < len; i++) { b = (unsigned char ) buf[i]; if ((b & 0xc0) != 0x80) { xo_failure(xop, "invalid UTF-8 data (byte %d): %x", i, b); return -1; } } return len; } /* * Build a wide character from the input buffer; the number of * bits we pull off the first character is dependent on the length, * but we put 6 bits off all other bytes. */ static inline wchar_t xo_utf8_char (const char *buf, ssize_t len) { /* Most common case: singleton byte */ if (len == 1) return (unsigned char) buf[0]; ssize_t i; wchar_t wc; const unsigned char *cp = (const unsigned char *) buf; wc = *cp & xo_utf8_data_bits[len]; for (i = 1; i < len; i++) { wc <<= 6; /* Low six bits have data */ wc |= cp[i] & 0x3f; if ((cp[i] & 0xc0) != 0x80) return (wchar_t) -1; } return wc; } /* * Determine the number of bytes needed to encode a wide character. */ static ssize_t xo_utf8_emit_len (wchar_t wc) { ssize_t len; if ((wc & ((1 << 7) - 1)) == wc) /* Simple case */ len = 1; else if ((wc & ((1 << 11) - 1)) == wc) len = 2; else if ((wc & ((1 << 16) - 1)) == wc) len = 3; else if ((wc & ((1 << 21) - 1)) == wc) len = 4; else len = -1; /* Invalid */ return len; } /* * Emit one wide character into the given buffer */ static void xo_utf8_emit_char (char *buf, ssize_t len, wchar_t wc) { ssize_t i; if (len == 1) { /* Simple case */ buf[0] = wc & 0x7f; return; } /* Start with the low bits and insert them, six bits at a time */ for (i = len - 1; i >= 0; i--) { buf[i] = 0x80 | (wc & 0x3f); wc >>= 6; /* Drop the low six bits */ } /* Finish off the first byte with the length bits */ buf[0] &= xo_utf8_data_bits[len]; /* Clear out the length bits */ buf[0] |= xo_utf8_len_bits[len]; /* Drop in new length bits */ } /* * Append a single UTF-8 character to a buffer, converting it to locale * encoding. Returns the number of columns consumed by that character, * as best we can determine it. */ static ssize_t xo_buf_append_locale_from_utf8 (xo_handle_t *xop, xo_buffer_t *xbp, const char *ibuf, ssize_t ilen) { wchar_t wc; ssize_t len; /* * Build our wide character from the input buffer; the number of * bits we pull off the first character is dependent on the length, * but we put 6 bits off all other bytes. 
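 *
 * A worked example: U+00E9 (e-acute) arrives as 0xC3 0xA9; the first
 * byte contributes (0xC3 & 0x1f) = 0x03, and shifting left six bits
 * and adding (0xA9 & 0x3f) = 0x29 yields 0xE9, the code point.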
*/ wc = xo_utf8_char(ibuf, ilen); if (wc == (wchar_t) -1) { xo_failure(xop, "invalid UTF-8 byte sequence"); return 0; } if (XOF_ISSET(xop, XOF_NO_LOCALE)) { if (!xo_buf_has_room(xbp, ilen)) return 0; memcpy(xbp->xb_curp, ibuf, ilen); xbp->xb_curp += ilen; } else { if (!xo_buf_has_room(xbp, MB_LEN_MAX + 1)) return 0; bzero(&xop->xo_mbstate, sizeof(xop->xo_mbstate)); len = wcrtomb(xbp->xb_curp, wc, &xop->xo_mbstate); if (len <= 0) { xo_failure(xop, "could not convert wide char: %lx", (unsigned long) wc); return 0; } xbp->xb_curp += len; } return xo_wcwidth(wc); } /* * Append a UTF-8 string to a buffer, converting it into locale encoding */ static void xo_buf_append_locale (xo_handle_t *xop, xo_buffer_t *xbp, const char *cp, ssize_t len) { const char *sp = cp, *ep = cp + len; ssize_t save_off = xbp->xb_bufp - xbp->xb_curp; ssize_t slen; int cols = 0; for ( ; cp < ep; cp++) { if (!xo_is_utf8(*cp)) { cols += 1; continue; } /* * We're looking at a non-ascii UTF-8 character. * First we copy the previous data. * Then we need find the length and validate it. * Then we turn it into a wide string. * Then we turn it into a localized string. * Then we repeat. Isn't i18n fun? */ if (sp != cp) xo_buf_append(xbp, sp, cp - sp); /* Append previous data */ slen = xo_buf_utf8_len(xop, cp, ep - cp); if (slen <= 0) { /* Bad data; back it all out */ xbp->xb_curp = xbp->xb_bufp + save_off; return; } cols += xo_buf_append_locale_from_utf8(xop, xbp, cp, slen); /* Next time through, we'll start at the next character */ cp += slen - 1; sp = cp + 1; } /* Update column values */ if (XOF_ISSET(xop, XOF_COLUMNS)) xop->xo_columns += cols; if (XOIF_ISSET(xop, XOIF_ANCHOR)) xop->xo_anchor_columns += cols; /* Before we fall into the basic logic below, we need reset len */ len = ep - sp; if (len != 0) /* Append trailing data */ xo_buf_append(xbp, sp, len); } /* * Append the given string to the given buffer, without escaping or * character set conversion. This is the straight copy to the data * buffer with no fanciness. */ static void xo_data_append (xo_handle_t *xop, const char *str, ssize_t len) { xo_buf_append(&xop->xo_data, str, len); } /* * Append the given string to the given buffer */ static void xo_data_escape (xo_handle_t *xop, const char *str, ssize_t len) { xo_buf_escape(xop, &xop->xo_data, str, len, 0); } #ifdef LIBXO_NO_RETAIN /* * Empty implementations of the retain logic */ void xo_retain_clear_all (void) { return; } void xo_retain_clear (const char *fmt UNUSED) { return; } static void xo_retain_add (const char *fmt UNUSED, xo_field_info_t *fields UNUSED, unsigned num_fields UNUSED) { return; } static int xo_retain_find (const char *fmt UNUSED, xo_field_info_t **valp UNUSED, unsigned *nump UNUSED) { return -1; } #else /* !LIBXO_NO_RETAIN */ /* * Retain: We retain parsed field definitions to enhance performance, * especially inside loops. We depend on the caller treating the format * strings as immutable, so that we can retain pointers into them. We * hold the pointers in a hash table, so allow quick access. Retained * information is retained until xo_retain_clear is called. */ /* * xo_retain_entry_t holds information about one retained set of * parsed fields. 
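 *
 * The payoff is in loops; an illustrative sketch:
 *
 *     for (i = 0; i < count; i++)
 *         xo_emit("{:name} {:value/%d}\n", name[i], value[i]);
 *
 * With retention enabled, the format is parsed once and the parsed
 * field array is reused on each subsequent call.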
*/
typedef struct xo_retain_entry_s {
    struct xo_retain_entry_s *xre_next; /* Pointer to next (older) entry */
    unsigned long xre_hits;	/* Number of times we've hit */
    const char *xre_format;	/* Pointer to format string */
    unsigned xre_num_fields;	/* Number of fields saved */
    xo_field_info_t *xre_fields; /* Pointer to fields */
} xo_retain_entry_t;

/*
 * xo_retain_t holds a complete set of parsed fields as a hash table.
 */
#ifndef XO_RETAIN_SIZE
#define XO_RETAIN_SIZE 6
#endif /* XO_RETAIN_SIZE */
#define RETAIN_HASH_SIZE (1 << XO_RETAIN_SIZE)

typedef struct xo_retain_s {
    xo_retain_entry_t *xr_bucket[RETAIN_HASH_SIZE];
} xo_retain_t;

static THREAD_LOCAL(xo_retain_t) xo_retain;
static THREAD_LOCAL(unsigned) xo_retain_count;

/*
 * Hash the format string's address into a bucket index; the low four
 * bits and the high bits aren't random, so they are discarded.
 */
static unsigned
xo_retain_hash (const char *fmt)
{
    volatile uintptr_t iptr = (uintptr_t) (const void *) fmt;

    /* Discard low four bits and high bits; they aren't random */
    unsigned val = (unsigned) ((iptr >> 4) & (((1 << 24) - 1)));

    val = (val ^ 61) ^ (val >> 16);
    val = val + (val << 3);
    val = val ^ (val >> 4);
    val = val * 0x3a8f05c5;	/* My large prime number */
    val = val ^ (val >> 15);
    val &= RETAIN_HASH_SIZE - 1;

    return val;
}

/*
 * Walk all buckets, clearing all retained entries
 */
void
xo_retain_clear_all (void)
{
    int i;
    xo_retain_entry_t *xrep, *next;

    for (i = 0; i < RETAIN_HASH_SIZE; i++) {
	for (xrep = xo_retain.xr_bucket[i]; xrep; xrep = next) {
	    next = xrep->xre_next;
	    xo_free(xrep);
	}
	xo_retain.xr_bucket[i] = NULL;
    }
    xo_retain_count = 0;
}

/*
 * Clear the single retained entry for the given format string
 */
void
xo_retain_clear (const char *fmt)
{
    xo_retain_entry_t **xrepp;
    unsigned hash = xo_retain_hash(fmt);

    for (xrepp = &xo_retain.xr_bucket[hash]; *xrepp;
	 xrepp = &(*xrepp)->xre_next) {
	if ((*xrepp)->xre_format == fmt) {
	    *xrepp = (*xrepp)->xre_next;
	    xo_retain_count -= 1;
	    return;
	}
    }
}

/*
 * Search the hash for an entry matching 'fmt'; return its fields.
 */
static int
xo_retain_find (const char *fmt, xo_field_info_t **valp, unsigned *nump)
{
    if (xo_retain_count == 0)
	return -1;

    unsigned hash = xo_retain_hash(fmt);
    xo_retain_entry_t *xrep;

    for (xrep = xo_retain.xr_bucket[hash]; xrep != NULL;
	 xrep = xrep->xre_next) {
	if (xrep->xre_format == fmt) {
	    *valp = xrep->xre_fields;
	    *nump = xrep->xre_num_fields;
	    xrep->xre_hits += 1;
	    return 0;
	}
    }

    return -1;
}

static void
xo_retain_add (const char *fmt, xo_field_info_t *fields, unsigned num_fields)
{
    unsigned hash = xo_retain_hash(fmt);
    xo_retain_entry_t *xrep;
    ssize_t sz = sizeof(*xrep) + (num_fields + 1) * sizeof(*fields);
    xo_field_info_t *xfip;

    xrep = xo_realloc(NULL, sz);
    if (xrep == NULL)
	return;

    xfip = (xo_field_info_t *) &xrep[1];
    memcpy(xfip, fields, num_fields * sizeof(*fields));

    bzero(xrep, sizeof(*xrep));
    xrep->xre_format = fmt;
    xrep->xre_fields = xfip;
    xrep->xre_num_fields = num_fields;

    /* Record the field info in the retain bucket */
    xrep->xre_next = xo_retain.xr_bucket[hash];
    xo_retain.xr_bucket[hash] = xrep;
    xo_retain_count += 1;
}

#endif /* !LIBXO_NO_RETAIN */

/*
 * Generate a warning.  Normally, this is a text message written to
 * standard error.  If the XOF_WARN_XML flag is set, then we generate
 * XMLified content on standard output.
 */
static void
xo_warn_hcv (xo_handle_t *xop, int code, int check_warn,
	     const char *fmt, va_list vap)
{
    xop = xo_default(xop);
    if (check_warn && !XOF_ISSET(xop, XOF_WARN))
	return;

    if (fmt == NULL)
	return;

    ssize_t len = strlen(fmt);
    ssize_t plen = xo_program ?
strlen(xo_program) : 0;
    char *newfmt = alloca(len + 1 + plen + 2); /* NUL, and ": " */

    if (plen) {
	memcpy(newfmt, xo_program, plen);
	newfmt[plen++] = ':';
	newfmt[plen++] = ' ';
    }
    memcpy(newfmt + plen, fmt, len);
    newfmt[len + plen] = '\0';

    if (XOF_ISSET(xop, XOF_WARN_XML)) {
	static char err_open[] = "<error>";
	static char err_close[] = "</error>";
	static char msg_open[] = "<message>";
	static char msg_close[] = "</message>";
	xo_buffer_t *xbp = &xop->xo_data;

	xo_buf_append(xbp, err_open, sizeof(err_open) - 1);
	xo_buf_append(xbp, msg_open, sizeof(msg_open) - 1);

	va_list va_local;
	va_copy(va_local, vap);

	ssize_t left = xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp);
	ssize_t rc = vsnprintf(xbp->xb_curp, left, newfmt, vap);
	if (rc >= left) {
	    if (!xo_buf_has_room(xbp, rc)) {
		va_end(va_local);
		return;
	    }

	    va_end(vap);	/* Reset vap to the start */
	    va_copy(vap, va_local);

	    left = xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp);
	    rc = vsnprintf(xbp->xb_curp, left, fmt, vap);
	}
	va_end(va_local);

	rc = xo_escape_xml(xbp, rc, 1);
	xbp->xb_curp += rc;

	xo_buf_append(xbp, msg_close, sizeof(msg_close) - 1);
	xo_buf_append(xbp, err_close, sizeof(err_close) - 1);

	if (code >= 0) {
	    const char *msg = strerror(code);
	    if (msg) {
		xo_buf_append(xbp, ": ", 2);
		xo_buf_append(xbp, msg, strlen(msg));
	    }
	}

	xo_buf_append(xbp, "\n", 1); /* Append newline and NUL to string */
	(void) xo_write(xop);

    } else {
	vfprintf(stderr, newfmt, vap);
	if (code >= 0) {
	    const char *msg = strerror(code);
	    if (msg)
		fprintf(stderr, ": %s", msg);
	}
	fprintf(stderr, "\n");
    }
}

void
xo_warn_hc (xo_handle_t *xop, int code, const char *fmt, ...)
{
    va_list vap;

    va_start(vap, fmt);
    xo_warn_hcv(xop, code, 0, fmt, vap);
    va_end(vap);
}

void
xo_warn_c (int code, const char *fmt, ...)
{
    va_list vap;

    va_start(vap, fmt);
    xo_warn_hcv(NULL, code, 0, fmt, vap);
    va_end(vap);
}

void
xo_warn (const char *fmt, ...)
{
    int code = errno;
    va_list vap;

    va_start(vap, fmt);
    xo_warn_hcv(NULL, code, 0, fmt, vap);
    va_end(vap);
}

void
xo_warnx (const char *fmt, ...)
{
    va_list vap;

    va_start(vap, fmt);
    xo_warn_hcv(NULL, -1, 0, fmt, vap);
    va_end(vap);
}

void
xo_err (int eval, const char *fmt, ...)
{
    int code = errno;
    va_list vap;

    va_start(vap, fmt);
    xo_warn_hcv(NULL, code, 0, fmt, vap);
    va_end(vap);
    xo_finish();
    exit(eval);
}

void
xo_errx (int eval, const char *fmt, ...)
{
    va_list vap;

    va_start(vap, fmt);
    xo_warn_hcv(NULL, -1, 0, fmt, vap);
    va_end(vap);
    xo_finish();
    exit(eval);
}

void
xo_errc (int eval, int code, const char *fmt, ...)
{
    va_list vap;

    va_start(vap, fmt);
    xo_warn_hcv(NULL, code, 0, fmt, vap);
    va_end(vap);
    xo_finish();
    exit(eval);
}

/*
 * Generate a message.  Unlike a warning, a message is written to the
 * handle's own output in a style-appropriate way: wrapped in markup
 * for XML and HTML, plain text for the text style, and suppressed
 * entirely for styles that cannot represent it.
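 *
 * For example (an illustrative call; "path" is hypothetical):
 *
 *     xo_message_c(ENOENT, "cannot open %s", path);
 *
 * appends ": No such file or directory" to the formatted text, since
 * the format lacks a trailing newline and the code is positive.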
*/
void
xo_message_hcv (xo_handle_t *xop, int code, const char *fmt, va_list vap)
{
    static char msg_open[] = "<message>";
    static char msg_close[] = "</message>";
    xo_buffer_t *xbp;
    ssize_t rc;
    va_list va_local;

    xop = xo_default(xop);

    if (fmt == NULL || *fmt == '\0')
	return;

    int need_nl = (fmt[strlen(fmt) - 1] != '\n');

    switch (xo_style(xop)) {
    case XO_STYLE_XML:
	xbp = &xop->xo_data;
	if (XOF_ISSET(xop, XOF_PRETTY))
	    xo_buf_indent(xop, xop->xo_indent_by);
	xo_buf_append(xbp, msg_open, sizeof(msg_open) - 1);

	va_copy(va_local, vap);

	ssize_t left = xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp);
	rc = vsnprintf(xbp->xb_curp, left, fmt, vap);
	if (rc >= left) {
	    if (!xo_buf_has_room(xbp, rc)) {
		va_end(va_local);
		return;
	    }

	    va_end(vap);	/* Reset vap to the start */
	    va_copy(vap, va_local);

	    left = xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp);
	    rc = vsnprintf(xbp->xb_curp, left, fmt, vap);
	}
	va_end(va_local);

	rc = xo_escape_xml(xbp, rc, 0);
	xbp->xb_curp += rc;

	if (need_nl && code > 0) {
	    const char *msg = strerror(code);
	    if (msg) {
		xo_buf_append(xbp, ": ", 2);
		xo_buf_append(xbp, msg, strlen(msg));
	    }
	}

	if (need_nl)
	    xo_buf_append(xbp, "\n", 1); /* Append newline and NUL to string */

	xo_buf_append(xbp, msg_close, sizeof(msg_close) - 1);
	if (XOF_ISSET(xop, XOF_PRETTY))
	    xo_buf_append(xbp, "\n", 1); /* Append newline and NUL to string */
	(void) xo_write(xop);
	break;

    case XO_STYLE_HTML:
	{
	    char buf[BUFSIZ], *bp = buf, *cp;
	    ssize_t bufsiz = sizeof(buf);
	    ssize_t rc2;

	    va_copy(va_local, vap);

	    rc = vsnprintf(bp, bufsiz, fmt, va_local);
	    if (rc > bufsiz) {
		bufsiz = rc + BUFSIZ;
		bp = alloca(bufsiz);
		va_end(va_local);
		va_copy(va_local, vap);
		rc = vsnprintf(bp, bufsiz, fmt, va_local);
	    }
	    va_end(va_local);
	    cp = bp + rc;

	    if (need_nl) {
		rc2 = snprintf(cp, bufsiz - rc, "%s%s\n",
			       (code > 0) ? ": " : "",
			       (code > 0) ? strerror(code) : "");
		if (rc2 > 0)
		    rc += rc2;
	    }

	    xo_buf_append_div(xop, "message", 0, NULL, 0, bp, rc,
			      NULL, 0, NULL, 0);
	}
	break;

    case XO_STYLE_JSON:
    case XO_STYLE_SDPARAMS:
    case XO_STYLE_ENCODER:
	/* No means of representing messages */
	return;

    case XO_STYLE_TEXT:
	rc = xo_printf_v(xop, fmt, vap);
	/*
	 * XXX need to handle UTF-8 widths
	 */
	if (rc > 0) {
	    if (XOF_ISSET(xop, XOF_COLUMNS))
		xop->xo_columns += rc;
	    if (XOIF_ISSET(xop, XOIF_ANCHOR))
		xop->xo_anchor_columns += rc;
	}

	if (need_nl && code > 0) {
	    const char *msg = strerror(code);
	    if (msg) {
		xo_printf(xop, ": %s", msg);
	    }
	}
	if (need_nl)
	    xo_printf(xop, "\n");
	break;
    }

    switch (xo_style(xop)) {
    case XO_STYLE_HTML:
	if (XOIF_ISSET(xop, XOIF_DIV_OPEN)) {
	    static char div_close[] = "</div>
"; XOIF_CLEAR(xop, XOIF_DIV_OPEN); xo_data_append(xop, div_close, sizeof(div_close) - 1); if (XOF_ISSET(xop, XOF_PRETTY)) xo_data_append(xop, "\n", 1); } break; } (void) xo_flush_h(xop); } void xo_message_hc (xo_handle_t *xop, int code, const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_message_hcv(xop, code, fmt, vap); va_end(vap); } void xo_message_c (int code, const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_message_hcv(NULL, code, fmt, vap); va_end(vap); } void xo_message_e (const char *fmt, ...) { int code = errno; va_list vap; va_start(vap, fmt); xo_message_hcv(NULL, code, fmt, vap); va_end(vap); } void xo_message (const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_message_hcv(NULL, 0, fmt, vap); va_end(vap); } void xo_failure (xo_handle_t *xop, const char *fmt, ...) { if (!XOF_ISSET(xop, XOF_WARN)) return; va_list vap; va_start(vap, fmt); xo_warn_hcv(xop, -1, 1, fmt, vap); va_end(vap); } /** * Create a handle for use by later libxo functions. * * Note: normal use of libxo does not require a distinct handle, since * the default handle (used when NULL is passed) generates text on stdout. * * @param style Style of output desired (XO_STYLE_* value) * @param flags Set of XOF_* flags in use with this handle * @return Newly allocated handle * @see xo_destroy */ xo_handle_t * xo_create (xo_style_t style, xo_xof_flags_t flags) { xo_handle_t *xop = xo_realloc(NULL, sizeof(*xop)); if (xop) { bzero(xop, sizeof(*xop)); xop->xo_style = style; XOF_SET(xop, flags); xo_init_handle(xop); xop->xo_style = style; /* Reset style (see LIBXO_OPTIONS) */ } return xop; } /** * Create a handle that will write to the given file. Use * the XOF_CLOSE_FP flag to have the file closed on xo_destroy(). * * @param fp FILE pointer to use * @param style Style of output desired (XO_STYLE_* value) * @param flags Set of XOF_* flags to use with this handle * @return Newly allocated handle * @see xo_destroy */ xo_handle_t * xo_create_to_file (FILE *fp, xo_style_t style, xo_xof_flags_t flags) { xo_handle_t *xop = xo_create(style, flags); if (xop) { xop->xo_opaque = fp; xop->xo_write = xo_write_to_file; xop->xo_close = xo_close_file; xop->xo_flush = xo_flush_file; } return xop; } /** * Set the default handler to output to a file. * * @param xop libxo handle * @param fp FILE pointer to use * @return 0 on success, non-zero on failure */ int xo_set_file_h (xo_handle_t *xop, FILE *fp) { xop = xo_default(xop); if (fp == NULL) { xo_failure(xop, "xo_set_file: NULL fp"); return -1; } xop->xo_opaque = fp; xop->xo_write = xo_write_to_file; xop->xo_close = xo_close_file; xop->xo_flush = xo_flush_file; return 0; } /** * Set the default handler to output to a file. * * @param fp FILE pointer to use * @return 0 on success, non-zero on failure */ int xo_set_file (FILE *fp) { return xo_set_file_h(NULL, fp); } /** * Release any resources held by the handle. 
* * @param xop XO handle to alter (or NULL for default handle) */ void xo_destroy (xo_handle_t *xop_arg) { xo_handle_t *xop = xo_default(xop_arg); xo_flush_h(xop); if (xop->xo_close && XOF_ISSET(xop, XOF_CLOSE_FP)) xop->xo_close(xop->xo_opaque); xo_free(xop->xo_stack); xo_buf_cleanup(&xop->xo_data); xo_buf_cleanup(&xop->xo_fmt); xo_buf_cleanup(&xop->xo_predicate); xo_buf_cleanup(&xop->xo_attrs); xo_buf_cleanup(&xop->xo_color_buf); if (xop->xo_version) xo_free(xop->xo_version); if (xop_arg == NULL) { bzero(&xo_default_handle, sizeof(xo_default_handle)); xo_default_inited = 0; } else xo_free(xop); } /** * Record a new output style to use for the given handle (or default if * handle is NULL). This output style will be used for any future output. * * @param xop XO handle to alter (or NULL for default handle) * @param style new output style (XO_STYLE_*) */ void xo_set_style (xo_handle_t *xop, xo_style_t style) { xop = xo_default(xop); xop->xo_style = style; } /** * Return the current style of a handle * * @param xop XO handle to access * @return The handle's current style */ xo_style_t xo_get_style (xo_handle_t *xop) { xop = xo_default(xop); return xo_style(xop); } /** * Return the XO_STYLE_* value matching a given name * * @param name String name of a style * @return XO_STYLE_* value */ static int xo_name_to_style (const char *name) { if (xo_streq(name, "xml")) return XO_STYLE_XML; else if (xo_streq(name, "json")) return XO_STYLE_JSON; else if (xo_streq(name, "encoder")) return XO_STYLE_ENCODER; else if (xo_streq(name, "text")) return XO_STYLE_TEXT; else if (xo_streq(name, "html")) return XO_STYLE_HTML; else if (xo_streq(name, "sdparams")) return XO_STYLE_SDPARAMS; return -1; } /* * Indicate if the style is an "encoding" one as opposed to a "display" one. 
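 *
 * XML, JSON, SDPARAMS and external encoders count as "encoding"
 * styles; TEXT and HTML are "display" styles aimed at human readers.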
*/ static int xo_style_is_encoding (xo_handle_t *xop) { if (xo_style(xop) == XO_STYLE_JSON || xo_style(xop) == XO_STYLE_XML || xo_style(xop) == XO_STYLE_SDPARAMS || xo_style(xop) == XO_STYLE_ENCODER) return 1; return 0; } /* Simple name-value mapping */ typedef struct xo_mapping_s { xo_xff_flags_t xm_value; /* Flag value */ const char *xm_name; /* String name */ } xo_mapping_t; static xo_xff_flags_t xo_name_lookup (xo_mapping_t *map, const char *value, ssize_t len) { if (len == 0) return 0; if (len < 0) len = strlen(value); while (isspace((int) *value)) { value += 1; len -= 1; } while (isspace((int) value[len])) len -= 1; if (*value == '\0') return 0; for ( ; map->xm_name; map++) if (strncmp(map->xm_name, value, len) == 0) return map->xm_value; return 0; } #ifdef NOT_NEEDED_YET static const char * xo_value_lookup (xo_mapping_t *map, xo_xff_flags_t value) { if (value == 0) return NULL; for ( ; map->xm_name; map++) if (map->xm_value == value) return map->xm_name; return NULL; } #endif /* NOT_NEEDED_YET */ static xo_mapping_t xo_xof_names[] = { { XOF_COLOR_ALLOWED, "color" }, { XOF_COLOR, "color-force" }, { XOF_COLUMNS, "columns" }, { XOF_DTRT, "dtrt" }, { XOF_FLUSH, "flush" }, { XOF_FLUSH_LINE, "flush-line" }, { XOF_IGNORE_CLOSE, "ignore-close" }, { XOF_INFO, "info" }, { XOF_KEYS, "keys" }, { XOF_LOG_GETTEXT, "log-gettext" }, { XOF_LOG_SYSLOG, "log-syslog" }, { XOF_NO_HUMANIZE, "no-humanize" }, { XOF_NO_LOCALE, "no-locale" }, { XOF_RETAIN_NONE, "no-retain" }, { XOF_NO_TOP, "no-top" }, { XOF_NOT_FIRST, "not-first" }, { XOF_PRETTY, "pretty" }, { XOF_RETAIN_ALL, "retain" }, { XOF_UNDERSCORES, "underscores" }, { XOF_UNITS, "units" }, { XOF_WARN, "warn" }, { XOF_WARN_XML, "warn-xml" }, { XOF_XPATH, "xpath" }, { 0, NULL } }; /* Options available via the environment variable ($LIBXO_OPTIONS) */ static xo_mapping_t xo_xof_simple_names[] = { { XOF_COLOR_ALLOWED, "color" }, { XOF_FLUSH, "flush" }, { XOF_FLUSH_LINE, "flush-line" }, { XOF_NO_HUMANIZE, "no-humanize" }, { XOF_NO_LOCALE, "no-locale" }, { XOF_RETAIN_NONE, "no-retain" }, { XOF_PRETTY, "pretty" }, { XOF_RETAIN_ALL, "retain" }, { XOF_UNDERSCORES, "underscores" }, { XOF_WARN, "warn" }, { 0, NULL } }; /* * Convert string name to XOF_* flag value. * Not all are useful. Or safe. Or sane. */ static unsigned xo_name_to_flag (const char *name) { return (unsigned) xo_name_lookup(xo_xof_names, name, -1); } /** * Set the style of an libxo handle based on a string name * * @param xop XO handle * @param name String value of name * @return 0 on success, non-zero on failure */ int xo_set_style_name (xo_handle_t *xop, const char *name) { if (name == NULL) return -1; int style = xo_name_to_style(name); if (style < 0) return -1; xo_set_style(xop, style); return 0; } /* * Fill in the color map, based on the input string; currently unimplemented * Look for something like "colors=red/blue+green/yellow" as fg/bg pairs. */ static void xo_set_color_map (xo_handle_t *xop, char *value) { if (xo_text_only()) return; char *cp, *ep, *vp, *np; ssize_t len = value ? strlen(value) + 1 : 0; int num = 1, fg, bg; for (cp = value, ep = cp + len - 1; cp && *cp && cp < ep; cp = np) { np = strchr(cp, '+'); if (np) *np++ = '\0'; vp = strchr(cp, '/'); if (vp) *vp++ = '\0'; fg = *cp ? xo_color_find(cp) : -1; bg = (vp && *vp) ? xo_color_find(vp) : -1; #ifndef LIBXO_TEXT_ONLY xop->xo_color_map_fg[num] = (fg < 0) ? num : fg; xop->xo_color_map_bg[num] = (bg < 0) ? 
num : bg; #endif /* LIBXO_TEXT_ONLY */ if (++num > XO_NUM_COLORS) break; } /* If no color initialization happened, then we don't need the map */ if (num > 1) XOF_SET(xop, XOF_COLOR_MAP); else XOF_CLEAR(xop, XOF_COLOR_MAP); #ifndef LIBXO_TEXT_ONLY /* Fill in the rest of the colors with the defaults */ for ( ; num < XO_NUM_COLORS; num++) xop->xo_color_map_fg[num] = xop->xo_color_map_bg[num] = num; #endif /* LIBXO_TEXT_ONLY */ } static int xo_set_options_simple (xo_handle_t *xop, const char *input) { xo_xof_flags_t new_flag; char *cp, *ep, *vp, *np, *bp; ssize_t len = strlen(input) + 1; bp = alloca(len); memcpy(bp, input, len); for (cp = bp, ep = cp + len - 1; cp && cp < ep; cp = np) { np = strchr(cp, ','); if (np) *np++ = '\0'; vp = strchr(cp, '='); if (vp) *vp++ = '\0'; if (xo_streq("colors", cp)) { xo_set_color_map(xop, vp); continue; } new_flag = xo_name_lookup(xo_xof_simple_names, cp, -1); if (new_flag != 0) { XOF_SET(xop, new_flag); } else if (xo_streq(cp, "no-color")) { XOF_CLEAR(xop, XOF_COLOR_ALLOWED); } else { xo_failure(xop, "unknown simple option: %s", cp); return -1; } } return 0; } /** * Set the options for a handle using a string of options * passed in. The input is a comma-separated set of names * and optional values: "xml,pretty,indent=4" * * @param xop XO handle * @param input Comma-separated set of option values * @return 0 on success, non-zero on failure */ int xo_set_options (xo_handle_t *xop, const char *input) { char *cp, *ep, *vp, *np, *bp; int style = -1, new_style, rc = 0; ssize_t len; xo_xof_flags_t new_flag; if (input == NULL) return 0; xop = xo_default(xop); #ifdef LIBXO_COLOR_ON_BY_DEFAULT /* If the installer used --enable-color-on-by-default, then we allow it */ XOF_SET(xop, XOF_COLOR_ALLOWED); #endif /* LIBXO_COLOR_ON_BY_DEFAULT */ /* * We support a simpler, old-school style of giving option * also, using a single character for each option. It's * ideal for lazy people, such as myself. */ if (*input == ':') { ssize_t sz; for (input++ ; *input; input++) { switch (*input) { case 'c': XOF_SET(xop, XOF_COLOR_ALLOWED); break; case 'f': XOF_SET(xop, XOF_FLUSH); break; case 'F': XOF_SET(xop, XOF_FLUSH_LINE); break; case 'g': XOF_SET(xop, XOF_LOG_GETTEXT); break; case 'H': xop->xo_style = XO_STYLE_HTML; break; case 'I': XOF_SET(xop, XOF_INFO); break; case 'i': sz = strspn(input + 1, "0123456789"); if (sz > 0) { xop->xo_indent_by = atoi(input + 1); input += sz - 1; /* Skip value */ } break; case 'J': xop->xo_style = XO_STYLE_JSON; break; case 'k': XOF_SET(xop, XOF_KEYS); break; case 'n': XOF_SET(xop, XOF_NO_HUMANIZE); break; case 'P': XOF_SET(xop, XOF_PRETTY); break; case 'T': xop->xo_style = XO_STYLE_TEXT; break; case 'U': XOF_SET(xop, XOF_UNITS); break; case 'u': XOF_SET(xop, XOF_UNDERSCORES); break; case 'W': XOF_SET(xop, XOF_WARN); break; case 'X': xop->xo_style = XO_STYLE_XML; break; case 'x': XOF_SET(xop, XOF_XPATH); break; } } return 0; } len = strlen(input) + 1; bp = alloca(len); memcpy(bp, input, len); for (cp = bp, ep = cp + len - 1; cp && cp < ep; cp = np) { np = strchr(cp, ','); if (np) *np++ = '\0'; + /* + * "@foo" is a shorthand for "encoder=foo". This is driven + * chiefly by a desire to make pluggable encoders not appear + * so distinct from built-in encoders. 
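+ *
+ * For example, "--libxo @csv" is treated exactly like
+ * "--libxo encoder=csv".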
+ */ + if (*cp == '@') { + vp = cp + 1; + + if (*vp == '\0') + xo_failure(xop, "missing value for encoder option"); + else { + rc = xo_encoder_init(xop, vp); + if (rc) + xo_warnx("error initializing encoder: %s", vp); + } + + continue; + } + vp = strchr(cp, '='); if (vp) *vp++ = '\0'; if (xo_streq("colors", cp)) { xo_set_color_map(xop, vp); continue; } /* * For options, we don't allow "encoder" since we want to * handle it explicitly below as "encoder=xxx". */ new_style = xo_name_to_style(cp); if (new_style >= 0 && new_style != XO_STYLE_ENCODER) { if (style >= 0) xo_warnx("ignoring multiple styles: '%s'", cp); else style = new_style; } else { new_flag = xo_name_to_flag(cp); if (new_flag != 0) XOF_SET(xop, new_flag); else if (xo_streq(cp, "no-color")) XOF_CLEAR(xop, XOF_COLOR_ALLOWED); else if (xo_streq(cp, "indent")) { if (vp) xop->xo_indent_by = atoi(vp); else xo_failure(xop, "missing value for indent option"); } else if (xo_streq(cp, "encoder")) { if (vp == NULL) xo_failure(xop, "missing value for encoder option"); else { rc = xo_encoder_init(xop, vp); if (rc) xo_warnx("error initializing encoder: %s", vp); } } else { xo_warnx("unknown libxo option value: '%s'", cp); rc = -1; } } } if (style > 0) xop->xo_style= style; return rc; } /** * Set one or more flags for a given handle (or default if handle is NULL). * These flags will affect future output. * * @param xop XO handle to alter (or NULL for default handle) * @param flags Flags to be set (XOF_*) */ void xo_set_flags (xo_handle_t *xop, xo_xof_flags_t flags) { xop = xo_default(xop); XOF_SET(xop, flags); } /** * Accessor to return the current set of flags for a handle * @param xop XO handle * @return Current set of flags */ xo_xof_flags_t xo_get_flags (xo_handle_t *xop) { xop = xo_default(xop); return xop->xo_flags; } /** * strndup with a twist: len < 0 means len = strlen(str) */ static char * xo_strndup (const char *str, ssize_t len) { if (len < 0) len = strlen(str); char *cp = xo_realloc(NULL, len + 1); if (cp) { memcpy(cp, str, len); cp[len] = '\0'; } return cp; } /** * Record a leading prefix for the XPath we generate. This allows the * generated data to be placed within an XML hierarchy but still have * accurate XPath expressions. * * @param xop XO handle to alter (or NULL for default handle) * @param path The XPath expression */ void xo_set_leading_xpath (xo_handle_t *xop, const char *path) { xop = xo_default(xop); if (xop->xo_leading_xpath) { xo_free(xop->xo_leading_xpath); xop->xo_leading_xpath = NULL; } if (path == NULL) return; xop->xo_leading_xpath = xo_strndup(path, -1); } /** * Record the info data for a set of tags * * @param xop XO handle to alter (or NULL for default handle) * @param info Info data (xo_info_t) to be recorded (or NULL) (MUST BE SORTED) * @pararm count Number of entries in info (or -1 to count them ourselves) */ void xo_set_info (xo_handle_t *xop, xo_info_t *infop, int count) { xop = xo_default(xop); if (count < 0 && infop) { xo_info_t *xip; for (xip = infop, count = 0; xip->xi_name; xip++, count++) continue; } xop->xo_info = infop; xop->xo_info_count = count; } /** * Set the formatter callback for a handle. The callback should * return a newly formatting contents of a formatting instruction, * meaning the bits inside the braces. */ void xo_set_formatter (xo_handle_t *xop, xo_formatter_t func, xo_checkpointer_t cfunc) { xop = xo_default(xop); xop->xo_formatter = func; xop->xo_checkpointer = cfunc; } /** * Clear one or more flags for a given handle (or default if handle is NULL). 
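 * (For example, xo_clear_flags(NULL, XOF_PRETTY) switches
 * pretty-printing back off on the default handle.)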
* These flags will affect future output. * * @param xop XO handle to alter (or NULL for default handle) * @param flags Flags to be cleared (XOF_*) */ void xo_clear_flags (xo_handle_t *xop, xo_xof_flags_t flags) { xop = xo_default(xop); XOF_CLEAR(xop, flags); } static const char * xo_state_name (xo_state_t state) { static const char *names[] = { "init", "open_container", "close_container", "open_list", "close_list", "open_instance", "close_instance", "open_leaf_list", "close_leaf_list", "discarding", "marker", "emit", "emit_leaf_list", "finish", NULL }; if (state < (sizeof(names) / sizeof(names[0]))) return names[state]; return "unknown"; } static void xo_line_ensure_open (xo_handle_t *xop, xo_xff_flags_t flags UNUSED) { static char div_open[] = "
"; static char div_open_blank[] = "
"; if (XOF_ISSET(xop, XOF_CONTINUATION)) { XOF_CLEAR(xop, XOF_CONTINUATION); XOIF_SET(xop, XOIF_DIV_OPEN); return; } if (XOIF_ISSET(xop, XOIF_DIV_OPEN)) return; if (xo_style(xop) != XO_STYLE_HTML) return; XOIF_SET(xop, XOIF_DIV_OPEN); if (flags & XFF_BLANK_LINE) xo_data_append(xop, div_open_blank, sizeof(div_open_blank) - 1); else xo_data_append(xop, div_open, sizeof(div_open) - 1); if (XOF_ISSET(xop, XOF_PRETTY)) xo_data_append(xop, "\n", 1); } static void xo_line_close (xo_handle_t *xop) { static char div_close[] = "
"; switch (xo_style(xop)) { case XO_STYLE_HTML: if (!XOIF_ISSET(xop, XOIF_DIV_OPEN)) xo_line_ensure_open(xop, 0); XOIF_CLEAR(xop, XOIF_DIV_OPEN); xo_data_append(xop, div_close, sizeof(div_close) - 1); if (XOF_ISSET(xop, XOF_PRETTY)) xo_data_append(xop, "\n", 1); break; case XO_STYLE_TEXT: xo_data_append(xop, "\n", 1); break; } } static int xo_info_compare (const void *key, const void *data) { const char *name = key; const xo_info_t *xip = data; return strcmp(name, xip->xi_name); } static xo_info_t * xo_info_find (xo_handle_t *xop, const char *name, ssize_t nlen) { xo_info_t *xip; char *cp = alloca(nlen + 1); /* Need local copy for NUL termination */ memcpy(cp, name, nlen); cp[nlen] = '\0'; xip = bsearch(cp, xop->xo_info, xop->xo_info_count, sizeof(xop->xo_info[0]), xo_info_compare); return xip; } #define CONVERT(_have, _need) (((_have) << 8) | (_need)) /* * Check to see that the conversion is safe and sane. */ static int xo_check_conversion (xo_handle_t *xop, int have_enc, int need_enc) { switch (CONVERT(have_enc, need_enc)) { case CONVERT(XF_ENC_UTF8, XF_ENC_UTF8): case CONVERT(XF_ENC_UTF8, XF_ENC_LOCALE): case CONVERT(XF_ENC_WIDE, XF_ENC_UTF8): case CONVERT(XF_ENC_WIDE, XF_ENC_LOCALE): case CONVERT(XF_ENC_LOCALE, XF_ENC_LOCALE): case CONVERT(XF_ENC_LOCALE, XF_ENC_UTF8): return 0; default: xo_failure(xop, "invalid conversion (%c:%c)", have_enc, need_enc); return 1; } } static int xo_format_string_direct (xo_handle_t *xop, xo_buffer_t *xbp, xo_xff_flags_t flags, const wchar_t *wcp, const char *cp, ssize_t len, int max, int need_enc, int have_enc) { int cols = 0; wchar_t wc = 0; ssize_t ilen, olen; ssize_t width; int attr = XOF_BIT_ISSET(flags, XFF_ATTR); const char *sp; if (len > 0 && !xo_buf_has_room(xbp, len)) return 0; for (;;) { if (len == 0) break; if (cp) { if (*cp == '\0') break; if ((flags & XFF_UNESCAPE) && (*cp == '\\' || *cp == '%')) { cp += 1; len -= 1; if (len == 0 || *cp == '\0') break; } } if (wcp && *wcp == L'\0') break; ilen = 0; switch (have_enc) { case XF_ENC_WIDE: /* Wide character */ wc = *wcp++; ilen = 1; break; case XF_ENC_UTF8: /* UTF-8 */ ilen = xo_utf8_to_wc_len(cp); if (ilen < 0) { xo_failure(xop, "invalid UTF-8 character: %02hhx", *cp); return -1; /* Can't continue; we can't find the end */ } if (len > 0 && len < ilen) { len = 0; /* Break out of the loop */ continue; } wc = xo_utf8_char(cp, ilen); if (wc == (wchar_t) -1) { xo_failure(xop, "invalid UTF-8 character: %02hhx/%d", *cp, ilen); return -1; /* Can't continue; we can't find the end */ } cp += ilen; break; case XF_ENC_LOCALE: /* Native locale */ ilen = (len > 0) ? len : MB_LEN_MAX; ilen = mbrtowc(&wc, cp, ilen, &xop->xo_mbstate); if (ilen < 0) { /* Invalid data; skip */ xo_failure(xop, "invalid mbs char: %02hhx", *cp); wc = L'?'; ilen = 1; } if (ilen == 0) { /* Hit a wide NUL character */ len = 0; continue; } cp += ilen; break; } /* Reduce len, but not below zero */ if (len > 0) { len -= ilen; if (len < 0) len = 0; } /* * Find the width-in-columns of this character, which must be done * in wide characters, since we lack a mbswidth() function. If * it doesn't fit */ width = xo_wcwidth(wc); if (width < 0) width = iswcntrl(wc) ? 
0 : 1; if (xo_style(xop) == XO_STYLE_TEXT || xo_style(xop) == XO_STYLE_HTML) { if (max > 0 && cols + width > max) break; } switch (need_enc) { case XF_ENC_UTF8: /* Output in UTF-8 needs to be escaped, based on the style */ switch (xo_style(xop)) { case XO_STYLE_XML: case XO_STYLE_HTML: if (wc == '<') sp = xo_xml_lt; else if (wc == '>') sp = xo_xml_gt; else if (wc == '&') sp = xo_xml_amp; else if (attr && wc == '"') sp = xo_xml_quot; else break; ssize_t slen = strlen(sp); if (!xo_buf_has_room(xbp, slen - 1)) return -1; memcpy(xbp->xb_curp, sp, slen); xbp->xb_curp += slen; goto done_with_encoding; /* Need multi-level 'break' */ case XO_STYLE_JSON: if (wc != '\\' && wc != '"' && wc != '\n' && wc != '\r') break; if (!xo_buf_has_room(xbp, 2)) return -1; *xbp->xb_curp++ = '\\'; if (wc == '\n') wc = 'n'; else if (wc == '\r') wc = 'r'; else wc = wc & 0x7f; *xbp->xb_curp++ = wc; goto done_with_encoding; case XO_STYLE_SDPARAMS: if (wc != '\\' && wc != '"' && wc != ']') break; if (!xo_buf_has_room(xbp, 2)) return -1; *xbp->xb_curp++ = '\\'; wc = wc & 0x7f; *xbp->xb_curp++ = wc; goto done_with_encoding; } olen = xo_utf8_emit_len(wc); if (olen < 0) { xo_failure(xop, "ignoring bad length"); continue; } if (!xo_buf_has_room(xbp, olen)) return -1; xo_utf8_emit_char(xbp->xb_curp, olen, wc); xbp->xb_curp += olen; break; case XF_ENC_LOCALE: if (!xo_buf_has_room(xbp, MB_LEN_MAX + 1)) return -1; olen = wcrtomb(xbp->xb_curp, wc, &xop->xo_mbstate); if (olen <= 0) { xo_failure(xop, "could not convert wide char: %lx", (unsigned long) wc); width = 1; *xbp->xb_curp++ = '?'; } else xbp->xb_curp += olen; break; } done_with_encoding: cols += width; } return cols; } static int xo_needed_encoding (xo_handle_t *xop) { if (XOF_ISSET(xop, XOF_UTF8)) /* Check the override flag */ return XF_ENC_UTF8; if (xo_style(xop) == XO_STYLE_TEXT) /* Text means locale */ return XF_ENC_LOCALE; return XF_ENC_UTF8; /* Otherwise, we love UTF-8 */ } static ssize_t xo_format_string (xo_handle_t *xop, xo_buffer_t *xbp, xo_xff_flags_t flags, xo_format_t *xfp) { static char null[] = "(null)"; static char null_no_quotes[] = "null"; char *cp = NULL; wchar_t *wcp = NULL; ssize_t len; ssize_t cols = 0, rc = 0; ssize_t off = xbp->xb_curp - xbp->xb_bufp, off2; int need_enc = xo_needed_encoding(xop); if (xo_check_conversion(xop, xfp->xf_enc, need_enc)) return 0; len = xfp->xf_width[XF_WIDTH_SIZE]; if (xfp->xf_fc == 'm') { cp = strerror(xop->xo_errno); if (len < 0) len = cp ? strlen(cp) : 0; goto normal_string; } else if (xfp->xf_enc == XF_ENC_WIDE) { wcp = va_arg(xop->xo_vap, wchar_t *); if (xfp->xf_skip) return 0; /* * Dont' deref NULL; use the traditional "(null)" instead * of the more accurate "who's been a naughty boy, then?". */ if (wcp == NULL) { cp = null; len = sizeof(null) - 1; } } else { cp = va_arg(xop->xo_vap, char *); /* UTF-8 or native */ normal_string: if (xfp->xf_skip) return 0; /* Echo "Dont' deref NULL" logic */ if (cp == NULL) { if ((flags & XFF_NOQUOTE) && xo_style_is_encoding(xop)) { cp = null_no_quotes; len = sizeof(null_no_quotes) - 1; } else { cp = null; len = sizeof(null) - 1; } } /* * Optimize the most common case, which is "%s". We just * need to copy the complete string to the output buffer. 
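* For example, xo_emit("{:interface/%s}", name) takes this shortcut, while "{:interface/%-10s}" cannot, since its width triggers the padding logic below.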
*/ if (xfp->xf_enc == need_enc && xfp->xf_width[XF_WIDTH_MIN] < 0 && xfp->xf_width[XF_WIDTH_SIZE] < 0 && xfp->xf_width[XF_WIDTH_MAX] < 0 && !(XOIF_ISSET(xop, XOIF_ANCHOR) || XOF_ISSET(xop, XOF_COLUMNS))) { len = strlen(cp); xo_buf_escape(xop, xbp, cp, len, flags); /* * Our caller expects xb_curp left untouched, so we have * to reset it and return the number of bytes written to * the buffer. */ off2 = xbp->xb_curp - xbp->xb_bufp; rc = off2 - off; xbp->xb_curp = xbp->xb_bufp + off; return rc; } } cols = xo_format_string_direct(xop, xbp, flags, wcp, cp, len, xfp->xf_width[XF_WIDTH_MAX], need_enc, xfp->xf_enc); if (cols < 0) goto bail; /* * xo_buf_append* will move xb_curp, so we save/restore it. */ off2 = xbp->xb_curp - xbp->xb_bufp; rc = off2 - off; xbp->xb_curp = xbp->xb_bufp + off; if (cols < xfp->xf_width[XF_WIDTH_MIN]) { /* * Find the number of columns needed to display the string. * If we have the original wide string, we just call wcswidth, * but if we did the work ourselves, then we need to do it. */ int delta = xfp->xf_width[XF_WIDTH_MIN] - cols; if (!xo_buf_has_room(xbp, xfp->xf_width[XF_WIDTH_MIN])) goto bail; /* * If seen_minus, then pad on the right; otherwise move it so * we can pad on the left. */ if (xfp->xf_seen_minus) { cp = xbp->xb_curp + rc; } else { cp = xbp->xb_curp; memmove(xbp->xb_curp + delta, xbp->xb_curp, rc); } /* Set the padding */ memset(cp, (xfp->xf_leading_zero > 0) ? '0' : ' ', delta); rc += delta; cols += delta; } if (XOF_ISSET(xop, XOF_COLUMNS)) xop->xo_columns += cols; if (XOIF_ISSET(xop, XOIF_ANCHOR)) xop->xo_anchor_columns += cols; return rc; bail: xbp->xb_curp = xbp->xb_bufp + off; return 0; } /* * Look backwards in a buffer to find a numeric value */ static int xo_buf_find_last_number (xo_buffer_t *xbp, ssize_t start_offset) { int rc = 0; /* Fail with zero */ int digit = 1; char *sp = xbp->xb_bufp; char *cp = sp + start_offset; while (--cp >= sp) if (isdigit((int) *cp)) break; for ( ; cp >= sp; cp--) { if (!isdigit((int) *cp)) break; rc += (*cp - '0') * digit; digit *= 10; } return rc; } static ssize_t xo_count_utf8_cols (const char *str, ssize_t len) { ssize_t tlen; wchar_t wc; ssize_t cols = 0; const char *ep = str + len; while (str < ep) { tlen = xo_utf8_to_wc_len(str); if (tlen < 0) /* Broken input is very bad */ return cols; wc = xo_utf8_char(str, tlen); if (wc == (wchar_t) -1) return cols; /* We only print printable characters */ if (iswprint((wint_t) wc)) { /* * Find the width-in-columns of this character, which must be done * in wide characters, since we lack a mbswidth() function. */ ssize_t width = xo_wcwidth(wc); if (width < 0) width = iswcntrl(wc) ? 0 : 1; cols += width; } str += tlen; } return cols; } #ifdef HAVE_GETTEXT static inline const char * xo_dgettext (xo_handle_t *xop, const char *str) { const char *domainname = xop->xo_gt_domain; const char *res; res = dgettext(domainname, str); if (XOF_ISSET(xop, XOF_LOG_GETTEXT)) fprintf(stderr, "xo: gettext: %s%s%smsgid \"%s\" returns \"%s\"\n", domainname ? "domain \"" : "", xo_printable(domainname), domainname ? "\", " : "", xo_printable(str), xo_printable(res)); return res; } static inline const char * xo_dngettext (xo_handle_t *xop, const char *sing, const char *plural, unsigned long int n) { const char *domainname = xop->xo_gt_domain; const char *res; res = dngettext(domainname, sing, plural, n); if (XOF_ISSET(xop, XOF_LOG_GETTEXT)) fprintf(stderr, "xo: gettext: %s%s%s" "msgid \"%s\", msgid_plural \"%s\" (%lu) returns \"%s\"\n", domainname ? 
"domain \"" : "", xo_printable(domainname), domainname ? "\", " : "", xo_printable(sing), xo_printable(plural), n, xo_printable(res)); return res; } #else /* HAVE_GETTEXT */ static inline const char * xo_dgettext (xo_handle_t *xop UNUSED, const char *str) { return str; } static inline const char * xo_dngettext (xo_handle_t *xop UNUSED, const char *singular, const char *plural, unsigned long int n) { return (n == 1) ? singular : plural; } #endif /* HAVE_GETTEXT */ /* * This is really _re_formatting, since the normal format code has * generated a beautiful string into xo_data, starting at * start_offset. We need to see if it's plural, which means * comma-separated options, or singular. Then we make the appropriate * call to d[n]gettext() to get the locale-based version. Note that * both input and output of gettext() this should be UTF-8. */ static ssize_t xo_format_gettext (xo_handle_t *xop, xo_xff_flags_t flags, ssize_t start_offset, ssize_t cols, int need_enc) { xo_buffer_t *xbp = &xop->xo_data; if (!xo_buf_has_room(xbp, 1)) return cols; xbp->xb_curp[0] = '\0'; /* NUL-terminate the input string */ char *cp = xbp->xb_bufp + start_offset; ssize_t len = xbp->xb_curp - cp; const char *newstr = NULL; /* * The plural flag asks us to look backwards at the last numeric * value rendered and disect the string into two pieces. */ if (flags & XFF_GT_PLURAL) { int n = xo_buf_find_last_number(xbp, start_offset); char *two = memchr(cp, (int) ',', len); if (two == NULL) { xo_failure(xop, "no comma in plural gettext field: '%s'", cp); return cols; } if (two == cp) { xo_failure(xop, "nothing before comma in plural gettext " "field: '%s'", cp); return cols; } if (two == xbp->xb_curp) { xo_failure(xop, "nothing after comma in plural gettext " "field: '%s'", cp); return cols; } *two++ = '\0'; if (flags & XFF_GT_FIELD) { newstr = xo_dngettext(xop, cp, two, n); } else { /* Don't do a gettext() look up, just get the plural form */ newstr = (n == 1) ? cp : two; } /* * If we returned the first string, optimize a bit by * backing up over comma */ if (newstr == cp) { xbp->xb_curp = two - 1; /* One for comma */ /* * If the caller wanted UTF8, we're done; nothing changed, * but we need to count the columns used. */ if (need_enc == XF_ENC_UTF8) return xo_count_utf8_cols(cp, xbp->xb_curp - cp); } } else { /* The simple case (singular) */ newstr = xo_dgettext(xop, cp); if (newstr == cp) { /* If the caller wanted UTF8, we're done; nothing changed */ if (need_enc == XF_ENC_UTF8) return cols; } } /* * Since the new string string might be in gettext's buffer or * in the buffer (as the plural form), we make a copy. */ ssize_t nlen = strlen(newstr); char *newcopy = alloca(nlen + 1); memcpy(newcopy, newstr, nlen + 1); xbp->xb_curp = xbp->xb_bufp + start_offset; /* Reset the buffer */ return xo_format_string_direct(xop, xbp, flags, NULL, newcopy, nlen, 0, need_enc, XF_ENC_UTF8); } static void xo_data_append_content (xo_handle_t *xop, const char *str, ssize_t len, xo_xff_flags_t flags) { int cols; int need_enc = xo_needed_encoding(xop); ssize_t start_offset = xo_buf_offset(&xop->xo_data); cols = xo_format_string_direct(xop, &xop->xo_data, XFF_UNESCAPE | flags, NULL, str, len, -1, need_enc, XF_ENC_UTF8); if (flags & XFF_GT_FLAGS) cols = xo_format_gettext(xop, flags, start_offset, cols, need_enc); if (XOF_ISSET(xop, XOF_COLUMNS)) xop->xo_columns += cols; if (XOIF_ISSET(xop, XOIF_ANCHOR)) xop->xo_anchor_columns += cols; } /** * Bump one of the 'width' values in a format strings (e.g. "%40.50.60s"). 
* @param xfp Formatting instructions * @param digit Single digit (0-9) of input */ static void xo_bump_width (xo_format_t *xfp, int digit) { int *ip = &xfp->xf_width[xfp->xf_dots]; *ip = ((*ip > 0) ? *ip : 0) * 10 + digit; } static ssize_t xo_trim_ws (xo_buffer_t *xbp, ssize_t len) { char *cp, *sp, *ep; ssize_t delta; /* First trim leading space */ for (cp = sp = xbp->xb_curp, ep = cp + len; cp < ep; cp++) { if (*cp != ' ') break; } delta = cp - sp; if (delta) { len -= delta; memmove(sp, cp, len); } /* Then trim off the end */ for (cp = xbp->xb_curp, sp = ep = cp + len; cp < ep; ep--) { if (ep[-1] != ' ') break; } delta = sp - ep; if (delta) { len -= delta; cp[len] = '\0'; } return len; } /* * Interface to format a single field. The arguments are in xo_vap, * and the format is in 'fmt'. If 'xbp' is null, we use xop->xo_data; * this is the most common case. */ static ssize_t xo_do_format_field (xo_handle_t *xop, xo_buffer_t *xbp, const char *fmt, ssize_t flen, xo_xff_flags_t flags) { xo_format_t xf; const char *cp, *ep, *sp, *xp = NULL; ssize_t rc, cols; int style = (flags & XFF_XML) ? XO_STYLE_XML : xo_style(xop); unsigned make_output = !(flags & XFF_NO_OUTPUT) ? 1 : 0; int need_enc = xo_needed_encoding(xop); int real_need_enc = need_enc; ssize_t old_cols = xop->xo_columns; /* The gettext interface is UTF-8, so we'll need that for now */ if (flags & XFF_GT_FIELD) need_enc = XF_ENC_UTF8; if (xbp == NULL) xbp = &xop->xo_data; ssize_t start_offset = xo_buf_offset(xbp); for (cp = fmt, ep = fmt + flen; cp < ep; cp++) { /* * Since we're starting a new field, save the starting offset. * We'll need this later for field-related operations. */ if (*cp != '%') { add_one: if (xp == NULL) xp = cp; if (*cp == '\\' && cp[1] != '\0') cp += 1; continue; } else if (cp + 1 < ep && cp[1] == '%') { cp += 1; goto add_one; } if (xp) { if (make_output) { cols = xo_format_string_direct(xop, xbp, flags | XFF_UNESCAPE, NULL, xp, cp - xp, -1, need_enc, XF_ENC_UTF8); if (XOF_ISSET(xop, XOF_COLUMNS)) xop->xo_columns += cols; if (XOIF_ISSET(xop, XOIF_ANCHOR)) xop->xo_anchor_columns += cols; } xp = NULL; } bzero(&xf, sizeof(xf)); xf.xf_leading_zero = -1; xf.xf_width[0] = xf.xf_width[1] = xf.xf_width[2] = -1; /* * "%@" starts an XO-specific set of flags: * @X@ - XML-only field; ignored if style isn't XML */ if (cp[1] == '@') { for (cp += 2; cp < ep; cp++) { if (*cp == '@') { break; } if (*cp == '*') { /* * '*' means there's a "%*.*s" value in vap that * we want to ignore */ if (!XOF_ISSET(xop, XOF_NO_VA_ARG)) va_arg(xop->xo_vap, int); } } } /* Hidden fields are only visible to JSON and XML */ if (XOF_ISSET(xop, XFF_ENCODE_ONLY)) { if (style != XO_STYLE_XML && !xo_style_is_encoding(xop)) xf.xf_skip = 1; } else if (XOF_ISSET(xop, XFF_DISPLAY_ONLY)) { if (style != XO_STYLE_TEXT && xo_style(xop) != XO_STYLE_HTML) xf.xf_skip = 1; } if (!make_output) xf.xf_skip = 1; /* * Looking at one piece of a format; find the end and * call snprintf. Then advance xo_vap on our own. * * Note that 'n', 'v', and '$' are not supported. 
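* ('n' in particular is a classic security hazard, and positional '$' arguments would not mix with the sequential va_arg stepping done below.)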
*/ sp = cp; /* Save start pointer */ for (cp += 1; cp < ep; cp++) { if (*cp == 'l') xf.xf_lflag += 1; else if (*cp == 'h') xf.xf_hflag += 1; else if (*cp == 'j') xf.xf_jflag += 1; else if (*cp == 't') xf.xf_tflag += 1; else if (*cp == 'z') xf.xf_zflag += 1; else if (*cp == 'q') xf.xf_qflag += 1; else if (*cp == '.') { if (++xf.xf_dots >= XF_WIDTH_NUM) { xo_failure(xop, "Too many dots in format: '%s'", fmt); return -1; } } else if (*cp == '-') xf.xf_seen_minus = 1; else if (isdigit((int) *cp)) { if (xf.xf_leading_zero < 0) xf.xf_leading_zero = (*cp == '0'); xo_bump_width(&xf, *cp - '0'); } else if (*cp == '*') { xf.xf_stars += 1; xf.xf_star[xf.xf_dots] = 1; } else if (strchr("diouxXDOUeEfFgGaAcCsSpm", *cp) != NULL) break; else if (*cp == 'n' || *cp == 'v') { xo_failure(xop, "unsupported format: '%s'", fmt); return -1; } } if (cp == ep) xo_failure(xop, "field format missing format character: %s", fmt); xf.xf_fc = *cp; if (!XOF_ISSET(xop, XOF_NO_VA_ARG)) { if (*cp == 's' || *cp == 'S') { /* Handle "%*.*.*s" */ int s; for (s = 0; s < XF_WIDTH_NUM; s++) { if (xf.xf_star[s]) { xf.xf_width[s] = va_arg(xop->xo_vap, int); /* Normalize a negative width value */ if (xf.xf_width[s] < 0) { if (s == 0) { xf.xf_width[0] = -xf.xf_width[0]; xf.xf_seen_minus = 1; } else xf.xf_width[s] = -1; /* Ignore negative values */ } } } } } /* If no max is given, it defaults to size */ if (xf.xf_width[XF_WIDTH_MAX] < 0 && xf.xf_width[XF_WIDTH_SIZE] >= 0) xf.xf_width[XF_WIDTH_MAX] = xf.xf_width[XF_WIDTH_SIZE]; if (xf.xf_fc == 'D' || xf.xf_fc == 'O' || xf.xf_fc == 'U') xf.xf_lflag = 1; if (!xf.xf_skip) { xo_buffer_t *fbp = &xop->xo_fmt; ssize_t len = cp - sp + 1; if (!xo_buf_has_room(fbp, len + 1)) return -1; char *newfmt = fbp->xb_curp; memcpy(newfmt, sp, len); newfmt[0] = '%'; /* If we skipped over a "%@...@s" format */ newfmt[len] = '\0'; /* * Bad news: our strings are UTF-8, but the stock printf * functions won't handle field widths for wide characters * correctly. So we have to handle this ourselves. */ if (xop->xo_formatter == NULL && (xf.xf_fc == 's' || xf.xf_fc == 'S' || xf.xf_fc == 'm')) { xf.xf_enc = (xf.xf_fc == 'm') ? XF_ENC_UTF8 : (xf.xf_lflag || (xf.xf_fc == 'S')) ? XF_ENC_WIDE : xf.xf_hflag ? XF_ENC_LOCALE : XF_ENC_UTF8; rc = xo_format_string(xop, xbp, flags, &xf); if ((flags & XFF_TRIM_WS) && xo_style_is_encoding(xop)) rc = xo_trim_ws(xbp, rc); } else { ssize_t columns = rc = xo_vsnprintf(xop, xbp, newfmt, xop->xo_vap); if (rc > 0) { /* * For XML and HTML, we need "&<>" processing; for JSON, * it's quotes. Text gets nothing. */ switch (style) { case XO_STYLE_XML: if (flags & XFF_TRIM_WS) columns = rc = xo_trim_ws(xbp, rc); /* FALLTHRU */ case XO_STYLE_HTML: rc = xo_escape_xml(xbp, rc, (flags & XFF_ATTR)); break; case XO_STYLE_JSON: if (flags & XFF_TRIM_WS) columns = rc = xo_trim_ws(xbp, rc); rc = xo_escape_json(xbp, rc, 0); break; case XO_STYLE_SDPARAMS: if (flags & XFF_TRIM_WS) columns = rc = xo_trim_ws(xbp, rc); rc = xo_escape_sdparams(xbp, rc, 0); break; case XO_STYLE_ENCODER: if (flags & XFF_TRIM_WS) columns = rc = xo_trim_ws(xbp, rc); break; } /* * We can assume all the non-%s data we've * added is ASCII, so the columns and bytes are the * same. xo_format_string handles all the fancy * string conversions and updates xo_anchor_columns * accordingly. 
*/ if (XOF_ISSET(xop, XOF_COLUMNS)) xop->xo_columns += columns; if (XOIF_ISSET(xop, XOIF_ANCHOR)) xop->xo_anchor_columns += columns; } } if (rc > 0) xbp->xb_curp += rc; } /* * Now for the tricky part: we need to move the argument pointer * along by the amount needed. */ if (!XOF_ISSET(xop, XOF_NO_VA_ARG)) { if (xf.xf_fc == 's' ||xf.xf_fc == 'S') { /* * The 'S' and 's' formats are normally handled in * xo_format_string, but if we skipped it, then we * need to pop it. */ if (xf.xf_skip) va_arg(xop->xo_vap, char *); } else if (xf.xf_fc == 'm') { /* Nothing on the stack for "%m" */ } else { int s; for (s = 0; s < XF_WIDTH_NUM; s++) { if (xf.xf_star[s]) va_arg(xop->xo_vap, int); } if (strchr("diouxXDOU", xf.xf_fc) != NULL) { if (xf.xf_hflag > 1) { va_arg(xop->xo_vap, int); } else if (xf.xf_hflag > 0) { va_arg(xop->xo_vap, int); } else if (xf.xf_lflag > 1) { va_arg(xop->xo_vap, unsigned long long); } else if (xf.xf_lflag > 0) { va_arg(xop->xo_vap, unsigned long); } else if (xf.xf_jflag > 0) { va_arg(xop->xo_vap, intmax_t); } else if (xf.xf_tflag > 0) { va_arg(xop->xo_vap, ptrdiff_t); } else if (xf.xf_zflag > 0) { va_arg(xop->xo_vap, size_t); } else if (xf.xf_qflag > 0) { va_arg(xop->xo_vap, quad_t); } else { va_arg(xop->xo_vap, int); } } else if (strchr("eEfFgGaA", xf.xf_fc) != NULL) if (xf.xf_lflag) va_arg(xop->xo_vap, long double); else va_arg(xop->xo_vap, double); else if (xf.xf_fc == 'C' || (xf.xf_fc == 'c' && xf.xf_lflag)) va_arg(xop->xo_vap, wint_t); else if (xf.xf_fc == 'c') va_arg(xop->xo_vap, int); else if (xf.xf_fc == 'p') va_arg(xop->xo_vap, void *); } } } if (xp) { if (make_output) { cols = xo_format_string_direct(xop, xbp, flags | XFF_UNESCAPE, NULL, xp, cp - xp, -1, need_enc, XF_ENC_UTF8); if (XOF_ISSET(xop, XOF_COLUMNS)) xop->xo_columns += cols; if (XOIF_ISSET(xop, XOIF_ANCHOR)) xop->xo_anchor_columns += cols; } xp = NULL; } if (flags & XFF_GT_FLAGS) { /* * Handle gettext()ing the field by looking up the value * and then copying it in, while converting to locale, if * needed. */ ssize_t new_cols = xo_format_gettext(xop, flags, start_offset, old_cols, real_need_enc); if (XOF_ISSET(xop, XOF_COLUMNS)) xop->xo_columns += new_cols - old_cols; if (XOIF_ISSET(xop, XOIF_ANCHOR)) xop->xo_anchor_columns += new_cols - old_cols; } return 0; } /* * Remove any numeric precision/width format from the format string by * inserting the "%" after the [0-9]+, returning the substring. */ static char * xo_fix_encoding (xo_handle_t *xop UNUSED, char *encoding) { char *cp = encoding; if (cp[0] != '%' || !isdigit((int) cp[1])) return encoding; for (cp += 2; *cp; cp++) { if (!isdigit((int) *cp)) break; } *--cp = '%'; /* Back off and insert the '%' */ return cp; } static void xo_color_append_html (xo_handle_t *xop) { /* * If the color buffer has content, we add it now. It's already * prebuilt and ready, since we want to add it to every
<div>
. */ if (!xo_buf_is_empty(&xop->xo_color_buf)) { xo_buffer_t *xbp = &xop->xo_color_buf; xo_data_append(xop, xbp->xb_bufp, xbp->xb_curp - xbp->xb_bufp); } } /* * A wrapper for humanize_number that autoscales, since the * HN_AUTOSCALE flag scales as needed based on the size of * the output buffer, not the size of the value. I also * wish HN_DECIMAL was more imperative, without the <10 * test. But the boat only goes where we want when we hold * the rudder, so xo_humanize fixes part of the problem. */ static ssize_t xo_humanize (char *buf, ssize_t len, uint64_t value, int flags) { int scale = 0; if (value) { uint64_t left = value; if (flags & HN_DIVISOR_1000) { for ( ; left; scale++) left /= 1000; } else { for ( ; left; scale++) left /= 1024; } scale -= 1; } return xo_humanize_number(buf, len, value, "", scale, flags); } /* * This is an area where we can save information from the handle for * later restoration. We need to know what data was rendered to know * what needs cleaned up. */ typedef struct xo_humanize_save_s { ssize_t xhs_offset; /* Saved xo_offset */ ssize_t xhs_columns; /* Saved xo_columns */ ssize_t xhs_anchor_columns; /* Saved xo_anchor_columns */ } xo_humanize_save_t; /* * Format a "humanized" value for a numeric, meaning something nice * like "44M" instead of "44470272". We autoscale, choosing the * most appropriate value for K/M/G/T/P/E based on the value given. */ static void xo_format_humanize (xo_handle_t *xop, xo_buffer_t *xbp, xo_humanize_save_t *savep, xo_xff_flags_t flags) { if (XOF_ISSET(xop, XOF_NO_HUMANIZE)) return; ssize_t end_offset = xbp->xb_curp - xbp->xb_bufp; if (end_offset == savep->xhs_offset) /* Huh? Nothing to render */ return; /* * We have a string that's allegedly a number. We want to * humanize it, which means turning it back into a number * and calling xo_humanize_number on it. */ uint64_t value; char *ep; xo_buf_append(xbp, "", 1); /* NUL-terminate it */ value = strtoull(xbp->xb_bufp + savep->xhs_offset, &ep, 0); if (!(value == ULLONG_MAX && errno == ERANGE) && (ep != xbp->xb_bufp + savep->xhs_offset)) { /* * There are few values where humanize_number needs * more bytes than the original value. I've used * 10 as a rectal number to cover those scenarios. */ if (xo_buf_has_room(xbp, 10)) { xbp->xb_curp = xbp->xb_bufp + savep->xhs_offset; ssize_t rc; ssize_t left = (xbp->xb_bufp + xbp->xb_size) - xbp->xb_curp; int hn_flags = HN_NOSPACE; /* On by default */ if (flags & XFF_HN_SPACE) hn_flags &= ~HN_NOSPACE; if (flags & XFF_HN_DECIMAL) hn_flags |= HN_DECIMAL; if (flags & XFF_HN_1000) hn_flags |= HN_DIVISOR_1000; rc = xo_humanize(xbp->xb_curp, left, value, hn_flags); if (rc > 0) { xbp->xb_curp += rc; xop->xo_columns = savep->xhs_columns + rc; xop->xo_anchor_columns = savep->xhs_anchor_columns + rc; } } } } /* * Convenience function that either append a fixed value (if one is * given) or formats a field using a format string. If it's * encode_only, then we can't skip formatting the field, since it may * be pulling arguments off the stack. */ static inline void xo_simple_field (xo_handle_t *xop, unsigned encode_only, const char *value, ssize_t vlen, const char *fmt, ssize_t flen, xo_xff_flags_t flags) { if (encode_only) flags |= XFF_NO_OUTPUT; if (vlen == 0) xo_do_format_field(xop, NULL, fmt, flen, flags); else if (!encode_only) xo_data_append_content(xop, value, vlen, flags); } /* * Html mode: append a
<div> to the output buffer containing a field * along with all the supporting information indicated by the flags. */ static void xo_buf_append_div (xo_handle_t *xop, const char *class, xo_xff_flags_t flags, const char *name, ssize_t nlen, const char *value, ssize_t vlen, const char *fmt, ssize_t flen, const char *encoding, ssize_t elen) { static char div_start[] = "
"; static char div_close[] = "
"; /* The encoding format defaults to the normal format */ if (encoding == NULL && fmt != NULL) { char *enc = alloca(flen + 1); memcpy(enc, fmt, flen); enc[flen] = '\0'; encoding = xo_fix_encoding(xop, enc); elen = strlen(encoding); } /* * To build our XPath predicate, we need to save the va_list before * we format our data, and then restore it before we format the * xpath expression. * Display-only keys implies that we've got an encode-only key * elsewhere, so we don't use them from making predicates. */ int need_predidate = (name && (flags & XFF_KEY) && !(flags & XFF_DISPLAY_ONLY) && XOF_ISSET(xop, XOF_XPATH)) ? 1 : 0; if (need_predidate) { va_list va_local; va_copy(va_local, xop->xo_vap); if (xop->xo_checkpointer) xop->xo_checkpointer(xop, xop->xo_vap, 0); /* * Build an XPath predicate expression to match this key. * We use the format buffer. */ xo_buffer_t *pbp = &xop->xo_predicate; pbp->xb_curp = pbp->xb_bufp; /* Restart buffer */ xo_buf_append(pbp, "[", 1); xo_buf_escape(xop, pbp, name, nlen, 0); if (XOF_ISSET(xop, XOF_PRETTY)) xo_buf_append(pbp, " = '", 4); else xo_buf_append(pbp, "='", 2); xo_xff_flags_t pflags = flags | XFF_XML | XFF_ATTR; pflags &= ~(XFF_NO_OUTPUT | XFF_ENCODE_ONLY); xo_do_format_field(xop, pbp, encoding, elen, pflags); xo_buf_append(pbp, "']", 2); /* Now we record this predicate expression in the stack */ xo_stack_t *xsp = &xop->xo_stack[xop->xo_depth]; ssize_t olen = xsp->xs_keys ? strlen(xsp->xs_keys) : 0; ssize_t dlen = pbp->xb_curp - pbp->xb_bufp; char *cp = xo_realloc(xsp->xs_keys, olen + dlen + 1); if (cp) { memcpy(cp + olen, pbp->xb_bufp, dlen); cp[olen + dlen] = '\0'; xsp->xs_keys = cp; } /* Now we reset the xo_vap as if we were never here */ va_end(xop->xo_vap); va_copy(xop->xo_vap, va_local); va_end(va_local); if (xop->xo_checkpointer) xop->xo_checkpointer(xop, xop->xo_vap, 1); } if (flags & XFF_ENCODE_ONLY) { /* * Even if this is encode-only, we need to go through the * work of formatting it to make sure the args are cleared * from xo_vap. This is not true when vlen is zero, since * that means our "value" isn't on the stack. */ xo_simple_field(xop, TRUE, NULL, 0, encoding, elen, flags); return; } xo_line_ensure_open(xop, 0); if (XOF_ISSET(xop, XOF_PRETTY)) xo_buf_indent(xop, xop->xo_indent_by); xo_data_append(xop, div_start, sizeof(div_start) - 1); xo_data_append(xop, class, strlen(class)); /* * If the color buffer has content, we add it now. It's already * prebuilt and ready, since we want to add it to every
. */ if (!xo_buf_is_empty(&xop->xo_color_buf)) { xo_buffer_t *xbp = &xop->xo_color_buf; xo_data_append(xop, xbp->xb_bufp, xbp->xb_curp - xbp->xb_bufp); } if (name) { xo_data_append(xop, div_tag, sizeof(div_tag) - 1); xo_data_escape(xop, name, nlen); /* * Save the offset at which we'd place units. See xo_format_units. */ if (XOF_ISSET(xop, XOF_UNITS)) { XOIF_SET(xop, XOIF_UNITS_PENDING); /* * Note: We need the '+1' here because we know we've not * added the closing quote. We add one, knowing the quote * will be added shortly. */ xop->xo_units_offset = xop->xo_data.xb_curp -xop->xo_data.xb_bufp + 1; } if (XOF_ISSET(xop, XOF_XPATH)) { int i; xo_stack_t *xsp; xo_data_append(xop, div_xpath, sizeof(div_xpath) - 1); if (xop->xo_leading_xpath) xo_data_append(xop, xop->xo_leading_xpath, strlen(xop->xo_leading_xpath)); for (i = 0; i <= xop->xo_depth; i++) { xsp = &xop->xo_stack[i]; if (xsp->xs_name == NULL) continue; /* * XSS_OPEN_LIST and XSS_OPEN_LEAF_LIST stack frames * are directly under XSS_OPEN_INSTANCE frames so we * don't need to put these in our XPath expressions. */ if (xsp->xs_state == XSS_OPEN_LIST || xsp->xs_state == XSS_OPEN_LEAF_LIST) continue; xo_data_append(xop, "/", 1); xo_data_escape(xop, xsp->xs_name, strlen(xsp->xs_name)); if (xsp->xs_keys) { /* Don't show keys for the key field */ if (i != xop->xo_depth || !(flags & XFF_KEY)) xo_data_append(xop, xsp->xs_keys, strlen(xsp->xs_keys)); } } xo_data_append(xop, "/", 1); xo_data_escape(xop, name, nlen); } if (XOF_ISSET(xop, XOF_INFO) && xop->xo_info) { static char in_type[] = "\" data-type=\""; static char in_help[] = "\" data-help=\""; xo_info_t *xip = xo_info_find(xop, name, nlen); if (xip) { if (xip->xi_type) { xo_data_append(xop, in_type, sizeof(in_type) - 1); xo_data_escape(xop, xip->xi_type, strlen(xip->xi_type)); } if (xip->xi_help) { xo_data_append(xop, in_help, sizeof(in_help) - 1); xo_data_escape(xop, xip->xi_help, strlen(xip->xi_help)); } } } if ((flags & XFF_KEY) && XOF_ISSET(xop, XOF_KEYS)) xo_data_append(xop, div_key, sizeof(div_key) - 1); } xo_buffer_t *xbp = &xop->xo_data; ssize_t base_offset = xbp->xb_curp - xbp->xb_bufp; xo_data_append(xop, div_end, sizeof(div_end) - 1); xo_humanize_save_t save; /* Save values for humanizing logic */ save.xhs_offset = xbp->xb_curp - xbp->xb_bufp; save.xhs_columns = xop->xo_columns; save.xhs_anchor_columns = xop->xo_anchor_columns; xo_simple_field(xop, FALSE, value, vlen, fmt, flen, flags); if (flags & XFF_HUMANIZE) { /* * Unlike text style, we want to retain the original value and * stuff it into the "data-number" attribute. 
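* For example, a value of 44470272 is rendered as "44M" in the div body, while data-number="44470272" preserves the exact value for consumers of the HTML.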
*/ static const char div_number[] = "\" data-number=\""; ssize_t div_len = sizeof(div_number) - 1; ssize_t end_offset = xbp->xb_curp - xbp->xb_bufp; ssize_t olen = end_offset - save.xhs_offset; char *cp = alloca(olen + 1); memcpy(cp, xbp->xb_bufp + save.xhs_offset, olen); cp[olen] = '\0'; xo_format_humanize(xop, xbp, &save, flags); if (xo_buf_has_room(xbp, div_len + olen)) { ssize_t new_offset = xbp->xb_curp - xbp->xb_bufp; /* Move the humanized string off to the left */ memmove(xbp->xb_bufp + base_offset + div_len + olen, xbp->xb_bufp + base_offset, new_offset - base_offset); /* Copy the data_number attribute name */ memcpy(xbp->xb_bufp + base_offset, div_number, div_len); /* Copy the original long value */ memcpy(xbp->xb_bufp + base_offset + div_len, cp, olen); xbp->xb_curp += div_len + olen; } } xo_data_append(xop, div_close, sizeof(div_close) - 1); if (XOF_ISSET(xop, XOF_PRETTY)) xo_data_append(xop, "\n", 1); } static void xo_format_text (xo_handle_t *xop, const char *str, ssize_t len) { switch (xo_style(xop)) { case XO_STYLE_TEXT: xo_buf_append_locale(xop, &xop->xo_data, str, len); break; case XO_STYLE_HTML: xo_buf_append_div(xop, "text", 0, NULL, 0, str, len, NULL, 0, NULL, 0); break; } } static void xo_format_title (xo_handle_t *xop, xo_field_info_t *xfip, const char *value, ssize_t vlen) { const char *fmt = xfip->xfi_format; ssize_t flen = xfip->xfi_flen; xo_xff_flags_t flags = xfip->xfi_flags; static char div_open[] = "
"; static char div_close[] = "
"; if (flen == 0) { fmt = "%s"; flen = 2; } switch (xo_style(xop)) { case XO_STYLE_XML: case XO_STYLE_JSON: case XO_STYLE_SDPARAMS: case XO_STYLE_ENCODER: /* * Even though we don't care about text, we need to do * enough parsing work to skip over the right bits of xo_vap. */ xo_simple_field(xop, TRUE, value, vlen, fmt, flen, flags); return; } xo_buffer_t *xbp = &xop->xo_data; ssize_t start = xbp->xb_curp - xbp->xb_bufp; ssize_t left = xbp->xb_size - start; ssize_t rc; if (xo_style(xop) == XO_STYLE_HTML) { xo_line_ensure_open(xop, 0); if (XOF_ISSET(xop, XOF_PRETTY)) xo_buf_indent(xop, xop->xo_indent_by); xo_buf_append(&xop->xo_data, div_open, sizeof(div_open) - 1); xo_color_append_html(xop); xo_buf_append(&xop->xo_data, div_middle, sizeof(div_middle) - 1); } start = xbp->xb_curp - xbp->xb_bufp; /* Reset start */ if (vlen) { char *newfmt = alloca(flen + 1); memcpy(newfmt, fmt, flen); newfmt[flen] = '\0'; /* If len is non-zero, the format string apply to the name */ char *newstr = alloca(vlen + 1); memcpy(newstr, value, vlen); newstr[vlen] = '\0'; if (newstr[vlen - 1] == 's') { char *bp; rc = snprintf(NULL, 0, newfmt, newstr); if (rc > 0) { /* * We have to do this the hard way, since we might need * the columns. */ bp = alloca(rc + 1); rc = snprintf(bp, rc + 1, newfmt, newstr); xo_data_append_content(xop, bp, rc, flags); } goto move_along; } else { rc = snprintf(xbp->xb_curp, left, newfmt, newstr); if (rc >= left) { if (!xo_buf_has_room(xbp, rc)) return; left = xbp->xb_size - (xbp->xb_curp - xbp->xb_bufp); rc = snprintf(xbp->xb_curp, left, newfmt, newstr); } if (rc > 0) { if (XOF_ISSET(xop, XOF_COLUMNS)) xop->xo_columns += rc; if (XOIF_ISSET(xop, XOIF_ANCHOR)) xop->xo_anchor_columns += rc; } } } else { xo_do_format_field(xop, NULL, fmt, flen, flags); /* xo_do_format_field moved curp, so we need to reset it */ rc = xbp->xb_curp - (xbp->xb_bufp + start); xbp->xb_curp = xbp->xb_bufp + start; } /* If we're styling HTML, then we need to escape it */ if (xo_style(xop) == XO_STYLE_HTML) { rc = xo_escape_xml(xbp, rc, 0); } if (rc > 0) xbp->xb_curp += rc; move_along: if (xo_style(xop) == XO_STYLE_HTML) { xo_data_append(xop, div_close, sizeof(div_close) - 1); if (XOF_ISSET(xop, XOF_PRETTY)) xo_data_append(xop, "\n", 1); } } /* * strspn() with a string length */ static ssize_t xo_strnspn (const char *str, size_t len, const char *accept) { ssize_t i; const char *cp, *ep; for (i = 0, cp = str, ep = str + len; cp < ep && *cp != '\0'; i++, cp++) { if (strchr(accept, *cp) == NULL) break; } return i; } /* * Decide if a format string should be considered "numeric", * in the sense that the number does not need to be quoted. * This means that it consists only of a single numeric field * with nothing exotic or "interesting". This means that * static values are never considered numeric. */ static int xo_format_is_numeric (const char *fmt, ssize_t flen) { if (flen <= 0 || *fmt++ != '%') /* Must start with '%' */ return FALSE; flen -= 1; /* Handle leading flags; don't want "#" since JSON can't handle hex */ ssize_t spn = xo_strnspn(fmt, flen, "0123456789.*+ -"); if (spn >= flen) return FALSE; fmt += spn; /* Move along the input string */ flen -= spn; /* Handle the length modifiers */ spn = xo_strnspn(fmt, flen, "hljtqz"); if (spn >= flen) return FALSE; fmt += spn; /* Move along the input string */ flen -= spn; if (flen != 1) /* Should only be one character left */ return FALSE; return (strchr("diouDOUeEfFgG", *fmt) == NULL) ? 
FALSE : TRUE; } /* * Update the stack flags using the object flags, allowing callers * to monkey with the stack flags without even knowing they exist. */ static void xo_stack_set_flags (xo_handle_t *xop) { if (XOF_ISSET(xop, XOF_NOT_FIRST)) { xo_stack_t *xsp = &xop->xo_stack[xop->xo_depth]; xsp->xs_flags |= XSF_NOT_FIRST; XOF_CLEAR(xop, XOF_NOT_FIRST); } } static void xo_format_prep (xo_handle_t *xop, xo_xff_flags_t flags) { if (xop->xo_stack[xop->xo_depth].xs_flags & XSF_NOT_FIRST) { xo_data_append(xop, ",", 1); if (!(flags & XFF_LEAF_LIST) && XOF_ISSET(xop, XOF_PRETTY)) xo_data_append(xop, "\n", 1); } else xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST; } #if 0 /* Useful debugging function */ void xo_arg (xo_handle_t *xop); void xo_arg (xo_handle_t *xop) { xop = xo_default(xop); fprintf(stderr, "0x%x", va_arg(xop->xo_vap, unsigned)); } #endif /* 0 */ static void xo_format_value (xo_handle_t *xop, const char *name, ssize_t nlen, const char *value, ssize_t vlen, const char *fmt, ssize_t flen, const char *encoding, ssize_t elen, xo_xff_flags_t flags) { int pretty = XOF_ISSET(xop, XOF_PRETTY); int quote; /* * Before we emit a value, we need to know that the frame is ready. */ xo_stack_t *xsp = &xop->xo_stack[xop->xo_depth]; if (flags & XFF_LEAF_LIST) { /* * Check if we've already started to emit normal leafs * or if we're not in a leaf list. */ if ((xsp->xs_flags & (XSF_EMIT | XSF_EMIT_KEY)) || !(xsp->xs_flags & XSF_EMIT_LEAF_LIST)) { char nbuf[nlen + 1]; memcpy(nbuf, name, nlen); nbuf[nlen] = '\0'; ssize_t rc = xo_transition(xop, 0, nbuf, XSS_EMIT_LEAF_LIST); if (rc < 0) flags |= XFF_DISPLAY_ONLY | XFF_ENCODE_ONLY; else xop->xo_stack[xop->xo_depth].xs_flags |= XSF_EMIT_LEAF_LIST; } xsp = &xop->xo_stack[xop->xo_depth]; if (xsp->xs_name) { name = xsp->xs_name; nlen = strlen(name); } } else if (flags & XFF_KEY) { /* Emitting a 'k' (key) field */ if ((xsp->xs_flags & XSF_EMIT) && !(flags & XFF_DISPLAY_ONLY)) { xo_failure(xop, "key field emitted after normal value field: '%.*s'", nlen, name); } else if (!(xsp->xs_flags & XSF_EMIT_KEY)) { char nbuf[nlen + 1]; memcpy(nbuf, name, nlen); nbuf[nlen] = '\0'; ssize_t rc = xo_transition(xop, 0, nbuf, XSS_EMIT); if (rc < 0) flags |= XFF_DISPLAY_ONLY | XFF_ENCODE_ONLY; else xop->xo_stack[xop->xo_depth].xs_flags |= XSF_EMIT_KEY; xsp = &xop->xo_stack[xop->xo_depth]; xsp->xs_flags |= XSF_EMIT_KEY; } } else { /* Emitting a normal value field */ if ((xsp->xs_flags & XSF_EMIT_LEAF_LIST) || !(xsp->xs_flags & XSF_EMIT)) { char nbuf[nlen + 1]; memcpy(nbuf, name, nlen); nbuf[nlen] = '\0'; ssize_t rc = xo_transition(xop, 0, nbuf, XSS_EMIT); if (rc < 0) flags |= XFF_DISPLAY_ONLY | XFF_ENCODE_ONLY; else xop->xo_stack[xop->xo_depth].xs_flags |= XSF_EMIT; xsp = &xop->xo_stack[xop->xo_depth]; xsp->xs_flags |= XSF_EMIT; } } xo_buffer_t *xbp = &xop->xo_data; xo_humanize_save_t save; /* Save values for humanizing logic */ const char *leader = xo_xml_leader_len(xop, name, nlen); switch (xo_style(xop)) { case XO_STYLE_TEXT: if (flags & XFF_ENCODE_ONLY) flags |= XFF_NO_OUTPUT; save.xhs_offset = xbp->xb_curp - xbp->xb_bufp; save.xhs_columns = xop->xo_columns; save.xhs_anchor_columns = xop->xo_anchor_columns; xo_simple_field(xop, FALSE, value, vlen, fmt, flen, flags); if (flags & XFF_HUMANIZE) xo_format_humanize(xop, xbp, &save, flags); break; case XO_STYLE_HTML: if (flags & XFF_ENCODE_ONLY) flags |= XFF_NO_OUTPUT; xo_buf_append_div(xop, "data", flags, name, nlen, value, vlen, fmt, flen, encoding, elen); break; case XO_STYLE_XML: /* * Even though we're not making output, 
we still need to * let the formatting code handle the va_arg popping. */ if (flags & XFF_DISPLAY_ONLY) { xo_simple_field(xop, TRUE, value, vlen, fmt, flen, flags); break; } if (encoding) { fmt = encoding; flen = elen; } else { char *enc = alloca(flen + 1); memcpy(enc, fmt, flen); enc[flen] = '\0'; fmt = xo_fix_encoding(xop, enc); flen = strlen(fmt); } if (nlen == 0) { static char missing[] = "missing-field-name"; xo_failure(xop, "missing field name: %s", fmt); name = missing; nlen = sizeof(missing) - 1; } if (pretty) xo_buf_indent(xop, -1); xo_data_append(xop, "<", 1); if (*leader) xo_data_append(xop, leader, 1); xo_data_escape(xop, name, nlen); if (xop->xo_attrs.xb_curp != xop->xo_attrs.xb_bufp) { xo_data_append(xop, xop->xo_attrs.xb_bufp, xop->xo_attrs.xb_curp - xop->xo_attrs.xb_bufp); xop->xo_attrs.xb_curp = xop->xo_attrs.xb_bufp; } /* * We indicate 'key' fields using the 'key' attribute. While * this is really committing the crime of mixing meta-data with * data, it's often useful. Especially when format meta-data is * difficult to come by. */ if ((flags & XFF_KEY) && XOF_ISSET(xop, XOF_KEYS)) { static char attr[] = " key=\"key\""; xo_data_append(xop, attr, sizeof(attr) - 1); } /* * Save the offset at which we'd place units. See xo_format_units. */ if (XOF_ISSET(xop, XOF_UNITS)) { XOIF_SET(xop, XOIF_UNITS_PENDING); xop->xo_units_offset = xop->xo_data.xb_curp -xop->xo_data.xb_bufp; } xo_data_append(xop, ">", 1); xo_simple_field(xop, FALSE, value, vlen, fmt, flen, flags); xo_data_append(xop, "", 1); if (pretty) xo_data_append(xop, "\n", 1); break; case XO_STYLE_JSON: if (flags & XFF_DISPLAY_ONLY) { xo_simple_field(xop, TRUE, value, vlen, fmt, flen, flags); break; } if (encoding) { fmt = encoding; flen = elen; } else { char *enc = alloca(flen + 1); memcpy(enc, fmt, flen); enc[flen] = '\0'; fmt = xo_fix_encoding(xop, enc); flen = strlen(fmt); } xo_stack_set_flags(xop); int first = (xop->xo_stack[xop->xo_depth].xs_flags & XSF_NOT_FIRST) ? 
0 : 1; xo_format_prep(xop, flags); if (flags & XFF_QUOTE) quote = 1; else if (flags & XFF_NOQUOTE) quote = 0; else if (vlen != 0) quote = 1; else if (flen == 0) { quote = 0; fmt = "true"; /* JSON encodes empty tags as a boolean true */ flen = 4; } else if (xo_format_is_numeric(fmt, flen)) quote = 0; else quote = 1; if (nlen == 0) { static char missing[] = "missing-field-name"; xo_failure(xop, "missing field name: %s", fmt); name = missing; nlen = sizeof(missing) - 1; } if (flags & XFF_LEAF_LIST) { if (!first && pretty) xo_data_append(xop, "\n", 1); if (pretty) xo_buf_indent(xop, -1); } else { if (pretty) xo_buf_indent(xop, -1); xo_data_append(xop, "\"", 1); xbp = &xop->xo_data; ssize_t off = xbp->xb_curp - xbp->xb_bufp; xo_data_escape(xop, name, nlen); if (XOF_ISSET(xop, XOF_UNDERSCORES)) { ssize_t coff = xbp->xb_curp - xbp->xb_bufp; for ( ; off < coff; off++) if (xbp->xb_bufp[off] == '-') xbp->xb_bufp[off] = '_'; } xo_data_append(xop, "\":", 2); if (pretty) xo_data_append(xop, " ", 1); } if (quote) xo_data_append(xop, "\"", 1); xo_simple_field(xop, FALSE, value, vlen, fmt, flen, flags); if (quote) xo_data_append(xop, "\"", 1); break; case XO_STYLE_SDPARAMS: if (flags & XFF_DISPLAY_ONLY) { xo_simple_field(xop, TRUE, value, vlen, fmt, flen, flags); break; } if (encoding) { fmt = encoding; flen = elen; } else { char *enc = alloca(flen + 1); memcpy(enc, fmt, flen); enc[flen] = '\0'; fmt = xo_fix_encoding(xop, enc); flen = strlen(fmt); } if (nlen == 0) { static char missing[] = "missing-field-name"; xo_failure(xop, "missing field name: %s", fmt); name = missing; nlen = sizeof(missing) - 1; } xo_data_escape(xop, name, nlen); xo_data_append(xop, "=\"", 2); xo_simple_field(xop, FALSE, value, vlen, fmt, flen, flags); xo_data_append(xop, "\" ", 2); break; case XO_STYLE_ENCODER: if (flags & XFF_DISPLAY_ONLY) { xo_simple_field(xop, TRUE, value, vlen, fmt, flen, flags); break; } if (flags & XFF_QUOTE) quote = 1; else if (flags & XFF_NOQUOTE) quote = 0; else if (flen == 0) { quote = 0; fmt = "true"; /* JSON encodes empty tags as a boolean true */ flen = 4; } else if (strchr("diouxXDOUeEfFgGaAcCp", fmt[flen - 1]) == NULL) quote = 1; else quote = 0; if (encoding) { fmt = encoding; flen = elen; } else { char *enc = alloca(flen + 1); memcpy(enc, fmt, flen); enc[flen] = '\0'; fmt = xo_fix_encoding(xop, enc); flen = strlen(fmt); } if (nlen == 0) { static char missing[] = "missing-field-name"; xo_failure(xop, "missing field name: %s", fmt); name = missing; nlen = sizeof(missing) - 1; } ssize_t name_offset = xo_buf_offset(&xop->xo_data); xo_data_append(xop, name, nlen); xo_data_append(xop, "", 1); ssize_t value_offset = xo_buf_offset(&xop->xo_data); xo_simple_field(xop, FALSE, value, vlen, fmt, flen, flags); xo_data_append(xop, "", 1); xo_encoder_handle(xop, quote ? 
XO_OP_STRING : XO_OP_CONTENT, xo_buf_data(&xop->xo_data, name_offset), xo_buf_data(&xop->xo_data, value_offset), flags); xo_buf_reset(&xop->xo_data); break; } } static void xo_set_gettext_domain (xo_handle_t *xop, xo_field_info_t *xfip, const char *str, ssize_t len) { const char *fmt = xfip->xfi_format; ssize_t flen = xfip->xfi_flen; /* Start by discarding previous domain */ if (xop->xo_gt_domain) { xo_free(xop->xo_gt_domain); xop->xo_gt_domain = NULL; } /* An empty {G:} means no domainname */ if (len == 0 && flen == 0) return; ssize_t start_offset = -1; if (len == 0 && flen != 0) { /* Need to do format the data to get the domainname from args */ start_offset = xop->xo_data.xb_curp - xop->xo_data.xb_bufp; xo_do_format_field(xop, NULL, fmt, flen, 0); ssize_t end_offset = xop->xo_data.xb_curp - xop->xo_data.xb_bufp; len = end_offset - start_offset; str = xop->xo_data.xb_bufp + start_offset; } xop->xo_gt_domain = xo_strndup(str, len); /* Reset the current buffer point to avoid emitting the name as output */ if (start_offset >= 0) xop->xo_data.xb_curp = xop->xo_data.xb_bufp + start_offset; } static void xo_format_content (xo_handle_t *xop, const char *class_name, const char *tag_name, const char *value, ssize_t vlen, const char *fmt, ssize_t flen, xo_xff_flags_t flags) { switch (xo_style(xop)) { case XO_STYLE_TEXT: xo_simple_field(xop, FALSE, value, vlen, fmt, flen, flags); break; case XO_STYLE_HTML: xo_buf_append_div(xop, class_name, flags, NULL, 0, value, vlen, fmt, flen, NULL, 0); break; case XO_STYLE_XML: case XO_STYLE_JSON: case XO_STYLE_SDPARAMS: if (tag_name) { xo_open_container_h(xop, tag_name); xo_format_value(xop, "message", 7, value, vlen, fmt, flen, NULL, 0, flags); xo_close_container_h(xop, tag_name); } else { /* * Even though we don't care about labels, we need to do * enough parsing work to skip over the right bits of xo_vap. */ xo_simple_field(xop, TRUE, value, vlen, fmt, flen, flags); } break; case XO_STYLE_ENCODER: xo_simple_field(xop, TRUE, value, vlen, fmt, flen, flags); break; } } static const char *xo_color_names[] = { "default", /* XO_COL_DEFAULT */ "black", /* XO_COL_BLACK */ "red", /* XO_CLOR_RED */ "green", /* XO_COL_GREEN */ "yellow", /* XO_COL_YELLOW */ "blue", /* XO_COL_BLUE */ "magenta", /* XO_COL_MAGENTA */ "cyan", /* XO_COL_CYAN */ "white", /* XO_COL_WHITE */ NULL }; static int xo_color_find (const char *str) { int i; for (i = 0; xo_color_names[i]; i++) { if (xo_streq(xo_color_names[i], str)) return i; } return -1; } static const char *xo_effect_names[] = { "reset", /* XO_EFF_RESET */ "normal", /* XO_EFF_NORMAL */ "bold", /* XO_EFF_BOLD */ "underline", /* XO_EFF_UNDERLINE */ "inverse", /* XO_EFF_INVERSE */ NULL }; static const char *xo_effect_on_codes[] = { "0", /* XO_EFF_RESET */ "0", /* XO_EFF_NORMAL */ "1", /* XO_EFF_BOLD */ "4", /* XO_EFF_UNDERLINE */ "7", /* XO_EFF_INVERSE */ NULL }; #if 0 /* * See comment below re: joy of terminal standards. These can * be use by just adding: * + if (newp->xoc_effects & bit) * code = xo_effect_on_codes[i]; * + else * + code = xo_effect_off_codes[i]; * in xo_color_handle_text. 
*/ static const char *xo_effect_off_codes[] = { "0", /* XO_EFF_RESET */ "0", /* XO_EFF_NORMAL */ "21", /* XO_EFF_BOLD */ "24", /* XO_EFF_UNDERLINE */ "27", /* XO_EFF_INVERSE */ NULL }; #endif /* 0 */ static int xo_effect_find (const char *str) { int i; for (i = 0; xo_effect_names[i]; i++) { if (xo_streq(xo_effect_names[i], str)) return i; } return -1; } static void xo_colors_parse (xo_handle_t *xop, xo_colors_t *xocp, char *str) { if (xo_text_only()) return; char *cp, *ep, *np, *xp; ssize_t len = strlen(str); int rc; /* * Possible tokens: colors, bg-colors, effects, no-effects, "reset". */ for (cp = str, ep = cp + len - 1; cp && cp < ep; cp = np) { /* Trim leading whitespace */ while (isspace((int) *cp)) cp += 1; np = strchr(cp, ','); if (np) *np++ = '\0'; /* Trim trailing whitespace */ xp = cp + strlen(cp) - 1; while (isspace(*xp) && xp > cp) *xp-- = '\0'; if (cp[0] == 'f' && cp[1] == 'g' && cp[2] == '-') { rc = xo_color_find(cp + 3); if (rc < 0) goto unknown; xocp->xoc_col_fg = rc; } else if (cp[0] == 'b' && cp[1] == 'g' && cp[2] == '-') { rc = xo_color_find(cp + 3); if (rc < 0) goto unknown; xocp->xoc_col_bg = rc; } else if (cp[0] == 'n' && cp[1] == 'o' && cp[2] == '-') { rc = xo_effect_find(cp + 3); if (rc < 0) goto unknown; xocp->xoc_effects &= ~(1 << rc); } else { rc = xo_effect_find(cp); if (rc < 0) goto unknown; xocp->xoc_effects |= 1 << rc; switch (1 << rc) { case XO_EFF_RESET: xocp->xoc_col_fg = xocp->xoc_col_bg = 0; /* Note: not "|=" since we want to wipe out the old value */ xocp->xoc_effects = XO_EFF_RESET; break; case XO_EFF_NORMAL: xocp->xoc_effects &= ~(XO_EFF_BOLD | XO_EFF_UNDERLINE | XO_EFF_INVERSE | XO_EFF_NORMAL); break; } } continue; unknown: if (XOF_ISSET(xop, XOF_WARN)) xo_failure(xop, "unknown color/effect string detected: '%s'", cp); } } static inline int xo_colors_enabled (xo_handle_t *xop UNUSED) { #ifdef LIBXO_TEXT_ONLY return 0; #else /* LIBXO_TEXT_ONLY */ return XOF_ISSET(xop, XOF_COLOR); #endif /* LIBXO_TEXT_ONLY */ } /* * If the color map is in use (--libxo colors=xxxx), then update * the incoming foreground and background colors from the map. */ static void xo_colors_update (xo_handle_t *xop UNUSED, xo_colors_t *newp UNUSED) { #ifndef LIBXO_TEXT_ONLY xo_color_t fg = newp->xoc_col_fg; if (XOF_ISSET(xop, XOF_COLOR_MAP) && fg < XO_NUM_COLORS) fg = xop->xo_color_map_fg[fg]; /* Fetch from color map */ newp->xoc_col_fg = fg; xo_color_t bg = newp->xoc_col_bg; if (XOF_ISSET(xop, XOF_COLOR_MAP) && bg < XO_NUM_COLORS) bg = xop->xo_color_map_bg[bg]; /* Fetch from color map */ newp->xoc_col_bg = bg; #endif /* LIBXO_TEXT_ONLY */ } static void xo_colors_handle_text (xo_handle_t *xop, xo_colors_t *newp) { char buf[BUFSIZ]; char *cp = buf, *ep = buf + sizeof(buf); unsigned i, bit; xo_colors_t *oldp = &xop->xo_colors; const char *code = NULL; /* * Start the buffer with an escape. We don't want to add the '[' * now, since we let xo_effect_text_add unconditionally add the ';'. * We'll replace the first ';' with a '[' when we're done. */ *cp++ = 0x1b; /* Escape */ /* * Terminals were designed back in the age before "certainty" was * invented, when standards were more what you'd call "guidelines" * than actual rules. Anyway we can't depend on them to operate * correctly. So when display attributes are changed, we punt, * reseting them all and turning back on the ones we want to keep. * Longer, but should be completely reliable. Savvy? 
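* (In practice: dropping "bold" emits a full reset ("0") and then re-emits any colors still wanted within the same escape sequence, rather than trusting the "21" un-bold code.)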
*/ if (oldp->xoc_effects != (newp->xoc_effects & oldp->xoc_effects)) { newp->xoc_effects |= XO_EFF_RESET; oldp->xoc_effects = 0; } for (i = 0, bit = 1; xo_effect_names[i]; i++, bit <<= 1) { if ((newp->xoc_effects & bit) == (oldp->xoc_effects & bit)) continue; code = xo_effect_on_codes[i]; cp += snprintf(cp, ep - cp, ";%s", code); if (cp >= ep) return; /* Should not occur */ if (bit == XO_EFF_RESET) { /* Mark up the old value so we can detect current values as new */ oldp->xoc_effects = 0; oldp->xoc_col_fg = oldp->xoc_col_bg = XO_COL_DEFAULT; } } xo_color_t fg = newp->xoc_col_fg; if (fg != oldp->xoc_col_fg) { cp += snprintf(cp, ep - cp, ";3%u", (fg != XO_COL_DEFAULT) ? fg - 1 : 9); } xo_color_t bg = newp->xoc_col_bg; if (bg != oldp->xoc_col_bg) { cp += snprintf(cp, ep - cp, ";4%u", (bg != XO_COL_DEFAULT) ? bg - 1 : 9); } if (cp - buf != 1 && cp < ep - 3) { buf[1] = '['; /* Overwrite leading ';' */ *cp++ = 'm'; *cp = '\0'; xo_buf_append(&xop->xo_data, buf, cp - buf); } } static void xo_colors_handle_html (xo_handle_t *xop, xo_colors_t *newp) { xo_colors_t *oldp = &xop->xo_colors; /* * HTML colors are mostly trivial: fill in xo_color_buf with * a set of class tags representing the colors and effects. */ /* If nothing changed, then do nothing */ if (oldp->xoc_effects == newp->xoc_effects && oldp->xoc_col_fg == newp->xoc_col_fg && oldp->xoc_col_bg == newp->xoc_col_bg) return; unsigned i, bit; xo_buffer_t *xbp = &xop->xo_color_buf; xo_buf_reset(xbp); /* We rebuild content after each change */ for (i = 0, bit = 1; xo_effect_names[i]; i++, bit <<= 1) { if (!(newp->xoc_effects & bit)) continue; xo_buf_append_str(xbp, " effect-"); xo_buf_append_str(xbp, xo_effect_names[i]); } const char *fg = NULL; const char *bg = NULL; if (newp->xoc_col_fg != XO_COL_DEFAULT) fg = xo_color_names[newp->xoc_col_fg]; if (newp->xoc_col_bg != XO_COL_DEFAULT) bg = xo_color_names[newp->xoc_col_bg]; if (newp->xoc_effects & XO_EFF_INVERSE) { const char *tmp = fg; fg = bg; bg = tmp; if (fg == NULL) fg = "inverse"; if (bg == NULL) bg = "inverse"; } if (fg) { xo_buf_append_str(xbp, " color-fg-"); xo_buf_append_str(xbp, fg); } if (bg) { xo_buf_append_str(xbp, " color-bg-"); xo_buf_append_str(xbp, bg); } } static void xo_format_colors (xo_handle_t *xop, xo_field_info_t *xfip, const char *value, ssize_t vlen) { const char *fmt = xfip->xfi_format; ssize_t flen = xfip->xfi_flen; xo_buffer_t xb; /* If the string is static and we've in an encoding style, bail */ if (vlen != 0 && xo_style_is_encoding(xop)) return; xo_buf_init(&xb); if (vlen) xo_buf_append(&xb, value, vlen); else if (flen) xo_do_format_field(xop, &xb, fmt, flen, 0); else xo_buf_append(&xb, "reset", 6); /* Default if empty */ if (xo_colors_enabled(xop)) { switch (xo_style(xop)) { case XO_STYLE_TEXT: case XO_STYLE_HTML: xo_buf_append(&xb, "", 1); xo_colors_t xoc = xop->xo_colors; xo_colors_parse(xop, &xoc, xb.xb_bufp); xo_colors_update(xop, &xoc); if (xo_style(xop) == XO_STYLE_TEXT) { /* * Text mode means emitting the colors as ANSI character * codes. This will allow people who like colors to have * colors. The issue is, of course conflicting with the * user's perfectly reasonable color scheme. Which leads * to the hell of LSCOLORS, where even app need to have * customization hooks for adjusting colors. Instead we * provide a simpler-but-still-annoying answer where one * can map colors to other colors. 
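* A typical caller writes xo_emit("{C:bold,fg-red}alert{C:}..."); the empty "{C:}" field falls back to "reset" (the default above) and restores normal output.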
*/ xo_colors_handle_text(xop, &xoc); xoc.xoc_effects &= ~XO_EFF_RESET; /* After handling it */ } else { /* * HTML output is wrapped in divs, so the color information * must appear in every div until cleared. Most pathetic. * Most unavoidable. */ xoc.xoc_effects &= ~XO_EFF_RESET; /* Before handling effects */ xo_colors_handle_html(xop, &xoc); } xop->xo_colors = xoc; break; case XO_STYLE_XML: case XO_STYLE_JSON: case XO_STYLE_SDPARAMS: case XO_STYLE_ENCODER: /* * Nothing to do; we did all that work just to clear the stack of * formatting arguments. */ break; } } xo_buf_cleanup(&xb); } static void xo_format_units (xo_handle_t *xop, xo_field_info_t *xfip, const char *value, ssize_t vlen) { const char *fmt = xfip->xfi_format; ssize_t flen = xfip->xfi_flen; xo_xff_flags_t flags = xfip->xfi_flags; static char units_start_xml[] = " units=\""; static char units_start_html[] = " data-units=\""; if (!XOIF_ISSET(xop, XOIF_UNITS_PENDING)) { xo_format_content(xop, "units", NULL, value, vlen, fmt, flen, flags); return; } xo_buffer_t *xbp = &xop->xo_data; ssize_t start = xop->xo_units_offset; ssize_t stop = xbp->xb_curp - xbp->xb_bufp; if (xo_style(xop) == XO_STYLE_XML) xo_buf_append(xbp, units_start_xml, sizeof(units_start_xml) - 1); else if (xo_style(xop) == XO_STYLE_HTML) xo_buf_append(xbp, units_start_html, sizeof(units_start_html) - 1); else return; if (vlen) xo_data_escape(xop, value, vlen); else xo_do_format_field(xop, NULL, fmt, flen, flags); xo_buf_append(xbp, "\"", 1); ssize_t now = xbp->xb_curp - xbp->xb_bufp; ssize_t delta = now - stop; if (delta <= 0) { /* Strange; no output to move */ xbp->xb_curp = xbp->xb_bufp + stop; /* Reset buffer to prior state */ return; } /* * Now we're in it alright. We've need to insert the unit value * we just created into the right spot. We make a local copy, * move it and then insert our copy. We know there's room in the * buffer, since we're just moving this around. */ char *buf = alloca(delta); memcpy(buf, xbp->xb_bufp + stop, delta); memmove(xbp->xb_bufp + start + delta, xbp->xb_bufp + start, stop - start); memmove(xbp->xb_bufp + start, buf, delta); } static ssize_t xo_find_width (xo_handle_t *xop, xo_field_info_t *xfip, const char *value, ssize_t vlen) { const char *fmt = xfip->xfi_format; ssize_t flen = xfip->xfi_flen; long width = 0; char *bp; char *cp; if (vlen) { bp = alloca(vlen + 1); /* Make local NUL-terminated copy of value */ memcpy(bp, value, vlen); bp[vlen] = '\0'; width = strtol(bp, &cp, 0); if (width == LONG_MIN || width == LONG_MAX || bp == cp || *cp != '\0') { width = 0; xo_failure(xop, "invalid width for anchor: '%s'", bp); } } else if (flen) { /* * We really expect the format for width to be "{:/%d}" or * "{:/%u}", so if that's the case, we just grab our width off * the argument list. But we need to avoid optimized logic if * there's a custom formatter. */ if (xop->xo_formatter == NULL && flen == 2 && strncmp("%d", fmt, flen) == 0) { if (!XOF_ISSET(xop, XOF_NO_VA_ARG)) width = va_arg(xop->xo_vap, int); } else if (xop->xo_formatter == NULL && flen == 2 && strncmp("%u", fmt, flen) == 0) { if (!XOF_ISSET(xop, XOF_NO_VA_ARG)) width = va_arg(xop->xo_vap, unsigned); } else { /* * So we have a format and it's not a simple one like * "{:/%d}". That means we need to format the field, * extract the value from the formatted output, and then * discard that output. 
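* For example, a width field like "{[:/%ld}" misses the "%d"/"%u" fast path above, so we format it into the data buffer, strtol() the text back out, and rewind the buffer pointer as if nothing happened.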
*/ int anchor_was_set = FALSE; xo_buffer_t *xbp = &xop->xo_data; ssize_t start_offset = xo_buf_offset(xbp); bp = xo_buf_cur(xbp); /* Save start of the string */ cp = NULL; if (XOIF_ISSET(xop, XOIF_ANCHOR)) { XOIF_CLEAR(xop, XOIF_ANCHOR); anchor_was_set = TRUE; } ssize_t rc = xo_do_format_field(xop, xbp, fmt, flen, 0); if (rc >= 0) { xo_buf_append(xbp, "", 1); /* Append a NUL */ width = strtol(bp, &cp, 0); if (width == LONG_MIN || width == LONG_MAX || bp == cp || *cp != '\0') { width = 0; xo_failure(xop, "invalid width for anchor: '%s'", bp); } } /* Reset the cur pointer to where we found it */ xbp->xb_curp = xbp->xb_bufp + start_offset; if (anchor_was_set) XOIF_SET(xop, XOIF_ANCHOR); } } return width; } static void xo_anchor_clear (xo_handle_t *xop) { XOIF_CLEAR(xop, XOIF_ANCHOR); xop->xo_anchor_offset = 0; xop->xo_anchor_columns = 0; xop->xo_anchor_min_width = 0; } /* * An anchor is a marker used to delay field width implications. * Imagine the format string "{[:10}{min:%d}/{cur:%d}/{max:%d}{:]}". * We are looking for output like " 1/4/5" * * To make this work, we record the anchor and then return to * format it when the end anchor tag is seen. */ static void xo_anchor_start (xo_handle_t *xop, xo_field_info_t *xfip, const char *value, ssize_t vlen) { if (XOIF_ISSET(xop, XOIF_ANCHOR)) xo_failure(xop, "the anchor already recording is discarded"); XOIF_SET(xop, XOIF_ANCHOR); xo_buffer_t *xbp = &xop->xo_data; xop->xo_anchor_offset = xbp->xb_curp - xbp->xb_bufp; xop->xo_anchor_columns = 0; /* * Now we find the width, if possible. If it's not there, * we'll get it on the end anchor. */ xop->xo_anchor_min_width = xo_find_width(xop, xfip, value, vlen); } static void xo_anchor_stop (xo_handle_t *xop, xo_field_info_t *xfip, const char *value, ssize_t vlen) { if (!XOIF_ISSET(xop, XOIF_ANCHOR)) { xo_failure(xop, "no start anchor"); return; } XOIF_CLEAR(xop, XOIF_UNITS_PENDING); ssize_t width = xo_find_width(xop, xfip, value, vlen); if (width == 0) width = xop->xo_anchor_min_width; if (width == 0) /* No width given; nothing to do */ goto done; xo_buffer_t *xbp = &xop->xo_data; ssize_t start = xop->xo_anchor_offset; ssize_t stop = xbp->xb_curp - xbp->xb_bufp; ssize_t abswidth = (width > 0) ? width : -width; ssize_t blen = abswidth - xop->xo_anchor_columns; if (blen <= 0) /* Already over width */ goto done; if (abswidth > XO_MAX_ANCHOR_WIDTH) { xo_failure(xop, "width over %u are not supported", XO_MAX_ANCHOR_WIDTH); goto done; } /* Make a suitable padding field and emit it */ char *buf = alloca(blen); memset(buf, ' ', blen); xo_format_content(xop, "padding", NULL, buf, blen, NULL, 0, 0); if (width < 0) /* Already left justified */ goto done; ssize_t now = xbp->xb_curp - xbp->xb_bufp; ssize_t delta = now - stop; if (delta <= 0) /* Strange; no output to move */ goto done; /* * Now we're in it alright. We've need to insert the padding data * we just created (which might be an HTML
or text) before * the formatted data. We make a local copy, move it and then * insert our copy. We know there's room in the buffer, since * we're just moving this around. */ if (delta > blen) buf = alloca(delta); /* Expand buffer if needed */ memcpy(buf, xbp->xb_bufp + stop, delta); memmove(xbp->xb_bufp + start + delta, xbp->xb_bufp + start, stop - start); memmove(xbp->xb_bufp + start, buf, delta); done: xo_anchor_clear(xop); } static const char * xo_class_name (int ftype) { switch (ftype) { case 'D': return "decoration"; case 'E': return "error"; case 'L': return "label"; case 'N': return "note"; case 'P': return "padding"; case 'W': return "warning"; } return NULL; } static const char * xo_tag_name (int ftype) { switch (ftype) { case 'E': return "__error"; case 'W': return "__warning"; } return NULL; } static int xo_role_wants_default_format (int ftype) { switch (ftype) { /* These roles can be completely empty and/or without formatting */ case 'C': case 'G': case '[': case ']': return 0; } return 1; } static xo_mapping_t xo_role_names[] = { { 'C', "color" }, { 'D', "decoration" }, { 'E', "error" }, { 'L', "label" }, { 'N', "note" }, { 'P', "padding" }, { 'T', "title" }, { 'U', "units" }, { 'V', "value" }, { 'W', "warning" }, { '[', "start-anchor" }, { ']', "stop-anchor" }, { 0, NULL } }; #define XO_ROLE_EBRACE '{' /* Escaped braces */ #define XO_ROLE_TEXT '+' #define XO_ROLE_NEWLINE '\n' static xo_mapping_t xo_modifier_names[] = { { XFF_ARGUMENT, "argument" }, { XFF_COLON, "colon" }, { XFF_COMMA, "comma" }, { XFF_DISPLAY_ONLY, "display" }, { XFF_ENCODE_ONLY, "encoding" }, { XFF_GT_FIELD, "gettext" }, { XFF_HUMANIZE, "humanize" }, { XFF_HUMANIZE, "hn" }, { XFF_HN_SPACE, "hn-space" }, { XFF_HN_DECIMAL, "hn-decimal" }, { XFF_HN_1000, "hn-1000" }, { XFF_KEY, "key" }, { XFF_LEAF_LIST, "leaf-list" }, { XFF_LEAF_LIST, "list" }, { XFF_NOQUOTE, "no-quotes" }, { XFF_NOQUOTE, "no-quote" }, { XFF_GT_PLURAL, "plural" }, { XFF_QUOTE, "quotes" }, { XFF_QUOTE, "quote" }, { XFF_TRIM_WS, "trim" }, { XFF_WS, "white" }, { 0, NULL } }; #ifdef NOT_NEEDED_YET static xo_mapping_t xo_modifier_short_names[] = { { XFF_COLON, "c" }, { XFF_DISPLAY_ONLY, "d" }, { XFF_ENCODE_ONLY, "e" }, { XFF_GT_FIELD, "g" }, { XFF_HUMANIZE, "h" }, { XFF_KEY, "k" }, { XFF_LEAF_LIST, "l" }, { XFF_NOQUOTE, "n" }, { XFF_GT_PLURAL, "p" }, { XFF_QUOTE, "q" }, { XFF_TRIM_WS, "t" }, { XFF_WS, "w" }, { 0, NULL } }; #endif /* NOT_NEEDED_YET */ static int xo_count_fields (xo_handle_t *xop UNUSED, const char *fmt) { int rc = 1; const char *cp; for (cp = fmt; *cp; cp++) if (*cp == '{' || *cp == '\n') rc += 1; return rc * 2 + 1; } /* * The field format is: * '{' modifiers ':' content [ '/' print-fmt [ '/' encode-fmt ]] '}' * Roles are optional and include the following field types: * 'D': decoration; something non-text and non-data (colons, commmas) * 'E': error message * 'G': gettext() the entire string; optional domainname as content * 'L': label; text preceding data * 'N': note; text following data * 'P': padding; whitespace * 'T': Title, where 'content' is a column title * 'U': Units, where 'content' is the unit label * 'V': value, where 'content' is the name of the field (the default) * 'W': warning message * '[': start a section of anchored text * ']': end a section of anchored text * The following modifiers are also supported: * 'a': content is provided via argument (const char *), not descriptor * 'c': flag: emit a colon after the label * 'd': field is only emitted for display styles (text and html) * 'e': field is only emitted for 
encoding styles (xml and json) * 'g': gettext() the field * 'h': humanize a numeric value (only for display styles) * 'k': this field is a key, suitable for XPath predicates * 'l': a leaf-list, a simple list of values * 'n': no quotes around this field * 'p': the field has plural gettext semantics (ngettext) * 'q': add quotes around this field * 't': trim whitespace around the value * 'w': emit a blank after the label * The print-fmt and encode-fmt strings is the printf-style formating * for this data. JSON and XML will use the encoding-fmt, if present. * If the encode-fmt is not provided, it defaults to the print-fmt. * If the print-fmt is not provided, it defaults to 's'. */ static const char * xo_parse_roles (xo_handle_t *xop, const char *fmt, const char *basep, xo_field_info_t *xfip) { const char *sp; unsigned ftype = 0; xo_xff_flags_t flags = 0; uint8_t fnum = 0; for (sp = basep; sp && *sp; sp++) { if (*sp == ':' || *sp == '/' || *sp == '}') break; if (*sp == '\\') { if (sp[1] == '\0') { xo_failure(xop, "backslash at the end of string"); return NULL; } /* Anything backslashed is ignored */ sp += 1; continue; } if (*sp == ',') { const char *np; for (np = ++sp; *np; np++) if (*np == ':' || *np == '/' || *np == '}' || *np == ',') break; ssize_t slen = np - sp; if (slen > 0) { xo_xff_flags_t value; value = xo_name_lookup(xo_role_names, sp, slen); if (value) ftype = value; else { value = xo_name_lookup(xo_modifier_names, sp, slen); if (value) flags |= value; else xo_failure(xop, "unknown keyword ignored: '%.*s'", slen, sp); } } sp = np - 1; continue; } switch (*sp) { case 'C': case 'D': case 'E': case 'G': case 'L': case 'N': case 'P': case 'T': case 'U': case 'V': case 'W': case '[': case ']': if (ftype != 0) { xo_failure(xop, "field descriptor uses multiple types: '%s'", xo_printable(fmt)); return NULL; } ftype = *sp; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': fnum = (fnum * 10) + (*sp - '0'); break; case 'a': flags |= XFF_ARGUMENT; break; case 'c': flags |= XFF_COLON; break; case 'd': flags |= XFF_DISPLAY_ONLY; break; case 'e': flags |= XFF_ENCODE_ONLY; break; case 'g': flags |= XFF_GT_FIELD; break; case 'h': flags |= XFF_HUMANIZE; break; case 'k': flags |= XFF_KEY; break; case 'l': flags |= XFF_LEAF_LIST; break; case 'n': flags |= XFF_NOQUOTE; break; case 'p': flags |= XFF_GT_PLURAL; break; case 'q': flags |= XFF_QUOTE; break; case 't': flags |= XFF_TRIM_WS; break; case 'w': flags |= XFF_WS; break; default: xo_failure(xop, "field descriptor uses unknown modifier: '%s'", xo_printable(fmt)); /* * No good answer here; a bad format will likely * mean a core file. We just return and hope * the caller notices there's no output, and while * that seems, well, bad, there's nothing better. */ return NULL; } if (ftype == 'N' || ftype == 'U') { if (flags & XFF_COLON) { xo_failure(xop, "colon modifier on 'N' or 'U' field ignored: " "'%s'", xo_printable(fmt)); flags &= ~XFF_COLON; } } } xfip->xfi_flags = flags; xfip->xfi_ftype = ftype ?: 'V'; xfip->xfi_fnum = fnum; return sp; } /* * Number any remaining fields that need numbers. Note that some * field types (text, newline, escaped braces) never get numbers. 
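* * For example, in "{G:}{2:name} {:size/%d}", the "{G:}" role is skipped, "{2:name}" keeps its explicit number, and the unnumbered "{:size}" field is assigned the lowest unused number, 1.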
*/ static void xo_gettext_finish_numbering_fields (xo_handle_t *xop UNUSED, const char *fmt UNUSED, xo_field_info_t *fields) { xo_field_info_t *xfip; unsigned fnum, max_fields; uint64_t bits = 0; const uint64_t one = 1; /* Avoid "1ULL" */ /* First make a list of add the explicitly used bits */ for (xfip = fields, fnum = 0; xfip->xfi_ftype; xfip++) { switch (xfip->xfi_ftype) { case XO_ROLE_NEWLINE: /* Don't get numbered */ case XO_ROLE_TEXT: case XO_ROLE_EBRACE: case 'G': continue; } fnum += 1; if (fnum >= 63) break; if (xfip->xfi_fnum) bits |= one << xfip->xfi_fnum; } max_fields = fnum; for (xfip = fields, fnum = 0; xfip->xfi_ftype; xfip++) { switch (xfip->xfi_ftype) { case XO_ROLE_NEWLINE: /* Don't get numbered */ case XO_ROLE_TEXT: case XO_ROLE_EBRACE: case 'G': continue; } if (xfip->xfi_fnum != 0) continue; /* Find the next unassigned field */ for (fnum++; bits & (one << fnum); fnum++) continue; if (fnum > max_fields) break; xfip->xfi_fnum = fnum; /* Mark the field number */ bits |= one << fnum; /* Mark it used */ } } /* * The format string uses field numbers, so we need to whiffle through it * and make sure everything's sane and lovely. */ static int xo_parse_field_numbers (xo_handle_t *xop, const char *fmt, xo_field_info_t *fields, unsigned num_fields) { xo_field_info_t *xfip; unsigned field, fnum; uint64_t bits = 0; const uint64_t one = 1; /* Avoid 1ULL */ for (xfip = fields, field = 0; field < num_fields; xfip++, field++) { /* Fields default to 1:1 with natural position */ if (xfip->xfi_fnum == 0) xfip->xfi_fnum = field + 1; else if (xfip->xfi_fnum > num_fields) { xo_failure(xop, "field number exceeds number of fields: '%s'", fmt); return -1; } fnum = xfip->xfi_fnum - 1; /* Move to zero origin */ if (fnum < 64) { /* Only test what fits */ if (bits & (one << fnum)) { xo_failure(xop, "field number %u reused: '%s'", xfip->xfi_fnum, fmt); return -1; } bits |= one << fnum; } } return 0; } static int xo_parse_fields (xo_handle_t *xop, xo_field_info_t *fields, unsigned num_fields, const char *fmt) { const char *cp, *sp, *ep, *basep; unsigned field = 0; xo_field_info_t *xfip = fields; unsigned seen_fnum = 0; for (cp = fmt; *cp && field < num_fields; field++, xfip++) { xfip->xfi_start = cp; if (*cp == '\n') { xfip->xfi_ftype = XO_ROLE_NEWLINE; xfip->xfi_len = 1; cp += 1; continue; } if (*cp != '{') { /* Normal text */ for (sp = cp; *sp; sp++) { if (*sp == '{' || *sp == '\n') break; } xfip->xfi_ftype = XO_ROLE_TEXT; xfip->xfi_content = cp; xfip->xfi_clen = sp - cp; xfip->xfi_next = sp; cp = sp; continue; } if (cp[1] == '{') { /* Start of {{escaped braces}} */ xfip->xfi_start = cp + 1; /* Start at second brace */ xfip->xfi_ftype = XO_ROLE_EBRACE; cp += 2; /* Skip over _both_ characters */ for (sp = cp; *sp; sp++) { if (*sp == '}' && sp[1] == '}') break; } if (*sp == '\0') { xo_failure(xop, "missing closing '}}': '%s'", xo_printable(fmt)); return -1; } xfip->xfi_len = sp - xfip->xfi_start + 1; /* Move along the string, but don't run off the end */ if (*sp == '}' && sp[1] == '}') /* Paranoid; must be true */ sp += 2; cp = sp; xfip->xfi_next = cp; continue; } /* We are looking at the start of a field definition */ xfip->xfi_start = basep = cp + 1; const char *format = NULL; ssize_t flen = 0; /* Looking at roles and modifiers */ sp = xo_parse_roles(xop, fmt, basep, xfip); if (sp == NULL) { /* xo_failure has already been called */ return -1; } if (xfip->xfi_fnum) seen_fnum = 1; /* Looking at content */ if (*sp == ':') { for (ep = ++sp; *sp; sp++) { if (*sp == '}' || *sp == '/') break; if (*sp == 
'\\') { if (sp[1] == '\0') { xo_failure(xop, "backslash at the end of string"); return -1; } sp += 1; continue; } } if (ep != sp) { xfip->xfi_clen = sp - ep; xfip->xfi_content = ep; } } else { xo_failure(xop, "missing content (':'): '%s'", xo_printable(fmt)); return -1; } /* Looking at main (display) format */ if (*sp == '/') { for (ep = ++sp; *sp; sp++) { if (*sp == '}' || *sp == '/') break; if (*sp == '\\') { if (sp[1] == '\0') { xo_failure(xop, "backslash at the end of string"); return -1; } sp += 1; continue; } } flen = sp - ep; format = ep; } /* Looking at encoding format */ if (*sp == '/') { for (ep = ++sp; *sp; sp++) { if (*sp == '}') break; } xfip->xfi_encoding = ep; xfip->xfi_elen = sp - ep; } if (*sp != '}') { xo_failure(xop, "missing closing '}': %s", xo_printable(fmt)); return -1; } xfip->xfi_len = sp - xfip->xfi_start; xfip->xfi_next = ++sp; /* If we have content, then we have a default format */ if (xfip->xfi_clen || format || (xfip->xfi_flags & XFF_ARGUMENT)) { if (format) { xfip->xfi_format = format; xfip->xfi_flen = flen; } else if (xo_role_wants_default_format(xfip->xfi_ftype)) { xfip->xfi_format = xo_default_format; xfip->xfi_flen = 2; } } cp = sp; } int rc = 0; /* * If we saw a field number on at least one field, then we need * to enforce some rules and/or guidelines. */ if (seen_fnum) rc = xo_parse_field_numbers(xop, fmt, fields, field); return rc; } /* * We are passed a pointer to a format string just past the "{G:}" * field. We build a simplified version of the format string. */ static int xo_gettext_simplify_format (xo_handle_t *xop UNUSED, xo_buffer_t *xbp, xo_field_info_t *fields, int this_field, const char *fmt UNUSED, xo_simplify_field_func_t field_cb) { unsigned ftype; xo_xff_flags_t flags; int field = this_field + 1; xo_field_info_t *xfip; char ch; for (xfip = &fields[field]; xfip->xfi_ftype; xfip++, field++) { ftype = xfip->xfi_ftype; flags = xfip->xfi_flags; if ((flags & XFF_GT_FIELD) && xfip->xfi_content && ftype != 'V') { if (field_cb) field_cb(xfip->xfi_content, xfip->xfi_clen, (flags & XFF_GT_PLURAL) ? 1 : 0); } switch (ftype) { case 'G': /* Ignore gettext roles */ break; case XO_ROLE_NEWLINE: xo_buf_append(xbp, "\n", 1); break; case XO_ROLE_EBRACE: xo_buf_append(xbp, "{", 1); xo_buf_append(xbp, xfip->xfi_content, xfip->xfi_clen); xo_buf_append(xbp, "}", 1); break; case XO_ROLE_TEXT: xo_buf_append(xbp, xfip->xfi_content, xfip->xfi_clen); break; default: xo_buf_append(xbp, "{", 1); if (ftype != 'V') { ch = ftype; xo_buf_append(xbp, &ch, 1); } unsigned fnum = xfip->xfi_fnum ?: 0; if (fnum) { char num[12]; /* Field numbers are origin 1, not 0, following printf(3) */ snprintf(num, sizeof(num), "%u", fnum); xo_buf_append(xbp, num, strlen(num)); } xo_buf_append(xbp, ":", 1); xo_buf_append(xbp, xfip->xfi_content, xfip->xfi_clen); xo_buf_append(xbp, "}", 1); } } xo_buf_append(xbp, "", 1); return 0; } void xo_dump_fields (xo_field_info_t *); /* Fake prototype for debug function */ void xo_dump_fields (xo_field_info_t *fields) { xo_field_info_t *xfip; for (xfip = fields; xfip->xfi_ftype; xfip++) { printf("%lu(%u): %lx [%c/%u] [%.*s] [%.*s] [%.*s]\n", (unsigned long) (xfip - fields), xfip->xfi_fnum, (unsigned long) xfip->xfi_flags, isprint((int) xfip->xfi_ftype) ? 
xfip->xfi_ftype : ' ', xfip->xfi_ftype, (int) xfip->xfi_clen, xfip->xfi_content ?: "", (int) xfip->xfi_flen, xfip->xfi_format ?: "", (int) xfip->xfi_elen, xfip->xfi_encoding ?: ""); } } #ifdef HAVE_GETTEXT /* * Find the field that matches the given field number */ static xo_field_info_t * xo_gettext_find_field (xo_field_info_t *fields, unsigned fnum) { xo_field_info_t *xfip; for (xfip = fields; xfip->xfi_ftype; xfip++) if (xfip->xfi_fnum == fnum) return xfip; return NULL; } /* * At this point, we need to consider if the fields have been reordered, * such as "The {:adjective} {:noun}" to "La {:noun} {:adjective}". * * We need to rewrite the new_fields using the old fields order, * so that we can render the message using the arguments as they * appear on the stack. It's a lot of work, but we don't really * want to (eventually) fall into the standard printf code which * means using the arguments straight (and in order) from the * varargs we were originally passed. */ static void xo_gettext_rewrite_fields (xo_handle_t *xop UNUSED, xo_field_info_t *fields, unsigned max_fields) { xo_field_info_t tmp[max_fields]; bzero(tmp, max_fields * sizeof(tmp[0])); unsigned fnum = 0; xo_field_info_t *newp, *outp, *zp; for (newp = fields, outp = tmp; newp->xfi_ftype; newp++, outp++) { switch (newp->xfi_ftype) { case XO_ROLE_NEWLINE: /* Don't get numbered */ case XO_ROLE_TEXT: case XO_ROLE_EBRACE: case 'G': *outp = *newp; outp->xfi_renum = 0; continue; } zp = xo_gettext_find_field(fields, ++fnum); if (zp == NULL) { /* Should not occur */ *outp = *newp; outp->xfi_renum = 0; continue; } *outp = *zp; outp->xfi_renum = newp->xfi_fnum; } memcpy(fields, tmp, max_fields * sizeof(tmp[0])); } /* * We've got two lists of fields, the old list from the original * format string and the new one from the parsed gettext reply. The * new list has the localized words, where the old list has the * formatting information. We need to combine them into a single list * (the new list). * * If the list needs to be reordered, then we've got more serious work * to do. */ static int xo_gettext_combine_formats (xo_handle_t *xop, const char *fmt UNUSED, const char *gtfmt, xo_field_info_t *old_fields, xo_field_info_t *new_fields, unsigned new_max_fields, int *reorderedp) { int reordered = 0; xo_field_info_t *newp, *oldp, *startp = old_fields; xo_gettext_finish_numbering_fields(xop, fmt, old_fields); for (newp = new_fields; newp->xfi_ftype; newp++) { switch (newp->xfi_ftype) { case XO_ROLE_NEWLINE: case XO_ROLE_TEXT: case XO_ROLE_EBRACE: continue; case 'V': for (oldp = startp; oldp->xfi_ftype; oldp++) { if (oldp->xfi_ftype != 'V') continue; if (newp->xfi_clen != oldp->xfi_clen || strncmp(newp->xfi_content, oldp->xfi_content, oldp->xfi_clen) != 0) { reordered = 1; continue; } startp = oldp + 1; break; } /* Didn't find it on the first pass (starting from start) */ if (oldp->xfi_ftype == 0) { for (oldp = old_fields; oldp < startp; oldp++) { if (oldp->xfi_ftype != 'V') continue; if (newp->xfi_clen != oldp->xfi_clen) continue; if (strncmp(newp->xfi_content, oldp->xfi_content, oldp->xfi_clen) != 0) continue; reordered = 1; break; } if (oldp == startp) { /* Field not found */ xo_failure(xop, "post-gettext format can't find field " "'%.*s' in format '%s'", newp->xfi_clen, newp->xfi_content, xo_printable(gtfmt)); return -1; } } break; default: /* * Other fields don't have names for us to use, so if * the types aren't the same, then we'll have to assume * the original field is a match. 
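* * For example, a decoration field ("{D:}") in the translated string is paired with the next decoration of the same type in the original (the search does not cross value fields), and the original's format and flags are carried over at "copy_it" below.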
*/ for (oldp = startp; oldp->xfi_ftype; oldp++) { if (oldp->xfi_ftype == 'V') /* Can't go past these */ break; if (oldp->xfi_ftype == newp->xfi_ftype) goto copy_it; /* Assumably we have a match */ } continue; } /* * Found a match; copy over appropriate fields */ copy_it: newp->xfi_flags = oldp->xfi_flags; newp->xfi_fnum = oldp->xfi_fnum; newp->xfi_format = oldp->xfi_format; newp->xfi_flen = oldp->xfi_flen; newp->xfi_encoding = oldp->xfi_encoding; newp->xfi_elen = oldp->xfi_elen; } *reorderedp = reordered; if (reordered) { xo_gettext_finish_numbering_fields(xop, fmt, new_fields); xo_gettext_rewrite_fields(xop, new_fields, new_max_fields); } return 0; } /* * We don't want to make gettext() calls here with a complete format * string, since that means changing a flag would mean a * labor-intensive re-translation expense. Instead we build a * simplified form with a reduced level of detail, perform a lookup on * that string and then re-insert the formatting info. * * So something like: * xo_emit("{G:}close {:fd/%ld} returned {g:error/%m} {:test/%6.6s}\n", ...) * would have a lookup string of: * "close {:fd} returned {:error} {:test}\n" * * We also need to handle reordering of fields, where the gettext() * reply string uses fields in a different order than the original * format string: * "cluse-a {:fd} retoorned {:test}. Bork {:error} Bork. Bork.\n" * If we have to reorder fields within the message, then things get * complicated. See xo_gettext_rewrite_fields. * * Summary: i18n aighn't cheap. */ static const char * xo_gettext_build_format (xo_handle_t *xop, xo_field_info_t *fields, int this_field, const char *fmt, char **new_fmtp) { if (xo_style_is_encoding(xop)) goto bail; xo_buffer_t xb; xo_buf_init(&xb); if (xo_gettext_simplify_format(xop, &xb, fields, this_field, fmt, NULL)) goto bail2; const char *gtfmt = xo_dgettext(xop, xb.xb_bufp); if (gtfmt == NULL || gtfmt == fmt || xo_streq(gtfmt, fmt)) goto bail2; char *new_fmt = xo_strndup(gtfmt, -1); if (new_fmt == NULL) goto bail2; xo_buf_cleanup(&xb); *new_fmtp = new_fmt; return new_fmt; bail2: xo_buf_cleanup(&xb); bail: *new_fmtp = NULL; return fmt; } static void xo_gettext_rebuild_content (xo_handle_t *xop, xo_field_info_t *fields, ssize_t *fstart, unsigned min_fstart, ssize_t *fend, unsigned max_fend) { xo_field_info_t *xfip; char *buf; ssize_t base = fstart[min_fstart]; ssize_t blen = fend[max_fend] - base; xo_buffer_t *xbp = &xop->xo_data; if (blen == 0) return; buf = xo_realloc(NULL, blen); if (buf == NULL) return; memcpy(buf, xbp->xb_bufp + fstart[min_fstart], blen); /* Copy our data */ unsigned field = min_fstart, len, fnum; ssize_t soff, doff = base; xo_field_info_t *zp; /* * Be aware there are two competing views of "field number": we * want the user to think in terms of "The {1:size}" where {G:}, * newlines, escaped braces, and text don't have numbers. But there * is also the internal view, where we have an array of * xo_field_info_t and every field has an index. fnum, fstart[] * and fend[] are the latter, but xfi_renum is the former. */ for (xfip = fields + field; xfip->xfi_ftype; xfip++, field++) { fnum = field; if (xfip->xfi_renum) { zp = xo_gettext_find_field(fields, xfip->xfi_renum); fnum = zp ?
zp - fields : field; } soff = fstart[fnum]; len = fend[fnum] - soff; if (len > 0) { soff -= base; memcpy(xbp->xb_bufp + doff, buf + soff, len); doff += len; } } xo_free(buf); } #else /* HAVE_GETTEXT */ static const char * xo_gettext_build_format (xo_handle_t *xop UNUSED, xo_field_info_t *fields UNUSED, int this_field UNUSED, const char *fmt UNUSED, char **new_fmtp) { *new_fmtp = NULL; return fmt; } static int xo_gettext_combine_formats (xo_handle_t *xop UNUSED, const char *fmt UNUSED, const char *gtfmt UNUSED, xo_field_info_t *old_fields UNUSED, xo_field_info_t *new_fields UNUSED, unsigned new_max_fields UNUSED, int *reorderedp UNUSED) { return -1; } static void xo_gettext_rebuild_content (xo_handle_t *xop UNUSED, xo_field_info_t *fields UNUSED, ssize_t *fstart UNUSED, unsigned min_fstart UNUSED, ssize_t *fend UNUSED, unsigned max_fend UNUSED) { return; } #endif /* HAVE_GETTEXT */ /* * Emit a set of fields. This is really the core of libxo. */ static ssize_t xo_do_emit_fields (xo_handle_t *xop, xo_field_info_t *fields, unsigned max_fields, const char *fmt) { int gettext_inuse = 0; int gettext_changed = 0; int gettext_reordered = 0; unsigned ftype; xo_xff_flags_t flags; xo_field_info_t *new_fields = NULL; xo_field_info_t *xfip; unsigned field; ssize_t rc = 0; int flush = XOF_ISSET(xop, XOF_FLUSH); int flush_line = XOF_ISSET(xop, XOF_FLUSH_LINE); char *new_fmt = NULL; if (XOIF_ISSET(xop, XOIF_REORDER) || xo_style(xop) == XO_STYLE_ENCODER) flush_line = 0; /* * Some overhead for gettext; if the fields in the msgstr returned * by gettext are reordered, then we need to record start and end * for each field. We'll go ahead and render the fields in the * normal order, but later we can then reconstruct the reordered * fields using these fstart/fend values. */ unsigned flimit = max_fields * 2; /* Pessimistic limit */ unsigned min_fstart = flimit - 1; unsigned max_fend = 0; /* Highest recorded fend[] entry */ ssize_t fstart[flimit]; bzero(fstart, flimit * sizeof(fstart[0])); ssize_t fend[flimit]; bzero(fend, flimit * sizeof(fend[0])); for (xfip = fields, field = 0; field < max_fields && xfip->xfi_ftype; xfip++, field++) { ftype = xfip->xfi_ftype; flags = xfip->xfi_flags; /* Record field start offset */ if (gettext_reordered) { fstart[field] = xo_buf_offset(&xop->xo_data); if (min_fstart > field) min_fstart = field; } const char *content = xfip->xfi_content; ssize_t clen = xfip->xfi_clen; if (flags & XFF_ARGUMENT) { /* * Argument flag means the content isn't given in the descriptor, * but as a UTF-8 string ('const char *') argument in xo_vap. */ content = va_arg(xop->xo_vap, char *); clen = content ? strlen(content) : 0; } if (ftype == XO_ROLE_NEWLINE) { xo_line_close(xop); if (flush_line && xo_flush_h(xop) < 0) return -1; goto bottom; } else if (ftype == XO_ROLE_EBRACE) { xo_format_text(xop, xfip->xfi_start, xfip->xfi_len); goto bottom; } else if (ftype == XO_ROLE_TEXT) { /* Normal text */ xo_format_text(xop, xfip->xfi_content, xfip->xfi_clen); goto bottom; } /* * Notes and units need the 'w' flag handled before the content. 
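* * For example, "{:length/%d}{Nw:meters}" should render as "10 meters"; the blank belongs in front of the note, so it is emitted here as padding and XFF_WS is cleared to prevent the usual trailing-blank handling later in the loop.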
*/ if (ftype == 'N' || ftype == 'U') { if (flags & XFF_WS) { xo_format_content(xop, "padding", NULL, " ", 1, NULL, 0, flags); flags &= ~XFF_WS; /* Prevent later handling of this flag */ } } if (ftype == 'V') xo_format_value(xop, content, clen, NULL, 0, xfip->xfi_format, xfip->xfi_flen, xfip->xfi_encoding, xfip->xfi_elen, flags); else if (ftype == '[') xo_anchor_start(xop, xfip, content, clen); else if (ftype == ']') xo_anchor_stop(xop, xfip, content, clen); else if (ftype == 'C') xo_format_colors(xop, xfip, content, clen); else if (ftype == 'G') { /* * A {G:domain} field; disect the domain name and translate * the remaining portion of the input string. If the user * didn't put the {G:} at the start of the format string, then * assumably they just want us to translate the rest of it. * Since gettext returns strings in a static buffer, we make * a copy in new_fmt. */ xo_set_gettext_domain(xop, xfip, content, clen); if (!gettext_inuse) { /* Only translate once */ gettext_inuse = 1; if (new_fmt) { xo_free(new_fmt); new_fmt = NULL; } xo_gettext_build_format(xop, fields, field, xfip->xfi_next, &new_fmt); if (new_fmt) { gettext_changed = 1; unsigned new_max_fields = xo_count_fields(xop, new_fmt); if (++new_max_fields < max_fields) new_max_fields = max_fields; /* Leave a blank slot at the beginning */ ssize_t sz = (new_max_fields + 1) * sizeof(xo_field_info_t); new_fields = alloca(sz); bzero(new_fields, sz); if (!xo_parse_fields(xop, new_fields + 1, new_max_fields, new_fmt)) { gettext_reordered = 0; if (!xo_gettext_combine_formats(xop, fmt, new_fmt, fields, new_fields + 1, new_max_fields, &gettext_reordered)) { if (gettext_reordered) { if (XOF_ISSET(xop, XOF_LOG_GETTEXT)) xo_failure(xop, "gettext finds reordered " "fields in '%s' and '%s'", xo_printable(fmt), xo_printable(new_fmt)); flush_line = 0; /* Must keep at content */ XOIF_SET(xop, XOIF_REORDER); } field = -1; /* Will be incremented at top of loop */ xfip = new_fields; max_fields = new_max_fields; } } } } continue; } else if (clen || xfip->xfi_format) { const char *class_name = xo_class_name(ftype); if (class_name) xo_format_content(xop, class_name, xo_tag_name(ftype), content, clen, xfip->xfi_format, xfip->xfi_flen, flags); else if (ftype == 'T') xo_format_title(xop, xfip, content, clen); else if (ftype == 'U') xo_format_units(xop, xfip, content, clen); else xo_failure(xop, "unknown field type: '%c'", ftype); } if (flags & XFF_COLON) xo_format_content(xop, "decoration", NULL, ":", 1, NULL, 0, 0); if (flags & XFF_WS) xo_format_content(xop, "padding", NULL, " ", 1, NULL, 0, 0); bottom: /* Record the end-of-field offset */ if (gettext_reordered) { fend[field] = xo_buf_offset(&xop->xo_data); max_fend = field; } } if (gettext_changed && gettext_reordered) { /* Final step: rebuild the content using the rendered fields */ xo_gettext_rebuild_content(xop, new_fields + 1, fstart, min_fstart, fend, max_fend); } XOIF_CLEAR(xop, XOIF_REORDER); /* * If we've got enough data, flush it. */ if (xo_buf_offset(&xop->xo_data) > XO_BUF_HIGH_WATER) flush = 1; /* If we don't have an anchor, write the text out */ if (flush && !XOIF_ISSET(xop, XOIF_ANCHOR)) { if (xo_flush_h(xop) < 0) rc = -1; } if (new_fmt) xo_free(new_fmt); /* * We've carried the gettext domainname inside our handle just for * convenience, but we need to ensure it doesn't survive across * xo_emit calls. */ if (xop->xo_gt_domain) { xo_free(xop->xo_gt_domain); xop->xo_gt_domain = NULL; } return (rc < 0) ? 
rc : xop->xo_columns; } /* * Parse and emit a set of fields */ static int xo_do_emit (xo_handle_t *xop, xo_emit_flags_t flags, const char *fmt) { xop->xo_columns = 0; /* Always reset it */ xop->xo_errno = errno; /* Save for "%m" */ if (fmt == NULL) return 0; unsigned max_fields; xo_field_info_t *fields = NULL; /* Adjust XOEF_RETAIN based on global flags */ if (XOF_ISSET(xop, XOF_RETAIN_ALL)) flags |= XOEF_RETAIN; if (XOF_ISSET(xop, XOF_RETAIN_NONE)) flags &= ~XOEF_RETAIN; /* * Check for 'retain' flag, telling us to retain the field * information. If we've already saved it, then we can avoid * re-parsing the format string. */ if (!(flags & XOEF_RETAIN) || xo_retain_find(fmt, &fields, &max_fields) != 0 || fields == NULL) { /* Nothing retained; parse the format string */ max_fields = xo_count_fields(xop, fmt); fields = alloca(max_fields * sizeof(fields[0])); bzero(fields, max_fields * sizeof(fields[0])); if (xo_parse_fields(xop, fields, max_fields, fmt)) return -1; /* Warning already displayed */ if (flags & XOEF_RETAIN) { /* Retain the info */ xo_retain_add(fmt, fields, max_fields); } } return xo_do_emit_fields(xop, fields, max_fields, fmt); } /* * Rebuild a format string in a gettext-friendly format. This function * is exposed so that tools can perform this function. See xo(1). */ char * xo_simplify_format (xo_handle_t *xop, const char *fmt, int with_numbers, xo_simplify_field_func_t field_cb) { xop = xo_default(xop); xop->xo_columns = 0; /* Always reset it */ xop->xo_errno = errno; /* Save for "%m" */ unsigned max_fields = xo_count_fields(xop, fmt); xo_field_info_t fields[max_fields]; bzero(fields, max_fields * sizeof(fields[0])); if (xo_parse_fields(xop, fields, max_fields, fmt)) return NULL; /* Warning already displayed */ xo_buffer_t xb; xo_buf_init(&xb); if (with_numbers) xo_gettext_finish_numbering_fields(xop, fmt, fields); if (xo_gettext_simplify_format(xop, &xb, fields, -1, fmt, field_cb)) return NULL; return xb.xb_bufp; } xo_ssize_t xo_emit_hv (xo_handle_t *xop, const char *fmt, va_list vap) { ssize_t rc; xop = xo_default(xop); va_copy(xop->xo_vap, vap); rc = xo_do_emit(xop, 0, fmt); va_end(xop->xo_vap); bzero(&xop->xo_vap, sizeof(xop->xo_vap)); return rc; } xo_ssize_t xo_emit_h (xo_handle_t *xop, const char *fmt, ...) { ssize_t rc; xop = xo_default(xop); va_start(xop->xo_vap, fmt); rc = xo_do_emit(xop, 0, fmt); va_end(xop->xo_vap); bzero(&xop->xo_vap, sizeof(xop->xo_vap)); return rc; } xo_ssize_t xo_emit (const char *fmt, ...) { xo_handle_t *xop = xo_default(NULL); ssize_t rc; va_start(xop->xo_vap, fmt); rc = xo_do_emit(xop, 0, fmt); va_end(xop->xo_vap); bzero(&xop->xo_vap, sizeof(xop->xo_vap)); return rc; } xo_ssize_t xo_emit_hvf (xo_handle_t *xop, xo_emit_flags_t flags, const char *fmt, va_list vap) { ssize_t rc; xop = xo_default(xop); va_copy(xop->xo_vap, vap); rc = xo_do_emit(xop, flags, fmt); va_end(xop->xo_vap); bzero(&xop->xo_vap, sizeof(xop->xo_vap)); return rc; } xo_ssize_t xo_emit_hf (xo_handle_t *xop, xo_emit_flags_t flags, const char *fmt, ...) { ssize_t rc; xop = xo_default(xop); va_start(xop->xo_vap, fmt); rc = xo_do_emit(xop, flags, fmt); va_end(xop->xo_vap); bzero(&xop->xo_vap, sizeof(xop->xo_vap)); return rc; } xo_ssize_t xo_emit_f (xo_emit_flags_t flags, const char *fmt, ...)
{ xo_handle_t *xop = xo_default(NULL); ssize_t rc; va_start(xop->xo_vap, fmt); rc = xo_do_emit(xop, flags, fmt); va_end(xop->xo_vap); bzero(&xop->xo_vap, sizeof(xop->xo_vap)); return rc; } /* * Emit a single field by providing the info information typically provided * inside the field description (role, modifiers, and formats). This is * a convenience function to avoid callers using snprintf to build field * descriptions. */ xo_ssize_t xo_emit_field_hv (xo_handle_t *xop, const char *rolmod, const char *contents, const char *fmt, const char *efmt, va_list vap) { ssize_t rc; xop = xo_default(xop); if (rolmod == NULL) rolmod = "V"; xo_field_info_t xfi; bzero(&xfi, sizeof(xfi)); const char *cp; cp = xo_parse_roles(xop, rolmod, rolmod, &xfi); if (cp == NULL) return -1; xfi.xfi_start = fmt; xfi.xfi_content = contents; xfi.xfi_format = fmt; xfi.xfi_encoding = efmt; xfi.xfi_clen = contents ? strlen(contents) : 0; xfi.xfi_flen = fmt ? strlen(fmt) : 0; xfi.xfi_elen = efmt ? strlen(efmt) : 0; /* If we have content, then we have a default format */ if (contents && fmt == NULL && xo_role_wants_default_format(xfi.xfi_ftype)) { xfi.xfi_format = xo_default_format; xfi.xfi_flen = 2; } va_copy(xop->xo_vap, vap); rc = xo_do_emit_fields(xop, &xfi, 1, fmt ?: contents ?: "field"); va_end(xop->xo_vap); return rc; } xo_ssize_t xo_emit_field_h (xo_handle_t *xop, const char *rolmod, const char *contents, const char *fmt, const char *efmt, ...) { ssize_t rc; va_list vap; va_start(vap, efmt); rc = xo_emit_field_hv(xop, rolmod, contents, fmt, efmt, vap); va_end(vap); return rc; } xo_ssize_t xo_emit_field (const char *rolmod, const char *contents, const char *fmt, const char *efmt, ...) { ssize_t rc; va_list vap; va_start(vap, efmt); rc = xo_emit_field_hv(NULL, rolmod, contents, fmt, efmt, vap); va_end(vap); return rc; } xo_ssize_t xo_attr_hv (xo_handle_t *xop, const char *name, const char *fmt, va_list vap) { const ssize_t extra = 5; /* space, equals, quote, quote, and nul */ xop = xo_default(xop); ssize_t rc = 0; ssize_t nlen = strlen(name); xo_buffer_t *xbp = &xop->xo_attrs; ssize_t name_offset, value_offset; switch (xo_style(xop)) { case XO_STYLE_XML: if (!xo_buf_has_room(xbp, nlen + extra)) return -1; *xbp->xb_curp++ = ' '; memcpy(xbp->xb_curp, name, nlen); xbp->xb_curp += nlen; *xbp->xb_curp++ = '='; *xbp->xb_curp++ = '"'; rc = xo_vsnprintf(xop, xbp, fmt, vap); if (rc >= 0) { rc = xo_escape_xml(xbp, rc, 1); xbp->xb_curp += rc; } if (!xo_buf_has_room(xbp, 2)) return -1; *xbp->xb_curp++ = '"'; *xbp->xb_curp = '\0'; rc += nlen + extra; break; case XO_STYLE_ENCODER: name_offset = xo_buf_offset(xbp); xo_buf_append(xbp, name, nlen); xo_buf_append(xbp, "", 1); value_offset = xo_buf_offset(xbp); rc = xo_vsnprintf(xop, xbp, fmt, vap); if (rc >= 0) { xbp->xb_curp += rc; *xbp->xb_curp = '\0'; rc = xo_encoder_handle(xop, XO_OP_ATTRIBUTE, xo_buf_data(xbp, name_offset), xo_buf_data(xbp, value_offset), 0); } } return rc; } xo_ssize_t xo_attr_h (xo_handle_t *xop, const char *name, const char *fmt, ...) { ssize_t rc; va_list vap; va_start(vap, fmt); rc = xo_attr_hv(xop, name, fmt, vap); va_end(vap); return rc; } xo_ssize_t xo_attr (const char *name, const char *fmt, ...) 
{ ssize_t rc; va_list vap; va_start(vap, fmt); rc = xo_attr_hv(NULL, name, fmt, vap); va_end(vap); return rc; } static void xo_depth_change (xo_handle_t *xop, const char *name, int delta, int indent, xo_state_t state, xo_xsf_flags_t flags) { if (xo_style(xop) == XO_STYLE_HTML || xo_style(xop) == XO_STYLE_TEXT) indent = 0; if (XOF_ISSET(xop, XOF_DTRT)) flags |= XSF_DTRT; if (delta >= 0) { /* Push operation */ if (xo_depth_check(xop, xop->xo_depth + delta)) return; xo_stack_t *xsp = &xop->xo_stack[xop->xo_depth + delta]; xsp->xs_flags = flags; xsp->xs_state = state; xo_stack_set_flags(xop); if (name == NULL) name = XO_FAILURE_NAME; xsp->xs_name = xo_strndup(name, -1); } else { /* Pop operation */ if (xop->xo_depth == 0) { if (!XOF_ISSET(xop, XOF_IGNORE_CLOSE)) xo_failure(xop, "close with empty stack: '%s'", name); return; } xo_stack_t *xsp = &xop->xo_stack[xop->xo_depth]; if (XOF_ISSET(xop, XOF_WARN)) { const char *top = xsp->xs_name; if (top != NULL && name != NULL && !xo_streq(name, top)) { xo_failure(xop, "incorrect close: '%s' vs. '%s'", name, top); return; } if ((xsp->xs_flags & XSF_LIST) != (flags & XSF_LIST)) { xo_failure(xop, "list close on list conflict: '%s'", name); return; } if ((xsp->xs_flags & XSF_INSTANCE) != (flags & XSF_INSTANCE)) { xo_failure(xop, "list close on instance conflict: '%s'", name); return; } } if (xsp->xs_name) { xo_free(xsp->xs_name); xsp->xs_name = NULL; } if (xsp->xs_keys) { xo_free(xsp->xs_keys); xsp->xs_keys = NULL; } } xop->xo_depth += delta; /* Record new depth */ xop->xo_indent += indent; } void xo_set_depth (xo_handle_t *xop, int depth) { xop = xo_default(xop); if (xo_depth_check(xop, depth)) return; xop->xo_depth += depth; xop->xo_indent += depth; /* * Handling the "top wrapper" for JSON is a bit of a pain. Here * we need to detect that the depth has been changed to set the * "XOIF_TOP_EMITTED" flag correctly. */ if (xop->xo_style == XO_STYLE_JSON && !XOF_ISSET(xop, XOF_NO_TOP) && xop->xo_depth > 0) XOIF_SET(xop, XOIF_TOP_EMITTED); } static xo_xsf_flags_t xo_stack_flags (xo_xof_flags_t xflags) { if (xflags & XOF_DTRT) return XSF_DTRT; return 0; } static void xo_emit_top (xo_handle_t *xop, const char *ppn) { xo_printf(xop, "%*s{%s", xo_indent(xop), "", ppn); XOIF_SET(xop, XOIF_TOP_EMITTED); if (xop->xo_version) { xo_printf(xop, "%*s\"__version\": \"%s\", %s", xo_indent(xop), "", xop->xo_version, ppn); xo_free(xop->xo_version); xop->xo_version = NULL; } } static ssize_t xo_do_open_container (xo_handle_t *xop, xo_xof_flags_t flags, const char *name) { ssize_t rc = 0; const char *ppn = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : ""; const char *pre_nl = ""; if (name == NULL) { xo_failure(xop, "NULL passed for container name"); name = XO_FAILURE_NAME; } const char *leader = xo_xml_leader(xop, name); flags |= xop->xo_flags; /* Pick up handle flags */ switch (xo_style(xop)) { case XO_STYLE_XML: rc = xo_printf(xop, "%*s<%s%s", xo_indent(xop), "", leader, name); if (xop->xo_attrs.xb_curp != xop->xo_attrs.xb_bufp) { rc += xop->xo_attrs.xb_curp - xop->xo_attrs.xb_bufp; xo_data_append(xop, xop->xo_attrs.xb_bufp, xop->xo_attrs.xb_curp - xop->xo_attrs.xb_bufp); xop->xo_attrs.xb_curp = xop->xo_attrs.xb_bufp; } rc += xo_printf(xop, ">%s", ppn); break; case XO_STYLE_JSON: xo_stack_set_flags(xop); if (!XOF_ISSET(xop, XOF_NO_TOP) && !XOIF_ISSET(xop, XOIF_TOP_EMITTED)) xo_emit_top(xop, ppn); if (xop->xo_stack[xop->xo_depth].xs_flags & XSF_NOT_FIRST) pre_nl = XOF_ISSET(xop, XOF_PRETTY) ?
",\n" : ", "; xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST; rc = xo_printf(xop, "%s%*s\"%s\": {%s", pre_nl, xo_indent(xop), "", name, ppn); break; case XO_STYLE_SDPARAMS: break; case XO_STYLE_ENCODER: rc = xo_encoder_handle(xop, XO_OP_OPEN_CONTAINER, name, NULL, flags); break; } xo_depth_change(xop, name, 1, 1, XSS_OPEN_CONTAINER, xo_stack_flags(flags)); return rc; } xo_ssize_t xo_open_container_hf (xo_handle_t *xop, xo_xof_flags_t flags, const char *name) { return xo_transition(xop, flags, name, XSS_OPEN_CONTAINER); } xo_ssize_t xo_open_container_h (xo_handle_t *xop, const char *name) { return xo_open_container_hf(xop, 0, name); } xo_ssize_t xo_open_container (const char *name) { return xo_open_container_hf(NULL, 0, name); } xo_ssize_t xo_open_container_hd (xo_handle_t *xop, const char *name) { return xo_open_container_hf(xop, XOF_DTRT, name); } xo_ssize_t xo_open_container_d (const char *name) { return xo_open_container_hf(NULL, XOF_DTRT, name); } static int xo_do_close_container (xo_handle_t *xop, const char *name) { xop = xo_default(xop); ssize_t rc = 0; const char *ppn = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : ""; const char *pre_nl = ""; if (name == NULL) { xo_stack_t *xsp = &xop->xo_stack[xop->xo_depth]; name = xsp->xs_name; if (name) { ssize_t len = strlen(name) + 1; /* We need to make a local copy; xo_depth_change will free it */ char *cp = alloca(len); memcpy(cp, name, len); name = cp; } else if (!(xsp->xs_flags & XSF_DTRT)) { xo_failure(xop, "missing name without 'dtrt' mode"); name = XO_FAILURE_NAME; } } const char *leader = xo_xml_leader(xop, name); switch (xo_style(xop)) { case XO_STYLE_XML: xo_depth_change(xop, name, -1, -1, XSS_CLOSE_CONTAINER, 0); rc = xo_printf(xop, "%*s%s", xo_indent(xop), "", leader, name, ppn); break; case XO_STYLE_JSON: xo_stack_set_flags(xop); pre_nl = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : ""; ppn = ""; xo_depth_change(xop, name, -1, -1, XSS_CLOSE_CONTAINER, 0); rc = xo_printf(xop, "%s%*s}%s", pre_nl, xo_indent(xop), "", ppn); xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST; break; case XO_STYLE_HTML: case XO_STYLE_TEXT: xo_depth_change(xop, name, -1, 0, XSS_CLOSE_CONTAINER, 0); break; case XO_STYLE_SDPARAMS: break; case XO_STYLE_ENCODER: xo_depth_change(xop, name, -1, 0, XSS_CLOSE_CONTAINER, 0); rc = xo_encoder_handle(xop, XO_OP_CLOSE_CONTAINER, name, NULL, 0); break; } return rc; } xo_ssize_t xo_close_container_h (xo_handle_t *xop, const char *name) { return xo_transition(xop, 0, name, XSS_CLOSE_CONTAINER); } xo_ssize_t xo_close_container (const char *name) { return xo_close_container_h(NULL, name); } xo_ssize_t xo_close_container_hd (xo_handle_t *xop) { return xo_close_container_h(xop, NULL); } xo_ssize_t xo_close_container_d (void) { return xo_close_container_h(NULL, NULL); } static int xo_do_open_list (xo_handle_t *xop, xo_xof_flags_t flags, const char *name) { ssize_t rc = 0; int indent = 0; xop = xo_default(xop); const char *ppn = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : ""; const char *pre_nl = ""; switch (xo_style(xop)) { case XO_STYLE_JSON: indent = 1; if (!XOF_ISSET(xop, XOF_NO_TOP) && !XOIF_ISSET(xop, XOIF_TOP_EMITTED)) xo_emit_top(xop, ppn); if (name == NULL) { xo_failure(xop, "NULL passed for list name"); name = XO_FAILURE_NAME; } xo_stack_set_flags(xop); if (xop->xo_stack[xop->xo_depth].xs_flags & XSF_NOT_FIRST) pre_nl = XOF_ISSET(xop, XOF_PRETTY) ? 
",\n" : ", "; xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST; rc = xo_printf(xop, "%s%*s\"%s\": [%s", pre_nl, xo_indent(xop), "", name, ppn); break; case XO_STYLE_ENCODER: rc = xo_encoder_handle(xop, XO_OP_OPEN_LIST, name, NULL, flags); break; } xo_depth_change(xop, name, 1, indent, XSS_OPEN_LIST, XSF_LIST | xo_stack_flags(flags)); return rc; } xo_ssize_t xo_open_list_hf (xo_handle_t *xop, xo_xof_flags_t flags, const char *name) { return xo_transition(xop, flags, name, XSS_OPEN_LIST); } xo_ssize_t xo_open_list_h (xo_handle_t *xop, const char *name) { return xo_open_list_hf(xop, 0, name); } xo_ssize_t xo_open_list (const char *name) { return xo_open_list_hf(NULL, 0, name); } xo_ssize_t xo_open_list_hd (xo_handle_t *xop, const char *name) { return xo_open_list_hf(xop, XOF_DTRT, name); } xo_ssize_t xo_open_list_d (const char *name) { return xo_open_list_hf(NULL, XOF_DTRT, name); } static int xo_do_close_list (xo_handle_t *xop, const char *name) { ssize_t rc = 0; const char *pre_nl = ""; if (name == NULL) { xo_stack_t *xsp = &xop->xo_stack[xop->xo_depth]; name = xsp->xs_name; if (name) { ssize_t len = strlen(name) + 1; /* We need to make a local copy; xo_depth_change will free it */ char *cp = alloca(len); memcpy(cp, name, len); name = cp; } else if (!(xsp->xs_flags & XSF_DTRT)) { xo_failure(xop, "missing name without 'dtrt' mode"); name = XO_FAILURE_NAME; } } switch (xo_style(xop)) { case XO_STYLE_JSON: if (xop->xo_stack[xop->xo_depth].xs_flags & XSF_NOT_FIRST) pre_nl = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : ""; xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST; xo_depth_change(xop, name, -1, -1, XSS_CLOSE_LIST, XSF_LIST); rc = xo_printf(xop, "%s%*s]", pre_nl, xo_indent(xop), ""); xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST; break; case XO_STYLE_ENCODER: xo_depth_change(xop, name, -1, 0, XSS_CLOSE_LIST, XSF_LIST); rc = xo_encoder_handle(xop, XO_OP_CLOSE_LIST, name, NULL, 0); break; default: xo_depth_change(xop, name, -1, 0, XSS_CLOSE_LIST, XSF_LIST); xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST; break; } return rc; } xo_ssize_t xo_close_list_h (xo_handle_t *xop, const char *name) { return xo_transition(xop, 0, name, XSS_CLOSE_LIST); } xo_ssize_t xo_close_list (const char *name) { return xo_close_list_h(NULL, name); } xo_ssize_t xo_close_list_hd (xo_handle_t *xop) { return xo_close_list_h(xop, NULL); } xo_ssize_t xo_close_list_d (void) { return xo_close_list_h(NULL, NULL); } static int xo_do_open_leaf_list (xo_handle_t *xop, xo_xof_flags_t flags, const char *name) { ssize_t rc = 0; int indent = 0; xop = xo_default(xop); const char *ppn = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : ""; const char *pre_nl = ""; switch (xo_style(xop)) { case XO_STYLE_JSON: indent = 1; if (!XOF_ISSET(xop, XOF_NO_TOP)) { if (!XOIF_ISSET(xop, XOIF_TOP_EMITTED)) { xo_printf(xop, "%*s{%s", xo_indent(xop), "", ppn); XOIF_SET(xop, XOIF_TOP_EMITTED); } } if (name == NULL) { xo_failure(xop, "NULL passed for list name"); name = XO_FAILURE_NAME; } xo_stack_set_flags(xop); if (xop->xo_stack[xop->xo_depth].xs_flags & XSF_NOT_FIRST) pre_nl = XOF_ISSET(xop, XOF_PRETTY) ? 
",\n" : ", "; xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST; rc = xo_printf(xop, "%s%*s\"%s\": [%s", pre_nl, xo_indent(xop), "", name, ppn); break; case XO_STYLE_ENCODER: rc = xo_encoder_handle(xop, XO_OP_OPEN_LEAF_LIST, name, NULL, flags); break; } xo_depth_change(xop, name, 1, indent, XSS_OPEN_LEAF_LIST, XSF_LIST | xo_stack_flags(flags)); return rc; } static int xo_do_close_leaf_list (xo_handle_t *xop, const char *name) { ssize_t rc = 0; const char *pre_nl = ""; if (name == NULL) { xo_stack_t *xsp = &xop->xo_stack[xop->xo_depth]; name = xsp->xs_name; if (name) { ssize_t len = strlen(name) + 1; /* We need to make a local copy; xo_depth_change will free it */ char *cp = alloca(len); memcpy(cp, name, len); name = cp; } else if (!(xsp->xs_flags & XSF_DTRT)) { xo_failure(xop, "missing name without 'dtrt' mode"); name = XO_FAILURE_NAME; } } switch (xo_style(xop)) { case XO_STYLE_JSON: if (xop->xo_stack[xop->xo_depth].xs_flags & XSF_NOT_FIRST) pre_nl = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : ""; xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST; xo_depth_change(xop, name, -1, -1, XSS_CLOSE_LEAF_LIST, XSF_LIST); rc = xo_printf(xop, "%s%*s]", pre_nl, xo_indent(xop), ""); xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST; break; case XO_STYLE_ENCODER: rc = xo_encoder_handle(xop, XO_OP_CLOSE_LEAF_LIST, name, NULL, 0); /* FALLTHRU */ default: xo_depth_change(xop, name, -1, 0, XSS_CLOSE_LEAF_LIST, XSF_LIST); xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST; break; } return rc; } static int xo_do_open_instance (xo_handle_t *xop, xo_xof_flags_t flags, const char *name) { xop = xo_default(xop); ssize_t rc = 0; const char *ppn = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : ""; const char *pre_nl = ""; if (name == NULL) { xo_failure(xop, "NULL passed for instance name"); name = XO_FAILURE_NAME; } const char *leader = xo_xml_leader(xop, name); flags |= xop->xo_flags; switch (xo_style(xop)) { case XO_STYLE_XML: rc = xo_printf(xop, "%*s<%s%s", xo_indent(xop), "", leader, name); if (xop->xo_attrs.xb_curp != xop->xo_attrs.xb_bufp) { rc += xop->xo_attrs.xb_curp - xop->xo_attrs.xb_bufp; xo_data_append(xop, xop->xo_attrs.xb_bufp, xop->xo_attrs.xb_curp - xop->xo_attrs.xb_bufp); xop->xo_attrs.xb_curp = xop->xo_attrs.xb_bufp; } rc += xo_printf(xop, ">%s", ppn); break; case XO_STYLE_JSON: xo_stack_set_flags(xop); if (xop->xo_stack[xop->xo_depth].xs_flags & XSF_NOT_FIRST) pre_nl = XOF_ISSET(xop, XOF_PRETTY) ? ",\n" : ", "; xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST; rc = xo_printf(xop, "%s%*s{%s", pre_nl, xo_indent(xop), "", ppn); break; case XO_STYLE_SDPARAMS: break; case XO_STYLE_ENCODER: rc = xo_encoder_handle(xop, XO_OP_OPEN_INSTANCE, name, NULL, flags); break; } xo_depth_change(xop, name, 1, 1, XSS_OPEN_INSTANCE, xo_stack_flags(flags)); return rc; } xo_ssize_t xo_open_instance_hf (xo_handle_t *xop, xo_xof_flags_t flags, const char *name) { return xo_transition(xop, flags, name, XSS_OPEN_INSTANCE); } xo_ssize_t xo_open_instance_h (xo_handle_t *xop, const char *name) { return xo_open_instance_hf(xop, 0, name); } xo_ssize_t xo_open_instance (const char *name) { return xo_open_instance_hf(NULL, 0, name); } xo_ssize_t xo_open_instance_hd (xo_handle_t *xop, const char *name) { return xo_open_instance_hf(xop, XOF_DTRT, name); } xo_ssize_t xo_open_instance_d (const char *name) { return xo_open_instance_hf(NULL, XOF_DTRT, name); } static int xo_do_close_instance (xo_handle_t *xop, const char *name) { xop = xo_default(xop); ssize_t rc = 0; const char *ppn = XOF_ISSET(xop, XOF_PRETTY) ? 
"\n" : ""; const char *pre_nl = ""; if (name == NULL) { xo_stack_t *xsp = &xop->xo_stack[xop->xo_depth]; name = xsp->xs_name; if (name) { ssize_t len = strlen(name) + 1; /* We need to make a local copy; xo_depth_change will free it */ char *cp = alloca(len); memcpy(cp, name, len); name = cp; } else if (!(xsp->xs_flags & XSF_DTRT)) { xo_failure(xop, "missing name without 'dtrt' mode"); name = XO_FAILURE_NAME; } } const char *leader = xo_xml_leader(xop, name); switch (xo_style(xop)) { case XO_STYLE_XML: xo_depth_change(xop, name, -1, -1, XSS_CLOSE_INSTANCE, 0); rc = xo_printf(xop, "%*s%s", xo_indent(xop), "", leader, name, ppn); break; case XO_STYLE_JSON: pre_nl = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : ""; xo_depth_change(xop, name, -1, -1, XSS_CLOSE_INSTANCE, 0); rc = xo_printf(xop, "%s%*s}", pre_nl, xo_indent(xop), ""); xop->xo_stack[xop->xo_depth].xs_flags |= XSF_NOT_FIRST; break; case XO_STYLE_HTML: case XO_STYLE_TEXT: xo_depth_change(xop, name, -1, 0, XSS_CLOSE_INSTANCE, 0); break; case XO_STYLE_SDPARAMS: break; case XO_STYLE_ENCODER: xo_depth_change(xop, name, -1, 0, XSS_CLOSE_INSTANCE, 0); rc = xo_encoder_handle(xop, XO_OP_CLOSE_INSTANCE, name, NULL, 0); break; } return rc; } xo_ssize_t xo_close_instance_h (xo_handle_t *xop, const char *name) { return xo_transition(xop, 0, name, XSS_CLOSE_INSTANCE); } xo_ssize_t xo_close_instance (const char *name) { return xo_close_instance_h(NULL, name); } xo_ssize_t xo_close_instance_hd (xo_handle_t *xop) { return xo_close_instance_h(xop, NULL); } xo_ssize_t xo_close_instance_d (void) { return xo_close_instance_h(NULL, NULL); } static int xo_do_close_all (xo_handle_t *xop, xo_stack_t *limit) { xo_stack_t *xsp; ssize_t rc = 0; xo_xsf_flags_t flags; for (xsp = &xop->xo_stack[xop->xo_depth]; xsp >= limit; xsp--) { switch (xsp->xs_state) { case XSS_INIT: /* Nothing */ rc = 0; break; case XSS_OPEN_CONTAINER: rc = xo_do_close_container(xop, NULL); break; case XSS_OPEN_LIST: rc = xo_do_close_list(xop, NULL); break; case XSS_OPEN_INSTANCE: rc = xo_do_close_instance(xop, NULL); break; case XSS_OPEN_LEAF_LIST: rc = xo_do_close_leaf_list(xop, NULL); break; case XSS_MARKER: flags = xsp->xs_flags & XSF_MARKER_FLAGS; xo_depth_change(xop, xsp->xs_name, -1, 0, XSS_MARKER, 0); xop->xo_stack[xop->xo_depth].xs_flags |= flags; rc = 0; break; } if (rc < 0) xo_failure(xop, "close %d failed: %d", xsp->xs_state, rc); } return 0; } /* * This function is responsible for clearing out whatever is needed * to get to the desired state, if possible. */ static int xo_do_close (xo_handle_t *xop, const char *name, xo_state_t new_state) { xo_stack_t *xsp, *limit = NULL; ssize_t rc; xo_state_t need_state = new_state; if (new_state == XSS_CLOSE_CONTAINER) need_state = XSS_OPEN_CONTAINER; else if (new_state == XSS_CLOSE_LIST) need_state = XSS_OPEN_LIST; else if (new_state == XSS_CLOSE_INSTANCE) need_state = XSS_OPEN_INSTANCE; else if (new_state == XSS_CLOSE_LEAF_LIST) need_state = XSS_OPEN_LEAF_LIST; else if (new_state == XSS_MARKER) need_state = XSS_MARKER; else return 0; /* Unknown or useless new states are ignored */ for (xsp = &xop->xo_stack[xop->xo_depth]; xsp > xop->xo_stack; xsp--) { /* * Marker's normally stop us from going any further, unless * we are popping a marker (new_state == XSS_MARKER). 
*/ if (xsp->xs_state == XSS_MARKER && need_state != XSS_MARKER) { if (name) { xo_failure(xop, "close (xo_%s) fails at marker '%s'; " "not found '%s'", xo_state_name(new_state), xsp->xs_name, name); return 0; } else { limit = xsp; xo_failure(xop, "close stops at marker '%s'", xsp->xs_name); } break; } if (xsp->xs_state != need_state) continue; if (name && xsp->xs_name && !xo_streq(name, xsp->xs_name)) continue; limit = xsp; break; } if (limit == NULL) { xo_failure(xop, "xo_%s can't find match for '%s'", xo_state_name(new_state), name); return 0; } rc = xo_do_close_all(xop, limit); return rc; } /* * We are in a given state and need to transition to the new state. */ static ssize_t xo_transition (xo_handle_t *xop, xo_xof_flags_t flags, const char *name, xo_state_t new_state) { xo_stack_t *xsp; ssize_t rc = 0; int old_state, on_marker; xop = xo_default(xop); xsp = &xop->xo_stack[xop->xo_depth]; old_state = xsp->xs_state; on_marker = (old_state == XSS_MARKER); /* If there's a marker on top of the stack, we need to find a real state */ while (old_state == XSS_MARKER) { if (xsp == xop->xo_stack) break; xsp -= 1; old_state = xsp->xs_state; } /* * At this point, the list of possible states are: * XSS_INIT, XSS_OPEN_CONTAINER, XSS_OPEN_LIST, * XSS_OPEN_INSTANCE, XSS_OPEN_LEAF_LIST, XSS_DISCARDING */ switch (XSS_TRANSITION(old_state, new_state)) { open_container: case XSS_TRANSITION(XSS_INIT, XSS_OPEN_CONTAINER): case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_OPEN_CONTAINER): case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_OPEN_CONTAINER): rc = xo_do_open_container(xop, flags, name); break; case XSS_TRANSITION(XSS_OPEN_LIST, XSS_OPEN_CONTAINER): case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_OPEN_CONTAINER): if (on_marker) goto marker_prevents_close; rc = xo_do_close_leaf_list(xop, NULL); if (rc >= 0) goto open_container; break; case XSS_TRANSITION(XSS_INIT, XSS_CLOSE_CONTAINER): /* This is an exception for "xo --close" */ rc = xo_do_close_container(xop, name); break; /*close_container:*/ case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_CLOSE_CONTAINER): case XSS_TRANSITION(XSS_OPEN_LIST, XSS_CLOSE_CONTAINER): case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_CLOSE_CONTAINER): if (on_marker) goto marker_prevents_close; rc = xo_do_close(xop, name, new_state); break; case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_CLOSE_CONTAINER): if (on_marker) goto marker_prevents_close; rc = xo_do_close_leaf_list(xop, NULL); if (rc >= 0) rc = xo_do_close(xop, name, new_state); break; open_list: case XSS_TRANSITION(XSS_INIT, XSS_OPEN_LIST): case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_OPEN_LIST): case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_OPEN_LIST): rc = xo_do_open_list(xop, flags, name); break; case XSS_TRANSITION(XSS_OPEN_LIST, XSS_OPEN_LIST): if (on_marker) goto marker_prevents_close; rc = xo_do_close_list(xop, NULL); if (rc >= 0) goto open_list; break; case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_OPEN_LIST): if (on_marker) goto marker_prevents_close; rc = xo_do_close_leaf_list(xop, NULL); if (rc >= 0) goto open_list; break; /*close_list:*/ case XSS_TRANSITION(XSS_OPEN_LIST, XSS_CLOSE_LIST): if (on_marker) goto marker_prevents_close; rc = xo_do_close(xop, name, new_state); break; case XSS_TRANSITION(XSS_INIT, XSS_CLOSE_LIST): case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_CLOSE_LIST): case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_CLOSE_LIST): case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_CLOSE_LIST): rc = xo_do_close(xop, name, new_state); break; open_instance: case XSS_TRANSITION(XSS_OPEN_LIST, XSS_OPEN_INSTANCE): rc = xo_do_open_instance(xop, 
flags, name); break; case XSS_TRANSITION(XSS_INIT, XSS_OPEN_INSTANCE): case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_OPEN_INSTANCE): rc = xo_do_open_list(xop, flags, name); if (rc >= 0) goto open_instance; break; case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_OPEN_INSTANCE): if (on_marker) { rc = xo_do_open_list(xop, flags, name); } else { rc = xo_do_close_instance(xop, NULL); } if (rc >= 0) goto open_instance; break; case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_OPEN_INSTANCE): if (on_marker) goto marker_prevents_close; rc = xo_do_close_leaf_list(xop, NULL); if (rc >= 0) goto open_instance; break; /*close_instance:*/ case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_CLOSE_INSTANCE): if (on_marker) goto marker_prevents_close; rc = xo_do_close_instance(xop, name); break; case XSS_TRANSITION(XSS_INIT, XSS_CLOSE_INSTANCE): /* This one makes no sense; ignore it */ xo_failure(xop, "xo_close_instance ignored when called from " "initial state ('%s')", name ?: "(unknown)"); break; case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_CLOSE_INSTANCE): case XSS_TRANSITION(XSS_OPEN_LIST, XSS_CLOSE_INSTANCE): if (on_marker) goto marker_prevents_close; rc = xo_do_close(xop, name, new_state); break; case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_CLOSE_INSTANCE): if (on_marker) goto marker_prevents_close; rc = xo_do_close_leaf_list(xop, NULL); if (rc >= 0) rc = xo_do_close(xop, name, new_state); break; open_leaf_list: case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_OPEN_LEAF_LIST): case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_OPEN_LEAF_LIST): case XSS_TRANSITION(XSS_INIT, XSS_OPEN_LEAF_LIST): rc = xo_do_open_leaf_list(xop, flags, name); break; case XSS_TRANSITION(XSS_OPEN_LIST, XSS_OPEN_LEAF_LIST): case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_OPEN_LEAF_LIST): if (on_marker) goto marker_prevents_close; rc = xo_do_close_list(xop, NULL); if (rc >= 0) goto open_leaf_list; break; /*close_leaf_list:*/ case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_CLOSE_LEAF_LIST): if (on_marker) goto marker_prevents_close; rc = xo_do_close_leaf_list(xop, name); break; case XSS_TRANSITION(XSS_INIT, XSS_CLOSE_LEAF_LIST): /* Makes no sense; ignore */ xo_failure(xop, "xo_close_leaf_list ignored when called from " "initial state ('%s')", name ?: "(unknown)"); break; case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_CLOSE_LEAF_LIST): case XSS_TRANSITION(XSS_OPEN_LIST, XSS_CLOSE_LEAF_LIST): case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_CLOSE_LEAF_LIST): if (on_marker) goto marker_prevents_close; rc = xo_do_close(xop, name, new_state); break; /*emit:*/ case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_EMIT): case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_EMIT): break; case XSS_TRANSITION(XSS_OPEN_LIST, XSS_EMIT): if (on_marker) goto marker_prevents_close; rc = xo_do_close(xop, NULL, XSS_CLOSE_LIST); break; case XSS_TRANSITION(XSS_INIT, XSS_EMIT): break; case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_EMIT): if (on_marker) goto marker_prevents_close; rc = xo_do_close_leaf_list(xop, NULL); break; /*emit_leaf_list:*/ case XSS_TRANSITION(XSS_INIT, XSS_EMIT_LEAF_LIST): case XSS_TRANSITION(XSS_OPEN_CONTAINER, XSS_EMIT_LEAF_LIST): case XSS_TRANSITION(XSS_OPEN_INSTANCE, XSS_EMIT_LEAF_LIST): rc = xo_do_open_leaf_list(xop, flags, name); break; case XSS_TRANSITION(XSS_OPEN_LEAF_LIST, XSS_EMIT_LEAF_LIST): break; case XSS_TRANSITION(XSS_OPEN_LIST, XSS_EMIT_LEAF_LIST): /* * We need to be backward compatible with the pre-xo_open_leaf_list * API, where both lists and leaf-lists were opened as lists. So * if we find an open list that hasn't had anything written to it, * we'll accept it. 
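* * For example, older code may call xo_open_list("names") and then emit "{l:names}" leaf-list fields directly; the already-open, still-empty list is accepted as if xo_open_leaf_list had been used.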
*/ break; default: xo_failure(xop, "unknown transition: (%u -> %u)", xsp->xs_state, new_state); } /* Handle the flush flag */ if (rc >= 0 && XOF_ISSET(xop, XOF_FLUSH)) if (xo_flush_h(xop) < 0) rc = -1; /* We have now officially made output */ XOIF_SET(xop, XOIF_MADE_OUTPUT); return rc; marker_prevents_close: xo_failure(xop, "marker '%s' prevents transition from %s to %s", xop->xo_stack[xop->xo_depth].xs_name, xo_state_name(old_state), xo_state_name(new_state)); return -1; } xo_ssize_t xo_open_marker_h (xo_handle_t *xop, const char *name) { xop = xo_default(xop); xo_depth_change(xop, name, 1, 0, XSS_MARKER, xop->xo_stack[xop->xo_depth].xs_flags & XSF_MARKER_FLAGS); return 0; } xo_ssize_t xo_open_marker (const char *name) { return xo_open_marker_h(NULL, name); } xo_ssize_t xo_close_marker_h (xo_handle_t *xop, const char *name) { xop = xo_default(xop); return xo_do_close(xop, name, XSS_MARKER); } xo_ssize_t xo_close_marker (const char *name) { return xo_close_marker_h(NULL, name); } /* * Record custom output functions into the xo handle, allowing * integration with a variety of output frameworks. */ void xo_set_writer (xo_handle_t *xop, void *opaque, xo_write_func_t write_func, xo_close_func_t close_func, xo_flush_func_t flush_func) { xop = xo_default(xop); xop->xo_opaque = opaque; xop->xo_write = write_func; xop->xo_close = close_func; xop->xo_flush = flush_func; } void xo_set_allocator (xo_realloc_func_t realloc_func, xo_free_func_t free_func) { xo_realloc = realloc_func; xo_free = free_func; } xo_ssize_t xo_flush_h (xo_handle_t *xop) { ssize_t rc; xop = xo_default(xop); switch (xo_style(xop)) { case XO_STYLE_ENCODER: xo_encoder_handle(xop, XO_OP_FLUSH, NULL, NULL, 0); } rc = xo_write(xop); if (rc >= 0 && xop->xo_flush) if (xop->xo_flush(xop->xo_opaque) < 0) return -1; return rc; } xo_ssize_t xo_flush (void) { return xo_flush_h(NULL); } xo_ssize_t xo_finish_h (xo_handle_t *xop) { const char *open_if_empty = ""; xop = xo_default(xop); if (!XOF_ISSET(xop, XOF_NO_CLOSE)) xo_do_close_all(xop, xop->xo_stack); switch (xo_style(xop)) { case XO_STYLE_JSON: if (!XOF_ISSET(xop, XOF_NO_TOP)) { const char *pre_nl = XOF_ISSET(xop, XOF_PRETTY) ? "\n" : ""; if (XOIF_ISSET(xop, XOIF_TOP_EMITTED)) XOIF_CLEAR(xop, XOIF_TOP_EMITTED); /* Turn off before output */ else if (!XOIF_ISSET(xop, XOIF_MADE_OUTPUT)) { open_if_empty = "{ "; pre_nl = ""; } xo_printf(xop, "%s%*s%s}\n", pre_nl, xo_indent(xop), "", open_if_empty); } break; case XO_STYLE_ENCODER: xo_encoder_handle(xop, XO_OP_FINISH, NULL, NULL, 0); break; } return xo_flush_h(xop); } xo_ssize_t xo_finish (void) { return xo_finish_h(NULL); } /* * xo_finish_atexit is suitable for atexit() calls, to force cleanup * and finalize output. */ void xo_finish_atexit (void) { (void) xo_finish_h(NULL); } /* * Generate an error message, such as would be displayed on stderr */ void -xo_error_hv (xo_handle_t *xop, const char *fmt, va_list vap) +xo_errorn_hv (xo_handle_t *xop, int need_newline, const char *fmt, va_list vap) { xop = xo_default(xop); /* * If the format string doesn't end with a newline, we pop * one on ourselves.
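 *
 * For example, xo_errorn("open failed: %s", strerror(errno)) gains a
 * trailing newline here, while xo_error("partial ") is passed through
 * untouched, since need_newline is zero for the xo_error family.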
*/ - ssize_t len = strlen(fmt); - if (len > 0 && fmt[len - 1] != '\n') { - char *newfmt = alloca(len + 2); - memcpy(newfmt, fmt, len); - newfmt[len] = '\n'; - newfmt[len + 1] = '\0'; - fmt = newfmt; + if (need_newline) { + ssize_t len = strlen(fmt); + if (len > 0 && fmt[len - 1] != '\n') { + char *newfmt = alloca(len + 2); + memcpy(newfmt, fmt, len); + newfmt[len] = '\n'; + newfmt[len + 1] = '\0'; + fmt = newfmt; + } } switch (xo_style(xop)) { case XO_STYLE_TEXT: vfprintf(stderr, fmt, vap); break; case XO_STYLE_HTML: va_copy(xop->xo_vap, vap); xo_buf_append_div(xop, "error", 0, NULL, 0, NULL, 0, fmt, strlen(fmt), NULL, 0); if (XOIF_ISSET(xop, XOIF_DIV_OPEN)) xo_line_close(xop); xo_write(xop); va_end(xop->xo_vap); bzero(&xop->xo_vap, sizeof(xop->xo_vap)); break; case XO_STYLE_XML: case XO_STYLE_JSON: va_copy(xop->xo_vap, vap); xo_open_container_h(xop, "error"); xo_format_value(xop, "message", 7, NULL, 0, fmt, strlen(fmt), NULL, 0, 0); xo_close_container_h(xop, "error"); va_end(xop->xo_vap); bzero(&xop->xo_vap, sizeof(xop->xo_vap)); break; case XO_STYLE_SDPARAMS: case XO_STYLE_ENCODER: break; } } void xo_error_h (xo_handle_t *xop, const char *fmt, ...) { va_list vap; va_start(vap, fmt); - xo_error_hv(xop, fmt, vap); + xo_errorn_hv(xop, 0, fmt, vap); va_end(vap); } /* * Generate an error message, such as would be displayed on stderr */ void xo_error (const char *fmt, ...) { va_list vap; va_start(vap, fmt); - xo_error_hv(NULL, fmt, vap); + xo_errorn_hv(NULL, 0, fmt, vap); va_end(vap); } +void +xo_errorn_h (xo_handle_t *xop, const char *fmt, ...) +{ + va_list vap; + + va_start(vap, fmt); + xo_errorn_hv(xop, 1, fmt, vap); + va_end(vap); +} + /* + * Generate an error message, such as would be displayed on stderr + */ +void +xo_errorn (const char *fmt, ...) +{ + va_list vap; + + va_start(vap, fmt); + xo_errorn_hv(NULL, 1, fmt, vap); + va_end(vap); +} + +/* * Parse any libxo-specific options from the command line, removing them * so the main() argument parsing won't see them. We return the new value * for argc or -1 for error. If an error occurred, the program should * exit. A suitable error message has already been displayed. */ int xo_parse_args (int argc, char **argv) { static char libxo_opt[] = "--libxo"; char *cp; int i, save; - /* Save our program name for xo_err and friends */ - xo_program = argv[0]; - cp = strrchr(xo_program, '/'); - if (cp) - xo_program = ++cp; - else - cp = argv[0]; /* Reset to front of string */ + /* + * If xo_set_program has already been called, we honor that value + */ + if (xo_program == NULL) { + /* Save our program name for xo_err and friends */ + xo_program = argv[0]; + cp = strrchr(xo_program, '/'); + if (cp) + xo_program = ++cp; + else + cp = argv[0]; /* Reset to front of string */ - /* GNU tools add an annoying ".test" as the program extension; remove it */ - size_t len = strlen(xo_program); - static const char gnu_ext[] = ".test"; - if (len >= sizeof(gnu_ext)) { - cp += len + 1 - sizeof(gnu_ext); - if (xo_streq(cp, gnu_ext)) - *cp = '\0'; + /* + * GNU libtool adds an annoying ".test" as the program + * extension; we remove it. libtool also adds a "lt-" prefix + * that we cannot remove.
+ */ + size_t len = strlen(xo_program); + static const char gnu_ext[] = ".test"; + if (len >= sizeof(gnu_ext)) { + cp += len + 1 - sizeof(gnu_ext); + if (xo_streq(cp, gnu_ext)) + *cp = '\0'; + } } xo_handle_t *xop = xo_default(NULL); for (save = i = 1; i < argc; i++) { if (argv[i] == NULL || strncmp(argv[i], libxo_opt, sizeof(libxo_opt) - 1) != 0) { if (save != i) argv[save] = argv[i]; save += 1; continue; } cp = argv[i] + sizeof(libxo_opt) - 1; if (*cp == '\0') { cp = argv[++i]; if (cp == NULL) { xo_warnx("missing libxo option"); return -1; } if (xo_set_options(xop, cp) < 0) return -1; } else if (*cp == ':') { if (xo_set_options(xop, cp) < 0) return -1; } else if (*cp == '=') { if (xo_set_options(xop, ++cp) < 0) return -1; } else if (*cp == '-') { cp += 1; if (xo_streq(cp, "check")) { exit(XO_HAS_LIBXO); } else { xo_warnx("unknown libxo option: '%s'", argv[i]); return -1; } } else { xo_warnx("unknown libxo option: '%s'", argv[i]); return -1; } } /* * We only want to do color output on terminals, but we only want * to do this if the user has asked for color. */ if (XOF_ISSET(xop, XOF_COLOR_ALLOWED) && isatty(1)) XOF_SET(xop, XOF_COLOR); argv[save] = NULL; return save; } /* * Debugging function that dumps the current stack of open libxo constructs, * suitable for calling from the debugger. */ void xo_dump_stack (xo_handle_t *xop) { int i; xo_stack_t *xsp; xop = xo_default(xop); fprintf(stderr, "Stack dump:\n"); xsp = xop->xo_stack; for (i = 1, xsp++; i <= xop->xo_depth; i++, xsp++) { fprintf(stderr, " [%d] %s '%s' [%x]\n", i, xo_state_name(xsp->xs_state), xsp->xs_name ?: "--", xsp->xs_flags); } } /* * Record the program name used for error messages */ void xo_set_program (const char *name) { xo_program = name; } void xo_set_version_h (xo_handle_t *xop, const char *version) { xop = xo_default(xop); if (version == NULL || strchr(version, '"') != NULL) return; if (!xo_style_is_encoding(xop)) return; switch (xo_style(xop)) { case XO_STYLE_XML: /* For XML, we record this as an attribute for the first tag */ xo_attr_h(xop, "version", "%s", version); break; case XO_STYLE_JSON: /* * For JSON, we record the version string in our handle, and emit * it in xo_emit_top. */ xop->xo_version = xo_strndup(version, -1); break; case XO_STYLE_ENCODER: xo_encoder_handle(xop, XO_OP_VERSION, NULL, version, 0); break; } } /* * Set the version number for the API content being carried through * the xo handle. */ void xo_set_version (const char *version) { xo_set_version_h(NULL, version); } /* * Generate a warning. Normally, this is a text message written to * standard error. If the XOF_WARN_XML flag is set, then we generate * XMLified content on standard output. */ void xo_emit_warn_hcv (xo_handle_t *xop, int as_warning, int code, const char *fmt, va_list vap) { xop = xo_default(xop); if (fmt == NULL) return; xo_open_marker_h(xop, "xo_emit_warn_hcv"); xo_open_container_h(xop, as_warning ? 
"__warning" : "__error"); if (xo_program) xo_emit("{wc:program}", xo_program); if (xo_style(xop) == XO_STYLE_XML || xo_style(xop) == XO_STYLE_JSON) { va_list ap; xo_handle_t temp; bzero(&temp, sizeof(temp)); temp.xo_style = XO_STYLE_TEXT; xo_buf_init(&temp.xo_data); xo_depth_check(&temp, XO_DEPTH); va_copy(ap, vap); (void) xo_emit_hv(&temp, fmt, ap); va_end(ap); xo_buffer_t *src = &temp.xo_data; xo_format_value(xop, "message", 7, src->xb_bufp, src->xb_curp - src->xb_bufp, NULL, 0, NULL, 0, 0); xo_free(temp.xo_stack); xo_buf_cleanup(src); } (void) xo_emit_hv(xop, fmt, vap); ssize_t len = strlen(fmt); if (len > 0 && fmt[len - 1] != '\n') { if (code > 0) { const char *msg = strerror(code); if (msg) xo_emit_h(xop, ": {G:strerror}{g:error/%s}", msg); } xo_emit("\n"); } xo_close_marker_h(xop, "xo_emit_warn_hcv"); xo_flush_h(xop); } void xo_emit_warn_hc (xo_handle_t *xop, int code, const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_emit_warn_hcv(xop, 1, code, fmt, vap); va_end(vap); } void xo_emit_warn_c (int code, const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_emit_warn_hcv(NULL, 1, code, fmt, vap); va_end(vap); } void xo_emit_warn (const char *fmt, ...) { int code = errno; va_list vap; va_start(vap, fmt); xo_emit_warn_hcv(NULL, 1, code, fmt, vap); va_end(vap); } void xo_emit_warnx (const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_emit_warn_hcv(NULL, 1, -1, fmt, vap); va_end(vap); } void xo_emit_err_v (int eval, int code, const char *fmt, va_list vap) { xo_emit_warn_hcv(NULL, 0, code, fmt, vap); xo_finish(); exit(eval); } void xo_emit_err (int eval, const char *fmt, ...) { int code = errno; va_list vap; va_start(vap, fmt); xo_emit_err_v(eval, code, fmt, vap); /*NOTREACHED*/ } void xo_emit_errx (int eval, const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_emit_err_v(eval, -1, fmt, vap); /* This will exit */ /*NOTREACHED*/ } void xo_emit_errc (int eval, int code, const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_emit_err_v(eval, code, fmt, vap); /* This will exit */ /*NOTREACHED*/ } /* * Get the opaque private pointer for an xo handle */ void * xo_get_private (xo_handle_t *xop) { xop = xo_default(xop); return xop->xo_private; } /* * Set the opaque private pointer for an xo handle. */ void xo_set_private (xo_handle_t *xop, void *opaque) { xop = xo_default(xop); xop->xo_private = opaque; } /* * Get the encoder function */ xo_encoder_func_t xo_get_encoder (xo_handle_t *xop) { xop = xo_default(xop); return xop->xo_encoder; } /* * Record an encoder callback function in an xo handle. */ void xo_set_encoder (xo_handle_t *xop, xo_encoder_func_t encoder) { xop = xo_default(xop); xop->xo_style = XO_STYLE_ENCODER; xop->xo_encoder = encoder; } /* * The xo(1) utility needs to be able to open and close lists and * instances, but since it's called without "state", we cannot * rely on the state transitions (in xo_transition) to DTRT, so * we have a mechanism for external parties to "force" transitions * that would otherwise be impossible. This is not a general * mechanism, and is really tailored only for xo(1). 
*/ void xo_explicit_transition (xo_handle_t *xop, xo_state_t new_state, const char *name, xo_xof_flags_t flags) { xo_xsf_flags_t xsf_flags; xop = xo_default(xop); switch (new_state) { case XSS_OPEN_LIST: xo_do_open_list(xop, flags, name); break; case XSS_OPEN_INSTANCE: xo_do_open_instance(xop, flags, name); break; case XSS_CLOSE_INSTANCE: xo_depth_change(xop, name, 1, 1, XSS_OPEN_INSTANCE, xo_stack_flags(flags)); xo_stack_set_flags(xop); xo_do_close_instance(xop, name); break; case XSS_CLOSE_LIST: xsf_flags = XOF_ISSET(xop, XOF_NOT_FIRST) ? XSF_NOT_FIRST : 0; xo_depth_change(xop, name, 1, 1, XSS_OPEN_LIST, XSF_LIST | xsf_flags | xo_stack_flags(flags)); xo_do_close_list(xop, name); break; } } Index: projects/clang1000-import/contrib/libxo/libxo/xo.h =================================================================== --- projects/clang1000-import/contrib/libxo/libxo/xo.h (revision 357178) +++ projects/clang1000-import/contrib/libxo/libxo/xo.h (revision 357179) @@ -1,693 +1,702 @@ /* * Copyright (c) 2014-2018, Juniper Networks, Inc. * All rights reserved. * This SOFTWARE is licensed under the LICENSE provided in the * ../Copyright file. By downloading, installing, copying, or otherwise * using the SOFTWARE, you agree to be bound by the terms of that * LICENSE. * Phil Shafer, July 2014 */ /** * libxo provides a means of generating text, XML, JSON, and HTML output * using a single set of function calls, maximizing the value of output * while minimizing the cost/impact on the code. * * Full documentation is available in ./doc/libxo.txt or online at: * http://juniper.github.io/libxo/libxo-manual.html */ #ifndef INCLUDE_XO_H #define INCLUDE_XO_H #include #include #include #include #include #include #ifdef __dead2 #define NORETURN __dead2 #else #define NORETURN #endif /* __dead2 */ /* * Normally we'd use the HAVE_PRINTFLIKE define triggered by the * --enable-printflike option to configure, but we don't install * our internal "xoconfig.h", and I'd rather not. Taking the * coward's path, we'll turn it on inside a #if that allows * others to turn it off where needed. Not ideal, but functional. 
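 *
 * In practice, a build that cannot tolerate these attributes can opt
 * out by defining NO_PRINTFLIKE (for example, -DNO_PRINTFLIKE in
 * CFLAGS), which turns PRINTFLIKE into a no-op below.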
*/ #if !defined(NO_PRINTFLIKE) #if defined(__linux) && !defined(__printflike) #define __printflike(_x, _y) __attribute__((__format__ (__printf__, _x, _y))) #endif #define PRINTFLIKE(_x, _y) __printflike(_x, _y) #else #define PRINTFLIKE(_x, _y) #endif /* NO_PRINTFLIKE */ /** Formatting types */ typedef unsigned short xo_style_t; #define XO_STYLE_TEXT 0 /** Generate text output */ #define XO_STYLE_XML 1 /** Generate XML output */ #define XO_STYLE_JSON 2 /** Generate JSON output */ #define XO_STYLE_HTML 3 /** Generate HTML output */ #define XO_STYLE_SDPARAMS 4 /* Generate syslog structured data params */ #define XO_STYLE_ENCODER 5 /* Generate calls to external encoder */ /** Flags for libxo */ typedef unsigned long long xo_xof_flags_t; #define XOF_BIT(_n) ((xo_xof_flags_t) 1 << (_n)) #define XOF_CLOSE_FP XOF_BIT(0) /** Close file pointer on xo_close() */ #define XOF_PRETTY XOF_BIT(1) /** Make 'pretty printed' output */ #define XOF_LOG_SYSLOG XOF_BIT(2) /** Log (on stderr) our syslog content */ #define XOF_RESV3 XOF_BIT(3) /* Unused */ #define XOF_WARN XOF_BIT(4) /** Generate warnings for broken calls */ #define XOF_XPATH XOF_BIT(5) /** Emit XPath attributes in HTML */ #define XOF_INFO XOF_BIT(6) /** Emit additional info fields (HTML) */ #define XOF_WARN_XML XOF_BIT(7) /** Emit warnings in XML (on stdout) */ #define XOF_NO_ENV XOF_BIT(8) /** Don't look at LIBXO_OPTIONS env var */ #define XOF_NO_VA_ARG XOF_BIT(9) /** Don't advance va_list w/ va_arg() */ #define XOF_DTRT XOF_BIT(10) /** Enable "do the right thing" mode */ #define XOF_KEYS XOF_BIT(11) /** Flag 'key' fields for xml and json */ #define XOF_IGNORE_CLOSE XOF_BIT(12) /** Ignore errors on close tags */ #define XOF_NOT_FIRST XOF_BIT(13) /* Not the first item (JSON) */ #define XOF_NO_LOCALE XOF_BIT(14) /** Don't bother with locale */ #define XOF_RESV15 XOF_BIT(15) /* Unused */ #define XOF_NO_TOP XOF_BIT(16) /** Don't emit the top braces in JSON */ #define XOF_RESV17 XOF_BIT(17) /* Unused */ #define XOF_UNITS XOF_BIT(18) /** Encode units in XML */ #define XOF_RESV19 XOF_BIT(19) /* Unused */ #define XOF_UNDERSCORES XOF_BIT(20) /** Replace dashes with underscores (JSON)*/ #define XOF_COLUMNS XOF_BIT(21) /** xo_emit should return a column count */ #define XOF_FLUSH XOF_BIT(22) /** Flush after each xo_emit call */ #define XOF_FLUSH_LINE XOF_BIT(23) /** Flush after each newline */ #define XOF_NO_CLOSE XOF_BIT(24) /** xo_finish won't close open elements */ #define XOF_COLOR_ALLOWED XOF_BIT(25) /** Allow color/effects to be enabled */ #define XOF_COLOR XOF_BIT(26) /** Enable color and effects */ #define XOF_NO_HUMANIZE XOF_BIT(27) /** Block the {h:} modifier */ #define XOF_LOG_GETTEXT XOF_BIT(28) /** Log (stderr) gettext lookup strings */ #define XOF_UTF8 XOF_BIT(29) /** Force text output to be UTF8 */ #define XOF_RETAIN_ALL XOF_BIT(30) /** Force use of XOEF_RETAIN */ #define XOF_RETAIN_NONE XOF_BIT(31) /** Prevent use of XOEF_RETAIN */ #define XOF_COLOR_MAP XOF_BIT(32) /** Color map has been initialized */ #define XOF_CONTINUATION XOF_BIT(33) /** Continuation of previous line */ typedef unsigned xo_emit_flags_t; /* Flags to xo_emit() and friends */ #define XOEF_RETAIN (1<<0) /* Retain parsed formatting information */ /* * The xo_info_t structure provides a mapping between names and * additional data emitted via HTML. 
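 *
 * A minimal sketch of its use (the field names are only illustrative):
 *
 *     xo_info_t info[] = {
 *         { "in-stock", "number", "Number of items in stock" },
 *         { XO_INFO_NULL },
 *     };
 *     xo_set_info(NULL, info, -1);   .. -1 means NULL-terminated list
 *
 * With XOF_INFO set, HTML output then carries these hints along with
 * the data.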
*/ typedef struct xo_info_s { const char *xi_name; /* Name of the element */ const char *xi_type; /* Type of field */ const char *xi_help; /* Description of field */ } xo_info_t; #define XO_INFO_NULL NULL, NULL, NULL /* Use '{ XO_INFO_NULL }' to end lists */ struct xo_handle_s; /* Opaque structure forward */ typedef struct xo_handle_s xo_handle_t; /* Handle for XO output */ /* * Early versions of the API used "int" instead of "size_t" for buffer * sizes. We want to fix this but allow for backwards compatibility * where needed. */ #ifdef XO_USE_INT_RETURN_CODES typedef int xo_ssize_t; /* Buffer size */ #else /* XO_USE_INT_RETURN_CODES */ typedef ssize_t xo_ssize_t; /* Buffer size */ #endif /* XO_USE_INT_RETURN_CODES */ typedef xo_ssize_t (*xo_write_func_t)(void *, const char *); typedef void (*xo_close_func_t)(void *); typedef int (*xo_flush_func_t)(void *); typedef void *(*xo_realloc_func_t)(void *, size_t); typedef void (*xo_free_func_t)(void *); /* * The formatter function mirrors "vsnprintf", with an additional argument * of the xo handle. The caller should return the number of bytes _needed_ * to fit the data, even if this exceeds 'len'. */ typedef xo_ssize_t (*xo_formatter_t)(xo_handle_t *, char *, xo_ssize_t, const char *, va_list); typedef void (*xo_checkpointer_t)(xo_handle_t *, va_list, int); xo_handle_t * xo_create (xo_style_t style, xo_xof_flags_t flags); xo_handle_t * xo_create_to_file (FILE *fp, xo_style_t style, xo_xof_flags_t flags); void xo_destroy (xo_handle_t *xop); void xo_set_writer (xo_handle_t *xop, void *opaque, xo_write_func_t write_func, xo_close_func_t close_func, xo_flush_func_t flush_func); void xo_set_allocator (xo_realloc_func_t realloc_func, xo_free_func_t free_func); void xo_set_style (xo_handle_t *xop, xo_style_t style); xo_style_t xo_get_style (xo_handle_t *xop); int xo_set_style_name (xo_handle_t *xop, const char *style); int xo_set_options (xo_handle_t *xop, const char *input); xo_xof_flags_t xo_get_flags (xo_handle_t *xop); void xo_set_flags (xo_handle_t *xop, xo_xof_flags_t flags); void xo_clear_flags (xo_handle_t *xop, xo_xof_flags_t flags); int xo_set_file_h (xo_handle_t *xop, FILE *fp); int xo_set_file (FILE *fp); void xo_set_info (xo_handle_t *xop, xo_info_t *infop, int count); void xo_set_formatter (xo_handle_t *xop, xo_formatter_t func, xo_checkpointer_t); void xo_set_depth (xo_handle_t *xop, int depth); xo_ssize_t xo_emit_hv (xo_handle_t *xop, const char *fmt, va_list vap); xo_ssize_t xo_emit_h (xo_handle_t *xop, const char *fmt, ...); xo_ssize_t xo_emit (const char *fmt, ...); xo_ssize_t xo_emit_hvf (xo_handle_t *xop, xo_emit_flags_t flags, const char *fmt, va_list vap); xo_ssize_t xo_emit_hf (xo_handle_t *xop, xo_emit_flags_t flags, const char *fmt, ...); xo_ssize_t xo_emit_f (xo_emit_flags_t flags, const char *fmt, ...); PRINTFLIKE(2, 0) static inline xo_ssize_t xo_emit_hvp (xo_handle_t *xop, const char *fmt, va_list vap) { return xo_emit_hv(xop, fmt, vap); } PRINTFLIKE(2, 3) static inline xo_ssize_t xo_emit_hp (xo_handle_t *xop, const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_ssize_t rc = xo_emit_hv(xop, fmt, vap); va_end(vap); return rc; } PRINTFLIKE(1, 2) static inline xo_ssize_t xo_emit_p (const char *fmt, ...) 
{ va_list vap; va_start(vap, fmt); xo_ssize_t rc = xo_emit_hv(NULL, fmt, vap); va_end(vap); return rc; } PRINTFLIKE(3, 0) static inline xo_ssize_t xo_emit_hvfp (xo_handle_t *xop, xo_emit_flags_t flags, const char *fmt, va_list vap) { return xo_emit_hvf(xop, flags, fmt, vap); } PRINTFLIKE(3, 4) static inline xo_ssize_t xo_emit_hfp (xo_handle_t *xop, xo_emit_flags_t flags, const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_ssize_t rc = xo_emit_hvf(xop, flags, fmt, vap); va_end(vap); return rc; } PRINTFLIKE(2, 3) static inline xo_ssize_t xo_emit_fp (xo_emit_flags_t flags, const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_ssize_t rc = xo_emit_hvf(NULL, flags, fmt, vap); va_end(vap); return rc; } xo_ssize_t xo_open_container_hf (xo_handle_t *xop, xo_xof_flags_t flags, const char *name); xo_ssize_t xo_open_container_h (xo_handle_t *xop, const char *name); xo_ssize_t xo_open_container (const char *name); xo_ssize_t xo_open_container_hd (xo_handle_t *xop, const char *name); xo_ssize_t xo_open_container_d (const char *name); xo_ssize_t xo_close_container_h (xo_handle_t *xop, const char *name); xo_ssize_t xo_close_container (const char *name); xo_ssize_t xo_close_container_hd (xo_handle_t *xop); xo_ssize_t xo_close_container_d (void); xo_ssize_t xo_open_list_hf (xo_handle_t *xop, xo_xof_flags_t flags, const char *name); xo_ssize_t xo_open_list_h (xo_handle_t *xop, const char *name); xo_ssize_t xo_open_list (const char *name); xo_ssize_t xo_open_list_hd (xo_handle_t *xop, const char *name); xo_ssize_t xo_open_list_d (const char *name); xo_ssize_t xo_close_list_h (xo_handle_t *xop, const char *name); xo_ssize_t xo_close_list (const char *name); xo_ssize_t xo_close_list_hd (xo_handle_t *xop); xo_ssize_t xo_close_list_d (void); xo_ssize_t xo_open_instance_hf (xo_handle_t *xop, xo_xof_flags_t flags, const char *name); xo_ssize_t xo_open_instance_h (xo_handle_t *xop, const char *name); xo_ssize_t xo_open_instance (const char *name); xo_ssize_t xo_open_instance_hd (xo_handle_t *xop, const char *name); xo_ssize_t xo_open_instance_d (const char *name); xo_ssize_t xo_close_instance_h (xo_handle_t *xop, const char *name); xo_ssize_t xo_close_instance (const char *name); xo_ssize_t xo_close_instance_hd (xo_handle_t *xop); xo_ssize_t xo_close_instance_d (void); xo_ssize_t xo_open_marker_h (xo_handle_t *xop, const char *name); xo_ssize_t xo_open_marker (const char *name); xo_ssize_t xo_close_marker_h (xo_handle_t *xop, const char *name); xo_ssize_t xo_close_marker (const char *name); xo_ssize_t xo_attr_h (xo_handle_t *xop, const char *name, const char *fmt, ...); xo_ssize_t xo_attr_hv (xo_handle_t *xop, const char *name, const char *fmt, va_list vap); xo_ssize_t xo_attr (const char *name, const char *fmt, ...); void xo_error_hv (xo_handle_t *xop, const char *fmt, va_list vap); void xo_error_h (xo_handle_t *xop, const char *fmt, ...); void xo_error (const char *fmt, ...); +void +xo_errorn_hv (xo_handle_t *xop, int need_newline, const char *fmt, va_list vap); + +void +xo_errorn_h (xo_handle_t *xop, const char *fmt, ...); + +void +xo_errorn (const char *fmt, ...); + xo_ssize_t xo_flush_h (xo_handle_t *xop); xo_ssize_t xo_flush (void); xo_ssize_t xo_finish_h (xo_handle_t *xop); xo_ssize_t xo_finish (void); void xo_finish_atexit (void); void xo_set_leading_xpath (xo_handle_t *xop, const char *path); void xo_warn_hc (xo_handle_t *xop, int code, const char *fmt, ...) PRINTFLIKE(3, 4); void xo_warn_c (int code, const char *fmt, ...) PRINTFLIKE(2, 3); void xo_warn (const char *fmt, ...) 
PRINTFLIKE(1, 2); void xo_warnx (const char *fmt, ...) PRINTFLIKE(1, 2); void xo_err (int eval, const char *fmt, ...) NORETURN PRINTFLIKE(2, 3); void xo_errx (int eval, const char *fmt, ...) NORETURN PRINTFLIKE(2, 3); void xo_errc (int eval, int code, const char *fmt, ...) NORETURN PRINTFLIKE(3, 4); void xo_message_hcv (xo_handle_t *xop, int code, const char *fmt, va_list vap) PRINTFLIKE(3, 0); void xo_message_hc (xo_handle_t *xop, int code, const char *fmt, ...) PRINTFLIKE(3, 4); void xo_message_c (int code, const char *fmt, ...) PRINTFLIKE(2, 3); void xo_message_e (const char *fmt, ...) PRINTFLIKE(1, 2); void xo_message (const char *fmt, ...) PRINTFLIKE(1, 2); void xo_emit_warn_hcv (xo_handle_t *xop, int as_warning, int code, const char *fmt, va_list vap); void xo_emit_warn_hc (xo_handle_t *xop, int code, const char *fmt, ...); void xo_emit_warn_c (int code, const char *fmt, ...); void xo_emit_warn (const char *fmt, ...); void xo_emit_warnx (const char *fmt, ...); void xo_emit_err (int eval, const char *fmt, ...) NORETURN; void xo_emit_errx (int eval, const char *fmt, ...) NORETURN; void xo_emit_errc (int eval, int code, const char *fmt, ...) NORETURN; PRINTFLIKE(4, 0) static inline void xo_emit_warn_hcvp (xo_handle_t *xop, int as_warning, int code, const char *fmt, va_list vap) { xo_emit_warn_hcv(xop, as_warning, code, fmt, vap); } PRINTFLIKE(3, 4) static inline void xo_emit_warn_hcp (xo_handle_t *xop, int code, const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_emit_warn_hcv(xop, 1, code, fmt, vap); va_end(vap); } PRINTFLIKE(2, 3) static inline void xo_emit_warn_cp (int code, const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_emit_warn_hcv(NULL, 1, code, fmt, vap); va_end(vap); } PRINTFLIKE(1, 2) static inline void xo_emit_warn_p (const char *fmt, ...) { int code = errno; va_list vap; va_start(vap, fmt); xo_emit_warn_hcv(NULL, 1, code, fmt, vap); va_end(vap); } PRINTFLIKE(1, 2) static inline void xo_emit_warnx_p (const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_emit_warn_hcv(NULL, 1, -1, fmt, vap); va_end(vap); } NORETURN PRINTFLIKE(2, 3) static inline void xo_emit_err_p (int eval, const char *fmt, ...) { int code = errno; va_list vap; va_start(vap, fmt); xo_emit_warn_hcv(NULL, 0, code, fmt, vap); va_end(vap); exit(eval); } PRINTFLIKE(2, 3) static inline void xo_emit_errx_p (int eval, const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_emit_warn_hcv(NULL, 0, -1, fmt, vap); va_end(vap); exit(eval); } PRINTFLIKE(3, 4) static inline void xo_emit_errc_p (int eval, int code, const char *fmt, ...) { va_list vap; va_start(vap, fmt); xo_emit_warn_hcv(NULL, 0, code, fmt, vap); va_end(vap); exit(eval); } void xo_emit_err_v (int eval, int code, const char *fmt, va_list vap) NORETURN PRINTFLIKE(3, 0); void xo_no_setlocale (void); /** * @brief Lift libxo-specific arguments from a set of arguments * * libxo-enabled programs typically use command line options to enable * all the nifty-cool libxo features. xo_parse_args() makes this simple * by pre-processing the command line arguments given to main(), handling * and removing the libxo-specific ones, meaning anything starting with * "--libxo". A full description of these arguments is in the base * documentation. * @param[in] argc Number of arguments (ala #main()) * @param[in] argv Array of argument strings (ala #main()) * @return New number of arguments, or -1 for failure.
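 *
 * Typical usage is a sketch like:
 *
 *     int
 *     main (int argc, char **argv)
 *     {
 *         argc = xo_parse_args(argc, argv);
 *         if (argc < 0)
 *             exit(EXIT_FAILURE);
 *         ...
 *     }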
*/ int xo_parse_args (int argc, char **argv); /** * This is the "magic" number returned by libxo-supporting commands * when passed the equally magic "--libxo-check" option. If you * return this, we can (unsafely) assume that since you know the magic * handshake, you'll happily handle future --libxo options and not do * something violent like reboot the box or create another hole in the * ozone layer. */ #define XO_HAS_LIBXO 121 /** * externs for libxo's version number strings */ extern const char xo_version[]; /** Base version triple string */ extern const char xo_version_extra[]; /** Extra version magic content */ /** * @brief Dump the internal stack of a libxo handle. * * This diagnostic function is something I will ask you to call from * your program when you write to tell me libxo has gone bat-stink * crazy and has discarded your list or container or content. Output * content will be what we lovingly call "developer entertainment". * @param[in] xop A valid libxo handle, or NULL for the default handle */ void xo_dump_stack (xo_handle_t *xop); /** * @brief Record the name of the program, suitable for error output. * * libxo will record the given name for use while generating error * messages. The contents are not copied, so the value must continue * to point to a valid memory location. This allows the caller to change * the value, but requires the caller to manage the memory. Typically * this is called with argv[0] from main(). * @param[in] name The name of the current application program */ void xo_set_program (const char *name); /** * @brief Add a version string to the output, where possible. * * Adds a version number to the output, suitable for tracking * changes in the content. This is only important for the "encoding" * format styles (XML and JSON) and allows a user of the data to * discern which version of the data model is in use. * @param[in] version The version number, encoded as a string */ void xo_set_version (const char *version); /** * #xo_set_version with a handle.
* @param[in] xop A valid libxo handle, or NULL for the default handle * @param[in] version The version number, encoded as a string */ void xo_set_version_h (xo_handle_t *xop, const char *version); void xo_open_log (const char *ident, int logopt, int facility); void xo_close_log (void); int xo_set_logmask (int maskpri); void xo_set_unit_test_mode (int value); void xo_syslog (int priority, const char *name, const char *message, ...); void xo_vsyslog (int priority, const char *name, const char *message, va_list args); typedef void (*xo_syslog_open_t)(void); typedef void (*xo_syslog_send_t)(const char *full_msg, const char *v0_hdr, const char *text_only); typedef void (*xo_syslog_close_t)(void); void xo_set_syslog_handler (xo_syslog_open_t open_func, xo_syslog_send_t send_func, xo_syslog_close_t close_func); void xo_set_syslog_enterprise_id (unsigned short eid); typedef void (*xo_simplify_field_func_t)(const char *, unsigned, int); char * xo_simplify_format (xo_handle_t *xop, const char *fmt, int with_numbers, xo_simplify_field_func_t field_cb); xo_ssize_t xo_emit_field_hv (xo_handle_t *xop, const char *rolmod, const char *contents, const char *fmt, const char *efmt, va_list vap); xo_ssize_t xo_emit_field_h (xo_handle_t *xop, const char *rolmod, const char *contents, const char *fmt, const char *efmt, ...); xo_ssize_t xo_emit_field (const char *rolmod, const char *contents, const char *fmt, const char *efmt, ...); void xo_retain_clear_all (void); void xo_retain_clear (const char *fmt); #endif /* INCLUDE_XO_H */ Index: projects/clang1000-import/contrib/libxo/libxo/xo_encoder.c =================================================================== --- projects/clang1000-import/contrib/libxo/libxo/xo_encoder.c (revision 357178) +++ projects/clang1000-import/contrib/libxo/libxo/xo_encoder.c (revision 357179) @@ -1,400 +1,417 @@ /* * Copyright (c) 2015, Juniper Networks, Inc. * All rights reserved. * This SOFTWARE is licensed under the LICENSE provided in the * ../Copyright file. By downloading, installing, copying, or otherwise * using the SOFTWARE, you agree to be bound by the terms of that * LICENSE. * Phil Shafer, August 2015 */ /** * libxo includes a number of fixed encoding styles, but other, * external encoders are needed to handle new encodings. Rather * than expose a swarm of libxo internals, we provide a distinct * API that is simpler than the one we use internally.
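 *
 * A minimal encoder callback is just a function over the operations it
 * receives (a sketch; the handler name is illustrative):
 *
 *     static int
 *     dump_handler (XO_ENCODER_HANDLER_ARGS)
 *     {
 *         fprintf(stderr, "op %s: '%s' '%s'\n",
 *             xo_encoder_op_name(op), name ?: "", value ?: "");
 *         return 0;
 *     }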
*/ #include #include #include #include #include #include #include "xo_config.h" #include "xo.h" #include "xo_encoder.h" #ifdef HAVE_DLFCN_H #include #if !defined(HAVE_DLFUNC) #define dlfunc(_p, _n) dlsym(_p, _n) #endif #else /* HAVE_DLFCN_H */ #define dlopen(_n, _f) NULL /* Fail */ #define dlsym(_p, _n) NULL /* Fail */ #define dlfunc(_p, _n) NULL /* Fail */ #endif /* HAVE_DLFCN_H */ static void xo_encoder_setup (void); /* Forward decl */ /* * Need a simple string collection */ typedef struct xo_string_node_s { TAILQ_ENTRY(xo_string_node_s) xs_link; /* Next string */ char xs_data[0]; /* String data */ } xo_string_node_t; typedef TAILQ_HEAD(xo_string_list_s, xo_string_node_s) xo_string_list_t; static inline void xo_string_list_init (xo_string_list_t *listp) { if (listp->tqh_last == NULL) TAILQ_INIT(listp); } static inline xo_string_node_t * xo_string_add (xo_string_list_t *listp, const char *str) { if (listp == NULL || str == NULL) return NULL; xo_string_list_init(listp); size_t len = strlen(str); xo_string_node_t *xsp; xsp = xo_realloc(NULL, sizeof(*xsp) + len + 1); if (xsp) { memcpy(xsp->xs_data, str, len); xsp->xs_data[len] = '\0'; TAILQ_INSERT_TAIL(listp, xsp, xs_link); } return xsp; } #define XO_STRING_LIST_FOREACH(_xsp, _listp) \ xo_string_list_init(_listp); \ TAILQ_FOREACH(_xsp, _listp, xs_link) static inline void xo_string_list_clean (xo_string_list_t *listp) { xo_string_node_t *xsp; xo_string_list_init(listp); for (;;) { xsp = TAILQ_FIRST(listp); if (xsp == NULL) break; TAILQ_REMOVE(listp, xsp, xs_link); xo_free(xsp); } } static xo_string_list_t xo_encoder_path; void xo_encoder_path_add (const char *path) { xo_encoder_setup(); if (path) xo_string_add(&xo_encoder_path, path); } /* ---------------------------------------------------------------------- */ typedef struct xo_encoder_node_s { TAILQ_ENTRY(xo_encoder_node_s) xe_link; /* Next session */ char *xe_name; /* Name for this encoder */ xo_encoder_func_t xe_handler; /* Callback function */ void *xe_dlhandle; /* dlopen handle */ } xo_encoder_node_t; typedef TAILQ_HEAD(xo_encoder_list_s, xo_encoder_node_s) xo_encoder_list_t; #define XO_ENCODER_LIST_FOREACH(_xep, _listp) \ xo_encoder_list_init(_listp); \ TAILQ_FOREACH(_xep, _listp, xe_link) static xo_encoder_list_t xo_encoders; static void xo_encoder_list_init (xo_encoder_list_t *listp) { if (listp->tqh_last == NULL) TAILQ_INIT(listp); } static xo_encoder_node_t * xo_encoder_list_add (const char *name) { if (name == NULL) return NULL; xo_encoder_node_t *xep = xo_realloc(NULL, sizeof(*xep)); if (xep) { ssize_t len = strlen(name) + 1; xep->xe_name = xo_realloc(NULL, len); if (xep->xe_name == NULL) { xo_free(xep); return NULL; } memcpy(xep->xe_name, name, len); TAILQ_INSERT_TAIL(&xo_encoders, xep, xe_link); } return xep; } void xo_encoders_clean (void) { xo_encoder_node_t *xep; xo_encoder_setup(); for (;;) { xep = TAILQ_FIRST(&xo_encoders); if (xep == NULL) break; TAILQ_REMOVE(&xo_encoders, xep, xe_link); if (xep->xe_dlhandle) dlclose(xep->xe_dlhandle); xo_free(xep); } xo_string_list_clean(&xo_encoder_path); } static void xo_encoder_setup (void) { static int initted; if (!initted) { initted = 1; xo_string_list_init(&xo_encoder_path); xo_encoder_list_init(&xo_encoders); xo_encoder_path_add(XO_ENCODERDIR); } } static xo_encoder_node_t * xo_encoder_find (const char *name) { xo_encoder_node_t *xep; xo_encoder_list_init(&xo_encoders); XO_ENCODER_LIST_FOREACH(xep, &xo_encoders) { if (xo_streq(xep->xe_name, name)) return xep; } return NULL; } static xo_encoder_node_t * xo_encoder_discover (const 
char *name) { void *dlp = NULL; char buf[MAXPATHLEN]; xo_string_node_t *xsp; xo_encoder_node_t *xep = NULL; XO_STRING_LIST_FOREACH(xsp, &xo_encoder_path) { static const char fmt[] = "%s/%s.enc"; char *dir = xsp->xs_data; size_t len = snprintf(buf, sizeof(buf), fmt, dir, name); if (len >= sizeof(buf)) /* Should not occur */ continue; dlp = dlopen((const char *) buf, RTLD_NOW); if (dlp) break; } if (dlp) { /* * If the library exists, find the initializer function and * call it. */ xo_encoder_init_func_t func; func = (xo_encoder_init_func_t) dlfunc(dlp, XO_ENCODER_INIT_NAME); if (func) { xo_encoder_init_args_t xei; bzero(&xei, sizeof(xei)); xei.xei_version = XO_ENCODER_VERSION; ssize_t rc = func(&xei); if (rc == 0 && xei.xei_handler) { xep = xo_encoder_list_add(name); if (xep) { xep->xe_handler = xei.xei_handler; xep->xe_dlhandle = dlp; } } } if (xep == NULL) dlclose(dlp); } return xep; } void xo_encoder_register (const char *name, xo_encoder_func_t func) { xo_encoder_setup(); xo_encoder_node_t *xep = xo_encoder_find(name); if (xep) /* "We alla-ready got one" */ return; xep = xo_encoder_list_add(name); if (xep) xep->xe_handler = func; } void xo_encoder_unregister (const char *name) { xo_encoder_setup(); xo_encoder_node_t *xep = xo_encoder_find(name); if (xep) { TAILQ_REMOVE(&xo_encoders, xep, xe_link); xo_free(xep); } } int xo_encoder_init (xo_handle_t *xop, const char *name) { xo_encoder_setup(); - const char *opts = strchr(name, ':'); + char opts_char = '\0'; + const char *col_opts = strchr(name, ':'); + const char *plus_opts = strchr(name, '+'); + + /* + * Find the option-separating character (plus or colon) which + * appears first in the options string. + */ + const char *opts = (col_opts == NULL) ? plus_opts + : (plus_opts == NULL) ? col_opts + : (plus_opts < col_opts) ? plus_opts : col_opts; + if (opts) { + opts_char = *opts; + /* Make a writable copy of the name */ size_t len = strlen(name); char *copy = alloca(len + 1); memcpy(copy, name, len); copy[len] = '\0'; char *opts_copy = copy + (opts - name); /* Move to the separator */ *opts_copy++ = '\0'; /* Trim it off */ opts = opts_copy; /* Use copy as options */ name = copy; /* Use trimmed copy as name */ } /* Can't have names containing '/' or ':' */ if (strchr(name, '/') != NULL || strchr(name, ':') != NULL) { xo_failure(xop, "invalid encoder name: %s", name); return -1; } /* * First we look on the list of known (registered) encoders. * If we don't find it, we follow the set of paths to find * the encoding library. */ xo_encoder_node_t *xep = xo_encoder_find(name); if (xep == NULL) { xep = xo_encoder_discover(name); if (xep == NULL) { xo_failure(xop, "encoder not found: %s", name); return -1; } } xo_set_encoder(xop, xep->xe_handler); int rc = xo_encoder_handle(xop, XO_OP_CREATE, name, NULL, 0); if (rc == 0 && opts != NULL) { - rc = xo_encoder_handle(xop, XO_OP_OPTIONS, name, opts, 0); + xo_encoder_op_t op; + + /* Encoder API is limited, so we're stuck with two different options */ + op = (opts_char == '+') ? XO_OP_OPTIONS_PLUS : XO_OP_OPTIONS; + rc = xo_encoder_handle(xop, op, name, opts, 0); } return rc; } /* * A couple of function varieties here, to allow for multiple * use cases. This variant is for when the main program knows * its own encoder needs.
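 *
 * For example (a sketch), a program that always emits CBOR might do:
 *
 *     xo_handle_t *xop = xo_encoder_create("cbor", 0);
 *     if (xop == NULL)
 *         xo_errx(1, "cannot create cbor encoder");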
*/ xo_handle_t * xo_encoder_create (const char *name, xo_xof_flags_t flags) { xo_handle_t *xop; xop = xo_create(XO_STYLE_ENCODER, flags); if (xop) { if (xo_encoder_init(xop, name)) { xo_destroy(xop); xop = NULL; } } return xop; } int xo_encoder_handle (xo_handle_t *xop, xo_encoder_op_t op, const char *name, const char *value, xo_xff_flags_t flags) { void *private = xo_get_private(xop); xo_encoder_func_t func = xo_get_encoder(xop); if (func == NULL) return -1; return func(xop, op, name, value, private, flags); } const char * xo_encoder_op_name (xo_encoder_op_t op) { static const char *names[] = { /* 0 */ "unknown", /* 1 */ "create", /* 2 */ "open_container", /* 3 */ "close_container", /* 4 */ "open_list", /* 5 */ "close_list", /* 6 */ "open_leaf_list", /* 7 */ "close_leaf_list", /* 8 */ "open_instance", /* 9 */ "close_instance", /* 10 */ "string", /* 11 */ "content", /* 12 */ "finish", /* 13 */ "flush", /* 14 */ "destroy", /* 15 */ "attr", /* 16 */ "version", /* 17 */ "options", }; if (op >= sizeof(names) / sizeof(names[0])) return "unknown"; return names[op]; } Index: projects/clang1000-import/contrib/libxo/libxo/xo_encoder.h =================================================================== --- projects/clang1000-import/contrib/libxo/libxo/xo_encoder.h (revision 357178) +++ projects/clang1000-import/contrib/libxo/libxo/xo_encoder.h (revision 357179) @@ -1,169 +1,170 @@ /* * Copyright (c) 2015, Juniper Networks, Inc. * All rights reserved. * This SOFTWARE is licensed under the LICENSE provided in the * ../Copyright file. By downloading, installing, copying, or otherwise * using the SOFTWARE, you agree to be bound by the terms of that * LICENSE. * Phil Shafer, August 2015 */ /* * NOTE WELL: This file is needed by software that implements an * external encoder for libxo that allows libxo data to be encoded in * new and bizarre formats. General libxo code should _never_ * include this header file. */ #ifndef XO_ENCODER_H #define XO_ENCODER_H #include /* * Expose libxo's memory allocation functions */ extern xo_realloc_func_t xo_realloc; extern xo_free_func_t xo_free; /* * Simple string comparison function (without the temptation * to forget the "== 0").
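 *
 * For example, xo_streq(name, "csv") reads as the equality test it is,
 * where the equivalent strcmp(name, "csv") == 0 is easy to mistype as
 * its own negation.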
*/ static inline int xo_streq (const char *one, const char *two) { return strcmp(one, two) == 0; } /* Flags for formatting functions */ typedef unsigned long xo_xff_flags_t; #define XFF_COLON (1<<0) /* Append a ":" */ #define XFF_COMMA (1<<1) /* Append a "," iff there's more output */ #define XFF_WS (1<<2) /* Append a blank */ #define XFF_ENCODE_ONLY (1<<3) /* Only emit for encoding styles (XML, JSON) */ #define XFF_QUOTE (1<<4) /* Force quotes */ #define XFF_NOQUOTE (1<<5) /* Force no quotes */ #define XFF_DISPLAY_ONLY (1<<6) /* Only emit for display styles (text, html) */ #define XFF_KEY (1<<7) /* Field is a key (for XPath) */ #define XFF_XML (1<<8) /* Force XML encoding style (for XPath) */ #define XFF_ATTR (1<<9) /* Escape value using attribute rules (XML) */ #define XFF_BLANK_LINE (1<<10) /* Emit a blank line */ #define XFF_NO_OUTPUT (1<<11) /* Do not make any output */ #define XFF_TRIM_WS (1<<12) /* Trim whitespace off encoded values */ #define XFF_LEAF_LIST (1<<13) /* A leaf-list (list of values) */ #define XFF_UNESCAPE (1<<14) /* Need to printf-style unescape the value */ #define XFF_HUMANIZE (1<<15) /* Humanize the value (for display styles) */ #define XFF_HN_SPACE (1<<16) /* Humanize: put space before suffix */ #define XFF_HN_DECIMAL (1<<17) /* Humanize: add one decimal place if <10 */ #define XFF_HN_1000 (1<<18) /* Humanize: use 1000, not 1024 */ #define XFF_GT_FIELD (1<<19) /* Call gettext() on a field */ #define XFF_GT_PLURAL (1<<20) /* Call dngettext to find plural form */ #define XFF_ARGUMENT (1<<21) /* Content provided via argument */ /* Flags to turn off when we don't want i18n processing */ #define XFF_GT_FLAGS (XFF_GT_FIELD | XFF_GT_PLURAL) typedef unsigned xo_encoder_op_t; /* Encoder operations; names are in xo_encoder.c:xo_encoder_op_name() */ #define XO_OP_UNKNOWN 0 #define XO_OP_CREATE 1 /* Called when the handle is init'd */ #define XO_OP_OPEN_CONTAINER 2 #define XO_OP_CLOSE_CONTAINER 3 #define XO_OP_OPEN_LIST 4 #define XO_OP_CLOSE_LIST 5 #define XO_OP_OPEN_LEAF_LIST 6 #define XO_OP_CLOSE_LEAF_LIST 7 #define XO_OP_OPEN_INSTANCE 8 #define XO_OP_CLOSE_INSTANCE 9 #define XO_OP_STRING 10 /* Quoted UTF-8 string */ #define XO_OP_CONTENT 11 /* Other content */ #define XO_OP_FINISH 12 /* Finish any pending output */ #define XO_OP_FLUSH 13 /* Flush any buffered output */ #define XO_OP_DESTROY 14 /* Clean up function */ #define XO_OP_ATTRIBUTE 15 /* Attribute name/value */ #define XO_OP_VERSION 16 /* Version string */ #define XO_OP_OPTIONS 17 /* Additional command line options */ +#define XO_OP_OPTIONS_PLUS 18 /* Additional command line options */ #define XO_ENCODER_HANDLER_ARGS \ xo_handle_t *xop __attribute__ ((__unused__)), \ xo_encoder_op_t op __attribute__ ((__unused__)), \ const char *name __attribute__ ((__unused__)), \ const char *value __attribute__ ((__unused__)), \ void *private __attribute__ ((__unused__)), \ xo_xff_flags_t flags __attribute__ ((__unused__)) typedef int (*xo_encoder_func_t)(XO_ENCODER_HANDLER_ARGS); typedef struct xo_encoder_init_args_s { unsigned xei_version; /* Current version */ xo_encoder_func_t xei_handler; /* Encoding handler */ } xo_encoder_init_args_t; #define XO_ENCODER_VERSION 1 /* Current version */ #define XO_ENCODER_INIT_ARGS \ xo_encoder_init_args_t *arg __attribute__ ((__unused__)) typedef int (*xo_encoder_init_func_t)(XO_ENCODER_INIT_ARGS); /* * Each encoder library must define a function named xo_encoder_init * that takes the arguments defined in XO_ENCODER_INIT_ARGS. It * should return zero for success. 
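 *
 * A sketch of such an initializer (the handler name is illustrative):
 *
 *     int
 *     xo_encoder_library_init (XO_ENCODER_INIT_ARGS)
 *     {
 *         arg->xei_handler = dump_handler;
 *         return 0;
 *     }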
*/ #define XO_ENCODER_INIT_NAME_TOKEN xo_encoder_library_init #define XO_STRINGIFY(_x) #_x #define XO_STRINGIFY2(_x) XO_STRINGIFY(_x) #define XO_ENCODER_INIT_NAME XO_STRINGIFY2(XO_ENCODER_INIT_NAME_TOKEN) extern int XO_ENCODER_INIT_NAME_TOKEN (XO_ENCODER_INIT_ARGS); void xo_encoder_register (const char *name, xo_encoder_func_t func); void xo_encoder_unregister (const char *name); void * xo_get_private (xo_handle_t *xop); void xo_encoder_path_add (const char *path); void xo_set_private (xo_handle_t *xop, void *opaque); xo_encoder_func_t xo_get_encoder (xo_handle_t *xop); void xo_set_encoder (xo_handle_t *xop, xo_encoder_func_t encoder); int xo_encoder_init (xo_handle_t *xop, const char *name); xo_handle_t * xo_encoder_create (const char *name, xo_xof_flags_t flags); int xo_encoder_handle (xo_handle_t *xop, xo_encoder_op_t op, const char *name, const char *value, xo_xff_flags_t flags); void xo_encoders_clean (void); const char * xo_encoder_op_name (xo_encoder_op_t op); /* * xo_failure is used to announce internal failures, when "warn" is on */ void xo_failure (xo_handle_t *xop, const char *fmt, ...); #endif /* XO_ENCODER_H */ Index: projects/clang1000-import/contrib/libxo/tests/core/Makefile.am =================================================================== --- projects/clang1000-import/contrib/libxo/tests/core/Makefile.am (revision 357178) +++ projects/clang1000-import/contrib/libxo/tests/core/Makefile.am (revision 357179) @@ -1,145 +1,148 @@ # # $Id$ # # Copyright 2014, Juniper Networks, Inc. # All rights reserved. # This SOFTWARE is licensed under the LICENSE provided in the # ../Copyright file. By downloading, installing, copying, or otherwise # using the SOFTWARE, you agree to be bound by the terms of that # LICENSE. AM_CFLAGS = -I${top_srcdir} -I${top_srcdir}/libxo # Ick: maintained by hand! 
TEST_CASES = \ test_01.c \ test_02.c \ test_03.c \ test_04.c \ test_05.c \ test_06.c \ test_07.c \ test_08.c \ test_09.c \ test_10.c \ test_11.c \ test_12.c test_01_test_SOURCES = test_01.c test_02_test_SOURCES = test_02.c test_03_test_SOURCES = test_03.c test_04_test_SOURCES = test_04.c test_05_test_SOURCES = test_05.c test_06_test_SOURCES = test_06.c test_07_test_SOURCES = test_07.c test_08_test_SOURCES = test_08.c test_09_test_SOURCES = test_09.c test_10_test_SOURCES = test_10.c test_11_test_SOURCES = test_11.c test_12_test_SOURCES = test_12.c # TEST_CASES := $(shell cd ${srcdir} ; echo *.c ) noinst_PROGRAMS = ${TEST_CASES:.c=.test} LDADD = \ ${top_builddir}/libxo/libxo.la if HAVE_HUMANIZE_NUMBER LDADD += -lutil endif EXTRA_DIST = \ ${TEST_CASES} \ ${addprefix saved/, ${TEST_CASES:.c=.T.err}} \ ${addprefix saved/, ${TEST_CASES:.c=.T.out}} \ ${addprefix saved/, ${TEST_CASES:.c=.XP.err}} \ ${addprefix saved/, ${TEST_CASES:.c=.XP.out}} \ ${addprefix saved/, ${TEST_CASES:.c=.JP.err}} \ ${addprefix saved/, ${TEST_CASES:.c=.JP.out}} \ ${addprefix saved/, ${TEST_CASES:.c=.HP.err}} \ ${addprefix saved/, ${TEST_CASES:.c=.HP.out}} \ ${addprefix saved/, ${TEST_CASES:.c=.X.err}} \ ${addprefix saved/, ${TEST_CASES:.c=.X.out}} \ ${addprefix saved/, ${TEST_CASES:.c=.J.err}} \ ${addprefix saved/, ${TEST_CASES:.c=.J.out}} \ ${addprefix saved/, ${TEST_CASES:.c=.H.err}} \ ${addprefix saved/, ${TEST_CASES:.c=.H.out}} \ ${addprefix saved/, ${TEST_CASES:.c=.HIPx.err}} \ ${addprefix saved/, ${TEST_CASES:.c=.HIPx.out}} \ ${addprefix saved/, ${TEST_CASES:.c=.E.err}} \ ${addprefix saved/, ${TEST_CASES:.c=.E.out}} S2O = | ${SED} '1,/@@/d' all: valgrind: @echo '## Running the regression tests under Valgrind' ${MAKE} CHECKER='valgrind -q' tests #TEST_TRACE = set -x ; TEST_JIG = \ ${CHECKER} ./$$base.test --libxo$$xoopts ${TEST_OPTS} \ > out/$$base.$$fmt.out 2> out/$$base.$$fmt.err ; \ ${DIFF} -Nu ${srcdir}/saved/$$base.$$fmt.out out/$$base.$$fmt.out ${S2O} ; \ ${DIFF} -Nu ${srcdir}/saved/$$base.$$fmt.err out/$$base.$$fmt.err ${S2O} TEST_JIG2 = \ echo "... $$test ... $$fmt ..."; \ -xoopts==warn,encoder=csv$$csv ; \ +xoopts==warn,$$csv ; \ ${TEST_JIG}; true; TEST_FORMATS = T XP JP HP X J H HIPx test tests: ${bin_PROGRAMS} @${MKDIR} -p out -@ ${TEST_TRACE} (for test in ${TEST_CASES} ; do \ base=`${BASENAME} $$test .c` ; \ (for fmt in ${TEST_FORMATS}; do \ echo "... $$test ... $$fmt ..."; \ xoopts=:W$$fmt ; \ ${TEST_JIG}; \ true; \ done) ; \ (for fmt in E; do \ echo "... $$test ... $$fmt ..."; \ xoopts==warn,encoder=test ; \ ${TEST_JIG}; \ true; \ done) \ done) -@ (${TEST_TRACE} test=test_01.c; base=test_01; \ - ( fmt=Ecsv1; csv= ; ${TEST_JIG2} ); \ - ( fmt=Ecsv2; csv=:path=top/data/item+no-header ; ${TEST_JIG2} ); \ - ( fmt=Ecsv3; csv=:path=item+leafs=sku.sold+no-quotes ; ${TEST_JIG2} ); \ + ( fmt=Ecsv1; csv=encoder=csv ; \ + ${TEST_JIG2} ); \ + ( fmt=Ecsv2; csv=encoder=csv:path=top/data/item:no-header ; \ + ${TEST_JIG2} ); \ + ( fmt=Ecsv3; csv=@csv:path=item:leafs=sku.sold:no-quotes ; \ + ${TEST_JIG2} ); \ ) one: -@(test=${TEST_CASE}; data=${TEST_DATA}; ${TEST_ONE} ; true) accept: -@(for test in ${TEST_CASES} ; do \ base=`${BASENAME} $$test .c` ; \ (for fmt in ${TEST_FORMATS} E ; do \ echo "... $$test ... $$fmt ..."; \ ${CP} out/$$base.$$fmt.out ${srcdir}/saved/$$base.$$fmt.out ; \ ${CP} out/$$base.$$fmt.err ${srcdir}/saved/$$base.$$fmt.err ; \ done) \ done) -@(test=test_01.c; base=test_01; for fmt in Ecsv1 Ecsv2 Ecsv3 ; do \ echo "... $$test ... 
$$fmt ..."; \ ${CP} out/$$base.$$fmt.out ${srcdir}/saved/$$base.$$fmt.out ; \ ${CP} out/$$base.$$fmt.err ${srcdir}/saved/$$base.$$fmt.err ; \ done) .c.test: $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -o $@ $< CLEANFILES = ${TEST_CASES:.c=.test} CLEANDIRS = out clean-local: rm -rf ${CLEANDIRS} Index: projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.H.out =================================================================== --- projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.H.out (revision 357178) +++ projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.H.out (revision 357179) @@ -1,7 +1,10 @@
em0
em0
We are
{emit}
{ting}
some
braces
abcdef
abcdef: Bad file descriptor
improper use of profanity; ten yard penalty; first down
length
abcdef
close
-1
returned
Bad file descriptor
good
close
-1
returned
Bad fi
good
improper use of profanity; ten yard penalty; first down
20
30
40
file
0
bytes
1
byte
2
bytes
3
bytes
4
bytes
10
/
20
/
30
mbufs <&> in use (current/cache/total)
50
from
Boston
64
left out of
640
64
left out of
640
beforeworkingafter:
string
:
10
11
1010
packets here/there/everywhere
1010
packets here/there/everywhere
(
15
/
20
/
125
)
(
15
/
20
/
125
)
(
15
/
20
/
125
)
(
15
/
20
/
125
)
Humanize:
21
,
57 K
,
96M
,
44M
,
1.2G
one
two
three
(null)
1:
1000
2:
test5000
3:
ten-longx
4:
xtest
this is an error
two more errors
this is an warning
two more warnings
V1/V2 packets
:
10
0004
tries
improper use of profanity; ten yard penalty; first down
Shut 'er down, Clancey! She's a-pumpin' mud! <>!,"!<> +
err message (1)
err message (2) +
err message (1) +
err message (2)
\ No newline at end of file Index: projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.HIPx.out =================================================================== --- projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.HIPx.out (revision 357178) +++ projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.HIPx.out (revision 357179) @@ -1,227 +1,242 @@
em0
em0
We are
{emit}
{ting}
some
braces
abcdef
abcdef: Bad file descriptor
improper use of profanity; ten yard penalty; first down
length
abcdef
close
-1
returned
Bad file descriptor
good
close
-1
returned
Bad fi
good
improper use of profanity; ten yard penalty; first down
20
30
40
file
0
bytes
1
byte
2
bytes
3
bytes
4
bytes
10
/
20
/
30
mbufs <&> in use (current/cache/total)
50
from
Boston
64
left out of
640
64
left out of
640
beforeworkingafter:
string
:
10
11
1010
packets here/there/everywhere
1010
packets here/there/everywhere
(
15
/
20
/
125
)
(
15
/
20
/
125
)
(
15
/
20
/
125
)
(
15
/
20
/
125
)
Humanize:
21
,
57 K
,
96M
,
44M
,
1.2G
one
two
three
(null)
1:
1000
2:
test5000
3:
ten-longx
4:
xtest
this is an error
two more errors
this is an warning
two more warnings
V1/V2 packets
:
10
0004
tries
improper use of profanity; ten yard penalty; first down
Shut 'er down, Clancey! She's a-pumpin' mud! <>!,"!<>
+
+
err message (1)
+
+
+
err message (2) +
+
+
+
err message (1) +
+
+
+
err message (2) +
+
Index: projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.HP.out =================================================================== --- projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.HP.out (revision 357178) +++ projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.HP.out (revision 357179) @@ -1,227 +1,242 @@
em0
em0
We are
{emit}
{ting}
some
braces
abcdef
abcdef: Bad file descriptor
improper use of profanity; ten yard penalty; first down
length
abcdef
close
-1
returned
Bad file descriptor
good
close
-1
returned
Bad fi
good
improper use of profanity; ten yard penalty; first down
20
30
40
file
0
bytes
1
byte
2
bytes
3
bytes
4
bytes
10
/
20
/
30
mbufs <&> in use (current/cache/total)
50
from
Boston
64
left out of
640
64
left out of
640
beforeworkingafter:
string
:
10
11
1010
packets here/there/everywhere
1010
packets here/there/everywhere
(
15
/
20
/
125
)
(
15
/
20
/
125
)
(
15
/
20
/
125
)
(
15
/
20
/
125
)
Humanize:
21
,
57 K
,
96M
,
44M
,
1.2G
one
two
three
(null)
1:
1000
2:
test5000
3:
ten-longx
4:
xtest
this is an error
two more errors
this is an warning
two more warnings
V1/V2 packets
:
10
0004
tries
improper use of profanity; ten yard penalty; first down
Shut 'er down, Clancey! She's a-pumpin' mud! <>!,"!<>
+
+
err message (1)
+
+
+
err message (2) +
+
+
+
err message (1) +
+
+
+
err message (2) +
+
Index: projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.J.out =================================================================== --- projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.J.out (revision 357178) +++ projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.J.out (revision 357179) @@ -1 +1 @@ -{"top": {"data": {"name":"em0","flags":"0x8843","name":"em0","flags":"0x8843","what":"braces","length":"abcdef","fd":-1,"error":"Bad file descriptor","test":"good","fd":-1,"error":"Bad fi","test":"good","lines":20,"words":30,"characters":40, "bytes": [0,1,2,3,4],"mbuf-current":10,"mbuf-cache":20,"mbuf-total":30,"distance":50,"location":"Boston","memory":64,"total":640,"memory":64,"total":640,"ten":10,"eleven":11,"unknown":1010,"unknown":1010,"min":15,"cur":20,"max":125,"min":15,"cur":20,"max":125,"min":15,"cur":20,"max":125,"min":15,"cur":20,"max":125,"val1":21,"val2":58368,"val3":100663296,"val4":44470272,"val5":1342172800, "flag": ["one","two","three"],"works":null,"empty-tag":true,"t1":"1000","t2":"test5000","t3":"ten-longx","t4":"xtest", "__error": {"message":"this is an error"}, "__error": {"message":"two more errors"}, "__warning": {"message":"this is an warning"}, "__warning": {"message":"two more warnings"},"count":10,"test":4, "error": {"message":"Shut 'er down, Clancey! She's a-pumpin' mud! <>!,\"!<>\n"}}}} +{"top": {"data": {"name":"em0","flags":"0x8843","name":"em0","flags":"0x8843","what":"braces","length":"abcdef","fd":-1,"error":"Bad file descriptor","test":"good","fd":-1,"error":"Bad fi","test":"good","lines":20,"words":30,"characters":40, "bytes": [0,1,2,3,4],"mbuf-current":10,"mbuf-cache":20,"mbuf-total":30,"distance":50,"location":"Boston","memory":64,"total":640,"memory":64,"total":640,"ten":10,"eleven":11,"unknown":1010,"unknown":1010,"min":15,"cur":20,"max":125,"min":15,"cur":20,"max":125,"min":15,"cur":20,"max":125,"min":15,"cur":20,"max":125,"val1":21,"val2":58368,"val3":100663296,"val4":44470272,"val5":1342172800, "flag": ["one","two","three"],"works":null,"empty-tag":true,"t1":"1000","t2":"test5000","t3":"ten-longx","t4":"xtest", "__error": {"message":"this is an error"}, "__error": {"message":"two more errors"}, "__warning": {"message":"this is an warning"}, "__warning": {"message":"two more warnings"},"count":10,"test":4, "error": {"message":"Shut 'er down, Clancey! She's a-pumpin' mud! 
<>!,\"!<>\n"}, "error": {"message":"err message (1)"}, "error": {"message":"err message (2)\n"}, "error": {"message":"err message (1)\n"}, "error": {"message":"err message (2)\n"}}}} Index: projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.JP.out =================================================================== --- projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.JP.out (revision 357178) +++ projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.JP.out (revision 357179) @@ -1,86 +1,98 @@ { "top": { "data": { "name": "em0", "flags": "0x8843", "name": "em0", "flags": "0x8843", "what": "braces", "length": "abcdef", "fd": -1, "error": "Bad file descriptor", "test": "good", "fd": -1, "error": "Bad fi", "test": "good", "lines": 20, "words": 30, "characters": 40, "bytes": [ 0, 1, 2, 3, 4 ], "mbuf-current": 10, "mbuf-cache": 20, "mbuf-total": 30, "distance": 50, "location": "Boston", "memory": 64, "total": 640, "memory": 64, "total": 640, "ten": 10, "eleven": 11, "unknown": 1010, "unknown": 1010, "min": 15, "cur": 20, "max": 125, "min": 15, "cur": 20, "max": 125, "min": 15, "cur": 20, "max": 125, "min": 15, "cur": 20, "max": 125, "val1": 21, "val2": 58368, "val3": 100663296, "val4": 44470272, "val5": 1342172800, "flag": [ "one", "two", "three" ], "works": null, "empty-tag": true, "t1": "1000", "t2": "test5000", "t3": "ten-longx", "t4": "xtest", "__error": { "message": "this is an error" }, "__error": { "message": "two more errors" }, "__warning": { "message": "this is an warning" }, "__warning": { "message": "two more warnings" }, "count": 10, "test": 4, "error": { "message": "Shut 'er down, Clancey! She's a-pumpin' mud! <>!,\"!<>\n" + }, + "error": { + "message": "err message (1)" + }, + "error": { + "message": "err message (2)\n" + }, + "error": { + "message": "err message (1)\n" + }, + "error": { + "message": "err message (2)\n" } } } } Index: projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.T.err =================================================================== --- projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.T.err (revision 357178) +++ projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.T.err (revision 357179) @@ -1,2 +1,5 @@ test_02: key field emitted after normal value field: 'name' Shut 'er down, Clancey! She's a-pumpin' mud! <>!,"!<> +err message (1)err message (2) +err message (1) +err message (2) Index: projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.X.out =================================================================== --- projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.X.out (revision 357178) +++ projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.X.out (revision 357179) @@ -1,7 +1,10 @@ em00x8843em00x8843bracesabcdef abcdef: Bad file descriptor improper use of profanity; ten yard penalty; first down abcdef-1Bad file descriptorgood-1Bad figoodimproper use of profanity; ten yard penalty; first down 2030400123410203050Boston646406464010111010101015201251520125152012515201252158368100663296444702721342172800onetwothreenull1000test5000ten-longxxtest<__error>this is an error<__error>two more errors<__warning>this is an warning<__warning>two more warnings104improper use of profanity; ten yard penalty; first down Shut 'er down, Clancey! She's a-pumpin' mud! 
<>!,"!<> +err message (1)err message (2) +err message (1) +err message (2) \ No newline at end of file Index: projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.XP.out =================================================================== --- projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.XP.out (revision 357178) +++ projects/clang1000-import/contrib/libxo/tests/core/saved/test_02.XP.out (revision 357179) @@ -1,91 +1,106 @@ em0 0x8843 em0 0x8843 braces abcdef abcdef: Bad file descriptor improper use of profanity; ten yard penalty; first down abcdef -1 Bad file descriptor good -1 Bad fi good improper use of profanity; ten yard penalty; first down 20 30 40 0 1 2 3 4 10 20 30 50 Boston 64 640 64 640 10 11 1010 1010 15 20 125 15 20 125 15 20 125 15 20 125 21 58368 100663296 44470272 1342172800 one two three null 1000 test5000 ten-longx xtest <__error> this is an error <__error> two more errors <__warning> this is an warning <__warning> two more warnings 10 4 improper use of profanity; ten yard penalty; first down Shut 'er down, Clancey! She's a-pumpin' mud! <>!,"!<> + + err message (1) + + + err message (2) + + + + err message (1) + + + + err message (2) + + Index: projects/clang1000-import/contrib/libxo/tests/core/test_02.c =================================================================== --- projects/clang1000-import/contrib/libxo/tests/core/test_02.c (revision 357178) +++ projects/clang1000-import/contrib/libxo/tests/core/test_02.c (revision 357179) @@ -1,155 +1,161 @@ /* * Copyright (c) 2014-2019, Juniper Networks, Inc. * All rights reserved. * This SOFTWARE is licensed under the LICENSE provided in the * ../Copyright file. By downloading, installing, copying, or otherwise * using the SOFTWARE, you agree to be bound by the terms of that * LICENSE. 
* Phil Shafer, July 2014 */ #include #include #include #include #include "xo.h" #include "xo_encoder.h" #include "xo_humanize.h" int main (int argc, char **argv) { + xo_set_program("test_02"); + argc = xo_parse_args(argc, argv); if (argc < 0) return 1; for (argc = 1; argv[argc]; argc++) { if (xo_streq(argv[argc], "xml")) xo_set_style(NULL, XO_STYLE_XML); else if (xo_streq(argv[argc], "json")) xo_set_style(NULL, XO_STYLE_JSON); else if (xo_streq(argv[argc], "text")) xo_set_style(NULL, XO_STYLE_TEXT); else if (xo_streq(argv[argc], "html")) xo_set_style(NULL, XO_STYLE_HTML); else if (xo_streq(argv[argc], "pretty")) xo_set_flags(NULL, XOF_PRETTY); else if (xo_streq(argv[argc], "xpath")) xo_set_flags(NULL, XOF_XPATH); else if (xo_streq(argv[argc], "info")) xo_set_flags(NULL, XOF_INFO); } xo_set_flags(NULL, XOF_UNITS); /* Always test w/ this */ xo_set_file(stdout); xo_open_container_h(NULL, "top"); xo_open_container("data"); xo_emit("{kt:name/%-*.*s}{eq:flags/0x%x}", 5, 5, "em0", 34883); xo_emit("{d:/%-*.*s}{etk:name}{eq:flags/0x%x}", 5, 5, "em0", "em0", 34883); xo_emit("We are {{emit}}{{ting}} some {:what}\n", "braces"); xo_message("abcdef"); close(-1); xo_message_e("abcdef"); xo_message("improper use of profanity; %s; %s", "ten yard penalty", "first down"); xo_emit("length {:length/%6.6s}\n", "abcdefghijklmnopqrstuvwxyz"); close(-1); xo_emit("close {:fd/%d} returned {:error/%m} {:test}\n", -1, "good"); close(-1); xo_emit("close {:fd/%d} returned {:error/%6.6m} {:test}\n", -1, "good"); xo_message("improper use of profanity; %s; %s", "ten yard penalty", "first down"); xo_emit(" {:lines/%7ju} {:words/%7ju} " "{:characters/%7ju} {d:filename/%s}\n", (uintmax_t) 20, (uintmax_t) 30, (uintmax_t) 40, "file"); int i; for (i = 0; i < 5; i++) xo_emit("{lw:bytes/%d}{Np:byte,bytes}\n", i); xo_emit("{:mbuf-current/%u}/{:mbuf-cache/%u}/{:mbuf-total/%u} " "{N:mbufs <&> in use (current\\/cache\\/total)}\n", 10, 20, 30); xo_emit("{:distance/%u}{Uw:miles} from {:location}\n", 50, "Boston"); xo_emit("{:memory/%u}{U:k} left out of {:total/%u}{U:kb}\n", 64, 640); xo_emit("{:memory/%u}{U:/%s} left out of {:total/%u}{U:/%s}\n", 64, "k", 640, "kilobytes"); xo_emit("{,title:/before%safter:}\n", "working"); xo_emit("{,display,white,colon:some/%s}" "{,value:ten/%ju}{,value:eleven/%ju}\n", "string", (uintmax_t) 10, (uintmax_t) 11); xo_emit("{:unknown/%u} " "{N:/packet%s here\\/there\\/everywhere}\n", 1010, "s"); xo_emit("{:unknown/%u} " "{,note:/packet%s here\\/there\\/everywhere}\n", 1010, "s"); xo_emit("({[:/%d}{n:min/15}/{n:cur/20}/{:max/%d}{]:})\n", 30, 125); xo_emit("({[:30}{:min/%u}/{:cur/%u}/{:max/%u}{]:})\n", 15, 20, 125); xo_emit("({[:-30}{n:min/15}/{n:cur/20}/{n:max/125}{]:})\n"); xo_emit("({[:}{:min/%u}/{:cur/%u}/{:max/%u}{]:/%d})\n", 15, 20, 125, -30); xo_emit("Humanize: {h:val1/%u}, {h,hn-space:val2/%u}, " "{h,hn-decimal:val3/%u}, {h,hn-1000:val4/%u}, " "{h,hn-decimal:val5/%u}\n", 21, 57 * 1024, 96 * 1024 * 1024, (42 * 1024 + 420) * 1024, 1342172800); xo_open_list("flag"); xo_emit("{lq:flag/one} {lq:flag/two} {lq:flag/three}\n"); xo_close_list("flag"); xo_emit("{n:works/%s}\n", NULL); xo_emit("{e:empty-tag/}"); xo_emit("1:{qt:t1/%*d} 2:{qt:t2/test%-*u} " "3:{qt:t3/%10sx} 4:{qt:t4/x%-*.*s}\n", 6, 1000, 8, 5000, "ten-long", 10, 10, "test"); xo_emit("{E:this is an error}\n"); xo_emit("{E:/%s more error%s}\n", "two", "s" ); xo_emit("{W:this is an warning}\n"); xo_emit("{W:/%s more warning%s}\n", "two", "s" ); xo_emit("{L:/V1\\/V2 packet%s}: {:count/%u}\n", "s", 10); int test = 4; xo_emit("{:test/%04d} 
{L:/tr%s}\n", test, (test == 1) ? "y" : "ies"); xo_message("improper use of profanity; %s; %s", "ten yard penalty", "first down"); xo_error("Shut 'er down, Clancey! She's a-pumpin' mud! <>!,\"!<>\n"); + xo_error("err message (%d)", 1); + xo_error("err message (%d)\n", 2); + xo_errorn("err message (%d)", 1); + xo_errorn("err message (%d)\n", 2); xo_close_container("data"); xo_close_container_h(NULL, "top"); xo_finish(); return 0; } Index: projects/clang1000-import/contrib/libxo/tests/core/test_12.c =================================================================== --- projects/clang1000-import/contrib/libxo/tests/core/test_12.c (revision 357178) +++ projects/clang1000-import/contrib/libxo/tests/core/test_12.c (revision 357179) @@ -1,93 +1,95 @@ /* * Copyright (c) 2014, Juniper Networks, Inc. * All rights reserved. * This SOFTWARE is licensed under the LICENSE provided in the * ../Copyright file. By downloading, installing, copying, or otherwise * using the SOFTWARE, you agree to be bound by the terms of that * LICENSE. * Phil Shafer, July 2014 */ #include #include #include #include #include "xo_config.h" #include "xo.h" #include "xo_encoder.h" int main (int argc, char **argv) { int i, count = 10; int mon = 0; xo_emit_flags_t flags = XOEF_RETAIN; int opt_color = 1; + xo_set_program("test_12"); + argc = xo_parse_args(argc, argv); if (argc < 0) return 1; for (argc = 1; argv[argc]; argc++) { if (xo_streq(argv[argc], "xml")) xo_set_style(NULL, XO_STYLE_XML); else if (xo_streq(argv[argc], "json")) xo_set_style(NULL, XO_STYLE_JSON); else if (xo_streq(argv[argc], "text")) xo_set_style(NULL, XO_STYLE_TEXT); else if (xo_streq(argv[argc], "html")) xo_set_style(NULL, XO_STYLE_HTML); else if (xo_streq(argv[argc], "no-color")) opt_color = 0; else if (xo_streq(argv[argc], "pretty")) xo_set_flags(NULL, XOF_PRETTY); else if (xo_streq(argv[argc], "xpath")) xo_set_flags(NULL, XOF_XPATH); else if (xo_streq(argv[argc], "info")) xo_set_flags(NULL, XOF_INFO); else if (xo_streq(argv[argc], "no-retain")) flags &= ~XOEF_RETAIN; else if (xo_streq(argv[argc], "big")) { if (argv[argc + 1]) count = atoi(argv[++argc]); } } xo_set_flags(NULL, XOF_UNITS); /* Always test w/ this */ if (opt_color) xo_set_flags(NULL, XOF_COLOR); /* Force color output */ xo_set_file(stdout); xo_open_container("top"); xo_open_container("data"); xo_emit("{C:fg-red,bg-green}Merry XMas!!{C:}\n"); xo_emit("One {C:fg-yellow,bg-blue}{:animal}{C:}, " "Two {C:fg-green,bg-yellow}{:animal}{C:}\n", "fish", "fish"); const char *fmt1 = "The {C:fg-red}{k:name}{C:reset} is " "{C:/fg-%s}{:color}{C:reset} til {:time/%02d:%02d}\n"; const char *fmt2 = "My {C:fg-red}{:hand}{C:reset} hand is " "{C:/fg-%s}{:color}{C:reset} til {:time/%02d:%02d}\n"; for (i = 0; i < count; i++) { xo_open_instance("thing"); xo_emit_f(flags, fmt1, "thing", "green", "green", 2, 15); xo_emit_f(flags, fmt2, "left", "blue", "blue", 3, 45); } xo_open_container("2by4"); xo_emit("There is {:4x4} in {:2morrow}\n", "truck", "tomorrow"); xo_close_container("2by4"); xo_close_container("data"); xo_close_container_h(NULL, "top"); xo_finish(); return 0; } Index: projects/clang1000-import/contrib/libxo =================================================================== --- projects/clang1000-import/contrib/libxo (revision 357178) +++ projects/clang1000-import/contrib/libxo (revision 357179) Property changes on: projects/clang1000-import/contrib/libxo ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,2 ## Merged 
/vendor/Juniper/libxo/dist:r354455-357124 Merged /head/contrib/libxo:r356848-357178 Index: projects/clang1000-import/lib/libxo/add.man =================================================================== --- projects/clang1000-import/lib/libxo/add.man (revision 357178) +++ projects/clang1000-import/lib/libxo/add.man (revision 357179) @@ -1,30 +1,30 @@ .\" $FreeBSD$ .Sh ADDITIONAL DOCUMENTATION .Fx uses .Nm libxo -version 1.3.1. +version 1.4.0. Complete documentation can be found on github: .Bd -literal -offset indent -https://juniper.github.io/libxo/1.3.1/html/index.html +https://juniper.github.io/libxo/1.4.0/html/index.html .Ed .Pp .Nm libxo lives on github as: .Bd -literal -offset indent https://github.com/Juniper/libxo .Ed .Pp The latest release of .Nm libxo is available at: .Bd -literal -offset indent https://github.com/Juniper/libxo/releases .Ed .Sh HISTORY The .Nm libxo library was added in .Fx 11.0 . .Sh AUTHOR Phil Shafer Index: projects/clang1000-import/lib/libxo/libxo/xo_config.h =================================================================== --- projects/clang1000-import/lib/libxo/libxo/xo_config.h (revision 357178) +++ projects/clang1000-import/lib/libxo/libxo/xo_config.h (revision 357179) @@ -1,257 +1,257 @@ /* $FreeBSD$ */ /* libxo/xo_config.h. Generated from xo_config.h.in by configure. */ /* libxo/xo_config.h.in. Generated from configure.ac by autoheader. */ /* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP systems. This function is required for `alloca.c' support on those systems. */ /* #undef CRAY_STACKSEG_END */ /* Define to 1 if using `alloca.c'. */ /* #undef C_ALLOCA */ /* Define to 1 if you have `alloca', as a function or macro. */ #define HAVE_ALLOCA 1 /* Define to 1 if you have and it should be used (not on Ultrix). */ /* #undef HAVE_ALLOCA_H */ /* Define to 1 if you have the `asprintf' function. */ #define HAVE_ASPRINTF 1 /* Define to 1 if you have the `bzero' function. */ #define HAVE_BZERO 1 /* Define to 1 if you have the `ctime' function. */ #define HAVE_CTIME 1 /* Define to 1 if you have the header file. */ #define HAVE_CTYPE_H 1 /* Define to 1 if you have the declaration of `__isthreaded', and to 0 if you don't. */ #define HAVE_DECL___ISTHREADED 1 /* Define to 1 if you have the header file. */ #define HAVE_DLFCN_H 1 /* Define to 1 if you have the `dlfunc' function. */ #define HAVE_DLFUNC 1 /* Define to 1 if you have the header file. */ #define HAVE_ERRNO_H 1 /* Define to 1 if you have the `fdopen' function. */ #define HAVE_FDOPEN 1 /* Define to 1 if you have the `flock' function. */ #define HAVE_FLOCK 1 /* Define to 1 if you have the `getpass' function. */ #define HAVE_GETPASS 1 /* Define to 1 if you have the `getprogname' function. */ #define HAVE_GETPROGNAME 1 /* Define to 1 if you have the `getrusage' function. */ #define HAVE_GETRUSAGE 1 /* gettext(3) */ /* #undef HAVE_GETTEXT */ /* Define to 1 if you have the `gettimeofday' function. */ #define HAVE_GETTIMEOFDAY 1 /* humanize_number(3) */ #define HAVE_HUMANIZE_NUMBER 1 /* Define to 1 if you have the header file. */ #define HAVE_INTTYPES_H 1 /* Define to 1 if you have the `crypto' library (-lcrypto). */ #define HAVE_LIBCRYPTO 1 /* Define to 1 if you have the `m' library (-lm). */ #define HAVE_LIBM 1 /* Define to 1 if you have the header file. */ #define HAVE_LIBUTIL_H 1 /* Define to 1 if your system has a GNU libc compatible `malloc' function, and to 0 otherwise. */ #define HAVE_MALLOC 1 /* Define to 1 if you have the `memmove' function. 
*/ #define HAVE_MEMMOVE 1 /* Define to 1 if you have the header file. */ #define HAVE_MEMORY_H 1 /* Define to 1 if you have the header file. */ /* #undef HAVE_MONITOR_H */ /* Support printflike */ /* #undef HAVE_PRINTFLIKE */ /* Define to 1 if your system has a GNU libc compatible `realloc' function, and to 0 otherwise. */ #define HAVE_REALLOC 1 /* Define to 1 if you have the `srand' function. */ #define HAVE_SRAND 1 /* Define to 1 if you have the `sranddev' function. */ #define HAVE_SRANDDEV 1 /* Define to 1 if you have the header file. */ #define HAVE_STDINT_H 1 /* Define to 1 if you have the header file. */ /* #undef HAVE_STDIO_EXT_H */ /* Define to 1 if you have the header file. */ #define HAVE_STDIO_H 1 /* Define to 1 if you have the header file. */ #define HAVE_STDLIB_H 1 /* Define to 1 if you have the header file. */ /* #undef HAVE_STDTIME_TZFILE_H */ /* Define to 1 if you have the `strchr' function. */ #define HAVE_STRCHR 1 /* Define to 1 if you have the `strcspn' function. */ #define HAVE_STRCSPN 1 /* Define to 1 if you have the `strerror' function. */ #define HAVE_STRERROR 1 /* Define to 1 if you have the header file. */ #define HAVE_STRINGS_H 1 /* Define to 1 if you have the header file. */ #define HAVE_STRING_H 1 /* Define to 1 if you have the `strlcpy' function. */ #define HAVE_STRLCPY 1 /* Define to 1 if you have the `strspn' function. */ #define HAVE_STRSPN 1 /* Have struct sockaddr_un.sun_len */ #define HAVE_SUN_LEN 1 /* Define to 1 if you have the `sysctlbyname' function. */ #define HAVE_SYSCTLBYNAME 1 /* Define to 1 if you have the header file. */ #define HAVE_SYS_PARAM_H 1 /* Define to 1 if you have the header file. */ #define HAVE_SYS_STAT_H 1 /* Define to 1 if you have the header file. */ #define HAVE_SYS_SYSCTL_H 1 /* Define to 1 if you have the header file. */ #define HAVE_SYS_TIME_H 1 /* Define to 1 if you have the header file. */ #define HAVE_SYS_TYPES_H 1 /* Define to 1 if you have the header file. */ #define HAVE_THREADS_H 1 /* thread-local setting */ #define HAVE_THREAD_LOCAL THREAD_LOCAL_before /* Define to 1 if you have the header file. */ /* #undef HAVE_TZFILE_H */ /* Define to 1 if you have the header file. */ #define HAVE_UNISTD_H 1 /* Define to 1 if you have the `__flbf' function. */ /* #undef HAVE___FLBF */ /* Enable debugging */ /* #undef LIBXO_DEBUG */ /* Enable text-only rendering */ /* #undef LIBXO_TEXT_ONLY */ /* Version number as dotted value */ -#define LIBXO_VERSION "1.3.1" +#define LIBXO_VERSION "1.4.0" /* Version number extra information */ #define LIBXO_VERSION_EXTRA "" /* Version number as a number */ -#define LIBXO_VERSION_NUMBER 1003001 +#define LIBXO_VERSION_NUMBER 1004000 /* Version number as string */ -#define LIBXO_VERSION_STRING "1003001" +#define LIBXO_VERSION_STRING "1004000" /* Enable local wcwidth implementation */ #define LIBXO_WCWIDTH 1 /* Define to the sub-directory where libtool stores uninstalled libraries. */ #define LT_OBJDIR ".libs/" /* Name of package */ #define PACKAGE "libxo" /* Define to the address where bug reports for this package should be sent. */ #define PACKAGE_BUGREPORT "phil@juniper.net" /* Define to the full name of this package. */ #define PACKAGE_NAME "libxo" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "libxo 1.3.1" +#define PACKAGE_STRING "libxo 1.4.0" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "libxo" /* Define to the home page for this package. */ #define PACKAGE_URL "" /* Define to the version of this package. 
*/ -#define PACKAGE_VERSION "1.3.1" +#define PACKAGE_VERSION "1.4.0" /* If using the C implementation of alloca, define if you know the direction of stack growth for your system; otherwise it will be automatically deduced at runtime. STACK_DIRECTION > 0 => grows toward higher addresses STACK_DIRECTION < 0 => grows toward lower addresses STACK_DIRECTION = 0 => direction of growth unknown */ /* #undef STACK_DIRECTION */ /* Define to 1 if you have the ANSI C header files. */ #define STDC_HEADERS 1 /* Use int return codes */ /* #undef USE_INT_RETURN_CODES */ /* Version number of package */ -#define VERSION "1.3.1" +#define VERSION "1.4.0" /* Retain hash bucket size */ /* #undef XO_RETAIN_SIZE */ /* Define to `__inline__' or `__inline' if that's what the C compiler calls it, or to nothing if 'inline' is not supported under any name. */ #ifndef __cplusplus /* #undef inline */ #endif /* Define to rpl_malloc if the replacement function should be used. */ /* #undef malloc */ /* Define to rpl_realloc if the replacement function should be used. */ /* #undef realloc */ /* Define to `unsigned int' if does not define. */ /* #undef size_t */ Index: projects/clang1000-import/sbin/newfs_msdos/mkfs_msdos.c =================================================================== --- projects/clang1000-import/sbin/newfs_msdos/mkfs_msdos.c (revision 357178) +++ projects/clang1000-import/sbin/newfs_msdos/mkfs_msdos.c (revision 357179) @@ -1,1000 +1,1033 @@ /* * Copyright (c) 1998 Robert Nordier * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
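The xo_config.h hunk above bumps LIBXO_VERSION_NUMBER from 1003001 to 1004000 alongside the 1.3.1 -> 1.4.0 version strings, which is consistent with a major * 1000000 + minor * 1000 + patch encoding. A hypothetical helper (not part of this commit) illustrating that reading:

#include <assert.h>

/* Encode a dotted libxo version as one comparable number,
 * matching the values seen in the xo_config.h hunk above. */
static long
libxo_version_number(int major, int minor, int patch)
{
	return (major * 1000000L + minor * 1000L + patch);
}

int
main(void)
{
	assert(libxo_version_number(1, 3, 1) == 1003001);	/* old value */
	assert(libxo_version_number(1, 4, 0) == 1004000);	/* new value */
	return (0);
}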
*/ #ifndef lint static const char rcsid[] = "$FreeBSD$"; #endif /* not lint */ #include +#ifdef MAKEFS +/* In the makefs case we only want struct disklabel */ +#include +#else #include #include #include #include +#endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mkfs_msdos.h" #define MAXU16 0xffff /* maximum unsigned 16-bit quantity */ #define BPN 4 /* bits per nibble */ #define NPB 2 /* nibbles per byte */ #define DOSMAGIC 0xaa55 /* DOS magic number */ #define MINBPS 512 /* minimum bytes per sector */ #define MAXSPC 128 /* maximum sectors per cluster */ #define MAXNFT 16 /* maximum number of FATs */ #define DEFBLK 4096 /* default block size */ #define DEFBLK16 2048 /* default block size FAT16 */ #define DEFRDE 512 /* default root directory entries */ #define RESFTE 2 /* reserved FAT entries */ #define MINCLS12 1U /* minimum FAT12 clusters */ #define MINCLS16 0xff5U /* minimum FAT16 clusters */ #define MINCLS32 0xfff5U /* minimum FAT32 clusters */ #define MAXCLS12 0xff4U /* maximum FAT12 clusters */ #define MAXCLS16 0xfff4U /* maximum FAT16 clusters */ #define MAXCLS32 0xffffff4U /* maximum FAT32 clusters */ #define mincls(fat) ((fat) == 12 ? MINCLS12 : \ (fat) == 16 ? MINCLS16 : \ MINCLS32) #define maxcls(fat) ((fat) == 12 ? MAXCLS12 : \ (fat) == 16 ? MAXCLS16 : \ MAXCLS32) #define mk1(p, x) \ (p) = (u_int8_t)(x) #define mk2(p, x) \ (p)[0] = (u_int8_t)(x), \ (p)[1] = (u_int8_t)((x) >> 010) #define mk4(p, x) \ (p)[0] = (u_int8_t)(x), \ (p)[1] = (u_int8_t)((x) >> 010), \ (p)[2] = (u_int8_t)((x) >> 020), \ (p)[3] = (u_int8_t)((x) >> 030) struct bs { u_int8_t bsJump[3]; /* bootstrap entry point */ u_int8_t bsOemName[8]; /* OEM name and version */ } __packed; struct bsbpb { u_int8_t bpbBytesPerSec[2]; /* bytes per sector */ u_int8_t bpbSecPerClust; /* sectors per cluster */ u_int8_t bpbResSectors[2]; /* reserved sectors */ u_int8_t bpbFATs; /* number of FATs */ u_int8_t bpbRootDirEnts[2]; /* root directory entries */ u_int8_t bpbSectors[2]; /* total sectors */ u_int8_t bpbMedia; /* media descriptor */ u_int8_t bpbFATsecs[2]; /* sectors per FAT */ u_int8_t bpbSecPerTrack[2]; /* sectors per track */ u_int8_t bpbHeads[2]; /* drive heads */ u_int8_t bpbHiddenSecs[4]; /* hidden sectors */ u_int8_t bpbHugeSectors[4]; /* big total sectors */ } __packed; struct bsxbpb { u_int8_t bpbBigFATsecs[4]; /* big sectors per FAT */ u_int8_t bpbExtFlags[2]; /* FAT control flags */ u_int8_t bpbFSVers[2]; /* file system version */ u_int8_t bpbRootClust[4]; /* root directory start cluster */ u_int8_t bpbFSInfo[2]; /* file system info sector */ u_int8_t bpbBackup[2]; /* backup boot sector */ u_int8_t bpbReserved[12]; /* reserved */ } __packed; struct bsx { u_int8_t exDriveNumber; /* drive number */ u_int8_t exReserved1; /* reserved */ u_int8_t exBootSignature; /* extended boot signature */ u_int8_t exVolumeID[4]; /* volume ID number */ u_int8_t exVolumeLabel[11]; /* volume label */ u_int8_t exFileSysType[8]; /* file system type */ } __packed; struct de { u_int8_t deName[11]; /* name and extension */ u_int8_t deAttributes; /* attributes */ u_int8_t rsvd[10]; /* reserved */ u_int8_t deMTime[2]; /* last-modified time */ u_int8_t deMDate[2]; /* last-modified date */ u_int8_t deStartCluster[2]; /* starting cluster */ u_int8_t deFileSize[4]; /* size */ } __packed; struct bpb { u_int bpbBytesPerSec; /* bytes per sector */ u_int bpbSecPerClust; /* sectors per cluster */ u_int bpbResSectors; /* reserved sectors */ u_int bpbFATs; /* 
number of FATs */ u_int bpbRootDirEnts; /* root directory entries */ u_int bpbSectors; /* total sectors */ u_int bpbMedia; /* media descriptor */ u_int bpbFATsecs; /* sectors per FAT */ u_int bpbSecPerTrack; /* sectors per track */ u_int bpbHeads; /* drive heads */ u_int bpbHiddenSecs; /* hidden sectors */ u_int bpbHugeSectors; /* big total sectors */ u_int bpbBigFATsecs; /* big sectors per FAT */ u_int bpbRootClust; /* root directory start cluster */ u_int bpbFSInfo; /* file system info sector */ u_int bpbBackup; /* backup boot sector */ }; #define BPBGAP 0, 0, 0, 0, 0, 0 static struct { const char *name; struct bpb bpb; } const stdfmt[] = { {"160", {512, 1, 1, 2, 64, 320, 0xfe, 1, 8, 1, BPBGAP}}, {"180", {512, 1, 1, 2, 64, 360, 0xfc, 2, 9, 1, BPBGAP}}, {"320", {512, 2, 1, 2, 112, 640, 0xff, 1, 8, 2, BPBGAP}}, {"360", {512, 2, 1, 2, 112, 720, 0xfd, 2, 9, 2, BPBGAP}}, {"640", {512, 2, 1, 2, 112, 1280, 0xfb, 2, 8, 2, BPBGAP}}, {"720", {512, 2, 1, 2, 112, 1440, 0xf9, 3, 9, 2, BPBGAP}}, {"1200", {512, 1, 1, 2, 224, 2400, 0xf9, 7, 15, 2, BPBGAP}}, {"1232", {1024,1, 1, 2, 192, 1232, 0xfe, 2, 8, 2, BPBGAP}}, {"1440", {512, 1, 1, 2, 224, 2880, 0xf0, 9, 18, 2, BPBGAP}}, {"2880", {512, 2, 1, 2, 240, 5760, 0xf0, 9, 36, 2, BPBGAP}} }; static const u_int8_t bootcode[] = { 0xfa, /* cli */ 0x31, 0xc0, /* xor ax,ax */ 0x8e, 0xd0, /* mov ss,ax */ 0xbc, 0x00, 0x7c, /* mov sp,7c00h */ 0xfb, /* sti */ 0x8e, 0xd8, /* mov ds,ax */ 0xe8, 0x00, 0x00, /* call $ + 3 */ 0x5e, /* pop si */ 0x83, 0xc6, 0x19, /* add si,+19h */ 0xbb, 0x07, 0x00, /* mov bx,0007h */ 0xfc, /* cld */ 0xac, /* lodsb */ 0x84, 0xc0, /* test al,al */ 0x74, 0x06, /* jz $ + 8 */ 0xb4, 0x0e, /* mov ah,0eh */ 0xcd, 0x10, /* int 10h */ 0xeb, 0xf5, /* jmp $ - 9 */ 0x30, 0xe4, /* xor ah,ah */ 0xcd, 0x16, /* int 16h */ 0xcd, 0x19, /* int 19h */ 0x0d, 0x0a, 'N', 'o', 'n', '-', 's', 'y', 's', 't', 'e', 'm', ' ', 'd', 'i', 's', 'k', 0x0d, 0x0a, 'P', 'r', 'e', 's', 's', ' ', 'a', 'n', 'y', ' ', 'k', 'e', 'y', ' ', 't', 'o', ' ', 'r', 'e', 'b', 'o', 'o', 't', 0x0d, 0x0a, 0 }; static volatile sig_atomic_t got_siginfo; static void infohandler(int); static int check_mounted(const char *, mode_t); static int getstdfmt(const char *, struct bpb *); static int getdiskinfo(int, const char *, const char *, int, struct bpb *); static void print_bpb(struct bpb *); static int ckgeom(const char *, u_int, const char *); static void mklabel(u_int8_t *, const char *); static int oklabel(const char *); static void setstr(u_int8_t *, const char *, size_t); int mkfs_msdos(const char *fname, const char *dtype, const struct msdos_options *op) { char buf[MAXPATHLEN]; struct sigaction si_sa; struct stat sb; struct timeval tv; struct bpb bpb; struct tm *tm; struct bs *bs; struct bsbpb *bsbpb; struct bsxbpb *bsxbpb; struct bsx *bsx; struct de *de; u_int8_t *img; const char *bname; ssize_t n; time_t now; u_int fat, bss, rds, cls, dir, lsn, x, x1, x2; u_int extra_res, alignment, saved_x, attempts=0; bool set_res, set_spf, set_spc; int fd, fd1, rv; struct msdos_options o = *op; img = NULL; rv = -1; fd = fd1 = -1; if (o.block_size && o.sectors_per_cluster) { warnx("Cannot specify both block size and sectors per cluster"); goto done; } if (o.OEM_string && strlen(o.OEM_string) > 8) { warnx("%s: bad OEM string", o.OEM_string); goto done; } if (o.create_size) { if (o.no_create) { warnx("create (-C) is incompatible with -N"); goto done; } fd = open(fname, O_RDWR | O_CREAT | O_TRUNC, 0644); if (fd == -1) { warnx("failed to create %s", fname); goto done; } if (ftruncate(fd, o.create_size)) { 
warnx("failed to initialize %jd bytes", (intmax_t)o.create_size); goto done; } } else if ((fd = open(fname, o.no_create ? O_RDONLY : O_RDWR)) == -1) { warn("%s", fname); goto done; } if (fstat(fd, &sb)) { warn("%s", fname); goto done; } if (o.create_size) { if (!S_ISREG(sb.st_mode)) warnx("warning, %s is not a regular file", fname); } else { -#ifndef MAKEFS +#ifdef MAKEFS + errx(1, "o.create_size must be set!"); +#else if (!S_ISCHR(sb.st_mode)) warnx("warning, %s is not a character device", fname); #endif } +#ifndef MAKEFS if (!o.no_create) if (check_mounted(fname, sb.st_mode) == -1) goto done; +#endif if (o.offset && o.offset != lseek(fd, o.offset, SEEK_SET)) { warnx("cannot seek to %jd", (intmax_t)o.offset); goto done; } memset(&bpb, 0, sizeof(bpb)); if (o.floppy) { if (getstdfmt(o.floppy, &bpb) == -1) goto done; bpb.bpbHugeSectors = bpb.bpbSectors; bpb.bpbSectors = 0; bpb.bpbBigFATsecs = bpb.bpbFATsecs; bpb.bpbFATsecs = 0; } if (o.drive_heads) bpb.bpbHeads = o.drive_heads; if (o.sectors_per_track) bpb.bpbSecPerTrack = o.sectors_per_track; if (o.bytes_per_sector) bpb.bpbBytesPerSec = o.bytes_per_sector; if (o.size) bpb.bpbHugeSectors = o.size; if (o.hidden_sectors_set) bpb.bpbHiddenSecs = o.hidden_sectors; if (!(o.floppy || (o.drive_heads && o.sectors_per_track && o.bytes_per_sector && o.size && o.hidden_sectors_set))) { if (getdiskinfo(fd, fname, dtype, o.hidden_sectors_set, &bpb) == -1) goto done; bpb.bpbHugeSectors -= (o.offset / bpb.bpbBytesPerSec); if (bpb.bpbSecPerClust == 0) { /* set defaults */ if (bpb.bpbHugeSectors <= 6000) /* about 3MB -> 512 bytes */ bpb.bpbSecPerClust = 1; else if (bpb.bpbHugeSectors <= (1<<17)) /* 64M -> 4k */ bpb.bpbSecPerClust = 8; else if (bpb.bpbHugeSectors <= (1<<19)) /* 256M -> 8k */ bpb.bpbSecPerClust = 16; else if (bpb.bpbHugeSectors <= (1<<21)) /* 1G -> 16k */ bpb.bpbSecPerClust = 32; else bpb.bpbSecPerClust = 64; /* otherwise 32k */ } } if (!powerof2(bpb.bpbBytesPerSec)) { warnx("bytes/sector (%u) is not a power of 2", bpb.bpbBytesPerSec); goto done; } if (bpb.bpbBytesPerSec < MINBPS) { warnx("bytes/sector (%u) is too small; minimum is %u", bpb.bpbBytesPerSec, MINBPS); goto done; } if (o.volume_label && !oklabel(o.volume_label)) { warnx("%s: bad volume label", o.volume_label); goto done; } if (!(fat = o.fat_type)) { if (o.floppy) fat = 12; else if (!o.directory_entries && (o.info_sector || o.backup_sector)) fat = 32; } if ((fat == 32 && o.directory_entries) || (fat != 32 && (o.info_sector || o.backup_sector))) { warnx("-%c is not a legal FAT%s option", fat == 32 ? 'e' : o.info_sector ? 'i' : 'k', fat == 32 ? 
"32" : "12/16"); goto done; } if (o.floppy && fat == 32) bpb.bpbRootDirEnts = 0; if (fat != 0 && fat != 12 && fat != 16 && fat != 32) { warnx("%d: bad FAT type", fat); goto done; } if (o.block_size) { if (!powerof2(o.block_size)) { warnx("block size (%u) is not a power of 2", o.block_size); goto done; } if (o.block_size < bpb.bpbBytesPerSec) { warnx("block size (%u) is too small; minimum is %u", o.block_size, bpb.bpbBytesPerSec); goto done; } if (o.block_size > bpb.bpbBytesPerSec * MAXSPC) { warnx("block size (%u) is too large; maximum is %u", o.block_size, bpb.bpbBytesPerSec * MAXSPC); goto done; } bpb.bpbSecPerClust = o.block_size / bpb.bpbBytesPerSec; } if (o.sectors_per_cluster) { if (!powerof2(o.sectors_per_cluster)) { warnx("sectors/cluster (%u) is not a power of 2", o.sectors_per_cluster); goto done; } bpb.bpbSecPerClust = o.sectors_per_cluster; } if (o.reserved_sectors) bpb.bpbResSectors = o.reserved_sectors; if (o.num_FAT) { if (o.num_FAT > MAXNFT) { warnx("number of FATs (%u) is too large; maximum is %u", o.num_FAT, MAXNFT); goto done; } bpb.bpbFATs = o.num_FAT; } if (o.directory_entries) bpb.bpbRootDirEnts = o.directory_entries; if (o.media_descriptor_set) { if (o.media_descriptor < 0xf0) { warnx("illegal media descriptor (%#x)", o.media_descriptor); goto done; } bpb.bpbMedia = o.media_descriptor; } if (o.sectors_per_fat) bpb.bpbBigFATsecs = o.sectors_per_fat; if (o.info_sector) bpb.bpbFSInfo = o.info_sector; if (o.backup_sector) bpb.bpbBackup = o.backup_sector; bss = 1; bname = NULL; fd1 = -1; if (o.bootstrap) { bname = o.bootstrap; if (!strchr(bname, '/')) { snprintf(buf, sizeof(buf), "/boot/%s", bname); bname = buf; } if ((fd1 = open(bname, O_RDONLY)) == -1 || fstat(fd1, &sb)) { warn("%s", bname); goto done; } if (!S_ISREG(sb.st_mode) || sb.st_size % bpb.bpbBytesPerSec || sb.st_size < bpb.bpbBytesPerSec || sb.st_size > bpb.bpbBytesPerSec * MAXU16) { warnx("%s: inappropriate file type or format", bname); goto done; } bss = sb.st_size / bpb.bpbBytesPerSec; } if (!bpb.bpbFATs) bpb.bpbFATs = 2; if (!fat) { if (bpb.bpbHugeSectors < (bpb.bpbResSectors ? bpb.bpbResSectors : bss) + howmany((RESFTE + (bpb.bpbSecPerClust ? MINCLS16 : MAXCLS12 + 1)) * (bpb.bpbSecPerClust ? 16 : 12) / BPN, bpb.bpbBytesPerSec * NPB) * bpb.bpbFATs + howmany(bpb.bpbRootDirEnts ? bpb.bpbRootDirEnts : DEFRDE, bpb.bpbBytesPerSec / sizeof(struct de)) + (bpb.bpbSecPerClust ? MINCLS16 : MAXCLS12 + 1) * (bpb.bpbSecPerClust ? bpb.bpbSecPerClust : howmany(DEFBLK, bpb.bpbBytesPerSec))) fat = 12; else if (bpb.bpbRootDirEnts || bpb.bpbHugeSectors < (bpb.bpbResSectors ? bpb.bpbResSectors : bss) + howmany((RESFTE + MAXCLS16) * 2, bpb.bpbBytesPerSec) * bpb.bpbFATs + howmany(DEFRDE, bpb.bpbBytesPerSec / sizeof(struct de)) + (MAXCLS16 + 1) * (bpb.bpbSecPerClust ? 
bpb.bpbSecPerClust : howmany(8192, bpb.bpbBytesPerSec))) fat = 16; else fat = 32; } x = bss; if (fat == 32) { if (!bpb.bpbFSInfo) { if (x == MAXU16 || x == bpb.bpbBackup) { warnx("no room for info sector"); goto done; } bpb.bpbFSInfo = x; } if (bpb.bpbFSInfo != MAXU16 && x <= bpb.bpbFSInfo) x = bpb.bpbFSInfo + 1; if (!bpb.bpbBackup) { if (x == MAXU16) { warnx("no room for backup sector"); goto done; } bpb.bpbBackup = x; } else if (bpb.bpbBackup != MAXU16 && bpb.bpbBackup == bpb.bpbFSInfo) { warnx("backup sector would overwrite info sector"); goto done; } if (bpb.bpbBackup != MAXU16 && x <= bpb.bpbBackup) x = bpb.bpbBackup + 1; } extra_res = 0; alignment = 0; set_res = (bpb.bpbResSectors == 0); set_spf = (bpb.bpbBigFATsecs == 0); set_spc = (bpb.bpbSecPerClust == 0); saved_x = x; /* * Attempt to align the root directory to cluster if o.align is set. * This is done by padding with reserved blocks. Note that this can * cause other factors to change, which can in turn change the alignment. * This should take at most 2 iterations, as increasing the reserved * amount may cause the FAT size to decrease by 1, requiring another * bpbFATs reserved blocks. If bpbSecPerClust changes, it will * be half of its previous size, and thus will not throw off alignment. */ do { x = saved_x; if (set_res) bpb.bpbResSectors = ((fat == 32) ? MAX(x, MAX(16384 / bpb.bpbBytesPerSec, 4)) : x) + extra_res; else if (bpb.bpbResSectors < x) { warnx("too few reserved sectors (need %d have %d)", x, bpb.bpbResSectors); goto done; } if (fat != 32 && !bpb.bpbRootDirEnts) bpb.bpbRootDirEnts = DEFRDE; rds = howmany(bpb.bpbRootDirEnts, bpb.bpbBytesPerSec / sizeof(struct de)); if (set_spc) { for (bpb.bpbSecPerClust = howmany(fat == 16 ? DEFBLK16 : DEFBLK, bpb.bpbBytesPerSec); bpb.bpbSecPerClust < MAXSPC && (bpb.bpbResSectors + howmany((RESFTE + maxcls(fat)) * (fat / BPN), bpb.bpbBytesPerSec * NPB) * bpb.bpbFATs + rds + (u_int64_t) (maxcls(fat) + 1) * bpb.bpbSecPerClust) <= bpb.bpbHugeSectors; bpb.bpbSecPerClust <<= 1) continue; } if (fat != 32 && bpb.bpbBigFATsecs > MAXU16) { warnx("too many sectors/FAT for FAT12/16"); goto done; } x1 = bpb.bpbResSectors + rds; x = bpb.bpbBigFATsecs ? 
bpb.bpbBigFATsecs : 1; if (x1 + (u_int64_t)x * bpb.bpbFATs > bpb.bpbHugeSectors) { warnx("meta data exceeds file system size"); goto done; } x1 += x * bpb.bpbFATs; x = (u_int64_t)(bpb.bpbHugeSectors - x1) * bpb.bpbBytesPerSec * NPB / (bpb.bpbSecPerClust * bpb.bpbBytesPerSec * NPB + fat / BPN * bpb.bpbFATs); x2 = howmany((RESFTE + MIN(x, maxcls(fat))) * (fat / BPN), bpb.bpbBytesPerSec * NPB); if (set_spf) { if (bpb.bpbBigFATsecs == 0) bpb.bpbBigFATsecs = x2; x1 += (bpb.bpbBigFATsecs - 1) * bpb.bpbFATs; } if (set_res) { /* attempt to align root directory */ alignment = (bpb.bpbResSectors + bpb.bpbBigFATsecs * bpb.bpbFATs) % bpb.bpbSecPerClust; if (o.align) extra_res += bpb.bpbSecPerClust - alignment; } attempts++; } while (o.align && alignment != 0 && attempts < 2); if (o.align && alignment != 0) warnx("warning: Alignment failed."); cls = (bpb.bpbHugeSectors - x1) / bpb.bpbSecPerClust; x = (u_int64_t)bpb.bpbBigFATsecs * bpb.bpbBytesPerSec * NPB / (fat / BPN) - RESFTE; if (cls > x) cls = x; if (bpb.bpbBigFATsecs < x2) warnx("warning: sectors/FAT limits file system to %u clusters", cls); if (cls < mincls(fat)) { warnx("%u clusters too few clusters for FAT%u, need %u", cls, fat, mincls(fat)); goto done; } if (cls > maxcls(fat)) { cls = maxcls(fat); bpb.bpbHugeSectors = x1 + (cls + 1) * bpb.bpbSecPerClust - 1; warnx("warning: FAT type limits file system to %u sectors", bpb.bpbHugeSectors); } printf("%s: %u sector%s in %u FAT%u cluster%s " "(%u bytes/cluster)\n", fname, cls * bpb.bpbSecPerClust, cls * bpb.bpbSecPerClust == 1 ? "" : "s", cls, fat, cls == 1 ? "" : "s", bpb.bpbBytesPerSec * bpb.bpbSecPerClust); if (!bpb.bpbMedia) bpb.bpbMedia = !bpb.bpbHiddenSecs ? 0xf0 : 0xf8; if (fat == 32) bpb.bpbRootClust = RESFTE; if (bpb.bpbHugeSectors <= MAXU16) { bpb.bpbSectors = bpb.bpbHugeSectors; bpb.bpbHugeSectors = 0; } if (fat != 32) { bpb.bpbFATsecs = bpb.bpbBigFATsecs; bpb.bpbBigFATsecs = 0; } print_bpb(&bpb); if (!o.no_create) { if (o.timestamp_set) { tv.tv_sec = now = o.timestamp; tv.tv_usec = 0; tm = gmtime(&now); } else { gettimeofday(&tv, NULL); now = tv.tv_sec; tm = localtime(&now); } if (!(img = malloc(bpb.bpbBytesPerSec))) { warn(NULL); goto done; } dir = bpb.bpbResSectors + (bpb.bpbFATsecs ? bpb.bpbFATsecs : bpb.bpbBigFATsecs) * bpb.bpbFATs; memset(&si_sa, 0, sizeof(si_sa)); si_sa.sa_handler = infohandler; +#ifdef SIGINFO if (sigaction(SIGINFO, &si_sa, NULL) == -1) { warn("sigaction SIGINFO"); goto done; } +#endif for (lsn = 0; lsn < dir + (fat == 32 ? bpb.bpbSecPerClust : rds); lsn++) { if (got_siginfo) { fprintf(stderr,"%s: writing sector %u of %u (%u%%)\n", fname, lsn, (dir + (fat == 32 ? bpb.bpbSecPerClust: rds)), (lsn * 100) / (dir + (fat == 32 ? 
bpb.bpbSecPerClust: rds))); got_siginfo = 0; } x = lsn; if (o.bootstrap && fat == 32 && bpb.bpbBackup != MAXU16 && bss <= bpb.bpbBackup && x >= bpb.bpbBackup) { x -= bpb.bpbBackup; if (!x && lseek(fd1, o.offset, SEEK_SET)) { warn("%s", bname); goto done; } } if (o.bootstrap && x < bss) { if ((n = read(fd1, img, bpb.bpbBytesPerSec)) == -1) { warn("%s", bname); goto done; } if ((unsigned)n != bpb.bpbBytesPerSec) { warnx("%s: can't read sector %u", bname, x); goto done; } } else memset(img, 0, bpb.bpbBytesPerSec); if (!lsn || (fat == 32 && bpb.bpbBackup != MAXU16 && lsn == bpb.bpbBackup)) { x1 = sizeof(struct bs); bsbpb = (struct bsbpb *)(img + x1); mk2(bsbpb->bpbBytesPerSec, bpb.bpbBytesPerSec); mk1(bsbpb->bpbSecPerClust, bpb.bpbSecPerClust); mk2(bsbpb->bpbResSectors, bpb.bpbResSectors); mk1(bsbpb->bpbFATs, bpb.bpbFATs); mk2(bsbpb->bpbRootDirEnts, bpb.bpbRootDirEnts); mk2(bsbpb->bpbSectors, bpb.bpbSectors); mk1(bsbpb->bpbMedia, bpb.bpbMedia); mk2(bsbpb->bpbFATsecs, bpb.bpbFATsecs); mk2(bsbpb->bpbSecPerTrack, bpb.bpbSecPerTrack); mk2(bsbpb->bpbHeads, bpb.bpbHeads); mk4(bsbpb->bpbHiddenSecs, bpb.bpbHiddenSecs); mk4(bsbpb->bpbHugeSectors, bpb.bpbHugeSectors); x1 += sizeof(struct bsbpb); if (fat == 32) { bsxbpb = (struct bsxbpb *)(img + x1); mk4(bsxbpb->bpbBigFATsecs, bpb.bpbBigFATsecs); mk2(bsxbpb->bpbExtFlags, 0); mk2(bsxbpb->bpbFSVers, 0); mk4(bsxbpb->bpbRootClust, bpb.bpbRootClust); mk2(bsxbpb->bpbFSInfo, bpb.bpbFSInfo); mk2(bsxbpb->bpbBackup, bpb.bpbBackup); x1 += sizeof(struct bsxbpb); } bsx = (struct bsx *)(img + x1); mk1(bsx->exBootSignature, 0x29); if (o.volume_id_set) x = o.volume_id; else x = (((u_int)(1 + tm->tm_mon) << 8 | (u_int)tm->tm_mday) + ((u_int)tm->tm_sec << 8 | (u_int)(tv.tv_usec / 10))) << 16 | ((u_int)(1900 + tm->tm_year) + ((u_int)tm->tm_hour << 8 | (u_int)tm->tm_min)); mk4(bsx->exVolumeID, x); mklabel(bsx->exVolumeLabel, o.volume_label ? o.volume_label : "NO NAME"); snprintf(buf, sizeof(buf), "FAT%u", fat); setstr(bsx->exFileSysType, buf, sizeof(bsx->exFileSysType)); if (!o.bootstrap) { x1 += sizeof(struct bsx); bs = (struct bs *)img; mk1(bs->bsJump[0], 0xeb); mk1(bs->bsJump[1], x1 - 2); mk1(bs->bsJump[2], 0x90); setstr(bs->bsOemName, o.OEM_string ? o.OEM_string : "BSD4.4 ", sizeof(bs->bsOemName)); memcpy(img + x1, bootcode, sizeof(bootcode)); mk2(img + MINBPS - 2, DOSMAGIC); } } else if (fat == 32 && bpb.bpbFSInfo != MAXU16 && (lsn == bpb.bpbFSInfo || (bpb.bpbBackup != MAXU16 && lsn == bpb.bpbBackup + bpb.bpbFSInfo))) { mk4(img, 0x41615252); mk4(img + MINBPS - 28, 0x61417272); mk4(img + MINBPS - 24, 0xffffffff); mk4(img + MINBPS - 20, 0xffffffff); mk2(img + MINBPS - 2, DOSMAGIC); } else if (lsn >= bpb.bpbResSectors && lsn < dir && !((lsn - bpb.bpbResSectors) % (bpb.bpbFATsecs ? bpb.bpbFATsecs : bpb.bpbBigFATsecs))) { mk1(img[0], bpb.bpbMedia); for (x = 1; x < fat * (fat == 32 ? 3 : 2) / 8; x++) mk1(img[x], fat == 32 && x % 4 == 3 ? 
0x0f : 0xff); } else if (lsn == dir && o.volume_label) { de = (struct de *)img; mklabel(de->deName, o.volume_label); mk1(de->deAttributes, 050); x = (u_int)tm->tm_hour << 11 | (u_int)tm->tm_min << 5 | (u_int)tm->tm_sec >> 1; mk2(de->deMTime, x); x = (u_int)(tm->tm_year - 80) << 9 | (u_int)(tm->tm_mon + 1) << 5 | (u_int)tm->tm_mday; mk2(de->deMDate, x); } if ((n = write(fd, img, bpb.bpbBytesPerSec)) == -1) { warn("%s", fname); goto done; } if ((unsigned)n != bpb.bpbBytesPerSec) { warnx("%s: can't write sector %u", fname, lsn); goto done; } } } rv = 0; done: free(img); if (fd != -1) close(fd); if (fd1 != -1) close(fd1); return rv; } /* * return -1 with error if file system is mounted. */ static int check_mounted(const char *fname, mode_t mode) { +/* + * If getmntinfo() is not available (e.g. Linux) don't check. This should + * not be a problem since we will only be using makefs to create images. + */ +#if !defined(MAKEFS) struct statfs *mp; const char *s1, *s2; size_t len; int n, r; if (!(n = getmntinfo(&mp, MNT_NOWAIT))) { warn("getmntinfo"); return -1; } len = strlen(_PATH_DEV); s1 = fname; if (!strncmp(s1, _PATH_DEV, len)) s1 += len; r = S_ISCHR(mode) && s1 != fname && *s1 == 'r'; for (; n--; mp++) { s2 = mp->f_mntfromname; if (!strncmp(s2, _PATH_DEV, len)) s2 += len; if ((r && s2 != mp->f_mntfromname && !strcmp(s1 + 1, s2)) || !strcmp(s1, s2)) { warnx("%s is mounted on %s", fname, mp->f_mntonname); return -1; } } +#endif return 0; } /* * Get a standard format. */ static int getstdfmt(const char *fmt, struct bpb *bpb) { u_int x, i; x = nitems(stdfmt); for (i = 0; i < x && strcmp(fmt, stdfmt[i].name); i++); if (i == x) { warnx("%s: unknown standard format", fmt); return -1; } *bpb = stdfmt[i].bpb; return 0; } +static void +compute_geometry_from_file(int fd, const char *fname, struct disklabel *lp) +{ + struct stat st; + off_t ms; + + if (fstat(fd, &st)) + err(1, "cannot get disk size"); + if (!S_ISREG(st.st_mode)) + errx(1, "%s is not a regular file", fname); + ms = st.st_size; + lp->d_secsize = 512; + lp->d_nsectors = 63; + lp->d_ntracks = 255; + lp->d_secperunit = ms / lp->d_secsize; +} + /* * Get disk slice, partition, and geometry information. 
*/ static int getdiskinfo(int fd, const char *fname, const char *dtype, __unused int oflag, struct bpb *bpb) { struct disklabel *lp, dlp; + off_t hs = 0; +#ifndef MAKEFS + off_t ms; struct fd_type type; - off_t ms, hs = 0; lp = NULL; /* If the user specified a disk type, try to use that */ if (dtype != NULL) { lp = getdiskbyname(dtype); } /* Maybe it's a floppy drive */ if (lp == NULL) { if (ioctl(fd, DIOCGMEDIASIZE, &ms) == -1) { - struct stat st; - - if (fstat(fd, &st)) - err(1, "cannot get disk size"); /* create a fake geometry for a file image */ - ms = st.st_size; - dlp.d_secsize = 512; - dlp.d_nsectors = 63; - dlp.d_ntracks = 255; - dlp.d_secperunit = ms / dlp.d_secsize; + compute_geometry_from_file(fd, fname, &dlp); lp = &dlp; } else if (ioctl(fd, FD_GTYPE, &type) != -1) { dlp.d_secsize = 128 << type.secsize; dlp.d_nsectors = type.sectrac; dlp.d_ntracks = type.heads; dlp.d_secperunit = ms / dlp.d_secsize; lp = &dlp; } } /* Maybe it's a fixed drive */ if (lp == NULL) { if (bpb->bpbBytesPerSec) dlp.d_secsize = bpb->bpbBytesPerSec; if (bpb->bpbBytesPerSec == 0 && ioctl(fd, DIOCGSECTORSIZE, &dlp.d_secsize) == -1) err(1, "cannot get sector size"); dlp.d_secperunit = ms / dlp.d_secsize; if (bpb->bpbSecPerTrack == 0 && ioctl(fd, DIOCGFWSECTORS, &dlp.d_nsectors) == -1) { warn("cannot get number of sectors per track"); dlp.d_nsectors = 63; } if (bpb->bpbHeads == 0 && ioctl(fd, DIOCGFWHEADS, &dlp.d_ntracks) == -1) { warn("cannot get number of heads"); if (dlp.d_secperunit <= 63*1*1024) dlp.d_ntracks = 1; else if (dlp.d_secperunit <= 63*16*1024) dlp.d_ntracks = 16; else dlp.d_ntracks = 255; } hs = (ms / dlp.d_secsize) - dlp.d_secperunit; lp = &dlp; } +#else + /* In the makefs case we only support image files: */ + compute_geometry_from_file(fd, fname, &dlp); + lp = &dlp; +#endif if (bpb->bpbBytesPerSec == 0) { if (ckgeom(fname, lp->d_secsize, "bytes/sector") == -1) return -1; bpb->bpbBytesPerSec = lp->d_secsize; } if (bpb->bpbSecPerTrack == 0) { if (ckgeom(fname, lp->d_nsectors, "sectors/track") == -1) return -1; bpb->bpbSecPerTrack = lp->d_nsectors; } if (bpb->bpbHeads == 0) { if (ckgeom(fname, lp->d_ntracks, "drive heads") == -1) return -1; bpb->bpbHeads = lp->d_ntracks; } if (bpb->bpbHugeSectors == 0) bpb->bpbHugeSectors = lp->d_secperunit; if (bpb->bpbHiddenSecs == 0) bpb->bpbHiddenSecs = hs; return 0; } /* * Print out BPB values. */ static void print_bpb(struct bpb *bpb) { printf("BytesPerSec=%u SecPerClust=%u ResSectors=%u FATs=%u", bpb->bpbBytesPerSec, bpb->bpbSecPerClust, bpb->bpbResSectors, bpb->bpbFATs); if (bpb->bpbRootDirEnts) printf(" RootDirEnts=%u", bpb->bpbRootDirEnts); if (bpb->bpbSectors) printf(" Sectors=%u", bpb->bpbSectors); printf(" Media=%#x", bpb->bpbMedia); if (bpb->bpbFATsecs) printf(" FATsecs=%u", bpb->bpbFATsecs); printf(" SecPerTrack=%u Heads=%u HiddenSecs=%u", bpb->bpbSecPerTrack, bpb->bpbHeads, bpb->bpbHiddenSecs); if (bpb->bpbHugeSectors) printf(" HugeSectors=%u", bpb->bpbHugeSectors); if (!bpb->bpbFATsecs) { printf(" FATsecs=%u RootCluster=%u", bpb->bpbBigFATsecs, bpb->bpbRootClust); printf(" FSInfo="); printf(bpb->bpbFSInfo == MAXU16 ? "%#x" : "%u", bpb->bpbFSInfo); printf(" Backup="); printf(bpb->bpbBackup == MAXU16 ? "%#x" : "%u", bpb->bpbBackup); } printf("\n"); } /* * Check a disk geometry value. */ static int ckgeom(const char *fname, u_int val, const char *msg) { if (!val) { warnx("%s: no default %s", fname, msg); return -1; } if (val > MAXU16) { warnx("%s: illegal %s %d", fname, msg, val); return -1; } return 0; } /* * Check a volume label. 
*/ static int oklabel(const char *src) { int c, i; for (i = 0; i <= 11; i++) { c = (u_char)*src++; if (c < ' ' + !i || strchr("\"*+,./:;<=>?[\\]|", c)) break; } return i && !c; } /* * Make a volume label. */ static void mklabel(u_int8_t *dest, const char *src) { int c, i; for (i = 0; i < 11; i++) { c = *src ? toupper(*src++) : ' '; *dest++ = !i && c == '\xe5' ? 5 : c; } } /* * Copy string, padding with spaces. */ static void setstr(u_int8_t *dest, const char *src, size_t len) { while (len--) *dest++ = *src ? *src++ : ' '; } static void infohandler(int sig __unused) { got_siginfo = 1; } Index: projects/clang1000-import/share/mk/bsd.compat.mk =================================================================== --- projects/clang1000-import/share/mk/bsd.compat.mk (revision 357178) +++ projects/clang1000-import/share/mk/bsd.compat.mk (revision 357179) @@ -1,173 +1,172 @@ # $FreeBSD$ .if !targets(__<${_this:T}>__) __<${_this:T}>__: .if defined(_LIBCOMPAT) COMPAT_ARCH= ${TARGET_ARCH} COMPAT_CPUTYPE= ${TARGET_CPUTYPE} .if (defined(WANT_COMPILER_TYPE) && ${WANT_COMPILER_TYPE} == gcc) || \ (defined(X_COMPILER_TYPE) && ${X_COMPILER_TYPE} == gcc) COMPAT_COMPILER_TYPE= gcc .else COMPAT_COMPILER_TYPE= clang .endif .else COMPAT_ARCH= ${MACHINE_ARCH} COMPAT_CPUTYPE= ${CPUTYPE} .include COMPAT_COMPILER_TYPE=${COMPILER_TYPE} .endif # ------------------------------------------------------------------- # 32 bit world .if ${COMPAT_ARCH} == "amd64" HAS_COMPAT=32 .if empty(COMPAT_CPUTYPE) LIB32CPUFLAGS= -march=i686 -mmmx -msse -msse2 .else LIB32CPUFLAGS= -march=${COMPAT_CPUTYPE} .endif .if ${COMPAT_COMPILER_TYPE} == gcc .else LIB32CPUFLAGS+= -target x86_64-unknown-freebsd13.0 .endif LIB32CPUFLAGS+= -m32 LIB32_MACHINE= i386 LIB32_MACHINE_ARCH= i386 LIB32WMAKEENV= MACHINE_CPU="i686 mmx sse sse2" LIB32WMAKEFLAGS= \ AS="${XAS} --32" \ LD="${XLD} -m elf_i386_fbsd -L${LIBCOMPATTMP}/usr/lib32" .elif ${COMPAT_ARCH} == "powerpc64" HAS_COMPAT=32 .if empty(COMPAT_CPUTYPE) LIB32CPUFLAGS= -mcpu=powerpc .else LIB32CPUFLAGS= -mcpu=${COMPAT_CPUTYPE} .endif .if ${COMPAT_COMPILER_TYPE} == "gcc" LIB32CPUFLAGS+= -m32 .else LIB32CPUFLAGS+= -target powerpc-unknown-freebsd13.0 # Use BFD to workaround ld.lld issues on PowerPC 32 bit LIB32CPUFLAGS+= -fuse-ld=${LD_BFD} .endif LIB32_MACHINE= powerpc LIB32_MACHINE_ARCH= powerpc LIB32WMAKEFLAGS= \ LD="${LD_BFD} -m elf32ppc_fbsd" .elif ${COMPAT_ARCH:Mmips64*} != "" HAS_COMPAT=32 .if ${COMPAT_COMPILER_TYPE} == gcc .if empty(COMPAT_CPUTYPE) LIB32CPUFLAGS= -march=mips3 .else LIB32CPUFLAGS= -march=${COMPAT_CPUTYPE} .endif .else .if ${COMPAT_ARCH:Mmips64el*} != "" LIB32CPUFLAGS= -target mipsel-unknown-freebsd13.0 .else LIB32CPUFLAGS= -target mips-unknown-freebsd13.0 .endif .endif LIB32CPUFLAGS+= -mabi=32 LIB32_MACHINE= mips +LIB32_MACHINE_ARCH:= ${COMPAT_ARCH:S/64//} .if ${COMPAT_ARCH:Mmips64el*} != "" -LIB32_MACHINE_ARCH= mipsel _EMULATION= elf32ltsmip_fbsd .else -LIB32_MACHINE_ARCH= mips _EMULATION= elf32btsmip_fbsd .endif LIB32WMAKEFLAGS= LD="${XLD} -m ${_EMULATION}" LIB32LDFLAGS= -Wl,-m${_EMULATION} .endif LIB32WMAKEFLAGS+= NM="${XNM}" LIB32WMAKEFLAGS+= OBJCOPY="${XOBJCOPY}" LIB32CFLAGS= -DCOMPAT_32BIT LIB32DTRACE= ${DTRACE} -32 LIB32WMAKEFLAGS+= -DCOMPAT_32BIT # ------------------------------------------------------------------- # soft-fp world .if ${COMPAT_ARCH:Marmv[67]*} != "" HAS_COMPAT=SOFT LIBSOFTCFLAGS= -DCOMPAT_SOFTFP LIBSOFTCPUFLAGS= -mfloat-abi=softfp LIBSOFT_MACHINE= arm LIBSOFT_MACHINE_ARCH= ${COMPAT_ARCH} LIBSOFTWMAKEENV= CPUTYPE=soft LIBSOFTWMAKEFLAGS= -DCOMPAT_SOFTFP .endif # 
------------------------------------------------------------------- # In the program linking case, select LIBCOMPAT .if defined(NEED_COMPAT) .ifndef HAS_COMPAT .warning NEED_COMPAT defined, but no LIBCOMPAT is available (COMPAT_ARCH == ${COMPAT_ARCH} .elif !${HAS_COMPAT:M${NEED_COMPAT}} && ${NEED_COMPAT} != "any" .error NEED_COMPAT (${NEED_COMPAT}) defined, but not in HAS_COMPAT ($HAS_COMPAT) .elif ${NEED_COMPAT} == "any" .endif .ifdef WANT_COMPAT .error Both WANT_COMPAT and NEED_COMPAT defined .endif WANT_COMPAT:= ${NEED_COMPAT} .endif .if defined(HAS_COMPAT) && defined(WANT_COMPAT) .if ${WANT_COMPAT} == "any" _LIBCOMPAT:= ${HAS_COMPAT:[1]} .else _LIBCOMPAT:= ${WANT_COMPAT} .endif .endif # ------------------------------------------------------------------- # Generic code for each type. # Set defaults based on type. libcompat= ${_LIBCOMPAT:tl} _LIBCOMPAT_MAKEVARS= _OBJTOP TMP CPUFLAGS CFLAGS CXXFLAGS LDFLAGS \ _MACHINE _MACHINE_ARCH WMAKEENV WMAKEFLAGS WMAKE .for _var in ${_LIBCOMPAT_MAKEVARS} .if !empty(LIB${_LIBCOMPAT}${_var}) LIBCOMPAT${_var}?= ${LIB${_LIBCOMPAT}${_var}} .endif .endfor # Shared flags LIBCOMPAT_OBJTOP?= ${OBJTOP}/obj-lib${libcompat} LIBCOMPATTMP?= ${LIBCOMPAT_OBJTOP}/tmp LIBCOMPATCFLAGS+= ${LIBCOMPATCPUFLAGS} \ -L${LIBCOMPATTMP}/usr/lib${libcompat} \ --sysroot=${LIBCOMPATTMP} \ ${BFLAGS} LIBCOMPATWMAKEENV+= MACHINE=${LIBCOMPAT_MACHINE} LIBCOMPATWMAKEENV+= MACHINE_ARCH=${LIBCOMPAT_MACHINE_ARCH} # -B is needed to find /usr/lib32/crti.o for GCC and /usr/libsoft/crti.o for # Clang/GCC. LIBCOMPATCFLAGS+= -B${LIBCOMPATTMP}/usr/lib${libcompat} .if defined(WANT_COMPAT) LIBDIR_BASE:= /usr/lib${libcompat} _LIB_OBJTOP= ${LIBCOMPAT_OBJTOP} CFLAGS+= ${LIBCOMPATCFLAGS} LDFLAGS+= ${CFLAGS} ${LIBCOMPATLDFLAGS} MACHINE= ${LIBCOMPAT_MACHINE} MACHINE_ARCH= ${LIBCOMPAT_MACHINE_ARCH} .endif .endif Index: projects/clang1000-import/sys/conf/Makefile.mips =================================================================== --- projects/clang1000-import/sys/conf/Makefile.mips (revision 357178) +++ projects/clang1000-import/sys/conf/Makefile.mips (revision 357179) @@ -1,111 +1,108 @@ # Makefile.mips # $FreeBSD$ # # Makefile for FreeBSD # # This makefile is constructed from a machine description: # config machineid # Most changes should be made in the machine description # /sys/mips/conf/``machineid'' # after which you should do # config machineid # Generic makefile changes should be made in # /sys/conf/Makefile.mips # after which config should be rerun for all machines. # # Which version of config(8) is required. %VERSREQ= 600012 STD8X16FONT?= iso .if !defined(S) .if exists(./@/.) S= ./@ .else S= ../../.. .endif .endif .include "$S/conf/kern.pre.mk" INCLUDES+= -I$S/contrib/libfdt LDSCRIPT_NAME?=ldscript.$M SYSTEM_LD:= ${SYSTEM_LD:$S/conf/${LDSCRIPT_NAME}=${LDSCRIPT_NAME}} SYSTEM_DEP:= ${SYSTEM_DEP:$S/conf/${LDSCRIPT_NAME}=${LDSCRIPT_NAME}} KERNLOADADDR?=0x80001000 # This obscure value is defined by CFE for WR160N # To be changed later TRAMPLOADADDR?=0x807963c0 # We default to the MIPS32 ISA for O32 and MIPS64 ISA for N64 and N32 # if none is specified in the kernel configuration file. 
.if ${MACHINE_ARCH:Mmips64*} != "" || ${MACHINE_ARCH:Mmipsn32*} != "" ARCH_FLAGS?=-march=mips64 .else ARCH_FLAGS?=-march=mips32 .endif ARCH_FLAGS+=-mabi=${MIPS_ABI} EXTRA_FLAGS=-fno-pic -mno-abicalls -G0 -DKERNLOADADDR=${KERNLOADADDR} EXTRA_FLAGS+=-${MIPS_ENDIAN} -HACK_EXTRA_FLAGS=-shared - # We add the -fno-pic flag to kernels because otherwise performance # is extremely poor, as well as -mno-abicalls to force no ABI usage. CFLAGS+=${EXTRA_FLAGS} $(ARCH_FLAGS) -HACK_EXTRA_FLAGS+=${EXTRA_FLAGS} $(ARCH_FLAGS) TRAMP_ARCH_FLAGS?=$(ARCH_FLAGS) TRAMP_EXTRA_FLAGS=${EXTRA_FLAGS} ${TRAMP_ARCH_FLAGS} # Kernel code is always compiled with soft-float on MIPS TRAMP_EXTRA_FLAGS+=-msoft-float .if ${MACHINE_ARCH:Mmips64*} != "" TRAMP_ELFSIZE=64 .else TRAMP_ELFSIZE=32 .endif ASM_CFLAGS+=${CFLAGS} -D_LOCORE -DLOCORE .if !defined(WITHOUT_KERNEL_TRAMPOLINE) KERNEL_EXTRA=trampoline KERNEL_EXTRA_INSTALL=${KERNEL_KO}.tramp.bin trampoline: ${KERNEL_KO}.tramp.bin ${KERNEL_KO}.tramp.bin: ${KERNEL_KO} $S/$M/$M/elf_trampoline.c \ $S/$M/$M/inckern.S ${OBJCOPY} --strip-symbol '$$d' --strip-symbol '$$a' \ -g --strip-symbol '$$t' ${FULLKERNEL} ${KERNEL_KO}.tmp sed -e s/${KERNLOADADDR}/${TRAMPLOADADDR}/ -e s/" + SIZEOF_HEADERS"// \ ${LDSCRIPT_NAME} > ${LDSCRIPT_NAME}.tramp.noheader ${CC} -O -nostdlib -I. -I$S ${TRAMP_EXTRA_FLAGS} ${TRAMP_LDFLAGS} -Xlinker \ -T -Xlinker ${LDSCRIPT_NAME}.tramp.noheader \ -DKERNNAME="\"${KERNEL_KO}.tmp\"" -DELFSIZE=${TRAMP_ELFSIZE} \ -fno-asynchronous-unwind-tables \ $S/$M/$M/inckern.S $S/$M/$M/elf_trampoline.c \ -o ${KERNEL_KO}.tramp.elf ${OBJCOPY} -S -O binary ${KERNEL_KO}.tramp.elf \ ${KERNEL_KO}.tramp.bin .endif %BEFORE_DEPEND %OBJS %FILES.c %FILES.s %FILES.m %CLEAN CLEAN+= ${LDSCRIPT_NAME} ${LDSCRIPT_NAME}.tramp.noheader \ ${KERNEL_KO}.tramp.elf ${KERNEL_KO}.tramp.bin ${LDSCRIPT_NAME}: $S/conf/${LDSCRIPT_NAME} sed s/KERNLOADADDR/${KERNLOADADDR}/g $S/conf/${LDSCRIPT_NAME} \ > ${LDSCRIPT_NAME} %RULES .include "$S/conf/kern.post.mk" Index: projects/clang1000-import/sys/conf/Makefile.powerpc =================================================================== --- projects/clang1000-import/sys/conf/Makefile.powerpc (revision 357178) +++ projects/clang1000-import/sys/conf/Makefile.powerpc (revision 357179) @@ -1,83 +1,82 @@ # Makefile.powerpc -- with config changes. # Copyright 1990 W. Jolitz # from: @(#)Makefile.i386 7.1 5/10/91 # $FreeBSD$ # # Makefile for FreeBSD # # This makefile is constructed from a machine description: # config machineid # Most changes should be made in the machine description # /sys/powerpc/conf/``machineid'' # after which you should do # config machineid # Generic makefile changes should be made in # /sys/conf/Makefile.powerpc # after which config should be rerun for all machines. # # Which version of config(8) is required. %VERSREQ= 600012 STD8X16FONT?= iso .if !defined(S) .if exists(./@/.) S= ./@ .else S= ../../.. .endif .endif LDSCRIPT_NAME?= ldscript.${MACHINE_ARCH} .include "$S/conf/kern.pre.mk" INCLUDES+= -I$S/contrib/libfdt .if "${MACHINE_ARCH}" == "powerpcspe" # Force __SPE__, since the builtin will be removed later with -mno-spe CFLAGS.gcc+= -mabi=spe -D__SPE__ CFLAGS.clang+= -mspe -D__SPE__ -m32 -HACK_EXTRA_FLAGS= -shared -m32 -mspe -D__SPE__ .endif CFLAGS+= -msoft-float CFLAGS.gcc+= -Wa,-many # Apply compiler-specific DPAA exceptions. 
.if "${COMPILER_TYPE}" == "clang" DPAAWARNFLAGS += \ -Wno-error=parentheses-equality \ -Wno-error=self-assign \ -Wno-error=incompatible-pointer-types-discards-qualifiers \ -Wno-error=non-literal-null-conversion \ -Wno-error=enum-conversion .elif "${COMPILER_TYPE}" == "gcc" && ${COMPILER_VERSION} >= 50200 DPAAWARNFLAGS += \ -Wno-error=redundant-decls \ -Wno-error=int-in-bool-context .endif # Build position-independent kernel CFLAGS+= -fPIC LDFLAGS+= -pie .if !empty(DDB_ENABLED) CFLAGS+= -fno-omit-frame-pointer .endif %BEFORE_DEPEND %OBJS %FILES.c %FILES.s %FILES.m %CLEAN %RULES .include "$S/conf/kern.post.mk" Index: projects/clang1000-import/sys/conf/kern.post.mk =================================================================== --- projects/clang1000-import/sys/conf/kern.post.mk (revision 357178) +++ projects/clang1000-import/sys/conf/kern.post.mk (revision 357179) @@ -1,488 +1,487 @@ # $FreeBSD$ # Part of a unified Makefile for building kernels. This part includes all # the definitions that need to be after all the % directives except %RULES # and ones that act like they are part of %RULES. # # Most make variables should not be defined in this file. Instead, they # should be defined in the kern.pre.mk so that port makefiles can # override or augment them. .if defined(DTS) || defined(DTSO) || defined(FDT_DTS_FILE) .include "dtb.build.mk" KERNEL_EXTRA+= ${DTB} ${DTBO} CLEAN+= ${DTB} ${DTBO} kernel-install: _dtbinstall .ORDER: beforeinstall _dtbinstall .endif # In case the config had a makeoptions DESTDIR... .if defined(DESTDIR) MKMODULESENV+= DESTDIR="${DESTDIR}" .endif SYSDIR?= ${S:C;^[^/];${.CURDIR}/&;:tA} MKMODULESENV+= KERNBUILDDIR="${.CURDIR}" SYSDIR="${SYSDIR}" MKMODULESENV+= MODULE_TIED=yes .if defined(CONF_CFLAGS) MKMODULESENV+= CONF_CFLAGS="${CONF_CFLAGS}" .endif .if defined(WITH_CTF) MKMODULESENV+= WITH_CTF="${WITH_CTF}" .endif .if defined(WITH_EXTRA_TCP_STACKS) MKMODULESENV+= WITH_EXTRA_TCP_STACKS="${WITH_EXTRA_TCP_STACKS}" .endif .if defined(KCSAN_ENABLED) MKMODULESENV+= KCSAN_ENABLED="yes" .endif .if defined(SAN_CFLAGS) MKMODULESENV+= SAN_CFLAGS="${SAN_CFLAGS}" .endif .if defined(GCOV_CFLAGS) MKMODULESENV+= GCOV_CFLAGS="${GCOV_CFLAGS}" .endif # Allow overriding the kernel debug directory, so kernel and user debug may be # installed in different directories. Setting it to "" restores the historical # behavior of installing debug files in the kernel directory. KERN_DEBUGDIR?= ${DEBUGDIR} .MAIN: all .if !defined(NO_MODULES) # Default prefix used for modules installed from ports LOCALBASE?= /usr/local LOCAL_MODULES_DIR?= ${LOCALBASE}/sys/modules # Default to installing all modules installed by ports unless overridden # by the user. 
.if !defined(LOCAL_MODULES) && exists(${LOCAL_MODULES_DIR}) LOCAL_MODULES!= ls ${LOCAL_MODULES_DIR} .endif .endif .for target in all clean cleandepend cleandir clobber depend install \ ${_obj} reinstall tags ${target}: kernel-${target} .if !defined(NO_MODULES) ${target}: modules-${target} modules-${target}: .if !defined(MODULES_WITH_WORLD) && exists($S/modules) cd $S/modules; ${MKMODULESENV} ${MAKE} \ ${target:S/^reinstall$/install/:S/^clobber$/cleandir/} .endif .for module in ${LOCAL_MODULES} @${ECHODIR} "===> ${module} (${target:S/^reinstall$/install/:S/^clobber$/cleandir/})" @cd ${LOCAL_MODULES_DIR}/${module}; ${MKMODULESENV} ${MAKE} \ DIRPRFX="${module}/" \ ${target:S/^reinstall$/install/:S/^clobber$/cleandir/} .endfor .endif .endfor # Handle ports (as defined by the user) that build kernel modules .if !defined(NO_MODULES) && defined(PORTS_MODULES) # # The ports tree needs some environment variables defined to match the new kernel # # SRC_BASE is how the ports tree refers to the location of the base source files .if !defined(SRC_BASE) SRC_BASE= ${SYSDIR:H:tA} .endif # OSVERSION is used by some ports to determine build options .if !defined(OSRELDATE) # Definition copied from src/Makefile.inc1 OSRELDATE!= awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \ ${MAKEOBJDIRPREFIX}${SRC_BASE}/include/osreldate.h .endif # Keep the related ports builds in the obj directory so that they are only rebuilt once per kernel build # # Ports search for some dependencies in PATH, so add the location of the # installed files WRKDIRPREFIX?= ${.OBJDIR} PORTSMODULESENV=\ env \ -u CC \ -u CXX \ -u CPP \ -u MAKESYSPATH \ -u MK_AUTO_OBJ \ -u MAKEOBJDIR \ MAKEFLAGS="${MAKEFLAGS:M*:tW:S/^-m /-m_/g:S/ -m / -m_/g:tw:N-m_*:NMK_AUTO_OBJ=*}" \ SYSDIR=${SYSDIR} \ PATH=${PATH}:${LOCALBASE}/bin:${LOCALBASE}/sbin \ SRC_BASE=${SRC_BASE} \ OSVERSION=${OSRELDATE} \ WRKDIRPREFIX=${WRKDIRPREFIX} # The WRKDIR needs to be cleaned before building, and trying to change the target # with a :C pattern below results in install -> instclean all: .for __i in ${PORTS_MODULES} @${ECHO} "===> Ports module ${__i} (all)" cd $${PORTSDIR:-/usr/ports}/${__i}; ${PORTSMODULESENV} ${MAKE} -B clean build .endfor .for __target in install reinstall clean ${__target}: ports-${__target} ports-${__target}: .for __i in ${PORTS_MODULES} @${ECHO} "===> Ports module ${__i} (${__target})" cd $${PORTSDIR:-/usr/ports}/${__i}; ${PORTSMODULESENV} ${MAKE} -B ${__target:C/(re)?install/deinstall reinstall/} .endfor .endfor .endif .ORDER: kernel-install modules-install beforebuild: .PHONY kernel-all: beforebuild .WAIT ${KERNEL_KO} ${KERNEL_EXTRA} kernel-cleandir: kernel-clean kernel-cleandepend kernel-clobber: find . -maxdepth 1 ! -type d ! -name version -delete kernel-obj: .if !defined(NO_MODULES) modules: modules-all modules-depend: beforebuild modules-all: beforebuild .if !defined(NO_MODULES_OBJ) modules-all modules-depend: modules-obj .endif .endif .if !defined(DEBUG) FULLKERNEL= ${KERNEL_KO} .else FULLKERNEL= ${KERNEL_KO}.full ${KERNEL_KO}: ${FULLKERNEL} ${KERNEL_KO}.debug ${OBJCOPY} --strip-debug --add-gnu-debuglink=${KERNEL_KO}.debug \ ${FULLKERNEL} ${.TARGET} ${KERNEL_KO}.debug: ${FULLKERNEL} ${OBJCOPY} --only-keep-debug ${FULLKERNEL} ${.TARGET} install.debug reinstall.debug: gdbinit cd ${.CURDIR}; ${MAKE} ${.TARGET:R} # Install gdbinit files for kernel debugging. 
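The MAKEFLAGS filter in PORTSMODULESENV above is dense: it temporarily glues each "-m <path>" pair into one word, drops those words together with any MK_AUTO_OBJ=* knob, and passes the rest through, so a port's module build does not inherit kernel-build-only settings. A rough stand-alone C model of the effect (hypothetical helper, not part of the build):

#include <stdio.h>
#include <string.h>

/* Drop "-m <path>" pairs and MK_AUTO_OBJ=* assignments, keep the rest. */
static void
scrub_makeflags(int nflags, const char **flags)
{
	int i;

	for (i = 0; i < nflags; i++) {
		if (strcmp(flags[i], "-m") == 0) {
			i++;		/* also skip the path that follows */
			continue;
		}
		if (strncmp(flags[i], "MK_AUTO_OBJ=", 12) == 0)
			continue;
		printf("%s ", flags[i]);
	}
	printf("\n");
}

int
main(void)
{
	const char *flags[] =
	    { "-j8", "-m", "/usr/src/share/mk", "MK_AUTO_OBJ=yes" };

	scrub_makeflags(4, flags);	/* prints only "-j8" */
	return (0);
}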
gdbinit: grep -v '# XXX' ${S}/../tools/debugscripts/dot.gdbinit | \ sed "s:MODPATH:${.OBJDIR}/modules:" > .gdbinit cp ${S}/../tools/debugscripts/gdbinit.kernel ${.CURDIR} .if exists(${S}/../tools/debugscripts/gdbinit.${MACHINE_CPUARCH}) cp ${S}/../tools/debugscripts/gdbinit.${MACHINE_CPUARCH} \ ${.CURDIR}/gdbinit.machine .endif .endif ${FULLKERNEL}: ${SYSTEM_DEP} vers.o @rm -f ${.TARGET} @echo linking ${.TARGET} ${SYSTEM_LD} .if !empty(MD_ROOT_SIZE_CONFIGURED) && defined(MFS_IMAGE) @sh ${S}/tools/embed_mfs.sh ${.TARGET} ${MFS_IMAGE} .endif .if ${MK_CTF} != "no" @echo ${CTFMERGE} ${CTFFLAGS} -o ${.TARGET} ... @${CTFMERGE} ${CTFFLAGS} -o ${.TARGET} ${SYSTEM_OBJS} vers.o .endif .if !defined(DEBUG) ${OBJCOPY} --strip-debug ${.TARGET} .endif ${SYSTEM_LD_TAIL} OBJS_DEPEND_GUESS+= offset.inc assym.inc vnode_if.h ${BEFORE_DEPEND:M*.h} \ ${MFILES:T:S/.m$/.h/} .for mfile in ${MFILES} # XXX the low quality .m.o rules generated by config are normally used # instead of the .m.c rules here. ${mfile:T:S/.m$/.c/}: ${mfile} ${AWK} -f $S/tools/makeobjops.awk ${mfile} -c ${mfile:T:S/.m$/.h/}: ${mfile} ${AWK} -f $S/tools/makeobjops.awk ${mfile} -h .endfor kernel-clean: rm -f *.o *.so *.pico *.ko *.s eddep errs \ ${FULLKERNEL} ${KERNEL_KO} ${KERNEL_KO}.debug \ tags vers.c \ vnode_if.c vnode_if.h vnode_if_newproto.h vnode_if_typedef.h \ ${MFILES:T:S/.m$/.c/} ${MFILES:T:S/.m$/.h/} \ ${CLEAN} # This is a hack. BFD "optimizes" away dynamic mode if there are no # dynamic references. We could probably do a '-Bforcedynamic' mode like # in the a.out ld. For now, this works. -HACK_EXTRA_FLAGS?= -shared hack.pico: Makefile :> hack.c - ${CC} ${HACK_EXTRA_FLAGS} -nostdlib hack.c -o hack.pico + ${CC} -shared ${CFLAGS} -nostdlib hack.c -o hack.pico rm -f hack.c offset.inc: $S/kern/genoffset.sh genoffset.o NM='${NM}' NMFLAGS='${NMFLAGS}' sh $S/kern/genoffset.sh genoffset.o > ${.TARGET} genoffset.o: $S/kern/genoffset.c ${CC} -c ${CFLAGS:N-flto:N-fno-common} $S/kern/genoffset.c # genoffset_test.o is not actually used for anything - the point of compiling it # is to exercise the CTASSERT that checks that the offsets in the offset.inc # _lite struct(s) match those in the original(s). genoffset_test.o: $S/kern/genoffset.c offset.inc ${CC} -c ${CFLAGS:N-flto:N-fno-common} -DOFFSET_TEST \ $S/kern/genoffset.c -o ${.TARGET} assym.inc: $S/kern/genassym.sh genassym.o genoffset_test.o NM='${NM}' NMFLAGS='${NMFLAGS}' sh $S/kern/genassym.sh genassym.o > ${.TARGET} genassym.o: $S/$M/$M/genassym.c offset.inc ${CC} -c ${CFLAGS:N-flto:N-fno-common} $S/$M/$M/genassym.c OBJS_DEPEND_GUESS+= opt_global.h genoffset.o genassym.o vers.o: opt_global.h .if !empty(.MAKE.MODE:Unormal:Mmeta) && empty(.MAKE.MODE:Unormal:Mnofilemon) _meta_filemon= 1 .endif # Skip reading .depend when not needed to speed up tree-walks and simple # lookups. For install, only do this if no other targets are specified. # Also skip generating or including .depend.* files if in meta+filemon mode # since it will track dependencies itself. OBJS_DEPEND_GUESS is still used # for _meta_filemon but not for _SKIP_DEPEND.
.if !defined(NO_SKIP_DEPEND) && \ ((!empty(.MAKEFLAGS:M-V) && empty(.MAKEFLAGS:M*DEP*)) || \ ${.TARGETS:M*obj} == ${.TARGETS} || \ ${.TARGETS:M*clean*} == ${.TARGETS} || \ ${.TARGETS:M*install*} == ${.TARGETS}) _SKIP_DEPEND= 1 .endif .if defined(_SKIP_DEPEND) || defined(_meta_filemon) .MAKE.DEPENDFILE= /dev/null .endif kernel-depend: .depend SRCS= assym.inc offset.inc vnode_if.h ${BEFORE_DEPEND} ${CFILES} \ ${SYSTEM_CFILES} ${GEN_CFILES} ${SFILES} \ ${MFILES:T:S/.m$/.h/} DEPENDOBJS+= ${SYSTEM_OBJS} genassym.o genoffset.o genoffset_test.o DEPENDOBJS+= ${CLEAN:M*.o} DEPENDFILES= ${DEPENDOBJS:O:u:C/^/.depend./} .if ${MAKE_VERSION} < 20160220 DEPEND_MP?= -MP .endif .if defined(_SKIP_DEPEND) # Don't bother reading any .meta files ${DEPENDOBJS}: .NOMETA .depend: .NOMETA # Unset these to avoid looping/statting on them later. .undef DEPENDOBJS .undef DEPENDFILES .endif # defined(_SKIP_DEPEND) DEPEND_CFLAGS+= -MD ${DEPEND_MP} -MF.depend.${.TARGET} DEPEND_CFLAGS+= -MT${.TARGET} .if !defined(_meta_filemon) .if !empty(DEPEND_CFLAGS) # Only add in DEPEND_CFLAGS for CFLAGS on files we expect from DEPENDOBJS # as those are the only ones we will include. DEPEND_CFLAGS_CONDITION= "${DEPENDOBJS:M${.TARGET}}" != "" CFLAGS+= ${${DEPEND_CFLAGS_CONDITION}:?${DEPEND_CFLAGS}:} .endif .for __depend_obj in ${DEPENDFILES} .if ${MAKE_VERSION} < 20160220 .sinclude "${.OBJDIR}/${__depend_obj}" .else .dinclude "${.OBJDIR}/${__depend_obj}" .endif .endfor .endif # !defined(_meta_filemon) # Always run 'make depend' to generate dependencies early and to avoid the # need for manually running it. For the kernel this is mostly a NOP since # all dependencies are correctly added or accounted for. This is mostly to # ensure downstream uses of kernel-depend are handled. beforebuild: kernel-depend # Guess some dependencies for when no ${DEPENDFILE}.OBJ is generated yet. # For meta+filemon the .meta file is checked for since it is the dependency # file used. .for __obj in ${DEPENDOBJS:O:u} .if defined(_meta_filemon) _depfile= ${.OBJDIR}/${__obj}.meta .else _depfile= ${.OBJDIR}/.depend.${__obj} .endif .if !exists(${_depfile}) .if ${SYSTEM_OBJS:M${__obj}} ${__obj}: ${OBJS_DEPEND_GUESS} .endif ${__obj}: ${OBJS_DEPEND_GUESS.${__obj}} .elif defined(_meta_filemon) # For meta mode we still need to know which file to depend on to avoid # ambiguous suffix transformation rules from .PATH. Meta mode does not # use .depend files. We really only need source files, not headers since # they are typically in SRCS/beforebuild already. For target-specific # guesses do include headers though since they may not be in SRCS. .if ${SYSTEM_OBJS:M${__obj}} ${__obj}: ${OBJS_DEPEND_GUESS:N*.h} .endif ${__obj}: ${OBJS_DEPEND_GUESS.${__obj}} .endif # !exists(${_depfile}) .endfor .NOPATH: .depend ${DEPENDFILES} .depend: .PRECIOUS ${SRCS} .if ${COMPILER_TYPE} == "clang" || \ (${COMPILER_TYPE} == "gcc" && ${COMPILER_VERSION} >= 60000) _MAP_DEBUG_PREFIX= yes .endif _ILINKS= machine .if ${MACHINE} != ${MACHINE_CPUARCH} && ${MACHINE} != "arm64" _ILINKS+= ${MACHINE_CPUARCH} .endif .if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64" _ILINKS+= x86 .endif # Ensure that the link exists without depending on it when it exists. # Ensure that debug info references the path in the source tree. 
.for _link in ${_ILINKS} .if !exists(${.OBJDIR}/${_link}) ${SRCS} ${DEPENDOBJS}: ${_link} .endif .if defined(_MAP_DEBUG_PREFIX) .if ${_link} == "machine" CFLAGS+= -fdebug-prefix-map=./machine=${SYSDIR}/${MACHINE}/include .else CFLAGS+= -fdebug-prefix-map=./${_link}=${SYSDIR}/${_link}/include .endif .endif .endfor ${_ILINKS}: @case ${.TARGET} in \ machine) \ path=${S}/${MACHINE}/include ;; \ *) \ path=${S}/${.TARGET}/include ;; \ esac ; \ ${ECHO} ${.TARGET} "->" $$path ; \ ln -fns $$path ${.TARGET} # .depend needs include links so we remove them only together. kernel-cleandepend: .PHONY rm -f .depend .depend.* ${_ILINKS} kernel-tags: @ls .depend.* > /dev/null 2>&1 || \ { echo "you must make depend first"; exit 1; } sh $S/conf/systags.sh kernel-install: .PHONY @if [ ! -f ${KERNEL_KO} ] ; then \ echo "You must build a kernel first." ; \ exit 1 ; \ fi .if exists(${DESTDIR}${KODIR}) -thiskernel=`sysctl -n kern.bootfile` ; \ if [ ! "`dirname "$$thiskernel"`" -ef ${DESTDIR}${KODIR} ] ; then \ chflags -R noschg ${DESTDIR}${KODIR} ; \ rm -rf ${DESTDIR}${KODIR} ; \ rm -rf ${DESTDIR}${KERN_DEBUGDIR}${KODIR} ; \ else \ if [ -d ${DESTDIR}${KODIR}.old ] ; then \ chflags -R noschg ${DESTDIR}${KODIR}.old ; \ rm -rf ${DESTDIR}${KODIR}.old ; \ fi ; \ mv ${DESTDIR}${KODIR} ${DESTDIR}${KODIR}.old ; \ if [ -n "${KERN_DEBUGDIR}" -a \ -d ${DESTDIR}${KERN_DEBUGDIR}${KODIR} ]; then \ rm -rf ${DESTDIR}${KERN_DEBUGDIR}${KODIR}.old ; \ mv ${DESTDIR}${KERN_DEBUGDIR}${KODIR} ${DESTDIR}${KERN_DEBUGDIR}${KODIR}.old ; \ fi ; \ sysctl kern.bootfile=${DESTDIR}${KODIR}.old/"`basename "$$thiskernel"`" ; \ fi .endif mkdir -p ${DESTDIR}${KODIR} ${INSTALL} -p -m 555 -o ${KMODOWN} -g ${KMODGRP} ${KERNEL_KO} ${DESTDIR}${KODIR}/ .if defined(DEBUG) && !defined(INSTALL_NODEBUG) && ${MK_KERNEL_SYMBOLS} != "no" mkdir -p ${DESTDIR}${KERN_DEBUGDIR}${KODIR} ${INSTALL} -p -m 555 -o ${KMODOWN} -g ${KMODGRP} ${KERNEL_KO}.debug ${DESTDIR}${KERN_DEBUGDIR}${KODIR}/ .endif .if defined(KERNEL_EXTRA_INSTALL) ${INSTALL} -p -m 555 -o ${KMODOWN} -g ${KMODGRP} ${KERNEL_EXTRA_INSTALL} ${DESTDIR}${KODIR}/ .endif kernel-reinstall: @-chflags -R noschg ${DESTDIR}${KODIR} ${INSTALL} -p -m 555 -o ${KMODOWN} -g ${KMODGRP} ${KERNEL_KO} ${DESTDIR}${KODIR}/ .if defined(DEBUG) && !defined(INSTALL_NODEBUG) && ${MK_KERNEL_SYMBOLS} != "no" ${INSTALL} -p -m 555 -o ${KMODOWN} -g ${KMODGRP} ${KERNEL_KO}.debug ${DESTDIR}${KERN_DEBUGDIR}${KODIR}/ .endif config.o env.o hints.o vers.o vnode_if.o: ${NORMAL_C} ${NORMAL_CTFCONVERT} .if ${MK_REPRODUCIBLE_BUILD} != "no" REPRO_FLAG="-R" .endif vers.c: $S/conf/newvers.sh $S/sys/param.h ${SYSTEM_DEP} MAKE="${MAKE}" sh $S/conf/newvers.sh ${REPRO_FLAG} ${KERN_IDENT} vnode_if.c: $S/tools/vnode_if.awk $S/kern/vnode_if.src ${AWK} -f $S/tools/vnode_if.awk $S/kern/vnode_if.src -c vnode_if.h vnode_if_newproto.h vnode_if_typedef.h: $S/tools/vnode_if.awk \ $S/kern/vnode_if.src vnode_if.h: vnode_if_newproto.h vnode_if_typedef.h ${AWK} -f $S/tools/vnode_if.awk $S/kern/vnode_if.src -h vnode_if_newproto.h: ${AWK} -f $S/tools/vnode_if.awk $S/kern/vnode_if.src -p vnode_if_typedef.h: ${AWK} -f $S/tools/vnode_if.awk $S/kern/vnode_if.src -q .if ${MFS_IMAGE:Uno} != "no" .if empty(MD_ROOT_SIZE_CONFIGURED) # Generate an object file from the file system image to embed in the kernel # via linking. Make sure the contents are in the mfs section and rename the # start/end/size variables to __start_mfs, __stop_mfs, and mfs_size, # respectively. 
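Note that the rule below actually exports the image under the names given to the --redefine-sym calls (mfs_root, mfs_root_end, __mfs_root_size) rather than the __start_mfs/__stop_mfs names mentioned in the comment; the kernel side consumes them roughly as in this sketch (assumed usage; it compiles stand-alone but links only against the generated embedfs object):

#include <stddef.h>

extern char mfs_root[];		/* first byte of the embedded image */
extern char mfs_root_end[];	/* one past the last byte */

size_t
mfs_image_size(void)
{
	return ((size_t)(mfs_root_end - mfs_root));
}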
embedfs_${MFS_IMAGE:T:R}.o: ${MFS_IMAGE} ${OBJCOPY} --input-target binary \ --output-target ${EMBEDFS_FORMAT.${MACHINE_ARCH}} \ --binary-architecture ${EMBEDFS_ARCH.${MACHINE_ARCH}} \ ${MFS_IMAGE} ${.TARGET} ${OBJCOPY} \ --rename-section .data=mfs,contents,alloc,load,readonly,data \ --redefine-sym \ _binary_${MFS_IMAGE:C,[^[:alnum:]],_,g}_size=__mfs_root_size \ --redefine-sym \ _binary_${MFS_IMAGE:C,[^[:alnum:]],_,g}_start=mfs_root \ --redefine-sym \ _binary_${MFS_IMAGE:C,[^[:alnum:]],_,g}_end=mfs_root_end \ ${.TARGET} .endif .endif # XXX strictly, everything depends on Makefile because changes to ${PROF} # only appear there, but we don't handle that. .include "kern.mk" Index: projects/clang1000-import/sys/dev/mrsas/mrsas_cam.c =================================================================== --- projects/clang1000-import/sys/dev/mrsas/mrsas_cam.c (revision 357178) +++ projects/clang1000-import/sys/dev/mrsas/mrsas_cam.c (revision 357179) @@ -1,2154 +1,2155 @@ /* * Copyright (c) 2015, AVAGO Tech. All rights reserved. Author: Marian Choy * Copyright (c) 2014, LSI Corp. All rights reserved. Author: Marian Choy * Support: freebsdraid@avagotech.com * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. 2. Redistributions * in binary form must reproduce the above copyright notice, this list of * conditions and the following disclaimer in the documentation and/or other * materials provided with the distribution. 3. Neither the name of the * nor the names of its contributors may be used to endorse or * promote products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include "dev/mrsas/mrsas.h" #include #include #include #include #include #include #include #include #include #include #include #include /* XXX for pcpu.h */ #include /* XXX for PCPU_GET */ #define smp_processor_id() PCPU_GET(cpuid) /* * Function prototypes */ int mrsas_cam_attach(struct mrsas_softc *sc); int mrsas_find_io_type(struct cam_sim *sim, union ccb *ccb); int mrsas_bus_scan(struct mrsas_softc *sc); int mrsas_bus_scan_sim(struct mrsas_softc *sc, struct cam_sim *sim); int mrsas_map_request(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd, union ccb *ccb); int mrsas_build_ldio_rw(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd, union ccb *ccb); int mrsas_build_ldio_nonrw(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd, union ccb *ccb); int mrsas_build_syspdio(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd, union ccb *ccb, struct cam_sim *sim, u_int8_t fp_possible); int mrsas_setup_io(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd, union ccb *ccb, u_int32_t device_id, MRSAS_RAID_SCSI_IO_REQUEST * io_request); void mrsas_xpt_freeze(struct mrsas_softc *sc); void mrsas_xpt_release(struct mrsas_softc *sc); void mrsas_cam_detach(struct mrsas_softc *sc); void mrsas_release_mpt_cmd(struct mrsas_mpt_cmd *cmd); void mrsas_unmap_request(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd); void mrsas_cmd_done(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd); void mrsas_fire_cmd(struct mrsas_softc *sc, u_int32_t req_desc_lo, u_int32_t req_desc_hi); void mrsas_set_pd_lba(MRSAS_RAID_SCSI_IO_REQUEST * io_request, u_int8_t cdb_len, struct IO_REQUEST_INFO *io_info, union ccb *ccb, MR_DRV_RAID_MAP_ALL * local_map_ptr, u_int32_t ref_tag, u_int32_t ld_block_size); static void mrsas_freeze_simq(struct mrsas_mpt_cmd *cmd, struct cam_sim *sim); static void mrsas_cam_poll(struct cam_sim *sim); static void mrsas_action(struct cam_sim *sim, union ccb *ccb); static void mrsas_scsiio_timeout(void *data); static int mrsas_track_scsiio(struct mrsas_softc *sc, target_id_t id, u_int32_t bus_id); static void mrsas_tm_response_code(struct mrsas_softc *sc, MPI2_SCSI_TASK_MANAGE_REPLY *mpi_reply); static int mrsas_issue_tm(struct mrsas_softc *sc, MRSAS_REQUEST_DESCRIPTOR_UNION *req_desc); static void mrsas_data_load_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error); static int32_t mrsas_startio(struct mrsas_softc *sc, struct cam_sim *sim, union ccb *ccb); static boolean_t mrsas_is_prp_possible(struct mrsas_mpt_cmd *cmd, bus_dma_segment_t *segs, int nsegs); static void mrsas_build_ieee_sgl(struct mrsas_mpt_cmd *cmd, bus_dma_segment_t *segs, int nseg); static void mrsas_build_prp_nvme(struct mrsas_mpt_cmd *cmd, bus_dma_segment_t *segs, int nseg); struct mrsas_mpt_cmd *mrsas_get_mpt_cmd(struct mrsas_softc *sc); MRSAS_REQUEST_DESCRIPTOR_UNION * mrsas_get_request_desc(struct mrsas_softc *sc, u_int16_t index); extern int mrsas_reset_targets(struct mrsas_softc *sc); extern u_int16_t MR_TargetIdToLdGet(u_int32_t ldTgtId, MR_DRV_RAID_MAP_ALL * map); extern u_int32_t MR_LdBlockSizeGet(u_int32_t ldTgtId, MR_DRV_RAID_MAP_ALL * map); extern void mrsas_isr(void *arg); extern void mrsas_aen_handler(struct mrsas_softc *sc); extern u_int8_t MR_BuildRaidContext(struct mrsas_softc *sc, struct IO_REQUEST_INFO *io_info, RAID_CONTEXT * pRAID_Context, MR_DRV_RAID_MAP_ALL * map); extern u_int16_t MR_LdSpanArrayGet(u_int32_t ld, u_int32_t span, MR_DRV_RAID_MAP_ALL * map); extern u_int16_t mrsas_get_updated_dev_handle(struct mrsas_softc *sc, PLD_LOAD_BALANCE_INFO lbInfo, struct 
IO_REQUEST_INFO *io_info); extern int mrsas_complete_cmd(struct mrsas_softc *sc, u_int32_t MSIxIndex); extern MR_LD_RAID *MR_LdRaidGet(u_int32_t ld, MR_DRV_RAID_MAP_ALL * map); extern void mrsas_disable_intr(struct mrsas_softc *sc); extern void mrsas_enable_intr(struct mrsas_softc *sc); void mrsas_prepare_secondRaid1_IO(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd); /* * mrsas_cam_attach: Main entry to CAM subsystem * input: Adapter instance soft state * * This function is called from mrsas_attach() during initialization to perform * SIM allocations and XPT bus registration. If the kernel version is 7.4 or * earlier, it would also initiate a bus scan. */ int mrsas_cam_attach(struct mrsas_softc *sc) { struct cam_devq *devq; int mrsas_cam_depth; mrsas_cam_depth = sc->max_scsi_cmds; if ((devq = cam_simq_alloc(mrsas_cam_depth)) == NULL) { device_printf(sc->mrsas_dev, "Cannot allocate SIM queue\n"); return (ENOMEM); } /* * Create SIM for bus 0 and register, also create path */ sc->sim_0 = cam_sim_alloc(mrsas_action, mrsas_cam_poll, "mrsas", sc, device_get_unit(sc->mrsas_dev), &sc->sim_lock, mrsas_cam_depth, mrsas_cam_depth, devq); if (sc->sim_0 == NULL) { cam_simq_free(devq); device_printf(sc->mrsas_dev, "Cannot register SIM\n"); return (ENXIO); } /* Initialize taskqueue for Event Handling */ TASK_INIT(&sc->ev_task, 0, (void *)mrsas_aen_handler, sc); sc->ev_tq = taskqueue_create("mrsas_taskq", M_NOWAIT | M_ZERO, taskqueue_thread_enqueue, &sc->ev_tq); /* Run the task queue with lowest priority */ taskqueue_start_threads(&sc->ev_tq, 1, 255, "%s taskq", device_get_nameunit(sc->mrsas_dev)); mtx_lock(&sc->sim_lock); if (xpt_bus_register(sc->sim_0, sc->mrsas_dev, 0) != CAM_SUCCESS) { cam_sim_free(sc->sim_0, TRUE); /* passing true frees the devq */ mtx_unlock(&sc->sim_lock); return (ENXIO); } if (xpt_create_path(&sc->path_0, NULL, cam_sim_path(sc->sim_0), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_bus_deregister(cam_sim_path(sc->sim_0)); cam_sim_free(sc->sim_0, TRUE); /* passing true will free the * devq */ mtx_unlock(&sc->sim_lock); return (ENXIO); } mtx_unlock(&sc->sim_lock); /* * Create SIM for bus 1 and register, also create path */ sc->sim_1 = cam_sim_alloc(mrsas_action, mrsas_cam_poll, "mrsas", sc, device_get_unit(sc->mrsas_dev), &sc->sim_lock, mrsas_cam_depth, mrsas_cam_depth, devq); if (sc->sim_1 == NULL) { cam_simq_free(devq); device_printf(sc->mrsas_dev, "Cannot register SIM\n"); return (ENXIO); } mtx_lock(&sc->sim_lock); if (xpt_bus_register(sc->sim_1, sc->mrsas_dev, 1) != CAM_SUCCESS) { cam_sim_free(sc->sim_1, TRUE); /* passing true frees the devq */ mtx_unlock(&sc->sim_lock); return (ENXIO); } if (xpt_create_path(&sc->path_1, NULL, cam_sim_path(sc->sim_1), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_bus_deregister(cam_sim_path(sc->sim_1)); cam_sim_free(sc->sim_1, TRUE); mtx_unlock(&sc->sim_lock); return (ENXIO); } mtx_unlock(&sc->sim_lock); #if (__FreeBSD_version <= 704000) if (mrsas_bus_scan(sc)) { device_printf(sc->mrsas_dev, "Error in bus scan.\n"); return (1); } #endif return (0); } /* * mrsas_cam_detach: De-allocates and teardown CAM * input: Adapter instance soft state * * De-registers and frees the paths and SIMs. 
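 * Note the asymmetric cam_sim_free() calls below: both SIMs share the devq * allocated in mrsas_cam_attach(), so it is freed only once, with the last SIM.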
*/ void mrsas_cam_detach(struct mrsas_softc *sc) { if (sc->ev_tq != NULL) taskqueue_free(sc->ev_tq); mtx_lock(&sc->sim_lock); if (sc->path_0) xpt_free_path(sc->path_0); if (sc->sim_0) { xpt_bus_deregister(cam_sim_path(sc->sim_0)); cam_sim_free(sc->sim_0, FALSE); } if (sc->path_1) xpt_free_path(sc->path_1); if (sc->sim_1) { xpt_bus_deregister(cam_sim_path(sc->sim_1)); cam_sim_free(sc->sim_1, TRUE); } mtx_unlock(&sc->sim_lock); } /* * mrsas_action: SIM callback entry point * input: pointer to SIM pointer to CAM Control Block * * This function processes CAM subsystem requests. The type of request is stored * in ccb->ccb_h.func_code. The preprocessor #ifdef is necessary because * ccb->cpi.maxio is not supported for FreeBSD version 7.4 or earlier. */ static void mrsas_action(struct cam_sim *sim, union ccb *ccb) { struct mrsas_softc *sc = (struct mrsas_softc *)cam_sim_softc(sim); struct ccb_hdr *ccb_h = &(ccb->ccb_h); u_int32_t device_id; /* * Check if the system going down * or the adapter is in unrecoverable critical error */ if (sc->remove_in_progress || (sc->adprecovery == MRSAS_HW_CRITICAL_ERROR)) { ccb->ccb_h.status |= CAM_DEV_NOT_THERE; xpt_done(ccb); return; } switch (ccb->ccb_h.func_code) { case XPT_SCSI_IO: { device_id = ccb_h->target_id; /* * bus 0 is LD, bus 1 is for system-PD */ if (cam_sim_bus(sim) == 1 && sc->pd_list[device_id].driveState != MR_PD_STATE_SYSTEM) { ccb->ccb_h.status |= CAM_DEV_NOT_THERE; xpt_done(ccb); } else { if (mrsas_startio(sc, sim, ccb)) { ccb->ccb_h.status |= CAM_REQ_INVALID; xpt_done(ccb); } } break; } case XPT_ABORT: { ccb->ccb_h.status = CAM_UA_ABORT; xpt_done(ccb); break; } case XPT_RESET_BUS: { xpt_done(ccb); break; } case XPT_GET_TRAN_SETTINGS: { ccb->cts.protocol = PROTO_SCSI; ccb->cts.protocol_version = SCSI_REV_2; ccb->cts.transport = XPORT_SPI; ccb->cts.transport_version = 2; ccb->cts.xport_specific.spi.valid = CTS_SPI_VALID_DISC; ccb->cts.xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB; ccb->cts.proto_specific.scsi.valid = CTS_SCSI_VALID_TQ; ccb->cts.proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB; ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); break; } case XPT_SET_TRAN_SETTINGS: { ccb->ccb_h.status = CAM_FUNC_NOTAVAIL; xpt_done(ccb); break; } case XPT_CALC_GEOMETRY: { cam_calc_geometry(&ccb->ccg, 1); xpt_done(ccb); break; } case XPT_PATH_INQ: { ccb->cpi.version_num = 1; ccb->cpi.hba_inquiry = 0; ccb->cpi.target_sprt = 0; #if (__FreeBSD_version >= 902001) ccb->cpi.hba_misc = PIM_UNMAPPED; #else ccb->cpi.hba_misc = 0; #endif ccb->cpi.hba_eng_cnt = 0; ccb->cpi.max_lun = MRSAS_SCSI_MAX_LUNS; ccb->cpi.unit_number = cam_sim_unit(sim); ccb->cpi.bus_id = cam_sim_bus(sim); ccb->cpi.initiator_id = MRSAS_SCSI_INITIATOR_ID; ccb->cpi.base_transfer_speed = 150000; strlcpy(ccb->cpi.sim_vid, "FreeBSD", SIM_IDLEN); strlcpy(ccb->cpi.hba_vid, "AVAGO", HBA_IDLEN); strlcpy(ccb->cpi.dev_name, cam_sim_name(sim), DEV_IDLEN); ccb->cpi.transport = XPORT_SPI; ccb->cpi.transport_version = 2; ccb->cpi.protocol = PROTO_SCSI; ccb->cpi.protocol_version = SCSI_REV_2; if (ccb->cpi.bus_id == 0) ccb->cpi.max_target = MRSAS_MAX_PD - 1; else ccb->cpi.max_target = MRSAS_MAX_LD_IDS - 1; #if (__FreeBSD_version > 704000) ccb->cpi.maxio = sc->max_num_sge * MRSAS_PAGE_SIZE; #endif ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); break; } default: { ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); break; } } } /* * mrsas_scsiio_timeout: Callback function for IO timed out * input: mpt command context * * This function will execute after timeout value provided by ccb header from * CAM 
layer, if timer expires. Driver will run timer for all DCMD and LDIO * coming from CAM layer. This function is the callback function for IO timeout * and it runs in no-sleep context. Set do_timedout_reset in Adapter context * so that it will execute OCR/Kill adapter from ocr_thread context. */ static void mrsas_scsiio_timeout(void *data) { struct mrsas_mpt_cmd *cmd; struct mrsas_softc *sc; u_int32_t target_id; if (!data) return; cmd = (struct mrsas_mpt_cmd *)data; sc = cmd->sc; if (cmd->ccb_ptr == NULL) { printf("command timeout with NULL ccb\n"); return; } /* * Below callout is dummy entry so that it will be cancelled from * mrsas_cmd_done(). Now Controller will go to OCR/Kill Adapter based * on OCR enable/disable property of Controller from ocr_thread * context. */ #if (__FreeBSD_version >= 1000510) callout_reset_sbt(&cmd->cm_callout, SBT_1S * 180, 0, mrsas_scsiio_timeout, cmd, 0); #else callout_reset(&cmd->cm_callout, (180000 * hz) / 1000, mrsas_scsiio_timeout, cmd); #endif if (cmd->ccb_ptr->cpi.bus_id == 0) target_id = cmd->ccb_ptr->ccb_h.target_id; else target_id = (cmd->ccb_ptr->ccb_h.target_id + (MRSAS_MAX_PD - 1)); /* Save the cmd to be processed for TM, if it is not there in the array */ if (sc->target_reset_pool[target_id] == NULL) { sc->target_reset_pool[target_id] = cmd; mrsas_atomic_inc(&sc->target_reset_outstanding); } return; } /* * mrsas_startio: SCSI IO entry point * input: Adapter instance soft state * pointer to CAM Control Block * * This function is the SCSI IO entry point and it initiates IO processing. It * copies the IO and, depending on whether the IO is read/write or inquiry, * calls mrsas_build_ldio() or mrsas_build_dcdb(), respectively. It returns 0 * if the command is sent to firmware successfully, otherwise it returns 1. */ static int32_t mrsas_startio(struct mrsas_softc *sc, struct cam_sim *sim, union ccb *ccb) { struct mrsas_mpt_cmd *cmd, *r1_cmd = NULL; struct ccb_hdr *ccb_h = &(ccb->ccb_h); struct ccb_scsiio *csio = &(ccb->csio); MRSAS_REQUEST_DESCRIPTOR_UNION *req_desc; u_int8_t cmd_type; if ((csio->cdb_io.cdb_bytes[0]) == SYNCHRONIZE_CACHE && (!sc->fw_sync_cache_support)) { ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return (0); } ccb_h->status |= CAM_SIM_QUEUED; if (mrsas_atomic_inc_return(&sc->fw_outstanding) > sc->max_scsi_cmds) { ccb_h->status |= CAM_REQUEUE_REQ; xpt_done(ccb); mrsas_atomic_dec(&sc->fw_outstanding); return (0); } cmd = mrsas_get_mpt_cmd(sc); if (!cmd) { ccb_h->status |= CAM_REQUEUE_REQ; xpt_done(ccb); mrsas_atomic_dec(&sc->fw_outstanding); return (0); } if ((ccb_h->flags & CAM_DIR_MASK) != CAM_DIR_NONE) { if (ccb_h->flags & CAM_DIR_IN) cmd->flags |= MRSAS_DIR_IN; if (ccb_h->flags & CAM_DIR_OUT) cmd->flags |= MRSAS_DIR_OUT; } else cmd->flags = MRSAS_DIR_NONE; /* no data */ /* For FreeBSD 9.2 and higher */ #if (__FreeBSD_version >= 902001) /* * XXX We don't yet support physical addresses here.
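 * Requests carrying CAM_DATA_PADDR or CAM_DATA_SG_PADDR (and, in this driver, * CAM_DATA_SG) are rejected with CAM_REQ_INVALID in the switch below; only * CAM_DATA_VADDR and CAM_DATA_BIO are accepted and mapped.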
*/ switch ((ccb->ccb_h.flags & CAM_DATA_MASK)) { case CAM_DATA_PADDR: case CAM_DATA_SG_PADDR: device_printf(sc->mrsas_dev, "%s: physical addresses not supported\n", __func__); mrsas_release_mpt_cmd(cmd); ccb_h->status = CAM_REQ_INVALID; ccb_h->status &= ~CAM_SIM_QUEUED; goto done; case CAM_DATA_SG: device_printf(sc->mrsas_dev, "%s: scatter gather is not supported\n", __func__); mrsas_release_mpt_cmd(cmd); ccb_h->status = CAM_REQ_INVALID; goto done; case CAM_DATA_VADDR: if (csio->dxfer_len > (sc->max_num_sge * MRSAS_PAGE_SIZE)) { mrsas_release_mpt_cmd(cmd); ccb_h->status = CAM_REQ_TOO_BIG; goto done; } cmd->length = csio->dxfer_len; if (cmd->length) cmd->data = csio->data_ptr; break; case CAM_DATA_BIO: if (csio->dxfer_len > (sc->max_num_sge * MRSAS_PAGE_SIZE)) { mrsas_release_mpt_cmd(cmd); ccb_h->status = CAM_REQ_TOO_BIG; goto done; } cmd->length = csio->dxfer_len; if (cmd->length) cmd->data = csio->data_ptr; break; default: ccb->ccb_h.status = CAM_REQ_INVALID; goto done; } #else if (!(ccb_h->flags & CAM_DATA_PHYS)) { /* Virtual data address */ if (!(ccb_h->flags & CAM_SCATTER_VALID)) { if (csio->dxfer_len > (sc->max_num_sge * MRSAS_PAGE_SIZE)) { mrsas_release_mpt_cmd(cmd); ccb_h->status = CAM_REQ_TOO_BIG; goto done; } cmd->length = csio->dxfer_len; if (cmd->length) cmd->data = csio->data_ptr; } else { mrsas_release_mpt_cmd(cmd); ccb_h->status = CAM_REQ_INVALID; goto done; } } else { /* Data addresses are physical. */ mrsas_release_mpt_cmd(cmd); ccb_h->status = CAM_REQ_INVALID; ccb_h->status &= ~CAM_SIM_QUEUED; goto done; } #endif /* save ccb ptr */ cmd->ccb_ptr = ccb; req_desc = mrsas_get_request_desc(sc, (cmd->index) - 1); if (!req_desc) { device_printf(sc->mrsas_dev, "Cannot get request_descriptor.\n"); return (FAIL); } memset(req_desc, 0, sizeof(MRSAS_REQUEST_DESCRIPTOR_UNION)); cmd->request_desc = req_desc; if (ccb_h->flags & CAM_CDB_POINTER) bcopy(csio->cdb_io.cdb_ptr, cmd->io_request->CDB.CDB32, csio->cdb_len); else bcopy(csio->cdb_io.cdb_bytes, cmd->io_request->CDB.CDB32, csio->cdb_len); mtx_lock(&sc->raidmap_lock); /* Check for IO type READ-WRITE targeted for Logical Volume */ cmd_type = mrsas_find_io_type(sim, ccb); switch (cmd_type) { case READ_WRITE_LDIO: /* Build READ-WRITE IO for Logical Volume */ if (mrsas_build_ldio_rw(sc, cmd, ccb)) { device_printf(sc->mrsas_dev, "Build RW LDIO failed.\n"); mtx_unlock(&sc->raidmap_lock); mrsas_release_mpt_cmd(cmd); return (1); } break; case NON_READ_WRITE_LDIO: /* Build NON READ-WRITE IO for Logical Volume */ if (mrsas_build_ldio_nonrw(sc, cmd, ccb)) { device_printf(sc->mrsas_dev, "Build NON-RW LDIO failed.\n"); mtx_unlock(&sc->raidmap_lock); mrsas_release_mpt_cmd(cmd); return (1); } break; case READ_WRITE_SYSPDIO: case NON_READ_WRITE_SYSPDIO: if (sc->secure_jbod_support && (cmd_type == NON_READ_WRITE_SYSPDIO)) { /* Build NON-RW IO for JBOD */ if (mrsas_build_syspdio(sc, cmd, ccb, sim, 0)) { device_printf(sc->mrsas_dev, "Build SYSPDIO failed.\n"); mtx_unlock(&sc->raidmap_lock); mrsas_release_mpt_cmd(cmd); return (1); } } else { /* Build RW IO for JBOD */ if (mrsas_build_syspdio(sc, cmd, ccb, sim, 1)) { device_printf(sc->mrsas_dev, "Build SYSPDIO failed.\n"); mtx_unlock(&sc->raidmap_lock); mrsas_release_mpt_cmd(cmd); return (1); } } } mtx_unlock(&sc->raidmap_lock); if (cmd->flags == MRSAS_DIR_IN) /* from device */ cmd->io_request->Control |= MPI2_SCSIIO_CONTROL_READ; else if (cmd->flags == MRSAS_DIR_OUT) /* to device */ cmd->io_request->Control |= MPI2_SCSIIO_CONTROL_WRITE; cmd->io_request->SGLFlags = MPI2_SGE_FLAGS_64_BIT_ADDRESSING; 
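/* * Common frame setup for all IO types: SGLOffset0 below is expressed in * 32-bit words, the preallocated per-command sense buffer is attached, and * the request descriptor is stamped with this command's SMID so the * completion path can match it back. */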
cmd->io_request->SGLOffset0 = offsetof(MRSAS_RAID_SCSI_IO_REQUEST, SGL) / 4; cmd->io_request->SenseBufferLowAddress = cmd->sense_phys_addr; cmd->io_request->SenseBufferLength = MRSAS_SCSI_SENSE_BUFFERSIZE; req_desc = cmd->request_desc; req_desc->SCSIIO.SMID = cmd->index; /* * Start timer for IO timeout. The timeout value used here is 180 seconds. */ cmd->callout_owner = true; #if (__FreeBSD_version >= 1000510) callout_reset_sbt(&cmd->cm_callout, SBT_1S * 180, 0, mrsas_scsiio_timeout, cmd, 0); #else callout_reset(&cmd->cm_callout, (180000 * hz) / 1000, mrsas_scsiio_timeout, cmd); #endif if (mrsas_atomic_read(&sc->fw_outstanding) > sc->io_cmds_highwater) sc->io_cmds_highwater++; /* * If this is a RAID 1/10 fast path write, the second command has already * been taken from the pool; prepare and fire both. FW has confirmed that * the LBA values of the two PDs backing a single R1/10 LD are always the * same. */ /* * driver side count always should be less than max_fw_cmds to get * new command */ if (cmd->r1_alt_dev_handle != MR_DEVHANDLE_INVALID) { mrsas_prepare_secondRaid1_IO(sc, cmd); mrsas_fire_cmd(sc, req_desc->addr.u.low, req_desc->addr.u.high); r1_cmd = cmd->peer_cmd; mrsas_fire_cmd(sc, r1_cmd->request_desc->addr.u.low, r1_cmd->request_desc->addr.u.high); } else { mrsas_fire_cmd(sc, req_desc->addr.u.low, req_desc->addr.u.high); } return (0); done: xpt_done(ccb); mrsas_atomic_dec(&sc->fw_outstanding); return (0); } /* * mrsas_find_io_type: Determines if IO is read/write or inquiry * input: pointer to CAM Control Block * * This function determines if the IO is read/write or inquiry. It returns a 1 * if the IO is read/write and 0 if it is inquiry. */ int mrsas_find_io_type(struct cam_sim *sim, union ccb *ccb) { struct ccb_scsiio *csio = &(ccb->csio); switch (csio->cdb_io.cdb_bytes[0]) { case READ_10: case WRITE_10: case READ_12: case WRITE_12: case READ_6: case WRITE_6: case READ_16: case WRITE_16: return (cam_sim_bus(sim) ? READ_WRITE_SYSPDIO : READ_WRITE_LDIO); default: return (cam_sim_bus(sim) ? NON_READ_WRITE_SYSPDIO : NON_READ_WRITE_LDIO); } } /* * mrsas_get_mpt_cmd: Get a cmd from free command pool * input: Adapter instance soft state * * This function removes an MPT command from the command free list and * initializes it. */ struct mrsas_mpt_cmd * mrsas_get_mpt_cmd(struct mrsas_softc *sc) { struct mrsas_mpt_cmd *cmd = NULL; mtx_lock(&sc->mpt_cmd_pool_lock); if (!TAILQ_EMPTY(&sc->mrsas_mpt_cmd_list_head)) { cmd = TAILQ_FIRST(&sc->mrsas_mpt_cmd_list_head); TAILQ_REMOVE(&sc->mrsas_mpt_cmd_list_head, cmd, next); } else { goto out; } memset((uint8_t *)cmd->io_request, 0, MRSAS_MPI2_RAID_DEFAULT_IO_FRAME_SIZE); cmd->data = NULL; cmd->length = 0; cmd->flags = 0; cmd->error_code = 0; cmd->load_balance = 0; cmd->ccb_ptr = NULL; out: mtx_unlock(&sc->mpt_cmd_pool_lock); return cmd; } /* * mrsas_release_mpt_cmd: Return a cmd to free command pool * input: Command packet for return to free command pool * * This function returns an MPT command to the free command list.
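 * The RAID 1 bookkeeping fields (r1_alt_dev_handle, peer_cmd, cmd_completed) * are reset on release so that a recycled command never re-enters circulation * with a stale peer reference.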
*/ void mrsas_release_mpt_cmd(struct mrsas_mpt_cmd *cmd) { struct mrsas_softc *sc = cmd->sc; mtx_lock(&sc->mpt_cmd_pool_lock); cmd->r1_alt_dev_handle = MR_DEVHANDLE_INVALID; cmd->sync_cmd_idx = (u_int32_t)MRSAS_ULONG_MAX; cmd->peer_cmd = NULL; cmd->cmd_completed = 0; memset((uint8_t *)cmd->io_request, 0, sizeof(MRSAS_RAID_SCSI_IO_REQUEST)); TAILQ_INSERT_HEAD(&(sc->mrsas_mpt_cmd_list_head), cmd, next); mtx_unlock(&sc->mpt_cmd_pool_lock); return; } /* * mrsas_get_request_desc: Get request descriptor from array * input: Adapter instance soft state * SMID index * * This function returns a pointer to the request descriptor. */ MRSAS_REQUEST_DESCRIPTOR_UNION * mrsas_get_request_desc(struct mrsas_softc *sc, u_int16_t index) { u_int8_t *p; KASSERT(index < sc->max_fw_cmds, ("req_desc is out of range")); p = sc->req_desc + sizeof(MRSAS_REQUEST_DESCRIPTOR_UNION) * index; return (MRSAS_REQUEST_DESCRIPTOR_UNION *) p; } /* mrsas_prepare_secondRaid1_IO * It prepares the raid 1 second IO */ void mrsas_prepare_secondRaid1_IO(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd) { MRSAS_REQUEST_DESCRIPTOR_UNION *req_desc, *req_desc2 = NULL; struct mrsas_mpt_cmd *r1_cmd; r1_cmd = cmd->peer_cmd; req_desc = cmd->request_desc; /* * copy the io request frame as well as 8 SGEs data for r1 * command */ memcpy(r1_cmd->io_request, cmd->io_request, (sizeof(MRSAS_RAID_SCSI_IO_REQUEST))); memcpy(&r1_cmd->io_request->SGL, &cmd->io_request->SGL, (sc->max_sge_in_main_msg * sizeof(MPI2_SGE_IO_UNION))); /* sense buffer is different for r1 command */ r1_cmd->io_request->SenseBufferLowAddress = r1_cmd->sense_phys_addr; r1_cmd->ccb_ptr = cmd->ccb_ptr; req_desc2 = mrsas_get_request_desc(sc, r1_cmd->index - 1); req_desc2->addr.Words = 0; r1_cmd->request_desc = req_desc2; req_desc2->SCSIIO.SMID = r1_cmd->index; req_desc2->SCSIIO.RequestFlags = req_desc->SCSIIO.RequestFlags; r1_cmd->request_desc->SCSIIO.DevHandle = cmd->r1_alt_dev_handle; r1_cmd->r1_alt_dev_handle = cmd->io_request->DevHandle; r1_cmd->io_request->DevHandle = cmd->r1_alt_dev_handle; cmd->io_request->RaidContext.raid_context_g35.smid.peerSMID = r1_cmd->index; r1_cmd->io_request->RaidContext.raid_context_g35.smid.peerSMID = cmd->index; /* * MSIxIndex of both commands request descriptors * should be same */ r1_cmd->request_desc->SCSIIO.MSIxIndex = cmd->request_desc->SCSIIO.MSIxIndex; /* span arm is different for r1 cmd */ r1_cmd->io_request->RaidContext.raid_context_g35.spanArm = cmd->io_request->RaidContext.raid_context_g35.spanArm + 1; } /* * mrsas_build_ldio_rw: Builds an LDIO command * input: Adapter instance soft state * Pointer to command packet * Pointer to CCB * * This function builds the LDIO command packet. It returns 0 if the command is * built successfully, otherwise it returns a 1. 
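 * On pre-Ventura controllers the SGE count is carried in two 8-bit RAID * context fields, effectively: * *	raid_context.numSGE    = sge_count & 0xFF; *	raid_context.numSGEExt = sge_count >> 8; * * Ventura/Aero controllers have a wide numSGE field and need no split.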
*/ int mrsas_build_ldio_rw(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd, union ccb *ccb) { struct ccb_hdr *ccb_h = &(ccb->ccb_h); struct ccb_scsiio *csio = &(ccb->csio); u_int32_t device_id; MRSAS_RAID_SCSI_IO_REQUEST *io_request; device_id = ccb_h->target_id; io_request = cmd->io_request; io_request->RaidContext.raid_context.VirtualDiskTgtId = device_id; io_request->RaidContext.raid_context.status = 0; io_request->RaidContext.raid_context.exStatus = 0; /* just the cdb len, other flags zero, and ORed-in later for FP */ io_request->IoFlags = csio->cdb_len; if (mrsas_setup_io(sc, cmd, ccb, device_id, io_request) != SUCCESS) device_printf(sc->mrsas_dev, "Build ldio or fpio error\n"); io_request->DataLength = cmd->length; if (mrsas_map_request(sc, cmd, ccb) == SUCCESS) { if (cmd->sge_count > sc->max_num_sge) { device_printf(sc->mrsas_dev, "Error: sge_count (0x%x) exceeds" "max (0x%x) allowed\n", cmd->sge_count, sc->max_num_sge); return (FAIL); } if (sc->is_ventura || sc->is_aero) io_request->RaidContext.raid_context_g35.numSGE = cmd->sge_count; else { /* * numSGE stores the lower 8 bits of sge_count; numSGEExt stores * the upper 8 bits of sge_count */ io_request->RaidContext.raid_context.numSGE = cmd->sge_count; io_request->RaidContext.raid_context.numSGEExt = (uint8_t)(cmd->sge_count >> 8); } } else { device_printf(sc->mrsas_dev, "Data map/load failed.\n"); return (FAIL); } return (0); } /* stream detection on read and write IOs */ static void mrsas_stream_detect(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd, struct IO_REQUEST_INFO *io_info) { u_int32_t device_id = io_info->ldTgtId; LD_STREAM_DETECT *current_ld_SD = sc->streamDetectByLD[device_id]; u_int32_t *track_stream = &current_ld_SD->mruBitMap; u_int32_t streamNum, shiftedValues, unshiftedValues; u_int32_t indexValueMask, shiftedValuesMask; int i; boolean_t isReadAhead = false; STREAM_DETECT *current_SD; /* find possible stream */ for (i = 0; i < MAX_STREAMS_TRACKED; ++i) { streamNum = (*track_stream >> (i * BITS_PER_INDEX_STREAM)) & STREAM_MASK; current_SD = &current_ld_SD->streamTrack[streamNum]; /* * if we found a stream, update the raid context and * also update the mruBitMap */ if (current_SD->nextSeqLBA && io_info->ldStartBlock >= current_SD->nextSeqLBA && (io_info->ldStartBlock <= (current_SD->nextSeqLBA+32)) && (current_SD->isRead == io_info->isRead)) { if (io_info->ldStartBlock != current_SD->nextSeqLBA && (!io_info->isRead || !isReadAhead)) { /* * Once the API is available we need to change this.
* At this point we are not allowing any gap */ continue; } cmd->io_request->RaidContext.raid_context_g35.streamDetected = TRUE; current_SD->nextSeqLBA = io_info->ldStartBlock + io_info->numBlocks; /* * update the mruBitMap LRU */ shiftedValuesMask = (1 << i * BITS_PER_INDEX_STREAM) - 1 ; shiftedValues = ((*track_stream & shiftedValuesMask) << BITS_PER_INDEX_STREAM); indexValueMask = STREAM_MASK << i * BITS_PER_INDEX_STREAM; unshiftedValues = (*track_stream) & (~(shiftedValuesMask | indexValueMask)); *track_stream = (unshiftedValues | shiftedValues | streamNum); return; } } /* * if we did not find any stream, create a new one from the least recently used */ streamNum = (*track_stream >> ((MAX_STREAMS_TRACKED - 1) * BITS_PER_INDEX_STREAM)) & STREAM_MASK; current_SD = &current_ld_SD->streamTrack[streamNum]; current_SD->isRead = io_info->isRead; current_SD->nextSeqLBA = io_info->ldStartBlock + io_info->numBlocks; *track_stream = (((*track_stream & ZERO_LAST_STREAM) << 4) | streamNum); return; } /* * mrsas_setup_io: Set up data including Fast Path I/O * input: Adapter instance soft state * Pointer to command packet * Pointer to CCB * * This function sets up the IO request, including Fast Path I/O where * possible. It returns 0 if the command is set up successfully, otherwise it * returns a 1. */ int mrsas_setup_io(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd, union ccb *ccb, u_int32_t device_id, MRSAS_RAID_SCSI_IO_REQUEST * io_request) { struct ccb_hdr *ccb_h = &(ccb->ccb_h); struct ccb_scsiio *csio = &(ccb->csio); struct IO_REQUEST_INFO io_info; MR_DRV_RAID_MAP_ALL *map_ptr; struct mrsas_mpt_cmd *r1_cmd = NULL; MR_LD_RAID *raid; u_int8_t fp_possible; u_int32_t start_lba_hi, start_lba_lo, ld_block_size, ld; u_int32_t datalength = 0; io_request->RaidContext.raid_context.VirtualDiskTgtId = device_id; start_lba_lo = 0; start_lba_hi = 0; fp_possible = 0; /* * READ_6 (0x08) or WRITE_6 (0x0A) cdb */ if (csio->cdb_len == 6) { datalength = (u_int32_t)csio->cdb_io.cdb_bytes[4]; start_lba_lo = ((u_int32_t)csio->cdb_io.cdb_bytes[1] << 16) | ((u_int32_t)csio->cdb_io.cdb_bytes[2] << 8) | (u_int32_t)csio->cdb_io.cdb_bytes[3]; start_lba_lo &= 0x1FFFFF; } /* * READ_10 (0x28) or WRITE_10 (0x2A) cdb */ else if (csio->cdb_len == 10) { datalength = (u_int32_t)csio->cdb_io.cdb_bytes[8] | ((u_int32_t)csio->cdb_io.cdb_bytes[7] << 8); start_lba_lo = ((u_int32_t)csio->cdb_io.cdb_bytes[2] << 24) | ((u_int32_t)csio->cdb_io.cdb_bytes[3] << 16) | (u_int32_t)csio->cdb_io.cdb_bytes[4] << 8 | ((u_int32_t)csio->cdb_io.cdb_bytes[5]); } /* * READ_12 (0xA8) or WRITE_12 (0xAA) cdb */ else if (csio->cdb_len == 12) { datalength = (u_int32_t)csio->cdb_io.cdb_bytes[6] << 24 | ((u_int32_t)csio->cdb_io.cdb_bytes[7] << 16) | ((u_int32_t)csio->cdb_io.cdb_bytes[8] << 8) | ((u_int32_t)csio->cdb_io.cdb_bytes[9]); start_lba_lo = ((u_int32_t)csio->cdb_io.cdb_bytes[2] << 24) | ((u_int32_t)csio->cdb_io.cdb_bytes[3] << 16) | (u_int32_t)csio->cdb_io.cdb_bytes[4] << 8 | ((u_int32_t)csio->cdb_io.cdb_bytes[5]); } /* * READ_16 (0x88) or WRITE_16 (0x8A) cdb */ else if (csio->cdb_len == 16) { datalength = (u_int32_t)csio->cdb_io.cdb_bytes[10] << 24 | ((u_int32_t)csio->cdb_io.cdb_bytes[11] << 16) | ((u_int32_t)csio->cdb_io.cdb_bytes[12] << 8) | ((u_int32_t)csio->cdb_io.cdb_bytes[13]); start_lba_lo = ((u_int32_t)csio->cdb_io.cdb_bytes[6] << 24) | ((u_int32_t)csio->cdb_io.cdb_bytes[7] << 16) | (u_int32_t)csio->cdb_io.cdb_bytes[8] << 8 | ((u_int32_t)csio->cdb_io.cdb_bytes[9]); start_lba_hi = ((u_int32_t)csio->cdb_io.cdb_bytes[2] << 24) | ((u_int32_t)csio->cdb_io.cdb_bytes[3] << 16) |
(u_int32_t)csio->cdb_io.cdb_bytes[4] << 8 | ((u_int32_t)csio->cdb_io.cdb_bytes[5]); } memset(&io_info, 0, sizeof(struct IO_REQUEST_INFO)); io_info.ldStartBlock = ((u_int64_t)start_lba_hi << 32) | start_lba_lo; io_info.numBlocks = datalength; io_info.ldTgtId = device_id; io_info.r1_alt_dev_handle = MR_DEVHANDLE_INVALID; io_request->DataLength = cmd->length; switch (ccb_h->flags & CAM_DIR_MASK) { case CAM_DIR_IN: io_info.isRead = 1; break; case CAM_DIR_OUT: io_info.isRead = 0; break; case CAM_DIR_NONE: default: mrsas_dprint(sc, MRSAS_TRACE, "From %s : DMA Flag is %d \n", __func__, ccb_h->flags & CAM_DIR_MASK); break; } map_ptr = sc->ld_drv_map[(sc->map_id & 1)]; ld_block_size = MR_LdBlockSizeGet(device_id, map_ptr); ld = MR_TargetIdToLdGet(device_id, map_ptr); if ((ld >= MAX_LOGICAL_DRIVES_EXT) || (!sc->fast_path_io)) { io_request->RaidContext.raid_context.regLockFlags = 0; fp_possible = 0; } else { if (MR_BuildRaidContext(sc, &io_info, &io_request->RaidContext.raid_context, map_ptr)) fp_possible = io_info.fpOkForIo; } raid = MR_LdRaidGet(ld, map_ptr); /* Store the TM capability value in cmd */ cmd->tmCapable = raid->capability.tmCapable; cmd->request_desc->SCSIIO.MSIxIndex = sc->msix_vectors ? smp_processor_id() % sc->msix_vectors : 0; if (sc->is_ventura || sc->is_aero) { if (sc->streamDetectByLD) { mtx_lock(&sc->stream_lock); mrsas_stream_detect(sc, cmd, &io_info); mtx_unlock(&sc->stream_lock); /* In ventura if stream detected for a read and * it is read ahead capable make this IO as LDIO */ if (io_request->RaidContext.raid_context_g35.streamDetected && io_info.isRead && io_info.raCapable) fp_possible = FALSE; } /* Set raid 1/10 fast path write capable bit in io_info. * Note - reset peer_cmd and r1_alt_dev_handle if fp_possible * disabled after this point. Try not to add more check for * fp_possible toggle after this. 
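 * For a qualifying write (valid r1_alt_dev_handle, RAID level 1, not a read), * a second MPT command is reserved from the pool and the two commands are * cross-linked through peer_cmd; mrsas_startio() later fires them back to * back.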
*/ if (fp_possible && (io_info.r1_alt_dev_handle != MR_DEVHANDLE_INVALID) && (raid->level == 1) && !io_info.isRead) { if (mrsas_atomic_inc_return(&sc->fw_outstanding) > sc->max_scsi_cmds) { fp_possible = FALSE; mrsas_atomic_dec(&sc->fw_outstanding); } else { r1_cmd = mrsas_get_mpt_cmd(sc); if (!r1_cmd) { fp_possible = FALSE; mrsas_atomic_dec(&sc->fw_outstanding); } else { cmd->peer_cmd = r1_cmd; r1_cmd->peer_cmd = cmd; } } } } if (fp_possible) { mrsas_set_pd_lba(io_request, csio->cdb_len, &io_info, ccb, map_ptr, start_lba_lo, ld_block_size); io_request->Function = MPI2_FUNCTION_SCSI_IO_REQUEST; cmd->request_desc->SCSIIO.RequestFlags = (MPI2_REQ_DESCRIPT_FLAGS_FP_IO << MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); if (sc->mrsas_gen3_ctrl) { if (io_request->RaidContext.raid_context.regLockFlags == REGION_TYPE_UNUSED) cmd->request_desc->SCSIIO.RequestFlags = (MRSAS_REQ_DESCRIPT_FLAGS_NO_LOCK << MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); io_request->RaidContext.raid_context.Type = MPI2_TYPE_CUDA; io_request->RaidContext.raid_context.nseg = 0x1; io_request->IoFlags |= MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH; io_request->RaidContext.raid_context.regLockFlags |= (MR_RL_FLAGS_GRANT_DESTINATION_CUDA | MR_RL_FLAGS_SEQ_NUM_ENABLE); } else if (sc->is_ventura || sc->is_aero) { io_request->RaidContext.raid_context_g35.Type = MPI2_TYPE_CUDA; io_request->RaidContext.raid_context_g35.nseg = 0x1; io_request->RaidContext.raid_context_g35.routingFlags.bits.sqn = 1; io_request->IoFlags |= MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH; if (io_request->RaidContext.raid_context_g35.routingFlags.bits.sld) { io_request->RaidContext.raid_context_g35.RAIDFlags = (MR_RAID_FLAGS_IO_SUB_TYPE_CACHE_BYPASS << MR_RAID_CTX_RAID_FLAGS_IO_SUB_TYPE_SHIFT); } } if ((sc->load_balance_info[device_id].loadBalanceFlag) && (io_info.isRead)) { io_info.devHandle = mrsas_get_updated_dev_handle(sc, &sc->load_balance_info[device_id], &io_info); cmd->load_balance = MRSAS_LOAD_BALANCE_FLAG; cmd->pd_r1_lb = io_info.pd_after_lb; if (sc->is_ventura || sc->is_aero) io_request->RaidContext.raid_context_g35.spanArm = io_info.span_arm; else io_request->RaidContext.raid_context.spanArm = io_info.span_arm; } else cmd->load_balance = 0; if (sc->is_ventura || sc->is_aero) cmd->r1_alt_dev_handle = io_info.r1_alt_dev_handle; else cmd->r1_alt_dev_handle = MR_DEVHANDLE_INVALID; cmd->request_desc->SCSIIO.DevHandle = io_info.devHandle; io_request->DevHandle = io_info.devHandle; cmd->pdInterface = io_info.pdInterface; } else { /* Not FP IO */ io_request->RaidContext.raid_context.timeoutValue = map_ptr->raidMap.fpPdIoTimeoutSec; cmd->request_desc->SCSIIO.RequestFlags = (MRSAS_REQ_DESCRIPT_FLAGS_LD_IO << MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); if (sc->mrsas_gen3_ctrl) { if (io_request->RaidContext.raid_context.regLockFlags == REGION_TYPE_UNUSED) cmd->request_desc->SCSIIO.RequestFlags = (MRSAS_REQ_DESCRIPT_FLAGS_NO_LOCK << MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); io_request->RaidContext.raid_context.Type = MPI2_TYPE_CUDA; io_request->RaidContext.raid_context.regLockFlags |= (MR_RL_FLAGS_GRANT_DESTINATION_CPU0 | MR_RL_FLAGS_SEQ_NUM_ENABLE); io_request->RaidContext.raid_context.nseg = 0x1; } else if (sc->is_ventura || sc->is_aero) { io_request->RaidContext.raid_context_g35.Type = MPI2_TYPE_CUDA; io_request->RaidContext.raid_context_g35.routingFlags.bits.sqn = 1; io_request->RaidContext.raid_context_g35.nseg = 0x1; } io_request->Function = MRSAS_MPI2_FUNCTION_LD_IO_REQUEST; io_request->DevHandle = device_id; } return (0); } /* *
mrsas_build_ldio_nonrw: Builds an LDIO command * input: Adapter instance soft state * Pointer to command packet * Pointer to CCB * * This function builds the LDIO command packet. It returns 0 if the command is * built successfully, otherwise it returns a 1. */ int mrsas_build_ldio_nonrw(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd, union ccb *ccb) { struct ccb_hdr *ccb_h = &(ccb->ccb_h); u_int32_t device_id, ld; MR_DRV_RAID_MAP_ALL *map_ptr; MR_LD_RAID *raid; RAID_CONTEXT *pRAID_Context; MRSAS_RAID_SCSI_IO_REQUEST *io_request; io_request = cmd->io_request; device_id = ccb_h->target_id; map_ptr = sc->ld_drv_map[(sc->map_id & 1)]; ld = MR_TargetIdToLdGet(device_id, map_ptr); raid = MR_LdRaidGet(ld, map_ptr); /* get RAID_Context pointer */ pRAID_Context = &io_request->RaidContext.raid_context; /* Store the TM capability value in cmd */ cmd->tmCapable = raid->capability.tmCapable; /* FW path for LD Non-RW (SCSI management commands) */ io_request->Function = MRSAS_MPI2_FUNCTION_LD_IO_REQUEST; io_request->DevHandle = device_id; cmd->request_desc->SCSIIO.RequestFlags = (MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO << MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); io_request->RaidContext.raid_context.VirtualDiskTgtId = device_id; io_request->LUN[1] = ccb_h->target_lun & 0xF; io_request->DataLength = cmd->length; if (mrsas_map_request(sc, cmd, ccb) == SUCCESS) { if (cmd->sge_count > sc->max_num_sge) { device_printf(sc->mrsas_dev, "Error: sge_count (0x%x) exceeds" "max (0x%x) allowed\n", cmd->sge_count, sc->max_num_sge); return (1); } if (sc->is_ventura || sc->is_aero) io_request->RaidContext.raid_context_g35.numSGE = cmd->sge_count; else { /* * numSGE stores the lower 8 bits of sge_count; numSGEExt stores * the upper 8 bits of sge_count */ io_request->RaidContext.raid_context.numSGE = cmd->sge_count; io_request->RaidContext.raid_context.numSGEExt = (uint8_t)(cmd->sge_count >> 8); } } else { device_printf(sc->mrsas_dev, "Data map/load failed.\n"); return (1); } return (0); } /* * mrsas_build_syspdio: Builds a DCDB command * input: Adapter instance soft state * Pointer to command packet * Pointer to CCB * * This function builds the DCDB inquiry command. It returns 0 if the command * is built successfully, otherwise it returns a 1.
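 * The fp_possible argument selects the route: 0 sends the command via the * firmware queue, 1 uses the fast path with a device handle taken from the * JBOD sequence map or the RAID map below.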
*/ int mrsas_build_syspdio(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd, union ccb *ccb, struct cam_sim *sim, u_int8_t fp_possible) { struct ccb_hdr *ccb_h = &(ccb->ccb_h); u_int32_t device_id; MR_DRV_RAID_MAP_ALL *local_map_ptr; MRSAS_RAID_SCSI_IO_REQUEST *io_request; RAID_CONTEXT *pRAID_Context; struct MR_PD_CFG_SEQ_NUM_SYNC *pd_sync; io_request = cmd->io_request; /* get RAID_Context pointer */ pRAID_Context = &io_request->RaidContext.raid_context; device_id = ccb_h->target_id; local_map_ptr = sc->ld_drv_map[(sc->map_id & 1)]; io_request->RaidContext.raid_context.RAIDFlags = MR_RAID_FLAGS_IO_SUB_TYPE_SYSTEM_PD << MR_RAID_CTX_RAID_FLAGS_IO_SUB_TYPE_SHIFT; io_request->RaidContext.raid_context.regLockFlags = 0; io_request->RaidContext.raid_context.regLockRowLBA = 0; io_request->RaidContext.raid_context.regLockLength = 0; cmd->pdInterface = sc->target_list[device_id].interface_type; /* If FW supports PD sequence number */ if (sc->use_seqnum_jbod_fp && sc->pd_list[device_id].driveType == 0x00) { //printf("Using Drv seq num\n"); pd_sync = (void *)sc->jbodmap_mem[(sc->pd_seq_map_id - 1) & 1]; cmd->tmCapable = pd_sync->seq[device_id].capability.tmCapable; /* More than 256 PD/JBOD support for Ventura */ if (sc->support_morethan256jbod) io_request->RaidContext.raid_context.VirtualDiskTgtId = pd_sync->seq[device_id].pdTargetId; else io_request->RaidContext.raid_context.VirtualDiskTgtId = device_id + 255; io_request->RaidContext.raid_context.configSeqNum = pd_sync->seq[device_id].seqNum; io_request->DevHandle = pd_sync->seq[device_id].devHandle; if (sc->is_ventura || sc->is_aero) io_request->RaidContext.raid_context_g35.routingFlags.bits.sqn = 1; else io_request->RaidContext.raid_context.regLockFlags |= (MR_RL_FLAGS_SEQ_NUM_ENABLE | MR_RL_FLAGS_GRANT_DESTINATION_CUDA); /* raid_context.Type = MPI2_TYPE_CUDA is valid only, * if FW support Jbod Sequence number */ io_request->RaidContext.raid_context.Type = MPI2_TYPE_CUDA; io_request->RaidContext.raid_context.nseg = 0x1; } else if (sc->fast_path_io) { //printf("Using LD RAID map\n"); io_request->RaidContext.raid_context.VirtualDiskTgtId = device_id; io_request->RaidContext.raid_context.configSeqNum = 0; local_map_ptr = sc->ld_drv_map[(sc->map_id & 1)]; io_request->DevHandle = local_map_ptr->raidMap.devHndlInfo[device_id].curDevHdl; } else { //printf("Using FW PATH\n"); /* Want to send all IO via FW path */ io_request->RaidContext.raid_context.VirtualDiskTgtId = device_id; io_request->RaidContext.raid_context.configSeqNum = 0; io_request->DevHandle = MR_DEVHANDLE_INVALID; } cmd->request_desc->SCSIIO.DevHandle = io_request->DevHandle; cmd->request_desc->SCSIIO.MSIxIndex = sc->msix_vectors ? 
smp_processor_id() % sc->msix_vectors : 0; if (!fp_possible) { /* system pd firmware path */ io_request->Function = MRSAS_MPI2_FUNCTION_LD_IO_REQUEST; cmd->request_desc->SCSIIO.RequestFlags = (MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO << MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); io_request->RaidContext.raid_context.timeoutValue = local_map_ptr->raidMap.fpPdIoTimeoutSec; io_request->RaidContext.raid_context.VirtualDiskTgtId = device_id; } else { /* system pd fast path */ io_request->Function = MPI2_FUNCTION_SCSI_IO_REQUEST; io_request->RaidContext.raid_context.timeoutValue = local_map_ptr->raidMap.fpPdIoTimeoutSec; /* * NOTE - For system pd RW cmds only IoFlags will be FAST_PATH * Because the NON RW cmds will now go via FW Queue * and not the Exception queue */ if (sc->mrsas_gen3_ctrl || sc->is_ventura || sc->is_aero) io_request->IoFlags |= MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH; cmd->request_desc->SCSIIO.RequestFlags = (MPI2_REQ_DESCRIPT_FLAGS_FP_IO << MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); } io_request->LUN[1] = ccb_h->target_lun & 0xF; io_request->DataLength = cmd->length; if (mrsas_map_request(sc, cmd, ccb) == SUCCESS) { if (cmd->sge_count > sc->max_num_sge) { device_printf(sc->mrsas_dev, "Error: sge_count (0x%x) exceeds " "max (0x%x) allowed\n", cmd->sge_count, sc->max_num_sge); return (1); } if (sc->is_ventura || sc->is_aero) io_request->RaidContext.raid_context_g35.numSGE = cmd->sge_count; else { /* * numSGE stores the lower 8 bits of sge_count; numSGEExt stores * the upper 8 bits of sge_count */ io_request->RaidContext.raid_context.numSGE = cmd->sge_count; io_request->RaidContext.raid_context.numSGEExt = (uint8_t)(cmd->sge_count >> 8); } } else { device_printf(sc->mrsas_dev, "Data map/load failed.\n"); return (1); } return (0); } /* * mrsas_is_prp_possible: This function will tell whether PRPs should be built or not * sc: Adapter instance soft state * cmd: MPT command frame pointer * nsegs: Number of OS SGEs * * This function checks whether the IO is qualified to build PRPs * return: true: if PRP should be built * false: if IEEE SGLs should be built */ static boolean_t mrsas_is_prp_possible(struct mrsas_mpt_cmd *cmd, bus_dma_segment_t *segs, int nsegs) { struct mrsas_softc *sc = cmd->sc; int i; u_int32_t data_length = 0; bool build_prp = false; u_int32_t mr_nvme_pg_size; mr_nvme_pg_size = max(sc->nvme_page_size, MR_DEFAULT_NVME_PAGE_SIZE); data_length = cmd->length; if (data_length > (mr_nvme_pg_size * 5)) build_prp = true; else if ((data_length > (mr_nvme_pg_size * 4)) && (data_length <= (mr_nvme_pg_size * 5))) { /* check if 1st SG entry size is < residual beyond 4 pages */ if ((segs[0].ds_len) < (data_length - (mr_nvme_pg_size * 4))) build_prp = true; } /* check for SGE holes here */ for (i = 0; i < nsegs; i++) { /* check for mid SGEs */ if ((i != 0) && (i != (nsegs - 1))) { if ((segs[i].ds_addr % mr_nvme_pg_size) || (segs[i].ds_len % mr_nvme_pg_size)) { build_prp = false; mrsas_atomic_inc(&sc->sge_holes); break; } } /* check for first SGE */ if ((nsegs > 1) && (i == 0)) { if ((segs[i].ds_addr + segs[i].ds_len) % mr_nvme_pg_size) { build_prp = false; mrsas_atomic_inc(&sc->sge_holes); break; } } /* check for last SGE */ if ((nsegs > 1) && (i == (nsegs - 1))) { if (segs[i].ds_addr % mr_nvme_pg_size) { build_prp = false; mrsas_atomic_inc(&sc->sge_holes); break; } } } return build_prp; } /* * mrsas_map_request: Map and load data * input: Adapter instance soft state * Pointer to command packet * * For data from OS, map and load the data buffer into bus space. The SG list * is built in the callback.
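 *
 * (A hedged note on the two load paths in the function body: newer
 * FreeBSD uses bus_dmamap_load_ccb(), older releases plain
 * bus_dmamap_load(); either may return EINPROGRESS when busdma defers
 * the mapping, in which case the SIM queue is frozen until the
 * deferred callback runs.)
 *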
If the bus dmamap load is not successful, * cmd->error_code will contain the error code and 1 is returned. */ int mrsas_map_request(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd, union ccb *ccb) { u_int32_t retcode = 0; struct cam_sim *sim; sim = xpt_path_sim(cmd->ccb_ptr->ccb_h.path); if (cmd->data != NULL) { /* Map data buffer into bus space */ mtx_lock(&sc->io_lock); #if (__FreeBSD_version >= 902001) retcode = bus_dmamap_load_ccb(sc->data_tag, cmd->data_dmamap, ccb, mrsas_data_load_cb, cmd, 0); #else retcode = bus_dmamap_load(sc->data_tag, cmd->data_dmamap, cmd->data, cmd->length, mrsas_data_load_cb, cmd, BUS_DMA_NOWAIT); #endif mtx_unlock(&sc->io_lock); if (retcode) device_printf(sc->mrsas_dev, "bus_dmamap_load(): retcode = %d\n", retcode); if (retcode == EINPROGRESS) { device_printf(sc->mrsas_dev, "request load in progress\n"); mrsas_freeze_simq(cmd, sim); } } if (cmd->error_code) return (1); return (retcode); } /* * mrsas_unmap_request: Unmap and unload data * input: Adapter instance soft state * Pointer to command packet * * This function unmaps and unloads data from OS. */ void mrsas_unmap_request(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd) { if (cmd->data != NULL) { if (cmd->flags & MRSAS_DIR_IN) bus_dmamap_sync(sc->data_tag, cmd->data_dmamap, BUS_DMASYNC_POSTREAD); if (cmd->flags & MRSAS_DIR_OUT) bus_dmamap_sync(sc->data_tag, cmd->data_dmamap, BUS_DMASYNC_POSTWRITE); mtx_lock(&sc->io_lock); bus_dmamap_unload(sc->data_tag, cmd->data_dmamap); mtx_unlock(&sc->io_lock); } } /** * mrsas_build_ieee_sgl - Prepare IEEE SGLs * @sc: Adapter soft state * @segs: OS SGEs pointers * @nseg: Number of OS SGEs * @cmd: Fusion command frame * return: void */ static void mrsas_build_ieee_sgl(struct mrsas_mpt_cmd *cmd, bus_dma_segment_t *segs, int nseg) { struct mrsas_softc *sc = cmd->sc; MRSAS_RAID_SCSI_IO_REQUEST *io_request; pMpi25IeeeSgeChain64_t sgl_ptr; int i = 0, sg_processed = 0; io_request = cmd->io_request; sgl_ptr = (pMpi25IeeeSgeChain64_t)&io_request->SGL; if (sc->mrsas_gen3_ctrl || sc->is_ventura || sc->is_aero) { pMpi25IeeeSgeChain64_t sgl_ptr_end = sgl_ptr; sgl_ptr_end += sc->max_sge_in_main_msg - 1; sgl_ptr_end->Flags = 0; } if (nseg != 0) { for (i = 0; i < nseg; i++) { sgl_ptr->Address = segs[i].ds_addr; sgl_ptr->Length = segs[i].ds_len; sgl_ptr->Flags = 0; if (sc->mrsas_gen3_ctrl || sc->is_ventura || sc->is_aero) { if (i == nseg - 1) sgl_ptr->Flags = IEEE_SGE_FLAGS_END_OF_LIST; } sgl_ptr++; sg_processed = i + 1; if ((sg_processed == (sc->max_sge_in_main_msg - 1)) && (nseg > sc->max_sge_in_main_msg)) { pMpi25IeeeSgeChain64_t sg_chain; if (sc->mrsas_gen3_ctrl || sc->is_ventura || sc->is_aero) { if ((cmd->io_request->IoFlags & MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH) != MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH) cmd->io_request->ChainOffset = sc->chain_offset_io_request; else cmd->io_request->ChainOffset = 0; } else cmd->io_request->ChainOffset = sc->chain_offset_io_request; sg_chain = sgl_ptr; if (sc->mrsas_gen3_ctrl || sc->is_ventura || sc->is_aero) sg_chain->Flags = IEEE_SGE_FLAGS_CHAIN_ELEMENT; else sg_chain->Flags = (IEEE_SGE_FLAGS_CHAIN_ELEMENT | MPI2_IEEE_SGE_FLAGS_IOCPLBNTA_ADDR); sg_chain->Length = (sizeof(MPI2_SGE_IO_UNION) * (nseg - sg_processed)); sg_chain->Address = cmd->chain_frame_phys_addr; sgl_ptr = (pMpi25IeeeSgeChain64_t)cmd->chain_frame; } } } } /** * mrsas_build_prp_nvme - Prepare PRPs (Physical Region Page) SGLs, specific to NVMe drives only * @sc: Adapter soft state * @segs: OS SGEs pointers * @nseg: Number of OS SGEs * @cmd: Fusion command frame *
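 * Worked example of the first-PRP arithmetic in the body (hedged,
 * assuming a 4KiB NVMe page): for sge_addr = 0x10234,
 *	offset        = sge_addr & (0x1000 - 1) = 0x234
 *	first_prp_len = 0x1000 - 0x234          = 0xDCC bytes
 * and every subsequent PRP entry covers one full, page-aligned page.
 *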
return: void */ static void mrsas_build_prp_nvme(struct mrsas_mpt_cmd *cmd, bus_dma_segment_t *segs, int nseg) { struct mrsas_softc *sc = cmd->sc; int sge_len, offset, num_prp_in_chain = 0; pMpi25IeeeSgeChain64_t main_chain_element, ptr_first_sgl, sgl_ptr; u_int64_t *ptr_sgl; bus_addr_t ptr_sgl_phys; u_int64_t sge_addr; u_int32_t page_mask, page_mask_result, i = 0; u_int32_t first_prp_len; int data_len = cmd->length; u_int32_t mr_nvme_pg_size = max(sc->nvme_page_size, MR_DEFAULT_NVME_PAGE_SIZE); sgl_ptr = (pMpi25IeeeSgeChain64_t) &cmd->io_request->SGL; /* * NVMe has a very convoluted PRP format. One PRP is required * for each page or partial page. We need to split up OS SG * entries if they are longer than one page or cross a page * boundary. We also have to insert a PRP list pointer entry as * the last entry in each physical page of the PRP list. * * NOTE: The first PRP "entry" is actually placed in the first * SGL entry in the main message in IEEE 64 format. The 2nd * entry in the main message is the chain element, and the rest * of the PRP entries are built in the contiguous PCIe buffer. */ page_mask = mr_nvme_pg_size - 1; ptr_sgl = (u_int64_t *) cmd->chain_frame; ptr_sgl_phys = cmd->chain_frame_phys_addr; memset(ptr_sgl, 0, sc->max_chain_frame_sz); /* Build chain frame element which holds all PRPs except the first */ main_chain_element = (pMpi25IeeeSgeChain64_t) ((u_int8_t *)sgl_ptr + sizeof(MPI25_IEEE_SGE_CHAIN64)); main_chain_element->Address = cmd->chain_frame_phys_addr; main_chain_element->NextChainOffset = 0; main_chain_element->Flags = IEEE_SGE_FLAGS_CHAIN_ELEMENT | IEEE_SGE_FLAGS_SYSTEM_ADDR | MPI26_IEEE_SGE_FLAGS_NSF_NVME_PRP; /* Build first PRP; the SGE need not be PAGE aligned */ ptr_first_sgl = sgl_ptr; sge_addr = segs[i].ds_addr; sge_len = segs[i].ds_len; i++; offset = (u_int32_t) (sge_addr & page_mask); first_prp_len = mr_nvme_pg_size - offset; ptr_first_sgl->Address = sge_addr; ptr_first_sgl->Length = first_prp_len; data_len -= first_prp_len; if (sge_len > first_prp_len) { sge_addr += first_prp_len; sge_len -= first_prp_len; } else if (sge_len == first_prp_len) { sge_addr = segs[i].ds_addr; sge_len = segs[i].ds_len; i++; } for (;;) { offset = (u_int32_t) (sge_addr & page_mask); /* Put a PRP list pointer at the page boundary */ page_mask_result = (uintptr_t)(ptr_sgl + 1) & page_mask; if (!page_mask_result) { device_printf(sc->mrsas_dev, "BRCM: Put prp pointer as we are at page boundary" " ptr_sgl: 0x%p\n", ptr_sgl); ptr_sgl_phys++; *ptr_sgl = (uintptr_t)ptr_sgl_phys; ptr_sgl++; num_prp_in_chain++; } *ptr_sgl = sge_addr; ptr_sgl++; ptr_sgl_phys++; num_prp_in_chain++; sge_addr += mr_nvme_pg_size; sge_len -= mr_nvme_pg_size; data_len -= mr_nvme_pg_size; if (data_len <= 0) break; if (sge_len > 0) continue; sge_addr = segs[i].ds_addr; sge_len = segs[i].ds_len; i++; } main_chain_element->Length = num_prp_in_chain * sizeof(u_int64_t); mrsas_atomic_inc(&sc->prp_count); } /* * mrsas_data_load_cb: Callback entry point to build SGLs * input: Pointer to command packet as argument * Pointer to segment * Number of segments * Error * * This is the callback function of the bus dma map load.
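 * (For reference, a minimal sketch of the busdma callback contract
 * this relies on:
 *	static void cb(void *arg, bus_dma_segment_t *segs, int nseg,
 *	    int error);
 * busdma hands the callback the segment array it built; segs is only
 * valid for the duration of the call, so anything needed later must
 * be copied out before returning.)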
It builds the SG list. */ static void mrsas_data_load_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct mrsas_mpt_cmd *cmd = (struct mrsas_mpt_cmd *)arg; struct mrsas_softc *sc = cmd->sc; boolean_t build_prp = false; if (error) { cmd->error_code = error; device_printf(sc->mrsas_dev, "mrsas_data_load_cb_prp: error=%d\n", error); if (error == EFBIG) { cmd->ccb_ptr->ccb_h.status = CAM_REQ_TOO_BIG; return; } } if (cmd->flags & MRSAS_DIR_IN) bus_dmamap_sync(cmd->sc->data_tag, cmd->data_dmamap, BUS_DMASYNC_PREREAD); if (cmd->flags & MRSAS_DIR_OUT) bus_dmamap_sync(cmd->sc->data_tag, cmd->data_dmamap, BUS_DMASYNC_PREWRITE); if (nseg > sc->max_num_sge) { device_printf(sc->mrsas_dev, "SGE count is too large or 0.\n"); return; } /* Check whether PRPs should be built or IEEE SGLs */ if ((cmd->io_request->IoFlags & MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH) && (cmd->pdInterface == NVME_PD)) build_prp = mrsas_is_prp_possible(cmd, segs, nseg); if (build_prp == true) mrsas_build_prp_nvme(cmd, segs, nseg); else mrsas_build_ieee_sgl(cmd, segs, nseg); cmd->sge_count = nseg; } /* * mrsas_freeze_simq: Freeze SIM queue * input: Pointer to command packet * Pointer to SIM * * This function freezes the sim queue. */ static void mrsas_freeze_simq(struct mrsas_mpt_cmd *cmd, struct cam_sim *sim) { union ccb *ccb = (union ccb *)(cmd->ccb_ptr); xpt_freeze_simq(sim, 1); ccb->ccb_h.status |= CAM_RELEASE_SIMQ; ccb->ccb_h.status |= CAM_REQUEUE_REQ; } void mrsas_xpt_freeze(struct mrsas_softc *sc) { xpt_freeze_simq(sc->sim_0, 1); xpt_freeze_simq(sc->sim_1, 1); } void mrsas_xpt_release(struct mrsas_softc *sc) { xpt_release_simq(sc->sim_0, 1); xpt_release_simq(sc->sim_1, 1); } /* * mrsas_cmd_done: Perform remaining command completion * input: Adapter instance soft state * Pointer to command packet * * This function calls unmap request and releases the MPT command. */ void mrsas_cmd_done(struct mrsas_softc *sc, struct mrsas_mpt_cmd *cmd) { mrsas_unmap_request(sc, cmd); mtx_lock(&sc->sim_lock); if (cmd->callout_owner) { callout_stop(&cmd->cm_callout); cmd->callout_owner = false; } xpt_done(cmd->ccb_ptr); cmd->ccb_ptr = NULL; mtx_unlock(&sc->sim_lock); mrsas_release_mpt_cmd(cmd); } /* * mrsas_cam_poll: Polling entry point * input: Pointer to SIM * * This polls and drains the completion queue of every MSI-x vector. */ static void mrsas_cam_poll(struct cam_sim *sim) { int i; struct mrsas_softc *sc = (struct mrsas_softc *)cam_sim_softc(sim); if (sc->msix_vectors != 0) { for (i = 0; i < sc->msix_vectors; i++) { mrsas_complete_cmd(sc, i); } } else { mrsas_complete_cmd(sc, 0); } } /* * mrsas_bus_scan: Perform bus scan * input: Adapter instance soft state * * This mrsas_bus_scan function is needed for FreeBSD 7.x. Also, it should not * be called in FreeBSD 8.x and later versions, where the bus scan is * automatic.
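 *
 * The rescan pattern the function uses, sketched with the same CAM
 * calls it makes below: allocate a CCB, build a wildcard path for the
 * SIM, and hand the CCB to xpt_rescan(), which takes ownership of it:
 *	ccb = xpt_alloc_ccb();
 *	xpt_create_path(&ccb->ccb_h.path, xpt_periph, cam_sim_path(sim),
 *	    CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD);
 *	xpt_rescan(ccb);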
*/ int mrsas_bus_scan(struct mrsas_softc *sc) { union ccb *ccb_0; union ccb *ccb_1; if ((ccb_0 = xpt_alloc_ccb()) == NULL) { return (ENOMEM); } if ((ccb_1 = xpt_alloc_ccb()) == NULL) { xpt_free_ccb(ccb_0); return (ENOMEM); } mtx_lock(&sc->sim_lock); if (xpt_create_path(&ccb_0->ccb_h.path, xpt_periph, cam_sim_path(sc->sim_0), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_free_ccb(ccb_0); xpt_free_ccb(ccb_1); mtx_unlock(&sc->sim_lock); return (EIO); } if (xpt_create_path(&ccb_1->ccb_h.path, xpt_periph, cam_sim_path(sc->sim_1), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_free_ccb(ccb_0); xpt_free_ccb(ccb_1); mtx_unlock(&sc->sim_lock); return (EIO); } mtx_unlock(&sc->sim_lock); xpt_rescan(ccb_0); xpt_rescan(ccb_1); return (0); } /* * mrsas_bus_scan_sim: Perform bus scan per SIM * input: adapter instance soft state * * This function will be called from Event handler on LD creation/deletion, * JBOD on/off. */ int mrsas_bus_scan_sim(struct mrsas_softc *sc, struct cam_sim *sim) { union ccb *ccb; if ((ccb = xpt_alloc_ccb()) == NULL) { return (ENOMEM); } mtx_lock(&sc->sim_lock); if (xpt_create_path(&ccb->ccb_h.path, xpt_periph, cam_sim_path(sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_free_ccb(ccb); mtx_unlock(&sc->sim_lock); return (EIO); } mtx_unlock(&sc->sim_lock); xpt_rescan(ccb); return (0); } /* * mrsas_track_scsiio: Track IOs for a given target in the mpt_cmd_list * input: Adapter instance soft state * Target ID of target * Bus ID of the target * * This function checks for any pending IO in the whole mpt_cmd_list pool * with the bus_id and target_id passed in arguments. If some IO is found * that means target reset is not successfully completed. * * Returns FAIL if IOs pending to the target device, else return SUCCESS */ static int mrsas_track_scsiio(struct mrsas_softc *sc, target_id_t tgt_id, u_int32_t bus_id) { int i; struct mrsas_mpt_cmd *mpt_cmd = NULL; for (i = 0 ; i < sc->max_fw_cmds; i++) { mpt_cmd = sc->mpt_cmd_list[i]; - /* - * Check if the target_id and bus_id is same as the timeout IO - */ - if (mpt_cmd->ccb_ptr) { - /* bus_id = 1 denotes a VD */ - if (bus_id == 1) - tgt_id = (mpt_cmd->ccb_ptr->ccb_h.target_id - (MRSAS_MAX_PD - 1)); + /* + * Check if the target_id and bus_id is same as the timeout IO + */ + if (mpt_cmd->ccb_ptr) { + /* bus_id = 1 denotes a VD */ + if (bus_id == 1) + tgt_id = + (mpt_cmd->ccb_ptr->ccb_h.target_id - (MRSAS_MAX_PD - 1)); if (mpt_cmd->ccb_ptr->cpi.bus_id == bus_id && mpt_cmd->ccb_ptr->ccb_h.target_id == tgt_id) { device_printf(sc->mrsas_dev, "IO commands pending to target id %d\n", tgt_id); return FAIL; } } } return SUCCESS; } #if TM_DEBUG /* * mrsas_tm_response_code: Prints TM response code received from FW * input: Adapter instance soft state * MPI reply returned from firmware * * Returns nothing. 
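 *
 * (Aside on the bus/target mapping checked in mrsas_track_scsiio
 * above: on bus 1 a CAM target id is offset by MRSAS_MAX_PD - 1, so
 * assuming MRSAS_MAX_PD is 256, CAM target 260 on bus 1 corresponds
 * to VD 260 - 255 = 5.)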
*/ static void mrsas_tm_response_code(struct mrsas_softc *sc, MPI2_SCSI_TASK_MANAGE_REPLY *mpi_reply) { char *desc; switch (mpi_reply->ResponseCode) { case MPI2_SCSITASKMGMT_RSP_TM_COMPLETE: desc = "task management request completed"; break; case MPI2_SCSITASKMGMT_RSP_INVALID_FRAME: desc = "invalid frame"; break; case MPI2_SCSITASKMGMT_RSP_TM_NOT_SUPPORTED: desc = "task management request not supported"; break; case MPI2_SCSITASKMGMT_RSP_TM_FAILED: desc = "task management request failed"; break; case MPI2_SCSITASKMGMT_RSP_TM_SUCCEEDED: desc = "task management request succeeded"; break; case MPI2_SCSITASKMGMT_RSP_TM_INVALID_LUN: desc = "invalid lun"; break; case 0xA: desc = "overlapped tag attempted"; break; case MPI2_SCSITASKMGMT_RSP_IO_QUEUED_ON_IOC: desc = "task queued, however not sent to target"; break; default: desc = "unknown"; break; } device_printf(sc->mrsas_dev, "response_code(%01x): %s\n", mpi_reply->ResponseCode, desc); device_printf(sc->mrsas_dev, "TerminationCount/DevHandle/Function/TaskType/IOCStat/IOCLoginfo\n" "0x%x/0x%x/0x%x/0x%x/0x%x/0x%x\n", mpi_reply->TerminationCount, mpi_reply->DevHandle, mpi_reply->Function, mpi_reply->TaskType, mpi_reply->IOCStatus, mpi_reply->IOCLogInfo); } #endif /* * mrsas_issue_tm: Fires the TM command to FW and waits for completion * input: Adapter instance soft state * request descriptor compiled by mrsas_reset_targets * * Returns FAIL if the TM command TIMEDOUT from FW, else SUCCESS. */ static int mrsas_issue_tm(struct mrsas_softc *sc, MRSAS_REQUEST_DESCRIPTOR_UNION *req_desc) { int sleep_stat; mrsas_fire_cmd(sc, req_desc->addr.u.low, req_desc->addr.u.high); sleep_stat = msleep(&sc->ocr_chan, &sc->sim_lock, PRIBIO, "tm_sleep", 50*hz); if (sleep_stat == EWOULDBLOCK) { device_printf(sc->mrsas_dev, "tm cmd TIMEDOUT\n"); return FAIL; } return SUCCESS; } /* * mrsas_reset_targets: Gathers info to fire a target reset command * input: Adapter instance soft state * * This function compiles data for a target reset command to be fired to the FW * and then traverses the target_reset_pool to find targets with TIMEDOUT IOs.
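 * (mrsas_issue_tm above uses the stock msleep()/wakeup() timeout
 * idiom; 50 * hz bounds the sleep at 50 seconds, and EWOULDBLOCK on
 * return means FW never woke the channel, i.e. the TM frame timed
 * out -- a sketch mirroring the code:
 *	if (msleep(&sc->ocr_chan, &sc->sim_lock, PRIBIO, "tm_sleep",
 *	    50 * hz) == EWOULDBLOCK)
 *		return FAIL;
 * )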
* * Returns SUCCESS or FAIL */ int mrsas_reset_targets(struct mrsas_softc *sc) { struct mrsas_mpt_cmd *tm_mpt_cmd = NULL; struct mrsas_mpt_cmd *tgt_mpt_cmd = NULL; MR_TASK_MANAGE_REQUEST *mr_request; MPI2_SCSI_TASK_MANAGE_REQUEST *tm_mpi_request; MRSAS_REQUEST_DESCRIPTOR_UNION *req_desc; int retCode = FAIL, count, i, outstanding; u_int32_t MSIxIndex, bus_id; target_id_t tgt_id; #if TM_DEBUG MPI2_SCSI_TASK_MANAGE_REPLY *mpi_reply; #endif outstanding = mrsas_atomic_read(&sc->fw_outstanding); if (!outstanding) { device_printf(sc->mrsas_dev, "NO IOs pending...\n"); mrsas_atomic_set(&sc->target_reset_outstanding, 0); retCode = SUCCESS; goto return_status; } else if (sc->adprecovery != MRSAS_HBA_OPERATIONAL) { device_printf(sc->mrsas_dev, "Controller is not operational\n"); goto return_status; } else { /* Some more error checks will be added in future */ } /* Get an mpt frame and an index to fire the TM cmd */ tm_mpt_cmd = mrsas_get_mpt_cmd(sc); if (!tm_mpt_cmd) { retCode = FAIL; goto return_status; } req_desc = mrsas_get_request_desc(sc, (tm_mpt_cmd->index) - 1); if (!req_desc) { device_printf(sc->mrsas_dev, "Cannot get request_descriptor for tm.\n"); retCode = FAIL; goto release_mpt; } memset(req_desc, 0, sizeof(MRSAS_REQUEST_DESCRIPTOR_UNION)); req_desc->HighPriority.SMID = tm_mpt_cmd->index; req_desc->HighPriority.RequestFlags = (MPI2_REQ_DESCRIPT_FLAGS_HIGH_PRIORITY << MRSAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); req_desc->HighPriority.MSIxIndex = 0; req_desc->HighPriority.LMID = 0; req_desc->HighPriority.Reserved1 = 0; tm_mpt_cmd->request_desc = req_desc; mr_request = (MR_TASK_MANAGE_REQUEST *) tm_mpt_cmd->io_request; memset(mr_request, 0, sizeof(MR_TASK_MANAGE_REQUEST)); tm_mpi_request = (MPI2_SCSI_TASK_MANAGE_REQUEST *) &mr_request->TmRequest; tm_mpi_request->Function = MPI2_FUNCTION_SCSI_TASK_MGMT; tm_mpi_request->TaskType = MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET; tm_mpi_request->TaskMID = 0; /* smid task */ tm_mpi_request->LUN[1] = 0; /* Traverse the tm_mpt pool to get valid entries */ for (i = 0 ; i < MRSAS_MAX_TM_TARGETS; i++) { if(!sc->target_reset_pool[i]) { continue; } else { tgt_mpt_cmd = sc->target_reset_pool[i]; } tgt_id = i; /* See if the target is tm capable or NOT */ if (!tgt_mpt_cmd->tmCapable) { device_printf(sc->mrsas_dev, "Task management NOT SUPPORTED for " "CAM target:%d\n", tgt_id); retCode = FAIL; goto release_mpt; } tm_mpi_request->DevHandle = tgt_mpt_cmd->io_request->DevHandle; if (i < (MRSAS_MAX_PD - 1)) { mr_request->uTmReqReply.tmReqFlags.isTMForPD = 1; bus_id = 0; } else { mr_request->uTmReqReply.tmReqFlags.isTMForLD = 1; bus_id = 1; } device_printf(sc->mrsas_dev, "TM will be fired for " "CAM target:%d and bus_id %d\n", tgt_id, bus_id); sc->ocr_chan = (void *)&tm_mpt_cmd; retCode = mrsas_issue_tm(sc, req_desc); if (retCode == FAIL) goto release_mpt; #if TM_DEBUG mpi_reply = (MPI2_SCSI_TASK_MANAGE_REPLY *) &mr_request->uTmReqReply.TMReply; mrsas_tm_response_code(sc, mpi_reply); #endif mrsas_atomic_dec(&sc->target_reset_outstanding); sc->target_reset_pool[i] = NULL; /* Check for pending cmds in the mpt_cmd_pool with the tgt_id */ mrsas_disable_intr(sc); /* Wait for 1 second to complete parallel ISR calling same * mrsas_complete_cmd() */ msleep(&sc->ocr_chan, &sc->sim_lock, PRIBIO, "mrsas_reset_wakeup", 1 * hz); count = sc->msix_vectors > 0 ? 
sc->msix_vectors : 1; mtx_unlock(&sc->sim_lock); for (MSIxIndex = 0; MSIxIndex < count; MSIxIndex++) mrsas_complete_cmd(sc, MSIxIndex); mtx_lock(&sc->sim_lock); retCode = mrsas_track_scsiio(sc, tgt_id, bus_id); mrsas_enable_intr(sc); if (retCode == FAIL) goto release_mpt; } device_printf(sc->mrsas_dev, "Number of targets outstanding " "after reset: %d\n", mrsas_atomic_read(&sc->target_reset_outstanding)); release_mpt: mrsas_release_mpt_cmd(tm_mpt_cmd); return_status: device_printf(sc->mrsas_dev, "target reset %s!!\n", (retCode == SUCCESS) ? "SUCCESS" : "FAIL"); return retCode; } Index: projects/clang1000-import/sys/dev/msk/if_msk.c =================================================================== --- projects/clang1000-import/sys/dev/msk/if_msk.c (revision 357178) +++ projects/clang1000-import/sys/dev/msk/if_msk.c (revision 357179) @@ -1,4607 +1,4610 @@ /****************************************************************************** * * Name : sky2.c * Project: Gigabit Ethernet Driver for FreeBSD 5.x/6.x * Version: $Revision: 1.23 $ * Date : $Date: 2005/12/22 09:04:11 $ * Purpose: Main driver source file * *****************************************************************************/ /****************************************************************************** * * LICENSE: * Copyright (C) Marvell International Ltd. and/or its affiliates * * The computer program files contained in this folder ("Files") * are provided to you under the BSD-type license terms provided * below, and any use of such Files and any derivative works * thereof created by you shall be governed by the following terms * and conditions: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * - Neither the name of Marvell nor the names of its contributors * may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. * /LICENSE * *****************************************************************************/ /*- * SPDX-License-Identifier: BSD-4-Clause AND BSD-3-Clause * * Copyright (c) 1997, 1998, 1999, 2000 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2003 Nathan L. Binkert * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /* * Device driver for the Marvell Yukon II Ethernet controller. * Due to lack of documentation, this driver is based on the code from * sk(4) and Marvell's myk(4) driver for FreeBSD 5.x. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_DEPEND(msk, pci, 1, 1, 1); MODULE_DEPEND(msk, ether, 1, 1, 1); MODULE_DEPEND(msk, miibus, 1, 1, 1); /* "device miibus" required. See GENERIC if you get errors here. */ #include "miibus_if.h" /* Tunables. */ static int msi_disable = 0; TUNABLE_INT("hw.msk.msi_disable", &msi_disable); static int legacy_intr = 0; TUNABLE_INT("hw.msk.legacy_intr", &legacy_intr); static int jumbo_disable = 0; TUNABLE_INT("hw.msk.jumbo_disable", &jumbo_disable); #define MSK_CSUM_FEATURES (CSUM_TCP | CSUM_UDP) /* * Devices supported by this driver. 
*/ static const struct msk_product { uint16_t msk_vendorid; uint16_t msk_deviceid; const char *msk_name; } msk_products[] = { { VENDORID_SK, DEVICEID_SK_YUKON2, "SK-9Sxx Gigabit Ethernet" }, { VENDORID_SK, DEVICEID_SK_YUKON2_EXPR, "SK-9Exx Gigabit Ethernet"}, { VENDORID_MARVELL, DEVICEID_MRVL_8021CU, "Marvell Yukon 88E8021CU Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8021X, "Marvell Yukon 88E8021 SX/LX Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8022CU, "Marvell Yukon 88E8022CU Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8022X, "Marvell Yukon 88E8022 SX/LX Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8061CU, "Marvell Yukon 88E8061CU Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8061X, "Marvell Yukon 88E8061 SX/LX Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8062CU, "Marvell Yukon 88E8062CU Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8062X, "Marvell Yukon 88E8062 SX/LX Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8035, "Marvell Yukon 88E8035 Fast Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8036, "Marvell Yukon 88E8036 Fast Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8038, "Marvell Yukon 88E8038 Fast Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8039, "Marvell Yukon 88E8039 Fast Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8040, "Marvell Yukon 88E8040 Fast Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8040T, "Marvell Yukon 88E8040T Fast Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8042, "Marvell Yukon 88E8042 Fast Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_8048, "Marvell Yukon 88E8048 Fast Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4361, "Marvell Yukon 88E8050 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4360, "Marvell Yukon 88E8052 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4362, "Marvell Yukon 88E8053 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4363, "Marvell Yukon 88E8055 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4364, "Marvell Yukon 88E8056 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4365, "Marvell Yukon 88E8070 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_436A, "Marvell Yukon 88E8058 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_436B, "Marvell Yukon 88E8071 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_436C, "Marvell Yukon 88E8072 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_436D, "Marvell Yukon 88E8055 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4370, "Marvell Yukon 88E8075 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4380, "Marvell Yukon 88E8057 Gigabit Ethernet" }, { VENDORID_MARVELL, DEVICEID_MRVL_4381, "Marvell Yukon 88E8059 Gigabit Ethernet" }, { VENDORID_DLINK, DEVICEID_DLINK_DGE550SX, "D-Link 550SX Gigabit Ethernet" }, { VENDORID_DLINK, DEVICEID_DLINK_DGE560SX, "D-Link 560SX Gigabit Ethernet" }, { VENDORID_DLINK, DEVICEID_DLINK_DGE560T, "D-Link 560T Gigabit Ethernet" } }; static const char *model_name[] = { "Yukon XL", "Yukon EC Ultra", "Yukon EX", "Yukon EC", "Yukon FE", "Yukon FE+", "Yukon Supreme", "Yukon Ultra 2", "Yukon Unknown", "Yukon Optima", }; static int mskc_probe(device_t); static int mskc_attach(device_t); static int mskc_detach(device_t); static int mskc_shutdown(device_t); static int mskc_setup_rambuffer(struct msk_softc *); static int mskc_suspend(device_t); static int mskc_resume(device_t); static bus_dma_tag_t mskc_get_dma_tag(device_t, device_t); static void mskc_reset(struct msk_softc *); static int msk_probe(device_t); static int 
msk_attach(device_t); static int msk_detach(device_t); static void msk_tick(void *); static void msk_intr(void *); static void msk_intr_phy(struct msk_if_softc *); static void msk_intr_gmac(struct msk_if_softc *); static __inline void msk_rxput(struct msk_if_softc *); static int msk_handle_events(struct msk_softc *); static void msk_handle_hwerr(struct msk_if_softc *, uint32_t); static void msk_intr_hwerr(struct msk_softc *); #ifndef __NO_STRICT_ALIGNMENT static __inline void msk_fixup_rx(struct mbuf *); #endif static __inline void msk_rxcsum(struct msk_if_softc *, uint32_t, struct mbuf *); static void msk_rxeof(struct msk_if_softc *, uint32_t, uint32_t, int); static void msk_jumbo_rxeof(struct msk_if_softc *, uint32_t, uint32_t, int); static void msk_txeof(struct msk_if_softc *, int); static int msk_encap(struct msk_if_softc *, struct mbuf **); static void msk_start(struct ifnet *); static void msk_start_locked(struct ifnet *); static int msk_ioctl(struct ifnet *, u_long, caddr_t); static void msk_set_prefetch(struct msk_softc *, int, bus_addr_t, uint32_t); static void msk_set_rambuffer(struct msk_if_softc *); static void msk_set_tx_stfwd(struct msk_if_softc *); static void msk_init(void *); static void msk_init_locked(struct msk_if_softc *); static void msk_stop(struct msk_if_softc *); static void msk_watchdog(struct msk_if_softc *); static int msk_mediachange(struct ifnet *); static void msk_mediastatus(struct ifnet *, struct ifmediareq *); static void msk_phy_power(struct msk_softc *, int); static void msk_dmamap_cb(void *, bus_dma_segment_t *, int, int); static int msk_status_dma_alloc(struct msk_softc *); static void msk_status_dma_free(struct msk_softc *); static int msk_txrx_dma_alloc(struct msk_if_softc *); static int msk_rx_dma_jalloc(struct msk_if_softc *); static void msk_txrx_dma_free(struct msk_if_softc *); static void msk_rx_dma_jfree(struct msk_if_softc *); static int msk_rx_fill(struct msk_if_softc *, int); static int msk_init_rx_ring(struct msk_if_softc *); static int msk_init_jumbo_rx_ring(struct msk_if_softc *); static void msk_init_tx_ring(struct msk_if_softc *); static __inline void msk_discard_rxbuf(struct msk_if_softc *, int); static __inline void msk_discard_jumbo_rxbuf(struct msk_if_softc *, int); static int msk_newbuf(struct msk_if_softc *, int); static int msk_jumbo_newbuf(struct msk_if_softc *, int); static int msk_phy_readreg(struct msk_if_softc *, int, int); static int msk_phy_writereg(struct msk_if_softc *, int, int, int); static int msk_miibus_readreg(device_t, int, int); static int msk_miibus_writereg(device_t, int, int, int); static void msk_miibus_statchg(device_t); static void msk_rxfilter(struct msk_if_softc *); static void msk_setvlan(struct msk_if_softc *, struct ifnet *); static void msk_stats_clear(struct msk_if_softc *); static void msk_stats_update(struct msk_if_softc *); static int msk_sysctl_stat32(SYSCTL_HANDLER_ARGS); static int msk_sysctl_stat64(SYSCTL_HANDLER_ARGS); static void msk_sysctl_node(struct msk_if_softc *); static int sysctl_int_range(SYSCTL_HANDLER_ARGS, int, int); static int sysctl_hw_msk_proc_limit(SYSCTL_HANDLER_ARGS); static device_method_t mskc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, mskc_probe), DEVMETHOD(device_attach, mskc_attach), DEVMETHOD(device_detach, mskc_detach), DEVMETHOD(device_suspend, mskc_suspend), DEVMETHOD(device_resume, mskc_resume), DEVMETHOD(device_shutdown, mskc_shutdown), DEVMETHOD(bus_get_dma_tag, mskc_get_dma_tag), DEVMETHOD_END }; static driver_t mskc_driver = { "mskc", 
mskc_methods, sizeof(struct msk_softc) }; static devclass_t mskc_devclass; static device_method_t msk_methods[] = { /* Device interface */ DEVMETHOD(device_probe, msk_probe), DEVMETHOD(device_attach, msk_attach), DEVMETHOD(device_detach, msk_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), /* MII interface */ DEVMETHOD(miibus_readreg, msk_miibus_readreg), DEVMETHOD(miibus_writereg, msk_miibus_writereg), DEVMETHOD(miibus_statchg, msk_miibus_statchg), DEVMETHOD_END }; static driver_t msk_driver = { "msk", msk_methods, sizeof(struct msk_if_softc) }; static devclass_t msk_devclass; DRIVER_MODULE(mskc, pci, mskc_driver, mskc_devclass, NULL, NULL); DRIVER_MODULE(msk, mskc, msk_driver, msk_devclass, NULL, NULL); DRIVER_MODULE(miibus, msk, miibus_driver, miibus_devclass, NULL, NULL); static struct resource_spec msk_res_spec_io[] = { { SYS_RES_IOPORT, PCIR_BAR(1), RF_ACTIVE }, { -1, 0, 0 } }; static struct resource_spec msk_res_spec_mem[] = { { SYS_RES_MEMORY, PCIR_BAR(0), RF_ACTIVE }, { -1, 0, 0 } }; static struct resource_spec msk_irq_spec_legacy[] = { { SYS_RES_IRQ, 0, RF_ACTIVE | RF_SHAREABLE }, { -1, 0, 0 } }; static struct resource_spec msk_irq_spec_msi[] = { { SYS_RES_IRQ, 1, RF_ACTIVE }, { -1, 0, 0 } }; static int msk_miibus_readreg(device_t dev, int phy, int reg) { struct msk_if_softc *sc_if; sc_if = device_get_softc(dev); return (msk_phy_readreg(sc_if, phy, reg)); } static int msk_phy_readreg(struct msk_if_softc *sc_if, int phy, int reg) { struct msk_softc *sc; int i, val; sc = sc_if->msk_softc; GMAC_WRITE_2(sc, sc_if->msk_port, GM_SMI_CTRL, GM_SMI_CT_PHY_AD(phy) | GM_SMI_CT_REG_AD(reg) | GM_SMI_CT_OP_RD); for (i = 0; i < MSK_TIMEOUT; i++) { DELAY(1); val = GMAC_READ_2(sc, sc_if->msk_port, GM_SMI_CTRL); if ((val & GM_SMI_CT_RD_VAL) != 0) { val = GMAC_READ_2(sc, sc_if->msk_port, GM_SMI_DATA); break; } } if (i == MSK_TIMEOUT) { if_printf(sc_if->msk_ifp, "phy failed to come ready\n"); val = 0; } return (val); } static int msk_miibus_writereg(device_t dev, int phy, int reg, int val) { struct msk_if_softc *sc_if; sc_if = device_get_softc(dev); return (msk_phy_writereg(sc_if, phy, reg, val)); } static int msk_phy_writereg(struct msk_if_softc *sc_if, int phy, int reg, int val) { struct msk_softc *sc; int i; sc = sc_if->msk_softc; GMAC_WRITE_2(sc, sc_if->msk_port, GM_SMI_DATA, val); GMAC_WRITE_2(sc, sc_if->msk_port, GM_SMI_CTRL, GM_SMI_CT_PHY_AD(phy) | GM_SMI_CT_REG_AD(reg)); for (i = 0; i < MSK_TIMEOUT; i++) { DELAY(1); if ((GMAC_READ_2(sc, sc_if->msk_port, GM_SMI_CTRL) & GM_SMI_CT_BUSY) == 0) break; } if (i == MSK_TIMEOUT) if_printf(sc_if->msk_ifp, "phy write timeout\n"); return (0); } static void msk_miibus_statchg(device_t dev) { struct msk_softc *sc; struct msk_if_softc *sc_if; struct mii_data *mii; struct ifnet *ifp; uint32_t gmac; sc_if = device_get_softc(dev); sc = sc_if->msk_softc; MSK_IF_LOCK_ASSERT(sc_if); mii = device_get_softc(sc_if->msk_miibus); ifp = sc_if->msk_ifp; if (mii == NULL || ifp == NULL || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; sc_if->msk_flags &= ~MSK_FLAG_LINK; if ((mii->mii_media_status & (IFM_AVALID | IFM_ACTIVE)) == (IFM_AVALID | IFM_ACTIVE)) { switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: sc_if->msk_flags |= MSK_FLAG_LINK; break; case IFM_1000_T: case IFM_1000_SX: case IFM_1000_LX: case IFM_1000_CX: if ((sc_if->msk_flags & MSK_FLAG_FASTETHER) == 0) sc_if->msk_flags |= MSK_FLAG_LINK; break; default: break; } } if ((sc_if->msk_flags & MSK_FLAG_LINK) != 0) { /* Enable Tx FIFO Underrun. 
*/ CSR_WRITE_1(sc, MR_ADDR(sc_if->msk_port, GMAC_IRQ_MSK), GM_IS_TX_FF_UR | GM_IS_RX_FF_OR); /* * Because mii(4) notifies msk(4) when it detects a link status * change, there is no need to enable automatic * speed/flow-control/duplex updates. */ gmac = GM_GPCR_AU_ALL_DIS; switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_1000_SX: case IFM_1000_T: gmac |= GM_GPCR_SPEED_1000; break; case IFM_100_TX: gmac |= GM_GPCR_SPEED_100; break; case IFM_10_T: break; } if ((IFM_OPTIONS(mii->mii_media_active) & IFM_ETH_RXPAUSE) == 0) gmac |= GM_GPCR_FC_RX_DIS; if ((IFM_OPTIONS(mii->mii_media_active) & IFM_ETH_TXPAUSE) == 0) gmac |= GM_GPCR_FC_TX_DIS; if ((IFM_OPTIONS(mii->mii_media_active) & IFM_FDX) != 0) gmac |= GM_GPCR_DUP_FULL; else gmac |= GM_GPCR_FC_RX_DIS | GM_GPCR_FC_TX_DIS; gmac |= GM_GPCR_RX_ENA | GM_GPCR_TX_ENA; GMAC_WRITE_2(sc, sc_if->msk_port, GM_GP_CTRL, gmac); /* Read again to ensure writing. */ GMAC_READ_2(sc, sc_if->msk_port, GM_GP_CTRL); gmac = GMC_PAUSE_OFF; if ((IFM_OPTIONS(mii->mii_media_active) & IFM_FDX) != 0) { if ((IFM_OPTIONS(mii->mii_media_active) & IFM_ETH_RXPAUSE) != 0) gmac = GMC_PAUSE_ON; } CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, GMAC_CTRL), gmac); /* Enable PHY interrupt for FIFO underrun/overflow. */ msk_phy_writereg(sc_if, PHY_ADDR_MARV, PHY_MARV_INT_MASK, PHY_M_IS_FIFO_ERROR); } else { /* * Link state changed to down. * Disable PHY interrupts. */ msk_phy_writereg(sc_if, PHY_ADDR_MARV, PHY_MARV_INT_MASK, 0); /* Disable Rx/Tx MAC. */ gmac = GMAC_READ_2(sc, sc_if->msk_port, GM_GP_CTRL); if ((gmac & (GM_GPCR_RX_ENA | GM_GPCR_TX_ENA)) != 0) { gmac &= ~(GM_GPCR_RX_ENA | GM_GPCR_TX_ENA); GMAC_WRITE_2(sc, sc_if->msk_port, GM_GP_CTRL, gmac); /* Read again to ensure writing. */ GMAC_READ_2(sc, sc_if->msk_port, GM_GP_CTRL); } } } static u_int msk_hash_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt) { uint32_t *mchash = arg; uint32_t crc; crc = ether_crc32_be(LLADDR(sdl), ETHER_ADDR_LEN); /* Just want the 6 least significant bits. */ crc &= 0x3f; /* Set the corresponding bit in the hash table.
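 * Worked example (hypothetical CRC): for crc = 0x2B after the 0x3f
 * mask, crc >> 5 = 1 and crc & 0x1f = 11, so bit 11 of mchash[1] is
 * set; msk_rxfilter() below then loads the four GM_MC_ADDR_H*
 * registers from the two 32-bit mchash words, 16 bits at a time.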
*/ mchash[crc >> 5] |= 1 << (crc & 0x1f); return (1); } static void msk_rxfilter(struct msk_if_softc *sc_if) { struct msk_softc *sc; struct ifnet *ifp; uint32_t mchash[2]; uint16_t mode; sc = sc_if->msk_softc; MSK_IF_LOCK_ASSERT(sc_if); ifp = sc_if->msk_ifp; bzero(mchash, sizeof(mchash)); mode = GMAC_READ_2(sc, sc_if->msk_port, GM_RX_CTRL); if ((ifp->if_flags & IFF_PROMISC) != 0) mode &= ~(GM_RXCR_UCF_ENA | GM_RXCR_MCF_ENA); else if ((ifp->if_flags & IFF_ALLMULTI) != 0) { mode |= GM_RXCR_UCF_ENA | GM_RXCR_MCF_ENA; mchash[0] = 0xffff; mchash[1] = 0xffff; } else { mode |= GM_RXCR_UCF_ENA; if_foreach_llmaddr(ifp, msk_hash_maddr, mchash); if (mchash[0] != 0 || mchash[1] != 0) mode |= GM_RXCR_MCF_ENA; } GMAC_WRITE_2(sc, sc_if->msk_port, GM_MC_ADDR_H1, mchash[0] & 0xffff); GMAC_WRITE_2(sc, sc_if->msk_port, GM_MC_ADDR_H2, (mchash[0] >> 16) & 0xffff); GMAC_WRITE_2(sc, sc_if->msk_port, GM_MC_ADDR_H3, mchash[1] & 0xffff); GMAC_WRITE_2(sc, sc_if->msk_port, GM_MC_ADDR_H4, (mchash[1] >> 16) & 0xffff); GMAC_WRITE_2(sc, sc_if->msk_port, GM_RX_CTRL, mode); } static void msk_setvlan(struct msk_if_softc *sc_if, struct ifnet *ifp) { struct msk_softc *sc; sc = sc_if->msk_softc; if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) { CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_CTRL_T), RX_VLAN_STRIP_ON); CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), TX_VLAN_TAG_ON); } else { CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_CTRL_T), RX_VLAN_STRIP_OFF); CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), TX_VLAN_TAG_OFF); } } static int msk_rx_fill(struct msk_if_softc *sc_if, int jumbo) { uint16_t idx; int i; if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 && (sc_if->msk_ifp->if_capenable & IFCAP_RXCSUM) != 0) { /* Wait until controller executes OP_TCPSTART command. */ for (i = 100; i > 0; i--) { DELAY(100); idx = CSR_READ_2(sc_if->msk_softc, Y2_PREF_Q_ADDR(sc_if->msk_rxq, PREF_UNIT_GET_IDX_REG)); if (idx != 0) break; } if (i == 0) { device_printf(sc_if->msk_if_dev, "prefetch unit stuck?\n"); return (ETIMEDOUT); } /* * Fill consumed LE with free buffer. This can be done * in Rx handler but we don't want to add special code * in fast handler. */ if (jumbo > 0) { if (msk_jumbo_newbuf(sc_if, 0) != 0) return (ENOBUFS); bus_dmamap_sync(sc_if->msk_cdata.msk_jumbo_rx_ring_tag, sc_if->msk_cdata.msk_jumbo_rx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } else { if (msk_newbuf(sc_if, 0) != 0) return (ENOBUFS); bus_dmamap_sync(sc_if->msk_cdata.msk_rx_ring_tag, sc_if->msk_cdata.msk_rx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } sc_if->msk_cdata.msk_rx_prod = 0; CSR_WRITE_2(sc_if->msk_softc, Y2_PREF_Q_ADDR(sc_if->msk_rxq, PREF_UNIT_PUT_IDX_REG), sc_if->msk_cdata.msk_rx_prod); } return (0); } static int msk_init_rx_ring(struct msk_if_softc *sc_if) { struct msk_ring_data *rd; struct msk_rxdesc *rxd; int i, nbuf, prod; MSK_IF_LOCK_ASSERT(sc_if); sc_if->msk_cdata.msk_rx_cons = 0; sc_if->msk_cdata.msk_rx_prod = 0; sc_if->msk_cdata.msk_rx_putwm = MSK_PUT_WM; rd = &sc_if->msk_rdata; bzero(rd->msk_rx_ring, sizeof(struct msk_rx_desc) * MSK_RX_RING_CNT); for (i = prod = 0; i < MSK_RX_RING_CNT; i++) { rxd = &sc_if->msk_cdata.msk_rxdesc[prod]; rxd->rx_m = NULL; rxd->rx_le = &rd->msk_rx_ring[prod]; MSK_INC(prod, MSK_RX_RING_CNT); } nbuf = MSK_RX_BUF_CNT; prod = 0; /* Have controller know how to compute Rx checksum. 
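 * A note on the OP_TCPSTART list element built just below: both Rx
 * checksum start positions are packed into one 32-bit word,
 *	msk_addr = htole32(ETHER_HDR_LEN << 16 | ETHER_HDR_LEN);
 * i.e. checksumming begins right after the 14-byte Ethernet header in
 * both positions.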
*/ if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 && (sc_if->msk_ifp->if_capenable & IFCAP_RXCSUM) != 0) { #ifdef MSK_64BIT_DMA rxd = &sc_if->msk_cdata.msk_rxdesc[prod]; rxd->rx_m = NULL; rxd->rx_le = &rd->msk_rx_ring[prod]; rxd->rx_le->msk_addr = htole32(ETHER_HDR_LEN << 16 | ETHER_HDR_LEN); rxd->rx_le->msk_control = htole32(OP_TCPSTART | HW_OWNER); MSK_INC(prod, MSK_RX_RING_CNT); MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_RX_RING_CNT); #endif rxd = &sc_if->msk_cdata.msk_rxdesc[prod]; rxd->rx_m = NULL; rxd->rx_le = &rd->msk_rx_ring[prod]; rxd->rx_le->msk_addr = htole32(ETHER_HDR_LEN << 16 | ETHER_HDR_LEN); rxd->rx_le->msk_control = htole32(OP_TCPSTART | HW_OWNER); MSK_INC(prod, MSK_RX_RING_CNT); MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_RX_RING_CNT); nbuf--; } for (i = 0; i < nbuf; i++) { if (msk_newbuf(sc_if, prod) != 0) return (ENOBUFS); MSK_RX_INC(prod, MSK_RX_RING_CNT); } bus_dmamap_sync(sc_if->msk_cdata.msk_rx_ring_tag, sc_if->msk_cdata.msk_rx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Update prefetch unit. */ sc_if->msk_cdata.msk_rx_prod = prod; CSR_WRITE_2(sc_if->msk_softc, Y2_PREF_Q_ADDR(sc_if->msk_rxq, PREF_UNIT_PUT_IDX_REG), (sc_if->msk_cdata.msk_rx_prod + MSK_RX_RING_CNT - 1) % MSK_RX_RING_CNT); if (msk_rx_fill(sc_if, 0) != 0) return (ENOBUFS); return (0); } static int msk_init_jumbo_rx_ring(struct msk_if_softc *sc_if) { struct msk_ring_data *rd; struct msk_rxdesc *rxd; int i, nbuf, prod; MSK_IF_LOCK_ASSERT(sc_if); sc_if->msk_cdata.msk_rx_cons = 0; sc_if->msk_cdata.msk_rx_prod = 0; sc_if->msk_cdata.msk_rx_putwm = MSK_PUT_WM; rd = &sc_if->msk_rdata; bzero(rd->msk_jumbo_rx_ring, sizeof(struct msk_rx_desc) * MSK_JUMBO_RX_RING_CNT); for (i = prod = 0; i < MSK_JUMBO_RX_RING_CNT; i++) { rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[prod]; rxd->rx_m = NULL; rxd->rx_le = &rd->msk_jumbo_rx_ring[prod]; MSK_INC(prod, MSK_JUMBO_RX_RING_CNT); } nbuf = MSK_RX_BUF_CNT; prod = 0; /* Have controller know how to compute Rx checksum. */ if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 && (sc_if->msk_ifp->if_capenable & IFCAP_RXCSUM) != 0) { #ifdef MSK_64BIT_DMA rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[prod]; rxd->rx_m = NULL; rxd->rx_le = &rd->msk_jumbo_rx_ring[prod]; rxd->rx_le->msk_addr = htole32(ETHER_HDR_LEN << 16 | ETHER_HDR_LEN); rxd->rx_le->msk_control = htole32(OP_TCPSTART | HW_OWNER); MSK_INC(prod, MSK_JUMBO_RX_RING_CNT); MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_JUMBO_RX_RING_CNT); #endif rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[prod]; rxd->rx_m = NULL; rxd->rx_le = &rd->msk_jumbo_rx_ring[prod]; rxd->rx_le->msk_addr = htole32(ETHER_HDR_LEN << 16 | ETHER_HDR_LEN); rxd->rx_le->msk_control = htole32(OP_TCPSTART | HW_OWNER); MSK_INC(prod, MSK_JUMBO_RX_RING_CNT); MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_JUMBO_RX_RING_CNT); nbuf--; } for (i = 0; i < nbuf; i++) { if (msk_jumbo_newbuf(sc_if, prod) != 0) return (ENOBUFS); MSK_RX_INC(prod, MSK_JUMBO_RX_RING_CNT); } bus_dmamap_sync(sc_if->msk_cdata.msk_jumbo_rx_ring_tag, sc_if->msk_cdata.msk_jumbo_rx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Update prefetch unit. 
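 * The put index handed to the prefetch unit below is
 *	(prod + MSK_JUMBO_RX_RING_CNT - 1) % MSK_JUMBO_RX_RING_CNT,
 * i.e. the descriptor just before the producer index, so the hardware
 * only ever chases list elements that have been fully initialized (a
 * hedged reading; msk_init_rx_ring uses the same idiom).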
*/ sc_if->msk_cdata.msk_rx_prod = prod; CSR_WRITE_2(sc_if->msk_softc, Y2_PREF_Q_ADDR(sc_if->msk_rxq, PREF_UNIT_PUT_IDX_REG), (sc_if->msk_cdata.msk_rx_prod + MSK_JUMBO_RX_RING_CNT - 1) % MSK_JUMBO_RX_RING_CNT); if (msk_rx_fill(sc_if, 1) != 0) return (ENOBUFS); return (0); } static void msk_init_tx_ring(struct msk_if_softc *sc_if) { struct msk_ring_data *rd; struct msk_txdesc *txd; int i; sc_if->msk_cdata.msk_tso_mtu = 0; sc_if->msk_cdata.msk_last_csum = 0; sc_if->msk_cdata.msk_tx_prod = 0; sc_if->msk_cdata.msk_tx_cons = 0; sc_if->msk_cdata.msk_tx_cnt = 0; sc_if->msk_cdata.msk_tx_high_addr = 0; rd = &sc_if->msk_rdata; bzero(rd->msk_tx_ring, sizeof(struct msk_tx_desc) * MSK_TX_RING_CNT); for (i = 0; i < MSK_TX_RING_CNT; i++) { txd = &sc_if->msk_cdata.msk_txdesc[i]; txd->tx_m = NULL; txd->tx_le = &rd->msk_tx_ring[i]; } bus_dmamap_sync(sc_if->msk_cdata.msk_tx_ring_tag, sc_if->msk_cdata.msk_tx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } static __inline void msk_discard_rxbuf(struct msk_if_softc *sc_if, int idx) { struct msk_rx_desc *rx_le; struct msk_rxdesc *rxd; struct mbuf *m; #ifdef MSK_64BIT_DMA rxd = &sc_if->msk_cdata.msk_rxdesc[idx]; rx_le = rxd->rx_le; rx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER); MSK_INC(idx, MSK_RX_RING_CNT); #endif rxd = &sc_if->msk_cdata.msk_rxdesc[idx]; m = rxd->rx_m; rx_le = rxd->rx_le; rx_le->msk_control = htole32(m->m_len | OP_PACKET | HW_OWNER); } static __inline void msk_discard_jumbo_rxbuf(struct msk_if_softc *sc_if, int idx) { struct msk_rx_desc *rx_le; struct msk_rxdesc *rxd; struct mbuf *m; #ifdef MSK_64BIT_DMA rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[idx]; rx_le = rxd->rx_le; rx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER); MSK_INC(idx, MSK_JUMBO_RX_RING_CNT); #endif rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[idx]; m = rxd->rx_m; rx_le = rxd->rx_le; rx_le->msk_control = htole32(m->m_len | OP_PACKET | HW_OWNER); } static int msk_newbuf(struct msk_if_softc *sc_if, int idx) { struct msk_rx_desc *rx_le; struct msk_rxdesc *rxd; struct mbuf *m; bus_dma_segment_t segs[1]; bus_dmamap_t map; int nsegs; m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MCLBYTES; if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) == 0) m_adj(m, ETHER_ALIGN); #ifndef __NO_STRICT_ALIGNMENT else m_adj(m, MSK_RX_BUF_ALIGN); #endif if (bus_dmamap_load_mbuf_sg(sc_if->msk_cdata.msk_rx_tag, sc_if->msk_cdata.msk_rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT) != 0) { m_freem(m); return (ENOBUFS); } KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); rxd = &sc_if->msk_cdata.msk_rxdesc[idx]; #ifdef MSK_64BIT_DMA rx_le = rxd->rx_le; rx_le->msk_addr = htole32(MSK_ADDR_HI(segs[0].ds_addr)); rx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER); MSK_INC(idx, MSK_RX_RING_CNT); rxd = &sc_if->msk_cdata.msk_rxdesc[idx]; #endif if (rxd->rx_m != NULL) { bus_dmamap_sync(sc_if->msk_cdata.msk_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc_if->msk_cdata.msk_rx_tag, rxd->rx_dmamap); rxd->rx_m = NULL; } map = rxd->rx_dmamap; rxd->rx_dmamap = sc_if->msk_cdata.msk_rx_sparemap; sc_if->msk_cdata.msk_rx_sparemap = map; bus_dmamap_sync(sc_if->msk_cdata.msk_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_PREREAD); rxd->rx_m = m; rx_le = rxd->rx_le; rx_le->msk_addr = htole32(MSK_ADDR_LO(segs[0].ds_addr)); rx_le->msk_control = htole32(segs[0].ds_len | OP_PACKET | HW_OWNER); return (0); } static int msk_jumbo_newbuf(struct msk_if_softc *sc_if, int idx) { struct msk_rx_desc *rx_le; struct msk_rxdesc *rxd; struct mbuf *m; bus_dma_segment_t 
segs[1]; bus_dmamap_t map; int nsegs; m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MJUM9BYTES; if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) == 0) m_adj(m, ETHER_ALIGN); #ifndef __NO_STRICT_ALIGNMENT else m_adj(m, MSK_RX_BUF_ALIGN); #endif if (bus_dmamap_load_mbuf_sg(sc_if->msk_cdata.msk_jumbo_rx_tag, sc_if->msk_cdata.msk_jumbo_rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT) != 0) { m_freem(m); return (ENOBUFS); } KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[idx]; #ifdef MSK_64BIT_DMA rx_le = rxd->rx_le; rx_le->msk_addr = htole32(MSK_ADDR_HI(segs[0].ds_addr)); rx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER); MSK_INC(idx, MSK_JUMBO_RX_RING_CNT); rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[idx]; #endif if (rxd->rx_m != NULL) { bus_dmamap_sync(sc_if->msk_cdata.msk_jumbo_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc_if->msk_cdata.msk_jumbo_rx_tag, rxd->rx_dmamap); rxd->rx_m = NULL; } map = rxd->rx_dmamap; rxd->rx_dmamap = sc_if->msk_cdata.msk_jumbo_rx_sparemap; sc_if->msk_cdata.msk_jumbo_rx_sparemap = map; bus_dmamap_sync(sc_if->msk_cdata.msk_jumbo_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_PREREAD); rxd->rx_m = m; rx_le = rxd->rx_le; rx_le->msk_addr = htole32(MSK_ADDR_LO(segs[0].ds_addr)); rx_le->msk_control = htole32(segs[0].ds_len | OP_PACKET | HW_OWNER); return (0); } /* * Set media options. */ static int msk_mediachange(struct ifnet *ifp) { struct msk_if_softc *sc_if; struct mii_data *mii; int error; sc_if = ifp->if_softc; MSK_IF_LOCK(sc_if); mii = device_get_softc(sc_if->msk_miibus); error = mii_mediachg(mii); MSK_IF_UNLOCK(sc_if); return (error); } /* * Report current media status. */ static void msk_mediastatus(struct ifnet *ifp, struct ifmediareq *ifmr) { struct msk_if_softc *sc_if; struct mii_data *mii; sc_if = ifp->if_softc; MSK_IF_LOCK(sc_if); if ((ifp->if_flags & IFF_UP) == 0) { MSK_IF_UNLOCK(sc_if); return; } mii = device_get_softc(sc_if->msk_miibus); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; MSK_IF_UNLOCK(sc_if); } static int msk_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct msk_if_softc *sc_if; struct ifreq *ifr; struct mii_data *mii; int error, mask, reinit; sc_if = ifp->if_softc; ifr = (struct ifreq *)data; error = 0; switch(command) { case SIOCSIFMTU: MSK_IF_LOCK(sc_if); if (ifr->ifr_mtu > MSK_JUMBO_MTU || ifr->ifr_mtu < ETHERMIN) error = EINVAL; else if (ifp->if_mtu != ifr->ifr_mtu) { if (ifr->ifr_mtu > ETHERMTU) { if ((sc_if->msk_flags & MSK_FLAG_JUMBO) == 0) { error = EINVAL; MSK_IF_UNLOCK(sc_if); break; } if ((sc_if->msk_flags & MSK_FLAG_JUMBO_NOCSUM) != 0) { ifp->if_hwassist &= ~(MSK_CSUM_FEATURES | CSUM_TSO); ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_TXCSUM); VLAN_CAPABILITIES(ifp); } } ifp->if_mtu = ifr->ifr_mtu; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; msk_init_locked(sc_if); } } MSK_IF_UNLOCK(sc_if); break; case SIOCSIFFLAGS: MSK_IF_LOCK(sc_if); if ((ifp->if_flags & IFF_UP) != 0) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0 && ((ifp->if_flags ^ sc_if->msk_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) != 0) msk_rxfilter(sc_if); else if ((sc_if->msk_flags & MSK_FLAG_DETACH) == 0) msk_init_locked(sc_if); } else if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) msk_stop(sc_if); sc_if->msk_if_flags = ifp->if_flags; MSK_IF_UNLOCK(sc_if); break; case SIOCADDMULTI: case SIOCDELMULTI: MSK_IF_LOCK(sc_if); if 
((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) msk_rxfilter(sc_if); MSK_IF_UNLOCK(sc_if); break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: mii = device_get_softc(sc_if->msk_miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); break; case SIOCSIFCAP: reinit = 0; MSK_IF_LOCK(sc_if); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if ((mask & IFCAP_TXCSUM) != 0 && (IFCAP_TXCSUM & ifp->if_capabilities) != 0) { ifp->if_capenable ^= IFCAP_TXCSUM; if ((IFCAP_TXCSUM & ifp->if_capenable) != 0) ifp->if_hwassist |= MSK_CSUM_FEATURES; else ifp->if_hwassist &= ~MSK_CSUM_FEATURES; } if ((mask & IFCAP_RXCSUM) != 0 && (IFCAP_RXCSUM & ifp->if_capabilities) != 0) { ifp->if_capenable ^= IFCAP_RXCSUM; if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0) reinit = 1; } if ((mask & IFCAP_VLAN_HWCSUM) != 0 && (IFCAP_VLAN_HWCSUM & ifp->if_capabilities) != 0) ifp->if_capenable ^= IFCAP_VLAN_HWCSUM; if ((mask & IFCAP_TSO4) != 0 && (IFCAP_TSO4 & ifp->if_capabilities) != 0) { ifp->if_capenable ^= IFCAP_TSO4; if ((IFCAP_TSO4 & ifp->if_capenable) != 0) ifp->if_hwassist |= CSUM_TSO; else ifp->if_hwassist &= ~CSUM_TSO; } if ((mask & IFCAP_VLAN_HWTSO) != 0 && (IFCAP_VLAN_HWTSO & ifp->if_capabilities) != 0) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if ((mask & IFCAP_VLAN_HWTAGGING) != 0 && (IFCAP_VLAN_HWTAGGING & ifp->if_capabilities) != 0) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if ((IFCAP_VLAN_HWTAGGING & ifp->if_capenable) == 0) ifp->if_capenable &= ~(IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWCSUM); msk_setvlan(sc_if, ifp); } if (ifp->if_mtu > ETHERMTU && (sc_if->msk_flags & MSK_FLAG_JUMBO_NOCSUM) != 0) { ifp->if_hwassist &= ~(MSK_CSUM_FEATURES | CSUM_TSO); ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_TXCSUM); } VLAN_CAPABILITIES(ifp); if (reinit > 0 && (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; msk_init_locked(sc_if); } MSK_IF_UNLOCK(sc_if); break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } static int mskc_probe(device_t dev) { const struct msk_product *mp; uint16_t vendor, devid; int i; vendor = pci_get_vendor(dev); devid = pci_get_device(dev); mp = msk_products; for (i = 0; i < nitems(msk_products); i++, mp++) { if (vendor == mp->msk_vendorid && devid == mp->msk_deviceid) { device_set_desc(dev, mp->msk_name); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static int mskc_setup_rambuffer(struct msk_softc *sc) { int next; int i; /* Get adapter SRAM size. */ sc->msk_ramsize = CSR_READ_1(sc, B2_E_0) * 4; if (bootverbose) device_printf(sc->msk_dev, "RAM buffer size : %dKB\n", sc->msk_ramsize); if (sc->msk_ramsize == 0) return (0); sc->msk_pflags |= MSK_FLAG_RAMBUF; /* * Give receiver 2/3 of memory and round down to the multiple * of 1024. Tx/Rx RAM buffer size of Yukon II should be multiple * of 1024. 
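 * Worked example (hypothetical 48KB SRAM): with msk_ramsize = 48,
 *	msk_rxqsize = rounddown((48 * 1024 * 2) / 3, 1024) = 32768
 *	msk_txqsize = 48 * 1024 - 32768 = 16384
 * so the Rx queue gets 32KB and the Tx queue the remaining 16KB.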
*/ sc->msk_rxqsize = rounddown((sc->msk_ramsize * 1024 * 2) / 3, 1024); sc->msk_txqsize = (sc->msk_ramsize * 1024) - sc->msk_rxqsize; for (i = 0, next = 0; i < sc->msk_num_port; i++) { sc->msk_rxqstart[i] = next; sc->msk_rxqend[i] = next + sc->msk_rxqsize - 1; next = sc->msk_rxqend[i] + 1; sc->msk_txqstart[i] = next; sc->msk_txqend[i] = next + sc->msk_txqsize - 1; next = sc->msk_txqend[i] + 1; if (bootverbose) { device_printf(sc->msk_dev, "Port %d : Rx Queue %dKB(0x%08x:0x%08x)\n", i, sc->msk_rxqsize / 1024, sc->msk_rxqstart[i], sc->msk_rxqend[i]); device_printf(sc->msk_dev, "Port %d : Tx Queue %dKB(0x%08x:0x%08x)\n", i, sc->msk_txqsize / 1024, sc->msk_txqstart[i], sc->msk_txqend[i]); } } return (0); } static void msk_phy_power(struct msk_softc *sc, int mode) { uint32_t our, val; int i; switch (mode) { case MSK_PHY_POWERUP: /* Switch power to VCC (WA for VAUX problem). */ CSR_WRITE_1(sc, B0_POWER_CTRL, PC_VAUX_ENA | PC_VCC_ENA | PC_VAUX_OFF | PC_VCC_ON); /* Disable Core Clock Division, set Clock Select to 0. */ CSR_WRITE_4(sc, B2_Y2_CLK_CTRL, Y2_CLK_DIV_DIS); val = 0; if (sc->msk_hw_id == CHIP_ID_YUKON_XL && sc->msk_hw_rev > CHIP_REV_YU_XL_A1) { /* Enable bits are inverted. */ val = Y2_PCI_CLK_LNK1_DIS | Y2_COR_CLK_LNK1_DIS | Y2_CLK_GAT_LNK1_DIS | Y2_PCI_CLK_LNK2_DIS | Y2_COR_CLK_LNK2_DIS | Y2_CLK_GAT_LNK2_DIS; } /* * Enable PCI & Core Clock, enable clock gating for both Links. */ CSR_WRITE_1(sc, B2_Y2_CLK_GATE, val); our = CSR_PCI_READ_4(sc, PCI_OUR_REG_1); our &= ~(PCI_Y2_PHY1_POWD | PCI_Y2_PHY2_POWD); if (sc->msk_hw_id == CHIP_ID_YUKON_XL) { if (sc->msk_hw_rev > CHIP_REV_YU_XL_A1) { /* Deassert Low Power for 1st PHY. */ our |= PCI_Y2_PHY1_COMA; if (sc->msk_num_port > 1) our |= PCI_Y2_PHY2_COMA; } } if (sc->msk_hw_id == CHIP_ID_YUKON_EC_U || sc->msk_hw_id == CHIP_ID_YUKON_EX || sc->msk_hw_id >= CHIP_ID_YUKON_FE_P) { val = CSR_PCI_READ_4(sc, PCI_OUR_REG_4); val &= (PCI_FORCE_ASPM_REQUEST | PCI_ASPM_GPHY_LINK_DOWN | PCI_ASPM_INT_FIFO_EMPTY | PCI_ASPM_CLKRUN_REQUEST); /* Set all bits to 0 except bits 15..12. */ CSR_PCI_WRITE_4(sc, PCI_OUR_REG_4, val); val = CSR_PCI_READ_4(sc, PCI_OUR_REG_5); val &= PCI_CTL_TIM_VMAIN_AV_MSK; CSR_PCI_WRITE_4(sc, PCI_OUR_REG_5, val); CSR_PCI_WRITE_4(sc, PCI_CFG_REG_1, 0); CSR_WRITE_2(sc, B0_CTST, Y2_HW_WOL_ON); /* * Disable status race, workaround for * Yukon EC Ultra & Yukon EX. */ val = CSR_READ_4(sc, B2_GP_IO); val |= GLB_GPIO_STAT_RACE_DIS; CSR_WRITE_4(sc, B2_GP_IO, val); CSR_READ_4(sc, B2_GP_IO); } /* Release PHY from PowerDown/COMA mode. */ CSR_PCI_WRITE_4(sc, PCI_OUR_REG_1, our); for (i = 0; i < sc->msk_num_port; i++) { CSR_WRITE_2(sc, MR_ADDR(i, GMAC_LINK_CTRL), GMLC_RST_SET); CSR_WRITE_2(sc, MR_ADDR(i, GMAC_LINK_CTRL), GMLC_RST_CLR); } break; case MSK_PHY_POWERDOWN: val = CSR_PCI_READ_4(sc, PCI_OUR_REG_1); val |= PCI_Y2_PHY1_POWD | PCI_Y2_PHY2_POWD; if (sc->msk_hw_id == CHIP_ID_YUKON_XL && sc->msk_hw_rev > CHIP_REV_YU_XL_A1) { val &= ~PCI_Y2_PHY1_COMA; if (sc->msk_num_port > 1) val &= ~PCI_Y2_PHY2_COMA; } CSR_PCI_WRITE_4(sc, PCI_OUR_REG_1, val); val = Y2_PCI_CLK_LNK1_DIS | Y2_COR_CLK_LNK1_DIS | Y2_CLK_GAT_LNK1_DIS | Y2_PCI_CLK_LNK2_DIS | Y2_COR_CLK_LNK2_DIS | Y2_CLK_GAT_LNK2_DIS; if (sc->msk_hw_id == CHIP_ID_YUKON_XL && sc->msk_hw_rev > CHIP_REV_YU_XL_A1) { /* Enable bits are inverted. */ val = 0; } /* * Disable PCI & Core Clock, disable clock gating for * both Links. 
*/ CSR_WRITE_1(sc, B2_Y2_CLK_GATE, val); CSR_WRITE_1(sc, B0_POWER_CTRL, PC_VAUX_ENA | PC_VCC_ENA | PC_VAUX_ON | PC_VCC_OFF); break; default: break; } } static void mskc_reset(struct msk_softc *sc) { bus_addr_t addr; uint16_t status; uint32_t val; int i, initram; /* Disable ASF. */ if (sc->msk_hw_id >= CHIP_ID_YUKON_XL && sc->msk_hw_id <= CHIP_ID_YUKON_SUPR) { if (sc->msk_hw_id == CHIP_ID_YUKON_EX || sc->msk_hw_id == CHIP_ID_YUKON_SUPR) { CSR_WRITE_4(sc, B28_Y2_CPU_WDOG, 0); status = CSR_READ_2(sc, B28_Y2_ASF_HCU_CCSR); /* Clear AHB bridge & microcontroller reset. */ status &= ~(Y2_ASF_HCU_CCSR_AHB_RST | Y2_ASF_HCU_CCSR_CPU_RST_MODE); /* Clear ASF microcontroller state. */ status &= ~Y2_ASF_HCU_CCSR_UC_STATE_MSK; status &= ~Y2_ASF_HCU_CCSR_CPU_CLK_DIVIDE_MSK; CSR_WRITE_2(sc, B28_Y2_ASF_HCU_CCSR, status); CSR_WRITE_4(sc, B28_Y2_CPU_WDOG, 0); } else CSR_WRITE_1(sc, B28_Y2_ASF_STAT_CMD, Y2_ASF_RESET); CSR_WRITE_2(sc, B0_CTST, Y2_ASF_DISABLE); /* * Since we disabled ASF, S/W reset is required for * Power Management. */ CSR_WRITE_2(sc, B0_CTST, CS_RST_SET); CSR_WRITE_2(sc, B0_CTST, CS_RST_CLR); } /* Clear all error bits in the PCI status register. */ status = pci_read_config(sc->msk_dev, PCIR_STATUS, 2); CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_ON); pci_write_config(sc->msk_dev, PCIR_STATUS, status | PCIM_STATUS_PERR | PCIM_STATUS_SERR | PCIM_STATUS_RMABORT | PCIM_STATUS_RTABORT | PCIM_STATUS_MDPERR, 2); CSR_WRITE_2(sc, B0_CTST, CS_MRST_CLR); switch (sc->msk_bustype) { case MSK_PEX_BUS: /* Clear all PEX errors. */ CSR_PCI_WRITE_4(sc, PEX_UNC_ERR_STAT, 0xffffffff); val = CSR_PCI_READ_4(sc, PEX_UNC_ERR_STAT); if ((val & PEX_RX_OV) != 0) { sc->msk_intrmask &= ~Y2_IS_HW_ERR; sc->msk_intrhwemask &= ~Y2_IS_PCI_EXP; } break; case MSK_PCI_BUS: case MSK_PCIX_BUS: /* Set Cache Line Size to 2(8bytes) if configured to 0. */ val = pci_read_config(sc->msk_dev, PCIR_CACHELNSZ, 1); if (val == 0) pci_write_config(sc->msk_dev, PCIR_CACHELNSZ, 2, 1); if (sc->msk_bustype == MSK_PCIX_BUS) { /* Set Cache Line Size opt. */ val = pci_read_config(sc->msk_dev, PCI_OUR_REG_1, 4); val |= PCI_CLS_OPT; pci_write_config(sc->msk_dev, PCI_OUR_REG_1, val, 4); } break; } /* Set PHY power state. */ msk_phy_power(sc, MSK_PHY_POWERUP); /* Reset GPHY/GMAC Control */ for (i = 0; i < sc->msk_num_port; i++) { /* GPHY Control reset. */ CSR_WRITE_1(sc, MR_ADDR(i, GPHY_CTRL), GPC_RST_SET); CSR_WRITE_1(sc, MR_ADDR(i, GPHY_CTRL), GPC_RST_CLR); /* GMAC Control reset. */ CSR_WRITE_4(sc, MR_ADDR(i, GMAC_CTRL), GMC_RST_SET); CSR_WRITE_4(sc, MR_ADDR(i, GMAC_CTRL), GMC_RST_CLR); CSR_WRITE_4(sc, MR_ADDR(i, GMAC_CTRL), GMC_F_LOOPB_OFF); if (sc->msk_hw_id == CHIP_ID_YUKON_EX || sc->msk_hw_id == CHIP_ID_YUKON_SUPR) CSR_WRITE_4(sc, MR_ADDR(i, GMAC_CTRL), GMC_BYP_MACSECRX_ON | GMC_BYP_MACSECTX_ON | GMC_BYP_RETR_ON); } if (sc->msk_hw_id == CHIP_ID_YUKON_SUPR && sc->msk_hw_rev > CHIP_REV_YU_SU_B0) CSR_PCI_WRITE_4(sc, PCI_OUR_REG_3, PCI_CLK_MACSEC_DIS); if (sc->msk_hw_id == CHIP_ID_YUKON_OPT && sc->msk_hw_rev == 0) { /* Disable PCIe PHY powerdown(reg 0x80, bit7). */ CSR_WRITE_4(sc, Y2_PEX_PHY_DATA, (0x0080 << 16) | 0x0080); } CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_OFF); /* LED On. */ CSR_WRITE_2(sc, B0_CTST, Y2_LED_STAT_ON); /* Clear TWSI IRQ. */ CSR_WRITE_4(sc, B2_I2C_IRQ, I2C_CLR_IRQ); /* Turn off hardware timer. */ CSR_WRITE_1(sc, B2_TI_CTRL, TIM_STOP); CSR_WRITE_1(sc, B2_TI_CTRL, TIM_CLR_IRQ); /* Turn off descriptor polling. */ CSR_WRITE_1(sc, B28_DPT_CTRL, DPT_STOP); /* Turn off time stamps. 
*/
	CSR_WRITE_1(sc, GMAC_TI_ST_CTRL, GMT_ST_STOP);
	CSR_WRITE_1(sc, GMAC_TI_ST_CTRL, GMT_ST_CLR_IRQ);

	initram = 0;
	if (sc->msk_hw_id == CHIP_ID_YUKON_XL ||
	    sc->msk_hw_id == CHIP_ID_YUKON_EC ||
	    sc->msk_hw_id == CHIP_ID_YUKON_FE)
		initram++;
	/* Configure timeout values. */
	for (i = 0; initram > 0 && i < sc->msk_num_port; i++) {
		CSR_WRITE_2(sc, SELECT_RAM_BUFFER(i, B3_RI_CTRL), RI_RST_SET);
		CSR_WRITE_2(sc, SELECT_RAM_BUFFER(i, B3_RI_CTRL), RI_RST_CLR);
		CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_WTO_R1),
		    MSK_RI_TO_53);
		CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_WTO_XA1),
		    MSK_RI_TO_53);
		CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_WTO_XS1),
		    MSK_RI_TO_53);
		CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_RTO_R1),
		    MSK_RI_TO_53);
		CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_RTO_XA1),
		    MSK_RI_TO_53);
		CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_RTO_XS1),
		    MSK_RI_TO_53);
		CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_WTO_R2),
		    MSK_RI_TO_53);
		CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_WTO_XA2),
		    MSK_RI_TO_53);
		CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_WTO_XS2),
		    MSK_RI_TO_53);
		CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_RTO_R2),
		    MSK_RI_TO_53);
		CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_RTO_XA2),
		    MSK_RI_TO_53);
		CSR_WRITE_1(sc, SELECT_RAM_BUFFER(i, B3_RI_RTO_XS2),
		    MSK_RI_TO_53);
	}

	/* Disable all interrupts. */
	CSR_WRITE_4(sc, B0_HWE_IMSK, 0);
	CSR_READ_4(sc, B0_HWE_IMSK);
	CSR_WRITE_4(sc, B0_IMSK, 0);
	CSR_READ_4(sc, B0_IMSK);

	/*
	 * On dual-port PCI-X cards, there is a problem where status
	 * can be received out of order due to split transactions.
	 */
	if (sc->msk_pcixcap != 0 && sc->msk_num_port > 1) {
		uint16_t pcix_cmd;

		pcix_cmd = pci_read_config(sc->msk_dev,
		    sc->msk_pcixcap + PCIXR_COMMAND, 2);
		/* Clear Max Outstanding Split Transactions. */
		pcix_cmd &= ~PCIXM_COMMAND_MAX_SPLITS;
		CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_ON);
		pci_write_config(sc->msk_dev,
		    sc->msk_pcixcap + PCIXR_COMMAND, pcix_cmd, 2);
		CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_OFF);
	}
	if (sc->msk_expcap != 0) {
		/* Change Max. Read Request Size to 2048 bytes. */
		if (pci_get_max_read_req(sc->msk_dev) == 512)
			pci_set_max_read_req(sc->msk_dev, 2048);
	}

	/* Clear status list. */
	bzero(sc->msk_stat_ring,
	    sizeof(struct msk_stat_desc) * sc->msk_stat_count);
	sc->msk_stat_cons = 0;
	bus_dmamap_sync(sc->msk_stat_tag, sc->msk_stat_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	CSR_WRITE_4(sc, STAT_CTRL, SC_STAT_RST_SET);
	CSR_WRITE_4(sc, STAT_CTRL, SC_STAT_RST_CLR);
	/* Set the status list base address. */
	addr = sc->msk_stat_ring_paddr;
	CSR_WRITE_4(sc, STAT_LIST_ADDR_LO, MSK_ADDR_LO(addr));
	CSR_WRITE_4(sc, STAT_LIST_ADDR_HI, MSK_ADDR_HI(addr));
	/* Set the status list last index. */
	CSR_WRITE_2(sc, STAT_LAST_IDX, sc->msk_stat_count - 1);
	if (sc->msk_hw_id == CHIP_ID_YUKON_EC &&
	    sc->msk_hw_rev == CHIP_REV_YU_EC_A1) {
		/* WA for dev. #4.3 */
		CSR_WRITE_2(sc, STAT_TX_IDX_TH, ST_TXTH_IDX_MASK);
		/* WA for dev. #4.18 */
		CSR_WRITE_1(sc, STAT_FIFO_WM, 0x21);
		CSR_WRITE_1(sc, STAT_FIFO_ISR_WM, 0x07);
	} else {
		CSR_WRITE_2(sc, STAT_TX_IDX_TH, 0x0a);
		CSR_WRITE_1(sc, STAT_FIFO_WM, 0x10);
		if (sc->msk_hw_id == CHIP_ID_YUKON_XL &&
		    sc->msk_hw_rev == CHIP_REV_YU_XL_A0)
			CSR_WRITE_1(sc, STAT_FIFO_ISR_WM, 0x04);
		else
			CSR_WRITE_1(sc, STAT_FIFO_ISR_WM, 0x10);
		CSR_WRITE_4(sc, STAT_ISR_TIMER_INI, 0x0190);
	}
	/*
	 * Use default value for STAT_ISR_TIMER_INI, STAT_LEV_TIMER_INI.
	 */
	CSR_WRITE_4(sc, STAT_TX_TIMER_INI, MSK_USECS(sc, 1000));
	/* Enable status unit.
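 *
 * The timer initial values are programmed in core-clock ticks, so the
 * microsecond argument is scaled by the chip clock; a sketch, assuming
 * MSK_USECS() simply multiplies by msk_clock in MHz:
 */
#if 0	/* not compiled; worked example only */
uint32_t ticks;

/* e.g. a 125 MHz Yukon EC: 1000 us -> 125 * 1000 = 125000 ticks */
ticks = sc->msk_clock * 1000;
#endif
/*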
*/ CSR_WRITE_4(sc, STAT_CTRL, SC_STAT_OP_ON); CSR_WRITE_1(sc, STAT_TX_TIMER_CTRL, TIM_START); CSR_WRITE_1(sc, STAT_LEV_TIMER_CTRL, TIM_START); CSR_WRITE_1(sc, STAT_ISR_TIMER_CTRL, TIM_START); } static int msk_probe(device_t dev) { struct msk_softc *sc; char desc[100]; sc = device_get_softc(device_get_parent(dev)); /* * Not much to do here. We always know there will be * at least one GMAC present, and if there are two, * mskc_attach() will create a second device instance * for us. */ snprintf(desc, sizeof(desc), "Marvell Technology Group Ltd. %s Id 0x%02x Rev 0x%02x", model_name[sc->msk_hw_id - CHIP_ID_YUKON_XL], sc->msk_hw_id, sc->msk_hw_rev); device_set_desc_copy(dev, desc); return (BUS_PROBE_DEFAULT); } static int msk_attach(device_t dev) { struct msk_softc *sc; struct msk_if_softc *sc_if; struct ifnet *ifp; struct msk_mii_data *mmd; int i, port, error; uint8_t eaddr[6]; if (dev == NULL) return (EINVAL); error = 0; sc_if = device_get_softc(dev); sc = device_get_softc(device_get_parent(dev)); mmd = device_get_ivars(dev); port = mmd->port; sc_if->msk_if_dev = dev; sc_if->msk_port = port; sc_if->msk_softc = sc; sc_if->msk_flags = sc->msk_pflags; sc->msk_if[port] = sc_if; /* Setup Tx/Rx queue register offsets. */ if (port == MSK_PORT_A) { sc_if->msk_txq = Q_XA1; sc_if->msk_txsq = Q_XS1; sc_if->msk_rxq = Q_R1; } else { sc_if->msk_txq = Q_XA2; sc_if->msk_txsq = Q_XS2; sc_if->msk_rxq = Q_R2; } callout_init_mtx(&sc_if->msk_tick_ch, &sc_if->msk_softc->msk_mtx, 0); msk_sysctl_node(sc_if); if ((error = msk_txrx_dma_alloc(sc_if)) != 0) goto fail; msk_rx_dma_jalloc(sc_if); ifp = sc_if->msk_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(sc_if->msk_if_dev, "can not if_alloc()\n"); error = ENOSPC; goto fail; } ifp->if_softc = sc_if; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_capabilities = IFCAP_TXCSUM | IFCAP_TSO4; /* * Enable Rx checksum offloading if controller supports * new descriptor formant and controller is not Yukon XL. */ if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 && sc->msk_hw_id != CHIP_ID_YUKON_XL) ifp->if_capabilities |= IFCAP_RXCSUM; if ((sc_if->msk_flags & MSK_FLAG_DESCV2) != 0 && (sc_if->msk_flags & MSK_FLAG_NORX_CSUM) == 0) ifp->if_capabilities |= IFCAP_RXCSUM; ifp->if_hwassist = MSK_CSUM_FEATURES | CSUM_TSO; ifp->if_capenable = ifp->if_capabilities; ifp->if_ioctl = msk_ioctl; ifp->if_start = msk_start; ifp->if_init = msk_init; IFQ_SET_MAXLEN(&ifp->if_snd, MSK_TX_RING_CNT - 1); ifp->if_snd.ifq_drv_maxlen = MSK_TX_RING_CNT - 1; IFQ_SET_READY(&ifp->if_snd); /* * Get station address for this interface. Note that * dual port cards actually come with three station * addresses: one for each port, plus an extra. The * extra one is used by the SysKonnect driver software * as a 'virtual' station address for when both ports * are operating in failover mode. Currently we don't * use this extra address. */ MSK_IF_LOCK(sc_if); for (i = 0; i < ETHER_ADDR_LEN; i++) eaddr[i] = CSR_READ_1(sc, B2_MAC_1 + (port * 8) + i); /* * Call MI attach routine. Can't hold locks when calling into ether_*. */ MSK_IF_UNLOCK(sc_if); ether_ifattach(ifp, eaddr); MSK_IF_LOCK(sc_if); /* VLAN capability setup */ ifp->if_capabilities |= IFCAP_VLAN_MTU; if ((sc_if->msk_flags & MSK_FLAG_NOHWVLAN) == 0) { /* * Due to Tx checksum offload hardware bugs, msk(4) manually * computes checksum for short frames. For VLAN tagged frames * this workaround does not work so disable checksum offload * for VLAN interface. 
*/ ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO; /* * Enable Rx checksum offloading for VLAN tagged frames * if controller support new descriptor format. */ if ((sc_if->msk_flags & MSK_FLAG_DESCV2) != 0 && (sc_if->msk_flags & MSK_FLAG_NORX_CSUM) == 0) ifp->if_capabilities |= IFCAP_VLAN_HWCSUM; } ifp->if_capenable = ifp->if_capabilities; /* * Disable RX checksum offloading on controllers that don't use * new descriptor format but give chance to enable it. */ if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0) ifp->if_capenable &= ~IFCAP_RXCSUM; /* * Tell the upper layer(s) we support long frames. * Must appear after the call to ether_ifattach() because * ether_ifattach() sets ifi_hdrlen to the default value. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); /* * Do miibus setup. */ MSK_IF_UNLOCK(sc_if); error = mii_attach(dev, &sc_if->msk_miibus, ifp, msk_mediachange, msk_mediastatus, BMSR_DEFCAPMASK, PHY_ADDR_MARV, MII_OFFSET_ANY, mmd->mii_flags); if (error != 0) { device_printf(sc_if->msk_if_dev, "attaching PHYs failed\n"); ether_ifdetach(ifp); error = ENXIO; goto fail; } fail: if (error != 0) { /* Access should be ok even though lock has been dropped */ sc->msk_if[port] = NULL; msk_detach(dev); } return (error); } /* * Attach the interface. Allocate softc structures, do ifmedia * setup and ethernet/BPF attach. */ static int mskc_attach(device_t dev) { struct msk_softc *sc; struct msk_mii_data *mmd; int error, msic, msir, reg; sc = device_get_softc(dev); sc->msk_dev = dev; mtx_init(&sc->msk_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); /* * Map control/status registers. */ pci_enable_busmaster(dev); /* Allocate I/O resource */ #ifdef MSK_USEIOSPACE sc->msk_res_spec = msk_res_spec_io; #else sc->msk_res_spec = msk_res_spec_mem; #endif sc->msk_irq_spec = msk_irq_spec_legacy; error = bus_alloc_resources(dev, sc->msk_res_spec, sc->msk_res); if (error) { if (sc->msk_res_spec == msk_res_spec_mem) sc->msk_res_spec = msk_res_spec_io; else sc->msk_res_spec = msk_res_spec_mem; error = bus_alloc_resources(dev, sc->msk_res_spec, sc->msk_res); if (error) { device_printf(dev, "couldn't allocate %s resources\n", sc->msk_res_spec == msk_res_spec_mem ? "memory" : "I/O"); mtx_destroy(&sc->msk_mtx); return (ENXIO); } } /* Enable all clocks before accessing any registers. */ CSR_PCI_WRITE_4(sc, PCI_OUR_REG_3, 0); CSR_WRITE_2(sc, B0_CTST, CS_RST_CLR); sc->msk_hw_id = CSR_READ_1(sc, B2_CHIP_ID); sc->msk_hw_rev = (CSR_READ_1(sc, B2_MAC_CFG) >> 4) & 0x0f; /* Bail out if chip is not recognized. 
*/ if (sc->msk_hw_id < CHIP_ID_YUKON_XL || sc->msk_hw_id > CHIP_ID_YUKON_OPT || sc->msk_hw_id == CHIP_ID_YUKON_UNKNOWN) { device_printf(dev, "unknown device: id=0x%02x, rev=0x%02x\n", sc->msk_hw_id, sc->msk_hw_rev); mtx_destroy(&sc->msk_mtx); return (ENXIO); } SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "process_limit", CTLTYPE_INT | CTLFLAG_RW, &sc->msk_process_limit, 0, sysctl_hw_msk_proc_limit, "I", "max number of Rx events to process"); sc->msk_process_limit = MSK_PROC_DEFAULT; error = resource_int_value(device_get_name(dev), device_get_unit(dev), "process_limit", &sc->msk_process_limit); if (error == 0) { if (sc->msk_process_limit < MSK_PROC_MIN || sc->msk_process_limit > MSK_PROC_MAX) { device_printf(dev, "process_limit value out of range; " "using default: %d\n", MSK_PROC_DEFAULT); sc->msk_process_limit = MSK_PROC_DEFAULT; } } sc->msk_int_holdoff = MSK_INT_HOLDOFF_DEFAULT; SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "int_holdoff", CTLFLAG_RW, &sc->msk_int_holdoff, 0, "Maximum number of time to delay interrupts"); resource_int_value(device_get_name(dev), device_get_unit(dev), "int_holdoff", &sc->msk_int_holdoff); sc->msk_pmd = CSR_READ_1(sc, B2_PMD_TYP); /* Check number of MACs. */ sc->msk_num_port = 1; if ((CSR_READ_1(sc, B2_Y2_HW_RES) & CFG_DUAL_MAC_MSK) == CFG_DUAL_MAC_MSK) { if (!(CSR_READ_1(sc, B2_Y2_CLK_GATE) & Y2_STATUS_LNK2_INAC)) sc->msk_num_port++; } /* Check bus type. */ if (pci_find_cap(sc->msk_dev, PCIY_EXPRESS, ®) == 0) { sc->msk_bustype = MSK_PEX_BUS; sc->msk_expcap = reg; } else if (pci_find_cap(sc->msk_dev, PCIY_PCIX, ®) == 0) { sc->msk_bustype = MSK_PCIX_BUS; sc->msk_pcixcap = reg; } else sc->msk_bustype = MSK_PCI_BUS; switch (sc->msk_hw_id) { case CHIP_ID_YUKON_EC: sc->msk_clock = 125; /* 125 MHz */ sc->msk_pflags |= MSK_FLAG_JUMBO; break; case CHIP_ID_YUKON_EC_U: sc->msk_clock = 125; /* 125 MHz */ sc->msk_pflags |= MSK_FLAG_JUMBO | MSK_FLAG_JUMBO_NOCSUM; break; case CHIP_ID_YUKON_EX: sc->msk_clock = 125; /* 125 MHz */ sc->msk_pflags |= MSK_FLAG_JUMBO | MSK_FLAG_DESCV2 | MSK_FLAG_AUTOTX_CSUM; /* * Yukon Extreme seems to have silicon bug for * automatic Tx checksum calculation capability. */ if (sc->msk_hw_rev == CHIP_REV_YU_EX_B0) sc->msk_pflags &= ~MSK_FLAG_AUTOTX_CSUM; /* * Yukon Extreme A0 could not use store-and-forward * for jumbo frames, so disable Tx checksum * offloading for jumbo frames. */ if (sc->msk_hw_rev == CHIP_REV_YU_EX_A0) sc->msk_pflags |= MSK_FLAG_JUMBO_NOCSUM; break; case CHIP_ID_YUKON_FE: sc->msk_clock = 100; /* 100 MHz */ sc->msk_pflags |= MSK_FLAG_FASTETHER; break; case CHIP_ID_YUKON_FE_P: sc->msk_clock = 50; /* 50 MHz */ sc->msk_pflags |= MSK_FLAG_FASTETHER | MSK_FLAG_DESCV2 | MSK_FLAG_AUTOTX_CSUM; if (sc->msk_hw_rev == CHIP_REV_YU_FE_P_A0) { /* * XXX * FE+ A0 has status LE writeback bug so msk(4) * does not rely on status word of received frame * in msk_rxeof() which in turn disables all * hardware assistance bits reported by the status * word as well as validity of the received frame. * Just pass received frames to upper stack with * minimal test and let upper stack handle them. 
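 *
 * The "minimal test" mentioned here is the one msk_rxeof() applies
 * when MSK_FLAG_NORXCHK is set; condensed from that routine:
 */
#if 0	/* not compiled; condensed from msk_rxeof() */
if (len > MSK_MAX_FRAMELEN || len < ETHER_HDR_LEN) {
	if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
	msk_discard_rxbuf(sc_if, cons);		/* recycle the buffer */
}
#endif
/*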
*/ sc->msk_pflags |= MSK_FLAG_NOHWVLAN | MSK_FLAG_NORXCHK | MSK_FLAG_NORX_CSUM; } break; case CHIP_ID_YUKON_XL: sc->msk_clock = 156; /* 156 MHz */ sc->msk_pflags |= MSK_FLAG_JUMBO; break; case CHIP_ID_YUKON_SUPR: sc->msk_clock = 125; /* 125 MHz */ sc->msk_pflags |= MSK_FLAG_JUMBO | MSK_FLAG_DESCV2 | MSK_FLAG_AUTOTX_CSUM; break; case CHIP_ID_YUKON_UL_2: sc->msk_clock = 125; /* 125 MHz */ sc->msk_pflags |= MSK_FLAG_JUMBO; break; case CHIP_ID_YUKON_OPT: sc->msk_clock = 125; /* 125 MHz */ sc->msk_pflags |= MSK_FLAG_JUMBO | MSK_FLAG_DESCV2; break; default: sc->msk_clock = 156; /* 156 MHz */ break; } /* Allocate IRQ resources. */ msic = pci_msi_count(dev); if (bootverbose) device_printf(dev, "MSI count : %d\n", msic); if (legacy_intr != 0) msi_disable = 1; if (msi_disable == 0 && msic > 0) { msir = 1; if (pci_alloc_msi(dev, &msir) == 0) { if (msir == 1) { sc->msk_pflags |= MSK_FLAG_MSI; sc->msk_irq_spec = msk_irq_spec_msi; } else pci_release_msi(dev); } } error = bus_alloc_resources(dev, sc->msk_irq_spec, sc->msk_irq); if (error) { device_printf(dev, "couldn't allocate IRQ resources\n"); goto fail; } if ((error = msk_status_dma_alloc(sc)) != 0) goto fail; /* Set base interrupt mask. */ sc->msk_intrmask = Y2_IS_HW_ERR | Y2_IS_STAT_BMU; sc->msk_intrhwemask = Y2_IS_TIST_OV | Y2_IS_MST_ERR | Y2_IS_IRQ_STAT | Y2_IS_PCI_EXP | Y2_IS_PCI_NEXP; /* Reset the adapter. */ mskc_reset(sc); if ((error = mskc_setup_rambuffer(sc)) != 0) goto fail; sc->msk_devs[MSK_PORT_A] = device_add_child(dev, "msk", -1); if (sc->msk_devs[MSK_PORT_A] == NULL) { device_printf(dev, "failed to add child for PORT_A\n"); error = ENXIO; goto fail; } mmd = malloc(sizeof(struct msk_mii_data), M_DEVBUF, M_WAITOK | M_ZERO); mmd->port = MSK_PORT_A; mmd->pmd = sc->msk_pmd; mmd->mii_flags |= MIIF_DOPAUSE; if (sc->msk_pmd == 'L' || sc->msk_pmd == 'S') mmd->mii_flags |= MIIF_HAVEFIBER; if (sc->msk_pmd == 'P') mmd->mii_flags |= MIIF_HAVEFIBER | MIIF_MACPRIV0; device_set_ivars(sc->msk_devs[MSK_PORT_A], mmd); if (sc->msk_num_port > 1) { sc->msk_devs[MSK_PORT_B] = device_add_child(dev, "msk", -1); if (sc->msk_devs[MSK_PORT_B] == NULL) { device_printf(dev, "failed to add child for PORT_B\n"); error = ENXIO; goto fail; } mmd = malloc(sizeof(struct msk_mii_data), M_DEVBUF, M_WAITOK | M_ZERO); mmd->port = MSK_PORT_B; mmd->pmd = sc->msk_pmd; if (sc->msk_pmd == 'L' || sc->msk_pmd == 'S') mmd->mii_flags |= MIIF_HAVEFIBER; if (sc->msk_pmd == 'P') mmd->mii_flags |= MIIF_HAVEFIBER | MIIF_MACPRIV0; device_set_ivars(sc->msk_devs[MSK_PORT_B], mmd); } error = bus_generic_attach(dev); if (error) { device_printf(dev, "failed to attach port(s)\n"); goto fail; } /* Hook interrupt last to avoid having to lock softc. */ error = bus_setup_intr(dev, sc->msk_irq[0], INTR_TYPE_NET | INTR_MPSAFE, NULL, msk_intr, sc, &sc->msk_intrhand); if (error != 0) { device_printf(dev, "couldn't set up interrupt handler\n"); goto fail; } fail: if (error != 0) mskc_detach(dev); return (error); } /* * Shutdown hardware and free up resources. This can be called any * time after the mutex has been initialized. It is called in both * the error case in attach and the normal detach case so it needs * to be careful about only freeing resources that have actually been * allocated. 
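 *
 * The pattern used for that: test every resource before releasing it
 * and clear the pointer afterwards, so a partially completed attach
 * unwinds safely.  One instance from mskc_detach() below:
 */
#if 0	/* not compiled; excerpted for illustration */
if (sc->msk_intrhand) {
	bus_teardown_intr(dev, sc->msk_irq[0], sc->msk_intrhand);
	sc->msk_intrhand = NULL;	/* a second pass becomes a no-op */
}
#endif
/*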
*/ static int msk_detach(device_t dev) { struct msk_softc *sc; struct msk_if_softc *sc_if; struct ifnet *ifp; sc_if = device_get_softc(dev); KASSERT(mtx_initialized(&sc_if->msk_softc->msk_mtx), ("msk mutex not initialized in msk_detach")); MSK_IF_LOCK(sc_if); ifp = sc_if->msk_ifp; if (device_is_attached(dev)) { /* XXX */ sc_if->msk_flags |= MSK_FLAG_DETACH; msk_stop(sc_if); /* Can't hold locks while calling detach. */ MSK_IF_UNLOCK(sc_if); callout_drain(&sc_if->msk_tick_ch); if (ifp) ether_ifdetach(ifp); MSK_IF_LOCK(sc_if); } /* * We're generally called from mskc_detach() which is using * device_delete_child() to get to here. It's already trashed * miibus for us, so don't do it here or we'll panic. * * if (sc_if->msk_miibus != NULL) { * device_delete_child(dev, sc_if->msk_miibus); * sc_if->msk_miibus = NULL; * } */ msk_rx_dma_jfree(sc_if); msk_txrx_dma_free(sc_if); bus_generic_detach(dev); sc = sc_if->msk_softc; sc->msk_if[sc_if->msk_port] = NULL; MSK_IF_UNLOCK(sc_if); if (ifp) if_free(ifp); return (0); } static int mskc_detach(device_t dev) { struct msk_softc *sc; sc = device_get_softc(dev); KASSERT(mtx_initialized(&sc->msk_mtx), ("msk mutex not initialized")); if (device_is_alive(dev)) { if (sc->msk_devs[MSK_PORT_A] != NULL) { free(device_get_ivars(sc->msk_devs[MSK_PORT_A]), M_DEVBUF); device_delete_child(dev, sc->msk_devs[MSK_PORT_A]); } if (sc->msk_devs[MSK_PORT_B] != NULL) { free(device_get_ivars(sc->msk_devs[MSK_PORT_B]), M_DEVBUF); device_delete_child(dev, sc->msk_devs[MSK_PORT_B]); } bus_generic_detach(dev); } /* Disable all interrupts. */ CSR_WRITE_4(sc, B0_IMSK, 0); CSR_READ_4(sc, B0_IMSK); CSR_WRITE_4(sc, B0_HWE_IMSK, 0); CSR_READ_4(sc, B0_HWE_IMSK); /* LED Off. */ CSR_WRITE_2(sc, B0_CTST, Y2_LED_STAT_OFF); /* Put hardware reset. */ CSR_WRITE_2(sc, B0_CTST, CS_RST_SET); msk_status_dma_free(sc); if (sc->msk_intrhand) { bus_teardown_intr(dev, sc->msk_irq[0], sc->msk_intrhand); sc->msk_intrhand = NULL; } bus_release_resources(dev, sc->msk_irq_spec, sc->msk_irq); if ((sc->msk_pflags & MSK_FLAG_MSI) != 0) pci_release_msi(dev); bus_release_resources(dev, sc->msk_res_spec, sc->msk_res); mtx_destroy(&sc->msk_mtx); return (0); } static bus_dma_tag_t mskc_get_dma_tag(device_t bus, device_t child __unused) { return (bus_get_dma_tag(bus)); } struct msk_dmamap_arg { bus_addr_t msk_busaddr; }; static void msk_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct msk_dmamap_arg *ctx; if (error != 0) return; ctx = arg; ctx->msk_busaddr = segs[0].ds_addr; } /* Create status DMA region. */ static int msk_status_dma_alloc(struct msk_softc *sc) { struct msk_dmamap_arg ctx; bus_size_t stat_sz; int count, error; /* * It seems controller requires number of status LE entries * is power of 2 and the maximum number of status LE entries * is 4096. For dual-port controllers, the number of status * LE entries should be large enough to hold both port's * status updates. 
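 *
 * A worked example of the sizing below, assuming stock 512-entry
 * Rx and Tx rings (the actual ring counts may differ):
 *
 *   count = 3 * 512 + 512  = 2048
 *   roundup2(2048, 1024)   = 2048   (already a 1 KB multiple)
 *   imin(4096, 2048)       = 2048 status LEs,
 *
 * i.e. 2048 * sizeof(struct msk_stat_desc) bytes of DMA memory.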
*/ count = 3 * MSK_RX_RING_CNT + MSK_TX_RING_CNT; count = imin(4096, roundup2(count, 1024)); sc->msk_stat_count = count; stat_sz = count * sizeof(struct msk_stat_desc); error = bus_dma_tag_create( bus_get_dma_tag(sc->msk_dev), /* parent */ MSK_STAT_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ stat_sz, /* maxsize */ 1, /* nsegments */ stat_sz, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->msk_stat_tag); if (error != 0) { device_printf(sc->msk_dev, "failed to create status DMA tag\n"); return (error); } /* Allocate DMA'able memory and load the DMA map for status ring. */ error = bus_dmamem_alloc(sc->msk_stat_tag, (void **)&sc->msk_stat_ring, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc->msk_stat_map); if (error != 0) { device_printf(sc->msk_dev, "failed to allocate DMA'able memory for status ring\n"); return (error); } ctx.msk_busaddr = 0; error = bus_dmamap_load(sc->msk_stat_tag, sc->msk_stat_map, sc->msk_stat_ring, stat_sz, msk_dmamap_cb, &ctx, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->msk_dev, "failed to load DMA'able memory for status ring\n"); return (error); } sc->msk_stat_ring_paddr = ctx.msk_busaddr; return (0); } static void msk_status_dma_free(struct msk_softc *sc) { /* Destroy status block. */ if (sc->msk_stat_tag) { if (sc->msk_stat_ring_paddr) { bus_dmamap_unload(sc->msk_stat_tag, sc->msk_stat_map); sc->msk_stat_ring_paddr = 0; } if (sc->msk_stat_ring) { bus_dmamem_free(sc->msk_stat_tag, sc->msk_stat_ring, sc->msk_stat_map); sc->msk_stat_ring = NULL; } bus_dma_tag_destroy(sc->msk_stat_tag); sc->msk_stat_tag = NULL; } } static int msk_txrx_dma_alloc(struct msk_if_softc *sc_if) { struct msk_dmamap_arg ctx; struct msk_txdesc *txd; struct msk_rxdesc *rxd; bus_size_t rxalign; int error, i; /* Create parent DMA tag. */ error = bus_dma_tag_create( bus_get_dma_tag(sc_if->msk_if_dev), /* parent */ 1, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT, /* maxsize */ 0, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->msk_cdata.msk_parent_tag); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create parent DMA tag\n"); goto fail; } /* Create tag for Tx ring. */ error = bus_dma_tag_create(sc_if->msk_cdata.msk_parent_tag,/* parent */ MSK_RING_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MSK_TX_RING_SZ, /* maxsize */ 1, /* nsegments */ MSK_TX_RING_SZ, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->msk_cdata.msk_tx_ring_tag); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create Tx ring DMA tag\n"); goto fail; } /* Create tag for Rx ring. */ error = bus_dma_tag_create(sc_if->msk_cdata.msk_parent_tag,/* parent */ MSK_RING_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MSK_RX_RING_SZ, /* maxsize */ 1, /* nsegments */ MSK_RX_RING_SZ, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->msk_cdata.msk_rx_ring_tag); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create Rx ring DMA tag\n"); goto fail; } /* Create tag for Tx buffers. 
*/ error = bus_dma_tag_create(sc_if->msk_cdata.msk_parent_tag,/* parent */ 1, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MSK_TSO_MAXSIZE, /* maxsize */ MSK_MAXTXSEGS, /* nsegments */ MSK_TSO_MAXSGSIZE, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->msk_cdata.msk_tx_tag); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create Tx DMA tag\n"); goto fail; } rxalign = 1; /* * Workaround hardware hang which seems to happen when Rx buffer * is not aligned on multiple of FIFO word(8 bytes). */ if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) != 0) rxalign = MSK_RX_BUF_ALIGN; /* Create tag for Rx buffers. */ error = bus_dma_tag_create(sc_if->msk_cdata.msk_parent_tag,/* parent */ rxalign, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MCLBYTES, /* maxsize */ 1, /* nsegments */ MCLBYTES, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->msk_cdata.msk_rx_tag); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create Rx DMA tag\n"); goto fail; } /* Allocate DMA'able memory and load the DMA map for Tx ring. */ error = bus_dmamem_alloc(sc_if->msk_cdata.msk_tx_ring_tag, (void **)&sc_if->msk_rdata.msk_tx_ring, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc_if->msk_cdata.msk_tx_ring_map); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to allocate DMA'able memory for Tx ring\n"); goto fail; } ctx.msk_busaddr = 0; error = bus_dmamap_load(sc_if->msk_cdata.msk_tx_ring_tag, sc_if->msk_cdata.msk_tx_ring_map, sc_if->msk_rdata.msk_tx_ring, MSK_TX_RING_SZ, msk_dmamap_cb, &ctx, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to load DMA'able memory for Tx ring\n"); goto fail; } sc_if->msk_rdata.msk_tx_ring_paddr = ctx.msk_busaddr; /* Allocate DMA'able memory and load the DMA map for Rx ring. */ error = bus_dmamem_alloc(sc_if->msk_cdata.msk_rx_ring_tag, (void **)&sc_if->msk_rdata.msk_rx_ring, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc_if->msk_cdata.msk_rx_ring_map); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to allocate DMA'able memory for Rx ring\n"); goto fail; } ctx.msk_busaddr = 0; error = bus_dmamap_load(sc_if->msk_cdata.msk_rx_ring_tag, sc_if->msk_cdata.msk_rx_ring_map, sc_if->msk_rdata.msk_rx_ring, MSK_RX_RING_SZ, msk_dmamap_cb, &ctx, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to load DMA'able memory for Rx ring\n"); goto fail; } sc_if->msk_rdata.msk_rx_ring_paddr = ctx.msk_busaddr; /* Create DMA maps for Tx buffers. */ for (i = 0; i < MSK_TX_RING_CNT; i++) { txd = &sc_if->msk_cdata.msk_txdesc[i]; txd->tx_m = NULL; txd->tx_dmamap = NULL; error = bus_dmamap_create(sc_if->msk_cdata.msk_tx_tag, 0, &txd->tx_dmamap); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create Tx dmamap\n"); goto fail; } } /* Create DMA maps for Rx buffers. 
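 * (A note on how these busdma tags nest: the per-interface parent tag
 * is created first, and the ring and buffer tags above are all children
 * of it, so they inherit its restrictions; e.g. a 32-bit lowaddr set on
 * the parent would constrain every ring and every mbuf map at once.
 * The maps themselves are then created from the leaf tags, as below.)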
*/ if ((error = bus_dmamap_create(sc_if->msk_cdata.msk_rx_tag, 0, &sc_if->msk_cdata.msk_rx_sparemap)) != 0) { device_printf(sc_if->msk_if_dev, "failed to create spare Rx dmamap\n"); goto fail; } for (i = 0; i < MSK_RX_RING_CNT; i++) { rxd = &sc_if->msk_cdata.msk_rxdesc[i]; rxd->rx_m = NULL; rxd->rx_dmamap = NULL; error = bus_dmamap_create(sc_if->msk_cdata.msk_rx_tag, 0, &rxd->rx_dmamap); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create Rx dmamap\n"); goto fail; } } fail: return (error); } static int msk_rx_dma_jalloc(struct msk_if_softc *sc_if) { struct msk_dmamap_arg ctx; struct msk_rxdesc *jrxd; bus_size_t rxalign; int error, i; if (jumbo_disable != 0 || (sc_if->msk_flags & MSK_FLAG_JUMBO) == 0) { sc_if->msk_flags &= ~MSK_FLAG_JUMBO; device_printf(sc_if->msk_if_dev, "disabling jumbo frame support\n"); return (0); } /* Create tag for jumbo Rx ring. */ error = bus_dma_tag_create(sc_if->msk_cdata.msk_parent_tag,/* parent */ MSK_RING_ALIGN, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MSK_JUMBO_RX_RING_SZ, /* maxsize */ 1, /* nsegments */ MSK_JUMBO_RX_RING_SZ, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->msk_cdata.msk_jumbo_rx_ring_tag); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create jumbo Rx ring DMA tag\n"); goto jumbo_fail; } rxalign = 1; /* * Workaround hardware hang which seems to happen when Rx buffer * is not aligned on multiple of FIFO word(8 bytes). */ if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) != 0) rxalign = MSK_RX_BUF_ALIGN; /* Create tag for jumbo Rx buffers. */ error = bus_dma_tag_create(sc_if->msk_cdata.msk_parent_tag,/* parent */ rxalign, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUM9BYTES, /* maxsize */ 1, /* nsegments */ MJUM9BYTES, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc_if->msk_cdata.msk_jumbo_rx_tag); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create jumbo Rx DMA tag\n"); goto jumbo_fail; } /* Allocate DMA'able memory and load the DMA map for jumbo Rx ring. */ error = bus_dmamem_alloc(sc_if->msk_cdata.msk_jumbo_rx_ring_tag, (void **)&sc_if->msk_rdata.msk_jumbo_rx_ring, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc_if->msk_cdata.msk_jumbo_rx_ring_map); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to allocate DMA'able memory for jumbo Rx ring\n"); goto jumbo_fail; } ctx.msk_busaddr = 0; error = bus_dmamap_load(sc_if->msk_cdata.msk_jumbo_rx_ring_tag, sc_if->msk_cdata.msk_jumbo_rx_ring_map, sc_if->msk_rdata.msk_jumbo_rx_ring, MSK_JUMBO_RX_RING_SZ, msk_dmamap_cb, &ctx, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to load DMA'able memory for jumbo Rx ring\n"); goto jumbo_fail; } sc_if->msk_rdata.msk_jumbo_rx_ring_paddr = ctx.msk_busaddr; /* Create DMA maps for jumbo Rx buffers. 
*/ if ((error = bus_dmamap_create(sc_if->msk_cdata.msk_jumbo_rx_tag, 0, &sc_if->msk_cdata.msk_jumbo_rx_sparemap)) != 0) { device_printf(sc_if->msk_if_dev, "failed to create spare jumbo Rx dmamap\n"); goto jumbo_fail; } for (i = 0; i < MSK_JUMBO_RX_RING_CNT; i++) { jrxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[i]; jrxd->rx_m = NULL; jrxd->rx_dmamap = NULL; error = bus_dmamap_create(sc_if->msk_cdata.msk_jumbo_rx_tag, 0, &jrxd->rx_dmamap); if (error != 0) { device_printf(sc_if->msk_if_dev, "failed to create jumbo Rx dmamap\n"); goto jumbo_fail; } } return (0); jumbo_fail: msk_rx_dma_jfree(sc_if); device_printf(sc_if->msk_if_dev, "disabling jumbo frame support " "due to resource shortage\n"); sc_if->msk_flags &= ~MSK_FLAG_JUMBO; return (error); } static void msk_txrx_dma_free(struct msk_if_softc *sc_if) { struct msk_txdesc *txd; struct msk_rxdesc *rxd; int i; /* Tx ring. */ if (sc_if->msk_cdata.msk_tx_ring_tag) { if (sc_if->msk_rdata.msk_tx_ring_paddr) bus_dmamap_unload(sc_if->msk_cdata.msk_tx_ring_tag, sc_if->msk_cdata.msk_tx_ring_map); if (sc_if->msk_rdata.msk_tx_ring) bus_dmamem_free(sc_if->msk_cdata.msk_tx_ring_tag, sc_if->msk_rdata.msk_tx_ring, sc_if->msk_cdata.msk_tx_ring_map); sc_if->msk_rdata.msk_tx_ring = NULL; sc_if->msk_rdata.msk_tx_ring_paddr = 0; bus_dma_tag_destroy(sc_if->msk_cdata.msk_tx_ring_tag); sc_if->msk_cdata.msk_tx_ring_tag = NULL; } /* Rx ring. */ if (sc_if->msk_cdata.msk_rx_ring_tag) { if (sc_if->msk_rdata.msk_rx_ring_paddr) bus_dmamap_unload(sc_if->msk_cdata.msk_rx_ring_tag, sc_if->msk_cdata.msk_rx_ring_map); if (sc_if->msk_rdata.msk_rx_ring) bus_dmamem_free(sc_if->msk_cdata.msk_rx_ring_tag, sc_if->msk_rdata.msk_rx_ring, sc_if->msk_cdata.msk_rx_ring_map); sc_if->msk_rdata.msk_rx_ring = NULL; sc_if->msk_rdata.msk_rx_ring_paddr = 0; bus_dma_tag_destroy(sc_if->msk_cdata.msk_rx_ring_tag); sc_if->msk_cdata.msk_rx_ring_tag = NULL; } /* Tx buffers. */ if (sc_if->msk_cdata.msk_tx_tag) { for (i = 0; i < MSK_TX_RING_CNT; i++) { txd = &sc_if->msk_cdata.msk_txdesc[i]; if (txd->tx_dmamap) { bus_dmamap_destroy(sc_if->msk_cdata.msk_tx_tag, txd->tx_dmamap); txd->tx_dmamap = NULL; } } bus_dma_tag_destroy(sc_if->msk_cdata.msk_tx_tag); sc_if->msk_cdata.msk_tx_tag = NULL; } /* Rx buffers. */ if (sc_if->msk_cdata.msk_rx_tag) { for (i = 0; i < MSK_RX_RING_CNT; i++) { rxd = &sc_if->msk_cdata.msk_rxdesc[i]; if (rxd->rx_dmamap) { bus_dmamap_destroy(sc_if->msk_cdata.msk_rx_tag, rxd->rx_dmamap); rxd->rx_dmamap = NULL; } } if (sc_if->msk_cdata.msk_rx_sparemap) { bus_dmamap_destroy(sc_if->msk_cdata.msk_rx_tag, sc_if->msk_cdata.msk_rx_sparemap); sc_if->msk_cdata.msk_rx_sparemap = 0; } bus_dma_tag_destroy(sc_if->msk_cdata.msk_rx_tag); sc_if->msk_cdata.msk_rx_tag = NULL; } if (sc_if->msk_cdata.msk_parent_tag) { bus_dma_tag_destroy(sc_if->msk_cdata.msk_parent_tag); sc_if->msk_cdata.msk_parent_tag = NULL; } } static void msk_rx_dma_jfree(struct msk_if_softc *sc_if) { struct msk_rxdesc *jrxd; int i; /* Jumbo Rx ring. */ if (sc_if->msk_cdata.msk_jumbo_rx_ring_tag) { if (sc_if->msk_rdata.msk_jumbo_rx_ring_paddr) bus_dmamap_unload(sc_if->msk_cdata.msk_jumbo_rx_ring_tag, sc_if->msk_cdata.msk_jumbo_rx_ring_map); if (sc_if->msk_rdata.msk_jumbo_rx_ring) bus_dmamem_free(sc_if->msk_cdata.msk_jumbo_rx_ring_tag, sc_if->msk_rdata.msk_jumbo_rx_ring, sc_if->msk_cdata.msk_jumbo_rx_ring_map); sc_if->msk_rdata.msk_jumbo_rx_ring = NULL; sc_if->msk_rdata.msk_jumbo_rx_ring_paddr = 0; bus_dma_tag_destroy(sc_if->msk_cdata.msk_jumbo_rx_ring_tag); sc_if->msk_cdata.msk_jumbo_rx_ring_tag = NULL; } /* Jumbo Rx buffers. 
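 * (Freed below in the canonical busdma order; generic names, for
 * illustration only:)
 */
#if 0	/* not compiled; canonical teardown order */
bus_dmamap_unload(tag, map);		/* 1: drop the bus mapping */
bus_dmamem_free(tag, vaddr, map);	/* 2: release the memory */
bus_dma_tag_destroy(tag);		/* 3: finally destroy the tag */
#endif
/*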
*/ if (sc_if->msk_cdata.msk_jumbo_rx_tag) { for (i = 0; i < MSK_JUMBO_RX_RING_CNT; i++) { jrxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[i]; if (jrxd->rx_dmamap) { bus_dmamap_destroy( sc_if->msk_cdata.msk_jumbo_rx_tag, jrxd->rx_dmamap); jrxd->rx_dmamap = NULL; } } if (sc_if->msk_cdata.msk_jumbo_rx_sparemap) { bus_dmamap_destroy(sc_if->msk_cdata.msk_jumbo_rx_tag, sc_if->msk_cdata.msk_jumbo_rx_sparemap); sc_if->msk_cdata.msk_jumbo_rx_sparemap = 0; } bus_dma_tag_destroy(sc_if->msk_cdata.msk_jumbo_rx_tag); sc_if->msk_cdata.msk_jumbo_rx_tag = NULL; } } static int msk_encap(struct msk_if_softc *sc_if, struct mbuf **m_head) { struct msk_txdesc *txd, *txd_last; struct msk_tx_desc *tx_le; struct mbuf *m; bus_dmamap_t map; bus_dma_segment_t txsegs[MSK_MAXTXSEGS]; uint32_t control, csum, prod, si; uint16_t offset, tcp_offset, tso_mtu; int error, i, nseg, tso; MSK_IF_LOCK_ASSERT(sc_if); tcp_offset = offset = 0; m = *m_head; if (((sc_if->msk_flags & MSK_FLAG_AUTOTX_CSUM) == 0 && (m->m_pkthdr.csum_flags & MSK_CSUM_FEATURES) != 0) || ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 && (m->m_pkthdr.csum_flags & CSUM_TSO) != 0)) { /* * Since mbuf has no protocol specific structure information * in it we have to inspect protocol information here to * setup TSO and checksum offload. I don't know why Marvell * made a such decision in chip design because other GigE * hardwares normally takes care of all these chores in * hardware. However, TSO performance of Yukon II is very * good such that it's worth to implement it. */ struct ether_header *eh; struct ip *ip; struct tcphdr *tcp; if (M_WRITABLE(m) == 0) { /* Get a writable copy. */ m = m_dup(*m_head, M_NOWAIT); m_freem(*m_head); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } *m_head = m; } offset = sizeof(struct ether_header); m = m_pullup(m, offset); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } eh = mtod(m, struct ether_header *); /* Check if hardware VLAN insertion is off. */ if (eh->ether_type == htons(ETHERTYPE_VLAN)) { offset = sizeof(struct ether_vlan_header); m = m_pullup(m, offset); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } } m = m_pullup(m, offset + sizeof(struct ip)); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } ip = (struct ip *)(mtod(m, char *) + offset); offset += (ip->ip_hl << 2); tcp_offset = offset; if ((m->m_pkthdr.csum_flags & CSUM_TSO) != 0) { m = m_pullup(m, offset + sizeof(struct tcphdr)); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } tcp = (struct tcphdr *)(mtod(m, char *) + offset); offset += (tcp->th_off << 2); } else if ((sc_if->msk_flags & MSK_FLAG_AUTOTX_CSUM) == 0 && (m->m_pkthdr.len < MSK_MIN_FRAMELEN) && (m->m_pkthdr.csum_flags & CSUM_TCP) != 0) { /* * It seems that Yukon II has Tx checksum offload bug * for small TCP packets that's less than 60 bytes in * size (e.g. TCP window probe packet, pure ACK packet). * Common work around like padding with zeros to make * the frame minimum ethernet frame size didn't work at * all. * Instead of disabling checksum offload completely we * resort to S/W checksum routine when we encounter * short TCP frames. * Short UDP packets appear to be handled correctly by * Yukon II. Also I assume this bug does not happen on * controllers that use newer descriptor format or * automatic Tx checksum calculation. 
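 *
 * A sketch of where that software checksum lands, with the usual
 * offsets (csum_data for CSUM_TCP is offsetof(struct tcphdr, th_sum)):
 */
#if 0	/* not compiled; mirrors the fallback just below */
/*
 * Plain Ethernet + IP + TCP: offset = 14 + 20 = 34 is the start of the
 * TCP header, csum_data = 16, so the sum is stored at byte 34 + 16 = 50.
 */
*(uint16_t *)(m->m_data + offset + m->m_pkthdr.csum_data) =
    in_cksum_skip(m, m->m_pkthdr.len, offset);
m->m_pkthdr.csum_flags &= ~CSUM_TCP;	/* hardware need not redo it */
#endif
/*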
*/ m = m_pullup(m, offset + sizeof(struct tcphdr)); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } *(uint16_t *)(m->m_data + offset + m->m_pkthdr.csum_data) = in_cksum_skip(m, m->m_pkthdr.len, offset); m->m_pkthdr.csum_flags &= ~CSUM_TCP; } *m_head = m; } prod = sc_if->msk_cdata.msk_tx_prod; txd = &sc_if->msk_cdata.msk_txdesc[prod]; txd_last = txd; map = txd->tx_dmamap; error = bus_dmamap_load_mbuf_sg(sc_if->msk_cdata.msk_tx_tag, map, *m_head, txsegs, &nseg, BUS_DMA_NOWAIT); if (error == EFBIG) { m = m_collapse(*m_head, M_NOWAIT, MSK_MAXTXSEGS); if (m == NULL) { m_freem(*m_head); *m_head = NULL; return (ENOBUFS); } *m_head = m; error = bus_dmamap_load_mbuf_sg(sc_if->msk_cdata.msk_tx_tag, map, *m_head, txsegs, &nseg, BUS_DMA_NOWAIT); if (error != 0) { m_freem(*m_head); *m_head = NULL; return (error); } } else if (error != 0) return (error); if (nseg == 0) { m_freem(*m_head); *m_head = NULL; return (EIO); } /* Check number of available descriptors. */ if (sc_if->msk_cdata.msk_tx_cnt + nseg >= (MSK_TX_RING_CNT - MSK_RESERVED_TX_DESC_CNT)) { bus_dmamap_unload(sc_if->msk_cdata.msk_tx_tag, map); return (ENOBUFS); } control = 0; tso = 0; tx_le = NULL; /* Check TSO support. */ if ((m->m_pkthdr.csum_flags & CSUM_TSO) != 0) { if ((sc_if->msk_flags & MSK_FLAG_DESCV2) != 0) tso_mtu = m->m_pkthdr.tso_segsz; else tso_mtu = offset + m->m_pkthdr.tso_segsz; if (tso_mtu != sc_if->msk_cdata.msk_tso_mtu) { tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; tx_le->msk_addr = htole32(tso_mtu); if ((sc_if->msk_flags & MSK_FLAG_DESCV2) != 0) tx_le->msk_control = htole32(OP_MSS | HW_OWNER); else tx_le->msk_control = htole32(OP_LRGLEN | HW_OWNER); sc_if->msk_cdata.msk_tx_cnt++; MSK_INC(prod, MSK_TX_RING_CNT); sc_if->msk_cdata.msk_tso_mtu = tso_mtu; } tso++; } /* Check if we have a VLAN tag to insert. */ if ((m->m_flags & M_VLANTAG) != 0) { if (tx_le == NULL) { tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; tx_le->msk_addr = htole32(0); tx_le->msk_control = htole32(OP_VLAN | HW_OWNER | htons(m->m_pkthdr.ether_vtag)); sc_if->msk_cdata.msk_tx_cnt++; MSK_INC(prod, MSK_TX_RING_CNT); } else { tx_le->msk_control |= htole32(OP_VLAN | htons(m->m_pkthdr.ether_vtag)); } control |= INS_VLAN; } /* Check if we have to handle checksum offload. */ if (tso == 0 && (m->m_pkthdr.csum_flags & MSK_CSUM_FEATURES) != 0) { if ((sc_if->msk_flags & MSK_FLAG_AUTOTX_CSUM) != 0) control |= CALSUM; else { control |= CALSUM | WR_SUM | INIT_SUM | LOCK_SUM; if ((m->m_pkthdr.csum_flags & CSUM_UDP) != 0) control |= UDPTCP; /* Checksum write position. */ csum = (tcp_offset + m->m_pkthdr.csum_data) & 0xffff; /* Checksum start position. 
*/ csum |= (uint32_t)tcp_offset << 16; if (csum != sc_if->msk_cdata.msk_last_csum) { tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; tx_le->msk_addr = htole32(csum); tx_le->msk_control = htole32(1 << 16 | (OP_TCPLISW | HW_OWNER)); sc_if->msk_cdata.msk_tx_cnt++; MSK_INC(prod, MSK_TX_RING_CNT); sc_if->msk_cdata.msk_last_csum = csum; } } } #ifdef MSK_64BIT_DMA if (MSK_ADDR_HI(txsegs[0].ds_addr) != sc_if->msk_cdata.msk_tx_high_addr) { sc_if->msk_cdata.msk_tx_high_addr = MSK_ADDR_HI(txsegs[0].ds_addr); tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; tx_le->msk_addr = htole32(MSK_ADDR_HI(txsegs[0].ds_addr)); tx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER); sc_if->msk_cdata.msk_tx_cnt++; MSK_INC(prod, MSK_TX_RING_CNT); } #endif si = prod; tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; tx_le->msk_addr = htole32(MSK_ADDR_LO(txsegs[0].ds_addr)); if (tso == 0) tx_le->msk_control = htole32(txsegs[0].ds_len | control | OP_PACKET); else tx_le->msk_control = htole32(txsegs[0].ds_len | control | OP_LARGESEND); sc_if->msk_cdata.msk_tx_cnt++; MSK_INC(prod, MSK_TX_RING_CNT); for (i = 1; i < nseg; i++) { tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; #ifdef MSK_64BIT_DMA if (MSK_ADDR_HI(txsegs[i].ds_addr) != sc_if->msk_cdata.msk_tx_high_addr) { sc_if->msk_cdata.msk_tx_high_addr = MSK_ADDR_HI(txsegs[i].ds_addr); tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; tx_le->msk_addr = htole32(MSK_ADDR_HI(txsegs[i].ds_addr)); tx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER); sc_if->msk_cdata.msk_tx_cnt++; MSK_INC(prod, MSK_TX_RING_CNT); tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; } #endif tx_le->msk_addr = htole32(MSK_ADDR_LO(txsegs[i].ds_addr)); tx_le->msk_control = htole32(txsegs[i].ds_len | control | OP_BUFFER | HW_OWNER); sc_if->msk_cdata.msk_tx_cnt++; MSK_INC(prod, MSK_TX_RING_CNT); } /* Update producer index. */ sc_if->msk_cdata.msk_tx_prod = prod; /* Set EOP on the last descriptor. */ prod = (prod + MSK_TX_RING_CNT - 1) % MSK_TX_RING_CNT; tx_le = &sc_if->msk_rdata.msk_tx_ring[prod]; tx_le->msk_control |= htole32(EOP); /* Turn the first descriptor ownership to hardware. */ tx_le = &sc_if->msk_rdata.msk_tx_ring[si]; tx_le->msk_control |= htole32(HW_OWNER); txd = &sc_if->msk_cdata.msk_txdesc[prod]; map = txd_last->tx_dmamap; txd_last->tx_dmamap = txd->tx_dmamap; txd->tx_dmamap = map; txd->tx_m = m; /* Sync descriptors. */ bus_dmamap_sync(sc_if->msk_cdata.msk_tx_tag, map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(sc_if->msk_cdata.msk_tx_ring_tag, sc_if->msk_cdata.msk_tx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } static void msk_start(struct ifnet *ifp) { struct msk_if_softc *sc_if; sc_if = ifp->if_softc; MSK_IF_LOCK(sc_if); msk_start_locked(ifp); MSK_IF_UNLOCK(sc_if); } static void msk_start_locked(struct ifnet *ifp) { struct msk_if_softc *sc_if; struct mbuf *m_head; int enq; sc_if = ifp->if_softc; MSK_IF_LOCK_ASSERT(sc_if); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || (sc_if->msk_flags & MSK_FLAG_LINK) == 0) return; for (enq = 0; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) && sc_if->msk_cdata.msk_tx_cnt < (MSK_TX_RING_CNT - MSK_RESERVED_TX_DESC_CNT); ) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; /* * Pack the data into the transmit ring. If we * don't have room, set the OACTIVE flag and wait * for the NIC to drain the ring. 
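 *
 * msk_encap() reports "no room" before the ring can overfill; a
 * reserve is kept because a packet may need prefix LEs (64-bit
 * address, MSS, VLAN tag, checksum) beyond its data segments.
 * Condensed from msk_encap() above:
 */
#if 0	/* not compiled; condensed from msk_encap() */
if (sc_if->msk_cdata.msk_tx_cnt + nseg >=
    (MSK_TX_RING_CNT - MSK_RESERVED_TX_DESC_CNT)) {
	bus_dmamap_unload(sc_if->msk_cdata.msk_tx_tag, map);
	return (ENOBUFS);	/* caller prepends the mbuf, sets OACTIVE */
}
#endif
/*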
*/ if (msk_encap(sc_if, &m_head) != 0) { if (m_head == NULL) break; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } enq++; /* * If there's a BPF listener, bounce a copy of this frame * to him. */ ETHER_BPF_MTAP(ifp, m_head); } if (enq > 0) { /* Transmit */ CSR_WRITE_2(sc_if->msk_softc, Y2_PREF_Q_ADDR(sc_if->msk_txq, PREF_UNIT_PUT_IDX_REG), sc_if->msk_cdata.msk_tx_prod); /* Set a timeout in case the chip goes out to lunch. */ sc_if->msk_watchdog_timer = MSK_TX_TIMEOUT; } } static void msk_watchdog(struct msk_if_softc *sc_if) { struct ifnet *ifp; MSK_IF_LOCK_ASSERT(sc_if); if (sc_if->msk_watchdog_timer == 0 || --sc_if->msk_watchdog_timer) return; ifp = sc_if->msk_ifp; if ((sc_if->msk_flags & MSK_FLAG_LINK) == 0) { if (bootverbose) if_printf(sc_if->msk_ifp, "watchdog timeout " "(missed link)\n"); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; msk_init_locked(sc_if); return; } if_printf(ifp, "watchdog timeout\n"); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; msk_init_locked(sc_if); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) msk_start_locked(ifp); } static int mskc_shutdown(device_t dev) { struct msk_softc *sc; int i; sc = device_get_softc(dev); MSK_LOCK(sc); for (i = 0; i < sc->msk_num_port; i++) { if (sc->msk_if[i] != NULL && sc->msk_if[i]->msk_ifp != NULL && ((sc->msk_if[i]->msk_ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)) msk_stop(sc->msk_if[i]); } MSK_UNLOCK(sc); /* Put hardware reset. */ CSR_WRITE_2(sc, B0_CTST, CS_RST_SET); return (0); } static int mskc_suspend(device_t dev) { struct msk_softc *sc; int i; sc = device_get_softc(dev); MSK_LOCK(sc); for (i = 0; i < sc->msk_num_port; i++) { if (sc->msk_if[i] != NULL && sc->msk_if[i]->msk_ifp != NULL && ((sc->msk_if[i]->msk_ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)) msk_stop(sc->msk_if[i]); } /* Disable all interrupts. */ CSR_WRITE_4(sc, B0_IMSK, 0); CSR_READ_4(sc, B0_IMSK); CSR_WRITE_4(sc, B0_HWE_IMSK, 0); CSR_READ_4(sc, B0_HWE_IMSK); msk_phy_power(sc, MSK_PHY_POWERDOWN); /* Put hardware reset. 
*/ CSR_WRITE_2(sc, B0_CTST, CS_RST_SET); sc->msk_pflags |= MSK_FLAG_SUSPEND; MSK_UNLOCK(sc); return (0); } static int mskc_resume(device_t dev) { struct msk_softc *sc; int i; sc = device_get_softc(dev); MSK_LOCK(sc); CSR_PCI_WRITE_4(sc, PCI_OUR_REG_3, 0); mskc_reset(sc); for (i = 0; i < sc->msk_num_port; i++) { if (sc->msk_if[i] != NULL && sc->msk_if[i]->msk_ifp != NULL && ((sc->msk_if[i]->msk_ifp->if_flags & IFF_UP) != 0)) { sc->msk_if[i]->msk_ifp->if_drv_flags &= ~IFF_DRV_RUNNING; msk_init_locked(sc->msk_if[i]); } } sc->msk_pflags &= ~MSK_FLAG_SUSPEND; MSK_UNLOCK(sc); return (0); } #ifndef __NO_STRICT_ALIGNMENT static __inline void msk_fixup_rx(struct mbuf *m) { int i; uint16_t *src, *dst; src = mtod(m, uint16_t *); dst = src - 3; for (i = 0; i < (m->m_len / sizeof(uint16_t) + 1); i++) *dst++ = *src++; m->m_data -= (MSK_RX_BUF_ALIGN - ETHER_ALIGN); } #endif static __inline void msk_rxcsum(struct msk_if_softc *sc_if, uint32_t control, struct mbuf *m) { struct ether_header *eh; struct ip *ip; struct udphdr *uh; int32_t hlen, len, pktlen, temp32; uint16_t csum, *opts; if ((sc_if->msk_flags & MSK_FLAG_DESCV2) != 0) { if ((control & (CSS_IPV4 | CSS_IPFRAG)) == CSS_IPV4) { m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if ((control & CSS_IPV4_CSUM_OK) != 0) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; if ((control & (CSS_TCP | CSS_UDP)) != 0 && (control & (CSS_TCPUDP_CSUM_OK)) != 0) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } } return; } /* * Marvell Yukon controllers that support OP_RXCHKS has known * to have various Rx checksum offloading bugs. These * controllers can be configured to compute simple checksum * at two different positions. So we can compute IP and TCP/UDP * checksum at the same time. We intentionally have controller * compute TCP/UDP checksum twice by specifying the same * checksum start position and compare the result. If the value * is different it would indicate the hardware logic was wrong. */ if ((sc_if->msk_csum & 0xFFFF) != (sc_if->msk_csum >> 16)) { if (bootverbose) device_printf(sc_if->msk_if_dev, "Rx checksum value mismatch!\n"); return; } pktlen = m->m_pkthdr.len; if (pktlen < sizeof(struct ether_header) + sizeof(struct ip)) return; eh = mtod(m, struct ether_header *); if (eh->ether_type != htons(ETHERTYPE_IP)) return; ip = (struct ip *)(eh + 1); if (ip->ip_v != IPVERSION) return; hlen = ip->ip_hl << 2; pktlen -= sizeof(struct ether_header); if (hlen < sizeof(struct ip)) return; if (ntohs(ip->ip_len) < hlen) return; if (ntohs(ip->ip_len) != pktlen) return; if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) return; /* can't handle fragmented packet. */ switch (ip->ip_p) { case IPPROTO_TCP: if (pktlen < (hlen + sizeof(struct tcphdr))) return; break; case IPPROTO_UDP: if (pktlen < (hlen + sizeof(struct udphdr))) return; uh = (struct udphdr *)((caddr_t)ip + hlen); if (uh->uh_sum == 0) return; /* no checksum */ break; default: return; } csum = bswap16(sc_if->msk_csum & 0xFFFF); /* Checksum fixup for IP options. 
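 *
 * The loop below backs the IP option words out of the hardware sum
 * with one's-complement subtraction; the fold step, sketched with
 * illustrative names:
 */
#if 0	/* not compiled; arithmetic only */
int32_t t = csum - *opts;	/* subtraction may borrow */
t = (t >> 16) + (t & 0xffff);	/* fold the borrow back in ... */
csum = t & 0xffff;		/* ... keeping a 16-bit 1's-complement sum */
#endif
/*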
*/ len = hlen - sizeof(struct ip); if (len > 0) { opts = (uint16_t *)(ip + 1); for (; len > 0; len -= sizeof(uint16_t), opts++) { temp32 = csum - *opts; temp32 = (temp32 >> 16) + (temp32 & 65535); csum = temp32 & 65535; } } m->m_pkthdr.csum_flags |= CSUM_DATA_VALID; m->m_pkthdr.csum_data = csum; } static void msk_rxeof(struct msk_if_softc *sc_if, uint32_t status, uint32_t control, int len) { struct mbuf *m; struct ifnet *ifp; struct msk_rxdesc *rxd; int cons, rxlen; ifp = sc_if->msk_ifp; MSK_IF_LOCK_ASSERT(sc_if); cons = sc_if->msk_cdata.msk_rx_cons; do { rxlen = status >> 16; if ((status & GMR_FS_VLAN) != 0 && (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) rxlen -= ETHER_VLAN_ENCAP_LEN; if ((sc_if->msk_flags & MSK_FLAG_NORXCHK) != 0) { /* * For controllers that returns bogus status code * just do minimal check and let upper stack * handle this frame. */ if (len > MSK_MAX_FRAMELEN || len < ETHER_HDR_LEN) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); msk_discard_rxbuf(sc_if, cons); break; } } else if (len > sc_if->msk_framesize || ((status & GMR_FS_ANY_ERR) != 0) || ((status & GMR_FS_RX_OK) == 0) || (rxlen != len)) { /* Don't count flow-control packet as errors. */ if ((status & GMR_FS_GOOD_FC) == 0) if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); msk_discard_rxbuf(sc_if, cons); break; } #ifdef MSK_64BIT_DMA rxd = &sc_if->msk_cdata.msk_rxdesc[(cons + 1) % MSK_RX_RING_CNT]; #else rxd = &sc_if->msk_cdata.msk_rxdesc[cons]; #endif m = rxd->rx_m; if (msk_newbuf(sc_if, cons) != 0) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); /* Reuse old buffer. */ msk_discard_rxbuf(sc_if, cons); break; } m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len = len; #ifndef __NO_STRICT_ALIGNMENT if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) != 0) msk_fixup_rx(m); #endif if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) msk_rxcsum(sc_if, control, m); /* Check for VLAN tagged packets. */ if ((status & GMR_FS_VLAN) != 0 && (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) { m->m_pkthdr.ether_vtag = sc_if->msk_vtag; m->m_flags |= M_VLANTAG; } MSK_IF_UNLOCK(sc_if); (*ifp->if_input)(ifp, m); MSK_IF_LOCK(sc_if); } while (0); MSK_RX_INC(sc_if->msk_cdata.msk_rx_cons, MSK_RX_RING_CNT); MSK_RX_INC(sc_if->msk_cdata.msk_rx_prod, MSK_RX_RING_CNT); } static void msk_jumbo_rxeof(struct msk_if_softc *sc_if, uint32_t status, uint32_t control, int len) { struct mbuf *m; struct ifnet *ifp; struct msk_rxdesc *jrxd; int cons, rxlen; ifp = sc_if->msk_ifp; MSK_IF_LOCK_ASSERT(sc_if); cons = sc_if->msk_cdata.msk_rx_cons; do { rxlen = status >> 16; if ((status & GMR_FS_VLAN) != 0 && (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) rxlen -= ETHER_VLAN_ENCAP_LEN; if (len > sc_if->msk_framesize || ((status & GMR_FS_ANY_ERR) != 0) || ((status & GMR_FS_RX_OK) == 0) || (rxlen != len)) { /* Don't count flow-control packet as errors. */ if ((status & GMR_FS_GOOD_FC) == 0) if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); msk_discard_jumbo_rxbuf(sc_if, cons); break; } #ifdef MSK_64BIT_DMA jrxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[(cons + 1) % MSK_JUMBO_RX_RING_CNT]; #else jrxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[cons]; #endif m = jrxd->rx_m; if (msk_jumbo_newbuf(sc_if, cons) != 0) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); /* Reuse old buffer. 
*/ msk_discard_jumbo_rxbuf(sc_if, cons); break; } m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len = len; #ifndef __NO_STRICT_ALIGNMENT if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) != 0) msk_fixup_rx(m); #endif if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) msk_rxcsum(sc_if, control, m); /* Check for VLAN tagged packets. */ if ((status & GMR_FS_VLAN) != 0 && (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) { m->m_pkthdr.ether_vtag = sc_if->msk_vtag; m->m_flags |= M_VLANTAG; } MSK_IF_UNLOCK(sc_if); (*ifp->if_input)(ifp, m); MSK_IF_LOCK(sc_if); } while (0); MSK_RX_INC(sc_if->msk_cdata.msk_rx_cons, MSK_JUMBO_RX_RING_CNT); MSK_RX_INC(sc_if->msk_cdata.msk_rx_prod, MSK_JUMBO_RX_RING_CNT); } static void msk_txeof(struct msk_if_softc *sc_if, int idx) { struct msk_txdesc *txd; struct msk_tx_desc *cur_tx; struct ifnet *ifp; uint32_t control; int cons, prog; MSK_IF_LOCK_ASSERT(sc_if); ifp = sc_if->msk_ifp; bus_dmamap_sync(sc_if->msk_cdata.msk_tx_ring_tag, sc_if->msk_cdata.msk_tx_ring_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* * Go through our tx ring and free mbufs for those * frames that have been sent. */ cons = sc_if->msk_cdata.msk_tx_cons; prog = 0; for (; cons != idx; MSK_INC(cons, MSK_TX_RING_CNT)) { if (sc_if->msk_cdata.msk_tx_cnt <= 0) break; prog++; cur_tx = &sc_if->msk_rdata.msk_tx_ring[cons]; control = le32toh(cur_tx->msk_control); sc_if->msk_cdata.msk_tx_cnt--; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if ((control & EOP) == 0) continue; txd = &sc_if->msk_cdata.msk_txdesc[cons]; bus_dmamap_sync(sc_if->msk_cdata.msk_tx_tag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc_if->msk_cdata.msk_tx_tag, txd->tx_dmamap); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); KASSERT(txd->tx_m != NULL, ("%s: freeing NULL mbuf!", __func__)); m_freem(txd->tx_m); txd->tx_m = NULL; } if (prog > 0) { sc_if->msk_cdata.msk_tx_cons = cons; if (sc_if->msk_cdata.msk_tx_cnt == 0) sc_if->msk_watchdog_timer = 0; /* No need to sync LEs as we didn't update LEs. */ } } static void msk_tick(void *xsc_if) { + struct epoch_tracker et; struct msk_if_softc *sc_if; struct mii_data *mii; sc_if = xsc_if; MSK_IF_LOCK_ASSERT(sc_if); mii = device_get_softc(sc_if->msk_miibus); mii_tick(mii); if ((sc_if->msk_flags & MSK_FLAG_LINK) == 0) msk_miibus_statchg(sc_if->msk_if_dev); + NET_EPOCH_ENTER(et); msk_handle_events(sc_if->msk_softc); + NET_EPOCH_EXIT(et); msk_watchdog(sc_if); callout_reset(&sc_if->msk_tick_ch, hz, msk_tick, sc_if); } static void msk_intr_phy(struct msk_if_softc *sc_if) { uint16_t status; msk_phy_readreg(sc_if, PHY_ADDR_MARV, PHY_MARV_INT_STAT); status = msk_phy_readreg(sc_if, PHY_ADDR_MARV, PHY_MARV_INT_STAT); /* Handle FIFO Underrun/Overflow? */ if ((status & PHY_M_IS_FIFO_ERROR)) device_printf(sc_if->msk_if_dev, "PHY FIFO underrun/overflow.\n"); } static void msk_intr_gmac(struct msk_if_softc *sc_if) { struct msk_softc *sc; uint8_t status; sc = sc_if->msk_softc; status = CSR_READ_1(sc, MR_ADDR(sc_if->msk_port, GMAC_IRQ_SRC)); /* GMAC Rx FIFO overrun. */ if ((status & GM_IS_RX_FF_OR) != 0) CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_CTRL_T), GMF_CLI_RX_FO); /* GMAC Tx FIFO underrun. */ if ((status & GM_IS_TX_FF_UR) != 0) { CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), GMF_CLI_TX_FU); device_printf(sc_if->msk_if_dev, "Tx FIFO underrun!\n"); /* * XXX * In case of Tx underrun, we may need to flush/reset * Tx MAC but that would also require resynchronization * with status LEs. 
Reinitializing status LEs would * affect the other port in a dual MAC configuration, so it * should be avoided as much as possible. * Due to lack of documentation it is all vague guesswork, * and it needs more investigation. */ } } static void msk_handle_hwerr(struct msk_if_softc *sc_if, uint32_t status) { struct msk_softc *sc; sc = sc_if->msk_softc; if ((status & Y2_IS_PAR_RD1) != 0) { device_printf(sc_if->msk_if_dev, "RAM buffer read parity error\n"); /* Clear IRQ. */ CSR_WRITE_2(sc, SELECT_RAM_BUFFER(sc_if->msk_port, B3_RI_CTRL), RI_CLR_RD_PERR); } if ((status & Y2_IS_PAR_WR1) != 0) { device_printf(sc_if->msk_if_dev, "RAM buffer write parity error\n"); /* Clear IRQ. */ CSR_WRITE_2(sc, SELECT_RAM_BUFFER(sc_if->msk_port, B3_RI_CTRL), RI_CLR_WR_PERR); } if ((status & Y2_IS_PAR_MAC1) != 0) { device_printf(sc_if->msk_if_dev, "Tx MAC parity error\n"); /* Clear IRQ. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), GMF_CLI_TX_PE); } if ((status & Y2_IS_PAR_RX1) != 0) { device_printf(sc_if->msk_if_dev, "Rx parity error\n"); /* Clear IRQ. */ CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_rxq, Q_CSR), BMU_CLR_IRQ_PAR); } if ((status & (Y2_IS_TCP_TXS1 | Y2_IS_TCP_TXA1)) != 0) { device_printf(sc_if->msk_if_dev, "TCP segmentation error\n"); /* Clear IRQ. */ CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR), BMU_CLR_IRQ_TCP); } } static void msk_intr_hwerr(struct msk_softc *sc) { uint32_t status; uint32_t tlphead[4]; status = CSR_READ_4(sc, B0_HWE_ISRC); /* Time Stamp timer overflow. */ if ((status & Y2_IS_TIST_OV) != 0) CSR_WRITE_1(sc, GMAC_TI_ST_CTRL, GMT_ST_CLR_IRQ); if ((status & Y2_IS_PCI_NEXP) != 0) { /* * A PCI Express error occurred which is not described in the * PEX spec. * This error is also mapped to either the Master Abort * (Y2_IS_MST_ERR) or Target Abort (Y2_IS_IRQ_STAT) bit and * can only be cleared there. */ device_printf(sc->msk_dev, "PCI Express protocol violation error\n"); } if ((status & (Y2_IS_MST_ERR | Y2_IS_IRQ_STAT)) != 0) { uint16_t v16; if ((status & Y2_IS_MST_ERR) != 0) device_printf(sc->msk_dev, "unexpected IRQ Master error\n"); else device_printf(sc->msk_dev, "unexpected IRQ Status error\n"); /* Reset all bits in the PCI status register. */ v16 = pci_read_config(sc->msk_dev, PCIR_STATUS, 2); CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_ON); pci_write_config(sc->msk_dev, PCIR_STATUS, v16 | PCIM_STATUS_PERR | PCIM_STATUS_SERR | PCIM_STATUS_RMABORT | PCIM_STATUS_RTABORT | PCIM_STATUS_MDPERR, 2); CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_OFF); } /* Check for PCI Express Uncorrectable Error. */ if ((status & Y2_IS_PCI_EXP) != 0) { uint32_t v32; /* * On a PCI Express bus, bridges are called root complexes (RC). * PCI Express errors are recognized by the root complex too, * which requests the system to handle the problem. After an * error occurs, it may no longer be possible to access the * adapter. */ v32 = CSR_PCI_READ_4(sc, PEX_UNC_ERR_STAT); if ((v32 & PEX_UNSUP_REQ) != 0) { /* Ignore unsupported request error. */ device_printf(sc->msk_dev, "Uncorrectable PCI Express error\n"); } if ((v32 & (PEX_FATAL_ERRORS | PEX_POIS_TLP)) != 0) { int i; /* Get TLP header from Log Registers. */ for (i = 0; i < 4; i++) tlphead[i] = CSR_PCI_READ_4(sc, PEX_HEADER_LOG + i * 4); /* Check for vendor-defined broadcast message. */ if (!(tlphead[0] == 0x73004001 && tlphead[1] == 0x7f)) { sc->msk_intrhwemask &= ~Y2_IS_PCI_EXP; CSR_WRITE_4(sc, B0_HWE_IMSK, sc->msk_intrhwemask); CSR_READ_4(sc, B0_HWE_IMSK); } } /* Clear the interrupt. 
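*/

The clear sequence that follows uses the chip's test-control gate: PCI-mapped registers are write-protected until TST_CFG_WRITE_ON is set, and protection is restored with TST_CFG_WRITE_OFF. A hedged sketch of that unlock/write/lock pattern factored into a helper; msk_pci_csr_write4() is hypothetical and not part of this driver:

/* Minimal sketch of the protected-register write pattern, assuming
 * the CSR_WRITE_1/CSR_PCI_WRITE_4 accessors and the B2_TST_CTRL1
 * gate defined by this driver. */
static void
msk_pci_csr_write4(struct msk_softc *sc, int reg, uint32_t val)
{
	/* Drop write protection on PCI-mapped registers. */
	CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_ON);
	CSR_PCI_WRITE_4(sc, reg, val);
	/* Restore write protection immediately afterwards. */
	CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_OFF);
}

/*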
*/ CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_ON); CSR_PCI_WRITE_4(sc, PEX_UNC_ERR_STAT, 0xffffffff); CSR_WRITE_1(sc, B2_TST_CTRL1, TST_CFG_WRITE_OFF); } if ((status & Y2_HWE_L1_MASK) != 0 && sc->msk_if[MSK_PORT_A] != NULL) msk_handle_hwerr(sc->msk_if[MSK_PORT_A], status); if ((status & Y2_HWE_L2_MASK) != 0 && sc->msk_if[MSK_PORT_B] != NULL) msk_handle_hwerr(sc->msk_if[MSK_PORT_B], status >> 8); } static __inline void msk_rxput(struct msk_if_softc *sc_if) { struct msk_softc *sc; sc = sc_if->msk_softc; if (sc_if->msk_framesize > (MCLBYTES - MSK_RX_BUF_ALIGN)) bus_dmamap_sync( sc_if->msk_cdata.msk_jumbo_rx_ring_tag, sc_if->msk_cdata.msk_jumbo_rx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); else bus_dmamap_sync( sc_if->msk_cdata.msk_rx_ring_tag, sc_if->msk_cdata.msk_rx_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); CSR_WRITE_2(sc, Y2_PREF_Q_ADDR(sc_if->msk_rxq, PREF_UNIT_PUT_IDX_REG), sc_if->msk_cdata.msk_rx_prod); } static int msk_handle_events(struct msk_softc *sc) { struct msk_if_softc *sc_if; int rxput[2]; struct msk_stat_desc *sd; uint32_t control, status; int cons, len, port, rxprog; if (sc->msk_stat_cons == CSR_READ_2(sc, STAT_PUT_IDX)) return (0); /* Sync status LEs. */ bus_dmamap_sync(sc->msk_stat_tag, sc->msk_stat_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); rxput[MSK_PORT_A] = rxput[MSK_PORT_B] = 0; rxprog = 0; cons = sc->msk_stat_cons; for (;;) { sd = &sc->msk_stat_ring[cons]; control = le32toh(sd->msk_control); if ((control & HW_OWNER) == 0) break; control &= ~HW_OWNER; sd->msk_control = htole32(control); status = le32toh(sd->msk_status); len = control & STLE_LEN_MASK; port = (control >> 16) & 0x01; sc_if = sc->msk_if[port]; if (sc_if == NULL) { device_printf(sc->msk_dev, "invalid port opcode " "0x%08x\n", control & STLE_OP_MASK); continue; } switch (control & STLE_OP_MASK) { case OP_RXVLAN: sc_if->msk_vtag = ntohs(len); break; case OP_RXCHKSVLAN: sc_if->msk_vtag = ntohs(len); /* FALLTHROUGH */ case OP_RXCHKS: sc_if->msk_csum = status; break; case OP_RXSTAT: if (!(sc_if->msk_ifp->if_drv_flags & IFF_DRV_RUNNING)) break; if (sc_if->msk_framesize > (MCLBYTES - MSK_RX_BUF_ALIGN)) msk_jumbo_rxeof(sc_if, status, control, len); else msk_rxeof(sc_if, status, control, len); rxprog++; /* * Because there is no way to sync a single Rx LE, * put off the DMA sync operation until the end of * event processing. */ rxput[port]++; /* Update the prefetch unit if we've passed the watermark. */ if (rxput[port] >= sc_if->msk_cdata.msk_rx_putwm) { msk_rxput(sc_if); rxput[port] = 0; } break; case OP_TXINDEXLE: if (sc->msk_if[MSK_PORT_A] != NULL) msk_txeof(sc->msk_if[MSK_PORT_A], status & STLE_TXA1_MSKL); if (sc->msk_if[MSK_PORT_B] != NULL) msk_txeof(sc->msk_if[MSK_PORT_B], ((status & STLE_TXA2_MSKL) >> STLE_TXA2_SHIFTL) | ((len & STLE_TXA2_MSKH) << STLE_TXA2_SHIFTH)); break; default: device_printf(sc->msk_dev, "unhandled opcode 0x%08x\n", control & STLE_OP_MASK); break; } MSK_INC(cons, sc->msk_stat_count); if (rxprog > sc->msk_process_limit) break; } sc->msk_stat_cons = cons; bus_dmamap_sync(sc->msk_stat_tag, sc->msk_stat_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); if (rxput[MSK_PORT_A] > 0) msk_rxput(sc->msk_if[MSK_PORT_A]); if (rxput[MSK_PORT_B] > 0) msk_rxput(sc->msk_if[MSK_PORT_B]); return (sc->msk_stat_cons != CSR_READ_2(sc, STAT_PUT_IDX)); } static void msk_intr(void *xsc) { struct msk_softc *sc; struct msk_if_softc *sc_if0, *sc_if1; struct ifnet *ifp0, *ifp1; uint32_t status; int domore; sc = xsc; MSK_LOCK(sc); /* Reading B0_Y2_SP_ISRC2 masks further interrupts. 
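*/

As the comment notes, the read of B0_Y2_SP_ISRC2 has a side effect: it masks further interrupts from the chip until software writes 2 to B0_Y2_SP_ICR. A reduced skeleton of that claim/service/re-arm shape, with locking and status decoding omitted; isr_skeleton() is illustrative only:

static void
isr_skeleton(struct msk_softc *sc)
{
	uint32_t status;

	/* The read side effect masks further device interrupts. */
	status = CSR_READ_4(sc, B0_Y2_SP_ISRC2);
	if (status == 0 || status == 0xffffffff) {
		/* Spurious, or hardware gone: just re-arm and leave. */
		CSR_WRITE_4(sc, B0_Y2_SP_ICR, 2);
		return;
	}

	/* ... decode 'status' and service each source here ... */

	/* Writing 2 re-enables interrupt generation. */
	CSR_WRITE_4(sc, B0_Y2_SP_ICR, 2);
}

/*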
*/ status = CSR_READ_4(sc, B0_Y2_SP_ISRC2); if (status == 0 || status == 0xffffffff || (sc->msk_pflags & MSK_FLAG_SUSPEND) != 0 || (status & sc->msk_intrmask) == 0) { CSR_WRITE_4(sc, B0_Y2_SP_ICR, 2); MSK_UNLOCK(sc); return; } sc_if0 = sc->msk_if[MSK_PORT_A]; sc_if1 = sc->msk_if[MSK_PORT_B]; ifp0 = ifp1 = NULL; if (sc_if0 != NULL) ifp0 = sc_if0->msk_ifp; if (sc_if1 != NULL) ifp1 = sc_if1->msk_ifp; if ((status & Y2_IS_IRQ_PHY1) != 0 && sc_if0 != NULL) msk_intr_phy(sc_if0); if ((status & Y2_IS_IRQ_PHY2) != 0 && sc_if1 != NULL) msk_intr_phy(sc_if1); if ((status & Y2_IS_IRQ_MAC1) != 0 && sc_if0 != NULL) msk_intr_gmac(sc_if0); if ((status & Y2_IS_IRQ_MAC2) != 0 && sc_if1 != NULL) msk_intr_gmac(sc_if1); if ((status & (Y2_IS_CHK_RX1 | Y2_IS_CHK_RX2)) != 0) { device_printf(sc->msk_dev, "Rx descriptor error\n"); sc->msk_intrmask &= ~(Y2_IS_CHK_RX1 | Y2_IS_CHK_RX2); CSR_WRITE_4(sc, B0_IMSK, sc->msk_intrmask); CSR_READ_4(sc, B0_IMSK); } if ((status & (Y2_IS_CHK_TXA1 | Y2_IS_CHK_TXA2)) != 0) { device_printf(sc->msk_dev, "Tx descriptor error\n"); sc->msk_intrmask &= ~(Y2_IS_CHK_TXA1 | Y2_IS_CHK_TXA2); CSR_WRITE_4(sc, B0_IMSK, sc->msk_intrmask); CSR_READ_4(sc, B0_IMSK); } if ((status & Y2_IS_HW_ERR) != 0) msk_intr_hwerr(sc); domore = msk_handle_events(sc); if ((status & Y2_IS_STAT_BMU) != 0 && domore == 0) CSR_WRITE_4(sc, STAT_CTRL, SC_STAT_CLR_IRQ); /* Reenable interrupts. */ CSR_WRITE_4(sc, B0_Y2_SP_ICR, 2); if (ifp0 != NULL && (ifp0->if_drv_flags & IFF_DRV_RUNNING) != 0 && !IFQ_DRV_IS_EMPTY(&ifp0->if_snd)) msk_start_locked(ifp0); if (ifp1 != NULL && (ifp1->if_drv_flags & IFF_DRV_RUNNING) != 0 && !IFQ_DRV_IS_EMPTY(&ifp1->if_snd)) msk_start_locked(ifp1); MSK_UNLOCK(sc); } static void msk_set_tx_stfwd(struct msk_if_softc *sc_if) { struct msk_softc *sc; struct ifnet *ifp; ifp = sc_if->msk_ifp; sc = sc_if->msk_softc; if ((sc->msk_hw_id == CHIP_ID_YUKON_EX && sc->msk_hw_rev != CHIP_REV_YU_EX_A0) || sc->msk_hw_id >= CHIP_ID_YUKON_SUPR) { CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), TX_STFW_ENA); } else { if (ifp->if_mtu > ETHERMTU) { /* Set Tx GMAC FIFO Almost Empty Threshold. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_AE_THR), MSK_ECU_JUMBO_WM << 16 | MSK_ECU_AE_THR); /* Disable Store & Forward mode for Tx. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), TX_STFW_DIS); } else { CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), TX_STFW_ENA); } } } static void msk_init(void *xsc) { struct msk_if_softc *sc_if = xsc; MSK_IF_LOCK(sc_if); msk_init_locked(sc_if); MSK_IF_UNLOCK(sc_if); } static void msk_init_locked(struct msk_if_softc *sc_if) { struct msk_softc *sc; struct ifnet *ifp; struct mii_data *mii; uint8_t *eaddr; uint16_t gmac; uint32_t reg; int error; MSK_IF_LOCK_ASSERT(sc_if); ifp = sc_if->msk_ifp; sc = sc_if->msk_softc; mii = device_get_softc(sc_if->msk_miibus); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) return; error = 0; /* Cancel pending I/O and free all Rx/Tx buffers. */ msk_stop(sc_if); if (ifp->if_mtu < ETHERMTU) sc_if->msk_framesize = ETHERMTU; else sc_if->msk_framesize = ifp->if_mtu; sc_if->msk_framesize += ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; if (ifp->if_mtu > ETHERMTU && (sc_if->msk_flags & MSK_FLAG_JUMBO_NOCSUM) != 0) { ifp->if_hwassist &= ~(MSK_CSUM_FEATURES | CSUM_TSO); ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_TXCSUM); } /* GMAC Control reset. 
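*/

The GMAC reset that follows is the usual set-then-clear pulse: assert the reset bit, then release it, through the same control register. A small sketch of the pattern as a helper; msk_reset_pulse() is hypothetical, not driver API:

/* Pulse a SET/CLR style reset, as done for GMAC_CTRL just below. */
static void
msk_reset_pulse(struct msk_softc *sc, int port, int reg,
    uint32_t set_bit, uint32_t clr_bit)
{
	CSR_WRITE_4(sc, MR_ADDR(port, reg), set_bit);	/* assert reset */
	CSR_WRITE_4(sc, MR_ADDR(port, reg), clr_bit);	/* release reset */
}

msk_reset_pulse(sc, sc_if->msk_port, GMAC_CTRL, GMC_RST_SET, GMC_RST_CLR) would then express the first two writes below.

/*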
*/ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, GMAC_CTRL), GMC_RST_SET); CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, GMAC_CTRL), GMC_RST_CLR); CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, GMAC_CTRL), GMC_F_LOOPB_OFF); if (sc->msk_hw_id == CHIP_ID_YUKON_EX || sc->msk_hw_id == CHIP_ID_YUKON_SUPR) CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, GMAC_CTRL), GMC_BYP_MACSECRX_ON | GMC_BYP_MACSECTX_ON | GMC_BYP_RETR_ON); /* * Initialize GMAC first such that speed/duplex/flow-control * parameters are renegotiated when interface is brought up. */ GMAC_WRITE_2(sc, sc_if->msk_port, GM_GP_CTRL, 0); /* Dummy read the Interrupt Source Register. */ CSR_READ_1(sc, MR_ADDR(sc_if->msk_port, GMAC_IRQ_SRC)); /* Clear MIB stats. */ msk_stats_clear(sc_if); /* Disable FCS. */ GMAC_WRITE_2(sc, sc_if->msk_port, GM_RX_CTRL, GM_RXCR_CRC_DIS); /* Setup Transmit Control Register. */ GMAC_WRITE_2(sc, sc_if->msk_port, GM_TX_CTRL, TX_COL_THR(TX_COL_DEF)); /* Setup Transmit Flow Control Register. */ GMAC_WRITE_2(sc, sc_if->msk_port, GM_TX_FLOW_CTRL, 0xffff); /* Setup Transmit Parameter Register. */ GMAC_WRITE_2(sc, sc_if->msk_port, GM_TX_PARAM, TX_JAM_LEN_VAL(TX_JAM_LEN_DEF) | TX_JAM_IPG_VAL(TX_JAM_IPG_DEF) | TX_IPG_JAM_DATA(TX_IPG_JAM_DEF) | TX_BACK_OFF_LIM(TX_BOF_LIM_DEF)); gmac = DATA_BLIND_VAL(DATA_BLIND_DEF) | GM_SMOD_VLAN_ENA | IPG_DATA_VAL(IPG_DATA_DEF); if (ifp->if_mtu > ETHERMTU) gmac |= GM_SMOD_JUMBO_ENA; GMAC_WRITE_2(sc, sc_if->msk_port, GM_SERIAL_MODE, gmac); /* Set station address. */ eaddr = IF_LLADDR(ifp); GMAC_WRITE_2(sc, sc_if->msk_port, GM_SRC_ADDR_1L, eaddr[0] | (eaddr[1] << 8)); GMAC_WRITE_2(sc, sc_if->msk_port, GM_SRC_ADDR_1M, eaddr[2] | (eaddr[3] << 8)); GMAC_WRITE_2(sc, sc_if->msk_port, GM_SRC_ADDR_1H, eaddr[4] | (eaddr[5] << 8)); GMAC_WRITE_2(sc, sc_if->msk_port, GM_SRC_ADDR_2L, eaddr[0] | (eaddr[1] << 8)); GMAC_WRITE_2(sc, sc_if->msk_port, GM_SRC_ADDR_2M, eaddr[2] | (eaddr[3] << 8)); GMAC_WRITE_2(sc, sc_if->msk_port, GM_SRC_ADDR_2H, eaddr[4] | (eaddr[5] << 8)); /* Disable interrupts for counter overflows. */ GMAC_WRITE_2(sc, sc_if->msk_port, GM_TX_IRQ_MSK, 0); GMAC_WRITE_2(sc, sc_if->msk_port, GM_RX_IRQ_MSK, 0); GMAC_WRITE_2(sc, sc_if->msk_port, GM_TR_IRQ_MSK, 0); /* Configure Rx MAC FIFO. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_CTRL_T), GMF_RST_SET); CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_CTRL_T), GMF_RST_CLR); reg = GMF_OPER_ON | GMF_RX_F_FL_ON; if (sc->msk_hw_id == CHIP_ID_YUKON_FE_P || sc->msk_hw_id == CHIP_ID_YUKON_EX) reg |= GMF_RX_OVER_ON; CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_CTRL_T), reg); /* Set receive filter. */ msk_rxfilter(sc_if); if (sc->msk_hw_id == CHIP_ID_YUKON_XL) { /* Clear flush mask - HW bug. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_FL_MSK), 0); } else { /* Flush Rx MAC FIFO on any flow control or error. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_FL_MSK), GMR_FS_ANY_ERR); } /* * Set Rx FIFO flush threshold to 64 bytes + 1 FIFO word * due to hardware hang on receipt of pause frames. */ reg = RX_GMF_FL_THR_DEF + 1; /* Another magic for Yukon FE+ - From Linux. */ if (sc->msk_hw_id == CHIP_ID_YUKON_FE_P && sc->msk_hw_rev == CHIP_REV_YU_FE_P_A0) reg = 0x178; CSR_WRITE_2(sc, MR_ADDR(sc_if->msk_port, RX_GMF_FL_THR), reg); /* Configure Tx MAC FIFO. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), GMF_RST_SET); CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), GMF_RST_CLR); CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), GMF_OPER_ON); /* Configure hardware VLAN tag insertion/stripping. 
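*/

The station address writes above pack the 6-byte Ethernet address into three 16-bit registers, low byte first. A standalone worked example of the same packing; the address and output are only sample values:

#include <stdint.h>
#include <stdio.h>

/* Pack a 6-byte Ethernet address into the three 16-bit values the
 * GM_SRC_ADDR_* registers expect, mirroring the
 * eaddr[0] | (eaddr[1] << 8) expressions above. */
int
main(void)
{
	const uint8_t eaddr[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
	uint16_t regs[3];
	int i;

	for (i = 0; i < 3; i++)
		regs[i] = eaddr[2 * i] | (eaddr[2 * i + 1] << 8);

	/* 00:11:22:33:44:55 -> 1100 3322 5544 */
	printf("%04x %04x %04x\n", regs[0], regs[1], regs[2]);
	return (0);
}

/*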
*/ msk_setvlan(sc_if, ifp); if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) == 0) { /* Set Rx Pause threshold. */ CSR_WRITE_2(sc, MR_ADDR(sc_if->msk_port, RX_GMF_LP_THR), MSK_ECU_LLPP); CSR_WRITE_2(sc, MR_ADDR(sc_if->msk_port, RX_GMF_UP_THR), MSK_ECU_ULPP); /* Configure store-and-forward for Tx. */ msk_set_tx_stfwd(sc_if); } if (sc->msk_hw_id == CHIP_ID_YUKON_FE_P && sc->msk_hw_rev == CHIP_REV_YU_FE_P_A0) { /* Disable dynamic watermark - from Linux. */ reg = CSR_READ_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_EA)); reg &= ~0x03; CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_EA), reg); } /* * Disable Force Sync bit and Alloc bit in Tx RAM interface * arbiter as we don't use Sync Tx queue. */ CSR_WRITE_1(sc, MR_ADDR(sc_if->msk_port, TXA_CTRL), TXA_DIS_FSYNC | TXA_DIS_ALLOC | TXA_STOP_RC); /* Enable the RAM Interface Arbiter. */ CSR_WRITE_1(sc, MR_ADDR(sc_if->msk_port, TXA_CTRL), TXA_ENA_ARB); /* Setup RAM buffer. */ msk_set_rambuffer(sc_if); /* Disable Tx sync Queue. */ CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_txsq, RB_CTRL), RB_RST_SET); /* Setup Tx Queue Bus Memory Interface. */ CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR), BMU_CLR_RESET); CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR), BMU_OPER_INIT); CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR), BMU_FIFO_OP_ON); CSR_WRITE_2(sc, Q_ADDR(sc_if->msk_txq, Q_WM), MSK_BMU_TX_WM); switch (sc->msk_hw_id) { case CHIP_ID_YUKON_EC_U: if (sc->msk_hw_rev == CHIP_REV_YU_EC_U_A0) { /* Fix for Yukon-EC Ultra: set BMU FIFO level */ CSR_WRITE_2(sc, Q_ADDR(sc_if->msk_txq, Q_AL), MSK_ECU_TXFF_LEV); } break; case CHIP_ID_YUKON_EX: /* * Yukon Extreme seems to have silicon bug for * automatic Tx checksum calculation capability. */ if (sc->msk_hw_rev == CHIP_REV_YU_EX_B0) CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_txq, Q_F), F_TX_CHK_AUTO_OFF); break; } /* Setup Rx Queue Bus Memory Interface. */ CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_rxq, Q_CSR), BMU_CLR_RESET); CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_rxq, Q_CSR), BMU_OPER_INIT); CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_rxq, Q_CSR), BMU_FIFO_OP_ON); CSR_WRITE_2(sc, Q_ADDR(sc_if->msk_rxq, Q_WM), MSK_BMU_RX_WM); if (sc->msk_hw_id == CHIP_ID_YUKON_EC_U && sc->msk_hw_rev >= CHIP_REV_YU_EC_U_A1) { /* MAC Rx RAM Read is controlled by hardware. */ CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_rxq, Q_F), F_M_RX_RAM_DIS); } msk_set_prefetch(sc, sc_if->msk_txq, sc_if->msk_rdata.msk_tx_ring_paddr, MSK_TX_RING_CNT - 1); msk_init_tx_ring(sc_if); /* Disable Rx checksum offload and RSS hash. */ reg = BMU_DIS_RX_RSS_HASH; if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 && (ifp->if_capenable & IFCAP_RXCSUM) != 0) reg |= BMU_ENA_RX_CHKSUM; else reg |= BMU_DIS_RX_CHKSUM; CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_rxq, Q_CSR), reg); if (sc_if->msk_framesize > (MCLBYTES - MSK_RX_BUF_ALIGN)) { msk_set_prefetch(sc, sc_if->msk_rxq, sc_if->msk_rdata.msk_jumbo_rx_ring_paddr, MSK_JUMBO_RX_RING_CNT - 1); error = msk_init_jumbo_rx_ring(sc_if); } else { msk_set_prefetch(sc, sc_if->msk_rxq, sc_if->msk_rdata.msk_rx_ring_paddr, MSK_RX_RING_CNT - 1); error = msk_init_rx_ring(sc_if); } if (error != 0) { device_printf(sc_if->msk_if_dev, "initialization failed: no memory for Rx buffers\n"); msk_stop(sc_if); return; } if (sc->msk_hw_id == CHIP_ID_YUKON_EX || sc->msk_hw_id == CHIP_ID_YUKON_SUPR) { /* Disable flushing of non-ASF packets. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_CTRL_T), GMF_RX_MACSEC_FLUSH_OFF); } /* Configure interrupt handling. 
*/ if (sc_if->msk_port == MSK_PORT_A) { sc->msk_intrmask |= Y2_IS_PORT_A; sc->msk_intrhwemask |= Y2_HWE_L1_MASK; } else { sc->msk_intrmask |= Y2_IS_PORT_B; sc->msk_intrhwemask |= Y2_HWE_L2_MASK; } /* Configure IRQ moderation mask. */ CSR_WRITE_4(sc, B2_IRQM_MSK, sc->msk_intrmask); if (sc->msk_int_holdoff > 0) { /* Configure initial IRQ moderation timer value. */ CSR_WRITE_4(sc, B2_IRQM_INI, MSK_USECS(sc, sc->msk_int_holdoff)); CSR_WRITE_4(sc, B2_IRQM_VAL, MSK_USECS(sc, sc->msk_int_holdoff)); /* Start IRQ moderation. */ CSR_WRITE_1(sc, B2_IRQM_CTRL, TIM_START); } CSR_WRITE_4(sc, B0_HWE_IMSK, sc->msk_intrhwemask); CSR_READ_4(sc, B0_HWE_IMSK); CSR_WRITE_4(sc, B0_IMSK, sc->msk_intrmask); CSR_READ_4(sc, B0_IMSK); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; sc_if->msk_flags &= ~MSK_FLAG_LINK; mii_mediachg(mii); callout_reset(&sc_if->msk_tick_ch, hz, msk_tick, sc_if); } static void msk_set_rambuffer(struct msk_if_softc *sc_if) { struct msk_softc *sc; int ltpp, utpp; sc = sc_if->msk_softc; if ((sc_if->msk_flags & MSK_FLAG_RAMBUF) == 0) return; /* Setup Rx Queue. */ CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_rxq, RB_CTRL), RB_RST_CLR); CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_rxq, RB_START), sc->msk_rxqstart[sc_if->msk_port] / 8); CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_rxq, RB_END), sc->msk_rxqend[sc_if->msk_port] / 8); CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_rxq, RB_WP), sc->msk_rxqstart[sc_if->msk_port] / 8); CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_rxq, RB_RP), sc->msk_rxqstart[sc_if->msk_port] / 8); utpp = (sc->msk_rxqend[sc_if->msk_port] + 1 - sc->msk_rxqstart[sc_if->msk_port] - MSK_RB_ULPP) / 8; ltpp = (sc->msk_rxqend[sc_if->msk_port] + 1 - sc->msk_rxqstart[sc_if->msk_port] - MSK_RB_LLPP_B) / 8; if (sc->msk_rxqsize < MSK_MIN_RXQ_SIZE) ltpp += (MSK_RB_LLPP_B - MSK_RB_LLPP_S) / 8; CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_rxq, RB_RX_UTPP), utpp); CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_rxq, RB_RX_LTPP), ltpp); /* Set Rx priority(RB_RX_UTHP/RB_RX_LTHP) thresholds? */ CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_rxq, RB_CTRL), RB_ENA_OP_MD); CSR_READ_1(sc, RB_ADDR(sc_if->msk_rxq, RB_CTRL)); /* Setup Tx Queue. */ CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_txq, RB_CTRL), RB_RST_CLR); CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_txq, RB_START), sc->msk_txqstart[sc_if->msk_port] / 8); CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_txq, RB_END), sc->msk_txqend[sc_if->msk_port] / 8); CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_txq, RB_WP), sc->msk_txqstart[sc_if->msk_port] / 8); CSR_WRITE_4(sc, RB_ADDR(sc_if->msk_txq, RB_RP), sc->msk_txqstart[sc_if->msk_port] / 8); /* Enable Store & Forward for Tx side. */ CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_txq, RB_CTRL), RB_ENA_STFWD); CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_txq, RB_CTRL), RB_ENA_OP_MD); CSR_READ_1(sc, RB_ADDR(sc_if->msk_txq, RB_CTRL)); } static void msk_set_prefetch(struct msk_softc *sc, int qaddr, bus_addr_t addr, uint32_t count) { /* Reset the prefetch unit. */ CSR_WRITE_4(sc, Y2_PREF_Q_ADDR(qaddr, PREF_UNIT_CTRL_REG), PREF_UNIT_RST_SET); CSR_WRITE_4(sc, Y2_PREF_Q_ADDR(qaddr, PREF_UNIT_CTRL_REG), PREF_UNIT_RST_CLR); /* Set LE base address. */ CSR_WRITE_4(sc, Y2_PREF_Q_ADDR(qaddr, PREF_UNIT_ADDR_LOW_REG), MSK_ADDR_LO(addr)); CSR_WRITE_4(sc, Y2_PREF_Q_ADDR(qaddr, PREF_UNIT_ADDR_HI_REG), MSK_ADDR_HI(addr)); /* Set the list last index. */ CSR_WRITE_2(sc, Y2_PREF_Q_ADDR(qaddr, PREF_UNIT_LAST_IDX_REG), count); /* Turn on prefetch unit. */ CSR_WRITE_4(sc, Y2_PREF_Q_ADDR(qaddr, PREF_UNIT_CTRL_REG), PREF_UNIT_OP_ON); /* Dummy read to ensure write. 
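*/

The dummy read below is the usual way to flush posted writes: host bridges may buffer CSR writes, and a read from the same device cannot complete until the buffered writes have reached it. The idiom as a one-line helper; msk_flush_writes() is hypothetical:

/* Illustrative flush helper: a read from the device orders all
 * previously posted writes ahead of it. */
static inline void
msk_flush_writes(struct msk_softc *sc, int reg)
{
	(void)CSR_READ_4(sc, reg);	/* result discarded on purpose */
}

/*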
*/ CSR_READ_4(sc, Y2_PREF_Q_ADDR(qaddr, PREF_UNIT_CTRL_REG)); } static void msk_stop(struct msk_if_softc *sc_if) { struct msk_softc *sc; struct msk_txdesc *txd; struct msk_rxdesc *rxd; struct msk_rxdesc *jrxd; struct ifnet *ifp; uint32_t val; int i; MSK_IF_LOCK_ASSERT(sc_if); sc = sc_if->msk_softc; ifp = sc_if->msk_ifp; callout_stop(&sc_if->msk_tick_ch); sc_if->msk_watchdog_timer = 0; /* Disable interrupts. */ if (sc_if->msk_port == MSK_PORT_A) { sc->msk_intrmask &= ~Y2_IS_PORT_A; sc->msk_intrhwemask &= ~Y2_HWE_L1_MASK; } else { sc->msk_intrmask &= ~Y2_IS_PORT_B; sc->msk_intrhwemask &= ~Y2_HWE_L2_MASK; } CSR_WRITE_4(sc, B0_HWE_IMSK, sc->msk_intrhwemask); CSR_READ_4(sc, B0_HWE_IMSK); CSR_WRITE_4(sc, B0_IMSK, sc->msk_intrmask); CSR_READ_4(sc, B0_IMSK); /* Disable Tx/Rx MAC. */ val = GMAC_READ_2(sc, sc_if->msk_port, GM_GP_CTRL); val &= ~(GM_GPCR_RX_ENA | GM_GPCR_TX_ENA); GMAC_WRITE_2(sc, sc_if->msk_port, GM_GP_CTRL, val); /* Read again to ensure writing. */ GMAC_READ_2(sc, sc_if->msk_port, GM_GP_CTRL); /* Update stats and clear counters. */ msk_stats_update(sc_if); /* Stop Tx BMU. */ CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR), BMU_STOP); val = CSR_READ_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR)); for (i = 0; i < MSK_TIMEOUT; i++) { if ((val & (BMU_STOP | BMU_IDLE)) == 0) { CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR), BMU_STOP); val = CSR_READ_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR)); } else break; DELAY(1); } if (i == MSK_TIMEOUT) device_printf(sc_if->msk_if_dev, "Tx BMU stop failed\n"); CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_txq, RB_CTRL), RB_RST_SET | RB_DIS_OP_MD); /* Disable all GMAC interrupt. */ CSR_WRITE_1(sc, MR_ADDR(sc_if->msk_port, GMAC_IRQ_MSK), 0); /* Disable PHY interrupt. */ msk_phy_writereg(sc_if, PHY_ADDR_MARV, PHY_MARV_INT_MASK, 0); /* Disable the RAM Interface Arbiter. */ CSR_WRITE_1(sc, MR_ADDR(sc_if->msk_port, TXA_CTRL), TXA_DIS_ARB); /* Reset the PCI FIFO of the async Tx queue */ CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR), BMU_RST_SET | BMU_FIFO_RST); /* Reset the Tx prefetch units. */ CSR_WRITE_4(sc, Y2_PREF_Q_ADDR(sc_if->msk_txq, PREF_UNIT_CTRL_REG), PREF_UNIT_RST_SET); /* Reset the RAM Buffer async Tx queue. */ CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_txq, RB_CTRL), RB_RST_SET); /* Reset Tx MAC FIFO. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, TX_GMF_CTRL_T), GMF_RST_SET); /* Set Pause Off. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, GMAC_CTRL), GMC_PAUSE_OFF); /* * The Rx Stop command will not work for Yukon-2 if the BMU does not * reach the end of packet and since we can't make sure that we have * incoming data, we must reset the BMU while it is not during a DMA * transfer. Since it is possible that the Rx path is still active, * the Rx RAM buffer will be stopped first, so any possible incoming * data will not trigger a DMA. After the RAM buffer is stopped, the * BMU is polled until any DMA in progress is ended and only then it * will be reset. */ /* Disable the RAM Buffer receive queue. */ CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_rxq, RB_CTRL), RB_DIS_OP_MD); for (i = 0; i < MSK_TIMEOUT; i++) { if (CSR_READ_1(sc, RB_ADDR(sc_if->msk_rxq, Q_RSL)) == CSR_READ_1(sc, RB_ADDR(sc_if->msk_rxq, Q_RL))) break; DELAY(1); } if (i == MSK_TIMEOUT) device_printf(sc_if->msk_if_dev, "Rx BMU stop failed\n"); CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_rxq, Q_CSR), BMU_RST_SET | BMU_FIFO_RST); /* Reset the Rx prefetch unit. */ CSR_WRITE_4(sc, Y2_PREF_Q_ADDR(sc_if->msk_rxq, PREF_UNIT_CTRL_REG), PREF_UNIT_RST_SET); /* Reset the RAM Buffer receive queue. 
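*/

The BMU stop code above is a bounded poll: re-issue the stop, re-read the CSR, and give up after MSK_TIMEOUT one-microsecond delays. The same shape factored into a helper; msk_wait_bmu_idle() is a sketch, not driver API, and assumes the driver's usual errno definitions are in scope:

static int
msk_wait_bmu_idle(struct msk_softc *sc, struct msk_if_softc *sc_if)
{
	uint32_t val;
	int i;

	for (i = 0; i < MSK_TIMEOUT; i++) {
		val = CSR_READ_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR));
		if ((val & (BMU_STOP | BMU_IDLE)) != 0)
			return (0);	/* stopped or idle */
		/* Not yet: re-issue the stop and poll again. */
		CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_txq, Q_CSR), BMU_STOP);
		DELAY(1);		/* 1 microsecond between polls */
	}
	return (ETIMEDOUT);
}

/*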
*/ CSR_WRITE_1(sc, RB_ADDR(sc_if->msk_rxq, RB_CTRL), RB_RST_SET); /* Reset Rx MAC FIFO. */ CSR_WRITE_4(sc, MR_ADDR(sc_if->msk_port, RX_GMF_CTRL_T), GMF_RST_SET); /* Free Rx and Tx mbufs still in the queues. */ for (i = 0; i < MSK_RX_RING_CNT; i++) { rxd = &sc_if->msk_cdata.msk_rxdesc[i]; if (rxd->rx_m != NULL) { bus_dmamap_sync(sc_if->msk_cdata.msk_rx_tag, rxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc_if->msk_cdata.msk_rx_tag, rxd->rx_dmamap); m_freem(rxd->rx_m); rxd->rx_m = NULL; } } for (i = 0; i < MSK_JUMBO_RX_RING_CNT; i++) { jrxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[i]; if (jrxd->rx_m != NULL) { bus_dmamap_sync(sc_if->msk_cdata.msk_jumbo_rx_tag, jrxd->rx_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc_if->msk_cdata.msk_jumbo_rx_tag, jrxd->rx_dmamap); m_freem(jrxd->rx_m); jrxd->rx_m = NULL; } } for (i = 0; i < MSK_TX_RING_CNT; i++) { txd = &sc_if->msk_cdata.msk_txdesc[i]; if (txd->tx_m != NULL) { bus_dmamap_sync(sc_if->msk_cdata.msk_tx_tag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc_if->msk_cdata.msk_tx_tag, txd->tx_dmamap); m_freem(txd->tx_m); txd->tx_m = NULL; } } /* * Mark the interface down. */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); sc_if->msk_flags &= ~MSK_FLAG_LINK; } /* * When GM_PAR_MIB_CLR bit of GM_PHY_ADDR is set, reading lower * counter clears high 16 bits of the counter such that accessing * lower 16 bits should be the last operation. */ #define MSK_READ_MIB32(x, y) \ (((uint32_t)GMAC_READ_2(sc, x, (y) + 4)) << 16) + \ (uint32_t)GMAC_READ_2(sc, x, y) #define MSK_READ_MIB64(x, y) \ (((uint64_t)MSK_READ_MIB32(x, (y) + 8)) << 32) + \ (uint64_t)MSK_READ_MIB32(x, y) static void msk_stats_clear(struct msk_if_softc *sc_if) { struct msk_softc *sc; uint32_t reg; uint16_t gmac; int i; MSK_IF_LOCK_ASSERT(sc_if); sc = sc_if->msk_softc; /* Set MIB Clear Counter Mode. */ gmac = GMAC_READ_2(sc, sc_if->msk_port, GM_PHY_ADDR); GMAC_WRITE_2(sc, sc_if->msk_port, GM_PHY_ADDR, gmac | GM_PAR_MIB_CLR); /* Read all MIB Counters with Clear Mode set. */ for (i = GM_RXF_UC_OK; i <= GM_TXE_FIFO_UR; i += sizeof(uint32_t)) reg = MSK_READ_MIB32(sc_if->msk_port, i); /* Clear MIB Clear Counter Mode. */ gmac &= ~GM_PAR_MIB_CLR; GMAC_WRITE_2(sc, sc_if->msk_port, GM_PHY_ADDR, gmac); } static void msk_stats_update(struct msk_if_softc *sc_if) { struct msk_softc *sc; struct ifnet *ifp; struct msk_hw_stats *stats; uint16_t gmac; uint32_t reg; MSK_IF_LOCK_ASSERT(sc_if); ifp = sc_if->msk_ifp; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; sc = sc_if->msk_softc; stats = &sc_if->msk_stats; /* Set MIB Clear Counter Mode. */ gmac = GMAC_READ_2(sc, sc_if->msk_port, GM_PHY_ADDR); GMAC_WRITE_2(sc, sc_if->msk_port, GM_PHY_ADDR, gmac | GM_PAR_MIB_CLR); /* Rx stats. 
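*/

MSK_READ_MIB32 above reads the high half at offset y + 4 before the low half at y because, with GM_PAR_MIB_CLR set, touching the low 16 bits is what latches and clears the counter, so it must come last. A standalone model of that ordering; regs[] stands in for GMAC register space and every value is illustrative:

#include <stdint.h>
#include <stdio.h>

static uint16_t regs[2] = { 0xbeef, 0x0012 };	/* low at [0], high at [1] */

/* Compose a 32-bit counter from two 16-bit halves; in clear-on-read
 * mode the hardware clears on the low-half access, so read high first. */
static uint32_t
read_mib32(void)
{
	uint32_t hi = regs[1];	/* high 16 bits first */
	uint32_t lo = regs[0];	/* low 16 bits last (clears in HW) */

	return (hi << 16) + lo;
}

int
main(void)
{
	printf("counter = %#x\n", (unsigned)read_mib32());	/* 0x12beef */
	return (0);
}

/*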
*/ stats->rx_ucast_frames += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_UC_OK); stats->rx_bcast_frames += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_BC_OK); stats->rx_pause_frames += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_MPAUSE); stats->rx_mcast_frames += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_MC_OK); stats->rx_crc_errs += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_FCS_ERR); reg = MSK_READ_MIB32(sc_if->msk_port, GM_RXF_SPARE1); stats->rx_good_octets += MSK_READ_MIB64(sc_if->msk_port, GM_RXO_OK_LO); stats->rx_bad_octets += MSK_READ_MIB64(sc_if->msk_port, GM_RXO_ERR_LO); stats->rx_runts += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_SHT); stats->rx_runt_errs += MSK_READ_MIB32(sc_if->msk_port, GM_RXE_FRAG); stats->rx_pkts_64 += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_64B); stats->rx_pkts_65_127 += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_127B); stats->rx_pkts_128_255 += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_255B); stats->rx_pkts_256_511 += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_511B); stats->rx_pkts_512_1023 += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_1023B); stats->rx_pkts_1024_1518 += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_1518B); stats->rx_pkts_1519_max += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_MAX_SZ); stats->rx_pkts_too_long += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_LNG_ERR); stats->rx_pkts_jabbers += MSK_READ_MIB32(sc_if->msk_port, GM_RXF_JAB_PKT); reg = MSK_READ_MIB32(sc_if->msk_port, GM_RXF_SPARE2); stats->rx_fifo_oflows += MSK_READ_MIB32(sc_if->msk_port, GM_RXE_FIFO_OV); reg = MSK_READ_MIB32(sc_if->msk_port, GM_RXF_SPARE3); /* Tx stats. */ stats->tx_ucast_frames += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_UC_OK); stats->tx_bcast_frames += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_BC_OK); stats->tx_pause_frames += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_MPAUSE); stats->tx_mcast_frames += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_MC_OK); stats->tx_octets += MSK_READ_MIB64(sc_if->msk_port, GM_TXO_OK_LO); stats->tx_pkts_64 += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_64B); stats->tx_pkts_65_127 += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_127B); stats->tx_pkts_128_255 += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_255B); stats->tx_pkts_256_511 += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_511B); stats->tx_pkts_512_1023 += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_1023B); stats->tx_pkts_1024_1518 += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_1518B); stats->tx_pkts_1519_max += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_MAX_SZ); reg = MSK_READ_MIB32(sc_if->msk_port, GM_TXF_SPARE1); stats->tx_colls += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_COL); stats->tx_late_colls += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_LAT_COL); stats->tx_excess_colls += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_ABO_COL); stats->tx_multi_colls += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_MUL_COL); stats->tx_single_colls += MSK_READ_MIB32(sc_if->msk_port, GM_TXF_SNG_COL); stats->tx_underflows += MSK_READ_MIB32(sc_if->msk_port, GM_TXE_FIFO_UR); /* Clear MIB Clear Counter Mode. 
*/ gmac &= ~GM_PAR_MIB_CLR; GMAC_WRITE_2(sc, sc_if->msk_port, GM_PHY_ADDR, gmac); } static int msk_sysctl_stat32(SYSCTL_HANDLER_ARGS) { struct msk_softc *sc; struct msk_if_softc *sc_if; uint32_t result, *stat; int off; sc_if = (struct msk_if_softc *)arg1; sc = sc_if->msk_softc; off = arg2; stat = (uint32_t *)((uint8_t *)&sc_if->msk_stats + off); MSK_IF_LOCK(sc_if); result = MSK_READ_MIB32(sc_if->msk_port, GM_MIB_CNT_BASE + off * 2); result += *stat; MSK_IF_UNLOCK(sc_if); return (sysctl_handle_int(oidp, &result, 0, req)); } static int msk_sysctl_stat64(SYSCTL_HANDLER_ARGS) { struct msk_softc *sc; struct msk_if_softc *sc_if; uint64_t result, *stat; int off; sc_if = (struct msk_if_softc *)arg1; sc = sc_if->msk_softc; off = arg2; stat = (uint64_t *)((uint8_t *)&sc_if->msk_stats + off); MSK_IF_LOCK(sc_if); result = MSK_READ_MIB64(sc_if->msk_port, GM_MIB_CNT_BASE + off * 2); result += *stat; MSK_IF_UNLOCK(sc_if); return (sysctl_handle_64(oidp, &result, 0, req)); } #undef MSK_READ_MIB32 #undef MSK_READ_MIB64 #define MSK_SYSCTL_STAT32(sc, c, o, p, n, d) \ SYSCTL_ADD_PROC(c, p, OID_AUTO, o, CTLTYPE_UINT | CTLFLAG_RD, \ sc, offsetof(struct msk_hw_stats, n), msk_sysctl_stat32, \ "IU", d) #define MSK_SYSCTL_STAT64(sc, c, o, p, n, d) \ SYSCTL_ADD_PROC(c, p, OID_AUTO, o, CTLTYPE_U64 | CTLFLAG_RD, \ sc, offsetof(struct msk_hw_stats, n), msk_sysctl_stat64, \ "QU", d) static void msk_sysctl_node(struct msk_if_softc *sc_if) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *child, *schild; struct sysctl_oid *tree; ctx = device_get_sysctl_ctx(sc_if->msk_if_dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc_if->msk_if_dev)); tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "stats", CTLFLAG_RD, NULL, "MSK Statistics"); schild = SYSCTL_CHILDREN(tree); tree = SYSCTL_ADD_NODE(ctx, schild, OID_AUTO, "rx", CTLFLAG_RD, NULL, "MSK RX Statistics"); child = SYSCTL_CHILDREN(tree); MSK_SYSCTL_STAT32(sc_if, ctx, "ucast_frames", child, rx_ucast_frames, "Good unicast frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "bcast_frames", child, rx_bcast_frames, "Good broadcast frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "pause_frames", child, rx_pause_frames, "Pause frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "mcast_frames", child, rx_mcast_frames, "Multicast frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "crc_errs", child, rx_crc_errs, "CRC errors"); MSK_SYSCTL_STAT64(sc_if, ctx, "good_octets", child, rx_good_octets, "Good octets"); MSK_SYSCTL_STAT64(sc_if, ctx, "bad_octets", child, rx_bad_octets, "Bad octets"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_64", child, rx_pkts_64, "64 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_65_127", child, rx_pkts_65_127, "65 to 127 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_128_255", child, rx_pkts_128_255, "128 to 255 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_256_511", child, rx_pkts_256_511, "256 to 511 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_512_1023", child, rx_pkts_512_1023, "512 to 1023 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_1024_1518", child, rx_pkts_1024_1518, "1024 to 1518 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_1519_max", child, rx_pkts_1519_max, "1519 to max frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_too_long", child, rx_pkts_too_long, "frames too long"); MSK_SYSCTL_STAT32(sc_if, ctx, "jabbers", child, rx_pkts_jabbers, "Jabber errors"); MSK_SYSCTL_STAT32(sc_if, ctx, "overflows", child, rx_fifo_oflows, "FIFO overflows"); tree = SYSCTL_ADD_NODE(ctx, schild, OID_AUTO, "tx", CTLFLAG_RD, NULL, "MSK TX Statistics"); child = 
SYSCTL_CHILDREN(tree); MSK_SYSCTL_STAT32(sc_if, ctx, "ucast_frames", child, tx_ucast_frames, "Unicast frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "bcast_frames", child, tx_bcast_frames, "Broadcast frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "pause_frames", child, tx_pause_frames, "Pause frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "mcast_frames", child, tx_mcast_frames, "Multicast frames"); MSK_SYSCTL_STAT64(sc_if, ctx, "octets", child, tx_octets, "Octets"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_64", child, tx_pkts_64, "64 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_65_127", child, tx_pkts_65_127, "65 to 127 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_128_255", child, tx_pkts_128_255, "128 to 255 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_256_511", child, tx_pkts_256_511, "256 to 511 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_512_1023", child, tx_pkts_512_1023, "512 to 1023 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_1024_1518", child, tx_pkts_1024_1518, "1024 to 1518 bytes frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "frames_1519_max", child, tx_pkts_1519_max, "1519 to max frames"); MSK_SYSCTL_STAT32(sc_if, ctx, "colls", child, tx_colls, "Collisions"); MSK_SYSCTL_STAT32(sc_if, ctx, "late_colls", child, tx_late_colls, "Late collisions"); MSK_SYSCTL_STAT32(sc_if, ctx, "excess_colls", child, tx_excess_colls, "Excessive collisions"); MSK_SYSCTL_STAT32(sc_if, ctx, "multi_colls", child, tx_multi_colls, "Multiple collisions"); MSK_SYSCTL_STAT32(sc_if, ctx, "single_colls", child, tx_single_colls, "Single collisions"); MSK_SYSCTL_STAT32(sc_if, ctx, "underflows", child, tx_underflows, "FIFO underflows"); } #undef MSK_SYSCTL_STAT32 #undef MSK_SYSCTL_STAT64 static int sysctl_int_range(SYSCTL_HANDLER_ARGS, int low, int high) { int error, value; if (!arg1) return (EINVAL); value = *(int *)arg1; error = sysctl_handle_int(oidp, &value, 0, req); if (error || !req->newptr) return (error); if (value < low || value > high) return (EINVAL); *(int *)arg1 = value; return (0); } static int sysctl_hw_msk_proc_limit(SYSCTL_HANDLER_ARGS) { return (sysctl_int_range(oidp, arg1, arg2, req, MSK_PROC_MIN, MSK_PROC_MAX)); } Index: projects/clang1000-import/sys/dev/netmap/netmap_mem2.c =================================================================== --- projects/clang1000-import/sys/dev/netmap/netmap_mem2.c (revision 357178) +++ projects/clang1000-import/sys/dev/netmap/netmap_mem2.c (revision 357179) @@ -1,2857 +1,2858 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (C) 2012-2014 Matteo Landi * Copyright (C) 2012-2016 Luigi Rizzo * Copyright (C) 2012-2016 Giuseppe Lettieri * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifdef linux #include "bsd_glue.h" #endif /* linux */ #ifdef __APPLE__ #include "osx_glue.h" #endif /* __APPLE__ */ #ifdef __FreeBSD__ #include /* prerequisite */ __FBSDID("$FreeBSD$"); #include #include #include /* MALLOC_DEFINE */ #include #include /* vtophys */ #include /* vtophys */ #include /* sockaddrs */ #include #include #include #include #include #include /* bus_dmamap_* */ /* M_NETMAP only used in here */ MALLOC_DECLARE(M_NETMAP); MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map"); #endif /* __FreeBSD__ */ #ifdef _WIN32 #include #endif #include #include #include #include "netmap_mem2.h" #ifdef _WIN32_USE_SMALL_GENERIC_DEVICES_MEMORY #define NETMAP_BUF_MAX_NUM 8*4096 /* if too big takes too much time to allocate */ #else #define NETMAP_BUF_MAX_NUM 20*4096*2 /* large machine */ #endif #define NETMAP_POOL_MAX_NAMSZ 32 enum { NETMAP_IF_POOL = 0, NETMAP_RING_POOL, NETMAP_BUF_POOL, NETMAP_POOLS_NR }; struct netmap_obj_params { u_int size; u_int num; u_int last_size; u_int last_num; }; struct netmap_obj_pool { char name[NETMAP_POOL_MAX_NAMSZ]; /* name of the allocator */ /* ---------------------------------------------------*/ /* these are only meaningful if the pool is finalized */ /* (see 'finalized' field in netmap_mem_d) */ size_t memtotal; /* actual total memory space */ struct lut_entry *lut; /* virt,phys addresses, objtotal entries */ uint32_t *bitmap; /* one bit per buffer, 1 means free */ uint32_t *invalid_bitmap;/* one bit per buffer, 1 means invalid */ uint32_t bitmap_slots; /* number of uint32 entries in bitmap */ u_int objtotal; /* actual total number of objects. */ u_int numclusters; /* actual number of clusters */ u_int objfree; /* number of free objects. 
*/ int alloc_done; /* we have allocated the memory */ /* ---------------------------------------------------*/ /* limits */ u_int objminsize; /* minimum object size */ u_int objmaxsize; /* maximum object size */ u_int nummin; /* minimum number of objects */ u_int nummax; /* maximum number of objects */ /* these are changed only by config */ u_int _objtotal; /* total number of objects */ u_int _objsize; /* object size */ u_int _clustsize; /* cluster size */ u_int _clustentries; /* objects per cluster */ u_int _numclusters; /* number of clusters */ /* requested values */ u_int r_objtotal; u_int r_objsize; }; #define NMA_LOCK_T NM_MTX_T #define NMA_LOCK_INIT(n) NM_MTX_INIT((n)->nm_mtx) #define NMA_LOCK_DESTROY(n) NM_MTX_DESTROY((n)->nm_mtx) #define NMA_LOCK(n) NM_MTX_LOCK((n)->nm_mtx) #define NMA_SPINLOCK(n) NM_MTX_SPINLOCK((n)->nm_mtx) #define NMA_UNLOCK(n) NM_MTX_UNLOCK((n)->nm_mtx) struct netmap_mem_ops { int (*nmd_get_lut)(struct netmap_mem_d *, struct netmap_lut*); int (*nmd_get_info)(struct netmap_mem_d *, uint64_t *size, u_int *memflags, uint16_t *id); vm_paddr_t (*nmd_ofstophys)(struct netmap_mem_d *, vm_ooffset_t); int (*nmd_config)(struct netmap_mem_d *); int (*nmd_finalize)(struct netmap_mem_d *); void (*nmd_deref)(struct netmap_mem_d *); ssize_t (*nmd_if_offset)(struct netmap_mem_d *, const void *vaddr); void (*nmd_delete)(struct netmap_mem_d *); struct netmap_if * (*nmd_if_new)(struct netmap_adapter *, struct netmap_priv_d *); void (*nmd_if_delete)(struct netmap_adapter *, struct netmap_if *); int (*nmd_rings_create)(struct netmap_adapter *); void (*nmd_rings_delete)(struct netmap_adapter *); }; struct netmap_mem_d { NMA_LOCK_T nm_mtx; /* protect the allocator */ size_t nm_totalsize; /* shorthand */ u_int flags; #define NETMAP_MEM_FINALIZED 0x1 /* preallocation done */ #define NETMAP_MEM_HIDDEN 0x8 /* being prepared */ int lasterr; /* last error for curr config */ int active; /* active users */ int refcount; /* the three allocators */ struct netmap_obj_pool pools[NETMAP_POOLS_NR]; nm_memid_t nm_id; /* allocator identifier */ int nm_grp; /* iommu group id */ /* list of all existing allocators, sorted by nm_id */ struct netmap_mem_d *prev, *next; struct netmap_mem_ops *ops; struct netmap_obj_params params[NETMAP_POOLS_NR]; #define NM_MEM_NAMESZ 16 char name[NM_MEM_NAMESZ]; }; int netmap_mem_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut) { int rv; NMA_LOCK(nmd); rv = nmd->ops->nmd_get_lut(nmd, lut); NMA_UNLOCK(nmd); return rv; } int netmap_mem_get_info(struct netmap_mem_d *nmd, uint64_t *size, u_int *memflags, nm_memid_t *memid) { int rv; NMA_LOCK(nmd); rv = nmd->ops->nmd_get_info(nmd, size, memflags, memid); NMA_UNLOCK(nmd); return rv; } vm_paddr_t netmap_mem_ofstophys(struct netmap_mem_d *nmd, vm_ooffset_t off) { vm_paddr_t pa; #if defined(__FreeBSD__) /* This function is called by netmap_dev_pager_fault(), which holds a * non-sleepable lock since FreeBSD 12. Since we cannot sleep, we * spin on the trylock. */ NMA_SPINLOCK(nmd); #else NMA_LOCK(nmd); #endif pa = nmd->ops->nmd_ofstophys(nmd, off); NMA_UNLOCK(nmd); return pa; } static int netmap_mem_config(struct netmap_mem_d *nmd) { if (nmd->active) { /* already in use. 
Not fatal, but we * cannot change the configuration */ return 0; } return nmd->ops->nmd_config(nmd); } ssize_t netmap_mem_if_offset(struct netmap_mem_d *nmd, const void *off) { ssize_t rv; NMA_LOCK(nmd); rv = nmd->ops->nmd_if_offset(nmd, off); NMA_UNLOCK(nmd); return rv; } static void netmap_mem_delete(struct netmap_mem_d *nmd) { nmd->ops->nmd_delete(nmd); } struct netmap_if * netmap_mem_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv) { struct netmap_if *nifp; struct netmap_mem_d *nmd = na->nm_mem; NMA_LOCK(nmd); nifp = nmd->ops->nmd_if_new(na, priv); NMA_UNLOCK(nmd); return nifp; } void netmap_mem_if_delete(struct netmap_adapter *na, struct netmap_if *nif) { struct netmap_mem_d *nmd = na->nm_mem; NMA_LOCK(nmd); nmd->ops->nmd_if_delete(na, nif); NMA_UNLOCK(nmd); } int netmap_mem_rings_create(struct netmap_adapter *na) { int rv; struct netmap_mem_d *nmd = na->nm_mem; NMA_LOCK(nmd); rv = nmd->ops->nmd_rings_create(na); NMA_UNLOCK(nmd); return rv; } void netmap_mem_rings_delete(struct netmap_adapter *na) { struct netmap_mem_d *nmd = na->nm_mem; NMA_LOCK(nmd); nmd->ops->nmd_rings_delete(na); NMA_UNLOCK(nmd); } static int netmap_mem_map(struct netmap_obj_pool *, struct netmap_adapter *); static int netmap_mem_unmap(struct netmap_obj_pool *, struct netmap_adapter *); static int nm_mem_assign_group(struct netmap_mem_d *, struct device *); static void nm_mem_release_id(struct netmap_mem_d *); nm_memid_t netmap_mem_get_id(struct netmap_mem_d *nmd) { return nmd->nm_id; } #ifdef NM_DEBUG_MEM_PUTGET #define NM_DBG_REFC(nmd, func, line) \ nm_prinf("%d mem[%d] -> %d", line, (nmd)->nm_id, (nmd)->refcount); #else #define NM_DBG_REFC(nmd, func, line) #endif /* circular list of all existing allocators */ static struct netmap_mem_d *netmap_last_mem_d = &nm_mem; NM_MTX_T nm_mem_list_lock; struct netmap_mem_d * __netmap_mem_get(struct netmap_mem_d *nmd, const char *func, int line) { NM_MTX_LOCK(nm_mem_list_lock); nmd->refcount++; NM_DBG_REFC(nmd, func, line); NM_MTX_UNLOCK(nm_mem_list_lock); return nmd; } void __netmap_mem_put(struct netmap_mem_d *nmd, const char *func, int line) { int last; NM_MTX_LOCK(nm_mem_list_lock); last = (--nmd->refcount == 0); if (last) nm_mem_release_id(nmd); NM_DBG_REFC(nmd, func, line); NM_MTX_UNLOCK(nm_mem_list_lock); if (last) netmap_mem_delete(nmd); } int netmap_mem_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na) { int lasterr = 0; if (nm_mem_assign_group(nmd, na->pdev) < 0) { return ENOMEM; } NMA_LOCK(nmd); if (netmap_mem_config(nmd)) goto out; nmd->active++; nmd->lasterr = nmd->ops->nmd_finalize(nmd); if (!nmd->lasterr && na->pdev) { nmd->lasterr = netmap_mem_map(&nmd->pools[NETMAP_BUF_POOL], na); } out: lasterr = nmd->lasterr; NMA_UNLOCK(nmd); if (lasterr) netmap_mem_deref(nmd, na); return lasterr; } static int nm_isset(uint32_t *bitmap, u_int i) { return bitmap[ (i>>5) ] & ( 1U << (i & 31U) ); } static int netmap_init_obj_allocator_bitmap(struct netmap_obj_pool *p) { u_int n, j; if (p->bitmap == NULL) { /* Allocate the bitmap */ n = (p->objtotal + 31) / 32; p->bitmap = nm_os_malloc(sizeof(p->bitmap[0]) * n); if (p->bitmap == NULL) { nm_prerr("Unable to create bitmap (%d entries) for allocator '%s'", (int)n, p->name); return ENOMEM; } p->bitmap_slots = n; } else { memset(p->bitmap, 0, p->bitmap_slots * sizeof(p->bitmap[0])); } p->objfree = 0; /* * Set all the bits in the bitmap that have * corresponding buffers to 1 to indicate they are * free. 
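*/

The loop that follows marks object j free by setting bit j & 31 of 32-bit word j >> 5, the same arithmetic nm_isset() uses above. A standalone illustration of the bit math; bm_set() and bm_isset() are illustrative helpers, not netmap API:

#include <stdint.h>
#include <stdio.h>

/* Object j lives in 32-bit word j >> 5, at bit j & 31. */
static void
bm_set(uint32_t *bm, unsigned j)
{
	bm[j >> 5] |= 1U << (j & 31U);	/* mark free */
}

static int
bm_isset(const uint32_t *bm, unsigned j)
{
	return (bm[j >> 5] & (1U << (j & 31U))) != 0;
}

int
main(void)
{
	uint32_t bm[2] = { 0, 0 };	/* room for 64 objects */

	bm_set(bm, 37);			/* word 1, bit 5 */
	printf("37 free? %d, 36 free? %d\n",
	    bm_isset(bm, 37), bm_isset(bm, 36));	/* 1, 0 */
	return (0);
}

/*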
*/ for (j = 0; j < p->objtotal; j++) { if (p->invalid_bitmap && nm_isset(p->invalid_bitmap, j)) { if (netmap_debug & NM_DEBUG_MEM) nm_prinf("skipping %s %d", p->name, j); continue; } p->bitmap[ (j>>5) ] |= ( 1U << (j & 31U) ); p->objfree++; } if (netmap_verbose) nm_prinf("%s free %u", p->name, p->objfree); if (p->objfree == 0) { if (netmap_verbose) nm_prerr("%s: no objects available", p->name); return ENOMEM; } return 0; } static int netmap_mem_init_bitmaps(struct netmap_mem_d *nmd) { int i, error = 0; for (i = 0; i < NETMAP_POOLS_NR; i++) { struct netmap_obj_pool *p = &nmd->pools[i]; error = netmap_init_obj_allocator_bitmap(p); if (error) return error; } /* * buffers 0 and 1 are reserved */ if (nmd->pools[NETMAP_BUF_POOL].objfree < 2) { nm_prerr("%s: not enough buffers", nmd->pools[NETMAP_BUF_POOL].name); return ENOMEM; } nmd->pools[NETMAP_BUF_POOL].objfree -= 2; if (nmd->pools[NETMAP_BUF_POOL].bitmap) { /* XXX This check is a workaround that prevents a * NULL pointer crash which currently happens only * with ptnetmap guests. * Removed shared-info --> is the bug still there? */ nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3U; } return 0; } int netmap_mem_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na) { int last_user = 0; NMA_LOCK(nmd); if (na->active_fds <= 0) netmap_mem_unmap(&nmd->pools[NETMAP_BUF_POOL], na); if (nmd->active == 1) { last_user = 1; /* * Reset the allocator when it falls out of use so that any * pool resources leaked by unclean application exits are * reclaimed. */ netmap_mem_init_bitmaps(nmd); } nmd->ops->nmd_deref(nmd); nmd->active--; if (last_user) { nmd->nm_grp = -1; nmd->lasterr = 0; } NMA_UNLOCK(nmd); return last_user; } /* accessor functions */ static int netmap_mem2_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut) { lut->lut = nmd->pools[NETMAP_BUF_POOL].lut; #ifdef __FreeBSD__ lut->plut = lut->lut; #endif lut->objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal; lut->objsize = nmd->pools[NETMAP_BUF_POOL]._objsize; return 0; } static struct netmap_obj_params netmap_min_priv_params[NETMAP_POOLS_NR] = { [NETMAP_IF_POOL] = { .size = 1024, .num = 2, }, [NETMAP_RING_POOL] = { .size = 5*PAGE_SIZE, .num = 4, }, [NETMAP_BUF_POOL] = { .size = 2048, .num = 4098, }, }; /* * nm_mem is the memory allocator used for all physical interfaces * running in netmap mode. * Virtual (VALE) ports will have each its own allocator. */ extern struct netmap_mem_ops netmap_mem_global_ops; /* forward */ struct netmap_mem_d nm_mem = { /* Our memory allocator. */ .pools = { [NETMAP_IF_POOL] = { .name = "netmap_if", .objminsize = sizeof(struct netmap_if), .objmaxsize = 4096, .nummin = 10, /* don't be stingy */ .nummax = 10000, /* XXX very large */ }, [NETMAP_RING_POOL] = { .name = "netmap_ring", .objminsize = sizeof(struct netmap_ring), .objmaxsize = 32*PAGE_SIZE, .nummin = 2, .nummax = 1024, }, [NETMAP_BUF_POOL] = { .name = "netmap_buf", .objminsize = 64, .objmaxsize = 65536, .nummin = 4, .nummax = 1000000, /* one million! 
*/ }, }, .params = { [NETMAP_IF_POOL] = { .size = 1024, .num = 100, }, [NETMAP_RING_POOL] = { .size = 9*PAGE_SIZE, .num = 200, }, [NETMAP_BUF_POOL] = { .size = 2048, .num = NETMAP_BUF_MAX_NUM, }, }, .nm_id = 1, .nm_grp = -1, .prev = &nm_mem, .next = &nm_mem, .ops = &netmap_mem_global_ops, .name = "1" }; /* blueprint for the private memory allocators */ /* XXX clang is not happy about using name as a print format */ static const struct netmap_mem_d nm_blueprint = { .pools = { [NETMAP_IF_POOL] = { .name = "%s_if", .objminsize = sizeof(struct netmap_if), .objmaxsize = 4096, .nummin = 1, .nummax = 100, }, [NETMAP_RING_POOL] = { .name = "%s_ring", .objminsize = sizeof(struct netmap_ring), .objmaxsize = 32*PAGE_SIZE, .nummin = 2, .nummax = 1024, }, [NETMAP_BUF_POOL] = { .name = "%s_buf", .objminsize = 64, .objmaxsize = 65536, .nummin = 4, .nummax = 1000000, /* one million! */ }, }, .nm_grp = -1, .flags = NETMAP_MEM_PRIVATE, .ops = &netmap_mem_global_ops, }; /* memory allocator related sysctls */ #define STRINGIFY(x) #x #define DECLARE_SYSCTLS(id, name) \ SYSBEGIN(mem2_ ## name); \ SYSCTL_INT(_dev_netmap, OID_AUTO, name##_size, \ CTLFLAG_RW, &nm_mem.params[id].size, 0, "Requested size of netmap " STRINGIFY(name) "s"); \ SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_size, \ CTLFLAG_RD, &nm_mem.pools[id]._objsize, 0, "Current size of netmap " STRINGIFY(name) "s"); \ SYSCTL_INT(_dev_netmap, OID_AUTO, name##_num, \ CTLFLAG_RW, &nm_mem.params[id].num, 0, "Requested number of netmap " STRINGIFY(name) "s"); \ SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_num, \ CTLFLAG_RD, &nm_mem.pools[id].objtotal, 0, "Current number of netmap " STRINGIFY(name) "s"); \ SYSCTL_INT(_dev_netmap, OID_AUTO, priv_##name##_size, \ CTLFLAG_RW, &netmap_min_priv_params[id].size, 0, \ "Default size of private netmap " STRINGIFY(name) "s"); \ SYSCTL_INT(_dev_netmap, OID_AUTO, priv_##name##_num, \ CTLFLAG_RW, &netmap_min_priv_params[id].num, 0, \ "Default number of private netmap " STRINGIFY(name) "s"); \ SYSEND SYSCTL_DECL(_dev_netmap); DECLARE_SYSCTLS(NETMAP_IF_POOL, if); DECLARE_SYSCTLS(NETMAP_RING_POOL, ring); DECLARE_SYSCTLS(NETMAP_BUF_POOL, buf); /* call with nm_mem_list_lock held */ static int nm_mem_assign_id_locked(struct netmap_mem_d *nmd) { nm_memid_t id; struct netmap_mem_d *scan = netmap_last_mem_d; int error = ENOMEM; do { /* we rely on unsigned wrap around */ id = scan->nm_id + 1; if (id == 0) /* reserve 0 as error value */ id = 1; scan = scan->next; if (id != scan->nm_id) { nmd->nm_id = id; nmd->prev = scan->prev; nmd->next = scan; scan->prev->next = nmd; scan->prev = nmd; netmap_last_mem_d = nmd; nmd->refcount = 1; NM_DBG_REFC(nmd, __FUNCTION__, __LINE__); error = 0; break; } } while (scan != netmap_last_mem_d); return error; } /* call with nm_mem_list_lock *not* held */ static int nm_mem_assign_id(struct netmap_mem_d *nmd) { int ret; NM_MTX_LOCK(nm_mem_list_lock); ret = nm_mem_assign_id_locked(nmd); NM_MTX_UNLOCK(nm_mem_list_lock); return ret; } /* call with nm_mem_list_lock held */ static void nm_mem_release_id(struct netmap_mem_d *nmd) { nmd->prev->next = nmd->next; nmd->next->prev = nmd->prev; if (netmap_last_mem_d == nmd) netmap_last_mem_d = nmd->prev; nmd->prev = nmd->next = NULL; } struct netmap_mem_d * netmap_mem_find(nm_memid_t id) { struct netmap_mem_d *nmd; NM_MTX_LOCK(nm_mem_list_lock); nmd = netmap_last_mem_d; do { if (!(nmd->flags & NETMAP_MEM_HIDDEN) && nmd->nm_id == id) { nmd->refcount++; NM_DBG_REFC(nmd, __FUNCTION__, __LINE__); NM_MTX_UNLOCK(nm_mem_list_lock); return nmd; } nmd = nmd->next; } 
while (nmd != netmap_last_mem_d); NM_MTX_UNLOCK(nm_mem_list_lock); return NULL; } static int nm_mem_assign_group(struct netmap_mem_d *nmd, struct device *dev) { int err = 0, id; id = nm_iommu_group_id(dev); if (netmap_debug & NM_DEBUG_MEM) nm_prinf("iommu_group %d", id); NMA_LOCK(nmd); if (nmd->nm_grp < 0) nmd->nm_grp = id; if (nmd->nm_grp != id) { if (netmap_verbose) nm_prerr("iommu group mismatch: %u vs %u", nmd->nm_grp, id); nmd->lasterr = err = ENOMEM; } NMA_UNLOCK(nmd); return err; } static struct lut_entry * nm_alloc_lut(u_int nobj) { size_t n = sizeof(struct lut_entry) * nobj; struct lut_entry *lut; #ifdef linux lut = vmalloc(n); #else lut = nm_os_malloc(n); #endif return lut; } static void nm_free_lut(struct lut_entry *lut, u_int objtotal) { bzero(lut, sizeof(struct lut_entry) * objtotal); #ifdef linux vfree(lut); #else nm_os_free(lut); #endif } #if defined(linux) || defined(_WIN32) static struct plut_entry * nm_alloc_plut(u_int nobj) { size_t n = sizeof(struct plut_entry) * nobj; struct plut_entry *lut; lut = vmalloc(n); return lut; } static void nm_free_plut(struct plut_entry * lut) { vfree(lut); } #endif /* linux or _WIN32 */ /* * First, find the allocator that contains the requested offset, * then locate the cluster through a lookup table. */ static vm_paddr_t netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset) { int i; vm_ooffset_t o = offset; vm_paddr_t pa; struct netmap_obj_pool *p; p = nmd->pools; for (i = 0; i < NETMAP_POOLS_NR; offset -= p[i].memtotal, i++) { if (offset >= p[i].memtotal) continue; // now lookup the cluster's address #ifndef _WIN32 pa = vtophys(p[i].lut[offset / p[i]._objsize].vaddr) + offset % p[i]._objsize; #else pa = vtophys(p[i].lut[offset / p[i]._objsize].vaddr); pa.QuadPart += offset % p[i]._objsize; #endif return pa; } /* this is only in case of errors */ nm_prerr("invalid ofs 0x%x out of 0x%zx 0x%zx 0x%zx", (u_int)o, p[NETMAP_IF_POOL].memtotal, p[NETMAP_IF_POOL].memtotal + p[NETMAP_RING_POOL].memtotal, p[NETMAP_IF_POOL].memtotal + p[NETMAP_RING_POOL].memtotal + p[NETMAP_BUF_POOL].memtotal); #ifndef _WIN32 return 0; /* bad address */ #else vm_paddr_t res; res.QuadPart = 0; return res; #endif } #ifdef _WIN32 /* * win32_build_virtual_memory_for_userspace * * This function gets all the objects that make up the pools and maps * a contiguous virtual memory space for userspace. * It works this way: * 1 - allocate a Memory Descriptor List as wide as the sum * of the memory needed for the pools * 2 - cycle through all the objects in every pool and for every object do * * 2a - get the list of the physical address descriptors * 2b - calculate the offset in the array of page descriptors in the * main MDL * 2c - copy the descriptors of the object in the main MDL * * 3 - return the resulting MDL that needs to be mapped in userland * * In this way we will have an MDL that describes all the memory for the * objects in a single MDL */ PMDL win32_build_user_vm_map(struct netmap_mem_d* nmd) { u_int memflags, ofs = 0; PMDL mainMdl, tempMdl; uint64_t memsize; int i, j; if (netmap_mem_get_info(nmd, &memsize, &memflags, NULL)) { nm_prerr("memory not finalised yet"); return NULL; } mainMdl = IoAllocateMdl(NULL, memsize, FALSE, FALSE, NULL); if (mainMdl == NULL) { nm_prerr("failed to allocate mdl"); return NULL; } NMA_LOCK(nmd); for (i = 0; i < NETMAP_POOLS_NR; i++) { struct netmap_obj_pool *p = &nmd->pools[i]; int clsz = p->_clustsize; int clobjs = p->_clustentries; /* objects per cluster */ int mdl_len = sizeof(PFN_NUMBER) 
* BYTES_TO_PAGES(clsz); PPFN_NUMBER pSrc, pDst; /* each pool has a different cluster size so we need to reallocate */ tempMdl = IoAllocateMdl(p->lut[0].vaddr, clsz, FALSE, FALSE, NULL); if (tempMdl == NULL) { NMA_UNLOCK(nmd); nm_prerr("fail to allocate tempMdl"); IoFreeMdl(mainMdl); return NULL; } pSrc = MmGetMdlPfnArray(tempMdl); /* create one entry per cluster, the lut[] has one entry per object */ for (j = 0; j < p->numclusters; j++, ofs += clsz) { pDst = &MmGetMdlPfnArray(mainMdl)[BYTES_TO_PAGES(ofs)]; MmInitializeMdl(tempMdl, p->lut[j*clobjs].vaddr, clsz); MmBuildMdlForNonPagedPool(tempMdl); /* compute physical page addresses */ RtlCopyMemory(pDst, pSrc, mdl_len); /* copy the page descriptors */ mainMdl->MdlFlags = tempMdl->MdlFlags; /* XXX what is in here ? */ } IoFreeMdl(tempMdl); } NMA_UNLOCK(nmd); return mainMdl; } #endif /* _WIN32 */ /* * helper function for OS-specific mmap routines (currently only windows). * Given an nmd and a pool index, returns the cluster size and number of clusters. * Returns 0 if memory is finalised and the pool is valid, otherwise 1. * It should be called under NMA_LOCK(nmd) otherwise the underlying info can change. */ int netmap_mem2_get_pool_info(struct netmap_mem_d* nmd, u_int pool, u_int *clustsize, u_int *numclusters) { if (!nmd || !clustsize || !numclusters || pool >= NETMAP_POOLS_NR) return 1; /* invalid arguments */ // NMA_LOCK_ASSERT(nmd); if (!(nmd->flags & NETMAP_MEM_FINALIZED)) { *clustsize = *numclusters = 0; return 1; /* not ready yet */ } *clustsize = nmd->pools[pool]._clustsize; *numclusters = nmd->pools[pool].numclusters; return 0; /* success */ } static int netmap_mem2_get_info(struct netmap_mem_d* nmd, uint64_t* size, u_int *memflags, nm_memid_t *id) { int error = 0; error = netmap_mem_config(nmd); if (error) goto out; if (size) { if (nmd->flags & NETMAP_MEM_FINALIZED) { *size = nmd->nm_totalsize; } else { int i; *size = 0; for (i = 0; i < NETMAP_POOLS_NR; i++) { struct netmap_obj_pool *p = nmd->pools + i; *size += ((size_t)p->_numclusters * (size_t)p->_clustsize); } } } if (memflags) *memflags = nmd->flags; if (id) *id = nmd->nm_id; out: return error; } /* * we store objects by kernel address, need to find the offset * within the pool to export the value to userspace. * Algorithm: scan until we find the cluster, then add the * actual offset in the cluster */ static ssize_t netmap_obj_offset(struct netmap_obj_pool *p, const void *vaddr) { int i, k = p->_clustentries, n = p->objtotal; ssize_t ofs = 0; for (i = 0; i < n; i += k, ofs += p->_clustsize) { const char *base = p->lut[i].vaddr; ssize_t relofs = (const char *) vaddr - base; if (relofs < 0 || relofs >= p->_clustsize) continue; ofs = ofs + relofs; nm_prdis("%s: return offset %d (cluster %d) for pointer %p", p->name, ofs, i, vaddr); return ofs; } nm_prerr("address %p is not contained inside any cluster (%s)", vaddr, p->name); return 0; /* An error occurred */ } /* Helper functions which convert virtual addresses to offsets */ #define netmap_if_offset(n, v) \ netmap_obj_offset(&(n)->pools[NETMAP_IF_POOL], (v)) #define netmap_ring_offset(n, v) \ ((n)->pools[NETMAP_IF_POOL].memtotal + \ netmap_obj_offset(&(n)->pools[NETMAP_RING_POOL], (v))) static ssize_t netmap_mem2_if_offset(struct netmap_mem_d *nmd, const void *addr) { return netmap_if_offset(nmd, addr); } /* * report the index, and use start position as a hint, * otherwise buffer allocation becomes terribly expensive. 
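*/

netmap_obj_malloc() below implements that hint: resume the first-fit scan at *start, skip exhausted 32-bit words, claim the lowest set bit, and report the final word index back through *start so the next allocation does not rescan from zero. A reduced userspace model of the scan; all names are illustrative:

#include <stdint.h>
#include <stddef.h>

/* Hinted first-fit scan over a free bitmap (1 = free). Returns the
 * claimed object index, or -1 if the pool is exhausted. */
static int
bitmap_alloc_hinted(uint32_t *bitmap, uint32_t nwords, uint32_t *start)
{
	uint32_t i, j, mask, cur;

	for (i = (start != NULL) ? *start : 0; i < nwords; i++) {
		cur = bitmap[i];
		if (cur == 0)		/* word fully allocated */
			continue;
		/* Find the lowest set bit in this word. */
		for (j = 0, mask = 1; (cur & mask) == 0; j++, mask <<= 1)
			;
		bitmap[i] &= ~mask;	/* mark object as in use */
		if (start != NULL)
			*start = i;	/* remember where we got to */
		return (int)(i * 32 + j);
	}
	return -1;			/* pool exhausted */
}

/*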
*/ static void * netmap_obj_malloc(struct netmap_obj_pool *p, u_int len, uint32_t *start, uint32_t *index) { uint32_t i = 0; /* index in the bitmap */ uint32_t mask, j = 0; /* slot counter */ void *vaddr = NULL; if (len > p->_objsize) { nm_prerr("%s request size %d too large", p->name, len); return NULL; } if (p->objfree == 0) { nm_prerr("no more %s objects", p->name); return NULL; } if (start) i = *start; /* termination is guaranteed by p->free, but better check bounds on i */ while (vaddr == NULL && i < p->bitmap_slots) { uint32_t cur = p->bitmap[i]; if (cur == 0) { /* bitmask is fully used */ i++; continue; } /* locate a slot */ for (j = 0, mask = 1; (cur & mask) == 0; j++, mask <<= 1) ; p->bitmap[i] &= ~mask; /* mark object as in use */ p->objfree--; vaddr = p->lut[i * 32 + j].vaddr; if (index) *index = i * 32 + j; } nm_prdis("%s allocator: allocated object @ [%d][%d]: vaddr %p",p->name, i, j, vaddr); if (start) *start = i; return vaddr; } /* * free by index, not by address. * XXX should we also cleanup the content ? */ static int netmap_obj_free(struct netmap_obj_pool *p, uint32_t j) { uint32_t *ptr, mask; if (j >= p->objtotal) { nm_prerr("invalid index %u, max %u", j, p->objtotal); return 1; } ptr = &p->bitmap[j / 32]; mask = (1 << (j % 32)); if (*ptr & mask) { nm_prerr("ouch, double free on buffer %d", j); return 1; } else { *ptr |= mask; p->objfree++; return 0; } } /* * free by address. This is slow but is only used for a few * objects (rings, nifp) */ static void netmap_obj_free_va(struct netmap_obj_pool *p, void *vaddr) { u_int i, j, n = p->numclusters; for (i = 0, j = 0; i < n; i++, j += p->_clustentries) { void *base = p->lut[i * p->_clustentries].vaddr; ssize_t relofs = (ssize_t) vaddr - (ssize_t) base; /* Given address, is out of the scope of the current cluster.*/ if (base == NULL || vaddr < base || relofs >= p->_clustsize) continue; j = j + relofs / p->_objsize; /* KASSERT(j != 0, ("Cannot free object 0")); */ netmap_obj_free(p, j); return; } nm_prerr("address %p is not contained inside any cluster (%s)", vaddr, p->name); } unsigned netmap_mem_bufsize(struct netmap_mem_d *nmd) { return nmd->pools[NETMAP_BUF_POOL]._objsize; } #define netmap_if_malloc(n, len) netmap_obj_malloc(&(n)->pools[NETMAP_IF_POOL], len, NULL, NULL) #define netmap_if_free(n, v) netmap_obj_free_va(&(n)->pools[NETMAP_IF_POOL], (v)) #define netmap_ring_malloc(n, len) netmap_obj_malloc(&(n)->pools[NETMAP_RING_POOL], len, NULL, NULL) #define netmap_ring_free(n, v) netmap_obj_free_va(&(n)->pools[NETMAP_RING_POOL], (v)) #define netmap_buf_malloc(n, _pos, _index) \ netmap_obj_malloc(&(n)->pools[NETMAP_BUF_POOL], netmap_mem_bufsize(n), _pos, _index) #if 0 /* currently unused */ /* Return the index associated to the given packet buffer */ #define netmap_buf_index(n, v) \ (netmap_obj_offset(&(n)->pools[NETMAP_BUF_POOL], (v)) / NETMAP_BDG_BUF_SIZE(n)) #endif /* * allocate extra buffers in a linked list. * returns the actual number. 
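 *
 * The "list" is threaded through the buffers themselves: the first 32-bit
 * word of each extra buffer stores the index of the next one, and index 0
 * terminates the chain. A standalone sketch of the same linking scheme
 * (the demo_* names are illustrative only):
 */
#if 0	/* illustrative sketch only, never compiled */
#include <stdint.h>
#include <stdio.h>

#define DEMO_NBUFS	8
static uint32_t demo_buf[DEMO_NBUFS];	/* word 0 of each buffer */

static void
demo_push(uint32_t *head, uint32_t idx)
{
	demo_buf[idx] = *head;	/* link to the previous head */
	*head = idx;		/* the new buffer becomes the head */
}

int
main(void)
{
	uint32_t head = 0;	/* 0 is the 'null' index, i.e. empty list */
	uint32_t cur;

	demo_push(&head, 3);
	demo_push(&head, 5);
	/* walk the chain the way netmap_extra_free() does */
	for (cur = head; cur != 0; cur = demo_buf[cur])
		printf("buffer %u\n", cur);	/* prints 5, then 3 */
	return (0);
}
#endif
/*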
*/ uint32_t netmap_extra_alloc(struct netmap_adapter *na, uint32_t *head, uint32_t n) { struct netmap_mem_d *nmd = na->nm_mem; uint32_t i, pos = 0; /* opaque, scan position in the bitmap */ NMA_LOCK(nmd); *head = 0; /* default, 'null' index, i.e. empty list */ for (i = 0; i < n; i++) { uint32_t cur = *head; /* save current head */ uint32_t *p = netmap_buf_malloc(nmd, &pos, head); if (p == NULL) { nm_prerr("no more buffers after %d of %d", i, n); *head = cur; /* restore */ break; } nm_prdis(5, "allocate buffer %d -> %d", *head, cur); *p = cur; /* link to previous head */ } NMA_UNLOCK(nmd); return i; } static void netmap_extra_free(struct netmap_adapter *na, uint32_t head) { struct lut_entry *lut = na->na_lut.lut; struct netmap_mem_d *nmd = na->nm_mem; struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL]; uint32_t i, cur, *buf; nm_prdis("freeing the extra list"); for (i = 0; head >= 2 && head < p->objtotal; i++) { cur = head; buf = lut[head].vaddr; head = *buf; *buf = 0; if (netmap_obj_free(p, cur)) break; } if (head != 0) nm_prerr("breaking with head %d", head); if (netmap_debug & NM_DEBUG_MEM) nm_prinf("freed %d buffers", i); } /* Return nonzero on error */ static int netmap_new_bufs(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n) { struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL]; u_int i = 0; /* slot counter */ uint32_t pos = 0; /* slot in p->bitmap */ uint32_t index = 0; /* buffer index */ for (i = 0; i < n; i++) { void *vaddr = netmap_buf_malloc(nmd, &pos, &index); if (vaddr == NULL) { nm_prerr("no more buffers after %d of %d", i, n); goto cleanup; } slot[i].buf_idx = index; slot[i].len = p->_objsize; slot[i].flags = 0; slot[i].ptr = 0; } nm_prdis("%s: allocated %d buffers, %d available, first at %d", p->name, n, p->objfree, pos); return (0); cleanup: while (i > 0) { i--; netmap_obj_free(p, slot[i].buf_idx); } bzero(slot, n * sizeof(slot[0])); return (ENOMEM); } static void netmap_mem_set_ring(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n, uint32_t index) { struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL]; u_int i; for (i = 0; i < n; i++) { slot[i].buf_idx = index; slot[i].len = p->_objsize; slot[i].flags = 0; } } static void netmap_free_buf(struct netmap_mem_d *nmd, uint32_t i) { struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL]; if (i < 2 || i >= p->objtotal) { nm_prerr("Cannot free buf#%d: should be in [2, %d[", i, p->objtotal); return; } netmap_obj_free(p, i); } static void netmap_free_bufs(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n) { u_int i; for (i = 0; i < n; i++) { if (slot[i].buf_idx > 1) netmap_free_buf(nmd, slot[i].buf_idx); } nm_prdis("%s: released some buffers, available: %u", p->name, p->objfree); } static void netmap_reset_obj_allocator(struct netmap_obj_pool *p) { if (p == NULL) return; if (p->bitmap) nm_os_free(p->bitmap); p->bitmap = NULL; if (p->invalid_bitmap) nm_os_free(p->invalid_bitmap); p->invalid_bitmap = NULL; if (!p->alloc_done) { /* allocation was done by somebody else. * Let them clean up after themselves. */ return; } if (p->lut) { u_int i; /* * Free each cluster allocated in * netmap_finalize_obj_allocator(). The cluster start * addresses are stored at multiples of p->_clustentries * in the lut.
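 *
 * For example, with the (hypothetical) geometry objsize = 2048 and
 * _clustentries = 2, object 7 lives in the cluster whose start address
 * is stored at lut[6], at byte offset 2048 within that cluster:
 */
#if 0	/* illustrative sketch only, never compiled */
#include <stdio.h>

#define DEMO_CLUSTENTRIES	2	/* objects per cluster (hypothetical) */
#define DEMO_OBJSIZE		2048	/* 2 * 2048 fills a 4 KiB cluster */

int
main(void)
{
	unsigned obj = 7;
	unsigned base = obj - obj % DEMO_CLUSTENTRIES; /* lut index of cluster */
	unsigned ofs = (obj % DEMO_CLUSTENTRIES) * DEMO_OBJSIZE;

	printf("object %u: cluster at lut[%u], offset %u\n", obj, base, ofs);
	return (0);
}
#endif
/*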
*/ for (i = 0; i < p->objtotal; i += p->_clustentries) { contigfree(p->lut[i].vaddr, p->_clustsize, M_NETMAP); } nm_free_lut(p->lut, p->objtotal); } p->lut = NULL; p->objtotal = 0; p->memtotal = 0; p->numclusters = 0; p->objfree = 0; p->alloc_done = 0; } /* * Free all resources related to an allocator. */ static void netmap_destroy_obj_allocator(struct netmap_obj_pool *p) { if (p == NULL) return; netmap_reset_obj_allocator(p); } /* * We receive a request for objtotal objects, of size objsize each. * Internally we may round up both numbers, as we allocate objects * in small clusters multiple of the page size. * We need to keep track of objtotal and clustentries, * as they are needed when freeing memory. * * XXX note -- userspace needs the buffers to be contiguous, * so we cannot afford gaps at the end of a cluster. */ /* call with NMA_LOCK held */ static int netmap_config_obj_allocator(struct netmap_obj_pool *p, u_int objtotal, u_int objsize) { int i; u_int clustsize; /* the cluster size, multiple of page size */ u_int clustentries; /* how many objects per entry */ /* we store the current request, so we can * detect configuration changes later */ p->r_objtotal = objtotal; p->r_objsize = objsize; #define MAX_CLUSTSIZE (1<<22) // 4 MB #define LINE_ROUND NM_CACHE_ALIGN // 64 if (objsize >= MAX_CLUSTSIZE) { /* we could do it but there is no point */ nm_prerr("unsupported allocation for %d bytes", objsize); return EINVAL; } /* make sure objsize is a multiple of LINE_ROUND */ i = (objsize & (LINE_ROUND - 1)); if (i) { nm_prinf("aligning object by %d bytes", LINE_ROUND - i); objsize += LINE_ROUND - i; } if (objsize < p->objminsize || objsize > p->objmaxsize) { nm_prerr("requested objsize %d out of range [%d, %d]", objsize, p->objminsize, p->objmaxsize); return EINVAL; } if (objtotal < p->nummin || objtotal > p->nummax) { nm_prerr("requested objtotal %d out of range [%d, %d]", objtotal, p->nummin, p->nummax); return EINVAL; } /* * Compute number of objects using a brute-force approach: * given a max cluster size, * we try to fill it with objects keeping track of the * wasted space to the next page boundary. */ for (clustentries = 0, i = 1;; i++) { u_int delta, used = i * objsize; if (used > MAX_CLUSTSIZE) break; delta = used % PAGE_SIZE; if (delta == 0) { // exact solution clustentries = i; break; } } /* exact solution not found */ if (clustentries == 0) { nm_prerr("unsupported allocation for %d bytes", objsize); return EINVAL; } /* compute clustsize */ clustsize = clustentries * objsize; if (netmap_debug & NM_DEBUG_MEM) nm_prinf("objsize %d clustsize %d objects %d", objsize, clustsize, clustentries); /* * The number of clusters is n = ceil(objtotal/clustentries) * objtotal' = n * clustentries */ p->_clustentries = clustentries; p->_clustsize = clustsize; p->_numclusters = (objtotal + clustentries - 1) / clustentries; /* actual values (may be larger than requested) */ p->_objsize = objsize; p->_objtotal = p->_numclusters * clustentries; return 0; } /* call with NMA_LOCK held */ static int netmap_finalize_obj_allocator(struct netmap_obj_pool *p) { int i; /* must be signed */ size_t n; if (p->lut) { /* if the lut is already there we assume that also all the * clusters have already been allocated, possibily by somebody * else (e.g., extmem). In the latter case, the alloc_done flag * will remain at zero, so that we will not attempt to * deallocate the clusters by ourselves in * netmap_reset_obj_allocator. 
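 *
 * (A standalone model of the brute-force sizing search used by
 * netmap_config_obj_allocator() above: find the smallest number of
 * objects that ends exactly on a page boundary, capped by the maximum
 * cluster size. The demo_* names and sample sizes are illustrative only.)
 */
#if 0	/* illustrative sketch only, never compiled */
#include <stdio.h>

#define DEMO_PAGE_SIZE	4096
#define DEMO_MAX_CLUST	(1 << 22)	/* 4 MB, as MAX_CLUSTSIZE above */

static unsigned
demo_clustentries(unsigned objsize)
{
	unsigned i, used;

	for (i = 1; (used = i * objsize) <= DEMO_MAX_CLUST; i++) {
		if (used % DEMO_PAGE_SIZE == 0)	/* exact solution */
			return (i);
	}
	return (0);	/* no exact fit below the cluster-size cap */
}

int
main(void)
{
	/* 2048-byte objects: 2 per 4 KiB page */
	printf("%u\n", demo_clustentries(2048));	/* 2 */
	/* 1536-byte objects: 8 objects = 12288 bytes = 3 pages */
	printf("%u\n", demo_clustentries(1536));	/* 8 */
	return (0);
}
#endif
/*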
*/ return 0; } /* optimistically assume we have enough memory */ p->numclusters = p->_numclusters; p->objtotal = p->_objtotal; p->alloc_done = 1; p->lut = nm_alloc_lut(p->objtotal); if (p->lut == NULL) { nm_prerr("Unable to create lookup table for '%s'", p->name); goto clean; } /* * Allocate clusters, init pointers */ n = p->_clustsize; for (i = 0; i < (int)p->objtotal;) { int lim = i + p->_clustentries; char *clust; /* * XXX Note, we only need contigmalloc() for buffers attached * to native interfaces. In all other cases (nifp, netmap rings * and even buffers for VALE ports or emulated interfaces) we * can live with standard malloc, because the hardware will not * access the pages directly. */ clust = contigmalloc(n, M_NETMAP, M_NOWAIT | M_ZERO, (size_t)0, -1UL, PAGE_SIZE, 0); if (clust == NULL) { /* * If we get here, there is a severe memory shortage, * so halve the allocated memory to reclaim some. */ nm_prerr("Unable to create cluster at %d for '%s' allocator", i, p->name); if (i < 2) /* nothing to halve */ goto out; lim = i / 2; for (i--; i >= lim; i--) { if (i % p->_clustentries == 0 && p->lut[i].vaddr) contigfree(p->lut[i].vaddr, n, M_NETMAP); p->lut[i].vaddr = NULL; } out: p->objtotal = i; /* we may have stopped in the middle of a cluster */ p->numclusters = (i + p->_clustentries - 1) / p->_clustentries; break; } /* * Set lut state for all buffers in the current cluster. * * [i, lim) is the set of buffer indexes that cover the * current cluster. * * 'clust' is really the address of the current buffer in * the current cluster as we index through it with a stride * of p->_objsize. */ for (; i < lim; i++, clust += p->_objsize) { p->lut[i].vaddr = clust; #if !defined(linux) && !defined(_WIN32) p->lut[i].paddr = vtophys(clust); #endif } } p->memtotal = (size_t)p->numclusters * (size_t)p->_clustsize; if (netmap_verbose) nm_prinf("Pre-allocated %d clusters (%d/%zuKB) for '%s'", p->numclusters, p->_clustsize >> 10, p->memtotal >> 10, p->name); return 0; clean: netmap_reset_obj_allocator(p); return ENOMEM; } /* call with lock held */ static int netmap_mem_params_changed(struct netmap_obj_params* p) { int i, rv = 0; for (i = 0; i < NETMAP_POOLS_NR; i++) { if (p[i].last_size != p[i].size || p[i].last_num != p[i].num) { p[i].last_size = p[i].size; p[i].last_num = p[i].num; rv = 1; } } return rv; } static void netmap_mem_reset_all(struct netmap_mem_d *nmd) { int i; if (netmap_debug & NM_DEBUG_MEM) nm_prinf("resetting %p", nmd); for (i = 0; i < NETMAP_POOLS_NR; i++) { netmap_reset_obj_allocator(&nmd->pools[i]); } nmd->flags &= ~NETMAP_MEM_FINALIZED; } static int netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na) { int i, lim = p->objtotal; - struct netmap_lut *lut = &na->na_lut; + struct netmap_lut *lut; if (na == NULL || na->pdev == NULL) return 0; + lut = &na->na_lut; #if defined(__FreeBSD__) /* On FreeBSD mapping and unmapping is performed by the txsync * and rxsync routine, packet by packet. 
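 *
 * On Linux, by contrast, netmap_mem_map() below performs one DMA mapping
 * per cluster and then derives the bus address of every object in the
 * cluster by a fixed stride. A standalone model of that fill (the demo_*
 * names and the sample addresses are illustrative only):
 */
#if 0	/* illustrative sketch only, never compiled */
#include <stdint.h>
#include <stdio.h>

#define DEMO_CLUSTENTRIES	4
#define DEMO_OBJSIZE		1024
#define DEMO_NOBJ		8

int
main(void)
{
	/* pretend bus addresses, one per mapped cluster */
	uint64_t cluster_paddr[2] = { 0x10000, 0x40000 };
	uint64_t plut[DEMO_NOBJ];
	unsigned i, j;

	for (i = 0; i < DEMO_NOBJ; i += DEMO_CLUSTENTRIES) {
		plut[i] = cluster_paddr[i / DEMO_CLUSTENTRIES];
		/* same stride as the inner loop of netmap_mem_map() */
		for (j = 1; j < DEMO_CLUSTENTRIES; j++)
			plut[i + j] = plut[i + j - 1] + DEMO_OBJSIZE;
	}
	for (i = 0; i < DEMO_NOBJ; i++)
		printf("plut[%u] = 0x%llx\n", i, (unsigned long long)plut[i]);
	return (0);
}
#endif
/*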
*/ (void)i; (void)lim; (void)lut; #elif defined(_WIN32) (void)i; (void)lim; (void)lut; nm_prerr("unsupported on Windows"); #else /* linux */ nm_prdis("unmapping and freeing plut for %s", na->name); if (lut->plut == NULL) return 0; for (i = 0; i < lim; i += p->_clustentries) { if (lut->plut[i].paddr) netmap_unload_map(na, (bus_dma_tag_t) na->pdev, &lut->plut[i].paddr, p->_clustsize); } nm_free_plut(lut->plut); lut->plut = NULL; #endif /* linux */ return 0; } static int netmap_mem_map(struct netmap_obj_pool *p, struct netmap_adapter *na) { int error = 0; int i, lim = p->objtotal; struct netmap_lut *lut = &na->na_lut; if (na->pdev == NULL) return 0; #if defined(__FreeBSD__) /* On FreeBSD mapping and unmapping is performed by the txsync * and rxsync routine, packet by packet. */ (void)i; (void)lim; (void)lut; #elif defined(_WIN32) (void)i; (void)lim; (void)lut; nm_prerr("unsupported on Windows"); #else /* linux */ if (lut->plut != NULL) { nm_prdis("plut already allocated for %s", na->name); return 0; } nm_prdis("allocating physical lut for %s", na->name); lut->plut = nm_alloc_plut(lim); if (lut->plut == NULL) { nm_prerr("Failed to allocate physical lut for %s", na->name); return ENOMEM; } for (i = 0; i < lim; i += p->_clustentries) { lut->plut[i].paddr = 0; } for (i = 0; i < lim; i += p->_clustentries) { int j; if (p->lut[i].vaddr == NULL) continue; error = netmap_load_map(na, (bus_dma_tag_t) na->pdev, &lut->plut[i].paddr, p->lut[i].vaddr, p->_clustsize); if (error) { nm_prerr("Failed to map cluster #%d from the %s pool", i, p->name); break; } for (j = 1; j < p->_clustentries; j++) { lut->plut[i + j].paddr = lut->plut[i + j - 1].paddr + p->_objsize; } } if (error) netmap_mem_unmap(p, na); #endif /* linux */ return error; } static int netmap_mem_finalize_all(struct netmap_mem_d *nmd) { int i; if (nmd->flags & NETMAP_MEM_FINALIZED) return 0; nmd->lasterr = 0; nmd->nm_totalsize = 0; for (i = 0; i < NETMAP_POOLS_NR; i++) { nmd->lasterr = netmap_finalize_obj_allocator(&nmd->pools[i]); if (nmd->lasterr) goto error; nmd->nm_totalsize += nmd->pools[i].memtotal; } nmd->lasterr = netmap_mem_init_bitmaps(nmd); if (nmd->lasterr) goto error; nmd->flags |= NETMAP_MEM_FINALIZED; if (netmap_verbose) nm_prinf("interfaces %zd KB, rings %zd KB, buffers %zd MB", nmd->pools[NETMAP_IF_POOL].memtotal >> 10, nmd->pools[NETMAP_RING_POOL].memtotal >> 10, nmd->pools[NETMAP_BUF_POOL].memtotal >> 20); if (netmap_verbose) nm_prinf("Free buffers: %d", nmd->pools[NETMAP_BUF_POOL].objfree); return 0; error: netmap_mem_reset_all(nmd); return nmd->lasterr; } /* * allocator for private memory */ static void * _netmap_mem_private_new(size_t size, struct netmap_obj_params *p, struct netmap_mem_ops *ops, int *perr) { struct netmap_mem_d *d = NULL; int i, err = 0; d = nm_os_malloc(size); if (d == NULL) { err = ENOMEM; goto error; } *d = nm_blueprint; d->ops = ops; err = nm_mem_assign_id(d); if (err) goto error_free; snprintf(d->name, NM_MEM_NAMESZ, "%d", d->nm_id); for (i = 0; i < NETMAP_POOLS_NR; i++) { snprintf(d->pools[i].name, NETMAP_POOL_MAX_NAMSZ, nm_blueprint.pools[i].name, d->name); d->params[i].num = p[i].num; d->params[i].size = p[i].size; } NMA_LOCK_INIT(d); err = netmap_mem_config(d); if (err) goto error_rel_id; d->flags &= ~NETMAP_MEM_FINALIZED; return d; error_rel_id: NMA_LOCK_DESTROY(d); nm_mem_release_id(d); error_free: nm_os_free(d); error: if (perr) *perr = err; return NULL; } struct netmap_mem_d * netmap_mem_private_new(u_int txr, u_int txd, u_int rxr, u_int rxd, u_int extra_bufs, u_int npipes, int *perr) { struct 
netmap_mem_d *d = NULL; struct netmap_obj_params p[NETMAP_POOLS_NR]; int i; u_int v, maxd; /* account for the fake host rings */ txr++; rxr++; /* copy the min values */ for (i = 0; i < NETMAP_POOLS_NR; i++) { p[i] = netmap_min_priv_params[i]; } /* possibly increase them to fit user request */ v = sizeof(struct netmap_if) + sizeof(ssize_t) * (txr + rxr); if (p[NETMAP_IF_POOL].size < v) p[NETMAP_IF_POOL].size = v; v = 2 + 4 * npipes; if (p[NETMAP_IF_POOL].num < v) p[NETMAP_IF_POOL].num = v; maxd = (txd > rxd) ? txd : rxd; v = sizeof(struct netmap_ring) + sizeof(struct netmap_slot) * maxd; if (p[NETMAP_RING_POOL].size < v) p[NETMAP_RING_POOL].size = v; /* each pipe endpoint needs two tx rings (1 normal + 1 host, fake) * and two rx rings (again, 1 normal and 1 fake host) */ v = txr + rxr + 8 * npipes; if (p[NETMAP_RING_POOL].num < v) p[NETMAP_RING_POOL].num = v; /* for each pipe we only need the buffers for the 4 "real" rings. * On the other hand, the pipe ring dimension may be different from * the parent port ring dimension. As a compromise, we allocate twice the * space that would be needed if the pipe rings were the same size as the parent rings */ v = (4 * npipes + rxr) * rxd + (4 * npipes + txr) * txd + 2 + extra_bufs; /* the +2 is for the tx and rx fake buffers (indices 0 and 1) */ if (p[NETMAP_BUF_POOL].num < v) p[NETMAP_BUF_POOL].num = v; if (netmap_verbose) nm_prinf("req if %d*%d ring %d*%d buf %d*%d", p[NETMAP_IF_POOL].num, p[NETMAP_IF_POOL].size, p[NETMAP_RING_POOL].num, p[NETMAP_RING_POOL].size, p[NETMAP_BUF_POOL].num, p[NETMAP_BUF_POOL].size); d = _netmap_mem_private_new(sizeof(*d), p, &netmap_mem_global_ops, perr); return d; } /* call with lock held */ static int netmap_mem2_config(struct netmap_mem_d *nmd) { int i; if (!netmap_mem_params_changed(nmd->params)) goto out; nm_prdis("reconfiguring"); if (nmd->flags & NETMAP_MEM_FINALIZED) { /* reset previous allocation */ for (i = 0; i < NETMAP_POOLS_NR; i++) { netmap_reset_obj_allocator(&nmd->pools[i]); } nmd->flags &= ~NETMAP_MEM_FINALIZED; } for (i = 0; i < NETMAP_POOLS_NR; i++) { nmd->lasterr = netmap_config_obj_allocator(&nmd->pools[i], nmd->params[i].num, nmd->params[i].size); if (nmd->lasterr) goto out; } out: return nmd->lasterr; } static int netmap_mem2_finalize(struct netmap_mem_d *nmd) { if (nmd->flags & NETMAP_MEM_FINALIZED) goto out; if (netmap_mem_finalize_all(nmd)) goto out; nmd->lasterr = 0; out: return nmd->lasterr; } static void netmap_mem2_delete(struct netmap_mem_d *nmd) { int i; for (i = 0; i < NETMAP_POOLS_NR; i++) { netmap_destroy_obj_allocator(&nmd->pools[i]); } NMA_LOCK_DESTROY(nmd); if (nmd != &nm_mem) nm_os_free(nmd); } #ifdef WITH_EXTMEM /* doubly linked list of all existing external allocators */ static struct netmap_mem_ext *netmap_mem_ext_list = NULL; NM_MTX_T nm_mem_ext_list_lock; #endif /* WITH_EXTMEM */ int netmap_mem_init(void) { NM_MTX_INIT(nm_mem_list_lock); NMA_LOCK_INIT(&nm_mem); netmap_mem_get(&nm_mem); #ifdef WITH_EXTMEM NM_MTX_INIT(nm_mem_ext_list_lock); #endif /* WITH_EXTMEM */ return (0); } void netmap_mem_fini(void) { netmap_mem_put(&nm_mem); } static void netmap_free_rings(struct netmap_adapter *na) { enum txrx t; for_rx_tx(t) { u_int i; for (i = 0; i < netmap_all_rings(na, t); i++) { struct netmap_kring *kring = NMR(na, t)[i]; struct netmap_ring *ring = kring->ring; if (ring == NULL || kring->users > 0 || (kring->nr_kflags & NKR_NEEDRING)) { if (netmap_debug & NM_DEBUG_MEM) nm_prinf("NOT deleting ring %s (ring %p, users %d needring %d)", kring->name, ring, kring->users, kring->nr_kflags &
NKR_NEEDRING); continue; } if (netmap_debug & NM_DEBUG_MEM) nm_prinf("deleting ring %s", kring->name); if (!(kring->nr_kflags & NKR_FAKERING)) { nm_prdis("freeing bufs for %s", kring->name); netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots); } else { nm_prdis("NOT freeing bufs for %s", kring->name); } netmap_ring_free(na->nm_mem, ring); kring->ring = NULL; } } } /* call with NMA_LOCK held * * Allocate netmap rings and buffers for this card. * The rings are contiguous, but have variable size. * The kring array must follow the layout described * in netmap_krings_create(). */ static int netmap_mem2_rings_create(struct netmap_adapter *na) { enum txrx t; for_rx_tx(t) { u_int i; for (i = 0; i < netmap_all_rings(na, t); i++) { struct netmap_kring *kring = NMR(na, t)[i]; struct netmap_ring *ring = kring->ring; u_int len, ndesc; if (ring || (!kring->users && !(kring->nr_kflags & NKR_NEEDRING))) { /* unneeded, or already created by somebody else */ if (netmap_debug & NM_DEBUG_MEM) nm_prinf("NOT creating ring %s (ring %p, users %d needring %d)", kring->name, ring, kring->users, kring->nr_kflags & NKR_NEEDRING); continue; } if (netmap_debug & NM_DEBUG_MEM) nm_prinf("creating %s", kring->name); ndesc = kring->nkr_num_slots; len = sizeof(struct netmap_ring) + ndesc * sizeof(struct netmap_slot); ring = netmap_ring_malloc(na->nm_mem, len); if (ring == NULL) { nm_prerr("Cannot allocate %s_ring", nm_txrx2str(t)); goto cleanup; } nm_prdis("txring at %p", ring); kring->ring = ring; *(uint32_t *)(uintptr_t)&ring->num_slots = ndesc; *(int64_t *)(uintptr_t)&ring->buf_ofs = (na->nm_mem->pools[NETMAP_IF_POOL].memtotal + na->nm_mem->pools[NETMAP_RING_POOL].memtotal) - netmap_ring_offset(na->nm_mem, ring); /* copy values from kring */ ring->head = kring->rhead; ring->cur = kring->rcur; ring->tail = kring->rtail; *(uint32_t *)(uintptr_t)&ring->nr_buf_size = netmap_mem_bufsize(na->nm_mem); nm_prdis("%s h %d c %d t %d", kring->name, ring->head, ring->cur, ring->tail); nm_prdis("initializing slots for %s_ring", nm_txrx2str(t)); if (!(kring->nr_kflags & NKR_FAKERING)) { /* this is a real ring */ if (netmap_debug & NM_DEBUG_MEM) nm_prinf("allocating buffers for %s", kring->name); if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) { nm_prerr("Cannot allocate buffers for %s_ring", nm_txrx2str(t)); goto cleanup; } } else { /* this is a fake ring, set all indices to 0 */ if (netmap_debug & NM_DEBUG_MEM) nm_prinf("NOT allocating buffers for %s", kring->name); netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 0); } /* ring info */ *(uint16_t *)(uintptr_t)&ring->ringid = kring->ring_id; *(uint16_t *)(uintptr_t)&ring->dir = kring->tx; } } return 0; cleanup: /* we cannot actually clean up here, since we don't own kring->users * and kring->nr_kflags & NKR_NEEDRING. The caller must decrement * the first or zero-out the second, then call netmap_free_rings() * to do the cleanup */ return ENOMEM; } static void netmap_mem2_rings_delete(struct netmap_adapter *na) { /* last instance, release bufs and rings */ netmap_free_rings(na); } /* call with NMA_LOCK held */ /* * Allocate the per-fd structure netmap_if. * * We assume that the configuration stored in na * (number of tx/rx rings and descs) does not change while * the interface is in netmap mode.
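 *
 * The ring_ofs[] values stored below are what userspace uses to reach the
 * rings: a process adds each offset to its pointer to the netmap_if (this
 * is what the NETMAP_TXRING()/NETMAP_RXRING() macros of net/netmap_user.h
 * reduce to). A standalone model with trimmed-down structures (the demo_*
 * names are illustrative only):
 */
#if 0	/* illustrative sketch only, never compiled */
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>

struct demo_ring { uint32_t head, cur, tail; };
struct demo_if { ssize_t ring_ofs[4]; };

int
main(void)
{
	static union {
		struct demo_if ifhdr;
		char raw[4096];		/* stands in for the mmap()ed region */
	} shm;
	struct demo_if *nifp = &shm.ifhdr;
	struct demo_ring *ring = (struct demo_ring *)(void *)(shm.raw + 256);
	struct demo_ring *r;

	ring->head = 7;
	/* what netmap_mem2_if_new() stores ... */
	nifp->ring_ofs[0] = (char *)ring - (char *)nifp;
	/* ... and how userspace gets the ring back */
	r = (struct demo_ring *)(void *)((char *)nifp + nifp->ring_ofs[0]);
	printf("head = %u\n", r->head);	/* prints 7 */
	return (0);
}
#endif
/*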
*/ static struct netmap_if * netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv) { struct netmap_if *nifp; ssize_t base; /* handy for relative offsets between rings and nifp */ u_int i, len, n[NR_TXRX], ntot; enum txrx t; ntot = 0; for_rx_tx(t) { /* account for the (eventually fake) host rings */ n[t] = netmap_all_rings(na, t); ntot += n[t]; } /* * the descriptor is followed inline by an array of offsets * to the tx and rx rings in the shared memory region. */ len = sizeof(struct netmap_if) + (ntot * sizeof(ssize_t)); nifp = netmap_if_malloc(na->nm_mem, len); if (nifp == NULL) { NMA_UNLOCK(na->nm_mem); return NULL; } /* initialize base fields -- override const */ *(u_int *)(uintptr_t)&nifp->ni_tx_rings = na->num_tx_rings; *(u_int *)(uintptr_t)&nifp->ni_rx_rings = na->num_rx_rings; *(u_int *)(uintptr_t)&nifp->ni_host_tx_rings = (na->num_host_tx_rings ? na->num_host_tx_rings : 1); *(u_int *)(uintptr_t)&nifp->ni_host_rx_rings = (na->num_host_rx_rings ? na->num_host_rx_rings : 1); strlcpy(nifp->ni_name, na->name, sizeof(nifp->ni_name)); /* * fill the slots for the rx and tx rings. They contain the offset * between the ring and nifp, so the information is usable in * userspace to reach the ring from the nifp. */ base = netmap_if_offset(na->nm_mem, nifp); for (i = 0; i < n[NR_TX]; i++) { /* XXX instead of ofs == 0 maybe use the offset of an error * ring, like we do for buffers? */ ssize_t ofs = 0; if (na->tx_rings[i]->ring != NULL && i >= priv->np_qfirst[NR_TX] && i < priv->np_qlast[NR_TX]) { ofs = netmap_ring_offset(na->nm_mem, na->tx_rings[i]->ring) - base; } *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] = ofs; } for (i = 0; i < n[NR_RX]; i++) { /* XXX instead of ofs == 0 maybe use the offset of an error * ring, like we do for buffers? 
*/ ssize_t ofs = 0; if (na->rx_rings[i]->ring != NULL && i >= priv->np_qfirst[NR_RX] && i < priv->np_qlast[NR_RX]) { ofs = netmap_ring_offset(na->nm_mem, na->rx_rings[i]->ring) - base; } *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+n[NR_TX]] = ofs; } return (nifp); } static void netmap_mem2_if_delete(struct netmap_adapter *na, struct netmap_if *nifp) { if (nifp == NULL) /* nothing to do */ return; if (nifp->ni_bufs_head) netmap_extra_free(na, nifp->ni_bufs_head); netmap_if_free(na->nm_mem, nifp); } static void netmap_mem2_deref(struct netmap_mem_d *nmd) { if (netmap_debug & NM_DEBUG_MEM) nm_prinf("active = %d", nmd->active); } struct netmap_mem_ops netmap_mem_global_ops = { .nmd_get_lut = netmap_mem2_get_lut, .nmd_get_info = netmap_mem2_get_info, .nmd_ofstophys = netmap_mem2_ofstophys, .nmd_config = netmap_mem2_config, .nmd_finalize = netmap_mem2_finalize, .nmd_deref = netmap_mem2_deref, .nmd_delete = netmap_mem2_delete, .nmd_if_offset = netmap_mem2_if_offset, .nmd_if_new = netmap_mem2_if_new, .nmd_if_delete = netmap_mem2_if_delete, .nmd_rings_create = netmap_mem2_rings_create, .nmd_rings_delete = netmap_mem2_rings_delete }; int netmap_mem_pools_info_get(struct nmreq_pools_info *req, struct netmap_mem_d *nmd) { int ret; ret = netmap_mem_get_info(nmd, &req->nr_memsize, NULL, &req->nr_mem_id); if (ret) { return ret; } NMA_LOCK(nmd); req->nr_if_pool_offset = 0; req->nr_if_pool_objtotal = nmd->pools[NETMAP_IF_POOL].objtotal; req->nr_if_pool_objsize = nmd->pools[NETMAP_IF_POOL]._objsize; req->nr_ring_pool_offset = nmd->pools[NETMAP_IF_POOL].memtotal; req->nr_ring_pool_objtotal = nmd->pools[NETMAP_RING_POOL].objtotal; req->nr_ring_pool_objsize = nmd->pools[NETMAP_RING_POOL]._objsize; req->nr_buf_pool_offset = nmd->pools[NETMAP_IF_POOL].memtotal + nmd->pools[NETMAP_RING_POOL].memtotal; req->nr_buf_pool_objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal; req->nr_buf_pool_objsize = nmd->pools[NETMAP_BUF_POOL]._objsize; NMA_UNLOCK(nmd); return 0; } #ifdef WITH_EXTMEM struct netmap_mem_ext { struct netmap_mem_d up; struct nm_os_extmem *os; struct netmap_mem_ext *next, *prev; }; /* call with nm_mem_list_lock held */ static void netmap_mem_ext_register(struct netmap_mem_ext *e) { NM_MTX_LOCK(nm_mem_ext_list_lock); if (netmap_mem_ext_list) netmap_mem_ext_list->prev = e; e->next = netmap_mem_ext_list; netmap_mem_ext_list = e; e->prev = NULL; NM_MTX_UNLOCK(nm_mem_ext_list_lock); } /* call with nm_mem_list_lock held */ static void netmap_mem_ext_unregister(struct netmap_mem_ext *e) { if (e->prev) e->prev->next = e->next; else netmap_mem_ext_list = e->next; if (e->next) e->next->prev = e->prev; e->prev = e->next = NULL; } static struct netmap_mem_ext * netmap_mem_ext_search(struct nm_os_extmem *os) { struct netmap_mem_ext *e; NM_MTX_LOCK(nm_mem_ext_list_lock); for (e = netmap_mem_ext_list; e; e = e->next) { if (nm_os_extmem_isequal(e->os, os)) { netmap_mem_get(&e->up); break; } } NM_MTX_UNLOCK(nm_mem_ext_list_lock); return e; } static void netmap_mem_ext_delete(struct netmap_mem_d *d) { int i; struct netmap_mem_ext *e = (struct netmap_mem_ext *)d; netmap_mem_ext_unregister(e); for (i = 0; i < NETMAP_POOLS_NR; i++) { struct netmap_obj_pool *p = &d->pools[i]; if (p->lut) { nm_free_lut(p->lut, p->objtotal); p->lut = NULL; } } if (e->os) nm_os_extmem_delete(e->os); netmap_mem2_delete(d); } static int netmap_mem_ext_config(struct netmap_mem_d *nmd) { return 0; } struct netmap_mem_ops netmap_mem_ext_ops = { .nmd_get_lut = netmap_mem2_get_lut, .nmd_get_info = netmap_mem2_get_info, .nmd_ofstophys = netmap_mem2_ofstophys, 
.nmd_config = netmap_mem_ext_config, .nmd_finalize = netmap_mem2_finalize, .nmd_deref = netmap_mem2_deref, .nmd_delete = netmap_mem_ext_delete, .nmd_if_offset = netmap_mem2_if_offset, .nmd_if_new = netmap_mem2_if_new, .nmd_if_delete = netmap_mem2_if_delete, .nmd_rings_create = netmap_mem2_rings_create, .nmd_rings_delete = netmap_mem2_rings_delete }; struct netmap_mem_d * netmap_mem_ext_create(uint64_t usrptr, struct nmreq_pools_info *pi, int *perror) { int error = 0; int i, j; struct netmap_mem_ext *nme; char *clust; size_t off; struct nm_os_extmem *os = NULL; int nr_pages; // XXX sanity checks if (pi->nr_if_pool_objtotal == 0) pi->nr_if_pool_objtotal = netmap_min_priv_params[NETMAP_IF_POOL].num; if (pi->nr_if_pool_objsize == 0) pi->nr_if_pool_objsize = netmap_min_priv_params[NETMAP_IF_POOL].size; if (pi->nr_ring_pool_objtotal == 0) pi->nr_ring_pool_objtotal = netmap_min_priv_params[NETMAP_RING_POOL].num; if (pi->nr_ring_pool_objsize == 0) pi->nr_ring_pool_objsize = netmap_min_priv_params[NETMAP_RING_POOL].size; if (pi->nr_buf_pool_objtotal == 0) pi->nr_buf_pool_objtotal = netmap_min_priv_params[NETMAP_BUF_POOL].num; if (pi->nr_buf_pool_objsize == 0) pi->nr_buf_pool_objsize = netmap_min_priv_params[NETMAP_BUF_POOL].size; if (netmap_verbose & NM_DEBUG_MEM) nm_prinf("if %d %d ring %d %d buf %d %d", pi->nr_if_pool_objtotal, pi->nr_if_pool_objsize, pi->nr_ring_pool_objtotal, pi->nr_ring_pool_objsize, pi->nr_buf_pool_objtotal, pi->nr_buf_pool_objsize); os = nm_os_extmem_create(usrptr, pi, &error); if (os == NULL) { nm_prerr("os extmem creation failed"); goto out; } nme = netmap_mem_ext_search(os); if (nme) { nm_os_extmem_delete(os); return &nme->up; } if (netmap_verbose & NM_DEBUG_MEM) nm_prinf("not found, creating new"); nme = _netmap_mem_private_new(sizeof(*nme), (struct netmap_obj_params[]){ { pi->nr_if_pool_objsize, pi->nr_if_pool_objtotal }, { pi->nr_ring_pool_objsize, pi->nr_ring_pool_objtotal }, { pi->nr_buf_pool_objsize, pi->nr_buf_pool_objtotal }}, &netmap_mem_ext_ops, &error); if (nme == NULL) goto out_unmap; nr_pages = nm_os_extmem_nr_pages(os); /* from now on pages will be released by nme destructor; * we let res = 0 to prevent release in out_unmap below */ nme->os = os; os = NULL; /* pass ownership */ clust = nm_os_extmem_nextpage(nme->os); off = 0; for (i = 0; i < NETMAP_POOLS_NR; i++) { struct netmap_obj_pool *p = &nme->up.pools[i]; struct netmap_obj_params *o = &nme->up.params[i]; p->_objsize = o->size; p->_clustsize = o->size; p->_clustentries = 1; p->lut = nm_alloc_lut(o->num); if (p->lut == NULL) { error = ENOMEM; goto out_delete; } p->bitmap_slots = (o->num + sizeof(uint32_t) - 1) / sizeof(uint32_t); p->invalid_bitmap = nm_os_malloc(sizeof(uint32_t) * p->bitmap_slots); if (p->invalid_bitmap == NULL) { error = ENOMEM; goto out_delete; } if (nr_pages == 0) { p->objtotal = 0; p->memtotal = 0; p->objfree = 0; continue; } for (j = 0; j < o->num && nr_pages > 0; j++) { size_t noff; p->lut[j].vaddr = clust + off; #if !defined(linux) && !defined(_WIN32) p->lut[j].paddr = vtophys(p->lut[j].vaddr); #endif nm_prdis("%s %d at %p", p->name, j, p->lut[j].vaddr); noff = off + p->_objsize; if (noff < PAGE_SIZE) { off = noff; continue; } nm_prdis("too big, recomputing offset..."); while (noff >= PAGE_SIZE) { char *old_clust = clust; noff -= PAGE_SIZE; clust = nm_os_extmem_nextpage(nme->os); nr_pages--; nm_prdis("noff %zu page %p nr_pages %d", noff, page_to_virt(*pages), nr_pages); if (noff > 0 && !nm_isset(p->invalid_bitmap, j) && (nr_pages == 0 || old_clust + PAGE_SIZE != clust)) { /* out 
of space or non contiguous, * drop this object * */ p->invalid_bitmap[ (j>>5) ] |= 1U << (j & 31U); nm_prdis("non contiguous at off %zu, drop", noff); } if (nr_pages == 0) break; } off = noff; } p->objtotal = j; p->numclusters = p->objtotal; p->memtotal = j * (size_t)p->_objsize; nm_prdis("%d memtotal %zu", j, p->memtotal); } netmap_mem_ext_register(nme); return &nme->up; out_delete: netmap_mem_put(&nme->up); out_unmap: if (os) nm_os_extmem_delete(os); out: if (perror) *perror = error; return NULL; } #endif /* WITH_EXTMEM */ #ifdef WITH_PTNETMAP struct mem_pt_if { struct mem_pt_if *next; struct ifnet *ifp; unsigned int nifp_offset; }; /* Netmap allocator for ptnetmap guests. */ struct netmap_mem_ptg { struct netmap_mem_d up; vm_paddr_t nm_paddr; /* physical address in the guest */ void *nm_addr; /* virtual address in the guest */ struct netmap_lut buf_lut; /* lookup table for BUF pool in the guest */ nm_memid_t host_mem_id; /* allocator identifier in the host */ struct ptnetmap_memdev *ptn_dev;/* ptnetmap memdev */ struct mem_pt_if *pt_ifs; /* list of interfaces in passthrough */ }; /* Link a passthrough interface to a passthrough netmap allocator. */ static int netmap_mem_pt_guest_ifp_add(struct netmap_mem_d *nmd, struct ifnet *ifp, unsigned int nifp_offset) { struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd; struct mem_pt_if *ptif = nm_os_malloc(sizeof(*ptif)); if (!ptif) { return ENOMEM; } NMA_LOCK(nmd); ptif->ifp = ifp; ptif->nifp_offset = nifp_offset; if (ptnmd->pt_ifs) { ptif->next = ptnmd->pt_ifs; } ptnmd->pt_ifs = ptif; NMA_UNLOCK(nmd); nm_prinf("ifp=%s,nifp_offset=%u", ptif->ifp->if_xname, ptif->nifp_offset); return 0; } /* Called with NMA_LOCK(nmd) held. */ static struct mem_pt_if * netmap_mem_pt_guest_ifp_lookup(struct netmap_mem_d *nmd, struct ifnet *ifp) { struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd; struct mem_pt_if *curr; for (curr = ptnmd->pt_ifs; curr; curr = curr->next) { if (curr->ifp == ifp) { return curr; } } return NULL; } /* Unlink a passthrough interface from a passthrough netmap allocator. 
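 *
 * The function below performs a classic singly-linked-list unlink, with a
 * 'prev' pointer trailing 'curr' through the pt_ifs chain. A standalone
 * model of the same walk (the demo_* names are illustrative only):
 */
#if 0	/* illustrative sketch only, never compiled */
#include <stddef.h>

struct demo_node { struct demo_node *next; int key; };

static int
demo_unlink(struct demo_node **head, int key)
{
	struct demo_node *prev = NULL, *curr;

	for (curr = *head; curr != NULL; prev = curr, curr = curr->next) {
		if (curr->key != key)
			continue;
		if (prev != NULL)
			prev->next = curr->next;	/* bypass curr */
		else
			*head = curr->next;		/* curr was the head */
		return (0);
	}
	return (-1);	/* not found, mirrors the ret = -1 default below */
}
#endif
/*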
*/ int netmap_mem_pt_guest_ifp_del(struct netmap_mem_d *nmd, struct ifnet *ifp) { struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd; struct mem_pt_if *prev = NULL; struct mem_pt_if *curr; int ret = -1; NMA_LOCK(nmd); for (curr = ptnmd->pt_ifs; curr; curr = curr->next) { if (curr->ifp == ifp) { if (prev) { prev->next = curr->next; } else { ptnmd->pt_ifs = curr->next; } nm_prinf("removed (ifp=%s,nifp_offset=%u)", curr->ifp->if_xname, curr->nifp_offset); nm_os_free(curr); ret = 0; break; } prev = curr; } NMA_UNLOCK(nmd); return ret; } static int netmap_mem_pt_guest_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut) { struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd; if (!(nmd->flags & NETMAP_MEM_FINALIZED)) { return EINVAL; } *lut = ptnmd->buf_lut; return 0; } static int netmap_mem_pt_guest_get_info(struct netmap_mem_d *nmd, uint64_t *size, u_int *memflags, uint16_t *id) { int error = 0; error = nmd->ops->nmd_config(nmd); if (error) goto out; if (size) *size = nmd->nm_totalsize; if (memflags) *memflags = nmd->flags; if (id) *id = nmd->nm_id; out: return error; } static vm_paddr_t netmap_mem_pt_guest_ofstophys(struct netmap_mem_d *nmd, vm_ooffset_t off) { struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd; vm_paddr_t paddr; /* if the offset is valid, just return csb->base_addr + off */ paddr = (vm_paddr_t)(ptnmd->nm_paddr + off); nm_prdis("off %lx padr %lx", off, (unsigned long)paddr); return paddr; } static int netmap_mem_pt_guest_config(struct netmap_mem_d *nmd) { /* nothing to do, we are configured on creation * and configuration never changes thereafter */ return 0; } static int netmap_mem_pt_guest_finalize(struct netmap_mem_d *nmd) { struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd; uint64_t mem_size; uint32_t bufsize; uint32_t nbuffers; uint32_t poolofs; vm_paddr_t paddr; char *vaddr; int i; int error = 0; if (nmd->flags & NETMAP_MEM_FINALIZED) goto out; if (ptnmd->ptn_dev == NULL) { nm_prerr("ptnetmap memdev not attached"); error = ENOMEM; goto out; } /* Map memory through ptnetmap-memdev BAR. */ error = nm_os_pt_memdev_iomap(ptnmd->ptn_dev, &ptnmd->nm_paddr, &ptnmd->nm_addr, &mem_size); if (error) goto out; /* Initialize the lut using the information contained in the * ptnetmap memory device. */ bufsize = nm_os_pt_memdev_ioread(ptnmd->ptn_dev, PTNET_MDEV_IO_BUF_POOL_OBJSZ); nbuffers = nm_os_pt_memdev_ioread(ptnmd->ptn_dev, PTNET_MDEV_IO_BUF_POOL_OBJNUM); /* allocate the lut */ if (ptnmd->buf_lut.lut == NULL) { nm_prinf("allocating lut"); ptnmd->buf_lut.lut = nm_alloc_lut(nbuffers); if (ptnmd->buf_lut.lut == NULL) { nm_prerr("lut allocation failed"); return ENOMEM; } } /* we have physically contiguous memory mapped through PCI BAR */ poolofs = nm_os_pt_memdev_ioread(ptnmd->ptn_dev, PTNET_MDEV_IO_BUF_POOL_OFS); vaddr = (char *)(ptnmd->nm_addr) + poolofs; paddr = ptnmd->nm_paddr + poolofs; for (i = 0; i < nbuffers; i++) { ptnmd->buf_lut.lut[i].vaddr = vaddr; vaddr += bufsize; paddr += bufsize; } ptnmd->buf_lut.objtotal = nbuffers; ptnmd->buf_lut.objsize = bufsize; nmd->nm_totalsize = mem_size; /* Initialize these fields as are needed by * netmap_mem_bufsize(). * XXX please improve this, why do we need this * replication? maybe we nmd->pools[] should no be * there for the guest allocator? 
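 *
 * (The replication is needed because the generic accessor defined
 * earlier in this file reads the pool fields directly:
 *
 *	unsigned
 *	netmap_mem_bufsize(struct netmap_mem_d *nmd)
 *	{
 *		return nmd->pools[NETMAP_BUF_POOL]._objsize;
 *	}
 *
 * so a guest allocator that filled only buf_lut would report a zero
 * buffer size.)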
*/ nmd->pools[NETMAP_BUF_POOL]._objsize = bufsize; nmd->pools[NETMAP_BUF_POOL]._objtotal = nbuffers; nmd->flags |= NETMAP_MEM_FINALIZED; out: return error; } static void netmap_mem_pt_guest_deref(struct netmap_mem_d *nmd) { struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd; if (nmd->active == 1 && (nmd->flags & NETMAP_MEM_FINALIZED)) { nmd->flags &= ~NETMAP_MEM_FINALIZED; /* unmap ptnetmap-memdev memory */ if (ptnmd->ptn_dev) { nm_os_pt_memdev_iounmap(ptnmd->ptn_dev); } ptnmd->nm_addr = NULL; ptnmd->nm_paddr = 0; } } static ssize_t netmap_mem_pt_guest_if_offset(struct netmap_mem_d *nmd, const void *vaddr) { struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd; return (const char *)(vaddr) - (char *)(ptnmd->nm_addr); } static void netmap_mem_pt_guest_delete(struct netmap_mem_d *nmd) { if (nmd == NULL) return; if (netmap_verbose) nm_prinf("deleting %p", nmd); if (nmd->active > 0) nm_prerr("bug: deleting mem allocator with active=%d!", nmd->active); if (netmap_verbose) nm_prinf("done deleting %p", nmd); NMA_LOCK_DESTROY(nmd); nm_os_free(nmd); } static struct netmap_if * netmap_mem_pt_guest_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv) { struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)na->nm_mem; struct mem_pt_if *ptif; struct netmap_if *nifp = NULL; ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp); if (ptif == NULL) { nm_prerr("interface %s is not in passthrough", na->name); goto out; } nifp = (struct netmap_if *)((char *)(ptnmd->nm_addr) + ptif->nifp_offset); out: return nifp; } static void netmap_mem_pt_guest_if_delete(struct netmap_adapter *na, struct netmap_if *nifp) { struct mem_pt_if *ptif; ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp); if (ptif == NULL) { nm_prerr("interface %s is not in passthrough", na->name); } } static int netmap_mem_pt_guest_rings_create(struct netmap_adapter *na) { struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)na->nm_mem; struct mem_pt_if *ptif; struct netmap_if *nifp; int i, error = -1; ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp); if (ptif == NULL) { nm_prerr("interface %s is not in passthrough", na->name); goto out; } /* point each kring to the corresponding backend ring */ nifp = (struct netmap_if *)((char *)ptnmd->nm_addr + ptif->nifp_offset); for (i = 0; i < netmap_all_rings(na, NR_TX); i++) { struct netmap_kring *kring = na->tx_rings[i]; if (kring->ring) continue; kring->ring = (struct netmap_ring *) ((char *)nifp + nifp->ring_ofs[i]); } for (i = 0; i < netmap_all_rings(na, NR_RX); i++) { struct netmap_kring *kring = na->rx_rings[i]; if (kring->ring) continue; kring->ring = (struct netmap_ring *) ((char *)nifp + nifp->ring_ofs[netmap_all_rings(na, NR_TX) + i]); } error = 0; out: return error; } static void netmap_mem_pt_guest_rings_delete(struct netmap_adapter *na) { #if 0 enum txrx t; for_rx_tx(t) { u_int i; for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { struct netmap_kring *kring = &NMR(na, t)[i]; kring->ring = NULL; } } #endif } static struct netmap_mem_ops netmap_mem_pt_guest_ops = { .nmd_get_lut = netmap_mem_pt_guest_get_lut, .nmd_get_info = netmap_mem_pt_guest_get_info, .nmd_ofstophys = netmap_mem_pt_guest_ofstophys, .nmd_config = netmap_mem_pt_guest_config, .nmd_finalize = netmap_mem_pt_guest_finalize, .nmd_deref = netmap_mem_pt_guest_deref, .nmd_if_offset = netmap_mem_pt_guest_if_offset, .nmd_delete = netmap_mem_pt_guest_delete, .nmd_if_new = netmap_mem_pt_guest_if_new, .nmd_if_delete = netmap_mem_pt_guest_if_delete, .nmd_rings_create = 
netmap_mem_pt_guest_rings_create, .nmd_rings_delete = netmap_mem_pt_guest_rings_delete }; /* Called with nm_mem_list_lock held. */ static struct netmap_mem_d * netmap_mem_pt_guest_find_memid(nm_memid_t mem_id) { struct netmap_mem_d *mem = NULL; struct netmap_mem_d *scan = netmap_last_mem_d; do { /* find ptnetmap allocator through host ID */ if (scan->ops->nmd_deref == netmap_mem_pt_guest_deref && ((struct netmap_mem_ptg *)(scan))->host_mem_id == mem_id) { mem = scan; mem->refcount++; NM_DBG_REFC(mem, __FUNCTION__, __LINE__); break; } scan = scan->next; } while (scan != netmap_last_mem_d); return mem; } /* Called with nm_mem_list_lock held. */ static struct netmap_mem_d * netmap_mem_pt_guest_create(nm_memid_t mem_id) { struct netmap_mem_ptg *ptnmd; int err = 0; ptnmd = nm_os_malloc(sizeof(struct netmap_mem_ptg)); if (ptnmd == NULL) { err = ENOMEM; goto error; } ptnmd->up.ops = &netmap_mem_pt_guest_ops; ptnmd->host_mem_id = mem_id; ptnmd->pt_ifs = NULL; /* Assign new id in the guest (We have the lock) */ err = nm_mem_assign_id_locked(&ptnmd->up); if (err) goto error; ptnmd->up.flags &= ~NETMAP_MEM_FINALIZED; ptnmd->up.flags |= NETMAP_MEM_IO; NMA_LOCK_INIT(&ptnmd->up); snprintf(ptnmd->up.name, NM_MEM_NAMESZ, "%d", ptnmd->up.nm_id); return &ptnmd->up; error: netmap_mem_pt_guest_delete(&ptnmd->up); return NULL; } /* * find host id in guest allocators and create guest allocator * if it is not there */ static struct netmap_mem_d * netmap_mem_pt_guest_get(nm_memid_t mem_id) { struct netmap_mem_d *nmd; NM_MTX_LOCK(nm_mem_list_lock); nmd = netmap_mem_pt_guest_find_memid(mem_id); if (nmd == NULL) { nmd = netmap_mem_pt_guest_create(mem_id); } NM_MTX_UNLOCK(nm_mem_list_lock); return nmd; } /* * The guest allocator can be created by ptnetmap_memdev (during the device * attach) or by ptnetmap device (ptnet), during the netmap_attach. * * The order is not important (we have different order in LINUX and FreeBSD). * The first one, creates the device, and the second one simply attaches it. */ /* Called when ptnetmap_memdev is attaching, to attach a new allocator in * the guest */ struct netmap_mem_d * netmap_mem_pt_guest_attach(struct ptnetmap_memdev *ptn_dev, nm_memid_t mem_id) { struct netmap_mem_d *nmd; struct netmap_mem_ptg *ptnmd; nmd = netmap_mem_pt_guest_get(mem_id); /* assign this device to the guest allocator */ if (nmd) { ptnmd = (struct netmap_mem_ptg *)nmd; ptnmd->ptn_dev = ptn_dev; } return nmd; } /* Called when ptnet device is attaching */ struct netmap_mem_d * netmap_mem_pt_guest_new(struct ifnet *ifp, unsigned int nifp_offset, unsigned int memid) { struct netmap_mem_d *nmd; if (ifp == NULL) { return NULL; } nmd = netmap_mem_pt_guest_get((nm_memid_t)memid); if (nmd) { netmap_mem_pt_guest_ifp_add(nmd, ifp, nifp_offset); } return nmd; } #endif /* WITH_PTNETMAP */ Index: projects/clang1000-import/sys/dev/sound/pci/emu10k1.c =================================================================== --- projects/clang1000-import/sys/dev/sound/pci/emu10k1.c (revision 357178) +++ projects/clang1000-import/sys/dev/sound/pci/emu10k1.c (revision 357179) @@ -1,2258 +1,2259 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004 David O'Brien * Copyright (c) 2003 Orlando Bassotto * Copyright (c) 1999 Cameron Grant * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHERIN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_snd.h" #endif #include #include #include #include #include #include #include #include "mpufoi_if.h" SND_DECLARE_FILE("$FreeBSD$"); /* -------------------------------------------------------------------- */ #define NUM_G 64 /* use all channels */ #define WAVEOUT_MAXBUFSIZE 32768 #define EMUPAGESIZE 4096 /* don't change */ #define EMUMAXPAGES (WAVEOUT_MAXBUFSIZE * NUM_G / EMUPAGESIZE) #define EMU10K1_PCI_ID 0x00021102 /* 1102 => Creative Labs Vendor ID */ #define EMU10K2_PCI_ID 0x00041102 #define EMU10K3_PCI_ID 0x00081102 #define EMU_DEFAULT_BUFSZ 4096 #define EMU_MAX_CHANS 8 #define EMU_CHANS 4 #define MAXREQVOICES 8 #define RESERVED 0 #define NUM_MIDI 16 #define NUM_FXSENDS 4 #define TMEMSIZE 256*1024 #define TMEMSIZEREG 4 #define ENABLE 0xffffffff #define DISABLE 0x00000000 #define ENV_ON EMU_CHAN_DCYSUSV_CHANNELENABLE_MASK #define ENV_OFF 0x00 /* XXX: should this be 1? 
*/ #define EMU_A_IOCFG_GPOUT_A 0x40 #define EMU_A_IOCFG_GPOUT_D 0x04 #define EMU_A_IOCFG_GPOUT_AD (EMU_A_IOCFG_GPOUT_A|EMU_A_IOCFG_GPOUT_D) /* EMU_A_IOCFG_GPOUT0 */ #define EMU_HCFG_GPOUT1 0x00000800 /* instruction set */ #define iACC3 0x06 #define iMACINT0 0x04 #define iINTERP 0x0e #define C_00000000 0x40 #define C_00000001 0x41 #define C_00000004 0x44 #define C_40000000 0x4d /* Audigy constants */ #define A_C_00000000 0xc0 #define A_C_40000000 0xcd /* GPRs */ #define FXBUS(x) (0x00 + (x)) #define EXTIN(x) (0x10 + (x)) #define EXTOUT(x) (0x20 + (x)) #define GPR(x) (EMU_FXGPREGBASE + (x)) #define A_EXTIN(x) (0x40 + (x)) #define A_FXBUS(x) (0x00 + (x)) #define A_EXTOUT(x) (0x60 + (x)) #define A_GPR(x) (EMU_A_FXGPREGBASE + (x)) /* FX buses */ #define FXBUS_PCM_LEFT 0x00 #define FXBUS_PCM_RIGHT 0x01 #define FXBUS_MIDI_LEFT 0x04 #define FXBUS_MIDI_RIGHT 0x05 #define FXBUS_MIDI_REVERB 0x0c #define FXBUS_MIDI_CHORUS 0x0d /* Inputs */ #define EXTIN_AC97_L 0x00 #define EXTIN_AC97_R 0x01 #define EXTIN_SPDIF_CD_L 0x02 #define EXTIN_SPDIF_CD_R 0x03 #define EXTIN_TOSLINK_L 0x06 #define EXTIN_TOSLINK_R 0x07 #define EXTIN_COAX_SPDIF_L 0x0a #define EXTIN_COAX_SPDIF_R 0x0b /* Audigy Inputs */ #define A_EXTIN_AC97_L 0x00 #define A_EXTIN_AC97_R 0x01 /* Outputs */ #define EXTOUT_AC97_L 0x00 #define EXTOUT_AC97_R 0x01 #define EXTOUT_TOSLINK_L 0x02 #define EXTOUT_TOSLINK_R 0x03 #define EXTOUT_AC97_CENTER 0x04 #define EXTOUT_AC97_LFE 0x05 #define EXTOUT_HEADPHONE_L 0x06 #define EXTOUT_HEADPHONE_R 0x07 #define EXTOUT_REAR_L 0x08 #define EXTOUT_REAR_R 0x09 #define EXTOUT_ADC_CAP_L 0x0a #define EXTOUT_ADC_CAP_R 0x0b #define EXTOUT_ACENTER 0x11 #define EXTOUT_ALFE 0x12 /* Audigy Outputs */ #define A_EXTOUT_FRONT_L 0x00 #define A_EXTOUT_FRONT_R 0x01 #define A_EXTOUT_CENTER 0x02 #define A_EXTOUT_LFE 0x03 #define A_EXTOUT_HEADPHONE_L 0x04 #define A_EXTOUT_HEADPHONE_R 0x05 #define A_EXTOUT_REAR_L 0x06 #define A_EXTOUT_REAR_R 0x07 #define A_EXTOUT_AFRONT_L 0x08 #define A_EXTOUT_AFRONT_R 0x09 #define A_EXTOUT_ACENTER 0x0a #define A_EXTOUT_ALFE 0x0b #define A_EXTOUT_AREAR_L 0x0e #define A_EXTOUT_AREAR_R 0x0f #define A_EXTOUT_AC97_L 0x10 #define A_EXTOUT_AC97_R 0x11 #define A_EXTOUT_ADC_CAP_L 0x16 #define A_EXTOUT_ADC_CAP_R 0x17 struct emu_memblk { SLIST_ENTRY(emu_memblk) link; void *buf; bus_addr_t buf_addr; u_int32_t pte_start, pte_size; bus_dmamap_t buf_map; }; struct emu_mem { u_int8_t bmap[EMUMAXPAGES / 8]; u_int32_t *ptb_pages; void *silent_page; bus_addr_t silent_page_addr; bus_addr_t ptb_pages_addr; bus_dmamap_t ptb_map; bus_dmamap_t silent_map; SLIST_HEAD(, emu_memblk) blocks; }; struct emu_voice { int vnum; unsigned int b16:1, stereo:1, busy:1, running:1, ismaster:1; int speed; int start, end, vol; int fxrt1; /* FX routing */ int fxrt2; /* FX routing (only for audigy) */ u_int32_t buf; struct emu_voice *slave; struct pcm_channel *channel; }; struct sc_info; /* channel registers */ struct sc_pchinfo { int spd, fmt, blksz, run; struct emu_voice *master, *slave; struct snd_dbuf *buffer; struct pcm_channel *channel; struct sc_info *parent; }; struct sc_rchinfo { int spd, fmt, run, blksz, num; u_int32_t idxreg, basereg, sizereg, setupreg, irqmask; struct snd_dbuf *buffer; struct pcm_channel *channel; struct sc_info *parent; }; /* device private data */ struct sc_info { device_t dev; u_int32_t type, rev; u_int32_t tos_link:1, APS:1, audigy:1, audigy2:1; u_int32_t addrmask; /* wider if audigy */ bus_space_tag_t st; bus_space_handle_t sh; bus_dma_tag_t parent_dmat; struct resource *reg, *irq; void *ih; struct mtx *lock; 
unsigned int bufsz; int timer, timerinterval; int pnum, rnum; int nchans; struct emu_mem mem; struct emu_voice voice[64]; struct sc_pchinfo pch[EMU_MAX_CHANS]; struct sc_rchinfo rch[3]; struct mpu401 *mpu; mpu401_intr_t *mpu_intr; int mputx; }; /* -------------------------------------------------------------------- */ /* * prototypes */ /* stuff */ static int emu_init(struct sc_info *); static void emu_intr(void *); static void *emu_malloc(struct sc_info *sc, u_int32_t sz, bus_addr_t *addr, bus_dmamap_t *map); static void *emu_memalloc(struct sc_info *sc, u_int32_t sz, bus_addr_t *addr); static int emu_memfree(struct sc_info *sc, void *buf); static int emu_memstart(struct sc_info *sc, void *buf); #ifdef EMUDEBUG static void emu_vdump(struct sc_info *sc, struct emu_voice *v); #endif /* talk to the card */ static u_int32_t emu_rd(struct sc_info *, int, int); static void emu_wr(struct sc_info *, int, u_int32_t, int); /* -------------------------------------------------------------------- */ static u_int32_t emu_rfmt_ac97[] = { SND_FORMAT(AFMT_S16_LE, 1, 0), SND_FORMAT(AFMT_S16_LE, 2, 0), 0 }; static u_int32_t emu_rfmt_mic[] = { SND_FORMAT(AFMT_U8, 1, 0), 0 }; static u_int32_t emu_rfmt_efx[] = { SND_FORMAT(AFMT_S16_LE, 2, 0), 0 }; static struct pcmchan_caps emu_reccaps[3] = { {8000, 48000, emu_rfmt_ac97, 0}, {8000, 8000, emu_rfmt_mic, 0}, {48000, 48000, emu_rfmt_efx, 0}, }; static u_int32_t emu_pfmt[] = { SND_FORMAT(AFMT_U8, 1, 0), SND_FORMAT(AFMT_U8, 2, 0), SND_FORMAT(AFMT_S16_LE, 1, 0), SND_FORMAT(AFMT_S16_LE, 2, 0), 0 }; static struct pcmchan_caps emu_playcaps = {4000, 48000, emu_pfmt, 0}; static int adcspeed[8] = {48000, 44100, 32000, 24000, 22050, 16000, 11025, 8000}; /* audigy supports 12kHz. */ static int audigy_adcspeed[9] = { 48000, 44100, 32000, 24000, 22050, 16000, 12000, 11025, 8000 }; /* -------------------------------------------------------------------- */ /* Hardware */ static u_int32_t emu_rd(struct sc_info *sc, int regno, int size) { switch (size) { case 1: return bus_space_read_1(sc->st, sc->sh, regno); case 2: return bus_space_read_2(sc->st, sc->sh, regno); case 4: return bus_space_read_4(sc->st, sc->sh, regno); default: return 0xffffffff; } } static void emu_wr(struct sc_info *sc, int regno, u_int32_t data, int size) { switch (size) { case 1: bus_space_write_1(sc->st, sc->sh, regno, data); break; case 2: bus_space_write_2(sc->st, sc->sh, regno, data); break; case 4: bus_space_write_4(sc->st, sc->sh, regno, data); break; } } static u_int32_t emu_rdptr(struct sc_info *sc, int chn, int reg) { u_int32_t ptr, val, mask, size, offset; ptr = ((reg << 16) & sc->addrmask) | (chn & EMU_PTR_CHNO_MASK); emu_wr(sc, EMU_PTR, ptr, 4); val = emu_rd(sc, EMU_DATA, 4); if (reg & 0xff000000) { size = (reg >> 24) & 0x3f; offset = (reg >> 16) & 0x1f; mask = ((1 << size) - 1) << offset; val &= mask; val >>= offset; } return val; } static void emu_wrptr(struct sc_info *sc, int chn, int reg, u_int32_t data) { u_int32_t ptr, mask, size, offset; ptr = ((reg << 16) & sc->addrmask) | (chn & EMU_PTR_CHNO_MASK); emu_wr(sc, EMU_PTR, ptr, 4); if (reg & 0xff000000) { size = (reg >> 24) & 0x3f; offset = (reg >> 16) & 0x1f; mask = ((1 << size) - 1) << offset; data <<= offset; data &= mask; data |= emu_rd(sc, EMU_DATA, 4) & ~mask; } emu_wr(sc, EMU_DATA, data, 4); } static void emu_wrefx(struct sc_info *sc, unsigned int pc, unsigned int data) { pc += sc->audigy ? 
EMU_A_MICROCODEBASE : EMU_MICROCODEBASE; emu_wrptr(sc, 0, pc, data); } /* -------------------------------------------------------------------- */ /* ac97 codec */ /* no locking needed */ static int emu_rdcd(kobj_t obj, void *devinfo, int regno) { struct sc_info *sc = (struct sc_info *)devinfo; emu_wr(sc, EMU_AC97ADDR, regno, 1); return emu_rd(sc, EMU_AC97DATA, 2); } static int emu_wrcd(kobj_t obj, void *devinfo, int regno, u_int32_t data) { struct sc_info *sc = (struct sc_info *)devinfo; emu_wr(sc, EMU_AC97ADDR, regno, 1); emu_wr(sc, EMU_AC97DATA, data, 2); return 0; } static kobj_method_t emu_ac97_methods[] = { KOBJMETHOD(ac97_read, emu_rdcd), KOBJMETHOD(ac97_write, emu_wrcd), KOBJMETHOD_END }; AC97_DECLARE(emu_ac97); /* -------------------------------------------------------------------- */ /* stuff */ static int emu_settimer(struct sc_info *sc) { struct sc_pchinfo *pch; struct sc_rchinfo *rch; int i, tmp, rate; rate = 0; for (i = 0; i < sc->nchans; i++) { pch = &sc->pch[i]; if (pch->buffer) { tmp = (pch->spd * sndbuf_getalign(pch->buffer)) / pch->blksz; if (tmp > rate) rate = tmp; } } for (i = 0; i < 3; i++) { rch = &sc->rch[i]; if (rch->buffer) { tmp = (rch->spd * sndbuf_getalign(rch->buffer)) / rch->blksz; if (tmp > rate) rate = tmp; } } RANGE(rate, 48, 9600); sc->timerinterval = 48000 / rate; emu_wr(sc, EMU_TIMER, sc->timerinterval & 0x03ff, 2); return sc->timerinterval; } static int emu_enatimer(struct sc_info *sc, int go) { u_int32_t x; if (go) { if (sc->timer++ == 0) { x = emu_rd(sc, EMU_INTE, 4); x |= EMU_INTE_INTERTIMERENB; emu_wr(sc, EMU_INTE, x, 4); } } else { sc->timer = 0; x = emu_rd(sc, EMU_INTE, 4); x &= ~EMU_INTE_INTERTIMERENB; emu_wr(sc, EMU_INTE, x, 4); } return 0; } static void emu_enastop(struct sc_info *sc, char channel, int enable) { int reg = (channel & 0x20) ? 
EMU_SOLEH : EMU_SOLEL; channel &= 0x1f; reg |= 1 << 24; reg |= channel << 16; emu_wrptr(sc, 0, reg, enable); } static int emu_recval(int speed) { int val; val = 0; while (val < 7 && speed < adcspeed[val]) val++; return val; } static int audigy_recval(int speed) { int val; val = 0; while (val < 8 && speed < audigy_adcspeed[val]) val++; return val; } static u_int32_t emu_rate_to_pitch(u_int32_t rate) { static u_int32_t logMagTable[128] = { 0x00000, 0x02dfc, 0x05b9e, 0x088e6, 0x0b5d6, 0x0e26f, 0x10eb3, 0x13aa2, 0x1663f, 0x1918a, 0x1bc84, 0x1e72e, 0x2118b, 0x23b9a, 0x2655d, 0x28ed5, 0x2b803, 0x2e0e8, 0x30985, 0x331db, 0x359eb, 0x381b6, 0x3a93d, 0x3d081, 0x3f782, 0x41e42, 0x444c1, 0x46b01, 0x49101, 0x4b6c4, 0x4dc49, 0x50191, 0x5269e, 0x54b6f, 0x57006, 0x59463, 0x5b888, 0x5dc74, 0x60029, 0x623a7, 0x646ee, 0x66a00, 0x68cdd, 0x6af86, 0x6d1fa, 0x6f43c, 0x7164b, 0x73829, 0x759d4, 0x77b4f, 0x79c9a, 0x7bdb5, 0x7dea1, 0x7ff5e, 0x81fed, 0x8404e, 0x86082, 0x88089, 0x8a064, 0x8c014, 0x8df98, 0x8fef1, 0x91e20, 0x93d26, 0x95c01, 0x97ab4, 0x9993e, 0x9b79f, 0x9d5d9, 0x9f3ec, 0xa11d8, 0xa2f9d, 0xa4d3c, 0xa6ab5, 0xa8808, 0xaa537, 0xac241, 0xadf26, 0xafbe7, 0xb1885, 0xb3500, 0xb5157, 0xb6d8c, 0xb899f, 0xba58f, 0xbc15e, 0xbdd0c, 0xbf899, 0xc1404, 0xc2f50, 0xc4a7b, 0xc6587, 0xc8073, 0xc9b3f, 0xcb5ed, 0xcd07c, 0xceaec, 0xd053f, 0xd1f73, 0xd398a, 0xd5384, 0xd6d60, 0xd8720, 0xda0c3, 0xdba4a, 0xdd3b4, 0xded03, 0xe0636, 0xe1f4e, 0xe384a, 0xe512c, 0xe69f3, 0xe829f, 0xe9b31, 0xeb3a9, 0xecc08, 0xee44c, 0xefc78, 0xf148a, 0xf2c83, 0xf4463, 0xf5c2a, 0xf73da, 0xf8b71, 0xfa2f0, 0xfba57, 0xfd1a7, 0xfe8df }; static char logSlopeTable[128] = { 0x5c, 0x5c, 0x5b, 0x5a, 0x5a, 0x59, 0x58, 0x58, 0x57, 0x56, 0x56, 0x55, 0x55, 0x54, 0x53, 0x53, 0x52, 0x52, 0x51, 0x51, 0x50, 0x50, 0x4f, 0x4f, 0x4e, 0x4d, 0x4d, 0x4d, 0x4c, 0x4c, 0x4b, 0x4b, 0x4a, 0x4a, 0x49, 0x49, 0x48, 0x48, 0x47, 0x47, 0x47, 0x46, 0x46, 0x45, 0x45, 0x45, 0x44, 0x44, 0x43, 0x43, 0x43, 0x42, 0x42, 0x42, 0x41, 0x41, 0x41, 0x40, 0x40, 0x40, 0x3f, 0x3f, 0x3f, 0x3e, 0x3e, 0x3e, 0x3d, 0x3d, 0x3d, 0x3c, 0x3c, 0x3c, 0x3b, 0x3b, 0x3b, 0x3b, 0x3a, 0x3a, 0x3a, 0x39, 0x39, 0x39, 0x39, 0x38, 0x38, 0x38, 0x38, 0x37, 0x37, 0x37, 0x37, 0x36, 0x36, 0x36, 0x36, 0x35, 0x35, 0x35, 0x35, 0x34, 0x34, 0x34, 0x34, 0x34, 0x33, 0x33, 0x33, 0x33, 0x32, 0x32, 0x32, 0x32, 0x32, 0x31, 0x31, 0x31, 0x31, 0x31, 0x30, 0x30, 0x30, 0x30, 0x30, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f }; int i; if (rate == 0) return 0; /* Bail out if no leading "1" */ rate *= 11185; /* Scale 48000 to 0x20002380 */ for (i = 31; i > 0; i--) { if (rate & 0x80000000) { /* Detect leading "1" */ return (((u_int32_t) (i - 15) << 20) + logMagTable[0x7f & (rate >> 24)] + (0x7f & (rate >> 17)) * logSlopeTable[0x7f & (rate >> 24)]); } rate <<= 1; } return 0; /* Should never reach this point */ } static u_int32_t emu_rate_to_linearpitch(u_int32_t rate) { rate = (rate << 8) / 375; return (rate >> 1) + (rate & 1); } static struct emu_voice * emu_valloc(struct sc_info *sc) { struct emu_voice *v; int i; v = NULL; for (i = 0; i < 64 && sc->voice[i].busy; i++); if (i < 64) { v = &sc->voice[i]; v->busy = 1; } return v; } static int emu_vinit(struct sc_info *sc, struct emu_voice *m, struct emu_voice *s, u_int32_t sz, struct snd_dbuf *b) { void *buf; bus_addr_t tmp_addr; buf = emu_memalloc(sc, sz, &tmp_addr); if (buf == NULL) return -1; if (b != NULL) sndbuf_setup(b, buf, sz); m->start = emu_memstart(sc, buf) * EMUPAGESIZE; m->end = m->start + sz; m->channel = NULL; m->speed = 0; m->b16 = 0; m->stereo = 0; m->running = 0; m->ismaster = 1; m->vol = 0xff; 
m->buf = tmp_addr; m->slave = s; if (sc->audigy) { m->fxrt1 = FXBUS_MIDI_CHORUS | FXBUS_PCM_RIGHT << 8 | FXBUS_PCM_LEFT << 16 | FXBUS_MIDI_REVERB << 24; m->fxrt2 = 0x3f3f3f3f; /* No effects on second route */ } else { m->fxrt1 = FXBUS_MIDI_CHORUS | FXBUS_PCM_RIGHT << 4 | FXBUS_PCM_LEFT << 8 | FXBUS_MIDI_REVERB << 12; m->fxrt2 = 0; } if (s != NULL) { s->start = m->start; s->end = m->end; s->channel = NULL; s->speed = 0; s->b16 = 0; s->stereo = 0; s->running = 0; s->ismaster = 0; s->vol = m->vol; s->buf = m->buf; s->fxrt1 = m->fxrt1; s->fxrt2 = m->fxrt2; s->slave = NULL; } return 0; } static void emu_vsetup(struct sc_pchinfo *ch) { struct emu_voice *v = ch->master; if (ch->fmt) { v->b16 = (ch->fmt & AFMT_16BIT) ? 1 : 0; v->stereo = (AFMT_CHANNEL(ch->fmt) > 1) ? 1 : 0; if (v->slave != NULL) { v->slave->b16 = v->b16; v->slave->stereo = v->stereo; } } if (ch->spd) { v->speed = ch->spd; if (v->slave != NULL) v->slave->speed = v->speed; } } static void emu_vwrite(struct sc_info *sc, struct emu_voice *v) { int s; int l, r, x, y; u_int32_t sa, ea, start, val, silent_page; s = (v->stereo ? 1 : 0) + (v->b16 ? 1 : 0); sa = v->start >> s; ea = v->end >> s; l = r = x = y = v->vol; if (v->stereo) { l = v->ismaster ? l : 0; r = v->ismaster ? 0 : r; } emu_wrptr(sc, v->vnum, EMU_CHAN_CPF, v->stereo ? EMU_CHAN_CPF_STEREO_MASK : 0); val = v->stereo ? 28 : 30; val *= v->b16 ? 1 : 2; start = sa + val; if (sc->audigy) { emu_wrptr(sc, v->vnum, EMU_A_CHAN_FXRT1, v->fxrt1); emu_wrptr(sc, v->vnum, EMU_A_CHAN_FXRT2, v->fxrt2); emu_wrptr(sc, v->vnum, EMU_A_CHAN_SENDAMOUNTS, 0); } else emu_wrptr(sc, v->vnum, EMU_CHAN_FXRT, v->fxrt1 << 16); emu_wrptr(sc, v->vnum, EMU_CHAN_PTRX, (x << 8) | r); emu_wrptr(sc, v->vnum, EMU_CHAN_DSL, ea | (y << 24)); emu_wrptr(sc, v->vnum, EMU_CHAN_PSST, sa | (l << 24)); emu_wrptr(sc, v->vnum, EMU_CHAN_CCCA, start | (v->b16 ? 0 : EMU_CHAN_CCCA_8BITSELECT)); emu_wrptr(sc, v->vnum, EMU_CHAN_Z1, 0); emu_wrptr(sc, v->vnum, EMU_CHAN_Z2, 0); silent_page = ((u_int32_t)(sc->mem.silent_page_addr) << 1) | EMU_CHAN_MAP_PTI_MASK; emu_wrptr(sc, v->vnum, EMU_CHAN_MAPA, silent_page); emu_wrptr(sc, v->vnum, EMU_CHAN_MAPB, silent_page); emu_wrptr(sc, v->vnum, EMU_CHAN_CVCF, EMU_CHAN_CVCF_CURRFILTER_MASK); emu_wrptr(sc, v->vnum, EMU_CHAN_VTFT, EMU_CHAN_VTFT_FILTERTARGET_MASK); emu_wrptr(sc, v->vnum, EMU_CHAN_ATKHLDM, 0); emu_wrptr(sc, v->vnum, EMU_CHAN_DCYSUSM, EMU_CHAN_DCYSUSM_DECAYTIME_MASK); emu_wrptr(sc, v->vnum, EMU_CHAN_LFOVAL1, 0x8000); emu_wrptr(sc, v->vnum, EMU_CHAN_LFOVAL2, 0x8000); emu_wrptr(sc, v->vnum, EMU_CHAN_FMMOD, 0); emu_wrptr(sc, v->vnum, EMU_CHAN_TREMFRQ, 0); emu_wrptr(sc, v->vnum, EMU_CHAN_FM2FRQ2, 0); emu_wrptr(sc, v->vnum, EMU_CHAN_ENVVAL, 0x8000); emu_wrptr(sc, v->vnum, EMU_CHAN_ATKHLDV, EMU_CHAN_ATKHLDV_HOLDTIME_MASK | EMU_CHAN_ATKHLDV_ATTACKTIME_MASK); emu_wrptr(sc, v->vnum, EMU_CHAN_ENVVOL, 0x8000); emu_wrptr(sc, v->vnum, EMU_CHAN_PEFE_FILTERAMOUNT, 0x7f); emu_wrptr(sc, v->vnum, EMU_CHAN_PEFE_PITCHAMOUNT, 0); if (v->slave != NULL) emu_vwrite(sc, v->slave); } static void emu_vtrigger(struct sc_info *sc, struct emu_voice *v, int go) { u_int32_t pitch_target, initial_pitch; u_int32_t cra, cs, ccis; u_int32_t sample, i; if (go) { cra = 64; cs = v->stereo ? 4 : 2; ccis = v->stereo ? 28 : 30; ccis *= v->b16 ? 1 : 2; sample = v->b16 ? 
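/*
 * A note on the shift arithmetic in emu_vwrite() above (an
 * interpretation of the code, not an extra claim): v->start and
 * v->end are byte addresses, while PSST/DSL/CCCA take sample
 * addresses, hence the >> s with s = stereo + b16; for 16-bit stereo
 * s = 2, so byte offset 0x1000 becomes sample address 0x400.  For a
 * stereo pair the master voice keeps only the left volume and the
 * slave only the right one, which is why l or r is zeroed depending
 * on v->ismaster.
 */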
0x00000000 : 0x80808080; for (i = 0; i < cs; i++) emu_wrptr(sc, v->vnum, EMU_CHAN_CD0 + i, sample); emu_wrptr(sc, v->vnum, EMU_CHAN_CCR_CACHEINVALIDSIZE, 0); emu_wrptr(sc, v->vnum, EMU_CHAN_CCR_READADDRESS, cra); emu_wrptr(sc, v->vnum, EMU_CHAN_CCR_CACHEINVALIDSIZE, ccis); emu_wrptr(sc, v->vnum, EMU_CHAN_IFATN, 0xff00); emu_wrptr(sc, v->vnum, EMU_CHAN_VTFT, 0xffffffff); emu_wrptr(sc, v->vnum, EMU_CHAN_CVCF, 0xffffffff); emu_wrptr(sc, v->vnum, EMU_CHAN_DCYSUSV, 0x00007f7f); emu_enastop(sc, v->vnum, 0); pitch_target = emu_rate_to_linearpitch(v->speed); initial_pitch = emu_rate_to_pitch(v->speed) >> 8; emu_wrptr(sc, v->vnum, EMU_CHAN_PTRX_PITCHTARGET, pitch_target); emu_wrptr(sc, v->vnum, EMU_CHAN_CPF_PITCH, pitch_target); emu_wrptr(sc, v->vnum, EMU_CHAN_IP, initial_pitch); } else { emu_wrptr(sc, v->vnum, EMU_CHAN_PTRX_PITCHTARGET, 0); emu_wrptr(sc, v->vnum, EMU_CHAN_CPF_PITCH, 0); emu_wrptr(sc, v->vnum, EMU_CHAN_IFATN, 0xffff); emu_wrptr(sc, v->vnum, EMU_CHAN_VTFT, 0x0000ffff); emu_wrptr(sc, v->vnum, EMU_CHAN_CVCF, 0x0000ffff); emu_wrptr(sc, v->vnum, EMU_CHAN_IP, 0); emu_enastop(sc, v->vnum, 1); } if (v->slave != NULL) emu_vtrigger(sc, v->slave, go); } static int emu_vpos(struct sc_info *sc, struct emu_voice *v) { int s, ptr; s = (v->b16 ? 1 : 0) + (v->stereo ? 1 : 0); ptr = (emu_rdptr(sc, v->vnum, EMU_CHAN_CCCA_CURRADDR) - (v->start >> s)) << s; return ptr & ~0x0000001f; } #ifdef EMUDEBUG static void emu_vdump(struct sc_info *sc, struct emu_voice *v) { char *regname[] = { "cpf", "ptrx", "cvcf", "vtft", "z2", "z1", "psst", "dsl", "ccca", "ccr", "clp", "fxrt", "mapa", "mapb", NULL, NULL, "envvol", "atkhldv", "dcysusv", "lfoval1", "envval", "atkhldm", "dcysusm", "lfoval2", "ip", "ifatn", "pefe", "fmmod", "tremfrq", "fmfrq2", "tempenv" }; char *regname2[] = { "mudata1", "mustat1", "mudata2", "mustat2", "fxwc1", "fxwc2", "spdrate", NULL, NULL, NULL, NULL, NULL, "fxrt2", "sndamnt", "fxrt1", NULL, NULL }; int i, x; printf("voice number %d\n", v->vnum); for (i = 0, x = 0; i <= 0x1e; i++) { if (regname[i] == NULL) continue; printf("%s\t[%08x]", regname[i], emu_rdptr(sc, v->vnum, i)); printf("%s", (x == 2) ? "\n" : "\t"); x++; if (x > 2) x = 0; } /* Print out audigy extra registers */ if (sc->audigy) { for (i = 0; i <= 0xe; i++) { if (regname2[i] == NULL) continue; printf("%s\t[%08x]", regname2[i], emu_rdptr(sc, v->vnum, i + 0x70)); printf("%s", (x == 2)? "\n" : "\t"); x++; if (x > 2) x = 0; } } printf("\n\n"); } #endif /* channel interface */ static void * emupchan_init(kobj_t obj, void *devinfo, struct snd_dbuf *b, struct pcm_channel *c, int dir) { struct sc_info *sc = devinfo; struct sc_pchinfo *ch; void *r; KASSERT(dir == PCMDIR_PLAY, ("emupchan_init: bad direction")); ch = &sc->pch[sc->pnum++]; ch->buffer = b; ch->parent = sc; ch->channel = c; ch->blksz = sc->bufsz / 2; ch->fmt = SND_FORMAT(AFMT_U8, 1, 0); ch->spd = 8000; snd_mtxlock(sc->lock); ch->master = emu_valloc(sc); ch->slave = emu_valloc(sc); snd_mtxunlock(sc->lock); r = (emu_vinit(sc, ch->master, ch->slave, sc->bufsz, ch->buffer)) ? 
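/*
 * emu_vpos() above is the inverse mapping: the current sample address
 * read back from EMU_CHAN_CCCA_CURRADDR is converted to a byte offset,
 * (CURRADDR - (start >> s)) << s, then masked down to a 32-byte
 * boundary - presumably the fetch granularity of the channel cache -
 * so the reported position never runs ahead of fetched data.
 */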
NULL : ch; return r; } static int emupchan_free(kobj_t obj, void *data) { struct sc_pchinfo *ch = data; struct sc_info *sc = ch->parent; int r; snd_mtxlock(sc->lock); r = emu_memfree(sc, sndbuf_getbuf(ch->buffer)); snd_mtxunlock(sc->lock); return r; } static int emupchan_setformat(kobj_t obj, void *data, u_int32_t format) { struct sc_pchinfo *ch = data; ch->fmt = format; return 0; } static u_int32_t emupchan_setspeed(kobj_t obj, void *data, u_int32_t speed) { struct sc_pchinfo *ch = data; ch->spd = speed; return ch->spd; } static u_int32_t emupchan_setblocksize(kobj_t obj, void *data, u_int32_t blocksize) { struct sc_pchinfo *ch = data; struct sc_info *sc = ch->parent; int irqrate, blksz; ch->blksz = blocksize; snd_mtxlock(sc->lock); emu_settimer(sc); irqrate = 48000 / sc->timerinterval; snd_mtxunlock(sc->lock); blksz = (ch->spd * sndbuf_getalign(ch->buffer)) / irqrate; return blocksize; } static int emupchan_trigger(kobj_t obj, void *data, int go) { struct sc_pchinfo *ch = data; struct sc_info *sc = ch->parent; if (!PCMTRIG_COMMON(go)) return 0; snd_mtxlock(sc->lock); if (go == PCMTRIG_START) { emu_vsetup(ch); emu_vwrite(sc, ch->master); emu_settimer(sc); emu_enatimer(sc, 1); #ifdef EMUDEBUG printf("start [%d bit, %s, %d hz]\n", ch->master->b16 ? 16 : 8, ch->master->stereo ? "stereo" : "mono", ch->master->speed); emu_vdump(sc, ch->master); emu_vdump(sc, ch->slave); #endif } ch->run = (go == PCMTRIG_START) ? 1 : 0; emu_vtrigger(sc, ch->master, ch->run); snd_mtxunlock(sc->lock); return 0; } static u_int32_t emupchan_getptr(kobj_t obj, void *data) { struct sc_pchinfo *ch = data; struct sc_info *sc = ch->parent; int r; snd_mtxlock(sc->lock); r = emu_vpos(sc, ch->master); snd_mtxunlock(sc->lock); return r; } static struct pcmchan_caps * emupchan_getcaps(kobj_t obj, void *data) { return &emu_playcaps; } static kobj_method_t emupchan_methods[] = { KOBJMETHOD(channel_init, emupchan_init), KOBJMETHOD(channel_free, emupchan_free), KOBJMETHOD(channel_setformat, emupchan_setformat), KOBJMETHOD(channel_setspeed, emupchan_setspeed), KOBJMETHOD(channel_setblocksize, emupchan_setblocksize), KOBJMETHOD(channel_trigger, emupchan_trigger), KOBJMETHOD(channel_getptr, emupchan_getptr), KOBJMETHOD(channel_getcaps, emupchan_getcaps), KOBJMETHOD_END }; CHANNEL_DECLARE(emupchan); /* channel interface */ static void * emurchan_init(kobj_t obj, void *devinfo, struct snd_dbuf *b, struct pcm_channel *c, int dir) { struct sc_info *sc = devinfo; struct sc_rchinfo *ch; KASSERT(dir == PCMDIR_REC, ("emurchan_init: bad direction")); ch = &sc->rch[sc->rnum]; ch->buffer = b; ch->parent = sc; ch->channel = c; ch->blksz = sc->bufsz / 2; ch->fmt = SND_FORMAT(AFMT_U8, 1, 0); ch->spd = 8000; ch->num = sc->rnum; switch(sc->rnum) { case 0: ch->idxreg = sc->audigy ? 
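/*
 * Worked example for emupchan_setblocksize() above (illustrative
 * numbers): with timerinterval = 1000 the interrupt rate is
 * 48000 / 1000 = 48 Hz, so a 48 kHz 16-bit stereo stream gives
 * blksz = (48000 * 4) / 48 = 4000 bytes per interrupt period.  Note
 * that the computed blksz is informational only; the function returns
 * the caller's blocksize unchanged.
 */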
EMU_A_ADCIDX : EMU_ADCIDX; ch->basereg = EMU_ADCBA; ch->sizereg = EMU_ADCBS; ch->setupreg = EMU_ADCCR; ch->irqmask = EMU_INTE_ADCBUFENABLE; break; case 1: ch->idxreg = EMU_FXIDX; ch->basereg = EMU_FXBA; ch->sizereg = EMU_FXBS; ch->setupreg = EMU_FXWC; ch->irqmask = EMU_INTE_EFXBUFENABLE; break; case 2: ch->idxreg = EMU_MICIDX; ch->basereg = EMU_MICBA; ch->sizereg = EMU_MICBS; ch->setupreg = 0; ch->irqmask = EMU_INTE_MICBUFENABLE; break; } sc->rnum++; if (sndbuf_alloc(ch->buffer, sc->parent_dmat, 0, sc->bufsz) != 0) return NULL; else { snd_mtxlock(sc->lock); emu_wrptr(sc, 0, ch->basereg, sndbuf_getbufaddr(ch->buffer)); emu_wrptr(sc, 0, ch->sizereg, 0); /* off */ snd_mtxunlock(sc->lock); return ch; } } static int emurchan_setformat(kobj_t obj, void *data, u_int32_t format) { struct sc_rchinfo *ch = data; ch->fmt = format; return 0; } static u_int32_t emurchan_setspeed(kobj_t obj, void *data, u_int32_t speed) { struct sc_rchinfo *ch = data; if (ch->num == 0) { if (ch->parent->audigy) speed = audigy_adcspeed[audigy_recval(speed)]; else speed = adcspeed[emu_recval(speed)]; } if (ch->num == 1) speed = 48000; if (ch->num == 2) speed = 8000; ch->spd = speed; return ch->spd; } static u_int32_t emurchan_setblocksize(kobj_t obj, void *data, u_int32_t blocksize) { struct sc_rchinfo *ch = data; struct sc_info *sc = ch->parent; int irqrate, blksz; ch->blksz = blocksize; snd_mtxlock(sc->lock); emu_settimer(sc); irqrate = 48000 / sc->timerinterval; snd_mtxunlock(sc->lock); blksz = (ch->spd * sndbuf_getalign(ch->buffer)) / irqrate; return blocksize; } /* semantic note: must start at beginning of buffer */ static int emurchan_trigger(kobj_t obj, void *data, int go) { struct sc_rchinfo *ch = data; struct sc_info *sc = ch->parent; u_int32_t val, sz; if (!PCMTRIG_COMMON(go)) return 0; switch(sc->bufsz) { case 4096: sz = EMU_RECBS_BUFSIZE_4096; break; case 8192: sz = EMU_RECBS_BUFSIZE_8192; break; case 16384: sz = EMU_RECBS_BUFSIZE_16384; break; case 32768: sz = EMU_RECBS_BUFSIZE_32768; break; case 65536: sz = EMU_RECBS_BUFSIZE_65536; break; default: sz = EMU_RECBS_BUFSIZE_4096; } snd_mtxlock(sc->lock); switch(go) { case PCMTRIG_START: ch->run = 1; emu_wrptr(sc, 0, ch->sizereg, sz); if (ch->num == 0) { if (sc->audigy) { val = EMU_A_ADCCR_LCHANENABLE; if (AFMT_CHANNEL(ch->fmt) > 1) val |= EMU_A_ADCCR_RCHANENABLE; val |= audigy_recval(ch->spd); } else { val = EMU_ADCCR_LCHANENABLE; if (AFMT_CHANNEL(ch->fmt) > 1) val |= EMU_ADCCR_RCHANENABLE; val |= emu_recval(ch->spd); } emu_wrptr(sc, 0, ch->setupreg, 0); emu_wrptr(sc, 0, ch->setupreg, val); } val = emu_rd(sc, EMU_INTE, 4); val |= ch->irqmask; emu_wr(sc, EMU_INTE, val, 4); break; case PCMTRIG_STOP: case PCMTRIG_ABORT: ch->run = 0; emu_wrptr(sc, 0, ch->sizereg, 0); if (ch->setupreg) emu_wrptr(sc, 0, ch->setupreg, 0); val = emu_rd(sc, EMU_INTE, 4); val &= ~ch->irqmask; emu_wr(sc, EMU_INTE, val, 4); break; case PCMTRIG_EMLDMAWR: case PCMTRIG_EMLDMARD: default: break; } snd_mtxunlock(sc->lock); return 0; } static u_int32_t emurchan_getptr(kobj_t obj, void *data) { struct sc_rchinfo *ch = data; struct sc_info *sc = ch->parent; int r; snd_mtxlock(sc->lock); r = emu_rdptr(sc, 0, ch->idxreg) & 0x0000ffff; snd_mtxunlock(sc->lock); return r; } static struct pcmchan_caps * emurchan_getcaps(kobj_t obj, void *data) { struct sc_rchinfo *ch = data; return &emu_reccaps[ch->num]; } static kobj_method_t emurchan_methods[] = { KOBJMETHOD(channel_init, emurchan_init), KOBJMETHOD(channel_setformat, emurchan_setformat), KOBJMETHOD(channel_setspeed, emurchan_setspeed), 
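/*
 * The buffer-size switch in emurchan_trigger() above accepts only the
 * discrete sizes the hardware encodes (4 KB through 64 KB in powers
 * of two, via EMU_RECBS_BUFSIZE_*) and quietly falls back to 4096 for
 * anything else.  emurchan_setspeed() likewise snaps requests to the
 * nearest supported ADC rate, e.g. a 23000 Hz request on the Live!
 * lands on 22050 Hz.
 */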
KOBJMETHOD(channel_setblocksize, emurchan_setblocksize), KOBJMETHOD(channel_trigger, emurchan_trigger), KOBJMETHOD(channel_getptr, emurchan_getptr), KOBJMETHOD(channel_getcaps, emurchan_getcaps), KOBJMETHOD_END }; CHANNEL_DECLARE(emurchan); static unsigned char emu_mread(struct mpu401 *arg, void *sc, int reg) { unsigned int d; d = emu_rd((struct sc_info *)sc, 0x18 + reg, 1); return d; } static void emu_mwrite(struct mpu401 *arg, void *sc, int reg, unsigned char b) { emu_wr((struct sc_info *)sc, 0x18 + reg, b, 1); } static int emu_muninit(struct mpu401 *arg, void *cookie) { struct sc_info *sc = cookie; snd_mtxlock(sc->lock); sc->mpu_intr = NULL; snd_mtxunlock(sc->lock); return 0; } static kobj_method_t emu_mpu_methods[] = { KOBJMETHOD(mpufoi_read, emu_mread), KOBJMETHOD(mpufoi_write, emu_mwrite), KOBJMETHOD(mpufoi_uninit, emu_muninit), KOBJMETHOD_END }; static DEFINE_CLASS(emu_mpu, emu_mpu_methods, 0); static void emu_intr2(void *p) { struct sc_info *sc = (struct sc_info *)p; if (sc->mpu_intr) (sc->mpu_intr)(sc->mpu); } static void emu_midiattach(struct sc_info *sc) { int i; i = emu_rd(sc, EMU_INTE, 4); i |= EMU_INTE_MIDIRXENABLE; emu_wr(sc, EMU_INTE, i, 4); sc->mpu = mpu401_init(&emu_mpu_class, sc, emu_intr2, &sc->mpu_intr); } /* -------------------------------------------------------------------- */ /* The interrupt handler */ static void emu_intr(void *data) { struct sc_info *sc = data; u_int32_t stat, ack, i, x; snd_mtxlock(sc->lock); while (1) { stat = emu_rd(sc, EMU_IPR, 4); if (stat == 0) break; ack = 0; /* process irq */ if (stat & EMU_IPR_INTERVALTIMER) ack |= EMU_IPR_INTERVALTIMER; if (stat & (EMU_IPR_ADCBUFFULL | EMU_IPR_ADCBUFHALFFULL)) ack |= stat & (EMU_IPR_ADCBUFFULL | EMU_IPR_ADCBUFHALFFULL); if (stat & (EMU_IPR_EFXBUFFULL | EMU_IPR_EFXBUFHALFFULL)) ack |= stat & (EMU_IPR_EFXBUFFULL | EMU_IPR_EFXBUFHALFFULL); if (stat & (EMU_IPR_MICBUFFULL | EMU_IPR_MICBUFHALFFULL)) ack |= stat & (EMU_IPR_MICBUFFULL | EMU_IPR_MICBUFHALFFULL); if (stat & EMU_PCIERROR) { ack |= EMU_PCIERROR; device_printf(sc->dev, "pci error\n"); /* we still get an nmi with ecc ram even if we ack this */ } if (stat & EMU_IPR_RATETRCHANGE) { ack |= EMU_IPR_RATETRCHANGE; #ifdef EMUDEBUG device_printf(sc->dev, "sample rate tracker lock status change\n"); #endif } - if (stat & EMU_IPR_MIDIRECVBUFE) - if (sc->mpu_intr) { - (sc->mpu_intr)(sc->mpu); - ack |= EMU_IPR_MIDIRECVBUFE | EMU_IPR_MIDITRANSBUFE; - } + if (stat & EMU_IPR_MIDIRECVBUFE) { + if (sc->mpu_intr) { + (sc->mpu_intr)(sc->mpu); + ack |= EMU_IPR_MIDIRECVBUFE | EMU_IPR_MIDITRANSBUFE; + } + } if (stat & ~ack) device_printf(sc->dev, "dodgy irq: %x (harmless)\n", stat & ~ack); emu_wr(sc, EMU_IPR, stat, 4); if (ack) { snd_mtxunlock(sc->lock); if (ack & EMU_IPR_INTERVALTIMER) { x = 0; for (i = 0; i < sc->nchans; i++) { if (sc->pch[i].run) { x = 1; chn_intr(sc->pch[i].channel); } } if (x == 0) emu_enatimer(sc, 0); } if (ack & (EMU_IPR_ADCBUFFULL | EMU_IPR_ADCBUFHALFFULL)) { if (sc->rch[0].channel) chn_intr(sc->rch[0].channel); } if (ack & (EMU_IPR_EFXBUFFULL | EMU_IPR_EFXBUFHALFFULL)) { if (sc->rch[1].channel) chn_intr(sc->rch[1].channel); } if (ack & (EMU_IPR_MICBUFFULL | EMU_IPR_MICBUFHALFFULL)) { if (sc->rch[2].channel) chn_intr(sc->rch[2].channel); } snd_mtxlock(sc->lock); } } snd_mtxunlock(sc->lock); } /* -------------------------------------------------------------------- */ static void emu_setmap(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *phys = arg; *phys = error ? 
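/*
 * Ack protocol in emu_intr() above: read EMU_IPR until it returns 0,
 * collect the causes we recognize into ack, write the whole status
 * word back to EMU_IPR to clear it, and only then - with the softc
 * lock dropped - fan chn_intr() out to every running playback channel
 * and to whichever record source (ADC, EFX, Mic) flagged a half- or
 * full-buffer condition.
 */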
0 : (bus_addr_t)segs->ds_addr; if (bootverbose) { printf("emu: setmap (%lx, %lx), nseg=%d, error=%d\n", (unsigned long)segs->ds_addr, (unsigned long)segs->ds_len, nseg, error); } } static void * emu_malloc(struct sc_info *sc, u_int32_t sz, bus_addr_t *addr, bus_dmamap_t *map) { void *buf; *addr = 0; if (bus_dmamem_alloc(sc->parent_dmat, &buf, BUS_DMA_NOWAIT, map)) return NULL; if (bus_dmamap_load(sc->parent_dmat, *map, buf, sz, emu_setmap, addr, 0) || !*addr) { bus_dmamem_free(sc->parent_dmat, buf, *map); return NULL; } return buf; } static void emu_free(struct sc_info *sc, void *buf, bus_dmamap_t map) { bus_dmamap_unload(sc->parent_dmat, map); bus_dmamem_free(sc->parent_dmat, buf, map); } static void * emu_memalloc(struct sc_info *sc, u_int32_t sz, bus_addr_t *addr) { u_int32_t blksz, start, idx, ofs, tmp, found; struct emu_mem *mem = &sc->mem; struct emu_memblk *blk; void *buf; blksz = sz / EMUPAGESIZE; if (sz > (blksz * EMUPAGESIZE)) blksz++; /* find a free block in the bitmap */ found = 0; start = 1; while (!found && start + blksz < EMUMAXPAGES) { found = 1; for (idx = start; idx < start + blksz; idx++) if (mem->bmap[idx >> 3] & (1 << (idx & 7))) found = 0; if (!found) start++; } if (!found) return NULL; blk = malloc(sizeof(*blk), M_DEVBUF, M_NOWAIT); if (blk == NULL) return NULL; buf = emu_malloc(sc, sz, &blk->buf_addr, &blk->buf_map); *addr = blk->buf_addr; if (buf == NULL) { free(blk, M_DEVBUF); return NULL; } blk->buf = buf; blk->pte_start = start; blk->pte_size = blksz; #ifdef EMUDEBUG printf("buf %p, pte_start %d, pte_size %d\n", blk->buf, blk->pte_start, blk->pte_size); #endif ofs = 0; for (idx = start; idx < start + blksz; idx++) { mem->bmap[idx >> 3] |= 1 << (idx & 7); tmp = (uint32_t)(blk->buf_addr + ofs); #ifdef EMUDEBUG printf("pte[%d] -> %x phys, %x virt\n", idx, tmp, ((u_int32_t)buf) + ofs); #endif mem->ptb_pages[idx] = (tmp << 1) | idx; ofs += EMUPAGESIZE; } SLIST_INSERT_HEAD(&mem->blocks, blk, link); return buf; } static int emu_memfree(struct sc_info *sc, void *buf) { u_int32_t idx, tmp; struct emu_mem *mem = &sc->mem; struct emu_memblk *blk, *i; blk = NULL; SLIST_FOREACH(i, &mem->blocks, link) { if (i->buf == buf) blk = i; } if (blk == NULL) return EINVAL; SLIST_REMOVE(&mem->blocks, blk, emu_memblk, link); emu_free(sc, buf, blk->buf_map); tmp = (u_int32_t)(sc->mem.silent_page_addr) << 1; for (idx = blk->pte_start; idx < blk->pte_start + blk->pte_size; idx++) { mem->bmap[idx >> 3] &= ~(1 << (idx & 7)); mem->ptb_pages[idx] = tmp | idx; } free(blk, M_DEVBUF); return 0; } static int emu_memstart(struct sc_info *sc, void *buf) { struct emu_mem *mem = &sc->mem; struct emu_memblk *blk, *i; blk = NULL; SLIST_FOREACH(i, &mem->blocks, link) { if (i->buf == buf) blk = i; } if (blk == NULL) return -EINVAL; return blk->pte_start; } static void emu_addefxop(struct sc_info *sc, int op, int z, int w, int x, int y, u_int32_t *pc) { emu_wrefx(sc, (*pc) * 2, (x << 10) | y); emu_wrefx(sc, (*pc) * 2 + 1, (op << 20) | (z << 10) | w); (*pc)++; } static void audigy_addefxop(struct sc_info *sc, int op, int z, int w, int x, int y, u_int32_t *pc) { emu_wrefx(sc, (*pc) * 2, (x << 12) | y); emu_wrefx(sc, (*pc) * 2 + 1, (op << 24) | (z << 12) | w); (*pc)++; } static void audigy_initefx(struct sc_info *sc) { int i; u_int32_t pc = 0; /* skip 0, 0, -1, 0 - NOPs */ for (i = 0; i < 512; i++) audigy_addefxop(sc, 0x0f, 0x0c0, 0x0c0, 0x0cf, 0x0c0, &pc); for (i = 0; i < 512; i++) emu_wrptr(sc, 0, EMU_A_FXGPREGBASE + i, 0x0); pc = 16; /* stop fx processor */ emu_wrptr(sc, 0, EMU_A_DBG, 
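/*
 * Instruction encoding used by the two helpers above, as implied by
 * the shifts in the code: each DSP instruction is a pair of 32-bit
 * words,
 *
 *     EMU10K1: word0 = (x << 10) | y,  word1 = (op << 20) | (z << 10) | w
 *     Audigy:  word0 = (x << 12) | y,  word1 = (op << 24) | (z << 12) | w
 *
 * i.e. 10-bit operand fields on the Live! and 12-bit fields on the
 * Audigy, which is why the two chips need separate helpers and the
 * separate register maps commented below.
 */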
EMU_A_DBG_SINGLE_STEP); /* Audigy 2 (EMU10K2) DSP Registers: FX Bus 0x000-0x00f : 16 registers (?) Input 0x040/0x041 : AC97 Codec (l/r) 0x042/0x043 : ADC, S/PDIF (l/r) 0x044/0x045 : Optical S/PDIF in (l/r) 0x046/0x047 : ? 0x048/0x049 : Line/Mic 2 (l/r) 0x04a/0x04b : RCA S/PDIF (l/r) 0x04c/0x04d : Aux 2 (l/r) Output 0x060/0x061 : Digital Front (l/r) 0x062/0x063 : Digital Center/LFE 0x064/0x065 : AudigyDrive Headphone (l/r) 0x066/0x067 : Digital Rear (l/r) 0x068/0x069 : Analog Front (l/r) 0x06a/0x06b : Analog Center/LFE 0x06c/0x06d : ? 0x06e/0x06f : Analog Rear (l/r) 0x070/0x071 : AC97 Output (l/r) 0x072/0x073 : ? 0x074/0x075 : ? 0x076/0x077 : ADC Recording Buffer (l/r) Constants 0x0c0 - 0x0c4 = 0 - 4 0x0c5 = 0x8, 0x0c6 = 0x10, 0x0c7 = 0x20 0x0c8 = 0x100, 0x0c9 = 0x10000, 0x0ca = 0x80000 0x0cb = 0x10000000, 0x0cc = 0x20000000, 0x0cd = 0x40000000 0x0ce = 0x80000000, 0x0cf = 0x7fffffff, 0x0d0 = 0xffffffff 0x0d1 = 0xfffffffe, 0x0d2 = 0xc0000000, 0x0d3 = 0x41fbbcdc 0x0d4 = 0x5a7ef9db, 0x0d5 = 0x00100000, 0x0dc = 0x00000001 (?) Temporary Values 0x0d6 : Accumulator (?) 0x0d7 : Condition Register 0x0d8 : Noise source 0x0d9 : Noise source Tank Memory Data Registers 0x200 - 0x2ff Tank Memory Address Registers 0x300 - 0x3ff General Purpose Registers 0x400 - 0x5ff */ /* AC97Output[l/r] = FXBus PCM[l/r] */ audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_AC97_L), A_C_00000000, A_C_00000000, A_FXBUS(FXBUS_PCM_LEFT), &pc); audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_AC97_R), A_C_00000000, A_C_00000000, A_FXBUS(FXBUS_PCM_RIGHT), &pc); /* GPR[0/1] = RCA S/PDIF[l/r] -- Master volume */ audigy_addefxop(sc, iACC3, A_GPR(0), A_C_00000000, A_C_00000000, A_EXTIN(EXTIN_COAX_SPDIF_L), &pc); audigy_addefxop(sc, iACC3, A_GPR(1), A_C_00000000, A_C_00000000, A_EXTIN(EXTIN_COAX_SPDIF_R), &pc); /* GPR[2] = GPR[0] (Left) / 2 + GPR[1] (Right) / 2 -- Central volume */ audigy_addefxop(sc, iINTERP, A_GPR(2), A_GPR(1), A_C_40000000, A_GPR(0), &pc); /* Headphones[l/r] = GPR[0/1] */ audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_HEADPHONE_L), A_C_00000000, A_C_00000000, A_GPR(0), &pc); audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_HEADPHONE_R), A_C_00000000, A_C_00000000, A_GPR(1), &pc); /* Analog Front[l/r] = GPR[0/1] */ audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_AFRONT_L), A_C_00000000, A_C_00000000, A_GPR(0), &pc); audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_AFRONT_R), A_C_00000000, A_C_00000000, A_GPR(1), &pc); /* Digital Front[l/r] = GPR[0/1] */ audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_FRONT_L), A_C_00000000, A_C_00000000, A_GPR(0), &pc); audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_FRONT_R), A_C_00000000, A_C_00000000, A_GPR(1), &pc); /* Center and Subwoofer configuration */ /* Analog Center = GPR[0] + GPR[2] */ audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_ACENTER), A_C_00000000, A_GPR(0), A_GPR(2), &pc); /* Analog Sub = GPR[1] + GPR[2] */ audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_ALFE), A_C_00000000, A_GPR(1), A_GPR(2), &pc); /* Digital Center = GPR[0] + GPR[2] */ audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_CENTER), A_C_00000000, A_GPR(0), A_GPR(2), &pc); /* Digital Sub = GPR[1] + GPR[2] */ audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_LFE), A_C_00000000, A_GPR(1), A_GPR(2), &pc); #if 0 /* Analog Rear[l/r] = (GPR[0/1] * RearVolume[l/r]) >> 31 */ /* RearVolume = GPR[0x10/0x11] (Will this ever be implemented?)
*/ audigy_addefxop(sc, iMAC0, A_EXTOUT(A_EXTOUT_AREAR_L), A_C_00000000, A_GPR(16), A_GPR(0), &pc); audigy_addefxop(sc, iMAC0, A_EXTOUT(A_EXTOUT_AREAR_R), A_C_00000000, A_GPR(17), A_GPR(1), &pc); /* Digital Rear[l/r] = (GPR[0/1] * RearVolume[l/r]) >> 31 */ /* RearVolume = GPR[0x10/0x11] (Will this ever be implemented?) */ audigy_addefxop(sc, iMAC0, A_EXTOUT(A_EXTOUT_REAR_L), A_C_00000000, A_GPR(16), A_GPR(0), &pc); audigy_addefxop(sc, iMAC0, A_EXTOUT(A_EXTOUT_REAR_R), A_C_00000000, A_GPR(17), A_GPR(1), &pc); #else /* XXX This is just a copy to the channel, since we do not have * a patch manager, it is useful to have another output enabled. */ /* Analog Rear[l/r] = GPR[0/1] */ audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_AREAR_L), A_C_00000000, A_C_00000000, A_GPR(0), &pc); audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_AREAR_R), A_C_00000000, A_C_00000000, A_GPR(1), &pc); /* Digital Rear[l/r] = GPR[0/1] */ audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_REAR_L), A_C_00000000, A_C_00000000, A_GPR(0), &pc); audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_REAR_R), A_C_00000000, A_C_00000000, A_GPR(1), &pc); #endif /* ADC Recording buffer[l/r] = AC97Input[l/r] */ audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_ADC_CAP_L), A_C_00000000, A_C_00000000, A_EXTIN(A_EXTIN_AC97_L), &pc); audigy_addefxop(sc, iACC3, A_EXTOUT(A_EXTOUT_ADC_CAP_R), A_C_00000000, A_C_00000000, A_EXTIN(A_EXTIN_AC97_R), &pc); /* resume normal operations */ emu_wrptr(sc, 0, EMU_A_DBG, 0); } static void emu_initefx(struct sc_info *sc) { int i; u_int32_t pc = 16; /* acc3 0,0,0,0 - NOPs */ for (i = 0; i < 512; i++) { emu_wrefx(sc, i * 2, 0x10040); emu_wrefx(sc, i * 2 + 1, 0x610040); } for (i = 0; i < 256; i++) emu_wrptr(sc, 0, EMU_FXGPREGBASE + i, 0); /* FX-8010 DSP Registers: FX Bus 0x000-0x00f : 16 registers Input 0x010/0x011 : AC97 Codec (l/r) 0x012/0x013 : ADC, S/PDIF (l/r) 0x014/0x015 : Mic(left), Zoom (l/r) 0x016/0x017 : TOS link in (l/r) 0x018/0x019 : Line/Mic 1 (l/r) 0x01a/0x01b : COAX S/PDIF (l/r) 0x01c/0x01d : Line/Mic 2 (l/r) Output 0x020/0x021 : AC97 Output (l/r) 0x022/0x023 : TOS link out (l/r) 0x024/0x025 : Center/LFE 0x026/0x027 : LiveDrive Headphone (l/r) 0x028/0x029 : Rear Channel (l/r) 0x02a/0x02b : ADC Recording Buffer (l/r) 0x02c : Mic Recording Buffer 0x031/0x032 : Analog Center/LFE Constants 0x040 - 0x044 = 0 - 4 0x045 = 0x8, 0x046 = 0x10, 0x047 = 0x20 0x048 = 0x100, 0x049 = 0x10000, 0x04a = 0x80000 0x04b = 0x10000000, 0x04c = 0x20000000, 0x04d = 0x40000000 0x04e = 0x80000000, 0x04f = 0x7fffffff, 0x050 = 0xffffffff 0x051 = 0xfffffffe, 0x052 = 0xc0000000, 0x053 = 0x41fbbcdc 0x054 = 0x5a7ef9db, 0x055 = 0x00100000 Temporary Values 0x056 : Accumulator 0x057 : Condition Register 0x058 : Noise source 0x059 : Noise source 0x05a : IRQ Register 0x05b : TRAM Delay Base Address Count General Purpose Registers 0x100 - 0x1ff Tank Memory Data Registers 0x200 - 0x2ff Tank Memory Address Registers 0x300 - 0x3ff */ /* Routing - this will be configurable in a later version */ /* GPR[0/1] = FX * 4 + SPDIF-in */ emu_addefxop(sc, iMACINT0, GPR(0), EXTIN(EXTIN_SPDIF_CD_L), FXBUS(FXBUS_PCM_LEFT), C_00000004, &pc); emu_addefxop(sc, iMACINT0, GPR(1), EXTIN(EXTIN_SPDIF_CD_R), FXBUS(FXBUS_PCM_RIGHT), C_00000004, &pc); /* GPR[0/1] += APS-input */ emu_addefxop(sc, iACC3, GPR(0), GPR(0), C_00000000, sc->APS ? EXTIN(EXTIN_TOSLINK_L) : C_00000000, &pc); emu_addefxop(sc, iACC3, GPR(1), GPR(1), C_00000000, sc->APS ?
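/*
 * Opcode semantics for the routing code in this function and in
 * audigy_initefx() above, as commonly documented for the FX8010 DSP
 * (paraphrased here, not part of the original source):
 *
 *     iACC3:    R = A + X + Y
 *     iMAC0:    R = A + (X * Y) >> 31        (fractional multiply)
 *     iMACINT0: R = A + X * Y                (integer multiply)
 *     iINTERP:  R = A + (X * (Y - A)) >> 31  (linear interpolation)
 *
 * So the "Central volume" op, iINTERP with A = GPR(1),
 * X = C_40000000 (0.5) and Y = GPR(0), computes
 * GPR(1) + 0.5 * (GPR(0) - GPR(1)) = (GPR(0) + GPR(1)) / 2.
 */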
EXTIN(EXTIN_TOSLINK_R) : C_00000000, &pc); /* FrontOut (AC97) = GPR[0/1] */ emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_AC97_L), C_00000000, C_00000000, GPR(0), &pc); emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_AC97_R), C_00000000, C_00000001, GPR(1), &pc); /* GPR[2] = GPR[0] (Left) / 2 + GPR[1] (Right) / 2 -- Central volume */ emu_addefxop(sc, iINTERP, GPR(2), GPR(1), C_40000000, GPR(0), &pc); #if 0 /* RearOut = (GPR[0/1] * RearVolume) >> 31 */ /* RearVolume = GPR[0x10/0x11] */ emu_addefxop(sc, iMAC0, EXTOUT(EXTOUT_REAR_L), C_00000000, GPR(16), GPR(0), &pc); emu_addefxop(sc, iMAC0, EXTOUT(EXTOUT_REAR_R), C_00000000, GPR(17), GPR(1), &pc); #else /* XXX This is just a copy to the channel, since we do not have * a patch manager, it is useful to have another output enabled. */ /* Rear[l/r] = GPR[0/1] */ emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_REAR_L), C_00000000, C_00000000, GPR(0), &pc); emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_REAR_R), C_00000000, C_00000000, GPR(1), &pc); #endif /* TOS out[l/r] = GPR[0/1] */ emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_TOSLINK_L), C_00000000, C_00000000, GPR(0), &pc); emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_TOSLINK_R), C_00000000, C_00000000, GPR(1), &pc); /* Center and Subwoofer configuration */ /* Analog Center = GPR[0] + GPR[2] */ emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_ACENTER), C_00000000, GPR(0), GPR(2), &pc); /* Analog Sub = GPR[1] + GPR[2] */ emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_ALFE), C_00000000, GPR(1), GPR(2), &pc); /* Digital Center = GPR[0] + GPR[2] */ emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_AC97_CENTER), C_00000000, GPR(0), GPR(2), &pc); /* Digital Sub = GPR[1] + GPR[2] */ emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_AC97_LFE), C_00000000, GPR(1), GPR(2), &pc); /* Headphones[l/r] = GPR[0/1] */ emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_HEADPHONE_L), C_00000000, C_00000000, GPR(0), &pc); emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_HEADPHONE_R), C_00000000, C_00000000, GPR(1), &pc); /* ADC Recording buffer[l/r] = AC97Input[l/r] */ emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_ADC_CAP_L), C_00000000, C_00000000, EXTIN(EXTIN_AC97_L), &pc); emu_addefxop(sc, iACC3, EXTOUT(EXTOUT_ADC_CAP_R), C_00000000, C_00000000, EXTIN(EXTIN_AC97_R), &pc); /* resume normal operations */ emu_wrptr(sc, 0, EMU_DBG, 0); } /* Probe and attach the card */ static int emu_init(struct sc_info *sc) { u_int32_t spcs, ch, tmp, i; if (sc->audigy) { /* enable additional AC97 slots */ emu_wrptr(sc, 0, EMU_AC97SLOT, EMU_AC97SLOT_CENTER | EMU_AC97SLOT_LFE); } /* disable audio and lock cache */ emu_wr(sc, EMU_HCFG, EMU_HCFG_LOCKSOUNDCACHE | EMU_HCFG_LOCKTANKCACHE_MASK | EMU_HCFG_MUTEBUTTONENABLE, 4); /* reset recording buffers */ emu_wrptr(sc, 0, EMU_MICBS, EMU_RECBS_BUFSIZE_NONE); emu_wrptr(sc, 0, EMU_MICBA, 0); emu_wrptr(sc, 0, EMU_FXBS, EMU_RECBS_BUFSIZE_NONE); emu_wrptr(sc, 0, EMU_FXBA, 0); emu_wrptr(sc, 0, EMU_ADCBS, EMU_RECBS_BUFSIZE_NONE); emu_wrptr(sc, 0, EMU_ADCBA, 0); /* disable channel interrupt */ emu_wr(sc, EMU_INTE, EMU_INTE_INTERTIMERENB | EMU_INTE_SAMPLERATER | EMU_INTE_PCIERRENABLE, 4); emu_wrptr(sc, 0, EMU_CLIEL, 0); emu_wrptr(sc, 0, EMU_CLIEH, 0); emu_wrptr(sc, 0, EMU_SOLEL, 0); emu_wrptr(sc, 0, EMU_SOLEH, 0); /* wonder what these do... 
*/ if (sc->audigy) { emu_wrptr(sc, 0, EMU_SPBYPASS, 0xf00); emu_wrptr(sc, 0, EMU_AC97SLOT, 0x3); } /* init envelope engine */ for (ch = 0; ch < NUM_G; ch++) { emu_wrptr(sc, ch, EMU_CHAN_DCYSUSV, ENV_OFF); emu_wrptr(sc, ch, EMU_CHAN_IP, 0); emu_wrptr(sc, ch, EMU_CHAN_VTFT, 0xffff); emu_wrptr(sc, ch, EMU_CHAN_CVCF, 0xffff); emu_wrptr(sc, ch, EMU_CHAN_PTRX, 0); emu_wrptr(sc, ch, EMU_CHAN_CPF, 0); emu_wrptr(sc, ch, EMU_CHAN_CCR, 0); emu_wrptr(sc, ch, EMU_CHAN_PSST, 0); emu_wrptr(sc, ch, EMU_CHAN_DSL, 0x10); emu_wrptr(sc, ch, EMU_CHAN_CCCA, 0); emu_wrptr(sc, ch, EMU_CHAN_Z1, 0); emu_wrptr(sc, ch, EMU_CHAN_Z2, 0); emu_wrptr(sc, ch, EMU_CHAN_FXRT, 0xd01c0000); emu_wrptr(sc, ch, EMU_CHAN_ATKHLDM, 0); emu_wrptr(sc, ch, EMU_CHAN_DCYSUSM, 0); emu_wrptr(sc, ch, EMU_CHAN_IFATN, 0xffff); emu_wrptr(sc, ch, EMU_CHAN_PEFE, 0); emu_wrptr(sc, ch, EMU_CHAN_FMMOD, 0); emu_wrptr(sc, ch, EMU_CHAN_TREMFRQ, 24); /* 1 Hz */ emu_wrptr(sc, ch, EMU_CHAN_FM2FRQ2, 24); /* 1 Hz */ emu_wrptr(sc, ch, EMU_CHAN_TEMPENV, 0); /*** these are last so OFF prevents writing ***/ emu_wrptr(sc, ch, EMU_CHAN_LFOVAL2, 0); emu_wrptr(sc, ch, EMU_CHAN_LFOVAL1, 0); emu_wrptr(sc, ch, EMU_CHAN_ATKHLDV, 0); emu_wrptr(sc, ch, EMU_CHAN_ENVVOL, 0); emu_wrptr(sc, ch, EMU_CHAN_ENVVAL, 0); if (sc->audigy) { /* audigy cards need this to initialize correctly */ emu_wrptr(sc, ch, 0x4c, 0); emu_wrptr(sc, ch, 0x4d, 0); emu_wrptr(sc, ch, 0x4e, 0); emu_wrptr(sc, ch, 0x4f, 0); /* set default routing */ emu_wrptr(sc, ch, EMU_A_CHAN_FXRT1, 0x03020100); emu_wrptr(sc, ch, EMU_A_CHAN_FXRT2, 0x3f3f3f3f); emu_wrptr(sc, ch, EMU_A_CHAN_SENDAMOUNTS, 0); } sc->voice[ch].vnum = ch; sc->voice[ch].slave = NULL; sc->voice[ch].busy = 0; sc->voice[ch].ismaster = 0; sc->voice[ch].running = 0; sc->voice[ch].b16 = 0; sc->voice[ch].stereo = 0; sc->voice[ch].speed = 0; sc->voice[ch].start = 0; sc->voice[ch].end = 0; sc->voice[ch].channel = NULL; } sc->pnum = sc->rnum = 0; /* * Init to 0x02109204 : * Clock accuracy = 0 (1000ppm) * Sample Rate = 2 (48kHz) * Audio Channel = 1 (Left of 2) * Source Number = 0 (Unspecified) * Generation Status = 1 (Original for Cat Code 12) * Cat Code = 12 (Digital Signal Mixer) * Mode = 0 (Mode 0) * Emphasis = 0 (None) * CP = 1 (Copyright unasserted) * AN = 0 (Audio data) * P = 0 (Consumer) */ spcs = EMU_SPCS_CLKACCY_1000PPM | EMU_SPCS_SAMPLERATE_48 | EMU_SPCS_CHANNELNUM_LEFT | EMU_SPCS_SOURCENUM_UNSPEC | EMU_SPCS_GENERATIONSTATUS | 0x00001200 | 0x00000000 | EMU_SPCS_EMPHASIS_NONE | EMU_SPCS_COPYRIGHT; emu_wrptr(sc, 0, EMU_SPCS0, spcs); emu_wrptr(sc, 0, EMU_SPCS1, spcs); emu_wrptr(sc, 0, EMU_SPCS2, spcs); if (!sc->audigy) emu_initefx(sc); else if (sc->audigy2) { /* Audigy 2 */ /* from ALSA initialization code: */ /* Hack for Alice3 to work independent of haP16V driver */ u_int32_t tmp; /* Setup SRCMulti_I2S SamplingRate */ tmp = emu_rdptr(sc, 0, EMU_A_SPDIF_SAMPLERATE) & 0xfffff1ff; emu_wrptr(sc, 0, EMU_A_SPDIF_SAMPLERATE, tmp | 0x400); /* Setup SRCSel (Enable SPDIF, I2S SRCMulti) */ emu_wr(sc, 0x20, 0x00600000, 4); emu_wr(sc, 0x24, 0x00000014, 4); /* Setup SRCMulti Input Audio Enable */ emu_wr(sc, 0x20, 0x006e0000, 4); emu_wr(sc, 0x24, 0xff00ff00, 4); } SLIST_INIT(&sc->mem.blocks); sc->mem.ptb_pages = emu_malloc(sc, EMUMAXPAGES * sizeof(u_int32_t), &sc->mem.ptb_pages_addr, &sc->mem.ptb_map); if (sc->mem.ptb_pages == NULL) return -1; sc->mem.silent_page = emu_malloc(sc, EMUPAGESIZE, &sc->mem.silent_page_addr, &sc->mem.silent_map); if (sc->mem.silent_page == NULL) { emu_free(sc, sc->mem.ptb_pages, sc->mem.ptb_map); return -1; } /* Clear page with 
silence & setup all pointers to this page */ bzero(sc->mem.silent_page, EMUPAGESIZE); tmp = (u_int32_t)(sc->mem.silent_page_addr) << 1; for (i = 0; i < EMUMAXPAGES; i++) sc->mem.ptb_pages[i] = tmp | i; emu_wrptr(sc, 0, EMU_PTB, (sc->mem.ptb_pages_addr)); emu_wrptr(sc, 0, EMU_TCB, 0); /* taken from original driver */ emu_wrptr(sc, 0, EMU_TCBS, 0); /* taken from original driver */ for (ch = 0; ch < NUM_G; ch++) { emu_wrptr(sc, ch, EMU_CHAN_MAPA, tmp | EMU_CHAN_MAP_PTI_MASK); emu_wrptr(sc, ch, EMU_CHAN_MAPB, tmp | EMU_CHAN_MAP_PTI_MASK); } /* emu_memalloc(sc, EMUPAGESIZE); */ /* * Hokay, now enable the AUD bit * * Audigy * Enable Audio = 0 (enabled after fx processor initialization) * Mute Disable Audio = 0 * Joystick = 1 * * Audigy 2 * Enable Audio = 1 * Mute Disable Audio = 0 * Joystick = 1 * GP S/PDIF AC3 Enable = 1 * CD S/PDIF AC3 Enable = 1 * * EMU10K1 * Enable Audio = 1 * Mute Disable Audio = 0 * Lock Tank Memory = 1 * Lock Sound Memory = 0 * Auto Mute = 1 */ if (sc->audigy) { tmp = EMU_HCFG_AUTOMUTE | EMU_HCFG_JOYENABLE; if (sc->audigy2) /* Audigy 2 */ tmp = EMU_HCFG_AUDIOENABLE | EMU_HCFG_AC3ENABLE_CDSPDIF | EMU_HCFG_AC3ENABLE_GPSPDIF; emu_wr(sc, EMU_HCFG, tmp, 4); audigy_initefx(sc); /* from ALSA initialization code: */ /* enable audio and disable both audio/digital outputs */ emu_wr(sc, EMU_HCFG, emu_rd(sc, EMU_HCFG, 4) | EMU_HCFG_AUDIOENABLE, 4); emu_wr(sc, EMU_A_IOCFG, emu_rd(sc, EMU_A_IOCFG, 4) & ~EMU_A_IOCFG_GPOUT_AD, 4); if (sc->audigy2) { /* Audigy 2 */ /* Unmute Analog. * Set GPO6 to 1 for Apollo. This has to be done after * init Alice3 I2SOut beyond 48kHz. * So, sequence is important. */ emu_wr(sc, EMU_A_IOCFG, emu_rd(sc, EMU_A_IOCFG, 4) | EMU_A_IOCFG_GPOUT_A, 4); } } else { /* EMU10K1 initialization code */ tmp = EMU_HCFG_AUDIOENABLE | EMU_HCFG_LOCKTANKCACHE_MASK | EMU_HCFG_AUTOMUTE; if (sc->rev >= 6) tmp |= EMU_HCFG_JOYENABLE; emu_wr(sc, EMU_HCFG, tmp, 4); /* TOSLink detection */ sc->tos_link = 0; tmp = emu_rd(sc, EMU_HCFG, 4); if (tmp & (EMU_HCFG_GPINPUT0 | EMU_HCFG_GPINPUT1)) { emu_wr(sc, EMU_HCFG, tmp | EMU_HCFG_GPOUT1, 4); DELAY(50); if (tmp != (emu_rd(sc, EMU_HCFG, 4) & ~EMU_HCFG_GPOUT1)) { sc->tos_link = 1; emu_wr(sc, EMU_HCFG, tmp, 4); } } } return 0; } static int emu_uninit(struct sc_info *sc) { u_int32_t ch; emu_wr(sc, EMU_INTE, 0, 4); for (ch = 0; ch < NUM_G; ch++) emu_wrptr(sc, ch, EMU_CHAN_DCYSUSV, ENV_OFF); for (ch = 0; ch < NUM_G; ch++) { emu_wrptr(sc, ch, EMU_CHAN_VTFT, 0); emu_wrptr(sc, ch, EMU_CHAN_CVCF, 0); emu_wrptr(sc, ch, EMU_CHAN_PTRX, 0); emu_wrptr(sc, ch, EMU_CHAN_CPF, 0); } if (sc->audigy) { /* stop fx processor */ emu_wrptr(sc, 0, EMU_A_DBG, EMU_A_DBG_SINGLE_STEP); } /* disable audio and lock cache */ emu_wr(sc, EMU_HCFG, EMU_HCFG_LOCKSOUNDCACHE | EMU_HCFG_LOCKTANKCACHE_MASK | EMU_HCFG_MUTEBUTTONENABLE, 4); emu_wrptr(sc, 0, EMU_PTB, 0); /* reset recording buffers */ emu_wrptr(sc, 0, EMU_MICBS, EMU_RECBS_BUFSIZE_NONE); emu_wrptr(sc, 0, EMU_MICBA, 0); emu_wrptr(sc, 0, EMU_FXBS, EMU_RECBS_BUFSIZE_NONE); emu_wrptr(sc, 0, EMU_FXBA, 0); emu_wrptr(sc, 0, EMU_FXWC, 0); emu_wrptr(sc, 0, EMU_ADCBS, EMU_RECBS_BUFSIZE_NONE); emu_wrptr(sc, 0, EMU_ADCBA, 0); emu_wrptr(sc, 0, EMU_TCB, 0); emu_wrptr(sc, 0, EMU_TCBS, 0); /* disable channel interrupt */ emu_wrptr(sc, 0, EMU_CLIEL, 0); emu_wrptr(sc, 0, EMU_CLIEH, 0); emu_wrptr(sc, 0, EMU_SOLEL, 0); emu_wrptr(sc, 0, EMU_SOLEH, 0); /* init envelope engine */ if (!SLIST_EMPTY(&sc->mem.blocks)) device_printf(sc->dev, "warning: memblock list not empty\n"); emu_free(sc, sc->mem.ptb_pages, sc->mem.ptb_map); emu_free(sc, 
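/*
 * Page-table recap for the memory code above (a summary of existing
 * behaviour): each PTE is (physical address << 1) | page index, so a
 * 32-bit entry leaves only 31 usable address bits, which lines up
 * with the 2 GB lowaddr limit on the DMA tag created in
 * emu_pci_attach() below.  Every unused PTE points at the single
 * silent page, so a voice running past its buffer fetches silence
 * rather than random memory.
 */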
sc->mem.silent_page, sc->mem.silent_map); if(sc->mpu) mpu401_uninit(sc->mpu); return 0; } static int emu_pci_probe(device_t dev) { char *s = NULL; switch (pci_get_devid(dev)) { case EMU10K1_PCI_ID: s = "Creative EMU10K1"; break; case EMU10K2_PCI_ID: if (pci_get_revid(dev) == 0x04) s = "Creative Audigy 2 (EMU10K2)"; else s = "Creative Audigy (EMU10K2)"; break; case EMU10K3_PCI_ID: s = "Creative Audigy 2 (EMU10K3)"; break; default: return ENXIO; } device_set_desc(dev, s); return BUS_PROBE_LOW_PRIORITY; } static int emu_pci_attach(device_t dev) { struct ac97_info *codec = NULL; struct sc_info *sc; int i, gotmic; char status[SND_STATUSLEN]; sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO); sc->lock = snd_mtxcreate(device_get_nameunit(dev), "snd_emu10k1 softc"); sc->dev = dev; sc->type = pci_get_devid(dev); sc->rev = pci_get_revid(dev); sc->audigy = sc->type == EMU10K2_PCI_ID || sc->type == EMU10K3_PCI_ID; sc->audigy2 = (sc->audigy && sc->rev == 0x04); sc->nchans = sc->audigy ? 8 : 4; sc->addrmask = sc->audigy ? EMU_A_PTR_ADDR_MASK : EMU_PTR_ADDR_MASK; pci_enable_busmaster(dev); i = PCIR_BAR(0); sc->reg = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &i, RF_ACTIVE); if (sc->reg == NULL) { device_printf(dev, "unable to map register space\n"); goto bad; } sc->st = rman_get_bustag(sc->reg); sc->sh = rman_get_bushandle(sc->reg); sc->bufsz = pcm_getbuffersize(dev, 4096, EMU_DEFAULT_BUFSZ, 65536); if (bus_dma_tag_create(/*parent*/bus_get_dma_tag(dev), /*alignment*/2, /*boundary*/0, /*lowaddr*/(1U << 31) - 1, /* can only access 0-2gb */ /*highaddr*/BUS_SPACE_MAXADDR, /*filter*/NULL, /*filterarg*/NULL, /*maxsize*/sc->bufsz, /*nsegments*/1, /*maxsegz*/0x3ffff, /*flags*/0, /*lockfunc*/busdma_lock_mutex, /*lockarg*/&Giant, &sc->parent_dmat) != 0) { device_printf(dev, "unable to create dma tag\n"); goto bad; } if (emu_init(sc) == -1) { device_printf(dev, "unable to initialize the card\n"); goto bad; } codec = AC97_CREATE(dev, sc, emu_ac97); if (codec == NULL) goto bad; gotmic = (ac97_getcaps(codec) & AC97_CAP_MICCHANNEL) ? 1 : 0; if (mixer_init(dev, ac97_getmixerclass(), codec) == -1) goto bad; emu_midiattach(sc); i = 0; sc->irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &i, RF_ACTIVE | RF_SHAREABLE); if (!sc->irq || snd_setup_intr(dev, sc->irq, INTR_MPSAFE, emu_intr, sc, &sc->ih)) { device_printf(dev, "unable to map interrupt\n"); goto bad; } snprintf(status, SND_STATUSLEN, "at io 0x%jx irq %jd %s", rman_get_start(sc->reg), rman_get_start(sc->irq), PCM_KLDSTRING(snd_emu10k1)); if (pcm_register(dev, sc, sc->nchans, gotmic ? 3 : 2)) goto bad; for (i = 0; i < sc->nchans; i++) pcm_addchan(dev, PCMDIR_PLAY, &emupchan_class, sc); for (i = 0; i < (gotmic ? 
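/*
 * Channel bookkeeping in the attach path above: sc->nchans is 8 on
 * Audigy and 4 on Live!, and pcm_register() is told gotmic ? 3 : 2
 * record channels - ADC and EFX always, plus the dedicated Mic buffer
 * only when the AC97 codec advertises AC97_CAP_MICCHANNEL.
 */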
3 : 2); i++) pcm_addchan(dev, PCMDIR_REC, &emurchan_class, sc); pcm_setstatus(dev, status); return 0; bad: if (codec) ac97_destroy(codec); if (sc->reg) bus_release_resource(dev, SYS_RES_IOPORT, PCIR_BAR(0), sc->reg); if (sc->ih) bus_teardown_intr(dev, sc->irq, sc->ih); if (sc->irq) bus_release_resource(dev, SYS_RES_IRQ, 0, sc->irq); if (sc->parent_dmat) bus_dma_tag_destroy(sc->parent_dmat); if (sc->lock) snd_mtxfree(sc->lock); free(sc, M_DEVBUF); return ENXIO; } static int emu_pci_detach(device_t dev) { int r; struct sc_info *sc; r = pcm_unregister(dev); if (r) return r; sc = pcm_getdevinfo(dev); /* shutdown chip */ emu_uninit(sc); bus_release_resource(dev, SYS_RES_IOPORT, PCIR_BAR(0), sc->reg); bus_teardown_intr(dev, sc->irq, sc->ih); bus_release_resource(dev, SYS_RES_IRQ, 0, sc->irq); bus_dma_tag_destroy(sc->parent_dmat); snd_mtxfree(sc->lock); free(sc, M_DEVBUF); return 0; } /* add suspend, resume */ static device_method_t emu_methods[] = { /* Device interface */ DEVMETHOD(device_probe, emu_pci_probe), DEVMETHOD(device_attach, emu_pci_attach), DEVMETHOD(device_detach, emu_pci_detach), DEVMETHOD_END }; static driver_t emu_driver = { "pcm", emu_methods, PCM_SOFTC_SIZE, }; DRIVER_MODULE(snd_emu10k1, pci, emu_driver, pcm_devclass, NULL, NULL); MODULE_DEPEND(snd_emu10k1, sound, SOUND_MINVER, SOUND_PREFVER, SOUND_MAXVER); MODULE_VERSION(snd_emu10k1, 1); MODULE_DEPEND(snd_emu10k1, midi, 1, 1, 1); /* dummy driver to silence the joystick device */ static int emujoy_pci_probe(device_t dev) { char *s = NULL; switch (pci_get_devid(dev)) { case 0x70021102: s = "Creative EMU10K1 Joystick"; device_quiet(dev); break; case 0x70031102: s = "Creative EMU10K2 Joystick"; device_quiet(dev); break; } if (s) device_set_desc(dev, s); return s ? -1000 : ENXIO; } static int emujoy_pci_attach(device_t dev) { return 0; } static int emujoy_pci_detach(device_t dev) { return 0; } static device_method_t emujoy_methods[] = { DEVMETHOD(device_probe, emujoy_pci_probe), DEVMETHOD(device_attach, emujoy_pci_attach), DEVMETHOD(device_detach, emujoy_pci_detach), DEVMETHOD_END }; static driver_t emujoy_driver = { "emujoy", emujoy_methods, 1 /* no softc */ }; static devclass_t emujoy_devclass; DRIVER_MODULE(emujoy, pci, emujoy_driver, emujoy_devclass, NULL, NULL); Index: projects/clang1000-import/sys/dev/sound/pci/emu10kx-pcm.c =================================================================== --- projects/clang1000-import/sys/dev/sound/pci/emu10kx-pcm.c (revision 357178) +++ projects/clang1000-import/sys/dev/sound/pci/emu10kx-pcm.c (revision 357179) @@ -1,1540 +1,1541 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1999 Cameron Grant * Copyright (c) 2003-2007 Yuriy Tsibizov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_snd.h" #endif #include #include #include #include "mixer_if.h" #include #include struct emu_pcm_pchinfo { int spd; int fmt; unsigned int blksz; int run; struct emu_voice *master; struct emu_voice *slave; struct snd_dbuf *buffer; struct pcm_channel *channel; struct emu_pcm_info *pcm; int timer; }; struct emu_pcm_rchinfo { int spd; int fmt; unsigned int blksz; int run; uint32_t idxreg; uint32_t basereg; uint32_t sizereg; uint32_t setupreg; uint32_t irqmask; uint32_t iprmask; int ihandle; struct snd_dbuf *buffer; struct pcm_channel *channel; struct emu_pcm_info *pcm; int timer; }; /* XXX Hardware playback channels */ #define MAX_CHANNELS 4 #if MAX_CHANNELS > 13 #error Too many hardware channels defined. 13 is the maximum #endif struct emu_pcm_info { struct mtx *lock; device_t dev; /* device information */ struct emu_sc_info *card; struct emu_pcm_pchinfo pch[MAX_CHANNELS]; /* hardware channels */ int pnum; /* next free channel number */ struct emu_pcm_rchinfo rch_adc; struct emu_pcm_rchinfo rch_efx; struct emu_route rt; struct emu_route rt_mono; int route; int ihandle; /* interrupt handler */ unsigned int bufsz; int is_emu10k1; struct ac97_info *codec; uint32_t ac97_state[0x7F]; kobj_class_t ac97_mixerclass; uint32_t ac97_recdevs; uint32_t ac97_playdevs; struct snd_mixer *sm; int mch_disabled; unsigned int emu10k1_volcache[2][2]; }; static uint32_t emu_rfmt_adc[] = { SND_FORMAT(AFMT_S16_LE, 1, 0), SND_FORMAT(AFMT_S16_LE, 2, 0), 0 }; static struct pcmchan_caps emu_reccaps_adc = { 8000, 48000, emu_rfmt_adc, 0 }; static uint32_t emu_rfmt_efx[] = { SND_FORMAT(AFMT_S16_LE, 1, 0), 0 }; static struct pcmchan_caps emu_reccaps_efx_live = { 48000*32, 48000*32, emu_rfmt_efx, 0 }; static struct pcmchan_caps emu_reccaps_efx_audigy = { 48000*64, 48000*64, emu_rfmt_efx, 0 }; static int emu_rates_live[] = { 48000*32 }; static int emu_rates_audigy[] = { 48000*64 }; static uint32_t emu_pfmt[] = { SND_FORMAT(AFMT_U8, 1, 0), SND_FORMAT(AFMT_U8, 2, 0), SND_FORMAT(AFMT_S16_LE, 1, 0), SND_FORMAT(AFMT_S16_LE, 2, 0), 0 }; static uint32_t emu_pfmt_mono[] = { SND_FORMAT(AFMT_U8, 1, 0), SND_FORMAT(AFMT_S16_LE, 1, 0), 0 }; static struct pcmchan_caps emu_playcaps = {4000, 48000, emu_pfmt, 0}; static struct pcmchan_caps emu_playcaps_mono = {4000, 48000, emu_pfmt_mono, 0}; static int emu10k1_adcspeed[8] = {48000, 44100, 32000, 24000, 22050, 16000, 11025, 8000}; /* audigy supports 12kHz. 
*/ static int emu10k2_adcspeed[9] = {48000, 44100, 32000, 24000, 22050, 16000, 12000, 11025, 8000}; static uint32_t emu_pcm_intr(void *pcm, uint32_t stat); static const struct emu_dspmix_props_k1 { uint8_t present; uint8_t recdev; int8_t input; } dspmix_k1 [SOUND_MIXER_NRDEVICES] = { /* no mixer device for ac97 */ /* in0 AC97 */ [SOUND_MIXER_DIGITAL1] = {1, 1, 1}, /* in1 CD SPDIF */ /* not connected */ /* in2 (zoom) */ [SOUND_MIXER_DIGITAL2] = {1, 1, 3}, /* in3 toslink */ [SOUND_MIXER_LINE2] = {1, 1, 4}, /* in4 Line-In2 */ [SOUND_MIXER_DIGITAL3] = {1, 1, 5}, /* in5 on-card SPDIF */ [SOUND_MIXER_LINE3] = {1, 1, 6}, /* in6 AUX2 */ /* not connected */ /* in7 */ }; static const struct emu_dspmix_props_k2 { uint8_t present; uint8_t recdev; int8_t input; } dspmix_k2 [SOUND_MIXER_NRDEVICES] = { [SOUND_MIXER_VOLUME] = {1, 0, (-1)}, [SOUND_MIXER_PCM] = {1, 0, (-1)}, /* no mixer device */ /* in0 AC97 */ [SOUND_MIXER_DIGITAL1] = {1, 1, 1}, /* in1 CD SPDIF */ [SOUND_MIXER_DIGITAL2] = {1, 1, 2}, /* in2 COAX SPDIF */ /* not connected */ /* in3 */ [SOUND_MIXER_LINE2] = {1, 1, 4}, /* in4 Line-In2 */ [SOUND_MIXER_DIGITAL3] = {1, 1, 5}, /* in5 on-card SPDIF */ [SOUND_MIXER_LINE3] = {1, 1, 6}, /* in6 AUX2 */ /* not connected */ /* in7 */ }; static int emu_dspmixer_init(struct snd_mixer *m) { struct emu_pcm_info *sc; int i; int p, r; p = 0; r = 0; sc = mix_getdevinfo(m); if (sc->route == RT_FRONT) { /* create submixer for AC97 codec */ if ((sc->ac97_mixerclass != NULL) && (sc->codec != NULL)) { sc->sm = mixer_create(sc->dev, sc->ac97_mixerclass, sc->codec, "ac97"); if (sc->sm != NULL) { p = mix_getdevs(sc->sm); r = mix_getrecdevs(sc->sm); } } sc->ac97_playdevs = p; sc->ac97_recdevs = r; } /* These two are always here */ p |= (1 << SOUND_MIXER_PCM); p |= (1 << SOUND_MIXER_VOLUME); if (sc->route == RT_FRONT) { if (sc->is_emu10k1) { for (i = 0; i < SOUND_MIXER_NRDEVICES; i++) { if (dspmix_k1[i].present) p |= (1 << i); if (dspmix_k1[i].recdev) r |= (1 << i); } } else { for (i = 0; i < SOUND_MIXER_NRDEVICES; i++) { if (dspmix_k2[i].present) p |= (1 << i); if (dspmix_k2[i].recdev) r |= (1 << i); } } } mix_setdevs(m, p); mix_setrecdevs(m, r); return (0); } static int emu_dspmixer_uninit(struct snd_mixer *m) { struct emu_pcm_info *sc; int err = 0; /* drop submixer for AC97 codec */ sc = mix_getdevinfo(m); - if (sc->sm != NULL) + if (sc->sm != NULL) { err = mixer_delete(sc->sm); if (err) return (err); sc->sm = NULL; + } return (0); } static int emu_dspmixer_set(struct snd_mixer *m, unsigned dev, unsigned left, unsigned right) { struct emu_pcm_info *sc; sc = mix_getdevinfo(m); switch (dev) { case SOUND_MIXER_VOLUME: switch (sc->route) { case RT_FRONT: if (sc->sm != NULL) mix_set(sc->sm, dev, left, right); if (sc->mch_disabled) { /* In the emu10k1 case PCM volume does not affect sound routed to rear & center/sub (it is connected to AC97 codec). Calculate it manually. 
*/ /* This really should belong to emu10kx.c */ if (sc->is_emu10k1) { sc->emu10k1_volcache[0][0] = left; left = left * sc->emu10k1_volcache[1][0] / 100; sc->emu10k1_volcache[0][1] = right; right = right * sc->emu10k1_volcache[1][1] / 100; } emumix_set_volume(sc->card, M_MASTER_REAR_L, left); emumix_set_volume(sc->card, M_MASTER_REAR_R, right); if (!sc->is_emu10k1) { emumix_set_volume(sc->card, M_MASTER_CENTER, (left+right)/2); emumix_set_volume(sc->card, M_MASTER_SUBWOOFER, (left+right)/2); /* XXX side */ } } /* mch disabled */ break; case RT_REAR: emumix_set_volume(sc->card, M_MASTER_REAR_L, left); emumix_set_volume(sc->card, M_MASTER_REAR_R, right); break; case RT_CENTER: emumix_set_volume(sc->card, M_MASTER_CENTER, (left+right)/2); break; case RT_SUB: emumix_set_volume(sc->card, M_MASTER_SUBWOOFER, (left+right)/2); break; } break; case SOUND_MIXER_PCM: switch (sc->route) { case RT_FRONT: if (sc->sm != NULL) mix_set(sc->sm, dev, left, right); if (sc->mch_disabled) { /* See SOUND_MIXER_VOLUME case */ if (sc->is_emu10k1) { sc->emu10k1_volcache[1][0] = left; left = left * sc->emu10k1_volcache[0][0] / 100; sc->emu10k1_volcache[1][1] = right; right = right * sc->emu10k1_volcache[0][1] / 100; } emumix_set_volume(sc->card, M_MASTER_REAR_L, left); emumix_set_volume(sc->card, M_MASTER_REAR_R, right); if (!sc->is_emu10k1) { emumix_set_volume(sc->card, M_MASTER_CENTER, (left+right)/2); emumix_set_volume(sc->card, M_MASTER_SUBWOOFER, (left+right)/2); /* XXX side */ } } /* mch_disabled */ break; case RT_REAR: emumix_set_volume(sc->card, M_FX2_REAR_L, left); emumix_set_volume(sc->card, M_FX3_REAR_R, right); break; case RT_CENTER: emumix_set_volume(sc->card, M_FX4_CENTER, (left+right)/2); break; case RT_SUB: emumix_set_volume(sc->card, M_FX5_SUBWOOFER, (left+right)/2); break; } break; case SOUND_MIXER_DIGITAL1: /* CD SPDIF, in1 */ emumix_set_volume(sc->card, M_IN1_FRONT_L, left); emumix_set_volume(sc->card, M_IN1_FRONT_R, right); break; case SOUND_MIXER_DIGITAL2: if (sc->is_emu10k1) { /* TOSLink, in3 */ emumix_set_volume(sc->card, M_IN3_FRONT_L, left); emumix_set_volume(sc->card, M_IN3_FRONT_R, right); } else { /* COAX SPDIF, in2 */ emumix_set_volume(sc->card, M_IN2_FRONT_L, left); emumix_set_volume(sc->card, M_IN2_FRONT_R, right); } break; case SOUND_MIXER_LINE2: /* Line-In2, in4 */ emumix_set_volume(sc->card, M_IN4_FRONT_L, left); emumix_set_volume(sc->card, M_IN4_FRONT_R, right); break; case SOUND_MIXER_DIGITAL3: /* on-card SPDIF, in5 */ emumix_set_volume(sc->card, M_IN5_FRONT_L, left); emumix_set_volume(sc->card, M_IN5_FRONT_R, right); break; case SOUND_MIXER_LINE3: /* AUX2, in6 */ emumix_set_volume(sc->card, M_IN6_FRONT_L, left); emumix_set_volume(sc->card, M_IN6_FRONT_R, right); break; default: if (sc->sm != NULL) { /* XXX emumix_set_volume is not required here */ emumix_set_volume(sc->card, M_IN0_FRONT_L, 100); emumix_set_volume(sc->card, M_IN0_FRONT_R, 100); mix_set(sc->sm, dev, left, right); } else device_printf(sc->dev, "mixer error: unknown device %d\n", dev); } return (0); } static u_int32_t emu_dspmixer_setrecsrc(struct snd_mixer *m, u_int32_t src) { struct emu_pcm_info *sc; int i; u_int32_t recmask; int input[8]; sc = mix_getdevinfo(m); recmask = 0; for (i=0; i < 8; i++) input[i]=0; if (sc->sm != NULL) if ((src & sc->ac97_recdevs) !=0) if (mix_setrecsrc(sc->sm, src & sc->ac97_recdevs) == 0) { recmask |= (src & sc->ac97_recdevs); /* Recording from AC97 codec. 
Enable AC97 route to rec on DSP */ input[0] = 1; } if (sc->is_emu10k1) { for (i = 0; i < SOUND_MIXER_NRDEVICES; i++) { if (dspmix_k1[i].recdev) if ((src & (1 << i)) == ((uint32_t)1 << i)) { recmask |= (1 << i); /* enable device i */ input[dspmix_k1[i].input] = 1; } } } else { for (i = 0; i < SOUND_MIXER_NRDEVICES; i++) { if (dspmix_k2[i].recdev) if ((src & (1 << i)) == ((uint32_t)1 << i)) { recmask |= (1 << i); /* enable device i */ input[dspmix_k2[i].input] = 1; } } } emumix_set_volume(sc->card, M_IN0_REC_L, input[0] == 1 ? 100 : 0); emumix_set_volume(sc->card, M_IN0_REC_R, input[0] == 1 ? 100 : 0); emumix_set_volume(sc->card, M_IN1_REC_L, input[1] == 1 ? 100 : 0); emumix_set_volume(sc->card, M_IN1_REC_R, input[1] == 1 ? 100 : 0); if (!sc->is_emu10k1) { emumix_set_volume(sc->card, M_IN2_REC_L, input[2] == 1 ? 100 : 0); emumix_set_volume(sc->card, M_IN2_REC_R, input[2] == 1 ? 100 : 0); } if (sc->is_emu10k1) { emumix_set_volume(sc->card, M_IN3_REC_L, input[3] == 1 ? 100 : 0); emumix_set_volume(sc->card, M_IN3_REC_R, input[3] == 1 ? 100 : 0); } emumix_set_volume(sc->card, M_IN4_REC_L, input[4] == 1 ? 100 : 0); emumix_set_volume(sc->card, M_IN4_REC_R, input[4] == 1 ? 100 : 0); emumix_set_volume(sc->card, M_IN5_REC_L, input[5] == 1 ? 100 : 0); emumix_set_volume(sc->card, M_IN5_REC_R, input[5] == 1 ? 100 : 0); emumix_set_volume(sc->card, M_IN6_REC_L, input[6] == 1 ? 100 : 0); emumix_set_volume(sc->card, M_IN6_REC_R, input[6] == 1 ? 100 : 0); /* XXX check for K1/k2 differences? */ if ((src & (1 << SOUND_MIXER_PCM)) == (1 << SOUND_MIXER_PCM)) { emumix_set_volume(sc->card, M_FX0_REC_L, emumix_get_volume(sc->card, M_FX0_FRONT_L)); emumix_set_volume(sc->card, M_FX1_REC_R, emumix_get_volume(sc->card, M_FX1_FRONT_R)); } else { emumix_set_volume(sc->card, M_FX0_REC_L, 0); emumix_set_volume(sc->card, M_FX1_REC_R, 0); } return (recmask); } static kobj_method_t emudspmixer_methods[] = { KOBJMETHOD(mixer_init, emu_dspmixer_init), KOBJMETHOD(mixer_uninit, emu_dspmixer_uninit), KOBJMETHOD(mixer_set, emu_dspmixer_set), KOBJMETHOD(mixer_setrecsrc, emu_dspmixer_setrecsrc), KOBJMETHOD_END }; MIXER_DECLARE(emudspmixer); static int emu_efxmixer_init(struct snd_mixer *m) { mix_setdevs(m, SOUND_MASK_VOLUME); mix_setrecdevs(m, SOUND_MASK_MONITOR); return (0); } static int emu_efxmixer_set(struct snd_mixer *m, unsigned dev, unsigned left, unsigned right) { if (left + right == 200) return (0); return (0); } static u_int32_t emu_efxmixer_setrecsrc(struct snd_mixer *m __unused, u_int32_t src __unused) { return (SOUND_MASK_MONITOR); } static kobj_method_t emuefxmixer_methods[] = { KOBJMETHOD(mixer_init, emu_efxmixer_init), KOBJMETHOD(mixer_set, emu_efxmixer_set), KOBJMETHOD(mixer_setrecsrc, emu_efxmixer_setrecsrc), KOBJMETHOD_END }; MIXER_DECLARE(emuefxmixer); /* * AC97 emulation code for Audigy and later cards. * Some parts of AC97 codec are not used by hardware, but can be used * to change some DSP controls via AC97 mixer interface. This includes: * - master volume controls MASTER_FRONT_[R|L] * - pcm volume controls FX[0|1]_FRONT_[R|L] * - rec volume controls MASTER_REC_[R|L] * We do it because we need to put it under user control.... 
* We also keep some parts of AC97 disabled to get better sound quality */ #define AC97LEFT(x) ((x & 0x7F00)>>8) #define AC97RIGHT(x) (x & 0x007F) #define AC97MUTE(x) ((x & 0x8000)>>15) #define BIT4_TO100(x) (100-(x)*100/(0x0f)) #define BIT6_TO100(x) (100-(x)*100/(0x3f)) #define BIT4_TO255(x) (255-(x)*255/(0x0f)) #define BIT6_TO255(x) (255-(x)*255/(0x3f)) #define V100_TOBIT6(x) (0x3f*(100-x)/100) #define V100_TOBIT4(x) (0x0f*(100-x)/100) #define AC97ENCODE(x_muted, x_left, x_right) (((x_muted & 1)<<15) | ((x_left & 0x3f)<<8) | (x_right & 0x3f)) static int emu_ac97_read_emulation(struct emu_pcm_info *sc, int regno) { int use_ac97; int emulated; int tmp; use_ac97 = 1; emulated = 0; switch (regno) { case AC97_MIX_MASTER: emulated = sc->ac97_state[AC97_MIX_MASTER]; use_ac97 = 0; break; case AC97_MIX_PCM: emulated = sc->ac97_state[AC97_MIX_PCM]; use_ac97 = 0; break; case AC97_REG_RECSEL: emulated = 0x0505; use_ac97 = 0; break; case AC97_MIX_RGAIN: emulated = sc->ac97_state[AC97_MIX_RGAIN]; use_ac97 = 0; break; } emu_wr(sc->card, EMU_AC97ADDR, regno, 1); tmp = emu_rd(sc->card, EMU_AC97DATA, 2); if (use_ac97) emulated = tmp; return (emulated); } static void emu_ac97_write_emulation(struct emu_pcm_info *sc, int regno, uint32_t data) { int write_ac97; int left, right; uint32_t emu_left, emu_right; int is_mute; write_ac97 = 1; left = AC97LEFT(data); emu_left = BIT6_TO100(left); /* We present ourselves as a 6-bit AC97 mixer */ right = AC97RIGHT(data); emu_right = BIT6_TO100(right); is_mute = AC97MUTE(data); if (is_mute) emu_left = emu_right = 0; switch (regno) { /* TODO: reset emulator on AC97_RESET */ case AC97_MIX_MASTER: emumix_set_volume(sc->card, M_MASTER_FRONT_L, emu_left); emumix_set_volume(sc->card, M_MASTER_FRONT_R, emu_right); sc->ac97_state[AC97_MIX_MASTER] = data & (0x8000 | 0x3f3f); data = 0x8000; /* Mute AC97 main out */ break; case AC97_MIX_PCM: /* PCM OUT VOL */ emumix_set_volume(sc->card, M_FX0_FRONT_L, emu_left); emumix_set_volume(sc->card, M_FX1_FRONT_R, emu_right); sc->ac97_state[AC97_MIX_PCM] = data & (0x8000 | 0x3f3f); data = 0x8000; /* Mute AC97 PCM out */ break; case AC97_REG_RECSEL: /* * PCM recording source is set to "stereo mix" (labeled "vol" * in mixer). There is no 'playback' from AC97 codec - * if you want to hear anything from AC97 you have to _record_ * it. Keep things simple and record "stereo mix". */ data = 0x0505; break; case AC97_MIX_RGAIN: /* RECORD GAIN */ emu_left = BIT4_TO100(left); /* rgain is 4-bit */ emu_right = BIT4_TO100(right); emumix_set_volume(sc->card, M_MASTER_REC_L, 100-emu_left); emumix_set_volume(sc->card, M_MASTER_REC_R, 100-emu_right); /* * Record gain on AC97 should stay zero to get AC97 sound on * AC97_[RL] connectors on EMU10K2 chip. 
AC97 on Audigy is not * directly connected to any output, only to the EMU10K2 chip. Use * this control to set the AC97 mix volume inside the EMU10K2 chip. */ sc->ac97_state[AC97_MIX_RGAIN] = data & (0x8000 | 0x0f0f); data = 0x0000; break; } if (write_ac97) { emu_wr(sc->card, EMU_AC97ADDR, regno, 1); emu_wr(sc->card, EMU_AC97DATA, data, 2); } } static int emu_erdcd(kobj_t obj __unused, void *devinfo, int regno) { struct emu_pcm_info *sc = (struct emu_pcm_info *)devinfo; return (emu_ac97_read_emulation(sc, regno)); } static int emu_ewrcd(kobj_t obj __unused, void *devinfo, int regno, uint32_t data) { struct emu_pcm_info *sc = (struct emu_pcm_info *)devinfo; emu_ac97_write_emulation(sc, regno, data); return (0); } static kobj_method_t emu_eac97_methods[] = { KOBJMETHOD(ac97_read, emu_erdcd), KOBJMETHOD(ac97_write, emu_ewrcd), KOBJMETHOD_END }; AC97_DECLARE(emu_eac97); /* real ac97 codec */ static int emu_rdcd(kobj_t obj __unused, void *devinfo, int regno) { int rd; struct emu_pcm_info *sc = (struct emu_pcm_info *)devinfo; KASSERT(sc->card != NULL, ("emu_rdcd: no soundcard")); emu_wr(sc->card, EMU_AC97ADDR, regno, 1); rd = emu_rd(sc->card, EMU_AC97DATA, 2); return (rd); } static int emu_wrcd(kobj_t obj __unused, void *devinfo, int regno, uint32_t data) { struct emu_pcm_info *sc = (struct emu_pcm_info *)devinfo; KASSERT(sc->card != NULL, ("emu_wrcd: no soundcard")); emu_wr(sc->card, EMU_AC97ADDR, regno, 1); emu_wr(sc->card, EMU_AC97DATA, data, 2); return (0); } static kobj_method_t emu_ac97_methods[] = { KOBJMETHOD(ac97_read, emu_rdcd), KOBJMETHOD(ac97_write, emu_wrcd), KOBJMETHOD_END }; AC97_DECLARE(emu_ac97); static int emu_k1_recval(int speed) { int val; val = 0; while ((val < 7) && (speed < emu10k1_adcspeed[val])) val++; return (val); } static int emu_k2_recval(int speed) { int val; val = 0; while ((val < 8) && (speed < emu10k2_adcspeed[val])) val++; return (val); } static void * emupchan_init(kobj_t obj __unused, void *devinfo, struct snd_dbuf *b, struct pcm_channel *c, int dir __unused) { struct emu_pcm_info *sc = devinfo; struct emu_pcm_pchinfo *ch; void *r; KASSERT(dir == PCMDIR_PLAY, ("emupchan_init: bad direction")); KASSERT(sc->card != NULL, ("emupchan_init: no soundcard")); if (sc->pnum >= MAX_CHANNELS) return (NULL); ch = &(sc->pch[sc->pnum++]); ch->buffer = b; ch->pcm = sc; ch->channel = c; ch->blksz = sc->bufsz; ch->fmt = SND_FORMAT(AFMT_U8, 1, 0); ch->spd = 8000; ch->master = emu_valloc(sc->card); /* * XXX we have to allocate slave even for mono channel until we * fix emu_vfree to handle this case. */ ch->slave = emu_valloc(sc->card); ch->timer = emu_timer_create(sc->card); r = (emu_vinit(sc->card, ch->master, ch->slave, EMU_PLAY_BUFSZ, ch->buffer)) ?
NULL : ch; return (r); } static int emupchan_free(kobj_t obj __unused, void *c_devinfo) { struct emu_pcm_pchinfo *ch = c_devinfo; struct emu_pcm_info *sc = ch->pcm; emu_timer_clear(sc->card, ch->timer); if (ch->slave != NULL) emu_vfree(sc->card, ch->slave); emu_vfree(sc->card, ch->master); return (0); } static int emupchan_setformat(kobj_t obj __unused, void *c_devinfo, uint32_t format) { struct emu_pcm_pchinfo *ch = c_devinfo; ch->fmt = format; return (0); } static uint32_t emupchan_setspeed(kobj_t obj __unused, void *c_devinfo, uint32_t speed) { struct emu_pcm_pchinfo *ch = c_devinfo; ch->spd = speed; return (ch->spd); } static uint32_t emupchan_setblocksize(kobj_t obj __unused, void *c_devinfo, uint32_t blocksize) { struct emu_pcm_pchinfo *ch = c_devinfo; struct emu_pcm_info *sc = ch->pcm; if (blocksize > ch->pcm->bufsz) blocksize = ch->pcm->bufsz; snd_mtxlock(sc->lock); ch->blksz = blocksize; emu_timer_set(sc->card, ch->timer, ch->blksz / sndbuf_getalign(ch->buffer)); snd_mtxunlock(sc->lock); return (ch->blksz); } static int emupchan_trigger(kobj_t obj __unused, void *c_devinfo, int go) { struct emu_pcm_pchinfo *ch = c_devinfo; struct emu_pcm_info *sc = ch->pcm; if (!PCMTRIG_COMMON(go)) return (0); snd_mtxlock(sc->lock); /* XXX can we trigger on parallel threads ? */ if (go == PCMTRIG_START) { emu_vsetup(ch->master, ch->fmt, ch->spd); if (AFMT_CHANNEL(ch->fmt) > 1) emu_vroute(sc->card, &(sc->rt), ch->master); else emu_vroute(sc->card, &(sc->rt_mono), ch->master); emu_vwrite(sc->card, ch->master); emu_timer_set(sc->card, ch->timer, ch->blksz / sndbuf_getalign(ch->buffer)); emu_timer_enable(sc->card, ch->timer, 1); } /* PCM interrupt handler will handle PCMTRIG_STOP event */ ch->run = (go == PCMTRIG_START) ? 1 : 0; emu_vtrigger(sc->card, ch->master, ch->run); snd_mtxunlock(sc->lock); return (0); } static uint32_t emupchan_getptr(kobj_t obj __unused, void *c_devinfo) { struct emu_pcm_pchinfo *ch = c_devinfo; struct emu_pcm_info *sc = ch->pcm; int r; r = emu_vpos(sc->card, ch->master); return (r); } static struct pcmchan_caps * emupchan_getcaps(kobj_t obj __unused, void *c_devinfo __unused) { struct emu_pcm_pchinfo *ch = c_devinfo; struct emu_pcm_info *sc = ch->pcm; switch (sc->route) { case RT_FRONT: /* FALLTHROUGH */ case RT_REAR: /* FALLTHROUGH */ case RT_SIDE: return (&emu_playcaps); break; case RT_CENTER: /* FALLTHROUGH */ case RT_SUB: return (&emu_playcaps_mono); break; } return (NULL); } static kobj_method_t emupchan_methods[] = { KOBJMETHOD(channel_init, emupchan_init), KOBJMETHOD(channel_free, emupchan_free), KOBJMETHOD(channel_setformat, emupchan_setformat), KOBJMETHOD(channel_setspeed, emupchan_setspeed), KOBJMETHOD(channel_setblocksize, emupchan_setblocksize), KOBJMETHOD(channel_trigger, emupchan_trigger), KOBJMETHOD(channel_getptr, emupchan_getptr), KOBJMETHOD(channel_getcaps, emupchan_getcaps), KOBJMETHOD_END }; CHANNEL_DECLARE(emupchan); static void * emurchan_init(kobj_t obj __unused, void *devinfo, struct snd_dbuf *b, struct pcm_channel *c, int dir __unused) { struct emu_pcm_info *sc = devinfo; struct emu_pcm_rchinfo *ch; KASSERT(dir == PCMDIR_REC, ("emurchan_init: bad direction")); ch = &sc->rch_adc; ch->buffer = b; ch->pcm = sc; ch->channel = c; ch->blksz = sc->bufsz / 2; /* We rise interrupt for half-full buffer */ ch->fmt = SND_FORMAT(AFMT_U8, 1, 0); ch->spd = 8000; ch->idxreg = sc->is_emu10k1 ? 
EMU_ADCIDX : EMU_A_ADCIDX; ch->basereg = EMU_ADCBA; ch->sizereg = EMU_ADCBS; ch->setupreg = EMU_ADCCR; ch->irqmask = EMU_INTE_ADCBUFENABLE; ch->iprmask = EMU_IPR_ADCBUFFULL | EMU_IPR_ADCBUFHALFFULL; if (sndbuf_alloc(ch->buffer, emu_gettag(sc->card), 0, sc->bufsz) != 0) return (NULL); else { ch->timer = emu_timer_create(sc->card); emu_wrptr(sc->card, 0, ch->basereg, sndbuf_getbufaddr(ch->buffer)); emu_wrptr(sc->card, 0, ch->sizereg, 0); /* off */ return (ch); } } static int emurchan_free(kobj_t obj __unused, void *c_devinfo) { struct emu_pcm_rchinfo *ch = c_devinfo; struct emu_pcm_info *sc = ch->pcm; emu_timer_clear(sc->card, ch->timer); return (0); } static int emurchan_setformat(kobj_t obj __unused, void *c_devinfo, uint32_t format) { struct emu_pcm_rchinfo *ch = c_devinfo; ch->fmt = format; return (0); } static uint32_t emurchan_setspeed(kobj_t obj __unused, void *c_devinfo, uint32_t speed) { struct emu_pcm_rchinfo *ch = c_devinfo; if (ch->pcm->is_emu10k1) { speed = emu10k1_adcspeed[emu_k1_recval(speed)]; } else { speed = emu10k2_adcspeed[emu_k2_recval(speed)]; } ch->spd = speed; return (ch->spd); } static uint32_t emurchan_setblocksize(kobj_t obj __unused, void *c_devinfo, uint32_t blocksize) { struct emu_pcm_rchinfo *ch = c_devinfo; struct emu_pcm_info *sc = ch->pcm; ch->blksz = blocksize; /* * If blocksize is less than half of buffer size we will not get * BUFHALFFULL interrupt in time and channel will need to generate * (and use) timer interrupts. Otherwise channel will be marked dead. */ if (ch->blksz < (ch->pcm->bufsz / 2)) { emu_timer_set(sc->card, ch->timer, ch->blksz / sndbuf_getalign(ch->buffer)); emu_timer_enable(sc->card, ch->timer, 1); } else { emu_timer_enable(sc->card, ch->timer, 0); } return (ch->blksz); } static int emurchan_trigger(kobj_t obj __unused, void *c_devinfo, int go) { struct emu_pcm_rchinfo *ch = c_devinfo; struct emu_pcm_info *sc = ch->pcm; uint32_t val, sz; if (!PCMTRIG_COMMON(go)) return (0); switch (sc->bufsz) { case 4096: sz = EMU_RECBS_BUFSIZE_4096; break; case 8192: sz = EMU_RECBS_BUFSIZE_8192; break; case 16384: sz = EMU_RECBS_BUFSIZE_16384; break; case 32768: sz = EMU_RECBS_BUFSIZE_32768; break; case 65536: sz = EMU_RECBS_BUFSIZE_65536; break; default: sz = EMU_RECBS_BUFSIZE_4096; } snd_mtxlock(sc->lock); switch (go) { case PCMTRIG_START: ch->run = 1; emu_wrptr(sc->card, 0, ch->sizereg, sz); val = sc->is_emu10k1 ? EMU_ADCCR_LCHANENABLE : EMU_A_ADCCR_LCHANENABLE; if (AFMT_CHANNEL(ch->fmt) > 1) val |= sc->is_emu10k1 ? EMU_ADCCR_RCHANENABLE : EMU_A_ADCCR_RCHANENABLE; val |= sc->is_emu10k1 ? 
emu_k1_recval(ch->spd) : emu_k2_recval(ch->spd); emu_wrptr(sc->card, 0, ch->setupreg, 0); emu_wrptr(sc->card, 0, ch->setupreg, val); ch->ihandle = emu_intr_register(sc->card, ch->irqmask, ch->iprmask, &emu_pcm_intr, sc); break; case PCMTRIG_STOP: /* FALLTHROUGH */ case PCMTRIG_ABORT: ch->run = 0; emu_wrptr(sc->card, 0, ch->sizereg, 0); if (ch->setupreg) emu_wrptr(sc->card, 0, ch->setupreg, 0); (void)emu_intr_unregister(sc->card, ch->ihandle); break; case PCMTRIG_EMLDMAWR: /* FALLTHROUGH */ case PCMTRIG_EMLDMARD: /* FALLTHROUGH */ default: break; } snd_mtxunlock(sc->lock); return (0); } static uint32_t emurchan_getptr(kobj_t obj __unused, void *c_devinfo) { struct emu_pcm_rchinfo *ch = c_devinfo; struct emu_pcm_info *sc = ch->pcm; int r; r = emu_rdptr(sc->card, 0, ch->idxreg) & 0x0000ffff; return (r); } static struct pcmchan_caps * emurchan_getcaps(kobj_t obj __unused, void *c_devinfo __unused) { return (&emu_reccaps_adc); } static kobj_method_t emurchan_methods[] = { KOBJMETHOD(channel_init, emurchan_init), KOBJMETHOD(channel_free, emurchan_free), KOBJMETHOD(channel_setformat, emurchan_setformat), KOBJMETHOD(channel_setspeed, emurchan_setspeed), KOBJMETHOD(channel_setblocksize, emurchan_setblocksize), KOBJMETHOD(channel_trigger, emurchan_trigger), KOBJMETHOD(channel_getptr, emurchan_getptr), KOBJMETHOD(channel_getcaps, emurchan_getcaps), KOBJMETHOD_END }; CHANNEL_DECLARE(emurchan); static void * emufxrchan_init(kobj_t obj __unused, void *devinfo, struct snd_dbuf *b, struct pcm_channel *c, int dir __unused) { struct emu_pcm_info *sc = devinfo; struct emu_pcm_rchinfo *ch; KASSERT(dir == PCMDIR_REC, ("emurchan_init: bad direction")); if (sc == NULL) return (NULL); ch = &(sc->rch_efx); ch->fmt = SND_FORMAT(AFMT_S16_LE, 1, 0); ch->spd = sc->is_emu10k1 ? 48000*32 : 48000 * 64; ch->idxreg = EMU_FXIDX; ch->basereg = EMU_FXBA; ch->sizereg = EMU_FXBS; ch->irqmask = EMU_INTE_EFXBUFENABLE; ch->iprmask = EMU_IPR_EFXBUFFULL | EMU_IPR_EFXBUFHALFFULL; ch->buffer = b; ch->pcm = sc; ch->channel = c; ch->blksz = sc->bufsz / 2; if (sndbuf_alloc(ch->buffer, emu_gettag(sc->card), 0, sc->bufsz) != 0) return (NULL); else { emu_wrptr(sc->card, 0, ch->basereg, sndbuf_getbufaddr(ch->buffer)); emu_wrptr(sc->card, 0, ch->sizereg, 0); /* off */ return (ch); } } static int emufxrchan_setformat(kobj_t obj __unused, void *c_devinfo __unused, uint32_t format) { if (format == SND_FORMAT(AFMT_S16_LE, 1, 0)) return (0); return (EINVAL); } static uint32_t emufxrchan_setspeed(kobj_t obj __unused, void *c_devinfo, uint32_t speed) { struct emu_pcm_rchinfo *ch = c_devinfo; /* FIXED RATE CHANNEL */ return (ch->spd); } static uint32_t emufxrchan_setblocksize(kobj_t obj __unused, void *c_devinfo, uint32_t blocksize) { struct emu_pcm_rchinfo *ch = c_devinfo; ch->blksz = blocksize; /* * XXX If blocksize is less than half of buffer size we will not get * interrupt in time and channel will die due to interrupt timeout. * This should not happen with FX rchan, because it will fill buffer * very fast (64K buffer is 0.021seconds on Audigy). 
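 *
 * (Editor's arithmetic, added for illustration: the EFX stream is
 * 16-bit samples for every mono channel at 48 kHz, so with all
 * channels enabled:
 *	SB Live!: 32 ch * 2 bytes * 48000 Hz ~= 3.1 MB/s,
 *	          64 KiB fills in ~0.021 s;
 *	Audigy:   64 ch * 2 bytes * 48000 Hz ~= 6.1 MB/s,
 *	          64 KiB fills in ~0.011 s.
 * The 0.021 s figure quoted above thus matches the 32-channel
 * SB Live! case.)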
*/ if (ch->blksz < (ch->pcm->bufsz / 2)) ch->blksz = ch->pcm->bufsz / 2; return (ch->blksz); } static int emufxrchan_trigger(kobj_t obj __unused, void *c_devinfo, int go) { struct emu_pcm_rchinfo *ch = c_devinfo; struct emu_pcm_info *sc = ch->pcm; uint32_t sz; if (!PCMTRIG_COMMON(go)) return (0); switch (sc->bufsz) { case 4096: sz = EMU_RECBS_BUFSIZE_4096; break; case 8192: sz = EMU_RECBS_BUFSIZE_8192; break; case 16384: sz = EMU_RECBS_BUFSIZE_16384; break; case 32768: sz = EMU_RECBS_BUFSIZE_32768; break; case 65536: sz = EMU_RECBS_BUFSIZE_65536; break; default: sz = EMU_RECBS_BUFSIZE_4096; } snd_mtxlock(sc->lock); switch (go) { case PCMTRIG_START: ch->run = 1; emu_wrptr(sc->card, 0, ch->sizereg, sz); ch->ihandle = emu_intr_register(sc->card, ch->irqmask, ch->iprmask, &emu_pcm_intr, sc); /* * SB Live! is limited to 32 mono channels. Audigy * has 64 mono channels. Channels are enabled * by setting a bit in EMU_A_FXWC[1|2] registers. */ /* XXX there is no way to demultiplex this streams for now */ if (sc->is_emu10k1) { emu_wrptr(sc->card, 0, EMU_FXWC, 0xffffffff); } else { emu_wrptr(sc->card, 0, EMU_A_FXWC1, 0xffffffff); emu_wrptr(sc->card, 0, EMU_A_FXWC2, 0xffffffff); } break; case PCMTRIG_STOP: /* FALLTHROUGH */ case PCMTRIG_ABORT: ch->run = 0; if (sc->is_emu10k1) { emu_wrptr(sc->card, 0, EMU_FXWC, 0x0); } else { emu_wrptr(sc->card, 0, EMU_A_FXWC1, 0x0); emu_wrptr(sc->card, 0, EMU_A_FXWC2, 0x0); } emu_wrptr(sc->card, 0, ch->sizereg, 0); (void)emu_intr_unregister(sc->card, ch->ihandle); break; case PCMTRIG_EMLDMAWR: /* FALLTHROUGH */ case PCMTRIG_EMLDMARD: /* FALLTHROUGH */ default: break; } snd_mtxunlock(sc->lock); return (0); } static uint32_t emufxrchan_getptr(kobj_t obj __unused, void *c_devinfo) { struct emu_pcm_rchinfo *ch = c_devinfo; struct emu_pcm_info *sc = ch->pcm; int r; r = emu_rdptr(sc->card, 0, ch->idxreg) & 0x0000ffff; return (r); } static struct pcmchan_caps * emufxrchan_getcaps(kobj_t obj __unused, void *c_devinfo) { struct emu_pcm_rchinfo *ch = c_devinfo; struct emu_pcm_info *sc = ch->pcm; if (sc->is_emu10k1) return (&emu_reccaps_efx_live); return (&emu_reccaps_efx_audigy); } static int emufxrchan_getrates(kobj_t obj __unused, void *c_devinfo, int **rates) { struct emu_pcm_rchinfo *ch = c_devinfo; struct emu_pcm_info *sc = ch->pcm; if (sc->is_emu10k1) *rates = emu_rates_live; else *rates = emu_rates_audigy; return 1; } static kobj_method_t emufxrchan_methods[] = { KOBJMETHOD(channel_init, emufxrchan_init), KOBJMETHOD(channel_setformat, emufxrchan_setformat), KOBJMETHOD(channel_setspeed, emufxrchan_setspeed), KOBJMETHOD(channel_setblocksize, emufxrchan_setblocksize), KOBJMETHOD(channel_trigger, emufxrchan_trigger), KOBJMETHOD(channel_getptr, emufxrchan_getptr), KOBJMETHOD(channel_getcaps, emufxrchan_getcaps), KOBJMETHOD(channel_getrates, emufxrchan_getrates), KOBJMETHOD_END }; CHANNEL_DECLARE(emufxrchan); static uint32_t emu_pcm_intr(void *pcm, uint32_t stat) { struct emu_pcm_info *sc = (struct emu_pcm_info *)pcm; uint32_t ack; int i; ack = 0; snd_mtxlock(sc->lock); if (stat & EMU_IPR_INTERVALTIMER) { ack |= EMU_IPR_INTERVALTIMER; for (i = 0; i < MAX_CHANNELS; i++) if (sc->pch[i].channel) { if (sc->pch[i].run == 1) { snd_mtxunlock(sc->lock); chn_intr(sc->pch[i].channel); snd_mtxlock(sc->lock); } else emu_timer_enable(sc->card, sc->pch[i].timer, 0); } /* ADC may install timer to get low-latency interrupts */ if ((sc->rch_adc.channel) && (sc->rch_adc.run)) { snd_mtxunlock(sc->lock); chn_intr(sc->rch_adc.channel); snd_mtxlock(sc->lock); } /* * EFX does not use timer, because 
it will fill * buffer at least 32x times faster than ADC. */ } if (stat & (EMU_IPR_ADCBUFFULL | EMU_IPR_ADCBUFHALFFULL)) { ack |= stat & (EMU_IPR_ADCBUFFULL | EMU_IPR_ADCBUFHALFFULL); if (sc->rch_adc.channel) { snd_mtxunlock(sc->lock); chn_intr(sc->rch_adc.channel); snd_mtxlock(sc->lock); } } if (stat & (EMU_IPR_EFXBUFFULL | EMU_IPR_EFXBUFHALFFULL)) { ack |= stat & (EMU_IPR_EFXBUFFULL | EMU_IPR_EFXBUFHALFFULL); if (sc->rch_efx.channel) { snd_mtxunlock(sc->lock); chn_intr(sc->rch_efx.channel); snd_mtxlock(sc->lock); } } snd_mtxunlock(sc->lock); return (ack); } static int emu_pcm_init(struct emu_pcm_info *sc) { sc->bufsz = pcm_getbuffersize(sc->dev, EMUPAGESIZE, EMU_REC_BUFSZ, EMU_MAX_BUFSZ); return (0); } static int emu_pcm_uninit(struct emu_pcm_info *sc __unused) { return (0); } static int emu_pcm_probe(device_t dev) { uintptr_t func, route, r; const char *rt; char buffer[255]; r = BUS_READ_IVAR(device_get_parent(dev), dev, EMU_VAR_FUNC, &func); if (func != SCF_PCM) return (ENXIO); rt = "UNKNOWN"; r = BUS_READ_IVAR(device_get_parent(dev), dev, EMU_VAR_ROUTE, &route); switch (route) { case RT_FRONT: rt = "front"; break; case RT_REAR: rt = "rear"; break; case RT_CENTER: rt = "center"; break; case RT_SUB: rt = "subwoofer"; break; case RT_SIDE: rt = "side"; break; case RT_MCHRECORD: rt = "multichannel recording"; break; } snprintf(buffer, 255, "EMU10Kx DSP %s PCM interface", rt); device_set_desc_copy(dev, buffer); return (0); } static int emu_pcm_attach(device_t dev) { struct emu_pcm_info *sc; unsigned int i; char status[SND_STATUSLEN]; uint32_t inte, ipr; uintptr_t route, r, ivar; sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO); sc->card = (struct emu_sc_info *)(device_get_softc(device_get_parent(dev))); if (sc->card == NULL) { device_printf(dev, "cannot get bridge conf\n"); free(sc, M_DEVBUF); return (ENXIO); } sc->lock = snd_mtxcreate(device_get_nameunit(dev), "snd_emu10kx pcm softc"); sc->dev = dev; r = BUS_READ_IVAR(device_get_parent(dev), dev, EMU_VAR_ISEMU10K1, &ivar); sc->is_emu10k1 = ivar ? 1 : 0; r = BUS_READ_IVAR(device_get_parent(dev), dev, EMU_VAR_MCH_DISABLED, &ivar); sc->mch_disabled = ivar ? 
1 : 0; sc->codec = NULL; for (i = 0; i < 8; i++) { sc->rt.routing_left[i] = i; sc->rt.amounts_left[i] = 0x00; sc->rt.routing_right[i] = i; sc->rt.amounts_right[i] = 0x00; } for (i = 0; i < 8; i++) { sc->rt_mono.routing_left[i] = i; sc->rt_mono.amounts_left[i] = 0x00; sc->rt_mono.routing_right[i] = i; sc->rt_mono.amounts_right[i] = 0x00; } sc->emu10k1_volcache[0][0] = 75; sc->emu10k1_volcache[1][0] = 75; sc->emu10k1_volcache[0][1] = 75; sc->emu10k1_volcache[1][1] = 75; r = BUS_READ_IVAR(device_get_parent(dev), dev, EMU_VAR_ROUTE, &route); sc->route = route; switch (route) { case RT_FRONT: sc->rt.amounts_left[0] = 0xff; sc->rt.amounts_right[1] = 0xff; sc->rt_mono.amounts_left[0] = 0xff; sc->rt_mono.amounts_left[1] = 0xff; if (sc->is_emu10k1) sc->codec = AC97_CREATE(dev, sc, emu_ac97); else sc->codec = AC97_CREATE(dev, sc, emu_eac97); sc->ac97_mixerclass = NULL; if (sc->codec != NULL) sc->ac97_mixerclass = ac97_getmixerclass(); if (mixer_init(dev, &emudspmixer_class, sc)) { device_printf(dev, "failed to initialize DSP mixer\n"); goto bad; } break; case RT_REAR: sc->rt.amounts_left[2] = 0xff; sc->rt.amounts_right[3] = 0xff; sc->rt_mono.amounts_left[2] = 0xff; sc->rt_mono.amounts_left[3] = 0xff; if (mixer_init(dev, &emudspmixer_class, sc)) { device_printf(dev, "failed to initialize mixer\n"); goto bad; } break; case RT_CENTER: sc->rt.amounts_left[4] = 0xff; sc->rt_mono.amounts_left[4] = 0xff; if (mixer_init(dev, &emudspmixer_class, sc)) { device_printf(dev, "failed to initialize mixer\n"); goto bad; } break; case RT_SUB: sc->rt.amounts_left[5] = 0xff; sc->rt_mono.amounts_left[5] = 0xff; if (mixer_init(dev, &emudspmixer_class, sc)) { device_printf(dev, "failed to initialize mixer\n"); goto bad; } break; case RT_SIDE: sc->rt.amounts_left[6] = 0xff; sc->rt.amounts_right[7] = 0xff; sc->rt_mono.amounts_left[6] = 0xff; sc->rt_mono.amounts_left[7] = 0xff; if (mixer_init(dev, &emudspmixer_class, sc)) { device_printf(dev, "failed to initialize mixer\n"); goto bad; } break; case RT_MCHRECORD: if (mixer_init(dev, &emuefxmixer_class, sc)) { device_printf(dev, "failed to initialize EFX mixer\n"); goto bad; } break; default: device_printf(dev, "invalid default route\n"); goto bad; } inte = EMU_INTE_INTERTIMERENB; ipr = EMU_IPR_INTERVALTIMER; /* Used by playback & ADC */ sc->ihandle = emu_intr_register(sc->card, inte, ipr, &emu_pcm_intr, sc); if (emu_pcm_init(sc) == -1) { device_printf(dev, "unable to initialize PCM part of the card\n"); goto bad; } /* * We don't register interrupt handler with snd_setup_intr * in pcm device. Mark pcm device as MPSAFE manually. */ pcm_setflags(dev, pcm_getflags(dev) | SD_F_MPSAFE); /* XXX we should better get number of available channels from parent */ if (pcm_register(dev, sc, (route == RT_FRONT) ? MAX_CHANNELS : 1, (route == RT_FRONT) ? 
1 : 0)) { device_printf(dev, "can't register PCM channels!\n"); goto bad; } sc->pnum = 0; if (route != RT_MCHRECORD) pcm_addchan(dev, PCMDIR_PLAY, &emupchan_class, sc); if (route == RT_FRONT) { for (i = 1; i < MAX_CHANNELS; i++) pcm_addchan(dev, PCMDIR_PLAY, &emupchan_class, sc); pcm_addchan(dev, PCMDIR_REC, &emurchan_class, sc); } if (route == RT_MCHRECORD) pcm_addchan(dev, PCMDIR_REC, &emufxrchan_class, sc); snprintf(status, SND_STATUSLEN, "on %s", device_get_nameunit(device_get_parent(dev))); pcm_setstatus(dev, status); return (0); bad: if (sc->codec) ac97_destroy(sc->codec); if (sc->lock) snd_mtxfree(sc->lock); free(sc, M_DEVBUF); return (ENXIO); } static int emu_pcm_detach(device_t dev) { int r; struct emu_pcm_info *sc; sc = pcm_getdevinfo(dev); r = pcm_unregister(dev); if (r) return (r); emu_pcm_uninit(sc); if (sc->lock) snd_mtxfree(sc->lock); free(sc, M_DEVBUF); return (0); } static device_method_t emu_pcm_methods[] = { DEVMETHOD(device_probe, emu_pcm_probe), DEVMETHOD(device_attach, emu_pcm_attach), DEVMETHOD(device_detach, emu_pcm_detach), DEVMETHOD_END }; static driver_t emu_pcm_driver = { "pcm", emu_pcm_methods, PCM_SOFTC_SIZE, NULL, 0, NULL }; DRIVER_MODULE(snd_emu10kx_pcm, emu10kx, emu_pcm_driver, pcm_devclass, 0, 0); MODULE_DEPEND(snd_emu10kx_pcm, snd_emu10kx, SND_EMU10KX_MINVER, SND_EMU10KX_PREFVER, SND_EMU10KX_MAXVER); MODULE_DEPEND(snd_emu10kx_pcm, sound, SOUND_MINVER, SOUND_PREFVER, SOUND_MAXVER); MODULE_VERSION(snd_emu10kx_pcm, SND_EMU10KX_PREFVER); Index: projects/clang1000-import/sys/fs/msdosfs/msdosfsmount.h =================================================================== --- projects/clang1000-import/sys/fs/msdosfs/msdosfsmount.h (revision 357178) +++ projects/clang1000-import/sys/fs/msdosfs/msdosfsmount.h (revision 357179) @@ -1,263 +1,267 @@ /* $FreeBSD$ */ /* $NetBSD: msdosfsmount.h,v 1.17 1997/11/17 15:37:07 ws Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. * Copyright (C) 1994, 1995, 1997 TooLs GmbH. * All rights reserved. * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". * * The author supplies this software to be publicly redistributed on the * understanding that the author is not responsible for the correct * functioning of this software in any circumstances and is not liable for * any damages caused by this software. * * October 1992 */ #ifndef _MSDOSFS_MSDOSFSMOUNT_H_ #define _MSDOSFS_MSDOSFSMOUNT_H_ #if defined (_KERNEL) || defined(MAKEFS) #include +#ifndef MAKEFS #include #include +#endif #include #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_MSDOSFSMNT); #endif struct msdosfs_fileno; /* * Layout of the mount control block for a MSDOSFS filesystem. */ struct msdosfsmount { struct mount *pm_mountp;/* vfs mount struct for this fs */ struct g_consumer *pm_cp; struct bufobj *pm_bo; uid_t pm_uid; /* uid to set as owner of the files */ gid_t pm_gid; /* gid to set as owner of the files */ mode_t pm_mask; /* mask to and with file protection bits for files */ mode_t pm_dirmask; /* mask to and with file protection bits for directories */ struct vnode *pm_devvp; /* vnode for character device mounted */ struct cdev *pm_dev; /* character device mounted */ struct bpb50 pm_bpb; /* BIOS parameter blk for this fs */ u_long pm_BlkPerSec; /* How many DEV_BSIZE blocks fit inside a physical sector */ u_long pm_FATsecs; /* actual number of FAT sectors */ u_long pm_fatblk; /* block # of first FAT */ u_long pm_rootdirblk; /* block # (cluster # for FAT32) of root directory number */ u_long pm_rootdirsize; /* size in blocks (not clusters) */ u_long pm_firstcluster; /* block number of first cluster */ u_long pm_maxcluster; /* maximum cluster number */ u_long pm_freeclustercount; /* number of free clusters */ u_long pm_cnshift; /* shift file offset right this amount to get a cluster number */ u_long pm_crbomask; /* and a file offset with this mask to get cluster rel offset */ u_long pm_bnshift; /* shift file offset right this amount to get a block number */ u_long pm_bpcluster; /* bytes per cluster */ u_long pm_fmod; /* ~0 if fs is modified, this can rollover to 0 */ u_long pm_fatblocksize; /* size of FAT blocks in bytes */ u_long pm_fatblocksec; /* size of FAT blocks in sectors */ u_long pm_fatsize; /* size of FAT in bytes */ uint32_t pm_fatmask; /* mask to use for FAT numbers */ u_long pm_fsinfo; /* fsinfo block number */ u_long pm_nxtfree; /* next place to search for a free cluster */ u_int pm_fatmult; /* these 2 values are used in FAT */ u_int pm_fatdiv; /* offset computation */ u_int pm_curfat; /* current FAT for FAT32 (0 otherwise) */ u_int *pm_inusemap; /* ptr to bitmap of in-use clusters */ uint64_t pm_flags; /* see below */ void *pm_u2w; /* Local->Unicode iconv handle */ void *pm_w2u; /* Unicode->Local iconv handle */ void *pm_u2d; /* Unicode->DOS iconv handle */ void *pm_d2u; /* DOS->Local iconv handle */ +#ifndef MAKEFS struct lock pm_fatlock; /* lockmgr protecting 
allocations */ +#endif }; /* * A 64-bit file number and the 32-bit file number to which it is mapped, * in a red-black tree node. */ struct msdosfs_fileno { RB_ENTRY(msdosfs_fileno) mf_tree; uint32_t mf_fileno32; uint64_t mf_fileno64; }; /* Byte offset in FAT on filesystem pmp, cluster cn */ #define FATOFS(pmp, cn) ((cn) * (pmp)->pm_fatmult / (pmp)->pm_fatdiv) #define VFSTOMSDOSFS(mp) ((struct msdosfsmount *)mp->mnt_data) /* Number of bits in one pm_inusemap item: */ #define N_INUSEBITS (8 * sizeof(u_int)) /* * Shorthand for fields in the bpb contained in the msdosfsmount structure. */ #define pm_BytesPerSec pm_bpb.bpbBytesPerSec #define pm_ResSectors pm_bpb.bpbResSectors #define pm_FATs pm_bpb.bpbFATs #define pm_RootDirEnts pm_bpb.bpbRootDirEnts #define pm_Sectors pm_bpb.bpbSectors #define pm_Media pm_bpb.bpbMedia #define pm_SecPerTrack pm_bpb.bpbSecPerTrack #define pm_Heads pm_bpb.bpbHeads #define pm_HiddenSects pm_bpb.bpbHiddenSecs #define pm_HugeSectors pm_bpb.bpbHugeSectors /* * Convert pointer to buffer -> pointer to direntry */ #define bptoep(pmp, bp, dirofs) \ ((struct direntry *)(((bp)->b_data) \ + ((dirofs) & (pmp)->pm_crbomask))) /* * Convert block number to cluster number */ #define de_bn2cn(pmp, bn) \ ((bn) >> ((pmp)->pm_cnshift - (pmp)->pm_bnshift)) /* * Convert cluster number to block number */ #define de_cn2bn(pmp, cn) \ ((cn) << ((pmp)->pm_cnshift - (pmp)->pm_bnshift)) /* * Convert file offset to cluster number */ #define de_cluster(pmp, off) \ ((off) >> (pmp)->pm_cnshift) /* * Clusters required to hold size bytes */ #define de_clcount(pmp, size) \ (((size) + (pmp)->pm_bpcluster - 1) >> (pmp)->pm_cnshift) /* * Convert file offset to block number */ #define de_blk(pmp, off) \ (de_cn2bn(pmp, de_cluster((pmp), (off)))) /* * Convert cluster number to file offset */ #define de_cn2off(pmp, cn) \ ((cn) << (pmp)->pm_cnshift) /* * Convert block number to file offset */ #define de_bn2off(pmp, bn) \ ((bn) << (pmp)->pm_bnshift) /* * Map a cluster number into a filesystem relative block number. */ #define cntobn(pmp, cn) \ (de_cn2bn((pmp), (cn)-CLUST_FIRST) + (pmp)->pm_firstcluster) /* * Calculate block number for directory entry in root dir, offset dirofs */ #define roottobn(pmp, dirofs) \ (de_blk((pmp), (dirofs)) + (pmp)->pm_rootdirblk) /* * Calculate block number for directory entry at cluster dirclu, offset * dirofs */ #define detobn(pmp, dirclu, dirofs) \ ((dirclu) == MSDOSFSROOT \ ? roottobn((pmp), (dirofs)) \ : cntobn((pmp), (dirclu))) #define MSDOSFS_LOCK_MP(pmp) \ lockmgr(&(pmp)->pm_fatlock, LK_EXCLUSIVE, NULL) #define MSDOSFS_UNLOCK_MP(pmp) \ lockmgr(&(pmp)->pm_fatlock, LK_RELEASE, NULL) #define MSDOSFS_ASSERT_MP_LOCKED(pmp) \ lockmgr_assert(&(pmp)->pm_fatlock, KA_XLOCKED) #endif /* _KERNEL || MAKEFS */ #ifndef MAKEFS /* * Arguments to mount MSDOS filesystems. 
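 *
 * (Editor's worked example for the conversion macros above, not part
 * of the original header: on a hypothetical volume with 4 KiB
 * clusters and 512-byte blocks, pm_cnshift = 12, pm_bnshift = 9 and
 * pm_bpcluster = 4096, so:
 *	de_cluster(pmp, 8192)	== 8192 >> 12 == 2
 *	de_cn2bn(pmp, 2)	== 2 << (12 - 9) == 16
 *	de_clcount(pmp, 5000)	== (5000 + 4095) >> 12 == 2
 * i.e. byte offset 8192 lies in relative cluster 2, two clusters span
 * sixteen 512-byte blocks, and a 5000-byte file needs two clusters.)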
*/ struct msdosfs_args { char *fspec; /* blocks special holding the fs to mount */ struct oexport_args export; /* network export information */ uid_t uid; /* uid that owns msdosfs files */ gid_t gid; /* gid that owns msdosfs files */ mode_t mask; /* file mask to be applied for msdosfs perms */ int flags; /* see below */ int unused1; /* unused, was version number */ uint16_t unused2[128]; /* no longer used, was Local->Unicode table */ char *cs_win; /* Windows(Unicode) Charset */ char *cs_dos; /* DOS Charset */ char *cs_local; /* Local Charset */ mode_t dirmask; /* dir mask to be applied for msdosfs perms */ }; #endif /* MAKEFS */ /* * Msdosfs mount options: */ #define MSDOSFSMNT_SHORTNAME 1 /* Force old DOS short names only */ #define MSDOSFSMNT_LONGNAME 2 /* Force Win'95 long names */ #define MSDOSFSMNT_NOWIN95 4 /* Completely ignore Win95 entries */ #define MSDOSFSMNT_KICONV 0x10 /* Use libiconv to convert chars */ /* All flags above: */ #define MSDOSFSMNT_MNTOPT \ (MSDOSFSMNT_SHORTNAME|MSDOSFSMNT_LONGNAME|MSDOSFSMNT_NOWIN95 \ |MSDOSFSMNT_KICONV) #define MSDOSFSMNT_RONLY 0x80000000 /* mounted read-only */ #define MSDOSFSMNT_WAITONFAT 0x40000000 /* mounted synchronous */ #define MSDOSFS_FATMIRROR 0x20000000 /* FAT is mirrored */ #define MSDOSFS_FSIMOD 0x01000000 #endif /* !_MSDOSFS_MSDOSFSMOUNT_H_ */ Index: projects/clang1000-import/sys/fs/nfsserver/nfs_nfsdstate.c =================================================================== --- projects/clang1000-import/sys/fs/nfsserver/nfs_nfsdstate.c (revision 357178) +++ projects/clang1000-import/sys/fs/nfsserver/nfs_nfsdstate.c (revision 357179) @@ -1,8722 +1,8723 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009 Rick Macklem, University of Guelph * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #ifndef APPLEKEXT #include #include struct nfsrv_stablefirst nfsrv_stablefirst; int nfsrv_issuedelegs = 0; int nfsrv_dolocallocks = 0; struct nfsv4lock nfsv4rootfs_lock; time_t nfsdev_time = 0; int nfsrv_layouthashsize; volatile int nfsrv_layoutcnt = 0; extern int newnfs_numnfsd; extern struct nfsstatsv1 nfsstatsv1; extern int nfsrv_lease; extern struct timeval nfsboottime; extern u_int32_t newnfs_true, newnfs_false; extern struct mtx nfsrv_dslock_mtx; extern struct mtx nfsrv_recalllock_mtx; extern struct mtx nfsrv_dontlistlock_mtx; extern int nfsd_debuglevel; extern u_int nfsrv_dsdirsize; extern struct nfsdevicehead nfsrv_devidhead; extern int nfsrv_doflexfile; extern int nfsrv_maxpnfsmirror; NFSV4ROOTLOCKMUTEX; NFSSTATESPINLOCK; extern struct nfsdontlisthead nfsrv_dontlisthead; extern volatile int nfsrv_devidcnt; extern struct nfslayouthead nfsrv_recalllisthead; extern char *nfsrv_zeropnfsdat; SYSCTL_DECL(_vfs_nfsd); int nfsrv_statehashsize = NFSSTATEHASHSIZE; SYSCTL_INT(_vfs_nfsd, OID_AUTO, statehashsize, CTLFLAG_RDTUN, &nfsrv_statehashsize, 0, "Size of state hash table set via loader.conf"); int nfsrv_clienthashsize = NFSCLIENTHASHSIZE; SYSCTL_INT(_vfs_nfsd, OID_AUTO, clienthashsize, CTLFLAG_RDTUN, &nfsrv_clienthashsize, 0, "Size of client hash table set via loader.conf"); int nfsrv_lockhashsize = NFSLOCKHASHSIZE; SYSCTL_INT(_vfs_nfsd, OID_AUTO, fhhashsize, CTLFLAG_RDTUN, &nfsrv_lockhashsize, 0, "Size of file handle hash table set via loader.conf"); int nfsrv_sessionhashsize = NFSSESSIONHASHSIZE; SYSCTL_INT(_vfs_nfsd, OID_AUTO, sessionhashsize, CTLFLAG_RDTUN, &nfsrv_sessionhashsize, 0, "Size of session hash table set via loader.conf"); int nfsrv_layouthighwater = NFSLAYOUTHIGHWATER; SYSCTL_INT(_vfs_nfsd, OID_AUTO, layouthighwater, CTLFLAG_RDTUN, &nfsrv_layouthighwater, 0, "High water mark for number of layouts set via loader.conf"); static int nfsrv_v4statelimit = NFSRV_V4STATELIMIT; SYSCTL_INT(_vfs_nfsd, OID_AUTO, v4statelimit, CTLFLAG_RWTUN, &nfsrv_v4statelimit, 0, "High water limit for NFSv4 opens+locks+delegations"); static int nfsrv_writedelegifpos = 0; SYSCTL_INT(_vfs_nfsd, OID_AUTO, writedelegifpos, CTLFLAG_RW, &nfsrv_writedelegifpos, 0, "Issue a write delegation for read opens if possible"); static int nfsrv_allowreadforwriteopen = 1; SYSCTL_INT(_vfs_nfsd, OID_AUTO, allowreadforwriteopen, CTLFLAG_RW, &nfsrv_allowreadforwriteopen, 0, "Allow Reads to be done with Write Access StateIDs"); int nfsrv_pnfsatime = 0; SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsstrictatime, CTLFLAG_RW, &nfsrv_pnfsatime, 0, "For pNFS service, do Getattr ops to keep atime up-to-date"); int nfsrv_flexlinuxhack = 0; SYSCTL_INT(_vfs_nfsd, OID_AUTO, flexlinuxhack, CTLFLAG_RW, &nfsrv_flexlinuxhack, 0, "For Linux clients, hack around Flex File Layout bug"); /* * Hash lists for nfs V4. 
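 * (Editor's note, not in the original: each table below is sized by
 * the matching vfs.nfsd.*hashsize tunable declared above and indexed
 * by a cheap hash of the object's id; a clientid, for instance, is
 * mapped to its chain by something of the form
 *	nfsclienthash[clientid.lval[1] % nfsrv_clienthashsize]
 * which is what the NFSCLIENTHASH() macro used throughout this file
 * is assumed to expand to.)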
*/ struct nfsclienthashhead *nfsclienthash; struct nfslockhashhead *nfslockhash; struct nfssessionhash *nfssessionhash; struct nfslayouthash *nfslayouthash; volatile int nfsrv_dontlistlen = 0; #endif /* !APPLEKEXT */ static u_int32_t nfsrv_openpluslock = 0, nfsrv_delegatecnt = 0; static time_t nfsrvboottime; static int nfsrv_returnoldstateid = 0, nfsrv_clients = 0; static int nfsrv_clienthighwater = NFSRV_CLIENTHIGHWATER; static int nfsrv_nogsscallback = 0; static volatile int nfsrv_writedelegcnt = 0; static int nfsrv_faildscnt; /* local functions */ static void nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp); static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p); static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p); static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p); static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p); static void nfsrv_freenfslock(struct nfslock *lop); static void nfsrv_freenfslockfile(struct nfslockfile *lfp); static void nfsrv_freedeleg(struct nfsstate *); static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, u_int32_t flags, struct nfsstate **stpp); static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp, struct nfsstate **stpp); static int nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p); static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp, struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit); static void nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp); static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp, struct nfslock **other_lopp, struct nfslockfile *lfp); static int nfsrv_getipnumber(u_char *cp); static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags, nfsv4stateid_t *stateidp, int specialid); static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp, u_int32_t flags); static int nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp, struct nfsvattr *nap, nfsattrbit_t *attrbitp, int laytype, NFSPROC_T *p); static int nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp, uint32_t callback, int op, const char *optag, struct nfsdsession **sepp); static u_int32_t nfsrv_nextclientindex(void); static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp); static void nfsrv_markstable(struct nfsclient *clp); static void nfsrv_markreclaim(struct nfsclient *clp); static int nfsrv_checkstable(struct nfsclient *clp); static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, struct vnode *vp, NFSPROC_T *p); static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p, vnode_t vp); static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp, struct nfsclient *clp, int *haslockp, NFSPROC_T *p); static int nfsrv_notsamecredname(struct nfsrv_descript *nd, struct nfsclient *clp); static time_t nfsrv_leaseexpiry(void); static void nfsrv_delaydelegtimeout(struct nfsstate *stp); static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid, struct nfsstate *stp, struct nfsrvcache *op); static int nfsrv_nootherstate(struct nfsstate *stp); static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags, uint64_t first, uint64_t end, struct nfslockconflict 
*cfp, NFSPROC_T *p); static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first, uint64_t init_end, NFSPROC_T *p); static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p); static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p); static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first, uint64_t end); static void nfsrv_locklf(struct nfslockfile *lfp); static void nfsrv_unlocklf(struct nfslockfile *lfp); static struct nfsdsession *nfsrv_findsession(uint8_t *sessionid); static int nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid); static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp, int dont_replycache, struct nfsdsession **sepp); static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp); static int nfsrv_addlayout(struct nfsrv_descript *nd, struct nfslayout **lypp, nfsv4stateid_t *stateidp, char *layp, int *layoutlenp, NFSPROC_T *p); static void nfsrv_freelayout(struct nfslayouthead *lhp, struct nfslayout *lyp); static void nfsrv_freelayoutlist(nfsquad_t clientid); static void nfsrv_freelayouts(nfsquad_t *clid, fsid_t *fs, int laytype, int iomode); static void nfsrv_freealllayouts(void); static void nfsrv_freedevid(struct nfsdevice *ds); static int nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p, struct nfsdevice **dsp); static void nfsrv_deleteds(struct nfsdevice *fndds); static void nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost); static void nfsrv_freealldevids(void); static void nfsrv_flexlayouterr(struct nfsrv_descript *nd, uint32_t *layp, int maxcnt, NFSPROC_T *p); static int nfsrv_recalllayout(nfsquad_t clid, nfsv4stateid_t *stateidp, fhandle_t *fhp, struct nfslayout *lyp, int changed, int laytype, NFSPROC_T *p); static int nfsrv_findlayout(nfsquad_t *clientidp, fhandle_t *fhp, int laytype, NFSPROC_T *, struct nfslayout **lypp); static int nfsrv_fndclid(nfsquad_t *clidvec, nfsquad_t clid, int clidcnt); static struct nfslayout *nfsrv_filelayout(struct nfsrv_descript *nd, int iomode, fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs); static struct nfslayout *nfsrv_flexlayout(struct nfsrv_descript *nd, int iomode, int mirrorcnt, fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs); static int nfsrv_dontlayout(fhandle_t *fhp); static int nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf, vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p, vnode_t *tvpp); static struct nfsdevice *nfsrv_findmirroredds(struct nfsmount *nmp); /* * Scan the client list for a match and either return the current one, * create a new entry or return an error. * If returning a non-error, the clp structure must either be linked into * the client list or free'd. */ APPLESTATIC int nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp, nfsquad_t *clientidp, nfsquad_t *confirmp, NFSPROC_T *p) { struct nfsclient *clp = NULL, *new_clp = *new_clpp; int i, error = 0, ret; struct nfsstate *stp, *tstp; #ifdef INET struct sockaddr_in *sin, *rin; #endif #ifdef INET6 struct sockaddr_in6 *sin6, *rin6; #endif struct nfsdsession *sep, *nsep; int zapit = 0, gotit, hasstate = 0, igotlock; static u_int64_t confirm_index = 0; /* * Check for state resource limit exceeded. 
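 * (Editor's aside: nfsrv_openpluslock is the running count of opens,
 * lock owners and delegations, and nfsrv_v4statelimit is the
 * vfs.nfsd.v4statelimit tunable declared above. An administrator who
 * hits NFSERR_RESOURCE here could raise it, e.g.
 *	sysctl vfs.nfsd.v4statelimit=500000
 * (hypothetical value), at the cost of more kernel memory pinned by
 * per-client state.)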
*/ if (nfsrv_openpluslock > nfsrv_v4statelimit) { error = NFSERR_RESOURCE; goto out; } if (nfsrv_issuedelegs == 0 || ((nd->nd_flag & ND_GSS) != 0 && nfsrv_nogsscallback != 0)) /* * Don't do callbacks when delegations are disabled or * for AUTH_GSS unless enabled via nfsrv_nogsscallback. * If establishing a callback connection is attempted * when a firewall is blocking the callback path, the * server may wait too long for the connect attempt to * succeed during the Open. Some clients, such as Linux, * may timeout and give up on the Open before the server * replies. Also, since AUTH_GSS callbacks are not * yet interoperability tested, they might cause the * server to crap out, if they get past the Init call to * the client. */ new_clp->lc_program = 0; /* Lock out other nfsd threads */ NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!igotlock); NFSUNLOCKV4ROOTMUTEX(); /* * Search for a match in the client list. */ gotit = i = 0; while (i < nfsrv_clienthashsize && !gotit) { LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) { if (new_clp->lc_idlen == clp->lc_idlen && !NFSBCMP(new_clp->lc_id, clp->lc_id, clp->lc_idlen)) { gotit = 1; break; } } if (gotit == 0) i++; } if (!gotit || (clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_ADMINREVOKED))) { if ((nd->nd_flag & ND_NFSV41) != 0 && confirmp->lval[1] != 0) { /* * For NFSv4.1, if confirmp->lval[1] is non-zero, the * client is trying to update a confirmed clientid. */ NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); confirmp->lval[1] = 0; error = NFSERR_NOENT; goto out; } /* * Get rid of the old one. */ if (i != nfsrv_clienthashsize) { LIST_REMOVE(clp, lc_hash); nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); zapit = 1; } /* * Add it after assigning a client id to it. */ new_clp->lc_flags |= LCL_NEEDSCONFIRM; if ((nd->nd_flag & ND_NFSV41) != 0) new_clp->lc_confirm.lval[0] = confirmp->lval[0] = ++confirm_index; else confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index; clientidp->lval[0] = new_clp->lc_clientid.lval[0] = (u_int32_t)nfsrvboottime; clientidp->lval[1] = new_clp->lc_clientid.lval[1] = nfsrv_nextclientindex(); new_clp->lc_stateindex = 0; new_clp->lc_statemaxindex = 0; new_clp->lc_cbref = 0; new_clp->lc_expiry = nfsrv_leaseexpiry(); LIST_INIT(&new_clp->lc_open); LIST_INIT(&new_clp->lc_deleg); LIST_INIT(&new_clp->lc_olddeleg); LIST_INIT(&new_clp->lc_session); for (i = 0; i < nfsrv_statehashsize; i++) LIST_INIT(&new_clp->lc_stateid[i]); LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); nfsstatsv1.srvclients++; nfsrv_openpluslock++; nfsrv_clients++; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); if (zapit) nfsrv_zapclient(clp, p); *new_clpp = NULL; goto out; } /* * Now, handle the cases where the id is already issued. */ if (nfsrv_notsamecredname(nd, clp)) { /* * Check to see if there is expired state that should go away. */ if (clp->lc_expiry < NFSD_MONOSEC && (!LIST_EMPTY(&clp->lc_open) || !LIST_EMPTY(&clp->lc_deleg))) { nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); } /* * If there is outstanding state, then reply NFSERR_CLIDINUSE per * RFC3530 Sec. 8.1.2 last para. 
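 * (Editor's summary of the test below: the clientid counts as "in
 * use" if it still holds any delegation, or if any of its open
 * owners still has at least one open; open owners with no remaining
 * opens do not block reuse of the id.)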
*/ if (!LIST_EMPTY(&clp->lc_deleg)) { hasstate = 1; } else if (LIST_EMPTY(&clp->lc_open)) { hasstate = 0; } else { hasstate = 0; /* Look for an Open on the OpenOwner */ LIST_FOREACH(stp, &clp->lc_open, ls_list) { if (!LIST_EMPTY(&stp->ls_open)) { hasstate = 1; break; } } } if (hasstate) { /* * If the uid doesn't match, return NFSERR_CLIDINUSE after * filling out the correct ipaddr and portnum. */ switch (clp->lc_req.nr_nam->sa_family) { #ifdef INET case AF_INET: sin = (struct sockaddr_in *)new_clp->lc_req.nr_nam; rin = (struct sockaddr_in *)clp->lc_req.nr_nam; sin->sin_addr.s_addr = rin->sin_addr.s_addr; sin->sin_port = rin->sin_port; break; #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)new_clp->lc_req.nr_nam; rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam; sin6->sin6_addr = rin6->sin6_addr; sin6->sin6_port = rin6->sin6_port; break; #endif } NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); error = NFSERR_CLIDINUSE; goto out; } } if (NFSBCMP(new_clp->lc_verf, clp->lc_verf, NFSX_VERF)) { /* * If the verifier has changed, the client has rebooted * and a new client id is issued. The old state info * can be thrown away once the SETCLIENTID_CONFIRM occurs. */ LIST_REMOVE(clp, lc_hash); /* Get rid of all sessions on this clientid. */ LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep) { ret = nfsrv_freesession(sep, NULL); if (ret != 0) printf("nfsrv_setclient: verifier changed free" " session failed=%d\n", ret); } new_clp->lc_flags |= LCL_NEEDSCONFIRM; if ((nd->nd_flag & ND_NFSV41) != 0) new_clp->lc_confirm.lval[0] = confirmp->lval[0] = ++confirm_index; else confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index; clientidp->lval[0] = new_clp->lc_clientid.lval[0] = nfsrvboottime; clientidp->lval[1] = new_clp->lc_clientid.lval[1] = nfsrv_nextclientindex(); new_clp->lc_stateindex = 0; new_clp->lc_statemaxindex = 0; new_clp->lc_cbref = 0; new_clp->lc_expiry = nfsrv_leaseexpiry(); /* * Save the state until confirmed. */ LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list); LIST_FOREACH(tstp, &new_clp->lc_open, ls_list) tstp->ls_clp = new_clp; LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list); LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list) tstp->ls_clp = new_clp; LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list); LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list) tstp->ls_clp = new_clp; for (i = 0; i < nfsrv_statehashsize; i++) { LIST_NEWHEAD(&new_clp->lc_stateid[i], &clp->lc_stateid[i], ls_hash); LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash) tstp->ls_clp = new_clp; } LIST_INIT(&new_clp->lc_session); LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); nfsstatsv1.srvclients++; nfsrv_openpluslock++; nfsrv_clients++; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); /* * Must wait until any outstanding callback on the old clp * completes. */ NFSLOCKSTATE(); while (clp->lc_cbref) { clp->lc_flags |= LCL_WAKEUPWANTED; (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1, "nfsd clp", 10 * hz); } NFSUNLOCKSTATE(); nfsrv_zapclient(clp, p); *new_clpp = NULL; goto out; } /* For NFSv4.1, mark that we found a confirmed clientid. 
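 * (Editor's note: for v4.1 only the reply matters here -
 * confirmp->lval[1] is set non-zero to signal an already-confirmed
 * clientid, while confirmp->lval[0] = 0 is ignored by the client, as
 * the inline comment below says.)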
*/ if ((nd->nd_flag & ND_NFSV41) != 0) { clientidp->lval[0] = clp->lc_clientid.lval[0]; clientidp->lval[1] = clp->lc_clientid.lval[1]; confirmp->lval[0] = 0; /* Ignored by client */ confirmp->lval[1] = 1; } else { /* * id and verifier match, so update the net address info * and get rid of any existing callback authentication * handle, so a new one will be acquired. */ LIST_REMOVE(clp, lc_hash); new_clp->lc_flags |= (LCL_NEEDSCONFIRM | LCL_DONTCLEAN); new_clp->lc_expiry = nfsrv_leaseexpiry(); confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index; clientidp->lval[0] = new_clp->lc_clientid.lval[0] = clp->lc_clientid.lval[0]; clientidp->lval[1] = new_clp->lc_clientid.lval[1] = clp->lc_clientid.lval[1]; new_clp->lc_delegtime = clp->lc_delegtime; new_clp->lc_stateindex = clp->lc_stateindex; new_clp->lc_statemaxindex = clp->lc_statemaxindex; new_clp->lc_cbref = 0; LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list); LIST_FOREACH(tstp, &new_clp->lc_open, ls_list) tstp->ls_clp = new_clp; LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list); LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list) tstp->ls_clp = new_clp; LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list); LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list) tstp->ls_clp = new_clp; for (i = 0; i < nfsrv_statehashsize; i++) { LIST_NEWHEAD(&new_clp->lc_stateid[i], &clp->lc_stateid[i], ls_hash); LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash) tstp->ls_clp = new_clp; } LIST_INIT(&new_clp->lc_session); LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); nfsstatsv1.srvclients++; nfsrv_openpluslock++; nfsrv_clients++; } NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); if ((nd->nd_flag & ND_NFSV41) == 0) { /* * Must wait until any outstanding callback on the old clp * completes. */ NFSLOCKSTATE(); while (clp->lc_cbref) { clp->lc_flags |= LCL_WAKEUPWANTED; (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1, "nfsdclp", 10 * hz); } NFSUNLOCKSTATE(); nfsrv_zapclient(clp, p); *new_clpp = NULL; } out: NFSEXITCODE2(error, nd); return (error); } /* * Check to see if the client id exists and optionally confirm it. */ APPLESTATIC int nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp, struct nfsdsession *nsep, nfsquad_t confirm, uint32_t cbprogram, struct nfsrv_descript *nd, NFSPROC_T *p) { struct nfsclient *clp; struct nfsstate *stp; int i; struct nfsclienthashhead *hp; int error = 0, igotlock, doneok; struct nfssessionhash *shp; struct nfsdsession *sep; uint64_t sessid[2]; static uint64_t next_sess = 0; if (clpp) *clpp = NULL; if ((nd == NULL || (nd->nd_flag & ND_NFSV41) == 0 || opflags != CLOPS_RENEW) && nfsrvboottime != clientid.lval[0]) { error = NFSERR_STALECLIENTID; goto out; } /* * If called with opflags == CLOPS_RENEW, the State Lock is * already held. Otherwise, we need to get either that or, * for the case of Confirm, lock out the nfsd threads. */ if (opflags & CLOPS_CONFIRM) { NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!igotlock); /* * Create a new sessionid here, since we need to do it where * there is a mutex held to serialize update of next_sess. */ if ((nd->nd_flag & ND_NFSV41) != 0) { sessid[0] = ++next_sess; sessid[1] = clientid.qval; } NFSUNLOCKV4ROOTMUTEX(); } else if (opflags != CLOPS_RENEW) { NFSLOCKSTATE(); } /* For NFSv4.1, the clp is acquired from the associated session. 
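 * (Editor's sketch of the lookup below: the 16-byte session id from
 * the SEQUENCE op hashes to a bucket, the bucket is searched for the
 * session, and the session carries its owning client:
 *	shp = NFSSESSIONHASH(nd->nd_sessionid);
 *	sep = nfsrv_findsession(nd->nd_sessionid);
 *	clp = sep->sess_clp;
 * so the v4.1 renew path never scans the clientid hash table.)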
*/ if (nd != NULL && (nd->nd_flag & ND_NFSV41) != 0 && opflags == CLOPS_RENEW) { clp = NULL; if ((nd->nd_flag & ND_HASSEQUENCE) != 0) { shp = NFSSESSIONHASH(nd->nd_sessionid); NFSLOCKSESSION(shp); sep = nfsrv_findsession(nd->nd_sessionid); if (sep != NULL) clp = sep->sess_clp; NFSUNLOCKSESSION(shp); } } else { hp = NFSCLIENTHASH(clientid); LIST_FOREACH(clp, hp, lc_hash) { if (clp->lc_clientid.lval[1] == clientid.lval[1]) break; } } if (clp == NULL) { if (opflags & CLOPS_CONFIRM) error = NFSERR_STALECLIENTID; else error = NFSERR_EXPIRED; } else if (clp->lc_flags & LCL_ADMINREVOKED) { /* * If marked admin revoked, just return the error. */ error = NFSERR_ADMINREVOKED; } if (error) { if (opflags & CLOPS_CONFIRM) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } else if (opflags != CLOPS_RENEW) { NFSUNLOCKSTATE(); } goto out; } /* * Perform any operations specified by the opflags. */ if (opflags & CLOPS_CONFIRM) { if (((nd->nd_flag & ND_NFSV41) != 0 && clp->lc_confirm.lval[0] != confirm.lval[0]) || ((nd->nd_flag & ND_NFSV41) == 0 && clp->lc_confirm.qval != confirm.qval)) error = NFSERR_STALECLIENTID; else if (nfsrv_notsamecredname(nd, clp)) error = NFSERR_CLIDINUSE; if (!error) { if ((clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_DONTCLEAN)) == LCL_NEEDSCONFIRM) { /* * Hang onto the delegations (as old delegations) * for an Open with CLAIM_DELEGATE_PREV unless in * grace, but get rid of the rest of the state. */ nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_olddeleg); if (nfsrv_checkgrace(nd, clp, 0)) { /* In grace, so just delete delegations */ nfsrv_freedeleglist(&clp->lc_deleg); } else { LIST_FOREACH(stp, &clp->lc_deleg, ls_list) stp->ls_flags |= NFSLCK_OLDDELEG; clp->lc_delegtime = NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA; LIST_NEWHEAD(&clp->lc_olddeleg, &clp->lc_deleg, ls_list); } if ((nd->nd_flag & ND_NFSV41) != 0) clp->lc_program = cbprogram; } clp->lc_flags &= ~(LCL_NEEDSCONFIRM | LCL_DONTCLEAN); if (clp->lc_program) clp->lc_flags |= LCL_NEEDSCBNULL; /* For NFSv4.1, link the session onto the client. */ if (nsep != NULL) { /* Hold a reference on the xprt for a backchannel. */ if ((nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0) { if (clp->lc_req.nr_client == NULL) clp->lc_req.nr_client = (struct __rpc_client *) clnt_bck_create(nd->nd_xprt->xp_socket, cbprogram, NFSV4_CBVERS); if (clp->lc_req.nr_client != NULL) { SVC_ACQUIRE(nd->nd_xprt); nd->nd_xprt->xp_p2 = clp->lc_req.nr_client->cl_private; /* Disable idle timeout. */ nd->nd_xprt->xp_idletimeout = 0; nsep->sess_cbsess.nfsess_xprt = nd->nd_xprt; } else nsep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN; } NFSBCOPY(sessid, nsep->sess_sessionid, NFSX_V4SESSIONID); NFSBCOPY(sessid, nsep->sess_cbsess.nfsess_sessionid, NFSX_V4SESSIONID); shp = NFSSESSIONHASH(nsep->sess_sessionid); NFSLOCKSTATE(); NFSLOCKSESSION(shp); LIST_INSERT_HEAD(&shp->list, nsep, sess_hash); LIST_INSERT_HEAD(&clp->lc_session, nsep, sess_list); nsep->sess_clp = clp; NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); } } } else if (clp->lc_flags & LCL_NEEDSCONFIRM) { error = NFSERR_EXPIRED; } /* * If called by the Renew Op, we must check the principal. 
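 * (Editor's note on the check below: a renew under a different
 * credential name is honored only if that uid also owns at least one
 * of the client's opens; otherwise NFSERR_ACCES is returned, so an
 * unrelated principal cannot keep another client's lease alive.)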
*/ if (!error && (opflags & CLOPS_RENEWOP)) { if (nfsrv_notsamecredname(nd, clp)) { doneok = 0; for (i = 0; i < nfsrv_statehashsize && doneok == 0; i++) { LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) { if ((stp->ls_flags & NFSLCK_OPEN) && stp->ls_uid == nd->nd_cred->cr_uid) { doneok = 1; break; } } } if (!doneok) error = NFSERR_ACCES; } if (!error && (clp->lc_flags & LCL_CBDOWN)) error = NFSERR_CBPATHDOWN; } if ((!error || error == NFSERR_CBPATHDOWN) && (opflags & CLOPS_RENEW)) { clp->lc_expiry = nfsrv_leaseexpiry(); } if (opflags & CLOPS_CONFIRM) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } else if (opflags != CLOPS_RENEW) { NFSUNLOCKSTATE(); } if (clpp) *clpp = clp; out: NFSEXITCODE2(error, nd); return (error); } /* * Perform the NFSv4.1 destroy clientid. */ int nfsrv_destroyclient(nfsquad_t clientid, NFSPROC_T *p) { struct nfsclient *clp; struct nfsclienthashhead *hp; int error = 0, i, igotlock; if (nfsrvboottime != clientid.lval[0]) { error = NFSERR_STALECLIENTID; goto out; } /* Lock out other nfsd threads */ NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (igotlock == 0); NFSUNLOCKV4ROOTMUTEX(); hp = NFSCLIENTHASH(clientid); LIST_FOREACH(clp, hp, lc_hash) { if (clp->lc_clientid.lval[1] == clientid.lval[1]) break; } if (clp == NULL) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); /* Just return ok, since it is gone. */ goto out; } /* * Free up all layouts on the clientid. Should the client return the * layouts? */ nfsrv_freelayoutlist(clientid); /* Scan for state on the clientid. */ for (i = 0; i < nfsrv_statehashsize; i++) if (!LIST_EMPTY(&clp->lc_stateid[i])) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); error = NFSERR_CLIENTIDBUSY; goto out; } if (!LIST_EMPTY(&clp->lc_session) || !LIST_EMPTY(&clp->lc_deleg)) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); error = NFSERR_CLIENTIDBUSY; goto out; } /* Destroy the clientid and return ok. */ nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); LIST_REMOVE(clp, lc_hash); NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); nfsrv_zapclient(clp, p); out: NFSEXITCODE2(error, nd); return (error); } /* * Called from the new nfssvc syscall to admin revoke a clientid. * Returns 0 for success, error otherwise. */ APPLESTATIC int nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p) { struct nfsclient *clp = NULL; int i, error = 0; int gotit, igotlock; /* * First, lock out the nfsd so that state won't change while the * revocation record is being written to the stable storage restart * file. */ NFSLOCKV4ROOTMUTEX(); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!igotlock); NFSUNLOCKV4ROOTMUTEX(); /* * Search for a match in the client list. 
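 * (Editor's note: unlike the usual clientid lookups, this is a
 * linear walk of every hash chain comparing the opaque
 * (lc_id, lc_idlen) identifier passed in from nfssvc(2); admin
 * revocation is rare, so the O(clients) scan is acceptable.)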
*/ gotit = i = 0; while (i < nfsrv_clienthashsize && !gotit) { LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) { if (revokep->nclid_idlen == clp->lc_idlen && !NFSBCMP(revokep->nclid_id, clp->lc_id, clp->lc_idlen)) { gotit = 1; break; } } i++; } if (!gotit) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 0); NFSUNLOCKV4ROOTMUTEX(); error = EPERM; goto out; } /* * Now, write out the revocation record */ nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p); nfsrv_backupstable(); /* * and clear out the state, marking the clientid revoked. */ clp->lc_flags &= ~LCL_CALLBACKSON; clp->lc_flags |= LCL_ADMINREVOKED; nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 0); NFSUNLOCKV4ROOTMUTEX(); out: NFSEXITCODE(error); return (error); } /* * Dump out stats for all clients. Called from nfssvc(2) to dump out the * NFSv4 client state. */ APPLESTATIC void nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt) { struct nfsclient *clp; int i = 0, cnt = 0; /* * First, get a reference on the nfsv4rootfs_lock so that an * exclusive lock cannot be acquired while dumping the clients. */ NFSLOCKV4ROOTMUTEX(); nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); NFSLOCKSTATE(); /* * Rattle through the client lists until done. */ while (i < nfsrv_clienthashsize && cnt < maxcnt) { clp = LIST_FIRST(&nfsclienthash[i]); while (clp != LIST_END(&nfsclienthash[i]) && cnt < maxcnt) { nfsrv_dumpaclient(clp, &dumpp[cnt]); cnt++; clp = LIST_NEXT(clp, lc_hash); } i++; } if (cnt < maxcnt) dumpp[cnt].ndcl_clid.nclid_idlen = 0; NFSUNLOCKSTATE(); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); } /* * Dump stats for a client. Must be called with the NFSSTATELOCK and spl'd. */ static void nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp) { struct nfsstate *stp, *openstp, *lckownstp; struct nfslock *lop; sa_family_t af; #ifdef INET struct sockaddr_in *rin; #endif #ifdef INET6 struct sockaddr_in6 *rin6; #endif dumpp->ndcl_nopenowners = dumpp->ndcl_nlockowners = 0; dumpp->ndcl_nopens = dumpp->ndcl_nlocks = 0; dumpp->ndcl_ndelegs = dumpp->ndcl_nolddelegs = 0; dumpp->ndcl_flags = clp->lc_flags; dumpp->ndcl_clid.nclid_idlen = clp->lc_idlen; NFSBCOPY(clp->lc_id, dumpp->ndcl_clid.nclid_id, clp->lc_idlen); af = clp->lc_req.nr_nam->sa_family; dumpp->ndcl_addrfam = af; switch (af) { #ifdef INET case AF_INET: rin = (struct sockaddr_in *)clp->lc_req.nr_nam; dumpp->ndcl_cbaddr.sin_addr = rin->sin_addr; break; #endif #ifdef INET6 case AF_INET6: rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam; dumpp->ndcl_cbaddr.sin6_addr = rin6->sin6_addr; break; #endif } /* * Now, scan the state lists and total up the opens and locks. */ LIST_FOREACH(stp, &clp->lc_open, ls_list) { dumpp->ndcl_nopenowners++; LIST_FOREACH(openstp, &stp->ls_open, ls_list) { dumpp->ndcl_nopens++; LIST_FOREACH(lckownstp, &openstp->ls_open, ls_list) { dumpp->ndcl_nlockowners++; LIST_FOREACH(lop, &lckownstp->ls_lock, lo_lckowner) { dumpp->ndcl_nlocks++; } } } } /* * and the delegation lists. */ LIST_FOREACH(stp, &clp->lc_deleg, ls_list) { dumpp->ndcl_ndelegs++; } LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) { dumpp->ndcl_nolddelegs++; } } /* * Dump out lock stats for a file. 
*/ APPLESTATIC void nfsrv_dumplocks(vnode_t vp, struct nfsd_dumplocks *ldumpp, int maxcnt, NFSPROC_T *p) { struct nfsstate *stp; struct nfslock *lop; int cnt = 0; struct nfslockfile *lfp; sa_family_t af; #ifdef INET struct sockaddr_in *rin; #endif #ifdef INET6 struct sockaddr_in6 *rin6; #endif int ret; fhandle_t nfh; ret = nfsrv_getlockfh(vp, 0, NULL, &nfh, p); /* * First, get a reference on the nfsv4rootfs_lock so that an * exclusive lock on it cannot be acquired while dumping the locks. */ NFSLOCKV4ROOTMUTEX(); nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); NFSLOCKSTATE(); if (!ret) ret = nfsrv_getlockfile(0, NULL, &lfp, &nfh, 0); if (ret) { ldumpp[0].ndlck_clid.nclid_idlen = 0; NFSUNLOCKSTATE(); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); return; } /* * For each open share on file, dump it out. */ stp = LIST_FIRST(&lfp->lf_open); while (stp != LIST_END(&lfp->lf_open) && cnt < maxcnt) { ldumpp[cnt].ndlck_flags = stp->ls_flags; ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid; ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0]; ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1]; ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2]; ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_openowner->ls_ownerlen; NFSBCOPY(stp->ls_openowner->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id, stp->ls_openowner->ls_ownerlen); ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen; NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id, stp->ls_clp->lc_idlen); af = stp->ls_clp->lc_req.nr_nam->sa_family; ldumpp[cnt].ndlck_addrfam = af; switch (af) { #ifdef INET case AF_INET: rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam; ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr; break; #endif #ifdef INET6 case AF_INET6: rin6 = (struct sockaddr_in6 *) stp->ls_clp->lc_req.nr_nam; ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr; break; #endif } stp = LIST_NEXT(stp, ls_file); cnt++; } /* * and all locks. */ lop = LIST_FIRST(&lfp->lf_lock); while (lop != LIST_END(&lfp->lf_lock) && cnt < maxcnt) { stp = lop->lo_stp; ldumpp[cnt].ndlck_flags = lop->lo_flags; ldumpp[cnt].ndlck_first = lop->lo_first; ldumpp[cnt].ndlck_end = lop->lo_end; ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid; ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0]; ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1]; ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2]; ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_ownerlen; NFSBCOPY(stp->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id, stp->ls_ownerlen); ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen; NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id, stp->ls_clp->lc_idlen); af = stp->ls_clp->lc_req.nr_nam->sa_family; ldumpp[cnt].ndlck_addrfam = af; switch (af) { #ifdef INET case AF_INET: rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam; ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr; break; #endif #ifdef INET6 case AF_INET6: rin6 = (struct sockaddr_in6 *) stp->ls_clp->lc_req.nr_nam; ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr; break; #endif } lop = LIST_NEXT(lop, lo_lckfile); cnt++; } /* * and the delegations. 
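 * (An editor's sketch of the bounded-fill convention used by these dump
 * routines follows.)
 */

/*
 * Editor's aside -- the dump functions above fill a caller-supplied array
 * and, when it is not full, terminate it with a zero-length id as a
 * sentinel.  A self-contained sketch of that convention; all names are
 * invented, and the id values stand in for nonzero id lengths.
 */
#include <stddef.h>

struct dumprec {
	int	dr_idlen;	/* 0 marks the end of valid entries */
	/* ... other dumped fields would go here ... */
};

static size_t
dump_fill(struct dumprec *out, size_t maxcnt, const int *idlens, size_t n)
{
	size_t cnt;

	for (cnt = 0; cnt < maxcnt && cnt < n; cnt++)
		out[cnt].dr_idlen = idlens[cnt];	/* copy one record */
	if (cnt < maxcnt)
		out[cnt].dr_idlen = 0;	/* sentinel, as in the code above */
	return (cnt);
}

/*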
*/ stp = LIST_FIRST(&lfp->lf_deleg); while (stp != LIST_END(&lfp->lf_deleg) && cnt < maxcnt) { ldumpp[cnt].ndlck_flags = stp->ls_flags; ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid; ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0]; ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1]; ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2]; ldumpp[cnt].ndlck_owner.nclid_idlen = 0; ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen; NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id, stp->ls_clp->lc_idlen); af = stp->ls_clp->lc_req.nr_nam->sa_family; ldumpp[cnt].ndlck_addrfam = af; switch (af) { #ifdef INET case AF_INET: rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam; ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr; break; #endif #ifdef INET6 case AF_INET6: rin6 = (struct sockaddr_in6 *) stp->ls_clp->lc_req.nr_nam; ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr; break; #endif } stp = LIST_NEXT(stp, ls_file); cnt++; } /* * If list isn't full, mark end of list by setting the client name * to zero length. */ if (cnt < maxcnt) ldumpp[cnt].ndlck_clid.nclid_idlen = 0; NFSUNLOCKSTATE(); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); } /* * Server timer routine. It can scan any linked list, so long * as it holds the spin/mutex lock and there is no exclusive lock on * nfsv4rootfs_lock. * (For OpenBSD, a kthread is ok. For FreeBSD, I think it is ok * to do this from a callout, since the spin locks work. For * Darwin, I'm not sure what will work correctly yet.) * Should be called once per second. */ APPLESTATIC void nfsrv_servertimer(void) { struct nfsclient *clp, *nclp; struct nfsstate *stp, *nstp; int got_ref, i; /* * Make sure nfsboottime is set. This is used by V3 as well * as V4. Note that nfsboottime is not nfsrvboottime, which is * only used by the V4 server for leases. */ if (nfsboottime.tv_sec == 0) NFSSETBOOTTIME(nfsboottime); /* * If server hasn't started yet, just return. */ NFSLOCKSTATE(); if (nfsrv_stablefirst.nsf_eograce == 0) { NFSUNLOCKSTATE(); return; } if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE)) { if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) && NFSD_MONOSEC > nfsrv_stablefirst.nsf_eograce) nfsrv_stablefirst.nsf_flags |= (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK); NFSUNLOCKSTATE(); return; } /* * Try and get a reference count on the nfsv4rootfs_lock so that * no nfsd thread can acquire an exclusive lock on it before this * call is done. If it is already exclusively locked, just return. */ NFSLOCKV4ROOTMUTEX(); got_ref = nfsv4_getref_nonblock(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); if (got_ref == 0) { NFSUNLOCKSTATE(); return; } /* * For each client... */ for (i = 0; i < nfsrv_clienthashsize; i++) { clp = LIST_FIRST(&nfsclienthash[i]); while (clp != LIST_END(&nfsclienthash[i])) { nclp = LIST_NEXT(clp, lc_hash); if (!(clp->lc_flags & LCL_EXPIREIT)) { if (((clp->lc_expiry + NFSRV_STALELEASE) < NFSD_MONOSEC && ((LIST_EMPTY(&clp->lc_deleg) && LIST_EMPTY(&clp->lc_open)) || nfsrv_clients > nfsrv_clienthighwater)) || (clp->lc_expiry + NFSRV_MOULDYLEASE) < NFSD_MONOSEC || (clp->lc_expiry < NFSD_MONOSEC && (nfsrv_openpluslock * 10 / 9) > nfsrv_v4statelimit)) { /* * Lease has expired several nfsrv_lease times ago: * PLUS * - no state is associated with it * OR * - above high water mark for number of clients * (nfsrv_clienthighwater should be large enough * that this only occurs when clients fail to * use the same nfs_client_id4.id. 
Maybe somewhat * higher than the maximum number of clients that * will mount this server?) * OR * Lease has expired a very long time ago * OR * Lease has expired PLUS the number of opens + locks * has exceeded 90% of capacity * * --> Mark for expiry. The actual expiry will be done * by an nfsd sometime soon. */ clp->lc_flags |= LCL_EXPIREIT; nfsrv_stablefirst.nsf_flags |= (NFSNSF_NEEDLOCK | NFSNSF_EXPIREDCLIENT); } else { /* * If there are no opens, increment no open tick cnt. * If time exceeds NFSNOOPEN, mark it to be thrown away; * otherwise, if there is an open, reset the no open time. * Hopefully, this will avoid excessive re-creation * of open owners and subsequent open confirms. */ stp = LIST_FIRST(&clp->lc_open); while (stp != LIST_END(&clp->lc_open)) { nstp = LIST_NEXT(stp, ls_list); if (LIST_EMPTY(&stp->ls_open)) { stp->ls_noopens++; if (stp->ls_noopens > NFSNOOPEN || (nfsrv_openpluslock * 2) > nfsrv_v4statelimit) nfsrv_stablefirst.nsf_flags |= NFSNSF_NOOPENS; } else { stp->ls_noopens = 0; } stp = nstp; } } } clp = nclp; } } NFSUNLOCKSTATE(); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); } /* * The following set of functions free up the various data structures. */ /* * Clear out all open/lock state related to this nfsclient. * Caller must hold an exclusive lock on nfsv4rootfs_lock, so that * there are no other active nfsd threads. */ APPLESTATIC void nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p) { struct nfsstate *stp, *nstp; struct nfsdsession *sep, *nsep; LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) nfsrv_freeopenowner(stp, 1, p); if ((clp->lc_flags & LCL_ADMINREVOKED) == 0) LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep) (void)nfsrv_freesession(sep, NULL); } /* * Free a client that has been cleaned. It should also already have been * removed from the lists. * (Just to be safe w.r.t. newnfs_disconnect(), call this function when * softclock interrupts are enabled.) */ APPLESTATIC void nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p) { #ifdef notyet if ((clp->lc_flags & (LCL_GSS | LCL_CALLBACKSON)) == (LCL_GSS | LCL_CALLBACKSON) && (clp->lc_hand.nfsh_flag & NFSG_COMPLETE) && clp->lc_handlelen > 0) { clp->lc_hand.nfsh_flag &= ~NFSG_COMPLETE; clp->lc_hand.nfsh_flag |= NFSG_DESTROYED; (void) nfsrv_docallback(clp, NFSV4PROC_CBNULL, NULL, 0, NULL, NULL, NULL, 0, p); } #endif newnfs_disconnect(&clp->lc_req); free(clp->lc_req.nr_nam, M_SONAME); NFSFREEMUTEX(&clp->lc_req.nr_mtx); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); NFSLOCKSTATE(); nfsstatsv1.srvclients--; nfsrv_openpluslock--; nfsrv_clients--; NFSUNLOCKSTATE(); } /* * Free a list of delegation state structures. * (This function will also free all nfslockfile structures that no * longer have associated state.) */ APPLESTATIC void nfsrv_freedeleglist(struct nfsstatehead *sthp) { struct nfsstate *stp, *nstp; LIST_FOREACH_SAFE(stp, sthp, ls_list, nstp) { nfsrv_freedeleg(stp); } LIST_INIT(sthp); } /* * Free up a delegation. 
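 * (Before the free routines, an editor's sketch of the expiry test used in
 * nfsrv_servertimer() above.)
 */

/*
 * Editor's aside -- the client-expiry test above, distilled into a
 * standalone predicate.  The constants and names are invented stand-ins
 * for NFSRV_STALELEASE, NFSRV_MOULDYLEASE and friends.
 */
#include <stdbool.h>
#include <time.h>

#define STALELEASE	(60 * 5)	/* several lease periods, say */
#define MOULDYLEASE	(60 * 60 * 24)	/* "a very long time" */

static bool
lease_should_expire(time_t now, time_t expiry, bool has_state,
    int nclients, int highwater, int openpluslock, int statelimit)
{
	/* Stale for a while, and either stateless or too many clients. */
	if (expiry + STALELEASE < now && (!has_state || nclients > highwater))
		return (true);
	/* Expired a very long time ago. */
	if (expiry + MOULDYLEASE < now)
		return (true);
	/* Expired and opens + locks are past 90% of the state limit. */
	if (expiry < now && openpluslock * 10 / 9 > statelimit)
		return (true);
	return (false);
}

/*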
*/ static void nfsrv_freedeleg(struct nfsstate *stp) { struct nfslockfile *lfp; LIST_REMOVE(stp, ls_hash); LIST_REMOVE(stp, ls_list); LIST_REMOVE(stp, ls_file); if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0) nfsrv_writedelegcnt--; lfp = stp->ls_lfp; if (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) && lfp->lf_usecount == 0 && nfsv4_testlock(&lfp->lf_locallock_lck) == 0) nfsrv_freenfslockfile(lfp); free(stp, M_NFSDSTATE); nfsstatsv1.srvdelegates--; nfsrv_openpluslock--; nfsrv_delegatecnt--; } /* * This function frees an open owner and all associated opens. */ static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p) { struct nfsstate *nstp, *tstp; LIST_REMOVE(stp, ls_list); /* * Now, free all associated opens. */ nstp = LIST_FIRST(&stp->ls_open); while (nstp != LIST_END(&stp->ls_open)) { tstp = nstp; nstp = LIST_NEXT(nstp, ls_list); (void) nfsrv_freeopen(tstp, NULL, cansleep, p); } if (stp->ls_op) nfsrvd_derefcache(stp->ls_op); free(stp, M_NFSDSTATE); nfsstatsv1.srvopenowners--; nfsrv_openpluslock--; } /* * This function frees an open (nfsstate open structure) with all associated * lock_owners and locks. It also frees the nfslockfile structure iff there * are no other opens on the file. * Returns 1 if it free'd the nfslockfile, 0 otherwise. */ static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p) { struct nfsstate *nstp, *tstp; struct nfslockfile *lfp; int ret; LIST_REMOVE(stp, ls_hash); LIST_REMOVE(stp, ls_list); LIST_REMOVE(stp, ls_file); lfp = stp->ls_lfp; /* * Now, free all lockowners associated with this open. */ LIST_FOREACH_SAFE(tstp, &stp->ls_open, ls_list, nstp) nfsrv_freelockowner(tstp, vp, cansleep, p); /* * The nfslockfile is freed here if there are no locks * associated with the open. * If there are locks associated with the open, the * nfslockfile structure can be freed via nfsrv_freelockowner(). * Acquire the state mutex to avoid races with calls to * nfsrv_getlockfile(). */ if (cansleep != 0) NFSLOCKSTATE(); if (lfp != NULL && LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) && lfp->lf_usecount == 0 && (cansleep != 0 || nfsv4_testlock(&lfp->lf_locallock_lck) == 0)) { nfsrv_freenfslockfile(lfp); ret = 1; } else ret = 0; if (cansleep != 0) NFSUNLOCKSTATE(); free(stp, M_NFSDSTATE); nfsstatsv1.srvopens--; nfsrv_openpluslock--; return (ret); } /* * Frees a lockowner and all associated locks. */ static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p) { LIST_REMOVE(stp, ls_hash); LIST_REMOVE(stp, ls_list); nfsrv_freeallnfslocks(stp, vp, cansleep, p); if (stp->ls_op) nfsrvd_derefcache(stp->ls_op); free(stp, M_NFSDSTATE); nfsstatsv1.srvlockowners--; nfsrv_openpluslock--; } /* * Free all the nfs locks on a lockowner. */ static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p) { struct nfslock *lop, *nlop; struct nfsrollback *rlp, *nrlp; struct nfslockfile *lfp = NULL; int gottvp = 0; vnode_t tvp = NULL; uint64_t first, end; if (vp != NULL) ASSERT_VOP_UNLOCKED(vp, "nfsrv_freeallnfslocks: vnode locked"); lop = LIST_FIRST(&stp->ls_lock); while (lop != LIST_END(&stp->ls_lock)) { nlop = LIST_NEXT(lop, lo_lckowner); /* * Since all locks should be for the same file, lfp should * not change. 
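 * (An editor's sketch of the safe-iteration teardown idiom used by these
 * free routines follows.)
 */

/*
 * Editor's aside -- the free routines above repeatedly use the idiom
 * "save the successor, then free the current node", either via
 * LIST_FOREACH_SAFE() or an explicit LIST_NEXT() before the free.  A
 * self-contained sketch with an invented node type:
 */
#include <stdlib.h>
#include <sys/queue.h>

struct node {
	LIST_ENTRY(node) n_link;
};
LIST_HEAD(nodehead, node);

static void
teardown(struct nodehead *hp)
{
	struct node *np, *nnp;

	/* nnp holds the successor, so freeing np cannot corrupt the walk. */
	LIST_FOREACH_SAFE(np, hp, n_link, nnp) {
		LIST_REMOVE(np, n_link);
		free(np);
	}
	LIST_INIT(hp);	/* leave an empty head, as nfsrv_freedeleglist() does */
}

/*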
*/ if (lfp == NULL) lfp = lop->lo_lfp; else if (lfp != lop->lo_lfp) panic("allnfslocks"); /* * If vp is NULL and cansleep != 0, a vnode must be acquired * from the file handle. This only occurs when called from * nfsrv_cleanclient(). */ if (gottvp == 0) { if (nfsrv_dolocallocks == 0) tvp = NULL; else if (vp == NULL && cansleep != 0) { tvp = nfsvno_getvp(&lfp->lf_fh); - NFSVOPUNLOCK(tvp); + if (tvp != NULL) + NFSVOPUNLOCK(tvp); } else tvp = vp; gottvp = 1; } if (tvp != NULL) { if (cansleep == 0) panic("allnfs2"); first = lop->lo_first; end = lop->lo_end; nfsrv_freenfslock(lop); nfsrv_localunlock(tvp, lfp, first, end, p); LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) free(rlp, M_NFSDROLLBACK); LIST_INIT(&lfp->lf_rollback); } else nfsrv_freenfslock(lop); lop = nlop; } if (vp == NULL && tvp != NULL) vrele(tvp); } /* * Free an nfslock structure. */ static void nfsrv_freenfslock(struct nfslock *lop) { if (lop->lo_lckfile.le_prev != NULL) { LIST_REMOVE(lop, lo_lckfile); nfsstatsv1.srvlocks--; nfsrv_openpluslock--; } LIST_REMOVE(lop, lo_lckowner); free(lop, M_NFSDLOCK); } /* * This function frees an nfslockfile structure. */ static void nfsrv_freenfslockfile(struct nfslockfile *lfp) { LIST_REMOVE(lfp, lf_hash); free(lfp, M_NFSDLOCKFILE); } /* * This function looks up an nfsstate structure via stateid. */ static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, __unused u_int32_t flags, struct nfsstate **stpp) { struct nfsstate *stp; struct nfsstatehead *hp; int error = 0; *stpp = NULL; hp = NFSSTATEHASH(clp, *stateidp); LIST_FOREACH(stp, hp, ls_hash) { if (!NFSBCMP(stp->ls_stateid.other, stateidp->other, NFSX_STATEIDOTHER)) break; } /* * If no state id in list, return NFSERR_BADSTATEID. */ if (stp == LIST_END(hp)) { error = NFSERR_BADSTATEID; goto out; } *stpp = stp; out: NFSEXITCODE(error); return (error); } /* * This function gets an nfsstate structure via owner string. */ static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp, struct nfsstate **stpp) { struct nfsstate *stp; *stpp = NULL; LIST_FOREACH(stp, hp, ls_list) { if (new_stp->ls_ownerlen == stp->ls_ownerlen && !NFSBCMP(new_stp->ls_owner,stp->ls_owner,stp->ls_ownerlen)) { *stpp = stp; return; } } } /* * Lock control function called to update lock status. * Returns 0 upon success, -1 if there is no lock and the flags indicate * that one isn't to be created and an NFSERR_xxx for other errors. * The structures new_stp and new_lop are passed in as pointers that should * be set to NULL if the structure is used and shouldn't be free'd. * For the NFSLCK_TEST and NFSLCK_CHECK cases, the structures are * never used and can safely be allocated on the stack. For all other * cases, *new_stpp and *new_lopp should be malloc'd before the call, * in case they are used. 
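 * (A distilled form of the fix in this hunk follows.)
 */

/*
 * Editor's aside -- the one functional change in this hunk is above:
 * nfsvno_getvp() can fail (for example, on a stale file handle), so the
 * returned vnode must be NULL-checked before NFSVOPUNLOCK() is applied.
 * The shape of the fix as a standalone sketch; all names are invented.
 */
#include <stddef.h>

struct xvnode { int locked; };

/* Stand-in for nfsvno_getvp(): returns a locked vnode, or NULL. */
static struct xvnode *
xgetvp(struct xvnode *cache, int found)
{
	if (!found)
		return (NULL);	/* e.g. a stale file handle */
	cache->locked = 1;
	return (cache);
}

/* Stand-in for NFSVOPUNLOCK(): must never be handed NULL. */
static void
xunlock(struct xvnode *vp)
{
	vp->locked = 0;
}

static struct xvnode *
xget_unlocked(struct xvnode *cache, int found)
{
	struct xvnode *vp;

	vp = xgetvp(cache, found);
	if (vp != NULL)		/* the check this revision adds */
		xunlock(vp);
	return (vp);
}

/*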
*/ APPLESTATIC int nfsrv_lockctrl(vnode_t vp, struct nfsstate **new_stpp, struct nfslock **new_lopp, struct nfslockconflict *cfp, nfsquad_t clientid, nfsv4stateid_t *stateidp, __unused struct nfsexstuff *exp, struct nfsrv_descript *nd, NFSPROC_T *p) { struct nfslock *lop; struct nfsstate *new_stp = *new_stpp; struct nfslock *new_lop = *new_lopp; struct nfsstate *tstp, *mystp, *nstp; int specialid = 0; struct nfslockfile *lfp; struct nfslock *other_lop = NULL; struct nfsstate *stp, *lckstp = NULL; struct nfsclient *clp = NULL; u_int32_t bits; int error = 0, haslock = 0, ret, reterr; int getlckret, delegation = 0, filestruct_locked, vnode_unlocked = 0; fhandle_t nfh; uint64_t first, end; uint32_t lock_flags; if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) { /* * Note the special cases of "all 1s" or "all 0s" stateids and * let reads with all 1s go ahead. */ if (new_stp->ls_stateid.seqid == 0x0 && new_stp->ls_stateid.other[0] == 0x0 && new_stp->ls_stateid.other[1] == 0x0 && new_stp->ls_stateid.other[2] == 0x0) specialid = 1; else if (new_stp->ls_stateid.seqid == 0xffffffff && new_stp->ls_stateid.other[0] == 0xffffffff && new_stp->ls_stateid.other[1] == 0xffffffff && new_stp->ls_stateid.other[2] == 0xffffffff) specialid = 2; } /* * Check for restart conditions (client and server). */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, specialid); if (error) goto out; /* * Check for state resource limit exceeded. */ if ((new_stp->ls_flags & NFSLCK_LOCK) && nfsrv_openpluslock > nfsrv_v4statelimit) { error = NFSERR_RESOURCE; goto out; } /* * For the lock case, get another nfslock structure, * just in case we need it. * Malloc now, before we start sifting through the linked lists, * in case we have to wait for memory. */ tryagain: if (new_stp->ls_flags & NFSLCK_LOCK) other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); filestruct_locked = 0; reterr = 0; lfp = NULL; /* * Get the lockfile structure for CFH now, so we can do a sanity * check against the stateid, before incrementing the seqid#, since * we want to return NFSERR_BADSTATEID on failure and the seqid# * shouldn't be incremented for this case. * If nfsrv_getlockfile() returns -1, it means "not found", which * will be handled later. * If we are doing Lock/LockU and local locking is enabled, sleep * lock the nfslockfile structure. */ getlckret = nfsrv_getlockfh(vp, new_stp->ls_flags, NULL, &nfh, p); NFSLOCKSTATE(); if (getlckret == 0) { if ((new_stp->ls_flags & (NFSLCK_LOCK | NFSLCK_UNLOCK)) != 0 && nfsrv_dolocallocks != 0 && nd->nd_repstat == 0) { getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL, &lfp, &nfh, 1); if (getlckret == 0) filestruct_locked = 1; } else getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL, &lfp, &nfh, 0); } if (getlckret != 0 && getlckret != -1) reterr = getlckret; if (filestruct_locked != 0) { LIST_INIT(&lfp->lf_rollback); if ((new_stp->ls_flags & NFSLCK_LOCK)) { /* * For local locking, do the advisory locking now, so * that any conflict can be detected. A failure later * can be rolled back locally. If an error is returned, * struct nfslockfile has been unlocked and any local * locking rolled back. 
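 * (A compact version of the special-stateid test above follows.)
 */

/*
 * Editor's aside -- a compact, standalone version of the special stateid
 * test above: all-zeros is the anonymous stateid, all-ones the READ
 * bypass stateid.  The struct layout is an invented stand-in for
 * nfsv4stateid_t.
 */
#include <stdint.h>

struct xstateid {
	uint32_t seqid;
	uint32_t other[3];
};

/* Returns 0 for a normal stateid, 1 for all-zeros, 2 for all-ones. */
static int
stateid_special(const struct xstateid *sp)
{
	if (sp->seqid == 0 && sp->other[0] == 0 && sp->other[1] == 0 &&
	    sp->other[2] == 0)
		return (1);
	if (sp->seqid == 0xffffffff && sp->other[0] == 0xffffffff &&
	    sp->other[1] == 0xffffffff && sp->other[2] == 0xffffffff)
		return (2);
	return (0);
}

/*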
*/ NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl1"); vnode_unlocked = 1; NFSVOPUNLOCK(vp); } reterr = nfsrv_locallock(vp, lfp, (new_lop->lo_flags & (NFSLCK_READ | NFSLCK_WRITE)), new_lop->lo_first, new_lop->lo_end, cfp, p); NFSLOCKSTATE(); } } if (specialid == 0) { if (new_stp->ls_flags & NFSLCK_TEST) { /* * RFC 3530 does not list LockT as an op that renews a * lease, but the consensus seems to be that it is ok * for a server to do so. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); /* * Since NFSERR_EXPIRED, NFSERR_ADMINREVOKED are not valid * error returns for LockT, just go ahead and test for a lock, * since there are no locks for this client, but other locks * can conflict. (ie. same client will always be false) */ if (error == NFSERR_EXPIRED || error == NFSERR_ADMINREVOKED) error = 0; lckstp = new_stp; } else { error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (error == 0) /* * Look up the stateid */ error = nfsrv_getstate(clp, &new_stp->ls_stateid, new_stp->ls_flags, &stp); /* * do some sanity checks for an unconfirmed open or a * stateid that refers to the wrong file, for an open stateid */ if (error == 0 && (stp->ls_flags & NFSLCK_OPEN) && ((stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM) || (getlckret == 0 && stp->ls_lfp != lfp))){ /* * NFSLCK_SETATTR should return OK rather than NFSERR_BADSTATEID * The only exception is using SETATTR with SIZE. * */ if ((new_stp->ls_flags & (NFSLCK_SETATTR | NFSLCK_CHECK)) != NFSLCK_SETATTR) error = NFSERR_BADSTATEID; } if (error == 0 && (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) && getlckret == 0 && stp->ls_lfp != lfp) error = NFSERR_BADSTATEID; /* * If the lockowner stateid doesn't refer to the same file, * I believe that is considered ok, since some clients will * only create a single lockowner and use that for all locks * on all files. * For now, log it as a diagnostic, instead of considering it * a BadStateid. */ if (error == 0 && (stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) == 0 && getlckret == 0 && stp->ls_lfp != lfp) { #ifdef DIAGNOSTIC printf("Got a lock statid for different file open\n"); #endif /* error = NFSERR_BADSTATEID; */ } if (error == 0) { if (new_stp->ls_flags & NFSLCK_OPENTOLOCK) { /* * If haslock set, we've already checked the seqid. */ if (!haslock) { if (stp->ls_flags & NFSLCK_OPEN) error = nfsrv_checkseqid(nd, new_stp->ls_seq, stp->ls_openowner, new_stp->ls_op); else error = NFSERR_BADSTATEID; } if (!error) nfsrv_getowner(&stp->ls_open, new_stp, &lckstp); if (lckstp) /* * I believe this should be an error, but it * isn't obvious what NFSERR_xxx would be * appropriate, so I'll use NFSERR_INVAL for now. */ error = NFSERR_INVAL; else lckstp = new_stp; } else if (new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK)) { /* * If haslock set, ditto above. */ if (!haslock) { if (stp->ls_flags & NFSLCK_OPEN) error = NFSERR_BADSTATEID; else error = nfsrv_checkseqid(nd, new_stp->ls_seq, stp, new_stp->ls_op); } lckstp = stp; } else { lckstp = stp; } } /* * If the seqid part of the stateid isn't the same, return * NFSERR_OLDSTATEID for cases other than I/O Ops. * For I/O Ops, only return NFSERR_OLDSTATEID if * nfsrv_returnoldstateid is set. 
(The consensus on the email * list was that most clients would prefer to not receive * NFSERR_OLDSTATEID for I/O Ops, but the RFC suggests that that * is what will happen, so I use the nfsrv_returnoldstateid to * allow for either server configuration.) */ if (!error && stp->ls_stateid.seqid!=new_stp->ls_stateid.seqid && (((nd->nd_flag & ND_NFSV41) == 0 && (!(new_stp->ls_flags & NFSLCK_CHECK) || nfsrv_returnoldstateid)) || ((nd->nd_flag & ND_NFSV41) != 0 && new_stp->ls_stateid.seqid != 0))) error = NFSERR_OLDSTATEID; } } /* * Now we can check for grace. */ if (!error) error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags); if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error && nfsrv_checkstable(clp)) error = NFSERR_NOGRACE; /* * If we successfully Reclaimed state, note that. */ if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error) nfsrv_markstable(clp); /* * At this point, either error == NFSERR_BADSTATEID or the * seqid# has been updated, so we can return any error. * If error == 0, there may be an error in: * nd_repstat - Set by the calling function. * reterr - Set above, if getting the nfslockfile structure * or acquiring the local lock failed. * (If both of these are set, nd_repstat should probably be * returned, since that error was detected before this * function call.) */ if (error != 0 || nd->nd_repstat != 0 || reterr != 0) { if (error == 0) { if (nd->nd_repstat != 0) error = nd->nd_repstat; else error = reterr; } if (filestruct_locked != 0) { /* Roll back local locks. */ NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl2"); vnode_unlocked = 1; NFSVOPUNLOCK(vp); } nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } NFSUNLOCKSTATE(); goto out; } /* * Check the nfsrv_getlockfile return. * Returned -1 if no structure found. */ if (getlckret == -1) { error = NFSERR_EXPIRED; /* * Called from lockt, so no lock is OK. */ if (new_stp->ls_flags & NFSLCK_TEST) { error = 0; } else if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) { /* * Called to check for a lock, OK if the stateid is all * 1s or all 0s, but there should be an nfsstate * otherwise. * (ie. If there is no open, I'll assume no share * deny bits.) */ if (specialid) error = 0; else error = NFSERR_BADSTATEID; } NFSUNLOCKSTATE(); goto out; } /* * For NFSLCK_CHECK and NFSLCK_LOCK, test for a share conflict. * For NFSLCK_CHECK, allow a read if write access is granted, * but check for a deny. For NFSLCK_LOCK, require correct access, * which implies a conflicting deny can't exist. */ if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_LOCK)) { /* * Four kinds of state id: * - specialid (all 0s or all 1s), only for NFSLCK_CHECK * - stateid for an open * - stateid for a delegation * - stateid for a lock owner */ if (!specialid) { if (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) { delegation = 1; mystp = stp; nfsrv_delaydelegtimeout(stp); } else if (stp->ls_flags & NFSLCK_OPEN) { mystp = stp; } else { mystp = stp->ls_openstp; } /* * If locking or checking, require correct access * bit set. 
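 * (The old-stateid rule just described, as a small predicate:)
 */

/*
 * Editor's aside -- the NFSERR_OLDSTATEID rule described above, as a
 * small standalone predicate with invented names.  In NFSv4.1 a request
 * seqid of 0 is a wildcard; in NFSv4.0 the error is suppressed for I/O
 * ops unless the server is configured to return it.
 */
#include <stdbool.h>
#include <stdint.h>

static bool
stateid_is_old(uint32_t cur_seqid, uint32_t req_seqid, bool v41,
    bool is_io_op, bool return_old_for_io)
{
	if (req_seqid == cur_seqid)
		return (false);			/* seqids match: not old */
	if (v41)
		return (req_seqid != 0);	/* 0 is the v4.1 wildcard */
	return (!is_io_op || return_old_for_io);
}

/*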
*/ if (((new_stp->ls_flags & NFSLCK_LOCK) && !((new_lop->lo_flags >> NFSLCK_LOCKSHIFT) & mystp->ls_flags & NFSLCK_ACCESSBITS)) || ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_READACCESS)) == (NFSLCK_CHECK | NFSLCK_READACCESS) && !(mystp->ls_flags & NFSLCK_READACCESS) && nfsrv_allowreadforwriteopen == 0) || ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_WRITEACCESS)) == (NFSLCK_CHECK | NFSLCK_WRITEACCESS) && !(mystp->ls_flags & NFSLCK_WRITEACCESS))) { if (filestruct_locked != 0) { /* Roll back local locks. */ NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl3"); vnode_unlocked = 1; NFSVOPUNLOCK(vp); } nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } NFSUNLOCKSTATE(); error = NFSERR_OPENMODE; goto out; } } else mystp = NULL; if ((new_stp->ls_flags & NFSLCK_CHECK) && !delegation) { /* * Check for a conflicting deny bit. */ LIST_FOREACH(tstp, &lfp->lf_open, ls_file) { if (tstp != mystp) { bits = tstp->ls_flags; bits >>= NFSLCK_SHIFT; if (new_stp->ls_flags & bits & NFSLCK_ACCESSBITS) { KASSERT(vnode_unlocked == 0, ("nfsrv_lockctrl: vnode unlocked1")); ret = nfsrv_clientconflict(tstp->ls_clp, &haslock, vp, p); if (ret == 1) { /* * nfsrv_clientconflict unlocks state * when it returns non-zero. */ lckstp = NULL; goto tryagain; } if (ret == 0) NFSUNLOCKSTATE(); if (ret == 2) error = NFSERR_PERM; else error = NFSERR_OPENMODE; goto out; } } } /* We're outta here */ NFSUNLOCKSTATE(); goto out; } } /* * For setattr, just get rid of all the Delegations for other clients. */ if (new_stp->ls_flags & NFSLCK_SETATTR) { KASSERT(vnode_unlocked == 0, ("nfsrv_lockctrl: vnode unlocked2")); ret = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p); if (ret) { /* * nfsrv_cleandeleg() unlocks state when it * returns non-zero. */ if (ret == -1) { lckstp = NULL; goto tryagain; } error = ret; goto out; } if (!(new_stp->ls_flags & NFSLCK_CHECK) || (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg))) { NFSUNLOCKSTATE(); goto out; } } /* * Check for a conflicting delegation. If one is found, call * nfsrv_delegconflict() to handle it. If the v4root lock hasn't * been set yet, it will get the lock. Otherwise, it will recall * the delegation. Then, we try try again... * I currently believe the conflict algorithm to be: * For Lock Ops (Lock/LockT/LockU) * - there is a conflict iff a different client has a write delegation * For Reading (Read Op) * - there is a conflict iff a different client has a write delegation * (the specialids are always a different client) * For Writing (Write/Setattr of size) * - there is a conflict if a different client has any delegation * - there is a conflict if the same client has a read delegation * (I don't understand why this isn't allowed, but that seems to be * the current consensus?) */ tstp = LIST_FIRST(&lfp->lf_deleg); while (tstp != LIST_END(&lfp->lf_deleg)) { nstp = LIST_NEXT(tstp, ls_file); if ((((new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK|NFSLCK_TEST))|| ((new_stp->ls_flags & NFSLCK_CHECK) && (new_lop->lo_flags & NFSLCK_READ))) && clp != tstp->ls_clp && (tstp->ls_flags & NFSLCK_DELEGWRITE)) || ((new_stp->ls_flags & NFSLCK_CHECK) && (new_lop->lo_flags & NFSLCK_WRITE) && (clp != tstp->ls_clp || (tstp->ls_flags & NFSLCK_DELEGREAD)))) { ret = 0; if (filestruct_locked != 0) { /* Roll back local locks. 
*/ NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl4"); NFSVOPUNLOCK(vp); } nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); NFSUNLOCKSTATE(); NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); vnode_unlocked = 0; if (VN_IS_DOOMED(vp)) ret = NFSERR_SERVERFAULT; NFSLOCKSTATE(); } if (ret == 0) ret = nfsrv_delegconflict(tstp, &haslock, p, vp); if (ret) { /* * nfsrv_delegconflict unlocks state when it * returns non-zero, which it always does. */ if (other_lop) { free(other_lop, M_NFSDLOCK); other_lop = NULL; } if (ret == -1) { lckstp = NULL; goto tryagain; } error = ret; goto out; } /* Never gets here. */ } tstp = nstp; } /* * Handle the unlock case by calling nfsrv_updatelock(). * (Should I have done some access checking above for unlock? For now, * just let it happen.) */ if (new_stp->ls_flags & NFSLCK_UNLOCK) { first = new_lop->lo_first; end = new_lop->lo_end; nfsrv_updatelock(stp, new_lopp, &other_lop, lfp); stateidp->seqid = ++(stp->ls_stateid.seqid); if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0) stateidp->seqid = stp->ls_stateid.seqid = 1; stateidp->other[0] = stp->ls_stateid.other[0]; stateidp->other[1] = stp->ls_stateid.other[1]; stateidp->other[2] = stp->ls_stateid.other[2]; if (filestruct_locked != 0) { NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl5"); vnode_unlocked = 1; NFSVOPUNLOCK(vp); } /* Update the local locks. */ nfsrv_localunlock(vp, lfp, first, end, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } NFSUNLOCKSTATE(); goto out; } /* * Search for a conflicting lock. A lock conflicts if: * - the lock range overlaps and * - at least one lock is a write lock and * - it is not owned by the same lock owner */ if (!delegation) { LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) { if (new_lop->lo_end > lop->lo_first && new_lop->lo_first < lop->lo_end && (new_lop->lo_flags == NFSLCK_WRITE || lop->lo_flags == NFSLCK_WRITE) && lckstp != lop->lo_stp && (clp != lop->lo_stp->ls_clp || lckstp->ls_ownerlen != lop->lo_stp->ls_ownerlen || NFSBCMP(lckstp->ls_owner, lop->lo_stp->ls_owner, lckstp->ls_ownerlen))) { if (other_lop) { free(other_lop, M_NFSDLOCK); other_lop = NULL; } if (vnode_unlocked != 0) ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock, NULL, p); else ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock, vp, p); if (ret == 1) { if (filestruct_locked != 0) { if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl6"); NFSVOPUNLOCK(vp); } /* Roll back local locks. */ nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); NFSUNLOCKSTATE(); NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); vnode_unlocked = 0; if (VN_IS_DOOMED(vp)) { error = NFSERR_SERVERFAULT; goto out; } } /* * nfsrv_clientconflict() unlocks state when it * returns non-zero. */ lckstp = NULL; goto tryagain; } /* * Found a conflicting lock, so record the conflict and * return the error. 
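 * (The conflict test from the search loop above, factored out:)
 */

/*
 * Editor's aside -- the byte-range conflict test in the search loop
 * above, factored into a standalone predicate: the ranges overlap, at
 * least one lock is a write lock, and the owners differ.  Invented
 * types; owner identity is reduced to an integer for the sketch.
 */
#include <stdbool.h>
#include <stdint.h>

struct xlock {
	uint64_t first;	/* inclusive start */
	uint64_t end;	/* exclusive end */
	bool	write;
	int	owner;
};

static bool
lock_conflicts(const struct xlock *a, const struct xlock *b)
{
	return (a->end > b->first && a->first < b->end &&
	    (a->write || b->write) && a->owner != b->owner);
}

/*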
*/ if (cfp != NULL && ret == 0) { cfp->cl_clientid.lval[0]=lop->lo_stp->ls_stateid.other[0]; cfp->cl_clientid.lval[1]=lop->lo_stp->ls_stateid.other[1]; cfp->cl_first = lop->lo_first; cfp->cl_end = lop->lo_end; cfp->cl_flags = lop->lo_flags; cfp->cl_ownerlen = lop->lo_stp->ls_ownerlen; NFSBCOPY(lop->lo_stp->ls_owner, cfp->cl_owner, cfp->cl_ownerlen); } if (ret == 2) error = NFSERR_PERM; else if (new_stp->ls_flags & NFSLCK_RECLAIM) error = NFSERR_RECLAIMCONFLICT; else if (new_stp->ls_flags & NFSLCK_CHECK) error = NFSERR_LOCKED; else error = NFSERR_DENIED; if (filestruct_locked != 0 && ret == 0) { /* Roll back local locks. */ NFSUNLOCKSTATE(); if (vnode_unlocked == 0) { ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl7"); vnode_unlocked = 1; NFSVOPUNLOCK(vp); } nfsrv_locallock_rollback(vp, lfp, p); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } if (ret == 0) NFSUNLOCKSTATE(); goto out; } } } /* * We only get here if there was no lock that conflicted. */ if (new_stp->ls_flags & (NFSLCK_TEST | NFSLCK_CHECK)) { NFSUNLOCKSTATE(); goto out; } /* * We only get here when we are creating or modifying a lock. * There are two variants: * - exist_lock_owner where lock_owner exists * - open_to_lock_owner with new lock_owner */ first = new_lop->lo_first; end = new_lop->lo_end; lock_flags = new_lop->lo_flags; if (!(new_stp->ls_flags & NFSLCK_OPENTOLOCK)) { nfsrv_updatelock(lckstp, new_lopp, &other_lop, lfp); stateidp->seqid = ++(lckstp->ls_stateid.seqid); if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0) stateidp->seqid = lckstp->ls_stateid.seqid = 1; stateidp->other[0] = lckstp->ls_stateid.other[0]; stateidp->other[1] = lckstp->ls_stateid.other[1]; stateidp->other[2] = lckstp->ls_stateid.other[2]; } else { /* * The new open_to_lock_owner case. * Link the new nfsstate into the lists. */ new_stp->ls_seq = new_stp->ls_opentolockseq; nfsrvd_refcache(new_stp->ls_op); stateidp->seqid = new_stp->ls_stateid.seqid = 1; stateidp->other[0] = new_stp->ls_stateid.other[0] = clp->lc_clientid.lval[0]; stateidp->other[1] = new_stp->ls_stateid.other[1] = clp->lc_clientid.lval[1]; stateidp->other[2] = new_stp->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_stp->ls_clp = clp; LIST_INIT(&new_stp->ls_lock); new_stp->ls_openstp = stp; new_stp->ls_lfp = lfp; nfsrv_insertlock(new_lop, (struct nfslock *)new_stp, new_stp, lfp); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_stp->ls_stateid), new_stp, ls_hash); LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list); *new_lopp = NULL; *new_stpp = NULL; nfsstatsv1.srvlockowners++; nfsrv_openpluslock++; } if (filestruct_locked != 0) { NFSUNLOCKSTATE(); nfsrv_locallock_commit(lfp, lock_flags, first, end); NFSLOCKSTATE(); nfsrv_unlocklf(lfp); } NFSUNLOCKSTATE(); out: if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } if (vnode_unlocked != 0) { NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); if (error == 0 && VN_IS_DOOMED(vp)) error = NFSERR_SERVERFAULT; } if (other_lop) free(other_lop, M_NFSDLOCK); NFSEXITCODE2(error, nd); return (error); } /* * Check for state errors for Open. * repstat is passed back out as an error if more critical errors * are not detected. 
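 * (How the open_to_lock_owner path above mints a fresh stateid, sketched:)
 */

/*
 * Editor's aside -- the open_to_lock_owner path above mints a fresh
 * stateid: seqid starts at 1, other[0] and other[1] carry the clientid
 * words, and other[2] is a per-client index.  A sketch with invented
 * names (xstateid mirrors nfsv4stateid_t; nextindex() stands in for
 * nfsrv_nextstateindex()).
 */
#include <stdint.h>

struct xstateid {
	uint32_t seqid;
	uint32_t other[3];
};

static uint32_t
nextindex(uint32_t *counter)
{
	return (++*counter);	/* per-client, monotonically increasing */
}

static void
stateid_mint(struct xstateid *sp, uint32_t clid_boot, uint32_t clid_idx,
    uint32_t *counter)
{
	sp->seqid = 1;			/* new state starts at seqid 1 */
	sp->other[0] = clid_boot;	/* like lc_clientid.lval[0] */
	sp->other[1] = clid_idx;	/* like lc_clientid.lval[1] */
	sp->other[2] = nextindex(counter);
}

/*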
*/ APPLESTATIC int nfsrv_opencheck(nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *new_stp, vnode_t vp, struct nfsrv_descript *nd, NFSPROC_T *p, int repstat) { struct nfsstate *stp, *nstp; struct nfsclient *clp; struct nfsstate *ownerstp; struct nfslockfile *lfp, *new_lfp; int error = 0, haslock = 0, ret, readonly = 0, getfhret = 0; if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS) readonly = 1; /* * Check for restart conditions (client and server). */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, 0); if (error) goto out; /* * Check for state resource limit exceeded. * Technically this should be SMP protected, but the worst * case error is "out by one or two" on the count when it * returns NFSERR_RESOURCE and the limit is just a rather * arbitrary high water mark, so no harm is done. */ if (nfsrv_openpluslock > nfsrv_v4statelimit) { error = NFSERR_RESOURCE; goto out; } tryagain: new_lfp = malloc(sizeof (struct nfslockfile), M_NFSDLOCKFILE, M_WAITOK); if (vp) getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp, NULL, p); NFSLOCKSTATE(); /* * Get the nfsclient structure. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); /* * Look up the open owner. See if it needs confirmation and * check the seq#, as required. */ if (!error) nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp); if (!error && ownerstp) { error = nfsrv_checkseqid(nd, new_stp->ls_seq, ownerstp, new_stp->ls_op); /* * If the OpenOwner hasn't been confirmed, assume the * old one was a replay and this one is ok. * See: RFC3530 Sec. 14.2.18. */ if (error == NFSERR_BADSEQID && (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM)) error = 0; } /* * Check for grace. */ if (!error) error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags); if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error && nfsrv_checkstable(clp)) error = NFSERR_NOGRACE; /* * If none of the above errors occurred, let repstat be * returned. */ if (repstat && !error) error = repstat; if (error) { NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } free(new_lfp, M_NFSDLOCKFILE); goto out; } /* * If vp == NULL, the file doesn't exist yet, so return ok. * (This always happens on the first pass, so haslock must be 0.) */ if (vp == NULL) { NFSUNLOCKSTATE(); free(new_lfp, M_NFSDLOCKFILE); goto out; } /* * Get the structure for the underlying file. */ if (getfhret) error = getfhret; else error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp, NULL, 0); if (new_lfp) free(new_lfp, M_NFSDLOCKFILE); if (error) { NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } goto out; } /* * Search for a conflicting open/share. */ if (new_stp->ls_flags & NFSLCK_DELEGCUR) { /* * For Delegate_Cur, search for the matching Delegation, * which indicates no conflict. * An old delegation should have been recovered by the * client doing a Claim_DELEGATE_Prev, so I won't let * it match and return NFSERR_EXPIRED. Should I let it * match? 
*/ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if (!(stp->ls_flags & NFSLCK_OLDDELEG) && (((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0) || stateidp->seqid == stp->ls_stateid.seqid) && !NFSBCMP(stateidp->other, stp->ls_stateid.other, NFSX_STATEIDOTHER)) break; } if (stp == LIST_END(&lfp->lf_deleg) || ((new_stp->ls_flags & NFSLCK_WRITEACCESS) && (stp->ls_flags & NFSLCK_DELEGREAD))) { NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_EXPIRED; goto out; } } /* * Check for access/deny bit conflicts. I check for the same * owner as well, in case the client didn't bother. */ LIST_FOREACH(stp, &lfp->lf_open, ls_file) { if (!(new_stp->ls_flags & NFSLCK_DELEGCUR) && (((new_stp->ls_flags & NFSLCK_ACCESSBITS) & ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))|| ((stp->ls_flags & NFSLCK_ACCESSBITS) & ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS)))){ ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p); if (ret == 1) { /* * nfsrv_clientconflict() unlocks * state when it returns non-zero. */ goto tryagain; } if (ret == 2) error = NFSERR_PERM; else if (new_stp->ls_flags & NFSLCK_RECLAIM) error = NFSERR_RECLAIMCONFLICT; else error = NFSERR_SHAREDENIED; if (ret == 0) NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } goto out; } } /* * Check for a conflicting delegation. If one is found, call * nfsrv_delegconflict() to handle it. If the v4root lock hasn't * been set yet, it will get the lock. Otherwise, it will recall * the delegation. Then, we try try again... * (If NFSLCK_DELEGCUR is set, it has a delegation, so there * isn't a conflict.) * I currently believe the conflict algorithm to be: * For Open with Read Access and Deny None * - there is a conflict iff a different client has a write delegation * For Open with other Write Access or any Deny except None * - there is a conflict if a different client has any delegation * - there is a conflict if the same client has a read delegation * (The current consensus is that this last case should be * considered a conflict since the client with a read delegation * could have done an Open with ReadAccess and WriteDeny * locally and then not have checked for the WriteDeny.) * Don't check for a Reclaim, since that will be dealt with * by nfsrv_openctrl(). */ if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR | NFSLCK_RECLAIM))) { stp = LIST_FIRST(&lfp->lf_deleg); while (stp != LIST_END(&lfp->lf_deleg)) { nstp = LIST_NEXT(stp, ls_file); if ((readonly && stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGWRITE)) || (!readonly && (stp->ls_clp != clp || (stp->ls_flags & NFSLCK_DELEGREAD)))) { ret = nfsrv_delegconflict(stp, &haslock, p, vp); if (ret) { /* * nfsrv_delegconflict() unlocks state * when it returns non-zero. */ if (ret == -1) goto tryagain; error = ret; goto out; } } stp = nstp; } } NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } out: NFSEXITCODE2(error, nd); return (error); } /* * Open control function to create/update open state for an open. 
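 * (The share-reservation test used in the loops above, distilled:)
 */

/*
 * Editor's aside -- the access/deny conflict test that appears in both
 * nfsrv_opencheck() and nfsrv_openctrl(), distilled: an open conflicts
 * when its access bits hit the other open's deny bits (stored shifted
 * left by a constant) in either direction.  The bit values here are
 * invented; the kernel uses NFSLCK_SHIFT and NFSLCK_ACCESSBITS.
 */
#include <stdbool.h>
#include <stdint.h>

#define XACCESSBITS	0x3	/* read | write access */
#define XSHIFT		2	/* deny bits live above the access bits */

static bool
share_conflict(uint32_t new_flags, uint32_t cur_flags)
{
	return (((new_flags & XACCESSBITS) &
	    ((cur_flags >> XSHIFT) & XACCESSBITS)) != 0 ||
	    ((cur_flags & XACCESSBITS) &
	    ((new_flags >> XSHIFT) & XACCESSBITS)) != 0);
}

/*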
*/ APPLESTATIC int nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp, struct nfsstate **new_stpp, nfsquad_t clientid, nfsv4stateid_t *stateidp, nfsv4stateid_t *delegstateidp, u_int32_t *rflagsp, struct nfsexstuff *exp, NFSPROC_T *p, u_quad_t filerev) { struct nfsstate *new_stp = *new_stpp; struct nfsstate *stp, *nstp; struct nfsstate *openstp = NULL, *new_open, *ownerstp, *new_deleg; struct nfslockfile *lfp, *new_lfp; struct nfsclient *clp; int error = 0, haslock = 0, ret, delegate = 1, writedeleg = 1; int readonly = 0, cbret = 1, getfhret = 0; int gotstate = 0, len = 0; u_char *clidp = NULL; if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS) readonly = 1; /* * Check for restart conditions (client and server). * (Paranoia, should have been detected by nfsrv_opencheck().) * If an error does show up, return NFSERR_EXPIRED, since * the seqid# has already been incremented. */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, 0); if (error) { printf("Nfsd: openctrl unexpected restart err=%d\n", error); error = NFSERR_EXPIRED; goto out; } clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK); tryagain: new_lfp = malloc(sizeof (struct nfslockfile), M_NFSDLOCKFILE, M_WAITOK); new_open = malloc(sizeof (struct nfsstate), M_NFSDSTATE, M_WAITOK); new_deleg = malloc(sizeof (struct nfsstate), M_NFSDSTATE, M_WAITOK); getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp, NULL, p); NFSLOCKSTATE(); /* * Get the client structure. Since the linked lists could be changed * by other nfsd processes if this process does a tsleep(), one of * two things must be done. * 1 - don't tsleep() * or * 2 - get the nfsv4_lock() { indicated by haslock == 1 } * before using the lists, since this lock stops the other * nfsd. This should only be used for rare cases, since it * essentially single threads the nfsd. * At this time, it is only done for cases where the stable * storage file must be written prior to completion of state * expiration. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (!error && (clp->lc_flags & LCL_NEEDSCBNULL) && clp->lc_program) { /* * This happens on the first open for a client * that supports callbacks. */ NFSUNLOCKSTATE(); /* * Although nfsrv_docallback() will sleep, clp won't * go away, since they are only removed when the * nfsv4_lock() has blocked the nfsd threads. The * fields in clp can change, but having multiple * threads do this Null callback RPC should be * harmless. */ cbret = nfsrv_docallback(clp, NFSV4PROC_CBNULL, NULL, 0, NULL, NULL, NULL, 0, p); NFSLOCKSTATE(); clp->lc_flags &= ~LCL_NEEDSCBNULL; if (!cbret) clp->lc_flags |= LCL_CALLBACKSON; } /* * Look up the open owner. See if it needs confirmation and * check the seq#, as required. */ if (!error) nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp); if (error) { NFSUNLOCKSTATE(); printf("Nfsd: openctrl unexpected state err=%d\n", error); free(new_lfp, M_NFSDLOCKFILE); free(new_open, M_NFSDSTATE); free(new_deleg, M_NFSDSTATE); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_EXPIRED; goto out; } if (new_stp->ls_flags & NFSLCK_RECLAIM) nfsrv_markstable(clp); /* * Get the structure for the underlying file. 
*/ if (getfhret) error = getfhret; else error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp, NULL, 0); if (new_lfp) free(new_lfp, M_NFSDLOCKFILE); if (error) { NFSUNLOCKSTATE(); printf("Nfsd openctrl unexpected getlockfile err=%d\n", error); free(new_open, M_NFSDSTATE); free(new_deleg, M_NFSDSTATE); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } goto out; } /* * Search for a conflicting open/share. */ if (new_stp->ls_flags & NFSLCK_DELEGCUR) { /* * For Delegate_Cur, search for the matching Delegation, * which indicates no conflict. * An old delegation should have been recovered by the * client doing a Claim_DELEGATE_Prev, so I won't let * it match and return NFSERR_EXPIRED. Should I let it * match? */ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if (!(stp->ls_flags & NFSLCK_OLDDELEG) && (((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0) || stateidp->seqid == stp->ls_stateid.seqid) && !NFSBCMP(stateidp->other, stp->ls_stateid.other, NFSX_STATEIDOTHER)) break; } if (stp == LIST_END(&lfp->lf_deleg) || ((new_stp->ls_flags & NFSLCK_WRITEACCESS) && (stp->ls_flags & NFSLCK_DELEGREAD))) { NFSUNLOCKSTATE(); printf("Nfsd openctrl unexpected expiry\n"); free(new_open, M_NFSDSTATE); free(new_deleg, M_NFSDSTATE); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_EXPIRED; goto out; } /* * Don't issue a Delegation, since one already exists and * delay delegation timeout, as required. */ delegate = 0; nfsrv_delaydelegtimeout(stp); } /* * Check for access/deny bit conflicts. I also check for the * same owner, since the client might not have bothered to check. * Also, note an open for the same file and owner, if found, * which is all we do here for Delegate_Cur, since conflict * checking is already done. */ LIST_FOREACH(stp, &lfp->lf_open, ls_file) { if (ownerstp && stp->ls_openowner == ownerstp) openstp = stp; if (!(new_stp->ls_flags & NFSLCK_DELEGCUR)) { /* * If another client has the file open, the only * delegation that can be issued is a Read delegation * and only if it is a Read open with Deny none. */ if (clp != stp->ls_clp) { if ((stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS) writedeleg = 0; else delegate = 0; } if(((new_stp->ls_flags & NFSLCK_ACCESSBITS) & ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))|| ((stp->ls_flags & NFSLCK_ACCESSBITS) & ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS))){ ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p); if (ret == 1) { /* * nfsrv_clientconflict() unlocks state * when it returns non-zero. */ free(new_open, M_NFSDSTATE); free(new_deleg, M_NFSDSTATE); openstp = NULL; goto tryagain; } if (ret == 2) error = NFSERR_PERM; else if (new_stp->ls_flags & NFSLCK_RECLAIM) error = NFSERR_RECLAIMCONFLICT; else error = NFSERR_SHAREDENIED; if (ret == 0) NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } free(new_open, M_NFSDSTATE); free(new_deleg, M_NFSDSTATE); printf("nfsd openctrl unexpected client cnfl\n"); goto out; } } } /* * Check for a conflicting delegation. If one is found, call * nfsrv_delegconflict() to handle it. If the v4root lock hasn't * been set yet, it will get the lock. Otherwise, it will recall * the delegation. Then, we try try again... * (If NFSLCK_DELEGCUR is set, it has a delegation, so there * isn't a conflict.) 
* I currently believe the conflict algorithm to be: * For Open with Read Access and Deny None * - there is a conflict iff a different client has a write delegation * For Open with other Write Access or any Deny except None * - there is a conflict if a different client has any delegation * - there is a conflict if the same client has a read delegation * (The current consensus is that this last case should be * considered a conflict since the client with a read delegation * could have done an Open with ReadAccess and WriteDeny * locally and then not have checked for the WriteDeny.) */ if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR))) { stp = LIST_FIRST(&lfp->lf_deleg); while (stp != LIST_END(&lfp->lf_deleg)) { nstp = LIST_NEXT(stp, ls_file); if (stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGREAD)) writedeleg = 0; else delegate = 0; if ((readonly && stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGWRITE)) || (!readonly && (stp->ls_clp != clp || (stp->ls_flags & NFSLCK_DELEGREAD)))) { if (new_stp->ls_flags & NFSLCK_RECLAIM) { delegate = 2; } else { ret = nfsrv_delegconflict(stp, &haslock, p, vp); if (ret) { /* * nfsrv_delegconflict() unlocks state * when it returns non-zero. */ printf("Nfsd openctrl unexpected deleg cnfl\n"); free(new_open, M_NFSDSTATE); free(new_deleg, M_NFSDSTATE); if (ret == -1) { openstp = NULL; goto tryagain; } error = ret; goto out; } } } stp = nstp; } } /* * We only get here if there was no open that conflicted. * If an open for this owner already exists, the new access/deny bits * are merged into it; otherwise it is a new open. If the open_owner * hasn't been confirmed, replace the open with the new one needing * confirmation, otherwise add the open. */ if (new_stp->ls_flags & NFSLCK_DELEGPREV) { /* * Handle NFSLCK_DELEGPREV by searching the old delegations for * a match. If found, just move the old delegation to the current * delegation list and issue open. If not found, return * NFSERR_EXPIRED. */ LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) { if (stp->ls_lfp == lfp) { /* Found it */ if (stp->ls_clp != clp) panic("olddeleg clp"); LIST_REMOVE(stp, ls_list); LIST_REMOVE(stp, ls_hash); stp->ls_flags &= ~NFSLCK_OLDDELEG; stp->ls_stateid.seqid = delegstateidp->seqid = 1; stp->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; stp->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; stp->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); stp->ls_compref = nd->nd_compref; LIST_INSERT_HEAD(&clp->lc_deleg, stp, ls_list); LIST_INSERT_HEAD(NFSSTATEHASH(clp, stp->ls_stateid), stp, ls_hash); if (stp->ls_flags & NFSLCK_DELEGWRITE) *rflagsp |= NFSV4OPEN_WRITEDELEGATE; else *rflagsp |= NFSV4OPEN_READDELEGATE; clp->lc_delegtime = NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA; /* * Now, do the associated open. 
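 * (Editor's aside first -- the delegation-conflict rule above, reduced to
 * a predicate -- then the open itself is created below.)
 */

/*
 * The conflict algorithm described in the comment above, as a standalone
 * predicate with invented names: a read-only open conflicts only with
 * another client's write delegation; any other open conflicts with
 * another client's delegation of either kind, or with this client's own
 * read delegation.
 */
#include <stdbool.h>

static bool
open_deleg_conflict(bool readonly_open, bool same_client,
    bool deleg_is_write, bool deleg_is_read)
{
	if (readonly_open)
		return (!same_client && deleg_is_write);
	return (!same_client || deleg_is_read);
}

/*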
*/ new_open->ls_stateid.seqid = 1; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags&NFSLCK_DENYBITS)| NFSLCK_OPEN; if (stp->ls_flags & NFSLCK_DELEGWRITE) new_open->ls_flags |= (NFSLCK_READACCESS | NFSLCK_WRITEACCESS); else new_open->ls_flags |= NFSLCK_READACCESS; new_open->ls_uid = new_stp->ls_uid; new_open->ls_lfp = lfp; new_open->ls_clp = clp; LIST_INIT(&new_open->ls_open); LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); /* * and handle the open owner */ if (ownerstp) { new_open->ls_openowner = ownerstp; LIST_INSERT_HEAD(&ownerstp->ls_open,new_open,ls_list); } else { new_open->ls_openowner = new_stp; new_stp->ls_flags = 0; nfsrvd_refcache(new_stp->ls_op); new_stp->ls_noopens = 0; LIST_INIT(&new_stp->ls_open); LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); *new_stpp = NULL; nfsstatsv1.srvopenowners++; nfsrv_openpluslock++; } openstp = new_open; new_open = NULL; nfsstatsv1.srvopens++; nfsrv_openpluslock++; break; } } if (stp == LIST_END(&clp->lc_olddeleg)) error = NFSERR_EXPIRED; } else if (new_stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) { /* * Scan to check that a delegation for this client and file * does not already exist. * There also shouldn't yet be an Open for this file and * openowner. */ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if (stp->ls_clp == clp) break; } if (stp == LIST_END(&lfp->lf_deleg) && openstp == NULL) { /* * This is the Claim_Previous case with a delegation * type != Delegate_None. */ /* * First, add the delegation. (Although we must issue the * delegation, we can also ask for an immediate return.) */ new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1; new_deleg->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; new_deleg->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; new_deleg->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); if (new_stp->ls_flags & NFSLCK_DELEGWRITE) { new_deleg->ls_flags = (NFSLCK_DELEGWRITE | NFSLCK_READACCESS | NFSLCK_WRITEACCESS); *rflagsp |= NFSV4OPEN_WRITEDELEGATE; nfsrv_writedelegcnt++; } else { new_deleg->ls_flags = (NFSLCK_DELEGREAD | NFSLCK_READACCESS); *rflagsp |= NFSV4OPEN_READDELEGATE; } new_deleg->ls_uid = new_stp->ls_uid; new_deleg->ls_lfp = lfp; new_deleg->ls_clp = clp; new_deleg->ls_filerev = filerev; new_deleg->ls_compref = nd->nd_compref; LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; if (delegate == 2 || nfsrv_issuedelegs == 0 || (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) != LCL_CALLBACKSON || NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) || !NFSVNO_DELEGOK(vp)) *rflagsp |= NFSV4OPEN_RECALL; nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; /* * Now, do the associated open. 
*/ new_open->ls_stateid.seqid = 1; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags & NFSLCK_DENYBITS) | NFSLCK_OPEN; if (new_stp->ls_flags & NFSLCK_DELEGWRITE) new_open->ls_flags |= (NFSLCK_READACCESS | NFSLCK_WRITEACCESS); else new_open->ls_flags |= NFSLCK_READACCESS; new_open->ls_uid = new_stp->ls_uid; new_open->ls_lfp = lfp; new_open->ls_clp = clp; LIST_INIT(&new_open->ls_open); LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); /* * and handle the open owner */ if (ownerstp) { new_open->ls_openowner = ownerstp; LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list); } else { new_open->ls_openowner = new_stp; new_stp->ls_flags = 0; nfsrvd_refcache(new_stp->ls_op); new_stp->ls_noopens = 0; LIST_INIT(&new_stp->ls_open); LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); *new_stpp = NULL; nfsstatsv1.srvopenowners++; nfsrv_openpluslock++; } openstp = new_open; new_open = NULL; nfsstatsv1.srvopens++; nfsrv_openpluslock++; } else { error = NFSERR_RECLAIMCONFLICT; } } else if (ownerstp) { if (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM) { /* Replace the open */ if (ownerstp->ls_op) nfsrvd_derefcache(ownerstp->ls_op); ownerstp->ls_op = new_stp->ls_op; nfsrvd_refcache(ownerstp->ls_op); ownerstp->ls_seq = new_stp->ls_seq; *rflagsp |= NFSV4OPEN_RESULTCONFIRM; stp = LIST_FIRST(&ownerstp->ls_open); stp->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) | NFSLCK_OPEN; stp->ls_stateid.seqid = 1; stp->ls_uid = new_stp->ls_uid; if (lfp != stp->ls_lfp) { LIST_REMOVE(stp, ls_file); LIST_INSERT_HEAD(&lfp->lf_open, stp, ls_file); stp->ls_lfp = lfp; } openstp = stp; } else if (openstp) { openstp->ls_flags |= (new_stp->ls_flags & NFSLCK_SHAREBITS); openstp->ls_stateid.seqid++; if ((nd->nd_flag & ND_NFSV41) != 0 && openstp->ls_stateid.seqid == 0) openstp->ls_stateid.seqid = 1; /* * This is where we can choose to issue a delegation. 
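 * Roughly, the tests below map onto the "why no delegation" reply
 * flags:
 *	client asked for none      -> NFSV4OPEN_WDNOTWANTED
 *	nfsrv_issuedelegs == 0     -> NFSV4OPEN_WDSUPPFTYPE
 *	delegation limit reached   -> NFSV4OPEN_WDRESOURCE
 *	any other conflict         -> NFSV4OPEN_WDCONTENTION
 * and only when none of them apply is a write delegation issued.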
*/ if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0) *rflagsp |= NFSV4OPEN_WDNOTWANTED; else if (nfsrv_issuedelegs == 0) *rflagsp |= NFSV4OPEN_WDSUPPFTYPE; else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt)) *rflagsp |= NFSV4OPEN_WDRESOURCE; else if (delegate == 0 || writedeleg == 0 || NFSVNO_EXRDONLY(exp) || (readonly != 0 && nfsrv_writedelegifpos == 0) || !NFSVNO_DELEGOK(vp) || (new_stp->ls_flags & NFSLCK_WANTRDELEG) != 0 || (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) != LCL_CALLBACKSON) *rflagsp |= NFSV4OPEN_WDCONTENTION; else { new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1; new_deleg->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; new_deleg->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; new_deleg->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); new_deleg->ls_flags = (NFSLCK_DELEGWRITE | NFSLCK_READACCESS | NFSLCK_WRITEACCESS); *rflagsp |= NFSV4OPEN_WRITEDELEGATE; new_deleg->ls_uid = new_stp->ls_uid; new_deleg->ls_lfp = lfp; new_deleg->ls_clp = clp; new_deleg->ls_filerev = filerev; new_deleg->ls_compref = nd->nd_compref; nfsrv_writedelegcnt++; LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } } else { new_open->ls_stateid.seqid = 1; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS)| NFSLCK_OPEN; new_open->ls_uid = new_stp->ls_uid; new_open->ls_openowner = ownerstp; new_open->ls_lfp = lfp; new_open->ls_clp = clp; LIST_INIT(&new_open->ls_open); LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); openstp = new_open; new_open = NULL; nfsstatsv1.srvopens++; nfsrv_openpluslock++; /* * This is where we can choose to issue a delegation. 
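 * (Unlike the upgrade case above, this path can fall back to a read
 *  delegation when a write delegation isn't possible; see the
 *  writedeleg/readonly tests below.)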
*/ if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0) *rflagsp |= NFSV4OPEN_WDNOTWANTED; else if (nfsrv_issuedelegs == 0) *rflagsp |= NFSV4OPEN_WDSUPPFTYPE; else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt)) *rflagsp |= NFSV4OPEN_WDRESOURCE; else if (delegate == 0 || (writedeleg == 0 && readonly == 0) || !NFSVNO_DELEGOK(vp) || (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) != LCL_CALLBACKSON) *rflagsp |= NFSV4OPEN_WDCONTENTION; else { new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1; new_deleg->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; new_deleg->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; new_deleg->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); if (writedeleg && !NFSVNO_EXRDONLY(exp) && (nfsrv_writedelegifpos || !readonly) && (new_stp->ls_flags & NFSLCK_WANTRDELEG) == 0) { new_deleg->ls_flags = (NFSLCK_DELEGWRITE | NFSLCK_READACCESS | NFSLCK_WRITEACCESS); *rflagsp |= NFSV4OPEN_WRITEDELEGATE; nfsrv_writedelegcnt++; } else { new_deleg->ls_flags = (NFSLCK_DELEGREAD | NFSLCK_READACCESS); *rflagsp |= NFSV4OPEN_READDELEGATE; } new_deleg->ls_uid = new_stp->ls_uid; new_deleg->ls_lfp = lfp; new_deleg->ls_clp = clp; new_deleg->ls_filerev = filerev; new_deleg->ls_compref = nd->nd_compref; LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } } } else { /* * New owner case. Start the open_owner sequence with a * Needs confirmation (unless a reclaim) and hang the * new open off it. */ new_open->ls_stateid.seqid = 1; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) | NFSLCK_OPEN; new_open->ls_uid = new_stp->ls_uid; LIST_INIT(&new_open->ls_open); new_open->ls_openowner = new_stp; new_open->ls_lfp = lfp; new_open->ls_clp = clp; LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); if (new_stp->ls_flags & NFSLCK_RECLAIM) { new_stp->ls_flags = 0; } else if ((nd->nd_flag & ND_NFSV41) != 0) { /* NFSv4.1 never needs confirmation. */ new_stp->ls_flags = 0; /* * This is where we can choose to issue a delegation. 
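 * (NFSv4.1 needs no OpenConfirm, so a delegation can be handed out
 *  on the very first open by a new open_owner; the tests below also
 *  honor the client's NFSv4.1 WANT bits.)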
*/ if (delegate && nfsrv_issuedelegs && (writedeleg || readonly) && (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) == LCL_CALLBACKSON && !NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) && NFSVNO_DELEGOK(vp) && ((nd->nd_flag & ND_NFSV41) == 0 || (new_stp->ls_flags & NFSLCK_WANTNODELEG) == 0)) { new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1; new_deleg->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; new_deleg->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; new_deleg->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); if (writedeleg && !NFSVNO_EXRDONLY(exp) && (nfsrv_writedelegifpos || !readonly) && ((nd->nd_flag & ND_NFSV41) == 0 || (new_stp->ls_flags & NFSLCK_WANTRDELEG) == 0)) { new_deleg->ls_flags = (NFSLCK_DELEGWRITE | NFSLCK_READACCESS | NFSLCK_WRITEACCESS); *rflagsp |= NFSV4OPEN_WRITEDELEGATE; nfsrv_writedelegcnt++; } else { new_deleg->ls_flags = (NFSLCK_DELEGREAD | NFSLCK_READACCESS); *rflagsp |= NFSV4OPEN_READDELEGATE; } new_deleg->ls_uid = new_stp->ls_uid; new_deleg->ls_lfp = lfp; new_deleg->ls_clp = clp; new_deleg->ls_filerev = filerev; new_deleg->ls_compref = nd->nd_compref; LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } /* * Since NFSv4.1 never does an OpenConfirm, the first * open state will be acquired here. */ if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) { clp->lc_flags |= LCL_STAMPEDSTABLE; len = clp->lc_idlen; NFSBCOPY(clp->lc_id, clidp, len); gotstate = 1; } } else { *rflagsp |= NFSV4OPEN_RESULTCONFIRM; new_stp->ls_flags = NFSLCK_NEEDSCONFIRM; } nfsrvd_refcache(new_stp->ls_op); new_stp->ls_noopens = 0; LIST_INIT(&new_stp->ls_open); LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); openstp = new_open; new_open = NULL; *new_stpp = NULL; nfsstatsv1.srvopens++; nfsrv_openpluslock++; nfsstatsv1.srvopenowners++; nfsrv_openpluslock++; } if (!error) { stateidp->seqid = openstp->ls_stateid.seqid; stateidp->other[0] = openstp->ls_stateid.other[0]; stateidp->other[1] = openstp->ls_stateid.other[1]; stateidp->other[2] = openstp->ls_stateid.other[2]; } NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } if (new_open) free(new_open, M_NFSDSTATE); if (new_deleg) free(new_deleg, M_NFSDSTATE); /* * If the NFSv4.1 client just acquired its first open, write a timestamp * to the stable storage file. */ if (gotstate != 0) { nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p); nfsrv_backupstable(); } out: free(clidp, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * Open update. Does the confirm, downgrade and close. */ APPLESTATIC int nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsrv_descript *nd, NFSPROC_T *p, int *retwriteaccessp) { struct nfsstate *stp; struct nfsclient *clp; struct nfslockfile *lfp; u_int32_t bits; int error = 0, gotstate = 0, len = 0; u_char *clidp = NULL; /* * Check for restart conditions (client and server). 
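 * (nfsrv_checkrestart() compares the boot time embedded in the
 *  clientid/stateid against nfsrvboottime; a mismatch maps to
 *  NFSERR_STALECLIENTID or NFSERR_STALESTATEID, and the grace
 *  check is done there for the ops that need it.)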
*/ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, 0); if (error) goto out; clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK); NFSLOCKSTATE(); /* * Get the open structure via clientid and stateid. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (!error) error = nfsrv_getstate(clp, &new_stp->ls_stateid, new_stp->ls_flags, &stp); /* * Sanity check the open. */ if (!error && (!(stp->ls_flags & NFSLCK_OPEN) || (!(new_stp->ls_flags & NFSLCK_CONFIRM) && (stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) || ((new_stp->ls_flags & NFSLCK_CONFIRM) && (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM))))) error = NFSERR_BADSTATEID; if (!error) error = nfsrv_checkseqid(nd, new_stp->ls_seq, stp->ls_openowner, new_stp->ls_op); if (!error && stp->ls_stateid.seqid != new_stp->ls_stateid.seqid && (((nd->nd_flag & ND_NFSV41) == 0 && !(new_stp->ls_flags & NFSLCK_CONFIRM)) || ((nd->nd_flag & ND_NFSV41) != 0 && new_stp->ls_stateid.seqid != 0))) error = NFSERR_OLDSTATEID; if (!error && vnode_vtype(vp) != VREG) { if (vnode_vtype(vp) == VDIR) error = NFSERR_ISDIR; else error = NFSERR_INVAL; } if (error) { /* * If a client tries to confirm an Open with a bad * seqid# and there are no byte range locks or other Opens * on the openowner, just throw it away, so the next use of the * openowner will start a fresh seq#. */ if (error == NFSERR_BADSEQID && (new_stp->ls_flags & NFSLCK_CONFIRM) && nfsrv_nootherstate(stp)) nfsrv_freeopenowner(stp->ls_openowner, 0, p); NFSUNLOCKSTATE(); goto out; } /* * Set the return stateid. */ stateidp->seqid = stp->ls_stateid.seqid + 1; if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0) stateidp->seqid = 1; stateidp->other[0] = stp->ls_stateid.other[0]; stateidp->other[1] = stp->ls_stateid.other[1]; stateidp->other[2] = stp->ls_stateid.other[2]; /* * Now, handle the three cases. */ if (new_stp->ls_flags & NFSLCK_CONFIRM) { /* * If the open doesn't need confirmation, it seems to me that * there is a client error, but I'll just log it and keep going? */ if (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) printf("Nfsv4d: stray open confirm\n"); stp->ls_openowner->ls_flags = 0; stp->ls_stateid.seqid++; if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 0) stp->ls_stateid.seqid = 1; if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) { clp->lc_flags |= LCL_STAMPEDSTABLE; len = clp->lc_idlen; NFSBCOPY(clp->lc_id, clidp, len); gotstate = 1; } NFSUNLOCKSTATE(); } else if (new_stp->ls_flags & NFSLCK_CLOSE) { lfp = stp->ls_lfp; if (retwriteaccessp != NULL) { if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0) *retwriteaccessp = 1; else *retwriteaccessp = 0; } if (nfsrv_dolocallocks != 0 && !LIST_EMPTY(&stp->ls_open)) { /* Get the lf lock */ nfsrv_locklf(lfp); NFSUNLOCKSTATE(); ASSERT_VOP_ELOCKED(vp, "nfsrv_openupdate"); NFSVOPUNLOCK(vp); if (nfsrv_freeopen(stp, vp, 1, p) == 0) { NFSLOCKSTATE(); nfsrv_unlocklf(lfp); NFSUNLOCKSTATE(); } NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); } else { (void) nfsrv_freeopen(stp, NULL, 0, p); NFSUNLOCKSTATE(); } } else { /* * Update the share bits, making sure that the new set are a * subset of the old ones. 
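 * This is a simple subset test via masking: any share bit set in
 * the request but clear in the existing open shows up in
 * (~ls_flags & bits) below and fails the downgrade with
 * NFSERR_INVAL.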
*/ bits = (new_stp->ls_flags & NFSLCK_SHAREBITS); if (~(stp->ls_flags) & bits) { NFSUNLOCKSTATE(); error = NFSERR_INVAL; goto out; } stp->ls_flags = (bits | NFSLCK_OPEN); stp->ls_stateid.seqid++; if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 0) stp->ls_stateid.seqid = 1; NFSUNLOCKSTATE(); } /* * If the client just confirmed its first open, write a timestamp * to the stable storage file. */ if (gotstate != 0) { nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p); nfsrv_backupstable(); } out: free(clidp, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * Delegation update. Does the purge and return. */ APPLESTATIC int nfsrv_delegupdate(struct nfsrv_descript *nd, nfsquad_t clientid, nfsv4stateid_t *stateidp, vnode_t vp, int op, struct ucred *cred, NFSPROC_T *p, int *retwriteaccessp) { struct nfsstate *stp; struct nfsclient *clp; int error = 0; fhandle_t fh; /* * Do a sanity check against the file handle for DelegReturn. */ if (vp) { error = nfsvno_getfh(vp, &fh, p); if (error) goto out; } /* * Check for restart conditions (client and server). */ if (op == NFSV4OP_DELEGRETURN) error = nfsrv_checkrestart(clientid, NFSLCK_DELEGRETURN, stateidp, 0); else error = nfsrv_checkrestart(clientid, NFSLCK_DELEGPURGE, stateidp, 0); NFSLOCKSTATE(); /* * Get the open structure via clientid and stateid. */ if (!error) error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (error) { if (error == NFSERR_CBPATHDOWN) error = 0; if (error == NFSERR_STALECLIENTID && op == NFSV4OP_DELEGRETURN) error = NFSERR_STALESTATEID; } if (!error && op == NFSV4OP_DELEGRETURN) { error = nfsrv_getstate(clp, stateidp, NFSLCK_DELEGRETURN, &stp); if (!error && stp->ls_stateid.seqid != stateidp->seqid && ((nd->nd_flag & ND_NFSV41) == 0 || stateidp->seqid != 0)) error = NFSERR_OLDSTATEID; } /* * NFSERR_EXPIRED means that the state has gone away, * so Delegations have been purged. Just return ok. */ if (error == NFSERR_EXPIRED && op == NFSV4OP_DELEGPURGE) { NFSUNLOCKSTATE(); error = 0; goto out; } if (error) { NFSUNLOCKSTATE(); goto out; } if (op == NFSV4OP_DELEGRETURN) { if (NFSBCMP((caddr_t)&fh, (caddr_t)&stp->ls_lfp->lf_fh, sizeof (fhandle_t))) { NFSUNLOCKSTATE(); error = NFSERR_BADSTATEID; goto out; } if (retwriteaccessp != NULL) { if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0) *retwriteaccessp = 1; else *retwriteaccessp = 0; } nfsrv_freedeleg(stp); } else { nfsrv_freedeleglist(&clp->lc_olddeleg); } NFSUNLOCKSTATE(); error = 0; out: NFSEXITCODE(error); return (error); } /* * Release lock owner. */ APPLESTATIC int nfsrv_releaselckown(struct nfsstate *new_stp, nfsquad_t clientid, NFSPROC_T *p) { struct nfsstate *stp, *nstp, *openstp, *ownstp; struct nfsclient *clp; int error = 0; /* * Check for restart conditions (client and server). */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, 0); if (error) goto out; NFSLOCKSTATE(); /* * Get the lock owner by name. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, NULL, p); if (error) { NFSUNLOCKSTATE(); goto out; } LIST_FOREACH(ownstp, &clp->lc_open, ls_list) { LIST_FOREACH(openstp, &ownstp->ls_open, ls_list) { stp = LIST_FIRST(&openstp->ls_open); while (stp != LIST_END(&openstp->ls_open)) { nstp = LIST_NEXT(stp, ls_list); /* * If the owner matches, check for locks and * then free or return an error. 
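 * (An owner can only be released once its lock list is empty;
 *  otherwise NFSERR_LOCKSHELD is returned and nothing is freed.)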
*/ if (stp->ls_ownerlen == new_stp->ls_ownerlen && !NFSBCMP(stp->ls_owner, new_stp->ls_owner, stp->ls_ownerlen)){ if (LIST_EMPTY(&stp->ls_lock)) { nfsrv_freelockowner(stp, NULL, 0, p); } else { NFSUNLOCKSTATE(); error = NFSERR_LOCKSHELD; goto out; } } stp = nstp; } } } NFSUNLOCKSTATE(); out: NFSEXITCODE(error); return (error); } /* * Get the file handle for a lock structure. */ static int nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p) { fhandle_t *fhp = NULL; int error; /* * For lock, use the new nfslock structure, otherwise just * a fhandle_t on the stack. */ if (flags & NFSLCK_OPEN) { KASSERT(new_lfp != NULL, ("nfsrv_getlockfh: new_lfp NULL")); fhp = &new_lfp->lf_fh; } else if (nfhp) { fhp = nfhp; } else { panic("nfsrv_getlockfh"); } error = nfsvno_getfh(vp, fhp, p); NFSEXITCODE(error); return (error); } /* * Get an nfs lock structure. Allocate one, as required, and return a * pointer to it. * Returns an NFSERR_xxx upon failure or -1 to indicate no current lock. */ static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp, struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit) { struct nfslockfile *lfp; fhandle_t *fhp = NULL, *tfhp; struct nfslockhashhead *hp; struct nfslockfile *new_lfp = NULL; /* * For lock, use the new nfslock structure, otherwise just * a fhandle_t on the stack. */ if (flags & NFSLCK_OPEN) { new_lfp = *new_lfpp; fhp = &new_lfp->lf_fh; } else if (nfhp) { fhp = nfhp; } else { panic("nfsrv_getlockfile"); } hp = NFSLOCKHASH(fhp); LIST_FOREACH(lfp, hp, lf_hash) { tfhp = &lfp->lf_fh; if (NFSVNO_CMPFH(fhp, tfhp)) { if (lockit) nfsrv_locklf(lfp); *lfpp = lfp; return (0); } } if (!(flags & NFSLCK_OPEN)) return (-1); /* * No match, so chain the new one into the list. */ LIST_INIT(&new_lfp->lf_open); LIST_INIT(&new_lfp->lf_lock); LIST_INIT(&new_lfp->lf_deleg); LIST_INIT(&new_lfp->lf_locallock); LIST_INIT(&new_lfp->lf_rollback); new_lfp->lf_locallock_lck.nfslock_usecnt = 0; new_lfp->lf_locallock_lck.nfslock_lock = 0; new_lfp->lf_usecount = 0; LIST_INSERT_HEAD(hp, new_lfp, lf_hash); *lfpp = new_lfp; *new_lfpp = NULL; return (0); } /* * This function adds a nfslock lock structure to the list for the associated * nfsstate and nfslockfile structures. It will be inserted after the * entry pointed at by insert_lop. */ static void nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp) { struct nfslock *lop, *nlop; new_lop->lo_stp = stp; new_lop->lo_lfp = lfp; if (stp != NULL) { /* Insert in increasing lo_first order */ lop = LIST_FIRST(&lfp->lf_lock); if (lop == LIST_END(&lfp->lf_lock) || new_lop->lo_first <= lop->lo_first) { LIST_INSERT_HEAD(&lfp->lf_lock, new_lop, lo_lckfile); } else { nlop = LIST_NEXT(lop, lo_lckfile); while (nlop != LIST_END(&lfp->lf_lock) && nlop->lo_first < new_lop->lo_first) { lop = nlop; nlop = LIST_NEXT(lop, lo_lckfile); } LIST_INSERT_AFTER(lop, new_lop, lo_lckfile); } } else { new_lop->lo_lckfile.le_prev = NULL; /* list not used */ } /* * Insert after insert_lop, which is overloaded as stp or lfp for * an empty list. 
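 * That is, the caller encodes "insert at the head" by passing the
 * list owner itself:
 *	insert_lop == (struct nfslock *)stp -> head of stp->ls_lock
 *	insert_lop == (struct nfslock *)lfp -> head of lfp->lf_locallock
 * while anything else is a real nfslock to insert after.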
*/ if (stp == NULL && (struct nfslockfile *)insert_lop == lfp) LIST_INSERT_HEAD(&lfp->lf_locallock, new_lop, lo_lckowner); else if ((struct nfsstate *)insert_lop == stp) LIST_INSERT_HEAD(&stp->ls_lock, new_lop, lo_lckowner); else LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner); if (stp != NULL) { nfsstatsv1.srvlocks++; nfsrv_openpluslock++; } } /* * This function updates the locking for a lock owner and given file. It * maintains a list of lock ranges ordered on increasing file offset that * are NFSLCK_READ or NFSLCK_WRITE and non-overlapping (aka POSIX style). * It always adds new_lop to the list and sometimes uses the one pointed * at by other_lopp. */ static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp, struct nfslock **other_lopp, struct nfslockfile *lfp) { struct nfslock *new_lop = *new_lopp; struct nfslock *lop, *tlop, *ilop; struct nfslock *other_lop = *other_lopp; int unlock = 0, myfile = 0; u_int64_t tmp; /* * Work down the list until the lock is merged. */ if (new_lop->lo_flags & NFSLCK_UNLOCK) unlock = 1; if (stp != NULL) { ilop = (struct nfslock *)stp; lop = LIST_FIRST(&stp->ls_lock); } else { ilop = (struct nfslock *)lfp; lop = LIST_FIRST(&lfp->lf_locallock); } while (lop != NULL) { /* * Only check locks for this file that aren't before the start of * new lock's range. */ if (lop->lo_lfp == lfp) { myfile = 1; if (lop->lo_end >= new_lop->lo_first) { if (new_lop->lo_end < lop->lo_first) { /* * If the new lock ends before the start of the * current lock's range, no merge, just insert * the new lock. */ break; } if (new_lop->lo_flags == lop->lo_flags || (new_lop->lo_first <= lop->lo_first && new_lop->lo_end >= lop->lo_end)) { /* * This lock can be absorbed by the new lock/unlock. * This happens when it covers the entire range * of the old lock or is contiguous * with the old lock and is of the same type or an * unlock. */ if (lop->lo_first < new_lop->lo_first) new_lop->lo_first = lop->lo_first; if (lop->lo_end > new_lop->lo_end) new_lop->lo_end = lop->lo_end; tlop = lop; lop = LIST_NEXT(lop, lo_lckowner); nfsrv_freenfslock(tlop); continue; } /* * All these cases are for contiguous locks that are not the * same type, so they can't be merged. */ if (new_lop->lo_first <= lop->lo_first) { /* * This case is where the new lock overlaps with the * first part of the old lock. Move the start of the * old lock to just past the end of the new lock. The * new lock will be inserted in front of the old, since * ilop hasn't been updated. (We are done now.) */ lop->lo_first = new_lop->lo_end; break; } if (new_lop->lo_end >= lop->lo_end) { /* * This case is where the new lock overlaps with the * end of the old lock's range. Move the old lock's * end to just before the new lock's first and insert * the new lock after the old lock. * Might not be done yet, since the new lock could * overlap further locks with higher ranges. */ lop->lo_end = new_lop->lo_first; ilop = lop; lop = LIST_NEXT(lop, lo_lckowner); continue; } /* * The final case is where the new lock's range is in the * middle of the current lock's and splits the current lock * up. Use *other_lopp to handle the second part of the * split old lock range. (We are done now.) * For unlock, we use new_lop as other_lop and tmp, since * other_lop and new_lop are the same for this case. * We noted the unlock case above, so we don't need * new_lop->lo_flags any longer. 
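 * Pictorially, for an old lock [A,B) split by a new range [C,D)
 * with A < C and D < B:
 *	before:	[A...............B)
 *	after:	[A...C) [C...D) [D...B)
 * where the middle piece is new_lop (unless this is an unlock) and
 * the right-hand piece is carved out into other_lop below.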
*/ tmp = new_lop->lo_first; if (other_lop == NULL) { if (!unlock) panic("nfsd srv update unlock"); other_lop = new_lop; *new_lopp = NULL; } other_lop->lo_first = new_lop->lo_end; other_lop->lo_end = lop->lo_end; other_lop->lo_flags = lop->lo_flags; other_lop->lo_stp = stp; other_lop->lo_lfp = lfp; lop->lo_end = tmp; nfsrv_insertlock(other_lop, lop, stp, lfp); *other_lopp = NULL; ilop = lop; break; } } ilop = lop; lop = LIST_NEXT(lop, lo_lckowner); if (myfile && (lop == NULL || lop->lo_lfp != lfp)) break; } /* * Insert the new lock in the list at the appropriate place. */ if (!unlock) { nfsrv_insertlock(new_lop, ilop, stp, lfp); *new_lopp = NULL; } } /* * This function handles sequencing of locks, etc. * It returns an error that indicates what the caller should do. */ static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid, struct nfsstate *stp, struct nfsrvcache *op) { int error = 0; if ((nd->nd_flag & ND_NFSV41) != 0) /* NFSv4.1 ignores the open_seqid and lock_seqid. */ goto out; if (op != nd->nd_rp) panic("nfsrvstate checkseqid"); if (!(op->rc_flag & RC_INPROG)) panic("nfsrvstate not inprog"); if (stp->ls_op && stp->ls_op->rc_refcnt <= 0) { printf("refcnt=%d\n", stp->ls_op->rc_refcnt); panic("nfsrvstate op refcnt"); } if ((stp->ls_seq + 1) == seqid) { if (stp->ls_op) nfsrvd_derefcache(stp->ls_op); stp->ls_op = op; nfsrvd_refcache(op); stp->ls_seq = seqid; goto out; } else if (stp->ls_seq == seqid && stp->ls_op && op->rc_xid == stp->ls_op->rc_xid && op->rc_refcnt == 0 && op->rc_reqlen == stp->ls_op->rc_reqlen && op->rc_cksum == stp->ls_op->rc_cksum) { if (stp->ls_op->rc_flag & RC_INPROG) { error = NFSERR_DONTREPLY; goto out; } nd->nd_rp = stp->ls_op; nd->nd_rp->rc_flag |= RC_INPROG; nfsrvd_delcache(op); error = NFSERR_REPLYFROMCACHE; goto out; } error = NFSERR_BADSEQID; out: NFSEXITCODE2(error, nd); return (error); } /* * Get the client ip address for callbacks. If the strings can't be parsed, * just set lc_program to 0 to indicate no callbacks are possible. * (For cases where the address can't be parsed or is 0.0.0.0.0.0, set * the address to the client's transport address. This won't be used * for callbacks, but can be printed out by nfsstats for info.) * Return error if the xdr can't be parsed, 0 otherwise. */ APPLESTATIC int nfsrv_getclientipaddr(struct nfsrv_descript *nd, struct nfsclient *clp) { u_int32_t *tl; u_char *cp, *cp2; int i, j, maxalen = 0, minalen = 0; sa_family_t af; #ifdef INET struct sockaddr_in *rin = NULL, *sin; #endif #ifdef INET6 struct sockaddr_in6 *rin6 = NULL, *sin6; #endif u_char *addr; int error = 0, cantparse = 0; union { in_addr_t ival; u_char cval[4]; } ip; union { in_port_t sval; u_char cval[2]; } port; /* 8 is the maximum length of the port# string. 
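 * (The callback address is a "universal address": the host part in
 *  its usual text form, followed by ".hi.lo" where the port is
 *  hi * 256 + lo. The longest port suffix, ".255.255", is 8
 *  characters; e.g. "10.0.0.1.7.64" means 10.0.0.1 port 1856.)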
*/ addr = malloc(INET6_ADDRSTRLEN + 8, M_TEMP, M_WAITOK); clp->lc_req.nr_client = NULL; clp->lc_req.nr_lock = 0; af = AF_UNSPEC; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i >= 3 && i <= 4) { error = nfsrv_mtostr(nd, addr, i); if (error) goto nfsmout; #ifdef INET if (!strcmp(addr, "tcp")) { clp->lc_flags |= LCL_TCPCALLBACK; clp->lc_req.nr_sotype = SOCK_STREAM; clp->lc_req.nr_soproto = IPPROTO_TCP; af = AF_INET; } else if (!strcmp(addr, "udp")) { clp->lc_req.nr_sotype = SOCK_DGRAM; clp->lc_req.nr_soproto = IPPROTO_UDP; af = AF_INET; } #endif #ifdef INET6 if (af == AF_UNSPEC) { if (!strcmp(addr, "tcp6")) { clp->lc_flags |= LCL_TCPCALLBACK; clp->lc_req.nr_sotype = SOCK_STREAM; clp->lc_req.nr_soproto = IPPROTO_TCP; af = AF_INET6; } else if (!strcmp(addr, "udp6")) { clp->lc_req.nr_sotype = SOCK_DGRAM; clp->lc_req.nr_soproto = IPPROTO_UDP; af = AF_INET6; } } #endif if (af == AF_UNSPEC) { cantparse = 1; } } else { cantparse = 1; if (i > 0) { error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; } } /* * The caller has allocated clp->lc_req.nr_nam to be large enough * for either AF_INET or AF_INET6 and zeroed out the contents. * maxalen is set to the maximum length of the host IP address string * plus 8 for the maximum length of the port#. * minalen is set to the minimum length of the host IP address string * plus 4 for the minimum length of the port#. * These lengths do not include NULL termination, * so INET[6]_ADDRSTRLEN - 1 is used in the calculations. */ switch (af) { #ifdef INET case AF_INET: rin = (struct sockaddr_in *)clp->lc_req.nr_nam; rin->sin_family = AF_INET; rin->sin_len = sizeof(struct sockaddr_in); maxalen = INET_ADDRSTRLEN - 1 + 8; minalen = 7 + 4; break; #endif #ifdef INET6 case AF_INET6: rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam; rin6->sin6_family = AF_INET6; rin6->sin6_len = sizeof(struct sockaddr_in6); maxalen = INET6_ADDRSTRLEN - 1 + 8; minalen = 3 + 4; break; #endif } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i < 0) { error = NFSERR_BADXDR; goto nfsmout; } else if (i == 0) { cantparse = 1; } else if (!cantparse && i <= maxalen && i >= minalen) { error = nfsrv_mtostr(nd, addr, i); if (error) goto nfsmout; /* * Parse out the address fields. We expect 6 decimal numbers * separated by '.'s for AF_INET and two decimal numbers * preceeded by '.'s for AF_INET6. */ cp = NULL; switch (af) { #ifdef INET6 /* * For AF_INET6, first parse the host address. */ case AF_INET6: cp = strchr(addr, '.'); if (cp != NULL) { *cp++ = '\0'; if (inet_pton(af, addr, &rin6->sin6_addr) == 1) i = 4; else { cp = NULL; cantparse = 1; } } break; #endif #ifdef INET case AF_INET: cp = addr; i = 0; break; #endif } while (cp != NULL && *cp && i < 6) { cp2 = cp; while (*cp2 && *cp2 != '.') cp2++; if (*cp2) *cp2++ = '\0'; else if (i != 5) { cantparse = 1; break; } j = nfsrv_getipnumber(cp); if (j >= 0) { if (i < 4) ip.cval[3 - i] = j; else port.cval[5 - i] = j; } else { cantparse = 1; break; } cp = cp2; i++; } if (!cantparse) { /* * The host address INADDR_ANY is (mis)used to indicate * "there is no valid callback address". 
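 * (A host address of INADDR_ANY/in6addr_any therefore marks the
 *  parse as failed below, and the cantparse fallback further down
 *  records the client's transport address with port 0 and clears
 *  lc_program, so callbacks, and hence delegations, are never
 *  attempted for such a client.)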
*/ switch (af) { #ifdef INET6 case AF_INET6: if (!IN6_ARE_ADDR_EQUAL(&rin6->sin6_addr, &in6addr_any)) rin6->sin6_port = htons(port.sval); else cantparse = 1; break; #endif #ifdef INET case AF_INET: if (ip.ival != INADDR_ANY) { rin->sin_addr.s_addr = htonl(ip.ival); rin->sin_port = htons(port.sval); } else { cantparse = 1; } break; #endif } } } else { cantparse = 1; if (i > 0) { error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; } } if (cantparse) { switch (nd->nd_nam->sa_family) { #ifdef INET case AF_INET: sin = (struct sockaddr_in *)nd->nd_nam; rin = (struct sockaddr_in *)clp->lc_req.nr_nam; rin->sin_family = AF_INET; rin->sin_len = sizeof(struct sockaddr_in); rin->sin_addr.s_addr = sin->sin_addr.s_addr; rin->sin_port = 0x0; break; #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)nd->nd_nam; rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam; rin6->sin6_family = AF_INET6; rin6->sin6_len = sizeof(struct sockaddr_in6); rin6->sin6_addr = sin6->sin6_addr; rin6->sin6_port = 0x0; break; #endif } clp->lc_program = 0; } nfsmout: free(addr, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * Turn a string of up to three decimal digits into a number. Return -1 upon * error. */ static int nfsrv_getipnumber(u_char *cp) { int i = 0, j = 0; while (*cp) { if (j > 2 || *cp < '0' || *cp > '9') return (-1); i *= 10; i += (*cp - '0'); cp++; j++; } if (i < 256) return (i); return (-1); } /* * This function checks for restart conditions. */ static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags, nfsv4stateid_t *stateidp, int specialid) { int ret = 0; /* * First check for a server restart. Open, LockT, ReleaseLockOwner * and DelegPurge have a clientid, the rest a stateid. */ if (flags & (NFSLCK_OPEN | NFSLCK_TEST | NFSLCK_RELEASE | NFSLCK_DELEGPURGE)) { if (clientid.lval[0] != nfsrvboottime) { ret = NFSERR_STALECLIENTID; goto out; } } else if (stateidp->other[0] != nfsrvboottime && specialid == 0) { ret = NFSERR_STALESTATEID; goto out; } /* * Read, Write, Setattr and LockT can return NFSERR_GRACE and do * not use a lock/open owner seqid#, so the check can be done now. * (The others will be checked, as required, later.) */ if (!(flags & (NFSLCK_CHECK | NFSLCK_TEST))) goto out; NFSLOCKSTATE(); ret = nfsrv_checkgrace(NULL, NULL, flags); NFSUNLOCKSTATE(); out: NFSEXITCODE(ret); return (ret); } /* * Check for grace. */ static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp, u_int32_t flags) { int error = 0, notreclaimed; struct nfsrv_stable *sp; if ((nfsrv_stablefirst.nsf_flags & (NFSNSF_UPDATEDONE | NFSNSF_GRACEOVER)) == 0) { /* * First, check to see if all of the clients have done a * ReclaimComplete. If so, grace can end now. */ notreclaimed = 0; LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) { if ((sp->nst_flag & NFSNST_RECLAIMED) == 0) { notreclaimed = 1; break; } } if (notreclaimed == 0) nfsrv_stablefirst.nsf_flags |= (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK); } if ((nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) != 0) { if (flags & NFSLCK_RECLAIM) { error = NFSERR_NOGRACE; goto out; } } else { if (!(flags & NFSLCK_RECLAIM)) { error = NFSERR_GRACE; goto out; } if (nd != NULL && clp != NULL && (nd->nd_flag & ND_NFSV41) != 0 && (clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) { error = NFSERR_NOGRACE; goto out; } /* * If grace is almost over and we are still getting Reclaims, * extend grace a bit. 
*/ if ((NFSD_MONOSEC + NFSRV_LEASEDELTA) > nfsrv_stablefirst.nsf_eograce) nfsrv_stablefirst.nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA; } out: NFSEXITCODE(error); return (error); } /* * Do a server callback. * The "trunc" argument is slightly overloaded and refers to different * boolean arguments for CBRECALL and CBLAYOUTRECALL. */ static int nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp, struct nfsvattr *nap, nfsattrbit_t *attrbitp, int laytype, NFSPROC_T *p) { mbuf_t m; u_int32_t *tl; struct nfsrv_descript *nd; struct ucred *cred; int error = 0; u_int32_t callback; struct nfsdsession *sep = NULL; uint64_t tval; nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); cred = newnfs_getcred(); NFSLOCKSTATE(); /* mostly for lc_cbref++ */ if (clp->lc_flags & LCL_NEEDSCONFIRM) { NFSUNLOCKSTATE(); panic("docallb"); } clp->lc_cbref++; /* * Fill the callback program# and version into the request * structure for newnfs_connect() to use. */ clp->lc_req.nr_prog = clp->lc_program; #ifdef notnow if ((clp->lc_flags & LCL_NFSV41) != 0) clp->lc_req.nr_vers = NFSV41_CBVERS; else #endif clp->lc_req.nr_vers = NFSV4_CBVERS; /* * First, fill in some of the fields of nd and cr. */ nd->nd_flag = ND_NFSV4; if (clp->lc_flags & LCL_GSS) nd->nd_flag |= ND_KERBV; if ((clp->lc_flags & LCL_NFSV41) != 0) nd->nd_flag |= ND_NFSV41; if ((clp->lc_flags & LCL_NFSV42) != 0) nd->nd_flag |= ND_NFSV42; nd->nd_repstat = 0; cred->cr_uid = clp->lc_uid; cred->cr_gid = clp->lc_gid; callback = clp->lc_callback; NFSUNLOCKSTATE(); cred->cr_ngroups = 1; /* * Get the first mbuf for the request. */ MGET(m, M_WAITOK, MT_DATA); mbuf_setlen(m, 0); nd->nd_mreq = nd->nd_mb = m; nd->nd_bpos = NFSMTOD(m, caddr_t); /* * and build the callback request. 
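 * A callback compound is laid out as: tag string, minorversion,
 * callback-ident, numops, then the ops themselves. For NFSv4.1/4.2
 * that is CB_SEQUENCE followed by the real op (numops == 2), while
 * NFSv4.0 carries just the single op; see nfsrv_cbcallargs() below.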
*/ if (procnum == NFSV4OP_CBGETATTR) { nd->nd_procnum = NFSV4PROC_CBCOMPOUND; error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBGETATTR, "CB Getattr", &sep); if (error != 0) { mbuf_freem(nd->nd_mreq); goto errout; } (void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0); (void)nfsrv_putattrbit(nd, attrbitp); } else if (procnum == NFSV4OP_CBRECALL) { nd->nd_procnum = NFSV4PROC_CBCOMPOUND; error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBRECALL, "CB Recall", &sep); if (error != 0) { mbuf_freem(nd->nd_mreq); goto errout; } NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID); *tl++ = txdr_unsigned(stateidp->seqid); NFSBCOPY((caddr_t)stateidp->other, (caddr_t)tl, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); if (trunc) *tl = newnfs_true; else *tl = newnfs_false; (void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0); } else if (procnum == NFSV4OP_CBLAYOUTRECALL) { NFSD_DEBUG(4, "docallback layout recall\n"); nd->nd_procnum = NFSV4PROC_CBCOMPOUND; error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBLAYOUTRECALL, "CB Reclayout", &sep); NFSD_DEBUG(4, "aft cbcallargs=%d\n", error); if (error != 0) { mbuf_freem(nd->nd_mreq); goto errout; } NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(laytype); *tl++ = txdr_unsigned(NFSLAYOUTIOMODE_ANY); if (trunc) *tl++ = newnfs_true; else *tl++ = newnfs_false; *tl = txdr_unsigned(NFSV4LAYOUTRET_FILE); nfsm_fhtom(nd, (uint8_t *)fhp, NFSX_MYFH, 0); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_HYPER + NFSX_STATEID); tval = 0; txdr_hyper(tval, tl); tl += 2; tval = UINT64_MAX; txdr_hyper(tval, tl); tl += 2; *tl++ = txdr_unsigned(stateidp->seqid); NFSBCOPY(stateidp->other, tl, NFSX_STATEIDOTHER); tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); NFSD_DEBUG(4, "aft args\n"); } else if (procnum == NFSV4PROC_CBNULL) { nd->nd_procnum = NFSV4PROC_CBNULL; if ((clp->lc_flags & LCL_NFSV41) != 0) { error = nfsv4_getcbsession(clp, &sep); if (error != 0) { mbuf_freem(nd->nd_mreq); goto errout; } } } else { error = NFSERR_SERVERFAULT; mbuf_freem(nd->nd_mreq); goto errout; } /* * Call newnfs_connect(), as required, and then newnfs_request(). */ (void) newnfs_sndlock(&clp->lc_req.nr_lock); if (clp->lc_req.nr_client == NULL) { if ((clp->lc_flags & LCL_NFSV41) != 0) { error = ECONNREFUSED; nfsrv_freesession(sep, NULL); } else if (nd->nd_procnum == NFSV4PROC_CBNULL) error = newnfs_connect(NULL, &clp->lc_req, cred, NULL, 1); else error = newnfs_connect(NULL, &clp->lc_req, cred, NULL, 3); } newnfs_sndunlock(&clp->lc_req.nr_lock); NFSD_DEBUG(4, "aft sndunlock=%d\n", error); if (!error) { if ((nd->nd_flag & ND_NFSV41) != 0) { KASSERT(sep != NULL, ("sep NULL")); if (sep->sess_cbsess.nfsess_xprt != NULL) error = newnfs_request(nd, NULL, clp, &clp->lc_req, NULL, NULL, cred, clp->lc_program, clp->lc_req.nr_vers, NULL, 1, NULL, &sep->sess_cbsess); else { /* * This should probably never occur, but if a * client somehow does an RPC without a * SequenceID Op that causes a callback just * after the nfsd threads have been terminated * and restared we could conceivably get here * without a backchannel xprt. */ printf("nfsrv_docallback: no xprt\n"); error = ECONNREFUSED; } NFSD_DEBUG(4, "aft newnfs_request=%d\n", error); nfsrv_freesession(sep, NULL); } else error = newnfs_request(nd, NULL, clp, &clp->lc_req, NULL, NULL, cred, clp->lc_program, clp->lc_req.nr_vers, NULL, 1, NULL, NULL); } errout: NFSFREECRED(cred); /* * If error is set here, the Callback path isn't working * properly, so twiddle the appropriate LCL_ flags. 
* (nd_repstat != 0 indicates the Callback path is working, * but the callback failed on the client.) */ if (error) { /* * Mark the callback pathway down, which disabled issuing * of delegations and gets Renew to return NFSERR_CBPATHDOWN. */ NFSLOCKSTATE(); clp->lc_flags |= LCL_CBDOWN; NFSUNLOCKSTATE(); } else { /* * Callback worked. If the callback path was down, disable * callbacks, so no more delegations will be issued. (This * is done on the assumption that the callback pathway is * flakey.) */ NFSLOCKSTATE(); if (clp->lc_flags & LCL_CBDOWN) clp->lc_flags &= ~(LCL_CBDOWN | LCL_CALLBACKSON); NFSUNLOCKSTATE(); if (nd->nd_repstat) { error = nd->nd_repstat; NFSD_DEBUG(1, "nfsrv_docallback op=%d err=%d\n", procnum, error); } else if (error == 0 && procnum == NFSV4OP_CBGETATTR) error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p, NULL); mbuf_freem(nd->nd_mrep); } NFSLOCKSTATE(); clp->lc_cbref--; if ((clp->lc_flags & LCL_WAKEUPWANTED) && clp->lc_cbref == 0) { clp->lc_flags &= ~LCL_WAKEUPWANTED; wakeup(clp); } NFSUNLOCKSTATE(); free(nd, M_TEMP); NFSEXITCODE(error); return (error); } /* * Set up the compound RPC for the callback. */ static int nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp, uint32_t callback, int op, const char *optag, struct nfsdsession **sepp) { uint32_t *tl; int error, len; len = strlen(optag); (void)nfsm_strtom(nd, optag, len); NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED); if ((nd->nd_flag & ND_NFSV41) != 0) { if ((nd->nd_flag & ND_NFSV42) != 0) *tl++ = txdr_unsigned(NFSV42_MINORVERSION); else *tl++ = txdr_unsigned(NFSV41_MINORVERSION); *tl++ = txdr_unsigned(callback); *tl++ = txdr_unsigned(2); *tl = txdr_unsigned(NFSV4OP_CBSEQUENCE); error = nfsv4_setcbsequence(nd, clp, 1, sepp); if (error != 0) return (error); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(op); } else { *tl++ = txdr_unsigned(NFSV4_MINORVERSION); *tl++ = txdr_unsigned(callback); *tl++ = txdr_unsigned(1); *tl = txdr_unsigned(op); } return (0); } /* * Return the next index# for a clientid. Mostly just increment and return * the next one, but... if the 32bit unsigned does actually wrap around, * it should be rebooted. * At an average rate of one new client per second, it will wrap around in * approximately 136 years. (I think the server will have been shut * down or rebooted before then.) */ static u_int32_t nfsrv_nextclientindex(void) { static u_int32_t client_index = 0; client_index++; if (client_index != 0) return (client_index); printf("%s: out of clientids\n", __func__); return (client_index); } /* * Return the next index# for a stateid. Mostly just increment and return * the next one, but... if the 32bit unsigned does actually wrap around * (will a BSD server stay up that long?), find * new start and end values. */ static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp) { struct nfsstate *stp; int i; u_int32_t canuse, min_index, max_index; if (!(clp->lc_flags & LCL_INDEXNOTOK)) { clp->lc_stateindex++; if (clp->lc_stateindex != clp->lc_statemaxindex) return (clp->lc_stateindex); } /* * Yuck, we've hit the end. * Look for a new min and max. 
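 * The scan below uses in-use indices at or above 0x80000000 to bound
 * the top of the reusable range and those below it to bound the
 * bottom, so that allocation can restart in the unused gap
 * (min_index + 1 .. max_index).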
 */
    min_index = 0;
    max_index = 0xffffffff;
    for (i = 0; i < nfsrv_statehashsize; i++) {
        LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
            if (stp->ls_stateid.other[2] > 0x80000000) {
                if (stp->ls_stateid.other[2] < max_index)
                    max_index = stp->ls_stateid.other[2];
            } else {
                if (stp->ls_stateid.other[2] > min_index)
                    min_index = stp->ls_stateid.other[2];
            }
        }
    }

    /*
     * Yikes, highly unlikely, but I'll handle it anyhow.
     */
    if (min_index == 0x80000000 && max_index == 0x80000001) {
        canuse = 0;
        /*
         * Loop around until we find an unused entry. Return that
         * and set LCL_INDEXNOTOK, so the search will continue next time.
         * (This is one of those rare cases where a goto is the
         * cleanest way to code the loop.)
         */
tryagain:
        for (i = 0; i < nfsrv_statehashsize; i++) {
            LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
                if (stp->ls_stateid.other[2] == canuse) {
                    canuse++;
                    goto tryagain;
                }
            }
        }
        clp->lc_flags |= LCL_INDEXNOTOK;
        return (canuse);
    }

    /*
     * Ok to start again from min + 1.
     */
    clp->lc_stateindex = min_index + 1;
    clp->lc_statemaxindex = max_index;
    clp->lc_flags &= ~LCL_INDEXNOTOK;
    return (clp->lc_stateindex);
}

/*
 * The following functions handle the stable storage file that deals with
 * the edge conditions described in RFC3530 Sec. 8.6.3.
 * The file is as follows:
 * - a single record at the beginning that has the lease time of the
 *   previous server instance (before the last reboot) and the nfsrvboottime
 *   values for the previous server boots.
 *   These previous boot times are used to ensure that the current
 *   nfsrvboottime does not, somehow, get set to a previous one.
 *   (This is important so that Stale ClientIDs and StateIDs can
 *   be recognized.)
 *   The number of previous nfsrvboottime values precedes the list.
 * - followed by some number of appended records with:
 *   - client id string
 *   - flag that indicates it is a record revoking state via lease
 *     expiration or similar
 *     OR has successfully acquired state.
 * These structures vary in length, with the client string at the end, up
 * to NFSV4_OPAQUELIMIT in size.
 *
 * At the end of the grace period, the file is truncated, the first
 * record is rewritten with updated information and any acquired state
 * records for successful reclaims of state are written.
 *
 * Subsequent records are appended when the first state is issued to
 * a client and when state is revoked for a client.
 *
 * When reading the file in, state issued records that come later in
 * the file override older ones, since the append log is in chronological
 * order.
 * If, for some reason, the file can't be read, the grace period is
 * immediately terminated and all reclaims get NFSERR_NOGRACE.
 */

/*
 * Read in the stable storage file. Called by nfssvc() before the nfsd
 * processes start servicing requests.
 */
APPLESTATIC void
nfsrv_setupstable(NFSPROC_T *p)
{
    struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
    struct nfsrv_stable *sp, *nsp;
    struct nfst_rec *tsp;
    int error, i, tryagain;
    off_t off = 0;
    ssize_t aresid, len;

    /*
     * If NFSNSF_UPDATEDONE is set, this is a restart of the nfsds without
     * a reboot, so state has not been lost.
     */
    if (sf->nsf_flags & NFSNSF_UPDATEDONE)
        return;
    /*
     * Set Grace over just until the file reads successfully.
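     * That is, assume no grace can be offered (NFSNSF_GRACEOVER)
     * until the first record and the boot time list have been read
     * back in successfully; only then is NFSNSF_OK set and a real
     * end-of-grace time computed.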
*/ nfsrvboottime = time_second; LIST_INIT(&sf->nsf_head); sf->nsf_flags = (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK); sf->nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA; if (sf->nsf_fp == NULL) return; error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp), (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p); if (error || aresid || sf->nsf_numboots == 0 || sf->nsf_numboots > NFSNSF_MAXNUMBOOTS) return; /* * Now, read in the boottimes. */ sf->nsf_bootvals = (time_t *)malloc((sf->nsf_numboots + 1) * sizeof (time_t), M_TEMP, M_WAITOK); off = sizeof (struct nfsf_rec); error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp), (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p); if (error || aresid) { free(sf->nsf_bootvals, M_TEMP); sf->nsf_bootvals = NULL; return; } /* * Make sure this nfsrvboottime is different from all recorded * previous ones. */ do { tryagain = 0; for (i = 0; i < sf->nsf_numboots; i++) { if (nfsrvboottime == sf->nsf_bootvals[i]) { nfsrvboottime++; tryagain = 1; break; } } } while (tryagain); sf->nsf_flags |= NFSNSF_OK; off += (sf->nsf_numboots * sizeof (time_t)); /* * Read through the file, building a list of records for grace * checking. * Each record is between sizeof (struct nfst_rec) and * sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1 * and is actually sizeof (struct nfst_rec) + nst_len - 1. */ tsp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1, M_TEMP, M_WAITOK); do { error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp), (caddr_t)tsp, sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1, off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p); len = (sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1) - aresid; if (error || (len > 0 && (len < sizeof (struct nfst_rec) || len < (sizeof (struct nfst_rec) + tsp->len - 1)))) { /* * Yuck, the file has been corrupted, so just return * after clearing out any restart state, so the grace period * is over. */ LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) { LIST_REMOVE(sp, nst_list); free(sp, M_TEMP); } free(tsp, M_TEMP); sf->nsf_flags &= ~NFSNSF_OK; free(sf->nsf_bootvals, M_TEMP); sf->nsf_bootvals = NULL; return; } if (len > 0) { off += sizeof (struct nfst_rec) + tsp->len - 1; /* * Search the list for a matching client. */ LIST_FOREACH(sp, &sf->nsf_head, nst_list) { if (tsp->len == sp->nst_len && !NFSBCMP(tsp->client, sp->nst_client, tsp->len)) break; } if (sp == LIST_END(&sf->nsf_head)) { sp = (struct nfsrv_stable *)malloc(tsp->len + sizeof (struct nfsrv_stable) - 1, M_TEMP, M_WAITOK); NFSBCOPY((caddr_t)tsp, (caddr_t)&sp->nst_rec, sizeof (struct nfst_rec) + tsp->len - 1); LIST_INSERT_HEAD(&sf->nsf_head, sp, nst_list); } else { if (tsp->flag == NFSNST_REVOKE) sp->nst_flag |= NFSNST_REVOKE; else /* * A subsequent timestamp indicates the client * did a setclientid/confirm and any previous * revoke is no longer relevant. */ sp->nst_flag &= ~NFSNST_REVOKE; } } } while (len > 0); free(tsp, M_TEMP); sf->nsf_flags = NFSNSF_OK; sf->nsf_eograce = NFSD_MONOSEC + sf->nsf_lease + NFSRV_LEASEDELTA; } /* * Update the stable storage file, now that the grace period is over. 
*/ APPLESTATIC void nfsrv_updatestable(NFSPROC_T *p) { struct nfsrv_stablefirst *sf = &nfsrv_stablefirst; struct nfsrv_stable *sp, *nsp; int i; struct nfsvattr nva; vnode_t vp; #if defined(__FreeBSD_version) && (__FreeBSD_version >= 500000) mount_t mp = NULL; #endif int error; if (sf->nsf_fp == NULL || (sf->nsf_flags & NFSNSF_UPDATEDONE)) return; sf->nsf_flags |= NFSNSF_UPDATEDONE; /* * Ok, we need to rewrite the stable storage file. * - truncate to 0 length * - write the new first structure * - loop through the data structures, writing out any that * have timestamps older than the old boot */ if (sf->nsf_bootvals) { sf->nsf_numboots++; for (i = sf->nsf_numboots - 2; i >= 0; i--) sf->nsf_bootvals[i + 1] = sf->nsf_bootvals[i]; } else { sf->nsf_numboots = 1; sf->nsf_bootvals = (time_t *)malloc(sizeof (time_t), M_TEMP, M_WAITOK); } sf->nsf_bootvals[0] = nfsrvboottime; sf->nsf_lease = nfsrv_lease; NFSVNO_ATTRINIT(&nva); NFSVNO_SETATTRVAL(&nva, size, 0); vp = NFSFPVNODE(sf->nsf_fp); vn_start_write(vp, &mp, V_WAIT); if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) { error = nfsvno_setattr(vp, &nva, NFSFPCRED(sf->nsf_fp), p, NULL); NFSVOPUNLOCK(vp); } else error = EPERM; vn_finished_write(mp); if (!error) error = NFSD_RDWR(UIO_WRITE, vp, (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), (off_t)0, UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p); if (!error) error = NFSD_RDWR(UIO_WRITE, vp, (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), (off_t)(sizeof (struct nfsf_rec)), UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p); free(sf->nsf_bootvals, M_TEMP); sf->nsf_bootvals = NULL; if (error) { sf->nsf_flags &= ~NFSNSF_OK; printf("EEK! Can't write NfsV4 stable storage file\n"); return; } sf->nsf_flags |= NFSNSF_OK; /* * Loop through the list and write out timestamp records for * any clients that successfully reclaimed state. */ LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) { if (sp->nst_flag & NFSNST_GOTSTATE) { nfsrv_writestable(sp->nst_client, sp->nst_len, NFSNST_NEWSTATE, p); sp->nst_clp->lc_flags |= LCL_STAMPEDSTABLE; } LIST_REMOVE(sp, nst_list); free(sp, M_TEMP); } nfsrv_backupstable(); } /* * Append a record to the stable storage file. */ APPLESTATIC void nfsrv_writestable(u_char *client, int len, int flag, NFSPROC_T *p) { struct nfsrv_stablefirst *sf = &nfsrv_stablefirst; struct nfst_rec *sp; int error; if (!(sf->nsf_flags & NFSNSF_OK) || sf->nsf_fp == NULL) return; sp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) + len - 1, M_TEMP, M_WAITOK); sp->len = len; NFSBCOPY(client, sp->client, len); sp->flag = flag; error = NFSD_RDWR(UIO_WRITE, NFSFPVNODE(sf->nsf_fp), (caddr_t)sp, sizeof (struct nfst_rec) + len - 1, (off_t)0, UIO_SYSSPACE, (IO_SYNC | IO_APPEND), NFSFPCRED(sf->nsf_fp), NULL, p); free(sp, M_TEMP); if (error) { sf->nsf_flags &= ~NFSNSF_OK; printf("EEK! Can't write NfsV4 stable storage file\n"); } } /* * This function is called during the grace period to mark a client * that successfully reclaimed state. */ static void nfsrv_markstable(struct nfsclient *clp) { struct nfsrv_stable *sp; /* * First find the client structure. */ LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) { if (sp->nst_len == clp->lc_idlen && !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len)) break; } if (sp == LIST_END(&nfsrv_stablefirst.nsf_head)) return; /* * Now, just mark it and set the nfsclient back pointer. */ sp->nst_flag |= NFSNST_GOTSTATE; sp->nst_clp = clp; } /* * This function is called when a NFSv4.1 client does a ReclaimComplete. 
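 * (Once every client in the stable storage list has been marked
 *  NFSNST_RECLAIMED, nfsrv_checkgrace() can declare the grace period
 *  over before its full duration has elapsed.)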
 * Very similar to nfsrv_markstable(), except for the flag being set.
 */
static void
nfsrv_markreclaim(struct nfsclient *clp)
{
    struct nfsrv_stable *sp;

    /*
     * First find the client structure.
     */
    LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
        if (sp->nst_len == clp->lc_idlen &&
            !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
            break;
    }
    if (sp == LIST_END(&nfsrv_stablefirst.nsf_head))
        return;

    /*
     * Now, just set the flag.
     */
    sp->nst_flag |= NFSNST_RECLAIMED;
}

/*
 * This function is called for a reclaim, to see if it gets grace.
 * It returns 0 if a reclaim is allowed, 1 otherwise.
 */
static int
nfsrv_checkstable(struct nfsclient *clp)
{
    struct nfsrv_stable *sp;

    /*
     * First, find the entry for the client.
     */
    LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
        if (sp->nst_len == clp->lc_idlen &&
            !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
            break;
    }

    /*
     * If it is not in the list, state was revoked or no state was issued
     * since the previous reboot, so the reclaim is denied.
     */
    if (sp == LIST_END(&nfsrv_stablefirst.nsf_head) ||
        (sp->nst_flag & NFSNST_REVOKE) ||
        !(nfsrv_stablefirst.nsf_flags & NFSNSF_OK))
        return (1);
    return (0);
}

/*
 * Test for and try to clear out a conflicting client. This is called by
 * nfsrv_lockctrl() and nfsrv_openctrl() when conflicts with other clients
 * are found.
 * The trick here is that it can't revoke a conflicting client with an
 * expired lease unless it holds the v4root lock, so...
 * If no v4root lock, get the lock and return 1 to indicate "try again".
 * Return 0 to indicate the conflict can't be revoked and 1 to indicate
 * the revocation worked and the conflicting client is "bye, bye", so it
 * can be tried again.
 * Return 2 to indicate that the vnode is VIRF_DOOMED after NFSVOPLOCK().
 * Unlocks State before a non-zero value is returned.
 */
static int
nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp,
    NFSPROC_T *p)
{
    int gotlock, lktype = 0;

    /*
     * If the lease hasn't expired, we can't fix it.
     */
    if (clp->lc_expiry >= NFSD_MONOSEC ||
        !(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE))
        return (0);
    if (*haslockp == 0) {
        NFSUNLOCKSTATE();
        if (vp != NULL) {
            lktype = NFSVOPISLOCKED(vp);
            NFSVOPUNLOCK(vp);
        }
        NFSLOCKV4ROOTMUTEX();
        nfsv4_relref(&nfsv4rootfs_lock);
        do {
            gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
                NFSV4ROOTLOCKMUTEXPTR, NULL);
        } while (!gotlock);
        NFSUNLOCKV4ROOTMUTEX();
        *haslockp = 1;
        if (vp != NULL) {
            NFSVOPLOCK(vp, lktype | LK_RETRY);
            if (VN_IS_DOOMED(vp))
                return (2);
        }
        return (1);
    }
    NFSUNLOCKSTATE();

    /*
     * Ok, we can expire the conflicting client.
     */
    nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
    nfsrv_backupstable();
    nfsrv_cleanclient(clp, p);
    nfsrv_freedeleglist(&clp->lc_deleg);
    nfsrv_freedeleglist(&clp->lc_olddeleg);
    LIST_REMOVE(clp, lc_hash);
    nfsrv_zapclient(clp, p);
    return (1);
}

/*
 * Resolve a delegation conflict.
 * Returns 0 to indicate the conflict was resolved without sleeping.
 * Return -1 to indicate that the caller should check for conflicts again.
 * Return > 0 for an error that should be returned, normally NFSERR_DELAY.
 *
 * Also, manipulate the nfsv4root_lock, as required. It isn't changed
 * for a return of 0, since there was no sleep and it could be required
 * later. It is released for a return of NFSERR_DELAY, since the caller
 * will return that error. It is released when a sleep was done waiting
 * for the delegation to be returned or expire (so that other nfsds can
 * handle ops). Then, it must be acquired for the write to stable storage.
* (This function is somewhat similar to nfsrv_clientconflict(), but * the semantics differ in a couple of subtle ways. The return of 0 * indicates the conflict was resolved without sleeping here, not * that the conflict can't be resolved and the handling of nfsv4root_lock * differs, as noted above.) * Unlocks State before returning a non-zero value. */ static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p, vnode_t vp) { struct nfsclient *clp = stp->ls_clp; int gotlock, error, lktype = 0, retrycnt, zapped_clp; nfsv4stateid_t tstateid; fhandle_t tfh; /* * If the conflict is with an old delegation... */ if (stp->ls_flags & NFSLCK_OLDDELEG) { /* * You can delete it, if it has expired. */ if (clp->lc_delegtime < NFSD_MONOSEC) { nfsrv_freedeleg(stp); NFSUNLOCKSTATE(); error = -1; goto out; } NFSUNLOCKSTATE(); /* * During this delay, the old delegation could expire or it * could be recovered by the client via an Open with * CLAIM_DELEGATE_PREV. * Release the nfsv4root_lock, if held. */ if (*haslockp) { *haslockp = 0; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_DELAY; goto out; } /* * It's a current delegation, so: * - check to see if the delegation has expired * - if so, get the v4root lock and then expire it */ if (!(stp->ls_flags & NFSLCK_DELEGRECALL)) { /* * - do a recall callback, since not yet done * For now, never allow truncate to be set. To use * truncate safely, it must be guaranteed that the * Remove, Rename or Setattr with size of 0 will * succeed and that would require major changes to * the VFS/Vnode OPs. * Set the expiry time large enough so that it won't expire * until after the callback, then set it correctly, once * the callback is done. (The delegation will now time * out whether or not the Recall worked ok. The timeout * will be extended when ops are done on the delegation * stateid, up to the timelimit.) */ stp->ls_delegtime = NFSD_MONOSEC + (2 * nfsrv_lease) + NFSRV_LEASEDELTA; stp->ls_delegtimelimit = NFSD_MONOSEC + (6 * nfsrv_lease) + NFSRV_LEASEDELTA; stp->ls_flags |= NFSLCK_DELEGRECALL; /* * Loop NFSRV_CBRETRYCNT times while the CBRecall replies * NFSERR_BADSTATEID or NFSERR_BADHANDLE. This is done * in order to try and avoid a race that could happen * when a CBRecall request passed the Open reply with * the delegation in it when transitting the network. * Since nfsrv_docallback will sleep, don't use stp after * the call. */ NFSBCOPY((caddr_t)&stp->ls_stateid, (caddr_t)&tstateid, sizeof (tstateid)); NFSBCOPY((caddr_t)&stp->ls_lfp->lf_fh, (caddr_t)&tfh, sizeof (tfh)); NFSUNLOCKSTATE(); if (*haslockp) { *haslockp = 0; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } retrycnt = 0; do { error = nfsrv_docallback(clp, NFSV4OP_CBRECALL, &tstateid, 0, &tfh, NULL, NULL, 0, p); retrycnt++; } while ((error == NFSERR_BADSTATEID || error == NFSERR_BADHANDLE) && retrycnt < NFSV4_CBRETRYCNT); error = NFSERR_DELAY; goto out; } if (clp->lc_expiry >= NFSD_MONOSEC && stp->ls_delegtime >= NFSD_MONOSEC) { NFSUNLOCKSTATE(); /* * A recall has been done, but it has not yet expired. * So, RETURN_DELAY. */ if (*haslockp) { *haslockp = 0; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_DELAY; goto out; } /* * If we don't yet have the lock, just get it and then return, * since we need that before deleting expired state, such as * this delegation. 
* When getting the lock, unlock the vnode, so other nfsds that * are in progress won't get stuck waiting for the vnode lock. */ if (*haslockp == 0) { NFSUNLOCKSTATE(); if (vp != NULL) { lktype = NFSVOPISLOCKED(vp); NFSVOPUNLOCK(vp); } NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (!gotlock); NFSUNLOCKV4ROOTMUTEX(); *haslockp = 1; if (vp != NULL) { NFSVOPLOCK(vp, lktype | LK_RETRY); if (VN_IS_DOOMED(vp)) { *haslockp = 0; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); error = NFSERR_PERM; goto out; } } error = -1; goto out; } NFSUNLOCKSTATE(); /* * Ok, we can delete the expired delegation. * First, write the Revoke record to stable storage and then * clear out the conflict. * Since all other nfsd threads are now blocked, we can safely * sleep without the state changing. */ nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p); nfsrv_backupstable(); if (clp->lc_expiry < NFSD_MONOSEC) { nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); LIST_REMOVE(clp, lc_hash); zapped_clp = 1; } else { nfsrv_freedeleg(stp); zapped_clp = 0; } if (zapped_clp) nfsrv_zapclient(clp, p); error = -1; out: NFSEXITCODE(error); return (error); } /* * Check for a remove allowed, if remove is set to 1 and get rid of * delegations. */ APPLESTATIC int nfsrv_checkremove(vnode_t vp, int remove, struct nfsrv_descript *nd, nfsquad_t clientid, NFSPROC_T *p) { struct nfsclient *clp; struct nfsstate *stp; struct nfslockfile *lfp; int error, haslock = 0; fhandle_t nfh; clp = NULL; /* * First, get the lock file structure. * (A return of -1 means no associated state, so remove ok.) */ error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p); tryagain: NFSLOCKSTATE(); if (error == 0 && clientid.qval != 0) error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (!error) error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0); if (error) { NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } if (error == -1) error = 0; goto out; } /* * Now, we must Recall any delegations. */ error = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p); if (error) { /* * nfsrv_cleandeleg() unlocks state for non-zero * return. */ if (error == -1) goto tryagain; if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } goto out; } /* * Now, look for a conflicting open share. */ if (remove) { /* * If the entry in the directory was the last reference to the * corresponding filesystem object, the object can be destroyed. */ if (lfp->lf_usecount > 1) LIST_FOREACH(stp, &lfp->lf_open, ls_file) { if (stp->ls_flags & NFSLCK_WRITEDENY) { error = NFSERR_FILEOPEN; break; } } } NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } out: NFSEXITCODE(error); return (error); } /* * Clear out all delegations for the file referred to by lfp. * May return NFSERR_DELAY, if there will be a delay waiting for * delegations to expire. * Returns -1 to indicate it slept while recalling a delegation. * This function has the side effect of deleting the nfslockfile structure, * if it no longer has associated state and didn't have to sleep. * Unlocks State before a non-zero value is returned.
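* (Conversely, for a return of 0 the state lock remains held by the caller.)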
*/ static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp, struct nfsclient *clp, int *haslockp, NFSPROC_T *p) { struct nfsstate *stp, *nstp; int ret = 0; stp = LIST_FIRST(&lfp->lf_deleg); while (stp != LIST_END(&lfp->lf_deleg)) { nstp = LIST_NEXT(stp, ls_file); if (stp->ls_clp != clp) { ret = nfsrv_delegconflict(stp, haslockp, p, vp); if (ret) { /* * nfsrv_delegconflict() unlocks state * when it returns non-zero. */ goto out; } } stp = nstp; } out: NFSEXITCODE(ret); return (ret); } /* * There are certain operations that, when being done outside of NFSv4, * require that any NFSv4 delegation for the file be recalled. * This function is to be called for those cases: * VOP_RENAME() - When a delegation is being recalled for any reason, * the client may have to do Opens against the server, using the file's * final component name. If the file has been renamed on the server, * that component name will be incorrect and the Open will fail. * VOP_REMOVE() - Theoretically, a client could Open a file after it has * been removed on the server, if there is a delegation issued to * that client for the file. I say "theoretically" since clients * normally do an Access Op before the Open and that Access Op will * fail with ESTALE. Note that NFSv2 and 3 don't even do Opens, so * they will detect the file's removal in the same manner. (There is * one case where RFC3530 allows a client to do an Open without first * doing an Access Op, which is passage of a check against the ACE * returned with a Write delegation, but current practice is to ignore * the ACE and always do an Access Op.) * Since the functions can only be called with an unlocked vnode, this * can't be done at this time. * VOP_ADVLOCK() - When a client holds a delegation, it can issue byte range * locks locally in the client, which are not visible to the server. To * deal with this, issuing of delegations for a vnode must be disabled * and all delegations for the vnode recalled. This is done via the * second function, using the VV_DISABLEDELEG vflag on the vnode. */ APPLESTATIC void nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p) { time_t starttime; int error; /* * First, check to see if the server is currently running and it has * been called for a regular file when issuing delegations. */ if (newnfs_numnfsd == 0 || vp->v_type != VREG || nfsrv_issuedelegs == 0) return; KASSERT((NFSVOPISLOCKED(vp) != LK_EXCLUSIVE), ("vp %p is locked", vp)); /* * First, get a reference on the nfsv4rootfs_lock so that an * exclusive lock cannot be acquired by another thread. */ NFSLOCKV4ROOTMUTEX(); nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); /* * Now, call nfsrv_checkremove() in a loop while it returns * NFSERR_DELAY. Return upon any other error or when timed out. */ starttime = NFSD_MONOSEC; do { if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) { error = nfsrv_checkremove(vp, 0, NULL, (nfsquad_t)((u_quad_t)0), p); NFSVOPUNLOCK(vp); } else error = EPERM; if (error == NFSERR_DELAY) { if (NFSD_MONOSEC - starttime > NFS_REMOVETIMEO) break; /* Sleep for a short period of time */ (void) nfs_catnap(PZERO, 0, "nfsremove"); } } while (error == NFSERR_DELAY); NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); NFSUNLOCKV4ROOTMUTEX(); } APPLESTATIC void nfsd_disabledelegation(vnode_t vp, NFSPROC_T *p) { #ifdef VV_DISABLEDELEG /* * First, flag issuance of delegations disabled. */ atomic_set_long(&vp->v_vflag, VV_DISABLEDELEG); #endif /* * Then call nfsd_recalldelegation() to get rid of all extant * delegations. 
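* (The VV_DISABLEDELEG vflag set above only stops new delegations from being issued; the call below deals with the ones already outstanding.)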
*/ nfsd_recalldelegation(vp, p); } /* * Check for conflicting locks, etc. and then get rid of delegations. * (At one point I thought that I should get rid of delegations for any * Setattr, since it could potentially disallow the I/O op (read or write) * allowed by the delegation. However, Setattr Ops that aren't changing * the size get a stateid of all 0s, so you can't tell if it is a delegation * for the same client or a different one, so I decided to only get rid * of delegations for other clients when the size is being changed.) * In general, a Setattr can disable NFS I/O Ops that are outstanding, such * as Write backs, even if there is no delegation, so it really isn't any * different? */ APPLESTATIC int nfsrv_checksetattr(vnode_t vp, struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp, struct nfsexstuff *exp, NFSPROC_T *p) { struct nfsstate st, *stp = &st; struct nfslock lo, *lop = &lo; int error = 0; nfsquad_t clientid; if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE)) { stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS); lop->lo_first = nvap->na_size; } else { stp->ls_flags = 0; lop->lo_first = 0; } if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNER) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_MODE) || NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL)) stp->ls_flags |= NFSLCK_SETATTR; if (stp->ls_flags == 0) goto out; lop->lo_end = NFS64BITSSET; lop->lo_flags = NFSLCK_WRITE; stp->ls_ownerlen = 0; stp->ls_op = NULL; stp->ls_uid = nd->nd_cred->cr_uid; stp->ls_stateid.seqid = stateidp->seqid; clientid.lval[0] = stp->ls_stateid.other[0] = stateidp->other[0]; clientid.lval[1] = stp->ls_stateid.other[1] = stateidp->other[1]; stp->ls_stateid.other[2] = stateidp->other[2]; error = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid, stateidp, exp, nd, p); out: NFSEXITCODE2(error, nd); return (error); } /* * Check for a write delegation and do a CBGETATTR if there is one, updating * the attributes, as required. * Should I return an error if I can't get the attributes? (For now, I'll * just return ok.) */ APPLESTATIC int nfsrv_checkgetattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp, NFSPROC_T *p) { struct nfsstate *stp; struct nfslockfile *lfp; struct nfsclient *clp; struct nfsvattr nva; fhandle_t nfh; int error = 0; nfsattrbit_t cbbits; u_quad_t delegfilerev; NFSCBGETATTR_ATTRBIT(attrbitp, &cbbits); if (!NFSNONZERO_ATTRBIT(&cbbits)) goto out; if (nfsrv_writedelegcnt == 0) goto out; /* * Get the lock file structure. * (A return of -1 means no associated state, so return ok.) */ error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p); NFSLOCKSTATE(); if (!error) error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0); if (error) { NFSUNLOCKSTATE(); if (error == -1) error = 0; goto out; } /* * Now, look for a write delegation. */ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if (stp->ls_flags & NFSLCK_DELEGWRITE) break; } if (stp == LIST_END(&lfp->lf_deleg)) { NFSUNLOCKSTATE(); goto out; } clp = stp->ls_clp; delegfilerev = stp->ls_filerev; /* * If the Write delegation was issued as a part of this Compound RPC * or if we have an Implied Clientid (used in a previous Op in this * compound) and it is the client the delegation was issued to, * just return ok. * I also assume that it is from the same client iff the network * host IP address is the same as the callback address. (Not * exactly correct by the RFC, but avoids a lot of Getattr * callbacks.)
*/ if (nd->nd_compref == stp->ls_compref || ((nd->nd_flag & ND_IMPLIEDCLID) && clp->lc_clientid.qval == nd->nd_clientid.qval) || nfsaddr2_match(clp->lc_req.nr_nam, nd->nd_nam)) { NFSUNLOCKSTATE(); goto out; } /* * We are now done with the delegation state structure, * so the statelock can be released and we can now tsleep(). */ /* * Now, we must do the CB Getattr callback, to see if Change or Size * has changed. */ if (clp->lc_expiry >= NFSD_MONOSEC) { NFSUNLOCKSTATE(); NFSVNO_ATTRINIT(&nva); nva.na_filerev = NFS64BITSSET; error = nfsrv_docallback(clp, NFSV4OP_CBGETATTR, NULL, 0, &nfh, &nva, &cbbits, 0, p); if (!error) { if ((nva.na_filerev != NFS64BITSSET && nva.na_filerev > delegfilerev) || (NFSVNO_ISSETSIZE(&nva) && nva.na_size != nvap->na_size)) { error = nfsvno_updfilerev(vp, nvap, nd, p); if (NFSVNO_ISSETSIZE(&nva)) nvap->na_size = nva.na_size; } } else error = 0; /* Ignore callback errors for now. */ } else { NFSUNLOCKSTATE(); } out: NFSEXITCODE2(error, nd); return (error); } /* * This function looks for openowners that haven't had any opens for * a while and throws them away. Called by an nfsd when NFSNSF_NOOPENS * is set. */ APPLESTATIC void nfsrv_throwawayopens(NFSPROC_T *p) { struct nfsclient *clp, *nclp; struct nfsstate *stp, *nstp; int i; NFSLOCKSTATE(); nfsrv_stablefirst.nsf_flags &= ~NFSNSF_NOOPENS; /* * For each client... */ for (i = 0; i < nfsrv_clienthashsize; i++) { LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) { LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) { if (LIST_EMPTY(&stp->ls_open) && (stp->ls_noopens > NFSNOOPEN || (nfsrv_openpluslock * 2) > nfsrv_v4statelimit)) nfsrv_freeopenowner(stp, 0, p); } } } NFSUNLOCKSTATE(); } /* * This function checks to see if the credentials are the same. * Returns 1 for not same, 0 otherwise. */ static int nfsrv_notsamecredname(struct nfsrv_descript *nd, struct nfsclient *clp) { if (nd->nd_flag & ND_GSS) { if (!(clp->lc_flags & LCL_GSS)) return (1); if (clp->lc_flags & LCL_NAME) { if (nd->nd_princlen != clp->lc_namelen || NFSBCMP(nd->nd_principal, clp->lc_name, clp->lc_namelen)) return (1); else return (0); } if (nd->nd_cred->cr_uid == clp->lc_uid) return (0); else return (1); } else if (clp->lc_flags & LCL_GSS) return (1); /* * For AUTH_SYS, allow the same uid or root. (This is underspecified * in RFC3530, which talks about principals, but doesn't say anything * about uids for AUTH_SYS.) */ if (nd->nd_cred->cr_uid == clp->lc_uid || nd->nd_cred->cr_uid == 0) return (0); else return (1); } /* * Calculate the lease expiry time. */ static time_t nfsrv_leaseexpiry(void) { if (nfsrv_stablefirst.nsf_eograce > NFSD_MONOSEC) return (NFSD_MONOSEC + 2 * (nfsrv_lease + NFSRV_LEASEDELTA)); return (NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA); } /* * Delay the delegation timeout as far as ls_delegtimelimit, as required. */ static void nfsrv_delaydelegtimeout(struct nfsstate *stp) { if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0) return; if ((stp->ls_delegtime + 15) > NFSD_MONOSEC && stp->ls_delegtime < stp->ls_delegtimelimit) { stp->ls_delegtime += nfsrv_lease; if (stp->ls_delegtime > stp->ls_delegtimelimit) stp->ls_delegtime = stp->ls_delegtimelimit; } } /* * This function checks to see if there is any other state associated * with the openowner for this Open. * It returns 1 if there is no other state, 0 otherwise. 
*/ static int nfsrv_nootherstate(struct nfsstate *stp) { struct nfsstate *tstp; LIST_FOREACH(tstp, &stp->ls_openowner->ls_open, ls_list) { if (tstp != stp || !LIST_EMPTY(&tstp->ls_lock)) return (0); } return (1); } /* * Create a list of lock deltas (changes to local byte range locking * that can be rolled back using the list) and apply the changes via * nfsvno_advlock(). Optionally, lock the list. It is expected that either * the rollback or update function will be called after this. * It returns an error (and rolls back, as required), if any nfsvno_advlock() * call fails. If it returns an error, it will unlock the list. */ static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags, uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p) { struct nfslock *lop, *nlop; int error = 0; /* Loop through the list of locks. */ lop = LIST_FIRST(&lfp->lf_locallock); while (first < end && lop != NULL) { nlop = LIST_NEXT(lop, lo_lckowner); if (first >= lop->lo_end) { /* not there yet */ lop = nlop; } else if (first < lop->lo_first) { /* new one starts before entry in list */ if (end <= lop->lo_first) { /* no overlap between old and new */ error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first, end, cfp, p); if (error != 0) break; first = end; } else { /* handle fragment overlapped with new one */ error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first, lop->lo_first, cfp, p); if (error != 0) break; first = lop->lo_first; } } else { /* new one overlaps this entry in list */ if (end <= lop->lo_end) { /* overlaps all of new one */ error = nfsrv_dolocal(vp, lfp, flags, lop->lo_flags, first, end, cfp, p); if (error != 0) break; first = end; } else { /* handle fragment overlapped with new one */ error = nfsrv_dolocal(vp, lfp, flags, lop->lo_flags, first, lop->lo_end, cfp, p); if (error != 0) break; first = lop->lo_end; lop = nlop; } } } if (first < end && error == 0) /* handle fragment past end of list */ error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first, end, cfp, p); NFSEXITCODE(error); return (error); } /* * Local lock unlock. Unlock all byte ranges that are no longer locked * by NFSv4. To do this, unlock any subranges of first-->end that * do not overlap with the byte ranges of any lock in the lfp->lf_lock * list. This list has all locks for the file held by other * <lockowner, clientid> tuples. The list is ordered by increasing * lo_first value, but may have entries that overlap each other, for * the case of read locks. */ static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first, uint64_t init_end, NFSPROC_T *p) { struct nfslock *lop; uint64_t first, end, prevfirst __unused; first = init_first; end = init_end; while (first < init_end) { /* Loop through all nfs locks, adjusting first and end */ prevfirst = 0; LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) { KASSERT(prevfirst <= lop->lo_first, ("nfsv4 locks out of order")); KASSERT(lop->lo_first < lop->lo_end, ("nfsv4 bogus lock")); prevfirst = lop->lo_first; if (first >= lop->lo_first && first < lop->lo_end) /* * Overlaps with initial part, so trim * off that initial part by moving first past * it. */ first = lop->lo_end; else if (end > lop->lo_first && lop->lo_first > first) { /* * This lock defines the end of the * segment to unlock, so set end to the * start of it and break out of the loop.
*/ end = lop->lo_first; break; } if (first >= end) /* * There is no segment left to do, so * break out of this loop and then exit * the outer while() since first will be set * to end, which must equal init_end here. */ break; } if (first < end) { /* Unlock this segment */ (void) nfsrv_dolocal(vp, lfp, NFSLCK_UNLOCK, NFSLCK_READ, first, end, NULL, p); nfsrv_locallock_commit(lfp, NFSLCK_UNLOCK, first, end); } /* * Now move past this segment and look for any further * segment in the range, if there is one. */ first = end; end = init_end; } } /* * Do the local lock operation and update the rollback list, as required. * Perform the rollback and return the error if nfsvno_advlock() fails. */ static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p) { struct nfsrollback *rlp; int error = 0, ltype, oldltype; if (flags & NFSLCK_WRITE) ltype = F_WRLCK; else if (flags & NFSLCK_READ) ltype = F_RDLCK; else ltype = F_UNLCK; if (oldflags & NFSLCK_WRITE) oldltype = F_WRLCK; else if (oldflags & NFSLCK_READ) oldltype = F_RDLCK; else oldltype = F_UNLCK; if (ltype == oldltype || (oldltype == F_WRLCK && ltype == F_RDLCK)) /* nothing to do */ goto out; error = nfsvno_advlock(vp, ltype, first, end, p); if (error != 0) { if (cfp != NULL) { cfp->cl_clientid.lval[0] = 0; cfp->cl_clientid.lval[1] = 0; cfp->cl_first = 0; cfp->cl_end = NFS64BITSSET; cfp->cl_flags = NFSLCK_WRITE; cfp->cl_ownerlen = 5; NFSBCOPY("LOCAL", cfp->cl_owner, 5); } nfsrv_locallock_rollback(vp, lfp, p); } else if (ltype != F_UNLCK) { rlp = malloc(sizeof (struct nfsrollback), M_NFSDROLLBACK, M_WAITOK); rlp->rlck_first = first; rlp->rlck_end = end; rlp->rlck_type = oldltype; LIST_INSERT_HEAD(&lfp->lf_rollback, rlp, rlck_list); } out: NFSEXITCODE(error); return (error); } /* * Roll back local lock changes and free up the rollback list. */ static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p) { struct nfsrollback *rlp, *nrlp; LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) { (void) nfsvno_advlock(vp, rlp->rlck_type, rlp->rlck_first, rlp->rlck_end, p); free(rlp, M_NFSDROLLBACK); } LIST_INIT(&lfp->lf_rollback); } /* * Update local lock list and delete rollback list (ie now committed to the * local locks). Most of the work is done by the internal function. */ static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first, uint64_t end) { struct nfsrollback *rlp, *nrlp; struct nfslock *new_lop, *other_lop; new_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); if (flags & (NFSLCK_READ | NFSLCK_WRITE)) other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK); else other_lop = NULL; new_lop->lo_flags = flags; new_lop->lo_first = first; new_lop->lo_end = end; nfsrv_updatelock(NULL, &new_lop, &other_lop, lfp); if (new_lop != NULL) free(new_lop, M_NFSDLOCK); if (other_lop != NULL) free(other_lop, M_NFSDLOCK); /* and get rid of the rollback list */ LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) free(rlp, M_NFSDROLLBACK); LIST_INIT(&lfp->lf_rollback); } /* * Lock the struct nfslockfile for local lock updating. */ static void nfsrv_locklf(struct nfslockfile *lfp) { int gotlock; /* lf_usecount ensures *lfp won't be free'd */ lfp->lf_usecount++; do { gotlock = nfsv4_lock(&lfp->lf_locallock_lck, 1, NULL, NFSSTATEMUTEXPTR, NULL); } while (gotlock == 0); lfp->lf_usecount--; } /* * Unlock the struct nfslockfile after local lock updating. 
*/ static void nfsrv_unlocklf(struct nfslockfile *lfp) { nfsv4_unlock(&lfp->lf_locallock_lck, 0); } /* * Clear out all state for the NFSv4 server. * Must be called by a thread that can sleep when no nfsds are running. */ void nfsrv_throwawayallstate(NFSPROC_T *p) { struct nfsclient *clp, *nclp; struct nfslockfile *lfp, *nlfp; int i; /* * For each client, clean out the state and then free the structure. */ for (i = 0; i < nfsrv_clienthashsize; i++) { LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) { nfsrv_cleanclient(clp, p); nfsrv_freedeleglist(&clp->lc_deleg); nfsrv_freedeleglist(&clp->lc_olddeleg); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } } /* * Also, free up any remaining lock file structures. */ for (i = 0; i < nfsrv_lockhashsize; i++) { LIST_FOREACH_SAFE(lfp, &nfslockhash[i], lf_hash, nlfp) { printf("nfsd unload: fnd a lock file struct\n"); nfsrv_freenfslockfile(lfp); } } /* And get rid of the deviceid structures and layouts. */ nfsrv_freealllayoutsanddevids(); } /* * Check the sequence# for the session and slot provided as an argument. * Also, renew the lease if the session will return NFS_OK. */ int nfsrv_checksequence(struct nfsrv_descript *nd, uint32_t sequenceid, uint32_t *highest_slotidp, uint32_t *target_highest_slotidp, int cache_this, uint32_t *sflagsp, NFSPROC_T *p) { struct nfsdsession *sep; struct nfssessionhash *shp; int error; SVCXPRT *savxprt; shp = NFSSESSIONHASH(nd->nd_sessionid); NFSLOCKSESSION(shp); sep = nfsrv_findsession(nd->nd_sessionid); if (sep == NULL) { NFSUNLOCKSESSION(shp); return (NFSERR_BADSESSION); } error = nfsv4_seqsession(sequenceid, nd->nd_slotid, *highest_slotidp, sep->sess_slots, NULL, NFSV4_SLOTS - 1); if (error != 0) { NFSUNLOCKSESSION(shp); return (error); } if (cache_this != 0) nd->nd_flag |= ND_SAVEREPLY; /* Renew the lease. */ sep->sess_clp->lc_expiry = nfsrv_leaseexpiry(); nd->nd_clientid.qval = sep->sess_clp->lc_clientid.qval; nd->nd_flag |= ND_IMPLIEDCLID; /* Save maximum request and reply sizes. */ nd->nd_maxreq = sep->sess_maxreq; nd->nd_maxresp = sep->sess_maxresp; /* * If this session handles the backchannel, save the nd_xprt for this * RPC, since this is the one being used. * RFC-5661 specifies that the fore channel will be implicitly * bound by a Sequence operation. However, since some NFSv4.1 clients * erroneously assumed that the back channel would be implicitly * bound as well, do the implicit binding unless a * BindConnectiontoSession has already been done on the session. */ if (sep->sess_clp->lc_req.nr_client != NULL && sep->sess_cbsess.nfsess_xprt != nd->nd_xprt && (sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0 && (sep->sess_clp->lc_flags & LCL_DONEBINDCONN) == 0) { NFSD_DEBUG(2, "nfsrv_checksequence: implicit back channel bind\n"); savxprt = sep->sess_cbsess.nfsess_xprt; SVC_ACQUIRE(nd->nd_xprt); nd->nd_xprt->xp_p2 = sep->sess_clp->lc_req.nr_client->cl_private; nd->nd_xprt->xp_idletimeout = 0; /* Disable timeout. */ sep->sess_cbsess.nfsess_xprt = nd->nd_xprt; if (savxprt != NULL) SVC_RELEASE(savxprt); } *sflagsp = 0; if (sep->sess_clp->lc_req.nr_client == NULL) *sflagsp |= NFSV4SEQ_CBPATHDOWN; NFSUNLOCKSESSION(shp); if (error == NFSERR_EXPIRED) { *sflagsp |= NFSV4SEQ_EXPIREDALLSTATEREVOKED; error = 0; } else if (error == NFSERR_ADMINREVOKED) { *sflagsp |= NFSV4SEQ_ADMINSTATEREVOKED; error = 0; } *highest_slotidp = *target_highest_slotidp = NFSV4_SLOTS - 1; return (0); } /* * Check/set reclaim complete for this session/clientid. 
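* A non-zero onefs argument indicates a ReclaimComplete with rca_one_fs set, which only applies to a single file system and is simply noted via the LCL_RECLAIMONEFS flag.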
*/ int nfsrv_checkreclaimcomplete(struct nfsrv_descript *nd, int onefs) { struct nfsdsession *sep; struct nfssessionhash *shp; int error = 0; shp = NFSSESSIONHASH(nd->nd_sessionid); NFSLOCKSTATE(); NFSLOCKSESSION(shp); sep = nfsrv_findsession(nd->nd_sessionid); if (sep == NULL) { NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); return (NFSERR_BADSESSION); } if (onefs != 0) sep->sess_clp->lc_flags |= LCL_RECLAIMONEFS; /* Check to see if reclaim complete has already happened. */ else if ((sep->sess_clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) error = NFSERR_COMPLETEALREADY; else { sep->sess_clp->lc_flags |= LCL_RECLAIMCOMPLETE; nfsrv_markreclaim(sep->sess_clp); } NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); return (error); } /* * Cache the reply in a session slot. */ void nfsrv_cache_session(uint8_t *sessionid, uint32_t slotid, int repstat, struct mbuf **m) { struct nfsdsession *sep; struct nfssessionhash *shp; shp = NFSSESSIONHASH(sessionid); NFSLOCKSESSION(shp); sep = nfsrv_findsession(sessionid); if (sep == NULL) { NFSUNLOCKSESSION(shp); printf("nfsrv_cache_session: no session\n"); m_freem(*m); return; } nfsv4_seqsess_cacherep(slotid, sep->sess_slots, repstat, m); NFSUNLOCKSESSION(shp); } /* * Search for a session that matches the sessionid. */ static struct nfsdsession * nfsrv_findsession(uint8_t *sessionid) { struct nfsdsession *sep; struct nfssessionhash *shp; shp = NFSSESSIONHASH(sessionid); LIST_FOREACH(sep, &shp->list, sess_hash) { if (!NFSBCMP(sessionid, sep->sess_sessionid, NFSX_V4SESSIONID)) break; } return (sep); } /* * Destroy a session. */ int nfsrv_destroysession(struct nfsrv_descript *nd, uint8_t *sessionid) { int error, igotlock, samesess; samesess = 0; if (!NFSBCMP(sessionid, nd->nd_sessionid, NFSX_V4SESSIONID) && (nd->nd_flag & ND_HASSEQUENCE) != 0) { samesess = 1; if ((nd->nd_flag & ND_LASTOP) == 0) return (NFSERR_BADSESSION); } /* Lock out other nfsd threads */ NFSLOCKV4ROOTMUTEX(); nfsv4_relref(&nfsv4rootfs_lock); do { igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (igotlock == 0); NFSUNLOCKV4ROOTMUTEX(); error = nfsrv_freesession(NULL, sessionid); if (error == 0 && samesess != 0) nd->nd_flag &= ~ND_HASSEQUENCE; NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); return (error); } /* * Bind a connection to a session. * For now, only certain variants are supported, since the current session * structure can only handle a single backchannel entry, which will be * applied to all connections if it is set. */ int nfsrv_bindconnsess(struct nfsrv_descript *nd, uint8_t *sessionid, int *foreaftp) { struct nfssessionhash *shp; struct nfsdsession *sep; struct nfsclient *clp; SVCXPRT *savxprt; int error; error = 0; shp = NFSSESSIONHASH(sessionid); NFSLOCKSTATE(); NFSLOCKSESSION(shp); sep = nfsrv_findsession(sessionid); if (sep != NULL) { clp = sep->sess_clp; if (*foreaftp == NFSCDFC4_BACK || *foreaftp == NFSCDFC4_BACK_OR_BOTH || *foreaftp == NFSCDFC4_FORE_OR_BOTH) { /* Try to set up a backchannel. */ if (clp->lc_req.nr_client == NULL) { NFSD_DEBUG(2, "nfsrv_bindconnsess: acquire " "backchannel\n"); clp->lc_req.nr_client = (struct __rpc_client *) clnt_bck_create(nd->nd_xprt->xp_socket, sep->sess_cbprogram, NFSV4_CBVERS); } if (clp->lc_req.nr_client != NULL) { NFSD_DEBUG(2, "nfsrv_bindconnsess: set up " "backchannel\n"); savxprt = sep->sess_cbsess.nfsess_xprt; SVC_ACQUIRE(nd->nd_xprt); nd->nd_xprt->xp_p2 = clp->lc_req.nr_client->cl_private; /* Disable idle timeout. 
*/ nd->nd_xprt->xp_idletimeout = 0; sep->sess_cbsess.nfsess_xprt = nd->nd_xprt; if (savxprt != NULL) SVC_RELEASE(savxprt); sep->sess_crflags |= NFSV4CRSESS_CONNBACKCHAN; clp->lc_flags |= LCL_DONEBINDCONN; if (*foreaftp == NFSCDFS4_BACK) *foreaftp = NFSCDFS4_BACK; else *foreaftp = NFSCDFS4_BOTH; } else if (*foreaftp != NFSCDFC4_BACK) { NFSD_DEBUG(2, "nfsrv_bindconnsess: can't set " "up backchannel\n"); sep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN; clp->lc_flags |= LCL_DONEBINDCONN; *foreaftp = NFSCDFS4_FORE; } else { error = NFSERR_NOTSUPP; printf("nfsrv_bindconnsess: Can't add " "backchannel\n"); } } else { NFSD_DEBUG(2, "nfsrv_bindconnsess: Set forechannel\n"); clp->lc_flags |= LCL_DONEBINDCONN; *foreaftp = NFSCDFS4_FORE; } } else error = NFSERR_BADSESSION; NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); return (error); } /* * Free up a session structure. */ static int nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid) { struct nfssessionhash *shp; int i; NFSLOCKSTATE(); if (sep == NULL) { shp = NFSSESSIONHASH(sessionid); NFSLOCKSESSION(shp); sep = nfsrv_findsession(sessionid); } else { shp = NFSSESSIONHASH(sep->sess_sessionid); NFSLOCKSESSION(shp); } if (sep != NULL) { sep->sess_refcnt--; if (sep->sess_refcnt > 0) { NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); return (NFSERR_BACKCHANBUSY); } LIST_REMOVE(sep, sess_hash); LIST_REMOVE(sep, sess_list); } NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); if (sep == NULL) return (NFSERR_BADSESSION); for (i = 0; i < NFSV4_SLOTS; i++) if (sep->sess_slots[i].nfssl_reply != NULL) m_freem(sep->sess_slots[i].nfssl_reply); if (sep->sess_cbsess.nfsess_xprt != NULL) SVC_RELEASE(sep->sess_cbsess.nfsess_xprt); free(sep, M_NFSDSESSION); return (0); } /* * Free a stateid. * RFC5661 says that it should fail when there are associated opens, locks * or delegations. Since stateids represent opens, I don't see how you can * free an open stateid (it will be free'd when closed), so this function * only works for lock stateids (freeing the lock_owner) or delegations. */ int nfsrv_freestateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, NFSPROC_T *p) { struct nfsclient *clp; struct nfsstate *stp; int error; NFSLOCKSTATE(); /* * Look up the stateid */ error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (error == 0) { /* First, check for a delegation. */ LIST_FOREACH(stp, &clp->lc_deleg, ls_list) { if (!NFSBCMP(stp->ls_stateid.other, stateidp->other, NFSX_STATEIDOTHER)) break; } if (stp != NULL) { nfsrv_freedeleg(stp); NFSUNLOCKSTATE(); return (error); } } /* Not a delegation, try for a lock_owner. */ if (error == 0) error = nfsrv_getstate(clp, stateidp, 0, &stp); if (error == 0 && ((stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) != 0 || (stp->ls_flags & NFSLCK_LOCK) == 0)) /* Not a lock_owner stateid. */ error = NFSERR_LOCKSHELD; if (error == 0 && !LIST_EMPTY(&stp->ls_lock)) error = NFSERR_LOCKSHELD; if (error == 0) nfsrv_freelockowner(stp, NULL, 0, p); NFSUNLOCKSTATE(); return (error); } /* * Test a stateid. 
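* Returns NFSERR_OLDSTATEID when a non-zero seqid in the stateid is older than the server's current seqid for it.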
*/ int nfsrv_teststateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, NFSPROC_T *p) { struct nfsclient *clp; struct nfsstate *stp; int error; NFSLOCKSTATE(); /* * Look up the stateid */ error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (error == 0) error = nfsrv_getstate(clp, stateidp, 0, &stp); if (error == 0 && stateidp->seqid != 0 && SEQ_LT(stateidp->seqid, stp->ls_stateid.seqid)) error = NFSERR_OLDSTATEID; NFSUNLOCKSTATE(); return (error); } /* * Generate the xdr for an NFSv4.1 CBSequence Operation. */ static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp, int dont_replycache, struct nfsdsession **sepp) { struct nfsdsession *sep; uint32_t *tl, slotseq = 0; int maxslot, slotpos; uint8_t sessionid[NFSX_V4SESSIONID]; int error; error = nfsv4_getcbsession(clp, sepp); if (error != 0) return (error); sep = *sepp; (void)nfsv4_sequencelookup(NULL, &sep->sess_cbsess, &slotpos, &maxslot, &slotseq, sessionid); KASSERT(maxslot >= 0, ("nfsv4_setcbsequence neg maxslot")); /* Build the Sequence arguments. */ NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 5 * NFSX_UNSIGNED); bcopy(sessionid, tl, NFSX_V4SESSIONID); tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; nd->nd_slotseq = tl; *tl++ = txdr_unsigned(slotseq); *tl++ = txdr_unsigned(slotpos); *tl++ = txdr_unsigned(maxslot); if (dont_replycache == 0) *tl++ = newnfs_true; else *tl++ = newnfs_false; *tl = 0; /* No referring call list, for now. */ nd->nd_flag |= ND_HASSEQUENCE; return (0); } /* * Get a session for the callback. */ static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp) { struct nfsdsession *sep; NFSLOCKSTATE(); LIST_FOREACH(sep, &clp->lc_session, sess_list) { if ((sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0) break; } if (sep == NULL) { NFSUNLOCKSTATE(); return (NFSERR_BADSESSION); } sep->sess_refcnt++; *sepp = sep; NFSUNLOCKSTATE(); return (0); } /* * Free up all backchannel xprts. This needs to be done when the nfsd threads * exit, since those transports will all be going away. * This is only called after all the nfsd threads are done performing RPCs, * so locking shouldn't be an issue. */ APPLESTATIC void nfsrv_freeallbackchannel_xprts(void) { struct nfsdsession *sep; struct nfsclient *clp; SVCXPRT *xprt; int i; for (i = 0; i < nfsrv_clienthashsize; i++) { LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) { LIST_FOREACH(sep, &clp->lc_session, sess_list) { xprt = sep->sess_cbsess.nfsess_xprt; sep->sess_cbsess.nfsess_xprt = NULL; if (xprt != NULL) SVC_RELEASE(xprt); } } } } /* * Do a layout commit. Actually just call nfsrv_updatemdsattr(). * I have no idea if the rest of these arguments will ever be useful? */ int nfsrv_layoutcommit(struct nfsrv_descript *nd, vnode_t vp, int layouttype, int hasnewoff, uint64_t newoff, uint64_t offset, uint64_t len, int hasnewmtime, struct timespec *newmtimep, int reclaim, nfsv4stateid_t *stateidp, int maxcnt, char *layp, int *hasnewsizep, uint64_t *newsizep, struct ucred *cred, NFSPROC_T *p) { struct nfsvattr na; int error; error = nfsrv_updatemdsattr(vp, &na, p); if (error == 0) { *hasnewsizep = 1; *newsizep = na.na_size; } return (error); } /* * Try and get a layout. 
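* For now, any layout issued covers the entire file and Read/Write layouts are only issued for file systems that are not exported read-only.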
*/ int nfsrv_layoutget(struct nfsrv_descript *nd, vnode_t vp, struct nfsexstuff *exp, int layouttype, int *iomode, uint64_t *offset, uint64_t *len, uint64_t minlen, nfsv4stateid_t *stateidp, int maxcnt, int *retonclose, int *layoutlenp, char *layp, struct ucred *cred, NFSPROC_T *p) { struct nfslayouthash *lhyp; struct nfslayout *lyp; char *devid; fhandle_t fh, *dsfhp; int error, mirrorcnt; if (nfsrv_devidcnt == 0) return (NFSERR_UNKNLAYOUTTYPE); if (*offset != 0) printf("nfsrv_layoutget: off=%ju len=%ju\n", (uintmax_t)*offset, (uintmax_t)*len); error = nfsvno_getfh(vp, &fh, p); NFSD_DEBUG(4, "layoutget getfh=%d\n", error); if (error != 0) return (error); /* * For now, all layouts are for entire files. * Only issue Read/Write layouts if requested for a non-readonly fs. */ if (NFSVNO_EXRDONLY(exp)) { if (*iomode == NFSLAYOUTIOMODE_RW) return (NFSERR_LAYOUTTRYLATER); *iomode = NFSLAYOUTIOMODE_READ; } if (*iomode != NFSLAYOUTIOMODE_RW) *iomode = NFSLAYOUTIOMODE_READ; /* * Check to see if a write layout can be issued for this file. * This is used during mirror recovery to avoid RW layouts being * issued for a file while it is being copied to the recovered * mirror. */ if (*iomode == NFSLAYOUTIOMODE_RW && nfsrv_dontlayout(&fh) != 0) return (NFSERR_LAYOUTTRYLATER); *retonclose = 0; *offset = 0; *len = UINT64_MAX; /* First, see if a layout already exists and return if found. */ lhyp = NFSLAYOUTHASH(&fh); NFSLOCKLAYOUT(lhyp); error = nfsrv_findlayout(&nd->nd_clientid, &fh, layouttype, p, &lyp); NFSD_DEBUG(4, "layoutget findlay=%d\n", error); /* * Not sure if the seqid must be the same, so I won't check it. */ if (error == 0 && (stateidp->other[0] != lyp->lay_stateid.other[0] || stateidp->other[1] != lyp->lay_stateid.other[1] || stateidp->other[2] != lyp->lay_stateid.other[2])) { if ((lyp->lay_flags & NFSLAY_CALLB) == 0) { NFSUNLOCKLAYOUT(lhyp); NFSD_DEBUG(1, "ret bad stateid\n"); return (NFSERR_BADSTATEID); } /* * I believe we get here because there is a race between * the client processing the CBLAYOUTRECALL and the layout * being deleted here on the server. * The client has now done a LayoutGet with a non-layout * stateid, as it would when there is no layout. * As such, free this layout and set error == NFSERR_BADSTATEID * so the code below will create a new layout structure as * would happen if no layout was found. * "lyp" will be set before being used below, but set it NULL * as a safety belt. */ nfsrv_freelayout(&lhyp->list, lyp); lyp = NULL; error = NFSERR_BADSTATEID; } if (error == 0) { if (lyp->lay_layoutlen > maxcnt) { NFSUNLOCKLAYOUT(lhyp); NFSD_DEBUG(1, "ret layout too small\n"); return (NFSERR_TOOSMALL); } if (*iomode == NFSLAYOUTIOMODE_RW) lyp->lay_flags |= NFSLAY_RW; else lyp->lay_flags |= NFSLAY_READ; NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen); *layoutlenp = lyp->lay_layoutlen; if (++lyp->lay_stateid.seqid == 0) lyp->lay_stateid.seqid = 1; stateidp->seqid = lyp->lay_stateid.seqid; NFSUNLOCKLAYOUT(lhyp); NFSD_DEBUG(4, "ret fnd layout\n"); return (0); } NFSUNLOCKLAYOUT(lhyp); /* Find the device id and file handle. 
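* There can be one of each per mirror, so the buffers below are sized for NFSDEV_MAXMIRRORS entries.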
*/ dsfhp = malloc(sizeof(fhandle_t) * NFSDEV_MAXMIRRORS, M_TEMP, M_WAITOK); devid = malloc(NFSX_V4DEVICEID * NFSDEV_MAXMIRRORS, M_TEMP, M_WAITOK); error = nfsrv_dsgetdevandfh(vp, p, &mirrorcnt, dsfhp, devid); NFSD_DEBUG(4, "layoutget devandfh=%d\n", error); if (error == 0) { if (layouttype == NFSLAYOUT_NFSV4_1_FILES) { if (NFSX_V4FILELAYOUT > maxcnt) error = NFSERR_TOOSMALL; else lyp = nfsrv_filelayout(nd, *iomode, &fh, dsfhp, devid, vp->v_mount->mnt_stat.f_fsid); } else { if (NFSX_V4FLEXLAYOUT(mirrorcnt) > maxcnt) error = NFSERR_TOOSMALL; else lyp = nfsrv_flexlayout(nd, *iomode, mirrorcnt, &fh, dsfhp, devid, vp->v_mount->mnt_stat.f_fsid); } } free(dsfhp, M_TEMP); free(devid, M_TEMP); if (error != 0) return (error); /* * Now, add this layout to the list. */ error = nfsrv_addlayout(nd, &lyp, stateidp, layp, layoutlenp, p); NFSD_DEBUG(4, "layoutget addl=%d\n", error); /* * The lyp will be set to NULL by nfsrv_addlayout() if it * linked the new structure into the lists. */ free(lyp, M_NFSDSTATE); return (error); } /* * Generate a File Layout. */ static struct nfslayout * nfsrv_filelayout(struct nfsrv_descript *nd, int iomode, fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs) { uint32_t *tl; struct nfslayout *lyp; uint64_t pattern_offset; lyp = malloc(sizeof(struct nfslayout) + NFSX_V4FILELAYOUT, M_NFSDSTATE, M_WAITOK | M_ZERO); lyp->lay_type = NFSLAYOUT_NFSV4_1_FILES; if (iomode == NFSLAYOUTIOMODE_RW) lyp->lay_flags = NFSLAY_RW; else lyp->lay_flags = NFSLAY_READ; NFSBCOPY(fhp, &lyp->lay_fh, sizeof(*fhp)); lyp->lay_clientid.qval = nd->nd_clientid.qval; lyp->lay_fsid = fs; /* Fill in the xdr for the files layout. */ tl = (uint32_t *)lyp->lay_xdr; NFSBCOPY(devid, tl, NFSX_V4DEVICEID); /* Device ID. */ tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); /* * Make the stripe size as many 64K blocks as will fit in the stripe * mask. Since there is only one stripe, the stripe size doesn't really * matter, except that the Linux client will only handle an exact * multiple of their PAGE_SIZE (usually 4K). I chose 64K as a value * that should cover most/all arches w.r.t. PAGE_SIZE. */ *tl++ = txdr_unsigned(NFSFLAYUTIL_STRIPE_MASK & ~0xffff); *tl++ = 0; /* 1st stripe index. */ pattern_offset = 0; txdr_hyper(pattern_offset, tl); tl += 2; /* Pattern offset. */ *tl++ = txdr_unsigned(1); /* 1 file handle. */ *tl++ = txdr_unsigned(NFSX_V4PNFSFH); NFSBCOPY(dsfhp, tl, sizeof(*dsfhp)); lyp->lay_layoutlen = NFSX_V4FILELAYOUT; return (lyp); } #define FLEX_OWNERID "999" #define FLEX_UID0 "0" /* * Generate a Flex File Layout. * The FLEX_OWNERID can be any string of 3 decimal digits. Although this * string goes on the wire, it isn't supposed to be used by the client, * since this server uses tight coupling. * Although not recommended by the spec., if vfs.nfsd.flexlinuxhack=1 use * a string of "0". This works around the Linux Flex File Layout driver bug * which uses the synthetic uid/gid strings for the "tightly coupled" case. 
*/ static struct nfslayout * nfsrv_flexlayout(struct nfsrv_descript *nd, int iomode, int mirrorcnt, fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs) { uint32_t *tl; struct nfslayout *lyp; uint64_t lenval; int i; lyp = malloc(sizeof(struct nfslayout) + NFSX_V4FLEXLAYOUT(mirrorcnt), M_NFSDSTATE, M_WAITOK | M_ZERO); lyp->lay_type = NFSLAYOUT_FLEXFILE; if (iomode == NFSLAYOUTIOMODE_RW) lyp->lay_flags = NFSLAY_RW; else lyp->lay_flags = NFSLAY_READ; NFSBCOPY(fhp, &lyp->lay_fh, sizeof(*fhp)); lyp->lay_clientid.qval = nd->nd_clientid.qval; lyp->lay_fsid = fs; lyp->lay_mirrorcnt = mirrorcnt; /* Fill in the xdr for the files layout. */ tl = (uint32_t *)lyp->lay_xdr; lenval = 0; txdr_hyper(lenval, tl); tl += 2; /* Stripe unit. */ *tl++ = txdr_unsigned(mirrorcnt); /* # of mirrors. */ for (i = 0; i < mirrorcnt; i++) { *tl++ = txdr_unsigned(1); /* One stripe. */ NFSBCOPY(devid, tl, NFSX_V4DEVICEID); /* Device ID. */ tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); devid += NFSX_V4DEVICEID; *tl++ = txdr_unsigned(1); /* Efficiency. */ *tl++ = 0; /* Proxy Stateid. */ *tl++ = 0x55555555; *tl++ = 0x55555555; *tl++ = 0x55555555; *tl++ = txdr_unsigned(1); /* 1 file handle. */ *tl++ = txdr_unsigned(NFSX_V4PNFSFH); NFSBCOPY(dsfhp, tl, sizeof(*dsfhp)); tl += (NFSM_RNDUP(NFSX_V4PNFSFH) / NFSX_UNSIGNED); dsfhp++; if (nfsrv_flexlinuxhack != 0) { *tl++ = txdr_unsigned(strlen(FLEX_UID0)); *tl = 0; /* 0 pad string. */ NFSBCOPY(FLEX_UID0, tl++, strlen(FLEX_UID0)); *tl++ = txdr_unsigned(strlen(FLEX_UID0)); *tl = 0; /* 0 pad string. */ NFSBCOPY(FLEX_UID0, tl++, strlen(FLEX_UID0)); } else { *tl++ = txdr_unsigned(strlen(FLEX_OWNERID)); NFSBCOPY(FLEX_OWNERID, tl++, NFSX_UNSIGNED); *tl++ = txdr_unsigned(strlen(FLEX_OWNERID)); NFSBCOPY(FLEX_OWNERID, tl++, NFSX_UNSIGNED); } } *tl++ = txdr_unsigned(0); /* ff_flags. */ *tl = txdr_unsigned(60); /* Status interval hint. */ lyp->lay_layoutlen = NFSX_V4FLEXLAYOUT(mirrorcnt); return (lyp); } /* * Parse and process Flex File errors returned via LayoutReturn. */ static void nfsrv_flexlayouterr(struct nfsrv_descript *nd, uint32_t *layp, int maxcnt, NFSPROC_T *p) { uint32_t *tl; int cnt, errcnt, i, j, opnum, stat; char devid[NFSX_V4DEVICEID]; tl = layp; cnt = fxdr_unsigned(int, *tl++); NFSD_DEBUG(4, "flexlayouterr cnt=%d\n", cnt); for (i = 0; i < cnt; i++) { /* Skip offset, length and stateid for now. */ tl += (4 + NFSX_STATEID / NFSX_UNSIGNED); errcnt = fxdr_unsigned(int, *tl++); NFSD_DEBUG(4, "flexlayouterr errcnt=%d\n", errcnt); for (j = 0; j < errcnt; j++) { NFSBCOPY(tl, devid, NFSX_V4DEVICEID); tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); stat = fxdr_unsigned(int, *tl++); opnum = fxdr_unsigned(int, *tl++); NFSD_DEBUG(4, "flexlayouterr op=%d stat=%d\n", opnum, stat); /* * Except for NFSERR_ACCES and NFSERR_STALE errors, * disable the mirror. */ if (stat != NFSERR_ACCES && stat != NFSERR_STALE) nfsrv_delds(devid, p); } } } /* * This function removes all flex file layouts which have a mirror with * a device id that matches the argument. * Called when the DS represented by the device id has failed. */ void nfsrv_flexmirrordel(char *devid, NFSPROC_T *p) { uint32_t *tl; struct nfslayout *lyp, *nlyp; struct nfslayouthash *lhyp; struct nfslayouthead loclyp; int i, j; NFSD_DEBUG(4, "flexmirrordel\n"); /* Move all layouts found onto a local list.
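* The LayoutRecall callbacks are then done after the hash list locks have been released.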
*/ TAILQ_INIT(&loclyp); for (i = 0; i < nfsrv_layouthashsize; i++) { lhyp = &nfslayouthash[i]; NFSLOCKLAYOUT(lhyp); TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) { if (lyp->lay_type == NFSLAYOUT_FLEXFILE && lyp->lay_mirrorcnt > 1) { NFSD_DEBUG(4, "possible match\n"); tl = lyp->lay_xdr; tl += 3; for (j = 0; j < lyp->lay_mirrorcnt; j++) { tl++; if (NFSBCMP(devid, tl, NFSX_V4DEVICEID) == 0) { /* Found one. */ NFSD_DEBUG(4, "fnd one\n"); TAILQ_REMOVE(&lhyp->list, lyp, lay_list); TAILQ_INSERT_HEAD(&loclyp, lyp, lay_list); break; } tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED + NFSM_RNDUP(NFSX_V4PNFSFH) / NFSX_UNSIGNED + 11 * NFSX_UNSIGNED); } } } NFSUNLOCKLAYOUT(lhyp); } /* Now, try to do a Layout recall for each one found. */ TAILQ_FOREACH_SAFE(lyp, &loclyp, lay_list, nlyp) { NFSD_DEBUG(4, "do layout recall\n"); /* * The layout stateid.seqid needs to be incremented * before doing a LAYOUT_RECALL callback. */ if (++lyp->lay_stateid.seqid == 0) lyp->lay_stateid.seqid = 1; nfsrv_recalllayout(lyp->lay_clientid, &lyp->lay_stateid, &lyp->lay_fh, lyp, 1, lyp->lay_type, p); nfsrv_freelayout(&loclyp, lyp); } } /* * Do a recall callback to the client for this layout. */ static int nfsrv_recalllayout(nfsquad_t clid, nfsv4stateid_t *stateidp, fhandle_t *fhp, struct nfslayout *lyp, int changed, int laytype, NFSPROC_T *p) { struct nfsclient *clp; int error; NFSD_DEBUG(4, "nfsrv_recalllayout\n"); error = nfsrv_getclient(clid, 0, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, NULL, p); NFSD_DEBUG(4, "aft nfsrv_getclient=%d\n", error); if (error != 0) { printf("nfsrv_recalllayout: getclient err=%d\n", error); return (error); } if ((clp->lc_flags & LCL_NFSV41) != 0) { error = nfsrv_docallback(clp, NFSV4OP_CBLAYOUTRECALL, stateidp, changed, fhp, NULL, NULL, laytype, p); /* If lyp != NULL, handle an error return here. */ if (error != 0 && lyp != NULL) { NFSDRECALLLOCK(); /* * Mark it returned, since no layout recall * has been done. * All errors seem to be non-recoverable, although * NFSERR_NOMATCHLAYOUT is a normal event. */ if ((lyp->lay_flags & NFSLAY_RECALL) != 0) { lyp->lay_flags |= NFSLAY_RETURNED; wakeup(lyp); } NFSDRECALLUNLOCK(); if (error != NFSERR_NOMATCHLAYOUT) printf("nfsrv_recalllayout: err=%d\n", error); } } else printf("nfsrv_recalllayout: clp not NFSv4.1\n"); return (error); } /* * Find a layout to recall when we exceed our high water mark. */ void nfsrv_recalloldlayout(NFSPROC_T *p) { struct nfslayouthash *lhyp; struct nfslayout *lyp; nfsquad_t clientid; nfsv4stateid_t stateid; fhandle_t fh; int error, laytype = 0, ret; lhyp = &nfslayouthash[arc4random() % nfsrv_layouthashsize]; NFSLOCKLAYOUT(lhyp); TAILQ_FOREACH_REVERSE(lyp, &lhyp->list, nfslayouthead, lay_list) { if ((lyp->lay_flags & NFSLAY_CALLB) == 0) { lyp->lay_flags |= NFSLAY_CALLB; /* * The layout stateid.seqid needs to be incremented * before doing a LAYOUT_RECALL callback. */ if (++lyp->lay_stateid.seqid == 0) lyp->lay_stateid.seqid = 1; clientid = lyp->lay_clientid; stateid = lyp->lay_stateid; NFSBCOPY(&lyp->lay_fh, &fh, sizeof(fh)); laytype = lyp->lay_type; break; } } NFSUNLOCKLAYOUT(lhyp); if (lyp != NULL) { error = nfsrv_recalllayout(clientid, &stateid, &fh, NULL, 0, laytype, p); if (error != 0 && error != NFSERR_NOMATCHLAYOUT) NFSD_DEBUG(4, "recallold=%d\n", error); if (error != 0) { NFSLOCKLAYOUT(lhyp); /* * Since the hash list was unlocked, we need to * find it again. 
*/ ret = nfsrv_findlayout(&clientid, &fh, laytype, p, &lyp); if (ret == 0 && (lyp->lay_flags & NFSLAY_CALLB) != 0 && lyp->lay_stateid.other[0] == stateid.other[0] && lyp->lay_stateid.other[1] == stateid.other[1] && lyp->lay_stateid.other[2] == stateid.other[2]) { /* * The client no longer knows this layout, so * it can be free'd now. */ if (error == NFSERR_NOMATCHLAYOUT) nfsrv_freelayout(&lhyp->list, lyp); else { /* * Leave it to be tried later by * clearing NFSLAY_CALLB and moving * it to the head of the list, so it * won't be tried again for a while. */ lyp->lay_flags &= ~NFSLAY_CALLB; TAILQ_REMOVE(&lhyp->list, lyp, lay_list); TAILQ_INSERT_HEAD(&lhyp->list, lyp, lay_list); } } NFSUNLOCKLAYOUT(lhyp); } } } /* * Try and return layout(s). */ int nfsrv_layoutreturn(struct nfsrv_descript *nd, vnode_t vp, int layouttype, int iomode, uint64_t offset, uint64_t len, int reclaim, int kind, nfsv4stateid_t *stateidp, int maxcnt, uint32_t *layp, int *fndp, struct ucred *cred, NFSPROC_T *p) { struct nfsvattr na; struct nfslayouthash *lhyp; struct nfslayout *lyp; fhandle_t fh; int error = 0; *fndp = 0; if (kind == NFSV4LAYOUTRET_FILE) { error = nfsvno_getfh(vp, &fh, p); if (error == 0) { error = nfsrv_updatemdsattr(vp, &na, p); if (error != 0) printf("nfsrv_layoutreturn: updatemdsattr" " failed=%d\n", error); } if (error == 0) { if (reclaim == newnfs_true) { error = nfsrv_checkgrace(NULL, NULL, NFSLCK_RECLAIM); if (error != NFSERR_NOGRACE) error = 0; return (error); } lhyp = NFSLAYOUTHASH(&fh); NFSDRECALLLOCK(); NFSLOCKLAYOUT(lhyp); error = nfsrv_findlayout(&nd->nd_clientid, &fh, layouttype, p, &lyp); NFSD_DEBUG(4, "layoutret findlay=%d\n", error); if (error == 0 && stateidp->other[0] == lyp->lay_stateid.other[0] && stateidp->other[1] == lyp->lay_stateid.other[1] && stateidp->other[2] == lyp->lay_stateid.other[2]) { NFSD_DEBUG(4, "nfsrv_layoutreturn: stateid %d" " %x %x %x laystateid %d %x %x %x" " off=%ju len=%ju flgs=0x%x\n", stateidp->seqid, stateidp->other[0], stateidp->other[1], stateidp->other[2], lyp->lay_stateid.seqid, lyp->lay_stateid.other[0], lyp->lay_stateid.other[1], lyp->lay_stateid.other[2], (uintmax_t)offset, (uintmax_t)len, lyp->lay_flags); if (++lyp->lay_stateid.seqid == 0) lyp->lay_stateid.seqid = 1; stateidp->seqid = lyp->lay_stateid.seqid; if (offset == 0 && len == UINT64_MAX) { if ((iomode & NFSLAYOUTIOMODE_READ) != 0) lyp->lay_flags &= ~NFSLAY_READ; if ((iomode & NFSLAYOUTIOMODE_RW) != 0) lyp->lay_flags &= ~NFSLAY_RW; if ((lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW)) == 0) nfsrv_freelayout(&lhyp->list, lyp); else *fndp = 1; } else *fndp = 1; } NFSUNLOCKLAYOUT(lhyp); /* Search the nfsrv_recalllist for a match. */ TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) { if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 && lyp->lay_clientid.qval == nd->nd_clientid.qval && stateidp->other[0] == lyp->lay_stateid.other[0] && stateidp->other[1] == lyp->lay_stateid.other[1] && stateidp->other[2] == lyp->lay_stateid.other[2]) { lyp->lay_flags |= NFSLAY_RETURNED; wakeup(lyp); error = 0; } } NFSDRECALLUNLOCK(); } if (layouttype == NFSLAYOUT_FLEXFILE) nfsrv_flexlayouterr(nd, layp, maxcnt, p); } else if (kind == NFSV4LAYOUTRET_FSID) nfsrv_freelayouts(&nd->nd_clientid, &vp->v_mount->mnt_stat.f_fsid, layouttype, iomode); else if (kind == NFSV4LAYOUTRET_ALL) nfsrv_freelayouts(&nd->nd_clientid, NULL, layouttype, iomode); else error = NFSERR_INVAL; if (error == -1) error = 0; return (error); } /* * Look for an existing layout. 
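* Returns 0 with *lypp set to the matching layout, or -1 when there is no layout for this file handle, clientid and layout type. It is assumed that the caller holds the lock for the layout hash list.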
*/ static int nfsrv_findlayout(nfsquad_t *clientidp, fhandle_t *fhp, int laytype, NFSPROC_T *p, struct nfslayout **lypp) { struct nfslayouthash *lhyp; struct nfslayout *lyp; int ret; *lypp = NULL; ret = 0; lhyp = NFSLAYOUTHASH(fhp); TAILQ_FOREACH(lyp, &lhyp->list, lay_list) { if (NFSBCMP(&lyp->lay_fh, fhp, sizeof(*fhp)) == 0 && lyp->lay_clientid.qval == clientidp->qval && lyp->lay_type == laytype) break; } if (lyp != NULL) *lypp = lyp; else ret = -1; return (ret); } /* * Add the new layout, as required. */ static int nfsrv_addlayout(struct nfsrv_descript *nd, struct nfslayout **lypp, nfsv4stateid_t *stateidp, char *layp, int *layoutlenp, NFSPROC_T *p) { struct nfsclient *clp; struct nfslayouthash *lhyp; struct nfslayout *lyp, *nlyp; fhandle_t *fhp; int error; KASSERT((nd->nd_flag & ND_IMPLIEDCLID) != 0, ("nfsrv_layoutget: no nd_clientid\n")); lyp = *lypp; fhp = &lyp->lay_fh; NFSLOCKSTATE(); error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (error != 0) { NFSUNLOCKSTATE(); return (error); } lyp->lay_stateid.seqid = stateidp->seqid = 1; lyp->lay_stateid.other[0] = stateidp->other[0] = clp->lc_clientid.lval[0]; lyp->lay_stateid.other[1] = stateidp->other[1] = clp->lc_clientid.lval[1]; lyp->lay_stateid.other[2] = stateidp->other[2] = nfsrv_nextstateindex(clp); NFSUNLOCKSTATE(); lhyp = NFSLAYOUTHASH(fhp); NFSLOCKLAYOUT(lhyp); TAILQ_FOREACH(nlyp, &lhyp->list, lay_list) { if (NFSBCMP(&nlyp->lay_fh, fhp, sizeof(*fhp)) == 0 && nlyp->lay_clientid.qval == nd->nd_clientid.qval) break; } if (nlyp != NULL) { /* A layout already exists, so use it. */ nlyp->lay_flags |= (lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW)); NFSBCOPY(nlyp->lay_xdr, layp, nlyp->lay_layoutlen); *layoutlenp = nlyp->lay_layoutlen; if (++nlyp->lay_stateid.seqid == 0) nlyp->lay_stateid.seqid = 1; stateidp->seqid = nlyp->lay_stateid.seqid; stateidp->other[0] = nlyp->lay_stateid.other[0]; stateidp->other[1] = nlyp->lay_stateid.other[1]; stateidp->other[2] = nlyp->lay_stateid.other[2]; NFSUNLOCKLAYOUT(lhyp); return (0); } /* Insert the new layout in the lists. */ *lypp = NULL; atomic_add_int(&nfsrv_layoutcnt, 1); NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen); *layoutlenp = lyp->lay_layoutlen; TAILQ_INSERT_HEAD(&lhyp->list, lyp, lay_list); NFSUNLOCKLAYOUT(lhyp); return (0); } /* * Get the devinfo for a deviceid. */ int nfsrv_getdevinfo(char *devid, int layouttype, uint32_t *maxcnt, uint32_t *notify, int *devaddrlen, char **devaddr) { struct nfsdevice *ds; if ((layouttype != NFSLAYOUT_NFSV4_1_FILES && layouttype != NFSLAYOUT_FLEXFILE) || (nfsrv_maxpnfsmirror > 1 && layouttype == NFSLAYOUT_NFSV4_1_FILES)) return (NFSERR_UNKNLAYOUTTYPE); /* * Now, search for the device id. Note that the structures won't go * away, but the order changes in the list. As such, the lock only * needs to be held during the search through the list. */ NFSDDSLOCK(); TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (NFSBCMP(devid, ds->nfsdev_deviceid, NFSX_V4DEVICEID) == 0 && ds->nfsdev_nmp != NULL) break; } NFSDDSUNLOCK(); if (ds == NULL) return (NFSERR_NOENT); /* If the correct nfsdev_XXXXaddrlen is > 0, we have the device info. 
*/ *devaddrlen = 0; if (layouttype == NFSLAYOUT_NFSV4_1_FILES) { *devaddrlen = ds->nfsdev_fileaddrlen; *devaddr = ds->nfsdev_fileaddr; } else if (layouttype == NFSLAYOUT_FLEXFILE) { *devaddrlen = ds->nfsdev_flexaddrlen; *devaddr = ds->nfsdev_flexaddr; } if (*devaddrlen == 0) return (NFSERR_UNKNLAYOUTTYPE); /* * The XDR overhead is 3 unsigned values: layout_type, * length_of_address and notify bitmap. * If the notify array is changed to not all zeros, the * count of unsigned values must be increased. */ if (*maxcnt > 0 && *maxcnt < NFSM_RNDUP(*devaddrlen) + 3 * NFSX_UNSIGNED) { *maxcnt = NFSM_RNDUP(*devaddrlen) + 3 * NFSX_UNSIGNED; return (NFSERR_TOOSMALL); } return (0); } /* * Free a list of layout state structures. */ static void nfsrv_freelayoutlist(nfsquad_t clientid) { struct nfslayouthash *lhyp; struct nfslayout *lyp, *nlyp; int i; for (i = 0; i < nfsrv_layouthashsize; i++) { lhyp = &nfslayouthash[i]; NFSLOCKLAYOUT(lhyp); TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) { if (lyp->lay_clientid.qval == clientid.qval) nfsrv_freelayout(&lhyp->list, lyp); } NFSUNLOCKLAYOUT(lhyp); } } /* * Free up a layout. */ static void nfsrv_freelayout(struct nfslayouthead *lhp, struct nfslayout *lyp) { NFSD_DEBUG(4, "Freelayout=%p\n", lyp); atomic_add_int(&nfsrv_layoutcnt, -1); TAILQ_REMOVE(lhp, lyp, lay_list); free(lyp, M_NFSDSTATE); } /* * Free up a device id. */ void nfsrv_freeonedevid(struct nfsdevice *ds) { int i; atomic_add_int(&nfsrv_devidcnt, -1); vrele(ds->nfsdev_dvp); for (i = 0; i < nfsrv_dsdirsize; i++) if (ds->nfsdev_dsdir[i] != NULL) vrele(ds->nfsdev_dsdir[i]); free(ds->nfsdev_fileaddr, M_NFSDSTATE); free(ds->nfsdev_flexaddr, M_NFSDSTATE); free(ds->nfsdev_host, M_NFSDSTATE); free(ds, M_NFSDSTATE); } /* * Free up a device id and its mirrors. */ static void nfsrv_freedevid(struct nfsdevice *ds) { TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list); nfsrv_freeonedevid(ds); } /* * Free all layouts and device ids. * Done when the nfsd threads are shut down since there may be a new * modified device id list created when the nfsd is restarted. */ void nfsrv_freealllayoutsanddevids(void) { struct nfsdontlist *mrp, *nmrp; struct nfslayout *lyp, *nlyp; /* Get rid of the deviceid structures. */ nfsrv_freealldevids(); TAILQ_INIT(&nfsrv_devidhead); nfsrv_devidcnt = 0; /* Get rid of all layouts. */ nfsrv_freealllayouts(); /* Get rid of any nfsdontlist entries. */ LIST_FOREACH_SAFE(mrp, &nfsrv_dontlisthead, nfsmr_list, nmrp) free(mrp, M_NFSDSTATE); LIST_INIT(&nfsrv_dontlisthead); nfsrv_dontlistlen = 0; /* Free layouts in the recall list. */ TAILQ_FOREACH_SAFE(lyp, &nfsrv_recalllisthead, lay_list, nlyp) nfsrv_freelayout(&nfsrv_recalllisthead, lyp); TAILQ_INIT(&nfsrv_recalllisthead); } /* * Free layouts that match the arguments. 
*/ static void nfsrv_freelayouts(nfsquad_t *clid, fsid_t *fs, int laytype, int iomode) { struct nfslayouthash *lhyp; struct nfslayout *lyp, *nlyp; int i; for (i = 0; i < nfsrv_layouthashsize; i++) { lhyp = &nfslayouthash[i]; NFSLOCKLAYOUT(lhyp); TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) { if (clid->qval != lyp->lay_clientid.qval) continue; if (fs != NULL && (fs->val[0] != lyp->lay_fsid.val[0] || fs->val[1] != lyp->lay_fsid.val[1])) continue; if (laytype != lyp->lay_type) continue; if ((iomode & NFSLAYOUTIOMODE_READ) != 0) lyp->lay_flags &= ~NFSLAY_READ; if ((iomode & NFSLAYOUTIOMODE_RW) != 0) lyp->lay_flags &= ~NFSLAY_RW; if ((lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW)) == 0) nfsrv_freelayout(&lhyp->list, lyp); } NFSUNLOCKLAYOUT(lhyp); } } /* * Free all layouts for the argument file. */ void nfsrv_freefilelayouts(fhandle_t *fhp) { struct nfslayouthash *lhyp; struct nfslayout *lyp, *nlyp; lhyp = NFSLAYOUTHASH(fhp); NFSLOCKLAYOUT(lhyp); TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) { if (NFSBCMP(&lyp->lay_fh, fhp, sizeof(*fhp)) == 0) nfsrv_freelayout(&lhyp->list, lyp); } NFSUNLOCKLAYOUT(lhyp); } /* * Free all layouts. */ static void nfsrv_freealllayouts(void) { struct nfslayouthash *lhyp; struct nfslayout *lyp, *nlyp; int i; for (i = 0; i < nfsrv_layouthashsize; i++) { lhyp = &nfslayouthash[i]; NFSLOCKLAYOUT(lhyp); TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) nfsrv_freelayout(&lhyp->list, lyp); NFSUNLOCKLAYOUT(lhyp); } } /* * Look up the mount path for the DS server. */ static int nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p, struct nfsdevice **dsp) { struct nameidata nd; struct nfsdevice *ds; struct mount *mp; int error, i; char *dsdirpath; size_t dsdirsize; NFSD_DEBUG(4, "setdssrv path=%s\n", dspathp); *dsp = NULL; NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, dspathp, p); error = namei(&nd); NFSD_DEBUG(4, "lookup=%d\n", error); if (error != 0) return (error); if (nd.ni_vp->v_type != VDIR) { vput(nd.ni_vp); NFSD_DEBUG(4, "dspath not dir\n"); return (ENOTDIR); } if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) { vput(nd.ni_vp); NFSD_DEBUG(4, "dspath not an NFS mount\n"); return (ENXIO); } /* * Allocate a DS server structure with the NFS mounted directory * vnode reference counted, so that a non-forced dismount will * fail with EBUSY. * This structure is always linked into the list, even if an error * is being returned. The caller will free the entire list upon * an error return. */ *dsp = ds = malloc(sizeof(*ds) + nfsrv_dsdirsize * sizeof(vnode_t), M_NFSDSTATE, M_WAITOK | M_ZERO); ds->nfsdev_dvp = nd.ni_vp; ds->nfsdev_nmp = VFSTONFS(nd.ni_vp->v_mount); NFSVOPUNLOCK(nd.ni_vp); dsdirsize = strlen(dspathp) + 16; dsdirpath = malloc(dsdirsize, M_TEMP, M_WAITOK); /* Now, create the DS directory structures. */ for (i = 0; i < nfsrv_dsdirsize; i++) { snprintf(dsdirpath, dsdirsize, "%s/ds%d", dspathp, i); NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, dsdirpath, p); error = namei(&nd); NFSD_DEBUG(4, "dsdirpath=%s lookup=%d\n", dsdirpath, error); if (error != 0) break; if (nd.ni_vp->v_type != VDIR) { vput(nd.ni_vp); error = ENOTDIR; NFSD_DEBUG(4, "dsdirpath not a VDIR\n"); break; } if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) { vput(nd.ni_vp); error = ENXIO; NFSD_DEBUG(4, "dsdirpath not an NFS mount\n"); break; } ds->nfsdev_dsdir[i] = nd.ni_vp; NFSVOPUNLOCK(nd.ni_vp); } free(dsdirpath, M_TEMP); if (strlen(mdspathp) > 0) { /* * This DS stores files for a specific MDS exported file * system.
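* The fsid of that file system is recorded in nfsdev_mdsfsid below.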
*/ NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, mdspathp, p); error = namei(&nd); NFSD_DEBUG(4, "mds lookup=%d\n", error); if (error != 0) goto out; if (nd.ni_vp->v_type != VDIR) { vput(nd.ni_vp); error = ENOTDIR; NFSD_DEBUG(4, "mdspath not dir\n"); goto out; } mp = nd.ni_vp->v_mount; if ((mp->mnt_flag & MNT_EXPORTED) == 0) { vput(nd.ni_vp); error = ENXIO; NFSD_DEBUG(4, "mdspath not an exported fs\n"); goto out; } ds->nfsdev_mdsfsid = mp->mnt_stat.f_fsid; ds->nfsdev_mdsisset = 1; vput(nd.ni_vp); } out: TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list); atomic_add_int(&nfsrv_devidcnt, 1); return (error); } /* * Look up the mount path for the DS server and delete it. */ int nfsrv_deldsserver(int op, char *dspathp, NFSPROC_T *p) { struct mount *mp; struct nfsmount *nmp; struct nfsdevice *ds; int error; NFSD_DEBUG(4, "deldssrv path=%s\n", dspathp); /* * Search for the path in the mount list. Avoid looking the path * up, since this mount point may be hung, with associated locked * vnodes, etc. * Set NFSMNTP_CANCELRPCS so that any forced dismount will be blocked * until this completes. * As noted in the man page, this should be done before any forced * dismount on the mount point, but at least the handshake on * NFSMNTP_CANCELRPCS should make it safe. */ error = 0; ds = NULL; nmp = NULL; mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (strcmp(mp->mnt_stat.f_mntonname, dspathp) == 0 && strcmp(mp->mnt_stat.f_fstypename, "nfs") == 0 && mp->mnt_data != NULL) { nmp = VFSTONFS(mp); NFSLOCKMNT(nmp); if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | NFSMNTP_CANCELRPCS)) == 0) { nmp->nm_privflag |= NFSMNTP_CANCELRPCS; NFSUNLOCKMNT(nmp); } else { NFSUNLOCKMNT(nmp); nmp = NULL; } break; } } mtx_unlock(&mountlist_mtx); if (nmp != NULL) { ds = nfsrv_deldsnmp(op, nmp, p); NFSD_DEBUG(4, "deldsnmp=%p\n", ds); if (ds != NULL) { nfsrv_killrpcs(nmp); NFSD_DEBUG(4, "aft killrpcs\n"); } else error = ENXIO; NFSLOCKMNT(nmp); nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; wakeup(nmp); NFSUNLOCKMNT(nmp); } else error = EINVAL; return (error); } /* * Search for and remove a DS entry which matches the "nmp" argument. * The nfsdevice structure pointer is returned so that the caller can * free it via nfsrv_freeonedevid(). * For the forced case, do not try to do LayoutRecalls, since the server * must be shut down now anyhow. */ struct nfsdevice * nfsrv_deldsnmp(int op, struct nfsmount *nmp, NFSPROC_T *p) { struct nfsdevice *fndds; NFSD_DEBUG(4, "deldsdvp\n"); NFSDDSLOCK(); if (op == PNFSDOP_FORCEDELDS) fndds = nfsv4_findmirror(nmp); else fndds = nfsrv_findmirroredds(nmp); if (fndds != NULL) nfsrv_deleteds(fndds); NFSDDSUNLOCK(); if (fndds != NULL) { if (op != PNFSDOP_FORCEDELDS) nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p); printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host); } return (fndds); } /* * Similar to nfsrv_deldsnmp(), except that the DS is indicated by deviceid. * This function also calls nfsrv_killrpcs() to unblock RPCs on the mount * point. * Also, returns an error instead of the nfsdevice found. */ APPLESTATIC int nfsrv_delds(char *devid, NFSPROC_T *p) { struct nfsdevice *ds, *fndds; struct nfsmount *nmp; int fndmirror; NFSD_DEBUG(4, "delds\n"); /* * Search the DS server list for a match with devid. * Remove the DS entry if found and there is a mirror. 
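/*
 * The NFSMNTP_CANCELRPCS handshake used by nfsrv_deldsserver() above,
 * isolated as a sketch (hypothetical helper names; assumes the kernel
 * definitions in scope here). The flag keeps a forced dismount from
 * racing this code; whoever sets it must clear it and wakeup() waiters.
 */
static int
cancelrpcs_enter(struct nfsmount *nmp)
{

	NFSLOCKMNT(nmp);
	if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
	    NFSMNTP_CANCELRPCS)) != 0) {
		NFSUNLOCKMNT(nmp);
		return (EBUSY);		/* dismount or peer already active */
	}
	nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
	NFSUNLOCKMNT(nmp);
	return (0);
}

static void
cancelrpcs_exit(struct nfsmount *nmp)
{

	NFSLOCKMNT(nmp);
	nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
	wakeup(nmp);
	NFSUNLOCKMNT(nmp);
}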
*/ fndds = NULL; nmp = NULL; fndmirror = 0; NFSDDSLOCK(); TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (NFSBCMP(ds->nfsdev_deviceid, devid, NFSX_V4DEVICEID) == 0 && ds->nfsdev_nmp != NULL) { NFSD_DEBUG(4, "fnd main ds\n"); fndds = ds; break; } } if (fndds == NULL) { NFSDDSUNLOCK(); return (ENXIO); } if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0) fndmirror = 1; else if (fndds->nfsdev_mdsisset != 0) { /* For the fsid is set case, search for a mirror. */ TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (ds != fndds && ds->nfsdev_nmp != NULL && ds->nfsdev_mdsisset != 0 && ds->nfsdev_mdsfsid.val[0] == fndds->nfsdev_mdsfsid.val[0] && ds->nfsdev_mdsfsid.val[1] == fndds->nfsdev_mdsfsid.val[1]) { fndmirror = 1; break; } } } if (fndmirror != 0) { nmp = fndds->nfsdev_nmp; NFSLOCKMNT(nmp); if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM | NFSMNTP_CANCELRPCS)) == 0) { nmp->nm_privflag |= NFSMNTP_CANCELRPCS; NFSUNLOCKMNT(nmp); nfsrv_deleteds(fndds); } else { NFSUNLOCKMNT(nmp); nmp = NULL; } } NFSDDSUNLOCK(); if (nmp != NULL) { nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p); printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host); nfsrv_killrpcs(nmp); NFSLOCKMNT(nmp); nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS; wakeup(nmp); NFSUNLOCKMNT(nmp); return (0); } return (ENXIO); } /* * Mark a DS as disabled by setting nfsdev_nmp = NULL. */ static void nfsrv_deleteds(struct nfsdevice *fndds) { NFSD_DEBUG(4, "deleteds: deleting a mirror\n"); fndds->nfsdev_nmp = NULL; if (fndds->nfsdev_mdsisset == 0) nfsrv_faildscnt--; } /* * Fill in the addr structures for the File and Flex File layouts. */ static void nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost) { uint32_t *tl; char *netprot; int addrlen; static uint64_t new_devid = 0; if (strchr(addr, ':') != NULL) netprot = "tcp6"; else netprot = "tcp"; /* Fill in the device id. */ NFSBCOPY(&nfsdev_time, ds->nfsdev_deviceid, sizeof(nfsdev_time)); new_devid++; NFSBCOPY(&new_devid, &ds->nfsdev_deviceid[sizeof(nfsdev_time)], sizeof(new_devid)); /* * Fill in the file addr (actually the nfsv4_file_layout_ds_addr4 * as defined in RFC5661) in XDR. */ addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) + 6 * NFSX_UNSIGNED; NFSD_DEBUG(4, "hn=%s addr=%s netprot=%s\n", dnshost, addr, netprot); ds->nfsdev_fileaddrlen = addrlen; tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO); ds->nfsdev_fileaddr = (char *)tl; *tl++ = txdr_unsigned(1); /* One stripe with index 0. */ *tl++ = 0; *tl++ = txdr_unsigned(1); /* One multipath list */ *tl++ = txdr_unsigned(1); /* with one entry in it. */ /* The netaddr for this one entry. */ *tl++ = txdr_unsigned(strlen(netprot)); NFSBCOPY(netprot, tl, strlen(netprot)); tl += (NFSM_RNDUP(strlen(netprot)) / NFSX_UNSIGNED); *tl++ = txdr_unsigned(strlen(addr)); NFSBCOPY(addr, tl, strlen(addr)); /* * Fill in the flex file addr (actually the ff_device_addr4 * as defined for Flexible File Layout) in XDR. */ addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) + 14 * NFSX_UNSIGNED; ds->nfsdev_flexaddrlen = addrlen; tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO); ds->nfsdev_flexaddr = (char *)tl; *tl++ = txdr_unsigned(1); /* One multipath entry. */ /* The netaddr for this one entry. */ *tl++ = txdr_unsigned(strlen(netprot)); NFSBCOPY(netprot, tl, strlen(netprot)); tl += (NFSM_RNDUP(strlen(netprot)) / NFSX_UNSIGNED); *tl++ = txdr_unsigned(strlen(addr)); NFSBCOPY(addr, tl, strlen(addr)); tl += (NFSM_RNDUP(strlen(addr)) / NFSX_UNSIGNED); *tl++ = txdr_unsigned(2); /* Two NFS Versions. 
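/*
 * For illustration: the 16-byte deviceid built by nfsrv_allocdevid() above
 * is a boot-time stamp followed by a 64-bit counter, so ids stay unique
 * across nfsd restarts. A stand-alone sketch of that layout (the 8-byte
 * split is an assumption for the example; the kernel copies
 * sizeof(nfsdev_time) bytes of the real timestamp):
 */
#include <stdint.h>
#include <string.h>

static void
make_deviceid(char id[16], uint64_t boottime, uint64_t *counter)
{
	uint64_t seq;

	seq = ++(*counter);
	memcpy(id, &boottime, sizeof(boottime));	/* bytes 0..7 */
	memcpy(id + 8, &seq, sizeof(seq));		/* bytes 8..15 */
}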
*/ *tl++ = txdr_unsigned(NFS_VER4); /* NFSv4. */ *tl++ = txdr_unsigned(NFSV42_MINORVERSION); /* Minor version 2. */ *tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max rsize. */ *tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max wsize. */ *tl++ = newnfs_true; /* Tightly coupled. */ *tl++ = txdr_unsigned(NFS_VER4); /* NFSv4. */ *tl++ = txdr_unsigned(NFSV41_MINORVERSION); /* Minor version 1. */ *tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max rsize. */ *tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max wsize. */ *tl = newnfs_true; /* Tightly coupled. */ ds->nfsdev_hostnamelen = strlen(dnshost); ds->nfsdev_host = malloc(ds->nfsdev_hostnamelen + 1, M_NFSDSTATE, M_WAITOK); NFSBCOPY(dnshost, ds->nfsdev_host, ds->nfsdev_hostnamelen + 1); } /* * Create the device id list. * Return 0 if the nfsd threads are to run and ENXIO if the "-p" argument * is misconfigured. */ int nfsrv_createdevids(struct nfsd_nfsd_args *args, NFSPROC_T *p) { struct nfsdevice *ds; char *addrp, *dnshostp, *dspathp, *mdspathp; int error, i; addrp = args->addr; dnshostp = args->dnshost; dspathp = args->dspath; mdspathp = args->mdspath; nfsrv_maxpnfsmirror = args->mirrorcnt; if (addrp == NULL || dnshostp == NULL || dspathp == NULL || mdspathp == NULL) return (0); /* * Loop around for each nul-terminated string in args->addr, * args->dnshost, args->dnspath and args->mdspath. */ while (addrp < (args->addr + args->addrlen) && dnshostp < (args->dnshost + args->dnshostlen) && dspathp < (args->dspath + args->dspathlen) && mdspathp < (args->mdspath + args->mdspathlen)) { error = nfsrv_setdsserver(dspathp, mdspathp, p, &ds); if (error != 0) { /* Free all DS servers. */ nfsrv_freealldevids(); nfsrv_devidcnt = 0; return (ENXIO); } nfsrv_allocdevid(ds, addrp, dnshostp); addrp += (strlen(addrp) + 1); dnshostp += (strlen(dnshostp) + 1); dspathp += (strlen(dspathp) + 1); mdspathp += (strlen(mdspathp) + 1); } if (nfsrv_devidcnt < nfsrv_maxpnfsmirror) { /* Free all DS servers. */ nfsrv_freealldevids(); nfsrv_devidcnt = 0; nfsrv_maxpnfsmirror = 1; return (ENXIO); } /* We can fail at most one less DS than the mirror level. */ nfsrv_faildscnt = nfsrv_maxpnfsmirror - 1; /* * Allocate the nfslayout hash table now, since this is a pNFS server. * Make it 1% of the high water mark and at least 100. */ if (nfslayouthash == NULL) { nfsrv_layouthashsize = nfsrv_layouthighwater / 100; if (nfsrv_layouthashsize < 100) nfsrv_layouthashsize = 100; nfslayouthash = mallocarray(nfsrv_layouthashsize, sizeof(struct nfslayouthash), M_NFSDSESSION, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_layouthashsize; i++) { mtx_init(&nfslayouthash[i].mtx, "nfslm", NULL, MTX_DEF); TAILQ_INIT(&nfslayouthash[i].list); } } return (0); } /* * Free all device ids. */ static void nfsrv_freealldevids(void) { struct nfsdevice *ds, *nds; TAILQ_FOREACH_SAFE(ds, &nfsrv_devidhead, nfsdev_list, nds) nfsrv_freedevid(ds); } /* * Check to see if there is a Read/Write Layout plus either: * - A Write Delegation * or * - An Open with Write_access. * Return 1 if this is the case and 0 otherwise. * This function is used by nfsrv_proxyds() to decide if doing a Proxy * Getattr RPC to the Data Server (DS) is necessary. */ #define NFSCLIDVECSIZE 6 APPLESTATIC int nfsrv_checkdsattr(vnode_t vp, NFSPROC_T *p) { fhandle_t fh, *tfhp; struct nfsstate *stp; struct nfslayout *lyp; struct nfslayouthash *lhyp; struct nfslockhashhead *hp; struct nfslockfile *lfp; nfsquad_t clid[NFSCLIDVECSIZE]; int clidcnt, ret; ret = nfsvno_getfh(vp, &fh, p); if (ret != 0) return (0); /* First check for a Read/Write Layout. 
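/*
 * nfsrv_createdevids() above walks four buffers, each holding several
 * nul-terminated strings packed back to back. The walk, as a
 * self-contained sketch (hypothetical names):
 */
#include <string.h>

static void
walk_packed_strings(const char *buf, size_t buflen,
    void (*cb)(const char *))
{
	const char *p;

	for (p = buf; p < buf + buflen; p += strlen(p) + 1)
		cb(p);		/* one nul-terminated entry per call */
}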
*/ clidcnt = 0; lhyp = NFSLAYOUTHASH(&fh); NFSLOCKLAYOUT(lhyp); TAILQ_FOREACH(lyp, &lhyp->list, lay_list) { if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 && ((lyp->lay_flags & NFSLAY_RW) != 0 || ((lyp->lay_flags & NFSLAY_READ) != 0 && nfsrv_pnfsatime != 0))) { if (clidcnt < NFSCLIDVECSIZE) clid[clidcnt].qval = lyp->lay_clientid.qval; clidcnt++; } } NFSUNLOCKLAYOUT(lhyp); if (clidcnt == 0) { /* None found, so return 0. */ return (0); } /* Get the nfslockfile for this fh. */ NFSLOCKSTATE(); hp = NFSLOCKHASH(&fh); LIST_FOREACH(lfp, hp, lf_hash) { tfhp = &lfp->lf_fh; if (NFSVNO_CMPFH(&fh, tfhp)) break; } if (lfp == NULL) { /* None found, so return 0. */ NFSUNLOCKSTATE(); return (0); } /* Now, look for a Write delegation for this clientid. */ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0 && nfsrv_fndclid(clid, stp->ls_clp->lc_clientid, clidcnt) != 0) break; } if (stp != NULL) { /* Found one, so return 1. */ NFSUNLOCKSTATE(); return (1); } /* No Write delegation, so look for an Open with Write_access. */ LIST_FOREACH(stp, &lfp->lf_open, ls_file) { KASSERT((stp->ls_flags & NFSLCK_OPEN) != 0, ("nfsrv_checkdsattr: Non-open in Open list\n")); if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0 && nfsrv_fndclid(clid, stp->ls_clp->lc_clientid, clidcnt) != 0) break; } NFSUNLOCKSTATE(); if (stp != NULL) return (1); return (0); } /* * Look for a matching clientid in the vector. Return 1 if one might match. */ static int nfsrv_fndclid(nfsquad_t *clidvec, nfsquad_t clid, int clidcnt) { int i; /* If too many for the vector, return 1 since there might be a match. */ if (clidcnt > NFSCLIDVECSIZE) return (1); for (i = 0; i < clidcnt; i++) if (clidvec[i].qval == clid.qval) return (1); return (0); } /* * Check the don't list for "vp" and see if issuing an rw layout is allowed. * Return 1 if issuing an rw layout isn't allowed, 0 otherwise. */ static int nfsrv_dontlayout(fhandle_t *fhp) { struct nfsdontlist *mrp; int ret; if (nfsrv_dontlistlen == 0) return (0); ret = 0; NFSDDONTLISTLOCK(); LIST_FOREACH(mrp, &nfsrv_dontlisthead, nfsmr_list) { if (NFSBCMP(fhp, &mrp->nfsmr_fh, sizeof(*fhp)) == 0 && (mrp->nfsmr_flags & NFSMR_DONTLAYOUT) != 0) { ret = 1; break; } } NFSDDONTLISTUNLOCK(); return (ret); } #define PNFSDS_COPYSIZ 65536 /* * Create a new file on a DS and copy the contents of an extant DS file to it. * This can be used for recovery of a DS file onto a recovered DS. * The steps are: * - When called, the MDS file's vnode is locked, blocking LayoutGet operations. * - Disable issuing of read/write layouts for the file via the nfsdontlist, * so that they will be disabled after the MDS file's vnode is unlocked. * - Set up the nfsrv_recalllist so that recall of read/write layouts can * be done. * - Unlock the MDS file's vnode, so that the client(s) can perform proxied * writes, LayoutCommits and LayoutReturns for the file when completing the * LayoutReturn requested by the LayoutRecall callback. * - Issue a LayoutRecall callback for all read/write layouts and wait for * them to be returned. (If the LayoutRecall callback replies * NFSERR_NOMATCHLAYOUT, they are gone and no LayoutReturn is needed.) * - Exclusively lock the MDS file's vnode. This ensures that no proxied * writes are in progress or can occur during the DS file copy. * It also blocks Setattr operations. * - Create the file on the recovered mirror. * - Copy the file from the operational DS. * - Copy any ACL from the MDS file to the new DS file. * - Set the modify time of the new DS file to that of the MDS file. 
* - Update the extended attribute for the MDS file. * - Enable issuing of rw layouts by deleting the nfsdontlist entry. * - The caller will unlock the MDS file's vnode allowing operations * to continue normally, since it is now on the mirror again. */ int nfsrv_copymr(vnode_t vp, vnode_t fvp, vnode_t dvp, struct nfsdevice *ds, struct pnfsdsfile *pf, struct pnfsdsfile *wpf, int mirrorcnt, struct ucred *cred, NFSPROC_T *p) { struct nfsdontlist *mrp, *nmrp; struct nfslayouthash *lhyp; struct nfslayout *lyp, *nlyp; struct nfslayouthead thl; struct mount *mp, *tvmp; struct acl *aclp; struct vattr va; struct timespec mtime; fhandle_t fh; vnode_t tvp; off_t rdpos, wrpos; ssize_t aresid; char *dat; int didprintf, ret, retacl, xfer; ASSERT_VOP_LOCKED(fvp, "nfsrv_copymr fvp"); ASSERT_VOP_LOCKED(vp, "nfsrv_copymr vp"); /* * Allocate a nfsdontlist entry and set the NFSMR_DONTLAYOUT flag * so that no more RW layouts will get issued. */ ret = nfsvno_getfh(vp, &fh, p); if (ret != 0) { NFSD_DEBUG(4, "nfsrv_copymr: getfh=%d\n", ret); return (ret); } nmrp = malloc(sizeof(*nmrp), M_NFSDSTATE, M_WAITOK); nmrp->nfsmr_flags = NFSMR_DONTLAYOUT; NFSBCOPY(&fh, &nmrp->nfsmr_fh, sizeof(fh)); NFSDDONTLISTLOCK(); LIST_FOREACH(mrp, &nfsrv_dontlisthead, nfsmr_list) { if (NFSBCMP(&fh, &mrp->nfsmr_fh, sizeof(fh)) == 0) break; } if (mrp == NULL) { LIST_INSERT_HEAD(&nfsrv_dontlisthead, nmrp, nfsmr_list); mrp = nmrp; nmrp = NULL; nfsrv_dontlistlen++; NFSD_DEBUG(4, "nfsrv_copymr: in dontlist\n"); } else { NFSDDONTLISTUNLOCK(); free(nmrp, M_NFSDSTATE); NFSD_DEBUG(4, "nfsrv_copymr: dup dontlist\n"); return (ENXIO); } NFSDDONTLISTUNLOCK(); /* * Search for all RW layouts for this file. Move them to the * recall list, so they can be recalled and their return noted. */ lhyp = NFSLAYOUTHASH(&fh); NFSDRECALLLOCK(); NFSLOCKLAYOUT(lhyp); TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) { if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 && (lyp->lay_flags & NFSLAY_RW) != 0) { TAILQ_REMOVE(&lhyp->list, lyp, lay_list); TAILQ_INSERT_HEAD(&nfsrv_recalllisthead, lyp, lay_list); lyp->lay_trycnt = 0; } } NFSUNLOCKLAYOUT(lhyp); NFSDRECALLUNLOCK(); ret = 0; mp = tvmp = NULL; didprintf = 0; TAILQ_INIT(&thl); /* Unlock the MDS vp, so that a LayoutReturn can be done on it. */ NFSVOPUNLOCK(vp); /* Now, do a recall for all layouts not yet recalled. */ tryagain: NFSDRECALLLOCK(); TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) { if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 && (lyp->lay_flags & NFSLAY_RECALL) == 0) { lyp->lay_flags |= NFSLAY_RECALL; /* * The layout stateid.seqid needs to be incremented * before doing a LAYOUT_RECALL callback. */ if (++lyp->lay_stateid.seqid == 0) lyp->lay_stateid.seqid = 1; NFSDRECALLUNLOCK(); nfsrv_recalllayout(lyp->lay_clientid, &lyp->lay_stateid, &lyp->lay_fh, lyp, 0, lyp->lay_type, p); NFSD_DEBUG(4, "nfsrv_copymr: recalled layout\n"); goto tryagain; } } /* Now wait for them to be returned. */ tryagain2: TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) { if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0) { if ((lyp->lay_flags & NFSLAY_RETURNED) != 0) { TAILQ_REMOVE(&nfsrv_recalllisthead, lyp, lay_list); TAILQ_INSERT_HEAD(&thl, lyp, lay_list); NFSD_DEBUG(4, "nfsrv_copymr: layout returned\n"); } else { lyp->lay_trycnt++; ret = mtx_sleep(lyp, NFSDRECALLMUTEXPTR, PVFS | PCATCH, "nfsmrl", hz); NFSD_DEBUG(4, "nfsrv_copymr: aft sleep=%d\n", ret); if (ret == EINTR || ret == ERESTART) break; if ((lyp->lay_flags & NFSLAY_RETURNED) == 0) { /* * Give up after 60sec and return * ENXIO, failing the copymr. 
* This layout will remain on the * recalllist. It can only be cleared * by restarting the nfsd. * This seems the safe way to handle * it, since it cannot be safely copied * with an outstanding RW layout. */ if (lyp->lay_trycnt >= 60) { ret = ENXIO; break; } if (didprintf == 0) { printf("nfsrv_copymr: layout " "not returned\n"); didprintf = 1; } } } goto tryagain2; } } NFSDRECALLUNLOCK(); /* We can now get rid of the layouts that have been returned. */ TAILQ_FOREACH_SAFE(lyp, &thl, lay_list, nlyp) nfsrv_freelayout(&thl, lyp); /* * Do the vn_start_write() calls here, before the MDS vnode is * locked and the tvp is created (locked) in the NFS file system * that dvp is in. * For tvmp, this probably isn't necessary, since it will be an * NFS mount and they are not suspendable at this time. */ if (ret == 0) ret = vn_start_write(vp, &mp, V_WAIT | PCATCH); if (ret == 0) { tvmp = dvp->v_mount; ret = vn_start_write(NULL, &tvmp, V_WAIT | PCATCH); } /* * LK_EXCLUSIVE lock the MDS vnode, so that any * proxied writes through the MDS will be blocked until we have * completed the copy and update of the extended attributes. * This will also ensure that any attributes and ACL will not be * changed until the copy is complete. */ NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY); if (ret == 0 && VN_IS_DOOMED(vp)) { NFSD_DEBUG(4, "nfsrv_copymr: lk_exclusive doomed\n"); ret = ESTALE; } /* Create the data file on the recovered DS. */ if (ret == 0) ret = nfsrv_createdsfile(vp, &fh, pf, dvp, ds, cred, p, &tvp); /* Copy the DS file, if created successfully. */ if (ret == 0) { /* * Get any NFSv4 ACL on the MDS file, so that it can be set * on the new DS file. */ aclp = acl_alloc(M_WAITOK | M_ZERO); retacl = VOP_GETACL(vp, ACL_TYPE_NFS4, aclp, cred, p); if (retacl != 0 && retacl != ENOATTR) NFSD_DEBUG(1, "nfsrv_copymr: vop_getacl=%d\n", retacl); dat = malloc(PNFSDS_COPYSIZ, M_TEMP, M_WAITOK); /* Malloc a block of 0s used to check for holes. */ if (nfsrv_zeropnfsdat == NULL) nfsrv_zeropnfsdat = malloc(PNFSDS_COPYSIZ, M_TEMP, M_WAITOK | M_ZERO); rdpos = wrpos = 0; ret = VOP_GETATTR(fvp, &va, cred); aresid = 0; while (ret == 0 && aresid == 0) { ret = vn_rdwr(UIO_READ, fvp, dat, PNFSDS_COPYSIZ, rdpos, UIO_SYSSPACE, IO_NODELOCKED, cred, NULL, &aresid, p); xfer = PNFSDS_COPYSIZ - aresid; if (ret == 0 && xfer > 0) { rdpos += xfer; /* * Skip the write for holes, except for the * last block. */ if (xfer < PNFSDS_COPYSIZ || rdpos == va.va_size || NFSBCMP(dat, nfsrv_zeropnfsdat, PNFSDS_COPYSIZ) != 0) ret = vn_rdwr(UIO_WRITE, tvp, dat, xfer, wrpos, UIO_SYSSPACE, IO_NODELOCKED, cred, NULL, NULL, p); if (ret == 0) wrpos += xfer; } } /* If there is an ACL and the copy succeeded, set the ACL. */ if (ret == 0 && retacl == 0) { ret = VOP_SETACL(tvp, ACL_TYPE_NFS4, aclp, cred, p); /* * Don't consider these as errors, since VOP_GETACL() * can return an ACL when they are not actually * supported. For example, for UFS, VOP_GETACL() * will return a trivial ACL based on the uid/gid/mode * when there is no ACL on the file. * This case should be recognized as a trivial ACL * by UFS's VOP_SETACL() and succeed, but... */ if (ret == ENOATTR || ret == EOPNOTSUPP || ret == EPERM) ret = 0; } if (ret == 0) ret = VOP_FSYNC(tvp, MNT_WAIT, p); /* Set the DS data file's modify time that of the MDS file. 
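/*
 * The copy loop above preserves sparseness: each 64KB block is compared
 * with a preallocated block of zeros and the write is skipped for holes.
 * The same idea in stand-alone userland form (hypothetical helper; "zeros"
 * points at COPYSIZ bytes of zero; error handling elided):
 */
#include <sys/types.h>
#include <string.h>
#include <unistd.h>

#define	COPYSIZ	65536			/* mirrors PNFSDS_COPYSIZ */

static void
copy_sparse(int rfd, int wfd, const char *zeros)
{
	char buf[COPYSIZ];
	ssize_t n;
	off_t off = 0;

	while ((n = read(rfd, buf, sizeof(buf))) > 0) {
		/* Assume a block of zeros is a hole and skip the write. */
		if (memcmp(buf, zeros, n) != 0)
			(void)pwrite(wfd, buf, n, off);
		off += n;
	}
	(void)ftruncate(wfd, off);	/* make a trailing hole count */
}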
*/ if (ret == 0) ret = VOP_GETATTR(vp, &va, cred); if (ret == 0) { mtime = va.va_mtime; VATTR_NULL(&va); va.va_mtime = mtime; ret = VOP_SETATTR(tvp, &va, cred); } vput(tvp); acl_free(aclp); free(dat, M_TEMP); } if (tvmp != NULL) vn_finished_write(tvmp); /* Update the extended attributes for the newly created DS file. */ if (ret == 0) ret = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", sizeof(*wpf) * mirrorcnt, (char *)wpf, p); if (mp != NULL) vn_finished_write(mp); /* Get rid of the dontlist entry, so that Layouts can be issued. */ NFSDDONTLISTLOCK(); LIST_REMOVE(mrp, nfsmr_list); NFSDDONTLISTUNLOCK(); free(mrp, M_NFSDSTATE); return (ret); } /* * Create a data storage file on the recovered DS. */ static int nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf, vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p, vnode_t *tvpp) { struct vattr va, nva; int error; /* Make data file name based on FH. */ error = VOP_GETATTR(vp, &va, cred); if (error == 0) { /* Set the attributes for "vp" to Setattr the DS vp. */ VATTR_NULL(&nva); nva.va_uid = va.va_uid; nva.va_gid = va.va_gid; nva.va_mode = va.va_mode; nva.va_size = 0; VATTR_NULL(&va); va.va_type = VREG; va.va_mode = nva.va_mode; NFSD_DEBUG(4, "nfsrv_dscreatefile: dvp=%p pf=%p\n", dvp, pf); error = nfsrv_dscreate(dvp, &va, &nva, fhp, pf, NULL, pf->dsf_filename, cred, p, tvpp); } return (error); } /* * Look up the MDS file shared locked, and then get the extended attribute * to find the extant DS file to be copied to the new mirror. * If successful, *vpp is set to the MDS file's vp and *nvpp is * set to a DS data file for the MDS file, both exclusively locked. * The "buf" argument has the pnfsdsfile structure from the MDS file * in it and buflen is set to its length. */ int nfsrv_mdscopymr(char *mdspathp, char *dspathp, char *curdspathp, char *buf, int *buflenp, char *fname, NFSPROC_T *p, struct vnode **vpp, struct vnode **nvpp, struct pnfsdsfile **pfp, struct nfsdevice **dsp, struct nfsdevice **fdsp) { struct nameidata nd; struct vnode *vp, *curvp; struct pnfsdsfile *pf; struct nfsmount *nmp, *curnmp; int dsdir, error, mirrorcnt, ippos; vp = NULL; curvp = NULL; curnmp = NULL; *dsp = NULL; *fdsp = NULL; if (dspathp == NULL && curdspathp != NULL) return (EPERM); /* * Look up the MDS file shared locked. The lock will be upgraded * to an exclusive lock after any rw layouts have been returned. */ NFSD_DEBUG(4, "mdsopen path=%s\n", mdspathp); NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, mdspathp, p); error = namei(&nd); NFSD_DEBUG(4, "lookup=%d\n", error); if (error != 0) return (error); if (nd.ni_vp->v_type != VREG) { vput(nd.ni_vp); NFSD_DEBUG(4, "mdspath not reg\n"); return (EISDIR); } vp = nd.ni_vp; if (curdspathp != NULL) { /* * Look up the current DS path and find the nfsdev structure for * it. */ NFSD_DEBUG(4, "curmdsdev path=%s\n", curdspathp); NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, curdspathp, p); error = namei(&nd); NFSD_DEBUG(4, "ds lookup=%d\n", error); if (error != 0) { vput(vp); return (error); } if (nd.ni_vp->v_type != VDIR) { vput(nd.ni_vp); vput(vp); NFSD_DEBUG(4, "curdspath not dir\n"); return (ENOTDIR); } if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) { vput(nd.ni_vp); vput(vp); NFSD_DEBUG(4, "curdspath not an NFS mount\n"); return (ENXIO); } curnmp = VFSTONFS(nd.ni_vp->v_mount); /* Search the nfsdev list for a match. 
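/*
 * nfsrv_createdsfile() above uses the standard VATTR_NULL() idiom: only
 * the fields explicitly filled in are applied by VOP_SETATTR(); fields
 * left at VNOVAL are ignored. Sketch of the ownership/mode propagation
 * (hypothetical helper name, kernel environment assumed):
 */
static void
clone_ownership(const struct vattr *src, struct vattr *dst)
{

	VATTR_NULL(dst);		/* every field starts "unchanged" */
	dst->va_uid = src->va_uid;
	dst->va_gid = src->va_gid;
	dst->va_mode = src->va_mode;
	dst->va_size = 0;		/* the new DS data file starts empty */
}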
*/ NFSDDSLOCK(); *fdsp = nfsv4_findmirror(curnmp); NFSDDSUNLOCK(); if (*fdsp == NULL) curnmp = NULL; if (curnmp == NULL) { vput(nd.ni_vp); vput(vp); NFSD_DEBUG(4, "mdscopymr: no current ds\n"); return (ENXIO); } curvp = nd.ni_vp; } if (dspathp != NULL) { /* Look up the nfsdev path and find the nfsdev structure. */ NFSD_DEBUG(4, "mdsdev path=%s\n", dspathp); NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, dspathp, p); error = namei(&nd); NFSD_DEBUG(4, "ds lookup=%d\n", error); if (error != 0) { vput(vp); if (curvp != NULL) vput(curvp); return (error); } if (nd.ni_vp->v_type != VDIR || nd.ni_vp == curvp) { vput(nd.ni_vp); vput(vp); if (curvp != NULL) vput(curvp); NFSD_DEBUG(4, "dspath not dir\n"); if (nd.ni_vp == curvp) return (EPERM); return (ENOTDIR); } if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) { vput(nd.ni_vp); vput(vp); if (curvp != NULL) vput(curvp); NFSD_DEBUG(4, "dspath not an NFS mount\n"); return (ENXIO); } nmp = VFSTONFS(nd.ni_vp->v_mount); /* * Search the nfsdevice list for a match. If curnmp == NULL, * this is a recovery and there must be a mirror. */ NFSDDSLOCK(); if (curnmp == NULL) *dsp = nfsrv_findmirroredds(nmp); else *dsp = nfsv4_findmirror(nmp); NFSDDSUNLOCK(); if (*dsp == NULL) { vput(nd.ni_vp); vput(vp); if (curvp != NULL) vput(curvp); NFSD_DEBUG(4, "mdscopymr: no ds\n"); return (ENXIO); } } else { nd.ni_vp = NULL; nmp = NULL; } /* * Get a vp for an available DS data file using the extended * attribute on the MDS file. * If there is a valid entry for the new DS in the extended attribute * on the MDS file (as checked via the nmp argument), * nfsrv_dsgetsockmnt() returns EEXIST, so no copying will occur. */ error = nfsrv_dsgetsockmnt(vp, 0, buf, buflenp, &mirrorcnt, p, NULL, NULL, NULL, fname, nvpp, &nmp, curnmp, &ippos, &dsdir); if (curvp != NULL) vput(curvp); if (nd.ni_vp == NULL) { if (error == 0 && nmp != NULL) { /* Search the nfsdev list for a match. */ NFSDDSLOCK(); *dsp = nfsrv_findmirroredds(nmp); NFSDDSUNLOCK(); } if (error == 0 && (nmp == NULL || *dsp == NULL)) { if (nvpp != NULL && *nvpp != NULL) { vput(*nvpp); *nvpp = NULL; } error = ENXIO; } } else vput(nd.ni_vp); /* * When dspathp != NULL and curdspathp == NULL, this is a recovery * and is only allowed if there is a 0.0.0.0 IP address entry. * When curdspathp != NULL, the ippos will be set to that entry. */ if (error == 0 && dspathp != NULL && ippos == -1) { if (nvpp != NULL && *nvpp != NULL) { vput(*nvpp); *nvpp = NULL; } error = ENXIO; } if (error == 0) { *vpp = vp; pf = (struct pnfsdsfile *)buf; if (ippos == -1) { /* If no zeroip pnfsdsfile, add one. */ ippos = *buflenp / sizeof(*pf); *buflenp += sizeof(*pf); pf += ippos; pf->dsf_dir = dsdir; strlcpy(pf->dsf_filename, fname, sizeof(pf->dsf_filename)); } else pf += ippos; *pfp = pf; } else vput(vp); return (error); } /* * Search for a matching pnfsd mirror device structure, base on the nmp arg. * Return one if found, NULL otherwise. */ static struct nfsdevice * nfsrv_findmirroredds(struct nfsmount *nmp) { struct nfsdevice *ds, *fndds; int fndmirror; mtx_assert(NFSDDSMUTEXPTR, MA_OWNED); /* * Search the DS server list for a match with nmp. * Remove the DS entry if found and there is a mirror. 
*/ fndds = NULL; fndmirror = 0; if (nfsrv_devidcnt == 0) return (fndds); TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (ds->nfsdev_nmp == nmp) { NFSD_DEBUG(4, "nfsrv_findmirroredds: fnd main ds\n"); fndds = ds; break; } } if (fndds == NULL) return (fndds); if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0) fndmirror = 1; else if (fndds->nfsdev_mdsisset != 0) { /* For the fsid is set case, search for a mirror. */ TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) { if (ds != fndds && ds->nfsdev_nmp != NULL && ds->nfsdev_mdsisset != 0 && ds->nfsdev_mdsfsid.val[0] == fndds->nfsdev_mdsfsid.val[0] && ds->nfsdev_mdsfsid.val[1] == fndds->nfsdev_mdsfsid.val[1]) { fndmirror = 1; break; } } } if (fndmirror == 0) { NFSD_DEBUG(4, "nfsrv_findmirroredds: no mirror for DS\n"); return (NULL); } return (fndds); } Index: projects/clang1000-import/sys/geom/geom_subr.c =================================================================== --- projects/clang1000-import/sys/geom/geom_subr.c (revision 357178) +++ projects/clang1000-import/sys/geom/geom_subr.c (revision 357179) @@ -1,1658 +1,1661 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #ifdef KDB #include #endif SDT_PROVIDER_DEFINE(geom); struct class_list_head g_classes = LIST_HEAD_INITIALIZER(g_classes); static struct g_tailq_head geoms = TAILQ_HEAD_INITIALIZER(geoms); char *g_wait_event, *g_wait_up, *g_wait_down, *g_wait_sim; struct g_hh00 { struct g_class *mp; struct g_provider *pp; off_t size; int error; int post; }; void g_dbg_printf(const char *classname, int lvl, struct bio *bp, const char *format, ...) { #ifndef PRINTF_BUFR_SIZE #define PRINTF_BUFR_SIZE 64 #endif char bufr[PRINTF_BUFR_SIZE]; struct sbuf sb, *sbp __unused; va_list ap; sbp = sbuf_new(&sb, bufr, sizeof(bufr), SBUF_FIXEDLEN); KASSERT(sbp != NULL, ("sbuf_new misused?")); sbuf_set_drain(&sb, sbuf_printf_drain, NULL); sbuf_cat(&sb, classname); if (lvl >= 0) sbuf_printf(&sb, "[%d]", lvl); va_start(ap, format); sbuf_vprintf(&sb, format, ap); va_end(ap); if (bp != NULL) { sbuf_putc(&sb, ' '); g_format_bio(&sb, bp); } /* Terminate the debug line with a single '\n'. */ sbuf_nl_terminate(&sb); /* Flush line to printf. */ sbuf_finish(&sb); sbuf_delete(&sb); } /* * This event offers a new class a chance to taste all preexisting providers. */ static void g_load_class(void *arg, int flag) { struct g_hh00 *hh; struct g_class *mp2, *mp; struct g_geom *gp; struct g_provider *pp; g_topology_assert(); if (flag == EV_CANCEL) /* XXX: can't happen ? */ return; if (g_shutdown) return; hh = arg; mp = hh->mp; hh->error = 0; if (hh->post) { g_free(hh); hh = NULL; } g_trace(G_T_TOPOLOGY, "g_load_class(%s)", mp->name); KASSERT(mp->name != NULL && *mp->name != '\0', ("GEOM class has no name")); LIST_FOREACH(mp2, &g_classes, class) { if (mp2 == mp) { printf("The GEOM class %s is already loaded.\n", mp2->name); if (hh != NULL) hh->error = EEXIST; return; } else if (strcmp(mp2->name, mp->name) == 0) { printf("A GEOM class %s is already loaded.\n", mp2->name); if (hh != NULL) hh->error = EEXIST; return; } } LIST_INIT(&mp->geom); LIST_INSERT_HEAD(&g_classes, mp, class); if (mp->init != NULL) mp->init(mp); if (mp->taste == NULL) return; LIST_FOREACH(mp2, &g_classes, class) { if (mp == mp2) continue; LIST_FOREACH(gp, &mp2->geom, geom) { LIST_FOREACH(pp, &gp->provider, provider) { mp->taste(mp, pp, 0); g_topology_assert(); } } } } static int g_unload_class(struct g_class *mp) { struct g_geom *gp; struct g_provider *pp; struct g_consumer *cp; int error; g_topology_lock(); g_trace(G_T_TOPOLOGY, "g_unload_class(%s)", mp->name); retry: G_VALID_CLASS(mp); LIST_FOREACH(gp, &mp->geom, geom) { /* We refuse to unload if anything is open */ LIST_FOREACH(pp, &gp->provider, provider) if (pp->acr || pp->acw || pp->ace) { g_topology_unlock(); return (EBUSY); } LIST_FOREACH(cp, &gp->consumer, consumer) if (cp->acr || cp->acw || cp->ace) { g_topology_unlock(); return (EBUSY); } /* If the geom is withering, wait for it to finish. */ if (gp->flags & G_GEOM_WITHER) { g_topology_sleep(mp, 1); goto retry; } } /* * We allow unloading if we have no geoms, or a class * method we can use to get rid of them. 
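/*
 * For context, the shape of a class that g_load_class()/g_unload_class()
 * above operate on, as a sketch. Names prefixed "example" are
 * hypothetical; .version must be G_VERSION or g_modevent() rejects the
 * module, and a class that can own geoms needs .destroy_geom to be
 * unloadable (see the check below).
 */
static struct g_class g_example_class = {
	.name = "EXAMPLE",
	.version = G_VERSION,
	/* .taste = g_example_taste, lets the class probe providers */
	/* .destroy_geom = g_example_destroy_geom, enables unload */
};

/* Registers the class and routes MOD_LOAD/MOD_UNLOAD to g_modevent(). */
DECLARE_GEOM_CLASS(g_example_class, g_example);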
*/ if (!LIST_EMPTY(&mp->geom) && mp->destroy_geom == NULL) { g_topology_unlock(); return (EOPNOTSUPP); } /* Bar new entries */ mp->taste = NULL; mp->config = NULL; LIST_FOREACH(gp, &mp->geom, geom) { error = mp->destroy_geom(NULL, mp, gp); if (error != 0) { g_topology_unlock(); return (error); } } /* Wait for withering to finish. */ for (;;) { gp = LIST_FIRST(&mp->geom); if (gp == NULL) break; KASSERT(gp->flags & G_GEOM_WITHER, ("Non-withering geom in class %s", mp->name)); g_topology_sleep(mp, 1); } G_VALID_CLASS(mp); if (mp->fini != NULL) mp->fini(mp); LIST_REMOVE(mp, class); g_topology_unlock(); return (0); } int g_modevent(module_t mod, int type, void *data) { struct g_hh00 *hh; int error; static int g_ignition; struct g_class *mp; mp = data; if (mp->version != G_VERSION) { printf("GEOM class %s has Wrong version %x\n", mp->name, mp->version); return (EINVAL); } if (!g_ignition) { g_ignition++; g_init(); } error = EOPNOTSUPP; switch (type) { case MOD_LOAD: g_trace(G_T_TOPOLOGY, "g_modevent(%s, LOAD)", mp->name); hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO); hh->mp = mp; /* * Once the system is not cold, MOD_LOAD calls will be * from the userland and the g_event thread will be able * to acknowledge their completion. */ if (cold) { hh->post = 1; error = g_post_event(g_load_class, hh, M_WAITOK, NULL); } else { error = g_waitfor_event(g_load_class, hh, M_WAITOK, NULL); if (error == 0) error = hh->error; g_free(hh); } break; case MOD_UNLOAD: g_trace(G_T_TOPOLOGY, "g_modevent(%s, UNLOAD)", mp->name); error = g_unload_class(mp); if (error == 0) { KASSERT(LIST_EMPTY(&mp->geom), ("Unloaded class (%s) still has geom", mp->name)); } break; } return (error); } static void g_retaste_event(void *arg, int flag) { struct g_class *mp, *mp2; struct g_geom *gp; struct g_hh00 *hh; struct g_provider *pp; struct g_consumer *cp; g_topology_assert(); if (flag == EV_CANCEL) /* XXX: can't happen ? */ return; if (g_shutdown || g_notaste) return; hh = arg; mp = hh->mp; hh->error = 0; if (hh->post) { g_free(hh); hh = NULL; } g_trace(G_T_TOPOLOGY, "g_retaste(%s)", mp->name); LIST_FOREACH(mp2, &g_classes, class) { LIST_FOREACH(gp, &mp2->geom, geom) { LIST_FOREACH(pp, &gp->provider, provider) { if (pp->acr || pp->acw || pp->ace) continue; LIST_FOREACH(cp, &pp->consumers, consumers) { if (cp->geom->class == mp && (cp->flags & G_CF_ORPHAN) == 0) break; } if (cp != NULL) { cp->flags |= G_CF_ORPHAN; g_wither_geom(cp->geom, ENXIO); } mp->taste(mp, pp, 0); g_topology_assert(); } } } } int g_retaste(struct g_class *mp) { struct g_hh00 *hh; int error; if (mp->taste == NULL) return (EINVAL); hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO); hh->mp = mp; if (cold) { hh->post = 1; error = g_post_event(g_retaste_event, hh, M_WAITOK, NULL); } else { error = g_waitfor_event(g_retaste_event, hh, M_WAITOK, NULL); if (error == 0) error = hh->error; g_free(hh); } return (error); } struct g_geom * g_new_geomf(struct g_class *mp, const char *fmt, ...) 
{ struct g_geom *gp; va_list ap; struct sbuf *sb; g_topology_assert(); G_VALID_CLASS(mp); sb = sbuf_new_auto(); va_start(ap, fmt); sbuf_vprintf(sb, fmt, ap); va_end(ap); sbuf_finish(sb); gp = g_malloc(sizeof *gp, M_WAITOK | M_ZERO); gp->name = g_malloc(sbuf_len(sb) + 1, M_WAITOK | M_ZERO); gp->class = mp; gp->rank = 1; LIST_INIT(&gp->consumer); LIST_INIT(&gp->provider); LIST_INIT(&gp->aliases); LIST_INSERT_HEAD(&mp->geom, gp, geom); TAILQ_INSERT_HEAD(&geoms, gp, geoms); strcpy(gp->name, sbuf_data(sb)); sbuf_delete(sb); /* Fill in defaults from class */ gp->start = mp->start; gp->spoiled = mp->spoiled; gp->attrchanged = mp->attrchanged; gp->providergone = mp->providergone; gp->dumpconf = mp->dumpconf; gp->access = mp->access; gp->orphan = mp->orphan; gp->ioctl = mp->ioctl; gp->resize = mp->resize; return (gp); } void g_destroy_geom(struct g_geom *gp) { struct g_geom_alias *gap, *gaptmp; g_topology_assert(); G_VALID_GEOM(gp); g_trace(G_T_TOPOLOGY, "g_destroy_geom(%p(%s))", gp, gp->name); KASSERT(LIST_EMPTY(&gp->consumer), ("g_destroy_geom(%s) with consumer(s) [%p]", gp->name, LIST_FIRST(&gp->consumer))); KASSERT(LIST_EMPTY(&gp->provider), ("g_destroy_geom(%s) with provider(s) [%p]", gp->name, LIST_FIRST(&gp->provider))); g_cancel_event(gp); LIST_REMOVE(gp, geom); TAILQ_REMOVE(&geoms, gp, geoms); LIST_FOREACH_SAFE(gap, &gp->aliases, ga_next, gaptmp) g_free(gap); g_free(gp->name); g_free(gp); } /* * This function is called (repeatedly) until the geom has withered away. */ void g_wither_geom(struct g_geom *gp, int error) { struct g_provider *pp; g_topology_assert(); G_VALID_GEOM(gp); g_trace(G_T_TOPOLOGY, "g_wither_geom(%p(%s))", gp, gp->name); if (!(gp->flags & G_GEOM_WITHER)) { gp->flags |= G_GEOM_WITHER; LIST_FOREACH(pp, &gp->provider, provider) if (!(pp->flags & G_PF_ORPHAN)) g_orphan_provider(pp, error); } g_do_wither(); } /* * Convenience function to destroy a particular provider. */ void g_wither_provider(struct g_provider *pp, int error) { pp->flags |= G_PF_WITHER; if (!(pp->flags & G_PF_ORPHAN)) g_orphan_provider(pp, error); } /* * This function is called (repeatedly) until the has withered away. */ void g_wither_geom_close(struct g_geom *gp, int error) { struct g_consumer *cp; g_topology_assert(); G_VALID_GEOM(gp); g_trace(G_T_TOPOLOGY, "g_wither_geom_close(%p(%s))", gp, gp->name); LIST_FOREACH(cp, &gp->consumer, consumer) if (cp->acr || cp->acw || cp->ace) g_access(cp, -cp->acr, -cp->acw, -cp->ace); g_wither_geom(gp, error); } /* * This function is called (repeatedly) until we cant wash away more * withered bits at present. 
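/*
 * A class-initiated teardown, as a sketch: live geoms are not destroyed
 * directly; g_wither_geom() orphans the providers and g_wither_washer()
 * (below) then dismantles consumers, providers and finally the geom once
 * all access counts have drained. Hypothetical helper, topology lock held:
 */
static void
example_teardown(struct g_geom *gp)
{

	g_topology_assert();
	g_wither_geom(gp, ENXIO);	/* the washer finishes the job */
}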
*/ void g_wither_washer() { struct g_class *mp; struct g_geom *gp, *gp2; struct g_provider *pp, *pp2; struct g_consumer *cp, *cp2; g_topology_assert(); LIST_FOREACH(mp, &g_classes, class) { LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { LIST_FOREACH_SAFE(pp, &gp->provider, provider, pp2) { if (!(pp->flags & G_PF_WITHER)) continue; if (LIST_EMPTY(&pp->consumers)) g_destroy_provider(pp); } if (!(gp->flags & G_GEOM_WITHER)) continue; LIST_FOREACH_SAFE(pp, &gp->provider, provider, pp2) { if (LIST_EMPTY(&pp->consumers)) g_destroy_provider(pp); } LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp2) { if (cp->acr || cp->acw || cp->ace) continue; if (cp->provider != NULL) g_detach(cp); g_destroy_consumer(cp); } if (LIST_EMPTY(&gp->provider) && LIST_EMPTY(&gp->consumer)) g_destroy_geom(gp); } } } struct g_consumer * g_new_consumer(struct g_geom *gp) { struct g_consumer *cp; g_topology_assert(); G_VALID_GEOM(gp); KASSERT(!(gp->flags & G_GEOM_WITHER), ("g_new_consumer on WITHERing geom(%s) (class %s)", gp->name, gp->class->name)); KASSERT(gp->orphan != NULL, ("g_new_consumer on geom(%s) (class %s) without orphan", gp->name, gp->class->name)); cp = g_malloc(sizeof *cp, M_WAITOK | M_ZERO); cp->geom = gp; cp->stat = devstat_new_entry(cp, -1, 0, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX); LIST_INSERT_HEAD(&gp->consumer, cp, consumer); return(cp); } void g_destroy_consumer(struct g_consumer *cp) { struct g_geom *gp; g_topology_assert(); G_VALID_CONSUMER(cp); g_trace(G_T_TOPOLOGY, "g_destroy_consumer(%p)", cp); KASSERT (cp->provider == NULL, ("g_destroy_consumer but attached")); KASSERT (cp->acr == 0, ("g_destroy_consumer with acr")); KASSERT (cp->acw == 0, ("g_destroy_consumer with acw")); KASSERT (cp->ace == 0, ("g_destroy_consumer with ace")); g_cancel_event(cp); gp = cp->geom; LIST_REMOVE(cp, consumer); devstat_remove_entry(cp->stat); g_free(cp); if (gp->flags & G_GEOM_WITHER) g_do_wither(); } static void g_new_provider_event(void *arg, int flag) { struct g_class *mp; struct g_provider *pp; struct g_consumer *cp, *next_cp; g_topology_assert(); if (flag == EV_CANCEL) return; if (g_shutdown) return; pp = arg; G_VALID_PROVIDER(pp); KASSERT(!(pp->flags & G_PF_WITHER), ("g_new_provider_event but withered")); LIST_FOREACH_SAFE(cp, &pp->consumers, consumers, next_cp) { if ((cp->flags & G_CF_ORPHAN) == 0 && cp->geom->attrchanged != NULL) cp->geom->attrchanged(cp, "GEOM::media"); } if (g_notaste) return; LIST_FOREACH(mp, &g_classes, class) { if (mp->taste == NULL) continue; LIST_FOREACH(cp, &pp->consumers, consumers) if (cp->geom->class == mp && (cp->flags & G_CF_ORPHAN) == 0) break; if (cp != NULL) continue; mp->taste(mp, pp, 0); g_topology_assert(); } } struct g_provider * g_new_providerf(struct g_geom *gp, const char *fmt, ...) 
{ struct g_provider *pp; struct sbuf *sb; va_list ap; g_topology_assert(); G_VALID_GEOM(gp); KASSERT(gp->access != NULL, ("new provider on geom(%s) without ->access (class %s)", gp->name, gp->class->name)); KASSERT(gp->start != NULL, ("new provider on geom(%s) without ->start (class %s)", gp->name, gp->class->name)); KASSERT(!(gp->flags & G_GEOM_WITHER), ("new provider on WITHERing geom(%s) (class %s)", gp->name, gp->class->name)); sb = sbuf_new_auto(); va_start(ap, fmt); sbuf_vprintf(sb, fmt, ap); va_end(ap); sbuf_finish(sb); pp = g_malloc(sizeof *pp + sbuf_len(sb) + 1, M_WAITOK | M_ZERO); pp->name = (char *)(pp + 1); strcpy(pp->name, sbuf_data(sb)); sbuf_delete(sb); LIST_INIT(&pp->consumers); pp->error = ENXIO; pp->geom = gp; pp->stat = devstat_new_entry(pp, -1, 0, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX); LIST_INSERT_HEAD(&gp->provider, pp, provider); g_post_event(g_new_provider_event, pp, M_WAITOK, pp, gp, NULL); return (pp); } void g_error_provider(struct g_provider *pp, int error) { /* G_VALID_PROVIDER(pp); We may not have g_topology */ pp->error = error; } static void g_resize_provider_event(void *arg, int flag) { struct g_hh00 *hh; struct g_class *mp; struct g_geom *gp; struct g_provider *pp; struct g_consumer *cp, *cp2; off_t size; g_topology_assert(); if (g_shutdown) return; hh = arg; pp = hh->pp; size = hh->size; g_free(hh); G_VALID_PROVIDER(pp); KASSERT(!(pp->flags & G_PF_WITHER), ("g_resize_provider_event but withered")); g_trace(G_T_TOPOLOGY, "g_resize_provider_event(%p)", pp); LIST_FOREACH_SAFE(cp, &pp->consumers, consumers, cp2) { gp = cp->geom; if (gp->resize == NULL && size < pp->mediasize) { /* * XXX: g_dev_orphan method does deferred destroying * and it is possible, that other event could already * call the orphan method. Check consumer's flags to * do not schedule it twice. */ if (cp->flags & G_CF_ORPHAN) continue; cp->flags |= G_CF_ORPHAN; cp->geom->orphan(cp); } } pp->mediasize = size; LIST_FOREACH_SAFE(cp, &pp->consumers, consumers, cp2) { gp = cp->geom; if ((gp->flags & G_GEOM_WITHER) == 0 && gp->resize != NULL) gp->resize(cp); } /* * After resizing, the previously invalid GEOM class metadata * might become valid. This means we should retaste. 
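/*
 * The producer side of the resize event above, as a sketch: a disk driver
 * that detects a capacity change simply reports the new size and lets the
 * event handler orphan non-resizable consumers and retaste. Hypothetical
 * driver name:
 */
static void
exdisk_capacity_changed(struct g_provider *pp, off_t newsize)
{

	/* g_resize_provider() ignores withered and unchanged providers. */
	g_resize_provider(pp, newsize);
}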
*/ LIST_FOREACH(mp, &g_classes, class) { if (mp->taste == NULL) continue; LIST_FOREACH(cp, &pp->consumers, consumers) if (cp->geom->class == mp && (cp->flags & G_CF_ORPHAN) == 0) break; if (cp != NULL) continue; mp->taste(mp, pp, 0); g_topology_assert(); } } void g_resize_provider(struct g_provider *pp, off_t size) { struct g_hh00 *hh; G_VALID_PROVIDER(pp); if (pp->flags & G_PF_WITHER) return; if (size == pp->mediasize) return; hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO); hh->pp = pp; hh->size = size; g_post_event(g_resize_provider_event, hh, M_WAITOK, NULL); } #ifndef _PATH_DEV #define _PATH_DEV "/dev/" #endif struct g_provider * g_provider_by_name(char const *arg) { struct g_class *cp; struct g_geom *gp; struct g_provider *pp, *wpp; if (strncmp(arg, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) arg += sizeof(_PATH_DEV) - 1; wpp = NULL; LIST_FOREACH(cp, &g_classes, class) { LIST_FOREACH(gp, &cp->geom, geom) { LIST_FOREACH(pp, &gp->provider, provider) { if (strcmp(arg, pp->name) != 0) continue; if ((gp->flags & G_GEOM_WITHER) == 0 && (pp->flags & G_PF_WITHER) == 0) return (pp); else wpp = pp; } } } return (wpp); } void g_destroy_provider(struct g_provider *pp) { struct g_geom *gp; g_topology_assert(); G_VALID_PROVIDER(pp); KASSERT(LIST_EMPTY(&pp->consumers), ("g_destroy_provider but attached")); KASSERT (pp->acr == 0, ("g_destroy_provider with acr")); KASSERT (pp->acw == 0, ("g_destroy_provider with acw")); KASSERT (pp->ace == 0, ("g_destroy_provider with ace")); g_cancel_event(pp); LIST_REMOVE(pp, provider); gp = pp->geom; devstat_remove_entry(pp->stat); /* * If a callback was provided, send notification that the provider * is now gone. */ if (gp->providergone != NULL) gp->providergone(pp); g_free(pp); if ((gp->flags & G_GEOM_WITHER)) g_do_wither(); } /* * We keep the "geoms" list sorted by topological order (== increasing * numerical rank) at all times. * When an attach is done, the attaching geoms rank is invalidated * and it is moved to the tail of the list. * All geoms later in the sequence has their ranks reevaluated in * sequence. If we cannot assign rank to a geom because it's * prerequisites do not have rank, we move that element to the tail * of the sequence with invalid rank as well. * At some point we encounter our original geom and if we stil fail * to assign it a rank, there must be a loop and we fail back to * g_attach() which detach again and calls redo_rank again * to fix up the damage. * It would be much simpler code wise to do it recursively, but we * can't risk that on the kernel stack. 
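/*
 * The rank invariant maintained by redo_rank() below, reduced to one
 * self-contained function: a geom attached to providers of ranks r1..rn
 * gets rank max(r1..rn) + 1, and 0 means "not yet computable".
 * "example_rank" is a hypothetical illustration, not kernel code.
 */
static int
example_rank(const int *provider_ranks, int n)
{
	int i, m;

	m = 1;				/* no attachments: rank 1 */
	for (i = 0; i < n; i++) {
		if (provider_ranks[i] == 0)
			return (0);	/* prerequisite unranked: defer */
		if (provider_ranks[i] >= m)
			m = provider_ranks[i] + 1;
	}
	return (m);
}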
*/ static int redo_rank(struct g_geom *gp) { struct g_consumer *cp; struct g_geom *gp1, *gp2; int n, m; g_topology_assert(); G_VALID_GEOM(gp); /* Invalidate this geoms rank and move it to the tail */ gp1 = TAILQ_NEXT(gp, geoms); if (gp1 != NULL) { gp->rank = 0; TAILQ_REMOVE(&geoms, gp, geoms); TAILQ_INSERT_TAIL(&geoms, gp, geoms); } else { gp1 = gp; } /* re-rank the rest of the sequence */ for (; gp1 != NULL; gp1 = gp2) { gp1->rank = 0; m = 1; LIST_FOREACH(cp, &gp1->consumer, consumer) { if (cp->provider == NULL) continue; n = cp->provider->geom->rank; if (n == 0) { m = 0; break; } else if (n >= m) m = n + 1; } gp1->rank = m; gp2 = TAILQ_NEXT(gp1, geoms); /* got a rank, moving on */ if (m != 0) continue; /* no rank to original geom means loop */ if (gp == gp1) return (ELOOP); /* no rank, put it at the end move on */ TAILQ_REMOVE(&geoms, gp1, geoms); TAILQ_INSERT_TAIL(&geoms, gp1, geoms); } return (0); } int g_attach(struct g_consumer *cp, struct g_provider *pp) { int error; g_topology_assert(); G_VALID_CONSUMER(cp); G_VALID_PROVIDER(pp); g_trace(G_T_TOPOLOGY, "g_attach(%p, %p)", cp, pp); KASSERT(cp->provider == NULL, ("attach but attached")); cp->provider = pp; cp->flags &= ~G_CF_ORPHAN; LIST_INSERT_HEAD(&pp->consumers, cp, consumers); error = redo_rank(cp->geom); if (error) { LIST_REMOVE(cp, consumers); cp->provider = NULL; redo_rank(cp->geom); } return (error); } void g_detach(struct g_consumer *cp) { struct g_provider *pp; g_topology_assert(); G_VALID_CONSUMER(cp); g_trace(G_T_TOPOLOGY, "g_detach(%p)", cp); KASSERT(cp->provider != NULL, ("detach but not attached")); KASSERT(cp->acr == 0, ("detach but nonzero acr")); KASSERT(cp->acw == 0, ("detach but nonzero acw")); KASSERT(cp->ace == 0, ("detach but nonzero ace")); KASSERT(cp->nstart == cp->nend, ("detach with active requests")); pp = cp->provider; LIST_REMOVE(cp, consumers); cp->provider = NULL; if ((cp->geom->flags & G_GEOM_WITHER) || (pp->geom->flags & G_GEOM_WITHER) || (pp->flags & G_PF_WITHER)) g_do_wither(); redo_rank(cp->geom); } /* * g_access() * * Access-check with delta values. The question asked is "can provider * "cp" change the access counters by the relative amounts dc[rwe] ?" */ int g_access(struct g_consumer *cp, int dcr, int dcw, int dce) { struct g_provider *pp; struct g_geom *gp; int pw, pe; #ifdef INVARIANTS int sr, sw, se; #endif int error; g_topology_assert(); G_VALID_CONSUMER(cp); pp = cp->provider; KASSERT(pp != NULL, ("access but not attached")); G_VALID_PROVIDER(pp); gp = pp->geom; g_trace(G_T_ACCESS, "g_access(%p(%s), %d, %d, %d)", cp, pp->name, dcr, dcw, dce); KASSERT(cp->acr + dcr >= 0, ("access resulting in negative acr")); KASSERT(cp->acw + dcw >= 0, ("access resulting in negative acw")); KASSERT(cp->ace + dce >= 0, ("access resulting in negative ace")); KASSERT(dcr != 0 || dcw != 0 || dce != 0, ("NOP access request")); KASSERT(cp->acr + dcr != 0 || cp->acw + dcw != 0 || cp->ace + dce != 0 || cp->nstart == cp->nend, ("Last close with active requests")); KASSERT(gp->access != NULL, ("NULL geom->access")); /* * If our class cares about being spoiled, and we have been, we * are probably just ahead of the event telling us that. Fail * now rather than having to unravel this later. */ if (cp->geom->spoiled != NULL && (cp->flags & G_CF_SPOILED) && (dcr > 0 || dcw > 0 || dce > 0)) return (ENXIO); /* * A number of GEOM classes either need to perform an I/O on the first * open or to acquire a different subsystem's lock. To do that they * may have to drop the topology lock. 
* Other GEOM classes perform special actions when opening a lower rank * geom for the first time. As a result, more than one thread may * end up performing the special actions. * So, we prevent concurrent "first" opens by marking the consumer with * special flag. * * Note that if the geom's access method never drops the topology lock, * then we will never see G_GEOM_IN_ACCESS here. */ while ((gp->flags & G_GEOM_IN_ACCESS) != 0) { g_trace(G_T_ACCESS, "%s: race on geom %s via provider %s and consumer of %s", __func__, gp->name, pp->name, cp->geom->name); gp->flags |= G_GEOM_ACCESS_WAIT; g_topology_sleep(gp, 0); } /* * Figure out what counts the provider would have had, if this * consumer had (r0w0e0) at this time. */ pw = pp->acw - cp->acw; pe = pp->ace - cp->ace; g_trace(G_T_ACCESS, "open delta:[r%dw%de%d] old:[r%dw%de%d] provider:[r%dw%de%d] %p(%s)", dcr, dcw, dce, cp->acr, cp->acw, cp->ace, pp->acr, pp->acw, pp->ace, pp, pp->name); /* If foot-shooting is enabled, any open on rank#1 is OK */ if ((g_debugflags & G_F_FOOTSHOOTING) && gp->rank == 1) ; /* If we try exclusive but already write: fail */ else if (dce > 0 && pw > 0) return (EPERM); /* If we try write but already exclusive: fail */ else if (dcw > 0 && pe > 0) return (EPERM); /* If we try to open more but provider is error'ed: fail */ else if ((dcr > 0 || dcw > 0 || dce > 0) && pp->error != 0) { printf("%s(%d): provider %s has error %d set\n", __func__, __LINE__, pp->name, pp->error); return (pp->error); } /* Ok then... */ #ifdef INVARIANTS sr = cp->acr; sw = cp->acw; se = cp->ace; #endif gp->flags |= G_GEOM_IN_ACCESS; error = gp->access(pp, dcr, dcw, dce); KASSERT(dcr > 0 || dcw > 0 || dce > 0 || error == 0, ("Geom provider %s::%s dcr=%d dcw=%d dce=%d error=%d failed " "closing ->access()", gp->class->name, pp->name, dcr, dcw, dce, error)); g_topology_assert(); gp->flags &= ~G_GEOM_IN_ACCESS; KASSERT(cp->acr == sr && cp->acw == sw && cp->ace == se, ("Access counts changed during geom->access")); if ((gp->flags & G_GEOM_ACCESS_WAIT) != 0) { gp->flags &= ~G_GEOM_ACCESS_WAIT; wakeup(gp); } if (!error) { /* * If we open first write, spoil any partner consumers. * If we close last write and provider is not errored, * trigger re-taste. 
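/*
 * The canonical consumer-side use of g_access(), as a sketch: counts are
 * changed only by relative deltas, and a consumer must eventually release
 * exactly what it acquired. Hypothetical helper, topology lock held on
 * entry:
 */
static int
example_open_rw(struct g_consumer *cp)
{
	int error;

	error = g_access(cp, 1, 1, 1);	/* +read, +write, +exclusive */
	if (error != 0)
		return (error);		/* EPERM, ENXIO, provider error... */
	/* ... use the provider (dropping the topology lock for I/O) ... */
	g_access(cp, -1, -1, -1);	/* release the same deltas */
	return (0);
}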
*/ if (pp->acw == 0 && dcw != 0) g_spoil(pp, cp); else if (pp->acw != 0 && pp->acw == -dcw && pp->error == 0 && !(gp->flags & G_GEOM_WITHER)) g_post_event(g_new_provider_event, pp, M_WAITOK, pp, NULL); pp->acr += dcr; pp->acw += dcw; pp->ace += dce; cp->acr += dcr; cp->acw += dcw; cp->ace += dce; if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0) KASSERT(pp->sectorsize > 0, ("Provider %s lacks sectorsize", pp->name)); if ((cp->geom->flags & G_GEOM_WITHER) && cp->acr == 0 && cp->acw == 0 && cp->ace == 0) g_do_wither(); } return (error); } int g_handleattr_int(struct bio *bp, const char *attribute, int val) { return (g_handleattr(bp, attribute, &val, sizeof val)); } int g_handleattr_uint16_t(struct bio *bp, const char *attribute, uint16_t val) { return (g_handleattr(bp, attribute, &val, sizeof val)); } int g_handleattr_off_t(struct bio *bp, const char *attribute, off_t val) { return (g_handleattr(bp, attribute, &val, sizeof val)); } int g_handleattr_str(struct bio *bp, const char *attribute, const char *str) { return (g_handleattr(bp, attribute, str, 0)); } int g_handleattr(struct bio *bp, const char *attribute, const void *val, int len) { int error = 0; if (strcmp(bp->bio_attribute, attribute)) return (0); if (len == 0) { bzero(bp->bio_data, bp->bio_length); if (strlcpy(bp->bio_data, val, bp->bio_length) >= bp->bio_length) { printf("%s: %s %s bio_length %jd strlen %zu -> EFAULT\n", __func__, bp->bio_to->name, attribute, (intmax_t)bp->bio_length, strlen(val)); error = EFAULT; } } else if (bp->bio_length == len) { bcopy(val, bp->bio_data, len); } else { printf("%s: %s %s bio_length %jd len %d -> EFAULT\n", __func__, bp->bio_to->name, attribute, (intmax_t)bp->bio_length, len); error = EFAULT; } if (error == 0) bp->bio_completed = bp->bio_length; g_io_deliver(bp, error); return (1); } int g_std_access(struct g_provider *pp, int dr __unused, int dw __unused, int de __unused) { g_topology_assert(); G_VALID_PROVIDER(pp); return (0); } void g_std_done(struct bio *bp) { struct bio *bp2; bp2 = bp->bio_parent; if (bp2->bio_error == 0) bp2->bio_error = bp->bio_error; bp2->bio_completed += bp->bio_completed; g_destroy_bio(bp); bp2->bio_inbed++; - if (bp2->bio_children == bp2->bio_inbed) + if (bp2->bio_children == bp2->bio_inbed) { + if (bp2->bio_cmd == BIO_SPEEDUP) + bp2->bio_completed = bp2->bio_length; g_io_deliver(bp2, bp2->bio_error); + } } /* XXX: maybe this is only g_slice_spoiled */ void g_std_spoiled(struct g_consumer *cp) { struct g_geom *gp; struct g_provider *pp; g_topology_assert(); G_VALID_CONSUMER(cp); g_trace(G_T_TOPOLOGY, "g_std_spoiled(%p)", cp); cp->flags |= G_CF_ORPHAN; g_detach(cp); gp = cp->geom; LIST_FOREACH(pp, &gp->provider, provider) g_orphan_provider(pp, ENXIO); g_destroy_consumer(cp); if (LIST_EMPTY(&gp->provider) && LIST_EMPTY(&gp->consumer)) g_destroy_geom(gp); else gp->flags |= G_GEOM_WITHER; } /* * Spoiling happens when a provider is opened for writing, but consumers * which are configured by in-band data are attached (slicers for instance). * Since the write might potentially change the in-band data, such consumers * need to re-evaluate their existence after the writing session closes. * We do this by (offering to) tear them down when the open for write happens * in return for a re-taste when it closes again. * Together with the fact that such consumers grab an 'e' bit whenever they * are open, regardless of mode, this ends up DTRT. 
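/*
 * The mechanism above is opt-in. A metadata-configured class provides a
 * ->spoiled method (g_std_spoiled() above is the stock "tear down and be
 * re-tasted" one) and, in its own access method, holds an 'e' bit below
 * whenever any of its providers is open, so writes that could invalidate
 * the metadata are blocked while it is in use. Sketch of the
 * pass-through, in the style of g_slice (hypothetical name):
 */
static int
example_access(struct g_provider *pp, int dr, int dw, int de)
{
	struct g_consumer *cp;

	cp = LIST_FIRST(&pp->geom->consumer);
	/* Mirror the deltas below, adding 'e' for any open of any kind. */
	return (g_access(cp, dr, dw, de + dr + dw));
}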
*/ static void g_spoil_event(void *arg, int flag) { struct g_provider *pp; struct g_consumer *cp, *cp2; g_topology_assert(); if (flag == EV_CANCEL) return; pp = arg; G_VALID_PROVIDER(pp); g_trace(G_T_TOPOLOGY, "%s %p(%s:%s:%s)", __func__, pp, pp->geom->class->name, pp->geom->name, pp->name); for (cp = LIST_FIRST(&pp->consumers); cp != NULL; cp = cp2) { cp2 = LIST_NEXT(cp, consumers); if ((cp->flags & G_CF_SPOILED) == 0) continue; cp->flags &= ~G_CF_SPOILED; if (cp->geom->spoiled == NULL) continue; cp->geom->spoiled(cp); g_topology_assert(); } } void g_spoil(struct g_provider *pp, struct g_consumer *cp) { struct g_consumer *cp2; g_topology_assert(); G_VALID_PROVIDER(pp); G_VALID_CONSUMER(cp); LIST_FOREACH(cp2, &pp->consumers, consumers) { if (cp2 == cp) continue; /* KASSERT(cp2->acr == 0, ("spoiling cp->acr = %d", cp2->acr)); KASSERT(cp2->acw == 0, ("spoiling cp->acw = %d", cp2->acw)); */ KASSERT(cp2->ace == 0, ("spoiling cp->ace = %d", cp2->ace)); cp2->flags |= G_CF_SPOILED; } g_post_event(g_spoil_event, pp, M_WAITOK, pp, NULL); } static void g_media_changed_event(void *arg, int flag) { struct g_provider *pp; int retaste; g_topology_assert(); if (flag == EV_CANCEL) return; pp = arg; G_VALID_PROVIDER(pp); /* * If provider was not open for writing, queue retaste after spoiling. * If it was, retaste will happen automatically on close. */ retaste = (pp->acw == 0 && pp->error == 0 && !(pp->geom->flags & G_GEOM_WITHER)); g_spoil_event(arg, flag); if (retaste) g_post_event(g_new_provider_event, pp, M_WAITOK, pp, NULL); } int g_media_changed(struct g_provider *pp, int flag) { struct g_consumer *cp; LIST_FOREACH(cp, &pp->consumers, consumers) cp->flags |= G_CF_SPOILED; return (g_post_event(g_media_changed_event, pp, flag, pp, NULL)); } int g_media_gone(struct g_provider *pp, int flag) { struct g_consumer *cp; LIST_FOREACH(cp, &pp->consumers, consumers) cp->flags |= G_CF_SPOILED; return (g_post_event(g_spoil_event, pp, flag, pp, NULL)); } int g_getattr__(const char *attr, struct g_consumer *cp, void *var, int len) { int error, i; i = len; error = g_io_getattr(attr, cp, &i, var); if (error) return (error); if (i != len) return (EINVAL); return (0); } static int g_get_device_prefix_len(const char *name) { int len; if (strncmp(name, "ada", 3) == 0) len = 3; else if (strncmp(name, "ad", 2) == 0) len = 2; else return (0); if (name[len] < '0' || name[len] > '9') return (0); do { len++; } while (name[len] >= '0' && name[len] <= '9'); return (len); } int g_compare_names(const char *namea, const char *nameb) { int deva, devb; if (strcmp(namea, nameb) == 0) return (1); deva = g_get_device_prefix_len(namea); if (deva == 0) return (0); devb = g_get_device_prefix_len(nameb); if (devb == 0) return (0); if (strcmp(namea + deva, nameb + devb) == 0) return (1); return (0); } void g_geom_add_alias(struct g_geom *gp, const char *alias) { struct g_geom_alias *gap; gap = (struct g_geom_alias *)g_malloc( sizeof(struct g_geom_alias) + strlen(alias) + 1, M_WAITOK); strcpy((char *)(gap + 1), alias); gap->ga_alias = (const char *)(gap + 1); LIST_INSERT_HEAD(&gp->aliases, gap, ga_next); } #if defined(DIAGNOSTIC) || defined(DDB) /* * This function walks the mesh and returns a non-zero integer if it * finds the argument pointer is an object. The return value indicates * which type of object it is believed to be. If topology is not locked, * this function is potentially dangerous, but we don't assert that the * topology lock is held when called from debugger. 
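/*
 * The driver side of the two media events above, as a sketch: insertion
 * is reported with g_media_changed() (spoil, then retaste), removal with
 * g_media_gone() (spoil only). Hypothetical driver name; the flag is
 * passed through to g_post_event().
 */
static void
excd_media_event(struct g_provider *pp, bool inserted)
{

	if (inserted)
		(void)g_media_changed(pp, M_WAITOK);
	else
		(void)g_media_gone(pp, M_WAITOK);
}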
*/ int g_valid_obj(void const *ptr) { struct g_class *mp; struct g_geom *gp; struct g_consumer *cp; struct g_provider *pp; #ifdef KDB if (kdb_active == 0) #endif g_topology_assert(); LIST_FOREACH(mp, &g_classes, class) { if (ptr == mp) return (1); LIST_FOREACH(gp, &mp->geom, geom) { if (ptr == gp) return (2); LIST_FOREACH(cp, &gp->consumer, consumer) if (ptr == cp) return (3); LIST_FOREACH(pp, &gp->provider, provider) if (ptr == pp) return (4); } } return(0); } #endif #ifdef DDB #define gprintf(...) do { \ db_printf("%*s", indent, ""); \ db_printf(__VA_ARGS__); \ } while (0) #define gprintln(...) do { \ gprintf(__VA_ARGS__); \ db_printf("\n"); \ } while (0) #define ADDFLAG(obj, flag, sflag) do { \ if ((obj)->flags & (flag)) { \ if (comma) \ strlcat(str, ",", size); \ strlcat(str, (sflag), size); \ comma = 1; \ } \ } while (0) static char * provider_flags_to_string(struct g_provider *pp, char *str, size_t size) { int comma = 0; bzero(str, size); if (pp->flags == 0) { strlcpy(str, "NONE", size); return (str); } ADDFLAG(pp, G_PF_WITHER, "G_PF_WITHER"); ADDFLAG(pp, G_PF_ORPHAN, "G_PF_ORPHAN"); return (str); } static char * geom_flags_to_string(struct g_geom *gp, char *str, size_t size) { int comma = 0; bzero(str, size); if (gp->flags == 0) { strlcpy(str, "NONE", size); return (str); } ADDFLAG(gp, G_GEOM_WITHER, "G_GEOM_WITHER"); return (str); } static void db_show_geom_consumer(int indent, struct g_consumer *cp) { if (indent == 0) { gprintln("consumer: %p", cp); gprintln(" class: %s (%p)", cp->geom->class->name, cp->geom->class); gprintln(" geom: %s (%p)", cp->geom->name, cp->geom); if (cp->provider == NULL) gprintln(" provider: none"); else { gprintln(" provider: %s (%p)", cp->provider->name, cp->provider); } gprintln(" access: r%dw%de%d", cp->acr, cp->acw, cp->ace); gprintln(" flags: 0x%04x", cp->flags); #ifdef INVARIANTS gprintln(" nstart: %u", cp->nstart); gprintln(" nend: %u", cp->nend); #endif } else { gprintf("consumer: %p (%s), access=r%dw%de%d", cp, cp->provider != NULL ? 
cp->provider->name : "none", cp->acr, cp->acw, cp->ace); if (cp->flags) db_printf(", flags=0x%04x", cp->flags); db_printf("\n"); } } static void db_show_geom_provider(int indent, struct g_provider *pp) { struct g_consumer *cp; char flags[64]; if (indent == 0) { gprintln("provider: %s (%p)", pp->name, pp); gprintln(" class: %s (%p)", pp->geom->class->name, pp->geom->class); gprintln(" geom: %s (%p)", pp->geom->name, pp->geom); gprintln(" mediasize: %jd", (intmax_t)pp->mediasize); gprintln(" sectorsize: %u", pp->sectorsize); gprintln(" stripesize: %ju", (uintmax_t)pp->stripesize); gprintln(" stripeoffset: %ju", (uintmax_t)pp->stripeoffset); gprintln(" access: r%dw%de%d", pp->acr, pp->acw, pp->ace); gprintln(" flags: %s (0x%04x)", provider_flags_to_string(pp, flags, sizeof(flags)), pp->flags); gprintln(" error: %d", pp->error); if (LIST_EMPTY(&pp->consumers)) gprintln(" consumers: none"); } else { gprintf("provider: %s (%p), access=r%dw%de%d", pp->name, pp, pp->acr, pp->acw, pp->ace); if (pp->flags != 0) { db_printf(", flags=%s (0x%04x)", provider_flags_to_string(pp, flags, sizeof(flags)), pp->flags); } db_printf("\n"); } if (!LIST_EMPTY(&pp->consumers)) { LIST_FOREACH(cp, &pp->consumers, consumers) { db_show_geom_consumer(indent + 2, cp); if (db_pager_quit) break; } } } static void db_show_geom_geom(int indent, struct g_geom *gp) { struct g_provider *pp; struct g_consumer *cp; char flags[64]; if (indent == 0) { gprintln("geom: %s (%p)", gp->name, gp); gprintln(" class: %s (%p)", gp->class->name, gp->class); gprintln(" flags: %s (0x%04x)", geom_flags_to_string(gp, flags, sizeof(flags)), gp->flags); gprintln(" rank: %d", gp->rank); if (LIST_EMPTY(&gp->provider)) gprintln(" providers: none"); if (LIST_EMPTY(&gp->consumer)) gprintln(" consumers: none"); } else { gprintf("geom: %s (%p), rank=%d", gp->name, gp, gp->rank); if (gp->flags != 0) { db_printf(", flags=%s (0x%04x)", geom_flags_to_string(gp, flags, sizeof(flags)), gp->flags); } db_printf("\n"); } if (!LIST_EMPTY(&gp->provider)) { LIST_FOREACH(pp, &gp->provider, provider) { db_show_geom_provider(indent + 2, pp); if (db_pager_quit) break; } } if (!LIST_EMPTY(&gp->consumer)) { LIST_FOREACH(cp, &gp->consumer, consumer) { db_show_geom_consumer(indent + 2, cp); if (db_pager_quit) break; } } } static void db_show_geom_class(struct g_class *mp) { struct g_geom *gp; db_printf("class: %s (%p)\n", mp->name, mp); LIST_FOREACH(gp, &mp->geom, geom) { db_show_geom_geom(2, gp); if (db_pager_quit) break; } } /* * Print the GEOM topology or the given object. */ DB_SHOW_COMMAND(geom, db_show_geom) { struct g_class *mp; if (!have_addr) { /* No address given, print the entire topology. 
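 * From ddb(4) this is plain "show geom"; "show geom <addr>" instead
 * prints just the object the address resolves to, as classified by
 * g_valid_obj() above.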
*/ LIST_FOREACH(mp, &g_classes, class) { db_show_geom_class(mp); db_printf("\n"); if (db_pager_quit) break; } } else { switch (g_valid_obj((void *)addr)) { case 1: db_show_geom_class((struct g_class *)addr); break; case 2: db_show_geom_geom(0, (struct g_geom *)addr); break; case 3: db_show_geom_consumer(0, (struct g_consumer *)addr); break; case 4: db_show_geom_provider(0, (struct g_provider *)addr); break; default: db_printf("Not a GEOM object.\n"); break; } } } static void db_print_bio_cmd(struct bio *bp) { db_printf(" cmd: "); switch (bp->bio_cmd) { case BIO_READ: db_printf("BIO_READ"); break; case BIO_WRITE: db_printf("BIO_WRITE"); break; case BIO_DELETE: db_printf("BIO_DELETE"); break; case BIO_GETATTR: db_printf("BIO_GETATTR"); break; case BIO_FLUSH: db_printf("BIO_FLUSH"); break; case BIO_CMD0: db_printf("BIO_CMD0"); break; case BIO_CMD1: db_printf("BIO_CMD1"); break; case BIO_CMD2: db_printf("BIO_CMD2"); break; case BIO_ZONE: db_printf("BIO_ZONE"); break; default: db_printf("UNKNOWN"); break; } db_printf("\n"); } static void db_print_bio_flags(struct bio *bp) { int comma; comma = 0; db_printf(" flags: "); if (bp->bio_flags & BIO_ERROR) { db_printf("BIO_ERROR"); comma = 1; } if (bp->bio_flags & BIO_DONE) { db_printf("%sBIO_DONE", (comma ? ", " : "")); comma = 1; } if (bp->bio_flags & BIO_ONQUEUE) db_printf("%sBIO_ONQUEUE", (comma ? ", " : "")); db_printf("\n"); } /* * Print useful information in a BIO */ DB_SHOW_COMMAND(bio, db_show_bio) { struct bio *bp; if (have_addr) { bp = (struct bio *)addr; db_printf("BIO %p\n", bp); db_print_bio_cmd(bp); db_print_bio_flags(bp); db_printf(" cflags: 0x%hx\n", bp->bio_cflags); db_printf(" pflags: 0x%hx\n", bp->bio_pflags); db_printf(" offset: %jd\n", (intmax_t)bp->bio_offset); db_printf(" length: %jd\n", (intmax_t)bp->bio_length); db_printf(" bcount: %ld\n", bp->bio_bcount); db_printf(" resid: %ld\n", bp->bio_resid); db_printf(" completed: %jd\n", (intmax_t)bp->bio_completed); db_printf(" children: %u\n", bp->bio_children); db_printf(" inbed: %u\n", bp->bio_inbed); db_printf(" error: %d\n", bp->bio_error); db_printf(" parent: %p\n", bp->bio_parent); db_printf(" driver1: %p\n", bp->bio_driver1); db_printf(" driver2: %p\n", bp->bio_driver2); db_printf(" caller1: %p\n", bp->bio_caller1); db_printf(" caller2: %p\n", bp->bio_caller2); db_printf(" bio_from: %p\n", bp->bio_from); db_printf(" bio_to: %p\n", bp->bio_to); #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING) db_printf(" bio_track_bp: %p\n", bp->bio_track_bp); #endif } } #undef gprintf #undef gprintln #undef ADDFLAG #endif /* DDB */ Index: projects/clang1000-import/sys/geom/stripe/g_stripe.c =================================================================== --- projects/clang1000-import/sys/geom/stripe/g_stripe.c (revision 357178) +++ projects/clang1000-import/sys/geom/stripe/g_stripe.c (revision 357179) @@ -1,1276 +1,1278 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004-2005 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(geom_stripe, "GEOM striping support"); static MALLOC_DEFINE(M_STRIPE, "stripe_data", "GEOM_STRIPE Data"); static uma_zone_t g_stripe_zone; static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force); static int g_stripe_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static g_taste_t g_stripe_taste; static g_ctl_req_t g_stripe_config; static g_dumpconf_t g_stripe_dumpconf; static g_init_t g_stripe_init; static g_fini_t g_stripe_fini; struct g_class g_stripe_class = { .name = G_STRIPE_CLASS_NAME, .version = G_VERSION, .ctlreq = g_stripe_config, .taste = g_stripe_taste, .destroy_geom = g_stripe_destroy_geom, .init = g_stripe_init, .fini = g_stripe_fini }; SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, stripe, CTLFLAG_RW, 0, "GEOM_STRIPE stuff"); static u_int g_stripe_debug = 0; SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, debug, CTLFLAG_RWTUN, &g_stripe_debug, 0, "Debug level"); static int g_stripe_fast = 0; static int g_sysctl_stripe_fast(SYSCTL_HANDLER_ARGS) { int error, fast; fast = g_stripe_fast; error = sysctl_handle_int(oidp, &fast, 0, req); if (error == 0 && req->newptr != NULL) g_stripe_fast = fast; return (error); } SYSCTL_PROC(_kern_geom_stripe, OID_AUTO, fast, CTLTYPE_INT | CTLFLAG_RWTUN, NULL, 0, g_sysctl_stripe_fast, "I", "Fast, but memory-consuming, mode"); static u_int g_stripe_maxmem = MAXPHYS * 100; SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, maxmem, CTLFLAG_RDTUN, &g_stripe_maxmem, 0, "Maximum memory that can be allocated in \"fast\" mode (in bytes)"); static u_int g_stripe_fast_failed = 0; SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, fast_failed, CTLFLAG_RD, &g_stripe_fast_failed, 0, "How many times \"fast\" mode failed"); /* * Greatest Common Divisor. */ static u_int gcd(u_int a, u_int b) { u_int c; while (b != 0) { c = a; a = b; b = (c % b); } return (a); } /* * Least Common Multiple. */ static u_int lcm(u_int a, u_int b) { return ((a * b) / gcd(a, b)); } static void g_stripe_init(struct g_class *mp __unused) { g_stripe_zone = uma_zcreate("g_stripe_zone", MAXPHYS, NULL, NULL, NULL, NULL, 0, 0); g_stripe_maxmem -= g_stripe_maxmem % MAXPHYS; uma_zone_set_max(g_stripe_zone, g_stripe_maxmem / MAXPHYS); } static void g_stripe_fini(struct g_class *mp __unused) { uma_zdestroy(g_stripe_zone); } /* * Return the number of valid disks. 
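 * g_stripe_check_and_run() below compares this against sc_ndisks and
 * only brings the provider up once every component is attached.
 */
/*
 * Worked example for the gcd()/lcm() helpers above: gcd(512, 4096) =
 * 512, so lcm(512, 4096) = 512 * 4096 / 512 = 4096.  This is how
 * g_stripe_check_and_run() merges component sector sizes: a stripe
 * over a 512-byte-sector disk and a 4096-byte-sector disk exports
 * 4096-byte sectors.
 */
/*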
*/ static u_int g_stripe_nvalid(struct g_stripe_softc *sc) { u_int i, no; no = 0; for (i = 0; i < sc->sc_ndisks; i++) { if (sc->sc_disks[i] != NULL) no++; } return (no); } static void g_stripe_remove_disk(struct g_consumer *cp) { struct g_stripe_softc *sc; g_topology_assert(); KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__)); sc = (struct g_stripe_softc *)cp->geom->softc; KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); if (cp->private == NULL) { G_STRIPE_DEBUG(0, "Disk %s removed from %s.", cp->provider->name, sc->sc_name); cp->private = (void *)(uintptr_t)-1; } if (sc->sc_provider != NULL) { G_STRIPE_DEBUG(0, "Device %s deactivated.", sc->sc_provider->name); g_wither_provider(sc->sc_provider, ENXIO); sc->sc_provider = NULL; } if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) return; sc->sc_disks[cp->index] = NULL; cp->index = 0; g_detach(cp); g_destroy_consumer(cp); /* If there are no valid disks anymore, remove device. */ if (LIST_EMPTY(&sc->sc_geom->consumer)) g_stripe_destroy(sc, 1); } static void g_stripe_orphan(struct g_consumer *cp) { struct g_stripe_softc *sc; struct g_geom *gp; g_topology_assert(); gp = cp->geom; sc = gp->softc; if (sc == NULL) return; g_stripe_remove_disk(cp); } static int g_stripe_access(struct g_provider *pp, int dr, int dw, int de) { struct g_consumer *cp1, *cp2, *tmp; struct g_stripe_softc *sc; struct g_geom *gp; int error; g_topology_assert(); gp = pp->geom; sc = gp->softc; KASSERT(sc != NULL, ("NULL sc in %s.", __func__)); /* On first open, grab an extra "exclusive" bit */ if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0) de++; /* ... and let go of it on last close */ if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0) de--; LIST_FOREACH_SAFE(cp1, &gp->consumer, consumer, tmp) { error = g_access(cp1, dr, dw, de); if (error != 0) goto fail; if (cp1->acr == 0 && cp1->acw == 0 && cp1->ace == 0 && cp1->private != NULL) { g_stripe_remove_disk(cp1); /* May destroy geom. 
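 * Destroying the geom recurses into g_stripe_destroy(),
 * which is why the walk above uses LIST_FOREACH_SAFE.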
*/ } } return (0); fail: LIST_FOREACH(cp2, &gp->consumer, consumer) { if (cp1 == cp2) break; g_access(cp2, -dr, -dw, -de); } return (error); } static void g_stripe_copy(struct g_stripe_softc *sc, char *src, char *dst, off_t offset, off_t length, int mode) { off_t stripesize; size_t len; stripesize = sc->sc_stripesize; len = (size_t)(stripesize - (offset & (stripesize - 1))); do { bcopy(src, dst, len); if (mode) { dst += len + stripesize * (sc->sc_ndisks - 1); src += len; } else { dst += len; src += len + stripesize * (sc->sc_ndisks - 1); } length -= len; KASSERT(length >= 0, ("Length < 0 (stripesize=%ju, offset=%ju, length=%jd).", (uintmax_t)stripesize, (uintmax_t)offset, (intmax_t)length)); if (length > stripesize) len = stripesize; else len = length; } while (length > 0); } static void g_stripe_done(struct bio *bp) { struct g_stripe_softc *sc; struct bio *pbp; pbp = bp->bio_parent; sc = pbp->bio_to->geom->softc; if (bp->bio_cmd == BIO_READ && bp->bio_caller1 != NULL) { g_stripe_copy(sc, bp->bio_data, bp->bio_caller1, bp->bio_offset, bp->bio_length, 1); bp->bio_data = bp->bio_caller1; bp->bio_caller1 = NULL; } mtx_lock(&sc->sc_lock); if (pbp->bio_error == 0) pbp->bio_error = bp->bio_error; pbp->bio_completed += bp->bio_completed; pbp->bio_inbed++; if (pbp->bio_children == pbp->bio_inbed) { mtx_unlock(&sc->sc_lock); if (pbp->bio_driver1 != NULL) uma_zfree(g_stripe_zone, pbp->bio_driver1); + if (bp->bio_cmd == BIO_SPEEDUP) + pbp->bio_completed = pbp->bio_length; g_io_deliver(pbp, pbp->bio_error); } else mtx_unlock(&sc->sc_lock); g_destroy_bio(bp); } static int g_stripe_start_fast(struct bio *bp, u_int no, off_t offset, off_t length) { TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); struct g_stripe_softc *sc; char *addr, *data = NULL; struct bio *cbp; off_t stripesize; u_int nparts = 0; int error; sc = bp->bio_to->geom->softc; addr = bp->bio_data; stripesize = sc->sc_stripesize; cbp = g_clone_bio(bp); if (cbp == NULL) { error = ENOMEM; goto failure; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); nparts++; /* * Fill in the component buf structure. */ cbp->bio_done = g_stripe_done; cbp->bio_offset = offset; cbp->bio_data = addr; cbp->bio_caller1 = NULL; cbp->bio_length = length; cbp->bio_caller2 = sc->sc_disks[no]; /* offset -= offset % stripesize; */ offset -= offset & (stripesize - 1); addr += length; length = bp->bio_length - length; for (no++; length > 0; no++, length -= stripesize, addr += stripesize) { if (no > sc->sc_ndisks - 1) { no = 0; offset += stripesize; } if (nparts >= sc->sc_ndisks) { cbp = TAILQ_NEXT(cbp, bio_queue); if (cbp == NULL) cbp = TAILQ_FIRST(&queue); nparts++; /* * Update bio structure. */ /* * MIN() is in case when * (bp->bio_length % sc->sc_stripesize) != 0. */ cbp->bio_length += MIN(stripesize, length); if (cbp->bio_caller1 == NULL) { cbp->bio_caller1 = cbp->bio_data; cbp->bio_data = NULL; if (data == NULL) { data = uma_zalloc(g_stripe_zone, M_NOWAIT); if (data == NULL) { error = ENOMEM; goto failure; } } } } else { cbp = g_clone_bio(bp); if (cbp == NULL) { error = ENOMEM; goto failure; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); nparts++; /* * Fill in the component buf structure. */ cbp->bio_done = g_stripe_done; cbp->bio_offset = offset; cbp->bio_data = addr; cbp->bio_caller1 = NULL; /* * MIN() is in case when * (bp->bio_length % sc->sc_stripesize) != 0. */ cbp->bio_length = MIN(stripesize, length); cbp->bio_caller2 = sc->sc_disks[no]; } } if (data != NULL) bp->bio_driver1 = data; /* * Fire off all allocated requests! 
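 * Clones flagged with a bio_caller1 pointer carve their data out of
 * the single bounce buffer allocated from g_stripe_zone; for writes,
 * g_stripe_copy() gathers the scattered stripes into it below.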
*/ while ((cbp = TAILQ_FIRST(&queue)) != NULL) { struct g_consumer *cp; TAILQ_REMOVE(&queue, cbp, bio_queue); cp = cbp->bio_caller2; cbp->bio_caller2 = NULL; cbp->bio_to = cp->provider; if (cbp->bio_caller1 != NULL) { cbp->bio_data = data; if (bp->bio_cmd == BIO_WRITE) { g_stripe_copy(sc, cbp->bio_caller1, data, cbp->bio_offset, cbp->bio_length, 0); } data += cbp->bio_length; } G_STRIPE_LOGREQ(cbp, "Sending request."); g_io_request(cbp, cp); } return (0); failure: if (data != NULL) uma_zfree(g_stripe_zone, data); while ((cbp = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, cbp, bio_queue); if (cbp->bio_caller1 != NULL) { cbp->bio_data = cbp->bio_caller1; cbp->bio_caller1 = NULL; } bp->bio_children--; g_destroy_bio(cbp); } return (error); } static int g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length) { TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); struct g_stripe_softc *sc; off_t stripesize; struct bio *cbp; char *addr; int error; sc = bp->bio_to->geom->softc; stripesize = sc->sc_stripesize; cbp = g_clone_bio(bp); if (cbp == NULL) { error = ENOMEM; goto failure; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); /* * Fill in the component buf structure. */ if (bp->bio_length == length) cbp->bio_done = g_std_done; /* Optimized lockless case. */ else cbp->bio_done = g_stripe_done; cbp->bio_offset = offset; cbp->bio_length = length; if ((bp->bio_flags & BIO_UNMAPPED) != 0) { bp->bio_ma_n = round_page(bp->bio_ma_offset + bp->bio_length) / PAGE_SIZE; addr = NULL; } else addr = bp->bio_data; cbp->bio_caller2 = sc->sc_disks[no]; /* offset -= offset % stripesize; */ offset -= offset & (stripesize - 1); if (bp->bio_cmd != BIO_DELETE) addr += length; length = bp->bio_length - length; for (no++; length > 0; no++, length -= stripesize) { if (no > sc->sc_ndisks - 1) { no = 0; offset += stripesize; } cbp = g_clone_bio(bp); if (cbp == NULL) { error = ENOMEM; goto failure; } TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); /* * Fill in the component buf structure. */ cbp->bio_done = g_stripe_done; cbp->bio_offset = offset; /* * MIN() is in case when * (bp->bio_length % sc->sc_stripesize) != 0. */ cbp->bio_length = MIN(stripesize, length); if ((bp->bio_flags & BIO_UNMAPPED) != 0) { cbp->bio_ma_offset += (uintptr_t)addr; cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE; cbp->bio_ma_offset %= PAGE_SIZE; cbp->bio_ma_n = round_page(cbp->bio_ma_offset + cbp->bio_length) / PAGE_SIZE; } else cbp->bio_data = addr; cbp->bio_caller2 = sc->sc_disks[no]; if (bp->bio_cmd != BIO_DELETE) addr += stripesize; } /* * Fire off all allocated requests! 
*/ while ((cbp = TAILQ_FIRST(&queue)) != NULL) { struct g_consumer *cp; TAILQ_REMOVE(&queue, cbp, bio_queue); cp = cbp->bio_caller2; cbp->bio_caller2 = NULL; cbp->bio_to = cp->provider; G_STRIPE_LOGREQ(cbp, "Sending request."); g_io_request(cbp, cp); } return (0); failure: while ((cbp = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, cbp, bio_queue); bp->bio_children--; g_destroy_bio(cbp); } return (error); } static void g_stripe_pushdown(struct g_stripe_softc *sc, struct bio *bp) { struct bio_queue_head queue; struct g_consumer *cp; struct bio *cbp; u_int no; bioq_init(&queue); for (no = 0; no < sc->sc_ndisks; no++) { cbp = g_clone_bio(bp); if (cbp == NULL) { for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { bioq_remove(&queue, cbp); g_destroy_bio(cbp); } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } bioq_insert_tail(&queue, cbp); cbp->bio_done = g_stripe_done; cbp->bio_caller2 = sc->sc_disks[no]; cbp->bio_to = sc->sc_disks[no]->provider; } for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { bioq_remove(&queue, cbp); G_STRIPE_LOGREQ(cbp, "Sending request."); cp = cbp->bio_caller2; cbp->bio_caller2 = NULL; g_io_request(cbp, cp); } } static void g_stripe_start(struct bio *bp) { off_t offset, start, length, nstripe, stripesize; struct g_stripe_softc *sc; u_int no; int error, fast = 0; sc = bp->bio_to->geom->softc; /* * If sc == NULL, provider's error should be set and g_stripe_start() * should not be called at all. */ KASSERT(sc != NULL, ("Provider's error should be set (error=%d)(device=%s).", bp->bio_to->error, bp->bio_to->name)); G_STRIPE_LOGREQ(bp, "Request received."); switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: break; case BIO_SPEEDUP: case BIO_FLUSH: g_stripe_pushdown(sc, bp); return; case BIO_GETATTR: /* To which provider should it be delivered? */ default: g_io_deliver(bp, EOPNOTSUPP); return; } stripesize = sc->sc_stripesize; /* * Calculations are quite messy, but fast I hope. */ /* Stripe number. */ /* nstripe = bp->bio_offset / stripesize; */ nstripe = bp->bio_offset >> (off_t)sc->sc_stripebits; /* Disk number. */ no = nstripe % sc->sc_ndisks; /* Start position in stripe. */ /* start = bp->bio_offset % stripesize; */ start = bp->bio_offset & (stripesize - 1); /* Start position in disk. */ /* offset = (nstripe / sc->sc_ndisks) * stripesize + start; */ offset = ((nstripe / sc->sc_ndisks) << sc->sc_stripebits) + start; /* Length of data to operate on. */ length = MIN(bp->bio_length, stripesize - start); /* * Use "fast" mode when: * 1. "Fast" mode is ON. * and * 2. Request size is less than or equal to MAXPHYS, * which should always be true. * and * 3. Request size is at least stripesize * ndisks. If it isn't, * there will be no need to send more than one I/O request to * a provider, so there is nothing to optimize. * and * 4. Request is not unmapped. * and * 5. It is not a BIO_DELETE. */ if (g_stripe_fast && bp->bio_length <= MAXPHYS && bp->bio_length >= stripesize * sc->sc_ndisks && (bp->bio_flags & BIO_UNMAPPED) == 0 && bp->bio_cmd != BIO_DELETE) { fast = 1; } error = 0; if (fast) { error = g_stripe_start_fast(bp, no, offset, length); if (error != 0) g_stripe_fast_failed++; } /* * Use "economic" mode when: * 1. "Economic" mode is ON. * or * 2. "Fast" mode failed. It can only fail if there is no memory.
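 */
/*
 * A worked example of the mapping above, with 3 disks and a 64 kB
 * stripe (sc_stripesize = 65536, sc_stripebits = 16): a request at
 * bio_offset = 200704 gives nstripe = 200704 >> 16 = 3, so it lands
 * on disk no = 3 % 3 = 0 at start = 200704 & 65535 = 4096 into the
 * stripe, the on-disk offset is ((3 / 3) << 16) + 4096 = 69632, and
 * the first chunk covers MIN(bio_length, 65536 - 4096) bytes.
 */
/*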
*/ if (!fast || error != 0) error = g_stripe_start_economic(bp, no, offset, length); if (error != 0) { if (bp->bio_error == 0) bp->bio_error = error; g_io_deliver(bp, bp->bio_error); } } static void g_stripe_check_and_run(struct g_stripe_softc *sc) { struct g_provider *dp; off_t mediasize, ms; u_int no, sectorsize = 0; g_topology_assert(); if (g_stripe_nvalid(sc) != sc->sc_ndisks) return; sc->sc_provider = g_new_providerf(sc->sc_geom, "stripe/%s", sc->sc_name); sc->sc_provider->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; if (g_stripe_fast == 0) sc->sc_provider->flags |= G_PF_ACCEPT_UNMAPPED; /* * Find the smallest disk. */ mediasize = sc->sc_disks[0]->provider->mediasize; if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) mediasize -= sc->sc_disks[0]->provider->sectorsize; mediasize -= mediasize % sc->sc_stripesize; sectorsize = sc->sc_disks[0]->provider->sectorsize; for (no = 1; no < sc->sc_ndisks; no++) { dp = sc->sc_disks[no]->provider; ms = dp->mediasize; if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) ms -= dp->sectorsize; ms -= ms % sc->sc_stripesize; if (ms < mediasize) mediasize = ms; sectorsize = lcm(sectorsize, dp->sectorsize); /* A provider underneath us doesn't support unmapped */ if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) { G_STRIPE_DEBUG(1, "Cancelling unmapped " "because of %s.", dp->name); sc->sc_provider->flags &= ~G_PF_ACCEPT_UNMAPPED; } } sc->sc_provider->sectorsize = sectorsize; sc->sc_provider->mediasize = mediasize * sc->sc_ndisks; sc->sc_provider->stripesize = sc->sc_stripesize; sc->sc_provider->stripeoffset = 0; g_error_provider(sc->sc_provider, 0); G_STRIPE_DEBUG(0, "Device %s activated.", sc->sc_provider->name); } static int g_stripe_read_metadata(struct g_consumer *cp, struct g_stripe_metadata *md) { struct g_provider *pp; u_char *buf; int error; g_topology_assert(); error = g_access(cp, 1, 0, 0); if (error != 0) return (error); pp = cp->provider; g_topology_unlock(); buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, &error); g_topology_lock(); g_access(cp, -1, 0, 0); if (buf == NULL) return (error); /* Decode metadata. */ stripe_metadata_decode(buf, md); g_free(buf); return (0); } /* * Add disk to given device. */ static int g_stripe_add_disk(struct g_stripe_softc *sc, struct g_provider *pp, u_int no) { struct g_consumer *cp, *fcp; struct g_geom *gp; int error; g_topology_assert(); /* Metadata corrupted? */ if (no >= sc->sc_ndisks) return (EINVAL); /* Check if disk is not already attached. */ if (sc->sc_disks[no] != NULL) return (EEXIST); gp = sc->sc_geom; fcp = LIST_FIRST(&gp->consumer); cp = g_new_consumer(gp); cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; cp->private = NULL; cp->index = no; error = g_attach(cp, pp); if (error != 0) { g_destroy_consumer(cp); return (error); } if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) { error = g_access(cp, fcp->acr, fcp->acw, fcp->ace); if (error != 0) { g_detach(cp); g_destroy_consumer(cp); return (error); } } if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) { struct g_stripe_metadata md; /* Reread metadata. 
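 * (to make sure the disk still carries the label we tasted earlier;
 * a mismatch in magic, name or id below aborts the attach)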
*/ error = g_stripe_read_metadata(cp, &md); if (error != 0) goto fail; if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0 || strcmp(md.md_name, sc->sc_name) != 0 || md.md_id != sc->sc_id) { G_STRIPE_DEBUG(0, "Metadata on %s changed.", pp->name); goto fail; } } sc->sc_disks[no] = cp; G_STRIPE_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name); g_stripe_check_and_run(sc); return (0); fail: if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace); g_detach(cp); g_destroy_consumer(cp); return (error); } static struct g_geom * g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md, u_int type) { struct g_stripe_softc *sc; struct g_geom *gp; u_int no; g_topology_assert(); G_STRIPE_DEBUG(1, "Creating device %s (id=%u).", md->md_name, md->md_id); /* Two disks is the minimum. */ if (md->md_all < 2) { G_STRIPE_DEBUG(0, "Too few disks defined for %s.", md->md_name); return (NULL); } #if 0 /* Stripe size has to be greater than or equal to the sector size. */ if (md->md_stripesize < sectorsize) { G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); return (NULL); } #endif /* Stripe size has to be a power of 2. */ if (!powerof2(md->md_stripesize)) { G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name); return (NULL); } /* Check for duplicate unit. */ LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc != NULL && strcmp(sc->sc_name, md->md_name) == 0) { G_STRIPE_DEBUG(0, "Device %s already configured.", sc->sc_name); return (NULL); } } gp = g_new_geomf(mp, "%s", md->md_name); sc = malloc(sizeof(*sc), M_STRIPE, M_WAITOK | M_ZERO); gp->start = g_stripe_start; gp->spoiled = g_stripe_orphan; gp->orphan = g_stripe_orphan; gp->access = g_stripe_access; gp->dumpconf = g_stripe_dumpconf; sc->sc_id = md->md_id; sc->sc_stripesize = md->md_stripesize; sc->sc_stripebits = bitcount32(sc->sc_stripesize - 1); sc->sc_ndisks = md->md_all; sc->sc_disks = malloc(sizeof(struct g_consumer *) * sc->sc_ndisks, M_STRIPE, M_WAITOK | M_ZERO); for (no = 0; no < sc->sc_ndisks; no++) sc->sc_disks[no] = NULL; sc->sc_type = type; mtx_init(&sc->sc_lock, "gstripe lock", NULL, MTX_DEF); gp->softc = sc; sc->sc_geom = gp; sc->sc_provider = NULL; G_STRIPE_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id); return (gp); } static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force) { struct g_provider *pp; struct g_consumer *cp, *cp1; struct g_geom *gp; g_topology_assert(); if (sc == NULL) return (ENXIO); pp = sc->sc_provider; if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { if (force) { G_STRIPE_DEBUG(0, "Device %s is still open, so it " "can't be definitely removed.", pp->name); } else { G_STRIPE_DEBUG(1, "Device %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } } gp = sc->sc_geom; LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp1) { g_stripe_remove_disk(cp); if (cp1 == NULL) return (0); /* Recursion happened. */ } if (!LIST_EMPTY(&gp->consumer)) return (EINPROGRESS); gp->softc = NULL; KASSERT(sc->sc_provider == NULL, ("Provider still exists? 
(device=%s)", gp->name)); free(sc->sc_disks, M_STRIPE); mtx_destroy(&sc->sc_lock); free(sc, M_STRIPE); G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name); g_wither_geom(gp, ENXIO); return (0); } static int g_stripe_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, struct g_geom *gp) { struct g_stripe_softc *sc; sc = gp->softc; return (g_stripe_destroy(sc, 0)); } static struct g_geom * g_stripe_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_stripe_metadata md; struct g_stripe_softc *sc; struct g_consumer *cp; struct g_geom *gp; int error; g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); g_topology_assert(); /* Skip providers that are already open for writing. */ if (pp->acw > 0) return (NULL); G_STRIPE_DEBUG(3, "Tasting %s.", pp->name); gp = g_new_geomf(mp, "stripe:taste"); gp->start = g_stripe_start; gp->access = g_stripe_access; gp->orphan = g_stripe_orphan; cp = g_new_consumer(gp); g_attach(cp, pp); error = g_stripe_read_metadata(cp, &md); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); if (error != 0) return (NULL); gp = NULL; if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0) return (NULL); if (md.md_version > G_STRIPE_VERSION) { printf("geom_stripe.ko module is too old to handle %s.\n", pp->name); return (NULL); } /* * Backward compatibility: */ /* There was no md_provider field in earlier versions of metadata. */ if (md.md_version < 2) bzero(md.md_provider, sizeof(md.md_provider)); /* There was no md_provsize field in earlier versions of metadata. */ if (md.md_version < 3) md.md_provsize = pp->mediasize; if (md.md_provider[0] != '\0' && !g_compare_names(md.md_provider, pp->name)) return (NULL); if (md.md_provsize != pp->mediasize) return (NULL); /* * Let's check if device already exists. 
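 * A geom with the same name and id means this provider is simply
 * another member of an already-tasted stripe, so it is added to that
 * device; otherwise a new automatic device is created.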
*/ sc = NULL; LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if (sc->sc_type != G_STRIPE_TYPE_AUTOMATIC) continue; if (strcmp(md.md_name, sc->sc_name) != 0) continue; if (md.md_id != sc->sc_id) continue; break; } if (gp != NULL) { G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); error = g_stripe_add_disk(sc, pp, md.md_no); if (error != 0) { G_STRIPE_DEBUG(0, "Cannot add disk %s to %s (error=%d).", pp->name, gp->name, error); return (NULL); } } else { gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_AUTOMATIC); if (gp == NULL) { G_STRIPE_DEBUG(0, "Cannot create device %s.", md.md_name); return (NULL); } sc = gp->softc; G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); error = g_stripe_add_disk(sc, pp, md.md_no); if (error != 0) { G_STRIPE_DEBUG(0, "Cannot add disk %s to %s (error=%d).", pp->name, gp->name, error); g_stripe_destroy(sc, 1); return (NULL); } } return (gp); } static void g_stripe_ctl_create(struct gctl_req *req, struct g_class *mp) { u_int attached, no; struct g_stripe_metadata md; struct g_provider *pp; struct g_stripe_softc *sc; struct g_geom *gp; struct sbuf *sb; off_t *stripesize; const char *name; char param[16]; int *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs <= 2) { gctl_error(req, "Too few arguments."); return; } strlcpy(md.md_magic, G_STRIPE_MAGIC, sizeof(md.md_magic)); md.md_version = G_STRIPE_VERSION; name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", 0); return; } strlcpy(md.md_name, name, sizeof(md.md_name)); md.md_id = arc4random(); md.md_no = 0; md.md_all = *nargs - 1; stripesize = gctl_get_paraml(req, "stripesize", sizeof(*stripesize)); if (stripesize == NULL) { gctl_error(req, "No '%s' argument.", "stripesize"); return; } md.md_stripesize = (uint32_t)*stripesize; bzero(md.md_provider, sizeof(md.md_provider)); /* This field is not important here. 
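 */
/*
 * For orientation: this is the handler behind a manual create, e.g.
 * (assuming the stock gstripe(8) syntax)
 *
 *	gstripe create -s 65536 data da0 da1
 *
 * which arrives here with nargs = 3, arg0 = "data", stripesize =
 * 65536 and arg1/arg2 naming the component providers.
 */
/*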
*/ md.md_provsize = 0; /* Check all providers are valid */ for (no = 1; no < *nargs; no++) { snprintf(param, sizeof(param), "arg%u", no); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", no); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL) { G_STRIPE_DEBUG(1, "Disk %s is invalid.", name); gctl_error(req, "Disk %s is invalid.", name); return; } } gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_MANUAL); if (gp == NULL) { gctl_error(req, "Can't configure %s.", md.md_name); return; } sc = gp->softc; sb = sbuf_new_auto(); sbuf_printf(sb, "Can't attach disk(s) to %s:", gp->name); for (attached = 0, no = 1; no < *nargs; no++) { snprintf(param, sizeof(param), "arg%u", no); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", no); continue; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); KASSERT(pp != NULL, ("Provider %s disappear?!", name)); if (g_stripe_add_disk(sc, pp, no - 1) != 0) { G_STRIPE_DEBUG(1, "Disk %u (%s) not attached to %s.", no, pp->name, gp->name); sbuf_printf(sb, " %s", pp->name); continue; } attached++; } sbuf_finish(sb); if (md.md_all != attached) { g_stripe_destroy(gp->softc, 1); gctl_error(req, "%s", sbuf_data(sb)); } sbuf_delete(sb); } static struct g_stripe_softc * g_stripe_find_device(struct g_class *mp, const char *name) { struct g_stripe_softc *sc; struct g_geom *gp; LIST_FOREACH(gp, &mp->geom, geom) { sc = gp->softc; if (sc == NULL) continue; if (strcmp(sc->sc_name, name) == 0) return (sc); } return (NULL); } static void g_stripe_ctl_destroy(struct gctl_req *req, struct g_class *mp) { struct g_stripe_softc *sc; int *force, *nargs, error; const char *name; char param[16]; u_int i; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument.", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { gctl_error(req, "No '%s' argument.", "force"); return; } for (i = 0; i < (u_int)*nargs; i++) { snprintf(param, sizeof(param), "arg%u", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", i); return; } sc = g_stripe_find_device(mp, name); if (sc == NULL) { gctl_error(req, "No such device: %s.", name); return; } error = g_stripe_destroy(sc, *force); if (error != 0) { gctl_error(req, "Cannot destroy device %s (error=%d).", sc->sc_name, error); return; } } } static void g_stripe_config(struct gctl_req *req, struct g_class *mp, const char *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == NULL) { gctl_error(req, "No '%s' argument.", "version"); return; } if (*version != G_STRIPE_VERSION) { gctl_error(req, "Userland and kernel parts are out of sync."); return; } if (strcmp(verb, "create") == 0) { g_stripe_ctl_create(req, mp); return; } else if (strcmp(verb, "destroy") == 0 || strcmp(verb, "stop") == 0) { g_stripe_ctl_destroy(req, mp); return; } gctl_error(req, "Unknown verb."); } static void g_stripe_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct g_stripe_softc *sc; sc = gp->softc; if (sc == NULL) return; if (pp != NULL) { /* Nothing here. 
*/ } else if (cp != NULL) { sbuf_printf(sb, "%s%u\n", indent, (u_int)cp->index); } else { sbuf_printf(sb, "%s%u\n", indent, (u_int)sc->sc_id); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t)sc->sc_stripesize); sbuf_printf(sb, "%s", indent); switch (sc->sc_type) { case G_STRIPE_TYPE_AUTOMATIC: sbuf_cat(sb, "AUTOMATIC"); break; case G_STRIPE_TYPE_MANUAL: sbuf_cat(sb, "MANUAL"); break; default: sbuf_cat(sb, "UNKNOWN"); break; } sbuf_cat(sb, "\n"); sbuf_printf(sb, "%sTotal=%u, Online=%u\n", indent, sc->sc_ndisks, g_stripe_nvalid(sc)); sbuf_printf(sb, "%s", indent); if (sc->sc_provider != NULL && sc->sc_provider->error == 0) sbuf_cat(sb, "UP"); else sbuf_cat(sb, "DOWN"); sbuf_cat(sb, "\n"); } } DECLARE_GEOM_CLASS(g_stripe_class, g_stripe); MODULE_VERSION(geom_stripe, 0); Index: projects/clang1000-import/sys/kern/vfs_default.c =================================================================== --- projects/clang1000-import/sys/kern/vfs_default.c (revision 357178) +++ projects/clang1000-import/sys/kern/vfs_default.c (revision 357179) @@ -1,1473 +1,1460 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed * to Berkeley by John Heidemann of the UCLA Ficus project. * * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int vop_nolookup(struct vop_lookup_args *); static int vop_norename(struct vop_rename_args *); static int vop_nostrategy(struct vop_strategy_args *); static int get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf, int dirbuflen, off_t *off, char **cpos, int *len, int *eofflag, struct thread *td); static int dirent_exists(struct vnode *vp, const char *dirname, struct thread *td); #define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4) static int vop_stdis_text(struct vop_is_text_args *ap); static int vop_stdunset_text(struct vop_unset_text_args *ap); static int vop_stdadd_writecount(struct vop_add_writecount_args *ap); static int vop_stdcopy_file_range(struct vop_copy_file_range_args *ap); static int vop_stdfdatasync(struct vop_fdatasync_args *ap); static int vop_stdgetpages_async(struct vop_getpages_async_args *ap); /* * This vnode table stores what we want to do if the filesystem doesn't * implement a particular VOP. * * If there is no specific entry here, we will return EOPNOTSUPP. * * Note that every filesystem has to implement either vop_access * or vop_accessx; failing to do so will result in immediate crash * due to stack overflow, as vop_stdaccess() calls vop_stdaccessx(), * which calls vop_stdaccess() etc. */ struct vop_vector default_vnodeops = { .vop_default = NULL, .vop_bypass = VOP_EOPNOTSUPP, .vop_access = vop_stdaccess, .vop_accessx = vop_stdaccessx, .vop_advise = vop_stdadvise, .vop_advlock = vop_stdadvlock, .vop_advlockasync = vop_stdadvlockasync, .vop_advlockpurge = vop_stdadvlockpurge, .vop_allocate = vop_stdallocate, .vop_bmap = vop_stdbmap, .vop_close = VOP_NULL, .vop_fsync = VOP_NULL, .vop_fdatasync = vop_stdfdatasync, .vop_getpages = vop_stdgetpages, .vop_getpages_async = vop_stdgetpages_async, .vop_getwritemount = vop_stdgetwritemount, .vop_inactive = VOP_NULL, .vop_need_inactive = vop_stdneed_inactive, .vop_ioctl = vop_stdioctl, .vop_kqfilter = vop_stdkqfilter, .vop_islocked = vop_stdislocked, .vop_lock1 = vop_stdlock, .vop_lookup = vop_nolookup, .vop_open = VOP_NULL, .vop_pathconf = VOP_EINVAL, .vop_poll = vop_nopoll, .vop_putpages = vop_stdputpages, .vop_readlink = VOP_EINVAL, .vop_rename = vop_norename, .vop_revoke = VOP_PANIC, .vop_strategy = vop_nostrategy, .vop_unlock = vop_stdunlock, .vop_vptocnp = vop_stdvptocnp, .vop_vptofh = vop_stdvptofh, .vop_unp_bind = vop_stdunp_bind, .vop_unp_connect = vop_stdunp_connect, .vop_unp_detach = vop_stdunp_detach, .vop_is_text = vop_stdis_text, .vop_set_text = vop_stdset_text, .vop_unset_text = vop_stdunset_text, .vop_add_writecount = vop_stdadd_writecount, .vop_copy_file_range = vop_stdcopy_file_range, }; VFS_VOP_VECTOR_REGISTER(default_vnodeops); /* * Series of placeholder functions for various error returns for * VOPs. 
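 * A filesystem opts into all of this by chaining its own vector to
 * the defaults; as a sketch (the example_* names are hypothetical):
 *
 *	struct vop_vector example_vnodeops = {
 *		.vop_default = &default_vnodeops,
 *		.vop_lookup = example_lookup,
 *	};
 *	VFS_VOP_VECTOR_REGISTER(example_vnodeops);
 *
 * Any operation left unset then falls back to the entries above,
 * and ultimately to VOP_EOPNOTSUPP via vop_bypass.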
*/ int vop_eopnotsupp(struct vop_generic_args *ap) { /* printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name); */ return (EOPNOTSUPP); } int vop_ebadf(struct vop_generic_args *ap) { return (EBADF); } int vop_enotty(struct vop_generic_args *ap) { return (ENOTTY); } int vop_einval(struct vop_generic_args *ap) { return (EINVAL); } int vop_enoent(struct vop_generic_args *ap) { return (ENOENT); } int vop_null(struct vop_generic_args *ap) { return (0); } /* * Helper function to panic on some bad VOPs in some filesystems. */ int vop_panic(struct vop_generic_args *ap) { panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name); } /* * vop_std and vop_no are default functions for use by * filesystems that need the "default reasonable" implementation for a * particular operation. * * The documentation for the operations they implement exists (if it exists) * in the VOP_(9) manpage (all uppercase). */ /* * Default vop for filesystems that do not support name lookup */ static int vop_nolookup(ap) struct vop_lookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { *ap->a_vpp = NULL; return (ENOTDIR); } /* * vop_norename: * * Handle unlock and reference counting for arguments of vop_rename * for filesystems that do not implement rename operation. */ static int vop_norename(struct vop_rename_args *ap) { vop_rename_fail(ap); return (EOPNOTSUPP); } /* * vop_nostrategy: * * Strategy routine for VFS devices that have none. * * BIO_ERROR and B_INVAL must be cleared prior to calling any strategy * routine. Typically this is done for a BIO_READ strategy call. * Typically B_INVAL is assumed to already be clear prior to a write * and should not be cleared manually unless you just made the buffer * invalid. BIO_ERROR should be cleared either way. */ static int vop_nostrategy (struct vop_strategy_args *ap) { printf("No strategy for buffer at %p\n", ap->a_bp); vn_printf(ap->a_vp, "vnode "); ap->a_bp->b_ioflags |= BIO_ERROR; ap->a_bp->b_error = EOPNOTSUPP; bufdone(ap->a_bp); return (EOPNOTSUPP); } static int get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf, int dirbuflen, off_t *off, char **cpos, int *len, int *eofflag, struct thread *td) { int error, reclen; struct uio uio; struct iovec iov; struct dirent *dp; KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp)); KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp)); if (*len == 0) { iov.iov_base = dirbuf; iov.iov_len = dirbuflen; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = *off; uio.uio_resid = dirbuflen; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; uio.uio_td = td; *eofflag = 0; #ifdef MAC error = mac_vnode_check_readdir(td->td_ucred, vp); if (error == 0) #endif error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag, NULL, NULL); if (error) return (error); *off = uio.uio_offset; *cpos = dirbuf; *len = (dirbuflen - uio.uio_resid); if (*len == 0) return (ENOENT); } dp = (struct dirent *)(*cpos); reclen = dp->d_reclen; *dpp = dp; /* check for malformed directory.. */ if (reclen < DIRENT_MINSIZE) return (EINVAL); *cpos += reclen; *len -= reclen; return (0); } /* * Check if a named file exists in a given directory vnode. 
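 * Returns 1 when an entry named dirname is found and 0 otherwise;
 * failures while reading the directory are folded into "not found".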
*/ static int dirent_exists(struct vnode *vp, const char *dirname, struct thread *td) { char *dirbuf, *cpos; int error, eofflag, dirbuflen, len, found; off_t off; struct dirent *dp; struct vattr va; KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp)); KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp)); found = 0; error = VOP_GETATTR(vp, &va, td->td_ucred); if (error) return (found); dirbuflen = DEV_BSIZE; if (dirbuflen < va.va_blocksize) dirbuflen = va.va_blocksize; dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK); off = 0; len = 0; do { error = get_next_dirent(vp, &dp, dirbuf, dirbuflen, &off, &cpos, &len, &eofflag, td); if (error) goto out; if (dp->d_type != DT_WHT && dp->d_fileno != 0 && strcmp(dp->d_name, dirname) == 0) { found = 1; goto out; } } while (len > 0 || !eofflag); out: free(dirbuf, M_TEMP); return (found); } int vop_stdaccess(struct vop_access_args *ap) { KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND)) == 0, ("invalid bit in accmode")); return (VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred, ap->a_td)); } int vop_stdaccessx(struct vop_accessx_args *ap) { int error; accmode_t accmode = ap->a_accmode; error = vfs_unixify_accmode(&accmode); if (error != 0) return (error); if (accmode == 0) return (0); return (VOP_ACCESS(ap->a_vp, accmode, ap->a_cred, ap->a_td)); } /* * Advisory record locking support */ int vop_stdadvlock(struct vop_advlock_args *ap) { struct vnode *vp; struct vattr vattr; int error; vp = ap->a_vp; if (ap->a_fl->l_whence == SEEK_END) { /* * The NFSv4 server must avoid doing a vn_lock() here, since it * can deadlock the nfsd threads, due to a LOR. Fortunately * the NFSv4 server always uses SEEK_SET and this code is * only required for the SEEK_END case. */ vn_lock(vp, LK_SHARED | LK_RETRY); error = VOP_GETATTR(vp, &vattr, curthread->td_ucred); VOP_UNLOCK(vp); if (error) return (error); } else vattr.va_size = 0; return (lf_advlock(ap, &(vp->v_lockf), vattr.va_size)); } int vop_stdadvlockasync(struct vop_advlockasync_args *ap) { struct vnode *vp; struct vattr vattr; int error; vp = ap->a_vp; if (ap->a_fl->l_whence == SEEK_END) { /* The size argument is only needed for SEEK_END. */ vn_lock(vp, LK_SHARED | LK_RETRY); error = VOP_GETATTR(vp, &vattr, curthread->td_ucred); VOP_UNLOCK(vp); if (error) return (error); } else vattr.va_size = 0; return (lf_advlockasync(ap, &(vp->v_lockf), vattr.va_size)); } int vop_stdadvlockpurge(struct vop_advlockpurge_args *ap) { struct vnode *vp; vp = ap->a_vp; lf_purgelocks(vp, &vp->v_lockf); return (0); } /* * vop_stdpathconf: * * Standard implementation of POSIX pathconf, to get information about limits * for a filesystem. * Override per filesystem for the case where the filesystem has smaller * limits. */ int vop_stdpathconf(ap) struct vop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; } */ *ap; { switch (ap->a_name) { case _PC_ASYNC_IO: *ap->a_retval = _POSIX_ASYNCHRONOUS_IO; return (0); case _PC_PATH_MAX: *ap->a_retval = PATH_MAX; return (0); case _PC_ACL_EXTENDED: case _PC_ACL_NFS4: case _PC_CAP_PRESENT: case _PC_INF_PRESENT: case _PC_MAC_PRESENT: *ap->a_retval = 0; return (0); default: return (EINVAL); } /* NOTREACHED */ } /* * Standard lock, unlock and islocked functions. */ int vop_stdlock(ap) struct vop_lock1_args /* { struct vnode *a_vp; int a_flags; char *file; int line; } */ *ap; { struct vnode *vp = ap->a_vp; struct mtx *ilk; ilk = VI_MTX(vp); return (lockmgr_lock_fast_path(vp->v_vnlock, ap->a_flags, &ilk->lock_object, ap->a_file, ap->a_line)); } /* See above. 
*/ int vop_stdunlock(ap) struct vop_unlock_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp = ap->a_vp; return (lockmgr_unlock(vp->v_vnlock)); } /* See above. */ int vop_stdislocked(ap) struct vop_islocked_args /* { struct vnode *a_vp; } */ *ap; { return (lockstatus(ap->a_vp->v_vnlock)); } /* * Variants of the above set. * * Differences are: * - shared locking disablement is not supported * - v_vnlock pointer is not honored */ int vop_lock(ap) struct vop_lock1_args /* { struct vnode *a_vp; int a_flags; char *file; int line; } */ *ap; { struct vnode *vp = ap->a_vp; int flags = ap->a_flags; struct mtx *ilk; MPASS(vp->v_vnlock == &vp->v_lock); if (__predict_false((flags & ~(LK_TYPE_MASK | LK_NODDLKTREAT | LK_RETRY)) != 0)) goto other; switch (flags & LK_TYPE_MASK) { case LK_SHARED: return (lockmgr_slock(&vp->v_lock, flags, ap->a_file, ap->a_line)); case LK_EXCLUSIVE: return (lockmgr_xlock(&vp->v_lock, flags, ap->a_file, ap->a_line)); } other: ilk = VI_MTX(vp); return (lockmgr_lock_fast_path(&vp->v_lock, flags, &ilk->lock_object, ap->a_file, ap->a_line)); } int vop_unlock(ap) struct vop_unlock_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp = ap->a_vp; MPASS(vp->v_vnlock == &vp->v_lock); return (lockmgr_unlock(&vp->v_lock)); } int vop_islocked(ap) struct vop_islocked_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp = ap->a_vp; MPASS(vp->v_vnlock == &vp->v_lock); return (lockstatus(&vp->v_lock)); } /* * Return true for select/poll. */ int vop_nopoll(ap) struct vop_poll_args /* { struct vnode *a_vp; int a_events; struct ucred *a_cred; struct thread *a_td; } */ *ap; { return (poll_no_poll(ap->a_events)); } /* * Implement poll for local filesystems that support it. */ int vop_stdpoll(ap) struct vop_poll_args /* { struct vnode *a_vp; int a_events; struct ucred *a_cred; struct thread *a_td; } */ *ap; { if (ap->a_events & ~POLLSTANDARD) return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events)); return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); } /* * Return our mount point, as we will take charge of the writes. */ int vop_stdgetwritemount(ap) struct vop_getwritemount_args /* { struct vnode *a_vp; struct mount **a_mpp; } */ *ap; { struct mount *mp; struct vnode *vp; /* * Note that having a reference does not prevent forced unmount from * setting ->v_mount to NULL after the lock gets released. This is of * no consequence for typical consumers (most notably vn_start_write) * since in this case the vnode is VIRF_DOOMED. Unmount might have * progressed far enough that its completion is only delayed by the * reference obtained here. The consumer only needs to concern itself * with releasing it. */ vp = ap->a_vp; mp = vp->v_mount; if (mp == NULL) { *(ap->a_mpp) = NULL; return (0); } if (vfs_op_thread_enter(mp)) { if (mp == vp->v_mount) { vfs_mp_count_add_pcpu(mp, ref, 1); vfs_op_thread_exit(mp); } else { vfs_op_thread_exit(mp); mp = NULL; } } else { MNT_ILOCK(mp); if (mp == vp->v_mount) { MNT_REF(mp); MNT_IUNLOCK(mp); } else { MNT_IUNLOCK(mp); mp = NULL; } } *(ap->a_mpp) = mp; return (0); } /* * If the file system doesn't implement VOP_BMAP, then return sensible defaults: * - Return the vnode's bufobj instead of any underlying device's bufobj * - Calculate the physical block number as if there were equal size * consecutive blocks, but * - Report no contiguous runs of blocks. 
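 * For example, with f_iosize = 8192 and DEV_BSIZE = 512, btodb(8192)
 * is 16, so logical block 5 is reported as physical block 80 in the
 * vnode's own bufobj, with zero-length runs on both sides.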
*/ int vop_stdbmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct bufobj **a_bop; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { if (ap->a_bop != NULL) *ap->a_bop = &ap->a_vp->v_bufobj; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize); if (ap->a_runp != NULL) *ap->a_runp = 0; if (ap->a_runb != NULL) *ap->a_runb = 0; return (0); } int vop_stdfsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; int a_waitfor; struct thread *a_td; } */ *ap; { return (vn_fsync_buf(ap->a_vp, ap->a_waitfor)); } static int vop_stdfdatasync(struct vop_fdatasync_args *ap) { return (VOP_FSYNC(ap->a_vp, MNT_WAIT, ap->a_td)); } int vop_stdfdatasync_buf(struct vop_fdatasync_args *ap) { return (vn_fsync_buf(ap->a_vp, MNT_WAIT)); } /* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */ int vop_stdgetpages(ap) struct vop_getpages_args /* { struct vnode *a_vp; vm_page_t *a_m; int a_count; int *a_rbehind; int *a_rahead; } */ *ap; { return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, ap->a_rahead, NULL, NULL); } static int vop_stdgetpages_async(struct vop_getpages_async_args *ap) { int error; error = VOP_GETPAGES(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, ap->a_rahead); ap->a_iodone(ap->a_arg, ap->a_m, ap->a_count, error); return (error); } int vop_stdkqfilter(struct vop_kqfilter_args *ap) { return vfs_kqfilter(ap); } /* XXX Needs good comment and more info in the manpage (VOP_PUTPAGES(9)). */ int vop_stdputpages(ap) struct vop_putpages_args /* { struct vnode *a_vp; vm_page_t *a_m; int a_count; int a_sync; int *a_rtvals; } */ *ap; { return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, ap->a_rtvals); } int vop_stdvptofh(struct vop_vptofh_args *ap) { return (EOPNOTSUPP); } int vop_stdvptocnp(struct vop_vptocnp_args *ap) { struct vnode *vp = ap->a_vp; struct vnode **dvp = ap->a_vpp; struct ucred *cred = ap->a_cred; char *buf = ap->a_buf; int *buflen = ap->a_buflen; char *dirbuf, *cpos; int i, error, eofflag, dirbuflen, flags, locked, len, covered; off_t off; ino_t fileno; struct vattr va; struct nameidata nd; struct thread *td; struct dirent *dp; struct vnode *mvp; i = *buflen; error = 0; covered = 0; td = curthread; if (vp->v_type != VDIR) return (ENOENT); error = VOP_GETATTR(vp, &va, cred); if (error) return (error); VREF(vp); locked = VOP_ISLOCKED(vp); VOP_UNLOCK(vp); NDINIT_ATVP(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, "..", vp, td); flags = FREAD; error = vn_open_cred(&nd, &flags, 0, VN_OPEN_NOAUDIT, cred, NULL); if (error) { vn_lock(vp, locked | LK_RETRY); return (error); } NDFREE(&nd, NDF_ONLY_PNBUF); mvp = *dvp = nd.ni_vp; if (vp->v_mount != (*dvp)->v_mount && ((*dvp)->v_vflag & VV_ROOT) && ((*dvp)->v_mount->mnt_flag & MNT_UNION)) { *dvp = (*dvp)->v_mount->mnt_vnodecovered; VREF(mvp); VOP_UNLOCK(mvp); vn_close(mvp, FREAD, cred, td); VREF(*dvp); vn_lock(*dvp, LK_SHARED | LK_RETRY); covered = 1; } fileno = va.va_fileid; dirbuflen = DEV_BSIZE; if (dirbuflen < va.va_blocksize) dirbuflen = va.va_blocksize; dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK); if ((*dvp)->v_type != VDIR) { error = ENOENT; goto out; } off = 0; len = 0; do { /* call VOP_READDIR of parent */ error = get_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off, &cpos, &len, &eofflag, td); if (error) goto out; if ((dp->d_type != DT_WHT) && (dp->d_fileno == fileno)) { if (covered) { VOP_UNLOCK(*dvp); vn_lock(mvp, LK_SHARED | LK_RETRY); if (dirent_exists(mvp, dp->d_name, td)) { error 
= ENOENT; VOP_UNLOCK(mvp); vn_lock(*dvp, LK_SHARED | LK_RETRY); goto out; } VOP_UNLOCK(mvp); vn_lock(*dvp, LK_SHARED | LK_RETRY); } i -= dp->d_namlen; if (i < 0) { error = ENOMEM; goto out; } if (dp->d_namlen == 1 && dp->d_name[0] == '.') { error = ENOENT; } else { bcopy(dp->d_name, buf + i, dp->d_namlen); error = 0; } goto out; } } while (len > 0 || !eofflag); error = ENOENT; out: free(dirbuf, M_TEMP); if (!error) { *buflen = i; vref(*dvp); } if (covered) { vput(*dvp); vrele(mvp); } else { VOP_UNLOCK(mvp); vn_close(mvp, FREAD, cred, td); } vn_lock(vp, locked | LK_RETRY); return (error); } int vop_stdallocate(struct vop_allocate_args *ap) { #ifdef __notyet__ struct statfs *sfs; off_t maxfilesize = 0; #endif struct iovec aiov; struct vattr vattr, *vap; struct uio auio; off_t fsize, len, cur, offset; uint8_t *buf; struct thread *td; struct vnode *vp; size_t iosize; int error; buf = NULL; error = 0; td = curthread; vap = &vattr; vp = ap->a_vp; len = *ap->a_len; offset = *ap->a_offset; error = VOP_GETATTR(vp, vap, td->td_ucred); if (error != 0) goto out; fsize = vap->va_size; iosize = vap->va_blocksize; if (iosize == 0) iosize = BLKDEV_IOSIZE; if (iosize > MAXPHYS) iosize = MAXPHYS; buf = malloc(iosize, M_TEMP, M_WAITOK); #ifdef __notyet__ /* * Check if the filesystem sets f_maxfilesize; if not use * VOP_SETATTR to perform the check. */ sfs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = VFS_STATFS(vp->v_mount, sfs, td); if (error == 0) maxfilesize = sfs->f_maxfilesize; free(sfs, M_STATFS); if (error != 0) goto out; if (maxfilesize) { if (offset > maxfilesize || len > maxfilesize || offset + len > maxfilesize) { error = EFBIG; goto out; } } else #endif if (offset + len > vap->va_size) { /* * Test offset + len against the filesystem's maxfilesize. */ VATTR_NULL(vap); vap->va_size = offset + len; error = VOP_SETATTR(vp, vap, td->td_ucred); if (error != 0) goto out; VATTR_NULL(vap); vap->va_size = fsize; error = VOP_SETATTR(vp, vap, td->td_ucred); if (error != 0) goto out; } for (;;) { /* * Read and write back anything below the nominal file * size. There's currently no way outside the filesystem * to know whether this area is sparse or not. */ cur = iosize; if ((offset % iosize) != 0) cur -= (offset % iosize); if (cur > len) cur = len; if (offset < fsize) { aiov.iov_base = buf; aiov.iov_len = cur; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = offset; auio.uio_resid = cur; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_td = td; error = VOP_READ(vp, &auio, 0, td->td_ucred); if (error != 0) break; if (auio.uio_resid > 0) { bzero(buf + cur - auio.uio_resid, auio.uio_resid); } } else { bzero(buf, cur); } aiov.iov_base = buf; aiov.iov_len = cur; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = offset; auio.uio_resid = cur; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_WRITE; auio.uio_td = td; error = VOP_WRITE(vp, &auio, 0, td->td_ucred); if (error != 0) break; len -= cur; offset += cur; if (len == 0) break; if (should_yield()) break; } out: *ap->a_len = len; *ap->a_offset = offset; free(buf, M_TEMP); return (error); } int vop_stdadvise(struct vop_advise_args *ap) { struct vnode *vp; struct bufobj *bo; daddr_t startn, endn; off_t bstart, bend, start, end; int bsize, error; vp = ap->a_vp; switch (ap->a_advice) { case POSIX_FADV_WILLNEED: /* * Do nothing for now. Filesystems should provide a * custom method which starts an asynchronous read of * the requested region. 
*/ error = 0; break; case POSIX_FADV_DONTNEED: error = 0; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (VN_IS_DOOMED(vp)) { VOP_UNLOCK(vp); break; } /* * Round to block boundaries (and later possibly further to * page boundaries). Applications cannot reasonably be aware * of the boundaries, and the rounding must be to expand at * both extremities to cover enough. It still doesn't cover * read-ahead. For partial blocks, this gives unnecessary * discarding of buffers but is efficient enough since the * pages usually remain in VMIO for some time. */ bsize = vp->v_bufobj.bo_bsize; bstart = rounddown(ap->a_start, bsize); bend = roundup(ap->a_end, bsize); /* * Deactivate pages in the specified range from the backing VM * object. Pages that are resident in the buffer cache will * remain wired until their corresponding buffers are released * below. */ if (vp->v_object != NULL) { start = trunc_page(bstart); end = round_page(bend); VM_OBJECT_RLOCK(vp->v_object); vm_object_page_noreuse(vp->v_object, OFF_TO_IDX(start), OFF_TO_IDX(end)); VM_OBJECT_RUNLOCK(vp->v_object); } bo = &vp->v_bufobj; BO_RLOCK(bo); startn = bstart / bsize; endn = bend / bsize; error = bnoreuselist(&bo->bo_clean, bo, startn, endn); if (error == 0) error = bnoreuselist(&bo->bo_dirty, bo, startn, endn); BO_RUNLOCK(bo); VOP_UNLOCK(vp); break; default: error = EINVAL; break; } return (error); } int vop_stdunp_bind(struct vop_unp_bind_args *ap) { ap->a_vp->v_unpcb = ap->a_unpcb; return (0); } int vop_stdunp_connect(struct vop_unp_connect_args *ap) { *ap->a_unpcb = ap->a_vp->v_unpcb; return (0); } int vop_stdunp_detach(struct vop_unp_detach_args *ap) { ap->a_vp->v_unpcb = NULL; return (0); } static int vop_stdis_text(struct vop_is_text_args *ap) { return (ap->a_vp->v_writecount < 0); } int vop_stdset_text(struct vop_set_text_args *ap) { struct vnode *vp; struct mount *mp; int error; vp = ap->a_vp; VI_LOCK(vp); if (vp->v_writecount > 0) { error = ETXTBSY; } else { /* * If requested by fs, keep a use reference to the * vnode until the last text reference is released. 
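		 *
		 * Text references are encoded as negative v_writecount
		 * values.  A hypothetical sequence on a MNTK_TEXT_REFS
		 * mount:
		 *
		 *	VOP_SET_TEXT()    0 -> -1  VI_TEXT_REF set, vrefl()
		 *	VOP_SET_TEXT()   -1 -> -2
		 *	VOP_UNSET_TEXT() -2 -> -1
		 *	VOP_UNSET_TEXT() -1 ->  0  VI_TEXT_REF cleared, vunref()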
*/ mp = vp->v_mount; if (mp != NULL && (mp->mnt_kern_flag & MNTK_TEXT_REFS) != 0 && vp->v_writecount == 0) { vp->v_iflag |= VI_TEXT_REF; vrefl(vp); } vp->v_writecount--; error = 0; } VI_UNLOCK(vp); return (error); } static int vop_stdunset_text(struct vop_unset_text_args *ap) { struct vnode *vp; int error; bool last; vp = ap->a_vp; last = false; VI_LOCK(vp); if (vp->v_writecount < 0) { if ((vp->v_iflag & VI_TEXT_REF) != 0 && vp->v_writecount == -1) { last = true; vp->v_iflag &= ~VI_TEXT_REF; } vp->v_writecount++; error = 0; } else { error = EINVAL; } VI_UNLOCK(vp); if (last) vunref(vp); return (error); } static int vop_stdadd_writecount(struct vop_add_writecount_args *ap) { struct vnode *vp; struct mount *mp; int error; vp = ap->a_vp; VI_LOCK_FLAGS(vp, MTX_DUPOK); if (vp->v_writecount < 0) { error = ETXTBSY; } else { VNASSERT(vp->v_writecount + ap->a_inc >= 0, vp, ("neg writecount increment %d", ap->a_inc)); if (vp->v_writecount == 0) { mp = vp->v_mount; if (mp != NULL && (mp->mnt_kern_flag & MNTK_NOMSYNC) == 0) vlazy(vp); } vp->v_writecount += ap->a_inc; error = 0; } VI_UNLOCK(vp); return (error); } int vop_stdneed_inactive(struct vop_need_inactive_args *ap) { return (1); } int vop_stdioctl(struct vop_ioctl_args *ap) { struct vnode *vp; struct vattr va; off_t *offp; int error; switch (ap->a_command) { case FIOSEEKDATA: case FIOSEEKHOLE: vp = ap->a_vp; error = vn_lock(vp, LK_SHARED); if (error != 0) return (EBADF); if (vp->v_type == VREG) error = VOP_GETATTR(vp, &va, ap->a_cred); else error = ENOTTY; if (error == 0) { offp = ap->a_data; if (*offp < 0 || *offp >= va.va_size) error = ENXIO; else if (ap->a_command == FIOSEEKHOLE) *offp = va.va_size; } VOP_UNLOCK(vp); break; default: error = ENOTTY; break; } return (error); } /* * vfs default ops * used to fill the vfs function table to get reasonable default return values. */ int vfs_stdroot (mp, flags, vpp) struct mount *mp; int flags; struct vnode **vpp; { return (EOPNOTSUPP); } int vfs_stdstatfs (mp, sbp) struct mount *mp; struct statfs *sbp; { return (EOPNOTSUPP); } int vfs_stdquotactl (mp, cmds, uid, arg) struct mount *mp; int cmds; uid_t uid; void *arg; { return (EOPNOTSUPP); } int vfs_stdsync(mp, waitfor) struct mount *mp; int waitfor; { struct vnode *vp, *mvp; struct thread *td; int error, lockreq, allerror = 0; td = curthread; lockreq = LK_EXCLUSIVE | LK_INTERLOCK; if (waitfor != MNT_WAIT) lockreq |= LK_NOWAIT; /* * Force stale buffer cache information to be flushed. 
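	 *
	 * Note that for waitfor != MNT_WAIT the per-vnode lock request
	 * above includes LK_NOWAIT, so vnodes whose locks are contended
	 * are skipped on this pass instead of being slept on.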
 */
loop:
	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
		if (vp->v_bufobj.bo_dirty.bv_cnt == 0) {
			VI_UNLOCK(vp);
			continue;
		}
		if ((error = vget(vp, lockreq, td)) != 0) {
			if (error == ENOENT) {
				MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
				goto loop;
			}
			continue;
		}
		error = VOP_FSYNC(vp, waitfor, td);
		if (error)
			allerror = error;
		vput(vp);
	}
	return (allerror);
}

int
vfs_stdnosync (mp, waitfor)
	struct mount *mp;
	int waitfor;
{

	return (0);
}

static int
vop_stdcopy_file_range(struct vop_copy_file_range_args *ap)
{
	int error;

	error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp,
	    ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags,
	    ap->a_incred, ap->a_outcred, ap->a_fsizetd);
	return (error);
}

int
vfs_stdvget (mp, ino, flags, vpp)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdfhtovp (mp, fhp, flags, vpp)
	struct mount *mp;
	struct fid *fhp;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdinit (vfsp)
	struct vfsconf *vfsp;
{

	return (0);
}

int
vfs_stduninit (vfsp)
	struct vfsconf *vfsp;
{

	return(0);
}

int
vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname)
	struct mount *mp;
	int cmd;
	struct vnode *filename_vp;
	int attrnamespace;
	const char *attrname;
{

	if (filename_vp != NULL)
		VOP_UNLOCK(filename_vp);
	return (EOPNOTSUPP);
}

int
vfs_stdsysctl(mp, op, req)
	struct mount *mp;
	fsctlop_t op;
	struct sysctl_req *req;
{

	return (EOPNOTSUPP);
}

static vop_bypass_t *
bp_by_off(struct vop_vector *vop, struct vop_generic_args *a)
{

	return (*(vop_bypass_t **)((char *)vop + a->a_desc->vdesc_vop_offset));
}

int
vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a)
{
	vop_bypass_t *bp;
	int prev_stops, rc;

-	for (; vop != NULL; vop = vop->vop_default) {
-		bp = bp_by_off(vop, a);
-		if (bp != NULL)
-			break;
-
-		/*
-		 * Bypass is not really supported.  It is done for
-		 * fallback to unimplemented vops in the default
-		 * vector.
-		 */
-		bp = vop->vop_bypass;
-		if (bp != NULL)
-			break;
-	}
+	bp = bp_by_off(vop, a);
	MPASS(bp != NULL);

	prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT);
	rc = bp(a);
	sigallowstop(prev_stops);
	return (rc);
}
Index: projects/clang1000-import/sys/kern/vfs_subr.c
===================================================================
--- projects/clang1000-import/sys/kern/vfs_subr.c	(revision 357178)
+++ projects/clang1000-import/sys/kern/vfs_subr.c	(revision 357179)
@@ -1,6397 +1,6401 @@
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 */ /* * External virtual filesystem routines */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_watchdog.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif static void delmntque(struct vnode *vp); static int flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo, int slpflag, int slptimeo); static void syncer_shutdown(void *arg, int howto); static int vtryrecycle(struct vnode *vp); static void v_init_counters(struct vnode *); static void v_incr_devcount(struct vnode *); static void v_decr_devcount(struct vnode *); static void vgonel(struct vnode *); static void vfs_knllock(void *arg); static void vfs_knlunlock(void *arg); static void vfs_knl_assert_locked(void *arg); static void vfs_knl_assert_unlocked(void *arg); static void destroy_vpollinfo(struct vpollinfo *vi); static int v_inval_buf_range_locked(struct vnode *vp, struct bufobj *bo, daddr_t startlbn, daddr_t endlbn); static void vnlru_recalc(void); /* * These fences are intended for cases where some synchronization is * needed between access of v_iflags and lockless vnode refcount (v_holdcnt * and v_usecount) updates. Access to v_iflags is generally synchronized * by the interlock, but we have some internal assertions that check vnode * flags without acquiring the lock. Thus, these fences are INVARIANTS-only * for now. */ #ifdef INVARIANTS #define VNODE_REFCOUNT_FENCE_ACQ() atomic_thread_fence_acq() #define VNODE_REFCOUNT_FENCE_REL() atomic_thread_fence_rel() #else #define VNODE_REFCOUNT_FENCE_ACQ() #define VNODE_REFCOUNT_FENCE_REL() #endif /* * Number of vnodes in existence. Increased whenever getnewvnode() * allocates a new vnode, decreased in vdropl() for VIRF_DOOMED vnode. */ static u_long __exclusive_cache_line numvnodes; SYSCTL_ULONG(_vfs, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "Number of vnodes in existence"); static counter_u64_t vnodes_created; SYSCTL_COUNTER_U64(_vfs, OID_AUTO, vnodes_created, CTLFLAG_RD, &vnodes_created, "Number of vnodes created by getnewvnode"); /* * Conversion tables for conversion from vnode types to inode formats * and back. 
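 *
 * For example, the IFTOVT() and VTTOIF() macros index these tables,
 * so IFTOVT(S_IFDIR) yields VDIR and VTTOIF(VDIR) yields S_IFDIR
 * again.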
 */
enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
};
int vttoif_tab[10] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT, S_IFMT
};

/*
 * List of allocated vnodes in the system.
 */
static TAILQ_HEAD(freelst, vnode) vnode_list;
static struct vnode *vnode_list_free_marker;
static struct vnode *vnode_list_reclaim_marker;

/*
 * "Free" vnode target.  Free vnodes are rarely completely free, but are
 * just ones that are cheap to recycle.  Usually they are for files which
 * have been stat'd but not read; these usually have inode and namecache
 * data attached to them.  This target is the preferred minimum size of a
 * sub-cache consisting mostly of such files.  The system balances the size
 * of this sub-cache with its complement to try to prevent either from
 * thrashing while the other is relatively inactive.  The targets express
 * a preference for the best balance.
 *
 * "Above" this target there are 2 further targets (watermarks) related
 * to recycling of free vnodes.  In the best-operating case, the cache is
 * exactly full, the free list has size between vlowat and vhiwat above the
 * free target, and recycling from it and normal use maintains this state.
 * Sometimes the free list is below vlowat or even empty, but this state
 * is even better for immediate use provided the cache is not full.
 * Otherwise, vnlru_proc() runs to reclaim enough vnodes (usually non-free
 * ones) to reach one of these states.  The watermarks are currently hard-
 * coded as 4% and 9% of the available space higher.  These and the default
 * of 25% for wantfreevnodes are too large if the memory size is large.
 * E.g., 9% of 75% of MAXVNODES is more than 566000 vnodes to reclaim
 * whenever vnlru_proc() becomes active.
 */
static long wantfreevnodes;
static long __exclusive_cache_line freevnodes;
SYSCTL_ULONG(_vfs, OID_AUTO, freevnodes, CTLFLAG_RD,
    &freevnodes, 0, "Number of \"free\" vnodes");
static long freevnodes_old;

static counter_u64_t recycles_count;
SYSCTL_COUNTER_U64(_vfs, OID_AUTO, recycles, CTLFLAG_RD, &recycles_count,
    "Number of vnodes recycled to meet vnode cache targets");

static counter_u64_t recycles_free_count;
SYSCTL_COUNTER_U64(_vfs, OID_AUTO, recycles_free, CTLFLAG_RD,
    &recycles_free_count,
    "Number of free vnodes recycled to meet vnode cache targets");

/*
 * Various variables used for debugging the new implementation of
 * reassignbuf().
 * XXX these are probably of (very) limited utility now.
 */
static int reassignbufcalls;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW | CTLFLAG_STATS,
    &reassignbufcalls, 0, "Number of calls to reassignbuf");

static counter_u64_t deferred_inact;
SYSCTL_COUNTER_U64(_vfs, OID_AUTO, deferred_inact, CTLFLAG_RD,
    &deferred_inact,
    "Number of times inactive processing was deferred");

/* To keep more than one thread at a time from running vfs_getnewfsid */
static struct mtx mntid_mtx;

/*
 * Lock for any access to the following:
 *	vnode_list
 *	numvnodes
 *	freevnodes
 */
static struct mtx __exclusive_cache_line vnode_list_mtx;

/* Publicly exported FS */
struct nfs_public nfs_pub;

static uma_zone_t buf_trie_zone;

/* Zone for allocation of new vnodes - used exclusively by getnewvnode() */
static uma_zone_t vnode_zone;
static uma_zone_t vnodepoll_zone;

/*
 * The workitem queue.
 *
 * It is useful to delay writes of file data and filesystem metadata
 * for tens of seconds so that quickly created and deleted files need
 * not waste disk bandwidth being created and removed.
To realize this, * we append vnodes to a "workitem" queue. When running with a soft * updates implementation, most pending metadata dependencies should * not wait for more than a few seconds. Thus, mounted on block devices * are delayed only about a half the time that file data is delayed. * Similarly, directory updates are more critical, so are only delayed * about a third the time that file data is delayed. Thus, there are * SYNCER_MAXDELAY queues that are processed round-robin at a rate of * one each second (driven off the filesystem syncer process). The * syncer_delayno variable indicates the next queue that is to be processed. * Items that need to be processed soon are placed in this queue: * * syncer_workitem_pending[syncer_delayno] * * A delay of fifteen seconds is done by placing the request fifteen * entries later in the queue: * * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] * */ static int syncer_delayno; static long syncer_mask; LIST_HEAD(synclist, bufobj); static struct synclist *syncer_workitem_pending; /* * The sync_mtx protects: * bo->bo_synclist * sync_vnode_count * syncer_delayno * syncer_state * syncer_workitem_pending * syncer_worklist_len * rushjob */ static struct mtx sync_mtx; static struct cv sync_wakeup; #define SYNCER_MAXDELAY 32 static int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ static int syncdelay = 30; /* max time to delay syncing data */ static int filedelay = 30; /* time to delay syncing files */ SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, "Time to delay syncing files (in seconds)"); static int dirdelay = 29; /* time to delay syncing directories */ SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, "Time to delay syncing directories (in seconds)"); static int metadelay = 28; /* time to delay syncing metadata */ SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, "Time to delay syncing metadata (in seconds)"); static int rushjob; /* number of slots to run ASAP */ static int stat_rush_requests; /* number of times I/O speeded up */ SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, "Number of times I/O speeded up (rush requests)"); #define VDBATCH_SIZE 8 struct vdbatch { u_int index; long freevnodes; struct mtx lock; struct vnode *tab[VDBATCH_SIZE]; }; DPCPU_DEFINE_STATIC(struct vdbatch, vd); static void vdbatch_dequeue(struct vnode *vp); /* * When shutting down the syncer, run it at four times normal speed. */ #define SYNCER_SHUTDOWN_SPEEDUP 4 static int sync_vnode_count; static int syncer_worklist_len; static enum { SYNCER_RUNNING, SYNCER_SHUTTING_DOWN, SYNCER_FINAL_DELAY } syncer_state; /* Target for maximum number of vnodes. */ u_long desiredvnodes; static u_long gapvnodes; /* gap between wanted and desired */ static u_long vhiwat; /* enough extras after expansion */ static u_long vlowat; /* minimal extras before expansion */ static u_long vstir; /* nonzero to stir non-free vnodes */ static volatile int vsmalltrigger = 8; /* pref to keep if > this many pages */ static u_long vnlru_read_freevnodes(void); /* * Note that no attempt is made to sanitize these parameters. 
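 *
 * For example, a "sysctl kern.maxvnodes=1000000" request is handled
 * by sysctl_maxvnodes() below, which also rescales wantfreevnodes to
 * a quarter of the new value and recomputes the watermarks via
 * vnlru_recalc().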
*/ static int sysctl_maxvnodes(SYSCTL_HANDLER_ARGS) { u_long val; int error; val = desiredvnodes; error = sysctl_handle_long(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (val == desiredvnodes) return (0); mtx_lock(&vnode_list_mtx); desiredvnodes = val; wantfreevnodes = desiredvnodes / 4; vnlru_recalc(); mtx_unlock(&vnode_list_mtx); /* * XXX There is no protection against multiple threads changing * desiredvnodes at the same time. Locking above only helps vnlru and * getnewvnode. */ vfs_hash_changesize(desiredvnodes); cache_changesize(desiredvnodes); return (0); } SYSCTL_PROC(_kern, KERN_MAXVNODES, maxvnodes, CTLTYPE_ULONG | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0, sysctl_maxvnodes, "UL", "Target for maximum number of vnodes"); static int sysctl_wantfreevnodes(SYSCTL_HANDLER_ARGS) { u_long val; int error; val = wantfreevnodes; error = sysctl_handle_long(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (val == wantfreevnodes) return (0); mtx_lock(&vnode_list_mtx); wantfreevnodes = val; vnlru_recalc(); mtx_unlock(&vnode_list_mtx); return (0); } SYSCTL_PROC(_vfs, OID_AUTO, wantfreevnodes, CTLTYPE_ULONG | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0, sysctl_wantfreevnodes, "UL", "Target for minimum number of \"free\" vnodes"); SYSCTL_ULONG(_kern, OID_AUTO, minvnodes, CTLFLAG_RW, &wantfreevnodes, 0, "Old name for vfs.wantfreevnodes (legacy)"); static int vnlru_nowhere; SYSCTL_INT(_debug, OID_AUTO, vnlru_nowhere, CTLFLAG_RW, &vnlru_nowhere, 0, "Number of times the vnlru process ran without success"); static int sysctl_try_reclaim_vnode(SYSCTL_HANDLER_ARGS) { struct vnode *vp; struct nameidata nd; char *buf; unsigned long ndflags; int error; if (req->newptr == NULL) return (EINVAL); if (req->newlen >= PATH_MAX) return (E2BIG); buf = malloc(PATH_MAX, M_TEMP, M_WAITOK); error = SYSCTL_IN(req, buf, req->newlen); if (error != 0) goto out; buf[req->newlen] = '\0'; ndflags = LOCKLEAF | NOFOLLOW | AUDITVNODE1 | NOCACHE | SAVENAME; NDINIT(&nd, LOOKUP, ndflags, UIO_SYSSPACE, buf, curthread); if ((error = namei(&nd)) != 0) goto out; vp = nd.ni_vp; if (VN_IS_DOOMED(vp)) { /* * This vnode is being recycled. Return != 0 to let the caller * know that the sysctl had no effect. Return EAGAIN because a * subsequent call will likely succeed (since namei will create * a new vnode if necessary) */ error = EAGAIN; goto putvnode; } counter_u64_add(recycles_count, 1); vgone(vp); putvnode: NDFREE(&nd, 0); out: free(buf, M_TEMP); return (error); } static int sysctl_ftry_reclaim_vnode(SYSCTL_HANDLER_ARGS) { struct thread *td = curthread; struct vnode *vp; struct file *fp; int error; int fd; if (req->newptr == NULL) return (EBADF); error = sysctl_handle_int(oidp, &fd, 0, req); if (error != 0) return (error); error = getvnode(curthread, fd, &cap_fcntl_rights, &fp); if (error != 0) return (error); vp = fp->f_vnode; error = vn_lock(vp, LK_EXCLUSIVE); if (error != 0) goto drop; counter_u64_add(recycles_count, 1); vgone(vp); VOP_UNLOCK(vp); drop: fdrop(fp, td); return (error); } SYSCTL_PROC(_debug, OID_AUTO, try_reclaim_vnode, CTLTYPE_STRING | CTLFLAG_MPSAFE | CTLFLAG_WR, NULL, 0, sysctl_try_reclaim_vnode, "A", "Try to reclaim a vnode by its pathname"); SYSCTL_PROC(_debug, OID_AUTO, ftry_reclaim_vnode, CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_WR, NULL, 0, sysctl_ftry_reclaim_vnode, "I", "Try to reclaim a vnode by its file descriptor"); /* Shift count for (uintptr_t)vp to initialize vp->v_hash. */ static int vnsz2log; /* * Support for the bufobj clean & dirty pctrie. 
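 *
 * The node allocator below must use M_NOWAIT because it can be
 * called with a bufobj lock held; vntblinit() compensates by
 * preallocating one trie node per buf so that inserts cannot fail.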
*/ static void * buf_trie_alloc(struct pctrie *ptree) { return uma_zalloc(buf_trie_zone, M_NOWAIT); } static void buf_trie_free(struct pctrie *ptree, void *node) { uma_zfree(buf_trie_zone, node); } PCTRIE_DEFINE(BUF, buf, b_lblkno, buf_trie_alloc, buf_trie_free); /* * Initialize the vnode management data structures. * * Reevaluate the following cap on the number of vnodes after the physical * memory size exceeds 512GB. In the limit, as the physical memory size * grows, the ratio of the memory size in KB to vnodes approaches 64:1. */ #ifndef MAXVNODES_MAX #define MAXVNODES_MAX (512UL * 1024 * 1024 / 64) /* 8M */ #endif static MALLOC_DEFINE(M_VNODE_MARKER, "vnodemarker", "vnode marker"); static struct vnode * vn_alloc_marker(struct mount *mp) { struct vnode *vp; vp = malloc(sizeof(struct vnode), M_VNODE_MARKER, M_WAITOK | M_ZERO); vp->v_type = VMARKER; vp->v_mount = mp; return (vp); } static void vn_free_marker(struct vnode *vp) { MPASS(vp->v_type == VMARKER); free(vp, M_VNODE_MARKER); } /* * Initialize a vnode as it first enters the zone. */ static int vnode_init(void *mem, int size, int flags) { struct vnode *vp; vp = mem; bzero(vp, size); /* * Setup locks. */ vp->v_vnlock = &vp->v_lock; mtx_init(&vp->v_interlock, "vnode interlock", NULL, MTX_DEF); /* * By default, don't allow shared locks unless filesystems opt-in. */ lockinit(vp->v_vnlock, PVFS, "vnode", VLKTIMEOUT, LK_NOSHARE | LK_IS_VNODE); /* * Initialize bufobj. */ bufobj_init(&vp->v_bufobj, vp); /* * Initialize namecache. */ LIST_INIT(&vp->v_cache_src); TAILQ_INIT(&vp->v_cache_dst); /* * Initialize rangelocks. */ rangelock_init(&vp->v_rl); vp->v_dbatchcpu = NOCPU; mtx_lock(&vnode_list_mtx); TAILQ_INSERT_BEFORE(vnode_list_free_marker, vp, v_vnodelist); mtx_unlock(&vnode_list_mtx); return (0); } /* * Free a vnode when it is cleared from the zone. */ static void vnode_fini(void *mem, int size) { struct vnode *vp; struct bufobj *bo; vp = mem; vdbatch_dequeue(vp); mtx_lock(&vnode_list_mtx); TAILQ_REMOVE(&vnode_list, vp, v_vnodelist); mtx_unlock(&vnode_list_mtx); rangelock_destroy(&vp->v_rl); lockdestroy(vp->v_vnlock); mtx_destroy(&vp->v_interlock); bo = &vp->v_bufobj; rw_destroy(BO_LOCKPTR(bo)); } /* * Provide the size of NFS nclnode and NFS fh for calculation of the * vnode memory consumption. The size is specified directly to * eliminate dependency on NFS-private header. * * Other filesystems may use bigger or smaller (like UFS and ZFS) * private inode data, but the NFS-based estimation is ample enough. * Still, we care about differences in the size between 64- and 32-bit * platforms. * * Namecache structure size is heuristically * sizeof(struct namecache_ts) + CACHE_PATH_CUTOFF + 1. */ #ifdef _LP64 #define NFS_NCLNODE_SZ (528 + 64) #define NC_SZ 148 #else #define NFS_NCLNODE_SZ (360 + 32) #define NC_SZ 92 #endif static void vntblinit(void *dummy __unused) { struct vdbatch *vd; int cpu, physvnodes, virtvnodes; u_int i; /* * Desiredvnodes is a function of the physical memory size and the * kernel's heap size. Generally speaking, it scales with the * physical memory size. The ratio of desiredvnodes to the physical * memory size is 1:16 until desiredvnodes exceeds 98,304. * Thereafter, the * marginal ratio of desiredvnodes to the physical memory size is * 1:64. However, desiredvnodes is limited by the kernel's heap * size. The memory required by desiredvnodes vnodes and vm objects * must not exceed 1/10th of the kernel's heap size. 
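 *
 * As a worked example (assuming 4 KB pages and ignoring maxproc):
 * with 1M physical pages, pgtok() yields 4M KB, so physvnodes is
 * roughly 4M/64 + 3 * min(98304 * 16, 4M)/64, i.e. about
 * 65536 + 73728 = 139264 vnodes, before the virtvnodes heap limit
 * is applied.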
*/ physvnodes = maxproc + pgtok(vm_cnt.v_page_count) / 64 + 3 * min(98304 * 16, pgtok(vm_cnt.v_page_count)) / 64; virtvnodes = vm_kmem_size / (10 * (sizeof(struct vm_object) + sizeof(struct vnode) + NC_SZ * ncsizefactor + NFS_NCLNODE_SZ)); desiredvnodes = min(physvnodes, virtvnodes); if (desiredvnodes > MAXVNODES_MAX) { if (bootverbose) printf("Reducing kern.maxvnodes %lu -> %lu\n", desiredvnodes, MAXVNODES_MAX); desiredvnodes = MAXVNODES_MAX; } wantfreevnodes = desiredvnodes / 4; mtx_init(&mntid_mtx, "mntid", NULL, MTX_DEF); TAILQ_INIT(&vnode_list); mtx_init(&vnode_list_mtx, "vnode_list", NULL, MTX_DEF); /* * The lock is taken to appease WITNESS. */ mtx_lock(&vnode_list_mtx); vnlru_recalc(); mtx_unlock(&vnode_list_mtx); vnode_list_free_marker = vn_alloc_marker(NULL); TAILQ_INSERT_HEAD(&vnode_list, vnode_list_free_marker, v_vnodelist); vnode_list_reclaim_marker = vn_alloc_marker(NULL); TAILQ_INSERT_HEAD(&vnode_list, vnode_list_reclaim_marker, v_vnodelist); vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL, vnode_init, vnode_fini, UMA_ALIGN_PTR, 0); vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); /* * Preallocate enough nodes to support one-per buf so that * we can not fail an insert. reassignbuf() callers can not * tolerate the insertion failure. */ buf_trie_zone = uma_zcreate("BUF TRIE", pctrie_node_size(), NULL, NULL, pctrie_zone_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE | UMA_ZONE_VM); uma_prealloc(buf_trie_zone, nbuf); vnodes_created = counter_u64_alloc(M_WAITOK); recycles_count = counter_u64_alloc(M_WAITOK); recycles_free_count = counter_u64_alloc(M_WAITOK); deferred_inact = counter_u64_alloc(M_WAITOK); /* * Initialize the filesystem syncer. */ syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, &syncer_mask); syncer_maxdelay = syncer_mask + 1; mtx_init(&sync_mtx, "Syncer mtx", NULL, MTX_DEF); cv_init(&sync_wakeup, "syncer"); for (i = 1; i <= sizeof(struct vnode); i <<= 1) vnsz2log++; vnsz2log--; CPU_FOREACH(cpu) { vd = DPCPU_ID_PTR((cpu), vd); bzero(vd, sizeof(*vd)); mtx_init(&vd->lock, "vdbatch", NULL, MTX_DEF); } } SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vntblinit, NULL); /* * Mark a mount point as busy. Used to synchronize access and to delay * unmounting. Eventually, mountlist_mtx is not released on failure. * * vfs_busy() is a custom lock, it can block the caller. * vfs_busy() only sleeps if the unmount is active on the mount point. * For a mountpoint mp, vfs_busy-enforced lock is before lock of any * vnode belonging to mp. * * Lookup uses vfs_busy() to traverse mount points. * root fs var fs * / vnode lock A / vnode lock (/var) D * /var vnode lock B /log vnode lock(/var/log) E * vfs_busy lock C vfs_busy lock F * * Within each file system, the lock order is C->A->B and F->D->E. * * When traversing across mounts, the system follows that lock order: * * C->A->B * | * +->F->D->E * * The lookup() process for namei("/var") illustrates the process: * VOP_LOOKUP() obtains B while A is held * vfs_busy() obtains a shared lock on F while A and B are held * vput() releases lock on B * vput() releases lock on A * VFS_ROOT() obtains lock on D while shared lock on F is held * vfs_unbusy() releases shared lock on F * vn_lock() obtains lock on deadfs vnode vp_crossmp instead of A. * Attempt to lock A (instead of vp_crossmp) while D is held would * violate the global order, causing deadlocks. * * dounmount() locks B while F is drained. 
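 *
 * A typical consumer (sketch) looks like:
 *
 *	if (vfs_busy(mp, MBF_NOWAIT) != 0)
 *		return (ENOENT);
 *	error = VFS_ROOT(mp, LK_SHARED, &vp);
 *	...
 *	vfs_unbusy(mp);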
 */
int
vfs_busy(struct mount *mp, int flags)
{

	MPASS((flags & ~MBF_MASK) == 0);
	CTR3(KTR_VFS, "%s: mp %p with flags %d", __func__, mp, flags);

	if (vfs_op_thread_enter(mp)) {
		MPASS((mp->mnt_kern_flag & MNTK_DRAINING) == 0);
		MPASS((mp->mnt_kern_flag & MNTK_UNMOUNT) == 0);
		MPASS((mp->mnt_kern_flag & MNTK_REFEXPIRE) == 0);
		vfs_mp_count_add_pcpu(mp, ref, 1);
		vfs_mp_count_add_pcpu(mp, lockref, 1);
		vfs_op_thread_exit(mp);
		if (flags & MBF_MNTLSTLOCK)
			mtx_unlock(&mountlist_mtx);
		return (0);
	}

	MNT_ILOCK(mp);
	vfs_assert_mount_counters(mp);
	MNT_REF(mp);
	/*
	 * If mount point is currently being unmounted, sleep until the
	 * mount point fate is decided.  If thread doing the unmounting fails,
	 * it will clear MNTK_UNMOUNT flag before waking us up, indicating
	 * that this mount point has survived the unmount attempt and vfs_busy
	 * should retry.  Otherwise the unmounter thread will set MNTK_REFEXPIRE
	 * flag in addition to MNTK_UNMOUNT, indicating that mount point is
	 * about to be really destroyed.  vfs_busy needs to release its
	 * reference on the mount point in this case and return with ENOENT,
	 * telling the caller that the mount it tried to busy is no longer
	 * valid.
	 */
	while (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		if (flags & MBF_NOWAIT || mp->mnt_kern_flag & MNTK_REFEXPIRE) {
			MNT_REL(mp);
			MNT_IUNLOCK(mp);
			CTR1(KTR_VFS, "%s: failed busying before sleeping",
			    __func__);
			return (ENOENT);
		}
		if (flags & MBF_MNTLSTLOCK)
			mtx_unlock(&mountlist_mtx);
		mp->mnt_kern_flag |= MNTK_MWAIT;
		msleep(mp, MNT_MTX(mp), PVFS | PDROP, "vfs_busy", 0);
		if (flags & MBF_MNTLSTLOCK)
			mtx_lock(&mountlist_mtx);
		MNT_ILOCK(mp);
	}
	if (flags & MBF_MNTLSTLOCK)
		mtx_unlock(&mountlist_mtx);
	mp->mnt_lockref++;
	MNT_IUNLOCK(mp);
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(struct mount *mp)
{
	int c;

	CTR2(KTR_VFS, "%s: mp %p", __func__, mp);

	if (vfs_op_thread_enter(mp)) {
		MPASS((mp->mnt_kern_flag & MNTK_DRAINING) == 0);
		vfs_mp_count_sub_pcpu(mp, lockref, 1);
		vfs_mp_count_sub_pcpu(mp, ref, 1);
		vfs_op_thread_exit(mp);
		return;
	}

	MNT_ILOCK(mp);
	vfs_assert_mount_counters(mp);
	MNT_REL(mp);
	c = --mp->mnt_lockref;
	if (mp->mnt_vfs_ops == 0) {
		MPASS((mp->mnt_kern_flag & MNTK_DRAINING) == 0);
		MNT_IUNLOCK(mp);
		return;
	}
	if (c < 0)
		vfs_dump_mount_counters(mp);
	if (c == 0 && (mp->mnt_kern_flag & MNTK_DRAINING) != 0) {
		MPASS(mp->mnt_kern_flag & MNTK_UNMOUNT);
		CTR1(KTR_VFS, "%s: waking up waiters", __func__);
		mp->mnt_kern_flag &= ~MNTK_DRAINING;
		wakeup(&mp->mnt_lockref);
	}
	MNT_IUNLOCK(mp);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid_t *fsid)
{
	struct mount *mp;

	CTR2(KTR_VFS, "%s: fsid %p", __func__, fsid);
	mtx_lock(&mountlist_mtx);
	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			vfs_ref(mp);
			mtx_unlock(&mountlist_mtx);
			return (mp);
		}
	}
	mtx_unlock(&mountlist_mtx);
	CTR2(KTR_VFS, "%s: lookup failed for %p id", __func__, fsid);
	return ((struct mount *) 0);
}

/*
 * Lookup a mount point by filesystem identifier, busying it before
 * returning.
 *
 * To avoid congestion on mountlist_mtx, implement simple direct-mapped
 * cache for popular filesystem identifiers.  The cache is lockless, using
 * the fact that struct mount's are never freed.  In worst case we may
 * get pointer to unmounted or even different filesystem, so we have to
 * check what we got, and go the slow way if so.
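 *
 * For example, if a cached slot points at a struct mount that has
 * since been reused for another filesystem, the f_fsid re-check
 * after vfs_busy() fails, the busy reference is dropped again, and
 * the locked mountlist walk below takes over.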
*/ struct mount * vfs_busyfs(fsid_t *fsid) { #define FSID_CACHE_SIZE 256 typedef struct mount * volatile vmp_t; static vmp_t cache[FSID_CACHE_SIZE]; struct mount *mp; int error; uint32_t hash; CTR2(KTR_VFS, "%s: fsid %p", __func__, fsid); hash = fsid->val[0] ^ fsid->val[1]; hash = (hash >> 16 ^ hash) & (FSID_CACHE_SIZE - 1); mp = cache[hash]; if (mp == NULL || mp->mnt_stat.f_fsid.val[0] != fsid->val[0] || mp->mnt_stat.f_fsid.val[1] != fsid->val[1]) goto slow; if (vfs_busy(mp, 0) != 0) { cache[hash] = NULL; goto slow; } if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) return (mp); else vfs_unbusy(mp); slow: mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { error = vfs_busy(mp, MBF_MNTLSTLOCK); if (error) { cache[hash] = NULL; mtx_unlock(&mountlist_mtx); return (NULL); } cache[hash] = mp; return (mp); } } CTR2(KTR_VFS, "%s: lookup failed for %p id", __func__, fsid); mtx_unlock(&mountlist_mtx); return ((struct mount *) 0); } /* * Check if a user can access privileged mount options. */ int vfs_suser(struct mount *mp, struct thread *td) { int error; if (jailed(td->td_ucred)) { /* * If the jail of the calling thread lacks permission for * this type of file system, deny immediately. */ if (!prison_allow(td->td_ucred, mp->mnt_vfc->vfc_prison_flag)) return (EPERM); /* * If the file system was mounted outside the jail of the * calling thread, deny immediately. */ if (prison_check(td->td_ucred, mp->mnt_cred) != 0) return (EPERM); } /* * If file system supports delegated administration, we don't check * for the PRIV_VFS_MOUNT_OWNER privilege - it will be better verified * by the file system itself. * If this is not the user that did original mount, we check for * the PRIV_VFS_MOUNT_OWNER privilege. */ if (!(mp->mnt_vfc->vfc_flags & VFCF_DELEGADMIN) && mp->mnt_cred->cr_uid != td->td_ucred->cr_uid) { if ((error = priv_check(td, PRIV_VFS_MOUNT_OWNER)) != 0) return (error); } return (0); } /* * Get a new unique fsid. Try to make its val[0] unique, since this value * will be used to create fake device numbers for stat(). Also try (but * not so hard) make its val[0] unique mod 2^16, since some emulators only * support 16-bit device numbers. We end up with unique val[0]'s for the * first 2^16 calls and unique val[0]'s mod 2^16 for the first 2^8 calls. * * Keep in mind that several mounts may be running in parallel. Starting * the search one past where the previous search terminated is both a * micro-optimization and a defense against returning the same fsid to * different mounts. */ void vfs_getnewfsid(struct mount *mp) { static uint16_t mntid_base; struct mount *nmp; fsid_t tfsid; int mtype; CTR2(KTR_VFS, "%s: mp %p", __func__, mp); mtx_lock(&mntid_mtx); mtype = mp->mnt_vfc->vfc_typenum; tfsid.val[1] = mtype; mtype = (mtype & 0xFF) << 24; for (;;) { tfsid.val[0] = makedev(255, mtype | ((mntid_base & 0xFF00) << 8) | (mntid_base & 0xFF)); mntid_base++; if ((nmp = vfs_getvfs(&tfsid)) == NULL) break; vfs_rel(nmp); } mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; mp->mnt_stat.f_fsid.val[1] = tfsid.val[1]; mtx_unlock(&mntid_mtx); } /* * Knob to control the precision of file timestamps: * * 0 = seconds only; nanoseconds zeroed. * 1 = seconds and nanoseconds, accurate within 1/HZ. * 2 = seconds and nanoseconds, truncated to microseconds. * >=3 = seconds and nanoseconds, maximum precision. 
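 *
 * For example, "sysctl vfs.timestamp_precision=3" selects full
 * nanotime(9) resolution for the timestamps handed out by
 * vfs_timestamp() below.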
*/ enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC }; static int timestamp_precision = TSP_USEC; SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW, ×tamp_precision, 0, "File timestamp precision (0: seconds, " "1: sec + ns accurate to 1/HZ, 2: sec + ns truncated to us, " "3+: sec + ns (max. precision))"); /* * Get a current timestamp. */ void vfs_timestamp(struct timespec *tsp) { struct timeval tv; switch (timestamp_precision) { case TSP_SEC: tsp->tv_sec = time_second; tsp->tv_nsec = 0; break; case TSP_HZ: getnanotime(tsp); break; case TSP_USEC: microtime(&tv); TIMEVAL_TO_TIMESPEC(&tv, tsp); break; case TSP_NSEC: default: nanotime(tsp); break; } } /* * Set vnode attributes to VNOVAL */ void vattr_null(struct vattr *vap) { vap->va_type = VNON; vap->va_size = VNOVAL; vap->va_bytes = VNOVAL; vap->va_mode = VNOVAL; vap->va_nlink = VNOVAL; vap->va_uid = VNOVAL; vap->va_gid = VNOVAL; vap->va_fsid = VNOVAL; vap->va_fileid = VNOVAL; vap->va_blocksize = VNOVAL; vap->va_rdev = VNOVAL; vap->va_atime.tv_sec = VNOVAL; vap->va_atime.tv_nsec = VNOVAL; vap->va_mtime.tv_sec = VNOVAL; vap->va_mtime.tv_nsec = VNOVAL; vap->va_ctime.tv_sec = VNOVAL; vap->va_ctime.tv_nsec = VNOVAL; vap->va_birthtime.tv_sec = VNOVAL; vap->va_birthtime.tv_nsec = VNOVAL; vap->va_flags = VNOVAL; vap->va_gen = VNOVAL; vap->va_vaflags = 0; } /* * Try to reduce the total number of vnodes. * * This routine (and its user) are buggy in at least the following ways: * - all parameters were picked years ago when RAM sizes were significantly * smaller * - it can pick vnodes based on pages used by the vm object, but filesystems * like ZFS don't use it making the pick broken * - since ZFS has its own aging policy it gets partially combated by this one * - a dedicated method should be provided for filesystems to let them decide * whether the vnode should be recycled * * This routine is called when we have too many vnodes. It attempts * to free vnodes and will potentially free vnodes that still * have VM backing store (VM backing store is typically the cause * of a vnode blowout so we want to do this). Therefore, this operation * is not considered cheap. * * A number of conditions may prevent a vnode from being reclaimed. * the buffer cache may have references on the vnode, a directory * vnode may still have references due to the namei cache representing * underlying files, or the vnode may be in active use. It is not * desirable to reuse such vnodes. These conditions may cause the * number of vnodes to reach some minimum value regardless of what * you set kern.maxvnodes to. Do not set kern.maxvnodes too low. * * @param reclaim_nc_src Only reclaim directories with outgoing namecache * entries if this argument is strue * @param trigger Only reclaim vnodes with fewer than this many resident * pages. * @param target How many vnodes to reclaim. * @return The number of vnodes that were reclaimed. */ static int vlrureclaim(bool reclaim_nc_src, int trigger, u_long target) { struct vnode *vp, *mvp; struct mount *mp; u_long done; bool retried; mtx_assert(&vnode_list_mtx, MA_OWNED); retried = false; done = 0; mvp = vnode_list_reclaim_marker; restart: vp = mvp; while (done < target) { vp = TAILQ_NEXT(vp, v_vnodelist); if (__predict_false(vp == NULL)) break; if (__predict_false(vp->v_type == VMARKER)) continue; /* * If it's been deconstructed already, it's still * referenced, or it exceeds the trigger, skip it. * Also skip free vnodes. We are trying to make space * to expand the free list, not reduce it. 
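		 *
		 * Each test here is repeated under the vnode interlock
		 * below, since v_usecount, v_holdcnt and the namecache
		 * lists can all change between this unlocked pre-check
		 * and VI_TRYLOCK().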
*/ if (vp->v_usecount > 0 || vp->v_holdcnt == 0 || (!reclaim_nc_src && !LIST_EMPTY(&vp->v_cache_src))) goto next_iter; if (vp->v_type == VBAD || vp->v_type == VNON) goto next_iter; if (!VI_TRYLOCK(vp)) goto next_iter; if (vp->v_usecount > 0 || vp->v_holdcnt == 0 || (!reclaim_nc_src && !LIST_EMPTY(&vp->v_cache_src)) || vp->v_type == VBAD || vp->v_type == VNON || (vp->v_object != NULL && vp->v_object->resident_page_count > trigger)) { VI_UNLOCK(vp); goto next_iter; } vholdl(vp); VI_UNLOCK(vp); TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist); TAILQ_INSERT_AFTER(&vnode_list, vp, mvp, v_vnodelist); mtx_unlock(&vnode_list_mtx); if (vn_start_write(vp, &mp, V_NOWAIT) != 0) { vdrop(vp); goto next_iter_unlocked; } if (VOP_LOCK(vp, LK_EXCLUSIVE|LK_NOWAIT) != 0) { vdrop(vp); vn_finished_write(mp); goto next_iter_unlocked; } VI_LOCK(vp); if (vp->v_usecount > 0 || (!reclaim_nc_src && !LIST_EMPTY(&vp->v_cache_src)) || (vp->v_object != NULL && vp->v_object->resident_page_count > trigger)) { VOP_UNLOCK(vp); vdropl(vp); vn_finished_write(mp); goto next_iter_unlocked; } counter_u64_add(recycles_count, 1); vgonel(vp); VOP_UNLOCK(vp); vdropl(vp); vn_finished_write(mp); done++; next_iter_unlocked: if (should_yield()) kern_yield(PRI_USER); mtx_lock(&vnode_list_mtx); goto restart; next_iter: MPASS(vp->v_type != VMARKER); if (!should_yield()) continue; TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist); TAILQ_INSERT_AFTER(&vnode_list, vp, mvp, v_vnodelist); mtx_unlock(&vnode_list_mtx); kern_yield(PRI_USER); mtx_lock(&vnode_list_mtx); goto restart; } if (done == 0 && !retried) { TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist); TAILQ_INSERT_HEAD(&vnode_list, mvp, v_vnodelist); retried = true; goto restart; } return (done); } static int max_vnlru_free = 10000; /* limit on vnode free requests per call */ SYSCTL_INT(_debug, OID_AUTO, max_vnlru_free, CTLFLAG_RW, &max_vnlru_free, 0, "limit on vnode free requests per call to the vnlru_free routine"); /* * Attempt to reduce the free list by the requested amount. */ static int vnlru_free_locked(int count, struct vfsops *mnt_op) { struct vnode *vp, *mvp; struct mount *mp; int ocount; mtx_assert(&vnode_list_mtx, MA_OWNED); if (count > max_vnlru_free) count = max_vnlru_free; ocount = count; mvp = vnode_list_free_marker; restart: vp = mvp; while (count > 0) { vp = TAILQ_NEXT(vp, v_vnodelist); if (__predict_false(vp == NULL)) { TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist); TAILQ_INSERT_TAIL(&vnode_list, mvp, v_vnodelist); break; } if (__predict_false(vp->v_type == VMARKER)) continue; /* * Don't recycle if our vnode is from different type * of mount point. Note that mp is type-safe, the * check does not reach unmapped address even if * vnode is reclaimed. * Don't recycle if we can't get the interlock without * blocking. 
*/ if (vp->v_holdcnt > 0 || (mnt_op != NULL && (mp = vp->v_mount) != NULL && mp->mnt_op != mnt_op) || !VI_TRYLOCK(vp)) { continue; } TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist); TAILQ_INSERT_AFTER(&vnode_list, vp, mvp, v_vnodelist); if (__predict_false(vp->v_type == VBAD || vp->v_type == VNON)) { VI_UNLOCK(vp); continue; } vholdl(vp); count--; mtx_unlock(&vnode_list_mtx); VI_UNLOCK(vp); vtryrecycle(vp); vdrop(vp); mtx_lock(&vnode_list_mtx); goto restart; } return (ocount - count); } void vnlru_free(int count, struct vfsops *mnt_op) { mtx_lock(&vnode_list_mtx); vnlru_free_locked(count, mnt_op); mtx_unlock(&vnode_list_mtx); } static void vnlru_recalc(void) { mtx_assert(&vnode_list_mtx, MA_OWNED); gapvnodes = imax(desiredvnodes - wantfreevnodes, 100); vhiwat = gapvnodes / 11; /* 9% -- just under the 10% in vlrureclaim() */ vlowat = vhiwat / 2; } /* * Attempt to recycle vnodes in a context that is always safe to block. * Calling vlrurecycle() from the bowels of filesystem code has some * interesting deadlock problems. */ static struct proc *vnlruproc; static int vnlruproc_sig; /* * The main freevnodes counter is only updated when threads requeue their vnode * batches. CPUs are conditionally walked to compute a more accurate total. * * Limit how much of a slop are we willing to tolerate. Note: the actual value * at any given moment can still exceed slop, but it should not be by significant * margin in practice. */ #define VNLRU_FREEVNODES_SLOP 128 static u_long vnlru_read_freevnodes(void) { struct vdbatch *vd; long slop; int cpu; mtx_assert(&vnode_list_mtx, MA_OWNED); if (freevnodes > freevnodes_old) slop = freevnodes - freevnodes_old; else slop = freevnodes_old - freevnodes; if (slop < VNLRU_FREEVNODES_SLOP) return (freevnodes >= 0 ? freevnodes : 0); freevnodes_old = freevnodes; CPU_FOREACH(cpu) { vd = DPCPU_ID_PTR((cpu), vd); freevnodes_old += vd->freevnodes; } return (freevnodes_old >= 0 ? freevnodes_old : 0); } static bool vnlru_under(u_long rnumvnodes, u_long limit) { u_long rfreevnodes, space; if (__predict_false(rnumvnodes > desiredvnodes)) return (true); space = desiredvnodes - rnumvnodes; if (space < limit) { rfreevnodes = vnlru_read_freevnodes(); if (rfreevnodes > wantfreevnodes) space += rfreevnodes - wantfreevnodes; } return (space < limit); } static bool vnlru_under_unlocked(u_long rnumvnodes, u_long limit) { long rfreevnodes, space; if (__predict_false(rnumvnodes > desiredvnodes)) return (true); space = desiredvnodes - rnumvnodes; if (space < limit) { rfreevnodes = atomic_load_long(&freevnodes); if (rfreevnodes > wantfreevnodes) space += rfreevnodes - wantfreevnodes; } return (space < limit); } static void vnlru_kick(void) { mtx_assert(&vnode_list_mtx, MA_OWNED); if (vnlruproc_sig == 0) { vnlruproc_sig = 1; wakeup(vnlruproc); } } static void vnlru_proc(void) { u_long rnumvnodes, rfreevnodes, target; unsigned long onumvnodes; int done, force, trigger, usevnodes; bool reclaim_nc_src, want_reread; EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, vnlruproc, SHUTDOWN_PRI_FIRST); force = 0; want_reread = false; for (;;) { kproc_suspend_check(vnlruproc); mtx_lock(&vnode_list_mtx); rnumvnodes = atomic_load_long(&numvnodes); if (want_reread) { force = vnlru_under(numvnodes, vhiwat) ? 1 : 0; want_reread = false; } /* * If numvnodes is too large (due to desiredvnodes being * adjusted using its sysctl, or emergency growth), first * try to reduce it by discarding from the free list. 
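		 *
		 * For example, lowering kern.maxvnodes below the current
		 * vnode count makes this branch discard
		 * (rnumvnodes - desiredvnodes) free vnodes before the
		 * normal reclaim pass is considered.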
		 */
		if (rnumvnodes > desiredvnodes) {
			vnlru_free_locked(rnumvnodes - desiredvnodes, NULL);
			rnumvnodes = atomic_load_long(&numvnodes);
		}
		/*
		 * Sleep if the vnode cache is in a good state.  This is
		 * when it is not over-full and has space for about a 4%
		 * or 9% expansion (by growing its size or inexcessively
		 * reducing its free list).  Otherwise, try to reclaim
		 * space for a 10% expansion.
		 */
		if (vstir && force == 0) {
			force = 1;
			vstir = 0;
		}
		if (force == 0 && !vnlru_under(rnumvnodes, vlowat)) {
			vnlruproc_sig = 0;
			wakeup(&vnlruproc_sig);
			msleep(vnlruproc, &vnode_list_mtx,
			    PVFS|PDROP, "vlruwt", hz);
			continue;
		}
		rfreevnodes = vnlru_read_freevnodes();

		onumvnodes = rnumvnodes;
		/*
		 * Calculate parameters for recycling.  These are the same
		 * throughout the loop to give some semblance of fairness.
		 * The trigger point is to avoid recycling vnodes with lots
		 * of resident pages.  We aren't trying to free memory; we
		 * are trying to recycle or at least free vnodes.
		 */
		if (rnumvnodes <= desiredvnodes)
			usevnodes = rnumvnodes - rfreevnodes;
		else
			usevnodes = rnumvnodes;
		if (usevnodes <= 0)
			usevnodes = 1;
		/*
		 * The trigger value is chosen to give a conservatively
		 * large value to ensure that it alone doesn't prevent
		 * making progress.  The value can easily be so large that
		 * it is effectively infinite in some congested and
		 * misconfigured cases, and this is necessary.  Normally
		 * it is about 8 to 100 (pages), which is quite large.
		 */
		trigger = vm_cnt.v_page_count * 2 / usevnodes;
		if (force < 2)
			trigger = vsmalltrigger;
		reclaim_nc_src = force >= 3;
		target = rnumvnodes * (int64_t)gapvnodes / imax(desiredvnodes, 1);
		target = target / 10 + 1;
		done = vlrureclaim(reclaim_nc_src, trigger, target);
		mtx_unlock(&vnode_list_mtx);
		if (onumvnodes > desiredvnodes && numvnodes <= desiredvnodes)
			uma_reclaim(UMA_RECLAIM_DRAIN);
		if (done == 0) {
			if (force == 0 || force == 1) {
				force = 2;
				continue;
			}
			if (force == 2) {
				force = 3;
				continue;
			}
			want_reread = true;
			force = 0;
			vnlru_nowhere++;
			tsleep(vnlruproc, PPAUSE, "vlrup", hz * 3);
		} else {
			want_reread = true;
			kern_yield(PRI_USER);
		}
	}
}

static struct kproc_desc vnlru_kp = {
	"vnlru",
	vnlru_proc,
	&vnlruproc
};
SYSINIT(vnlru, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start,
    &vnlru_kp);

/*
 * Routines having to do with the management of the vnode table.
 */

/*
 * Try to recycle a freed vnode.  We abort if anyone picks up a reference
 * before we actually vgone().  This function must be called with the vnode
 * held to prevent the vnode from being returned to the free list midway
 * through vgone().
 */
static int
vtryrecycle(struct vnode *vp)
{
	struct mount *vnmp;

	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
	VNASSERT(vp->v_holdcnt, vp,
	    ("vtryrecycle: Recycling vp %p without a reference.", vp));
	/*
	 * This vnode may be found and locked via some other list, if so we
	 * can't recycle it yet.
	 */
	if (VOP_LOCK(vp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
		CTR2(KTR_VFS,
		    "%s: impossible to recycle, vp %p lock is already held",
		    __func__, vp);
		return (EWOULDBLOCK);
	}
	/*
	 * Don't recycle if its filesystem is being suspended.
	 */
	if (vn_start_write(vp, &vnmp, V_NOWAIT) != 0) {
		VOP_UNLOCK(vp);
		CTR2(KTR_VFS,
		    "%s: impossible to recycle, cannot start the write for %p",
		    __func__, vp);
		return (EBUSY);
	}
	/*
	 * If we got this far, we need to acquire the interlock and see if
	 * anyone picked up this vnode from another list.  If not, we will
	 * mark it with DOOMED via vgonel() so that anyone who does find it
	 * will skip over it.
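	 *
	 * The race being closed: another thread may vget() the vnode
	 * between the caller's checks and the VOP_LOCK() above; in that
	 * case v_usecount is now nonzero and we must bail out with EBUSY
	 * rather than doom a vnode that is back in use.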
*/ VI_LOCK(vp); if (vp->v_usecount) { VOP_UNLOCK(vp); VI_UNLOCK(vp); vn_finished_write(vnmp); CTR2(KTR_VFS, "%s: impossible to recycle, %p is already referenced", __func__, vp); return (EBUSY); } if (!VN_IS_DOOMED(vp)) { counter_u64_add(recycles_free_count, 1); vgonel(vp); } VOP_UNLOCK(vp); VI_UNLOCK(vp); vn_finished_write(vnmp); return (0); } /* * Allocate a new vnode. * * The operation never returns an error. Returning an error was disabled * in r145385 (dated 2005) with the following comment: * * XXX Not all VFS_VGET/ffs_vget callers check returns. * * Given the age of this commit (almost 15 years at the time of writing this * comment) restoring the ability to fail requires a significant audit of * all codepaths. * * The routine can try to free a vnode or stall for up to 1 second waiting for * vnlru to clear things up, but ultimately always performs a M_WAITOK allocation. */ static u_long vn_alloc_cyclecount; static struct vnode * __noinline vn_alloc_hard(struct mount *mp) { u_long rnumvnodes, rfreevnodes; mtx_lock(&vnode_list_mtx); rnumvnodes = atomic_load_long(&numvnodes); if (rnumvnodes + 1 < desiredvnodes) { vn_alloc_cyclecount = 0; goto alloc; } rfreevnodes = vnlru_read_freevnodes(); if (vn_alloc_cyclecount++ >= rfreevnodes) { vn_alloc_cyclecount = 0; vstir = 1; } /* * Grow the vnode cache if it will not be above its target max * after growing. Otherwise, if the free list is nonempty, try * to reclaim 1 item from it before growing the cache (possibly * above its target max if the reclamation failed or is delayed). * Otherwise, wait for some space. In all cases, schedule * vnlru_proc() if we are getting short of space. The watermarks * should be chosen so that we never wait or even reclaim from * the free list to below its target minimum. */ if (vnlru_free_locked(1, NULL) > 0) goto alloc; if (mp == NULL || (mp->mnt_kern_flag & MNTK_SUSPEND) == 0) { /* * Wait for space for a new vnode. */ vnlru_kick(); msleep(&vnlruproc_sig, &vnode_list_mtx, PVFS, "vlruwk", hz); if (atomic_load_long(&numvnodes) + 1 > desiredvnodes && vnlru_read_freevnodes() > 1) vnlru_free_locked(1, NULL); } alloc: rnumvnodes = atomic_fetchadd_long(&numvnodes, 1) + 1; if (vnlru_under(rnumvnodes, vlowat)) vnlru_kick(); mtx_unlock(&vnode_list_mtx); return (uma_zalloc(vnode_zone, M_WAITOK)); } static struct vnode * vn_alloc(struct mount *mp) { u_long rnumvnodes; if (__predict_false(vn_alloc_cyclecount != 0)) return (vn_alloc_hard(mp)); rnumvnodes = atomic_fetchadd_long(&numvnodes, 1) + 1; if (__predict_false(vnlru_under_unlocked(rnumvnodes, vlowat))) { atomic_subtract_long(&numvnodes, 1); return (vn_alloc_hard(mp)); } return (uma_zalloc(vnode_zone, M_WAITOK)); } static void vn_free(struct vnode *vp) { atomic_subtract_long(&numvnodes, 1); uma_zfree(vnode_zone, vp); } /* * Return the next vnode from the free list. */ int getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops, struct vnode **vpp) { struct vnode *vp; struct thread *td; struct lock_object *lo; CTR3(KTR_VFS, "%s: mp %p with tag %s", __func__, mp, tag); KASSERT(vops->registered, ("%s: not registered vector op %p\n", __func__, vops)); td = curthread; if (td->td_vp_reserved != NULL) { vp = td->td_vp_reserved; td->td_vp_reserved = NULL; } else { vp = vn_alloc(mp); } counter_u64_add(vnodes_created, 1); /* * Locks are given the generic name "vnode" when created. * Follow the historic practice of using the filesystem * name when they allocated, e.g., "zfs", "ufs", "nfs, etc. * * Locks live in a witness group keyed on their name. 
Thus, * when a lock is renamed, it must also move from the witness * group of its old name to the witness group of its new name. * * The change only needs to be made when the vnode moves * from one filesystem type to another. We ensure that each * filesystem use a single static name pointer for its tag so * that we can compare pointers rather than doing a strcmp(). */ lo = &vp->v_vnlock->lock_object; #ifdef WITNESS if (lo->lo_name != tag) { #endif lo->lo_name = tag; #ifdef WITNESS WITNESS_DESTROY(lo); WITNESS_INIT(lo, tag); } #endif /* * By default, don't allow shared locks unless filesystems opt-in. */ vp->v_vnlock->lock_object.lo_flags |= LK_NOSHARE; /* * Finalize various vnode identity bits. */ KASSERT(vp->v_object == NULL, ("stale v_object %p", vp)); KASSERT(vp->v_lockf == NULL, ("stale v_lockf %p", vp)); KASSERT(vp->v_pollinfo == NULL, ("stale v_pollinfo %p", vp)); vp->v_type = VNON; vp->v_op = vops; v_init_counters(vp); vp->v_bufobj.bo_ops = &buf_ops_bio; #ifdef DIAGNOSTIC if (mp == NULL && vops != &dead_vnodeops) printf("NULL mp in getnewvnode(9), tag %s\n", tag); #endif #ifdef MAC mac_vnode_init(vp); if (mp != NULL && (mp->mnt_flag & MNT_MULTILABEL) == 0) mac_vnode_associate_singlelabel(mp, vp); #endif if (mp != NULL) { vp->v_bufobj.bo_bsize = mp->mnt_stat.f_iosize; if ((mp->mnt_kern_flag & MNTK_NOKNOTE) != 0) vp->v_vflag |= VV_NOKNOTE; } /* * For the filesystems which do not use vfs_hash_insert(), * still initialize v_hash to have vfs_hash_index() useful. * E.g., nullfs uses vfs_hash_index() on the lower vnode for * its own hashing. */ vp->v_hash = (uintptr_t)vp >> vnsz2log; *vpp = vp; return (0); } void getnewvnode_reserve(void) { struct thread *td; td = curthread; MPASS(td->td_vp_reserved == NULL); td->td_vp_reserved = vn_alloc(NULL); } void getnewvnode_drop_reserve(void) { struct thread *td; td = curthread; if (td->td_vp_reserved != NULL) { vn_free(td->td_vp_reserved); td->td_vp_reserved = NULL; } } static void freevnode(struct vnode *vp) { struct bufobj *bo; /* * The vnode has been marked for destruction, so free it. * * The vnode will be returned to the zone where it will * normally remain until it is needed for another vnode. We * need to cleanup (or verify that the cleanup has already * been done) any residual data left from its current use * so as not to contaminate the freshly allocated vnode. 
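 *
 * Each VNASSERT below spells out one piece of per-use state that the
 * previous owner must already have torn down; e.g. a filesystem that
 * leaks v_data trips the "cleaned vnode isn't" assertion.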
*/ CTR2(KTR_VFS, "%s: destroying the vnode %p", __func__, vp); bo = &vp->v_bufobj; VNASSERT(vp->v_data == NULL, vp, ("cleaned vnode isn't")); VNASSERT(vp->v_holdcnt == 0, vp, ("Non-zero hold count")); VNASSERT(vp->v_usecount == 0, vp, ("Non-zero use count")); VNASSERT(vp->v_writecount == 0, vp, ("Non-zero write count")); VNASSERT(bo->bo_numoutput == 0, vp, ("Clean vnode has pending I/O's")); VNASSERT(bo->bo_clean.bv_cnt == 0, vp, ("cleanbufcnt not 0")); VNASSERT(pctrie_is_empty(&bo->bo_clean.bv_root), vp, ("clean blk trie not empty")); VNASSERT(bo->bo_dirty.bv_cnt == 0, vp, ("dirtybufcnt not 0")); VNASSERT(pctrie_is_empty(&bo->bo_dirty.bv_root), vp, ("dirty blk trie not empty")); VNASSERT(TAILQ_EMPTY(&vp->v_cache_dst), vp, ("vp has namecache dst")); VNASSERT(LIST_EMPTY(&vp->v_cache_src), vp, ("vp has namecache src")); VNASSERT(vp->v_cache_dd == NULL, vp, ("vp has namecache for ..")); VNASSERT(TAILQ_EMPTY(&vp->v_rl.rl_waiters), vp, ("Dangling rangelock waiters")); VI_UNLOCK(vp); #ifdef MAC mac_vnode_destroy(vp); #endif if (vp->v_pollinfo != NULL) { destroy_vpollinfo(vp->v_pollinfo); vp->v_pollinfo = NULL; } #ifdef INVARIANTS /* XXX Elsewhere we detect an already freed vnode via NULL v_op. */ vp->v_op = NULL; #endif vp->v_mountedhere = NULL; vp->v_unpcb = NULL; vp->v_rdev = NULL; vp->v_fifoinfo = NULL; vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0; vp->v_irflag = 0; vp->v_iflag = 0; vp->v_vflag = 0; bo->bo_flag = 0; vn_free(vp); } /* * Delete from old mount point vnode list, if on one. */ static void delmntque(struct vnode *vp) { struct mount *mp; mp = vp->v_mount; if (mp == NULL) return; MNT_ILOCK(mp); VI_LOCK(vp); if (vp->v_mflag & VMP_LAZYLIST) { mtx_lock(&mp->mnt_listmtx); if (vp->v_mflag & VMP_LAZYLIST) { vp->v_mflag &= ~VMP_LAZYLIST; TAILQ_REMOVE(&mp->mnt_lazyvnodelist, vp, v_lazylist); mp->mnt_lazyvnodelistsize--; } mtx_unlock(&mp->mnt_listmtx); } vp->v_mount = NULL; VI_UNLOCK(vp); VNASSERT(mp->mnt_nvnodelistsize > 0, vp, ("bad mount point vnode list size")); TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes); mp->mnt_nvnodelistsize--; MNT_REL(mp); MNT_IUNLOCK(mp); } static void insmntque_stddtr(struct vnode *vp, void *dtr_arg) { vp->v_data = NULL; vp->v_op = &dead_vnodeops; vgone(vp); vput(vp); } /* * Insert into list of vnodes for the new mount point, if available. */ int insmntque1(struct vnode *vp, struct mount *mp, void (*dtr)(struct vnode *, void *), void *dtr_arg) { KASSERT(vp->v_mount == NULL, ("insmntque: vnode already on per mount vnode list")); VNASSERT(mp != NULL, vp, ("Don't call insmntque(foo, NULL)")); ASSERT_VOP_ELOCKED(vp, "insmntque: non-locked vp"); /* * We acquire the vnode interlock early to ensure that the * vnode cannot be recycled by another process releasing a * holdcnt on it before we get it on both the vnode list * and the active vnode list. The mount mutex protects only * manipulation of the vnode list and the vnode freelist * mutex protects only manipulation of the active vnode list. * Hence the need to hold the vnode interlock throughout. 
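 *
 * A typical caller (sketch; the "myfs" names are hypothetical) pairs
 * this with getnewvnode():
 *
 *	error = getnewvnode("myfs", mp, &myfs_vnodeops, &vp);
 *	...initialize v_data...
 *	error = insmntque(vp, mp);
 *	if (error != 0)
 *		return (error);	(vp was already destroyed by the
 *				 default destructor on failure)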
*/ MNT_ILOCK(mp); VI_LOCK(vp); if (((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0 && ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0 || mp->mnt_nvnodelistsize == 0)) && (vp->v_vflag & VV_FORCEINSMQ) == 0) { VI_UNLOCK(vp); MNT_IUNLOCK(mp); if (dtr != NULL) dtr(vp, dtr_arg); return (EBUSY); } vp->v_mount = mp; MNT_REF(mp); TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes); VNASSERT(mp->mnt_nvnodelistsize >= 0, vp, ("neg mount point vnode list size")); mp->mnt_nvnodelistsize++; VI_UNLOCK(vp); MNT_IUNLOCK(mp); return (0); } int insmntque(struct vnode *vp, struct mount *mp) { return (insmntque1(vp, mp, insmntque_stddtr, NULL)); } /* * Flush out and invalidate all buffers associated with a bufobj * Called with the underlying object locked. */ int bufobj_invalbuf(struct bufobj *bo, int flags, int slpflag, int slptimeo) { int error; BO_LOCK(bo); if (flags & V_SAVE) { error = bufobj_wwait(bo, slpflag, slptimeo); if (error) { BO_UNLOCK(bo); return (error); } if (bo->bo_dirty.bv_cnt > 0) { BO_UNLOCK(bo); if ((error = BO_SYNC(bo, MNT_WAIT)) != 0) return (error); /* * XXX We could save a lock/unlock if this was only * enabled under INVARIANTS */ BO_LOCK(bo); if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) panic("vinvalbuf: dirty bufs"); } } /* * If you alter this loop please notice that interlock is dropped and * reacquired in flushbuflist. Special care is needed to ensure that * no race conditions occur from this. */ do { error = flushbuflist(&bo->bo_clean, flags, bo, slpflag, slptimeo); if (error == 0 && !(flags & V_CLEANONLY)) error = flushbuflist(&bo->bo_dirty, flags, bo, slpflag, slptimeo); if (error != 0 && error != EAGAIN) { BO_UNLOCK(bo); return (error); } } while (error != 0); /* * Wait for I/O to complete. XXX needs cleaning up. The vnode can * have write I/O in-progress but if there is a VM object then the * VM object can also have read-I/O in-progress. */ do { bufobj_wwait(bo, 0, 0); if ((flags & V_VMIO) == 0 && bo->bo_object != NULL) { BO_UNLOCK(bo); vm_object_pip_wait_unlocked(bo->bo_object, "bovlbx"); BO_LOCK(bo); } } while (bo->bo_numoutput > 0); BO_UNLOCK(bo); /* * Destroy the copy in the VM cache, too. */ if (bo->bo_object != NULL && (flags & (V_ALT | V_NORMAL | V_CLEANONLY | V_VMIO)) == 0) { VM_OBJECT_WLOCK(bo->bo_object); vm_object_page_remove(bo->bo_object, 0, 0, (flags & V_SAVE) ? OBJPR_CLEANONLY : 0); VM_OBJECT_WUNLOCK(bo->bo_object); } #ifdef INVARIANTS BO_LOCK(bo); if ((flags & (V_ALT | V_NORMAL | V_CLEANONLY | V_VMIO | V_ALLOWCLEAN)) == 0 && (bo->bo_dirty.bv_cnt > 0 || bo->bo_clean.bv_cnt > 0)) panic("vinvalbuf: flush failed"); if ((flags & (V_ALT | V_NORMAL | V_CLEANONLY | V_VMIO)) == 0 && bo->bo_dirty.bv_cnt > 0) panic("vinvalbuf: flush dirty failed"); BO_UNLOCK(bo); #endif return (0); } /* * Flush out and invalidate all buffers associated with a vnode. * Called with the underlying object locked. */ int vinvalbuf(struct vnode *vp, int flags, int slpflag, int slptimeo) { CTR3(KTR_VFS, "%s: vp %p with flags %d", __func__, vp, flags); ASSERT_VOP_LOCKED(vp, "vinvalbuf"); if (vp->v_object != NULL && vp->v_object->handle != vp) return (0); return (bufobj_invalbuf(&vp->v_bufobj, flags, slpflag, slptimeo)); } /* * Flush out buffers on the specified list. 
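 *
 * A note on the retry contract, summarizing the callers above:
 * flushbuflist() returns EAGAIN whenever it had to drop and
 * reacquire the bufobj lock to process a buffer, so callers keep
 * invoking it until it returns 0 or a hard error, e.g.:
 *
 *      do {
 *              error = flushbuflist(&bo->bo_clean, flags, bo,
 *                  slpflag, slptimeo);
 *      } while (error == EAGAIN);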
* */ static int flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo, int slpflag, int slptimeo) { struct buf *bp, *nbp; int retval, error; daddr_t lblkno; b_xflags_t xflags; ASSERT_BO_WLOCKED(bo); retval = 0; TAILQ_FOREACH_SAFE(bp, &bufv->bv_hd, b_bobufs, nbp) { /* * If we are flushing both V_NORMAL and V_ALT buffers then * do not skip any buffers. If we are flushing only V_NORMAL * buffers then skip buffers marked as BX_ALTDATA. If we are * flushing only V_ALT buffers then skip buffers not marked * as BX_ALTDATA. */ if (((flags & (V_NORMAL | V_ALT)) != (V_NORMAL | V_ALT)) && (((flags & V_NORMAL) && (bp->b_xflags & BX_ALTDATA) != 0) || ((flags & V_ALT) && (bp->b_xflags & BX_ALTDATA) == 0))) { continue; } if (nbp != NULL) { lblkno = nbp->b_lblkno; xflags = nbp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN); } retval = EAGAIN; error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo), "flushbuf", slpflag, slptimeo); if (error) { BO_LOCK(bo); return (error != ENOLCK ? error : EAGAIN); } KASSERT(bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); /* * XXX Since there are no node locks for NFS, I * believe there is a slight chance that a delayed * write will occur while sleeping just above, so * check for it. */ if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) && (flags & V_SAVE)) { bremfree(bp); bp->b_flags |= B_ASYNC; bwrite(bp); BO_LOCK(bo); return (EAGAIN); /* XXX: why not loop ? */ } bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); BO_LOCK(bo); if (nbp == NULL) break; nbp = gbincore(bo, lblkno); if (nbp == NULL || (nbp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) != xflags) break; /* nbp invalid */ } return (retval); } int bnoreuselist(struct bufv *bufv, struct bufobj *bo, daddr_t startn, daddr_t endn) { struct buf *bp; int error; daddr_t lblkno; ASSERT_BO_LOCKED(bo); for (lblkno = startn;;) { again: bp = BUF_PCTRIE_LOOKUP_GE(&bufv->bv_root, lblkno); if (bp == NULL || bp->b_lblkno >= endn || bp->b_lblkno < startn) break; error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo), "brlsfl", 0, 0); if (error != 0) { BO_RLOCK(bo); if (error == ENOLCK) goto again; return (error); } KASSERT(bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); lblkno = bp->b_lblkno + 1; if ((bp->b_flags & B_MANAGED) == 0) bremfree(bp); bp->b_flags |= B_RELBUF; /* * In the VMIO case, use the B_NOREUSE flag to hint that the * pages backing each buffer in the range are unlikely to be * reused. Dirty buffers will have the hint applied once * they've been written. */ if ((bp->b_flags & B_VMIO) != 0) bp->b_flags |= B_NOREUSE; brelse(bp); BO_RLOCK(bo); } return (0); } /* * Truncate a file's buffer and pages to a specified length. This * is in lieu of the old vinvalbuf mechanism, which performed unneeded * sync activity. */ int vtruncbuf(struct vnode *vp, off_t length, int blksize) { struct buf *bp, *nbp; struct bufobj *bo; daddr_t startlbn; CTR4(KTR_VFS, "%s: vp %p with block %d:%ju", __func__, vp, blksize, (uintmax_t)length); /* * Round up to the *next* lbn. 
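 *
 * A worked example with illustrative numbers: truncating to length
 * 4097 with blksize 4096 gives startlbn = howmany(4097, 4096) = 2,
 * so block 0 and the partially valid block 1 are kept while
 * invalidation starts at block 2.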
*/ startlbn = howmany(length, blksize); ASSERT_VOP_LOCKED(vp, "vtruncbuf"); bo = &vp->v_bufobj; restart_unlocked: BO_LOCK(bo); while (v_inval_buf_range_locked(vp, bo, startlbn, INT64_MAX) == EAGAIN) ; if (length > 0) { restartsync: TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno > 0) continue; /* * Since we hold the vnode lock this should only * fail if we're racing with the buf daemon. */ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) goto restart_unlocked; VNASSERT((bp->b_flags & B_DELWRI), vp, ("buf(%p) on dirty queue without DELWRI", bp)); bremfree(bp); bawrite(bp); BO_LOCK(bo); goto restartsync; } } bufobj_wwait(bo, 0, 0); BO_UNLOCK(bo); vnode_pager_setsize(vp, length); return (0); } /* * Invalidate the cached pages of a file's buffer within the range of block * numbers [startlbn, endlbn). */ void v_inval_buf_range(struct vnode *vp, daddr_t startlbn, daddr_t endlbn, int blksize) { struct bufobj *bo; off_t start, end; ASSERT_VOP_LOCKED(vp, "v_inval_buf_range"); start = blksize * startlbn; end = blksize * endlbn; bo = &vp->v_bufobj; BO_LOCK(bo); MPASS(blksize == bo->bo_bsize); while (v_inval_buf_range_locked(vp, bo, startlbn, endlbn) == EAGAIN) ; BO_UNLOCK(bo); vn_pages_remove(vp, OFF_TO_IDX(start), OFF_TO_IDX(end + PAGE_SIZE - 1)); } static int v_inval_buf_range_locked(struct vnode *vp, struct bufobj *bo, daddr_t startlbn, daddr_t endlbn) { struct buf *bp, *nbp; bool anyfreed; ASSERT_VOP_LOCKED(vp, "v_inval_buf_range_locked"); ASSERT_BO_LOCKED(bo); do { anyfreed = false; TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno < startlbn || bp->b_lblkno >= endlbn) continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) { BO_LOCK(bo); return (EAGAIN); } bremfree(bp); bp->b_flags |= B_INVAL | B_RELBUF; bp->b_flags &= ~B_ASYNC; brelse(bp); anyfreed = true; BO_LOCK(bo); if (nbp != NULL && (((nbp->b_xflags & BX_VNCLEAN) == 0) || nbp->b_vp != vp || (nbp->b_flags & B_DELWRI) != 0)) return (EAGAIN); } TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno < startlbn || bp->b_lblkno >= endlbn) continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) { BO_LOCK(bo); return (EAGAIN); } bremfree(bp); bp->b_flags |= B_INVAL | B_RELBUF; bp->b_flags &= ~B_ASYNC; brelse(bp); anyfreed = true; BO_LOCK(bo); if (nbp != NULL && (((nbp->b_xflags & BX_VNDIRTY) == 0) || (nbp->b_vp != vp) || (nbp->b_flags & B_DELWRI) == 0)) return (EAGAIN); } } while (anyfreed); return (0); } static void buf_vlist_remove(struct buf *bp) { struct bufv *bv; KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp)); ASSERT_BO_WLOCKED(bp->b_bufobj); KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) != (BX_VNDIRTY|BX_VNCLEAN), ("buf_vlist_remove: Buf %p is on two lists", bp)); if (bp->b_xflags & BX_VNDIRTY) bv = &bp->b_bufobj->bo_dirty; else bv = &bp->b_bufobj->bo_clean; BUF_PCTRIE_REMOVE(&bv->bv_root, bp->b_lblkno); TAILQ_REMOVE(&bv->bv_hd, bp, b_bobufs); bv->bv_cnt--; bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN); } /* * Add the buffer to the sorted clean or dirty block list. * * NOTE: xflags is passed as a constant, optimizing this inline function! 
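 *
 * For instance, the callers in this file pass the flag as a literal
 * constant -- buf_vlist_add(bp, bo, BX_VNCLEAN) in bgetvp() and
 * buf_vlist_add(bp, bo, BX_VNDIRTY) in reassignbuf() -- letting the
 * dirty/clean branch fold away when the function is inlined.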
*/ static void buf_vlist_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags) { struct bufv *bv; struct buf *n; int error; ASSERT_BO_WLOCKED(bo); KASSERT((xflags & BX_VNDIRTY) == 0 || (bo->bo_flag & BO_DEAD) == 0, ("dead bo %p", bo)); KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0, ("buf_vlist_add: Buf %p has existing xflags %d", bp, bp->b_xflags)); bp->b_xflags |= xflags; if (xflags & BX_VNDIRTY) bv = &bo->bo_dirty; else bv = &bo->bo_clean; /* * Keep the list ordered. Optimize empty list insertion. Assume * we tend to grow at the tail so lookup_le should usually be cheaper * than _ge. */ if (bv->bv_cnt == 0 || bp->b_lblkno > TAILQ_LAST(&bv->bv_hd, buflists)->b_lblkno) TAILQ_INSERT_TAIL(&bv->bv_hd, bp, b_bobufs); else if ((n = BUF_PCTRIE_LOOKUP_LE(&bv->bv_root, bp->b_lblkno)) == NULL) TAILQ_INSERT_HEAD(&bv->bv_hd, bp, b_bobufs); else TAILQ_INSERT_AFTER(&bv->bv_hd, n, bp, b_bobufs); error = BUF_PCTRIE_INSERT(&bv->bv_root, bp); if (error) panic("buf_vlist_add: Preallocated nodes insufficient."); bv->bv_cnt++; } /* * Look up a buffer using the buffer tries. */ struct buf * gbincore(struct bufobj *bo, daddr_t lblkno) { struct buf *bp; ASSERT_BO_LOCKED(bo); bp = BUF_PCTRIE_LOOKUP(&bo->bo_clean.bv_root, lblkno); if (bp != NULL) return (bp); return BUF_PCTRIE_LOOKUP(&bo->bo_dirty.bv_root, lblkno); } /* * Associate a buffer with a vnode. */ void bgetvp(struct vnode *vp, struct buf *bp) { struct bufobj *bo; bo = &vp->v_bufobj; ASSERT_BO_WLOCKED(bo); VNASSERT(bp->b_vp == NULL, bp->b_vp, ("bgetvp: not free")); CTR3(KTR_BUF, "bgetvp(%p) vp %p flags %X", bp, vp, bp->b_flags); VNASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0, vp, ("bgetvp: bp already attached! %p", bp)); vhold(vp); bp->b_vp = vp; bp->b_bufobj = bo; /* * Insert onto list for new vnode. */ buf_vlist_add(bp, bo, BX_VNCLEAN); } /* * Disassociate a buffer from a vnode. */ void brelvp(struct buf *bp) { struct bufobj *bo; struct vnode *vp; CTR3(KTR_BUF, "brelvp(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); KASSERT(bp->b_vp != NULL, ("brelvp: NULL")); /* * Delete from old vnode list, if on one. */ vp = bp->b_vp; /* XXX */ bo = bp->b_bufobj; BO_LOCK(bo); if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) buf_vlist_remove(bp); else panic("brelvp: Buffer %p not on queue.", bp); if ((bo->bo_flag & BO_ONWORKLST) && bo->bo_dirty.bv_cnt == 0) { bo->bo_flag &= ~BO_ONWORKLST; mtx_lock(&sync_mtx); LIST_REMOVE(bo, bo_synclist); syncer_worklist_len--; mtx_unlock(&sync_mtx); } bp->b_vp = NULL; bp->b_bufobj = NULL; BO_UNLOCK(bo); vdrop(vp); } /* * Add an item to the syncer work queue. 
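 *
 * The wheel arithmetic, with illustrative numbers: if the syncer is
 * currently at slot syncer_delayno == 10 and a 30 second delay is
 * requested, the bufobj is hung off slot (10 + 30) & syncer_mask and
 * will be visited roughly 30 iterations from now.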
*/ static void vn_syncer_add_to_worklist(struct bufobj *bo, int delay) { int slot; ASSERT_BO_WLOCKED(bo); mtx_lock(&sync_mtx); if (bo->bo_flag & BO_ONWORKLST) LIST_REMOVE(bo, bo_synclist); else { bo->bo_flag |= BO_ONWORKLST; syncer_worklist_len++; } if (delay > syncer_maxdelay - 2) delay = syncer_maxdelay - 2; slot = (syncer_delayno + delay) & syncer_mask; LIST_INSERT_HEAD(&syncer_workitem_pending[slot], bo, bo_synclist); mtx_unlock(&sync_mtx); } static int sysctl_vfs_worklist_len(SYSCTL_HANDLER_ARGS) { int error, len; mtx_lock(&sync_mtx); len = syncer_worklist_len - sync_vnode_count; mtx_unlock(&sync_mtx); error = SYSCTL_OUT(req, &len, sizeof(len)); return (error); } SYSCTL_PROC(_vfs, OID_AUTO, worklist_len, CTLTYPE_INT | CTLFLAG_MPSAFE| CTLFLAG_RD, NULL, 0, sysctl_vfs_worklist_len, "I", "Syncer thread worklist length"); static struct proc *updateproc; static void sched_sync(void); static struct kproc_desc up_kp = { "syncer", sched_sync, &updateproc }; SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp); static int sync_vnode(struct synclist *slp, struct bufobj **bo, struct thread *td) { struct vnode *vp; struct mount *mp; *bo = LIST_FIRST(slp); if (*bo == NULL) return (0); vp = bo2vnode(*bo); if (VOP_ISLOCKED(vp) != 0 || VI_TRYLOCK(vp) == 0) return (1); /* * We use vhold in case the vnode does not * successfully sync. vhold prevents the vnode from * going away when we unlock the sync_mtx so that * we can acquire the vnode interlock. */ vholdl(vp); mtx_unlock(&sync_mtx); VI_UNLOCK(vp); if (vn_start_write(vp, &mp, V_NOWAIT) != 0) { vdrop(vp); mtx_lock(&sync_mtx); return (*bo == LIST_FIRST(slp)); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); (void) VOP_FSYNC(vp, MNT_LAZY, td); VOP_UNLOCK(vp); vn_finished_write(mp); BO_LOCK(*bo); if (((*bo)->bo_flag & BO_ONWORKLST) != 0) { /* * Put us back on the worklist. The worklist * routine will remove us from our current * position and then add us back in at a later * position. */ vn_syncer_add_to_worklist(*bo, syncdelay); } BO_UNLOCK(*bo); vdrop(vp); mtx_lock(&sync_mtx); return (0); } static int first_printf = 1; /* * System filesystem synchronizer daemon. */ static void sched_sync(void) { struct synclist *next, *slp; struct bufobj *bo; long starttime; struct thread *td = curthread; int last_work_seen; int net_worklist_len; int syncer_final_iter; int error; last_work_seen = 0; syncer_final_iter = 0; syncer_state = SYNCER_RUNNING; starttime = time_uptime; td->td_pflags |= TDP_NORUNNINGBUF; EVENTHANDLER_REGISTER(shutdown_pre_sync, syncer_shutdown, td->td_proc, SHUTDOWN_PRI_LAST); mtx_lock(&sync_mtx); for (;;) { if (syncer_state == SYNCER_FINAL_DELAY && syncer_final_iter == 0) { mtx_unlock(&sync_mtx); kproc_suspend_check(td->td_proc); mtx_lock(&sync_mtx); } net_worklist_len = syncer_worklist_len - sync_vnode_count; if (syncer_state != SYNCER_RUNNING && starttime != time_uptime) { if (first_printf) { printf("\nSyncing disks, vnodes remaining... "); first_printf = 0; } printf("%d ", net_worklist_len); } starttime = time_uptime; /* * Push files whose dirty time has expired. Be careful * of interrupt race on slp queue. * * Skip over empty worklist slots when shutting down. */ do { slp = &syncer_workitem_pending[syncer_delayno]; syncer_delayno += 1; if (syncer_delayno == syncer_maxdelay) syncer_delayno = 0; next = &syncer_workitem_pending[syncer_delayno]; /* * If the worklist has wrapped since it * was emptied of all but syncer vnodes, * switch to the FINAL_DELAY state and run * for one more second.
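 *
 * As a summary sketch (not part of this change), shutdown walks the
 * state machine
 *
 *      SYNCER_RUNNING -> SYNCER_SHUTTING_DOWN -> SYNCER_FINAL_DELAY
 *
 * where the first transition is requested by syncer_shutdown() and
 * the second happens here, once a full wrap of the wheel finds no
 * work beyond the syncer vnodes themselves.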
*/ if (syncer_state == SYNCER_SHUTTING_DOWN && net_worklist_len == 0 && last_work_seen == syncer_delayno) { syncer_state = SYNCER_FINAL_DELAY; syncer_final_iter = SYNCER_SHUTDOWN_SPEEDUP; } } while (syncer_state != SYNCER_RUNNING && LIST_EMPTY(slp) && syncer_worklist_len > 0); /* * Keep track of the last time there was anything * on the worklist other than syncer vnodes. * Return to the SHUTTING_DOWN state if any * new work appears. */ if (net_worklist_len > 0 || syncer_state == SYNCER_RUNNING) last_work_seen = syncer_delayno; if (net_worklist_len > 0 && syncer_state == SYNCER_FINAL_DELAY) syncer_state = SYNCER_SHUTTING_DOWN; while (!LIST_EMPTY(slp)) { error = sync_vnode(slp, &bo, td); if (error == 1) { LIST_REMOVE(bo, bo_synclist); LIST_INSERT_HEAD(next, bo, bo_synclist); continue; } if (first_printf == 0) { /* * Drop the sync mutex, because some watchdog * drivers need to sleep while patting */ mtx_unlock(&sync_mtx); wdog_kern_pat(WD_LASTVAL); mtx_lock(&sync_mtx); } } if (syncer_state == SYNCER_FINAL_DELAY && syncer_final_iter > 0) syncer_final_iter--; /* * The variable rushjob allows the kernel to speed up the * processing of the filesystem syncer process. A rushjob * value of N tells the filesystem syncer to process the next * N seconds worth of work on its queue ASAP. Currently rushjob * is used by the soft update code to speed up the filesystem * syncer process when the incore state is getting so far * ahead of the disk that the kernel memory pool is being * threatened with exhaustion. */ if (rushjob > 0) { rushjob -= 1; continue; } /* * Just sleep for a short period of time between * iterations when shutting down to allow some I/O * to happen. * * If it has taken us less than a second to process the * current work, then wait. Otherwise start right over * again. We can still lose time if any single round * takes more than two seconds, but it does not really * matter as we are just trying to generally pace the * filesystem activity. */ if (syncer_state != SYNCER_RUNNING || time_uptime == starttime) { thread_lock(td); sched_prio(td, PPAUSE); thread_unlock(td); } if (syncer_state != SYNCER_RUNNING) cv_timedwait(&sync_wakeup, &sync_mtx, hz / SYNCER_SHUTDOWN_SPEEDUP); else if (time_uptime == starttime) cv_timedwait(&sync_wakeup, &sync_mtx, hz); } } /* * Request the syncer daemon to speed up its work. * We never push it to speed up more than half of its * normal turn time, otherwise it could take over the cpu. */ int speedup_syncer(void) { int ret = 0; mtx_lock(&sync_mtx); if (rushjob < syncdelay / 2) { rushjob += 1; stat_rush_requests += 1; ret = 1; } mtx_unlock(&sync_mtx); cv_broadcast(&sync_wakeup); return (ret); } /* * Tell the syncer to speed up its work and run though its work * list several times, then tell it to shut down. */ static void syncer_shutdown(void *arg, int howto) { if (howto & RB_NOSYNC) return; mtx_lock(&sync_mtx); syncer_state = SYNCER_SHUTTING_DOWN; rushjob = 0; mtx_unlock(&sync_mtx); cv_broadcast(&sync_wakeup); kproc_shutdown(arg, howto); } void syncer_suspend(void) { syncer_shutdown(updateproc, 0); } void syncer_resume(void) { mtx_lock(&sync_mtx); first_printf = 1; syncer_state = SYNCER_RUNNING; mtx_unlock(&sync_mtx); cv_broadcast(&sync_wakeup); kproc_resume(updateproc); } /* * Reassign a buffer from one vnode to another. * Used to assign file specific control information * (indirect blocks) to the vnode to which they belong. 
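 *
 * In outline: a buffer marked B_DELWRI moves to the vnode's dirty
 * list and schedules the bufobj on the syncer wheel with a delay
 * picked by vnode type (dirdelay, metadelay or filedelay); a clean
 * buffer moves to the clean list and, once the last dirty buffer is
 * gone, the bufobj leaves the syncer worklist.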
*/ void reassignbuf(struct buf *bp) { struct vnode *vp; struct bufobj *bo; int delay; #ifdef INVARIANTS struct bufv *bv; #endif vp = bp->b_vp; bo = bp->b_bufobj; ++reassignbufcalls; CTR3(KTR_BUF, "reassignbuf(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); /* * B_PAGING flagged buffers cannot be reassigned because their vp * is not fully linked in. */ if (bp->b_flags & B_PAGING) panic("cannot reassign paging buffer"); /* * Delete from old vnode list, if on one. */ BO_LOCK(bo); if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) buf_vlist_remove(bp); else panic("reassignbuf: Buffer %p not on queue.", bp); /* * If dirty, put on list of dirty buffers; otherwise insert onto list * of clean buffers. */ if (bp->b_flags & B_DELWRI) { if ((bo->bo_flag & BO_ONWORKLST) == 0) { switch (vp->v_type) { case VDIR: delay = dirdelay; break; case VCHR: delay = metadelay; break; default: delay = filedelay; } vn_syncer_add_to_worklist(bo, delay); } buf_vlist_add(bp, bo, BX_VNDIRTY); } else { buf_vlist_add(bp, bo, BX_VNCLEAN); if ((bo->bo_flag & BO_ONWORKLST) && bo->bo_dirty.bv_cnt == 0) { mtx_lock(&sync_mtx); LIST_REMOVE(bo, bo_synclist); syncer_worklist_len--; mtx_unlock(&sync_mtx); bo->bo_flag &= ~BO_ONWORKLST; } } #ifdef INVARIANTS bv = &bo->bo_clean; bp = TAILQ_FIRST(&bv->bv_hd); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); bp = TAILQ_LAST(&bv->bv_hd, buflists); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); bv = &bo->bo_dirty; bp = TAILQ_FIRST(&bv->bv_hd); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); bp = TAILQ_LAST(&bv->bv_hd, buflists); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); #endif BO_UNLOCK(bo); } static void v_init_counters(struct vnode *vp) { VNASSERT(vp->v_type == VNON && vp->v_data == NULL && vp->v_iflag == 0, vp, ("%s called for an initialized vnode", __FUNCTION__)); ASSERT_VI_UNLOCKED(vp, __FUNCTION__); refcount_init(&vp->v_holdcnt, 1); refcount_init(&vp->v_usecount, 1); } /* * Increment si_usecount of the associated device, if any. */ static void v_incr_devcount(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __FUNCTION__); if (vp->v_type == VCHR && vp->v_rdev != NULL) { dev_lock(); vp->v_rdev->si_usecount++; dev_unlock(); } } /* * Decrement si_usecount of the associated device, if any. */ static void v_decr_devcount(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __FUNCTION__); if (vp->v_type == VCHR && vp->v_rdev != NULL) { dev_lock(); vp->v_rdev->si_usecount--; dev_unlock(); } } /* * Grab a particular vnode from the free list, increment its * reference count and lock it. VIRF_DOOMED is set if the vnode * is being destroyed. Only callers who specify LK_RETRY will * see doomed vnodes. If inactive processing was delayed in * vput try to do it here. * * usecount is manipulated using atomics without holding any locks. * * holdcnt can be manipulated using atomics without holding any locks, * except when transitioning 1<->0, in which case the interlock is held. 
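 *
 * An illustrative caller sketch -- this mirrors what vget() below
 * does -- for code that wants a use reference and the vnode lock:
 *
 *      enum vgetstate vs;
 *
 *      vs = vget_prep(vp);
 *      error = vget_finish(vp, LK_EXCLUSIVE, vs);
 *      if (error != 0)
 *              return (error); -- the prepared reference is gone
 *      ... use the locked vnode ...
 *      vput(vp);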
*/ enum vgetstate vget_prep(struct vnode *vp) { enum vgetstate vs; if (refcount_acquire_if_not_zero(&vp->v_usecount)) { vs = VGET_USECOUNT; } else { vhold(vp); vs = VGET_HOLDCNT; } return (vs); } int vget(struct vnode *vp, int flags, struct thread *td) { enum vgetstate vs; MPASS(td == curthread); vs = vget_prep(vp); return (vget_finish(vp, flags, vs)); } static int __noinline vget_finish_vchr(struct vnode *vp) { VNASSERT(vp->v_type == VCHR, vp, ("type != VCHR)")); /* * See the comment in vget_finish before usecount bump. */ if (refcount_acquire_if_not_zero(&vp->v_usecount)) { #ifdef INVARIANTS int old = atomic_fetchadd_int(&vp->v_holdcnt, -1); VNASSERT(old > 0, vp, ("%s: wrong hold count %d", __func__, old)); #else refcount_release(&vp->v_holdcnt); #endif return (0); } VI_LOCK(vp); if (refcount_acquire_if_not_zero(&vp->v_usecount)) { #ifdef INVARIANTS int old = atomic_fetchadd_int(&vp->v_holdcnt, -1); VNASSERT(old > 1, vp, ("%s: wrong hold count %d", __func__, old)); #else refcount_release(&vp->v_holdcnt); #endif VI_UNLOCK(vp); return (0); } v_incr_devcount(vp); refcount_acquire(&vp->v_usecount); VI_UNLOCK(vp); return (0); } int vget_finish(struct vnode *vp, int flags, enum vgetstate vs) { int error, old; VNASSERT((flags & LK_TYPE_MASK) != 0, vp, ("%s: invalid lock operation", __func__)); if ((flags & LK_INTERLOCK) != 0) ASSERT_VI_LOCKED(vp, __func__); else ASSERT_VI_UNLOCKED(vp, __func__); VNASSERT(vp->v_holdcnt > 0, vp, ("%s: vnode not held", __func__)); if (vs == VGET_USECOUNT) { VNASSERT(vp->v_usecount > 0, vp, ("%s: vnode without usecount when VGET_USECOUNT was passed", __func__)); } - if ((error = vn_lock(vp, flags)) != 0) { + error = vn_lock(vp, flags); + if (__predict_false(error != 0)) { if (vs == VGET_USECOUNT) vrele(vp); else vdrop(vp); CTR2(KTR_VFS, "%s: impossible to lock vnode %p", __func__, vp); return (error); } if (vs == VGET_USECOUNT) { return (0); } if (__predict_false(vp->v_type == VCHR)) return (vget_finish_vchr(vp)); /* * We hold the vnode. If the usecount is 0 it will be utilized to keep * the vnode around. Otherwise someone else lent their hold count and * we have to drop ours. */ old = atomic_fetchadd_int(&vp->v_usecount, 1); VNASSERT(old >= 0, vp, ("%s: wrong use count %d", __func__, old)); if (old != 0) { #ifdef INVARIANTS old = atomic_fetchadd_int(&vp->v_holdcnt, -1); VNASSERT(old > 1, vp, ("%s: wrong hold count %d", __func__, old)); #else refcount_release(&vp->v_holdcnt); #endif } return (0); } /* * Increase the reference (use) and hold count of a vnode. * This will also remove the vnode from the free list if it is presently free. */ static void __noinline vref_vchr(struct vnode *vp, bool interlock) { /* * See the comment in vget_finish before usecount bump. */ if (!interlock) { if (refcount_acquire_if_not_zero(&vp->v_usecount)) { VNODE_REFCOUNT_FENCE_ACQ(); VNASSERT(vp->v_holdcnt > 0, vp, ("%s: active vnode not held", __func__)); return; } VI_LOCK(vp); /* * By the time we get here the vnode might have been doomed, at * which point the 0->1 use count transition is no longer * protected by the interlock.
Since it can't bounce back to * VCHR and requires vref semantics, punt it back */ if (__predict_false(vp->v_type == VBAD)) { VI_UNLOCK(vp); vref(vp); return; } } VNASSERT(vp->v_type == VCHR, vp, ("type != VCHR)")); if (refcount_acquire_if_not_zero(&vp->v_usecount)) { VNODE_REFCOUNT_FENCE_ACQ(); VNASSERT(vp->v_holdcnt > 0, vp, ("%s: active vnode not held", __func__)); if (!interlock) VI_UNLOCK(vp); return; } vhold(vp); v_incr_devcount(vp); refcount_acquire(&vp->v_usecount); if (!interlock) VI_UNLOCK(vp); return; } void vref(struct vnode *vp) { int old; CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if (__predict_false(vp->v_type == VCHR)) { vref_vchr(vp, false); return; } if (refcount_acquire_if_not_zero(&vp->v_usecount)) { VNODE_REFCOUNT_FENCE_ACQ(); VNASSERT(vp->v_holdcnt > 0, vp, ("%s: active vnode not held", __func__)); return; } vhold(vp); /* * See the comment in vget_finish. */ old = atomic_fetchadd_int(&vp->v_usecount, 1); VNASSERT(old >= 0, vp, ("%s: wrong use count %d", __func__, old)); if (old != 0) { #ifdef INVARIANTS old = atomic_fetchadd_int(&vp->v_holdcnt, -1); VNASSERT(old > 1, vp, ("%s: wrong hold count %d", __func__, old)); #else refcount_release(&vp->v_holdcnt); #endif } } void vrefl(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if (__predict_false(vp->v_type == VCHR)) { vref_vchr(vp, true); return; } vref(vp); } void vrefact(struct vnode *vp) { CTR2(KTR_VFS, "%s: vp %p", __func__, vp); #ifdef INVARIANTS int old = atomic_fetchadd_int(&vp->v_usecount, 1); VNASSERT(old > 0, vp, ("%s: wrong use count %d", __func__, old)); #else refcount_acquire(&vp->v_usecount); #endif } /* * Return reference count of a vnode. * * The results of this call are only guaranteed when some mechanism is used to * stop other processes from gaining references to the vnode. This may be the * case if the caller holds the only reference. This is also useful when stale * data is acceptable as race conditions may be accounted for by some other * means. */ int vrefcnt(struct vnode *vp) { return (vp->v_usecount); } void vlazy(struct vnode *vp) { struct mount *mp; VNASSERT(vp->v_holdcnt > 0, vp, ("%s: vnode not held", __func__)); if ((vp->v_mflag & VMP_LAZYLIST) != 0) return; mp = vp->v_mount; mtx_lock(&mp->mnt_listmtx); if ((vp->v_mflag & VMP_LAZYLIST) == 0) { vp->v_mflag |= VMP_LAZYLIST; TAILQ_INSERT_TAIL(&mp->mnt_lazyvnodelist, vp, v_lazylist); mp->mnt_lazyvnodelistsize++; } mtx_unlock(&mp->mnt_listmtx); } static void vdefer_inactive(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __func__); VNASSERT(vp->v_holdcnt > 0, vp, ("%s: vnode without hold count", __func__)); if (VN_IS_DOOMED(vp)) { vdropl(vp); return; } if (vp->v_iflag & VI_DEFINACT) { VNASSERT(vp->v_holdcnt > 1, vp, ("lost hold count")); vdropl(vp); return; } if (vp->v_usecount > 0) { vp->v_iflag &= ~VI_OWEINACT; vdropl(vp); return; } vlazy(vp); vp->v_iflag |= VI_DEFINACT; VI_UNLOCK(vp); counter_u64_add(deferred_inact, 1); } static void vdefer_inactive_unlocked(struct vnode *vp) { VI_LOCK(vp); if ((vp->v_iflag & VI_OWEINACT) == 0) { vdropl(vp); return; } vdefer_inactive(vp); } enum vputx_op { VPUTX_VRELE, VPUTX_VPUT, VPUTX_VUNREF }; /* * Decrement the use and hold counts for a vnode. * * See an explanation near vget() as to why atomic operation is safe. * * XXX Some filesystems pass in an exclusively locked vnode and strongly depend * on the lock being held all the way until VOP_INACTIVE. 
This in particular * happens with UFS which adds half-constructed vnodes to the hash, where they * can be found by other code. */ static void vputx(struct vnode *vp, enum vputx_op func) { int error; KASSERT(vp != NULL, ("vputx: null vp")); if (func == VPUTX_VUNREF) ASSERT_VOP_LOCKED(vp, "vunref"); else if (func == VPUTX_VPUT) ASSERT_VOP_LOCKED(vp, "vput"); ASSERT_VI_UNLOCKED(vp, __func__); VNASSERT(vp->v_holdcnt > 0 && vp->v_usecount > 0, vp, ("%s: wrong ref counts", __func__)); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); /* * We want to hold the vnode until the inactive finishes to * prevent vgone() races. We drop the use count here and the * hold count below when we're done. * * If we release the last usecount we take ownership of the hold * count which provides liveness of the vnode, in which case we * have to vdrop. */ if (!refcount_release(&vp->v_usecount)) { if (func == VPUTX_VPUT) VOP_UNLOCK(vp); return; } VI_LOCK(vp); v_decr_devcount(vp); /* * By the time we got here someone else might have transitioned * the count back to > 0. */ if (vp->v_usecount > 0 || vp->v_iflag & VI_DOINGINACT) goto out; /* * Check if the fs wants to perform inactive processing. Note we * may be only holding the interlock, in which case it is possible * someone else called vgone on the vnode and ->v_data is now NULL. * Since vgone performs inactive on its own there is nothing to do * here but to drop our hold count. */ if (__predict_false(VN_IS_DOOMED(vp)) || VOP_NEED_INACTIVE(vp) == 0) goto out; /* * We must call VOP_INACTIVE with the node locked. Mark * as VI_DOINGINACT to avoid recursion. */ vp->v_iflag |= VI_OWEINACT; switch (func) { case VPUTX_VRELE: error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK); VI_LOCK(vp); break; case VPUTX_VPUT: error = 0; if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { error = VOP_LOCK(vp, LK_UPGRADE | LK_INTERLOCK | LK_NOWAIT); VI_LOCK(vp); } break; case VPUTX_VUNREF: error = 0; if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { error = VOP_LOCK(vp, LK_TRYUPGRADE | LK_INTERLOCK); VI_LOCK(vp); } break; } if (error == 0) { vinactive(vp); if (func != VPUTX_VUNREF) VOP_UNLOCK(vp); vdropl(vp); } else { vdefer_inactive(vp); } return; out: if (func == VPUTX_VPUT) VOP_UNLOCK(vp); vdropl(vp); } /* * Vnode put/release. * If count drops to zero, call inactive routine and return to freelist. */ void vrele(struct vnode *vp) { vputx(vp, VPUTX_VRELE); } /* * Release an already locked vnode. This gives the same effect as * unlock+vrele(), but takes less time and avoids releasing and * re-acquiring the lock (as vrele() acquires the lock internally.) */ void vput(struct vnode *vp) { vputx(vp, VPUTX_VPUT); } /* * Release an exclusively locked vnode. Do not unlock the vnode lock.
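 *
 * A hedged usage sketch: unlike vput(), the caller keeps the vnode
 * lock across the call, so a typical sequence is
 *
 *      vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 *      ... final use of the vnode ...
 *      vunref(vp);             -- drops the use reference only
 *      VOP_UNLOCK(vp);         -- the lock survives vunref()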
*/ void vunref(struct vnode *vp) { vputx(vp, VPUTX_VUNREF); } void vhold(struct vnode *vp) { struct vdbatch *vd; int old; CTR2(KTR_VFS, "%s: vp %p", __func__, vp); old = atomic_fetchadd_int(&vp->v_holdcnt, 1); VNASSERT(old >= 0, vp, ("%s: wrong hold count %d", __func__, old)); if (old != 0) return; critical_enter(); vd = DPCPU_PTR(vd); vd->freevnodes--; critical_exit(); } void vholdl(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); vhold(vp); } void vholdnz(struct vnode *vp) { CTR2(KTR_VFS, "%s: vp %p", __func__, vp); #ifdef INVARIANTS int old = atomic_fetchadd_int(&vp->v_holdcnt, 1); VNASSERT(old > 0, vp, ("%s: wrong hold count %d", __func__, old)); #else atomic_add_int(&vp->v_holdcnt, 1); #endif } static void __noinline vdbatch_process(struct vdbatch *vd) { struct vnode *vp; int i; mtx_assert(&vd->lock, MA_OWNED); MPASS(curthread->td_pinned > 0); MPASS(vd->index == VDBATCH_SIZE); mtx_lock(&vnode_list_mtx); critical_enter(); freevnodes += vd->freevnodes; for (i = 0; i < VDBATCH_SIZE; i++) { vp = vd->tab[i]; TAILQ_REMOVE(&vnode_list, vp, v_vnodelist); TAILQ_INSERT_TAIL(&vnode_list, vp, v_vnodelist); MPASS(vp->v_dbatchcpu != NOCPU); vp->v_dbatchcpu = NOCPU; } mtx_unlock(&vnode_list_mtx); - critical_exit(); vd->freevnodes = 0; bzero(vd->tab, sizeof(vd->tab)); vd->index = 0; + critical_exit(); } static void vdbatch_enqueue(struct vnode *vp) { struct vdbatch *vd; ASSERT_VI_LOCKED(vp, __func__); VNASSERT(!VN_IS_DOOMED(vp), vp, ("%s: deferring requeue of a doomed vnode", __func__)); critical_enter(); vd = DPCPU_PTR(vd); vd->freevnodes++; if (vp->v_dbatchcpu != NOCPU) { VI_UNLOCK(vp); critical_exit(); return; } sched_pin(); critical_exit(); mtx_lock(&vd->lock); MPASS(vd->index < VDBATCH_SIZE); MPASS(vd->tab[vd->index] == NULL); /* * A hack: we depend on being pinned so that we know what to put in * ->v_dbatchcpu. */ vp->v_dbatchcpu = curcpu; vd->tab[vd->index] = vp; vd->index++; VI_UNLOCK(vp); if (vd->index == VDBATCH_SIZE) vdbatch_process(vd); mtx_unlock(&vd->lock); sched_unpin(); } /* * This routine must only be called for vnodes which are about to be * deallocated. Supporting dequeue for arbitrary vnodes would require * validating that the locked batch matches. */ static void vdbatch_dequeue(struct vnode *vp) { struct vdbatch *vd; int i; short cpu; VNASSERT(vp->v_type == VBAD || vp->v_type == VNON, vp, ("%s: called for a used vnode\n", __func__)); cpu = vp->v_dbatchcpu; if (cpu == NOCPU) return; vd = DPCPU_ID_PTR(cpu, vd); mtx_lock(&vd->lock); for (i = 0; i < vd->index; i++) { if (vd->tab[i] != vp) continue; vp->v_dbatchcpu = NOCPU; vd->index--; vd->tab[i] = vd->tab[vd->index]; vd->tab[vd->index] = NULL; break; } mtx_unlock(&vd->lock); /* * Either we dequeued the vnode above or the target CPU beat us to it. */ MPASS(vp->v_dbatchcpu == NOCPU); } /* * Drop the hold count of the vnode. If this is the last reference to * the vnode we place it on the free list unless it has been vgone'd * (marked VIRF_DOOMED) in which case we will free it. * * Because the vnode vm object keeps a hold reference on the vnode if * there is at least one resident non-cached page, the vnode cannot * leave the active list without the page cleanup done. */ static void vdrop_deactivate(struct vnode *vp) { struct mount *mp; ASSERT_VI_LOCKED(vp, __func__); /* * Mark a vnode as free: remove it from its active list * and put it up for recycling on the freelist.
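 *
 * Note, summarizing the code above: the requeue itself is deferred
 * through the per-CPU vdbatch machinery, so a vnode whose hold count
 * reaches zero only moves to the tail of the global vnode list once
 * its batch fills up and vdbatch_process() runs.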
*/ VNASSERT(!VN_IS_DOOMED(vp), vp, ("vdrop: returning doomed vnode")); VNASSERT(vp->v_op != NULL, vp, ("vdrop: vnode already reclaimed.")); VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp, ("vnode with VI_OWEINACT set")); VNASSERT((vp->v_iflag & VI_DEFINACT) == 0, vp, ("vnode with VI_DEFINACT set")); if (vp->v_mflag & VMP_LAZYLIST) { mp = vp->v_mount; mtx_lock(&mp->mnt_listmtx); VNASSERT(vp->v_mflag & VMP_LAZYLIST, vp, ("lost VMP_LAZYLIST")); /* * Don't remove the vnode from the lazy list if another thread * has increased the hold count. It may have re-enqueued the * vnode to the lazy list and is now responsible for its * removal. */ if (vp->v_holdcnt == 0) { vp->v_mflag &= ~VMP_LAZYLIST; TAILQ_REMOVE(&mp->mnt_lazyvnodelist, vp, v_lazylist); mp->mnt_lazyvnodelistsize--; } mtx_unlock(&mp->mnt_listmtx); } vdbatch_enqueue(vp); } void vdrop(struct vnode *vp) { ASSERT_VI_UNLOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if (refcount_release_if_not_last(&vp->v_holdcnt)) return; VI_LOCK(vp); vdropl(vp); } void vdropl(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if (!refcount_release(&vp->v_holdcnt)) { VI_UNLOCK(vp); return; } if (VN_IS_DOOMED(vp)) { freevnode(vp); return; } vdrop_deactivate(vp); } /* * Call VOP_INACTIVE on the vnode and manage the DOINGINACT and OWEINACT * flags. DOINGINACT prevents us from recursing in calls to vinactive. */ static void vinactivef(struct vnode *vp) { struct vm_object *obj; ASSERT_VOP_ELOCKED(vp, "vinactive"); ASSERT_VI_LOCKED(vp, "vinactive"); VNASSERT((vp->v_iflag & VI_DOINGINACT) == 0, vp, ("vinactive: recursed on VI_DOINGINACT")); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); vp->v_iflag |= VI_DOINGINACT; vp->v_iflag &= ~VI_OWEINACT; VI_UNLOCK(vp); /* * Before moving off the active list, we must be sure that any * modified pages are converted into the vnode's dirty * buffers, since these will no longer be checked once the * vnode is on the inactive list. * * The write-out of the dirty pages is asynchronous. At the * point that VOP_INACTIVE() is called, there could still be * pending I/O and dirty pages in the object. */ if ((obj = vp->v_object) != NULL && (vp->v_vflag & VV_NOSYNC) == 0 && vm_object_mightbedirty(obj)) { VM_OBJECT_WLOCK(obj); vm_object_page_clean(obj, 0, 0, 0); VM_OBJECT_WUNLOCK(obj); } VOP_INACTIVE(vp, curthread); VI_LOCK(vp); VNASSERT(vp->v_iflag & VI_DOINGINACT, vp, ("vinactive: lost VI_DOINGINACT")); vp->v_iflag &= ~VI_DOINGINACT; } void vinactive(struct vnode *vp) { ASSERT_VOP_ELOCKED(vp, "vinactive"); ASSERT_VI_LOCKED(vp, "vinactive"); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if ((vp->v_iflag & VI_OWEINACT) == 0) return; if (vp->v_iflag & VI_DOINGINACT) return; if (vp->v_usecount > 0) { vp->v_iflag &= ~VI_OWEINACT; return; } vinactivef(vp); } /* * Remove any vnodes in the vnode table belonging to mount point mp. * * If FORCECLOSE is not specified, there should not be any active ones, * return error if any are found (nb: this is a user error, not a * system error). If FORCECLOSE is specified, detach any active vnodes * that are found. * * If WRITECLOSE is set, only flush out regular file vnodes open for * writing. * * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped. * * `rootrefs' specifies the base reference count for the root vnode * of this filesystem. The root vnode is considered busy if its * v_usecount exceeds this value. On a successful return, vflush(, td) * will call vrele() on the root vnode exactly rootrefs times. 
* If the SKIPSYSTEM or WRITECLOSE flags are specified, rootrefs must * be zero. */ #ifdef DIAGNOSTIC static int busyprt = 0; /* print out busy vnodes */ SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "Print out busy vnodes"); #endif int vflush(struct mount *mp, int rootrefs, int flags, struct thread *td) { struct vnode *vp, *mvp, *rootvp = NULL; struct vattr vattr; int busy = 0, error; CTR4(KTR_VFS, "%s: mp %p with rootrefs %d and flags %d", __func__, mp, rootrefs, flags); if (rootrefs > 0) { KASSERT((flags & (SKIPSYSTEM | WRITECLOSE)) == 0, ("vflush: bad args")); /* * Get the filesystem root vnode. We can vput() it * immediately, since with rootrefs > 0, it won't go away. */ if ((error = VFS_ROOT(mp, LK_EXCLUSIVE, &rootvp)) != 0) { CTR2(KTR_VFS, "%s: vfs_root lookup failed with %d", __func__, error); return (error); } vput(rootvp); } loop: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { vholdl(vp); error = vn_lock(vp, LK_INTERLOCK | LK_EXCLUSIVE); if (error) { vdrop(vp); MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } /* * Skip over vnodes marked VV_SYSTEM. */ if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) { VOP_UNLOCK(vp); vdrop(vp); continue; } /* * If WRITECLOSE is set, flush out unlinked but still open * files (even if open only for reading) and regular file * vnodes open for writing. */ if (flags & WRITECLOSE) { if (vp->v_object != NULL) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, 0, 0, 0); VM_OBJECT_WUNLOCK(vp->v_object); } error = VOP_FSYNC(vp, MNT_WAIT, td); if (error != 0) { VOP_UNLOCK(vp); vdrop(vp); MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); return (error); } error = VOP_GETATTR(vp, &vattr, td->td_ucred); VI_LOCK(vp); if ((vp->v_type == VNON || (error == 0 && vattr.va_nlink > 0)) && (vp->v_writecount <= 0 || vp->v_type != VREG)) { VOP_UNLOCK(vp); vdropl(vp); continue; } } else VI_LOCK(vp); /* * With v_usecount == 0, all we need to do is clear out the * vnode data structures and we are done. * * If FORCECLOSE is set, forcibly close the vnode. */ if (vp->v_usecount == 0 || (flags & FORCECLOSE)) { vgonel(vp); } else { busy++; #ifdef DIAGNOSTIC if (busyprt) vn_printf(vp, "vflush: busy vnode "); #endif } VOP_UNLOCK(vp); vdropl(vp); } if (rootrefs > 0 && (flags & FORCECLOSE) == 0) { /* * If just the root vnode is busy, and if its refcount * is equal to `rootrefs', then go ahead and kill it. */ VI_LOCK(rootvp); KASSERT(busy > 0, ("vflush: not busy")); VNASSERT(rootvp->v_usecount >= rootrefs, rootvp, ("vflush: usecount %d < rootrefs %d", rootvp->v_usecount, rootrefs)); if (busy == 1 && rootvp->v_usecount == rootrefs) { VOP_LOCK(rootvp, LK_EXCLUSIVE|LK_INTERLOCK); vgone(rootvp); VOP_UNLOCK(rootvp); busy = 0; } else VI_UNLOCK(rootvp); } if (busy) { CTR2(KTR_VFS, "%s: failing as %d vnodes are busy", __func__, busy); return (EBUSY); } for (; rootrefs > 0; rootrefs--) vrele(rootvp); return (0); } /* * Recycle an unused vnode to the front of the free list. */ int vrecycle(struct vnode *vp) { int recycled; VI_LOCK(vp); recycled = vrecyclel(vp); VI_UNLOCK(vp); return (recycled); } /* * vrecycle, with the vp interlock held. */ int vrecyclel(struct vnode *vp) { int recycled; ASSERT_VOP_ELOCKED(vp, __func__); ASSERT_VI_LOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); recycled = 0; if (vp->v_usecount == 0) { recycled = 1; vgonel(vp); } return (recycled); } /* * Eliminate all activity associated with a vnode * in preparation for reuse.
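 *
 * An illustrative sketch, mirroring vrecyclel() above: the caller
 * holds the vnode lock and a hold count, and the vnode comes out
 * doomed with dead_vnodeops installed:
 *
 *      ASSERT_VOP_ELOCKED(vp, "caller");
 *      vgone(vp);      -- or vgonel(vp) with the interlock held
 *      ... vp->v_op == &dead_vnodeops, VIRF_DOOMED is set ...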
*/ void vgone(struct vnode *vp) { VI_LOCK(vp); vgonel(vp); VI_UNLOCK(vp); } static void notify_lowervp_vfs_dummy(struct mount *mp __unused, struct vnode *lowervp __unused) { } /* * Notify upper mounts about reclaimed or unlinked vnode. */ void vfs_notify_upper(struct vnode *vp, int event) { static struct vfsops vgonel_vfsops = { .vfs_reclaim_lowervp = notify_lowervp_vfs_dummy, .vfs_unlink_lowervp = notify_lowervp_vfs_dummy, }; struct mount *mp, *ump, *mmp; mp = vp->v_mount; if (mp == NULL) return; if (TAILQ_EMPTY(&mp->mnt_uppers)) return; mmp = malloc(sizeof(struct mount), M_TEMP, M_WAITOK | M_ZERO); mmp->mnt_op = &vgonel_vfsops; mmp->mnt_kern_flag |= MNTK_MARKER; MNT_ILOCK(mp); mp->mnt_kern_flag |= MNTK_VGONE_UPPER; for (ump = TAILQ_FIRST(&mp->mnt_uppers); ump != NULL;) { if ((ump->mnt_kern_flag & MNTK_MARKER) != 0) { ump = TAILQ_NEXT(ump, mnt_upper_link); continue; } TAILQ_INSERT_AFTER(&mp->mnt_uppers, ump, mmp, mnt_upper_link); MNT_IUNLOCK(mp); switch (event) { case VFS_NOTIFY_UPPER_RECLAIM: VFS_RECLAIM_LOWERVP(ump, vp); break; case VFS_NOTIFY_UPPER_UNLINK: VFS_UNLINK_LOWERVP(ump, vp); break; default: KASSERT(0, ("invalid event %d", event)); break; } MNT_ILOCK(mp); ump = TAILQ_NEXT(mmp, mnt_upper_link); TAILQ_REMOVE(&mp->mnt_uppers, mmp, mnt_upper_link); } free(mmp, M_TEMP); mp->mnt_kern_flag &= ~MNTK_VGONE_UPPER; if ((mp->mnt_kern_flag & MNTK_VGONE_WAITER) != 0) { mp->mnt_kern_flag &= ~MNTK_VGONE_WAITER; wakeup(&mp->mnt_uppers); } MNT_IUNLOCK(mp); } /* * vgone, with the vp interlock held. */ static void vgonel(struct vnode *vp) { struct thread *td; struct mount *mp; vm_object_t object; bool active, oweinact; ASSERT_VOP_ELOCKED(vp, "vgonel"); ASSERT_VI_LOCKED(vp, "vgonel"); VNASSERT(vp->v_holdcnt, vp, ("vgonel: vp %p has no reference.", vp)); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); td = curthread; /* * Don't vgonel if we're already doomed. */ if (vp->v_irflag & VIRF_DOOMED) return; vp->v_irflag |= VIRF_DOOMED; /* * Check to see if the vnode is in use. If so, we have to call * VOP_CLOSE() and VOP_INACTIVE(). */ active = vp->v_usecount > 0; oweinact = (vp->v_iflag & VI_OWEINACT) != 0; /* * If we need to do inactive VI_OWEINACT will be set. */ if (vp->v_iflag & VI_DEFINACT) { VNASSERT(vp->v_holdcnt > 1, vp, ("lost hold count")); vp->v_iflag &= ~VI_DEFINACT; vdropl(vp); } else { VNASSERT(vp->v_holdcnt > 0, vp, ("vnode without hold count")); VI_UNLOCK(vp); } vfs_notify_upper(vp, VFS_NOTIFY_UPPER_RECLAIM); /* * If purging an active vnode, it must be closed and * deactivated before being reclaimed. */ if (active) VOP_CLOSE(vp, FNONBLOCK, NOCRED, td); if (oweinact || active) { VI_LOCK(vp); vinactivef(vp); VI_UNLOCK(vp); } if (vp->v_type == VSOCK) vfs_unp_reclaim(vp); /* * Clean out any buffers associated with the vnode. * If the flush fails, just toss the buffers. */ mp = NULL; if (!TAILQ_EMPTY(&vp->v_bufobj.bo_dirty.bv_hd)) (void) vn_start_secondary_write(vp, &mp, V_WAIT); if (vinvalbuf(vp, V_SAVE, 0, 0) != 0) { while (vinvalbuf(vp, 0, 0, 0) != 0) ; } BO_LOCK(&vp->v_bufobj); KASSERT(TAILQ_EMPTY(&vp->v_bufobj.bo_dirty.bv_hd) && vp->v_bufobj.bo_dirty.bv_cnt == 0 && TAILQ_EMPTY(&vp->v_bufobj.bo_clean.bv_hd) && vp->v_bufobj.bo_clean.bv_cnt == 0, ("vp %p bufobj not invalidated", vp)); /* * For VMIO bufobj, BO_DEAD is set later, or in * vm_object_terminate() after the object's page queue is * flushed. */ object = vp->v_bufobj.bo_object; if (object == NULL) vp->v_bufobj.bo_flag |= BO_DEAD; BO_UNLOCK(&vp->v_bufobj); /* * Handle the VM part. Tmpfs handles v_object on its own (the * OBJT_VNODE check). 
Nullfs or other bypassing filesystems * should not touch the object borrowed from the lower vnode * (the handle check). */ if (object != NULL && object->type == OBJT_VNODE && object->handle == vp) vnode_destroy_vobject(vp); /* * Reclaim the vnode. */ if (VOP_RECLAIM(vp, td)) panic("vgone: cannot reclaim"); if (mp != NULL) vn_finished_secondary_write(mp); VNASSERT(vp->v_object == NULL, vp, ("vop_reclaim left v_object vp=%p", vp)); /* * Clear the advisory locks and wake up waiting threads. */ (void)VOP_ADVLOCKPURGE(vp); vp->v_lockf = NULL; /* * Delete from old mount point vnode list. */ delmntque(vp); cache_purge(vp); /* * Done with purge, reset to the standard lock and invalidate * the vnode. */ VI_LOCK(vp); vp->v_vnlock = &vp->v_lock; vp->v_op = &dead_vnodeops; vp->v_type = VBAD; } /* * Calculate the total number of references to a special device. */ int vcount(struct vnode *vp) { int count; dev_lock(); count = vp->v_rdev->si_usecount; dev_unlock(); return (count); } /* * Print out a description of a vnode. */ static char *typename[] = {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD", "VMARKER"}; void vn_printf(struct vnode *vp, const char *fmt, ...) { va_list ap; char buf[256], buf2[16]; u_long flags; va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); printf("%p: ", (void *)vp); printf("type %s\n", typename[vp->v_type]); printf(" usecount %d, writecount %d, refcount %d", vp->v_usecount, vp->v_writecount, vp->v_holdcnt); switch (vp->v_type) { case VDIR: printf(" mountedhere %p\n", vp->v_mountedhere); break; case VCHR: printf(" rdev %p\n", vp->v_rdev); break; case VSOCK: printf(" socket %p\n", vp->v_unpcb); break; case VFIFO: printf(" fifoinfo %p\n", vp->v_fifoinfo); break; default: printf("\n"); break; } buf[0] = '\0'; buf[1] = '\0'; if (vp->v_irflag & VIRF_DOOMED) strlcat(buf, "|VIRF_DOOMED", sizeof(buf)); flags = vp->v_irflag & ~(VIRF_DOOMED); if (flags != 0) { snprintf(buf2, sizeof(buf2), "|VIRF(0x%lx)", flags); strlcat(buf, buf2, sizeof(buf)); } if (vp->v_vflag & VV_ROOT) strlcat(buf, "|VV_ROOT", sizeof(buf)); if (vp->v_vflag & VV_ISTTY) strlcat(buf, "|VV_ISTTY", sizeof(buf)); if (vp->v_vflag & VV_NOSYNC) strlcat(buf, "|VV_NOSYNC", sizeof(buf)); if (vp->v_vflag & VV_ETERNALDEV) strlcat(buf, "|VV_ETERNALDEV", sizeof(buf)); if (vp->v_vflag & VV_CACHEDLABEL) strlcat(buf, "|VV_CACHEDLABEL", sizeof(buf)); if (vp->v_vflag & VV_VMSIZEVNLOCK) strlcat(buf, "|VV_VMSIZEVNLOCK", sizeof(buf)); if (vp->v_vflag & VV_COPYONWRITE) strlcat(buf, "|VV_COPYONWRITE", sizeof(buf)); if (vp->v_vflag & VV_SYSTEM) strlcat(buf, "|VV_SYSTEM", sizeof(buf)); if (vp->v_vflag & VV_PROCDEP) strlcat(buf, "|VV_PROCDEP", sizeof(buf)); if (vp->v_vflag & VV_NOKNOTE) strlcat(buf, "|VV_NOKNOTE", sizeof(buf)); if (vp->v_vflag & VV_DELETED) strlcat(buf, "|VV_DELETED", sizeof(buf)); if (vp->v_vflag & VV_MD) strlcat(buf, "|VV_MD", sizeof(buf)); if (vp->v_vflag & VV_FORCEINSMQ) strlcat(buf, "|VV_FORCEINSMQ", sizeof(buf)); if (vp->v_vflag & VV_READLINK) strlcat(buf, "|VV_READLINK", sizeof(buf)); flags = vp->v_vflag & ~(VV_ROOT | VV_ISTTY | VV_NOSYNC | VV_ETERNALDEV | VV_CACHEDLABEL | VV_COPYONWRITE | VV_SYSTEM | VV_PROCDEP | VV_NOKNOTE | VV_DELETED | VV_MD | VV_FORCEINSMQ); if (flags != 0) { snprintf(buf2, sizeof(buf2), "|VV(0x%lx)", flags); strlcat(buf, buf2, sizeof(buf)); } if (vp->v_iflag & VI_TEXT_REF) strlcat(buf, "|VI_TEXT_REF", sizeof(buf)); if (vp->v_iflag & VI_MOUNT) strlcat(buf, "|VI_MOUNT", sizeof(buf)); if (vp->v_iflag & VI_DOINGINACT) strlcat(buf, "|VI_DOINGINACT", sizeof(buf)); if (vp->v_iflag & 
VI_OWEINACT) strlcat(buf, "|VI_OWEINACT", sizeof(buf)); if (vp->v_iflag & VI_DEFINACT) strlcat(buf, "|VI_DEFINACT", sizeof(buf)); flags = vp->v_iflag & ~(VI_TEXT_REF | VI_MOUNT | VI_DOINGINACT | VI_OWEINACT | VI_DEFINACT); if (flags != 0) { snprintf(buf2, sizeof(buf2), "|VI(0x%lx)", flags); strlcat(buf, buf2, sizeof(buf)); } if (vp->v_mflag & VMP_LAZYLIST) strlcat(buf, "|VMP_LAZYLIST", sizeof(buf)); flags = vp->v_mflag & ~(VMP_LAZYLIST); if (flags != 0) { snprintf(buf2, sizeof(buf2), "|VMP(0x%lx)", flags); strlcat(buf, buf2, sizeof(buf)); } printf(" flags (%s)\n", buf + 1); if (mtx_owned(VI_MTX(vp))) printf(" VI_LOCKed"); if (vp->v_object != NULL) printf(" v_object %p ref %d pages %d " "cleanbuf %d dirtybuf %d\n", vp->v_object, vp->v_object->ref_count, vp->v_object->resident_page_count, vp->v_bufobj.bo_clean.bv_cnt, vp->v_bufobj.bo_dirty.bv_cnt); printf(" "); lockmgr_printinfo(vp->v_vnlock); if (vp->v_data != NULL) VOP_PRINT(vp); } #ifdef DDB /* * List all of the locked vnodes in the system. * Called when debugging the kernel. */ DB_SHOW_COMMAND(lockedvnods, lockedvnodes) { struct mount *mp; struct vnode *vp; /* * Note: because this is DDB, we can't obey the locking semantics * for these structures, which means we could catch an inconsistent * state and dereference a nasty pointer. Not much to be done * about that. */ db_printf("Locked vnodes\n"); TAILQ_FOREACH(mp, &mountlist, mnt_list) { TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (vp->v_type != VMARKER && VOP_ISLOCKED(vp)) vn_printf(vp, "vnode "); } } } /* * Show details about the given vnode. */ DB_SHOW_COMMAND(vnode, db_show_vnode) { struct vnode *vp; if (!have_addr) return; vp = (struct vnode *)addr; vn_printf(vp, "vnode "); } /* * Show details about the given mount point. */ DB_SHOW_COMMAND(mount, db_show_mount) { struct mount *mp; struct vfsopt *opt; struct statfs *sp; struct vnode *vp; char buf[512]; uint64_t mflags; u_int flags; if (!have_addr) { /* No address given, print short info about all mount points. 
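 *
 * Usage sketch from the debugger prompt: "show mount" with no
 * address prints the one-line summary per mount point below, while
 * "show mount <addr>" prints the full details that follow.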
*/ TAILQ_FOREACH(mp, &mountlist, mnt_list) { db_printf("%p %s on %s (%s)\n", mp, mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname, mp->mnt_stat.f_fstypename); if (db_pager_quit) break; } db_printf("\nMore info: show mount <addr>\n"); return; } mp = (struct mount *)addr; db_printf("%p %s on %s (%s)\n", mp, mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname, mp->mnt_stat.f_fstypename); buf[0] = '\0'; mflags = mp->mnt_flag; #define MNT_FLAG(flag) do { \ if (mflags & (flag)) { \ if (buf[0] != '\0') \ strlcat(buf, ", ", sizeof(buf)); \ strlcat(buf, (#flag) + 4, sizeof(buf)); \ mflags &= ~(flag); \ } \ } while (0) MNT_FLAG(MNT_RDONLY); MNT_FLAG(MNT_SYNCHRONOUS); MNT_FLAG(MNT_NOEXEC); MNT_FLAG(MNT_NOSUID); MNT_FLAG(MNT_NFS4ACLS); MNT_FLAG(MNT_UNION); MNT_FLAG(MNT_ASYNC); MNT_FLAG(MNT_SUIDDIR); MNT_FLAG(MNT_SOFTDEP); MNT_FLAG(MNT_NOSYMFOLLOW); MNT_FLAG(MNT_GJOURNAL); MNT_FLAG(MNT_MULTILABEL); MNT_FLAG(MNT_ACLS); MNT_FLAG(MNT_NOATIME); MNT_FLAG(MNT_NOCLUSTERR); MNT_FLAG(MNT_NOCLUSTERW); MNT_FLAG(MNT_SUJ); MNT_FLAG(MNT_EXRDONLY); MNT_FLAG(MNT_EXPORTED); MNT_FLAG(MNT_DEFEXPORTED); MNT_FLAG(MNT_EXPORTANON); MNT_FLAG(MNT_EXKERB); MNT_FLAG(MNT_EXPUBLIC); MNT_FLAG(MNT_LOCAL); MNT_FLAG(MNT_QUOTA); MNT_FLAG(MNT_ROOTFS); MNT_FLAG(MNT_USER); MNT_FLAG(MNT_IGNORE); MNT_FLAG(MNT_UPDATE); MNT_FLAG(MNT_DELEXPORT); MNT_FLAG(MNT_RELOAD); MNT_FLAG(MNT_FORCE); MNT_FLAG(MNT_SNAPSHOT); MNT_FLAG(MNT_BYFSID); #undef MNT_FLAG if (mflags != 0) { if (buf[0] != '\0') strlcat(buf, ", ", sizeof(buf)); snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "0x%016jx", mflags); } db_printf(" mnt_flag = %s\n", buf); buf[0] = '\0'; flags = mp->mnt_kern_flag; #define MNT_KERN_FLAG(flag) do { \ if (flags & (flag)) { \ if (buf[0] != '\0') \ strlcat(buf, ", ", sizeof(buf)); \ strlcat(buf, (#flag) + 5, sizeof(buf)); \ flags &= ~(flag); \ } \ } while (0) MNT_KERN_FLAG(MNTK_UNMOUNTF); MNT_KERN_FLAG(MNTK_ASYNC); MNT_KERN_FLAG(MNTK_SOFTDEP); MNT_KERN_FLAG(MNTK_DRAINING); MNT_KERN_FLAG(MNTK_REFEXPIRE); MNT_KERN_FLAG(MNTK_EXTENDED_SHARED); MNT_KERN_FLAG(MNTK_SHARED_WRITES); MNT_KERN_FLAG(MNTK_NO_IOPF); MNT_KERN_FLAG(MNTK_VGONE_UPPER); MNT_KERN_FLAG(MNTK_VGONE_WAITER); MNT_KERN_FLAG(MNTK_LOOKUP_EXCL_DOTDOT); MNT_KERN_FLAG(MNTK_MARKER); MNT_KERN_FLAG(MNTK_USES_BCACHE); MNT_KERN_FLAG(MNTK_NOASYNC); MNT_KERN_FLAG(MNTK_UNMOUNT); MNT_KERN_FLAG(MNTK_MWAIT); MNT_KERN_FLAG(MNTK_SUSPEND); MNT_KERN_FLAG(MNTK_SUSPEND2); MNT_KERN_FLAG(MNTK_SUSPENDED); MNT_KERN_FLAG(MNTK_LOOKUP_SHARED); MNT_KERN_FLAG(MNTK_NOKNOTE); #undef MNT_KERN_FLAG if (flags != 0) { if (buf[0] != '\0') strlcat(buf, ", ", sizeof(buf)); snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "0x%08x", flags); } db_printf(" mnt_kern_flag = %s\n", buf); db_printf(" mnt_opt = "); opt = TAILQ_FIRST(mp->mnt_opt); if (opt != NULL) { db_printf("%s", opt->name); opt = TAILQ_NEXT(opt, link); while (opt != NULL) { db_printf(", %s", opt->name); opt = TAILQ_NEXT(opt, link); } } db_printf("\n"); sp = &mp->mnt_stat; db_printf(" mnt_stat = { version=%u type=%u flags=0x%016jx " "bsize=%ju iosize=%ju blocks=%ju bfree=%ju bavail=%jd files=%ju " "ffree=%jd syncwrites=%ju asyncwrites=%ju syncreads=%ju " "asyncreads=%ju namemax=%u owner=%u fsid=[%d, %d] }\n", (u_int)sp->f_version, (u_int)sp->f_type, (uintmax_t)sp->f_flags, (uintmax_t)sp->f_bsize, (uintmax_t)sp->f_iosize, (uintmax_t)sp->f_blocks, (uintmax_t)sp->f_bfree, (intmax_t)sp->f_bavail, (uintmax_t)sp->f_files, (intmax_t)sp->f_ffree, (uintmax_t)sp->f_syncwrites, (uintmax_t)sp->f_asyncwrites, (uintmax_t)sp->f_syncreads, (uintmax_t)sp->f_asyncreads,
(u_int)sp->f_namemax, (u_int)sp->f_owner, (int)sp->f_fsid.val[0], (int)sp->f_fsid.val[1]); db_printf(" mnt_cred = { uid=%u ruid=%u", (u_int)mp->mnt_cred->cr_uid, (u_int)mp->mnt_cred->cr_ruid); if (jailed(mp->mnt_cred)) db_printf(", jail=%d", mp->mnt_cred->cr_prison->pr_id); db_printf(" }\n"); db_printf(" mnt_ref = %d (with %d in the struct)\n", vfs_mount_fetch_counter(mp, MNT_COUNT_REF), mp->mnt_ref); db_printf(" mnt_gen = %d\n", mp->mnt_gen); db_printf(" mnt_nvnodelistsize = %d\n", mp->mnt_nvnodelistsize); db_printf(" mnt_lazyvnodelistsize = %d\n", mp->mnt_lazyvnodelistsize); db_printf(" mnt_writeopcount = %d (with %d in the struct)\n", vfs_mount_fetch_counter(mp, MNT_COUNT_WRITEOPCOUNT), mp->mnt_writeopcount); db_printf(" mnt_maxsymlinklen = %d\n", mp->mnt_maxsymlinklen); db_printf(" mnt_iosize_max = %d\n", mp->mnt_iosize_max); db_printf(" mnt_hashseed = %u\n", mp->mnt_hashseed); db_printf(" mnt_lockref = %d (with %d in the struct)\n", vfs_mount_fetch_counter(mp, MNT_COUNT_LOCKREF), mp->mnt_lockref); db_printf(" mnt_secondary_writes = %d\n", mp->mnt_secondary_writes); db_printf(" mnt_secondary_accwrites = %d\n", mp->mnt_secondary_accwrites); db_printf(" mnt_gjprovider = %s\n", mp->mnt_gjprovider != NULL ? mp->mnt_gjprovider : "NULL"); db_printf(" mnt_vfs_ops = %d\n", mp->mnt_vfs_ops); db_printf("\n\nList of active vnodes\n"); TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (vp->v_type != VMARKER && vp->v_holdcnt > 0) { vn_printf(vp, "vnode "); if (db_pager_quit) break; } } db_printf("\n\nList of inactive vnodes\n"); TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (vp->v_type != VMARKER && vp->v_holdcnt == 0) { vn_printf(vp, "vnode "); if (db_pager_quit) break; } } } #endif /* DDB */ /* * Fill in a struct xvfsconf based on a struct vfsconf. */ static int vfsconf2x(struct sysctl_req *req, struct vfsconf *vfsp) { struct xvfsconf xvfsp; bzero(&xvfsp, sizeof(xvfsp)); strcpy(xvfsp.vfc_name, vfsp->vfc_name); xvfsp.vfc_typenum = vfsp->vfc_typenum; xvfsp.vfc_refcount = vfsp->vfc_refcount; xvfsp.vfc_flags = vfsp->vfc_flags; /* * These are unused in userland, we keep them * to not break binary compatibility. */ xvfsp.vfc_vfsops = NULL; xvfsp.vfc_next = NULL; return (SYSCTL_OUT(req, &xvfsp, sizeof(xvfsp))); } #ifdef COMPAT_FREEBSD32 struct xvfsconf32 { uint32_t vfc_vfsops; char vfc_name[MFSNAMELEN]; int32_t vfc_typenum; int32_t vfc_refcount; int32_t vfc_flags; uint32_t vfc_next; }; static int vfsconf2x32(struct sysctl_req *req, struct vfsconf *vfsp) { struct xvfsconf32 xvfsp; bzero(&xvfsp, sizeof(xvfsp)); strcpy(xvfsp.vfc_name, vfsp->vfc_name); xvfsp.vfc_typenum = vfsp->vfc_typenum; xvfsp.vfc_refcount = vfsp->vfc_refcount; xvfsp.vfc_flags = vfsp->vfc_flags; return (SYSCTL_OUT(req, &xvfsp, sizeof(xvfsp))); } #endif /* * Top level filesystem related information gathering. 
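 *
 * For illustration: the vfs.conflist OID defined below exports the
 * registered filesystems as an array of struct xvfsconf records;
 * userland tools such as lsvfs(1) are believed to be the consumers.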
*/ static int sysctl_vfs_conflist(SYSCTL_HANDLER_ARGS) { struct vfsconf *vfsp; int error; error = 0; vfsconf_slock(); TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { #ifdef COMPAT_FREEBSD32 if (req->flags & SCTL_MASK32) error = vfsconf2x32(req, vfsp); else #endif error = vfsconf2x(req, vfsp); if (error) break; } vfsconf_sunlock(); return (error); } SYSCTL_PROC(_vfs, OID_AUTO, conflist, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_vfs_conflist, "S,xvfsconf", "List of all configured filesystems"); #ifndef BURN_BRIDGES static int sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS); static int vfs_sysctl(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1 - 1; /* XXX */ u_int namelen = arg2 + 1; /* XXX */ struct vfsconf *vfsp; log(LOG_WARNING, "userland calling deprecated sysctl, " "please rebuild world\n"); #if 1 || defined(COMPAT_PRELITE2) /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */ if (namelen == 1) return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); #endif switch (name[1]) { case VFS_MAXTYPENUM: if (namelen != 2) return (ENOTDIR); return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); case VFS_CONF: if (namelen != 3) return (ENOTDIR); /* overloaded */ vfsconf_slock(); TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { if (vfsp->vfc_typenum == name[2]) break; } vfsconf_sunlock(); if (vfsp == NULL) return (EOPNOTSUPP); #ifdef COMPAT_FREEBSD32 if (req->flags & SCTL_MASK32) return (vfsconf2x32(req, vfsp)); else #endif return (vfsconf2x(req, vfsp)); } return (EOPNOTSUPP); } static SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE, vfs_sysctl, "Generic filesystem"); #if 1 || defined(COMPAT_PRELITE2) static int sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS) { int error; struct vfsconf *vfsp; struct ovfsconf ovfs; vfsconf_slock(); TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { bzero(&ovfs, sizeof(ovfs)); ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ strcpy(ovfs.vfc_name, vfsp->vfc_name); ovfs.vfc_index = vfsp->vfc_typenum; ovfs.vfc_refcount = vfsp->vfc_refcount; ovfs.vfc_flags = vfsp->vfc_flags; error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); if (error != 0) { vfsconf_sunlock(); return (error); } } vfsconf_sunlock(); return (0); } #endif /* 1 || COMPAT_PRELITE2 */ #endif /* !BURN_BRIDGES */ #define KINFO_VNODESLOP 10 #ifdef notyet /* * Dump vnode list (via sysctl). */ /* ARGSUSED */ static int sysctl_vnode(SYSCTL_HANDLER_ARGS) { struct xvnode *xvn; struct mount *mp; struct vnode *vp; int error, len, n; /* * Stale numvnodes access is not fatal here. 
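 * The estimate below only needs to be good enough for userland to size
 * a buffer; KINFO_VNODESLOP entries of slack absorb vnodes created
 * between the sizing probe and the copyout. A consumer would follow the
 * usual two-call sysctl pattern (sketch only, assuming this handler
 * were compiled in):
 *
 *	size_t len = 0;
 *	sysctlbyname("kern.vnode", NULL, &len, NULL, 0);
 *	buf = malloc(len);
 *	sysctlbyname("kern.vnode", buf, &len, NULL, 0);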
*/ req->lock = 0; len = (numvnodes + KINFO_VNODESLOP) * sizeof *xvn; if (!req->oldptr) /* Make an estimate */ return (SYSCTL_OUT(req, 0, len)); error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); xvn = malloc(len, M_TEMP, M_ZERO | M_WAITOK); n = 0; mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) continue; MNT_ILOCK(mp); TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (n == len) break; vref(vp); xvn[n].xv_size = sizeof *xvn; xvn[n].xv_vnode = vp; xvn[n].xv_id = 0; /* XXX compat */ #define XV_COPY(field) xvn[n].xv_##field = vp->v_##field XV_COPY(usecount); XV_COPY(writecount); XV_COPY(holdcnt); XV_COPY(mount); XV_COPY(numoutput); XV_COPY(type); #undef XV_COPY xvn[n].xv_flag = vp->v_vflag; switch (vp->v_type) { case VREG: case VDIR: case VLNK: break; case VBLK: case VCHR: if (vp->v_rdev == NULL) { vrele(vp); continue; } xvn[n].xv_dev = dev2udev(vp->v_rdev); break; case VSOCK: xvn[n].xv_socket = vp->v_socket; break; case VFIFO: xvn[n].xv_fifo = vp->v_fifoinfo; break; case VNON: case VBAD: default: /* shouldn't happen? */ vrele(vp); continue; } vrele(vp); ++n; } MNT_IUNLOCK(mp); mtx_lock(&mountlist_mtx); vfs_unbusy(mp); if (n == len) break; } mtx_unlock(&mountlist_mtx); error = SYSCTL_OUT(req, xvn, n * sizeof *xvn); free(xvn, M_TEMP); return (error); } SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, sysctl_vnode, "S,xvnode", ""); #endif static void unmount_or_warn(struct mount *mp) { int error; error = dounmount(mp, MNT_FORCE, curthread); if (error != 0) { printf("unmount of %s failed (", mp->mnt_stat.f_mntonname); if (error == EBUSY) printf("BUSY)\n"); else printf("%d)\n", error); } } /* * Unmount all filesystems. The list is traversed in reverse order * of mounting to avoid dependencies. */ void vfs_unmountall(void) { struct mount *mp, *tmp; CTR1(KTR_VFS, "%s: unmounting all filesystems", __func__); /* * Since this only runs when rebooting, it is not interlocked. */ TAILQ_FOREACH_REVERSE_SAFE(mp, &mountlist, mntlist, mnt_list, tmp) { vfs_ref(mp); /* * Forcibly unmounting "/dev" before "/" would prevent clean * unmount of the latter. */ if (mp == rootdevmp) continue; unmount_or_warn(mp); } if (rootdevmp != NULL) unmount_or_warn(rootdevmp); } static void vfs_deferred_inactive(struct vnode *vp, int lkflags) { ASSERT_VI_LOCKED(vp, __func__); VNASSERT((vp->v_iflag & VI_DEFINACT) == 0, vp, ("VI_DEFINACT still set")); if ((vp->v_iflag & VI_OWEINACT) == 0) { vdropl(vp); return; } if (vn_lock(vp, lkflags) == 0) { VI_LOCK(vp); vinactive(vp); VOP_UNLOCK(vp); vdropl(vp); return; } vdefer_inactive_unlocked(vp); } static int vfs_periodic_inactive_filter(struct vnode *vp, void *arg) { return (vp->v_iflag & VI_DEFINACT); } static void __noinline vfs_periodic_inactive(struct mount *mp, int flags) { struct vnode *vp, *mvp; int lkflags; lkflags = LK_EXCLUSIVE | LK_INTERLOCK; if (flags != MNT_WAIT) lkflags |= LK_NOWAIT; MNT_VNODE_FOREACH_LAZY(vp, mp, mvp, vfs_periodic_inactive_filter, NULL) { if ((vp->v_iflag & VI_DEFINACT) == 0) { VI_UNLOCK(vp); continue; } vp->v_iflag &= ~VI_DEFINACT; vfs_deferred_inactive(vp, lkflags); } } static inline bool vfs_want_msync(struct vnode *vp) { struct vm_object *obj; /* * This test may be performed without any locks held. * We rely on vm_object's type stability. 
*/ if (vp->v_vflag & VV_NOSYNC) return (false); obj = vp->v_object; return (obj != NULL && vm_object_mightbedirty(obj)); } static int vfs_periodic_msync_inactive_filter(struct vnode *vp, void *arg __unused) { if (vp->v_vflag & VV_NOSYNC) return (false); if (vp->v_iflag & VI_DEFINACT) return (true); return (vfs_want_msync(vp)); } static void __noinline vfs_periodic_msync_inactive(struct mount *mp, int flags) { struct vnode *vp, *mvp; struct vm_object *obj; struct thread *td; int lkflags, objflags; bool seen_defer; td = curthread; lkflags = LK_EXCLUSIVE | LK_INTERLOCK; if (flags != MNT_WAIT) { lkflags |= LK_NOWAIT; objflags = OBJPC_NOSYNC; } else { objflags = OBJPC_SYNC; } MNT_VNODE_FOREACH_LAZY(vp, mp, mvp, vfs_periodic_msync_inactive_filter, NULL) { seen_defer = false; if (vp->v_iflag & VI_DEFINACT) { vp->v_iflag &= ~VI_DEFINACT; seen_defer = true; } if (!vfs_want_msync(vp)) { if (seen_defer) vfs_deferred_inactive(vp, lkflags); else VI_UNLOCK(vp); continue; } if (vget(vp, lkflags, td) == 0) { obj = vp->v_object; if (obj != NULL && (vp->v_vflag & VV_NOSYNC) == 0) { VM_OBJECT_WLOCK(obj); vm_object_page_clean(obj, 0, 0, objflags); VM_OBJECT_WUNLOCK(obj); } vput(vp); if (seen_defer) vdrop(vp); } else { if (seen_defer) vdefer_inactive_unlocked(vp); } } } void vfs_periodic(struct mount *mp, int flags) { CTR2(KTR_VFS, "%s: mp %p", __func__, mp); if ((mp->mnt_kern_flag & MNTK_NOMSYNC) != 0) vfs_periodic_inactive(mp, flags); else vfs_periodic_msync_inactive(mp, flags); } static void destroy_vpollinfo_free(struct vpollinfo *vi) { knlist_destroy(&vi->vpi_selinfo.si_note); mtx_destroy(&vi->vpi_lock); uma_zfree(vnodepoll_zone, vi); } static void destroy_vpollinfo(struct vpollinfo *vi) { knlist_clear(&vi->vpi_selinfo.si_note, 1); seldrain(&vi->vpi_selinfo); destroy_vpollinfo_free(vi); } /* * Initialize per-vnode helper structure to hold poll-related state. */ void v_addpollinfo(struct vnode *vp) { struct vpollinfo *vi; if (vp->v_pollinfo != NULL) return; vi = uma_zalloc(vnodepoll_zone, M_WAITOK | M_ZERO); mtx_init(&vi->vpi_lock, "vnode pollinfo", NULL, MTX_DEF); knlist_init(&vi->vpi_selinfo.si_note, vp, vfs_knllock, vfs_knlunlock, vfs_knl_assert_locked, vfs_knl_assert_unlocked); VI_LOCK(vp); if (vp->v_pollinfo != NULL) { VI_UNLOCK(vp); destroy_vpollinfo_free(vi); return; } vp->v_pollinfo = vi; VI_UNLOCK(vp); } /* * Record a process's interest in events which might happen to * a vnode. Because poll uses the historic select-style interface * internally, this routine serves as both the ``check for any * pending events'' and the ``record my interest in future events'' * functions. (These are done together, while the lock is held, * to avoid race conditions.) */ int vn_pollrecord(struct vnode *vp, struct thread *td, int events) { v_addpollinfo(vp); mtx_lock(&vp->v_pollinfo->vpi_lock); if (vp->v_pollinfo->vpi_revents & events) { /* * This leaves events we are not interested * in available for the other process which * presumably had requested them * (otherwise they would never have been * recorded). */ events &= vp->v_pollinfo->vpi_revents; vp->v_pollinfo->vpi_revents &= ~events; mtx_unlock(&vp->v_pollinfo->vpi_lock); return (events); } vp->v_pollinfo->vpi_events |= events; selrecord(td, &vp->v_pollinfo->vpi_selinfo); mtx_unlock(&vp->v_pollinfo->vpi_lock); return (0); } /* * Routine to create and manage a filesystem syncer vnode.
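 *
 * Each mount carries at most one syncer vnode (mnt_syncer). It sits on
 * the syncer worklist, and when the syncer thread reaches it, the
 * VOP_FSYNC on it (sync_fsync below) performs a lazy
 * VFS_SYNC(mp, MNT_LAZY) of the whole filesystem before requeueing
 * itself at the back of the list.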
*/ #define sync_close ((int (*)(struct vop_close_args *))nullop) static int sync_fsync(struct vop_fsync_args *); static int sync_inactive(struct vop_inactive_args *); static int sync_reclaim(struct vop_reclaim_args *); static struct vop_vector sync_vnodeops = { .vop_bypass = VOP_EOPNOTSUPP, .vop_close = sync_close, /* close */ .vop_fsync = sync_fsync, /* fsync */ .vop_inactive = sync_inactive, /* inactive */ .vop_need_inactive = vop_stdneed_inactive, /* need_inactive */ .vop_reclaim = sync_reclaim, /* reclaim */ .vop_lock1 = vop_stdlock, /* lock */ .vop_unlock = vop_stdunlock, /* unlock */ .vop_islocked = vop_stdislocked, /* islocked */ }; VFS_VOP_VECTOR_REGISTER(sync_vnodeops); /* * Create a new filesystem syncer vnode for the specified mount point. */ void vfs_allocate_syncvnode(struct mount *mp) { struct vnode *vp; struct bufobj *bo; static long start, incr, next; int error; /* Allocate a new vnode */ error = getnewvnode("syncer", mp, &sync_vnodeops, &vp); if (error != 0) panic("vfs_allocate_syncvnode: getnewvnode() failed"); vp->v_type = VNON; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); vp->v_vflag |= VV_FORCEINSMQ; error = insmntque(vp, mp); if (error != 0) panic("vfs_allocate_syncvnode: insmntque() failed"); vp->v_vflag &= ~VV_FORCEINSMQ; VOP_UNLOCK(vp); /* * Place the vnode onto the syncer worklist. We attempt to * scatter them about on the list so that they will go off * at evenly distributed times even if all the filesystems * are mounted at once. */ next += incr; if (next == 0 || next > syncer_maxdelay) { start /= 2; incr /= 2; if (start == 0) { start = syncer_maxdelay / 2; incr = syncer_maxdelay; } next = start; } bo = &vp->v_bufobj; BO_LOCK(bo); vn_syncer_add_to_worklist(bo, syncdelay > 0 ? next % syncdelay : 0); /* XXX - vn_syncer_add_to_worklist() also grabs and drops sync_mtx. */ mtx_lock(&sync_mtx); sync_vnode_count++; if (mp->mnt_syncer == NULL) { mp->mnt_syncer = vp; vp = NULL; } mtx_unlock(&sync_mtx); BO_UNLOCK(bo); if (vp != NULL) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); vgone(vp); vput(vp); } } void vfs_deallocate_syncvnode(struct mount *mp) { struct vnode *vp; mtx_lock(&sync_mtx); vp = mp->mnt_syncer; if (vp != NULL) mp->mnt_syncer = NULL; mtx_unlock(&sync_mtx); if (vp != NULL) vrele(vp); } /* * Do a lazy sync of the filesystem. */ static int sync_fsync(struct vop_fsync_args *ap) { struct vnode *syncvp = ap->a_vp; struct mount *mp = syncvp->v_mount; int error, save; struct bufobj *bo; /* * We only need to do something if this is a lazy evaluation. */ if (ap->a_waitfor != MNT_LAZY) return (0); /* * Move ourselves to the back of the sync list. */ bo = &syncvp->v_bufobj; BO_LOCK(bo); vn_syncer_add_to_worklist(bo, syncdelay); BO_UNLOCK(bo); /* * Walk the list of vnodes pushing all that are dirty and * not already on the sync list. */ if (vfs_busy(mp, MBF_NOWAIT) != 0) return (0); if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) { vfs_unbusy(mp); return (0); } save = curthread_pflags_set(TDP_SYNCIO); /* * The filesystem at hand may be idle with free vnodes stored in the * batch. Return them instead of letting them stay there indefinitely. */ vfs_periodic(mp, MNT_NOWAIT); error = VFS_SYNC(mp, MNT_LAZY); curthread_pflags_restore(save); vn_finished_write(mp); vfs_unbusy(mp); return (error); } /* * The syncer vnode is no longer referenced. */ static int sync_inactive(struct vop_inactive_args *ap) { vgone(ap->a_vp); return (0); } /* * The syncer vnode is no longer needed and is being decommissioned. * * Modifications to the worklist must be protected by sync_mtx.
*/ static int sync_reclaim(struct vop_reclaim_args *ap) { struct vnode *vp = ap->a_vp; struct bufobj *bo; bo = &vp->v_bufobj; BO_LOCK(bo); mtx_lock(&sync_mtx); if (vp->v_mount->mnt_syncer == vp) vp->v_mount->mnt_syncer = NULL; if (bo->bo_flag & BO_ONWORKLST) { LIST_REMOVE(bo, bo_synclist); syncer_worklist_len--; sync_vnode_count--; bo->bo_flag &= ~BO_ONWORKLST; } mtx_unlock(&sync_mtx); BO_UNLOCK(bo); return (0); } int vn_need_pageq_flush(struct vnode *vp) { struct vm_object *obj; int need; MPASS(mtx_owned(VI_MTX(vp))); need = 0; if ((obj = vp->v_object) != NULL && (vp->v_vflag & VV_NOSYNC) == 0 && vm_object_mightbedirty(obj)) need = 1; return (need); } /* * Check if vnode represents a disk device */ int vn_isdisk(struct vnode *vp, int *errp) { int error; if (vp->v_type != VCHR) { error = ENOTBLK; goto out; } error = 0; dev_lock(); if (vp->v_rdev == NULL) error = ENXIO; else if (vp->v_rdev->si_devsw == NULL) error = ENXIO; else if (!(vp->v_rdev->si_devsw->d_flags & D_DISK)) error = ENOTBLK; dev_unlock(); out: if (errp != NULL) *errp = error; return (error == 0); } /* * Common filesystem object access control check routine. Accepts a * vnode's type, "mode", uid and gid, requested access mode, credentials, * and optional call-by-reference privused argument allowing vaccess() * to indicate to the caller whether privilege was used to satisfy the * request (obsoleted). Returns 0 on success, or an errno on failure. */ int vaccess(enum vtype type, mode_t file_mode, uid_t file_uid, gid_t file_gid, accmode_t accmode, struct ucred *cred, int *privused) { accmode_t dac_granted; accmode_t priv_granted; KASSERT((accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND)) == 0, ("invalid bit in accmode")); KASSERT((accmode & VAPPEND) == 0 || (accmode & VWRITE), ("VAPPEND without VWRITE")); /* * Look for a normal, non-privileged way to access the file/directory * as requested. If it exists, go with that. */ if (privused != NULL) *privused = 0; dac_granted = 0; /* Check the owner. */ if (cred->cr_uid == file_uid) { dac_granted |= VADMIN; if (file_mode & S_IXUSR) dac_granted |= VEXEC; if (file_mode & S_IRUSR) dac_granted |= VREAD; if (file_mode & S_IWUSR) dac_granted |= (VWRITE | VAPPEND); if ((accmode & dac_granted) == accmode) return (0); goto privcheck; } /* Otherwise, check the groups (first match) */ if (groupmember(file_gid, cred)) { if (file_mode & S_IXGRP) dac_granted |= VEXEC; if (file_mode & S_IRGRP) dac_granted |= VREAD; if (file_mode & S_IWGRP) dac_granted |= (VWRITE | VAPPEND); if ((accmode & dac_granted) == accmode) return (0); goto privcheck; } /* Otherwise, check everyone else. */ if (file_mode & S_IXOTH) dac_granted |= VEXEC; if (file_mode & S_IROTH) dac_granted |= VREAD; if (file_mode & S_IWOTH) dac_granted |= (VWRITE | VAPPEND); if ((accmode & dac_granted) == accmode) return (0); privcheck: /* * Build a privilege mask to determine if the set of privileges * satisfies the requirements when combined with the granted mask * from above. For each privilege, if the privilege is required, * bitwise or the request type onto the priv_granted mask. */ priv_granted = 0; if (type == VDIR) { /* * For directories, use PRIV_VFS_LOOKUP to satisfy VEXEC * requests, instead of PRIV_VFS_EXEC. */ if ((accmode & VEXEC) && ((dac_granted & VEXEC) == 0) && !priv_check_cred(cred, PRIV_VFS_LOOKUP)) priv_granted |= VEXEC; } else { /* * Ensure that at least one execute bit is on. Otherwise, * a privileged user will always succeed, and we don't want * this to happen unless the file really is executable. 
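 * Concretely (illustrative values): for a VREG file with mode 0644, a
 * VEXEC request fails with EACCES even for a credential holding
 * PRIV_VFS_EXEC, since no execute bit is set anywhere; with mode 0700
 * the privilege check applies and the request can succeed.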
*/ if ((accmode & VEXEC) && ((dac_granted & VEXEC) == 0) && (file_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) != 0 && !priv_check_cred(cred, PRIV_VFS_EXEC)) priv_granted |= VEXEC; } if ((accmode & VREAD) && ((dac_granted & VREAD) == 0) && !priv_check_cred(cred, PRIV_VFS_READ)) priv_granted |= VREAD; if ((accmode & VWRITE) && ((dac_granted & VWRITE) == 0) && !priv_check_cred(cred, PRIV_VFS_WRITE)) priv_granted |= (VWRITE | VAPPEND); if ((accmode & VADMIN) && ((dac_granted & VADMIN) == 0) && !priv_check_cred(cred, PRIV_VFS_ADMIN)) priv_granted |= VADMIN; if ((accmode & (priv_granted | dac_granted)) == accmode) { /* XXX audit: privilege used */ if (privused != NULL) *privused = 1; return (0); } return ((accmode & VADMIN) ? EPERM : EACCES); } /* * Credential check based on process requesting service, and per-attribute * permissions. */ int extattr_check_cred(struct vnode *vp, int attrnamespace, struct ucred *cred, struct thread *td, accmode_t accmode) { /* * Kernel-invoked always succeeds. */ if (cred == NOCRED) return (0); /* * Do not allow privileged processes in jail to directly manipulate * system attributes. */ switch (attrnamespace) { case EXTATTR_NAMESPACE_SYSTEM: /* Potentially should be: return (EPERM); */ return (priv_check_cred(cred, PRIV_VFS_EXTATTR_SYSTEM)); case EXTATTR_NAMESPACE_USER: return (VOP_ACCESS(vp, accmode, cred, td)); default: return (EPERM); } } #ifdef DEBUG_VFS_LOCKS /* * This only exists to suppress warnings from unlocked specfs accesses. It is * no longer ok to have an unlocked VFS. */ #define IGNORE_LOCK(vp) (KERNEL_PANICKED() || (vp) == NULL || \ (vp)->v_type == VCHR || (vp)->v_type == VBAD) int vfs_badlock_ddb = 1; /* Drop into debugger on violation. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_ddb, CTLFLAG_RW, &vfs_badlock_ddb, 0, "Drop into debugger on lock violation"); int vfs_badlock_mutex = 1; /* Check for interlock across VOPs. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_mutex, CTLFLAG_RW, &vfs_badlock_mutex, 0, "Check for interlock across VOPs"); int vfs_badlock_print = 1; /* Print lock violations. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_print, CTLFLAG_RW, &vfs_badlock_print, 0, "Print lock violations"); int vfs_badlock_vnode = 1; /* Print vnode details on lock violations. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_vnode, CTLFLAG_RW, &vfs_badlock_vnode, 0, "Print vnode details on lock violations"); #ifdef KDB int vfs_badlock_backtrace = 1; /* Print backtrace at lock violations. 
*/ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_backtrace, CTLFLAG_RW, &vfs_badlock_backtrace, 0, "Print backtrace at lock violations"); #endif static void vfs_badlock(const char *msg, const char *str, struct vnode *vp) { #ifdef KDB if (vfs_badlock_backtrace) kdb_backtrace(); #endif if (vfs_badlock_vnode) vn_printf(vp, "vnode "); if (vfs_badlock_print) printf("%s: %p %s\n", str, (void *)vp, msg); if (vfs_badlock_ddb) kdb_enter(KDB_WHY_VFSLOCK, "lock violation"); } void assert_vi_locked(struct vnode *vp, const char *str) { if (vfs_badlock_mutex && !mtx_owned(VI_MTX(vp))) vfs_badlock("interlock is not locked but should be", str, vp); } void assert_vi_unlocked(struct vnode *vp, const char *str) { if (vfs_badlock_mutex && mtx_owned(VI_MTX(vp))) vfs_badlock("interlock is locked but should not be", str, vp); } void assert_vop_locked(struct vnode *vp, const char *str) { int locked; if (!IGNORE_LOCK(vp)) { locked = VOP_ISLOCKED(vp); if (locked == 0 || locked == LK_EXCLOTHER) vfs_badlock("is not locked but should be", str, vp); } } void assert_vop_unlocked(struct vnode *vp, const char *str) { if (!IGNORE_LOCK(vp) && VOP_ISLOCKED(vp) == LK_EXCLUSIVE) vfs_badlock("is locked but should not be", str, vp); } void assert_vop_elocked(struct vnode *vp, const char *str) { if (!IGNORE_LOCK(vp) && VOP_ISLOCKED(vp) != LK_EXCLUSIVE) vfs_badlock("is not exclusive locked but should be", str, vp); } #endif /* DEBUG_VFS_LOCKS */ void vop_rename_fail(struct vop_rename_args *ap) { if (ap->a_tvp != NULL) vput(ap->a_tvp); if (ap->a_tdvp == ap->a_tvp) vrele(ap->a_tdvp); else vput(ap->a_tdvp); vrele(ap->a_fdvp); vrele(ap->a_fvp); } void vop_rename_pre(void *ap) { struct vop_rename_args *a = ap; #ifdef DEBUG_VFS_LOCKS if (a->a_tvp) ASSERT_VI_UNLOCKED(a->a_tvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_tdvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_fvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_fdvp, "VOP_RENAME"); /* Check the source (from). */ if (a->a_tdvp->v_vnlock != a->a_fdvp->v_vnlock && (a->a_tvp == NULL || a->a_tvp->v_vnlock != a->a_fdvp->v_vnlock)) ASSERT_VOP_UNLOCKED(a->a_fdvp, "vop_rename: fdvp locked"); if (a->a_tvp == NULL || a->a_tvp->v_vnlock != a->a_fvp->v_vnlock) ASSERT_VOP_UNLOCKED(a->a_fvp, "vop_rename: fvp locked"); /* Check the target. */ if (a->a_tvp) ASSERT_VOP_LOCKED(a->a_tvp, "vop_rename: tvp not locked"); ASSERT_VOP_LOCKED(a->a_tdvp, "vop_rename: tdvp not locked"); #endif if (a->a_tdvp != a->a_fdvp) vhold(a->a_fdvp); if (a->a_tvp != a->a_fvp) vhold(a->a_fvp); vhold(a->a_tdvp); if (a->a_tvp) vhold(a->a_tvp); } #ifdef DEBUG_VFS_LOCKS void vop_strategy_pre(void *ap) { struct vop_strategy_args *a; struct buf *bp; a = ap; bp = a->a_bp; /* * Cluster ops lock their component buffers but not the IO container. 
*/ if ((bp->b_flags & B_CLUSTER) != 0) return; if (!KERNEL_PANICKED() && !BUF_ISLOCKED(bp)) { if (vfs_badlock_print) printf( "VOP_STRATEGY: bp is not locked but should be\n"); if (vfs_badlock_ddb) kdb_enter(KDB_WHY_VFSLOCK, "lock violation"); } } void vop_lock_pre(void *ap) { struct vop_lock1_args *a = ap; if ((a->a_flags & LK_INTERLOCK) == 0) ASSERT_VI_UNLOCKED(a->a_vp, "VOP_LOCK"); else ASSERT_VI_LOCKED(a->a_vp, "VOP_LOCK"); } void vop_lock_post(void *ap, int rc) { struct vop_lock1_args *a = ap; ASSERT_VI_UNLOCKED(a->a_vp, "VOP_LOCK"); if (rc == 0 && (a->a_flags & LK_EXCLOTHER) == 0) ASSERT_VOP_LOCKED(a->a_vp, "VOP_LOCK"); } void vop_unlock_pre(void *ap) { struct vop_unlock_args *a = ap; ASSERT_VOP_LOCKED(a->a_vp, "VOP_UNLOCK"); } void vop_unlock_post(void *ap, int rc) { return; } void vop_need_inactive_pre(void *ap) { struct vop_need_inactive_args *a = ap; ASSERT_VI_LOCKED(a->a_vp, "VOP_NEED_INACTIVE"); } void vop_need_inactive_post(void *ap, int rc) { struct vop_need_inactive_args *a = ap; ASSERT_VI_LOCKED(a->a_vp, "VOP_NEED_INACTIVE"); } #endif void vop_create_post(void *ap, int rc) { struct vop_create_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } void vop_deleteextattr_post(void *ap, int rc) { struct vop_deleteextattr_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); } void vop_link_post(void *ap, int rc) { struct vop_link_args *a = ap; if (!rc) { VFS_KNOTE_LOCKED(a->a_vp, NOTE_LINK); VFS_KNOTE_LOCKED(a->a_tdvp, NOTE_WRITE); } } void vop_mkdir_post(void *ap, int rc) { struct vop_mkdir_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE | NOTE_LINK); } void vop_mknod_post(void *ap, int rc) { struct vop_mknod_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } void vop_reclaim_post(void *ap, int rc) { struct vop_reclaim_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_REVOKE); } void vop_remove_post(void *ap, int rc) { struct vop_remove_args *a = ap; if (!rc) { VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); VFS_KNOTE_LOCKED(a->a_vp, NOTE_DELETE); } } void vop_rename_post(void *ap, int rc) { struct vop_rename_args *a = ap; long hint; if (!rc) { hint = NOTE_WRITE; if (a->a_fdvp == a->a_tdvp) { if (a->a_tvp != NULL && a->a_tvp->v_type == VDIR) hint |= NOTE_LINK; VFS_KNOTE_UNLOCKED(a->a_fdvp, hint); VFS_KNOTE_UNLOCKED(a->a_tdvp, hint); } else { hint |= NOTE_EXTEND; if (a->a_fvp->v_type == VDIR) hint |= NOTE_LINK; VFS_KNOTE_UNLOCKED(a->a_fdvp, hint); if (a->a_fvp->v_type == VDIR && a->a_tvp != NULL && a->a_tvp->v_type == VDIR) hint &= ~NOTE_LINK; VFS_KNOTE_UNLOCKED(a->a_tdvp, hint); } VFS_KNOTE_UNLOCKED(a->a_fvp, NOTE_RENAME); if (a->a_tvp) VFS_KNOTE_UNLOCKED(a->a_tvp, NOTE_DELETE); } if (a->a_tdvp != a->a_fdvp) vdrop(a->a_fdvp); if (a->a_tvp != a->a_fvp) vdrop(a->a_fvp); vdrop(a->a_tdvp); if (a->a_tvp) vdrop(a->a_tvp); } void vop_rmdir_post(void *ap, int rc) { struct vop_rmdir_args *a = ap; if (!rc) { VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE | NOTE_LINK); VFS_KNOTE_LOCKED(a->a_vp, NOTE_DELETE); } } void vop_setattr_post(void *ap, int rc) { struct vop_setattr_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); } void vop_setextattr_post(void *ap, int rc) { struct vop_setextattr_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); } void vop_symlink_post(void *ap, int rc) { struct vop_symlink_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } void vop_open_post(void *ap, int rc) { struct vop_open_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_OPEN); } void vop_close_post(void *ap, int rc) { struct 
vop_close_args *a = ap; if (!rc && (a->a_cred != NOCRED || /* filter out revokes */ !VN_IS_DOOMED(a->a_vp))) { VFS_KNOTE_LOCKED(a->a_vp, (a->a_fflag & FWRITE) != 0 ? NOTE_CLOSE_WRITE : NOTE_CLOSE); } } void vop_read_post(void *ap, int rc) { struct vop_read_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_READ); } void vop_readdir_post(void *ap, int rc) { struct vop_readdir_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_READ); } static struct knlist fs_knlist; static void vfs_event_init(void *arg) { knlist_init_mtx(&fs_knlist, NULL); } /* XXX - correct order? */ SYSINIT(vfs_knlist, SI_SUB_VFS, SI_ORDER_ANY, vfs_event_init, NULL); void vfs_event_signal(fsid_t *fsid, uint32_t event, intptr_t data __unused) { KNOTE_UNLOCKED(&fs_knlist, event); } static int filt_fsattach(struct knote *kn); static void filt_fsdetach(struct knote *kn); static int filt_fsevent(struct knote *kn, long hint); struct filterops fs_filtops = { .f_isfd = 0, .f_attach = filt_fsattach, .f_detach = filt_fsdetach, .f_event = filt_fsevent }; static int filt_fsattach(struct knote *kn) { kn->kn_flags |= EV_CLEAR; knlist_add(&fs_knlist, kn, 0); return (0); } static void filt_fsdetach(struct knote *kn) { knlist_remove(&fs_knlist, kn, 0); } static int filt_fsevent(struct knote *kn, long hint) { kn->kn_fflags |= hint; return (kn->kn_fflags != 0); } static int sysctl_vfs_ctl(SYSCTL_HANDLER_ARGS) { struct vfsidctl vc; int error; struct mount *mp; error = SYSCTL_IN(req, &vc, sizeof(vc)); if (error) return (error); if (vc.vc_vers != VFS_CTL_VERS1) return (EINVAL); mp = vfs_getvfs(&vc.vc_fsid); if (mp == NULL) return (ENOENT); /* ensure that a specific sysctl goes to the right filesystem. */ if (strcmp(vc.vc_fstypename, "*") != 0 && strcmp(vc.vc_fstypename, mp->mnt_vfc->vfc_name) != 0) { vfs_rel(mp); return (EINVAL); } VCTLTOREQ(&vc, req); error = VFS_SYSCTL(mp, vc.vc_op, req); vfs_rel(mp); return (error); } SYSCTL_PROC(_vfs, OID_AUTO, ctl, CTLTYPE_OPAQUE | CTLFLAG_MPSAFE | CTLFLAG_WR, NULL, 0, sysctl_vfs_ctl, "", "Sysctl by fsid"); /* * Function to initialize a va_filerev field sensibly. * XXX: Wouldn't a random number make a lot more sense ?? 
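 * The value below packs the boot-relative uptime into 64 bits: whole
 * seconds in the high word and the top half of the fractional part in
 * the low word, i.e. roughly
 *
 *	rev = ((u_quad_t)bt.sec << 32) | (bt.frac >> 32);
 *
 * which is monotonically increasing for the lifetime of the boot.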
*/ u_quad_t init_va_filerev(void) { struct bintime bt; getbinuptime(&bt); return (((u_quad_t)bt.sec << 32LL) | (bt.frac >> 32LL)); } static int filt_vfsread(struct knote *kn, long hint); static int filt_vfswrite(struct knote *kn, long hint); static int filt_vfsvnode(struct knote *kn, long hint); static void filt_vfsdetach(struct knote *kn); static struct filterops vfsread_filtops = { .f_isfd = 1, .f_detach = filt_vfsdetach, .f_event = filt_vfsread }; static struct filterops vfswrite_filtops = { .f_isfd = 1, .f_detach = filt_vfsdetach, .f_event = filt_vfswrite }; static struct filterops vfsvnode_filtops = { .f_isfd = 1, .f_detach = filt_vfsdetach, .f_event = filt_vfsvnode }; static void vfs_knllock(void *arg) { struct vnode *vp = arg; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); } static void vfs_knlunlock(void *arg) { struct vnode *vp = arg; VOP_UNLOCK(vp); } static void vfs_knl_assert_locked(void *arg) { #ifdef DEBUG_VFS_LOCKS struct vnode *vp = arg; ASSERT_VOP_LOCKED(vp, "vfs_knl_assert_locked"); #endif } static void vfs_knl_assert_unlocked(void *arg) { #ifdef DEBUG_VFS_LOCKS struct vnode *vp = arg; ASSERT_VOP_UNLOCKED(vp, "vfs_knl_assert_unlocked"); #endif } int vfs_kqfilter(struct vop_kqfilter_args *ap) { struct vnode *vp = ap->a_vp; struct knote *kn = ap->a_kn; struct knlist *knl; switch (kn->kn_filter) { case EVFILT_READ: kn->kn_fop = &vfsread_filtops; break; case EVFILT_WRITE: kn->kn_fop = &vfswrite_filtops; break; case EVFILT_VNODE: kn->kn_fop = &vfsvnode_filtops; break; default: return (EINVAL); } kn->kn_hook = (caddr_t)vp; v_addpollinfo(vp); if (vp->v_pollinfo == NULL) return (ENOMEM); knl = &vp->v_pollinfo->vpi_selinfo.si_note; vhold(vp); knlist_add(knl, kn, 0); return (0); } /* * Detach knote from vnode */ static void filt_vfsdetach(struct knote *kn) { struct vnode *vp = (struct vnode *)kn->kn_hook; KASSERT(vp->v_pollinfo != NULL, ("Missing v_pollinfo")); knlist_remove(&vp->v_pollinfo->vpi_selinfo.si_note, kn, 0); vdrop(vp); } /*ARGSUSED*/ static int filt_vfsread(struct knote *kn, long hint) { struct vnode *vp = (struct vnode *)kn->kn_hook; struct vattr va; int res; /* * filesystem is gone, so set the EOF flag and schedule * the knote for deletion. */ if (hint == NOTE_REVOKE || (hint == 0 && vp->v_type == VBAD)) { VI_LOCK(vp); kn->kn_flags |= (EV_EOF | EV_ONESHOT); VI_UNLOCK(vp); return (1); } if (VOP_GETATTR(vp, &va, curthread->td_ucred)) return (0); VI_LOCK(vp); kn->kn_data = va.va_size - kn->kn_fp->f_offset; res = (kn->kn_sfflags & NOTE_FILE_POLL) != 0 || kn->kn_data != 0; VI_UNLOCK(vp); return (res); } /*ARGSUSED*/ static int filt_vfswrite(struct knote *kn, long hint) { struct vnode *vp = (struct vnode *)kn->kn_hook; VI_LOCK(vp); /* * filesystem is gone, so set the EOF flag and schedule * the knote for deletion. */ if (hint == NOTE_REVOKE || (hint == 0 && vp->v_type == VBAD)) kn->kn_flags |= (EV_EOF | EV_ONESHOT); kn->kn_data = 0; VI_UNLOCK(vp); return (1); } static int filt_vfsvnode(struct knote *kn, long hint) { struct vnode *vp = (struct vnode *)kn->kn_hook; int res; VI_LOCK(vp); if (kn->kn_sfflags & hint) kn->kn_fflags |= hint; if (hint == NOTE_REVOKE || (hint == 0 && vp->v_type == VBAD)) { kn->kn_flags |= EV_EOF; VI_UNLOCK(vp); return (1); } res = (kn->kn_fflags != 0); VI_UNLOCK(vp); return (res); } /* * Returns whether the directory is empty or not. * If it is empty, the return value is 0; otherwise * the return value is an error value (which may * be ENOTEMPTY). 
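 * A filesystem could gate directory removal on this helper; a minimal
 * sketch of a hypothetical VOP_RMDIR fragment:
 *
 *	error = vfs_emptydir(vp);
 *	if (error != 0)
 *		return (error);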
*/ int vfs_emptydir(struct vnode *vp) { struct uio uio; struct iovec iov; struct dirent *dirent, *dp, *endp; int error, eof; error = 0; eof = 0; ASSERT_VOP_LOCKED(vp, "vfs_emptydir"); dirent = malloc(sizeof(struct dirent), M_TEMP, M_WAITOK); iov.iov_base = dirent; iov.iov_len = sizeof(struct dirent); uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = 0; uio.uio_resid = sizeof(struct dirent); uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; uio.uio_td = curthread; while (eof == 0 && error == 0) { error = VOP_READDIR(vp, &uio, curthread->td_ucred, &eof, NULL, NULL); if (error != 0) break; endp = (void *)((uint8_t *)dirent + sizeof(struct dirent) - uio.uio_resid); for (dp = dirent; dp < endp; dp = (void *)((uint8_t *)dp + GENERIC_DIRSIZ(dp))) { if (dp->d_type == DT_WHT) continue; if (dp->d_namlen == 0) continue; if (dp->d_type != DT_DIR && dp->d_type != DT_UNKNOWN) { error = ENOTEMPTY; break; } if (dp->d_namlen > 2) { error = ENOTEMPTY; break; } if (dp->d_namlen == 1 && dp->d_name[0] != '.') { error = ENOTEMPTY; break; } if (dp->d_namlen == 2 && dp->d_name[1] != '.') { error = ENOTEMPTY; break; } uio.uio_resid = sizeof(struct dirent); } } free(dirent, M_TEMP); return (error); } int vfs_read_dirent(struct vop_readdir_args *ap, struct dirent *dp, off_t off) { int error; if (dp->d_reclen > ap->a_uio->uio_resid) return (ENAMETOOLONG); error = uiomove(dp, dp->d_reclen, ap->a_uio); if (error) { if (ap->a_ncookies != NULL) { if (ap->a_cookies != NULL) free(ap->a_cookies, M_TEMP); ap->a_cookies = NULL; *ap->a_ncookies = 0; } return (error); } if (ap->a_ncookies == NULL) return (0); KASSERT(ap->a_cookies, ("NULL ap->a_cookies value with non-NULL ap->a_ncookies!")); *ap->a_cookies = realloc(*ap->a_cookies, (*ap->a_ncookies + 1) * sizeof(u_long), M_TEMP, M_WAITOK | M_ZERO); (*ap->a_cookies)[*ap->a_ncookies] = off; *ap->a_ncookies += 1; return (0); } /* * Mark for update the access time of the file if the filesystem * supports VOP_MARKATIME. This functionality is used by execve and * mmap, so we want to avoid the I/O implied by directly setting * va_atime for the sake of efficiency. */ void vfs_mark_atime(struct vnode *vp, struct ucred *cred) { struct mount *mp; mp = vp->v_mount; ASSERT_VOP_LOCKED(vp, "vfs_mark_atime"); if (mp != NULL && (mp->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0) (void)VOP_MARKATIME(vp); } /* * The purpose of this routine is to remove granularity from accmode_t, * reducing it into standard unix access bits - VEXEC, VREAD, VWRITE, * VADMIN and VAPPEND. * * If it returns 0, the caller is supposed to continue with the usual * access checks using 'accmode' as modified by this routine. If it * returns nonzero value, the caller is supposed to return that value * as errno. * * Note that after this routine runs, accmode may be zero. */ int vfs_unixify_accmode(accmode_t *accmode) { /* * There is no way to specify explicit "deny" rule using * file mode or POSIX.1e ACLs. */ if (*accmode & VEXPLICIT_DENY) { *accmode = 0; return (0); } /* * None of these can be translated into usual access bits. * Also, the common case for NFSv4 ACLs is to not contain * either of these bits. Caller should check for VWRITE * on the containing directory instead. */ if (*accmode & (VDELETE_CHILD | VDELETE)) return (EPERM); if (*accmode & VADMIN_PERMS) { *accmode &= ~VADMIN_PERMS; *accmode |= VADMIN; } /* * There is no way to deny VREAD_ATTRIBUTES, VREAD_ACL * or VSYNCHRONIZE using file mode or POSIX.1e ACL. 
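 * Worked examples (illustrative): VADMIN_PERMS | VREAD maps to
 * VADMIN | VREAD and returns 0; VDELETE or VDELETE_CHILD returns EPERM;
 * VEXPLICIT_DENY alone zeroes accmode and returns 0, leaving the caller
 * with nothing further to check.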
*/ *accmode &= ~(VSTAT_PERMS | VSYNCHRONIZE); return (0); } /* * Clear out a doomed vnode (if any) and replace it with a new one as long * as the fs is not being unmounted. Return the root vnode to the caller. */ static int __noinline vfs_cache_root_fallback(struct mount *mp, int flags, struct vnode **vpp) { struct vnode *vp; int error; restart: if (mp->mnt_rootvnode != NULL) { MNT_ILOCK(mp); vp = mp->mnt_rootvnode; if (vp != NULL) { if (!VN_IS_DOOMED(vp)) { vrefact(vp); MNT_IUNLOCK(mp); error = vn_lock(vp, flags); if (error == 0) { *vpp = vp; return (0); } vrele(vp); goto restart; } /* * Clear the old one. */ mp->mnt_rootvnode = NULL; } MNT_IUNLOCK(mp); if (vp != NULL) { /* * Paired with a fence in vfs_op_thread_exit(). */ atomic_thread_fence_acq(); vfs_op_barrier_wait(mp); vrele(vp); } } error = VFS_CACHEDROOT(mp, flags, vpp); if (error != 0) return (error); if (mp->mnt_vfs_ops == 0) { MNT_ILOCK(mp); if (mp->mnt_vfs_ops != 0) { MNT_IUNLOCK(mp); return (0); } if (mp->mnt_rootvnode == NULL) { vrefact(*vpp); mp->mnt_rootvnode = *vpp; } else { if (mp->mnt_rootvnode != *vpp) { if (!VN_IS_DOOMED(mp->mnt_rootvnode)) { panic("%s: mismatch between vnode returned " " by VFS_CACHEDROOT and the one cached " " (%p != %p)", __func__, *vpp, mp->mnt_rootvnode); } } } MNT_IUNLOCK(mp); } return (0); } int vfs_cache_root(struct mount *mp, int flags, struct vnode **vpp) { struct vnode *vp; int error; if (!vfs_op_thread_enter(mp)) return (vfs_cache_root_fallback(mp, flags, vpp)); vp = (struct vnode *)atomic_load_ptr(&mp->mnt_rootvnode); if (vp == NULL || VN_IS_DOOMED(vp)) { vfs_op_thread_exit(mp); return (vfs_cache_root_fallback(mp, flags, vpp)); } vrefact(vp); vfs_op_thread_exit(mp); error = vn_lock(vp, flags); if (error != 0) { vrele(vp); return (vfs_cache_root_fallback(mp, flags, vpp)); } *vpp = vp; return (0); } struct vnode * vfs_cache_root_clear(struct mount *mp) { struct vnode *vp; /* * ops > 0 guarantees there is nobody who can see this vnode */ MPASS(mp->mnt_vfs_ops > 0); vp = mp->mnt_rootvnode; mp->mnt_rootvnode = NULL; return (vp); } void vfs_cache_root_set(struct mount *mp, struct vnode *vp) { MPASS(mp->mnt_vfs_ops > 0); vrefact(vp); mp->mnt_rootvnode = vp; } /* * These are helper functions for filesystems to traverse all * their vnodes. See MNT_VNODE_FOREACH_ALL() in sys/mount.h. * * This interface replaces MNT_VNODE_FOREACH. */ struct vnode * __mnt_vnode_next_all(struct vnode **mvp, struct mount *mp) { struct vnode *vp; if (should_yield()) kern_yield(PRI_USER); MNT_ILOCK(mp); KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); for (vp = TAILQ_NEXT(*mvp, v_nmntvnodes); vp != NULL; vp = TAILQ_NEXT(vp, v_nmntvnodes)) { /* Allow a racy peek at VIRF_DOOMED to save a lock acquisition. */ if (vp->v_type == VMARKER || VN_IS_DOOMED(vp)) continue; VI_LOCK(vp); if (VN_IS_DOOMED(vp)) { VI_UNLOCK(vp); continue; } break; } if (vp == NULL) { __mnt_vnode_markerfree_all(mvp, mp); /* MNT_IUNLOCK(mp); -- done in above function */ mtx_assert(MNT_MTX(mp), MA_NOTOWNED); return (NULL); } TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes); TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes); MNT_IUNLOCK(mp); return (vp); } struct vnode * __mnt_vnode_first_all(struct vnode **mvp, struct mount *mp) { struct vnode *vp; *mvp = vn_alloc_marker(mp); MNT_ILOCK(mp); MNT_REF(mp); TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { /* Allow a racy peek at VIRF_DOOMED to save a lock acquisition. 
*/ if (vp->v_type == VMARKER || VN_IS_DOOMED(vp)) continue; VI_LOCK(vp); if (VN_IS_DOOMED(vp)) { VI_UNLOCK(vp); continue; } break; } if (vp == NULL) { MNT_REL(mp); MNT_IUNLOCK(mp); vn_free_marker(*mvp); *mvp = NULL; return (NULL); } TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes); MNT_IUNLOCK(mp); return (vp); } void __mnt_vnode_markerfree_all(struct vnode **mvp, struct mount *mp) { if (*mvp == NULL) { MNT_IUNLOCK(mp); return; } mtx_assert(MNT_MTX(mp), MA_OWNED); KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes); MNT_REL(mp); MNT_IUNLOCK(mp); vn_free_marker(*mvp); *mvp = NULL; } /* * These are helper functions for filesystems to traverse their * lazy vnodes. See MNT_VNODE_FOREACH_LAZY() in sys/mount.h */ static void mnt_vnode_markerfree_lazy(struct vnode **mvp, struct mount *mp) { KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); MNT_ILOCK(mp); MNT_REL(mp); MNT_IUNLOCK(mp); vn_free_marker(*mvp); *mvp = NULL; } /* * Relock the mp mount vnode list lock with the vp vnode interlock in the * conventional lock order during mnt_vnode_next_lazy iteration. * * On entry, the mount vnode list lock is held and the vnode interlock is not. * The list lock is dropped and reacquired. On success, both locks are held. * On failure, the mount vnode list lock is held but the vnode interlock is * not, and the procedure may have yielded. */ static bool mnt_vnode_next_lazy_relock(struct vnode *mvp, struct mount *mp, struct vnode *vp) { const struct vnode *tmp; bool held, ret; VNASSERT(mvp->v_mount == mp && mvp->v_type == VMARKER && TAILQ_NEXT(mvp, v_lazylist) != NULL, mvp, ("%s: bad marker", __func__)); VNASSERT(vp->v_mount == mp && vp->v_type != VMARKER, vp, ("%s: inappropriate vnode", __func__)); ASSERT_VI_UNLOCKED(vp, __func__); mtx_assert(&mp->mnt_listmtx, MA_OWNED); ret = false; TAILQ_REMOVE(&mp->mnt_lazyvnodelist, mvp, v_lazylist); TAILQ_INSERT_BEFORE(vp, mvp, v_lazylist); /* * Use a hold to prevent vp from disappearing while the mount vnode * list lock is dropped and reacquired. Normally a hold would be * acquired with vhold(), but that might try to acquire the vnode * interlock, which would be a LOR with the mount vnode list lock. */ held = refcount_acquire_if_not_zero(&vp->v_holdcnt); mtx_unlock(&mp->mnt_listmtx); if (!held) goto abort; VI_LOCK(vp); if (!refcount_release_if_not_last(&vp->v_holdcnt)) { vdropl(vp); goto abort; } mtx_lock(&mp->mnt_listmtx); /* * Determine whether the vnode is still the next one after the marker, * excepting any other markers. If the vnode has not been doomed by * vgone() then the hold should have ensured that it remained on the * lazy list. If it has been doomed but is still on the lazy list, * don't abort, but rather skip over it (avoid spinning on doomed * vnodes). 
*/ tmp = mvp; do { tmp = TAILQ_NEXT(tmp, v_lazylist); } while (tmp != NULL && tmp->v_type == VMARKER); if (tmp != vp) { mtx_unlock(&mp->mnt_listmtx); VI_UNLOCK(vp); goto abort; } ret = true; goto out; abort: maybe_yield(); mtx_lock(&mp->mnt_listmtx); out: if (ret) ASSERT_VI_LOCKED(vp, __func__); else ASSERT_VI_UNLOCKED(vp, __func__); mtx_assert(&mp->mnt_listmtx, MA_OWNED); return (ret); } static struct vnode * mnt_vnode_next_lazy(struct vnode **mvp, struct mount *mp, mnt_lazy_cb_t *cb, void *cbarg) { struct vnode *vp, *nvp; mtx_assert(&mp->mnt_listmtx, MA_OWNED); KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); restart: vp = TAILQ_NEXT(*mvp, v_lazylist); while (vp != NULL) { if (vp->v_type == VMARKER) { vp = TAILQ_NEXT(vp, v_lazylist); continue; } /* * See if we want to process the vnode. Note we may encounter a * long string of vnodes we don't care about and hog the list * as a result. Check for it and requeue the marker. */ if (VN_IS_DOOMED(vp) || !cb(vp, cbarg)) { if (!should_yield()) { vp = TAILQ_NEXT(vp, v_lazylist); continue; } TAILQ_REMOVE(&mp->mnt_lazyvnodelist, *mvp, v_lazylist); TAILQ_INSERT_AFTER(&mp->mnt_lazyvnodelist, vp, *mvp, v_lazylist); mtx_unlock(&mp->mnt_listmtx); kern_yield(PRI_USER); mtx_lock(&mp->mnt_listmtx); goto restart; } /* * Try-lock because this is the wrong lock order. If that does * not succeed, drop the mount vnode list lock and try to * reacquire it and the vnode interlock in the right order. */ if (!VI_TRYLOCK(vp) && !mnt_vnode_next_lazy_relock(*mvp, mp, vp)) goto restart; KASSERT(vp->v_type != VMARKER, ("locked marker %p", vp)); KASSERT(vp->v_mount == mp || vp->v_mount == NULL, ("alien vnode on the lazy list %p %p", vp, mp)); if (vp->v_mount == mp && !VN_IS_DOOMED(vp)) break; nvp = TAILQ_NEXT(vp, v_lazylist); VI_UNLOCK(vp); vp = nvp; } TAILQ_REMOVE(&mp->mnt_lazyvnodelist, *mvp, v_lazylist); /* Check if we are done */ if (vp == NULL) { mtx_unlock(&mp->mnt_listmtx); mnt_vnode_markerfree_lazy(mvp, mp); return (NULL); } TAILQ_INSERT_AFTER(&mp->mnt_lazyvnodelist, vp, *mvp, v_lazylist); mtx_unlock(&mp->mnt_listmtx); ASSERT_VI_LOCKED(vp, "lazy iter"); return (vp); } struct vnode * __mnt_vnode_next_lazy(struct vnode **mvp, struct mount *mp, mnt_lazy_cb_t *cb, void *cbarg) { if (should_yield()) kern_yield(PRI_USER); mtx_lock(&mp->mnt_listmtx); return (mnt_vnode_next_lazy(mvp, mp, cb, cbarg)); } struct vnode * __mnt_vnode_first_lazy(struct vnode **mvp, struct mount *mp, mnt_lazy_cb_t *cb, void *cbarg) { struct vnode *vp; + + if (TAILQ_EMPTY(&mp->mnt_lazyvnodelist)) + return (NULL); *mvp = vn_alloc_marker(mp); MNT_ILOCK(mp); MNT_REF(mp); MNT_IUNLOCK(mp); mtx_lock(&mp->mnt_listmtx); vp = TAILQ_FIRST(&mp->mnt_lazyvnodelist); if (vp == NULL) { mtx_unlock(&mp->mnt_listmtx); mnt_vnode_markerfree_lazy(mvp, mp); return (NULL); } TAILQ_INSERT_BEFORE(vp, *mvp, v_lazylist); return (mnt_vnode_next_lazy(mvp, mp, cb, cbarg)); } void __mnt_vnode_markerfree_lazy(struct vnode **mvp, struct mount *mp) { if (*mvp == NULL) return; mtx_lock(&mp->mnt_listmtx); TAILQ_REMOVE(&mp->mnt_lazyvnodelist, *mvp, v_lazylist); mtx_unlock(&mp->mnt_listmtx); mnt_vnode_markerfree_lazy(mvp, mp); } Index: projects/clang1000-import/sys/netinet/ip_divert.c =================================================================== --- projects/clang1000-import/sys/netinet/ip_divert.c (revision 357178) +++ projects/clang1000-import/sys/netinet/ip_divert.c (revision 357179) @@ -1,835 +1,837 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of 
the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_sctp.h" #ifndef INET #error "IPDIVERT requires INET" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #endif #ifdef SCTP #include #endif #include /* * Divert sockets */ /* * Allocate enough space to hold a full IP packet */ #define DIVSNDQ (65536 + 100) #define DIVRCVQ (65536 + 100) /* * Divert sockets work in conjunction with ipfw or other packet filters, * see the divert(4) manpage for features. * Packets are selected by the packet filter and tagged with an * MTAG_IPFW_RULE tag carrying the 'divert port' number (as set by * the packet filter) and information on the matching filter rule for * subsequent reinjection. The divert_port is used to put the packet * on the corresponding divert socket, while the rule number is passed * up (at least partially) as the sin_port in the struct sockaddr. * * Packets written to the divert socket carry in sin_addr a * destination address, and in sin_port the number of the filter rule * after which to continue processing. * If the destination address is INADDR_ANY, the packet is treated as * outgoing and sent to ip_output(); otherwise it is treated as * incoming and sent to ip_input(). * Further, sin_zero carries some information on the interface, * which can be used in the reinject -- see comments in the code. * * On reinjection, processing in ip_input() and ip_output() * will be exactly the same as for the original packet, except that * packet filter processing will start at the rule number after the one * written in the sin_port (ipfw does not allow a rule #0, so sin_port=0 * will apply the entire ruleset to the packet). */ /* Internal variables.
*/ VNET_DEFINE_STATIC(struct inpcbhead, divcb); VNET_DEFINE_STATIC(struct inpcbinfo, divcbinfo); #define V_divcb VNET(divcb) #define V_divcbinfo VNET(divcbinfo) static u_long div_sendspace = DIVSNDQ; /* XXX sysctl ? */ static u_long div_recvspace = DIVRCVQ; /* XXX sysctl ? */ static eventhandler_tag ip_divert_event_tag; static int div_output_inbound(int family, struct socket *so, struct mbuf *m, struct sockaddr_in *sin); static int div_output_outbound(int family, struct socket *so, struct mbuf *m); /* * Initialize divert connection block queue. */ static void div_zone_change(void *tag) { uma_zone_set_max(V_divcbinfo.ipi_zone, maxsockets); } static int div_inpcb_init(void *mem, int size, int flags) { struct inpcb *inp = mem; INP_LOCK_INIT(inp, "inp", "divinp"); return (0); } static void div_init(void) { /* * XXX We don't use the hash list for divert IP, but it's easier to * allocate one-entry hash lists than it is to check all over the * place for hashbase == NULL. */ in_pcbinfo_init(&V_divcbinfo, "div", &V_divcb, 1, 1, "divcb", div_inpcb_init, IPI_HASHFIELDS_NONE); } static void div_destroy(void *unused __unused) { in_pcbinfo_destroy(&V_divcbinfo); } VNET_SYSUNINIT(divert, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, div_destroy, NULL); /* * IPPROTO_DIVERT is not in the real IP protocol number space; this * function should never be called. Just in case, drop any packets. */ static int div_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; KMOD_IPSTAT_INC(ips_noproto); m_freem(m); return (IPPROTO_DONE); } /* * Divert a packet by passing it up to the divert socket at port 'port'. * * Set up generic address and protocol structures for the div_input routine, * then pass them along with the mbuf chain. */ static void divert_packet(struct mbuf *m, bool incoming) { struct ip *ip; struct inpcb *inp; struct socket *sa; u_int16_t nport; struct sockaddr_in divsrc; struct m_tag *mtag; NET_EPOCH_ASSERT(); mtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL); if (mtag == NULL) { m_freem(m); return; } /* Assure header */ if (m->m_len < sizeof(struct ip) && (m = m_pullup(m, sizeof(struct ip))) == NULL) return; ip = mtod(m, struct ip *); /* Delayed checksums are currently not compatible with divert. */ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { in_delayed_cksum(m); m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP) { sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2)); m->m_pkthdr.csum_flags &= ~CSUM_SCTP; } #endif bzero(&divsrc, sizeof(divsrc)); divsrc.sin_len = sizeof(divsrc); divsrc.sin_family = AF_INET; /* record matching rule, in host format */ divsrc.sin_port = ((struct ipfw_rule_ref *)(mtag+1))->rulenum; /* * Record receive interface address, if any. * But only for incoming packets. */ if (incoming) { struct ifaddr *ifa; struct ifnet *ifp; /* Sanity check */ M_ASSERTPKTHDR(m); /* Find IP address for receive interface */ ifp = m->m_pkthdr.rcvif; CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET) continue; divsrc.sin_addr = ((struct sockaddr_in *) ifa->ifa_addr)->sin_addr; break; } } /* * Record the incoming interface name whenever we have one. */ if (m->m_pkthdr.rcvif) { /* * Hide the actual interface name in there in the * sin_zero array. XXX This needs to be moved to a * different sockaddr type for divert, e.g. * sockaddr_div with multiple fields like * sockaddr_dl. Presently we have only 7 bytes * but that will do for now as most interfaces * are 4 or less + 2 or less bytes for unit.
* There is probably a faster way of doing this, * possibly taking it from the sockaddr_dl on the iface. * This solves the problem of a P2P link and a LAN interface * having the same address, which can result in the wrong * interface being assigned to the packet when fed back * into the divert socket. Theoretically if the daemon saves * and re-uses the sockaddr_in as suggested in the man pages, * this iface name will come along for the ride. * (see div_output for the other half of this.) */ strlcpy(divsrc.sin_zero, m->m_pkthdr.rcvif->if_xname, sizeof(divsrc.sin_zero)); } /* Put packet on socket queue, if any */ sa = NULL; nport = htons((u_int16_t)(((struct ipfw_rule_ref *)(mtag+1))->info)); CK_LIST_FOREACH(inp, &V_divcb, inp_list) { /* XXX why does only one socket match? */ if (inp->inp_lport == nport) { INP_RLOCK(inp); sa = inp->inp_socket; SOCKBUF_LOCK(&sa->so_rcv); if (sbappendaddr_locked(&sa->so_rcv, (struct sockaddr *)&divsrc, m, (struct mbuf *)0) == 0) { SOCKBUF_UNLOCK(&sa->so_rcv); sa = NULL; /* force mbuf reclaim below */ } else sorwakeup_locked(sa); INP_RUNLOCK(inp); break; } } if (sa == NULL) { m_freem(m); KMOD_IPSTAT_INC(ips_noproto); KMOD_IPSTAT_DEC(ips_delivered); } } /* * Deliver packet back into the IP processing machinery. * * If no address specified, or address is 0.0.0.0, send to ip_output(); * otherwise, send to ip_input() and mark as having been received on * the interface with that address. */ static int div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin, struct mbuf *control) { struct epoch_tracker et; const struct ip *ip; struct m_tag *mtag; struct ipfw_rule_ref *dt; int error, family; /* * An mbuf may not have come from userland, but we pretend * that it has. */ m->m_pkthdr.rcvif = NULL; m->m_nextpkt = NULL; M_SETFIB(m, so->so_fibnum); if (control) m_freem(control); /* XXX */ mtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL); if (mtag == NULL) { /* this should be normal */ mtag = m_tag_alloc(MTAG_IPFW_RULE, 0, sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO); if (mtag == NULL) { m_freem(m); return (ENOBUFS); } m_tag_prepend(m, mtag); } dt = (struct ipfw_rule_ref *)(mtag+1); /* Loopback avoidance and state recovery */ if (sin) { int i; /* Set the starting point. We provide a non-zero slot, * but a non-matching chain_id to skip that info and use * the rulenum/rule_id. */ dt->slot = 1; /* dummy, chain_id is invalid */ dt->chain_id = 0; dt->rulenum = sin->sin_port+1; /* host format ? */ dt->rule_id = 0; /* XXX: broken for IPv6 */ /* * Find receive interface with the given name, stuffed * (if it exists) in the sin_zero[] field. * The name is user-supplied data so don't trust its size * or that it is zero terminated. */ for (i = 0; i < sizeof(sin->sin_zero) && sin->sin_zero[i]; i++) ; if (i > 0 && i < sizeof(sin->sin_zero)) m->m_pkthdr.rcvif = ifunit(sin->sin_zero); } ip = mtod(m, struct ip *); switch (ip->ip_v) { case IPVERSION: family = AF_INET; break; +#ifdef INET6 case IPV6_VERSION >> 4: family = AF_INET6; break; +#endif default: m_freem(m); return (EAFNOSUPPORT); } /* Reinject packet into the system as incoming or outgoing */ NET_EPOCH_ENTER(et); if (!sin || sin->sin_addr.s_addr == 0) { dt->info |= IPFW_IS_DIVERT | IPFW_INFO_OUT; error = div_output_outbound(family, so, m); } else { dt->info |= IPFW_IS_DIVERT | IPFW_INFO_IN; error = div_output_inbound(family, so, m, sin); } NET_EPOCH_EXIT(et); if (error != 0) m_freem(m); return (error); } /* * Sends mbuf @m to the wire via ip[6]_output(). * * Returns 0 on success, @m is consumed.
* On failure, returns an error code. It is the caller's responsibility to free @m. */ static int div_output_outbound(int family, struct socket *so, struct mbuf *m) { struct ip *const ip = mtod(m, struct ip *); struct mbuf *options; struct inpcb *inp; int error; inp = sotoinpcb(so); INP_RLOCK(inp); switch (family) { case AF_INET: /* * Don't allow both user-specified and setsockopt * options, and don't allow packet length sizes that * will crash. */ if ((((ip->ip_hl << 2) != sizeof(struct ip)) && inp->inp_options != NULL) || ((u_short)ntohs(ip->ip_len) > m->m_pkthdr.len)) { INP_RUNLOCK(inp); return (EINVAL); } break; #ifdef INET6 case AF_INET6: { struct ip6_hdr *const ip6 = mtod(m, struct ip6_hdr *); /* Don't allow packet length sizes that will crash */ if (((u_short)ntohs(ip6->ip6_plen) > m->m_pkthdr.len)) { INP_RUNLOCK(inp); return (EINVAL); } break; } #endif } /* Send packet to output processing */ KMOD_IPSTAT_INC(ips_rawout); /* XXX */ #ifdef MAC mac_inpcb_create_mbuf(inp, m); #endif /* * Get ready to inject the packet into ip_output(). * Just in case socket options were specified on the * divert socket, we duplicate them. This is done * to avoid having to hold the PCB locks over the call * to ip_output(), as doing this results in a number of * lock ordering complexities. * * Note that we set the multicast options argument for * ip_output() to NULL since it should be invariant that * they are not present. */ KASSERT(inp->inp_moptions == NULL, ("multicast options set on a divert socket")); /* * XXXCSJP: It is unclear to me whether or not it makes * sense for divert sockets to have options. However, * for now we will duplicate them with the INP locks * held so we can use them in ip_output() without * requiring a reference to the pcb. */ options = NULL; if (inp->inp_options != NULL) { options = m_dup(inp->inp_options, M_NOWAIT); if (options == NULL) { INP_RUNLOCK(inp); return (ENOBUFS); } } INP_RUNLOCK(inp); error = 0; switch (family) { case AF_INET: error = ip_output(m, options, NULL, ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0) | IP_ALLOWBROADCAST | IP_RAWOUTPUT, NULL, NULL); break; #ifdef INET6 case AF_INET6: error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); break; #endif } if (options != NULL) m_freem(options); return (error); } /* * Schedules mbuf @m for local processing via the IPv4/IPv6 netisr queue. * * Returns 0 on success, @m is consumed. * Returns an error code on failure. It is the caller's responsibility to free @m. */ static int div_output_inbound(int family, struct socket *so, struct mbuf *m, struct sockaddr_in *sin) { const struct ip *ip; struct ifaddr *ifa; if (m->m_pkthdr.rcvif == NULL) { /* * No luck with the name, check by IP address. * Clear the port and the ifname to make sure * there are no distractions for ifa_ifwithaddr. */ /* XXX: broken for IPv6 */ bzero(sin->sin_zero, sizeof(sin->sin_zero)); sin->sin_port = 0; ifa = ifa_ifwithaddr((struct sockaddr *) sin); if (ifa == NULL) return (EADDRNOTAVAIL); m->m_pkthdr.rcvif = ifa->ifa_ifp; } #ifdef MAC mac_socket_create_mbuf(so, m); #endif /* Send packet to input processing via netisr */ switch (family) { case AF_INET: ip = mtod(m, struct ip *); /* * Restore M_BCAST flag when destination address is * broadcast. It is expected by ip_tryforward().
*/ if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) m->m_flags |= M_MCAST; else if (in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) m->m_flags |= M_BCAST; netisr_queue_src(NETISR_IP, (uintptr_t)so, m); break; #ifdef INET6 case AF_INET6: netisr_queue_src(NETISR_IPV6, (uintptr_t)so, m); break; #endif default: return (EINVAL); } return (0); } static int div_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; int error; inp = sotoinpcb(so); KASSERT(inp == NULL, ("div_attach: inp != NULL")); if (td != NULL) { error = priv_check(td, PRIV_NETINET_DIVERT); if (error) return (error); } error = soreserve(so, div_sendspace, div_recvspace); if (error) return error; INP_INFO_WLOCK(&V_divcbinfo); error = in_pcballoc(so, &V_divcbinfo); if (error) { INP_INFO_WUNLOCK(&V_divcbinfo); return error; } inp = (struct inpcb *)so->so_pcb; INP_INFO_WUNLOCK(&V_divcbinfo); inp->inp_ip_p = proto; inp->inp_vflag |= INP_IPV4; inp->inp_flags |= INP_HDRINCL; INP_WUNLOCK(inp); return 0; } static void div_detach(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("div_detach: inp == NULL")); INP_INFO_WLOCK(&V_divcbinfo); INP_WLOCK(inp); in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(&V_divcbinfo); } static int div_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; int error; inp = sotoinpcb(so); KASSERT(inp != NULL, ("div_bind: inp == NULL")); /* in_pcbbind assumes that nam is a sockaddr_in * and in_pcbbind requires a valid address. Since divert * sockets don't we need to make sure the address is * filled in properly. * XXX -- divert should not be abusing in_pcbind * and should probably have its own family. */ if (nam->sa_family != AF_INET) return EAFNOSUPPORT; ((struct sockaddr_in *)nam)->sin_addr.s_addr = INADDR_ANY; INP_INFO_WLOCK(&V_divcbinfo); INP_WLOCK(inp); INP_HASH_WLOCK(&V_divcbinfo); error = in_pcbbind(inp, nam, td->td_ucred); INP_HASH_WUNLOCK(&V_divcbinfo); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_divcbinfo); return error; } static int div_shutdown(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("div_shutdown: inp == NULL")); INP_WLOCK(inp); socantsendmore(so); INP_WUNLOCK(inp); return 0; } static int div_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { /* Packet must have a header (but that's about it) */ if (m->m_len < sizeof (struct ip) && (m = m_pullup(m, sizeof (struct ip))) == NULL) { KMOD_IPSTAT_INC(ips_toosmall); m_freem(m); return EINVAL; } /* Send packet */ return div_output(so, m, (struct sockaddr_in *)nam, control); } static void div_ctlinput(int cmd, struct sockaddr *sa, void *vip) { struct in_addr faddr; faddr = ((struct sockaddr_in *)sa)->sin_addr; if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) return; if (PRC_IS_REDIRECT(cmd)) return; } static int div_pcblist(SYSCTL_HANDLER_ARGS) { struct xinpgen xig; struct epoch_tracker et; struct inpcb *inp; int error; if (req->newptr != 0) return EPERM; if (req->oldptr == 0) { int n; n = V_divcbinfo.ipi_count; n += imax(n / 8, 10); req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb); return 0; } if ((error = sysctl_wire_old_buffer(req, 0)) != 0) return (error); bzero(&xig, sizeof(xig)); xig.xig_len = sizeof xig; xig.xig_count = V_divcbinfo.ipi_count; xig.xig_gen = V_divcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return error; NET_EPOCH_ENTER(et); for (inp = CK_LIST_FIRST(V_divcbinfo.ipi_listhead); inp 
!= NULL; inp = CK_LIST_NEXT(inp, inp_list)) { INP_RLOCK(inp); if (inp->inp_gencnt <= xig.xig_gen) { struct xinpcb xi; in_pcbtoxinpcb(inp, &xi); INP_RUNLOCK(inp); error = SYSCTL_OUT(req, &xi, sizeof xi); } else INP_RUNLOCK(inp); } NET_EPOCH_EXIT(et); if (!error) { /* * Give the user an updated idea of our state. * If the generation differs from what we told * her before, she knows that something happened * while we were processing this request, and it * might be necessary to retry. */ xig.xig_gen = V_divcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = V_divcbinfo.ipi_count; error = SYSCTL_OUT(req, &xig, sizeof xig); } return (error); } #ifdef SYSCTL_NODE static SYSCTL_NODE(_net_inet, IPPROTO_DIVERT, divert, CTLFLAG_RW, 0, "IPDIVERT"); SYSCTL_PROC(_net_inet_divert, OID_AUTO, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0, div_pcblist, "S,xinpcb", "List of active divert sockets"); #endif struct pr_usrreqs div_usrreqs = { .pru_attach = div_attach, .pru_bind = div_bind, .pru_control = in_control, .pru_detach = div_detach, .pru_peeraddr = in_getpeeraddr, .pru_send = div_send, .pru_shutdown = div_shutdown, .pru_sockaddr = in_getsockaddr, .pru_sosetlabel = in_pcbsosetlabel }; struct protosw div_protosw = { .pr_type = SOCK_RAW, .pr_protocol = IPPROTO_DIVERT, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = div_input, .pr_ctlinput = div_ctlinput, .pr_ctloutput = ip_ctloutput, .pr_init = div_init, .pr_usrreqs = &div_usrreqs }; static int div_modevent(module_t mod, int type, void *unused) { int err = 0; switch (type) { case MOD_LOAD: /* * Protocol will be initialized by pf_proto_register(). * We don't have to register ip_protox because we are not * a true IP protocol that goes over the wire. */ err = pf_proto_register(PF_INET, &div_protosw); if (err != 0) return (err); ip_divert_ptr = divert_packet; ip_divert_event_tag = EVENTHANDLER_REGISTER(maxsockets_change, div_zone_change, NULL, EVENTHANDLER_PRI_ANY); break; case MOD_QUIESCE: /* * IPDIVERT may normally not be unloaded because of the * potential race conditions. Tell kldunload we can't be * unloaded unless the unload is forced. */ err = EPERM; break; case MOD_UNLOAD: /* * Forced unload. * * Module ipdivert can only be unloaded if no sockets are * connected. Maybe this can be changed later to forcefully * disconnect any open sockets. * * XXXRW: Note that there is a slight race here, as a new * socket open request could be spinning on the lock and then * we destroy the lock. 
*/ INP_INFO_WLOCK(&V_divcbinfo); if (V_divcbinfo.ipi_count != 0) { err = EBUSY; INP_INFO_WUNLOCK(&V_divcbinfo); break; } ip_divert_ptr = NULL; err = pf_proto_unregister(PF_INET, IPPROTO_DIVERT, SOCK_RAW); INP_INFO_WUNLOCK(&V_divcbinfo); #ifndef VIMAGE div_destroy(NULL); #endif EVENTHANDLER_DEREGISTER(maxsockets_change, ip_divert_event_tag); break; default: err = EOPNOTSUPP; break; } return err; } static moduledata_t ipdivertmod = { "ipdivert", div_modevent, 0 }; DECLARE_MODULE(ipdivert, ipdivertmod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY); MODULE_DEPEND(ipdivert, ipfw, 3, 3, 3); MODULE_VERSION(ipdivert, 1); Index: projects/clang1000-import/sys/sys/tree.h =================================================================== --- projects/clang1000-import/sys/sys/tree.h (revision 357178) +++ projects/clang1000-import/sys/sys/tree.h (revision 357179) @@ -1,825 +1,813 @@ /* $NetBSD: tree.h,v 1.8 2004/03/28 19:38:30 provos Exp $ */ /* $OpenBSD: tree.h,v 1.7 2002/10/17 21:51:54 art Exp $ */ /* $FreeBSD$ */ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright 2002 Niels Provos * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _SYS_TREE_H_ #define _SYS_TREE_H_ #include /* * This file defines data structures for different types of trees: * splay trees and red-black trees. * * A splay tree is a self-organizing data structure. Every operation * on the tree causes a splay to happen. The splay moves the requested * node to the root of the tree and partly rebalances it. * * This has the benefit that request locality causes faster lookups as * the requested nodes move to the top of the tree. On the other hand, * every lookup causes memory writes. * * The Balance Theorem bounds the total access time for m operations * and n inserts on an initially empty tree as O((m + n)lg n). The * amortized cost for a sequence of m accesses to a splay tree is O(lg n); * * A red-black tree is a binary search tree with the node color as an * extra attribute. It fulfills a set of conditions: * - every search path from the root to a leaf consists of the * same number of black nodes, * - each red node (except for the root) has a black parent, * - each leaf node is black. * * Every operation on a red-black tree is bounded as O(lg n). * The maximum height of a red-black tree is 2lg (n+1). 
*/ #define SPLAY_HEAD(name, type) \ struct name { \ struct type *sph_root; /* root of the tree */ \ } #define SPLAY_INITIALIZER(root) \ { NULL } #define SPLAY_INIT(root) do { \ (root)->sph_root = NULL; \ } while (/*CONSTCOND*/ 0) #define SPLAY_ENTRY(type) \ struct { \ struct type *spe_left; /* left element */ \ struct type *spe_right; /* right element */ \ } #define SPLAY_LEFT(elm, field) (elm)->field.spe_left #define SPLAY_RIGHT(elm, field) (elm)->field.spe_right #define SPLAY_ROOT(head) (head)->sph_root #define SPLAY_EMPTY(head) (SPLAY_ROOT(head) == NULL) /* SPLAY_ROTATE_{LEFT,RIGHT} expect that tmp hold SPLAY_{RIGHT,LEFT} */ #define SPLAY_ROTATE_RIGHT(head, tmp, field) do { \ SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field); \ SPLAY_RIGHT(tmp, field) = (head)->sph_root; \ (head)->sph_root = tmp; \ } while (/*CONSTCOND*/ 0) #define SPLAY_ROTATE_LEFT(head, tmp, field) do { \ SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field); \ SPLAY_LEFT(tmp, field) = (head)->sph_root; \ (head)->sph_root = tmp; \ } while (/*CONSTCOND*/ 0) #define SPLAY_LINKLEFT(head, tmp, field) do { \ SPLAY_LEFT(tmp, field) = (head)->sph_root; \ tmp = (head)->sph_root; \ (head)->sph_root = SPLAY_LEFT((head)->sph_root, field); \ } while (/*CONSTCOND*/ 0) #define SPLAY_LINKRIGHT(head, tmp, field) do { \ SPLAY_RIGHT(tmp, field) = (head)->sph_root; \ tmp = (head)->sph_root; \ (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field); \ } while (/*CONSTCOND*/ 0) #define SPLAY_ASSEMBLE(head, node, left, right, field) do { \ SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field); \ SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field);\ SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field); \ SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field); \ } while (/*CONSTCOND*/ 0) /* Generates prototypes and inline functions */ #define SPLAY_PROTOTYPE(name, type, field, cmp) \ void name##_SPLAY(struct name *, struct type *); \ void name##_SPLAY_MINMAX(struct name *, int); \ struct type *name##_SPLAY_INSERT(struct name *, struct type *); \ struct type *name##_SPLAY_REMOVE(struct name *, struct type *); \ \ /* Finds the node with the same key as elm */ \ static __unused __inline struct type * \ name##_SPLAY_FIND(struct name *head, struct type *elm) \ { \ if (SPLAY_EMPTY(head)) \ return(NULL); \ name##_SPLAY(head, elm); \ if ((cmp)(elm, (head)->sph_root) == 0) \ return (head->sph_root); \ return (NULL); \ } \ \ static __unused __inline struct type * \ name##_SPLAY_NEXT(struct name *head, struct type *elm) \ { \ name##_SPLAY(head, elm); \ if (SPLAY_RIGHT(elm, field) != NULL) { \ elm = SPLAY_RIGHT(elm, field); \ while (SPLAY_LEFT(elm, field) != NULL) { \ elm = SPLAY_LEFT(elm, field); \ } \ } else \ elm = NULL; \ return (elm); \ } \ \ static __unused __inline struct type * \ name##_SPLAY_MIN_MAX(struct name *head, int val) \ { \ name##_SPLAY_MINMAX(head, val); \ return (SPLAY_ROOT(head)); \ } /* Main splay operation. 
* Moves node close to the key of elm to top */ #define SPLAY_GENERATE(name, type, field, cmp) \ struct type * \ name##_SPLAY_INSERT(struct name *head, struct type *elm) \ { \ if (SPLAY_EMPTY(head)) { \ SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = NULL; \ } else { \ int __comp; \ name##_SPLAY(head, elm); \ __comp = (cmp)(elm, (head)->sph_root); \ if(__comp < 0) { \ SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field);\ SPLAY_RIGHT(elm, field) = (head)->sph_root; \ SPLAY_LEFT((head)->sph_root, field) = NULL; \ } else if (__comp > 0) { \ SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field);\ SPLAY_LEFT(elm, field) = (head)->sph_root; \ SPLAY_RIGHT((head)->sph_root, field) = NULL; \ } else \ return ((head)->sph_root); \ } \ (head)->sph_root = (elm); \ return (NULL); \ } \ \ struct type * \ name##_SPLAY_REMOVE(struct name *head, struct type *elm) \ { \ struct type *__tmp; \ if (SPLAY_EMPTY(head)) \ return (NULL); \ name##_SPLAY(head, elm); \ if ((cmp)(elm, (head)->sph_root) == 0) { \ if (SPLAY_LEFT((head)->sph_root, field) == NULL) { \ (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);\ } else { \ __tmp = SPLAY_RIGHT((head)->sph_root, field); \ (head)->sph_root = SPLAY_LEFT((head)->sph_root, field);\ name##_SPLAY(head, elm); \ SPLAY_RIGHT((head)->sph_root, field) = __tmp; \ } \ return (elm); \ } \ return (NULL); \ } \ \ void \ name##_SPLAY(struct name *head, struct type *elm) \ { \ struct type __node, *__left, *__right, *__tmp; \ int __comp; \ \ SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\ __left = __right = &__node; \ \ while ((__comp = (cmp)(elm, (head)->sph_root)) != 0) { \ if (__comp < 0) { \ __tmp = SPLAY_LEFT((head)->sph_root, field); \ if (__tmp == NULL) \ break; \ if ((cmp)(elm, __tmp) < 0){ \ SPLAY_ROTATE_RIGHT(head, __tmp, field); \ if (SPLAY_LEFT((head)->sph_root, field) == NULL)\ break; \ } \ SPLAY_LINKLEFT(head, __right, field); \ } else if (__comp > 0) { \ __tmp = SPLAY_RIGHT((head)->sph_root, field); \ if (__tmp == NULL) \ break; \ if ((cmp)(elm, __tmp) > 0){ \ SPLAY_ROTATE_LEFT(head, __tmp, field); \ if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\ break; \ } \ SPLAY_LINKRIGHT(head, __left, field); \ } \ } \ SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \ } \ \ /* Splay with either the minimum or the maximum element \ * Used to find minimum or maximum element in tree. \ */ \ void name##_SPLAY_MINMAX(struct name *head, int __comp) \ { \ struct type __node, *__left, *__right, *__tmp; \ \ SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\ __left = __right = &__node; \ \ while (1) { \ if (__comp < 0) { \ __tmp = SPLAY_LEFT((head)->sph_root, field); \ if (__tmp == NULL) \ break; \ if (__comp < 0){ \ SPLAY_ROTATE_RIGHT(head, __tmp, field); \ if (SPLAY_LEFT((head)->sph_root, field) == NULL)\ break; \ } \ SPLAY_LINKLEFT(head, __right, field); \ } else if (__comp > 0) { \ __tmp = SPLAY_RIGHT((head)->sph_root, field); \ if (__tmp == NULL) \ break; \ if (__comp > 0) { \ SPLAY_ROTATE_LEFT(head, __tmp, field); \ if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\ break; \ } \ SPLAY_LINKRIGHT(head, __left, field); \ } \ } \ SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \ } #define SPLAY_NEGINF -1 #define SPLAY_INF 1 #define SPLAY_INSERT(name, x, y) name##_SPLAY_INSERT(x, y) #define SPLAY_REMOVE(name, x, y) name##_SPLAY_REMOVE(x, y) #define SPLAY_FIND(name, x, y) name##_SPLAY_FIND(x, y) #define SPLAY_NEXT(name, x, y) name##_SPLAY_NEXT(x, y) #define SPLAY_MIN(name, x) (SPLAY_EMPTY(x) ? 
NULL \ : name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF)) #define SPLAY_MAX(name, x) (SPLAY_EMPTY(x) ? NULL \ : name##_SPLAY_MIN_MAX(x, SPLAY_INF)) #define SPLAY_FOREACH(x, name, head) \ for ((x) = SPLAY_MIN(name, head); \ (x) != NULL; \ (x) = SPLAY_NEXT(name, head, x)) /* Macros that define a red-black tree */ #define RB_HEAD(name, type) \ struct name { \ struct type *rbh_root; /* root of the tree */ \ } #define RB_INITIALIZER(root) \ { NULL } #define RB_INIT(root) do { \ (root)->rbh_root = NULL; \ } while (/*CONSTCOND*/ 0) #define RB_BLACK 0 #define RB_RED 1 #define RB_ENTRY(type) \ struct { \ struct type *rbe_left; /* left element */ \ struct type *rbe_right; /* right element */ \ struct type *rbe_parent; /* parent element */ \ int rbe_color; /* node color */ \ } #define RB_LEFT(elm, field) (elm)->field.rbe_left #define RB_RIGHT(elm, field) (elm)->field.rbe_right #define RB_PARENT(elm, field) (elm)->field.rbe_parent #define RB_COLOR(elm, field) (elm)->field.rbe_color #define RB_ROOT(head) (head)->rbh_root #define RB_EMPTY(head) (RB_ROOT(head) == NULL) #define RB_SET(elm, parent, field) do { \ RB_PARENT(elm, field) = parent; \ RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL; \ RB_COLOR(elm, field) = RB_RED; \ } while (/*CONSTCOND*/ 0) #define RB_SET_BLACKRED(black, red, field) do { \ RB_COLOR(black, field) = RB_BLACK; \ RB_COLOR(red, field) = RB_RED; \ } while (/*CONSTCOND*/ 0) +/* + * Something to be invoked in a loop at the root of every modified subtree, + * from the bottom up to the root, to update augmented node data. + */ #ifndef RB_AUGMENT -#define RB_AUGMENT(x) do {} while (0) +#define RB_AUGMENT(x) break #endif #define RB_ROTATE_LEFT(head, elm, tmp, field) do { \ (tmp) = RB_RIGHT(elm, field); \ if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field)) != NULL) { \ RB_PARENT(RB_LEFT(tmp, field), field) = (elm); \ } \ - RB_AUGMENT(elm); \ if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != NULL) { \ if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \ RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \ else \ RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \ } else \ (head)->rbh_root = (tmp); \ RB_LEFT(tmp, field) = (elm); \ RB_PARENT(elm, field) = (tmp); \ - RB_AUGMENT(tmp); \ - if ((RB_PARENT(tmp, field))) \ - RB_AUGMENT(RB_PARENT(tmp, field)); \ + RB_AUGMENT(elm); \ } while (/*CONSTCOND*/ 0) #define RB_ROTATE_RIGHT(head, elm, tmp, field) do { \ (tmp) = RB_LEFT(elm, field); \ if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field)) != NULL) { \ RB_PARENT(RB_RIGHT(tmp, field), field) = (elm); \ } \ - RB_AUGMENT(elm); \ if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != NULL) { \ if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \ RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \ else \ RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \ } else \ (head)->rbh_root = (tmp); \ RB_RIGHT(tmp, field) = (elm); \ RB_PARENT(elm, field) = (tmp); \ - RB_AUGMENT(tmp); \ - if ((RB_PARENT(tmp, field))) \ - RB_AUGMENT(RB_PARENT(tmp, field)); \ + RB_AUGMENT(elm); \ } while (/*CONSTCOND*/ 0) /* Generates prototypes and inline functions */ #define RB_PROTOTYPE(name, type, field, cmp) \ RB_PROTOTYPE_INTERNAL(name, type, field, cmp,) #define RB_PROTOTYPE_STATIC(name, type, field, cmp) \ RB_PROTOTYPE_INTERNAL(name, type, field, cmp, __unused static) #define RB_PROTOTYPE_INTERNAL(name, type, field, cmp, attr) \ RB_PROTOTYPE_INSERT_COLOR(name, type, attr); \ RB_PROTOTYPE_REMOVE_COLOR(name, type, attr); \ RB_PROTOTYPE_INSERT(name, type, attr); \ RB_PROTOTYPE_REMOVE(name, type, attr); \ RB_PROTOTYPE_FIND(name, type, attr); \ 
RB_PROTOTYPE_NFIND(name, type, attr); \ RB_PROTOTYPE_NEXT(name, type, attr); \ RB_PROTOTYPE_PREV(name, type, attr); \ RB_PROTOTYPE_MINMAX(name, type, attr); \ RB_PROTOTYPE_REINSERT(name, type, attr); #define RB_PROTOTYPE_INSERT_COLOR(name, type, attr) \ attr void name##_RB_INSERT_COLOR(struct name *, struct type *) #define RB_PROTOTYPE_REMOVE_COLOR(name, type, attr) \ attr void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *) #define RB_PROTOTYPE_REMOVE(name, type, attr) \ attr struct type *name##_RB_REMOVE(struct name *, struct type *) #define RB_PROTOTYPE_INSERT(name, type, attr) \ attr struct type *name##_RB_INSERT(struct name *, struct type *) #define RB_PROTOTYPE_FIND(name, type, attr) \ attr struct type *name##_RB_FIND(struct name *, struct type *) #define RB_PROTOTYPE_NFIND(name, type, attr) \ attr struct type *name##_RB_NFIND(struct name *, struct type *) #define RB_PROTOTYPE_NEXT(name, type, attr) \ attr struct type *name##_RB_NEXT(struct type *) #define RB_PROTOTYPE_PREV(name, type, attr) \ attr struct type *name##_RB_PREV(struct type *) #define RB_PROTOTYPE_MINMAX(name, type, attr) \ attr struct type *name##_RB_MINMAX(struct name *, int) #define RB_PROTOTYPE_REINSERT(name, type, attr) \ attr struct type *name##_RB_REINSERT(struct name *, struct type *) /* Main rb operation. * Moves node close to the key of elm to top */ #define RB_GENERATE(name, type, field, cmp) \ RB_GENERATE_INTERNAL(name, type, field, cmp,) #define RB_GENERATE_STATIC(name, type, field, cmp) \ RB_GENERATE_INTERNAL(name, type, field, cmp, __unused static) #define RB_GENERATE_INTERNAL(name, type, field, cmp, attr) \ RB_GENERATE_INSERT_COLOR(name, type, field, attr) \ RB_GENERATE_REMOVE_COLOR(name, type, field, attr) \ RB_GENERATE_INSERT(name, type, field, cmp, attr) \ RB_GENERATE_REMOVE(name, type, field, attr) \ RB_GENERATE_FIND(name, type, field, cmp, attr) \ RB_GENERATE_NFIND(name, type, field, cmp, attr) \ RB_GENERATE_NEXT(name, type, field, attr) \ RB_GENERATE_PREV(name, type, field, attr) \ RB_GENERATE_MINMAX(name, type, field, attr) \ RB_GENERATE_REINSERT(name, type, field, cmp, attr) #define RB_GENERATE_INSERT_COLOR(name, type, field, attr) \ attr void \ name##_RB_INSERT_COLOR(struct name *head, struct type *elm) \ { \ struct type *parent, *gparent, *tmp; \ while ((parent = RB_PARENT(elm, field)) != NULL && \ RB_COLOR(parent, field) == RB_RED) { \ gparent = RB_PARENT(parent, field); \ if (parent == RB_LEFT(gparent, field)) { \ tmp = RB_RIGHT(gparent, field); \ if (tmp && RB_COLOR(tmp, field) == RB_RED) { \ RB_COLOR(tmp, field) = RB_BLACK; \ RB_SET_BLACKRED(parent, gparent, field);\ elm = gparent; \ continue; \ } \ if (RB_RIGHT(parent, field) == elm) { \ RB_ROTATE_LEFT(head, parent, tmp, field);\ tmp = parent; \ parent = elm; \ elm = tmp; \ } \ RB_SET_BLACKRED(parent, gparent, field); \ RB_ROTATE_RIGHT(head, gparent, tmp, field); \ } else { \ tmp = RB_LEFT(gparent, field); \ if (tmp && RB_COLOR(tmp, field) == RB_RED) { \ RB_COLOR(tmp, field) = RB_BLACK; \ RB_SET_BLACKRED(parent, gparent, field);\ elm = gparent; \ continue; \ } \ if (RB_LEFT(parent, field) == elm) { \ RB_ROTATE_RIGHT(head, parent, tmp, field);\ tmp = parent; \ parent = elm; \ elm = tmp; \ } \ RB_SET_BLACKRED(parent, gparent, field); \ RB_ROTATE_LEFT(head, gparent, tmp, field); \ } \ } \ RB_COLOR(head->rbh_root, field) = RB_BLACK; \ } #define RB_GENERATE_REMOVE_COLOR(name, type, field, attr) \ attr void \ name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm) \ { \ struct type *tmp; \ while ((elm 
== NULL || RB_COLOR(elm, field) == RB_BLACK) && \ elm != RB_ROOT(head)) { \ if (RB_LEFT(parent, field) == elm) { \ tmp = RB_RIGHT(parent, field); \ if (RB_COLOR(tmp, field) == RB_RED) { \ RB_SET_BLACKRED(tmp, parent, field); \ RB_ROTATE_LEFT(head, parent, tmp, field);\ tmp = RB_RIGHT(parent, field); \ } \ if ((RB_LEFT(tmp, field) == NULL || \ RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\ (RB_RIGHT(tmp, field) == NULL || \ RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\ RB_COLOR(tmp, field) = RB_RED; \ elm = parent; \ parent = RB_PARENT(elm, field); \ } else { \ if (RB_RIGHT(tmp, field) == NULL || \ RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK) {\ struct type *oleft; \ if ((oleft = RB_LEFT(tmp, field)) \ != NULL) \ RB_COLOR(oleft, field) = RB_BLACK;\ RB_COLOR(tmp, field) = RB_RED; \ RB_ROTATE_RIGHT(head, tmp, oleft, field);\ tmp = RB_RIGHT(parent, field); \ } \ RB_COLOR(tmp, field) = RB_COLOR(parent, field);\ RB_COLOR(parent, field) = RB_BLACK; \ if (RB_RIGHT(tmp, field)) \ RB_COLOR(RB_RIGHT(tmp, field), field) = RB_BLACK;\ RB_ROTATE_LEFT(head, parent, tmp, field);\ elm = RB_ROOT(head); \ break; \ } \ } else { \ tmp = RB_LEFT(parent, field); \ if (RB_COLOR(tmp, field) == RB_RED) { \ RB_SET_BLACKRED(tmp, parent, field); \ RB_ROTATE_RIGHT(head, parent, tmp, field);\ tmp = RB_LEFT(parent, field); \ } \ if ((RB_LEFT(tmp, field) == NULL || \ RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\ (RB_RIGHT(tmp, field) == NULL || \ RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\ RB_COLOR(tmp, field) = RB_RED; \ elm = parent; \ parent = RB_PARENT(elm, field); \ } else { \ if (RB_LEFT(tmp, field) == NULL || \ RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) {\ struct type *oright; \ if ((oright = RB_RIGHT(tmp, field)) \ != NULL) \ RB_COLOR(oright, field) = RB_BLACK;\ RB_COLOR(tmp, field) = RB_RED; \ RB_ROTATE_LEFT(head, tmp, oright, field);\ tmp = RB_LEFT(parent, field); \ } \ RB_COLOR(tmp, field) = RB_COLOR(parent, field);\ RB_COLOR(parent, field) = RB_BLACK; \ if (RB_LEFT(tmp, field)) \ RB_COLOR(RB_LEFT(tmp, field), field) = RB_BLACK;\ RB_ROTATE_RIGHT(head, parent, tmp, field);\ elm = RB_ROOT(head); \ break; \ } \ } \ } \ if (elm) \ RB_COLOR(elm, field) = RB_BLACK; \ } #define RB_GENERATE_REMOVE(name, type, field, attr) \ attr struct type * \ name##_RB_REMOVE(struct name *head, struct type *elm) \ { \ struct type *child, *parent, *old = elm; \ int color; \ if (RB_LEFT(elm, field) == NULL) \ child = RB_RIGHT(elm, field); \ else if (RB_RIGHT(elm, field) == NULL) \ child = RB_LEFT(elm, field); \ else { \ - struct type *left; \ - elm = RB_RIGHT(elm, field); \ - while ((left = RB_LEFT(elm, field)) != NULL) \ - elm = left; \ - child = RB_RIGHT(elm, field); \ - parent = RB_PARENT(elm, field); \ - color = RB_COLOR(elm, field); \ - if (child) \ - RB_PARENT(child, field) = parent; \ - if (parent) { \ - if (RB_LEFT(parent, field) == elm) \ - RB_LEFT(parent, field) = child; \ + elm = RB_RIGHT(old, field); \ + if ((child = RB_LEFT(elm, field)) == NULL) { \ + child = RB_RIGHT(elm, field); \ + RB_RIGHT(old, field) = child; \ + RB_PARENT(elm, field) = elm; \ + } else { \ + do \ + elm = child; \ + while ((child = RB_LEFT(elm, field)) != NULL); \ + child = RB_RIGHT(elm, field); \ + RB_PARENT(RB_RIGHT(old, field), field) = elm; \ + } \ + RB_PARENT(RB_LEFT(old, field), field) = elm; \ + parent = RB_PARENT(old, field); \ + if (parent != NULL) { \ + if (RB_LEFT(parent, field) == old) \ + RB_LEFT(parent, field) = elm; \ else \ - RB_RIGHT(parent, field) = child; \ - RB_AUGMENT(parent); \ + 
RB_RIGHT(parent, field) = elm; \ } else \ - RB_ROOT(head) = child; \ - if (RB_PARENT(elm, field) == old) \ - parent = elm; \ - (elm)->field = (old)->field; \ - if (RB_PARENT(old, field)) { \ - if (RB_LEFT(RB_PARENT(old, field), field) == old)\ - RB_LEFT(RB_PARENT(old, field), field) = elm;\ - else \ - RB_RIGHT(RB_PARENT(old, field), field) = elm;\ - RB_AUGMENT(RB_PARENT(old, field)); \ - } else \ RB_ROOT(head) = elm; \ - RB_PARENT(RB_LEFT(old, field), field) = elm; \ - if (RB_RIGHT(old, field)) \ - RB_PARENT(RB_RIGHT(old, field), field) = elm; \ - if (parent) { \ - left = parent; \ - do { \ - RB_AUGMENT(left); \ - } while ((left = RB_PARENT(left, field)) != NULL); \ - } \ - goto color; \ } \ parent = RB_PARENT(elm, field); \ color = RB_COLOR(elm, field); \ - if (child) \ + if (child != NULL) \ RB_PARENT(child, field) = parent; \ - if (parent) { \ + if (parent != NULL) { \ if (RB_LEFT(parent, field) == elm) \ RB_LEFT(parent, field) = child; \ else \ RB_RIGHT(parent, field) = child; \ - RB_AUGMENT(parent); \ } else \ RB_ROOT(head) = child; \ -color: \ + if (elm != old) \ + (elm)->field = (old)->field; \ if (color == RB_BLACK) \ name##_RB_REMOVE_COLOR(head, parent, child); \ + while (parent != NULL) { \ + RB_AUGMENT(parent); \ + parent = RB_PARENT(parent, field); \ + } \ return (old); \ -} \ +} #define RB_GENERATE_INSERT(name, type, field, cmp, attr) \ /* Inserts a node into the RB tree */ \ attr struct type * \ name##_RB_INSERT(struct name *head, struct type *elm) \ { \ struct type *tmp; \ struct type *parent = NULL; \ int comp = 0; \ tmp = RB_ROOT(head); \ while (tmp) { \ parent = tmp; \ comp = (cmp)(elm, parent); \ if (comp < 0) \ tmp = RB_LEFT(tmp, field); \ else if (comp > 0) \ tmp = RB_RIGHT(tmp, field); \ else \ return (tmp); \ } \ RB_SET(elm, parent, field); \ if (parent != NULL) { \ if (comp < 0) \ RB_LEFT(parent, field) = elm; \ else \ RB_RIGHT(parent, field) = elm; \ - RB_AUGMENT(parent); \ } else \ RB_ROOT(head) = elm; \ name##_RB_INSERT_COLOR(head, elm); \ + while (elm != NULL) { \ + RB_AUGMENT(elm); \ + elm = RB_PARENT(elm, field); \ + } \ return (NULL); \ } #define RB_GENERATE_FIND(name, type, field, cmp, attr) \ /* Finds the node with the same key as elm */ \ attr struct type * \ name##_RB_FIND(struct name *head, struct type *elm) \ { \ struct type *tmp = RB_ROOT(head); \ int comp; \ while (tmp) { \ comp = cmp(elm, tmp); \ if (comp < 0) \ tmp = RB_LEFT(tmp, field); \ else if (comp > 0) \ tmp = RB_RIGHT(tmp, field); \ else \ return (tmp); \ } \ return (NULL); \ } #define RB_GENERATE_NFIND(name, type, field, cmp, attr) \ /* Finds the first node greater than or equal to the search key */ \ attr struct type * \ name##_RB_NFIND(struct name *head, struct type *elm) \ { \ struct type *tmp = RB_ROOT(head); \ struct type *res = NULL; \ int comp; \ while (tmp) { \ comp = cmp(elm, tmp); \ if (comp < 0) { \ res = tmp; \ tmp = RB_LEFT(tmp, field); \ } \ else if (comp > 0) \ tmp = RB_RIGHT(tmp, field); \ else \ return (tmp); \ } \ return (res); \ } #define RB_GENERATE_NEXT(name, type, field, attr) \ /* ARGSUSED */ \ attr struct type * \ name##_RB_NEXT(struct type *elm) \ { \ if (RB_RIGHT(elm, field)) { \ elm = RB_RIGHT(elm, field); \ while (RB_LEFT(elm, field)) \ elm = RB_LEFT(elm, field); \ } else { \ if (RB_PARENT(elm, field) && \ (elm == RB_LEFT(RB_PARENT(elm, field), field))) \ elm = RB_PARENT(elm, field); \ else { \ while (RB_PARENT(elm, field) && \ (elm == RB_RIGHT(RB_PARENT(elm, field), field)))\ elm = RB_PARENT(elm, field); \ elm = RB_PARENT(elm, field); \ } \ } \ return (elm); \ } 
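/*
 * Illustrative sketch, not part of this header: how a consumer might
 * use these macros with the reworked RB_AUGMENT hook.  All names below
 * (node, nodetree, node_cmp, node_augment, the "size" field) are
 * hypothetical.  If RB_AUGMENT is defined before RB_GENERATE, it is
 * now invoked bottom-up on each node of every modified subtree, so it
 * can maintain per-subtree data such as node counts:
 *
 *	struct node {
 *		RB_ENTRY(node) link;
 *		int key;
 *		int size;		// nodes in the subtree rooted here
 *	};
 *
 *	static void node_augment(struct node *);
 *	#define RB_AUGMENT(x)	node_augment(x)
 *
 *	static int
 *	node_cmp(struct node *a, struct node *b)
 *	{
 *		return (a->key < b->key ? -1 : a->key > b->key);
 *	}
 *
 *	RB_HEAD(nodetree, node);
 *	RB_GENERATE_STATIC(nodetree, node, link, node_cmp)
 *
 *	static void
 *	node_augment(struct node *n)
 *	{
 *		n->size = 1;
 *		if (RB_LEFT(n, link) != NULL)
 *			n->size += RB_LEFT(n, link)->size;
 *		if (RB_RIGHT(n, link) != NULL)
 *			n->size += RB_RIGHT(n, link)->size;
 *	}
 *
 * After RB_INSERT() or RB_REMOVE(), every node's "size" is up to date,
 * because the generated code walks from each modified subtree up to the
 * root calling RB_AUGMENT() on the way.
 */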
#define RB_GENERATE_PREV(name, type, field, attr) \ /* ARGSUSED */ \ attr struct type * \ name##_RB_PREV(struct type *elm) \ { \ if (RB_LEFT(elm, field)) { \ elm = RB_LEFT(elm, field); \ while (RB_RIGHT(elm, field)) \ elm = RB_RIGHT(elm, field); \ } else { \ if (RB_PARENT(elm, field) && \ (elm == RB_RIGHT(RB_PARENT(elm, field), field))) \ elm = RB_PARENT(elm, field); \ else { \ while (RB_PARENT(elm, field) && \ (elm == RB_LEFT(RB_PARENT(elm, field), field)))\ elm = RB_PARENT(elm, field); \ elm = RB_PARENT(elm, field); \ } \ } \ return (elm); \ } #define RB_GENERATE_MINMAX(name, type, field, attr) \ attr struct type * \ name##_RB_MINMAX(struct name *head, int val) \ { \ struct type *tmp = RB_ROOT(head); \ struct type *parent = NULL; \ while (tmp) { \ parent = tmp; \ if (val < 0) \ tmp = RB_LEFT(tmp, field); \ else \ tmp = RB_RIGHT(tmp, field); \ } \ return (parent); \ } #define RB_GENERATE_REINSERT(name, type, field, cmp, attr) \ attr struct type * \ name##_RB_REINSERT(struct name *head, struct type *elm) \ { \ struct type *cmpelm; \ if (((cmpelm = RB_PREV(name, head, elm)) != NULL && \ cmp(cmpelm, elm) >= 0) || \ ((cmpelm = RB_NEXT(name, head, elm)) != NULL && \ cmp(elm, cmpelm) >= 0)) { \ /* XXXLAS: Remove/insert is heavy handed. */ \ RB_REMOVE(name, head, elm); \ return (RB_INSERT(name, head, elm)); \ } \ return (NULL); \ } \ #define RB_NEGINF -1 #define RB_INF 1 #define RB_INSERT(name, x, y) name##_RB_INSERT(x, y) #define RB_REMOVE(name, x, y) name##_RB_REMOVE(x, y) #define RB_FIND(name, x, y) name##_RB_FIND(x, y) #define RB_NFIND(name, x, y) name##_RB_NFIND(x, y) #define RB_NEXT(name, x, y) name##_RB_NEXT(y) #define RB_PREV(name, x, y) name##_RB_PREV(y) #define RB_MIN(name, x) name##_RB_MINMAX(x, RB_NEGINF) #define RB_MAX(name, x) name##_RB_MINMAX(x, RB_INF) #define RB_REINSERT(name, x, y) name##_RB_REINSERT(x, y) #define RB_FOREACH(x, name, head) \ for ((x) = RB_MIN(name, head); \ (x) != NULL; \ (x) = name##_RB_NEXT(x)) #define RB_FOREACH_FROM(x, name, y) \ for ((x) = (y); \ ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL); \ (x) = (y)) #define RB_FOREACH_SAFE(x, name, head, y) \ for ((x) = RB_MIN(name, head); \ ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL); \ (x) = (y)) #define RB_FOREACH_REVERSE(x, name, head) \ for ((x) = RB_MAX(name, head); \ (x) != NULL; \ (x) = name##_RB_PREV(x)) #define RB_FOREACH_REVERSE_FROM(x, name, y) \ for ((x) = (y); \ ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL); \ (x) = (y)) #define RB_FOREACH_REVERSE_SAFE(x, name, head, y) \ for ((x) = RB_MAX(name, head); \ ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL); \ (x) = (y)) #endif /* _SYS_TREE_H_ */ Index: projects/clang1000-import/sys/tools/vnode_if.awk =================================================================== --- projects/clang1000-import/sys/tools/vnode_if.awk (revision 357178) +++ projects/clang1000-import/sys/tools/vnode_if.awk (revision 357179) @@ -1,476 +1,478 @@ #!/usr/bin/awk -f #- # SPDX-License-Identifier: BSD-3-Clause # # Copyright (c) 1992, 1993 # The Regents of the University of California. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. 
Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # 3. Neither the name of the University nor the names of its contributors # may be used to endorse or promote products derived from this software # without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # @(#)vnode_if.sh 8.1 (Berkeley) 6/10/93 # $FreeBSD$ # # Script to produce VFS front-end sugar. # # usage: vnode_if.awk [-c | -h | -p | -q] # (where is currently /sys/kern/vnode_if.src) # The source file must have a .src extension # function usage() { print "usage: vnode_if.awk [-c|-h|-p|-q]"; exit 1; } function die(msg, what) { printf srcfile "(" fnr "): " > "/dev/stderr"; printf msg "\n", what > "/dev/stderr"; exit 1; } function t_spc(type) { # Append a space if the type is not a pointer return (type ~ /\*$/) ? type : type " "; } # These are just for convenience ... function printc(s) {print s > cfile;} function printh(s) {print s > hfile;} function printp(s) {print s > pfile;} function printq(s) {print s > qfile;} function add_debug_code(name, arg, pos, ind) { if (arg == "vpp") star = "*"; else star = ""; if (lockdata[name, arg, pos] && (lockdata[name, arg, pos] != "-")) { printc(ind"ASSERT_VI_UNLOCKED("star"a->a_"arg", \""uname"\");"); # Add assertions for locking if (lockdata[name, arg, pos] == "L") printc(ind"ASSERT_VOP_LOCKED(" star "a->a_"arg", \""uname"\");"); else if (lockdata[name, arg, pos] == "U") printc(ind"ASSERT_VOP_UNLOCKED(" star "a->a_"arg", \""uname"\");"); else if (lockdata[name, arg, pos] == "E") printc(ind"ASSERT_VOP_ELOCKED(" star "a->a_"arg", \""uname"\");"); else if (0) { # XXX More checks! } } } function add_pre(name) { if (lockdata[name, "pre"]) { printc("\t"lockdata[name, "pre"]"(a);"); } } function add_post(name) { if (lockdata[name, "post"]) { printc("\t"lockdata[name, "post"]"(a, rc);"); } } function find_arg_with_type (type) { for (jj = 0; jj < numargs; jj++) { if (types[jj] == type) { return "VOPARG_OFFSETOF(struct " \ name "_args,a_" args[jj] ")"; } } return "VDESC_NO_OFFSET"; } BEGIN{ # Process the command line for (i = 1; i < ARGC; i++) { arg = ARGV[i]; if (arg !~ /^-[chpq]+$/ && arg !~ /\.src$/) usage(); if (arg ~ /^-.*c/) cfile = "vnode_if.c"; if (arg ~ /^-.*h/) hfile = "vnode_if.h"; if (arg ~ /^-.*p/) pfile = "vnode_if_newproto.h"; if (arg ~ /^-.*q/) qfile = "vnode_if_typedef.h"; if (arg ~ /\.src$/) srcfile = arg; } ARGC = 1; if (!cfile && !hfile && !pfile && !qfile) exit 0; if (!srcfile) usage(); # Avoid a literal generated file tag here. 
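	# (The string concatenation below keeps the literal marker out of
	# this script's own text, so tools that scan for it do not mistake
	# vnode_if.awk itself for a generated file.)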
generated = "@" "generated"; common_head = \ "/*\n" \ " * This file is " generated " automatically.\n" \ " * Do not modify anything in here by hand.\n" \ " *\n" \ " * Created from $FreeBSD$\n" \ " */\n" \ "\n"; if (pfile) { printp(common_head) printp("struct vop_vector {") printp("\tstruct vop_vector\t*vop_default;") printp("\tvop_bypass_t\t*vop_bypass;") } if (qfile) { printq(common_head) } if (hfile) { printh(common_head "extern struct vnodeop_desc vop_default_desc;"); printh("#include \"vnode_if_typedef.h\"") printh("#include \"vnode_if_newproto.h\"") } if (cfile) { printc(common_head \ "#include \n" \ "#include \n" \ "#include \n" \ "#include \n" \ "#include \n" \ "#include \n" \ "#include \n" \ "#include \n" \ "\n" \ "SDT_PROVIDER_DECLARE(vfs);\n" \ "\n" \ "struct vnodeop_desc vop_default_desc = {\n" \ " \"default\",\n" \ " 0,\n" \ " 0,\n" \ " (vop_bypass_t *)vop_panic,\n" \ " NULL,\n" \ " VDESC_NO_OFFSET,\n" \ " VDESC_NO_OFFSET,\n" \ " VDESC_NO_OFFSET,\n" \ " VDESC_NO_OFFSET,\n" \ "};\n"); } while ((getline < srcfile) > 0) { fnr++; if (NF == 0) continue; if ($1 ~ /^%%/) { if (NF != 6 || $2 !~ /^[a-z_]+$/ || $3 !~ /^[a-z]+$/ || $4 !~ /^.$/ || $5 !~ /^.$/ || $6 !~ /^.$/) { die("Invalid %s construction", "%%"); continue; } lockdata["vop_" $2, $3, "Entry"] = $4; lockdata["vop_" $2, $3, "OK"] = $5; lockdata["vop_" $2, $3, "Error"] = $6; continue; } if ($1 ~ /^%!/) { if (NF != 4 || ($3 != "pre" && $3 != "post")) { die("Invalid %s construction", "%!"); continue; } lockdata["vop_" $2, $3] = $4; continue; } if ($1 ~ /^#/) continue; # Get the function name. name = $1; uname = toupper(name); # Get the function arguments. for (numargs = 0; ; ++numargs) { if ((getline < srcfile) <= 0) { die("Unable to read through the arguments for \"%s\"", name); } fnr++; if ($1 ~ /^\};/) break; # Delete comments, if any. gsub (/\/\*.*\*\//, ""); # Condense whitespace and delete leading/trailing space. gsub(/[[:space:]]+/, " "); sub(/^ /, ""); sub(/ $/, ""); # Pick off direction. if ($1 != "INOUT" && $1 != "IN" && $1 != "OUT") die("No IN/OUT direction for \"%s\".", $0); dirs[numargs] = $1; sub(/^[A-Z]* /, ""); if ((reles[numargs] = $1) == "WILLRELE") sub(/^[A-Z]* /, ""); else reles[numargs] = "WONTRELE"; # kill trailing ; if (sub(/;$/, "") < 1) die("Missing end-of-line ; in \"%s\".", $0); # pick off variable name if ((argp = match($0, /[A-Za-z0-9_]+$/)) < 1) die("Missing var name \"a_foo\" in \"%s\".", $0); args[numargs] = substr($0, argp); $0 = substr($0, 1, argp - 1); # what is left must be type # remove trailing space (if any) sub(/ $/, ""); types[numargs] = $0; } if (numargs > 4) ctrargs = 4; else ctrargs = numargs; ctrstr = ctrargs "(KTR_VOP, \"VOP\", \"" uname "\", (uintptr_t)a,\n\t "; ctrstr = ctrstr "\"" args[0] ":0x%jX\", (uintptr_t)a->a_" args[0]; for (i = 1; i < ctrargs; ++i) ctrstr = ctrstr ", \"" args[i] ":0x%jX\", a->a_" args[i]; ctrstr = ctrstr ");"; if (pfile) { printp("\t"name"_t\t*"name";") } if (qfile) { printq("struct "name"_args;") printq("typedef int "name"_t(struct "name"_args *);\n") } if (hfile) { # Print out the vop_F_args structure. printh("struct "name"_args {\n\tstruct vop_generic_args a_gen;"); for (i = 0; i < numargs; ++i) printh("\t" t_spc(types[i]) "a_" args[i] ";"); printh("};"); printh(""); # Print out extern declaration. printh("extern struct vnodeop_desc " name "_desc;"); printh(""); # Print out function prototypes. 
printh("int " uname "_AP(struct " name "_args *);"); printh("int " uname "_APV(struct vop_vector *vop, struct " name "_args *);"); printh(""); printh("static __inline int " uname "("); for (i = 0; i < numargs; ++i) { printh("\t" t_spc(types[i]) args[i] \ (i < numargs - 1 ? "," : ")")); } printh("{"); printh("\tstruct " name "_args a;"); printh(""); printh("\ta.a_gen.a_desc = &" name "_desc;"); for (i = 0; i < numargs; ++i) printh("\ta.a_" args[i] " = " args[i] ";"); printh("\treturn (" uname "_APV("args[0]"->v_op, &a));"); printh("}"); printh(""); } if (cfile) { funcarr[name] = 1; # Print out the vop_F_vp_offsets structure. This all depends # on naming conventions and nothing else. printc("static int " name "_vp_offsets[] = {"); # as a side effect, figure out the releflags releflags = ""; vpnum = 0; for (i = 0; i < numargs; i++) { if (types[i] == "struct vnode *") { printc("\tVOPARG_OFFSETOF(struct " name \ "_args,a_" args[i] "),"); if (reles[i] == "WILLRELE") { releflags = releflags \ "|VDESC_VP" vpnum "_WILLRELE"; } vpnum++; } } sub(/^\|/, "", releflags); printc("\tVDESC_NO_OFFSET"); printc("};"); printc("\n"); printc("SDT_PROBE_DEFINE2(vfs, vop, " name ", entry, \"struct vnode *\", \"struct " name "_args *\");\n"); printc("SDT_PROBE_DEFINE3(vfs, vop, " name ", return, \"struct vnode *\", \"struct " name "_args *\", \"int\");\n"); # Print out function. printc("\nint\n" uname "_AP(struct " name "_args *a)"); printc("{"); printc(""); printc("\treturn(" uname "_APV(a->a_" args[0] "->v_op, a));"); printc("}"); printc("\nint\n" uname "_APV(struct vop_vector *vop, struct " name "_args *a)"); printc("{"); printc("\tint rc;"); printc(""); printc("\tVNASSERT(a->a_gen.a_desc == &" name "_desc, a->a_" args[0]","); printc("\t (\"Wrong a_desc in " name "(%p, %p)\", a->a_" args[0]", a));"); printc("\tVNASSERT(vop != NULL, a->a_" args[0]", (\"No "name"(%p, %p)\", a->a_" args[0]", a));") printc("\tKTR_START" ctrstr); add_pre(name); for (i = 0; i < numargs; ++i) add_debug_code(name, args[i], "Entry", "\t"); - printc("\tif (__predict_true(!SDT_PROBES_ENABLED() && vop->"name" != NULL)) {"); + printc("\tif (!SDT_PROBES_ENABLED()) {"); printc("\t\trc = vop->"name"(a);") printc("\t} else {") printc("\t\tSDT_PROBE2(vfs, vop, " name ", entry, a->a_" args[0] ", a);"); - printc("\t\tif (vop->"name" != NULL)") - printc("\t\t\trc = vop->"name"(a);") - printc("\t\telse") - printc("\t\t\trc = vop->vop_bypass(&a->a_gen);") + printc("\t\trc = vop->"name"(a);") printc("\t\tSDT_PROBE3(vfs, vop, " name ", return, a->a_" args[0] ", a, rc);"); printc("\t}") printc("\tif (rc == 0) {"); for (i = 0; i < numargs; ++i) add_debug_code(name, args[i], "OK", "\t\t"); printc("\t} else {"); for (i = 0; i < numargs; ++i) add_debug_code(name, args[i], "Error", "\t\t"); printc("\t}"); add_post(name); printc("\tKTR_STOP" ctrstr); printc("\treturn (rc);"); printc("}\n"); # Print out the vnodeop_desc structure. 
printc("struct vnodeop_desc " name "_desc = {"); # printable name printc("\t\"" name "\","); # flags vppwillrele = ""; for (i = 0; i < numargs; i++) { if (types[i] == "struct vnode **" && \ reles[i] == "WILLRELE") { vppwillrele = "|VDESC_VPP_WILLRELE"; } } if (!releflags) releflags = "0"; printc("\t" releflags vppwillrele ","); # index in struct vop_vector printc("\t__offsetof(struct vop_vector, " name "),"); # function to call printc("\t(vop_bypass_t *)" uname "_AP,"); # vp offsets printc("\t" name "_vp_offsets,"); # vpp (if any) printc("\t" find_arg_with_type("struct vnode **") ","); # cred (if any) printc("\t" find_arg_with_type("struct ucred *") ","); # thread (if any) printc("\t" find_arg_with_type("struct thread *") ","); # componentname printc("\t" find_arg_with_type("struct componentname *") ","); # transport layer information printc("};\n"); } } if (cfile) { printc("void"); printc("vfs_vector_op_register(struct vop_vector *orig_vop)"); printc("{"); printc("\tstruct vop_vector *vop;"); printc(""); printc("\tif (orig_vop->registered)"); printc("\t\tpanic(\"%s: vop_vector %p already registered\",") printc("\t\t __func__, orig_vop);"); printc(""); for (name in funcarr) { printc("\tvop = orig_vop;"); printc("\twhile (vop != NULL && \\"); printc("\t vop->"name" == NULL && vop->vop_bypass == NULL)") printc("\t\tvop = vop->vop_default;") printc("\tif (vop != NULL)"); printc("\t\torig_vop->"name" = vop->"name";"); printc(""); } printc("\tvop = orig_vop;"); printc("\twhile (vop != NULL && vop->vop_bypass == NULL)") printc("\t\tvop = vop->vop_default;") printc("\tif (vop != NULL)"); printc("\t\torig_vop->vop_bypass = vop->vop_bypass;"); + printc(""); + for (name in funcarr) { + printc("\tif (orig_vop->"name" == NULL)"); + printc("\t\torig_vop->"name" = (void *)orig_vop->vop_bypass;"); + } printc(""); printc("\torig_vop->registered = true;"); printc("}") } if (hfile) { printh("void vfs_vector_op_register(struct vop_vector *orig_vop);"); } if (pfile) { printp("\tbool\tregistered;") printp("};") } if (hfile) close(hfile); if (cfile) close(cfile); if (pfile) close(pfile); close(srcfile); exit 0; } Index: projects/clang1000-import/sys/ufs/ffs/ffs_vfsops.c =================================================================== --- projects/clang1000-import/sys/ufs/ffs/ffs_vfsops.c (revision 357178) +++ projects/clang1000-import/sys/ufs/ffs/ffs_vfsops.c (revision 357179) @@ -1,2398 +1,2402 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_quota.h" #include "opt_ufs.h" #include "opt_ffs.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static uma_zone_t uma_inode, uma_ufs1, uma_ufs2; static int ffs_mountfs(struct vnode *, struct mount *, struct thread *); static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, ufs2_daddr_t); static void ffs_ifree(struct ufsmount *ump, struct inode *ip); static int ffs_sync_lazy(struct mount *mp); static int ffs_use_bread(void *devfd, off_t loc, void **bufp, int size); static int ffs_use_bwrite(void *devfd, off_t loc, void *buf, int size); static vfs_init_t ffs_init; static vfs_uninit_t ffs_uninit; static vfs_extattrctl_t ffs_extattrctl; static vfs_cmount_t ffs_cmount; static vfs_unmount_t ffs_unmount; static vfs_mount_t ffs_mount; static vfs_statfs_t ffs_statfs; static vfs_fhtovp_t ffs_fhtovp; static vfs_sync_t ffs_sync; static struct vfsops ufs_vfsops = { .vfs_extattrctl = ffs_extattrctl, .vfs_fhtovp = ffs_fhtovp, .vfs_init = ffs_init, .vfs_mount = ffs_mount, .vfs_cmount = ffs_cmount, .vfs_quotactl = ufs_quotactl, .vfs_root = vfs_cache_root, .vfs_cachedroot = ufs_root, .vfs_statfs = ffs_statfs, .vfs_sync = ffs_sync, .vfs_uninit = ffs_uninit, .vfs_unmount = ffs_unmount, .vfs_vget = ffs_vget, .vfs_susp_clean = process_deferred_inactive, }; VFS_SET(ufs_vfsops, ufs, 0); MODULE_VERSION(ufs, 1); static b_strategy_t ffs_geom_strategy; static b_write_t ffs_bufwrite; static struct buf_ops ffs_ops = { .bop_name = "FFS", .bop_write = ffs_bufwrite, .bop_strategy = ffs_geom_strategy, .bop_sync = bufsync, #ifdef NO_FFS_SNAPSHOT .bop_bdflush = bufbdflush, #else .bop_bdflush = ffs_bdflush, #endif }; /* * Note that userquota and groupquota options are not currently used * by UFS/FFS code and generally mount(8) does not pass those options * from userland, but they can be passed by loader(8) via * vfs.root.mountfrom.options. 
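 *
 * For example, a hypothetical /boot/loader.conf entry such as
 *
 *	vfs.root.mountfrom.options="rw,userquota"
 *
 * reaches ffs_mount() through that path, which is why both options are
 * kept in ffs_opts[] and then explicitly deleted again below.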
*/ static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr", "noclusterw", "noexec", "export", "force", "from", "groupquota", "multilabel", "nfsv4acls", "fsckpid", "snapshot", "nosuid", "suiddir", "nosymfollow", "sync", "union", "userquota", "untrusted", NULL }; static int ffs_mount(struct mount *mp) { struct vnode *devvp; struct thread *td; struct ufsmount *ump = NULL; struct fs *fs; pid_t fsckpid = 0; int error, error1, flags; uint64_t mntorflags, saved_mnt_flag; accmode_t accmode; struct nameidata ndp; char *fspec; td = curthread; if (vfs_filteropt(mp->mnt_optnew, ffs_opts)) return (EINVAL); if (uma_inode == NULL) { uma_inode = uma_zcreate("FFS inode", sizeof(struct inode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); uma_ufs1 = uma_zcreate("FFS1 dinode", sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); uma_ufs2 = uma_zcreate("FFS2 dinode", sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); } vfs_deleteopt(mp->mnt_optnew, "groupquota"); vfs_deleteopt(mp->mnt_optnew, "userquota"); fspec = vfs_getopts(mp->mnt_optnew, "from", &error); if (error) return (error); mntorflags = 0; if (vfs_getopt(mp->mnt_optnew, "untrusted", NULL, NULL) == 0) mntorflags |= MNT_UNTRUSTED; if (vfs_getopt(mp->mnt_optnew, "acls", NULL, NULL) == 0) mntorflags |= MNT_ACLS; if (vfs_getopt(mp->mnt_optnew, "snapshot", NULL, NULL) == 0) { mntorflags |= MNT_SNAPSHOT; /* * Once we have set the MNT_SNAPSHOT flag, do not * persist "snapshot" in the options list. */ vfs_deleteopt(mp->mnt_optnew, "snapshot"); vfs_deleteopt(mp->mnt_opt, "snapshot"); } if (vfs_getopt(mp->mnt_optnew, "fsckpid", NULL, NULL) == 0 && vfs_scanopt(mp->mnt_optnew, "fsckpid", "%d", &fsckpid) == 1) { /* * Once we have set the restricted PID, do not * persist "fsckpid" in the options list. */ vfs_deleteopt(mp->mnt_optnew, "fsckpid"); vfs_deleteopt(mp->mnt_opt, "fsckpid"); if (mp->mnt_flag & MNT_UPDATE) { if (VFSTOUFS(mp)->um_fs->fs_ronly == 0 && vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) == 0) { vfs_mount_error(mp, "Checker enable: Must be read-only"); return (EINVAL); } } else if (vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) == 0) { vfs_mount_error(mp, "Checker enable: Must be read-only"); return (EINVAL); } /* Set to -1 if we are done */ if (fsckpid == 0) fsckpid = -1; } if (vfs_getopt(mp->mnt_optnew, "nfsv4acls", NULL, NULL) == 0) { if (mntorflags & MNT_ACLS) { vfs_mount_error(mp, "\"acls\" and \"nfsv4acls\" options " "are mutually exclusive"); return (EINVAL); } mntorflags |= MNT_NFS4ACLS; } MNT_ILOCK(mp); mp->mnt_flag |= mntorflags; MNT_IUNLOCK(mp); /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. */ if (mp->mnt_flag & MNT_UPDATE) { ump = VFSTOUFS(mp); fs = ump->um_fs; devvp = ump->um_devvp; if (fsckpid == -1 && ump->um_fsckpid > 0) { if ((error = ffs_flushfiles(mp, WRITECLOSE, td)) != 0 || (error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) return (error); g_topology_lock(); /* * Return to normal read-only mode. */ error = g_access(ump->um_cp, 0, -1, 0); g_topology_unlock(); ump->um_fsckpid = 0; } if (fs->fs_ronly == 0 && vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) { /* * Flush any dirty data and suspend filesystem. */ if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0) return (error); error = vfs_write_suspend_umnt(mp); if (error != 0) return (error); /* * Check for and optionally get rid of files open * for writing. 
*/ flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; if (MOUNTEDSOFTDEP(mp)) { error = softdep_flushfiles(mp, flags, td); } else { error = ffs_flushfiles(mp, flags, td); } if (error) { vfs_write_resume(mp, 0); return (error); } if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { printf("WARNING: %s Update error: blocks %jd " "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0) fs->fs_clean = 1; if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) { fs->fs_ronly = 0; fs->fs_clean = 0; vfs_write_resume(mp, 0); return (error); } if (MOUNTEDSOFTDEP(mp)) softdep_unmount(mp); g_topology_lock(); /* * Drop our write and exclusive access. */ g_access(ump->um_cp, 0, -1, -1); g_topology_unlock(); fs->fs_ronly = 1; MNT_ILOCK(mp); mp->mnt_flag |= MNT_RDONLY; MNT_IUNLOCK(mp); /* * Allow the writers to note that filesystem * is ro now. */ vfs_write_resume(mp, 0); } if ((mp->mnt_flag & MNT_RELOAD) && (error = ffs_reload(mp, td, 0)) != 0) return (error); if (fs->fs_ronly && !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) { /* * If we are running a checker, do not allow upgrade. */ if (ump->um_fsckpid > 0) { vfs_mount_error(mp, "Active checker, cannot upgrade to write"); return (EINVAL); } /* * If upgrade to read-write by non-root, then verify * that user has necessary permissions on the device. */ vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); error = VOP_ACCESS(devvp, VREAD | VWRITE, td->td_ucred, td); if (error) error = priv_check(td, PRIV_VFS_MOUNT_PERM); if (error) { VOP_UNLOCK(devvp); return (error); } VOP_UNLOCK(devvp); fs->fs_flags &= ~FS_UNCLEAN; if (fs->fs_clean == 0) { fs->fs_flags |= FS_UNCLEAN; if ((mp->mnt_flag & MNT_FORCE) || ((fs->fs_flags & (FS_SUJ | FS_NEEDSFSCK)) == 0 && (fs->fs_flags & FS_DOSOFTDEP))) { printf("WARNING: %s was not properly " "dismounted\n", fs->fs_fsmnt); } else { vfs_mount_error(mp, "R/W mount of %s denied. %s.%s", fs->fs_fsmnt, "Filesystem is not clean - run fsck", (fs->fs_flags & FS_SUJ) == 0 ? "" : " Forced mount will invalidate" " journal contents"); return (EPERM); } } g_topology_lock(); /* * Request exclusive write access. */ error = g_access(ump->um_cp, 0, 1, 1); g_topology_unlock(); if (error) return (error); if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0) return (error); error = vfs_write_suspend_umnt(mp); if (error != 0) return (error); fs->fs_ronly = 0; MNT_ILOCK(mp); saved_mnt_flag = MNT_RDONLY; if (MOUNTEDSOFTDEP(mp) && (mp->mnt_flag & MNT_ASYNC) != 0) saved_mnt_flag |= MNT_ASYNC; mp->mnt_flag &= ~saved_mnt_flag; MNT_IUNLOCK(mp); fs->fs_mtime = time_second; /* check to see if we need to start softdep */ if ((fs->fs_flags & FS_DOSOFTDEP) && (error = softdep_mount(devvp, mp, fs, td->td_ucred))){ fs->fs_ronly = 1; MNT_ILOCK(mp); mp->mnt_flag |= saved_mnt_flag; MNT_IUNLOCK(mp); vfs_write_resume(mp, 0); return (error); } fs->fs_clean = 0; if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) { fs->fs_ronly = 1; MNT_ILOCK(mp); mp->mnt_flag |= saved_mnt_flag; MNT_IUNLOCK(mp); vfs_write_resume(mp, 0); return (error); } if (fs->fs_snapinum[0] != 0) ffs_snapshot_mount(mp); vfs_write_resume(mp, 0); } /* * Soft updates is incompatible with "async", * so if we are doing softupdates stop the user * from setting the async flag in an update. * Softdep_mount() clears it in an initial mount * or ro->rw remount. */ if (MOUNTEDSOFTDEP(mp)) { /* XXX: Reset too late ? 
*/ MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_ASYNC; MNT_IUNLOCK(mp); } /* * Keep MNT_ACLS flag if it is stored in superblock. */ if ((fs->fs_flags & FS_ACLS) != 0) { /* XXX: Set too late ? */ MNT_ILOCK(mp); mp->mnt_flag |= MNT_ACLS; MNT_IUNLOCK(mp); } if ((fs->fs_flags & FS_NFS4ACLS) != 0) { /* XXX: Set too late ? */ MNT_ILOCK(mp); mp->mnt_flag |= MNT_NFS4ACLS; MNT_IUNLOCK(mp); } /* * If this is a request from fsck to clean up the filesystem, * then allow the specified pid to proceed. */ if (fsckpid > 0) { if (ump->um_fsckpid != 0) { vfs_mount_error(mp, "Active checker already running on %s", fs->fs_fsmnt); return (EINVAL); } KASSERT(MOUNTEDSOFTDEP(mp) == 0, ("soft updates enabled on read-only file system")); g_topology_lock(); /* * Request write access. */ error = g_access(ump->um_cp, 0, 1, 0); g_topology_unlock(); if (error) { vfs_mount_error(mp, "Checker activation failed on %s", fs->fs_fsmnt); return (error); } ump->um_fsckpid = fsckpid; if (fs->fs_snapinum[0] != 0) ffs_snapshot_mount(mp); fs->fs_mtime = time_second; fs->fs_fmod = 1; fs->fs_clean = 0; (void) ffs_sbupdate(ump, MNT_WAIT, 0); } /* * If this is a snapshot request, take the snapshot. */ if (mp->mnt_flag & MNT_SNAPSHOT) return (ffs_snapshot(mp, fspec)); /* * Must not call namei() while owning busy ref. */ vfs_unbusy(mp); } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible disk device. */ NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td); error = namei(&ndp); if ((mp->mnt_flag & MNT_UPDATE) != 0) { /* * Unmount does not start if MNT_UPDATE is set. Mount * update busies mp before setting MNT_UPDATE. We * must be able to retain our busy ref succesfully, * without sleep. */ error1 = vfs_busy(mp, MBF_NOWAIT); MPASS(error1 == 0); } if (error != 0) return (error); NDFREE(&ndp, NDF_ONLY_PNBUF); devvp = ndp.ni_vp; if (!vn_isdisk(devvp, &error)) { vput(devvp); return (error); } /* * If mount by non-root, then verify that user has necessary * permissions on the device. */ accmode = VREAD; if ((mp->mnt_flag & MNT_RDONLY) == 0) accmode |= VWRITE; error = VOP_ACCESS(devvp, accmode, td->td_ucred, td); if (error) error = priv_check(td, PRIV_VFS_MOUNT_PERM); if (error) { vput(devvp); return (error); } if (mp->mnt_flag & MNT_UPDATE) { /* * Update only * * If it's not the same vnode, or at least the same device * then it's not correct. */ if (devvp->v_rdev != ump->um_devvp->v_rdev) error = EINVAL; /* needs translation */ vput(devvp); if (error) return (error); } else { /* * New mount * * We need the name for the mount point (also used for * "last mounted on") copied in. If an error occurs, * the mount point is discarded by the upper level code. * Note that vfs_mount_alloc() populates f_mntonname for us. */ if ((error = ffs_mountfs(devvp, mp, td)) != 0) { vrele(devvp); return (error); } if (fsckpid > 0) { KASSERT(MOUNTEDSOFTDEP(mp) == 0, ("soft updates enabled on read-only file system")); ump = VFSTOUFS(mp); fs = ump->um_fs; g_topology_lock(); /* * Request write access. */ error = g_access(ump->um_cp, 0, 1, 0); g_topology_unlock(); if (error) { printf("WARNING: %s: Checker activation " "failed\n", fs->fs_fsmnt); } else { ump->um_fsckpid = fsckpid; if (fs->fs_snapinum[0] != 0) ffs_snapshot_mount(mp); fs->fs_mtime = time_second; fs->fs_clean = 0; (void) ffs_sbupdate(ump, MNT_WAIT, 0); } } } vfs_mountedfrom(mp, fspec); return (0); } /* * Compatibility with old mount system call. 
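 * The shim below simply re-expresses the old-style struct ufs_args as
 * nmount(2) options ("from", "export") and hands them to
 * kernel_mount().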
*/ static int ffs_cmount(struct mntarg *ma, void *data, uint64_t flags) { struct ufs_args args; struct export_args exp; int error; if (data == NULL) return (EINVAL); error = copyin(data, &args, sizeof args); if (error) return (error); vfs_oexport_conv(&args.export, &exp); ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN); ma = mount_arg(ma, "export", &exp, sizeof(exp)); error = kernel_mount(ma, flags); return (error); } /* * Reload all incore data for a filesystem (used after running fsck on * the root filesystem and finding things to fix). If the 'force' flag * is 0, the filesystem must be mounted read-only. * * Things to do to update the mount: * 1) invalidate all cached meta-data. * 2) re-read superblock from disk. * 3) re-read summary information from disk. * 4) invalidate all inactive vnodes. * 5) clear MNTK_SUSPEND2 and MNTK_SUSPENDED flags, allowing secondary * writers, if requested. * 6) invalidate all cached file data. * 7) re-read inode data for all active vnodes. */ int ffs_reload(struct mount *mp, struct thread *td, int flags) { struct vnode *vp, *mvp, *devvp; struct inode *ip; void *space; struct buf *bp; struct fs *fs, *newfs; struct ufsmount *ump; ufs2_daddr_t sblockloc; int i, blks, error; u_long size; int32_t *lp; ump = VFSTOUFS(mp); MNT_ILOCK(mp); if ((mp->mnt_flag & MNT_RDONLY) == 0 && (flags & FFSR_FORCE) == 0) { MNT_IUNLOCK(mp); return (EINVAL); } MNT_IUNLOCK(mp); /* * Step 1: invalidate all cached meta-data. */ devvp = VFSTOUFS(mp)->um_devvp; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); if (vinvalbuf(devvp, 0, 0, 0) != 0) panic("ffs_reload: dirty1"); VOP_UNLOCK(devvp); /* * Step 2: re-read superblock from disk. */ fs = VFSTOUFS(mp)->um_fs; if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize, NOCRED, &bp)) != 0) return (error); newfs = (struct fs *)bp->b_data; if ((newfs->fs_magic != FS_UFS1_MAGIC && newfs->fs_magic != FS_UFS2_MAGIC) || newfs->fs_bsize > MAXBSIZE || newfs->fs_bsize < sizeof(struct fs)) { brelse(bp); return (EIO); /* XXX needs translation */ } /* * Copy pointer fields back into superblock before copying in XXX * new superblock. These should really be in the ufsmount. XXX * Note that important parameters (eg fs_ncg) are unchanged. */ newfs->fs_csp = fs->fs_csp; newfs->fs_maxcluster = fs->fs_maxcluster; newfs->fs_contigdirs = fs->fs_contigdirs; newfs->fs_active = fs->fs_active; newfs->fs_ronly = fs->fs_ronly; sblockloc = fs->fs_sblockloc; bcopy(newfs, fs, (u_int)fs->fs_sbsize); brelse(bp); mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc); UFS_LOCK(ump); if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { printf("WARNING: %s: reload pending error: blocks %jd " "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } UFS_UNLOCK(ump); /* * Step 3: re-read summary information from disk. */ size = fs->fs_cssize; blks = howmany(size, fs->fs_fsize); if (fs->fs_contigsumsize > 0) size += fs->fs_ncg * sizeof(int32_t); size += fs->fs_ncg * sizeof(u_int8_t); free(fs->fs_csp, M_UFSMNT); space = malloc(size, M_UFSMNT, M_WAITOK); fs->fs_csp = space; for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, NOCRED, &bp); if (error) return (error); bcopy(bp->b_data, space, (u_int)size); space = (char *)space + size; brelse(bp); } /* * We no longer know anything about clusters per cylinder group. 
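 *
 * (Concretely: the loop that follows optimistically seeds
 * fs_maxcluster with its upper bound, fs_contigsumsize, for every
 * cylinder group, and reattaches a zeroed fs_contigdirs array; the
 * allocator then corrects the per-group maxima as it revisits each
 * group.)
 *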
*/ if (fs->fs_contigsumsize > 0) { fs->fs_maxcluster = lp = space; for (i = 0; i < fs->fs_ncg; i++) *lp++ = fs->fs_contigsumsize; space = lp; } size = fs->fs_ncg * sizeof(u_int8_t); fs->fs_contigdirs = (u_int8_t *)space; bzero(fs->fs_contigdirs, size); if ((flags & FFSR_UNSUSPEND) != 0) { MNT_ILOCK(mp); mp->mnt_kern_flag &= ~(MNTK_SUSPENDED | MNTK_SUSPEND2); wakeup(&mp->mnt_flag); MNT_IUNLOCK(mp); } loop: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { /* * Skip syncer vnode. */ if (vp->v_type == VNON) { VI_UNLOCK(vp); continue; } /* * Step 4: invalidate all cached file data. */ if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } if (vinvalbuf(vp, 0, 0, 0)) panic("ffs_reload: dirty2"); /* * Step 5: re-read inode data for all active vnodes. */ ip = VTOI(vp); error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { vput(vp); MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); return (error); } if ((error = ffs_load_inode(bp, ip, fs, ip->i_number)) != 0) { brelse(bp); vput(vp); MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); return (error); } ip->i_effnlink = ip->i_nlink; brelse(bp); vput(vp); } return (0); } /* * Common code for mount and mountroot */ static int ffs_mountfs(devvp, mp, td) struct vnode *devvp; struct mount *mp; struct thread *td; { struct ufsmount *ump; struct fs *fs; struct cdev *dev; int error, i, len, ronly; struct ucred *cred; struct g_consumer *cp; struct mount *nmp; int candelete; off_t loc; fs = NULL; ump = NULL; cred = td ? td->td_ucred : NOCRED; ronly = (mp->mnt_flag & MNT_RDONLY) != 0; KASSERT(devvp->v_type == VCHR, ("reclaimed devvp")); dev = devvp->v_rdev; if (atomic_cmpset_acq_ptr((uintptr_t *)&dev->si_mountpt, 0, (uintptr_t)mp) == 0) { VOP_UNLOCK(devvp); return (EBUSY); } g_topology_lock(); error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1); g_topology_unlock(); if (error != 0) { atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0); VOP_UNLOCK(devvp); return (error); } dev_ref(dev); devvp->v_bufobj.bo_ops = &ffs_ops; VOP_UNLOCK(devvp); if (dev->si_iosize_max != 0) mp->mnt_iosize_max = dev->si_iosize_max; if (mp->mnt_iosize_max > MAXPHYS) mp->mnt_iosize_max = MAXPHYS; if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) { error = EINVAL; vfs_mount_error(mp, "Invalid sectorsize %d for superblock size %d", cp->provider->sectorsize, SBLOCKSIZE); goto out; } /* fetch the superblock and summary information */ loc = STDSB; if ((mp->mnt_flag & MNT_ROOTFS) != 0) loc = STDSB_NOHASHFAIL; if ((error = ffs_sbget(devvp, &fs, loc, M_UFSMNT, ffs_use_bread)) != 0) goto out; /* none of these types of check-hashes are maintained by this kernel */ fs->fs_metackhash &= ~(CK_INDIR | CK_DIR); /* no support for any undefined flags */ fs->fs_flags &= FS_SUPPORTED; fs->fs_flags &= ~FS_UNCLEAN; if (fs->fs_clean == 0) { fs->fs_flags |= FS_UNCLEAN; if (ronly || (mp->mnt_flag & MNT_FORCE) || ((fs->fs_flags & (FS_SUJ | FS_NEEDSFSCK)) == 0 && (fs->fs_flags & FS_DOSOFTDEP))) { printf("WARNING: %s was not properly dismounted\n", fs->fs_fsmnt); } else { vfs_mount_error(mp, "R/W mount of %s denied. %s%s", fs->fs_fsmnt, "Filesystem is not clean - run fsck.", (fs->fs_flags & FS_SUJ) == 0 ? 
"" : " Forced mount will invalidate journal contents"); error = EPERM; goto out; } if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) && (mp->mnt_flag & MNT_FORCE)) { printf("WARNING: %s: lost blocks %jd files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } } if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { printf("WARNING: %s: mount pending error: blocks %jd " "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } if ((fs->fs_flags & FS_GJOURNAL) != 0) { #ifdef UFS_GJOURNAL /* * Get journal provider name. */ len = 1024; mp->mnt_gjprovider = malloc((u_long)len, M_UFSMNT, M_WAITOK); if (g_io_getattr("GJOURNAL::provider", cp, &len, mp->mnt_gjprovider) == 0) { mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, len, M_UFSMNT, M_WAITOK); MNT_ILOCK(mp); mp->mnt_flag |= MNT_GJOURNAL; MNT_IUNLOCK(mp); } else { printf("WARNING: %s: GJOURNAL flag on fs " "but no gjournal provider below\n", mp->mnt_stat.f_mntonname); free(mp->mnt_gjprovider, M_UFSMNT); mp->mnt_gjprovider = NULL; } #else printf("WARNING: %s: GJOURNAL flag on fs but no " "UFS_GJOURNAL support\n", mp->mnt_stat.f_mntonname); #endif } else { mp->mnt_gjprovider = NULL; } ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO); ump->um_cp = cp; ump->um_bo = &devvp->v_bufobj; ump->um_fs = fs; if (fs->fs_magic == FS_UFS1_MAGIC) { ump->um_fstype = UFS1; ump->um_balloc = ffs_balloc_ufs1; } else { ump->um_fstype = UFS2; ump->um_balloc = ffs_balloc_ufs2; } ump->um_blkatoff = ffs_blkatoff; ump->um_truncate = ffs_truncate; ump->um_update = ffs_update; ump->um_valloc = ffs_valloc; ump->um_vfree = ffs_vfree; ump->um_ifree = ffs_ifree; ump->um_rdonly = ffs_rdonly; ump->um_snapgone = ffs_snapgone; if ((mp->mnt_flag & MNT_UNTRUSTED) != 0) ump->um_check_blkno = ffs_check_blkno; else ump->um_check_blkno = NULL; mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF); ffs_oldfscompat_read(fs, ump, fs->fs_sblockloc); fs->fs_ronly = ronly; fs->fs_active = NULL; mp->mnt_data = ump; mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0]; mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1]; nmp = NULL; if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 || (nmp = vfs_getvfs(&mp->mnt_stat.f_fsid))) { if (nmp) vfs_rel(nmp); vfs_getnewfsid(mp); } mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; MNT_ILOCK(mp); mp->mnt_flag |= MNT_LOCAL; MNT_IUNLOCK(mp); if ((fs->fs_flags & FS_MULTILABEL) != 0) { #ifdef MAC MNT_ILOCK(mp); mp->mnt_flag |= MNT_MULTILABEL; MNT_IUNLOCK(mp); #else printf("WARNING: %s: multilabel flag on fs but " "no MAC support\n", mp->mnt_stat.f_mntonname); #endif } if ((fs->fs_flags & FS_ACLS) != 0) { #ifdef UFS_ACL MNT_ILOCK(mp); if (mp->mnt_flag & MNT_NFS4ACLS) printf("WARNING: %s: ACLs flag on fs conflicts with " "\"nfsv4acls\" mount option; option ignored\n", mp->mnt_stat.f_mntonname); mp->mnt_flag &= ~MNT_NFS4ACLS; mp->mnt_flag |= MNT_ACLS; MNT_IUNLOCK(mp); #else printf("WARNING: %s: ACLs flag on fs but no ACLs support\n", mp->mnt_stat.f_mntonname); #endif } if ((fs->fs_flags & FS_NFS4ACLS) != 0) { #ifdef UFS_ACL MNT_ILOCK(mp); if (mp->mnt_flag & MNT_ACLS) printf("WARNING: %s: NFSv4 ACLs flag on fs conflicts " "with \"acls\" mount option; option ignored\n", mp->mnt_stat.f_mntonname); mp->mnt_flag &= ~MNT_ACLS; mp->mnt_flag |= MNT_NFS4ACLS; MNT_IUNLOCK(mp); #else printf("WARNING: %s: NFSv4 ACLs flag on fs but no " "ACLs support\n", mp->mnt_stat.f_mntonname); #endif } if ((fs->fs_flags & FS_TRIM) != 0) { len 
= sizeof(int); if (g_io_getattr("GEOM::candelete", cp, &len, &candelete) == 0) { if (candelete) ump->um_flags |= UM_CANDELETE; else printf("WARNING: %s: TRIM flag on fs but disk " "does not support TRIM\n", mp->mnt_stat.f_mntonname); } else { printf("WARNING: %s: TRIM flag on fs but disk does " "not confirm that it supports TRIM\n", mp->mnt_stat.f_mntonname); } if (((ump->um_flags) & UM_CANDELETE) != 0) { ump->um_trim_tq = taskqueue_create("trim", M_WAITOK, taskqueue_thread_enqueue, &ump->um_trim_tq); taskqueue_start_threads(&ump->um_trim_tq, 1, PVFS, "%s trim", mp->mnt_stat.f_mntonname); ump->um_trimhash = hashinit(MAXTRIMIO, M_TRIM, &ump->um_trimlisthashsize); } } ump->um_mountp = mp; ump->um_dev = dev; ump->um_devvp = devvp; ump->um_nindir = fs->fs_nindir; ump->um_bptrtodb = fs->fs_fsbtodb; ump->um_seqinc = fs->fs_frag; for (i = 0; i < MAXQUOTAS; i++) ump->um_quotas[i] = NULLVP; #ifdef UFS_EXTATTR ufs_extattr_uepm_init(&ump->um_extattr); #endif /* * Set FS local "last mounted on" information (NULL pad) */ bzero(fs->fs_fsmnt, MAXMNTLEN); strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN); mp->mnt_stat.f_iosize = fs->fs_bsize; if (mp->mnt_flag & MNT_ROOTFS) { /* * Root mount; update timestamp in mount structure. * this will be used by the common root mount code * to update the system clock. */ mp->mnt_time = fs->fs_time; } if (ronly == 0) { fs->fs_mtime = time_second; if ((fs->fs_flags & FS_DOSOFTDEP) && (error = softdep_mount(devvp, mp, fs, cred)) != 0) { ffs_flushfiles(mp, FORCECLOSE, td); goto out; } if (fs->fs_snapinum[0] != 0) ffs_snapshot_mount(mp); fs->fs_fmod = 1; fs->fs_clean = 0; (void) ffs_sbupdate(ump, MNT_WAIT, 0); } /* * Initialize filesystem state information in mount struct. */ MNT_ILOCK(mp); mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED | MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS | MNTK_USES_BCACHE; MNT_IUNLOCK(mp); #ifdef UFS_EXTATTR #ifdef UFS_EXTATTR_AUTOSTART /* * * Auto-starting does the following: * - check for /.attribute in the fs, and extattr_start if so * - for each file in .attribute, enable that file with * an attribute of the same name. * Not clear how to report errors -- probably eat them. * This would all happen while the filesystem was busy/not * available, so would effectively be "atomic". */ (void) ufs_extattr_autostart(mp, td); #endif /* !UFS_EXTATTR_AUTOSTART */ #endif /* !UFS_EXTATTR */ return (0); out: if (fs != NULL) { free(fs->fs_csp, M_UFSMNT); free(fs, M_UFSMNT); } if (cp != NULL) { g_topology_lock(); g_vfs_close(cp); g_topology_unlock(); } if (ump) { mtx_destroy(UFS_MTX(ump)); if (mp->mnt_gjprovider != NULL) { free(mp->mnt_gjprovider, M_UFSMNT); mp->mnt_gjprovider = NULL; } free(ump, M_UFSMNT); mp->mnt_data = NULL; } atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0); dev_rel(dev); return (error); } /* * A read function for use by filesystem-layer routines. */ static int ffs_use_bread(void *devfd, off_t loc, void **bufp, int size) { struct buf *bp; int error; KASSERT(*bufp == NULL, ("ffs_use_bread: non-NULL *bufp %p\n", *bufp)); *bufp = malloc(size, M_UFSMNT, M_WAITOK); if ((error = bread((struct vnode *)devfd, btodb(loc), size, NOCRED, &bp)) != 0) return (error); bcopy(bp->b_data, *bufp, size); bp->b_flags |= B_INVAL | B_NOCACHE; brelse(bp); return (0); } #include static int bigcgs = 0; SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, ""); /* * Sanity checks for loading old filesystem superblocks. * See ffs_oldfscompat_write below for unwound actions. * * XXX - Parts get retired eventually. 
* Unfortunately new bits get added. */ static void ffs_oldfscompat_read(fs, ump, sblockloc) struct fs *fs; struct ufsmount *ump; ufs2_daddr_t sblockloc; { off_t maxfilesize; /* * If not yet done, update fs_flags location and value of fs_sblockloc. */ if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) { fs->fs_flags = fs->fs_old_flags; fs->fs_old_flags |= FS_FLAGS_UPDATED; fs->fs_sblockloc = sblockloc; } /* * If not yet done, update UFS1 superblock with new wider fields. */ if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) { fs->fs_maxbsize = fs->fs_bsize; fs->fs_time = fs->fs_old_time; fs->fs_size = fs->fs_old_size; fs->fs_dsize = fs->fs_old_dsize; fs->fs_csaddr = fs->fs_old_csaddr; fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir; fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree; fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree; fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree; } if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_old_inodefmt < FS_44INODEFMT) { fs->fs_maxfilesize = ((uint64_t)1 << 31) - 1; fs->fs_qbmask = ~fs->fs_bmask; fs->fs_qfmask = ~fs->fs_fmask; } if (fs->fs_magic == FS_UFS1_MAGIC) { ump->um_savedmaxfilesize = fs->fs_maxfilesize; maxfilesize = (uint64_t)0x80000000 * fs->fs_bsize - 1; if (fs->fs_maxfilesize > maxfilesize) fs->fs_maxfilesize = maxfilesize; } /* Compatibility for old filesystems */ if (fs->fs_avgfilesize <= 0) fs->fs_avgfilesize = AVFILESIZ; if (fs->fs_avgfpdir <= 0) fs->fs_avgfpdir = AFPDIR; if (bigcgs) { fs->fs_save_cgsize = fs->fs_cgsize; fs->fs_cgsize = fs->fs_bsize; } } /* * Unwinding superblock updates for old filesystems. * See ffs_oldfscompat_read above for details. * * XXX - Parts get retired eventually. * Unfortunately new bits get added. */ void ffs_oldfscompat_write(fs, ump) struct fs *fs; struct ufsmount *ump; { /* * Copy back UFS2 updated fields that UFS1 inspects. 
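 *
 * A concrete example of the clamping on the read side above: for UFS1,
 * ffs_oldfscompat_read() limits fs_maxfilesize to
 * (uint64_t)0x80000000 * fs_bsize - 1, i.e. 2^31 blocks.  With a
 * 16 KB block size, for instance, that is
 *
 *	2^31 * 2^14 - 1 = 2^45 - 1	(just under 32 TB)
 *
 * while filesystems predating the 4.4BSD inode format are clamped
 * harder, to 2^31 - 1 bytes.
 *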
*/ if (fs->fs_magic == FS_UFS1_MAGIC) { fs->fs_old_time = fs->fs_time; fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir; fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree; fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree; fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree; fs->fs_maxfilesize = ump->um_savedmaxfilesize; } if (bigcgs) { fs->fs_cgsize = fs->fs_save_cgsize; fs->fs_save_cgsize = 0; } } /* * unmount system call */ static int ffs_unmount(mp, mntflags) struct mount *mp; int mntflags; { struct thread *td; struct ufsmount *ump = VFSTOUFS(mp); struct fs *fs; int error, flags, susp; #ifdef UFS_EXTATTR int e_restart; #endif flags = 0; td = curthread; fs = ump->um_fs; susp = 0; if (mntflags & MNT_FORCE) { flags |= FORCECLOSE; susp = fs->fs_ronly == 0; } #ifdef UFS_EXTATTR if ((error = ufs_extattr_stop(mp, td))) { if (error != EOPNOTSUPP) printf("WARNING: unmount %s: ufs_extattr_stop " "returned errno %d\n", mp->mnt_stat.f_mntonname, error); e_restart = 0; } else { ufs_extattr_uepm_destroy(&ump->um_extattr); e_restart = 1; } #endif if (susp) { error = vfs_write_suspend_umnt(mp); if (error != 0) goto fail1; } if (MOUNTEDSOFTDEP(mp)) error = softdep_flushfiles(mp, flags, td); else error = ffs_flushfiles(mp, flags, td); if (error != 0 && error != ENXIO) goto fail; UFS_LOCK(ump); if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { printf("WARNING: unmount %s: pending error: blocks %jd " "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } UFS_UNLOCK(ump); if (MOUNTEDSOFTDEP(mp)) softdep_unmount(mp); if (fs->fs_ronly == 0 || ump->um_fsckpid > 0) { fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1; error = ffs_sbupdate(ump, MNT_WAIT, 0); if (error && error != ENXIO) { fs->fs_clean = 0; goto fail; } } if (susp) vfs_write_resume(mp, VR_START_WRITE); if (ump->um_trim_tq != NULL) { while (ump->um_trim_inflight != 0) pause("ufsutr", hz); taskqueue_drain_all(ump->um_trim_tq); taskqueue_free(ump->um_trim_tq); free (ump->um_trimhash, M_TRIM); } g_topology_lock(); if (ump->um_fsckpid > 0) { /* * Return to normal read-only mode. */ error = g_access(ump->um_cp, 0, -1, 0); ump->um_fsckpid = 0; } g_vfs_close(ump->um_cp); g_topology_unlock(); atomic_store_rel_ptr((uintptr_t *)&ump->um_dev->si_mountpt, 0); vrele(ump->um_devvp); dev_rel(ump->um_dev); mtx_destroy(UFS_MTX(ump)); if (mp->mnt_gjprovider != NULL) { free(mp->mnt_gjprovider, M_UFSMNT); mp->mnt_gjprovider = NULL; } free(fs->fs_csp, M_UFSMNT); free(fs, M_UFSMNT); free(ump, M_UFSMNT); mp->mnt_data = NULL; MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_LOCAL; MNT_IUNLOCK(mp); if (td->td_su == mp) { td->td_su = NULL; vfs_rel(mp); } return (error); fail: if (susp) vfs_write_resume(mp, VR_START_WRITE); fail1: #ifdef UFS_EXTATTR if (e_restart) { ufs_extattr_uepm_init(&ump->um_extattr); #ifdef UFS_EXTATTR_AUTOSTART (void) ufs_extattr_autostart(mp, td); #endif } #endif return (error); } /* * Flush out all the files in a filesystem. 
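 *
 * An ordering note on ffs_unmount() above, as the code implements it:
 * a forced unmount of a writable volume first suspends writes, then
 * flushes vnodes (via softdep_flushfiles() when soft updates are
 * active), writes the superblock back with fs_clean set only if
 * FS_UNCLEAN and FS_NEEDSFSCK are clear, resumes writes, drains the
 * TRIM taskqueue, and only then closes the GEOM consumer and frees
 * the ufsmount.  An error before the superblock write unwinds through
 * the fail/fail1 labels, restarting extended attributes if they had
 * been stopped.
 *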
*/ int ffs_flushfiles(mp, flags, td) struct mount *mp; int flags; struct thread *td; { struct ufsmount *ump; int qerror, error; ump = VFSTOUFS(mp); qerror = 0; #ifdef QUOTA if (mp->mnt_flag & MNT_QUOTA) { int i; error = vflush(mp, 0, SKIPSYSTEM|flags, td); if (error) return (error); for (i = 0; i < MAXQUOTAS; i++) { error = quotaoff(td, mp, i); if (error != 0) { if ((flags & EARLYFLUSH) == 0) return (error); else qerror = error; } } /* * Here we fall through to vflush again to ensure that * we have gotten rid of all the system vnodes, unless * quotas must not be closed. */ } #endif ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles"); if (ump->um_devvp->v_vflag & VV_COPYONWRITE) { if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0) return (error); ffs_snapshot_unmount(mp); flags |= FORCECLOSE; /* * Here we fall through to vflush again to ensure * that we have gotten rid of all the system vnodes. */ } /* * Do not close system files if quotas were not closed, to be * able to sync the remaining dquots. The freeblks softupdate * workitems might hold a reference on a dquot, preventing * quotaoff() from completing. Next round of * softdep_flushworklist() iteration should process the * blockers, allowing the next run of quotaoff() to finally * flush held dquots. * * Otherwise, flush all the files. */ if (qerror == 0 && (error = vflush(mp, 0, flags, td)) != 0) return (error); /* * Flush filesystem metadata. */ vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td); VOP_UNLOCK(ump->um_devvp); return (error); } /* * Get filesystem statistics. */ static int ffs_statfs(mp, sbp) struct mount *mp; struct statfs *sbp; { struct ufsmount *ump; struct fs *fs; ump = VFSTOUFS(mp); fs = ump->um_fs; if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC) panic("ffs_statfs"); sbp->f_version = STATFS_VERSION; sbp->f_bsize = fs->fs_fsize; sbp->f_iosize = fs->fs_bsize; sbp->f_blocks = fs->fs_dsize; UFS_LOCK(ump); sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag + fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks); sbp->f_bavail = freespace(fs, fs->fs_minfree) + dbtofsb(fs, fs->fs_pendingblocks); sbp->f_files = fs->fs_ncg * fs->fs_ipg - UFS_ROOTINO; sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes; UFS_UNLOCK(ump); sbp->f_namemax = UFS_MAXNAMLEN; return (0); } static bool sync_doupdate(struct inode *ip) { return ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) != 0); } static int ffs_sync_lazy_filter(struct vnode *vp, void *arg __unused) { struct inode *ip; /* * Flags are safe to access because ->v_data invalidation * is held off by listmtx. */ if (vp->v_type == VNON) return (false); ip = VTOI(vp); if (!sync_doupdate(ip) && (vp->v_iflag & VI_OWEINACT) == 0) return (false); return (true); } /* * For a lazy sync, we only care about access times, quotas and the * superblock. Other filesystem changes are already converted to * cylinder group blocks or inode blocks updates and are written to * disk by syncer. 
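 *
 * Note in passing that the space counts ffs_statfs() reports above are
 * in fs_fsize fragments, with pending (not yet released) blocks
 * credited back to f_bfree and f_bavail.  A minimal userland sketch of
 * reading them, assuming only the standard statfs(2) call
 * (show_space() is an invented name for this example):
 *
 *	#include <sys/param.h>
 *	#include <sys/mount.h>
 *	#include <stdint.h>
 *	#include <stdio.h>
 *
 *	void
 *	show_space(const char *path)
 *	{
 *		struct statfs sfs;
 *
 *		if (statfs(path, &sfs) == 0)
 *			printf("%ju of %ju fragments free, %ju bytes each\n",
 *			    (uintmax_t)sfs.f_bfree, (uintmax_t)sfs.f_blocks,
 *			    (uintmax_t)sfs.f_bsize);
 *	}
 *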
*/ static int ffs_sync_lazy(mp) struct mount *mp; { struct vnode *mvp, *vp; struct inode *ip; struct thread *td; int allerror, error; allerror = 0; td = curthread; if ((mp->mnt_flag & MNT_NOATIME) != 0) { #ifdef QUOTA qsync(mp); #endif goto sbupdate; } MNT_VNODE_FOREACH_LAZY(vp, mp, mvp, ffs_sync_lazy_filter, NULL) { if (vp->v_type == VNON) { VI_UNLOCK(vp); continue; } ip = VTOI(vp); /* * The IN_ACCESS flag is converted to IN_MODIFIED by * ufs_close() and ufs_getattr() by the calls to * ufs_itimes_locked(), without subsequent UFS_UPDATE(). * Test also all the other timestamp flags too, to pick up * any other cases that could be missed. */ if (!sync_doupdate(ip) && (vp->v_iflag & VI_OWEINACT) == 0) { VI_UNLOCK(vp); continue; } if ((error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td)) != 0) continue; #ifdef QUOTA qsyncvp(vp); #endif if (sync_doupdate(ip)) error = ffs_update(vp, 0); if (error != 0) allerror = error; vput(vp); } sbupdate: if (VFSTOUFS(mp)->um_fs->fs_fmod != 0 && (error = ffs_sbupdate(VFSTOUFS(mp), MNT_LAZY, 0)) != 0) allerror = error; return (allerror); } /* * Go through the disk queues to initiate sandbagged IO; * go through the inodes to write those that have been modified; * initiate the writing of the super block if it has been modified. * * Note: we are always called with the filesystem marked busy using * vfs_busy(). */ static int ffs_sync(mp, waitfor) struct mount *mp; int waitfor; { struct vnode *mvp, *vp, *devvp; struct thread *td; struct inode *ip; struct ufsmount *ump = VFSTOUFS(mp); struct fs *fs; int error, count, lockreq, allerror = 0; int suspend; int suspended; int secondary_writes; int secondary_accwrites; int softdep_deps; int softdep_accdeps; struct bufobj *bo; suspend = 0; suspended = 0; td = curthread; fs = ump->um_fs; if (fs->fs_fmod != 0 && fs->fs_ronly != 0 && ump->um_fsckpid == 0) panic("%s: ffs_sync: modification on read-only filesystem", fs->fs_fsmnt); if (waitfor == MNT_LAZY) { if (!rebooting) return (ffs_sync_lazy(mp)); waitfor = MNT_NOWAIT; } /* * Write back each (modified) inode. */ lockreq = LK_EXCLUSIVE | LK_NOWAIT; if (waitfor == MNT_SUSPEND) { suspend = 1; waitfor = MNT_WAIT; } if (waitfor == MNT_WAIT) lockreq = LK_EXCLUSIVE; lockreq |= LK_INTERLOCK | LK_SLEEPFAIL; loop: /* Grab snapshot of secondary write counts */ MNT_ILOCK(mp); secondary_writes = mp->mnt_secondary_writes; secondary_accwrites = mp->mnt_secondary_accwrites; MNT_IUNLOCK(mp); /* Grab snapshot of softdep dependency counts */ softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps); MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { /* * Depend on the vnode interlock to keep things stable enough * for a quick test. Since there might be hundreds of * thousands of vnodes, we cannot afford even a subroutine * call unless there's a good chance that we have work to do. */ if (vp->v_type == VNON) { VI_UNLOCK(vp); continue; } ip = VTOI(vp); if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && vp->v_bufobj.bo_dirty.bv_cnt == 0) { VI_UNLOCK(vp); continue; } if ((error = vget(vp, lockreq, td)) != 0) { if (error == ENOENT || error == ENOLCK) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } continue; } #ifdef QUOTA qsyncvp(vp); #endif if ((error = ffs_syncvnode(vp, waitfor, 0)) != 0) allerror = error; vput(vp); } /* * Force stale filesystem control information to be flushed. 
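 *
 * A note on the counters snapshotted at the top of the loop above:
 * ffs_sync() records mnt_secondary_writes and the soft updates
 * dependency counts before scanning the vnodes, and
 * softdep_check_suspend() compares them afterwards.  Any change means
 * new work arrived during the scan, so in the MNT_SUSPEND case the
 * code simply goes around again until one full scan completes with
 * the counts unchanged.
 *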
*/ if (waitfor == MNT_WAIT || rebooting) { if ((error = softdep_flushworklist(ump->um_mountp, &count, td))) allerror = error; /* Flushed work items may create new vnodes to clean */ if (allerror == 0 && count) goto loop; } devvp = ump->um_devvp; bo = &devvp->v_bufobj; BO_LOCK(bo); if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) { BO_UNLOCK(bo); vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); error = VOP_FSYNC(devvp, waitfor, td); VOP_UNLOCK(devvp); if (MOUNTEDSOFTDEP(mp) && (error == 0 || error == EAGAIN)) error = ffs_sbupdate(ump, waitfor, 0); if (error != 0) allerror = error; if (allerror == 0 && waitfor == MNT_WAIT) goto loop; } else if (suspend != 0) { if (softdep_check_suspend(mp, devvp, softdep_deps, softdep_accdeps, secondary_writes, secondary_accwrites) != 0) { MNT_IUNLOCK(mp); goto loop; /* More work needed */ } mtx_assert(MNT_MTX(mp), MA_OWNED); mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED; MNT_IUNLOCK(mp); suspended = 1; } else BO_UNLOCK(bo); /* * Write back modified superblock. */ if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor, suspended)) != 0) allerror = error; return (allerror); } int ffs_vget(mp, ino, flags, vpp) struct mount *mp; ino_t ino; int flags; struct vnode **vpp; { return (ffs_vgetf(mp, ino, flags, vpp, 0)); } int ffs_vgetf(mp, ino, flags, vpp, ffs_flags) struct mount *mp; ino_t ino; int flags; struct vnode **vpp; int ffs_flags; { struct fs *fs; struct inode *ip; struct ufsmount *ump; struct buf *bp; struct vnode *vp; int error; MPASS((ffs_flags & FFSV_REPLACE) == 0 || (flags & LK_EXCLUSIVE) != 0); error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL); if (error != 0) return (error); if (*vpp != NULL) { if ((ffs_flags & FFSV_REPLACE) == 0) return (0); vgone(*vpp); vput(*vpp); } /* * We must promote to an exclusive lock for vnode creation. This * can happen if lookup is passed LOCKSHARED. */ if ((flags & LK_TYPE_MASK) == LK_SHARED) { flags &= ~LK_TYPE_MASK; flags |= LK_EXCLUSIVE; } /* * We do not lock vnode creation as it is believed to be too * expensive for such rare case as simultaneous creation of vnode * for same ino by different processes. We just allow them to race * and check later to decide who wins. Let the race begin! */ ump = VFSTOUFS(mp); fs = ump->um_fs; ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO); /* Allocate a new vnode/inode. */ error = getnewvnode("ufs", mp, fs->fs_magic == FS_UFS1_MAGIC ? &ffs_vnodeops1 : &ffs_vnodeops2, &vp); if (error) { *vpp = NULL; uma_zfree(uma_inode, ip); return (error); } /* * FFS supports recursive locking. */ lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); VN_LOCK_AREC(vp); vp->v_data = ip; vp->v_bufobj.bo_bsize = fs->fs_bsize; ip->i_vnode = vp; ip->i_ump = ump; ip->i_number = ino; ip->i_ea_refs = 0; ip->i_nextclustercg = -1; ip->i_flag = fs->fs_magic == FS_UFS1_MAGIC ? 0 : IN_UFS2; ip->i_mode = 0; /* ensure error cases below throw away vnode */ #ifdef QUOTA { int i; for (i = 0; i < MAXQUOTAS; i++) ip->i_dquot[i] = NODQUOT; } #endif if (ffs_flags & FFSV_FORCEINSMQ) vp->v_vflag |= VV_FORCEINSMQ; error = insmntque(vp, mp); if (error != 0) { uma_zfree(uma_inode, ip); *vpp = NULL; return (error); } vp->v_vflag &= ~VV_FORCEINSMQ; error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL); if (error != 0) return (error); if (*vpp != NULL) { /* * Calls from ffs_valloc() (i.e. FFSV_REPLACE set) * operate on empty inode, which must not be found by * other threads until fully filled. Vnode for empty * inode must be not re-inserted on the hash by other * thread, after removal by us at the beginning. 
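 *
 * Restating the race handling for clarity: creation is optimistic.
 * Each racing thread builds its own inode and vnode and lets
 * vfs_hash_insert() arbitrate; a loser's vnode is discarded inside
 * vfs_hash_insert() and *vpp comes back holding the winner's vnode,
 * already locked.  Only an FFSV_REPLACE caller, which purged any
 * pre-existing vnode up front, may assert that no winner can exist.
 *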
*/ MPASS((ffs_flags & FFSV_REPLACE) == 0); return (0); } /* Read in the disk contents for the inode, copy into the inode. */ error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { /* * The inode does not contain anything useful, so it would * be misleading to leave it on its hash chain. With mode * still zero, it will be unlinked and returned to the free * list by vput(). */ + vgone(vp); vput(vp); *vpp = NULL; return (error); } if (I_IS_UFS1(ip)) ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK); else ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK); if ((error = ffs_load_inode(bp, ip, fs, ino)) != 0) { bqrelse(bp); + vgone(vp); vput(vp); *vpp = NULL; return (error); } if (DOINGSOFTDEP(vp)) softdep_load_inodeblock(ip); else ip->i_effnlink = ip->i_nlink; bqrelse(bp); /* * Initialize the vnode from the inode, check for aliases. * Note that the underlying vnode may have changed. */ error = ufs_vinit(mp, I_IS_UFS1(ip) ? &ffs_fifoops1 : &ffs_fifoops2, &vp); if (error) { + vgone(vp); vput(vp); *vpp = NULL; return (error); } /* * Finish inode initialization. */ if (vp->v_type != VFIFO) { /* FFS supports shared locking for all files except fifos. */ VN_LOCK_ASHARE(vp); } /* * Set up a generation number for this inode if it does not * already have one. This should only happen on old filesystems. */ if (ip->i_gen == 0) { while (ip->i_gen == 0) ip->i_gen = arc4random(); if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { UFS_INODE_SET_FLAG(ip, IN_MODIFIED); DIP_SET(ip, i_gen, ip->i_gen); } } #ifdef MAC if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) { /* * If this vnode is already allocated, and we're running * multi-label, attempt to perform a label association * from the extended attributes on the inode. */ error = mac_vnode_associate_extattr(mp, vp); if (error) { /* ufs_inactive will release ip->i_devvp ref. */ + vgone(vp); vput(vp); *vpp = NULL; return (error); } } #endif *vpp = vp; return (0); } /* * File handle to vnode * * Have to be really careful about stale file handles: * - check that the inode number is valid * - for UFS2 check that the inode number is initialized * - call ffs_vget() to get the locked inode * - check for an unallocated inode (i_mode == 0) * - check that the given client host has export rights and return * those rights via. exflagsp and credanonp */ static int ffs_fhtovp(mp, fhp, flags, vpp) struct mount *mp; struct fid *fhp; int flags; struct vnode **vpp; { struct ufid *ufhp; struct ufsmount *ump; struct fs *fs; struct cg *cgp; struct buf *bp; ino_t ino; u_int cg; int error; ufhp = (struct ufid *)fhp; ino = ufhp->ufid_ino; ump = VFSTOUFS(mp); fs = ump->um_fs; if (ino < UFS_ROOTINO || ino >= fs->fs_ncg * fs->fs_ipg) return (ESTALE); /* * Need to check if inode is initialized because UFS2 does lazy * initialization and nfs_fhtovp can offer arbitrary inode numbers. */ if (fs->fs_magic != FS_UFS2_MAGIC) return (ufs_fhtovp(mp, ufhp, flags, vpp)); cg = ino_to_cg(fs, ino); if ((error = ffs_getcg(fs, ump->um_devvp, cg, 0, &bp, &cgp)) != 0) return (error); if (ino >= cg * fs->fs_ipg + cgp->cg_initediblk) { brelse(bp); return (ESTALE); } brelse(bp); return (ufs_fhtovp(mp, ufhp, flags, vpp)); } /* * Initialize the filesystem. */ static int ffs_init(vfsp) struct vfsconf *vfsp; { ffs_susp_initialize(); softdep_initialize(); return (ufs_init(vfsp)); } /* * Undo the work of ffs_init(). 
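 *
 * A note on the vgone() calls in the error paths of ffs_vgetf()
 * above: until vgone() runs, the partially initialized vnode is
 * still reachable through the vfs hash, so unwinding with a bare
 * vput() could let a concurrent lookup find an inode whose contents
 * were never loaded.  vgone() reclaims the vnode, removing it from
 * the hash, before the vput() releases it.
 *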
*/ static int ffs_uninit(vfsp) struct vfsconf *vfsp; { int ret; ret = ufs_uninit(vfsp); softdep_uninitialize(); ffs_susp_uninitialize(); return (ret); } /* * Structure used to pass information from ffs_sbupdate to its * helper routine ffs_use_bwrite. */ struct devfd { struct ufsmount *ump; struct buf *sbbp; int waitfor; int suspended; int error; }; /* * Write a superblock and associated information back to disk. */ int ffs_sbupdate(ump, waitfor, suspended) struct ufsmount *ump; int waitfor; int suspended; { struct fs *fs; struct buf *sbbp; struct devfd devfd; fs = ump->um_fs; if (fs->fs_ronly == 1 && (ump->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) != (MNT_RDONLY | MNT_UPDATE) && ump->um_fsckpid == 0) panic("ffs_sbupdate: write read-only filesystem"); /* * We use the superblock's buf to serialize calls to ffs_sbupdate(). */ sbbp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize, 0, 0, 0); /* * Initialize info needed for write function. */ devfd.ump = ump; devfd.sbbp = sbbp; devfd.waitfor = waitfor; devfd.suspended = suspended; devfd.error = 0; return (ffs_sbput(&devfd, fs, fs->fs_sblockloc, ffs_use_bwrite)); } /* * Write function for use by filesystem-layer routines. */ static int ffs_use_bwrite(void *devfd, off_t loc, void *buf, int size) { struct devfd *devfdp; struct ufsmount *ump; struct buf *bp; struct fs *fs; int error; devfdp = devfd; ump = devfdp->ump; fs = ump->um_fs; /* * Writing the superblock summary information. */ if (loc != fs->fs_sblockloc) { bp = getblk(ump->um_devvp, btodb(loc), size, 0, 0, 0); bcopy(buf, bp->b_data, (u_int)size); if (devfdp->suspended) bp->b_flags |= B_VALIDSUSPWRT; if (devfdp->waitfor != MNT_WAIT) bawrite(bp); else if ((error = bwrite(bp)) != 0) devfdp->error = error; return (0); } /* * Writing the superblock itself. We need to do special checks for it. */ bp = devfdp->sbbp; if (devfdp->error != 0) { brelse(bp); return (devfdp->error); } if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 && (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) { printf("WARNING: %s: correcting fs_sblockloc from %jd to %d\n", fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1); fs->fs_sblockloc = SBLOCK_UFS1; } if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 && (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) { printf("WARNING: %s: correcting fs_sblockloc from %jd to %d\n", fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2); fs->fs_sblockloc = SBLOCK_UFS2; } if (MOUNTEDSOFTDEP(ump->um_mountp)) softdep_setup_sbupdate(ump, (struct fs *)bp->b_data, bp); bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); fs = (struct fs *)bp->b_data; ffs_oldfscompat_write(fs, ump); /* * Because we may have made changes to the superblock, we need to * recompute its check-hash. 
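 *
 * The ffs_sbput()/ffs_sbget() callbacks seen here and in
 * ffs_use_bread() earlier share a simple contract: they receive an
 * opaque devfd cookie, a byte offset and a size, and the read side
 * must allocate *bufp itself.  A userland-flavoured sketch of a
 * conforming reader, assuming an int file descriptor as the cookie
 * (read_sb() and the use of pread(2) are illustrative assumptions,
 * not part of this change):
 *
 *	#include <errno.h>
 *	#include <stdlib.h>
 *	#include <unistd.h>
 *
 *	static int
 *	read_sb(void *devfd, off_t loc, void **bufp, int size)
 *	{
 *		int fd = *(int *)devfd;
 *
 *		if ((*bufp = malloc(size)) == NULL)
 *			return (ENOMEM);
 *		if (pread(fd, *bufp, size, loc) != size)
 *			return (EIO);
 *		return (0);
 *	}
 *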
fs->fs_ckhash = ffs_calc_sbhash(fs); if (devfdp->suspended) bp->b_flags |= B_VALIDSUSPWRT; if (devfdp->waitfor != MNT_WAIT) bawrite(bp); else if ((error = bwrite(bp)) != 0) devfdp->error = error; return (devfdp->error); } static int ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp, int attrnamespace, const char *attrname) { #ifdef UFS_EXTATTR return (ufs_extattrctl(mp, cmd, filename_vp, attrnamespace, attrname)); #else return (vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname)); #endif } static void ffs_ifree(struct ufsmount *ump, struct inode *ip) { if (ump->um_fstype == UFS1 && ip->i_din1 != NULL) uma_zfree(uma_ufs1, ip->i_din1); else if (ip->i_din2 != NULL) uma_zfree(uma_ufs2, ip->i_din2); uma_zfree(uma_inode, ip); } static int dobkgrdwrite = 1; SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0, "Do background writes (honoring the BV_BKGRDWRITE flag)?"); /* * Complete a background write started from bwrite. */ static void ffs_backgroundwritedone(struct buf *bp) { struct bufobj *bufobj; struct buf *origbp; /* * Find the original buffer that we are writing. */ bufobj = bp->b_bufobj; BO_LOCK(bufobj); if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL) panic("backgroundwritedone: lost buffer"); /* * We should mark the cylinder group buffer origbp as * dirty, to not lose the failed write. */ if ((bp->b_ioflags & BIO_ERROR) != 0) origbp->b_vflags |= BV_BKGRDERR; BO_UNLOCK(bufobj); /* * Process dependencies then return any unfinished ones. */ if (!LIST_EMPTY(&bp->b_dep) && (bp->b_ioflags & BIO_ERROR) == 0) buf_complete(bp); #ifdef SOFTUPDATES if (!LIST_EMPTY(&bp->b_dep)) softdep_move_dependencies(bp, origbp); #endif /* * This buffer is marked B_NOCACHE so when it is released * by biodone it will be tossed. */ bp->b_flags |= B_NOCACHE; bp->b_flags &= ~B_CACHE; pbrelvp(bp); /* * Prevent brelse() from trying to keep and re-dirtying bp on * errors. It causes b_bufobj dereference in * bdirty()/reassignbuf(), and b_bufobj was cleared in * pbrelvp() above. */ if ((bp->b_ioflags & BIO_ERROR) != 0) bp->b_flags |= B_INVAL; bufdone(bp); BO_LOCK(bufobj); /* * Clear the BV_BKGRDINPROG flag in the original buffer * and awaken it if it is waiting for the write to complete. * If BV_BKGRDINPROG is not set in the original buffer it must * have been released and re-instantiated - which is not legal. */ KASSERT((origbp->b_vflags & BV_BKGRDINPROG), ("backgroundwritedone: lost buffer2")); origbp->b_vflags &= ~BV_BKGRDINPROG; if (origbp->b_vflags & BV_BKGRDWAIT) { origbp->b_vflags &= ~BV_BKGRDWAIT; wakeup(&origbp->b_xflags); } BO_UNLOCK(bufobj); } /* * Write, release buffer on completion. (Done by iodone * if async). Do not bother writing anything if the buffer * is invalid. * * Note that we set B_CACHE here, indicating that buffer is * fully valid and thus cacheable. This is true even of NFS * now so we set it generally. This could be set either here * or in biodone() since the I/O is synchronous. We put it * here. */ static int ffs_bufwrite(struct buf *bp) { struct buf *newbp; struct cg *cgp; CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); if (bp->b_flags & B_INVAL) { brelse(bp); return (0); } if (!BUF_ISLOCKED(bp)) panic("bufwrite: buffer is not busy???"); /* * If a background write is already in progress, delay * writing this block if it is asynchronous. Otherwise * wait for the background write to complete.
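 *
 * The larger picture, for readers landing here: a cylinder group
 * buffer marked BX_BKGRDWRITE is not overwritten in place.  Instead,
 * ffs_bufwrite() below copies it into a fresh anonymous buffer, marks
 * the original BV_BKGRDINPROG, and writes the copy, leaving the
 * original available for further updates.  When the copy's I/O
 * finishes, ffs_backgroundwritedone() above clears BV_BKGRDINPROG,
 * migrates any unfinished soft updates dependencies back to the
 * original, and flags the original BV_BKGRDERR on error so the failed
 * write is not lost.
 *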
*/ BO_LOCK(bp->b_bufobj); if (bp->b_vflags & BV_BKGRDINPROG) { if (bp->b_flags & B_ASYNC) { BO_UNLOCK(bp->b_bufobj); bdwrite(bp); return (0); } bp->b_vflags |= BV_BKGRDWAIT; msleep(&bp->b_xflags, BO_LOCKPTR(bp->b_bufobj), PRIBIO, "bwrbg", 0); if (bp->b_vflags & BV_BKGRDINPROG) panic("bufwrite: still writing"); } bp->b_vflags &= ~BV_BKGRDERR; BO_UNLOCK(bp->b_bufobj); /* * If this buffer is marked for background writing and we * do not have to wait for it, make a copy and write the * copy so as to leave this buffer ready for further use. * * This optimization eats a lot of memory. If we have a page * or buffer shortfall we can't do it. */ if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) && (bp->b_flags & B_ASYNC) && !vm_page_count_severe() && !buf_dirty_count_severe()) { KASSERT(bp->b_iodone == NULL, ("bufwrite: needs chained iodone (%p)", bp->b_iodone)); /* get a new block */ newbp = geteblk(bp->b_bufsize, GB_NOWAIT_BD); if (newbp == NULL) goto normal_write; KASSERT(buf_mapped(bp), ("Unmapped cg")); memcpy(newbp->b_data, bp->b_data, bp->b_bufsize); BO_LOCK(bp->b_bufobj); bp->b_vflags |= BV_BKGRDINPROG; BO_UNLOCK(bp->b_bufobj); newbp->b_xflags |= (bp->b_xflags & BX_FSPRIV) | BX_BKGRDMARKER; newbp->b_lblkno = bp->b_lblkno; newbp->b_blkno = bp->b_blkno; newbp->b_offset = bp->b_offset; newbp->b_iodone = ffs_backgroundwritedone; newbp->b_flags |= B_ASYNC; newbp->b_flags &= ~B_INVAL; pbgetvp(bp->b_vp, newbp); #ifdef SOFTUPDATES /* * Move over the dependencies. If there are rollbacks, * leave the parent buffer dirtied as it will need to * be written again. */ if (LIST_EMPTY(&bp->b_dep) || softdep_move_dependencies(bp, newbp) == 0) bundirty(bp); #else bundirty(bp); #endif /* * Initiate write on the copy, release the original. The * BKGRDINPROG flag prevents it from going away until * the background write completes. We have to recalculate * its check hash in case the buffer gets freed and then * reconstituted from the buffer cache during a later read. */ if ((bp->b_xflags & BX_CYLGRP) != 0) { cgp = (struct cg *)bp->b_data; cgp->cg_ckhash = 0; cgp->cg_ckhash = calculate_crc32c(~0L, bp->b_data, bp->b_bcount); } bqrelse(bp); bp = newbp; } else /* Mark the buffer clean */ bundirty(bp); /* Let the normal bufwrite do the rest for us */ normal_write: /* * If we are writing a cylinder group, update its time. 
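 *
 * The check-hash recomputation just above follows the pattern used
 * for every self-checking FFS structure: zero the stored hash, CRC
 * the whole buffer, store the result.  Schematically (these two lines
 * are quoted from the code, not a new API):
 *
 *	cgp->cg_ckhash = 0;
 *	cgp->cg_ckhash = calculate_crc32c(~0L, bp->b_data, bp->b_bcount);
 *
 * Zeroing first is what makes verification reproducible: a reader
 * repeats the computation with the hash field cleared and compares
 * the results.
 *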
*/ if ((bp->b_xflags & BX_CYLGRP) != 0) { cgp = (struct cg *)bp->b_data; cgp->cg_old_time = cgp->cg_time = time_second; } return (bufwrite(bp)); } static void ffs_geom_strategy(struct bufobj *bo, struct buf *bp) { struct vnode *vp; struct buf *tbp; int error, nocopy; vp = bo2vnode(bo); if (bp->b_iocmd == BIO_WRITE) { if ((bp->b_flags & B_VALIDSUSPWRT) == 0 && bp->b_vp != NULL && bp->b_vp->v_mount != NULL && (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0) panic("ffs_geom_strategy: bad I/O"); nocopy = bp->b_flags & B_NOCOPY; bp->b_flags &= ~(B_VALIDSUSPWRT | B_NOCOPY); if ((vp->v_vflag & VV_COPYONWRITE) && nocopy == 0 && vp->v_rdev->si_snapdata != NULL) { if ((bp->b_flags & B_CLUSTER) != 0) { runningbufwakeup(bp); TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head, b_cluster.cluster_entry) { error = ffs_copyonwrite(vp, tbp); if (error != 0 && error != EOPNOTSUPP) { bp->b_error = error; bp->b_ioflags |= BIO_ERROR; bufdone(bp); return; } } bp->b_runningbufspace = bp->b_bufsize; atomic_add_long(&runningbufspace, bp->b_runningbufspace); } else { error = ffs_copyonwrite(vp, bp); if (error != 0 && error != EOPNOTSUPP) { bp->b_error = error; bp->b_ioflags |= BIO_ERROR; bufdone(bp); return; } } } #ifdef SOFTUPDATES if ((bp->b_flags & B_CLUSTER) != 0) { TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head, b_cluster.cluster_entry) { if (!LIST_EMPTY(&tbp->b_dep)) buf_start(tbp); } } else { if (!LIST_EMPTY(&bp->b_dep)) buf_start(bp); } #endif /* * Check for metadata that needs check-hashes and update them. */ switch (bp->b_xflags & BX_FSPRIV) { case BX_CYLGRP: ((struct cg *)bp->b_data)->cg_ckhash = 0; ((struct cg *)bp->b_data)->cg_ckhash = calculate_crc32c(~0L, bp->b_data, bp->b_bcount); break; case BX_SUPERBLOCK: case BX_INODE: case BX_INDIR: case BX_DIR: printf("Check-hash write is unimplemented!!!\n"); break; case 0: break; default: printf("multiple buffer types 0x%b\n", (u_int)(bp->b_xflags & BX_FSPRIV), PRINT_UFS_BUF_XFLAGS); break; } } g_vfs_strategy(bo, bp); } int ffs_own_mount(const struct mount *mp) { if (mp->mnt_op == &ufs_vfsops) return (1); return (0); } #ifdef DDB #ifdef SOFTUPDATES /* defined in ffs_softdep.c */ extern void db_print_ffs(struct ufsmount *ump); DB_SHOW_COMMAND(ffs, db_show_ffs) { struct mount *mp; struct ufsmount *ump; if (have_addr) { ump = VFSTOUFS((struct mount *)addr); db_print_ffs(ump); return; } TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (!strcmp(mp->mnt_stat.f_fstypename, ufs_vfsconf.vfc_name)) db_print_ffs(VFSTOUFS(mp)); } } #endif /* SOFTUPDATES */ #endif /* DDB */ Index: projects/clang1000-import/sys/ufs/ufs/ufs_vnops.c =================================================================== --- projects/clang1000-import/sys/ufs/ufs/ufs_vnops.c (revision 357178) +++ projects/clang1000-import/sys/ufs/ufs/ufs_vnops.c (revision 357179) @@ -1,2802 +1,2807 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_quota.h" #include "opt_suiddir.h" #include "opt_ufs.h" #include "opt_ffs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* XXX */ #include #include #include #include #include #include #include #include #include #ifdef UFS_DIRHASH #include #endif #ifdef UFS_GJOURNAL #include FEATURE(ufs_gjournal, "Journaling support through GEOM for UFS"); #endif #ifdef QUOTA FEATURE(ufs_quota, "UFS disk quotas support"); FEATURE(ufs_quota64, "64bit UFS disk quotas support"); #endif #ifdef SUIDDIR FEATURE(suiddir, "Give all new files in directory the same ownership as the directory"); #endif #include static vop_accessx_t ufs_accessx; static int ufs_chmod(struct vnode *, int, struct ucred *, struct thread *); static int ufs_chown(struct vnode *, uid_t, gid_t, struct ucred *, struct thread *); static vop_close_t ufs_close; static vop_create_t ufs_create; static vop_getattr_t ufs_getattr; static vop_ioctl_t ufs_ioctl; static vop_link_t ufs_link; static int ufs_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *, const char *); static vop_markatime_t ufs_markatime; static vop_mkdir_t ufs_mkdir; static vop_mknod_t ufs_mknod; static vop_open_t ufs_open; static vop_pathconf_t ufs_pathconf; static vop_print_t ufs_print; static vop_readlink_t ufs_readlink; static vop_remove_t ufs_remove; static vop_rename_t ufs_rename; static vop_rmdir_t ufs_rmdir; static vop_setattr_t ufs_setattr; static vop_strategy_t ufs_strategy; static vop_symlink_t ufs_symlink; static vop_whiteout_t ufs_whiteout; static vop_close_t ufsfifo_close; static vop_kqfilter_t ufsfifo_kqfilter; SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem"); /* * A virgin directory (no blushing please). */ static struct dirtemplate mastertemplate = { 0, 12, DT_DIR, 1, ".", 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." }; static struct odirtemplate omastertemplate = { 0, 12, 1, ".", 0, DIRBLKSIZ - 12, 2, ".." 
}; static void ufs_itimes_locked(struct vnode *vp) { struct inode *ip; struct timespec ts; ASSERT_VI_LOCKED(vp, __func__); ip = VTOI(vp); if (UFS_RDONLY(ip)) goto out; if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) return; if ((vp->v_type == VBLK || vp->v_type == VCHR) && !DOINGSOFTDEP(vp)) UFS_INODE_SET_FLAG(ip, IN_LAZYMOD); else if (((vp->v_mount->mnt_kern_flag & (MNTK_SUSPENDED | MNTK_SUSPEND)) == 0) || (ip->i_flag & (IN_CHANGE | IN_UPDATE))) UFS_INODE_SET_FLAG(ip, IN_MODIFIED); else if (ip->i_flag & IN_ACCESS) UFS_INODE_SET_FLAG(ip, IN_LAZYACCESS); vfs_timestamp(&ts); if (ip->i_flag & IN_ACCESS) { DIP_SET(ip, i_atime, ts.tv_sec); DIP_SET(ip, i_atimensec, ts.tv_nsec); } if (ip->i_flag & IN_UPDATE) { DIP_SET(ip, i_mtime, ts.tv_sec); DIP_SET(ip, i_mtimensec, ts.tv_nsec); } if (ip->i_flag & IN_CHANGE) { DIP_SET(ip, i_ctime, ts.tv_sec); DIP_SET(ip, i_ctimensec, ts.tv_nsec); DIP_SET(ip, i_modrev, DIP(ip, i_modrev) + 1); } out: ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); } void ufs_itimes(struct vnode *vp) { VI_LOCK(vp); ufs_itimes_locked(vp); VI_UNLOCK(vp); } /* * Create a regular file */ static int ufs_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { int error; error = ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), ap->a_dvp, ap->a_vpp, ap->a_cnp, "ufs_create"); if (error != 0) return (error); if ((ap->a_cnp->cn_flags & MAKEENTRY) != 0) cache_enter(ap->a_dvp, *ap->a_vpp, ap->a_cnp); return (0); } /* * Mknod vnode call */ /* ARGSUSED */ static int ufs_mknod(ap) struct vop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct vattr *vap = ap->a_vap; struct vnode **vpp = ap->a_vpp; struct inode *ip; ino_t ino; int error; error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), ap->a_dvp, vpp, ap->a_cnp, "ufs_mknod"); if (error) return (error); ip = VTOI(*vpp); UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE); if (vap->va_rdev != VNOVAL) { /* * Want to be able to use this to make badblock * inodes, so don't truncate the dev number. */ DIP_SET(ip, i_rdev, vap->va_rdev); } /* * Remove inode, then reload it through VFS_VGET so it is * checked to see if it is an alias of an existing entry in * the inode cache. XXX I don't believe this is necessary now. */ (*vpp)->v_type = VNON; ino = ip->i_number; /* Save this before vgone() invalidates ip. */ vgone(*vpp); vput(*vpp); error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp); if (error) { *vpp = NULL; return (error); } return (0); } /* * Open called. */ /* ARGSUSED */ static int ufs_open(struct vop_open_args *ap) { struct vnode *vp = ap->a_vp; struct inode *ip; if (vp->v_type == VCHR || vp->v_type == VBLK) return (EOPNOTSUPP); ip = VTOI(vp); /* * Files marked append-only must be opened for appending. */ if ((ip->i_flags & APPEND) && (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) return (EPERM); vnode_create_vobject(vp, DIP(ip, i_size), ap->a_td); return (0); } /* * Close called. * * Update the times on the inode. 
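 *
 * For reference, ufs_itimes_locked() above maps the in-core flags to
 * on-disk timestamps as follows: IN_ACCESS updates di_atime,
 * IN_UPDATE updates di_mtime, and IN_CHANGE updates di_ctime and
 * bumps di_modrev.  Whether the inode is dirtied immediately
 * (IN_MODIFIED) or lazily (IN_LAZYMOD, IN_LAZYACCESS) depends on the
 * vnode type and on whether the filesystem is suspended.
 *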
*/ /* ARGSUSED */ static int ufs_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct vnode *vp = ap->a_vp; int usecount; VI_LOCK(vp); usecount = vp->v_usecount; if (usecount > 1) ufs_itimes_locked(vp); VI_UNLOCK(vp); return (0); } static int ufs_accessx(ap) struct vop_accessx_args /* { struct vnode *a_vp; accmode_t a_accmode; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); accmode_t accmode = ap->a_accmode; int error; #ifdef UFS_ACL struct acl *acl; acl_type_t type; #endif /* * Disallow write attempts on read-only filesystems; * unless the file is a socket, fifo, or a block or * character device resident on the filesystem. */ if (accmode & VMODIFY_PERMS) { switch (vp->v_type) { case VDIR: case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); #ifdef QUOTA /* * Inode is accounted in the quotas only if struct * dquot is attached to it. VOP_ACCESS() is called * from vn_open_cred() and provides a convenient * point to call getinoquota(). The lock mode is * exclusive when the file is opening for write. */ if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE) { error = getinoquota(ip); if (error != 0) return (error); } #endif break; default: break; } } /* * If immutable bit set, nobody gets to write it. "& ~VADMIN_PERMS" * permits the owner of the file to remove the IMMUTABLE flag. */ if ((accmode & (VMODIFY_PERMS & ~VADMIN_PERMS)) && (ip->i_flags & (IMMUTABLE | SF_SNAPSHOT))) return (EPERM); #ifdef UFS_ACL if ((vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) != 0) { if (vp->v_mount->mnt_flag & MNT_NFS4ACLS) type = ACL_TYPE_NFS4; else type = ACL_TYPE_ACCESS; acl = acl_alloc(M_WAITOK); if (type == ACL_TYPE_NFS4) error = ufs_getacl_nfs4_internal(vp, acl, ap->a_td); else error = VOP_GETACL(vp, type, acl, ap->a_cred, ap->a_td); switch (error) { case 0: if (type == ACL_TYPE_NFS4) { error = vaccess_acl_nfs4(vp->v_type, ip->i_uid, ip->i_gid, acl, accmode, ap->a_cred, NULL); } else { error = vfs_unixify_accmode(&accmode); if (error == 0) error = vaccess_acl_posix1e(vp->v_type, ip->i_uid, ip->i_gid, acl, accmode, ap->a_cred, NULL); } break; default: if (error != EOPNOTSUPP) printf( "ufs_accessx(): Error retrieving ACL on object (%d).\n", error); /* * XXX: Fall back until debugged. Should * eventually possibly log an error, and return * EPERM for safety. 
*/ error = vfs_unixify_accmode(&accmode); if (error == 0) error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid, accmode, ap->a_cred, NULL); } acl_free(acl); return (error); } #endif /* !UFS_ACL */ error = vfs_unixify_accmode(&accmode); if (error == 0) error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid, accmode, ap->a_cred, NULL); return (error); } /* ARGSUSED */ static int ufs_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; } */ *ap; { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct vattr *vap = ap->a_vap; VI_LOCK(vp); ufs_itimes_locked(vp); if (I_IS_UFS1(ip)) { vap->va_atime.tv_sec = ip->i_din1->di_atime; vap->va_atime.tv_nsec = ip->i_din1->di_atimensec; } else { vap->va_atime.tv_sec = ip->i_din2->di_atime; vap->va_atime.tv_nsec = ip->i_din2->di_atimensec; } VI_UNLOCK(vp); /* * Copy from inode table */ vap->va_fsid = dev2udev(ITOUMP(ip)->um_dev); vap->va_fileid = ip->i_number; vap->va_mode = ip->i_mode & ~IFMT; vap->va_nlink = ip->i_effnlink; vap->va_uid = ip->i_uid; vap->va_gid = ip->i_gid; if (I_IS_UFS1(ip)) { vap->va_rdev = ip->i_din1->di_rdev; vap->va_size = ip->i_din1->di_size; vap->va_mtime.tv_sec = ip->i_din1->di_mtime; vap->va_mtime.tv_nsec = ip->i_din1->di_mtimensec; vap->va_ctime.tv_sec = ip->i_din1->di_ctime; vap->va_ctime.tv_nsec = ip->i_din1->di_ctimensec; vap->va_bytes = dbtob((u_quad_t)ip->i_din1->di_blocks); vap->va_filerev = ip->i_din1->di_modrev; } else { vap->va_rdev = ip->i_din2->di_rdev; vap->va_size = ip->i_din2->di_size; vap->va_mtime.tv_sec = ip->i_din2->di_mtime; vap->va_mtime.tv_nsec = ip->i_din2->di_mtimensec; vap->va_ctime.tv_sec = ip->i_din2->di_ctime; vap->va_ctime.tv_nsec = ip->i_din2->di_ctimensec; vap->va_birthtime.tv_sec = ip->i_din2->di_birthtime; vap->va_birthtime.tv_nsec = ip->i_din2->di_birthnsec; vap->va_bytes = dbtob((u_quad_t)ip->i_din2->di_blocks); vap->va_filerev = ip->i_din2->di_modrev; } vap->va_flags = ip->i_flags; vap->va_gen = ip->i_gen; vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; vap->va_type = IFTOVT(ip->i_mode); return (0); } /* * Set attribute vnode op. called from several syscalls */ static int ufs_setattr(ap) struct vop_setattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; } */ *ap; { struct vattr *vap = ap->a_vap; struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct ucred *cred = ap->a_cred; struct thread *td = curthread; int error; /* * Check for unsettable attributes. */ if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { return (EINVAL); } if (vap->va_flags != VNOVAL) { if ((vap->va_flags & ~(SF_APPEND | SF_ARCHIVED | SF_IMMUTABLE | SF_NOUNLINK | SF_SNAPSHOT | UF_APPEND | UF_ARCHIVE | UF_HIDDEN | UF_IMMUTABLE | UF_NODUMP | UF_NOUNLINK | UF_OFFLINE | UF_OPAQUE | UF_READONLY | UF_REPARSE | UF_SPARSE | UF_SYSTEM)) != 0) return (EOPNOTSUPP); if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); /* * Callers may only modify the file flags on objects they * have VADMIN rights for. */ if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) return (error); /* * Unprivileged processes are not permitted to unset system * flags, or modify flags if any system flags are set. * Privileged non-jail processes may not modify system flags * if securelevel > 0 and any existing system flags are set. 
* Privileged jail processes behave like privileged non-jail * processes if the PR_ALLOW_CHFLAGS permission bit is set; * otherwise, they behave like unprivileged processes. */ if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS)) { if (ip->i_flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) { error = securelevel_gt(cred, 0); if (error) return (error); } /* The snapshot flag cannot be toggled. */ if ((vap->va_flags ^ ip->i_flags) & SF_SNAPSHOT) return (EPERM); } else { if (ip->i_flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || ((vap->va_flags ^ ip->i_flags) & SF_SETTABLE)) return (EPERM); } ip->i_flags = vap->va_flags; DIP_SET(ip, i_flags, vap->va_flags); UFS_INODE_SET_FLAG(ip, IN_CHANGE); error = UFS_UPDATE(vp, 0); if (ip->i_flags & (IMMUTABLE | APPEND)) return (error); } /* * If immutable or append, no one can change any of its attributes * except the ones already handled (in some cases, file flags * including the immutability flags themselves for the superuser). */ if (ip->i_flags & (IMMUTABLE | APPEND)) return (EPERM); /* * Go through the fields and update iff not VNOVAL. */ if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if ((error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, td)) != 0) return (error); } if (vap->va_size != VNOVAL) { /* * XXX most of the following special cases should be in * callers instead of in N filesystems. The VDIR check * mostly already is. */ switch (vp->v_type) { case VDIR: return (EISDIR); case VLNK: case VREG: /* * Truncation should have an effect in these cases. * Disallow it if the filesystem is read-only or * the file is being snapshotted. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if ((ip->i_flags & SF_SNAPSHOT) != 0) return (EPERM); break; default: /* * According to POSIX, the result is unspecified * for file types other than regular files, * directories and shared memory objects. We * don't support shared memory objects in the file * system, and have dubious support for truncating * symlinks. Just ignore the request in other cases. */ return (0); } if ((error = UFS_TRUNCATE(vp, vap->va_size, IO_NORMAL | ((vap->va_vaflags & VA_SYNC) != 0 ? 
IO_SYNC : 0), cred)) != 0) return (error); } if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_birthtime.tv_sec != VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if ((ip->i_flags & SF_SNAPSHOT) != 0) return (EPERM); error = vn_utimes_perm(vp, vap, cred, td); if (error != 0) return (error); UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_MODIFIED); if (vap->va_atime.tv_sec != VNOVAL) { ip->i_flag &= ~IN_ACCESS; DIP_SET(ip, i_atime, vap->va_atime.tv_sec); DIP_SET(ip, i_atimensec, vap->va_atime.tv_nsec); } if (vap->va_mtime.tv_sec != VNOVAL) { ip->i_flag &= ~IN_UPDATE; DIP_SET(ip, i_mtime, vap->va_mtime.tv_sec); DIP_SET(ip, i_mtimensec, vap->va_mtime.tv_nsec); } if (vap->va_birthtime.tv_sec != VNOVAL && I_IS_UFS2(ip)) { ip->i_din2->di_birthtime = vap->va_birthtime.tv_sec; ip->i_din2->di_birthnsec = vap->va_birthtime.tv_nsec; } error = UFS_UPDATE(vp, 0); if (error) return (error); } error = 0; if (vap->va_mode != (mode_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if ((ip->i_flags & SF_SNAPSHOT) != 0 && (vap->va_mode & (S_IXUSR | S_IWUSR | S_IXGRP | S_IWGRP | S_IXOTH | S_IWOTH))) return (EPERM); error = ufs_chmod(vp, (int)vap->va_mode, cred, td); } return (error); } #ifdef UFS_ACL static int ufs_update_nfs4_acl_after_mode_change(struct vnode *vp, int mode, int file_owner_id, struct ucred *cred, struct thread *td) { int error; struct acl *aclp; aclp = acl_alloc(M_WAITOK); error = ufs_getacl_nfs4_internal(vp, aclp, td); /* * We don't have to handle EOPNOTSUPP here, as the filesystem claims * it supports ACLs. */ if (error) goto out; acl_nfs4_sync_acl_from_mode(aclp, mode, file_owner_id); error = ufs_setacl_nfs4_internal(vp, aclp, td); out: acl_free(aclp); return (error); } #endif /* UFS_ACL */ /* * Mark this file's access time for update for vfs_mark_atime(). This * is called from execve() and mmap(). */ static int ufs_markatime(ap) struct vop_markatime_args /* { struct vnode *a_vp; } */ *ap; { struct inode *ip = VTOI(ap->a_vp); UFS_INODE_SET_FLAG_SHARED(ip, IN_ACCESS); /* * XXXKIB No UFS_UPDATE(ap->a_vp, 0) there. */ return (0); } /* * Change the mode on a file. * Inode must be locked before calling. */ static int ufs_chmod(vp, mode, cred, td) struct vnode *vp; int mode; struct ucred *cred; struct thread *td; { struct inode *ip = VTOI(vp); int error; /* * To modify the permissions on a file, must possess VADMIN * for that file. */ if ((error = VOP_ACCESSX(vp, VWRITE_ACL, cred, td))) return (error); /* * Privileged processes may set the sticky bit on non-directories, * as well as set the setgid bit on a file with a group that the * process is not a member of. Both of these are allowed in * jail(8). */ if (vp->v_type != VDIR && (mode & S_ISTXT)) { if (priv_check_cred(cred, PRIV_VFS_STICKYFILE)) return (EFTYPE); } if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) { error = priv_check_cred(cred, PRIV_VFS_SETGID); if (error) return (error); } /* * Deny setting setuid if we are not the file owner. 
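 *
 * (Editorial illustration, not part of the original file: absent
 * PRIV_VFS_ADMIN, a caller who is not the owner gets EPERM from, e.g.,
 *
 *	chmod("/home/other/tool", 04755);
 *
 * even when the caller has write access to the file.)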
*/ if ((mode & ISUID) && ip->i_uid != cred->cr_uid) { error = priv_check_cred(cred, PRIV_VFS_ADMIN); if (error) return (error); } ip->i_mode &= ~ALLPERMS; ip->i_mode |= (mode & ALLPERMS); DIP_SET(ip, i_mode, ip->i_mode); UFS_INODE_SET_FLAG(ip, IN_CHANGE); #ifdef UFS_ACL if ((vp->v_mount->mnt_flag & MNT_NFS4ACLS) != 0) error = ufs_update_nfs4_acl_after_mode_change(vp, mode, ip->i_uid, cred, td); #endif if (error == 0 && (ip->i_flag & IN_CHANGE) != 0) error = UFS_UPDATE(vp, 0); return (error); } /* * Perform chown operation on inode ip; * inode must be locked prior to call. */ static int ufs_chown(vp, uid, gid, cred, td) struct vnode *vp; uid_t uid; gid_t gid; struct ucred *cred; struct thread *td; { struct inode *ip = VTOI(vp); uid_t ouid; gid_t ogid; int error = 0; #ifdef QUOTA int i; ufs2_daddr_t change; #endif if (uid == (uid_t)VNOVAL) uid = ip->i_uid; if (gid == (gid_t)VNOVAL) gid = ip->i_gid; /* * To modify the ownership of a file, must possess VADMIN for that * file. */ if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td))) return (error); /* * To change the owner of a file, or change the group of a file to a * group of which we are not a member, the caller must have * privilege. */ if (((uid != ip->i_uid && uid != cred->cr_uid) || (gid != ip->i_gid && !groupmember(gid, cred))) && (error = priv_check_cred(cred, PRIV_VFS_CHOWN))) return (error); ogid = ip->i_gid; ouid = ip->i_uid; #ifdef QUOTA if ((error = getinoquota(ip)) != 0) return (error); if (ouid == uid) { dqrele(vp, ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { dqrele(vp, ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } change = DIP(ip, i_blocks); (void) chkdq(ip, -change, cred, CHOWN|FORCE); (void) chkiq(ip, -1, cred, CHOWN|FORCE); for (i = 0; i < MAXQUOTAS; i++) { dqrele(vp, ip->i_dquot[i]); ip->i_dquot[i] = NODQUOT; } #endif ip->i_gid = gid; DIP_SET(ip, i_gid, gid); ip->i_uid = uid; DIP_SET(ip, i_uid, uid); #ifdef QUOTA if ((error = getinoquota(ip)) == 0) { if (ouid == uid) { dqrele(vp, ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { dqrele(vp, ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } if ((error = chkdq(ip, change, cred, CHOWN)) == 0) { if ((error = chkiq(ip, 1, cred, CHOWN)) == 0) goto good; else (void) chkdq(ip, -change, cred, CHOWN|FORCE); } for (i = 0; i < MAXQUOTAS; i++) { dqrele(vp, ip->i_dquot[i]); ip->i_dquot[i] = NODQUOT; } } ip->i_gid = ogid; DIP_SET(ip, i_gid, ogid); ip->i_uid = ouid; DIP_SET(ip, i_uid, ouid); if (getinoquota(ip) == 0) { if (ouid == uid) { dqrele(vp, ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { dqrele(vp, ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } (void) chkdq(ip, change, cred, FORCE|CHOWN); (void) chkiq(ip, 1, cred, FORCE|CHOWN); (void) getinoquota(ip); } return (error); good: if (getinoquota(ip)) panic("ufs_chown: lost quota"); #endif /* QUOTA */ UFS_INODE_SET_FLAG(ip, IN_CHANGE); if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) { if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) { ip->i_mode &= ~(ISUID | ISGID); DIP_SET(ip, i_mode, ip->i_mode); } } error = UFS_UPDATE(vp, 0); return (error); } static int ufs_remove(ap) struct vop_remove_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct inode *ip; struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; int error; struct thread *td; td = curthread; ip = VTOI(vp); if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (VTOI(dvp)->i_flags & 
APPEND)) { error = EPERM; goto out; } #ifdef UFS_GJOURNAL ufs_gjournal_orphan(vp); #endif error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0); if (ip->i_nlink <= 0) vp->v_vflag |= VV_NOSYNC; if ((ip->i_flags & SF_SNAPSHOT) != 0) { /* * Avoid deadlock where another thread is trying to * update the inodeblock for dvp and is waiting on * snaplk. Temporarily unlock the vnode lock for the * unlinked file and sync the directory. This should * allow vput() of the directory to not block later on * while holding the snapshot vnode locked, assuming * that the directory hasn't been unlinked too. */ VOP_UNLOCK(vp); (void) VOP_FSYNC(dvp, MNT_WAIT, td); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); } out: return (error); } static void print_bad_link_count(const char *funcname, struct vnode *dvp) { struct inode *dip; dip = VTOI(dvp); uprintf("%s: Bad link count %d on parent inode %jd in file system %s\n", funcname, dip->i_effnlink, (intmax_t)dip->i_number, dvp->v_mount->mnt_stat.f_mntonname); } /* * link vnode call */ static int ufs_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct vnode *vp = ap->a_vp; struct vnode *tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; struct inode *ip; struct direct newdir; int error; #ifdef INVARIANTS if ((cnp->cn_flags & HASBUF) == 0) panic("ufs_link: no name"); #endif if (VTOI(tdvp)->i_effnlink < 2) { print_bad_link_count("ufs_link", tdvp); error = EINVAL; goto out; } ip = VTOI(vp); if (ip->i_nlink >= UFS_LINK_MAX) { error = EMLINK; goto out; } /* * The file may have been removed after namei dropped the original * lock. */ if (ip->i_effnlink == 0) { error = ENOENT; goto out; } if (ip->i_flags & (IMMUTABLE | APPEND)) { error = EPERM; goto out; } ip->i_effnlink++; ip->i_nlink++; DIP_SET(ip, i_nlink, ip->i_nlink); UFS_INODE_SET_FLAG(ip, IN_CHANGE); if (DOINGSOFTDEP(vp)) softdep_setup_link(VTOI(tdvp), ip); error = UFS_UPDATE(vp, !DOINGSOFTDEP(vp) && !DOINGASYNC(vp)); if (!error) { ufs_makedirentry(ip, cnp, &newdir); error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL, 0); } if (error) { ip->i_effnlink--; ip->i_nlink--; DIP_SET(ip, i_nlink, ip->i_nlink); UFS_INODE_SET_FLAG(ip, IN_CHANGE); if (DOINGSOFTDEP(vp)) softdep_revert_link(VTOI(tdvp), ip); } out: return (error); } /* * whiteout vnode call */ static int ufs_whiteout(ap) struct vop_whiteout_args /* { struct vnode *a_dvp; struct componentname *a_cnp; int a_flags; } */ *ap; { struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct direct newdir; int error = 0; switch (ap->a_flags) { case LOOKUP: /* 4.4 format directories support whiteout operations */ if (dvp->v_mount->mnt_maxsymlinklen > 0) return (0); return (EOPNOTSUPP); case CREATE: /* create a new directory whiteout */ #ifdef INVARIANTS if ((cnp->cn_flags & SAVENAME) == 0) panic("ufs_whiteout: missing name"); if (dvp->v_mount->mnt_maxsymlinklen <= 0) panic("ufs_whiteout: old format filesystem"); #endif newdir.d_ino = UFS_WINO; newdir.d_namlen = cnp->cn_namelen; bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1); newdir.d_type = DT_WHT; error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL, 0); break; case DELETE: /* remove an existing directory whiteout */ #ifdef INVARIANTS if (dvp->v_mount->mnt_maxsymlinklen <= 0) panic("ufs_whiteout: old format filesystem"); #endif cnp->cn_flags &= ~DOWHITEOUT; error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0); break; default: panic("ufs_whiteout: unknown op"); } return (error); } static volatile int rename_restarts;
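/*
 * Editorial sketch (illustrative only, not part of the original file):
 * the counter above is exported read-only by the SYSCTL_INT() below, so
 * a hypothetical userland monitor could poll it as
 * "vfs.ufs.rename_restarts":
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int restarts;
 *		size_t len = sizeof(restarts);
 *
 *		if (sysctlbyname("vfs.ufs.rename_restarts", &restarts,
 *		    &len, NULL, 0) == -1)
 *			return (1);
 *		printf("rename restarts: %d\n", restarts);
 *		return (0);
 *	}
 */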
SYSCTL_INT(_vfs_ufs, OID_AUTO, rename_restarts, CTLFLAG_RD, __DEVOLATILE(int *, &rename_restarts), 0, "Times rename had to restart due to lock contention"); /* * Rename system call. * rename("foo", "bar"); * is essentially * unlink("bar"); * link("foo", "bar"); * unlink("foo"); * but ``atomically''. Can't do full commit without saving state in the * inode on disk which isn't feasible at this time. Best we can do is * always guarantee the target exists. * * Basic algorithm is: * * 1) Bump link count on source while we're linking it to the * target. This also ensures the inode won't be deleted out * from underneath us while we work (it may be truncated by * a concurrent `trunc' or `open' for creation). * 2) Link source to destination. If destination already exists, * delete it first. * 3) Unlink source reference to inode if still around. If a * directory was moved and the parent of the destination * is different from the source, patch the ".." entry in the * directory. */ static int ufs_rename(ap) struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap; { struct vnode *tvp = ap->a_tvp; struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; struct vnode *fdvp = ap->a_fdvp; struct vnode *nvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct thread *td = fcnp->cn_thread; struct inode *fip, *tip, *tdp, *fdp; struct direct newdir; off_t endoff; int doingdirectory, newparent; int error = 0; struct mount *mp; ino_t ino; #ifdef INVARIANTS if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("ufs_rename: no name"); #endif endoff = 0; mp = tdvp->v_mount; VOP_UNLOCK(tdvp); if (tvp && tvp != tdvp) VOP_UNLOCK(tvp); /* * Check for cross-device rename. */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; mp = NULL; goto releout; } relock: /* * We need to acquire 2 to 4 locks depending on whether tvp is NULL * and fdvp and tdvp are the same directory. Subsequently we need * to double-check all paths and in the directory rename case we * need to verify that we are not creating a directory loop. To * handle this we acquire all but fdvp using non-blocking * acquisitions. If we fail to acquire any lock in the path we will * drop all held locks, acquire the new lock in a blocking fashion, * and then release it and restart the rename. This acquire/release * step ensures that we do not spin on a lock waiting for release. */ error = vn_lock(fdvp, LK_EXCLUSIVE); if (error) goto releout; if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) { VOP_UNLOCK(fdvp); error = vn_lock(tdvp, LK_EXCLUSIVE); if (error) goto releout; VOP_UNLOCK(tdvp); atomic_add_int(&rename_restarts, 1); goto relock; } /* * Re-resolve fvp to be certain it still exists and fetch the * correct vnode. */ error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino); if (error) { VOP_UNLOCK(fdvp); VOP_UNLOCK(tdvp); goto releout; } error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp); if (error) { VOP_UNLOCK(fdvp); VOP_UNLOCK(tdvp); if (error != EBUSY) goto releout; error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp); if (error != 0) goto releout; VOP_UNLOCK(nvp); vrele(fvp); fvp = nvp; atomic_add_int(&rename_restarts, 1); goto relock; } vrele(fvp); fvp = nvp; /* * Re-resolve tvp and acquire the vnode lock if present.
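 *
 * (Editorial sketch, not part of the original file: the retry discipline
 * described above boils down to
 *
 *	relock:
 *		lock(first);			// blocking acquire
 *		if (!trylock(second)) {		// non-blocking acquire
 *			unlock(first);
 *			lock(second);		// wait until it is free...
 *			unlock(second);		// ...but do not keep it
 *			rename_restarts++;
 *			goto relock;
 *		}
 *
 * so no thread ever sleeps on one vnode lock while holding another that
 * a competing rename may need.)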
*/ error = ufs_lookup_ino(tdvp, NULL, tcnp, &ino); if (error != 0 && error != EJUSTRETURN) { VOP_UNLOCK(fdvp); VOP_UNLOCK(tdvp); VOP_UNLOCK(fvp); goto releout; } /* * If tvp disappeared we just carry on. */ if (error == EJUSTRETURN && tvp != NULL) { vrele(tvp); tvp = NULL; } /* * Get the tvp ino if the lookup succeeded. We may have to restart * if the non-blocking acquire fails. */ if (error == 0) { nvp = NULL; error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp); if (tvp) vrele(tvp); tvp = nvp; if (error) { VOP_UNLOCK(fdvp); VOP_UNLOCK(tdvp); VOP_UNLOCK(fvp); if (error != EBUSY) goto releout; error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp); if (error != 0) goto releout; vput(nvp); atomic_add_int(&rename_restarts, 1); goto relock; } } fdp = VTOI(fdvp); fip = VTOI(fvp); tdp = VTOI(tdvp); tip = NULL; if (tvp) tip = VTOI(tvp); if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (VTOI(tdvp)->i_flags & APPEND))) { error = EPERM; goto unlockout; } /* * Renaming a file to itself has no effect. The upper layers should * not call us in that case. However, things could change after * we drop the locks above. */ if (fvp == tvp) { error = 0; goto unlockout; } doingdirectory = 0; newparent = 0; ino = fip->i_number; if (fip->i_nlink >= UFS_LINK_MAX) { error = EMLINK; goto unlockout; } if ((fip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (fdp->i_flags & APPEND)) { error = EPERM; goto unlockout; } if ((fip->i_mode & IFMT) == IFDIR) { /* * Avoid ".", "..", and aliases of "." for obvious reasons. */ if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || fdp == fip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) { error = EINVAL; goto unlockout; } if (fdp->i_number != tdp->i_number) newparent = tdp->i_number; doingdirectory = 1; } if ((fvp->v_type == VDIR && fvp->v_mountedhere != NULL) || (tvp != NULL && tvp->v_type == VDIR && tvp->v_mountedhere != NULL)) { error = EXDEV; goto unlockout; } /* * If ".." must be changed (ie the directory gets a new * parent) then the source directory must not be in the * directory hierarchy above the target, as this would * orphan everything below the source directory. Also * the user must have write permission in the source so * as to be able to change "..". */ if (doingdirectory && newparent) { error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread); if (error) goto unlockout; error = ufs_checkpath(ino, fdp->i_number, tdp, tcnp->cn_cred, &ino); /* * We encountered a lock that we have to wait for. Unlock * everything else and VGET before restarting. */ if (ino) { VOP_UNLOCK(fdvp); VOP_UNLOCK(fvp); VOP_UNLOCK(tdvp); if (tvp) VOP_UNLOCK(tvp); error = VFS_VGET(mp, ino, LK_SHARED, &nvp); if (error == 0) vput(nvp); atomic_add_int(&rename_restarts, 1); goto relock; } if (error) goto unlockout; if ((tcnp->cn_flags & SAVESTART) == 0) panic("ufs_rename: lost to startdir"); } if (fip->i_effnlink == 0 || fdp->i_effnlink == 0 || tdp->i_effnlink == 0) panic("Bad effnlink fip %p, fdp %p, tdp %p", fip, fdp, tdp); /* * 1) Bump link count while we're moving stuff * around. If we crash somewhere before * completing our work, the link count * may be wrong, but correctable. */ fip->i_effnlink++; fip->i_nlink++; DIP_SET(fip, i_nlink, fip->i_nlink); UFS_INODE_SET_FLAG(fip, IN_CHANGE); if (DOINGSOFTDEP(fvp)) softdep_setup_link(tdp, fip); error = UFS_UPDATE(fvp, !DOINGSOFTDEP(fvp) && !DOINGASYNC(fvp)); if (error) goto bad; /* * 2) If target doesn't exist, link the target * to the source and unlink the source. 
* Otherwise, rewrite the target directory * entry to reference the source inode and * expunge the original entry's existence. */ if (tip == NULL) { if (ITODEV(tdp) != ITODEV(fip)) panic("ufs_rename: EXDEV"); if (doingdirectory && newparent) { /* * Account for ".." in new directory. * When source and destination have the same * parent we don't adjust the link count. The * actual link modification is completed when * .. is rewritten below. */ if (tdp->i_nlink >= UFS_LINK_MAX) { error = EMLINK; goto bad; } } ufs_makedirentry(fip, tcnp, &newdir); error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL, 1); if (error) goto bad; /* Setup tdvp for directory compaction if needed. */ if (tdp->i_count && tdp->i_endoff && tdp->i_endoff < tdp->i_size) endoff = tdp->i_endoff; } else { if (ITODEV(tip) != ITODEV(tdp) || ITODEV(tip) != ITODEV(fip)) panic("ufs_rename: EXDEV"); /* * Short circuit rename(foo, foo). */ if (tip->i_number == fip->i_number) panic("ufs_rename: same file"); /* * If the parent directory is "sticky", then the caller * must possess VADMIN for the parent directory, or the * destination of the rename. This implements append-only * directories. */ if ((tdp->i_mode & S_ISTXT) && VOP_ACCESS(tdvp, VADMIN, tcnp->cn_cred, td) && VOP_ACCESS(tvp, VADMIN, tcnp->cn_cred, td)) { error = EPERM; goto bad; } /* * Target must be empty if a directory and have no links * to it. Also, ensure source and target are compatible * (both directories, or both not directories). */ if ((tip->i_mode & IFMT) == IFDIR) { if ((tip->i_effnlink > 2) || !ufs_dirempty(tip, tdp->i_number, tcnp->cn_cred)) { error = ENOTEMPTY; goto bad; } if (!doingdirectory) { error = ENOTDIR; goto bad; } cache_purge(tdvp); } else if (doingdirectory) { error = EISDIR; goto bad; } if (doingdirectory) { if (!newparent) { tdp->i_effnlink--; if (DOINGSOFTDEP(tdvp)) softdep_change_linkcnt(tdp); } tip->i_effnlink--; if (DOINGSOFTDEP(tvp)) softdep_change_linkcnt(tip); } error = ufs_dirrewrite(tdp, tip, fip->i_number, IFTODT(fip->i_mode), (doingdirectory && newparent) ? newparent : doingdirectory); if (error) { if (doingdirectory) { if (!newparent) { tdp->i_effnlink++; if (DOINGSOFTDEP(tdvp)) softdep_change_linkcnt(tdp); } tip->i_effnlink++; if (DOINGSOFTDEP(tvp)) softdep_change_linkcnt(tip); } } if (doingdirectory && !DOINGSOFTDEP(tvp)) { /* * The only stuff left in the directory is "." * and "..". The "." reference is inconsequential * since we are quashing it. We have removed the "." * reference and the reference in the parent directory, * but there may be other hard links. The soft * dependency code will arrange to do these operations * after the parent directory entry has been deleted on * disk, so when running with that code we avoid doing * them now. */ if (!newparent) { tdp->i_nlink--; DIP_SET(tdp, i_nlink, tdp->i_nlink); UFS_INODE_SET_FLAG(tdp, IN_CHANGE); } tip->i_nlink--; DIP_SET(tip, i_nlink, tip->i_nlink); UFS_INODE_SET_FLAG(tip, IN_CHANGE); } } /* * 3) Unlink the source. We have to resolve the path again to * fixup the directory offset and count for ufs_dirremove. */ if (fdvp == tdvp) { error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino); if (error) panic("ufs_rename: from entry went away!"); if (ino != fip->i_number) panic("ufs_rename: ino mismatch %ju != %ju\n", (uintmax_t)ino, (uintmax_t)fip->i_number); } /* * If the source is a directory with a * new parent, the link count of the old * parent directory must be decremented * and ".." set to point to the new parent. 
*/ if (doingdirectory && newparent) { /* * If tip exists we simply use its link, otherwise we must * add a new one. */ if (tip == NULL) { tdp->i_effnlink++; tdp->i_nlink++; DIP_SET(tdp, i_nlink, tdp->i_nlink); UFS_INODE_SET_FLAG(tdp, IN_CHANGE); if (DOINGSOFTDEP(tdvp)) softdep_setup_dotdot_link(tdp, fip); error = UFS_UPDATE(tdvp, !DOINGSOFTDEP(tdvp) && !DOINGASYNC(tdvp)); /* Don't go to bad here as the new link exists. */ if (error) goto unlockout; } else if (DOINGSUJ(tdvp)) /* Journal must account for each new link. */ softdep_setup_dotdot_link(tdp, fip); fip->i_offset = mastertemplate.dot_reclen; ufs_dirrewrite(fip, fdp, newparent, DT_DIR, 0); cache_purge(fdvp); } error = ufs_dirremove(fdvp, fip, fcnp->cn_flags, 0); /* * The kern_renameat() looks up the fvp using the DELETE flag, which * causes the removal of the name cache entry for fvp. * As the relookup of the fvp is done in two steps: * ufs_lookup_ino() and then VFS_VGET(), another thread might do a * normal lookup of the from name just before the VFS_VGET() call, * causing the cache entry to be re-instantiated. * * The same issue also applies to tvp if it exists as * otherwise we may have a stale name cache entry for the new * name that references the old i-node if it has other links * or open file descriptors. */ cache_purge(fvp); if (tvp) cache_purge(tvp); cache_purge_negative(tdvp); unlockout: vput(fdvp); vput(fvp); if (tvp) vput(tvp); /* * If compaction or fsync was requested do it now that other locks * are no longer needed. */ if (error == 0 && endoff != 0) { error = UFS_TRUNCATE(tdvp, endoff, IO_NORMAL | (DOINGASYNC(tdvp) ? 0 : IO_SYNC), tcnp->cn_cred); if (error != 0) vn_printf(tdvp, "ufs_rename: failed to truncate, error %d\n", error); #ifdef UFS_DIRHASH else if (tdp->i_dirhash != NULL) ufsdirhash_dirtrunc(tdp, endoff); #endif /* * Even if the directory compaction failed, rename was * successful. Do not propagate a UFS_TRUNCATE() error * to the caller. */ error = 0; } if (error == 0 && tdp->i_flag & IN_NEEDSYNC) error = VOP_FSYNC(tdvp, MNT_WAIT, td); vput(tdvp); return (error); bad: fip->i_effnlink--; fip->i_nlink--; DIP_SET(fip, i_nlink, fip->i_nlink); UFS_INODE_SET_FLAG(fip, IN_CHANGE); if (DOINGSOFTDEP(fvp)) softdep_revert_link(tdp, fip); goto unlockout; releout: vrele(fdvp); vrele(fvp); vrele(tdvp); if (tvp) vrele(tvp); return (error); } #ifdef UFS_ACL static int ufs_do_posix1e_acl_inheritance_dir(struct vnode *dvp, struct vnode *tvp, mode_t dmode, struct ucred *cred, struct thread *td) { int error; struct inode *ip = VTOI(tvp); struct acl *dacl, *acl; acl = acl_alloc(M_WAITOK); dacl = acl_alloc(M_WAITOK); /* * Retrieve default ACL from parent, if any. */ error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td); switch (error) { case 0: /* * Retrieved a default ACL, so merge mode and ACL if * necessary. If the ACL is empty, fall through to * the "not defined or available" case. */ if (acl->acl_cnt != 0) { dmode = acl_posix1e_newfilemode(dmode, acl); ip->i_mode = dmode; DIP_SET(ip, i_mode, dmode); *dacl = *acl; ufs_sync_acl_from_inode(ip, acl); break; } /* FALLTHROUGH */ case EOPNOTSUPP: /* * Just use the mode as-is. */ ip->i_mode = dmode; DIP_SET(ip, i_mode, dmode); error = 0; goto out; default: goto out; } /* * XXX: If we abort now, will Soft Updates notify the extattr * code that the EAs for the file need to be released?
*/ error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td); if (error == 0) error = VOP_SETACL(tvp, ACL_TYPE_DEFAULT, dacl, cred, td); switch (error) { case 0: break; case EOPNOTSUPP: /* * XXX: This should not happen, as EOPNOTSUPP above * was supposed to free acl. */ printf("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()\n"); /* panic("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()"); */ break; default: goto out; } out: acl_free(acl); acl_free(dacl); return (error); } static int ufs_do_posix1e_acl_inheritance_file(struct vnode *dvp, struct vnode *tvp, mode_t mode, struct ucred *cred, struct thread *td) { int error; struct inode *ip = VTOI(tvp); struct acl *acl; acl = acl_alloc(M_WAITOK); /* * Retrieve default ACL for parent, if any. */ error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td); switch (error) { case 0: /* * Retrieved a default ACL, so merge mode and ACL if * necessary. */ if (acl->acl_cnt != 0) { /* * Two possible ways for default ACL to not * be present. First, the EA can be * undefined, or second, the default ACL can * be blank. If it's blank, fall through to * the "not defined" case. */ mode = acl_posix1e_newfilemode(mode, acl); ip->i_mode = mode; DIP_SET(ip, i_mode, mode); ufs_sync_acl_from_inode(ip, acl); break; } /* FALLTHROUGH */ case EOPNOTSUPP: /* * Just use the mode as-is. */ ip->i_mode = mode; DIP_SET(ip, i_mode, mode); error = 0; goto out; default: goto out; } /* * XXX: If we abort now, will Soft Updates notify the extattr * code that the EAs for the file need to be released? */ error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td); switch (error) { case 0: break; case EOPNOTSUPP: /* * XXX: This should not happen, as EOPNOTSUPP above was * supposed to free acl. */ printf("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() " "but no VOP_SETACL()\n"); /* panic("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() " "but no VOP_SETACL()"); */ break; default: goto out; } out: acl_free(acl); return (error); } static int ufs_do_nfs4_acl_inheritance(struct vnode *dvp, struct vnode *tvp, mode_t child_mode, struct ucred *cred, struct thread *td) { int error; struct acl *parent_aclp, *child_aclp; parent_aclp = acl_alloc(M_WAITOK); child_aclp = acl_alloc(M_WAITOK | M_ZERO); error = ufs_getacl_nfs4_internal(dvp, parent_aclp, td); if (error) goto out; acl_nfs4_compute_inherited_acl(parent_aclp, child_aclp, child_mode, VTOI(tvp)->i_uid, tvp->v_type == VDIR); error = ufs_setacl_nfs4_internal(tvp, child_aclp, td); if (error) goto out; out: acl_free(parent_aclp); acl_free(child_aclp); return (error); } #endif /* * Mkdir system call */ static int ufs_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct componentname *cnp = ap->a_cnp; struct inode *ip, *dp; struct vnode *tvp; struct buf *bp; struct dirtemplate dirtemplate, *dtp; struct direct newdir; int error, dmode; long blkoff; #ifdef INVARIANTS if ((cnp->cn_flags & HASBUF) == 0) panic("ufs_mkdir: no name"); #endif dp = VTOI(dvp); if (dp->i_nlink >= UFS_LINK_MAX) { error = EMLINK; goto out; } dmode = vap->va_mode & 0777; dmode |= IFDIR; /* * Must simulate part of ufs_makeinode here to acquire the inode, * but not have it entered in the parent directory. The entry is * made later after writing "." and ".." entries.
*/ if (dp->i_effnlink < 2) { print_bad_link_count("ufs_mkdir", dvp); error = EINVAL; goto out; } error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp); if (error) goto out; ip = VTOI(tvp); ip->i_gid = dp->i_gid; DIP_SET(ip, i_gid, dp->i_gid); #ifdef SUIDDIR { #ifdef QUOTA struct ucred ucred, *ucp; gid_t ucred_group; ucp = cnp->cn_cred; #endif /* * If we are hacking owners here, (only do this where told to) * and we are not giving it TO root, (would subvert quotas) * then go ahead and give it to the other user. * The new directory also inherits the SUID bit. * If user's UID and dir UID are the same, * 'give it away' so that the SUID is still forced on. */ if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && (dp->i_mode & ISUID) && dp->i_uid) { dmode |= ISUID; ip->i_uid = dp->i_uid; DIP_SET(ip, i_uid, dp->i_uid); #ifdef QUOTA if (dp->i_uid != cnp->cn_cred->cr_uid) { /* * Make sure the correct user gets charged * for the space. * Make a dummy credential for the victim. * XXX This seems to never be accessed out of * our context so a stack variable is ok. */ refcount_init(&ucred.cr_ref, 1); ucred.cr_uid = ip->i_uid; ucred.cr_ngroups = 1; ucred.cr_groups = &ucred_group; ucred.cr_groups[0] = dp->i_gid; ucp = &ucred; } #endif } else { ip->i_uid = cnp->cn_cred->cr_uid; DIP_SET(ip, i_uid, ip->i_uid); } #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, ucp, 0))) { if (DOINGSOFTDEP(tvp)) softdep_revert_link(dp, ip); UFS_VFREE(tvp, ip->i_number, dmode); + vgone(tvp); vput(tvp); return (error); } #endif } #else /* !SUIDDIR */ ip->i_uid = cnp->cn_cred->cr_uid; DIP_SET(ip, i_uid, ip->i_uid); #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, cnp->cn_cred, 0))) { if (DOINGSOFTDEP(tvp)) softdep_revert_link(dp, ip); UFS_VFREE(tvp, ip->i_number, dmode); + vgone(tvp); vput(tvp); return (error); } #endif #endif /* !SUIDDIR */ UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE); ip->i_mode = dmode; DIP_SET(ip, i_mode, dmode); tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ ip->i_effnlink = 2; ip->i_nlink = 2; DIP_SET(ip, i_nlink, 2); if (cnp->cn_flags & ISWHITEOUT) { ip->i_flags |= UF_OPAQUE; DIP_SET(ip, i_flags, ip->i_flags); } /* * Bump link count in parent directory to reflect work done below. * Should be done before reference is created so cleanup is * possible if we crash. */ dp->i_effnlink++; dp->i_nlink++; DIP_SET(dp, i_nlink, dp->i_nlink); UFS_INODE_SET_FLAG(dp, IN_CHANGE); if (DOINGSOFTDEP(dvp)) softdep_setup_mkdir(dp, ip); error = UFS_UPDATE(dvp, !DOINGSOFTDEP(dvp) && !DOINGASYNC(dvp)); if (error) goto bad; #ifdef MAC if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) { error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount, dvp, tvp, cnp); if (error) goto bad; } #endif #ifdef UFS_ACL if (dvp->v_mount->mnt_flag & MNT_ACLS) { error = ufs_do_posix1e_acl_inheritance_dir(dvp, tvp, dmode, cnp->cn_cred, cnp->cn_thread); if (error) goto bad; } else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) { error = ufs_do_nfs4_acl_inheritance(dvp, tvp, dmode, cnp->cn_cred, cnp->cn_thread); if (error) goto bad; } #endif /* !UFS_ACL */ /* * Initialize directory with "." and ".." from static template. 
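 *
 * (Editorial aside, not part of the original file: the template expands
 * to two struct direct records covering the first DIRBLKSIZ block,
 * roughly
 *
 *	{ d_ino = ip->i_number, d_reclen = 12, DT_DIR, "." }
 *	{ d_ino = dp->i_number, d_reclen = DIRBLKSIZ - 12, DT_DIR, ".." }
 *
 * so the new block is covered end to end by valid entries.)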
*/ if (dvp->v_mount->mnt_maxsymlinklen > 0) dtp = &mastertemplate; else dtp = (struct dirtemplate *)&omastertemplate; dirtemplate = *dtp; dirtemplate.dot_ino = ip->i_number; dirtemplate.dotdot_ino = dp->i_number; vnode_pager_setsize(tvp, DIRBLKSIZ); if ((error = UFS_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred, BA_CLRBUF, &bp)) != 0) goto bad; ip->i_size = DIRBLKSIZ; DIP_SET(ip, i_size, DIRBLKSIZ); UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE); bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate); if (DOINGSOFTDEP(tvp)) { /* * Ensure that the entire newly allocated block is a * valid directory so that future growth within the * block does not have to ensure that the block is * written before the inode. */ blkoff = DIRBLKSIZ; while (blkoff < bp->b_bcount) { ((struct direct *) (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; blkoff += DIRBLKSIZ; } } if ((error = UFS_UPDATE(tvp, !DOINGSOFTDEP(tvp) && !DOINGASYNC(tvp))) != 0) { (void)bwrite(bp); goto bad; } /* * Directory set up, now install its entry in the parent directory. * * If we are not doing soft dependencies, then we must write out the * buffer containing the new directory body before entering the new * name in the parent. If we are doing soft dependencies, then the * buffer containing the new directory body will be passed to and * released in the soft dependency code after the code has attached * an appropriate ordering dependency to the buffer which ensures that * the buffer is written before the new name is written in the parent. */ if (DOINGASYNC(dvp)) bdwrite(bp); else if (!DOINGSOFTDEP(dvp) && ((error = bwrite(bp)))) goto bad; ufs_makedirentry(ip, cnp, &newdir); error = ufs_direnter(dvp, tvp, &newdir, cnp, bp, 0); bad: if (error == 0) { *ap->a_vpp = tvp; } else { dp->i_effnlink--; dp->i_nlink--; DIP_SET(dp, i_nlink, dp->i_nlink); UFS_INODE_SET_FLAG(dp, IN_CHANGE); /* * No need to do an explicit VOP_TRUNCATE here, vrele will * do this for us because we set the link count to 0. */ ip->i_effnlink = 0; ip->i_nlink = 0; DIP_SET(ip, i_nlink, 0); UFS_INODE_SET_FLAG(ip, IN_CHANGE); if (DOINGSOFTDEP(tvp)) softdep_revert_mkdir(dp, ip); - + vgone(tvp); vput(tvp); } out: return (error); } /* * Rmdir system call. */ static int ufs_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct inode *ip, *dp; int error; ip = VTOI(vp); dp = VTOI(dvp); /* * Do not remove a directory that is in the process of being renamed. * Verify the directory is empty (and valid). Rmdir ".." will not be * valid since ".." will contain a reference to the current directory * and thus be non-empty. Do not allow the removal of mounted on * directories (this can happen when an NFS exported filesystem * tries to remove a locally mounted on directory). */ error = 0; if (dp->i_effnlink <= 2) { if (dp->i_effnlink == 2) print_bad_link_count("ufs_rmdir", dvp); error = EINVAL; goto out; } if (!ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) { error = ENOTEMPTY; goto out; } if ((dp->i_flags & APPEND) || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) { error = EPERM; goto out; } if (vp->v_mountedhere != 0) { error = EINVAL; goto out; } #ifdef UFS_GJOURNAL ufs_gjournal_orphan(vp); #endif /* * Delete reference to directory before purging * inode. 
If we crash in between, the directory * will be reattached to lost+found. */ dp->i_effnlink--; ip->i_effnlink--; if (DOINGSOFTDEP(vp)) softdep_setup_rmdir(dp, ip); error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1); if (error) { dp->i_effnlink++; ip->i_effnlink++; if (DOINGSOFTDEP(vp)) softdep_revert_rmdir(dp, ip); goto out; } cache_purge(dvp); /* * The only stuff left in the directory is "." and "..". The "." * reference is inconsequential since we are quashing it. The soft * dependency code will arrange to do these operations after * the parent directory entry has been deleted on disk, so * when running with that code we avoid doing them now. */ if (!DOINGSOFTDEP(vp)) { dp->i_nlink--; DIP_SET(dp, i_nlink, dp->i_nlink); UFS_INODE_SET_FLAG(dp, IN_CHANGE); error = UFS_UPDATE(dvp, 0); ip->i_nlink--; DIP_SET(ip, i_nlink, ip->i_nlink); UFS_INODE_SET_FLAG(ip, IN_CHANGE); } cache_purge(vp); #ifdef UFS_DIRHASH /* Kill any active hash; i_effnlink == 0, so it will not come back. */ if (ip->i_dirhash != NULL) ufsdirhash_free(ip); #endif out: return (error); } /* * symlink -- make a symbolic link */ static int ufs_symlink(ap) struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; const char *a_target; } */ *ap; { struct vnode *vp, **vpp = ap->a_vpp; struct inode *ip; int len, error; error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, vpp, ap->a_cnp, "ufs_symlink"); if (error) return (error); vp = *vpp; len = strlen(ap->a_target); if (len < vp->v_mount->mnt_maxsymlinklen) { ip = VTOI(vp); bcopy(ap->a_target, SHORTLINK(ip), len); ip->i_size = len; DIP_SET(ip, i_size, len); UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE); error = UFS_UPDATE(vp, 0); } else error = vn_rdwr(UIO_WRITE, vp, __DECONST(void *, ap->a_target), len, (off_t)0, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, ap->a_cnp->cn_cred, NOCRED, NULL, NULL); if (error) vput(vp); return (error); } /* * Vnode op for reading directories.
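 *
 * (Editorial sketch, not part of the original file: the userland view of
 * this vop is opendir(3)/readdir(3), e.g.
 *
 *	DIR *d = opendir("/mnt");
 *	struct dirent *de;
 *
 *	while (d != NULL && (de = readdir(d)) != NULL)
 *		printf("%ju %s\n", (uintmax_t)de->d_fileno, de->d_name);
 *
 * where each batch of entries is produced by the uiomove() loop below.)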
*/ int ufs_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; int *a_ncookies; u_long **a_cookies; } */ *ap; { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct buf *bp; struct inode *ip; struct direct *dp, *edp; u_long *cookies; struct dirent dstdp; off_t offset, startoffset; size_t readcnt, skipcnt; ssize_t startresid; u_int ncookies; int error; if (uio->uio_offset < 0) return (EINVAL); ip = VTOI(vp); if (ip->i_effnlink == 0) return (0); if (ap->a_ncookies != NULL) { if (uio->uio_resid < 0) ncookies = 0; else ncookies = uio->uio_resid; if (uio->uio_offset >= ip->i_size) ncookies = 0; else if (ip->i_size - uio->uio_offset < ncookies) ncookies = ip->i_size - uio->uio_offset; ncookies = ncookies / (offsetof(struct direct, d_name) + 4) + 1; cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK); *ap->a_ncookies = ncookies; *ap->a_cookies = cookies; } else { ncookies = 0; cookies = NULL; } offset = startoffset = uio->uio_offset; startresid = uio->uio_resid; error = 0; while (error == 0 && uio->uio_resid > 0 && uio->uio_offset < ip->i_size) { error = ffs_blkatoff(vp, uio->uio_offset, NULL, &bp); if (error) break; if (bp->b_offset + bp->b_bcount > ip->i_size) readcnt = ip->i_size - bp->b_offset; else readcnt = bp->b_bcount; skipcnt = (size_t)(uio->uio_offset - bp->b_offset) & ~(size_t)(DIRBLKSIZ - 1); offset = bp->b_offset + skipcnt; dp = (struct direct *)&bp->b_data[skipcnt]; edp = (struct direct *)&bp->b_data[readcnt]; while (error == 0 && uio->uio_resid > 0 && dp < edp) { if (dp->d_reclen <= offsetof(struct direct, d_name) || (caddr_t)dp + dp->d_reclen > (caddr_t)edp) { error = EIO; break; } #if BYTE_ORDER == LITTLE_ENDIAN /* Old filesystem format. */ if (vp->v_mount->mnt_maxsymlinklen <= 0) { dstdp.d_namlen = dp->d_type; dstdp.d_type = dp->d_namlen; } else #endif { dstdp.d_namlen = dp->d_namlen; dstdp.d_type = dp->d_type; } if (offsetof(struct direct, d_name) + dstdp.d_namlen > dp->d_reclen) { error = EIO; break; } if (offset < startoffset || dp->d_ino == 0) goto nextentry; dstdp.d_fileno = dp->d_ino; dstdp.d_reclen = GENERIC_DIRSIZ(&dstdp); bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); /* NOTE: d_off is the offset of the *next* entry. */ dstdp.d_off = offset + dp->d_reclen; dirent_terminate(&dstdp); if (dstdp.d_reclen > uio->uio_resid) { if (uio->uio_resid == startresid) error = EINVAL; else error = EJUSTRETURN; break; } /* Advance dp. */ error = uiomove((caddr_t)&dstdp, dstdp.d_reclen, uio); if (error) break; if (cookies != NULL) { KASSERT(ncookies > 0, ("ufs_readdir: cookies buffer too small")); *cookies = offset + dp->d_reclen; cookies++; ncookies--; } nextentry: offset += dp->d_reclen; dp = (struct direct *)((caddr_t)dp + dp->d_reclen); } bqrelse(bp); uio->uio_offset = offset; } /* We need to correct uio_offset. 
*/ uio->uio_offset = offset; if (error == EJUSTRETURN) error = 0; if (ap->a_ncookies != NULL) { if (error == 0) { *ap->a_ncookies -= ncookies; } else { free(*ap->a_cookies, M_TEMP); *ap->a_ncookies = 0; *ap->a_cookies = NULL; } } if (error == 0 && ap->a_eofflag) *ap->a_eofflag = ip->i_size <= uio->uio_offset; return (error); } /* * Return target name of a symbolic link */ static int ufs_readlink(ap) struct vop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); doff_t isize; isize = ip->i_size; if ((isize < vp->v_mount->mnt_maxsymlinklen) || DIP(ip, i_blocks) == 0) { /* XXX - for old fastlink support */ return (uiomove(SHORTLINK(ip), isize, ap->a_uio)); } return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); } /* * Calculate the logical to physical mapping if not done already, * then call the device strategy routine. * * In order to be able to swap to a file, the ufs_bmaparray() operation may not * deadlock on memory. See ufs_bmap() for details. */ static int ufs_strategy(ap) struct vop_strategy_args /* { struct vnode *a_vp; struct buf *a_bp; } */ *ap; { struct buf *bp = ap->a_bp; struct vnode *vp = ap->a_vp; ufs2_daddr_t blkno; int error; if (bp->b_blkno == bp->b_lblkno) { error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, bp, NULL, NULL); bp->b_blkno = blkno; if (error) { bp->b_error = error; bp->b_ioflags |= BIO_ERROR; bufdone(bp); return (0); } if ((long)bp->b_blkno == -1) vfs_bio_clrbuf(bp); } if ((long)bp->b_blkno == -1) { bufdone(bp); return (0); } bp->b_iooffset = dbtob(bp->b_blkno); BO_STRATEGY(VFSTOUFS(vp->v_mount)->um_bo, bp); return (0); } /* * Print out the contents of an inode. */ static int ufs_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); printf("\tnlink=%d, effnlink=%d, size=%jd", ip->i_nlink, ip->i_effnlink, (intmax_t)ip->i_size); if (I_IS_UFS2(ip)) printf(", extsize %d", ip->i_din2->di_extsize); printf("\n\tgeneration=%jx, uid=%d, gid=%d, flags=0x%b\n", (uintmax_t)ip->i_gen, ip->i_uid, ip->i_gid, (u_int)ip->i_flags, PRINT_INODE_FLAGS); printf("\tino %lu, on dev %s", (u_long)ip->i_number, devtoname(ITODEV(ip))); if (vp->v_type == VFIFO) fifo_printinfo(vp); printf("\n"); return (0); } /* * Close wrapper for fifos. * * Update the times on the inode then do device close. */ static int ufsfifo_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct vnode *vp = ap->a_vp; int usecount; VI_LOCK(vp); usecount = vp->v_usecount; if (usecount > 1) ufs_itimes_locked(vp); VI_UNLOCK(vp); return (fifo_specops.vop_close(ap)); } /* * Kqfilter wrapper for fifos. * * Fall through to ufs kqfilter routines if needed */ static int ufsfifo_kqfilter(ap) struct vop_kqfilter_args *ap; { int error; error = fifo_specops.vop_kqfilter(ap); if (error) error = vfs_kqfilter(ap); return (error); } /* * Return POSIX pathconf information applicable to ufs filesystems.
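 *
 * (Editorial sketch, not part of the original file: these values reach
 * userland through pathconf(2)/fpathconf(2), e.g.
 *
 *	long name_max = pathconf("/mnt", _PC_NAME_MAX);
 *
 * which for a UFS mount reports UFS_MAXNAMLEN per the case below.)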
*/ static int ufs_pathconf(ap) struct vop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; } */ *ap; { int error; error = 0; switch (ap->a_name) { case _PC_LINK_MAX: *ap->a_retval = UFS_LINK_MAX; break; case _PC_NAME_MAX: *ap->a_retval = UFS_MAXNAMLEN; break; case _PC_PIPE_BUF: if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) *ap->a_retval = PIPE_BUF; else error = EINVAL; break; case _PC_CHOWN_RESTRICTED: *ap->a_retval = 1; break; case _PC_NO_TRUNC: *ap->a_retval = 1; break; #ifdef UFS_ACL case _PC_ACL_EXTENDED: if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS) *ap->a_retval = 1; else *ap->a_retval = 0; break; case _PC_ACL_NFS4: if (ap->a_vp->v_mount->mnt_flag & MNT_NFS4ACLS) *ap->a_retval = 1; else *ap->a_retval = 0; break; #endif case _PC_ACL_PATH_MAX: #ifdef UFS_ACL if (ap->a_vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) *ap->a_retval = ACL_MAX_ENTRIES; else *ap->a_retval = 3; #else *ap->a_retval = 3; #endif break; #ifdef MAC case _PC_MAC_PRESENT: if (ap->a_vp->v_mount->mnt_flag & MNT_MULTILABEL) *ap->a_retval = 1; else *ap->a_retval = 0; break; #endif case _PC_MIN_HOLE_SIZE: *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; break; case _PC_PRIO_IO: *ap->a_retval = 0; break; case _PC_SYNC_IO: *ap->a_retval = 0; break; case _PC_ALLOC_SIZE_MIN: *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize; break; case _PC_FILESIZEBITS: *ap->a_retval = 64; break; case _PC_REC_INCR_XFER_SIZE: *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; break; case _PC_REC_MAX_XFER_SIZE: *ap->a_retval = -1; /* means ``unlimited'' */ break; case _PC_REC_MIN_XFER_SIZE: *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; break; case _PC_REC_XFER_ALIGN: *ap->a_retval = PAGE_SIZE; break; case _PC_SYMLINK_MAX: *ap->a_retval = MAXPATHLEN; break; default: error = vop_stdpathconf(ap); break; } return (error); } /* * Initialize the vnode associated with a new inode, handle aliased * vnodes. */ int ufs_vinit(mntp, fifoops, vpp) struct mount *mntp; struct vop_vector *fifoops; struct vnode **vpp; { struct inode *ip; struct vnode *vp; vp = *vpp; ASSERT_VOP_LOCKED(vp, "ufs_vinit"); ip = VTOI(vp); vp->v_type = IFTOVT(ip->i_mode); /* * Only unallocated inodes should be of type VNON. */ if (ip->i_mode != 0 && vp->v_type == VNON) return (EINVAL); if (vp->v_type == VFIFO) vp->v_op = fifoops; if (ip->i_number == UFS_ROOTINO) vp->v_vflag |= VV_ROOT; *vpp = vp; return (0); } /* * Allocate a new inode. * Vnode dvp must be locked. */ static int ufs_makeinode(mode, dvp, vpp, cnp, callfunc) int mode; struct vnode *dvp; struct vnode **vpp; struct componentname *cnp; const char *callfunc; { struct inode *ip, *pdir; struct direct newdir; struct vnode *tvp; int error; pdir = VTOI(dvp); #ifdef INVARIANTS if ((cnp->cn_flags & HASBUF) == 0) panic("%s: no name", callfunc); #endif *vpp = NULL; if ((mode & IFMT) == 0) mode |= IFREG; if (pdir->i_effnlink < 2) { print_bad_link_count(callfunc, dvp); return (EINVAL); } error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp); if (error) return (error); ip = VTOI(tvp); ip->i_gid = pdir->i_gid; DIP_SET(ip, i_gid, pdir->i_gid); #ifdef SUIDDIR { #ifdef QUOTA struct ucred ucred, *ucp; gid_t ucred_group; ucp = cnp->cn_cred; #endif /* * If we are not the owner of the directory, * and we are hacking owners here, (only do this where told to) * and we are not giving it TO root, (would subvert quotas) * then go ahead and give it to the other user. * Note that this drops off the execute bits for security. 
*/ if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && (pdir->i_mode & ISUID) && (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) { ip->i_uid = pdir->i_uid; DIP_SET(ip, i_uid, ip->i_uid); mode &= ~07111; #ifdef QUOTA /* * Make sure the correct user gets charged * for the space. * Quickly knock up a dummy credential for the victim. * XXX This seems to never be accessed out of our * context so a stack variable is ok. */ refcount_init(&ucred.cr_ref, 1); ucred.cr_uid = ip->i_uid; ucred.cr_ngroups = 1; ucred.cr_groups = &ucred_group; ucred.cr_groups[0] = pdir->i_gid; ucp = &ucred; #endif } else { ip->i_uid = cnp->cn_cred->cr_uid; DIP_SET(ip, i_uid, ip->i_uid); } #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, ucp, 0))) { if (DOINGSOFTDEP(tvp)) softdep_revert_link(pdir, ip); UFS_VFREE(tvp, ip->i_number, mode); + vgone(tvp); vput(tvp); return (error); } #endif } #else /* !SUIDDIR */ ip->i_uid = cnp->cn_cred->cr_uid; DIP_SET(ip, i_uid, ip->i_uid); #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, cnp->cn_cred, 0))) { if (DOINGSOFTDEP(tvp)) softdep_revert_link(pdir, ip); UFS_VFREE(tvp, ip->i_number, mode); + vgone(tvp); vput(tvp); return (error); } #endif #endif /* !SUIDDIR */ UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE); ip->i_mode = mode; DIP_SET(ip, i_mode, mode); tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ ip->i_effnlink = 1; ip->i_nlink = 1; DIP_SET(ip, i_nlink, 1); if (DOINGSOFTDEP(tvp)) softdep_setup_create(VTOI(dvp), ip); if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) && priv_check_cred(cnp->cn_cred, PRIV_VFS_SETGID)) { ip->i_mode &= ~ISGID; DIP_SET(ip, i_mode, ip->i_mode); } if (cnp->cn_flags & ISWHITEOUT) { ip->i_flags |= UF_OPAQUE; DIP_SET(ip, i_flags, ip->i_flags); } /* * Make sure inode goes to disk before directory entry. */ error = UFS_UPDATE(tvp, !DOINGSOFTDEP(tvp) && !DOINGASYNC(tvp)); if (error) goto bad; #ifdef MAC if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) { error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount, dvp, tvp, cnp); if (error) goto bad; } #endif #ifdef UFS_ACL if (dvp->v_mount->mnt_flag & MNT_ACLS) { error = ufs_do_posix1e_acl_inheritance_file(dvp, tvp, mode, cnp->cn_cred, cnp->cn_thread); if (error) goto bad; } else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) { error = ufs_do_nfs4_acl_inheritance(dvp, tvp, mode, cnp->cn_cred, cnp->cn_thread); if (error) goto bad; } #endif /* !UFS_ACL */ ufs_makedirentry(ip, cnp, &newdir); error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL, 0); if (error) goto bad; *vpp = tvp; return (0); bad: /* * Write error occurred trying to update the inode * or the directory so must deallocate the inode. */ ip->i_effnlink = 0; ip->i_nlink = 0; DIP_SET(ip, i_nlink, 0); UFS_INODE_SET_FLAG(ip, IN_CHANGE); if (DOINGSOFTDEP(tvp)) softdep_revert_create(VTOI(dvp), ip); + vgone(tvp); vput(tvp); return (error); } static int ufs_ioctl(struct vop_ioctl_args *ap) { struct vnode *vp; int error; vp = ap->a_vp; switch (ap->a_command) { case FIOSEEKDATA: error = vn_lock(vp, LK_SHARED); if (error == 0) { error = ufs_bmap_seekdata(vp, (off_t *)ap->a_data); VOP_UNLOCK(vp); } else error = EBADF; return (error); case FIOSEEKHOLE: return (vn_bmap_seekhole(vp, ap->a_command, (off_t *)ap->a_data, ap->a_cred)); default: return (ENOTTY); } } /* Global vfs data structures for ufs. 
*/ struct vop_vector ufs_vnodeops = { .vop_default = &default_vnodeops, .vop_fsync = VOP_PANIC, .vop_read = VOP_PANIC, .vop_reallocblks = VOP_PANIC, .vop_write = VOP_PANIC, .vop_accessx = ufs_accessx, .vop_bmap = ufs_bmap, .vop_cachedlookup = ufs_lookup, .vop_close = ufs_close, .vop_create = ufs_create, .vop_getattr = ufs_getattr, .vop_inactive = ufs_inactive, .vop_ioctl = ufs_ioctl, .vop_link = ufs_link, .vop_lookup = vfs_cache_lookup, .vop_markatime = ufs_markatime, .vop_mkdir = ufs_mkdir, .vop_mknod = ufs_mknod, .vop_need_inactive = ufs_need_inactive, .vop_open = ufs_open, .vop_pathconf = ufs_pathconf, .vop_poll = vop_stdpoll, .vop_print = ufs_print, .vop_readdir = ufs_readdir, .vop_readlink = ufs_readlink, .vop_reclaim = ufs_reclaim, .vop_remove = ufs_remove, .vop_rename = ufs_rename, .vop_rmdir = ufs_rmdir, .vop_setattr = ufs_setattr, #ifdef MAC .vop_setlabel = vop_stdsetlabel_ea, #endif .vop_strategy = ufs_strategy, .vop_symlink = ufs_symlink, .vop_whiteout = ufs_whiteout, #ifdef UFS_EXTATTR .vop_getextattr = ufs_getextattr, .vop_deleteextattr = ufs_deleteextattr, .vop_setextattr = ufs_setextattr, #endif #ifdef UFS_ACL .vop_getacl = ufs_getacl, .vop_setacl = ufs_setacl, .vop_aclcheck = ufs_aclcheck, #endif }; VFS_VOP_VECTOR_REGISTER(ufs_vnodeops); struct vop_vector ufs_fifoops = { .vop_default = &fifo_specops, .vop_fsync = VOP_PANIC, .vop_accessx = ufs_accessx, .vop_close = ufsfifo_close, .vop_getattr = ufs_getattr, .vop_inactive = ufs_inactive, .vop_kqfilter = ufsfifo_kqfilter, .vop_markatime = ufs_markatime, .vop_pathconf = ufs_pathconf, .vop_print = ufs_print, .vop_read = VOP_PANIC, .vop_reclaim = ufs_reclaim, .vop_setattr = ufs_setattr, #ifdef MAC .vop_setlabel = vop_stdsetlabel_ea, #endif .vop_write = VOP_PANIC, #ifdef UFS_EXTATTR .vop_getextattr = ufs_getextattr, .vop_deleteextattr = ufs_deleteextattr, .vop_setextattr = ufs_setextattr, #endif #ifdef UFS_ACL .vop_getacl = ufs_getacl, .vop_setacl = ufs_setacl, .vop_aclcheck = ufs_aclcheck, #endif }; VFS_VOP_VECTOR_REGISTER(ufs_fifoops); Index: projects/clang1000-import/sys/x86/cpufreq/hwpstate_amd.c =================================================================== --- projects/clang1000-import/sys/x86/cpufreq/hwpstate_amd.c (revision 357178) +++ projects/clang1000-import/sys/x86/cpufreq/hwpstate_amd.c (revision 357179) @@ -1,543 +1,547 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005 Nate Lawson * Copyright (c) 2004 Colin Percival * Copyright (c) 2004-2005 Bruno Durcot * Copyright (c) 2004 FUKUDA Nobuhiko * Copyright (c) 2009 Michael Reifenberger * Copyright (c) 2009 Norikatsu Shigemura * Copyright (c) 2008-2009 Gen Otsuji * * This code is depending on kern_cpu.c, est.c, powernow.c, p4tcc.c, smist.c * in various parts. The authors of these files are Nate Lawson, * Colin Percival, Bruno Durcot, and FUKUDA Nobuhiko. * This code contains patches by Michael Reifenberger and Norikatsu Shigemura. * Thank you. * * Redistribution and use in source and binary forms, with or without * modification, are permitted providing that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * For more info: * BIOS and Kernel Developer's Guide(BKDG) for AMD Family 10h Processors * 31116 Rev 3.20 February 04, 2009 * BIOS and Kernel Developer's Guide(BKDG) for AMD Family 11h Processors * 41256 Rev 3.00 - July 07, 2008 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "acpi_if.h" #include "cpufreq_if.h" #define MSR_AMD_10H_11H_LIMIT 0xc0010061 #define MSR_AMD_10H_11H_CONTROL 0xc0010062 #define MSR_AMD_10H_11H_STATUS 0xc0010063 #define MSR_AMD_10H_11H_CONFIG 0xc0010064 #define AMD_10H_11H_MAX_STATES 16 /* for MSR_AMD_10H_11H_LIMIT C001_0061 */ #define AMD_10H_11H_GET_PSTATE_MAX_VAL(msr) (((msr) >> 4) & 0x7) #define AMD_10H_11H_GET_PSTATE_LIMIT(msr) (((msr)) & 0x7) /* for MSR_AMD_10H_11H_CONFIG 10h:C001_0064:68 / 11h:C001_0064:6B */ #define AMD_10H_11H_CUR_VID(msr) (((msr) >> 9) & 0x7F) #define AMD_10H_11H_CUR_DID(msr) (((msr) >> 6) & 0x07) #define AMD_10H_11H_CUR_FID(msr) ((msr) & 0x3F) #define AMD_17H_CUR_VID(msr) (((msr) >> 14) & 0xFF) #define AMD_17H_CUR_DID(msr) (((msr) >> 8) & 0x3F) #define AMD_17H_CUR_FID(msr) ((msr) & 0xFF) #define HWPSTATE_DEBUG(dev, msg...) \ do { \ if (hwpstate_verbose) \ device_printf(dev, msg); \ } while (0) struct hwpstate_setting { int freq; /* CPU clock in Mhz or 100ths of a percent. */ int volts; /* Voltage in mV. */ int power; /* Power consumed in mW. */ int lat; /* Transition latency in us. 
*/ int pstate_id; /* P-State id */ }; struct hwpstate_softc { device_t dev; struct hwpstate_setting hwpstate_settings[AMD_10H_11H_MAX_STATES]; int cfnum; }; static void hwpstate_identify(driver_t *driver, device_t parent); static int hwpstate_probe(device_t dev); static int hwpstate_attach(device_t dev); static int hwpstate_detach(device_t dev); static int hwpstate_set(device_t dev, const struct cf_setting *cf); static int hwpstate_get(device_t dev, struct cf_setting *cf); static int hwpstate_settings(device_t dev, struct cf_setting *sets, int *count); static int hwpstate_type(device_t dev, int *type); static int hwpstate_shutdown(device_t dev); static int hwpstate_features(driver_t *driver, u_int *features); static int hwpstate_get_info_from_acpi_perf(device_t dev, device_t perf_dev); static int hwpstate_get_info_from_msr(device_t dev); static int hwpstate_goto_pstate(device_t dev, int pstate_id); static int hwpstate_verbose; SYSCTL_INT(_debug, OID_AUTO, hwpstate_verbose, CTLFLAG_RWTUN, &hwpstate_verbose, 0, "Debug hwpstate"); static int hwpstate_verify; SYSCTL_INT(_debug, OID_AUTO, hwpstate_verify, CTLFLAG_RWTUN, &hwpstate_verify, 0, "Verify P-state after setting"); static device_method_t hwpstate_methods[] = { /* Device interface */ DEVMETHOD(device_identify, hwpstate_identify), DEVMETHOD(device_probe, hwpstate_probe), DEVMETHOD(device_attach, hwpstate_attach), DEVMETHOD(device_detach, hwpstate_detach), DEVMETHOD(device_shutdown, hwpstate_shutdown), /* cpufreq interface */ DEVMETHOD(cpufreq_drv_set, hwpstate_set), DEVMETHOD(cpufreq_drv_get, hwpstate_get), DEVMETHOD(cpufreq_drv_settings, hwpstate_settings), DEVMETHOD(cpufreq_drv_type, hwpstate_type), /* ACPI interface */ DEVMETHOD(acpi_get_features, hwpstate_features), {0, 0} }; static devclass_t hwpstate_devclass; static driver_t hwpstate_driver = { "hwpstate", hwpstate_methods, sizeof(struct hwpstate_softc), }; DRIVER_MODULE(hwpstate, cpu, hwpstate_driver, hwpstate_devclass, 0, 0); /* * Go to Px-state on all cpus considering the limit. */ static int hwpstate_goto_pstate(device_t dev, int id) { sbintime_t sbt; uint64_t msr; int cpu, i, j, limit; /* get the current pstate limit */ msr = rdmsr(MSR_AMD_10H_11H_LIMIT); limit = AMD_10H_11H_GET_PSTATE_LIMIT(msr); - if (limit > id) + if (limit > id) { + HWPSTATE_DEBUG(dev, + "Restricting requested P%d to P%d due to HW limit\n", id, + limit); id = limit; + } cpu = curcpu; HWPSTATE_DEBUG(dev, "setting P%d-state on cpu%d\n", id, cpu); /* Go To Px-state */ wrmsr(MSR_AMD_10H_11H_CONTROL, id); /* * We are going to the same Px-state on all cpus. * Probably should take _PSD into account. */ CPU_FOREACH(i) { if (i == cpu) continue; /* Bind to each cpu. */ thread_lock(curthread); sched_bind(curthread, i); thread_unlock(curthread); HWPSTATE_DEBUG(dev, "setting P%d-state on cpu%d\n", id, i); /* Go To Px-state */ wrmsr(MSR_AMD_10H_11H_CONTROL, id); } /* * Verify whether each core is in the requested P-state. */ if (hwpstate_verify) { CPU_FOREACH(i) { thread_lock(curthread); sched_bind(curthread, i); thread_unlock(curthread); /* wait loop (100*100 usec is enough ?) */ for (j = 0; j < 100; j++) { /* get the result. 
not assure msr=id */ msr = rdmsr(MSR_AMD_10H_11H_STATUS); if (msr == id) break; sbt = SBT_1MS / 10; tsleep_sbt(dev, PZERO, "pstate_goto", sbt, sbt >> tc_precexp, 0); } HWPSTATE_DEBUG(dev, "result: P%d-state on cpu%d\n", (int)msr, i); if (msr != id) { HWPSTATE_DEBUG(dev, "error: loop is not enough.\n"); return (ENXIO); } } } return (0); } static int hwpstate_set(device_t dev, const struct cf_setting *cf) { struct hwpstate_softc *sc; struct hwpstate_setting *set; int i; if (cf == NULL) return (EINVAL); sc = device_get_softc(dev); set = sc->hwpstate_settings; for (i = 0; i < sc->cfnum; i++) if (CPUFREQ_CMP(cf->freq, set[i].freq)) break; if (i == sc->cfnum) return (EINVAL); return (hwpstate_goto_pstate(dev, set[i].pstate_id)); } static int hwpstate_get(device_t dev, struct cf_setting *cf) { struct hwpstate_softc *sc; struct hwpstate_setting set; uint64_t msr; sc = device_get_softc(dev); if (cf == NULL) return (EINVAL); msr = rdmsr(MSR_AMD_10H_11H_STATUS); if (msr >= sc->cfnum) return (EINVAL); set = sc->hwpstate_settings[msr]; cf->freq = set.freq; cf->volts = set.volts; cf->power = set.power; cf->lat = set.lat; cf->dev = dev; return (0); } static int hwpstate_settings(device_t dev, struct cf_setting *sets, int *count) { struct hwpstate_softc *sc; struct hwpstate_setting set; int i; if (sets == NULL || count == NULL) return (EINVAL); sc = device_get_softc(dev); if (*count < sc->cfnum) return (E2BIG); for (i = 0; i < sc->cfnum; i++, sets++) { set = sc->hwpstate_settings[i]; sets->freq = set.freq; sets->volts = set.volts; sets->power = set.power; sets->lat = set.lat; sets->dev = dev; } *count = sc->cfnum; return (0); } static int hwpstate_type(device_t dev, int *type) { if (type == NULL) return (EINVAL); *type = CPUFREQ_TYPE_ABSOLUTE; return (0); } static void hwpstate_identify(driver_t *driver, device_t parent) { if (device_find_child(parent, "hwpstate", -1) != NULL) return; if ((cpu_vendor_id != CPU_VENDOR_AMD || CPUID_TO_FAMILY(cpu_id) < 0x10) && cpu_vendor_id != CPU_VENDOR_HYGON) return; /* * Check if hardware pstate enable bit is set. */ if ((amd_pminfo & AMDPM_HW_PSTATE) == 0) { HWPSTATE_DEBUG(parent, "hwpstate enable bit is not set.\n"); return; } if (resource_disabled("hwpstate", 0)) return; if (BUS_ADD_CHILD(parent, 10, "hwpstate", -1) == NULL) device_printf(parent, "hwpstate: add child failed\n"); } static int hwpstate_probe(device_t dev) { struct hwpstate_softc *sc; device_t perf_dev; uint64_t msr; int error, type; /* * Only hwpstate0. * It goes well with acpi_throttle. */ if (device_get_unit(dev) != 0) return (ENXIO); sc = device_get_softc(dev); sc->dev = dev; /* * Check if acpi_perf has INFO only flag. */ perf_dev = device_find_child(device_get_parent(dev), "acpi_perf", -1); error = TRUE; if (perf_dev && device_is_attached(perf_dev)) { error = CPUFREQ_DRV_TYPE(perf_dev, &type); if (error == 0) { if ((type & CPUFREQ_FLAG_INFO_ONLY) == 0) { /* * If acpi_perf doesn't have INFO_ONLY flag, * it will take care of pstate transitions. */ HWPSTATE_DEBUG(dev, "acpi_perf will take care of pstate transitions.\n"); return (ENXIO); } else { /* * If acpi_perf has INFO_ONLY flag, (_PCT has FFixedHW) * we can get _PSS info from acpi_perf * without going into ACPI. */ HWPSTATE_DEBUG(dev, "going to fetch info from acpi_perf\n"); error = hwpstate_get_info_from_acpi_perf(dev, perf_dev); } } } if (error == 0) { /* * Now we get _PSS info from acpi_perf without error. * Let's check it. 
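MSR C001_0061 reports the highest valid P-state index in PstateMaxVal (bits 6:4), so the hardware P-state count is that value plus one; if it disagrees with the _PSS count, the ACPI data is discarded and the driver falls back to reading the MSRs directly.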
*/ msr = rdmsr(MSR_AMD_10H_11H_LIMIT); if (sc->cfnum != 1 + AMD_10H_11H_GET_PSTATE_MAX_VAL(msr)) { HWPSTATE_DEBUG(dev, "MSR (%jd) and ACPI _PSS (%d)" " count mismatch\n", (intmax_t)msr, sc->cfnum); error = TRUE; } } /* * If we cannot get info from acpi_perf, * Let's get info from MSRs. */ if (error) error = hwpstate_get_info_from_msr(dev); if (error) return (error); device_set_desc(dev, "Cool`n'Quiet 2.0"); return (0); } static int hwpstate_attach(device_t dev) { return (cpufreq_register(dev)); } static int hwpstate_get_info_from_msr(device_t dev) { struct hwpstate_softc *sc; struct hwpstate_setting *hwpstate_set; uint64_t msr; int family, i, fid, did; family = CPUID_TO_FAMILY(cpu_id); sc = device_get_softc(dev); /* Get pstate count */ msr = rdmsr(MSR_AMD_10H_11H_LIMIT); sc->cfnum = 1 + AMD_10H_11H_GET_PSTATE_MAX_VAL(msr); hwpstate_set = sc->hwpstate_settings; for (i = 0; i < sc->cfnum; i++) { msr = rdmsr(MSR_AMD_10H_11H_CONFIG + i); if ((msr & ((uint64_t)1 << 63)) == 0) { HWPSTATE_DEBUG(dev, "msr is not valid.\n"); return (ENXIO); } did = AMD_10H_11H_CUR_DID(msr); fid = AMD_10H_11H_CUR_FID(msr); /* Convert fid/did to frequency. */ switch (family) { case 0x11: hwpstate_set[i].freq = (100 * (fid + 0x08)) >> did; break; case 0x10: case 0x12: case 0x15: case 0x16: hwpstate_set[i].freq = (100 * (fid + 0x10)) >> did; break; case 0x17: case 0x18: did = AMD_17H_CUR_DID(msr); if (did == 0) { HWPSTATE_DEBUG(dev, "unexpected did: 0\n"); did = 1; } fid = AMD_17H_CUR_FID(msr); hwpstate_set[i].freq = (200 * fid) / did; break; default: HWPSTATE_DEBUG(dev, "get_info_from_msr: %s family" " 0x%02x CPUs are not supported yet\n", cpu_vendor_id == CPU_VENDOR_HYGON ? "Hygon" : "AMD", family); return (ENXIO); } hwpstate_set[i].pstate_id = i; /* There was volts calculation, but deleted it. */ hwpstate_set[i].volts = CPUFREQ_VAL_UNKNOWN; hwpstate_set[i].power = CPUFREQ_VAL_UNKNOWN; hwpstate_set[i].lat = CPUFREQ_VAL_UNKNOWN; } return (0); } static int hwpstate_get_info_from_acpi_perf(device_t dev, device_t perf_dev) { struct hwpstate_softc *sc; struct cf_setting *perf_set; struct hwpstate_setting *hwpstate_set; int count, error, i; perf_set = malloc(MAX_SETTINGS * sizeof(*perf_set), M_TEMP, M_NOWAIT); if (perf_set == NULL) { HWPSTATE_DEBUG(dev, "nomem\n"); return (ENOMEM); } /* * Fetch settings from acpi_perf. * Now it is attached, and has info only flag. 
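The settings arrive through the generic cpufreq driver interface (CPUFREQ_DRV_SETTINGS); each entry carries its P-state id in spec[0], which is cross-checked against the table index below.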
*/ count = MAX_SETTINGS; error = CPUFREQ_DRV_SETTINGS(perf_dev, perf_set, &count); if (error) { HWPSTATE_DEBUG(dev, "error: CPUFREQ_DRV_SETTINGS.\n"); goto out; } sc = device_get_softc(dev); sc->cfnum = count; hwpstate_set = sc->hwpstate_settings; for (i = 0; i < count; i++) { if (i == perf_set[i].spec[0]) { hwpstate_set[i].pstate_id = i; hwpstate_set[i].freq = perf_set[i].freq; hwpstate_set[i].volts = perf_set[i].volts; hwpstate_set[i].power = perf_set[i].power; hwpstate_set[i].lat = perf_set[i].lat; } else { HWPSTATE_DEBUG(dev, "ACPI _PSS object mismatch.\n"); error = ENXIO; goto out; } } out: if (perf_set) free(perf_set, M_TEMP); return (error); } static int hwpstate_detach(device_t dev) { hwpstate_goto_pstate(dev, 0); return (cpufreq_unregister(dev)); } static int hwpstate_shutdown(device_t dev) { /* hwpstate_goto_pstate(dev, 0); */ return (0); } static int hwpstate_features(driver_t *driver, u_int *features) { /* Notify the ACPI CPU that we support direct access to MSRs */ *features = ACPI_CAP_PERF_MSRS; return (0); } Index: projects/clang1000-import/sys/x86/iommu/intel_dmar.h =================================================================== --- projects/clang1000-import/sys/x86/iommu/intel_dmar.h (revision 357178) +++ projects/clang1000-import/sys/x86/iommu/intel_dmar.h (revision 357179) @@ -1,575 +1,576 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013-2015 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __X86_IOMMU_INTEL_DMAR_H #define __X86_IOMMU_INTEL_DMAR_H /* Host or physical memory address, after translation. */ typedef uint64_t dmar_haddr_t; /* Guest or bus address, before translation. 
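This is the address a device puts on the bus for DMA; the DMAR unit translates it through the domain page tables into a host physical (dmar_haddr_t) address.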
*/ typedef uint64_t dmar_gaddr_t; struct dmar_qi_genseq { u_int gen; uint32_t seq; }; struct dmar_map_entry { dmar_gaddr_t start; dmar_gaddr_t end; - dmar_gaddr_t free_after; /* Free space after the entry */ + dmar_gaddr_t first; /* Least start in subtree */ + dmar_gaddr_t last; /* Greatest end in subtree */ dmar_gaddr_t free_down; /* Max free space below the current R/B tree node */ u_int flags; TAILQ_ENTRY(dmar_map_entry) dmamap_link; /* Link for dmamap entries */ RB_ENTRY(dmar_map_entry) rb_entry; /* Links for domain entries */ TAILQ_ENTRY(dmar_map_entry) unroll_link; /* Link for unroll after dmamap_load failure */ struct dmar_domain *domain; struct dmar_qi_genseq gseq; }; RB_HEAD(dmar_gas_entries_tree, dmar_map_entry); RB_PROTOTYPE(dmar_gas_entries_tree, dmar_map_entry, rb_entry, dmar_gas_cmp_entries); #define DMAR_MAP_ENTRY_PLACE 0x0001 /* Fake entry */ #define DMAR_MAP_ENTRY_RMRR 0x0002 /* Permanent, not linked by dmamap_link */ #define DMAR_MAP_ENTRY_MAP 0x0004 /* Busdma created, linked by dmamap_link */ #define DMAR_MAP_ENTRY_UNMAPPED 0x0010 /* No backing pages */ #define DMAR_MAP_ENTRY_QI_NF 0x0020 /* qi task, do not free entry */ #define DMAR_MAP_ENTRY_READ 0x1000 /* Read permitted */ #define DMAR_MAP_ENTRY_WRITE 0x2000 /* Write permitted */ #define DMAR_MAP_ENTRY_SNOOP 0x4000 /* Snoop */ #define DMAR_MAP_ENTRY_TM 0x8000 /* Transient */ /* * Locking annotations: * (u) - Protected by dmar unit lock * (d) - Protected by domain lock * (c) - Immutable after initialization */ /* * The domain abstraction. Most non-constant members of the domain * are protected by owning dmar unit lock, not by the domain lock. * Most important, the dmar lock protects the contexts list. * * The domain lock protects the address map for the domain, and list * of unload entries delayed. * * Page tables pages and pages content is protected by the vm object * lock pgtbl_obj, which contains the page tables pages. 
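Fields tagged (c) below are immutable after initialization and may be read without taking any lock.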
*/ struct dmar_domain { int domain; /* (c) DID, written in context entry */ int mgaw; /* (c) Real max address width */ int agaw; /* (c) Adjusted guest address width */ int pglvl; /* (c) The pagelevel */ int awlvl; /* (c) The pagelevel as the bitmask, to set in context entry */ dmar_gaddr_t end; /* (c) Highest address + 1 in the guest AS */ u_int ctx_cnt; /* (u) Number of contexts owned */ u_int refs; /* (u) Refs, including ctx */ struct dmar_unit *dmar; /* (c) */ struct mtx lock; /* (c) */ LIST_ENTRY(dmar_domain) link; /* (u) Member in the dmar list */ LIST_HEAD(, dmar_ctx) contexts; /* (u) */ vm_object_t pgtbl_obj; /* (c) Page table pages */ u_int flags; /* (u) */ u_int entries_cnt; /* (d) */ struct dmar_gas_entries_tree rb_root; /* (d) */ struct dmar_map_entries_tailq unload_entries; /* (d) Entries to unload */ struct dmar_map_entry *first_place, *last_place; /* (d) */ struct task unload_task; /* (c) */ u_int batch_no; }; struct dmar_ctx { struct bus_dma_tag_dmar ctx_tag; /* (c) Root tag */ uint16_t rid; /* (c) pci RID */ uint64_t last_fault_rec[2]; /* Last fault reported */ struct dmar_domain *domain; /* (c) */ LIST_ENTRY(dmar_ctx) link; /* (u) Member in the domain list */ u_int refs; /* (u) References from tags */ u_int flags; /* (u) */ u_long loads; /* atomic updates, for stat only */ u_long unloads; /* same */ }; #define DMAR_DOMAIN_GAS_INITED 0x0001 #define DMAR_DOMAIN_PGTBL_INITED 0x0002 #define DMAR_DOMAIN_IDMAP 0x0010 /* Domain uses identity page table */ #define DMAR_DOMAIN_RMRR 0x0020 /* Domain contains RMRR entry, cannot be turned off */ /* struct dmar_ctx flags */ #define DMAR_CTX_FAULTED 0x0001 /* Fault was reported, last_fault_rec is valid */ #define DMAR_CTX_DISABLED 0x0002 /* Device is disabled, the ephemeral reference is kept to prevent context destruction */ #define DMAR_DOMAIN_PGLOCK(dom) VM_OBJECT_WLOCK((dom)->pgtbl_obj) #define DMAR_DOMAIN_PGTRYLOCK(dom) VM_OBJECT_TRYWLOCK((dom)->pgtbl_obj) #define DMAR_DOMAIN_PGUNLOCK(dom) VM_OBJECT_WUNLOCK((dom)->pgtbl_obj) #define DMAR_DOMAIN_ASSERT_PGLOCKED(dom) \ VM_OBJECT_ASSERT_WLOCKED((dom)->pgtbl_obj) #define DMAR_DOMAIN_LOCK(dom) mtx_lock(&(dom)->lock) #define DMAR_DOMAIN_UNLOCK(dom) mtx_unlock(&(dom)->lock) #define DMAR_DOMAIN_ASSERT_LOCKED(dom) mtx_assert(&(dom)->lock, MA_OWNED) struct dmar_msi_data { int irq; int irq_rid; struct resource *irq_res; void *intr_handle; int (*handler)(void *); int msi_data_reg; int msi_addr_reg; int msi_uaddr_reg; void (*enable_intr)(struct dmar_unit *); void (*disable_intr)(struct dmar_unit *); const char *name; }; #define DMAR_INTR_FAULT 0 #define DMAR_INTR_QI 1 #define DMAR_INTR_TOTAL 2 struct dmar_unit { device_t dev; int unit; uint16_t segment; uint64_t base; /* Resources */ int reg_rid; struct resource *regs; struct dmar_msi_data intrs[DMAR_INTR_TOTAL]; /* Hardware registers cache */ uint32_t hw_ver; uint64_t hw_cap; uint64_t hw_ecap; uint32_t hw_gcmd; /* Data for being a dmar */ struct mtx lock; LIST_HEAD(, dmar_domain) domains; struct unrhdr *domids; vm_object_t ctx_obj; u_int barrier_flags; /* Fault handler data */ struct mtx fault_lock; uint64_t *fault_log; int fault_log_head; int fault_log_tail; int fault_log_size; struct task fault_task; struct taskqueue *fault_taskqueue; /* QI */ int qi_enabled; vm_offset_t inv_queue; vm_size_t inv_queue_size; uint32_t inv_queue_avail; uint32_t inv_queue_tail; volatile uint32_t inv_waitd_seq_hw; /* hw writes there on wait descr completion */ uint64_t inv_waitd_seq_hw_phys; uint32_t inv_waitd_seq; /* next sequence number to use for wait descr */ 
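/* A wait descriptor stamped with the next sequence number can be queued behind a batch of invalidation descriptors; the hardware writes that number to inv_waitd_seq_hw when the batch has drained, which is what inv_seq_waiters sleep on. */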
u_int inv_waitd_gen; /* seq number generation AKA seq overflows */ u_int inv_seq_waiters; /* count of waiters for seq */ u_int inv_queue_full; /* informational counter */ /* IR */ int ir_enabled; vm_paddr_t irt_phys; dmar_irte_t *irt; u_int irte_cnt; vmem_t *irtids; /* Delayed freeing of map entries queue processing */ struct dmar_map_entries_tailq tlb_flush_entries; struct task qi_task; struct taskqueue *qi_taskqueue; /* Busdma delayed map load */ struct task dmamap_load_task; TAILQ_HEAD(, bus_dmamap_dmar) delayed_maps; struct taskqueue *delayed_taskqueue; int dma_enabled; /* * Bitmap of buses for which context must ignore slot:func, * duplicating the page table pointer into all context table * entries. This is a client-controlled quirk to support some * NTBs. */ uint32_t buswide_ctxs[(PCI_BUSMAX + 1) / NBBY / sizeof(uint32_t)]; }; #define DMAR_LOCK(dmar) mtx_lock(&(dmar)->lock) #define DMAR_UNLOCK(dmar) mtx_unlock(&(dmar)->lock) #define DMAR_ASSERT_LOCKED(dmar) mtx_assert(&(dmar)->lock, MA_OWNED) #define DMAR_FAULT_LOCK(dmar) mtx_lock_spin(&(dmar)->fault_lock) #define DMAR_FAULT_UNLOCK(dmar) mtx_unlock_spin(&(dmar)->fault_lock) #define DMAR_FAULT_ASSERT_LOCKED(dmar) mtx_assert(&(dmar)->fault_lock, MA_OWNED) #define DMAR_IS_COHERENT(dmar) (((dmar)->hw_ecap & DMAR_ECAP_C) != 0) #define DMAR_HAS_QI(dmar) (((dmar)->hw_ecap & DMAR_ECAP_QI) != 0) #define DMAR_X2APIC(dmar) \ (x2apic_mode && ((dmar)->hw_ecap & DMAR_ECAP_EIM) != 0) /* Barrier ids */ #define DMAR_BARRIER_RMRR 0 #define DMAR_BARRIER_USEQ 1 struct dmar_unit *dmar_find(device_t dev, bool verbose); struct dmar_unit *dmar_find_hpet(device_t dev, uint16_t *rid); struct dmar_unit *dmar_find_ioapic(u_int apic_id, uint16_t *rid); u_int dmar_nd2mask(u_int nd); bool dmar_pglvl_supported(struct dmar_unit *unit, int pglvl); int domain_set_agaw(struct dmar_domain *domain, int mgaw); int dmar_maxaddr2mgaw(struct dmar_unit *unit, dmar_gaddr_t maxaddr, bool allow_less); vm_pindex_t pglvl_max_pages(int pglvl); int domain_is_sp_lvl(struct dmar_domain *domain, int lvl); dmar_gaddr_t pglvl_page_size(int total_pglvl, int lvl); dmar_gaddr_t domain_page_size(struct dmar_domain *domain, int lvl); int calc_am(struct dmar_unit *unit, dmar_gaddr_t base, dmar_gaddr_t size, dmar_gaddr_t *isizep); struct vm_page *dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags); void dmar_pgfree(vm_object_t obj, vm_pindex_t idx, int flags); void *dmar_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags, struct sf_buf **sf); void dmar_unmap_pgtbl(struct sf_buf *sf); int dmar_load_root_entry_ptr(struct dmar_unit *unit); int dmar_inv_ctx_glob(struct dmar_unit *unit); int dmar_inv_iotlb_glob(struct dmar_unit *unit); int dmar_flush_write_bufs(struct dmar_unit *unit); void dmar_flush_pte_to_ram(struct dmar_unit *unit, dmar_pte_t *dst); void dmar_flush_ctx_to_ram(struct dmar_unit *unit, dmar_ctx_entry_t *dst); void dmar_flush_root_to_ram(struct dmar_unit *unit, dmar_root_entry_t *dst); int dmar_enable_translation(struct dmar_unit *unit); int dmar_disable_translation(struct dmar_unit *unit); int dmar_load_irt_ptr(struct dmar_unit *unit); int dmar_enable_ir(struct dmar_unit *unit); int dmar_disable_ir(struct dmar_unit *unit); bool dmar_barrier_enter(struct dmar_unit *dmar, u_int barrier_id); void dmar_barrier_exit(struct dmar_unit *dmar, u_int barrier_id); uint64_t dmar_get_timeout(void); void dmar_update_timeout(uint64_t newval); int dmar_fault_intr(void *arg); void dmar_enable_fault_intr(struct dmar_unit *unit); void dmar_disable_fault_intr(struct dmar_unit *unit); int 
dmar_init_fault_log(struct dmar_unit *unit); void dmar_fini_fault_log(struct dmar_unit *unit); int dmar_qi_intr(void *arg); void dmar_enable_qi_intr(struct dmar_unit *unit); void dmar_disable_qi_intr(struct dmar_unit *unit); int dmar_init_qi(struct dmar_unit *unit); void dmar_fini_qi(struct dmar_unit *unit); void dmar_qi_invalidate_locked(struct dmar_domain *domain, dmar_gaddr_t start, dmar_gaddr_t size, struct dmar_qi_genseq *psec, bool emit_wait); void dmar_qi_invalidate_ctx_glob_locked(struct dmar_unit *unit); void dmar_qi_invalidate_iotlb_glob_locked(struct dmar_unit *unit); void dmar_qi_invalidate_iec_glob(struct dmar_unit *unit); void dmar_qi_invalidate_iec(struct dmar_unit *unit, u_int start, u_int cnt); vm_object_t domain_get_idmap_pgtbl(struct dmar_domain *domain, dmar_gaddr_t maxaddr); void put_idmap_pgtbl(vm_object_t obj); int domain_map_buf(struct dmar_domain *domain, dmar_gaddr_t base, dmar_gaddr_t size, vm_page_t *ma, uint64_t pflags, int flags); int domain_unmap_buf(struct dmar_domain *domain, dmar_gaddr_t base, dmar_gaddr_t size, int flags); void domain_flush_iotlb_sync(struct dmar_domain *domain, dmar_gaddr_t base, dmar_gaddr_t size); int domain_alloc_pgtbl(struct dmar_domain *domain); void domain_free_pgtbl(struct dmar_domain *domain); int dmar_dev_depth(device_t child); void dmar_dev_path(device_t child, int *busno, void *path1, int depth); struct dmar_ctx *dmar_instantiate_ctx(struct dmar_unit *dmar, device_t dev, bool rmrr); struct dmar_ctx *dmar_get_ctx_for_dev(struct dmar_unit *dmar, device_t dev, uint16_t rid, bool id_mapped, bool rmrr_init); struct dmar_ctx *dmar_get_ctx_for_devpath(struct dmar_unit *dmar, uint16_t rid, int dev_domain, int dev_busno, const void *dev_path, int dev_path_len, bool id_mapped, bool rmrr_init); int dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx); void dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx); void dmar_free_ctx(struct dmar_ctx *ctx); struct dmar_ctx *dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid); void dmar_domain_unload_entry(struct dmar_map_entry *entry, bool free); void dmar_domain_unload(struct dmar_domain *domain, struct dmar_map_entries_tailq *entries, bool cansleep); void dmar_domain_free_entry(struct dmar_map_entry *entry, bool free); int dmar_init_busdma(struct dmar_unit *unit); void dmar_fini_busdma(struct dmar_unit *unit); device_t dmar_get_requester(device_t dev, uint16_t *rid); void dmar_gas_init_domain(struct dmar_domain *domain); void dmar_gas_fini_domain(struct dmar_domain *domain); struct dmar_map_entry *dmar_gas_alloc_entry(struct dmar_domain *domain, u_int flags); void dmar_gas_free_entry(struct dmar_domain *domain, struct dmar_map_entry *entry); void dmar_gas_free_space(struct dmar_domain *domain, struct dmar_map_entry *entry); int dmar_gas_map(struct dmar_domain *domain, const struct bus_dma_tag_common *common, dmar_gaddr_t size, int offset, u_int eflags, u_int flags, vm_page_t *ma, struct dmar_map_entry **res); void dmar_gas_free_region(struct dmar_domain *domain, struct dmar_map_entry *entry); int dmar_gas_map_region(struct dmar_domain *domain, struct dmar_map_entry *entry, u_int eflags, u_int flags, vm_page_t *ma); int dmar_gas_reserve_region(struct dmar_domain *domain, dmar_gaddr_t start, dmar_gaddr_t end); void dmar_dev_parse_rmrr(struct dmar_domain *domain, int dev_domain, int dev_busno, const void *dev_path, int dev_path_len, struct dmar_map_entries_tailq *rmrr_entries); int dmar_instantiate_rmrr_ctxs(struct dmar_unit *dmar); void 
dmar_quirks_post_ident(struct dmar_unit *dmar); void dmar_quirks_pre_use(struct dmar_unit *dmar); int dmar_init_irt(struct dmar_unit *unit); void dmar_fini_irt(struct dmar_unit *unit); void dmar_set_buswide_ctx(struct dmar_unit *unit, u_int busno); bool dmar_is_buswide_ctx(struct dmar_unit *unit, u_int busno); #define DMAR_GM_CANWAIT 0x0001 #define DMAR_GM_CANSPLIT 0x0002 #define DMAR_GM_RMRR 0x0004 #define DMAR_PGF_WAITOK 0x0001 #define DMAR_PGF_ZERO 0x0002 #define DMAR_PGF_ALLOC 0x0004 #define DMAR_PGF_NOALLOC 0x0008 #define DMAR_PGF_OBJL 0x0010 extern dmar_haddr_t dmar_high; extern int haw; extern int dmar_tbl_pagecnt; extern int dmar_batch_coalesce; extern int dmar_check_free; static inline uint32_t dmar_read4(const struct dmar_unit *unit, int reg) { return (bus_read_4(unit->regs, reg)); } static inline uint64_t dmar_read8(const struct dmar_unit *unit, int reg) { #ifdef __i386__ uint32_t high, low; low = bus_read_4(unit->regs, reg); high = bus_read_4(unit->regs, reg + 4); return (low | ((uint64_t)high << 32)); #else return (bus_read_8(unit->regs, reg)); #endif } static inline void dmar_write4(const struct dmar_unit *unit, int reg, uint32_t val) { KASSERT(reg != DMAR_GCMD_REG || (val & DMAR_GCMD_TE) == (unit->hw_gcmd & DMAR_GCMD_TE), ("dmar%d clearing TE 0x%08x 0x%08x", unit->unit, unit->hw_gcmd, val)); bus_write_4(unit->regs, reg, val); } static inline void dmar_write8(const struct dmar_unit *unit, int reg, uint64_t val) { KASSERT(reg != DMAR_GCMD_REG, ("8byte GCMD write")); #ifdef __i386__ uint32_t high, low; low = val; high = val >> 32; bus_write_4(unit->regs, reg, low); bus_write_4(unit->regs, reg + 4, high); #else bus_write_8(unit->regs, reg, val); #endif } /* * dmar_pte_store and dmar_pte_clear ensure that on i386, 32bit writes * are issued in the correct order. For store, the lower word, * containing the P or R and W bits, is set only after the high word * is written. For clear, the P bit is cleared first, then the high * word is cleared. * * dmar_pte_update updates the pte. For amd64, the update is atomic. * For i386, it first disables the entry by clearing the word * containing the P bit, and then defer to dmar_pte_store. The locked * cmpxchg8b is probably available on any machine having DMAR support, * but interrupt translation table may be mapped uncached. 
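That possibility is presumably why the i386 update path below uses two ordered 32-bit stores rather than a locked 64-bit exchange.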
*/ static inline void dmar_pte_store1(volatile uint64_t *dst, uint64_t val) { #ifdef __i386__ volatile uint32_t *p; uint32_t hi, lo; hi = val >> 32; lo = val; p = (volatile uint32_t *)dst; *(p + 1) = hi; *p = lo; #else *dst = val; #endif } static inline void dmar_pte_store(volatile uint64_t *dst, uint64_t val) { KASSERT(*dst == 0, ("used pte %p oldval %jx newval %jx", dst, (uintmax_t)*dst, (uintmax_t)val)); dmar_pte_store1(dst, val); } static inline void dmar_pte_update(volatile uint64_t *dst, uint64_t val) { #ifdef __i386__ volatile uint32_t *p; p = (volatile uint32_t *)dst; *p = 0; #endif dmar_pte_store1(dst, val); } static inline void dmar_pte_clear(volatile uint64_t *dst) { #ifdef __i386__ volatile uint32_t *p; p = (volatile uint32_t *)dst; *p = 0; *(p + 1) = 0; #else *dst = 0; #endif } static inline bool dmar_test_boundary(dmar_gaddr_t start, dmar_gaddr_t size, dmar_gaddr_t boundary) { if (boundary == 0) return (true); return (start + size <= ((start + boundary) & ~(boundary - 1))); } extern struct timespec dmar_hw_timeout; #define DMAR_WAIT_UNTIL(cond) \ { \ struct timespec last, curr; \ bool forever; \ \ if (dmar_hw_timeout.tv_sec == 0 && \ dmar_hw_timeout.tv_nsec == 0) { \ forever = true; \ } else { \ forever = false; \ nanouptime(&curr); \ timespecadd(&curr, &dmar_hw_timeout, &last); \ } \ for (;;) { \ if (cond) { \ error = 0; \ break; \ } \ nanouptime(&curr); \ if (!forever && timespeccmp(&last, &curr, <)) { \ error = ETIMEDOUT; \ break; \ } \ cpu_spinwait(); \ } \ } #ifdef INVARIANTS #define TD_PREP_PINNED_ASSERT \ int old_td_pinned; \ old_td_pinned = curthread->td_pinned #define TD_PINNED_ASSERT \ KASSERT(curthread->td_pinned == old_td_pinned, \ ("pin count leak: %d %d %s:%d", curthread->td_pinned, \ old_td_pinned, __FILE__, __LINE__)) #else #define TD_PREP_PINNED_ASSERT #define TD_PINNED_ASSERT #endif #endif Index: projects/clang1000-import/sys/x86/iommu/intel_drv.c =================================================================== --- projects/clang1000-import/sys/x86/iommu/intel_drv.c (revision 357178) +++ projects/clang1000-import/sys/x86/iommu/intel_drv.c (revision 357179) @@ -1,1344 +1,1344 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013-2015 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_acpi.h" #if defined(__amd64__) #define DEV_APIC #else #include "opt_apic.h" #endif #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_APIC #include "pcib_if.h" #include #include #include #endif #define DMAR_FAULT_IRQ_RID 0 #define DMAR_QI_IRQ_RID 1 #define DMAR_REG_RID 2 static devclass_t dmar_devclass; static device_t *dmar_devs; static int dmar_devcnt; typedef int (*dmar_iter_t)(ACPI_DMAR_HEADER *, void *); static void dmar_iterate_tbl(dmar_iter_t iter, void *arg) { ACPI_TABLE_DMAR *dmartbl; ACPI_DMAR_HEADER *dmarh; char *ptr, *ptrend; ACPI_STATUS status; status = AcpiGetTable(ACPI_SIG_DMAR, 1, (ACPI_TABLE_HEADER **)&dmartbl); if (ACPI_FAILURE(status)) return; ptr = (char *)dmartbl + sizeof(*dmartbl); ptrend = (char *)dmartbl + dmartbl->Header.Length; for (;;) { if (ptr >= ptrend) break; dmarh = (ACPI_DMAR_HEADER *)ptr; if (dmarh->Length <= 0) { printf("dmar_identify: corrupted DMAR table, l %d\n", dmarh->Length); break; } ptr += dmarh->Length; if (!iter(dmarh, arg)) break; } AcpiPutTable((ACPI_TABLE_HEADER *)dmartbl); } struct find_iter_args { int i; ACPI_DMAR_HARDWARE_UNIT *res; }; static int dmar_find_iter(ACPI_DMAR_HEADER *dmarh, void *arg) { struct find_iter_args *fia; if (dmarh->Type != ACPI_DMAR_TYPE_HARDWARE_UNIT) return (1); fia = arg; if (fia->i == 0) { fia->res = (ACPI_DMAR_HARDWARE_UNIT *)dmarh; return (0); } fia->i--; return (1); } static ACPI_DMAR_HARDWARE_UNIT * dmar_find_by_index(int idx) { struct find_iter_args fia; fia.i = idx; fia.res = NULL; dmar_iterate_tbl(dmar_find_iter, &fia); return (fia.res); } static int dmar_count_iter(ACPI_DMAR_HEADER *dmarh, void *arg) { if (dmarh->Type == ACPI_DMAR_TYPE_HARDWARE_UNIT) dmar_devcnt++; return (1); } static int dmar_enable = 0; static void dmar_identify(driver_t *driver, device_t parent) { ACPI_TABLE_DMAR *dmartbl; ACPI_DMAR_HARDWARE_UNIT *dmarh; ACPI_STATUS status; int i, error; if (acpi_disabled("dmar")) return; TUNABLE_INT_FETCH("hw.dmar.enable", &dmar_enable); if (!dmar_enable) return; #ifdef INVARIANTS TUNABLE_INT_FETCH("hw.dmar.check_free", &dmar_check_free); #endif status = AcpiGetTable(ACPI_SIG_DMAR, 1, (ACPI_TABLE_HEADER **)&dmartbl); if (ACPI_FAILURE(status)) return; haw = dmartbl->Width + 1; if ((1ULL << (haw + 1)) > BUS_SPACE_MAXADDR) dmar_high = BUS_SPACE_MAXADDR; else dmar_high = 1ULL << (haw + 1); if (bootverbose) { printf("DMAR HAW=%d flags=<%b>\n", dmartbl->Width, (unsigned)dmartbl->Flags, "\020\001INTR_REMAP\002X2APIC_OPT_OUT"); } AcpiPutTable((ACPI_TABLE_HEADER *)dmartbl); dmar_iterate_tbl(dmar_count_iter, NULL); if (dmar_devcnt == 0) return; dmar_devs = malloc(sizeof(device_t) * dmar_devcnt, M_DEVBUF, M_WAITOK | M_ZERO); for (i = 0; i < dmar_devcnt; i++) { dmarh = dmar_find_by_index(i); 
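/* The indexed lookup walks the same ACPI table as the earlier counting pass, so a NULL result here should only mean an inconsistent DMAR table; skip the slot. */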
if (dmarh == NULL) { printf("dmar_identify: cannot find HWUNIT %d\n", i); continue; } dmar_devs[i] = BUS_ADD_CHILD(parent, 1, "dmar", i); if (dmar_devs[i] == NULL) { printf("dmar_identify: cannot create instance %d\n", i); continue; } error = bus_set_resource(dmar_devs[i], SYS_RES_MEMORY, DMAR_REG_RID, dmarh->Address, PAGE_SIZE); if (error != 0) { printf( "dmar%d: unable to alloc register window at 0x%08jx: error %d\n", i, (uintmax_t)dmarh->Address, error); device_delete_child(parent, dmar_devs[i]); dmar_devs[i] = NULL; } } } static int dmar_probe(device_t dev) { if (acpi_get_handle(dev) != NULL) return (ENXIO); device_set_desc(dev, "DMA remap"); return (BUS_PROBE_NOWILDCARD); } static void dmar_release_intr(device_t dev, struct dmar_unit *unit, int idx) { struct dmar_msi_data *dmd; dmd = &unit->intrs[idx]; if (dmd->irq == -1) return; bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle); bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res); bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid); PCIB_RELEASE_MSIX(device_get_parent(device_get_parent(dev)), dev, dmd->irq); dmd->irq = -1; } static void dmar_release_resources(device_t dev, struct dmar_unit *unit) { int i; dmar_fini_busdma(unit); dmar_fini_irt(unit); dmar_fini_qi(unit); dmar_fini_fault_log(unit); for (i = 0; i < DMAR_INTR_TOTAL; i++) dmar_release_intr(dev, unit, i); if (unit->regs != NULL) { bus_deactivate_resource(dev, SYS_RES_MEMORY, unit->reg_rid, unit->regs); bus_release_resource(dev, SYS_RES_MEMORY, unit->reg_rid, unit->regs); unit->regs = NULL; } if (unit->domids != NULL) { delete_unrhdr(unit->domids); unit->domids = NULL; } if (unit->ctx_obj != NULL) { vm_object_deallocate(unit->ctx_obj); unit->ctx_obj = NULL; } } static int dmar_alloc_irq(device_t dev, struct dmar_unit *unit, int idx) { device_t pcib; struct dmar_msi_data *dmd; uint64_t msi_addr; uint32_t msi_data; int error; dmd = &unit->intrs[idx]; pcib = device_get_parent(device_get_parent(dev)); /* Really not pcib */ error = PCIB_ALLOC_MSIX(pcib, dev, &dmd->irq); if (error != 0) { device_printf(dev, "cannot allocate %s interrupt, %d\n", dmd->name, error); goto err1; } error = bus_set_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq, 1); if (error != 0) { device_printf(dev, "cannot set %s interrupt resource, %d\n", dmd->name, error); goto err2; } dmd->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &dmd->irq_rid, RF_ACTIVE); if (dmd->irq_res == NULL) { device_printf(dev, "cannot allocate resource for %s interrupt\n", dmd->name); error = ENXIO; goto err3; } error = bus_setup_intr(dev, dmd->irq_res, INTR_TYPE_MISC, dmd->handler, NULL, unit, &dmd->intr_handle); if (error != 0) { device_printf(dev, "cannot setup %s interrupt, %d\n", dmd->name, error); goto err4; } bus_describe_intr(dev, dmd->irq_res, dmd->intr_handle, "%s", dmd->name); error = PCIB_MAP_MSI(pcib, dev, dmd->irq, &msi_addr, &msi_data); if (error != 0) { device_printf(dev, "cannot map %s interrupt, %d\n", dmd->name, error); goto err5; } dmar_write4(unit, dmd->msi_data_reg, msi_data); dmar_write4(unit, dmd->msi_addr_reg, msi_addr); /* Only for xAPIC mode */ dmar_write4(unit, dmd->msi_uaddr_reg, msi_addr >> 32); return (0); err5: bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle); err4: bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res); err3: bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid); err2: PCIB_RELEASE_MSIX(pcib, dev, dmd->irq); dmd->irq = -1; err1: return (error); } #ifdef DEV_APIC static int dmar_remap_intr(device_t dev, device_t child, u_int irq) { struct 
dmar_unit *unit; struct dmar_msi_data *dmd; uint64_t msi_addr; uint32_t msi_data; int i, error; unit = device_get_softc(dev); for (i = 0; i < DMAR_INTR_TOTAL; i++) { dmd = &unit->intrs[i]; if (irq == dmd->irq) { error = PCIB_MAP_MSI(device_get_parent( device_get_parent(dev)), dev, irq, &msi_addr, &msi_data); if (error != 0) return (error); DMAR_LOCK(unit); (dmd->disable_intr)(unit); dmar_write4(unit, dmd->msi_data_reg, msi_data); dmar_write4(unit, dmd->msi_addr_reg, msi_addr); dmar_write4(unit, dmd->msi_uaddr_reg, msi_addr >> 32); (dmd->enable_intr)(unit); DMAR_UNLOCK(unit); return (0); } } return (ENOENT); } #endif static void dmar_print_caps(device_t dev, struct dmar_unit *unit, ACPI_DMAR_HARDWARE_UNIT *dmaru) { uint32_t caphi, ecaphi; device_printf(dev, "regs@0x%08jx, ver=%d.%d, seg=%d, flags=<%b>\n", (uintmax_t)dmaru->Address, DMAR_MAJOR_VER(unit->hw_ver), DMAR_MINOR_VER(unit->hw_ver), dmaru->Segment, dmaru->Flags, "\020\001INCLUDE_ALL_PCI"); caphi = unit->hw_cap >> 32; device_printf(dev, "cap=%b,", (u_int)unit->hw_cap, "\020\004AFL\005WBF\006PLMR\007PHMR\010CM\027ZLR\030ISOCH"); printf("%b, ", caphi, "\020\010PSI\027DWD\030DRD\031FL1GP\034PSI"); printf("ndoms=%d, sagaw=%d, mgaw=%d, fro=%d, nfr=%d, superp=%d", DMAR_CAP_ND(unit->hw_cap), DMAR_CAP_SAGAW(unit->hw_cap), DMAR_CAP_MGAW(unit->hw_cap), DMAR_CAP_FRO(unit->hw_cap), DMAR_CAP_NFR(unit->hw_cap), DMAR_CAP_SPS(unit->hw_cap)); if ((unit->hw_cap & DMAR_CAP_PSI) != 0) printf(", mamv=%d", DMAR_CAP_MAMV(unit->hw_cap)); printf("\n"); ecaphi = unit->hw_ecap >> 32; device_printf(dev, "ecap=%b,", (u_int)unit->hw_ecap, "\020\001C\002QI\003DI\004IR\005EIM\007PT\010SC\031ECS\032MTS" "\033NEST\034DIS\035PASID\036PRS\037ERS\040SRS"); printf("%b, ", ecaphi, "\020\002NWFS\003EAFS"); printf("mhmw=%d, iro=%d\n", DMAR_ECAP_MHMV(unit->hw_ecap), DMAR_ECAP_IRO(unit->hw_ecap)); } static int dmar_attach(device_t dev) { struct dmar_unit *unit; ACPI_DMAR_HARDWARE_UNIT *dmaru; uint64_t timeout; int i, error; unit = device_get_softc(dev); unit->dev = dev; unit->unit = device_get_unit(dev); dmaru = dmar_find_by_index(unit->unit); if (dmaru == NULL) return (EINVAL); unit->segment = dmaru->Segment; unit->base = dmaru->Address; unit->reg_rid = DMAR_REG_RID; unit->regs = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &unit->reg_rid, RF_ACTIVE); if (unit->regs == NULL) { device_printf(dev, "cannot allocate register window\n"); return (ENOMEM); } unit->hw_ver = dmar_read4(unit, DMAR_VER_REG); unit->hw_cap = dmar_read8(unit, DMAR_CAP_REG); unit->hw_ecap = dmar_read8(unit, DMAR_ECAP_REG); if (bootverbose) dmar_print_caps(dev, unit, dmaru); dmar_quirks_post_ident(unit); timeout = dmar_get_timeout(); TUNABLE_UINT64_FETCH("hw.dmar.timeout", &timeout); dmar_update_timeout(timeout); for (i = 0; i < DMAR_INTR_TOTAL; i++) unit->intrs[i].irq = -1; unit->intrs[DMAR_INTR_FAULT].name = "fault"; unit->intrs[DMAR_INTR_FAULT].irq_rid = DMAR_FAULT_IRQ_RID; unit->intrs[DMAR_INTR_FAULT].handler = dmar_fault_intr; unit->intrs[DMAR_INTR_FAULT].msi_data_reg = DMAR_FEDATA_REG; unit->intrs[DMAR_INTR_FAULT].msi_addr_reg = DMAR_FEADDR_REG; unit->intrs[DMAR_INTR_FAULT].msi_uaddr_reg = DMAR_FEUADDR_REG; unit->intrs[DMAR_INTR_FAULT].enable_intr = dmar_enable_fault_intr; unit->intrs[DMAR_INTR_FAULT].disable_intr = dmar_disable_fault_intr; error = dmar_alloc_irq(dev, unit, DMAR_INTR_FAULT); if (error != 0) { dmar_release_resources(dev, unit); return (error); } if (DMAR_HAS_QI(unit)) { unit->intrs[DMAR_INTR_QI].name = "qi"; unit->intrs[DMAR_INTR_QI].irq_rid = DMAR_QI_IRQ_RID; 
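/* QI interrupt setup mirrors the fault interrupt above, but is backed by the invalidation event (IE*) register set. */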
unit->intrs[DMAR_INTR_QI].handler = dmar_qi_intr; unit->intrs[DMAR_INTR_QI].msi_data_reg = DMAR_IEDATA_REG; unit->intrs[DMAR_INTR_QI].msi_addr_reg = DMAR_IEADDR_REG; unit->intrs[DMAR_INTR_QI].msi_uaddr_reg = DMAR_IEUADDR_REG; unit->intrs[DMAR_INTR_QI].enable_intr = dmar_enable_qi_intr; unit->intrs[DMAR_INTR_QI].disable_intr = dmar_disable_qi_intr; error = dmar_alloc_irq(dev, unit, DMAR_INTR_QI); if (error != 0) { dmar_release_resources(dev, unit); return (error); } } mtx_init(&unit->lock, "dmarhw", NULL, MTX_DEF); unit->domids = new_unrhdr(0, dmar_nd2mask(DMAR_CAP_ND(unit->hw_cap)), &unit->lock); LIST_INIT(&unit->domains); /* * 9.2 "Context Entry": * When Caching Mode (CM) field is reported as Set, the * domain-id value of zero is architecturally reserved. * Software must not use domain-id value of zero * when CM is Set. */ if ((unit->hw_cap & DMAR_CAP_CM) != 0) alloc_unr_specific(unit->domids, 0); unit->ctx_obj = vm_pager_allocate(OBJT_PHYS, NULL, IDX_TO_OFF(1 + DMAR_CTX_CNT), 0, 0, NULL); /* * Allocate and load the root entry table pointer. Enable the * address translation after the required invalidations are * done. */ dmar_pgalloc(unit->ctx_obj, 0, DMAR_PGF_WAITOK | DMAR_PGF_ZERO); DMAR_LOCK(unit); error = dmar_load_root_entry_ptr(unit); if (error != 0) { DMAR_UNLOCK(unit); dmar_release_resources(dev, unit); return (error); } error = dmar_inv_ctx_glob(unit); if (error != 0) { DMAR_UNLOCK(unit); dmar_release_resources(dev, unit); return (error); } if ((unit->hw_ecap & DMAR_ECAP_DI) != 0) { error = dmar_inv_iotlb_glob(unit); if (error != 0) { DMAR_UNLOCK(unit); dmar_release_resources(dev, unit); return (error); } } DMAR_UNLOCK(unit); error = dmar_init_fault_log(unit); if (error != 0) { dmar_release_resources(dev, unit); return (error); } error = dmar_init_qi(unit); if (error != 0) { dmar_release_resources(dev, unit); return (error); } error = dmar_init_irt(unit); if (error != 0) { dmar_release_resources(dev, unit); return (error); } error = dmar_init_busdma(unit); if (error != 0) { dmar_release_resources(dev, unit); return (error); } #ifdef NOTYET DMAR_LOCK(unit); error = dmar_enable_translation(unit); if (error != 0) { DMAR_UNLOCK(unit); dmar_release_resources(dev, unit); return (error); } DMAR_UNLOCK(unit); #endif return (0); } static int dmar_detach(device_t dev) { return (EBUSY); } static int dmar_suspend(device_t dev) { return (0); } static int dmar_resume(device_t dev) { /* XXXKIB */ return (0); } static device_method_t dmar_methods[] = { DEVMETHOD(device_identify, dmar_identify), DEVMETHOD(device_probe, dmar_probe), DEVMETHOD(device_attach, dmar_attach), DEVMETHOD(device_detach, dmar_detach), DEVMETHOD(device_suspend, dmar_suspend), DEVMETHOD(device_resume, dmar_resume), #ifdef DEV_APIC DEVMETHOD(bus_remap_intr, dmar_remap_intr), #endif DEVMETHOD_END }; static driver_t dmar_driver = { "dmar", dmar_methods, sizeof(struct dmar_unit), }; DRIVER_MODULE(dmar, acpi, dmar_driver, dmar_devclass, 0, 0); MODULE_DEPEND(dmar, acpi, 1, 1, 1); void dmar_set_buswide_ctx(struct dmar_unit *unit, u_int busno) { MPASS(busno <= PCI_BUSMAX); DMAR_LOCK(unit); unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] |= 1 << (busno % (NBBY * sizeof(uint32_t))); DMAR_UNLOCK(unit); } bool dmar_is_buswide_ctx(struct dmar_unit *unit, u_int busno) { MPASS(busno <= PCI_BUSMAX); return ((unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] & (1U << (busno % (NBBY * sizeof(uint32_t))))) != 0); } static void dmar_print_path(int busno, int depth, const ACPI_DMAR_PCI_PATH *path) { int i; printf("[%d, ", busno); for (i = 
0; i < depth; i++) { if (i != 0) printf(", "); printf("(%d, %d)", path[i].Device, path[i].Function); } printf("]"); } int dmar_dev_depth(device_t child) { devclass_t pci_class; device_t bus, pcib; int depth; pci_class = devclass_find("pci"); for (depth = 1; ; depth++) { bus = device_get_parent(child); pcib = device_get_parent(bus); if (device_get_devclass(device_get_parent(pcib)) != pci_class) return (depth); child = pcib; } } void dmar_dev_path(device_t child, int *busno, void *path1, int depth) { devclass_t pci_class; device_t bus, pcib; ACPI_DMAR_PCI_PATH *path; pci_class = devclass_find("pci"); path = path1; for (depth--; depth != -1; depth--) { path[depth].Device = pci_get_slot(child); path[depth].Function = pci_get_function(child); bus = device_get_parent(child); pcib = device_get_parent(bus); if (device_get_devclass(device_get_parent(pcib)) != pci_class) { /* reached a host bridge */ *busno = pcib_get_bus(bus); return; } child = pcib; } panic("wrong depth"); } static int dmar_match_pathes(int busno1, const ACPI_DMAR_PCI_PATH *path1, int depth1, int busno2, const ACPI_DMAR_PCI_PATH *path2, int depth2, enum AcpiDmarScopeType scope_type) { int i, depth; if (busno1 != busno2) return (0); if (scope_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && depth1 != depth2) return (0); depth = depth1; if (depth2 < depth) depth = depth2; for (i = 0; i < depth; i++) { if (path1[i].Device != path2[i].Device || path1[i].Function != path2[i].Function) return (0); } return (1); } static int dmar_match_devscope(ACPI_DMAR_DEVICE_SCOPE *devscope, int dev_busno, const ACPI_DMAR_PCI_PATH *dev_path, int dev_path_len) { ACPI_DMAR_PCI_PATH *path; int path_len; if (devscope->Length < sizeof(*devscope)) { printf("dmar_match_devscope: corrupted DMAR table, dl %d\n", devscope->Length); return (-1); } if (devscope->EntryType != ACPI_DMAR_SCOPE_TYPE_ENDPOINT && devscope->EntryType != ACPI_DMAR_SCOPE_TYPE_BRIDGE) return (0); path_len = devscope->Length - sizeof(*devscope); if (path_len % 2 != 0) { printf("dmar_match_devscope: corrupted DMAR table, dl %d\n", devscope->Length); return (-1); } path_len /= 2; path = (ACPI_DMAR_PCI_PATH *)(devscope + 1); if (path_len == 0) { printf("dmar_match_devscope: corrupted DMAR table, dl %d\n", devscope->Length); return (-1); } return (dmar_match_pathes(devscope->Bus, path, path_len, dev_busno, dev_path, dev_path_len, devscope->EntryType)); } static bool dmar_match_by_path(struct dmar_unit *unit, int dev_domain, int dev_busno, const ACPI_DMAR_PCI_PATH *dev_path, int dev_path_len, const char **banner) { ACPI_DMAR_HARDWARE_UNIT *dmarh; ACPI_DMAR_DEVICE_SCOPE *devscope; char *ptr, *ptrend; int match; dmarh = dmar_find_by_index(unit->unit); if (dmarh == NULL) return (false); if (dmarh->Segment != dev_domain) return (false); if ((dmarh->Flags & ACPI_DMAR_INCLUDE_ALL) != 0) { if (banner != NULL) *banner = "INCLUDE_ALL"; return (true); } ptr = (char *)dmarh + sizeof(*dmarh); ptrend = (char *)dmarh + dmarh->Header.Length; while (ptr < ptrend) { devscope = (ACPI_DMAR_DEVICE_SCOPE *)ptr; ptr += devscope->Length; match = dmar_match_devscope(devscope, dev_busno, dev_path, dev_path_len); if (match == -1) return (false); if (match == 1) { if (banner != NULL) *banner = "specific match"; return (true); } } return (false); } static struct dmar_unit * dmar_find_by_scope(int dev_domain, int dev_busno, const ACPI_DMAR_PCI_PATH *dev_path, int dev_path_len) { struct dmar_unit *unit; int i; for (i = 0; i < dmar_devcnt; i++) { if (dmar_devs[i] == NULL) continue; unit = device_get_softc(dmar_devs[i]); if 
(dmar_match_by_path(unit, dev_domain, dev_busno, dev_path, dev_path_len, NULL)) return (unit); } return (NULL); } struct dmar_unit * dmar_find(device_t dev, bool verbose) { device_t dmar_dev; struct dmar_unit *unit; const char *banner; int i, dev_domain, dev_busno, dev_path_len; /* * This function can only handle PCI(e) devices. */ if (device_get_devclass(device_get_parent(dev)) != devclass_find("pci")) return (NULL); dmar_dev = NULL; dev_domain = pci_get_domain(dev); dev_path_len = dmar_dev_depth(dev); ACPI_DMAR_PCI_PATH dev_path[dev_path_len]; dmar_dev_path(dev, &dev_busno, dev_path, dev_path_len); banner = ""; for (i = 0; i < dmar_devcnt; i++) { if (dmar_devs[i] == NULL) continue; unit = device_get_softc(dmar_devs[i]); if (dmar_match_by_path(unit, dev_domain, dev_busno, dev_path, dev_path_len, &banner)) break; } if (i == dmar_devcnt) return (NULL); if (verbose) { device_printf(dev, "pci%d:%d:%d:%d matched dmar%d by %s", dev_domain, pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev), unit->unit, banner); printf(" scope path "); dmar_print_path(dev_busno, dev_path_len, dev_path); printf("\n"); } return (unit); } static struct dmar_unit * dmar_find_nonpci(u_int id, u_int entry_type, uint16_t *rid) { device_t dmar_dev; struct dmar_unit *unit; ACPI_DMAR_HARDWARE_UNIT *dmarh; ACPI_DMAR_DEVICE_SCOPE *devscope; ACPI_DMAR_PCI_PATH *path; char *ptr, *ptrend; #ifdef DEV_APIC int error; #endif int i; for (i = 0; i < dmar_devcnt; i++) { dmar_dev = dmar_devs[i]; if (dmar_dev == NULL) continue; unit = (struct dmar_unit *)device_get_softc(dmar_dev); dmarh = dmar_find_by_index(i); if (dmarh == NULL) continue; ptr = (char *)dmarh + sizeof(*dmarh); ptrend = (char *)dmarh + dmarh->Header.Length; for (;;) { if (ptr >= ptrend) break; devscope = (ACPI_DMAR_DEVICE_SCOPE *)ptr; ptr += devscope->Length; if (devscope->EntryType != entry_type) continue; if (devscope->EnumerationId != id) continue; #ifdef DEV_APIC if (entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) { error = ioapic_get_rid(id, rid); /* * If our IOAPIC has PCI bindings then * use the PCI device rid. 
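Otherwise fall through and derive the rid from the device scope path, as for any other non-PCI entry.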
*/ if (error == 0) return (unit); } #endif if (devscope->Length - sizeof(ACPI_DMAR_DEVICE_SCOPE) == 2) { if (rid != NULL) { path = (ACPI_DMAR_PCI_PATH *) (devscope + 1); *rid = PCI_RID(devscope->Bus, path->Device, path->Function); } return (unit); } printf( "dmar_find_nonpci: id %d type %d path length != 2\n", id, entry_type); break; } } return (NULL); } struct dmar_unit * dmar_find_hpet(device_t dev, uint16_t *rid) { return (dmar_find_nonpci(hpet_get_uid(dev), ACPI_DMAR_SCOPE_TYPE_HPET, rid)); } struct dmar_unit * dmar_find_ioapic(u_int apic_id, uint16_t *rid) { return (dmar_find_nonpci(apic_id, ACPI_DMAR_SCOPE_TYPE_IOAPIC, rid)); } struct rmrr_iter_args { struct dmar_domain *domain; int dev_domain; int dev_busno; const ACPI_DMAR_PCI_PATH *dev_path; int dev_path_len; struct dmar_map_entries_tailq *rmrr_entries; }; static int dmar_rmrr_iter(ACPI_DMAR_HEADER *dmarh, void *arg) { struct rmrr_iter_args *ria; ACPI_DMAR_RESERVED_MEMORY *resmem; ACPI_DMAR_DEVICE_SCOPE *devscope; struct dmar_map_entry *entry; char *ptr, *ptrend; int match; if (dmarh->Type != ACPI_DMAR_TYPE_RESERVED_MEMORY) return (1); ria = arg; resmem = (ACPI_DMAR_RESERVED_MEMORY *)dmarh; if (resmem->Segment != ria->dev_domain) return (1); ptr = (char *)resmem + sizeof(*resmem); ptrend = (char *)resmem + resmem->Header.Length; for (;;) { if (ptr >= ptrend) break; devscope = (ACPI_DMAR_DEVICE_SCOPE *)ptr; ptr += devscope->Length; match = dmar_match_devscope(devscope, ria->dev_busno, ria->dev_path, ria->dev_path_len); if (match == 1) { entry = dmar_gas_alloc_entry(ria->domain, DMAR_PGF_WAITOK); entry->start = resmem->BaseAddress; /* The RMRR entry end address is inclusive. */ entry->end = resmem->EndAddress; TAILQ_INSERT_TAIL(ria->rmrr_entries, entry, unroll_link); } } return (1); } void dmar_dev_parse_rmrr(struct dmar_domain *domain, int dev_domain, int dev_busno, const void *dev_path, int dev_path_len, struct dmar_map_entries_tailq *rmrr_entries) { struct rmrr_iter_args ria; ria.domain = domain; ria.dev_domain = dev_domain; ria.dev_busno = dev_busno; ria.dev_path = (const ACPI_DMAR_PCI_PATH *)dev_path; ria.dev_path_len = dev_path_len; ria.rmrr_entries = rmrr_entries; dmar_iterate_tbl(dmar_rmrr_iter, &ria); } struct inst_rmrr_iter_args { struct dmar_unit *dmar; }; static device_t dmar_path_dev(int segment, int path_len, int busno, const ACPI_DMAR_PCI_PATH *path, uint16_t *rid) { device_t dev; int i; dev = NULL; for (i = 0; i < path_len; i++) { dev = pci_find_dbsf(segment, busno, path->Device, path->Function); if (i != path_len - 1) { busno = pci_cfgregread(busno, path->Device, path->Function, PCIR_SECBUS_1, 1); path++; } } *rid = PCI_RID(busno, path->Device, path->Function); return (dev); } static int dmar_inst_rmrr_iter(ACPI_DMAR_HEADER *dmarh, void *arg) { const ACPI_DMAR_RESERVED_MEMORY *resmem; const ACPI_DMAR_DEVICE_SCOPE *devscope; struct inst_rmrr_iter_args *iria; const char *ptr, *ptrend; device_t dev; struct dmar_unit *unit; int dev_path_len; uint16_t rid; iria = arg; if (dmarh->Type != ACPI_DMAR_TYPE_RESERVED_MEMORY) return (1); resmem = (ACPI_DMAR_RESERVED_MEMORY *)dmarh; if (resmem->Segment != iria->dmar->segment) return (1); ptr = (const char *)resmem + sizeof(*resmem); ptrend = (const char *)resmem + resmem->Header.Length; for (;;) { if (ptr >= ptrend) break; devscope = (const ACPI_DMAR_DEVICE_SCOPE *)ptr; ptr += devscope->Length; /* XXXKIB bridge */ if (devscope->EntryType != ACPI_DMAR_SCOPE_TYPE_ENDPOINT) continue; rid = 0; dev_path_len = (devscope->Length - sizeof(ACPI_DMAR_DEVICE_SCOPE)) / 2; dev = 
dmar_path_dev(resmem->Segment, dev_path_len, devscope->Bus, (const ACPI_DMAR_PCI_PATH *)(devscope + 1), &rid); if (dev == NULL) { if (bootverbose) { printf("dmar%d no dev found for RMRR " "[%#jx, %#jx] rid %#x scope path ", iria->dmar->unit, (uintmax_t)resmem->BaseAddress, (uintmax_t)resmem->EndAddress, rid); dmar_print_path(devscope->Bus, dev_path_len, (const ACPI_DMAR_PCI_PATH *)(devscope + 1)); printf("\n"); } unit = dmar_find_by_scope(resmem->Segment, devscope->Bus, (const ACPI_DMAR_PCI_PATH *)(devscope + 1), dev_path_len); if (iria->dmar != unit) continue; dmar_get_ctx_for_devpath(iria->dmar, rid, resmem->Segment, devscope->Bus, (const ACPI_DMAR_PCI_PATH *)(devscope + 1), dev_path_len, false, true); } else { unit = dmar_find(dev, false); if (iria->dmar != unit) continue; dmar_instantiate_ctx(iria->dmar, dev, true); } } return (1); } /* * Pre-create all contexts for the DMAR which have RMRR entries. */ int dmar_instantiate_rmrr_ctxs(struct dmar_unit *dmar) { struct inst_rmrr_iter_args iria; int error; if (!dmar_barrier_enter(dmar, DMAR_BARRIER_RMRR)) return (0); error = 0; iria.dmar = dmar; dmar_iterate_tbl(dmar_inst_rmrr_iter, &iria); DMAR_LOCK(dmar); if (!LIST_EMPTY(&dmar->domains)) { KASSERT((dmar->hw_gcmd & DMAR_GCMD_TE) == 0, ("dmar%d: RMRR not handled but translation is already enabled", dmar->unit)); error = dmar_enable_translation(dmar); if (bootverbose) { if (error == 0) { printf("dmar%d: enabled translation\n", dmar->unit); } else { printf("dmar%d: enabling translation failed, " "error %d\n", dmar->unit, error); } } } dmar_barrier_exit(dmar, DMAR_BARRIER_RMRR); return (error); } #ifdef DDB #include #include static void dmar_print_domain_entry(const struct dmar_map_entry *entry) { struct dmar_map_entry *l, *r; db_printf( - " start %jx end %jx free_after %jx free_down %jx flags %x ", - entry->start, entry->end, entry->free_after, entry->free_down, - entry->flags); + " start %jx end %jx first %jx last %jx free_down %jx flags %x ", + entry->start, entry->end, entry->first, entry->last, + entry->free_down, entry->flags); db_printf("left "); l = RB_LEFT(entry, rb_entry); if (l == NULL) db_printf("NULL "); else db_printf("%jx ", l->start); db_printf("right "); r = RB_RIGHT(entry, rb_entry); if (r == NULL) db_printf("NULL"); else db_printf("%jx", r->start); db_printf("\n"); } static void dmar_print_ctx(struct dmar_ctx *ctx) { db_printf( " @%p pci%d:%d:%d refs %d flags %x loads %lu unloads %lu\n", ctx, pci_get_bus(ctx->ctx_tag.owner), pci_get_slot(ctx->ctx_tag.owner), pci_get_function(ctx->ctx_tag.owner), ctx->refs, ctx->flags, ctx->loads, ctx->unloads); } static void dmar_print_domain(struct dmar_domain *domain, bool show_mappings) { struct dmar_map_entry *entry; struct dmar_ctx *ctx; db_printf( " @%p dom %d mgaw %d agaw %d pglvl %d end %jx refs %d\n" " ctx_cnt %d flags %x pgobj %p map_ents %u\n", domain, domain->domain, domain->mgaw, domain->agaw, domain->pglvl, (uintmax_t)domain->end, domain->refs, domain->ctx_cnt, domain->flags, domain->pgtbl_obj, domain->entries_cnt); if (!LIST_EMPTY(&domain->contexts)) { db_printf(" Contexts:\n"); LIST_FOREACH(ctx, &domain->contexts, link) dmar_print_ctx(ctx); } if (!show_mappings) return; db_printf(" mapped:\n"); RB_FOREACH(entry, dmar_gas_entries_tree, &domain->rb_root) { dmar_print_domain_entry(entry); if (db_pager_quit) break; } if (db_pager_quit) return; db_printf(" unloading:\n"); TAILQ_FOREACH(entry, &domain->unload_entries, dmamap_link) { dmar_print_domain_entry(entry); if (db_pager_quit) break; } } DB_FUNC(dmar_domain, 
db_dmar_print_domain, db_show_table, CS_OWN, NULL) { struct dmar_unit *unit; struct dmar_domain *domain; struct dmar_ctx *ctx; bool show_mappings, valid; int pci_domain, bus, device, function, i, t; db_expr_t radix; valid = false; radix = db_radix; db_radix = 10; t = db_read_token(); if (t == tSLASH) { t = db_read_token(); if (t != tIDENT) { db_printf("Bad modifier\n"); db_radix = radix; db_skip_to_eol(); return; } show_mappings = strchr(db_tok_string, 'm') != NULL; t = db_read_token(); } else { show_mappings = false; } if (t == tNUMBER) { pci_domain = db_tok_number; t = db_read_token(); if (t == tNUMBER) { bus = db_tok_number; t = db_read_token(); if (t == tNUMBER) { device = db_tok_number; t = db_read_token(); if (t == tNUMBER) { function = db_tok_number; valid = true; } } } } db_radix = radix; db_skip_to_eol(); if (!valid) { db_printf("usage: show dmar_domain [/m] " " \n"); return; } for (i = 0; i < dmar_devcnt; i++) { unit = device_get_softc(dmar_devs[i]); LIST_FOREACH(domain, &unit->domains, link) { LIST_FOREACH(ctx, &domain->contexts, link) { if (pci_domain == unit->segment && bus == pci_get_bus(ctx->ctx_tag.owner) && device == pci_get_slot(ctx->ctx_tag.owner) && function == pci_get_function(ctx->ctx_tag.owner)) { dmar_print_domain(domain, show_mappings); goto out; } } } } out:; } static void dmar_print_one(int idx, bool show_domains, bool show_mappings) { struct dmar_unit *unit; struct dmar_domain *domain; int i, frir; unit = device_get_softc(dmar_devs[idx]); db_printf("dmar%d at %p, root at 0x%jx, ver 0x%x\n", unit->unit, unit, dmar_read8(unit, DMAR_RTADDR_REG), dmar_read4(unit, DMAR_VER_REG)); db_printf("cap 0x%jx ecap 0x%jx gsts 0x%x fsts 0x%x fectl 0x%x\n", (uintmax_t)dmar_read8(unit, DMAR_CAP_REG), (uintmax_t)dmar_read8(unit, DMAR_ECAP_REG), dmar_read4(unit, DMAR_GSTS_REG), dmar_read4(unit, DMAR_FSTS_REG), dmar_read4(unit, DMAR_FECTL_REG)); if (unit->ir_enabled) { db_printf("ir is enabled; IRT @%p phys 0x%jx maxcnt %d\n", unit->irt, (uintmax_t)unit->irt_phys, unit->irte_cnt); } db_printf("fed 0x%x fea 0x%x feua 0x%x\n", dmar_read4(unit, DMAR_FEDATA_REG), dmar_read4(unit, DMAR_FEADDR_REG), dmar_read4(unit, DMAR_FEUADDR_REG)); db_printf("primary fault log:\n"); for (i = 0; i < DMAR_CAP_NFR(unit->hw_cap); i++) { frir = (DMAR_CAP_FRO(unit->hw_cap) + i) * 16; db_printf(" %d at 0x%x: %jx %jx\n", i, frir, (uintmax_t)dmar_read8(unit, frir), (uintmax_t)dmar_read8(unit, frir + 8)); } if (DMAR_HAS_QI(unit)) { db_printf("ied 0x%x iea 0x%x ieua 0x%x\n", dmar_read4(unit, DMAR_IEDATA_REG), dmar_read4(unit, DMAR_IEADDR_REG), dmar_read4(unit, DMAR_IEUADDR_REG)); if (unit->qi_enabled) { db_printf("qi is enabled: queue @0x%jx (IQA 0x%jx) " "size 0x%jx\n" " head 0x%x tail 0x%x avail 0x%x status 0x%x ctrl 0x%x\n" " hw compl 0x%x@%p/phys@%jx next seq 0x%x gen 0x%x\n", (uintmax_t)unit->inv_queue, (uintmax_t)dmar_read8(unit, DMAR_IQA_REG), (uintmax_t)unit->inv_queue_size, dmar_read4(unit, DMAR_IQH_REG), dmar_read4(unit, DMAR_IQT_REG), unit->inv_queue_avail, dmar_read4(unit, DMAR_ICS_REG), dmar_read4(unit, DMAR_IECTL_REG), unit->inv_waitd_seq_hw, &unit->inv_waitd_seq_hw, (uintmax_t)unit->inv_waitd_seq_hw_phys, unit->inv_waitd_seq, unit->inv_waitd_gen); } else { db_printf("qi is disabled\n"); } } if (show_domains) { db_printf("domains:\n"); LIST_FOREACH(domain, &unit->domains, link) { dmar_print_domain(domain, show_mappings); if (db_pager_quit) break; } } } DB_SHOW_COMMAND(dmar, db_dmar_print) { bool show_domains, show_mappings; show_domains = strchr(modif, 'd') != NULL; show_mappings = strchr(modif, 
'm') != NULL; if (!have_addr) { db_printf("usage: show dmar [/d] [/m] index\n"); return; } dmar_print_one((int)addr, show_domains, show_mappings); } DB_SHOW_ALL_COMMAND(dmars, db_show_all_dmars) { int i; bool show_domains, show_mappings; show_domains = strchr(modif, 'd') != NULL; show_mappings = strchr(modif, 'm') != NULL; for (i = 0; i < dmar_devcnt; i++) { dmar_print_one(i, show_domains, show_mappings); if (db_pager_quit) break; } } #endif Index: projects/clang1000-import/sys/x86/iommu/intel_gas.c =================================================================== --- projects/clang1000-import/sys/x86/iommu/intel_gas.c (revision 357178) +++ projects/clang1000-import/sys/x86/iommu/intel_gas.c (revision 357179) @@ -1,745 +1,684 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #define RB_AUGMENT(entry) dmar_gas_augment_entry(entry) #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Guest Address Space management. */ static uma_zone_t dmar_map_entry_zone; static void intel_gas_init(void) { dmar_map_entry_zone = uma_zcreate("DMAR_MAP_ENTRY", sizeof(struct dmar_map_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NODUMP); } SYSINIT(intel_gas, SI_SUB_DRIVERS, SI_ORDER_FIRST, intel_gas_init, NULL); struct dmar_map_entry * dmar_gas_alloc_entry(struct dmar_domain *domain, u_int flags) { struct dmar_map_entry *res; KASSERT((flags & ~(DMAR_PGF_WAITOK)) == 0, ("unsupported flags %x", flags)); res = uma_zalloc(dmar_map_entry_zone, ((flags & DMAR_PGF_WAITOK) != 0 ? 
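/* DMAR_PGF_WAITOK from the caller selects UMA's M_WAITOK so the zone allocation may sleep; otherwise M_NOWAIT keeps this path safe in non-sleepable contexts. */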
M_WAITOK : M_NOWAIT) | M_ZERO); if (res != NULL) { res->domain = domain; atomic_add_int(&domain->entries_cnt, 1); } return (res); } void dmar_gas_free_entry(struct dmar_domain *domain, struct dmar_map_entry *entry) { KASSERT(domain == entry->domain, ("mismatched free domain %p entry %p entry->domain %p", domain, entry, entry->domain)); atomic_subtract_int(&domain->entries_cnt, 1); uma_zfree(dmar_map_entry_zone, entry); } static int dmar_gas_cmp_entries(struct dmar_map_entry *a, struct dmar_map_entry *b) { /* Last entry has zero size, so <= */ KASSERT(a->start <= a->end, ("inverted entry %p (%jx, %jx)", a, (uintmax_t)a->start, (uintmax_t)a->end)); KASSERT(b->start <= b->end, ("inverted entry %p (%jx, %jx)", b, (uintmax_t)b->start, (uintmax_t)b->end)); KASSERT(a->end <= b->start || b->end <= a->start || a->end == a->start || b->end == b->start, ("overlapping entries %p (%jx, %jx) %p (%jx, %jx)", a, (uintmax_t)a->start, (uintmax_t)a->end, b, (uintmax_t)b->start, (uintmax_t)b->end)); if (a->end < b->end) return (-1); else if (b->end < a->end) return (1); return (0); } static void dmar_gas_augment_entry(struct dmar_map_entry *entry) { - struct dmar_map_entry *l, *r; + struct dmar_map_entry *child; + dmar_gaddr_t free_down; - for (; entry != NULL; entry = RB_PARENT(entry, rb_entry)) { - l = RB_LEFT(entry, rb_entry); - r = RB_RIGHT(entry, rb_entry); - if (l == NULL && r == NULL) { - entry->free_down = entry->free_after; - } else if (l == NULL && r != NULL) { - entry->free_down = MAX(entry->free_after, r->free_down); - } else if (/*l != NULL && */ r == NULL) { - entry->free_down = MAX(entry->free_after, l->free_down); - } else /* if (l != NULL && r != NULL) */ { - entry->free_down = MAX(entry->free_after, l->free_down); - entry->free_down = MAX(entry->free_down, r->free_down); - } - } + free_down = 0; + if ((child = RB_LEFT(entry, rb_entry)) != NULL) { + free_down = MAX(free_down, child->free_down); + free_down = MAX(free_down, entry->start - child->last); + entry->first = child->first; + } else + entry->first = entry->start; + + if ((child = RB_RIGHT(entry, rb_entry)) != NULL) { + free_down = MAX(free_down, child->free_down); + free_down = MAX(free_down, child->first - entry->end); + entry->last = child->last; + } else + entry->last = entry->end; + entry->free_down = free_down; } RB_GENERATE(dmar_gas_entries_tree, dmar_map_entry, rb_entry, dmar_gas_cmp_entries); -static void -dmar_gas_fix_free(struct dmar_domain *domain, struct dmar_map_entry *entry) -{ - struct dmar_map_entry *next; - - next = RB_NEXT(dmar_gas_entries_tree, &domain->rb_root, entry); - entry->free_after = (next != NULL ? 
next->start : domain->end) - - entry->end; - dmar_gas_augment_entry(entry); -} - #ifdef INVARIANTS static void dmar_gas_check_free(struct dmar_domain *domain) { - struct dmar_map_entry *entry, *next, *l, *r; + struct dmar_map_entry *entry, *l, *r; dmar_gaddr_t v; RB_FOREACH(entry, dmar_gas_entries_tree, &domain->rb_root) { KASSERT(domain == entry->domain, ("mismatched free domain %p entry %p entry->domain %p", domain, entry, entry->domain)); - next = RB_NEXT(dmar_gas_entries_tree, &domain->rb_root, entry); - if (next == NULL) { - MPASS(entry->free_after == domain->end - entry->end); - } else { - MPASS(entry->free_after = next->start - entry->end); - MPASS(entry->end <= next->start); - } l = RB_LEFT(entry, rb_entry); r = RB_RIGHT(entry, rb_entry); - if (l == NULL && r == NULL) { - MPASS(entry->free_down == entry->free_after); - } else if (l == NULL && r != NULL) { - MPASS(entry->free_down = MAX(entry->free_after, - r->free_down)); - } else if (r == NULL) { - MPASS(entry->free_down = MAX(entry->free_after, - l->free_down)); - } else { - v = MAX(entry->free_after, l->free_down); + v = 0; + if (l != NULL) { + v = MAX(v, l->free_down); + v = MAX(v, entry->start - l->last); + } + if (r != NULL) { v = MAX(v, r->free_down); - MPASS(entry->free_down == v); + v = MAX(v, r->first - entry->end); } + MPASS(entry->free_down == v); } } #endif static bool dmar_gas_rb_insert(struct dmar_domain *domain, struct dmar_map_entry *entry) { - struct dmar_map_entry *prev, *found; + struct dmar_map_entry *found; found = RB_INSERT(dmar_gas_entries_tree, &domain->rb_root, entry); - dmar_gas_fix_free(domain, entry); - prev = RB_PREV(dmar_gas_entries_tree, &domain->rb_root, entry); - if (prev != NULL) - dmar_gas_fix_free(domain, prev); return (found == NULL); } static void dmar_gas_rb_remove(struct dmar_domain *domain, struct dmar_map_entry *entry) { - struct dmar_map_entry *prev; - prev = RB_PREV(dmar_gas_entries_tree, &domain->rb_root, entry); RB_REMOVE(dmar_gas_entries_tree, &domain->rb_root, entry); - if (prev != NULL) - dmar_gas_fix_free(domain, prev); } void dmar_gas_init_domain(struct dmar_domain *domain) { struct dmar_map_entry *begin, *end; begin = dmar_gas_alloc_entry(domain, DMAR_PGF_WAITOK); end = dmar_gas_alloc_entry(domain, DMAR_PGF_WAITOK); DMAR_DOMAIN_LOCK(domain); KASSERT(domain->entries_cnt == 2, ("dirty domain %p", domain)); KASSERT(RB_EMPTY(&domain->rb_root), ("non-empty entries %p", domain)); begin->start = 0; begin->end = DMAR_PAGE_SIZE; - begin->free_after = domain->end - begin->end; begin->flags = DMAR_MAP_ENTRY_PLACE | DMAR_MAP_ENTRY_UNMAPPED; dmar_gas_rb_insert(domain, begin); end->start = domain->end; end->end = domain->end; - end->free_after = 0; end->flags = DMAR_MAP_ENTRY_PLACE | DMAR_MAP_ENTRY_UNMAPPED; dmar_gas_rb_insert(domain, end); domain->first_place = begin; domain->last_place = end; domain->flags |= DMAR_DOMAIN_GAS_INITED; DMAR_DOMAIN_UNLOCK(domain); } void dmar_gas_fini_domain(struct dmar_domain *domain) { struct dmar_map_entry *entry, *entry1; DMAR_DOMAIN_ASSERT_LOCKED(domain); KASSERT(domain->entries_cnt == 2, ("domain still in use %p", domain)); entry = RB_MIN(dmar_gas_entries_tree, &domain->rb_root); KASSERT(entry->start == 0, ("start entry start %p", domain)); KASSERT(entry->end == DMAR_PAGE_SIZE, ("start entry end %p", domain)); KASSERT(entry->flags == DMAR_MAP_ENTRY_PLACE, ("start entry flags %p", domain)); RB_REMOVE(dmar_gas_entries_tree, &domain->rb_root, entry); dmar_gas_free_entry(domain, entry); entry = RB_MAX(dmar_gas_entries_tree, &domain->rb_root); 
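/* RB_MAX must yield the end-of-domain placeholder created by dmar_gas_init_domain(); the assertions below verify that it still spans exactly [domain->end, domain->end) before it is freed. */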
KASSERT(entry->start == domain->end, ("end entry start %p", domain)); KASSERT(entry->end == domain->end, ("end entry end %p", domain)); - KASSERT(entry->free_after == 0, ("end entry free_after %p", domain)); KASSERT(entry->flags == DMAR_MAP_ENTRY_PLACE, ("end entry flags %p", domain)); RB_REMOVE(dmar_gas_entries_tree, &domain->rb_root, entry); dmar_gas_free_entry(domain, entry); RB_FOREACH_SAFE(entry, dmar_gas_entries_tree, &domain->rb_root, entry1) { KASSERT((entry->flags & DMAR_MAP_ENTRY_RMRR) != 0, ("non-RMRR entry left %p", domain)); RB_REMOVE(dmar_gas_entries_tree, &domain->rb_root, entry); dmar_gas_free_entry(domain, entry); } } struct dmar_gas_match_args { struct dmar_domain *domain; dmar_gaddr_t size; int offset; const struct bus_dma_tag_common *common; u_int gas_flags; struct dmar_map_entry *entry; }; +/* + * The interval [beg, end) is a free interval between two dmar_map_entries. + * maxaddr is an upper bound on addresses that can be allocated. Try to + * allocate space in the free interval, subject to the conditions expressed + * by a, and return 'true' if and only if the allocation attempt succeeds. + */ static bool -dmar_gas_match_one(struct dmar_gas_match_args *a, struct dmar_map_entry *prev, - dmar_gaddr_t end) +dmar_gas_match_one(struct dmar_gas_match_args *a, dmar_gaddr_t beg, + dmar_gaddr_t end, dmar_gaddr_t maxaddr) { dmar_gaddr_t bs, start; - if (a->entry->start + a->size > end) + a->entry->start = roundup2(beg + DMAR_PAGE_SIZE, + a->common->alignment); + if (a->entry->start + a->size > maxaddr) return (false); /* DMAR_PAGE_SIZE to create gap after new entry. */ - if (a->entry->start < prev->end + DMAR_PAGE_SIZE || - a->entry->start + a->size + a->offset + DMAR_PAGE_SIZE > - prev->end + prev->free_after) + if (a->entry->start < beg + DMAR_PAGE_SIZE || + a->entry->start + a->size + a->offset + DMAR_PAGE_SIZE > end) return (false); /* No boundary crossing. */ if (dmar_test_boundary(a->entry->start + a->offset, a->size, a->common->boundary)) return (true); /* * The start + offset to start + offset + size region crosses * the boundary. Check if there is enough space after the - * next boundary after the prev->end. + * next boundary after the beg. */ bs = rounddown2(a->entry->start + a->offset + a->common->boundary, a->common->boundary); start = roundup2(bs, a->common->alignment); /* DMAR_PAGE_SIZE to create gap after new entry. */ - if (start + a->offset + a->size + DMAR_PAGE_SIZE <= - prev->end + prev->free_after && - start + a->offset + a->size <= end && + if (start + a->offset + a->size + DMAR_PAGE_SIZE <= end && + start + a->offset + a->size <= maxaddr && dmar_test_boundary(start + a->offset, a->size, a->common->boundary)) { a->entry->start = start; return (true); } /* * Not enough space to align at the requested boundary, or * boundary is smaller than the size, but allowed to split. - * We already checked that start + size does not overlap end. + * We already checked that start + size does not overlap maxaddr. * * XXXKIB. It is possible that bs is exactly at the start of * the next entry, then we do not have gap. Ignore for now. */ if ((a->gas_flags & DMAR_GM_CANSPLIT) != 0) { a->size = bs - a->entry->start; return (true); } return (false); } static void -dmar_gas_match_insert(struct dmar_gas_match_args *a, - struct dmar_map_entry *prev) +dmar_gas_match_insert(struct dmar_gas_match_args *a) { - struct dmar_map_entry *next; bool found; /* * The prev->end is always aligned on the page size, which * causes page alignment for the entry->start too. 
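* (Hypothetical numbers: with prev->end == 0x2000, a 0x1000 page size and size == 0x1000, the new entry becomes [0x3000, 0x4000) and the page [0x2000, 0x3000) is left unmapped as the guard gap.)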
The size * is checked to be multiple of the page size. * * The page sized gap is created between consequent * allocations to ensure that out-of-bounds accesses fault. */ a->entry->end = a->entry->start + a->size; - next = RB_NEXT(dmar_gas_entries_tree, &a->domain->rb_root, prev); - KASSERT(next->start >= a->entry->end && - next->start - a->entry->start >= a->size && - prev->end <= a->entry->end, - ("dmar_gas_match_insert hole failed %p prev (%jx, %jx) " - "free_after %jx next (%jx, %jx) entry (%jx, %jx)", a->domain, - (uintmax_t)prev->start, (uintmax_t)prev->end, - (uintmax_t)prev->free_after, - (uintmax_t)next->start, (uintmax_t)next->end, - (uintmax_t)a->entry->start, (uintmax_t)a->entry->end)); - - prev->free_after = a->entry->start - prev->end; - a->entry->free_after = next->start - a->entry->end; - found = dmar_gas_rb_insert(a->domain, a->entry); KASSERT(found, ("found dup %p start %jx size %jx", a->domain, (uintmax_t)a->entry->start, (uintmax_t)a->size)); a->entry->flags = DMAR_MAP_ENTRY_MAP; - - KASSERT(RB_PREV(dmar_gas_entries_tree, &a->domain->rb_root, - a->entry) == prev, - ("entry %p prev %p inserted prev %p", a->entry, prev, - RB_PREV(dmar_gas_entries_tree, &a->domain->rb_root, a->entry))); - KASSERT(RB_NEXT(dmar_gas_entries_tree, &a->domain->rb_root, - a->entry) == next, - ("entry %p next %p inserted next %p", a->entry, next, - RB_NEXT(dmar_gas_entries_tree, &a->domain->rb_root, a->entry))); } static int -dmar_gas_lowermatch(struct dmar_gas_match_args *a, struct dmar_map_entry *prev) +dmar_gas_lowermatch(struct dmar_gas_match_args *a, struct dmar_map_entry *entry) { - struct dmar_map_entry *l; - int ret; + struct dmar_map_entry *child; - if (prev->end < a->common->lowaddr) { - a->entry->start = roundup2(prev->end + DMAR_PAGE_SIZE, - a->common->alignment); - if (dmar_gas_match_one(a, prev, a->common->lowaddr)) { - dmar_gas_match_insert(a, prev); - return (0); - } + child = RB_RIGHT(entry, rb_entry); + if (child != NULL && entry->end < a->common->lowaddr && + dmar_gas_match_one(a, entry->end, child->first, + a->common->lowaddr)) { + dmar_gas_match_insert(a); + return (0); } - if (prev->free_down < a->size + a->offset + DMAR_PAGE_SIZE) + if (entry->free_down < a->size + a->offset + DMAR_PAGE_SIZE) return (ENOMEM); - l = RB_LEFT(prev, rb_entry); - if (l != NULL) { - ret = dmar_gas_lowermatch(a, l); - if (ret == 0) - return (0); + child = RB_LEFT(entry, rb_entry); + if (child != NULL && 0 == dmar_gas_lowermatch(a, child)) + return (0); + if (child != NULL && child->last < a->common->lowaddr && + dmar_gas_match_one(a, child->last, entry->start, + a->common->lowaddr)) { + dmar_gas_match_insert(a); + return (0); } - l = RB_RIGHT(prev, rb_entry); - if (l != NULL) - return (dmar_gas_lowermatch(a, l)); + child = RB_RIGHT(entry, rb_entry); + if (child != NULL && 0 == dmar_gas_lowermatch(a, child)) + return (0); return (ENOMEM); } static int -dmar_gas_uppermatch(struct dmar_gas_match_args *a) +dmar_gas_uppermatch(struct dmar_gas_match_args *a, struct dmar_map_entry *entry) { - struct dmar_map_entry *next, *prev, find_entry; + struct dmar_map_entry *child; - find_entry.start = a->common->highaddr; - next = RB_NFIND(dmar_gas_entries_tree, &a->domain->rb_root, - &find_entry); - if (next == NULL) + if (entry->last < a->common->highaddr) return (ENOMEM); - prev = RB_PREV(dmar_gas_entries_tree, &a->domain->rb_root, next); - KASSERT(prev != NULL, ("no prev %p %jx", a->domain, - (uintmax_t)find_entry.start)); - for (;;) { - a->entry->start = prev->start + DMAR_PAGE_SIZE; - if (a->entry->start < 
a->common->highaddr) - a->entry->start = a->common->highaddr; - a->entry->start = roundup2(a->entry->start, - a->common->alignment); - if (dmar_gas_match_one(a, prev, a->domain->end)) { - dmar_gas_match_insert(a, prev); - return (0); - } - - /* - * XXXKIB. This falls back to linear iteration over - * the free space in the high region. But high - * regions are almost unused, the code should be - * enough to cover the case, although in the - * non-optimal way. - */ - prev = next; - next = RB_NEXT(dmar_gas_entries_tree, &a->domain->rb_root, - prev); - KASSERT(next != NULL, ("no next %p %jx", a->domain, - (uintmax_t)find_entry.start)); - if (next->end >= a->domain->end) - return (ENOMEM); + child = RB_LEFT(entry, rb_entry); + if (child != NULL && 0 == dmar_gas_uppermatch(a, child)) + return (0); + if (child != NULL && child->last >= a->common->highaddr && + dmar_gas_match_one(a, child->last, entry->start, + a->domain->end)) { + dmar_gas_match_insert(a); + return (0); } + child = RB_RIGHT(entry, rb_entry); + if (child != NULL && entry->end >= a->common->highaddr && + dmar_gas_match_one(a, entry->end, child->first, + a->domain->end)) { + dmar_gas_match_insert(a); + return (0); + } + if (child != NULL && 0 == dmar_gas_uppermatch(a, child)) + return (0); + return (ENOMEM); } static int dmar_gas_find_space(struct dmar_domain *domain, const struct bus_dma_tag_common *common, dmar_gaddr_t size, int offset, u_int flags, struct dmar_map_entry *entry) { struct dmar_gas_match_args a; int error; DMAR_DOMAIN_ASSERT_LOCKED(domain); KASSERT(entry->flags == 0, ("dirty entry %p %p", domain, entry)); KASSERT((size & DMAR_PAGE_MASK) == 0, ("size %jx", (uintmax_t)size)); a.domain = domain; a.size = size; a.offset = offset; a.common = common; a.gas_flags = flags; a.entry = entry; /* Handle lower region. */ if (common->lowaddr > 0) { error = dmar_gas_lowermatch(&a, RB_ROOT(&domain->rb_root)); if (error == 0) return (0); KASSERT(error == ENOMEM, ("error %d from dmar_gas_lowermatch", error)); } /* Handle upper region. */ if (common->highaddr >= domain->end) return (ENOMEM); - error = dmar_gas_uppermatch(&a); + error = dmar_gas_uppermatch(&a, RB_ROOT(&domain->rb_root)); KASSERT(error == ENOMEM, ("error %d from dmar_gas_uppermatch", error)); return (error); } static int dmar_gas_alloc_region(struct dmar_domain *domain, struct dmar_map_entry *entry, u_int flags) { struct dmar_map_entry *next, *prev; bool found; DMAR_DOMAIN_ASSERT_LOCKED(domain); if ((entry->start & DMAR_PAGE_MASK) != 0 || (entry->end & DMAR_PAGE_MASK) != 0) return (EINVAL); if (entry->start >= entry->end) return (EINVAL); if (entry->end >= domain->end) return (EINVAL); next = RB_NFIND(dmar_gas_entries_tree, &domain->rb_root, entry); KASSERT(next != NULL, ("next must be non-null %p %jx", domain, (uintmax_t)entry->start)); prev = RB_PREV(dmar_gas_entries_tree, &domain->rb_root, next); /* prev could be NULL */ /* * Adapt to broken BIOSes which specify overlapping RMRR * entries. * * XXXKIB: this does not handle a case when prev or next * entries are completely covered by the current one, which * extends both ways. 
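* For example (hypothetical addresses), an RMRR request for [0x1000, 0x5000) overlapping an existing RMRR entry [0x0, 0x2000) is clipped below to start at 0x2000 rather than failing with EBUSY.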
*/ if (prev != NULL && prev->end > entry->start && (prev->flags & DMAR_MAP_ENTRY_PLACE) == 0) { if ((flags & DMAR_GM_RMRR) == 0 || (prev->flags & DMAR_MAP_ENTRY_RMRR) == 0) return (EBUSY); entry->start = prev->end; } if (next->start < entry->end && (next->flags & DMAR_MAP_ENTRY_PLACE) == 0) { if ((flags & DMAR_GM_RMRR) == 0 || (next->flags & DMAR_MAP_ENTRY_RMRR) == 0) return (EBUSY); entry->end = next->start; } if (entry->end == entry->start) return (0); if (prev != NULL && prev->end > entry->start) { /* This assumes that prev is the placeholder entry. */ dmar_gas_rb_remove(domain, prev); prev = NULL; } if (next->start < entry->end) { dmar_gas_rb_remove(domain, next); next = NULL; } found = dmar_gas_rb_insert(domain, entry); KASSERT(found, ("found RMRR dup %p start %jx end %jx", domain, (uintmax_t)entry->start, (uintmax_t)entry->end)); if ((flags & DMAR_GM_RMRR) != 0) entry->flags = DMAR_MAP_ENTRY_RMRR; #ifdef INVARIANTS struct dmar_map_entry *ip, *in; ip = RB_PREV(dmar_gas_entries_tree, &domain->rb_root, entry); in = RB_NEXT(dmar_gas_entries_tree, &domain->rb_root, entry); KASSERT(prev == NULL || ip == prev, ("RMRR %p (%jx %jx) prev %p (%jx %jx) ins prev %p (%jx %jx)", entry, entry->start, entry->end, prev, prev == NULL ? 0 : prev->start, prev == NULL ? 0 : prev->end, ip, ip == NULL ? 0 : ip->start, ip == NULL ? 0 : ip->end)); KASSERT(next == NULL || in == next, ("RMRR %p (%jx %jx) next %p (%jx %jx) ins next %p (%jx %jx)", entry, entry->start, entry->end, next, next == NULL ? 0 : next->start, next == NULL ? 0 : next->end, in, in == NULL ? 0 : in->start, in == NULL ? 0 : in->end)); #endif return (0); } void dmar_gas_free_space(struct dmar_domain *domain, struct dmar_map_entry *entry) { DMAR_DOMAIN_ASSERT_LOCKED(domain); KASSERT((entry->flags & (DMAR_MAP_ENTRY_PLACE | DMAR_MAP_ENTRY_RMRR | DMAR_MAP_ENTRY_MAP)) == DMAR_MAP_ENTRY_MAP, ("permanent entry %p %p", domain, entry)); dmar_gas_rb_remove(domain, entry); entry->flags &= ~DMAR_MAP_ENTRY_MAP; #ifdef INVARIANTS if (dmar_check_free) dmar_gas_check_free(domain); #endif } void dmar_gas_free_region(struct dmar_domain *domain, struct dmar_map_entry *entry) { struct dmar_map_entry *next, *prev; DMAR_DOMAIN_ASSERT_LOCKED(domain); KASSERT((entry->flags & (DMAR_MAP_ENTRY_PLACE | DMAR_MAP_ENTRY_RMRR | DMAR_MAP_ENTRY_MAP)) == DMAR_MAP_ENTRY_RMRR, ("non-RMRR entry %p %p", domain, entry)); prev = RB_PREV(dmar_gas_entries_tree, &domain->rb_root, entry); next = RB_NEXT(dmar_gas_entries_tree, &domain->rb_root, entry); dmar_gas_rb_remove(domain, entry); entry->flags &= ~DMAR_MAP_ENTRY_RMRR; if (prev == NULL) dmar_gas_rb_insert(domain, domain->first_place); if (next == NULL) dmar_gas_rb_insert(domain, domain->last_place); } int dmar_gas_map(struct dmar_domain *domain, const struct bus_dma_tag_common *common, dmar_gaddr_t size, int offset, u_int eflags, u_int flags, vm_page_t *ma, struct dmar_map_entry **res) { struct dmar_map_entry *entry; int error; KASSERT((flags & ~(DMAR_GM_CANWAIT | DMAR_GM_CANSPLIT)) == 0, ("invalid flags 0x%x", flags)); entry = dmar_gas_alloc_entry(domain, (flags & DMAR_GM_CANWAIT) != 0 ? 
DMAR_PGF_WAITOK : 0); if (entry == NULL) return (ENOMEM); DMAR_DOMAIN_LOCK(domain); error = dmar_gas_find_space(domain, common, size, offset, flags, entry); if (error == ENOMEM) { DMAR_DOMAIN_UNLOCK(domain); dmar_gas_free_entry(domain, entry); return (error); } #ifdef INVARIANTS if (dmar_check_free) dmar_gas_check_free(domain); #endif KASSERT(error == 0, ("unexpected error %d from dmar_gas_find_entry", error)); KASSERT(entry->end < domain->end, ("allocated GPA %jx, max GPA %jx", (uintmax_t)entry->end, (uintmax_t)domain->end)); entry->flags |= eflags; DMAR_DOMAIN_UNLOCK(domain); error = domain_map_buf(domain, entry->start, entry->end - entry->start, ma, ((eflags & DMAR_MAP_ENTRY_READ) != 0 ? DMAR_PTE_R : 0) | ((eflags & DMAR_MAP_ENTRY_WRITE) != 0 ? DMAR_PTE_W : 0) | ((eflags & DMAR_MAP_ENTRY_SNOOP) != 0 ? DMAR_PTE_SNP : 0) | ((eflags & DMAR_MAP_ENTRY_TM) != 0 ? DMAR_PTE_TM : 0), (flags & DMAR_GM_CANWAIT) != 0 ? DMAR_PGF_WAITOK : 0); if (error == ENOMEM) { dmar_domain_unload_entry(entry, true); return (error); } KASSERT(error == 0, ("unexpected error %d from domain_map_buf", error)); *res = entry; return (0); } int dmar_gas_map_region(struct dmar_domain *domain, struct dmar_map_entry *entry, u_int eflags, u_int flags, vm_page_t *ma) { dmar_gaddr_t start; int error; KASSERT(entry->flags == 0, ("used RMRR entry %p %p %x", domain, entry, entry->flags)); KASSERT((flags & ~(DMAR_GM_CANWAIT | DMAR_GM_RMRR)) == 0, ("invalid flags 0x%x", flags)); start = entry->start; DMAR_DOMAIN_LOCK(domain); error = dmar_gas_alloc_region(domain, entry, flags); if (error != 0) { DMAR_DOMAIN_UNLOCK(domain); return (error); } entry->flags |= eflags; DMAR_DOMAIN_UNLOCK(domain); if (entry->end == entry->start) return (0); error = domain_map_buf(domain, entry->start, entry->end - entry->start, ma + OFF_TO_IDX(start - entry->start), ((eflags & DMAR_MAP_ENTRY_READ) != 0 ? DMAR_PTE_R : 0) | ((eflags & DMAR_MAP_ENTRY_WRITE) != 0 ? DMAR_PTE_W : 0) | ((eflags & DMAR_MAP_ENTRY_SNOOP) != 0 ? DMAR_PTE_SNP : 0) | ((eflags & DMAR_MAP_ENTRY_TM) != 0 ? DMAR_PTE_TM : 0), (flags & DMAR_GM_CANWAIT) != 0 ? DMAR_PGF_WAITOK : 0); if (error == ENOMEM) { dmar_domain_unload_entry(entry, false); return (error); } KASSERT(error == 0, ("unexpected error %d from domain_map_buf", error)); return (0); } int dmar_gas_reserve_region(struct dmar_domain *domain, dmar_gaddr_t start, dmar_gaddr_t end) { struct dmar_map_entry *entry; int error; entry = dmar_gas_alloc_entry(domain, DMAR_PGF_WAITOK); entry->start = start; entry->end = end; DMAR_DOMAIN_LOCK(domain); error = dmar_gas_alloc_region(domain, entry, DMAR_GM_CANWAIT); if (error == 0) entry->flags |= DMAR_MAP_ENTRY_UNMAPPED; DMAR_DOMAIN_UNLOCK(domain); if (error != 0) dmar_gas_free_entry(domain, entry); return (error); } Index: projects/clang1000-import/tests/sys/kern/ptrace_test.c =================================================================== --- projects/clang1000-import/tests/sys/kern/ptrace_test.c (revision 357178) +++ projects/clang1000-import/tests/sys/kern/ptrace_test.c (revision 357179) @@ -1,4304 +1,4307 @@ /*- * Copyright (c) 2015 John Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #define _WANT_MIPS_REGNUM #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Architectures with a user-visible breakpoint(). */ #if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \ defined(__i386__) || defined(__mips__) || defined(__riscv) || \ defined(__sparc64__) #define HAVE_BREAKPOINT #endif /* * Adjust PC to skip over a breakpoint when stopped for a breakpoint trap. */ #ifdef HAVE_BREAKPOINT #if defined(__aarch64__) #define SKIP_BREAK(reg) ((reg)->elr += 4) #elif defined(__amd64__) || defined(__i386__) #define SKIP_BREAK(reg) #elif defined(__arm__) #define SKIP_BREAK(reg) ((reg)->r_pc += 4) #elif defined(__mips__) #define SKIP_BREAK(reg) ((reg)->r_regs[PC] += 4) #elif defined(__riscv) #define SKIP_BREAK(reg) ((reg)->sepc += 4) #elif defined(__sparc64__) #define SKIP_BREAK(reg) do { \ (reg)->r_tpc = (reg)->r_tnpc + 4; \ (reg)->r_tnpc += 8; \ } while (0) #endif #endif /* * A variant of ATF_REQUIRE that is suitable for use in child * processes. This only works if the parent process is tripped up by * the early exit and fails some requirement itself. */ #define CHILD_REQUIRE(exp) do { \ if (!(exp)) \ child_fail_require(__FILE__, __LINE__, \ #exp " not met"); \ } while (0) static __dead2 void child_fail_require(const char *file, int line, const char *str) { char buf[128]; snprintf(buf, sizeof(buf), "%s:%d: %s\n", file, line, str); write(2, buf, strlen(buf)); _exit(32); } static void trace_me(void) { /* Attach the parent process as a tracer of this process. */ CHILD_REQUIRE(ptrace(PT_TRACE_ME, 0, NULL, 0) != -1); /* Trigger a stop. */ raise(SIGSTOP); } static void attach_child(pid_t pid) { pid_t wpid; int status; ATF_REQUIRE(ptrace(PT_ATTACH, pid, NULL, 0) == 0); wpid = waitpid(pid, &status, 0); ATF_REQUIRE(wpid == pid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); } static void wait_for_zombie(pid_t pid) { /* * Wait for a process to exit. This is kind of gross, but * there is not a better way. * * Prior to r325719, the kern.proc.pid.<pid> sysctl failed * with ESRCH. After that change, a valid struct kinfo_proc * is returned for zombies with ki_stat set to SZOMB. 
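* The loop below therefore treats a sysctl failure (required to be ESRCH) as "already reaped" and ki_stat == SZOMB as "zombie reached", polling in 5 ms steps.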
*/ for (;;) { struct kinfo_proc kp; size_t len; int mib[4]; mib[0] = CTL_KERN; mib[1] = KERN_PROC; mib[2] = KERN_PROC_PID; mib[3] = pid; len = sizeof(kp); if (sysctl(mib, nitems(mib), &kp, &len, NULL, 0) == -1) { ATF_REQUIRE(errno == ESRCH); break; } if (kp.ki_stat == SZOMB) break; usleep(5000); } } /* * Verify that a parent debugger process "sees" the exit of a debugged * process exactly once when attached via PT_TRACE_ME. */ ATF_TC_WITHOUT_HEAD(ptrace__parent_wait_after_trace_me); ATF_TC_BODY(ptrace__parent_wait_after_trace_me, tc) { pid_t child, wpid; int status; ATF_REQUIRE((child = fork()) != -1); if (child == 0) { /* Child process. */ trace_me(); _exit(1); } /* Parent process. */ /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(child, &status, 0); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, child, (caddr_t)1, 0) != -1); /* The second wait() should report the exit status. */ wpid = waitpid(child, &status, 0); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); /* The child should no longer exist. */ wpid = waitpid(child, &status, 0); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a parent debugger process "sees" the exit of a debugged * process exactly once when attached via PT_ATTACH. */ ATF_TC_WITHOUT_HEAD(ptrace__parent_wait_after_attach); ATF_TC_BODY(ptrace__parent_wait_after_attach, tc) { pid_t child, wpid; int cpipe[2], status; char c; ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((child = fork()) != -1); if (child == 0) { /* Child process. */ close(cpipe[0]); /* Wait for the parent to attach. */ CHILD_REQUIRE(read(cpipe[1], &c, sizeof(c)) == 0); _exit(1); } close(cpipe[1]); /* Parent process. */ /* Attach to the child process. */ attach_child(child); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, child, (caddr_t)1, 0) != -1); /* Signal the child to exit. */ close(cpipe[0]); /* The second wait() should report the exit status. */ wpid = waitpid(child, &status, 0); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); /* The child should no longer exist. */ wpid = waitpid(child, &status, 0); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a parent process "sees" the exit of a debugged process only * after the debugger has seen it. */ ATF_TC_WITHOUT_HEAD(ptrace__parent_sees_exit_after_child_debugger); ATF_TC_BODY(ptrace__parent_sees_exit_after_child_debugger, tc) { pid_t child, debugger, wpid; int cpipe[2], dpipe[2], status; char c; if (atf_tc_get_config_var_as_bool_wd(tc, "ci", false)) atf_tc_skip("https://bugs.freebsd.org/239399"); ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((child = fork()) != -1); if (child == 0) { /* Child process. */ close(cpipe[0]); /* Wait for parent to be ready. */ CHILD_REQUIRE(read(cpipe[1], &c, sizeof(c)) == sizeof(c)); _exit(1); } close(cpipe[1]); ATF_REQUIRE(pipe(dpipe) == 0); ATF_REQUIRE((debugger = fork()) != -1); if (debugger == 0) { /* Debugger process. */ close(dpipe[0]); CHILD_REQUIRE(ptrace(PT_ATTACH, child, NULL, 0) != -1); wpid = waitpid(child, &status, 0); CHILD_REQUIRE(wpid == child); CHILD_REQUIRE(WIFSTOPPED(status)); CHILD_REQUIRE(WSTOPSIG(status) == SIGSTOP); CHILD_REQUIRE(ptrace(PT_CONTINUE, child, (caddr_t)1, 0) != -1); /* Signal parent that debugger is attached. 
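(One byte over dpipe; the parent blocks in read() on the other end until this write arrives.)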
*/ CHILD_REQUIRE(write(dpipe[1], &c, sizeof(c)) == sizeof(c)); /* Wait for parent's failed wait. */ CHILD_REQUIRE(read(dpipe[1], &c, sizeof(c)) == 0); wpid = waitpid(child, &status, 0); CHILD_REQUIRE(wpid == child); CHILD_REQUIRE(WIFEXITED(status)); CHILD_REQUIRE(WEXITSTATUS(status) == 1); _exit(0); } close(dpipe[1]); /* Parent process. */ /* Wait for the debugger to attach to the child. */ ATF_REQUIRE(read(dpipe[0], &c, sizeof(c)) == sizeof(c)); /* Release the child. */ ATF_REQUIRE(write(cpipe[0], &c, sizeof(c)) == sizeof(c)); ATF_REQUIRE(read(cpipe[0], &c, sizeof(c)) == 0); close(cpipe[0]); wait_for_zombie(child); /* * This wait should return a pid of 0 to indicate no status to * report. The parent should see the child as non-exited * until the debugger sees the exit. */ wpid = waitpid(child, &status, WNOHANG); ATF_REQUIRE(wpid == 0); /* Signal the debugger to wait for the child. */ close(dpipe[0]); /* Wait for the debugger. */ wpid = waitpid(debugger, &status, 0); ATF_REQUIRE(wpid == debugger); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); /* The child process should now be ready. */ wpid = waitpid(child, &status, WNOHANG); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); } /* * Verify that a parent process "sees" the exit of a debugged process * only after a non-direct-child debugger has seen it. In particular, * various wait() calls in the parent must avoid failing with ESRCH by * checking the parent's orphan list for the debuggee. */ ATF_TC_WITHOUT_HEAD(ptrace__parent_sees_exit_after_unrelated_debugger); ATF_TC_BODY(ptrace__parent_sees_exit_after_unrelated_debugger, tc) { pid_t child, debugger, fpid, wpid; int cpipe[2], dpipe[2], status; char c; ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((child = fork()) != -1); if (child == 0) { /* Child process. */ close(cpipe[0]); /* Wait for parent to be ready. */ CHILD_REQUIRE(read(cpipe[1], &c, sizeof(c)) == sizeof(c)); _exit(1); } close(cpipe[1]); ATF_REQUIRE(pipe(dpipe) == 0); ATF_REQUIRE((debugger = fork()) != -1); if (debugger == 0) { /* Debugger parent. */ /* * Fork again and drop the debugger parent so that the * debugger is not a child of the main parent. */ CHILD_REQUIRE((fpid = fork()) != -1); if (fpid != 0) _exit(2); /* Debugger process. */ close(dpipe[0]); CHILD_REQUIRE(ptrace(PT_ATTACH, child, NULL, 0) != -1); wpid = waitpid(child, &status, 0); CHILD_REQUIRE(wpid == child); CHILD_REQUIRE(WIFSTOPPED(status)); CHILD_REQUIRE(WSTOPSIG(status) == SIGSTOP); CHILD_REQUIRE(ptrace(PT_CONTINUE, child, (caddr_t)1, 0) != -1); /* Signal parent that debugger is attached. */ CHILD_REQUIRE(write(dpipe[1], &c, sizeof(c)) == sizeof(c)); /* Wait for parent's failed wait. */ CHILD_REQUIRE(read(dpipe[1], &c, sizeof(c)) == sizeof(c)); wpid = waitpid(child, &status, 0); CHILD_REQUIRE(wpid == child); CHILD_REQUIRE(WIFEXITED(status)); CHILD_REQUIRE(WEXITSTATUS(status) == 1); _exit(0); } close(dpipe[1]); /* Parent process. */ /* Wait for the debugger parent process to exit. */ wpid = waitpid(debugger, &status, 0); ATF_REQUIRE(wpid == debugger); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); /* A WNOHANG wait here should see the non-exited child. */ wpid = waitpid(child, &status, WNOHANG); ATF_REQUIRE(wpid == 0); /* Wait for the debugger to attach to the child. */ ATF_REQUIRE(read(dpipe[0], &c, sizeof(c)) == sizeof(c)); /* Release the child. 
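(One byte over cpipe unblocks the child's read(); the following read() == 0 observes EOF once the exiting child closes its end.)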
*/ ATF_REQUIRE(write(cpipe[0], &c, sizeof(c)) == sizeof(c)); ATF_REQUIRE(read(cpipe[0], &c, sizeof(c)) == 0); close(cpipe[0]); wait_for_zombie(child); /* * This wait should return a pid of 0 to indicate no status to * report. The parent should see the child as non-exited * until the debugger sees the exit. */ wpid = waitpid(child, &status, WNOHANG); ATF_REQUIRE(wpid == 0); /* Signal the debugger to wait for the child. */ ATF_REQUIRE(write(dpipe[0], &c, sizeof(c)) == sizeof(c)); /* Wait for the debugger. */ ATF_REQUIRE(read(dpipe[0], &c, sizeof(c)) == 0); close(dpipe[0]); /* The child process should now be ready. */ wpid = waitpid(child, &status, WNOHANG); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); } /* * Make sure that we can collect the exit status of an orphaned process. */ ATF_TC_WITHOUT_HEAD(ptrace__parent_exits_before_child); ATF_TC_BODY(ptrace__parent_exits_before_child, tc) { ssize_t n; int cpipe1[2], cpipe2[2], gcpipe[2], status; pid_t child, gchild; ATF_REQUIRE(pipe(cpipe1) == 0); ATF_REQUIRE(pipe(cpipe2) == 0); ATF_REQUIRE(pipe(gcpipe) == 0); ATF_REQUIRE(procctl(P_PID, getpid(), PROC_REAP_ACQUIRE, NULL) == 0); ATF_REQUIRE((child = fork()) != -1); if (child == 0) { CHILD_REQUIRE((gchild = fork()) != -1); if (gchild == 0) { status = 1; do { n = read(gcpipe[0], &status, sizeof(status)); } while (n == -1 && errno == EINTR); _exit(status); } CHILD_REQUIRE(write(cpipe1[1], &gchild, sizeof(gchild)) == sizeof(gchild)); CHILD_REQUIRE(read(cpipe2[0], &status, sizeof(status)) == sizeof(status)); _exit(status); } ATF_REQUIRE(read(cpipe1[0], &gchild, sizeof(gchild)) == sizeof(gchild)); ATF_REQUIRE(ptrace(PT_ATTACH, gchild, NULL, 0) == 0); status = 0; ATF_REQUIRE(write(cpipe2[1], &status, sizeof(status)) == sizeof(status)); ATF_REQUIRE(waitpid(child, &status, 0) == child); ATF_REQUIRE(WIFEXITED(status) && WEXITSTATUS(status) == 0); status = 0; ATF_REQUIRE(write(gcpipe[1], &status, sizeof(status)) == sizeof(status)); ATF_REQUIRE(waitpid(gchild, &status, 0) == gchild); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(ptrace(PT_DETACH, gchild, (caddr_t)1, 0) == 0); ATF_REQUIRE(waitpid(gchild, &status, 0) == gchild); ATF_REQUIRE(WIFEXITED(status) && WEXITSTATUS(status) == 0); ATF_REQUIRE(close(cpipe1[0]) == 0); ATF_REQUIRE(close(cpipe1[1]) == 0); ATF_REQUIRE(close(cpipe2[0]) == 0); ATF_REQUIRE(close(cpipe2[1]) == 0); ATF_REQUIRE(close(gcpipe[0]) == 0); ATF_REQUIRE(close(gcpipe[1]) == 0); } /* * The parent process should always act the same regardless of how the * debugger is attached to it. */ static __dead2 void follow_fork_parent(bool use_vfork) { pid_t fpid, wpid; int status; if (use_vfork) CHILD_REQUIRE((fpid = vfork()) != -1); else CHILD_REQUIRE((fpid = fork()) != -1); if (fpid == 0) /* Child */ _exit(2); wpid = waitpid(fpid, &status, 0); CHILD_REQUIRE(wpid == fpid); CHILD_REQUIRE(WIFEXITED(status)); CHILD_REQUIRE(WEXITSTATUS(status) == 2); _exit(1); } /* * Helper routine for follow fork tests. This waits for two stops * that report both "sides" of a fork. It returns the pid of the new * child process. */ static pid_t handle_fork_events(pid_t parent, struct ptrace_lwpinfo *ppl) { struct ptrace_lwpinfo pl; bool fork_reported[2]; pid_t child, wpid; int i, status; fork_reported[0] = false; fork_reported[1] = false; child = -1; /* * Each process should report a fork event. The parent should * report a PL_FLAG_FORKED event, and the child should report * a PL_FLAG_CHILD event. 
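* The two stops can arrive in either order, so the loop below keys on PL_FLAG_CHILD rather than on wait() ordering and cross-checks that both sides name the same child pid.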
*/ for (i = 0; i < 2; i++) { wpid = wait(&status); ATF_REQUIRE(wpid > 0); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_FORKED | PL_FLAG_CHILD)) != 0); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_FORKED | PL_FLAG_CHILD)) != (PL_FLAG_FORKED | PL_FLAG_CHILD)); if (pl.pl_flags & PL_FLAG_CHILD) { ATF_REQUIRE(wpid != parent); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(!fork_reported[1]); if (child == -1) child = wpid; else ATF_REQUIRE(child == wpid); if (ppl != NULL) ppl[1] = pl; fork_reported[1] = true; } else { ATF_REQUIRE(wpid == parent); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(!fork_reported[0]); if (child == -1) child = pl.pl_child_pid; else ATF_REQUIRE(child == pl.pl_child_pid); if (ppl != NULL) ppl[0] = pl; fork_reported[0] = true; } } return (child); } /* * Verify that a new child process is stopped after a followed fork and * that the traced parent sees the exit of the child after the debugger * when both processes remain attached to the debugger. */ ATF_TC_WITHOUT_HEAD(ptrace__follow_fork_both_attached); ATF_TC_BODY(ptrace__follow_fork_both_attached, tc) { pid_t children[2], fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(false); } /* Parent process. */ children[0] = fpid; /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(children[0], &status, 0); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); children[1] = handle_fork_events(children[0], NULL); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * The child can't exit until the grandchild reports status, so the * grandchild should report its exit first to the debugger. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a new child process is stopped after a followed fork * and that the traced parent sees the exit of the child when the new * child process is detached after it reports its fork. */ ATF_TC_WITHOUT_HEAD(ptrace__follow_fork_child_detached); ATF_TC_BODY(ptrace__follow_fork_child_detached, tc) { pid_t children[2], fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(false); } /* Parent process. */ children[0] = fpid; /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(children[0], &status, 0); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the child ignoring the SIGSTOP. 
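(The (caddr_t)1 address means "resume where the process stopped" and the 0 signal argument suppresses delivery of the pending SIGSTOP.)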
*/ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); children[1] = handle_fork_events(children[0], NULL); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_DETACH, children[1], (caddr_t)1, 0) != -1); /* * Should not see any status from the grandchild now, only the * child. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a new child process is stopped after a followed fork * and that the traced parent sees the exit of the child when the * traced parent is detached after the fork. */ ATF_TC_WITHOUT_HEAD(ptrace__follow_fork_parent_detached); ATF_TC_BODY(ptrace__follow_fork_parent_detached, tc) { pid_t children[2], fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(false); } /* Parent process. */ children[0] = fpid; /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(children[0], &status, 0); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); children[1] = handle_fork_events(children[0], NULL); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE(ptrace(PT_DETACH, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * The child can't exit until the grandchild reports status, so the * grandchild should report its exit first to the debugger. * * Even though the child process is detached, it is still a * child of the debugger, so it will still report its exit * after the grandchild. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void attach_fork_parent(int cpipe[2]) { pid_t fpid; close(cpipe[0]); /* Double-fork to disassociate from the debugger. */ CHILD_REQUIRE((fpid = fork()) != -1); if (fpid != 0) _exit(3); /* Send the pid of the disassociated child to the debugger. */ fpid = getpid(); CHILD_REQUIRE(write(cpipe[1], &fpid, sizeof(fpid)) == sizeof(fpid)); /* Wait for the debugger to attach. */ CHILD_REQUIRE(read(cpipe[1], &fpid, sizeof(fpid)) == 0); } /* * Verify that a new child process is stopped after a followed fork and * that the traced parent sees the exit of the child after the debugger * when both processes remain attached to the debugger. In this test * the parent that forks is not a direct child of the debugger. */ ATF_TC_WITHOUT_HEAD(ptrace__follow_fork_both_attached_unrelated_debugger); ATF_TC_BODY(ptrace__follow_fork_both_attached_unrelated_debugger, tc) { pid_t children[2], fpid, wpid; int cpipe[2], status; if (atf_tc_get_config_var_as_bool_wd(tc, "ci", false)) atf_tc_skip("https://bugs.freebsd.org/239397"); ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { attach_fork_parent(cpipe); follow_fork_parent(false); } /* Parent process. */ close(cpipe[1]); /* Wait for the direct child to exit. 
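(attach_fork_parent() double-forks; its first child exits with status 3 while the real fork parent is reparented away from this process.)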
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 3); /* Read the pid of the fork parent. */ ATF_REQUIRE(read(cpipe[0], &children[0], sizeof(children[0])) == sizeof(children[0])); /* Attach to the fork parent. */ attach_child(children[0]); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the fork parent ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); /* Signal the fork parent to continue. */ close(cpipe[0]); children[1] = handle_fork_events(children[0], NULL); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * The fork parent can't exit until the child reports status, * so the child should report its exit first to the debugger. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a new child process is stopped after a followed fork * and that the traced parent sees the exit of the child when the new * child process is detached after it reports its fork. In this test * the parent that forks is not a direct child of the debugger. */ ATF_TC_WITHOUT_HEAD(ptrace__follow_fork_child_detached_unrelated_debugger); ATF_TC_BODY(ptrace__follow_fork_child_detached_unrelated_debugger, tc) { pid_t children[2], fpid, wpid; int cpipe[2], status; if (atf_tc_get_config_var_as_bool_wd(tc, "ci", false)) atf_tc_skip("https://bugs.freebsd.org/239292"); ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { attach_fork_parent(cpipe); follow_fork_parent(false); } /* Parent process. */ close(cpipe[1]); /* Wait for the direct child to exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 3); /* Read the pid of the fork parent. */ ATF_REQUIRE(read(cpipe[0], &children[0], sizeof(children[0])) == sizeof(children[0])); /* Attach to the fork parent. */ attach_child(children[0]); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the fork parent ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); /* Signal the fork parent to continue. */ close(cpipe[0]); children[1] = handle_fork_events(children[0], NULL); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_DETACH, children[1], (caddr_t)1, 0) != -1); /* * Should not see any status from the child now, only the fork * parent. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a new child process is stopped after a followed fork * and that the traced parent sees the exit of the child when the * traced parent is detached after the fork. In this test the parent * that forks is not a direct child of the debugger. 
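* Since the detached fork parent is not our child either, only the new child's exit should be reported to us below.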
*/ ATF_TC_WITHOUT_HEAD(ptrace__follow_fork_parent_detached_unrelated_debugger); ATF_TC_BODY(ptrace__follow_fork_parent_detached_unrelated_debugger, tc) { pid_t children[2], fpid, wpid; int cpipe[2], status; if (atf_tc_get_config_var_as_bool_wd(tc, "ci", false)) atf_tc_skip("https://bugs.freebsd.org/239425"); ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { attach_fork_parent(cpipe); follow_fork_parent(false); } /* Parent process. */ close(cpipe[1]); /* Wait for the direct child to exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 3); /* Read the pid of the fork parent. */ ATF_REQUIRE(read(cpipe[0], &children[0], sizeof(children[0])) == sizeof(children[0])); /* Attach to the fork parent. */ attach_child(children[0]); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the fork parent ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); /* Signal the fork parent to continue. */ close(cpipe[0]); children[1] = handle_fork_events(children[0], NULL); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE(ptrace(PT_DETACH, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * Should not see any status from the fork parent now, only * the child. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a child process does not see an unrelated debugger as its * parent but sees its original parent process. */ ATF_TC_WITHOUT_HEAD(ptrace__getppid); ATF_TC_BODY(ptrace__getppid, tc) { pid_t child, debugger, ppid, wpid; int cpipe[2], dpipe[2], status; char c; if (atf_tc_get_config_var_as_bool_wd(tc, "ci", false)) atf_tc_skip("https://bugs.freebsd.org/240510"); ATF_REQUIRE(pipe(cpipe) == 0); ATF_REQUIRE((child = fork()) != -1); if (child == 0) { /* Child process. */ close(cpipe[0]); /* Wait for parent to be ready. */ CHILD_REQUIRE(read(cpipe[1], &c, sizeof(c)) == sizeof(c)); /* Report the parent PID to the parent. */ ppid = getppid(); CHILD_REQUIRE(write(cpipe[1], &ppid, sizeof(ppid)) == sizeof(ppid)); _exit(1); } close(cpipe[1]); ATF_REQUIRE(pipe(dpipe) == 0); ATF_REQUIRE((debugger = fork()) != -1); if (debugger == 0) { /* Debugger process. */ close(dpipe[0]); CHILD_REQUIRE(ptrace(PT_ATTACH, child, NULL, 0) != -1); wpid = waitpid(child, &status, 0); CHILD_REQUIRE(wpid == child); CHILD_REQUIRE(WIFSTOPPED(status)); CHILD_REQUIRE(WSTOPSIG(status) == SIGSTOP); CHILD_REQUIRE(ptrace(PT_CONTINUE, child, (caddr_t)1, 0) != -1); /* Signal parent that debugger is attached. */ CHILD_REQUIRE(write(dpipe[1], &c, sizeof(c)) == sizeof(c)); /* Wait for traced child to exit. */ wpid = waitpid(child, &status, 0); CHILD_REQUIRE(wpid == child); CHILD_REQUIRE(WIFEXITED(status)); CHILD_REQUIRE(WEXITSTATUS(status) == 1); _exit(0); } close(dpipe[1]); /* Parent process. */ /* Wait for the debugger to attach to the child. */ ATF_REQUIRE(read(dpipe[0], &c, sizeof(c)) == sizeof(c)); /* Release the child. */ ATF_REQUIRE(write(cpipe[0], &c, sizeof(c)) == sizeof(c)); /* Read the parent PID from the child. */ ATF_REQUIRE(read(cpipe[0], &ppid, sizeof(ppid)) == sizeof(ppid)); close(cpipe[0]); ATF_REQUIRE(ppid == getpid()); /* Wait for the debugger. 
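(The debugger exits 0 only after collecting the traced child's exit status, which orders this process's observation of the exit after the debugger's.)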
*/ wpid = waitpid(debugger, &status, 0); ATF_REQUIRE(wpid == debugger); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); /* The child process should now be ready. */ wpid = waitpid(child, &status, WNOHANG); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); } /* * Verify that pl_syscall_code in struct ptrace_lwpinfo for a new * child process created via fork() reports the correct value. */ ATF_TC_WITHOUT_HEAD(ptrace__new_child_pl_syscall_code_fork); ATF_TC_BODY(ptrace__new_child_pl_syscall_code_fork, tc) { struct ptrace_lwpinfo pl[2]; pid_t children[2], fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(false); } /* Parent process. */ children[0] = fpid; /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(children[0], &status, 0); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); /* Wait for both halves of the fork event to get reported. */ children[1] = handle_fork_events(children[0], pl); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE((pl[0].pl_flags & PL_FLAG_SCX) != 0); ATF_REQUIRE((pl[1].pl_flags & PL_FLAG_SCX) != 0); ATF_REQUIRE(pl[0].pl_syscall_code == SYS_fork); ATF_REQUIRE(pl[0].pl_syscall_code == pl[1].pl_syscall_code); ATF_REQUIRE(pl[0].pl_syscall_narg == pl[1].pl_syscall_narg); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * The child can't exit until the grandchild reports status, so the * grandchild should report its exit first to the debugger. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that pl_syscall_code in struct ptrace_lwpinfo for a new * child process created via vfork() reports the correct value. */ ATF_TC_WITHOUT_HEAD(ptrace__new_child_pl_syscall_code_vfork); ATF_TC_BODY(ptrace__new_child_pl_syscall_code_vfork, tc) { struct ptrace_lwpinfo pl[2]; pid_t children[2], fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(true); } /* Parent process. */ children[0] = fpid; /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(children[0], &status, 0); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, children[0], NULL, 1) != -1); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); /* Wait for both halves of the fork event to get reported. 
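(handle_fork_events() returns the new child's pid and fills pl[0] with the parent-side and pl[1] with the child-side lwpinfo consumed by the checks below.)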
*/ children[1] = handle_fork_events(children[0], pl); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE((pl[0].pl_flags & PL_FLAG_SCX) != 0); ATF_REQUIRE((pl[1].pl_flags & PL_FLAG_SCX) != 0); ATF_REQUIRE(pl[0].pl_syscall_code == SYS_vfork); ATF_REQUIRE(pl[0].pl_syscall_code == pl[1].pl_syscall_code); ATF_REQUIRE(pl[0].pl_syscall_narg == pl[1].pl_syscall_narg); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * The child can't exit until the grandchild reports status, so the * grandchild should report its exit first to the debugger. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void * simple_thread(void *arg __unused) { pthread_exit(NULL); } static __dead2 void simple_thread_main(void) { pthread_t thread; CHILD_REQUIRE(pthread_create(&thread, NULL, simple_thread, NULL) == 0); CHILD_REQUIRE(pthread_join(thread, NULL) == 0); exit(1); } /* * Verify that pl_syscall_code in struct ptrace_lwpinfo for a new * thread reports the correct value. */ ATF_TC_WITHOUT_HEAD(ptrace__new_child_pl_syscall_code_thread); ATF_TC_BODY(ptrace__new_child_pl_syscall_code_thread, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; lwpid_t mainlwp; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); simple_thread_main(); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); mainlwp = pl.pl_lwpid; /* * Continue the child ignoring the SIGSTOP and tracing all * system call exits. */ ATF_REQUIRE(ptrace(PT_TO_SCX, fpid, (caddr_t)1, 0) != -1); /* * Wait for the new thread to arrive. pthread_create() might * invoke any number of system calls. For now we just wait * for the new thread to arrive and make sure it reports a * valid system call code. If ptrace grows thread event * reporting then this test can be made more precise. */ for (;;) { wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & PL_FLAG_SCX) != 0); ATF_REQUIRE(pl.pl_syscall_code != 0); if (pl.pl_lwpid != mainlwp) /* New thread seen. */ break; ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } /* Wait for the child to exit. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); for (;;) { wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); if (WIFEXITED(status)) break; ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that the expected LWP events are reported for a child thread. 
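* With PT_LWP_EVENTS enabled, the expected sequence is PL_FLAG_BORN|PL_FLAG_SCX for the new LWP, PL_FLAG_EXITED|PL_FLAG_SCE at its death, and finally the process exit.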
*/ ATF_TC_WITHOUT_HEAD(ptrace__lwp_events); ATF_TC_BODY(ptrace__lwp_events, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; lwpid_t lwps[2]; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); simple_thread_main(); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); lwps[0] = pl.pl_lwpid; ATF_REQUIRE(ptrace(PT_LWP_EVENTS, wpid, NULL, 1) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The first event should be for the child thread's birth. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_BORN | PL_FLAG_SCX)) == (PL_FLAG_BORN | PL_FLAG_SCX)); ATF_REQUIRE(pl.pl_lwpid != lwps[0]); lwps[1] = pl.pl_lwpid; ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next event should be for the child thread's death. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_EXITED | PL_FLAG_SCE)) == (PL_FLAG_EXITED | PL_FLAG_SCE)); ATF_REQUIRE(pl.pl_lwpid == lwps[1]); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void * exec_thread(void *arg __unused) { execl("/usr/bin/true", "true", NULL); exit(127); } static __dead2 void exec_thread_main(void) { pthread_t thread; CHILD_REQUIRE(pthread_create(&thread, NULL, exec_thread, NULL) == 0); for (;;) sleep(60); exit(1); } /* * Verify that the expected LWP events are reported for a multithreaded * process that calls execve(2). */ ATF_TC_WITHOUT_HEAD(ptrace__lwp_events_exec); ATF_TC_BODY(ptrace__lwp_events_exec, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; lwpid_t lwps[2]; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exec_thread_main(); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); lwps[0] = pl.pl_lwpid; ATF_REQUIRE(ptrace(PT_LWP_EVENTS, wpid, NULL, 1) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The first event should be for the child thread's birth. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_BORN | PL_FLAG_SCX)) == (PL_FLAG_BORN | PL_FLAG_SCX)); ATF_REQUIRE(pl.pl_lwpid != lwps[0]); lwps[1] = pl.pl_lwpid; ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* * The next event should be for the main thread's death due to * single threading from execve(). 
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_EXITED | PL_FLAG_SCE)) == (PL_FLAG_EXITED)); ATF_REQUIRE(pl.pl_lwpid == lwps[0]); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next event should be for the child process's exec. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_EXEC | PL_FLAG_SCX)) == (PL_FLAG_EXEC | PL_FLAG_SCX)); ATF_REQUIRE(pl.pl_lwpid == lwps[1]); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void handler(int sig __unused) { } static void signal_main(void) { signal(SIGINFO, handler); raise(SIGINFO); exit(0); } /* * Verify that the expected ptrace event is reported for a signal. */ ATF_TC_WITHOUT_HEAD(ptrace__siginfo); ATF_TC_BODY(ptrace__siginfo, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); signal_main(); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next event should be for the SIGINFO. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGINFO); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_event == PL_EVENT_SIGNAL); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_code == SI_LWP); ATF_REQUIRE(pl.pl_siginfo.si_pid == wpid); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that the expected ptrace events are reported for PTRACE_EXEC. */ ATF_TC_WITHOUT_HEAD(ptrace__ptrace_exec_disable); ATF_TC_BODY(ptrace__ptrace_exec_disable, tc) { pid_t fpid, wpid; int events, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exec_thread(NULL); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); events = 0; ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* Should get one event at exit. 
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } ATF_TC_WITHOUT_HEAD(ptrace__ptrace_exec_enable); ATF_TC_BODY(ptrace__ptrace_exec_enable, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int events, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exec_thread(NULL); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); events = PTRACE_EXEC; ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next event should be for the child process's exec. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_EXEC | PL_FLAG_SCX)) == (PL_FLAG_EXEC | PL_FLAG_SCX)); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } ATF_TC_WITHOUT_HEAD(ptrace__event_mask); ATF_TC_BODY(ptrace__event_mask, tc) { pid_t fpid, wpid; int events, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exit(0); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* PT_FOLLOW_FORK should toggle the state of PTRACE_FORK. */ ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, fpid, NULL, 1) != -1); ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(events & PTRACE_FORK); ATF_REQUIRE(ptrace(PT_FOLLOW_FORK, fpid, NULL, 0) != -1); ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(!(events & PTRACE_FORK)); /* PT_LWP_EVENTS should toggle the state of PTRACE_LWP. */ ATF_REQUIRE(ptrace(PT_LWP_EVENTS, fpid, NULL, 1) != -1); ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(events & PTRACE_LWP); ATF_REQUIRE(ptrace(PT_LWP_EVENTS, fpid, NULL, 0) != -1); ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(!(events & PTRACE_LWP)); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* Should get one event at exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that the expected ptrace events are reported for PTRACE_VFORK. */ ATF_TC_WITHOUT_HEAD(ptrace__ptrace_vfork); ATF_TC_BODY(ptrace__ptrace_vfork, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int events, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(true); } /* The first wait() should report the stop from SIGSTOP. 
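 *
 * [Editor's note: not part of the original file.  The
 * PT_GET_EVENT_MASK / PT_SET_EVENT_MASK pair used just below is the
 * usual read-modify-write idiom; PT_FOLLOW_FORK and PT_LWP_EVENTS
 * merely toggle single bits in the same mask.  A generic helper
 * (name illustrative) could read:
 *
 *	static int
 *	enable_ptrace_event(pid_t pid, int bit)
 *	{
 *		int events;
 *
 *		if (ptrace(PT_GET_EVENT_MASK, pid, (caddr_t)&events,
 *		    sizeof(events)) == -1)
 *			return (-1);
 *		events |= bit;
 *		return (ptrace(PT_SET_EVENT_MASK, pid, (caddr_t)&events,
 *		    sizeof(events)));
 *	}]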
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); events |= PTRACE_VFORK; ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) != -1); /* The next event should report the end of the vfork. */ wpid = wait(&status); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & PL_FLAG_VFORK_DONE) != 0); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) != -1); wpid = wait(&status); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } ATF_TC_WITHOUT_HEAD(ptrace__ptrace_vfork_follow); ATF_TC_BODY(ptrace__ptrace_vfork_follow, tc) { struct ptrace_lwpinfo pl[2]; pid_t children[2], fpid, wpid; int events, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); follow_fork_parent(true); } /* Parent process. */ children[0] = fpid; /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(children[0], &status, 0); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, children[0], (caddr_t)&events, sizeof(events)) == 0); events |= PTRACE_FORK | PTRACE_VFORK; ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, children[0], (caddr_t)&events, sizeof(events)) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); /* Wait for both halves of the fork event to get reported. */ children[1] = handle_fork_events(children[0], pl); ATF_REQUIRE(children[1] > 0); ATF_REQUIRE((pl[0].pl_flags & PL_FLAG_VFORKED) != 0); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, children[1], (caddr_t)1, 0) != -1); /* * The child can't exit until the grandchild reports status, so the * grandchild should report its exit first to the debugger. */ wpid = waitpid(children[1], &status, 0); ATF_REQUIRE(wpid == children[1]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); /* * The child should report its vfork() completion before it * exits. */ wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl[0], sizeof(pl[0])) != -1); ATF_REQUIRE((pl[0].pl_flags & PL_FLAG_VFORK_DONE) != 0); ATF_REQUIRE(ptrace(PT_CONTINUE, children[0], (caddr_t)1, 0) != -1); wpid = wait(&status); ATF_REQUIRE(wpid == children[0]); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } #ifdef HAVE_BREAKPOINT /* * Verify that no more events are reported after PT_KILL except for the * process exit when stopped due to a breakpoint trap. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_KILL_breakpoint); ATF_TC_BODY(ptrace__PT_KILL_breakpoint, tc) { pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); breakpoint(); exit(1); } /* The first wait() should report the stop from SIGSTOP.
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report hitting the breakpoint. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); /* Kill the child process. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } #endif /* HAVE_BREAKPOINT */ /* * Verify that no more events are reported after PT_KILL except for the * process exit when stopped inside of a system call. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_KILL_system_call); ATF_TC_BODY(ptrace__PT_KILL_system_call, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); getpid(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP and tracing system calls. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report a system call entry for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); /* Kill the child process. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that no more events are reported after PT_KILL except for the * process exit when killing a multithreaded process. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_KILL_threads); ATF_TC_BODY(ptrace__PT_KILL_threads, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; lwpid_t main_lwp; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); simple_thread_main(); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); main_lwp = pl.pl_lwpid; ATF_REQUIRE(ptrace(PT_LWP_EVENTS, wpid, NULL, 1) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The first event should be for the child thread's birth. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_BORN | PL_FLAG_SCX)) == (PL_FLAG_BORN | PL_FLAG_SCX)); ATF_REQUIRE(pl.pl_lwpid != main_lwp); /* Kill the child process. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL. 
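 *
 * [Editor's note: not part of the original file.  The property all of
 * these PT_KILL tests check can be phrased as a single predicate;
 * assumes <stdbool.h>, and the helper name is illustrative:
 *
 *	static bool
 *	pt_kill_reports_only_sigkill(pid_t pid)
 *	{
 *		int status;
 *
 *		if (ptrace(PT_KILL, pid, 0, 0) == -1)
 *			return (false);
 *		if (waitpid(pid, &status, 0) != pid)
 *			return (false);
 *		return (WIFSIGNALED(status) &&
 *		    WTERMSIG(status) == SIGKILL);
 *	}
 *
 * No breakpoint, syscall, or queued-signal stop should be observed
 * between the PT_KILL request and that final SIGKILL status.]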
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void * mask_usr1_thread(void *arg) { pthread_barrier_t *pbarrier; sigset_t sigmask; pbarrier = (pthread_barrier_t*)arg; sigemptyset(&sigmask); sigaddset(&sigmask, SIGUSR1); CHILD_REQUIRE(pthread_sigmask(SIG_BLOCK, &sigmask, NULL) == 0); /* Sync up with other thread after sigmask updated. */ pthread_barrier_wait(pbarrier); for (;;) sleep(60); return (NULL); } /* * Verify that the SIGKILL from PT_KILL takes priority over other signals * and prevents spurious stops due to those other signals. */ ATF_TC(ptrace__PT_KILL_competing_signal); ATF_TC_HEAD(ptrace__PT_KILL_competing_signal, tc) { atf_tc_set_md_var(tc, "require.user", "root"); } ATF_TC_BODY(ptrace__PT_KILL_competing_signal, tc) { pid_t fpid, wpid; int status; cpuset_t setmask; pthread_t t; pthread_barrier_t barrier; struct sched_param sched_param; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { /* Bind to one CPU so only one thread at a time will run. */ CPU_ZERO(&setmask); CPU_SET(0, &setmask); cpusetid_t setid; CHILD_REQUIRE(cpuset(&setid) == 0); CHILD_REQUIRE(cpuset_setaffinity(CPU_LEVEL_CPUSET, CPU_WHICH_CPUSET, setid, sizeof(setmask), &setmask) == 0); CHILD_REQUIRE(pthread_barrier_init(&barrier, NULL, 2) == 0); CHILD_REQUIRE(pthread_create(&t, NULL, mask_usr1_thread, (void*)&barrier) == 0); /* * Give the main thread higher priority. The test always * assumes that, if both threads are able to run, the main * thread runs first. */ sched_param.sched_priority = (sched_get_priority_max(SCHED_FIFO) + sched_get_priority_min(SCHED_FIFO)) / 2; CHILD_REQUIRE(pthread_setschedparam(pthread_self(), SCHED_FIFO, &sched_param) == 0); sched_param.sched_priority -= RQ_PPQ; CHILD_REQUIRE(pthread_setschedparam(t, SCHED_FIFO, &sched_param) == 0); sigset_t sigmask; sigemptyset(&sigmask); sigaddset(&sigmask, SIGUSR2); CHILD_REQUIRE(pthread_sigmask(SIG_BLOCK, &sigmask, NULL) == 0); /* Sync up with other thread after sigmask updated. */ pthread_barrier_wait(&barrier); trace_me(); for (;;) sleep(60); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* Send a signal that only the second thread can handle. */ ATF_REQUIRE(kill(fpid, SIGUSR2) == 0); /* The second wait() should report the SIGUSR2. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR2); /* Send a signal that only the first thread can handle. */ ATF_REQUIRE(kill(fpid, SIGUSR1) == 0); /* Replace the SIGUSR2 with a kill. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL (not the SIGUSR signal). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that the SIGKILL from PT_KILL takes priority over other stop events * and prevents spurious stops caused by those events. 
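 *
 * [Editor's note: not part of the original file.  Both "competing"
 * tests serialize their two threads by pinning the child to CPU 0 and
 * staggering SCHED_FIFO priorities.  The pinning step, extracted for
 * reference (name illustrative), is:
 *
 *	static int
 *	pin_self_to_cpu0(void)
 *	{
 *		cpuset_t mask;
 *		cpusetid_t setid;
 *
 *		CPU_ZERO(&mask);
 *		CPU_SET(0, &mask);
 *		if (cpuset(&setid) != 0)
 *			return (-1);
 *		return (cpuset_setaffinity(CPU_LEVEL_CPUSET,
 *		    CPU_WHICH_CPUSET, setid, sizeof(mask), &mask));
 *	}
 *
 * cpuset(2) creates a new set that the caller joins, so the affinity
 * restriction also covers every thread created afterwards.]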
*/ ATF_TC(ptrace__PT_KILL_competing_stop); ATF_TC_HEAD(ptrace__PT_KILL_competing_stop, tc) { atf_tc_set_md_var(tc, "require.user", "root"); } ATF_TC_BODY(ptrace__PT_KILL_competing_stop, tc) { pid_t fpid, wpid; int status; cpuset_t setmask; pthread_t t; pthread_barrier_t barrier; lwpid_t main_lwp; struct ptrace_lwpinfo pl; struct sched_param sched_param; if (atf_tc_get_config_var_as_bool_wd(tc, "ci", false)) atf_tc_skip("https://bugs.freebsd.org/220841"); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); /* Bind to one CPU so only one thread at a time will run. */ CPU_ZERO(&setmask); CPU_SET(0, &setmask); cpusetid_t setid; CHILD_REQUIRE(cpuset(&setid) == 0); CHILD_REQUIRE(cpuset_setaffinity(CPU_LEVEL_CPUSET, CPU_WHICH_CPUSET, setid, sizeof(setmask), &setmask) == 0); CHILD_REQUIRE(pthread_barrier_init(&barrier, NULL, 2) == 0); CHILD_REQUIRE(pthread_create(&t, NULL, mask_usr1_thread, (void*)&barrier) == 0); /* * Give the main thread higher priority. The test always * assumes that, if both threads are able to run, the main * thread runs first. */ sched_param.sched_priority = (sched_get_priority_max(SCHED_FIFO) + sched_get_priority_min(SCHED_FIFO)) / 2; CHILD_REQUIRE(pthread_setschedparam(pthread_self(), SCHED_FIFO, &sched_param) == 0); sched_param.sched_priority -= RQ_PPQ; CHILD_REQUIRE(pthread_setschedparam(t, SCHED_FIFO, &sched_param) == 0); sigset_t sigmask; sigemptyset(&sigmask); sigaddset(&sigmask, SIGUSR2); CHILD_REQUIRE(pthread_sigmask(SIG_BLOCK, &sigmask, NULL) == 0); /* Sync up with other thread after sigmask updated. */ pthread_barrier_wait(&barrier); /* Sync up with the test before doing the getpid(). */ raise(SIGSTOP); getpid(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); main_lwp = pl.pl_lwpid; /* Continue the child ignoring the SIGSTOP and tracing system calls. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* * Continue until child is done with setup, which is indicated with * SIGSTOP. Ignore system calls in the meantime. */ for (;;) { wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); if (WSTOPSIG(status) == SIGTRAP) { ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & (PL_FLAG_SCE | PL_FLAG_SCX)); } else { ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); break; } ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); } /* Proceed, allowing main thread to hit syscall entry for getpid(). */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_lwpid == main_lwp); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); /* Prevent the main thread from hitting its syscall exit for now. */ ATF_REQUIRE(ptrace(PT_SUSPEND, main_lwp, 0, 0) == 0); /* * Proceed, allowing second thread to hit syscall exit for * pthread_barrier_wait(). 
*/ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_lwpid != main_lwp); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCX); /* Send a signal that only the second thread can handle. */ ATF_REQUIRE(kill(fpid, SIGUSR2) == 0); ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* The next wait() should report the SIGUSR2. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR2); /* Allow the main thread to try to finish its system call. */ ATF_REQUIRE(ptrace(PT_RESUME, main_lwp, 0, 0) == 0); /* * At this point, the main thread is in the middle of a system call and * has been resumed. The second thread has taken a SIGUSR2 which will * be replaced with a SIGKILL below. The main thread will get to run * first. It should notice the kill request (even though the signal * replacement occurred in the other thread) and exit accordingly. It * should not stop for the system call exit event. */ /* Replace the SIGUSR2 with a kill. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL (not a syscall exit). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void sigusr1_handler(int sig) { CHILD_REQUIRE(sig == SIGUSR1); _exit(2); } /* * Verify that even if the signal queue is full for a child process, * a PT_KILL will kill the process. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_KILL_with_signal_full_sigqueue); ATF_TC_BODY(ptrace__PT_KILL_with_signal_full_sigqueue, tc) { pid_t fpid, wpid; int status; int max_pending_per_proc; size_t len; int i; ATF_REQUIRE(signal(SIGUSR1, sigusr1_handler) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); len = sizeof(max_pending_per_proc); ATF_REQUIRE(sysctlbyname("kern.sigqueue.max_pending_per_proc", &max_pending_per_proc, &len, NULL, 0) == 0); /* Fill the signal queue. */ for (i = 0; i < max_pending_per_proc; ++i) ATF_REQUIRE(kill(fpid, SIGUSR1) == 0); /* Kill the child process. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that when stopped at a system call entry, a signal can be * requested with PT_CONTINUE which will be delivered once the system * call is complete. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_system_call_entry); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_system_call_entry, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE(signal(SIGUSR1, sigusr1_handler) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); getpid(); exit(1); } /* The first wait() should report the stop from SIGSTOP. 
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP and tracing system calls. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report a system call entry for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); /* Continue the child process with a signal. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); for (;;) { /* * The last wait() should report exit 2, i.e., a normal _exit * from the signal handler. In the meantime, catch and proceed * past any syscall stops. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & (PL_FLAG_SCE | PL_FLAG_SCX)); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } else { ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); break; } } wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void sigusr1_counting_handler(int sig) { static int counter = 0; CHILD_REQUIRE(sig == SIGUSR1); counter++; if (counter == 2) _exit(2); } /* * Verify that, when continuing from a stop at system call entry and exit, * a signal can be requested from both stops, and both will be delivered when * the system call is complete. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_system_call_entry_and_exit); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_system_call_entry_and_exit, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE(signal(SIGUSR1, sigusr1_counting_handler) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); getpid(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP and tracing system calls. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report a system call entry for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); /* Continue the child process with a signal. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* The third wait() should report a system call exit for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCX); /* Continue the child process with a signal. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); for (;;) { /* * The last wait() should report exit 2, i.e., a normal _exit * from the signal handler. In the meantime, catch and proceed * past any syscall stops. 
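 *
 * [Editor's note: not part of the original file.  The signal-injection
 * idiom used throughout these tests: the addr argument (caddr_t)1
 * means "resume where the process stopped", and the data argument
 * names a signal to deliver on resume, with 0 discarding the signal
 * that caused the stop.  In sketch form (name illustrative):
 *
 *	static int
 *	resume_with_signal(pid_t pid, int sig)
 *	{
 *		return (ptrace(PT_CONTINUE, pid, (caddr_t)1, sig));
 *	}
 *
 * resume_with_signal(pid, 0) swallows the stopping signal;
 * resume_with_signal(pid, SIGUSR1) queues a SIGUSR1 instead.]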
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & (PL_FLAG_SCE | PL_FLAG_SCX)); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } else { ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); break; } } wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that even if the signal queue is full for a child process, * a PT_CONTINUE with a signal will not result in loss of that signal. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_full_sigqueue); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_full_sigqueue, tc) { pid_t fpid, wpid; int status; int max_pending_per_proc; size_t len; int i; ATF_REQUIRE(signal(SIGUSR2, handler) != SIG_ERR); ATF_REQUIRE(signal(SIGUSR1, sigusr1_handler) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); len = sizeof(max_pending_per_proc); ATF_REQUIRE(sysctlbyname("kern.sigqueue.max_pending_per_proc", &max_pending_per_proc, &len, NULL, 0) == 0); /* Fill the signal queue. */ for (i = 0; i < max_pending_per_proc; ++i) ATF_REQUIRE(kill(fpid, SIGUSR2) == 0); /* Continue with signal. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); for (;;) { wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); if (WIFSTOPPED(status)) { ATF_REQUIRE(WSTOPSIG(status) == SIGUSR2); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } else { /* * The last wait() should report normal _exit from the * SIGUSR1 handler. */ ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); break; } } wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static sem_t sigusr1_sem; static int got_usr1; static void sigusr1_sempost_handler(int sig __unused) { got_usr1++; CHILD_REQUIRE(sem_post(&sigusr1_sem) == 0); } /* * Verify that even if the signal queue is full for a child process, * and the signal is masked, a PT_CONTINUE with a signal will not * result in loss of that signal. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_masked_full_sigqueue); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_masked_full_sigqueue, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status, err; int max_pending_per_proc; size_t len; int i; sigset_t sigmask; ATF_REQUIRE(signal(SIGUSR2, handler) != SIG_ERR); ATF_REQUIRE(sem_init(&sigusr1_sem, 0, 0) == 0); ATF_REQUIRE(signal(SIGUSR1, sigusr1_sempost_handler) != SIG_ERR); got_usr1 = 0; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { CHILD_REQUIRE(sigemptyset(&sigmask) == 0); CHILD_REQUIRE(sigaddset(&sigmask, SIGUSR1) == 0); CHILD_REQUIRE(sigprocmask(SIG_BLOCK, &sigmask, NULL) == 0); trace_me(); CHILD_REQUIRE(got_usr1 == 0); /* Allow the pending SIGUSR1 in now. */ CHILD_REQUIRE(sigprocmask(SIG_UNBLOCK, &sigmask, NULL) == 0); /* Wait to receive the SIGUSR1. */ do { err = sem_wait(&sigusr1_sem); CHILD_REQUIRE(err == 0 || errno == EINTR); } while (err != 0 && errno == EINTR); CHILD_REQUIRE(got_usr1 == 1); exit(1); } /* The first wait() should report the stop from SIGSTOP. 
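 *
 * [Editor's note: not part of the original file.  The "full sigqueue"
 * tests saturate the per-process signal queue, whose size comes from
 * the sysctl read just below.  As a stand-alone helper (name
 * illustrative) the lookup is:
 *
 *	static int
 *	sigqueue_limit(void)
 *	{
 *		int max_pending;
 *		size_t len = sizeof(max_pending);
 *
 *		if (sysctlbyname("kern.sigqueue.max_pending_per_proc",
 *		    &max_pending, &len, NULL, 0) == -1)
 *			return (-1);
 *		return (max_pending);
 *	}]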
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); len = sizeof(max_pending_per_proc); ATF_REQUIRE(sysctlbyname("kern.sigqueue.max_pending_per_proc", &max_pending_per_proc, &len, NULL, 0) == 0); /* Fill the signal queue. */ for (i = 0; i < max_pending_per_proc; ++i) ATF_REQUIRE(kill(fpid, SIGUSR2) == 0); /* Continue with signal. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* Collect and ignore all of the SIGUSR2. */ for (i = 0; i < max_pending_per_proc; ++i) { wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR2); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } /* Now our PT_CONTINUE'd SIGUSR1 should cause a stop after unmask. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR1); ATF_REQUIRE(ptrace(PT_LWPINFO, fpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGUSR1); /* Continue the child, ignoring the SIGUSR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last wait() should report exit after receiving SIGUSR1. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that, after stopping due to a signal, that signal can be * replaced with another signal. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_change_sig); ATF_TC_BODY(ptrace__PT_CONTINUE_change_sig, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); sleep(20); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* Send a signal without ptrace. */ ATF_REQUIRE(kill(fpid, SIGINT) == 0); /* The second wait() should report a SIGINT was received. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGINT); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGINT); /* Continue the child process with a different signal. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGTERM) == 0); /* * The last wait() should report having died due to the new * signal, SIGTERM. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGTERM); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a signal can be passed through to the child even when there * was no true signal originally. Such cases arise when a SIGTRAP is * invented for, e.g., system call stops. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_sigtrap_system_call_entry); ATF_TC_BODY(ptrace__PT_CONTINUE_with_sigtrap_system_call_entry, tc) { struct ptrace_lwpinfo pl; struct rlimit rl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); /* SIGTRAP expected to cause exit on syscall entry.
*/ rl.rlim_cur = rl.rlim_max = 0; ATF_REQUIRE(setrlimit(RLIMIT_CORE, &rl) == 0); getpid(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP and tracing system calls. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report a system call entry for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); /* Continue the child process with a SIGTRAP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGTRAP) == 0); for (;;) { /* * The last wait() should report exit due to SIGTRAP. In the * meantime, catch and proceed past any syscall stops. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & (PL_FLAG_SCE | PL_FLAG_SCX)); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } else { ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGTRAP); break; } } wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * A mixed bag PT_CONTINUE with signal test. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_mix); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_mix, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE(signal(SIGUSR1, sigusr1_counting_handler) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); getpid(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP and tracing system calls. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report a system call entry for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); /* Continue with the first SIGUSR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* The next wait() should report a system call exit for getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCX); /* Send an ABRT without ptrace. */ ATF_REQUIRE(kill(fpid, SIGABRT) == 0); /* Continue normally. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next wait() should report the SIGABRT. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGABRT); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGABRT); /* Continue, replacing the SIGABRT with another SIGUSR1. 
*/ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); for (;;) { /* * The last wait() should report exit 2, i.e., a normal _exit * from the signal handler. In the meantime, catch and proceed * past any syscall stops. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & (PL_FLAG_SCE | PL_FLAG_SCX)); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } else { ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 2); break; } } wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify a signal delivered by ptrace is noticed by kevent(2). */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_kqueue); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_kqueue, tc) { pid_t fpid, wpid; int status, kq, nevents; struct kevent kev; ATF_REQUIRE(signal(SIGUSR1, SIG_IGN) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { CHILD_REQUIRE((kq = kqueue()) > 0); EV_SET(&kev, SIGUSR1, EVFILT_SIGNAL, EV_ADD, 0, 0, 0); CHILD_REQUIRE(kevent(kq, &kev, 1, NULL, 0, NULL) == 0); trace_me(); for (;;) { nevents = kevent(kq, NULL, 0, &kev, 1, NULL); if (nevents == -1 && errno == EINTR) continue; CHILD_REQUIRE(nevents > 0); CHILD_REQUIRE(kev.filter == EVFILT_SIGNAL); CHILD_REQUIRE(kev.ident == SIGUSR1); break; } exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue with the SIGUSR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* * The last wait() should report normal exit with code 1. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void * signal_thread(void *arg) { int err; sigset_t sigmask; pthread_barrier_t *pbarrier = (pthread_barrier_t*)arg; /* Wait for this thread to receive a SIGUSR1. */ do { err = sem_wait(&sigusr1_sem); CHILD_REQUIRE(err == 0 || errno == EINTR); } while (err != 0 && errno == EINTR); /* Free our companion thread from the barrier. */ pthread_barrier_wait(pbarrier); /* * Swap ignore duties; the next SIGUSR1 should go to the * other thread. */ CHILD_REQUIRE(sigemptyset(&sigmask) == 0); CHILD_REQUIRE(sigaddset(&sigmask, SIGUSR1) == 0); CHILD_REQUIRE(pthread_sigmask(SIG_BLOCK, &sigmask, NULL) == 0); /* Sync up threads after swapping signal masks. */ pthread_barrier_wait(pbarrier); /* Wait until our companion has received its SIGUSR1. */ pthread_barrier_wait(pbarrier); return (NULL); } /* * Verify that a traced process with a blocked signal receives the * signal from kill() once unmasked. */ ATF_TC_WITHOUT_HEAD(ptrace__killed_with_sigmask); ATF_TC_BODY(ptrace__killed_with_sigmask, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status, err; sigset_t sigmask; ATF_REQUIRE(sem_init(&sigusr1_sem, 0, 0) == 0); ATF_REQUIRE(signal(SIGUSR1, sigusr1_sempost_handler) != SIG_ERR); got_usr1 = 0; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { CHILD_REQUIRE(sigemptyset(&sigmask) == 0); CHILD_REQUIRE(sigaddset(&sigmask, SIGUSR1) == 0); CHILD_REQUIRE(sigprocmask(SIG_BLOCK, &sigmask, NULL) == 0); trace_me(); CHILD_REQUIRE(got_usr1 == 0); /* Allow the pending SIGUSR1 in now.
*/ CHILD_REQUIRE(sigprocmask(SIG_UNBLOCK, &sigmask, NULL) == 0); /* Wait to receive a SIGUSR1. */ do { err = sem_wait(&sigusr1_sem); CHILD_REQUIRE(err == 0 || errno == EINTR); } while (err != 0 && errno == EINTR); CHILD_REQUIRE(got_usr1 == 1); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, fpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGSTOP); /* Send blocked SIGUSR1 which should cause a stop. */ ATF_REQUIRE(kill(fpid, SIGUSR1) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next wait() should report the kill(SIGUSR1) was received. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR1); ATF_REQUIRE(ptrace(PT_LWPINFO, fpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGUSR1); /* Continue the child, allowing in the SIGUSR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* The last wait() should report normal exit with code 1. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that a traced process with a blocked signal receives the * signal from PT_CONTINUE once unmasked. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_sigmask); ATF_TC_BODY(ptrace__PT_CONTINUE_with_sigmask, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status, err; sigset_t sigmask; ATF_REQUIRE(sem_init(&sigusr1_sem, 0, 0) == 0); ATF_REQUIRE(signal(SIGUSR1, sigusr1_sempost_handler) != SIG_ERR); got_usr1 = 0; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { CHILD_REQUIRE(sigemptyset(&sigmask) == 0); CHILD_REQUIRE(sigaddset(&sigmask, SIGUSR1) == 0); CHILD_REQUIRE(sigprocmask(SIG_BLOCK, &sigmask, NULL) == 0); trace_me(); CHILD_REQUIRE(got_usr1 == 0); /* Allow the pending SIGUSR1 in now. */ CHILD_REQUIRE(sigprocmask(SIG_UNBLOCK, &sigmask, NULL) == 0); /* Wait to receive a SIGUSR1. */ do { err = sem_wait(&sigusr1_sem); CHILD_REQUIRE(err == 0 || errno == EINTR); } while (err != 0 && errno == EINTR); CHILD_REQUIRE(got_usr1 == 1); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, fpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGSTOP); /* Continue the child replacing SIGSTOP with SIGUSR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* The next wait() should report the SIGUSR1 was received. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR1); ATF_REQUIRE(ptrace(PT_LWPINFO, fpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGUSR1); /* Continue the child, ignoring the SIGUSR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last wait() should report normal exit with code 1.
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that if ptrace stops due to a signal but continues with * a different signal, the new signal is routed to a thread * that can accept it, and that the thread is awakened by the signal * in a timely manner. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_with_signal_thread_sigmask); ATF_TC_BODY(ptrace__PT_CONTINUE_with_signal_thread_sigmask, tc) { pid_t fpid, wpid; int status, err; pthread_t t; sigset_t sigmask; pthread_barrier_t barrier; ATF_REQUIRE(pthread_barrier_init(&barrier, NULL, 2) == 0); ATF_REQUIRE(sem_init(&sigusr1_sem, 0, 0) == 0); ATF_REQUIRE(signal(SIGUSR1, sigusr1_sempost_handler) != SIG_ERR); ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { CHILD_REQUIRE(pthread_create(&t, NULL, signal_thread, (void*)&barrier) == 0); /* The other thread should receive the first SIGUSR1. */ CHILD_REQUIRE(sigemptyset(&sigmask) == 0); CHILD_REQUIRE(sigaddset(&sigmask, SIGUSR1) == 0); CHILD_REQUIRE(pthread_sigmask(SIG_BLOCK, &sigmask, NULL) == 0); trace_me(); /* Wait until other thread has received its SIGUSR1. */ pthread_barrier_wait(&barrier); /* * Swap ignore duties; the next SIGUSR1 should go to this * thread. */ CHILD_REQUIRE(pthread_sigmask(SIG_UNBLOCK, &sigmask, NULL) == 0); /* Sync up threads after swapping signal masks. */ pthread_barrier_wait(&barrier); /* * Sync up with test code; we're ready for the next SIGUSR1 * now. */ raise(SIGSTOP); /* Wait for this thread to receive a SIGUSR1. */ do { err = sem_wait(&sigusr1_sem); CHILD_REQUIRE(err == 0 || errno == EINTR); } while (err != 0 && errno == EINTR); /* Free the other thread from the barrier. */ pthread_barrier_wait(&barrier); CHILD_REQUIRE(pthread_join(t, NULL) == 0); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* * Send a signal without ptrace that either thread will accept (USR2, * in this case). */ ATF_REQUIRE(kill(fpid, SIGUSR2) == 0); /* The second wait() should report a SIGUSR2 was received. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR2); /* Continue the child, changing the signal to USR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* The next wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); ATF_REQUIRE(kill(fpid, SIGUSR2) == 0); /* The next wait() should report a SIGUSR2 was received. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGUSR2); /* Continue the child, changing the signal to USR1. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, SIGUSR1) == 0); /* The last wait() should report normal exit with code 1.
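 *
 * [Editor's note: not part of the original file.  The routing the test
 * above depends on: a process-directed signal may be delivered to any
 * thread that has it unblocked, so blocking it in one thread with
 * pthread_sigmask() steers delivery to the other.  Sketch (name
 * illustrative):
 *
 *	static int
 *	block_signal_in_this_thread(int sig)
 *	{
 *		sigset_t set;
 *
 *		sigemptyset(&set);
 *		sigaddset(&set, sig);
 *		return (pthread_sigmask(SIG_BLOCK, &set, NULL));
 *	}
 *
 * Unlike sigprocmask(), this changes only the calling thread's mask.]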
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void * raise_sigstop_thread(void *arg __unused) { raise(SIGSTOP); return (NULL); } static void * sleep_thread(void *arg __unused) { sleep(60); return (NULL); } static void terminate_with_pending_sigstop(bool sigstop_from_main_thread) { pid_t fpid, wpid; int status, i; cpuset_t setmask; cpusetid_t setid; pthread_t t; /* * Become the reaper for this process tree. We need to be able to check * that both child and grandchild have died. */ ATF_REQUIRE(procctl(P_PID, getpid(), PROC_REAP_ACQUIRE, NULL) == 0); fpid = fork(); ATF_REQUIRE(fpid >= 0); if (fpid == 0) { fpid = fork(); CHILD_REQUIRE(fpid >= 0); if (fpid == 0) { trace_me(); /* Pin to CPU 0 to serialize thread execution. */ CPU_ZERO(&setmask); CPU_SET(0, &setmask); CHILD_REQUIRE(cpuset(&setid) == 0); CHILD_REQUIRE(cpuset_setaffinity(CPU_LEVEL_CPUSET, CPU_WHICH_CPUSET, setid, sizeof(setmask), &setmask) == 0); if (sigstop_from_main_thread) { /* * We expect the SIGKILL sent when our parent * dies to be delivered to the new thread. * Raise the SIGSTOP in this thread so the * threads compete. */ CHILD_REQUIRE(pthread_create(&t, NULL, sleep_thread, NULL) == 0); raise(SIGSTOP); } else { /* * We expect the SIGKILL to be delivered to * this thread. After creating the new thread, * just get off the CPU so the other thread can * raise the SIGSTOP. */ CHILD_REQUIRE(pthread_create(&t, NULL, raise_sigstop_thread, NULL) == 0); sleep(60); } exit(0); } /* First stop is trace_me() immediately after fork. */ wpid = waitpid(fpid, &status, 0); CHILD_REQUIRE(wpid == fpid); CHILD_REQUIRE(WIFSTOPPED(status)); CHILD_REQUIRE(WSTOPSIG(status) == SIGSTOP); CHILD_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* Second stop is from the raise(SIGSTOP). */ wpid = waitpid(fpid, &status, 0); CHILD_REQUIRE(wpid == fpid); CHILD_REQUIRE(WIFSTOPPED(status)); CHILD_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* * Terminate tracing process without detaching. Our child * should be killed. */ exit(0); } /* * We should get a normal exit from our immediate child and a SIGKILL * exit from our grandchild. The latter case is the interesting one. * Our grandchild should not have stopped due to the SIGSTOP that was * left dangling when its parent died. */ for (i = 0; i < 2; ++i) { wpid = wait(&status); if (wpid == fpid) ATF_REQUIRE(WIFEXITED(status) && WEXITSTATUS(status) == 0); else ATF_REQUIRE(WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL); } } /* * These two tests ensure that if the tracing process exits without detaching * just after the child received a SIGSTOP, the child is cleanly killed and * doesn't go to sleep due to the SIGSTOP. The parent's death will send a * SIGKILL to the child. If the SIGKILL and the SIGSTOP are handled by * different threads, the SIGKILL must win. There are two variants of this * test, designed to catch the case where the SIGKILL is delivered to the * younger thread (the first test) and the case where the SIGKILL is delivered * to the older thread (the second test). This behavior has changed in the * past, so make no assumptions.
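 *
 * [Editor's note: not part of the original file.  These tests rely on
 * procctl(2) reaping so the test process can wait() directly on its
 * grandchild; the acquisition used by terminate_with_pending_sigstop()
 * above reduces to (helper name illustrative):
 *
 *	static int
 *	become_reaper(void)
 *	{
 *		return (procctl(P_PID, getpid(), PROC_REAP_ACQUIRE,
 *		    NULL));
 *	}
 *
 * The reaper role is dropped automatically when the process exits, or
 * explicitly with PROC_REAP_RELEASE.]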
*/ ATF_TC(ptrace__parent_terminate_with_pending_sigstop1); ATF_TC_HEAD(ptrace__parent_terminate_with_pending_sigstop1, tc) { atf_tc_set_md_var(tc, "require.user", "root"); } ATF_TC_BODY(ptrace__parent_terminate_with_pending_sigstop1, tc) { terminate_with_pending_sigstop(true); } ATF_TC(ptrace__parent_terminate_with_pending_sigstop2); ATF_TC_HEAD(ptrace__parent_terminate_with_pending_sigstop2, tc) { atf_tc_set_md_var(tc, "require.user", "root"); } ATF_TC_BODY(ptrace__parent_terminate_with_pending_sigstop2, tc) { terminate_with_pending_sigstop(false); } /* * Verify that after ptrace() discards a SIGKILL signal, the event mask * is not modified. */ ATF_TC_WITHOUT_HEAD(ptrace__event_mask_sigkill_discard); ATF_TC_BODY(ptrace__event_mask_sigkill_discard, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status, event_mask, new_event_mask; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); raise(SIGSTOP); exit(0); } /* The first wait() should report the stop from trace_me(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Set several unobtrusive event bits. */ event_mask = PTRACE_EXEC | PTRACE_FORK | PTRACE_LWP; ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, wpid, (caddr_t)&event_mask, sizeof(event_mask)) == 0); /* Send a SIGKILL without using ptrace. */ ATF_REQUIRE(kill(fpid, SIGKILL) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next stop should be due to the SIGKILL. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGKILL); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGKILL); /* Continue the child ignoring the SIGKILL. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Check the current event mask. It should not have changed. */ new_event_mask = 0; ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, wpid, (caddr_t)&new_event_mask, sizeof(new_event_mask)) == 0); ATF_REQUIRE(event_mask == new_event_mask); /* Continue the child to let it exit. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } static void * flock_thread(void *arg) { int fd; fd = *(int *)arg; (void)flock(fd, LOCK_EX); (void)flock(fd, LOCK_UN); return (NULL); } /* * Verify that PT_ATTACH will suspend threads sleeping in an SBDRY section. * We rely on the fact that the lockf implementation sets SBDRY before blocking * on a lock. This is a regression test for r318191. 
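 *
 * [Editor's note: not part of the original file.  The attach half of
 * the test below follows the standard pattern: PT_ATTACH sends a
 * SIGSTOP, and the debugger must collect that stop before issuing any
 * further requests.  Sketch (name illustrative):
 *
 *	static int
 *	attach_and_wait(pid_t pid)
 *	{
 *		int status;
 *
 *		if (ptrace(PT_ATTACH, pid, NULL, 0) == -1)
 *			return (-1);
 *		if (waitpid(pid, &status, 0) != pid ||
 *		    !WIFSTOPPED(status) || WSTOPSIG(status) != SIGSTOP)
 *			return (-1);
 *		return (0);
 *	}
 *
 * The test body instead polls with WNOHANG so that it can bound how
 * long it is willing to wait for the stop.]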
*/ ATF_TC_WITHOUT_HEAD(ptrace__PT_ATTACH_with_SBDRY_thread); ATF_TC_BODY(ptrace__PT_ATTACH_with_SBDRY_thread, tc) { pthread_barrier_t barrier; pthread_barrierattr_t battr; char tmpfile[64]; pid_t child, wpid; int error, fd, i, status; ATF_REQUIRE(pthread_barrierattr_init(&battr) == 0); ATF_REQUIRE(pthread_barrierattr_setpshared(&battr, PTHREAD_PROCESS_SHARED) == 0); ATF_REQUIRE(pthread_barrier_init(&barrier, &battr, 2) == 0); (void)snprintf(tmpfile, sizeof(tmpfile), "./ptrace.XXXXXX"); fd = mkstemp(tmpfile); ATF_REQUIRE(fd >= 0); ATF_REQUIRE((child = fork()) != -1); if (child == 0) { pthread_t t[2]; int cfd; error = pthread_barrier_wait(&barrier); if (error != 0 && error != PTHREAD_BARRIER_SERIAL_THREAD) _exit(1); cfd = open(tmpfile, O_RDONLY); if (cfd < 0) _exit(1); /* * We want at least two threads blocked on the file lock since * the SIGSTOP from PT_ATTACH may kick one of them out of * sleep. */ if (pthread_create(&t[0], NULL, flock_thread, &cfd) != 0) _exit(1); if (pthread_create(&t[1], NULL, flock_thread, &cfd) != 0) _exit(1); if (pthread_join(t[0], NULL) != 0) _exit(1); if (pthread_join(t[1], NULL) != 0) _exit(1); _exit(0); } ATF_REQUIRE(flock(fd, LOCK_EX) == 0); error = pthread_barrier_wait(&barrier); ATF_REQUIRE(error == 0 || error == PTHREAD_BARRIER_SERIAL_THREAD); /* * Give the child some time to block. Is there a better way to do this? */ sleep(1); /* * Attach and give the child 3 seconds to stop. */ ATF_REQUIRE(ptrace(PT_ATTACH, child, NULL, 0) == 0); for (i = 0; i < 3; i++) { wpid = waitpid(child, &status, WNOHANG); if (wpid == child && WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP) break; sleep(1); } ATF_REQUIRE_MSG(i < 3, "failed to stop child process after PT_ATTACH"); ATF_REQUIRE(ptrace(PT_DETACH, child, NULL, 0) == 0); ATF_REQUIRE(flock(fd, LOCK_UN) == 0); ATF_REQUIRE(unlink(tmpfile) == 0); ATF_REQUIRE(close(fd) == 0); } static void sigusr1_step_handler(int sig) { CHILD_REQUIRE(sig == SIGUSR1); raise(SIGABRT); } /* * Verify that PT_STEP with a signal invokes the signal before * stepping the next instruction (and that the next instruction is * stepped correctly). */ ATF_TC_WITHOUT_HEAD(ptrace__PT_STEP_with_signal); ATF_TC_BODY(ptrace__PT_STEP_with_signal, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); signal(SIGUSR1, sigusr1_step_handler); raise(SIGABRT); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next stop should report the SIGABRT in the child body. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGABRT); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGABRT); /* Step the child process inserting SIGUSR1. */ ATF_REQUIRE(ptrace(PT_STEP, fpid, (caddr_t)1, SIGUSR1) == 0); /* The next stop should report the SIGABRT in the signal handler. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGABRT); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGABRT); /* Continue the child process discarding the signal. 
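The final ptrace() argument names the signal to deliver on resume: the PT_STEP above passed SIGUSR1 to inject one, while a zero, as used below, resumes with nothing pending:

	ptrace(PT_STEP, fpid, (caddr_t)1, SIGUSR1);	// step, delivering SIGUSR1 first
	ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0);	// resume, discarding the stop signal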
*/ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next stop should report a trace trap from PT_STEP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGTRAP); ATF_REQUIRE(pl.pl_siginfo.si_code == TRAP_TRACE); /* Continue the child to let it exit. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } #ifdef HAVE_BREAKPOINT /* * Verify that a SIGTRAP event with the TRAP_BRKPT code is reported * for a breakpoint trap. */ ATF_TC_WITHOUT_HEAD(ptrace__breakpoint_siginfo); ATF_TC_BODY(ptrace__breakpoint_siginfo, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); breakpoint(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report hitting the breakpoint. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & PL_FLAG_SI) != 0); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGTRAP); ATF_REQUIRE(pl.pl_siginfo.si_code == TRAP_BRKPT); /* Kill the child process. */ ATF_REQUIRE(ptrace(PT_KILL, fpid, 0, 0) == 0); /* The last wait() should report the SIGKILL. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSIGNALED(status)); ATF_REQUIRE(WTERMSIG(status) == SIGKILL); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } #endif /* HAVE_BREAKPOINT */ /* * Verify that a SIGTRAP event with the TRAP_TRACE code is reported * for a single-step trap from PT_STEP. */ ATF_TC_WITHOUT_HEAD(ptrace__step_siginfo); ATF_TC_BODY(ptrace__step_siginfo, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Step the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_STEP, fpid, (caddr_t)1, 0) == 0); /* The second wait() should report a single-step trap. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & PL_FLAG_SI) != 0); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGTRAP); ATF_REQUIRE(pl.pl_siginfo.si_code == TRAP_TRACE); /* Continue the child process. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. 
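The TRAP_BRKPT/TRAP_TRACE checks above are the portable way for a tracer to tell the two SIGTRAP flavors apart; a minimal decoding sketch:

	struct ptrace_lwpinfo pl;

	ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl));
	if ((pl.pl_flags & PL_FLAG_SI) != 0 && pl.pl_siginfo.si_signo == SIGTRAP) {
		switch (pl.pl_siginfo.si_code) {
		case TRAP_BRKPT:	// stopped at a breakpoint instruction
			break;
		case TRAP_TRACE:	// finished a PT_STEP single step
			break;
		}
	}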
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } #if defined(HAVE_BREAKPOINT) && defined(SKIP_BREAK) static void * continue_thread(void *arg __unused) { breakpoint(); return (NULL); } static __dead2 void continue_thread_main(void) { pthread_t threads[2]; CHILD_REQUIRE(pthread_create(&threads[0], NULL, continue_thread, NULL) == 0); CHILD_REQUIRE(pthread_create(&threads[1], NULL, continue_thread, NULL) == 0); CHILD_REQUIRE(pthread_join(threads[0], NULL) == 0); CHILD_REQUIRE(pthread_join(threads[1], NULL) == 0); exit(1); } /* * Ensure that PT_CONTINUE clears the status of the thread that * triggered the stop even if a different thread's LWP was passed to * PT_CONTINUE. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_CONTINUE_different_thread); ATF_TC_BODY(ptrace__PT_CONTINUE_different_thread, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; lwpid_t lwps[2]; bool hit_break[2]; struct reg reg; int i, j, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); continue_thread_main(); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(ptrace(PT_LWP_EVENTS, wpid, NULL, 1) == 0); /* Continue the child ignoring the SIGSTOP. */ ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* One of the new threads should report its birth. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_BORN | PL_FLAG_SCX)) == (PL_FLAG_BORN | PL_FLAG_SCX)); lwps[0] = pl.pl_lwpid; /* * Suspend this thread to ensure both threads are alive before * hitting the breakpoint. */ ATF_REQUIRE(ptrace(PT_SUSPEND, lwps[0], NULL, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The second thread should report its birth. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & (PL_FLAG_BORN | PL_FLAG_SCX)) == (PL_FLAG_BORN | PL_FLAG_SCX)); ATF_REQUIRE(pl.pl_lwpid != lwps[0]); lwps[1] = pl.pl_lwpid; /* Resume both threads waiting for breakpoint events. */ hit_break[0] = hit_break[1] = false; ATF_REQUIRE(ptrace(PT_RESUME, lwps[0], NULL, 0) != -1); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* One thread should report a breakpoint. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE((pl.pl_flags & PL_FLAG_SI) != 0); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGTRAP && pl.pl_siginfo.si_code == TRAP_BRKPT); if (pl.pl_lwpid == lwps[0]) i = 0; else i = 1; hit_break[i] = true; ATF_REQUIRE(ptrace(PT_GETREGS, pl.pl_lwpid, (caddr_t)&reg, 0) != -1); SKIP_BREAK(&reg); ATF_REQUIRE(ptrace(PT_SETREGS, pl.pl_lwpid, (caddr_t)&reg, 0) != -1); /* * Resume both threads but pass the other thread's LWPID to * PT_CONTINUE.
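These requests accept an LWP ID wherever a process ID is accepted, and continuing by any one LWP resumes the whole process; the per-LWP controls used above, for reference:

	ptrace(PT_SUSPEND, lwpid, NULL, 0);	// keep one LWP parked across continues
	ptrace(PT_RESUME, lwpid, NULL, 0);	// let it run on the next continue

What the steps below verify is that the stopped thread's breakpoint event is cleared even though the other thread's ID names the process.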
*/ ATF_REQUIRE(ptrace(PT_CONTINUE, lwps[i ^ 1], (caddr_t)1, 0) == 0); /* * We will now get two thread exit events and one more breakpoint * event. */ for (j = 0; j < 3; j++) { wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); if (pl.pl_lwpid == lwps[0]) i = 0; else i = 1; ATF_REQUIRE_MSG(lwps[i] != 0, "event for exited thread"); if (pl.pl_flags & PL_FLAG_EXITED) { ATF_REQUIRE_MSG(hit_break[i], "exited thread did not report breakpoint"); lwps[i] = 0; } else { ATF_REQUIRE((pl.pl_flags & PL_FLAG_SI) != 0); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGTRAP && pl.pl_siginfo.si_code == TRAP_BRKPT); ATF_REQUIRE_MSG(!hit_break[i], "double breakpoint event"); hit_break[i] = true; ATF_REQUIRE(ptrace(PT_GETREGS, pl.pl_lwpid, (caddr_t)&reg, 0) != -1); SKIP_BREAK(&reg); ATF_REQUIRE(ptrace(PT_SETREGS, pl.pl_lwpid, (caddr_t)&reg, 0) != -1); } ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); } /* Both threads should have exited. */ ATF_REQUIRE(lwps[0] == 0); ATF_REQUIRE(lwps[1] == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } #endif /* * Verify that PT_LWPINFO doesn't return stale siginfo. */ ATF_TC_WITHOUT_HEAD(ptrace__PT_LWPINFO_stale_siginfo); ATF_TC_BODY(ptrace__PT_LWPINFO_stale_siginfo, tc) { struct ptrace_lwpinfo pl; pid_t fpid, wpid; int events, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); raise(SIGABRT); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The next stop should report the SIGABRT in the child body. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGABRT); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SI); ATF_REQUIRE(pl.pl_siginfo.si_signo == SIGABRT); /* * Continue the process ignoring the signal, but enabling * syscall traps. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* * The next stop should report a system call entry from * exit(). PL_FLAG_SI should not be set. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); ATF_REQUIRE((pl.pl_flags & PL_FLAG_SI) == 0); /* Disable syscall tracing and continue the child to let it exit. */ ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); events &= ~PTRACE_SYSCALL; ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * A simple test of PT_GET_SC_ARGS and PT_GET_SC_RET.
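The tracer-side pattern for these two requests, as a rough sketch (sizes and error checks simplified):

	struct ptrace_lwpinfo pl;
	struct ptrace_sc_ret psr;
	register_t args[8];

	ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl));
	if (pl.pl_flags & PL_FLAG_SCE)		// syscall entry: fetch the arguments
		ptrace(PT_GET_SC_ARGS, wpid, (caddr_t)args,
		    pl.pl_syscall_narg * sizeof(register_t));
	else if (pl.pl_flags & PL_FLAG_SCX) {	// syscall exit: fetch the result
		ptrace(PT_GET_SC_RET, wpid, (caddr_t)&psr, sizeof(psr));
		// psr.sr_error == 0 means success and sr_retval[] is valid;
		// otherwise it holds the errno (EBADF for the failing close() below).
	}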
*/ ATF_TC_WITHOUT_HEAD(ptrace__syscall_args); ATF_TC_BODY(ptrace__syscall_args, tc) { struct ptrace_lwpinfo pl; struct ptrace_sc_ret psr; pid_t fpid, wpid; register_t args[2]; int events, status; ATF_REQUIRE((fpid = fork()) != -1); if (fpid == 0) { trace_me(); kill(getpid(), 0); close(3); exit(1); } /* The first wait() should report the stop from SIGSTOP. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* * Continue the process ignoring the signal, but enabling * syscall traps. */ ATF_REQUIRE(ptrace(PT_SYSCALL, fpid, (caddr_t)1, 0) == 0); /* * The next stop should be the syscall entry from getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); ATF_REQUIRE(pl.pl_syscall_code == SYS_getpid); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* * The next stop should be the syscall exit from getpid(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCX); ATF_REQUIRE(pl.pl_syscall_code == SYS_getpid); ATF_REQUIRE(ptrace(PT_GET_SC_RET, wpid, (caddr_t)&psr, sizeof(psr)) != -1); ATF_REQUIRE(psr.sr_error == 0); ATF_REQUIRE(psr.sr_retval[0] == wpid); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* * The next stop should be the syscall entry from kill(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); ATF_REQUIRE(pl.pl_syscall_code == SYS_kill); ATF_REQUIRE(pl.pl_syscall_narg == 2); ATF_REQUIRE(ptrace(PT_GET_SC_ARGS, wpid, (caddr_t)args, sizeof(args)) != -1); ATF_REQUIRE(args[0] == wpid); ATF_REQUIRE(args[1] == 0); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* * The next stop should be the syscall exit from kill(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCX); ATF_REQUIRE(pl.pl_syscall_code == SYS_kill); ATF_REQUIRE(ptrace(PT_GET_SC_RET, wpid, (caddr_t)&psr, sizeof(psr)) != -1); ATF_REQUIRE(psr.sr_error == 0); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* * The next stop should be the syscall entry from close(). */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCE); ATF_REQUIRE(pl.pl_syscall_code == SYS_close); ATF_REQUIRE(pl.pl_syscall_narg == 1); ATF_REQUIRE(ptrace(PT_GET_SC_ARGS, wpid, (caddr_t)args, sizeof(args)) != -1); ATF_REQUIRE(args[0] == 3); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* * The next stop should be the syscall exit from close(). 
*/ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(wpid == fpid); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGTRAP); ATF_REQUIRE(ptrace(PT_LWPINFO, wpid, (caddr_t)&pl, sizeof(pl)) != -1); ATF_REQUIRE(pl.pl_flags & PL_FLAG_SCX); ATF_REQUIRE(pl.pl_syscall_code == SYS_close); ATF_REQUIRE(ptrace(PT_GET_SC_RET, wpid, (caddr_t)&psr, sizeof(psr)) != -1); ATF_REQUIRE(psr.sr_error == EBADF); /* Disable syscall tracing and continue the child to let it exit. */ ATF_REQUIRE(ptrace(PT_GET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); events &= ~PTRACE_SYSCALL; ATF_REQUIRE(ptrace(PT_SET_EVENT_MASK, fpid, (caddr_t)&events, sizeof(events)) == 0); ATF_REQUIRE(ptrace(PT_CONTINUE, fpid, (caddr_t)1, 0) == 0); /* The last event should be for the child process's exit. */ wpid = waitpid(fpid, &status, 0); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 1); wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Verify that when the process is traced, it isn't reparented * to the init process when we close all process descriptors. */ ATF_TC(ptrace__proc_reparent); ATF_TC_HEAD(ptrace__proc_reparent, tc) { atf_tc_set_md_var(tc, "timeout", "2"); } ATF_TC_BODY(ptrace__proc_reparent, tc) { pid_t traced, debuger, wpid; int pd, status; traced = pdfork(&pd, 0); ATF_REQUIRE(traced >= 0); if (traced == 0) { raise(SIGSTOP); exit(0); } ATF_REQUIRE(pd >= 0); debuger = fork(); ATF_REQUIRE(debuger >= 0); if (debuger == 0) { /* The traced process is reparented to debuger. */ ATF_REQUIRE(ptrace(PT_ATTACH, traced, 0, 0) == 0); wpid = waitpid(traced, &status, 0); ATF_REQUIRE(wpid == traced); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(close(pd) == 0); ATF_REQUIRE(ptrace(PT_DETACH, traced, (caddr_t)1, 0) == 0); /* We closed pd so we should not have any child. */ wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); exit(0); } ATF_REQUIRE(close(pd) == 0); wpid = waitpid(debuger, &status, 0); ATF_REQUIRE(wpid == debuger); ATF_REQUIRE(WEXITSTATUS(status) == 0); /* Check if we still have any child. */ wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); } /* * Ensure that traced processes created with pdfork(2) are visible to * waitid(P_ALL). */ ATF_TC_WITHOUT_HEAD(ptrace__procdesc_wait_child); ATF_TC_BODY(ptrace__procdesc_wait_child, tc) { pid_t child, wpid; int pd, status; child = pdfork(&pd, 0); ATF_REQUIRE(child >= 0); if (child == 0) { trace_me(); (void)raise(SIGSTOP); exit(0); } wpid = waitpid(child, &status, 0); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_CONTINUE, child, (caddr_t)1, 0) != -1); wpid = wait(&status); ATF_REQUIRE(wpid == child); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); ATF_REQUIRE(ptrace(PT_CONTINUE, child, (caddr_t)1, 0) != -1); /* * If the process was created by pdfork, the exit status has to * be collected through the process descriptor. */ wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); ATF_REQUIRE(close(pd) != -1); } /* * Ensure that traced processes created with pdfork(2) are not visible * after returning to parent - waitid(P_ALL).
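A thumbnail of the process-descriptor model that both of these reparenting tests poke at (sketch; error handling elided):

	int pd;
	pid_t pid = pdfork(&pd, 0);	// like fork(), but yields a descriptor
	if (pid == 0)
		_exit(0);		// child runs as usual
	// The child is managed through pd (pdkill(), kqueue/poll for exit);
	// by design it is not returned by wait()/waitid(P_ALL), and closing
	// the last descriptor without PD_DAEMON normally kills it.
	close(pd);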
*/ ATF_TC_WITHOUT_HEAD(ptrace__procdesc_reparent_wait_child); ATF_TC_BODY(ptrace__procdesc_reparent_wait_child, tc) { pid_t traced, debuger, wpid; int pd, status; + if (atf_tc_get_config_var_as_bool_wd(tc, "ci", false)) + atf_tc_skip("https://bugs.freebsd.org/243605"); + traced = pdfork(&pd, 0); ATF_REQUIRE(traced >= 0); if (traced == 0) { raise(SIGSTOP); exit(0); } ATF_REQUIRE(pd >= 0); debuger = fork(); ATF_REQUIRE(debuger >= 0); if (debuger == 0) { /* The traced process is reparented to debuger. */ ATF_REQUIRE(ptrace(PT_ATTACH, traced, 0, 0) == 0); wpid = waitpid(traced, &status, 0); ATF_REQUIRE(wpid == traced); ATF_REQUIRE(WIFSTOPPED(status)); ATF_REQUIRE(WSTOPSIG(status) == SIGSTOP); /* Allow process to die. */ ATF_REQUIRE(ptrace(PT_CONTINUE, traced, (caddr_t)1, 0) == 0); wpid = waitpid(traced, &status, 0); ATF_REQUIRE(wpid == traced); ATF_REQUIRE(WIFEXITED(status)); ATF_REQUIRE(WEXITSTATUS(status) == 0); /* Reparent back to the original process. */ ATF_REQUIRE(close(pd) == 0); exit(0); } wpid = waitpid(debuger, &status, 0); ATF_REQUIRE(wpid == debuger); ATF_REQUIRE(WEXITSTATUS(status) == 0); /* * We have a child, but it has a process descriptor, * so we should not be able to collect the process with wait(). */ wpid = wait(&status); ATF_REQUIRE(wpid == -1); ATF_REQUIRE(errno == ECHILD); ATF_REQUIRE(close(pd) == 0); } ATF_TP_ADD_TCS(tp) { ATF_TP_ADD_TC(tp, ptrace__parent_wait_after_trace_me); ATF_TP_ADD_TC(tp, ptrace__parent_wait_after_attach); ATF_TP_ADD_TC(tp, ptrace__parent_sees_exit_after_child_debugger); ATF_TP_ADD_TC(tp, ptrace__parent_sees_exit_after_unrelated_debugger); ATF_TP_ADD_TC(tp, ptrace__parent_exits_before_child); ATF_TP_ADD_TC(tp, ptrace__follow_fork_both_attached); ATF_TP_ADD_TC(tp, ptrace__follow_fork_child_detached); ATF_TP_ADD_TC(tp, ptrace__follow_fork_parent_detached); ATF_TP_ADD_TC(tp, ptrace__follow_fork_both_attached_unrelated_debugger); ATF_TP_ADD_TC(tp, ptrace__follow_fork_child_detached_unrelated_debugger); ATF_TP_ADD_TC(tp, ptrace__follow_fork_parent_detached_unrelated_debugger); ATF_TP_ADD_TC(tp, ptrace__getppid); ATF_TP_ADD_TC(tp, ptrace__new_child_pl_syscall_code_fork); ATF_TP_ADD_TC(tp, ptrace__new_child_pl_syscall_code_vfork); ATF_TP_ADD_TC(tp, ptrace__new_child_pl_syscall_code_thread); ATF_TP_ADD_TC(tp, ptrace__lwp_events); ATF_TP_ADD_TC(tp, ptrace__lwp_events_exec); ATF_TP_ADD_TC(tp, ptrace__siginfo); ATF_TP_ADD_TC(tp, ptrace__ptrace_exec_disable); ATF_TP_ADD_TC(tp, ptrace__ptrace_exec_enable); ATF_TP_ADD_TC(tp, ptrace__event_mask); ATF_TP_ADD_TC(tp, ptrace__ptrace_vfork); ATF_TP_ADD_TC(tp, ptrace__ptrace_vfork_follow); #ifdef HAVE_BREAKPOINT ATF_TP_ADD_TC(tp, ptrace__PT_KILL_breakpoint); #endif ATF_TP_ADD_TC(tp, ptrace__PT_KILL_system_call); ATF_TP_ADD_TC(tp, ptrace__PT_KILL_threads); ATF_TP_ADD_TC(tp, ptrace__PT_KILL_competing_signal); ATF_TP_ADD_TC(tp, ptrace__PT_KILL_competing_stop); ATF_TP_ADD_TC(tp, ptrace__PT_KILL_with_signal_full_sigqueue); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_system_call_entry); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_system_call_entry_and_exit); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_full_sigqueue); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_masked_full_sigqueue); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_change_sig); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_sigtrap_system_call_entry); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_mix); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_kqueue); ATF_TP_ADD_TC(tp, ptrace__killed_with_sigmask); ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_sigmask);
ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_with_signal_thread_sigmask); ATF_TP_ADD_TC(tp, ptrace__parent_terminate_with_pending_sigstop1); ATF_TP_ADD_TC(tp, ptrace__parent_terminate_with_pending_sigstop2); ATF_TP_ADD_TC(tp, ptrace__event_mask_sigkill_discard); ATF_TP_ADD_TC(tp, ptrace__PT_ATTACH_with_SBDRY_thread); ATF_TP_ADD_TC(tp, ptrace__PT_STEP_with_signal); #ifdef HAVE_BREAKPOINT ATF_TP_ADD_TC(tp, ptrace__breakpoint_siginfo); #endif ATF_TP_ADD_TC(tp, ptrace__step_siginfo); #if defined(HAVE_BREAKPOINT) && defined(SKIP_BREAK) ATF_TP_ADD_TC(tp, ptrace__PT_CONTINUE_different_thread); #endif ATF_TP_ADD_TC(tp, ptrace__PT_LWPINFO_stale_siginfo); ATF_TP_ADD_TC(tp, ptrace__syscall_args); ATF_TP_ADD_TC(tp, ptrace__proc_reparent); ATF_TP_ADD_TC(tp, ptrace__procdesc_wait_child); ATF_TP_ADD_TC(tp, ptrace__procdesc_reparent_wait_child); return (atf_no_error()); } Index: projects/clang1000-import/tests/sys/netinet/Makefile =================================================================== --- projects/clang1000-import/tests/sys/netinet/Makefile (revision 357178) +++ projects/clang1000-import/tests/sys/netinet/Makefile (revision 357179) @@ -1,22 +1,24 @@ # $FreeBSD$ +PACKAGE= tests + TESTSDIR= ${TESTSBASE}/sys/netinet BINDIR= ${TESTSDIR} ATF_TESTS_C= ip_reass_test \ so_reuseport_lb_test \ socket_afinet ATF_TESTS_SH= fibs_test redirect PROGS= udp_dontroute tcp_user_cookie ${PACKAGE}FILES+= redirect.py ${PACKAGE}FILESMODE_redirect.py=0555 MAN= WARNS?= 6 .include Index: projects/clang1000-import/tools/build/Makefile =================================================================== --- projects/clang1000-import/tools/build/Makefile (revision 357178) +++ projects/clang1000-import/tools/build/Makefile (revision 357179) @@ -1,158 +1,176 @@ # $FreeBSD$ .PATH: ${.CURDIR}/../../include LIB= egacy SRC= -INCSGROUPS= INCS SYSINCS CASPERINC +INCSGROUPS= INCS SYSINCS CASPERINC UFSINCS FFSINCS MSDOSFSINCS DISKINCS INCS= SYSINCSDIR= ${INCLUDEDIR}/sys CASPERINCDIR= ${INCLUDEDIR}/casper +# Also add ufs/ffs/msdosfs/disk headers to allow building makefs as a bootstrap tool +UFSINCSDIR= ${INCLUDEDIR}/ufs/ufs +FFSINCSDIR= ${INCLUDEDIR}/ufs/ffs +MSDOSFSINCSDIR= ${INCLUDEDIR}/fs/msdosfs +DISKINCSDIR= ${INCLUDEDIR}/sys/disk BOOTSTRAPPING?= 0 _WITH_PWCACHEDB!= grep -c pwcache_groupdb /usr/include/grp.h || true .if ${_WITH_PWCACHEDB} == 0 .PATH: ${.CURDIR}/../../contrib/libc-pwcache CFLAGS+= -I${.CURDIR}/../../contrib/libc-pwcache \ -I${.CURDIR}/../../lib/libc/include SRCS+= pwcache.c .endif _WITH_STRSVIS!= grep -c strsvis /usr/include/vis.h || true .if ${_WITH_STRSVIS} == 0 .PATH: ${.CURDIR}/../../contrib/libc-vis SRCS+= vis.c CFLAGS+= -I${.CURDIR}/../../contrib/libc-vis \ -I${.CURDIR}/../../lib/libc/include .endif _WITH_REALLOCARRAY!= grep -c reallocarray /usr/include/stdlib.h || true .if ${_WITH_REALLOCARRAY} == 0 .PATH: ${.CURDIR}/../../lib/libc/stdlib INCS+= stdlib.h SRCS+= reallocarray.c CFLAGS+= -I${.CURDIR}/../../lib/libc/include .endif _WITH_UTIMENS!= grep -c utimensat /usr/include/sys/stat.h || true .if ${_WITH_UTIMENS} == 0 SYSINCS+= stat.h SRCS+= futimens.c utimensat.c .endif _WITH_EXPLICIT_BZERO!= grep -c explicit_bzero /usr/include/strings.h || true .if ${_WITH_EXPLICIT_BZERO} == 0 .PATH: ${SRCTOP}/sys/libkern INCS+= strings.h SRCS+= explicit_bzero.c .endif .if exists(/usr/include/capsicum_helpers.h) _WITH_CAPH_ENTER!= grep -c caph_enter /usr/include/capsicum_helpers.h || true _WITH_CAPH_RIGHTS_LIMIT!= grep -c caph_rights_limit /usr/include/capsicum_helpers.h || true .endif .if !defined(_WITH_CAPH_ENTER) || 
${_WITH_CAPH_ENTER} == 0 || ${_WITH_CAPH_RIGHTS_LIMIT} == 0 .PATH: ${SRCTOP}/lib/libcapsicum INCS+= capsicum_helpers.h .PATH: ${SRCTOP}/lib/libcasper/libcasper INCS+= libcasper.h .endif CASPERINC+= ${SRCTOP}/lib/libcasper/services/cap_fileargs/cap_fileargs.h .if empty(SRCS) SRCS= dummy.c .endif .if defined(CROSS_BUILD_TESTING) SUBDIR= cross-build .endif + +# To allow bootstrapping makefs on FreeBSD 11 or non-FreeBSD systems: +UFSINCS+= ${SRCTOP}/sys/ufs/ufs/dinode.h +UFSINCS+= ${SRCTOP}/sys/ufs/ufs/dir.h +FFSINCS+= ${SRCTOP}/sys/ufs/ffs/fs.h + +MSDOSFSINCS+= ${SRCTOP}/sys/fs/msdosfs/bootsect.h +MSDOSFSINCS+= ${SRCTOP}/sys/fs/msdosfs/bpb.h +MSDOSFSINCS+= ${SRCTOP}/sys/fs/msdosfs/denode.h +MSDOSFSINCS+= ${SRCTOP}/sys/fs/msdosfs/direntry.h +MSDOSFSINCS+= ${SRCTOP}/sys/fs/msdosfs/fat.h +MSDOSFSINCS+= ${SRCTOP}/sys/fs/msdosfs/msdosfsmount.h +DISKINCS+= ${SRCTOP}/sys/sys/disk/bsd.h # Needed to build config (since it uses libnv) SYSINCS+= ${SRCTOP}/sys/sys/nv.h ${SRCTOP}/sys/sys/cnv.h \ ${SRCTOP}/sys/sys/dnv.h # We want to run the build with only ${WORLDTMP} in $PATH to ensure we don't # accidentally run tools that are incompatible but happen to be in $PATH. # This is especially important when building on Linux/MacOS where many of the # programs used during the build accept different flags or generate different # output. On those platforms we only symlink the tools known to be compatible # (e.g. basic utilities such as mkdir) into ${WORLDTMP} and build all others # from the FreeBSD sources during the bootstrap-tools stage. # basic commands: It is fine to use the host version for all of these even on # Linux/MacOS since we only use flags that are supported by all of them. _host_tools_to_symlink= basename bzip2 bunzip2 chmod chown cmp comm cp date \ dirname echo env false find fmt gzip gunzip head hostname id ln ls \ mkdir mv nice patch rm realpath sh sleep stat tee touch tr true uname \ uniq wc which # We also need a symlink to the absolute path to the make binary used for # the toplevel makefile. This is not necessarily the same as `which make` # since e.g. on Linux and MacOS that will be GNU make. _make_abs!= which "${MAKE}" _host_abs_tools_to_symlink= ${_make_abs}:make ${_make_abs}:bmake host-symlinks: @echo "Linking host tools into ${DESTDIR}/bin" .for _tool in ${_host_tools_to_symlink} @if [ ! -e "${DESTDIR}/bin/${_tool}" ]; then \ source_path=`which ${_tool}`; \ if [ ! -e "$${source_path}" ] ; then \ echo "Cannot find host tool '${_tool}'"; false; \ fi; \ ln -sfnv "$${source_path}" "${DESTDIR}/bin/${_tool}"; \ fi .endfor .for _tool in ${_host_abs_tools_to_symlink} @source_path="${_tool:S/:/ /:[1]}"; \ target_path="${DESTDIR}/bin/${_tool:S/:/ /:[2]}"; \ if [ ! -e "$${target_path}" ] ; then \ if [ ! -e "$${source_path}" ] ; then \ echo "Host tool '${src_path}' is missing"; false; \ fi; \ ln -sfnv "$${source_path}" "$${target_path}"; \ fi .endfor .if exists(/usr/libexec/flua) ln -sf /usr/libexec/flua ${DESTDIR}/usr/libexec/flua .endif # Create all the directories that are needed during the legacy, bootstrap-tools # and cross-tools stages. We do this here using mkdir since mtree may not exist # yet (this happens if we are crossbuilding from Linux/Mac). 
INSTALLDIR_LIST= \ bin \ lib/casper \ lib/geom \ usr/include/casper \ usr/include/private/zstd \ usr/lib \ usr/libexec installdirs: mkdir -p ${INSTALLDIR_LIST:S,^,${DESTDIR}/,} # Link usr/bin, sbin, and usr/sbin to bin so that it doesn't matter whether a # bootstrap tool was added to WORLDTMP with a symlink or by building it in the # bootstrap-tools phase. We could also override BINDIR when building bootstrap # tools but adding the symlinks is easier and means all tools are also # in the directory that they are installed to normally. .for _dir in sbin usr/sbin usr/bin # delete existing directories from before r340157 @if [ ! -L ${DESTDIR}/${_dir} ]; then \ echo "removing old non-symlink ${DESTDIR}/${_dir}"; \ rm -rf "${DESTDIR}/${_dir}"; \ fi .endfor ln -sfn bin ${DESTDIR}/sbin ln -sfn ../bin ${DESTDIR}/usr/bin ln -sfn ../bin ${DESTDIR}/usr/sbin .for _group in ${INCSGROUPS:NINCS} mkdir -p "${DESTDIR}/${${_group}DIR}" .endfor .include Index: projects/clang1000-import/usr.bin/xohtml/xohtml.sh =================================================================== --- projects/clang1000-import/usr.bin/xohtml/xohtml.sh (revision 357178) +++ projects/clang1000-import/usr.bin/xohtml/xohtml.sh (revision 357179) @@ -1,87 +1,87 @@ #!/bin/sh # $FreeBSD$ #!/bin/sh # # Copyright (c) 2014, Juniper Networks, Inc. # All rights reserved. # This SOFTWARE is licensed under the LICENSE provided in the # ../Copyright file. By downloading, installing, copying, or otherwise # using the SOFTWARE, you agree to be bound by the terms of that # LICENSE. # Phil Shafer, July 2014 # BASE=/usr/share/libxo -VERSION=1.3.1 +VERSION=1.4.0 CMD=cat DONE= WEB=http://juniper.github.io/libxo/${VERSION}/xohtml do_help () { echo "xohtml: wrap libxo-enabled output in HTML" echo "Usage: xohtml [options] [command [arguments]]" echo "Valid options are:" echo " -b | --base " echo " -c | --command " echo " -f | --file " exit 1 } while [ -z "$DONE" -a ! -z "$1" ]; do case "$1" in -b|--base) shift; BASE="$1"; shift; ;; -c|--command) shift; CMD="$1"; shift; ;; -f|--file) shift; FILE="$1"; shift; exec > "$FILE"; ;; -w|--web) shift; BASE="${WEB}"; ;; -*) do_help ;; *) DONE=1; XX=$1; shift; CMD="$XX --libxo=html $@" ;; esac done if [ "$CMD" = "cat" -a -t 0 ]; then do_help fi echo '' echo '' echo '' echo '' echo '' echo '' echo '' echo '' echo '' echo '' echo '' $CMD echo '' echo '' exit 0 Index: projects/clang1000-import/usr.sbin/makefs/msdos/msdosfs_denode.c =================================================================== --- projects/clang1000-import/usr.sbin/makefs/msdos/msdosfs_denode.c (revision 357178) +++ projects/clang1000-import/usr.sbin/makefs/msdos/msdosfs_denode.c (revision 357179) @@ -1,382 +1,378 @@ /* $NetBSD: msdosfs_denode.c,v 1.7 2015/03/29 05:52:59 agc Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. * Copyright (C) 1994, 1995, 1997 TooLs GmbH. * All rights reserved. * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3.
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". * * The author supplies this software to be publicly redistributed on the * understanding that the author is not responsible for the correct * functioning of this software in any circumstances and is not liable for * any damages caused by this software. * * October 1992 */ #include __FBSDID("$FreeBSD$"); #include #include -#include #include #include #include #include #include #include "ffs/buf.h" #include #include #include #include #include #include "makefs.h" #include "msdos.h" /* * If deget() succeeds it returns with the gotten denode locked(). * * pmp - address of msdosfsmount structure of the filesystem containing * the denode of interest. The pm_dev field and the address of * the msdosfsmount structure are used. * dirclust - which cluster bp contains, if dirclust is 0 (root directory) * diroffset is relative to the beginning of the root directory, * otherwise it is cluster relative. * diroffset - offset past begin of cluster of denode we want * depp - returns the address of the gotten denode. */ int deget(struct msdosfsmount *pmp, u_long dirclust, u_long diroffset, struct denode **depp) { int error; uint64_t inode; struct direntry *direntptr; struct denode *ldep; struct buf *bp; MSDOSFS_DPRINTF(("deget(pmp %p, dirclust %lu, diroffset %lx, depp %p)\n", pmp, dirclust, diroffset, depp)); /* * On FAT32 filesystems, root is a (more or less) normal * directory */ if (FAT32(pmp) && dirclust == MSDOSFSROOT) dirclust = pmp->pm_rootdirblk; inode = (uint64_t)pmp->pm_bpcluster * dirclust + diroffset; ldep = ecalloc(1, sizeof(*ldep)); ldep->de_vnode = NULL; ldep->de_flag = 0; ldep->de_dirclust = dirclust; ldep->de_diroffset = diroffset; ldep->de_inode = inode; ldep->de_pmp = pmp; ldep->de_refcnt = 1; fc_purge(ldep, 0); /* init the FAT cache for this denode */ /* * Copy the directory entry into the denode area of the vnode. */ if ((dirclust == MSDOSFSROOT || (FAT32(pmp) && dirclust == pmp->pm_rootdirblk)) && diroffset == MSDOSFSROOT_OFS) { /* * Directory entry for the root directory. There isn't one, * so we manufacture one. We should probably rummage * through the root directory and find a label entry (if it * exists), and then use the time and date from that entry * as the time and date for the root denode. 
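The fallback below just stamps the FAT epoch: the date word packs year-since-1980, month, and day into 7, 4, and 5 bits, so

	// (0 << DD_YEAR_SHIFT) | (1 << DD_MONTH_SHIFT) | (1 << DD_DAY_SHIFT)
	// == (0 << 9) | (1 << 5) | (1 << 0) == 0x0021, i.e. 1980-01-01

which is the earliest date FAT can represent.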
*/ ldep->de_vnode = (struct vnode *)-1; ldep->de_Attributes = ATTR_DIRECTORY; ldep->de_LowerCase = 0; if (FAT32(pmp)) ldep->de_StartCluster = pmp->pm_rootdirblk; /* de_FileSize will be filled in further down */ else { ldep->de_StartCluster = MSDOSFSROOT; ldep->de_FileSize = pmp->pm_rootdirsize * DEV_BSIZE; } /* * fill in time and date so that dos2unixtime() doesn't * spit up when called from msdosfs_getattr() with root * denode */ ldep->de_CHun = 0; ldep->de_CTime = 0x0000; /* 00:00:00 */ ldep->de_CDate = (0 << DD_YEAR_SHIFT) | (1 << DD_MONTH_SHIFT) | (1 << DD_DAY_SHIFT); /* Jan 1, 1980 */ ldep->de_ADate = ldep->de_CDate; ldep->de_MTime = ldep->de_CTime; ldep->de_MDate = ldep->de_CDate; /* leave the other fields as garbage */ } else { error = readep(pmp, dirclust, diroffset, &bp, &direntptr); if (error) { ldep->de_Name[0] = SLOT_DELETED; *depp = NULL; return (error); } (void)DE_INTERNALIZE(ldep, direntptr); brelse(bp); } /* * Fill in a few fields of the vnode and finish filling in the * denode. Then return the address of the found denode. */ if (ldep->de_Attributes & ATTR_DIRECTORY) { /* * Since DOS directory entries that describe directories * have 0 in the filesize field, we take this opportunity * to find out the length of the directory and plug it into * the denode structure. */ u_long size; /* * XXX it sometimes happens that the "." entry has cluster * number 0 when it shouldn't. Use the actual cluster number * instead of what is written in directory entry. */ if (diroffset == 0 && ldep->de_StartCluster != dirclust) { MSDOSFS_DPRINTF(("deget(): \".\" entry at clust %lu != %lu\n", dirclust, ldep->de_StartCluster)); ldep->de_StartCluster = dirclust; } if (ldep->de_StartCluster != MSDOSFSROOT) { error = pcbmap(ldep, 0xffff, 0, &size, 0); if (error == E2BIG) { ldep->de_FileSize = de_cn2off(pmp, size); error = 0; } else { MSDOSFS_DPRINTF(("deget(): pcbmap returned %d\n", error)); } } } *depp = ldep; return (0); } /* * Truncate the file described by dep to the length specified by length. */ int detrunc(struct denode *dep, u_long length, int flags, struct ucred *cred) { int error; int allerror; u_long eofentry; u_long chaintofree; daddr_t bn; int boff; int isadir = dep->de_Attributes & ATTR_DIRECTORY; struct buf *bp; struct msdosfsmount *pmp = dep->de_pmp; MSDOSFS_DPRINTF(("detrunc(): file %s, length %lu, flags %x\n", dep->de_Name, length, flags)); /* * Disallow attempts to truncate the root directory since it is of * fixed size. That's just the way dos filesystems are. We use * the VROOT bit in the vnode because checking for the directory * bit and a startcluster of 0 in the denode is not adequate to * recognize the root directory at this point in a file or * directory's life. */ if (dep->de_vnode != NULL && !FAT32(pmp)) { MSDOSFS_DPRINTF(("detrunc(): can't truncate root directory, " "clust %ld, offset %ld\n", dep->de_dirclust, dep->de_diroffset)); return (EINVAL); } if (dep->de_FileSize < length) return deextend(dep, length, cred); /* * If the desired length is 0 then remember the starting cluster of * the file and set the StartCluster field in the directory entry * to 0. If the desired length is not zero, then get the number of * the last cluster in the shortened file. Then get the number of * the first cluster in the part of the file that is to be freed. * Then set the next cluster pointer in the last cluster of the * file to CLUST_EOFE. 
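Concretely, assuming hypothetical 4096-byte clusters: a 10000-byte file occupies three clusters, so truncating it to 5000 bytes keeps clusters 0 and 1; pcbmap() is asked for file-relative cluster de_clcount(pmp, 5000) - 1 == 1, and the chain is cut there:

	// Mark the new last cluster end-of-file, remembering its old
	// successor so the tail can be freed afterwards.
	fatentry(FAT_GET_AND_SET, pmp, eofentry, &chaintofree, CLUST_EOFE);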
*/ if (length == 0) { chaintofree = dep->de_StartCluster; dep->de_StartCluster = 0; eofentry = ~0; } else { error = pcbmap(dep, de_clcount(pmp, length) - 1, 0, &eofentry, 0); if (error) { MSDOSFS_DPRINTF(("detrunc(): pcbmap fails %d\n", error)); return (error); } } fc_purge(dep, de_clcount(pmp, length)); /* * If the new length is not a multiple of the cluster size then we * must zero the tail end of the new last cluster in case it * becomes part of the file again because of a seek. */ if ((boff = length & pmp->pm_crbomask) != 0) { if (isadir) { bn = cntobn(pmp, eofentry); error = bread(pmp->pm_devvp, bn, pmp->pm_bpcluster, 0, &bp); if (error) { brelse(bp); MSDOSFS_DPRINTF(("detrunc(): bread fails %d\n", error)); return (error); } memset(bp->b_data + boff, 0, pmp->pm_bpcluster - boff); - if (flags & IO_SYNC) bwrite(bp); - else - bdwrite(bp); } } /* * Write out the updated directory entry. Even if the update fails * we free the trailing clusters. */ dep->de_FileSize = length; if (!isadir) dep->de_flag |= DE_UPDATE|DE_MODIFIED; MSDOSFS_DPRINTF(("detrunc(): allerror %d, eofentry %lu\n", allerror, eofentry)); /* * If we need to break the cluster chain for the file then do it * now. */ if (eofentry != ~0) { error = fatentry(FAT_GET_AND_SET, pmp, eofentry, &chaintofree, CLUST_EOFE); if (error) { MSDOSFS_DPRINTF(("detrunc(): fatentry errors %d\n", error)); return (error); } fc_setcache(dep, FC_LASTFC, de_cluster(pmp, length - 1), eofentry); } /* * Now free the clusters removed from the file because of the * truncation. */ if (chaintofree != 0 && !MSDOSFSEOF(pmp, chaintofree)) freeclusterchain(pmp, chaintofree); return (allerror); } /* * Extend the file described by dep to length specified by length. */ int deextend(struct denode *dep, u_long length, struct ucred *cred) { struct msdosfsmount *pmp = dep->de_pmp; u_long count; int error; /* * The root of a DOS filesystem cannot be extended. */ if (dep->de_vnode != NULL && !FAT32(pmp)) return (EINVAL); /* * Directories cannot be extended. */ if (dep->de_Attributes & ATTR_DIRECTORY) return (EISDIR); if (length <= dep->de_FileSize) return (E2BIG); /* * Compute the number of clusters to allocate. */ count = de_clcount(pmp, length) - de_clcount(pmp, dep->de_FileSize); if (count > 0) { if (count > pmp->pm_freeclustercount) return (ENOSPC); error = extendfile(dep, count, NULL, NULL, DE_CLEAR); if (error) { /* truncate the added clusters away again */ (void) detrunc(dep, dep->de_FileSize, 0, cred); return (error); } } /* * Zero extend file range; ubc_zerorange() uses ubc_alloc() and a * memset(); we set the write size so ubc won't read in file data that * is zero'd later. */ dep->de_FileSize = length; dep->de_flag |= DE_UPDATE | DE_MODIFIED; return 0; } Index: projects/clang1000-import/usr.sbin/makefs/msdos/msdosfs_vnops.c =================================================================== --- projects/clang1000-import/usr.sbin/makefs/msdos/msdosfs_vnops.c (revision 357178) +++ projects/clang1000-import/usr.sbin/makefs/msdos/msdosfs_vnops.c (revision 357179) @@ -1,641 +1,645 @@ /* $NetBSD: msdosfs_vnops.c,v 1.19 2017/04/13 17:10:12 christos Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. * Copyright (C) 1994, 1995, 1997 TooLs GmbH. * All rights reserved. * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". * * The author supplies this software to be publicly redistributed on the * understanding that the author is not responsible for the correct * functioning of this software in any circumstances and is not liable for * any damages caused by this software. * * October 1992 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include "ffs/buf.h" #include #include "msdos/direntry.h" #include #include #include #include "makefs.h" #include "msdos.h" /* * Some general notes: * * In the ufs filesystem the inodes, superblocks, and indirect blocks are * read/written using the vnode for the filesystem. Blocks that represent * the contents of a file are read/written using the vnode for the file * (including directories when they are read/written as files). This * presents problems for the dos filesystem because data that should be in * an inode (if dos had them) resides in the directory itself. Since we * must update directory entries without the benefit of having the vnode * for the directory we must use the vnode for the filesystem. This means * that when a directory is actually read/written (via read, write, or * readdir, or seek) we must use the vnode for the filesystem instead of * the vnode for the directory as would happen in ufs. This is to insure we * retrieve the correct block from the buffer cache since the hash value is * based upon the vnode address and the desired block number. 
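Timestamps are another translation handled here: unix2fattime() below packs a struct timespec into the two 16-bit FAT words (date: 7-bit year-since-1980, 4-bit month, 5-bit day; time: 5-bit hour, 6-bit minute, 5-bit half-seconds). A sketch of the inverse decoding, as a hypothetical helper for reference only:

	static void
	fat2tm(uint16_t dd, uint16_t dt, struct tm *tm)
	{
		tm->tm_year = ((dd >> 9) & 0x7f) + 80;	// struct tm counts from 1900
		tm->tm_mon = ((dd >> 5) & 0x0f) - 1;	// struct tm months are 0-based
		tm->tm_mday = dd & 0x1f;
		tm->tm_hour = (dt >> 11) & 0x1f;
		tm->tm_min = (dt >> 5) & 0x3f;
		tm->tm_sec = (dt & 0x1f) << 1;		// FAT stores 2-second units
	}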
*/ static int msdosfs_wfile(const char *, struct denode *, fsnode *); static void unix2fattime(const struct timespec *tsp, uint16_t *ddp, uint16_t *dtp); static void msdosfs_times(struct denode *dep, const struct stat *st) { if (stampst.st_ino) st = &stampst; +#ifdef HAVE_STRUCT_STAT_BIRTHTIME unix2fattime(&st->st_birthtim, &dep->de_CDate, &dep->de_CTime); +#else + unix2fattime(&st->st_ctim, &dep->de_CDate, &dep->de_CTime); +#endif unix2fattime(&st->st_atim, &dep->de_ADate, NULL); unix2fattime(&st->st_mtim, &dep->de_MDate, &dep->de_MTime); } static void unix2fattime(const struct timespec *tsp, uint16_t *ddp, uint16_t *dtp) { time_t t1; struct tm lt = {0}; t1 = tsp->tv_sec; localtime_r(&t1, <); unsigned long fat_time = ((lt.tm_year - 80) << 25) | ((lt.tm_mon + 1) << 21) | (lt.tm_mday << 16) | (lt.tm_hour << 11) | (lt.tm_min << 5) | (lt.tm_sec >> 1); if (ddp != NULL) *ddp = (uint16_t)(fat_time >> 16); if (dtp != NULL) *dtp = (uint16_t)fat_time; } /* * When we search a directory the blocks containing directory entries are * read and examined. The directory entries contain information that would * normally be in the inode of a unix filesystem. This means that some of * a directory's contents may also be in memory resident denodes (sort of * an inode). This can cause problems if we are searching while some other * process is modifying a directory. To prevent one process from accessing * incompletely modified directory information we depend upon being the * sole owner of a directory block. bread/brelse provide this service. * This being the case, when a process modifies a directory it must first * acquire the disk block that contains the directory entry to be modified. * Then update the disk block and the denode, and then write the disk block * out to disk. This way disk blocks containing directory entries and in * memory denode's will be in synch. */ static int msdosfs_findslot(struct denode *dp, struct componentname *cnp) { daddr_t bn; int error; int slotcount; int slotoffset = 0; int frcn; u_long cluster; int blkoff; u_int diroff; int blsize; struct msdosfsmount *pmp; struct buf *bp = 0; struct direntry *dep; u_char dosfilename[12]; int wincnt = 1; int chksum = -1, chksum_ok; int olddos = 1; pmp = dp->de_pmp; switch (unix2dosfn((const u_char *)cnp->cn_nameptr, dosfilename, cnp->cn_namelen, 0)) { case 0: return (EINVAL); case 1: break; case 2: wincnt = winSlotCnt((const u_char *)cnp->cn_nameptr, cnp->cn_namelen) + 1; break; case 3: olddos = 0; wincnt = winSlotCnt((const u_char *)cnp->cn_nameptr, cnp->cn_namelen) + 1; break; } if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME) wincnt = 1; /* * Suppress search for slots unless creating * file and at end of pathname, in which case * we watch for a place to put the new file in * case it doesn't already exist. */ slotcount = 0; MSDOSFS_DPRINTF(("%s(): dos filename: %s\n", __func__, dosfilename)); /* * Search the directory pointed at by vdp for the name pointed at * by cnp->cn_nameptr. */ /* * The outer loop ranges over the clusters that make up the * directory. Note that the root directory is different from all * other directories. It has a fixed number of blocks that are not * part of the pool of allocatable clusters. So, we treat it a * little differently. The root directory starts at "cluster" 0. 
*/ diroff = 0; for (frcn = 0; diroff < dp->de_FileSize; frcn++) { if ((error = pcbmap(dp, frcn, &bn, &cluster, &blsize)) != 0) { if (error == E2BIG) break; return (error); } error = bread(pmp->pm_devvp, bn, blsize, 0, &bp); if (error) { return (error); } for (blkoff = 0; blkoff < blsize; blkoff += sizeof(struct direntry), diroff += sizeof(struct direntry)) { dep = (struct direntry *)(bp->b_data + blkoff); /* * If the slot is empty and we are still looking * for an empty then remember this one. If the * slot is not empty then check to see if it * matches what we are looking for. If the slot * has never been filled with anything, then the * remainder of the directory has never been used, * so there is no point in searching it. */ if (dep->deName[0] == SLOT_EMPTY || dep->deName[0] == SLOT_DELETED) { /* * Drop memory of previous long matches */ chksum = -1; if (slotcount < wincnt) { slotcount++; slotoffset = diroff; } if (dep->deName[0] == SLOT_EMPTY) { brelse(bp); goto notfound; } } else { /* * If there wasn't enough space for our * winentries, forget about the empty space */ if (slotcount < wincnt) slotcount = 0; /* * Check for Win95 long filename entry */ if (dep->deAttributes == ATTR_WIN95) { if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME) continue; chksum = winChkName( (const u_char *)cnp->cn_nameptr, cnp->cn_namelen, (struct winentry *)dep, chksum); continue; } /* * Ignore volume labels (anywhere, not just * the root directory). */ if (dep->deAttributes & ATTR_VOLUME) { chksum = -1; continue; } /* * Check for a checksum or name match */ chksum_ok = (chksum == winChksum(dep->deName)); if (!chksum_ok && (!olddos || memcmp(dosfilename, dep->deName, 11))) { chksum = -1; continue; } MSDOSFS_DPRINTF(("%s(): match blkoff %d, diroff %u\n", __func__, blkoff, diroff)); /* * Remember where this directory * entry came from for whoever did * this lookup. */ dp->de_fndoffset = diroff; dp->de_fndcnt = 0; return EEXIST; } } /* for (blkoff = 0; .... */ /* * Release the buffer holding the directory cluster just * searched. */ brelse(bp); } /* for (frcn = 0; ; frcn++) */ notfound: /* * We hold no disk buffers at this point. */ /* * If we get here we didn't find the entry we were looking for. But * that's ok if we are creating or renaming and are at the end of * the pathname and the directory hasn't been removed. */ MSDOSFS_DPRINTF(("%s(): refcnt %ld, slotcount %d, slotoffset %d\n", __func__, dp->de_refcnt, slotcount, slotoffset)); /* * Fixup the slot description to point to the place where * we might put the new DOS direntry (putting the Win95 * long name entries before that) */ if (!slotcount) { slotcount = 1; slotoffset = diroff; } if (wincnt > slotcount) { slotoffset += sizeof(struct direntry) * (wincnt - slotcount); } /* * Return an indication of where the new directory * entry should be put. */ dp->de_fndoffset = slotoffset; dp->de_fndcnt = wincnt - 1; /* * We return with the directory locked, so that * the parameters we set up above will still be * valid if we actually decide to do a direnter(). * We return ni_vp == NULL to indicate that the entry * does not currently exist; we leave a pointer to * the (locked) directory inode in ndp->ni_dvp. * * NB - if the directory is unlocked, then this * information cannot be used. */ return 0; } /* * Create a regular file. On entry the directory to contain the file being * created is locked. We must release before we return. 
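One mapping worth calling out before the code: FAT has no Unix permission bits, so the only mode information that survives is writability. As msdosfs_mkfile() below does, a missing owner write bit becomes ATTR_READONLY, and ATTR_ARCHIVE is always set on new files:

	ndirent.de_Attributes = (st->st_mode & S_IWUSR) ?
	    ATTR_ARCHIVE : ATTR_ARCHIVE | ATTR_READONLY;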
*/ struct denode * msdosfs_mkfile(const char *path, struct denode *pdep, fsnode *node) { struct componentname cn; struct denode ndirent; struct denode *dep; int error; struct stat *st = &node->inode->st; cn.cn_nameptr = node->name; cn.cn_namelen = strlen(node->name); MSDOSFS_DPRINTF(("%s(name %s, mode 0%o size %zu)\n", __func__, node->name, st->st_mode, (size_t)st->st_size)); /* * If this is the root directory and there is no space left we * can't do anything. This is because the root directory can not * change size. */ if (pdep->de_StartCluster == MSDOSFSROOT && pdep->de_fndoffset >= pdep->de_FileSize) { error = ENOSPC; goto bad; } /* * Create a directory entry for the file, then call createde() to * have it installed. NOTE: DOS files are always executable. We * use the absence of the owner write bit to make the file * readonly. */ memset(&ndirent, 0, sizeof(ndirent)); if ((error = uniqdosname(pdep, &cn, ndirent.de_Name)) != 0) goto bad; ndirent.de_Attributes = (st->st_mode & S_IWUSR) ? ATTR_ARCHIVE : ATTR_ARCHIVE | ATTR_READONLY; ndirent.de_StartCluster = 0; ndirent.de_FileSize = 0; ndirent.de_pmp = pdep->de_pmp; ndirent.de_flag = DE_ACCESS | DE_CREATE | DE_UPDATE; msdosfs_times(&ndirent, &node->inode->st); if ((error = msdosfs_findslot(pdep, &cn)) != 0) goto bad; if ((error = createde(&ndirent, pdep, &dep, &cn)) != 0) goto bad; if ((error = msdosfs_wfile(path, dep, node)) != 0) goto bad; return dep; bad: errno = error; return NULL; } static int msdosfs_updatede(struct denode *dep) { struct buf *bp; struct direntry *dirp; int error; dep->de_flag &= ~DE_MODIFIED; error = readde(dep, &bp, &dirp); if (error) return error; DE_EXTERNALIZE(dirp, dep); error = bwrite(bp); return error; } /* * Write data to a file or directory. */ static int msdosfs_wfile(const char *path, struct denode *dep, fsnode *node) { int error, fd; size_t osize = dep->de_FileSize; struct stat *st = &node->inode->st; size_t nsize, offs; struct msdosfsmount *pmp = dep->de_pmp; struct buf *bp; char *dat; u_long cn = 0; error = 0; /* XXX: gcc/vax */ MSDOSFS_DPRINTF(("%s(diroff %lu, dirclust %lu, startcluster %lu)\n", __func__, dep->de_diroffset, dep->de_dirclust, dep->de_StartCluster)); if (st->st_size == 0) return 0; /* Don't bother to try to write files larger than the fs limit */ if (st->st_size > MSDOSFS_FILESIZE_MAX) return EFBIG; nsize = st->st_size; MSDOSFS_DPRINTF(("%s(nsize=%zu, osize=%zu)\n", __func__, nsize, osize)); if (nsize > osize) { if ((error = deextend(dep, nsize, NULL)) != 0) return error; if ((error = msdosfs_updatede(dep)) != 0) return error; } if ((fd = open(path, O_RDONLY)) == -1) { error = errno; MSDOSFS_DPRINTF(("open %s: %s", path, strerror(error))); return error; } if ((dat = mmap(0, nsize, PROT_READ, MAP_FILE | MAP_PRIVATE, fd, 0)) == MAP_FAILED) { error = errno; MSDOSFS_DPRINTF(("%s: mmap %s: %s", __func__, node->name, strerror(error))); close(fd); goto out; } close(fd); for (offs = 0; offs < nsize;) { int blsize, cpsize; daddr_t bn; u_long on = offs & pmp->pm_crbomask; if ((error = pcbmap(dep, cn++, &bn, NULL, &blsize)) != 0) { MSDOSFS_DPRINTF(("%s: pcbmap %lu", __func__, (unsigned long)bn)); goto out; } MSDOSFS_DPRINTF(("%s(cn=%lu, bn=%llu, blsize=%d)\n", __func__, cn, (unsigned long long)bn, blsize)); if ((error = bread(pmp->pm_devvp, bn, blsize, 0, &bp)) != 0) { MSDOSFS_DPRINTF(("bread %d\n", error)); goto out; } cpsize = MIN((nsize - offs), blsize - on); memcpy(bp->b_data + on, dat + offs, cpsize); bwrite(bp); offs += cpsize; } munmap(dat, nsize); return 0; out: munmap(dat, nsize); return 
static const struct {
	struct direntry dot;
	struct direntry dotdot;
} dosdirtemplate = {
	{	".          ",			/* the . entry */
		ATTR_DIRECTORY,			/* file attribute */
		0,				/* reserved */
		0, { 0, 0 }, { 0, 0 },		/* create time & date */
		{ 0, 0 },			/* access date */
		{ 0, 0 },			/* high bits of start cluster */
		{ 210, 4 }, { 210, 4 },		/* modify time & date */
		{ 0, 0 },			/* startcluster */
		{ 0, 0, 0, 0 }			/* filesize */
	},
	{	"..         ",			/* the .. entry */
		ATTR_DIRECTORY,			/* file attribute */
		0,				/* reserved */
		0, { 0, 0 }, { 0, 0 },		/* create time & date */
		{ 0, 0 },			/* access date */
		{ 0, 0 },			/* high bits of start cluster */
		{ 210, 4 }, { 210, 4 },		/* modify time & date */
		{ 0, 0 },			/* startcluster */
		{ 0, 0, 0, 0 }			/* filesize */
	}
};

struct denode *
msdosfs_mkdire(const char *path, struct denode *pdep, fsnode *node)
{
	struct denode ndirent;
	struct denode *dep;
	struct componentname cn;
	struct msdosfsmount *pmp = pdep->de_pmp;
	int error;
	u_long newcluster, pcl, bn;
	struct direntry *denp;
	struct buf *bp;

	cn.cn_nameptr = node->name;
	cn.cn_namelen = strlen(node->name);
	/*
	 * If this is the root directory and there is no space left we
	 * can't do anything.  This is because the root directory can not
	 * change size.
	 */
	if (pdep->de_StartCluster == MSDOSFSROOT &&
	    pdep->de_fndoffset >= pdep->de_FileSize) {
		error = ENOSPC;
		goto bad2;
	}

	/*
	 * Allocate a cluster to hold the about to be created directory.
	 */
	error = clusteralloc(pmp, 0, 1, CLUST_EOFE, &newcluster, NULL);
	if (error)
		goto bad2;

	memset(&ndirent, 0, sizeof(ndirent));
	ndirent.de_pmp = pmp;
	ndirent.de_flag = DE_ACCESS | DE_CREATE | DE_UPDATE;
	msdosfs_times(&ndirent, &node->inode->st);

	/*
	 * Now fill the cluster with the "." and ".." entries. And write
	 * the cluster to disk.  This way it is there for the parent
	 * directory to be pointing at if there were a crash.
	 */
	bn = cntobn(pmp, newcluster);
	MSDOSFS_DPRINTF(("%s(newcluster %lu, bn=%lu)\n", __func__,
	    newcluster, bn));
	/* always succeeds */
	bp = getblk(pmp->pm_devvp, bn, pmp->pm_bpcluster, 0, 0, 0);
	memset(bp->b_data, 0, pmp->pm_bpcluster);
	memcpy(bp->b_data, &dosdirtemplate, sizeof dosdirtemplate);
	denp = (struct direntry *)bp->b_data;
	putushort(denp[0].deStartCluster, newcluster);
	putushort(denp[0].deCDate, ndirent.de_CDate);
	putushort(denp[0].deCTime, ndirent.de_CTime);
	denp[0].deCHundredth = ndirent.de_CHun;
	putushort(denp[0].deADate, ndirent.de_ADate);
	putushort(denp[0].deMDate, ndirent.de_MDate);
	putushort(denp[0].deMTime, ndirent.de_MTime);
	pcl = pdep->de_StartCluster;
	MSDOSFS_DPRINTF(("%s(pcl %lu, rootdirblk=%lu)\n", __func__, pcl,
	    pmp->pm_rootdirblk));
	if (FAT32(pmp) && pcl == pmp->pm_rootdirblk)
		pcl = 0;
	putushort(denp[1].deStartCluster, pcl);
	putushort(denp[1].deCDate, ndirent.de_CDate);
	putushort(denp[1].deCTime, ndirent.de_CTime);
	denp[1].deCHundredth = ndirent.de_CHun;
	putushort(denp[1].deADate, ndirent.de_ADate);
	putushort(denp[1].deMDate, ndirent.de_MDate);
	putushort(denp[1].deMTime, ndirent.de_MTime);
	if (FAT32(pmp)) {
		putushort(denp[0].deHighClust, newcluster >> 16);
		putushort(denp[1].deHighClust,
		    pdep->de_StartCluster >> 16);
	} else {
		putushort(denp[0].deHighClust, 0);
		putushort(denp[1].deHighClust, 0);
	}
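
	/*
	 * Editorial note, not in the original source: FAT32 cluster
	 * numbers are wider than 16 bits, but a direntry stores them
	 * split across two 16-bit fields.  For example, cluster
	 * 0x00012345 is stored as deStartCluster = 0x2345 and
	 * deHighClust = 0x0001; putushort() writes only the low 16 bits
	 * of its argument, hence the explicit ">> 16" for the high half.
	 */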

	if ((error = bwrite(bp)) != 0)
		goto bad;

	/*
	 * Now build up a directory entry pointing to the newly allocated
	 * cluster.  This will be written to an empty slot in the parent
	 * directory.
	 */
	if ((error = uniqdosname(pdep, &cn, ndirent.de_Name)) != 0)
		goto bad;

	ndirent.de_Attributes = ATTR_DIRECTORY;
	ndirent.de_StartCluster = newcluster;
	ndirent.de_FileSize = 0;
	ndirent.de_pmp = pdep->de_pmp;

	if ((error = msdosfs_findslot(pdep, &cn)) != 0)
		goto bad;
	if ((error = createde(&ndirent, pdep, &dep, &cn)) != 0)
		goto bad;
	if ((error = msdosfs_updatede(dep)) != 0)
		goto bad;

	return dep;

bad:
	clusterfree(pmp, newcluster, NULL);
bad2:
	errno = error;
	return NULL;
}
Index: projects/clang1000-import
===================================================================
--- projects/clang1000-import	(revision 357178)
+++ projects/clang1000-import	(revision 357179)

Property changes on: projects/clang1000-import
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head:r357119-357178