diff --git a/bin/sh/exec.c b/bin/sh/exec.c index 7d18c3cfc62d..ab86b7639c9a 100644 --- a/bin/sh/exec.c +++ b/bin/sh/exec.c @@ -1,770 +1,772 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Kenneth Almquist. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint #if 0 static char sccsid[] = "@(#)exec.c 8.4 (Berkeley) 6/8/95"; #endif #endif /* not lint */ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> #include <fcntl.h> #include <errno.h> #include <paths.h> #include <stdlib.h> /* * When commands are first encountered, they are entered in a hash table. * This ensures that a full path search will not have to be done for them * on each invocation. * * We should investigate converting to a linear search, even though that * would make the command name "hash" a misnomer. */ #include "shell.h" #include "main.h" #include "nodes.h" #include "parser.h" #include "redir.h" #include "eval.h" #include "exec.h" #include "builtins.h" #include "var.h" #include "options.h" #include "input.h" #include "output.h" #include "syntax.h" #include "memalloc.h" #include "error.h" #include "mystring.h" #include "show.h" #include "jobs.h" #include "alias.h" #define CMDTABLESIZE 31 /* should be prime */ struct tblentry { struct tblentry *next; /* next entry in hash chain */ union param param; /* definition of builtin function */ int special; /* flag for special builtin commands */ signed char cmdtype; /* index identifying command */ char cmdname[]; /* name of command */ }; static struct tblentry *cmdtable[CMDTABLESIZE]; static int cmdtable_cd = 0; /* cmdtable contains cd-dependent entries */ int exerrno = 0; /* Last exec error */ static void tryexec(char *, char **, char **); static void printentry(struct tblentry *, int); static struct tblentry *cmdlookup(const char *, int); static void delete_cmd_entry(void); static void addcmdentry(const char *, struct cmdentry *); /* * Exec a program. Never returns. If you change this routine, you may * have to change the find_command routine as well.
* * The argv array may be changed and element argv[-1] should be writable. */ void shellexec(char **argv, char **envp, const char *path, int idx) { char *cmdname; int e; if (strchr(argv[0], '/') != NULL) { tryexec(argv[0], argv, envp); e = errno; } else { e = ENOENT; while ((cmdname = padvance(&path, argv[0])) != NULL) { if (--idx < 0 && pathopt == NULL) { tryexec(cmdname, argv, envp); if (errno != ENOENT && errno != ENOTDIR) e = errno; if (e == ENOEXEC) break; } stunalloc(cmdname); } } /* Map to POSIX errors */ if (e == ENOENT || e == ENOTDIR) { exerrno = 127; exerror(EXEXEC, "%s: not found", argv[0]); } else { exerrno = 126; exerror(EXEXEC, "%s: %s", argv[0], strerror(e)); } } static void tryexec(char *cmd, char **argv, char **envp) { int e, in; ssize_t n; char buf[256]; execve(cmd, argv, envp); e = errno; if (e == ENOEXEC) { INTOFF; in = open(cmd, O_RDONLY | O_NONBLOCK); if (in != -1) { n = pread(in, buf, sizeof buf, 0); close(in); if (n > 0 && memchr(buf, '\0', n) != NULL) { errno = ENOEXEC; return; } } *argv = cmd; *--argv = __DECONST(char *, _PATH_BSHELL); execve(_PATH_BSHELL, argv, envp); } errno = e; } /* * Do a path search. The variable path (passed by reference) should be * set to the start of the path before the first call; padvance will update * this value as it proceeds. Successive calls to padvance will return * the possible path expansions in sequence. If an option (indicated by * a percent sign) appears in the path entry then the global variable * pathopt will be set to point to it; otherwise pathopt will be set to * NULL. */ const char *pathopt; char * padvance(const char **path, const char *name) { const char *p, *start; char *q; size_t len, namelen; if (*path == NULL) return NULL; start = *path; for (p = start; *p && *p != ':' && *p != '%'; p++) ; /* nothing */ namelen = strlen(name); len = p - start + namelen + 2; /* "2" is for '/' and '\0' */ STARTSTACKSTR(q); CHECKSTRSPACE(len, q); if (p != start) { memcpy(q, start, p - start); q += p - start; *q++ = '/'; } memcpy(q, name, namelen + 1); pathopt = NULL; if (*p == '%') { pathopt = ++p; while (*p && *p != ':') p++; } if (*p == ':') *path = p + 1; else *path = NULL; return stalloc(len); } /*** Command hashing code ***/ int hashcmd(int argc __unused, char **argv __unused) { struct tblentry **pp; struct tblentry *cmdp; int c; int verbose; struct cmdentry entry; char *name; int errors; errors = 0; verbose = 0; while ((c = nextopt("rv")) != '\0') { if (c == 'r') { clearcmdentry(); } else if (c == 'v') { verbose++; } } if (*argptr == NULL) { for (pp = cmdtable ; pp < &cmdtable[CMDTABLESIZE] ; pp++) { for (cmdp = *pp ; cmdp ; cmdp = cmdp->next) { if (cmdp->cmdtype == CMDNORMAL) printentry(cmdp, verbose); } } return 0; } while ((name = *argptr) != NULL) { if ((cmdp = cmdlookup(name, 0)) != NULL && cmdp->cmdtype == CMDNORMAL) delete_cmd_entry(); find_command(name, &entry, DO_ERR, pathval()); if (entry.cmdtype == CMDUNKNOWN) errors = 1; else if (verbose) { cmdp = cmdlookup(name, 0); if (cmdp != NULL) printentry(cmdp, verbose); else { outfmt(out2, "%s: not found\n", name); errors = 1; } flushall(); } argptr++; } return errors; } static void printentry(struct tblentry *cmdp, int verbose) { int idx; const char *path; char *name; if (cmdp->cmdtype == CMDNORMAL) { idx = cmdp->param.index; path = pathval(); do { name = padvance(&path, cmdp->cmdname); stunalloc(name); } while (--idx >= 0); out1str(name); } else if (cmdp->cmdtype == CMDBUILTIN) { out1fmt("builtin %s", cmdp->cmdname); } else if (cmdp->cmdtype == CMDFUNCTION) { 
out1fmt("function %s", cmdp->cmdname); if (verbose) { INTOFF; name = commandtext(getfuncnode(cmdp->param.func)); out1c(' '); out1str(name); ckfree(name); INTON; } #ifdef DEBUG } else { error("internal error: cmdtype %d", cmdp->cmdtype); #endif } out1c('\n'); } /* * Resolve a command name. If you change this routine, you may have to * change the shellexec routine as well. */ void find_command(const char *name, struct cmdentry *entry, int act, const char *path) { struct tblentry *cmdp, loc_cmd; int idx; char *fullname; struct stat statb; int e; int i; int spec; int cd; /* If name contains a slash, don't use the hash table */ if (strchr(name, '/') != NULL) { entry->cmdtype = CMDNORMAL; entry->u.index = 0; return; } cd = 0; /* If name is in the table, and not invalidated by cd, we're done */ if ((cmdp = cmdlookup(name, 0)) != NULL) { if (cmdp->cmdtype == CMDFUNCTION && act & DO_NOFUNC) cmdp = NULL; else goto success; } /* Check for builtin next */ if ((i = find_builtin(name, &spec)) >= 0) { INTOFF; cmdp = cmdlookup(name, 1); if (cmdp->cmdtype == CMDFUNCTION) cmdp = &loc_cmd; cmdp->cmdtype = CMDBUILTIN; cmdp->param.index = i; cmdp->special = spec; INTON; goto success; } /* We have to search path. */ e = ENOENT; idx = -1; for (;(fullname = padvance(&path, name)) != NULL; stunalloc(fullname)) { idx++; if (pathopt) { if (strncmp(pathopt, "func", 4) == 0) { /* handled below */ } else { continue; /* ignore unimplemented options */ } } if (fullname[0] != '/') cd = 1; if (stat(fullname, &statb) < 0) { if (errno != ENOENT && errno != ENOTDIR) e = errno; continue; } e = EACCES; /* if we fail, this will be the error */ if (!S_ISREG(statb.st_mode)) continue; if (pathopt) { /* this is a %func directory */ readcmdfile(fullname); if ((cmdp = cmdlookup(name, 0)) == NULL || cmdp->cmdtype != CMDFUNCTION) error("%s not defined in %s", name, fullname); stunalloc(fullname); goto success; } #ifdef notdef if (statb.st_uid == geteuid()) { if ((statb.st_mode & 0100) == 0) goto loop; } else if (statb.st_gid == getegid()) { if ((statb.st_mode & 010) == 0) goto loop; } else { if ((statb.st_mode & 01) == 0) goto loop; } #endif TRACE(("searchexec \"%s\" returns \"%s\"\n", name, fullname)); INTOFF; stunalloc(fullname); cmdp = cmdlookup(name, 1); if (cmdp->cmdtype == CMDFUNCTION) cmdp = &loc_cmd; cmdp->cmdtype = CMDNORMAL; cmdp->param.index = idx; INTON; goto success; } if (act & DO_ERR) { if (e == ENOENT || e == ENOTDIR) outfmt(out2, "%s: not found\n", name); else outfmt(out2, "%s: %s\n", name, strerror(e)); } entry->cmdtype = CMDUNKNOWN; entry->u.index = 0; return; success: if (cd) cmdtable_cd = 1; entry->cmdtype = cmdp->cmdtype; entry->u = cmdp->param; entry->special = cmdp->special; } /* * Search the table of builtin commands. */ int find_builtin(const char *name, int *special) { - const struct builtincmd *bp; + const unsigned char *bp; + size_t len; - for (bp = builtincmd ; bp->name ; bp++) { - if (*bp->name == *name && equal(bp->name, name)) { - *special = bp->special; - return bp->code; + len = strlen(name); + for (bp = builtincmd ; *bp ; bp += 2 + bp[0]) { + if (bp[0] == len && memcmp(bp + 2, name, len) == 0) { + *special = (bp[1] & BUILTIN_SPECIAL) != 0; + return bp[1] & ~BUILTIN_SPECIAL; } } return -1; } /* * Called when a cd is done. If any entry in cmdtable depends on the current * directory, simply clear cmdtable completely. */ void hashcd(void) { if (cmdtable_cd) clearcmdentry(); } /* * Called before PATH is changed. The argument is the new value of PATH; * pathval() still returns the old value at this point. 
Called with * interrupts off. */ void changepath(const char *newval __unused) { clearcmdentry(); } /* * Clear out command entries. The argument specifies the first entry in * PATH which has changed. */ void clearcmdentry(void) { struct tblentry **tblp; struct tblentry **pp; struct tblentry *cmdp; INTOFF; for (tblp = cmdtable ; tblp < &cmdtable[CMDTABLESIZE] ; tblp++) { pp = tblp; while ((cmdp = *pp) != NULL) { if (cmdp->cmdtype == CMDNORMAL) { *pp = cmdp->next; ckfree(cmdp); } else { pp = &cmdp->next; } } } cmdtable_cd = 0; INTON; } /* * Locate a command in the command hash table. If "add" is nonzero, * add the command to the table if it is not already present. The * variable "lastcmdentry" is set to point to the address of the link * pointing to the entry, so that delete_cmd_entry can delete the * entry. */ static struct tblentry **lastcmdentry; static struct tblentry * cmdlookup(const char *name, int add) { unsigned int hashval; const char *p; struct tblentry *cmdp; struct tblentry **pp; size_t len; p = name; hashval = (unsigned char)*p << 4; while (*p) hashval += *p++; pp = &cmdtable[hashval % CMDTABLESIZE]; for (cmdp = *pp ; cmdp ; cmdp = cmdp->next) { if (equal(cmdp->cmdname, name)) break; pp = &cmdp->next; } if (add && cmdp == NULL) { INTOFF; len = strlen(name); cmdp = *pp = ckmalloc(sizeof (struct tblentry) + len + 1); cmdp->next = NULL; cmdp->cmdtype = CMDUNKNOWN; memcpy(cmdp->cmdname, name, len + 1); INTON; } lastcmdentry = pp; return cmdp; } /* * Delete the command entry returned on the last lookup. */ static void delete_cmd_entry(void) { struct tblentry *cmdp; INTOFF; cmdp = *lastcmdentry; *lastcmdentry = cmdp->next; ckfree(cmdp); INTON; } /* * Add a new command entry, replacing any existing command entry for * the same name. */ static void addcmdentry(const char *name, struct cmdentry *entry) { struct tblentry *cmdp; INTOFF; cmdp = cmdlookup(name, 1); if (cmdp->cmdtype == CMDFUNCTION) { unreffunc(cmdp->param.func); } cmdp->cmdtype = entry->cmdtype; cmdp->param = entry->u; INTON; } /* * Define a shell function. */ void defun(const char *name, union node *func) { struct cmdentry entry; INTOFF; entry.cmdtype = CMDFUNCTION; entry.u.func = copyfunc(func); addcmdentry(name, &entry); INTON; } /* * Delete a function if it exists. * Called with interrupts off. */ int unsetfunc(const char *name) { struct tblentry *cmdp; if ((cmdp = cmdlookup(name, 0)) != NULL && cmdp->cmdtype == CMDFUNCTION) { unreffunc(cmdp->param.func); delete_cmd_entry(); return (0); } return (0); } /* * Check if a function by a certain name exists. 
*/ int isfunc(const char *name) { struct tblentry *cmdp; cmdp = cmdlookup(name, 0); return (cmdp != NULL && cmdp->cmdtype == CMDFUNCTION); } /* * Shared code for the following builtin commands: * type, command -v, command -V */ int typecmd_impl(int argc, char **argv, int cmd, const char *path) { struct cmdentry entry; struct tblentry *cmdp; const char *const *pp; struct alias *ap; int i; int error1 = 0; if (path != pathval()) clearcmdentry(); for (i = 1; i < argc; i++) { /* First look at the keywords */ for (pp = parsekwd; *pp; pp++) if (**pp == *argv[i] && equal(*pp, argv[i])) break; if (*pp) { if (cmd == TYPECMD_SMALLV) out1fmt("%s\n", argv[i]); else out1fmt("%s is a shell keyword\n", argv[i]); continue; } /* Then look at the aliases */ if ((ap = lookupalias(argv[i], 1)) != NULL) { if (cmd == TYPECMD_SMALLV) { out1fmt("alias %s=", argv[i]); out1qstr(ap->val); outcslow('\n', out1); } else out1fmt("%s is an alias for %s\n", argv[i], ap->val); continue; } /* Then check if it is a tracked alias */ if ((cmdp = cmdlookup(argv[i], 0)) != NULL) { entry.cmdtype = cmdp->cmdtype; entry.u = cmdp->param; entry.special = cmdp->special; } else { /* Finally use brute force */ find_command(argv[i], &entry, 0, path); } switch (entry.cmdtype) { case CMDNORMAL: { if (strchr(argv[i], '/') == NULL) { const char *path2 = path; char *name; int j = entry.u.index; do { name = padvance(&path2, argv[i]); stunalloc(name); } while (--j >= 0); if (cmd == TYPECMD_SMALLV) out1fmt("%s\n", name); else out1fmt("%s is%s %s\n", argv[i], (cmdp && cmd == TYPECMD_TYPE) ? " a tracked alias for" : "", name); } else { if (eaccess(argv[i], X_OK) == 0) { if (cmd == TYPECMD_SMALLV) out1fmt("%s\n", argv[i]); else out1fmt("%s is %s\n", argv[i], argv[i]); } else { if (cmd != TYPECMD_SMALLV) outfmt(out2, "%s: %s\n", argv[i], strerror(errno)); error1 |= 127; } } break; } case CMDFUNCTION: if (cmd == TYPECMD_SMALLV) out1fmt("%s\n", argv[i]); else out1fmt("%s is a shell function\n", argv[i]); break; case CMDBUILTIN: if (cmd == TYPECMD_SMALLV) out1fmt("%s\n", argv[i]); else if (entry.special) out1fmt("%s is a special shell builtin\n", argv[i]); else out1fmt("%s is a shell builtin\n", argv[i]); break; default: if (cmd != TYPECMD_SMALLV) outfmt(out2, "%s: not found\n", argv[i]); error1 |= 127; break; } } if (path != pathval()) clearcmdentry(); return error1; } /* * Locate and print what a word is... */ int typecmd(int argc, char **argv) { if (argc > 2 && strcmp(argv[1], "--") == 0) argc--, argv++; return typecmd_impl(argc, argv, TYPECMD_TYPE, bltinlookup("PATH", 1)); } diff --git a/bin/sh/mkbuiltins b/bin/sh/mkbuiltins index 1be7ff149089..02be8ab45585 100755 --- a/bin/sh/mkbuiltins +++ b/bin/sh/mkbuiltins @@ -1,98 +1,93 @@ #!/bin/sh - #- # Copyright (c) 1991, 1993 # The Regents of the University of California. All rights reserved. # # This code is derived from software contributed to Berkeley by # Kenneth Almquist. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # 4. 
Neither the name of the University nor the names of its contributors # may be used to endorse or promote products derived from this software # without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # @(#)mkbuiltins 8.2 (Berkeley) 5/4/95 # $FreeBSD$ temp=`/usr/bin/mktemp -t ka` havehist=1 if [ "X$1" = "X-h" ]; then havehist=0 shift fi srcdir=$1 havejobs=0 if grep '^#define[ ]*JOBS[ ]*1' $srcdir/shell.h > /dev/null then havejobs=1 fi exec > builtins.c cat <<\! /* * This file was generated by the mkbuiltins program. */ #include #include "shell.h" #include "builtins.h" ! awk '/^[^#]/ {if(('$havejobs' || $2 != "-j") && ('$havehist' || $2 != "-h")) \ print $0}' $srcdir/builtins.def | sed 's/-[hj]//' > $temp echo 'int (*const builtinfunc[])(int, char **) = {' awk '/^[^#]/ { printf "\t%s,\n", $1}' $temp echo '}; -const struct builtincmd builtincmd[] = {' +const unsigned char builtincmd[] = {' awk '{ for (i = 2 ; i <= NF ; i++) { if ($i == "-s") { spc = 1; } else { - printf "\t{ \"%s\", %d, %d },\n", $i, NR-1, spc + printf "\t\"\\%03o\\%03o%s\"\n", length($i), (spc ? 128 : 0) + NR-1, $i spc = 0; } }}' $temp -echo ' { NULL, 0, 0 } -};' +echo '};' exec > builtins.h cat <<\! /* * This file was generated by the mkbuiltins program. */ #include ! 
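The builtincmd[] table generated above is now a flat byte string rather than an array of structs: each record is one length byte, one code byte whose high bit (BUILTIN_SPECIAL, 0x80) marks a special builtin, and then the name itself, and the concatenated string literal's terminating NUL doubles as the zero-length record that ends the table. A minimal standalone sketch of that layout and of the matching scan (the two entries and their codes here are made up for illustration; the real table and codes are generated from builtins.def):

#include <stdio.h>
#include <string.h>

#define BUILTIN_SPECIAL 0x80

/* Hypothetical packed table: "cd" with code 3, and "break" as a
 * special builtin with code 5 (0x80 | 5 == 0205 octal). */
static const unsigned char demo_builtincmd[] = "\002\003cd" "\005\205break";

/* bp[0] is the name length, bp[1] the code byte, bp + 2 the name;
 * each record is 2 + bp[0] bytes and a zero length byte ends the
 * table.  This mirrors the new find_builtin() in exec.c above. */
static int
demo_find_builtin(const char *name, int *special)
{
	const unsigned char *bp;
	size_t len = strlen(name);

	for (bp = demo_builtincmd; *bp; bp += 2 + bp[0]) {
		if (bp[0] == len && memcmp(bp + 2, name, len) == 0) {
			*special = (bp[1] & BUILTIN_SPECIAL) != 0;
			return (bp[1] & ~BUILTIN_SPECIAL);
		}
	}
	return (-1);
}

int
main(void)
{
	int code, spec;

	code = demo_find_builtin("break", &spec);
	printf("break -> code %d, special %d\n", code, spec);
	return (0);
}

The octal escapes keep the generated builtins.c pure ASCII while allowing name lengths up to 255, and the length prefix lets the scan reject most records on a single byte compare instead of a strcmp per name.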
tr abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ < $temp | awk '{ printf "#define %s %d\n", $1, NR-1}' echo ' -struct builtincmd { - const char *name; - int code; - int special; -}; +#define BUILTIN_SPECIAL 0x80 extern int (*const builtinfunc[])(int, char **); -extern const struct builtincmd builtincmd[]; +extern const unsigned char builtincmd[]; ' awk '{ printf "int %s(int, char **);\n", $1}' $temp rm -f $temp diff --git a/bin/sh/tests/builtins/Makefile b/bin/sh/tests/builtins/Makefile index 46a0b4159a1a..63e6ab56514d 100644 --- a/bin/sh/tests/builtins/Makefile +++ b/bin/sh/tests/builtins/Makefile @@ -1,170 +1,171 @@ # $FreeBSD$ .include TESTSDIR= ${TESTSBASE}/bin/sh/${.CURDIR:T} .PATH: ${.CURDIR:H} ATF_TESTS_SH= functional_test FILESDIR= ${TESTSDIR} FILES= alias.0 alias.0.stdout FILES+= alias.1 alias.1.stderr FILES+= alias3.0 alias3.0.stdout FILES+= alias4.0 FILES+= break1.0 FILES+= break2.0 break2.0.stdout FILES+= break3.0 FILES+= break4.4 FILES+= break5.4 FILES+= break6.0 FILES+= builtin1.0 FILES+= case1.0 FILES+= case2.0 FILES+= case3.0 FILES+= case4.0 FILES+= case5.0 FILES+= case6.0 FILES+= case7.0 FILES+= case8.0 FILES+= case9.0 FILES+= case10.0 FILES+= case11.0 FILES+= case12.0 FILES+= case13.0 FILES+= case14.0 FILES+= case15.0 FILES+= case16.0 FILES+= case17.0 FILES+= case18.0 FILES+= case19.0 FILES+= case20.0 FILES+= cd1.0 FILES+= cd2.0 FILES+= cd3.0 FILES+= cd4.0 FILES+= cd5.0 FILES+= cd6.0 FILES+= cd7.0 FILES+= cd8.0 FILES+= command1.0 FILES+= command2.0 FILES+= command3.0 FILES+= command3.0.stdout FILES+= command4.0 FILES+= command5.0 FILES+= command5.0.stdout FILES+= command6.0 FILES+= command6.0.stdout FILES+= command7.0 FILES+= command8.0 FILES+= command9.0 FILES+= command10.0 FILES+= command11.0 FILES+= command12.0 FILES+= dot1.0 FILES+= dot2.0 FILES+= dot3.0 FILES+= dot4.0 FILES+= eval1.0 FILES+= eval2.0 FILES+= eval3.0 FILES+= eval4.0 FILES+= eval5.0 FILES+= eval6.0 FILES+= eval7.0 FILES+= eval8.7 FILES+= exec1.0 FILES+= exec2.0 FILES+= exit1.0 FILES+= exit2.8 FILES+= exit3.0 FILES+= export1.0 FILES+= fc1.0 FILES+= fc2.0 FILES+= for1.0 FILES+= for2.0 FILES+= for3.0 FILES+= getopts1.0 getopts1.0.stdout FILES+= getopts2.0 getopts2.0.stdout FILES+= getopts3.0 FILES+= getopts4.0 FILES+= getopts5.0 FILES+= getopts6.0 FILES+= getopts7.0 FILES+= getopts8.0 getopts8.0.stdout +FILES+= getopts9.0 getopts9.0.stdout FILES+= hash1.0 hash1.0.stdout FILES+= hash2.0 hash2.0.stdout FILES+= hash3.0 hash3.0.stdout FILES+= hash4.0 FILES+= jobid1.0 FILES+= jobid2.0 FILES+= kill1.0 kill2.0 FILES+= lineno.0 lineno.0.stdout FILES+= lineno2.0 FILES+= lineno3.0 lineno3.0.stdout FILES+= local1.0 FILES+= local2.0 FILES+= local3.0 FILES+= local4.0 .if ${MK_NLS} != "no" FILES+= locale1.0 .endif FILES+= printf1.0 FILES+= printf2.0 FILES+= printf3.0 FILES+= printf4.0 FILES+= read1.0 read1.0.stdout FILES+= read2.0 FILES+= read3.0 read3.0.stdout FILES+= read4.0 read4.0.stdout FILES+= read5.0 FILES+= read6.0 FILES+= read7.0 FILES+= read8.0 FILES+= read9.0 FILES+= return1.0 FILES+= return2.1 FILES+= return3.1 FILES+= return4.0 FILES+= return5.0 FILES+= return6.4 FILES+= return7.4 FILES+= return8.0 FILES+= set1.0 FILES+= set2.0 FILES+= trap1.0 FILES+= trap10.0 FILES+= trap11.0 FILES+= trap12.0 FILES+= trap13.0 FILES+= trap14.0 FILES+= trap15.0 FILES+= trap16.0 FILES+= trap2.0 FILES+= trap3.0 FILES+= trap4.0 FILES+= trap5.0 FILES+= trap6.0 FILES+= trap7.0 FILES+= trap8.0 FILES+= trap9.0 FILES+= type1.0 type1.0.stderr FILES+= type2.0 FILES+= type3.0 FILES+= unalias.0 FILES+= var-assign.0 FILES+= 
var-assign2.0 FILES+= wait1.0 FILES+= wait2.0 FILES+= wait3.0 FILES+= wait4.0 FILES+= wait5.0 FILES+= wait6.0 FILES+= wait7.0 FILES+= wait8.0 FILES+= wait9.127 FILES+= wait10.0 .include diff --git a/etc/Makefile b/etc/Makefile index cf97988072a4..5484c11e1424 100644 --- a/etc/Makefile +++ b/etc/Makefile @@ -1,452 +1,459 @@ # from: @(#)Makefile 5.11 (Berkeley) 5/21/91 # $FreeBSD$ .include SUBDIR= \ newsyslog.conf.d .if ${MK_SENDMAIL} != "no" SUBDIR+=sendmail .endif BIN1= crontab \ devd.conf \ devfs.conf \ ddb.conf \ dhclient.conf \ disktab \ fbtab \ gettytab \ group \ hosts \ hosts.allow \ hosts.equiv \ libalias.conf \ libmap.conf \ login.access \ login.conf \ mac.conf \ motd \ netconfig \ network.subr \ networks \ newsyslog.conf \ nsswitch.conf \ phones \ profile \ protocols \ rc \ rc.bsdextended \ rc.firewall \ rc.initdiskless \ rc.shutdown \ rc.subr \ remote \ rpc \ services \ shells \ sysctl.conf \ syslog.conf \ termcap.small .if exists(${.CURDIR}/etc.${MACHINE}/ttys) BIN1+= etc.${MACHINE}/ttys .elif exists(${.CURDIR}/etc.${MACHINE_ARCH}/ttys) BIN1+= etc.${MACHINE_ARCH}/ttys .elif exists(${.CURDIR}/etc.${MACHINE_CPUARCH}/ttys) BIN1+= etc.${MACHINE_CPUARCH}/ttys .else .error etc.MACHINE/ttys missing .endif OPENBSMDIR= ${.CURDIR}/../contrib/openbsm BSM_ETC_OPEN_FILES= ${OPENBSMDIR}/etc/audit_class \ ${OPENBSMDIR}/etc/audit_event BSM_ETC_RESTRICTED_FILES= ${OPENBSMDIR}/etc/audit_control \ ${OPENBSMDIR}/etc/audit_user BSM_ETC_EXEC_FILES= ${OPENBSMDIR}/etc/audit_warn BSM_ETC_DIR= ${DESTDIR}/etc/security # NB: keep these sorted by MK_* knobs .if ${MK_AMD} != "no" BIN1+= amd.map .endif .if ${MK_APM} != "no" BIN1+= apmd.conf .endif .if ${MK_AUTOFS} != "no" BIN1+= auto_master .endif .if ${MK_BSNMP} != "no" BIN1+= snmpd.config .endif .if ${MK_FREEBSD_UPDATE} != "no" BIN1+= freebsd-update.conf .endif .if ${MK_FTP} != "no" BIN1+= ftpusers .endif .if ${MK_INETD} != "no" BIN1+= inetd.conf .endif .if ${MK_LOCATE} != "no" BIN1+= ${.CURDIR}/../usr.bin/locate/locate/locate.rc .endif .if ${MK_LPR} != "no" BIN1+= hosts.lpd printcap .endif .if ${MK_MAIL} != "no" BIN1+= ${.CURDIR}/../usr.bin/mail/misc/mail.rc .endif .if ${MK_NTP} != "no" BIN1+= ntp.conf .endif .if ${MK_OPENSSH} != "no" SSH= ${.CURDIR}/../crypto/openssh/ssh_config \ ${.CURDIR}/../crypto/openssh/sshd_config \ ${.CURDIR}/../crypto/openssh/moduli .endif .if ${MK_OPENSSL} != "no" SSL= ${.CURDIR}/../crypto/openssl/apps/openssl.cnf .endif .if ${MK_NS_CACHING} != "no" BIN1+= nscd.conf .endif .if ${MK_PORTSNAP} != "no" BIN1+= portsnap.conf .endif .if ${MK_PF} != "no" BIN1+= pf.os .endif .if ${MK_SENDMAIL} != "no" BIN1+= rc.sendmail .endif .if ${MK_TCSH} != "no" BIN1+= csh.cshrc csh.login csh.logout .endif .if ${MK_WIRELESS} != "no" BIN1+= regdomain.xml .endif # -rwxr-xr-x root:wheel, for the new cron root:wheel BIN2= netstart pccard_ether rc.suspend rc.resume MTREE= BSD.debug.dist BSD.include.dist BSD.root.dist BSD.usr.dist BSD.var.dist .if ${MK_LIB32} != "no" MTREE+= BSD.lib32.dist .endif +.if ${MK_LIBSOFT} != "no" +MTREE+= BSD.libsoft.dist +.endif .if ${MK_TESTS} != "no" MTREE+= BSD.tests.dist .endif .if ${MK_SENDMAIL} != "no" MTREE+= BSD.sendmail.dist .endif PPPCNF= ppp.conf .if ${MK_SENDMAIL} == "no" ETCMAIL=mailer.conf aliases .else ETCMAIL=Makefile README mailer.conf access.sample virtusertable.sample \ mailertable.sample aliases .endif # Special top level files for FreeBSD FREEBSD=COPYRIGHT # Sanitize DESTDIR DESTDIR:= ${DESTDIR:C://*:/:g} afterinstall: .if ${MK_MAN} != "no" ${_+_}cd ${.CURDIR}/../share/man; ${MAKE} makedb .endif distribute: # 
Avoid installing tests here; "make distribution" will do this and # correctly place them in the right location. ${_+_}cd ${.CURDIR} ; ${MAKE} MK_TESTS=no install \ DESTDIR=${DISTDIR}/${DISTRIBUTION} ${_+_}cd ${.CURDIR} ; ${MAKE} distribution DESTDIR=${DISTDIR}/${DISTRIBUTION} .include .if ${TARGET_ENDIANNESS} == "1234" CAP_MKDB_ENDIAN?= -l .elif ${TARGET_ENDIANNESS} == "4321" CAP_MKDB_ENDIAN?= -b .else CAP_MKDB_ENDIAN?= .endif .if defined(NO_ROOT) METALOG.add?= cat -l >> ${METALOG} .endif distribution: .if !defined(DESTDIR) @echo "set DESTDIR before running \"make ${.TARGET}\"" @false .endif cd ${.CURDIR}; \ ${INSTALL} -o ${BINOWN} -g ${BINGRP} -m 644 \ ${BIN1} ${DESTDIR}/etc; \ cap_mkdb ${CAP_MKDB_ENDIAN} ${DESTDIR}/etc/login.conf; \ services_mkdb ${CAP_MKDB_ENDIAN} -q -o ${DESTDIR}/var/db/services.db \ ${DESTDIR}/etc/services; \ ${INSTALL} -o ${BINOWN} -g ${BINGRP} -m 755 \ ${BIN2} ${DESTDIR}/etc; \ ${INSTALL} -o ${BINOWN} -g ${BINGRP} -m 600 \ master.passwd nsmb.conf opieaccess ${DESTDIR}/etc; .if ${MK_AT} == "no" sed -i "" -e 's;.*/usr/libexec/atrun;#&;' ${DESTDIR}/etc/crontab .endif .if ${MK_TCSH} == "no" sed -i "" -e 's;/bin/csh;/bin/sh;' ${DESTDIR}/etc/master.passwd .endif pwd_mkdb -i -p -d ${DESTDIR}/etc ${DESTDIR}/etc/master.passwd .if defined(NO_ROOT) ( \ echo "./etc/login.conf.db type=file mode=0644 uname=root gname=wheel"; \ echo "./etc/passwd type=file mode=0644 uname=root gname=wheel"; \ echo "./etc/pwd.db type=file mode=0644 uname=root gname=wheel"; \ echo "./etc/spwd.db type=file mode=0600 uname=root gname=wheel"; \ ) | ${METALOG.add} .endif .if ${MK_AUTOFS} != "no" ${_+_}cd ${.CURDIR}/autofs; ${MAKE} install .endif .if ${MK_BLUETOOTH} != "no" ${_+_}cd ${.CURDIR}/bluetooth; ${MAKE} install .endif .if ${MK_CASPER} != "no" ${_+_}cd ${.CURDIR}/casper; ${MAKE} install .endif ${_+_}cd ${.CURDIR}/defaults; ${MAKE} install ${_+_}cd ${.CURDIR}/devd; ${MAKE} install ${_+_}cd ${.CURDIR}/gss; ${MAKE} install .if ${MK_NTP} != "no" ${_+_}cd ${.CURDIR}/ntp; ${MAKE} install .endif ${_+_}cd ${.CURDIR}/periodic; ${MAKE} install .if ${MK_PKGBOOTSTRAP} != "no" ${_+_}cd ${.CURDIR}/pkg; ${MAKE} install .endif ${_+_}cd ${.CURDIR}/rc.d; ${MAKE} install ${_+_}cd ${.CURDIR}/../share/termcap; ${MAKE} etc-termcap ${_+_}cd ${.CURDIR}/../usr.sbin/rmt; ${MAKE} etc-rmt ${_+_}cd ${.CURDIR}/pam.d; ${MAKE} install cd ${.CURDIR}; ${INSTALL} -o ${BINOWN} -g ${BINGRP} -m 0444 \ ${BSM_ETC_OPEN_FILES} ${BSM_ETC_DIR} cd ${.CURDIR}; ${INSTALL} -o ${BINOWN} -g ${BINGRP} -m 0600 \ ${BSM_ETC_RESTRICTED_FILES} ${BSM_ETC_DIR} cd ${.CURDIR}; ${INSTALL} -o ${BINOWN} -g ${BINGRP} -m 0500 \ ${BSM_ETC_EXEC_FILES} ${BSM_ETC_DIR} .if ${MK_UNBOUND} != "no" if [ ! 
-e ${DESTDIR}/etc/unbound ]; then \ ${INSTALL_SYMLINK} ../var/unbound ${DESTDIR}/etc/unbound; \ fi .endif .if ${MK_SENDMAIL} != "no" ${_+_}cd ${.CURDIR}/sendmail; ${MAKE} distribution .endif .if ${MK_OPENSSH} != "no" cd ${.CURDIR}; ${INSTALL} -o ${BINOWN} -g ${BINGRP} -m 644 \ ${SSH} ${DESTDIR}/etc/ssh .endif .if ${MK_OPENSSL} != "no" cd ${.CURDIR}; ${INSTALL} -o ${BINOWN} -g ${BINGRP} -m 644 \ ${SSL} ${DESTDIR}/etc/ssl .endif .if ${MK_KERBEROS} != "no" cd ${.CURDIR}/root; \ ${INSTALL} -o ${BINOWN} -g ${BINGRP} -m 644 \ dot.k5login ${DESTDIR}/root/.k5login; .endif cd ${.CURDIR}/root; \ ${INSTALL} -o ${BINOWN} -g ${BINGRP} -m 644 \ dot.profile ${DESTDIR}/root/.profile; \ rm -f ${DESTDIR}/.profile; \ ln ${DESTDIR}/root/.profile ${DESTDIR}/.profile .if ${MK_TCSH} != "no" cd ${.CURDIR}/root; \ ${INSTALL} -o ${BINOWN} -g ${BINGRP} -m 644 \ dot.cshrc ${DESTDIR}/root/.cshrc; \ ${INSTALL} -o ${BINOWN} -g ${BINGRP} -m 644 \ dot.login ${DESTDIR}/root/.login; \ rm -f ${DESTDIR}/.cshrc; \ ln ${DESTDIR}/root/.cshrc ${DESTDIR}/.cshrc .endif cd ${.CURDIR}/mtree; ${INSTALL} -o ${BINOWN} -g ${BINGRP} -m 444 \ ${MTREE} ${DESTDIR}/etc/mtree .if ${MK_MAIL} != "no" cd ${.CURDIR}/mail; ${INSTALL} -o ${BINOWN} -g ${BINGRP} -m 644 \ ${ETCMAIL} ${DESTDIR}/etc/mail if [ -d ${DESTDIR}/etc/mail -a -f ${DESTDIR}/etc/mail/aliases -a \ ! -f ${DESTDIR}/etc/aliases ]; then \ ln -s mail/aliases ${DESTDIR}/etc/aliases; \ fi .endif ${INSTALL} -o ${BINOWN} -g operator -m 664 /dev/null \ ${DESTDIR}/etc/dumpdates .if ${MK_LOCATE} != "no" ${INSTALL} -o nobody -g ${BINGRP} -m 644 /dev/null \ ${DESTDIR}/var/db/locate.database .endif ${INSTALL} -o ${BINOWN} -g ${BINGRP} -m 644 ${.CURDIR}/minfree \ ${DESTDIR}/var/crash cd ${.CURDIR}/..; ${INSTALL} -o ${BINOWN} -g ${BINGRP} -m 444 \ ${FREEBSD} ${DESTDIR}/ .if ${MK_BOOT} != "no" .if exists(${.CURDIR}/../sys/${MACHINE}/conf/GENERIC.hints) ${INSTALL} -o ${BINOWN} -g ${BINGRP} -m 444 \ ${.CURDIR}/../sys/${MACHINE}/conf/GENERIC.hints \ ${DESTDIR}/boot/device.hints .endif .endif .if ${MK_NIS} == "no" sed -i "" -e 's/.*_compat:/# &/' -e 's/compat$$/files/' \ ${DESTDIR}/etc/nsswitch.conf .endif MTREE_CMD?= mtree .if ${MK_INSTALL_AS_USER} == "yes" && ${_uid} != 0 MTREE_FILTER= sed -e 's,\([gu]\)name=,\1id=,g' \ -e 's,\(uid=\)[^ ]* ,\1${_uid} ,' \ -e 's,\(gid=\)[^ ]* ,\1${_gid} ,' \ -e 's,\(uid=\)[^ ]*$$,\1${_uid},' \ -e 's,\(gid=\)[^ ]*$$,\1${_gid},' .else MTREE_FILTER= cat .if !defined(NO_FSCHG) MTREE_FSCHG= -i .endif .endif MTREES= mtree/BSD.root.dist / \ mtree/BSD.var.dist /var \ mtree/BSD.usr.dist /usr \ mtree/BSD.include.dist /usr/include \ mtree/BSD.debug.dist /usr/lib .if ${MK_GROFF} != "no" MTREES+= mtree/BSD.groff.dist /usr .endif .if ${MK_LIB32} != "no" MTREES+= mtree/BSD.lib32.dist /usr MTREES+= mtree/BSD.lib32.dist /usr/lib/debug/usr .endif +.if ${MK_LIBSOFT} != "no" +MTREES+= mtree/BSD.libsoft.dist /usr +MTREES+= mtree/BSD.libsoft.dist /usr/lib/debug/usr +.endif .if ${MK_TESTS} != "no" MTREES+= mtree/BSD.tests.dist ${TESTSBASE} MTREES+= mtree/BSD.tests.dist /usr/lib/debug/${TESTSBASE} .endif .if ${MK_SENDMAIL} != "no" MTREES+= mtree/BSD.sendmail.dist / .endif .for mtree in ${LOCAL_MTREE} MTREES+= ../${mtree} / .endfor # Clean up some directories that where mistakenly created as files that # should not have been as part of the nvi update in r281994. # This should be removed after 11.0-RELEASE. 
DISTRIB_CLEANUP_SHARE_FILES= ${SHAREDIR}/doc/usd/10.exref ${SHAREDIR}/doc/usd/11.edit DISTRIB_CLEANUP_SHARE_FILES+= ${SHAREDIR}/doc/usd/12.vi ${SHAREDIR}/doc/usd/13.viref distrib-cleanup: .PHONY for file in ${DISTRIB_CLEANUP_SHARE_FILES}; do \ if [ -f ${DESTDIR}/$${file} ]; then \ rm -f ${DESTDIR}/$${file}; \ fi; \ done distrib-dirs: ${MTREES:N/*} distrib-cleanup .PHONY @set ${MTREES}; \ while test $$# -ge 2; do \ m=${.CURDIR}/$$1; \ shift; \ d=${DESTDIR}$$1; \ shift; \ test -d $$d || mkdir -p $$d; \ ${ECHO} ${MTREE_CMD} -deU ${MTREE_FSCHG} \ ${MTREE_FOLLOWS_SYMLINKS} -f $$m -p $$d; \ ${MTREE_FILTER} $$m | \ ${MTREE_CMD} -deU ${MTREE_FSCHG} ${MTREE_FOLLOWS_SYMLINKS} \ -p $$d; \ done; true .if defined(NO_ROOT) @set ${MTREES}; \ while test $$# -ge 2; do \ m=${.CURDIR}/$$1; \ shift; \ d=$$1; \ test "$$d" == "/" && d=""; \ d=${DISTBASE}$$d; \ shift; \ test -d ${DESTDIR}/$$d || mkdir -p ${DESTDIR}/$$d; \ ${ECHO} "${MTREE_CMD:N-W} -C -f $$m -K uname,gname | " \ "sed s#^\.#.$$d# | ${METALOG.add}" ; \ ${MTREE_FILTER} $$m | \ ${MTREE_CMD:N-W} -C -K uname,gname | sed s#^\.#.$$d# | \ ${METALOG.add} ; \ done; true .endif ${INSTALL_SYMLINK} usr/src/sys ${DESTDIR}/sys .if ${MK_MAN} != "no" cd ${DESTDIR}${SHAREDIR}/man; \ for mandir in man*; do \ ${INSTALL_SYMLINK} ../$$mandir \ ${DESTDIR}${SHAREDIR}/man/en.ISO8859-1/; \ ${INSTALL_SYMLINK} ../$$mandir \ ${DESTDIR}${SHAREDIR}/man/en.UTF-8/; \ done .if ${MK_OPENSSL} != "no" cd ${DESTDIR}${SHAREDIR}/openssl/man; \ for mandir in man*; do \ ${INSTALL_SYMLINK} ../$$mandir \ ${DESTDIR}${SHAREDIR}/openssl/man/en.ISO8859-1/; \ done .endif set - `grep "^[a-zA-Z]" ${.CURDIR}/man.alias`; \ while [ $$# -gt 0 ] ; do \ ${INSTALL_SYMLINK} "$$2" "${DESTDIR}${SHAREDIR}/man/$$1"; \ if [ "${MK_OPENSSL}" != "no" ]; then \ ${INSTALL_SYMLINK} "$$2" \ "${DESTDIR}${SHAREDIR}/openssl/man/$$1"; \ fi; \ shift; shift; \ done .endif .if ${MK_NLS} != "no" set - `grep "^[a-zA-Z]" ${.CURDIR}/nls.alias`; \ while [ $$# -gt 0 ] ; do \ ${INSTALL_SYMLINK} "$$2" "${DESTDIR}${SHAREDIR}/nls/$$1"; \ shift; shift; \ done .endif etc-examples: cd ${.CURDIR}; ${INSTALL} -o ${BINOWN} -g ${BINGRP} -m 444 \ ${BIN1} ${BIN2} nsmb.conf opieaccess \ ${DESTDIR}${SHAREDIR}/examples/etc ${_+_}cd ${.CURDIR}/defaults; ${MAKE} install \ DESTDIR=${DESTDIR}${SHAREDIR}/examples .include diff --git a/etc/mtree/BSD.libsoft.dist b/etc/mtree/BSD.libsoft.dist new file mode 100644 index 000000000000..69026a73f932 --- /dev/null +++ b/etc/mtree/BSD.libsoft.dist @@ -0,0 +1,14 @@ +# $FreeBSD$ +# +# Please see the file src/etc/mtree/README before making changes to this file. +# + +/set type=dir uname=root gname=wheel mode=0755 +. + libsoft + dtrace + .. + i18n + .. + .. +.. 
diff --git a/etc/mtree/Makefile b/etc/mtree/Makefile index 1a941e41c27a..d8df25d3161b 100644 --- a/etc/mtree/Makefile +++ b/etc/mtree/Makefile @@ -1,33 +1,37 @@ # $FreeBSD$ .include FILES= ${_BSD.debug.dist} \ BSD.include.dist \ BSD.root.dist \ ${_BSD.lib32.dist} \ + ${_BSD.libsoft.dist} \ ${_BSD.sendmail.dist} \ ${_BSD.tests.dist} \ BSD.usr.dist \ BSD.var.dist .if ${MK_DEBUG_FILES} != "no" _BSD.debug.dist= BSD.debug.dist .endif .if ${MK_GROFF} != "no" _BSD.groff.dist= BSD.groff.dist .endif .if ${MK_LIB32} != "no" _BSD.lib32.dist= BSD.lib32.dist .endif +.if ${MK_LIBSOFT} != "no" +_BSD.libsoft.dist= BSD.libsoft.dist +.endif .if ${MK_SENDMAIL} != "no" _BSD.sendmail.dist= BSD.sendmail.dist .endif .if ${MK_TESTS} != "no" _BSD.tests.dist= BSD.tests.dist .endif NO_OBJ= FILESDIR= /etc/mtree .include diff --git a/etc/ntp/leap-seconds b/etc/ntp/leap-seconds index b8b41f2d1179..8fa6225f65e3 100644 --- a/etc/ntp/leap-seconds +++ b/etc/ntp/leap-seconds @@ -1,119 +1,221 @@ # # $FreeBSD$ # -# ATOMIC TIME. -# The Coordinated Universal Time (UTC) is the reference time scale derived -# from The "Temps Atomique International" (TAI) calculated by the Bureau -# International des Poids et Mesures (BIPM) using a worldwide network of atomic -# clocks. UTC differs from TAI by an integer number of seconds; it is the basis -# of all activities in the world. -# -# -# ASTRONOMICAL TIME (UT1) is the time scale based on the rate of rotation of the earth. -# It is now mainly derived from Very Long Baseline Interferometry (VLBI). The various -# irregular fluctuations progressively detected in the rotation rate of the Earth lead -# in 1972 to the replacement of UT1 by UTC as the reference time scale. -# -# -# LEAP SECOND -# Atomic clocks are more stable than the rate of the earth rotatiob since the later -# undergoes a full range of geophysical perturbations at various time scales (lunisolar -# and core-mantle torques,atmospheric and oceanic effetcs, ...) -# Leap seconds are needed to keep the two time scales in agreement, i.e. UT1-UTC smaller -# than 0.9 second. So, when necessary a "leap second" is introduced in UTC. -# Since the adoption of this system in 1972 it has been necessary to add 26 seconds to UTC, -# firstly due to the initial choice of the value of the second (1/86400 mean solar day of -# the year 1820) and secondly to the general slowing down of the Earth's rotation. It is -# theorically possible to have a negative leap second (a second removed from UTC), but so far, -# all leap seconds have been positive (a second has been added to UTC). Based on what we know about the earth's rotation, -# it is unlikely that we will ever have a negative leap second. -# -# -# HISTORY -# The first leap second was added on June 30, 1972. Until 2000, it was necessary in average to add a leap second at a rate -# of 1 to 2 years. Since 2000, due to the fact that the earth rate of rotation is accelerating, leap seconds are introduced -# with an average frequency of 3 to 4 years. -# -# -# RESPONSABILITY OF THE DECISION TO INTRODUCE A LEAP SECOND IN UTC -# The decision to introduce a leap second in UTC is the responsibility of the Earth Orientation Center of -# the International Earth Rotation and reference System Service (IERS). This center is located at Paris -# Observatory. According to international agreements, leap second date have to occur at fixed date : -# first preference is given to the end of December and June, and second preference at the end of March -# and September. 
Since the system was introduced in 1972, only dates in June and December were used. -# -# Questions or comments to: -# Daniel Gambis, daniel.gambis@obspm.fr -# Christian Bizouard: christian.bizouard@obspm.fr -# Earth orientation Center of the IERS -# Paris Observatory, France -# -# -# -# VALIDITY OF THE FILE -# It is important to express the validity of the file. These next two dates are -# given in units of seconds since 1900.0. -# -# 1) Last update of the file. -# -# Updated through IERS Bulletin C (ftp://hpiers.obspm.fr/iers/bul/bulc/bulletinc.dat) -# -# The following line shows the last update of this file in NTP timestamp: -# -#$ 3645216000 -# -# 2) Expiration date of the file given on a semi-annual basis: last June or last December -# -# File expires on 28 December 2015 -# -# Expire date in NTP timestamp: -# -#@ 3660249600 -# -# -# LIST OF LEAP SECONDS -# NTP timestamp (X parameter) is the number of seconds since 1900.0 -# -# MJD: The Modified Julian Day number. MJD = X/86400 + 15020 -# -# DTAI: The difference DTAI= TAI-UTC in units of seconds -# It is the quantity to add to UTC to get the time in TAI -# -# Day Month Year : epoch in clear -# -#NTP Time DTAI Day Month Year -# -2272060800 10 # 1 Jan 1972 -2287785600 11 # 1 Jul 1972 -2303683200 12 # 1 Jan 1973 -2335219200 13 # 1 Jan 1974 -2366755200 14 # 1 Jan 1975 -2398291200 15 # 1 Jan 1976 -2429913600 16 # 1 Jan 1977 -2461449600 17 # 1 Jan 1978 -2492985600 18 # 1 Jan 1979 -2524521600 19 # 1 Jan 1980 -2571782400 20 # 1 Jul 1981 -2603318400 21 # 1 Jul 1982 -2634854400 22 # 1 Jul 1983 -2698012800 23 # 1 Jul 1985 -2776982400 24 # 1 Jan 1988 -2840140800 25 # 1 Jan 1990 -2871676800 26 # 1 Jan 1991 -2918937600 27 # 1 Jul 1992 -2950473600 28 # 1 Jul 1993 -2982009600 29 # 1 Jul 1994 -3029443200 30 # 1 Jan 1996 -3076704000 31 # 1 Jul 1997 -3124137600 32 # 1 Jan 1999 -3345062400 33 # 1 Jan 2006 -3439756800 34 # 1 Jan 2009 -3550089600 35 # 1 Jul 2012 -3644697600 36 # 1 Jul 2015 -# -# In order to verify the integrity of this file, a hash code -# has been generated. For more information how to use -# this hash code, please consult the README file under the -# 'sha' repertory. -# -#h 620ba8af 37900668 95ac09ba d77640f9 6fd75493 +# In the following text, the symbol '#' introduces +# a comment, which continues from that symbol until +# the end of the line. A plain comment line has a +# whitespace character following the comment indicator. +# There are also special comment lines defined below. +# A special comment will always have a non-whitespace +# character in column 2. +# +# A blank line should be ignored. +# +# The following table shows the corrections that must +# be applied to compute International Atomic Time (TAI) +# from the Coordinated Universal Time (UTC) values that +# are transmitted by almost all time services. +# +# The first column shows an epoch as a number of seconds +# since 1900.0 and the second column shows the number of +# seconds that must be added to UTC to compute TAI for +# any timestamp at or after that epoch. The value on +# each line is valid from the indicated initial instant +# until the epoch given on the next one or indefinitely +# into the future if there is no next line. +# (The comment on each line shows the representation of +# the corresponding initial epoch in the usual +# day-month-year format. The epoch always begins at +# 00:00:00 UTC on the indicated day. See Note 5 below.) +# +# Important notes: +# +# 1. Coordinated Universal Time (UTC) is often referred to +# as Greenwich Mean Time (GMT). 
The GMT time scale is no +# longer used, and the use of GMT to designate UTC is +# discouraged. +# +# 2. The UTC time scale is realized by many national +# laboratories and timing centers. Each laboratory +# identifies its realization with its name: Thus +# UTC(NIST), UTC(USNO), etc. The differences among +# these different realizations are typically on the +# order of a few nanoseconds (i.e., 0.000 000 00x s) +# and can be ignored for many purposes. These differences +# are tabulated in Circular T, which is published monthly +# by the International Bureau of Weights and Measures +# (BIPM). See www.bipm.fr for more information. +# +# 3. The current definition of the relationship between UTC +# and TAI dates from 1 January 1972. A number of different +# time scales were in use before that epoch, and it can be +# quite difficult to compute precise timestamps and time +# intervals in those "prehistoric" days. For more information, +# consult: +# +# The Explanatory Supplement to the Astronomical +# Ephemeris. +# or +# Terry Quinn, "The BIPM and the Accurate Measurement +# of Time," Proc. of the IEEE, Vol. 79, pp. 894-905, +# July, 1991. +# +# 4. The insertion of leap seconds into UTC is currently the +# responsibility of the International Earth Rotation Service, +# which is located at the Paris Observatory: +# +# Central Bureau of IERS +# 61, Avenue de l'Observatoire +# 75014 Paris, France. +# +# Leap seconds are announced by the IERS in its Bulletin C +# +# See hpiers.obspm.fr or www.iers.org for more details. +# +# All national laboratories and timing centers use the +# data from the BIPM and the IERS to construct their +# local realizations of UTC. +# +# Although the definition also includes the possibility +# of dropping seconds ("negative" leap seconds), this has +# never been done and is unlikely to be necessary in the +# foreseeable future. +# +# 5. If your system keeps time as the number of seconds since +# some epoch (e.g., NTP timestamps), then the algorithm for +# assigning a UTC time stamp to an event that happens during a positive +# leap second is not well defined. The official name of that leap +# second is 23:59:60, but there is no way of representing that time +# in these systems. +# Many systems of this type effectively stop the system clock for +# one second during the leap second and use a time that is equivalent +# to 23:59:59 UTC twice. For these systems, the corresponding TAI +# timestamp would be obtained by advancing to the next entry in the +# following table when the time equivalent to 23:59:59 UTC +# is used for the second time. Thus the leap second which +# occurred on 30 June 1972 at 23:59:59 UTC would have TAI +# timestamps computed as follows: +# +# ... +# 30 June 1972 23:59:59 (2287785599, first time): TAI= UTC + 10 seconds +# 30 June 1972 23:59:60 (2287785599,second time): TAI= UTC + 11 seconds +# 1 July 1972 00:00:00 (2287785600) TAI= UTC + 11 seconds +# ... +# +# If your system realizes the leap second by repeating 00:00:00 UTC twice +# (this is possible but not usual), then the advance to the next entry +# in the table must occur the second time that a time equivalent to +# 00:00:00 UTC is used. Thus, using the same example as above: +# +# ... +# 30 June 1972 23:59:59 (2287785599): TAI= UTC + 10 seconds +# 30 June 1972 23:59:60 (2287785600, first time): TAI= UTC + 10 seconds +# 1 July 1972 00:00:00 (2287785600,second time): TAI= UTC + 11 seconds +# ... 
+# +# in both cases the use of timestamps based on TAI produces a smooth +# time scale with no discontinuity in the time interval. +# +# This complexity would not be needed for negative leap seconds (if they +# are ever used). The UTC time would skip 23:59:59 and advance from +# 23:59:58 to 00:00:00 in that case. The TAI offset would decrease by +# 1 second at the same instant. This is a much easier situation to deal +# with, since the difficulty of unambiguously representing the epoch +# during the leap second does not arise. +# +# Questions or comments to: +# Jeff Prillaman +# Time Service Department +# US Naval Observatory +# Washington, DC +# jeffrey.prillaman@usno.navy.mil +# +# Last Update of leap second values: 31 Dec 2015 +# +# The following line shows this last update date in NTP timestamp +# format. This is the date on which the most recent change to +# the leap second data was added to the file. This line can +# be identified by the unique pair of characters in the first two +# columns as shown below. +# +#$ 3660508800 +# +# The data in this file will be updated periodically as new leap +# seconds are announced. In addition to being entered on the line +# above, the update time (in NTP format) will be added to the basic +# file name leap-seconds to form the name leap-seconds.<NTP TIME>. +# In addition, the generic name leap-seconds.list will always point to +# the most recent version of the file. +# +# This update procedure will be performed only when a new leap second +# is announced. +# +# The following entry specifies the expiration date of the data +# in this file in units of seconds since 1900.0. This expiration date +# will be changed at least twice per year whether or not a new leap +# second is announced. These semi-annual changes will be made no +# later than 1 June and 1 December of each year to indicate what +# action (if any) is to be taken on 30 June and 31 December, +# respectively. (These are the customary effective dates for new +# leap seconds.) This expiration date will be identified by a +# unique pair of characters in columns 1 and 2 as shown below. +# In the unlikely event that a leap second is announced with an +# effective date other than 30 June or 31 December, then this +# file will be edited to include that leap second as soon as it is +# announced or at least one month before the effective date +# (whichever is later). +# If an announcement by the IERS specifies that no leap second is +# scheduled, then only the expiration date of the file will +# be advanced to show that the information in the file is still +# current -- the update time stamp, the data and the name of the file +# will not change. 
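As the notes above explain, each data line below pairs an epoch in seconds since 1900.0 with the TAI-UTC offset in effect from that instant on, so deriving TAI for a given timestamp means taking the offset from the last line whose epoch is at or before it. A minimal sketch under that reading of the format, with a handful of entries copied from the list that follows (the full file carries 27):

#include <stdint.h>
#include <stdio.h>

/* (epoch, TAI-UTC) pairs as in the data lines; epochs count seconds
 * since 1900.0.  Only a few of the 27 entries are included here. */
static const struct {
	uint64_t ntp_epoch;
	int dtai;
} leaps[] = {
	{ 2272060800ULL, 10 },	/* 1 Jan 1972 */
	{ 2287785600ULL, 11 },	/* 1 Jul 1972 */
	{ 3550089600ULL, 35 },	/* 1 Jul 2012 */
	{ 3644697600ULL, 36 },	/* 1 Jul 2015 */
};

/* TAI-UTC for an NTP-era timestamp: the offset on the last entry at
 * or before t.  Before the first entry the table does not apply, so
 * this sketch simply returns 0 there. */
static int
dtai_at(uint64_t t)
{
	int off = 0;
	size_t i;

	for (i = 0; i < sizeof(leaps) / sizeof(leaps[0]); i++) {
		if (leaps[i].ntp_epoch > t)
			break;
		off = leaps[i].dtai;
	}
	return (off);
}

int
main(void)
{
	/* 30 June 1972 23:59:59 UTC: TAI = UTC + 10 s; one second later
	 * (1 July 1972 00:00:00, epoch 2287785600): TAI = UTC + 11 s. */
	printf("%d %d\n", dtai_at(2287785599ULL), dtai_at(2287785600ULL));
	return (0);
}

This matches the worked example in note 5: a clock that repeats 23:59:59 advances to the next table entry the second time that second is used.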
+# +# Updated through IERS Bulletin C 50 +# File expires on: 1 Jun 2016 +# +#@ 3673728000 +# +2272060800 10 # 1 Jan 1972 +2287785600 11 # 1 Jul 1972 +2303683200 12 # 1 Jan 1973 +2335219200 13 # 1 Jan 1974 +2366755200 14 # 1 Jan 1975 +2398291200 15 # 1 Jan 1976 +2429913600 16 # 1 Jan 1977 +2461449600 17 # 1 Jan 1978 +2492985600 18 # 1 Jan 1979 +2524521600 19 # 1 Jan 1980 +2571782400 20 # 1 Jul 1981 +2603318400 21 # 1 Jul 1982 +2634854400 22 # 1 Jul 1983 +2698012800 23 # 1 Jul 1985 +2776982400 24 # 1 Jan 1988 +2840140800 25 # 1 Jan 1990 +2871676800 26 # 1 Jan 1991 +2918937600 27 # 1 Jul 1992 +2950473600 28 # 1 Jul 1993 +2982009600 29 # 1 Jul 1994 +3029443200 30 # 1 Jan 1996 +3076704000 31 # 1 Jul 1997 +3124137600 32 # 1 Jan 1999 +3345062400 33 # 1 Jan 2006 +3439756800 34 # 1 Jan 2009 +3550089600 35 # 1 Jul 2012 +3644697600 36 # 1 Jul 2015 +# +# the following special comment contains the +# hash value of the data in this file computed +# using the secure hash algorithm as specified +# by FIPS 180-1. See the files in ~/sha for +# the details of how this hash value is +# computed. Note that the hash computation +# ignores comments and whitespace characters +# in data lines. It includes the NTP values +# of both the last modification time and the +# expiration time of the file, but not the +# white space on those lines. +# the hash line is also ignored in the +# computation. +# +#h 44a44c49 35b22601 a9c7054c 8c56cf57 9b6f6ed5 +# diff --git a/etc/rc b/etc/rc index 4efc293b6b2b..2c90f385b88b 100644 --- a/etc/rc +++ b/etc/rc @@ -1,143 +1,146 @@ #!/bin/sh # # Copyright (c) 2000-2004 The FreeBSD Project # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # @(#)rc 5.27 (Berkeley) 6/5/91 # $FreeBSD$ # # System startup script run by init on autoboot # or after single-user. # Output and error are redirected to console by init, # and the console is the controlling terminal. # Note that almost all of the user-configurable behavior is no longer in # this file, but rather in /etc/defaults/rc.conf. Please check that file # first before contemplating any changes here. If you do need to change # this file for some reason, we would like to know about it. stty status '^T' 2> /dev/null # Set shell to ignore SIGINT (2), but not children; # shell catches SIGQUIT (3) and returns to single user. 
# trap : 2 trap "echo 'Boot interrupted'; exit 1" 3 HOME=/ PATH=/sbin:/bin:/usr/sbin:/usr/bin export HOME PATH if [ "$1" = autoboot ]; then autoboot=yes _boot="faststart" rc_fast=yes # run_rc_command(): do fast booting else autoboot=no _boot="quietstart" fi dlv=`/sbin/sysctl -n vfs.nfs.diskless_valid 2> /dev/null` if [ ${dlv:=0} -ne 0 -o -f /etc/diskless ]; then sh /etc/rc.initdiskless fi # Run these after determining whether we are booting diskless in order # to minimize the number of files that are needed on a diskless system, # and to make the configuration file variables available to rc itself. # . /etc/rc.subr load_rc_config # If we receive a SIGALRM, re-source /etc/rc.conf; this allows rc.d # scripts to perform "boot-time configuration" including enabling and # disabling rc.d scripts which appear later in the boot order. trap "_rc_conf_loaded=false; load_rc_config" ALRM skip="-s nostart" if [ `/sbin/sysctl -n security.jail.jailed` -eq 1 ]; then skip="$skip -s nojail" if [ `/sbin/sysctl -n security.jail.vnet` -ne 1 ]; then skip="$skip -s nojailvnet" fi fi # If the firstboot sentinel doesn't exist, we want to skip firstboot scripts. if ! [ -e ${firstboot_sentinel} ]; then skip_firstboot="-s firstboot" fi # Do a first pass to get everything up to $early_late_divider so that # we can do a second pass that includes $local_startup directories # files=`rcorder ${skip} ${skip_firstboot} /etc/rc.d/* 2>/dev/null` _rc_elem_done=' ' for _rc_elem in ${files}; do run_rc_script ${_rc_elem} ${_boot} _rc_elem_done="${_rc_elem_done}${_rc_elem} " case "$_rc_elem" in */${early_late_divider}) break ;; esac done unset files local_rc # Now that disks are mounted, for each dir in $local_startup # search for init scripts that use the new rc.d semantics. # case ${local_startup} in [Nn][Oo] | '') ;; *) find_local_scripts_new ;; esac # The firstboot sentinel might be on a newly mounted filesystem; look for it # again and unset skip_firstboot if we find it. if [ -e ${firstboot_sentinel} ]; then skip_firstboot="" fi files=`rcorder ${skip} ${skip_firstboot} /etc/rc.d/* ${local_rc} 2>/dev/null` for _rc_elem in ${files}; do case "$_rc_elem_done" in *" $_rc_elem "*) continue ;; esac run_rc_script ${_rc_elem} ${_boot} done # Remove the firstboot sentinel, and reboot if it was requested. if [ -e ${firstboot_sentinel} ]; then - rm ${firstboot_sentinel} + [ ${root_rw_mount} = "yes" ] || mount -uw / + /bin/rm ${firstboot_sentinel} if [ -e ${firstboot_sentinel}-reboot ]; then - rm ${firstboot_sentinel}-reboot + /bin/rm ${firstboot_sentinel}-reboot + [ ${root_rw_mount} = "yes" ] || mount -ur / kill -INT 1 fi + [ ${root_rw_mount} = "yes" ] || mount -ur / fi echo '' date exit 0 diff --git a/lib/libgssapi/gss_release_oid_set.c b/lib/libgssapi/gss_release_oid_set.c index bedb90ba8439..a46853a4a0d8 100644 --- a/lib/libgssapi/gss_release_oid_set.c +++ b/lib/libgssapi/gss_release_oid_set.c @@ -1,46 +1,56 @@ /*- * Copyright (c) 2005 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include OM_uint32 gss_release_oid_set(OM_uint32 *minor_status, - gss_OID_set *set) + gss_OID_set *setp) { + gss_OID_set set; + gss_OID o; + size_t i; *minor_status = 0; - if (set && *set) { - if ((*set)->elements) - free((*set)->elements); - free(*set); - *set = GSS_C_NO_OID_SET; + if (setp) { + set = *setp; + if (set) { + for (i = 0; i < set->count; i++) { + o = &set->elements[i]; + if (o->elements) + free(o->elements); + } + free(set->elements); + free(set); + *setp = GSS_C_NO_OID_SET; + } } return (GSS_S_COMPLETE); } diff --git a/lib/libnv/tests/dnv_tests.cc b/lib/libnv/tests/dnv_tests.cc index fe80d00f4851..a21a91dfa187 100644 --- a/lib/libnv/tests/dnv_tests.cc +++ b/lib/libnv/tests/dnv_tests.cc @@ -1,568 +1,569 @@ /*- * Copyright (c) 2014-2015 Sandvine Inc. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); +#include #include #include #include ATF_TEST_CASE_WITHOUT_HEAD(dnvlist_get_bool__present); ATF_TEST_CASE_BODY(dnvlist_get_bool__present) { nvlist_t *nvl; const char *key; bool value; nvl = nvlist_create(0); key = "name"; value = true; nvlist_add_bool(nvl, key, value); ATF_REQUIRE_EQ(dnvlist_get_bool(nvl, key, false), value); ATF_REQUIRE_EQ(dnvlist_get_bool(nvl, "name", false), value); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(dnvlist_get_bool__default_value); ATF_TEST_CASE_BODY(dnvlist_get_bool__default_value) { nvlist_t *nvl; const char *key; key = "123"; nvl = nvlist_create(0); ATF_REQUIRE_EQ(dnvlist_get_bool(nvl, key, false), false); ATF_REQUIRE_EQ(dnvlist_get_bool(nvl, "123", true), true); nvlist_add_bool(nvl, key, true); ATF_REQUIRE_EQ(dnvlist_get_bool(nvl, "otherkey", true), true); ATF_REQUIRE_EQ(dnvlist_get_bool(nvl, "12c", false), false); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(dnvlist_get_number__present); ATF_TEST_CASE_BODY(dnvlist_get_number__present) { nvlist_t *nvl; const char *key; uint64_t value; nvl = nvlist_create(0); key = "key"; value = 48952; nvlist_add_number(nvl, key, value); ATF_REQUIRE_EQ(dnvlist_get_number(nvl, key, 19), value); ATF_REQUIRE_EQ(dnvlist_get_number(nvl, "key", 65), value); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(dnvlist_get_number__default_value); ATF_TEST_CASE_BODY(dnvlist_get_number__default_value) { nvlist_t *nvl; const char *key; key = "123"; nvl = nvlist_create(0); ATF_REQUIRE_EQ(dnvlist_get_number(nvl, key, 5), 5); ATF_REQUIRE_EQ(dnvlist_get_number(nvl, "1234", 5), 5); nvlist_add_number(nvl, key, 24841); ATF_REQUIRE_EQ(dnvlist_get_number(nvl, "1234", 5641), 5641); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(dnvlist_get_string__present); ATF_TEST_CASE_BODY(dnvlist_get_string__present) { nvlist_t *nvl; const char *key; const char *value, *actual_value; nvl = nvlist_create(0); key = "string"; value = "fjdojfdi"; nvlist_add_string(nvl, key, value); ATF_REQUIRE_EQ(strcmp(dnvlist_get_string(nvl, key, "g"), value), 0); actual_value = dnvlist_get_string(nvl, key, "rs"); ATF_REQUIRE_EQ(strcmp(actual_value, value), 0); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(dnvlist_get_string__default_value); ATF_TEST_CASE_BODY(dnvlist_get_string__default_value) { nvlist_t *nvl; const char *key; const char *actual_value; key = "123"; nvl = nvlist_create(0); ATF_REQUIRE_EQ(strcmp(dnvlist_get_string(nvl, key, "bar"), "bar"), 0); actual_value = dnvlist_get_string(nvl, key, "d"); ATF_REQUIRE_EQ(strcmp(actual_value, "d"), 0); nvlist_add_string(nvl, key, "cxhweh"); ATF_REQUIRE_EQ(strcmp(dnvlist_get_string(nvl, "hthth", "fd"), "fd"), 0); actual_value = dnvlist_get_string(nvl, "5", "5"); ATF_REQUIRE_EQ(strcmp("5", "5"), 0); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(dnvlist_get_nvlist__present); ATF_TEST_CASE_BODY(dnvlist_get_nvlist__present) { nvlist_t *nvl; const char *key; nvlist_t *value; const nvlist_t *actual_value; nvl = nvlist_create(0); key = "nvlist"; value = nvlist_create(0); nvlist_move_nvlist(nvl, key, value); actual_value = dnvlist_get_nvlist(nvl, key, NULL); ATF_REQUIRE(actual_value != NULL); ATF_REQUIRE(nvlist_empty(actual_value)); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(dnvlist_get_nvlist__default_value); ATF_TEST_CASE_BODY(dnvlist_get_nvlist__default_value) { nvlist_t *nvl; const char *key; nvlist_t *dummy; key = "123"; nvl = nvlist_create(0); dummy = nvlist_create(0); ATF_REQUIRE_EQ(dnvlist_get_nvlist(nvl, key, dummy), dummy); nvlist_move_nvlist(nvl, key, 
nvlist_create(0)); ATF_REQUIRE_EQ(dnvlist_get_nvlist(nvl, "456", dummy), dummy); ATF_REQUIRE_EQ(dnvlist_get_nvlist(nvl, "gh", dummy), dummy); nvlist_destroy(nvl); } static void set_const_binary_value(const void *&value, size_t &size, const char *str) { value = str; size = strlen(str) + 1; /* +1 to include '\0' */ } ATF_TEST_CASE_WITHOUT_HEAD(dnvlist_get_binary__present); ATF_TEST_CASE_BODY(dnvlist_get_binary__present) { nvlist_t *nvl; const char *k; const void *value, *actual_value; size_t value_size, actual_size; nvl = nvlist_create(0); k = "binary"; set_const_binary_value(value, value_size, "fjdojfdi"); nvlist_add_binary(nvl, k, value, value_size); actual_value = dnvlist_get_binary(nvl, k, &actual_size, "g", 1); ATF_REQUIRE_EQ(value_size, actual_size); ATF_REQUIRE_EQ(memcmp(actual_value, value, actual_size), 0); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(dnvlist_get_binary__default_value); ATF_TEST_CASE_BODY(dnvlist_get_binary__default_value) { nvlist_t *nvl; const char *key; const void *default_value, *actual_value; size_t default_size, actual_size; key = "123"; nvl = nvlist_create(0); set_const_binary_value(default_value, default_size, "bar"); actual_value = dnvlist_get_binary(nvl, key, &actual_size, default_value, default_size); ATF_REQUIRE_EQ(default_size, actual_size); ATF_REQUIRE_EQ(memcmp(actual_value, default_value, actual_size), 0); set_const_binary_value(default_value, default_size, "atf"); actual_value = dnvlist_get_binary(nvl, key, &actual_size, default_value, default_size); ATF_REQUIRE_EQ(default_size, actual_size); ATF_REQUIRE_EQ(memcmp(actual_value, default_value, actual_size), 0); nvlist_add_binary(nvl, key, "test", 4); set_const_binary_value(default_value, default_size, "bthrg"); actual_value = dnvlist_get_binary(nvl, "k", &actual_size, default_value, default_size); ATF_REQUIRE_EQ(default_size, actual_size); ATF_REQUIRE_EQ(memcmp(actual_value, default_value, actual_size), 0); set_const_binary_value(default_value, default_size, "rrhgrythtyrtgbrhgrtdsvdfbtjlkul"); actual_value = dnvlist_get_binary(nvl, "s", &actual_size, default_value, default_size); ATF_REQUIRE_EQ(default_size, actual_size); ATF_REQUIRE_EQ(memcmp(actual_value, default_value, actual_size), 0); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(dnvlist_take_bool__present); ATF_TEST_CASE_BODY(dnvlist_take_bool__present) { nvlist_t *nvl; const char *key; bool value; nvl = nvlist_create(0); key = "name"; value = true; nvlist_add_bool(nvl, key, value); ATF_REQUIRE_EQ(dnvlist_take_bool(nvl, key, false), value); ATF_REQUIRE(nvlist_empty(nvl)); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(dnvlist_take_bool__empty); ATF_TEST_CASE_BODY(dnvlist_take_bool__empty) { nvlist_t *nvl; nvl = nvlist_create(0); ATF_REQUIRE_EQ(dnvlist_take_bool(nvl, "123", false), false); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(dnvlist_take_bool__default_value); ATF_TEST_CASE_BODY(dnvlist_take_bool__default_value) { nvlist_t *nvl; nvl = nvlist_create(0); nvlist_add_bool(nvl, "key", true); ATF_REQUIRE_EQ(dnvlist_take_bool(nvl, "otherkey", true), true); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(dnvlist_take_number__present); ATF_TEST_CASE_BODY(dnvlist_take_number__present) { nvlist_t *nvl; const char *key; uint64_t value; nvl = nvlist_create(0); key = "name"; value = 194154; nvlist_add_number(nvl, key, value); ATF_REQUIRE_EQ(dnvlist_take_number(nvl, key, 2), value); ATF_REQUIRE(nvlist_empty(nvl)); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(dnvlist_take_number__empty); 
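The dnvlist_get_* and dnvlist_take_* cases here all exercise the same contract: a present key returns the stored value, an absent key returns the caller-supplied default, and the take variants additionally remove the entry and hand ownership to the caller (which is why the tests strdup() their default strings and free() the results). A small usage sketch against the libnv API as documented in dnv(9); the key names and values are arbitrary, and on FreeBSD this links with -lnv:

#include <sys/nv.h>
#include <sys/dnv.h>
#include <assert.h>

int
main(void)
{
        nvlist_t *nvl;

        nvl = nvlist_create(0);
        nvlist_add_number(nvl, "timeout", 30);

        /* Present key: the stored value wins over the default. */
        assert(dnvlist_get_number(nvl, "timeout", 60) == 30);
        /* Absent key: the default comes back; nothing is inserted. */
        assert(dnvlist_get_number(nvl, "retries", 5) == 5);
        assert(!nvlist_exists_number(nvl, "retries"));

        nvlist_destroy(nvl);
        return (0);
}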
ATF_TEST_CASE_BODY(dnvlist_take_number__empty) { nvlist_t *nvl; nvl = nvlist_create(0); ATF_REQUIRE_EQ(dnvlist_take_number(nvl, "123", 126484), 126484); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(dnvlist_take_number__default_value); ATF_TEST_CASE_BODY(dnvlist_take_number__default_value) { nvlist_t *nvl; nvl = nvlist_create(0); nvlist_add_number(nvl, "key", 12); ATF_REQUIRE_EQ(dnvlist_take_number(nvl, "otherkey", 13), 13); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(dnvlist_take_string__present); ATF_TEST_CASE_BODY(dnvlist_take_string__present) { nvlist_t *nvl; const char *key; const char *value; char *default_val, *actual_val; nvl = nvlist_create(0); key = "name"; value = "wrowm"; default_val = strdup("default"); nvlist_add_string(nvl, key, value); actual_val = dnvlist_take_string(nvl, key, default_val); ATF_REQUIRE_EQ(strcmp(actual_val, value), 0); ATF_REQUIRE(nvlist_empty(nvl)); free(actual_val); free(default_val); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(dnvlist_take_string__empty); ATF_TEST_CASE_BODY(dnvlist_take_string__empty) { nvlist_t *nvl; char *default_val, *actual_val; nvl = nvlist_create(0); default_val = strdup(""); actual_val = dnvlist_take_string(nvl, "123", default_val); ATF_REQUIRE_EQ(strcmp(actual_val, default_val), 0); free(actual_val); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(dnvlist_take_string__default_value); ATF_TEST_CASE_BODY(dnvlist_take_string__default_value) { nvlist_t *nvl; char *default_val, *actual_val; nvl = nvlist_create(0); nvlist_add_string(nvl, "key", "foobar"); default_val = strdup("other"); actual_val = dnvlist_take_string(nvl, "otherkey", default_val); ATF_REQUIRE_EQ(strcmp(actual_val, default_val), 0); free(actual_val); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(dnvlist_take_nvlist__present); ATF_TEST_CASE_BODY(dnvlist_take_nvlist__present) { nvlist_t *nvl; const char *key; nvlist_t *value, *default_val, *actual_val; nvl = nvlist_create(0); key = "name"; value = nvlist_create(0); default_val = nvlist_create(0); nvlist_move_nvlist(nvl, key, value); actual_val = dnvlist_take_nvlist(nvl, key, default_val); ATF_REQUIRE_EQ(actual_val, value); ATF_REQUIRE(nvlist_empty(nvl)); free(actual_val); free(default_val); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(dnvlist_take_nvlist__empty); ATF_TEST_CASE_BODY(dnvlist_take_nvlist__empty) { nvlist_t *nvl, *actual_val; nvl = nvlist_create(0); actual_val = dnvlist_take_nvlist(nvl, "123", NULL); ATF_REQUIRE_EQ(actual_val, static_cast(NULL)); free(actual_val); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(dnvlist_take_nvlist__default_value); ATF_TEST_CASE_BODY(dnvlist_take_nvlist__default_value) { nvlist_t *nvl; nvlist_t *default_val, *actual_val; nvl = nvlist_create(0); nvlist_move_nvlist(nvl, "key", nvlist_create(0)); default_val = nvlist_create(0); actual_val = dnvlist_take_nvlist(nvl, "otherkey", default_val); ATF_REQUIRE_EQ(actual_val, default_val); free(actual_val); nvlist_destroy(nvl); } static void set_binary_value(void *&value, size_t &size, const char *str) { value = strdup(str); size = strlen(str) + 1; /* +1 to include '\0' */ } ATF_TEST_CASE_WITHOUT_HEAD(dnvlist_take_binary__present); ATF_TEST_CASE_BODY(dnvlist_take_binary__present) { nvlist_t *nvl; const char *key; void *value, *default_val, *actual_val; size_t value_size, default_size, actual_size; nvl = nvlist_create(0); key = "name"; set_binary_value(value, value_size, "fkdojvmo908"); set_binary_value(default_val, default_size, "16546"); nvlist_add_binary(nvl, key, value, value_size); actual_val = 
dnvlist_take_binary(nvl, key, &actual_size, default_val, default_size); ATF_REQUIRE_EQ(value_size, actual_size); ATF_REQUIRE_EQ(memcmp(actual_val, value, value_size), 0); ATF_REQUIRE(nvlist_empty(nvl)); free(actual_val); free(default_val); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(dnvlist_take_binary__empty); ATF_TEST_CASE_BODY(dnvlist_take_binary__empty) { nvlist_t *nvl; void *default_val, *actual_val; size_t default_size, actual_size; nvl = nvlist_create(0); set_binary_value(default_val, default_size, "\xa8\x89\x49\xff\xe2\x08"); actual_val = dnvlist_take_binary(nvl, "123", &actual_size, default_val, default_size); ATF_REQUIRE_EQ(default_size, actual_size); ATF_REQUIRE_EQ(memcmp(actual_val, default_val, actual_size), 0); free(actual_val); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(dnvlist_take_binary__default_value); ATF_TEST_CASE_BODY(dnvlist_take_binary__default_value) { nvlist_t *nvl; void *default_val, *actual_val; size_t default_size, actual_size; nvl = nvlist_create(0); nvlist_add_binary(nvl, "key", "foobar", 6); set_binary_value(default_val, default_size, "vbhag"); actual_val = dnvlist_take_binary(nvl, "otherkey", &actual_size, default_val, default_size); ATF_REQUIRE_EQ(default_size, actual_size); ATF_REQUIRE_EQ(memcmp(actual_val, default_val, default_size), 0); free(actual_val); nvlist_destroy(nvl); } ATF_INIT_TEST_CASES(tp) { ATF_ADD_TEST_CASE(tp, dnvlist_get_bool__present); ATF_ADD_TEST_CASE(tp, dnvlist_get_bool__default_value); ATF_ADD_TEST_CASE(tp, dnvlist_get_number__present); ATF_ADD_TEST_CASE(tp, dnvlist_get_number__default_value); ATF_ADD_TEST_CASE(tp, dnvlist_get_string__present); ATF_ADD_TEST_CASE(tp, dnvlist_get_string__default_value); ATF_ADD_TEST_CASE(tp, dnvlist_get_nvlist__present); ATF_ADD_TEST_CASE(tp, dnvlist_get_nvlist__default_value); ATF_ADD_TEST_CASE(tp, dnvlist_get_binary__present); ATF_ADD_TEST_CASE(tp, dnvlist_get_binary__default_value); ATF_ADD_TEST_CASE(tp, dnvlist_take_bool__present); ATF_ADD_TEST_CASE(tp, dnvlist_take_bool__empty); ATF_ADD_TEST_CASE(tp, dnvlist_take_bool__default_value); ATF_ADD_TEST_CASE(tp, dnvlist_take_number__present); ATF_ADD_TEST_CASE(tp, dnvlist_take_number__empty); ATF_ADD_TEST_CASE(tp, dnvlist_take_number__default_value); ATF_ADD_TEST_CASE(tp, dnvlist_take_string__present); ATF_ADD_TEST_CASE(tp, dnvlist_take_string__empty); ATF_ADD_TEST_CASE(tp, dnvlist_take_string__default_value); ATF_ADD_TEST_CASE(tp, dnvlist_take_nvlist__present); ATF_ADD_TEST_CASE(tp, dnvlist_take_nvlist__empty); ATF_ADD_TEST_CASE(tp, dnvlist_take_nvlist__default_value); ATF_ADD_TEST_CASE(tp, dnvlist_take_binary__present); ATF_ADD_TEST_CASE(tp, dnvlist_take_binary__empty); ATF_ADD_TEST_CASE(tp, dnvlist_take_binary__default_value); } diff --git a/lib/libnv/tests/nv_array_tests.cc b/lib/libnv/tests/nv_array_tests.cc index 3de0ee877a2f..f6ffcef83be7 100644 --- a/lib/libnv/tests/nv_array_tests.cc +++ b/lib/libnv/tests/nv_array_tests.cc @@ -1,1191 +1,1196 @@ /*- * Copyright (c) 2015 Mariusz Zaborski * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); -#include +#include #include +#include #include #include #include #include #include #include #include #include #include #define fd_is_valid(fd) (fcntl((fd), F_GETFL) != -1 || errno != EBADF) ATF_TEST_CASE_WITHOUT_HEAD(nvlist_bool_array__basic); ATF_TEST_CASE_BODY(nvlist_bool_array__basic) { bool testbool[16]; const bool *const_result; bool *result; nvlist_t *nvl; - size_t nitems; + size_t num_items; unsigned int i; const char *key; key = "nvl/bool"; nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); ATF_REQUIRE(nvlist_empty(nvl)); ATF_REQUIRE(!nvlist_exists_string_array(nvl, key)); for (i = 0; i < 16; i++) testbool[i] = (i % 2 == 0); nvlist_add_bool_array(nvl, key, testbool, 16); ATF_REQUIRE_EQ(nvlist_error(nvl), 0); ATF_REQUIRE(!nvlist_empty(nvl)); ATF_REQUIRE(nvlist_exists_bool_array(nvl, key)); ATF_REQUIRE(nvlist_exists_bool_array(nvl, "nvl/bool")); - const_result = nvlist_get_bool_array(nvl, key, &nitems); - ATF_REQUIRE_EQ(nitems, 16); + const_result = nvlist_get_bool_array(nvl, key, &num_items); + ATF_REQUIRE_EQ(num_items, 16); ATF_REQUIRE(const_result != NULL); - for (i = 0; i < nitems; i++) + for (i = 0; i < num_items; i++) ATF_REQUIRE_EQ(const_result[i], testbool[i]); - result = nvlist_take_bool_array(nvl, key, &nitems); - ATF_REQUIRE_EQ(nitems, 16); + result = nvlist_take_bool_array(nvl, key, &num_items); + ATF_REQUIRE_EQ(num_items, 16); ATF_REQUIRE(const_result != NULL); - for (i = 0; i < nitems; i++) + for (i = 0; i < num_items; i++) ATF_REQUIRE_EQ(result[i], testbool[i]); ATF_REQUIRE(!nvlist_exists_bool_array(nvl, key)); ATF_REQUIRE(nvlist_empty(nvl)); ATF_REQUIRE_EQ(nvlist_error(nvl), 0); free(result); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(nvlist_string_array__basic); ATF_TEST_CASE_BODY(nvlist_string_array__basic) { const char * const *const_result; char **result; nvlist_t *nvl; - size_t nitems; + size_t num_items; unsigned int i; const char *key; - const char *string[8] = { "a", "b", "kot", "foo", + const char *string_arr[8] = { "a", "b", "kot", "foo", "tests", "nice test", "", "abcdef" }; key = "nvl/string"; nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); ATF_REQUIRE(nvlist_empty(nvl)); ATF_REQUIRE(!nvlist_exists_string_array(nvl, key)); - nvlist_add_string_array(nvl, key, string, 8); + nvlist_add_string_array(nvl, key, string_arr, nitems(string_arr)); ATF_REQUIRE_EQ(nvlist_error(nvl), 0); ATF_REQUIRE(!nvlist_empty(nvl)); ATF_REQUIRE(nvlist_exists_string_array(nvl, key)); ATF_REQUIRE(nvlist_exists_string_array(nvl, "nvl/string")); - const_result = nvlist_get_string_array(nvl, key, &nitems); + const_result = nvlist_get_string_array(nvl, key, &num_items); ATF_REQUIRE(!nvlist_empty(nvl)); ATF_REQUIRE(const_result != NULL); - ATF_REQUIRE(nitems == 8); - for (i = 0; i < nitems; i++) { - 
if (string[i] != NULL) { - ATF_REQUIRE(strcmp(const_result[i], string[i]) == 0); + ATF_REQUIRE(num_items == nitems(string_arr)); + for (i = 0; i < num_items; i++) { + if (string_arr[i] != NULL) { + ATF_REQUIRE(strcmp(const_result[i], + string_arr[i]) == 0); } else { - ATF_REQUIRE(const_result[i] == string[i]); + ATF_REQUIRE(const_result[i] == string_arr[i]); } } - result = nvlist_take_string_array(nvl, key, &nitems); + result = nvlist_take_string_array(nvl, key, &num_items); ATF_REQUIRE(result != NULL); - ATF_REQUIRE_EQ(nitems, 8); - for (i = 0; i < nitems; i++) { - if (string[i] != NULL) { - ATF_REQUIRE_EQ(strcmp(result[i], string[i]), 0); + ATF_REQUIRE_EQ(num_items, nitems(string_arr)); + for (i = 0; i < num_items; i++) { + if (string_arr[i] != NULL) { + ATF_REQUIRE_EQ(strcmp(result[i], string_arr[i]), 0); } else { - ATF_REQUIRE_EQ(result[i], string[i]); + ATF_REQUIRE_EQ(result[i], string_arr[i]); } } ATF_REQUIRE(!nvlist_exists_string_array(nvl, key)); ATF_REQUIRE(nvlist_empty(nvl)); ATF_REQUIRE_EQ(nvlist_error(nvl), 0); - for (i = 0; i < 8; i++) + for (i = 0; i < num_items; i++) free(result[i]); free(result); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(nvlist_descriptor_array__basic); ATF_TEST_CASE_BODY(nvlist_descriptor_array__basic) { int fd[32], *result; const int *const_result; nvlist_t *nvl; - size_t nitems; + size_t num_items; unsigned int i; const char *key; - for (i = 0; i < 32; i++) { + for (i = 0; i < nitems(fd); i++) { fd[i] = dup(STDERR_FILENO); ATF_REQUIRE(fd_is_valid(fd[i])); } key = "nvl/descriptor"; nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); ATF_REQUIRE(nvlist_empty(nvl)); ATF_REQUIRE(!nvlist_exists_descriptor_array(nvl, key)); - nvlist_add_descriptor_array(nvl, key, fd, 32); + nvlist_add_descriptor_array(nvl, key, fd, nitems(fd)); ATF_REQUIRE_EQ(nvlist_error(nvl), 0); ATF_REQUIRE(!nvlist_empty(nvl)); ATF_REQUIRE(nvlist_exists_descriptor_array(nvl, key)); ATF_REQUIRE(nvlist_exists_descriptor_array(nvl, "nvl/descriptor")); - const_result = nvlist_get_descriptor_array(nvl, key, &nitems); + const_result = nvlist_get_descriptor_array(nvl, key, &num_items); ATF_REQUIRE(!nvlist_empty(nvl)); ATF_REQUIRE(const_result != NULL); - ATF_REQUIRE(nitems == 32); - for (i = 0; i < nitems; i++) { + ATF_REQUIRE(num_items == nitems(fd)); + for (i = 0; i < num_items; i++) { ATF_REQUIRE(fd_is_valid(const_result[i])); if (i > 0) ATF_REQUIRE(const_result[i] != const_result[i - 1]); } - result = nvlist_take_descriptor_array(nvl, key, &nitems); + result = nvlist_take_descriptor_array(nvl, key, &num_items); ATF_REQUIRE(result != NULL); - ATF_REQUIRE_EQ(nitems, 32); - for (i = 0; i < nitems; i++) { + ATF_REQUIRE_EQ(num_items, nitems(fd)); + for (i = 0; i < num_items; i++) { ATF_REQUIRE(fd_is_valid(result[i])); if (i > 0) ATF_REQUIRE(const_result[i] != const_result[i - 1]); } ATF_REQUIRE(!nvlist_exists_string_array(nvl, key)); ATF_REQUIRE(nvlist_empty(nvl)); ATF_REQUIRE_EQ(nvlist_error(nvl), 0); - for (i = 0; i < nitems; i++) { + for (i = 0; i < num_items; i++) { close(result[i]); close(fd[i]); } free(result); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(nvlist_number_array__basic); ATF_TEST_CASE_BODY(nvlist_number_array__basic) { const uint64_t *const_result; uint64_t *result; nvlist_t *nvl; - size_t nitems; + size_t num_items; unsigned int i; const char *key; const uint64_t number[8] = { 0, UINT_MAX, 7, 123, 90, 100000, 8, 1 }; key = "nvl/number"; nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); ATF_REQUIRE(nvlist_empty(nvl)); ATF_REQUIRE(!nvlist_exists_string_array(nvl, key)); 
- nvlist_add_number_array(nvl, key, number, 8); + nvlist_add_number_array(nvl, key, number, nitems(number)); ATF_REQUIRE_EQ(nvlist_error(nvl), 0); ATF_REQUIRE(!nvlist_empty(nvl)); ATF_REQUIRE(nvlist_exists_number_array(nvl, key)); ATF_REQUIRE(nvlist_exists_number_array(nvl, "nvl/number")); - const_result = nvlist_get_number_array(nvl, key, &nitems); + const_result = nvlist_get_number_array(nvl, key, &num_items); ATF_REQUIRE(!nvlist_empty(nvl)); ATF_REQUIRE(const_result != NULL); - ATF_REQUIRE(nitems == 8); - for (i = 0; i < nitems; i++) + ATF_REQUIRE(num_items == nitems(number)); + for (i = 0; i < num_items; i++) ATF_REQUIRE_EQ(const_result[i], number[i]); - result = nvlist_take_number_array(nvl, key, &nitems); + result = nvlist_take_number_array(nvl, key, &num_items); ATF_REQUIRE(result != NULL); - ATF_REQUIRE_EQ(nitems, 8); - for (i = 0; i < nitems; i++) + ATF_REQUIRE_EQ(num_items, nitems(number)); + for (i = 0; i < num_items; i++) ATF_REQUIRE_EQ(result[i], number[i]); ATF_REQUIRE(!nvlist_exists_string_array(nvl, key)); ATF_REQUIRE(nvlist_empty(nvl)); ATF_REQUIRE_EQ(nvlist_error(nvl), 0); free(result); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(nvlist_nvlist_array__basic); ATF_TEST_CASE_BODY(nvlist_nvlist_array__basic) { nvlist_t *testnvl[8]; const nvlist_t * const *const_result; nvlist_t **result; nvlist_t *nvl; - size_t nitems; + size_t num_items; unsigned int i; const char *somestr[8] = { "a", "b", "c", "d", "e", "f", "g", "h" }; const char *key; for (i = 0; i < 8; i++) { testnvl[i] = nvlist_create(0); ATF_REQUIRE(testnvl[i] != NULL); ATF_REQUIRE_EQ(nvlist_error(testnvl[i]), 0); nvlist_add_string(testnvl[i], "nvl/string", somestr[i]); ATF_REQUIRE_EQ(nvlist_error(testnvl[i]), 0); ATF_REQUIRE(nvlist_exists_string(testnvl[i], "nvl/string")); } key = "nvl/nvlist"; nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); ATF_REQUIRE(nvlist_empty(nvl)); ATF_REQUIRE(!nvlist_exists_string_array(nvl, key)); nvlist_add_nvlist_array(nvl, key, (const nvlist_t * const *)testnvl, 8); ATF_REQUIRE_EQ(nvlist_error(nvl), 0); ATF_REQUIRE(!nvlist_empty(nvl)); ATF_REQUIRE(nvlist_exists_nvlist_array(nvl, key)); ATF_REQUIRE(nvlist_exists_nvlist_array(nvl, "nvl/nvlist")); - const_result = nvlist_get_nvlist_array(nvl, key, &nitems); + const_result = nvlist_get_nvlist_array(nvl, key, &num_items); ATF_REQUIRE(!nvlist_empty(nvl)); ATF_REQUIRE(const_result != NULL); - ATF_REQUIRE(nitems == 8); + ATF_REQUIRE(num_items == nitems(testnvl)); - for (i = 0; i < nitems; i++) { + for (i = 0; i < num_items; i++) { ATF_REQUIRE_EQ(nvlist_error(const_result[i]), 0); - if (i < nitems - 1) { + if (i < num_items - 1) { ATF_REQUIRE(nvlist_get_array_next(const_result[i]) == const_result[i + 1]); } else { ATF_REQUIRE(nvlist_get_array_next(const_result[i]) == NULL); } ATF_REQUIRE(nvlist_get_parent(const_result[i], NULL) == nvl); ATF_REQUIRE(nvlist_in_array(const_result[i])); ATF_REQUIRE(nvlist_exists_string(const_result[i], "nvl/string")); ATF_REQUIRE(strcmp(nvlist_get_string(const_result[i], "nvl/string"), somestr[i]) == 0); } - result = nvlist_take_nvlist_array(nvl, key, &nitems); + result = nvlist_take_nvlist_array(nvl, key, &num_items); ATF_REQUIRE(result != NULL); - ATF_REQUIRE_EQ(nitems, 8); - for (i = 0; i < nitems; i++) { + ATF_REQUIRE_EQ(num_items, 8); + for (i = 0; i < num_items; i++) { ATF_REQUIRE_EQ(nvlist_error(result[i]), 0); ATF_REQUIRE(nvlist_get_array_next(result[i]) == NULL); ATF_REQUIRE(nvlist_get_array_next(const_result[i]) == NULL); ATF_REQUIRE(!nvlist_in_array(const_result[i])); } 
ATF_REQUIRE(!nvlist_exists_string_array(nvl, key)); ATF_REQUIRE(nvlist_empty(nvl)); ATF_REQUIRE_EQ(nvlist_error(nvl), 0); for (i = 0; i < 8; i++) { nvlist_destroy(result[i]); nvlist_destroy(testnvl[i]); } free(result); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(nvlist_clone_array); ATF_TEST_CASE_BODY(nvlist_clone_array) { nvlist_t *testnvl[8]; nvlist_t *src, *dst; const nvlist_t *nvl; bool testbool[16]; int testfd[16]; - size_t i, nitems; - const char *string[8] = { "a", "b", "kot", "foo", + size_t i, num_items; + const char *string_arr[8] = { "a", "b", "kot", "foo", "tests", "nice test", "", "abcdef" }; const char *somestr[8] = { "a", "b", "c", "d", "e", "f", "g", "h" }; const uint64_t number[8] = { 0, UINT_MAX, 7, 123, 90, 100000, 8, 1 }; - for (i = 0; i < 16; i++) { + for (i = 0; i < nitems(testfd); i++) { testbool[i] = (i % 2 == 0); testfd[i] = dup(STDERR_FILENO); ATF_REQUIRE(fd_is_valid(testfd[i])); } - for (i = 0; i < 8; i++) { + for (i = 0; i < nitems(testnvl); i++) { testnvl[i] = nvlist_create(0); ATF_REQUIRE(nvlist_error(testnvl[i]) == 0); nvlist_add_string(testnvl[i], "nvl/nvl/teststr", somestr[i]); ATF_REQUIRE(nvlist_error(testnvl[i]) == 0); } src = nvlist_create(0); ATF_REQUIRE(nvlist_error(src) == 0); ATF_REQUIRE(!nvlist_exists_bool_array(src, "nvl/bool")); - nvlist_add_bool_array(src, "nvl/bool", testbool, 16); + nvlist_add_bool_array(src, "nvl/bool", testbool, nitems(testbool)); ATF_REQUIRE_EQ(nvlist_error(src), 0); ATF_REQUIRE(nvlist_exists_bool_array(src, "nvl/bool")); ATF_REQUIRE(!nvlist_exists_string_array(src, "nvl/string")); - nvlist_add_string_array(src, "nvl/string", string, 8); + nvlist_add_string_array(src, "nvl/string", string_arr, + nitems(string_arr)); ATF_REQUIRE_EQ(nvlist_error(src), 0); ATF_REQUIRE(nvlist_exists_string_array(src, "nvl/string")); ATF_REQUIRE(!nvlist_exists_descriptor_array(src, "nvl/fd")); - nvlist_add_descriptor_array(src, "nvl/fd", testfd, 16); + nvlist_add_descriptor_array(src, "nvl/fd", testfd, nitems(testfd)); ATF_REQUIRE_EQ(nvlist_error(src), 0); ATF_REQUIRE(nvlist_exists_descriptor_array(src, "nvl/fd")); ATF_REQUIRE(!nvlist_exists_number_array(src, "nvl/number")); - nvlist_add_number_array(src, "nvl/number", number, 8); + nvlist_add_number_array(src, "nvl/number", number, + nitems(number)); ATF_REQUIRE_EQ(nvlist_error(src), 0); ATF_REQUIRE(nvlist_exists_number_array(src, "nvl/number")); ATF_REQUIRE(!nvlist_exists_nvlist_array(src, "nvl/array")); nvlist_add_nvlist_array(src, "nvl/array", - (const nvlist_t * const *)testnvl, 8); + (const nvlist_t * const *)testnvl, nitems(testnvl)); ATF_REQUIRE_EQ(nvlist_error(src), 0); ATF_REQUIRE(nvlist_exists_nvlist_array(src, "nvl/array")); dst = nvlist_clone(src); ATF_REQUIRE(dst != NULL); ATF_REQUIRE(nvlist_exists_bool_array(dst, "nvl/bool")); - (void) nvlist_get_bool_array(dst, "nvl/bool", &nitems); - ATF_REQUIRE_EQ(nitems, 16); - for (i = 0; i < nitems; i++) { + (void) nvlist_get_bool_array(dst, "nvl/bool", &num_items); + ATF_REQUIRE_EQ(num_items, nitems(testbool)); + for (i = 0; i < num_items; i++) { ATF_REQUIRE( - nvlist_get_bool_array(dst, "nvl/bool", &nitems)[i] == - nvlist_get_bool_array(src, "nvl/bool", &nitems)[i]); + nvlist_get_bool_array(dst, "nvl/bool", &num_items)[i] == + nvlist_get_bool_array(src, "nvl/bool", &num_items)[i]); } ATF_REQUIRE(nvlist_exists_string_array(dst, "nvl/string")); - (void) nvlist_get_string_array(dst, "nvl/string", &nitems); - ATF_REQUIRE_EQ(nitems, 8); - for (i = 0; i < nitems; i++) { + (void) nvlist_get_string_array(dst, "nvl/string", &num_items); + 
ATF_REQUIRE_EQ(num_items, nitems(string_arr)); + for (i = 0; i < num_items; i++) { if (nvlist_get_string_array(dst, "nvl/string", - &nitems)[i] == NULL) { + &num_items)[i] == NULL) { ATF_REQUIRE(nvlist_get_string_array(dst, "nvl/string", - &nitems)[i] == nvlist_get_string_array(src, - "nvl/string", &nitems)[i]); + &num_items)[i] == nvlist_get_string_array(src, + "nvl/string", &num_items)[i]); } else { ATF_REQUIRE(strcmp(nvlist_get_string_array(dst, - "nvl/string", &nitems)[i], nvlist_get_string_array( - src, "nvl/string", &nitems)[i]) == 0); + "nvl/string", &num_items)[i], nvlist_get_string_array( + src, "nvl/string", &num_items)[i]) == 0); } } ATF_REQUIRE(nvlist_exists_descriptor_array(dst, "nvl/fd")); - (void) nvlist_get_descriptor_array(dst, "nvl/fd", &nitems); - ATF_REQUIRE_EQ(nitems, 16); - for (i = 0; i < nitems; i++) { + (void) nvlist_get_descriptor_array(dst, "nvl/fd", &num_items); + ATF_REQUIRE_EQ(num_items, nitems(testfd)); + for (i = 0; i < num_items; i++) { ATF_REQUIRE(fd_is_valid( - nvlist_get_descriptor_array(dst, "nvl/fd", &nitems)[i])); + nvlist_get_descriptor_array(dst, "nvl/fd", &num_items)[i])); } ATF_REQUIRE(nvlist_exists_number_array(dst, "nvl/number")); - (void) nvlist_get_number_array(dst, "nvl/number", &nitems); - ATF_REQUIRE_EQ(nitems, 8); + (void) nvlist_get_number_array(dst, "nvl/number", &num_items); + ATF_REQUIRE_EQ(num_items, nitems(number)); - for (i = 0; i < nitems; i++) { + for (i = 0; i < num_items; i++) { ATF_REQUIRE( - nvlist_get_number_array(dst, "nvl/number", &nitems)[i] == - nvlist_get_number_array(src, "nvl/number", &nitems)[i]); + nvlist_get_number_array(dst, "nvl/number", &num_items)[i] == + nvlist_get_number_array(src, "nvl/number", &num_items)[i]); } ATF_REQUIRE(nvlist_exists_nvlist_array(dst, "nvl/array")); - (void) nvlist_get_nvlist_array(dst, "nvl/array", &nitems); - ATF_REQUIRE_EQ(nitems, 8); - for (i = 0; i < nitems; i++) { - nvl = nvlist_get_nvlist_array(dst, "nvl/array", &nitems)[i]; + (void) nvlist_get_nvlist_array(dst, "nvl/array", &num_items); + ATF_REQUIRE_EQ(num_items, nitems(testnvl)); + for (i = 0; i < num_items; i++) { + nvl = nvlist_get_nvlist_array(dst, "nvl/array", &num_items)[i]; ATF_REQUIRE(nvlist_exists_string(nvl, "nvl/nvl/teststr")); ATF_REQUIRE(strcmp(nvlist_get_string(nvl, "nvl/nvl/teststr"), somestr[i]) == 0); } - for (i = 0; i < 16; i++) { + for (i = 0; i < nitems(testfd); i++) { close(testfd[i]); - if (i < 8) { - nvlist_destroy(testnvl[i]); - } + } + for (i = 0; i < nitems(testnvl); i++) { + nvlist_destroy(testnvl[i]); } nvlist_destroy(src); nvlist_destroy(dst); } ATF_TEST_CASE_WITHOUT_HEAD(nvlist_bool_array__move); ATF_TEST_CASE_BODY(nvlist_bool_array__move) { bool *testbool; const bool *const_result; nvlist_t *nvl; - size_t nitems, count; + size_t num_items, count; unsigned int i; const char *key; key = "nvl/bool"; nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); ATF_REQUIRE(nvlist_empty(nvl)); ATF_REQUIRE(!nvlist_exists_string_array(nvl, key)); count = 16; testbool = (bool*)malloc(sizeof(*testbool) * count); ATF_REQUIRE(testbool != NULL); for (i = 0; i < count; i++) testbool[i] = (i % 2 == 0); nvlist_move_bool_array(nvl, key, testbool, count); ATF_REQUIRE_EQ(nvlist_error(nvl), 0); ATF_REQUIRE(!nvlist_empty(nvl)); ATF_REQUIRE(nvlist_exists_bool_array(nvl, key)); - const_result = nvlist_get_bool_array(nvl, key, &nitems); - ATF_REQUIRE_EQ(nitems, count); + const_result = nvlist_get_bool_array(nvl, key, &num_items); + ATF_REQUIRE_EQ(num_items, count); ATF_REQUIRE(const_result != NULL); ATF_REQUIRE(const_result == 
testbool); - for (i = 0; i < nitems; i++) + for (i = 0; i < num_items; i++) ATF_REQUIRE_EQ(const_result[i], (i % 2 == 0)); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(nvlist_string_array__move); ATF_TEST_CASE_BODY(nvlist_string_array__move) { char **teststr; const char * const *const_result; nvlist_t *nvl; - size_t nitems, count; + size_t num_items, count; unsigned int i; const char *key; key = "nvl/string"; nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); ATF_REQUIRE(nvlist_empty(nvl)); ATF_REQUIRE(!nvlist_exists_string_array(nvl, key)); count = 26; teststr = (char**)malloc(sizeof(*teststr) * count); ATF_REQUIRE(teststr != NULL); for (i = 0; i < count; i++) { teststr[i] = (char*)malloc(sizeof(**teststr) * 2); ATF_REQUIRE(teststr[i] != NULL); teststr[i][0] = 'a' + i; teststr[i][1] = '\0'; } nvlist_move_string_array(nvl, key, teststr, count); ATF_REQUIRE_EQ(nvlist_error(nvl), 0); ATF_REQUIRE(!nvlist_empty(nvl)); ATF_REQUIRE(nvlist_exists_string_array(nvl, key)); - const_result = nvlist_get_string_array(nvl, key, &nitems); - ATF_REQUIRE_EQ(nitems, count); + const_result = nvlist_get_string_array(nvl, key, &num_items); + ATF_REQUIRE_EQ(num_items, count); ATF_REQUIRE(const_result != NULL); ATF_REQUIRE((intptr_t)const_result == (intptr_t)teststr); - for (i = 0; i < nitems; i++) { + for (i = 0; i < num_items; i++) { ATF_REQUIRE_EQ(const_result[i][0], (char)('a' + i)); ATF_REQUIRE_EQ(const_result[i][1], '\0'); } nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(nvlist_nvlist_array__move); ATF_TEST_CASE_BODY(nvlist_nvlist_array__move) { nvlist **testnv; const nvlist * const *const_result; nvlist_t *nvl; - size_t nitems, count; + size_t num_items, count; unsigned int i; const char *key; key = "nvl/nvlist"; nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); ATF_REQUIRE(nvlist_empty(nvl)); ATF_REQUIRE(!nvlist_exists_nvlist_array(nvl, key)); count = 26; testnv = (nvlist**)malloc(sizeof(*testnv) * count); ATF_REQUIRE(testnv != NULL); for (i = 0; i < count; i++) { testnv[i] = nvlist_create(0); ATF_REQUIRE(testnv[i] != NULL); } nvlist_move_nvlist_array(nvl, key, testnv, count); ATF_REQUIRE_EQ(nvlist_error(nvl), 0); ATF_REQUIRE(!nvlist_empty(nvl)); ATF_REQUIRE(nvlist_exists_nvlist_array(nvl, key)); - const_result = nvlist_get_nvlist_array(nvl, key, &nitems); - ATF_REQUIRE_EQ(nitems, count); + const_result = nvlist_get_nvlist_array(nvl, key, &num_items); + ATF_REQUIRE_EQ(num_items, count); ATF_REQUIRE(const_result != NULL); ATF_REQUIRE((intptr_t)const_result == (intptr_t)testnv); - for (i = 0; i < nitems; i++) { + for (i = 0; i < num_items; i++) { ATF_REQUIRE_EQ(nvlist_error(const_result[i]), 0); ATF_REQUIRE(nvlist_empty(const_result[i])); - if (i < nitems - 1) { + if (i < num_items - 1) { ATF_REQUIRE(nvlist_get_array_next(const_result[i]) == const_result[i + 1]); } else { ATF_REQUIRE(nvlist_get_array_next(const_result[i]) == NULL); } ATF_REQUIRE(nvlist_get_parent(const_result[i], NULL) == nvl); ATF_REQUIRE(nvlist_in_array(const_result[i])); } nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(nvlist_number_array__move); ATF_TEST_CASE_BODY(nvlist_number_array__move) { uint64_t *testnumber; const uint64_t *const_result; nvlist_t *nvl; - size_t nitems, count; + size_t num_items, count; unsigned int i; const char *key; key = "nvl/number"; nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); ATF_REQUIRE(nvlist_empty(nvl)); ATF_REQUIRE(!nvlist_exists_string_array(nvl, key)); count = 1000; testnumber = (uint64_t*)malloc(sizeof(*testnumber) * count); ATF_REQUIRE(testnumber != NULL); for (i = 0; i < count; i++) 
testnumber[i] = i; nvlist_move_number_array(nvl, key, testnumber, count); ATF_REQUIRE_EQ(nvlist_error(nvl), 0); ATF_REQUIRE(!nvlist_empty(nvl)); ATF_REQUIRE(nvlist_exists_number_array(nvl, key)); - const_result = nvlist_get_number_array(nvl, key, &nitems); - ATF_REQUIRE_EQ(nitems, count); + const_result = nvlist_get_number_array(nvl, key, &num_items); + ATF_REQUIRE_EQ(num_items, count); ATF_REQUIRE(const_result != NULL); ATF_REQUIRE(const_result == testnumber); - for (i = 0; i < nitems; i++) + for (i = 0; i < num_items; i++) ATF_REQUIRE_EQ(const_result[i], i); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(nvlist_descriptor_array__move); ATF_TEST_CASE_BODY(nvlist_descriptor_array__move) { int *testfd; const int *const_result; nvlist_t *nvl; - size_t nitems, count; + size_t num_items, count; unsigned int i; const char *key; key = "nvl/fd"; nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); ATF_REQUIRE(nvlist_empty(nvl)); ATF_REQUIRE(!nvlist_exists_string_array(nvl, key)); count = 50; testfd = (int*)malloc(sizeof(*testfd) * count); ATF_REQUIRE(testfd != NULL); for (i = 0; i < count; i++) { testfd[i] = dup(STDERR_FILENO); ATF_REQUIRE(fd_is_valid(testfd[i])); } nvlist_move_descriptor_array(nvl, key, testfd, count); ATF_REQUIRE_EQ(nvlist_error(nvl), 0); ATF_REQUIRE(!nvlist_empty(nvl)); ATF_REQUIRE(nvlist_exists_descriptor_array(nvl, key)); - const_result = nvlist_get_descriptor_array(nvl, key, &nitems); - ATF_REQUIRE_EQ(nitems, count); + const_result = nvlist_get_descriptor_array(nvl, key, &num_items); + ATF_REQUIRE_EQ(num_items, count); ATF_REQUIRE(const_result != NULL); ATF_REQUIRE(const_result == testfd); - for (i = 0; i < nitems; i++) + for (i = 0; i < num_items; i++) ATF_REQUIRE(fd_is_valid(const_result[i])); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(nvlist_arrays__error_null); ATF_TEST_CASE_BODY(nvlist_arrays__error_null) { nvlist_t *nvl; nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); nvlist_add_number_array(nvl, "nvl/number", NULL, 0); ATF_REQUIRE(nvlist_error(nvl) != 0); nvlist_destroy(nvl); nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); nvlist_move_number_array(nvl, "nvl/number", NULL, 0); ATF_REQUIRE(nvlist_error(nvl) != 0); nvlist_destroy(nvl); nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); nvlist_add_descriptor_array(nvl, "nvl/fd", NULL, 0); ATF_REQUIRE(nvlist_error(nvl) != 0); nvlist_destroy(nvl); nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); nvlist_move_descriptor_array(nvl, "nvl/fd", NULL, 0); ATF_REQUIRE(nvlist_error(nvl) != 0); nvlist_destroy(nvl); nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); nvlist_add_string_array(nvl, "nvl/string", NULL, 0); ATF_REQUIRE(nvlist_error(nvl) != 0); nvlist_destroy(nvl); nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); nvlist_move_string_array(nvl, "nvl/string", NULL, 0); ATF_REQUIRE(nvlist_error(nvl) != 0); nvlist_destroy(nvl); nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); nvlist_add_nvlist_array(nvl, "nvl/nvlist", NULL, 0); ATF_REQUIRE(nvlist_error(nvl) != 0); nvlist_destroy(nvl); nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); nvlist_move_nvlist_array(nvl, "nvl/nvlist", NULL, 0); ATF_REQUIRE(nvlist_error(nvl) != 0); nvlist_destroy(nvl); nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); nvlist_add_bool_array(nvl, "nvl/bool", NULL, 0); ATF_REQUIRE(nvlist_error(nvl) != 0); nvlist_destroy(nvl); nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); nvlist_move_bool_array(nvl, "nvl/bool", NULL, 0); ATF_REQUIRE(nvlist_error(nvl) != 0); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(nvlist_arrays__bad_value); 
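Most of the churn in this file replaces hard-coded array lengths (8, 16, 32) with the nitems() macro from <sys/param.h>, renaming the local size_t counters from nitems to num_items so they no longer shadow the macro. nitems() is the classic element-count idiom, sizeof(array) / sizeof(array[0]); it is only meaningful on true arrays, which is why the tests apply it directly to the stack arrays (testbool, string_arr, fd, ...) and never to pointers. A minimal sketch:

#include <sys/param.h>  /* nitems() on FreeBSD */
#include <stdio.h>

int
main(void)
{
        const unsigned int numbers[] = { 0, 7, 123, 90 };
        unsigned int i;

        /* nitems(numbers) == sizeof(numbers) / sizeof(numbers[0]) == 4 */
        for (i = 0; i < nitems(numbers); i++)
                printf("%u\n", numbers[i]);
        return (0);
}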
ATF_TEST_CASE_BODY(nvlist_arrays__bad_value) { nvlist_t *nvl, *nvladd[1], **nvlmove; int fdadd[1], *fdmove; nvladd[0] = NULL; nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); nvlist_add_nvlist_array(nvl, "nvl/nvlist", nvladd, 1); ATF_REQUIRE(nvlist_error(nvl) != 0); nvlist_destroy(nvl); nvlmove = (nvlist_t**)malloc(sizeof(*nvlmove)); ATF_REQUIRE(nvlmove != NULL); nvlmove[0] = NULL; nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); nvlist_move_nvlist_array(nvl, "nvl/nvlist", nvlmove, 1); ATF_REQUIRE(nvlist_error(nvl) != 0); nvlist_destroy(nvl); fdadd[0] = -2; nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); nvlist_add_descriptor_array(nvl, "nvl/fd", fdadd, 1); ATF_REQUIRE(nvlist_error(nvl) != 0); nvlist_destroy(nvl); fdmove = (int*)malloc(sizeof(*fdmove)); ATF_REQUIRE(fdmove != NULL); fdmove[0] = -2; nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); nvlist_move_descriptor_array(nvl, "nvl/fd", fdmove, 1); ATF_REQUIRE(nvlist_error(nvl) != 0); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(nvlist_nvlist_array__travel); ATF_TEST_CASE_BODY(nvlist_nvlist_array__travel) { nvlist_t *nvl, *test[5], *nasted; const nvlist_t *travel; - void *cookie; - int index, i, type; const char *name; + void *cookie; + int type; + unsigned int i, index; - for (i = 0; i < 5; i++) { + for (i = 0; i < nitems(test); i++) { test[i] = nvlist_create(0); ATF_REQUIRE(test[i] != NULL); nvlist_add_number(test[i], "nvl/number", i); ATF_REQUIRE(nvlist_error(test[i]) == 0); } nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); - nvlist_add_nvlist_array(nvl, "nvl/nvlist_array", test, 5); + nvlist_add_nvlist_array(nvl, "nvl/nvlist_array", test, nitems(test)); ATF_REQUIRE(nvlist_error(nvl) == 0); nasted = nvlist_create(0); ATF_REQUIRE(nasted != NULL); - nvlist_add_nvlist_array(nasted, "nvl/nvl/nvlist_array", test, 5); + nvlist_add_nvlist_array(nasted, "nvl/nvl/nvlist_array", test, + nitems(test)); ATF_REQUIRE(nvlist_error(nasted) == 0); nvlist_move_nvlist(nvl, "nvl/nvl", nasted); ATF_REQUIRE(nvlist_error(nvl) == 0); nvlist_add_string(nvl, "nvl/string", "END"); ATF_REQUIRE(nvlist_error(nvl) == 0); cookie = NULL; index = 0; travel = nvl; do { while ((name = nvlist_next(travel, &type, &cookie)) != NULL) { if (index == 0) { ATF_REQUIRE(type == NV_TYPE_NVLIST_ARRAY); - } else if (index >= 1 && index <= 5) { + } else if (index >= 1 && index <= nitems(test)) { ATF_REQUIRE(type == NV_TYPE_NUMBER); - } else if (index == 6) { + } else if (index == nitems(test) + 1) { ATF_REQUIRE(type == NV_TYPE_NVLIST); - } else if (index == 7) { + } else if (index == nitems(test) + 2) { ATF_REQUIRE(type == NV_TYPE_NVLIST_ARRAY); - } else if (index >= 8 && index <= 12) { + } else if (index >= nitems(test) + 3 && + index <= 2 * nitems(test) + 2) { ATF_REQUIRE(type == NV_TYPE_NUMBER); - } else if (index == 13) { + } else if (index == 2 * nitems(test) + 3) { ATF_REQUIRE(type == NV_TYPE_STRING); } if (type == NV_TYPE_NVLIST) { travel = nvlist_get_nvlist(travel, name); cookie = NULL; } else if (type == NV_TYPE_NVLIST_ARRAY) { travel = nvlist_get_nvlist_array(travel, name, NULL)[0]; cookie = NULL; } index ++; } } while ((travel = nvlist_get_pararr(travel, &cookie)) != NULL); - for (i = 0; i < 5; i++) + for (i = 0; i < nitems(test); i++) nvlist_destroy(test[i]); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(nvlist_nvlist_array__travel_alternative); ATF_TEST_CASE_BODY(nvlist_nvlist_array__travel_alternative) { nvlist_t *nvl, *test[5], *nasted; const nvlist_t *travel, *tmp; void *cookie; int index, i, type; const char *name; for (i = 0; i < 5; i++) { test[i] = 
nvlist_create(0); ATF_REQUIRE(test[i] != NULL); nvlist_add_number(test[i], "nvl/number", i); ATF_REQUIRE(nvlist_error(test[i]) == 0); } nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); nvlist_add_nvlist_array(nvl, "nvl/nvlist_array", test, 5); ATF_REQUIRE(nvlist_error(nvl) == 0); nasted = nvlist_create(0); ATF_REQUIRE(nasted != NULL); nvlist_add_nvlist_array(nasted, "nvl/nvl/nvlist_array", test, 5); ATF_REQUIRE(nvlist_error(nasted) == 0); nvlist_move_nvlist(nvl, "nvl/nvl", nasted); ATF_REQUIRE(nvlist_error(nvl) == 0); nvlist_add_string(nvl, "nvl/string", "END"); ATF_REQUIRE(nvlist_error(nvl) == 0); cookie = NULL; index = 0; tmp = travel = nvl; do { do { travel = tmp; while ((name = nvlist_next(travel, &type, &cookie)) != NULL) { if (index == 0) { ATF_REQUIRE(type == NV_TYPE_NVLIST_ARRAY); } else if (index >= 1 && index <= 5) { ATF_REQUIRE(type == NV_TYPE_NUMBER); } else if (index == 6) { ATF_REQUIRE(type == NV_TYPE_NVLIST); } else if (index == 7) { ATF_REQUIRE(type == NV_TYPE_NVLIST_ARRAY); } else if (index >= 8 && index <= 12) { ATF_REQUIRE(type == NV_TYPE_NUMBER); } else if (index == 13) { ATF_REQUIRE(type == NV_TYPE_STRING); } if (type == NV_TYPE_NVLIST) { travel = nvlist_get_nvlist(travel, name); cookie = NULL; } else if (type == NV_TYPE_NVLIST_ARRAY) { travel = nvlist_get_nvlist_array(travel, name, NULL)[0]; cookie = NULL; } index ++; } cookie = NULL; } while ((tmp = nvlist_get_array_next(travel)) != NULL); } while ((tmp = nvlist_get_parent(travel, &cookie)) != NULL); for (i = 0; i < 5; i++) nvlist_destroy(test[i]); nvlist_destroy(nvl); } ATF_TEST_CASE_WITHOUT_HEAD(nvlist_bool_array__pack); ATF_TEST_CASE_BODY(nvlist_bool_array__pack) { nvlist_t *nvl, *unpacked; const char *key; size_t packed_size, count; void *packed; unsigned int i; const bool *const_result; bool testbool[16]; - for (i = 0; i < 16; i++) + for (i = 0; i < nitems(testbool); i++) testbool[i] = (i % 2 == 0); key = "nvl/bool"; nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); ATF_REQUIRE(nvlist_empty(nvl)); ATF_REQUIRE(!nvlist_exists_string_array(nvl, key)); - nvlist_add_bool_array(nvl, key, testbool, 16); + nvlist_add_bool_array(nvl, key, testbool, nitems(testbool)); ATF_REQUIRE_EQ(nvlist_error(nvl), 0); ATF_REQUIRE(!nvlist_empty(nvl)); ATF_REQUIRE(nvlist_exists_bool_array(nvl, key)); packed = nvlist_pack(nvl, &packed_size); ATF_REQUIRE(packed != NULL); unpacked = nvlist_unpack(packed, packed_size, 0); ATF_REQUIRE(unpacked != NULL); ATF_REQUIRE_EQ(nvlist_error(unpacked), 0); ATF_REQUIRE(nvlist_exists_bool_array(unpacked, key)); const_result = nvlist_get_bool_array(unpacked, key, &count); - ATF_REQUIRE_EQ(count, 16); + ATF_REQUIRE_EQ(count, nitems(testbool)); for (i = 0; i < count; i++) { ATF_REQUIRE_EQ(testbool[i], const_result[i]); } nvlist_destroy(nvl); nvlist_destroy(unpacked); free(packed); } ATF_TEST_CASE_WITHOUT_HEAD(nvlist_number_array__pack); ATF_TEST_CASE_BODY(nvlist_number_array__pack) { nvlist_t *nvl, *unpacked; const char *key; size_t packed_size, count; void *packed; unsigned int i; const uint64_t *const_result; const uint64_t number[8] = { 0, UINT_MAX, 7, 123, 90, 100000, 8, 1 }; key = "nvl/number"; nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); ATF_REQUIRE(nvlist_empty(nvl)); ATF_REQUIRE(!nvlist_exists_string_array(nvl, key)); nvlist_add_number_array(nvl, key, number, 8); ATF_REQUIRE_EQ(nvlist_error(nvl), 0); ATF_REQUIRE(!nvlist_empty(nvl)); ATF_REQUIRE(nvlist_exists_number_array(nvl, key)); packed = nvlist_pack(nvl, &packed_size); ATF_REQUIRE(packed != NULL); unpacked = nvlist_unpack(packed, 
packed_size, 0); ATF_REQUIRE(unpacked != NULL); ATF_REQUIRE_EQ(nvlist_error(unpacked), 0); ATF_REQUIRE(nvlist_exists_number_array(unpacked, key)); const_result = nvlist_get_number_array(unpacked, key, &count); - ATF_REQUIRE_EQ(count, 8); + ATF_REQUIRE_EQ(count, nitems(number)); for (i = 0; i < count; i++) { ATF_REQUIRE_EQ(number[i], const_result[i]); } nvlist_destroy(nvl); nvlist_destroy(unpacked); free(packed); } ATF_TEST_CASE_WITHOUT_HEAD(nvlist_descriptor_array__pack); ATF_TEST_CASE_BODY(nvlist_descriptor_array__pack) { nvlist_t *nvl; const char *key; - size_t nitems; + size_t num_items; unsigned int i; const int *const_result; int desc[32], fd, socks[2]; pid_t pid; key = "nvl/descriptor"; ATF_REQUIRE_EQ(socketpair(PF_UNIX, SOCK_STREAM, 0, socks), 0); pid = atf::utils::fork(); ATF_REQUIRE(pid >= 0); if (pid == 0) { /* Child. */ fd = socks[0]; close(socks[1]); - for (i = 0; i < 32; i++) { + for (i = 0; i < nitems(desc); i++) { desc[i] = dup(STDERR_FILENO); ATF_REQUIRE(fd_is_valid(desc[i])); } nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); ATF_REQUIRE(nvlist_empty(nvl)); ATF_REQUIRE(!nvlist_exists_descriptor_array(nvl, key)); - nvlist_add_descriptor_array(nvl, key, desc, 32); + nvlist_add_descriptor_array(nvl, key, desc, nitems(desc)); ATF_REQUIRE_EQ(nvlist_error(nvl), 0); ATF_REQUIRE(!nvlist_empty(nvl)); ATF_REQUIRE(nvlist_exists_descriptor_array(nvl, key)); ATF_REQUIRE(nvlist_send(fd, nvl) >= 0); - for (i = 0; i < nitems; i++) + for (i = 0; i < nitems(desc); i++) close(desc[i]); } else { /* Parent */ fd = socks[1]; close(socks[0]); errno = 0; nvl = nvlist_recv(fd, 0); ATF_REQUIRE(nvl != NULL); ATF_REQUIRE_EQ(nvlist_error(nvl), 0); ATF_REQUIRE(nvlist_exists_descriptor_array(nvl, key)); - const_result = nvlist_get_descriptor_array(nvl, key, &nitems); + const_result = nvlist_get_descriptor_array(nvl, key, &num_items); ATF_REQUIRE(const_result != NULL); - ATF_REQUIRE_EQ(nitems, 32); - for (i = 0; i < nitems; i++) + ATF_REQUIRE_EQ(num_items, nitems(desc)); + for (i = 0; i < num_items; i++) ATF_REQUIRE(fd_is_valid(const_result[i])); atf::utils::wait(pid, 0, "", ""); } nvlist_destroy(nvl); close(fd); } ATF_TEST_CASE_WITHOUT_HEAD(nvlist_string_array__pack); ATF_TEST_CASE_BODY(nvlist_string_array__pack) { nvlist_t *nvl, *unpacked; const char *key; size_t packed_size, count; void *packed; unsigned int i; const char * const *const_result; - const char *string[8] = { "a", "b", "kot", "foo", + const char *string_arr[8] = { "a", "b", "kot", "foo", "tests", "nice test", "", "abcdef" }; key = "nvl/string"; nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); ATF_REQUIRE(nvlist_empty(nvl)); ATF_REQUIRE(!nvlist_exists_string_array(nvl, key)); - nvlist_add_string_array(nvl, key, string, 8); + nvlist_add_string_array(nvl, key, string_arr, nitems(string_arr)); ATF_REQUIRE_EQ(nvlist_error(nvl), 0); ATF_REQUIRE(!nvlist_empty(nvl)); ATF_REQUIRE(nvlist_exists_string_array(nvl, key)); packed = nvlist_pack(nvl, &packed_size); ATF_REQUIRE(packed != NULL); unpacked = nvlist_unpack(packed, packed_size, 0); ATF_REQUIRE(unpacked != NULL); ATF_REQUIRE_EQ(nvlist_error(unpacked), 0); ATF_REQUIRE(nvlist_exists_string_array(unpacked, key)); const_result = nvlist_get_string_array(unpacked, key, &count); - ATF_REQUIRE_EQ(count, 8); + ATF_REQUIRE_EQ(count, nitems(string_arr)); for (i = 0; i < count; i++) { - ATF_REQUIRE_EQ(strcmp(string[i], const_result[i]), 0); + ATF_REQUIRE_EQ(strcmp(string_arr[i], const_result[i]), 0); } nvlist_destroy(nvl); nvlist_destroy(unpacked); free(packed); } 
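Each of the *__pack cases uses the same round trip: build an nvlist, serialize it with nvlist_pack(), revive the buffer with nvlist_unpack(), and check that the array survived. (The descriptor-array test is the exception: file descriptors cannot be flattened into bytes, so it sends the list over a socketpair with nvlist_send()/nvlist_recv() instead.) A condensed sketch of the round trip using the same libnv calls; the key name is arbitrary:

#include <sys/nv.h>
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

int
main(void)
{
        static const uint64_t in[4] = { 1, 2, 3, 4 };
        const uint64_t *out;
        nvlist_t *nvl, *copy;
        size_t size, count;
        void *blob;

        nvl = nvlist_create(0);
        nvlist_add_number_array(nvl, "nums", in, 4);

        blob = nvlist_pack(nvl, &size);         /* serialize */
        assert(blob != NULL);
        copy = nvlist_unpack(blob, size, 0);    /* revive */
        assert(copy != NULL);

        out = nvlist_get_number_array(copy, "nums", &count);
        assert(count == 4 && out[0] == 1 && out[3] == 4);

        free(blob);
        nvlist_destroy(nvl);
        nvlist_destroy(copy);
        return (0);
}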
ATF_TEST_CASE_WITHOUT_HEAD(nvlist_nvlist_array__pack); ATF_TEST_CASE_BODY(nvlist_nvlist_array__pack) { nvlist_t *testnvl[8], *unpacked; const nvlist_t * const *const_result; - nvlist_t **result; nvlist_t *nvl; - size_t nitems, packed_size; + size_t num_items, packed_size; unsigned int i; void *packed; const char *somestr[8] = { "a", "b", "c", "d", "e", "f", "g", "h" }; const char *key; - for (i = 0; i < 8; i++) { + for (i = 0; i < nitems(testnvl); i++) { testnvl[i] = nvlist_create(0); ATF_REQUIRE(testnvl[i] != NULL); ATF_REQUIRE_EQ(nvlist_error(testnvl[i]), 0); nvlist_add_string(testnvl[i], "nvl/string", somestr[i]); ATF_REQUIRE_EQ(nvlist_error(testnvl[i]), 0); ATF_REQUIRE(nvlist_exists_string(testnvl[i], "nvl/string")); } key = "nvl/nvlist"; nvl = nvlist_create(0); ATF_REQUIRE(nvl != NULL); ATF_REQUIRE(nvlist_empty(nvl)); ATF_REQUIRE(!nvlist_exists_string_array(nvl, key)); nvlist_add_nvlist_array(nvl, key, (const nvlist_t * const *)testnvl, 8); ATF_REQUIRE_EQ(nvlist_error(nvl), 0); ATF_REQUIRE(!nvlist_empty(nvl)); ATF_REQUIRE(nvlist_exists_nvlist_array(nvl, key)); ATF_REQUIRE(nvlist_exists_nvlist_array(nvl, "nvl/nvlist")); packed = nvlist_pack(nvl, &packed_size); ATF_REQUIRE(packed != NULL); unpacked = nvlist_unpack(packed, packed_size, 0); ATF_REQUIRE(unpacked != NULL); ATF_REQUIRE_EQ(nvlist_error(unpacked), 0); ATF_REQUIRE(nvlist_exists_nvlist_array(unpacked, key)); - const_result = nvlist_get_nvlist_array(unpacked, key, &nitems); + const_result = nvlist_get_nvlist_array(unpacked, key, &num_items); ATF_REQUIRE(const_result != NULL); - ATF_REQUIRE_EQ(nitems, 8); - for (i = 0; i < nitems; i++) { + ATF_REQUIRE_EQ(num_items, nitems(testnvl)); + for (i = 0; i < num_items; i++) { ATF_REQUIRE_EQ(nvlist_error(const_result[i]), 0); - if (i < nitems - 1) { + if (i < num_items - 1) { ATF_REQUIRE(nvlist_get_array_next(const_result[i]) == const_result[i + 1]); } else { ATF_REQUIRE(nvlist_get_array_next(const_result[i]) == NULL); } ATF_REQUIRE(nvlist_get_parent(const_result[i], NULL) == unpacked); ATF_REQUIRE(nvlist_in_array(const_result[i])); ATF_REQUIRE(nvlist_exists_string(const_result[i], "nvl/string")); ATF_REQUIRE(strcmp(nvlist_get_string(const_result[i], "nvl/string"), somestr[i]) == 0); } - for (i = 0; i < 8; i++) + for (i = 0; i < nitems(testnvl); i++) nvlist_destroy(testnvl[i]); - free(result); nvlist_destroy(nvl); nvlist_destroy(unpacked); free(packed); } ATF_INIT_TEST_CASES(tp) { ATF_ADD_TEST_CASE(tp, nvlist_bool_array__basic); ATF_ADD_TEST_CASE(tp, nvlist_string_array__basic); ATF_ADD_TEST_CASE(tp, nvlist_descriptor_array__basic); ATF_ADD_TEST_CASE(tp, nvlist_number_array__basic); ATF_ADD_TEST_CASE(tp, nvlist_nvlist_array__basic) ATF_ADD_TEST_CASE(tp, nvlist_clone_array) ATF_ADD_TEST_CASE(tp, nvlist_bool_array__move); ATF_ADD_TEST_CASE(tp, nvlist_string_array__move); ATF_ADD_TEST_CASE(tp, nvlist_nvlist_array__move); ATF_ADD_TEST_CASE(tp, nvlist_number_array__move); ATF_ADD_TEST_CASE(tp, nvlist_descriptor_array__move); ATF_ADD_TEST_CASE(tp, nvlist_arrays__error_null); ATF_ADD_TEST_CASE(tp, nvlist_arrays__bad_value) ATF_ADD_TEST_CASE(tp, nvlist_nvlist_array__travel) ATF_ADD_TEST_CASE(tp, nvlist_nvlist_array__travel_alternative) ATF_ADD_TEST_CASE(tp, nvlist_bool_array__pack) ATF_ADD_TEST_CASE(tp, nvlist_number_array__pack) ATF_ADD_TEST_CASE(tp, nvlist_descriptor_array__pack) ATF_ADD_TEST_CASE(tp, nvlist_string_array__pack) ATF_ADD_TEST_CASE(tp, nvlist_nvlist_array__pack) } diff --git a/lib/libstand/Makefile b/lib/libstand/Makefile index 3126c1c30197..9d1f6bcd44a0 100644 --- 
a/lib/libstand/Makefile +++ b/lib/libstand/Makefile @@ -1,152 +1,153 @@ # $FreeBSD$ # Originally from $NetBSD: Makefile,v 1.21 1997/10/26 22:08:38 lukem Exp $ # # Notes: # - We don't use the libc strerror/sys_errlist because the string table is # quite large. # MK_PROFILE= no MK_SSP= no .include -LIBSTAND_SRC= ${.CURDIR} +LIBSTAND_SRC?= ${.CURDIR} +LIBSTAND_CPUARCH?=${MACHINE_CPUARCH} LIBC_SRC= ${LIBSTAND_SRC}/../libc LIB= stand NO_PIC= INCS= stand.h -MAN= libstand.3 +MAN?= libstand.3 WARNS?= 0 CFLAGS+= -I${LIBSTAND_SRC} # standalone components and stuff we have modified locally SRCS+= gzguts.h zutil.h __main.c assert.c bcd.c bswap.c environment.c getopt.c gets.c \ globals.c pager.c printf.c strdup.c strerror.c strtol.c strtoul.c random.c \ sbrk.c twiddle.c zalloc.c zalloc_malloc.c # private (pruned) versions of libc string functions SRCS+= strcasecmp.c .PATH: ${LIBC_SRC}/net SRCS+= ntoh.c # string functions from libc .PATH: ${LIBC_SRC}/string SRCS+= bcmp.c bcopy.c bzero.c ffs.c memccpy.c memchr.c memcmp.c memcpy.c \ memmove.c memset.c qdivrem.c strcat.c strchr.c strcmp.c strcpy.c \ strcspn.c strlcat.c strlcpy.c strlen.c strncat.c strncmp.c strncpy.c \ strpbrk.c strrchr.c strsep.c strspn.c strstr.c strtok.c swab.c .if ${MACHINE_CPUARCH} == "arm" .PATH: ${LIBC_SRC}/arm/gen # Do not generate movt/movw, because the relocation fixup for them does not # translate to the -Bsymbolic -pie format required by self_reloc() in loader(8). # Also, the fpu is not available in a standalone environment. CFLAGS.clang+= -mllvm -arm-use-movt=0 CFLAGS.clang+= -mfpu=none # Compiler support functions .PATH: ${LIBSTAND_SRC}/../../contrib/compiler-rt/lib/builtins/ # __clzsi2 and ctzsi2 for various builtin functions SRCS+= clzsi2.c ctzsi2.c # Divide and modulus functions called by the compiler SRCS+= divmoddi4.c divmodsi4.c divdi3.c divsi3.c moddi3.c modsi3.c SRCS+= udivmoddi4.c udivmodsi4.c udivdi3.c udivsi3.c umoddi3.c umodsi3.c .PATH: ${LIBSTAND_SRC}/../../contrib/compiler-rt/lib/builtins/arm/ SRCS+= aeabi_idivmod.S aeabi_ldivmod.S aeabi_uidivmod.S aeabi_uldivmod.S SRCS+= aeabi_memcmp.S aeabi_memcpy.S aeabi_memmove.S aeabi_memset.S .endif .if ${MACHINE_CPUARCH} == "aarch64" .PATH: ${LIBC_SRC}/aarch64/gen .endif .if ${MACHINE_CPUARCH} == "powerpc" .PATH: ${LIBC_SRC}/quad SRCS+= ashldi3.c ashrdi3.c SRCS+= syncicache.c .endif # uuid functions from libc .PATH: ${LIBC_SRC}/uuid SRCS+= uuid_create_nil.c uuid_equal.c uuid_from_string.c uuid_is_nil.c uuid_to_string.c # _setjmp/_longjmp -.PATH: ${LIBSTAND_SRC}/${MACHINE_CPUARCH} +.PATH: ${LIBSTAND_SRC}/${LIBSTAND_CPUARCH} SRCS+= _setjmp.S # decompression functionality from libbz2 # NOTE: to actually test this functionality after libbz2 upgrade compile # loader(8) with LOADER_BZIP2_SUPPORT defined .PATH: ${LIBSTAND_SRC}/../../contrib/bzip2 CFLAGS+= -DBZ_NO_STDIO -DBZ_NO_COMPRESS SRCS+= libstand_bzlib_private.h .for file in bzlib.c crctable.c decompress.c huffman.c randtable.c SRCS+= _${file} CLEANFILES+= _${file} _${file}: ${file} sed "s|bzlib_private\.h|libstand_bzlib_private.h|" \ ${.ALLSRC} > ${.TARGET} .endfor CLEANFILES+= libstand_bzlib_private.h libstand_bzlib_private.h: bzlib_private.h sed -e 's||"stand.h"|' \ ${.ALLSRC} > ${.TARGET} # decompression functionality from libz .PATH: ${LIBSTAND_SRC}/../libz CFLAGS+=-DHAVE_MEMCPY -I${LIBSTAND_SRC}/../libz SRCS+= adler32.c crc32.c libstand_zutil.h libstand_gzguts.h .for file in infback.c inffast.c inflate.c inftrees.c zutil.c SRCS+= _${file} CLEANFILES+= _${file} _${file}: ${file} sed -e "s|zutil\.h|libstand_zutil.h|" \ 
-e "s|gzguts\.h|libstand_gzguts.h|" \ ${.ALLSRC} > ${.TARGET} .endfor # depend on stand.h being able to be included multiple times .for file in zutil.h gzguts.h CLEANFILES+= libstand_${file} libstand_${file}: ${file} sed -e 's||"stand.h"|' \ -e 's||"stand.h"|' \ -e 's||"stand.h"|' \ -e 's||"stand.h"|' \ -e 's||"stand.h"|' \ ${.ALLSRC} > ${.TARGET} .endfor # io routines SRCS+= closeall.c dev.c ioctl.c nullfs.c stat.c \ fstat.c close.c lseek.c open.c read.c write.c readdir.c # network routines SRCS+= arp.c ether.c inet_ntoa.c in_cksum.c net.c udp.c netif.c rpc.c # network info services: SRCS+= bootp.c rarp.c bootparam.c # boot filesystems SRCS+= ufs.c nfs.c cd9660.c tftp.c gzipfs.c bzipfs.c SRCS+= dosfs.c ext2fs.c SRCS+= splitfs.c SRCS+= pkgfs.c .if ${MK_NAND} != "no" SRCS+= nandfs.c .endif .include .include diff --git a/libexec/rtld-elf/aarch64/rtld_machdep.h b/libexec/rtld-elf/aarch64/rtld_machdep.h index 943e3e6482d2..a2fc74af73e0 100644 --- a/libexec/rtld-elf/aarch64/rtld_machdep.h +++ b/libexec/rtld-elf/aarch64/rtld_machdep.h @@ -1,83 +1,85 @@ /*- * Copyright (c) 1999, 2000 John D. Polstra. * Copyright (c) 2014 the FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Andrew Turner * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef RTLD_MACHDEP_H #define RTLD_MACHDEP_H 1 #include #include struct Struct_Obj_Entry; /* Return the address of the .dynamic section in the dynamic linker. 
*/ #define rtld_dynamic(obj) \ ({ \ Elf_Addr _dynamic_addr; \ asm volatile("adr %0, _DYNAMIC" : "=&r"(_dynamic_addr)); \ (const Elf_Dyn *)_dynamic_addr; \ }) #define RTLD_IS_DYNAMIC() (1) Elf_Addr reloc_jmpslot(Elf_Addr *where, Elf_Addr target, const struct Struct_Obj_Entry *defobj, const struct Struct_Obj_Entry *obj, const Elf_Rel *rel); #define make_function_pointer(def, defobj) \ ((defobj)->relocbase + (def)->st_value) #define call_initfini_pointer(obj, target) \ (((InitFunc)(target))()) #define call_init_pointer(obj, target) \ (((InitArrFunc)(target))(main_argc, main_argv, environ)) #define round(size, align) \ (((size) + (align) - 1) & ~((align) - 1)) #define calculate_first_tls_offset(size, align) \ round(16, align) #define calculate_tls_offset(prev_offset, prev_size, size, align) \ round(prev_offset + prev_size, align) #define calculate_tls_end(off, size) ((off) + (size)) #define TLS_TCB_SIZE 16 typedef struct { unsigned long ti_module; unsigned long ti_offset; } tls_index; extern void *__tls_get_addr(tls_index *ti); #define RTLD_DEFAULT_STACK_PF_EXEC PF_X #define RTLD_DEFAULT_STACK_EXEC PROT_EXEC +#define md_abi_variant_hook(x) + #endif diff --git a/libexec/rtld-elf/amd64/rtld_machdep.h b/libexec/rtld-elf/amd64/rtld_machdep.h index cb5e9a178e0f..a8696feb37ab 100644 --- a/libexec/rtld-elf/amd64/rtld_machdep.h +++ b/libexec/rtld-elf/amd64/rtld_machdep.h @@ -1,82 +1,84 @@ /*- * Copyright (c) 1999, 2000 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef RTLD_MACHDEP_H #define RTLD_MACHDEP_H 1 #include #include struct Struct_Obj_Entry; /* Return the address of the .dynamic section in the dynamic linker. */ #define rtld_dynamic(obj) \ ((const Elf_Dyn *)((obj)->relocbase + (Elf_Addr)&_DYNAMIC)) /* Fixup the jump slot at "where" to transfer control to "target". 
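 * On amd64 this is a single pointer store: the PLT entry jumps through
 * the GOT slot, so updating that slot is all lazy binding requires (no
 * instruction rewriting and no cache flushing).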
*/ static inline Elf_Addr reloc_jmpslot(Elf_Addr *where, Elf_Addr target, const struct Struct_Obj_Entry *obj, const struct Struct_Obj_Entry *refobj, const Elf_Rel *rel) { #ifdef dbg dbg("reloc_jmpslot: *%p = %p", (void *)(where), (void *)(target)); #endif (*(Elf_Addr *)(where) = (Elf_Addr)(target)); return target; } #define make_function_pointer(def, defobj) \ ((defobj)->relocbase + (def)->st_value) #define call_initfini_pointer(obj, target) \ (((InitFunc)(target))()) #define call_init_pointer(obj, target) \ (((InitArrFunc)(target))(main_argc, main_argv, environ)) #define round(size, align) \ (((size) + (align) - 1) & ~((align) - 1)) #define calculate_first_tls_offset(size, align) \ round(size, align) #define calculate_tls_offset(prev_offset, prev_size, size, align) \ round((prev_offset) + (size), align) #define calculate_tls_end(off, size) (off) typedef struct { unsigned long ti_module; unsigned long ti_offset; } tls_index; void *__tls_get_addr(tls_index *ti) __exported; #define RTLD_DEFAULT_STACK_PF_EXEC PF_X #define RTLD_DEFAULT_STACK_EXEC PROT_EXEC +#define md_abi_variant_hook(x) + #endif diff --git a/libexec/rtld-elf/arm/reloc.c b/libexec/rtld-elf/arm/reloc.c index d0bf987e2811..6bdda735e713 100644 --- a/libexec/rtld-elf/arm/reloc.c +++ b/libexec/rtld-elf/arm/reloc.c @@ -1,476 +1,511 @@ /* $NetBSD: mdreloc.c,v 1.23 2003/07/26 15:04:38 mrg Exp $ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include "machine/sysarch.h" #include "debug.h" #include "rtld.h" #include "paths.h" +void +arm_abi_variant_hook(Elf_Auxinfo **aux_info) +{ + Elf_Word ehdr; + + /* + * If we're running an old kernel that doesn't provide any data fail + * safe by doing nothing. + */ + if (aux_info[AT_EHDRFLAGS] == NULL) + return; + ehdr = aux_info[AT_EHDRFLAGS]->a_un.a_val; + + /* + * Hard float ABI binaries are the default, and use the default paths + * and such. + */ + if ((ehdr & EF_ARM_VFP_FLOAT) != 0) + return; + + /* + * This is a soft float ABI binary. We need to use the soft float + * settings. For the moment, the standard library path includes the hard + * float paths as well. When upgrading, we need to execute the wrong + * kind of binary until we've installed the new binaries. We could go + * off whether or not /libsoft exists, but the simplicity of having it + * in the path wins. 
+ */ + ld_elf_hints_default = _PATH_SOFT_ELF_HINTS; + ld_path_libmap_conf = _PATH_SOFT_LIBMAP_CONF; + ld_path_rtld = _PATH_SOFT_RTLD; + ld_standard_library_path = SOFT_STANDARD_LIBRARY_PATH; + ld_env_prefix = LD_SOFT_; +} + void init_pltgot(Obj_Entry *obj) { if (obj->pltgot != NULL) { obj->pltgot[1] = (Elf_Addr) obj; obj->pltgot[2] = (Elf_Addr) &_rtld_bind_start; } } int do_copy_relocations(Obj_Entry *dstobj) { const Elf_Rel *rellim; const Elf_Rel *rel; assert(dstobj->mainprog); /* COPY relocations are invalid elsewhere */ rellim = (const Elf_Rel *) ((caddr_t) dstobj->rel + dstobj->relsize); for (rel = dstobj->rel; rel < rellim; rel++) { if (ELF_R_TYPE(rel->r_info) == R_ARM_COPY) { void *dstaddr; const Elf_Sym *dstsym; const char *name; size_t size; const void *srcaddr; const Elf_Sym *srcsym; const Obj_Entry *srcobj, *defobj; SymLook req; int res; dstaddr = (void *) (dstobj->relocbase + rel->r_offset); dstsym = dstobj->symtab + ELF_R_SYM(rel->r_info); name = dstobj->strtab + dstsym->st_name; size = dstsym->st_size; symlook_init(&req, name); req.ventry = fetch_ventry(dstobj, ELF_R_SYM(rel->r_info)); req.flags = SYMLOOK_EARLY; for (srcobj = dstobj->next; srcobj != NULL; srcobj = srcobj->next) { res = symlook_obj(&req, srcobj); if (res == 0) { srcsym = req.sym_out; defobj = req.defobj_out; break; } } if (srcobj == NULL) { _rtld_error( "Undefined symbol \"%s\" referenced from COPY relocation in %s", name, dstobj->path); return (-1); } srcaddr = (const void *)(defobj->relocbase + srcsym->st_value); memcpy(dstaddr, srcaddr, size); } } return 0; } void _rtld_bind_start(void); void _rtld_relocate_nonplt_self(Elf_Dyn *, Elf_Addr); int open(); int _open(); void _rtld_relocate_nonplt_self(Elf_Dyn *dynp, Elf_Addr relocbase) { const Elf_Rel *rel = 0, *rellim; Elf_Addr relsz = 0; Elf_Addr *where; uint32_t size; for (; dynp->d_tag != DT_NULL; dynp++) { switch (dynp->d_tag) { case DT_REL: rel = (const Elf_Rel *)(relocbase + dynp->d_un.d_ptr); break; case DT_RELSZ: relsz = dynp->d_un.d_val; break; } } rellim = (const Elf_Rel *)((caddr_t)rel + relsz); size = (rellim - 1)->r_offset - rel->r_offset; for (; rel < rellim; rel++) { where = (Elf_Addr *)(relocbase + rel->r_offset); *where += (Elf_Addr)relocbase; } } /* * It is possible for the compiler to emit relocations for unaligned data. * We handle this situation with these inlines. */ #define RELOC_ALIGNED_P(x) \ (((uintptr_t)(x) & (sizeof(void *) - 1)) == 0) static __inline Elf_Addr load_ptr(void *where) { Elf_Addr res; memcpy(&res, where, sizeof(res)); return (res); } static __inline void store_ptr(void *where, Elf_Addr val) { memcpy(where, &val, sizeof(val)); } static int reloc_nonplt_object(Obj_Entry *obj, const Elf_Rel *rel, SymCache *cache, int flags, RtldLockState *lockstate) { Elf_Addr *where; const Elf_Sym *def; const Obj_Entry *defobj; Elf_Addr tmp; unsigned long symnum; where = (Elf_Addr *)(obj->relocbase + rel->r_offset); symnum = ELF_R_SYM(rel->r_info); switch (ELF_R_TYPE(rel->r_info)) { case R_ARM_NONE: break; #if 1 /* XXX should not occur */ case R_ARM_PC24: { /* word32 S - P + A */ Elf32_Sword addend; /* * Extract addend and sign-extend if needed. 
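 * Bit 23 is the sign bit of the 24-bit field: when it is set, the
 * "|= 0xff000000" below turns e.g. 0x00fffffe into 0xfffffffe (-2),
 * so the later "addend << 2" shifts a correctly signed value.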
*/ addend = *where; if (addend & 0x00800000) addend |= 0xff000000; def = find_symdef(symnum, obj, &defobj, flags, cache, lockstate); if (def == NULL) return -1; tmp = (Elf_Addr)obj->relocbase + def->st_value - (Elf_Addr)where + (addend << 2); if ((tmp & 0xfe000000) != 0xfe000000 && (tmp & 0xfe000000) != 0) { _rtld_error( "%s: R_ARM_PC24 relocation @ %p to %s failed " "(displacement %ld (%#lx) out of range)", obj->path, where, obj->strtab + obj->symtab[symnum].st_name, (long) tmp, (long) tmp); return -1; } tmp >>= 2; *where = (*where & 0xff000000) | (tmp & 0x00ffffff); dbg("PC24 %s in %s --> %p @ %p in %s", obj->strtab + obj->symtab[symnum].st_name, obj->path, (void *)*where, where, defobj->path); break; } #endif case R_ARM_ABS32: /* word32 B + S + A */ case R_ARM_GLOB_DAT: /* word32 B + S */ def = find_symdef(symnum, obj, &defobj, flags, cache, lockstate); if (def == NULL) return -1; if (__predict_true(RELOC_ALIGNED_P(where))) { tmp = *where + (Elf_Addr)defobj->relocbase + def->st_value; *where = tmp; } else { tmp = load_ptr(where) + (Elf_Addr)defobj->relocbase + def->st_value; store_ptr(where, tmp); } dbg("ABS32/GLOB_DAT %s in %s --> %p @ %p in %s", obj->strtab + obj->symtab[symnum].st_name, obj->path, (void *)tmp, where, defobj->path); break; case R_ARM_RELATIVE: /* word32 B + A */ if (__predict_true(RELOC_ALIGNED_P(where))) { tmp = *where + (Elf_Addr)obj->relocbase; *where = tmp; } else { tmp = load_ptr(where) + (Elf_Addr)obj->relocbase; store_ptr(where, tmp); } dbg("RELATIVE in %s --> %p", obj->path, (void *)tmp); break; case R_ARM_COPY: /* * These are deferred until all other relocations have * been done. All we do here is make sure that the * COPY relocation is not in a shared library. They * are allowed only in executable files. */ if (!obj->mainprog) { _rtld_error( "%s: Unexpected R_COPY relocation in shared library", obj->path); return -1; } dbg("COPY (avoid in main)"); break; case R_ARM_TLS_DTPOFF32: def = find_symdef(symnum, obj, &defobj, flags, cache, lockstate); if (def == NULL) return -1; tmp = (Elf_Addr)(def->st_value); if (__predict_true(RELOC_ALIGNED_P(where))) *where = tmp; else store_ptr(where, tmp); dbg("TLS_DTPOFF32 %s in %s --> %p", obj->strtab + obj->symtab[symnum].st_name, obj->path, (void *)tmp); break; case R_ARM_TLS_DTPMOD32: def = find_symdef(symnum, obj, &defobj, flags, cache, lockstate); if (def == NULL) return -1; tmp = (Elf_Addr)(defobj->tlsindex); if (__predict_true(RELOC_ALIGNED_P(where))) *where = tmp; else store_ptr(where, tmp); dbg("TLS_DTPMOD32 %s in %s --> %p", obj->strtab + obj->symtab[symnum].st_name, obj->path, (void *)tmp); break; case R_ARM_TLS_TPOFF32: def = find_symdef(symnum, obj, &defobj, flags, cache, lockstate); if (def == NULL) return -1; if (!defobj->tls_done && allocate_tls_offset(obj)) return -1; /* XXX: FIXME */ tmp = (Elf_Addr)def->st_value + defobj->tlsoffset + TLS_TCB_SIZE; if (__predict_true(RELOC_ALIGNED_P(where))) *where = tmp; else store_ptr(where, tmp); dbg("TLS_TPOFF32 %s in %s --> %p", obj->strtab + obj->symtab[symnum].st_name, obj->path, (void *)tmp); break; default: dbg("sym = %lu, type = %lu, offset = %p, " "contents = %p, symbol = %s", symnum, (u_long)ELF_R_TYPE(rel->r_info), (void *)rel->r_offset, (void *)load_ptr(where), obj->strtab + obj->symtab[symnum].st_name); _rtld_error("%s: Unsupported relocation type %ld " "in non-PLT relocations\n", obj->path, (u_long) ELF_R_TYPE(rel->r_info)); return -1; } return 0; } /* * * Process non-PLT relocations * */ int reloc_non_plt(Obj_Entry *obj, Obj_Entry *obj_rtld, int flags, 
RtldLockState *lockstate) { const Elf_Rel *rellim; const Elf_Rel *rel; SymCache *cache; int r = -1; /* The relocation for the dynamic loader has already been done. */ if (obj == obj_rtld) return (0); if ((flags & SYMLOOK_IFUNC) != 0) /* XXX not implemented */ return (0); /* * The dynamic loader may be called from a thread, we have * limited amounts of stack available so we cannot use alloca(). */ cache = calloc(obj->dynsymcount, sizeof(SymCache)); /* No need to check for NULL here */ rellim = (const Elf_Rel *)((caddr_t)obj->rel + obj->relsize); for (rel = obj->rel; rel < rellim; rel++) { if (reloc_nonplt_object(obj, rel, cache, flags, lockstate) < 0) goto done; } r = 0; done: if (cache != NULL) free(cache); return (r); } /* * * Process the PLT relocations. * */ int reloc_plt(Obj_Entry *obj) { const Elf_Rel *rellim; const Elf_Rel *rel; rellim = (const Elf_Rel *)((char *)obj->pltrel + obj->pltrelsize); for (rel = obj->pltrel; rel < rellim; rel++) { Elf_Addr *where; assert(ELF_R_TYPE(rel->r_info) == R_ARM_JUMP_SLOT); where = (Elf_Addr *)(obj->relocbase + rel->r_offset); *where += (Elf_Addr )obj->relocbase; } return (0); } /* * * LD_BIND_NOW was set - force relocation for all jump slots * */ int reloc_jmpslots(Obj_Entry *obj, int flags, RtldLockState *lockstate) { const Obj_Entry *defobj; const Elf_Rel *rellim; const Elf_Rel *rel; const Elf_Sym *def; Elf_Addr *where; Elf_Addr target; rellim = (const Elf_Rel *)((char *)obj->pltrel + obj->pltrelsize); for (rel = obj->pltrel; rel < rellim; rel++) { assert(ELF_R_TYPE(rel->r_info) == R_ARM_JUMP_SLOT); where = (Elf_Addr *)(obj->relocbase + rel->r_offset); def = find_symdef(ELF_R_SYM(rel->r_info), obj, &defobj, SYMLOOK_IN_PLT | flags, NULL, lockstate); if (def == NULL) { dbg("reloc_jmpslots: sym not found"); return (-1); } target = (Elf_Addr)(defobj->relocbase + def->st_value); reloc_jmpslot(where, target, defobj, obj, (const Elf_Rel *) rel); } obj->jmpslots_done = true; return (0); } int reloc_iresolve(Obj_Entry *obj, struct Struct_RtldLockState *lockstate) { /* XXX not implemented */ return (0); } int reloc_gnu_ifunc(Obj_Entry *obj, int flags, struct Struct_RtldLockState *lockstate) { /* XXX not implemented */ return (0); } Elf_Addr reloc_jmpslot(Elf_Addr *where, Elf_Addr target, const Obj_Entry *defobj, const Obj_Entry *obj, const Elf_Rel *rel) { assert(ELF_R_TYPE(rel->r_info) == R_ARM_JUMP_SLOT); if (*where != target) *where = target; return target; } void allocate_initial_tls(Obj_Entry *objs) { #ifdef ARM_TP_ADDRESS void **_tp = (void **)ARM_TP_ADDRESS; #endif /* * Fix the size of the static TLS block by using the maximum * offset allocated so far and adding a bit for dynamic modules to * use. */ tls_static_space = tls_last_offset + tls_last_size + RTLD_STATIC_TLS_EXTRA; #ifdef ARM_TP_ADDRESS (*_tp) = (void *) allocate_tls(objs, NULL, TLS_TCB_SIZE, 8); #else sysarch(ARM_SET_TP, allocate_tls(objs, NULL, TLS_TCB_SIZE, 8)); #endif } void * __tls_get_addr(tls_index* ti) { char *p; #ifdef ARM_TP_ADDRESS void **_tp = (void **)ARM_TP_ADDRESS; p = tls_get_addr_common((Elf_Addr **)(*_tp), ti->ti_module, ti->ti_offset); #else void *_tp; __asm __volatile("mrc p15, 0, %0, c13, c0, 3" \ : "=r" (_tp)); p = tls_get_addr_common((Elf_Addr **)(_tp), ti->ti_module, ti->ti_offset); #endif return (p); } diff --git a/libexec/rtld-elf/arm/rtld_machdep.h b/libexec/rtld-elf/arm/rtld_machdep.h index f980de07b7ab..c61bce0cdd24 100644 --- a/libexec/rtld-elf/arm/rtld_machdep.h +++ b/libexec/rtld-elf/arm/rtld_machdep.h @@ -1,78 +1,83 @@ /*- * Copyright (c) 1999, 2000 John D. 
Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef RTLD_MACHDEP_H #define RTLD_MACHDEP_H 1 #include #include struct Struct_Obj_Entry; /* Return the address of the .dynamic section in the dynamic linker. */ #define rtld_dynamic(obj) (&_DYNAMIC) Elf_Addr reloc_jmpslot(Elf_Addr *where, Elf_Addr target, const struct Struct_Obj_Entry *defobj, const struct Struct_Obj_Entry *obj, const Elf_Rel *rel); #define make_function_pointer(def, defobj) \ ((defobj)->relocbase + (def)->st_value) #define call_initfini_pointer(obj, target) \ (((InitFunc)(target))()) #define call_init_pointer(obj, target) \ (((InitArrFunc)(target))(main_argc, main_argv, environ)) #define TLS_TCB_SIZE 8 typedef struct { unsigned long ti_module; unsigned long ti_offset; } tls_index; #define round(size, align) \ (((size) + (align) - 1) & ~((align) - 1)) #define calculate_first_tls_offset(size, align) \ round(8, align) #define calculate_tls_offset(prev_offset, prev_size, size, align) \ round(prev_offset + prev_size, align) #define calculate_tls_end(off, size) ((off) + (size)) /* * Lazy binding entry point, called via PLT. */ void _rtld_bind_start(void); extern void *__tls_get_addr(tls_index *ti); #define RTLD_DEFAULT_STACK_PF_EXEC PF_X #define RTLD_DEFAULT_STACK_EXEC PROT_EXEC +extern void arm_abi_variant_hook(Elf_Auxinfo **); + +#define md_abi_variant_hook(x) arm_abi_variant_hook(x) +#define RTLD_VARIANT_ENV_NAMES + #endif diff --git a/libexec/rtld-elf/i386/rtld_machdep.h b/libexec/rtld-elf/i386/rtld_machdep.h index 5c328da6ba84..5237d4fce04a 100644 --- a/libexec/rtld-elf/i386/rtld_machdep.h +++ b/libexec/rtld-elf/i386/rtld_machdep.h @@ -1,83 +1,85 @@ /*- * Copyright (c) 1999, 2000 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef RTLD_MACHDEP_H #define RTLD_MACHDEP_H 1 #include #include struct Struct_Obj_Entry; /* Return the address of the .dynamic section in the dynamic linker. */ #define rtld_dynamic(obj) \ ((const Elf_Dyn *)((obj)->relocbase + (Elf_Addr)&_DYNAMIC)) /* Fixup the jump slot at "where" to transfer control to "target". */ static inline Elf_Addr reloc_jmpslot(Elf_Addr *where, Elf_Addr target, const struct Struct_Obj_Entry *obj, const struct Struct_Obj_Entry *refobj, const Elf_Rel *rel) { #ifdef dbg dbg("reloc_jmpslot: *%p = %p", (void *)(where), (void *)(target)); #endif (*(Elf_Addr *)(where) = (Elf_Addr)(target)); return target; } #define make_function_pointer(def, defobj) \ ((defobj)->relocbase + (def)->st_value) #define call_initfini_pointer(obj, target) \ (((InitFunc)(target))()) #define call_init_pointer(obj, target) \ (((InitArrFunc)(target))(main_argc, main_argv, environ)) #define round(size, align) \ (((size) + (align) - 1) & ~((align) - 1)) #define calculate_first_tls_offset(size, align) \ round(size, align) #define calculate_tls_offset(prev_offset, prev_size, size, align) \ round((prev_offset) + (size), align) #define calculate_tls_end(off, size) (off) typedef struct { unsigned long ti_module; unsigned long ti_offset; } tls_index; void *___tls_get_addr(tls_index *ti) __attribute__((__regparm__(1))) __exported; void *__tls_get_addr(tls_index *ti) __exported; #define RTLD_DEFAULT_STACK_PF_EXEC PF_X #define RTLD_DEFAULT_STACK_EXEC PROT_EXEC +#define md_abi_variant_hook(x) + #endif diff --git a/libexec/rtld-elf/mips/rtld_machdep.h b/libexec/rtld-elf/mips/rtld_machdep.h index befbf139deb3..34e8f3c591fc 100644 --- a/libexec/rtld-elf/mips/rtld_machdep.h +++ b/libexec/rtld-elf/mips/rtld_machdep.h @@ -1,78 +1,80 @@ /*- * Copyright (c) 1999, 2000 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef RTLD_MACHDEP_H #define RTLD_MACHDEP_H 1 #include #include #include struct Struct_Obj_Entry; /* Return the address of the .dynamic section in the dynamic linker. */ #define rtld_dynamic(obj) (&_DYNAMIC) Elf_Addr reloc_jmpslot(Elf_Addr *where, Elf_Addr target, const struct Struct_Obj_Entry *defobj, const struct Struct_Obj_Entry *obj, const Elf_Rel *rel); #define make_function_pointer(def, defobj) \ ((defobj)->relocbase + (def)->st_value) #define call_initfini_pointer(obj, target) \ (((InitFunc)(target))()) #define call_init_pointer(obj, target) \ (((InitArrFunc)(target))(main_argc, main_argv, environ)) typedef struct { unsigned long ti_module; unsigned long ti_offset; } tls_index; #define round(size, align) \ (((size) + (align) - 1) & ~((align) - 1)) #define calculate_first_tls_offset(size, align) \ round(TLS_TCB_SIZE, align) #define calculate_tls_offset(prev_offset, prev_size, size, align) \ round(prev_offset + prev_size, align) #define calculate_tls_end(off, size) ((off) + (size)) /* * Lazy binding entry point, called via PLT. */ void _rtld_bind_start(void); extern void *__tls_get_addr(tls_index *ti); #define RTLD_DEFAULT_STACK_PF_EXEC PF_X #define RTLD_DEFAULT_STACK_EXEC PROT_EXEC +#define md_abi_variant_hook(x) + #endif diff --git a/libexec/rtld-elf/paths.h b/libexec/rtld-elf/paths.h index 709e4d303ddc..abfeb3f3ce1a 100644 --- a/libexec/rtld-elf/paths.h +++ b/libexec/rtld-elf/paths.h @@ -1,70 +1,74 @@ /*- * Copyright 1996, 1997, 1998, 1999, 2000 John D. Polstra. * Copyright 2003 Alexander Kabaev . - * Copyright 2009-2012 Konstantin Belousov . - * Copyright 2012 John Marino . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef PATHS_H #define PATHS_H #undef _PATH_ELF_HINTS #ifdef COMPAT_32BIT #define _PATH_ELF_HINTS "/var/run/ld-elf32.so.hints" #define _PATH_LIBMAP_CONF "/etc/libmap32.conf" #define _PATH_RTLD "/libexec/ld-elf32.so.1" #define STANDARD_LIBRARY_PATH "/lib32:/usr/lib32" #define LD_ "LD_32_" #endif #ifndef _PATH_ELF_HINTS #define _PATH_ELF_HINTS "/var/run/ld-elf.so.hints" #endif #ifndef _PATH_LIBMAP_CONF #define _PATH_LIBMAP_CONF "/etc/libmap.conf" #endif #ifndef _PATH_RTLD #define _PATH_RTLD "/libexec/ld-elf.so.1" #endif #ifndef STANDARD_LIBRARY_PATH #define STANDARD_LIBRARY_PATH "/lib:/usr/lib" #endif #ifndef LD_ #define LD_ "LD_" #endif -extern char *ld_path_elf_hints; +#define _PATH_SOFT_ELF_HINTS "/var/run/ld-elf-soft.so.hints" +#define _PATH_SOFT_LIBMAP_CONF "/etc/libmap-soft.conf" +#define _PATH_SOFT_RTLD "/libexec/ld-elf.so.1" +#define SOFT_STANDARD_LIBRARY_PATH "/libsoft:/usr/libsoft:/lib:/usr/lib" +#define LD_SOFT_ "LD_SOFT_" + +extern char *ld_elf_hints_default; extern char *ld_path_libmap_conf; extern char *ld_path_rtld; extern char *ld_standard_library_path; extern char *ld_env_prefix; #endif /* PATHS_H */ diff --git a/libexec/rtld-elf/powerpc/rtld_machdep.h b/libexec/rtld-elf/powerpc/rtld_machdep.h index 1ddf1bcbc4b8..3e39c8231c03 100644 --- a/libexec/rtld-elf/powerpc/rtld_machdep.h +++ b/libexec/rtld-elf/powerpc/rtld_machdep.h @@ -1,93 +1,95 @@ /*- * Copyright (c) 1999, 2000 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef RTLD_MACHDEP_H #define RTLD_MACHDEP_H 1 #include #include struct Struct_Obj_Entry; /* Return the address of the .dynamic section in the dynamic linker. */ #define rtld_dynamic(obj) (&_DYNAMIC) Elf_Addr reloc_jmpslot(Elf_Addr *where, Elf_Addr target, const struct Struct_Obj_Entry *defobj, const struct Struct_Obj_Entry *obj, const Elf_Rel *rel); #define make_function_pointer(def, defobj) \ ((defobj)->relocbase + (def)->st_value) #define call_initfini_pointer(obj, target) \ (((InitFunc)(target))()) #define call_init_pointer(obj, target) \ (((InitArrFunc)(target))(main_argc, main_argv, environ)) /* * Lazy binding entry point, called via PLT. */ void _rtld_bind_start(void); /* * PLT functions. Not really correct prototypes, but the * symbol values are needed. 
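 * (They are implemented in assembly; declaring them as plain functions
 * is just a convenient way for C code to take their addresses.)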
*/ void _rtld_powerpc_pltlongresolve(void); void _rtld_powerpc_pltresolve(void); void _rtld_powerpc_pltcall(void); /* * TLS */ #define TLS_TP_OFFSET 0x7000 #define TLS_DTV_OFFSET 0x8000 #define TLS_TCB_SIZE 8 #define round(size, align) \ (((size) + (align) - 1) & ~((align) - 1)) #define calculate_first_tls_offset(size, align) \ round(8, align) #define calculate_tls_offset(prev_offset, prev_size, size, align) \ round(prev_offset + prev_size, align) #define calculate_tls_end(off, size) ((off) + (size)) typedef struct { unsigned long ti_module; unsigned long ti_offset; } tls_index; extern void *__tls_get_addr(tls_index* ti); #define RTLD_DEFAULT_STACK_PF_EXEC PF_X #define RTLD_DEFAULT_STACK_EXEC PROT_EXEC +#define md_abi_variant_hook(x) + #endif diff --git a/libexec/rtld-elf/powerpc64/rtld_machdep.h b/libexec/rtld-elf/powerpc64/rtld_machdep.h index b88ed9deef47..32c7d12d449f 100644 --- a/libexec/rtld-elf/powerpc64/rtld_machdep.h +++ b/libexec/rtld-elf/powerpc64/rtld_machdep.h @@ -1,85 +1,87 @@ /*- * Copyright (c) 1999, 2000 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef RTLD_MACHDEP_H #define RTLD_MACHDEP_H 1 #include #include struct Struct_Obj_Entry; /* Return the address of the .dynamic section in the dynamic linker. */ #define rtld_dynamic(obj) (&_DYNAMIC) Elf_Addr reloc_jmpslot(Elf_Addr *where, Elf_Addr target, const struct Struct_Obj_Entry *defobj, const struct Struct_Obj_Entry *obj, const Elf_Rel *rel); #define make_function_pointer(def, defobj) \ ((defobj)->relocbase + (def)->st_value) #define call_initfini_pointer(obj, target) \ (((InitFunc)(target))()) #define call_init_pointer(obj, target) \ (((InitArrFunc)(target))(main_argc, main_argv, environ)) /* * Lazy binding entry point, called via PLT. 
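 * The assembly stub saves the argument registers, recovers the object
 * pointer and relocation offset that the PLT code passes, calls
 * _rtld_bind(), and jumps to the target address _rtld_bind() returns.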
*/ void _rtld_bind_start(void); /* * TLS */ #define TLS_TP_OFFSET 0x7000 #define TLS_DTV_OFFSET 0x8000 #define TLS_TCB_SIZE 16 #define round(size, align) \ (((size) + (align) - 1) & ~((align) - 1)) #define calculate_first_tls_offset(size, align) \ round(16, align) #define calculate_tls_offset(prev_offset, prev_size, size, align) \ round(prev_offset + prev_size, align) #define calculate_tls_end(off, size) ((off) + (size)) typedef struct { unsigned long ti_module; unsigned long ti_offset; } tls_index; extern void *__tls_get_addr(tls_index* ti); #define RTLD_DEFAULT_STACK_PF_EXEC PF_X #define RTLD_DEFAULT_STACK_EXEC PROT_EXEC +#define md_abi_variant_hook(x) + #endif diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c index 4eee6c5442f0..66edc157d393 100644 --- a/libexec/rtld-elf/rtld.c +++ b/libexec/rtld-elf/rtld.c @@ -1,5099 +1,5117 @@ /*- * Copyright 1996, 1997, 1998, 1999, 2000 John D. Polstra. * Copyright 2003 Alexander Kabaev . * Copyright 2009-2012 Konstantin Belousov . * Copyright 2012 John Marino . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * Dynamic linker for ELF. * * John Polstra . */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "debug.h" #include "rtld.h" #include "libmap.h" #include "paths.h" #include "rtld_tls.h" #include "rtld_printf.h" #include "notes.h" /* Types. */ typedef void (*func_ptr_type)(); typedef void * (*path_enum_proc) (const char *path, size_t len, void *arg); /* * Function declarations. 
*/ static const char *basename(const char *); static void digest_dynamic1(Obj_Entry *, int, const Elf_Dyn **, const Elf_Dyn **, const Elf_Dyn **); static void digest_dynamic2(Obj_Entry *, const Elf_Dyn *, const Elf_Dyn *, const Elf_Dyn *); static void digest_dynamic(Obj_Entry *, int); static Obj_Entry *digest_phdr(const Elf_Phdr *, int, caddr_t, const char *); static Obj_Entry *dlcheck(void *); static Obj_Entry *dlopen_object(const char *name, int fd, Obj_Entry *refobj, int lo_flags, int mode, RtldLockState *lockstate); static Obj_Entry *do_load_object(int, const char *, char *, struct stat *, int); static int do_search_info(const Obj_Entry *obj, int, struct dl_serinfo *); static bool donelist_check(DoneList *, const Obj_Entry *); static void errmsg_restore(char *); static char *errmsg_save(void); static void *fill_search_info(const char *, size_t, void *); static char *find_library(const char *, const Obj_Entry *, int *); static const char *gethints(bool); static void init_dag(Obj_Entry *); static void init_pagesizes(Elf_Auxinfo **aux_info); static void init_rtld(caddr_t, Elf_Auxinfo **); static void initlist_add_neededs(Needed_Entry *, Objlist *); static void initlist_add_objects(Obj_Entry *, Obj_Entry **, Objlist *); static void linkmap_add(Obj_Entry *); static void linkmap_delete(Obj_Entry *); static void load_filtees(Obj_Entry *, int flags, RtldLockState *); static void unload_filtees(Obj_Entry *); static int load_needed_objects(Obj_Entry *, int); static int load_preload_objects(void); static Obj_Entry *load_object(const char *, int fd, const Obj_Entry *, int); static void map_stacks_exec(RtldLockState *); static Obj_Entry *obj_from_addr(const void *); static void objlist_call_fini(Objlist *, Obj_Entry *, RtldLockState *); static void objlist_call_init(Objlist *, RtldLockState *); static void objlist_clear(Objlist *); static Objlist_Entry *objlist_find(Objlist *, const Obj_Entry *); static void objlist_init(Objlist *); static void objlist_push_head(Objlist *, Obj_Entry *); static void objlist_push_tail(Objlist *, Obj_Entry *); static void objlist_put_after(Objlist *, Obj_Entry *, Obj_Entry *); static void objlist_remove(Objlist *, Obj_Entry *); static int parse_libdir(const char *); static void *path_enumerate(const char *, path_enum_proc, void *); static int relocate_object_dag(Obj_Entry *root, bool bind_now, Obj_Entry *rtldobj, int flags, RtldLockState *lockstate); static int relocate_object(Obj_Entry *obj, bool bind_now, Obj_Entry *rtldobj, int flags, RtldLockState *lockstate); static int relocate_objects(Obj_Entry *, bool, Obj_Entry *, int, RtldLockState *); static int resolve_objects_ifunc(Obj_Entry *first, bool bind_now, int flags, RtldLockState *lockstate); static int rtld_dirname(const char *, char *); static int rtld_dirname_abs(const char *, char *); static void *rtld_dlopen(const char *name, int fd, int mode); static void rtld_exit(void); static char *search_library_path(const char *, const char *); static char *search_library_pathfds(const char *, const char *, int *); static const void **get_program_var_addr(const char *, RtldLockState *); static void set_program_var(const char *, const void *); static int symlook_default(SymLook *, const Obj_Entry *refobj); static int symlook_global(SymLook *, DoneList *); static void symlook_init_from_req(SymLook *, const SymLook *); static int symlook_list(SymLook *, const Objlist *, DoneList *); static int symlook_needed(SymLook *, const Needed_Entry *, DoneList *); static int symlook_obj1_sysv(SymLook *, const Obj_Entry *); static 
int symlook_obj1_gnu(SymLook *, const Obj_Entry *); static void trace_loaded_objects(Obj_Entry *); static void unlink_object(Obj_Entry *); static void unload_object(Obj_Entry *); static void unref_dag(Obj_Entry *); static void ref_dag(Obj_Entry *); static char *origin_subst_one(Obj_Entry *, char *, const char *, const char *, bool); static char *origin_subst(Obj_Entry *, char *); static bool obj_resolve_origin(Obj_Entry *obj); static void preinit_main(void); static int rtld_verify_versions(const Objlist *); static int rtld_verify_object_versions(Obj_Entry *); static void object_add_name(Obj_Entry *, const char *); static int object_match_name(const Obj_Entry *, const char *); static void ld_utrace_log(int, void *, void *, size_t, int, const char *); static void rtld_fill_dl_phdr_info(const Obj_Entry *obj, struct dl_phdr_info *phdr_info); static uint32_t gnu_hash(const char *); static bool matched_symbol(SymLook *, const Obj_Entry *, Sym_Match_Result *, const unsigned long); void r_debug_state(struct r_debug *, struct link_map *) __noinline __exported; void _r_debug_postinit(struct link_map *) __noinline __exported; int __sys_openat(int, const char *, int, ...); /* * Data declarations. */ static char *error_message; /* Message for dlerror(), or NULL */ struct r_debug r_debug __exported; /* for GDB; */ static bool libmap_disable; /* Disable libmap */ static bool ld_loadfltr; /* Immediate filters processing */ static char *libmap_override; /* Maps to use in addition to libmap.conf */ static bool trust; /* False for setuid and setgid programs */ static bool dangerous_ld_env; /* True if environment variables have been used to affect the libraries loaded */ static char *ld_bind_now; /* Environment variable for immediate binding */ static char *ld_debug; /* Environment variable for debugging */ static char *ld_library_path; /* Environment variable for search path */ static char *ld_library_dirs; /* Environment variable for library descriptors */ static char *ld_preload; /* Environment variable for libraries to load first */ static char *ld_elf_hints_path; /* Environment variable for alternative hints path */ static char *ld_tracing; /* Called from ldd to print libs */ static char *ld_utrace; /* Use utrace() to log events. */ static Obj_Entry *obj_list; /* Head of linked list of shared objects */ static Obj_Entry **obj_tail; /* Link field of last object in list */ static Obj_Entry *obj_main; /* The main program shared object */ static Obj_Entry obj_rtld; /* The dynamic linker shared object */ static unsigned int obj_count; /* Number of objects in obj_list */ static unsigned int obj_loads; /* Number of objects in obj_list */ static Objlist list_global = /* Objects dlopened with RTLD_GLOBAL */ STAILQ_HEAD_INITIALIZER(list_global); static Objlist list_main = /* Objects loaded at program startup */ STAILQ_HEAD_INITIALIZER(list_main); static Objlist list_fini = /* Objects needing fini() calls */ STAILQ_HEAD_INITIALIZER(list_fini); Elf_Sym sym_zero; /* For resolving undefined weak refs. 
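 * (Its st_value is biased by -relocbase in _rtld(), so that
 * relocbase + st_value yields NULL for such references.)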
*/ #define GDB_STATE(s,m) r_debug.r_state = s; r_debug_state(&r_debug,m); extern Elf_Dyn _DYNAMIC; #pragma weak _DYNAMIC #ifndef RTLD_IS_DYNAMIC #define RTLD_IS_DYNAMIC() (&_DYNAMIC != NULL) #endif -#define _LD(x) LD_ x - int dlclose(void *) __exported; char *dlerror(void) __exported; void *dlopen(const char *, int) __exported; void *fdlopen(int, int) __exported; void *dlsym(void *, const char *) __exported; dlfunc_t dlfunc(void *, const char *) __exported; void *dlvsym(void *, const char *, const char *) __exported; int dladdr(const void *, Dl_info *) __exported; void dllockinit(void *, void *(*)(void *), void (*)(void *), void (*)(void *), void (*)(void *), void (*)(void *), void (*)(void *)) __exported; int dlinfo(void *, int , void *) __exported; int dl_iterate_phdr(__dl_iterate_hdr_callback, void *) __exported; int _rtld_addr_phdr(const void *, struct dl_phdr_info *) __exported; int _rtld_get_stack_prot(void) __exported; int _rtld_is_dlopened(void *) __exported; void _rtld_error(const char *, ...) __exported; int npagesizes, osreldate; size_t *pagesizes; long __stack_chk_guard[8] = {0, 0, 0, 0, 0, 0, 0, 0}; static int stack_prot = PROT_READ | PROT_WRITE | RTLD_DEFAULT_STACK_EXEC; static int max_stack_flags; /* * Global declarations normally provided by crt1. The dynamic linker is * not built with crt1, so we have to provide them ourselves. */ char *__progname; char **environ; /* * Used to pass argc, argv to init functions. */ int main_argc; char **main_argv; /* * Globals to control TLS allocation. */ size_t tls_last_offset; /* Static TLS offset of last module */ size_t tls_last_size; /* Static TLS size of last module */ size_t tls_static_space; /* Static TLS space allocated */ size_t tls_static_max_align; int tls_dtv_generation = 1; /* Used to detect when dtv size changes */ int tls_max_index = 1; /* Largest module index allocated */ bool ld_library_path_rpath = false; /* * Globals for path names, and such */ char *ld_elf_hints_default = _PATH_ELF_HINTS; char *ld_path_libmap_conf = _PATH_LIBMAP_CONF; char *ld_path_rtld = _PATH_RTLD; char *ld_standard_library_path = STANDARD_LIBRARY_PATH; char *ld_env_prefix = LD_; /* * Fill in a DoneList with an allocation large enough to hold all of * the currently-loaded objects. Keep this as a macro since it calls * alloca and we want that to occur within the scope of the caller. 
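 * (Were it a function, the alloca()ed block would be reclaimed as soon
 * as that function returned.)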
*/ #define donelist_init(dlp) \ ((dlp)->objs = alloca(obj_count * sizeof (dlp)->objs[0]), \ assert((dlp)->objs != NULL), \ (dlp)->num_alloc = obj_count, \ (dlp)->num_used = 0) #define UTRACE_DLOPEN_START 1 #define UTRACE_DLOPEN_STOP 2 #define UTRACE_DLCLOSE_START 3 #define UTRACE_DLCLOSE_STOP 4 #define UTRACE_LOAD_OBJECT 5 #define UTRACE_UNLOAD_OBJECT 6 #define UTRACE_ADD_RUNDEP 7 #define UTRACE_PRELOAD_FINISHED 8 #define UTRACE_INIT_CALL 9 #define UTRACE_FINI_CALL 10 #define UTRACE_DLSYM_START 11 #define UTRACE_DLSYM_STOP 12 struct utrace_rtld { char sig[4]; /* 'RTLD' */ int event; void *handle; void *mapbase; /* Used for 'parent' and 'init/fini' */ size_t mapsize; int refcnt; /* Used for 'mode' */ char name[MAXPATHLEN]; }; #define LD_UTRACE(e, h, mb, ms, r, n) do { \ if (ld_utrace != NULL) \ ld_utrace_log(e, h, mb, ms, r, n); \ } while (0) static void ld_utrace_log(int event, void *handle, void *mapbase, size_t mapsize, int refcnt, const char *name) { struct utrace_rtld ut; ut.sig[0] = 'R'; ut.sig[1] = 'T'; ut.sig[2] = 'L'; ut.sig[3] = 'D'; ut.event = event; ut.handle = handle; ut.mapbase = mapbase; ut.mapsize = mapsize; ut.refcnt = refcnt; bzero(ut.name, sizeof(ut.name)); if (name) strlcpy(ut.name, name, sizeof(ut.name)); utrace(&ut, sizeof(ut)); } +#ifdef RTLD_VARIANT_ENV_NAMES +/* + * construct the env variable based on the type of binary that's + * running. + */ +static inline const char * +_LD(const char *var) +{ + static char buffer[128]; + + strlcpy(buffer, ld_env_prefix, sizeof(buffer)); + strlcat(buffer, var, sizeof(buffer)); + return (buffer); +} +#else +#define _LD(x) LD_ x +#endif + /* * Main entry point for dynamic linking. The first argument is the * stack pointer. The stack is expected to be laid out as described * in the SVR4 ABI specification, Intel 386 Processor Supplement. * Specifically, the stack pointer points to a word containing * ARGC. Following that in the stack is a null-terminated sequence * of pointers to argument strings. Then comes a null-terminated * sequence of pointers to environment strings. Finally, there is a * sequence of "auxiliary vector" entries. * * The second argument points to a place to store the dynamic linker's * exit procedure pointer and the third to a place to store the main * program's object. * * The return value is the main program's entry point. */ func_ptr_type _rtld(Elf_Addr *sp, func_ptr_type *exit_proc, Obj_Entry **objp) { Elf_Auxinfo *aux_info[AT_COUNT]; int i; int argc; char **argv; char **env; Elf_Auxinfo *aux; Elf_Auxinfo *auxp; const char *argv0; Objlist_Entry *entry; Obj_Entry *obj; Obj_Entry **preload_tail; Obj_Entry *last_interposer; Objlist initlist; RtldLockState lockstate; char *library_path_rpath; int mib[2]; size_t len; /* * On entry, the dynamic linker itself has not been relocated yet. * Be very careful not to reference any global data until after * init_rtld has returned. It is OK to reference file-scope statics * and string constants, and to call static and global functions. */ /* Find the auxiliary vector on the stack. */ argc = *sp++; argv = (char **) sp; sp += argc + 1; /* Skip over arguments and NULL terminator */ env = (char **) sp; while (*sp++ != 0) /* Skip over environment, and NULL terminator */ ; aux = (Elf_Auxinfo *) sp; /* Digest the auxiliary vector. */ for (i = 0; i < AT_COUNT; i++) aux_info[i] = NULL; for (auxp = aux; auxp->a_type != AT_NULL; auxp++) { if (auxp->a_type < AT_COUNT) aux_info[auxp->a_type] = auxp; } /* Initialize and relocate ourselves. 
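 * (On ports that define RTLD_VARIANT_ENV_NAMES, even the _LD() helper
 * above reads the global ld_env_prefix, so it too is only safe to use
 * after init_rtld() has returned.)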
*/
	assert(aux_info[AT_BASE] != NULL);
	init_rtld((caddr_t) aux_info[AT_BASE]->a_un.a_ptr, aux_info);

	__progname = obj_rtld.path;
	argv0 = argv[0] != NULL ? argv[0] : "(null)";
	environ = env;
	main_argc = argc;
	main_argv = argv;

	if (aux_info[AT_CANARY] != NULL &&
	    aux_info[AT_CANARY]->a_un.a_ptr != NULL) {
	    i = aux_info[AT_CANARYLEN]->a_un.a_val;
	    if (i > sizeof(__stack_chk_guard))
		i = sizeof(__stack_chk_guard);
	    memcpy(__stack_chk_guard, aux_info[AT_CANARY]->a_un.a_ptr, i);
	} else {
	    mib[0] = CTL_KERN;
	    mib[1] = KERN_ARND;

	    len = sizeof(__stack_chk_guard);
	    if (sysctl(mib, 2, __stack_chk_guard, &len, NULL, 0) == -1 ||
		len != sizeof(__stack_chk_guard)) {
		/* If sysctl was unsuccessful, use the "terminator canary". */
		((unsigned char *)(void *)__stack_chk_guard)[0] = 0;
		((unsigned char *)(void *)__stack_chk_guard)[1] = 0;
		((unsigned char *)(void *)__stack_chk_guard)[2] = '\n';
		((unsigned char *)(void *)__stack_chk_guard)[3] = 255;
	    }
	}

	trust = !issetugid();

+	md_abi_variant_hook(aux_info);
+
	ld_bind_now = getenv(_LD("BIND_NOW"));

	/*
	 * If the process is tainted, then we un-set the dangerous environment
	 * variables.  The process will be marked as tainted until setuid(2)
	 * is called.  If any child process calls setuid(2) we do not want any
	 * future processes to honor the potentially un-safe variables.
	 */
	if (!trust) {
	    if (unsetenv(_LD("PRELOAD")) || unsetenv(_LD("LIBMAP")) ||
		unsetenv(_LD("LIBRARY_PATH")) ||
		unsetenv(_LD("LIBRARY_PATH_FDS")) ||
		unsetenv(_LD("LIBMAP_DISABLE")) ||
		unsetenv(_LD("DEBUG")) || unsetenv(_LD("ELF_HINTS_PATH")) ||
		unsetenv(_LD("LOADFLTR")) ||
		unsetenv(_LD("LIBRARY_PATH_RPATH"))) {
		_rtld_error("environment corrupt; aborting");
		rtld_die();
	    }
	}
	ld_debug = getenv(_LD("DEBUG"));
	libmap_disable = getenv(_LD("LIBMAP_DISABLE")) != NULL;
	libmap_override = getenv(_LD("LIBMAP"));
	ld_library_path = getenv(_LD("LIBRARY_PATH"));
	ld_library_dirs = getenv(_LD("LIBRARY_PATH_FDS"));
	ld_preload = getenv(_LD("PRELOAD"));
	ld_elf_hints_path = getenv(_LD("ELF_HINTS_PATH"));
	ld_loadfltr = getenv(_LD("LOADFLTR")) != NULL;
	library_path_rpath = getenv(_LD("LIBRARY_PATH_RPATH"));
	if (library_path_rpath != NULL) {
	    if (library_path_rpath[0] == 'y' ||
		library_path_rpath[0] == 'Y' ||
		library_path_rpath[0] == '1')
		ld_library_path_rpath = true;
	    else
		ld_library_path_rpath = false;
	}
	dangerous_ld_env = libmap_disable || (libmap_override != NULL) ||
	    (ld_library_path != NULL) || (ld_preload != NULL) ||
	    (ld_elf_hints_path != NULL) || ld_loadfltr;
	ld_tracing = getenv(_LD("TRACE_LOADED_OBJECTS"));
	ld_utrace = getenv(_LD("UTRACE"));

	if ((ld_elf_hints_path == NULL) || strlen(ld_elf_hints_path) == 0)
	    ld_elf_hints_path = ld_elf_hints_default;

	if (ld_debug != NULL && *ld_debug != '\0')
	    debug = 1;
	dbg("%s is initialized, base address = %p", __progname,
	  (caddr_t) aux_info[AT_BASE]->a_un.a_ptr);
	dbg("RTLD dynamic = %p", obj_rtld.dynamic);
	dbg("RTLD pltgot = %p", obj_rtld.pltgot);

	dbg("initializing thread locks");
	lockdflt_init();

	/*
	 * Load the main program, or process its program header if it is
	 * already loaded.
	 */
	if (aux_info[AT_EXECFD] != NULL) {	/* Load the main program. */
	    int fd = aux_info[AT_EXECFD]->a_un.a_val;
	    dbg("loading main program");
	    obj_main = map_object(fd, argv0, NULL);
	    close(fd);
	    if (obj_main == NULL)
		rtld_die();
	    max_stack_flags = obj_main->stack_flags;
	} else {				/* Main program already loaded.
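 * (No AT_EXECFD was passed: the kernel mapped the executable itself,
 * so rtld only has to digest the program headers it was handed.)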
*/ const Elf_Phdr *phdr; int phnum; caddr_t entry; dbg("processing main program's program header"); assert(aux_info[AT_PHDR] != NULL); phdr = (const Elf_Phdr *) aux_info[AT_PHDR]->a_un.a_ptr; assert(aux_info[AT_PHNUM] != NULL); phnum = aux_info[AT_PHNUM]->a_un.a_val; assert(aux_info[AT_PHENT] != NULL); assert(aux_info[AT_PHENT]->a_un.a_val == sizeof(Elf_Phdr)); assert(aux_info[AT_ENTRY] != NULL); entry = (caddr_t) aux_info[AT_ENTRY]->a_un.a_ptr; if ((obj_main = digest_phdr(phdr, phnum, entry, argv0)) == NULL) rtld_die(); } if (aux_info[AT_EXECPATH] != 0) { char *kexecpath; char buf[MAXPATHLEN]; kexecpath = aux_info[AT_EXECPATH]->a_un.a_ptr; dbg("AT_EXECPATH %p %s", kexecpath, kexecpath); if (kexecpath[0] == '/') obj_main->path = kexecpath; else if (getcwd(buf, sizeof(buf)) == NULL || strlcat(buf, "/", sizeof(buf)) >= sizeof(buf) || strlcat(buf, kexecpath, sizeof(buf)) >= sizeof(buf)) obj_main->path = xstrdup(argv0); else obj_main->path = xstrdup(buf); } else { dbg("No AT_EXECPATH"); obj_main->path = xstrdup(argv0); } dbg("obj_main path %s", obj_main->path); obj_main->mainprog = true; if (aux_info[AT_STACKPROT] != NULL && aux_info[AT_STACKPROT]->a_un.a_val != 0) stack_prot = aux_info[AT_STACKPROT]->a_un.a_val; #ifndef COMPAT_32BIT /* * Get the actual dynamic linker pathname from the executable if * possible. (It should always be possible.) That ensures that * gdb will find the right dynamic linker even if a non-standard * one is being used. */ if (obj_main->interp != NULL && strcmp(obj_main->interp, obj_rtld.path) != 0) { free(obj_rtld.path); obj_rtld.path = xstrdup(obj_main->interp); __progname = obj_rtld.path; } #endif digest_dynamic(obj_main, 0); dbg("%s valid_hash_sysv %d valid_hash_gnu %d dynsymcount %d", obj_main->path, obj_main->valid_hash_sysv, obj_main->valid_hash_gnu, obj_main->dynsymcount); linkmap_add(obj_main); linkmap_add(&obj_rtld); /* Link the main program into the list of objects. */ *obj_tail = obj_main; obj_tail = &obj_main->next; obj_count++; obj_loads++; /* Initialize a fake symbol for resolving undefined weak references. */ sym_zero.st_info = ELF_ST_INFO(STB_GLOBAL, STT_NOTYPE); sym_zero.st_shndx = SHN_UNDEF; sym_zero.st_value = -(uintptr_t)obj_main->relocbase; if (!libmap_disable) libmap_disable = (bool)lm_init(libmap_override); dbg("loading LD_PRELOAD libraries"); if (load_preload_objects() == -1) rtld_die(); preload_tail = obj_tail; dbg("loading needed objects"); if (load_needed_objects(obj_main, 0) == -1) rtld_die(); /* Make a list of all objects loaded at startup. */ last_interposer = obj_main; for (obj = obj_list; obj != NULL; obj = obj->next) { if (obj->z_interpose && obj != obj_main) { objlist_put_after(&list_main, last_interposer, obj); last_interposer = obj; } else { objlist_push_tail(&list_main, obj); } obj->refcount++; } dbg("checking for required versions"); if (rtld_verify_versions(&list_main) == -1 && !ld_tracing) rtld_die(); if (ld_tracing) { /* We're done */ trace_loaded_objects(obj_main); exit(0); } if (getenv(_LD("DUMP_REL_PRE")) != NULL) { dump_relocations(obj_main); exit (0); } /* * Processing tls relocations requires having the tls offsets * initialized. Prepare offsets before starting initial * relocation processing. */ dbg("initializing initial thread local storage offsets"); STAILQ_FOREACH(entry, &list_main, link) { /* * Allocate all the initial objects out of the static TLS * block even if they didn't ask for it. 
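 * The offsets must be settled before relocations are processed,
 * because TPOFF-style relocations encode them into the loaded image.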
*/ allocate_tls_offset(entry->obj); } if (relocate_objects(obj_main, ld_bind_now != NULL && *ld_bind_now != '\0', &obj_rtld, SYMLOOK_EARLY, NULL) == -1) rtld_die(); dbg("doing copy relocations"); if (do_copy_relocations(obj_main) == -1) rtld_die(); if (getenv(_LD("DUMP_REL_POST")) != NULL) { dump_relocations(obj_main); exit (0); } /* * Setup TLS for main thread. This must be done after the * relocations are processed, since tls initialization section * might be the subject for relocations. */ dbg("initializing initial thread local storage"); allocate_initial_tls(obj_list); dbg("initializing key program variables"); set_program_var("__progname", argv[0] != NULL ? basename(argv[0]) : ""); set_program_var("environ", env); set_program_var("__elf_aux_vector", aux); /* Make a list of init functions to call. */ objlist_init(&initlist); initlist_add_objects(obj_list, preload_tail, &initlist); r_debug_state(NULL, &obj_main->linkmap); /* say hello to gdb! */ map_stacks_exec(NULL); dbg("resolving ifuncs"); if (resolve_objects_ifunc(obj_main, ld_bind_now != NULL && *ld_bind_now != '\0', SYMLOOK_EARLY, NULL) == -1) rtld_die(); if (!obj_main->crt_no_init) { /* * Make sure we don't call the main program's init and fini * functions for binaries linked with old crt1 which calls * _init itself. */ obj_main->init = obj_main->fini = (Elf_Addr)NULL; obj_main->preinit_array = obj_main->init_array = obj_main->fini_array = (Elf_Addr)NULL; } wlock_acquire(rtld_bind_lock, &lockstate); if (obj_main->crt_no_init) preinit_main(); objlist_call_init(&initlist, &lockstate); _r_debug_postinit(&obj_main->linkmap); objlist_clear(&initlist); dbg("loading filtees"); for (obj = obj_list->next; obj != NULL; obj = obj->next) { if (ld_loadfltr || obj->z_loadfltr) load_filtees(obj, 0, &lockstate); } lock_release(rtld_bind_lock, &lockstate); dbg("transferring control to program entry point = %p", obj_main->entry); /* Return the exit procedure and the program entry point. */ *exit_proc = rtld_exit; *objp = obj_main; return (func_ptr_type) obj_main->entry; } void * rtld_resolve_ifunc(const Obj_Entry *obj, const Elf_Sym *def) { void *ptr; Elf_Addr target; ptr = (void *)make_function_pointer(def, obj); target = ((Elf_Addr (*)(void))ptr)(); return ((void *)target); } Elf_Addr _rtld_bind(Obj_Entry *obj, Elf_Size reloff) { const Elf_Rel *rel; const Elf_Sym *def; const Obj_Entry *defobj; Elf_Addr *where; Elf_Addr target; RtldLockState lockstate; rlock_acquire(rtld_bind_lock, &lockstate); if (sigsetjmp(lockstate.env, 0) != 0) lock_upgrade(rtld_bind_lock, &lockstate); if (obj->pltrel) rel = (const Elf_Rel *) ((caddr_t) obj->pltrel + reloff); else rel = (const Elf_Rel *) ((caddr_t) obj->pltrela + reloff); where = (Elf_Addr *) (obj->relocbase + rel->r_offset); def = find_symdef(ELF_R_SYM(rel->r_info), obj, &defobj, true, NULL, &lockstate); if (def == NULL) rtld_die(); if (ELF_ST_TYPE(def->st_info) == STT_GNU_IFUNC) target = (Elf_Addr)rtld_resolve_ifunc(defobj, def); else target = (Elf_Addr)(defobj->relocbase + def->st_value); dbg("\"%s\" in \"%s\" ==> %p in \"%s\"", defobj->strtab + def->st_name, basename(obj->path), (void *)target, basename(defobj->path)); /* * Write the new contents for the jmpslot. Note that depending on * architecture, the value which we need to return back to the * lazy binding trampoline may or may not be the target * address. The value returned from reloc_jmpslot() is the value * that the trampoline needs. 
*/ target = reloc_jmpslot(where, target, defobj, obj, rel); lock_release(rtld_bind_lock, &lockstate); return target; } /* * Error reporting function. Use it like printf. It formats the message * into a buffer, and sets things up so that the next call to dlerror() * will return the message. */ void _rtld_error(const char *fmt, ...) { static char buf[512]; va_list ap; va_start(ap, fmt); rtld_vsnprintf(buf, sizeof buf, fmt, ap); error_message = buf; va_end(ap); } /* * Return a dynamically-allocated copy of the current error message, if any. */ static char * errmsg_save(void) { return error_message == NULL ? NULL : xstrdup(error_message); } /* * Restore the current error message from a copy which was previously saved * by errmsg_save(). The copy is freed. */ static void errmsg_restore(char *saved_msg) { if (saved_msg == NULL) error_message = NULL; else { _rtld_error("%s", saved_msg); free(saved_msg); } } static const char * basename(const char *name) { const char *p = strrchr(name, '/'); return p != NULL ? p + 1 : name; } static struct utsname uts; static char * origin_subst_one(Obj_Entry *obj, char *real, const char *kw, const char *subst, bool may_free) { char *p, *p1, *res, *resp; int subst_len, kw_len, subst_count, old_len, new_len; kw_len = strlen(kw); /* * First, count the number of the keyword occurrences, to * preallocate the final string. */ for (p = real, subst_count = 0;; p = p1 + kw_len, subst_count++) { p1 = strstr(p, kw); if (p1 == NULL) break; } /* * If the keyword is not found, just return. * * Return the non-substituted string if resolution failed. We * cannot do anything more reasonable, the failure mode of the * caller is an unresolved library anyway. */ if (subst_count == 0 || (obj != NULL && !obj_resolve_origin(obj))) return (may_free ? real : xstrdup(real)); if (obj != NULL) subst = obj->origin_path; /* * There is indeed something to substitute. Calculate the * length of the resulting string, and allocate it. */ subst_len = strlen(subst); old_len = strlen(real); new_len = old_len + (subst_len - kw_len) * subst_count; res = xmalloc(new_len + 1); /* * Now, execute the substitution loop. */ for (p = real, resp = res, *resp = '\0';;) { p1 = strstr(p, kw); if (p1 != NULL) { /* Copy the prefix before keyword. */ memcpy(resp, p, p1 - p); resp += p1 - p; /* Keyword replacement. */ memcpy(resp, subst, subst_len); resp += subst_len; *resp = '\0'; p = p1 + kw_len; } else break; } /* Copy to the end of string and finish. */ strcat(resp, p); if (may_free) free(real); return (res); } static char * origin_subst(Obj_Entry *obj, char *real) { char *res1, *res2, *res3, *res4; if (obj == NULL || !trust) return (xstrdup(real)); if (uts.sysname[0] == '\0') { if (uname(&uts) != 0) { _rtld_error("utsname failed: %d", errno); return (NULL); } } res1 = origin_subst_one(obj, real, "$ORIGIN", NULL, false); res2 = origin_subst_one(NULL, res1, "$OSNAME", uts.sysname, true); res3 = origin_subst_one(NULL, res2, "$OSREL", uts.release, true); res4 = origin_subst_one(NULL, res3, "$PLATFORM", uts.machine, true); return (res4); } void rtld_die(void) { const char *msg = dlerror(); if (msg == NULL) msg = "Fatal error"; rtld_fdputstr(STDERR_FILENO, msg); rtld_fdputchar(STDERR_FILENO, '\n'); _exit(1); } /* * Process a shared object's DYNAMIC section, and save the important * information in its Obj_Entry structure. 
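 *
 * (For example, in digest_dynamic1() below: DT_NEEDED entries become
 * the obj->needed list, DT_SYMTAB/DT_STRTAB/DT_HASH set up symbol
 * lookup, and DT_RPATH/DT_RUNPATH/DT_SONAME are merely recorded and
 * resolved later in digest_dynamic2(), once the string table address
 * is known.)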
*/ static void digest_dynamic1(Obj_Entry *obj, int early, const Elf_Dyn **dyn_rpath, const Elf_Dyn **dyn_soname, const Elf_Dyn **dyn_runpath) { const Elf_Dyn *dynp; Needed_Entry **needed_tail = &obj->needed; Needed_Entry **needed_filtees_tail = &obj->needed_filtees; Needed_Entry **needed_aux_filtees_tail = &obj->needed_aux_filtees; const Elf_Hashelt *hashtab; const Elf32_Word *hashval; Elf32_Word bkt, nmaskwords; int bloom_size32; int plttype = DT_REL; *dyn_rpath = NULL; *dyn_soname = NULL; *dyn_runpath = NULL; obj->bind_now = false; for (dynp = obj->dynamic; dynp->d_tag != DT_NULL; dynp++) { switch (dynp->d_tag) { case DT_REL: obj->rel = (const Elf_Rel *) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_RELSZ: obj->relsize = dynp->d_un.d_val; break; case DT_RELENT: assert(dynp->d_un.d_val == sizeof(Elf_Rel)); break; case DT_JMPREL: obj->pltrel = (const Elf_Rel *) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_PLTRELSZ: obj->pltrelsize = dynp->d_un.d_val; break; case DT_RELA: obj->rela = (const Elf_Rela *) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_RELASZ: obj->relasize = dynp->d_un.d_val; break; case DT_RELAENT: assert(dynp->d_un.d_val == sizeof(Elf_Rela)); break; case DT_PLTREL: plttype = dynp->d_un.d_val; assert(dynp->d_un.d_val == DT_REL || plttype == DT_RELA); break; case DT_SYMTAB: obj->symtab = (const Elf_Sym *) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_SYMENT: assert(dynp->d_un.d_val == sizeof(Elf_Sym)); break; case DT_STRTAB: obj->strtab = (const char *) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_STRSZ: obj->strsize = dynp->d_un.d_val; break; case DT_VERNEED: obj->verneed = (const Elf_Verneed *) (obj->relocbase + dynp->d_un.d_val); break; case DT_VERNEEDNUM: obj->verneednum = dynp->d_un.d_val; break; case DT_VERDEF: obj->verdef = (const Elf_Verdef *) (obj->relocbase + dynp->d_un.d_val); break; case DT_VERDEFNUM: obj->verdefnum = dynp->d_un.d_val; break; case DT_VERSYM: obj->versyms = (const Elf_Versym *)(obj->relocbase + dynp->d_un.d_val); break; case DT_HASH: { hashtab = (const Elf_Hashelt *)(obj->relocbase + dynp->d_un.d_ptr); obj->nbuckets = hashtab[0]; obj->nchains = hashtab[1]; obj->buckets = hashtab + 2; obj->chains = obj->buckets + obj->nbuckets; obj->valid_hash_sysv = obj->nbuckets > 0 && obj->nchains > 0 && obj->buckets != NULL; } break; case DT_GNU_HASH: { hashtab = (const Elf_Hashelt *)(obj->relocbase + dynp->d_un.d_ptr); obj->nbuckets_gnu = hashtab[0]; obj->symndx_gnu = hashtab[1]; nmaskwords = hashtab[2]; bloom_size32 = (__ELF_WORD_SIZE / 32) * nmaskwords; obj->maskwords_bm_gnu = nmaskwords - 1; obj->shift2_gnu = hashtab[3]; obj->bloom_gnu = (Elf_Addr *) (hashtab + 4); obj->buckets_gnu = hashtab + 4 + bloom_size32; obj->chain_zero_gnu = obj->buckets_gnu + obj->nbuckets_gnu - obj->symndx_gnu; /* Number of bitmask words is required to be power of 2 */ obj->valid_hash_gnu = powerof2(nmaskwords) && obj->nbuckets_gnu > 0 && obj->buckets_gnu != NULL; } break; case DT_NEEDED: if (!obj->rtld) { Needed_Entry *nep = NEW(Needed_Entry); nep->name = dynp->d_un.d_val; nep->obj = NULL; nep->next = NULL; *needed_tail = nep; needed_tail = &nep->next; } break; case DT_FILTER: if (!obj->rtld) { Needed_Entry *nep = NEW(Needed_Entry); nep->name = dynp->d_un.d_val; nep->obj = NULL; nep->next = NULL; *needed_filtees_tail = nep; needed_filtees_tail = &nep->next; } break; case DT_AUXILIARY: if (!obj->rtld) { Needed_Entry *nep = NEW(Needed_Entry); nep->name = dynp->d_un.d_val; nep->obj = NULL; nep->next = NULL; *needed_aux_filtees_tail = nep; 
needed_aux_filtees_tail = &nep->next; } break; case DT_PLTGOT: obj->pltgot = (Elf_Addr *) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_TEXTREL: obj->textrel = true; break; case DT_SYMBOLIC: obj->symbolic = true; break; case DT_RPATH: /* * We have to wait until later to process this, because we * might not have gotten the address of the string table yet. */ *dyn_rpath = dynp; break; case DT_SONAME: *dyn_soname = dynp; break; case DT_RUNPATH: *dyn_runpath = dynp; break; case DT_INIT: obj->init = (Elf_Addr) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_PREINIT_ARRAY: obj->preinit_array = (Elf_Addr)(obj->relocbase + dynp->d_un.d_ptr); break; case DT_PREINIT_ARRAYSZ: obj->preinit_array_num = dynp->d_un.d_val / sizeof(Elf_Addr); break; case DT_INIT_ARRAY: obj->init_array = (Elf_Addr)(obj->relocbase + dynp->d_un.d_ptr); break; case DT_INIT_ARRAYSZ: obj->init_array_num = dynp->d_un.d_val / sizeof(Elf_Addr); break; case DT_FINI: obj->fini = (Elf_Addr) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_FINI_ARRAY: obj->fini_array = (Elf_Addr)(obj->relocbase + dynp->d_un.d_ptr); break; case DT_FINI_ARRAYSZ: obj->fini_array_num = dynp->d_un.d_val / sizeof(Elf_Addr); break; /* * Don't process DT_DEBUG on MIPS as the dynamic section * is mapped read-only. DT_MIPS_RLD_MAP is used instead. */ #ifndef __mips__ case DT_DEBUG: if (!early) dbg("Filling in DT_DEBUG entry"); ((Elf_Dyn*)dynp)->d_un.d_ptr = (Elf_Addr) &r_debug; break; #endif case DT_FLAGS: if (dynp->d_un.d_val & DF_ORIGIN) obj->z_origin = true; if (dynp->d_un.d_val & DF_SYMBOLIC) obj->symbolic = true; if (dynp->d_un.d_val & DF_TEXTREL) obj->textrel = true; if (dynp->d_un.d_val & DF_BIND_NOW) obj->bind_now = true; /*if (dynp->d_un.d_val & DF_STATIC_TLS) ;*/ break; #ifdef __mips__ case DT_MIPS_LOCAL_GOTNO: obj->local_gotno = dynp->d_un.d_val; break; case DT_MIPS_SYMTABNO: obj->symtabno = dynp->d_un.d_val; break; case DT_MIPS_GOTSYM: obj->gotsym = dynp->d_un.d_val; break; case DT_MIPS_RLD_MAP: *((Elf_Addr *)(dynp->d_un.d_ptr)) = (Elf_Addr) &r_debug; break; #endif #ifdef __powerpc64__ case DT_PPC64_GLINK: obj->glink = (Elf_Addr) (obj->relocbase + dynp->d_un.d_ptr); break; #endif case DT_FLAGS_1: if (dynp->d_un.d_val & DF_1_NOOPEN) obj->z_noopen = true; if (dynp->d_un.d_val & DF_1_ORIGIN) obj->z_origin = true; if (dynp->d_un.d_val & DF_1_GLOBAL) obj->z_global = true; if (dynp->d_un.d_val & DF_1_BIND_NOW) obj->bind_now = true; if (dynp->d_un.d_val & DF_1_NODELETE) obj->z_nodelete = true; if (dynp->d_un.d_val & DF_1_LOADFLTR) obj->z_loadfltr = true; if (dynp->d_un.d_val & DF_1_INTERPOSE) obj->z_interpose = true; if (dynp->d_un.d_val & DF_1_NODEFLIB) obj->z_nodeflib = true; break; default: if (!early) { dbg("Ignoring d_tag %ld = %#lx", (long)dynp->d_tag, (long)dynp->d_tag); } break; } } obj->traced = false; if (plttype == DT_RELA) { obj->pltrela = (const Elf_Rela *) obj->pltrel; obj->pltrel = NULL; obj->pltrelasize = obj->pltrelsize; obj->pltrelsize = 0; } /* Determine size of dynsym table (equal to nchains of sysv hash) */ if (obj->valid_hash_sysv) obj->dynsymcount = obj->nchains; else if (obj->valid_hash_gnu) { obj->dynsymcount = 0; for (bkt = 0; bkt < obj->nbuckets_gnu; bkt++) { if (obj->buckets_gnu[bkt] == 0) continue; hashval = &obj->chain_zero_gnu[obj->buckets_gnu[bkt]]; do obj->dynsymcount++; while ((*hashval++ & 1u) == 0); } obj->dynsymcount += obj->symndx_gnu; } } static bool obj_resolve_origin(Obj_Entry *obj) { if (obj->origin_path != NULL) return (true); obj->origin_path = xmalloc(PATH_MAX); return (rtld_dirname_abs(obj->path, 
obj->origin_path) != -1); } static void digest_dynamic2(Obj_Entry *obj, const Elf_Dyn *dyn_rpath, const Elf_Dyn *dyn_soname, const Elf_Dyn *dyn_runpath) { if (obj->z_origin && !obj_resolve_origin(obj)) rtld_die(); if (dyn_runpath != NULL) { obj->runpath = (char *)obj->strtab + dyn_runpath->d_un.d_val; obj->runpath = origin_subst(obj, obj->runpath); } else if (dyn_rpath != NULL) { obj->rpath = (char *)obj->strtab + dyn_rpath->d_un.d_val; obj->rpath = origin_subst(obj, obj->rpath); } if (dyn_soname != NULL) object_add_name(obj, obj->strtab + dyn_soname->d_un.d_val); } static void digest_dynamic(Obj_Entry *obj, int early) { const Elf_Dyn *dyn_rpath; const Elf_Dyn *dyn_soname; const Elf_Dyn *dyn_runpath; digest_dynamic1(obj, early, &dyn_rpath, &dyn_soname, &dyn_runpath); digest_dynamic2(obj, dyn_rpath, dyn_soname, dyn_runpath); } /* * Process a shared object's program header. This is used only for the * main program, when the kernel has already loaded the main program * into memory before calling the dynamic linker. It creates and * returns an Obj_Entry structure. */ static Obj_Entry * digest_phdr(const Elf_Phdr *phdr, int phnum, caddr_t entry, const char *path) { Obj_Entry *obj; const Elf_Phdr *phlimit = phdr + phnum; const Elf_Phdr *ph; Elf_Addr note_start, note_end; int nsegs = 0; obj = obj_new(); for (ph = phdr; ph < phlimit; ph++) { if (ph->p_type != PT_PHDR) continue; obj->phdr = phdr; obj->phsize = ph->p_memsz; obj->relocbase = (caddr_t)phdr - ph->p_vaddr; break; } obj->stack_flags = PF_X | PF_R | PF_W; for (ph = phdr; ph < phlimit; ph++) { switch (ph->p_type) { case PT_INTERP: obj->interp = (const char *)(ph->p_vaddr + obj->relocbase); break; case PT_LOAD: if (nsegs == 0) { /* First load segment */ obj->vaddrbase = trunc_page(ph->p_vaddr); obj->mapbase = obj->vaddrbase + obj->relocbase; obj->textsize = round_page(ph->p_vaddr + ph->p_memsz) - obj->vaddrbase; } else { /* Last load segment */ obj->mapsize = round_page(ph->p_vaddr + ph->p_memsz) - obj->vaddrbase; } nsegs++; break; case PT_DYNAMIC: obj->dynamic = (const Elf_Dyn *)(ph->p_vaddr + obj->relocbase); break; case PT_TLS: obj->tlsindex = 1; obj->tlssize = ph->p_memsz; obj->tlsalign = ph->p_align; obj->tlsinitsize = ph->p_filesz; obj->tlsinit = (void*)(ph->p_vaddr + obj->relocbase); break; case PT_GNU_STACK: obj->stack_flags = ph->p_flags; break; case PT_GNU_RELRO: obj->relro_page = obj->relocbase + trunc_page(ph->p_vaddr); obj->relro_size = round_page(ph->p_memsz); break; case PT_NOTE: note_start = (Elf_Addr)obj->relocbase + ph->p_vaddr; note_end = note_start + ph->p_filesz; digest_notes(obj, note_start, note_end); break; } } if (nsegs < 1) { _rtld_error("%s: too few PT_LOAD segments", path); return NULL; } obj->entry = entry; return obj; } void digest_notes(Obj_Entry *obj, Elf_Addr note_start, Elf_Addr note_end) { const Elf_Note *note; const char *note_name; uintptr_t p; for (note = (const Elf_Note *)note_start; (Elf_Addr)note < note_end; note = (const Elf_Note *)((const char *)(note + 1) + roundup2(note->n_namesz, sizeof(Elf32_Addr)) + roundup2(note->n_descsz, sizeof(Elf32_Addr)))) { if (note->n_namesz != sizeof(NOTE_FREEBSD_VENDOR) || note->n_descsz != sizeof(int32_t)) continue; if (note->n_type != NT_FREEBSD_ABI_TAG && note->n_type != NT_FREEBSD_NOINIT_TAG) continue; note_name = (const char *)(note + 1); if (strncmp(NOTE_FREEBSD_VENDOR, note_name, sizeof(NOTE_FREEBSD_VENDOR)) != 0) continue; switch (note->n_type) { case NT_FREEBSD_ABI_TAG: /* FreeBSD osrel note */ p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, 
sizeof(Elf32_Addr)); obj->osrel = *(const int32_t *)(p); dbg("note osrel %d", obj->osrel); break; case NT_FREEBSD_NOINIT_TAG: /* FreeBSD 'crt does not call init' note */ obj->crt_no_init = true; dbg("note crt_no_init"); break; } } } static Obj_Entry * dlcheck(void *handle) { Obj_Entry *obj; for (obj = obj_list; obj != NULL; obj = obj->next) if (obj == (Obj_Entry *) handle) break; if (obj == NULL || obj->refcount == 0 || obj->dl_refcount == 0) { _rtld_error("Invalid shared object handle %p", handle); return NULL; } return obj; } /* * If the given object is already in the donelist, return true. Otherwise * add the object to the list and return false. */ static bool donelist_check(DoneList *dlp, const Obj_Entry *obj) { unsigned int i; for (i = 0; i < dlp->num_used; i++) if (dlp->objs[i] == obj) return true; /* * Our donelist allocation should always be sufficient. But if * our threads locking isn't working properly, more shared objects * could have been loaded since we allocated the list. That should * never happen, but we'll handle it properly just in case it does. */ if (dlp->num_used < dlp->num_alloc) dlp->objs[dlp->num_used++] = obj; return false; } /* * Hash function for symbol table lookup. Don't even think about changing * this. It is specified by the System V ABI. */ unsigned long elf_hash(const char *name) { const unsigned char *p = (const unsigned char *) name; unsigned long h = 0; unsigned long g; while (*p != '\0') { h = (h << 4) + *p++; if ((g = h & 0xf0000000) != 0) h ^= g >> 24; h &= ~g; } return h; } /* * The GNU hash function is the Daniel J. Bernstein hash clipped to 32 bits * unsigned in case it's implemented with a wider type. */ static uint32_t gnu_hash(const char *s) { uint32_t h; unsigned char c; h = 5381; for (c = *s; c != '\0'; c = *++s) h = h * 33 + c; return (h & 0xffffffff); } /* * Find the library with the given name, and return its full pathname. * The returned string is dynamically allocated. Generates an error * message and returns NULL if the library cannot be found. * * If the second argument is non-NULL, then it refers to an already- * loaded shared object, whose library search path will be searched. * * If a library is successfully located via LD_LIBRARY_PATH_FDS, its * descriptor (which is close-on-exec) will be passed out via the third * argument. * * The search order is: * DT_RPATH in the referencing file _unless_ DT_RUNPATH is present (1) * DT_RPATH of the main object if DSO without defined DT_RUNPATH (1) * LD_LIBRARY_PATH * DT_RUNPATH in the referencing file * ldconfig hints (if -z nodefaultlib, filter out default library directories * from list) * /lib:/usr/lib _unless_ the referencing file is linked with -z nodefaultlib * * (1) Handled in digest_dynamic2 - rpath left NULL if runpath defined. */ static char * find_library(const char *xname, const Obj_Entry *refobj, int *fdp) { char *pathname; char *name; bool nodeflib, objgiven; objgiven = refobj != NULL; if (strchr(xname, '/') != NULL) { /* Hard coded pathname */ if (xname[0] != '/' && !trust) { _rtld_error("Absolute pathname required for shared object \"%s\"", xname); return NULL; } return (origin_subst(__DECONST(Obj_Entry *, refobj), __DECONST(char *, xname))); } if (libmap_disable || !objgiven || (name = lm_find(refobj->path, xname)) == NULL) name = (char *)xname; dbg(" Searching for \"%s\"", name); /* * If refobj->rpath != NULL, then refobj->runpath is NULL. Fall * back to pre-conforming behaviour if user requested so with * LD_LIBRARY_PATH_RPATH environment variable and ignore -z * nodeflib. 
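 *
 * Hypothetical example of that pre-conforming mode: running
 *
 *	LD_LIBRARY_PATH_RPATH=yes LD_LIBRARY_PATH=/tmp/lib ./prog
 *
 * against a binary that records DT_RPATH lets /tmp/lib win over the
 * recorded rpath, per the LD_LIBRARY_PATH-first ordering in the
 * branch below.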
*/ if (objgiven && refobj->rpath != NULL && ld_library_path_rpath) { if ((pathname = search_library_path(name, ld_library_path)) != NULL || (refobj != NULL && (pathname = search_library_path(name, refobj->rpath)) != NULL) || (pathname = search_library_pathfds(name, ld_library_dirs, fdp)) != NULL || (pathname = search_library_path(name, gethints(false))) != NULL || (pathname = search_library_path(name, ld_standard_library_path)) != NULL) return (pathname); } else { nodeflib = objgiven ? refobj->z_nodeflib : false; if ((objgiven && (pathname = search_library_path(name, refobj->rpath)) != NULL) || (objgiven && refobj->runpath == NULL && refobj != obj_main && (pathname = search_library_path(name, obj_main->rpath)) != NULL) || (pathname = search_library_path(name, ld_library_path)) != NULL || (objgiven && (pathname = search_library_path(name, refobj->runpath)) != NULL) || (pathname = search_library_pathfds(name, ld_library_dirs, fdp)) != NULL || (pathname = search_library_path(name, gethints(nodeflib))) != NULL || (objgiven && !nodeflib && (pathname = search_library_path(name, ld_standard_library_path)) != NULL)) return (pathname); } if (objgiven && refobj->path != NULL) { _rtld_error("Shared object \"%s\" not found, required by \"%s\"", name, basename(refobj->path)); } else { _rtld_error("Shared object \"%s\" not found", name); } return NULL; } /* * Given a symbol number in a referencing object, find the corresponding * definition of the symbol. Returns a pointer to the symbol, or NULL if * no definition was found. Returns a pointer to the Obj_Entry of the * defining object via the reference parameter DEFOBJ_OUT. */ const Elf_Sym * find_symdef(unsigned long symnum, const Obj_Entry *refobj, const Obj_Entry **defobj_out, int flags, SymCache *cache, RtldLockState *lockstate) { const Elf_Sym *ref; const Elf_Sym *def; const Obj_Entry *defobj; SymLook req; const char *name; int res; /* * If we have already found this symbol, get the information from * the cache. */ if (symnum >= refobj->dynsymcount) return NULL; /* Bad object */ if (cache != NULL && cache[symnum].sym != NULL) { *defobj_out = cache[symnum].obj; return cache[symnum].sym; } ref = refobj->symtab + symnum; name = refobj->strtab + ref->st_name; def = NULL; defobj = NULL; /* * We don't have to do a full scale lookup if the symbol is local. * We know it will bind to the instance in this load module; to * which we already have a pointer (ie ref). By not doing a lookup, * we not only improve performance, but it also avoids unresolvable * symbols when local symbols are not in the hash table. This has * been seen with the ia64 toolchain. */ if (ELF_ST_BIND(ref->st_info) != STB_LOCAL) { if (ELF_ST_TYPE(ref->st_info) == STT_SECTION) { _rtld_error("%s: Bogus symbol table entry %lu", refobj->path, symnum); } symlook_init(&req, name); req.flags = flags; req.ventry = fetch_ventry(refobj, symnum); req.lockstate = lockstate; res = symlook_default(&req, refobj); if (res == 0) { def = req.sym_out; defobj = req.defobj_out; } } else { def = ref; defobj = refobj; } /* * If we found no definition and the reference is weak, treat the * symbol as having the value zero. */ if (def == NULL && ELF_ST_BIND(ref->st_info) == STB_WEAK) { def = &sym_zero; defobj = obj_main; } if (def != NULL) { *defobj_out = defobj; /* Record the information in the cache to avoid subsequent lookups. 
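 * (The cache, when the caller supplies one, is an array indexed by
 * the referencing object's symbol number; each slot remembers the
 * resolved (sym, obj) pair so that later relocations against the
 * same symbol skip the full symlook_default() walk.)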
*/ if (cache != NULL) { cache[symnum].sym = def; cache[symnum].obj = defobj; } } else { if (refobj != &obj_rtld) _rtld_error("%s: Undefined symbol \"%s\"", refobj->path, name); } return def; } /* * Return the search path from the ldconfig hints file, reading it if * necessary. If nostdlib is true, then the default search paths are * not added to the result. * * Returns NULL if there are problems with the hints file, * or if the search path there is empty. */ static const char * gethints(bool nostdlib) { static char *hints, *filtered_path; struct elfhints_hdr hdr; struct fill_search_info_args sargs, hargs; struct dl_serinfo smeta, hmeta, *SLPinfo, *hintinfo; struct dl_serpath *SLPpath, *hintpath; char *p; unsigned int SLPndx, hintndx, fndx, fcount; int fd; size_t flen; bool skip; /* First call, read the hints file */ if (hints == NULL) { /* Keep from trying again in case the hints file is bad. */ hints = ""; if ((fd = open(ld_elf_hints_path, O_RDONLY | O_CLOEXEC)) == -1) return (NULL); if (read(fd, &hdr, sizeof hdr) != sizeof hdr || hdr.magic != ELFHINTS_MAGIC || hdr.version != 1) { close(fd); return (NULL); } p = xmalloc(hdr.dirlistlen + 1); if (lseek(fd, hdr.strtab + hdr.dirlist, SEEK_SET) == -1 || read(fd, p, hdr.dirlistlen + 1) != (ssize_t)hdr.dirlistlen + 1) { free(p); close(fd); return (NULL); } hints = p; close(fd); } /* * If the caller agreed to receive a list which includes the default * paths, we are done. Otherwise, if we have not yet calculated the * filtered result, do it now. */ if (!nostdlib) return (hints[0] != '\0' ? hints : NULL); if (filtered_path != NULL) goto filt_ret; /* * Obtain the list of all configured search paths, and the * list of the default paths. * * First estimate the size of the results. */ smeta.dls_size = __offsetof(struct dl_serinfo, dls_serpath); smeta.dls_cnt = 0; hmeta.dls_size = __offsetof(struct dl_serinfo, dls_serpath); hmeta.dls_cnt = 0; sargs.request = RTLD_DI_SERINFOSIZE; sargs.serinfo = &smeta; hargs.request = RTLD_DI_SERINFOSIZE; hargs.serinfo = &hmeta; /* Enumerate "hints", not "p": p is only assigned on the first call. */ path_enumerate(ld_standard_library_path, fill_search_info, &sargs); path_enumerate(hints, fill_search_info, &hargs); SLPinfo = xmalloc(smeta.dls_size); hintinfo = xmalloc(hmeta.dls_size); /* * Next fetch both sets of paths. */ sargs.request = RTLD_DI_SERINFO; sargs.serinfo = SLPinfo; sargs.serpath = &SLPinfo->dls_serpath[0]; sargs.strspace = (char *)&SLPinfo->dls_serpath[smeta.dls_cnt]; hargs.request = RTLD_DI_SERINFO; hargs.serinfo = hintinfo; hargs.serpath = &hintinfo->dls_serpath[0]; hargs.strspace = (char *)&hintinfo->dls_serpath[hmeta.dls_cnt]; path_enumerate(ld_standard_library_path, fill_search_info, &sargs); path_enumerate(hints, fill_search_info, &hargs); /* * Now calculate the difference between the two sets, by excluding * standard paths from the full set. The filtered result is never * longer than the hints string itself (hdr is likewise stale after * the first call, so do not size the buffer from hdr.dirlistlen). */ fndx = 0; fcount = 0; filtered_path = xmalloc(strlen(hints) + 1); hintpath = &hintinfo->dls_serpath[0]; for (hintndx = 0; hintndx < hmeta.dls_cnt; hintndx++, hintpath++) { skip = false; SLPpath = &SLPinfo->dls_serpath[0]; /* * Check each standard path against the current one. */ for (SLPndx = 0; SLPndx < smeta.dls_cnt; SLPndx++, SLPpath++) { /* matched, skip the path */ if (!strcmp(hintpath->dls_name, SLPpath->dls_name)) { skip = true; break; } } if (skip) continue; /* * Not matched against any standard path, add the path * to the result. Separate consecutive paths with ':'. 
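 *
 * E.g. (illustrative values): with hints
 * "/usr/lib:/usr/local/lib:/opt/lib" and a standard path of
 * "/usr/lib", the filtered result is "/usr/local/lib:/opt/lib".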
*/ if (fcount > 0) { filtered_path[fndx] = ':'; fndx++; } fcount++; flen = strlen(hintpath->dls_name); strncpy((filtered_path + fndx), hintpath->dls_name, flen); fndx += flen; } filtered_path[fndx] = '\0'; free(SLPinfo); free(hintinfo); filt_ret: return (filtered_path[0] != '\0' ? filtered_path : NULL); } static void init_dag(Obj_Entry *root) { const Needed_Entry *needed; const Objlist_Entry *elm; DoneList donelist; if (root->dag_inited) return; donelist_init(&donelist); /* The root object belongs to its own DAG. */ objlist_push_tail(&root->dldags, root); objlist_push_tail(&root->dagmembers, root); donelist_check(&donelist, root); /* * Add dependencies of the root object to the DAG in breadth-first * order by exploiting the fact that each new object gets added * to the tail of the dagmembers list. */ STAILQ_FOREACH(elm, &root->dagmembers, link) { for (needed = elm->obj->needed; needed != NULL; needed = needed->next) { if (needed->obj == NULL || donelist_check(&donelist, needed->obj)) continue; objlist_push_tail(&needed->obj->dldags, root); objlist_push_tail(&root->dagmembers, needed->obj); } } root->dag_inited = true; } static void process_z(Obj_Entry *root) { const Objlist_Entry *elm; Obj_Entry *obj; /* * Walk over the object DAG and process every dependent object * that is marked as DF_1_NODELETE or DF_1_GLOBAL. They need * to grow their own DAG. * * For DF_1_GLOBAL, a DAG is required for symbol lookups in * symlook_global() to work. * * For DF_1_NODELETE, the DAG should have its reference upped. */ STAILQ_FOREACH(elm, &root->dagmembers, link) { obj = elm->obj; if (obj == NULL) continue; if (obj->z_nodelete && !obj->ref_nodel) { dbg("obj %s -z nodelete", obj->path); init_dag(obj); ref_dag(obj); obj->ref_nodel = true; } if (obj->z_global && objlist_find(&list_global, obj) == NULL) { dbg("obj %s -z global", obj->path); objlist_push_tail(&list_global, obj); init_dag(obj); } } } /* * Initialize the dynamic linker. The argument is the address at which * the dynamic linker has been mapped into memory. The primary task of * this function is to relocate the dynamic linker. */ static void init_rtld(caddr_t mapbase, Elf_Auxinfo **aux_info) { Obj_Entry objtmp; /* Temporary rtld object */ const Elf_Dyn *dyn_rpath; const Elf_Dyn *dyn_soname; const Elf_Dyn *dyn_runpath; #ifdef RTLD_INIT_PAGESIZES_EARLY /* The page size is required by the dynamic memory allocator. */ init_pagesizes(aux_info); #endif /* * Conjure up an Obj_Entry structure for the dynamic linker. * * The "path" member can't be initialized yet because string constants * cannot yet be accessed. Below we will set it correctly. */ memset(&objtmp, 0, sizeof(objtmp)); objtmp.path = NULL; objtmp.rtld = true; objtmp.mapbase = mapbase; #ifdef PIC objtmp.relocbase = mapbase; #endif if (RTLD_IS_DYNAMIC()) { objtmp.dynamic = rtld_dynamic(&objtmp); digest_dynamic1(&objtmp, 1, &dyn_rpath, &dyn_soname, &dyn_runpath); assert(objtmp.needed == NULL); #if !defined(__mips__) /* MIPS has a bogus DT_TEXTREL. */ assert(!objtmp.textrel); #endif /* * Temporarily put the dynamic linker entry into the object list, so * that symbols can be found. */ relocate_objects(&objtmp, true, &objtmp, 0, NULL); } /* Initialize the object list. */ obj_tail = &obj_list; /* Now that non-local variables can be accessed, copy out obj_rtld. */ memcpy(&obj_rtld, &objtmp, sizeof(obj_rtld)); #ifndef RTLD_INIT_PAGESIZES_EARLY /* The page size is required by the dynamic memory allocator. 
*/ init_pagesizes(aux_info); #endif if (aux_info[AT_OSRELDATE] != NULL) osreldate = aux_info[AT_OSRELDATE]->a_un.a_val; digest_dynamic2(&obj_rtld, dyn_rpath, dyn_soname, dyn_runpath); /* Replace the path with a dynamically allocated copy. */ obj_rtld.path = xstrdup(ld_path_rtld); r_debug.r_brk = r_debug_state; r_debug.r_state = RT_CONSISTENT; } /* * Retrieve the array of supported page sizes. The kernel provides the page * sizes in increasing order. */ static void init_pagesizes(Elf_Auxinfo **aux_info) { static size_t psa[MAXPAGESIZES]; int mib[2]; size_t len, size; if (aux_info[AT_PAGESIZES] != NULL && aux_info[AT_PAGESIZESLEN] != NULL) { size = aux_info[AT_PAGESIZESLEN]->a_un.a_val; pagesizes = aux_info[AT_PAGESIZES]->a_un.a_ptr; } else { len = 2; if (sysctlnametomib("hw.pagesizes", mib, &len) == 0) size = sizeof(psa); else { /* As a fallback, retrieve the base page size. */ size = sizeof(psa[0]); if (aux_info[AT_PAGESZ] != NULL) { psa[0] = aux_info[AT_PAGESZ]->a_un.a_val; goto psa_filled; } else { mib[0] = CTL_HW; mib[1] = HW_PAGESIZE; len = 2; } } if (sysctl(mib, len, psa, &size, NULL, 0) == -1) { _rtld_error("sysctl for hw.pagesize(s) failed"); rtld_die(); } psa_filled: pagesizes = psa; } npagesizes = size / sizeof(pagesizes[0]); /* Discard any invalid entries at the end of the array. */ while (npagesizes > 0 && pagesizes[npagesizes - 1] == 0) npagesizes--; } /* * Add the init functions from a needed object list (and its recursive * needed objects) to "list". This is not used directly; it is a helper * function for initlist_add_objects(). The write lock must be held * when this function is called. */ static void initlist_add_neededs(Needed_Entry *needed, Objlist *list) { /* Recursively process the successor needed objects. */ if (needed->next != NULL) initlist_add_neededs(needed->next, list); /* Process the current needed object. */ if (needed->obj != NULL) initlist_add_objects(needed->obj, &needed->obj->next, list); } /* * Scan all of the DAGs rooted in the range of objects from "obj" to * "tail" and add their init functions to "list". This recurses over * the DAGs and ensures the proper init ordering such that each object's * needed libraries are initialized before the object itself. At the * same time, this function adds the objects to the global finalization * list "list_fini" in the opposite order. The write lock must be * held when this function is called. */ static void initlist_add_objects(Obj_Entry *obj, Obj_Entry **tail, Objlist *list) { if (obj->init_scanned || obj->init_done) return; obj->init_scanned = true; /* Recursively process the successor objects. */ if (&obj->next != tail) initlist_add_objects(obj->next, tail, list); /* Recursively process the needed objects. */ if (obj->needed != NULL) initlist_add_neededs(obj->needed, list); if (obj->needed_filtees != NULL) initlist_add_neededs(obj->needed_filtees, list); if (obj->needed_aux_filtees != NULL) initlist_add_neededs(obj->needed_aux_filtees, list); /* Add the object to the init list. */ if (obj->preinit_array != (Elf_Addr)NULL || obj->init != (Elf_Addr)NULL || obj->init_array != (Elf_Addr)NULL) objlist_push_tail(list, obj); /* Add the object to the global fini list in reverse order. 
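 *
 * (Illustrative: if the program needs libA and libA needs libB, the
 * init list comes out as B, A, program, while list_fini accumulates
 * the same objects in the opposite order, so finalizers run
 * last-initialized, first-finalized.)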
*/ if ((obj->fini != (Elf_Addr)NULL || obj->fini_array != (Elf_Addr)NULL) && !obj->on_fini_list) { objlist_push_head(&list_fini, obj); obj->on_fini_list = true; } } #ifndef FPTR_TARGET #define FPTR_TARGET(f) ((Elf_Addr) (f)) #endif static void free_needed_filtees(Needed_Entry *n) { Needed_Entry *needed, *needed1; for (needed = n; needed != NULL; needed = needed->next) { if (needed->obj != NULL) { dlclose(needed->obj); needed->obj = NULL; } } for (needed = n; needed != NULL; needed = needed1) { needed1 = needed->next; free(needed); } } static void unload_filtees(Obj_Entry *obj) { free_needed_filtees(obj->needed_filtees); obj->needed_filtees = NULL; free_needed_filtees(obj->needed_aux_filtees); obj->needed_aux_filtees = NULL; obj->filtees_loaded = false; } static void load_filtee1(Obj_Entry *obj, Needed_Entry *needed, int flags, RtldLockState *lockstate) { for (; needed != NULL; needed = needed->next) { needed->obj = dlopen_object(obj->strtab + needed->name, -1, obj, flags, ((ld_loadfltr || obj->z_loadfltr) ? RTLD_NOW : RTLD_LAZY) | RTLD_LOCAL, lockstate); } } static void load_filtees(Obj_Entry *obj, int flags, RtldLockState *lockstate) { lock_restart_for_upgrade(lockstate); if (!obj->filtees_loaded) { load_filtee1(obj, obj->needed_filtees, flags, lockstate); load_filtee1(obj, obj->needed_aux_filtees, flags, lockstate); obj->filtees_loaded = true; } } static int process_needed(Obj_Entry *obj, Needed_Entry *needed, int flags) { Obj_Entry *obj1; for (; needed != NULL; needed = needed->next) { obj1 = needed->obj = load_object(obj->strtab + needed->name, -1, obj, flags & ~RTLD_LO_NOLOAD); if (obj1 == NULL && !ld_tracing && (flags & RTLD_LO_FILTEES) == 0) return (-1); } return (0); } /* * Given a shared object, traverse its list of needed objects, and load * each of them. Returns 0 on success. Generates an error message and * returns -1 on failure. */ static int load_needed_objects(Obj_Entry *first, int flags) { Obj_Entry *obj; for (obj = first; obj != NULL; obj = obj->next) { if (process_needed(obj, obj->needed, flags) == -1) return (-1); } return (0); } static int load_preload_objects(void) { char *p = ld_preload; Obj_Entry *obj; static const char delim[] = " \t:;"; if (p == NULL) return 0; p += strspn(p, delim); while (*p != '\0') { size_t len = strcspn(p, delim); char savech; savech = p[len]; p[len] = '\0'; obj = load_object(p, -1, NULL, 0); if (obj == NULL) return -1; /* XXX - cleanup */ obj->z_interpose = true; p[len] = savech; p += len; p += strspn(p, delim); } LD_UTRACE(UTRACE_PRELOAD_FINISHED, NULL, NULL, 0, 0, NULL); return 0; } static const char * printable_path(const char *path) { return (path == NULL ? "" : path); } /* * Load a shared object into memory, if it is not already loaded. The * object may be specified by name or by user-supplied file descriptor * fd_u. In the latter case, the fd_u descriptor is not closed, but its * duplicate is. * * Returns a pointer to the Obj_Entry for the object. Returns NULL * on failure. */ static Obj_Entry * load_object(const char *name, int fd_u, const Obj_Entry *refobj, int flags) { Obj_Entry *obj; int fd; struct stat sb; char *path; fd = -1; if (name != NULL) { for (obj = obj_list->next; obj != NULL; obj = obj->next) { if (object_match_name(obj, name)) return (obj); } path = find_library(name, refobj, &fd); if (path == NULL) return (NULL); } else path = NULL; if (fd >= 0) { /* * search_library_pathfds() opens a fresh file descriptor for the * library, so there is no need to dup(). 
*/ } else if (fd_u == -1) { /* * If we didn't find a match by pathname, or the name is not * supplied, open the file and check again by device and inode. * This avoids false mismatches caused by multiple links or ".." * in pathnames. * * To avoid a race, we open the file and use fstat() rather than * using stat(). */ if ((fd = open(path, O_RDONLY | O_CLOEXEC | O_VERIFY)) == -1) { _rtld_error("Cannot open \"%s\"", path); free(path); return (NULL); } } else { fd = fcntl(fd_u, F_DUPFD_CLOEXEC, 0); if (fd == -1) { _rtld_error("Cannot dup fd"); free(path); return (NULL); } } if (fstat(fd, &sb) == -1) { _rtld_error("Cannot fstat \"%s\"", printable_path(path)); close(fd); free(path); return NULL; } for (obj = obj_list->next; obj != NULL; obj = obj->next) if (obj->ino == sb.st_ino && obj->dev == sb.st_dev) break; if (obj != NULL && name != NULL) { object_add_name(obj, name); free(path); close(fd); return obj; } if (flags & RTLD_LO_NOLOAD) { free(path); close(fd); return (NULL); } /* First use of this object, so we must map it in */ obj = do_load_object(fd, name, path, &sb, flags); if (obj == NULL) free(path); close(fd); return obj; } static Obj_Entry * do_load_object(int fd, const char *name, char *path, struct stat *sbp, int flags) { Obj_Entry *obj; struct statfs fs; /* * but first, make sure that environment variables haven't been * used to circumvent the noexec flag on a filesystem. */ if (dangerous_ld_env) { if (fstatfs(fd, &fs) != 0) { _rtld_error("Cannot fstatfs \"%s\"", printable_path(path)); return NULL; } if (fs.f_flags & MNT_NOEXEC) { _rtld_error("Cannot execute objects on %s\n", fs.f_mntonname); return NULL; } } dbg("loading \"%s\"", printable_path(path)); obj = map_object(fd, printable_path(path), sbp); if (obj == NULL) return NULL; /* * If DT_SONAME is present in the object, digest_dynamic2 already * added it to the object names. */ if (name != NULL) object_add_name(obj, name); obj->path = path; digest_dynamic(obj, 0); dbg("%s valid_hash_sysv %d valid_hash_gnu %d dynsymcount %d", obj->path, obj->valid_hash_sysv, obj->valid_hash_gnu, obj->dynsymcount); if (obj->z_noopen && (flags & (RTLD_LO_DLOPEN | RTLD_LO_TRACE)) == RTLD_LO_DLOPEN) { dbg("refusing to load non-loadable \"%s\"", obj->path); _rtld_error("Cannot dlopen non-loadable %s", obj->path); munmap(obj->mapbase, obj->mapsize); obj_free(obj); return (NULL); } obj->dlopened = (flags & RTLD_LO_DLOPEN) != 0; *obj_tail = obj; obj_tail = &obj->next; obj_count++; obj_loads++; linkmap_add(obj); /* for GDB & dlinfo() */ max_stack_flags |= obj->stack_flags; dbg(" %p .. 
%p: %s", obj->mapbase, obj->mapbase + obj->mapsize - 1, obj->path); if (obj->textrel) dbg(" WARNING: %s has impure text", obj->path); LD_UTRACE(UTRACE_LOAD_OBJECT, obj, obj->mapbase, obj->mapsize, 0, obj->path); return obj; } static Obj_Entry * obj_from_addr(const void *addr) { Obj_Entry *obj; for (obj = obj_list; obj != NULL; obj = obj->next) { if (addr < (void *) obj->mapbase) continue; if (addr < (void *) (obj->mapbase + obj->mapsize)) return obj; } return NULL; } static void preinit_main(void) { Elf_Addr *preinit_addr; int index; preinit_addr = (Elf_Addr *)obj_main->preinit_array; if (preinit_addr == NULL) return; for (index = 0; index < obj_main->preinit_array_num; index++) { if (preinit_addr[index] != 0 && preinit_addr[index] != 1) { dbg("calling preinit function for %s at %p", obj_main->path, (void *)preinit_addr[index]); LD_UTRACE(UTRACE_INIT_CALL, obj_main, (void *)preinit_addr[index], 0, 0, obj_main->path); call_init_pointer(obj_main, preinit_addr[index]); } } } /* * Call the finalization functions for each of the objects in "list" * belonging to the DAG of "root" and referenced once. If NULL "root" * is specified, every finalization function will be called regardless * of the reference count and the list elements won't be freed. All of * the objects are expected to have non-NULL fini functions. */ static void objlist_call_fini(Objlist *list, Obj_Entry *root, RtldLockState *lockstate) { Objlist_Entry *elm; char *saved_msg; Elf_Addr *fini_addr; int index; assert(root == NULL || root->refcount == 1); /* * Preserve the current error message since a fini function might * call into the dynamic linker and overwrite it. */ saved_msg = errmsg_save(); do { STAILQ_FOREACH(elm, list, link) { if (root != NULL && (elm->obj->refcount != 1 || objlist_find(&root->dagmembers, elm->obj) == NULL)) continue; /* Remove object from fini list to prevent recursive invocation. */ STAILQ_REMOVE(list, elm, Struct_Objlist_Entry, link); /* * XXX: If a dlopen() call references an object while the * fini function is in progress, we might end up trying to * unload the referenced object in dlclose() or the object * won't be unloaded although its fini function has been * called. */ lock_release(rtld_bind_lock, lockstate); /* * It is legal to have both DT_FINI and DT_FINI_ARRAY defined. * When this happens, DT_FINI_ARRAY is processed first. */ fini_addr = (Elf_Addr *)elm->obj->fini_array; if (fini_addr != NULL && elm->obj->fini_array_num > 0) { for (index = elm->obj->fini_array_num - 1; index >= 0; index--) { if (fini_addr[index] != 0 && fini_addr[index] != 1) { dbg("calling fini function for %s at %p", elm->obj->path, (void *)fini_addr[index]); LD_UTRACE(UTRACE_FINI_CALL, elm->obj, (void *)fini_addr[index], 0, 0, elm->obj->path); call_initfini_pointer(elm->obj, fini_addr[index]); } } } if (elm->obj->fini != (Elf_Addr)NULL) { dbg("calling fini function for %s at %p", elm->obj->path, (void *)elm->obj->fini); LD_UTRACE(UTRACE_FINI_CALL, elm->obj, (void *)elm->obj->fini, 0, 0, elm->obj->path); call_initfini_pointer(elm->obj, elm->obj->fini); } wlock_acquire(rtld_bind_lock, lockstate); /* No need to free anything if process is going down. */ if (root != NULL) free(elm); /* * We must restart the list traversal after every fini call * because a dlclose() call from the fini function or from * another thread might have modified the reference counts. */ break; } } while (elm != NULL); errmsg_restore(saved_msg); } /* * Call the initialization functions for each of the objects in * "list". 
All of the objects are expected to have non-NULL init * functions. */ static void objlist_call_init(Objlist *list, RtldLockState *lockstate) { Objlist_Entry *elm; Obj_Entry *obj; char *saved_msg; Elf_Addr *init_addr; int index; /* * Clear the init_scanned flag so that objects can be rechecked and * possibly initialized earlier if any of the vectors called below * cause a change by using dlopen. */ for (obj = obj_list; obj != NULL; obj = obj->next) obj->init_scanned = false; /* * Preserve the current error message since an init function might * call into the dynamic linker and overwrite it. */ saved_msg = errmsg_save(); STAILQ_FOREACH(elm, list, link) { if (elm->obj->init_done) /* Initialized early. */ continue; /* * Race: another thread might try to use this object before the * current one completes its initialization. Not much can be done * here without better locking. */ elm->obj->init_done = true; lock_release(rtld_bind_lock, lockstate); /* * It is legal to have both DT_INIT and DT_INIT_ARRAY defined. * When this happens, DT_INIT is processed first. */ if (elm->obj->init != (Elf_Addr)NULL) { dbg("calling init function for %s at %p", elm->obj->path, (void *)elm->obj->init); LD_UTRACE(UTRACE_INIT_CALL, elm->obj, (void *)elm->obj->init, 0, 0, elm->obj->path); call_initfini_pointer(elm->obj, elm->obj->init); } init_addr = (Elf_Addr *)elm->obj->init_array; if (init_addr != NULL) { for (index = 0; index < elm->obj->init_array_num; index++) { if (init_addr[index] != 0 && init_addr[index] != 1) { dbg("calling init function for %s at %p", elm->obj->path, (void *)init_addr[index]); LD_UTRACE(UTRACE_INIT_CALL, elm->obj, (void *)init_addr[index], 0, 0, elm->obj->path); call_init_pointer(elm->obj, init_addr[index]); } } } wlock_acquire(rtld_bind_lock, lockstate); } errmsg_restore(saved_msg); } static void objlist_clear(Objlist *list) { Objlist_Entry *elm; while (!STAILQ_EMPTY(list)) { elm = STAILQ_FIRST(list); STAILQ_REMOVE_HEAD(list, link); free(elm); } } static Objlist_Entry * objlist_find(Objlist *list, const Obj_Entry *obj) { Objlist_Entry *elm; STAILQ_FOREACH(elm, list, link) if (elm->obj == obj) return elm; return NULL; } static void objlist_init(Objlist *list) { STAILQ_INIT(list); } static void objlist_push_head(Objlist *list, Obj_Entry *obj) { Objlist_Entry *elm; elm = NEW(Objlist_Entry); elm->obj = obj; STAILQ_INSERT_HEAD(list, elm, link); } static void objlist_push_tail(Objlist *list, Obj_Entry *obj) { Objlist_Entry *elm; elm = NEW(Objlist_Entry); elm->obj = obj; STAILQ_INSERT_TAIL(list, elm, link); } static void objlist_put_after(Objlist *list, Obj_Entry *listobj, Obj_Entry *obj) { Objlist_Entry *elm, *listelm; STAILQ_FOREACH(listelm, list, link) { if (listelm->obj == listobj) break; } elm = NEW(Objlist_Entry); elm->obj = obj; if (listelm != NULL) STAILQ_INSERT_AFTER(list, listelm, elm, link); else STAILQ_INSERT_TAIL(list, elm, link); } static void objlist_remove(Objlist *list, Obj_Entry *obj) { Objlist_Entry *elm; if ((elm = objlist_find(list, obj)) != NULL) { STAILQ_REMOVE(list, elm, Struct_Objlist_Entry, link); free(elm); } } /* * Relocate the DAG rooted in the specified object. * Returns 0 on success, or -1 on failure. */ static int relocate_object_dag(Obj_Entry *root, bool bind_now, Obj_Entry *rtldobj, int flags, RtldLockState *lockstate) { Objlist_Entry *elm; int error; error = 0; STAILQ_FOREACH(elm, &root->dagmembers, link) { error = relocate_object(elm->obj, bind_now, rtldobj, flags, lockstate); if (error == -1) break; } return (error); } /* * Relocate a single object. 
* Returns 0 on success, or -1 on failure. */ static int relocate_object(Obj_Entry *obj, bool bind_now, Obj_Entry *rtldobj, int flags, RtldLockState *lockstate) { if (obj->relocated) return (0); obj->relocated = true; if (obj != rtldobj) dbg("relocating \"%s\"", obj->path); if (obj->symtab == NULL || obj->strtab == NULL || !(obj->valid_hash_sysv || obj->valid_hash_gnu)) { _rtld_error("%s: Shared object has no run-time symbol table", obj->path); return (-1); } if (obj->textrel) { /* There are relocations to the write-protected text segment. */ if (mprotect(obj->mapbase, obj->textsize, PROT_READ|PROT_WRITE|PROT_EXEC) == -1) { _rtld_error("%s: Cannot write-enable text segment: %s", obj->path, rtld_strerror(errno)); return (-1); } } /* Process the non-PLT non-IFUNC relocations. */ if (reloc_non_plt(obj, rtldobj, flags, lockstate)) return (-1); if (obj->textrel) { /* Re-protect the text segment. */ if (mprotect(obj->mapbase, obj->textsize, PROT_READ|PROT_EXEC) == -1) { _rtld_error("%s: Cannot write-protect text segment: %s", obj->path, rtld_strerror(errno)); return (-1); } } /* Set the special PLT or GOT entries. */ init_pltgot(obj); /* Process the PLT relocations. */ if (reloc_plt(obj) == -1) return (-1); /* Relocate the jump slots if we are doing immediate binding. */ if (obj->bind_now || bind_now) if (reloc_jmpslots(obj, flags, lockstate) == -1) return (-1); /* * Process the non-PLT IFUNC relocations. The relocations are * processed in two phases, because IFUNC resolvers may * reference other symbols, which must be readily processed * before resolvers are called. */ if (obj->non_plt_gnu_ifunc && reloc_non_plt(obj, rtldobj, flags | SYMLOOK_IFUNC, lockstate)) return (-1); if (obj->relro_size > 0) { if (mprotect(obj->relro_page, obj->relro_size, PROT_READ) == -1) { _rtld_error("%s: Cannot enforce relro protection: %s", obj->path, rtld_strerror(errno)); return (-1); } } /* * Set up the magic number and version in the Obj_Entry. These * were checked in the crt1.o from the original ElfKit, so we * set them for backward compatibility. */ obj->magic = RTLD_MAGIC; obj->version = RTLD_VERSION; return (0); } /* * Relocate newly-loaded shared objects. The argument is a pointer to * the Obj_Entry for the first such object. All objects from the first * to the end of the list of objects are relocated. Returns 0 on success, * or -1 on failure. */ static int relocate_objects(Obj_Entry *first, bool bind_now, Obj_Entry *rtldobj, int flags, RtldLockState *lockstate) { Obj_Entry *obj; int error; for (error = 0, obj = first; obj != NULL; obj = obj->next) { error = relocate_object(obj, bind_now, rtldobj, flags, lockstate); if (error == -1) break; } return (error); } /* * The handling of R_MACHINE_IRELATIVE relocations and jumpslots * referencing STT_GNU_IFUNC symbols is postponed until the other * relocations are done. The indirect functions specified as * ifunc are allowed to call other symbols, so we need to have * objects relocated before asking for resolution from indirects. * * The R_MACHINE_IRELATIVE slots are resolved in a greedy fashion, * instead of the usual lazy handling of PLT slots. It is * consistent with how GNU does it. 
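 *
 * Illustrative source-level example (not from this file; the helper
 * pick_fast_impl() is hypothetical):
 *
 *	static int f_generic(void) { return (0); }
 *	static int f_fast(void) { return (1); }
 *	static void *
 *	resolve_f(void)
 *	{
 *		return ((void *)(pick_fast_impl() ? f_fast : f_generic));
 *	}
 *	int f(void) __attribute__((ifunc("resolve_f")));
 *
 * The resolver may itself call other functions, which is why the
 * ordinary relocations must already be in place when it runs.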
*/ static int resolve_object_ifunc(Obj_Entry *obj, bool bind_now, int flags, RtldLockState *lockstate) { if (obj->irelative && reloc_iresolve(obj, lockstate) == -1) return (-1); if ((obj->bind_now || bind_now) && obj->gnu_ifunc && reloc_gnu_ifunc(obj, flags, lockstate) == -1) return (-1); return (0); } static int resolve_objects_ifunc(Obj_Entry *first, bool bind_now, int flags, RtldLockState *lockstate) { Obj_Entry *obj; for (obj = first; obj != NULL; obj = obj->next) { if (resolve_object_ifunc(obj, bind_now, flags, lockstate) == -1) return (-1); } return (0); } static int initlist_objects_ifunc(Objlist *list, bool bind_now, int flags, RtldLockState *lockstate) { Objlist_Entry *elm; STAILQ_FOREACH(elm, list, link) { if (resolve_object_ifunc(elm->obj, bind_now, flags, lockstate) == -1) return (-1); } return (0); } /* * Cleanup procedure. It will be called (by the atexit mechanism) just * before the process exits. */ static void rtld_exit(void) { RtldLockState lockstate; wlock_acquire(rtld_bind_lock, &lockstate); dbg("rtld_exit()"); objlist_call_fini(&list_fini, NULL, &lockstate); /* No need to remove the items from the list, since we are exiting. */ if (!libmap_disable) lm_fini(); lock_release(rtld_bind_lock, &lockstate); } /* * Iterate over a search path, translate each element, and invoke the * callback on the result. */ static void * path_enumerate(const char *path, path_enum_proc callback, void *arg) { const char *trans; if (path == NULL) return (NULL); path += strspn(path, ":;"); while (*path != '\0') { size_t len; char *res; len = strcspn(path, ":;"); trans = lm_findn(NULL, path, len); if (trans) res = callback(trans, strlen(trans), arg); else res = callback(path, len, arg); if (res != NULL) return (res); path += len; path += strspn(path, ":;"); } return (NULL); } struct try_library_args { const char *name; size_t namelen; char *buffer; size_t buflen; }; static void * try_library_path(const char *dir, size_t dirlen, void *param) { struct try_library_args *arg; arg = param; if (*dir == '/' || trust) { char *pathname; if (dirlen + 1 + arg->namelen + 1 > arg->buflen) return (NULL); pathname = arg->buffer; strncpy(pathname, dir, dirlen); pathname[dirlen] = '/'; strcpy(pathname + dirlen + 1, arg->name); dbg(" Trying \"%s\"", pathname); if (access(pathname, F_OK) == 0) { /* We found it */ pathname = xmalloc(dirlen + 1 + arg->namelen + 1); strcpy(pathname, arg->buffer); return (pathname); } } return (NULL); } static char * search_library_path(const char *name, const char *path) { char *p; struct try_library_args arg; if (path == NULL) return NULL; arg.name = name; arg.namelen = strlen(name); arg.buffer = xmalloc(PATH_MAX); arg.buflen = PATH_MAX; p = path_enumerate(path, try_library_path, &arg); free(arg.buffer); return (p); } /* * Finds the library with the given name using the directory descriptors * listed in the LD_LIBRARY_PATH_FDS environment variable. * * Returns a freshly-opened close-on-exec file descriptor for the library, * or -1 if the library cannot be found. */ static char * search_library_pathfds(const char *name, const char *path, int *fdp) { char *envcopy, *fdstr, *found, *last_token; size_t len; int dirfd, fd; dbg("%s('%s', '%s', fdp)", __func__, name, path); /* Don't load from user-specified libdirs into setuid binaries. */ if (!trust) return (NULL); /* We can't do anything if LD_LIBRARY_PATH_FDS isn't set. */ if (path == NULL) return (NULL); /* LD_LIBRARY_PATH_FDS only works with relative paths. 
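 *
 * Hypothetical usage: a sandboxed process can pre-open its library
 * directories and be started as
 *
 *	LD_LIBRARY_PATH_FDS=3:4 ./prog
 *
 * after which "libfoo.so.1" is looked up via openat(3, "libfoo.so.1",
 * ...) and then openat(4, ...), with no filesystem path traversal.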
*/ if (name[0] == '/') { dbg("Absolute path (%s) passed to %s", name, __func__); return (NULL); } /* * Use strtok_r() to walk the FD:FD:FD list. This requires a local * copy of the path, as strtok_r rewrites separator tokens * with '\0'. */ found = NULL; envcopy = xstrdup(path); for (fdstr = strtok_r(envcopy, ":", &last_token); fdstr != NULL; fdstr = strtok_r(NULL, ":", &last_token)) { dirfd = parse_libdir(fdstr); if (dirfd < 0) break; fd = __sys_openat(dirfd, name, O_RDONLY | O_CLOEXEC | O_VERIFY); if (fd >= 0) { *fdp = fd; len = strlen(fdstr) + strlen(name) + 3; found = xmalloc(len); if (rtld_snprintf(found, len, "#%d/%s", dirfd, name) < 0) { _rtld_error("error generating '%d/%s'", dirfd, name); rtld_die(); } dbg("open('%s') => %d", found, fd); break; } } free(envcopy); return (found); } int dlclose(void *handle) { Obj_Entry *root; RtldLockState lockstate; wlock_acquire(rtld_bind_lock, &lockstate); root = dlcheck(handle); if (root == NULL) { lock_release(rtld_bind_lock, &lockstate); return -1; } LD_UTRACE(UTRACE_DLCLOSE_START, handle, NULL, 0, root->dl_refcount, root->path); /* Unreference the object and its dependencies. */ root->dl_refcount--; if (root->refcount == 1) { /* * The object will be no longer referenced, so we must unload it. * First, call the fini functions. */ objlist_call_fini(&list_fini, root, &lockstate); unref_dag(root); /* Finish cleaning up the newly-unreferenced objects. */ GDB_STATE(RT_DELETE,&root->linkmap); unload_object(root); GDB_STATE(RT_CONSISTENT,NULL); } else unref_dag(root); LD_UTRACE(UTRACE_DLCLOSE_STOP, handle, NULL, 0, 0, NULL); lock_release(rtld_bind_lock, &lockstate); return 0; } char * dlerror(void) { char *msg = error_message; error_message = NULL; return msg; } /* * This function is deprecated and has no effect. */ void dllockinit(void *context, void *(*lock_create)(void *context), void (*rlock_acquire)(void *lock), void (*wlock_acquire)(void *lock), void (*lock_release)(void *lock), void (*lock_destroy)(void *lock), void (*context_destroy)(void *context)) { static void *cur_context; static void (*cur_context_destroy)(void *); /* Just destroy the context from the previous call, if necessary. */ if (cur_context_destroy != NULL) cur_context_destroy(cur_context); cur_context = context; cur_context_destroy = context_destroy; } void * dlopen(const char *name, int mode) { return (rtld_dlopen(name, -1, mode)); } void * fdlopen(int fd, int mode) { return (rtld_dlopen(NULL, fd, mode)); } static void * rtld_dlopen(const char *name, int fd, int mode) { RtldLockState lockstate; int lo_flags; LD_UTRACE(UTRACE_DLOPEN_START, NULL, NULL, 0, mode, name); ld_tracing = (mode & RTLD_TRACE) == 0 ? 
NULL : "1"; if (ld_tracing != NULL) { rlock_acquire(rtld_bind_lock, &lockstate); if (sigsetjmp(lockstate.env, 0) != 0) lock_upgrade(rtld_bind_lock, &lockstate); environ = (char **)*get_program_var_addr("environ", &lockstate); lock_release(rtld_bind_lock, &lockstate); } lo_flags = RTLD_LO_DLOPEN; if (mode & RTLD_NODELETE) lo_flags |= RTLD_LO_NODELETE; if (mode & RTLD_NOLOAD) lo_flags |= RTLD_LO_NOLOAD; if (ld_tracing != NULL) lo_flags |= RTLD_LO_TRACE; return (dlopen_object(name, fd, obj_main, lo_flags, mode & (RTLD_MODEMASK | RTLD_GLOBAL), NULL)); } static void dlopen_cleanup(Obj_Entry *obj) { obj->dl_refcount--; unref_dag(obj); if (obj->refcount == 0) unload_object(obj); } static Obj_Entry * dlopen_object(const char *name, int fd, Obj_Entry *refobj, int lo_flags, int mode, RtldLockState *lockstate) { Obj_Entry **old_obj_tail; Obj_Entry *obj; Objlist initlist; RtldLockState mlockstate; int result; objlist_init(&initlist); if (lockstate == NULL && !(lo_flags & RTLD_LO_EARLY)) { wlock_acquire(rtld_bind_lock, &mlockstate); lockstate = &mlockstate; } GDB_STATE(RT_ADD,NULL); old_obj_tail = obj_tail; obj = NULL; if (name == NULL && fd == -1) { obj = obj_main; obj->refcount++; } else { obj = load_object(name, fd, refobj, lo_flags); } if (obj) { obj->dl_refcount++; if (mode & RTLD_GLOBAL && objlist_find(&list_global, obj) == NULL) objlist_push_tail(&list_global, obj); if (*old_obj_tail != NULL) { /* We loaded something new. */ assert(*old_obj_tail == obj); result = load_needed_objects(obj, lo_flags & (RTLD_LO_DLOPEN | RTLD_LO_EARLY)); init_dag(obj); ref_dag(obj); if (result != -1) result = rtld_verify_versions(&obj->dagmembers); if (result != -1 && ld_tracing) goto trace; if (result == -1 || relocate_object_dag(obj, (mode & RTLD_MODEMASK) == RTLD_NOW, &obj_rtld, (lo_flags & RTLD_LO_EARLY) ? SYMLOOK_EARLY : 0, lockstate) == -1) { dlopen_cleanup(obj); obj = NULL; } else if (lo_flags & RTLD_LO_EARLY) { /* * Do not call the init functions for early loaded * filtees. The image is still not initialized enough * for them to work. * * Our object is found by the global object list and * will be ordered among all init calls done right * before transferring control to main. */ } else { /* Make list of init functions to call. */ initlist_add_objects(obj, &obj->next, &initlist); } /* * Process all no_delete or global objects here, given * them own DAGs to prevent their dependencies from being * unloaded. This has to be done after we have loaded all * of the dependencies, so that we do not miss any. */ if (obj != NULL) process_z(obj); } else { /* * Bump the reference counts for objects on this DAG. If * this is the first dlopen() call for the object that was * already loaded as a dependency, initialize the dag * starting at it. */ init_dag(obj); ref_dag(obj); if ((lo_flags & RTLD_LO_TRACE) != 0) goto trace; } if (obj != NULL && ((lo_flags & RTLD_LO_NODELETE) != 0 || obj->z_nodelete) && !obj->ref_nodel) { dbg("obj %s nodelete", obj->path); ref_dag(obj); obj->z_nodelete = obj->ref_nodel = true; } } LD_UTRACE(UTRACE_DLOPEN_STOP, obj, NULL, 0, obj ? obj->dl_refcount : 0, name); GDB_STATE(RT_CONSISTENT,obj ? &obj->linkmap : NULL); if (!(lo_flags & RTLD_LO_EARLY)) { map_stacks_exec(lockstate); } if (initlist_objects_ifunc(&initlist, (mode & RTLD_MODEMASK) == RTLD_NOW, (lo_flags & RTLD_LO_EARLY) ? 
SYMLOOK_EARLY : 0, lockstate) == -1) { objlist_clear(&initlist); dlopen_cleanup(obj); if (lockstate == &mlockstate) lock_release(rtld_bind_lock, lockstate); return (NULL); } if (!(lo_flags & RTLD_LO_EARLY)) { /* Call the init functions. */ objlist_call_init(&initlist, lockstate); } objlist_clear(&initlist); if (lockstate == &mlockstate) lock_release(rtld_bind_lock, lockstate); return obj; trace: trace_loaded_objects(obj); if (lockstate == &mlockstate) lock_release(rtld_bind_lock, lockstate); exit(0); } static void * do_dlsym(void *handle, const char *name, void *retaddr, const Ver_Entry *ve, int flags) { DoneList donelist; const Obj_Entry *obj, *defobj; const Elf_Sym *def; SymLook req; RtldLockState lockstate; tls_index ti; void *sym; int res; def = NULL; defobj = NULL; symlook_init(&req, name); req.ventry = ve; req.flags = flags | SYMLOOK_IN_PLT; req.lockstate = &lockstate; LD_UTRACE(UTRACE_DLSYM_START, handle, NULL, 0, 0, name); rlock_acquire(rtld_bind_lock, &lockstate); if (sigsetjmp(lockstate.env, 0) != 0) lock_upgrade(rtld_bind_lock, &lockstate); if (handle == NULL || handle == RTLD_NEXT || handle == RTLD_DEFAULT || handle == RTLD_SELF) { if ((obj = obj_from_addr(retaddr)) == NULL) { _rtld_error("Cannot determine caller's shared object"); lock_release(rtld_bind_lock, &lockstate); LD_UTRACE(UTRACE_DLSYM_STOP, handle, NULL, 0, 0, name); return NULL; } if (handle == NULL) { /* Just the caller's shared object. */ res = symlook_obj(&req, obj); if (res == 0) { def = req.sym_out; defobj = req.defobj_out; } } else if (handle == RTLD_NEXT || /* Objects after caller's */ handle == RTLD_SELF) { /* ... caller included */ if (handle == RTLD_NEXT) obj = obj->next; for (; obj != NULL; obj = obj->next) { res = symlook_obj(&req, obj); if (res == 0) { if (def == NULL || ELF_ST_BIND(req.sym_out->st_info) != STB_WEAK) { def = req.sym_out; defobj = req.defobj_out; if (ELF_ST_BIND(def->st_info) != STB_WEAK) break; } } } /* * Search the dynamic linker itself, and possibly resolve the * symbol from there. This is how the application links to * dynamic linker services such as dlopen. */ if (def == NULL || ELF_ST_BIND(def->st_info) == STB_WEAK) { res = symlook_obj(&req, &obj_rtld); if (res == 0) { def = req.sym_out; defobj = req.defobj_out; } } } else { assert(handle == RTLD_DEFAULT); res = symlook_default(&req, obj); if (res == 0) { defobj = req.defobj_out; def = req.sym_out; } } } else { if ((obj = dlcheck(handle)) == NULL) { lock_release(rtld_bind_lock, &lockstate); LD_UTRACE(UTRACE_DLSYM_STOP, handle, NULL, 0, 0, name); return NULL; } donelist_init(&donelist); if (obj->mainprog) { /* Handle obtained by dlopen(NULL, ...) implies global scope. */ res = symlook_global(&req, &donelist); if (res == 0) { def = req.sym_out; defobj = req.defobj_out; } /* * Search the dynamic linker itself, and possibly resolve the * symbol from there. This is how the application links to * dynamic linker services such as dlopen. */ if (def == NULL || ELF_ST_BIND(def->st_info) == STB_WEAK) { res = symlook_obj(&req, &obj_rtld); if (res == 0) { def = req.sym_out; defobj = req.defobj_out; } } } else { /* Search the whole DAG rooted at the given object. */ res = symlook_list(&req, &obj->dagmembers, &donelist); if (res == 0) { def = req.sym_out; defobj = req.defobj_out; } } } if (def != NULL) { lock_release(rtld_bind_lock, &lockstate); /* * The value required by the caller is derived from the value * of the symbol. this is simply the relocated value of the * symbol. 
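 *
 * (Concretely, per the branches below: STT_FUNC yields a callable
 * function pointer, STT_GNU_IFUNC runs the resolver first, and for
 * STT_TLS the result is the calling thread's instance of the
 * variable, obtained via __tls_get_addr().)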
*/ if (ELF_ST_TYPE(def->st_info) == STT_FUNC) sym = make_function_pointer(def, defobj); else if (ELF_ST_TYPE(def->st_info) == STT_GNU_IFUNC) sym = rtld_resolve_ifunc(defobj, def); else if (ELF_ST_TYPE(def->st_info) == STT_TLS) { ti.ti_module = defobj->tlsindex; ti.ti_offset = def->st_value; sym = __tls_get_addr(&ti); } else sym = defobj->relocbase + def->st_value; LD_UTRACE(UTRACE_DLSYM_STOP, handle, sym, 0, 0, name); return (sym); } _rtld_error("Undefined symbol \"%s\"", name); lock_release(rtld_bind_lock, &lockstate); LD_UTRACE(UTRACE_DLSYM_STOP, handle, NULL, 0, 0, name); return NULL; } void * dlsym(void *handle, const char *name) { return do_dlsym(handle, name, __builtin_return_address(0), NULL, SYMLOOK_DLSYM); } dlfunc_t dlfunc(void *handle, const char *name) { union { void *d; dlfunc_t f; } rv; rv.d = do_dlsym(handle, name, __builtin_return_address(0), NULL, SYMLOOK_DLSYM); return (rv.f); } void * dlvsym(void *handle, const char *name, const char *version) { Ver_Entry ventry; ventry.name = version; ventry.file = NULL; ventry.hash = elf_hash(version); ventry.flags = 0; return do_dlsym(handle, name, __builtin_return_address(0), &ventry, SYMLOOK_DLSYM); } int _rtld_addr_phdr(const void *addr, struct dl_phdr_info *phdr_info) { const Obj_Entry *obj; RtldLockState lockstate; rlock_acquire(rtld_bind_lock, &lockstate); obj = obj_from_addr(addr); if (obj == NULL) { _rtld_error("No shared object contains address"); lock_release(rtld_bind_lock, &lockstate); return (0); } rtld_fill_dl_phdr_info(obj, phdr_info); lock_release(rtld_bind_lock, &lockstate); return (1); } int dladdr(const void *addr, Dl_info *info) { const Obj_Entry *obj; const Elf_Sym *def; void *symbol_addr; unsigned long symoffset; RtldLockState lockstate; rlock_acquire(rtld_bind_lock, &lockstate); obj = obj_from_addr(addr); if (obj == NULL) { _rtld_error("No shared object contains address"); lock_release(rtld_bind_lock, &lockstate); return 0; } info->dli_fname = obj->path; info->dli_fbase = obj->mapbase; info->dli_saddr = (void *)0; info->dli_sname = NULL; /* * Walk the symbol list looking for the symbol whose address is * closest to the address sent in. */ for (symoffset = 0; symoffset < obj->dynsymcount; symoffset++) { def = obj->symtab + symoffset; /* * Skip the symbol if st_shndx is either SHN_UNDEF or * SHN_COMMON. */ if (def->st_shndx == SHN_UNDEF || def->st_shndx == SHN_COMMON) continue; /* * If the symbol's address is greater than the specified address, * or if it is further away from addr than the current nearest * symbol, then reject it. */ symbol_addr = obj->relocbase + def->st_value; if (symbol_addr > addr || symbol_addr < info->dli_saddr) continue; /* Update our idea of the nearest symbol. */ info->dli_sname = obj->strtab + def->st_name; info->dli_saddr = symbol_addr; /* Exact match?
*/ if (info->dli_saddr == addr) break; } lock_release(rtld_bind_lock, &lockstate); return 1; } int dlinfo(void *handle, int request, void *p) { const Obj_Entry *obj; RtldLockState lockstate; int error; rlock_acquire(rtld_bind_lock, &lockstate); if (handle == NULL || handle == RTLD_SELF) { void *retaddr; retaddr = __builtin_return_address(0); /* __GNUC__ only */ if ((obj = obj_from_addr(retaddr)) == NULL) _rtld_error("Cannot determine caller's shared object"); } else obj = dlcheck(handle); if (obj == NULL) { lock_release(rtld_bind_lock, &lockstate); return (-1); } error = 0; switch (request) { case RTLD_DI_LINKMAP: *((struct link_map const **)p) = &obj->linkmap; break; case RTLD_DI_ORIGIN: error = rtld_dirname(obj->path, p); break; case RTLD_DI_SERINFOSIZE: case RTLD_DI_SERINFO: error = do_search_info(obj, request, (struct dl_serinfo *)p); break; default: _rtld_error("Invalid request %d passed to dlinfo()", request); error = -1; } lock_release(rtld_bind_lock, &lockstate); return (error); } static void rtld_fill_dl_phdr_info(const Obj_Entry *obj, struct dl_phdr_info *phdr_info) { phdr_info->dlpi_addr = (Elf_Addr)obj->relocbase; phdr_info->dlpi_name = obj->path; phdr_info->dlpi_phdr = obj->phdr; phdr_info->dlpi_phnum = obj->phsize / sizeof(obj->phdr[0]); phdr_info->dlpi_tls_modid = obj->tlsindex; phdr_info->dlpi_tls_data = obj->tlsinit; phdr_info->dlpi_adds = obj_loads; phdr_info->dlpi_subs = obj_loads - obj_count; } int dl_iterate_phdr(__dl_iterate_hdr_callback callback, void *param) { struct dl_phdr_info phdr_info; const Obj_Entry *obj; RtldLockState bind_lockstate, phdr_lockstate; int error; wlock_acquire(rtld_phdr_lock, &phdr_lockstate); rlock_acquire(rtld_bind_lock, &bind_lockstate); error = 0; for (obj = obj_list; obj != NULL; obj = obj->next) { rtld_fill_dl_phdr_info(obj, &phdr_info); if ((error = callback(&phdr_info, sizeof phdr_info, param)) != 0) break; } if (error == 0) { rtld_fill_dl_phdr_info(&obj_rtld, &phdr_info); error = callback(&phdr_info, sizeof(phdr_info), param); } lock_release(rtld_bind_lock, &bind_lockstate); lock_release(rtld_phdr_lock, &phdr_lockstate); return (error); } static void * fill_search_info(const char *dir, size_t dirlen, void *param) { struct fill_search_info_args *arg; arg = param; if (arg->request == RTLD_DI_SERINFOSIZE) { arg->serinfo->dls_cnt ++; arg->serinfo->dls_size += sizeof(struct dl_serpath) + dirlen + 1; } else { struct dl_serpath *s_entry; s_entry = arg->serpath; s_entry->dls_name = arg->strspace; s_entry->dls_flags = arg->flags; strncpy(arg->strspace, dir, dirlen); arg->strspace[dirlen] = '\0'; arg->strspace += dirlen + 1; arg->serpath++; } return (NULL); } static int do_search_info(const Obj_Entry *obj, int request, struct dl_serinfo *info) { struct dl_serinfo _info; struct fill_search_info_args args; args.request = RTLD_DI_SERINFOSIZE; args.serinfo = &_info; _info.dls_size = __offsetof(struct dl_serinfo, dls_serpath); _info.dls_cnt = 0; path_enumerate(obj->rpath, fill_search_info, &args); path_enumerate(ld_library_path, fill_search_info, &args); path_enumerate(obj->runpath, fill_search_info, &args); path_enumerate(gethints(obj->z_nodeflib), fill_search_info, &args); if (!obj->z_nodeflib) path_enumerate(ld_standard_library_path, fill_search_info, &args); if (request == RTLD_DI_SERINFOSIZE) { info->dls_size = _info.dls_size; info->dls_cnt = _info.dls_cnt; return (0); } if (info->dls_cnt != _info.dls_cnt || info->dls_size != _info.dls_size) { _rtld_error("Uninitialized Dl_serinfo struct passed to dlinfo()"); return (-1); } args.request = 
RTLD_DI_SERINFO; args.serinfo = info; args.serpath = &info->dls_serpath[0]; args.strspace = (char *)&info->dls_serpath[_info.dls_cnt]; args.flags = LA_SER_RUNPATH; if (path_enumerate(obj->rpath, fill_search_info, &args) != NULL) return (-1); args.flags = LA_SER_LIBPATH; if (path_enumerate(ld_library_path, fill_search_info, &args) != NULL) return (-1); args.flags = LA_SER_RUNPATH; if (path_enumerate(obj->runpath, fill_search_info, &args) != NULL) return (-1); args.flags = LA_SER_CONFIG; if (path_enumerate(gethints(obj->z_nodeflib), fill_search_info, &args) != NULL) return (-1); args.flags = LA_SER_DEFAULT; if (!obj->z_nodeflib && path_enumerate(ld_standard_library_path, fill_search_info, &args) != NULL) return (-1); return (0); } static int rtld_dirname(const char *path, char *bname) { const char *endp; /* Empty or NULL string gets treated as "." */ if (path == NULL || *path == '\0') { bname[0] = '.'; bname[1] = '\0'; return (0); } /* Strip trailing slashes */ endp = path + strlen(path) - 1; while (endp > path && *endp == '/') endp--; /* Find the start of the dir */ while (endp > path && *endp != '/') endp--; /* Either the dir is "/" or there are no slashes */ if (endp == path) { bname[0] = *endp == '/' ? '/' : '.'; bname[1] = '\0'; return (0); } else { do { endp--; } while (endp > path && *endp == '/'); } if (endp - path + 2 > PATH_MAX) { _rtld_error("Filename is too long: %s", path); return(-1); } strncpy(bname, path, endp - path + 1); bname[endp - path + 1] = '\0'; return (0); } static int rtld_dirname_abs(const char *path, char *base) { char *last; if (realpath(path, base) == NULL) return (-1); dbg("%s -> %s", path, base); last = strrchr(base, '/'); if (last == NULL) return (-1); if (last != base) *last = '\0'; return (0); } static void linkmap_add(Obj_Entry *obj) { struct link_map *l = &obj->linkmap; struct link_map *prev; obj->linkmap.l_name = obj->path; obj->linkmap.l_addr = obj->mapbase; obj->linkmap.l_ld = obj->dynamic; #ifdef __mips__ /* GDB needs load offset on MIPS to use the symbols */ obj->linkmap.l_offs = obj->relocbase; #endif if (r_debug.r_map == NULL) { r_debug.r_map = l; return; } /* * Scan to the end of the list, but not past the entry for the * dynamic linker, which we want to keep at the very end. */ for (prev = r_debug.r_map; prev->l_next != NULL && prev->l_next != &obj_rtld.linkmap; prev = prev->l_next) ; /* Link in the new entry. */ l->l_prev = prev; l->l_next = prev->l_next; if (l->l_next != NULL) l->l_next->l_prev = l; prev->l_next = l; } static void linkmap_delete(Obj_Entry *obj) { struct link_map *l = &obj->linkmap; if (l->l_prev == NULL) { if ((r_debug.r_map = l->l_next) != NULL) l->l_next->l_prev = NULL; return; } if ((l->l_prev->l_next = l->l_next) != NULL) l->l_next->l_prev = l->l_prev; } /* * Function for the debugger to set a breakpoint on to gain control. * * The two parameters allow the debugger to easily find and determine * what the runtime loader is doing and to whom it is doing it. * * When the loadhook trap is hit (r_debug_state, set at program * initialization), the arguments can be found on the stack: * * +8 struct link_map *m * +4 struct r_debug *rd * +0 RetAddr */ void r_debug_state(struct r_debug* rd, struct link_map *m) { /* * The following is a hack to force the compiler to emit calls to * this function, even when optimizing. If the function is empty, * the compiler is not obliged to emit any code for calls to it, * even when marked __noinline. However, gdb depends on those * calls being made. 
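 *
 * Illustrative sketch only (an assumption about the debugger-side
 * consumer, not taken from the sources): after stopping the process
 * at this breakpoint, the current set of loaded objects can be
 * recovered by walking the link map chain published via r_debug:
 *
 *	struct link_map *lm;
 *
 *	for (lm = r_debug.r_map; lm != NULL; lm = lm->l_next)
 *		dbg("loaded: %s at %p", lm->l_name, (void *)lm->l_addr);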
*/ __compiler_membar(); } /* * A function called after init routines have completed. This can be used to * break before a program's entry routine is called, and can be used when * main is not available in the symbol table. */ void _r_debug_postinit(struct link_map *m) { /* See r_debug_state(). */ __compiler_membar(); } /* * Get address of the pointer variable in the main program. * Prefer non-weak symbol over the weak one. */ static const void ** get_program_var_addr(const char *name, RtldLockState *lockstate) { SymLook req; DoneList donelist; symlook_init(&req, name); req.lockstate = lockstate; donelist_init(&donelist); if (symlook_global(&req, &donelist) != 0) return (NULL); if (ELF_ST_TYPE(req.sym_out->st_info) == STT_FUNC) return ((const void **)make_function_pointer(req.sym_out, req.defobj_out)); else if (ELF_ST_TYPE(req.sym_out->st_info) == STT_GNU_IFUNC) return ((const void **)rtld_resolve_ifunc(req.defobj_out, req.sym_out)); else return ((const void **)(req.defobj_out->relocbase + req.sym_out->st_value)); } /* * Set a pointer variable in the main program to the given value. This * is used to set key variables such as "environ" before any of the * init functions are called. */ static void set_program_var(const char *name, const void *value) { const void **addr; if ((addr = get_program_var_addr(name, NULL)) != NULL) { dbg("\"%s\": *%p <-- %p", name, addr, value); *addr = value; } } /* * Search the global objects, including dependencies and main object, * for the given symbol. */ static int symlook_global(SymLook *req, DoneList *donelist) { SymLook req1; const Objlist_Entry *elm; int res; symlook_init_from_req(&req1, req); /* Search all objects loaded at program start up. */ if (req->defobj_out == NULL || ELF_ST_BIND(req->sym_out->st_info) == STB_WEAK) { res = symlook_list(&req1, &list_main, donelist); if (res == 0 && (req->defobj_out == NULL || ELF_ST_BIND(req1.sym_out->st_info) != STB_WEAK)) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; assert(req->defobj_out != NULL); } } /* Search all DAGs whose roots are RTLD_GLOBAL objects. */ STAILQ_FOREACH(elm, &list_global, link) { if (req->defobj_out != NULL && ELF_ST_BIND(req->sym_out->st_info) != STB_WEAK) break; res = symlook_list(&req1, &elm->obj->dagmembers, donelist); if (res == 0 && (req->defobj_out == NULL || ELF_ST_BIND(req1.sym_out->st_info) != STB_WEAK)) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; assert(req->defobj_out != NULL); } } return (req->sym_out != NULL ? 0 : ESRCH); } /* * Given a symbol name in a referencing object, find the corresponding * definition of the symbol. Returns a pointer to the symbol, or NULL if * no definition was found. Returns a pointer to the Obj_Entry of the * defining object via the reference parameter DEFOBJ_OUT. */ static int symlook_default(SymLook *req, const Obj_Entry *refobj) { DoneList donelist; const Objlist_Entry *elm; SymLook req1; int res; donelist_init(&donelist); symlook_init_from_req(&req1, req); /* Look first in the referencing object if linked symbolically. */ if (refobj->symbolic && !donelist_check(&donelist, refobj)) { res = symlook_obj(&req1, refobj); if (res == 0) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; assert(req->defobj_out != NULL); } } symlook_global(req, &donelist); /* Search all dlopened DAGs containing the referencing object. 
*/ STAILQ_FOREACH(elm, &refobj->dldags, link) { if (req->sym_out != NULL && ELF_ST_BIND(req->sym_out->st_info) != STB_WEAK) break; res = symlook_list(&req1, &elm->obj->dagmembers, &donelist); if (res == 0 && (req->sym_out == NULL || ELF_ST_BIND(req1.sym_out->st_info) != STB_WEAK)) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; assert(req->defobj_out != NULL); } } /* * Search the dynamic linker itself, and possibly resolve the * symbol from there. This is how the application links to * dynamic linker services such as dlopen. */ if (req->sym_out == NULL || ELF_ST_BIND(req->sym_out->st_info) == STB_WEAK) { res = symlook_obj(&req1, &obj_rtld); if (res == 0) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; assert(req->defobj_out != NULL); } } return (req->sym_out != NULL ? 0 : ESRCH); } static int symlook_list(SymLook *req, const Objlist *objlist, DoneList *dlp) { const Elf_Sym *def; const Obj_Entry *defobj; const Objlist_Entry *elm; SymLook req1; int res; def = NULL; defobj = NULL; STAILQ_FOREACH(elm, objlist, link) { if (donelist_check(dlp, elm->obj)) continue; symlook_init_from_req(&req1, req); if ((res = symlook_obj(&req1, elm->obj)) == 0) { if (def == NULL || ELF_ST_BIND(req1.sym_out->st_info) != STB_WEAK) { def = req1.sym_out; defobj = req1.defobj_out; if (ELF_ST_BIND(def->st_info) != STB_WEAK) break; } } } if (def != NULL) { req->sym_out = def; req->defobj_out = defobj; return (0); } return (ESRCH); } /* * Search the chain of DAGs pointed to by the given Needed_Entry * for a symbol of the given name. Each DAG is scanned completely * before advancing to the next one. Returns 0 if the symbol was * found, or ESRCH if no definition was found. */ static int symlook_needed(SymLook *req, const Needed_Entry *needed, DoneList *dlp) { const Elf_Sym *def; const Needed_Entry *n; const Obj_Entry *defobj; SymLook req1; int res; def = NULL; defobj = NULL; symlook_init_from_req(&req1, req); for (n = needed; n != NULL; n = n->next) { if (n->obj == NULL || (res = symlook_list(&req1, &n->obj->dagmembers, dlp)) != 0) continue; if (def == NULL || ELF_ST_BIND(req1.sym_out->st_info) != STB_WEAK) { def = req1.sym_out; defobj = req1.defobj_out; if (ELF_ST_BIND(def->st_info) != STB_WEAK) break; } } if (def != NULL) { req->sym_out = def; req->defobj_out = defobj; return (0); } return (ESRCH); } /* * Search the symbol table of a single shared object for a symbol of * the given name and version, if requested. Returns 0 if the symbol * was found, or an error (ESRCH or EINVAL) otherwise. If the object * is a filter, return the filtered symbol from the filtee. * * The symbol's hash value is passed in for efficiency reasons; that * eliminates many recomputations of the hash value. */ int symlook_obj(SymLook *req, const Obj_Entry *obj) { DoneList donelist; SymLook req1; int flags, res, mres; /* * If there is at least one valid hash at this point, we prefer to * use the faster GNU version if available. */ if (obj->valid_hash_gnu) mres = symlook_obj1_gnu(req, obj); else if (obj->valid_hash_sysv) mres = symlook_obj1_sysv(req, obj); else return (EINVAL); if (mres == 0) { if (obj->needed_filtees != NULL) { flags = (req->flags & SYMLOOK_EARLY) ?
RTLD_LO_EARLY : 0; load_filtees(__DECONST(Obj_Entry *, obj), flags, req->lockstate); donelist_init(&donelist); symlook_init_from_req(&req1, req); res = symlook_needed(&req1, obj->needed_filtees, &donelist); if (res == 0) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; } return (res); } if (obj->needed_aux_filtees != NULL) { flags = (req->flags & SYMLOOK_EARLY) ? RTLD_LO_EARLY : 0; load_filtees(__DECONST(Obj_Entry *, obj), flags, req->lockstate); donelist_init(&donelist); symlook_init_from_req(&req1, req); res = symlook_needed(&req1, obj->needed_aux_filtees, &donelist); if (res == 0) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; return (res); } } } return (mres); } /* Symbol match routine common to both hash functions */ static bool matched_symbol(SymLook *req, const Obj_Entry *obj, Sym_Match_Result *result, const unsigned long symnum) { Elf_Versym verndx; const Elf_Sym *symp; const char *strp; symp = obj->symtab + symnum; strp = obj->strtab + symp->st_name; switch (ELF_ST_TYPE(symp->st_info)) { case STT_FUNC: case STT_NOTYPE: case STT_OBJECT: case STT_COMMON: case STT_GNU_IFUNC: if (symp->st_value == 0) return (false); /* fallthrough */ case STT_TLS: if (symp->st_shndx != SHN_UNDEF) break; #ifndef __mips__ else if (((req->flags & SYMLOOK_IN_PLT) == 0) && (ELF_ST_TYPE(symp->st_info) == STT_FUNC)) break; /* fallthrough */ #endif default: return (false); } if (req->name[0] != strp[0] || strcmp(req->name, strp) != 0) return (false); if (req->ventry == NULL) { if (obj->versyms != NULL) { verndx = VER_NDX(obj->versyms[symnum]); if (verndx > obj->vernum) { _rtld_error( "%s: symbol %s references wrong version %d", obj->path, obj->strtab + symnum, verndx); return (false); } /* * If we are not called from dlsym (i.e. this * is a normal relocation from unversioned * binary), accept the symbol immediately if * it happens to have first version after this * shared object became versioned. Otherwise, * if symbol is versioned and not hidden, * remember it. If it is the only symbol with * this name exported by the shared object, it * will be returned as a match by the calling * function. If symbol is global (verndx < 2) * accept it unconditionally. */ if ((req->flags & SYMLOOK_DLSYM) == 0 && verndx == VER_NDX_GIVEN) { result->sym_out = symp; return (true); } else if (verndx >= VER_NDX_GIVEN) { if ((obj->versyms[symnum] & VER_NDX_HIDDEN) == 0) { if (result->vsymp == NULL) result->vsymp = symp; result->vcount++; } return (false); } } result->sym_out = symp; return (true); } if (obj->versyms == NULL) { if (object_match_name(obj, req->ventry->name)) { _rtld_error("%s: object %s should provide version %s " "for symbol %s", obj_rtld.path, obj->path, req->ventry->name, obj->strtab + symnum); return (false); } } else { verndx = VER_NDX(obj->versyms[symnum]); if (verndx > obj->vernum) { _rtld_error("%s: symbol %s references wrong version %d", obj->path, obj->strtab + symnum, verndx); return (false); } if (obj->vertab[verndx].hash != req->ventry->hash || strcmp(obj->vertab[verndx].name, req->ventry->name)) { /* * Version does not match. Look if this is a * global symbol and if it is not hidden. If * global symbol (verndx < 2) is available, * use it. Do not return symbol if we are * called by dlvsym, because dlvsym looks for * a specific version and default one is not * what dlvsym wants. 
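 *
 * A hedged usage sketch of the distinction (the symbol and version
 * names are hypothetical examples, not taken from the sources, and
 * h is a handle returned by dlopen()):
 *
 *	void *any = dlsym(h, "foo");
 *	void *old = dlvsym(h, "foo", "FBSD_1.0");
 *
 * dlsym() may be satisfied by the default version of "foo", while
 * dlvsym() must find the exact "FBSD_1.0" association; that is why
 * the default-version fallback is suppressed below when
 * SYMLOOK_DLSYM is set together with a version entry.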
*/ if ((req->flags & SYMLOOK_DLSYM) || (verndx >= VER_NDX_GIVEN) || (obj->versyms[symnum] & VER_NDX_HIDDEN)) return (false); } } result->sym_out = symp; return (true); } /* * Search for symbol using SysV hash function. * obj->buckets is known not to be NULL at this point; the test for this was * performed with the obj->valid_hash_sysv assignment. */ static int symlook_obj1_sysv(SymLook *req, const Obj_Entry *obj) { unsigned long symnum; Sym_Match_Result matchres; matchres.sym_out = NULL; matchres.vsymp = NULL; matchres.vcount = 0; for (symnum = obj->buckets[req->hash % obj->nbuckets]; symnum != STN_UNDEF; symnum = obj->chains[symnum]) { if (symnum >= obj->nchains) return (ESRCH); /* Bad object */ if (matched_symbol(req, obj, &matchres, symnum)) { req->sym_out = matchres.sym_out; req->defobj_out = obj; return (0); } } if (matchres.vcount == 1) { req->sym_out = matchres.vsymp; req->defobj_out = obj; return (0); } return (ESRCH); } /* Search for symbol using GNU hash function */ static int symlook_obj1_gnu(SymLook *req, const Obj_Entry *obj) { Elf_Addr bloom_word; const Elf32_Word *hashval; Elf32_Word bucket; Sym_Match_Result matchres; unsigned int h1, h2; unsigned long symnum; matchres.sym_out = NULL; matchres.vsymp = NULL; matchres.vcount = 0; /* Pick right bitmask word from Bloom filter array */ bloom_word = obj->bloom_gnu[(req->hash_gnu / __ELF_WORD_SIZE) & obj->maskwords_bm_gnu]; /* Calculate modulus word size of gnu hash and its derivative */ h1 = req->hash_gnu & (__ELF_WORD_SIZE - 1); h2 = ((req->hash_gnu >> obj->shift2_gnu) & (__ELF_WORD_SIZE - 1)); /* Filter out the "definitely not in set" queries */ if (((bloom_word >> h1) & (bloom_word >> h2) & 1) == 0) return (ESRCH); /* Locate hash chain and corresponding value element*/ bucket = obj->buckets_gnu[req->hash_gnu % obj->nbuckets_gnu]; if (bucket == 0) return (ESRCH); hashval = &obj->chain_zero_gnu[bucket]; do { if (((*hashval ^ req->hash_gnu) >> 1) == 0) { symnum = hashval - obj->chain_zero_gnu; if (matched_symbol(req, obj, &matchres, symnum)) { req->sym_out = matchres.sym_out; req->defobj_out = obj; return (0); } } } while ((*hashval++ & 1) == 0); if (matchres.vcount == 1) { req->sym_out = matchres.vsymp; req->defobj_out = obj; return (0); } return (ESRCH); } static void trace_loaded_objects(Obj_Entry *obj) { char *fmt1, *fmt2, *fmt, *main_local, *list_containers; int c; if ((main_local = getenv(_LD("TRACE_LOADED_OBJECTS_PROGNAME"))) == NULL) main_local = ""; if ((fmt1 = getenv(_LD("TRACE_LOADED_OBJECTS_FMT1"))) == NULL) fmt1 = "\t%o => %p (%x)\n"; if ((fmt2 = getenv(_LD("TRACE_LOADED_OBJECTS_FMT2"))) == NULL) fmt2 = "\t%o (%x)\n"; list_containers = getenv(_LD("TRACE_LOADED_OBJECTS_ALL")); for (; obj; obj = obj->next) { Needed_Entry *needed; char *name, *path; bool is_lib; if (list_containers && obj->needed != NULL) rtld_printf("%s:\n", obj->path); for (needed = obj->needed; needed; needed = needed->next) { if (needed->obj != NULL) { if (needed->obj->traced && !list_containers) continue; needed->obj->traced = true; path = needed->obj->path; } else path = "not found"; name = (char *)obj->strtab + needed->name; is_lib = strncmp(name, "lib", 3) == 0; /* XXX - bogus */ fmt = is_lib ? 
fmt1 : fmt2; while ((c = *fmt++) != '\0') { switch (c) { default: rtld_putchar(c); continue; case '\\': switch (c = *fmt) { case '\0': continue; case 'n': rtld_putchar('\n'); break; case 't': rtld_putchar('\t'); break; } break; case '%': switch (c = *fmt) { case '\0': continue; case '%': default: rtld_putchar(c); break; case 'A': rtld_putstr(main_local); break; case 'a': rtld_putstr(obj_main->path); break; case 'o': rtld_putstr(name); break; #if 0 case 'm': rtld_printf("%d", sodp->sod_major); break; case 'n': rtld_printf("%d", sodp->sod_minor); break; #endif case 'p': rtld_putstr(path); break; case 'x': rtld_printf("%p", needed->obj ? needed->obj->mapbase : 0); break; } break; } ++fmt; } } } } /* * Unload a dlopened object and its dependencies from memory and from * our data structures. It is assumed that the DAG rooted in the * object has already been unreferenced, and that the object has a * reference count of 0. */ static void unload_object(Obj_Entry *root) { Obj_Entry *obj; Obj_Entry **linkp; assert(root->refcount == 0); /* * Pass over the DAG removing unreferenced objects from * appropriate lists. */ unlink_object(root); /* Unmap all objects that are no longer referenced. */ linkp = &obj_list->next; while ((obj = *linkp) != NULL) { if (obj->refcount == 0) { LD_UTRACE(UTRACE_UNLOAD_OBJECT, obj, obj->mapbase, obj->mapsize, 0, obj->path); dbg("unloading \"%s\"", obj->path); unload_filtees(root); munmap(obj->mapbase, obj->mapsize); linkmap_delete(obj); *linkp = obj->next; obj_count--; obj_free(obj); } else linkp = &obj->next; } obj_tail = linkp; } static void unlink_object(Obj_Entry *root) { Objlist_Entry *elm; if (root->refcount == 0) { /* Remove the object from the RTLD_GLOBAL list. */ objlist_remove(&list_global, root); /* Remove the object from all objects' DAG lists. */ STAILQ_FOREACH(elm, &root->dagmembers, link) { objlist_remove(&elm->obj->dldags, root); if (elm->obj != root) unlink_object(elm->obj); } } } static void ref_dag(Obj_Entry *root) { Objlist_Entry *elm; assert(root->dag_inited); STAILQ_FOREACH(elm, &root->dagmembers, link) elm->obj->refcount++; } static void unref_dag(Obj_Entry *root) { Objlist_Entry *elm; assert(root->dag_inited); STAILQ_FOREACH(elm, &root->dagmembers, link) elm->obj->refcount--; } /* * Common code for MD __tls_get_addr(). */ static void *tls_get_addr_slow(Elf_Addr **, int, size_t) __noinline; static void * tls_get_addr_slow(Elf_Addr **dtvp, int index, size_t offset) { Elf_Addr *newdtv, *dtv; RtldLockState lockstate; int to_copy; dtv = *dtvp; /* Check dtv generation in case new modules have arrived */ if (dtv[0] != tls_dtv_generation) { wlock_acquire(rtld_bind_lock, &lockstate); newdtv = xcalloc(tls_max_index + 2, sizeof(Elf_Addr)); to_copy = dtv[1]; if (to_copy > tls_max_index) to_copy = tls_max_index; memcpy(&newdtv[2], &dtv[2], to_copy * sizeof(Elf_Addr)); newdtv[0] = tls_dtv_generation; newdtv[1] = tls_max_index; free(dtv); lock_release(rtld_bind_lock, &lockstate); dtv = *dtvp = newdtv; } /* Dynamically allocate module TLS if necessary */ if (dtv[index + 1] == 0) { /* Signal safe, wlock will block out signals. 
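 *
 * Editorial note: the dtv slot is deliberately tested again after
 * the write lock is taken below, because another thread may have
 * raced us into this slow path and already installed the module's
 * block, in which case it must not be overwritten or leaked.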
*/ wlock_acquire(rtld_bind_lock, &lockstate); if (!dtv[index + 1]) dtv[index + 1] = (Elf_Addr)allocate_module_tls(index); lock_release(rtld_bind_lock, &lockstate); } return ((void *)(dtv[index + 1] + offset)); } void * tls_get_addr_common(Elf_Addr **dtvp, int index, size_t offset) { Elf_Addr *dtv; dtv = *dtvp; /* Check dtv generation in case new modules have arrived */ if (__predict_true(dtv[0] == tls_dtv_generation && dtv[index + 1] != 0)) return ((void *)(dtv[index + 1] + offset)); return (tls_get_addr_slow(dtvp, index, offset)); } #if defined(__aarch64__) || defined(__arm__) || defined(__mips__) || \ defined(__powerpc__) || defined(__riscv__) /* * Allocate Static TLS using the Variant I method. */ void * allocate_tls(Obj_Entry *objs, void *oldtcb, size_t tcbsize, size_t tcbalign) { Obj_Entry *obj; char *tcb; Elf_Addr **tls; Elf_Addr *dtv; Elf_Addr addr; int i; if (oldtcb != NULL && tcbsize == TLS_TCB_SIZE) return (oldtcb); assert(tcbsize >= TLS_TCB_SIZE); tcb = xcalloc(1, tls_static_space - TLS_TCB_SIZE + tcbsize); tls = (Elf_Addr **)(tcb + tcbsize - TLS_TCB_SIZE); if (oldtcb != NULL) { memcpy(tls, oldtcb, tls_static_space); free(oldtcb); /* Adjust the DTV. */ dtv = tls[0]; for (i = 0; i < dtv[1]; i++) { if (dtv[i+2] >= (Elf_Addr)oldtcb && dtv[i+2] < (Elf_Addr)oldtcb + tls_static_space) { dtv[i+2] = dtv[i+2] - (Elf_Addr)oldtcb + (Elf_Addr)tls; } } } else { dtv = xcalloc(tls_max_index + 2, sizeof(Elf_Addr)); tls[0] = dtv; dtv[0] = tls_dtv_generation; dtv[1] = tls_max_index; for (obj = objs; obj; obj = obj->next) { if (obj->tlsoffset > 0) { addr = (Elf_Addr)tls + obj->tlsoffset; if (obj->tlsinitsize > 0) memcpy((void*) addr, obj->tlsinit, obj->tlsinitsize); if (obj->tlssize > obj->tlsinitsize) memset((void*) (addr + obj->tlsinitsize), 0, obj->tlssize - obj->tlsinitsize); dtv[obj->tlsindex + 1] = addr; } } } return (tcb); } void free_tls(void *tcb, size_t tcbsize, size_t tcbalign) { Elf_Addr *dtv; Elf_Addr tlsstart, tlsend; int dtvsize, i; assert(tcbsize >= TLS_TCB_SIZE); tlsstart = (Elf_Addr)tcb + tcbsize - TLS_TCB_SIZE; tlsend = tlsstart + tls_static_space; dtv = *(Elf_Addr **)tlsstart; dtvsize = dtv[1]; for (i = 0; i < dtvsize; i++) { if (dtv[i+2] && (dtv[i+2] < tlsstart || dtv[i+2] >= tlsend)) { free((void*)dtv[i+2]); } } free(dtv); free(tcb); } #endif #if defined(__i386__) || defined(__amd64__) || defined(__sparc64__) /* * Allocate Static TLS using the Variant II method. */ void * allocate_tls(Obj_Entry *objs, void *oldtls, size_t tcbsize, size_t tcbalign) { Obj_Entry *obj; size_t size, ralign; char *tls; Elf_Addr *dtv, *olddtv; Elf_Addr segbase, oldsegbase, addr; int i; ralign = tcbalign; if (tls_static_max_align > ralign) ralign = tls_static_max_align; size = round(tls_static_space, ralign) + round(tcbsize, ralign); assert(tcbsize >= 2*sizeof(Elf_Addr)); tls = malloc_aligned(size, ralign); dtv = xcalloc(tls_max_index + 2, sizeof(Elf_Addr)); segbase = (Elf_Addr)(tls + round(tls_static_space, ralign)); ((Elf_Addr*)segbase)[0] = segbase; ((Elf_Addr*)segbase)[1] = (Elf_Addr) dtv; dtv[0] = tls_dtv_generation; dtv[1] = tls_max_index; if (oldtls) { /* * Copy the whole static TLS block over. */ oldsegbase = (Elf_Addr) oldtls; memcpy((void *)(segbase - tls_static_space), (const void *)(oldsegbase - tls_static_space), tls_static_space); /* * If any dynamic TLS blocks have been created by tls_get_addr(), * move them over.
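 *
 * Editorial note: "move" here means re-homing pointers rather than
 * copying bytes. Any DTV slot whose address lies outside the old
 * static block refers to a dynamically allocated module block, so
 * the loop below transfers that pointer into the new DTV and zeroes
 * the old slot.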
*/ olddtv = ((Elf_Addr**)oldsegbase)[1]; for (i = 0; i < olddtv[1]; i++) { if (olddtv[i+2] < oldsegbase - size || olddtv[i+2] > oldsegbase) { dtv[i+2] = olddtv[i+2]; olddtv[i+2] = 0; } } /* * We assume that this block was the one we created with * allocate_initial_tls(). */ free_tls(oldtls, 2*sizeof(Elf_Addr), sizeof(Elf_Addr)); } else { for (obj = objs; obj; obj = obj->next) { if (obj->tlsoffset) { addr = segbase - obj->tlsoffset; memset((void*) (addr + obj->tlsinitsize), 0, obj->tlssize - obj->tlsinitsize); if (obj->tlsinit) memcpy((void*) addr, obj->tlsinit, obj->tlsinitsize); dtv[obj->tlsindex + 1] = addr; } } } return (void*) segbase; } void free_tls(void *tls, size_t tcbsize, size_t tcbalign) { Elf_Addr* dtv; size_t size, ralign; int dtvsize, i; Elf_Addr tlsstart, tlsend; /* * Figure out the size of the initial TLS block so that we can * find stuff which ___tls_get_addr() allocated dynamically. */ ralign = tcbalign; if (tls_static_max_align > ralign) ralign = tls_static_max_align; size = round(tls_static_space, ralign); dtv = ((Elf_Addr**)tls)[1]; dtvsize = dtv[1]; tlsend = (Elf_Addr) tls; tlsstart = tlsend - size; for (i = 0; i < dtvsize; i++) { if (dtv[i + 2] != 0 && (dtv[i + 2] < tlsstart || dtv[i + 2] > tlsend)) { free_aligned((void *)dtv[i + 2]); } } free_aligned((void *)tlsstart); free((void*) dtv); } #endif /* * Allocate TLS block for module with given index. */ void * allocate_module_tls(int index) { Obj_Entry* obj; char* p; for (obj = obj_list; obj; obj = obj->next) { if (obj->tlsindex == index) break; } if (!obj) { _rtld_error("Can't find module with TLS index %d", index); rtld_die(); } p = malloc_aligned(obj->tlssize, obj->tlsalign); memcpy(p, obj->tlsinit, obj->tlsinitsize); memset(p + obj->tlsinitsize, 0, obj->tlssize - obj->tlsinitsize); return p; } bool allocate_tls_offset(Obj_Entry *obj) { size_t off; if (obj->tls_done) return true; if (obj->tlssize == 0) { obj->tls_done = true; return true; } if (tls_last_offset == 0) off = calculate_first_tls_offset(obj->tlssize, obj->tlsalign); else off = calculate_tls_offset(tls_last_offset, tls_last_size, obj->tlssize, obj->tlsalign); /* * If we have already fixed the size of the static TLS block, we * must stay within that size. When allocating the static TLS, we * leave a small amount of space spare to be used for dynamically * loading modules which use static TLS. */ if (tls_static_space != 0) { if (calculate_tls_end(off, obj->tlssize) > tls_static_space) return false; } else if (obj->tlsalign > tls_static_max_align) { tls_static_max_align = obj->tlsalign; } tls_last_offset = obj->tlsoffset = off; tls_last_size = obj->tlssize; obj->tls_done = true; return true; } void free_tls_offset(Obj_Entry *obj) { /* * If we were the last thing to allocate out of the static TLS * block, we give our space back to the 'allocator'. This is a * simplistic workaround to allow libGL.so.1 to be loaded and * unloaded multiple times. 
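 *
 * A small worked example (assuming the Variant II definition
 * calculate_tls_end(off, size) == off + size from rtld_machdep.h):
 * if the most recent allocation set tls_last_offset = 512 and
 * tls_last_size = 128, then an object with tlsoffset = 512 and
 * tlssize = 128 has end 512 + 128 = 640, which matches the
 * allocator's end, so tls_last_offset is rolled back to 384 and the
 * space can be handed out again by a later dlopen().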
*/ if (calculate_tls_end(obj->tlsoffset, obj->tlssize) == calculate_tls_end(tls_last_offset, tls_last_size)) { tls_last_offset -= obj->tlssize; tls_last_size = 0; } } void * _rtld_allocate_tls(void *oldtls, size_t tcbsize, size_t tcbalign) { void *ret; RtldLockState lockstate; wlock_acquire(rtld_bind_lock, &lockstate); ret = allocate_tls(obj_list, oldtls, tcbsize, tcbalign); lock_release(rtld_bind_lock, &lockstate); return (ret); } void _rtld_free_tls(void *tcb, size_t tcbsize, size_t tcbalign) { RtldLockState lockstate; wlock_acquire(rtld_bind_lock, &lockstate); free_tls(tcb, tcbsize, tcbalign); lock_release(rtld_bind_lock, &lockstate); } static void object_add_name(Obj_Entry *obj, const char *name) { Name_Entry *entry; size_t len; len = strlen(name); entry = malloc(sizeof(Name_Entry) + len); if (entry != NULL) { strcpy(entry->name, name); STAILQ_INSERT_TAIL(&obj->names, entry, link); } } static int object_match_name(const Obj_Entry *obj, const char *name) { Name_Entry *entry; STAILQ_FOREACH(entry, &obj->names, link) { if (strcmp(name, entry->name) == 0) return (1); } return (0); } static Obj_Entry * locate_dependency(const Obj_Entry *obj, const char *name) { const Objlist_Entry *entry; const Needed_Entry *needed; STAILQ_FOREACH(entry, &list_main, link) { if (object_match_name(entry->obj, name)) return entry->obj; } for (needed = obj->needed; needed != NULL; needed = needed->next) { if (strcmp(obj->strtab + needed->name, name) == 0 || (needed->obj != NULL && object_match_name(needed->obj, name))) { /* * If there is DT_NEEDED for the name we are looking for, * we are all set. Note that object might not be found if * dependency was not loaded yet, so the function can * return NULL here. This is expected and handled * properly by the caller. */ return (needed->obj); } } _rtld_error("%s: Unexpected inconsistency: dependency %s not found", obj->path, name); rtld_die(); } static int check_object_provided_version(Obj_Entry *refobj, const Obj_Entry *depobj, const Elf_Vernaux *vna) { const Elf_Verdef *vd; const char *vername; vername = refobj->strtab + vna->vna_name; vd = depobj->verdef; if (vd == NULL) { _rtld_error("%s: version %s required by %s not defined", depobj->path, vername, refobj->path); return (-1); } for (;;) { if (vd->vd_version != VER_DEF_CURRENT) { _rtld_error("%s: Unsupported version %d of Elf_Verdef entry", depobj->path, vd->vd_version); return (-1); } if (vna->vna_hash == vd->vd_hash) { const Elf_Verdaux *aux = (const Elf_Verdaux *) ((char *)vd + vd->vd_aux); if (strcmp(vername, depobj->strtab + aux->vda_name) == 0) return (0); } if (vd->vd_next == 0) break; vd = (const Elf_Verdef *) ((char *)vd + vd->vd_next); } if (vna->vna_flags & VER_FLG_WEAK) return (0); _rtld_error("%s: version %s required by %s not found", depobj->path, vername, refobj->path); return (-1); } static int rtld_verify_object_versions(Obj_Entry *obj) { const Elf_Verneed *vn; const Elf_Verdef *vd; const Elf_Verdaux *vda; const Elf_Vernaux *vna; const Obj_Entry *depobj; int maxvernum, vernum; if (obj->ver_checked) return (0); obj->ver_checked = true; maxvernum = 0; /* * Walk over defined and required version records and figure out * max index used by any of them. Do very basic sanity checking * while there. 
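 *
 * Layout reminder (standard ELF symbol versioning, restated here as
 * an editorial aid): each Elf_Verneed heads a list of Elf_Vernaux
 * records reached through the vn_aux/vna_next byte offsets, and each
 * Elf_Verdef chains through vd_aux/vd_next in the same way; a next
 * field of zero terminates each list. The two loops below walk
 * exactly these chains.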
*/ vn = obj->verneed; while (vn != NULL) { if (vn->vn_version != VER_NEED_CURRENT) { _rtld_error("%s: Unsupported version %d of Elf_Verneed entry", obj->path, vn->vn_version); return (-1); } vna = (const Elf_Vernaux *) ((char *)vn + vn->vn_aux); for (;;) { vernum = VER_NEED_IDX(vna->vna_other); if (vernum > maxvernum) maxvernum = vernum; if (vna->vna_next == 0) break; vna = (const Elf_Vernaux *) ((char *)vna + vna->vna_next); } if (vn->vn_next == 0) break; vn = (const Elf_Verneed *) ((char *)vn + vn->vn_next); } vd = obj->verdef; while (vd != NULL) { if (vd->vd_version != VER_DEF_CURRENT) { _rtld_error("%s: Unsupported version %d of Elf_Verdef entry", obj->path, vd->vd_version); return (-1); } vernum = VER_DEF_IDX(vd->vd_ndx); if (vernum > maxvernum) maxvernum = vernum; if (vd->vd_next == 0) break; vd = (const Elf_Verdef *) ((char *)vd + vd->vd_next); } if (maxvernum == 0) return (0); /* * Store version information in array indexable by version index. * Verify that object version requirements are satisfied along the * way. */ obj->vernum = maxvernum + 1; obj->vertab = xcalloc(obj->vernum, sizeof(Ver_Entry)); vd = obj->verdef; while (vd != NULL) { if ((vd->vd_flags & VER_FLG_BASE) == 0) { vernum = VER_DEF_IDX(vd->vd_ndx); assert(vernum <= maxvernum); vda = (const Elf_Verdaux *)((char *)vd + vd->vd_aux); obj->vertab[vernum].hash = vd->vd_hash; obj->vertab[vernum].name = obj->strtab + vda->vda_name; obj->vertab[vernum].file = NULL; obj->vertab[vernum].flags = 0; } if (vd->vd_next == 0) break; vd = (const Elf_Verdef *) ((char *)vd + vd->vd_next); } vn = obj->verneed; while (vn != NULL) { depobj = locate_dependency(obj, obj->strtab + vn->vn_file); if (depobj == NULL) return (-1); vna = (const Elf_Vernaux *) ((char *)vn + vn->vn_aux); for (;;) { if (check_object_provided_version(obj, depobj, vna)) return (-1); vernum = VER_NEED_IDX(vna->vna_other); assert(vernum <= maxvernum); obj->vertab[vernum].hash = vna->vna_hash; obj->vertab[vernum].name = obj->strtab + vna->vna_name; obj->vertab[vernum].file = obj->strtab + vn->vn_file; obj->vertab[vernum].flags = (vna->vna_other & VER_NEED_HIDDEN) ? VER_INFO_HIDDEN : 0; if (vna->vna_next == 0) break; vna = (const Elf_Vernaux *) ((char *)vna + vna->vna_next); } if (vn->vn_next == 0) break; vn = (const Elf_Verneed *) ((char *)vn + vn->vn_next); } return 0; } static int rtld_verify_versions(const Objlist *objlist) { Objlist_Entry *entry; int rc; rc = 0; STAILQ_FOREACH(entry, objlist, link) { /* * Skip dummy objects or objects that have their version requirements * already checked. 
*/ if (entry->obj->strtab == NULL || entry->obj->vertab != NULL) continue; if (rtld_verify_object_versions(entry->obj) == -1) { rc = -1; if (ld_tracing == NULL) break; } } if (rc == 0 || ld_tracing != NULL) rc = rtld_verify_object_versions(&obj_rtld); return rc; } const Ver_Entry * fetch_ventry(const Obj_Entry *obj, unsigned long symnum) { Elf_Versym vernum; if (obj->vertab) { vernum = VER_NDX(obj->versyms[symnum]); if (vernum >= obj->vernum) { _rtld_error("%s: symbol %s has wrong verneed value %d", obj->path, obj->strtab + symnum, vernum); } else if (obj->vertab[vernum].hash != 0) { return &obj->vertab[vernum]; } } return NULL; } int _rtld_get_stack_prot(void) { return (stack_prot); } int _rtld_is_dlopened(void *arg) { Obj_Entry *obj; RtldLockState lockstate; int res; rlock_acquire(rtld_bind_lock, &lockstate); obj = dlcheck(arg); if (obj == NULL) obj = obj_from_addr(arg); if (obj == NULL) { _rtld_error("No shared object contains address"); lock_release(rtld_bind_lock, &lockstate); return (-1); } res = obj->dlopened ? 1 : 0; lock_release(rtld_bind_lock, &lockstate); return (res); } static void map_stacks_exec(RtldLockState *lockstate) { void (*thr_map_stacks_exec)(void); if ((max_stack_flags & PF_X) == 0 || (stack_prot & PROT_EXEC) != 0) return; thr_map_stacks_exec = (void (*)(void))(uintptr_t) get_program_var_addr("__pthread_map_stacks_exec", lockstate); if (thr_map_stacks_exec != NULL) { stack_prot |= PROT_EXEC; thr_map_stacks_exec(); } } void symlook_init(SymLook *dst, const char *name) { bzero(dst, sizeof(*dst)); dst->name = name; dst->hash = elf_hash(name); dst->hash_gnu = gnu_hash(name); } static void symlook_init_from_req(SymLook *dst, const SymLook *src) { dst->name = src->name; dst->hash = src->hash; dst->hash_gnu = src->hash_gnu; dst->ventry = src->ventry; dst->flags = src->flags; dst->defobj_out = NULL; dst->sym_out = NULL; dst->lockstate = src->lockstate; } /* * Parse a file descriptor number without pulling in more of libc (e.g. atoi). */ static int parse_libdir(const char *str) { static const int RADIX = 10; /* XXXJA: possibly support hex? */ const char *orig; int fd; char c; orig = str; fd = 0; for (c = *str; c != '\0'; c = *++str) { if (c < '0' || c > '9') return (-1); fd *= RADIX; fd += c - '0'; } /* Make sure we actually parsed something. */ if (str == orig) { _rtld_error("failed to parse directory FD from '%s'", str); return (-1); } return (fd); } /* * Overrides for libc_pic-provided functions. */ int __getosreldate(void) { size_t len; int oid[2]; int error, osrel; if (osreldate != 0) return (osreldate); oid[0] = CTL_KERN; oid[1] = KERN_OSRELDATE; osrel = 0; len = sizeof(osrel); error = sysctl(oid, 2, &osrel, &len, NULL, 0); if (error == 0 && osrel > 0 && len == sizeof(osrel)) osreldate = osrel; return (osreldate); } void exit(int status) { _exit(status); } void (*__cleanup)(void); int __isthreaded = 0; int _thread_autoinit_dummy_decl = 1; /* * No unresolved symbols for rtld. 
*/ void __pthread_cxa_finalize(struct dl_phdr_info *a) { } void __stack_chk_fail(void) { _rtld_error("stack overflow detected; terminated"); rtld_die(); } __weak_reference(__stack_chk_fail, __stack_chk_fail_local); void __chk_fail(void) { _rtld_error("buffer overflow detected; terminated"); rtld_die(); } const char * rtld_strerror(int errnum) { if (errnum < 0 || errnum >= sys_nerr) return ("Unknown error"); return (sys_errlist[errnum]); } diff --git a/libexec/rtld-elf/sparc64/rtld_machdep.h b/libexec/rtld-elf/sparc64/rtld_machdep.h index 44fe2cf9b757..9df63b8bb1f3 100644 --- a/libexec/rtld-elf/sparc64/rtld_machdep.h +++ b/libexec/rtld-elf/sparc64/rtld_machdep.h @@ -1,74 +1,76 @@ /*- * Copyright (c) 1999, 2000 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef RTLD_MACHDEP_H #define RTLD_MACHDEP_H 1 #include #include struct Struct_Obj_Entry; /* Return the address of the .dynamic section in the dynamic linker. */ Elf_Dyn *rtld_dynamic_addr(void); #define rtld_dynamic(obj) rtld_dynamic_addr() #define RTLD_IS_DYNAMIC() (rtld_dynamic_addr() != NULL) Elf_Addr reloc_jmpslot(Elf_Addr *, Elf_Addr, const struct Struct_Obj_Entry *, const struct Struct_Obj_Entry *, const Elf_Rel *); #define make_function_pointer(def, defobj) \ ((defobj)->relocbase + (def)->st_value) #define call_initfini_pointer(obj, target) \ (((InitFunc)(target))()) #define call_init_pointer(obj, target) \ (((InitArrFunc)(target))(main_argc, main_argv, environ)) #define round(size, align) \ (((size) + (align) - 1) & ~((align) - 1)) #define calculate_first_tls_offset(size, align) \ round(size, align) #define calculate_tls_offset(prev_offset, prev_size, size, align) \ round((prev_offset) + (size), align) #define calculate_tls_end(off, size) ((off) + (size)) typedef struct { unsigned long ti_module; unsigned long ti_offset; } tls_index; extern void *__tls_get_addr(tls_index *ti); #define RTLD_DEFAULT_STACK_PF_EXEC 0 #define RTLD_DEFAULT_STACK_EXEC 0 +#define md_abi_variant_hook(x) + #endif diff --git a/release/Makefile b/release/Makefile index d9dbb4f80c20..e3a233d355d2 100644 --- a/release/Makefile +++ b/release/Makefile @@ -1,306 +1,306 @@ # $FreeBSD$ # # Makefile for building releases and release media. 
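#
# An illustrative invocation (editor's example; the targets and
# variables are the ones documented below, while the paths are
# assumptions):
#
#   cd /usr/src/release
#   make obj
#   make release TARGET=amd64 TARGET_ARCH=amd64 WITH_DVD=1
#   make install DESTDIR=/path/to/staging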
# # User-driven targets: # cdrom: Builds release CD-ROM media (disc1.iso) # dvdrom: Builds release DVD-ROM media (dvd1.iso) # memstick: Builds memory stick image (memstick.img) # mini-memstick: Builds minimal memory stick image (mini-memstick.img) # ftp: Sets up FTP distribution area (ftp) # release: Invokes real-release, vm-release, and cloudware-release targets # real-release: Build all media and FTP distribution area # vm-release: Build all virtual machine image targets # cloudware-release: Build all cloud hosting provider targets # install: Invokes the release-install and vm-install targets # release-install: Copies all release installation media into ${DESTDIR} # vm-install: Copies all virtual machine images into ${DESTDIR} # # Variables affecting the build process: # WORLDDIR: location of src tree -- must have built world and default kernel # (by default, the directory above this one) # PORTSDIR: location of ports tree to distribute (default: /usr/ports) # DOCDIR: location of doc tree (default: /usr/doc) # XTRADIR: xtra-bits-dir argument for /mkisoimages.sh # NOPKG: if set, do not distribute third-party packages # NOPORTS: if set, do not distribute ports tree # NOSRC: if set, do not distribute source tree # NODOC: if set, do not generate release documentation # WITH_DVD: if set, generate dvd1.iso # WITH_COMPRESSED_IMAGES: if set, compress installation images with xz(1) # (uncompressed images are not removed) # WITH_VMIMAGES: if set, build virtual machine images with the release # WITH_COMPRESSED_VMIMAGES: if set, compress virtual machine disk images # with xz(1) (extremely time consuming) # WITH_CLOUDWARE: if set, build cloud hosting disk images with the release # TARGET/TARGET_ARCH: architecture of built release # WORLDDIR?= ${.CURDIR}/.. PORTSDIR?= /usr/ports DOCDIR?= /usr/doc RELNOTES_LANG?= en_US.ISO8859-1 .if !defined(TARGET) || empty(TARGET) TARGET= ${MACHINE} .endif .if !defined(TARGET_ARCH) || empty(TARGET_ARCH) .if ${TARGET} == ${MACHINE} TARGET_ARCH= ${MACHINE_ARCH} .else TARGET_ARCH= ${TARGET} .endif .endif IMAKE= ${MAKE} TARGET_ARCH=${TARGET_ARCH} TARGET=${TARGET} DISTDIR= dist # Define OSRELEASE by using newvars.sh .if !defined(OSRELEASE) || empty(OSRELEASE) .for _V in TYPE BRANCH REVISION ${_V}!= eval $$(awk '/^${_V}=/{print}' ${.CURDIR}/../sys/conf/newvers.sh); echo $$${_V} .endfor .for _V in ${TARGET_ARCH} .if !empty(TARGET:M${_V}) OSRELEASE= ${TYPE}-${REVISION}-${BRANCH}-${TARGET} VOLUME_LABEL= ${REVISION:C/[.-]/_/g}_${BRANCH:C/[.-]/_/g}_${TARGET} .else OSRELEASE= ${TYPE}-${REVISION}-${BRANCH}-${TARGET}-${TARGET_ARCH} VOLUME_LABEL= ${REVISION:C/[.-]/_/g}_${BRANCH:C/[.-]/_/g}_${TARGET_ARCH} .endif .endfor .endif .if !defined(VOLUME_LABEL) || empty(VOLUME_LABEL) VOLUME_LABEL= FreeBSD_Install .endif .if !exists(${DOCDIR}) NODOC= true .endif .if !exists(${PORTSDIR}) NOPORTS= true .endif EXTRA_PACKAGES= .if !defined(NOPORTS) EXTRA_PACKAGES+= ports.txz .endif .if !defined(NOSRC) EXTRA_PACKAGES+= src.txz .endif .if !defined(NODOC) EXTRA_PACKAGES+= reldoc .endif RELEASE_TARGETS= ftp IMAGES= .if exists(${.CURDIR}/${TARGET}/mkisoimages.sh) RELEASE_TARGETS+= cdrom IMAGES+= disc1.iso bootonly.iso . if defined(WITH_DVD) && !empty(WITH_DVD) RELEASE_TARGETS+= dvdrom IMAGES+= dvd1.iso . 
endif .endif .if exists(${.CURDIR}/${TARGET}/make-memstick.sh) RELEASE_TARGETS+= memstick.img RELEASE_TARGETS+= mini-memstick.img IMAGES+= memstick.img IMAGES+= mini-memstick.img .endif CLEANFILES= packagesystem *.txz MANIFEST release ${IMAGES} .if defined(WITH_COMPRESSED_IMAGES) && !empty(WITH_COMPRESSED_IMAGES) . for I in ${IMAGES} CLEANFILES+= ${I}.xz . endfor .endif .if defined(WITH_DVD) && !empty(WITH_DVD) CLEANFILES+= pkg-stage .endif CLEANDIRS= dist ftp disc1 bootonly dvd beforeclean: chflags -R noschg . .include clean: beforeclean base.txz: mkdir -p ${DISTDIR} cd ${WORLDDIR} && ${IMAKE} distributeworld DISTDIR=${.OBJDIR}/${DISTDIR} # Set up mergemaster root database sh ${.CURDIR}/scripts/mm-mtree.sh -m ${WORLDDIR} -F \ "TARGET_ARCH=${TARGET_ARCH} TARGET=${TARGET}" -D "${.OBJDIR}/${DISTDIR}/base" etcupdate extract -B -M "TARGET_ARCH=${TARGET_ARCH} TARGET=${TARGET}" \ -s ${WORLDDIR} -d "${.OBJDIR}/${DISTDIR}/base/var/db/etcupdate" # Package all components cd ${WORLDDIR} && ${IMAKE} packageworld DISTDIR=${.OBJDIR}/${DISTDIR} mv ${DISTDIR}/*.txz . kernel.txz: mkdir -p ${DISTDIR} cd ${WORLDDIR} && ${IMAKE} distributekernel packagekernel DISTDIR=${.OBJDIR}/${DISTDIR} mv ${DISTDIR}/kernel*.txz . src.txz: mkdir -p ${DISTDIR}/usr ln -fs ${WORLDDIR} ${DISTDIR}/usr/src cd ${DISTDIR} && tar cLvf - --exclude .svn --exclude .zfs \ --exclude .git --exclude @ --exclude usr/src/release/dist usr/src | \ ${XZ_CMD} > ${.OBJDIR}/src.txz ports.txz: mkdir -p ${DISTDIR}/usr ln -fs ${PORTSDIR} ${DISTDIR}/usr/ports cd ${DISTDIR} && tar cLvf - \ --exclude .git --exclude .svn \ --exclude usr/ports/distfiles --exclude usr/ports/packages \ --exclude 'usr/ports/INDEX*' --exclude work usr/ports | \ ${XZ_CMD} > ${.OBJDIR}/ports.txz reldoc: cd ${.CURDIR}/doc && ${MAKE} all install clean 'FORMATS=html txt' \ INSTALL_COMPRESSED='' URLS_ABSOLUTE=YES DOCDIR=${.OBJDIR}/rdoc mkdir -p reldoc .for i in hardware readme relnotes errata ln -f rdoc/${RELNOTES_LANG}/${i}/article.txt reldoc/${i:tu}.TXT ln -f rdoc/${RELNOTES_LANG}/${i}/article.html reldoc/${i:tu}.HTM .endfor cp rdoc/${RELNOTES_LANG}/readme/docbook.css reldoc disc1: packagesystem # Install system mkdir -p ${.TARGET} cd ${WORLDDIR} && ${IMAKE} installkernel installworld distribution \ DESTDIR=${.OBJDIR}/${.TARGET} MK_RESCUE=no MK_KERNEL_SYMBOLS=no \ MK_PROFILE=no MK_SENDMAIL=no MK_TESTS=no MK_LIB32=no \ MK_DEBUG_FILES=no # Copy distfiles mkdir -p ${.TARGET}/usr/freebsd-dist for dist in MANIFEST $$(ls *.txz | grep -v -- '-dbg'); \ do cp $${dist} ${.TARGET}/usr/freebsd-dist; \ done # Copy documentation, if generated .if !defined(NODOC) cp reldoc/* ${.TARGET} .endif # Set up installation environment ln -fs /tmp/bsdinstall_etc/resolv.conf ${.TARGET}/etc/resolv.conf echo sendmail_enable=\"NONE\" > ${.TARGET}/etc/rc.conf echo hostid_enable=\"NO\" >> ${.TARGET}/etc/rc.conf echo debug.witness.trace=0 >> ${.TARGET}/etc/sysctl.conf echo vfs.mountroot.timeout=\"10\" >> ${.TARGET}/boot/loader.conf cp ${.CURDIR}/rc.local ${.TARGET}/etc touch ${.TARGET} bootonly: packagesystem # Install system mkdir -p ${.TARGET} cd ${WORLDDIR} && ${IMAKE} installkernel installworld distribution \ DESTDIR=${.OBJDIR}/${.TARGET} MK_AMD=no MK_AT=no \ MK_GAMES=no MK_GROFF=no \ MK_INSTALLLIB=no MK_LIB32=no MK_MAIL=no \ MK_NCP=no MK_TOOLCHAIN=no MK_PROFILE=no \ MK_INSTALLIB=no MK_RESCUE=no MK_DICT=no \ MK_KERNEL_SYMBOLS=no MK_TESTS=no MK_DEBUG_FILES=no # Copy manifest only (no distfiles) to get checksums mkdir -p ${.TARGET}/usr/freebsd-dist cp MANIFEST ${.TARGET}/usr/freebsd-dist # Copy documentation, 
if generated .if !defined(NODOC) cp reldoc/* ${.TARGET} .endif # Set up installation environment ln -fs /tmp/bsdinstall_etc/resolv.conf ${.TARGET}/etc/resolv.conf echo sendmail_enable=\"NONE\" > ${.TARGET}/etc/rc.conf echo hostid_enable=\"NO\" >> ${.TARGET}/etc/rc.conf echo debug.witness.trace=0 >> ${.TARGET}/etc/sysctl.conf echo vfs.mountroot.timeout=\"10\" >> ${.TARGET}/boot/loader.conf cp ${.CURDIR}/rc.local ${.TARGET}/etc dvd: packagesystem # Install system mkdir -p ${.TARGET} cd ${WORLDDIR} && ${IMAKE} installkernel installworld distribution \ DESTDIR=${.OBJDIR}/${.TARGET} MK_RESCUE=no MK_KERNEL_SYMBOLS=no \ MK_TESTS=no MK_DEBUG_FILES=no # Copy distfiles mkdir -p ${.TARGET}/usr/freebsd-dist for dist in MANIFEST $$(ls *.txz | grep -v -- '-dbg'); \ do cp $${dist} ${.TARGET}/usr/freebsd-dist; \ done # Copy documentation, if generated .if !defined(NODOC) cp reldoc/* ${.TARGET} .endif # Set up installation environment ln -fs /tmp/bsdinstall_etc/resolv.conf ${.TARGET}/etc/resolv.conf echo sendmail_enable=\"NONE\" > ${.TARGET}/etc/rc.conf echo hostid_enable=\"NO\" >> ${.TARGET}/etc/rc.conf echo debug.witness.trace=0 >> ${.TARGET}/etc/sysctl.conf echo vfs.mountroot.timeout=\"10\" >> ${.TARGET}/boot/loader.conf cp ${.CURDIR}/rc.local ${.TARGET}/etc touch ${.TARGET} release.iso: disc1.iso disc1.iso: disc1 sh ${.CURDIR}/${TARGET}/mkisoimages.sh -b ${VOLUME_LABEL}_CD ${.TARGET} disc1 ${XTRADIR} dvd1.iso: dvd pkg-stage sh ${.CURDIR}/${TARGET}/mkisoimages.sh -b ${VOLUME_LABEL}_DVD ${.TARGET} dvd ${XTRADIR} bootonly.iso: bootonly sh ${.CURDIR}/${TARGET}/mkisoimages.sh -b ${VOLUME_LABEL}_BO ${.TARGET} bootonly ${XTRADIR} memstick: memstick.img memstick.img: disc1 sh ${.CURDIR}/${TARGET}/make-memstick.sh disc1 ${.TARGET} mini-memstick: mini-memstick.img mini-memstick.img: bootonly sh ${.CURDIR}/${TARGET}/make-memstick.sh bootonly ${.TARGET} packagesystem: base.txz kernel.txz ${EXTRA_PACKAGES} sh ${.CURDIR}/scripts/make-manifest.sh *.txz > MANIFEST touch ${.TARGET} pkg-stage: .if !defined(NOPKG) env REPOS_DIR=${.CURDIR}/pkg_repos/ \ sh ${.CURDIR}/scripts/pkg-stage.sh mkdir -p ${.OBJDIR}/dvd/packages/repos/ cp ${.CURDIR}/scripts/FreeBSD_install_cdrom.conf \ ${.OBJDIR}/dvd/packages/repos/ .endif touch ${.TARGET} cdrom: disc1.iso bootonly.iso dvdrom: dvd1.iso ftp: packagesystem rm -rf ftp mkdir -p ftp cp *.txz MANIFEST ftp release: real-release vm-release cloudware-release - touch ${.TARGET} + touch ${.OBJDIR}/${.TARGET} real-release: ${MAKE} -C ${.CURDIR} ${.MAKEFLAGS} obj ${MAKE} -C ${.CURDIR} ${.MAKEFLAGS} ${RELEASE_TARGETS} install: release-install vm-install cloudware-install release-install: .if defined(DESTDIR) && !empty(DESTDIR) mkdir -p ${DESTDIR} .endif cp -a ftp ${DESTDIR}/ .for I in ${IMAGES} cp -p ${I} ${DESTDIR}/${OSRELEASE}-${I} . if defined(WITH_COMPRESSED_IMAGES) && !empty(WITH_COMPRESSED_IMAGES) ${XZ_CMD} -k ${DESTDIR}/${OSRELEASE}-${I} . endif .endfor cd ${DESTDIR} && sha512 ${OSRELEASE}* > ${DESTDIR}/CHECKSUM.SHA512 cd ${DESTDIR} && sha256 ${OSRELEASE}* > ${DESTDIR}/CHECKSUM.SHA256 .include "${.CURDIR}/Makefile.vm" diff --git a/release/doc/en_US.ISO8859-1/hardware/article.xml b/release/doc/en_US.ISO8859-1/hardware/article.xml index 191ac575c7d0..4b66434bffc1 100644 --- a/release/doc/en_US.ISO8859-1/hardware/article.xml +++ b/release/doc/en_US.ISO8859-1/hardware/article.xml @@ -1,1656 +1,1659 @@ %release; %devauto; ]>
&os; &release.current; Hardware Notes The &os; Documentation Project $FreeBSD$ 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 The &os; Documentation Project &tm-attrib.freebsd; &tm-attrib.amd; &tm-attrib.fujitsu; &tm-attrib.ibm; &tm-attrib.intel; &tm-attrib.sparc; &tm-attrib.sun; &tm-attrib.general; Introduction This document contains the hardware compatibility notes for &os; &release.current;. It lists the hardware platforms supported by &os;, as well as the various types of hardware devices (storage controllers, network interfaces, and so on), along with known working instances of these devices. Supported Processors and System Boards This section provides some architecture-specific information about the specific processors and systems that are supported by each architecture. amd64 Since mid-2003, &os;/&arch.amd64; has supported the AMD64 (Hammer) and &intel; EM64T architecture, and is now one of the Tier-1 platforms (fully supported architecture), which are expected to be Production Quality with respect to all aspects of the &os; operating system, including installation and development environments. Note that there are two names for this architecture, AMD64 (AMD) and Intel EM64T (Extended Memory 64-bit Technology). The 64-bit modes of the two architectures are almost compatible with each other, and &os;/&arch.amd64; supports them both. As of this writing, the following processors are supported: &amd.athlon;64 (Clawhammer). &amd.opteron; (Sledgehammer). &amd.sempron;. &amd.turion;. &amd.phenom;. All multi-core &intel; &xeon; processors except Sossaman have EM64T support. The single-core &intel; &xeon; processors Nocona, Irwindale, Potomac, and Cranford have EM64T support. All &intel; &core; 2 (not &core; Duo) and later processors All &intel; &core; i range of processors All &intel; &pentium; D processors All &intel; &centrino; Duo and &centrino; Pro platforms &intel; &pentium; 4s and &celeron; Ds using the Cedar Mill core have EM64T support. Some &intel; &pentium; 4s and &celeron; Ds using the Prescott core have EM64T support. See the Intel Processor Spec Finder for the definitive answer about EM64T support in Intel processors. &intel; EM64T is an extended version of IA-32 (x86) and is different from the &intel; IA-64 (Itanium) architecture. Some of &intel;'s old documentation refers to &intel; EM64T as 64-bit extension technology or IA-32e. Both Uniprocessor (UP) and Symmetric Multi-processor (SMP) configurations are supported. In many respects, &os;/&arch.amd64; is similar to &os;/&arch.i386; in terms of the drivers supported. Generally, drivers that already function correctly on other 64-bit platforms should work. i386 &os;/&arch.i386; runs on a wide variety of IBM PC compatible machines. Due to the wide range of hardware available for this architecture, it is impossible to exhaustively list all combinations of equipment supported by &os;. Nevertheless, some general guidelines are presented here. Almost all &i386;-compatible processors with a floating point unit are supported. All &intel; processors beginning with the 80486 are supported, including the 80486, &pentium;, &pentium; Pro, &pentium; II, &pentium; III, &pentium; 4, and variants thereof, such as the &xeon; and &celeron; processors. All &i386;-compatible AMD processors are also supported, including the &am486;, &am5x86;, K5, &amd.k6; (and variants), &amd.athlon; (including Athlon-MP, Athlon-XP, Athlon-4, and Athlon Thunderbird), and &amd.duron; processors. The AMD Élan SC520 embedded processor is supported.
The Transmeta Crusoe is recognized and supported, as are &i386;-compatible processors from Cyrix and NexGen. There is a wide variety of motherboards available for this architecture. Motherboards using the ISA, VLB, EISA, AGP, and PCI expansion buses are well-supported. There is some limited support for the MCA (MicroChannel) expansion bus used in the IBM PS/2 line of PCs. Symmetric multi-processor (SMP) systems are generally supported by &os;, although in some cases, BIOS or motherboard bugs may generate some problems. Perusal of the archives of the &a.smp; may yield some clues. &os; will take advantage of SMT (Symmetric MultiThreading, also known as HyperThreading on &intel; CPUs) on the supported CPUs. The GENERIC kernel which is installed by default will automatically detect the additional logical processors. The default &os; scheduler recognizes processor topology on the system and selects logical and physical processors to obtain optimal performance. The &man.smp.4; manual page has more details. &os; will take advantage of Physical Address Extensions (PAE) support on CPUs that support this feature. A kernel with the PAE feature enabled will detect memory above 4 gigabytes and allow it to be used by the system. This feature places constraints on the device drivers and other features of &os; which may be used; consult the &man.pae.4; manual page for more details. &os; will generally run on i386-based laptops, albeit with varying levels of support for certain hardware features such as sound, graphics, power management, and PCCARD expansion slots. These features tend to vary in idiosyncratic ways between machines, and frequently require special-case support in &os; to work around hardware bugs or other oddities. When in doubt, a search of the archives of the &a.mobile; may be useful. Most modern laptops (as well as many desktops) use the Advanced Configuration and Power Management (ACPI) standard. &os; supports ACPI via the ACPI Component Architecture reference implementation from &intel;, as described in the &man.acpi.4; manual page. The use of ACPI causes instabilities on some machines and it may be necessary to disable the ACPI driver, which is normally loaded via a kernel module. This may be accomplished by adding the following line to /boot/device.hints: hint.acpi.0.disabled="1" Users debugging ACPI-related problems may find it useful to disable portions of the ACPI functionality. The &man.acpi.4; manual page has more information on how to do this via loader tunables. ACPI depends on a Differentiated System Descriptor Table (DSDT) provided by each machine's BIOS. Some machines have bad or incomplete DSDTs, which prevents ACPI from functioning correctly. Replacement DSDTs for some machines can be found at the DSDT section of the ACPI4Linux project Web site. &os; can use these DSDTs to override the DSDT provided by the BIOS; see the &man.acpi.4; manual page for more information. pc98 NEC PC-9801/9821 series with almost all &i386;-compatible processors, including 80486, &pentium;, &pentium; Pro, &pentium; II, and variants. All &i386;-compatible processors by AMD, Cyrix, IBM, and IDT are also supported. NEC FC-9801/9821 series, and NEC SV-98 series (both of them are compatible with PC-9801/9821 series) should be supported. EPSON PC-386/486/586 series, which are compatible with NEC PC-9801 series are supported. High-resolution mode is not supported. NEC PC-98XA/XL/RL/XL^2, and NEC PC-H98 series are supported in normal (PC-9801 compatible) mode only. 
Although there are some multi-processor systems (such as Rs20/B20), SMP-related features of &os; are not supported yet. PC-9801/9821 standard bus (called C-Bus), PC-9801NOTE expansion bus (110pin), and PCI bus are supported. The New Extended Standard Architecture (NESA) bus (used in PC-H98, SV-H98, and FC-H98 series) is not supported. powerpc All Apple PowerPC machines with built-in USB are supported, as well as a limited selection of non-Apple machines, including KVM on POWER7. SMP is supported on all systems with more than one processor. sparc64 This section describes the systems currently known to be supported by &os; on the Fujitsu &sparc64; and Sun &ultrasparc; platforms. SMP is supported on all systems with more than one processor. When using the GENERIC kernel, &os;/&arch.sparc64; systems not equipped with a framebuffer supported by the &man.creator.4; (Sun Creator, Sun Creator3D and Sun Elite3D) or &man.machfb.4; (Sun PGX and Sun PGX64 as well as the ATI Mach64 chips found onboard in, for example, the &sun.blade; 100, &sun.blade; 150, &sun.ultra; 5 and &sun.ultra; 10) driver must use the serial console. If you have a system that is not listed here, it may not have been tested with &os; &release.current;. We encourage you to try it and send a note to the &a.sparc; with your results, including which devices work and which do not. The following systems are fully supported by &os;: Naturetech GENIALstation 777S &sun.blade; 100 &sun.blade; 150 &sun.enterprise; 150 &sun.enterprise; 220R &sun.enterprise; 250 &sun.enterprise; 420R &sun.enterprise; 450 &sun.fire; B100s (support for the on-board NICs first appeared in 8.1-RELEASE) &sun.fire; V100 &sun.fire; V120 Sun &netra; t1 100/105 Sun &netra; T1 AC200/DC200 Sun &netra; t 1100 Sun &netra; t 1120 Sun &netra; t 1125 Sun &netra; t 1400/1405 Sun &netra; 120 Sun &netra; X1 Sun &sparcengine; Ultra AX1105 Sun &sparcengine; Ultra AXe Sun &sparcengine; Ultra AXi Sun &sparcengine; Ultra AXmp Sun &sparcengine; CP1500 &sun.ultra; 1 &sun.ultra; 1E &sun.ultra; 2 &sun.ultra; 5 &sun.ultra; 10 &sun.ultra; 30 &sun.ultra; 60 &sun.ultra; 80 &sun.ultra; 450 The following systems are partially supported by &os;. In particular, the Fibre Channel controllers in SBus-based systems are not supported. However, it is possible to use these with a SCSI controller supported by the &man.esp.4; driver (Sun ESP SCSI, Sun FAS Fast-SCSI and Sun FAS366 Fast-Wide SCSI controllers). 
&sun.enterprise; 3500 &sun.enterprise; 4500 Starting with 7.2-RELEASE, &arch.sparc64; systems based on Sun &ultrasparc; III and beyond are also supported by &os;, which includes the following known working systems: &sun.blade; 1000 &sun.blade; 1500 &sun.blade; 2000 &sun.blade; 2500 &sun.fire; 280R &sun.fire; V210 &sun.fire; V215 (support first appeared in 7.3-RELEASE and 8.1-RELEASE) &sun.fire; V240 &sun.fire; V245 (support first appeared in 7.3-RELEASE and 8.1-RELEASE) &sun.fire; V250 &sun.fire; V440 (support for the on-board NICs first appeared in 7.3-RELEASE and 8.0-RELEASE) &sun.fire; V480 (501-6780 and 501-6790 centerplanes only, for which support first appeared in 7.3-RELEASE and 8.1-RELEASE, other centerplanes might work beginning with 8.3-RELEASE and 9.0-RELEASE) &sun.fire; V880 &sun.fire; V890 (support first appeared in 7.4-RELEASE and 8.1-RELEASE, non-mixed &ultrasparc; IV/IV+ CPU-configurations only) &netra; 20/&netra; T4 The following Sun &ultrasparc; systems are not tested but believed to be also supported by &os;: &sun.fire; V125 &sun.fire; V490 (support first appeared in 7.4-RELEASE and 8.1-RELEASE, non-mixed &ultrasparc; IV/IV+ CPU-configurations only) Starting with 7.4-RELEASE and 8.1-RELEASE, &arch.sparc64; systems based on Fujitsu &sparc64; V are also supported by &os;, which includes the following known working systems: Fujitsu &primepower; 250 The following Fujitsu &primepower; systems are not tested but believed to be also supported by &os;: Fujitsu &primepower; 450 Fujitsu &primepower; 650 Fujitsu &primepower; 850 Supported Devices This section describes the devices currently known to be supported by &os;. Other configurations may also work, but simply have not been tested yet. Feedback, updates, and corrections to this list are encouraged. Where possible, the drivers applicable to each device or class of devices is listed. If the driver in question has a manual page in the &os; base distribution (most should), it is referenced here. Information on specific models of supported devices, controllers, etc. can be found in the manual pages. The device lists in this document are being generated automatically from &os; manual pages. This means that some devices, which are supported by multiple drivers, may appear multiple times. Disk Controllers [&arch.amd64;, &arch.i386;, &arch.pc98;, &arch.sparc64;] IDE/ATA controllers (&man.ata.4; driver) [&arch.pc98;] IDE/ATA controllers (wdc driver) On-board IDE controller &hwlist.aac; &hwlist.adv; &hwlist.adw; &hwlist.aha; &hwlist.ahb; &hwlist.ahc; &hwlist.ahci; &hwlist.ahd; &hwlist.aic; &hwlist.amr; &hwlist.arcmsr; &hwlist.bt; &hwlist.ciss; &hwlist.ct; &hwlist.dpt; [&arch.amd64;, &arch.i386;] Booting from these controllers is supported. EISA adapters are not supported. &hwlist.esp; &hwlist.hpt27xx; &hwlist.hptiop; &hwlist.hptmv; &hwlist.hptrr; &hwlist.ida; &hwlist.iir; &hwlist.ips; &hwlist.isci; &hwlist.isp; &hwlist.mfi; &hwlist.mlx; [&arch.amd64;, &arch.i386;] Booting from these controllers is supported. EISA adapters are not supported. &hwlist.mly; &hwlist.mpr; &hwlist.mps; &hwlist.mpt; &hwlist.mrsas; &hwlist.mvs; &hwlist.ncr; &hwlist.ncv; &hwlist.nsp; &hwlist.pms; &hwlist.pst; &hwlist.siis; &hwlist.stg; &hwlist.sym; &hwlist.trm; &hwlist.twa; &hwlist.twe; &hwlist.tws; &hwlist.vpo; [&arch.i386;] The wds(4) driver supports the WD7000 SCSI controller. 
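Whether a controller from the lists above actually attached is easiest to confirm from a running system. A short sketch; the driver names grepped for are examples, not a claim about any particular machine:

    # CAM's view of the controllers and the peripherals behind them
    camcontrol devlist
    # probe messages from individual drivers, e.g. ahci(4) or mpt(4)
    dmesg | egrep -i 'ahci|mpt'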
With all supported SCSI controllers, full support is provided for SCSI-I, SCSI-II, and SCSI-III peripherals, including hard disks, optical disks, tape drives (including DAT, 8mm Exabyte, Mammoth, and DLT), medium changers, processor target devices and CD-ROM drives. WORM devices that support CD-ROM commands are supported for read-only access by the CD-ROM drivers (such as &man.cd.4;). WORM/CD-R/CD-RW writing support is provided by &man.cdrecord.1;, which is a part of the sysutils/cdrtools port in the Ports Collection. The following CD-ROM type systems are supported at this time: SCSI interface (also includes ProAudio Spectrum and SoundBlaster SCSI) (&man.cd.4;) [&arch.i386;] Sony proprietary interface (all models) (&man.scd.4;) ATAPI IDE interface (&man.acd.4;) [&arch.i386;] The following device is unmaintained: Mitsumi proprietary CD-ROM interface (all models) (&man.mcd.4;) Ethernet Interfaces &hwlist.ae; &hwlist.age; &hwlist.ale; &hwlist.aue; &hwlist.axe; ASIX Electronics AX88178A/AX88179 USB Gigabit Ethernet adapters (&man.axge.4; driver) &hwlist.bce; [&arch.amd64;, &arch.i386;] Broadcom BCM4401 based Fast Ethernet adapters (&man.bfe.4; driver) &hwlist.bge; &hwlist.bxe; &hwlist.cas; &hwlist.cdce; [&arch.amd64;, &arch.i386;] Crystal Semiconductor CS89x0-based NICs (&man.cs.4; driver) &hwlist.cue; &hwlist.cxgb; &hwlist.dc; &hwlist.de; &hwlist.ed; &hwlist.em; &hwlist.ep; Agere ET1310 Gigabit Ethernet adapters (&man.et.4; driver) &hwlist.ex; &hwlist.fe; &hwlist.fxp; &hwlist.gem; &hwlist.hme; &hwlist.ie; &hwlist.igb; &hwlist.ipheth; &hwlist.ixgb; &hwlist.ixgbe; &hwlist.jme; &hwlist.kue; &hwlist.lge; &hwlist.msk; &hwlist.mxge; &hwlist.my; &hwlist.nfe; &hwlist.nge; &hwlist.nxge; &hwlist.oce; &hwlist.pcn; &hwlist.qlxgb; &hwlist.qlxgbe; &hwlist.qlxge; &hwlist.re; &hwlist.rl; &hwlist.rue; &hwlist.sf; &hwlist.sfxge; &hwlist.sge; &hwlist.sis; &hwlist.sk; &hwlist.smsc; &hwlist.sn; &hwlist.snc; &hwlist.ste; &hwlist.stge; &hwlist.ti; &hwlist.tl; [&arch.amd64;, &arch.i386;, &arch.pc98;] SMC 83c17x (EPIC)-based Ethernet NICs (&man.tx.4; driver) &hwlist.txp; &hwlist.udav; &hwlist.urndis; &hwlist.vge; &hwlist.vr; &hwlist.vte; &hwlist.vx; &hwlist.vxge; &hwlist.wb; &hwlist.xe; &hwlist.xl; FDDI Interfaces [&arch.i386;, &arch.pc98;] DEC DEFPA PCI (&man.fpa.4; driver) [&arch.i386;] DEC DEFEA EISA (&man.fpa.4; driver) ATM Interfaces [&arch.i386;, &arch.pc98;] Midway-based ATM interfaces (&man.en.4; driver) [&arch.i386;, &arch.pc98; &arch.sparc64;] FORE Systems, Inc. PCA-200E ATM PCI Adapters (hfa and &man.fatm.4; drivers) [&arch.i386;, &arch.pc98;] IDT NICStAR 77201/211-based ATM Adapters (&man.idt.4; driver) [&arch.i386;, &arch.pc98; &arch.sparc64;] FORE Systems, Inc. 
HE155 and HE622 ATM interfaces (&man.hatm.4; driver) [&arch.i386;, &arch.pc98;] IDT77252-based ATM cards (&man.patm.4; driver) Wireless Network Interfaces [&arch.amd64;, &arch.i386;, &arch.pc98;] Cisco/Aironet 802.11b wireless adapters (&man.an.4; driver) &hwlist.ath; &hwlist.bwi; &hwlist.bwn; [&arch.i386;, &arch.amd64;] Intel PRO/Wireless 2100 MiniPCI network adapter (&man.ipw.4; driver) [&arch.i386;, &arch.amd64;] Intel PRO/Wireless 2200BG/2915ABG MiniPCI and 2225BG PCI network adapters (&man.iwi.4; driver) [&arch.i386;, &arch.amd64;] Intel Dual Band Wireless AC 3160/7260/7265 IEEE 802.11ac network adapters (&man.iwm.4; driver) [&arch.i386;, &arch.amd64;] Intel Wireless WiFi Link 4965AGN IEEE 802.11n PCI network adapters (&man.iwn.4; driver) [&arch.i386;, &arch.amd64;] Marvell Libertas IEEE 802.11b/g PCI network adapters (&man.malo.4; driver) Marvell 88W8363 IEEE 802.11n wireless network adapters (&man.mwl.4; driver) &hwlist.otus; &hwlist.ral; &hwlist.rsu; + Realtek RTL8188CE based PCIe IEEE 802.11b/g/n wireless network + adapters (&man.rtwn.4; driver) + &hwlist.rum; &hwlist.run; &hwlist.uath; &hwlist.upgt; &hwlist.ural; &hwlist.urtw; &hwlist.urtwn; [&arch.amd64;, &arch.i386;, &arch.pc98;] Lucent Technologies WaveLAN/IEEE 802.11b wireless network adapters and workalikes using the Lucent Hermes, Intersil PRISM-II, Intersil PRISM-2.5, Intersil Prism-3, and Symbol Spectrum24 chipsets (&man.wi.4; driver) [&arch.i386;] NCR / AT&T / Lucent Technologies WaveLan T1-speed ISA/radio LAN cards (&man.wl.4; driver) [&arch.i386;, &arch.amd64;] Intel PRO/Wireless 3945ABG MiniPCI network adapters (&man.wpi.4; driver) &hwlist.zyd; Miscellaneous Networks &hwlist.ce; &hwlist.cx; &hwlist.cp; &hwlist.ctau; &hwlist.cm; Serial Interfaces [&arch.amd64;, &arch.i386;] PC standard 8250, 16450, and 16550-based serial ports (&man.sio.4; driver) &hwlist.uart; &hwlist.scc; [&arch.amd64;, &arch.i386;] AST 4 port serial card using shared IRQ ARNET 8 port serial card using shared IRQ ARNET (now Digiboard) Sync 570/i high-speed serial [&arch.i386;] Boca multi-port serial cards Boca BB1004 4-Port serial card (Modems not supported) Boca IOAT66 6-Port serial card (Modems supported) Boca BB1008 8-Port serial card (Modems not supported) Boca BB2016 16-Port serial card (Modems supported) [&arch.i386;] Comtrol Rocketport card (&man.rp.4; driver) [&arch.i386;] Cyclades Cyclom-Y serial board (&man.cy.4; driver) [&arch.i386;] STB 4 port card using shared IRQ [&arch.i386;] DigiBoard intelligent serial cards (digi driver) [&arch.amd64;, &arch.i386;] PCI-Based multi-port serial boards (&man.puc.4; driver) [&arch.amd64;, &arch.i386;] Actiontech 56K PCI [&arch.amd64;, &arch.i386;] Avlab Technology, PCI IO 2S and PCI IO 4S [&arch.amd64;, &arch.i386;] Comtrol RocketPort 550 [&arch.amd64;, &arch.i386;] Decision Computers PCCOM 4-port serial and dual port RS232/422/485 [&arch.amd64;, &arch.i386;] Dolphin Peripherals 4025/4035/4036 [&arch.amd64;, &arch.i386;] IC Book Labs Dreadnought 16x Lite and Pro [&arch.amd64;, &arch.i386;] Lava Computers 2SP-PCI/DSerial-PCI/Quattro-PCI/Octopus-550 [&arch.amd64;, &arch.i386;] Middle Digital, Weasle serial port [&arch.amd64;, &arch.i386;] Moxa Industio CP-114, Smartio C104H-PCI and C168H/PCI [&arch.amd64;, &arch.i386;] NEC PK-UG-X001 and PK-UG-X008 [&arch.amd64;, &arch.i386;] Netmos NM9835 PCI-2S-550 [&arch.amd64;, &arch.i386;] Oxford Semiconductor OX16PCI954 PCI UART [&arch.amd64;, &arch.i386;] Syba Tech SD-LAB PCI-4S2P-550-ECP [&arch.amd64;, &arch.i386;] SIIG Cyber I/O PCI 16C550/16C650/16C850 
[&arch.amd64;, &arch.i386;] SIIG Cyber 2P1S PCI 16C550/16C650/16C850 [&arch.amd64;, &arch.i386;] SIIG Cyber 2S1P PCI 16C550/16C650/16C850 [&arch.amd64;, &arch.i386;] SIIG Cyber 4S PCI 16C550/16C650/16C850 [&arch.amd64;, &arch.i386;] SIIG Cyber Serial (Single and Dual) PCI 16C550/16C650/16C850 [&arch.amd64;, &arch.i386;] Syba Tech Ltd. PCI-4S2P-550-ECP [&arch.amd64;, &arch.i386;] Titan PCI-200H and PCI-800H [&arch.amd64;, &arch.i386;] US Robotics (3Com) 3CP5609 modem [&arch.amd64;, &arch.i386;] VScom PCI-400 and PCI-800 &hwlist.rc; [&arch.i386;, &arch.amd64;] Specialix SI/XIO/SX multiport serial cards, with both the older SIHOST2.x and the enhanced (transputer based, aka JET) host cards (ISA, EISA and PCI) are supported. Note that the newer SX+ PCI cards are not currently supported. (&man.si.4; driver) [&arch.pc98;] Internel serial interfaces (&man.sio.4; driver) PC-9801 on-board PC-9821 2'nd CCU (flags 0x12000000) [&arch.pc98;] NEC PC-9861K, PC-9801-101 and Midori-Denshi MDC-926Rs (&man.sio.4; driver) COM2 (flags 0x01000000) COM3 (flags 0x02000000) [&arch.pc98;] NEC PC-9801-120 (&man.sio.4; driver) "flags 0x11000000" is necessary in kernel configuration. [&arch.pc98;] Microcore MC-16550, MC-16550II, MC-RS98 (&man.sio.4; driver) "flags 0x14000?01" is necessary in kernel configuration. [&arch.pc98;] Media Intelligent RSB-2000, RSB-3000 and AIWA B98-02 (&man.sio.4; driver) "flags 0x15000?01" is necessary in kernel configuration. [&arch.pc98;] Media Intelligent RSB-384 (&man.sio.4; driver) "flags 0x16000001" is necessary in kernel configuration. [&arch.pc98;] I-O DATA RSA-98III (&man.sio.4; driver) "flags 0x18000?01" is necessary in kernel configuration. [&arch.pc98;] Hayes ESP98 (&man.sio.4; driver) "options COM_ESP" and "flags 0x19000000" are necessary in kernel configuration. Sound Devices &hwlist.snd.ad1816; &hwlist.snd.als4000; &hwlist.snd.atiixp; &hwlist.snd.audiocs; &hwlist.snd.cmi; &hwlist.snd.cs4281; &hwlist.snd.csa; &hwlist.snd.ds1; &hwlist.snd.emu10k1; &hwlist.snd.emu10kx; &hwlist.snd.envy24; &hwlist.snd.envy24ht; &hwlist.snd.es137x; &hwlist.snd.ess; &hwlist.snd.fm801; &hwlist.snd.gusc; &hwlist.snd.hda; &hwlist.snd.hdspe; &hwlist.snd.ich; &hwlist.snd.maestro; &hwlist.snd.maestro3; &hwlist.snd.mss; &hwlist.snd.neomagic; &hwlist.snd.sbc; &hwlist.snd.solo; &hwlist.snd.spicds; &hwlist.snd.t4dwave; &hwlist.snd.via8233; &hwlist.snd.via82c686; &hwlist.snd.vibes; [&arch.pc98;] NEC PC-9801-73, 86 and compatibles (nss driver) NEC A-MATE internal sound Q-Vision WaveStar, WaveMaster [&arch.pc98;] NEC X-MATE, CanBe, ValueStar internal (mss driver) [&arch.pc98;] Creative Technologies SoundBlaster(98) (&man.sb.4; driver) [&arch.pc98;] I-O DATA CD-BOX (&man.sb.4; driver) [&arch.pc98;] MPU-401 and compatible interfaces (mpu driver) Q-Vision WaveStar Camera and Video Capture Devices &hwlist.bktr; [&arch.i386;] Connectix QuickCam USB Devices [&arch.amd64;, &arch.i386;, &arch.pc98;] A range of USB peripherals are supported; devices known to work are listed in this section. Owing to the generic nature of most USB devices, with some exceptions any device of a given class will be supported, even if not explicitly listed here. [&arch.amd64;, &arch.i386;, &arch.pc98;] USB Ethernet adapters can be found in the section listing Ethernet interfaces. [&arch.amd64;, &arch.i386;, &arch.pc98;] USB Bluetooth adapters can be found in Bluetooth section. 
&hwlist.ohci; &hwlist.uhci; [&arch.amd64;, &arch.i386;, &arch.pc98;] USB 2.0 controllers using the EHCI interface (&man.ehci.4; driver) [&arch.amd64;, &arch.i386;, &arch.pc98;] Hubs [&arch.amd64;, &arch.i386;, &arch.pc98;] Keyboards (&man.ukbd.4; driver) [&arch.amd64;, &arch.i386;, &arch.pc98;] Miscellaneous Assist Computer Systems PC Camera C-M1 ActiveWire I/O Board Creative Technology Video Blaster WebCam Plus D-Link DSB-R100 USB Radio (&man.ufm.4; driver) Mirunet AlphaCam Plus &hwlist.urio; &hwlist.umodem; [&arch.amd64;, &arch.i386;, &arch.pc98;] Mice (&man.ums.4; driver) &hwlist.ulpt; &hwlist.ubsa; &hwlist.ubser; &hwlist.uftdi; &hwlist.uplcom; &hwlist.umct; &hwlist.umass; [&arch.amd64;, &arch.i386;, &arch.pc98;] Audio Devices (&man.uaudio.4; driver) &hwlist.uvisor; IEEE 1394 (Firewire) Devices &hwlist.fwohci; [&arch.amd64;, &arch.i386;, &arch.sparc64;] Serial Bus Protocol 2 (SBP-2) storage devices (&man.sbp.4; driver) Bluetooth Devices &hwlist.ng.bt3c; &hwlist.ng.ubt; Cryptographic Accelerators &hwlist.hifn; &hwlist.safe; &hwlist.ubsec; Miscellaneous [&arch.amd64;, &arch.i386;, &arch.pc98;] FAX-Modem/PCCARD MELCO IGM-PCM56K/IGM-PCM56KH Nokia Card Phone 2.0 (gsm900/dcs1800 HSCSD terminal) [&arch.amd64;, &arch.i386;, &arch.pc98;] Floppy drives (&man.fdc.4; driver) [&arch.amd64;, &arch.i386;] VGA-compatible video cards (&man.vga.4; driver) Information regarding specific video cards and compatibility with Xorg can be found at http://www.x.org/. [&arch.amd64;, &arch.i386;, &arch.pc98;] Keyboards including: [&arch.i386;] AT-style keyboards (&man.atkbd.4; driver) [&arch.amd64;, &arch.i386;] PS/2 keyboards (&man.atkbd.4; driver) [&arch.pc98;] Standard keyboards [&arch.amd64;, &arch.i386;, &arch.pc98;] USB keyboards (&man.ukbd.4; driver) [&arch.amd64;, &arch.i386;, &arch.pc98;] Pointing devices including: [&arch.amd64;, &arch.i386;, &arch.pc98;] Bus mice and compatible devices (&man.mse.4; driver) [&arch.amd64;, &arch.i386;] PS/2 mice and compatible devices, including many laptop pointing devices (&man.psm.4; driver) Serial mice and compatible devices [&arch.amd64;, &arch.i386;, &arch.pc98;] USB mice (&man.ums.4; driver) &man.moused.8; has more information on using pointing devices with &os;. Information on using pointing devices with Xorg can be found at http://www.x.org/. [&arch.amd64;, &arch.i386;] PC standard parallel ports (&man.ppc.4; driver) [&arch.pc98;] PC-9821 standard parallel ports (&man.ppc.4; driver) [&arch.i386;, &arch.amd64;] PC-compatible joysticks (&man.joy.4; driver) [&arch.pc98;] Joystick port of SoundBlaster(98) (&man.joy.4; driver) [&arch.i386;, &arch.pc98;] PHS Data Communication Card/PCCARD NTT DoCoMo P-in Comp@ct Panasonic KX-PH405 SII MC-P200 [&arch.i386;] Xilinx XC6200-based reconfigurable hardware cards compatible with the HOT1 from Virtual Computers (xrpu driver). [&arch.pc98;] Power Management Controller of NEC PC-98 Note (pmc driver)
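Every driver cited in these lists follows the same manual-page convention, so a newly listed device, such as the &man.rtwn.4; entry added above, can be brought up like any other. A sketch, assuming the module follows the usual if_<name> naming and that the card probes as rtwn0:

    man 4 rtwn                           # supported chips and caveats
    kldload if_rtwn                      # load the driver module (name assumed)
    ifconfig wlan0 create wlandev rtwn0  # attach a wlan(4) interface to it
    echo 'if_rtwn_load="YES"' >> /boot/loader.conf  # load on every boot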
diff --git a/share/mk/src.opts.mk b/share/mk/src.opts.mk index 65c4348ca360..47a67e02e52a 100644 --- a/share/mk/src.opts.mk +++ b/share/mk/src.opts.mk @@ -1,412 +1,417 @@ # $FreeBSD$ # # Option file for FreeBSD /usr/src builds. # # Users define WITH_FOO and WITHOUT_FOO on the command line or in /etc/src.conf # and /etc/make.conf files. These translate in the build system to MK_FOO={yes,no} # with sensible (usually) defaults. # # Makefiles must include bsd.opts.mk after defining specific MK_FOO options that # are applicable for that Makefile (typically there are none, but sometimes there # are exceptions). Recursive makes usually add MK_FOO=no for options that they wish # to omit from that make. # # Makefiles must include bsd.mkopt.mk before they test the value of any MK_FOO # variable. # # Makefiles may also assume that this file is included by src.opts.mk should it # need variables defined there prior to the end of the Makefile where # bsd.{subdir,lib.bin}.mk is traditionally included. # # The old-style YES_FOO and NO_FOO are being phased out. No new instances of them # should be added. Old instances should be removed since they were just to # bridge the gap between FreeBSD 4 and FreeBSD 5. # # Makefiles should never test WITH_FOO or WITHOUT_FOO directly (although an # exception is made for _WITHOUT_SRCONF which turns off this mechanism # completely inside bsd.*.mk files). # .if !target(____) ____: .include # # Define MK_* variables (which are either "yes" or "no") for users # to set via WITH_*/WITHOUT_* in /etc/src.conf and override in the # make(1) environment. # These should be tested with `== "no"' or `!= "no"' in makefiles. # The NO_* variables should only be set by makefiles for variables # that haven't been converted over. # # These options are used by src the builds __DEFAULT_YES_OPTIONS = \ ACCT \ ACPI \ AMD \ APM \ AT \ ATM \ AUDIT \ AUTHPF \ AUTOFS \ BHYVE \ BINUTILS \ BINUTILS_BOOTSTRAP \ BLUETOOTH \ BOOT \ BOOTPARAMD \ BOOTPD \ BSD_CPIO \ BSDINSTALL \ BSNMP \ BZIP2 \ CALENDAR \ CAPSICUM \ CASPER \ CCD \ CDDL \ CPP \ CROSS_COMPILER \ CRYPT \ CTM \ CUSE \ CXX \ DICT \ DMAGENT \ DYNAMICROOT \ ED_CRYPTO \ EE \ ELFTOOLCHAIN_BOOTSTRAP \ EXAMPLES \ FDT \ FILE \ FINGER \ FLOPPY \ FMTREE \ FORTH \ FP_LIBC \ FREEBSD_UPDATE \ FTP \ GAMES \ GCOV \ GDB \ GNU \ GNU_GREP_COMPAT \ GPIO \ GPL_DTC \ GROFF \ HAST \ HTML \ HYPERV \ ICONV \ INET \ INET6 \ INETD \ IPFILTER \ IPFW \ ISCSI \ JAIL \ KDUMP \ KVM \ LDNS \ LDNS_UTILS \ LEGACY_CONSOLE \ LIB32 \ LIBPTHREAD \ LIBTHR \ LOCALES \ LOCATE \ LPR \ LS_COLORS \ LZMA_SUPPORT \ MAIL \ MAILWRAPPER \ MAKE \ MANDOCDB \ NDIS \ NETCAT \ NETGRAPH \ NLS_CATALOGS \ NS_CACHING \ NTP \ OPENSSL \ PAM \ PC_SYSINSTALL \ PF \ PKGBOOTSTRAP \ PMC \ PORTSNAP \ PPP \ QUOTAS \ RADIUS_SUPPORT \ RCMDS \ RBOOTD \ RCS \ RESCUE \ ROUTED \ SENDMAIL \ SETUID_LOGIN \ SHAREDOCS \ SOURCELESS \ SOURCELESS_HOST \ SOURCELESS_UCODE \ SVNLITE \ SYSCONS \ TALK \ TCP_WRAPPERS \ TCSH \ TELNET \ TESTS \ TEXTPROC \ TFTP \ TIMED \ UNBOUND \ USB \ UTMPX \ VI \ VT \ WIRELESS \ WPA_SUPPLICANT_EAPOL \ ZFS \ ZONEINFO __DEFAULT_NO_OPTIONS = \ BSD_GREP \ CLANG_EXTRAS \ DTRACE_TESTS \ EISA \ HESIOD \ + LIBSOFT \ NAND \ OFED \ OPENLDAP \ SHARED_TOOLCHAIN \ SORT_THREADS \ SVN # # Default behaviour of some options depends on the architecture. Unfortunately # this means that we have to test TARGET_ARCH (the buildworld case) as well # as MACHINE_ARCH (the non-buildworld case). 
Normally TARGET_ARCH is not # used at all in bsd.*.mk, but we have to make an exception here if we want # to allow defaults for some things like clang to vary by target architecture. # Additional, per-target behavior should be rarely added only after much # gnashing of teeth and grinding of gears. # .if defined(TARGET_ARCH) __T=${TARGET_ARCH} .else __T=${MACHINE_ARCH} .endif .if defined(TARGET) __TT=${TARGET} .else __TT=${MACHINE} .endif .include # If the compiler is not C++11 capable, disable Clang and use GCC instead. # This means that architectures that have GCC 4.2 as default can not # build Clang without using an external compiler. .if ${COMPILER_FEATURES:Mc++11} && (${__T} == "aarch64" || \ ${__T} == "amd64" || ${__TT} == "arm" || ${__T} == "i386") # Clang is enabled, and will be installed as the default /usr/bin/cc. __DEFAULT_YES_OPTIONS+=CLANG CLANG_BOOTSTRAP CLANG_FULL CLANG_IS_CC __DEFAULT_NO_OPTIONS+=GCC GCC_BOOTSTRAP GNUCXX .elif ${COMPILER_FEATURES:Mc++11} && ${__T:Mpowerpc*} # On powerpc, if an external compiler that supports C++11 is used as ${CC}, # then Clang is enabled, but GCC is installed as the default /usr/bin/cc. __DEFAULT_YES_OPTIONS+=CLANG CLANG_FULL GCC GCC_BOOTSTRAP GNUCXX __DEFAULT_NO_OPTIONS+=CLANG_BOOTSTRAP CLANG_IS_CC .else # Everything else disables Clang, and uses GCC instead. __DEFAULT_YES_OPTIONS+=GCC GCC_BOOTSTRAP GNUCXX __DEFAULT_NO_OPTIONS+=CLANG CLANG_BOOTSTRAP CLANG_FULL CLANG_IS_CC .endif # In-tree binutils/gcc are older versions without modern architecture support. .if ${__T} == "aarch64" || ${__T} == "riscv64" BROKEN_OPTIONS+=BINUTILS BINUTILS_BOOTSTRAP GCC GCC_BOOTSTRAP GDB __DEFAULT_YES_OPTIONS+=ELFCOPY_AS_OBJCOPY .else __DEFAULT_NO_OPTIONS+=ELFCOPY_AS_OBJCOPY .endif .if ${__T} == "riscv64" BROKEN_OPTIONS+=PROFILE # "sorry, unimplemented: profiler support for RISC-V" BROKEN_OPTIONS+=TESTS # "undefined reference to `_Unwind_Resume'" BROKEN_OPTIONS+=CXX # "libcxxrt.so: undefined reference to `_Unwind_Resume_or_Rethrow'" .endif .if ${__T} == "aarch64" || ${__T} == "amd64" __DEFAULT_YES_OPTIONS+=LLDB .else __DEFAULT_NO_OPTIONS+=LLDB .endif # LLVM lacks support for FreeBSD 64-bit atomic operations for ARMv4/ARMv5 .if ${__T} == "arm" || ${__T} == "armeb" BROKEN_OPTIONS+=LLDB .endif +# Only doing soft float API stuff on armv6 +.if ${__T} != "armv6" +BROKEN_OPTIONS+=LIBSOFT +.endif .include # # MK_* options that default to "yes" if the compiler is a C++11 compiler. # .for var in \ LIBCPLUSPLUS .if !defined(MK_${var}) .if ${COMPILER_FEATURES:Mc++11} .if defined(WITHOUT_${var}) MK_${var}:= no .else MK_${var}:= yes .endif .else .if defined(WITH_${var}) MK_${var}:= yes .else MK_${var}:= no .endif .endif .endif .endfor # # Force some options off if their dependencies are off. # Order is somewhat important. 
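Before the dependency forcing below, the WITH_*/WITHOUT_* knobs are the entire user interface to these options. A sketch of driving the new LIBSOFT option; the invocation is illustrative, and on any TARGET_ARCH other than armv6 the BROKEN_OPTIONS test above forces MK_LIBSOFT back to "no":

    # one-off, on the make(1) command line
    cd /usr/src
    make buildworld TARGET=arm TARGET_ARCH=armv6 -DWITH_LIBSOFT
    # or persistently, for every build of this tree
    printf 'WITH_LIBSOFT=yes\n' >> /etc/src.conf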
# .if ${MK_LIBPTHREAD} == "no" MK_LIBTHR:= no .endif .if ${MK_LDNS} == "no" MK_LDNS_UTILS:= no MK_UNBOUND:= no .endif .if ${MK_SOURCELESS} == "no" MK_SOURCELESS_HOST:= no MK_SOURCELESS_UCODE:= no .endif .if ${MK_CDDL} == "no" MK_ZFS:= no MK_CTF:= no .endif .if ${MK_CRYPT} == "no" MK_OPENSSL:= no MK_OPENSSH:= no MK_KERBEROS:= no .endif .if ${MK_CXX} == "no" MK_CLANG:= no MK_GROFF:= no MK_GNUCXX:= no .endif .if ${MK_MAIL} == "no" MK_MAILWRAPPER:= no MK_SENDMAIL:= no MK_DMAGENT:= no .endif .if ${MK_NETGRAPH} == "no" MK_ATM:= no MK_BLUETOOTH:= no .endif .if ${MK_OPENSSL} == "no" MK_OPENSSH:= no MK_KERBEROS:= no .endif .if ${MK_PF} == "no" MK_AUTHPF:= no .endif .if ${MK_TESTS} == "no" MK_DTRACE_TESTS:= no .endif .if ${MK_TEXTPROC} == "no" MK_GROFF:= no .endif .if ${MK_CROSS_COMPILER} == "no" MK_BINUTILS_BOOTSTRAP:= no MK_CLANG_BOOTSTRAP:= no MK_ELFTOOLCHAIN_BOOTSTRAP:= no MK_GCC_BOOTSTRAP:= no .endif .if ${MK_TOOLCHAIN} == "no" MK_BINUTILS:= no MK_CLANG:= no MK_GCC:= no MK_GDB:= no MK_INCLUDES:= no .endif .if ${MK_CLANG} == "no" MK_CLANG_EXTRAS:= no MK_CLANG_FULL:= no .endif # # Set defaults for the MK_*_SUPPORT variables. # # # MK_*_SUPPORT options which default to "yes" unless their corresponding # MK_* variable is set to "no". # .for var in \ BZIP2 \ GNU \ INET \ INET6 \ KERBEROS \ KVM \ NETGRAPH \ PAM \ TESTS \ WIRELESS .if defined(WITHOUT_${var}_SUPPORT) || ${MK_${var}} == "no" MK_${var}_SUPPORT:= no .else MK_${var}_SUPPORT:= yes .endif .endfor # # MK_* options whose default value depends on another option. # .for vv in \ GSSAPI/KERBEROS \ MAN_UTILS/MAN .if defined(WITH_${vv:H}) MK_${vv:H}:= yes .elif defined(WITHOUT_${vv:H}) MK_${vv:H}:= no .else MK_${vv:H}:= ${MK_${vv:T}} .endif .endfor .if !${COMPILER_FEATURES:Mc++11} MK_LLDB:= no .endif # gcc 4.8 and newer supports libc++, so suppress gnuc++ in that case. # while in theory we could build it with that, we don't want to do # that since it creates too much confusion for too little gain. .if ${COMPILER_TYPE} == "gcc" && ${COMPILER_VERSION} >= 40800 MK_GNUCXX:=no MK_GCC:=no .endif .endif # !target(____) diff --git a/share/vt/keymaps/gr.101.acc.kbd b/share/vt/keymaps/gr.101.acc.kbd index 89a931f0723f..7e8e6614e7ec 100644 --- a/share/vt/keymaps/gr.101.acc.kbd +++ b/share/vt/keymaps/gr.101.acc.kbd @@ -1,255 +1,255 @@ # $FreeBSD$ # # Built on Wed 1 Apr 15:59:44 EEST 1998 by peppe@cs.uoi.gr # alt # scan cntrl alt alt cntrl lock # code base shift cntrl shift alt shift cntrl shift state # ------------------------------------------------------------------ # 000 nop nop nop nop nop nop nop nop O 001 esc esc nop nop esc esc debug nop O 002 '1' '!' nop nop '1' '!' 
nop nop O 003 '2' '@' nul nul '2' '@' nul nul O 004 '3' '#' nop nop '3' '#' nop nop O 005 '4' '$' nop nop '4' '$' nop nop O 006 '5' '%' nop nop '5' '%' nop nop O 007 '6' '^' rs rs '6' '^' rs rs O 008 '7' '&' nop nop '7' '&' nop nop O 009 '8' '*' nop nop '8' '*' nop nop O 010 '9' '(' nop nop '9' '(' nop nop O 011 '0' ')' nop nop '0' ')' nop nop O - 012 '-' '_' ns ns '-' '_' ns ns O + 012 '-' '_' us us '-' '_' us us O 013 '=' '+' nop nop '=' '+' nop nop O 014 bs bs del del bs bs del del O 015 ht btab nop nop ht btab nop nop O 016 'q' 'Q' dc1 dc1 ';' ':' dc1 dc1 C 017 'w' 'W' etb etb 0x03c2 0x03a3 etb etb C 018 'e' 'E' enq enq 0x03b5 0x0395 enq enq C 019 'r' 'R' dc2 dc2 0x03c1 0x03a1 dc2 dc2 C 020 't' 'T' dc4 dc4 0x03c4 0x03a4 dc4 dc4 C 021 'y' 'Y' em em 0x03c5 0x03a5 em em C 022 'u' 'U' nak nak 0x03b8 0x0398 nak nak C 023 'i' 'I' ht ht 0x03b9 0x0399 ht ht C 024 'o' 'O' si si 0x03bf 0x039f si si C 025 'p' 'P' dle dle 0x03c0 0x03a0 dle dle C 026 '[' '{' esc esc '[' '{' esc esc O 027 ']' '}' gs gs ']' '}' gs gs O 028 cr cr nl nl cr cr nl nl O 029 lctrl lctrl lctrl lctrl lctrl lctrl lctrl lctrl O 030 'a' 'A' soh soh 0x03b1 0x0391 soh soh C 031 's' 'S' dc3 dc3 0x03c3 0x03a3 dc3 dc3 C 032 'd' 'D' eot eot 0x03b4 0x0394 eot eot C 033 'f' 'F' ack ack 0x03c6 0x03a6 ack ack C 034 'g' 'G' bel bel 0x03b3 0x0393 bel bel C 035 'h' 'H' bs bs 0x03b7 0x0397 bs bs C 036 'j' 'J' nl nl 0x03be 0x039e nl nl C 037 'k' 'K' vt vt 0x03ba 0x039a vt vt C 038 'l' 'L' ff ff 0x03bb 0x039b ff ff C 039 ';' ':' nop nop dacu ddia dcir nop O 040 ''' '"' nop nop ''' '"' nop nop O 041 '`' '~' nop nop '`' '~' nop nop O 042 lshift lshift lshift lshift lshift lshift lshift lshift O 043 '\' '|' fs fs '\' '|' fs fs O 044 'z' 'Z' sub sub 0x03b6 0x0396 sub sub C 045 'x' 'X' can can 0x03c7 0x03a7 can can C 046 'c' 'C' etx etx 0x03c8 0x03a8 etx etx C 047 'v' 'V' syn syn 0x03c9 0x03a9 syn syn C 048 'b' 'B' stx stx 0x03b2 0x0392 stx stx C 049 'n' 'N' so so 0x03bd 0x039d so so C 050 'm' 'M' cr cr 0x03bc 0x039c cr cr C 051 ',' '<' nop nop ',' '<' nop nop O 052 '.' '>' nop nop '.' '>' nop nop O 053 '/' '?' nop nop '/' '?' nop nop O 054 rshift rshift rshift rshift rshift rshift rshift rshift O 055 '*' '*' nscr nscr '*' '*' nscr nscr O 056 lalt lalt lalt lalt lalt lalt lalt lalt O 057 ' ' ' ' nul ' ' alock ' ' susp ' ' O 058 clock clock clock clock clock clock clock clock O 059 fkey01 fkey13 fkey25 fkey37 scr01 scr11 scr01 scr11 O 060 fkey02 fkey14 fkey26 fkey38 scr02 scr12 scr02 scr12 O 061 fkey03 fkey15 fkey27 fkey39 scr03 scr13 scr03 scr13 O 062 fkey04 fkey16 fkey28 fkey40 scr04 scr14 scr04 scr14 O 063 fkey05 fkey17 fkey29 fkey41 scr05 scr15 scr05 scr15 O 064 fkey06 fkey18 fkey30 fkey42 scr06 scr16 scr06 scr16 O 065 fkey07 fkey19 fkey31 fkey43 scr07 scr07 scr07 scr07 O 066 fkey08 fkey20 fkey32 fkey44 scr08 scr08 scr08 scr08 O 067 fkey09 fkey21 fkey33 fkey45 scr09 scr09 scr09 scr09 O 068 fkey10 fkey22 fkey34 fkey46 scr10 scr10 scr10 scr10 O 069 nlock nlock nlock nlock nlock nlock nlock nlock O 070 slock slock slock slock slock slock slock slock O 071 fkey49 '7' '7' '7' '7' '7' '7' '7' N 072 fkey50 '8' '8' '8' '8' '8' '8' '8' N 073 fkey51 '9' '9' '9' '9' '9' '9' '9' N 074 fkey52 '-' '-' '-' '-' '-' '-' '-' N 075 fkey53 '4' '4' '4' '4' '4' '4' '4' N 076 fkey54 '5' '5' '5' '5' '5' '5' '5' N 077 fkey55 '6' '6' '6' '6' '6' '6' '6' N 078 fkey56 '+' '+' '+' '+' '+' '+' '+' N 079 fkey57 '1' '1' '1' '1' '1' '1' '1' N 080 fkey58 '2' '2' '2' '2' '2' '2' '2' N 081 fkey59 '3' '3' '3' '3' '3' '3' '3' N 082 fkey60 '0' '0' '0' '0' '0' '0' '0' N 083 del '.' 
'.' '.' '.' '.' boot boot N - 084 ns ns ns ns ns ns ns ns O + 084 us us us us us us us us O 085 nop nop nop nop nop nop nop nop O 086 '\' '|' nop nop '\' '|' nop nop O 087 fkey11 fkey23 fkey35 fkey47 scr11 scr11 scr11 scr11 O 088 fkey12 fkey24 fkey36 fkey48 scr12 scr12 scr12 scr12 O 089 cr cr nl nl cr cr nl nl O 090 rctrl rctrl rctrl rctrl rctrl rctrl rctrl rctrl O 091 '/' '/' '/' '/' '/' '/' '/' '/' N 092 nscr pscr debug nop nop nop nop nop O 093 ralt ralt ralt ralt ralt ralt ralt ralt O 094 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 O 095 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 O 096 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 O 097 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 O 098 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 O 099 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 O 100 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 O 101 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 O 102 fkey60 paste fkey60 fkey60 fkey60 fkey60 fkey60 fkey60 O 103 fkey61 fkey61 fkey61 fkey61 fkey61 fkey61 boot fkey61 O 104 slock slock slock slock slock slock slock slock O 105 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 O 106 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 O 107 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 O # 108 nop nop nop nop nop nop nop nop O 109 nop nop nop nop nop nop nop nop O 110 nop nop nop nop nop nop nop nop O 111 nop nop nop nop nop nop nop nop O 112 nop nop nop nop nop nop nop nop O 113 nop nop nop nop nop nop nop nop O 114 nop nop nop nop nop nop nop nop O 115 nop nop nop nop nop nop nop nop O 116 nop nop nop nop nop nop nop nop O 117 nop nop nop nop nop nop nop nop O 118 nop nop nop nop nop nop nop nop O 119 nop nop nop nop nop nop nop nop O 120 nop nop nop nop nop nop nop nop O 121 nop nop nop nop nop nop nop nop O 122 nop nop nop nop nop nop nop nop O 123 nop nop nop nop nop nop nop nop O 124 nop nop nop nop nop nop nop nop O 125 nop nop nop nop nop nop nop nop O 126 nop nop nop nop nop nop nop nop O 127 nop nop nop nop nop nop nop nop O # 128 nop nop nop nop nop nop nop nop O 129 esc esc nop nop esc esc debug nop O 130 '1' '!' nop nop '1' '!' 
nop nop O 131 '2' '@' nul nul '2' '@' nul nul O 132 '3' '#' nop nop '3' '#' nop nop O 133 '4' '$' nop nop '4' '$' nop nop O 134 '5' '%' nop nop '5' '%' nop nop O 135 '6' '^' rs rs '6' '^' rs rs O 136 '7' '&' nop nop '7' '&' nop nop O 137 '8' '*' nop nop '8' '*' nop nop O 138 '9' '(' nop nop '9' '(' nop nop O 139 '0' ')' nop nop '0' ')' nop nop O - 140 '-' '_' ns ns '-' '_' ns ns O + 140 '-' '_' us us '-' '_' us us O 141 '=' '+' nop nop '=' '+' nop nop O 142 bs bs del del bs bs del del O 143 ht btab nop nop ht btab nop nop O 144 ';' ':' dc1 dc1 'q' 'Q' dc1 dc1 C 145 0x03c2 0x03a3 etb etb 'w' 'W' etb etb C 146 0x03b5 0x0395 enq enq 'e' 'E' enq enq C 147 0x03c1 0x03a1 dc2 dc2 'r' 'R' dc2 dc2 C 148 0x03c4 0x03a4 dc4 dc4 't' 'T' dc4 dc4 C 149 0x03c5 0x03a5 em em 'y' 'Y' em em C 150 0x03b8 0x0398 nak nak 'u' 'U' nak nak C 151 0x03b9 0x0399 ht ht 'i' 'I' ht ht C 152 0x03bf 0x039f si si 'o' 'O' si si C 153 0x03c0 0x03a0 dle dle 'p' 'P' dle dle C 154 '[' '{' esc esc '[' '{' esc esc O 155 ']' '}' gs gs ']' '}' gs gs O 156 cr cr nl nl cr cr nl nl O 157 lctrl lctrl lctrl lctrl lctrl lctrl lctrl lctrl O 158 0x03b1 0x0391 soh soh 'a' 'A' soh soh C 159 0x03c3 0x03a3 dc3 dc3 's' 'S' dc3 dc3 C 160 0x03b4 0x0394 eot eot 'd' 'D' eot eot C 161 0x03c6 0x03a6 ack ack 'f' 'F' ack ack C 162 0x03b3 0x0393 bel bel 'g' 'G' bel bel C 163 0x03b7 0x0397 bs bs 'h' 'H' bs bs C 164 0x03be 0x039e nl nl 'j' 'J' nl nl C 165 0x03ba 0x039a vt vt 'k' 'K' vt vt C 166 0x03bb 0x039b ff ff 'l' 'L' ff ff C 167 dacu ddia dcir nop ';' ':' nop nop O 168 ''' '"' nop nop ''' '"' nop nop O 169 '`' '~' nop nop '`' '~' nop nop O 170 lshift lshift lshift lshift lshift lshift lshift lshift O 171 '\' '|' fs fs '\' '|' fs fs O 172 0x03b6 0x0396 sub sub 'z' 'Z' sub sub C 173 0x03c7 0x03a7 can can 'x' 'X' can can C 174 0x03c8 0x03a8 etx etx 'c' 'C' etx etx C 175 0x03c9 0x03a9 syn syn 'v' 'V' syn syn C 176 0x03b2 0x0392 stx stx 'b' 'B' stx stx C 177 0x03bd 0x039d so so 'n' 'N' so so C 178 0x03bc 0x039c cr cr 'm' 'M' cr cr C 179 ',' '<' nop nop ',' '<' nop nop O 180 '.' '>' nop nop '.' '>' nop nop O 181 '/' '?' nop nop '/' '?' nop nop O 182 rshift rshift rshift rshift rshift rshift rshift rshift O 183 '*' '*' nscr nscr '*' '*' nscr nscr O 184 lalt lalt lalt lalt lalt lalt lalt lalt O 185 ' ' ' ' nul ' ' alock ' ' susp ' ' O 186 clock clock clock clock clock clock clock clock O 187 fkey01 fkey13 fkey25 fkey37 scr01 scr11 scr01 scr11 O 188 fkey02 fkey14 fkey26 fkey38 scr02 scr12 scr02 scr12 O 189 fkey03 fkey15 fkey27 fkey39 scr03 scr13 scr03 scr13 O 190 fkey04 fkey16 fkey28 fkey40 scr04 scr14 scr04 scr14 O 191 fkey05 fkey17 fkey29 fkey41 scr05 scr15 scr05 scr15 O 192 fkey06 fkey18 fkey30 fkey42 scr06 scr16 scr06 scr16 O 193 fkey07 fkey19 fkey31 fkey43 scr07 scr07 scr07 scr07 O 194 fkey08 fkey20 fkey32 fkey44 scr08 scr08 scr08 scr08 O 195 fkey09 fkey21 fkey33 fkey45 scr09 scr09 scr09 scr09 O 196 fkey10 fkey22 fkey34 fkey46 scr10 scr10 scr10 scr10 O 197 nlock nlock nlock nlock nlock nlock nlock nlock O 198 slock slock slock slock slock slock slock slock O 199 fkey49 '7' '7' '7' '7' '7' '7' '7' N 200 fkey50 '8' '8' '8' '8' '8' '8' '8' N 201 fkey51 '9' '9' '9' '9' '9' '9' '9' N 202 fkey52 '-' '-' '-' '-' '-' '-' '-' N 203 fkey53 '4' '4' '4' '4' '4' '4' '4' N 204 fkey54 '5' '5' '5' '5' '5' '5' '5' N 205 fkey55 '6' '6' '6' '6' '6' '6' '6' N 206 fkey56 '+' '+' '+' '+' '+' '+' '+' N 207 fkey57 '1' '1' '1' '1' '1' '1' '1' N 208 fkey58 '2' '2' '2' '2' '2' '2' '2' N 209 fkey59 '3' '3' '3' '3' '3' '3' '3' N 210 fkey60 '0' '0' '0' '0' '0' '0' '0' N 211 del '.' 
'.' '.' '.' '.' boot boot N - 212 ns ns ns ns ns ns ns ns O + 212 us us us us us us us us O 213 nop nop nop nop nop nop nop nop O 214 '\' '|' nop nop '\' '|' nop nop O 215 fkey11 fkey23 fkey35 fkey47 scr11 scr11 scr11 scr11 O 216 fkey12 fkey24 fkey36 fkey48 scr12 scr12 scr12 scr12 O 217 cr cr nl nl cr cr nl nl O 218 rctrl rctrl rctrl rctrl rctrl rctrl rctrl rctrl O 219 '/' '/' '/' '/' '/' '/' '/' '/' N 220 nscr pscr debug nop nop nop nop nop O 221 ralt ralt ralt ralt ralt ralt ralt ralt O 222 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 O 223 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 O 224 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 O 225 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 O 226 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 O 227 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 O 228 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 O 229 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 O 230 fkey60 paste fkey60 fkey60 fkey60 fkey60 fkey60 fkey60 O 231 fkey61 fkey61 fkey61 fkey61 fkey61 fkey61 boot fkey61 O 232 slock slock slock slock slock slock slock slock O 233 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 O 234 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 O 235 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 O dacu 0x0384 ( 0x03b1 0x03ac ) ( 0x0391 0x0386 ) ( 0x03b5 0x03ad ) ( 0x0395 0x0388 ) ( 0x03b9 0x03af ) ( 0x0399 0x038a ) ( 0x03b7 0x03ae ) ( 0x0397 0x0389 ) ( 0x03c5 0x03cd ) ( 0x03a5 0x038e ) ( 0x03bf 0x03cc ) ( 0x039f 0x038c ) ( 0x03c9 0x03ce ) ( 0x03a9 0x038f ) dcir 0x0385 ( 0x03b9 0x0390 ) ( 0x0399 0x03aa ) ( 0x03c5 0x03b0 ) ( 0x03a5 0x03ab ) ddia 0xa8 ( 0x03b9 0x03ca ) ( 0x0399 0x03aa ) ( 0x03c5 0x03cb ) ( 0x03a5 0x03ab ) diff --git a/share/vt/keymaps/gr.elot.acc.kbd b/share/vt/keymaps/gr.elot.acc.kbd index 1a758a778b24..061717f1dfe2 100644 --- a/share/vt/keymaps/gr.elot.acc.kbd +++ b/share/vt/keymaps/gr.elot.acc.kbd @@ -1,255 +1,255 @@ # $FreeBSD$ # # Built on Wed 1 Apr 15:59:44 EEST 1998 by peppe@cs.uoi.gr # alt # scan cntrl alt alt cntrl lock # code base shift cntrl shift alt shift cntrl shift state # ------------------------------------------------------------------ # 000 nop nop nop nop nop nop nop nop O 001 esc esc nop nop esc esc debug nop O 002 '1' '!' nop nop '1' '!' 
nop nop O 003 '2' '"' nul nul '2' '"' nul nul O 004 '3' 0xa3 nop nop '3' 0xa3 nop nop O 005 '4' '$' nop nop '4' '$' nop nop O 006 '5' '%' nop nop '5' '%' nop nop O 007 '6' '^' rs rs '6' '^' rs rs O 008 '7' '&' nop nop '7' '&' nop nop O 009 '8' '*' nop nop '8' '*' nop nop O 010 '9' '(' nop nop '9' '(' nop nop O 011 '0' ')' nop nop '0' ')' nop nop O - 012 '-' '_' ns ns '-' '_' ns ns O + 012 '-' '_' us us '-' '_' us us O 013 '=' '+' nop nop '=' '+' nop nop O 014 bs bs del del bs bs del del O 015 ht btab nop nop ht btab nop nop O 016 'q' 'Q' dc1 dc1 0xab 0xbb dc1 dc1 C 017 'w' 'W' etb etb 0x03c2 0x03a3 etb etb C 018 'e' 'E' enq enq 0x03b5 0x0395 enq enq C 019 'r' 'R' dc2 dc2 0x03c1 0x03a1 dc2 dc2 C 020 't' 'T' dc4 dc4 0x03c4 0x03a4 dc4 dc4 C 021 'y' 'Y' em em 0x03c5 0x03a5 em em C 022 'u' 'U' nak nak 0x03b8 0x0398 nak nak C 023 'i' 'I' ht ht 0x03b9 0x0399 ht ht C 024 'o' 'O' si si 0x03bf 0x039f si si C 025 'p' 'P' dle dle 0x03c0 0x03a0 dle dle C 026 '[' '{' esc esc '[' '{' esc esc O 027 ']' '}' gs gs ']' '}' gs gs O 028 cr cr nl nl cr cr nl nl O 029 lctrl lctrl lctrl lctrl lctrl lctrl lctrl lctrl O 030 'a' 'A' soh soh 0x03b1 0x0391 soh soh C 031 's' 'S' dc3 dc3 0x03c3 0x03a3 dc3 dc3 C 032 'd' 'D' eot eot 0x03b4 0x0394 eot eot C 033 'f' 'F' ack ack 0x03c6 0x03a6 ack ack C 034 'g' 'G' bel bel 0x03b3 0x0393 bel bel C 035 'h' 'H' bs bs 0x03b7 0x0397 bs bs C 036 'j' 'J' nl nl 0x03be 0x039e nl nl C 037 'k' 'K' vt vt 0x03ba 0x039a vt vt C 038 'l' 'L' ff ff 0x03bb 0x039b ff ff C 039 ';' ':' nop nop ';' ':' nop nop O 040 '/' '@' nop nop dacu ddia nop dcir O 041 '`' ''' nop nop '`' ''' nop nop O 042 lshift lshift lshift lshift lshift lshift lshift lshift O 043 '#' '~' fs fs '#' '~' fs fs O 044 'z' 'Z' sub sub 0x03b6 0x0396 sub sub C 045 'x' 'X' can can 0x03c7 0x03a7 can can C 046 'c' 'C' etx etx 0x03c8 0x03a8 etx etx C 047 'v' 'V' syn syn 0x03c9 0x03a9 syn syn C 048 'b' 'B' stx stx 0x03b2 0x0392 stx stx C 049 'n' 'N' so so 0x03bd 0x039d so so C 050 'm' 'M' cr cr 0x03bc 0x039c cr cr C 051 ',' '<' nop nop ',' '<' nop nop O 052 '.' '>' nop nop '.' '>' nop nop O 053 '/' '?' nop nop '/' '?' 
nop nop O 054 rshift rshift rshift rshift rshift rshift rshift rshift O 055 '*' '*' nscr nscr '*' '*' nscr nscr O 056 lalt lalt lalt lalt lalt lalt lalt lalt O 057 ' ' ' ' nul ' ' alock ' ' susp ' ' O 058 clock clock clock clock clock clock clock clock O 059 fkey01 fkey13 fkey25 fkey37 scr01 scr11 scr01 scr11 O 060 fkey02 fkey14 fkey26 fkey38 scr02 scr12 scr02 scr12 O 061 fkey03 fkey15 fkey27 fkey39 scr03 scr13 scr03 scr13 O 062 fkey04 fkey16 fkey28 fkey40 scr04 scr14 scr04 scr14 O 063 fkey05 fkey17 fkey29 fkey41 scr05 scr15 scr05 scr15 O 064 fkey06 fkey18 fkey30 fkey42 scr06 scr16 scr06 scr16 O 065 fkey07 fkey19 fkey31 fkey43 scr07 scr07 scr07 scr07 O 066 fkey08 fkey20 fkey32 fkey44 scr08 scr08 scr08 scr08 O 067 fkey09 fkey21 fkey33 fkey45 scr09 scr09 scr09 scr09 O 068 fkey10 fkey22 fkey34 fkey46 scr10 scr10 scr10 scr10 O 069 nlock nlock nlock nlock nlock nlock nlock nlock O 070 slock slock slock slock slock slock slock slock O 071 fkey49 '7' '7' '7' '7' '7' '7' '7' N 072 fkey50 '8' '8' '8' '8' '8' '8' '8' N 073 fkey51 '9' '9' '9' '9' '9' '9' '9' N 074 fkey52 '-' '-' '-' '-' '-' '-' '-' N 075 fkey53 '4' '4' '4' '4' '4' '4' '4' N 076 fkey54 '5' '5' '5' '5' '5' '5' '5' N 077 fkey55 '6' '6' '6' '6' '6' '6' '6' N 078 fkey56 '+' '+' '+' '+' '+' '+' '+' N 079 fkey57 '1' '1' '1' '1' '1' '1' '1' N 080 fkey58 '2' '2' '2' '2' '2' '2' '2' N 081 fkey59 '3' '3' '3' '3' '3' '3' '3' N 082 fkey60 '0' '0' '0' '0' '0' '0' '0' N 083 del '.' '.' '.' '.' '.' boot boot N - 084 ns ns ns ns ns ns ns ns O + 084 us us us us us us us us O 085 nop nop nop nop nop nop nop nop O 086 '\' '|' nop nop '\' '|' nop nop O 087 fkey11 fkey23 fkey35 fkey47 scr11 scr11 scr11 scr11 O 088 fkey12 fkey24 fkey36 fkey48 scr12 scr12 scr12 scr12 O 089 cr cr nl nl cr cr nl nl O 090 rctrl rctrl rctrl rctrl rctrl rctrl rctrl rctrl O 091 '/' '/' '/' '/' '/' '/' '/' '/' N 092 nscr pscr debug nop nop nop nop nop O 093 ralt ralt ralt ralt ralt ralt ralt ralt O 094 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 O 095 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 O 096 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 O 097 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 O 098 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 O 099 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 O 100 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 O 101 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 O 102 fkey60 paste fkey60 fkey60 fkey60 fkey60 fkey60 fkey60 O 103 fkey61 fkey61 fkey61 fkey61 fkey61 fkey61 boot fkey61 O 104 slock slock slock slock slock slock slock slock O 105 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 O 106 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 O 107 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 O # 108 nop nop nop nop nop nop nop nop O 109 nop nop nop nop nop nop nop nop O 110 nop nop nop nop nop nop nop nop O 111 nop nop nop nop nop nop nop nop O 112 nop nop nop nop nop nop nop nop O 113 nop nop nop nop nop nop nop nop O 114 nop nop nop nop nop nop nop nop O 115 nop nop nop nop nop nop nop nop O 116 nop nop nop nop nop nop nop nop O 117 nop nop nop nop nop nop nop nop O 118 nop nop nop nop nop nop nop nop O 119 nop nop nop nop nop nop nop nop O 120 nop nop nop nop nop nop nop nop O 121 nop nop nop nop nop nop nop nop O 122 nop nop nop nop nop nop nop nop O 123 nop nop nop nop nop nop nop nop O 124 nop nop nop nop nop nop nop nop O 125 nop nop nop nop nop nop nop nop O 126 nop nop nop nop nop nop nop nop O 127 nop nop nop nop nop nop nop 
nop O # 128 nop nop nop nop nop nop nop nop O 129 esc esc nop nop esc esc debug nop O 130 '1' '!' nop nop '1' '!' nop nop O 131 '2' '"' nul nul '2' '"' nul nul O 132 '3' 0xa3 nop nop '3' 0xa3 nop nop O 133 '4' '$' nop nop '4' '$' nop nop O 134 '5' '%' nop nop '5' '%' nop nop O 135 '6' '^' rs rs '6' '^' rs rs O 136 '7' '&' nop nop '7' '&' nop nop O 137 '8' '*' nop nop '8' '*' nop nop O 138 '9' '(' nop nop '9' '(' nop nop O 139 '0' ')' nop nop '0' ')' nop nop O - 140 '-' '_' ns ns '-' '_' ns ns O + 140 '-' '_' us us '-' '_' us us O 141 '=' '+' nop nop '=' '+' nop nop O 142 bs bs del del bs bs del del O 143 ht btab nop nop ht btab nop nop O 144 0xab 0xbb dc1 dc1 'q' 'Q' dc1 dc1 C 145 0x03c2 0x03a3 etb etb 'w' 'W' etb etb C 146 0x03b5 0x0395 enq enq 'e' 'E' enq enq C 147 0x03c1 0x03a1 dc2 dc2 'r' 'R' dc2 dc2 C 148 0x03c4 0x03a4 dc4 dc4 't' 'T' dc4 dc4 C 149 0x03c5 0x03a5 em em 'y' 'Y' em em C 150 0x03b8 0x0398 nak nak 'u' 'U' nak nak C 151 0x03b9 0x0399 ht ht 'i' 'I' ht ht C 152 0x03bf 0x039f si si 'o' 'O' si si C 153 0x03c0 0x03a0 dle dle 'p' 'P' dle dle C 154 '[' '{' esc esc '[' '{' esc esc O 155 ']' '}' gs gs ']' '}' gs gs O 156 cr cr nl nl cr cr nl nl O 157 lctrl lctrl lctrl lctrl lctrl lctrl lctrl lctrl O 158 0x03b1 0x0391 soh soh 'a' 'A' soh soh C 159 0x03c3 0x03a3 dc3 dc3 's' 'S' dc3 dc3 C 160 0x03b4 0x0394 eot eot 'd' 'D' eot eot C 161 0x03c6 0x03a6 ack ack 'f' 'F' ack ack C 162 0x03b3 0x0393 bel bel 'g' 'G' bel bel C 163 0x03b7 0x0397 bs bs 'h' 'H' bs bs C 164 0x03be 0x039e nl nl 'j' 'J' nl nl C 165 0x03ba 0x039a vt vt 'k' 'K' vt vt C 166 0x03bb 0x039b ff ff 'l' 'L' ff ff C 167 ';' ':' nop nop ';' ':' nop nop O 168 dacu ddia dcir nop '/' '@' nop nop O 169 '`' ''' nop nop '`' ''' nop nop O 170 lshift lshift lshift lshift lshift lshift lshift lshift O 171 '#' '~' fs fs '#' '~' fs fs O 172 0x03b6 0x0396 sub sub 'z' 'Z' sub sub C 173 0x03c7 0x03a7 can can 'x' 'X' can can C 174 0x03c8 0x03a8 etx etx 'c' 'C' etx etx C 175 0x03c9 0x03a9 syn syn 'v' 'V' syn syn C 176 0x03b2 0x0392 stx stx 'b' 'B' stx stx C 177 0x03bd 0x039d so so 'n' 'N' so so C 178 0x03bc 0x039c cr cr 'm' 'M' cr cr C 179 ',' '<' nop nop ',' '<' nop nop O 180 '.' '>' nop nop '.' '>' nop nop O 181 '/' '?' nop nop '/' '?' 
nop nop O 182 rshift rshift rshift rshift rshift rshift rshift rshift O 183 '*' '*' nscr nscr '*' '*' nscr nscr O 184 lalt lalt lalt lalt lalt lalt lalt lalt O 185 ' ' ' ' nul ' ' alock ' ' susp ' ' O 186 clock clock clock clock clock clock clock clock O 187 fkey01 fkey13 fkey25 fkey37 scr01 scr11 scr01 scr11 O 188 fkey02 fkey14 fkey26 fkey38 scr02 scr12 scr02 scr12 O 189 fkey03 fkey15 fkey27 fkey39 scr03 scr13 scr03 scr13 O 190 fkey04 fkey16 fkey28 fkey40 scr04 scr14 scr04 scr14 O 191 fkey05 fkey17 fkey29 fkey41 scr05 scr15 scr05 scr15 O 192 fkey06 fkey18 fkey30 fkey42 scr06 scr16 scr06 scr16 O 193 fkey07 fkey19 fkey31 fkey43 scr07 scr07 scr07 scr07 O 194 fkey08 fkey20 fkey32 fkey44 scr08 scr08 scr08 scr08 O 195 fkey09 fkey21 fkey33 fkey45 scr09 scr09 scr09 scr09 O 196 fkey10 fkey22 fkey34 fkey46 scr10 scr10 scr10 scr10 O 197 nlock nlock nlock nlock nlock nlock nlock nlock O 198 slock slock slock slock slock slock slock slock O 199 fkey49 '7' '7' '7' '7' '7' '7' '7' N 200 fkey50 '8' '8' '8' '8' '8' '8' '8' N 201 fkey51 '9' '9' '9' '9' '9' '9' '9' N 202 fkey52 '-' '-' '-' '-' '-' '-' '-' N 203 fkey53 '4' '4' '4' '4' '4' '4' '4' N 204 fkey54 '5' '5' '5' '5' '5' '5' '5' N 205 fkey55 '6' '6' '6' '6' '6' '6' '6' N 206 fkey56 '+' '+' '+' '+' '+' '+' '+' N 207 fkey57 '1' '1' '1' '1' '1' '1' '1' N 208 fkey58 '2' '2' '2' '2' '2' '2' '2' N 209 fkey59 '3' '3' '3' '3' '3' '3' '3' N 210 fkey60 '0' '0' '0' '0' '0' '0' '0' N 211 del '.' '.' '.' '.' '.' boot boot N - 212 ns ns ns ns ns ns ns ns O + 212 us us us us us us us us O 213 nop nop nop nop nop nop nop nop O 214 '\' '|' nop nop '\' '|' nop nop O 215 fkey11 fkey23 fkey35 fkey47 scr11 scr11 scr11 scr11 O 216 fkey12 fkey24 fkey36 fkey48 scr12 scr12 scr12 scr12 O 217 cr cr nl nl cr cr nl nl O 218 rctrl rctrl rctrl rctrl rctrl rctrl rctrl rctrl O 219 '/' '/' '/' '/' '/' '/' '/' '/' N 220 nscr pscr debug nop nop nop nop nop O 221 ralt ralt ralt ralt ralt ralt ralt ralt O 222 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 O 223 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 O 224 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 O 225 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 O 226 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 O 227 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 O 228 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 O 229 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 O 230 fkey60 paste fkey60 fkey60 fkey60 fkey60 fkey60 fkey60 O 231 fkey61 fkey61 fkey61 fkey61 fkey61 fkey61 boot fkey61 O 232 slock slock slock slock slock slock slock slock O 233 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 O 234 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 O 235 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 O dacu 0x0384 ( 0x03b1 0x03ac ) ( 0x0391 0x0386 ) ( 0x03b5 0x03ad ) ( 0x0395 0x0388 ) ( 0x03b9 0x03af ) ( 0x0399 0x038a ) ( 0x03b7 0x03ae ) ( 0x0397 0x0389 ) ( 0x03c5 0x03cd ) ( 0x03a5 0x038e ) ( 0x03bf 0x03cc ) ( 0x039f 0x038c ) ( 0x03c9 0x03ce ) ( 0x03a9 0x038f ) dcir 0x0385 ( 0x03b9 0x0390 ) ( 0x0399 0x03aa ) ( 0x03c5 0x03b0 ) ( 0x03a5 0x03ab ) ddia 0xa8 ( 0x03b9 0x03ca ) ( 0x0399 0x03aa ) ( 0x03c5 0x03cb ) ( 0x03a5 0x03ab ) diff --git a/share/vt/keymaps/hu.101.kbd b/share/vt/keymaps/hu.101.kbd index 3101b36e0186..07be2b181272 100644 --- a/share/vt/keymaps/hu.101.kbd +++ b/share/vt/keymaps/hu.101.kbd @@ -1,293 +1,293 @@ # $FreeBSD$ # # This is for use with a US keyboard, with only the # Hungarian accented characters added to it, and some tricks: 
# The accented characters are available as Alt + something: (and the upper # case version is with Alt + Shift + something) # ' -> a' # ; -> e' # ` -> i' # = -> o' # 0 -> o: # [ -> o" # ] -> u' # - -> u: # \ -> u" # (i' and I' has another method: Alt + j -> i' and Alt + i -> I') # # This keymap file has a switching feature: # with the Shift+Ctrl combination, # we can change the keyboard from US kbd (with hungarian accented letters as # Alt+something) to a US kbd, with the help of which we can type the accented # characters # without the Alt key (and can type the original characters as Alt + ;). So # after loading that keymap: # ; -> ; # Alt + ; -> e' # press Shift+Ctrl, and get: # ; -> e' # Alt + ; -> ; # The keyboard switch can be seen at the CapsLock led: off = normal mode; on = # switched mode. By the way, we cannot see the CapsLock key's status ;-( # # alt # scan cntrl alt alt cntrl lock # code base shift cntrl shift alt shift cntrl shift state # ------------------------------------------------------------------ 000 nop nop nop nop nop nop nop nop O 001 esc esc esc esc esc esc debug esc O 002 '1' '!' nop nop '1' '!' nop nop O 003 '2' '@' nul nul '2' '@' nul nul O 004 '3' '#' nop nop '3' '#' nop nop O 005 '4' '$' nop nop '4' '$' nop nop O 006 '5' '%' nop nop '5' '%' nop nop O 007 '6' '^' rs rs '6' '^' rs rs O 008 '7' '&' nop nop '7' '&' nop nop O 009 '8' '*' nop nop '8' '*' nop nop O 010 '9' '(' nop nop '9' '(' nop nop O 011 '0' ')' nop nop 0xf6 0xd6 nop nop O - 012 '-' '_' ns ns 0xfc 0xdc ns ns C + 012 '-' '_' us us 0xfc 0xdc us us C 013 '=' '+' nop nop 0xf3 0xd3 nop nop C 014 bs bs del del bs bs del del O 015 ht btab nop nop ht btab nop nop O 016 'q' 'Q' dc1 dc1 'q' 'Q' dc1 dc1 C 017 'w' 'W' etb etb 'w' 'W' etb etb C 018 'e' 'E' enq enq 'e' 'E' enq enq C 019 'r' 'R' dc2 dc2 'r' 'R' dc2 dc2 C 020 't' 'T' dc4 dc4 't' 'T' dc4 dc4 C 021 'y' 'Y' em em 'y' 'Y' em em C 022 'u' 'U' nak nak 'u' 'U' nak nak C 023 'i' 'I' ht ht 0xcd 'I' ht ht C 024 'o' 'O' si si 'o' 'O' si si C 025 'p' 'P' dle dle 'p' 'P' dle dle C 026 '[' '{' esc esc 0x0151 0x0150 esc esc C 027 ']' '}' gs gs 0xfa 0xda gs gs C 028 cr cr nl nl cr cr nl nl O 029 lctrl alock lctrl lctrl lctrl lctrl lctrl lctrl O 030 'a' 'A' soh soh 'a' 'A' soh soh C 031 's' 'S' dc3 dc3 's' 'S' dc3 dc3 C 032 'd' 'D' eot eot 'd' 'D' eot eot C 033 'f' 'F' ack ack 'f' 'F' ack ack C 034 'g' 'G' bel bel 'g' 'G' bel bel C 035 'h' 'H' bs bs 'h' 'H' bs bs C 036 'j' 'J' nl nl 0xed 'J' nl nl C 037 'k' 'K' vt vt 'k' 'K' vt vt C 038 'l' 'L' ff ff 'l' 'L' ff ff C 039 ';' ':' nop nop 0xe9 0xc9 nop nop C 040 ''' '"' nop nop 0xe1 0xc1 nop nop C 041 '`' '~' nop nop 0xed 0xcd nop nop C 042 lshift lshift alock lshift lshift lshift lshift lshift O 043 '\' '|' fs fs 0x0171 0x0170 fs fs C 044 'z' 'Z' sub sub 'z' 'Z' sub sub C 045 'x' 'X' can can 'x' 'X' can can C 046 'c' 'C' etx etx 'c' 'C' etx etx C 047 'v' 'V' syn syn 'v' 'V' syn syn C 048 'b' 'B' stx stx 'b' 'B' stx stx C 049 'n' 'N' so so 'n' 'N' so so C 050 'm' 'M' cr cr 'm' 'M' cr cr C 051 ',' '<' nop nop ',' '<' nop nop O 052 '.' '>' nop nop '.' '>' nop nop O 053 '/' '?' nop nop '/' '?' 
nop nop C 054 rshift rshift alock rshift rshift rshift rshift rshift O 055 '*' '*' '*' '*' '*' '*' '*' '*' O 056 lalt lalt lalt lalt lalt lalt lalt lalt O 057 ' ' ' ' nul ' ' ' ' ' ' susp ' ' O 058 clock clock clock clock clock clock clock clock O 059 fkey01 fkey13 fkey25 fkey37 scr01 scr11 scr01 scr11 O 060 fkey02 fkey14 fkey26 fkey38 scr02 scr12 scr02 scr12 O 061 fkey03 fkey15 fkey27 fkey39 scr03 scr13 scr03 scr13 O 062 fkey04 fkey16 fkey28 fkey40 scr04 scr14 scr04 scr14 O 063 fkey05 fkey17 fkey29 fkey41 scr05 scr15 scr05 scr15 O 064 fkey06 fkey18 fkey30 fkey42 scr06 scr16 scr06 scr16 O 065 fkey07 fkey19 fkey31 fkey43 scr07 scr07 scr07 scr07 O 066 fkey08 fkey20 fkey32 fkey44 scr08 scr08 scr08 scr08 O 067 fkey09 fkey21 fkey33 fkey45 scr09 scr09 scr09 scr09 O 068 fkey10 fkey22 fkey34 fkey46 scr10 scr10 scr10 scr10 O 069 nlock nlock nlock nlock nlock nlock nlock nlock O 070 slock slock slock slock slock slock slock slock O 071 fkey49 '7' '7' '7' '7' '7' '7' '7' N 072 fkey50 '8' '8' '8' '8' '8' '8' '8' N 073 fkey51 '9' '9' '9' '9' '9' '9' '9' N 074 fkey52 '-' '-' '-' '-' '-' '-' '-' N 075 fkey53 '4' '4' '4' '4' '4' '4' '4' N 076 fkey54 '5' '5' '5' '5' '5' '5' '5' N 077 fkey55 '6' '6' '6' '6' '6' '6' '6' N 078 fkey56 '+' '+' '+' '+' '+' '+' '+' N 079 fkey57 '1' '1' '1' '1' '1' '1' '1' N 080 fkey58 '2' '2' '2' '2' '2' '2' '2' N 081 fkey59 '3' '3' '3' '3' '3' '3' '3' N 082 fkey60 '0' '0' '0' '0' '0' '0' '0' N 083 del '.' '.' '.' '.' '.' boot boot N 084 nop nop nop nop nop nop nop nop O 085 nop nop nop nop nop nop nop nop O 086 0xed 0xcd nop nop nop nop nop nop C 087 fkey11 fkey23 fkey35 fkey47 scr11 scr11 scr11 scr11 O 088 fkey12 fkey24 fkey36 fkey48 scr12 scr12 scr12 scr12 O 089 cr cr cr cr cr cr cr cr O 090 rctrl alock rctrl rctrl rctrl rctrl rctrl rctrl O 091 '/' '/' '/' '/' '/' '/' '/' '/' O 092 nscr pscr debug debug nop nop nop nop O 093 ralt ralt ralt ralt ralt ralt ralt ralt O 094 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 O 095 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 O 096 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 O 097 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 O 098 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 O 099 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 O 100 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 O 101 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 O 102 fkey60 paste fkey60 fkey60 fkey60 fkey60 fkey60 fkey60 O 103 fkey61 fkey61 fkey61 fkey61 fkey61 fkey61 boot fkey61 O 104 slock saver slock saver susp nop susp nop O 105 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 O 106 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 O 107 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 O 108 nop nop nop nop nop nop nop nop O 109 nop nop nop nop nop nop nop nop O 110 nop nop nop nop nop nop nop nop O 111 nop nop nop nop nop nop nop nop O 112 nop nop nop nop nop nop nop nop O 113 nop nop nop nop nop nop nop nop O 114 nop nop nop nop nop nop nop nop O 115 nop nop nop nop nop nop nop nop O 116 nop nop nop nop nop nop nop nop O 117 nop nop nop nop nop nop nop nop O 118 nop nop nop nop nop nop nop nop O 119 nop nop nop nop nop nop nop nop O 120 nop nop nop nop nop nop nop nop O 121 nop nop nop nop nop nop nop nop O 122 nop nop nop nop nop nop nop nop O 123 nop nop nop nop nop nop nop nop O 124 nop nop nop nop nop nop nop nop O 125 nop nop nop nop nop nop nop nop O 126 nop nop nop nop nop nop nop nop O 127 nop nop nop nop nop nop nop nop O # 
------------------------------------------------------------------ 128 nop nop nop nop nop nop nop nop O 129 esc esc esc esc esc esc debug esc O 130 '1' '!' nop nop '1' '!' nop nop O 131 '2' '@' nul nul '2' '@' nul nul O 132 '3' '#' nop nop '3' '#' nop nop O 133 '4' '$' nop nop '4' '$' nop nop O 134 '5' '%' nop nop '5' '%' nop nop O 135 '6' '^' rs rs '6' '^' rs rs O 136 '7' '&' nop nop '7' '&' nop nop O 137 '8' '*' nop nop '8' '*' nop nop O 138 '9' '(' nop nop '9' '(' nop nop O 139 0xf6 0xd6 nop nop '0' ')' nop nop O - 140 0xfc 0xdc ns ns '-' '_' ns ns C + 140 0xfc 0xdc us us '-' '_' us us C 141 0xf3 0xd3 nop nop '=' '+' nop nop C 142 bs bs del del bs bs del del O 143 ht btab nop nop ht btab nop nop O 144 'q' 'Q' dc1 dc1 'q' 'Q' dc1 dc1 C 145 'w' 'W' etb etb 'w' 'W' etb etb C 146 'e' 'E' enq enq 'e' 'E' enq enq C 147 'r' 'R' dc2 dc2 'r' 'R' dc2 dc2 C 148 't' 'T' dc4 dc4 't' 'T' dc4 dc4 C 149 'y' 'Y' em em 'y' 'Y' em em C 150 'u' 'U' nak nak 'u' 'U' nak nak C 151 'i' 'I' ht ht 0xcd 'I' ht ht C 152 'o' 'O' si si 'o' 'O' si si C 153 'p' 'P' dle dle 'p' 'P' dle dle C 154 0x0151 0x0150 esc esc '[' '{' esc esc C 155 0xfa 0xda gs gs ']' '}' gs gs C 156 cr cr nl nl cr cr nl nl O 157 lctrl alock lctrl lctrl lctrl lctrl lctrl lctrl O 158 'a' 'A' soh soh 'a' 'A' soh soh C 159 's' 'S' dc3 dc3 's' 'S' dc3 dc3 C 160 'd' 'D' eot eot 'd' 'D' eot eot C 161 'f' 'F' ack ack 'f' 'F' ack ack C 162 'g' 'G' bel bel 'g' 'G' bel bel C 163 'h' 'H' bs bs 'h' 'H' bs bs C 164 'j' 'J' nl nl 0xed 'J' nl nl C 165 'k' 'K' vt vt 'k' 'K' vt vt C 166 'l' 'L' ff ff 'l' 'L' ff ff C 167 0xe9 0xc9 nop nop ';' ':' nop nop C 168 0xe1 0xc1 nop nop ''' '"' nop nop C 169 0xed 0xcd nop nop '`' '~' nop nop C 170 lshift lshift alock lshift lshift lshift lshift lshift O 171 0x0171 0x0170 fs fs '\' '|' fs fs C 172 'z' 'Z' sub sub 'z' 'Z' sub sub C 173 'x' 'X' can can 'x' 'X' can can C 174 'c' 'C' etx etx 'c' 'C' etx etx C 175 'v' 'V' syn syn 'v' 'V' syn syn C 176 'b' 'B' stx stx 'b' 'B' stx stx C 177 'n' 'N' so so 'n' 'N' so so C 178 'm' 'M' cr cr 'm' 'M' cr cr C 179 ',' '<' nop nop ',' '<' nop nop O 180 '.' '>' nop nop '.' '>' nop nop O 181 '/' '?' nop nop '/' '?' 
nop nop C 182 rshift rshift alock rshift rshift rshift rshift rshift O 183 '*' '*' '*' '*' '*' '*' '*' '*' O 184 lalt lalt lalt lalt lalt lalt lalt lalt O 185 ' ' ' ' nul ' ' ' ' ' ' susp ' ' O 186 clock clock clock clock clock clock clock clock O 187 fkey01 fkey13 fkey25 fkey37 scr01 scr11 scr01 scr11 O 188 fkey02 fkey14 fkey26 fkey38 scr02 scr12 scr02 scr12 O 189 fkey03 fkey15 fkey27 fkey39 scr03 scr13 scr03 scr13 O 190 fkey04 fkey16 fkey28 fkey40 scr04 scr14 scr04 scr14 O 191 fkey05 fkey17 fkey29 fkey41 scr05 scr15 scr05 scr15 O 192 fkey06 fkey18 fkey30 fkey42 scr06 scr16 scr06 scr16 O 193 fkey07 fkey19 fkey31 fkey43 scr07 scr07 scr07 scr07 O 194 fkey08 fkey20 fkey32 fkey44 scr08 scr08 scr08 scr08 O 195 fkey09 fkey21 fkey33 fkey45 scr09 scr09 scr09 scr09 O 196 fkey10 fkey22 fkey34 fkey46 scr10 scr10 scr10 scr10 O 197 nlock nlock nlock nlock nlock nlock nlock nlock O 198 slock slock slock slock slock slock slock slock O 199 fkey49 '7' '7' '7' '7' '7' '7' '7' N 200 fkey50 '8' '8' '8' '8' '8' '8' '8' N 201 fkey51 '9' '9' '9' '9' '9' '9' '9' N 202 fkey52 '-' '-' '-' '-' '-' '-' '-' N 203 fkey53 '4' '4' '4' '4' '4' '4' '4' N 204 fkey54 '5' '5' '5' '5' '5' '5' '5' N 205 fkey55 '6' '6' '6' '6' '6' '6' '6' N 206 fkey56 '+' '+' '+' '+' '+' '+' '+' N 207 fkey57 '1' '1' '1' '1' '1' '1' '1' N 208 fkey58 '2' '2' '2' '2' '2' '2' '2' N 209 fkey59 '3' '3' '3' '3' '3' '3' '3' N 210 fkey60 '0' '0' '0' '0' '0' '0' '0' N 211 del '.' '.' '.' '.' '.' boot boot N 212 nop nop nop nop nop nop nop nop O 213 nop nop nop nop nop nop nop nop O 214 0xed 0xcd nop nop nop nop nop nop C 215 fkey11 fkey23 fkey35 fkey47 scr11 scr11 scr11 scr11 O 216 fkey12 fkey24 fkey36 fkey48 scr12 scr12 scr12 scr12 O 217 cr cr cr cr cr cr cr cr O 218 rctrl alock rctrl rctrl rctrl rctrl rctrl rctrl O 219 '/' '/' '/' '/' '/' '/' '/' '/' O 220 nscr pscr debug debug nop nop nop nop O 221 ralt ralt ralt ralt ralt ralt ralt ralt O 222 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 O 223 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 O 224 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 O 225 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 O 226 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 O 227 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 O 228 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 O 229 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 O 230 fkey60 paste fkey60 fkey60 fkey60 fkey60 fkey60 fkey60 O 231 fkey61 fkey61 fkey61 fkey61 fkey61 fkey61 boot fkey61 O 232 slock saver slock saver susp nop susp nop O 233 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 O 234 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 O 235 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 O 236 nop nop nop nop nop nop nop nop O 237 nop nop nop nop nop nop nop nop O 238 nop nop nop nop nop nop nop nop O 239 nop nop nop nop nop nop nop nop O 240 nop nop nop nop nop nop nop nop O 241 nop nop nop nop nop nop nop nop O 242 nop nop nop nop nop nop nop nop O 243 nop nop nop nop nop nop nop nop O 244 nop nop nop nop nop nop nop nop O 245 nop nop nop nop nop nop nop nop O 246 nop nop nop nop nop nop nop nop O 247 nop nop nop nop nop nop nop nop O 248 nop nop nop nop nop nop nop nop O 249 nop nop nop nop nop nop nop nop O 250 nop nop nop nop nop nop nop nop O 251 nop nop nop nop nop nop nop nop O 252 nop nop nop nop nop nop nop nop O 253 nop nop nop nop nop nop nop nop O 254 nop nop nop nop nop nop nop nop O 255 nop nop nop nop nop nop nop nop O diff --git 
a/share/vt/keymaps/hu.102.kbd b/share/vt/keymaps/hu.102.kbd index 532cbd832c6a..244db9ea56ba 100644 --- a/share/vt/keymaps/hu.102.kbd +++ b/share/vt/keymaps/hu.102.kbd @@ -1,142 +1,142 @@ # This is an as-close-as-possible (closer :-) representation of the # Hungarian keyboard standard (after M$'s W*). There are so many PCs in # Hungary with that type of keyboard. # # $FreeBSD$ # alt # scan cntrl alt alt cntrl lock # code base shift cntrl shift alt shift cntrl shift state # ------------------------------------------------------------------ 000 nop nop nop nop nop nop nop nop O 001 esc esc esc esc esc esc debug esc O 002 '1' ''' nop nop '~' nop nop nop O 003 '2' '"' nop nop 0x02c7 nop nop nop O 004 '3' '+' nop nop '^' nop rs rs O 005 '4' '!' nop nop 0x02d8 nop nop nop O 006 '5' '%' nop nop 0xb0 nop nop nop O 007 '6' '/' rs rs 0x02db nop rs rs O 008 '7' '=' nop nop '`' nop nop nop O 009 '8' '(' nop nop 0x02d9 nop nop nop O 010 '9' ')' nop nop 0xb4 nop nop nop O 011 0xf6 0xd6 nop nop 0x02dd nop nop nop C 012 0xfc 0xdc nop nop 0xa8 nop nop nop C 013 0xf3 0xd3 nop nop 0xb8 nop nop nop C 014 bs bs del del bs bs del del O 015 ht btab nop nop ht btab nop nop O 016 'q' 'Q' dc1 dc1 '\' nop fs fs C 017 'w' 'W' etb etb '|' nop etb etb C 018 'e' 'E' enq enq nop nop enq enq C 019 'r' 'R' dc2 dc2 nop nop dc2 dc2 C 020 't' 'T' dc4 dc4 nop nop dc4 dc4 C 021 'z' 'Z' sub sub nop nop sub sub C 022 'u' 'U' nak nak nop nop nak nak C 023 'i' 'I' ht ht 0xcd nop ht ht C 024 'o' 'O' si si nop nop si si C 025 'p' 'P' dle dle nop nop dle dle C 026 0x0151 0x0150 esc esc 0xf7 nop esc esc C 027 0xfa 0xda gs gs 0xd7 nop gs gs C 028 cr cr nl nl cr cr nl nl O 029 lctrl lctrl lctrl lctrl lctrl lctrl lctrl lctrl O 030 'a' 'A' soh soh nop nop soh soh C 031 's' 'S' dc3 dc3 0x0111 nop dc3 dc3 C 032 'd' 'D' eot eot 0x0110 nop eot eot C 033 'f' 'F' ack ack '[' nop esc esc C 034 'g' 'G' bel bel ']' nop gs gs C 035 'h' 'H' bs bs nop nop bs bs C 036 'j' 'J' nl nl 0xed nop nl nl C 037 'k' 'K' vt vt 0x0142 nop vt vt C 038 'l' 'L' ff ff 0x0141 nop ff ff C 039 0xe9 0xc9 nop nop '$' nop nop nop C 040 0xe1 0xc1 nop nop 0xdf nop nop nop C 041 '0' 0x15 nop nop nop nop nop nop O 042 lshift lshift lshift lshift lshift lshift lshift lshift O 043 0x0171 0x0170 fs fs 0xa4 nop fs fs C 044 'y' 'Y' em em '>' '<' em em C 045 'x' 'X' can can '#' nop can can C 046 'c' 'C' etx etx '&' nop etx etx C 047 'v' 'V' syn syn '@' nop nul nul C 048 'b' 'B' stx stx '{' nop stx stx C 049 'n' 'N' so so '}' nop so so C 050 'm' 'M' cr cr '<' nop cr cr C 051 ',' '?' nop nop ';' nop nop nop O 052 '.' 
':' nop nop '>' nop nop nop O
- 053 '-' '_' ns ns '*' nop nop nop O
+ 053 '-' '_' us us '*' nop nop nop O
054 rshift rshift rshift rshift rshift rshift rshift rshift O
055 '*' '*' '*' '*' '*' '*' '*' '*' O
056 lalt lalt lalt lalt lalt lalt lalt lalt O
057 ' ' ' ' nul ' ' ' ' ' ' susp ' ' O
058 clock clock clock clock clock clock clock clock O
059 fkey01 fkey13 fkey25 fkey37 scr01 scr11 scr01 scr11 O
060 fkey02 fkey14 fkey26 fkey38 scr02 scr12 scr02 scr12 O
061 fkey03 fkey15 fkey27 fkey39 scr03 scr13 scr03 scr13 O
062 fkey04 fkey16 fkey28 fkey40 scr04 scr14 scr04 scr14 O
063 fkey05 fkey17 fkey29 fkey41 scr05 scr15 scr05 scr15 O
064 fkey06 fkey18 fkey30 fkey42 scr06 scr16 scr06 scr16 O
065 fkey07 fkey19 fkey31 fkey43 scr07 scr07 scr07 scr07 O
066 fkey08 fkey20 fkey32 fkey44 scr08 scr08 scr08 scr08 O
067 fkey09 fkey21 fkey33 fkey45 scr09 scr09 scr09 scr09 O
068 fkey10 fkey22 fkey34 fkey46 scr10 scr10 scr10 scr10 O
069 nlock nlock nlock nlock nlock nlock nlock nlock O
070 slock slock slock slock slock slock slock slock O
071 fkey49 '7' '7' '7' '7' '7' '7' '7' N
072 fkey50 '8' '8' '8' '8' '8' '8' '8' N
073 fkey51 '9' '9' '9' '9' '9' '9' '9' N
074 fkey52 '-' '-' '-' '-' '-' '-' '-' N
075 fkey53 '4' '4' '4' '4' '4' '4' '4' N
076 fkey54 '5' '5' '5' '5' '5' '5' '5' N
077 fkey55 '6' '6' '6' '6' '6' '6' '6' N
078 fkey56 '+' '+' '+' '+' '+' '+' '+' N
079 fkey57 '1' '1' '1' '1' '1' '1' '1' N
080 fkey58 '2' '2' '2' '2' '2' '2' '2' N
081 fkey59 '3' '3' '3' '3' '3' '3' '3' N
082 fkey60 '0' '0' '0' '0' '0' '0' '0' N
083 del '.' '.' '.' '.' '.' boot boot N
084 nop nop nop nop nop nop nop nop O
085 nop nop nop nop nop nop nop nop O
086 0xed 0xcd nop nop '<' nop nop nop C
087 fkey11 fkey23 fkey35 fkey47 scr11 scr11 scr11 scr11 O
088 fkey12 fkey24 fkey36 fkey48 scr12 scr12 scr12 scr12 O
089 cr cr cr cr cr cr cr cr O
090 rctrl rctrl rctrl rctrl rctrl rctrl rctrl rctrl O
091 '/' '/' '/' '/' '/' '/' '/' '/' O
092 nscr pscr debug debug nop nop nop nop O
093 ralt ralt ralt ralt ralt ralt ralt ralt O
094 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 O
095 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 O
096 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 O
097 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 O
098 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 O
099 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 O
100 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 O
101 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 O
102 fkey60 paste fkey60 fkey60 fkey60 fkey60 fkey60 fkey60 O
103 fkey61 fkey61 fkey61 fkey61 fkey61 fkey61 boot fkey61 O
104 slock saver slock saver susp nop susp nop O
# the left Windows key. If you would like to use these keys,
# program them with:
#	kbdcontrol -f 62 'strings you would like to send'
105 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 O
# the right Windows key.
106 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 O
# the right Menu pointer key.
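Review aside: the fkey62-fkey64 entries above are the programmable Windows and Menu keys that the comment says to program with kbdcontrol -f. For reference, kbdcontrol -f is a thin wrapper around the console keyboard ioctls; what follows is a minimal C sketch of the equivalent call, assuming the SETFKEY ioctl and struct fkeyarg from <sys/kbio.h>. The device path and the 0-based key numbering are assumptions to verify against the kbdcontrol(1) sources before relying on this.

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/kbio.h>	/* struct fkeyarg, SETFKEY (assumed interface) */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct fkeyarg fkey;
	int fd;

	/* Example device; any console tty with the keyboard attached. */
	fd = open("/dev/ttyv0", O_RDWR);
	if (fd < 0) {
		perror("open");
		return (1);
	}
	/*
	 * kbdcontrol numbers function keys from 1 (62 = left Windows key
	 * in the keymaps above); the ioctl appears to take a 0-based
	 * index - an assumption worth double-checking.
	 */
	fkey.keynum = 62 - 1;
	strlcpy(fkey.keydef, "hello", sizeof(fkey.keydef));
	fkey.flen = strlen(fkey.keydef);
	if (ioctl(fd, SETFKEY, &fkey) < 0)
		perror("ioctl(SETFKEY)");
	close(fd);
	return (0);
}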
107 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 O 108 nop nop nop nop nop nop nop nop O 109 nop nop nop nop nop nop nop nop O 110 nop nop nop nop nop nop nop nop O 111 nop nop nop nop nop nop nop nop O 112 nop nop nop nop nop nop nop nop O 113 nop nop nop nop nop nop nop nop O 114 nop nop nop nop nop nop nop nop O 115 nop nop nop nop nop nop nop nop O 116 nop nop nop nop nop nop nop nop O 117 nop nop nop nop nop nop nop nop O 118 nop nop nop nop nop nop nop nop O 119 nop nop nop nop nop nop nop nop O 120 nop nop nop nop nop nop nop nop O 121 nop nop nop nop nop nop nop nop O 122 nop nop nop nop nop nop nop nop O 123 nop nop nop nop nop nop nop nop O 124 nop nop nop nop nop nop nop nop O 125 nop nop nop nop nop nop nop nop O 126 nop nop nop nop nop nop nop nop O 127 nop nop nop nop nop nop nop nop O diff --git a/share/vt/keymaps/lt.kbd b/share/vt/keymaps/lt.kbd index 0f7ac09025ec..ac8b17b313a1 100644 --- a/share/vt/keymaps/lt.kbd +++ b/share/vt/keymaps/lt.kbd @@ -1,242 +1,242 @@ # $FreeBSD$ # alt # scan cntrl alt alt cntrl lock # code base shift cntrl shift alt shift cntrl shift state # ------------------------------------------------------------------ 000 nop nop nop nop nop nop nop nop O 001 esc esc nop nop esc esc debug nop O 002 '1' '!' nop nop 0x0105 0x0104 nop nop O 003 '2' '@' nul nul 0x010d 0x010c nul nul O 004 '3' '#' nop nop 0x0119 0x0118 nop nop O 005 '4' '$' nop nop 0x0117 0x0116 nop nop O 006 '5' '%' nop nop 0x012f 0x012e nop nop O 007 '6' '^' rs rs 0x0161 0x0160 rs rs O 008 '7' '&' nop nop 0x0173 0x0172 nop nop O 009 '8' '*' nop nop 0x016b 0x0172 nop nop O 010 '9' '(' nop nop '9' '(' nop nop O 011 '0' ')' nop nop '0' ')' nop nop O - 012 '-' '_' ns ns '-' '_' ns ns O + 012 '-' '_' us us '-' '_' us us O 013 '=' '+' nop nop 0x017e 0x017d nop nop O 014 bs bs del del bs bs del del O 015 ht btab nop nop ht btab nop nop O 016 'q' 'Q' dc1 dc1 'q' 'Q' dc1 dc1 C 017 'w' 'W' etb etb 'w' 'W' etb etb C 018 'e' 'E' enq enq 'e' 'E' enq enq C 019 'r' 'R' dc2 dc2 'r' 'R' dc2 dc2 C 020 't' 'T' dc4 dc4 't' 'T' dc4 dc4 C 021 'y' 'Y' em em 'y' 'Y' em em C 022 'u' 'U' nak nak 'u' 'U' nak nak C 023 'i' 'I' ht ht 'i' 'I' ht ht C 024 'o' 'O' si si 'o' 'O' si si C 025 'p' 'P' dle dle 'p' 'P' dle dle C 026 '[' '{' esc esc '[' '{' esc esc O 027 ']' '}' gs gs ']' '}' gs gs O 028 cr alock nl nl alock cr nl nl O 029 lctrl lctrl lctrl lctrl lctrl lctrl lctrl lctrl O 030 'a' 'A' soh soh 'a' 'A' soh soh C 031 's' 'S' dc3 dc3 's' 'S' dc3 dc3 C 032 'd' 'D' eot eot 'd' 'D' eot eot C 033 'f' 'F' ack ack 'f' 'F' ack ack C 034 'g' 'G' bel bel 'g' 'G' bel bel C 035 'h' 'H' bs bs 'h' 'H' bs bs C 036 'j' 'J' nl nl 'j' 'J' nl nl C 037 'k' 'K' vt vt 'k' 'K' vt vt C 038 'l' 'L' ff ff 'l' 'L' ff ff C 039 ';' ':' nop nop ';' ':' nop nop O 040 ''' '"' nop nop ''' '"' nop nop O 041 '`' '~' nop nop '`' '~' nop nop O 042 lshift lshift lshift lshift lshift lshift lshift lshift O 043 '\' '|' fs fs '\' '|' fs fs O 044 'z' 'Z' sub sub 'z' 'Z' sub sub C 045 'x' 'X' can can 'x' 'X' can can C 046 'c' 'C' etx etx 'c' 'C' etx etx C 047 'v' 'V' syn syn 'v' 'V' syn syn C 048 'b' 'B' stx stx 'b' 'B' stx stx C 049 'n' 'N' so so 'n' 'N' so so C 050 'm' 'M' cr cr 'm' 'M' cr cr C 051 ',' '<' nop nop ',' '<' nop nop O 052 '.' '>' nop nop '.' '>' nop nop O 053 '/' '?' nop nop '/' '?' 
nop nop O 054 rshift rshift rshift rshift rshift rshift rshift rshift O 055 '*' '*' nscr nscr '*' '*' nscr nscr O 056 lalt lalt lalt lalt lalt lalt lalt lalt O 057 ' ' ' ' nul ' ' ' ' ' ' susp ' ' O 058 clock clock clock clock clock clock clock clock O 059 fkey01 fkey13 fkey25 fkey37 scr01 scr11 scr01 scr11 O 060 fkey02 fkey14 fkey26 fkey38 scr02 scr12 scr02 scr12 O 061 fkey03 fkey15 fkey27 fkey39 scr03 scr13 scr03 scr13 O 062 fkey04 fkey16 fkey28 fkey40 scr04 scr14 scr04 scr14 O 063 fkey05 fkey17 fkey29 fkey41 scr05 scr15 scr05 scr15 O 064 fkey06 fkey18 fkey30 fkey42 scr06 scr16 scr06 scr16 O 065 fkey07 fkey19 fkey31 fkey43 scr07 scr07 scr07 scr07 O 066 fkey08 fkey20 fkey32 fkey44 scr08 scr08 scr08 scr08 O 067 fkey09 fkey21 fkey33 fkey45 scr09 scr09 scr09 scr09 O 068 fkey10 fkey22 fkey34 fkey46 scr10 scr10 scr10 scr10 O 069 nlock nlock nlock nlock nlock nlock nlock nlock O 070 slock slock slock slock slock slock slock slock O 071 fkey49 '7' '7' '7' '7' '7' '7' '7' N 072 fkey50 '8' '8' '8' '8' '8' '8' '8' N 073 fkey51 '9' '9' '9' '9' '9' '9' '9' N 074 fkey52 '-' '-' '-' '-' '-' '-' '-' N 075 fkey53 '4' '4' '4' '4' '4' '4' '4' N 076 fkey54 '5' '5' '5' '5' '5' '5' '5' N 077 fkey55 '6' '6' '6' '6' '6' '6' '6' N 078 fkey56 '+' '+' '+' '+' '+' '+' '+' N 079 fkey57 '1' '1' '1' '1' '1' '1' '1' N 080 fkey58 '2' '2' '2' '2' '2' '2' '2' N 081 fkey59 '3' '3' '3' '3' '3' '3' '3' N 082 fkey60 '0' '0' '0' '0' '0' '0' '0' N 083 del '.' ',' ',' del ',' boot boot N - 084 ns ns ns ns ns ns ns ns O + 084 us us us us us us us us O 085 nop nop nop nop nop nop nop nop O 086 ralt ralt ralt ralt ralt ralt ralt ralt O 087 fkey11 fkey23 fkey35 fkey47 scr11 scr11 scr11 scr11 O 088 fkey12 fkey24 fkey36 fkey48 scr12 scr12 scr12 scr12 O 089 cr cr nl nl cr cr nl nl O 090 rctrl rctrl rctrl rctrl rctrl rctrl rctrl rctrl O 091 '/' '/' '/' '/' '/' '/' '/' '/' N 092 nscr pscr debug nop nop nop nop nop O 093 ralt ralt ralt ralt ralt ralt ralt ralt O 094 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 O 095 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 O 096 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 O 097 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 O 098 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 O 099 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 O 100 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 O 101 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 O 102 fkey60 paste fkey60 fkey60 fkey60 fkey60 fkey60 fkey60 O 103 fkey61 fkey61 fkey61 fkey61 fkey61 fkey61 boot fkey61 O 104 slock slock slock slock slock slock slock slock O 105 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 O 106 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 O 107 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 O 108 nop nop nop nop nop nop nop nop O 109 nop nop nop nop nop nop nop nop O 110 nop nop nop nop nop nop nop nop O 111 nop nop nop nop nop nop nop nop O 112 nop nop nop nop nop nop nop nop O 113 nop nop nop nop nop nop nop nop O 114 nop nop nop nop nop nop nop nop O 115 nop nop nop nop nop nop nop nop O 116 nop nop nop nop nop nop nop nop O 117 nop nop nop nop nop nop nop nop O 118 nop nop nop nop nop nop nop nop O 119 nop nop nop nop nop nop nop nop O 120 nop nop nop nop nop nop nop nop O 121 nop nop nop nop nop nop nop nop O 122 nop nop nop nop nop nop nop nop O 123 nop nop nop nop nop nop nop nop O 124 nop nop nop nop nop nop nop nop O 125 nop nop nop nop nop nop nop nop O 126 nop nop nop nop nop nop nop nop O 127 nop nop nop nop nop nop 
nop nop O 128 nop nop nop nop nop nop nop nop O 129 esc esc nop nop esc esc debug nop O 130 0x0105 0x0104 nop nop '1' '!' nop nop C 131 0x010d 0x010c nul nul '2' '@' nul nul C 132 0x0119 0x0118 nop nop '3' '#' nop nop C 133 0x0117 0x0116 nop nop '4' '$' nop nop C 134 0x012f 0x012e nop nop '5' '%' nop nop C 135 0x0161 0x0160 rs rs '6' '^' rs rs C 136 0x0173 0x0172 nop nop '7' '&' nop nop C 137 0x016b 0x016a nop nop '8' '*' nop nop C 138 '9' '(' nop nop '9' '(' nop nop O 139 '0' ')' nop nop '0' ')' nop nop O - 140 '-' '_' ns ns '-' '_' ns ns O + 140 '-' '_' us us '-' '_' us us O 141 0x017e 0x017d nop nop '=' '+' nop nop C 142 bs bs del del bs bs del del O 143 ht btab nop nop ht btab nop nop O 144 'q' 'Q' dc1 dc1 'q' 'Q' dc1 dc1 C 145 'w' 'W' etb etb 'w' 'W' etb etb C 146 'e' 'E' enq enq 'e' 'E' enq enq C 147 'r' 'R' dc2 dc2 'r' 'R' dc2 dc2 C 148 't' 'T' dc4 dc4 't' 'T' dc4 dc4 C 149 'y' 'Y' em em 'y' 'Y' em em C 150 'u' 'U' nak nak 'u' 'U' nak nak C 151 'i' 'I' ht ht 'i' 'I' ht ht C 152 'o' 'O' si si 'o' 'O' si si C 153 'p' 'P' dle dle 'p' 'P' dle dle C 154 '[' '{' esc esc '[' '{' esc esc O 155 ']' '}' gs gs ']' '}' gs gs O 156 cr alock nl nl alock cr nl nl O 157 lctrl lctrl lctrl lctrl lctrl lctrl lctrl lctrl O 158 'a' 'A' soh soh 'a' 'A' soh soh C 159 's' 'S' dc3 dc3 's' 'S' dc3 dc3 C 160 'd' 'D' eot eot 'd' 'D' eot eot C 161 'f' 'F' ack ack 'f' 'F' ack ack C 162 'g' 'G' bel bel 'g' 'G' bel bel C 163 'h' 'H' bs bs 'h' 'H' bs bs C 164 'j' 'J' nl nl 'j' 'J' nl nl C 165 'k' 'K' vt vt 'k' 'K' vt vt C 166 'l' 'L' ff ff 'l' 'L' ff ff C 167 ';' ':' nop nop ';' ':' nop nop O 168 ''' '"' nop nop ''' '"' nop nop O 169 '`' '~' nop nop '`' '~' nop nop O 170 lshift lshift lshift lshift lshift lshift lshift lshift O 171 '\' '|' fs fs '\' '|' fs fs O 172 'z' 'Z' sub sub 'z' 'Z' sub sub C 173 'x' 'X' can can 'x' 'X' can can C 174 'c' 'C' etx etx 'c' 'C' etx etx C 175 'v' 'V' syn syn 'v' 'V' syn syn C 176 'b' 'B' stx stx 'b' 'B' stx stx C 177 'n' 'N' so so 'n' 'N' so so C 178 'm' 'M' cr cr 'm' 'M' cr cr C 179 ',' '<' nop nop ',' '<' nop nop O 180 '.' '>' nop nop '.' '>' nop nop O 181 '/' '?' nop nop '/' '?' 
nop nop O 182 rshift rshift rshift rshift rshift rshift rshift rshift O 183 '*' '*' nscr nscr '*' '*' nscr nscr O 184 lalt lalt lalt lalt lalt lalt lalt lalt O 185 ' ' ' ' nul ' ' ' ' ' ' susp ' ' O 186 clock clock clock clock clock clock clock clock O 187 fkey01 fkey13 fkey25 fkey37 scr01 scr11 scr01 scr11 O 188 fkey02 fkey14 fkey26 fkey38 scr02 scr12 scr02 scr12 O 189 fkey03 fkey15 fkey27 fkey39 scr03 scr13 scr03 scr13 O 190 fkey04 fkey16 fkey28 fkey40 scr04 scr14 scr04 scr14 O 191 fkey05 fkey17 fkey29 fkey41 scr05 scr15 scr05 scr15 O 192 fkey06 fkey18 fkey30 fkey42 scr06 scr16 scr06 scr16 O 193 fkey07 fkey19 fkey31 fkey43 scr07 scr07 scr07 scr07 O 194 fkey08 fkey20 fkey32 fkey44 scr08 scr08 scr08 scr08 O 195 fkey09 fkey21 fkey33 fkey45 scr09 scr09 scr09 scr09 O 196 fkey10 fkey22 fkey34 fkey46 scr10 scr10 scr10 scr10 O 197 nlock nlock nlock nlock nlock nlock nlock nlock O 198 slock slock slock slock slock slock slock slock O 199 fkey49 '7' '7' '7' '7' '7' '7' '7' N 200 fkey50 '8' '8' '8' '8' '8' '8' '8' N 201 fkey51 '9' '9' '9' '9' '9' '9' '9' N 202 fkey52 '-' '-' '-' '-' '-' '-' '-' N 203 fkey53 '4' '4' '4' '4' '4' '4' '4' N 204 fkey54 '5' '5' '5' '5' '5' '5' '5' N 205 fkey55 '6' '6' '6' '6' '6' '6' '6' N 206 fkey56 '+' '+' '+' '+' '+' '+' '+' N 207 fkey57 '1' '1' '1' '1' '1' '1' '1' N 208 fkey58 '2' '2' '2' '2' '2' '2' '2' N 209 fkey59 '3' '3' '3' '3' '3' '3' '3' N 210 fkey60 '0' '0' '0' '0' '0' '0' '0' N 211 del ',' '.' '.' del '.' boot boot N - 212 ns ns ns ns ns ns ns ns O + 212 us us us us us us us us O 213 nop nop nop nop nop nop nop nop O 214 ralt ralt ralt ralt ralt ralt ralt ralt O 215 fkey11 fkey23 fkey35 fkey47 scr11 scr11 scr11 scr11 O 216 fkey12 fkey24 fkey36 fkey48 scr12 scr12 scr12 scr12 O 217 cr cr nl nl cr cr nl nl O 218 rctrl rctrl rctrl rctrl rctrl rctrl rctrl rctrl O 219 '/' '/' '/' '/' '/' '/' '/' '/' N 220 nscr pscr debug nop nop nop nop nop O 221 ralt ralt ralt ralt ralt ralt ralt ralt O 222 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 O 223 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 O 224 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 O 225 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 O 226 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 O 227 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 O 228 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 O 229 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 O 230 fkey60 paste fkey60 fkey60 fkey60 fkey60 fkey60 fkey60 O 231 fkey61 fkey61 fkey61 fkey61 fkey61 fkey61 boot fkey61 O 232 slock slock slock slock slock slock slock slock O 233 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 O 234 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 O 235 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 O diff --git a/share/vt/keymaps/pt.acc.kbd b/share/vt/keymaps/pt.acc.kbd index 02ee9b5bda6d..8b6ee3bc12a0 100644 --- a/share/vt/keymaps/pt.acc.kbd +++ b/share/vt/keymaps/pt.acc.kbd @@ -1,139 +1,139 @@ #pt.iso.acc.kbd #by: pm@dee.uc.pt # $FreeBSD$ # alt # scan cntrl alt alt cntrl lock # code base shift cntrl shift alt shift cntrl shift state # ------------------------------------------------------------------ 000 nop nop nop nop nop nop nop nop O 001 esc esc esc esc esc esc debug esc O 002 '1' '!' nop nop '1' '!' 
nop nop O 003 '2' '"' nul nul '@' '@' nul nul O 004 '3' '#' nop nop '3' '#' nop nop O 005 '4' '$' nop nop '4' '$' nop nop O 006 '5' '%' nop nop '5' '%' nop nop O 007 '6' '&' rs rs '6' '^' rs rs O 008 '7' '/' nop nop '{' '&' nop nop O 009 '8' '(' nop nop '[' '*' nop nop O 010 '9' ')' nop nop ']' '(' nop nop O 011 '0' '=' nop nop '}' ')' nop nop O - 012 ''' '?' ns ns '-' '_' ns ns O + 012 ''' '?' us us '-' '_' us us O 013 '=' '+' nop nop '=' '+' nop nop O 014 bs bs del del bs bs del del O 015 ht btab nop nop ht btab nop nop O 016 'q' 'Q' dc1 dc1 'q' 'Q' dc1 dc1 C 017 'w' 'W' etb etb 'w' 'W' etb etb C 018 'e' 'E' enq enq 0x20ac 'E' enq enq C 019 'r' 'R' dc2 dc2 'r' 'R' dc2 dc2 C 020 't' 'T' dc4 dc4 't' 'T' dc4 dc4 C 021 'y' 'Y' em em 'y' 'Y' em em C 022 'u' 'U' nak nak 'u' 'U' nak nak C 023 'i' 'I' ht ht 'i' 'I' ht ht C 024 'o' 'O' si si 'o' 'O' si si C 025 'p' 'P' dle dle 'p' 'P' dle dle C 026 '+' '*' esc esc duml '{' esc esc O 027 dacu dgra gs gs ']' '}' gs gs O 028 cr cr nl nl cr cr nl nl O 029 lctrl lctrl lctrl lctrl lctrl lctrl lctrl lctrl O 030 'a' 'A' soh soh 'a' 'A' soh soh C 031 's' 'S' dc3 dc3 's' 'S' dc3 dc3 C 032 'd' 'D' eot eot 'd' 'D' eot eot C 033 'f' 'F' ack ack 'f' 'F' ack ack C 034 'g' 'G' bel bel 'g' 'G' bel bel C 035 'h' 'H' bs bs 'h' 'H' bs bs C 036 'j' 'J' nl nl 'j' 'J' nl nl C 037 'k' 'K' vt vt 'k' 'K' vt vt C 038 'l' 'L' ff ff 'l' 'L' ff ff C 039 0xe7 0xc7 nop nop ';' ':' nop nop O 040 nop nop nop nop ''' '"' nop nop O 041 '\' '|' nop nop '`' '~' nop nop O 042 lshift lshift lshift lshift lshift lshift lshift lshift O 043 dtil dcir fs fs '\' '|' fs fs O 044 'z' 'Z' sub sub 'z' 'Z' sub sub C 045 'x' 'X' can can 'x' 'X' can can C 046 'c' 'C' etx etx 'c' 'C' etx etx C 047 'v' 'V' syn syn 'v' 'V' syn syn C 048 'b' 'B' stx stx 'b' 'B' stx stx C 049 'n' 'N' so so 'n' 'N' so so C 050 'm' 'M' cr cr 'm' 'M' cr cr C 051 ',' ';' nop nop nop nop nop nop C 052 '.' ':' nop nop '.' '>' nop nop O 053 '-' '_' nop nop '/' '?' nop nop O 054 rshift rshift rshift rshift rshift rshift rshift rshift O 055 '*' '*' '*' '*' '*' '*' '*' '*' O 056 lalt lalt lalt lalt lalt lalt lalt lalt O 057 ' ' ' ' nul ' ' ' ' ' ' 0x82 ' ' O 058 clock clock clock clock clock clock clock clock O 059 fkey01 fkey13 fkey25 fkey37 scr01 scr11 scr01 scr11 O 060 fkey02 fkey14 fkey26 fkey38 scr02 scr12 scr02 scr12 O 061 fkey03 fkey15 fkey27 fkey39 scr03 scr13 scr03 scr13 O 062 fkey04 fkey16 fkey28 fkey40 scr04 scr14 scr04 scr14 O 063 fkey05 fkey17 fkey29 fkey41 scr05 scr15 scr05 scr15 O 064 fkey06 fkey18 fkey30 fkey42 scr06 scr16 scr06 scr16 O 065 fkey07 fkey19 fkey31 fkey43 scr07 scr07 scr07 scr07 O 066 fkey08 fkey20 fkey32 fkey44 scr08 scr08 scr08 scr08 O 067 fkey09 fkey21 fkey33 fkey45 scr09 scr09 scr09 scr09 O 068 fkey10 fkey22 fkey34 fkey46 scr10 scr10 scr10 scr10 O 069 nlock nlock nlock nlock nlock nlock nlock nlock O 070 slock slock slock slock slock slock slock slock O 071 fkey49 '7' '7' '7' '7' '7' '7' '7' N 072 fkey50 '8' '8' '8' '8' '8' '8' '8' N 073 fkey51 '9' '9' '9' '9' '9' '9' '9' N 074 fkey52 '-' '-' '-' '-' '-' '-' '-' N 075 fkey53 '4' '4' '4' '4' '4' '4' '4' N 076 fkey54 '5' '5' '5' '5' '5' '5' '5' N 077 fkey55 '6' '6' '6' '6' '6' '6' '6' N 078 fkey56 '+' '+' '+' '+' '+' '+' '+' N 079 fkey57 '1' '1' '1' '1' '1' '1' '1' N 080 fkey58 '2' '2' '2' '2' '2' '2' '2' N 081 fkey59 '3' '3' '3' '3' '3' '3' '3' N 082 fkey60 '0' '0' '0' '0' '0' '0' '0' N 083 del '.' '.' '.' '.' '.' 
boot boot N
084 nop nop nop nop nop nop nop nop O
085 nop nop nop nop nop nop nop nop O
086 '<' '>' nop nop nop nop nop nop O
087 fkey11 fkey23 fkey35 fkey47 scr11 scr11 scr11 scr11 O
088 fkey12 fkey24 fkey36 fkey48 scr12 scr12 scr12 scr12 O
089 cr cr cr cr cr cr cr cr O
090 rctrl rctrl rctrl rctrl rctrl rctrl rctrl rctrl O
091 '/' '/' '/' '/' '/' '/' '/' '/' O
092 nscr pscr debug debug nop nop nop nop O
093 ralt ralt ralt ralt ralt ralt ralt ralt O
094 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 O
095 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 O
096 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 O
097 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 O
098 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 O
099 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 O
100 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 O
101 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 O
102 fkey60 paste fkey60 fkey60 fkey60 fkey60 fkey60 fkey60 O
103 fkey61 fkey61 fkey61 fkey61 fkey61 fkey61 boot fkey61 O
104 slock saver slock saver susp nop susp nop O
105 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 O
106 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 O
107 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 O
108 nop nop nop nop nop nop nop nop O
dgra '`' ( 'a' 0xe0 ) ( 'A' 0xc0 ) ( 'e' 0xe8 ) ( 'E' 0xc8 ) ( 'i' 0xec ) ( 'I' 0xcc ) ( 'o' 0xf2 ) ( 'O' 0xd2 ) ( 'u' 0xf9 ) ( 'U' 0xd9 )
dacu 0xb4 ( 'a' 0xe1 ) ( 'A' 0xc1 ) ( 'e' 0xe9 ) ( 'E' 0xc9 ) ( 'i' 0xed ) ( 'I' 0xcd ) ( 'o' 0xf3 ) ( 'O' 0xd3 ) ( 'u' 0xfa ) ( 'U' 0xda ) ( 'y' 0xfd ) ( 'Y' 0xdd )
dcir '^' ( 'a' 0xe2 ) ( 'A' 0xc2 ) ( 'e' 0xea ) ( 'E' 0xca ) ( 'i' 0xee ) ( 'I' 0xce ) ( 'o' 0xf4 ) ( 'O' 0xd4 ) ( 'u' 0xfb ) ( 'U' 0xdb )
dtil '~' ( 'a' 0xe3 ) ( 'A' 0xc3 ) ( 'n' 0xf1 ) ( 'N' 0xd1 ) ( 'o' 0xf5 ) ( 'O' 0xd5 )
duml 0xa8 ( 'a' 0xe4 ) ( 'A' 0xc4 ) ( 'e' 0xeb ) ( 'E' 0xcb ) ( 'i' 0xef ) ( 'I' 0xcf ) ( 'o' 0xf6 ) ( 'O' 0xd6 ) ( 'u' 0xfc ) ( 'U' 0xdc ) ( 'y' 0xff )
drin 0xb0 ( 'a' 0xe5 ) ( 'A' 0xc5 )
dced 0xb8 ( 'c' 0xe7 ) ( 'C' 0xc7 )
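Review aside: the dgra/dacu/dcir/dtil/duml/drin/dced records above are vt(4) accent-key (dead-key) definitions. The first token names the accent key, the second gives the character the accent produces on its own (as I read kbdmap(5), emitted when the dead key is followed by a space or by itself), and each ( base result ) pair maps a following base character to a precomposed accented character. A rough C sketch of that composition rule follows, using illustrative types and table names rather than the kernel's actual structures.

#include <stdio.h>

/* One ( base result ) pair from an accent-key definition. */
struct acc_pair {
	unsigned int base;	/* character typed after the dead key */
	unsigned int result;	/* precomposed character to emit */
};

/* dacu 0xb4 ( 'a' 0xe1 ) ( 'A' 0xc1 ) ... from pt.acc.kbd above. */
static const struct acc_pair dacu[] = {
	{ 'a', 0xe1 }, { 'A', 0xc1 },
	{ 'e', 0xe9 }, { 'E', 0xc9 },
	{ 'i', 0xed }, { 'I', 0xcd },
	{ 'o', 0xf3 }, { 'O', 0xd3 },
	{ 'u', 0xfa }, { 'U', 0xda },
	{ 'y', 0xfd }, { 'Y', 0xdd },
	{ 0, 0 }
};

/*
 * Compose a dead key with the next character: return the mapped
 * accented character, the accent character itself when the dead key
 * is followed by a space or by itself, or 0 when the combination is
 * undefined (what the console does then, e.g. beep, is configuration
 * dependent and not modeled here).
 */
static unsigned int
compose(const struct acc_pair *map, unsigned int accchar, unsigned int next)
{
	if (next == ' ' || next == accchar)
		return (accchar);
	for (; map->base != 0; map++)
		if (map->base == next)
			return (map->result);
	return (0);
}

int
main(void)
{
	/* dead acute + 'a' -> 0xe1, a-acute in Latin-1. */
	printf("0x%x\n", compose(dacu, 0xb4, 'a'));
	return (0);
}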
diff --git a/share/vt/keymaps/pt.kbd b/share/vt/keymaps/pt.kbd
index 46f781ec45f6..addd275e4e76 100644
--- a/share/vt/keymaps/pt.kbd
+++ b/share/vt/keymaps/pt.kbd
@@ -1,116 +1,116 @@
# $FreeBSD$
# alt
#by: pm@dee.uc.pt
#
# scan cntrl alt alt cntrl lock
# code base shift cntrl shift alt shift cntrl shift state
# ------------------------------------------------------------------
000 nop nop nop nop nop nop nop nop O
001 esc esc esc esc esc esc debug esc O
002 '1' '!' nop nop '1' '!' nop nop O
003 '2' '"' nul nul '@' '@' nul nul O
004 '3' '#' nop nop '3' '#' nop nop O
005 '4' '$' nop nop '4' '$' nop nop O
006 '5' '%' nop nop '5' '%' nop nop O
007 '6' '&' rs rs '6' '^' rs rs O
008 '7' '/' nop nop '{' '&' nop nop O
009 '8' '(' nop nop '[' '*' nop nop O
010 '9' ')' nop nop ']' '(' nop nop O
011 '0' '=' nop nop '}' ')' nop nop O
- 012 ''' '?' ns ns '-' '_' ns ns O
+ 012 ''' '?' us us '-' '_' us us O
013 '=' '+' nop nop '=' '+' nop nop O
014 bs bs del del bs bs del del O
015 ht btab nop nop ht btab nop nop O
016 'q' 'Q' dc1 dc1 'q' 'Q' dc1 dc1 C
017 'w' 'W' etb etb 'w' 'W' etb etb C
018 'e' 'E' enq enq 0x20ac 'E' enq enq C
019 'r' 'R' dc2 dc2 'r' 'R' dc2 dc2 C
020 't' 'T' dc4 dc4 't' 'T' dc4 dc4 C
021 'y' 'Y' em em 'y' 'Y' em em C
022 'u' 'U' nak nak 'u' 'U' nak nak C
023 'i' 'I' ht ht 'i' 'I' ht ht C
024 'o' 'O' si si 'o' 'O' si si C
025 'p' 'P' dle dle 'p' 'P' dle dle C
026 '+' '*' esc esc 'h' '{' esc esc O
027 ''' '`' gs gs ']' '}' gs gs O
028 cr cr nl nl cr cr nl nl O
029 lctrl lctrl lctrl lctrl lctrl lctrl lctrl lctrl O
030 'a' 'A' soh soh 'a' 'A' soh soh C
031 's' 'S' dc3 dc3 's' 'S' dc3 dc3 C
032 'd' 'D' eot eot 'd' 'D' eot eot C
033 'f' 'F' ack ack 'f' 'F' ack ack C
034 'g' 'G' bel bel 'g' 'G' bel bel C
035 'h' 'H' bs bs 'h' 'H' bs bs C
036 'j' 'J' nl nl 'j' 'J' nl nl C
037 'k' 'K' vt vt 'k' 'K' vt vt C
038 'l' 'L' ff ff 'l' 'L' ff ff C
039 0xe7 0xc7 nop nop ';' ':' nop nop O
040 nop nop nop nop ''' '"' nop nop O
041 '\' '|' nop nop '`' '~' nop nop O
042 lshift lshift lshift lshift lshift lshift lshift lshift O
043 '~' '^' fs fs '\' '|' fs fs O
044 'z' 'Z' sub sub 'z' 'Z' sub sub C
045 'x' 'X' can can 'x' 'X' can can C
046 'c' 'C' etx etx 'c' 'C' etx etx C
047 'v' 'V' syn syn 'v' 'V' syn syn C
048 'b' 'B' stx stx 'b' 'B' stx stx C
049 'n' 'N' so so 'n' 'N' so so C
050 'm' 'M' cr cr 'm' 'M' cr cr C
051 ',' ';' nop nop nop nop nop nop C
052 '.' ':' nop nop '.' '>' nop nop O
053 '-' '_' nop nop '/' '?' nop nop O
054 rshift rshift rshift rshift rshift rshift rshift rshift O
055 '*' '*' '*' '*' '*' '*' '*' '*' O
056 lalt lalt lalt lalt lalt lalt lalt lalt O
057 ' ' ' ' nul ' ' ' ' ' ' 0x82 ' ' O
058 clock clock clock clock clock clock clock clock O
059 fkey01 fkey13 fkey25 fkey37 scr01 scr11 scr01 scr11 O
060 fkey02 fkey14 fkey26 fkey38 scr02 scr12 scr02 scr12 O
061 fkey03 fkey15 fkey27 fkey39 scr03 scr13 scr03 scr13 O
062 fkey04 fkey16 fkey28 fkey40 scr04 scr14 scr04 scr14 O
063 fkey05 fkey17 fkey29 fkey41 scr05 scr15 scr05 scr15 O
064 fkey06 fkey18 fkey30 fkey42 scr06 scr16 scr06 scr16 O
065 fkey07 fkey19 fkey31 fkey43 scr07 scr07 scr07 scr07 O
066 fkey08 fkey20 fkey32 fkey44 scr08 scr08 scr08 scr08 O
067 fkey09 fkey21 fkey33 fkey45 scr09 scr09 scr09 scr09 O
068 fkey10 fkey22 fkey34 fkey46 scr10 scr10 scr10 scr10 O
069 nlock nlock nlock nlock nlock nlock nlock nlock O
070 slock slock slock slock slock slock slock slock O
071 fkey49 '7' '7' '7' '7' '7' '7' '7' N
072 fkey50 '8' '8' '8' '8' '8' '8' '8' N
073 fkey51 '9' '9' '9' '9' '9' '9' '9' N
074 fkey52 '-' '-' '-' '-' '-' '-' '-' N
075 fkey53 '4' '4' '4' '4' '4' '4' '4' N
076 fkey54 '5' '5' '5' '5' '5' '5' '5' N
077 fkey55 '6' '6' '6' '6' '6' '6' '6' N
078 fkey56 '+' '+' '+' '+' '+' '+' '+' N
079 fkey57 '1' '1' '1' '1' '1' '1' '1' N
080 fkey58 '2' '2' '2' '2' '2' '2' '2' N
081 fkey59 '3' '3' '3' '3' '3' '3' '3' N
082 fkey60 '0' '0' '0' '0' '0' '0' '0' N
083 del '.' '.' '.' '.' '.'
boot boot N 084 nop nop nop nop nop nop nop nop O 085 nop nop nop nop nop nop nop nop O 086 '<' '>' nop nop nop nop nop nop O 087 fkey11 fkey23 fkey35 fkey47 scr11 scr11 scr11 scr11 O 088 fkey12 fkey24 fkey36 fkey48 scr12 scr12 scr12 scr12 O 089 cr cr cr cr cr cr cr cr O 090 rctrl rctrl rctrl rctrl rctrl rctrl rctrl rctrl O 091 '/' '/' '/' '/' '/' '/' '/' '/' O 092 nscr pscr debug debug nop nop nop nop O 093 ralt ralt ralt ralt ralt ralt ralt ralt O 094 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 fkey49 O 095 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 fkey50 O 096 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 fkey51 O 097 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 fkey53 O 098 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 fkey55 O 099 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 fkey57 O 100 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 fkey58 O 101 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 fkey59 O 102 fkey60 paste fkey60 fkey60 fkey60 fkey60 fkey60 fkey60 O 103 fkey61 fkey61 fkey61 fkey61 fkey61 fkey61 boot fkey61 O 104 slock saver slock saver susp nop susp nop O 105 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 O 106 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 O 107 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 O 108 nop nop nop nop nop nop nop nop O diff --git a/share/vt/keymaps/ua.kbd b/share/vt/keymaps/ua.kbd index d031f1d04346..3c20b8e24ef2 100644 --- a/share/vt/keymaps/ua.kbd +++ b/share/vt/keymaps/ua.kbd @@ -1,242 +1,242 @@ # $FreeBSD$ # alt # scan cntrl alt alt cntrl lock # code base shift cntrl shift alt shift cntrl shift state # ------------------------------------------------------------------ 000 nop nop nop nop nop nop nop nop O 001 esc esc esc esc esc esc debug nop O 002 '1' '!' nop nop '1' '!' nop nop O 003 '2' '@' nul nul '2' '"' nul nul O 004 '3' '#' nop nop '3' '/' nop nop O 005 '4' '$' nop nop '4' '$' nop nop O 006 '5' '%' nop nop '5' ':' nop nop O 007 '6' '^' rs rs '6' ',' rs rs O 008 '7' '&' nop nop '7' '.' nop nop O 009 '8' '*' nop nop '8' ';' nop nop O 010 '9' '(' nop nop '9' '?' 
nop nop O 011 '0' ')' nop nop '0' '%' nop nop O - 012 '-' '_' ns ns '-' '_' ns ns O + 012 '-' '_' us us '-' '_' us us O 013 '=' '+' nop nop '=' '+' nop nop O 014 bs bs del del bs bs del del O 015 ht btab nop nop btab btab nop nop O 016 'q' 'Q' dc1 dc1 0x0439 0x0419 dc1 dc1 C 017 'w' 'W' etb etb 0x0446 0x0426 etb etb C 018 'e' 'E' enq enq 0x0443 0x0423 enq enq C 019 'r' 'R' dc2 dc2 0x043a 0x041a dc2 dc2 C 020 't' 'T' dc4 dc4 0x0435 0x0415 dc4 dc4 C 021 'y' 'Y' em em 0x043d 0x041d em em C 022 'u' 'U' nak nak 0x0433 0x0413 nak nak C 023 'i' 'I' ht ht 0x0448 0x0428 ht ht C 024 'o' 'O' si si 0x0449 0x0429 si si C 025 'p' 'P' dle dle 0x0437 0x0417 dle dle C 026 '[' '{' esc esc 0x0445 0x0425 esc esc O 027 ']' '}' gs gs 0x0457 0x0407 0x044a 0x042a O 028 cr cr nl nl cr cr nl nl O 029 lctrl lctrl lctrl lctrl lctrl lctrl lctrl lctrl O 030 'a' 'A' soh soh 0x0444 0x0424 soh soh C 031 's' 'S' dc3 dc3 0x0456 0x0406 0x044b 0x042b C 032 'd' 'D' eot eot 0x0432 0x0412 eot eot C 033 'f' 'F' ack ack 0x0430 0x0410 ack ack C 034 'g' 'G' bel bel 0x043f 0x041f bel bel C 035 'h' 'H' bs bs 0x0440 0x0420 bs bs C 036 'j' 'J' nl nl 0x043e 0x041e nl nl C 037 'k' 'K' vt vt 0x043b 0x041b vt vt C 038 'l' 'L' ff ff 0x0434 0x0414 ff ff C 039 ';' ':' nop nop 0x0436 0x0416 nop nop O 040 ''' '"' nop nop 0x0454 0x0404 0x044d 0x042d O 041 '`' '~' nop nop 0x0491 0x0490 0x0451 0x0401 O 042 lshift lshift lshift lshift lshift lshift lshift lshift O 043 '\' '|' fs fs '\' '|' fs fs O 044 'z' 'Z' sub sub 0x044f 0x042f sub sub C 045 'x' 'X' can can 0x0447 0x0427 can can C 046 'c' 'C' etx etx 0x0441 0x0421 etx etx C 047 'v' 'V' syn syn 0x043c 0x041c syn syn C 048 'b' 'B' stx stx 0x0438 0x0418 stx stx C 049 'n' 'N' so so 0x0442 0x0422 so so C 050 'm' 'M' cr cr 0x044c 0x042c cr cr C 051 ',' '<' nop nop 0x0431 0x0411 nop nop O 052 '.' '>' nop nop 0x044e 0x042e nop nop O 053 '/' '?' nop nop '/' '?' nop nop O 054 rshift rshift rshift rshift rshift rshift rshift rshift O 055 '*' '*' nl nl '*' '*' nl nl O 056 lalt lalt lalt lalt lalt lalt lalt lalt O 057 ' ' ' ' nul ' ' ' ' ' ' susp ' ' O 058 alock clock clock clock clock clock clock clock O 059 fkey01 fkey13 fkey25 fkey37 scr01 scr11 scr01 scr11 O 060 fkey02 fkey14 fkey26 fkey38 scr02 scr12 scr02 scr12 O 061 fkey03 fkey15 fkey27 fkey39 scr03 scr13 fkey90 scr13 O 062 fkey04 fkey16 fkey28 fkey40 scr04 scr14 fkey91 scr14 O 063 fkey05 fkey17 fkey29 fkey41 scr05 scr15 fkey92 scr15 O 064 fkey06 fkey18 fkey30 fkey42 scr06 scr16 scr06 scr16 O 065 fkey07 fkey19 fkey31 fkey43 scr07 scr07 scr07 scr07 O 066 fkey08 fkey20 fkey32 fkey44 scr08 scr08 scr08 scr08 O 067 fkey09 fkey21 fkey33 fkey45 scr09 scr09 scr09 scr09 O 068 fkey10 fkey22 fkey34 fkey46 scr10 scr10 scr10 scr10 O 069 nlock nlock nlock nlock nlock nlock nlock nlock O 070 slock slock slock slock slock slock slock slock O 071 fkey49 '7' '7' '7' '7' '7' '7' '7' N 072 fkey50 '8' '8' '8' '8' '8' '8' '8' N 073 fkey51 '9' '9' '9' '9' '9' '9' '9' N 074 fkey52 '-' '-' '-' '-' '-' '-' '-' N 075 fkey53 '4' '4' '4' '4' '4' '4' '4' N 076 fkey54 '5' '5' '5' '5' '5' '5' '5' N 077 fkey55 '6' '6' '6' '6' '6' '6' '6' N 078 fkey56 '+' '+' '+' '+' '+' '+' '+' N 079 fkey57 '1' '1' '1' '1' '1' '1' '1' N 080 fkey58 '2' '2' '2' '2' '2' '2' '2' N 081 fkey59 '3' '3' '3' '3' '3' '3' '3' N 082 fkey60 '0' '0' '0' '0' '0' '0' '0' N 083 del '.' '.' '.' '.' '.' 
boot boot N 084 alock alock alock alock alock alock alock alock O 085 nop nop nop nop nop nop nop nop O 086 nop nop nop nop nop nop nop nop O 087 fkey11 fkey23 fkey35 fkey47 scr11 scr11 scr11 scr11 O 088 fkey12 fkey24 fkey36 fkey48 scr12 scr12 scr12 scr12 O 089 cr cr nl nl cr cr nl nl O 090 rctrl rctrl rctrl rctrl rctrl rctrl rctrl rctrl O 091 '/' '/' '/' '/' '/' '/' '/' '/' O 092 nscr pscr debug nop nop nop nop nop O 093 ralt ralt ralt ralt ralt ralt ralt ralt O 094 fkey49 fkey49 fkey69 fkey49 fkey49 fkey49 fkey49 fkey49 O 095 fkey50 fkey50 fkey70 fkey50 fkey50 fkey50 fkey50 fkey50 O 096 fkey51 fkey51 fkey71 fkey51 fkey51 fkey51 fkey51 fkey51 O 097 fkey53 fkey53 fkey73 fkey53 fkey53 fkey53 fkey53 fkey53 O 098 fkey55 fkey55 fkey75 fkey55 fkey55 fkey55 fkey55 fkey55 O 099 fkey57 fkey57 fkey77 fkey57 fkey57 fkey57 fkey57 fkey57 O 100 fkey58 fkey58 fkey78 fkey58 fkey58 fkey58 fkey58 fkey58 O 101 fkey59 fkey59 fkey79 fkey59 fkey59 fkey59 fkey59 fkey59 O 102 fkey60 paste fkey80 fkey60 fkey60 fkey60 fkey60 fkey60 O 103 del del fkey81 del fkey61 fkey61 boot boot O 104 slock slock slock slock slock slock slock slock O 105 meta fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 O 106 meta fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 O 107 meta fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 O 108 nop nop nop nop nop nop nop nop O 109 nop nop nop nop nop nop nop nop O 110 nop nop nop nop nop nop nop nop O 111 nop nop nop nop nop nop nop nop O 112 nop nop nop nop nop nop nop nop O 113 nop nop nop nop nop nop nop nop O 114 nop nop nop nop nop nop nop nop O 115 nop nop nop nop nop nop nop nop O 116 nop nop nop nop nop nop nop nop O 117 nop nop nop nop nop nop nop nop O 118 nop nop nop nop nop nop nop nop O 119 nop nop nop nop nop nop nop nop O 120 nop nop nop nop nop nop nop nop O 121 nop nop nop nop nop nop nop nop O 122 nop nop nop nop nop nop nop nop O 123 nop nop nop nop nop nop nop nop O 124 nop nop nop nop nop nop nop nop O 125 nop nop nop nop nop nop nop nop O 126 nop nop nop nop nop nop nop nop O 127 nop nop nop nop nop nop nop nop O # 128 nop nop nop nop nop nop nop nop O 129 esc esc esc esc esc esc debug esc O 130 '1' '!' nop nop '1' '!' nop nop O 131 '2' '"' nul nul '2' '@' nul nul O 132 '3' '/' nop nop '3' '#' nop nop O 133 '4' '$' nop nop '4' '$' nop nop O 134 '5' ':' nop nop '5' '%' nop nop O 135 '6' ',' rs rs '6' '^' rs rs O 136 '7' '.' nop nop '7' '&' nop nop O 137 '8' ';' nop nop '8' '*' nop nop O 138 '9' '?' 
nop nop '9' '(' nop nop O 139 '0' '%' nop nop '0' ')' nop nop O - 140 '-' '_' ns ns '-' '_' ns ns O + 140 '-' '_' us us '-' '_' us us O 141 '=' '+' nop nop '=' '+' nop nop O 142 bs bs del del bs bs del del O 143 ht btab nop nop btab btab nop nop O 144 0x0439 0x0419 dc1 dc1 'q' 'Q' dc1 dc1 C 145 0x0446 0x0426 etb etb 'w' 'W' etb etb C 146 0x0443 0x0423 enq enq 'e' 'E' enq enq C 147 0x043a 0x041a dc2 dc2 'r' 'R' dc2 dc2 C 148 0x0435 0x0415 dc4 dc4 't' 'T' dc4 dc4 C 149 0x043d 0x041d em em 'y' 'Y' em em C 150 0x0433 0x0413 nak nak 'u' 'U' nak nak C 151 0x0448 0x0428 ht ht 'i' 'I' ht ht C 152 0x0449 0x0429 si si 'o' 'O' si si C 153 0x0437 0x0417 dle dle 'p' 'P' dle dle C 154 0x0445 0x0425 esc esc '[' '{' esc esc C 155 0x0457 0x0407 gs gs ']' '}' 0x044a 0x042a C 156 cr cr nl nl cr cr nl nl O 157 lctrl lctrl lctrl lctrl lctrl lctrl lctrl lctrl O 158 0x0444 0x0424 soh soh 'a' 'A' soh soh C 159 0x0456 0x0406 dc3 dc3 's' 'S' 0x044b 0x042b C 160 0x0432 0x0412 eot eot 'd' 'D' eot eot C 161 0x0430 0x0410 ack ack 'f' 'F' ack ack C 162 0x043f 0x041f bel bel 'g' 'G' bel bel C 163 0x0440 0x0420 bs bs 'h' 'H' bs bs C 164 0x043e 0x041e nl nl 'j' 'J' nl nl C 165 0x043b 0x041b vt vt 'k' 'K' vt vt C 166 0x0434 0x0414 ff ff 'l' 'L' ff ff C 167 0x0436 0x0416 nop nop ';' ':' nop nop C 168 0x0454 0x0404 nop nop ''' '"' 0x044d 0x042d C 169 0x0491 0x0490 nop nop '`' '~' 0x0451 0x0401 C 170 lshift lshift lshift lshift lshift lshift lshift lshift O 171 '\' '|' fs fs '\' '|' fs fs O 172 0x044f 0x042f sub sub 'z' 'Z' sub sub C 173 0x0447 0x0427 can can 'x' 'X' can can C 174 0x0441 0x0421 etx etx 'c' 'C' etx etx C 175 0x043c 0x041c syn syn 'v' 'V' syn syn C 176 0x0438 0x0418 stx stx 'b' 'B' stx stx C 177 0x0442 0x0422 so so 'n' 'N' so so C 178 0x044c 0x042c cr cr 'm' 'M' cr cr C 179 0x0431 0x0411 nop nop ',' '<' nop nop C 180 0x044e 0x042e nop nop '.' '>' nop nop C 181 '/' '?' nop nop '/' '?' nop nop O 182 rshift rshift rshift rshift rshift rshift rshift rshift O 183 '*' '*' nl nl '*' '*' nl nl O 184 lalt lalt lalt lalt lalt lalt lalt lalt O 185 ' ' ' ' nul ' ' ' ' ' ' ' ' susp O 186 alock clock clock clock clock clock clock clock O 187 fkey01 fkey13 fkey25 fkey37 scr01 scr11 scr01 scr11 O 188 fkey02 fkey14 fkey26 fkey38 scr02 scr12 scr02 scr12 O 189 fkey03 fkey15 fkey27 fkey39 scr03 scr13 fkey90 scr13 O 190 fkey04 fkey16 fkey28 fkey40 scr04 scr14 fkey91 scr14 O 191 fkey05 fkey17 fkey29 fkey41 scr05 scr15 fkey92 scr15 O 192 fkey06 fkey18 fkey30 fkey42 scr06 scr16 scr06 scr16 O 193 fkey07 fkey19 fkey31 fkey43 scr07 scr07 scr07 scr07 O 194 fkey08 fkey20 fkey32 fkey44 scr08 scr08 scr08 scr08 O 195 fkey09 fkey21 fkey33 fkey45 scr09 scr09 scr09 scr09 O 196 fkey10 fkey22 fkey34 fkey46 scr10 scr10 scr10 scr10 O 197 nlock nlock nlock nlock nlock nlock nlock nlock O 198 slock slock slock slock slock slock slock slock O 199 fkey49 '7' '7' '7' '7' '7' '7' '7' N 200 fkey50 '8' '8' '8' '8' '8' '8' '8' N 201 fkey51 '9' '9' '9' '9' '9' '9' '9' N 202 fkey52 '-' '-' '-' '-' '-' '-' '-' N 203 fkey53 '4' '4' '4' '4' '4' '4' '4' N 204 fkey54 '5' '5' '5' '5' '5' '5' '5' N 205 fkey55 '6' '6' '6' '6' '6' '6' '6' N 206 fkey56 '+' '+' '+' '+' '+' '+' '+' N 207 fkey57 '1' '1' '1' '1' '1' '1' '1' N 208 fkey58 '2' '2' '2' '2' '2' '2' '2' N 209 fkey59 '3' '3' '3' '3' '3' '3' '3' N 210 fkey60 '0' '0' '0' '0' '0' '0' '0' N 211 del '.' '.' '.' '.' '.' 
boot boot N
212 alock alock alock alock alock alock alock alock O
213 nop nop nop nop nop nop nop nop O
214 nop nop nop nop nop nop nop nop O
215 fkey11 fkey23 fkey35 fkey47 scr11 scr11 scr11 scr11 O
216 fkey12 fkey24 fkey36 fkey48 scr12 scr12 scr12 scr12 O
217 cr cr nl nl cr cr nl nl O
218 rctrl rctrl rctrl rctrl rctrl rctrl rctrl rctrl O
219 '/' '/' '/' '/' '/' '/' '/' '/' O
220 nscr pscr debug nop nop nop nop nop O
221 ralt ralt ralt ralt ralt ralt ralt ralt O
222 fkey49 fkey49 fkey69 fkey49 fkey49 fkey49 fkey49 fkey49 O
223 fkey50 fkey50 fkey70 fkey50 fkey50 fkey50 fkey50 fkey50 O
224 fkey51 fkey51 fkey71 fkey51 fkey51 fkey51 fkey51 fkey51 O
225 fkey53 fkey53 fkey73 fkey53 fkey53 fkey53 fkey53 fkey53 O
226 fkey55 fkey55 fkey75 fkey55 fkey55 fkey55 fkey55 fkey55 O
227 fkey57 fkey57 fkey77 fkey57 fkey57 fkey57 fkey57 fkey57 O
228 fkey58 fkey58 fkey78 fkey58 fkey58 fkey58 fkey58 fkey58 O
229 fkey59 fkey59 fkey79 fkey59 fkey59 fkey59 fkey59 fkey59 O
230 fkey60 paste fkey80 fkey60 fkey60 fkey60 fkey60 fkey60 O
231 del del fkey81 del fkey61 fkey61 boot boot O
232 slock slock slock slock slock slock slock slock O
233 meta fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 O
234 meta fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 O
235 meta fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 O
diff --git a/share/vt/keymaps/ua.shift.alt.kbd b/share/vt/keymaps/ua.shift.alt.kbd
index cef2c2797a4f..3159e9acb8ab 100644
--- a/share/vt/keymaps/ua.shift.alt.kbd
+++ b/share/vt/keymaps/ua.shift.alt.kbd
@@ -1,254 +1,254 @@
# $FreeBSD$
#
# keyboard mapping for both the Ukrainian and Russian languages
# by sia@lot.cs.kiev.ua
# based on ru.koi8-r.kbd by ache@FreeBSD.org
# some additions by petr@i.am and kunia@i.am
#
# it has 'i', 'ye', 'yi', "g'" at the positions of '-', '=', '\', '`':
# ->`1234567890-=\<-
# ->ґ1234567890ієї<-
# Lwin,Rwin=Meta, Menu=Screensaver
# PrScr=NextScreen, Shift+PrScr=PrevScreen
# Shift+Insert=paste
# alt
# scan cntrl alt alt cntrl lock
# code base shift cntrl shift alt shift cntrl shift state
# ------------------------------------------------------------------
000 nop nop nop nop nop nop nop nop O
001 esc esc nop nop esc esc debug nop O
002 '1' '!' nop nop '1' '!' nop nop O
003 '2' '@' nul nul '2' '"' 0x2500 0x2500 O
004 '3' '#' nop nop '3' ''' nop nop O
005 '4' '$' nop nop '4' ';' nop nop O
006 '5' '%' nop nop '5' ':' nop nop O
007 '6' '^' rs rs '6' ',' 0xb7 0xb7 O
008 '7' '&' nop nop '7' '.'
nop nop O 009 '8' '*' nop nop '8' '*' nop nop O 010 '9' '(' nop nop '9' '(' nop nop O 011 '0' ')' nop nop '0' ')' nop nop O - 012 '-' '_' ns ns 0x0456 0x0406 0xf7 0xf7 O + 012 '-' '_' us us 0x0456 0x0406 0xf7 0xf7 O 013 '=' '+' nop nop 0x0454 0x0404 nop nop O 014 bs bs del del bs bs 0x042a 0x042a O 015 ht btab nop nop ht btab nop nop O 016 'q' 'Q' dc1 dc1 0x0439 0x0419 0x2592 0x2592 C 017 'w' 'W' etb etb 0x0446 0x0426 0x2248 0x2248 C 018 'e' 'E' enq enq 0x0443 0x0423 0x2518 0x2518 C 019 'r' 'R' dc2 dc2 0x043a 0x041a 0x2593 0x2593 C 020 't' 'T' dc4 dc4 0x0435 0x0415 0x25a0 0x25a0 C 021 'y' 'Y' em em 0x043d 0x041d 0x2265 0x2265 C 022 'u' 'U' nak nak 0x0433 0x0413 0x2219 0x2219 C 023 'i' 'I' ht ht 0x0448 0x0428 0x2534 0x2534 C 024 'o' 'O' si si 0x0449 0x0429 0x2590 0x2590 C 025 'p' 'P' dle dle 0x0437 0x0417 0x2591 0x2591 C 026 '[' '{' esc esc 0x0445 0x0425 0x2321 0x2321 O 027 ']' '}' gs gs 0x044a 0x042a 0xb2 0xb2 O 028 cr cr nl nl cr cr 0x253c 0x253c O 029 lctrl lctrl lctrl lctrl lctrl lctrl lctrl lctrl O 030 'a' 'A' soh soh 0x0444 0x0424 0x2502 0x2502 C 031 's' 'S' dc3 dc3 0x044b 0x042b 0x2320 0x2320 C 032 'd' 'D' eot eot 0x0432 0x0412 0x2514 0x2514 C 033 'f' 'F' ack ack 0x0430 0x0410 0x251c 0x251c C 034 'g' 'G' bel bel 0x043f 0x041f 0x2524 0x2524 C 035 'h' 'H' bs bs 0x0440 0x0420 0x252c 0x252c C 036 'j' 'J' nl nl 0x043e 0x041e 0x253c 0x253c C 037 'k' 'K' vt vt 0x043b 0x041b 0x2580 0x2580 C 038 'l' 'L' ff ff 0x0434 0x0414 0x2584 0x2584 C 039 ';' ':' nop nop 0x0436 0x0416 nop nop O 040 ''' '"' nop nop 0x044d 0x042d nop nop O 041 '`' '~' nop nop 0x0491 0x0490 nop nop O 042 lshift lshift lshift lshift lshift lshift lshift lshift O 043 '\' '|' fs fs 0x0457 0x0407 0xb0 0xb0 O 044 'z' 'Z' sub sub 0x044f 0x042f 0xa0 0xa0 C 045 'x' 'X' can can 0x0447 0x0427 0x2264 0x2264 C 046 'c' 'C' etx etx 0x0441 0x0421 0x2510 0x2510 C 047 'v' 'V' syn syn 0x043c 0x041c 0x221a 0x221a C 048 'b' 'B' stx stx 0x0438 0x0418 0x250c 0x250c C 049 'n' 'N' so so 0x0442 0x0422 0x258c 0x258c C 050 'm' 'M' cr cr 0x044c 0x042c 0x2588 0x2588 C 051 ',' '<' nop nop 0x0431 0x0411 nop nop O 052 '.' '>' nop nop 0x044e 0x042e nop nop O 053 '/' '?' 
  nop nop 0x0451 0x0401 nop nop  O
  054  rshift rshift rshift rshift rshift rshift rshift rshift  O
  055  '*' '*' nl nl '*' '*' 0x253c 0x253c  O
  056  lalt lalt lalt lalt lalt lalt lalt lalt  O
  057  ' ' ' ' ' ' ' ' ' ' ' ' 0x2550 0x2550  O
  058  alock clock clock clock clock clock clock clock  O
  059  fkey01 fkey13 fkey25 fkey37 scr01 scr11 scr01 scr11  O
  060  fkey02 fkey14 fkey26 fkey38 scr02 scr12 scr02 scr12  O
  061  fkey03 fkey15 fkey27 fkey39 scr03 scr13 scr03 scr13  O
  062  fkey04 fkey16 fkey28 fkey40 scr04 scr14 scr04 scr14  O
  063  fkey05 fkey17 fkey29 fkey41 scr05 scr15 scr05 scr15  O
  064  fkey06 fkey18 fkey30 fkey42 scr06 scr16 scr06 scr16  O
  065  fkey07 fkey19 fkey31 fkey43 scr07 scr07 scr07 scr07  O
  066  fkey08 fkey20 fkey32 fkey44 scr08 scr08 scr08 scr08  O
  067  fkey09 fkey21 fkey33 fkey45 scr09 scr09 scr09 scr09  O
  068  fkey10 fkey22 fkey34 fkey46 scr10 scr10 scr10 scr10  O
  069  nlock nlock nlock nlock nlock nlock nlock nlock  O
  070  slock slock slock slock slock slock slock slock  O
  071  fkey49 '7' '7' '7' 0x0407 0x0407 0x0407 0x0407  N
  072  fkey50 '8' '8' '8' 0x2566 0x2566 0x2566 0x2566  N
  073  fkey51 '9' '9' '9' 0x2567 0x2567 0x2567 0x2567  N
  074  fkey52 '-' '-' '-' 0x0491 0x0491 0x0491 0x0491  N
  075  fkey53 '4' '4' '4' 0x0404 0x0404 0x0404 0x0404  N
  076  fkey48 '5' '5' '5' 0x2563 0x2563 0x2563 0x2563  N
  077  fkey55 '6' '6' '6' 0x0406 0x0406 0x0406 0x0406  N
  078  fkey56 '+' '+' '+' 0x255a 0x255a 0x255a 0x255a  N
  079  fkey57 '1' '1' '1' 0x2560 0x2560 0x2560 0x2560  N
  080  fkey58 '2' '2' '2' 0x2561 0x2561 0x2561 0x2561  N
  081  fkey59 '3' '3' '3' 0x0401 0x0401 0x0401 0x0401  N
  082  fkey60 '0' '0' '0' 0x255f 0x255f 0x255f 0x255f  N
  083  fkey54 '.' del del 0x042a 0x042a boot halt  N
  084  alock alock alock alock alock alock alock alock  O
  085  nop nop nop nop nop nop nop nop  O
  086  nop nop nop nop nop nop nop nop  O
  087  fkey11 fkey23 fkey35 fkey47 scr11 scr11 scr11 scr11  O
  088  fkey12 fkey24 fkey36 fkey48 scr12 scr12 scr12 scr12  O
  089  cr cr nl nl 0x2588 0x2588 0x253c 0x253c  O
  090  alock rctrl rctrl rctrl rctrl rctrl rctrl rctrl  O
  091  '/' '/' nop nop 0x255e 0x255e nop nop  O
  092  nscr pscr debug nop nop nop nop nop  O
  093  ralt ralt ralt ralt ralt ralt ralt ralt  O
  094  fkey49 fkey49 fkey69 fkey49 fkey49 fkey49 fkey49 fkey49  O
  095  fkey50 fkey50 fkey70 fkey50 fkey50 fkey50 fkey50 fkey50  O
  096  fkey51 fkey51 fkey71 fkey51 fkey51 fkey51 fkey51 fkey51  O
  097  fkey53 fkey53 fkey73 fkey53 fkey53 fkey53 fkey53 fkey53  O
  098  fkey55 fkey55 fkey75 fkey55 fkey55 fkey55 fkey55 fkey55  O
  099  fkey57 fkey57 fkey77 fkey57 fkey57 fkey57 fkey57 fkey57  O
  100  fkey58 fkey58 fkey78 fkey58 fkey58 fkey58 fkey58 fkey58  O
  101  fkey59 fkey59 fkey79 fkey59 fkey59 fkey59 fkey59 fkey59  O
  102  fkey60 paste fkey80 fkey60 fkey60 fkey60 fkey60 fkey60  O
  103  del del fkey81 del fkey61 fkey61 boot pdwn  O
  104  slock saver slock saver susp nop susp nop  O
  105  meta fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62  O
  106  meta fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63  O
  107  saver fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64  O
  108  nop nop nop nop nop nop nop nop  O
  109  nop nop nop nop nop nop nop nop  O
  110  nop nop nop nop nop nop nop nop  O
  111  nop nop nop nop nop nop nop nop  O
  112  nop nop nop nop nop nop nop nop  O
  113  nop nop nop nop nop nop nop nop  O
  114  nop nop nop nop nop nop nop nop  O
  115  nop nop nop nop nop nop nop nop  O
  116  nop nop nop nop nop nop nop nop  O
  117  nop nop nop nop nop nop nop nop  O
  118  nop nop nop nop nop nop nop nop  O
  119  nop nop nop nop nop nop nop nop  O
  120  nop nop nop nop nop nop nop nop  O
  121  nop nop nop nop nop nop nop nop  O
  122  nop nop nop nop nop nop nop nop  O
  123  nop nop nop nop nop nop nop nop  O
  124  nop nop nop nop nop nop nop nop  O
  125  nop nop nop nop nop nop nop nop  O
  126  nop nop nop nop nop nop nop nop  O
  127  nop nop nop nop nop nop nop nop  O
  128  nop nop nop nop nop nop nop nop  O
  129  esc esc nop nop esc esc debug nop  O
  130  '1' '!' nop nop '1' '!' nop nop  O
  131  '2' '"' nul nul '2' '@' 0x2500 0x2500  O
  132  '3' ''' nop nop '3' '#' nop nop  O
  133  '4' ';' nop nop '4' '$' nop nop  O
  134  '5' ':' nop nop '5' '%' nop nop  O
  135  '6' ',' rs rs '6' '^' 0xb7 0xb7  O
  136  '7' '.' nop nop '7' '&' nop nop  O
  137  '8' '*' nop nop '8' '*' nop nop  O
  138  '9' '(' nop nop '9' '(' nop nop  O
  139  '0' ')' nop nop '0' ')' nop nop  O
- 140  0x0456 0x0406 ns ns '-' '_' 0xf7 0xf7  O
+ 140  0x0456 0x0406 us us '-' '_' 0xf7 0xf7  O
  141  0x0454 0x0404 nop nop '=' '+' nop nop  O
  142  bs bs del del bs bs 0x042a 0x042a  O
  143  ht btab nop nop ht btab nop nop  O
  144  0x0439 0x0419 dc1 dc1 'q' 'Q' 0x2592 0x2592  C
  145  0x0446 0x0426 etb etb 'w' 'W' 0x2248 0x2248  C
  146  0x0443 0x0423 enq enq 'e' 'E' 0x2518 0x2518  C
  147  0x043a 0x041a dc2 dc2 'r' 'R' 0x2593 0x2593  C
  148  0x0435 0x0415 dc4 dc4 't' 'T' 0x25a0 0x25a0  C
  149  0x043d 0x041d em em 'y' 'Y' 0x2265 0x2265  C
  150  0x0433 0x0413 nak nak 'u' 'U' 0x2219 0x2219  C
  151  0x0448 0x0428 ht ht 'i' 'I' 0x2534 0x2534  C
  152  0x0449 0x0429 si si 'o' 'O' 0x2590 0x2590  C
  153  0x0437 0x0417 dle dle 'p' 'P' 0x2591 0x2591  C
  154  0x0445 0x0425 esc esc '[' '{' 0x2321 0x2321  C
  155  0x044a 0x042a gs gs ']' '}' 0xb2 0xb2  C
  156  cr cr nl nl cr cr 0x253c 0x253c  O
  157  lctrl lctrl lctrl lctrl lctrl lctrl lctrl lctrl  O
  158  0x0444 0x0424 soh soh 'a' 'A' 0x2502 0x2502  C
  159  0x044b 0x042b dc3 dc3 's' 'S' 0x2320 0x2320  C
  160  0x0432 0x0412 eot eot 'd' 'D' 0x2514 0x2514  C
  161  0x0430 0x0410 ack ack 'f' 'F' 0x251c 0x251c  C
  162  0x043f 0x041f bel bel 'g' 'G' 0x2524 0x2524  C
  163  0x0440 0x0420 bs bs 'h' 'H' 0x252c 0x252c  C
  164  0x043e 0x041e nl nl 'j' 'J' 0x253c 0x253c  C
  165  0x043b 0x041b vt vt 'k' 'K' 0x2580 0x2580  C
  166  0x0434 0x0414 ff ff 'l' 'L' 0x2584 0x2584  C
  167  0x0436 0x0416 nop nop ';' ':' nop nop  C
  168  0x044d 0x042d nop nop ''' '"' nop nop  C
  169  0x0491 0x0490 nop nop '`' '~' nop nop  C
  170  lshift lshift lshift lshift lshift lshift lshift lshift  O
  171  0x0457 0x0407 fs fs '\' '|' 0xb0 0xb0  O
  172  0x044f 0x042f sub sub 'z' 'Z' 0xa0 0xa0  C
  173  0x0447 0x0427 can can 'x' 'X' 0x2264 0x2264  C
  174  0x0441 0x0421 etx etx 'c' 'C' 0x2510 0x2510  C
  175  0x043c 0x041c syn syn 'v' 'V' 0x221a 0x221a  C
  176  0x0438 0x0418 stx stx 'b' 'B' 0x250c 0x250c  C
  177  0x0442 0x0422 so so 'n' 'N' 0x258c 0x258c  C
  178  0x044c 0x042c cr cr 'm' 'M' 0x2588 0x2588  C
  179  0x0431 0x0411 nop nop ',' '<' nop nop  C
  180  0x044e 0x042e nop nop '.' '>' nop nop  C
  181  0x0451 0x0401 nop nop '/' '?' nop nop  O
  182  rshift rshift rshift rshift rshift rshift rshift rshift  O
  183  '*' '*' nl nl '*' '*' 0x253c 0x253c  O
  184  lalt lalt lalt lalt lalt lalt lalt lalt  O
  185  ' ' ' ' ' ' ' ' ' ' ' ' 0x2550 0x2550  O
  186  alock clock clock clock clock clock clock clock  O
  187  fkey01 fkey13 fkey25 fkey37 scr01 scr11 scr01 scr11  O
  188  fkey02 fkey14 fkey26 fkey38 scr02 scr12 scr02 scr12  O
  189  fkey03 fkey15 fkey27 fkey39 scr03 scr13 scr03 scr13  O
  190  fkey04 fkey16 fkey28 fkey40 scr04 scr14 scr04 scr14  O
  191  fkey05 fkey17 fkey29 fkey41 scr05 scr15 scr05 scr15  O
  192  fkey06 fkey18 fkey30 fkey42 scr06 scr16 scr06 scr16  O
  193  fkey07 fkey19 fkey31 fkey43 scr07 scr07 scr07 scr07  O
  194  fkey08 fkey20 fkey32 fkey44 scr08 scr08 scr08 scr08  O
  195  fkey09 fkey21 fkey33 fkey45 scr09 scr09 scr09 scr09  O
  196  fkey10 fkey22 fkey34 fkey46 scr10 scr10 scr10 scr10  O
  197  nlock nlock nlock nlock nlock nlock nlock nlock  O
  198  slock slock slock slock slock slock slock slock  O
  199  fkey49 '7' '7' '7' 0x0407 0x0407 0x0407 0x0407  N
  200  fkey50 '8' '8' '8' 0x2566 0x2566 0x2566 0x2566  N
  201  fkey51 '9' '9' '9' 0x2567 0x2567 0x2567 0x2567  N
  202  fkey52 '-' '-' '-' 0x0491 0x0491 0x0491 0x0491  N
  203  fkey53 '4' '4' '4' 0x0404 0x0404 0x0404 0x0404  N
  204  fkey48 '5' '5' '5' 0x2563 0x2563 0x2563 0x2563  N
  205  fkey55 '6' '6' '6' 0x0406 0x0406 0x0406 0x0406  N
  206  fkey56 '+' '+' '+' 0x255a 0x255a 0x255a 0x255a  N
  207  fkey57 '1' '1' '1' 0x2560 0x2560 0x2560 0x2560  N
  208  fkey58 '2' '2' '2' 0x2561 0x2561 0x2561 0x2561  N
  209  fkey59 '3' '3' '3' 0x0401 0x0401 0x0401 0x0401  N
  210  fkey60 '0' '0' '0' 0x255f 0x255f 0x255f 0x255f  N
  211  del '.' del del 0x042a 0x042a boot halt  N
  212  alock alock alock alock alock alock alock alock  O
  213  nop nop nop nop nop nop nop nop  O
  214  nop nop nop nop nop nop nop nop  O
  215  fkey11 fkey23 fkey35 fkey47 scr11 scr11 scr11 scr11  O
  216  fkey12 fkey24 fkey36 fkey48 scr12 scr12 scr12 scr12  O
  217  cr cr nl nl 0x2588 0x2588 0x253c 0x253c  O
  218  alock rctrl rctrl rctrl rctrl rctrl rctrl rctrl  O
  219  '/' '/' nop nop 0x255e 0x255e nop nop  O
  220  nscr pscr debug nop nop nop nop nop  O
  221  ralt ralt ralt ralt ralt ralt ralt ralt  O
  222  fkey49 fkey49 fkey69 fkey49 fkey49 fkey49 fkey49 fkey49  O
  223  fkey50 fkey50 fkey70 fkey50 fkey50 fkey50 fkey50 fkey50  O
  224  fkey51 fkey51 fkey71 fkey51 fkey51 fkey51 fkey51 fkey51  O
  225  fkey53 fkey53 fkey73 fkey53 fkey53 fkey53 fkey53 fkey53  O
  226  fkey55 fkey55 fkey75 fkey55 fkey55 fkey55 fkey55 fkey55  O
  227  fkey57 fkey57 fkey77 fkey57 fkey57 fkey57 fkey57 fkey57  O
  228  fkey58 fkey58 fkey78 fkey58 fkey58 fkey58 fkey58 fkey58  O
  229  fkey59 fkey59 fkey79 fkey59 fkey59 fkey59 fkey59 fkey59  O
  230  fkey60 paste fkey80 fkey60 fkey60 fkey60 fkey60 fkey60  O
  231  del del fkey81 del fkey61 fkey61 boot pdwn  O
  232  slock saver slock saver susp nop susp nop  O
  233  meta fkey62 fkey62 fkey62 fkey62 fkey62 fkey62 fkey62  O
  234  meta fkey63 fkey63 fkey63 fkey63 fkey63 fkey63 fkey63  O
  235  saver fkey64 fkey64 fkey64 fkey64 fkey64 fkey64 fkey64  O
  236  nop nop nop nop nop nop nop nop  O
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 6bf251fcde53..1bf735fd3eaa 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -1,2470 +1,2474 @@
/*-
 * Copyright (c) 2003 Peter Wemm.
 * Copyright (c) 1992 Terrence R. Lambert.
 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_atpic.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" #include "opt_mp_watchdog.h" #include "opt_perfmon.h" #include "opt_platform.h" #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #ifndef KDB #error KDB must be enabled in order for DDB to work! 
#endif #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PERFMON #include #endif #include #ifdef SMP #include #endif #ifdef FDT #include #endif #ifdef DEV_ATPIC #include #else #include #endif #include #include #include /* Sanity check for __curthread() */ CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); extern u_int64_t hammer_time(u_int64_t, u_int64_t); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) static void cpu_startup(void *); static void get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, size_t xfpusave_len); static int set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, size_t xfpustate_len); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); /* Preload data parse function */ static caddr_t native_parse_preload_data(u_int64_t); /* Native function to fetch and parse the e820 map */ static void native_parse_memmap(caddr_t, vm_paddr_t *, int *); /* Default init_ops implementation. */ struct init_ops init_ops = { .parse_preload_data = native_parse_preload_data, .early_clock_source_init = i8254_init, .early_delay = i8254_delay, .parse_memmap = native_parse_memmap, #ifdef SMP .mp_bootaddress = mp_bootaddress, .start_all_aps = native_start_all_aps, #endif .msi_init = msi_init, }; /* * The file "conf/ldscript.amd64" defines the symbol "kernphys". Its value is * the physical address at which the kernel is loaded. */ extern char kernphys[]; struct msgbuf *msgbufp; /* * Physical address of the EFI System Table. Stashed from the metadata hints * passed into the kernel and used by the EFI code to call runtime services. */ vm_paddr_t efi_systbl; /* Intel ICH registers */ #define ICH_PMBASE 0x400 #define ICH_SMI_EN ICH_PMBASE + 0x30 int _udatasel, _ucodesel, _ucode32sel, _ufssel, _ugssel; int cold = 1; long Maxmem = 0; long realmem = 0; /* * The number of PHYSMAP entries must be one less than the number of * PHYSSEG entries because the PHYSMAP entry that spans the largest * physical address that is accessible by ISA DMA is split into two * PHYSSEG entries. */ #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t phys_avail[PHYSMAP_SIZE + 2]; vm_paddr_t dump_avail[PHYSMAP_SIZE + 2]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2) #define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2) struct kva_md_info kmi; static struct trapframe proc0_tf; struct region_descriptor r_gdt, r_idt; struct pcpu __pcpu[MAXCPU]; struct mtx icu_lock; struct mem_range_softc mem_range_softc; struct mtx dt_lock; /* lock for GDT and LDT */ void (*vmm_resume_p)(void); static void cpu_startup(dummy) void *dummy; { uintmax_t memsize; char *sysenv; /* * On MacBooks, we need to disallow the legacy USB circuit to * generate an SMI# because this can cause several problems, * namely: incorrect CPU frequency detection and failure to * start the APs. * We do this by disabling a bit in the SMI_EN (SMI Control and * Enable register) of the Intel ICH LPC Interface Bridge. 
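 *
 * The SMBIOS product string tested below is visible from userland as
 * well; a minimal sketch using kenv(2) (error handling omitted):
 *
 *	#include <kenv.h>
 *	#include <stdio.h>
 *
 *	char prod[64];
 *	if (kenv(KENV_GET, "smbios.system.product", prod, sizeof(prod)) != -1)
 *		printf("%s\n", prod);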
*/ sysenv = kern_getenv("smbios.system.product"); if (sysenv != NULL) { if (strncmp(sysenv, "MacBook1,1", 10) == 0 || strncmp(sysenv, "MacBook3,1", 10) == 0 || strncmp(sysenv, "MacBook4,1", 10) == 0 || strncmp(sysenv, "MacBookPro1,1", 13) == 0 || strncmp(sysenv, "MacBookPro1,2", 13) == 0 || strncmp(sysenv, "MacBookPro3,1", 13) == 0 || strncmp(sysenv, "MacBookPro4,1", 13) == 0 || strncmp(sysenv, "Macmini1,1", 10) == 0) { if (bootverbose) printf("Disabling LEGACY_USB_EN bit on " "Intel ICH.\n"); outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8); } freeenv(sysenv); } /* * Good {morning,afternoon,evening,night}. */ startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif /* * Display physical memory if SMBIOS reports reasonable amount. */ memsize = 0; sysenv = kern_getenv("smbios.memory.enabled"); if (sysenv != NULL) { memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10; freeenv(sysenv); } if (memsize < ptoa((uintmax_t)vm_cnt.v_free_count)) memsize = ptoa((uintmax_t)Maxmem); printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20); realmem = atop(memsize); /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)vm_cnt.v_free_count), ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); cpu_setregs(); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by call * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe sf, *sfp; struct pcb *pcb; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; char *xfpusave; size_t xfpusave_len; int sig; int oonstack; td = curthread; pcb = td->td_pcb; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) { xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu); xfpusave = __builtin_alloca(xfpusave_len); } else { xfpusave_len = 0; xfpusave = NULL; } /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); fpstate_drop(td); sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase; sf.sf_uc.uc_mcontext.mc_gsbase = pcb->pcb_gsbase; bzero(sf.sf_uc.uc_mcontext.mc_spare, sizeof(sf.sf_uc.uc_mcontext.mc_spare)); bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); /* Allocate space for the signal handler context. 
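 *
 * For illustration, the userland consumer of the frame built here is an
 * ordinary sigaction(2) handler; the three arguments loaded into %rdi,
 * %rsi and %rdx below arrive as (sig, info, ucontext).  A minimal
 * sketch, with a hypothetical handler name:
 *
 *	#include <signal.h>
 *	#include <string.h>
 *
 *	static void
 *	handler(int sig, siginfo_t *si, void *ucp)	// ucp points at sf_uc
 *	{
 *	}
 *
 *	struct sigaction sa;
 *	memset(&sa, 0, sizeof(sa));
 *	sa.sa_sigaction = handler;
 *	sa.sa_flags = SA_SIGINFO;
 *	sigaction(SIGSEGV, &sa, NULL);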
*/ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size; #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sp = (char *)regs->tf_rsp - 128; if (xfpusave != NULL) { sp -= xfpusave_len; sp = (char *)((unsigned long)sp & ~0x3Ful); sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp; } sp -= sizeof(struct sigframe); /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned long)sp & ~0xFul); /* Build the argument list for the signal handler. */ regs->tf_rdi = sig; /* arg 1 in %rdi */ regs->tf_rdx = (register_t)&sfp->sf_uc; /* arg 3 in %rdx */ bzero(&sf.sf_si, sizeof(sf.sf_si)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ regs->tf_rsi = (register_t)&sfp->sf_si; /* arg 2 in %rsi */ sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* maybe a translated signal */ regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */ } else { /* Old FreeBSD-style arguments. */ regs->tf_rsi = ksi->ksi_code; /* arg 2 in %rsi */ regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */ sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0 || (xfpusave != NULL && copyout(xfpusave, (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len) != 0)) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_rsp = (long)sfp; regs->tf_rip = p->p_sysent->sv_sigcode_base; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_ss = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; set_pcb_flags(pcb, PCB_FULL_IRET); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. * * MPSAFE */ int sys_sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; struct pcb *pcb; struct proc *p; struct trapframe *regs; ucontext_t *ucp; char *xfpustate; size_t xfpustate_len; long rflags; int cs, error, ret; ksiginfo_t ksi; pcb = td->td_pcb; p = td->td_proc; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) { uprintf("pid %d (%s): sigreturn copyin failed\n", p->p_pid, td->td_name); return (error); } ucp = &uc; if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) { uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid, td->td_name, ucp->uc_mcontext.mc_flags); return (EINVAL); } regs = td->td_frame; rflags = ucp->uc_mcontext.mc_rflags; /* * Don't allow users to change privileged or reserved flags. */ if (!EFL_SECURE(rflags, regs->tf_rflags)) { uprintf("pid %d (%s): sigreturn rflags = 0x%lx\n", p->p_pid, td->td_name, rflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. 
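 *
 * With the GDT defined later in this file, the check below admits only
 * selectors whose requested privilege level is SEL_UPL (ring 3), e.g.:
 *
 *	CS_SECURE(GSEL(GUCODE_SEL, SEL_UPL))	// RPL 3: accepted
 *	CS_SECURE(GSEL(GCODE_SEL, SEL_KPL))	// RPL 0: rejected, SIGBUS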
*/ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { uprintf("pid %d (%s): sigreturn cs = 0x%x\n", p->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_rip; trapsignal(td, &ksi); return (EINVAL); } if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) { xfpustate_len = uc.uc_mcontext.mc_xfpustate_len; if (xfpustate_len > cpu_max_ext_state_size - sizeof(struct savefpu)) { uprintf("pid %d (%s): sigreturn xfpusave_len = 0x%zx\n", p->p_pid, td->td_name, xfpustate_len); return (EINVAL); } xfpustate = __builtin_alloca(xfpustate_len); error = copyin((const void *)uc.uc_mcontext.mc_xfpustate, xfpustate, xfpustate_len); if (error != 0) { uprintf( "pid %d (%s): sigreturn copying xfpustate failed\n", p->p_pid, td->td_name); return (error); } } else { xfpustate = NULL; xfpustate_len = 0; } ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len); if (ret != 0) { uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n", p->p_pid, td->td_name, ret); return (ret); } bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs)); pcb->pcb_fsbase = ucp->uc_mcontext.mc_fsbase; pcb->pcb_gsbase = ucp->uc_mcontext.mc_gsbase; #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); set_pcb_flags(pcb, PCB_FULL_IRET); return (EJUSTRETURN); } #ifdef COMPAT_FREEBSD4 int freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap) { return sys_sigreturn(td, (struct sigreturn_args *)uap); } #endif /* * Reset registers to default values on exec. */ void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; mtx_lock(&dt_lock); if (td->td_proc->p_md.md_ldt != NULL) user_ldt_free(td); else mtx_unlock(&dt_lock); pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; clear_pcb_flags(pcb, PCB_32BIT); pcb->pcb_initial_fpucw = __INITIAL_FPUCW__; set_pcb_flags(pcb, PCB_FULL_IRET); bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = imgp->entry_addr; regs->tf_rsp = ((stack - 8) & ~0xFul) + 8; regs->tf_rdi = stack; /* argv */ regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; td->td_retval[1] = 0; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == curpcb) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } clear_pcb_flags(pcb, PCB_DBREGS); } /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); } void cpu_setregs(void) { register_t cr0; cr0 = rcr0(); /* * CR0_MP, CR0_NE and CR0_TS are also set by npx_probe() for the * BSP. See the comments there about why we set them. 
*/ cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; load_cr0(cr0); } /* * Initialize amd64 and configure to run kernel */ /* * Initialize segments & interrupt table */ struct user_segment_descriptor gdt[NGDT * MAXCPU];/* global descriptor tables */ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ static char dblfault_stack[PAGE_SIZE] __aligned(16); static char nmi0_stack[PAGE_SIZE] __aligned(16); CTASSERT(sizeof(struct nmi_pcpu) == 16); struct amd64tss common_tss[MAXCPU]; /* * Software prototypes -- in more palatable form. * * Keep GUFS32, GUGS32, GUCODE32 and GUDATA at the same * slots as corresponding segments for i386 kernel. */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GNULL2_SEL 1 Null Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUFS32_SEL 2 32 bit %gs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUGS32_SEL 3 32 bit %fs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GCODE_SEL 4 Code Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_long = 1, .ssd_def32 = 0, .ssd_gran = 1 }, /* GDATA_SEL 5 Data Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_long = 1, .ssd_def32 = 0, .ssd_gran = 1 }, /* GUCODE32_SEL 6 32 bit Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUDATA_SEL 7 32/64 bit Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUCODE_SEL 8 64 bit Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 1, .ssd_def32 = 0, .ssd_gran = 1 }, /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ { .ssd_base = 0x0, .ssd_limit = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE - 1, .ssd_type = SDT_SYSTSS, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Actually, the TSS is a system descriptor which is double size */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUSERLDT_SEL 11 LDT Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUSERLDT_SEL 12 LDT Descriptor, double size */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, }; void setidt(int idx, inthand_t *func, int typ, int dpl, int ist) { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (uintptr_t)func; ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL); ip->gd_ist = ist; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((uintptr_t)func)>>16 ; } extern inthand_t IDTVEC(div), 
IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), IDTVEC(dblfault), #ifdef KDTRACE_HOOKS IDTVEC(dtrace_ret), #endif #ifdef XENHVM IDTVEC(xen_intr_upcall), #endif IDTVEC(fast_syscall), IDTVEC(fast_syscall32); #ifdef DDB /* * Display the index and function name of any IDT entries that don't use * the default 'rsvd' entry point. */ DB_SHOW_COMMAND(idt, db_show_idt) { struct gate_descriptor *ip; int idx; uintptr_t func; ip = idt; for (idx = 0; idx < NIDT && !db_pager_quit; idx++) { func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset); if (func != (uintptr_t)&IDTVEC(rsvd)) { db_printf("%3d\t", idx); db_printsym(func, DB_STGY_PROC); db_printf("\n"); } ip++; } } /* Show privileged registers. */ DB_SHOW_COMMAND(sysregs, db_show_sysregs) { struct { uint16_t limit; uint64_t base; } __packed idtr, gdtr; uint16_t ldt, tr; __asm __volatile("sidt %0" : "=m" (idtr)); db_printf("idtr\t0x%016lx/%04x\n", (u_long)idtr.base, (u_int)idtr.limit); __asm __volatile("sgdt %0" : "=m" (gdtr)); db_printf("gdtr\t0x%016lx/%04x\n", (u_long)gdtr.base, (u_int)gdtr.limit); __asm __volatile("sldt %0" : "=r" (ldt)); db_printf("ldtr\t0x%04x\n", ldt); __asm __volatile("str %0" : "=r" (tr)); db_printf("tr\t0x%04x\n", tr); db_printf("cr0\t0x%016lx\n", rcr0()); db_printf("cr2\t0x%016lx\n", rcr2()); db_printf("cr3\t0x%016lx\n", rcr3()); db_printf("cr4\t0x%016lx\n", rcr4()); if (rcr4() & CR4_XSAVE) db_printf("xcr0\t0x%016lx\n", rxcr(0)); db_printf("EFER\t0x%016lx\n", rdmsr(MSR_EFER)); if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX)) db_printf("FEATURES_CTL\t%016lx\n", rdmsr(MSR_IA32_FEATURE_CONTROL)); db_printf("DEBUG_CTL\t0x%016lx\n", rdmsr(MSR_DEBUGCTLMSR)); db_printf("PAT\t0x%016lx\n", rdmsr(MSR_PAT)); db_printf("GSBASE\t0x%016lx\n", rdmsr(MSR_GSBASE)); } DB_SHOW_COMMAND(dbregs, db_show_dbregs) { db_printf("dr0\t0x%016lx\n", rdr0()); db_printf("dr1\t0x%016lx\n", rdr1()); db_printf("dr2\t0x%016lx\n", rdr2()); db_printf("dr3\t0x%016lx\n", rdr3()); db_printf("dr6\t0x%016lx\n", rdr6()); db_printf("dr7\t0x%016lx\n", rdr7()); } #endif void sdtossd(sd, ssd) struct user_segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_long = sd->sd_long; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } void ssdtosd(ssd, sd) struct soft_segment_descriptor *ssd; struct user_segment_descriptor *sd; { sd->sd_lobase = (ssd->ssd_base) & 0xffffff; sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff; sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; sd->sd_type = ssd->ssd_type; sd->sd_dpl = ssd->ssd_dpl; sd->sd_p = ssd->ssd_p; sd->sd_long = ssd->ssd_long; sd->sd_def32 = ssd->ssd_def32; sd->sd_gran = ssd->ssd_gran; } void ssdtosyssd(ssd, sd) struct soft_segment_descriptor *ssd; struct system_segment_descriptor *sd; { sd->sd_lobase = (ssd->ssd_base) & 0xffffff; sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful; sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; sd->sd_type = ssd->ssd_type; sd->sd_dpl = ssd->ssd_dpl; sd->sd_p = ssd->ssd_p; sd->sd_gran = ssd->ssd_gran; } #if !defined(DEV_ATPIC) && defined(DEV_ISA) #include #include /* * Return a bitmap of the current 
interrupt requests. This is 8259-specific * and is only suitable for use at probe time. * This is only here to pacify sio. It is NOT FATAL if this doesn't work. * It shouldn't be here. There should probably be an APIC centric * implementation in the apic driver code, if at all. */ intrmask_t isa_irq_pending(void) { u_char irr1; u_char irr2; irr1 = inb(IO_ICU1); irr2 = inb(IO_ICU2); return ((irr2 << 8) | irr1); } #endif u_int basemem; static int add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, int *physmap_idxp) { int i, insert_idx, physmap_idx; physmap_idx = *physmap_idxp; if (length == 0) return (1); /* * Find insertion point while checking for overlap. Start off by * assuming the new entry will be added to the end. * * NB: physmap_idx points to the next free slot. */ insert_idx = physmap_idx; for (i = 0; i <= physmap_idx; i += 2) { if (base < physmap[i + 1]) { if (base + length <= physmap[i]) { insert_idx = i; break; } if (boothowto & RB_VERBOSE) printf( "Overlapping memory regions, ignoring second region\n"); return (1); } } /* See if we can prepend to the next entry. */ if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) { physmap[insert_idx] = base; return (1); } /* See if we can append to the previous entry. */ if (insert_idx > 0 && base == physmap[insert_idx - 1]) { physmap[insert_idx - 1] += length; return (1); } physmap_idx += 2; *physmap_idxp = physmap_idx; if (physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); return (0); } /* * Move the last 'N' entries down to make room for the new * entry if needed. */ for (i = (physmap_idx - 2); i > insert_idx; i -= 2) { physmap[i] = physmap[i - 2]; physmap[i + 1] = physmap[i - 1]; } /* Insert the new entry. */ physmap[insert_idx] = base; physmap[insert_idx + 1] = base + length; return (1); } void bios_add_smap_entries(struct bios_smap *smapbase, u_int32_t smapsize, vm_paddr_t *physmap, int *physmap_idx) { struct bios_smap *smap, *smapend; smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); for (smap = smapbase; smap < smapend; smap++) { if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%016lx len=%016lx\n", smap->type, smap->base, smap->length); if (smap->type != SMAP_TYPE_MEMORY) continue; if (!add_physmap_entry(smap->base, smap->length, physmap, physmap_idx)) break; } } #define efi_next_descriptor(ptr, size) \ ((struct efi_md *)(((uint8_t *) ptr) + size)) static void add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap, int *physmap_idx) { struct efi_md *map, *p; const char *type; size_t efisz; int ndesc, i; static const char *types[] = { "Reserved", "LoaderCode", "LoaderData", "BootServicesCode", "BootServicesData", "RuntimeServicesCode", "RuntimeServicesData", "ConventionalMemory", "UnusableMemory", "ACPIReclaimMemory", "ACPIMemoryNVS", "MemoryMappedIO", "MemoryMappedIOPortSpace", "PalCode" }; /* * Memory map data provided by UEFI via the GetMemoryMap * Boot Services API. 
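 *
 * descriptor_size is the firmware's descriptor stride and may exceed
 * sizeof(struct efi_md), so the map cannot be indexed as a C array; it
 * has to be walked with efi_next_descriptor(), as the loop below does:
 *
 *	for (i = 0, p = map; i < ndesc;
 *	    i++, p = efi_next_descriptor(p, efihdr->descriptor_size)) {
 *		// examine p->md_type, p->md_phys, p->md_pages here
 *	}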
*/ efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf; map = (struct efi_md *)((uint8_t *)efihdr + efisz); if (efihdr->descriptor_size == 0) return; ndesc = efihdr->memory_size / efihdr->descriptor_size; if (boothowto & RB_VERBOSE) printf("%23s %12s %12s %8s %4s\n", "Type", "Physical", "Virtual", "#Pages", "Attr"); for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, efihdr->descriptor_size)) { if (boothowto & RB_VERBOSE) { if (p->md_type <= EFI_MD_TYPE_PALCODE) type = types[p->md_type]; else type = ""; printf("%23s %012lx %12p %08lx ", type, p->md_phys, p->md_virt, p->md_pages); if (p->md_attr & EFI_MD_ATTR_UC) printf("UC "); if (p->md_attr & EFI_MD_ATTR_WC) printf("WC "); if (p->md_attr & EFI_MD_ATTR_WT) printf("WT "); if (p->md_attr & EFI_MD_ATTR_WB) printf("WB "); if (p->md_attr & EFI_MD_ATTR_UCE) printf("UCE "); if (p->md_attr & EFI_MD_ATTR_WP) printf("WP "); if (p->md_attr & EFI_MD_ATTR_RP) printf("RP "); if (p->md_attr & EFI_MD_ATTR_XP) printf("XP "); if (p->md_attr & EFI_MD_ATTR_RT) printf("RUNTIME"); printf("\n"); } switch (p->md_type) { case EFI_MD_TYPE_CODE: case EFI_MD_TYPE_DATA: case EFI_MD_TYPE_BS_CODE: case EFI_MD_TYPE_BS_DATA: case EFI_MD_TYPE_FREE: /* * We're allowed to use any entry with these types. */ break; default: continue; } if (!add_physmap_entry(p->md_phys, (p->md_pages * PAGE_SIZE), physmap, physmap_idx)) break; } } static char bootmethod[16] = ""; SYSCTL_STRING(_machdep, OID_AUTO, bootmethod, CTLFLAG_RD, bootmethod, 0, "System firmware boot method"); static void native_parse_memmap(caddr_t kmdp, vm_paddr_t *physmap, int *physmap_idx) { struct bios_smap *smap; struct efi_map_header *efihdr; u_int32_t size; /* * Memory map from INT 15:E820. * * subr_module.c says: * "Consumer may safely assume that size value precedes data." * ie: an int32_t immediately precedes smap. */ efihdr = (struct efi_map_header *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP); smap = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (efihdr == NULL && smap == NULL) panic("No BIOS smap or EFI map info from loader!"); if (efihdr != NULL) { add_efi_map_entries(efihdr, physmap, physmap_idx); strlcpy(bootmethod, "UEFI", sizeof(bootmethod)); } else { size = *((u_int32_t *)smap - 1); bios_add_smap_entries(smap, size, physmap, physmap_idx); strlcpy(bootmethod, "BIOS", sizeof(bootmethod)); } } #define PAGES_PER_GB (1024 * 1024 * 1024 / PAGE_SIZE) /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. * * XXX first should be vm_paddr_t. */ static void getmemsize(caddr_t kmdp, u_int64_t first) { int i, physmap_idx, pa_indx, da_indx; vm_paddr_t pa, physmap[PHYSMAP_SIZE]; u_long physmem_start, physmem_tunable, memtest; pt_entry_t *pte; quad_t dcons_addr, dcons_size; int page_counter; bzero(physmap, sizeof(physmap)); physmap_idx = 0; init_ops.parse_memmap(kmdp, physmap, &physmap_idx); physmap_idx -= 2; /* * Find the 'base memory' segment for SMP */ basemem = 0; for (i = 0; i <= physmap_idx; i += 2) { if (physmap[i] <= 0xA0000) { basemem = physmap[i + 1] / 1024; break; } } if (basemem == 0 || basemem > 640) { if (bootverbose) printf( "Memory map doesn't contain a basemem segment, faking it"); basemem = 640; } /* * Make hole for "AP -> long mode" bootstrap code. 
The * mp_bootaddress vector is only available when the kernel * is configured to support APs and APs for the system start * in 32bit mode (e.g. SMP bare metal). */ if (init_ops.mp_bootaddress) { if (physmap[1] >= 0x100000000) panic( "Basemem segment is not suitable for AP bootstrap code!"); physmap[1] = init_ops.mp_bootaddress(physmap[1] / 1024); } /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) Maxmem = atop(physmem_tunable); /* * The boot memory test is disabled by default, as it takes a * significant amount of time on large-memory systems, and is * unfriendly to virtual machines as it unnecessarily touches all * pages. * * A general name is used as the code may be extended to support * additional tests beyond the current "page present" test. */ memtest = 0; TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest); /* * Don't allow MAXMEM or hw.physmem to extend the amount of memory * in the system. */ if (Maxmem > atop(physmap[physmap_idx + 1])) Maxmem = atop(physmap[physmap_idx + 1]); if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(&first); /* * Size up each available chunk of physical memory. * * XXX Some BIOSes corrupt low 64KB between suspend and resume. * By default, mask off the first 16 pages unless we appear to be * running in a VM. */ physmem_start = (vm_guest > VM_GUEST_NO ? 1 : 16) << PAGE_SHIFT; TUNABLE_ULONG_FETCH("hw.physmem.start", &physmem_start); if (physmap[0] < physmem_start) { if (physmem_start < PAGE_SIZE) physmap[0] = PAGE_SIZE; else if (physmem_start >= physmap[1]) physmap[0] = round_page(physmap[1] - PAGE_SIZE); else physmap[0] = round_page(physmem_start); } pa_indx = 0; da_indx = 1; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; dump_avail[da_indx] = physmap[0]; pte = CMAP1; /* * Get dcons buffer address */ if (getenv_quad("dcons.addr", &dcons_addr) == 0 || getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ page_counter = 0; if (memtest != 0) printf("Testing system memory"); for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad, full; int *ptr = (int *)CADDR1; full = FALSE; /* * block out kernel memory as not available. */ if (pa >= (vm_paddr_t)kernphys && pa < first) goto do_dump_avail; /* * block out dcons buffer */ if (dcons_addr > 0 && pa >= trunc_page(dcons_addr) && pa < dcons_addr + dcons_size) goto do_dump_avail; page_bad = FALSE; if (memtest == 0) goto skip_memtest; /* * Print a "." every GB to show we're making * progress. 
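 *
 * Both the memory cap and the memory test above are driven from the
 * loader environment; an illustrative loader.conf(5) fragment (values
 * are examples only):
 *
 *	hw.memtest.tests="1"	# enable the boot-time page-present test
 *	hw.physmem="4G"		# cap usable physical memory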
*/ page_counter++; if ((page_counter % PAGES_PER_GB) == 0) printf("."); /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_NC_PWT | PG_NC_PCD; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) page_bad = TRUE; /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) page_bad = TRUE; /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) page_bad = TRUE; /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) page_bad = TRUE; /* * Restore original value. */ *(int *)ptr = tmp; skip_memtest: /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) continue; /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; full = TRUE; goto do_dump_avail; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; do_dump_avail: if (dump_avail[da_indx] == pa) { dump_avail[da_indx] += PAGE_SIZE; } else { da_indx++; if (da_indx == DUMP_AVAIL_ARRAY_END) { da_indx--; goto do_next; } dump_avail[da_indx++] = pa; /* start */ dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ } do_next: if (full) break; } } *pte = 0; invltlb(); if (memtest != 0) printf("\n"); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(msgbufsize) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(msgbufsize); /* Map the message buffer. 
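 *
 * msgbufsize itself is tunable as well; an illustrative loader.conf(5)
 * line, assuming the standard kern.msgbufsize tunable:
 *
 *	kern.msgbufsize="262144"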
*/ msgbufp = (struct msgbuf *)PHYS_TO_DMAP(phys_avail[pa_indx]); } static caddr_t native_parse_preload_data(u_int64_t modulep) { caddr_t kmdp; + char *envp; #ifdef DDB vm_offset_t ksym_start; vm_offset_t ksym_end; #endif preload_metadata = (caddr_t)(uintptr_t)(modulep + KERNBASE); preload_bootstrap_relocate(KERNBASE); kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); - kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *) + KERNBASE; + envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); + if (envp != NULL) + envp += KERNBASE; + init_static_kenv(envp, 0); #ifdef DDB ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t); ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t); db_fetch_ksymtab(ksym_start, ksym_end); #endif efi_systbl = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t); return (kmdp); } u_int64_t hammer_time(u_int64_t modulep, u_int64_t physfree) { caddr_t kmdp; int gsel_tss, x; struct pcpu *pc; struct nmi_pcpu *np; struct xstate_hdr *xhdr; u_int64_t msr; char *env; size_t kstack0_sz; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. */ proc_linkup0(&proc0, &thread0); kmdp = init_ops.parse_preload_data(modulep); /* Init basic tunables, hz etc */ init_param1(); thread0.td_kstack = physfree + KERNBASE; thread0.td_kstack_pages = kstack_pages; kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE; bzero((void *)thread0.td_kstack, kstack0_sz); physfree += kstack0_sz; /* * make gdt memory segments */ for (x = 0; x < NGDT; x++) { if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) && x != GUSERLDT_SEL && x != (GUSERLDT_SEL) + 1) ssdtosd(&gdt_segs[x], &gdt[x]); } gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0]; ssdtosyssd(&gdt_segs[GPROC0_SEL], (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (long) gdt; lgdt(&r_gdt); pc = &__pcpu[0]; wrmsr(MSR_FSBASE, 0); /* User value */ wrmsr(MSR_GSBASE, (u_int64_t)pc); wrmsr(MSR_KGSBASE, 0); /* User value while in the kernel */ pcpu_init(pc, 0, sizeof(struct pcpu)); dpcpu_init((void *)(physfree + KERNBASE), 0); physfree += DPCPU_SIZE; PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); PCPU_SET(tssp, &common_tss[0]); PCPU_SET(commontssp, &common_tss[0]); PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]); PCPU_SET(fs32p, &gdt[GUFS32_SEL]); PCPU_SET(gs32p, &gdt[GUGS32_SEL]); /* * Initialize mutexes. * * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. 
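 *
 * A minimal sketch of how such a spin mutex is then used; interrupts
 * stay disabled for the duration of the critical section:
 *
 *	mtx_lock_spin(&icu_lock);
 *	// ... touch interrupt controller state ...
 *	mtx_unlock_spin(&icu_lock);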
*/ mutex_init(); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS); mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF); /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DE, &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2); setidt(IDT_BP, &IDTVEC(bpt), SDT_SYSIGT, SEL_UPL, 0); setidt(IDT_OF, &IDTVEC(ofl), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_BR, &IDTVEC(bnd), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_UD, &IDTVEC(ill), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_NM, &IDTVEC(dna), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1); setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_TS, &IDTVEC(tss), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_NP, &IDTVEC(missing), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_SS, &IDTVEC(stk), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_GP, &IDTVEC(prot), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_PF, &IDTVEC(page), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_MF, &IDTVEC(fpu), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_AC, &IDTVEC(align), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_XF, &IDTVEC(xmm), SDT_SYSIGT, SEL_KPL, 0); #ifdef KDTRACE_HOOKS setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYSIGT, SEL_UPL, 0); #endif #ifdef XENHVM setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall), SDT_SYSIGT, SEL_UPL, 0); #endif r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (long) idt; lidt(&r_idt); /* * Initialize the clock before the console so that console * initialization can use DELAY(). */ clock_init(); /* * Use vt(4) by default for UEFI boot (during the sc(4)/vt(4) * transition). * Once bootblocks have updated, we can test directly for * efi_systbl != NULL here... */ if (preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP) != NULL) vty_set_preferred(VTY_VT); identify_cpu(); /* Final stage of CPU initialization */ initializecpu(); /* Initialize CPU registers */ initializecpucache(); /* doublefault stack space, runs on ist1 */ common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)]; /* * NMI stack, runs on ist2. The pcpu pointer is stored just * above the start of the ist2 stack. */ np = ((struct nmi_pcpu *) &nmi0_stack[sizeof(nmi0_stack)]) - 1; np->np_pcpu = (register_t) pc; common_tss[0].tss_ist2 = (long) np; /* Set the IO permission bitmap (empty due to tss seg limit) */ common_tss[0].tss_iobase = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE; gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); ltr(gsel_tss); /* Set up the fast syscall stuff */ msr = rdmsr(MSR_EFER) | EFER_SCE; wrmsr(MSR_EFER, msr); wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall)); wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32)); msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48); wrmsr(MSR_STAR, msr); wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D); getmemsize(kmdp, physfree); init_param2(physmem); /* now running on new page tables, configured,and u/iom is accessible */ cninit(); #ifdef DEV_ISA #ifdef DEV_ATPIC elcr_probe(); atpic_startup(); #else /* Reset and mask the atpics and leave them shut down. */ atpic_reset(); /* * Point the ICU spurious interrupt vectors at the APIC spurious * interrupt handler. 
*/ setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0); #endif #else #error "have you forgotten the isa device?"; #endif kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif msgbufinit(msgbufp, msgbufsize); fpuinit(); /* * Set up thread0 pcb after fpuinit calculated pcb + fpu save * area size. Zero out the extended state header in fpu save * area. */ thread0.td_pcb = get_pcb_td(&thread0); bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size); if (use_xsave) { xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) + 1); xhdr->xstate_bv = xsave_mask; } /* make an initial tss so cpu can get interrupt stack on syscall! */ common_tss[0].tss_rsp0 = (vm_offset_t)thread0.td_pcb; /* Ensure the stack is aligned to 16 bytes */ common_tss[0].tss_rsp0 &= ~0xFul; PCPU_SET(rsp0, common_tss[0].tss_rsp0); PCPU_SET(curpcb, thread0.td_pcb); /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); _ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL); _ufssel = GSEL(GUFS32_SEL, SEL_UPL); _ugssel = GSEL(GUGS32_SEL, SEL_UPL); load_ds(_udatasel); load_es(_udatasel); load_fs(_ufssel); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; thread0.td_frame = &proc0_tf; env = kern_getenv("kernelname"); if (env != NULL) strlcpy(kernelname, env, sizeof(kernelname)); cpu_probe_amdc1e(); #ifdef FDT x86_init_fdt(); #endif /* Location of kernel stack for locore */ return ((u_int64_t)thread0.td_pcb); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; } static int smap_sysctl_handler(SYSCTL_HANDLER_ARGS) { struct bios_smap *smapbase; struct bios_smap_xattr smap; caddr_t kmdp; uint32_t *smapattr; int count, error, i; /* Retrieve the system memory map from the loader. 
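 *
 * The raw table is exported below as the machdep.smap sysctl (for a
 * quick look, "sysctl -x machdep.smap"); a minimal sketch of reading it
 * programmatically:
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdlib.h>
 *
 *	size_t len;
 *	void *buf;
 *	sysctlbyname("machdep.smap", NULL, &len, NULL, 0);	// probe size
 *	buf = malloc(len);
 *	sysctlbyname("machdep.smap", buf, &len, NULL, 0);	// raw entries
 */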
*/ kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase == NULL) return (0); smapattr = (uint32_t *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP_XATTR); count = *((uint32_t *)smapbase - 1) / sizeof(*smapbase); error = 0; for (i = 0; i < count; i++) { smap.base = smapbase[i].base; smap.length = smapbase[i].length; smap.type = smapbase[i].type; if (smapattr != NULL) smap.xattr = smapattr[i]; else smap.xattr = 0; error = SYSCTL_OUT(req, &smap, sizeof(smap)); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, smap, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0, smap_sysctl_handler, "S,bios_smap_xattr", "Raw BIOS SMAP data"); static int efi_map_sysctl_handler(SYSCTL_HANDLER_ARGS) { struct efi_map_header *efihdr; caddr_t kmdp; uint32_t efisize; kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); efihdr = (struct efi_map_header *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP); if (efihdr == NULL) return (0); efisize = *((uint32_t *)efihdr - 1); return (SYSCTL_OUT(req, efihdr, efisize)); } SYSCTL_PROC(_machdep, OID_AUTO, efi_map, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0, efi_map_sysctl_handler, "S,efi_map_header", "Raw EFI Memory Map"); void spinlock_enter(void) { struct thread *td; register_t flags; td = curthread; if (td->td_md.md_spinlock_count == 0) { flags = intr_disable(); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_flags = flags; } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t flags; td = curthread; critical_exit(); flags = td->td_md.md_saved_flags; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(flags); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. 
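 *
 * The spinlock_enter()/spinlock_exit() pair above nests by way of the
 * per-thread count; interrupts are restored only when the outermost
 * section exits.  Sketch:
 *
 *	spinlock_enter();	// count 0 -> 1, disables interrupts
 *	spinlock_enter();	// count 1 -> 2, flags already saved
 *	spinlock_exit();	// count 2 -> 1, interrupts still off
 *	spinlock_exit();	// count 1 -> 0, restores the saved flags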
*/ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_r12 = tf->tf_r12; pcb->pcb_r13 = tf->tf_r13; pcb->pcb_r14 = tf->tf_r14; pcb->pcb_r15 = tf->tf_r15; pcb->pcb_rbp = tf->tf_rbp; pcb->pcb_rbx = tf->tf_rbx; pcb->pcb_rip = tf->tf_rip; pcb->pcb_rsp = tf->tf_rsp; } int ptrace_set_pc(struct thread *td, unsigned long addr) { td->td_frame->tf_rip = addr; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_rflags |= PSL_T; return (0); } int ptrace_clear_single_step(struct thread *td) { td->td_frame->tf_rflags &= ~PSL_T; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *tp; tp = td->td_frame; return (fill_frame_regs(tp, regs)); } int fill_frame_regs(struct trapframe *tp, struct reg *regs) { regs->r_r15 = tp->tf_r15; regs->r_r14 = tp->tf_r14; regs->r_r13 = tp->tf_r13; regs->r_r12 = tp->tf_r12; regs->r_r11 = tp->tf_r11; regs->r_r10 = tp->tf_r10; regs->r_r9 = tp->tf_r9; regs->r_r8 = tp->tf_r8; regs->r_rdi = tp->tf_rdi; regs->r_rsi = tp->tf_rsi; regs->r_rbp = tp->tf_rbp; regs->r_rbx = tp->tf_rbx; regs->r_rdx = tp->tf_rdx; regs->r_rcx = tp->tf_rcx; regs->r_rax = tp->tf_rax; regs->r_rip = tp->tf_rip; regs->r_cs = tp->tf_cs; regs->r_rflags = tp->tf_rflags; regs->r_rsp = tp->tf_rsp; regs->r_ss = tp->tf_ss; if (tp->tf_flags & TF_HASSEGS) { regs->r_ds = tp->tf_ds; regs->r_es = tp->tf_es; regs->r_fs = tp->tf_fs; regs->r_gs = tp->tf_gs; } else { regs->r_ds = 0; regs->r_es = 0; regs->r_fs = 0; regs->r_gs = 0; } return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tp; register_t rflags; tp = td->td_frame; rflags = regs->r_rflags & 0xffffffff; if (!EFL_SECURE(rflags, tp->tf_rflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); tp->tf_r15 = regs->r_r15; tp->tf_r14 = regs->r_r14; tp->tf_r13 = regs->r_r13; tp->tf_r12 = regs->r_r12; tp->tf_r11 = regs->r_r11; tp->tf_r10 = regs->r_r10; tp->tf_r9 = regs->r_r9; tp->tf_r8 = regs->r_r8; tp->tf_rdi = regs->r_rdi; tp->tf_rsi = regs->r_rsi; tp->tf_rbp = regs->r_rbp; tp->tf_rbx = regs->r_rbx; tp->tf_rdx = regs->r_rdx; tp->tf_rcx = regs->r_rcx; tp->tf_rax = regs->r_rax; tp->tf_rip = regs->r_rip; tp->tf_cs = regs->r_cs; tp->tf_rflags = rflags; tp->tf_rsp = regs->r_rsp; tp->tf_ss = regs->r_ss; if (0) { /* XXXKIB */ tp->tf_ds = regs->r_ds; tp->tf_es = regs->r_es; tp->tf_fs = regs->r_fs; tp->tf_gs = regs->r_gs; tp->tf_flags = TF_HASSEGS; } set_pcb_flags(td->td_pcb, PCB_FULL_IRET); return (0); } /* XXX check all this stuff! 
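 *
 * fill_regs()/set_regs() above are the kernel side of ptrace(2)'s
 * PT_GETREGS/PT_SETREGS; a minimal debugger-side sketch ('child' is a
 * hypothetical traced pid, error handling omitted):
 *
 *	#include <sys/types.h>
 *	#include <sys/ptrace.h>
 *	#include <machine/reg.h>
 *	#include <stdio.h>
 *
 *	struct reg r;
 *	ptrace(PT_GETREGS, child, (caddr_t)&r, 0);
 *	printf("rip %#lx rsp %#lx\n", r.r_rip, r.r_rsp);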
*/ /* externalize from sv_xmm */ static void fill_fpregs_xmm(struct savefpu *sv_xmm, struct fpreg *fpregs) { struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env; struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; /* pcb -> fpregs */ bzero(fpregs, sizeof(*fpregs)); /* FPU control/status */ penv_fpreg->en_cw = penv_xmm->en_cw; penv_fpreg->en_sw = penv_xmm->en_sw; penv_fpreg->en_tw = penv_xmm->en_tw; penv_fpreg->en_opcode = penv_xmm->en_opcode; penv_fpreg->en_rip = penv_xmm->en_rip; penv_fpreg->en_rdp = penv_xmm->en_rdp; penv_fpreg->en_mxcsr = penv_xmm->en_mxcsr; penv_fpreg->en_mxcsr_mask = penv_xmm->en_mxcsr_mask; /* FPU registers */ for (i = 0; i < 8; ++i) bcopy(sv_xmm->sv_fp[i].fp_acc.fp_bytes, fpregs->fpr_acc[i], 10); /* SSE registers */ for (i = 0; i < 16; ++i) bcopy(sv_xmm->sv_xmm[i].xmm_bytes, fpregs->fpr_xacc[i], 16); } /* internalize from fpregs into sv_xmm */ static void set_fpregs_xmm(struct fpreg *fpregs, struct savefpu *sv_xmm) { struct envxmm *penv_xmm = &sv_xmm->sv_env; struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env; int i; /* fpregs -> pcb */ /* FPU control/status */ penv_xmm->en_cw = penv_fpreg->en_cw; penv_xmm->en_sw = penv_fpreg->en_sw; penv_xmm->en_tw = penv_fpreg->en_tw; penv_xmm->en_opcode = penv_fpreg->en_opcode; penv_xmm->en_rip = penv_fpreg->en_rip; penv_xmm->en_rdp = penv_fpreg->en_rdp; penv_xmm->en_mxcsr = penv_fpreg->en_mxcsr; penv_xmm->en_mxcsr_mask = penv_fpreg->en_mxcsr_mask & cpu_mxcsr_mask; /* FPU registers */ for (i = 0; i < 8; ++i) bcopy(fpregs->fpr_acc[i], sv_xmm->sv_fp[i].fp_acc.fp_bytes, 10); /* SSE registers */ for (i = 0; i < 16; ++i) bcopy(fpregs->fpr_xacc[i], sv_xmm->sv_xmm[i].xmm_bytes, 16); } /* externalize from td->pcb */ int fill_fpregs(struct thread *td, struct fpreg *fpregs) { KASSERT(td == curthread || TD_IS_SUSPENDED(td) || P_SHOULDSTOP(td->td_proc), ("not suspended thread %p", td)); fpugetregs(td); fill_fpregs_xmm(get_pcb_user_save_td(td), fpregs); return (0); } /* internalize to td->pcb */ int set_fpregs(struct thread *td, struct fpreg *fpregs) { set_fpregs_xmm(fpregs, get_pcb_user_save_td(td)); fpuuserinited(td); return (0); } /* * Get machine context. */ int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct pcb *pcb; struct trapframe *tp; pcb = td->td_pcb; tp = td->td_frame; PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(tp->tf_rsp); PROC_UNLOCK(curthread->td_proc); mcp->mc_r15 = tp->tf_r15; mcp->mc_r14 = tp->tf_r14; mcp->mc_r13 = tp->tf_r13; mcp->mc_r12 = tp->tf_r12; mcp->mc_r11 = tp->tf_r11; mcp->mc_r10 = tp->tf_r10; mcp->mc_r9 = tp->tf_r9; mcp->mc_r8 = tp->tf_r8; mcp->mc_rdi = tp->tf_rdi; mcp->mc_rsi = tp->tf_rsi; mcp->mc_rbp = tp->tf_rbp; mcp->mc_rbx = tp->tf_rbx; mcp->mc_rcx = tp->tf_rcx; mcp->mc_rflags = tp->tf_rflags; if (flags & GET_MC_CLEAR_RET) { mcp->mc_rax = 0; mcp->mc_rdx = 0; mcp->mc_rflags &= ~PSL_C; } else { mcp->mc_rax = tp->tf_rax; mcp->mc_rdx = tp->tf_rdx; } mcp->mc_rip = tp->tf_rip; mcp->mc_cs = tp->tf_cs; mcp->mc_rsp = tp->tf_rsp; mcp->mc_ss = tp->tf_ss; mcp->mc_ds = tp->tf_ds; mcp->mc_es = tp->tf_es; mcp->mc_fs = tp->tf_fs; mcp->mc_gs = tp->tf_gs; mcp->mc_flags = tp->tf_flags; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp, NULL, 0); mcp->mc_fsbase = pcb->pcb_fsbase; mcp->mc_gsbase = pcb->pcb_gsbase; mcp->mc_xfpustate = 0; mcp->mc_xfpustate_len = 0; bzero(mcp->mc_spare, sizeof(mcp->mc_spare)); return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. 
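 *
 * get_mcontext() and set_mcontext() back getcontext(2) and
 * setcontext(2); a minimal sketch of the userland view:
 *
 *	#include <ucontext.h>
 *
 *	ucontext_t uc;
 *	getcontext(&uc);	// machine state filled by get_mcontext()
 *	// ... alter user-changeable state only ...
 *	setcontext(&uc);	// validated below before being installed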
 */
int
set_mcontext(struct thread *td, mcontext_t *mcp)
{
	struct pcb *pcb;
	struct trapframe *tp;
	char *xfpustate;
	long rflags;
	int ret;

	pcb = td->td_pcb;
	tp = td->td_frame;
	if (mcp->mc_len != sizeof(*mcp) ||
	    (mcp->mc_flags & ~_MC_FLAG_MASK) != 0)
		return (EINVAL);
	rflags = (mcp->mc_rflags & PSL_USERCHANGE) |
	    (tp->tf_rflags & ~PSL_USERCHANGE);
	if (mcp->mc_flags & _MC_HASFPXSTATE) {
		if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
		    sizeof(struct savefpu))
			return (EINVAL);
		xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
		ret = copyin((void *)mcp->mc_xfpustate, xfpustate,
		    mcp->mc_xfpustate_len);
		if (ret != 0)
			return (ret);
	} else
		xfpustate = NULL;
	ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len);
	if (ret != 0)
		return (ret);
	tp->tf_r15 = mcp->mc_r15;
	tp->tf_r14 = mcp->mc_r14;
	tp->tf_r13 = mcp->mc_r13;
	tp->tf_r12 = mcp->mc_r12;
	tp->tf_r11 = mcp->mc_r11;
	tp->tf_r10 = mcp->mc_r10;
	tp->tf_r9 = mcp->mc_r9;
	tp->tf_r8 = mcp->mc_r8;
	tp->tf_rdi = mcp->mc_rdi;
	tp->tf_rsi = mcp->mc_rsi;
	tp->tf_rbp = mcp->mc_rbp;
	tp->tf_rbx = mcp->mc_rbx;
	tp->tf_rdx = mcp->mc_rdx;
	tp->tf_rcx = mcp->mc_rcx;
	tp->tf_rax = mcp->mc_rax;
	tp->tf_rip = mcp->mc_rip;
	tp->tf_rflags = rflags;
	tp->tf_rsp = mcp->mc_rsp;
	tp->tf_ss = mcp->mc_ss;
	tp->tf_flags = mcp->mc_flags;
	if (tp->tf_flags & TF_HASSEGS) {
		tp->tf_ds = mcp->mc_ds;
		tp->tf_es = mcp->mc_es;
		tp->tf_fs = mcp->mc_fs;
		tp->tf_gs = mcp->mc_gs;
	}
	if (mcp->mc_flags & _MC_HASBASES) {
		pcb->pcb_fsbase = mcp->mc_fsbase;
		pcb->pcb_gsbase = mcp->mc_gsbase;
	}
	set_pcb_flags(pcb, PCB_FULL_IRET);
	return (0);
}

static void
get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave,
    size_t xfpusave_len)
{
	size_t max_len, len;

	mcp->mc_ownedfp = fpugetregs(td);
	bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0],
	    sizeof(mcp->mc_fpstate));
	mcp->mc_fpformat = fpuformat();
	if (!use_xsave || xfpusave_len == 0)
		return;
	max_len = cpu_max_ext_state_size - sizeof(struct savefpu);
	len = xfpusave_len;
	if (len > max_len) {
		/* Zero the tail of the caller's buffer that we do not fill. */
		bzero(xfpusave + max_len, len - max_len);
		len = max_len;
	}
	mcp->mc_flags |= _MC_HASFPXSTATE;
	mcp->mc_xfpustate_len = len;
	bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len);
}

static int
set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate,
    size_t xfpustate_len)
{
	struct savefpu *fpstate;
	int error;

	if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
		return (0);
	else if (mcp->mc_fpformat != _MC_FPFMT_XMM)
		return (EINVAL);
	else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) {
		/* We don't care what state is left in the FPU or PCB. */
		fpstate_drop(td);
		error = 0;
	} else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
	    mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
		fpstate = (struct savefpu *)&mcp->mc_fpstate;
		fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask;
		error = fpusetregs(td, fpstate, xfpustate, xfpustate_len);
	} else
		return (EINVAL);
	return (error);
}

void
fpstate_drop(struct thread *td)
{

	KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu"));
	critical_enter();
	if (PCPU_GET(fpcurthread) == td)
		fpudrop();
	/*
	 * XXX force a full drop of the fpu.  The above only drops it if we
	 * owned it.
	 *
	 * XXX I don't much like fpugetuserregs()'s semantics of doing a full
	 * drop.  Dropping only to the pcb matches fnsave's behaviour.
	 * We only need to drop to !PCB_INITDONE in sendsig().  But
	 * sendsig() is the only caller of fpugetuserregs()... perhaps we just
	 * have too many layers.
*/ clear_pcb_flags(curthread->td_pcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE); critical_exit(); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; if (td == NULL) { dbregs->dr[0] = rdr0(); dbregs->dr[1] = rdr1(); dbregs->dr[2] = rdr2(); dbregs->dr[3] = rdr3(); dbregs->dr[6] = rdr6(); dbregs->dr[7] = rdr7(); } else { pcb = td->td_pcb; dbregs->dr[0] = pcb->pcb_dr0; dbregs->dr[1] = pcb->pcb_dr1; dbregs->dr[2] = pcb->pcb_dr2; dbregs->dr[3] = pcb->pcb_dr3; dbregs->dr[6] = pcb->pcb_dr6; dbregs->dr[7] = pcb->pcb_dr7; } dbregs->dr[4] = 0; dbregs->dr[5] = 0; dbregs->dr[8] = 0; dbregs->dr[9] = 0; dbregs->dr[10] = 0; dbregs->dr[11] = 0; dbregs->dr[12] = 0; dbregs->dr[13] = 0; dbregs->dr[14] = 0; dbregs->dr[15] = 0; return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; int i; if (td == NULL) { load_dr0(dbregs->dr[0]); load_dr1(dbregs->dr[1]); load_dr2(dbregs->dr[2]); load_dr3(dbregs->dr[3]); load_dr6(dbregs->dr[6]); load_dr7(dbregs->dr[7]); } else { /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns * result in undefined behaviour and can lead to an unexpected * TRCTRAP or a general protection fault right here. * Upper bits of dr6 and dr7 must not be set */ for (i = 0; i < 4; i++) { if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02) return (EINVAL); if (td->td_frame->tf_cs == _ucode32sel && DBREG_DR7_LEN(dbregs->dr[7], i) == DBREG_DR7_LEN_8) return (EINVAL); } if ((dbregs->dr[6] & 0xffffffff00000000ul) != 0 || (dbregs->dr[7] & 0xffffffff00000000ul) != 0) return (EINVAL); pcb = td->td_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? */ if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) { /* dr0 is enabled */ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) { /* dr1 is enabled */ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) { /* dr2 is enabled */ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) { /* dr3 is enabled */ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) return (EINVAL); } pcb->pcb_dr0 = dbregs->dr[0]; pcb->pcb_dr1 = dbregs->dr[1]; pcb->pcb_dr2 = dbregs->dr[2]; pcb->pcb_dr3 = dbregs->dr[3]; pcb->pcb_dr6 = dbregs->dr[6]; pcb->pcb_dr7 = dbregs->dr[7]; set_pcb_flags(pcb, PCB_DBREGS); } return (0); } void reset_dbregs(void) { load_dr7(0); /* Turn off the control bits first */ load_dr0(0); load_dr1(0); load_dr2(0); load_dr3(0); load_dr6(0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. 
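 *
 * Before the implementation, a standalone sketch of the dr7 encoding
 * that set_dbregs() validates above (bit positions are from the x86
 * architecture manuals; the macro names are local to this example):
 */

#include <stdint.h>
#include <stdio.h>

#define	DR7_LOCAL_EN(i)	(1ULL << ((i) * 2))	/* L0..L3 at bits 0,2,4,6 */
#define	DR7_RW_WRITE	0x1	/* 0x2 is the undefined pattern rejected above */
#define	DR7_LEN_4	0x3
#define	DR7_CTRL(i, rw, len) \
	((uint64_t)(((len) << 2) | (rw)) << (16 + (i) * 4))

int
main(void)
{
	/* Arm slot 0 as a 4-byte write watchpoint. */
	uint64_t dr7 = DR7_LOCAL_EN(0) | DR7_CTRL(0, DR7_RW_WRITE, DR7_LEN_4);

	printf("dr7 = %#jx\n", (uintmax_t)dr7);	/* prints 0xd0001 */
	return (0);
}

/*
 * user_dbreg_trap() itself: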
*/ int user_dbreg_trap(void) { u_int64_t dr7, dr6; /* debug registers dr6 and dr7 */ u_int64_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; dr6 = rdr6(); bp = dr6 & 0x0000000f; if (!bp) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i = 0; i < nbp; i++) { if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { /* * addr[i] is in user space */ return nbp; } } /* * None of the breakpoints are in user space. */ return 0; } #ifdef KDB /* * Provide inb() and outb() as functions. They are normally only available as * inline functions, thus cannot be called from the debugger. */ /* silence compiler warnings */ u_char inb_(u_short); void outb_(u_short, u_char); u_char inb_(u_short port) { return inb(port); } void outb_(u_short port, u_char data) { outb(port, data); } #endif /* KDB */ diff --git a/sys/arm/arm/machdep.c b/sys/arm/arm/machdep.c index af7ee997071c..b8e3ffd9c76d 100644 --- a/sys/arm/arm/machdep.c +++ b/sys/arm/arm/machdep.c @@ -1,1910 +1,1917 @@ /* $NetBSD: arm32_machdep.c,v 1.44 2004/03/24 15:34:47 atatat Exp $ */ /*- * Copyright (c) 2004 Olivier Houchard * Copyright (c) 1994-1998 Mark Brinicombe. * Copyright (c) 1994 Brini. * All rights reserved. * * This code is derived from software written for Brini by Mark Brinicombe * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Mark Brinicombe * for the NetBSD Project. * 4. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Machine dependant functions for kernel setup * * Created : 17/09/94 * Updated : 18/04/01 updated for new wscons */ #include "opt_compat.h" #include "opt_ddb.h" #include "opt_kstack_pages.h" #include "opt_platform.h" #include "opt_sched.h" #include "opt_timer.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef FDT #include #include #endif #ifdef DDB #include #if __ARM_ARCH >= 6 #include DB_SHOW_COMMAND(cp15, db_show_cp15) { u_int reg; reg = cp15_midr_get(); db_printf("Cpu ID: 0x%08x\n", reg); reg = cp15_ctr_get(); db_printf("Current Cache Lvl ID: 0x%08x\n",reg); reg = cp15_sctlr_get(); db_printf("Ctrl: 0x%08x\n",reg); reg = cp15_actlr_get(); db_printf("Aux Ctrl: 0x%08x\n",reg); reg = cp15_id_pfr0_get(); db_printf("Processor Feat 0: 0x%08x\n", reg); reg = cp15_id_pfr1_get(); db_printf("Processor Feat 1: 0x%08x\n", reg); reg = cp15_id_dfr0_get(); db_printf("Debug Feat 0: 0x%08x\n", reg); reg = cp15_id_afr0_get(); db_printf("Auxiliary Feat 0: 0x%08x\n", reg); reg = cp15_id_mmfr0_get(); db_printf("Memory Model Feat 0: 0x%08x\n", reg); reg = cp15_id_mmfr1_get(); db_printf("Memory Model Feat 1: 0x%08x\n", reg); reg = cp15_id_mmfr2_get(); db_printf("Memory Model Feat 2: 0x%08x\n", reg); reg = cp15_id_mmfr3_get(); db_printf("Memory Model Feat 3: 0x%08x\n", reg); reg = cp15_ttbr_get(); db_printf("TTB0: 0x%08x\n", reg); } DB_SHOW_COMMAND(vtop, db_show_vtop) { u_int reg; if (have_addr) { cp15_ats1cpr_set(addr); reg = cp15_par_get(); db_printf("Physical address reg: 0x%08x\n",reg); } else db_printf("show vtop \n"); } #endif /* __ARM_ARCH >= 6 */ #endif /* DDB */ #ifdef DEBUG #define debugf(fmt, args...) printf(fmt, ##args) #else #define debugf(fmt, args...) #endif struct pcpu __pcpu[MAXCPU]; struct pcpu *pcpup = &__pcpu[0]; static struct trapframe proc0_tf; uint32_t cpu_reset_address = 0; int cold = 1; vm_offset_t vector_page; int (*_arm_memcpy)(void *, void *, int, int) = NULL; int (*_arm_bzero)(void *, int, int) = NULL; int _min_memcpy_size = 0; int _min_bzero_size = 0; extern int *end; #ifdef FDT +static char *loader_envp; + vm_paddr_t pmap_pa; #ifdef ARM_NEW_PMAP vm_offset_t systempage; vm_offset_t irqstack; vm_offset_t undstack; vm_offset_t abtstack; #else /* * This is the number of L2 page tables required for covering max * (hypothetical) memsize of 4GB and all kernel mappings (vectors, msgbuf, * stacks etc.), uprounded to be divisible by 4. 
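 *
 * The same arithmetic, runnable standalone (sizeof(struct vm_page) is
 * assumed below, since it varies with kernel options; that variability
 * is also why the constant carries headroom):
 */

#include <stdio.h>

#define	PAGE_SIZE	4096
#define	L1_S_SHIFT	20	/* one L1 section, hence one L2 table, per MB */

int
main(void)
{
	unsigned long long memsize = 4ULL << 30;	/* hypothetical 4GB */
	unsigned long long vm_page_sz = 72;		/* assumed */
	unsigned long long l2size;

	l2size = (memsize / PAGE_SIZE) * vm_page_sz;	/* vm_page_array bytes */
	l2size = (l2size >> L1_S_SHIFT) + 1;	/* L2 tables to map that */
	l2size += 3;	/* end of kernel map, stacks/msgbuf/ptbl maps, vectors */
	l2size = (l2size + 3) & ~3ULL;		/* upround to divisible by 4 */
	printf("%llu L2 tables\n", l2size);	/* 76 with these assumptions */
	return (0);
}

/*
 * Hence: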
*/ #define KERNEL_PT_MAX 78 static struct pv_addr kernel_pt_table[KERNEL_PT_MAX]; struct pv_addr systempage; static struct pv_addr msgbufpv; struct pv_addr irqstack; struct pv_addr undstack; struct pv_addr abtstack; static struct pv_addr kernelstack; #endif #endif #if defined(LINUX_BOOT_ABI) #define LBABI_MAX_BANKS 10 uint32_t board_id; struct arm_lbabi_tag *atag_list; char linux_command_line[LBABI_MAX_COMMAND_LINE + 1]; char atags[LBABI_MAX_COMMAND_LINE * 2]; uint32_t memstart[LBABI_MAX_BANKS]; uint32_t memsize[LBABI_MAX_BANKS]; uint32_t membanks; #endif static uint32_t board_revision; /* hex representation of uint64_t */ static char board_serial[32]; SYSCTL_NODE(_hw, OID_AUTO, board, CTLFLAG_RD, 0, "Board attributes"); SYSCTL_UINT(_hw_board, OID_AUTO, revision, CTLFLAG_RD, &board_revision, 0, "Board revision"); SYSCTL_STRING(_hw_board, OID_AUTO, serial, CTLFLAG_RD, board_serial, 0, "Board serial"); int vfp_exists; SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, &vfp_exists, 0, "Floating point support enabled"); void board_set_serial(uint64_t serial) { snprintf(board_serial, sizeof(board_serial)-1, "%016jx", serial); } void board_set_revision(uint32_t revision) { board_revision = revision; } void sendsig(catcher, ksi, mask) sig_t catcher; ksiginfo_t *ksi; sigset_t *mask; { struct thread *td; struct proc *p; struct trapframe *tf; struct sigframe *fp, frame; struct sigacts *psp; struct sysentvec *sysent; int onstack; int sig; int code; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; code = ksi->ksi_code; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; onstack = sigonstack(tf->tf_usr_sp); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !(onstack) && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct sigframe *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else fp = (struct sigframe *)td->td_frame->tf_usr_sp; /* make room on the stack */ fp--; /* make the stack aligned */ fp = (struct sigframe *)STACKALIGN(fp); /* Populate the siginfo frame. */ get_mcontext(td, &frame.sf_uc.uc_mcontext, 0); frame.sf_si = ksi->ksi_info; frame.sf_uc.uc_sigmask = *mask; frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK ) ? ((onstack) ? SS_ONSTACK : 0) : SS_DISABLE; frame.sf_uc.uc_stack = td->td_sigstk; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(td->td_proc); /* Copy the sigframe out to the user's stack. */ if (copyout(&frame, fp, sizeof(*fp)) != 0) { /* Process has trashed its stack. Kill it. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp); PROC_LOCK(p); sigexit(td, SIGILL); } /* * Build context to run handler in. We invoke the handler * directly, only returning via the trampoline. Note the * trampoline version numbers are coordinated with machine- * dependent code in libc. 
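 *
 * The stores below make the AAPCS call into the handler: r0-r2 carry
 * the (sig, siginfo, ucontext) arguments, r5 keeps a private copy of
 * the ucontext pointer for the return trampoline, and on ARMv7 bit 0
 * of the handler address selects Thumb state via PSR_T.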
*/ tf->tf_r0 = sig; tf->tf_r1 = (register_t)&fp->sf_si; tf->tf_r2 = (register_t)&fp->sf_uc; /* the trampoline uses r5 as the uc address */ tf->tf_r5 = (register_t)&fp->sf_uc; tf->tf_pc = (register_t)catcher; tf->tf_usr_sp = (register_t)fp; sysent = p->p_sysent; if (sysent->sv_sigcode_base != 0) tf->tf_usr_lr = (register_t)sysent->sv_sigcode_base; else tf->tf_usr_lr = (register_t)(sysent->sv_psstrings - *(sysent->sv_szsigcode)); /* Set the mode to enter in the signal handler */ #if __ARM_ARCH >= 7 if ((register_t)catcher & 1) tf->tf_spsr |= PSR_T; else tf->tf_spsr &= ~PSR_T; #endif CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_usr_lr, tf->tf_usr_sp); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } struct kva_md_info kmi; /* * arm32_vector_init: * * Initialize the vector page, and select whether or not to * relocate the vectors. * * NOTE: We expect the vector page to be mapped at its expected * destination. */ extern unsigned int page0[], page0_data[]; void arm_vector_init(vm_offset_t va, int which) { unsigned int *vectors = (int *) va; unsigned int *vectors_data = vectors + (page0_data - page0); int vec; /* * Loop through the vectors we're taking over, and copy the * vector's insn and data word. */ for (vec = 0; vec < ARM_NVEC; vec++) { if ((which & (1 << vec)) == 0) { /* Don't want to take over this vector. */ continue; } vectors[vec] = page0[vec]; vectors_data[vec] = page0_data[vec]; } /* Now sync the vectors. */ cpu_icache_sync_range(va, (ARM_NVEC * 2) * sizeof(u_int)); vector_page = va; if (va == ARM_VECTORS_HIGH) { /* * Assume the MD caller knows what it's doing here, and * really does want the vector page relocated. * * Note: This has to be done here (and not just in * cpu_setup()) because the vector page needs to be * accessible *before* cpu_startup() is called. * Think ddb(9) ... * * NOTE: If the CPU control register is not readable, * this will totally fail! We'll just assume that * any system that has high vector support has a * readable CPU control register, for now. If we * ever encounter one that does not, we'll have to * rethink this. */ cpu_control(CPU_CONTROL_VECRELOC, CPU_CONTROL_VECRELOC); } } static void cpu_startup(void *dummy) { struct pcb *pcb = thread0.td_pcb; const unsigned int mbyte = 1024 * 1024; #ifdef ARM_TP_ADDRESS #ifndef ARM_CACHE_LOCK_ENABLE vm_page_t m; #endif #endif identify_arm_cpu(); vm_ksubmap_init(&kmi); /* * Display the RAM layout. */ printf("real memory = %ju (%ju MB)\n", (uintmax_t)arm32_ptob(realmem), (uintmax_t)arm32_ptob(realmem) / mbyte); printf("avail memory = %ju (%ju MB)\n", (uintmax_t)arm32_ptob(vm_cnt.v_free_count), (uintmax_t)arm32_ptob(vm_cnt.v_free_count) / mbyte); if (bootverbose) { arm_physmem_print_tables(); arm_devmap_print_table(); } bufinit(); vm_pager_bufferinit(); pcb->pcb_regs.sf_sp = (u_int)thread0.td_kstack + USPACE_SVC_STACK_TOP; pmap_set_pcb_pagedir(pmap_kernel(), pcb); #ifndef ARM_NEW_PMAP vector_page_setprot(VM_PROT_READ); pmap_postinit(); #endif #ifdef ARM_TP_ADDRESS #ifdef ARM_CACHE_LOCK_ENABLE pmap_kenter_user(ARM_TP_ADDRESS, ARM_TP_ADDRESS); arm_lock_cache_line(ARM_TP_ADDRESS); #else m = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_ZERO); pmap_kenter_user(ARM_TP_ADDRESS, VM_PAGE_TO_PHYS(m)); #endif *(uint32_t *)ARM_RAS_START = 0; *(uint32_t *)ARM_RAS_END = 0xffffffff; #endif } SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. 
*/ void cpu_flush_dcache(void *ptr, size_t len) { cpu_dcache_wb_range((uintptr_t)ptr, len); #ifdef ARM_L2_PIPT cpu_l2cache_wb_range((uintptr_t)vtophys(ptr), len); #else cpu_l2cache_wb_range((uintptr_t)ptr, len); #endif } /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { return (ENXIO); } void cpu_idle(int busy) { CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", busy, curcpu); spinlock_enter(); #ifndef NO_EVENTTIMERS if (!busy) cpu_idleclock(); #endif if (!sched_runnable()) cpu_sleep(0); #ifndef NO_EVENTTIMERS if (!busy) cpu_activeclock(); #endif spinlock_exit(); CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done", busy, curcpu); } int cpu_idle_wakeup(int cpu) { return (0); } /* * Most ARM platforms don't need to do anything special to init their clocks * (they get intialized during normal device attachment), and by not defining a * cpu_initclocks() function they get this generic one. Any platform that needs * to do something special can just provide their own implementation, which will * override this one due to the weak linkage. */ void arm_generic_initclocks(void) { #ifndef NO_EVENTTIMERS #ifdef SMP if (PCPU_GET(cpuid) == 0) cpu_initclocks_bsp(); else cpu_initclocks_ap(); #else cpu_initclocks_bsp(); #endif #endif } __weak_reference(arm_generic_initclocks, cpu_initclocks); int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *tf = td->td_frame; bcopy(&tf->tf_r0, regs->r, sizeof(regs->r)); regs->r_sp = tf->tf_usr_sp; regs->r_lr = tf->tf_usr_lr; regs->r_pc = tf->tf_pc; regs->r_cpsr = tf->tf_spsr; return (0); } int fill_fpregs(struct thread *td, struct fpreg *regs) { bzero(regs, sizeof(*regs)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tf = td->td_frame; bcopy(regs->r, &tf->tf_r0, sizeof(regs->r)); tf->tf_usr_sp = regs->r_sp; tf->tf_usr_lr = regs->r_lr; tf->tf_pc = regs->r_pc; tf->tf_spsr &= ~PSR_FLAGS; tf->tf_spsr |= regs->r_cpsr & PSR_FLAGS; return (0); } int set_fpregs(struct thread *td, struct fpreg *regs) { return (0); } int fill_dbregs(struct thread *td, struct dbreg *regs) { return (0); } int set_dbregs(struct thread *td, struct dbreg *regs) { return (0); } static int ptrace_read_int(struct thread *td, vm_offset_t addr, uint32_t *v) { if (proc_readmem(td, td->td_proc, addr, v, sizeof(*v)) != sizeof(*v)) return (ENOMEM); return (0); } static int ptrace_write_int(struct thread *td, vm_offset_t addr, uint32_t v) { if (proc_writemem(td, td->td_proc, addr, &v, sizeof(v)) != sizeof(v)) return (ENOMEM); return (0); } static u_int ptrace_get_usr_reg(void *cookie, int reg) { int ret; struct thread *td = cookie; KASSERT(((reg >= 0) && (reg <= ARM_REG_NUM_PC)), ("reg is outside range")); switch(reg) { case ARM_REG_NUM_PC: ret = td->td_frame->tf_pc; break; case ARM_REG_NUM_LR: ret = td->td_frame->tf_usr_lr; break; case ARM_REG_NUM_SP: ret = td->td_frame->tf_usr_sp; break; default: ret = *((register_t*)&td->td_frame->tf_r0 + reg); break; } return (ret); } static u_int ptrace_get_usr_int(void* cookie, vm_offset_t offset, u_int* val) { struct thread *td = cookie; u_int error; error = ptrace_read_int(td, offset, val); return (error); } /** * This function parses current instruction opcode and decodes * any possible jump (change in PC) which might occur after * the instruction is executed. 
* * @param td Thread structure of analysed task * @param cur_instr Currently executed instruction * @param alt_next_address Pointer to the variable where * the destination address of the * jump instruction shall be stored. * * @return <0> when jump is possible * <EINVAL> otherwise */ static int ptrace_get_alternative_next(struct thread *td, uint32_t cur_instr, uint32_t *alt_next_address) { int error; if (inst_branch(cur_instr) || inst_call(cur_instr) || inst_return(cur_instr)) { error = arm_predict_branch(td, cur_instr, td->td_frame->tf_pc, alt_next_address, ptrace_get_usr_reg, ptrace_get_usr_int); return (error); } return (EINVAL); } int ptrace_single_step(struct thread *td) { struct proc *p; int error, error_alt; uint32_t cur_instr, alt_next = 0; /* TODO: This needs to be updated for Thumb-2 */ if ((td->td_frame->tf_spsr & PSR_T) != 0) return (EINVAL); KASSERT(td->td_md.md_ptrace_instr == 0, ("Didn't clear single step")); KASSERT(td->td_md.md_ptrace_instr_alt == 0, ("Didn't clear alternative single step")); p = td->td_proc; PROC_UNLOCK(p); error = ptrace_read_int(td, td->td_frame->tf_pc, &cur_instr); if (error) goto out; error = ptrace_read_int(td, td->td_frame->tf_pc + INSN_SIZE, &td->td_md.md_ptrace_instr); if (error == 0) { error = ptrace_write_int(td, td->td_frame->tf_pc + INSN_SIZE, PTRACE_BREAKPOINT); if (error) { td->td_md.md_ptrace_instr = 0; } else { td->td_md.md_ptrace_addr = td->td_frame->tf_pc + INSN_SIZE; } } error_alt = ptrace_get_alternative_next(td, cur_instr, &alt_next); if (error_alt == 0) { error_alt = ptrace_read_int(td, alt_next, &td->td_md.md_ptrace_instr_alt); if (error_alt) { td->td_md.md_ptrace_instr_alt = 0; } else { error_alt = ptrace_write_int(td, alt_next, PTRACE_BREAKPOINT); if (error_alt) td->td_md.md_ptrace_instr_alt = 0; else td->td_md.md_ptrace_addr_alt = alt_next; } } out: PROC_LOCK(p); return ((error != 0) && (error_alt != 0)); } int ptrace_clear_single_step(struct thread *td) { struct proc *p; /* TODO: This needs to be updated for Thumb-2 */ if ((td->td_frame->tf_spsr & PSR_T) != 0) return (EINVAL); if (td->td_md.md_ptrace_instr != 0) { p = td->td_proc; PROC_UNLOCK(p); ptrace_write_int(td, td->td_md.md_ptrace_addr, td->td_md.md_ptrace_instr); PROC_LOCK(p); td->td_md.md_ptrace_instr = 0; } if (td->td_md.md_ptrace_instr_alt != 0) { p = td->td_proc; PROC_UNLOCK(p); ptrace_write_int(td, td->td_md.md_ptrace_addr_alt, td->td_md.md_ptrace_instr_alt); PROC_LOCK(p); td->td_md.md_ptrace_instr_alt = 0; } return (0); } int ptrace_set_pc(struct thread *td, unsigned long addr) { td->td_frame->tf_pc = addr; return (0); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { } void spinlock_enter(void) { struct thread *td; register_t cspr; td = curthread; if (td->td_md.md_spinlock_count == 0) { cspr = disable_interrupts(PSR_I | PSR_F); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_cspr = cspr; } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t cspr; td = curthread; critical_exit(); cspr = td->td_md.md_saved_cspr; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) restore_interrupts(cspr); } /* * Clear registers on exec */ void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *tf = td->td_frame; memset(tf, 0, sizeof(*tf)); tf->tf_usr_sp = stack; tf->tf_usr_lr = imgp->entry_addr; tf->tf_svc_lr = 0x77777777; tf->tf_pc = imgp->entry_addr; tf->tf_spsr = PSR_USR32_MODE; } /* * Get machine context.
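 *
 * As a hedged userland counterpart (FreeBSD/arm _REG_* index names
 * assumed), getcontext(2) hands applications exactly the register
 * array filled in below:
 */

#include <stdio.h>
#include <ucontext.h>

int
main(void)
{
	ucontext_t uc;

	if (getcontext(&uc) != 0)
		return (1);
	printf("pc %#x sp %#x lr %#x\n",
	    (unsigned)uc.uc_mcontext.__gregs[_REG_PC],
	    (unsigned)uc.uc_mcontext.__gregs[_REG_SP],
	    (unsigned)uc.uc_mcontext.__gregs[_REG_LR]);
	return (0);
}

/*
 * get_mcontext() itself: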
*/ int get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret) { struct trapframe *tf = td->td_frame; __greg_t *gr = mcp->__gregs; if (clear_ret & GET_MC_CLEAR_RET) { gr[_REG_R0] = 0; gr[_REG_CPSR] = tf->tf_spsr & ~PSR_C; } else { gr[_REG_R0] = tf->tf_r0; gr[_REG_CPSR] = tf->tf_spsr; } gr[_REG_R1] = tf->tf_r1; gr[_REG_R2] = tf->tf_r2; gr[_REG_R3] = tf->tf_r3; gr[_REG_R4] = tf->tf_r4; gr[_REG_R5] = tf->tf_r5; gr[_REG_R6] = tf->tf_r6; gr[_REG_R7] = tf->tf_r7; gr[_REG_R8] = tf->tf_r8; gr[_REG_R9] = tf->tf_r9; gr[_REG_R10] = tf->tf_r10; gr[_REG_R11] = tf->tf_r11; gr[_REG_R12] = tf->tf_r12; gr[_REG_SP] = tf->tf_usr_sp; gr[_REG_LR] = tf->tf_usr_lr; gr[_REG_PC] = tf->tf_pc; return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. */ int set_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tf = td->td_frame; const __greg_t *gr = mcp->__gregs; tf->tf_r0 = gr[_REG_R0]; tf->tf_r1 = gr[_REG_R1]; tf->tf_r2 = gr[_REG_R2]; tf->tf_r3 = gr[_REG_R3]; tf->tf_r4 = gr[_REG_R4]; tf->tf_r5 = gr[_REG_R5]; tf->tf_r6 = gr[_REG_R6]; tf->tf_r7 = gr[_REG_R7]; tf->tf_r8 = gr[_REG_R8]; tf->tf_r9 = gr[_REG_R9]; tf->tf_r10 = gr[_REG_R10]; tf->tf_r11 = gr[_REG_R11]; tf->tf_r12 = gr[_REG_R12]; tf->tf_usr_sp = gr[_REG_SP]; tf->tf_usr_lr = gr[_REG_LR]; tf->tf_pc = gr[_REG_PC]; tf->tf_spsr = gr[_REG_CPSR]; return (0); } /* * MPSAFE */ int sys_sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; int spsr; if (uap == NULL) return (EFAULT); if (copyin(uap->sigcntxp, &uc, sizeof(uc))) return (EFAULT); /* * Make sure the processor mode has not been tampered with and * interrupts have not been disabled. */ spsr = uc.uc_mcontext.__gregs[_REG_CPSR]; if ((spsr & PSR_MODE) != PSR_USR32_MODE || (spsr & (PSR_I | PSR_F)) != 0) return (EINVAL); /* Restore register context. */ set_mcontext(td, &uc.uc_mcontext); /* Restore signal mask. */ kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); return (EJUSTRETURN); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. 
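 *
 * A tiny userland analogue (GCC/Clang builtins assumed): a backtrace
 * needs no more to start from than a coherent frame pointer and
 * return address:
 */

#include <stdio.h>

int
main(void)
{
	printf("fp %p ra %p\n", __builtin_frame_address(0),
	    __builtin_return_address(0));
	return (0);
}

/*
 * makectx() snapshots the equivalent minimal state from the trapframe.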
*/ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_regs.sf_r4 = tf->tf_r4; pcb->pcb_regs.sf_r5 = tf->tf_r5; pcb->pcb_regs.sf_r6 = tf->tf_r6; pcb->pcb_regs.sf_r7 = tf->tf_r7; pcb->pcb_regs.sf_r8 = tf->tf_r8; pcb->pcb_regs.sf_r9 = tf->tf_r9; pcb->pcb_regs.sf_r10 = tf->tf_r10; pcb->pcb_regs.sf_r11 = tf->tf_r11; pcb->pcb_regs.sf_r12 = tf->tf_r12; pcb->pcb_regs.sf_pc = tf->tf_pc; pcb->pcb_regs.sf_lr = tf->tf_usr_lr; pcb->pcb_regs.sf_sp = tf->tf_usr_sp; } /* * Fake up a boot descriptor table */ vm_offset_t fake_preload_metadata(struct arm_boot_params *abp __unused) { #ifdef DDB vm_offset_t zstart = 0, zend = 0; #endif vm_offset_t lastaddr; int i = 0; static uint32_t fake_preload[35]; fake_preload[i++] = MODINFO_NAME; fake_preload[i++] = strlen("kernel") + 1; strcpy((char*)&fake_preload[i++], "kernel"); i += 1; fake_preload[i++] = MODINFO_TYPE; fake_preload[i++] = strlen("elf kernel") + 1; strcpy((char*)&fake_preload[i++], "elf kernel"); i += 2; fake_preload[i++] = MODINFO_ADDR; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = KERNVIRTADDR; fake_preload[i++] = MODINFO_SIZE; fake_preload[i++] = sizeof(uint32_t); fake_preload[i++] = (uint32_t)&end - KERNVIRTADDR; #ifdef DDB if (*(uint32_t *)KERNVIRTADDR == MAGIC_TRAMP_NUMBER) { fake_preload[i++] = MODINFO_METADATA|MODINFOMD_SSYM; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = *(uint32_t *)(KERNVIRTADDR + 4); fake_preload[i++] = MODINFO_METADATA|MODINFOMD_ESYM; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = *(uint32_t *)(KERNVIRTADDR + 8); lastaddr = *(uint32_t *)(KERNVIRTADDR + 8); zend = lastaddr; zstart = *(uint32_t *)(KERNVIRTADDR + 4); db_fetch_ksymtab(zstart, zend); } else #endif lastaddr = (vm_offset_t)&end; fake_preload[i++] = 0; fake_preload[i] = 0; preload_metadata = (void *)fake_preload; + init_static_kenv(NULL, 0); + return (lastaddr); } void pcpu0_init(void) { #if __ARM_ARCH >= 6 set_curthread(&thread0); #endif pcpu_init(pcpup, 0, sizeof(struct pcpu)); PCPU_SET(curthread, &thread0); } #if defined(LINUX_BOOT_ABI) vm_offset_t linux_parse_boot_param(struct arm_boot_params *abp) { struct arm_lbabi_tag *walker; uint32_t revision; uint64_t serial; /* * Linux boot ABI: r0 = 0, r1 is the board type (!= 0) and r2 * is atags or dtb pointer. If all of these aren't satisfied, * then punt. */ if (!(abp->abp_r0 == 0 && abp->abp_r1 != 0 && abp->abp_r2 != 0)) return 0; board_id = abp->abp_r1; walker = (struct arm_lbabi_tag *) (abp->abp_r2 + KERNVIRTADDR - abp->abp_physaddr); /* xxx - Need to also look for binary device tree */ if (ATAG_TAG(walker) != ATAG_CORE) return 0; atag_list = walker; while (ATAG_TAG(walker) != ATAG_NONE) { switch (ATAG_TAG(walker)) { case ATAG_CORE: break; case ATAG_MEM: arm_physmem_hardware_region(walker->u.tag_mem.start, walker->u.tag_mem.size); break; case ATAG_INITRD2: break; case ATAG_SERIAL: serial = walker->u.tag_sn.low | ((uint64_t)walker->u.tag_sn.high << 32); board_set_serial(serial); break; case ATAG_REVISION: revision = walker->u.tag_rev.rev; board_set_revision(revision); break; case ATAG_CMDLINE: /* XXX open question: Parse this for boothowto? 
*/ bcopy(walker->u.tag_cmd.command, linux_command_line, ATAG_SIZE(walker)); break; default: break; } walker = ATAG_NEXT(walker); } /* Save a copy for later */ bcopy(atag_list, atags, (char *)walker - (char *)atag_list + ATAG_SIZE(walker)); + init_static_kenv(NULL, 0); + return fake_preload_metadata(abp); } #endif #if defined(FREEBSD_BOOT_LOADER) vm_offset_t freebsd_parse_boot_param(struct arm_boot_params *abp) { vm_offset_t lastaddr = 0; void *mdp; void *kmdp; #ifdef DDB vm_offset_t ksym_start; vm_offset_t ksym_end; #endif /* * Mask metadata pointer: it is supposed to be on page boundary. If * the first argument (mdp) doesn't point to a valid address the * bootloader must have passed us something else than the metadata * ptr, so we give up. Also give up if we cannot find metadta section * the loader creates that we get all this data out of. */ if ((mdp = (void *)(abp->abp_r0 & ~PAGE_MASK)) == NULL) return 0; preload_metadata = mdp; kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) return 0; boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); - kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); + loader_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); + init_static_kenv(loader_envp, 0); lastaddr = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t); #ifdef DDB ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t); ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t); db_fetch_ksymtab(ksym_start, ksym_end); #endif return lastaddr; } #endif vm_offset_t default_parse_boot_param(struct arm_boot_params *abp) { vm_offset_t lastaddr; #if defined(LINUX_BOOT_ABI) if ((lastaddr = linux_parse_boot_param(abp)) != 0) return lastaddr; #endif #if defined(FREEBSD_BOOT_LOADER) if ((lastaddr = freebsd_parse_boot_param(abp)) != 0) return lastaddr; #endif /* Fall back to hardcoded metadata. */ lastaddr = fake_preload_metadata(abp); return lastaddr; } /* * Stub version of the boot parameter parsing routine. We are * called early in initarm, before even VM has been initialized. * This routine needs to preserve any data that the boot loader * has passed in before the kernel starts to grow past the end * of the BSS, traditionally the place boot-loaders put this data. * * Since this is called so early, things that depend on the vm system * being setup (including access to some SoC's serial ports), about * all that can be done in this routine is to copy the arguments. * * This is the default boot parameter parsing routine. Individual * kernels/boards can override this weak function with one of their * own. We just fake metadata... 
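 *
 * A minimal standalone sketch of the weak-alias mechanism relied on
 * below (FreeBSD's __weak_reference() from sys/cdefs.h; the function
 * names are invented for the sketch):
 */

#include <sys/cdefs.h>
#include <stdio.h>

int
default_greet(void)
{
	printf("default implementation\n");
	return (0);
}
/* A strong greet() defined elsewhere would win over this alias at link time. */
__weak_reference(default_greet, greet);

int greet(void);

int
main(void)
{
	return (greet());
}

/*
 * The kernel's default boot-parameter parser is overridable the same way: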
*/ __weak_reference(default_parse_boot_param, parse_boot_param); /* * Initialize proc0 */ void init_proc0(vm_offset_t kstack) { proc_linkup0(&proc0, &thread0); thread0.td_kstack = kstack; thread0.td_pcb = (struct pcb *) (thread0.td_kstack + kstack_pages * PAGE_SIZE) - 1; thread0.td_pcb->pcb_flags = 0; thread0.td_pcb->pcb_vfpcpu = -1; thread0.td_pcb->pcb_vfpstate.fpscr = VFPSCR_DN; thread0.td_frame = &proc0_tf; pcpup->pc_curpcb = thread0.td_pcb; } int arm_predict_branch(void *cookie, u_int insn, register_t pc, register_t *new_pc, u_int (*fetch_reg)(void*, int), u_int (*read_int)(void*, vm_offset_t, u_int*)) { u_int addr, nregs, offset = 0; int error = 0; switch ((insn >> 24) & 0xf) { case 0x2: /* add pc, reg1, #value */ case 0x0: /* add pc, reg1, reg2, lsl #offset */ addr = fetch_reg(cookie, (insn >> 16) & 0xf); if (((insn >> 16) & 0xf) == 15) addr += 8; if (insn & 0x0200000) { offset = (insn >> 7) & 0x1e; offset = (insn & 0xff) << (32 - offset) | (insn & 0xff) >> offset; } else { offset = fetch_reg(cookie, insn & 0x0f); if ((insn & 0x0000ff0) != 0x00000000) { if (insn & 0x10) nregs = fetch_reg(cookie, (insn >> 8) & 0xf); else nregs = (insn >> 7) & 0x1f; switch ((insn >> 5) & 3) { case 0: /* lsl */ offset = offset << nregs; break; case 1: /* lsr */ offset = offset >> nregs; break; default: break; /* XXX */ } } *new_pc = addr + offset; return (0); } case 0xa: /* b ... */ case 0xb: /* bl ... */ addr = ((insn << 2) & 0x03ffffff); if (addr & 0x02000000) addr |= 0xfc000000; *new_pc = (pc + 8 + addr); return (0); case 0x7: /* ldr pc, [pc, reg, lsl #2] */ addr = fetch_reg(cookie, insn & 0xf); addr = pc + 8 + (addr << 2); error = read_int(cookie, addr, &addr); *new_pc = addr; return (error); case 0x1: /* mov pc, reg */ *new_pc = fetch_reg(cookie, insn & 0xf); return (0); case 0x4: case 0x5: /* ldr pc, [reg] */ addr = fetch_reg(cookie, (insn >> 16) & 0xf); /* ldr pc, [reg, #offset] */ if (insn & (1 << 24)) offset = insn & 0xfff; if (insn & 0x00800000) addr += offset; else addr -= offset; error = read_int(cookie, addr, &addr); *new_pc = addr; return (error); case 0x8: /* ldmxx reg, {..., pc} */ case 0x9: addr = fetch_reg(cookie, (insn >> 16) & 0xf); nregs = (insn & 0x5555) + ((insn >> 1) & 0x5555); nregs = (nregs & 0x3333) + ((nregs >> 2) & 0x3333); nregs = (nregs + (nregs >> 4)) & 0x0f0f; nregs = (nregs + (nregs >> 8)) & 0x001f; switch ((insn >> 23) & 0x3) { case 0x0: /* ldmda */ addr = addr - 0; break; case 0x1: /* ldmia */ addr = addr + 0 + ((nregs - 1) << 2); break; case 0x2: /* ldmdb */ addr = addr - 4; break; case 0x3: /* ldmib */ addr = addr + 4 + ((nregs - 1) << 2); break; } error = read_int(cookie, addr, &addr); *new_pc = addr; return (error); default: return (EINVAL); } } #ifdef ARM_NEW_PMAP void set_stackptrs(int cpu) { set_stackptr(PSR_IRQ32_MODE, irqstack + ((IRQ_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); set_stackptr(PSR_ABT32_MODE, abtstack + ((ABT_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); set_stackptr(PSR_UND32_MODE, undstack + ((UND_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); } #else void set_stackptrs(int cpu) { set_stackptr(PSR_IRQ32_MODE, irqstack.pv_va + ((IRQ_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); set_stackptr(PSR_ABT32_MODE, abtstack.pv_va + ((ABT_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); set_stackptr(PSR_UND32_MODE, undstack.pv_va + ((UND_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); } #endif #ifdef EFI #define efi_next_descriptor(ptr, size) \ ((struct efi_md *)(((uint8_t *) ptr) + size)) static void add_efi_map_entries(struct efi_map_header *efihdr, struct mem_region *mr, int *mrcnt, uint32_t 
*memsize) { struct efi_md *map, *p; const char *type; size_t efisz, memory_size; int ndesc, i, j; static const char *types[] = { "Reserved", "LoaderCode", "LoaderData", "BootServicesCode", "BootServicesData", "RuntimeServicesCode", "RuntimeServicesData", "ConventionalMemory", "UnusableMemory", "ACPIReclaimMemory", "ACPIMemoryNVS", "MemoryMappedIO", "MemoryMappedIOPortSpace", "PalCode" }; *mrcnt = 0; *memsize = 0; /* * Memory map data provided by UEFI via the GetMemoryMap * Boot Services API. */ efisz = roundup2(sizeof(struct efi_map_header), 0x10); map = (struct efi_md *)((uint8_t *)efihdr + efisz); if (efihdr->descriptor_size == 0) return; ndesc = efihdr->memory_size / efihdr->descriptor_size; if (boothowto & RB_VERBOSE) printf("%23s %12s %12s %8s %4s\n", "Type", "Physical", "Virtual", "#Pages", "Attr"); memory_size = 0; for (i = 0, j = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, efihdr->descriptor_size)) { if (boothowto & RB_VERBOSE) { if (p->md_type <= EFI_MD_TYPE_PALCODE) type = types[p->md_type]; else type = "<INVALID>"; printf("%23s %012llx %12p %08llx ", type, p->md_phys, p->md_virt, p->md_pages); if (p->md_attr & EFI_MD_ATTR_UC) printf("UC "); if (p->md_attr & EFI_MD_ATTR_WC) printf("WC "); if (p->md_attr & EFI_MD_ATTR_WT) printf("WT "); if (p->md_attr & EFI_MD_ATTR_WB) printf("WB "); if (p->md_attr & EFI_MD_ATTR_UCE) printf("UCE "); if (p->md_attr & EFI_MD_ATTR_WP) printf("WP "); if (p->md_attr & EFI_MD_ATTR_RP) printf("RP "); if (p->md_attr & EFI_MD_ATTR_XP) printf("XP "); if (p->md_attr & EFI_MD_ATTR_RT) printf("RUNTIME"); printf("\n"); } switch (p->md_type) { case EFI_MD_TYPE_CODE: case EFI_MD_TYPE_DATA: case EFI_MD_TYPE_BS_CODE: case EFI_MD_TYPE_BS_DATA: case EFI_MD_TYPE_FREE: /* * We're allowed to use any entry with these types. */ break; default: continue; } j++; if (j >= FDT_MEM_REGIONS) break; mr[j].mr_start = p->md_phys; mr[j].mr_size = p->md_pages * PAGE_SIZE; memory_size += mr[j].mr_size; } *mrcnt = j; *memsize = memory_size; } #endif /* EFI */ #ifdef FDT static char * kenv_next(char *cp) { if (cp != NULL) { while (*cp != 0) cp++; cp++; if (*cp == 0) cp = NULL; } return (cp); } static void print_kenv(void) { char *cp; debugf("loader passed (static) kenv:\n"); - if (kern_envp == NULL) { + if (loader_envp == NULL) { debugf(" no env, null ptr\n"); return; } - debugf(" kern_envp = 0x%08x\n", (uint32_t)kern_envp); + debugf(" loader_envp = 0x%08x\n", (uint32_t)loader_envp); - for (cp = kern_envp; cp != NULL; cp = kenv_next(cp)) + for (cp = loader_envp; cp != NULL; cp = kenv_next(cp)) debugf(" %x %s\n", (uint32_t)cp, cp); } #ifndef ARM_NEW_PMAP void * initarm(struct arm_boot_params *abp) { struct mem_region mem_regions[FDT_MEM_REGIONS]; struct pv_addr kernel_l1pt; struct pv_addr dpcpu; vm_offset_t dtbp, freemempos, l2_start, lastaddr; uint32_t memsize, l2size; char *env; void *kmdp; u_int l1pagetable; int i, j, err_devmap, mem_regions_sz; lastaddr = parse_boot_param(abp); arm_physmem_kernaddr = abp->abp_physaddr; memsize = 0; cpuinfo_init(); set_cpufuncs(); /* * Find the dtb passed in by the boot loader. */ kmdp = preload_search_by_type("elf kernel"); if (kmdp != NULL) dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t); else dtbp = (vm_offset_t)NULL; #if defined(FDT_DTB_STATIC) /* * In case the device tree blob was not retrieved (from metadata) try * to use the statically embedded one.
*/ if (dtbp == (vm_offset_t)NULL) dtbp = (vm_offset_t)&fdt_static_dtb; #endif if (OF_install(OFW_FDT, 0) == FALSE) panic("Cannot install FDT"); if (OF_init((void *)dtbp) != 0) panic("OF_init failed with the found device tree"); /* Grab physical memory regions information from device tree. */ if (fdt_get_mem_regions(mem_regions, &mem_regions_sz, &memsize) != 0) panic("Cannot get physical memory regions"); arm_physmem_hardware_regions(mem_regions, mem_regions_sz); /* Grab reserved memory regions information from device tree. */ if (fdt_get_reserved_regions(mem_regions, &mem_regions_sz) == 0) arm_physmem_exclude_regions(mem_regions, mem_regions_sz, EXFLAG_NODUMP | EXFLAG_NOALLOC); /* Platform-specific initialisation */ platform_probe_and_attach(); pcpu0_init(); /* Do basic tuning, hz etc */ init_param1(); /* Calculate number of L2 tables needed for mapping vm_page_array */ l2size = (memsize / PAGE_SIZE) * sizeof(struct vm_page); l2size = (l2size >> L1_S_SHIFT) + 1; /* * Add one table for end of kernel map, one for stacks, msgbuf and * L1 and L2 tables map and one for vectors map. */ l2size += 3; /* Make it divisible by 4 */ l2size = (l2size + 3) & ~3; freemempos = (lastaddr + PAGE_MASK) & ~PAGE_MASK; /* Define a macro to simplify memory allocation */ #define valloc_pages(var, np) \ alloc_pages((var).pv_va, (np)); \ (var).pv_pa = (var).pv_va + (abp->abp_physaddr - KERNVIRTADDR); #define alloc_pages(var, np) \ (var) = freemempos; \ freemempos += (np * PAGE_SIZE); \ memset((char *)(var), 0, ((np) * PAGE_SIZE)); while (((freemempos - L1_TABLE_SIZE) & (L1_TABLE_SIZE - 1)) != 0) freemempos += PAGE_SIZE; valloc_pages(kernel_l1pt, L1_TABLE_SIZE / PAGE_SIZE); for (i = 0, j = 0; i < l2size; ++i) { if (!(i % (PAGE_SIZE / L2_TABLE_SIZE_REAL))) { valloc_pages(kernel_pt_table[i], L2_TABLE_SIZE / PAGE_SIZE); j = i; } else { kernel_pt_table[i].pv_va = kernel_pt_table[j].pv_va + L2_TABLE_SIZE_REAL * (i - j); kernel_pt_table[i].pv_pa = kernel_pt_table[i].pv_va - KERNVIRTADDR + abp->abp_physaddr; } } /* * Allocate a page for the system page mapped to 0x00000000 * or 0xffff0000. This page will just contain the system vectors * and can be shared by all processes. */ valloc_pages(systempage, 1); /* Allocate dynamic per-cpu area. */ valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); dpcpu_init((void *)dpcpu.pv_va, 0); /* Allocate stacks for all modes */ valloc_pages(irqstack, IRQ_STACK_SIZE * MAXCPU); valloc_pages(abtstack, ABT_STACK_SIZE * MAXCPU); valloc_pages(undstack, UND_STACK_SIZE * MAXCPU); valloc_pages(kernelstack, kstack_pages * MAXCPU); valloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); /* * Now we start construction of the L1 page table * We start by mapping the L2 page tables into the L1. * This means that we can replace L1 mappings later on if necessary */ l1pagetable = kernel_l1pt.pv_va; /* * Try to map as much as possible of kernel text and data using * 1MB section mapping and for the rest of initial kernel address * space use L2 coarse tables. 
* * Link L2 tables for mapping remainder of kernel (modulo 1MB) * and kernel structures */ l2_start = lastaddr & ~(L1_S_OFFSET); for (i = 0 ; i < l2size - 1; i++) pmap_link_l2pt(l1pagetable, l2_start + i * L1_S_SIZE, &kernel_pt_table[i]); pmap_curmaxkvaddr = l2_start + (l2size - 1) * L1_S_SIZE; /* Map kernel code and data */ pmap_map_chunk(l1pagetable, KERNVIRTADDR, abp->abp_physaddr, (((uint32_t)(lastaddr) - KERNVIRTADDR) + PAGE_MASK) & ~PAGE_MASK, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); /* Map L1 directory and allocated L2 page tables */ pmap_map_chunk(l1pagetable, kernel_l1pt.pv_va, kernel_l1pt.pv_pa, L1_TABLE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE); pmap_map_chunk(l1pagetable, kernel_pt_table[0].pv_va, kernel_pt_table[0].pv_pa, L2_TABLE_SIZE_REAL * l2size, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE); /* Map allocated DPCPU, stacks and msgbuf */ pmap_map_chunk(l1pagetable, dpcpu.pv_va, dpcpu.pv_pa, freemempos - dpcpu.pv_va, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); /* Link and map the vector page */ pmap_link_l2pt(l1pagetable, ARM_VECTORS_HIGH, &kernel_pt_table[l2size - 1]); pmap_map_entry(l1pagetable, ARM_VECTORS_HIGH, systempage.pv_pa, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE, PTE_CACHE); /* Establish static device mappings. */ err_devmap = platform_devmap_init(); arm_devmap_bootstrap(l1pagetable, NULL); vm_max_kernel_address = platform_lastaddr(); cpu_domains((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT); pmap_pa = kernel_l1pt.pv_pa; setttb(kernel_l1pt.pv_pa); cpu_tlb_flushID(); cpu_domains(DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)); /* * Now that proper page tables are installed, call cpu_setup() to enable * instruction and data caches and other chip-specific features. */ cpu_setup(); /* * Only after the SOC registers block is mapped we can perform device * tree fixups, as they may attempt to read parameters from hardware. */ OF_interpret("perform-fixup", 0); platform_gpio_init(); cninit(); debugf("initarm: console initialized\n"); debugf(" arg1 kmdp = 0x%08x\n", (uint32_t)kmdp); debugf(" boothowto = 0x%08x\n", boothowto); debugf(" dtbp = 0x%08x\n", (uint32_t)dtbp); print_kenv(); env = kern_getenv("kernelname"); if (env != NULL) { strlcpy(kernelname, env, sizeof(kernelname)); freeenv(env); } if (err_devmap != 0) printf("WARNING: could not fully configure devmap, error=%d\n", err_devmap); platform_late_init(); /* * Pages were allocated during the secondary bootstrap for the * stacks for different CPU modes. * We must now set the r13 registers in the different CPU modes to * point to these stacks. * Since the ARM stacks use STMFD etc. we must set r13 to the top end * of the stack memory. */ cpu_control(CPU_CONTROL_MMU_ENABLE, CPU_CONTROL_MMU_ENABLE); set_stackptrs(0); /* * We must now clean the cache again.... * Cleaning may be done by reading new data to displace any * dirty data in the cache. This will have happened in setttb() * but since we are boot strapping the addresses used for the read * may have just been remapped and thus the cache could be out * of sync. A re-clean after the switch will cure this. * After booting there are no gross relocations of the kernel thus * this problem will not occur after initarm(). 
*/ cpu_idcache_wbinv_all(); undefined_init(); init_proc0(kernelstack.pv_va); arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL); pmap_bootstrap(freemempos, &kernel_l1pt); msgbufp = (void *)msgbufpv.pv_va; msgbufinit(msgbufp, msgbufsize); mutex_init(); /* * Exclude the kernel (and all the things we allocated which immediately * follow the kernel) from the VM allocation pool but not from crash * dumps. virtual_avail is a global variable which tracks the kva we've * "allocated" while setting up pmaps. * * Prepare the list of physical memory available to the vm subsystem. */ arm_physmem_exclude_region(abp->abp_physaddr, (virtual_avail - KERNVIRTADDR), EXFLAG_NOALLOC); arm_physmem_init_kernel_globals(); init_param2(physmem); kdb_init(); return ((void *)(kernelstack.pv_va + USPACE_SVC_STACK_TOP - sizeof(struct pcb))); } #else /* !ARM_NEW_PMAP */ void * initarm(struct arm_boot_params *abp) { struct mem_region mem_regions[FDT_MEM_REGIONS]; vm_paddr_t lastaddr; vm_offset_t dtbp, kernelstack, dpcpu; uint32_t memsize; char *env; void *kmdp; int err_devmap, mem_regions_sz; #ifdef EFI struct efi_map_header *efihdr; #endif /* get last allocated physical address */ arm_physmem_kernaddr = abp->abp_physaddr; lastaddr = parse_boot_param(abp) - KERNVIRTADDR + arm_physmem_kernaddr; memsize = 0; set_cpufuncs(); cpuinfo_init(); /* * Find the dtb passed in by the boot loader. */ kmdp = preload_search_by_type("elf kernel"); dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t); #if defined(FDT_DTB_STATIC) /* * In case the device tree blob was not retrieved (from metadata) try * to use the statically embedded one. */ if (dtbp == (vm_offset_t)NULL) dtbp = (vm_offset_t)&fdt_static_dtb; #endif if (OF_install(OFW_FDT, 0) == FALSE) panic("Cannot install FDT"); if (OF_init((void *)dtbp) != 0) panic("OF_init failed with the found device tree"); #ifdef EFI efihdr = (struct efi_map_header *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP); if (efihdr != NULL) { add_efi_map_entries(efihdr, mem_regions, &mem_regions_sz, &memsize); } else #endif { /* Grab physical memory regions information from device tree. */ if (fdt_get_mem_regions(mem_regions, &mem_regions_sz, &memsize) != 0) panic("Cannot get physical memory regions"); } arm_physmem_hardware_regions(mem_regions, mem_regions_sz); /* Grab reserved memory regions information from device tree. */ if (fdt_get_reserved_regions(mem_regions, &mem_regions_sz) == 0) arm_physmem_exclude_regions(mem_regions, mem_regions_sz, EXFLAG_NODUMP | EXFLAG_NOALLOC); /* * Set TEX remapping registers. * Setup kernel page tables and switch to kernel L1 page table. */ pmap_set_tex(); pmap_bootstrap_prepare(lastaddr); /* * Now that proper page tables are installed, call cpu_setup() to enable * instruction and data caches and other chip-specific features. */ cpu_setup(); /* Platform-specific initialisation */ platform_probe_and_attach(); pcpu0_init(); /* Do basic tuning, hz etc */ init_param1(); /* * Allocate a page for the system page mapped to 0xffff0000 * This page will just contain the system vectors and can be * shared by all processes. */ systempage = pmap_preboot_get_pages(1); /* Map the vector page. */ pmap_preboot_map_pages(systempage, ARM_VECTORS_HIGH, 1); if (virtual_end >= ARM_VECTORS_HIGH) virtual_end = ARM_VECTORS_HIGH - 1; /* Allocate dynamic per-cpu area. 
*/ dpcpu = pmap_preboot_get_vpages(DPCPU_SIZE / PAGE_SIZE); dpcpu_init((void *)dpcpu, 0); /* Allocate stacks for all modes */ irqstack = pmap_preboot_get_vpages(IRQ_STACK_SIZE * MAXCPU); abtstack = pmap_preboot_get_vpages(ABT_STACK_SIZE * MAXCPU); undstack = pmap_preboot_get_vpages(UND_STACK_SIZE * MAXCPU); kernelstack = pmap_preboot_get_vpages(kstack_pages * MAXCPU); /* Allocate message buffer. */ msgbufp = (void *)pmap_preboot_get_vpages( round_page(msgbufsize) / PAGE_SIZE); /* * Pages were allocated during the secondary bootstrap for the * stacks for different CPU modes. * We must now set the r13 registers in the different CPU modes to * point to these stacks. * Since the ARM stacks use STMFD etc. we must set r13 to the top end * of the stack memory. */ set_stackptrs(0); mutex_init(); /* Establish static device mappings. */ err_devmap = platform_devmap_init(); arm_devmap_bootstrap(0, NULL); vm_max_kernel_address = platform_lastaddr(); /* * Only after the SOC registers block is mapped we can perform device * tree fixups, as they may attempt to read parameters from hardware. */ OF_interpret("perform-fixup", 0); platform_gpio_init(); cninit(); debugf("initarm: console initialized\n"); debugf(" arg1 kmdp = 0x%08x\n", (uint32_t)kmdp); debugf(" boothowto = 0x%08x\n", boothowto); debugf(" dtbp = 0x%08x\n", (uint32_t)dtbp); debugf(" lastaddr1: 0x%08x\n", lastaddr); print_kenv(); env = kern_getenv("kernelname"); if (env != NULL) strlcpy(kernelname, env, sizeof(kernelname)); if (err_devmap != 0) printf("WARNING: could not fully configure devmap, error=%d\n", err_devmap); platform_late_init(); /* * We must now clean the cache again.... * Cleaning may be done by reading new data to displace any * dirty data in the cache. This will have happened in setttb() * but since we are boot strapping the addresses used for the read * may have just been remapped and thus the cache could be out * of sync. A re-clean after the switch will cure this. * After booting there are no gross relocations of the kernel thus * this problem will not occur after initarm(). */ /* Set stack for exception handlers */ undefined_init(); init_proc0(kernelstack); arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL); enable_interrupts(PSR_A); pmap_bootstrap(0); /* Exclude the kernel (and all the things we allocated which immediately * follow the kernel) from the VM allocation pool but not from crash * dumps. virtual_avail is a global variable which tracks the kva we've * "allocated" while setting up pmaps. * * Prepare the list of physical memory available to the vm subsystem. */ arm_physmem_exclude_region(abp->abp_physaddr, pmap_preboot_get_pages(0) - abp->abp_physaddr, EXFLAG_NOALLOC); arm_physmem_init_kernel_globals(); init_param2(physmem); /* Init message buffer. */ msgbufinit(msgbufp, msgbufsize); kdb_init(); return ((void *)STACKALIGN(thread0.td_pcb)); } #endif /* !ARM_NEW_PMAP */ #endif /* FDT */ uint32_t (*arm_cpu_fill_vdso_timehands)(struct vdso_timehands *, struct timecounter *); uint32_t cpu_fill_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc) { return (arm_cpu_fill_vdso_timehands != NULL ? arm_cpu_fill_vdso_timehands(vdso_th, tc) : 0); } diff --git a/sys/arm/arm/physmem.c b/sys/arm/arm/physmem.c index a92216337949..566ab8569550 100644 --- a/sys/arm/arm/physmem.c +++ b/sys/arm/arm/physmem.c @@ -1,350 +1,363 @@ /*- * Copyright (c) 2014 Ian Lepore * All rights reserved.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" /* * Routines for describing and initializing anything related to physical memory. */ #include #include #include #include #include /* * These structures are used internally to keep track of regions of physical * ram, and regions within the physical ram that need to be excluded. An * exclusion region can be excluded from crash dumps, from the vm pool of pages * that can be allocated, or both, depending on the exclusion flags associated * with the region. */ #define MAX_HWCNT 10 #define MAX_EXCNT 10 struct region { vm_paddr_t addr; vm_size_t size; uint32_t flags; }; static struct region hwregions[MAX_HWCNT]; static struct region exregions[MAX_EXCNT]; static size_t hwcnt; static size_t excnt; /* * These "avail lists" are globals used to communicate physical memory layout to * other parts of the kernel. Within the arrays, each value is the starting * address of a contiguous area of physical address space. The values at even * indexes are areas that contain usable memory and the values at odd indexes * are areas that aren't usable. Each list is terminated by a pair of zero * entries. * * dump_avail tells the dump code what regions to include in a crash dump, and * phys_avail is the way we hand all the remaining physical ram we haven't used * in early kernel init over to the vm system for allocation management. * * We size these arrays to hold twice as many available regions as we allow for * hardware memory regions, to allow for the fact that exclusions can split a * hardware region into two or more available regions. In the real world there * will typically be one or two hardware regions and two or three exclusions. * * Each available region in this list occupies two array slots (the start of the * available region and the start of the unavailable region that follows it). */ #define MAX_AVAIL_REGIONS (MAX_HWCNT * 2) #define MAX_AVAIL_ENTRIES (MAX_AVAIL_REGIONS * 2) vm_paddr_t phys_avail[MAX_AVAIL_ENTRIES + 2]; /* +2 to allow for a pair */ vm_paddr_t dump_avail[MAX_AVAIL_ENTRIES + 2]; /* of zeroes to terminate. */ /* * realmem is the total number of hardware pages, excluded or not. * Maxmem is one greater than the last physical page number. 
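 *
 * The avail-list convention described above, as a standalone sketch
 * (the addresses and the 4KB page size are made-up sample values):
 */

#include <stdio.h>

/* { start0, end0, start1, end1, 0, 0 }: a zero pair terminates the list. */
static unsigned long long avail[] = {
	0x00001000, 0x07f00000,
	0x08000000, 0x10000000,
	0, 0
};

int
main(void)
{
	unsigned long long pages = 0;
	int i;

	for (i = 0; avail[i + 1] != 0; i += 2)
		pages += (avail[i + 1] - avail[i]) / 4096;
	printf("%llu usable pages\n", pages);
	return (0);
}

/*
 * The globals themselves: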
*/ long realmem; long Maxmem; /* The address at which the kernel was loaded. Set early in initarm(). */ vm_paddr_t arm_physmem_kernaddr; /* * Print the contents of the physical and excluded region tables using the * provided printf-like output function (which will be either printf or * db_printf). */ static void physmem_dump_tables(int (*prfunc)(const char *, ...)) { int flags, i; uintmax_t addr, size; const unsigned int mbyte = 1024 * 1024; prfunc("Physical memory chunk(s):\n"); for (i = 0; i < hwcnt; ++i) { addr = hwregions[i].addr; size = hwregions[i].size; prfunc(" 0x%08jx - 0x%08jx, %5ju MB (%7ju pages)\n", addr, addr + size - 1, size / mbyte, size / PAGE_SIZE); } prfunc("Excluded memory regions:\n"); for (i = 0; i < excnt; ++i) { addr = exregions[i].addr; size = exregions[i].size; flags = exregions[i].flags; prfunc(" 0x%08jx - 0x%08jx, %5ju MB (%7ju pages) %s %s\n", addr, addr + size - 1, size / mbyte, size / PAGE_SIZE, (flags & EXFLAG_NOALLOC) ? "NoAlloc" : "", (flags & EXFLAG_NODUMP) ? "NoDump" : ""); } #ifdef DEBUG prfunc("Avail lists:\n"); for (i = 0; phys_avail[i] != 0; ++i) { prfunc(" phys_avail[%d] 0x%08x\n", i, phys_avail[i]); } for (i = 0; dump_avail[i] != 0; ++i) { prfunc(" dump_avail[%d] 0x%08x\n", i, dump_avail[i]); } #endif } /* * Print the contents of the static mapping table. Used for bootverbose. */ void arm_physmem_print_tables() { physmem_dump_tables(printf); } /* * Walk the list of hardware regions, processing it against the list of * exclusions that contain the given exflags, and generating an "avail list". * * Updates the value at *pavail with the sum of all pages in all hw regions. * * Returns the number of pages of non-excluded memory added to the avail list. */ static size_t regions_to_avail(vm_paddr_t *avail, uint32_t exflags, long *pavail) { size_t acnt, exi, hwi; - vm_paddr_t end, start, xend, xstart; + uint64_t end, start, xend, xstart; long availmem; const struct region *exp, *hwp; realmem = 0; availmem = 0; acnt = 0; for (hwi = 0, hwp = hwregions; hwi < hwcnt; ++hwi, ++hwp) { start = hwp->addr; end = hwp->size + start; - realmem += arm32_btop(end - start); + realmem += arm32_btop((vm_offset_t)(end - start)); for (exi = 0, exp = exregions; exi < excnt; ++exi, ++exp) { /* * If the excluded region does not match given flags, * continue checking with the next excluded region. */ if ((exp->flags & exflags) == 0) continue; xstart = exp->addr; xend = exp->size + xstart; /* * If the excluded region ends before this hw region, * continue checking with the next excluded region. */ if (xend <= start) continue; /* * If the excluded region begins after this hw region * we're done because both lists are sorted. */ if (xstart >= end) break; /* * If the excluded region completely covers this hw * region, shrink this hw region to zero size. */ if ((start >= xstart) && (end <= xend)) { start = xend; end = xend; break; } /* * If the excluded region falls wholly within this hw * region without abutting or overlapping the beginning * or end, create an available entry from the leading * fragment, then adjust the start of this hw region to * the end of the excluded region, and continue checking * the next excluded region because another exclusion * could affect the remainder of this hw region. 
 */
			if ((xstart > start) && (xend < end)) {
-				avail[acnt++] = start;
-				avail[acnt++] = xstart;
-				availmem += arm32_btop(xstart - start);
+				avail[acnt++] = (vm_paddr_t)start;
+				avail[acnt++] = (vm_paddr_t)xstart;
+				availmem +=
+				    arm32_btop((vm_offset_t)(xstart - start));
				start = xend;
				continue;
			}
			/*
			 * We know the excluded region overlaps either the start
			 * or end of this hardware region (but not both), trim
			 * the excluded portion off the appropriate end.
			 */
			if (xstart <= start)
				start = xend;
			else
				end = xstart;
		}
		/*
		 * If the trimming actions above left a non-zero size, create an
		 * available entry for it.
		 */
		if (end > start) {
-			avail[acnt++] = start;
-			avail[acnt++] = end;
-			availmem += arm32_btop(end - start);
+			avail[acnt++] = (vm_paddr_t)start;
+			avail[acnt++] = (vm_paddr_t)end;
+			availmem += arm32_btop((vm_offset_t)(end - start));
		}
		if (acnt >= MAX_AVAIL_ENTRIES)
			panic("Not enough space in the dump/phys_avail arrays");
	}

	if (pavail)
		*pavail = availmem;
	return (acnt);
}

/*
 * Insertion-sort a new entry into a regions list; sorted by start address.
 */
static void
insert_region(struct region *regions, size_t rcnt, vm_paddr_t addr,
    vm_size_t size, uint32_t flags)
{
	size_t i;
	struct region *ep, *rp;

	ep = regions + rcnt;
	for (i = 0, rp = regions; i < rcnt; ++i, ++rp) {
		if (addr < rp->addr) {
			bcopy(rp, rp + 1, (ep - rp) * sizeof(*rp));
			break;
		}
	}
	rp->addr = addr;
	rp->size = size;
	rp->flags = flags;
}

/*
 * Add a hardware memory region.
 */
void
arm_physmem_hardware_region(vm_paddr_t pa, vm_size_t sz)
{
	vm_offset_t adj;

	/*
	 * Filter out the page at PA 0x00000000.  The VM can't handle it, as
	 * pmap_extract() == 0 means failure.
+	 *
+	 * Also filter out the page at the end of the physical address space --
+	 * if addr is non-zero and addr+size is zero we wrapped to the next byte
+	 * beyond what vm_paddr_t can express.  That leads to a NULL pointer
+	 * deref early in startup; work around it by leaving the last page out.
+	 *
+	 * XXX This just in: subtract out a whole megabyte, not just 1 page.
+	 * Reducing the size by anything less than 1MB results in the NULL
+	 * pointer deref in _vm_map_lock_read().  Better to give up a megabyte
+	 * than leave some folks with an unusable system while we investigate.
	 */
	if (pa == 0) {
		pa = PAGE_SIZE;
		sz -= PAGE_SIZE;
+	} else if (pa + sz == 0) {
+		sz -= 1024 * 1024;
	}

	/*
	 * Round the starting address up to a page boundary, and truncate the
	 * ending page down to a page boundary.
	 */
	adj = round_page(pa) - pa;
	pa = round_page(pa);
	sz = trunc_page(sz - adj);

	if (hwcnt < nitems(hwregions))
		insert_region(hwregions, hwcnt++, pa, sz, 0);
}

/*
 * Add an exclusion region.
 */
void
arm_physmem_exclude_region(vm_paddr_t pa, vm_size_t sz, uint32_t exflags)
{
	vm_offset_t adj;

	/*
	 * Truncate the starting address down to a page boundary, and round the
	 * ending page up to a page boundary.
	 */
	adj = pa - trunc_page(pa);
	pa = trunc_page(pa);
	sz = round_page(sz + adj);

	if (excnt < nitems(exregions))
		insert_region(exregions, excnt++, pa, sz, exflags);
}

/*
 * Process all the regions added earlier into the global avail lists.
 *
 * Updates the kernel global 'physmem' with the number of physical pages
 * available for use (all pages not in any exclusion region).
 *
 * Updates the kernel global 'Maxmem' with the page number one greater than the
 * last page of physical memory in the system.
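 *
 * As a hypothetical example, a board with 128MB of ram at 0x20000000 and no
 * high exclusions ends its last avail entry at 0x28000000, so Maxmem becomes
 * atop(0x28000000), one page past the highest physical page.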
 */
void
arm_physmem_init_kernel_globals(void)
{
	size_t nextidx;

	regions_to_avail(dump_avail, EXFLAG_NODUMP, NULL);
	nextidx = regions_to_avail(phys_avail, EXFLAG_NOALLOC, &physmem);
	if (nextidx == 0)
		panic("No memory entries in phys_avail");
	Maxmem = atop(phys_avail[nextidx - 1]);
}

#ifdef DDB
#include

DB_SHOW_COMMAND(physmem, db_show_physmem)
{

	physmem_dump_tables(db_printf);
}

#endif /* DDB */
diff --git a/sys/arm/xscale/ixp425/avila_machdep.c b/sys/arm/xscale/ixp425/avila_machdep.c
index e48fbb647243..e033e1357b64 100644
--- a/sys/arm/xscale/ixp425/avila_machdep.c
+++ b/sys/arm/xscale/ixp425/avila_machdep.c
@@ -1,437 +1,433 @@
/* $NetBSD: hpc_machdep.c,v 1.70 2003/09/16 08:18:22 agc Exp $ */

/*-
 * Copyright (c) 1994-1998 Mark Brinicombe.
 * Copyright (c) 1994 Brini.
 * All rights reserved.
 *
 * This code is derived from software written for Brini by Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *    This product includes software developed by Brini.
 * 4. The name of the company nor the name of the author may be used to
 *    endorse or promote products derived from this software without specific
 *    prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * RiscBSD kernel project
 *
 * machdep.c
 *
 * Machine dependent functions for kernel setup
 *
 * This file needs a lot of work.
 *
 * Created      : 17/09/94
 */

#include
__FBSDID("$FreeBSD$");

#include "opt_kstack_pages.h"

#define _ARM32_BUS_DMA_PRIVATE
#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include

#define KERNEL_PT_SYS		0	/* Page table for mapping proc0 zero page */
#define KERNEL_PT_IO		1
#define KERNEL_PT_IO_NUM	3
#define KERNEL_PT_BEFOREKERN	KERNEL_PT_IO + KERNEL_PT_IO_NUM
#define KERNEL_PT_AFKERNEL	KERNEL_PT_BEFOREKERN + 1	/* L2 table for mapping after kernel */
#define KERNEL_PT_AFKERNEL_NUM	9	/* this should be evenly divisible by PAGE_SIZE / L2_TABLE_SIZE_REAL (or 4) */

#define NUM_KERNEL_PTS		(KERNEL_PT_AFKERNEL + KERNEL_PT_AFKERNEL_NUM)

struct pv_addr kernel_pt_table[NUM_KERNEL_PTS];

/* Physical and virtual addresses for some global pages */
struct pv_addr systempage;
struct pv_addr msgbufpv;
struct pv_addr irqstack;
struct pv_addr undstack;
struct pv_addr abtstack;
struct pv_addr kernelstack;
struct pv_addr minidataclean;

/* Static device mappings. */
static const struct arm_devmap_entry ixp425_devmap[] = {
	/* Physical/Virtual address for I/O space */
	{ IXP425_IO_VBASE, IXP425_IO_HWBASE, IXP425_IO_SIZE,
	  VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, },

	/* Expansion Bus */
	{ IXP425_EXP_VBASE, IXP425_EXP_HWBASE, IXP425_EXP_SIZE,
	  VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, },

	/* CFI Flash on the Expansion Bus */
	{ IXP425_EXP_BUS_CS0_VBASE, IXP425_EXP_BUS_CS0_HWBASE,
	  IXP425_EXP_BUS_CS0_SIZE,
	  VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, },

	/* IXP425 PCI Configuration */
	{ IXP425_PCI_VBASE, IXP425_PCI_HWBASE, IXP425_PCI_SIZE,
	  VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, },

	/* SDRAM Controller */
	{ IXP425_MCU_VBASE, IXP425_MCU_HWBASE, IXP425_MCU_SIZE,
	  VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, },

	/* PCI Memory Space */
	{ IXP425_PCI_MEM_VBASE, IXP425_PCI_MEM_HWBASE, IXP425_PCI_MEM_SIZE,
	  VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, },

	/* Q-Mgr Memory Space */
	{ IXP425_QMGR_VBASE, IXP425_QMGR_HWBASE, IXP425_QMGR_SIZE,
	  VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, },

	{ 0 },
};

/* Static device mappings.
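 * As in the table above, each entry is { virtual base, physical base, size,
 * protection, memory type }; the list is terminated by an all-zero entry,
 * and every mapping uses PTE_DEVICE so device registers are never cached.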
*/ static const struct arm_devmap_entry ixp435_devmap[] = { /* Physical/Virtual address for I/O space */ { IXP425_IO_VBASE, IXP425_IO_HWBASE, IXP425_IO_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, }, { IXP425_EXP_VBASE, IXP425_EXP_HWBASE, IXP425_EXP_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, }, /* IXP425 PCI Configuration */ { IXP425_PCI_VBASE, IXP425_PCI_HWBASE, IXP425_PCI_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, }, /* DDRII Controller NB: mapped same place as IXP425 */ { IXP425_MCU_VBASE, IXP435_MCU_HWBASE, IXP425_MCU_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, }, /* PCI Memory Space */ { IXP425_PCI_MEM_VBASE, IXP425_PCI_MEM_HWBASE, IXP425_PCI_MEM_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, }, /* Q-Mgr Memory Space */ { IXP425_QMGR_VBASE, IXP425_QMGR_HWBASE, IXP425_QMGR_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, }, /* CFI Flash on the Expansion Bus */ { IXP425_EXP_BUS_CS0_VBASE, IXP425_EXP_BUS_CS0_HWBASE, IXP425_EXP_BUS_CS0_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, }, /* USB1 Memory Space */ { IXP435_USB1_VBASE, IXP435_USB1_HWBASE, IXP435_USB1_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, }, /* USB2 Memory Space */ { IXP435_USB2_VBASE, IXP435_USB2_HWBASE, IXP435_USB2_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, }, /* GPS Memory Space */ { CAMBRIA_GPS_VBASE, CAMBRIA_GPS_HWBASE, CAMBRIA_GPS_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, }, /* RS485 Memory Space */ { CAMBRIA_RS485_VBASE, CAMBRIA_RS485_HWBASE, CAMBRIA_RS485_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, }, { 0 } }; extern vm_offset_t xscale_cache_clean_addr; void * initarm(struct arm_boot_params *abp) { #define next_chunk2(a,b) (((a) + (b)) &~ ((b)-1)) #define next_page(a) next_chunk2(a,PAGE_SIZE) struct pv_addr kernel_l1pt; struct pv_addr dpcpu; int loop, i; u_int l1pagetable; vm_offset_t freemempos; vm_offset_t freemem_pt; vm_offset_t afterkern; vm_offset_t freemem_after; vm_offset_t lastaddr; uint32_t memsize; /* kernel text starts where we were loaded at boot */ #define KERNEL_TEXT_OFF (abp->abp_physaddr - PHYSADDR) #define KERNEL_TEXT_BASE (KERNBASE + KERNEL_TEXT_OFF) #define KERNEL_TEXT_PHYS (PHYSADDR + KERNEL_TEXT_OFF) lastaddr = parse_boot_param(abp); arm_physmem_kernaddr = abp->abp_physaddr; set_cpufuncs(); /* NB: sets cputype */ pcpu_init(pcpup, 0, sizeof(struct pcpu)); PCPU_SET(curthread, &thread0); - if (envmode == 1) - kern_envp = static_env; + init_static_kenv(NULL, 0); + /* Do basic tuning, hz etc */ init_param1(); /* * We allocate memory downwards from where we were loaded * by RedBoot; first the L1 page table, then NUM_KERNEL_PTS * entries in the L2 page table. Past that we re-align the * allocation boundary so later data structures (stacks, etc) * can be mapped with different attributes (write-back vs * write-through). Note this leaves a gap for expansion * (or might be repurposed). 
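 *
 * Schematically (addresses illustrative, allocations grow downward):
 *
 *	abp->abp_physaddr   kernel load address; allocation starts here
 *	                    L1 page table, then the L2 page tables
 *	freemem_pt          base of the page table allocations
 *	                    ... the expansion gap mentioned above ...
 *	PHYSADDR + 1MB      boundary is reset here for write-back data
 *	                    vector page, dpcpu area, mode stacks, msgbuf
 *	freemempos          final low-water mark of the early allocations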
 */
	freemempos = abp->abp_physaddr;

	/* macros to simplify initial memory allocation */
#define alloc_pages(var, np) do {				\
	freemempos -= (np * PAGE_SIZE);				\
	(var) = freemempos;					\
	/* NB: this works because locore maps PA=VA */		\
	memset((char *)(var), 0, ((np) * PAGE_SIZE));		\
} while (0)
#define valloc_pages(var, np) do {				\
	alloc_pages((var).pv_pa, (np));				\
	(var).pv_va = (var).pv_pa + (KERNVIRTADDR - abp->abp_physaddr); \
} while (0)

	/* force L1 page table alignment */
	while (((freemempos - L1_TABLE_SIZE) & (L1_TABLE_SIZE - 1)) != 0)
		freemempos -= PAGE_SIZE;
	/* allocate contiguous L1 page table */
	valloc_pages(kernel_l1pt, L1_TABLE_SIZE / PAGE_SIZE);
	/* now allocate L2 page tables; they are linked to L1 below */
	for (loop = 0; loop < NUM_KERNEL_PTS; ++loop) {
		if (!(loop % (PAGE_SIZE / L2_TABLE_SIZE_REAL))) {
			valloc_pages(kernel_pt_table[loop],
			    L2_TABLE_SIZE / PAGE_SIZE);
		} else {
			kernel_pt_table[loop].pv_pa = freemempos +
			    (loop % (PAGE_SIZE / L2_TABLE_SIZE_REAL)) *
			    L2_TABLE_SIZE_REAL;
			kernel_pt_table[loop].pv_va =
			    kernel_pt_table[loop].pv_pa +
			    (KERNVIRTADDR - abp->abp_physaddr);
		}
	}
	freemem_pt = freemempos;	/* base of allocated pt's */

	/*
	 * Re-align allocation boundary so we can map the area
	 * write-back instead of write-through for the stacks and
	 * related structures allocated below.
	 */
	freemempos = PHYSADDR + 0x100000;

	/*
	 * Allocate a page for the system page mapped to V0x00000000.
	 * This page will just contain the system vectors and can be
	 * shared by all processes.
	 */
	valloc_pages(systempage, 1);

	/* Allocate dynamic per-cpu area. */
	valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
	dpcpu_init((void *)dpcpu.pv_va, 0);

	/* Allocate stacks for all modes */
	valloc_pages(irqstack, IRQ_STACK_SIZE);
	valloc_pages(abtstack, ABT_STACK_SIZE);
	valloc_pages(undstack, UND_STACK_SIZE);
	valloc_pages(kernelstack, kstack_pages);
	alloc_pages(minidataclean.pv_pa, 1);
	valloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);

	/*
	 * Now construct the L1 page table.  First map the L2
	 * page tables into the L1 so we can replace L1 mappings
	 * later on if necessary
	 */
	l1pagetable = kernel_l1pt.pv_va;

	/* Map the L2 page tables into the L1 page table */
	pmap_link_l2pt(l1pagetable, ARM_VECTORS_HIGH & ~(0x00100000 - 1),
	    &kernel_pt_table[KERNEL_PT_SYS]);
	pmap_link_l2pt(l1pagetable, IXP425_IO_VBASE,
	    &kernel_pt_table[KERNEL_PT_IO]);
	pmap_link_l2pt(l1pagetable, IXP425_MCU_VBASE,
	    &kernel_pt_table[KERNEL_PT_IO + 1]);
	pmap_link_l2pt(l1pagetable, IXP425_PCI_MEM_VBASE,
	    &kernel_pt_table[KERNEL_PT_IO + 2]);
	pmap_link_l2pt(l1pagetable, KERNBASE,
	    &kernel_pt_table[KERNEL_PT_BEFOREKERN]);
	pmap_map_chunk(l1pagetable, KERNBASE, PHYSADDR, 0x100000,
	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
	pmap_map_chunk(l1pagetable, KERNBASE + 0x100000, PHYSADDR + 0x100000,
	    0x100000, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE);
	pmap_map_chunk(l1pagetable, KERNEL_TEXT_BASE, KERNEL_TEXT_PHYS,
	    next_chunk2(((uint32_t)lastaddr) - KERNEL_TEXT_BASE, L1_S_SIZE),
	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
	freemem_after = next_page((int)lastaddr);
	afterkern = round_page(next_chunk2((vm_offset_t)lastaddr, L1_S_SIZE));
	for (i = 0; i < KERNEL_PT_AFKERNEL_NUM; i++) {
		pmap_link_l2pt(l1pagetable, afterkern + i * 0x00100000,
		    &kernel_pt_table[KERNEL_PT_AFKERNEL + i]);
	}
	pmap_map_entry(l1pagetable, afterkern, minidataclean.pv_pa,
	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);

	/* Map the Mini-Data cache clean area. */
	xscale_setup_minidata(l1pagetable, afterkern,
	    minidataclean.pv_pa);

	/* Map the vector page.
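	 * (ARM_VECTORS_HIGH is the ARM high-vectors address 0xffff0000; the
	 * single systempage allocated earlier is mapped there, and
	 * arm_vector_init() below installs the exception vectors into it.)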
*/ pmap_map_entry(l1pagetable, ARM_VECTORS_HIGH, systempage.pv_pa, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); if (cpu_is_ixp43x()) arm_devmap_bootstrap(l1pagetable, ixp435_devmap); else arm_devmap_bootstrap(l1pagetable, ixp425_devmap); /* * Give the XScale global cache clean code an appropriately * sized chunk of unmapped VA space starting at 0xff000000 * (our device mappings end before this address). */ xscale_cache_clean_addr = 0xff000000U; cpu_domains((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2)) | DOMAIN_CLIENT); setttb(kernel_l1pt.pv_pa); cpu_tlb_flushID(); cpu_domains(DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2)); /* * Pages were allocated during the secondary bootstrap for the * stacks for different CPU modes. * We must now set the r13 registers in the different CPU modes to * point to these stacks. * Since the ARM stacks use STMFD etc. we must set r13 to the top end * of the stack memory. */ set_stackptrs(0); /* * We must now clean the cache again.... * Cleaning may be done by reading new data to displace any * dirty data in the cache. This will have happened in setttb() * but since we are boot strapping the addresses used for the read * may have just been remapped and thus the cache could be out * of sync. A re-clean after the switch will cure this. * After booting there are no gross relocations of the kernel thus * this problem will not occur after initarm(). */ cpu_idcache_wbinv_all(); cpu_setup(); /* ready to setup the console (XXX move earlier if possible) */ cninit(); /* * Fetch the RAM size from the MCU registers. The * expansion bus was mapped above so we can now read 'em. */ if (cpu_is_ixp43x()) memsize = ixp435_ddram_size(); else memsize = ixp425_sdram_size(); undefined_init(); init_proc0(kernelstack.pv_va); arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL); pmap_curmaxkvaddr = afterkern + PAGE_SIZE; vm_max_kernel_address = 0xe0000000; pmap_bootstrap(pmap_curmaxkvaddr, &kernel_l1pt); msgbufp = (void*)msgbufpv.pv_va; msgbufinit(msgbufp, msgbufsize); mutex_init(); /* * Add the physical ram we have available. * * Exclude the kernel, and all the things we allocated which immediately * follow the kernel, from the VM allocation pool but not from crash * dumps. virtual_avail is a global variable which tracks the kva we've * "allocated" while setting up pmaps. * * Prepare the list of physical memory available to the vm subsystem. */ arm_physmem_hardware_region(PHYSADDR, memsize); arm_physmem_exclude_region(freemem_pt, abp->abp_physaddr - freemem_pt, EXFLAG_NOALLOC); arm_physmem_exclude_region(freemempos, abp->abp_physaddr - 0x100000 - freemempos, EXFLAG_NOALLOC); arm_physmem_exclude_region(abp->abp_physaddr, virtual_avail - KERNVIRTADDR, EXFLAG_NOALLOC); arm_physmem_init_kernel_globals(); init_param2(physmem); kdb_init(); - /* use static kernel environment if so configured */ - if (envmode == 1) - kern_envp = static_env; - return ((void *)(kernelstack.pv_va + USPACE_SVC_STACK_TOP - sizeof(struct pcb))); #undef next_page #undef next_chunk2 } diff --git a/sys/arm64/arm64/machdep.c b/sys/arm64/arm64/machdep.c index f7e1456ceb8d..f256260ba3aa 100644 --- a/sys/arm64/arm64/machdep.c +++ b/sys/arm64/arm64/machdep.c @@ -1,985 +1,985 @@ /*- * Copyright (c) 2014 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include "opt_platform.h" #include "opt_ddb.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VFP #include #endif #ifdef FDT #include #include #endif struct pcpu __pcpu[MAXCPU]; static struct trapframe proc0_tf; vm_paddr_t phys_avail[PHYS_AVAIL_SIZE + 2]; vm_paddr_t dump_avail[PHYS_AVAIL_SIZE + 2]; int early_boot = 1; int cold = 1; long realmem = 0; long Maxmem = 0; #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t physmap[PHYSMAP_SIZE]; u_int physmap_idx; struct kva_md_info kmi; int64_t dcache_line_size; /* The minimum D cache line size */ int64_t icache_line_size; /* The minimum I cache line size */ int64_t idcache_line_size; /* The minimum cache line size */ static void cpu_startup(void *dummy) { identify_cpu(); vm_ksubmap_init(&kmi); bufinit(); vm_pager_bufferinit(); } SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); int cpu_idle_wakeup(int cpu) { return (0); } void bzero(void *buf, size_t len) { uint8_t *p; p = buf; while(len-- > 0) *p++ = 0; } int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *frame; frame = td->td_frame; regs->sp = frame->tf_sp; regs->lr = frame->tf_lr; regs->elr = frame->tf_elr; regs->spsr = frame->tf_spsr; memcpy(regs->x, frame->tf_x, sizeof(regs->x)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *frame; frame = td->td_frame; frame->tf_sp = regs->sp; frame->tf_lr = regs->lr; frame->tf_elr = regs->elr; frame->tf_spsr = regs->spsr; memcpy(frame->tf_x, regs->x, sizeof(frame->tf_x)); return (0); } int fill_fpregs(struct thread *td, struct fpreg *regs) { #ifdef VFP struct pcb *pcb; pcb = td->td_pcb; if ((pcb->pcb_fpflags & PCB_FP_STARTED) != 0) { /* * If we have just been running VFP instructions we will * need to save the state to memcpy it below. 
*/ vfp_save_state(td, pcb); memcpy(regs->fp_q, pcb->pcb_vfp, sizeof(regs->fp_q)); regs->fp_cr = pcb->pcb_fpcr; regs->fp_sr = pcb->pcb_fpsr; } else #endif memset(regs->fp_q, 0, sizeof(regs->fp_q)); return (0); } int set_fpregs(struct thread *td, struct fpreg *regs) { #ifdef VFP struct pcb *pcb; pcb = td->td_pcb; memcpy(pcb->pcb_vfp, regs->fp_q, sizeof(regs->fp_q)); pcb->pcb_fpcr = regs->fp_cr; pcb->pcb_fpsr = regs->fp_sr; #endif return (0); } int fill_dbregs(struct thread *td, struct dbreg *regs) { panic("ARM64TODO: fill_dbregs"); } int set_dbregs(struct thread *td, struct dbreg *regs) { panic("ARM64TODO: set_dbregs"); } int ptrace_set_pc(struct thread *td, u_long addr) { panic("ARM64TODO: ptrace_set_pc"); return (0); } int ptrace_single_step(struct thread *td) { /* TODO; */ return (0); } int ptrace_clear_single_step(struct thread *td) { /* TODO; */ return (0); } void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *tf = td->td_frame; memset(tf, 0, sizeof(struct trapframe)); /* * We need to set x0 for init as it doesn't call * cpu_set_syscall_retval to copy the value. We also * need to set td_retval for the cases where we do. */ tf->tf_x[0] = td->td_retval[0] = stack; tf->tf_sp = STACKALIGN(stack); tf->tf_lr = imgp->entry_addr; tf->tf_elr = imgp->entry_addr; } /* Sanity check these are the same size, they will be memcpy'd to and fro */ CTASSERT(sizeof(((struct trapframe *)0)->tf_x) == sizeof((struct gpregs *)0)->gp_x); CTASSERT(sizeof(((struct trapframe *)0)->tf_x) == sizeof((struct reg *)0)->x); int get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret) { struct trapframe *tf = td->td_frame; if (clear_ret & GET_MC_CLEAR_RET) { mcp->mc_gpregs.gp_x[0] = 0; mcp->mc_gpregs.gp_spsr = tf->tf_spsr & ~PSR_C; } else { mcp->mc_gpregs.gp_x[0] = tf->tf_x[0]; mcp->mc_gpregs.gp_spsr = tf->tf_spsr; } memcpy(&mcp->mc_gpregs.gp_x[1], &tf->tf_x[1], sizeof(mcp->mc_gpregs.gp_x[1]) * (nitems(mcp->mc_gpregs.gp_x) - 1)); mcp->mc_gpregs.gp_sp = tf->tf_sp; mcp->mc_gpregs.gp_lr = tf->tf_lr; mcp->mc_gpregs.gp_elr = tf->tf_elr; return (0); } int set_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tf = td->td_frame; memcpy(tf->tf_x, mcp->mc_gpregs.gp_x, sizeof(tf->tf_x)); tf->tf_sp = mcp->mc_gpregs.gp_sp; tf->tf_lr = mcp->mc_gpregs.gp_lr; tf->tf_elr = mcp->mc_gpregs.gp_elr; tf->tf_spsr = mcp->mc_gpregs.gp_spsr; return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp) { #ifdef VFP struct pcb *curpcb; critical_enter(); curpcb = curthread->td_pcb; if ((curpcb->pcb_fpflags & PCB_FP_STARTED) != 0) { /* * If we have just been running VFP instructions we will * need to save the state to memcpy it below. */ vfp_save_state(td, curpcb); memcpy(mcp->mc_fpregs.fp_q, curpcb->pcb_vfp, sizeof(mcp->mc_fpregs)); mcp->mc_fpregs.fp_cr = curpcb->pcb_fpcr; mcp->mc_fpregs.fp_sr = curpcb->pcb_fpsr; mcp->mc_fpregs.fp_flags = curpcb->pcb_fpflags; mcp->mc_flags |= _MC_FP_VALID; } critical_exit(); #endif } static void set_fpcontext(struct thread *td, mcontext_t *mcp) { #ifdef VFP struct pcb *curpcb; critical_enter(); if ((mcp->mc_flags & _MC_FP_VALID) != 0) { curpcb = curthread->td_pcb; /* * Discard any vfp state for the current thread, we * are about to override it. 
*/ vfp_discard(td); memcpy(curpcb->pcb_vfp, mcp->mc_fpregs.fp_q, sizeof(mcp->mc_fpregs)); curpcb->pcb_fpcr = mcp->mc_fpregs.fp_cr; curpcb->pcb_fpsr = mcp->mc_fpregs.fp_sr; curpcb->pcb_fpflags = mcp->mc_fpregs.fp_flags; } critical_exit(); #endif } void cpu_idle(int busy) { spinlock_enter(); if (!busy) cpu_idleclock(); if (!sched_runnable()) __asm __volatile( "dsb sy \n" "wfi \n"); if (!busy) cpu_activeclock(); spinlock_exit(); } void cpu_halt(void) { /* We should have shutdown by now, if not enter a low power sleep */ intr_disable(); while (1) { __asm __volatile("wfi"); } } /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { /* ARM64TODO TBD */ } /* Get current clock frequency for the given CPU ID. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { panic("ARM64TODO: cpu_est_clockrate"); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; } void spinlock_enter(void) { struct thread *td; register_t daif; td = curthread; if (td->td_md.md_spinlock_count == 0) { daif = intr_disable(); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_daif = daif; } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t daif; td = curthread; critical_exit(); daif = td->td_md.md_saved_daif; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(daif); } #ifndef _SYS_SYSPROTO_H_ struct sigreturn_args { ucontext_t *ucp; }; #endif int sys_sigreturn(struct thread *td, struct sigreturn_args *uap) { ucontext_t uc; uint32_t spsr; if (uap == NULL) return (EFAULT); if (copyin(uap->sigcntxp, &uc, sizeof(uc))) return (EFAULT); spsr = uc.uc_mcontext.mc_gpregs.gp_spsr; if ((spsr & PSR_M_MASK) != PSR_M_EL0t || (spsr & (PSR_F | PSR_I | PSR_A | PSR_D)) != 0) return (EINVAL); set_mcontext(td, &uc.uc_mcontext); set_fpcontext(td, &uc.uc_mcontext); /* Restore signal mask. */ kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); return (EJUSTRETURN); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { int i; for (i = 0; i < PCB_LR; i++) pcb->pcb_x[i] = tf->tf_x[i]; pcb->pcb_x[PCB_LR] = tf->tf_lr; pcb->pcb_pc = tf->tf_elr; pcb->pcb_sp = tf->tf_sp; } void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td; struct proc *p; struct trapframe *tf; struct sigframe *fp, frame; struct sigacts *psp; struct sysentvec *sysent; int code, onstack, sig; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; code = ksi->ksi_code; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; onstack = sigonstack(tf->tf_sp); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Allocate and validate space for the signal handler context. 
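	 * The frame lands either at the top of the alternate signal stack or
	 * just below the interrupted context's stack pointer; in both cases
	 * it is re-aligned with STACKALIGN before anything is written.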
*/ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !onstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct sigframe *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else { fp = (struct sigframe *)td->td_frame->tf_sp; } /* Make room, keeping the stack aligned */ fp--; fp = (struct sigframe *)STACKALIGN(fp); /* Fill in the frame to copy out */ get_mcontext(td, &frame.sf_uc.uc_mcontext, 0); get_fpcontext(td, &frame.sf_uc.uc_mcontext); frame.sf_si = ksi->ksi_info; frame.sf_uc.uc_sigmask = *mask; frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((onstack) ? SS_ONSTACK : 0) : SS_DISABLE; frame.sf_uc.uc_stack = td->td_sigstk; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(td->td_proc); /* Copy the sigframe out to the user's stack. */ if (copyout(&frame, fp, sizeof(*fp)) != 0) { /* Process has trashed its stack. Kill it. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp); PROC_LOCK(p); sigexit(td, SIGILL); } tf->tf_x[0]= sig; tf->tf_x[1] = (register_t)&fp->sf_si; tf->tf_x[2] = (register_t)&fp->sf_uc; tf->tf_elr = (register_t)catcher; tf->tf_sp = (register_t)fp; sysent = p->p_sysent; if (sysent->sv_sigcode_base != 0) tf->tf_lr = (register_t)sysent->sv_sigcode_base; else tf->tf_lr = (register_t)(sysent->sv_psstrings - *(sysent->sv_szsigcode)); CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_elr, tf->tf_sp); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } static void init_proc0(vm_offset_t kstack) { struct pcpu *pcpup = &__pcpu[0]; proc_linkup0(&proc0, &thread0); thread0.td_kstack = kstack; thread0.td_pcb = (struct pcb *)(thread0.td_kstack) - 1; thread0.td_pcb->pcb_fpflags = 0; thread0.td_pcb->pcb_vfpcpu = UINT_MAX; thread0.td_frame = &proc0_tf; pcpup->pc_curpcb = thread0.td_pcb; } typedef struct { uint32_t type; uint64_t phys_start; uint64_t virt_start; uint64_t num_pages; uint64_t attr; } EFI_MEMORY_DESCRIPTOR; static int add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, u_int *physmap_idxp) { u_int i, insert_idx, _physmap_idx; _physmap_idx = *physmap_idxp; if (length == 0) return (1); /* * Find insertion point while checking for overlap. Start off by * assuming the new entry will be added to the end. */ insert_idx = _physmap_idx; for (i = 0; i <= _physmap_idx; i += 2) { if (base < physmap[i + 1]) { if (base + length <= physmap[i]) { insert_idx = i; break; } if (boothowto & RB_VERBOSE) printf( "Overlapping memory regions, ignoring second region\n"); return (1); } } /* See if we can prepend to the next entry. */ if (insert_idx <= _physmap_idx && base + length == physmap[insert_idx]) { physmap[insert_idx] = base; return (1); } /* See if we can append to the previous entry. */ if (insert_idx > 0 && base == physmap[insert_idx - 1]) { physmap[insert_idx - 1] += length; return (1); } _physmap_idx += 2; *physmap_idxp = _physmap_idx; if (_physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); return (0); } /* * Move the last 'N' entries down to make room for the new * entry if needed. */ for (i = _physmap_idx; i > insert_idx; i -= 2) { physmap[i] = physmap[i - 2]; physmap[i + 1] = physmap[i - 1]; } /* Insert the new entry. 
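	 *
	 * Worked example (hypothetical values): with physmap holding the
	 * pairs [0x1000, 0x2000) and [0x4000, 0x5000), adding base 0x2000
	 * length 0x1000 appends to the first pair, giving [0x1000, 0x3000);
	 * then adding base 0x3000 length 0x1000 prepends to the second pair,
	 * giving [0x3000, 0x5000), consuming no new slots in either case.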
*/ physmap[insert_idx] = base; physmap[insert_idx + 1] = base + length; return (1); } #define efi_next_descriptor(ptr, size) \ ((struct efi_md *)(((uint8_t *) ptr) + size)) static void add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap, u_int *physmap_idxp) { struct efi_md *map, *p; const char *type; size_t efisz; int ndesc, i; static const char *types[] = { "Reserved", "LoaderCode", "LoaderData", "BootServicesCode", "BootServicesData", "RuntimeServicesCode", "RuntimeServicesData", "ConventionalMemory", "UnusableMemory", "ACPIReclaimMemory", "ACPIMemoryNVS", "MemoryMappedIO", "MemoryMappedIOPortSpace", "PalCode" }; /* * Memory map data provided by UEFI via the GetMemoryMap * Boot Services API. */ efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf; map = (struct efi_md *)((uint8_t *)efihdr + efisz); if (efihdr->descriptor_size == 0) return; ndesc = efihdr->memory_size / efihdr->descriptor_size; if (boothowto & RB_VERBOSE) printf("%23s %12s %12s %8s %4s\n", "Type", "Physical", "Virtual", "#Pages", "Attr"); for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, efihdr->descriptor_size)) { if (boothowto & RB_VERBOSE) { if (p->md_type <= EFI_MD_TYPE_PALCODE) type = types[p->md_type]; else type = ""; printf("%23s %012lx %12p %08lx ", type, p->md_phys, p->md_virt, p->md_pages); if (p->md_attr & EFI_MD_ATTR_UC) printf("UC "); if (p->md_attr & EFI_MD_ATTR_WC) printf("WC "); if (p->md_attr & EFI_MD_ATTR_WT) printf("WT "); if (p->md_attr & EFI_MD_ATTR_WB) printf("WB "); if (p->md_attr & EFI_MD_ATTR_UCE) printf("UCE "); if (p->md_attr & EFI_MD_ATTR_WP) printf("WP "); if (p->md_attr & EFI_MD_ATTR_RP) printf("RP "); if (p->md_attr & EFI_MD_ATTR_XP) printf("XP "); if (p->md_attr & EFI_MD_ATTR_RT) printf("RUNTIME"); printf("\n"); } switch (p->md_type) { case EFI_MD_TYPE_CODE: case EFI_MD_TYPE_DATA: case EFI_MD_TYPE_BS_CODE: case EFI_MD_TYPE_BS_DATA: case EFI_MD_TYPE_FREE: /* * We're allowed to use any entry with these types. 
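		 * Loader and boot-services code/data are reclaimable once
		 * the kernel owns the machine; runtime-services regions, by
		 * contrast, fall through to the default case below and are
		 * left untouched.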
*/ break; default: continue; } if (!add_physmap_entry(p->md_phys, (p->md_pages * PAGE_SIZE), physmap, physmap_idxp)) break; } } #ifdef FDT static void try_load_dtb(caddr_t kmdp) { vm_offset_t dtbp; dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t); if (dtbp == (vm_offset_t)NULL) { printf("ERROR loading DTB\n"); return; } if (OF_install(OFW_FDT, 0) == FALSE) panic("Cannot install FDT"); if (OF_init((void *)dtbp) != 0) panic("OF_init failed with the found device tree"); } #endif static void cache_setup(void) { int dcache_line_shift, icache_line_shift; uint32_t ctr_el0; ctr_el0 = READ_SPECIALREG(ctr_el0); /* Read the log2 words in each D cache line */ dcache_line_shift = CTR_DLINE_SIZE(ctr_el0); /* Get the D cache line size */ dcache_line_size = sizeof(int) << dcache_line_shift; /* And the same for the I cache */ icache_line_shift = CTR_ILINE_SIZE(ctr_el0); icache_line_size = sizeof(int) << icache_line_shift; idcache_line_size = MIN(dcache_line_size, icache_line_size); } void initarm(struct arm64_bootparams *abp) { struct efi_map_header *efihdr; struct pcpu *pcpup; vm_offset_t lastaddr; caddr_t kmdp; vm_paddr_t mem_len; int i; /* Set the module data location */ preload_metadata = (caddr_t)(uintptr_t)(abp->modulep); /* Find the kernel address */ kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); - kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); + init_static_kenv(MD_FETCH(kmdp, MODINFOMD_ENVP, char *), 0); #ifdef FDT try_load_dtb(kmdp); #endif /* Find the address to start allocating from */ lastaddr = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t); /* Load the physical memory ranges */ physmap_idx = 0; efihdr = (struct efi_map_header *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP); add_efi_map_entries(efihdr, physmap, &physmap_idx); /* Print the memory map */ mem_len = 0; for (i = 0; i < physmap_idx; i += 2) { dump_avail[i] = physmap[i]; dump_avail[i + 1] = physmap[i + 1]; mem_len += physmap[i + 1] - physmap[i]; } dump_avail[i] = 0; dump_avail[i + 1] = 0; /* Set the pcpu data, this is needed by pmap_bootstrap */ pcpup = &__pcpu[0]; pcpu_init(pcpup, 0, sizeof(struct pcpu)); /* * Set the pcpu pointer with a backup in tpidr_el1 to be * loaded when entering the kernel from userland. */ __asm __volatile( "mov x18, %0 \n" "msr tpidr_el1, %0" :: "r"(pcpup)); PCPU_SET(curthread, &thread0); /* Do basic tuning, hz etc */ init_param1(); cache_setup(); /* Bootstrap enough of pmap to enter the kernel proper */ pmap_bootstrap(abp->kern_l1pt, KERNBASE - abp->kern_delta, lastaddr - KERNBASE); arm_devmap_bootstrap(0, NULL); cninit(); init_proc0(abp->kern_stack); msgbufinit(msgbufp, msgbufsize); mutex_init(); init_param2(physmem); dbg_monitor_init(); kdb_init(); early_boot = 0; } uint32_t (*arm_cpu_fill_vdso_timehands)(struct vdso_timehands *, struct timecounter *); uint32_t cpu_fill_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc) { return (arm_cpu_fill_vdso_timehands != NULL ? 
arm_cpu_fill_vdso_timehands(vdso_th, tc) : 0); } #ifdef DDB #include DB_SHOW_COMMAND(specialregs, db_show_spregs) { #define PRINT_REG(reg) \ db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg)) PRINT_REG(actlr_el1); PRINT_REG(afsr0_el1); PRINT_REG(afsr1_el1); PRINT_REG(aidr_el1); PRINT_REG(amair_el1); PRINT_REG(ccsidr_el1); PRINT_REG(clidr_el1); PRINT_REG(contextidr_el1); PRINT_REG(cpacr_el1); PRINT_REG(csselr_el1); PRINT_REG(ctr_el0); PRINT_REG(currentel); PRINT_REG(daif); PRINT_REG(dczid_el0); PRINT_REG(elr_el1); PRINT_REG(esr_el1); PRINT_REG(far_el1); #if 0 /* ARM64TODO: Enable VFP before reading floating-point registers */ PRINT_REG(fpcr); PRINT_REG(fpsr); #endif PRINT_REG(id_aa64afr0_el1); PRINT_REG(id_aa64afr1_el1); PRINT_REG(id_aa64dfr0_el1); PRINT_REG(id_aa64dfr1_el1); PRINT_REG(id_aa64isar0_el1); PRINT_REG(id_aa64isar1_el1); PRINT_REG(id_aa64pfr0_el1); PRINT_REG(id_aa64pfr1_el1); PRINT_REG(id_afr0_el1); PRINT_REG(id_dfr0_el1); PRINT_REG(id_isar0_el1); PRINT_REG(id_isar1_el1); PRINT_REG(id_isar2_el1); PRINT_REG(id_isar3_el1); PRINT_REG(id_isar4_el1); PRINT_REG(id_isar5_el1); PRINT_REG(id_mmfr0_el1); PRINT_REG(id_mmfr1_el1); PRINT_REG(id_mmfr2_el1); PRINT_REG(id_mmfr3_el1); #if 0 /* Missing from llvm */ PRINT_REG(id_mmfr4_el1); #endif PRINT_REG(id_pfr0_el1); PRINT_REG(id_pfr1_el1); PRINT_REG(isr_el1); PRINT_REG(mair_el1); PRINT_REG(midr_el1); PRINT_REG(mpidr_el1); PRINT_REG(mvfr0_el1); PRINT_REG(mvfr1_el1); PRINT_REG(mvfr2_el1); PRINT_REG(revidr_el1); PRINT_REG(sctlr_el1); PRINT_REG(sp_el0); PRINT_REG(spsel); PRINT_REG(spsr_el1); PRINT_REG(tcr_el1); PRINT_REG(tpidr_el0); PRINT_REG(tpidr_el1); PRINT_REG(tpidrro_el0); PRINT_REG(ttbr0_el1); PRINT_REG(ttbr1_el1); PRINT_REG(vbar_el1); #undef PRINT_REG } DB_SHOW_COMMAND(vtop, db_show_vtop) { uint64_t phys; if (have_addr) { phys = arm64_address_translate_s1e1r(addr); db_printf("Physical address reg: 0x%016lx\n", phys); } else db_printf("show vtop \n"); } #endif diff --git a/lib/libgssapi/gss_release_oid_set.c b/sys/arm64/arm64/ofw_machdep.c similarity index 69% copy from lib/libgssapi/gss_release_oid_set.c copy to sys/arm64/arm64/ofw_machdep.c index bedb90ba8439..6a0b714cd208 100644 --- a/lib/libgssapi/gss_release_oid_set.c +++ b/sys/arm64/arm64/ofw_machdep.c @@ -1,46 +1,54 @@ /*- - * Copyright (c) 2005 Doug Rabson + * Copyright (c) 2015 Ian Lepore * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ -#include -#include -#include +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include -OM_uint32 -gss_release_oid_set(OM_uint32 *minor_status, - gss_OID_set *set) +#include +#include + +extern struct bus_space memmap_bus; + +int +OF_decode_addr(phandle_t dev, int regno, bus_space_tag_t *tag, + bus_space_handle_t *handle) { + bus_addr_t addr; + bus_size_t size; + int err; + + err = ofw_reg_to_paddr(dev, regno, &addr, &size, NULL); + if (err != 0) + return (err); - *minor_status = 0; - if (set && *set) { - if ((*set)->elements) - free((*set)->elements); - free(*set); - *set = GSS_C_NO_OID_SET; - } - return (GSS_S_COMPLETE); + *tag = &memmap_bus; + return (bus_space_map(*tag, addr, size, 0, handle)); } diff --git a/sys/arm64/include/ofw_machdep.h b/sys/arm64/include/ofw_machdep.h index e9c65ef02801..511fc8d71c19 100644 --- a/sys/arm64/include/ofw_machdep.h +++ b/sys/arm64/include/ofw_machdep.h @@ -1,47 +1,44 @@ /*- * Copyright (c) 2009 The FreeBSD Foundation * All rights reserved. * * This software was developed by Semihalf under sponsorship from * the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_OFW_MACHDEP_H_ #define _MACHINE_OFW_MACHDEP_H_ #include typedef uint32_t cell_t; struct mem_region { vm_offset_t mr_start; vm_size_t mr_size; }; -/* FDT follows ePAPR */ -#define OFW_EPAPR - #endif /* _MACHINE_OFW_MACHDEP_H_ */ diff --git a/sys/boot/efi/loader/main.c b/sys/boot/efi/loader/main.c index fc8949ea47f5..7a407094e0fe 100644 --- a/sys/boot/efi/loader/main.c +++ b/sys/boot/efi/loader/main.c @@ -1,445 +1,446 @@ /*- * Copyright (c) 2008-2010 Rui Paulo * Copyright (c) 2006 Marcel Moolenaar * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include "loader_efi.h" extern char bootprog_name[]; extern char bootprog_rev[]; extern char bootprog_date[]; extern char bootprog_maker[]; struct devdesc currdev; /* our current device */ struct arch_switch archsw; /* MI/MD interface boundary */ EFI_GUID acpi = ACPI_TABLE_GUID; EFI_GUID acpi20 = ACPI_20_TABLE_GUID; EFI_GUID devid = DEVICE_PATH_PROTOCOL; EFI_GUID imgid = LOADED_IMAGE_PROTOCOL; EFI_GUID mps = MPS_TABLE_GUID; EFI_GUID netid = EFI_SIMPLE_NETWORK_PROTOCOL; EFI_GUID smbios = SMBIOS_TABLE_GUID; EFI_GUID dxe = DXE_SERVICES_TABLE_GUID; EFI_GUID hoblist = HOB_LIST_TABLE_GUID; EFI_GUID memtype = MEMORY_TYPE_INFORMATION_TABLE_GUID; EFI_GUID debugimg = DEBUG_IMAGE_INFO_TABLE_GUID; EFI_GUID fdtdtb = FDT_TABLE_GUID; EFI_STATUS main(int argc, CHAR16 *argv[]) { char var[128]; EFI_LOADED_IMAGE *img; EFI_GUID *guid; int i, j, vargood; /* * XXX Chicken-and-egg problem; we want to have console output * early, but some console attributes may depend on reading from * eg. the boot device, which we can't do yet. We can use * printf() etc. once this is done. */ cons_probe(); /* * Loop through the args, and for each one that contains an '=' that is * not the first character, add it to the environment. This allows * loader and kernel env vars to be passed on the command line. Convert * args from UCS-2 to ASCII (16 to 8 bit) as they are copied. */ for (i = 1; i < argc; i++) { vargood = 0; for (j = 0; argv[i][j] != 0; j++) { if (j == sizeof(var)) { vargood = 0; break; } if (j > 0 && argv[i][j] == '=') vargood = 1; var[j] = (char)argv[i][j]; } if (vargood) { var[j] = 0; putenv(var); } } if (efi_copy_init()) { printf("failed to allocate staging area\n"); return (EFI_BUFFER_TOO_SMALL); } /* * March through the device switch probing for things. */ for (i = 0; devsw[i] != NULL; i++) if (devsw[i]->dv_init != NULL) (devsw[i]->dv_init)(); /* Get our loaded image protocol interface structure. 
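	 * (IH is the image handle the firmware passed us; the loaded image
	 * protocol reports where we were placed in memory (ImageBase) and
	 * which device we were booted from (DeviceHandle), both used below.)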
*/ BS->HandleProtocol(IH, &imgid, (VOID**)&img); printf("Image base: 0x%lx\n", (u_long)img->ImageBase); printf("EFI version: %d.%02d\n", ST->Hdr.Revision >> 16, ST->Hdr.Revision & 0xffff); printf("EFI Firmware: "); /* printf doesn't understand EFI Unicode */ ST->ConOut->OutputString(ST->ConOut, ST->FirmwareVendor); printf(" (rev %d.%02d)\n", ST->FirmwareRevision >> 16, ST->FirmwareRevision & 0xffff); printf("\n"); printf("%s, Revision %s\n", bootprog_name, bootprog_rev); printf("(%s, %s)\n", bootprog_maker, bootprog_date); efi_handle_lookup(img->DeviceHandle, &currdev.d_dev, &currdev.d_unit); currdev.d_type = currdev.d_dev->dv_type; /* * Disable the watchdog timer. By default the boot manager sets * the timer to 5 minutes before invoking a boot option. If we * want to return to the boot manager, we have to disable the * watchdog timer and since we're an interactive program, we don't * want to wait until the user types "quit". The timer may have * fired by then. We don't care if this fails. It does not prevent * normal functioning in any way... */ BS->SetWatchdogTimer(0, 0, 0, NULL); env_setenv("currdev", EV_VOLATILE, efi_fmtdev(&currdev), efi_setcurrdev, env_nounset); env_setenv("loaddev", EV_VOLATILE, efi_fmtdev(&currdev), env_noset, env_nounset); setenv("LINES", "24", 1); /* optional */ archsw.arch_autoload = efi_autoload; archsw.arch_getdev = efi_getdev; archsw.arch_copyin = efi_copyin; archsw.arch_copyout = efi_copyout; archsw.arch_readin = efi_readin; for (i = 0; i < ST->NumberOfTableEntries; i++) { guid = &ST->ConfigurationTable[i].VendorGuid; if (!memcmp(guid, &smbios, sizeof(EFI_GUID))) { smbios_detect(ST->ConfigurationTable[i].VendorTable); break; } } interact(NULL); /* doesn't return */ return (EFI_SUCCESS); /* keep compiler happy */ } COMMAND_SET(reboot, "reboot", "reboot the system", command_reboot); static int command_reboot(int argc, char *argv[]) { int i; for (i = 0; devsw[i] != NULL; ++i) if (devsw[i]->dv_cleanup != NULL) (devsw[i]->dv_cleanup)(); RS->ResetSystem(EfiResetCold, EFI_SUCCESS, 23, (CHAR16 *)"Reboot from the loader"); /* NOTREACHED */ return (CMD_ERROR); } COMMAND_SET(quit, "quit", "exit the loader", command_quit); static int command_quit(int argc, char *argv[]) { exit(0); return (CMD_OK); } COMMAND_SET(memmap, "memmap", "print memory map", command_memmap); static int command_memmap(int argc, char *argv[]) { UINTN sz; EFI_MEMORY_DESCRIPTOR *map, *p; UINTN key, dsz; UINT32 dver; EFI_STATUS status; int i, ndesc; static char *types[] = { "Reserved", "LoaderCode", "LoaderData", "BootServicesCode", "BootServicesData", "RuntimeServicesCode", "RuntimeServicesData", "ConventionalMemory", "UnusableMemory", "ACPIReclaimMemory", "ACPIMemoryNVS", "MemoryMappedIO", "MemoryMappedIOPortSpace", "PalCode" }; sz = 0; status = BS->GetMemoryMap(&sz, 0, &key, &dsz, &dver); if (status != EFI_BUFFER_TOO_SMALL) { printf("Can't determine memory map size\n"); return CMD_ERROR; } map = malloc(sz); status = BS->GetMemoryMap(&sz, map, &key, &dsz, &dver); if (EFI_ERROR(status)) { printf("Can't read memory map\n"); return CMD_ERROR; } ndesc = sz / dsz; printf("%23s %12s %12s %8s %4s\n", "Type", "Physical", "Virtual", "#Pages", "Attr"); for (i = 0, p = map; i < ndesc; i++, p = NextMemoryDescriptor(p, dsz)) { printf("%23s %012lx %012lx %08lx ", types[p->Type], p->PhysicalStart, p->VirtualStart, p->NumberOfPages); if (p->Attribute & EFI_MEMORY_UC) printf("UC "); if (p->Attribute & EFI_MEMORY_WC) printf("WC "); if (p->Attribute & EFI_MEMORY_WT) printf("WT "); if (p->Attribute & EFI_MEMORY_WB) 
printf("WB "); if (p->Attribute & EFI_MEMORY_UCE) printf("UCE "); if (p->Attribute & EFI_MEMORY_WP) printf("WP "); if (p->Attribute & EFI_MEMORY_RP) printf("RP "); if (p->Attribute & EFI_MEMORY_XP) printf("XP "); printf("\n"); } return CMD_OK; } COMMAND_SET(configuration, "configuration", "print configuration tables", command_configuration); static const char * guid_to_string(EFI_GUID *guid) { static char buf[40]; sprintf(buf, "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", guid->Data1, guid->Data2, guid->Data3, guid->Data4[0], guid->Data4[1], guid->Data4[2], guid->Data4[3], guid->Data4[4], guid->Data4[5], guid->Data4[6], guid->Data4[7]); return (buf); } static int command_configuration(int argc, char *argv[]) { int i; printf("NumberOfTableEntries=%ld\n", ST->NumberOfTableEntries); for (i = 0; i < ST->NumberOfTableEntries; i++) { EFI_GUID *guid; printf(" "); guid = &ST->ConfigurationTable[i].VendorGuid; if (!memcmp(guid, &mps, sizeof(EFI_GUID))) printf("MPS Table"); else if (!memcmp(guid, &acpi, sizeof(EFI_GUID))) printf("ACPI Table"); else if (!memcmp(guid, &acpi20, sizeof(EFI_GUID))) printf("ACPI 2.0 Table"); else if (!memcmp(guid, &smbios, sizeof(EFI_GUID))) printf("SMBIOS Table"); else if (!memcmp(guid, &dxe, sizeof(EFI_GUID))) printf("DXE Table"); else if (!memcmp(guid, &hoblist, sizeof(EFI_GUID))) printf("HOB List Table"); else if (!memcmp(guid, &memtype, sizeof(EFI_GUID))) printf("Memory Type Information Table"); else if (!memcmp(guid, &debugimg, sizeof(EFI_GUID))) printf("Debug Image Info Table"); else if (!memcmp(guid, &fdtdtb, sizeof(EFI_GUID))) printf("FDT Table"); else printf("Unknown Table (%s)", guid_to_string(guid)); printf(" at %p\n", ST->ConfigurationTable[i].VendorTable); } return CMD_OK; } COMMAND_SET(mode, "mode", "change or display EFI text modes", command_mode); static int command_mode(int argc, char *argv[]) { UINTN cols, rows; unsigned int mode; int i; char *cp; char rowenv[8]; EFI_STATUS status; SIMPLE_TEXT_OUTPUT_INTERFACE *conout; conout = ST->ConOut; if (argc > 1) { mode = strtol(argv[1], &cp, 0); if (cp[0] != '\0') { printf("Invalid mode\n"); return (CMD_ERROR); } status = conout->QueryMode(conout, mode, &cols, &rows); if (EFI_ERROR(status)) { printf("invalid mode %d\n", mode); return (CMD_ERROR); } status = conout->SetMode(conout, mode); if (EFI_ERROR(status)) { printf("couldn't set mode %d\n", mode); return (CMD_ERROR); } sprintf(rowenv, "%u", (unsigned)rows); setenv("LINES", rowenv, 1); return (CMD_OK); } - for (i = 0; ; i++) { + printf("Current mode: %d\n", conout->Mode->Mode); + for (i = 0; i <= conout->Mode->MaxMode; i++) { status = conout->QueryMode(conout, i, &cols, &rows); if (EFI_ERROR(status)) - break; + continue; printf("Mode %d: %u columns, %u rows\n", i, (unsigned)cols, (unsigned)rows); } if (i != 0) printf("Select a mode with the command \"mode \"\n"); return (CMD_OK); } COMMAND_SET(nvram, "nvram", "get or set NVRAM variables", command_nvram); static int command_nvram(int argc, char *argv[]) { CHAR16 var[128]; CHAR16 *data; EFI_STATUS status; EFI_GUID varguid = { 0,0,0,{0,0,0,0,0,0,0,0} }; UINTN varsz, datasz; SIMPLE_TEXT_OUTPUT_INTERFACE *conout; int i; conout = ST->ConOut; /* Initiate the search */ status = RS->GetNextVariableName(&varsz, NULL, NULL); for (; status != EFI_NOT_FOUND; ) { status = RS->GetNextVariableName(&varsz, var, &varguid); //if (EFI_ERROR(status)) //break; conout->OutputString(conout, var); printf("="); datasz = 0; status = RS->GetVariable(var, &varguid, NULL, &datasz, NULL); /* XXX: check status */ data = 
malloc(datasz); status = RS->GetVariable(var, &varguid, NULL, &datasz, data); if (EFI_ERROR(status)) printf(""); else { for (i = 0; i < datasz; i++) { if (isalnum(data[i]) || isspace(data[i])) printf("%c", data[i]); else printf("\\x%02x", data[i]); } } /* XXX */ pager_output("\n"); free(data); } return (CMD_OK); } #ifdef LOADER_FDT_SUPPORT extern int command_fdt_internal(int argc, char *argv[]); /* * Since proper fdt command handling function is defined in fdt_loader_cmd.c, * and declaring it as extern is in contradiction with COMMAND_SET() macro * (which uses static pointer), we're defining wrapper function, which * calls the proper fdt handling routine. */ static int command_fdt(int argc, char *argv[]) { return (command_fdt_internal(argc, argv)); } COMMAND_SET(fdt, "fdt", "flattened device tree handling", command_fdt); #endif diff --git a/sys/boot/libstand32/Makefile b/sys/boot/libstand32/Makefile index 075a9ea24227..d1c9ba2f6d85 100644 --- a/sys/boot/libstand32/Makefile +++ b/sys/boot/libstand32/Makefile @@ -1,155 +1,27 @@ # $FreeBSD$ -# Originally from $NetBSD: Makefile,v 1.21 1997/10/26 22:08:38 lukem Exp $ -# -# Notes: -# - We don't use the libc strerror/sys_errlist because the string table is -# quite large. -# - -MAN= .include -MK_SSP= no LIBSTAND_SRC= ${.CURDIR}/../../../lib/libstand -LIBC_SRC= ${LIBSTAND_SRC}/../libc - -.PATH: ${LIBSTAND_SRC} -LIB= stand -INTERNALLIB= -MK_PROFILE= no -NO_PIC= -.if ${MACHINE_ARCH} == "amd64" || ${MACHINE_ARCH} == "powerpc64" -CFLAGS+= -m32 -I. -.endif - -WARNS?= 0 - -# standalone components and stuff we have modified locally -SRCS+= gzguts.h zutil.h __main.c assert.c bcd.c bswap.c environment.c getopt.c gets.c \ - globals.c pager.c printf.c strdup.c strerror.c strtol.c strtoul.c random.c \ - sbrk.c twiddle.c zalloc.c zalloc_malloc.c - -# private (pruned) versions of libc string functions -SRCS+= strcasecmp.c - -.PATH: ${LIBC_SRC}/net - -SRCS+= ntoh.c - -# string functions from libc -.PATH: ${LIBC_SRC}/string -SRCS+= bcmp.c bcopy.c bzero.c ffs.c memccpy.c memchr.c memcmp.c memcpy.c \ - memmove.c memset.c qdivrem.c strcat.c strchr.c strcmp.c strcpy.c \ - strcspn.c strlcat.c strlcpy.c strlen.c strncat.c strncmp.c strncpy.c \ - strpbrk.c strrchr.c strsep.c strspn.c strstr.c strtok.c swab.c -.if ${MACHINE_CPUARCH} == "arm" -.PATH: ${LIBC_SRC}/arm/gen - -# Compiler support functions -.PATH: ${LIBSTAND_SRC}/../../contrib/compiler-rt/lib/builtins/ -# __clzsi2 and ctzsi2 for various builtin functions -SRCS+= clzsi2.c ctzsi2.c -# Divide and modulus functions called by the compiler -SRCS+= divmoddi4.c divmodsi4.c divdi3.c divsi3.c moddi3.c modsi3.c -SRCS+= udivmoddi4.c udivmodsi4.c udivdi3.c udivsi3.c umoddi3.c umodsi3.c - -.PATH: ${LIBSTAND_SRC}/../../contrib/compiler-rt/lib/builtins/arm/ -SRCS+= aeabi_idivmod.S aeabi_ldivmod.S aeabi_uidivmod.S aeabi_uldivmod.S -SRCS+= aeabi_memcmp.S aeabi_memcpy.S aeabi_memmove.S aeabi_memset.S - -.endif -.if ${MACHINE_CPUARCH} == "powerpc" -.PATH: ${LIBC_SRC}/quad -SRCS+= ashldi3.c ashrdi3.c -SRCS+= syncicache.c -.endif - -# uuid functions from libc -.PATH: ${LIBC_SRC}/uuid -SRCS+= uuid_equal.c uuid_is_nil.c - -# _setjmp/_longjmp .if ${MACHINE_CPUARCH} == "amd64" -.PATH: ${LIBSTAND_SRC}/i386 +LIBSTAND_CPUARCH=i386 .else -.PATH: ${LIBSTAND_SRC}/${MACHINE_CPUARCH} +LIBSTAND_CPUARCH=${MACHINE_CPUARCH} .endif -SRCS+= _setjmp.S - -# decompression functionality from libbz2 -# NOTE: to actually test this functionality after libbz2 upgrade compile -# loader(8) with LOADER_BZIP2_SUPPORT defined -.PATH: 
${LIBSTAND_SRC}/../../contrib/bzip2 -CFLAGS+= -DBZ_NO_STDIO -DBZ_NO_COMPRESS -SRCS+= libstand_bzlib_private.h - -.for file in bzlib.c crctable.c decompress.c huffman.c randtable.c -SRCS+= _${file} -CLEANFILES+= _${file} - -_${file}: ${file} - sed "s|bzlib_private\.h|libstand_bzlib_private.h|" \ - ${.ALLSRC} > ${.TARGET} -.endfor - -CLEANFILES+= libstand_bzlib_private.h -libstand_bzlib_private.h: bzlib_private.h - sed -e 's||"stand.h"|' \ - ${.ALLSRC} > ${.TARGET} - -# decompression functionality from libz -.PATH: ${LIBSTAND_SRC}/../libz -CFLAGS+=-DHAVE_MEMCPY -I${LIBSTAND_SRC}/../libz -SRCS+= adler32.c crc32.c libstand_zutil.h libstand_gzguts.h - -.for file in infback.c inffast.c inflate.c inftrees.c zutil.c -SRCS+= _${file} -CLEANFILES+= _${file} - -_${file}: ${file} - sed -e "s|zutil\.h|libstand_zutil.h|" \ - -e "s|gzguts\.h|libstand_gzguts.h|" \ - ${.ALLSRC} > ${.TARGET} -.endfor - -# depend on stand.h being able to be included multiple times -.for file in zutil.h gzguts.h -CLEANFILES+= libstand_${file} -libstand_${file}: ${file} - sed -e 's||"stand.h"|' \ - -e 's||"stand.h"|' \ - -e 's||"stand.h"|' \ - -e 's||"stand.h"|' \ - -e 's||"stand.h"|' \ - ${.ALLSRC} > ${.TARGET} -.endfor - -# io routines -SRCS+= closeall.c dev.c ioctl.c nullfs.c stat.c \ - fstat.c close.c lseek.c open.c read.c write.c readdir.c - -# network routines -SRCS+= arp.c ether.c inet_ntoa.c in_cksum.c net.c udp.c netif.c rpc.c - -# network info services: -SRCS+= bootp.c rarp.c bootparam.c +LIBC_SRC= ${LIBSTAND_SRC}/../libc +INTERNALLIB= +MAN= +.PATH: ${LIBSTAND_SRC} -# boot filesystems -SRCS+= ufs.c nfs.c cd9660.c tftp.c gzipfs.c bzipfs.c -SRCS+= dosfs.c ext2fs.c -SRCS+= splitfs.c -SRCS+= pkgfs.c -.if ${MK_NAND} != "no" -SRCS+= nandfs.c +.if ${MACHINE_ARCH} == "amd64" || ${MACHINE_ARCH} == "powerpc64" +CFLAGS+= -m32 -I. .endif -.include -.include +.include "${LIBSTAND_SRC}/Makefile" .if ${MACHINE_CPUARCH} == "amd64" CLEANFILES+= machine beforedepend ${OBJS}: machine machine: ln -fs ${.CURDIR}/../../i386/include machine .endif diff --git a/sys/boot/uboot/lib/copy.c b/sys/boot/uboot/lib/copy.c index 51416ac1ece8..c0e8b81290bc 100644 --- a/sys/boot/uboot/lib/copy.c +++ b/sys/boot/uboot/lib/copy.c @@ -1,164 +1,166 @@ /*- * Copyright (c) 1998 Michael Smith * Copyright (c) 2007 Semihalf, Rafal Jaworowski * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include "api_public.h" #include "glue.h" #include "libuboot.h" /* * MD primitives supporting placement of module data */ #ifdef __arm__ #define KERN_ALIGN (2 * 1024 * 1024) #else #define KERN_ALIGN PAGE_SIZE #endif /* * Avoid low memory, u-boot puts things like args and dtb blobs there. */ #define KERN_MINADDR max(KERN_ALIGN, (1024 * 1024)) extern void _start(void); /* ubldr entry point address. */ /* * This is called for every object loaded (kernel, module, dtb file, etc). The * expected return value is the next address at or after the given addr which is * appropriate for loading the given object described by type and data. On each * call the addr is the next address following the previously loaded object. * * The first call is for loading the kernel, and the addr argument will be zero, * and we search for a big block of ram to load the kernel and modules. * * On subsequent calls the addr will be non-zero, and we just round it up so * that each object begins on a page boundary. */
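A minimal userland model of the block-selection policy implemented below may help: given a candidate DRAM block [sblock, eblock) and the loader image occupying [subldr, eubldr), pick the larger usable piece. This is a sketch with hypothetical names and example addresses, not the driver code itself.

#include <stdint.h>
#include <stdio.h>

/* Model of the carve-out: largest sub-range of [sblock, eblock)
 * that does not overlap the loader's own [subldr, eubldr). */
static uint64_t
pick_range(uint64_t sblock, uint64_t eblock, uint64_t subldr,
    uint64_t eubldr, uint64_t *size)
{
	if (subldr >= sblock && subldr < eblock) {
		/* Loader inside the block: take the bigger side. */
		if (subldr - sblock > eblock - eubldr) {
			*size = subldr - sblock;
			return (sblock);
		}
		*size = eblock - eubldr;
		return (eubldr);
	}
	if (subldr < sblock && eubldr < eblock) {
		/* Loader below, or straddling the block start. */
		uint64_t start = (eubldr < sblock) ? sblock : eubldr;
		*size = eblock - start;
		return (start);
	}
	*size = 0;
	return (0);
}

int
main(void)
{
	uint64_t size;
	/* 256 MiB block at 0x80000000, loader at 0x81000000-0x81200000. */
	uint64_t start = pick_range(0x80000000, 0x90000000, 0x81000000,
	    0x81200000, &size);
	printf("load at 0x%llx, %llu MiB free\n",
	    (unsigned long long)start, (unsigned long long)(size >> 20));
	return (0);
}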
uint64_t uboot_loadaddr(u_int type, void *data, uint64_t addr) { struct sys_info *si; - uintptr_t sblock, eblock, subldr, eubldr; - uintptr_t biggest_block, this_block; - size_t biggest_size, this_size; + uint64_t sblock, eblock, subldr, eubldr; + uint64_t biggest_block, this_block; + uint64_t biggest_size, this_size; int i; - char * envstr; + char *envstr; if (addr == 0) { /* * If the loader_kernaddr environment variable is set, blindly * honor it. It had better be right. We force interpretation * of the value in base-16 regardless of any leading 0x prefix, * because that's the U-Boot convention. */ envstr = ub_env_get("loader_kernaddr"); if (envstr != NULL) return (strtoul(envstr, NULL, 16)); /* * Find addr/size of largest DRAM block. Carve our own address * range out of the block, because loading the kernel over the * top of ourselves is a poor memory-conservation strategy. Avoid * memory at beginning of the first block of physical ram, * since u-boot likes to pass args and data there. Assume that * u-boot has moved itself to the very top of ram and * optimistically assume that we won't run into it up there. */ if ((si = ub_get_sys_info()) == NULL) panic("could not retrieve system info"); biggest_block = 0; biggest_size = 0; - subldr = rounddown2((uintptr_t)_start, KERN_ALIGN); - eubldr = roundup2(uboot_heap_end, KERN_ALIGN); + subldr = rounddown2((uint64_t)(uintptr_t)_start, KERN_ALIGN); + eubldr = roundup2((uint64_t)uboot_heap_end, KERN_ALIGN); for (i = 0; i < si->mr_no; i++) { if (si->mr[i].flags != MR_ATTR_DRAM) continue; - sblock = roundup2(si->mr[i].start, KERN_ALIGN); - eblock = rounddown2(si->mr[i].start + si->mr[i].size, + sblock = roundup2((uint64_t)si->mr[i].start, KERN_ALIGN); + eblock = rounddown2((uint64_t)si->mr[i].start + + si->mr[i].size, KERN_ALIGN); if (biggest_size == 0) sblock += KERN_MINADDR; if (subldr >= sblock && subldr < eblock) { if (subldr - sblock > eblock - eubldr) { this_block = sblock; this_size = subldr - sblock; } else { this_block = eubldr; this_size = eblock - eubldr; } } else if (subldr < sblock && eubldr < eblock) { /* Loader is below or engulfs the sblock */ this_block = (eubldr < sblock) ? sblock : eubldr; this_size = eblock - this_block; } else { this_block = 0; this_size = 0; } if (biggest_size < this_size) { biggest_block = this_block; biggest_size = this_size; } } if (biggest_size == 0) panic("Not enough DRAM to load kernel\n"); #if 0 - printf("Loading kernel into region 0x%08x-0x%08x (%u MiB)\n", - biggest_block, biggest_block + biggest_size - 1, - biggest_size / 1024 / 1024); + printf("Loading kernel into region 0x%08jx-0x%08jx (%ju MiB)\n", + (uintmax_t)biggest_block, + (uintmax_t)biggest_block + biggest_size - 1, + (uintmax_t)biggest_size / 1024 / 1024); #endif return (biggest_block); } return roundup2(addr, PAGE_SIZE); } ssize_t uboot_copyin(const void *src, vm_offset_t dest, const size_t len) { bcopy(src, (void *)dest, len); return (len); } ssize_t uboot_copyout(const vm_offset_t src, void *dest, const size_t len) { bcopy((void *)src, dest, len); return (len); } ssize_t uboot_readin(const int fd, vm_offset_t dest, const size_t len) { return (read(fd, (void *)dest, len)); } diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64 index 6ecf36abb58e..a12896767186 100644 --- a/sys/conf/files.arm64 +++ b/sys/conf/files.arm64 @@ -1,94 +1,95 @@ # $FreeBSD$ arm/arm/devmap.c standard arm/arm/generic_timer.c standard arm/arm/pmu.c standard arm64/acpica/acpi_machdep.c optional acpi arm64/acpica/OsdEnvironment.c optional acpi arm64/acpica/acpi_wakeup.c optional acpi arm64/acpica/pci_cfgreg.c optional acpi pci arm64/arm64/autoconf.c standard arm64/arm64/bcopy.c standard arm64/arm64/bus_machdep.c standard arm64/arm64/bus_space_asm.S standard arm64/arm64/busdma_bounce.c standard arm64/arm64/busdma_machdep.c standard arm64/arm64/clock.c standard arm64/arm64/copyinout.S standard arm64/arm64/copystr.c standard arm64/arm64/cpufunc_asm.S standard arm64/arm64/db_disasm.c optional ddb arm64/arm64/db_interface.c optional ddb arm64/arm64/db_trace.c optional ddb arm64/arm64/debug_monitor.c optional kdb arm64/arm64/dump_machdep.c standard arm64/arm64/elf_machdep.c standard arm64/arm64/exception.S standard arm64/arm64/gic.c standard arm64/arm64/gic_acpi.c optional acpi arm64/arm64/gic_fdt.c optional fdt arm64/arm64/gic_v3.c standard arm64/arm64/gic_v3_fdt.c optional fdt arm64/arm64/gic_v3_its.c standard arm64/arm64/identcpu.c standard arm64/arm64/intr_machdep.c standard arm64/arm64/in_cksum.c optional inet | inet6 arm64/arm64/locore.S standard no-obj arm64/arm64/machdep.c standard arm64/arm64/mem.c standard arm64/arm64/minidump_machdep.c standard
arm64/arm64/mp_machdep.c optional smp arm64/arm64/nexus.c standard +arm64/arm64/ofw_machdep.c optional fdt arm64/arm64/pic_if.m standard arm64/arm64/pmap.c standard arm64/arm64/stack_machdep.c optional ddb | stack arm64/arm64/support.S standard arm64/arm64/swtch.S standard arm64/arm64/sys_machdep.c standard arm64/arm64/trap.c standard arm64/arm64/uio_machdep.c standard arm64/arm64/uma_machdep.c standard arm64/arm64/unwind.c optional ddb | kdtrace_hooks | stack arm64/arm64/vfp.c standard arm64/arm64/vm_machdep.c standard arm64/cavium/thunder_pcie.c optional soc_cavm_thunderx pci fdt arm64/cavium/thunder_pcie_pem.c optional soc_cavm_thunderx pci arm64/cavium/thunder_pcie_common.c optional soc_cavm_thunderx pci arm64/cloudabi64/cloudabi64_sysvec.c optional compat_cloudabi64 crypto/blowfish/bf_enc.c optional crypto | ipsec crypto/des/des_enc.c optional crypto | ipsec | netsmb dev/acpica/acpi_if.m optional acpi dev/ahci/ahci_generic.c optional ahci fdt dev/fdt/fdt_arm64.c optional fdt dev/hwpmc/hwpmc_arm64.c optional hwpmc dev/hwpmc/hwpmc_arm64_md.c optional hwpmc dev/mmc/host/dwmmc.c optional dwmmc dev/mmc/host/dwmmc_hisi.c optional dwmmc soc_hisi_hi6220 dev/ofw/ofw_cpu.c optional fdt dev/pci/pci_host_generic.c optional pci fdt dev/psci/psci.c optional psci dev/psci/psci_arm64.S optional psci dev/uart/uart_cpu_fdt.c optional uart fdt dev/uart/uart_dev_pl011.c optional uart pl011 dev/usb/controller/dwc_otg_hisi.c optional dwcotg soc_hisi_hi6220 dev/vnic/nic_main.c optional vnic pci dev/vnic/nicvf_main.c optional vnic pci pci_iov dev/vnic/nicvf_queues.c optional vnic pci pci_iov dev/vnic/thunder_bgx_fdt.c optional vnic fdt dev/vnic/thunder_bgx.c optional vnic pci dev/vnic/thunder_mdio_fdt.c optional vnic fdt dev/vnic/thunder_mdio.c optional vnic dev/vnic/lmac_if.m optional vnic kern/kern_clocksource.c standard libkern/bcmp.c standard libkern/ffs.c standard libkern/ffsl.c standard libkern/ffsll.c standard libkern/fls.c standard libkern/flsl.c standard libkern/flsll.c standard libkern/memmove.c standard libkern/memset.c standard cddl/contrib/opensolaris/common/atomic/aarch64/opensolaris_atomic.S optional zfs | dtrace compile-with "${CDDL_C}" cddl/dev/dtrace/aarch64/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/aarch64/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/fbt/aarch64/fbt_isa.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" diff --git a/sys/dev/ath/ah_osdep.c b/sys/dev/ath/ah_osdep.c index fe4a657c98c8..3d633a44adda 100644 --- a/sys/dev/ath/ah_osdep.c +++ b/sys/dev/ath/ah_osdep.c @@ -1,409 +1,422 @@ /*- * Copyright (c) 2002-2008 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. * * $FreeBSD$ */ #include "opt_ah.h" #include #include #include #include #include #include #include #include #include #include #include #include #include /* XXX for ether_sprintf */ #include #include /* * WiSoC boards overload the bus tag with information about the * board layout. We must extract the bus space tag from that * indirect structure. For everyone else the tag is passed in * directly. * XXX cache indirect ref privately */ #ifdef AH_SUPPORT_AR5312 #define BUSTAG(ah) \ ((bus_space_tag_t) ((struct ar531x_config *)((ah)->ah_st))->tag) #else #define BUSTAG(ah) ((ah)->ah_st) #endif /* * This lock is used to serialise register access for chips which have * problems w/ SMP CPUs issuing concurrent PCI transactions. * * XXX This is a global lock for now; it should be pushed to * a per-device lock in some platform-independent fashion. */ struct mtx ah_regser_mtx; MTX_SYSINIT(ah_regser, &ah_regser_mtx, "Atheros register access mutex", MTX_SPIN); extern void ath_hal_printf(struct ath_hal *, const char*, ...) __printflike(2,3); extern void ath_hal_vprintf(struct ath_hal *, const char*, __va_list) __printflike(2, 0); extern const char* ath_hal_ether_sprintf(const u_int8_t *mac); extern void *ath_hal_malloc(size_t); extern void ath_hal_free(void *); #ifdef AH_ASSERT extern void ath_hal_assert_failed(const char* filename, int lineno, const char* msg); #endif #ifdef AH_DEBUG extern void DO_HALDEBUG(struct ath_hal *ah, u_int mask, const char* fmt, ...); #endif /* AH_DEBUG */ /* NB: put this here instead of the driver to avoid circular references */ SYSCTL_NODE(_hw, OID_AUTO, ath, CTLFLAG_RD, 0, "Atheros driver parameters"); static SYSCTL_NODE(_hw_ath, OID_AUTO, hal, CTLFLAG_RD, 0, "Atheros HAL parameters"); #ifdef AH_DEBUG int ath_hal_debug = 0; SYSCTL_INT(_hw_ath_hal, OID_AUTO, debug, CTLFLAG_RWTUN, &ath_hal_debug, 0, "Atheros HAL debugging printfs"); #endif /* AH_DEBUG */ static MALLOC_DEFINE(M_ATH_HAL, "ath_hal", "ath hal data"); void* ath_hal_malloc(size_t size) { return malloc(size, M_ATH_HAL, M_NOWAIT | M_ZERO); } void ath_hal_free(void* p) { free(p, M_ATH_HAL); } void ath_hal_vprintf(struct ath_hal *ah, const char* fmt, va_list ap) { vprintf(fmt, ap); } void ath_hal_printf(struct ath_hal *ah, const char* fmt, ...) { va_list ap; va_start(ap, fmt); ath_hal_vprintf(ah, fmt, ap); va_end(ap); } const char* ath_hal_ether_sprintf(const u_int8_t *mac) { return ether_sprintf(mac); } #ifdef AH_DEBUG /* * XXX This is highly relevant only for the AR5416 and later * PCI/PCIe NICs. It'll need adjustment for other hardware * variations. */ static int ath_hal_reg_whilst_asleep(struct ath_hal *ah, uint32_t reg) { if (reg >= 0x4000 && reg < 0x5000) return (1); if (reg >= 0x6000 && reg < 0x7000) return (1); if (reg >= 0x7000 && reg < 0x8000) return (1); return (0); }
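The window check in ath_hal_reg_whilst_asleep() above is the whole debug heuristic: only a few MAC register ranges may be touched while the AR5416+ chip sleeps, and anything else should only be accessed with the chip awake (HAL_PM_AWAKE). A standalone sketch of that predicate, with the ranges copied from the function above (the two adjacent 0x6000/0x7000 windows merged) and hypothetical names:

#include <stdint.h>
#include <stdio.h>

/* Model: returns nonzero if the register window stays accessible
 * while the chip is in network sleep. */
static int
reg_ok_whilst_asleep(uint32_t reg)
{
	return ((reg >= 0x4000 && reg < 0x5000) ||
	    (reg >= 0x6000 && reg < 0x8000));
}

int
main(void)
{
	printf("0x4020: %d\n", reg_ok_whilst_asleep(0x4020)); /* 1 */
	printf("0x9800: %d\n", reg_ok_whilst_asleep(0x9800)); /* 0 */
	return (0);
}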
void DO_HALDEBUG(struct ath_hal *ah, u_int mask, const char* fmt, ...) { if ((mask == HAL_DEBUG_UNMASKABLE) || (ah != NULL && ah->ah_config.ah_debug & mask) || (ath_hal_debug & mask)) { __va_list ap; va_start(ap, fmt); ath_hal_vprintf(ah, fmt, ap); va_end(ap); } } #undef HAL_DEBUG_UNMASKABLE #endif /* AH_DEBUG */ #ifdef AH_DEBUG_ALQ /* * ALQ register tracing support. * * Setting hw.ath.hal.alq=1 enables tracing of all register reads and * writes to the file /tmp/ath_hal.log. The file format is a simple * fixed-size array of records. When done logging set hw.ath.hal.alq=0 * and then decode the file with the arcode program (that is part of the * HAL). If you start+stop tracing, the data will be appended to an * existing file. * * NB: doesn't handle multiple devices properly; only one DEVICE record * is emitted and the different devices are not identified. */
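The trace file is thus a flat array of fixed-size records. For illustration only, a userland decoder might look like the sketch below; the record layout here is a hypothetical stand-in for struct athregrec (whose authoritative definition lives in the HAL headers), and the arcode program mentioned above remains the proper tool:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical packed layout; field names mirror the op/reg/val/threadid
 * members used by the logging code below. */
struct rec {
	uint32_t op;
	uint32_t reg;
	uint32_t val;
	uint32_t threadid;
};

int
main(void)
{
	FILE *fp = fopen("/tmp/ath_hal.log", "rb");
	struct rec r;

	if (fp == NULL)
		return (1);
	while (fread(&r, sizeof(r), 1, fp) == 1)
		printf("op=%u reg=0x%04x val=0x%08x tid=%u\n",
		    r.op, r.reg, r.val, r.threadid);
	fclose(fp);
	return (0);
}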
#include #include #include static struct alq *ath_hal_alq; static int ath_hal_alq_emitdev; /* need to emit DEVICE record */ static u_int ath_hal_alq_lost; /* count of lost records */ static char ath_hal_logfile[MAXPATHLEN] = "/tmp/ath_hal.log"; SYSCTL_STRING(_hw_ath_hal, OID_AUTO, alq_logfile, CTLFLAG_RW, &ath_hal_logfile, sizeof(kernelname), "Name of ALQ logfile"); static u_int ath_hal_alq_qsize = 64*1024; static int ath_hal_setlogging(int enable) { int error; if (enable) { error = alq_open(&ath_hal_alq, ath_hal_logfile, curthread->td_ucred, ALQ_DEFAULT_CMODE, sizeof (struct athregrec), ath_hal_alq_qsize); ath_hal_alq_lost = 0; ath_hal_alq_emitdev = 1; printf("ath_hal: logging to %s enabled\n", ath_hal_logfile); } else { if (ath_hal_alq) alq_close(ath_hal_alq); ath_hal_alq = NULL; printf("ath_hal: logging disabled\n"); error = 0; } return (error); } static int sysctl_hw_ath_hal_log(SYSCTL_HANDLER_ARGS) { int error, enable; enable = (ath_hal_alq != NULL); error = sysctl_handle_int(oidp, &enable, 0, req); if (error || !req->newptr) return (error); else return (ath_hal_setlogging(enable)); } SYSCTL_PROC(_hw_ath_hal, OID_AUTO, alq, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_hw_ath_hal_log, "I", "Enable HAL register logging"); SYSCTL_INT(_hw_ath_hal, OID_AUTO, alq_size, CTLFLAG_RW, &ath_hal_alq_qsize, 0, "In-memory log size (#records)"); SYSCTL_INT(_hw_ath_hal, OID_AUTO, alq_lost, CTLFLAG_RW, &ath_hal_alq_lost, 0, "Register operations not logged"); static struct ale * ath_hal_alq_get(struct ath_hal *ah) { struct ale *ale; if (ath_hal_alq_emitdev) { ale = alq_get(ath_hal_alq, ALQ_NOWAIT); if (ale) { struct athregrec *r = (struct athregrec *) ale->ae_data; r->op = OP_DEVICE; r->reg = 0; r->val = ah->ah_devid; alq_post(ath_hal_alq, ale); ath_hal_alq_emitdev = 0; } else ath_hal_alq_lost++; } ale = alq_get(ath_hal_alq, ALQ_NOWAIT); if (!ale) ath_hal_alq_lost++; return ale; } void ath_hal_reg_write(struct ath_hal *ah, u_int32_t reg, u_int32_t val) { bus_space_tag_t tag = BUSTAG(ah); bus_space_handle_t h = ah->ah_sh; +#ifdef AH_DEBUG /* Debug - complain if we haven't fully woken things up */ if (! ath_hal_reg_whilst_asleep(ah, reg) && ah->ah_powerMode != HAL_PM_AWAKE) { ath_hal_printf(ah, "%s: reg=0x%08x, val=0x%08x, pm=%d\n", __func__, reg, val, ah->ah_powerMode); } +#endif if (ath_hal_alq) { struct ale *ale = ath_hal_alq_get(ah); if (ale) { struct athregrec *r = (struct athregrec *) ale->ae_data; r->threadid = curthread->td_tid; r->op = OP_WRITE; r->reg = reg; r->val = val; alq_post(ath_hal_alq, ale); } } if (ah->ah_config.ah_serialise_reg_war) mtx_lock_spin(&ah_regser_mtx); bus_space_write_4(tag, h, reg, val); + OS_BUS_BARRIER_REG(ah, reg, OS_BUS_BARRIER_WRITE); if (ah->ah_config.ah_serialise_reg_war) mtx_unlock_spin(&ah_regser_mtx); } u_int32_t ath_hal_reg_read(struct ath_hal *ah, u_int32_t reg) { bus_space_tag_t tag = BUSTAG(ah); bus_space_handle_t h = ah->ah_sh; u_int32_t val; +#ifdef AH_DEBUG /* Debug - complain if we haven't fully woken things up */ if (! ath_hal_reg_whilst_asleep(ah, reg) && ah->ah_powerMode != HAL_PM_AWAKE) { ath_hal_printf(ah, "%s: reg=0x%08x, pm=%d\n", __func__, reg, ah->ah_powerMode); } +#endif if (ah->ah_config.ah_serialise_reg_war) mtx_lock_spin(&ah_regser_mtx); + OS_BUS_BARRIER_REG(ah, reg, OS_BUS_BARRIER_READ); val = bus_space_read_4(tag, h, reg); if (ah->ah_config.ah_serialise_reg_war) mtx_unlock_spin(&ah_regser_mtx); if (ath_hal_alq) { struct ale *ale = ath_hal_alq_get(ah); if (ale) { struct athregrec *r = (struct athregrec *) ale->ae_data; r->threadid = curthread->td_tid; r->op = OP_READ; r->reg = reg; r->val = val; alq_post(ath_hal_alq, ale); } } return val; } void OS_MARK(struct ath_hal *ah, u_int id, u_int32_t v) { if (ath_hal_alq) { struct ale *ale = ath_hal_alq_get(ah); if (ale) { struct athregrec *r = (struct athregrec *) ale->ae_data; r->threadid = curthread->td_tid; r->op = OP_MARK; r->reg = id; r->val = v; alq_post(ath_hal_alq, ale); } } } -#elif defined(AH_DEBUG) || defined(AH_REGOPS_FUNC) +#else /* AH_DEBUG_ALQ */ + /* * Memory-mapped device register read/write. These are here * as routines when debugging support is enabled and/or when * explicitly configured to use function calls. The latter is * for architectures that might need to do something before * referencing memory (e.g. remap an i/o window). * * NB: see the comments in ah_osdep.h about byte-swapping register * reads and writes to understand what's going on below. */ void ath_hal_reg_write(struct ath_hal *ah, u_int32_t reg, u_int32_t val) { bus_space_tag_t tag = BUSTAG(ah); bus_space_handle_t h = ah->ah_sh; +#ifdef AH_DEBUG /* Debug - complain if we haven't fully woken things up */ if (! ath_hal_reg_whilst_asleep(ah, reg) && ah->ah_powerMode != HAL_PM_AWAKE) { ath_hal_printf(ah, "%s: reg=0x%08x, val=0x%08x, pm=%d\n", __func__, reg, val, ah->ah_powerMode); } +#endif if (ah->ah_config.ah_serialise_reg_war) mtx_lock_spin(&ah_regser_mtx); bus_space_write_4(tag, h, reg, val); + OS_BUS_BARRIER_REG(ah, reg, OS_BUS_BARRIER_WRITE); if (ah->ah_config.ah_serialise_reg_war) mtx_unlock_spin(&ah_regser_mtx); } u_int32_t ath_hal_reg_read(struct ath_hal *ah, u_int32_t reg) { bus_space_tag_t tag = BUSTAG(ah); bus_space_handle_t h = ah->ah_sh; u_int32_t val; +#ifdef AH_DEBUG /* Debug - complain if we haven't fully woken things up */ if (!
ath_hal_reg_whilst_asleep(ah, reg) && ah->ah_powerMode != HAL_PM_AWAKE) { ath_hal_printf(ah, "%s: reg=0x%08x, pm=%d\n", __func__, reg, ah->ah_powerMode); } +#endif if (ah->ah_config.ah_serialise_reg_war) mtx_lock_spin(&ah_regser_mtx); + OS_BUS_BARRIER_REG(ah, reg, OS_BUS_BARRIER_READ); val = bus_space_read_4(tag, h, reg); if (ah->ah_config.ah_serialise_reg_war) mtx_unlock_spin(&ah_regser_mtx); return val; } -#endif /* AH_DEBUG || AH_REGOPS_FUNC */ +#endif /* AH_DEBUG_ALQ */ #ifdef AH_ASSERT void ath_hal_assert_failed(const char* filename, int lineno, const char *msg) { printf("Atheros HAL assertion failure: %s: line %u: %s\n", filename, lineno, msg); panic("ath_hal_assert"); } #endif /* AH_ASSERT */ diff --git a/sys/dev/ath/ah_osdep.h b/sys/dev/ath/ah_osdep.h index 50778f2a16a7..94601927ca23 100644 --- a/sys/dev/ath/ah_osdep.h +++ b/sys/dev/ath/ah_osdep.h @@ -1,139 +1,149 @@ /*- * Copyright (c) 2002-2008 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. * * $FreeBSD$ */ #ifndef _ATH_AH_OSDEP_H_ #define _ATH_AH_OSDEP_H_ /* * Atheros Hardware Access Layer (HAL) OS Dependent Definitions. */ #include #include #include #include #include #include /* * Bus i/o type definitions. */ typedef void *HAL_SOFTC; typedef bus_space_tag_t HAL_BUS_TAG; typedef bus_space_handle_t HAL_BUS_HANDLE; /* * Although the underlying hardware may support 64 bit DMA, the * current Atheros hardware only supports 32 bit addressing. */ typedef uint32_t HAL_DMA_ADDR; /* * Linker set writearounds for chip and RF backend registration. */ #define OS_DATA_SET(set, item) DATA_SET(set, item) #define OS_SET_DECLARE(set, ptype) SET_DECLARE(set, ptype) #define OS_SET_FOREACH(pvar, set) SET_FOREACH(pvar, set) /* * Delay n microseconds. */ #define OS_DELAY(_n) DELAY(_n) #define OS_INLINE __inline #define OS_MEMZERO(_a, _n) bzero((_a), (_n)) #define OS_MEMCPY(_d, _s, _n) memcpy(_d,_s,_n) #define OS_MEMCMP(_a, _b, _l) memcmp((_a), (_b), (_l)) #define abs(_a) __builtin_abs(_a) struct ath_hal; /* * The hardware registers are native little-endian byte order. * Big-endian hosts are handled by enabling hardware byte-swap * of register reads and writes at reset. 
But the PCI clock * domain registers are not byte swapped! Thus, on big-endian * platforms we have to explicitly byte-swap those registers. * OS_REG_UNSWAPPED identifies the registers that need special handling. * * This is not currently used by the FreeBSD HAL osdep code; the HAL * currently does not configure hardware byteswapping for register space * accesses and instead does it through the FreeBSD bus space code. */ #if _BYTE_ORDER == _BIG_ENDIAN #define OS_REG_UNSWAPPED(_reg) \ (((_reg) >= 0x4000 && (_reg) < 0x5000) || \ ((_reg) >= 0x7000 && (_reg) < 0x8000)) #else /* _BYTE_ORDER == _LITTLE_ENDIAN */ #define OS_REG_UNSWAPPED(_reg) (0) #endif /* _BYTE_ORDER */ /* * For USB/SDIO support (where access latencies are quite high); * some write accesses may be buffered and then flushed when * either a read is done, or an explicit flush is done. * * These are simply placeholders for now. */ #define OS_REG_WRITE_BUFFER_ENABLE(_ah) \ do { } while (0) #define OS_REG_WRITE_BUFFER_DISABLE(_ah) \ do { } while (0) #define OS_REG_WRITE_BUFFER_FLUSH(_ah) \ do { } while (0) /* - * Register read/write operations are either handled through - * platform-dependent routines (or when debugging is enabled - * with AH_DEBUG); or they are inline expanded using the macros - * defined below. + * Read and write barriers. Some platforms require more strongly ordered + * operations and unfortunately most of the HAL is written assuming everything + * is either an x86 or the bus layer will do the barriers for you. + * + * Read barriers should occur before each read, and write barriers + * occur after each write. + * + * Later on for SDIO/USB parts we will methodize this and make them no-ops; + * register accesses will go via USB commands. + */ +#define OS_BUS_BARRIER_READ BUS_SPACE_BARRIER_READ +#define OS_BUS_BARRIER_WRITE BUS_SPACE_BARRIER_WRITE +#define OS_BUS_BARRIER_RW \ + (BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE) +#define OS_BUS_BARRIER(_ah, _start, _len, _t) \ + bus_space_barrier((bus_space_tag_t)(_ah)->ah_st, \ + (bus_space_handle_t)(_ah)->ah_sh, (_start), (_len), (_t)) +#define OS_BUS_BARRIER_REG(_ah, _reg, _t) \ + OS_BUS_BARRIER((_ah), (_reg), 4, (_t)) + +/* + * Register read/write operations are handled through + * platform-dependent routines. */ -#if defined(AH_DEBUG) || defined(AH_REGOPS_FUNC) || defined(AH_DEBUG_ALQ) #define OS_REG_WRITE(_ah, _reg, _val) ath_hal_reg_write(_ah, _reg, _val) #define OS_REG_READ(_ah, _reg) ath_hal_reg_read(_ah, _reg) extern void ath_hal_reg_write(struct ath_hal *ah, u_int reg, u_int32_t val); extern u_int32_t ath_hal_reg_read(struct ath_hal *ah, u_int reg); -#else -#define OS_REG_WRITE(_ah, _reg, _val) \ - bus_space_write_4((bus_space_tag_t)(_ah)->ah_st, \ - (bus_space_handle_t)(_ah)->ah_sh, (_reg), (_val)) -#define OS_REG_READ(_ah, _reg) \ - bus_space_read_4((bus_space_tag_t)(_ah)->ah_st, \ - (bus_space_handle_t)(_ah)->ah_sh, (_reg)) -#endif #ifdef AH_DEBUG_ALQ extern void OS_MARK(struct ath_hal *, u_int id, u_int32_t value); #else #define OS_MARK(_ah, _id, _v) #endif #endif /* _ATH_AH_OSDEP_H_ */ diff --git a/sys/dev/iwm/if_iwm.c b/sys/dev/iwm/if_iwm.c index ce3b603d5b5e..73cccd996afd 100644 --- a/sys/dev/iwm/if_iwm.c +++ b/sys/dev/iwm/if_iwm.c @@ -1,5023 +1,5011 @@ /* $OpenBSD: if_iwm.c,v 1.39 2015/03/23 00:35:19 jsg Exp $ */ /* * Copyright (c) 2014 genua mbh * Copyright (c) 2014 Fixup Software Ltd. 
* * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /*- * Based on BSD-licensed source modules in the Linux iwlwifi driver, * which were used as the reference documentation for this implementation. * * Driver version we are currently based off of is * Linux 3.14.3 (tag id a2df521e42b1d9a23f620ac79dbfe8655a8391dd) * *********************************************************************** * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * * Copyright(c) 2007 - 2013 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, * USA * * The full GNU General Public License is included in this distribution * in the file called COPYING. * * Contact Information: * Intel Linux Wireless * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 * * * BSD LICENSE * * Copyright(c) 2005 - 2013 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * Neither the name Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2007-2010 Damien Bergamini * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include const uint8_t iwm_nvm_channels[] = { /* 2.4 GHz */ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 5 GHz */ 36, 40, 44 , 48, 52, 56, 60, 64, 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 149, 153, 157, 161, 165 }; #define IWM_NUM_2GHZ_CHANNELS 14 /* * XXX For now, there's simply a fixed set of rate table entries * that are populated. 
*/ const struct iwm_rate { uint8_t rate; uint8_t plcp; } iwm_rates[] = { { 2, IWM_RATE_1M_PLCP }, { 4, IWM_RATE_2M_PLCP }, { 11, IWM_RATE_5M_PLCP }, { 22, IWM_RATE_11M_PLCP }, { 12, IWM_RATE_6M_PLCP }, { 18, IWM_RATE_9M_PLCP }, { 24, IWM_RATE_12M_PLCP }, { 36, IWM_RATE_18M_PLCP }, { 48, IWM_RATE_24M_PLCP }, { 72, IWM_RATE_36M_PLCP }, { 96, IWM_RATE_48M_PLCP }, { 108, IWM_RATE_54M_PLCP }, }; #define IWM_RIDX_CCK 0 #define IWM_RIDX_OFDM 4 #define IWM_RIDX_MAX (nitems(iwm_rates)-1) #define IWM_RIDX_IS_CCK(_i_) ((_i_) < IWM_RIDX_OFDM) #define IWM_RIDX_IS_OFDM(_i_) ((_i_) >= IWM_RIDX_OFDM) static int iwm_store_cscheme(struct iwm_softc *, const uint8_t *, size_t); static int iwm_firmware_store_section(struct iwm_softc *, enum iwm_ucode_type, const uint8_t *, size_t); static int iwm_set_default_calib(struct iwm_softc *, const void *); static void iwm_fw_info_free(struct iwm_fw_info *); static int iwm_read_firmware(struct iwm_softc *, enum iwm_ucode_type); static void iwm_dma_map_addr(void *, bus_dma_segment_t *, int, int); static int iwm_dma_contig_alloc(bus_dma_tag_t, struct iwm_dma_info *, bus_size_t, bus_size_t); static void iwm_dma_contig_free(struct iwm_dma_info *); static int iwm_alloc_fwmem(struct iwm_softc *); static void iwm_free_fwmem(struct iwm_softc *); static int iwm_alloc_sched(struct iwm_softc *); static void iwm_free_sched(struct iwm_softc *); static int iwm_alloc_kw(struct iwm_softc *); static void iwm_free_kw(struct iwm_softc *); static int iwm_alloc_ict(struct iwm_softc *); static void iwm_free_ict(struct iwm_softc *); static int iwm_alloc_rx_ring(struct iwm_softc *, struct iwm_rx_ring *); static void iwm_reset_rx_ring(struct iwm_softc *, struct iwm_rx_ring *); static void iwm_free_rx_ring(struct iwm_softc *, struct iwm_rx_ring *); static int iwm_alloc_tx_ring(struct iwm_softc *, struct iwm_tx_ring *, int); static void iwm_reset_tx_ring(struct iwm_softc *, struct iwm_tx_ring *); static void iwm_free_tx_ring(struct iwm_softc *, struct iwm_tx_ring *); static void iwm_enable_interrupts(struct iwm_softc *); static void iwm_restore_interrupts(struct iwm_softc *); static void iwm_disable_interrupts(struct iwm_softc *); static void iwm_ict_reset(struct iwm_softc *); static int iwm_allow_mcast(struct ieee80211vap *, struct iwm_softc *); static void iwm_stop_device(struct iwm_softc *); static void iwm_mvm_nic_config(struct iwm_softc *); static int iwm_nic_rx_init(struct iwm_softc *); static int iwm_nic_tx_init(struct iwm_softc *); static int iwm_nic_init(struct iwm_softc *); static void iwm_enable_txq(struct iwm_softc *, int, int); static int iwm_post_alive(struct iwm_softc *); static int iwm_nvm_read_chunk(struct iwm_softc *, uint16_t, uint16_t, uint16_t, uint8_t *, uint16_t *); static int iwm_nvm_read_section(struct iwm_softc *, uint16_t, uint8_t *, uint16_t *); static void iwm_init_channel_map(struct iwm_softc *, const uint16_t * const); static int iwm_parse_nvm_data(struct iwm_softc *, const uint16_t *, const uint16_t *, const uint16_t *, uint8_t, uint8_t); struct iwm_nvm_section; static int iwm_parse_nvm_sections(struct iwm_softc *, struct iwm_nvm_section *); static int iwm_nvm_init(struct iwm_softc *); static int iwm_firmware_load_chunk(struct iwm_softc *, uint32_t, const uint8_t *, uint32_t); static int iwm_load_firmware(struct iwm_softc *, enum iwm_ucode_type); static int iwm_start_fw(struct iwm_softc *, enum iwm_ucode_type); static int iwm_fw_alive(struct iwm_softc *, uint32_t); static int iwm_send_tx_ant_cfg(struct iwm_softc *, uint8_t); static int iwm_send_phy_cfg_cmd(struct 
iwm_softc *); static int iwm_mvm_load_ucode_wait_alive(struct iwm_softc *, enum iwm_ucode_type); static int iwm_run_init_mvm_ucode(struct iwm_softc *, int); static int iwm_rx_addbuf(struct iwm_softc *, int, int); static int iwm_mvm_calc_rssi(struct iwm_softc *, struct iwm_rx_phy_info *); static int iwm_mvm_get_signal_strength(struct iwm_softc *, struct iwm_rx_phy_info *); static void iwm_mvm_rx_rx_phy_cmd(struct iwm_softc *, struct iwm_rx_packet *, struct iwm_rx_data *); static int iwm_get_noise(const struct iwm_mvm_statistics_rx_non_phy *); static void iwm_mvm_rx_rx_mpdu(struct iwm_softc *, struct iwm_rx_packet *, struct iwm_rx_data *); -static void iwm_mvm_rx_tx_cmd_single(struct iwm_softc *, +static int iwm_mvm_rx_tx_cmd_single(struct iwm_softc *, struct iwm_rx_packet *, struct iwm_node *); static void iwm_mvm_rx_tx_cmd(struct iwm_softc *, struct iwm_rx_packet *, struct iwm_rx_data *); static void iwm_cmd_done(struct iwm_softc *, struct iwm_rx_packet *); #if 0 static void iwm_update_sched(struct iwm_softc *, int, int, uint8_t, uint16_t); #endif static const struct iwm_rate * iwm_tx_fill_cmd(struct iwm_softc *, struct iwm_node *, struct ieee80211_frame *, struct iwm_tx_cmd *); static int iwm_tx(struct iwm_softc *, struct mbuf *, struct ieee80211_node *, int); static int iwm_raw_xmit(struct ieee80211_node *, struct mbuf *, const struct ieee80211_bpf_params *); static void iwm_mvm_add_sta_cmd_v6_to_v5(struct iwm_mvm_add_sta_cmd_v6 *, struct iwm_mvm_add_sta_cmd_v5 *); static int iwm_mvm_send_add_sta_cmd_status(struct iwm_softc *, struct iwm_mvm_add_sta_cmd_v6 *, int *); static int iwm_mvm_sta_send_to_fw(struct iwm_softc *, struct iwm_node *, int); static int iwm_mvm_add_sta(struct iwm_softc *, struct iwm_node *); static int iwm_mvm_update_sta(struct iwm_softc *, struct iwm_node *); static int iwm_mvm_add_int_sta_common(struct iwm_softc *, struct iwm_int_sta *, const uint8_t *, uint16_t, uint16_t); static int iwm_mvm_add_aux_sta(struct iwm_softc *); static int iwm_mvm_update_quotas(struct iwm_softc *, struct iwm_node *); static int iwm_auth(struct ieee80211vap *, struct iwm_softc *); static int iwm_assoc(struct ieee80211vap *, struct iwm_softc *); static int iwm_release(struct iwm_softc *, struct iwm_node *); static struct ieee80211_node * iwm_node_alloc(struct ieee80211vap *, const uint8_t[IEEE80211_ADDR_LEN]); static void iwm_setrates(struct iwm_softc *, struct iwm_node *); static int iwm_media_change(struct ifnet *); static int iwm_newstate(struct ieee80211vap *, enum ieee80211_state, int); static void iwm_endscan_cb(void *, int); static int iwm_init_hw(struct iwm_softc *); static void iwm_init(struct iwm_softc *); static void iwm_start(struct iwm_softc *); static void iwm_stop(struct iwm_softc *); static void iwm_watchdog(void *); static void iwm_parent(struct ieee80211com *); #ifdef IWM_DEBUG static const char * iwm_desc_lookup(uint32_t); static void iwm_nic_error(struct iwm_softc *); #endif static void iwm_notif_intr(struct iwm_softc *); static void iwm_intr(void *); static int iwm_attach(device_t); static void iwm_preinit(void *); static int iwm_detach_local(struct iwm_softc *sc, int); static void iwm_init_task(void *); static void iwm_radiotap_attach(struct iwm_softc *); static struct ieee80211vap * iwm_vap_create(struct ieee80211com *, const char [IFNAMSIZ], int, enum ieee80211_opmode, int, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN]); static void iwm_vap_delete(struct ieee80211vap *); static void iwm_scan_start(struct ieee80211com *); static void 
iwm_scan_end(struct ieee80211com *); static void iwm_update_mcast(struct ieee80211com *); static void iwm_set_channel(struct ieee80211com *); static void iwm_scan_curchan(struct ieee80211_scan_state *, unsigned long); static void iwm_scan_mindwell(struct ieee80211_scan_state *); static int iwm_detach(device_t); /* * Firmware parser. */ static int iwm_store_cscheme(struct iwm_softc *sc, const uint8_t *data, size_t dlen) { const struct iwm_fw_cscheme_list *l = (const void *)data; if (dlen < sizeof(*l) || dlen < sizeof(l->size) + l->size * sizeof(*l->cs)) return EINVAL; /* we don't actually store anything for now, always use s/w crypto */ return 0; } static int iwm_firmware_store_section(struct iwm_softc *sc, enum iwm_ucode_type type, const uint8_t *data, size_t dlen) { struct iwm_fw_sects *fws; struct iwm_fw_onesect *fwone; if (type >= IWM_UCODE_TYPE_MAX) return EINVAL; if (dlen < sizeof(uint32_t)) return EINVAL; fws = &sc->sc_fw.fw_sects[type]; if (fws->fw_count >= IWM_UCODE_SECT_MAX) return EINVAL; fwone = &fws->fw_sect[fws->fw_count]; /* first 32bit are device load offset */ memcpy(&fwone->fws_devoff, data, sizeof(uint32_t)); /* rest is data */ fwone->fws_data = data + sizeof(uint32_t); fwone->fws_len = dlen - sizeof(uint32_t); fws->fw_count++; fws->fw_totlen += fwone->fws_len; return 0; } /* iwlwifi: iwl-drv.c */ struct iwm_tlv_calib_data { uint32_t ucode_type; struct iwm_tlv_calib_ctrl calib; } __packed; static int iwm_set_default_calib(struct iwm_softc *sc, const void *data) { const struct iwm_tlv_calib_data *def_calib = data; uint32_t ucode_type = le32toh(def_calib->ucode_type); if (ucode_type >= IWM_UCODE_TYPE_MAX) { device_printf(sc->sc_dev, "Wrong ucode_type %u for default " "calibration.\n", ucode_type); return EINVAL; } sc->sc_default_calib[ucode_type].flow_trigger = def_calib->calib.flow_trigger; sc->sc_default_calib[ucode_type].event_trigger = def_calib->calib.event_trigger; return 0; } static void iwm_fw_info_free(struct iwm_fw_info *fw) { firmware_put(fw->fw_rawdata, FIRMWARE_UNLOAD); fw->fw_rawdata = NULL; fw->fw_rawsize = 0; /* don't touch fw->fw_status */ memset(fw->fw_sects, 0, sizeof(fw->fw_sects)); } static int iwm_read_firmware(struct iwm_softc *sc, enum iwm_ucode_type ucode_type) { struct iwm_fw_info *fw = &sc->sc_fw; const struct iwm_tlv_ucode_header *uhdr; struct iwm_ucode_tlv tlv; enum iwm_ucode_tlv_type tlv_type; const struct firmware *fwp; const uint8_t *data; int error = 0; size_t len; if (fw->fw_status == IWM_FW_STATUS_DONE && ucode_type != IWM_UCODE_TYPE_INIT) return 0; while (fw->fw_status == IWM_FW_STATUS_INPROGRESS) msleep(&sc->sc_fw, &sc->sc_mtx, 0, "iwmfwp", 0); fw->fw_status = IWM_FW_STATUS_INPROGRESS; if (fw->fw_rawdata != NULL) iwm_fw_info_free(fw); /* * Load firmware into driver memory. * fw_rawdata and fw_rawsize will be set. 
*/ IWM_UNLOCK(sc); fwp = firmware_get(sc->sc_fwname); if (fwp == NULL) { device_printf(sc->sc_dev, "could not read firmware %s (error %d)\n", sc->sc_fwname, error); IWM_LOCK(sc); goto out; } IWM_LOCK(sc); fw->fw_rawdata = fwp->data; fw->fw_rawsize = fwp->datasize; /* * Parse firmware contents */ uhdr = (const void *)fw->fw_rawdata; if (*(const uint32_t *)fw->fw_rawdata != 0 || le32toh(uhdr->magic) != IWM_TLV_UCODE_MAGIC) { device_printf(sc->sc_dev, "invalid firmware %s\n", sc->sc_fwname); error = EINVAL; goto out; } sc->sc_fwver = le32toh(uhdr->ver); data = uhdr->data; len = fw->fw_rawsize - sizeof(*uhdr); while (len >= sizeof(tlv)) { size_t tlv_len; const void *tlv_data; memcpy(&tlv, data, sizeof(tlv)); tlv_len = le32toh(tlv.length); tlv_type = le32toh(tlv.type); len -= sizeof(tlv); data += sizeof(tlv); tlv_data = data; if (len < tlv_len) { device_printf(sc->sc_dev, "firmware too short: %zu bytes\n", len); error = EINVAL; goto parse_out; } switch ((int)tlv_type) { case IWM_UCODE_TLV_PROBE_MAX_LEN: if (tlv_len < sizeof(uint32_t)) { device_printf(sc->sc_dev, "%s: PROBE_MAX_LEN (%d) < sizeof(uint32_t)\n", __func__, (int) tlv_len); error = EINVAL; goto parse_out; } sc->sc_capa_max_probe_len = le32toh(*(const uint32_t *)tlv_data); /* limit it to something sensible */ if (sc->sc_capa_max_probe_len > (1<<16)) { IWM_DPRINTF(sc, IWM_DEBUG_FIRMWARE_TLV, "%s: IWM_UCODE_TLV_PROBE_MAX_LEN " "ridiculous\n", __func__); error = EINVAL; goto parse_out; } break; case IWM_UCODE_TLV_PAN: if (tlv_len) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TLV_PAN: tlv_len (%d) > 0\n", __func__, (int) tlv_len); error = EINVAL; goto parse_out; } sc->sc_capaflags |= IWM_UCODE_TLV_FLAGS_PAN; break; case IWM_UCODE_TLV_FLAGS: if (tlv_len < sizeof(uint32_t)) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TLV_FLAGS: tlv_len (%d) < sizeof(uint32_t)\n", __func__, (int) tlv_len); error = EINVAL; goto parse_out; } /* * Apparently there can be many flags, but Linux driver * parses only the first one, and so do we. * * XXX: why does this override IWM_UCODE_TLV_PAN? * Intentional or a bug? Observations from * current firmware file: * 1) TLV_PAN is parsed first * 2) TLV_FLAGS contains TLV_FLAGS_PAN * ==> this resets TLV_PAN to itself... 
hnnnk */ sc->sc_capaflags = le32toh(*(const uint32_t *)tlv_data); break; case IWM_UCODE_TLV_CSCHEME: if ((error = iwm_store_cscheme(sc, tlv_data, tlv_len)) != 0) { device_printf(sc->sc_dev, "%s: iwm_store_cscheme(): returned %d\n", __func__, error); goto parse_out; } break; case IWM_UCODE_TLV_NUM_OF_CPU: if (tlv_len != sizeof(uint32_t)) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TLV_NUM_OF_CPU: tlv_len (%d) < sizeof(uint32_t)\n", __func__, (int) tlv_len); error = EINVAL; goto parse_out; } if (le32toh(*(const uint32_t*)tlv_data) != 1) { device_printf(sc->sc_dev, "%s: driver supports " "only TLV_NUM_OF_CPU == 1", __func__); error = EINVAL; goto parse_out; } break; case IWM_UCODE_TLV_SEC_RT: if ((error = iwm_firmware_store_section(sc, IWM_UCODE_TYPE_REGULAR, tlv_data, tlv_len)) != 0) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TYPE_REGULAR: iwm_firmware_store_section() failed; %d\n", __func__, error); goto parse_out; } break; case IWM_UCODE_TLV_SEC_INIT: if ((error = iwm_firmware_store_section(sc, IWM_UCODE_TYPE_INIT, tlv_data, tlv_len)) != 0) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TYPE_INIT: iwm_firmware_store_section() failed; %d\n", __func__, error); goto parse_out; } break; case IWM_UCODE_TLV_SEC_WOWLAN: if ((error = iwm_firmware_store_section(sc, IWM_UCODE_TYPE_WOW, tlv_data, tlv_len)) != 0) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TYPE_WOW: iwm_firmware_store_section() failed; %d\n", __func__, error); goto parse_out; } break; case IWM_UCODE_TLV_DEF_CALIB: if (tlv_len != sizeof(struct iwm_tlv_calib_data)) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TLV_DEF_CALIB: tlv_len (%d) < sizeof(iwm_tlv_calib_data) (%d)\n", __func__, (int) tlv_len, (int) sizeof(struct iwm_tlv_calib_data)); error = EINVAL; goto parse_out; } if ((error = iwm_set_default_calib(sc, tlv_data)) != 0) { device_printf(sc->sc_dev, "%s: iwm_set_default_calib() failed: %d\n", __func__, error); goto parse_out; } break; case IWM_UCODE_TLV_PHY_SKU: if (tlv_len != sizeof(uint32_t)) { error = EINVAL; device_printf(sc->sc_dev, "%s: IWM_UCODE_TLV_PHY_SKU: tlv_len (%d) < sizeof(uint32_t)\n", __func__, (int) tlv_len); goto parse_out; } sc->sc_fw_phy_config = le32toh(*(const uint32_t *)tlv_data); break; case IWM_UCODE_TLV_API_CHANGES_SET: case IWM_UCODE_TLV_ENABLED_CAPABILITIES: /* ignore, not used by current driver */ break; default: device_printf(sc->sc_dev, "%s: unknown firmware section %d, abort\n", __func__, tlv_type); error = EINVAL; goto parse_out; } len -= roundup(tlv_len, 4); data += roundup(tlv_len, 4); } KASSERT(error == 0, ("unhandled error")); parse_out: if (error) { device_printf(sc->sc_dev, "firmware parse error %d, " "section type %d\n", error, tlv_type); } if (!(sc->sc_capaflags & IWM_UCODE_TLV_FLAGS_PM_CMD_SUPPORT)) { device_printf(sc->sc_dev, "device uses unsupported power ops\n"); error = ENOTSUP; } out: if (error) { fw->fw_status = IWM_FW_STATUS_NONE; if (fw->fw_rawdata != NULL) iwm_fw_info_free(fw); } else fw->fw_status = IWM_FW_STATUS_DONE; wakeup(&sc->sc_fw); return error; } /* * DMA resource routines */ static void iwm_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { if (error != 0) return; KASSERT(nsegs == 1, ("too many DMA segments, %d should be 1", nsegs)); *(bus_addr_t *)arg = segs[0].ds_addr; }
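iwm_dma_map_addr() above is the usual single-segment busdma load callback: bus_dmamap_load() reports the DMA segments through a callback, and the driver smuggles the bus address out via the opaque arg pointer. A toy model of that contract, with mock types standing in for the kernel's bus_dma_segment_t (not the real API):

#include <stdint.h>
#include <stdio.h>

/* Mock stand-ins for the busdma types and callback signature. */
typedef uint64_t bus_addr_t;
typedef struct { bus_addr_t ds_addr; } bus_dma_segment_t;
typedef void (*load_cb_t)(void *arg, bus_dma_segment_t *segs, int nsegs,
    int error);

/* The callback stores the first (only) segment's address via arg;
 * a one-segment tag guarantees nsegs == 1 on success. */
static void
map_addr_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
	if (error != 0)
		return;
	*(bus_addr_t *)arg = segs[0].ds_addr;
}

/* Model of bus_dmamap_load(): invokes the callback with the mapping. */
static void
fake_load(load_cb_t cb, void *arg)
{
	bus_dma_segment_t seg = { 0x12345000 };
	cb(arg, &seg, 1, 0);
}

int
main(void)
{
	bus_addr_t paddr = 0;
	fake_load(map_addr_cb, &paddr);
	printf("paddr=0x%llx\n", (unsigned long long)paddr);
	return (0);
}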
static int iwm_dma_contig_alloc(bus_dma_tag_t tag, struct iwm_dma_info *dma, bus_size_t size, bus_size_t alignment) { int error; dma->tag = NULL; dma->size = size; error = bus_dma_tag_create(tag, alignment, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, size, 1, size, 0, NULL, NULL, &dma->tag); if (error != 0) goto fail; error = bus_dmamem_alloc(dma->tag, (void **)&dma->vaddr, BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT, &dma->map); if (error != 0) goto fail; error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr, size, iwm_dma_map_addr, &dma->paddr, BUS_DMA_NOWAIT); if (error != 0) goto fail; bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE); return 0; fail: iwm_dma_contig_free(dma); return error; } static void iwm_dma_contig_free(struct iwm_dma_info *dma) { if (dma->map != NULL) { if (dma->vaddr != NULL) { bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->tag, dma->map); bus_dmamem_free(dma->tag, dma->vaddr, dma->map); dma->vaddr = NULL; } bus_dmamap_destroy(dma->tag, dma->map); dma->map = NULL; } if (dma->tag != NULL) { bus_dma_tag_destroy(dma->tag); dma->tag = NULL; } } /* fwmem is used to load firmware onto the card */ static int iwm_alloc_fwmem(struct iwm_softc *sc) { /* Must be aligned on a 16-byte boundary. */ return iwm_dma_contig_alloc(sc->sc_dmat, &sc->fw_dma, sc->sc_fwdmasegsz, 16); } static void iwm_free_fwmem(struct iwm_softc *sc) { iwm_dma_contig_free(&sc->fw_dma); } /* tx scheduler rings. not used? */ static int iwm_alloc_sched(struct iwm_softc *sc) { int rv; /* TX scheduler rings must be aligned on a 1KB boundary. */ rv = iwm_dma_contig_alloc(sc->sc_dmat, &sc->sched_dma, nitems(sc->txq) * sizeof(struct iwm_agn_scd_bc_tbl), 1024); return rv; } static void iwm_free_sched(struct iwm_softc *sc) { iwm_dma_contig_free(&sc->sched_dma); } /* keep-warm page is used internally by the card. see iwl-fh.h for more info */ static int iwm_alloc_kw(struct iwm_softc *sc) { return iwm_dma_contig_alloc(sc->sc_dmat, &sc->kw_dma, 4096, 4096); } static void iwm_free_kw(struct iwm_softc *sc) { iwm_dma_contig_free(&sc->kw_dma); } /* interrupt cause table */ static int iwm_alloc_ict(struct iwm_softc *sc) { return iwm_dma_contig_alloc(sc->sc_dmat, &sc->ict_dma, IWM_ICT_SIZE, 1<<IWM_ICT_PADDR_SHIFT); } static void iwm_free_ict(struct iwm_softc *sc) { iwm_dma_contig_free(&sc->ict_dma); } static int iwm_alloc_rx_ring(struct iwm_softc *sc, struct iwm_rx_ring *ring) { bus_size_t size; int i, error; ring->cur = 0; /* Allocate RX descriptors (256-byte aligned). */ size = IWM_RX_RING_COUNT * sizeof(uint32_t); error = iwm_dma_contig_alloc(sc->sc_dmat, &ring->desc_dma, size, 256); if (error != 0) { device_printf(sc->sc_dev, "could not allocate RX ring DMA memory\n"); goto fail; } ring->desc = ring->desc_dma.vaddr; /* Allocate RX status area (16-byte aligned). */ error = iwm_dma_contig_alloc(sc->sc_dmat, &ring->stat_dma, sizeof(*ring->stat), 16); if (error != 0) { device_printf(sc->sc_dev, "could not allocate RX status DMA memory\n"); goto fail; } ring->stat = ring->stat_dma.vaddr; /* Create RX buffer DMA tag. */ error = bus_dma_tag_create(sc->sc_dmat, 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, IWM_RBUF_SIZE, 1, IWM_RBUF_SIZE, 0, NULL, NULL, &ring->data_dmat); if (error != 0) { device_printf(sc->sc_dev, "%s: could not create RX buf DMA tag, error %d\n", __func__, error); goto fail; } /* * Allocate and map RX buffers. */ for (i = 0; i < IWM_RX_RING_COUNT; i++) { if ((error = iwm_rx_addbuf(sc, IWM_RBUF_SIZE, i)) != 0) { goto fail; } } return 0; fail: iwm_free_rx_ring(sc, ring); return error; } static void iwm_reset_rx_ring(struct iwm_softc *sc, struct iwm_rx_ring *ring) { /* XXX print out if we can't lock the NIC? */ if (iwm_nic_lock(sc)) { /* XXX handle if RX stop doesn't finish?
*/ (void) iwm_pcie_rx_stop(sc); iwm_nic_unlock(sc); } /* Reset the ring state */ ring->cur = 0; memset(sc->rxq.stat, 0, sizeof(*sc->rxq.stat)); } static void iwm_free_rx_ring(struct iwm_softc *sc, struct iwm_rx_ring *ring) { int i; iwm_dma_contig_free(&ring->desc_dma); iwm_dma_contig_free(&ring->stat_dma); for (i = 0; i < IWM_RX_RING_COUNT; i++) { struct iwm_rx_data *data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); data->m = NULL; } if (data->map != NULL) { bus_dmamap_destroy(ring->data_dmat, data->map); data->map = NULL; } } if (ring->data_dmat != NULL) { bus_dma_tag_destroy(ring->data_dmat); ring->data_dmat = NULL; } } static int iwm_alloc_tx_ring(struct iwm_softc *sc, struct iwm_tx_ring *ring, int qid) { bus_addr_t paddr; bus_size_t size; int i, error; ring->qid = qid; ring->queued = 0; ring->cur = 0; /* Allocate TX descriptors (256-byte aligned). */ size = IWM_TX_RING_COUNT * sizeof (struct iwm_tfd); error = iwm_dma_contig_alloc(sc->sc_dmat, &ring->desc_dma, size, 256); if (error != 0) { device_printf(sc->sc_dev, "could not allocate TX ring DMA memory\n"); goto fail; } ring->desc = ring->desc_dma.vaddr; /* * We only use rings 0 through 9 (4 EDCA + cmd) so there is no need * to allocate commands space for other rings. */ if (qid > IWM_MVM_CMD_QUEUE) return 0; size = IWM_TX_RING_COUNT * sizeof(struct iwm_device_cmd); error = iwm_dma_contig_alloc(sc->sc_dmat, &ring->cmd_dma, size, 4); if (error != 0) { device_printf(sc->sc_dev, "could not allocate TX cmd DMA memory\n"); goto fail; } ring->cmd = ring->cmd_dma.vaddr; error = bus_dma_tag_create(sc->sc_dmat, 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, - IWM_MAX_SCATTER - 1, MCLBYTES, 0, NULL, NULL, &ring->data_dmat); + IWM_MAX_SCATTER - 2, MCLBYTES, 0, NULL, NULL, &ring->data_dmat); if (error != 0) { device_printf(sc->sc_dev, "could not create TX buf DMA tag\n"); goto fail; } paddr = ring->cmd_dma.paddr; for (i = 0; i < IWM_TX_RING_COUNT; i++) { struct iwm_tx_data *data = &ring->data[i]; data->cmd_paddr = paddr; data->scratch_paddr = paddr + sizeof(struct iwm_cmd_header) + offsetof(struct iwm_tx_cmd, scratch); paddr += sizeof(struct iwm_device_cmd); error = bus_dmamap_create(ring->data_dmat, 0, &data->map); if (error != 0) { device_printf(sc->sc_dev, "could not create TX buf DMA map\n"); goto fail; } } KASSERT(paddr == ring->cmd_dma.paddr + size, ("invalid physical address")); return 0; fail: iwm_free_tx_ring(sc, ring); return error; } static void iwm_reset_tx_ring(struct iwm_softc *sc, struct iwm_tx_ring *ring) { int i; for (i = 0; i < IWM_TX_RING_COUNT; i++) { struct iwm_tx_data *data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); data->m = NULL; } } /* Clear TX descriptors. 
*/ memset(ring->desc, 0, ring->desc_dma.size); bus_dmamap_sync(ring->desc_dma.tag, ring->desc_dma.map, BUS_DMASYNC_PREWRITE); sc->qfullmsk &= ~(1 << ring->qid); ring->queued = 0; ring->cur = 0; } static void iwm_free_tx_ring(struct iwm_softc *sc, struct iwm_tx_ring *ring) { int i; iwm_dma_contig_free(&ring->desc_dma); iwm_dma_contig_free(&ring->cmd_dma); for (i = 0; i < IWM_TX_RING_COUNT; i++) { struct iwm_tx_data *data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); data->m = NULL; } if (data->map != NULL) { bus_dmamap_destroy(ring->data_dmat, data->map); data->map = NULL; } } if (ring->data_dmat != NULL) { bus_dma_tag_destroy(ring->data_dmat); ring->data_dmat = NULL; } } /* * High-level hardware frobbing routines */ static void iwm_enable_interrupts(struct iwm_softc *sc) { sc->sc_intmask = IWM_CSR_INI_SET_MASK; IWM_WRITE(sc, IWM_CSR_INT_MASK, sc->sc_intmask); } static void iwm_restore_interrupts(struct iwm_softc *sc) { IWM_WRITE(sc, IWM_CSR_INT_MASK, sc->sc_intmask); } static void iwm_disable_interrupts(struct iwm_softc *sc) { /* disable interrupts */ IWM_WRITE(sc, IWM_CSR_INT_MASK, 0); /* acknowledge all interrupts */ IWM_WRITE(sc, IWM_CSR_INT, ~0); IWM_WRITE(sc, IWM_CSR_FH_INT_STATUS, ~0); } static void iwm_ict_reset(struct iwm_softc *sc) { iwm_disable_interrupts(sc); /* Reset ICT table. */ memset(sc->ict_dma.vaddr, 0, IWM_ICT_SIZE); sc->ict_cur = 0; /* Set physical address of ICT table (4KB aligned). */ IWM_WRITE(sc, IWM_CSR_DRAM_INT_TBL_REG, IWM_CSR_DRAM_INT_TBL_ENABLE | IWM_CSR_DRAM_INIT_TBL_WRAP_CHECK | sc->ict_dma.paddr >> IWM_ICT_PADDR_SHIFT); /* Switch to ICT interrupt mode in driver. */ sc->sc_flags |= IWM_FLAG_USE_ICT; /* Re-enable interrupts. */ IWM_WRITE(sc, IWM_CSR_INT, ~0); iwm_enable_interrupts(sc); } /* iwlwifi pcie/trans.c */ /* * Since this .. hard-resets things, it's time to actually * mark the first vap (if any) as having no mac context. * It's annoying, but since the driver is potentially being * stop/start'ed whilst active (thanks openbsd port!) we * have to correctly track this. */ static void iwm_stop_device(struct iwm_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); int chnl, ntries; int qid; /* tell the device to stop sending interrupts */ iwm_disable_interrupts(sc); /* * FreeBSD-local: mark the first vap as not-uploaded, * so the next transition through auth/assoc * will correctly populate the MAC context. */ if (vap) { struct iwm_vap *iv = IWM_VAP(vap); iv->is_uploaded = 0; } /* device going down, Stop using ICT table */ sc->sc_flags &= ~IWM_FLAG_USE_ICT; /* stop tx and rx. tx and rx bits, as usual, are from if_iwn */ iwm_write_prph(sc, IWM_SCD_TXFACT, 0); /* Stop all DMA channels. */ if (iwm_nic_lock(sc)) { for (chnl = 0; chnl < IWM_FH_TCSR_CHNL_NUM; chnl++) { IWM_WRITE(sc, IWM_FH_TCSR_CHNL_TX_CONFIG_REG(chnl), 0); for (ntries = 0; ntries < 200; ntries++) { uint32_t r; r = IWM_READ(sc, IWM_FH_TSSR_TX_STATUS_REG); if (r & IWM_FH_TSSR_TX_STATUS_REG_MSK_CHNL_IDLE( chnl)) break; DELAY(20); } } iwm_nic_unlock(sc); } /* Stop RX ring. */ iwm_reset_rx_ring(sc, &sc->rxq); /* Reset all TX rings. 
*/ for (qid = 0; qid < nitems(sc->txq); qid++) iwm_reset_tx_ring(sc, &sc->txq[qid]); /* * Power-down device's busmaster DMA clocks */ iwm_write_prph(sc, IWM_APMG_CLK_DIS_REG, IWM_APMG_CLK_VAL_DMA_CLK_RQT); DELAY(5); /* Make sure (redundant) we've released our request to stay awake */ IWM_CLRBITS(sc, IWM_CSR_GP_CNTRL, IWM_CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); /* Stop the device, and put it in low power state */ iwm_apm_stop(sc); /* Upon stop, the APM issues an interrupt if HW RF kill is set. * Clean again the interrupt here */ iwm_disable_interrupts(sc); /* stop and reset the on-board processor */ IWM_WRITE(sc, IWM_CSR_RESET, IWM_CSR_RESET_REG_FLAG_NEVO_RESET); /* * Even if we stop the HW, we still want the RF kill * interrupt */ iwm_enable_rfkill_int(sc); iwm_check_rfkill(sc); } /* iwlwifi: mvm/ops.c */ static void iwm_mvm_nic_config(struct iwm_softc *sc) { uint8_t radio_cfg_type, radio_cfg_step, radio_cfg_dash; uint32_t reg_val = 0; radio_cfg_type = (sc->sc_fw_phy_config & IWM_FW_PHY_CFG_RADIO_TYPE) >> IWM_FW_PHY_CFG_RADIO_TYPE_POS; radio_cfg_step = (sc->sc_fw_phy_config & IWM_FW_PHY_CFG_RADIO_STEP) >> IWM_FW_PHY_CFG_RADIO_STEP_POS; radio_cfg_dash = (sc->sc_fw_phy_config & IWM_FW_PHY_CFG_RADIO_DASH) >> IWM_FW_PHY_CFG_RADIO_DASH_POS; /* SKU control */ reg_val |= IWM_CSR_HW_REV_STEP(sc->sc_hw_rev) << IWM_CSR_HW_IF_CONFIG_REG_POS_MAC_STEP; reg_val |= IWM_CSR_HW_REV_DASH(sc->sc_hw_rev) << IWM_CSR_HW_IF_CONFIG_REG_POS_MAC_DASH; /* radio configuration */ reg_val |= radio_cfg_type << IWM_CSR_HW_IF_CONFIG_REG_POS_PHY_TYPE; reg_val |= radio_cfg_step << IWM_CSR_HW_IF_CONFIG_REG_POS_PHY_STEP; reg_val |= radio_cfg_dash << IWM_CSR_HW_IF_CONFIG_REG_POS_PHY_DASH; IWM_WRITE(sc, IWM_CSR_HW_IF_CONFIG_REG, reg_val); IWM_DPRINTF(sc, IWM_DEBUG_RESET, "Radio type=0x%x-0x%x-0x%x\n", radio_cfg_type, radio_cfg_step, radio_cfg_dash); /* * W/A : NIC is stuck in a reset state after Early PCIe power off * (PCIe power is lost before PERST# is asserted), causing ME FW * to lose ownership and not being able to obtain it back. */ iwm_set_bits_mask_prph(sc, IWM_APMG_PS_CTRL_REG, IWM_APMG_PS_CTRL_EARLY_PWR_OFF_RESET_DIS, ~IWM_APMG_PS_CTRL_EARLY_PWR_OFF_RESET_DIS); } static int iwm_nic_rx_init(struct iwm_softc *sc) { if (!iwm_nic_lock(sc)) return EBUSY; /* * Initialize RX ring. This is from the iwn driver. */ memset(sc->rxq.stat, 0, sizeof(*sc->rxq.stat)); /* stop DMA */ IWM_WRITE(sc, IWM_FH_MEM_RCSR_CHNL0_CONFIG_REG, 0); IWM_WRITE(sc, IWM_FH_MEM_RCSR_CHNL0_RBDCB_WPTR, 0); IWM_WRITE(sc, IWM_FH_MEM_RCSR_CHNL0_FLUSH_RB_REQ, 0); IWM_WRITE(sc, IWM_FH_RSCSR_CHNL0_RDPTR, 0); IWM_WRITE(sc, IWM_FH_RSCSR_CHNL0_RBDCB_WPTR_REG, 0); /* Set physical address of RX ring (256-byte aligned). */ IWM_WRITE(sc, IWM_FH_RSCSR_CHNL0_RBDCB_BASE_REG, sc->rxq.desc_dma.paddr >> 8); /* Set physical address of RX status (16-byte aligned). */ IWM_WRITE(sc, IWM_FH_RSCSR_CHNL0_STTS_WPTR_REG, sc->rxq.stat_dma.paddr >> 4); /* Enable RX. */ /* * Note: Linux driver also sets this: * (IWM_RX_RB_TIMEOUT << IWM_FH_RCSR_RX_CONFIG_REG_IRQ_RBTH_POS) | * * It causes weird behavior. YMMV. 
 */
	IWM_WRITE(sc, IWM_FH_MEM_RCSR_CHNL0_CONFIG_REG,
	    IWM_FH_RCSR_RX_CONFIG_CHNL_EN_ENABLE_VAL |
	    IWM_FH_RCSR_CHNL0_RX_IGNORE_RXF_EMPTY |  /* HW bug */
	    IWM_FH_RCSR_CHNL0_RX_CONFIG_IRQ_DEST_INT_HOST_VAL |
	    IWM_FH_RCSR_RX_CONFIG_REG_VAL_RB_SIZE_4K |
	    IWM_RX_QUEUE_SIZE_LOG << IWM_FH_RCSR_RX_CONFIG_RBDCB_SIZE_POS);

	IWM_WRITE_1(sc, IWM_CSR_INT_COALESCING, IWM_HOST_INT_TIMEOUT_DEF);

	/* W/A for interrupt coalescing bug in 7260 and 3160 */
	if (sc->host_interrupt_operation_mode)
		IWM_SETBITS(sc, IWM_CSR_INT_COALESCING, IWM_HOST_INT_OPER_MODE);

	/*
	 * Thus sayeth el jefe (iwlwifi) via a comment:
	 *
	 * This value should initially be 0 (before preparing any
	 * RBs), should be 8 after preparing the first 8 RBs (for example)
	 */
	IWM_WRITE(sc, IWM_FH_RSCSR_CHNL0_WPTR, 8);

	iwm_nic_unlock(sc);

	return 0;
}

static int
iwm_nic_tx_init(struct iwm_softc *sc)
{
	int qid;

	if (!iwm_nic_lock(sc))
		return EBUSY;

	/* Deactivate TX scheduler. */
	iwm_write_prph(sc, IWM_SCD_TXFACT, 0);

	/* Set physical address of "keep warm" page (16-byte aligned). */
	IWM_WRITE(sc, IWM_FH_KW_MEM_ADDR_REG, sc->kw_dma.paddr >> 4);

	/* Initialize TX rings. */
	for (qid = 0; qid < nitems(sc->txq); qid++) {
		struct iwm_tx_ring *txq = &sc->txq[qid];

		/* Set physical address of TX ring (256-byte aligned). */
		IWM_WRITE(sc, IWM_FH_MEM_CBBC_QUEUE(qid),
		    txq->desc_dma.paddr >> 8);
		IWM_DPRINTF(sc, IWM_DEBUG_XMIT,
		    "%s: loading ring %d descriptors (%p) at %lx\n",
		    __func__, qid, txq->desc,
		    (unsigned long) (txq->desc_dma.paddr >> 8));
	}
	iwm_nic_unlock(sc);
	return 0;
}

static int
iwm_nic_init(struct iwm_softc *sc)
{
	int error;

	iwm_apm_init(sc);
	iwm_set_pwr(sc);
	iwm_mvm_nic_config(sc);

	if ((error = iwm_nic_rx_init(sc)) != 0)
		return error;

	/*
	 * Ditto for TX, from iwn
	 */
	if ((error = iwm_nic_tx_init(sc)) != 0)
		return error;

	IWM_DPRINTF(sc, IWM_DEBUG_RESET,
	    "%s: shadow registers enabled\n", __func__);
	IWM_SETBITS(sc, IWM_CSR_MAC_SHADOW_REG_CTRL, 0x800fffff);

	return 0;
}

enum iwm_mvm_tx_fifo {
	IWM_MVM_TX_FIFO_BK = 0,
	IWM_MVM_TX_FIFO_BE,
	IWM_MVM_TX_FIFO_VI,
	IWM_MVM_TX_FIFO_VO,
	IWM_MVM_TX_FIFO_MCAST = 5,
};

const uint8_t iwm_mvm_ac_to_tx_fifo[] = {
	IWM_MVM_TX_FIFO_VO,
	IWM_MVM_TX_FIFO_VI,
	IWM_MVM_TX_FIFO_BE,
	IWM_MVM_TX_FIFO_BK,
};

static void
iwm_enable_txq(struct iwm_softc *sc, int qid, int fifo)
{
	if (!iwm_nic_lock(sc)) {
		device_printf(sc->sc_dev,
		    "%s: cannot enable txq %d\n",
		    __func__, qid);
		return; /* XXX return EBUSY */
	}

	/* Deactivate before configuration. */
	iwm_write_prph(sc, IWM_SCD_QUEUE_STATUS_BITS(qid),
	    (0 << IWM_SCD_QUEUE_STTS_REG_POS_ACTIVE) |
	    (1 << IWM_SCD_QUEUE_STTS_REG_POS_SCD_ACT_EN));

	if (qid != IWM_MVM_CMD_QUEUE) {
		iwm_set_bits_prph(sc, IWM_SCD_QUEUECHAIN_SEL, (1 << qid));
	}

	iwm_clear_bits_prph(sc, IWM_SCD_AGGR_SEL, (1 << qid));

	IWM_WRITE(sc, IWM_HBUS_TARG_WRPTR, qid << 8 | 0);
	iwm_write_prph(sc, IWM_SCD_QUEUE_RDPTR(qid), 0);

	iwm_write_mem32(sc,
	    sc->sched_base + IWM_SCD_CONTEXT_QUEUE_OFFSET(qid), 0);
	/* Set scheduler window size and frame limit.
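 *
 * Worked example of the shift-and-mask composition below (the field
 * layout here is illustrative; the authoritative positions and masks
 * are the IWM_SCD_QUEUE_CTX_REG2_* constants): with a 7-bit window-size
 * field at bit 0 and a 7-bit frame-limit field at bit 16, a frame limit
 * of 64 in both fields packs as
 *
 *     (64 << 0)  & 0x0000007f = 0x00000040
 *     (64 << 16) & 0x007f0000 = 0x00400000
 *     ------------------------- OR -------
 *                               0x00400040
 *
 * i.e. a single 32-bit word written into the scheduler context.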
*/ iwm_write_mem32(sc, sc->sched_base + IWM_SCD_CONTEXT_QUEUE_OFFSET(qid) + sizeof(uint32_t), ((IWM_FRAME_LIMIT << IWM_SCD_QUEUE_CTX_REG2_WIN_SIZE_POS) & IWM_SCD_QUEUE_CTX_REG2_WIN_SIZE_MSK) | ((IWM_FRAME_LIMIT << IWM_SCD_QUEUE_CTX_REG2_FRAME_LIMIT_POS) & IWM_SCD_QUEUE_CTX_REG2_FRAME_LIMIT_MSK)); iwm_write_prph(sc, IWM_SCD_QUEUE_STATUS_BITS(qid), (1 << IWM_SCD_QUEUE_STTS_REG_POS_ACTIVE) | (fifo << IWM_SCD_QUEUE_STTS_REG_POS_TXF) | (1 << IWM_SCD_QUEUE_STTS_REG_POS_WSL) | IWM_SCD_QUEUE_STTS_REG_MSK); iwm_nic_unlock(sc); IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "%s: enabled txq %d FIFO %d\n", __func__, qid, fifo); } static int iwm_post_alive(struct iwm_softc *sc) { int nwords; int error, chnl; if (!iwm_nic_lock(sc)) return EBUSY; if (sc->sched_base != iwm_read_prph(sc, IWM_SCD_SRAM_BASE_ADDR)) { device_printf(sc->sc_dev, "%s: sched addr mismatch", __func__); error = EINVAL; goto out; } iwm_ict_reset(sc); /* Clear TX scheduler state in SRAM. */ nwords = (IWM_SCD_TRANS_TBL_MEM_UPPER_BOUND - IWM_SCD_CONTEXT_MEM_LOWER_BOUND) / sizeof(uint32_t); error = iwm_write_mem(sc, sc->sched_base + IWM_SCD_CONTEXT_MEM_LOWER_BOUND, NULL, nwords); if (error) goto out; /* Set physical address of TX scheduler rings (1KB aligned). */ iwm_write_prph(sc, IWM_SCD_DRAM_BASE_ADDR, sc->sched_dma.paddr >> 10); iwm_write_prph(sc, IWM_SCD_CHAINEXT_EN, 0); /* enable command channel */ iwm_enable_txq(sc, IWM_MVM_CMD_QUEUE, 7); iwm_write_prph(sc, IWM_SCD_TXFACT, 0xff); /* Enable DMA channels. */ for (chnl = 0; chnl < IWM_FH_TCSR_CHNL_NUM; chnl++) { IWM_WRITE(sc, IWM_FH_TCSR_CHNL_TX_CONFIG_REG(chnl), IWM_FH_TCSR_TX_CONFIG_REG_VAL_DMA_CHNL_ENABLE | IWM_FH_TCSR_TX_CONFIG_REG_VAL_DMA_CREDIT_ENABLE); } IWM_SETBITS(sc, IWM_FH_TX_CHICKEN_BITS_REG, IWM_FH_TX_CHICKEN_BITS_SCD_AUTO_RETRY_EN); /* Enable L1-Active */ iwm_clear_bits_prph(sc, IWM_APMG_PCIDEV_STT_REG, IWM_APMG_PCIDEV_STT_VAL_L1_ACT_DIS); out: iwm_nic_unlock(sc); return error; } /* * NVM read access and content parsing. We do not support * external NVM or writing NVM. 
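 */

/*
 * Aside (illustrative, not driver code): the firmware command payloads in
 * the NVM section below, e.g. struct iwm_nvm_access_cmd, are wire
 * structures, so every multi-byte field is converted with
 * htole16()/htole32(). On any host, htole16() plus a store amounts to
 * writing the low byte first, as this hypothetical encoder spells out:
 */
static inline void
le16enc_sketch(uint8_t *p, uint16_t v)
{
	p[0] = (uint8_t)(v & 0xff);	/* least-significant byte first */
	p[1] = (uint8_t)(v >> 8);
}

/*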
* iwlwifi/mvm/nvm.c */

/* list of NVM sections we are allowed/need to read */
const int nvm_to_read[] = {
	IWM_NVM_SECTION_TYPE_HW,
	IWM_NVM_SECTION_TYPE_SW,
	IWM_NVM_SECTION_TYPE_CALIBRATION,
	IWM_NVM_SECTION_TYPE_PRODUCTION,
};

/* Default NVM size to read */
#define IWM_NVM_DEFAULT_CHUNK_SIZE (2*1024)
#define IWM_MAX_NVM_SECTION_SIZE 7000

#define IWM_NVM_WRITE_OPCODE 1
#define IWM_NVM_READ_OPCODE 0

static int
iwm_nvm_read_chunk(struct iwm_softc *sc, uint16_t section,
    uint16_t offset, uint16_t length, uint8_t *data, uint16_t *len)
{
	offset = 0;	/* XXX forces chunk 0; the offset parameter is
			 * effectively ignored, so only the first chunk
			 * of a section is ever requested. */
	struct iwm_nvm_access_cmd nvm_access_cmd = {
		.offset = htole16(offset),
		.length = htole16(length),
		.type = htole16(section),
		.op_code = IWM_NVM_READ_OPCODE,
	};
	struct iwm_nvm_access_resp *nvm_resp;
	struct iwm_rx_packet *pkt;
	struct iwm_host_cmd cmd = {
		.id = IWM_NVM_ACCESS_CMD,
		.flags = IWM_CMD_SYNC | IWM_CMD_WANT_SKB |
		    IWM_CMD_SEND_IN_RFKILL,
		.data = { &nvm_access_cmd, },
	};
	int ret, bytes_read, offset_read;
	uint8_t *resp_data;

	cmd.len[0] = sizeof(struct iwm_nvm_access_cmd);

	ret = iwm_send_cmd(sc, &cmd);
	if (ret)
		return ret;

	pkt = cmd.resp_pkt;
	if (pkt->hdr.flags & IWM_CMD_FAILED_MSK) {
		device_printf(sc->sc_dev,
		    "%s: Bad return from IWM_NVM_ACCESS_CMD (0x%08X)\n",
		    __func__, pkt->hdr.flags);
		ret = EIO;
		goto exit;
	}

	/* Extract NVM response */
	nvm_resp = (void *)pkt->data;

	ret = le16toh(nvm_resp->status);
	bytes_read = le16toh(nvm_resp->length);
	offset_read = le16toh(nvm_resp->offset);
	resp_data = nvm_resp->data;
	if (ret) {
		device_printf(sc->sc_dev,
		    "%s: NVM access command failed with status %d\n",
		    __func__, ret);
		ret = EINVAL;
		goto exit;
	}

	if (offset_read != offset) {
		device_printf(sc->sc_dev,
		    "%s: NVM ACCESS response with invalid offset %d\n",
		    __func__, offset_read);
		ret = EINVAL;
		goto exit;
	}

	memcpy(data + offset, resp_data, bytes_read);
	*len = bytes_read;
 exit:
	iwm_free_resp(sc, &cmd);
	return ret;
}

/*
 * Reads an NVM section completely.
 * NICs prior to the 7000 family don't have a real NVM, but just read
 * section 0, which is the EEPROM. Because the EEPROM reading is unlimited
 * by the uCode, we need to manually check in this case that we don't
 * overflow and try to read more than the EEPROM size.
 * For 7000 family NICs, we supply the maximal size we can read, and
 * the uCode fills the response with as much data as it can,
 * without overflowing, so no check is needed.
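 */

/*
 * Illustrative sketch of the termination rule described above: keep
 * requesting fixed-size chunks and stop on the first short read. Written
 * against a hypothetical callback so it stands alone; unlike the driver
 * routine below, it also shows where a caller-buffer bound would go.
 */
typedef int (*nvm_chunk_fn_sketch)(void *ctx, uint16_t offset,
    uint16_t length, uint8_t *buf, uint16_t *actual);

static int
nvm_read_all_sketch(void *ctx, nvm_chunk_fn_sketch read_chunk,
    uint8_t *buf, uint16_t bufsize, uint16_t chunklen, uint16_t *total)
{
	uint16_t got = chunklen;
	int error;

	*total = 0;
	/* A chunk shorter than requested marks the end of the section. */
	while (got == chunklen) {
		if (chunklen > bufsize - *total)
			return (ERANGE);  /* would overrun the buffer */
		error = read_chunk(ctx, *total, chunklen,
		    buf + *total, &got);
		if (error != 0)
			return (error);
		*total += got;
	}
	return (0);
}

/* (end of sketch; the driver's implementation follows)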
*/ static int iwm_nvm_read_section(struct iwm_softc *sc, uint16_t section, uint8_t *data, uint16_t *len) { uint16_t length, seglen; int error; /* Set nvm section read length */ length = seglen = IWM_NVM_DEFAULT_CHUNK_SIZE; *len = 0; /* Read the NVM until exhausted (reading less than requested) */ while (seglen == length) { error = iwm_nvm_read_chunk(sc, section, *len, length, data, &seglen); if (error) { device_printf(sc->sc_dev, "Cannot read NVM from section " "%d offset %d, length %d\n", section, *len, length); return error; } *len += seglen; } IWM_DPRINTF(sc, IWM_DEBUG_RESET, "NVM section %d read completed\n", section); return 0; } /* * BEGIN IWM_NVM_PARSE */ /* iwlwifi/iwl-nvm-parse.c */ /* NVM offsets (in words) definitions */ enum wkp_nvm_offsets { /* NVM HW-Section offset (in words) definitions */ IWM_HW_ADDR = 0x15, /* NVM SW-Section offset (in words) definitions */ IWM_NVM_SW_SECTION = 0x1C0, IWM_NVM_VERSION = 0, IWM_RADIO_CFG = 1, IWM_SKU = 2, IWM_N_HW_ADDRS = 3, IWM_NVM_CHANNELS = 0x1E0 - IWM_NVM_SW_SECTION, /* NVM calibration section offset (in words) definitions */ IWM_NVM_CALIB_SECTION = 0x2B8, IWM_XTAL_CALIB = 0x316 - IWM_NVM_CALIB_SECTION }; /* SKU Capabilities (actual values from NVM definition) */ enum nvm_sku_bits { IWM_NVM_SKU_CAP_BAND_24GHZ = (1 << 0), IWM_NVM_SKU_CAP_BAND_52GHZ = (1 << 1), IWM_NVM_SKU_CAP_11N_ENABLE = (1 << 2), IWM_NVM_SKU_CAP_11AC_ENABLE = (1 << 3), }; /* radio config bits (actual values from NVM definition) */ #define IWM_NVM_RF_CFG_DASH_MSK(x) (x & 0x3) /* bits 0-1 */ #define IWM_NVM_RF_CFG_STEP_MSK(x) ((x >> 2) & 0x3) /* bits 2-3 */ #define IWM_NVM_RF_CFG_TYPE_MSK(x) ((x >> 4) & 0x3) /* bits 4-5 */ #define IWM_NVM_RF_CFG_PNUM_MSK(x) ((x >> 6) & 0x3) /* bits 6-7 */ #define IWM_NVM_RF_CFG_TX_ANT_MSK(x) ((x >> 8) & 0xF) /* bits 8-11 */ #define IWM_NVM_RF_CFG_RX_ANT_MSK(x) ((x >> 12) & 0xF) /* bits 12-15 */ #define DEFAULT_MAX_TX_POWER 16 /** * enum iwm_nvm_channel_flags - channel flags in NVM * @IWM_NVM_CHANNEL_VALID: channel is usable for this SKU/geo * @IWM_NVM_CHANNEL_IBSS: usable as an IBSS channel * @IWM_NVM_CHANNEL_ACTIVE: active scanning allowed * @IWM_NVM_CHANNEL_RADAR: radar detection required * @IWM_NVM_CHANNEL_DFS: dynamic freq selection candidate * @IWM_NVM_CHANNEL_WIDE: 20 MHz channel okay (?) * @IWM_NVM_CHANNEL_40MHZ: 40 MHz channel okay (?) * @IWM_NVM_CHANNEL_80MHZ: 80 MHz channel okay (?) * @IWM_NVM_CHANNEL_160MHZ: 160 MHz channel okay (?) */ enum iwm_nvm_channel_flags { IWM_NVM_CHANNEL_VALID = (1 << 0), IWM_NVM_CHANNEL_IBSS = (1 << 1), IWM_NVM_CHANNEL_ACTIVE = (1 << 3), IWM_NVM_CHANNEL_RADAR = (1 << 4), IWM_NVM_CHANNEL_DFS = (1 << 7), IWM_NVM_CHANNEL_WIDE = (1 << 8), IWM_NVM_CHANNEL_40MHZ = (1 << 9), IWM_NVM_CHANNEL_80MHZ = (1 << 10), IWM_NVM_CHANNEL_160MHZ = (1 << 11), }; /* * Add a channel to the net80211 channel list. * * ieee is the ieee channel number * ch_idx is channel index. * mode is the channel mode - CHAN_A, CHAN_B, CHAN_G. * ch_flags is the iwm channel flags. * * Return 0 on OK, < 0 on error. */ static int iwm_init_net80211_channel(struct iwm_softc *sc, int ieee, int ch_idx, int mode, uint16_t ch_flags) { /* XXX for now, no overflow checking! 
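 *
 * Aside on the frequency mapping used in this function:
 * ieee80211_ieee2mhz() applies the standard channel formulas, roughly
 * freq = 2407 + 5 * chan in the 2 GHz band (channel 14 is the special
 * case at 2484 MHz) and freq = 5000 + 5 * chan in the 5 GHz band. So
 * channel 1 maps to 2412 MHz and channel 36 to 5180 MHz.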
*/ struct ieee80211com *ic = &sc->sc_ic; int is_5ghz, flags; struct ieee80211_channel *channel; channel = &ic->ic_channels[ic->ic_nchans++]; channel->ic_ieee = ieee; is_5ghz = ch_idx >= IWM_NUM_2GHZ_CHANNELS; if (!is_5ghz) { flags = IEEE80211_CHAN_2GHZ; channel->ic_flags = mode; } else { flags = IEEE80211_CHAN_5GHZ; channel->ic_flags = mode; } channel->ic_freq = ieee80211_ieee2mhz(ieee, flags); if (!(ch_flags & IWM_NVM_CHANNEL_ACTIVE)) channel->ic_flags |= IEEE80211_CHAN_PASSIVE; return (0); } static void iwm_init_channel_map(struct iwm_softc *sc, const uint16_t * const nvm_ch_flags) { struct ieee80211com *ic = &sc->sc_ic; struct iwm_nvm_data *data = &sc->sc_nvm; int ch_idx; uint16_t ch_flags; int hw_value; for (ch_idx = 0; ch_idx < nitems(iwm_nvm_channels); ch_idx++) { ch_flags = le16_to_cpup(nvm_ch_flags + ch_idx); if (ch_idx >= IWM_NUM_2GHZ_CHANNELS && !data->sku_cap_band_52GHz_enable) ch_flags &= ~IWM_NVM_CHANNEL_VALID; if (!(ch_flags & IWM_NVM_CHANNEL_VALID)) { IWM_DPRINTF(sc, IWM_DEBUG_EEPROM, "Ch. %d Flags %x [%sGHz] - No traffic\n", iwm_nvm_channels[ch_idx], ch_flags, (ch_idx >= IWM_NUM_2GHZ_CHANNELS) ? "5.2" : "2.4"); continue; } hw_value = iwm_nvm_channels[ch_idx]; /* 5GHz? */ if (ch_idx >= IWM_NUM_2GHZ_CHANNELS) { (void) iwm_init_net80211_channel(sc, hw_value, ch_idx, IEEE80211_CHAN_A, ch_flags); } else { (void) iwm_init_net80211_channel(sc, hw_value, ch_idx, IEEE80211_CHAN_B, ch_flags); /* If it's not channel 13, also add 11g */ if (hw_value != 13) (void) iwm_init_net80211_channel(sc, hw_value, ch_idx, IEEE80211_CHAN_G, ch_flags); } IWM_DPRINTF(sc, IWM_DEBUG_EEPROM, "Ch. %d Flags %x [%sGHz] - Added\n", iwm_nvm_channels[ch_idx], ch_flags, (ch_idx >= IWM_NUM_2GHZ_CHANNELS) ? "5.2" : "2.4"); } ieee80211_sort_channels(ic->ic_channels, ic->ic_nchans); } static int iwm_parse_nvm_data(struct iwm_softc *sc, const uint16_t *nvm_hw, const uint16_t *nvm_sw, const uint16_t *nvm_calib, uint8_t tx_chains, uint8_t rx_chains) { struct iwm_nvm_data *data = &sc->sc_nvm; uint8_t hw_addr[IEEE80211_ADDR_LEN]; uint16_t radio_cfg, sku; data->nvm_version = le16_to_cpup(nvm_sw + IWM_NVM_VERSION); radio_cfg = le16_to_cpup(nvm_sw + IWM_RADIO_CFG); data->radio_cfg_type = IWM_NVM_RF_CFG_TYPE_MSK(radio_cfg); data->radio_cfg_step = IWM_NVM_RF_CFG_STEP_MSK(radio_cfg); data->radio_cfg_dash = IWM_NVM_RF_CFG_DASH_MSK(radio_cfg); data->radio_cfg_pnum = IWM_NVM_RF_CFG_PNUM_MSK(radio_cfg); data->valid_tx_ant = IWM_NVM_RF_CFG_TX_ANT_MSK(radio_cfg); data->valid_rx_ant = IWM_NVM_RF_CFG_RX_ANT_MSK(radio_cfg); sku = le16_to_cpup(nvm_sw + IWM_SKU); data->sku_cap_band_24GHz_enable = sku & IWM_NVM_SKU_CAP_BAND_24GHZ; data->sku_cap_band_52GHz_enable = sku & IWM_NVM_SKU_CAP_BAND_52GHZ; data->sku_cap_11n_enable = 0; if (!data->valid_tx_ant || !data->valid_rx_ant) { device_printf(sc->sc_dev, "%s: invalid antennas (0x%x, 0x%x)\n", __func__, data->valid_tx_ant, data->valid_rx_ant); return EINVAL; } data->n_hw_addrs = le16_to_cpup(nvm_sw + IWM_N_HW_ADDRS); data->xtal_calib[0] = *(nvm_calib + IWM_XTAL_CALIB); data->xtal_calib[1] = *(nvm_calib + IWM_XTAL_CALIB + 1); /* The byte order is little endian 16 bit, meaning 214365 */ IEEE80211_ADDR_COPY(hw_addr, nvm_hw + IWM_HW_ADDR); data->hw_addr[0] = hw_addr[1]; data->hw_addr[1] = hw_addr[0]; data->hw_addr[2] = hw_addr[3]; data->hw_addr[3] = hw_addr[2]; data->hw_addr[4] = hw_addr[5]; data->hw_addr[5] = hw_addr[4]; iwm_init_channel_map(sc, &nvm_sw[IWM_NVM_CHANNELS]); data->calib_version = 255; /* TODO: this value will prevent some checks from failing, we need to check if this field is 
still needed, and if it does, where is it in the NVM */ return 0; } /* * END NVM PARSE */ struct iwm_nvm_section { uint16_t length; const uint8_t *data; }; static int iwm_parse_nvm_sections(struct iwm_softc *sc, struct iwm_nvm_section *sections) { const uint16_t *hw, *sw, *calib; /* Checking for required sections */ if (!sections[IWM_NVM_SECTION_TYPE_SW].data || !sections[IWM_NVM_SECTION_TYPE_HW].data) { device_printf(sc->sc_dev, "%s: Can't parse empty NVM sections\n", __func__); return ENOENT; } hw = (const uint16_t *)sections[IWM_NVM_SECTION_TYPE_HW].data; sw = (const uint16_t *)sections[IWM_NVM_SECTION_TYPE_SW].data; calib = (const uint16_t *)sections[IWM_NVM_SECTION_TYPE_CALIBRATION].data; return iwm_parse_nvm_data(sc, hw, sw, calib, IWM_FW_VALID_TX_ANT(sc), IWM_FW_VALID_RX_ANT(sc)); } static int iwm_nvm_init(struct iwm_softc *sc) { struct iwm_nvm_section nvm_sections[IWM_NVM_NUM_OF_SECTIONS]; int i, section, error; uint16_t len; uint8_t *nvm_buffer, *temp; /* Read From FW NVM */ IWM_DPRINTF(sc, IWM_DEBUG_EEPROM, "%s: Read NVM\n", __func__); /* TODO: find correct NVM max size for a section */ nvm_buffer = malloc(IWM_OTP_LOW_IMAGE_SIZE, M_DEVBUF, M_NOWAIT); if (nvm_buffer == NULL) return (ENOMEM); for (i = 0; i < nitems(nvm_to_read); i++) { section = nvm_to_read[i]; KASSERT(section <= nitems(nvm_sections), ("too many sections")); error = iwm_nvm_read_section(sc, section, nvm_buffer, &len); if (error) break; temp = malloc(len, M_DEVBUF, M_NOWAIT); if (temp == NULL) { error = ENOMEM; break; } memcpy(temp, nvm_buffer, len); nvm_sections[section].data = temp; nvm_sections[section].length = len; } free(nvm_buffer, M_DEVBUF); if (error) return error; return iwm_parse_nvm_sections(sc, nvm_sections); } /* * Firmware loading gunk. This is kind of a weird hybrid between the * iwn driver and the Linux iwlwifi driver. */ static int iwm_firmware_load_chunk(struct iwm_softc *sc, uint32_t dst_addr, const uint8_t *section, uint32_t byte_cnt) { struct iwm_dma_info *dma = &sc->fw_dma; int error; /* Copy firmware section into pre-allocated DMA-safe memory. 
*/ memcpy(dma->vaddr, section, byte_cnt); bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE); if (!iwm_nic_lock(sc)) return EBUSY; sc->sc_fw_chunk_done = 0; IWM_WRITE(sc, IWM_FH_TCSR_CHNL_TX_CONFIG_REG(IWM_FH_SRVC_CHNL), IWM_FH_TCSR_TX_CONFIG_REG_VAL_DMA_CHNL_PAUSE); IWM_WRITE(sc, IWM_FH_SRVC_CHNL_SRAM_ADDR_REG(IWM_FH_SRVC_CHNL), dst_addr); IWM_WRITE(sc, IWM_FH_TFDIB_CTRL0_REG(IWM_FH_SRVC_CHNL), dma->paddr & IWM_FH_MEM_TFDIB_DRAM_ADDR_LSB_MSK); IWM_WRITE(sc, IWM_FH_TFDIB_CTRL1_REG(IWM_FH_SRVC_CHNL), (iwm_get_dma_hi_addr(dma->paddr) << IWM_FH_MEM_TFDIB_REG1_ADDR_BITSHIFT) | byte_cnt); IWM_WRITE(sc, IWM_FH_TCSR_CHNL_TX_BUF_STS_REG(IWM_FH_SRVC_CHNL), 1 << IWM_FH_TCSR_CHNL_TX_BUF_STS_REG_POS_TB_NUM | 1 << IWM_FH_TCSR_CHNL_TX_BUF_STS_REG_POS_TB_IDX | IWM_FH_TCSR_CHNL_TX_BUF_STS_REG_VAL_TFDB_VALID); IWM_WRITE(sc, IWM_FH_TCSR_CHNL_TX_CONFIG_REG(IWM_FH_SRVC_CHNL), IWM_FH_TCSR_TX_CONFIG_REG_VAL_DMA_CHNL_ENABLE | IWM_FH_TCSR_TX_CONFIG_REG_VAL_DMA_CREDIT_DISABLE | IWM_FH_TCSR_TX_CONFIG_REG_VAL_CIRQ_HOST_ENDTFD); iwm_nic_unlock(sc); /* wait 1s for this segment to load */ while (!sc->sc_fw_chunk_done) if ((error = msleep(&sc->sc_fw, &sc->sc_mtx, 0, "iwmfw", hz)) != 0) break; return error; } static int iwm_load_firmware(struct iwm_softc *sc, enum iwm_ucode_type ucode_type) { struct iwm_fw_sects *fws; int error, i, w; const void *data; uint32_t dlen; uint32_t offset; sc->sc_uc.uc_intr = 0; fws = &sc->sc_fw.fw_sects[ucode_type]; for (i = 0; i < fws->fw_count; i++) { data = fws->fw_sect[i].fws_data; dlen = fws->fw_sect[i].fws_len; offset = fws->fw_sect[i].fws_devoff; IWM_DPRINTF(sc, IWM_DEBUG_FIRMWARE_TLV, "LOAD FIRMWARE type %d offset %u len %d\n", ucode_type, offset, dlen); error = iwm_firmware_load_chunk(sc, offset, data, dlen); if (error) { device_printf(sc->sc_dev, "%s: chunk %u of %u returned error %02d\n", __func__, i, fws->fw_count, error); return error; } } /* wait for the firmware to load */ IWM_WRITE(sc, IWM_CSR_RESET, 0); for (w = 0; !sc->sc_uc.uc_intr && w < 10; w++) { error = msleep(&sc->sc_uc, &sc->sc_mtx, 0, "iwmuc", hz/10); } return error; } /* iwlwifi: pcie/trans.c */ static int iwm_start_fw(struct iwm_softc *sc, enum iwm_ucode_type ucode_type) { int error; IWM_WRITE(sc, IWM_CSR_INT, ~0); if ((error = iwm_nic_init(sc)) != 0) { device_printf(sc->sc_dev, "unable to init nic\n"); return error; } /* make sure rfkill handshake bits are cleared */ IWM_WRITE(sc, IWM_CSR_UCODE_DRV_GP1_CLR, IWM_CSR_UCODE_SW_BIT_RFKILL); IWM_WRITE(sc, IWM_CSR_UCODE_DRV_GP1_CLR, IWM_CSR_UCODE_DRV_GP1_BIT_CMD_BLOCKED); /* clear (again), then enable host interrupts */ IWM_WRITE(sc, IWM_CSR_INT, ~0); iwm_enable_interrupts(sc); /* really make sure rfkill handshake bits are cleared */ /* maybe we should write a few times more? 
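 *
 * (Aside: the firmware waits in this path are bounded sleeps.
 * iwm_firmware_load_chunk() sleeps on sc_fw_chunk_done with a 1 * hz
 * timeout per DMA chunk, and msleep() returning non-zero (EWOULDBLOCK
 * on timeout) breaks that wait; iwm_load_firmware() instead polls
 * sc_uc.uc_intr up to ten times at hz/10 ticks apiece, roughly one
 * second in total, before giving up on the ALIVE interrupt.)
 *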
just to make sure */ IWM_WRITE(sc, IWM_CSR_UCODE_DRV_GP1_CLR, IWM_CSR_UCODE_SW_BIT_RFKILL); IWM_WRITE(sc, IWM_CSR_UCODE_DRV_GP1_CLR, IWM_CSR_UCODE_SW_BIT_RFKILL); /* Load the given image to the HW */ return iwm_load_firmware(sc, ucode_type); } static int iwm_fw_alive(struct iwm_softc *sc, uint32_t sched_base) { return iwm_post_alive(sc); } static int iwm_send_tx_ant_cfg(struct iwm_softc *sc, uint8_t valid_tx_ant) { struct iwm_tx_ant_cfg_cmd tx_ant_cmd = { .valid = htole32(valid_tx_ant), }; return iwm_mvm_send_cmd_pdu(sc, IWM_TX_ANT_CONFIGURATION_CMD, IWM_CMD_SYNC, sizeof(tx_ant_cmd), &tx_ant_cmd); } /* iwlwifi: mvm/fw.c */ static int iwm_send_phy_cfg_cmd(struct iwm_softc *sc) { struct iwm_phy_cfg_cmd phy_cfg_cmd; enum iwm_ucode_type ucode_type = sc->sc_uc_current; /* Set parameters */ phy_cfg_cmd.phy_cfg = htole32(sc->sc_fw_phy_config); phy_cfg_cmd.calib_control.event_trigger = sc->sc_default_calib[ucode_type].event_trigger; phy_cfg_cmd.calib_control.flow_trigger = sc->sc_default_calib[ucode_type].flow_trigger; IWM_DPRINTF(sc, IWM_DEBUG_CMD | IWM_DEBUG_RESET, "Sending Phy CFG command: 0x%x\n", phy_cfg_cmd.phy_cfg); return iwm_mvm_send_cmd_pdu(sc, IWM_PHY_CONFIGURATION_CMD, IWM_CMD_SYNC, sizeof(phy_cfg_cmd), &phy_cfg_cmd); } static int iwm_mvm_load_ucode_wait_alive(struct iwm_softc *sc, enum iwm_ucode_type ucode_type) { enum iwm_ucode_type old_type = sc->sc_uc_current; int error; if ((error = iwm_read_firmware(sc, ucode_type)) != 0) return error; sc->sc_uc_current = ucode_type; error = iwm_start_fw(sc, ucode_type); if (error) { sc->sc_uc_current = old_type; return error; } return iwm_fw_alive(sc, sc->sched_base); } /* * mvm misc bits */ /* * follows iwlwifi/fw.c */ static int iwm_run_init_mvm_ucode(struct iwm_softc *sc, int justnvm) { int error; /* do not operate with rfkill switch turned on */ if ((sc->sc_flags & IWM_FLAG_RFKILL) && !justnvm) { device_printf(sc->sc_dev, "radio is disabled by hardware switch\n"); return EPERM; } sc->sc_init_complete = 0; if ((error = iwm_mvm_load_ucode_wait_alive(sc, IWM_UCODE_TYPE_INIT)) != 0) return error; if (justnvm) { if ((error = iwm_nvm_init(sc)) != 0) { device_printf(sc->sc_dev, "failed to read nvm\n"); return error; } IEEE80211_ADDR_COPY(sc->sc_ic.ic_macaddr, sc->sc_nvm.hw_addr); sc->sc_scan_cmd_len = sizeof(struct iwm_scan_cmd) + sc->sc_capa_max_probe_len + IWM_MAX_NUM_SCAN_CHANNELS * sizeof(struct iwm_scan_channel); sc->sc_scan_cmd = malloc(sc->sc_scan_cmd_len, M_DEVBUF, M_NOWAIT); if (sc->sc_scan_cmd == NULL) return (ENOMEM); return 0; } /* Send TX valid antennas before triggering calibrations */ if ((error = iwm_send_tx_ant_cfg(sc, IWM_FW_VALID_TX_ANT(sc))) != 0) return error; /* * Send phy configurations command to init uCode * to start the 16.0 uCode init image internal calibrations. 
 */
	if ((error = iwm_send_phy_cfg_cmd(sc)) != 0) {
		device_printf(sc->sc_dev,
		    "%s: failed to run internal calibration: %d\n",
		    __func__, error);
		return error;
	}

	/*
	 * Nothing to do but wait for the init complete notification
	 * from the firmware
	 */
	while (!sc->sc_init_complete)
		if ((error = msleep(&sc->sc_init_complete, &sc->sc_mtx,
		    0, "iwminit", 2*hz)) != 0)
			break;

	return error;
}

/*
 * receive side
 */

/* (re)stock rx ring, called at init-time and at runtime */
static int
iwm_rx_addbuf(struct iwm_softc *sc, int size, int idx)
{
	struct iwm_rx_ring *ring = &sc->rxq;
	struct iwm_rx_data *data = &ring->data[idx];
	struct mbuf *m;
	int error;
	bus_addr_t paddr;

	m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, IWM_RBUF_SIZE);
	if (m == NULL)
		return ENOBUFS;

	if (data->m != NULL)
		bus_dmamap_unload(ring->data_dmat, data->map);

	m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
	/* XXX a fresh DMA map is created on every (re)stock; the old
	 * map is never destroyed. */
	error = bus_dmamap_create(ring->data_dmat, 0, &data->map);
	if (error != 0) {
		device_printf(sc->sc_dev,
		    "%s: could not create RX buf DMA map, error %d\n",
		    __func__, error);
		goto fail;
	}
	data->m = m;
	error = bus_dmamap_load(ring->data_dmat, data->map,
	    mtod(data->m, void *), IWM_RBUF_SIZE, iwm_dma_map_addr,
	    &paddr, BUS_DMA_NOWAIT);
	if (error != 0 && error != EFBIG) {
		device_printf(sc->sc_dev,
		    "%s: could not map mbuf, error %d\n", __func__, error);
		goto fail;
	}
	bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_PREREAD);

	/* Update RX descriptor. */
	ring->desc[idx] = htole32(paddr >> 8);
	bus_dmamap_sync(ring->desc_dma.tag, ring->desc_dma.map,
	    BUS_DMASYNC_PREWRITE);

	return 0;
fail:
	return error;
}

/* iwlwifi: mvm/rx.c */
#define IWM_RSSI_OFFSET 50
static int
iwm_mvm_calc_rssi(struct iwm_softc *sc, struct iwm_rx_phy_info *phy_info)
{
	int rssi_a, rssi_b, rssi_a_dbm, rssi_b_dbm, max_rssi_dbm;
	uint32_t agc_a, agc_b;
	uint32_t val;

	val = le32toh(phy_info->non_cfg_phy[IWM_RX_INFO_AGC_IDX]);
	agc_a = (val & IWM_OFDM_AGC_A_MSK) >> IWM_OFDM_AGC_A_POS;
	agc_b = (val & IWM_OFDM_AGC_B_MSK) >> IWM_OFDM_AGC_B_POS;

	val = le32toh(phy_info->non_cfg_phy[IWM_RX_INFO_RSSI_AB_IDX]);
	rssi_a = (val & IWM_OFDM_RSSI_INBAND_A_MSK) >> IWM_OFDM_RSSI_A_POS;
	rssi_b = (val & IWM_OFDM_RSSI_INBAND_B_MSK) >> IWM_OFDM_RSSI_B_POS;

	/*
	 * dBm = rssi dB - agc dB - constant.
	 * Higher AGC (higher radio gain) means lower signal.
	 */
	rssi_a_dbm = rssi_a - IWM_RSSI_OFFSET - agc_a;
	rssi_b_dbm = rssi_b - IWM_RSSI_OFFSET - agc_b;
	max_rssi_dbm = MAX(rssi_a_dbm, rssi_b_dbm);

	IWM_DPRINTF(sc, IWM_DEBUG_RECV,
	    "Rssi In A %d B %d Max %d AGCA %d AGCB %d\n",
	    rssi_a_dbm, rssi_b_dbm, max_rssi_dbm, agc_a, agc_b);

	return max_rssi_dbm;
}

/* iwlwifi: mvm/rx.c */
/*
 * iwm_mvm_get_signal_strength - use new rx PHY INFO API
 * values are reported by the fw as positive values - need to negate
 * to obtain their dBm. Account for missing antennas by replacing 0
 * values by -256dBm: practically 0 power and a non-feasible 8 bit value.
 */
static int
iwm_mvm_get_signal_strength(struct iwm_softc *sc,
    struct iwm_rx_phy_info *phy_info)
{
	int energy_a, energy_b, energy_c, max_energy;
	uint32_t val;

	val = le32toh(phy_info->non_cfg_phy[IWM_RX_INFO_ENERGY_ANT_ABC_IDX]);
	energy_a = (val & IWM_RX_INFO_ENERGY_ANT_A_MSK) >>
	    IWM_RX_INFO_ENERGY_ANT_A_POS;
	energy_a = energy_a ? -energy_a : -256;
	energy_b = (val & IWM_RX_INFO_ENERGY_ANT_B_MSK) >>
	    IWM_RX_INFO_ENERGY_ANT_B_POS;
	energy_b = energy_b ? -energy_b : -256;
	energy_c = (val & IWM_RX_INFO_ENERGY_ANT_C_MSK) >>
	    IWM_RX_INFO_ENERGY_ANT_C_POS;
	energy_c = energy_c ?
-energy_c : -256; max_energy = MAX(energy_a, energy_b); max_energy = MAX(max_energy, energy_c); IWM_DPRINTF(sc, IWM_DEBUG_RECV, "energy In A %d B %d C %d , and max %d\n", energy_a, energy_b, energy_c, max_energy); return max_energy; } static void iwm_mvm_rx_rx_phy_cmd(struct iwm_softc *sc, struct iwm_rx_packet *pkt, struct iwm_rx_data *data) { struct iwm_rx_phy_info *phy_info = (void *)pkt->data; IWM_DPRINTF(sc, IWM_DEBUG_RECV, "received PHY stats\n"); bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); memcpy(&sc->sc_last_phy_info, phy_info, sizeof(sc->sc_last_phy_info)); } /* * Retrieve the average noise (in dBm) among receivers. */ static int iwm_get_noise(const struct iwm_mvm_statistics_rx_non_phy *stats) { int i, total, nbant, noise; total = nbant = noise = 0; for (i = 0; i < 3; i++) { noise = le32toh(stats->beacon_silence_rssi[i]) & 0xff; if (noise) { total += noise; nbant++; } } /* There should be at least one antenna but check anyway. */ return (nbant == 0) ? -127 : (total / nbant) - 107; } /* * iwm_mvm_rx_rx_mpdu - IWM_REPLY_RX_MPDU_CMD handler * * Handles the actual data of the Rx packet from the fw */ static void iwm_mvm_rx_rx_mpdu(struct iwm_softc *sc, struct iwm_rx_packet *pkt, struct iwm_rx_data *data) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct ieee80211_frame *wh; struct ieee80211_node *ni; struct ieee80211_rx_stats rxs; struct mbuf *m; struct iwm_rx_phy_info *phy_info; struct iwm_rx_mpdu_res_start *rx_res; uint32_t len; uint32_t rx_pkt_status; int rssi; bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); phy_info = &sc->sc_last_phy_info; rx_res = (struct iwm_rx_mpdu_res_start *)pkt->data; wh = (struct ieee80211_frame *)(pkt->data + sizeof(*rx_res)); len = le16toh(rx_res->byte_count); rx_pkt_status = le32toh(*(uint32_t *)(pkt->data + sizeof(*rx_res) + len)); m = data->m; m->m_data = pkt->data + sizeof(*rx_res); m->m_pkthdr.len = m->m_len = len; if (__predict_false(phy_info->cfg_phy_cnt > 20)) { device_printf(sc->sc_dev, "dsp size out of range [0,20]: %d\n", phy_info->cfg_phy_cnt); return; } if (!(rx_pkt_status & IWM_RX_MPDU_RES_STATUS_CRC_OK) || !(rx_pkt_status & IWM_RX_MPDU_RES_STATUS_OVERRUN_OK)) { IWM_DPRINTF(sc, IWM_DEBUG_RECV, "Bad CRC or FIFO: 0x%08X.\n", rx_pkt_status); return; /* drop */ } if (sc->sc_capaflags & IWM_UCODE_TLV_FLAGS_RX_ENERGY_API) { rssi = iwm_mvm_get_signal_strength(sc, phy_info); } else { rssi = iwm_mvm_calc_rssi(sc, phy_info); } rssi = (0 - IWM_MIN_DBM) + rssi; /* normalize */ rssi = MIN(rssi, sc->sc_max_rssi); /* clip to max. 100% */ /* replenish ring for the buffer we're going to feed to the sharks */ if (iwm_rx_addbuf(sc, IWM_RBUF_SIZE, sc->rxq.cur) != 0) { device_printf(sc->sc_dev, "%s: unable to add more buffers\n", __func__); return; } ni = ieee80211_find_rxnode(ic, (struct ieee80211_frame_min *)wh); IWM_DPRINTF(sc, IWM_DEBUG_RECV, "%s: phy_info: channel=%d, flags=0x%08x\n", __func__, le16toh(phy_info->channel), le16toh(phy_info->phy_flags)); /* * Populate an RX state struct with the provided information. 
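 *
 * (Worked example for the normalization a few lines up, assuming
 * IWM_MIN_DBM is -100; the exact constant lives in the driver headers.
 * A frame at -55 dBm gives rssi = (0 - (-100)) + (-55) = 45, i.e. the
 * raw dBm scale [-100 .. 0] is shifted onto [0 .. 100] and then clipped
 * to sc_max_rssi.)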
*/ bzero(&rxs, sizeof(rxs)); rxs.r_flags |= IEEE80211_R_IEEE | IEEE80211_R_FREQ; rxs.r_flags |= IEEE80211_R_NF | IEEE80211_R_RSSI; rxs.c_ieee = le16toh(phy_info->channel); if (le16toh(phy_info->phy_flags & IWM_RX_RES_PHY_FLAGS_BAND_24)) { rxs.c_freq = ieee80211_ieee2mhz(rxs.c_ieee, IEEE80211_CHAN_2GHZ); } else { rxs.c_freq = ieee80211_ieee2mhz(rxs.c_ieee, IEEE80211_CHAN_5GHZ); } rxs.rssi = rssi - sc->sc_noise; rxs.nf = sc->sc_noise; if (ieee80211_radiotap_active_vap(vap)) { struct iwm_rx_radiotap_header *tap = &sc->sc_rxtap; tap->wr_flags = 0; if (phy_info->phy_flags & htole16(IWM_PHY_INFO_FLAG_SHPREAMBLE)) tap->wr_flags |= IEEE80211_RADIOTAP_F_SHORTPRE; tap->wr_chan_freq = htole16(rxs.c_freq); /* XXX only if ic->ic_curchan->ic_ieee == rxs.c_ieee */ tap->wr_chan_flags = htole16(ic->ic_curchan->ic_flags); tap->wr_dbm_antsignal = (int8_t)rssi; tap->wr_dbm_antnoise = (int8_t)sc->sc_noise; tap->wr_tsft = phy_info->system_timestamp; switch (phy_info->rate) { /* CCK rates. */ case 10: tap->wr_rate = 2; break; case 20: tap->wr_rate = 4; break; case 55: tap->wr_rate = 11; break; case 110: tap->wr_rate = 22; break; /* OFDM rates. */ case 0xd: tap->wr_rate = 12; break; case 0xf: tap->wr_rate = 18; break; case 0x5: tap->wr_rate = 24; break; case 0x7: tap->wr_rate = 36; break; case 0x9: tap->wr_rate = 48; break; case 0xb: tap->wr_rate = 72; break; case 0x1: tap->wr_rate = 96; break; case 0x3: tap->wr_rate = 108; break; /* Unknown rate: should not happen. */ default: tap->wr_rate = 0; } } IWM_UNLOCK(sc); if (ni != NULL) { IWM_DPRINTF(sc, IWM_DEBUG_RECV, "input m %p\n", m); ieee80211_input_mimo(ni, m, &rxs); ieee80211_free_node(ni); } else { IWM_DPRINTF(sc, IWM_DEBUG_RECV, "inputall m %p\n", m); ieee80211_input_mimo_all(ic, m, &rxs); } IWM_LOCK(sc); } -static void +static int iwm_mvm_rx_tx_cmd_single(struct iwm_softc *sc, struct iwm_rx_packet *pkt, struct iwm_node *in) { struct iwm_mvm_tx_resp *tx_resp = (void *)pkt->data; - struct ieee80211vap *vap = in->in_ni.ni_vap; + struct ieee80211_node *ni = &in->in_ni; + struct ieee80211vap *vap = ni->ni_vap; int status = le16toh(tx_resp->status.status) & IWM_TX_STATUS_MSK; int failack = tx_resp->failure_frame; KASSERT(tx_resp->frame_count == 1, ("too many frames")); /* Update rate control statistics. 
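 *
 * (Aside on the radiotap rate switch above: for CCK the firmware
 * reports the PLCP signal field in 100 kb/s units (10, 20, 55, 110 for
 * 1/2/5.5/11 Mb/s), while wr_rate wants 500 kb/s units, hence 2, 4, 11,
 * 22. The OFDM cases are the 802.11a/g PLCP rate codes: 0xd is 6 Mb/s
 * (wr_rate 12), 0x3 is 54 Mb/s (wr_rate 108), and so on.)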
*/ if (status != IWM_TX_STATUS_SUCCESS && status != IWM_TX_STATUS_DIRECT_DONE) { - if_inc_counter(vap->iv_ifp, IFCOUNTER_OERRORS, 1); - ieee80211_ratectl_tx_complete(vap, &in->in_ni, + ieee80211_ratectl_tx_complete(vap, ni, IEEE80211_RATECTL_TX_FAILURE, &failack, NULL); + return (1); } else { - if_inc_counter(vap->iv_ifp, IFCOUNTER_OPACKETS, 1); - ieee80211_ratectl_tx_complete(vap, &in->in_ni, + ieee80211_ratectl_tx_complete(vap, ni, IEEE80211_RATECTL_TX_SUCCESS, &failack, NULL); - + return (0); } } static void iwm_mvm_rx_tx_cmd(struct iwm_softc *sc, struct iwm_rx_packet *pkt, struct iwm_rx_data *data) { struct iwm_cmd_header *cmd_hdr = &pkt->hdr; int idx = cmd_hdr->idx; int qid = cmd_hdr->qid; struct iwm_tx_ring *ring = &sc->txq[qid]; struct iwm_tx_data *txd = &ring->data[idx]; struct iwm_node *in = txd->in; + struct mbuf *m = txd->m; + int status; + + KASSERT(txd->done == 0, ("txd not done")); + KASSERT(txd->in != NULL, ("txd without node")); + KASSERT(txd->m != NULL, ("txd without mbuf")); - if (txd->done) { - device_printf(sc->sc_dev, - "%s: got tx interrupt that's already been handled!\n", - __func__); - return; - } bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTREAD); sc->sc_tx_timer = 0; - iwm_mvm_rx_tx_cmd_single(sc, pkt, in); + status = iwm_mvm_rx_tx_cmd_single(sc, pkt, in); /* Unmap and free mbuf. */ bus_dmamap_sync(ring->data_dmat, txd->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, txd->map); - m_freem(txd->m); IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "free txd %p, in %p\n", txd, txd->in); - KASSERT(txd->done == 0, ("txd not done")); txd->done = 1; - KASSERT(txd->in, ("txd without node")); - txd->m = NULL; txd->in = NULL; - ieee80211_free_node((struct ieee80211_node *)in); + + ieee80211_tx_complete(&in->in_ni, m, status); if (--ring->queued < IWM_TX_RING_LOMARK) { sc->qfullmsk &= ~(1 << ring->qid); if (sc->qfullmsk == 0) { /* * Well, we're in interrupt context, but then again * I guess net80211 does all sorts of stunts in * interrupt context, so maybe this is no biggie. */ iwm_start(sc); } } } /* * transmit side */ /* * Process a "command done" firmware notification. This is where we wakeup * processes waiting for a synchronous command completion. * from if_iwn */ static void iwm_cmd_done(struct iwm_softc *sc, struct iwm_rx_packet *pkt) { struct iwm_tx_ring *ring = &sc->txq[IWM_MVM_CMD_QUEUE]; struct iwm_tx_data *data; if (pkt->hdr.qid != IWM_MVM_CMD_QUEUE) { return; /* Not a command ack. */ } data = &ring->data[pkt->hdr.idx]; /* If the command was mapped in an mbuf, free it. */ if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); data->m = NULL; } wakeup(&ring->desc[pkt->hdr.idx]); } #if 0 /* * necessary only for block ack mode */ void iwm_update_sched(struct iwm_softc *sc, int qid, int idx, uint8_t sta_id, uint16_t len) { struct iwm_agn_scd_bc_tbl *scd_bc_tbl; uint16_t w_val; scd_bc_tbl = sc->sched_dma.vaddr; len += 8; /* magic numbers came naturally from paris */ if (sc->sc_capaflags & IWM_UCODE_TLV_FLAGS_DW_BC_TABLE) len = roundup(len, 4) / 4; w_val = htole16(sta_id << 12 | len); /* Update TX scheduler. */ scd_bc_tbl[qid].tfd_offset[idx] = w_val; bus_dmamap_sync(sc->sched_dma.tag, sc->sched_dma.map, BUS_DMASYNC_PREWRITE); /* I really wonder what this is ?!? 
 */
	if (idx < IWM_TFD_QUEUE_SIZE_BC_DUP) {
		scd_bc_tbl[qid].tfd_offset[IWM_TFD_QUEUE_SIZE_MAX + idx] =
		    w_val;
		bus_dmamap_sync(sc->sched_dma.tag, sc->sched_dma.map,
		    BUS_DMASYNC_PREWRITE);
	}
}
#endif

/*
 * Take an 802.11 (non-n) rate, find the relevant rate
 * table entry. Return the index into in_ridx[].
 *
 * The caller then uses that index back into in_ridx
 * to figure out the rate index programmed /into/
 * the firmware for this given node.
 */
static int
iwm_tx_rateidx_lookup(struct iwm_softc *sc, struct iwm_node *in,
    uint8_t rate)
{
	int i;
	uint8_t r;

	for (i = 0; i < nitems(in->in_ridx); i++) {
		r = iwm_rates[in->in_ridx[i]].rate;
		if (rate == r)
			return (i);
	}
	/* XXX Return the first */
	/* XXX TODO: have it return the /lowest/ */
	return (0);
}

/*
 * Fill in various bits for management frames, and leave them
 * unfilled for data frames (firmware takes care of that).
 * Return the selected TX rate.
 */
static const struct iwm_rate *
iwm_tx_fill_cmd(struct iwm_softc *sc, struct iwm_node *in,
    struct ieee80211_frame *wh, struct iwm_tx_cmd *tx)
{
	struct ieee80211com *ic = &sc->sc_ic;
	struct ieee80211_node *ni = &in->in_ni;
	const struct iwm_rate *rinfo;
	int type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK;
	int ridx, rate_flags;

	tx->rts_retry_limit = IWM_RTS_DFAULT_RETRY_LIMIT;
	tx->data_retry_limit = IWM_DEFAULT_TX_RETRY;

	/*
	 * XXX TODO: everything about the rate selection here is terrible!
	 */

	if (type == IEEE80211_FC0_TYPE_DATA) {
		int i;
		/* for data frames, use RS table */
		(void) ieee80211_ratectl_rate(ni, NULL, 0);
		i = iwm_tx_rateidx_lookup(sc, in, ni->ni_txrate);
		ridx = in->in_ridx[i];

		/* This is the index into the programmed table */
		tx->initial_rate_index = i;
		tx->tx_flags |= htole32(IWM_TX_CMD_FLG_STA_RATE);
		IWM_DPRINTF(sc, IWM_DEBUG_XMIT | IWM_DEBUG_TXRATE,
		    "%s: start with i=%d, txrate %d\n",
		    __func__, i, iwm_rates[ridx].rate);
		/* XXX no rate_n_flags? */
		return &iwm_rates[ridx];
	}

	/*
	 * For non-data, use the lowest supported rate for the given
	 * operational mode.
	 *
	 * Note: there may not be any rate control information available.
	 * This driver currently assumes that if we're transmitting data
	 * frames, we use the rate control table. Grr.
	 *
	 * XXX TODO: use the configured rate for the traffic type!
	 */
	if (ic->ic_curmode == IEEE80211_MODE_11A) {
		/*
		 * XXX this assumes the mode is either 11a or not 11a;
		 * definitely won't work for 11n.
		 */
		ridx = IWM_RIDX_OFDM;
	} else {
		ridx = IWM_RIDX_CCK;
	}
	rinfo = &iwm_rates[ridx];

	IWM_DPRINTF(sc, IWM_DEBUG_TXRATE, "%s: ridx=%d; rate=%d, CCK=%d\n",
	    __func__, ridx, rinfo->rate, !!IWM_RIDX_IS_CCK(ridx));

	/* XXX TODO: hard-coded TX antenna?
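 *
 * (Aside: the value composed below is the firmware's rate_n_flags word:
 * the PLCP rate code in the low bits, an antenna selection mask shifted
 * to IWM_RATE_MCS_ANT_POS, and IWM_RATE_MCS_CCK_MSK set on CCK rates so
 * the firmware picks the right preamble. Only the first antenna is ever
 * selected here; iwm_setrates() further down round-robins the valid TX
 * antennas instead.)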
*/ rate_flags = 1 << IWM_RATE_MCS_ANT_POS; if (IWM_RIDX_IS_CCK(ridx)) rate_flags |= IWM_RATE_MCS_CCK_MSK; /* XXX hard-coded tx rate */ tx->rate_n_flags = htole32(rate_flags | rinfo->plcp); return rinfo; } #define TB0_SIZE 16 static int iwm_tx(struct iwm_softc *sc, struct mbuf *m, struct ieee80211_node *ni, int ac) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); - struct iwm_node *in = (struct iwm_node *)ni; + struct iwm_node *in = IWM_NODE(ni); struct iwm_tx_ring *ring; struct iwm_tx_data *data; struct iwm_tfd *desc; struct iwm_device_cmd *cmd; struct iwm_tx_cmd *tx; struct ieee80211_frame *wh; struct ieee80211_key *k = NULL; struct mbuf *m1; const struct iwm_rate *rinfo; uint32_t flags; u_int hdrlen; bus_dma_segment_t *seg, segs[IWM_MAX_SCATTER]; int nsegs; uint8_t tid, type; int i, totlen, error, pad; wh = mtod(m, struct ieee80211_frame *); hdrlen = ieee80211_anyhdrsize(wh); type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK; tid = 0; ring = &sc->txq[ac]; desc = &ring->desc[ring->cur]; memset(desc, 0, sizeof(*desc)); data = &ring->data[ring->cur]; /* Fill out iwm_tx_cmd to send to the firmware */ cmd = &ring->cmd[ring->cur]; cmd->hdr.code = IWM_TX_CMD; cmd->hdr.flags = 0; cmd->hdr.qid = ring->qid; cmd->hdr.idx = ring->cur; tx = (void *)cmd->data; memset(tx, 0, sizeof(*tx)); rinfo = iwm_tx_fill_cmd(sc, in, wh, tx); /* Encrypt the frame if need be. */ if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) { /* Retrieve key for TX && do software encryption. */ k = ieee80211_crypto_encap(ni, m); if (k == NULL) { m_freem(m); return (ENOBUFS); } /* 802.11 header may have moved. */ wh = mtod(m, struct ieee80211_frame *); } if (ieee80211_radiotap_active_vap(vap)) { struct iwm_tx_radiotap_header *tap = &sc->sc_txtap; tap->wt_flags = 0; tap->wt_chan_freq = htole16(ni->ni_chan->ic_freq); tap->wt_chan_flags = htole16(ni->ni_chan->ic_flags); tap->wt_rate = rinfo->rate; - tap->wt_hwqueue = ac; if (k != NULL) tap->wt_flags |= IEEE80211_RADIOTAP_F_WEP; ieee80211_radiotap_tx(vap, m); } totlen = m->m_pkthdr.len; flags = 0; if (!IEEE80211_IS_MULTICAST(wh->i_addr1)) { flags |= IWM_TX_CMD_FLG_ACK; } if (type != IEEE80211_FC0_TYPE_DATA && (totlen + IEEE80211_CRC_LEN > vap->iv_rtsthreshold) && !IEEE80211_IS_MULTICAST(wh->i_addr1)) { flags |= IWM_TX_CMD_FLG_PROT_REQUIRE; } if (IEEE80211_IS_MULTICAST(wh->i_addr1) || type != IEEE80211_FC0_TYPE_DATA) tx->sta_id = sc->sc_aux_sta.sta_id; else tx->sta_id = IWM_STATION_ID; if (type == IEEE80211_FC0_TYPE_MGT) { uint8_t subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK; if (subtype == IEEE80211_FC0_SUBTYPE_ASSOC_REQ || subtype == IEEE80211_FC0_SUBTYPE_REASSOC_REQ) tx->pm_frame_timeout = htole16(3); else tx->pm_frame_timeout = htole16(2); } else { tx->pm_frame_timeout = htole16(0); } if (hdrlen & 3) { /* First segment length must be a multiple of 4. */ flags |= IWM_TX_CMD_FLG_MH_PAD; pad = 4 - (hdrlen & 3); } else pad = 0; tx->driver_txop = 0; tx->next_frame_len = 0; tx->len = htole16(totlen); tx->tid_tspec = tid; tx->life_time = htole32(IWM_TX_CMD_LIFE_TIME_INFINITE); /* Set physical address of "scratch area". */ tx->dram_lsb_ptr = htole32(data->scratch_paddr); tx->dram_msb_ptr = iwm_get_dma_hi_addr(data->scratch_paddr); /* Copy 802.11 header in TX command. */ memcpy(((uint8_t *)tx) + sizeof(*tx), wh, hdrlen); flags |= IWM_TX_CMD_FLG_BT_DIS | IWM_TX_CMD_FLG_SEQ_CTL; tx->sec_ctl = 0; tx->tx_flags |= htole32(flags); /* Trim 802.11 header. 
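 *
 * (Aside on the pad computed earlier: the firmware wants the payload
 * following the copied 802.11 header to start on a 4-byte boundary.
 * A 26-byte QoS header gives hdrlen & 3 == 2, so pad == 2 and
 * IWM_TX_CMD_FLG_MH_PAD tells the firmware that fill bytes were
 * inserted; a plain 24-byte header needs no padding.)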
*/ m_adj(m, hdrlen); error = bus_dmamap_load_mbuf_sg(ring->data_dmat, data->map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { if (error != EFBIG) { device_printf(sc->sc_dev, "can't map mbuf (error %d)\n", error); m_freem(m); return error; } /* Too many DMA segments, linearize mbuf. */ - MGETHDR(m1, M_NOWAIT, MT_DATA); + m1 = m_collapse(m, M_NOWAIT, IWM_MAX_SCATTER - 2); if (m1 == NULL) { + device_printf(sc->sc_dev, + "%s: could not defrag mbuf\n", __func__); m_freem(m); - return ENOBUFS; - } - if (m->m_pkthdr.len > MHLEN) { - MCLGET(m1, M_NOWAIT); - if (!(m1->m_flags & M_EXT)) { - m_freem(m); - m_freem(m1); - return ENOBUFS; - } + return (ENOBUFS); } - m_copydata(m, 0, m->m_pkthdr.len, mtod(m1, void *)); - m1->m_pkthdr.len = m1->m_len = m->m_pkthdr.len; - m_freem(m); m = m1; + error = bus_dmamap_load_mbuf_sg(ring->data_dmat, data->map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->sc_dev, "can't map mbuf (error %d)\n", error); m_freem(m); return error; } } data->m = m; data->in = in; data->done = 0; IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "sending txd %p, in %p\n", data, data->in); KASSERT(data->in != NULL, ("node is NULL")); IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "sending data: qid=%d idx=%d len=%d nsegs=%d\n", ring->qid, ring->cur, totlen, nsegs); /* Fill TX descriptor. */ desc->num_tbs = 2 + nsegs; desc->tbs[0].lo = htole32(data->cmd_paddr); desc->tbs[0].hi_n_len = htole16(iwm_get_dma_hi_addr(data->cmd_paddr)) | (TB0_SIZE << 4); desc->tbs[1].lo = htole32(data->cmd_paddr + TB0_SIZE); desc->tbs[1].hi_n_len = htole16(iwm_get_dma_hi_addr(data->cmd_paddr)) | ((sizeof(struct iwm_cmd_header) + sizeof(*tx) + hdrlen + pad - TB0_SIZE) << 4); /* Other DMA segments are for data payload. */ for (i = 0; i < nsegs; i++) { seg = &segs[i]; desc->tbs[i+2].lo = htole32(seg->ds_addr); desc->tbs[i+2].hi_n_len = \ htole16(iwm_get_dma_hi_addr(seg->ds_addr)) | ((seg->ds_len) << 4); } bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(ring->cmd_dma.tag, ring->cmd_dma.map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(ring->desc_dma.tag, ring->desc_dma.map, BUS_DMASYNC_PREWRITE); #if 0 iwm_update_sched(sc, ring->qid, ring->cur, tx->sta_id, le16toh(tx->len)); #endif /* Kick TX ring. */ ring->cur = (ring->cur + 1) % IWM_TX_RING_COUNT; IWM_WRITE(sc, IWM_HBUS_TARG_WRPTR, ring->qid << 8 | ring->cur); /* Mark TX ring as full if we reach a certain threshold. */ if (++ring->queued > IWM_TX_RING_HIMARK) { sc->qfullmsk |= 1 << ring->qid; } return 0; } static int iwm_raw_xmit(struct ieee80211_node *ni, struct mbuf *m, const struct ieee80211_bpf_params *params) { struct ieee80211com *ic = ni->ni_ic; struct iwm_softc *sc = ic->ic_softc; int error = 0; IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "->%s begin\n", __func__); if ((sc->sc_flags & IWM_FLAG_HW_INITED) == 0) { m_freem(m); IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "<-%s not RUNNING\n", __func__); return (ENETDOWN); } IWM_LOCK(sc); /* XXX fix this */ if (params == NULL) { error = iwm_tx(sc, m, ni, 0); } else { error = iwm_tx(sc, m, ni, 0); } sc->sc_tx_timer = 5; IWM_UNLOCK(sc); return (error); } /* * mvm/tx.c */ #if 0 /* * Note that there are transports that buffer frames before they reach * the firmware. This means that after flush_tx_path is called, the * queue might not be empty. 
The race-free way to handle this is to: * 1) set the station as draining * 2) flush the Tx path * 3) wait for the transport queues to be empty */ int iwm_mvm_flush_tx_path(struct iwm_softc *sc, int tfd_msk, int sync) { struct iwm_tx_path_flush_cmd flush_cmd = { .queues_ctl = htole32(tfd_msk), .flush_ctl = htole16(IWM_DUMP_TX_FIFO_FLUSH), }; int ret; ret = iwm_mvm_send_cmd_pdu(sc, IWM_TXPATH_FLUSH, sync ? IWM_CMD_SYNC : IWM_CMD_ASYNC, sizeof(flush_cmd), &flush_cmd); if (ret) device_printf(sc->sc_dev, "Flushing tx queue failed: %d\n", ret); return ret; } #endif /* * BEGIN mvm/sta.c */ static void iwm_mvm_add_sta_cmd_v6_to_v5(struct iwm_mvm_add_sta_cmd_v6 *cmd_v6, struct iwm_mvm_add_sta_cmd_v5 *cmd_v5) { memset(cmd_v5, 0, sizeof(*cmd_v5)); cmd_v5->add_modify = cmd_v6->add_modify; cmd_v5->tid_disable_tx = cmd_v6->tid_disable_tx; cmd_v5->mac_id_n_color = cmd_v6->mac_id_n_color; IEEE80211_ADDR_COPY(cmd_v5->addr, cmd_v6->addr); cmd_v5->sta_id = cmd_v6->sta_id; cmd_v5->modify_mask = cmd_v6->modify_mask; cmd_v5->station_flags = cmd_v6->station_flags; cmd_v5->station_flags_msk = cmd_v6->station_flags_msk; cmd_v5->add_immediate_ba_tid = cmd_v6->add_immediate_ba_tid; cmd_v5->remove_immediate_ba_tid = cmd_v6->remove_immediate_ba_tid; cmd_v5->add_immediate_ba_ssn = cmd_v6->add_immediate_ba_ssn; cmd_v5->sleep_tx_count = cmd_v6->sleep_tx_count; cmd_v5->sleep_state_flags = cmd_v6->sleep_state_flags; cmd_v5->assoc_id = cmd_v6->assoc_id; cmd_v5->beamform_flags = cmd_v6->beamform_flags; cmd_v5->tfd_queue_msk = cmd_v6->tfd_queue_msk; } static int iwm_mvm_send_add_sta_cmd_status(struct iwm_softc *sc, struct iwm_mvm_add_sta_cmd_v6 *cmd, int *status) { struct iwm_mvm_add_sta_cmd_v5 cmd_v5; if (sc->sc_capaflags & IWM_UCODE_TLV_FLAGS_STA_KEY_CMD) { return iwm_mvm_send_cmd_pdu_status(sc, IWM_ADD_STA, sizeof(*cmd), cmd, status); } iwm_mvm_add_sta_cmd_v6_to_v5(cmd, &cmd_v5); return iwm_mvm_send_cmd_pdu_status(sc, IWM_ADD_STA, sizeof(cmd_v5), &cmd_v5, status); } /* send station add/update command to firmware */ static int iwm_mvm_sta_send_to_fw(struct iwm_softc *sc, struct iwm_node *in, int update) { struct iwm_mvm_add_sta_cmd_v6 add_sta_cmd; int ret; uint32_t status; memset(&add_sta_cmd, 0, sizeof(add_sta_cmd)); add_sta_cmd.sta_id = IWM_STATION_ID; add_sta_cmd.mac_id_n_color = htole32(IWM_FW_CMD_ID_AND_COLOR(IWM_DEFAULT_MACID, IWM_DEFAULT_COLOR)); if (!update) { add_sta_cmd.tfd_queue_msk = htole32(0xf); IEEE80211_ADDR_COPY(&add_sta_cmd.addr, in->in_ni.ni_bssid); } add_sta_cmd.add_modify = update ? 
1 : 0; add_sta_cmd.station_flags_msk |= htole32(IWM_STA_FLG_FAT_EN_MSK | IWM_STA_FLG_MIMO_EN_MSK); status = IWM_ADD_STA_SUCCESS; ret = iwm_mvm_send_add_sta_cmd_status(sc, &add_sta_cmd, &status); if (ret) return ret; switch (status) { case IWM_ADD_STA_SUCCESS: break; default: ret = EIO; device_printf(sc->sc_dev, "IWM_ADD_STA failed\n"); break; } return ret; } static int iwm_mvm_add_sta(struct iwm_softc *sc, struct iwm_node *in) { int ret; ret = iwm_mvm_sta_send_to_fw(sc, in, 0); if (ret) return ret; return 0; } static int iwm_mvm_update_sta(struct iwm_softc *sc, struct iwm_node *in) { return iwm_mvm_sta_send_to_fw(sc, in, 1); } static int iwm_mvm_add_int_sta_common(struct iwm_softc *sc, struct iwm_int_sta *sta, const uint8_t *addr, uint16_t mac_id, uint16_t color) { struct iwm_mvm_add_sta_cmd_v6 cmd; int ret; uint32_t status; memset(&cmd, 0, sizeof(cmd)); cmd.sta_id = sta->sta_id; cmd.mac_id_n_color = htole32(IWM_FW_CMD_ID_AND_COLOR(mac_id, color)); cmd.tfd_queue_msk = htole32(sta->tfd_queue_msk); if (addr) IEEE80211_ADDR_COPY(cmd.addr, addr); ret = iwm_mvm_send_add_sta_cmd_status(sc, &cmd, &status); if (ret) return ret; switch (status) { case IWM_ADD_STA_SUCCESS: IWM_DPRINTF(sc, IWM_DEBUG_RESET, "%s: Internal station added.\n", __func__); return 0; default: device_printf(sc->sc_dev, "%s: Add internal station failed, status=0x%x\n", __func__, status); ret = EIO; break; } return ret; } static int iwm_mvm_add_aux_sta(struct iwm_softc *sc) { int ret; sc->sc_aux_sta.sta_id = 3; sc->sc_aux_sta.tfd_queue_msk = 0; ret = iwm_mvm_add_int_sta_common(sc, &sc->sc_aux_sta, NULL, IWM_MAC_INDEX_AUX, 0); if (ret) memset(&sc->sc_aux_sta, 0, sizeof(sc->sc_aux_sta)); return ret; } /* * END mvm/sta.c */ /* * BEGIN mvm/quota.c */ static int iwm_mvm_update_quotas(struct iwm_softc *sc, struct iwm_node *in) { struct iwm_time_quota_cmd cmd; int i, idx, ret, num_active_macs, quota, quota_rem; int colors[IWM_MAX_BINDINGS] = { -1, -1, -1, -1, }; int n_ifs[IWM_MAX_BINDINGS] = {0, }; uint16_t id; memset(&cmd, 0, sizeof(cmd)); /* currently, PHY ID == binding ID */ if (in) { id = in->in_phyctxt->id; KASSERT(id < IWM_MAX_BINDINGS, ("invalid id")); colors[id] = in->in_phyctxt->color; if (1) n_ifs[id] = 1; } /* * The FW's scheduling session consists of * IWM_MVM_MAX_QUOTA fragments. Divide these fragments * equally between all the bindings that require quota */ num_active_macs = 0; for (i = 0; i < IWM_MAX_BINDINGS; i++) { cmd.quotas[i].id_and_color = htole32(IWM_FW_CTXT_INVALID); num_active_macs += n_ifs[i]; } quota = 0; quota_rem = 0; if (num_active_macs) { quota = IWM_MVM_MAX_QUOTA / num_active_macs; quota_rem = IWM_MVM_MAX_QUOTA % num_active_macs; } for (idx = 0, i = 0; i < IWM_MAX_BINDINGS; i++) { if (colors[i] < 0) continue; cmd.quotas[idx].id_and_color = htole32(IWM_FW_CMD_ID_AND_COLOR(i, colors[i])); if (n_ifs[i] <= 0) { cmd.quotas[idx].quota = htole32(0); cmd.quotas[idx].max_duration = htole32(0); } else { cmd.quotas[idx].quota = htole32(quota * n_ifs[i]); cmd.quotas[idx].max_duration = htole32(0); } idx++; } /* Give the remainder of the session to the first binding */ cmd.quotas[0].quota = htole32(le32toh(cmd.quotas[0].quota) + quota_rem); ret = iwm_mvm_send_cmd_pdu(sc, IWM_TIME_QUOTA_CMD, IWM_CMD_SYNC, sizeof(cmd), &cmd); if (ret) device_printf(sc->sc_dev, "%s: Failed to send quota: %d\n", __func__, ret); return ret; } /* * END mvm/quota.c */ /* * ieee80211 routines */ /* * Change to AUTH state in 80211 state machine. Roughly matches what * Linux does in bss_info_changed(). 
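 *
 * (Aside on iwm_mvm_update_quotas() above: the firmware scheduling
 * session is divided into IWM_MVM_MAX_QUOTA fragments, split evenly
 * across the active bindings. With, say, IWM_MVM_MAX_QUOTA == 128 and
 * one active binding, that binding gets all 128 fragments; with three
 * interfaces the per-binding quota is 128 / 3 == 42, and the division
 * remainder (2) is handed to the first quota slot so the whole session
 * is always allocated.)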
 */
static int
iwm_auth(struct ieee80211vap *vap, struct iwm_softc *sc)
{
	struct ieee80211_node *ni;
	struct iwm_node *in;
	struct iwm_vap *iv = IWM_VAP(vap);
	uint32_t duration;
	uint32_t min_duration;
	int error;

	/*
	 * XXX I have a feeling that the vap node is being
	 * freed from underneath us. Grr.
	 */
	ni = ieee80211_ref_node(vap->iv_bss);
-	in = (struct iwm_node *) ni;
+	in = IWM_NODE(ni);
	IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_STATE,
	    "%s: called; vap=%p, bss ni=%p\n",
	    __func__, vap, ni);

	in->in_assoc = 0;

	error = iwm_allow_mcast(vap, sc);
	if (error) {
		device_printf(sc->sc_dev,
		    "%s: failed to set multicast\n", __func__);
		goto out;
	}

	/*
	 * This is where it deviates from what Linux does.
	 *
	 * Linux iwlwifi doesn't reset the nic each time, nor does it
	 * call ctxt_add() here. Instead, it adds it during vap creation,
	 * and always does a mac_ctx_changed().
	 *
	 * The openbsd port doesn't attempt to do that - it resets things
	 * at odd states and does the add here.
	 *
	 * So, until the state handling is fixed (i.e., we never reset
	 * the NIC except for a firmware failure, which should drag
	 * the NIC back to IDLE, re-setup and re-add all the mac/phy
	 * contexts that are required), let's do a dirty hack here.
	 */
	if (iv->is_uploaded) {
		if ((error = iwm_mvm_mac_ctxt_changed(sc, vap)) != 0) {
			device_printf(sc->sc_dev,
			    "%s: failed to update MAC\n", __func__);
			goto out;
		}
	} else {
		if ((error = iwm_mvm_mac_ctxt_add(sc, vap)) != 0) {
			device_printf(sc->sc_dev,
			    "%s: failed to add MAC\n", __func__);
			goto out;
		}
	}

	if ((error = iwm_mvm_phy_ctxt_changed(sc, &sc->sc_phyctxt[0],
	    in->in_ni.ni_chan, 1, 1)) != 0) {
		device_printf(sc->sc_dev,
		    "%s: failed to add phy ctxt\n", __func__);
		goto out;
	}
	in->in_phyctxt = &sc->sc_phyctxt[0];

	if ((error = iwm_mvm_binding_add_vif(sc, in)) != 0) {
		device_printf(sc->sc_dev,
		    "%s: binding cmd failed\n", __func__);
		goto out;
	}

	if ((error = iwm_mvm_add_sta(sc, in)) != 0) {
		device_printf(sc->sc_dev,
		    "%s: failed to add station\n", __func__);
		goto out;
	}

	/* a bit superfluous?
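 *
 * (The sc_auth_prot handshake below is a small implicit state machine,
 * judging from how the values are used: 0 means no session protection
 * is scheduled (or the window was missed), 1 means protection has been
 * requested, 2 means the firmware time event started and the auth
 * window is open, and -1 means the firmware denied the time event.)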
 */
	while (sc->sc_auth_prot)
		msleep(&sc->sc_auth_prot, &sc->sc_mtx, 0, "iwmauth", 0);
	sc->sc_auth_prot = 1;

	duration = min(IWM_MVM_TE_SESSION_PROTECTION_MAX_TIME_MS,
	    200 + in->in_ni.ni_intval);
	min_duration = min(IWM_MVM_TE_SESSION_PROTECTION_MIN_TIME_MS,
	    100 + in->in_ni.ni_intval);
	iwm_mvm_protect_session(sc, in, duration, min_duration, 500);

	IWM_DPRINTF(sc, IWM_DEBUG_RESET,
	    "%s: waiting for auth_prot\n", __func__);
	while (sc->sc_auth_prot != 2) {
		/*
		 * well, meh, but if the kernel is sleeping for half a
		 * second, we have bigger problems
		 */
		if (sc->sc_auth_prot == 0) {
			device_printf(sc->sc_dev,
			    "%s: missed auth window!\n", __func__);
			error = ETIMEDOUT;
			goto out;
		} else if (sc->sc_auth_prot == -1) {
			device_printf(sc->sc_dev,
			    "%s: no time event, denied!\n", __func__);
			sc->sc_auth_prot = 0;
			error = EAUTH;
			goto out;
		}
		msleep(&sc->sc_auth_prot, &sc->sc_mtx, 0, "iwmau2", 0);
	}
	IWM_DPRINTF(sc, IWM_DEBUG_RESET, "<-%s\n", __func__);
	error = 0;
out:
	ieee80211_free_node(ni);
	return (error);
}

static int
iwm_assoc(struct ieee80211vap *vap, struct iwm_softc *sc)
{
-	struct iwm_node *in = (struct iwm_node *)vap->iv_bss;
+	struct iwm_node *in = IWM_NODE(vap->iv_bss);
	int error;

	if ((error = iwm_mvm_update_sta(sc, in)) != 0) {
		device_printf(sc->sc_dev,
		    "%s: failed to update STA\n", __func__);
		return error;
	}

	in->in_assoc = 1;

	if ((error = iwm_mvm_mac_ctxt_changed(sc, vap)) != 0) {
		device_printf(sc->sc_dev,
		    "%s: failed to update MAC\n", __func__);
		return error;
	}

	return 0;
}

static int
iwm_release(struct iwm_softc *sc, struct iwm_node *in)
{
	/*
	 * Ok, so *technically* the proper set of calls for going
	 * from RUN back to SCAN is:
	 *
	 * iwm_mvm_power_mac_disable(sc, in);
	 * iwm_mvm_mac_ctxt_changed(sc, in);
	 * iwm_mvm_rm_sta(sc, in);
	 * iwm_mvm_update_quotas(sc, NULL);
	 * iwm_mvm_mac_ctxt_changed(sc, in);
	 * iwm_mvm_binding_remove_vif(sc, in);
	 * iwm_mvm_mac_ctxt_remove(sc, in);
	 *
	 * However, that freezes the device no matter which permutations
	 * and modifications are attempted. Obviously, this driver is missing
	 * something since it works in the Linux driver, but figuring out what
	 * is missing is a little more complicated. Now, since we're going
	 * back to nothing anyway, we'll just do a complete device reset.
	 * Up yours, device!
*/ //iwm_mvm_flush_tx_path(sc, 0xf, 1); iwm_stop_device(sc); iwm_init_hw(sc); if (in) in->in_assoc = 0; return 0; #if 0 int error; iwm_mvm_power_mac_disable(sc, in); if ((error = iwm_mvm_mac_ctxt_changed(sc, in)) != 0) { device_printf(sc->sc_dev, "mac ctxt change fail 1 %d\n", error); return error; } if ((error = iwm_mvm_rm_sta(sc, in)) != 0) { device_printf(sc->sc_dev, "sta remove fail %d\n", error); return error; } error = iwm_mvm_rm_sta(sc, in); in->in_assoc = 0; iwm_mvm_update_quotas(sc, NULL); if ((error = iwm_mvm_mac_ctxt_changed(sc, in)) != 0) { device_printf(sc->sc_dev, "mac ctxt change fail 2 %d\n", error); return error; } iwm_mvm_binding_remove_vif(sc, in); iwm_mvm_mac_ctxt_remove(sc, in); return error; #endif } static struct ieee80211_node * iwm_node_alloc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN]) { return malloc(sizeof (struct iwm_node), M_80211_NODE, M_NOWAIT | M_ZERO); } static void iwm_setrates(struct iwm_softc *sc, struct iwm_node *in) { struct ieee80211_node *ni = &in->in_ni; struct iwm_lq_cmd *lq = &in->in_lq; int nrates = ni->ni_rates.rs_nrates; int i, ridx, tab = 0; int txant = 0; if (nrates > nitems(lq->rs_table)) { device_printf(sc->sc_dev, "%s: node supports %d rates, driver handles " "only %zu\n", __func__, nrates, nitems(lq->rs_table)); return; } /* * XXX .. and most of iwm_node is not initialised explicitly; * it's all just 0x0 passed to the firmware. */ /* first figure out which rates we should support */ /* XXX TODO: this isn't 11n aware /at all/ */ memset(&in->in_ridx, -1, sizeof(in->in_ridx)); IWM_DPRINTF(sc, IWM_DEBUG_TXRATE, "%s: nrates=%d\n", __func__, nrates); for (i = 0; i < nrates; i++) { int rate = ni->ni_rates.rs_rates[i] & IEEE80211_RATE_VAL; /* Map 802.11 rate to HW rate index. */ for (ridx = 0; ridx <= IWM_RIDX_MAX; ridx++) if (iwm_rates[ridx].rate == rate) break; if (ridx > IWM_RIDX_MAX) { device_printf(sc->sc_dev, "%s: WARNING: device rate for %d not found!\n", __func__, rate); } else { IWM_DPRINTF(sc, IWM_DEBUG_TXRATE, "%s: rate: i: %d, rate=%d, ridx=%d\n", __func__, i, rate, ridx); in->in_ridx[i] = ridx; } } /* then construct a lq_cmd based on those */ memset(lq, 0, sizeof(*lq)); lq->sta_id = IWM_STATION_ID; /* * are these used? (we don't do SISO or MIMO) * need to set them to non-zero, though, or we get an error. */ lq->single_stream_ant_msk = 1; lq->dual_stream_ant_msk = 1; /* * Build the actual rate selection table. * The lowest bits are the rates. Additionally, * CCK needs bit 9 to be set. The rest of the bits * we add to the table select the tx antenna * Note that we add the rates in the highest rate first * (opposite of ni_rates). */ /* * XXX TODO: this should be looping over the min of nrates * and LQ_MAX_RETRY_NUM. Sigh. */ for (i = 0; i < nrates; i++) { int nextant; if (txant == 0) txant = IWM_FW_VALID_TX_ANT(sc); nextant = 1<<(ffs(txant)-1); txant &= ~nextant; /* * Map the rate id into a rate index into * our hardware table containing the * configuration to use for this rate. 
*/ ridx = in->in_ridx[(nrates-1)-i]; tab = iwm_rates[ridx].plcp; tab |= nextant << IWM_RATE_MCS_ANT_POS; if (IWM_RIDX_IS_CCK(ridx)) tab |= IWM_RATE_MCS_CCK_MSK; IWM_DPRINTF(sc, IWM_DEBUG_TXRATE, "station rate i=%d, rate=%d, hw=%x\n", i, iwm_rates[ridx].rate, tab); lq->rs_table[i] = htole32(tab); } /* then fill the rest with the lowest possible rate */ for (i = nrates; i < nitems(lq->rs_table); i++) { KASSERT(tab != 0, ("invalid tab")); lq->rs_table[i] = htole32(tab); } } static int iwm_media_change(struct ifnet *ifp) { struct ieee80211vap *vap = ifp->if_softc; struct ieee80211com *ic = vap->iv_ic; struct iwm_softc *sc = ic->ic_softc; int error; error = ieee80211_media_change(ifp); if (error != ENETRESET) return error; IWM_LOCK(sc); if (ic->ic_nrunning > 0) { iwm_stop(sc); iwm_init(sc); } IWM_UNLOCK(sc); return error; } static int iwm_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct iwm_vap *ivp = IWM_VAP(vap); struct ieee80211com *ic = vap->iv_ic; struct iwm_softc *sc = ic->ic_softc; struct iwm_node *in; int error; IWM_DPRINTF(sc, IWM_DEBUG_STATE, "switching state %s -> %s\n", ieee80211_state_name[vap->iv_state], ieee80211_state_name[nstate]); IEEE80211_UNLOCK(ic); IWM_LOCK(sc); /* disable beacon filtering if we're hopping out of RUN */ if (vap->iv_state == IEEE80211_S_RUN && nstate != vap->iv_state) { iwm_mvm_disable_beacon_filter(sc); - if (((in = (void *)vap->iv_bss) != NULL)) + if (((in = IWM_NODE(vap->iv_bss)) != NULL)) in->in_assoc = 0; iwm_release(sc, NULL); /* * It's impossible to directly go RUN->SCAN. If we iwm_release() * above then the card will be completely reinitialized, * so the driver must do everything necessary to bring the card * from INIT to SCAN. * * Additionally, upon receiving deauth frame from AP, * OpenBSD 802.11 stack puts the driver in IEEE80211_S_AUTH * state. This will also fail with this driver, so bring the FSM * from IEEE80211_S_RUN to IEEE80211_S_SCAN in this case as well. * * XXX TODO: fix this for FreeBSD! 
*/ if (nstate == IEEE80211_S_SCAN || nstate == IEEE80211_S_AUTH || nstate == IEEE80211_S_ASSOC) { IWM_DPRINTF(sc, IWM_DEBUG_STATE, "Force transition to INIT; MGT=%d\n", arg); IWM_UNLOCK(sc); IEEE80211_LOCK(ic); vap->iv_newstate(vap, IEEE80211_S_INIT, arg); IWM_DPRINTF(sc, IWM_DEBUG_STATE, "Going INIT->SCAN\n"); nstate = IEEE80211_S_SCAN; IEEE80211_UNLOCK(ic); IWM_LOCK(sc); } } switch (nstate) { case IEEE80211_S_INIT: sc->sc_scanband = 0; break; case IEEE80211_S_AUTH: if ((error = iwm_auth(vap, sc)) != 0) { device_printf(sc->sc_dev, "%s: could not move to auth state: %d\n", __func__, error); break; } break; case IEEE80211_S_ASSOC: if ((error = iwm_assoc(vap, sc)) != 0) { device_printf(sc->sc_dev, "%s: failed to associate: %d\n", __func__, error); break; } break; case IEEE80211_S_RUN: { struct iwm_host_cmd cmd = { .id = IWM_LQ_CMD, .len = { sizeof(in->in_lq), }, .flags = IWM_CMD_SYNC, }; /* Update the association state, now we have it all */ /* (eg associd comes in at this point */ error = iwm_assoc(vap, sc); if (error != 0) { device_printf(sc->sc_dev, "%s: failed to update association state: %d\n", __func__, error); break; } - in = (struct iwm_node *)vap->iv_bss; + in = IWM_NODE(vap->iv_bss); iwm_mvm_power_mac_update_mode(sc, in); iwm_mvm_enable_beacon_filter(sc, in); iwm_mvm_update_quotas(sc, in); iwm_setrates(sc, in); cmd.data[0] = &in->in_lq; if ((error = iwm_send_cmd(sc, &cmd)) != 0) { device_printf(sc->sc_dev, "%s: IWM_LQ_CMD failed\n", __func__); } break; } default: break; } IWM_UNLOCK(sc); IEEE80211_LOCK(ic); return (ivp->iv_newstate(vap, nstate, arg)); } void iwm_endscan_cb(void *arg, int pending) { struct iwm_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; int done; int error; IWM_DPRINTF(sc, IWM_DEBUG_SCAN | IWM_DEBUG_TRACE, "%s: scan ended\n", __func__); IWM_LOCK(sc); if (sc->sc_scanband == IEEE80211_CHAN_2GHZ && sc->sc_nvm.sku_cap_band_52GHz_enable) { done = 0; if ((error = iwm_mvm_scan_request(sc, IEEE80211_CHAN_5GHZ, 0, NULL, 0)) != 0) { device_printf(sc->sc_dev, "could not initiate scan\n"); done = 1; } } else { done = 1; } if (done) { IWM_UNLOCK(sc); ieee80211_scan_done(TAILQ_FIRST(&ic->ic_vaps)); IWM_LOCK(sc); sc->sc_scanband = 0; } IWM_UNLOCK(sc); } static int iwm_init_hw(struct iwm_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; int error, i, qid; if ((error = iwm_start_hw(sc)) != 0) return error; if ((error = iwm_run_init_mvm_ucode(sc, 0)) != 0) { return error; } /* * should stop and start HW since that INIT * image just loaded */ iwm_stop_device(sc); if ((error = iwm_start_hw(sc)) != 0) { device_printf(sc->sc_dev, "could not initialize hardware\n"); return error; } /* omstart, this time with the regular firmware */ error = iwm_mvm_load_ucode_wait_alive(sc, IWM_UCODE_TYPE_REGULAR); if (error) { device_printf(sc->sc_dev, "could not load firmware\n"); goto error; } if ((error = iwm_send_tx_ant_cfg(sc, IWM_FW_VALID_TX_ANT(sc))) != 0) goto error; /* Send phy db control command and then phy db calibration*/ if ((error = iwm_send_phy_db_data(sc)) != 0) goto error; if ((error = iwm_send_phy_cfg_cmd(sc)) != 0) goto error; /* Add auxiliary station for scanning */ if ((error = iwm_mvm_add_aux_sta(sc)) != 0) goto error; for (i = 0; i < IWM_NUM_PHY_CTX; i++) { /* * The channel used here isn't relevant as it's * going to be overwritten in the other flows. * For now use the first channel we have. 
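 * (ic_channels[1] below is just that placeholder; the context is
 * updated with the real channel before it is used.)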
*/ if ((error = iwm_mvm_phy_ctxt_add(sc, &sc->sc_phyctxt[i], &ic->ic_channels[1], 1, 1)) != 0) goto error; } error = iwm_mvm_power_update_device(sc); if (error) goto error; /* Mark TX rings as active. */ for (qid = 0; qid < 4; qid++) { iwm_enable_txq(sc, qid, qid); } return 0; error: iwm_stop_device(sc); return error; } /* Allow multicast from our BSSID. */ static int iwm_allow_mcast(struct ieee80211vap *vap, struct iwm_softc *sc) { struct ieee80211_node *ni = vap->iv_bss; struct iwm_mcast_filter_cmd *cmd; size_t size; int error; size = roundup(sizeof(*cmd), 4); cmd = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO); if (cmd == NULL) return ENOMEM; cmd->filter_own = 1; cmd->port_id = 0; cmd->count = 0; cmd->pass_all = 1; IEEE80211_ADDR_COPY(cmd->bssid, ni->ni_bssid); error = iwm_mvm_send_cmd_pdu(sc, IWM_MCAST_FILTER_CMD, IWM_CMD_SYNC, size, cmd); free(cmd, M_DEVBUF); return (error); } static void iwm_init(struct iwm_softc *sc) { int error; if (sc->sc_flags & IWM_FLAG_HW_INITED) { return; } sc->sc_generation++; sc->sc_flags &= ~IWM_FLAG_STOPPED; if ((error = iwm_init_hw(sc)) != 0) { iwm_stop(sc); return; } /* * Ok, firmware loaded and we are jogging */ sc->sc_flags |= IWM_FLAG_HW_INITED; callout_reset(&sc->sc_watchdog_to, hz, iwm_watchdog, sc); } static int iwm_transmit(struct ieee80211com *ic, struct mbuf *m) { struct iwm_softc *sc; int error; sc = ic->ic_softc; IWM_LOCK(sc); if ((sc->sc_flags & IWM_FLAG_HW_INITED) == 0) { IWM_UNLOCK(sc); return (ENXIO); } error = mbufq_enqueue(&sc->sc_snd, m); if (error) { IWM_UNLOCK(sc); return (error); } iwm_start(sc); IWM_UNLOCK(sc); return (0); } /* * Dequeue packets from sendq and call send. */ static void iwm_start(struct iwm_softc *sc) { struct ieee80211_node *ni; struct mbuf *m; int ac = 0; IWM_DPRINTF(sc, IWM_DEBUG_XMIT | IWM_DEBUG_TRACE, "->%s\n", __func__); while (sc->qfullmsk == 0 && (m = mbufq_dequeue(&sc->sc_snd)) != NULL) { ni = (struct ieee80211_node *)m->m_pkthdr.rcvif; if (iwm_tx(sc, m, ni, ac) != 0) { if_inc_counter(ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, 1); ieee80211_free_node(ni); continue; } sc->sc_tx_timer = 15; } IWM_DPRINTF(sc, IWM_DEBUG_XMIT | IWM_DEBUG_TRACE, "<-%s\n", __func__); } static void iwm_stop(struct iwm_softc *sc) { sc->sc_flags &= ~IWM_FLAG_HW_INITED; sc->sc_flags |= IWM_FLAG_STOPPED; sc->sc_generation++; sc->sc_scanband = 0; sc->sc_auth_prot = 0; sc->sc_tx_timer = 0; iwm_stop_device(sc); } static void iwm_watchdog(void *arg) { struct iwm_softc *sc = arg; if (sc->sc_tx_timer > 0) { if (--sc->sc_tx_timer == 0) { device_printf(sc->sc_dev, "device timeout\n"); #ifdef IWM_DEBUG iwm_nic_error(sc); #endif iwm_stop(sc); counter_u64_add(sc->sc_ic.ic_oerrors, 1); return; } } callout_reset(&sc->sc_watchdog_to, hz, iwm_watchdog, sc); } static void iwm_parent(struct ieee80211com *ic) { struct iwm_softc *sc = ic->ic_softc; int startall = 0; IWM_LOCK(sc); if (ic->ic_nrunning > 0) { if (!(sc->sc_flags & IWM_FLAG_HW_INITED)) { iwm_init(sc); startall = 1; } } else if (sc->sc_flags & IWM_FLAG_HW_INITED) iwm_stop(sc); IWM_UNLOCK(sc); if (startall) ieee80211_start_all(ic); } /* * The interrupt side of things */ /* * error dumping routines are from iwlwifi/mvm/utils.c */ /* * Note: This structure is read from the device with IO accesses, * and the reading already does the endian conversion. As it is * read with uint32_t-sized accesses, any members with a different size * need to be ordered correctly though! 
*/ struct iwm_error_event_table { uint32_t valid; /* (nonzero) valid, (0) log is empty */ uint32_t error_id; /* type of error */ uint32_t pc; /* program counter */ uint32_t blink1; /* branch link */ uint32_t blink2; /* branch link */ uint32_t ilink1; /* interrupt link */ uint32_t ilink2; /* interrupt link */ uint32_t data1; /* error-specific data */ uint32_t data2; /* error-specific data */ uint32_t data3; /* error-specific data */ uint32_t bcon_time; /* beacon timer */ uint32_t tsf_low; /* network timestamp function timer */ uint32_t tsf_hi; /* network timestamp function timer */ uint32_t gp1; /* GP1 timer register */ uint32_t gp2; /* GP2 timer register */ uint32_t gp3; /* GP3 timer register */ uint32_t ucode_ver; /* uCode version */ uint32_t hw_ver; /* HW Silicon version */ uint32_t brd_ver; /* HW board version */ uint32_t log_pc; /* log program counter */ uint32_t frame_ptr; /* frame pointer */ uint32_t stack_ptr; /* stack pointer */ uint32_t hcmd; /* last host command header */ uint32_t isr0; /* isr status register LMPM_NIC_ISR0: * rxtx_flag */ uint32_t isr1; /* isr status register LMPM_NIC_ISR1: * host_flag */ uint32_t isr2; /* isr status register LMPM_NIC_ISR2: * enc_flag */ uint32_t isr3; /* isr status register LMPM_NIC_ISR3: * time_flag */ uint32_t isr4; /* isr status register LMPM_NIC_ISR4: * wico interrupt */ uint32_t isr_pref; /* isr status register LMPM_NIC_PREF_STAT */ uint32_t wait_event; /* wait event() caller address */ uint32_t l2p_control; /* L2pControlField */ uint32_t l2p_duration; /* L2pDurationField */ uint32_t l2p_mhvalid; /* L2pMhValidBits */ uint32_t l2p_addr_match; /* L2pAddrMatchStat */ uint32_t lmpm_pmg_sel; /* indicate which clocks are turned on * (LMPM_PMG_SEL) */ uint32_t u_timestamp; /* indicate when the date and time of the * compilation */ uint32_t flow_handler; /* FH read/write pointers, RX credit */ } __packed; #define ERROR_START_OFFSET (1 * sizeof(uint32_t)) #define ERROR_ELEM_SIZE (7 * sizeof(uint32_t)) #ifdef IWM_DEBUG struct { const char *name; uint8_t num; } advanced_lookup[] = { { "NMI_INTERRUPT_WDG", 0x34 }, { "SYSASSERT", 0x35 }, { "UCODE_VERSION_MISMATCH", 0x37 }, { "BAD_COMMAND", 0x38 }, { "NMI_INTERRUPT_DATA_ACTION_PT", 0x3C }, { "FATAL_ERROR", 0x3D }, { "NMI_TRM_HW_ERR", 0x46 }, { "NMI_INTERRUPT_TRM", 0x4C }, { "NMI_INTERRUPT_BREAK_POINT", 0x54 }, { "NMI_INTERRUPT_WDG_RXF_FULL", 0x5C }, { "NMI_INTERRUPT_WDG_NO_RBD_RXF_FULL", 0x64 }, { "NMI_INTERRUPT_HOST", 0x66 }, { "NMI_INTERRUPT_ACTION_PT", 0x7C }, { "NMI_INTERRUPT_UNKNOWN", 0x84 }, { "NMI_INTERRUPT_INST_ACTION_PT", 0x86 }, { "ADVANCED_SYSASSERT", 0 }, }; static const char * iwm_desc_lookup(uint32_t num) { int i; for (i = 0; i < nitems(advanced_lookup) - 1; i++) if (advanced_lookup[i].num == num) return advanced_lookup[i].name; /* No entry matches 'num', so it is the last: ADVANCED_SYSASSERT */ return advanced_lookup[i].name; } /* * Support for dumping the error log seemed like a good idea ... * but it's mostly hex junk and the only sensible thing is the * hw/ucode revision (which we know anyway). Since it's here, * I'll just leave it in, just in case e.g. the Intel guys want to * help us decipher some "ADVANCED_SYSASSERT" later. 
*/ static void iwm_nic_error(struct iwm_softc *sc) { struct iwm_error_event_table table; uint32_t base; device_printf(sc->sc_dev, "dumping device error log\n"); base = sc->sc_uc.uc_error_event_table; if (base < 0x800000 || base >= 0x80C000) { device_printf(sc->sc_dev, "Not valid error log pointer 0x%08x\n", base); return; } if (iwm_read_mem(sc, base, &table, sizeof(table)/sizeof(uint32_t)) != 0) { device_printf(sc->sc_dev, "reading errlog failed\n"); return; } if (!table.valid) { device_printf(sc->sc_dev, "errlog not found, skipping\n"); return; } if (ERROR_START_OFFSET <= table.valid * ERROR_ELEM_SIZE) { device_printf(sc->sc_dev, "Start IWL Error Log Dump:\n"); device_printf(sc->sc_dev, "Status: 0x%x, count: %d\n", sc->sc_flags, table.valid); } device_printf(sc->sc_dev, "0x%08X | %-28s\n", table.error_id, iwm_desc_lookup(table.error_id)); device_printf(sc->sc_dev, "%08X | uPc\n", table.pc); device_printf(sc->sc_dev, "%08X | branchlink1\n", table.blink1); device_printf(sc->sc_dev, "%08X | branchlink2\n", table.blink2); device_printf(sc->sc_dev, "%08X | interruptlink1\n", table.ilink1); device_printf(sc->sc_dev, "%08X | interruptlink2\n", table.ilink2); device_printf(sc->sc_dev, "%08X | data1\n", table.data1); device_printf(sc->sc_dev, "%08X | data2\n", table.data2); device_printf(sc->sc_dev, "%08X | data3\n", table.data3); device_printf(sc->sc_dev, "%08X | beacon time\n", table.bcon_time); device_printf(sc->sc_dev, "%08X | tsf low\n", table.tsf_low); device_printf(sc->sc_dev, "%08X | tsf hi\n", table.tsf_hi); device_printf(sc->sc_dev, "%08X | time gp1\n", table.gp1); device_printf(sc->sc_dev, "%08X | time gp2\n", table.gp2); device_printf(sc->sc_dev, "%08X | time gp3\n", table.gp3); device_printf(sc->sc_dev, "%08X | uCode version\n", table.ucode_ver); device_printf(sc->sc_dev, "%08X | hw version\n", table.hw_ver); device_printf(sc->sc_dev, "%08X | board version\n", table.brd_ver); device_printf(sc->sc_dev, "%08X | hcmd\n", table.hcmd); device_printf(sc->sc_dev, "%08X | isr0\n", table.isr0); device_printf(sc->sc_dev, "%08X | isr1\n", table.isr1); device_printf(sc->sc_dev, "%08X | isr2\n", table.isr2); device_printf(sc->sc_dev, "%08X | isr3\n", table.isr3); device_printf(sc->sc_dev, "%08X | isr4\n", table.isr4); device_printf(sc->sc_dev, "%08X | isr_pref\n", table.isr_pref); device_printf(sc->sc_dev, "%08X | wait_event\n", table.wait_event); device_printf(sc->sc_dev, "%08X | l2p_control\n", table.l2p_control); device_printf(sc->sc_dev, "%08X | l2p_duration\n", table.l2p_duration); device_printf(sc->sc_dev, "%08X | l2p_mhvalid\n", table.l2p_mhvalid); device_printf(sc->sc_dev, "%08X | l2p_addr_match\n", table.l2p_addr_match); device_printf(sc->sc_dev, "%08X | lmpm_pmg_sel\n", table.lmpm_pmg_sel); device_printf(sc->sc_dev, "%08X | timestamp\n", table.u_timestamp); device_printf(sc->sc_dev, "%08X | flow_handler\n", table.flow_handler); } #endif #define SYNC_RESP_STRUCT(_var_, _pkt_) \ do { \ bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTREAD);\ _var_ = (void *)((_pkt_)+1); \ } while (/*CONSTCOND*/0) #define SYNC_RESP_PTR(_ptr_, _len_, _pkt_) \ do { \ bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTREAD);\ _ptr_ = (void *)((_pkt_)+1); \ } while (/*CONSTCOND*/0) #define ADVANCE_RXQ(sc) (sc->rxq.cur = (sc->rxq.cur + 1) % IWM_RX_RING_COUNT); /* * Process an IWM_CSR_INT_BIT_FH_RX or IWM_CSR_INT_BIT_SW_RX interrupt. 
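 * The firmware publishes the index of the most recently closed
 * receive buffer in the RX status area (closed_rb_num); we walk
 * the ring from rxq.cur up to that index, dispatching each
 * packet on pkt->hdr.code.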
* Basic structure from if_iwn */ static void iwm_notif_intr(struct iwm_softc *sc) { uint16_t hw; bus_dmamap_sync(sc->rxq.stat_dma.tag, sc->rxq.stat_dma.map, BUS_DMASYNC_POSTREAD); hw = le16toh(sc->rxq.stat->closed_rb_num) & 0xfff; while (sc->rxq.cur != hw) { struct iwm_rx_ring *ring = &sc->rxq; struct iwm_rx_data *data = &sc->rxq.data[sc->rxq.cur]; struct iwm_rx_packet *pkt; struct iwm_cmd_response *cresp; int qid, idx; bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); pkt = mtod(data->m, struct iwm_rx_packet *); qid = pkt->hdr.qid & ~0x80; idx = pkt->hdr.idx; IWM_DPRINTF(sc, IWM_DEBUG_INTR, "rx packet qid=%d idx=%d flags=%x type=%x %d %d\n", pkt->hdr.qid & ~0x80, pkt->hdr.idx, pkt->hdr.flags, pkt->hdr.code, sc->rxq.cur, hw); /* * randomly get these from the firmware, no idea why. * they at least seem harmless, so just ignore them for now */ if (__predict_false((pkt->hdr.code == 0 && qid == 0 && idx == 0) || pkt->len_n_flags == htole32(0x55550000))) { ADVANCE_RXQ(sc); continue; } switch (pkt->hdr.code) { case IWM_REPLY_RX_PHY_CMD: iwm_mvm_rx_rx_phy_cmd(sc, pkt, data); break; case IWM_REPLY_RX_MPDU_CMD: iwm_mvm_rx_rx_mpdu(sc, pkt, data); break; case IWM_TX_CMD: iwm_mvm_rx_tx_cmd(sc, pkt, data); break; case IWM_MISSED_BEACONS_NOTIFICATION: { struct iwm_missed_beacons_notif *resp; int missed; /* XXX look at mac_id to determine interface ID */ struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); SYNC_RESP_STRUCT(resp, pkt); missed = le32toh(resp->consec_missed_beacons); IWM_DPRINTF(sc, IWM_DEBUG_BEACON | IWM_DEBUG_STATE, "%s: MISSED_BEACON: mac_id=%d, " "consec_since_last_rx=%d, consec=%d, num_expect=%d " "num_rx=%d\n", __func__, le32toh(resp->mac_id), le32toh(resp->consec_missed_beacons_since_last_rx), le32toh(resp->consec_missed_beacons), le32toh(resp->num_expected_beacons), le32toh(resp->num_recvd_beacons)); /* Be paranoid */ if (vap == NULL) break; /* XXX no net80211 locking? 
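 * (iv_state and ic_flags are inspected below while holding only
 * the driver lock)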
*/ if (vap->iv_state == IEEE80211_S_RUN && (ic->ic_flags & IEEE80211_F_SCAN) == 0) { if (missed > vap->iv_bmissthreshold) { /* XXX bad locking; turn into task */ IWM_UNLOCK(sc); ieee80211_beacon_miss(ic); IWM_LOCK(sc); } } break; } case IWM_MVM_ALIVE: { struct iwm_mvm_alive_resp *resp; SYNC_RESP_STRUCT(resp, pkt); sc->sc_uc.uc_error_event_table = le32toh(resp->error_event_table_ptr); sc->sc_uc.uc_log_event_table = le32toh(resp->log_event_table_ptr); sc->sched_base = le32toh(resp->scd_base_ptr); sc->sc_uc.uc_ok = resp->status == IWM_ALIVE_STATUS_OK; sc->sc_uc.uc_intr = 1; wakeup(&sc->sc_uc); break; } case IWM_CALIB_RES_NOTIF_PHY_DB: { struct iwm_calib_res_notif_phy_db *phy_db_notif; SYNC_RESP_STRUCT(phy_db_notif, pkt); iwm_phy_db_set_section(sc, phy_db_notif); break; } case IWM_STATISTICS_NOTIFICATION: { struct iwm_notif_statistics *stats; SYNC_RESP_STRUCT(stats, pkt); memcpy(&sc->sc_stats, stats, sizeof(sc->sc_stats)); sc->sc_noise = iwm_get_noise(&stats->rx.general); break; } case IWM_NVM_ACCESS_CMD: if (sc->sc_wantresp == ((qid << 16) | idx)) { bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); memcpy(sc->sc_cmd_resp, pkt, sizeof(sc->sc_cmd_resp)); } break; case IWM_PHY_CONFIGURATION_CMD: case IWM_TX_ANT_CONFIGURATION_CMD: case IWM_ADD_STA: case IWM_MAC_CONTEXT_CMD: case IWM_REPLY_SF_CFG_CMD: case IWM_POWER_TABLE_CMD: case IWM_PHY_CONTEXT_CMD: case IWM_BINDING_CONTEXT_CMD: case IWM_TIME_EVENT_CMD: case IWM_SCAN_REQUEST_CMD: case IWM_REPLY_BEACON_FILTERING_CMD: case IWM_MAC_PM_POWER_TABLE: case IWM_TIME_QUOTA_CMD: case IWM_REMOVE_STA: case IWM_TXPATH_FLUSH: case IWM_LQ_CMD: SYNC_RESP_STRUCT(cresp, pkt); if (sc->sc_wantresp == ((qid << 16) | idx)) { memcpy(sc->sc_cmd_resp, pkt, sizeof(*pkt)+sizeof(*cresp)); } break; /* ignore */ case 0x6c: /* IWM_PHY_DB_CMD, no idea why it's not in fw-api.h */ break; case IWM_INIT_COMPLETE_NOTIF: sc->sc_init_complete = 1; wakeup(&sc->sc_init_complete); break; case IWM_SCAN_COMPLETE_NOTIFICATION: { struct iwm_scan_complete_notif *notif; SYNC_RESP_STRUCT(notif, pkt); taskqueue_enqueue(sc->sc_tq, &sc->sc_es_task); break; } case IWM_REPLY_ERROR: { struct iwm_error_resp *resp; SYNC_RESP_STRUCT(resp, pkt); device_printf(sc->sc_dev, "firmware error 0x%x, cmd 0x%x\n", le32toh(resp->error_type), resp->cmd_id); break; } case IWM_TIME_EVENT_NOTIFICATION: { struct iwm_time_event_notif *notif; SYNC_RESP_STRUCT(notif, pkt); if (notif->status) { if (le32toh(notif->action) & IWM_TE_V2_NOTIF_HOST_EVENT_START) sc->sc_auth_prot = 2; else sc->sc_auth_prot = 0; } else { sc->sc_auth_prot = -1; } IWM_DPRINTF(sc, IWM_DEBUG_INTR, "%s: time event notification auth_prot=%d\n", __func__, sc->sc_auth_prot); wakeup(&sc->sc_auth_prot); break; } case IWM_MCAST_FILTER_CMD: break; default: device_printf(sc->sc_dev, "frame %d/%d %x UNHANDLED (this should " "not happen)\n", qid, idx, pkt->len_n_flags); break; } /* * Why test bit 0x80? The Linux driver: * * There is one exception: uCode sets bit 15 when it * originates the response/notification, i.e. when the * response/notification is not a direct response to a * command sent by the driver. For example, uCode issues * IWM_REPLY_RX when it sends a received frame to the driver; * it is not a direct response to any driver command. * * Ok, so since when is 7 == 15? Well, the Linux driver * uses a slightly different format for pkt->hdr, and "qid" * is actually the upper byte of a two-byte field. 
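 *
 * Here pkt->hdr.qid is a single byte, so bit 15 of that 16-bit
 * field lands in bit 7: the 0x80 test below checks the same
 * "uCode originated" flag.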
*/ if (!(pkt->hdr.qid & (1 << 7))) { iwm_cmd_done(sc, pkt); } ADVANCE_RXQ(sc); } IWM_CLRBITS(sc, IWM_CSR_GP_CNTRL, IWM_CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); /* * Tell the firmware what we have processed. * Seems like the hardware gets upset unless we align * the write by 8?? */ hw = (hw == 0) ? IWM_RX_RING_COUNT - 1 : hw - 1; IWM_WRITE(sc, IWM_FH_RSCSR_CHNL0_WPTR, hw & ~7); } static void iwm_intr(void *arg) { struct iwm_softc *sc = arg; int handled = 0; int r1, r2, rv = 0; int isperiodic = 0; IWM_LOCK(sc); IWM_WRITE(sc, IWM_CSR_INT_MASK, 0); if (sc->sc_flags & IWM_FLAG_USE_ICT) { uint32_t *ict = sc->ict_dma.vaddr; int tmp; tmp = htole32(ict[sc->ict_cur]); if (!tmp) goto out_ena; /* * ok, there was something. keep plowing until we have all. */ r1 = r2 = 0; while (tmp) { r1 |= tmp; ict[sc->ict_cur] = 0; sc->ict_cur = (sc->ict_cur+1) % IWM_ICT_COUNT; tmp = htole32(ict[sc->ict_cur]); } /* this is where the fun begins. don't ask */ if (r1 == 0xffffffff) r1 = 0; /* i am not expected to understand this */ if (r1 & 0xc0000) r1 |= 0x8000; r1 = (0xff & r1) | ((0xff00 & r1) << 16); } else { r1 = IWM_READ(sc, IWM_CSR_INT); /* "hardware gone" (where, fishing?) */ if (r1 == 0xffffffff || (r1 & 0xfffffff0) == 0xa5a5a5a0) goto out; r2 = IWM_READ(sc, IWM_CSR_FH_INT_STATUS); } if (r1 == 0 && r2 == 0) { goto out_ena; } IWM_WRITE(sc, IWM_CSR_INT, r1 | ~sc->sc_intmask); /* ignored */ handled |= (r1 & (IWM_CSR_INT_BIT_ALIVE /*| IWM_CSR_INT_BIT_SCD*/)); if (r1 & IWM_CSR_INT_BIT_SW_ERR) { #ifdef IWM_DEBUG int i; struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); iwm_nic_error(sc); /* Dump driver status (TX and RX rings) while we're here. */ device_printf(sc->sc_dev, "driver status:\n"); for (i = 0; i < IWM_MVM_MAX_QUEUES; i++) { struct iwm_tx_ring *ring = &sc->txq[i]; device_printf(sc->sc_dev, " tx ring %2d: qid=%-2d cur=%-3d " "queued=%-3d\n", i, ring->qid, ring->cur, ring->queued); } device_printf(sc->sc_dev, " rx ring: cur=%d\n", sc->rxq.cur); device_printf(sc->sc_dev, " 802.11 state %d\n", vap->iv_state); #endif device_printf(sc->sc_dev, "fatal firmware error\n"); iwm_stop(sc); rv = 1; goto out; } if (r1 & IWM_CSR_INT_BIT_HW_ERR) { handled |= IWM_CSR_INT_BIT_HW_ERR; device_printf(sc->sc_dev, "hardware error, stopping device\n"); iwm_stop(sc); rv = 1; goto out; } /* firmware chunk loaded */ if (r1 & IWM_CSR_INT_BIT_FH_TX) { IWM_WRITE(sc, IWM_CSR_FH_INT_STATUS, IWM_CSR_FH_INT_TX_MASK); handled |= IWM_CSR_INT_BIT_FH_TX; sc->sc_fw_chunk_done = 1; wakeup(&sc->sc_fw); } if (r1 & IWM_CSR_INT_BIT_RF_KILL) { handled |= IWM_CSR_INT_BIT_RF_KILL; if (iwm_check_rfkill(sc)) { device_printf(sc->sc_dev, "%s: rfkill switch, disabling interface\n", __func__); iwm_stop(sc); } } /* * The Linux driver uses periodic interrupts to avoid races. * We cargo-cult like it's going out of fashion. 
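 * (The periodic interrupt is disabled below when it fires with
 * no RX work pending, and re-armed after real RX processing.)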
*/ if (r1 & IWM_CSR_INT_BIT_RX_PERIODIC) { handled |= IWM_CSR_INT_BIT_RX_PERIODIC; IWM_WRITE(sc, IWM_CSR_INT, IWM_CSR_INT_BIT_RX_PERIODIC); if ((r1 & (IWM_CSR_INT_BIT_FH_RX | IWM_CSR_INT_BIT_SW_RX)) == 0) IWM_WRITE_1(sc, IWM_CSR_INT_PERIODIC_REG, IWM_CSR_INT_PERIODIC_DIS); isperiodic = 1; } if ((r1 & (IWM_CSR_INT_BIT_FH_RX | IWM_CSR_INT_BIT_SW_RX)) || isperiodic) { handled |= (IWM_CSR_INT_BIT_FH_RX | IWM_CSR_INT_BIT_SW_RX); IWM_WRITE(sc, IWM_CSR_FH_INT_STATUS, IWM_CSR_FH_INT_RX_MASK); iwm_notif_intr(sc); /* enable periodic interrupt, see above */ if (r1 & (IWM_CSR_INT_BIT_FH_RX | IWM_CSR_INT_BIT_SW_RX) && !isperiodic) IWM_WRITE_1(sc, IWM_CSR_INT_PERIODIC_REG, IWM_CSR_INT_PERIODIC_ENA); } if (__predict_false(r1 & ~handled)) IWM_DPRINTF(sc, IWM_DEBUG_INTR, "%s: unhandled interrupts: %x\n", __func__, r1); rv = 1; out_ena: iwm_restore_interrupts(sc); out: IWM_UNLOCK(sc); return; } /* * Autoconf glue-sniffing */ #define PCI_VENDOR_INTEL 0x8086 #define PCI_PRODUCT_INTEL_WL_3160_1 0x08b3 #define PCI_PRODUCT_INTEL_WL_3160_2 0x08b4 #define PCI_PRODUCT_INTEL_WL_7260_1 0x08b1 #define PCI_PRODUCT_INTEL_WL_7260_2 0x08b2 #define PCI_PRODUCT_INTEL_WL_7265_1 0x095a #define PCI_PRODUCT_INTEL_WL_7265_2 0x095b static const struct iwm_devices { uint16_t device; const char *name; } iwm_devices[] = { { PCI_PRODUCT_INTEL_WL_3160_1, "Intel Dual Band Wireless AC 3160" }, { PCI_PRODUCT_INTEL_WL_3160_2, "Intel Dual Band Wireless AC 3160" }, { PCI_PRODUCT_INTEL_WL_7260_1, "Intel Dual Band Wireless AC 7260" }, { PCI_PRODUCT_INTEL_WL_7260_2, "Intel Dual Band Wireless AC 7260" }, { PCI_PRODUCT_INTEL_WL_7265_1, "Intel Dual Band Wireless AC 7265" }, { PCI_PRODUCT_INTEL_WL_7265_2, "Intel Dual Band Wireless AC 7265" }, }; static int iwm_probe(device_t dev) { int i; for (i = 0; i < nitems(iwm_devices); i++) if (pci_get_vendor(dev) == PCI_VENDOR_INTEL && pci_get_device(dev) == iwm_devices[i].device) { device_set_desc(dev, iwm_devices[i].name); return (BUS_PROBE_DEFAULT); } return (ENXIO); } static int iwm_dev_check(device_t dev) { struct iwm_softc *sc; sc = device_get_softc(dev); switch (pci_get_device(dev)) { case PCI_PRODUCT_INTEL_WL_3160_1: case PCI_PRODUCT_INTEL_WL_3160_2: sc->sc_fwname = "iwm3160fw"; sc->host_interrupt_operation_mode = 1; return (0); case PCI_PRODUCT_INTEL_WL_7260_1: case PCI_PRODUCT_INTEL_WL_7260_2: sc->sc_fwname = "iwm7260fw"; sc->host_interrupt_operation_mode = 1; return (0); case PCI_PRODUCT_INTEL_WL_7265_1: case PCI_PRODUCT_INTEL_WL_7265_2: sc->sc_fwname = "iwm7265fw"; sc->host_interrupt_operation_mode = 0; return (0); default: device_printf(dev, "unknown adapter type\n"); return ENXIO; } } static int iwm_pci_attach(device_t dev) { struct iwm_softc *sc; int count, error, rid; uint16_t reg; sc = device_get_softc(dev); /* Clear device-specific "PCI retry timeout" register (41h). */ reg = pci_read_config(dev, 0x40, sizeof(reg)); pci_write_config(dev, 0x40, reg & ~0xff00, sizeof(reg)); /* Enable bus-mastering and hardware bug workaround. */ pci_enable_busmaster(dev); reg = pci_read_config(dev, PCIR_STATUS, sizeof(reg)); /* if !MSI */ if (reg & PCIM_STATUS_INTxSTATE) { reg &= ~PCIM_STATUS_INTxSTATE; } pci_write_config(dev, PCIR_STATUS, reg, sizeof(reg)); rid = PCIR_BAR(0); sc->sc_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->sc_mem == NULL) { device_printf(sc->sc_dev, "can't map mem space\n"); return (ENXIO); } sc->sc_st = rman_get_bustag(sc->sc_mem); sc->sc_sh = rman_get_bushandle(sc->sc_mem); /* Install interrupt handler. 
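 * Prefer a single MSI vector (rid 1); fall back to the legacy
 * INTx line (rid 0, shareable) if MSI allocation fails.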
*/ count = 1; rid = 0; if (pci_alloc_msi(dev, &count) == 0) rid = 1; sc->sc_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE | (rid != 0 ? 0 : RF_SHAREABLE)); if (sc->sc_irq == NULL) { device_printf(dev, "can't map interrupt\n"); return (ENXIO); } error = bus_setup_intr(dev, sc->sc_irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, iwm_intr, sc, &sc->sc_ih); if (sc->sc_ih == NULL) { device_printf(dev, "can't establish interrupt"); return (ENXIO); } sc->sc_dmat = bus_get_dma_tag(sc->sc_dev); return (0); } static void iwm_pci_detach(device_t dev) { struct iwm_softc *sc = device_get_softc(dev); if (sc->sc_irq != NULL) { bus_teardown_intr(dev, sc->sc_irq, sc->sc_ih); bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(sc->sc_irq), sc->sc_irq); pci_release_msi(dev); } if (sc->sc_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->sc_mem), sc->sc_mem); } static int iwm_attach(device_t dev) { struct iwm_softc *sc = device_get_softc(dev); struct ieee80211com *ic = &sc->sc_ic; int error; int txq_i, i; sc->sc_dev = dev; - mtx_init(&sc->sc_mtx, "iwm_mtx", MTX_DEF, 0); + IWM_LOCK_INIT(sc); mbufq_init(&sc->sc_snd, ifqmaxlen); callout_init_mtx(&sc->sc_watchdog_to, &sc->sc_mtx, 0); TASK_INIT(&sc->sc_es_task, 0, iwm_endscan_cb, sc); sc->sc_tq = taskqueue_create("iwm_taskq", M_WAITOK, taskqueue_thread_enqueue, &sc->sc_tq); error = taskqueue_start_threads(&sc->sc_tq, 1, 0, "iwm_taskq"); if (error != 0) { device_printf(dev, "can't start threads, error %d\n", error); goto fail; } /* PCI attach */ error = iwm_pci_attach(dev); if (error != 0) goto fail; sc->sc_wantresp = -1; /* Check device type */ error = iwm_dev_check(dev); if (error != 0) goto fail; sc->sc_fwdmasegsz = IWM_FWDMASEGSZ; /* * We now start fiddling with the hardware */ sc->sc_hw_rev = IWM_READ(sc, IWM_CSR_HW_REV); if (iwm_prepare_card_hw(sc) != 0) { device_printf(dev, "could not initialize hardware\n"); goto fail; } /* Allocate DMA memory for firmware transfers. */ if ((error = iwm_alloc_fwmem(sc)) != 0) { device_printf(dev, "could not allocate memory for firmware\n"); goto fail; } /* Allocate "Keep Warm" page. */ if ((error = iwm_alloc_kw(sc)) != 0) { device_printf(dev, "could not allocate keep warm page\n"); goto fail; } /* We use ICT interrupts */ if ((error = iwm_alloc_ict(sc)) != 0) { device_printf(dev, "could not allocate ICT table\n"); goto fail; } /* Allocate TX scheduler "rings". */ if ((error = iwm_alloc_sched(sc)) != 0) { device_printf(dev, "could not allocate TX scheduler rings\n"); goto fail; } /* Allocate TX rings */ for (txq_i = 0; txq_i < nitems(sc->txq); txq_i++) { if ((error = iwm_alloc_tx_ring(sc, &sc->txq[txq_i], txq_i)) != 0) { device_printf(dev, "could not allocate TX ring %d\n", txq_i); goto fail; } } /* Allocate RX ring. */ if ((error = iwm_alloc_rx_ring(sc, &sc->rxq)) != 0) { device_printf(dev, "could not allocate RX ring\n"); goto fail; } /* Clear pending interrupts. */ IWM_WRITE(sc, IWM_CSR_INT, 0xffffffff); ic->ic_softc = sc; ic->ic_name = device_get_nameunit(sc->sc_dev); ic->ic_phytype = IEEE80211_T_OFDM; /* not only, but not used */ ic->ic_opmode = IEEE80211_M_STA; /* default to BSS mode */ /* Set device capabilities. 
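 * (No 11n capability is advertised yet; see the corresponding
 * rate handling in iwm_setrates().)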
*/ ic->ic_caps = IEEE80211_C_STA | IEEE80211_C_WPA | /* WPA/RSN */ IEEE80211_C_WME | IEEE80211_C_SHSLOT | /* short slot time supported */ IEEE80211_C_SHPREAMBLE /* short preamble supported */ // IEEE80211_C_BGSCAN /* capable of bg scanning */ ; for (i = 0; i < nitems(sc->sc_phyctxt); i++) { sc->sc_phyctxt[i].id = i; sc->sc_phyctxt[i].color = 0; sc->sc_phyctxt[i].ref = 0; sc->sc_phyctxt[i].channel = NULL; } /* Max RSSI */ sc->sc_max_rssi = IWM_MAX_DBM - IWM_MIN_DBM; sc->sc_preinit_hook.ich_func = iwm_preinit; sc->sc_preinit_hook.ich_arg = sc; if (config_intrhook_establish(&sc->sc_preinit_hook) != 0) { device_printf(dev, "config_intrhook_establish failed\n"); goto fail; } #ifdef IWM_DEBUG SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLFLAG_RW, &sc->sc_debug, 0, "control debugging"); #endif IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_TRACE, "<-%s\n", __func__); return 0; /* Free allocated memory if something failed during attachment. */ fail: iwm_detach_local(sc, 0); return ENXIO; } static int iwm_update_edca(struct ieee80211com *ic) { struct iwm_softc *sc = ic->ic_softc; device_printf(sc->sc_dev, "%s: called\n", __func__); return (0); } static void iwm_preinit(void *arg) { struct iwm_softc *sc = arg; device_t dev = sc->sc_dev; struct ieee80211com *ic = &sc->sc_ic; int error; IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_TRACE, "->%s\n", __func__); IWM_LOCK(sc); if ((error = iwm_start_hw(sc)) != 0) { device_printf(dev, "could not initialize hardware\n"); IWM_UNLOCK(sc); goto fail; } error = iwm_run_init_mvm_ucode(sc, 1); iwm_stop_device(sc); if (error) { IWM_UNLOCK(sc); goto fail; } device_printf(dev, "revision: 0x%x, firmware %d.%d (API ver. %d)\n", sc->sc_hw_rev & IWM_CSR_HW_REV_TYPE_MSK, IWM_UCODE_MAJOR(sc->sc_fwver), IWM_UCODE_MINOR(sc->sc_fwver), IWM_UCODE_API(sc->sc_fwver)); /* not all hardware can do 5GHz band */ if (!sc->sc_nvm.sku_cap_band_52GHz_enable) memset(&ic->ic_sup_rates[IEEE80211_MODE_11A], 0, sizeof(ic->ic_sup_rates[IEEE80211_MODE_11A])); IWM_UNLOCK(sc); /* * At this point we've committed - if we fail to do setup, * we now also have to tear down the net80211 state. */ ieee80211_ifattach(ic); ic->ic_vap_create = iwm_vap_create; ic->ic_vap_delete = iwm_vap_delete; ic->ic_raw_xmit = iwm_raw_xmit; ic->ic_node_alloc = iwm_node_alloc; ic->ic_scan_start = iwm_scan_start; ic->ic_scan_end = iwm_scan_end; ic->ic_update_mcast = iwm_update_mcast; ic->ic_set_channel = iwm_set_channel; ic->ic_scan_curchan = iwm_scan_curchan; ic->ic_scan_mindwell = iwm_scan_mindwell; ic->ic_wme.wme_update = iwm_update_edca; ic->ic_parent = iwm_parent; ic->ic_transmit = iwm_transmit; iwm_radiotap_attach(sc); if (bootverbose) ieee80211_announce(ic); IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_TRACE, "<-%s\n", __func__); config_intrhook_disestablish(&sc->sc_preinit_hook); return; fail: config_intrhook_disestablish(&sc->sc_preinit_hook); iwm_detach_local(sc, 0); } /* * Attach the interface to 802.11 radiotap. 
*/ static void iwm_radiotap_attach(struct iwm_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_TRACE, "->%s begin\n", __func__); ieee80211_radiotap_attach(ic, &sc->sc_txtap.wt_ihdr, sizeof(sc->sc_txtap), IWM_TX_RADIOTAP_PRESENT, &sc->sc_rxtap.wr_ihdr, sizeof(sc->sc_rxtap), IWM_RX_RADIOTAP_PRESENT); IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_TRACE, "->%s end\n", __func__); } static struct ieee80211vap * iwm_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], int unit, enum ieee80211_opmode opmode, int flags, const uint8_t bssid[IEEE80211_ADDR_LEN], const uint8_t mac[IEEE80211_ADDR_LEN]) { struct iwm_vap *ivp; struct ieee80211vap *vap; if (!TAILQ_EMPTY(&ic->ic_vaps)) /* only one at a time */ return NULL; ivp = malloc(sizeof(struct iwm_vap), M_80211_VAP, M_WAITOK | M_ZERO); vap = &ivp->iv_vap; ieee80211_vap_setup(ic, vap, name, unit, opmode, flags, bssid); vap->iv_bmissthreshold = 10; /* override default */ /* Override with driver methods. */ ivp->iv_newstate = vap->iv_newstate; vap->iv_newstate = iwm_newstate; ieee80211_ratectl_init(vap); /* Complete setup. */ ieee80211_vap_attach(vap, iwm_media_change, ieee80211_media_status, mac); ic->ic_opmode = opmode; return vap; } static void iwm_vap_delete(struct ieee80211vap *vap) { struct iwm_vap *ivp = IWM_VAP(vap); ieee80211_ratectl_deinit(vap); ieee80211_vap_detach(vap); free(ivp, M_80211_VAP); } static void iwm_scan_start(struct ieee80211com *ic) { struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct iwm_softc *sc = ic->ic_softc; int error; if (sc->sc_scanband) return; IWM_LOCK(sc); error = iwm_mvm_scan_request(sc, IEEE80211_CHAN_2GHZ, 0, NULL, 0); if (error) { device_printf(sc->sc_dev, "could not initiate scan\n"); IWM_UNLOCK(sc); ieee80211_cancel_scan(vap); } else IWM_UNLOCK(sc); } static void iwm_scan_end(struct ieee80211com *ic) { } static void iwm_update_mcast(struct ieee80211com *ic) { } static void iwm_set_channel(struct ieee80211com *ic) { } static void iwm_scan_curchan(struct ieee80211_scan_state *ss, unsigned long maxdwell) { } static void iwm_scan_mindwell(struct ieee80211_scan_state *ss) { return; } void iwm_init_task(void *arg1) { struct iwm_softc *sc = arg1; IWM_LOCK(sc); while (sc->sc_flags & IWM_FLAG_BUSY) msleep(&sc->sc_flags, &sc->sc_mtx, 0, "iwmpwr", 0); sc->sc_flags |= IWM_FLAG_BUSY; iwm_stop(sc); if (sc->sc_ic.ic_nrunning > 0) iwm_init(sc); sc->sc_flags &= ~IWM_FLAG_BUSY; wakeup(&sc->sc_flags); IWM_UNLOCK(sc); } static int iwm_resume(device_t dev) { uint16_t reg; /* Clear device-specific "PCI retry timeout" register (41h). 
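 * (read the 16-bit word at 0x40 and zero its high byte, i.e. the
 * byte at offset 0x41)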
*/ reg = pci_read_config(dev, 0x40, sizeof(reg)); pci_write_config(dev, 0x40, reg & ~0xff00, sizeof(reg)); iwm_init_task(device_get_softc(dev)); return 0; } static int iwm_suspend(device_t dev) { struct iwm_softc *sc = device_get_softc(dev); if (sc->sc_ic.ic_nrunning > 0) { IWM_LOCK(sc); iwm_stop(sc); IWM_UNLOCK(sc); } return (0); } static int iwm_detach_local(struct iwm_softc *sc, int do_net80211) { struct iwm_fw_info *fw = &sc->sc_fw; device_t dev = sc->sc_dev; int i; if (sc->sc_tq) { taskqueue_drain_all(sc->sc_tq); taskqueue_free(sc->sc_tq); } callout_drain(&sc->sc_watchdog_to); iwm_stop_device(sc); if (do_net80211) ieee80211_ifdetach(&sc->sc_ic); /* Free descriptor rings */ for (i = 0; i < nitems(sc->txq); i++) iwm_free_tx_ring(sc, &sc->txq[i]); /* Free firmware */ if (fw->fw_rawdata != NULL) iwm_fw_info_free(fw); /* free scheduler */ iwm_free_sched(sc); if (sc->ict_dma.vaddr != NULL) iwm_free_ict(sc); if (sc->kw_dma.vaddr != NULL) iwm_free_kw(sc); if (sc->fw_dma.vaddr != NULL) iwm_free_fwmem(sc); /* Finished with the hardware - detach things */ iwm_pci_detach(dev); mbufq_drain(&sc->sc_snd); - mtx_destroy(&sc->sc_mtx); + IWM_LOCK_DESTROY(sc); return (0); } static int iwm_detach(device_t dev) { struct iwm_softc *sc = device_get_softc(dev); return (iwm_detach_local(sc, 1)); } static device_method_t iwm_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, iwm_probe), DEVMETHOD(device_attach, iwm_attach), DEVMETHOD(device_detach, iwm_detach), DEVMETHOD(device_suspend, iwm_suspend), DEVMETHOD(device_resume, iwm_resume), DEVMETHOD_END }; static driver_t iwm_pci_driver = { "iwm", iwm_pci_methods, sizeof (struct iwm_softc) }; static devclass_t iwm_devclass; DRIVER_MODULE(iwm, pci, iwm_pci_driver, iwm_devclass, NULL, NULL); MODULE_DEPEND(iwm, firmware, 1, 1, 1); MODULE_DEPEND(iwm, pci, 1, 1, 1); MODULE_DEPEND(iwm, wlan, 1, 1, 1); diff --git a/sys/dev/iwm/if_iwm_mac_ctxt.c b/sys/dev/iwm/if_iwm_mac_ctxt.c index edaacc9e9964..50b1b131509f 100644 --- a/sys/dev/iwm/if_iwm_mac_ctxt.c +++ b/sys/dev/iwm/if_iwm_mac_ctxt.c @@ -1,533 +1,533 @@ /* $OpenBSD: if_iwm.c,v 1.39 2015/03/23 00:35:19 jsg Exp $ */ /* * Copyright (c) 2014 genua mbh * Copyright (c) 2014 Fixup Software Ltd. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /*- * Based on BSD-licensed source modules in the Linux iwlwifi driver, * which were used as the reference documentation for this implementation. * * Driver version we are currently based off of is * Linux 3.14.3 (tag id a2df521e42b1d9a23f620ac79dbfe8655a8391dd) * *********************************************************************** * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * * Copyright(c) 2007 - 2013 Intel Corporation. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, * USA * * The full GNU General Public License is included in this distribution * in the file called COPYING. * * Contact Information: * Intel Linux Wireless * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 * * * BSD LICENSE * * Copyright(c) 2005 - 2013 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * Neither the name Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2007-2010 Damien Bergamini * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * BEGIN mvm/mac-ctxt.c */ static void iwm_mvm_ack_rates(struct iwm_softc *sc, int is2ghz, int *cck_rates, int *ofdm_rates) { int lowest_present_ofdm = 100; int lowest_present_cck = 100; uint8_t cck = 0; uint8_t ofdm = 0; int i; if (is2ghz) { for (i = 0; i <= IWM_LAST_CCK_RATE; i++) { cck |= (1 << i); if (lowest_present_cck > i) lowest_present_cck = i; } } for (i = IWM_FIRST_OFDM_RATE; i <= IWM_LAST_NON_HT_RATE; i++) { int adj = i - IWM_FIRST_OFDM_RATE; ofdm |= (1 << adj); if (lowest_present_ofdm > i) lowest_present_ofdm = i; } /* * Now we've got the basic rates as bitmaps in the ofdm and cck * variables. This isn't sufficient though, as there might not * be all the right rates in the bitmap. E.g. if the only basic * rates are 5.5 Mbps and 11 Mbps, we still need to add 1 Mbps * and 6 Mbps because the 802.11-2007 standard says in 9.6: * * [...] a STA responding to a received frame shall transmit * its Control Response frame [...] at the highest rate in the * BSSBasicRateSet parameter that is less than or equal to the * rate of the immediately previous frame in the frame exchange * sequence ([...]) and that is of the same modulation class * ([...]) as the received frame. If no rate contained in the * BSSBasicRateSet parameter meets these conditions, then the * control frame sent in response to a received frame shall be * transmitted at the highest mandatory rate of the PHY that is * less than or equal to the rate of the received frame, and * that is of the same modulation class as the received frame. * * As a consequence, we need to add all mandatory rates that are * lower than all of the basic rates to these bitmaps. */ if (IWM_RATE_24M_INDEX < lowest_present_ofdm) ofdm |= IWM_RATE_BIT_MSK(24) >> IWM_FIRST_OFDM_RATE; if (IWM_RATE_12M_INDEX < lowest_present_ofdm) ofdm |= IWM_RATE_BIT_MSK(12) >> IWM_FIRST_OFDM_RATE; /* 6M already there or needed so always add */ ofdm |= IWM_RATE_BIT_MSK(6) >> IWM_FIRST_OFDM_RATE; /* * CCK is a bit more complex with DSSS vs. HR/DSSS vs. ERP. * Note, however: * - if no CCK rates are basic, it must be ERP since there must * be some basic rates at all, so they're OFDM => ERP PHY * (or we're in 5 GHz, and the cck bitmap will never be used) * - if 11M is a basic rate, it must be ERP as well, so add 5.5M * - if 5.5M is basic, 1M and 2M are mandatory * - if 2M is basic, 1M is mandatory * - if 1M is basic, that's the only valid ACK rate. * As a consequence, it's not as complicated as it sounds, just add * any lower rates to the ACK rate bitmap. 
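 *
 * Note that as written the loops at the top mark every CCK and
 * OFDM rate as basic (no per-node basic-rate bitmap is consulted
 * yet), so the conditional additions here and above never fire;
 * they are kept to match the iwlwifi logic for when real
 * basic-rate information is plumbed through.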
*/ if (IWM_RATE_11M_INDEX < lowest_present_cck) cck |= IWM_RATE_BIT_MSK(11) >> IWM_FIRST_CCK_RATE; if (IWM_RATE_5M_INDEX < lowest_present_cck) cck |= IWM_RATE_BIT_MSK(5) >> IWM_FIRST_CCK_RATE; if (IWM_RATE_2M_INDEX < lowest_present_cck) cck |= IWM_RATE_BIT_MSK(2) >> IWM_FIRST_CCK_RATE; /* 1M already there or needed so always add */ cck |= IWM_RATE_BIT_MSK(1) >> IWM_FIRST_CCK_RATE; *cck_rates = cck; *ofdm_rates = ofdm; } static void iwm_mvm_mac_ctxt_cmd_common(struct iwm_softc *sc, struct iwm_node *in, struct iwm_mac_ctx_cmd *cmd, uint32_t action) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct ieee80211_node *ni = vap->iv_bss; int cck_ack_rates, ofdm_ack_rates; int i; int is2ghz; /* * id is the MAC address ID - something to do with MAC filtering. * color - not sure. * * These are both functions of the vap, not of the node. * So, for now, hard-code both to 0 (default). */ cmd->id_and_color = htole32(IWM_FW_CMD_ID_AND_COLOR(IWM_DEFAULT_MACID, IWM_DEFAULT_COLOR)); cmd->action = htole32(action); cmd->mac_type = htole32(IWM_FW_MAC_TYPE_BSS_STA); /* * The TSF ID is one of four TSF tracking resources in the firmware. * Read the iwlwifi/mvm code for more details. * * For now, just hard-code it to TSF tracking ID 0; we only support * a single STA mode VAP. * * It's per-vap, not per-node. */ cmd->tsf_id = htole32(IWM_DEFAULT_TSFID); IEEE80211_ADDR_COPY(cmd->node_addr, ic->ic_macaddr); /* * XXX should we error out if in_assoc is 1 and ni == NULL? */ if (in->in_assoc) { IEEE80211_ADDR_COPY(cmd->bssid_addr, ni->ni_bssid); } else { /* eth broadcast address */ memset(cmd->bssid_addr, 0xff, sizeof(cmd->bssid_addr)); } /* * Default to 2ghz if no node information is given. */ if (in) { is2ghz = !! IEEE80211_IS_CHAN_2GHZ(in->in_ni.ni_chan); } else { is2ghz = 1; } iwm_mvm_ack_rates(sc, is2ghz, &cck_ack_rates, &ofdm_ack_rates); cmd->cck_rates = htole32(cck_ack_rates); cmd->ofdm_rates = htole32(ofdm_ack_rates); cmd->cck_short_preamble = htole32((ic->ic_flags & IEEE80211_F_SHPREAMBLE) ? IWM_MAC_FLG_SHORT_PREAMBLE : 0); cmd->short_slot = htole32((ic->ic_flags & IEEE80211_F_SHSLOT) ? IWM_MAC_FLG_SHORT_SLOT : 0); /* XXX TODO: set wme parameters; also handle getting updated wme parameters */ for (i = 0; i < IWM_AC_NUM+1; i++) { int txf = i; cmd->ac[txf].cw_min = htole16(0x0f); cmd->ac[txf].cw_max = htole16(0x3f); cmd->ac[txf].aifsn = 1; cmd->ac[txf].fifos_mask = (1 << txf); cmd->ac[txf].edca_txop = 0; } if (ic->ic_flags & IEEE80211_F_USEPROT) cmd->protection_flags |= htole32(IWM_MAC_PROT_FLG_TGG_PROTECT); cmd->filter_flags = htole32(IWM_MAC_FILTER_ACCEPT_GRP); } static int iwm_mvm_mac_ctxt_send_cmd(struct iwm_softc *sc, struct iwm_mac_ctx_cmd *cmd) { int ret = iwm_mvm_send_cmd_pdu(sc, IWM_MAC_CONTEXT_CMD, IWM_CMD_SYNC, sizeof(*cmd), cmd); if (ret) device_printf(sc->sc_dev, "%s: Failed to send MAC context (action:%d): %d\n", __func__, le32toh(cmd->action), ret); return ret; } /* * Fill the specific data for mac context of type station or p2p client */ static void iwm_mvm_mac_ctxt_cmd_fill_sta(struct iwm_softc *sc, struct iwm_node *in, struct iwm_mac_data_sta *ctxt_sta, int force_assoc_off) { struct ieee80211_node *ni = &in->in_ni; unsigned dtim_period, dtim_count; struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); /* will this work? 
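 * (iv_dtim_period is expected to be zero until net80211 has
 * parsed a beacon from the BSS; in that case the code below
 * leaves is_assoc clear, which is the safe default)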
*/ dtim_period = vap->iv_dtim_period; dtim_count = vap->iv_dtim_count; IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_BEACON | IWM_DEBUG_CMD, "%s: force_assoc_off=%d\n", __func__, force_assoc_off); IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_BEACON | IWM_DEBUG_CMD, "DTIM: period=%d count=%d\n", dtim_period, dtim_count); IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_BEACON | IWM_DEBUG_CMD, "BEACON: tsf: %llu, ni_intval=%d\n", (unsigned long long) le64toh(ni->ni_tstamp.tsf), ni->ni_intval); /* We need the dtim_period to set the MAC as associated */ if (in->in_assoc && dtim_period && !force_assoc_off) { uint64_t tsf; uint32_t dtim_offs; /* * The DTIM count counts down, so when it is N that means N * more beacon intervals happen until the DTIM TBTT. Therefore * add this to the current time. If that ends up being in the * future, the firmware will handle it. * * Also note that the system_timestamp (which we get here as * "sync_device_ts") and TSF timestamp aren't at exactly the * same offset in the frame -- the TSF is at the first symbol * of the TSF, the system timestamp is at signal acquisition * time. This means there's an offset between them of at most * a few hundred microseconds (24 * 8 bits + PLCP time gives * 384us in the longest case), this is currently not relevant * as the firmware wakes up around 2ms before the TBTT. */ dtim_offs = dtim_count * ni->ni_intval; /* convert TU to usecs */ dtim_offs *= 1024; /* * net80211: TSF is in 802.11 order, so convert up to local * ordering before we manipulate things. */ tsf = le64toh(ni->ni_tstamp.tsf); ctxt_sta->dtim_tsf = htole64(tsf + dtim_offs); ctxt_sta->dtim_time = htole32(tsf + dtim_offs); IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_BEACON | IWM_DEBUG_CMD, "DTIM TBTT is 0x%llx/0x%x, offset %d\n", (long long)le64toh(ctxt_sta->dtim_tsf), le32toh(ctxt_sta->dtim_time), dtim_offs); ctxt_sta->is_assoc = htole32(1); } else { ctxt_sta->is_assoc = htole32(0); } IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_CMD | IWM_DEBUG_BEACON, "%s: ni_intval: %d, bi_reciprocal: %d, dtim_interval: %d, dtim_reciprocal: %d\n", __func__, ni->ni_intval, iwm_mvm_reciprocal(ni->ni_intval), ni->ni_intval * dtim_period, iwm_mvm_reciprocal(ni->ni_intval * dtim_period)); ctxt_sta->bi = htole32(ni->ni_intval); ctxt_sta->bi_reciprocal = htole32(iwm_mvm_reciprocal(ni->ni_intval)); ctxt_sta->dtim_interval = htole32(ni->ni_intval * dtim_period); ctxt_sta->dtim_reciprocal = htole32(iwm_mvm_reciprocal(ni->ni_intval * dtim_period)); /* 10 = CONN_MAX_LISTEN_INTERVAL */ ctxt_sta->listen_interval = htole32(10); IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_CMD | IWM_DEBUG_BEACON, "%s: associd=%d\n", __func__, IEEE80211_AID(ni->ni_associd)); ctxt_sta->assoc_id = htole32(IEEE80211_AID(ni->ni_associd)); } static int iwm_mvm_mac_ctxt_cmd_station(struct iwm_softc *sc, struct ieee80211vap *vap, uint32_t action) { struct ieee80211_node *ni = vap->iv_bss; - struct iwm_node *in = (struct iwm_node *) ni; + struct iwm_node *in = IWM_NODE(ni); struct iwm_mac_ctx_cmd cmd; IWM_DPRINTF(sc, IWM_DEBUG_RESET, "%s: called; action=%d\n", __func__, action); memset(&cmd, 0, sizeof(cmd)); /* Fill the common data for all mac context types */ iwm_mvm_mac_ctxt_cmd_common(sc, in, &cmd, action); /* Allow beacons to pass through as long as we are not associated,or we * do not have dtim period information */ if (!in->in_assoc || !vap->iv_dtim_period) cmd.filter_flags |= htole32(IWM_MAC_FILTER_IN_BEACON); else cmd.filter_flags &= ~htole32(IWM_MAC_FILTER_IN_BEACON); /* Fill the data specific for station mode */ 
iwm_mvm_mac_ctxt_cmd_fill_sta(sc, in, &cmd.sta, action == IWM_FW_CTXT_ACTION_ADD); return iwm_mvm_mac_ctxt_send_cmd(sc, &cmd); } static int iwm_mvm_mac_ctx_send(struct iwm_softc *sc, struct ieee80211vap *vap, uint32_t action) { int ret; ret = iwm_mvm_mac_ctxt_cmd_station(sc, vap, action); if (ret) return (ret); return (0); } int iwm_mvm_mac_ctxt_add(struct iwm_softc *sc, struct ieee80211vap *vap) { struct iwm_vap *iv = IWM_VAP(vap); int ret; if (iv->is_uploaded != 0) { device_printf(sc->sc_dev, "%s: called; uploaded != 0\n", __func__); return (EIO); } ret = iwm_mvm_mac_ctx_send(sc, vap, IWM_FW_CTXT_ACTION_ADD); if (ret) return (ret); iv->is_uploaded = 1; return (0); } int iwm_mvm_mac_ctxt_changed(struct iwm_softc *sc, struct ieee80211vap *vap) { struct iwm_vap *iv = IWM_VAP(vap); int ret; if (iv->is_uploaded == 0) { device_printf(sc->sc_dev, "%s: called; uploaded = 0\n", __func__); return (EIO); } ret = iwm_mvm_mac_ctx_send(sc, vap, IWM_FW_CTXT_ACTION_MODIFY); if (ret) return (ret); return (0); } #if 0 static int iwm_mvm_mac_ctxt_remove(struct iwm_softc *sc, struct iwm_node *in) { struct iwm_mac_ctx_cmd cmd; int ret; if (!in->in_uploaded) { device_printf(sc->sc_dev, "attempt to remove !uploaded node %p", in); return EIO; } memset(&cmd, 0, sizeof(cmd)); cmd.id_and_color = htole32(IWM_FW_CMD_ID_AND_COLOR(IWM_DEFAULT_MACID, IWM_DEFAULT_COLOR)); cmd.action = htole32(IWM_FW_CTXT_ACTION_REMOVE); ret = iwm_mvm_send_cmd_pdu(sc, IWM_MAC_CONTEXT_CMD, IWM_CMD_SYNC, sizeof(cmd), &cmd); if (ret) { device_printf(sc->sc_dev, "Failed to remove MAC context: %d\n", ret); return ret; } in->in_uploaded = 0; return 0; } #endif diff --git a/sys/dev/iwm/if_iwmvar.h b/sys/dev/iwm/if_iwmvar.h index 9434b838fce1..52363c57d1b1 100644 --- a/sys/dev/iwm/if_iwmvar.h +++ b/sys/dev/iwm/if_iwmvar.h @@ -1,528 +1,524 @@ /* $OpenBSD: if_iwmvar.h,v 1.7 2015/03/02 13:51:10 jsg Exp $ */ /* $FreeBSD$ */ /* * Copyright (c) 2014 genua mbh * Copyright (c) 2014 Fixup Software Ltd. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /*- * Based on BSD-licensed source modules in the Linux iwlwifi driver, * which were used as the reference documentation for this implementation. * * Driver version we are currently based off of is * Linux 3.14.3 (tag id a2df521e42b1d9a23f620ac79dbfe8655a8391dd) * *********************************************************************** * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * * Copyright(c) 2007 - 2013 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. 
* * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, * USA * * The full GNU General Public License is included in this distribution * in the file called COPYING. * * Contact Information: * Intel Linux Wireless * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 * * * BSD LICENSE * * Copyright(c) 2005 - 2013 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * Neither the name Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2007-2010 Damien Bergamini * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ struct iwm_rx_radiotap_header { struct ieee80211_radiotap_header wr_ihdr; uint64_t wr_tsft; uint8_t wr_flags; uint8_t wr_rate; uint16_t wr_chan_freq; uint16_t wr_chan_flags; int8_t wr_dbm_antsignal; int8_t wr_dbm_antnoise; } __packed; #define IWM_RX_RADIOTAP_PRESENT \ ((1 << IEEE80211_RADIOTAP_TSFT) | \ (1 << IEEE80211_RADIOTAP_FLAGS) | \ (1 << IEEE80211_RADIOTAP_RATE) | \ (1 << IEEE80211_RADIOTAP_CHANNEL) | \ (1 << IEEE80211_RADIOTAP_DBM_ANTSIGNAL) | \ (1 << IEEE80211_RADIOTAP_DBM_ANTNOISE)) struct iwm_tx_radiotap_header { struct ieee80211_radiotap_header wt_ihdr; uint8_t wt_flags; uint8_t wt_rate; uint16_t wt_chan_freq; uint16_t wt_chan_flags; - uint8_t wt_hwqueue; } __packed; #define IWM_TX_RADIOTAP_PRESENT \ ((1 << IEEE80211_RADIOTAP_FLAGS) | \ (1 << IEEE80211_RADIOTAP_RATE) | \ (1 << IEEE80211_RADIOTAP_CHANNEL)) #define IWM_UCODE_SECT_MAX 6 #define IWM_FWDMASEGSZ (192*1024) /* sanity check value */ #define IWM_FWMAXSIZE (2*1024*1024) /* * fw_status is used to determine if we've already parsed the firmware file * * In addition to the following, status < 0 ==> -error */ #define IWM_FW_STATUS_NONE 0 #define IWM_FW_STATUS_INPROGRESS 1 #define IWM_FW_STATUS_DONE 2 -#define IWM_LOCK(_sc) mtx_lock(&sc->sc_mtx) -#define IWM_UNLOCK(_sc) mtx_unlock(&sc->sc_mtx) - enum iwm_ucode_type { IWM_UCODE_TYPE_INIT, IWM_UCODE_TYPE_REGULAR, IWM_UCODE_TYPE_WOW, IWM_UCODE_TYPE_MAX }; struct iwm_fw_info { const void *fw_rawdata; size_t fw_rawsize; int fw_status; struct iwm_fw_sects { struct iwm_fw_onesect { const void *fws_data; uint32_t fws_len; uint32_t fws_devoff; } fw_sect[IWM_UCODE_SECT_MAX]; size_t fw_totlen; int fw_count; } fw_sects[IWM_UCODE_TYPE_MAX]; }; struct iwm_nvm_data { int n_hw_addrs; uint8_t hw_addr[IEEE80211_ADDR_LEN]; uint8_t calib_version; uint16_t calib_voltage; uint16_t raw_temperature; uint16_t kelvin_temperature; uint16_t kelvin_voltage; uint16_t xtal_calib[2]; int sku_cap_band_24GHz_enable; int sku_cap_band_52GHz_enable; int sku_cap_11n_enable; int sku_cap_amt_enable; int sku_cap_ipan_enable; uint8_t radio_cfg_type; uint8_t radio_cfg_step; uint8_t radio_cfg_dash; uint8_t radio_cfg_pnum; uint8_t valid_tx_ant, valid_rx_ant; uint16_t nvm_version; uint8_t max_tx_pwr_half_dbm; }; /* max bufs per tfd the driver will use */ #define IWM_MAX_CMD_TBS_PER_TFD 2 struct iwm_rx_packet; struct iwm_host_cmd { const void *data[IWM_MAX_CMD_TBS_PER_TFD]; struct iwm_rx_packet *resp_pkt; unsigned long _rx_page_addr; uint32_t _rx_page_order; int handler_status; uint32_t flags; uint16_t len[IWM_MAX_CMD_TBS_PER_TFD]; uint8_t dataflags[IWM_MAX_CMD_TBS_PER_TFD]; uint8_t id; }; /* * DMA glue is from iwn */ typedef caddr_t iwm_caddr_t; typedef void *iwm_hookarg_t; struct iwm_dma_info { bus_dma_tag_t tag; bus_dmamap_t map; bus_dma_segment_t seg; bus_addr_t paddr; void *vaddr; bus_size_t size; }; #define IWM_TX_RING_COUNT 256 #define IWM_TX_RING_LOMARK 192 #define IWM_TX_RING_HIMARK 224 struct iwm_tx_data { - bus_dmamap_t map; - bus_addr_t cmd_paddr; - bus_addr_t scratch_paddr; - struct mbuf *m; - struct iwm_node *in; - int done; + bus_dmamap_t map; + bus_addr_t cmd_paddr; + bus_addr_t scratch_paddr; + struct mbuf *m; + struct iwm_node *in; + int done; }; struct iwm_tx_ring { struct iwm_dma_info desc_dma; struct iwm_dma_info cmd_dma; struct iwm_tfd *desc; struct iwm_device_cmd *cmd; bus_dma_tag_t data_dmat; struct iwm_tx_data data[IWM_TX_RING_COUNT]; int qid; int queued; int cur; }; #define IWM_RX_RING_COUNT 256 #define IWM_RBUF_COUNT (IWM_RX_RING_COUNT + 32) /* Linux driver optionally uses 8k buffer */ #define 
IWM_RBUF_SIZE 4096 #define IWM_MAX_SCATTER 20 struct iwm_softc; struct iwm_rbuf { struct iwm_softc *sc; void *vaddr; bus_addr_t paddr; }; struct iwm_rx_data { struct mbuf *m; bus_dmamap_t map; int wantresp; }; struct iwm_rx_ring { struct iwm_dma_info desc_dma; struct iwm_dma_info stat_dma; struct iwm_dma_info buf_dma; uint32_t *desc; struct iwm_rb_status *stat; struct iwm_rx_data data[IWM_RX_RING_COUNT]; bus_dma_tag_t data_dmat; int cur; }; -#define IWM_FLAG_USE_ICT 0x01 -#define IWM_FLAG_HW_INITED 0x02 -#define IWM_FLAG_STOPPED 0x04 -#define IWM_FLAG_RFKILL 0x08 -#define IWM_FLAG_BUSY 0x10 - struct iwm_ucode_status { uint32_t uc_error_event_table; uint32_t uc_log_event_table; int uc_ok; int uc_intr; }; #define IWM_CMD_RESP_MAX PAGE_SIZE #define IWM_OTP_LOW_IMAGE_SIZE 2048 #define IWM_MVM_TE_SESSION_PROTECTION_MAX_TIME_MS 500 #define IWM_MVM_TE_SESSION_PROTECTION_MIN_TIME_MS 400 /* * Command headers are in iwl-trans.h, which is full of all * kinds of other junk, so we just replicate the structures here. * First the software bits: */ enum IWM_CMD_MODE { IWM_CMD_SYNC = 0, IWM_CMD_ASYNC = (1 << 0), IWM_CMD_WANT_SKB = (1 << 1), IWM_CMD_SEND_IN_RFKILL = (1 << 2), }; enum iwm_hcmd_dataflag { IWM_HCMD_DFL_NOCOPY = (1 << 0), IWM_HCMD_DFL_DUP = (1 << 1), }; /* * iwlwifi/iwl-phy-db */ #define IWM_NUM_PAPD_CH_GROUPS 4 #define IWM_NUM_TXP_CH_GROUPS 9 struct iwm_phy_db_entry { uint16_t size; uint8_t *data; }; struct iwm_phy_db { struct iwm_phy_db_entry cfg; struct iwm_phy_db_entry calib_nch; struct iwm_phy_db_entry calib_ch_group_papd[IWM_NUM_PAPD_CH_GROUPS]; struct iwm_phy_db_entry calib_ch_group_txp[IWM_NUM_TXP_CH_GROUPS]; }; struct iwm_int_sta { uint32_t sta_id; uint32_t tfd_queue_msk; }; struct iwm_mvm_phy_ctxt { uint16_t id; uint16_t color; uint32_t ref; struct ieee80211_channel *channel; }; struct iwm_bf_data { int bf_enabled; /* filtering */ int ba_enabled; /* abort */ int ave_beacon_signal; int last_cqm_event; }; struct iwm_vap { - struct ieee80211vap iv_vap; - uint8_t macaddr[IEEE80211_ADDR_LEN]; - int is_uploaded; + struct ieee80211vap iv_vap; + int is_uploaded; - int (*iv_newstate)(struct ieee80211vap *, enum ieee80211_state, int); + int (*iv_newstate)(struct ieee80211vap *, + enum ieee80211_state, int); }; +#define IWM_VAP(_vap) ((struct iwm_vap *)(_vap)) -#define IWM_VAP(_vap) ((struct iwm_vap *)(_vap)) +struct iwm_node { + struct ieee80211_node in_ni; + struct iwm_mvm_phy_ctxt *in_phyctxt; + + /* status "bits" */ + int in_assoc; + + struct iwm_lq_cmd in_lq; + + uint8_t in_ridx[IEEE80211_RATE_MAXSIZE]; +}; +#define IWM_NODE(_ni) ((struct iwm_node *)(_ni)) + +#define IWM_STATION_ID 0 + +#define IWM_DEFAULT_MACID 0 +#define IWM_DEFAULT_COLOR 0 +#define IWM_DEFAULT_TSFID 0 + +#define IWM_ICT_SIZE 4096 +#define IWM_ICT_COUNT (IWM_ICT_SIZE / sizeof (uint32_t)) +#define IWM_ICT_PADDR_SHIFT 12 struct iwm_softc { + device_t sc_dev; + uint32_t sc_debug; + struct mtx sc_mtx; struct mbufq sc_snd; struct ieee80211com sc_ic; - device_t sc_dev; + + int sc_flags; +#define IWM_FLAG_USE_ICT (1 << 0) +#define IWM_FLAG_HW_INITED (1 << 1) +#define IWM_FLAG_STOPPED (1 << 2) +#define IWM_FLAG_RFKILL (1 << 3) +#define IWM_FLAG_BUSY (1 << 4) struct intr_config_hook sc_preinit_hook; - struct callout sc_watchdog_to; + struct callout sc_watchdog_to; struct task init_task; - struct resource *sc_irq; - struct resource *sc_mem; - bus_space_tag_t sc_st; - bus_space_handle_t sc_sh; - bus_size_t sc_sz; - bus_dma_tag_t sc_dmat; - void *sc_ih; + struct resource *sc_irq; + struct resource *sc_mem; + bus_space_tag_t sc_st; + 
bus_space_handle_t sc_sh; + bus_size_t sc_sz; + bus_dma_tag_t sc_dmat; + void *sc_ih; /* TX scheduler rings. */ - struct iwm_dma_info sched_dma; - uint32_t sched_base; + struct iwm_dma_info sched_dma; + uint32_t sched_base; /* TX/RX rings. */ - struct iwm_tx_ring txq[IWM_MVM_MAX_QUEUES]; - struct iwm_rx_ring rxq; - int qfullmsk; + struct iwm_tx_ring txq[IWM_MVM_MAX_QUEUES]; + struct iwm_rx_ring rxq; + int qfullmsk; - int sc_sf_state; + int sc_sf_state; /* ICT table. */ struct iwm_dma_info ict_dma; int ict_cur; - int sc_hw_rev; - int sc_hw_id; + int sc_hw_rev; + int sc_hw_id; - struct iwm_dma_info kw_dma; - struct iwm_dma_info fw_dma; + struct iwm_dma_info kw_dma; + struct iwm_dma_info fw_dma; - int sc_fw_chunk_done; - int sc_init_complete; + int sc_fw_chunk_done; + int sc_init_complete; - struct iwm_ucode_status sc_uc; - enum iwm_ucode_type sc_uc_current; - int sc_fwver; + struct iwm_ucode_status sc_uc; + enum iwm_ucode_type sc_uc_current; + int sc_fwver; - int sc_capaflags; - int sc_capa_max_probe_len; + int sc_capaflags; + int sc_capa_max_probe_len; - int sc_intmask; - int sc_flags; - uint32_t sc_debug; + int sc_intmask; /* * So why do we need a separate stopped flag and a generation? * The former protects the device from issuing commands when it's * stopped (duh). The latter protects against a race from a very * fast stop/unstop cycle where threads waiting for responses do * not have a chance to run in between. Notably: we want to stop * the device from interrupt context when it craps out, so we * don't have the luxury of waiting for quiescence. */ - int sc_generation; + int sc_generation; - const char *sc_fwname; - bus_size_t sc_fwdmasegsz; - struct iwm_fw_info sc_fw; - int sc_fw_phy_config; + const char *sc_fwname; + bus_size_t sc_fwdmasegsz; + struct iwm_fw_info sc_fw; + int sc_fw_phy_config; struct iwm_tlv_calib_ctrl sc_default_calib[IWM_UCODE_TYPE_MAX]; - struct iwm_nvm_data sc_nvm; - struct iwm_phy_db sc_phy_db; + struct iwm_nvm_data sc_nvm; + struct iwm_phy_db sc_phy_db; - struct iwm_bf_data sc_bf; + struct iwm_bf_data sc_bf; - int sc_tx_timer; + int sc_tx_timer; - struct iwm_scan_cmd *sc_scan_cmd; - size_t sc_scan_cmd_len; - int sc_scan_last_antenna; - int sc_scanband; + struct iwm_scan_cmd *sc_scan_cmd; + size_t sc_scan_cmd_len; + int sc_scan_last_antenna; + int sc_scanband; - int sc_auth_prot; + int sc_auth_prot; - int sc_fixed_ridx; + int sc_fixed_ridx; - int sc_staid; - int sc_nodecolor; + int sc_staid; + int sc_nodecolor; - uint8_t sc_cmd_resp[IWM_CMD_RESP_MAX]; - int sc_wantresp; + uint8_t sc_cmd_resp[IWM_CMD_RESP_MAX]; + int sc_wantresp; - struct taskqueue *sc_tq; - struct task sc_es_task; + struct taskqueue *sc_tq; + struct task sc_es_task; - struct iwm_rx_phy_info sc_last_phy_info; - int sc_ampdu_ref; + struct iwm_rx_phy_info sc_last_phy_info; + int sc_ampdu_ref; - struct iwm_int_sta sc_aux_sta; + struct iwm_int_sta sc_aux_sta; /* phy contexts.
we only use the first one */ - struct iwm_mvm_phy_ctxt sc_phyctxt[IWM_NUM_PHY_CTX]; + struct iwm_mvm_phy_ctxt sc_phyctxt[IWM_NUM_PHY_CTX]; struct iwm_notif_statistics sc_stats; - int sc_noise; + int sc_noise; - int host_interrupt_operation_mode; + int host_interrupt_operation_mode; caddr_t sc_drvbpf; - union { - struct iwm_rx_radiotap_header th; - uint8_t pad[IEEE80211_RADIOTAP_HDRLEN]; - } sc_rxtapu; -#define sc_rxtap sc_rxtapu.th - - union { - struct iwm_tx_radiotap_header th; - uint8_t pad[IEEE80211_RADIOTAP_HDRLEN]; - } sc_txtapu; -#define sc_txtap sc_txtapu.th - - int sc_max_rssi; -}; - -#define IWM_DEFAULT_MACID 0 -#define IWM_DEFAULT_COLOR 0 -#define IWM_DEFAULT_TSFID 0 - -struct iwm_node { - struct ieee80211_node in_ni; - struct iwm_mvm_phy_ctxt *in_phyctxt; - - /* status "bits" */ - int in_assoc; - - struct iwm_lq_cmd in_lq; + struct iwm_rx_radiotap_header sc_rxtap; + struct iwm_tx_radiotap_header sc_txtap; - uint8_t in_ridx[IEEE80211_RATE_MAXSIZE]; + int sc_max_rssi; }; -#define IWM_STATION_ID 0 -#define IWM_ICT_SIZE 4096 -#define IWM_ICT_COUNT (IWM_ICT_SIZE / sizeof (uint32_t)) -#define IWM_ICT_PADDR_SHIFT 12 +#define IWM_LOCK_INIT(_sc) \ + mtx_init(&(_sc)->sc_mtx, device_get_nameunit((_sc)->sc_dev), \ + MTX_NETWORK_LOCK, MTX_DEF); +#define IWM_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx) +#define IWM_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx) +#define IWM_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->sc_mtx) diff --git a/sys/dev/ixl/if_ixl.c b/sys/dev/ixl/if_ixl.c index 04281400f175..505d4b4cbb12 100644 --- a/sys/dev/ixl/if_ixl.c +++ b/sys/dev/ixl/if_ixl.c @@ -1,6680 +1,6684 @@ /****************************************************************************** Copyright (c) 2013-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
******************************************************************************/ /*$FreeBSD$*/ #ifndef IXL_STANDALONE_BUILD #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #endif #include "ixl.h" #include "ixl_pf.h" #ifdef RSS #include #endif /********************************************************************* * Driver version *********************************************************************/ char ixl_driver_version[] = "1.4.3"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into ixl_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static ixl_vendor_info_t ixl_vendor_info_array[] = { {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_SFP_XL710, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_KX_A, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_KX_B, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_KX_C, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_A, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_B, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_C, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_BASE_T, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_BASE_T4, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_20G_KR2, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_20G_KR2_A, 0, 0, 0}, #ifdef X722_SUPPORT {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_SFP_X722, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_1G_BASE_T_X722, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_BASE_T_X722, 0, 0, 0}, #endif /* required last entry */ {0, 0, 0, 0, 0} }; /********************************************************************* * Table of branding strings *********************************************************************/ static char *ixl_strings[] = { "Intel(R) Ethernet Connection XL710 Driver" }; /********************************************************************* * Function prototypes *********************************************************************/ static int ixl_probe(device_t); static int ixl_attach(device_t); static int ixl_detach(device_t); static int ixl_shutdown(device_t); static int ixl_get_hw_capabilities(struct ixl_pf *); static void ixl_cap_txcsum_tso(struct ixl_vsi *, struct ifnet *, int); static int ixl_ioctl(struct ifnet *, u_long, caddr_t); static void ixl_init(void *); static void ixl_init_locked(struct ixl_pf *); static void ixl_stop(struct ixl_pf *); static void ixl_media_status(struct ifnet *, struct ifmediareq *); static int ixl_media_change(struct ifnet *); static void ixl_update_link_status(struct ixl_pf *); static int ixl_allocate_pci_resources(struct ixl_pf *); static u16 ixl_get_bus_info(struct i40e_hw *, device_t); static int ixl_setup_stations(struct ixl_pf *); static int ixl_switch_config(struct ixl_pf *); static int ixl_initialize_vsi(struct ixl_vsi *); static int ixl_assign_vsi_msix(struct ixl_pf *); static int ixl_assign_vsi_legacy(struct ixl_pf *); static int ixl_init_msix(struct ixl_pf *); static void ixl_configure_msix(struct ixl_pf *); static void ixl_configure_itr(struct ixl_pf *); static void ixl_configure_legacy(struct ixl_pf *); static void ixl_free_pci_resources(struct ixl_pf *); static void ixl_local_timer(void *); static int ixl_setup_interface(device_t, struct ixl_vsi *); static void ixl_link_event(struct ixl_pf *, struct i40e_arq_event_info *); static void ixl_config_rss(struct ixl_vsi *); static void 
ixl_set_queue_rx_itr(struct ixl_queue *); static void ixl_set_queue_tx_itr(struct ixl_queue *); static int ixl_set_advertised_speeds(struct ixl_pf *, int); static int ixl_enable_rings(struct ixl_vsi *); static int ixl_disable_rings(struct ixl_vsi *); static void ixl_enable_intr(struct ixl_vsi *); static void ixl_disable_intr(struct ixl_vsi *); static void ixl_disable_rings_intr(struct ixl_vsi *); static void ixl_enable_adminq(struct i40e_hw *); static void ixl_disable_adminq(struct i40e_hw *); static void ixl_enable_queue(struct i40e_hw *, int); static void ixl_disable_queue(struct i40e_hw *, int); static void ixl_enable_legacy(struct i40e_hw *); static void ixl_disable_legacy(struct i40e_hw *); static void ixl_set_promisc(struct ixl_vsi *); static void ixl_add_multi(struct ixl_vsi *); static void ixl_del_multi(struct ixl_vsi *); static void ixl_register_vlan(void *, struct ifnet *, u16); static void ixl_unregister_vlan(void *, struct ifnet *, u16); static void ixl_setup_vlan_filters(struct ixl_vsi *); static void ixl_init_filters(struct ixl_vsi *); static void ixl_reconfigure_filters(struct ixl_vsi *vsi); static void ixl_add_filter(struct ixl_vsi *, u8 *, s16 vlan); static void ixl_del_filter(struct ixl_vsi *, u8 *, s16 vlan); static void ixl_add_hw_filters(struct ixl_vsi *, int, int); static void ixl_del_hw_filters(struct ixl_vsi *, int); static struct ixl_mac_filter * ixl_find_filter(struct ixl_vsi *, u8 *, s16); static void ixl_add_mc_filter(struct ixl_vsi *, u8 *); static void ixl_free_mac_filters(struct ixl_vsi *vsi); /* Sysctl debug interface */ #ifdef IXL_DEBUG_SYSCTL static int ixl_debug_info(SYSCTL_HANDLER_ARGS); static void ixl_print_debug_info(struct ixl_pf *); #endif /* The MSI/X Interrupt handlers */ static void ixl_intr(void *); static void ixl_msix_que(void *); static void ixl_msix_adminq(void *); static void ixl_handle_mdd_event(struct ixl_pf *); /* Deferred interrupt tasklets */ static void ixl_do_adminq(void *, int); /* Sysctl handlers */ static int ixl_set_flowcntl(SYSCTL_HANDLER_ARGS); static int ixl_set_advertise(SYSCTL_HANDLER_ARGS); static int ixl_current_speed(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_show_fw(SYSCTL_HANDLER_ARGS); /* Statistics */ static void ixl_add_hw_stats(struct ixl_pf *); static void ixl_add_sysctls_mac_stats(struct sysctl_ctx_list *, struct sysctl_oid_list *, struct i40e_hw_port_stats *); static void ixl_add_sysctls_eth_stats(struct sysctl_ctx_list *, struct sysctl_oid_list *, struct i40e_eth_stats *); static void ixl_update_stats_counters(struct ixl_pf *); static void ixl_update_eth_stats(struct ixl_vsi *); static void ixl_update_vsi_stats(struct ixl_vsi *); static void ixl_pf_reset_stats(struct ixl_pf *); static void ixl_vsi_reset_stats(struct ixl_vsi *); static void ixl_stat_update48(struct i40e_hw *, u32, u32, bool, u64 *, u64 *); static void ixl_stat_update32(struct i40e_hw *, u32, bool, u64 *, u64 *); #ifdef IXL_DEBUG_SYSCTL static int ixl_sysctl_link_status(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_phy_abilities(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_sw_filter_list(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_hw_res_alloc(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_switch_config(SYSCTL_HANDLER_ARGS); #endif #ifdef PCI_IOV static int ixl_adminq_err_to_errno(enum i40e_admin_queue_err err); static int ixl_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t*); static void ixl_iov_uninit(device_t dev); static int ixl_add_vf(device_t dev, uint16_t vfnum, const nvlist_t*); static void ixl_handle_vf_msg(struct ixl_pf *, struct 
i40e_arq_event_info *); static void ixl_handle_vflr(void *arg, int pending); static void ixl_reset_vf(struct ixl_pf *pf, struct ixl_vf *vf); static void ixl_reinit_vf(struct ixl_pf *pf, struct ixl_vf *vf); #endif /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t ixl_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ixl_probe), DEVMETHOD(device_attach, ixl_attach), DEVMETHOD(device_detach, ixl_detach), DEVMETHOD(device_shutdown, ixl_shutdown), #ifdef PCI_IOV DEVMETHOD(pci_iov_init, ixl_iov_init), DEVMETHOD(pci_iov_uninit, ixl_iov_uninit), DEVMETHOD(pci_iov_add_vf, ixl_add_vf), #endif {0, 0} }; static driver_t ixl_driver = { "ixl", ixl_methods, sizeof(struct ixl_pf), }; devclass_t ixl_devclass; DRIVER_MODULE(ixl, pci, ixl_driver, ixl_devclass, 0, 0); MODULE_DEPEND(ixl, pci, 1, 1, 1); MODULE_DEPEND(ixl, ether, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(ixl, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ /* ** Global reset mutex */ static struct mtx ixl_reset_mtx; /* ** TUNEABLE PARAMETERS: */ static SYSCTL_NODE(_hw, OID_AUTO, ixl, CTLFLAG_RD, 0, "IXL driver parameters"); /* * MSIX should be the default for best performance, * but this allows it to be forced off for testing. */ static int ixl_enable_msix = 1; TUNABLE_INT("hw.ixl.enable_msix", &ixl_enable_msix); SYSCTL_INT(_hw_ixl, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &ixl_enable_msix, 0, "Enable MSI-X interrupts"); /* ** Number of descriptors per ring: ** - TX and RX are the same size */ static int ixl_ringsz = DEFAULT_RING; TUNABLE_INT("hw.ixl.ringsz", &ixl_ringsz); SYSCTL_INT(_hw_ixl, OID_AUTO, ring_size, CTLFLAG_RDTUN, &ixl_ringsz, 0, "Descriptor Ring Size"); /* ** This can be set manually; if left as 0, the ** number of queues will be calculated based ** on cpus and msix vectors available.
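** For example, a hypothetical loader.conf(5) entry (the value is purely ** illustrative): hw.ixl.max_queues=8 would pin the driver to eight ** queue pairs instead of letting it auto-size.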
*/ int ixl_max_queues = 0; TUNABLE_INT("hw.ixl.max_queues", &ixl_max_queues); SYSCTL_INT(_hw_ixl, OID_AUTO, max_queues, CTLFLAG_RDTUN, &ixl_max_queues, 0, "Number of Queues"); /* ** Controls for Interrupt Throttling ** - true/false for dynamic adjustment ** - default values for static ITR */ int ixl_dynamic_rx_itr = 0; TUNABLE_INT("hw.ixl.dynamic_rx_itr", &ixl_dynamic_rx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, dynamic_rx_itr, CTLFLAG_RDTUN, &ixl_dynamic_rx_itr, 0, "Dynamic RX Interrupt Rate"); int ixl_dynamic_tx_itr = 0; TUNABLE_INT("hw.ixl.dynamic_tx_itr", &ixl_dynamic_tx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, dynamic_tx_itr, CTLFLAG_RDTUN, &ixl_dynamic_tx_itr, 0, "Dynamic TX Interrupt Rate"); int ixl_rx_itr = IXL_ITR_8K; TUNABLE_INT("hw.ixl.rx_itr", &ixl_rx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, rx_itr, CTLFLAG_RDTUN, &ixl_rx_itr, 0, "RX Interrupt Rate"); int ixl_tx_itr = IXL_ITR_4K; TUNABLE_INT("hw.ixl.tx_itr", &ixl_tx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, tx_itr, CTLFLAG_RDTUN, &ixl_tx_itr, 0, "TX Interrupt Rate"); #ifdef IXL_FDIR static int ixl_enable_fdir = 1; TUNABLE_INT("hw.ixl.enable_fdir", &ixl_enable_fdir); /* Rate at which we sample */ int ixl_atr_rate = 20; TUNABLE_INT("hw.ixl.atr_rate", &ixl_atr_rate); #endif #ifdef DEV_NETMAP #define NETMAP_IXL_MAIN /* only bring in one part of the netmap code */ #include #endif /* DEV_NETMAP */ static char *ixl_fc_string[6] = { "None", "Rx", "Tx", "Full", "Priority", "Default" }; static MALLOC_DEFINE(M_IXL, "ixl", "ixl driver allocations"); static uint8_t ixl_bcast_addr[ETHER_ADDR_LEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; /********************************************************************* * Device identification routine * * ixl_probe determines if the driver should be loaded on * the hardware based on PCI vendor/device id of the device. * * return BUS_PROBE_DEFAULT on success, positive on failure *********************************************************************/ static int ixl_probe(device_t dev) { ixl_vendor_info_t *ent; u16 pci_vendor_id, pci_device_id; u16 pci_subvendor_id, pci_subdevice_id; char device_name[256]; static bool lock_init = FALSE; INIT_DEBUGOUT("ixl_probe: begin"); pci_vendor_id = pci_get_vendor(dev); if (pci_vendor_id != I40E_INTEL_VENDOR_ID) return (ENXIO); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); ent = ixl_vendor_info_array; while (ent->vendor_id != 0) { if ((pci_vendor_id == ent->vendor_id) && (pci_device_id == ent->device_id) && ((pci_subvendor_id == ent->subvendor_id) || (ent->subvendor_id == 0)) && ((pci_subdevice_id == ent->subdevice_id) || (ent->subdevice_id == 0))) { sprintf(device_name, "%s, Version - %s", ixl_strings[ent->index], ixl_driver_version); device_set_desc_copy(dev, device_name); /* One shot mutex init */ if (lock_init == FALSE) { lock_init = TRUE; mtx_init(&ixl_reset_mtx, "ixl_reset", "IXL RESET Lock", MTX_DEF); } return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded. * This routine identifies the type of hardware, allocates all resources * and initializes the hardware. 
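 * (Ordering note, taken from the body below: i40e_init_adminq() must * succeed before capability discovery, LAN HMC setup and interrupt * assignment are attempted, and the error labels unwind in reverse.)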
* * return 0 on success, positive on failure *********************************************************************/ static int ixl_attach(device_t dev) { struct ixl_pf *pf; struct i40e_hw *hw; struct ixl_vsi *vsi; u16 bus; int error = 0; #ifdef PCI_IOV nvlist_t *pf_schema, *vf_schema; int iov_error; #endif INIT_DEBUGOUT("ixl_attach: begin"); /* Allocate, clear, and link in our primary soft structure */ pf = device_get_softc(dev); pf->dev = pf->osdep.dev = dev; hw = &pf->hw; /* ** Note this assumes we have a single embedded VSI, ** this could be enhanced later to allocate multiple */ vsi = &pf->vsi; vsi->dev = pf->dev; /* Core Lock Init*/ IXL_PF_LOCK_INIT(pf, device_get_nameunit(dev)); /* Set up the timer callout */ callout_init_mtx(&pf->timer, &pf->pf_mtx, 0); /* Set up sysctls */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW, pf, 0, ixl_set_flowcntl, "I", "Flow Control"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW, pf, 0, ixl_set_advertise, "I", "Advertised Speed"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "current_speed", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_current_speed, "A", "Current Port Speed"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_show_fw, "A", "Firmware version"); SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "rx_itr", CTLFLAG_RW, &ixl_rx_itr, IXL_ITR_8K, "RX ITR"); SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "dynamic_rx_itr", CTLFLAG_RW, &ixl_dynamic_rx_itr, 0, "Dynamic RX ITR"); SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "tx_itr", CTLFLAG_RW, &ixl_tx_itr, IXL_ITR_4K, "TX ITR"); SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "dynamic_tx_itr", CTLFLAG_RW, &ixl_dynamic_tx_itr, 0, "Dynamic TX ITR"); #ifdef IXL_DEBUG_SYSCTL SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, pf, 0, ixl_debug_info, "I", "Debug Information"); /* Debug shared-code message level */ SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug_mask", CTLFLAG_RW, &pf->hw.debug_mask, 0, "Debug Message Level"); SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "vc_debug_level", CTLFLAG_RW, &pf->vc_debug_lvl, 0, "PF/VF Virtual Channel debug level"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "link_status", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_link_status, "A", "Current Link Status"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "phy_abilities", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_phy_abilities, "A", "PHY Abilities"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "filter_list", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_sw_filter_list, "A", "SW Filter List"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "hw_res_alloc", 
CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_hw_res_alloc, "A", "HW Resource Allocation"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "switch_config", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_switch_config, "A", "HW Switch Configuration"); #endif /* Save off the PCI information */ hw->vendor_id = pci_get_vendor(dev); hw->device_id = pci_get_device(dev); hw->revision_id = pci_read_config(dev, PCIR_REVID, 1); hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); hw->bus.device = pci_get_slot(dev); hw->bus.func = pci_get_function(dev); pf->vc_debug_lvl = 1; /* Do PCI setup - map BAR0, etc */ if (ixl_allocate_pci_resources(pf)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_out; } /* Establish a clean starting point */ i40e_clear_hw(hw); error = i40e_pf_reset(hw); if (error) { device_printf(dev,"PF reset failure %x\n", error); error = EIO; goto err_out; } /* Set admin queue parameters */ hw->aq.num_arq_entries = IXL_AQ_LEN; hw->aq.num_asq_entries = IXL_AQ_LEN; hw->aq.arq_buf_size = IXL_AQ_BUFSZ; hw->aq.asq_buf_size = IXL_AQ_BUFSZ; /* Initialize the shared code */ error = i40e_init_shared_code(hw); if (error) { device_printf(dev,"Unable to initialize the shared code\n"); error = EIO; goto err_out; } /* Set up the admin queue */ error = i40e_init_adminq(hw); if (error) { device_printf(dev, "The driver for the device stopped " "because the NVM image is newer than expected.\n" "You must install the most recent version of " "the network driver.\n"); goto err_out; } device_printf(dev, "%s\n", ixl_fw_version_str(hw)); if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && hw->aq.api_min_ver > I40E_FW_API_VERSION_MINOR) device_printf(dev, "The driver for the device detected " "a newer version of the NVM image than expected.\n" "Please install the most recent version of the network driver.\n"); else if (hw->aq.api_maj_ver < I40E_FW_API_VERSION_MAJOR || hw->aq.api_min_ver < (I40E_FW_API_VERSION_MINOR - 1)) device_printf(dev, "The driver for the device detected " "an older version of the NVM image than expected.\n" "Please update the NVM image.\n"); /* Clear PXE mode */ i40e_clear_pxe_mode(hw); /* Get capabilities from the device */ error = ixl_get_hw_capabilities(pf); if (error) { device_printf(dev, "HW capabilities failure!\n"); goto err_get_cap; } /* Set up host memory cache */ error = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp, hw->func_caps.num_rx_qp, 0, 0); if (error) { device_printf(dev, "init_lan_hmc failed: %d\n", error); goto err_get_cap; } error = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY); if (error) { device_printf(dev, "configure_lan_hmc failed: %d\n", error); goto err_mac_hmc; } /* Disable LLDP from the firmware */ i40e_aq_stop_lldp(hw, TRUE, NULL); i40e_get_mac_addr(hw, hw->mac.addr); error = i40e_validate_mac_addr(hw->mac.addr); if (error) { device_printf(dev, "validate_mac_addr failed: %d\n", error); goto err_mac_hmc; } bcopy(hw->mac.addr, hw->mac.perm_addr, ETHER_ADDR_LEN); i40e_get_port_mac_addr(hw, hw->mac.port_addr); /* Set up VSI and queues */ if (ixl_setup_stations(pf) != 0) { device_printf(dev, "setup stations failed!\n"); error = ENOMEM; goto err_mac_hmc; } /* Initialize mac filter list for VSI */ SLIST_INIT(&vsi->ftl); /* Set up interrupt routing here */ if (pf->msix > 1) error = ixl_assign_vsi_msix(pf); else error = ixl_assign_vsi_legacy(pf); if (error) goto err_late; if
(((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver < 33)) || (hw->aq.fw_maj_ver < 4)) { i40e_msec_delay(75); error = i40e_aq_set_link_restart_an(hw, TRUE, NULL); if (error) device_printf(dev, "link restart failed, aq_err=%d\n", pf->hw.aq.asq_last_status); } /* Determine link state */ i40e_aq_get_link_info(hw, TRUE, NULL, NULL); i40e_get_link_status(hw, &pf->link_up); /* Setup OS specific network interface */ if (ixl_setup_interface(dev, vsi) != 0) { device_printf(dev, "interface setup failed!\n"); error = EIO; goto err_late; } error = ixl_switch_config(pf); if (error) { device_printf(dev, "Initial switch config failed: %d\n", error); goto err_mac_hmc; } /* Limit phy interrupts to link and modules failure */ error = i40e_aq_set_phy_int_mask(hw, I40E_AQ_EVENT_LINK_UPDOWN | I40E_AQ_EVENT_MODULE_QUAL_FAIL, NULL); if (error) device_printf(dev, "set phy mask failed: %d\n", error); /* Get the bus configuration and set the shared code */ bus = ixl_get_bus_info(hw, dev); i40e_set_pci_config_data(hw, bus); /* Initialize statistics */ ixl_pf_reset_stats(pf); ixl_update_stats_counters(pf); ixl_add_hw_stats(pf); /* Register for VLAN events */ vsi->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, ixl_register_vlan, vsi, EVENTHANDLER_PRI_FIRST); vsi->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, ixl_unregister_vlan, vsi, EVENTHANDLER_PRI_FIRST); #ifdef PCI_IOV /* SR-IOV is only supported when MSI-X is in use. */ if (pf->msix > 1) { pf_schema = pci_iov_schema_alloc_node(); vf_schema = pci_iov_schema_alloc_node(); pci_iov_schema_add_unicast_mac(vf_schema, "mac-addr", 0, NULL); pci_iov_schema_add_bool(vf_schema, "mac-anti-spoof", IOV_SCHEMA_HASDEFAULT, TRUE); pci_iov_schema_add_bool(vf_schema, "allow-set-mac", IOV_SCHEMA_HASDEFAULT, FALSE); pci_iov_schema_add_bool(vf_schema, "allow-promisc", IOV_SCHEMA_HASDEFAULT, FALSE); iov_error = pci_iov_attach(dev, pf_schema, vf_schema); if (iov_error != 0) device_printf(dev, "Failed to initialize SR-IOV (error=%d)\n", iov_error); } #endif #ifdef DEV_NETMAP ixl_netmap_attach(vsi); #endif /* DEV_NETMAP */ INIT_DEBUGOUT("ixl_attach: end"); return (0); err_late: if (vsi->ifp != NULL) if_free(vsi->ifp); err_mac_hmc: i40e_shutdown_lan_hmc(hw); err_get_cap: i40e_shutdown_adminq(hw); err_out: ixl_free_pci_resources(pf); ixl_free_vsi(vsi); IXL_PF_LOCK_DESTROY(pf); return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. 
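 * (Note the guards below: detach returns EBUSY while VLANs are still * attached to the trunk, and pci_iov_detach() must succeed first when * SR-IOV VFs are still in use.)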
* * return 0 on success, positive on failure *********************************************************************/ static int ixl_detach(device_t dev) { struct ixl_pf *pf = device_get_softc(dev); struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_queue *que = vsi->queues; i40e_status status; #ifdef PCI_IOV int error; #endif INIT_DEBUGOUT("ixl_detach: begin"); /* Make sure VLANS are not using driver */ if (vsi->ifp->if_vlantrunk != NULL) { device_printf(dev,"Vlan in use, detach first\n"); return (EBUSY); } #ifdef PCI_IOV error = pci_iov_detach(dev); if (error != 0) { device_printf(dev, "SR-IOV in use; detach first.\n"); return (error); } #endif ether_ifdetach(vsi->ifp); if (vsi->ifp->if_drv_flags & IFF_DRV_RUNNING) { IXL_PF_LOCK(pf); ixl_stop(pf); IXL_PF_UNLOCK(pf); } for (int i = 0; i < vsi->num_queues; i++, que++) { if (que->tq) { taskqueue_drain(que->tq, &que->task); taskqueue_drain(que->tq, &que->tx_task); taskqueue_free(que->tq); } } /* Shutdown LAN HMC */ status = i40e_shutdown_lan_hmc(hw); if (status) device_printf(dev, "Shutdown LAN HMC failed with code %d\n", status); /* Shutdown admin queue */ status = i40e_shutdown_adminq(hw); if (status) device_printf(dev, "Shutdown Admin queue failed with code %d\n", status); /* Unregister VLAN events */ if (vsi->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, vsi->vlan_attach); if (vsi->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, vsi->vlan_detach); callout_drain(&pf->timer); #ifdef DEV_NETMAP netmap_detach(vsi->ifp); #endif /* DEV_NETMAP */ ixl_free_pci_resources(pf); bus_generic_detach(dev); if_free(vsi->ifp); ixl_free_vsi(vsi); IXL_PF_LOCK_DESTROY(pf); return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int ixl_shutdown(device_t dev) { struct ixl_pf *pf = device_get_softc(dev); IXL_PF_LOCK(pf); ixl_stop(pf); IXL_PF_UNLOCK(pf); return (0); } /********************************************************************* * * Get the hardware capabilities * **********************************************************************/ static int ixl_get_hw_capabilities(struct ixl_pf *pf) { struct i40e_aqc_list_capabilities_element_resp *buf; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int error, len; u16 needed; bool again = TRUE; len = 40 * sizeof(struct i40e_aqc_list_capabilities_element_resp); retry: if (!(buf = (struct i40e_aqc_list_capabilities_element_resp *) malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate cap memory\n"); return (ENOMEM); } /* This populates the hw struct */ error = i40e_aq_discover_capabilities(hw, buf, len, &needed, i40e_aqc_opc_list_func_capabilities, NULL); free(buf, M_DEVBUF); if ((pf->hw.aq.asq_last_status == I40E_AQ_RC_ENOMEM) && (again == TRUE)) { /* retry once with a larger buffer */ again = FALSE; len = needed; goto retry; } else if (pf->hw.aq.asq_last_status != I40E_AQ_RC_OK) { device_printf(dev, "capability discovery failed: %d\n", pf->hw.aq.asq_last_status); return (ENODEV); } /* Capture this PF's starting queue pair */ pf->qbase = hw->func_caps.base_queue; #ifdef IXL_DEBUG device_printf(dev,"pf_id=%d, num_vfs=%d, msix_pf=%d, " "msix_vf=%d, fd_g=%d, fd_b=%d, tx_qp=%d rx_qp=%d qbase=%d\n", hw->pf_id, hw->func_caps.num_vfs, hw->func_caps.num_msix_vectors, hw->func_caps.num_msix_vectors_vf, hw->func_caps.fd_filters_guaranteed, hw->func_caps.fd_filters_best_effort, hw->func_caps.num_tx_qp, 
hw->func_caps.num_rx_qp, hw->func_caps.base_queue); #endif return (error); } static void ixl_cap_txcsum_tso(struct ixl_vsi *vsi, struct ifnet *ifp, int mask) { device_t dev = vsi->dev; /* Enable/disable TXCSUM/TSO4 */ if (!(ifp->if_capenable & IFCAP_TXCSUM) && !(ifp->if_capenable & IFCAP_TSO4)) { if (mask & IFCAP_TXCSUM) { ifp->if_capenable |= IFCAP_TXCSUM; /* enable TXCSUM, restore TSO if previously enabled */ if (vsi->flags & IXL_FLAGS_KEEP_TSO4) { vsi->flags &= ~IXL_FLAGS_KEEP_TSO4; ifp->if_capenable |= IFCAP_TSO4; } } else if (mask & IFCAP_TSO4) { ifp->if_capenable |= (IFCAP_TXCSUM | IFCAP_TSO4); vsi->flags &= ~IXL_FLAGS_KEEP_TSO4; device_printf(dev, "TSO4 requires txcsum, enabling both...\n"); } } else if((ifp->if_capenable & IFCAP_TXCSUM) && !(ifp->if_capenable & IFCAP_TSO4)) { if (mask & IFCAP_TXCSUM) ifp->if_capenable &= ~IFCAP_TXCSUM; else if (mask & IFCAP_TSO4) ifp->if_capenable |= IFCAP_TSO4; } else if((ifp->if_capenable & IFCAP_TXCSUM) && (ifp->if_capenable & IFCAP_TSO4)) { if (mask & IFCAP_TXCSUM) { vsi->flags |= IXL_FLAGS_KEEP_TSO4; ifp->if_capenable &= ~(IFCAP_TXCSUM | IFCAP_TSO4); device_printf(dev, "TSO4 requires txcsum, disabling both...\n"); } else if (mask & IFCAP_TSO4) ifp->if_capenable &= ~IFCAP_TSO4; } /* Enable/disable TXCSUM_IPV6/TSO6 */ if (!(ifp->if_capenable & IFCAP_TXCSUM_IPV6) && !(ifp->if_capenable & IFCAP_TSO6)) { if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable |= IFCAP_TXCSUM_IPV6; if (vsi->flags & IXL_FLAGS_KEEP_TSO6) { vsi->flags &= ~IXL_FLAGS_KEEP_TSO6; ifp->if_capenable |= IFCAP_TSO6; } } else if (mask & IFCAP_TSO6) { ifp->if_capenable |= (IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); vsi->flags &= ~IXL_FLAGS_KEEP_TSO6; device_printf(dev, "TSO6 requires txcsum6, enabling both...\n"); } } else if((ifp->if_capenable & IFCAP_TXCSUM_IPV6) && !(ifp->if_capenable & IFCAP_TSO6)) { if (mask & IFCAP_TXCSUM_IPV6) ifp->if_capenable &= ~IFCAP_TXCSUM_IPV6; else if (mask & IFCAP_TSO6) ifp->if_capenable |= IFCAP_TSO6; } else if ((ifp->if_capenable & IFCAP_TXCSUM_IPV6) && (ifp->if_capenable & IFCAP_TSO6)) { if (mask & IFCAP_TXCSUM_IPV6) { vsi->flags |= IXL_FLAGS_KEEP_TSO6; ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); device_printf(dev, "TSO6 requires txcsum6, disabling both...\n"); } else if (mask & IFCAP_TSO6) ifp->if_capenable &= ~IFCAP_TSO6; } } /********************************************************************* * Ioctl entry point * * ixl_ioctl is called when the user wants to configure the * interface. * * return 0 on success, positive on failure **********************************************************************/ static int ixl_ioctl(struct ifnet * ifp, u_long command, caddr_t data) { struct ixl_vsi *vsi = ifp->if_softc; struct ixl_pf *pf = vsi->back; struct ifreq *ifr = (struct ifreq *) data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; bool avoid_reset = FALSE; #endif int error = 0; switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif #if defined(INET) || defined(INET6) /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. 
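** (e.g. adding an alias address to an interface that is already running ** should not bounce the link, so ixl_init() is only called here when ** IFF_DRV_RUNNING is not yet set.)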
*/ if (avoid_reset) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) ixl_init(pf); #ifdef INET if (!(ifp->if_flags & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else error = ether_ioctl(ifp, command, data); break; #endif case SIOCSIFMTU: IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); if (ifr->ifr_mtu > IXL_MAX_FRAME - ETHER_HDR_LEN - ETHER_CRC_LEN - ETHER_VLAN_ENCAP_LEN) { error = EINVAL; } else { IXL_PF_LOCK(pf); ifp->if_mtu = ifr->ifr_mtu; vsi->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; ixl_init_locked(pf); IXL_PF_UNLOCK(pf); } break; case SIOCSIFFLAGS: IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)"); IXL_PF_LOCK(pf); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ pf->if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { ixl_set_promisc(vsi); } } else ixl_init_locked(pf); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) ixl_stop(pf); pf->if_flags = ifp->if_flags; IXL_PF_UNLOCK(pf); break; case SIOCADDMULTI: IOCTL_DEBUGOUT("ioctl: SIOCADDMULTI"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXL_PF_LOCK(pf); ixl_disable_intr(vsi); ixl_add_multi(vsi); ixl_enable_intr(vsi); IXL_PF_UNLOCK(pf); } break; case SIOCDELMULTI: IOCTL_DEBUGOUT("ioctl: SIOCDELMULTI"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXL_PF_LOCK(pf); ixl_disable_intr(vsi); ixl_del_multi(vsi); ixl_enable_intr(vsi); IXL_PF_UNLOCK(pf); } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: #ifdef IFM_ETH_XTYPE case SIOCGIFXMEDIA: #endif IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)"); error = ifmedia_ioctl(ifp, ifr, &vsi->media, command); break; case SIOCSIFCAP: { int mask = ifr->ifr_reqcap ^ ifp->if_capenable; IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)"); ixl_cap_txcsum_tso(vsi, ifp, mask); if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWFILTER) ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXL_PF_LOCK(pf); ixl_init_locked(pf); IXL_PF_UNLOCK(pf); } VLAN_CAPABILITIES(ifp); break; } default: IOCTL_DEBUGOUT("ioctl: UNKNOWN (0x%X)\n", (int)command); error = ether_ioctl(ifp, command, data); break; } return (error); } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * * return 0 on success, positive on failure **********************************************************************/ static void ixl_init_locked(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ifnet *ifp = vsi->ifp; device_t dev = pf->dev; struct i40e_filter_control_settings filter; u8 tmpaddr[ETHER_ADDR_LEN]; int ret; mtx_assert(&pf->pf_mtx, MA_OWNED); INIT_DEBUGOUT("ixl_init: begin"); ixl_stop(pf); /* Get the latest mac address... 
User might use a LAA */ bcopy(IF_LLADDR(vsi->ifp), tmpaddr, I40E_ETH_LENGTH_OF_ADDRESS); if (!cmp_etheraddr(hw->mac.addr, tmpaddr) && (i40e_validate_mac_addr(tmpaddr) == I40E_SUCCESS)) { ixl_del_filter(vsi, hw->mac.addr, IXL_VLAN_ANY); bcopy(tmpaddr, hw->mac.addr, I40E_ETH_LENGTH_OF_ADDRESS); ret = i40e_aq_mac_address_write(hw, I40E_AQC_WRITE_TYPE_LAA_ONLY, hw->mac.addr, NULL); if (ret) { device_printf(dev, "LLA address " "change failed!!\n"); return; } else { ixl_add_filter(vsi, hw->mac.addr, IXL_VLAN_ANY); } } /* Set the various hardware offload abilities */ ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TSO) ifp->if_hwassist |= CSUM_TSO; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) ifp->if_hwassist |= (CSUM_TCP_IPV6 | CSUM_UDP_IPV6); /* Set up the device filtering */ bzero(&filter, sizeof(filter)); filter.enable_ethtype = TRUE; filter.enable_macvlan = TRUE; #ifdef IXL_FDIR filter.enable_fdir = TRUE; #endif if (i40e_set_filter_control(hw, &filter)) device_printf(dev, "set_filter_control() failed\n"); /* Set up RSS */ ixl_config_rss(vsi); /* ** Prepare the VSI: rings, hmc contexts, etc... */ if (ixl_initialize_vsi(vsi)) { device_printf(dev, "initialize vsi failed!!\n"); return; } /* Add protocol filters to list */ ixl_init_filters(vsi); /* Setup VLANs if needed */ ixl_setup_vlan_filters(vsi); /* Start the local timer */ callout_reset(&pf->timer, hz, ixl_local_timer, pf); /* Set up MSI/X routing and the ITR settings */ if (ixl_enable_msix) { ixl_configure_msix(pf); ixl_configure_itr(pf); } else ixl_configure_legacy(pf); ixl_enable_rings(vsi); i40e_aq_set_default_vsi(hw, vsi->seid, NULL); ixl_reconfigure_filters(vsi); /* Set MTU in hardware */ int aq_error = i40e_aq_set_mac_config(hw, vsi->max_frame_size, TRUE, 0, NULL); if (aq_error) device_printf(vsi->dev, "aq_set_mac_config in init error, code %d\n", aq_error); /* And now turn on interrupts */ ixl_enable_intr(vsi); /* Now inform the stack we're ready */ ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; return; } static void ixl_init(void *arg) { struct ixl_pf *pf = arg; IXL_PF_LOCK(pf); ixl_init_locked(pf); IXL_PF_UNLOCK(pf); return; } /* ** ** MSIX Interrupt Handlers and Tasklets ** */ static void ixl_handle_que(void *context, int pending) { struct ixl_queue *que = context; struct ixl_vsi *vsi = que->vsi; struct i40e_hw *hw = vsi->hw; struct tx_ring *txr = &que->txr; struct ifnet *ifp = vsi->ifp; bool more; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { more = ixl_rxeof(que, IXL_RX_LIMIT); IXL_TX_LOCK(txr); ixl_txeof(que); if (!drbr_empty(ifp, txr->br)) ixl_mq_start_locked(ifp, txr); IXL_TX_UNLOCK(txr); if (more) { taskqueue_enqueue(que->tq, &que->task); return; } } /* Reenable this interrupt - hmmm */ ixl_enable_queue(hw, que->me); return; } /********************************************************************* * * Legacy Interrupt Service routine * **********************************************************************/ void ixl_intr(void *arg) { struct ixl_pf *pf = arg; struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_queue *que = vsi->queues; struct ifnet *ifp = vsi->ifp; struct tx_ring *txr = &que->txr; u32 reg, icr0, mask; bool more_tx, more_rx; ++que->irqs; /* Protect against spurious interrupts */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; icr0 = rd32(hw, I40E_PFINT_ICR0); reg = rd32(hw, I40E_PFINT_DYN_CTL0); reg = reg | I40E_PFINT_DYN_CTL0_CLEARPBA_MASK; wr32(hw, I40E_PFINT_DYN_CTL0,
reg); mask = rd32(hw, I40E_PFINT_ICR0_ENA); #ifdef PCI_IOV if (icr0 & I40E_PFINT_ICR0_VFLR_MASK) taskqueue_enqueue(pf->tq, &pf->vflr_task); #endif if (icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) { taskqueue_enqueue(pf->tq, &pf->adminq); return; } more_rx = ixl_rxeof(que, IXL_RX_LIMIT); IXL_TX_LOCK(txr); more_tx = ixl_txeof(que); if (!drbr_empty(vsi->ifp, txr->br)) more_tx = 1; IXL_TX_UNLOCK(txr); /* re-enable other interrupt causes */ wr32(hw, I40E_PFINT_ICR0_ENA, mask); /* And now the queues */ reg = rd32(hw, I40E_QINT_RQCTL(0)); reg |= I40E_QINT_RQCTL_CAUSE_ENA_MASK; wr32(hw, I40E_QINT_RQCTL(0), reg); reg = rd32(hw, I40E_QINT_TQCTL(0)); reg |= I40E_QINT_TQCTL_CAUSE_ENA_MASK; reg &= ~I40E_PFINT_ICR0_INTEVENT_MASK; wr32(hw, I40E_QINT_TQCTL(0), reg); ixl_enable_legacy(hw); return; } /********************************************************************* * * MSIX VSI Interrupt Service routine * **********************************************************************/ void ixl_msix_que(void *arg) { struct ixl_queue *que = arg; struct ixl_vsi *vsi = que->vsi; struct i40e_hw *hw = vsi->hw; struct tx_ring *txr = &que->txr; bool more_tx, more_rx; /* Protect against spurious interrupts */ if (!(vsi->ifp->if_drv_flags & IFF_DRV_RUNNING)) return; ++que->irqs; more_rx = ixl_rxeof(que, IXL_RX_LIMIT); IXL_TX_LOCK(txr); more_tx = ixl_txeof(que); /* ** Make certain that if the stack ** has anything queued the task gets ** scheduled to handle it. */ if (!drbr_empty(vsi->ifp, txr->br)) more_tx = 1; IXL_TX_UNLOCK(txr); ixl_set_queue_rx_itr(que); ixl_set_queue_tx_itr(que); if (more_tx || more_rx) taskqueue_enqueue(que->tq, &que->task); else ixl_enable_queue(hw, que->me); return; } /********************************************************************* * * MSIX Admin Queue Interrupt Service routine * **********************************************************************/ static void ixl_msix_adminq(void *arg) { struct ixl_pf *pf = arg; struct i40e_hw *hw = &pf->hw; u32 reg, mask; ++pf->admin_irq; reg = rd32(hw, I40E_PFINT_ICR0); mask = rd32(hw, I40E_PFINT_ICR0_ENA); /* Check on the cause */ if (reg & I40E_PFINT_ICR0_ADMINQ_MASK) mask &= ~I40E_PFINT_ICR0_ENA_ADMINQ_MASK; if (reg & I40E_PFINT_ICR0_MAL_DETECT_MASK) { ixl_handle_mdd_event(pf); mask &= ~I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK; } #ifdef PCI_IOV if (reg & I40E_PFINT_ICR0_VFLR_MASK) { mask &= ~I40E_PFINT_ICR0_ENA_VFLR_MASK; taskqueue_enqueue(pf->tq, &pf->vflr_task); } #endif reg = rd32(hw, I40E_PFINT_DYN_CTL0); reg = reg | I40E_PFINT_DYN_CTL0_CLEARPBA_MASK; wr32(hw, I40E_PFINT_DYN_CTL0, reg); taskqueue_enqueue(pf->tq, &pf->adminq); return; } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. 
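 * A typical consumer is plain "ifconfig ixl0" (device name illustrative): * the media and status lines it prints are assembled from the ifm_active * and ifm_status bits set below.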
* **********************************************************************/ static void ixl_media_status(struct ifnet * ifp, struct ifmediareq * ifmr) { struct ixl_vsi *vsi = ifp->if_softc; struct ixl_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; INIT_DEBUGOUT("ixl_media_status: begin"); IXL_PF_LOCK(pf); hw->phy.get_link_info = TRUE; i40e_get_link_status(hw, &pf->link_up); ixl_update_link_status(pf); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!pf->link_up) { IXL_PF_UNLOCK(pf); return; } ifmr->ifm_status |= IFM_ACTIVE; /* Hardware is always full-duplex */ ifmr->ifm_active |= IFM_FDX; switch (hw->phy.link_info.phy_type) { /* 100 M */ case I40E_PHY_TYPE_100BASE_TX: ifmr->ifm_active |= IFM_100_TX; break; /* 1 G */ case I40E_PHY_TYPE_1000BASE_T: ifmr->ifm_active |= IFM_1000_T; break; case I40E_PHY_TYPE_1000BASE_SX: ifmr->ifm_active |= IFM_1000_SX; break; case I40E_PHY_TYPE_1000BASE_LX: ifmr->ifm_active |= IFM_1000_LX; break; /* 10 G */ case I40E_PHY_TYPE_10GBASE_SFPP_CU: ifmr->ifm_active |= IFM_10G_TWINAX; break; case I40E_PHY_TYPE_10GBASE_SR: ifmr->ifm_active |= IFM_10G_SR; break; case I40E_PHY_TYPE_10GBASE_LR: ifmr->ifm_active |= IFM_10G_LR; break; case I40E_PHY_TYPE_10GBASE_T: ifmr->ifm_active |= IFM_10G_T; break; /* 40 G */ case I40E_PHY_TYPE_40GBASE_CR4: case I40E_PHY_TYPE_40GBASE_CR4_CU: ifmr->ifm_active |= IFM_40G_CR4; break; case I40E_PHY_TYPE_40GBASE_SR4: ifmr->ifm_active |= IFM_40G_SR4; break; case I40E_PHY_TYPE_40GBASE_LR4: ifmr->ifm_active |= IFM_40G_LR4; break; #ifndef IFM_ETH_XTYPE case I40E_PHY_TYPE_1000BASE_KX: ifmr->ifm_active |= IFM_1000_CX; break; case I40E_PHY_TYPE_10GBASE_CR1_CU: case I40E_PHY_TYPE_10GBASE_CR1: ifmr->ifm_active |= IFM_10G_TWINAX; break; case I40E_PHY_TYPE_10GBASE_KX4: ifmr->ifm_active |= IFM_10G_CX4; break; case I40E_PHY_TYPE_10GBASE_KR: ifmr->ifm_active |= IFM_10G_SR; break; case I40E_PHY_TYPE_40GBASE_KR4: case I40E_PHY_TYPE_XLPPI: ifmr->ifm_active |= IFM_40G_SR4; break; #else case I40E_PHY_TYPE_1000BASE_KX: ifmr->ifm_active |= IFM_1000_KX; break; /* ERJ: What's the difference between these? */ case I40E_PHY_TYPE_10GBASE_CR1_CU: case I40E_PHY_TYPE_10GBASE_CR1: ifmr->ifm_active |= IFM_10G_CR1; break; case I40E_PHY_TYPE_10GBASE_KX4: ifmr->ifm_active |= IFM_10G_KX4; break; case I40E_PHY_TYPE_10GBASE_KR: ifmr->ifm_active |= IFM_10G_KR; break; case I40E_PHY_TYPE_20GBASE_KR2: ifmr->ifm_active |= IFM_20G_KR2; break; case I40E_PHY_TYPE_40GBASE_KR4: ifmr->ifm_active |= IFM_40G_KR4; break; case I40E_PHY_TYPE_XLPPI: ifmr->ifm_active |= IFM_40G_XLPPI; break; #endif default: ifmr->ifm_active |= IFM_UNKNOWN; break; } /* Report flow control status as well */ if (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_TX) ifmr->ifm_active |= IFM_ETH_TXPAUSE; if (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_RX) ifmr->ifm_active |= IFM_ETH_RXPAUSE; IXL_PF_UNLOCK(pf); return; } /********************************************************************* * * Media Ioctl callback * * This routine is called when the user changes speed/duplex using * the media/mediaopt options with ifconfig.
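 * e.g. a hypothetical "ifconfig ixl0 media 40Gbase-SR4" lands here; as * the body shows, any such request is currently rejected with ENODEV.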
* **********************************************************************/ static int ixl_media_change(struct ifnet * ifp) { struct ixl_vsi *vsi = ifp->if_softc; struct ifmedia *ifm = &vsi->media; INIT_DEBUGOUT("ixl_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); if_printf(ifp, "Media change is currently not supported.\n"); return (ENODEV); } #ifdef IXL_FDIR /* ** ATR: Application Targeted Receive - creates a filter ** based on TX flow info that will keep the receive ** portion of the flow on the same queue. Based on the ** implementation, this is only available for TCP connections */ void ixl_atr(struct ixl_queue *que, struct tcphdr *th, int etype) { struct ixl_vsi *vsi = que->vsi; struct tx_ring *txr = &que->txr; struct i40e_filter_program_desc *FDIR; u32 ptype, dtype; int idx; /* check if ATR is enabled and sample rate */ if ((!ixl_enable_fdir) || (!txr->atr_rate)) return; /* ** We sample all TCP SYN/FIN packets, ** or at the selected sample rate */ txr->atr_count++; if (((th->th_flags & (TH_FIN | TH_SYN)) == 0) && (txr->atr_count < txr->atr_rate)) return; txr->atr_count = 0; /* Get a descriptor to use */ idx = txr->next_avail; FDIR = (struct i40e_filter_program_desc *) &txr->base[idx]; if (++idx == que->num_desc) idx = 0; txr->avail--; txr->next_avail = idx; ptype = (que->me << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) & I40E_TXD_FLTR_QW0_QINDEX_MASK; ptype |= (etype == ETHERTYPE_IP) ? (I40E_FILTER_PCTYPE_NONF_IPV4_TCP << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) : (I40E_FILTER_PCTYPE_NONF_IPV6_TCP << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT); ptype |= vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT; dtype = I40E_TX_DESC_DTYPE_FILTER_PROG; /* ** We use the TCP TH_FIN as a trigger to remove ** the filter, otherwise it's an update. */ dtype |= (th->th_flags & TH_FIN) ? (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE << I40E_TXD_FLTR_QW1_PCMD_SHIFT) : (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE << I40E_TXD_FLTR_QW1_PCMD_SHIFT); dtype |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX << I40E_TXD_FLTR_QW1_DEST_SHIFT; dtype |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT; FDIR->qindex_flex_ptype_vsi = htole32(ptype); FDIR->dtype_cmd_cntindex = htole32(dtype); return; } #endif static void ixl_set_promisc(struct ixl_vsi *vsi) { struct ifnet *ifp = vsi->ifp; struct i40e_hw *hw = vsi->hw; int err, mcnt = 0; bool uni = FALSE, multi = FALSE; if (ifp->if_flags & IFF_ALLMULTI) multi = TRUE; else { /* Need to count the multicast addresses */ struct ifmultiaddr *ifma; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == MAX_MULTICAST_ADDR) break; mcnt++; } if_maddr_runlock(ifp); } if (mcnt >= MAX_MULTICAST_ADDR) multi = TRUE; if (ifp->if_flags & IFF_PROMISC) uni = TRUE; err = i40e_aq_set_vsi_unicast_promiscuous(hw, vsi->seid, uni, NULL); err = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, multi, NULL); return; } /********************************************************************* * Filter Routines * * Routines for multicast and vlan filter management. * *********************************************************************/ static void ixl_add_multi(struct ixl_vsi *vsi) { struct ifmultiaddr *ifma; struct ifnet *ifp = vsi->ifp; struct i40e_hw *hw = vsi->hw; int mcnt = 0, flags; IOCTL_DEBUGOUT("ixl_add_multi: begin"); if_maddr_rlock(ifp); /* ** First just get a count, to decide if we ** should simply use multicast promiscuous.
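** (MAX_MULTICAST_ADDR is the cutover point: at or beyond it the code ** below drops the individual filters and enables multicast promiscuous ** mode instead.)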
*/ TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; mcnt++; } if_maddr_runlock(ifp); if (__predict_false(mcnt >= MAX_MULTICAST_ADDR)) { /* delete existing MC filters */ ixl_del_hw_filters(vsi, mcnt); i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, TRUE, NULL); return; } mcnt = 0; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; ixl_add_mc_filter(vsi, (u8*)LLADDR((struct sockaddr_dl *) ifma->ifma_addr)); mcnt++; } if_maddr_runlock(ifp); if (mcnt > 0) { flags = (IXL_FILTER_ADD | IXL_FILTER_USED | IXL_FILTER_MC); ixl_add_hw_filters(vsi, flags, mcnt); } IOCTL_DEBUGOUT("ixl_add_multi: end"); return; } static void ixl_del_multi(struct ixl_vsi *vsi) { struct ifnet *ifp = vsi->ifp; struct ifmultiaddr *ifma; struct ixl_mac_filter *f; int mcnt = 0; bool match = FALSE; IOCTL_DEBUGOUT("ixl_del_multi: begin"); /* Search for removed multicast addresses */ if_maddr_rlock(ifp); SLIST_FOREACH(f, &vsi->ftl, next) { if ((f->flags & IXL_FILTER_USED) && (f->flags & IXL_FILTER_MC)) { match = FALSE; TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; u8 *mc_addr = (u8 *)LLADDR((struct sockaddr_dl *)ifma->ifma_addr); if (cmp_etheraddr(f->macaddr, mc_addr)) { match = TRUE; break; } } if (match == FALSE) { f->flags |= IXL_FILTER_DEL; mcnt++; } } } if_maddr_runlock(ifp); if (mcnt > 0) ixl_del_hw_filters(vsi, mcnt); } /********************************************************************* * Timer routine * * This routine checks for link status, updates statistics, * and runs the watchdog check. * **********************************************************************/ static void ixl_local_timer(void *arg) { struct ixl_pf *pf = arg; struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_queue *que = vsi->queues; device_t dev = pf->dev; int hung = 0; u32 mask; mtx_assert(&pf->pf_mtx, MA_OWNED); /* Fire off the adminq task */ taskqueue_enqueue(pf->tq, &pf->adminq); /* Update stats */ ixl_update_stats_counters(pf); /* ** Check status of the queues */ mask = (I40E_PFINT_DYN_CTLN_INTENA_MASK | I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK); for (int i = 0; i < vsi->num_queues; i++,que++) { /* Any queues with outstanding work get a sw irq */ if (que->busy) wr32(hw, I40E_PFINT_DYN_CTLN(que->me), mask); /* ** Each time txeof runs without cleaning while there ** are uncleaned descriptors, it increments busy. If ** we get to 5 we declare it hung. */ if (que->busy == IXL_QUEUE_HUNG) { ++hung; /* Mark the queue as inactive */ vsi->active_queues &= ~((u64)1 << que->me); continue; } else { /* Check if we've come back from hung */ if ((vsi->active_queues & ((u64)1 << que->me)) == 0) vsi->active_queues |= ((u64)1 << que->me); } if (que->busy >= IXL_MAX_TX_BUSY) { #ifdef IXL_DEBUG device_printf(dev,"Warning queue %d " "appears to be hung!\n", i); #endif que->busy = IXL_QUEUE_HUNG; ++hung; } } /* Only reinit if all queues show hung */ if (hung == vsi->num_queues) goto hung; callout_reset(&pf->timer, hz, ixl_local_timer, pf); return; hung: device_printf(dev, "Local Timer: HANG DETECT - Resetting!!\n"); ixl_init_locked(pf); } /* ** Note: this routine updates the OS on the link state; ** the real check of the hardware only happens with ** a link interrupt.
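** (ixl_local_timer() above only enqueues the adminq task; pf->link_up ** is actually refreshed when a link event is processed off the admin ** queue.)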
*/
static void
ixl_update_link_status(struct ixl_pf *pf)
{
	struct ixl_vsi	*vsi = &pf->vsi;
	struct i40e_hw	*hw = &pf->hw;
	struct ifnet	*ifp = vsi->ifp;
	device_t	dev = pf->dev;

	if (pf->link_up) {
		if (vsi->link_active == FALSE) {
			pf->fc = hw->fc.current_mode;
			if (bootverbose) {
				device_printf(dev, "Link is up %d Gbps %s,"
				    " Flow Control: %s\n",
				    ((pf->link_speed ==
				    I40E_LINK_SPEED_40GB)? 40:10),
				    "Full Duplex", ixl_fc_string[pf->fc]);
			}
			vsi->link_active = TRUE;
			/*
			** Warn user if link speed on NPAR enabled
			** partition is not at least 10GB
			*/
			if (hw->func_caps.npar_enable &&
			    (hw->phy.link_info.link_speed == I40E_LINK_SPEED_1GB ||
			    hw->phy.link_info.link_speed == I40E_LINK_SPEED_100MB))
				device_printf(dev, "The partition detected a "
				    "link speed that is less than 10Gbps\n");
			if_link_state_change(ifp, LINK_STATE_UP);
		}
	} else { /* Link down */
		if (vsi->link_active == TRUE) {
			if (bootverbose)
				device_printf(dev, "Link is Down\n");
			if_link_state_change(ifp, LINK_STATE_DOWN);
			vsi->link_active = FALSE;
		}
	}

	return;
}

/*********************************************************************
 *
 *  This routine disables all traffic on the adapter by issuing a
 *  global reset on the MAC and deallocates TX/RX buffers.
 *
 **********************************************************************/

static void
ixl_stop(struct ixl_pf *pf)
{
	struct ixl_vsi	*vsi = &pf->vsi;
	struct ifnet	*ifp = vsi->ifp;

	mtx_assert(&pf->pf_mtx, MA_OWNED);

	INIT_DEBUGOUT("ixl_stop: begin\n");
	if (pf->num_vfs == 0)
		ixl_disable_intr(vsi);
	else
		ixl_disable_rings_intr(vsi);
	ixl_disable_rings(vsi);

	/* Tell the stack that the interface is no longer active */
	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	/* Stop the local timer */
	callout_stop(&pf->timer);

	return;
}


/*********************************************************************
 *
 *  Setup MSIX Interrupt resources and handlers for the VSI
 *
 **********************************************************************/
static int
ixl_assign_vsi_legacy(struct ixl_pf *pf)
{
	device_t	dev = pf->dev;
	struct		ixl_vsi *vsi = &pf->vsi;
	struct		ixl_queue *que = vsi->queues;
	int		error, rid = 0;

	if (pf->msix == 1)
		rid = 1;
	pf->res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
	    &rid, RF_SHAREABLE | RF_ACTIVE);
	if (pf->res == NULL) {
		device_printf(dev, "Unable to allocate"
		    " bus resource: vsi legacy/msi interrupt\n");
		return (ENXIO);
	}

	/* Set the handler function */
	error = bus_setup_intr(dev, pf->res,
	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
	    ixl_intr, pf, &pf->tag);
	if (error) {
		pf->res = NULL;
		device_printf(dev, "Failed to register legacy/msi handler");
		return (error);
	}
	bus_describe_intr(dev, pf->res, pf->tag, "irq0");
	TASK_INIT(&que->tx_task, 0, ixl_deferred_mq_start, que);
	TASK_INIT(&que->task, 0, ixl_handle_que, que);
	que->tq = taskqueue_create_fast("ixl_que", M_NOWAIT,
	    taskqueue_thread_enqueue, &que->tq);
	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
	    device_get_nameunit(dev));
	TASK_INIT(&pf->adminq, 0, ixl_do_adminq, pf);

#ifdef PCI_IOV
	TASK_INIT(&pf->vflr_task, 0, ixl_handle_vflr, pf);
#endif

	pf->tq = taskqueue_create_fast("ixl_adm", M_NOWAIT,
	    taskqueue_thread_enqueue, &pf->tq);
	taskqueue_start_threads(&pf->tq, 1, PI_NET, "%s adminq",
	    device_get_nameunit(dev));

	return (0);
}


/*********************************************************************
 *
 *  Setup MSIX Interrupt resources and handlers for the VSI
 *
 **********************************************************************/
static int
ixl_assign_vsi_msix(struct ixl_pf *pf)
{
	device_t	dev = pf->dev;
	struct		ixl_vsi *vsi = &pf->vsi;
	struct		ixl_queue *que =
vsi->queues; struct tx_ring *txr; int error, rid, vector = 0; #ifdef RSS cpuset_t cpu_mask; #endif /* Admin Que is vector 0*/ rid = vector + 1; pf->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (!pf->res) { device_printf(dev,"Unable to allocate" " bus resource: Adminq interrupt [%d]\n", rid); return (ENXIO); } /* Set the adminq vector and handler */ error = bus_setup_intr(dev, pf->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixl_msix_adminq, pf, &pf->tag); if (error) { pf->res = NULL; device_printf(dev, "Failed to register Admin que handler"); return (error); } bus_describe_intr(dev, pf->res, pf->tag, "aq"); pf->admvec = vector; /* Tasklet for Admin Queue */ TASK_INIT(&pf->adminq, 0, ixl_do_adminq, pf); #ifdef PCI_IOV TASK_INIT(&pf->vflr_task, 0, ixl_handle_vflr, pf); #endif pf->tq = taskqueue_create_fast("ixl_adm", M_NOWAIT, taskqueue_thread_enqueue, &pf->tq); taskqueue_start_threads(&pf->tq, 1, PI_NET, "%s adminq", device_get_nameunit(pf->dev)); ++vector; /* Now set up the stations */ for (int i = 0; i < vsi->num_queues; i++, vector++, que++) { int cpu_id = i; rid = vector + 1; txr = &que->txr; que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (que->res == NULL) { device_printf(dev,"Unable to allocate" " bus resource: que interrupt [%d]\n", vector); return (ENXIO); } /* Set the handler function */ error = bus_setup_intr(dev, que->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixl_msix_que, que, &que->tag); if (error) { que->res = NULL; device_printf(dev, "Failed to register que handler"); return (error); } bus_describe_intr(dev, que->res, que->tag, "q%d", i); /* Bind the vector to a CPU */ #ifdef RSS cpu_id = rss_getcpu(i % rss_getnumbuckets()); #endif bus_bind_intr(dev, que->res, cpu_id); que->msix = vector; TASK_INIT(&que->tx_task, 0, ixl_deferred_mq_start, que); TASK_INIT(&que->task, 0, ixl_handle_que, que); que->tq = taskqueue_create_fast("ixl_que", M_NOWAIT, taskqueue_thread_enqueue, &que->tq); #ifdef RSS CPU_SETOF(cpu_id, &cpu_mask); taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET, &cpu_mask, "%s (bucket %d)", device_get_nameunit(dev), cpu_id); #else taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que", device_get_nameunit(dev)); #endif } return (0); } /* * Allocate MSI/X vectors */ static int ixl_init_msix(struct ixl_pf *pf) { device_t dev = pf->dev; int rid, want, vectors, queues, available; /* Override by tuneable */ if (ixl_enable_msix == 0) goto msi; /* ** When used in a virtualized environment ** PCI BUSMASTER capability may not be set ** so explicity set it here and rewrite ** the ENABLE in the MSIX control register ** at this point to cause the host to ** successfully initialize us. 
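	**
	** (As an illustration of why: with some hypervisors' PCI
	** passthrough, BUSMASTER and the MSIX ENABLE bit have been
	** seen cleared after reset, so the block below ORs
	** PCIM_CMD_BUSMASTEREN into PCIR_COMMAND and re-asserts
	** PCIM_MSIXCTRL_MSIX_ENABLE in the MSIX control word before
	** any vectors are requested.)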
*/ { u16 pci_cmd_word; int msix_ctrl; pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); pci_cmd_word |= PCIM_CMD_BUSMASTEREN; pci_write_config(dev, PCIR_COMMAND, pci_cmd_word, 2); pci_find_cap(dev, PCIY_MSIX, &rid); rid += PCIR_MSIX_CTRL; msix_ctrl = pci_read_config(dev, rid, 2); msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; pci_write_config(dev, rid, msix_ctrl, 2); } /* First try MSI/X */ rid = PCIR_BAR(IXL_BAR); pf->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!pf->msix_mem) { /* May not be enabled */ device_printf(pf->dev, "Unable to map MSIX table \n"); goto msi; } available = pci_msix_count(dev); if (available == 0) { /* system has msix disabled */ bus_release_resource(dev, SYS_RES_MEMORY, rid, pf->msix_mem); pf->msix_mem = NULL; goto msi; } /* Figure out a reasonable auto config value */ queues = (mp_ncpus > (available - 1)) ? (available - 1) : mp_ncpus; /* Override with hardcoded value if sane */ if ((ixl_max_queues != 0) && (ixl_max_queues <= queues)) queues = ixl_max_queues; #ifdef RSS /* If we're doing RSS, clamp at the number of RSS buckets */ if (queues > rss_getnumbuckets()) queues = rss_getnumbuckets(); #endif /* ** Want one vector (RX/TX pair) per queue ** plus an additional for the admin queue. */ want = queues + 1; if (want <= available) /* Have enough */ vectors = want; else { device_printf(pf->dev, "MSIX Configuration Problem, " "%d vectors available but %d wanted!\n", available, want); return (0); /* Will go to Legacy setup */ } if (pci_alloc_msix(dev, &vectors) == 0) { device_printf(pf->dev, "Using MSIX interrupts with %d vectors\n", vectors); pf->msix = vectors; pf->vsi.num_queues = queues; #ifdef RSS /* * If we're doing RSS, the number of queues needs to * match the number of RSS buckets that are configured. * * + If there's more queues than RSS buckets, we'll end * up with queues that get no traffic. * * + If there's more RSS buckets than queues, we'll end * up having multiple RSS buckets map to the same queue, * so there'll be some contention. 
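	 *
	 * As a worked example: with 8 RSS buckets and 4 queues,
	 * buckets 0 and 4 both land on queue 0, buckets 1 and 5 on
	 * queue 1, and so on; with 4 buckets and 8 queues, queues
	 * 4-7 would never see RSS-hashed traffic.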
*/ if (queues != rss_getnumbuckets()) { device_printf(dev, "%s: queues (%d) != RSS buckets (%d)" "; performance will be impacted.\n", __func__, queues, rss_getnumbuckets()); } #endif return (vectors); } msi: vectors = pci_msi_count(dev); pf->vsi.num_queues = 1; pf->msix = 1; ixl_max_queues = 1; ixl_enable_msix = 0; if (vectors == 1 && pci_alloc_msi(dev, &vectors) == 0) device_printf(pf->dev,"Using an MSI interrupt\n"); else { pf->msix = 0; device_printf(pf->dev,"Using a Legacy interrupt\n"); } return (vectors); } /* * Plumb MSI/X vectors */ static void ixl_configure_msix(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; u32 reg; u16 vector = 1; /* First set up the adminq - vector 0 */ wr32(hw, I40E_PFINT_ICR0_ENA, 0); /* disable all */ rd32(hw, I40E_PFINT_ICR0); /* read to clear */ reg = I40E_PFINT_ICR0_ENA_ECC_ERR_MASK | I40E_PFINT_ICR0_ENA_GRST_MASK | I40E_PFINT_ICR0_HMC_ERR_MASK | I40E_PFINT_ICR0_ENA_ADMINQ_MASK | I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK | I40E_PFINT_ICR0_ENA_VFLR_MASK | I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, reg); wr32(hw, I40E_PFINT_LNKLST0, 0x7FF); wr32(hw, I40E_PFINT_ITR0(IXL_RX_ITR), 0x003E); wr32(hw, I40E_PFINT_DYN_CTL0, I40E_PFINT_DYN_CTL0_SW_ITR_INDX_MASK | I40E_PFINT_DYN_CTL0_INTENA_MSK_MASK); wr32(hw, I40E_PFINT_STAT_CTL0, 0); /* Next configure the queues */ for (int i = 0; i < vsi->num_queues; i++, vector++) { wr32(hw, I40E_PFINT_DYN_CTLN(i), i); wr32(hw, I40E_PFINT_LNKLSTN(i), i); reg = I40E_QINT_RQCTL_CAUSE_ENA_MASK | (IXL_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | (vector << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | (i << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT); wr32(hw, I40E_QINT_RQCTL(i), reg); reg = I40E_QINT_TQCTL_CAUSE_ENA_MASK | (IXL_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) | ((i+1) << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) | (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT); if (i == (vsi->num_queues - 1)) reg |= (IXL_QUEUE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT); wr32(hw, I40E_QINT_TQCTL(i), reg); } } /* * Configure for MSI single vector operation */ static void ixl_configure_legacy(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; u32 reg; wr32(hw, I40E_PFINT_ITR0(0), 0); wr32(hw, I40E_PFINT_ITR0(1), 0); /* Setup "other" causes */ reg = I40E_PFINT_ICR0_ENA_ECC_ERR_MASK | I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK | I40E_PFINT_ICR0_ENA_GRST_MASK | I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_MASK | I40E_PFINT_ICR0_ENA_GPIO_MASK | I40E_PFINT_ICR0_ENA_LINK_STAT_CHANGE_MASK | I40E_PFINT_ICR0_ENA_HMC_ERR_MASK | I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK | I40E_PFINT_ICR0_ENA_VFLR_MASK | I40E_PFINT_ICR0_ENA_ADMINQ_MASK ; wr32(hw, I40E_PFINT_ICR0_ENA, reg); /* SW_ITR_IDX = 0, but don't change INTENA */ wr32(hw, I40E_PFINT_DYN_CTL0, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK | I40E_PFINT_DYN_CTLN_INTENA_MSK_MASK); /* SW_ITR_IDX = 0, OTHER_ITR_IDX = 0 */ wr32(hw, I40E_PFINT_STAT_CTL0, 0); /* FIRSTQ_INDX = 0, FIRSTQ_TYPE = 0 (rx) */ wr32(hw, I40E_PFINT_LNKLST0, 0); /* Associate the queue pair to the vector and enable the q int */ reg = I40E_QINT_RQCTL_CAUSE_ENA_MASK | (IXL_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | (I40E_QUEUE_TYPE_TX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT); wr32(hw, I40E_QINT_RQCTL(0), reg); reg = I40E_QINT_TQCTL_CAUSE_ENA_MASK | (IXL_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | (IXL_QUEUE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT); wr32(hw, I40E_QINT_TQCTL(0), reg); /* Next enable the queue pair */ reg = rd32(hw, 
I40E_QTX_ENA(0)); reg |= I40E_QTX_ENA_QENA_REQ_MASK; wr32(hw, I40E_QTX_ENA(0), reg); reg = rd32(hw, I40E_QRX_ENA(0)); reg |= I40E_QRX_ENA_QENA_REQ_MASK; wr32(hw, I40E_QRX_ENA(0), reg); } /* * Set the Initial ITR state */ static void ixl_configure_itr(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_queue *que = vsi->queues; vsi->rx_itr_setting = ixl_rx_itr; if (ixl_dynamic_rx_itr) vsi->rx_itr_setting |= IXL_ITR_DYNAMIC; vsi->tx_itr_setting = ixl_tx_itr; if (ixl_dynamic_tx_itr) vsi->tx_itr_setting |= IXL_ITR_DYNAMIC; for (int i = 0; i < vsi->num_queues; i++, que++) { struct tx_ring *txr = &que->txr; struct rx_ring *rxr = &que->rxr; wr32(hw, I40E_PFINT_ITRN(IXL_RX_ITR, i), vsi->rx_itr_setting); rxr->itr = vsi->rx_itr_setting; rxr->latency = IXL_AVE_LATENCY; wr32(hw, I40E_PFINT_ITRN(IXL_TX_ITR, i), vsi->tx_itr_setting); txr->itr = vsi->tx_itr_setting; txr->latency = IXL_AVE_LATENCY; } } static int ixl_allocate_pci_resources(struct ixl_pf *pf) { int rid; device_t dev = pf->dev; rid = PCIR_BAR(0); pf->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!(pf->pci_mem)) { device_printf(dev,"Unable to allocate bus resource: memory\n"); return (ENXIO); } pf->osdep.mem_bus_space_tag = rman_get_bustag(pf->pci_mem); pf->osdep.mem_bus_space_handle = rman_get_bushandle(pf->pci_mem); pf->osdep.mem_bus_space_size = rman_get_size(pf->pci_mem); pf->osdep.flush_reg = I40E_GLGEN_STAT; pf->hw.hw_addr = (u8 *) &pf->osdep.mem_bus_space_handle; pf->hw.back = &pf->osdep; /* ** Now setup MSI or MSI/X, should ** return us the number of supported ** vectors. (Will be 1 for MSI) */ pf->msix = ixl_init_msix(pf); return (0); } static void ixl_free_pci_resources(struct ixl_pf * pf) { struct ixl_vsi *vsi = &pf->vsi; struct ixl_queue *que = vsi->queues; device_t dev = pf->dev; int rid, memrid; memrid = PCIR_BAR(IXL_BAR); /* We may get here before stations are setup */ if ((!ixl_enable_msix) || (que == NULL)) goto early; /* ** Release all msix VSI resources: */ for (int i = 0; i < vsi->num_queues; i++, que++) { rid = que->msix + 1; if (que->tag != NULL) { bus_teardown_intr(dev, que->res, que->tag); que->tag = NULL; } if (que->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); } early: /* Clean the AdminQ interrupt last */ if (pf->admvec) /* we are doing MSIX */ rid = pf->admvec + 1; else (pf->msix != 0) ? 
(rid = 1):(rid = 0); if (pf->tag != NULL) { bus_teardown_intr(dev, pf->res, pf->tag); pf->tag = NULL; } if (pf->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, pf->res); if (pf->msix) pci_release_msi(dev); if (pf->msix_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, memrid, pf->msix_mem); if (pf->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), pf->pci_mem); return; } static void ixl_add_ifmedia(struct ixl_vsi *vsi, u32 phy_type) { /* Display supported media types */ if (phy_type & (1 << I40E_PHY_TYPE_100BASE_TX)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_100_TX, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_1000BASE_T)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_1000_T, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_1000BASE_SX)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_1000_SX, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_1000BASE_LX)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_1000_LX, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_XAUI) || phy_type & (1 << I40E_PHY_TYPE_XFI) || phy_type & (1 << I40E_PHY_TYPE_10GBASE_SFPP_CU)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_TWINAX, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_10GBASE_SR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_SR, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_10GBASE_LR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_LR, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_10GBASE_T)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_T, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_40GBASE_CR4) || phy_type & (1 << I40E_PHY_TYPE_40GBASE_CR4_CU) || phy_type & (1 << I40E_PHY_TYPE_40GBASE_AOC) || phy_type & (1 << I40E_PHY_TYPE_XLAUI) || phy_type & (1 << I40E_PHY_TYPE_40GBASE_KR4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_CR4, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_40GBASE_SR4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_SR4, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_40GBASE_LR4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_LR4, 0, NULL); #ifndef IFM_ETH_XTYPE if (phy_type & (1 << I40E_PHY_TYPE_1000BASE_KX)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_1000_CX, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_10GBASE_CR1_CU) || phy_type & (1 << I40E_PHY_TYPE_10GBASE_CR1) || phy_type & (1 << I40E_PHY_TYPE_10GBASE_AOC) || phy_type & (1 << I40E_PHY_TYPE_SFI)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_TWINAX, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_10GBASE_KX4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_CX4, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_10GBASE_KR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_SR, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_40GBASE_KR4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_SR4, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_XLPPI)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_CR4, 0, NULL); #else if (phy_type & (1 << I40E_PHY_TYPE_1000BASE_KX)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_1000_KX, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_10GBASE_CR1_CU) || phy_type & (1 << I40E_PHY_TYPE_10GBASE_CR1)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_CR1, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_10GBASE_AOC)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_TWINAX_LONG, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_SFI)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_SFI, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_10GBASE_KX4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_KX4, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_10GBASE_KR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_KR, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_20GBASE_KR2)) 
ifmedia_add(&vsi->media, IFM_ETHER | IFM_20G_KR2, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_40GBASE_KR4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_KR4, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_XLPPI)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_XLPPI, 0, NULL); #endif } /********************************************************************* * * Setup networking device structure and register an interface. * **********************************************************************/ static int ixl_setup_interface(device_t dev, struct ixl_vsi *vsi) { struct ifnet *ifp; struct i40e_hw *hw = vsi->hw; struct ixl_queue *que = vsi->queues; struct i40e_aq_get_phy_abilities_resp abilities; enum i40e_status_code aq_error = 0; INIT_DEBUGOUT("ixl_setup_interface: begin"); ifp = vsi->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not allocate ifnet structure\n"); return (-1); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_mtu = ETHERMTU; ifp->if_baudrate = IF_Gbps(40); ifp->if_init = ixl_init; ifp->if_softc = vsi; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = ixl_ioctl; #if __FreeBSD_version >= 1100036 if_setgetcounterfn(ifp, ixl_get_counter); #endif ifp->if_transmit = ixl_mq_start; ifp->if_qflush = ixl_qflush; ifp->if_snd.ifq_maxlen = que->num_desc - 2; vsi->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; /* * Tell the upper layer(s) we support long frames. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_HWCSUM; ifp->if_capabilities |= IFCAP_HWCSUM_IPV6; ifp->if_capabilities |= IFCAP_TSO; ifp->if_capabilities |= IFCAP_JUMBO_MTU; ifp->if_capabilities |= IFCAP_LRO; /* VLAN capabilties */ ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU | IFCAP_VLAN_HWCSUM; ifp->if_capenable = ifp->if_capabilities; /* ** Don't turn this on by default, if vlans are ** created on another pseudo device (eg. lagg) ** then vlan events are not passed thru, breaking ** operation, but with HW FILTER off it works. If ** using vlans directly on the ixl driver you can ** enable this and get full hardware tag filtering. */ ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&vsi->media, IFM_IMASK, ixl_media_change, ixl_media_status); aq_error = i40e_aq_get_phy_capabilities(hw, FALSE, TRUE, &abilities, NULL); /* May need delay to detect fiber correctly */ if (aq_error == I40E_ERR_UNKNOWN_PHY) { i40e_msec_delay(200); aq_error = i40e_aq_get_phy_capabilities(hw, FALSE, TRUE, &abilities, NULL); } if (aq_error) { if (aq_error == I40E_ERR_UNKNOWN_PHY) device_printf(dev, "Unknown PHY type detected!\n"); else device_printf(dev, "Error getting supported media types, err %d," " AQ error %d\n", aq_error, hw->aq.asq_last_status); return (0); } ixl_add_ifmedia(vsi, abilities.phy_type); /* Use autoselect media by default */ ifmedia_add(&vsi->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&vsi->media, IFM_ETHER | IFM_AUTO); ether_ifattach(ifp, hw->mac.addr); return (0); } /* ** Run when the Admin Queue gets a ** link transition interrupt. 
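**
** The event payload is the raw AQ descriptor: the
** i40e_aqc_get_link_status fields are recovered from
** e->desc.params.raw, and the authoritative state is then
** fetched from firmware with i40e_get_link_status().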
*/ static void ixl_link_event(struct ixl_pf *pf, struct i40e_arq_event_info *e) { struct i40e_hw *hw = &pf->hw; struct i40e_aqc_get_link_status *status = (struct i40e_aqc_get_link_status *)&e->desc.params.raw; bool check; hw->phy.get_link_info = TRUE; i40e_get_link_status(hw, &check); pf->link_up = check; #ifdef IXL_DEBUG printf("Link is %s\n", check ? "up":"down"); #endif /* Report if Unqualified modules are found */ if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) && (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) && (!(status->link_info & I40E_AQ_LINK_UP))) device_printf(pf->dev, "Link failed because " "an unqualified module was detected\n"); return; } /********************************************************************* * * Get Firmware Switch configuration * - this will need to be more robust when more complex * switch configurations are enabled. * **********************************************************************/ static int ixl_switch_config(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; device_t dev = vsi->dev; struct i40e_aqc_get_switch_config_resp *sw_config; u8 aq_buf[I40E_AQ_LARGE_BUF]; int ret; u16 next = 0; memset(&aq_buf, 0, sizeof(aq_buf)); sw_config = (struct i40e_aqc_get_switch_config_resp *)aq_buf; ret = i40e_aq_get_switch_config(hw, sw_config, sizeof(aq_buf), &next, NULL); if (ret) { device_printf(dev,"aq_get_switch_config failed (ret=%d)!!\n", ret); return (ret); } #ifdef IXL_DEBUG device_printf(dev, "Switch config: header reported: %d in structure, %d total\n", sw_config->header.num_reported, sw_config->header.num_total); for (int i = 0; i < sw_config->header.num_reported; i++) { device_printf(dev, "%d: type=%d seid=%d uplink=%d downlink=%d\n", i, sw_config->element[i].element_type, sw_config->element[i].seid, sw_config->element[i].uplink_seid, sw_config->element[i].downlink_seid); } #endif /* Simplified due to a single VSI at the moment */ vsi->uplink_seid = sw_config->element[0].uplink_seid; vsi->downlink_seid = sw_config->element[0].downlink_seid; vsi->seid = sw_config->element[0].seid; return (ret); } /********************************************************************* * * Initialize the VSI: this handles contexts, which means things * like the number of descriptors, buffer size, * plus we init the rings thru this function. * **********************************************************************/ static int ixl_initialize_vsi(struct ixl_vsi *vsi) { struct ixl_pf *pf = vsi->back; struct ixl_queue *que = vsi->queues; device_t dev = vsi->dev; struct i40e_hw *hw = vsi->hw; struct i40e_vsi_context ctxt; int err = 0; memset(&ctxt, 0, sizeof(ctxt)); ctxt.seid = vsi->seid; if (pf->veb_seid != 0) ctxt.uplink_seid = pf->veb_seid; ctxt.pf_num = hw->pf_id; err = i40e_aq_get_vsi_params(hw, &ctxt, NULL); if (err) { device_printf(dev,"get vsi params failed %x!!\n", err); return (err); } #ifdef IXL_DEBUG printf("get_vsi_params: seid: %d, uplinkseid: %d, vsi_number: %d, " "vsis_allocated: %d, vsis_unallocated: %d, flags: 0x%x, " "pfnum: %d, vfnum: %d, stat idx: %d, enabled: %d\n", ctxt.seid, ctxt.uplink_seid, ctxt.vsi_number, ctxt.vsis_allocated, ctxt.vsis_unallocated, ctxt.flags, ctxt.pf_num, ctxt.vf_num, ctxt.info.stat_counter_idx, ctxt.info.up_enable_bits); #endif /* ** Set the queue and traffic class bits ** - when multiple traffic classes are supported ** this will need to be more robust. 
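	**
	** (tc_mapping[0] = 0x0800 below appears to encode queue
	** offset 0 with a queue-count exponent of 4, i.e. 2^4 = 16
	** queues for TC0, assuming the usual I40E_AQ_VSI_TC_QUE_*
	** field layout; re-check against the datasheet before
	** changing it.)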
*/ ctxt.info.valid_sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; ctxt.info.mapping_flags |= I40E_AQ_VSI_QUE_MAP_CONTIG; ctxt.info.queue_mapping[0] = 0; ctxt.info.tc_mapping[0] = 0x0800; /* Set VLAN receive stripping mode */ ctxt.info.valid_sections |= I40E_AQ_VSI_PROP_VLAN_VALID; ctxt.info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL; if (vsi->ifp->if_capenable & IFCAP_VLAN_HWTAGGING) ctxt.info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_EMOD_STR_BOTH; else ctxt.info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_EMOD_NOTHING; /* Keep copy of VSI info in VSI for statistic counters */ memcpy(&vsi->info, &ctxt.info, sizeof(ctxt.info)); /* Reset VSI statistics */ ixl_vsi_reset_stats(vsi); vsi->hw_filters_add = 0; vsi->hw_filters_del = 0; ctxt.flags = htole16(I40E_AQ_VSI_TYPE_PF); err = i40e_aq_update_vsi_params(hw, &ctxt, NULL); if (err) { device_printf(dev,"update vsi params failed %x!!\n", hw->aq.asq_last_status); return (err); } for (int i = 0; i < vsi->num_queues; i++, que++) { struct tx_ring *txr = &que->txr; struct rx_ring *rxr = &que->rxr; struct i40e_hmc_obj_txq tctx; struct i40e_hmc_obj_rxq rctx; u32 txctl; u16 size; /* Setup the HMC TX Context */ size = que->num_desc * sizeof(struct i40e_tx_desc); memset(&tctx, 0, sizeof(struct i40e_hmc_obj_txq)); tctx.new_context = 1; tctx.base = (txr->dma.pa/IXL_TX_CTX_BASE_UNITS); tctx.qlen = que->num_desc; tctx.fc_ena = 0; tctx.rdylist = vsi->info.qs_handle[0]; /* index is TC */ /* Enable HEAD writeback */ tctx.head_wb_ena = 1; tctx.head_wb_addr = txr->dma.pa + (que->num_desc * sizeof(struct i40e_tx_desc)); tctx.rdylist_act = 0; err = i40e_clear_lan_tx_queue_context(hw, i); if (err) { device_printf(dev, "Unable to clear TX context\n"); break; } err = i40e_set_lan_tx_queue_context(hw, i, &tctx); if (err) { device_printf(dev, "Unable to set TX context\n"); break; } /* Associate the ring with this PF */ txctl = I40E_QTX_CTL_PF_QUEUE; txctl |= ((hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) & I40E_QTX_CTL_PF_INDX_MASK); wr32(hw, I40E_QTX_CTL(i), txctl); ixl_flush(hw); /* Do ring (re)init */ ixl_init_tx_ring(que); /* Next setup the HMC RX Context */ if (vsi->max_frame_size <= MCLBYTES) rxr->mbuf_sz = MCLBYTES; else rxr->mbuf_sz = MJUMPAGESIZE; u16 max_rxmax = rxr->mbuf_sz * hw->func_caps.rx_buf_chain_len; /* Set up an RX context for the HMC */ memset(&rctx, 0, sizeof(struct i40e_hmc_obj_rxq)); rctx.dbuff = rxr->mbuf_sz >> I40E_RXQ_CTX_DBUFF_SHIFT; /* ignore header split for now */ rctx.hbuff = 0 >> I40E_RXQ_CTX_HBUFF_SHIFT; rctx.rxmax = (vsi->max_frame_size < max_rxmax) ? 
vsi->max_frame_size : max_rxmax; rctx.dtype = 0; rctx.dsize = 1; /* do 32byte descriptors */ rctx.hsplit_0 = 0; /* no HDR split initially */ rctx.base = (rxr->dma.pa/IXL_RX_CTX_BASE_UNITS); rctx.qlen = que->num_desc; rctx.tphrdesc_ena = 1; rctx.tphwdesc_ena = 1; rctx.tphdata_ena = 0; rctx.tphhead_ena = 0; rctx.lrxqthresh = 2; rctx.crcstrip = 1; rctx.l2tsel = 1; rctx.showiv = 1; rctx.fc_ena = 0; rctx.prefena = 1; err = i40e_clear_lan_rx_queue_context(hw, i); if (err) { device_printf(dev, "Unable to clear RX context %d\n", i); break; } err = i40e_set_lan_rx_queue_context(hw, i, &rctx); if (err) { device_printf(dev, "Unable to set RX context %d\n", i); break; } err = ixl_init_rx_ring(que); if (err) { device_printf(dev, "Fail in init_rx_ring %d\n", i); break; } wr32(vsi->hw, I40E_QRX_TAIL(que->me), 0); #ifdef DEV_NETMAP /* preserve queue */ if (vsi->ifp->if_capenable & IFCAP_NETMAP) { struct netmap_adapter *na = NA(vsi->ifp); struct netmap_kring *kring = &na->rx_rings[i]; int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); wr32(vsi->hw, I40E_QRX_TAIL(que->me), t); } else #endif /* DEV_NETMAP */ wr32(vsi->hw, I40E_QRX_TAIL(que->me), que->num_desc - 1); } return (err); } /********************************************************************* * * Free all VSI structs. * **********************************************************************/ void ixl_free_vsi(struct ixl_vsi *vsi) { struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct ixl_queue *que = vsi->queues; /* Free station queues */ for (int i = 0; i < vsi->num_queues; i++, que++) { struct tx_ring *txr = &que->txr; struct rx_ring *rxr = &que->rxr; if (!mtx_initialized(&txr->mtx)) /* uninitialized */ continue; IXL_TX_LOCK(txr); ixl_free_que_tx(que); if (txr->base) i40e_free_dma_mem(&pf->hw, &txr->dma); IXL_TX_UNLOCK(txr); IXL_TX_LOCK_DESTROY(txr); if (!mtx_initialized(&rxr->mtx)) /* uninitialized */ continue; IXL_RX_LOCK(rxr); ixl_free_que_rx(que); if (rxr->base) i40e_free_dma_mem(&pf->hw, &rxr->dma); IXL_RX_UNLOCK(rxr); IXL_RX_LOCK_DESTROY(rxr); } free(vsi->queues, M_DEVBUF); /* Free VSI filter list */ ixl_free_mac_filters(vsi); } static void ixl_free_mac_filters(struct ixl_vsi *vsi) { struct ixl_mac_filter *f; while (!SLIST_EMPTY(&vsi->ftl)) { f = SLIST_FIRST(&vsi->ftl); SLIST_REMOVE_HEAD(&vsi->ftl, next); free(f, M_DEVBUF); } } /********************************************************************* * * Allocate memory for the VSI (virtual station interface) and their * associated queues, rings and the descriptors associated with each, * called only once at attach. 
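 *
 *  As a sizing example (assuming the default ixl_ringsz of 1024
 *  and 16-byte TX descriptors): tsize below works out to
 *  roundup2(1024 * 16 + sizeof(u32), DBA_ALIGN), where the u32 is
 *  the head-writeback word placed just past the ring proper.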
* **********************************************************************/ static int ixl_setup_stations(struct ixl_pf *pf) { device_t dev = pf->dev; struct ixl_vsi *vsi; struct ixl_queue *que; struct tx_ring *txr; struct rx_ring *rxr; int rsize, tsize; int error = I40E_SUCCESS; vsi = &pf->vsi; vsi->back = (void *)pf; vsi->hw = &pf->hw; vsi->id = 0; vsi->num_vlans = 0; vsi->back = pf; /* Get memory for the station queues */ if (!(vsi->queues = (struct ixl_queue *) malloc(sizeof(struct ixl_queue) * vsi->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate queue memory\n"); error = ENOMEM; goto early; } for (int i = 0; i < vsi->num_queues; i++) { que = &vsi->queues[i]; que->num_desc = ixl_ringsz; que->me = i; que->vsi = vsi; /* mark the queue as active */ vsi->active_queues |= (u64)1 << que->me; txr = &que->txr; txr->que = que; txr->tail = I40E_QTX_TAIL(que->me); /* Initialize the TX lock */ snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", device_get_nameunit(dev), que->me); mtx_init(&txr->mtx, txr->mtx_name, NULL, MTX_DEF); /* Create the TX descriptor ring */ tsize = roundup2((que->num_desc * sizeof(struct i40e_tx_desc)) + sizeof(u32), DBA_ALIGN); if (i40e_allocate_dma_mem(&pf->hw, &txr->dma, i40e_mem_reserved, tsize, DBA_ALIGN)) { device_printf(dev, "Unable to allocate TX Descriptor memory\n"); error = ENOMEM; goto fail; } txr->base = (struct i40e_tx_desc *)txr->dma.va; bzero((void *)txr->base, tsize); /* Now allocate transmit soft structs for the ring */ if (ixl_allocate_tx_data(que)) { device_printf(dev, "Critical Failure setting up TX structures\n"); error = ENOMEM; goto fail; } /* Allocate a buf ring */ txr->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &txr->mtx); if (txr->br == NULL) { device_printf(dev, "Critical Failure setting up TX buf ring\n"); error = ENOMEM; goto fail; } /* * Next the RX queues... */ rsize = roundup2(que->num_desc * sizeof(union i40e_rx_desc), DBA_ALIGN); rxr = &que->rxr; rxr->que = que; rxr->tail = I40E_QRX_TAIL(que->me); /* Initialize the RX side lock */ snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", device_get_nameunit(dev), que->me); mtx_init(&rxr->mtx, rxr->mtx_name, NULL, MTX_DEF); if (i40e_allocate_dma_mem(&pf->hw, &rxr->dma, i40e_mem_reserved, rsize, 4096)) { device_printf(dev, "Unable to allocate RX Descriptor memory\n"); error = ENOMEM; goto fail; } rxr->base = (union i40e_rx_desc *)rxr->dma.va; bzero((void *)rxr->base, rsize); /* Allocate receive soft structs for the ring*/ if (ixl_allocate_rx_data(que)) { device_printf(dev, "Critical Failure setting up receive structs\n"); error = ENOMEM; goto fail; } } return (0); fail: for (int i = 0; i < vsi->num_queues; i++) { que = &vsi->queues[i]; rxr = &que->rxr; txr = &que->txr; if (rxr->base) i40e_free_dma_mem(&pf->hw, &rxr->dma); if (txr->base) i40e_free_dma_mem(&pf->hw, &txr->dma); } early: return (error); } /* ** Provide a update to the queue RX ** interrupt moderation value. 
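**
** Dynamic mode classifies the ring into a latency band by
** bytes-per-interrupt, then blends the old and new ITR values
** as itr = (10 * new * old) / (9 * new + old). As a rough
** worked example in IXL_ITR_* register units: moving from
** old = 10 toward new = 62 yields 6200 / 568 ~= 10, so the
** value creeps toward the target instead of jumping.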
*/
static void
ixl_set_queue_rx_itr(struct ixl_queue *que)
{
	struct ixl_vsi	*vsi = que->vsi;
	struct i40e_hw	*hw = vsi->hw;
	struct rx_ring	*rxr = &que->rxr;
	u16		rx_itr;
	u16		rx_latency = 0;
	int		rx_bytes;

	/* Idle, do nothing */
	if (rxr->bytes == 0)
		return;

	if (ixl_dynamic_rx_itr) {
		rx_bytes = rxr->bytes/rxr->itr;
		rx_itr = rxr->itr;

		/* Adjust latency range */
		switch (rxr->latency) {
		case IXL_LOW_LATENCY:
			if (rx_bytes > 10) {
				rx_latency = IXL_AVE_LATENCY;
				rx_itr = IXL_ITR_20K;
			}
			break;
		case IXL_AVE_LATENCY:
			if (rx_bytes > 20) {
				rx_latency = IXL_BULK_LATENCY;
				rx_itr = IXL_ITR_8K;
			} else if (rx_bytes <= 10) {
				rx_latency = IXL_LOW_LATENCY;
				rx_itr = IXL_ITR_100K;
			}
			break;
		case IXL_BULK_LATENCY:
			if (rx_bytes <= 20) {
				rx_latency = IXL_AVE_LATENCY;
				rx_itr = IXL_ITR_20K;
			}
			break;
		}

		rxr->latency = rx_latency;

		if (rx_itr != rxr->itr) {
			/* do an exponential smoothing */
			rx_itr = (10 * rx_itr * rxr->itr) /
			    ((9 * rx_itr) + rxr->itr);
			rxr->itr = rx_itr & IXL_MAX_ITR;
			wr32(hw, I40E_PFINT_ITRN(IXL_RX_ITR,
			    que->me), rxr->itr);
		}
	} else { /* We may have toggled to non-dynamic */
		if (vsi->rx_itr_setting & IXL_ITR_DYNAMIC)
			vsi->rx_itr_setting = ixl_rx_itr;
		/* Update the hardware if needed */
		if (rxr->itr != vsi->rx_itr_setting) {
			rxr->itr = vsi->rx_itr_setting;
			wr32(hw, I40E_PFINT_ITRN(IXL_RX_ITR,
			    que->me), rxr->itr);
		}
	}
	rxr->bytes = 0;
	rxr->packets = 0;
	return;
}


/*
** Provide an update to the queue TX
** interrupt moderation value.
*/
static void
ixl_set_queue_tx_itr(struct ixl_queue *que)
{
	struct ixl_vsi	*vsi = que->vsi;
	struct i40e_hw	*hw = vsi->hw;
	struct tx_ring	*txr = &que->txr;
	u16		tx_itr;
	u16		tx_latency = 0;
	int		tx_bytes;

	/* Idle, do nothing */
	if (txr->bytes == 0)
		return;

	if (ixl_dynamic_tx_itr) {
		tx_bytes = txr->bytes/txr->itr;
		tx_itr = txr->itr;

		switch (txr->latency) {
		case IXL_LOW_LATENCY:
			if (tx_bytes > 10) {
				tx_latency = IXL_AVE_LATENCY;
				tx_itr = IXL_ITR_20K;
			}
			break;
		case IXL_AVE_LATENCY:
			if (tx_bytes > 20) {
				tx_latency = IXL_BULK_LATENCY;
				tx_itr = IXL_ITR_8K;
			} else if (tx_bytes <= 10) {
				tx_latency = IXL_LOW_LATENCY;
				tx_itr = IXL_ITR_100K;
			}
			break;
		case IXL_BULK_LATENCY:
			if (tx_bytes <= 20) {
				tx_latency = IXL_AVE_LATENCY;
				tx_itr = IXL_ITR_20K;
			}
			break;
		}

		txr->latency = tx_latency;

		if (tx_itr != txr->itr) {
			/* do an exponential smoothing */
			tx_itr = (10 * tx_itr * txr->itr) /
			    ((9 * tx_itr) + txr->itr);
			txr->itr = tx_itr & IXL_MAX_ITR;
			wr32(hw, I40E_PFINT_ITRN(IXL_TX_ITR,
			    que->me), txr->itr);
		}

	} else { /* We may have toggled to non-dynamic */
		if (vsi->tx_itr_setting & IXL_ITR_DYNAMIC)
			vsi->tx_itr_setting = ixl_tx_itr;
		/* Update the hardware if needed */
		if (txr->itr != vsi->tx_itr_setting) {
			txr->itr = vsi->tx_itr_setting;
			wr32(hw, I40E_PFINT_ITRN(IXL_TX_ITR,
			    que->me), txr->itr);
		}
	}
	txr->bytes = 0;
	txr->packets = 0;
	return;
}

#define QUEUE_NAME_LEN 32

static void
ixl_add_vsi_sysctls(struct ixl_pf *pf, struct ixl_vsi *vsi,
    struct sysctl_ctx_list *ctx, const char *sysctl_name)
{
	struct sysctl_oid *tree;
	struct sysctl_oid_list *child;
	struct sysctl_oid_list *vsi_list;

	tree = device_get_sysctl_tree(pf->dev);
	child = SYSCTL_CHILDREN(tree);
	vsi->vsi_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, sysctl_name,
	    CTLFLAG_RD, NULL, "VSI Number");
	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);

	ixl_add_sysctls_eth_stats(ctx, vsi_list, &vsi->eth_stats);
}

static void
ixl_add_hw_stats(struct ixl_pf *pf)
{
	device_t dev = pf->dev;
	struct ixl_vsi *vsi = &pf->vsi;
	struct ixl_queue *queues = vsi->queues;
	struct i40e_hw_port_stats *pf_stats = &pf->stats;

	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct sysctl_oid_list *vsi_list; struct sysctl_oid *queue_node; struct sysctl_oid_list *queue_list; struct tx_ring *txr; struct rx_ring *rxr; char queue_namebuf[QUEUE_NAME_LEN]; /* Driver statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events", CTLFLAG_RD, &pf->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "admin_irq", CTLFLAG_RD, &pf->admin_irq, "Admin Queue IRQ Handled"); ixl_add_vsi_sysctls(pf, &pf->vsi, ctx, "pf"); vsi_list = SYSCTL_CHILDREN(pf->vsi.vsi_node); /* Queue statistics */ for (int q = 0; q < vsi->num_queues; q++) { snprintf(queue_namebuf, QUEUE_NAME_LEN, "que%d", q); queue_node = SYSCTL_ADD_NODE(ctx, vsi_list, OID_AUTO, queue_namebuf, CTLFLAG_RD, NULL, "Queue #"); queue_list = SYSCTL_CHILDREN(queue_node); txr = &(queues[q].txr); rxr = &(queues[q].rxr); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "mbuf_defrag_failed", CTLFLAG_RD, &(queues[q].mbuf_defrag_failed), "m_defrag() failed"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "dropped", CTLFLAG_RD, &(queues[q].dropped_pkts), "Driver dropped packets"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs", CTLFLAG_RD, &(queues[q].irqs), "irqs on this queue"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tso_tx", CTLFLAG_RD, &(queues[q].tso), "TSO"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_dma_setup", CTLFLAG_RD, &(queues[q].tx_dma_setup), "Driver tx dma failure in xmit"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &(txr->no_desc), "Queue No Descriptor Available"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", CTLFLAG_RD, &(txr->total_packets), "Queue Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_bytes", CTLFLAG_RD, &(txr->tx_bytes), "Queue Bytes Transmitted"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets", CTLFLAG_RD, &(rxr->rx_packets), "Queue Packets Received"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &(rxr->rx_bytes), "Queue Bytes Received"); } /* MAC stats */ ixl_add_sysctls_mac_stats(ctx, child, pf_stats); } static void ixl_add_sysctls_eth_stats(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct i40e_eth_stats *eth_stats) { struct ixl_sysctl_info ctls[] = { {ð_stats->rx_bytes, "good_octets_rcvd", "Good Octets Received"}, {ð_stats->rx_unicast, "ucast_pkts_rcvd", "Unicast Packets Received"}, {ð_stats->rx_multicast, "mcast_pkts_rcvd", "Multicast Packets Received"}, {ð_stats->rx_broadcast, "bcast_pkts_rcvd", "Broadcast Packets Received"}, {ð_stats->rx_discards, "rx_discards", "Discarded RX packets"}, {ð_stats->tx_bytes, "good_octets_txd", "Good Octets Transmitted"}, {ð_stats->tx_unicast, "ucast_pkts_txd", "Unicast Packets Transmitted"}, {ð_stats->tx_multicast, "mcast_pkts_txd", "Multicast Packets Transmitted"}, {ð_stats->tx_broadcast, "bcast_pkts_txd", "Broadcast Packets Transmitted"}, // end {0,0,0} }; struct ixl_sysctl_info *entry = ctls; while (entry->stat != 0) { SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, entry->name, CTLFLAG_RD, entry->stat, entry->description); entry++; } } static void ixl_add_sysctls_mac_stats(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct i40e_hw_port_stats *stats) { struct sysctl_oid *stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac", CTLFLAG_RD, NULL, "Mac Statistics"); struct sysctl_oid_list *stat_list = SYSCTL_CHILDREN(stat_node); struct i40e_eth_stats *eth_stats = &stats->eth; 
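	/*
	 * The shared ethernet counters are attached under "mac" first;
	 * the MAC-only counters come from the ctls[] table below.
	 */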
ixl_add_sysctls_eth_stats(ctx, stat_list, eth_stats); struct ixl_sysctl_info ctls[] = { {&stats->crc_errors, "crc_errors", "CRC Errors"}, {&stats->illegal_bytes, "illegal_bytes", "Illegal Byte Errors"}, {&stats->mac_local_faults, "local_faults", "MAC Local Faults"}, {&stats->mac_remote_faults, "remote_faults", "MAC Remote Faults"}, {&stats->rx_length_errors, "rx_length_errors", "Receive Length Errors"}, /* Packet Reception Stats */ {&stats->rx_size_64, "rx_frames_64", "64 byte frames received"}, {&stats->rx_size_127, "rx_frames_65_127", "65-127 byte frames received"}, {&stats->rx_size_255, "rx_frames_128_255", "128-255 byte frames received"}, {&stats->rx_size_511, "rx_frames_256_511", "256-511 byte frames received"}, {&stats->rx_size_1023, "rx_frames_512_1023", "512-1023 byte frames received"}, {&stats->rx_size_1522, "rx_frames_1024_1522", "1024-1522 byte frames received"}, {&stats->rx_size_big, "rx_frames_big", "1523-9522 byte frames received"}, {&stats->rx_undersize, "rx_undersize", "Undersized packets received"}, {&stats->rx_fragments, "rx_fragmented", "Fragmented packets received"}, {&stats->rx_oversize, "rx_oversized", "Oversized packets received"}, {&stats->rx_jabber, "rx_jabber", "Received Jabber"}, {&stats->checksum_error, "checksum_errors", "Checksum Errors"}, /* Packet Transmission Stats */ {&stats->tx_size_64, "tx_frames_64", "64 byte frames transmitted"}, {&stats->tx_size_127, "tx_frames_65_127", "65-127 byte frames transmitted"}, {&stats->tx_size_255, "tx_frames_128_255", "128-255 byte frames transmitted"}, {&stats->tx_size_511, "tx_frames_256_511", "256-511 byte frames transmitted"}, {&stats->tx_size_1023, "tx_frames_512_1023", "512-1023 byte frames transmitted"}, {&stats->tx_size_1522, "tx_frames_1024_1522", "1024-1522 byte frames transmitted"}, {&stats->tx_size_big, "tx_frames_big", "1523-9522 byte frames transmitted"}, /* Flow control */ {&stats->link_xon_tx, "xon_txd", "Link XON transmitted"}, {&stats->link_xon_rx, "xon_recvd", "Link XON received"}, {&stats->link_xoff_tx, "xoff_txd", "Link XOFF transmitted"}, {&stats->link_xoff_rx, "xoff_recvd", "Link XOFF received"}, /* End */ {0,0,0} }; struct ixl_sysctl_info *entry = ctls; while (entry->stat != 0) { SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, entry->name, CTLFLAG_RD, entry->stat, entry->description); entry++; } } /* ** ixl_config_rss - setup RSS ** - note this is done for the single vsi */ static void ixl_config_rss(struct ixl_vsi *vsi) { struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct i40e_hw *hw = vsi->hw; u32 lut = 0; u64 set_hena = 0, hena; int i, j, que_id; #ifdef RSS u32 rss_hash_config; u32 rss_seed[IXL_KEYSZ]; #else u32 rss_seed[IXL_KEYSZ] = {0x41b01687, 0x183cfd8c, 0xce880440, 0x580cbc3c, 0x35897377, 0x328b25e1, 0x4fa98922, 0xb7d90c14, 0xd5bad70d, 0xcd15a2c1}; #endif #ifdef RSS /* Fetch the configured RSS key */ rss_getkey((uint8_t *) &rss_seed); #endif /* Fill out hash function seed */ for (i = 0; i < IXL_KEYSZ; i++) wr32(hw, I40E_PFQF_HKEY(i), rss_seed[i]); /* Enable PCTYPES for RSS: */ #ifdef RSS rss_hash_config = rss_gethashconfig(); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER); if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP); if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX) 
		set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6);
	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6)
		set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP);
	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6)
		set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_UDP);
#else
	set_hena =
		((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP) |
		((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP) |
		((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_SCTP) |
		((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER) |
		((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV4) |
		((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_UDP) |
		((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP) |
		((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_SCTP) |
		((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER) |
		((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6) |
		((u64)1 << I40E_FILTER_PCTYPE_L2_PAYLOAD);
#endif
	hena = (u64)rd32(hw, I40E_PFQF_HENA(0)) |
	    ((u64)rd32(hw, I40E_PFQF_HENA(1)) << 32);
	hena |= set_hena;
	wr32(hw, I40E_PFQF_HENA(0), (u32)hena);
	wr32(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32));

	/* Populate the LUT with max no. of queues in round robin fashion */
	for (i = j = 0; i < pf->hw.func_caps.rss_table_size; i++, j++) {
		if (j == vsi->num_queues)
			j = 0;
#ifdef RSS
		/*
		 * Fetch the RSS bucket id for the given indirection entry.
		 * Cap it at the number of configured buckets (which is
		 * num_queues.)
		 */
		que_id = rss_get_indirection_to_bucket(i);
		que_id = que_id % vsi->num_queues;
#else
		que_id = j;
#endif
		/* lut = 4-byte sliding window of 4 lut entries */
		lut = (lut << 8) | (que_id &
		    ((0x1 << pf->hw.func_caps.rss_table_entry_width) - 1));
		/* On i = 3, we have 4 entries in lut; write to the register */
		if ((i & 3) == 3)
			wr32(hw, I40E_PFQF_HLUT(i >> 2), lut);
	}
	ixl_flush(hw);
}

/*
** This routine is run via a vlan config EVENT;
** it enables us to use the HW Filter table since
** we can get the vlan id. This just creates the
** entry in the soft version of the VFTA, init will
** repopulate the real table.
*/
static void
ixl_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
{
	struct ixl_vsi	*vsi = ifp->if_softc;
	struct i40e_hw	*hw = vsi->hw;
	struct ixl_pf	*pf = (struct ixl_pf *)vsi->back;

	if (ifp->if_softc != arg)   /* Not our event */
		return;

	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
		return;

	IXL_PF_LOCK(pf);
	++vsi->num_vlans;
	ixl_add_filter(vsi, hw->mac.addr, vtag);
	IXL_PF_UNLOCK(pf);
}

/*
** This routine is run via a vlan
** unconfig EVENT, remove our entry
** in the soft vfta.
*/
static void
ixl_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
{
	struct ixl_vsi	*vsi = ifp->if_softc;
	struct i40e_hw	*hw = vsi->hw;
	struct ixl_pf	*pf = (struct ixl_pf *)vsi->back;

	if (ifp->if_softc != arg)
		return;

	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
		return;

	IXL_PF_LOCK(pf);
	--vsi->num_vlans;
	ixl_del_filter(vsi, hw->mac.addr, vtag);
	IXL_PF_UNLOCK(pf);
}

/*
** This routine updates vlan filters, called by init;
** it scans the filter table and then updates the hw
** after a soft reset.
*/
static void
ixl_setup_vlan_filters(struct ixl_vsi *vsi)
{
	struct ixl_mac_filter	*f;
	int			cnt = 0, flags;

	if (vsi->num_vlans == 0)
		return;
	/*
	** Scan the filter list for vlan entries,
	** mark them for addition and then call
	** for the AQ update.
	*/
	SLIST_FOREACH(f, &vsi->ftl, next) {
		if (f->flags & IXL_FILTER_VLAN) {
			f->flags |=
			    (IXL_FILTER_ADD |
			    IXL_FILTER_USED);
			cnt++;
		}
	}
	if (cnt == 0) {
		printf("setup vlan: no filters found!\n");
		return;
	}
	flags = IXL_FILTER_VLAN;
	flags |= (IXL_FILTER_ADD | IXL_FILTER_USED);
	ixl_add_hw_filters(vsi, flags, cnt);
	return;
}

/*
** Initialize filter list and add filters that the hardware
** needs to know about.
*/
static void
ixl_init_filters(struct ixl_vsi *vsi)
{
	/* Add broadcast address */
	ixl_add_filter(vsi, ixl_bcast_addr, IXL_VLAN_ANY);
}

/*
** This routine adds multicast filters
*/
static void
ixl_add_mc_filter(struct ixl_vsi *vsi, u8 *macaddr)
{
	struct ixl_mac_filter *f;

	/* Does one already exist? */
	f = ixl_find_filter(vsi, macaddr, IXL_VLAN_ANY);
	if (f != NULL)
		return;

	f = ixl_get_filter(vsi);
	if (f == NULL) {
		printf("WARNING: no filter available!!\n");
		return;
	}
	bcopy(macaddr, f->macaddr, ETHER_ADDR_LEN);
	f->vlan = IXL_VLAN_ANY;
	f->flags |= (IXL_FILTER_ADD | IXL_FILTER_USED
	    | IXL_FILTER_MC);

	return;
}

static void
ixl_reconfigure_filters(struct ixl_vsi *vsi)
{
	ixl_add_hw_filters(vsi, IXL_FILTER_USED, vsi->num_macs);
}

/*
** This routine adds macvlan filters
*/
static void
ixl_add_filter(struct ixl_vsi *vsi, u8 *macaddr, s16 vlan)
{
	struct ixl_mac_filter	*f, *tmp;
	struct ixl_pf		*pf;
	device_t		dev;

	DEBUGOUT("ixl_add_filter: begin");

	pf = vsi->back;
	dev = pf->dev;

	/* Does one already exist? */
	f = ixl_find_filter(vsi, macaddr, vlan);
	if (f != NULL)
		return;
	/*
	** Is this the first vlan being registered? If so, we
	** need to remove the ANY filter that indicates we are
	** not in a vlan, and replace that with a 0 filter.
	*/
	if ((vlan != IXL_VLAN_ANY) && (vsi->num_vlans == 1)) {
		tmp = ixl_find_filter(vsi, macaddr, IXL_VLAN_ANY);
		if (tmp != NULL) {
			ixl_del_filter(vsi, macaddr, IXL_VLAN_ANY);
			ixl_add_filter(vsi, macaddr, 0);
		}
	}

	f = ixl_get_filter(vsi);
	if (f == NULL) {
		device_printf(dev, "WARNING: no filter available!!\n");
		return;
	}
	bcopy(macaddr, f->macaddr, ETHER_ADDR_LEN);
	f->vlan = vlan;
	f->flags |= (IXL_FILTER_ADD | IXL_FILTER_USED);
	if (f->vlan != IXL_VLAN_ANY)
		f->flags |= IXL_FILTER_VLAN;
	else
		vsi->num_macs++;

	ixl_add_hw_filters(vsi, f->flags, 1);
	return;
}

static void
ixl_del_filter(struct ixl_vsi *vsi, u8 *macaddr, s16 vlan)
{
	struct ixl_mac_filter *f;

	f = ixl_find_filter(vsi, macaddr, vlan);
	if (f == NULL)
		return;

	f->flags |= IXL_FILTER_DEL;
	ixl_del_hw_filters(vsi, 1);
	vsi->num_macs--;

	/* Check if this is the last vlan removal */
	if (vlan != IXL_VLAN_ANY && vsi->num_vlans == 0) {
		/* Switch back to a non-vlan filter */
		ixl_del_filter(vsi, macaddr, 0);
		ixl_add_filter(vsi, macaddr, IXL_VLAN_ANY);
	}
	return;
}

/*
** Find the filter with both matching mac addr and vlan id
*/
static struct ixl_mac_filter *
ixl_find_filter(struct ixl_vsi *vsi, u8 *macaddr, s16 vlan)
{
	struct ixl_mac_filter	*f;
	bool			match = FALSE;

	SLIST_FOREACH(f, &vsi->ftl, next) {
		if (!cmp_etheraddr(f->macaddr, macaddr))
			continue;
		if (f->vlan == vlan) {
			match = TRUE;
			break;
		}
	}

	if (!match)
		f = NULL;
	return (f);
}

/*
** This routine takes additions to the vsi filter
** table and creates an Admin Queue call to create
** the filters in the hardware.
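**
** One AQ element is built per pending filter: a MAC with
** vlan == IXL_VLAN_ANY is sent with vlan_tag 0 plus
** I40E_AQC_MACVLAN_ADD_IGNORE_VLAN so it matches any tag;
** everything else is a perfect MAC+VLAN match.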
*/ static void ixl_add_hw_filters(struct ixl_vsi *vsi, int flags, int cnt) { struct i40e_aqc_add_macvlan_element_data *a, *b; struct ixl_mac_filter *f; struct ixl_pf *pf; struct i40e_hw *hw; device_t dev; int err, j = 0; pf = vsi->back; dev = pf->dev; hw = &pf->hw; IXL_PF_LOCK_ASSERT(pf); a = malloc(sizeof(struct i40e_aqc_add_macvlan_element_data) * cnt, M_DEVBUF, M_NOWAIT | M_ZERO); if (a == NULL) { device_printf(dev, "add_hw_filters failed to get memory\n"); return; } /* ** Scan the filter list, each time we find one ** we add it to the admin queue array and turn off ** the add bit. */ SLIST_FOREACH(f, &vsi->ftl, next) { if (f->flags == flags) { b = &a[j]; // a pox on fvl long names :) bcopy(f->macaddr, b->mac_addr, ETHER_ADDR_LEN); if (f->vlan == IXL_VLAN_ANY) { b->vlan_tag = 0; b->flags = I40E_AQC_MACVLAN_ADD_IGNORE_VLAN; } else { b->vlan_tag = f->vlan; b->flags = 0; } b->flags |= I40E_AQC_MACVLAN_ADD_PERFECT_MATCH; f->flags &= ~IXL_FILTER_ADD; j++; } if (j == cnt) break; } if (j > 0) { err = i40e_aq_add_macvlan(hw, vsi->seid, a, j, NULL); if (err) device_printf(dev, "aq_add_macvlan err %d, " "aq_error %d\n", err, hw->aq.asq_last_status); else vsi->hw_filters_add += j; } free(a, M_DEVBUF); return; } /* ** This routine takes removals in the vsi filter ** table and creates an Admin Queue call to delete ** the filters in the hardware. */ static void ixl_del_hw_filters(struct ixl_vsi *vsi, int cnt) { struct i40e_aqc_remove_macvlan_element_data *d, *e; struct ixl_pf *pf; struct i40e_hw *hw; device_t dev; struct ixl_mac_filter *f, *f_temp; int err, j = 0; DEBUGOUT("ixl_del_hw_filters: begin\n"); pf = vsi->back; hw = &pf->hw; dev = pf->dev; d = malloc(sizeof(struct i40e_aqc_remove_macvlan_element_data) * cnt, M_DEVBUF, M_NOWAIT | M_ZERO); if (d == NULL) { printf("del hw filter failed to get memory\n"); return; } SLIST_FOREACH_SAFE(f, &vsi->ftl, next, f_temp) { if (f->flags & IXL_FILTER_DEL) { e = &d[j]; // a pox on fvl long names :) bcopy(f->macaddr, e->mac_addr, ETHER_ADDR_LEN); e->vlan_tag = (f->vlan == IXL_VLAN_ANY ? 0 : f->vlan); e->flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH; /* delete entry from vsi list */ SLIST_REMOVE(&vsi->ftl, f, ixl_mac_filter, next); free(f, M_DEVBUF); j++; } if (j == cnt) break; } if (j > 0) { err = i40e_aq_remove_macvlan(hw, vsi->seid, d, j, NULL); /* NOTE: returns ENOENT every time but seems to work fine, so we'll ignore that specific error. */ // TODO: Does this still occur on current firmwares? 
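		/*
		 * On a partial failure the per-element error codes are
		 * counted below, so hw_filters_del only accounts for
		 * what the firmware actually removed.
		 */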
if (err && hw->aq.asq_last_status != I40E_AQ_RC_ENOENT) { int sc = 0; for (int i = 0; i < j; i++) sc += (!d[i].error_code); vsi->hw_filters_del += sc; device_printf(dev, "Failed to remove %d/%d filters, aq error %d\n", j - sc, j, hw->aq.asq_last_status); } else vsi->hw_filters_del += j; } free(d, M_DEVBUF); DEBUGOUT("ixl_del_hw_filters: end\n"); return; } static int ixl_enable_rings(struct ixl_vsi *vsi) { struct ixl_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; int index, error; u32 reg; error = 0; for (int i = 0; i < vsi->num_queues; i++) { index = vsi->first_queue + i; i40e_pre_tx_queue_cfg(hw, index, TRUE); reg = rd32(hw, I40E_QTX_ENA(index)); reg |= I40E_QTX_ENA_QENA_REQ_MASK | I40E_QTX_ENA_QENA_STAT_MASK; wr32(hw, I40E_QTX_ENA(index), reg); /* Verify the enable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QTX_ENA(index)); if (reg & I40E_QTX_ENA_QENA_STAT_MASK) break; i40e_msec_delay(10); } if ((reg & I40E_QTX_ENA_QENA_STAT_MASK) == 0) { device_printf(pf->dev, "TX queue %d disabled!\n", index); error = ETIMEDOUT; } reg = rd32(hw, I40E_QRX_ENA(index)); reg |= I40E_QRX_ENA_QENA_REQ_MASK | I40E_QRX_ENA_QENA_STAT_MASK; wr32(hw, I40E_QRX_ENA(index), reg); /* Verify the enable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QRX_ENA(index)); if (reg & I40E_QRX_ENA_QENA_STAT_MASK) break; i40e_msec_delay(10); } if ((reg & I40E_QRX_ENA_QENA_STAT_MASK) == 0) { device_printf(pf->dev, "RX queue %d disabled!\n", index); error = ETIMEDOUT; } } return (error); } static int ixl_disable_rings(struct ixl_vsi *vsi) { struct ixl_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; int index, error; u32 reg; error = 0; for (int i = 0; i < vsi->num_queues; i++) { index = vsi->first_queue + i; i40e_pre_tx_queue_cfg(hw, index, FALSE); i40e_usec_delay(500); reg = rd32(hw, I40E_QTX_ENA(index)); reg &= ~I40E_QTX_ENA_QENA_REQ_MASK; wr32(hw, I40E_QTX_ENA(index), reg); /* Verify the disable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QTX_ENA(index)); if (!(reg & I40E_QTX_ENA_QENA_STAT_MASK)) break; i40e_msec_delay(10); } if (reg & I40E_QTX_ENA_QENA_STAT_MASK) { device_printf(pf->dev, "TX queue %d still enabled!\n", index); error = ETIMEDOUT; } reg = rd32(hw, I40E_QRX_ENA(index)); reg &= ~I40E_QRX_ENA_QENA_REQ_MASK; wr32(hw, I40E_QRX_ENA(index), reg); /* Verify the disable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QRX_ENA(index)); if (!(reg & I40E_QRX_ENA_QENA_STAT_MASK)) break; i40e_msec_delay(10); } if (reg & I40E_QRX_ENA_QENA_STAT_MASK) { device_printf(pf->dev, "RX queue %d still enabled!\n", index); error = ETIMEDOUT; } } return (error); } /** * ixl_handle_mdd_event * * Called from interrupt handler to identify possibly malicious vfs * (But also detects events from the PF, as well) **/ static void ixl_handle_mdd_event(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; bool mdd_detected = false; bool pf_mdd_detected = false; u32 reg; /* find what triggered the MDD event */ reg = rd32(hw, I40E_GL_MDET_TX); if (reg & I40E_GL_MDET_TX_VALID_MASK) { u8 pf_num = (reg & I40E_GL_MDET_TX_PF_NUM_MASK) >> I40E_GL_MDET_TX_PF_NUM_SHIFT; u8 event = (reg & I40E_GL_MDET_TX_EVENT_MASK) >> I40E_GL_MDET_TX_EVENT_SHIFT; u8 queue = (reg & I40E_GL_MDET_TX_QUEUE_MASK) >> I40E_GL_MDET_TX_QUEUE_SHIFT; device_printf(dev, "Malicious Driver Detection event 0x%02x" " on TX queue %d pf number 0x%02x\n", event, queue, pf_num); wr32(hw, I40E_GL_MDET_TX, 0xffffffff); mdd_detected = true; } reg = rd32(hw, I40E_GL_MDET_RX); if (reg & I40E_GL_MDET_RX_VALID_MASK) { u8 
func = (reg & I40E_GL_MDET_RX_FUNCTION_MASK) >> I40E_GL_MDET_RX_FUNCTION_SHIFT; u8 event = (reg & I40E_GL_MDET_RX_EVENT_MASK) >> I40E_GL_MDET_RX_EVENT_SHIFT; u8 queue = (reg & I40E_GL_MDET_RX_QUEUE_MASK) >> I40E_GL_MDET_RX_QUEUE_SHIFT; device_printf(dev, "Malicious Driver Detection event 0x%02x" " on RX queue %d of function 0x%02x\n", event, queue, func); wr32(hw, I40E_GL_MDET_RX, 0xffffffff); mdd_detected = true; } if (mdd_detected) { reg = rd32(hw, I40E_PF_MDET_TX); if (reg & I40E_PF_MDET_TX_VALID_MASK) { wr32(hw, I40E_PF_MDET_TX, 0xFFFF); device_printf(dev, "MDD TX event is for this function 0x%08x", reg); pf_mdd_detected = true; } reg = rd32(hw, I40E_PF_MDET_RX); if (reg & I40E_PF_MDET_RX_VALID_MASK) { wr32(hw, I40E_PF_MDET_RX, 0xFFFF); device_printf(dev, "MDD RX event is for this function 0x%08x", reg); pf_mdd_detected = true; } } /* re-enable mdd interrupt cause */ reg = rd32(hw, I40E_PFINT_ICR0_ENA); reg |= I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, reg); ixl_flush(hw); } static void ixl_enable_intr(struct ixl_vsi *vsi) { struct i40e_hw *hw = vsi->hw; struct ixl_queue *que = vsi->queues; if (ixl_enable_msix) { ixl_enable_adminq(hw); for (int i = 0; i < vsi->num_queues; i++, que++) ixl_enable_queue(hw, que->me); } else ixl_enable_legacy(hw); } static void ixl_disable_rings_intr(struct ixl_vsi *vsi) { struct i40e_hw *hw = vsi->hw; struct ixl_queue *que = vsi->queues; for (int i = 0; i < vsi->num_queues; i++, que++) ixl_disable_queue(hw, que->me); } static void ixl_disable_intr(struct ixl_vsi *vsi) { struct i40e_hw *hw = vsi->hw; if (ixl_enable_msix) ixl_disable_adminq(hw); else ixl_disable_legacy(hw); } static void ixl_enable_adminq(struct i40e_hw *hw) { u32 reg; reg = I40E_PFINT_DYN_CTL0_INTENA_MASK | I40E_PFINT_DYN_CTL0_CLEARPBA_MASK | (IXL_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT); wr32(hw, I40E_PFINT_DYN_CTL0, reg); ixl_flush(hw); return; } static void ixl_disable_adminq(struct i40e_hw *hw) { u32 reg; reg = IXL_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT; wr32(hw, I40E_PFINT_DYN_CTL0, reg); return; } static void ixl_enable_queue(struct i40e_hw *hw, int id) { u32 reg; reg = I40E_PFINT_DYN_CTLN_INTENA_MASK | I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | (IXL_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT); wr32(hw, I40E_PFINT_DYN_CTLN(id), reg); } static void ixl_disable_queue(struct i40e_hw *hw, int id) { u32 reg; reg = IXL_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT; wr32(hw, I40E_PFINT_DYN_CTLN(id), reg); return; } static void ixl_enable_legacy(struct i40e_hw *hw) { u32 reg; reg = I40E_PFINT_DYN_CTL0_INTENA_MASK | I40E_PFINT_DYN_CTL0_CLEARPBA_MASK | (IXL_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT); wr32(hw, I40E_PFINT_DYN_CTL0, reg); } static void ixl_disable_legacy(struct i40e_hw *hw) { u32 reg; reg = IXL_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT; wr32(hw, I40E_PFINT_DYN_CTL0, reg); return; } static void ixl_update_stats_counters(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_vf *vf; struct i40e_hw_port_stats *nsd = &pf->stats; struct i40e_hw_port_stats *osd = &pf->stats_offsets; /* Update hw stats */ ixl_stat_update32(hw, I40E_GLPRT_CRCERRS(hw->port), pf->stat_offsets_loaded, &osd->crc_errors, &nsd->crc_errors); ixl_stat_update32(hw, I40E_GLPRT_ILLERRC(hw->port), pf->stat_offsets_loaded, &osd->illegal_bytes, &nsd->illegal_bytes); ixl_stat_update48(hw, I40E_GLPRT_GORCH(hw->port), I40E_GLPRT_GORCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_bytes, &nsd->eth.rx_bytes); ixl_stat_update48(hw, 
I40E_GLPRT_GOTCH(hw->port), I40E_GLPRT_GOTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_bytes, &nsd->eth.tx_bytes); ixl_stat_update32(hw, I40E_GLPRT_RDPC(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_discards, &nsd->eth.rx_discards); ixl_stat_update48(hw, I40E_GLPRT_UPRCH(hw->port), I40E_GLPRT_UPRCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_unicast, &nsd->eth.rx_unicast); ixl_stat_update48(hw, I40E_GLPRT_UPTCH(hw->port), I40E_GLPRT_UPTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_unicast, &nsd->eth.tx_unicast); ixl_stat_update48(hw, I40E_GLPRT_MPRCH(hw->port), I40E_GLPRT_MPRCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_multicast, &nsd->eth.rx_multicast); ixl_stat_update48(hw, I40E_GLPRT_MPTCH(hw->port), I40E_GLPRT_MPTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_multicast, &nsd->eth.tx_multicast); ixl_stat_update48(hw, I40E_GLPRT_BPRCH(hw->port), I40E_GLPRT_BPRCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_broadcast, &nsd->eth.rx_broadcast); ixl_stat_update48(hw, I40E_GLPRT_BPTCH(hw->port), I40E_GLPRT_BPTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_broadcast, &nsd->eth.tx_broadcast); ixl_stat_update32(hw, I40E_GLPRT_TDOLD(hw->port), pf->stat_offsets_loaded, &osd->tx_dropped_link_down, &nsd->tx_dropped_link_down); ixl_stat_update32(hw, I40E_GLPRT_MLFC(hw->port), pf->stat_offsets_loaded, &osd->mac_local_faults, &nsd->mac_local_faults); ixl_stat_update32(hw, I40E_GLPRT_MRFC(hw->port), pf->stat_offsets_loaded, &osd->mac_remote_faults, &nsd->mac_remote_faults); ixl_stat_update32(hw, I40E_GLPRT_RLEC(hw->port), pf->stat_offsets_loaded, &osd->rx_length_errors, &nsd->rx_length_errors); /* Flow control (LFC) stats */ ixl_stat_update32(hw, I40E_GLPRT_LXONRXC(hw->port), pf->stat_offsets_loaded, &osd->link_xon_rx, &nsd->link_xon_rx); ixl_stat_update32(hw, I40E_GLPRT_LXONTXC(hw->port), pf->stat_offsets_loaded, &osd->link_xon_tx, &nsd->link_xon_tx); ixl_stat_update32(hw, I40E_GLPRT_LXOFFRXC(hw->port), pf->stat_offsets_loaded, &osd->link_xoff_rx, &nsd->link_xoff_rx); ixl_stat_update32(hw, I40E_GLPRT_LXOFFTXC(hw->port), pf->stat_offsets_loaded, &osd->link_xoff_tx, &nsd->link_xoff_tx); /* Packet size stats rx */ ixl_stat_update48(hw, I40E_GLPRT_PRC64H(hw->port), I40E_GLPRT_PRC64L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_64, &nsd->rx_size_64); ixl_stat_update48(hw, I40E_GLPRT_PRC127H(hw->port), I40E_GLPRT_PRC127L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_127, &nsd->rx_size_127); ixl_stat_update48(hw, I40E_GLPRT_PRC255H(hw->port), I40E_GLPRT_PRC255L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_255, &nsd->rx_size_255); ixl_stat_update48(hw, I40E_GLPRT_PRC511H(hw->port), I40E_GLPRT_PRC511L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_511, &nsd->rx_size_511); ixl_stat_update48(hw, I40E_GLPRT_PRC1023H(hw->port), I40E_GLPRT_PRC1023L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_1023, &nsd->rx_size_1023); ixl_stat_update48(hw, I40E_GLPRT_PRC1522H(hw->port), I40E_GLPRT_PRC1522L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_1522, &nsd->rx_size_1522); ixl_stat_update48(hw, I40E_GLPRT_PRC9522H(hw->port), I40E_GLPRT_PRC9522L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_big, &nsd->rx_size_big); /* Packet size stats tx */ ixl_stat_update48(hw, I40E_GLPRT_PTC64H(hw->port), I40E_GLPRT_PTC64L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_64, &nsd->tx_size_64); ixl_stat_update48(hw, I40E_GLPRT_PTC127H(hw->port), I40E_GLPRT_PTC127L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_127, &nsd->tx_size_127); ixl_stat_update48(hw, 
I40E_GLPRT_PTC255H(hw->port), I40E_GLPRT_PTC255L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_255, &nsd->tx_size_255); ixl_stat_update48(hw, I40E_GLPRT_PTC511H(hw->port), I40E_GLPRT_PTC511L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_511, &nsd->tx_size_511); ixl_stat_update48(hw, I40E_GLPRT_PTC1023H(hw->port), I40E_GLPRT_PTC1023L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_1023, &nsd->tx_size_1023); ixl_stat_update48(hw, I40E_GLPRT_PTC1522H(hw->port), I40E_GLPRT_PTC1522L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_1522, &nsd->tx_size_1522); ixl_stat_update48(hw, I40E_GLPRT_PTC9522H(hw->port), I40E_GLPRT_PTC9522L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_big, &nsd->tx_size_big); ixl_stat_update32(hw, I40E_GLPRT_RUC(hw->port), pf->stat_offsets_loaded, &osd->rx_undersize, &nsd->rx_undersize); ixl_stat_update32(hw, I40E_GLPRT_RFC(hw->port), pf->stat_offsets_loaded, &osd->rx_fragments, &nsd->rx_fragments); ixl_stat_update32(hw, I40E_GLPRT_ROC(hw->port), pf->stat_offsets_loaded, &osd->rx_oversize, &nsd->rx_oversize); ixl_stat_update32(hw, I40E_GLPRT_RJC(hw->port), pf->stat_offsets_loaded, &osd->rx_jabber, &nsd->rx_jabber); pf->stat_offsets_loaded = true; /* End hw stats */ /* Update vsi stats */ ixl_update_vsi_stats(vsi); for (int i = 0; i < pf->num_vfs; i++) { vf = &pf->vfs[i]; if (vf->vf_flags & VF_FLAG_ENABLED) ixl_update_eth_stats(&pf->vfs[i].vsi); } } /* ** Tasklet handler for MSIX Adminq interrupts ** - do outside interrupt since it might sleep */ static void ixl_do_adminq(void *context, int pending) { struct ixl_pf *pf = context; struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct i40e_arq_event_info event; i40e_status ret; u32 reg, loop = 0; u16 opcode, result; event.buf_len = IXL_AQ_BUF_SZ; event.msg_buf = malloc(event.buf_len, M_DEVBUF, M_NOWAIT | M_ZERO); if (!event.msg_buf) { printf("Unable to allocate adminq memory\n"); return; } IXL_PF_LOCK(pf); /* clean and process any events */ do { ret = i40e_clean_arq_element(hw, &event, &result); if (ret) break; opcode = LE16_TO_CPU(event.desc.opcode); switch (opcode) { case i40e_aqc_opc_get_link_status: ixl_link_event(pf, &event); ixl_update_link_status(pf); break; case i40e_aqc_opc_send_msg_to_pf: #ifdef PCI_IOV ixl_handle_vf_msg(pf, &event); #endif break; case i40e_aqc_opc_event_lan_overflow: break; default: #ifdef IXL_DEBUG printf("AdminQ unknown event %x\n", opcode); #endif break; } } while (result && (loop++ < IXL_ADM_LIMIT)); reg = rd32(hw, I40E_PFINT_ICR0_ENA); reg |= I40E_PFINT_ICR0_ENA_ADMINQ_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, reg); free(event.msg_buf, M_DEVBUF); /* * If there are still messages to process, reschedule ourselves. * Otherwise, re-enable our interrupt and go to sleep. 
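 * (i40e_clean_arq_element() reports, through its last argument, how many
 * events are still pending in the ARQ, and the loop above is bounded by
 * IXL_ADM_LIMIT, so a non-zero result here just means this pass gave up
 * its slot before the queue was fully drained.)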
*/ if (result > 0) taskqueue_enqueue(pf->tq, &pf->adminq); else ixl_enable_intr(vsi); IXL_PF_UNLOCK(pf); } #ifdef IXL_DEBUG_SYSCTL static int ixl_debug_info(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf; int error, input = 0; error = sysctl_handle_int(oidp, &input, 0, req); if (error || !req->newptr) return (error); if (input == 1) { pf = (struct ixl_pf *)arg1; ixl_print_debug_info(pf); } return (error); } static void ixl_print_debug_info(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_queue *que = vsi->queues; struct rx_ring *rxr = &que->rxr; struct tx_ring *txr = &que->txr; u32 reg; printf("Queue irqs = %jx\n", (uintmax_t)que->irqs); printf("AdminQ irqs = %jx\n", (uintmax_t)pf->admin_irq); printf("RX next check = %x\n", rxr->next_check); printf("RX not ready = %jx\n", (uintmax_t)rxr->not_done); printf("RX packets = %jx\n", (uintmax_t)rxr->rx_packets); printf("TX desc avail = %x\n", txr->avail); reg = rd32(hw, I40E_GLV_GORCL(0xc)); printf("RX Bytes = %x\n", reg); reg = rd32(hw, I40E_GLPRT_GORCL(hw->port)); printf("Port RX Bytes = %x\n", reg); reg = rd32(hw, I40E_GLV_RDPC(0xc)); printf("RX discard = %x\n", reg); reg = rd32(hw, I40E_GLPRT_RDPC(hw->port)); printf("Port RX discard = %x\n", reg); reg = rd32(hw, I40E_GLV_TEPC(0xc)); printf("TX errors = %x\n", reg); reg = rd32(hw, I40E_GLV_GOTCL(0xc)); printf("TX Bytes = %x\n", reg); reg = rd32(hw, I40E_GLPRT_RUC(hw->port)); printf("RX undersize = %x\n", reg); reg = rd32(hw, I40E_GLPRT_RFC(hw->port)); printf("RX fragments = %x\n", reg); reg = rd32(hw, I40E_GLPRT_ROC(hw->port)); printf("RX oversize = %x\n", reg); reg = rd32(hw, I40E_GLPRT_RLEC(hw->port)); printf("RX length error = %x\n", reg); reg = rd32(hw, I40E_GLPRT_MRFC(hw->port)); printf("mac remote fault = %x\n", reg); reg = rd32(hw, I40E_GLPRT_MLFC(hw->port)); printf("mac local fault = %x\n", reg); } #endif /** * Update VSI-specific ethernet statistics counters. 
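 * The counters live in the per-VSI GLV_* registers selected by the VSI's
 * stat_counter_idx; ixl_stat_update32/48 subtract the offsets captured on
 * the first read, so the values reported count from zero at driver load.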
**/ void ixl_update_eth_stats(struct ixl_vsi *vsi) { struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct i40e_hw *hw = &pf->hw; struct i40e_eth_stats *es; struct i40e_eth_stats *oes; struct i40e_hw_port_stats *nsd; u16 stat_idx = vsi->info.stat_counter_idx; es = &vsi->eth_stats; oes = &vsi->eth_stats_offsets; nsd = &pf->stats; /* Gather up the stats that the hw collects */ ixl_stat_update32(hw, I40E_GLV_TEPC(stat_idx), vsi->stat_offsets_loaded, &oes->tx_errors, &es->tx_errors); ixl_stat_update32(hw, I40E_GLV_RDPC(stat_idx), vsi->stat_offsets_loaded, &oes->rx_discards, &es->rx_discards); ixl_stat_update48(hw, I40E_GLV_GORCH(stat_idx), I40E_GLV_GORCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_bytes, &es->rx_bytes); ixl_stat_update48(hw, I40E_GLV_UPRCH(stat_idx), I40E_GLV_UPRCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_unicast, &es->rx_unicast); ixl_stat_update48(hw, I40E_GLV_MPRCH(stat_idx), I40E_GLV_MPRCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_multicast, &es->rx_multicast); ixl_stat_update48(hw, I40E_GLV_BPRCH(stat_idx), I40E_GLV_BPRCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_broadcast, &es->rx_broadcast); ixl_stat_update48(hw, I40E_GLV_GOTCH(stat_idx), I40E_GLV_GOTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_bytes, &es->tx_bytes); ixl_stat_update48(hw, I40E_GLV_UPTCH(stat_idx), I40E_GLV_UPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_unicast, &es->tx_unicast); ixl_stat_update48(hw, I40E_GLV_MPTCH(stat_idx), I40E_GLV_MPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_multicast, &es->tx_multicast); ixl_stat_update48(hw, I40E_GLV_BPTCH(stat_idx), I40E_GLV_BPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_broadcast, &es->tx_broadcast); vsi->stat_offsets_loaded = true; } static void ixl_update_vsi_stats(struct ixl_vsi *vsi) { struct ixl_pf *pf; struct ifnet *ifp; struct i40e_eth_stats *es; u64 tx_discards; struct i40e_hw_port_stats *nsd; pf = vsi->back; ifp = vsi->ifp; es = &vsi->eth_stats; nsd = &pf->stats; ixl_update_eth_stats(vsi); tx_discards = es->tx_discards + nsd->tx_dropped_link_down; for (int i = 0; i < vsi->num_queues; i++) tx_discards += vsi->queues[i].txr.br->br_drops; /* Update ifnet stats */ IXL_SET_IPACKETS(vsi, es->rx_unicast + es->rx_multicast + es->rx_broadcast); IXL_SET_OPACKETS(vsi, es->tx_unicast + es->tx_multicast + es->tx_broadcast); IXL_SET_IBYTES(vsi, es->rx_bytes); IXL_SET_OBYTES(vsi, es->tx_bytes); IXL_SET_IMCASTS(vsi, es->rx_multicast); IXL_SET_OMCASTS(vsi, es->tx_multicast); IXL_SET_IERRORS(vsi, nsd->crc_errors + nsd->illegal_bytes + nsd->rx_undersize + nsd->rx_oversize + nsd->rx_fragments + nsd->rx_jabber); IXL_SET_OERRORS(vsi, es->tx_errors); IXL_SET_IQDROPS(vsi, es->rx_discards + nsd->eth.rx_discards); IXL_SET_OQDROPS(vsi, tx_discards); IXL_SET_NOPROTO(vsi, es->rx_unknown_protocol); IXL_SET_COLLISIONS(vsi, 0); } /** * Reset all of the stats for the given pf **/ void ixl_pf_reset_stats(struct ixl_pf *pf) { bzero(&pf->stats, sizeof(struct i40e_hw_port_stats)); bzero(&pf->stats_offsets, sizeof(struct i40e_hw_port_stats)); pf->stat_offsets_loaded = false; } /** * Resets all stats of the given vsi **/ void ixl_vsi_reset_stats(struct ixl_vsi *vsi) { bzero(&vsi->eth_stats, sizeof(struct i40e_eth_stats)); bzero(&vsi->eth_stats_offsets, sizeof(struct i40e_eth_stats)); vsi->stat_offsets_loaded = false; } /** * Read and update a 48 bit stat from the hw * * Since the device stats are not reset at PFReset, they likely will not * be zeroed when the driver starts. 
We'll save the first values read * and use them as offsets to be subtracted from the raw values in order * to report stats that count from zero. **/ static void ixl_stat_update48(struct i40e_hw *hw, u32 hireg, u32 loreg, bool offset_loaded, u64 *offset, u64 *stat) { u64 new_data; #if defined(__FreeBSD__) && (__FreeBSD_version >= 1000000) && defined(__amd64__) new_data = rd64(hw, loreg); #else /* * Use two rd32's instead of one rd64; FreeBSD versions before * 10 don't support 8 byte bus reads/writes. */ new_data = rd32(hw, loreg); new_data |= ((u64)(rd32(hw, hireg) & 0xFFFF)) << 32; #endif if (!offset_loaded) *offset = new_data; if (new_data >= *offset) *stat = new_data - *offset; else *stat = (new_data + ((u64)1 << 48)) - *offset; *stat &= 0xFFFFFFFFFFFFULL; } /** * Read and update a 32 bit stat from the hw **/ static void ixl_stat_update32(struct i40e_hw *hw, u32 reg, bool offset_loaded, u64 *offset, u64 *stat) { u32 new_data; new_data = rd32(hw, reg); if (!offset_loaded) *offset = new_data; if (new_data >= *offset) *stat = (u32)(new_data - *offset); else *stat = (u32)((new_data + ((u64)1 << 32)) - *offset); } /* ** Set flow control using sysctl: ** 0 - off ** 1 - rx pause ** 2 - tx pause ** 3 - full */ static int ixl_set_flowcntl(SYSCTL_HANDLER_ARGS) { /* * TODO: ensure flow control is disabled if * priority flow control is enabled * * TODO: ensure tx CRC by hardware should be enabled * if tx flow control is enabled. */ struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int error = 0; enum i40e_status_code aq_error = 0; u8 fc_aq_err = 0; /* Get request */ error = sysctl_handle_int(oidp, &pf->fc, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (pf->fc < 0 || pf->fc > 3) { device_printf(dev, "Invalid fc mode; valid modes are 0 through 3\n"); return (EINVAL); } /* ** Changing flow control mode currently does not work on ** 40GBASE-CR4 PHYs */ if (hw->phy.link_info.phy_type == I40E_PHY_TYPE_40GBASE_CR4 || hw->phy.link_info.phy_type == I40E_PHY_TYPE_40GBASE_CR4_CU) { device_printf(dev, "Changing flow control mode unsupported" " on 40GBase-CR4 media.\n"); return (ENODEV); } /* Set fc ability for port */ hw->fc.requested_mode = pf->fc; aq_error = i40e_set_fc(hw, &fc_aq_err, TRUE); if (aq_error) { device_printf(dev, "%s: Error setting new fc mode %d; fc_err %#x\n", __func__, aq_error, fc_aq_err); return (EAGAIN); } return (0); } static int ixl_current_speed(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; int error = 0, index = 0; char *speeds[] = { "Unknown", "100M", "1G", "10G", "40G", "20G" }; ixl_update_link_status(pf); switch (hw->phy.link_info.link_speed) { case I40E_LINK_SPEED_100MB: index = 1; break; case I40E_LINK_SPEED_1GB: index = 2; break; case I40E_LINK_SPEED_10GB: index = 3; break; case I40E_LINK_SPEED_40GB: index = 4; break; case I40E_LINK_SPEED_20GB: index = 5; break; case I40E_LINK_SPEED_UNKNOWN: default: index = 0; break; } error = sysctl_handle_string(oidp, speeds[index], strlen(speeds[index]), req); return (error); } static int ixl_set_advertised_speeds(struct ixl_pf *pf, int speeds) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct i40e_aq_get_phy_abilities_resp abilities; struct i40e_aq_set_phy_config config; enum i40e_status_code aq_error = 0; /* Get current capability information */ aq_error = i40e_aq_get_phy_capabilities(hw, FALSE, FALSE, &abilities, NULL); if (aq_error) { device_printf(dev, "%s: Error getting phy capabilities %d," " aq error: 
%d\n", __func__, aq_error, hw->aq.asq_last_status); return (EAGAIN); } /* Prepare new config */ bzero(&config, sizeof(config)); config.phy_type = abilities.phy_type; config.abilities = abilities.abilities | I40E_AQ_PHY_ENABLE_ATOMIC_LINK; config.eee_capability = abilities.eee_capability; config.eeer = abilities.eeer_val; config.low_power_ctrl = abilities.d3_lpan; /* Translate into aq cmd link_speed */ if (speeds & 0x8) config.link_speed |= I40E_LINK_SPEED_20GB; if (speeds & 0x4) config.link_speed |= I40E_LINK_SPEED_10GB; if (speeds & 0x2) config.link_speed |= I40E_LINK_SPEED_1GB; if (speeds & 0x1) config.link_speed |= I40E_LINK_SPEED_100MB; /* Do aq command & restart link */ aq_error = i40e_aq_set_phy_config(hw, &config, NULL); if (aq_error) { device_printf(dev, "%s: Error setting new phy config %d," " aq error: %d\n", __func__, aq_error, hw->aq.asq_last_status); return (EAGAIN); } /* ** This seems a bit heavy handed, but we ** need to get a reinit on some devices */ IXL_PF_LOCK(pf); ixl_stop(pf); ixl_init_locked(pf); IXL_PF_UNLOCK(pf); return (0); } /* ** Control link advertise speed: ** Flags: ** 0x1 - advertise 100 Mb ** 0x2 - advertise 1G ** 0x4 - advertise 10G ** 0x8 - advertise 20G ** ** Does not work on 40G devices. */ static int ixl_set_advertise(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int requested_ls = 0; int error = 0; /* ** FW doesn't support changing advertised speed ** for 40G devices; speed is always 40G. */ if (i40e_is_40G_device(hw->device_id)) return (ENODEV); /* Read in new mode */ requested_ls = pf->advertised_speed; error = sysctl_handle_int(oidp, &requested_ls, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Check for sane value */ if (requested_ls < 0x1 || requested_ls > 0xE) { device_printf(dev, "Invalid advertised speed; " "valid modes are 0x1 through 0xE\n"); return (EINVAL); } /* Then check for validity based on adapter type */ switch (hw->device_id) { case I40E_DEV_ID_10G_BASE_T: if (requested_ls & 0x8) { device_printf(dev, "20Gbs speed not supported on this device.\n"); return (EINVAL); } break; case I40E_DEV_ID_20G_KR2: if (requested_ls & 0x1) { device_printf(dev, "100Mbs speed not supported on this device.\n"); return (EINVAL); } break; default: if (requested_ls & ~0x6) { device_printf(dev, "Only 1/10Gbs speeds are supported on this device.\n"); return (EINVAL); } break; } /* Exit if no change */ if (pf->advertised_speed == requested_ls) return (0); error = ixl_set_advertised_speeds(pf, requested_ls); if (error) return (error); pf->advertised_speed = requested_ls; ixl_update_link_status(pf); return (0); } /* ** Get the width and transaction speed of ** the bus this adapter is plugged into. 
*/ static u16 ixl_get_bus_info(struct i40e_hw *hw, device_t dev) { u16 link; u32 offset; /* Get the PCI Express Capabilities offset */ pci_find_cap(dev, PCIY_EXPRESS, &offset); /* ...and read the Link Status Register */ link = pci_read_config(dev, offset + PCIER_LINK_STA, 2); switch (link & I40E_PCI_LINK_WIDTH) { case I40E_PCI_LINK_WIDTH_1: hw->bus.width = i40e_bus_width_pcie_x1; break; case I40E_PCI_LINK_WIDTH_2: hw->bus.width = i40e_bus_width_pcie_x2; break; case I40E_PCI_LINK_WIDTH_4: hw->bus.width = i40e_bus_width_pcie_x4; break; case I40E_PCI_LINK_WIDTH_8: hw->bus.width = i40e_bus_width_pcie_x8; break; default: hw->bus.width = i40e_bus_width_unknown; break; } switch (link & I40E_PCI_LINK_SPEED) { case I40E_PCI_LINK_SPEED_2500: hw->bus.speed = i40e_bus_speed_2500; break; case I40E_PCI_LINK_SPEED_5000: hw->bus.speed = i40e_bus_speed_5000; break; case I40E_PCI_LINK_SPEED_8000: hw->bus.speed = i40e_bus_speed_8000; break; default: hw->bus.speed = i40e_bus_speed_unknown; break; } device_printf(dev,"PCI Express Bus: Speed %s %s\n", ((hw->bus.speed == i40e_bus_speed_8000) ? "8.0GT/s": (hw->bus.speed == i40e_bus_speed_5000) ? "5.0GT/s": (hw->bus.speed == i40e_bus_speed_2500) ? "2.5GT/s":"Unknown"), (hw->bus.width == i40e_bus_width_pcie_x8) ? "Width x8" : (hw->bus.width == i40e_bus_width_pcie_x4) ? "Width x4" : (hw->bus.width == i40e_bus_width_pcie_x1) ? "Width x1" : ("Unknown")); if ((hw->bus.width <= i40e_bus_width_pcie_x8) && (hw->bus.speed < i40e_bus_speed_8000)) { device_printf(dev, "PCI-Express bandwidth available" " for this device\n may be insufficient for" " optimal performance.\n"); device_printf(dev, "For expected performance a x8 " "PCIE Gen3 slot is required.\n"); } return (link); } static int ixl_sysctl_show_fw(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; char buf[32]; snprintf(buf, sizeof(buf), "f%d.%d a%d.%d n%02x.%02x e%08x", hw->aq.fw_maj_ver, hw->aq.fw_min_ver, hw->aq.api_maj_ver, hw->aq.api_min_ver, (hw->nvm.version & IXL_NVM_VERSION_HI_MASK) >> IXL_NVM_VERSION_HI_SHIFT, (hw->nvm.version & IXL_NVM_VERSION_LO_MASK) >> IXL_NVM_VERSION_LO_SHIFT, hw->nvm.eetrack); return (sysctl_handle_string(oidp, buf, strlen(buf), req)); } #ifdef IXL_DEBUG_SYSCTL static int ixl_sysctl_link_status(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; struct i40e_link_status link_status; char buf[512]; enum i40e_status_code aq_error = 0; aq_error = i40e_aq_get_link_info(hw, TRUE, &link_status, NULL); if (aq_error) { printf("i40e_aq_get_link_info() error %d\n", aq_error); return (EPERM); } sprintf(buf, "\n" "PHY Type : %#04x\n" "Speed : %#04x\n" "Link info: %#04x\n" "AN info : %#04x\n" "Ext info : %#04x", link_status.phy_type, link_status.link_speed, link_status.link_info, link_status.an_info, link_status.ext_info); return (sysctl_handle_string(oidp, buf, strlen(buf), req)); } static int ixl_sysctl_phy_abilities(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; char buf[512]; enum i40e_status_code aq_error = 0; struct i40e_aq_get_phy_abilities_resp abilities; aq_error = i40e_aq_get_phy_capabilities(hw, TRUE, FALSE, &abilities, NULL); if (aq_error) { printf("i40e_aq_get_phy_capabilities() error %d\n", aq_error); return (EPERM); } sprintf(buf, "\n" "PHY Type : %#010x\n" "Speed : %#04x\n" "Abilities: %#04x\n" "EEE cap : %#06x\n" "EEER reg : %#010x\n" "D3 Lpan : %#04x", abilities.phy_type, abilities.link_speed, abilities.abilities, abilities.eee_capability, 
abilities.eeer_val, abilities.d3_lpan); return (sysctl_handle_string(oidp, buf, strlen(buf), req)); } static int ixl_sysctl_sw_filter_list(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct ixl_vsi *vsi = &pf->vsi; struct ixl_mac_filter *f; char *buf, *buf_i; int error = 0; int ftl_len = 0; int ftl_counter = 0; int buf_len = 0; int entry_len = 42; SLIST_FOREACH(f, &vsi->ftl, next) { ftl_len++; } if (ftl_len < 1) { sysctl_handle_string(oidp, "(none)", 6, req); return (0); } buf_len = sizeof(char) * (entry_len + 1) * ftl_len + 2; buf = buf_i = malloc(buf_len, M_DEVBUF, M_NOWAIT); if (buf == NULL) { /* M_NOWAIT allocations may fail */ return (ENOMEM); } sprintf(buf_i++, "\n"); SLIST_FOREACH(f, &vsi->ftl, next) { sprintf(buf_i, MAC_FORMAT ", vlan %4d, flags %#06x", MAC_FORMAT_ARGS(f->macaddr), f->vlan, f->flags); buf_i += entry_len; /* don't print '\n' for last entry */ if (++ftl_counter != ftl_len) { sprintf(buf_i, "\n"); buf_i++; } } error = sysctl_handle_string(oidp, buf, strlen(buf), req); if (error) printf("sysctl error: %d\n", error); free(buf, M_DEVBUF); return error; } #define IXL_SW_RES_SIZE 0x14 static int ixl_res_alloc_cmp(const void *a, const void *b) { const struct i40e_aqc_switch_resource_alloc_element_resp *one, *two; one = (const struct i40e_aqc_switch_resource_alloc_element_resp *)a; two = (const struct i40e_aqc_switch_resource_alloc_element_resp *)b; return ((int)one->resource_type - (int)two->resource_type); } static int ixl_sysctl_hw_res_alloc(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *buf; int error = 0; u8 num_entries; struct i40e_aqc_switch_resource_alloc_element_resp resp[IXL_SW_RES_SIZE]; buf = sbuf_new_for_sysctl(NULL, NULL, 0, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } bzero(resp, sizeof(resp)); error = i40e_aq_get_switch_resource_alloc(hw, &num_entries, resp, IXL_SW_RES_SIZE, NULL); if (error) { device_printf(dev, "%s: get_switch_resource_alloc() error %d, aq error %d\n", __func__, error, hw->aq.asq_last_status); sbuf_delete(buf); return error; } /* Sort entries by type for display */ qsort(resp, num_entries, sizeof(struct i40e_aqc_switch_resource_alloc_element_resp), &ixl_res_alloc_cmp); sbuf_cat(buf, "\n"); sbuf_printf(buf, "# of entries: %d\n", num_entries); sbuf_printf(buf, "Type | Guaranteed | Total | Used | Un-allocated\n" " | (this) | (all) | (this) | (all) \n"); for (int i = 0; i < num_entries; i++) { sbuf_printf(buf, "%#4x | %10d %5d %6d %12d", resp[i].resource_type, resp[i].guaranteed, resp[i].total, resp[i].used, resp[i].total_unalloced); if (i < num_entries - 1) sbuf_cat(buf, "\n"); } error = sbuf_finish(buf); sbuf_delete(buf); return (error); } /* ** Caller must init and delete sbuf; this function will clear and ** finish it for caller.
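** The SEID ranges decoded here follow the switch element ID allocation the
** firmware reports via the get-switch-config command; e.g. a hypothetical
** seid of 523 would be rendered as "VSI 11" (523 - 512).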
*/ static char * ixl_switch_element_string(struct sbuf *s, u16 seid, bool uplink) { sbuf_clear(s); if (seid == 0 && uplink) sbuf_cat(s, "Network"); else if (seid == 0) sbuf_cat(s, "Host"); else if (seid == 1) sbuf_cat(s, "EMP"); else if (seid <= 5) sbuf_printf(s, "MAC %d", seid - 2); else if (seid <= 15) sbuf_cat(s, "Reserved"); else if (seid <= 31) sbuf_printf(s, "PF %d", seid - 16); else if (seid <= 159) sbuf_printf(s, "VF %d", seid - 32); else if (seid <= 287) sbuf_cat(s, "Reserved"); else if (seid <= 511) sbuf_cat(s, "Other"); // for other structures else if (seid <= 895) sbuf_printf(s, "VSI %d", seid - 512); else if (seid <= 1023) sbuf_printf(s, "Reserved"); else sbuf_cat(s, "Invalid"); sbuf_finish(s); return sbuf_data(s); } static int ixl_sysctl_switch_config(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *buf; struct sbuf *nmbuf; int error = 0; u8 aq_buf[I40E_AQ_LARGE_BUF]; u16 next = 0; struct i40e_aqc_get_switch_config_resp *sw_config; sw_config = (struct i40e_aqc_get_switch_config_resp *)aq_buf; buf = sbuf_new_for_sysctl(NULL, NULL, 0, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for sysctl output.\n"); return (ENOMEM); } error = i40e_aq_get_switch_config(hw, sw_config, sizeof(aq_buf), &next, NULL); if (error) { device_printf(dev, "%s: aq_get_switch_config() error %d, aq error %d\n", __func__, error, hw->aq.asq_last_status); sbuf_delete(buf); return error; } nmbuf = sbuf_new_auto(); if (!nmbuf) { device_printf(dev, "Could not allocate sbuf for name output.\n"); return (ENOMEM); } sbuf_cat(buf, "\n"); // Assuming <= 255 elements in switch sbuf_printf(buf, "# of elements: %d\n", sw_config->header.num_reported); /* Exclude: ** Revision -- all elements are revision 1 for now */ sbuf_printf(buf, "SEID ( Name ) | Uplink | Downlink | Conn Type\n" " | | | (uplink)\n"); for (int i = 0; i < sw_config->header.num_reported; i++) { // "%4d (%8s) | %8s %8s %#8x", sbuf_printf(buf, "%4d", sw_config->element[i].seid); sbuf_cat(buf, " "); sbuf_printf(buf, "(%8s)", ixl_switch_element_string(nmbuf, sw_config->element[i].seid, false)); sbuf_cat(buf, " | "); sbuf_printf(buf, "%8s", ixl_switch_element_string(nmbuf, sw_config->element[i].uplink_seid, true)); sbuf_cat(buf, " "); sbuf_printf(buf, "%8s", ixl_switch_element_string(nmbuf, sw_config->element[i].downlink_seid, false)); sbuf_cat(buf, " "); sbuf_printf(buf, "%#8x", sw_config->element[i].connection_type); if (i < sw_config->header.num_reported - 1) sbuf_cat(buf, "\n"); } sbuf_delete(nmbuf); error = sbuf_finish(buf); sbuf_delete(buf); return (error); } #endif /* IXL_DEBUG_SYSCTL */ #ifdef PCI_IOV static int ixl_vf_alloc_vsi(struct ixl_pf *pf, struct ixl_vf *vf) { struct i40e_hw *hw; struct ixl_vsi *vsi; struct i40e_vsi_context vsi_ctx; int i; uint16_t first_queue; enum i40e_status_code code; hw = &pf->hw; vsi = &pf->vsi; vsi_ctx.pf_num = hw->pf_id; vsi_ctx.uplink_seid = pf->veb_seid; vsi_ctx.connection_type = IXL_VSI_DATA_PORT; vsi_ctx.vf_num = hw->func_caps.vf_base_id + vf->vf_num; vsi_ctx.flags = I40E_AQ_VSI_TYPE_VF; bzero(&vsi_ctx.info, sizeof(vsi_ctx.info)); vsi_ctx.info.valid_sections = htole16(I40E_AQ_VSI_PROP_SWITCH_VALID); vsi_ctx.info.switch_id = htole16(0); vsi_ctx.info.valid_sections |= htole16(I40E_AQ_VSI_PROP_SECURITY_VALID); vsi_ctx.info.sec_flags = 0; if (vf->vf_flags & VF_FLAG_MAC_ANTI_SPOOF) vsi_ctx.info.sec_flags |= I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK; vsi_ctx.info.valid_sections |= htole16(I40E_AQ_VSI_PROP_VLAN_VALID); 
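	/*
	 * With the VLAN section marked valid above, the flags below leave the
	 * VF VSI accepting all VLANs (MODE_ALL) with no tag insertion or
	 * stripping (EMOD_NOTHING); stripping is only enabled later, on
	 * request, by ixl_vf_enable_vlan_strip().
	 */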
vsi_ctx.info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL | I40E_AQ_VSI_PVLAN_EMOD_NOTHING; vsi_ctx.info.valid_sections |= htole16(I40E_AQ_VSI_PROP_QUEUE_MAP_VALID); vsi_ctx.info.mapping_flags = htole16(I40E_AQ_VSI_QUE_MAP_NONCONTIG); first_queue = vsi->num_queues + vf->vf_num * IXLV_MAX_QUEUES; for (i = 0; i < IXLV_MAX_QUEUES; i++) vsi_ctx.info.queue_mapping[i] = htole16(first_queue + i); for (; i < nitems(vsi_ctx.info.queue_mapping); i++) vsi_ctx.info.queue_mapping[i] = htole16(I40E_AQ_VSI_QUEUE_MASK); vsi_ctx.info.tc_mapping[0] = htole16( (0 << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) | (1 << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT)); code = i40e_aq_add_vsi(hw, &vsi_ctx, NULL); if (code != I40E_SUCCESS) return (ixl_adminq_err_to_errno(hw->aq.asq_last_status)); vf->vsi.seid = vsi_ctx.seid; vf->vsi.vsi_num = vsi_ctx.vsi_number; vf->vsi.first_queue = first_queue; vf->vsi.num_queues = IXLV_MAX_QUEUES; code = i40e_aq_get_vsi_params(hw, &vsi_ctx, NULL); if (code != I40E_SUCCESS) return (ixl_adminq_err_to_errno(hw->aq.asq_last_status)); code = i40e_aq_config_vsi_bw_limit(hw, vf->vsi.seid, 0, 0, NULL); if (code != I40E_SUCCESS) { device_printf(pf->dev, "Failed to disable BW limit: %d\n", ixl_adminq_err_to_errno(hw->aq.asq_last_status)); return (ixl_adminq_err_to_errno(hw->aq.asq_last_status)); } memcpy(&vf->vsi.info, &vsi_ctx.info, sizeof(vf->vsi.info)); return (0); } static int ixl_vf_setup_vsi(struct ixl_pf *pf, struct ixl_vf *vf) { struct i40e_hw *hw; int error; hw = &pf->hw; error = ixl_vf_alloc_vsi(pf, vf); if (error != 0) return (error); vf->vsi.hw_filters_add = 0; vf->vsi.hw_filters_del = 0; ixl_add_filter(&vf->vsi, ixl_bcast_addr, IXL_VLAN_ANY); ixl_reconfigure_filters(&vf->vsi); return (0); } static void ixl_vf_map_vsi_queue(struct i40e_hw *hw, struct ixl_vf *vf, int qnum, uint32_t val) { uint32_t qtable; int index, shift; /* * Two queues are mapped in a single register, so we have to do some * gymnastics to convert the queue number into a register index and * shift. */ index = qnum / 2; shift = (qnum % 2) * I40E_VSILAN_QTABLE_QINDEX_1_SHIFT; qtable = rd32(hw, I40E_VSILAN_QTABLE(index, vf->vsi.vsi_num)); qtable &= ~(I40E_VSILAN_QTABLE_QINDEX_0_MASK << shift); qtable |= val << shift; wr32(hw, I40E_VSILAN_QTABLE(index, vf->vsi.vsi_num), qtable); } static void ixl_vf_map_queues(struct ixl_pf *pf, struct ixl_vf *vf) { struct i40e_hw *hw; uint32_t qtable; int i; hw = &pf->hw; /* * Contiguous mappings aren't actually supported by the hardware, * so we have to use non-contiguous mappings. */ wr32(hw, I40E_VSILAN_QBASE(vf->vsi.vsi_num), I40E_VSILAN_QBASE_VSIQTABLE_ENA_MASK); wr32(hw, I40E_VPLAN_MAPENA(vf->vf_num), I40E_VPLAN_MAPENA_TXRX_ENA_MASK); for (i = 0; i < vf->vsi.num_queues; i++) { qtable = (vf->vsi.first_queue + i) << I40E_VPLAN_QTABLE_QINDEX_SHIFT; wr32(hw, I40E_VPLAN_QTABLE(i, vf->vf_num), qtable); } /* Map queues allocated to VF to its VSI. */ for (i = 0; i < vf->vsi.num_queues; i++) ixl_vf_map_vsi_queue(hw, vf, i, vf->vsi.first_queue + i); /* Set rest of VSI queues as unused. 
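   (The QINDEX_0 mask written for those slots appears to act as an all-ones
   "no queue" sentinel, so lookups past the VF's allocation miss cleanly
   rather than aliasing another VSI's queue.)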
*/ for (; i < IXL_MAX_VSI_QUEUES; i++) ixl_vf_map_vsi_queue(hw, vf, i, I40E_VSILAN_QTABLE_QINDEX_0_MASK); ixl_flush(hw); } static void ixl_vf_vsi_release(struct ixl_pf *pf, struct ixl_vsi *vsi) { struct i40e_hw *hw; hw = &pf->hw; if (vsi->seid == 0) return; i40e_aq_delete_element(hw, vsi->seid, NULL); } static void ixl_vf_disable_queue_intr(struct i40e_hw *hw, uint32_t vfint_reg) { wr32(hw, vfint_reg, I40E_VFINT_DYN_CTLN_CLEARPBA_MASK); ixl_flush(hw); } static void ixl_vf_unregister_intr(struct i40e_hw *hw, uint32_t vpint_reg) { wr32(hw, vpint_reg, I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_MASK | I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK); ixl_flush(hw); } static void ixl_vf_release_resources(struct ixl_pf *pf, struct ixl_vf *vf) { struct i40e_hw *hw; uint32_t vfint_reg, vpint_reg; int i; hw = &pf->hw; ixl_vf_vsi_release(pf, &vf->vsi); /* Index 0 has a special register. */ ixl_vf_disable_queue_intr(hw, I40E_VFINT_DYN_CTL0(vf->vf_num)); for (i = 1; i < hw->func_caps.num_msix_vectors_vf; i++) { vfint_reg = IXL_VFINT_DYN_CTLN_REG(hw, i , vf->vf_num); ixl_vf_disable_queue_intr(hw, vfint_reg); } /* Index 0 has a special register. */ ixl_vf_unregister_intr(hw, I40E_VPINT_LNKLST0(vf->vf_num)); for (i = 1; i < hw->func_caps.num_msix_vectors_vf; i++) { vpint_reg = IXL_VPINT_LNKLSTN_REG(hw, i, vf->vf_num); ixl_vf_unregister_intr(hw, vpint_reg); } vf->vsi.num_queues = 0; } static int ixl_flush_pcie(struct ixl_pf *pf, struct ixl_vf *vf) { struct i40e_hw *hw; int i; uint16_t global_vf_num; uint32_t ciad; hw = &pf->hw; global_vf_num = hw->func_caps.vf_base_id + vf->vf_num; wr32(hw, I40E_PF_PCI_CIAA, IXL_PF_PCI_CIAA_VF_DEVICE_STATUS | (global_vf_num << I40E_PF_PCI_CIAA_VF_NUM_SHIFT)); for (i = 0; i < IXL_VF_RESET_TIMEOUT; i++) { ciad = rd32(hw, I40E_PF_PCI_CIAD); if ((ciad & IXL_PF_PCI_CIAD_VF_TRANS_PENDING_MASK) == 0) return (0); DELAY(1); } return (ETIMEDOUT); } static void ixl_reset_vf(struct ixl_pf *pf, struct ixl_vf *vf) { struct i40e_hw *hw; uint32_t vfrtrig; hw = &pf->hw; vfrtrig = rd32(hw, I40E_VPGEN_VFRTRIG(vf->vf_num)); vfrtrig |= I40E_VPGEN_VFRTRIG_VFSWR_MASK; wr32(hw, I40E_VPGEN_VFRTRIG(vf->vf_num), vfrtrig); ixl_flush(hw); ixl_reinit_vf(pf, vf); } static void ixl_reinit_vf(struct ixl_pf *pf, struct ixl_vf *vf) { struct i40e_hw *hw; uint32_t vfrstat, vfrtrig; int i, error; hw = &pf->hw; error = ixl_flush_pcie(pf, vf); if (error != 0) device_printf(pf->dev, "Timed out waiting for PCIe activity to stop on VF-%d\n", vf->vf_num); for (i = 0; i < IXL_VF_RESET_TIMEOUT; i++) { DELAY(10); vfrstat = rd32(hw, I40E_VPGEN_VFRSTAT(vf->vf_num)); if (vfrstat & I40E_VPGEN_VFRSTAT_VFRD_MASK) break; } if (i == IXL_VF_RESET_TIMEOUT) device_printf(pf->dev, "VF %d failed to reset\n", vf->vf_num); wr32(hw, I40E_VFGEN_RSTAT1(vf->vf_num), I40E_VFR_COMPLETED); vfrtrig = rd32(hw, I40E_VPGEN_VFRTRIG(vf->vf_num)); vfrtrig &= ~I40E_VPGEN_VFRTRIG_VFSWR_MASK; wr32(hw, I40E_VPGEN_VFRTRIG(vf->vf_num), vfrtrig); if (vf->vsi.seid != 0) ixl_disable_rings(&vf->vsi); ixl_vf_release_resources(pf, vf); ixl_vf_setup_vsi(pf, vf); ixl_vf_map_queues(pf, vf); wr32(hw, I40E_VFGEN_RSTAT1(vf->vf_num), I40E_VFR_VFACTIVE); ixl_flush(hw); } static const char * ixl_vc_opcode_str(uint16_t op) { switch (op) { case I40E_VIRTCHNL_OP_VERSION: return ("VERSION"); case I40E_VIRTCHNL_OP_RESET_VF: return ("RESET_VF"); case I40E_VIRTCHNL_OP_GET_VF_RESOURCES: return ("GET_VF_RESOURCES"); case I40E_VIRTCHNL_OP_CONFIG_TX_QUEUE: return ("CONFIG_TX_QUEUE"); case I40E_VIRTCHNL_OP_CONFIG_RX_QUEUE: return ("CONFIG_RX_QUEUE"); case I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES: return 
("CONFIG_VSI_QUEUES"); case I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP: return ("CONFIG_IRQ_MAP"); case I40E_VIRTCHNL_OP_ENABLE_QUEUES: return ("ENABLE_QUEUES"); case I40E_VIRTCHNL_OP_DISABLE_QUEUES: return ("DISABLE_QUEUES"); case I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS: return ("ADD_ETHER_ADDRESS"); case I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS: return ("DEL_ETHER_ADDRESS"); case I40E_VIRTCHNL_OP_ADD_VLAN: return ("ADD_VLAN"); case I40E_VIRTCHNL_OP_DEL_VLAN: return ("DEL_VLAN"); case I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE: return ("CONFIG_PROMISCUOUS_MODE"); case I40E_VIRTCHNL_OP_GET_STATS: return ("GET_STATS"); case I40E_VIRTCHNL_OP_FCOE: return ("FCOE"); case I40E_VIRTCHNL_OP_EVENT: return ("EVENT"); default: return ("UNKNOWN"); } } static int ixl_vc_opcode_level(uint16_t opcode) { switch (opcode) { case I40E_VIRTCHNL_OP_GET_STATS: return (10); default: return (5); } } static void ixl_send_vf_msg(struct ixl_pf *pf, struct ixl_vf *vf, uint16_t op, enum i40e_status_code status, void *msg, uint16_t len) { struct i40e_hw *hw; int global_vf_id; hw = &pf->hw; global_vf_id = hw->func_caps.vf_base_id + vf->vf_num; I40E_VC_DEBUG(pf, ixl_vc_opcode_level(op), "Sending msg (op=%s[%d], status=%d) to VF-%d\n", ixl_vc_opcode_str(op), op, status, vf->vf_num); i40e_aq_send_msg_to_vf(hw, global_vf_id, op, status, msg, len, NULL); } static void ixl_send_vf_ack(struct ixl_pf *pf, struct ixl_vf *vf, uint16_t op) { ixl_send_vf_msg(pf, vf, op, I40E_SUCCESS, NULL, 0); } static void ixl_send_vf_nack_msg(struct ixl_pf *pf, struct ixl_vf *vf, uint16_t op, enum i40e_status_code status, const char *file, int line) { I40E_VC_DEBUG(pf, 1, "Sending NACK (op=%s[%d], err=%d) to VF-%d from %s:%d\n", ixl_vc_opcode_str(op), op, status, vf->vf_num, file, line); ixl_send_vf_msg(pf, vf, op, status, NULL, 0); } static void ixl_vf_version_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_version_info reply; if (msg_size != sizeof(struct i40e_virtchnl_version_info)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_VERSION, I40E_ERR_PARAM); return; } reply.major = I40E_VIRTCHNL_VERSION_MAJOR; reply.minor = I40E_VIRTCHNL_VERSION_MINOR; ixl_send_vf_msg(pf, vf, I40E_VIRTCHNL_OP_VERSION, I40E_SUCCESS, &reply, sizeof(reply)); } static void ixl_vf_reset_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { if (msg_size != 0) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_RESET_VF, I40E_ERR_PARAM); return; } ixl_reset_vf(pf, vf); /* No response to a reset message. 
*/ } static void ixl_vf_get_resources_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_vf_resource reply; if (msg_size != 0) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_GET_VF_RESOURCES, I40E_ERR_PARAM); return; } bzero(&reply, sizeof(reply)); reply.vf_offload_flags = I40E_VIRTCHNL_VF_OFFLOAD_L2; reply.num_vsis = 1; reply.num_queue_pairs = vf->vsi.num_queues; reply.max_vectors = pf->hw.func_caps.num_msix_vectors_vf; reply.vsi_res[0].vsi_id = vf->vsi.vsi_num; reply.vsi_res[0].vsi_type = I40E_VSI_SRIOV; reply.vsi_res[0].num_queue_pairs = vf->vsi.num_queues; memcpy(reply.vsi_res[0].default_mac_addr, vf->mac, ETHER_ADDR_LEN); ixl_send_vf_msg(pf, vf, I40E_VIRTCHNL_OP_GET_VF_RESOURCES, I40E_SUCCESS, &reply, sizeof(reply)); } static int ixl_vf_config_tx_queue(struct ixl_pf *pf, struct ixl_vf *vf, struct i40e_virtchnl_txq_info *info) { struct i40e_hw *hw; struct i40e_hmc_obj_txq txq; uint16_t global_queue_num, global_vf_num; enum i40e_status_code status; uint32_t qtx_ctl; hw = &pf->hw; global_queue_num = vf->vsi.first_queue + info->queue_id; global_vf_num = hw->func_caps.vf_base_id + vf->vf_num; bzero(&txq, sizeof(txq)); status = i40e_clear_lan_tx_queue_context(hw, global_queue_num); if (status != I40E_SUCCESS) return (EINVAL); txq.base = info->dma_ring_addr / IXL_TX_CTX_BASE_UNITS; txq.head_wb_ena = info->headwb_enabled; txq.head_wb_addr = info->dma_headwb_addr; txq.qlen = info->ring_len; txq.rdylist = le16_to_cpu(vf->vsi.info.qs_handle[0]); txq.rdylist_act = 0; status = i40e_set_lan_tx_queue_context(hw, global_queue_num, &txq); if (status != I40E_SUCCESS) return (EINVAL); qtx_ctl = I40E_QTX_CTL_VF_QUEUE | (hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) | (global_vf_num << I40E_QTX_CTL_VFVM_INDX_SHIFT); wr32(hw, I40E_QTX_CTL(global_queue_num), qtx_ctl); ixl_flush(hw); return (0); } static int ixl_vf_config_rx_queue(struct ixl_pf *pf, struct ixl_vf *vf, struct i40e_virtchnl_rxq_info *info) { struct i40e_hw *hw; struct i40e_hmc_obj_rxq rxq; uint16_t global_queue_num; enum i40e_status_code status; hw = &pf->hw; global_queue_num = vf->vsi.first_queue + info->queue_id; bzero(&rxq, sizeof(rxq)); if (info->databuffer_size > IXL_VF_MAX_BUFFER) return (EINVAL); if (info->max_pkt_size > IXL_VF_MAX_FRAME || info->max_pkt_size < ETHER_MIN_LEN) return (EINVAL); if (info->splithdr_enabled) { if (info->hdr_size > IXL_VF_MAX_HDR_BUFFER) return (EINVAL); rxq.hsplit_0 = info->rx_split_pos & (I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_L2 | I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_IP | I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_TCP_UDP | I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_SCTP); rxq.hbuff = info->hdr_size >> I40E_RXQ_CTX_HBUFF_SHIFT; rxq.dtype = 2; } status = i40e_clear_lan_rx_queue_context(hw, global_queue_num); if (status != I40E_SUCCESS) return (EINVAL); rxq.base = info->dma_ring_addr / IXL_RX_CTX_BASE_UNITS; rxq.qlen = info->ring_len; rxq.dbuff = info->databuffer_size >> I40E_RXQ_CTX_DBUFF_SHIFT; rxq.dsize = 1; rxq.crcstrip = 1; rxq.l2tsel = 1; rxq.rxmax = info->max_pkt_size; rxq.tphrdesc_ena = 1; rxq.tphwdesc_ena = 1; rxq.tphdata_ena = 1; rxq.tphhead_ena = 1; rxq.lrxqthresh = 2; rxq.prefena = 1; status = i40e_set_lan_rx_queue_context(hw, global_queue_num, &rxq); if (status != I40E_SUCCESS) return (EINVAL); return (0); } static void ixl_vf_config_vsi_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_vsi_queue_config_info *info; struct i40e_virtchnl_queue_pair_info *pair; int i; if (msg_size < sizeof(*info)) { i40e_send_vf_nack(pf, vf, 
I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES, I40E_ERR_PARAM); return; } info = msg; if (info->num_queue_pairs == 0) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES, I40E_ERR_PARAM); return; } if (msg_size != sizeof(*info) + info->num_queue_pairs * sizeof(*pair)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES, I40E_ERR_PARAM); return; } if (info->vsi_id != vf->vsi.vsi_num) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES, I40E_ERR_PARAM); return; } for (i = 0; i < info->num_queue_pairs; i++) { pair = &info->qpair[i]; if (pair->txq.vsi_id != vf->vsi.vsi_num || pair->rxq.vsi_id != vf->vsi.vsi_num || pair->txq.queue_id != pair->rxq.queue_id || pair->txq.queue_id >= vf->vsi.num_queues) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES, I40E_ERR_PARAM); return; } if (ixl_vf_config_tx_queue(pf, vf, &pair->txq) != 0) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES, I40E_ERR_PARAM); return; } if (ixl_vf_config_rx_queue(pf, vf, &pair->rxq) != 0) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES, I40E_ERR_PARAM); return; } } ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES); } static void ixl_vf_set_qctl(struct ixl_pf *pf, const struct i40e_virtchnl_vector_map *vector, enum i40e_queue_type cur_type, uint16_t cur_queue, enum i40e_queue_type *last_type, uint16_t *last_queue) { uint32_t offset, qctl; uint16_t itr_indx; if (cur_type == I40E_QUEUE_TYPE_RX) { offset = I40E_QINT_RQCTL(cur_queue); itr_indx = vector->rxitr_idx; } else { offset = I40E_QINT_TQCTL(cur_queue); itr_indx = vector->txitr_idx; } qctl = htole32((vector->vector_id << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | (*last_type << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) | (*last_queue << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | I40E_QINT_RQCTL_CAUSE_ENA_MASK | (itr_indx << I40E_QINT_RQCTL_ITR_INDX_SHIFT)); wr32(&pf->hw, offset, qctl); *last_type = cur_type; *last_queue = cur_queue; } static void ixl_vf_config_vector(struct ixl_pf *pf, struct ixl_vf *vf, const struct i40e_virtchnl_vector_map *vector) { struct i40e_hw *hw; u_int qindex; enum i40e_queue_type type, last_type; uint32_t lnklst_reg; uint16_t rxq_map, txq_map, cur_queue, last_queue; hw = &pf->hw; rxq_map = vector->rxq_map; txq_map = vector->txq_map; last_queue = IXL_END_OF_INTR_LNKLST; last_type = I40E_QUEUE_TYPE_RX; /* * The datasheet says to optimize performance, RX queues and TX queues * should be interleaved in the interrupt linked list, so we process * both at once here. 
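	 * As a hypothetical illustration: with txq_map = 0x3 and rxq_map = 0x3
	 * the loop below programs TX0, RX0, TX1, RX1 in that order; each
	 * ixl_vf_set_qctl() call points the queue being programmed back at the
	 * previously programmed element, so the list walked from the
	 * VPINT_LNKLST head written at the end reads RX1 -> TX1 -> RX0 -> TX0.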
*/ while ((rxq_map != 0) || (txq_map != 0)) { if (txq_map != 0) { qindex = ffs(txq_map) - 1; type = I40E_QUEUE_TYPE_TX; cur_queue = vf->vsi.first_queue + qindex; ixl_vf_set_qctl(pf, vector, type, cur_queue, &last_type, &last_queue); txq_map &= ~(1 << qindex); } if (rxq_map != 0) { qindex = ffs(rxq_map) - 1; type = I40E_QUEUE_TYPE_RX; cur_queue = vf->vsi.first_queue + qindex; ixl_vf_set_qctl(pf, vector, type, cur_queue, &last_type, &last_queue); rxq_map &= ~(1 << qindex); } } if (vector->vector_id == 0) lnklst_reg = I40E_VPINT_LNKLST0(vf->vf_num); else lnklst_reg = IXL_VPINT_LNKLSTN_REG(hw, vector->vector_id, vf->vf_num); wr32(hw, lnklst_reg, (last_queue << I40E_VPINT_LNKLST0_FIRSTQ_INDX_SHIFT) | (last_type << I40E_VPINT_LNKLST0_FIRSTQ_TYPE_SHIFT)); ixl_flush(hw); } static void ixl_vf_config_irq_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_irq_map_info *map; struct i40e_virtchnl_vector_map *vector; struct i40e_hw *hw; int i, largest_txq, largest_rxq; hw = &pf->hw; if (msg_size < sizeof(*map)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP, I40E_ERR_PARAM); return; } map = msg; if (map->num_vectors == 0) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP, I40E_ERR_PARAM); return; } if (msg_size != sizeof(*map) + map->num_vectors * sizeof(*vector)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP, I40E_ERR_PARAM); return; } for (i = 0; i < map->num_vectors; i++) { vector = &map->vecmap[i]; if ((vector->vector_id >= hw->func_caps.num_msix_vectors_vf) || vector->vsi_id != vf->vsi.vsi_num) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP, I40E_ERR_PARAM); return; } if (vector->rxq_map != 0) { largest_rxq = fls(vector->rxq_map) - 1; if (largest_rxq >= vf->vsi.num_queues) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP, I40E_ERR_PARAM); return; } } if (vector->txq_map != 0) { largest_txq = fls(vector->txq_map) - 1; if (largest_txq >= vf->vsi.num_queues) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP, I40E_ERR_PARAM); return; } } if (vector->rxitr_idx > IXL_MAX_ITR_IDX || vector->txitr_idx > IXL_MAX_ITR_IDX) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP, I40E_ERR_PARAM); return; } ixl_vf_config_vector(pf, vf, vector); } ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP); } static void ixl_vf_enable_queues_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_queue_select *select; int error; if (msg_size != sizeof(*select)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ENABLE_QUEUES, I40E_ERR_PARAM); return; } select = msg; if (select->vsi_id != vf->vsi.vsi_num || select->rx_queues == 0 || select->tx_queues == 0) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ENABLE_QUEUES, I40E_ERR_PARAM); return; } error = ixl_enable_rings(&vf->vsi); if (error) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ENABLE_QUEUES, I40E_ERR_TIMEOUT); return; } ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_ENABLE_QUEUES); } static void ixl_vf_disable_queues_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_queue_select *select; int error; if (msg_size != sizeof(*select)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_DISABLE_QUEUES, I40E_ERR_PARAM); return; } select = msg; if (select->vsi_id != vf->vsi.vsi_num || select->rx_queues == 0 || select->tx_queues == 0) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_DISABLE_QUEUES, I40E_ERR_PARAM); return; } error = ixl_disable_rings(&vf->vsi); if (error) { 
i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_DISABLE_QUEUES, I40E_ERR_TIMEOUT); return; } ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_DISABLE_QUEUES); } static boolean_t ixl_zero_mac(const uint8_t *addr) { uint8_t zero[ETHER_ADDR_LEN] = {0, 0, 0, 0, 0, 0}; return (cmp_etheraddr(addr, zero)); } static boolean_t ixl_bcast_mac(const uint8_t *addr) { return (cmp_etheraddr(addr, ixl_bcast_addr)); } static int ixl_vf_mac_valid(struct ixl_vf *vf, const uint8_t *addr) { if (ixl_zero_mac(addr) || ixl_bcast_mac(addr)) return (EINVAL); /* * If the VF is not allowed to change its MAC address, don't let it * set a MAC filter for an address that is not a multicast address and * is not its assigned MAC. */ if (!(vf->vf_flags & VF_FLAG_SET_MAC_CAP) && !(ETHER_IS_MULTICAST(addr) || cmp_etheraddr(addr, vf->mac))) return (EPERM); return (0); } static void ixl_vf_add_mac_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_ether_addr_list *addr_list; struct i40e_virtchnl_ether_addr *addr; struct ixl_vsi *vsi; int i; size_t expected_size; vsi = &vf->vsi; if (msg_size < sizeof(*addr_list)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS, I40E_ERR_PARAM); return; } addr_list = msg; expected_size = sizeof(*addr_list) + addr_list->num_elements * sizeof(*addr); if (addr_list->num_elements == 0 || addr_list->vsi_id != vsi->vsi_num || msg_size != expected_size) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS, I40E_ERR_PARAM); return; } for (i = 0; i < addr_list->num_elements; i++) { if (ixl_vf_mac_valid(vf, addr_list->list[i].addr) != 0) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS, I40E_ERR_PARAM); return; } } for (i = 0; i < addr_list->num_elements; i++) { addr = &addr_list->list[i]; ixl_add_filter(vsi, addr->addr, IXL_VLAN_ANY); } ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS); } static void ixl_vf_del_mac_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_ether_addr_list *addr_list; struct i40e_virtchnl_ether_addr *addr; size_t expected_size; int i; if (msg_size < sizeof(*addr_list)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS, I40E_ERR_PARAM); return; } addr_list = msg; expected_size = sizeof(*addr_list) + addr_list->num_elements * sizeof(*addr); if (addr_list->num_elements == 0 || addr_list->vsi_id != vf->vsi.vsi_num || msg_size != expected_size) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS, I40E_ERR_PARAM); return; } for (i = 0; i < addr_list->num_elements; i++) { addr = &addr_list->list[i]; if (ixl_zero_mac(addr->addr) || ixl_bcast_mac(addr->addr)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS, I40E_ERR_PARAM); return; } } for (i = 0; i < addr_list->num_elements; i++) { addr = &addr_list->list[i]; ixl_del_filter(&vf->vsi, addr->addr, IXL_VLAN_ANY); } ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS); } static enum i40e_status_code ixl_vf_enable_vlan_strip(struct ixl_pf *pf, struct ixl_vf *vf) { struct i40e_vsi_context vsi_ctx; vsi_ctx.seid = vf->vsi.seid; bzero(&vsi_ctx.info, sizeof(vsi_ctx.info)); vsi_ctx.info.valid_sections = htole16(I40E_AQ_VSI_PROP_VLAN_VALID); vsi_ctx.info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL | I40E_AQ_VSI_PVLAN_EMOD_STR_BOTH; return (i40e_aq_update_vsi_params(&pf->hw, &vsi_ctx, NULL)); } static void ixl_vf_add_vlan_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_vlan_filter_list *filter_list; enum i40e_status_code code; size_t
expected_size; int i; if (msg_size < sizeof(*filter_list)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ADD_VLAN, I40E_ERR_PARAM); return; } filter_list = msg; expected_size = sizeof(*filter_list) + filter_list->num_elements * sizeof(uint16_t); if (filter_list->num_elements == 0 || filter_list->vsi_id != vf->vsi.vsi_num || msg_size != expected_size) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ADD_VLAN, I40E_ERR_PARAM); return; } if (!(vf->vf_flags & VF_FLAG_VLAN_CAP)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ADD_VLAN, I40E_ERR_PARAM); return; } for (i = 0; i < filter_list->num_elements; i++) { if (filter_list->vlan_id[i] > EVL_VLID_MASK) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ADD_VLAN, I40E_ERR_PARAM); return; } } code = ixl_vf_enable_vlan_strip(pf, vf); if (code != I40E_SUCCESS) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ADD_VLAN, I40E_ERR_PARAM); /* don't also send an ack below */ return; } for (i = 0; i < filter_list->num_elements; i++) ixl_add_filter(&vf->vsi, vf->mac, filter_list->vlan_id[i]); ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_ADD_VLAN); } static void ixl_vf_del_vlan_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_vlan_filter_list *filter_list; int i; size_t expected_size; if (msg_size < sizeof(*filter_list)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_DEL_VLAN, I40E_ERR_PARAM); return; } filter_list = msg; expected_size = sizeof(*filter_list) + filter_list->num_elements * sizeof(uint16_t); if (filter_list->num_elements == 0 || filter_list->vsi_id != vf->vsi.vsi_num || msg_size != expected_size) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_DEL_VLAN, I40E_ERR_PARAM); return; } for (i = 0; i < filter_list->num_elements; i++) { if (filter_list->vlan_id[i] > EVL_VLID_MASK) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_DEL_VLAN, I40E_ERR_PARAM); return; } } if (!(vf->vf_flags & VF_FLAG_VLAN_CAP)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_DEL_VLAN, I40E_ERR_PARAM); return; } for (i = 0; i < filter_list->num_elements; i++) ixl_del_filter(&vf->vsi, vf->mac, filter_list->vlan_id[i]); ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_DEL_VLAN); } static void ixl_vf_config_promisc_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_promisc_info *info; enum i40e_status_code code; if (msg_size != sizeof(*info)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, I40E_ERR_PARAM); return; } if (!(vf->vf_flags & VF_FLAG_PROMISC_CAP)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, I40E_ERR_PARAM); return; } info = msg; if (info->vsi_id != vf->vsi.vsi_num) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, I40E_ERR_PARAM); return; } code = i40e_aq_set_vsi_unicast_promiscuous(&pf->hw, info->vsi_id, info->flags & I40E_FLAG_VF_UNICAST_PROMISC, NULL); if (code != I40E_SUCCESS) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, code); return; } code = i40e_aq_set_vsi_multicast_promiscuous(&pf->hw, info->vsi_id, info->flags & I40E_FLAG_VF_MULTICAST_PROMISC, NULL); if (code != I40E_SUCCESS) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, code); return; } ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE); } static void ixl_vf_get_stats_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_queue_select *queue; if (msg_size != sizeof(*queue)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_GET_STATS, I40E_ERR_PARAM); return; } queue = msg; if (queue->vsi_id != vf->vsi.vsi_num) { i40e_send_vf_nack(pf,
vf, I40E_VIRTCHNL_OP_GET_STATS, I40E_ERR_PARAM); return; } ixl_update_eth_stats(&vf->vsi); ixl_send_vf_msg(pf, vf, I40E_VIRTCHNL_OP_GET_STATS, I40E_SUCCESS, &vf->vsi.eth_stats, sizeof(vf->vsi.eth_stats)); } static void ixl_handle_vf_msg(struct ixl_pf *pf, struct i40e_arq_event_info *event) { struct ixl_vf *vf; void *msg; uint16_t vf_num, msg_size; uint32_t opcode; vf_num = le16toh(event->desc.retval) - pf->hw.func_caps.vf_base_id; opcode = le32toh(event->desc.cookie_high); if (vf_num >= pf->num_vfs) { device_printf(pf->dev, "Got msg from illegal VF: %d\n", vf_num); return; } vf = &pf->vfs[vf_num]; msg = event->msg_buf; msg_size = event->msg_len; I40E_VC_DEBUG(pf, ixl_vc_opcode_level(opcode), "Got msg %s(%d) from VF-%d of size %d\n", ixl_vc_opcode_str(opcode), opcode, vf_num, msg_size); switch (opcode) { case I40E_VIRTCHNL_OP_VERSION: ixl_vf_version_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_RESET_VF: ixl_vf_reset_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_GET_VF_RESOURCES: ixl_vf_get_resources_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES: ixl_vf_config_vsi_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP: ixl_vf_config_irq_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_ENABLE_QUEUES: ixl_vf_enable_queues_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_DISABLE_QUEUES: ixl_vf_disable_queues_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS: ixl_vf_add_mac_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS: ixl_vf_del_mac_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_ADD_VLAN: ixl_vf_add_vlan_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_DEL_VLAN: ixl_vf_del_vlan_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE: ixl_vf_config_promisc_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_GET_STATS: ixl_vf_get_stats_msg(pf, vf, msg, msg_size); break; /* These two opcodes have been superseded by CONFIG_VSI_QUEUES. */ case I40E_VIRTCHNL_OP_CONFIG_TX_QUEUE: case I40E_VIRTCHNL_OP_CONFIG_RX_QUEUE: default: i40e_send_vf_nack(pf, vf, opcode, I40E_ERR_NOT_IMPLEMENTED); break; } } /* Handle any VFs that have reset themselves via a Function Level Reset(FLR). 
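   Each VF owns one bit in the GLGEN_VFLRSTAT register array; the
   IXL_GLGEN_VFLRSTAT_INDEX/_MASK macros are assumed to select register
   global_vf_num / 32 and bit global_vf_num % 32, and the loop below clears
   a detected bit by writing it back (write-1-to-clear).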
*/ static void ixl_handle_vflr(void *arg, int pending) { struct ixl_pf *pf; struct i40e_hw *hw; uint16_t global_vf_num; uint32_t vflrstat_index, vflrstat_mask, vflrstat, icr0; int i; pf = arg; hw = &pf->hw; IXL_PF_LOCK(pf); for (i = 0; i < pf->num_vfs; i++) { global_vf_num = hw->func_caps.vf_base_id + i; vflrstat_index = IXL_GLGEN_VFLRSTAT_INDEX(global_vf_num); vflrstat_mask = IXL_GLGEN_VFLRSTAT_MASK(global_vf_num); vflrstat = rd32(hw, I40E_GLGEN_VFLRSTAT(vflrstat_index)); if (vflrstat & vflrstat_mask) { wr32(hw, I40E_GLGEN_VFLRSTAT(vflrstat_index), vflrstat_mask); ixl_reinit_vf(pf, &pf->vfs[i]); } } icr0 = rd32(hw, I40E_PFINT_ICR0_ENA); icr0 |= I40E_PFINT_ICR0_ENA_VFLR_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, icr0); ixl_flush(hw); IXL_PF_UNLOCK(pf); } static int ixl_adminq_err_to_errno(enum i40e_admin_queue_err err) { switch (err) { case I40E_AQ_RC_EPERM: return (EPERM); case I40E_AQ_RC_ENOENT: return (ENOENT); case I40E_AQ_RC_ESRCH: return (ESRCH); case I40E_AQ_RC_EINTR: return (EINTR); case I40E_AQ_RC_EIO: return (EIO); case I40E_AQ_RC_ENXIO: return (ENXIO); case I40E_AQ_RC_E2BIG: return (E2BIG); case I40E_AQ_RC_EAGAIN: return (EAGAIN); case I40E_AQ_RC_ENOMEM: return (ENOMEM); case I40E_AQ_RC_EACCES: return (EACCES); case I40E_AQ_RC_EFAULT: return (EFAULT); case I40E_AQ_RC_EBUSY: return (EBUSY); case I40E_AQ_RC_EEXIST: return (EEXIST); case I40E_AQ_RC_EINVAL: return (EINVAL); case I40E_AQ_RC_ENOTTY: return (ENOTTY); case I40E_AQ_RC_ENOSPC: return (ENOSPC); case I40E_AQ_RC_ENOSYS: return (ENOSYS); case I40E_AQ_RC_ERANGE: return (ERANGE); case I40E_AQ_RC_EFLUSHED: return (EINVAL); /* No exact equivalent in errno.h */ case I40E_AQ_RC_BAD_ADDR: return (EFAULT); case I40E_AQ_RC_EMODE: return (EPERM); case I40E_AQ_RC_EFBIG: return (EFBIG); default: return (EINVAL); } } static int ixl_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *params) { struct ixl_pf *pf; struct i40e_hw *hw; struct ixl_vsi *pf_vsi; enum i40e_status_code ret; int i, error; pf = device_get_softc(dev); hw = &pf->hw; pf_vsi = &pf->vsi; IXL_PF_LOCK(pf); pf->vfs = malloc(sizeof(struct ixl_vf) * num_vfs, M_IXL, M_NOWAIT | M_ZERO); if (pf->vfs == NULL) { error = ENOMEM; goto fail; } for (i = 0; i < num_vfs; i++) sysctl_ctx_init(&pf->vfs[i].ctx); ret = i40e_aq_add_veb(hw, pf_vsi->uplink_seid, pf_vsi->seid, 1, FALSE, FALSE, &pf->veb_seid, NULL); if (ret != I40E_SUCCESS) { error = ixl_adminq_err_to_errno(hw->aq.asq_last_status); device_printf(dev, "add_veb failed; code=%d error=%d", ret, error); goto fail; } ixl_configure_msix(pf); ixl_enable_adminq(hw); pf->num_vfs = num_vfs; IXL_PF_UNLOCK(pf); return (0); fail: free(pf->vfs, M_IXL); pf->vfs = NULL; IXL_PF_UNLOCK(pf); return (error); } static void ixl_iov_uninit(device_t dev) { struct ixl_pf *pf; struct i40e_hw *hw; struct ixl_vsi *vsi; struct ifnet *ifp; struct ixl_vf *vfs; int i, num_vfs; pf = device_get_softc(dev); hw = &pf->hw; vsi = &pf->vsi; ifp = vsi->ifp; IXL_PF_LOCK(pf); for (i = 0; i < pf->num_vfs; i++) { if (pf->vfs[i].vsi.seid != 0) i40e_aq_delete_element(hw, pf->vfs[i].vsi.seid, NULL); } if (pf->veb_seid != 0) { i40e_aq_delete_element(hw, pf->veb_seid, NULL); pf->veb_seid = 0; } +#if __FreeBSD_version > 1100022 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) +#else + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) +#endif ixl_disable_intr(vsi); vfs = pf->vfs; num_vfs = pf->num_vfs; pf->vfs = NULL; pf->num_vfs = 0; IXL_PF_UNLOCK(pf); /* Do this after the unlock as sysctl_ctx_free might sleep. 
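   (IXL_PF_LOCK is mutex-based, and sleeping while holding a FreeBSD mutex
   is forbidden, so the sysctl contexts are torn down only after the lock
   has been dropped.)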
*/ for (i = 0; i < num_vfs; i++) sysctl_ctx_free(&vfs[i].ctx); free(vfs, M_IXL); } static int ixl_add_vf(device_t dev, uint16_t vfnum, const nvlist_t *params) { char sysctl_name[QUEUE_NAME_LEN]; struct ixl_pf *pf; struct ixl_vf *vf; const void *mac; size_t size; int error; pf = device_get_softc(dev); vf = &pf->vfs[vfnum]; IXL_PF_LOCK(pf); vf->vf_num = vfnum; vf->vsi.back = pf; vf->vf_flags = VF_FLAG_ENABLED; SLIST_INIT(&vf->vsi.ftl); error = ixl_vf_setup_vsi(pf, vf); if (error != 0) goto out; if (nvlist_exists_binary(params, "mac-addr")) { mac = nvlist_get_binary(params, "mac-addr", &size); bcopy(mac, vf->mac, ETHER_ADDR_LEN); if (nvlist_get_bool(params, "allow-set-mac")) vf->vf_flags |= VF_FLAG_SET_MAC_CAP; } else /* * If the administrator has not specified a MAC address then * we must allow the VF to choose one. */ vf->vf_flags |= VF_FLAG_SET_MAC_CAP; if (nvlist_get_bool(params, "mac-anti-spoof")) vf->vf_flags |= VF_FLAG_MAC_ANTI_SPOOF; if (nvlist_get_bool(params, "allow-promisc")) vf->vf_flags |= VF_FLAG_PROMISC_CAP; vf->vf_flags |= VF_FLAG_VLAN_CAP; ixl_reset_vf(pf, vf); out: IXL_PF_UNLOCK(pf); if (error == 0) { snprintf(sysctl_name, sizeof(sysctl_name), "vf%d", vfnum); ixl_add_vsi_sysctls(pf, &vf->vsi, &vf->ctx, sysctl_name); } return (error); } #endif /* PCI_IOV */ diff --git a/sys/dev/ofw/ofw_bus_subr.c b/sys/dev/ofw/ofw_bus_subr.c index c286373940d4..60a341c538ee 100644 --- a/sys/dev/ofw/ofw_bus_subr.c +++ b/sys/dev/ofw/ofw_bus_subr.c @@ -1,753 +1,754 @@ /*- * Copyright (c) 2001 - 2003 by Thomas Moestl . * Copyright (c) 2005 Marius Strobl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #include "ofw_bus_if.h" int ofw_bus_gen_setup_devinfo(struct ofw_bus_devinfo *obd, phandle_t node) { if (obd == NULL) return (ENOMEM); /* The 'name' property is considered mandatory. 
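* A typical consumer fills one of these per child node, e.g. (a sketch; * the enumeration loop around it is hypothetical): * struct ofw_bus_devinfo obd; * if (ofw_bus_gen_setup_devinfo(&obd, child) != 0) * continue; (nameless nodes are skipped) * ... attach the child device using obd ... * ofw_bus_gen_destroy_devinfo(&obd); (frees the M_OFWPROP strings)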
*/ if ((OF_getprop_alloc(node, "name", 1, (void **)&obd->obd_name)) == -1) return (EINVAL); OF_getprop_alloc(node, "compatible", 1, (void **)&obd->obd_compat); OF_getprop_alloc(node, "device_type", 1, (void **)&obd->obd_type); OF_getprop_alloc(node, "model", 1, (void **)&obd->obd_model); OF_getprop_alloc(node, "status", 1, (void **)&obd->obd_status); obd->obd_node = node; return (0); } void ofw_bus_gen_destroy_devinfo(struct ofw_bus_devinfo *obd) { if (obd == NULL) return; if (obd->obd_compat != NULL) free(obd->obd_compat, M_OFWPROP); if (obd->obd_model != NULL) free(obd->obd_model, M_OFWPROP); if (obd->obd_name != NULL) free(obd->obd_name, M_OFWPROP); if (obd->obd_type != NULL) free(obd->obd_type, M_OFWPROP); if (obd->obd_status != NULL) free(obd->obd_status, M_OFWPROP); } int ofw_bus_gen_child_pnpinfo_str(device_t cbdev, device_t child, char *buf, size_t buflen) { if (ofw_bus_get_name(child) != NULL) { strlcat(buf, "name=", buflen); strlcat(buf, ofw_bus_get_name(child), buflen); } if (ofw_bus_get_compat(child) != NULL) { strlcat(buf, " compat=", buflen); strlcat(buf, ofw_bus_get_compat(child), buflen); } return (0); }; const char * ofw_bus_gen_get_compat(device_t bus, device_t dev) { const struct ofw_bus_devinfo *obd; obd = OFW_BUS_GET_DEVINFO(bus, dev); if (obd == NULL) return (NULL); return (obd->obd_compat); } const char * ofw_bus_gen_get_model(device_t bus, device_t dev) { const struct ofw_bus_devinfo *obd; obd = OFW_BUS_GET_DEVINFO(bus, dev); if (obd == NULL) return (NULL); return (obd->obd_model); } const char * ofw_bus_gen_get_name(device_t bus, device_t dev) { const struct ofw_bus_devinfo *obd; obd = OFW_BUS_GET_DEVINFO(bus, dev); if (obd == NULL) return (NULL); return (obd->obd_name); } phandle_t ofw_bus_gen_get_node(device_t bus, device_t dev) { const struct ofw_bus_devinfo *obd; obd = OFW_BUS_GET_DEVINFO(bus, dev); if (obd == NULL) return (0); return (obd->obd_node); } const char * ofw_bus_gen_get_type(device_t bus, device_t dev) { const struct ofw_bus_devinfo *obd; obd = OFW_BUS_GET_DEVINFO(bus, dev); if (obd == NULL) return (NULL); return (obd->obd_type); } const char * ofw_bus_get_status(device_t dev) { const struct ofw_bus_devinfo *obd; obd = OFW_BUS_GET_DEVINFO(device_get_parent(dev), dev); if (obd == NULL) return (NULL); return (obd->obd_status); } int ofw_bus_status_okay(device_t dev) { const char *status; status = ofw_bus_get_status(dev); if (status == NULL || strcmp(status, "okay") == 0 || strcmp(status, "ok") == 0) return (1); return (0); } static int ofw_bus_node_is_compatible(const char *compat, int len, const char *onecompat) { int onelen, l, ret; onelen = strlen(onecompat); ret = 0; while (len > 0) { if (strlen(compat) == onelen && strncasecmp(compat, onecompat, onelen) == 0) { /* Found it. */ ret = 1; break; } /* Slide to the next sub-string. 
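* The "compatible" property is a sequence of NUL-terminated strings, so * the loop walks len down the buffer one sub-string at a time; e.g. the * 36-byte property "ralink,rt5350-eth\0ralink,rt305x-eth\0" is compared * as two candidate strings.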
*/ l = strlen(compat) + 1; compat += l; len -= l; } return (ret); } int ofw_bus_is_compatible(device_t dev, const char *onecompat) { phandle_t node; const char *compat; int len; if ((compat = ofw_bus_get_compat(dev)) == NULL) return (0); if ((node = ofw_bus_get_node(dev)) == -1) return (0); /* Get total 'compatible' prop len */ if ((len = OF_getproplen(node, "compatible")) <= 0) return (0); return (ofw_bus_node_is_compatible(compat, len, onecompat)); } int ofw_bus_is_compatible_strict(device_t dev, const char *compatible) { const char *compat; size_t len; if ((compat = ofw_bus_get_compat(dev)) == NULL) return (0); len = strlen(compatible); if (strlen(compat) == len && strncasecmp(compat, compatible, len) == 0) return (1); return (0); } const struct ofw_compat_data * ofw_bus_search_compatible(device_t dev, const struct ofw_compat_data *compat) { if (compat == NULL) return NULL; for (; compat->ocd_str != NULL; ++compat) { if (ofw_bus_is_compatible(dev, compat->ocd_str)) break; } return (compat); } int ofw_bus_has_prop(device_t dev, const char *propname) { phandle_t node; if ((node = ofw_bus_get_node(dev)) == -1) return (0); return (OF_hasprop(node, propname)); } void ofw_bus_setup_iinfo(phandle_t node, struct ofw_bus_iinfo *ii, int intrsz) { pcell_t addrc; int msksz; if (OF_getencprop(node, "#address-cells", &addrc, sizeof(addrc)) == -1) addrc = 2; ii->opi_addrc = addrc * sizeof(pcell_t); ii->opi_imapsz = OF_getencprop_alloc(node, "interrupt-map", 1, (void **)&ii->opi_imap); if (ii->opi_imapsz > 0) { msksz = OF_getencprop_alloc(node, "interrupt-map-mask", 1, (void **)&ii->opi_imapmsk); /* * Failure to get the mask is ignored; a full mask is used * then. We barf on bad mask sizes, however. */ if (msksz != -1 && msksz != ii->opi_addrc + intrsz) panic("ofw_bus_setup_iinfo: bad interrupt-map-mask " "property!"); } } int ofw_bus_lookup_imap(phandle_t node, struct ofw_bus_iinfo *ii, void *reg, int regsz, void *pintr, int pintrsz, void *mintr, int mintrsz, phandle_t *iparent) { uint8_t maskbuf[regsz + pintrsz]; int rv; if (ii->opi_imapsz <= 0) return (0); KASSERT(regsz >= ii->opi_addrc, ("ofw_bus_lookup_imap: register size too small: %d < %d", regsz, ii->opi_addrc)); if (node != -1) { rv = OF_getencprop(node, "reg", reg, regsz); if (rv < regsz) panic("ofw_bus_lookup_imap: cannot get reg property"); } return (ofw_bus_search_intrmap(pintr, pintrsz, reg, ii->opi_addrc, ii->opi_imap, ii->opi_imapsz, ii->opi_imapmsk, maskbuf, mintr, mintrsz, iparent)); } /* * Map an interrupt using the firmware reg, interrupt-map and * interrupt-map-mask properties. * The interrupt property to be mapped must be of size intrsz, and pointed to * by intr. The regs property of the node for which the mapping is done must * be passed as regs. This property is an array of register specifications; * the size of the address part of such a specification must be passed as * physsz. Only the first element of the property is used. * imap and imapsz hold the interrupt map and its size. * imapmsk is a pointer to the interrupt-map-mask property, which must have * a size of physsz + intrsz; it may be NULL, in which case a full mask is * assumed. * maskbuf must point to a buffer of length physsz + intrsz. * The interrupt is returned in result, which must point to a buffer of length * rintrsz (which gives the expected size of the mapped interrupt). * Returns number of cells in the interrupt if a mapping was found, 0 otherwise.
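* An illustrative (hypothetical) FDT fragment in the PCI style, assuming * the parent uses two interrupt cells and no address cells: * interrupt-map-mask = <0xf800 0 0 7>; * interrupt-map = <0x0800 0 0 1 &intc 0 1 * 0x1000 0 0 1 &intc 0 2>; * Each entry is: child unit address (physsz bytes), child interrupt * (intrsz bytes), parent phandle, optional parent address cells, and the * parent interrupt (pintrsz bytes, from the parent's #interrupt-cells); * entries are compared against the masked reg+intr tuple until one hits.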
*/ int ofw_bus_search_intrmap(void *intr, int intrsz, void *regs, int physsz, void *imap, int imapsz, void *imapmsk, void *maskbuf, void *result, int rintrsz, phandle_t *iparent) { phandle_t parent; uint8_t *ref = maskbuf; uint8_t *uiintr = intr; uint8_t *uiregs = regs; uint8_t *uiimapmsk = imapmsk; uint8_t *mptr; pcell_t paddrsz; pcell_t pintrsz; int i, rsz, tsz; rsz = -1; if (imapmsk != NULL) { for (i = 0; i < physsz; i++) ref[i] = uiregs[i] & uiimapmsk[i]; for (i = 0; i < intrsz; i++) ref[physsz + i] = uiintr[i] & uiimapmsk[physsz + i]; } else { bcopy(regs, ref, physsz); bcopy(intr, ref + physsz, intrsz); } mptr = imap; i = imapsz; paddrsz = 0; while (i > 0) { bcopy(mptr + physsz + intrsz, &parent, sizeof(parent)); -#ifdef OFW_EPAPR +#ifndef OFW_IMAP_NO_IPARENT_ADDR_CELLS /* - * Find if we need to read the parent address data. Sparc64 - * uses a different encoding that doesn't include this data. + * Find if we need to read the parent address data. + * CHRP-derived OF bindings, including ePAPR-compliant FDTs, + * use this as an optional part of the specifier. */ if (OF_getencprop(OF_node_from_xref(parent), "#address-cells", &paddrsz, sizeof(paddrsz)) == -1) paddrsz = 0; /* default */ paddrsz *= sizeof(pcell_t); #endif if (OF_searchencprop(OF_node_from_xref(parent), "#interrupt-cells", &pintrsz, sizeof(pintrsz)) == -1) pintrsz = 1; /* default */ pintrsz *= sizeof(pcell_t); /* Compute the map stride size. */ tsz = physsz + intrsz + sizeof(phandle_t) + paddrsz + pintrsz; KASSERT(i >= tsz, ("ofw_bus_search_intrmap: truncated map")); if (bcmp(ref, mptr, physsz + intrsz) == 0) { bcopy(mptr + physsz + intrsz + sizeof(parent) + paddrsz, result, MIN(rintrsz, pintrsz)); if (iparent != NULL) *iparent = parent; return (pintrsz/sizeof(pcell_t)); } mptr += tsz; i -= tsz; } return (0); } int ofw_bus_reg_to_rl(device_t dev, phandle_t node, pcell_t acells, pcell_t scells, struct resource_list *rl) { uint64_t phys, size; ssize_t i, j, rid, nreg, ret; uint32_t *reg; char *name; /* * This may be just redundant when having ofw_bus_devinfo * but makes this routine independent of it. */ ret = OF_getprop_alloc(node, "name", sizeof(*name), (void **)&name); if (ret == -1) name = NULL; ret = OF_getencprop_alloc(node, "reg", sizeof(*reg), (void **)®); nreg = (ret == -1) ? 0 : ret; if (nreg % (acells + scells) != 0) { if (bootverbose) device_printf(dev, "Malformed reg property on <%s>\n", (name == NULL) ? "unknown" : name); nreg = 0; } for (i = 0, rid = 0; i < nreg; i += acells + scells, rid++) { phys = size = 0; for (j = 0; j < acells; j++) { phys <<= 32; phys |= reg[i + j]; } for (j = 0; j < scells; j++) { size <<= 32; size |= reg[i + acells + j]; } /* Skip the dummy reg property of glue devices like ssm(4). */ if (size != 0) resource_list_add(rl, SYS_RES_MEMORY, rid, phys, phys + size - 1, size); } free(name, M_OFWPROP); free(reg, M_OFWPROP); return (0); } /* * Get interrupt parent for given node. * Returns 0 if interrupt parent doesn't exist. 
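* An explicit "interrupt-parent" property is searched for on the node and * its ancestors, so a root-level interrupt-parent = <&intc>; (a * hypothetical FDT fragment) covers the whole tree; failing that, the * nearest ancestor carrying an "interrupt-controller" property is used. * Either way the result is an xref handle, not a raw node.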
phandle_t ofw_bus_find_iparent(phandle_t node) { phandle_t iparent; if (OF_searchencprop(node, "interrupt-parent", &iparent, sizeof(iparent)) == -1) { for (iparent = node; iparent != 0; iparent = OF_parent(iparent)) { if (OF_hasprop(iparent, "interrupt-controller")) break; } iparent = OF_xref_from_node(iparent); } return (iparent); } int ofw_bus_intr_to_rl(device_t dev, phandle_t node, struct resource_list *rl, int *rlen) { phandle_t iparent; uint32_t icells, *intr; int err, i, irqnum, nintr, rid; boolean_t extended; nintr = OF_getencprop_alloc(node, "interrupts", sizeof(*intr), (void **)&intr); if (nintr > 0) { iparent = ofw_bus_find_iparent(node); if (iparent == 0) { device_printf(dev, "No interrupt-parent found, " "assuming direct parent\n"); iparent = OF_parent(node); iparent = OF_xref_from_node(iparent); } if (OF_searchencprop(OF_node_from_xref(iparent), "#interrupt-cells", &icells, sizeof(icells)) == -1) { device_printf(dev, "Missing #interrupt-cells " "property, assuming <1>\n"); icells = 1; } if (icells < 1 || icells > nintr) { device_printf(dev, "Invalid #interrupt-cells property " "value <%d>, assuming <1>\n", icells); icells = 1; } extended = false; } else { nintr = OF_getencprop_alloc(node, "interrupts-extended", sizeof(*intr), (void **)&intr); if (nintr <= 0) return (0); extended = true; } err = 0; rid = 0; for (i = 0; i < nintr; i += icells) { if (extended) { iparent = intr[i++]; if (OF_searchencprop(OF_node_from_xref(iparent), "#interrupt-cells", &icells, sizeof(icells)) == -1) { device_printf(dev, "Missing #interrupt-cells " "property\n"); err = ENOENT; break; } if (icells < 1 || (i + icells) > nintr) { device_printf(dev, "Invalid #interrupt-cells " "property value <%d>\n", icells); err = ERANGE; break; } } irqnum = ofw_bus_map_intr(dev, iparent, icells, &intr[i]); resource_list_add(rl, SYS_RES_IRQ, rid++, irqnum, irqnum, 1); } if (rlen != NULL) *rlen = rid; free(intr, M_OFWPROP); return (err); } phandle_t ofw_bus_find_child(phandle_t start, const char *child_name) { char *name; int ret; phandle_t child; for (child = OF_child(start); child != 0; child = OF_peer(child)) { ret = OF_getprop_alloc(child, "name", sizeof(*name), (void **)&name); if (ret == -1) continue; if (strcmp(name, child_name) == 0) { free(name, M_OFWPROP); return (child); } free(name, M_OFWPROP); } return (0); } phandle_t ofw_bus_find_compatible(phandle_t node, const char *onecompat) { phandle_t child, ret; void *compat; int len; /* * Traverse all children of 'start' node, and find first with * matching 'compatible' property. */ for (child = OF_child(node); child != 0; child = OF_peer(child)) { len = OF_getprop_alloc(child, "compatible", 1, &compat); if (len >= 0) { ret = ofw_bus_node_is_compatible(compat, len, onecompat); free(compat, M_OFWPROP); if (ret != 0) return (child); } ret = ofw_bus_find_compatible(child, onecompat); if (ret != 0) return (ret); } return (0); } /** * @brief Return child of bus whose phandle is node * * A direct child of @p bus will be returned if its phandle in the * OFW tree is @p node. Otherwise, NULL is returned. * * @param bus The bus to examine * @param node The phandle_t to look for. */ device_t ofw_bus_find_child_device_by_phandle(device_t bus, phandle_t node) { device_t *children, retval, child; int nkid, i; /* * Nothing can match the flag value for no node. */ if (node == -1) return (NULL); /* * Search the children for a match. We microoptimize * a bit by not using ofw_bus_get since we already know * the parent. We do not recurse.
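* A sketch of the intended use (variable names hypothetical): after an * interrupt has been mapped to a parent xref, resolve the controller's * device with * pdev = ofw_bus_find_child_device_by_phandle(bus, * OF_node_from_xref(iparent)); * and handle pdev == NULL when no attached child owns that node.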
*/ if (device_get_children(bus, &children, &nkid) != 0) return (NULL); retval = NULL; for (i = 0; i < nkid; i++) { child = children[i]; if (OFW_BUS_GET_NODE(bus, child) == node) { retval = child; break; } } free(children, M_TEMP); return (retval); } /* * Parse a property that contains a list of xrefs and values * (like the standard "clocks" and "resets" properties) * Input arguments: * node - consumer's device node * list_name - name of parsed list - "clocks" * cells_name - name of size property - "#clock-cells" * Output arguments: * producer - handle of producer * ncells - number of cells in result * cells - array of decoded cells */ int ofw_bus_parse_xref_list_alloc(phandle_t node, const char *list_name, const char *cells_name, int idx, phandle_t *producer, int *ncells, pcell_t **cells) { phandle_t pnode; phandle_t *elems; uint32_t pcells; int rv, i, j, nelems, cnt; elems = NULL; nelems = OF_getencprop_alloc(node, list_name, sizeof(*elems), (void **)&elems); if (nelems <= 0) return (ENOENT); rv = ENOENT; for (i = 0, cnt = 0; i < nelems; i += pcells, cnt++) { pnode = elems[i++]; if (OF_getencprop(OF_node_from_xref(pnode), cells_name, &pcells, sizeof(pcells)) == -1) { printf("Missing %s property\n", cells_name); rv = ENOENT; break; } if ((i + pcells) > nelems) { printf("Invalid %s property value <%d>\n", cells_name, pcells); rv = ERANGE; break; } if (cnt == idx) { *cells = malloc(pcells * sizeof(**cells), M_OFWPROP, M_WAITOK); *producer = pnode; *ncells = pcells; for (j = 0; j < pcells; j++) (*cells)[j] = elems[i + j]; rv = 0; break; } } if (elems != NULL) free(elems, M_OFWPROP); return (rv); } /* * Find the index of a string in a string list property (case sensitive). */ int ofw_bus_find_string_index(phandle_t node, const char *list_name, const char *name, int *idx) { char *elems; int rv, i, cnt, nelems; elems = NULL; nelems = OF_getprop_alloc(node, list_name, 1, (void **)&elems); if (nelems <= 0) return (ENOENT); rv = ENOENT; for (i = 0, cnt = 0; i < nelems; cnt++) { if (strcmp(elems + i, name) == 0) { *idx = cnt; rv = 0; break; } i += strlen(elems + i) + 1; } if (elems != NULL) free(elems, M_OFWPROP); return (rv); } /* * Create a zero-terminated array of strings from a string list property. */ int ofw_bus_string_list_to_array(phandle_t node, const char *list_name, const char ***array) { char *elems, *tptr; int i, cnt, nelems, len; elems = NULL; nelems = OF_getprop_alloc(node, list_name, 1, (void **)&elems); if (nelems <= 0) return (nelems); /* Count number of strings. */ for (i = 0, cnt = 0; i < nelems; cnt++) i += strlen(elems + i) + 1; /* Allocate space for the pointer array and all strings. */ *array = malloc((cnt + 1) * sizeof(char *) + nelems, M_OFWPROP, M_WAITOK); /* Get address of first string (right after the cnt + 1 pointer slots). */ tptr = (char *)(*array + cnt + 1); /* Copy strings. */ memcpy(tptr, elems, nelems); free(elems, M_OFWPROP); /* Fill string pointers. */ for (i = 0, cnt = 0; i < nelems; cnt++) { len = strlen(tptr + i) + 1; (*array)[cnt] = tptr; i += len; tptr += len; } (*array)[cnt] = NULL; return (cnt); } diff --git a/sys/dev/rt/if_rt.c b/sys/dev/rt/if_rt.c index 4deb8deccfdb..3d3a3f35623a 100644 --- a/sys/dev/rt/if_rt.c +++ b/sys/dev/rt/if_rt.c @@ -1,2834 +1,2814 @@ /*- * Copyright (c) 2015, Stanislav Galabov * Copyright (c) 2014, Aleksandr A. Mityaev * Copyright (c) 2011, Aleksandr Rybalko * based on hard work * by Alexander Egorenkov * and by Damien Bergamini * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1.
Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "if_rtvar.h" #include "if_rtreg.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_platform.h" #include "opt_rt305x.h" #ifdef FDT #include #include #include #endif #include #include #include #include #ifdef IF_RT_PHY_SUPPORT #include "miibus_if.h" #endif /* * Defines and macros */ #define RT_MAX_AGG_SIZE 3840 #define RT_TX_DATA_SEG0_SIZE MJUMPAGESIZE #define RT_MS(_v, _f) (((_v) & _f) >> _f##_S) #define RT_SM(_v, _f) (((_v) << _f##_S) & _f) #define RT_TX_WATCHDOG_TIMEOUT 5 #define RT_CHIPID_RT3050 0x3050 #define RT_CHIPID_RT3052 0x3052 #define RT_CHIPID_RT5350 0x5350 #define RT_CHIPID_RT6855 0x6855 #define RT_CHIPID_MT7620 0x7620 #ifdef FDT /* more specific and new models should go first */ static const struct ofw_compat_data rt_compat_data[] = { { "ralink,rt6855-eth", (uintptr_t)RT_CHIPID_RT6855 }, { "ralink,rt5350-eth", (uintptr_t)RT_CHIPID_RT5350 }, { "ralink,rt3052-eth", (uintptr_t)RT_CHIPID_RT3052 }, { "ralink,rt305x-eth", (uintptr_t)RT_CHIPID_RT3050 }, { NULL, (uintptr_t)NULL } }; #endif /* * Static function prototypes */ static int rt_probe(device_t dev); static int rt_attach(device_t dev); static int rt_detach(device_t dev); static int rt_shutdown(device_t dev); static int rt_suspend(device_t dev); static int rt_resume(device_t dev); static void rt_init_locked(void *priv); static void rt_init(void *priv); static void rt_stop_locked(void *priv); static void rt_stop(void *priv); static void rt_start(struct ifnet *ifp); static int rt_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static void rt_periodic(void *arg); static void rt_tx_watchdog(void *arg); static void rt_intr(void *arg); static void rt_rt5350_intr(void *arg); static void rt_tx_coherent_intr(struct rt_softc *sc); static void rt_rx_coherent_intr(struct rt_softc *sc); static void rt_rx_delay_intr(struct rt_softc *sc); static void rt_tx_delay_intr(struct rt_softc *sc); static void rt_rx_intr(struct rt_softc *sc, int qid); static void rt_tx_intr(struct rt_softc *sc, int qid); static void rt_rx_done_task(void *context, int pending); static void rt_tx_done_task(void *context, int pending); static void rt_periodic_task(void *context, int pending); static int rt_rx_eof(struct rt_softc *sc, struct rt_softc_rx_ring *ring, int limit); static void 
rt_tx_eof(struct rt_softc *sc, struct rt_softc_tx_ring *ring); static void rt_update_stats(struct rt_softc *sc); static void rt_watchdog(struct rt_softc *sc); static void rt_update_raw_counters(struct rt_softc *sc); static void rt_intr_enable(struct rt_softc *sc, uint32_t intr_mask); static void rt_intr_disable(struct rt_softc *sc, uint32_t intr_mask); static int rt_txrx_enable(struct rt_softc *sc); static int rt_alloc_rx_ring(struct rt_softc *sc, struct rt_softc_rx_ring *ring, int qid); static void rt_reset_rx_ring(struct rt_softc *sc, struct rt_softc_rx_ring *ring); static void rt_free_rx_ring(struct rt_softc *sc, struct rt_softc_rx_ring *ring); static int rt_alloc_tx_ring(struct rt_softc *sc, struct rt_softc_tx_ring *ring, int qid); static void rt_reset_tx_ring(struct rt_softc *sc, struct rt_softc_tx_ring *ring); static void rt_free_tx_ring(struct rt_softc *sc, struct rt_softc_tx_ring *ring); static void rt_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error); static void rt_sysctl_attach(struct rt_softc *sc); #ifdef IF_RT_PHY_SUPPORT void rt_miibus_statchg(device_t); static int rt_miibus_readreg(device_t, int, int); static int rt_miibus_writereg(device_t, int, int, int); #endif static int rt_ifmedia_upd(struct ifnet *); static void rt_ifmedia_sts(struct ifnet *, struct ifmediareq *); static SYSCTL_NODE(_hw, OID_AUTO, rt, CTLFLAG_RD, 0, "RT driver parameters"); #ifdef IF_RT_DEBUG static int rt_debug = 0; SYSCTL_INT(_hw_rt, OID_AUTO, debug, CTLFLAG_RWTUN, &rt_debug, 0, "RT debug level"); #endif static int rt_probe(device_t dev) { struct rt_softc *sc = device_get_softc(dev); char buf[80]; #ifdef FDT const struct ofw_compat_data * cd; cd = ofw_bus_search_compatible(dev, rt_compat_data); if (cd->ocd_data == (uintptr_t)NULL) return (ENXIO); sc->rt_chipid = (unsigned int)(cd->ocd_data); #else #if defined(MT7620) sc->rt_chipid = RT_CHIPID_MT7620; #elif defined(RT5350) sc->rt_chipid = RT_CHIPID_RT5350; #else sc->rt_chipid = RT_CHIPID_RT3050; #endif #endif snprintf(buf, sizeof(buf), "Ralink RT%x onChip Ethernet driver", sc->rt_chipid); device_set_desc_copy(dev, buf); return (BUS_PROBE_GENERIC); } /* * macaddr_atoi - translate string MAC address to uint8_t array */ static int macaddr_atoi(const char *str, uint8_t *mac) { int count, i; unsigned int amac[ETHER_ADDR_LEN]; /* Aligned version */ count = sscanf(str, "%x%*c%x%*c%x%*c%x%*c%x%*c%x", &amac[0], &amac[1], &amac[2], &amac[3], &amac[4], &amac[5]); if (count < ETHER_ADDR_LEN) { memset(mac, 0, ETHER_ADDR_LEN); return (1); } /* Copy aligned to result */ for (i = 0; i < ETHER_ADDR_LEN; i ++) mac[i] = (amac[i] & 0xff); return (0); } #ifdef USE_GENERATED_MAC_ADDRESS -static char * -kernenv_next(char *cp) -{ - - if (cp != NULL) { - while (*cp != 0) - cp++; - cp++; - if (*cp == 0) - cp = NULL; - } - return (cp); -} - /* * generate_mac(uint8_t *mac) * This is a MAC address generator for cases when the real device MAC address * is unknown or not yet accessible. * Use 'b','s','d' signature and 3 octets from CRC32 on kenv. * MAC = 'b', 's', 'd', CRC[3]^CRC[2], CRC[1], CRC[0] * * Output - a MAC address that does not change between reboots, as long as * the hints or bootloader info are unchanged.
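* Worked example (CRC value for illustration only): with a CRC32C of * 0x89abcdef the bytes are mac[3] = 0x89 ^ 0xab = 0x22, mac[4] = 0xcd, * mac[5] = 0xef, giving 62:73:64:22:cd:ef ('b' = 0x62, 's' = 0x73, * 'd' = 0x64).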
*/ static void generate_mac(uint8_t *mac) { unsigned char *cp; int i = 0; uint32_t crc = 0xffffffff; /* Generate CRC32 on kenv */ - if (dynamic_kenv) { - for (cp = kenvp[0]; cp != NULL; cp = kenvp[++i]) { - crc = calculate_crc32c(crc, cp, strlen(cp) + 1); - } - } else { - for (cp = kern_envp; cp != NULL; cp = kernenv_next(cp)) { - crc = calculate_crc32c(crc, cp, strlen(cp) + 1); - } + for (cp = kenvp[0]; cp != NULL; cp = kenvp[++i]) { + crc = calculate_crc32c(crc, cp, strlen(cp) + 1); } crc = ~crc; mac[0] = 'b'; mac[1] = 's'; mac[2] = 'd'; mac[3] = (crc >> 24) ^ ((crc >> 16) & 0xff); mac[4] = (crc >> 8) & 0xff; mac[5] = crc & 0xff; } #endif /* * ether_request_mac - try to find usable MAC address. */ static int ether_request_mac(device_t dev, uint8_t *mac) { char *var; /* * "ethaddr" is passed via envp on RedBoot platforms * "kmac" is passed via argv on RouterBOOT platforms */ #if defined(RT305X_UBOOT) || defined(__REDBOOT__) || defined(__ROUTERBOOT__) if ((var = kern_getenv("ethaddr")) != NULL || (var = kern_getenv("kmac")) != NULL ) { if(!macaddr_atoi(var, mac)) { printf("%s: use %s macaddr from KENV\n", device_get_nameunit(dev), var); freeenv(var); return (0); } freeenv(var); } #endif /* * Try from hints * hint.[dev].[unit].macaddr */ if (!resource_string_value(device_get_name(dev), device_get_unit(dev), "macaddr", (const char **)&var)) { if(!macaddr_atoi(var, mac)) { printf("%s: use %s macaddr from hints\n", device_get_nameunit(dev), var); return (0); } } #ifdef USE_GENERATED_MAC_ADDRESS generate_mac(mac); device_printf(dev, "use generated %02x:%02x:%02x:%02x:%02x:%02x " "macaddr\n", mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); #else /* Hardcoded */ mac[0] = 0x00; mac[1] = 0x18; mac[2] = 0xe7; mac[3] = 0xd5; mac[4] = 0x83; mac[5] = 0x90; device_printf(dev, "use hardcoded 00:18:e7:d5:83:90 macaddr\n"); #endif return (0); } /* * Reset hardware */ static void reset_freng(struct rt_softc *sc) { /* XXX hard reset kills everything so skip it ... 
*/ return; } static int rt_attach(device_t dev) { struct rt_softc *sc; struct ifnet *ifp; int error, i; sc = device_get_softc(dev); sc->dev = dev; mtx_init(&sc->lock, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF | MTX_RECURSE); sc->mem_rid = 0; sc->mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->mem_rid, RF_ACTIVE); if (sc->mem == NULL) { device_printf(dev, "could not allocate memory resource\n"); error = ENXIO; goto fail; } sc->bst = rman_get_bustag(sc->mem); sc->bsh = rman_get_bushandle(sc->mem); sc->irq_rid = 0; sc->irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->irq_rid, RF_ACTIVE); if (sc->irq == NULL) { device_printf(dev, "could not allocate interrupt resource\n"); error = ENXIO; goto fail; } #ifdef IF_RT_DEBUG sc->debug = rt_debug; SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLFLAG_RW, &sc->debug, 0, "rt debug level"); #endif /* Reset hardware */ reset_freng(sc); /* Fill in soc-specific registers map */ switch(sc->rt_chipid) { case RT_CHIPID_MT7620: case RT_CHIPID_RT5350: device_printf(dev, "RT%x Ethernet MAC (rev 0x%08x)\n", sc->rt_chipid, sc->mac_rev); /* RT5350: No GDMA, PSE, CDMA, PPE */ RT_WRITE(sc, GE_PORT_BASE + 0x0C00, // UDPCS, TCPCS, IPCS=1 RT_READ(sc, GE_PORT_BASE + 0x0C00) | (0x7<<16)); sc->delay_int_cfg=RT5350_PDMA_BASE+RT5350_DELAY_INT_CFG; sc->fe_int_status=RT5350_FE_INT_STATUS; sc->fe_int_enable=RT5350_FE_INT_ENABLE; sc->pdma_glo_cfg=RT5350_PDMA_BASE+RT5350_PDMA_GLO_CFG; sc->pdma_rst_idx=RT5350_PDMA_BASE+RT5350_PDMA_RST_IDX; for (i = 0; i < RT_SOFTC_TX_RING_COUNT; i++) { sc->tx_base_ptr[i]=RT5350_PDMA_BASE+RT5350_TX_BASE_PTR(i); sc->tx_max_cnt[i]=RT5350_PDMA_BASE+RT5350_TX_MAX_CNT(i); sc->tx_ctx_idx[i]=RT5350_PDMA_BASE+RT5350_TX_CTX_IDX(i); sc->tx_dtx_idx[i]=RT5350_PDMA_BASE+RT5350_TX_DTX_IDX(i); } sc->rx_ring_count=2; sc->rx_base_ptr[0]=RT5350_PDMA_BASE+RT5350_RX_BASE_PTR0; sc->rx_max_cnt[0]=RT5350_PDMA_BASE+RT5350_RX_MAX_CNT0; sc->rx_calc_idx[0]=RT5350_PDMA_BASE+RT5350_RX_CALC_IDX0; sc->rx_drx_idx[0]=RT5350_PDMA_BASE+RT5350_RX_DRX_IDX0; sc->rx_base_ptr[1]=RT5350_PDMA_BASE+RT5350_RX_BASE_PTR1; sc->rx_max_cnt[1]=RT5350_PDMA_BASE+RT5350_RX_MAX_CNT1; sc->rx_calc_idx[1]=RT5350_PDMA_BASE+RT5350_RX_CALC_IDX1; sc->rx_drx_idx[1]=RT5350_PDMA_BASE+RT5350_RX_DRX_IDX1; sc->int_rx_done_mask=RT5350_INT_RXQ0_DONE; sc->int_tx_done_mask=RT5350_INT_TXQ0_DONE; break; case RT_CHIPID_RT6855: device_printf(dev, "RT6855 Ethernet MAC (rev 0x%08x)\n", sc->mac_rev); break; default: device_printf(dev, "RT305XF Ethernet MAC (rev 0x%08x)\n", sc->mac_rev); RT_WRITE(sc, GDMA1_BASE + GDMA_FWD_CFG, ( GDM_ICS_EN | /* Enable IP Csum */ GDM_TCS_EN | /* Enable TCP Csum */ GDM_UCS_EN | /* Enable UDP Csum */ GDM_STRPCRC | /* Strip CRC from packet */ GDM_DST_PORT_CPU << GDM_UFRC_P_SHIFT | /* fwd UCast to CPU */ GDM_DST_PORT_CPU << GDM_BFRC_P_SHIFT | /* fwd BCast to CPU */ GDM_DST_PORT_CPU << GDM_MFRC_P_SHIFT | /* fwd MCast to CPU */ GDM_DST_PORT_CPU << GDM_OFRC_P_SHIFT /* fwd Other to CPU */ )); sc->delay_int_cfg=PDMA_BASE+DELAY_INT_CFG; sc->fe_int_status=GE_PORT_BASE+FE_INT_STATUS; sc->fe_int_enable=GE_PORT_BASE+FE_INT_ENABLE; sc->pdma_glo_cfg=PDMA_BASE+PDMA_GLO_CFG; sc->pdma_glo_cfg=PDMA_BASE+PDMA_GLO_CFG; sc->pdma_rst_idx=PDMA_BASE+PDMA_RST_IDX; for (i = 0; i < RT_SOFTC_TX_RING_COUNT; i++) { sc->tx_base_ptr[i]=PDMA_BASE+TX_BASE_PTR(i); sc->tx_max_cnt[i]=PDMA_BASE+TX_MAX_CNT(i); sc->tx_ctx_idx[i]=PDMA_BASE+TX_CTX_IDX(i); sc->tx_dtx_idx[i]=PDMA_BASE+TX_DTX_IDX(i); } sc->rx_ring_count=1; 
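/* * Note: the register offsets cached in the softc above are what keep the * rest of the driver chip-agnostic: a write through sc->tx_base_ptr[qid] * hits PDMA_BASE+TX_BASE_PTR(qid) here, but * RT5350_PDMA_BASE+RT5350_TX_BASE_PTR(qid) on RT5350/MT7620. */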
sc->rx_base_ptr[0]=PDMA_BASE+RX_BASE_PTR0; sc->rx_max_cnt[0]=PDMA_BASE+RX_MAX_CNT0; sc->rx_calc_idx[0]=PDMA_BASE+RX_CALC_IDX0; sc->rx_drx_idx[0]=PDMA_BASE+RX_DRX_IDX0; sc->int_rx_done_mask=INT_RX_DONE; sc->int_tx_done_mask=INT_TXQ0_DONE; }; /* allocate Tx and Rx rings */ for (i = 0; i < RT_SOFTC_TX_RING_COUNT; i++) { error = rt_alloc_tx_ring(sc, &sc->tx_ring[i], i); if (error != 0) { device_printf(dev, "could not allocate Tx ring #%d\n", i); goto fail; } } sc->tx_ring_mgtqid = 5; for (i = 0; i < sc->rx_ring_count; i++) { error = rt_alloc_rx_ring(sc, &sc->rx_ring[i], i); if (error != 0) { device_printf(dev, "could not allocate Rx ring\n"); goto fail; } } callout_init(&sc->periodic_ch, 0); callout_init_mtx(&sc->tx_watchdog_ch, &sc->lock, 0); ifp = sc->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "could not if_alloc()\n"); error = ENOMEM; goto fail; } ifp->if_softc = sc; if_initname(ifp, device_get_name(sc->dev), device_get_unit(sc->dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = rt_init; ifp->if_ioctl = rt_ioctl; ifp->if_start = rt_start; #define RT_TX_QLEN 256 IFQ_SET_MAXLEN(&ifp->if_snd, RT_TX_QLEN); ifp->if_snd.ifq_drv_maxlen = RT_TX_QLEN; IFQ_SET_READY(&ifp->if_snd); #ifdef IF_RT_PHY_SUPPORT error = mii_attach(dev, &sc->rt_miibus, ifp, rt_ifmedia_upd, rt_ifmedia_sts, BMSR_DEFCAPMASK, MII_PHY_ANY, MII_OFFSET_ANY, 0); if (error != 0) { device_printf(dev, "attaching PHYs failed\n"); error = ENXIO; goto fail; } #else ifmedia_init(&sc->rt_ifmedia, 0, rt_ifmedia_upd, rt_ifmedia_sts); ifmedia_add(&sc->rt_ifmedia, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL); ifmedia_set(&sc->rt_ifmedia, IFM_ETHER | IFM_100_TX | IFM_FDX); #endif /* IF_RT_PHY_SUPPORT */ ether_request_mac(dev, sc->mac_addr); ether_ifattach(ifp, sc->mac_addr); /* * Tell the upper layer(s) we support long frames. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_VLAN_MTU; ifp->if_capenable |= IFCAP_VLAN_MTU; ifp->if_capabilities |= IFCAP_RXCSUM|IFCAP_TXCSUM; ifp->if_capenable |= IFCAP_RXCSUM|IFCAP_TXCSUM; /* init task queue */ TASK_INIT(&sc->rx_done_task, 0, rt_rx_done_task, sc); TASK_INIT(&sc->tx_done_task, 0, rt_tx_done_task, sc); TASK_INIT(&sc->periodic_task, 0, rt_periodic_task, sc); sc->rx_process_limit = 100; sc->taskqueue = taskqueue_create("rt_taskq", M_NOWAIT, taskqueue_thread_enqueue, &sc->taskqueue); taskqueue_start_threads(&sc->taskqueue, 1, PI_NET, "%s taskq", device_get_nameunit(sc->dev)); rt_sysctl_attach(sc); /* set up interrupt */ error = bus_setup_intr(dev, sc->irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, (sc->rt_chipid == RT_CHIPID_RT5350 || sc->rt_chipid == RT_CHIPID_MT7620) ? rt_rt5350_intr : rt_intr, sc, &sc->irqh); if (error != 0) { printf("%s: could not set up interrupt\n", device_get_nameunit(dev)); goto fail; } #ifdef IF_RT_DEBUG device_printf(dev, "debug var at %#08x\n", (u_int)&(sc->debug)); #endif return (0); fail: /* free Tx and Rx rings */ for (i = 0; i < RT_SOFTC_TX_RING_COUNT; i++) rt_free_tx_ring(sc, &sc->tx_ring[i]); for (i = 0; i < sc->rx_ring_count; i++) rt_free_rx_ring(sc, &sc->rx_ring[i]); mtx_destroy(&sc->lock); if (sc->mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid, sc->mem); if (sc->irq != NULL) bus_release_resource(dev, SYS_RES_IRQ, sc->irq_rid, sc->irq); return (error); } /* * Set media options. 
*/ static int rt_ifmedia_upd(struct ifnet *ifp) { struct rt_softc *sc; #ifdef IF_RT_PHY_SUPPORT struct mii_data *mii; struct mii_softc *miisc; int error = 0; sc = ifp->if_softc; RT_SOFTC_LOCK(sc); mii = device_get_softc(sc->rt_miibus); LIST_FOREACH(miisc, &mii->mii_phys, mii_list) PHY_RESET(miisc); error = mii_mediachg(mii); RT_SOFTC_UNLOCK(sc); return (error); #else /* !IF_RT_PHY_SUPPORT */ struct ifmedia *ifm; struct ifmedia_entry *ife; sc = ifp->if_softc; ifm = &sc->rt_ifmedia; ife = ifm->ifm_cur; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); if (IFM_SUBTYPE(ife->ifm_media) == IFM_AUTO) { device_printf(sc->dev, "AUTO is not supported for multiphy MAC"); return (EINVAL); } /* * Ignore everything */ return (0); #endif /* IF_RT_PHY_SUPPORT */ } /* * Report current media status. */ static void rt_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { #ifdef IF_RT_PHY_SUPPORT struct rt_softc *sc; struct mii_data *mii; sc = ifp->if_softc; RT_SOFTC_LOCK(sc); mii = device_get_softc(sc->rt_miibus); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; ifmr->ifm_active = IFM_ETHER | IFM_100_TX | IFM_FDX; ifmr->ifm_status = IFM_AVALID | IFM_ACTIVE; RT_SOFTC_UNLOCK(sc); #else /* !IF_RT_PHY_SUPPORT */ ifmr->ifm_status = IFM_AVALID | IFM_ACTIVE; ifmr->ifm_active = IFM_ETHER | IFM_100_TX | IFM_FDX; #endif /* IF_RT_PHY_SUPPORT */ } static int rt_detach(device_t dev) { struct rt_softc *sc; struct ifnet *ifp; int i; sc = device_get_softc(dev); ifp = sc->ifp; RT_DPRINTF(sc, RT_DEBUG_ANY, "detaching\n"); RT_SOFTC_LOCK(sc); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); callout_stop(&sc->periodic_ch); callout_stop(&sc->tx_watchdog_ch); taskqueue_drain(sc->taskqueue, &sc->rx_done_task); taskqueue_drain(sc->taskqueue, &sc->tx_done_task); taskqueue_drain(sc->taskqueue, &sc->periodic_task); /* free Tx and Rx rings */ for (i = 0; i < RT_SOFTC_TX_RING_COUNT; i++) rt_free_tx_ring(sc, &sc->tx_ring[i]); for (i = 0; i < sc->rx_ring_count; i++) rt_free_rx_ring(sc, &sc->rx_ring[i]); RT_SOFTC_UNLOCK(sc); #ifdef IF_RT_PHY_SUPPORT if (sc->rt_miibus != NULL) device_delete_child(dev, sc->rt_miibus); #endif ether_ifdetach(ifp); if_free(ifp); taskqueue_free(sc->taskqueue); mtx_destroy(&sc->lock); bus_generic_detach(dev); bus_teardown_intr(dev, sc->irq, sc->irqh); bus_release_resource(dev, SYS_RES_IRQ, sc->irq_rid, sc->irq); bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid, sc->mem); return (0); } static int rt_shutdown(device_t dev) { struct rt_softc *sc; sc = device_get_softc(dev); RT_DPRINTF(sc, RT_DEBUG_ANY, "shutting down\n"); rt_stop(sc); return (0); } static int rt_suspend(device_t dev) { struct rt_softc *sc; sc = device_get_softc(dev); RT_DPRINTF(sc, RT_DEBUG_ANY, "suspending\n"); rt_stop(sc); return (0); } static int rt_resume(device_t dev) { struct rt_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->ifp; RT_DPRINTF(sc, RT_DEBUG_ANY, "resuming\n"); if (ifp->if_flags & IFF_UP) rt_init(sc); return (0); } /* * rt_init_locked - Run initialization process having locked mtx. 
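* The sequence is: quiesce the PDMA engine (waiting for the TX/RX busy * bits to clear), reset and reprogram the Tx/Rx rings, clear and re-arm * the interrupt mask, then enable DMA via rt_txrx_enable; any failure * drops through to rt_stop_locked.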
*/ static void rt_init_locked(void *priv) { struct rt_softc *sc; struct ifnet *ifp; #ifdef IF_RT_PHY_SUPPORT struct mii_data *mii; #endif int i, ntries; uint32_t tmp; sc = priv; ifp = sc->ifp; #ifdef IF_RT_PHY_SUPPORT mii = device_get_softc(sc->rt_miibus); #endif RT_DPRINTF(sc, RT_DEBUG_ANY, "initializing\n"); RT_SOFTC_ASSERT_LOCKED(sc); /* hardware reset */ //RT_WRITE(sc, GE_PORT_BASE + FE_RST_GLO, PSE_RESET); //rt305x_sysctl_set(SYSCTL_RSTCTRL, SYSCTL_RSTCTRL_FRENG); /* Fwd to CPU (uni|broad|multi)cast and Unknown */ if(sc->rt_chipid == RT_CHIPID_RT3050 || sc->rt_chipid == RT_CHIPID_RT3052) RT_WRITE(sc, GDMA1_BASE + GDMA_FWD_CFG, ( GDM_ICS_EN | /* Enable IP Csum */ GDM_TCS_EN | /* Enable TCP Csum */ GDM_UCS_EN | /* Enable UDP Csum */ GDM_STRPCRC | /* Strip CRC from packet */ GDM_DST_PORT_CPU << GDM_UFRC_P_SHIFT | /* Forward UCast to CPU */ GDM_DST_PORT_CPU << GDM_BFRC_P_SHIFT | /* Forward BCast to CPU */ GDM_DST_PORT_CPU << GDM_MFRC_P_SHIFT | /* Forward MCast to CPU */ GDM_DST_PORT_CPU << GDM_OFRC_P_SHIFT /* Forward Other to CPU */ )); /* disable DMA engine */ RT_WRITE(sc, sc->pdma_glo_cfg, 0); RT_WRITE(sc, sc->pdma_rst_idx, 0xffffffff); /* wait while DMA engine is busy */ for (ntries = 0; ntries < 100; ntries++) { tmp = RT_READ(sc, sc->pdma_glo_cfg); if (!(tmp & (FE_TX_DMA_BUSY | FE_RX_DMA_BUSY))) break; DELAY(1000); } if (ntries == 100) { device_printf(sc->dev, "timeout waiting for DMA engine\n"); goto fail; } /* reset Rx and Tx rings */ tmp = FE_RST_DRX_IDX0 | FE_RST_DTX_IDX3 | FE_RST_DTX_IDX2 | FE_RST_DTX_IDX1 | FE_RST_DTX_IDX0; RT_WRITE(sc, sc->pdma_rst_idx, tmp); /* XXX switch set mac address */ for (i = 0; i < RT_SOFTC_TX_RING_COUNT; i++) rt_reset_tx_ring(sc, &sc->tx_ring[i]); for (i = 0; i < RT_SOFTC_TX_RING_COUNT; i++) { /* update TX_BASE_PTRx */ RT_WRITE(sc, sc->tx_base_ptr[i], sc->tx_ring[i].desc_phys_addr); RT_WRITE(sc, sc->tx_max_cnt[i], RT_SOFTC_TX_RING_DESC_COUNT); RT_WRITE(sc, sc->tx_ctx_idx[i], 0); } /* init Rx ring */ for (i = 0; i < sc->rx_ring_count; i++) rt_reset_rx_ring(sc, &sc->rx_ring[i]); /* update RX_BASE_PTRx */ for (i = 0; i < sc->rx_ring_count; i++) { RT_WRITE(sc, sc->rx_base_ptr[i], sc->rx_ring[i].desc_phys_addr); RT_WRITE(sc, sc->rx_max_cnt[i], RT_SOFTC_RX_RING_DATA_COUNT); RT_WRITE(sc, sc->rx_calc_idx[i], RT_SOFTC_RX_RING_DATA_COUNT - 1); } /* write back DDONE, 16byte burst enable RX/TX DMA */ tmp = FE_TX_WB_DDONE | FE_DMA_BT_SIZE16 | FE_RX_DMA_EN | FE_TX_DMA_EN; if (sc->rt_chipid == RT_CHIPID_MT7620) tmp |= (1<<31); RT_WRITE(sc, sc->pdma_glo_cfg, tmp); /* disable interrupts mitigation */ RT_WRITE(sc, sc->delay_int_cfg, 0); /* clear pending interrupts */ RT_WRITE(sc, sc->fe_int_status, 0xffffffff); /* enable interrupts */ if (sc->rt_chipid == RT_CHIPID_RT5350 || sc->rt_chipid == RT_CHIPID_MT7620) tmp = RT5350_INT_TX_COHERENT | RT5350_INT_RX_COHERENT | RT5350_INT_TXQ3_DONE | RT5350_INT_TXQ2_DONE | RT5350_INT_TXQ1_DONE | RT5350_INT_TXQ0_DONE | RT5350_INT_RXQ1_DONE | RT5350_INT_RXQ0_DONE; else tmp = CNT_PPE_AF | CNT_GDM_AF | PSE_P2_FC | GDM_CRC_DROP | PSE_BUF_DROP | GDM_OTHER_DROP | PSE_P1_FC | PSE_P0_FC | PSE_FQ_EMPTY | INT_TX_COHERENT | INT_RX_COHERENT | INT_TXQ3_DONE | INT_TXQ2_DONE | INT_TXQ1_DONE | INT_TXQ0_DONE | INT_RX_DONE; sc->intr_enable_mask = tmp; RT_WRITE(sc, sc->fe_int_enable, tmp); if (rt_txrx_enable(sc) != 0) goto fail; #ifdef IF_RT_PHY_SUPPORT if (mii) mii_mediachg(mii); #endif /* IF_RT_PHY_SUPPORT */ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; ifp->if_drv_flags |= IFF_DRV_RUNNING; sc->periodic_round = 0; callout_reset(&sc->periodic_ch, hz / 10, 
rt_periodic, sc); return; fail: rt_stop_locked(sc); } /* * rt_init - lock and initialize device. */ static void rt_init(void *priv) { struct rt_softc *sc; sc = priv; RT_SOFTC_LOCK(sc); rt_init_locked(sc); RT_SOFTC_UNLOCK(sc); } /* * rt_stop_locked - stop TX/RX w/ lock */ static void rt_stop_locked(void *priv) { struct rt_softc *sc; struct ifnet *ifp; sc = priv; ifp = sc->ifp; RT_DPRINTF(sc, RT_DEBUG_ANY, "stopping\n"); RT_SOFTC_ASSERT_LOCKED(sc); sc->tx_timer = 0; ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); callout_stop(&sc->periodic_ch); callout_stop(&sc->tx_watchdog_ch); RT_SOFTC_UNLOCK(sc); taskqueue_block(sc->taskqueue); /* * Sometimes rt_stop_locked is called from the ISR and we get a panic; * when the cause is found, this will be fixed. */ #ifdef notyet taskqueue_drain(sc->taskqueue, &sc->rx_done_task); taskqueue_drain(sc->taskqueue, &sc->tx_done_task); taskqueue_drain(sc->taskqueue, &sc->periodic_task); #endif RT_SOFTC_LOCK(sc); /* disable interrupts */ RT_WRITE(sc, sc->fe_int_enable, 0); if(sc->rt_chipid == RT_CHIPID_RT5350 || sc->rt_chipid == RT_CHIPID_MT7620) { } else { /* reset adapter */ RT_WRITE(sc, GE_PORT_BASE + FE_RST_GLO, PSE_RESET); RT_WRITE(sc, GDMA1_BASE + GDMA_FWD_CFG, ( GDM_ICS_EN | /* Enable IP Csum */ GDM_TCS_EN | /* Enable TCP Csum */ GDM_UCS_EN | /* Enable UDP Csum */ GDM_STRPCRC | /* Strip CRC from packet */ GDM_DST_PORT_CPU << GDM_UFRC_P_SHIFT | /* Forward UCast to CPU */ GDM_DST_PORT_CPU << GDM_BFRC_P_SHIFT | /* Forward BCast to CPU */ GDM_DST_PORT_CPU << GDM_MFRC_P_SHIFT | /* Forward MCast to CPU */ GDM_DST_PORT_CPU << GDM_OFRC_P_SHIFT /* Forward Other to CPU */ )); } } static void rt_stop(void *priv) { struct rt_softc *sc; sc = priv; RT_SOFTC_LOCK(sc); rt_stop_locked(sc); RT_SOFTC_UNLOCK(sc); } /* * rt_tx_data - transmit packet. */ static int rt_tx_data(struct rt_softc *sc, struct mbuf *m, int qid) { struct ifnet *ifp; struct rt_softc_tx_ring *ring; struct rt_softc_tx_data *data; struct rt_txdesc *desc; struct mbuf *m_d; bus_dma_segment_t dma_seg[RT_SOFTC_MAX_SCATTER]; int error, ndmasegs, ndescs, i; KASSERT(qid >= 0 && qid < RT_SOFTC_TX_RING_COUNT, ("%s: Tx data: invalid qid=%d\n", device_get_nameunit(sc->dev), qid)); RT_SOFTC_TX_RING_ASSERT_LOCKED(&sc->tx_ring[qid]); ifp = sc->ifp; ring = &sc->tx_ring[qid]; desc = &ring->desc[ring->desc_cur]; data = &ring->data[ring->data_cur]; error = bus_dmamap_load_mbuf_sg(ring->data_dma_tag, data->dma_map, m, dma_seg, &ndmasegs, 0); if (error != 0) { /* too many fragments, linearize */ RT_DPRINTF(sc, RT_DEBUG_TX, "could not load mbuf DMA map, trying to linearize " "mbuf: ndmasegs=%d, len=%d, error=%d\n", ndmasegs, m->m_pkthdr.len, error); m_d = m_collapse(m, M_NOWAIT, 16); if (m_d == NULL) { m_freem(m); m = NULL; return (ENOMEM); } m = m_d; sc->tx_defrag_packets++; error = bus_dmamap_load_mbuf_sg(ring->data_dma_tag, data->dma_map, m, dma_seg, &ndmasegs, 0); if (error != 0) { device_printf(sc->dev, "could not load mbuf DMA map: " "ndmasegs=%d, len=%d, error=%d\n", ndmasegs, m->m_pkthdr.len, error); m_freem(m); return (error); } } if (m->m_pkthdr.len == 0) ndmasegs = 0; /* determine how many Tx descs are required */ ndescs = 1 + ndmasegs / 2; if ((ring->desc_queued + ndescs) > (RT_SOFTC_TX_RING_DESC_COUNT - 2)) { RT_DPRINTF(sc, RT_DEBUG_TX, "there are not enough Tx descs\n"); sc->no_tx_desc_avail++; bus_dmamap_unload(ring->data_dma_tag, data->dma_map); m_freem(m); return (EFBIG); } data->m = m; /* set up Tx descs */ for (i = 0; i < ndmasegs; i += 2) { /* TODO: this needs to be refined as MT7620 for example has * a different word3 layout than
RT305x and RT5350 (the last * one doesn't use word3 at all). */ /* Set destination */ if (sc->rt_chipid != RT_CHIPID_MT7620) desc->dst = (TXDSCR_DST_PORT_GDMA1); if ((ifp->if_capenable & IFCAP_TXCSUM) != 0) desc->dst |= (TXDSCR_IP_CSUM_GEN|TXDSCR_UDP_CSUM_GEN| TXDSCR_TCP_CSUM_GEN); /* Set queue id */ desc->qn = qid; /* No PPPoE */ desc->pppoe = 0; /* No VLAN */ desc->vid = 0; desc->sdp0 = htole32(dma_seg[i].ds_addr); desc->sdl0 = htole16(dma_seg[i].ds_len | ( ((i+1) == ndmasegs )?RT_TXDESC_SDL0_LASTSEG:0 )); if ((i+1) < ndmasegs) { desc->sdp1 = htole32(dma_seg[i+1].ds_addr); desc->sdl1 = htole16(dma_seg[i+1].ds_len | ( ((i+2) == ndmasegs )?RT_TXDESC_SDL1_LASTSEG:0 )); } else { desc->sdp1 = 0; desc->sdl1 = 0; } if ((i+2) < ndmasegs) { ring->desc_queued++; ring->desc_cur = (ring->desc_cur + 1) % RT_SOFTC_TX_RING_DESC_COUNT; } desc = &ring->desc[ring->desc_cur]; } RT_DPRINTF(sc, RT_DEBUG_TX, "sending data: len=%d, ndmasegs=%d, " "DMA ds_len=%d/%d/%d/%d/%d\n", m->m_pkthdr.len, ndmasegs, (int) dma_seg[0].ds_len, (int) dma_seg[1].ds_len, (int) dma_seg[2].ds_len, (int) dma_seg[3].ds_len, (int) dma_seg[4].ds_len); bus_dmamap_sync(ring->seg0_dma_tag, ring->seg0_dma_map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(ring->data_dma_tag, data->dma_map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map, BUS_DMASYNC_PREWRITE); ring->desc_queued++; ring->desc_cur = (ring->desc_cur + 1) % RT_SOFTC_TX_RING_DESC_COUNT; ring->data_queued++; ring->data_cur = (ring->data_cur + 1) % RT_SOFTC_TX_RING_DATA_COUNT; /* kick Tx */ RT_WRITE(sc, sc->tx_ctx_idx[qid], ring->desc_cur); return (0); } /* * rt_start - start Transmit/Receive */ static void rt_start(struct ifnet *ifp) { struct rt_softc *sc; struct mbuf *m; int qid = 0 /* XXX must check QoS priority */; sc = ifp->if_softc; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return; for (;;) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m); if (m == NULL) break; m->m_pkthdr.rcvif = NULL; RT_SOFTC_TX_RING_LOCK(&sc->tx_ring[qid]); if (sc->tx_ring[qid].data_queued >= RT_SOFTC_TX_RING_DATA_COUNT) { RT_SOFTC_TX_RING_UNLOCK(&sc->tx_ring[qid]); RT_DPRINTF(sc, RT_DEBUG_TX, "if_start: Tx ring with qid=%d is full\n", qid); m_freem(m); ifp->if_drv_flags |= IFF_DRV_OACTIVE; if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); sc->tx_data_queue_full[qid]++; break; } if (rt_tx_data(sc, m, qid) != 0) { RT_SOFTC_TX_RING_UNLOCK(&sc->tx_ring[qid]); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); break; } RT_SOFTC_TX_RING_UNLOCK(&sc->tx_ring[qid]); sc->tx_timer = RT_TX_WATCHDOG_TIMEOUT; callout_reset(&sc->tx_watchdog_ch, hz, rt_tx_watchdog, sc); } } /* * rt_update_promisc - set/clear promiscuous mode. Unused yet, because * filtering done by attached Ethernet switch. */ static void rt_update_promisc(struct ifnet *ifp) { struct rt_softc *sc; sc = ifp->if_softc; printf("%s: %s promiscuous mode\n", device_get_nameunit(sc->dev), (ifp->if_flags & IFF_PROMISC) ? "entering" : "leaving"); } /* * rt_ioctl - ioctl handler. 
*/ static int rt_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct rt_softc *sc; struct ifreq *ifr; #ifdef IF_RT_PHY_SUPPORT struct mii_data *mii; #endif /* IF_RT_PHY_SUPPORT */ int error, startall; sc = ifp->if_softc; ifr = (struct ifreq *) data; error = 0; switch (cmd) { case SIOCSIFFLAGS: startall = 0; RT_SOFTC_LOCK(sc); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if ((ifp->if_flags ^ sc->if_flags) & IFF_PROMISC) rt_update_promisc(ifp); } else { rt_init_locked(sc); startall = 1; } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) rt_stop_locked(sc); } sc->if_flags = ifp->if_flags; RT_SOFTC_UNLOCK(sc); break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: #ifdef IF_RT_PHY_SUPPORT mii = device_get_softc(sc->rt_miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, cmd); #else error = ifmedia_ioctl(ifp, ifr, &sc->rt_ifmedia, cmd); #endif /* IF_RT_PHY_SUPPORT */ break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } /* * rt_periodic - Handler of PERIODIC interrupt */ static void rt_periodic(void *arg) { struct rt_softc *sc; sc = arg; RT_DPRINTF(sc, RT_DEBUG_PERIODIC, "periodic\n"); taskqueue_enqueue(sc->taskqueue, &sc->periodic_task); } /* * rt_tx_watchdog - Handler of TX Watchdog */ static void rt_tx_watchdog(void *arg) { struct rt_softc *sc; struct ifnet *ifp; sc = arg; ifp = sc->ifp; if (sc->tx_timer == 0) return; if (--sc->tx_timer == 0) { device_printf(sc->dev, "Tx watchdog timeout: resetting\n"); #ifdef notyet /* * XXX: Commented out, because reset break input. */ rt_stop_locked(sc); rt_init_locked(sc); #endif if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); sc->tx_watchdog_timeouts++; } callout_reset(&sc->tx_watchdog_ch, hz, rt_tx_watchdog, sc); } /* * rt_cnt_ppe_af - Handler of PPE Counter Table Almost Full interrupt */ static void rt_cnt_ppe_af(struct rt_softc *sc) { RT_DPRINTF(sc, RT_DEBUG_INTR, "PPE Counter Table Almost Full\n"); } /* * rt_cnt_gdm_af - Handler of GDMA 1 & 2 Counter Table Almost Full interrupt */ static void rt_cnt_gdm_af(struct rt_softc *sc) { RT_DPRINTF(sc, RT_DEBUG_INTR, "GDMA 1 & 2 Counter Table Almost Full\n"); } /* * rt_pse_p2_fc - Handler of PSE port2 (GDMA 2) flow control interrupt */ static void rt_pse_p2_fc(struct rt_softc *sc) { RT_DPRINTF(sc, RT_DEBUG_INTR, "PSE port2 (GDMA 2) flow control asserted.\n"); } /* * rt_gdm_crc_drop - Handler of GDMA 1/2 discard a packet due to CRC error * interrupt */ static void rt_gdm_crc_drop(struct rt_softc *sc) { RT_DPRINTF(sc, RT_DEBUG_INTR, "GDMA 1 & 2 discard a packet due to CRC error\n"); } /* * rt_pse_buf_drop - Handler of buffer sharing limitation interrupt */ static void rt_pse_buf_drop(struct rt_softc *sc) { RT_DPRINTF(sc, RT_DEBUG_INTR, "PSE discards a packet due to buffer sharing limitation\n"); } /* * rt_gdm_other_drop - Handler of discard on other reason interrupt */ static void rt_gdm_other_drop(struct rt_softc *sc) { RT_DPRINTF(sc, RT_DEBUG_INTR, "GDMA 1 & 2 discard a packet due to other reason\n"); } /* * rt_pse_p1_fc - Handler of PSE port1 (GDMA 1) flow control interrupt */ static void rt_pse_p1_fc(struct rt_softc *sc) { RT_DPRINTF(sc, RT_DEBUG_INTR, "PSE port1 (GDMA 1) flow control asserted.\n"); } /* * rt_pse_p0_fc - Handler of PSE port0 (CDMA) flow control interrupt */ static void rt_pse_p0_fc(struct rt_softc *sc) { RT_DPRINTF(sc, RT_DEBUG_INTR, "PSE port0 (CDMA) flow control asserted.\n"); } /* * rt_pse_fq_empty - Handler of PSE free Q empty threshold reached interrupt */ static void rt_pse_fq_empty(struct rt_softc *sc) { RT_DPRINTF(sc, 
RT_DEBUG_INTR, "PSE free Q empty threshold reached & forced drop " "condition occurred.\n"); } /* * rt_intr - main ISR */ static void rt_intr(void *arg) { struct rt_softc *sc; struct ifnet *ifp; uint32_t status; sc = arg; ifp = sc->ifp; /* acknowledge interrupts */ status = RT_READ(sc, sc->fe_int_status); RT_WRITE(sc, sc->fe_int_status, status); RT_DPRINTF(sc, RT_DEBUG_INTR, "interrupt: status=0x%08x\n", status); if (status == 0xffffffff || /* device likely went away */ status == 0) /* not for us */ return; sc->interrupts++; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return; if (status & CNT_PPE_AF) rt_cnt_ppe_af(sc); if (status & CNT_GDM_AF) rt_cnt_gdm_af(sc); if (status & PSE_P2_FC) rt_pse_p2_fc(sc); if (status & GDM_CRC_DROP) rt_gdm_crc_drop(sc); if (status & PSE_BUF_DROP) rt_pse_buf_drop(sc); if (status & GDM_OTHER_DROP) rt_gdm_other_drop(sc); if (status & PSE_P1_FC) rt_pse_p1_fc(sc); if (status & PSE_P0_FC) rt_pse_p0_fc(sc); if (status & PSE_FQ_EMPTY) rt_pse_fq_empty(sc); if (status & INT_TX_COHERENT) rt_tx_coherent_intr(sc); if (status & INT_RX_COHERENT) rt_rx_coherent_intr(sc); if (status & RX_DLY_INT) rt_rx_delay_intr(sc); if (status & TX_DLY_INT) rt_tx_delay_intr(sc); if (status & INT_RX_DONE) rt_rx_intr(sc, 0); if (status & INT_TXQ3_DONE) rt_tx_intr(sc, 3); if (status & INT_TXQ2_DONE) rt_tx_intr(sc, 2); if (status & INT_TXQ1_DONE) rt_tx_intr(sc, 1); if (status & INT_TXQ0_DONE) rt_tx_intr(sc, 0); } /* * rt_rt5350_intr - main ISR for Ralink 5350 SoC */ static void rt_rt5350_intr(void *arg) { struct rt_softc *sc; struct ifnet *ifp; uint32_t status; sc = arg; ifp = sc->ifp; /* acknowledge interrupts */ status = RT_READ(sc, sc->fe_int_status); RT_WRITE(sc, sc->fe_int_status, status); RT_DPRINTF(sc, RT_DEBUG_INTR, "interrupt: status=0x%08x\n", status); if (status == 0xffffffff || /* device likely went away */ status == 0) /* not for us */ return; sc->interrupts++; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return; if (status & RT5350_INT_TX_COHERENT) rt_tx_coherent_intr(sc); if (status & RT5350_INT_RX_COHERENT) rt_rx_coherent_intr(sc); if (status & RT5350_RX_DLY_INT) rt_rx_delay_intr(sc); if (status & RT5350_TX_DLY_INT) rt_tx_delay_intr(sc); if (status & RT5350_INT_RXQ1_DONE) rt_rx_intr(sc, 1); if (status & RT5350_INT_RXQ0_DONE) rt_rx_intr(sc, 0); if (status & RT5350_INT_TXQ3_DONE) rt_tx_intr(sc, 3); if (status & RT5350_INT_TXQ2_DONE) rt_tx_intr(sc, 2); if (status & RT5350_INT_TXQ1_DONE) rt_tx_intr(sc, 1); if (status & RT5350_INT_TXQ0_DONE) rt_tx_intr(sc, 0); } static void rt_tx_coherent_intr(struct rt_softc *sc) { uint32_t tmp; int i; RT_DPRINTF(sc, RT_DEBUG_INTR, "Tx coherent interrupt\n"); sc->tx_coherent_interrupts++; /* restart DMA engine */ tmp = RT_READ(sc, sc->pdma_glo_cfg); tmp &= ~(FE_TX_WB_DDONE | FE_TX_DMA_EN); RT_WRITE(sc, sc->pdma_glo_cfg, tmp); for (i = 0; i < RT_SOFTC_TX_RING_COUNT; i++) rt_reset_tx_ring(sc, &sc->tx_ring[i]); for (i = 0; i < RT_SOFTC_TX_RING_COUNT; i++) { RT_WRITE(sc, sc->tx_base_ptr[i], sc->tx_ring[i].desc_phys_addr); RT_WRITE(sc, sc->tx_max_cnt[i], RT_SOFTC_TX_RING_DESC_COUNT); RT_WRITE(sc, sc->tx_ctx_idx[i], 0); } rt_txrx_enable(sc); } /* * rt_rx_coherent_intr */ static void rt_rx_coherent_intr(struct rt_softc *sc) { uint32_t tmp; int i; RT_DPRINTF(sc, RT_DEBUG_INTR, "Rx coherent interrupt\n"); sc->rx_coherent_interrupts++; /* restart DMA engine */ tmp = RT_READ(sc, sc->pdma_glo_cfg); tmp &= ~(FE_RX_DMA_EN); RT_WRITE(sc, sc->pdma_glo_cfg, tmp); /* init Rx ring */ for (i = 0; i < sc->rx_ring_count; i++) rt_reset_rx_ring(sc, &sc->rx_ring[i]); for (i 
= 0; i < sc->rx_ring_count; i++) { RT_WRITE(sc, sc->rx_base_ptr[i], sc->rx_ring[i].desc_phys_addr); RT_WRITE(sc, sc->rx_max_cnt[i], RT_SOFTC_RX_RING_DATA_COUNT); RT_WRITE(sc, sc->rx_calc_idx[i], RT_SOFTC_RX_RING_DATA_COUNT - 1); } rt_txrx_enable(sc); } /* * rt_rx_intr - a packet received */ static void rt_rx_intr(struct rt_softc *sc, int qid) { KASSERT(qid >= 0 && qid < sc->rx_ring_count, ("%s: Rx interrupt: invalid qid=%d\n", device_get_nameunit(sc->dev), qid)); RT_DPRINTF(sc, RT_DEBUG_INTR, "Rx interrupt\n"); sc->rx_interrupts[qid]++; RT_SOFTC_LOCK(sc); if (!(sc->intr_disable_mask & (sc->int_rx_done_mask << qid))) { rt_intr_disable(sc, (sc->int_rx_done_mask << qid)); taskqueue_enqueue(sc->taskqueue, &sc->rx_done_task); } sc->intr_pending_mask |= (sc->int_rx_done_mask << qid); RT_SOFTC_UNLOCK(sc); } static void rt_rx_delay_intr(struct rt_softc *sc) { RT_DPRINTF(sc, RT_DEBUG_INTR, "Rx delay interrupt\n"); sc->rx_delay_interrupts++; } static void rt_tx_delay_intr(struct rt_softc *sc) { RT_DPRINTF(sc, RT_DEBUG_INTR, "Tx delay interrupt\n"); sc->tx_delay_interrupts++; } /* * rt_tx_intr - Transmission of a packet is done */ static void rt_tx_intr(struct rt_softc *sc, int qid) { KASSERT(qid >= 0 && qid < RT_SOFTC_TX_RING_COUNT, ("%s: Tx interrupt: invalid qid=%d\n", device_get_nameunit(sc->dev), qid)); RT_DPRINTF(sc, RT_DEBUG_INTR, "Tx interrupt: qid=%d\n", qid); sc->tx_interrupts[qid]++; RT_SOFTC_LOCK(sc); if (!(sc->intr_disable_mask & (sc->int_tx_done_mask << qid))) { rt_intr_disable(sc, (sc->int_tx_done_mask << qid)); taskqueue_enqueue(sc->taskqueue, &sc->tx_done_task); } sc->intr_pending_mask |= (sc->int_tx_done_mask << qid); RT_SOFTC_UNLOCK(sc); } /* * rt_rx_done_task - run RX task */ static void rt_rx_done_task(void *context, int pending) { struct rt_softc *sc; struct ifnet *ifp; int again; sc = context; ifp = sc->ifp; RT_DPRINTF(sc, RT_DEBUG_RX, "Rx done task\n"); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return; sc->intr_pending_mask &= ~sc->int_rx_done_mask; again = rt_rx_eof(sc, &sc->rx_ring[0], sc->rx_process_limit); RT_SOFTC_LOCK(sc); if ((sc->intr_pending_mask & sc->int_rx_done_mask) || again) { RT_DPRINTF(sc, RT_DEBUG_RX, "Rx done task: scheduling again\n"); taskqueue_enqueue(sc->taskqueue, &sc->rx_done_task); } else { rt_intr_enable(sc, sc->int_rx_done_mask); } RT_SOFTC_UNLOCK(sc); } /* * rt_tx_done_task - check for pending TX task in all queues */ static void rt_tx_done_task(void *context, int pending) { struct rt_softc *sc; struct ifnet *ifp; uint32_t intr_mask; int i; sc = context; ifp = sc->ifp; RT_DPRINTF(sc, RT_DEBUG_TX, "Tx done task\n"); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return; for (i = RT_SOFTC_TX_RING_COUNT - 1; i >= 0; i--) { if (sc->intr_pending_mask & (sc->int_tx_done_mask << i)) { sc->intr_pending_mask &= ~(sc->int_tx_done_mask << i); rt_tx_eof(sc, &sc->tx_ring[i]); } } sc->tx_timer = 0; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if(sc->rt_chipid == RT_CHIPID_RT5350 || sc->rt_chipid == RT_CHIPID_MT7620) intr_mask = ( RT5350_INT_TXQ3_DONE | RT5350_INT_TXQ2_DONE | RT5350_INT_TXQ1_DONE | RT5350_INT_TXQ0_DONE); else intr_mask = ( INT_TXQ3_DONE | INT_TXQ2_DONE | INT_TXQ1_DONE | INT_TXQ0_DONE); RT_SOFTC_LOCK(sc); rt_intr_enable(sc, ~sc->intr_pending_mask & (sc->intr_disable_mask & intr_mask)); if (sc->intr_pending_mask & intr_mask) { RT_DPRINTF(sc, RT_DEBUG_TX, "Tx done task: scheduling again\n"); taskqueue_enqueue(sc->taskqueue, &sc->tx_done_task); } RT_SOFTC_UNLOCK(sc); if (!IFQ_IS_EMPTY(&ifp->if_snd)) rt_start(ifp); } /* * rt_periodic_task - run periodic task */
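/* * The periodic callout fires at hz/10, so rt_update_stats runs ten times * a second, while the raw-counter refresh and watchdog pass run on every * 10th round, i.e. roughly once a second. */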
/* * rt_periodic_task - run periodic task */ static void rt_periodic_task(void *context, int pending) { struct rt_softc *sc; struct ifnet *ifp; sc = context; ifp = sc->ifp; RT_DPRINTF(sc, RT_DEBUG_PERIODIC, "periodic task: round=%lu\n", sc->periodic_round); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return; RT_SOFTC_LOCK(sc); sc->periodic_round++; rt_update_stats(sc); if ((sc->periodic_round % 10) == 0) { rt_update_raw_counters(sc); rt_watchdog(sc); } RT_SOFTC_UNLOCK(sc); callout_reset(&sc->periodic_ch, hz / 10, rt_periodic, sc); } /* * rt_rx_eof - check for frames completed by the DMA engine and pass them * to the network subsystem. */ static int rt_rx_eof(struct rt_softc *sc, struct rt_softc_rx_ring *ring, int limit) { struct ifnet *ifp; /* struct rt_softc_rx_ring *ring; */ struct rt_rxdesc *desc; struct rt_softc_rx_data *data; struct mbuf *m, *mnew; bus_dma_segment_t segs[1]; bus_dmamap_t dma_map; uint32_t index, desc_flags; int error, nsegs, len, nframes; ifp = sc->ifp; /* ring = &sc->rx_ring[0]; */ nframes = 0; while (limit != 0) { index = RT_READ(sc, sc->rx_drx_idx[0]); if (ring->cur == index) break; desc = &ring->desc[ring->cur]; data = &ring->data[ring->cur]; bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); #ifdef IF_RT_DEBUG if (sc->debug & RT_DEBUG_RX) { printf("\nRX Descriptor[%#08x] dump:\n", (u_int)desc); hexdump(desc, 16, 0, 0); printf("-----------------------------------\n"); } #endif /* XXX Sometimes the device doesn't set the DDONE bit */ #ifdef DDONE_FIXED if (!(desc->sdl0 & htole16(RT_RXDESC_SDL0_DDONE))) { RT_DPRINTF(sc, RT_DEBUG_RX, "DDONE=0, try next\n"); break; } #endif len = le16toh(desc->sdl0) & 0x3fff; RT_DPRINTF(sc, RT_DEBUG_RX, "new frame len=%d\n", len); nframes++; mnew = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE); if (mnew == NULL) { sc->rx_mbuf_alloc_errors++; if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); goto skip; } mnew->m_len = mnew->m_pkthdr.len = MJUMPAGESIZE; error = bus_dmamap_load_mbuf_sg(ring->data_dma_tag, ring->spare_dma_map, mnew, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { RT_DPRINTF(sc, RT_DEBUG_RX, "could not load Rx mbuf DMA map: " "error=%d, nsegs=%d\n", error, nsegs); m_freem(mnew); sc->rx_mbuf_dmamap_errors++; if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); goto skip; } KASSERT(nsegs == 1, ("%s: too many DMA segments", device_get_nameunit(sc->dev))); bus_dmamap_sync(ring->data_dma_tag, data->dma_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(ring->data_dma_tag, data->dma_map); dma_map = data->dma_map; data->dma_map = ring->spare_dma_map; ring->spare_dma_map = dma_map; bus_dmamap_sync(ring->data_dma_tag, data->dma_map, BUS_DMASYNC_PREREAD); m = data->m; desc_flags = desc->src; data->m = mnew; /* Add 2 for proper alignment of the RX IP header */ desc->sdp0 = htole32(segs[0].ds_addr+2); desc->sdl0 = htole32(segs[0].ds_len-2); desc->src = 0; desc->ai = 0; desc->foe = 0; RT_DPRINTF(sc, RT_DEBUG_RX, "Rx frame: rxdesc flags=0x%08x\n", desc_flags); m->m_pkthdr.rcvif = ifp; /* Add 2 to fix data alignment, since sdp0 = addr + 2 */ m->m_data += 2; m->m_pkthdr.len = m->m_len = len; /* check for checksum errors */ if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) { /* check for valid checksum */ if (desc_flags & (RXDSXR_SRC_IP_CSUM_FAIL| RXDSXR_SRC_L4_CSUM_FAIL)) { RT_DPRINTF(sc, RT_DEBUG_RX, "rxdesc: crc error\n"); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); if (!(ifp->if_flags & IFF_PROMISC)) { m_freem(m); goto skip; } } if ((desc_flags & RXDSXR_SRC_IP_CSUM_FAIL) != 0) { m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; m->m_pkthdr.csum_flags |= CSUM_IP_VALID; m->m_pkthdr.csum_data =
0xffff; } m->m_flags &= ~M_HASFCS; } (*ifp->if_input)(ifp, m); skip: desc->sdl0 &= ~htole16(RT_RXDESC_SDL0_DDONE); bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ring->cur = (ring->cur + 1) % RT_SOFTC_RX_RING_DATA_COUNT; limit--; } if (ring->cur == 0) RT_WRITE(sc, sc->rx_calc_idx[0], RT_SOFTC_RX_RING_DATA_COUNT - 1); else RT_WRITE(sc, sc->rx_calc_idx[0], ring->cur - 1); RT_DPRINTF(sc, RT_DEBUG_RX, "Rx eof: nframes=%d\n", nframes); sc->rx_packets += nframes; return (limit == 0); } /* * rt_tx_eof - check for successfully transmitted frames and mark their * descriptors as free. */ static void rt_tx_eof(struct rt_softc *sc, struct rt_softc_tx_ring *ring) { struct ifnet *ifp; struct rt_txdesc *desc; struct rt_softc_tx_data *data; uint32_t index; int ndescs, nframes; ifp = sc->ifp; ndescs = 0; nframes = 0; for (;;) { index = RT_READ(sc, sc->tx_dtx_idx[ring->qid]); if (ring->desc_next == index) break; ndescs++; desc = &ring->desc[ring->desc_next]; bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); if (desc->sdl0 & htole16(RT_TXDESC_SDL0_LASTSEG) || desc->sdl1 & htole16(RT_TXDESC_SDL1_LASTSEG)) { nframes++; data = &ring->data[ring->data_next]; bus_dmamap_sync(ring->data_dma_tag, data->dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dma_tag, data->dma_map); m_freem(data->m); data->m = NULL; if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); RT_SOFTC_TX_RING_LOCK(ring); ring->data_queued--; ring->data_next = (ring->data_next + 1) % RT_SOFTC_TX_RING_DATA_COUNT; RT_SOFTC_TX_RING_UNLOCK(ring); } desc->sdl0 &= ~htole16(RT_TXDESC_SDL0_DDONE); bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); RT_SOFTC_TX_RING_LOCK(ring); ring->desc_queued--; ring->desc_next = (ring->desc_next + 1) % RT_SOFTC_TX_RING_DESC_COUNT; RT_SOFTC_TX_RING_UNLOCK(ring); } RT_DPRINTF(sc, RT_DEBUG_TX, "Tx eof: qid=%d, ndescs=%d, nframes=%d\n", ring->qid, ndescs, nframes); } /* * rt_update_stats - query statistics counters and update related variables. */ static void rt_update_stats(struct rt_softc *sc) { struct ifnet *ifp; ifp = sc->ifp; RT_DPRINTF(sc, RT_DEBUG_STATS, "update statistics\n"); /* XXX do update stats here */ } /* * rt_watchdog - reinit device on watchdog event. */ static void rt_watchdog(struct rt_softc *sc) { uint32_t tmp; #ifdef notyet int ntries; #endif if (sc->rt_chipid != RT_CHIPID_RT5350 && sc->rt_chipid != RT_CHIPID_MT7620) { tmp = RT_READ(sc, PSE_BASE + CDMA_OQ_STA); RT_DPRINTF(sc, RT_DEBUG_WATCHDOG, "watchdog: PSE_IQ_STA=0x%08x\n", tmp); } /* XXX: do not reset */ #ifdef notyet if (((tmp >> P0_IQ_PCNT_SHIFT) & 0xff) != 0) { sc->tx_queue_not_empty[0]++; for (ntries = 0; ntries < 10; ntries++) { tmp = RT_READ(sc, PSE_BASE + PSE_IQ_STA); if (((tmp >> P0_IQ_PCNT_SHIFT) & 0xff) == 0) break; DELAY(1); } } if (((tmp >> P1_IQ_PCNT_SHIFT) & 0xff) != 0) { sc->tx_queue_not_empty[1]++; for (ntries = 0; ntries < 10; ntries++) { tmp = RT_READ(sc, PSE_BASE + PSE_IQ_STA); if (((tmp >> P1_IQ_PCNT_SHIFT) & 0xff) == 0) break; DELAY(1); } } #endif } /* * rt_update_raw_counters - update counters.
*/ static void rt_update_raw_counters(struct rt_softc *sc) { sc->tx_bytes += RT_READ(sc, CNTR_BASE + GDMA_TX_GBCNT0); sc->tx_packets += RT_READ(sc, CNTR_BASE + GDMA_TX_GPCNT0); sc->tx_skip += RT_READ(sc, CNTR_BASE + GDMA_TX_SKIPCNT0); sc->tx_collision+= RT_READ(sc, CNTR_BASE + GDMA_TX_COLCNT0); sc->rx_bytes += RT_READ(sc, CNTR_BASE + GDMA_RX_GBCNT0); sc->rx_packets += RT_READ(sc, CNTR_BASE + GDMA_RX_GPCNT0); sc->rx_crc_err += RT_READ(sc, CNTR_BASE + GDMA_RX_CSUM_ERCNT0); sc->rx_short_err+= RT_READ(sc, CNTR_BASE + GDMA_RX_SHORT_ERCNT0); sc->rx_long_err += RT_READ(sc, CNTR_BASE + GDMA_RX_LONG_ERCNT0); sc->rx_phy_err += RT_READ(sc, CNTR_BASE + GDMA_RX_FERCNT0); sc->rx_fifo_overflows+= RT_READ(sc, CNTR_BASE + GDMA_RX_OERCNT0); } static void rt_intr_enable(struct rt_softc *sc, uint32_t intr_mask) { uint32_t tmp; sc->intr_disable_mask &= ~intr_mask; tmp = sc->intr_enable_mask & ~sc->intr_disable_mask; RT_WRITE(sc, sc->fe_int_enable, tmp); } static void rt_intr_disable(struct rt_softc *sc, uint32_t intr_mask) { uint32_t tmp; sc->intr_disable_mask |= intr_mask; tmp = sc->intr_enable_mask & ~sc->intr_disable_mask; RT_WRITE(sc, sc->fe_int_enable, tmp); } /* * rt_txrx_enable - enable TX/RX DMA */ static int rt_txrx_enable(struct rt_softc *sc) { struct ifnet *ifp; uint32_t tmp; int ntries; ifp = sc->ifp; /* enable Tx/Rx DMA engine */ for (ntries = 0; ntries < 200; ntries++) { tmp = RT_READ(sc, sc->pdma_glo_cfg); if (!(tmp & (FE_TX_DMA_BUSY | FE_RX_DMA_BUSY))) break; DELAY(1000); } if (ntries == 200) { device_printf(sc->dev, "timeout waiting for DMA engine\n"); return (-1); } DELAY(50); tmp |= FE_TX_WB_DDONE | FE_RX_DMA_EN | FE_TX_DMA_EN; RT_WRITE(sc, sc->pdma_glo_cfg, tmp); /* XXX set Rx filter */ return (0); } /* * rt_alloc_rx_ring - allocate RX DMA ring buffer */ static int rt_alloc_rx_ring(struct rt_softc *sc, struct rt_softc_rx_ring *ring, int qid) { struct rt_rxdesc *desc; struct rt_softc_rx_data *data; bus_dma_segment_t segs[1]; int i, nsegs, error; error = bus_dma_tag_create(bus_get_dma_tag(sc->dev), PAGE_SIZE, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, RT_SOFTC_RX_RING_DATA_COUNT * sizeof(struct rt_rxdesc), 1, RT_SOFTC_RX_RING_DATA_COUNT * sizeof(struct rt_rxdesc), 0, NULL, NULL, &ring->desc_dma_tag); if (error != 0) { device_printf(sc->dev, "could not create Rx desc DMA tag\n"); goto fail; } error = bus_dmamem_alloc(ring->desc_dma_tag, (void **) &ring->desc, BUS_DMA_NOWAIT | BUS_DMA_ZERO, &ring->desc_dma_map); if (error != 0) { device_printf(sc->dev, "could not allocate Rx desc DMA memory\n"); goto fail; } error = bus_dmamap_load(ring->desc_dma_tag, ring->desc_dma_map, ring->desc, RT_SOFTC_RX_RING_DATA_COUNT * sizeof(struct rt_rxdesc), rt_dma_map_addr, &ring->desc_phys_addr, 0); if (error != 0) { device_printf(sc->dev, "could not load Rx desc DMA map\n"); goto fail; } error = bus_dma_tag_create(bus_get_dma_tag(sc->dev), PAGE_SIZE, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MJUMPAGESIZE, 1, MJUMPAGESIZE, 0, NULL, NULL, &ring->data_dma_tag); if (error != 0) { device_printf(sc->dev, "could not create Rx data DMA tag\n"); goto fail; } for (i = 0; i < RT_SOFTC_RX_RING_DATA_COUNT; i++) { desc = &ring->desc[i]; data = &ring->data[i]; error = bus_dmamap_create(ring->data_dma_tag, 0, &data->dma_map); if (error != 0) { device_printf(sc->dev, "could not create Rx data DMA " "map\n"); goto fail; } data->m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE); if (data->m == NULL) { device_printf(sc->dev, "could not allocate Rx mbuf\n"); error = ENOMEM; goto fail; } 
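The allocation path running through here follows the usual busdma ladder: create a tag, allocate memory, load the map, and on any failure jump to a single cleanup routine (rt_free_rx_ring) that tolerates partially initialized state. A stand-alone model of that goto-unwind idiom, with malloc standing in for the hypothetical resources:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static int
alloc_ring(void)
{
	void *tag = NULL, *mem = NULL;
	int error;

	if ((tag = malloc(16)) == NULL) {	/* ~ bus_dma_tag_create() */
		error = ENOMEM;
		goto fail;
	}
	if ((mem = malloc(4096)) == NULL) {	/* ~ bus_dmamem_alloc() */
		error = ENOMEM;
		goto fail;
	}
	/* ~ bus_dmamap_load() and per-buffer setup would follow; any
	 * failure jumps to the same unified cleanup label. On success
	 * the resources are handed to the caller. */
	return (0);
fail:
	free(mem);	/* free(NULL) is a no-op, mirroring the NULL */
	free(tag);	/* checks in rt_free_rx_ring() */
	return (error);
}

int
main(void)
{
	printf("alloc_ring: %d\n", alloc_ring());
	return (0);
}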
data->m->m_len = data->m->m_pkthdr.len = MJUMPAGESIZE; error = bus_dmamap_load_mbuf_sg(ring->data_dma_tag, data->dma_map, data->m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->dev, "could not load Rx mbuf DMA map\n"); goto fail; } KASSERT(nsegs == 1, ("%s: too many DMA segments", device_get_nameunit(sc->dev))); /* Add 2 for proper align of RX IP header */ desc->sdp0 = htole32(segs[0].ds_addr+2); desc->sdl0 = htole32(segs[0].ds_len-2); } error = bus_dmamap_create(ring->data_dma_tag, 0, &ring->spare_dma_map); if (error != 0) { device_printf(sc->dev, "could not create Rx spare DMA map\n"); goto fail; } bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ring->qid = qid; return (0); fail: rt_free_rx_ring(sc, ring); return (error); } /* * rt_reset_rx_ring - reset RX ring buffer */ static void rt_reset_rx_ring(struct rt_softc *sc, struct rt_softc_rx_ring *ring) { struct rt_rxdesc *desc; int i; for (i = 0; i < RT_SOFTC_RX_RING_DATA_COUNT; i++) { desc = &ring->desc[i]; desc->sdl0 &= ~htole16(RT_RXDESC_SDL0_DDONE); } bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ring->cur = 0; } /* * rt_free_rx_ring - free memory used by RX ring buffer */ static void rt_free_rx_ring(struct rt_softc *sc, struct rt_softc_rx_ring *ring) { struct rt_softc_rx_data *data; int i; if (ring->desc != NULL) { bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->desc_dma_tag, ring->desc_dma_map); bus_dmamem_free(ring->desc_dma_tag, ring->desc, ring->desc_dma_map); } if (ring->desc_dma_tag != NULL) bus_dma_tag_destroy(ring->desc_dma_tag); for (i = 0; i < RT_SOFTC_RX_RING_DATA_COUNT; i++) { data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dma_tag, data->dma_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(ring->data_dma_tag, data->dma_map); m_freem(data->m); } if (data->dma_map != NULL) bus_dmamap_destroy(ring->data_dma_tag, data->dma_map); } if (ring->spare_dma_map != NULL) bus_dmamap_destroy(ring->data_dma_tag, ring->spare_dma_map); if (ring->data_dma_tag != NULL) bus_dma_tag_destroy(ring->data_dma_tag); } /* * rt_alloc_tx_ring - allocate TX ring buffer */ static int rt_alloc_tx_ring(struct rt_softc *sc, struct rt_softc_tx_ring *ring, int qid) { struct rt_softc_tx_data *data; int error, i; mtx_init(&ring->lock, device_get_nameunit(sc->dev), NULL, MTX_DEF); error = bus_dma_tag_create(bus_get_dma_tag(sc->dev), PAGE_SIZE, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, RT_SOFTC_TX_RING_DESC_COUNT * sizeof(struct rt_txdesc), 1, RT_SOFTC_TX_RING_DESC_COUNT * sizeof(struct rt_txdesc), 0, NULL, NULL, &ring->desc_dma_tag); if (error != 0) { device_printf(sc->dev, "could not create Tx desc DMA tag\n"); goto fail; } error = bus_dmamem_alloc(ring->desc_dma_tag, (void **) &ring->desc, BUS_DMA_NOWAIT | BUS_DMA_ZERO, &ring->desc_dma_map); if (error != 0) { device_printf(sc->dev, "could not allocate Tx desc DMA memory\n"); goto fail; } error = bus_dmamap_load(ring->desc_dma_tag, ring->desc_dma_map, ring->desc, (RT_SOFTC_TX_RING_DESC_COUNT * sizeof(struct rt_txdesc)), rt_dma_map_addr, &ring->desc_phys_addr, 0); if (error != 0) { device_printf(sc->dev, "could not load Tx desc DMA map\n"); goto fail; } ring->desc_queued = 0; ring->desc_cur = 0; ring->desc_next = 0; error = bus_dma_tag_create(bus_get_dma_tag(sc->dev), PAGE_SIZE, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, RT_SOFTC_TX_RING_DATA_COUNT * RT_TX_DATA_SEG0_SIZE, 1, 
RT_SOFTC_TX_RING_DATA_COUNT * RT_TX_DATA_SEG0_SIZE, 0, NULL, NULL, &ring->seg0_dma_tag); if (error != 0) { device_printf(sc->dev, "could not create Tx seg0 DMA tag\n"); goto fail; } error = bus_dmamem_alloc(ring->seg0_dma_tag, (void **) &ring->seg0, BUS_DMA_NOWAIT | BUS_DMA_ZERO, &ring->seg0_dma_map); if (error != 0) { device_printf(sc->dev, "could not allocate Tx seg0 DMA memory\n"); goto fail; } error = bus_dmamap_load(ring->seg0_dma_tag, ring->seg0_dma_map, ring->seg0, RT_SOFTC_TX_RING_DATA_COUNT * RT_TX_DATA_SEG0_SIZE, rt_dma_map_addr, &ring->seg0_phys_addr, 0); if (error != 0) { device_printf(sc->dev, "could not load Tx seg0 DMA map\n"); goto fail; } error = bus_dma_tag_create(bus_get_dma_tag(sc->dev), PAGE_SIZE, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MJUMPAGESIZE, RT_SOFTC_MAX_SCATTER, MJUMPAGESIZE, 0, NULL, NULL, &ring->data_dma_tag); if (error != 0) { device_printf(sc->dev, "could not create Tx data DMA tag\n"); goto fail; } for (i = 0; i < RT_SOFTC_TX_RING_DATA_COUNT; i++) { data = &ring->data[i]; error = bus_dmamap_create(ring->data_dma_tag, 0, &data->dma_map); if (error != 0) { device_printf(sc->dev, "could not create Tx data DMA " "map\n"); goto fail; } } ring->data_queued = 0; ring->data_cur = 0; ring->data_next = 0; ring->qid = qid; return (0); fail: rt_free_tx_ring(sc, ring); return (error); } /* * rt_reset_tx_ring - reset TX ring buffer to empty state */ static void rt_reset_tx_ring(struct rt_softc *sc, struct rt_softc_tx_ring *ring) { struct rt_softc_tx_data *data; struct rt_txdesc *desc; int i; for (i = 0; i < RT_SOFTC_TX_RING_DESC_COUNT; i++) { desc = &ring->desc[i]; desc->sdl0 = 0; desc->sdl1 = 0; } ring->desc_queued = 0; ring->desc_cur = 0; ring->desc_next = 0; bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(ring->seg0_dma_tag, ring->seg0_dma_map, BUS_DMASYNC_PREWRITE); for (i = 0; i < RT_SOFTC_TX_RING_DATA_COUNT; i++) { data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dma_tag, data->dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dma_tag, data->dma_map); m_freem(data->m); data->m = NULL; } } ring->data_queued = 0; ring->data_cur = 0; ring->data_next = 0; } /* * rt_free_tx_ring - free TX ring buffer */ static void rt_free_tx_ring(struct rt_softc *sc, struct rt_softc_tx_ring *ring) { struct rt_softc_tx_data *data; int i; if (ring->desc != NULL) { bus_dmamap_sync(ring->desc_dma_tag, ring->desc_dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->desc_dma_tag, ring->desc_dma_map); bus_dmamem_free(ring->desc_dma_tag, ring->desc, ring->desc_dma_map); } if (ring->desc_dma_tag != NULL) bus_dma_tag_destroy(ring->desc_dma_tag); if (ring->seg0 != NULL) { bus_dmamap_sync(ring->seg0_dma_tag, ring->seg0_dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->seg0_dma_tag, ring->seg0_dma_map); bus_dmamem_free(ring->seg0_dma_tag, ring->seg0, ring->seg0_dma_map); } if (ring->seg0_dma_tag != NULL) bus_dma_tag_destroy(ring->seg0_dma_tag); for (i = 0; i < RT_SOFTC_TX_RING_DATA_COUNT; i++) { data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dma_tag, data->dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dma_tag, data->dma_map); m_freem(data->m); } if (data->dma_map != NULL) bus_dmamap_destroy(ring->data_dma_tag, data->dma_map); } if (ring->data_dma_tag != NULL) bus_dma_tag_destroy(ring->data_dma_tag); mtx_destroy(&ring->lock); } /* * rt_dma_map_addr - get address of busdma segment */ static void rt_dma_map_addr(void *arg, bus_dma_segment_t
*segs, int nseg, int error) { if (error != 0) return; KASSERT(nseg == 1, ("too many DMA segments, %d should be 1", nseg)); *(bus_addr_t *) arg = segs[0].ds_addr; } /* * rt_sysctl_attach - attach sysctl nodes for NIC counters. */ static void rt_sysctl_attach(struct rt_softc *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; struct sysctl_oid *stats; ctx = device_get_sysctl_ctx(sc->dev); tree = device_get_sysctl_tree(sc->dev); /* statistic counters */ stats = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "stats", CTLFLAG_RD, 0, "statistic"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "interrupts", CTLFLAG_RD, &sc->interrupts, "all interrupts"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "tx_coherent_interrupts", CTLFLAG_RD, &sc->tx_coherent_interrupts, "Tx coherent interrupts"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_coherent_interrupts", CTLFLAG_RD, &sc->rx_coherent_interrupts, "Rx coherent interrupts"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_interrupts", CTLFLAG_RD, &sc->rx_interrupts[0], "Rx interrupts"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_delay_interrupts", CTLFLAG_RD, &sc->rx_delay_interrupts, "Rx delay interrupts"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ3_interrupts", CTLFLAG_RD, &sc->tx_interrupts[3], "Tx AC3 interrupts"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ2_interrupts", CTLFLAG_RD, &sc->tx_interrupts[2], "Tx AC2 interrupts"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ1_interrupts", CTLFLAG_RD, &sc->tx_interrupts[1], "Tx AC1 interrupts"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ0_interrupts", CTLFLAG_RD, &sc->tx_interrupts[0], "Tx AC0 interrupts"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "tx_delay_interrupts", CTLFLAG_RD, &sc->tx_delay_interrupts, "Tx delay interrupts"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ3_desc_queued", CTLFLAG_RD, &sc->tx_ring[3].desc_queued, 0, "Tx AC3 descriptors queued"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ3_data_queued", CTLFLAG_RD, &sc->tx_ring[3].data_queued, 0, "Tx AC3 data queued"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ2_desc_queued", CTLFLAG_RD, &sc->tx_ring[2].desc_queued, 0, "Tx AC2 descriptors queued"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ2_data_queued", CTLFLAG_RD, &sc->tx_ring[2].data_queued, 0, "Tx AC2 data queued"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ1_desc_queued", CTLFLAG_RD, &sc->tx_ring[1].desc_queued, 0, "Tx AC1 descriptors queued"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ1_data_queued", CTLFLAG_RD, &sc->tx_ring[1].data_queued, 0, "Tx AC1 data queued"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ0_desc_queued", CTLFLAG_RD, &sc->tx_ring[0].desc_queued, 0, "Tx AC0 descriptors queued"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ0_data_queued", CTLFLAG_RD, &sc->tx_ring[0].data_queued, 0, "Tx AC0 data queued"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ3_data_queue_full", CTLFLAG_RD, &sc->tx_data_queue_full[3], "Tx AC3 data queue full"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ2_data_queue_full", CTLFLAG_RD, &sc->tx_data_queue_full[2], "Tx AC2 data queue full"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ1_data_queue_full", CTLFLAG_RD, &sc->tx_data_queue_full[1], "Tx AC1 data queue full"); SYSCTL_ADD_ULONG(ctx, 
SYSCTL_CHILDREN(stats), OID_AUTO, "TXQ0_data_queue_full", CTLFLAG_RD, &sc->tx_data_queue_full[0], "Tx AC0 data queue full"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "tx_watchdog_timeouts", CTLFLAG_RD, &sc->tx_watchdog_timeouts, "Tx watchdog timeouts"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "tx_defrag_packets", CTLFLAG_RD, &sc->tx_defrag_packets, "Tx defragmented packets"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "no_tx_desc_avail", CTLFLAG_RD, &sc->no_tx_desc_avail, "no Tx descriptors available"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_mbuf_alloc_errors", CTLFLAG_RD, &sc->rx_mbuf_alloc_errors, "Rx mbuf allocation errors"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_mbuf_dmamap_errors", CTLFLAG_RD, &sc->rx_mbuf_dmamap_errors, "Rx mbuf DMA mapping errors"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "tx_queue_0_not_empty", CTLFLAG_RD, &sc->tx_queue_not_empty[0], "Tx queue 0 not empty"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "tx_queue_1_not_empty", CTLFLAG_RD, &sc->tx_queue_not_empty[1], "Tx queue 1 not empty"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_packets", CTLFLAG_RD, &sc->rx_packets, "Rx packets"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_crc_errors", CTLFLAG_RD, &sc->rx_crc_err, "Rx CRC errors"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_phy_errors", CTLFLAG_RD, &sc->rx_phy_err, "Rx PHY errors"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_dup_packets", CTLFLAG_RD, &sc->rx_dup_packets, "Rx duplicate packets"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_fifo_overflows", CTLFLAG_RD, &sc->rx_fifo_overflows, "Rx FIFO overflows"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_bytes", CTLFLAG_RD, &sc->rx_bytes, "Rx bytes"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_long_err", CTLFLAG_RD, &sc->rx_long_err, "Rx too long frame errors"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "rx_short_err", CTLFLAG_RD, &sc->rx_short_err, "Rx too short frame errors"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "tx_bytes", CTLFLAG_RD, &sc->tx_bytes, "Tx bytes"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "tx_packets", CTLFLAG_RD, &sc->tx_packets, "Tx packets"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "tx_skip", CTLFLAG_RD, &sc->tx_skip, "Tx skip count for GDMA ports"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(stats), OID_AUTO, "tx_collision", CTLFLAG_RD, &sc->tx_collision, "Tx collision count for GDMA ports"); } #ifdef IF_RT_PHY_SUPPORT static int rt_miibus_readreg(device_t dev, int phy, int reg) { struct rt_softc *sc = device_get_softc(dev); /* * PSEUDO_PHYAD is a special value used to indicate that a switch is * attached. No real PHY uses the PSEUDO_PHYAD (0x1e) address. */
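The MDIO access word written in the functions below has to be composed with the bitwise OR operator; with the logical "||" each parenthesized term collapses to 0 or 1 and the command register would receive a meaningless value. A stand-alone illustration, with a hypothetical field layout:

#include <stdint.h>
#include <stdio.h>

#define MDIO_CMD_ONGO		(1u << 31)	/* hypothetical bit layout */
#define MDIO_PHY_ADDR_SHIFT	24
#define MDIO_PHY_ADDR_MASK	(0x1fu << MDIO_PHY_ADDR_SHIFT)
#define MDIO_PHYREG_ADDR_SHIFT	16
#define MDIO_PHYREG_ADDR_MASK	(0x1fu << MDIO_PHYREG_ADDR_SHIFT)

int
main(void)
{
	int phy = 1, reg = 2;

	/* Correct: bitwise OR preserves every field. */
	uint32_t cmd = MDIO_CMD_ONGO |
	    (((uint32_t)phy << MDIO_PHY_ADDR_SHIFT) & MDIO_PHY_ADDR_MASK) |
	    (((uint32_t)reg << MDIO_PHYREG_ADDR_SHIFT) & MDIO_PHYREG_ADDR_MASK);

	/* Wrong: logical OR evaluates to the single value 1. */
	uint32_t bogus = MDIO_CMD_ONGO ||
	    (((uint32_t)phy << MDIO_PHY_ADDR_SHIFT) & MDIO_PHY_ADDR_MASK);

	printf("cmd=0x%08x bogus=0x%08x\n", (unsigned)cmd, (unsigned)bogus);
	return (0);
}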
if (phy == 31) { /* Fake PHY ID for bfeswitch attach */ switch (reg) { case MII_BMSR: return (BMSR_EXTSTAT|BMSR_MEDIAMASK); case MII_PHYIDR1: return (0x40); /* As result of faking */ case MII_PHYIDR2: /* PHY will detect as */ return (0x6250); /* bfeswitch */ } } /* Wait for the previous command to complete, if any */ while (RT_READ(sc, MDIO_ACCESS) & MDIO_CMD_ONGO); RT_WRITE(sc, MDIO_ACCESS, MDIO_CMD_ONGO | ((phy << MDIO_PHY_ADDR_SHIFT) & MDIO_PHY_ADDR_MASK) | ((reg << MDIO_PHYREG_ADDR_SHIFT) & MDIO_PHYREG_ADDR_MASK)); while (RT_READ(sc, MDIO_ACCESS) & MDIO_CMD_ONGO); return (RT_READ(sc, MDIO_ACCESS) & MDIO_PHY_DATA_MASK); } static int rt_miibus_writereg(device_t dev, int phy, int reg, int val) { struct rt_softc *sc = device_get_softc(dev); /* Wait for the previous command to complete, if any */ while (RT_READ(sc, MDIO_ACCESS) & MDIO_CMD_ONGO); RT_WRITE(sc, MDIO_ACCESS, MDIO_CMD_ONGO | MDIO_CMD_WR | ((phy << MDIO_PHY_ADDR_SHIFT) & MDIO_PHY_ADDR_MASK) | ((reg << MDIO_PHYREG_ADDR_SHIFT) & MDIO_PHYREG_ADDR_MASK) | (val & MDIO_PHY_DATA_MASK)); while (RT_READ(sc, MDIO_ACCESS) & MDIO_CMD_ONGO); return (0); } void rt_miibus_statchg(device_t dev) { struct rt_softc *sc = device_get_softc(dev); struct mii_data *mii; mii = device_get_softc(sc->rt_miibus); if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) == (IFM_ACTIVE | IFM_AVALID)) { switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_10_T: case IFM_100_TX: /* XXX check link here */ sc->flags |= 1; break; default: break; } } } #endif /* IF_RT_PHY_SUPPORT */ static device_method_t rt_dev_methods[] = { DEVMETHOD(device_probe, rt_probe), DEVMETHOD(device_attach, rt_attach), DEVMETHOD(device_detach, rt_detach), DEVMETHOD(device_shutdown, rt_shutdown), DEVMETHOD(device_suspend, rt_suspend), DEVMETHOD(device_resume, rt_resume), #ifdef IF_RT_PHY_SUPPORT /* MII interface */ DEVMETHOD(miibus_readreg, rt_miibus_readreg), DEVMETHOD(miibus_writereg, rt_miibus_writereg), DEVMETHOD(miibus_statchg, rt_miibus_statchg), #endif DEVMETHOD_END }; static driver_t rt_driver = { "rt", rt_dev_methods, sizeof(struct rt_softc) }; static devclass_t rt_dev_class; DRIVER_MODULE(rt, nexus, rt_driver, rt_dev_class, 0, 0); #ifdef FDT DRIVER_MODULE(rt, simplebus, rt_driver, rt_dev_class, 0, 0); #endif MODULE_DEPEND(rt, ether, 1, 1, 1); MODULE_DEPEND(rt, miibus, 1, 1, 1); diff --git a/sys/dev/sec/sec.c b/sys/dev/sec/sec.c index 3b99a3e326a0..b83e7ebb7843 100644 --- a/sys/dev/sec/sec.c +++ b/sys/dev/sec/sec.c @@ -1,1875 +1,1876 @@ /*- * Copyright (C) 2008-2009 Semihalf, Piotr Ziecik * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Freescale integrated Security Engine (SEC) driver. Currently SEC 2.0 and * 3.0 are supported. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include "cryptodev_if.h" #include #include static int sec_probe(device_t dev); static int sec_attach(device_t dev); static int sec_detach(device_t dev); static int sec_suspend(device_t dev); static int sec_resume(device_t dev); static int sec_shutdown(device_t dev); static void sec_primary_intr(void *arg); static void sec_secondary_intr(void *arg); static int sec_setup_intr(struct sec_softc *sc, struct resource **ires, void **ihand, int *irid, driver_intr_t handler, const char *iname); static void sec_release_intr(struct sec_softc *sc, struct resource *ires, void *ihand, int irid, const char *iname); static int sec_controller_reset(struct sec_softc *sc); static int sec_channel_reset(struct sec_softc *sc, int channel, int full); static int sec_init(struct sec_softc *sc); static int sec_alloc_dma_mem(struct sec_softc *sc, struct sec_dma_mem *dma_mem, bus_size_t size); static int sec_desc_map_dma(struct sec_softc *sc, struct sec_dma_mem *dma_mem, void *mem, bus_size_t size, int type, struct sec_desc_map_info *sdmi); static void sec_free_dma_mem(struct sec_dma_mem *dma_mem); static void sec_enqueue(struct sec_softc *sc); static int sec_enqueue_desc(struct sec_softc *sc, struct sec_desc *desc, int channel); static int sec_eu_channel(struct sec_softc *sc, int eu); static int sec_make_pointer(struct sec_softc *sc, struct sec_desc *desc, u_int n, void *data, bus_size_t doffset, bus_size_t dsize, int dtype); static int sec_make_pointer_direct(struct sec_softc *sc, struct sec_desc *desc, u_int n, bus_addr_t data, bus_size_t dsize); static int sec_alloc_session(struct sec_softc *sc); static int sec_newsession(device_t dev, u_int32_t *sidp, struct cryptoini *cri); static int sec_freesession(device_t dev, uint64_t tid); static int sec_process(device_t dev, struct cryptop *crp, int hint); static int sec_split_cri(struct cryptoini *cri, struct cryptoini **enc, struct cryptoini **mac); static int sec_split_crp(struct cryptop *crp, struct cryptodesc **enc, struct cryptodesc **mac); static int sec_build_common_ns_desc(struct sec_softc *sc, struct sec_desc *desc, struct sec_session *ses, struct cryptop *crp, struct cryptodesc *enc, int buftype); static int sec_build_common_s_desc(struct sec_softc *sc, struct sec_desc *desc, struct sec_session *ses, struct cryptop *crp, struct cryptodesc *enc, struct cryptodesc *mac, int buftype); static struct sec_session *sec_get_session(struct sec_softc *sc, u_int sid); static struct sec_desc *sec_find_desc(struct sec_softc *sc, bus_addr_t paddr); /* AESU */ static int sec_aesu_newsession(struct sec_softc *sc, struct sec_session *ses, struct cryptoini *enc, struct cryptoini *mac); static int sec_aesu_make_desc(struct sec_softc *sc, struct sec_session *ses, struct sec_desc *desc, struct cryptop *crp, 
int buftype); /* DEU */ static int sec_deu_newsession(struct sec_softc *sc, struct sec_session *ses, struct cryptoini *enc, struct cryptoini *mac); static int sec_deu_make_desc(struct sec_softc *sc, struct sec_session *ses, struct sec_desc *desc, struct cryptop *crp, int buftype); /* MDEU */ static int sec_mdeu_can_handle(u_int alg); static int sec_mdeu_config(struct cryptodesc *crd, u_int *eu, u_int *mode, u_int *hashlen); static int sec_mdeu_newsession(struct sec_softc *sc, struct sec_session *ses, struct cryptoini *enc, struct cryptoini *mac); static int sec_mdeu_make_desc(struct sec_softc *sc, struct sec_session *ses, struct sec_desc *desc, struct cryptop *crp, int buftype); static device_method_t sec_methods[] = { /* Device interface */ DEVMETHOD(device_probe, sec_probe), DEVMETHOD(device_attach, sec_attach), DEVMETHOD(device_detach, sec_detach), DEVMETHOD(device_suspend, sec_suspend), DEVMETHOD(device_resume, sec_resume), DEVMETHOD(device_shutdown, sec_shutdown), /* Crypto methods */ DEVMETHOD(cryptodev_newsession, sec_newsession), DEVMETHOD(cryptodev_freesession,sec_freesession), DEVMETHOD(cryptodev_process, sec_process), DEVMETHOD_END }; static driver_t sec_driver = { "sec", sec_methods, sizeof(struct sec_softc), }; static devclass_t sec_devclass; DRIVER_MODULE(sec, simplebus, sec_driver, sec_devclass, 0, 0); MODULE_DEPEND(sec, crypto, 1, 1, 1); static struct sec_eu_methods sec_eus[] = { { sec_aesu_newsession, sec_aesu_make_desc, }, { sec_deu_newsession, sec_deu_make_desc, }, { sec_mdeu_newsession, sec_mdeu_make_desc, }, { NULL, NULL } }; static inline void sec_sync_dma_mem(struct sec_dma_mem *dma_mem, bus_dmasync_op_t op) { /* Sync only if dma memory is valid */ if (dma_mem->dma_vaddr != NULL) bus_dmamap_sync(dma_mem->dma_tag, dma_mem->dma_map, op); } static inline void sec_free_session(struct sec_softc *sc, struct sec_session *ses) { SEC_LOCK(sc, sessions); ses->ss_used = 0; SEC_UNLOCK(sc, sessions); } static inline void * sec_get_pointer_data(struct sec_desc *desc, u_int n) { return (desc->sd_ptr_dmem[n].dma_vaddr); } static int sec_probe(device_t dev) { struct sec_softc *sc; uint64_t id; if (!ofw_bus_status_okay(dev)) return (ENXIO); if (!ofw_bus_is_compatible(dev, "fsl,sec2.0")) return (ENXIO); sc = device_get_softc(dev); sc->sc_rrid = 0; sc->sc_rres = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->sc_rrid, RF_ACTIVE); if (sc->sc_rres == NULL) return (ENXIO); sc->sc_bas.bsh = rman_get_bushandle(sc->sc_rres); sc->sc_bas.bst = rman_get_bustag(sc->sc_rres); id = SEC_READ(sc, SEC_ID); bus_release_resource(dev, SYS_RES_MEMORY, sc->sc_rrid, sc->sc_rres); switch (id) { case SEC_20_ID: device_set_desc(dev, "Freescale Security Engine 2.0"); sc->sc_version = 2; break; case SEC_30_ID: device_set_desc(dev, "Freescale Security Engine 3.0"); sc->sc_version = 3; break; case SEC_31_ID: device_set_desc(dev, "Freescale Security Engine 3.1"); sc->sc_version = 3; break; default: - device_printf(dev, "unknown SEC ID 0x%016llx!\n", id); + device_printf(dev, "unknown SEC ID 0x%016"PRIx64"!\n", id); return (ENXIO); } return (0); } static int sec_attach(device_t dev) { struct sec_softc *sc; struct sec_hw_lt *lt; int error = 0; int i; sc = device_get_softc(dev); sc->sc_dev = dev; sc->sc_blocked = 0; sc->sc_shutdown = 0; sc->sc_cid = crypto_get_driverid(dev, CRYPTOCAP_F_HARDWARE); if (sc->sc_cid < 0) { device_printf(dev, "could not get crypto driver ID!\n"); return (ENXIO); } /* Init locks */ mtx_init(&sc->sc_controller_lock, device_get_nameunit(dev), "SEC Controller lock", MTX_DEF); 
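The diff hunk in sec_probe() above replaces "%llx" with PRIx64 because uint64_t is not unsigned long long on every platform; the inttypes.h macro always expands to the correct length modifier. A minimal stand-alone illustration (the ID value here is hypothetical):

#include <inttypes.h>
#include <stdio.h>

int
main(void)
{
	uint64_t id = 0x0040030100000000ULL;	/* made-up SEC ID */

	/* Portable across ILP32 and LP64 targets: PRIx64 supplies the
	 * right conversion for this platform's uint64_t. */
	printf("unknown SEC ID 0x%016" PRIx64 "!\n", id);
	return (0);
}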
mtx_init(&sc->sc_descriptors_lock, device_get_nameunit(dev), "SEC Descriptors lock", MTX_DEF); mtx_init(&sc->sc_sessions_lock, device_get_nameunit(dev), "SEC Sessions lock", MTX_DEF); /* Allocate I/O memory for SEC registers */ sc->sc_rrid = 0; sc->sc_rres = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->sc_rrid, RF_ACTIVE); if (sc->sc_rres == NULL) { device_printf(dev, "could not allocate I/O memory!\n"); goto fail1; } sc->sc_bas.bsh = rman_get_bushandle(sc->sc_rres); sc->sc_bas.bst = rman_get_bustag(sc->sc_rres); /* Setup interrupts */ sc->sc_pri_irid = 0; error = sec_setup_intr(sc, &sc->sc_pri_ires, &sc->sc_pri_ihand, &sc->sc_pri_irid, sec_primary_intr, "primary"); if (error) goto fail2; if (sc->sc_version == 3) { sc->sc_sec_irid = 1; error = sec_setup_intr(sc, &sc->sc_sec_ires, &sc->sc_sec_ihand, &sc->sc_sec_irid, sec_secondary_intr, "secondary"); if (error) goto fail3; } /* Alloc DMA memory for descriptors and link tables */ error = sec_alloc_dma_mem(sc, &(sc->sc_desc_dmem), SEC_DESCRIPTORS * sizeof(struct sec_hw_desc)); if (error) goto fail4; error = sec_alloc_dma_mem(sc, &(sc->sc_lt_dmem), (SEC_LT_ENTRIES + 1) * sizeof(struct sec_hw_lt)); if (error) goto fail5; /* Fill in descriptors and link tables */ for (i = 0; i < SEC_DESCRIPTORS; i++) { sc->sc_desc[i].sd_desc = (struct sec_hw_desc*)(sc->sc_desc_dmem.dma_vaddr) + i; sc->sc_desc[i].sd_desc_paddr = sc->sc_desc_dmem.dma_paddr + (i * sizeof(struct sec_hw_desc)); } for (i = 0; i < SEC_LT_ENTRIES + 1; i++) { sc->sc_lt[i].sl_lt = (struct sec_hw_lt*)(sc->sc_lt_dmem.dma_vaddr) + i; sc->sc_lt[i].sl_lt_paddr = sc->sc_lt_dmem.dma_paddr + (i * sizeof(struct sec_hw_lt)); } /* Last entry in link table is used to create a circle */ lt = sc->sc_lt[SEC_LT_ENTRIES].sl_lt; lt->shl_length = 0; lt->shl_r = 0; lt->shl_n = 1; lt->shl_ptr = sc->sc_lt[0].sl_lt_paddr; /* Init descriptor and link table queues pointers */ SEC_CNT_INIT(sc, sc_free_desc_get_cnt, SEC_DESCRIPTORS); SEC_CNT_INIT(sc, sc_free_desc_put_cnt, SEC_DESCRIPTORS); SEC_CNT_INIT(sc, sc_ready_desc_get_cnt, SEC_DESCRIPTORS); SEC_CNT_INIT(sc, sc_ready_desc_put_cnt, SEC_DESCRIPTORS); SEC_CNT_INIT(sc, sc_queued_desc_get_cnt, SEC_DESCRIPTORS); SEC_CNT_INIT(sc, sc_queued_desc_put_cnt, SEC_DESCRIPTORS); SEC_CNT_INIT(sc, sc_lt_alloc_cnt, SEC_LT_ENTRIES); SEC_CNT_INIT(sc, sc_lt_free_cnt, SEC_LT_ENTRIES); /* Create masks for fast checks */ sc->sc_int_error_mask = 0; for (i = 0; i < SEC_CHANNELS; i++) sc->sc_int_error_mask |= (~0ULL & SEC_INT_CH_ERR(i)); switch (sc->sc_version) { case 2: sc->sc_channel_idle_mask = (SEC_CHAN_CSR2_FFLVL_M << SEC_CHAN_CSR2_FFLVL_S) | (SEC_CHAN_CSR2_MSTATE_M << SEC_CHAN_CSR2_MSTATE_S) | (SEC_CHAN_CSR2_PSTATE_M << SEC_CHAN_CSR2_PSTATE_S) | (SEC_CHAN_CSR2_GSTATE_M << SEC_CHAN_CSR2_GSTATE_S); break; case 3: sc->sc_channel_idle_mask = (SEC_CHAN_CSR3_FFLVL_M << SEC_CHAN_CSR3_FFLVL_S) | (SEC_CHAN_CSR3_MSTATE_M << SEC_CHAN_CSR3_MSTATE_S) | (SEC_CHAN_CSR3_PSTATE_M << SEC_CHAN_CSR3_PSTATE_S) | (SEC_CHAN_CSR3_GSTATE_M << SEC_CHAN_CSR3_GSTATE_S); break; } /* Init hardware */ error = sec_init(sc); if (error) goto fail6; /* Register in OCF (AESU) */ crypto_register(sc->sc_cid, CRYPTO_AES_CBC, 0, 0); /* Register in OCF (DEU) */ crypto_register(sc->sc_cid, CRYPTO_DES_CBC, 0, 0); crypto_register(sc->sc_cid, CRYPTO_3DES_CBC, 0, 0); /* Register in OCF (MDEU) */ crypto_register(sc->sc_cid, CRYPTO_MD5, 0, 0); crypto_register(sc->sc_cid, CRYPTO_MD5_HMAC, 0, 0); crypto_register(sc->sc_cid, CRYPTO_SHA1, 0, 0); crypto_register(sc->sc_cid, CRYPTO_SHA1_HMAC, 0, 0); crypto_register(sc->sc_cid, 
CRYPTO_SHA2_256_HMAC, 0, 0); if (sc->sc_version >= 3) { crypto_register(sc->sc_cid, CRYPTO_SHA2_384_HMAC, 0, 0); crypto_register(sc->sc_cid, CRYPTO_SHA2_512_HMAC, 0, 0); } return (0); fail6: sec_free_dma_mem(&(sc->sc_lt_dmem)); fail5: sec_free_dma_mem(&(sc->sc_desc_dmem)); fail4: sec_release_intr(sc, sc->sc_sec_ires, sc->sc_sec_ihand, sc->sc_sec_irid, "secondary"); fail3: sec_release_intr(sc, sc->sc_pri_ires, sc->sc_pri_ihand, sc->sc_pri_irid, "primary"); fail2: bus_release_resource(dev, SYS_RES_MEMORY, sc->sc_rrid, sc->sc_rres); fail1: mtx_destroy(&sc->sc_controller_lock); mtx_destroy(&sc->sc_descriptors_lock); mtx_destroy(&sc->sc_sessions_lock); return (ENXIO); } static int sec_detach(device_t dev) { struct sec_softc *sc = device_get_softc(dev); int i, error, timeout = SEC_TIMEOUT; /* Prepare driver to shutdown */ SEC_LOCK(sc, descriptors); sc->sc_shutdown = 1; SEC_UNLOCK(sc, descriptors); /* Wait until all queued processing finishes */ while (1) { SEC_LOCK(sc, descriptors); i = SEC_READY_DESC_CNT(sc) + SEC_QUEUED_DESC_CNT(sc); SEC_UNLOCK(sc, descriptors); if (i == 0) break; if (timeout < 0) { device_printf(dev, "queue flush timeout!\n"); /* DMA can be still active - stop it */ for (i = 0; i < SEC_CHANNELS; i++) sec_channel_reset(sc, i, 1); break; } timeout -= 1000; DELAY(1000); } /* Disable interrupts */ SEC_WRITE(sc, SEC_IER, 0); /* Unregister from OCF */ crypto_unregister_all(sc->sc_cid); /* Free DMA memory */ for (i = 0; i < SEC_DESCRIPTORS; i++) SEC_DESC_FREE_POINTERS(&(sc->sc_desc[i])); sec_free_dma_mem(&(sc->sc_lt_dmem)); sec_free_dma_mem(&(sc->sc_desc_dmem)); /* Release interrupts */ sec_release_intr(sc, sc->sc_pri_ires, sc->sc_pri_ihand, sc->sc_pri_irid, "primary"); sec_release_intr(sc, sc->sc_sec_ires, sc->sc_sec_ihand, sc->sc_sec_irid, "secondary"); /* Release memory */ if (sc->sc_rres) { error = bus_release_resource(dev, SYS_RES_MEMORY, sc->sc_rrid, sc->sc_rres); if (error) device_printf(dev, "bus_release_resource() failed for" " I/O memory, error %d\n", error); sc->sc_rres = NULL; } mtx_destroy(&sc->sc_controller_lock); mtx_destroy(&sc->sc_descriptors_lock); mtx_destroy(&sc->sc_sessions_lock); return (0); } static int sec_suspend(device_t dev) { return (0); } static int sec_resume(device_t dev) { return (0); } static int sec_shutdown(device_t dev) { return (0); } static int sec_setup_intr(struct sec_softc *sc, struct resource **ires, void **ihand, int *irid, driver_intr_t handler, const char *iname) { int error; (*ires) = bus_alloc_resource_any(sc->sc_dev, SYS_RES_IRQ, irid, RF_ACTIVE); if ((*ires) == NULL) { device_printf(sc->sc_dev, "could not allocate %s IRQ\n", iname); return (ENXIO); } error = bus_setup_intr(sc->sc_dev, *ires, INTR_MPSAFE | INTR_TYPE_NET, NULL, handler, sc, ihand); if (error) { device_printf(sc->sc_dev, "failed to set up %s IRQ\n", iname); if (bus_release_resource(sc->sc_dev, SYS_RES_IRQ, *irid, *ires)) device_printf(sc->sc_dev, "could not release %s IRQ\n", iname); (*ires) = NULL; return (error); } return (0); } static void sec_release_intr(struct sec_softc *sc, struct resource *ires, void *ihand, int irid, const char *iname) { int error; if (ires == NULL) return; error = bus_teardown_intr(sc->sc_dev, ires, ihand); if (error) device_printf(sc->sc_dev, "bus_teardown_intr() failed for %s" " IRQ, error %d\n", iname, error); error = bus_release_resource(sc->sc_dev, SYS_RES_IRQ, irid, ires); if (error) device_printf(sc->sc_dev, "bus_release_resource() failed for %s" " IRQ, error %d\n", iname, error); } static void sec_primary_intr(void *arg) { struct 
sec_softc *sc = arg; struct sec_desc *desc; uint64_t isr; int i, wakeup = 0; SEC_LOCK(sc, controller); /* Check for errors */ isr = SEC_READ(sc, SEC_ISR); if (isr & sc->sc_int_error_mask) { /* Check each channel for error */ for (i = 0; i < SEC_CHANNELS; i++) { if ((isr & SEC_INT_CH_ERR(i)) == 0) continue; device_printf(sc->sc_dev, "I/O error on channel %i!\n", i); /* Find and mark problematic descriptor */ desc = sec_find_desc(sc, SEC_READ(sc, SEC_CHAN_CDPR(i))); if (desc != NULL) desc->sd_error = EIO; /* Do partial channel reset */ sec_channel_reset(sc, i, 0); } } /* ACK interrupt */ SEC_WRITE(sc, SEC_ICR, 0xFFFFFFFFFFFFFFFFULL); SEC_UNLOCK(sc, controller); SEC_LOCK(sc, descriptors); /* Handle processed descriptors */ SEC_DESC_SYNC(sc, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); while (SEC_QUEUED_DESC_CNT(sc) > 0) { desc = SEC_GET_QUEUED_DESC(sc); if (desc->sd_desc->shd_done != 0xFF && desc->sd_error == 0) { SEC_PUT_BACK_QUEUED_DESC(sc); break; } SEC_DESC_SYNC_POINTERS(desc, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); desc->sd_crp->crp_etype = desc->sd_error; crypto_done(desc->sd_crp); SEC_DESC_FREE_POINTERS(desc); SEC_DESC_FREE_LT(sc, desc); SEC_DESC_QUEUED2FREE(sc); } SEC_DESC_SYNC(sc, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); if (!sc->sc_shutdown) { wakeup = sc->sc_blocked; sc->sc_blocked = 0; } SEC_UNLOCK(sc, descriptors); /* Enqueue ready descriptors in hardware */ sec_enqueue(sc); if (wakeup) crypto_unblock(sc->sc_cid, wakeup); } static void sec_secondary_intr(void *arg) { struct sec_softc *sc = arg; device_printf(sc->sc_dev, "spurious secondary interrupt!\n"); sec_primary_intr(arg); } static int sec_controller_reset(struct sec_softc *sc) { int timeout = SEC_TIMEOUT; /* Reset Controller */ SEC_WRITE(sc, SEC_MCR, SEC_MCR_SWR); while (SEC_READ(sc, SEC_MCR) & SEC_MCR_SWR) { DELAY(1000); timeout -= 1000; if (timeout < 0) { device_printf(sc->sc_dev, "timeout while waiting for " "device reset!\n"); return (ETIMEDOUT); } } return (0); } static int sec_channel_reset(struct sec_softc *sc, int channel, int full) { int timeout = SEC_TIMEOUT; uint64_t bit = (full) ? 
SEC_CHAN_CCR_R : SEC_CHAN_CCR_CON; uint64_t reg; /* Reset Channel */ reg = SEC_READ(sc, SEC_CHAN_CCR(channel)); SEC_WRITE(sc, SEC_CHAN_CCR(channel), reg | bit); while (SEC_READ(sc, SEC_CHAN_CCR(channel)) & bit) { DELAY(1000); timeout -= 1000; if (timeout < 0) { device_printf(sc->sc_dev, "timeout while waiting for " "channel reset!\n"); return (ETIMEDOUT); } } if (full) { reg = SEC_CHAN_CCR_CDIE | SEC_CHAN_CCR_NT | SEC_CHAN_CCR_BS; switch(sc->sc_version) { case 2: reg |= SEC_CHAN_CCR_CDWE; break; case 3: reg |= SEC_CHAN_CCR_AWSE | SEC_CHAN_CCR_WGN; break; } SEC_WRITE(sc, SEC_CHAN_CCR(channel), reg); } return (0); } static int sec_init(struct sec_softc *sc) { uint64_t reg; int error, i; /* Reset controller twice to clear all pending interrupts */ error = sec_controller_reset(sc); if (error) return (error); error = sec_controller_reset(sc); if (error) return (error); /* Reset channels */ for (i = 0; i < SEC_CHANNELS; i++) { error = sec_channel_reset(sc, i, 1); if (error) return (error); } /* Enable Interrupts */ reg = SEC_INT_ITO; for (i = 0; i < SEC_CHANNELS; i++) reg |= SEC_INT_CH_DN(i) | SEC_INT_CH_ERR(i); SEC_WRITE(sc, SEC_IER, reg); return (error); } static void sec_alloc_dma_mem_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct sec_dma_mem *dma_mem = arg; if (error) return; KASSERT(nseg == 1, ("Wrong number of segments, should be 1")); dma_mem->dma_paddr = segs->ds_addr; } static void sec_dma_map_desc_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct sec_desc_map_info *sdmi = arg; struct sec_softc *sc = sdmi->sdmi_sc; struct sec_lt *lt = NULL; bus_addr_t addr; bus_size_t size; int i; SEC_LOCK_ASSERT(sc, descriptors); if (error) return; for (i = 0; i < nseg; i++) { addr = segs[i].ds_addr; size = segs[i].ds_len; /* Skip requested offset */ if (sdmi->sdmi_offset >= size) { sdmi->sdmi_offset -= size; continue; } addr += sdmi->sdmi_offset; size -= sdmi->sdmi_offset; sdmi->sdmi_offset = 0; /* Do not link more than requested */ if (sdmi->sdmi_size < size) size = sdmi->sdmi_size; lt = SEC_ALLOC_LT_ENTRY(sc); lt->sl_lt->shl_length = size; lt->sl_lt->shl_r = 0; lt->sl_lt->shl_n = 0; lt->sl_lt->shl_ptr = addr; if (sdmi->sdmi_lt_first == NULL) sdmi->sdmi_lt_first = lt; sdmi->sdmi_lt_used += 1; if ((sdmi->sdmi_size -= size) == 0) break; } sdmi->sdmi_lt_last = lt; } static void sec_dma_map_desc_cb2(void *arg, bus_dma_segment_t *segs, int nseg, bus_size_t size, int error) { sec_dma_map_desc_cb(arg, segs, nseg, error); } static int sec_alloc_dma_mem(struct sec_softc *sc, struct sec_dma_mem *dma_mem, bus_size_t size) { int error; if (dma_mem->dma_vaddr != NULL) return (EBUSY); error = bus_dma_tag_create(NULL, /* parent */ SEC_DMA_ALIGNMENT, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filtfunc, filtfuncarg */ size, 1, /* maxsize, nsegments */ size, 0, /* maxsegsz, flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &(dma_mem->dma_tag)); /* dmat */ if (error) { device_printf(sc->sc_dev, "failed to allocate busdma tag, error" " %i!\n", error); goto err1; } error = bus_dmamem_alloc(dma_mem->dma_tag, &(dma_mem->dma_vaddr), BUS_DMA_NOWAIT | BUS_DMA_ZERO, &(dma_mem->dma_map)); if (error) { device_printf(sc->sc_dev, "failed to allocate DMA safe" " memory, error %i!\n", error); goto err2; } error = bus_dmamap_load(dma_mem->dma_tag, dma_mem->dma_map, dma_mem->dma_vaddr, size, sec_alloc_dma_mem_cb, dma_mem, BUS_DMA_NOWAIT); if (error) { device_printf(sc->sc_dev, "cannot get address of the DMA" " memory, 
error %i\n", error); goto err3; } dma_mem->dma_is_map = 0; return (0); err3: bus_dmamem_free(dma_mem->dma_tag, dma_mem->dma_vaddr, dma_mem->dma_map); err2: bus_dma_tag_destroy(dma_mem->dma_tag); err1: dma_mem->dma_vaddr = NULL; return (error); } static int sec_desc_map_dma(struct sec_softc *sc, struct sec_dma_mem *dma_mem, void *mem, bus_size_t size, int type, struct sec_desc_map_info *sdmi) { int error; if (dma_mem->dma_vaddr != NULL) return (EBUSY); switch (type) { case SEC_MEMORY: break; case SEC_UIO: size = SEC_FREE_LT_CNT(sc) * SEC_MAX_DMA_BLOCK_SIZE; break; case SEC_MBUF: size = m_length((struct mbuf*)mem, NULL); break; default: return (EINVAL); } error = bus_dma_tag_create(NULL, /* parent */ SEC_DMA_ALIGNMENT, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filtfunc, filtfuncarg */ size, /* maxsize */ SEC_FREE_LT_CNT(sc), /* nsegments */ SEC_MAX_DMA_BLOCK_SIZE, 0, /* maxsegsz, flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &(dma_mem->dma_tag)); /* dmat */ if (error) { device_printf(sc->sc_dev, "failed to allocate busdma tag, error" " %i!\n", error); dma_mem->dma_vaddr = NULL; return (error); } error = bus_dmamap_create(dma_mem->dma_tag, 0, &(dma_mem->dma_map)); if (error) { device_printf(sc->sc_dev, "failed to create DMA map, error %i!" "\n", error); bus_dma_tag_destroy(dma_mem->dma_tag); return (error); } switch (type) { case SEC_MEMORY: error = bus_dmamap_load(dma_mem->dma_tag, dma_mem->dma_map, mem, size, sec_dma_map_desc_cb, sdmi, BUS_DMA_NOWAIT); break; case SEC_UIO: error = bus_dmamap_load_uio(dma_mem->dma_tag, dma_mem->dma_map, mem, sec_dma_map_desc_cb2, sdmi, BUS_DMA_NOWAIT); break; case SEC_MBUF: error = bus_dmamap_load_mbuf(dma_mem->dma_tag, dma_mem->dma_map, mem, sec_dma_map_desc_cb2, sdmi, BUS_DMA_NOWAIT); break; } if (error) { device_printf(sc->sc_dev, "cannot get address of the DMA" " memory, error %i!\n", error); bus_dmamap_destroy(dma_mem->dma_tag, dma_mem->dma_map); bus_dma_tag_destroy(dma_mem->dma_tag); return (error); } dma_mem->dma_is_map = 1; dma_mem->dma_vaddr = mem; return (0); } static void sec_free_dma_mem(struct sec_dma_mem *dma_mem) { /* Check for double free */ if (dma_mem->dma_vaddr == NULL) return; bus_dmamap_unload(dma_mem->dma_tag, dma_mem->dma_map); if (dma_mem->dma_is_map) bus_dmamap_destroy(dma_mem->dma_tag, dma_mem->dma_map); else bus_dmamem_free(dma_mem->dma_tag, dma_mem->dma_vaddr, dma_mem->dma_map); bus_dma_tag_destroy(dma_mem->dma_tag); dma_mem->dma_vaddr = NULL; } static int sec_eu_channel(struct sec_softc *sc, int eu) { uint64_t reg; int channel = 0; SEC_LOCK_ASSERT(sc, controller); reg = SEC_READ(sc, SEC_EUASR); switch (eu) { case SEC_EU_AFEU: channel = SEC_EUASR_AFEU(reg); break; case SEC_EU_DEU: channel = SEC_EUASR_DEU(reg); break; case SEC_EU_MDEU_A: case SEC_EU_MDEU_B: channel = SEC_EUASR_MDEU(reg); break; case SEC_EU_RNGU: channel = SEC_EUASR_RNGU(reg); break; case SEC_EU_PKEU: channel = SEC_EUASR_PKEU(reg); break; case SEC_EU_AESU: channel = SEC_EUASR_AESU(reg); break; case SEC_EU_KEU: channel = SEC_EUASR_KEU(reg); break; case SEC_EU_CRCU: channel = SEC_EUASR_CRCU(reg); break; } return (channel - 1); }
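sec_enqueue() below tries three cases in order: both execution units already busy on the same channel, exactly one unit busy, or both free. A stand-alone model of that selection policy, where negative inputs mean "no channel assigned" as in sec_eu_channel() above, and -2 is this sketch's own marker for the deferred case:

#include <stdio.h>

static int
pick_channel(int ch0, int ch1)
{
	if (ch0 >= 0 && ch0 == ch1)	/* both EUs busy on one channel */
		return (ch0);
	if (ch0 >= 0 && ch1 < 0)	/* only the first EU is busy */
		return (ch0);
	if (ch1 >= 0 && ch0 < 0)	/* only the second EU is busy */
		return (ch1);
	if (ch0 < 0 && ch1 < 0)		/* both free: any idle channel */
		return (-1);
	return (-2);			/* busy on different channels: defer */
}

int
main(void)
{
	printf("%d %d %d\n", pick_channel(2, 2), pick_channel(-1, 3),
	    pick_channel(-1, -1));	/* prints: 2 3 -1 */
	return (0);
}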
static int sec_enqueue_desc(struct sec_softc *sc, struct sec_desc *desc, int channel) { u_int fflvl = SEC_MAX_FIFO_LEVEL; uint64_t reg; int i; SEC_LOCK_ASSERT(sc, controller); /* Find a free channel if we have not got one */ if (channel < 0) { for (i = 0; i < SEC_CHANNELS; i++) { reg = SEC_READ(sc, SEC_CHAN_CSR(i)); if ((reg & sc->sc_channel_idle_mask) == 0) { channel = i; break; } } } /* There is no free channel */ if (channel < 0) return (-1); /* Check FIFO level on selected channel */ reg = SEC_READ(sc, SEC_CHAN_CSR(channel)); switch (sc->sc_version) { case 2: fflvl = (reg >> SEC_CHAN_CSR2_FFLVL_S) & SEC_CHAN_CSR2_FFLVL_M; break; case 3: fflvl = (reg >> SEC_CHAN_CSR3_FFLVL_S) & SEC_CHAN_CSR3_FFLVL_M; break; } if (fflvl >= SEC_MAX_FIFO_LEVEL) return (-1); /* Enqueue descriptor in channel */ SEC_WRITE(sc, SEC_CHAN_FF(channel), desc->sd_desc_paddr); return (channel); } static void sec_enqueue(struct sec_softc *sc) { struct sec_desc *desc; int ch0, ch1; SEC_LOCK(sc, descriptors); SEC_LOCK(sc, controller); while (SEC_READY_DESC_CNT(sc) > 0) { desc = SEC_GET_READY_DESC(sc); ch0 = sec_eu_channel(sc, desc->sd_desc->shd_eu_sel0); ch1 = sec_eu_channel(sc, desc->sd_desc->shd_eu_sel1); /* * Both EUs are used by the same channel. * Enqueue the descriptor in the channel used by the busy EUs. */ if (ch0 >= 0 && ch0 == ch1) { if (sec_enqueue_desc(sc, desc, ch0) >= 0) { SEC_DESC_READY2QUEUED(sc); continue; } } /* * Only one EU is free. * Enqueue the descriptor in the channel used by the busy EU. */ if ((ch0 >= 0 && ch1 < 0) || (ch1 >= 0 && ch0 < 0)) { if (sec_enqueue_desc(sc, desc, (ch0 >= 0) ? ch0 : ch1) >= 0) { SEC_DESC_READY2QUEUED(sc); continue; } } /* * Both EUs are free. * Enqueue the descriptor in the first free channel. */ if (ch0 < 0 && ch1 < 0) { if (sec_enqueue_desc(sc, desc, -1) >= 0) { SEC_DESC_READY2QUEUED(sc); continue; } } /* The current descriptor cannot be queued at the moment */ SEC_PUT_BACK_READY_DESC(sc); break; } SEC_UNLOCK(sc, controller); SEC_UNLOCK(sc, descriptors); } static struct sec_desc * sec_find_desc(struct sec_softc *sc, bus_addr_t paddr) { struct sec_desc *desc = NULL; int i; SEC_LOCK_ASSERT(sc, descriptors); for (i = 0; i < SEC_CHANNELS; i++) { if (sc->sc_desc[i].sd_desc_paddr == paddr) { desc = &(sc->sc_desc[i]); break; } } return (desc); } static int sec_make_pointer_direct(struct sec_softc *sc, struct sec_desc *desc, u_int n, bus_addr_t data, bus_size_t dsize) { struct sec_hw_desc_ptr *ptr; SEC_LOCK_ASSERT(sc, descriptors); ptr = &(desc->sd_desc->shd_pointer[n]); ptr->shdp_length = dsize; ptr->shdp_extent = 0; ptr->shdp_j = 0; ptr->shdp_ptr = data; return (0); } static int sec_make_pointer(struct sec_softc *sc, struct sec_desc *desc, u_int n, void *data, bus_size_t doffset, bus_size_t dsize, int dtype) { struct sec_desc_map_info sdmi = { sc, dsize, doffset, NULL, NULL, 0 }; struct sec_hw_desc_ptr *ptr; int error; SEC_LOCK_ASSERT(sc, descriptors); /* For flat memory, map only the requested region */ if (dtype == SEC_MEMORY) { data = (uint8_t*)(data) + doffset; sdmi.sdmi_offset = 0; } error = sec_desc_map_dma(sc, &(desc->sd_ptr_dmem[n]), data, dsize, dtype, &sdmi); if (error) return (error); sdmi.sdmi_lt_last->sl_lt->shl_r = 1; desc->sd_lt_used += sdmi.sdmi_lt_used; ptr = &(desc->sd_desc->shd_pointer[n]); ptr->shdp_length = dsize; ptr->shdp_extent = 0; ptr->shdp_j = 1; ptr->shdp_ptr = sdmi.sdmi_lt_first->sl_lt_paddr; return (0); } static int sec_split_cri(struct cryptoini *cri, struct cryptoini **enc, struct cryptoini **mac) { struct cryptoini *e, *m; e = cri; m = cri->cri_next; /* We can handle only two operations */ if (m && m->cri_next) return (EINVAL); if (sec_mdeu_can_handle(e->cri_alg)) { cri = m; m = e; e = cri; } if (m && !sec_mdeu_can_handle(m->cri_alg)) return (EINVAL); *enc = e; *mac = m; return (0); }
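sec_split_cri() above and sec_split_crp() below implement the same normalization: accept at most two chained operations and swap them so the cipher always lands in *enc and the digest, if present, in *mac. A stand-alone model of that split, with a hypothetical two-field operation type:

#include <stddef.h>
#include <stdio.h>

struct op { int is_digest; struct op *next; };

static int
split(struct op *head, struct op **enc, struct op **mac)
{
	struct op *e = head, *m = head->next, *t;

	if (m && m->next)	/* more than two operations: unsupported */
		return (-1);
	if (e->is_digest) {	/* swap so the cipher op comes first */
		t = m; m = e; e = t;
	}
	if (m && !m->is_digest)	/* second op must be the digest, if any */
		return (-1);
	*enc = e;
	*mac = m;
	return (0);
}

int
main(void)
{
	struct op mac = { 1, NULL }, enc = { 0, &mac };
	struct op *e, *m;

	printf("%d\n", split(&enc, &e, &m));	/* 0: cipher in e, digest in m */
	return (0);
}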
static int sec_split_crp(struct cryptop *crp, struct cryptodesc **enc, struct cryptodesc **mac) { struct cryptodesc *e, *m, *t; e = crp->crp_desc; m = e->crd_next; /* We can handle only two operations */ if (m && m->crd_next) return (EINVAL); if (sec_mdeu_can_handle(e->crd_alg)) { t = m; m = e; e = t; } if (m && !sec_mdeu_can_handle(m->crd_alg)) return (EINVAL); *enc = e; *mac = m; return (0); } static int sec_alloc_session(struct sec_softc *sc) { struct sec_session *ses = NULL; int sid = -1; u_int i; SEC_LOCK(sc, sessions); for (i = 0; i < SEC_MAX_SESSIONS; i++) { if (sc->sc_sessions[i].ss_used == 0) { ses = &(sc->sc_sessions[i]); ses->ss_used = 1; ses->ss_ivlen = 0; ses->ss_klen = 0; ses->ss_mklen = 0; sid = i; break; } } SEC_UNLOCK(sc, sessions); return (sid); } static struct sec_session * sec_get_session(struct sec_softc *sc, u_int sid) { struct sec_session *ses; if (sid >= SEC_MAX_SESSIONS) return (NULL); SEC_LOCK(sc, sessions); ses = &(sc->sc_sessions[sid]); if (ses->ss_used == 0) ses = NULL; SEC_UNLOCK(sc, sessions); return (ses); } static int sec_newsession(device_t dev, u_int32_t *sidp, struct cryptoini *cri) { struct sec_softc *sc = device_get_softc(dev); struct sec_eu_methods *eu = sec_eus; struct cryptoini *enc = NULL; struct cryptoini *mac = NULL; struct sec_session *ses; int error = -1; int sid; error = sec_split_cri(cri, &enc, &mac); if (error) return (error); /* Check key lengths */ if (enc && enc->cri_key && (enc->cri_klen / 8) > SEC_MAX_KEY_LEN) return (E2BIG); if (mac && mac->cri_key && (mac->cri_klen / 8) > SEC_MAX_KEY_LEN) return (E2BIG); /* Only SEC 3.0 supports digests larger than 256 bits */ if (sc->sc_version < 3 && mac && mac->cri_klen > 256) return (E2BIG); sid = sec_alloc_session(sc); if (sid < 0) return (ENOMEM); ses = sec_get_session(sc, sid); /* Find EU for this session */ while (eu->sem_make_desc != NULL) { error = eu->sem_newsession(sc, ses, enc, mac); if (error >= 0) break; eu++; } /* If not found, return EINVAL */ if (error < 0) { sec_free_session(sc, ses); return (EINVAL); } /* Save cipher key */ if (enc && enc->cri_key) { ses->ss_klen = enc->cri_klen / 8; memcpy(ses->ss_key, enc->cri_key, ses->ss_klen); } /* Save digest key */ if (mac && mac->cri_key) { ses->ss_mklen = mac->cri_klen / 8; memcpy(ses->ss_mkey, mac->cri_key, ses->ss_mklen); } ses->ss_eu = eu; *sidp = sid; return (0); } static int sec_freesession(device_t dev, uint64_t tid) { struct sec_softc *sc = device_get_softc(dev); struct sec_session *ses; int error = 0; ses = sec_get_session(sc, CRYPTO_SESID2LID(tid)); if (ses == NULL) return (EINVAL); sec_free_session(sc, ses); return (error); } static int sec_process(device_t dev, struct cryptop *crp, int hint) { struct sec_softc *sc = device_get_softc(dev); struct sec_desc *desc = NULL; struct cryptodesc *mac, *enc; struct sec_session *ses; int buftype, error = 0; /* Check Session ID */ ses = sec_get_session(sc, CRYPTO_SESID2LID(crp->crp_sid)); if (ses == NULL) { crp->crp_etype = EINVAL; crypto_done(crp); return (0); } /* Check for input length */ if (crp->crp_ilen > SEC_MAX_DMA_BLOCK_SIZE) { crp->crp_etype = E2BIG; crypto_done(crp); return (0); } /* Get descriptors */ if (sec_split_crp(crp, &enc, &mac)) { crp->crp_etype = EINVAL; crypto_done(crp); return (0); } SEC_LOCK(sc, descriptors); SEC_DESC_SYNC(sc, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Block the driver if there are no free descriptors or we are going down */ if (SEC_FREE_DESC_CNT(sc) == 0 || sc->sc_shutdown) { sc->sc_blocked |= CRYPTO_SYMQ; SEC_UNLOCK(sc, descriptors); return (ERESTART); } /* Prepare descriptor */ desc = SEC_GET_FREE_DESC(sc); desc->sd_lt_used = 0; desc->sd_error = 0; desc->sd_crp = crp; if (crp->crp_flags &
CRYPTO_F_IOV) buftype = SEC_UIO; else if (crp->crp_flags & CRYPTO_F_IMBUF) buftype = SEC_MBUF; else buftype = SEC_MEMORY; if (enc && enc->crd_flags & CRD_F_ENCRYPT) { if (enc->crd_flags & CRD_F_IV_EXPLICIT) memcpy(desc->sd_desc->shd_iv, enc->crd_iv, ses->ss_ivlen); else arc4rand(desc->sd_desc->shd_iv, ses->ss_ivlen, 0); if ((enc->crd_flags & CRD_F_IV_PRESENT) == 0) crypto_copyback(crp->crp_flags, crp->crp_buf, enc->crd_inject, ses->ss_ivlen, desc->sd_desc->shd_iv); } else if (enc) { if (enc->crd_flags & CRD_F_IV_EXPLICIT) memcpy(desc->sd_desc->shd_iv, enc->crd_iv, ses->ss_ivlen); else crypto_copydata(crp->crp_flags, crp->crp_buf, enc->crd_inject, ses->ss_ivlen, desc->sd_desc->shd_iv); } if (enc && enc->crd_flags & CRD_F_KEY_EXPLICIT) { if ((enc->crd_klen / 8) <= SEC_MAX_KEY_LEN) { ses->ss_klen = enc->crd_klen / 8; memcpy(ses->ss_key, enc->crd_key, ses->ss_klen); } else error = E2BIG; } if (!error && mac && mac->crd_flags & CRD_F_KEY_EXPLICIT) { if ((mac->crd_klen / 8) <= SEC_MAX_KEY_LEN) { ses->ss_mklen = mac->crd_klen / 8; memcpy(ses->ss_mkey, mac->crd_key, ses->ss_mklen); } else error = E2BIG; } if (!error) { memcpy(desc->sd_desc->shd_key, ses->ss_key, ses->ss_klen); memcpy(desc->sd_desc->shd_mkey, ses->ss_mkey, ses->ss_mklen); error = ses->ss_eu->sem_make_desc(sc, ses, desc, crp, buftype); } if (error) { SEC_DESC_FREE_POINTERS(desc); SEC_DESC_PUT_BACK_LT(sc, desc); SEC_PUT_BACK_FREE_DESC(sc); SEC_UNLOCK(sc, descriptors); crp->crp_etype = error; crypto_done(crp); return (0); } /* * Skip the DONE interrupt if this is not the last request in a burst, * but only if we are running on SEC 3.X. On SEC 2.X we have to enable * DONE signaling on each descriptor. */ if ((hint & CRYPTO_HINT_MORE) && sc->sc_version == 3) desc->sd_desc->shd_dn = 0; else desc->sd_desc->shd_dn = 1; SEC_DESC_SYNC(sc, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); SEC_DESC_SYNC_POINTERS(desc, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); SEC_DESC_FREE2READY(sc); SEC_UNLOCK(sc, descriptors); /* Enqueue ready descriptors in hardware */ sec_enqueue(sc); return (0); } static int sec_build_common_ns_desc(struct sec_softc *sc, struct sec_desc *desc, struct sec_session *ses, struct cryptop *crp, struct cryptodesc *enc, int buftype) { struct sec_hw_desc *hd = desc->sd_desc; int error; hd->shd_desc_type = SEC_DT_COMMON_NONSNOOP; hd->shd_eu_sel1 = SEC_EU_NONE; hd->shd_mode1 = 0; /* Pointer 0: NULL */ error = sec_make_pointer_direct(sc, desc, 0, 0, 0); if (error) return (error); /* Pointer 1: IV IN */ error = sec_make_pointer_direct(sc, desc, 1, desc->sd_desc_paddr + offsetof(struct sec_hw_desc, shd_iv), ses->ss_ivlen); if (error) return (error); /* Pointer 2: Cipher Key */ error = sec_make_pointer_direct(sc, desc, 2, desc->sd_desc_paddr + offsetof(struct sec_hw_desc, shd_key), ses->ss_klen); if (error) return (error); /* Pointer 3: Data IN */ error = sec_make_pointer(sc, desc, 3, crp->crp_buf, enc->crd_skip, enc->crd_len, buftype); if (error) return (error); /* Pointer 4: Data OUT */ error = sec_make_pointer(sc, desc, 4, crp->crp_buf, enc->crd_skip, enc->crd_len, buftype); if (error) return (error); /* Pointer 5: IV OUT (Not used: NULL) */ error = sec_make_pointer_direct(sc, desc, 5, 0, 0); if (error) return (error); /* Pointer 6: NULL */ error = sec_make_pointer_direct(sc, desc, 6, 0, 0); return (error); } static int sec_build_common_s_desc(struct sec_softc *sc, struct sec_desc *desc, struct sec_session *ses, struct cryptop *crp, struct cryptodesc *enc, struct cryptodesc *mac, int buftype) { struct sec_hw_desc *hd = desc->sd_desc; u_int
eu, mode, hashlen; int error; if (mac->crd_len < enc->crd_len) return (EINVAL); if (mac->crd_skip + mac->crd_len != enc->crd_skip + enc->crd_len) return (EINVAL); error = sec_mdeu_config(mac, &eu, &mode, &hashlen); if (error) return (error); hd->shd_desc_type = SEC_DT_HMAC_SNOOP; hd->shd_eu_sel1 = eu; hd->shd_mode1 = mode; /* Pointer 0: HMAC Key */ error = sec_make_pointer_direct(sc, desc, 0, desc->sd_desc_paddr + offsetof(struct sec_hw_desc, shd_mkey), ses->ss_mklen); if (error) return (error); /* Pointer 1: HMAC-Only Data IN */ error = sec_make_pointer(sc, desc, 1, crp->crp_buf, mac->crd_skip, mac->crd_len - enc->crd_len, buftype); if (error) return (error); /* Pointer 2: Cipher Key */ error = sec_make_pointer_direct(sc, desc, 2, desc->sd_desc_paddr + offsetof(struct sec_hw_desc, shd_key), ses->ss_klen); if (error) return (error); /* Pointer 3: IV IN */ error = sec_make_pointer_direct(sc, desc, 3, desc->sd_desc_paddr + offsetof(struct sec_hw_desc, shd_iv), ses->ss_ivlen); if (error) return (error); /* Pointer 4: Data IN */ error = sec_make_pointer(sc, desc, 4, crp->crp_buf, enc->crd_skip, enc->crd_len, buftype); if (error) return (error); /* Pointer 5: Data OUT */ error = sec_make_pointer(sc, desc, 5, crp->crp_buf, enc->crd_skip, enc->crd_len, buftype); if (error) return (error); /* Pointer 6: HMAC OUT */ error = sec_make_pointer(sc, desc, 6, crp->crp_buf, mac->crd_inject, hashlen, buftype); return (error); } /* AESU */ static int sec_aesu_newsession(struct sec_softc *sc, struct sec_session *ses, struct cryptoini *enc, struct cryptoini *mac) { if (enc == NULL) return (-1); if (enc->cri_alg != CRYPTO_AES_CBC) return (-1); ses->ss_ivlen = AES_BLOCK_LEN; return (0); } static int sec_aesu_make_desc(struct sec_softc *sc, struct sec_session *ses, struct sec_desc *desc, struct cryptop *crp, int buftype) { struct sec_hw_desc *hd = desc->sd_desc; struct cryptodesc *enc, *mac; int error; error = sec_split_crp(crp, &enc, &mac); if (error) return (error); if (!enc) return (EINVAL); hd->shd_eu_sel0 = SEC_EU_AESU; hd->shd_mode0 = SEC_AESU_MODE_CBC; if (enc->crd_alg != CRYPTO_AES_CBC) return (EINVAL); if (enc->crd_flags & CRD_F_ENCRYPT) { hd->shd_mode0 |= SEC_AESU_MODE_ED; hd->shd_dir = 0; } else hd->shd_dir = 1; if (mac) error = sec_build_common_s_desc(sc, desc, ses, crp, enc, mac, buftype); else error = sec_build_common_ns_desc(sc, desc, ses, crp, enc, buftype); return (error); } /* DEU */ static int sec_deu_newsession(struct sec_softc *sc, struct sec_session *ses, struct cryptoini *enc, struct cryptoini *mac) { if (enc == NULL) return (-1); switch (enc->cri_alg) { case CRYPTO_DES_CBC: case CRYPTO_3DES_CBC: break; default: return (-1); } ses->ss_ivlen = DES_BLOCK_LEN; return (0); } static int sec_deu_make_desc(struct sec_softc *sc, struct sec_session *ses, struct sec_desc *desc, struct cryptop *crp, int buftype) { struct sec_hw_desc *hd = desc->sd_desc; struct cryptodesc *enc, *mac; int error; error = sec_split_crp(crp, &enc, &mac); if (error) return (error); if (!enc) return (EINVAL); hd->shd_eu_sel0 = SEC_EU_DEU; hd->shd_mode0 = SEC_DEU_MODE_CBC; switch (enc->crd_alg) { case CRYPTO_3DES_CBC: hd->shd_mode0 |= SEC_DEU_MODE_TS; break; case CRYPTO_DES_CBC: break; default: return (EINVAL); } if (enc->crd_flags & CRD_F_ENCRYPT) { hd->shd_mode0 |= SEC_DEU_MODE_ED; hd->shd_dir = 0; } else hd->shd_dir = 1; if (mac) error = sec_build_common_s_desc(sc, desc, ses, crp, enc, mac, buftype); else error = sec_build_common_ns_desc(sc, desc, ses, crp, enc, buftype); return (error); } /* MDEU */ static int 
sec_mdeu_can_handle(u_int alg) { switch (alg) { case CRYPTO_MD5: case CRYPTO_SHA1: case CRYPTO_MD5_HMAC: case CRYPTO_SHA1_HMAC: case CRYPTO_SHA2_256_HMAC: case CRYPTO_SHA2_384_HMAC: case CRYPTO_SHA2_512_HMAC: return (1); default: return (0); } } static int sec_mdeu_config(struct cryptodesc *crd, u_int *eu, u_int *mode, u_int *hashlen) { *mode = SEC_MDEU_MODE_PD | SEC_MDEU_MODE_INIT; *eu = SEC_EU_NONE; switch (crd->crd_alg) { case CRYPTO_MD5_HMAC: *mode |= SEC_MDEU_MODE_HMAC; /* FALLTHROUGH */ case CRYPTO_MD5: *eu = SEC_EU_MDEU_A; *mode |= SEC_MDEU_MODE_MD5; *hashlen = MD5_HASH_LEN; break; case CRYPTO_SHA1_HMAC: *mode |= SEC_MDEU_MODE_HMAC; /* FALLTHROUGH */ case CRYPTO_SHA1: *eu = SEC_EU_MDEU_A; *mode |= SEC_MDEU_MODE_SHA1; *hashlen = SHA1_HASH_LEN; break; case CRYPTO_SHA2_256_HMAC: *mode |= SEC_MDEU_MODE_HMAC | SEC_MDEU_MODE_SHA256; *eu = SEC_EU_MDEU_A; break; case CRYPTO_SHA2_384_HMAC: *mode |= SEC_MDEU_MODE_HMAC | SEC_MDEU_MODE_SHA384; *eu = SEC_EU_MDEU_B; break; case CRYPTO_SHA2_512_HMAC: *mode |= SEC_MDEU_MODE_HMAC | SEC_MDEU_MODE_SHA512; *eu = SEC_EU_MDEU_B; break; default: return (EINVAL); } if (*mode & SEC_MDEU_MODE_HMAC) *hashlen = SEC_HMAC_HASH_LEN; return (0); } static int sec_mdeu_newsession(struct sec_softc *sc, struct sec_session *ses, struct cryptoini *enc, struct cryptoini *mac) { if (mac && sec_mdeu_can_handle(mac->cri_alg)) return (0); return (-1); } static int sec_mdeu_make_desc(struct sec_softc *sc, struct sec_session *ses, struct sec_desc *desc, struct cryptop *crp, int buftype) { struct cryptodesc *enc, *mac; struct sec_hw_desc *hd = desc->sd_desc; u_int eu, mode, hashlen; int error; error = sec_split_crp(crp, &enc, &mac); if (error) return (error); if (enc) return (EINVAL); error = sec_mdeu_config(mac, &eu, &mode, &hashlen); if (error) return (error); hd->shd_desc_type = SEC_DT_COMMON_NONSNOOP; hd->shd_eu_sel0 = eu; hd->shd_mode0 = mode; hd->shd_eu_sel1 = SEC_EU_NONE; hd->shd_mode1 = 0; /* Pointer 0: NULL */ error = sec_make_pointer_direct(sc, desc, 0, 0, 0); if (error) return (error); /* Pointer 1: Context In (Not used: NULL) */ error = sec_make_pointer_direct(sc, desc, 1, 0, 0); if (error) return (error); /* Pointer 2: HMAC Key (or NULL, depending on digest type) */ if (hd->shd_mode0 & SEC_MDEU_MODE_HMAC) error = sec_make_pointer_direct(sc, desc, 2, desc->sd_desc_paddr + offsetof(struct sec_hw_desc, shd_mkey), ses->ss_mklen); else error = sec_make_pointer_direct(sc, desc, 2, 0, 0); if (error) return (error); /* Pointer 3: Input Data */ error = sec_make_pointer(sc, desc, 3, crp->crp_buf, mac->crd_skip, mac->crd_len, buftype); if (error) return (error); /* Pointer 4: NULL */ error = sec_make_pointer_direct(sc, desc, 4, 0, 0); if (error) return (error); /* Pointer 5: Hash out */ error = sec_make_pointer(sc, desc, 5, crp->crp_buf, mac->crd_inject, hashlen, buftype); if (error) return (error); /* Pointer 6: NULL */ error = sec_make_pointer_direct(sc, desc, 6, 0, 0); return (0); } diff --git a/sys/dev/syscons/plasma/fp16.c b/sys/dev/syscons/plasma/fp16.c index f0f97e34303c..dddba2ea4565 100644 --- a/sys/dev/syscons/plasma/fp16.c +++ b/sys/dev/syscons/plasma/fp16.c @@ -1,131 +1,155 @@ /*- * Copyright (c) 2015 Dag-Erling Smørgrav * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifdef _KERNEL #include #else #include #include #endif #include "fp16.h" /* * Compute the square root of x, using Newton's method with 2^(log2(x)/2) * as the initial estimate. */ fp16_t fp16_sqrt(fp16_t x) { fp16_t y, delta; signed int log2x; /* special case */ if (x == 0) return (0); /* shift toward 0 by half the logarithm */ log2x = flsl(x) - 1; if (log2x >= 16) { y = x >> (log2x - 16) / 2; } else { #if 0 y = x << (16 - log2x) / 2; #else /* XXX for now, return 0 for anything < 1 */ return (0); #endif } while (y > 0) { /* delta = (y^2 - x) / 2y */ delta = fp16_div(fp16_sub(fp16_mul(y, y), x), y * 2); if (delta == 0) break; y = fp16_sub(y, delta); } return (y); } -static fp16_t fp16_cos_table[256] = { - 65536, 65534, 65531, 65524, 65516, 65505, 65491, 65475, - 65457, 65436, 65412, 65386, 65358, 65327, 65294, 65258, - 65220, 65179, 65136, 65091, 65043, 64992, 64939, 64884, - 64826, 64766, 64703, 64638, 64571, 64501, 64428, 64353, - 64276, 64197, 64115, 64030, 63943, 63854, 63762, 63668, - 63571, 63473, 63371, 63268, 63162, 63053, 62942, 62829, - 62714, 62596, 62475, 62353, 62228, 62100, 61971, 61839, - 61705, 61568, 61429, 61288, 61144, 60998, 60850, 60700, - 60547, 60392, 60235, 60075, 59913, 59749, 59583, 59414, - 59243, 59070, 58895, 58718, 58538, 58356, 58172, 57986, - 57797, 57606, 57414, 57219, 57022, 56822, 56621, 56417, - 56212, 56004, 55794, 55582, 55368, 55152, 54933, 54713, - 54491, 54266, 54040, 53811, 53581, 53348, 53114, 52877, - 52639, 52398, 52155, 51911, 51665, 51416, 51166, 50914, - 50660, 50403, 50146, 49886, 49624, 49360, 49095, 48828, - 48558, 48288, 48015, 47740, 47464, 47186, 46906, 46624, - 46340, 46055, 45768, 45480, 45189, 44897, 44603, 44308, - 44011, 43712, 43412, 43110, 42806, 42501, 42194, 41885, - 41575, 41263, 40950, 40636, 40319, 40002, 39682, 39362, - 39039, 38716, 38390, 38064, 37736, 37406, 37075, 36743, - 36409, 36074, 35738, 35400, 35061, 34721, 34379, 34036, - 33692, 33346, 32999, 32651, 32302, 31952, 31600, 31247, - 30893, 30538, 30181, 29824, 29465, 29105, 28745, 28383, - 28020, 27656, 27291, 26925, 26557, 26189, 25820, 25450, - 25079, 24707, 24334, 23960, 23586, 23210, 22833, 22456, - 22078, 21699, 21319, 20938, 20557, 20175, 19792, 19408, - 19024, 18638, 18253, 17866, 17479, 17091, 16702, 16313, - 15923, 15533, 15142, 14751, 14359, 13966, 13573, 13179, - 12785, 12390, 11995, 11600, 11204, 10807, 10410, 10013, - 9616, 9218, 8819, 8421, 8022, 7623, 7223, 6823, - 6423, 6023, 5622, 5222, 4821, 4420, 4018, 3617, - 3215, 2814, 2412, 2010, 1608, 1206, 804, 402, +static fp16_t fp16_sin_table[256]
= { + 0, 402, 804, 1206, 1608, 2010, 2412, 2814, + 3215, 3617, 4018, 4420, 4821, 5222, 5622, 6023, + 6423, 6823, 7223, 7623, 8022, 8421, 8819, 9218, + 9616, 10013, 10410, 10807, 11204, 11600, 11995, 12390, + 12785, 13179, 13573, 13966, 14359, 14751, 15142, 15533, + 15923, 16313, 16702, 17091, 17479, 17866, 18253, 18638, + 19024, 19408, 19792, 20175, 20557, 20938, 21319, 21699, + 22078, 22456, 22833, 23210, 23586, 23960, 24334, 24707, + 25079, 25450, 25820, 26189, 26557, 26925, 27291, 27656, + 28020, 28383, 28745, 29105, 29465, 29824, 30181, 30538, + 30893, 31247, 31600, 31952, 32302, 32651, 32999, 33346, + 33692, 34036, 34379, 34721, 35061, 35400, 35738, 36074, + 36409, 36743, 37075, 37406, 37736, 38064, 38390, 38716, + 39039, 39362, 39682, 40002, 40319, 40636, 40950, 41263, + 41575, 41885, 42194, 42501, 42806, 43110, 43412, 43712, + 44011, 44308, 44603, 44897, 45189, 45480, 45768, 46055, + 46340, 46624, 46906, 47186, 47464, 47740, 48015, 48288, + 48558, 48828, 49095, 49360, 49624, 49886, 50146, 50403, + 50660, 50914, 51166, 51416, 51665, 51911, 52155, 52398, + 52639, 52877, 53114, 53348, 53581, 53811, 54040, 54266, + 54491, 54713, 54933, 55152, 55368, 55582, 55794, 56004, + 56212, 56417, 56621, 56822, 57022, 57219, 57414, 57606, + 57797, 57986, 58172, 58356, 58538, 58718, 58895, 59070, + 59243, 59414, 59583, 59749, 59913, 60075, 60235, 60392, + 60547, 60700, 60850, 60998, 61144, 61288, 61429, 61568, + 61705, 61839, 61971, 62100, 62228, 62353, 62475, 62596, + 62714, 62829, 62942, 63053, 63162, 63268, 63371, 63473, + 63571, 63668, 63762, 63854, 63943, 64030, 64115, 64197, + 64276, 64353, 64428, 64501, 64571, 64638, 64703, 64766, + 64826, 64884, 64939, 64992, 65043, 65091, 65136, 65179, + 65220, 65258, 65294, 65327, 65358, 65386, 65412, 65436, + 65457, 65475, 65491, 65505, 65516, 65524, 65531, 65534, }; +/* + * Compute the sine of theta. + */ +fp16_t +fp16_sin(fp16_t theta) +{ + unsigned int i; + + i = 1024 * (theta % FP16_2PI) / FP16_2PI; + switch (i / 256) { + case 0: + return (fp16_sin_table[i % 256]); + case 1: + return (fp16_sin_table[255 - i % 256]); + case 2: + return (-fp16_sin_table[i % 256]); + case 3: + return (-fp16_sin_table[255 - i % 256]); + default: + /* inconceivable! */ + return (0); + } +} + /* * Compute the cosine of theta. */ fp16_t fp16_cos(fp16_t theta) { unsigned int i; i = 1024 * (theta % FP16_2PI) / FP16_2PI; switch (i / 256) { case 0: - return (fp16_cos_table[i % 256]); + return (fp16_sin_table[255 - i % 256]); case 1: - return (-fp16_cos_table[255 - i % 256]); + return (-fp16_sin_table[i % 256]); case 2: - return (-fp16_cos_table[i % 256]); + return (-fp16_sin_table[255 - i % 256]); case 3: - return (fp16_cos_table[255 - i % 256]); + return (fp16_sin_table[i % 256]); default: /* inconceivable! */ return (0); } } diff --git a/sys/dev/syscons/plasma/fp16.h b/sys/dev/syscons/plasma/fp16.h index bc0c117e3eb4..ae54d2047da4 100644 --- a/sys/dev/syscons/plasma/fp16.h +++ b/sys/dev/syscons/plasma/fp16.h @@ -1,84 +1,85 @@ /*- * Copyright (c) 2015 Dag-Erling Smørgrav * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef FP16_H_INCLUDED #define FP16_H_INCLUDED typedef signed long long fp16_t; #define ItoFP16(n) ((signed long long)(n) << 16) #define FP16toI(n) ((signed long long)(n) >> 16) #ifndef _KERNEL #define FP16toF(n) ((n) / 65536.0) #endif /* add a and b */ static inline fp16_t fp16_add(fp16_t a, fp16_t b) { return (a + b); } /* subtract b from a */ static inline fp16_t fp16_sub(fp16_t a, fp16_t b) { return (a - b); } /* multiply a by b */ static inline fp16_t fp16_mul(fp16_t a, fp16_t b) { return (a * b >> 16); } /* divide a by b */ static inline fp16_t fp16_div(fp16_t a, fp16_t b) { return ((a << 16) / b); } /* square root */ fp16_t fp16_sqrt(fp16_t); #define FP16_2PI 411774 #define FP16_PI 205887 #define FP16_PI_2 102943 #define FP16_PI_4 51471 -/* cosine */ +/* sine and cosine */ +fp16_t fp16_sin(fp16_t); fp16_t fp16_cos(fp16_t); #endif diff --git a/sys/dev/syscons/plasma/plasma_saver.c b/sys/dev/syscons/plasma/plasma_saver.c index b5ed02fb028e..761aa5cc5752 100644 --- a/sys/dev/syscons/plasma/plasma_saver.c +++ b/sys/dev/syscons/plasma/plasma_saver.c @@ -1,239 +1,239 @@ /*- * Copyright (c) 2015 Dag-Erling Smørgrav * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * * To CJA, in appreciation of Nighthawk brunches past and future. 
*/ #include #include #include #include #include #include #include #include #include #include #define SAVER_NAME "plasma_saver" #include "fp16.h" /* * Preferred video modes */ static int modes[] = { M_VGA_CG640, M_PC98_PEGC640x480, M_PC98_PEGC640x400, M_VGA_CG320, -1 }; /* * Display parameters */ static unsigned char *vid; static unsigned int banksize, scrmode, scrw, scrh; static unsigned int blanked; /* * List of foci */ #define FOCI 3 static struct { int x, y; /* coordinates */ int vx, vy; /* velocity */ } plasma_foci[FOCI]; /* * Palette */ static struct { unsigned char r, g, b; } plasma_pal[256]; /* * Draw a new frame */ static void plasma_update(video_adapter_t *adp) { unsigned int x, y; /* coordinates */ signed int dx, dy; /* horizontal / vertical distance */ fp16_t sqd, d; /* square of distance and distance */ fp16_t m; /* magnitude */ unsigned int org, off; /* origin and offset */ unsigned int i; /* loop index */ /* switch to bank 0 */ vidd_set_win_org(adp, 0); /* for each scan line */ for (y = org = off = 0; y < scrh; ++y) { /* for each pixel on scan line */ for (x = 0; x < scrw; ++x, ++off) { /* for each focus */ for (i = m = 0; i < FOCI; ++i) { dx = x - plasma_foci[i].x; dy = y - plasma_foci[i].y; sqd = ItoFP16(dx * dx + dy * dy); d = fp16_sqrt(sqd); /* divide by 4 to stretch out the pattern */ - m = fp16_add(m, fp16_cos(d / 4)); + m = fp16_sub(m, fp16_cos(d / 4)); } /* * m is now in the range +/- FOCI, but we need a * value between 0 and 255. We scale to +/- 127 * and add 127, which moves it into the range [0, * 254]. */ m = fp16_mul(m, ItoFP16(127)); m = fp16_div(m, ItoFP16(FOCI)); m = fp16_add(m, ItoFP16(127)); /* switch banks if necessary */ if (off > banksize) { off -= banksize; org += banksize; vidd_set_win_org(adp, org); } /* plot */ vid[off] = FP16toI(m); } } /* now move the foci */ for (i = 0; i < FOCI; ++i) { plasma_foci[i].x += plasma_foci[i].vx; if (plasma_foci[i].x < 0) { /* bounce against left wall */ plasma_foci[i].vx = -plasma_foci[i].vx; plasma_foci[i].x = -plasma_foci[i].x; } else if (plasma_foci[i].x >= scrw) { /* bounce against right wall */ plasma_foci[i].vx = -plasma_foci[i].vx; plasma_foci[i].x = scrw - (plasma_foci[i].x - scrw); } plasma_foci[i].y += plasma_foci[i].vy; if (plasma_foci[i].y < 0) { /* bounce against ceiling */ plasma_foci[i].vy = -plasma_foci[i].vy; plasma_foci[i].y = -plasma_foci[i].y; } else if (plasma_foci[i].y >= scrh) { /* bounce against floor */ plasma_foci[i].vy = -plasma_foci[i].vy; plasma_foci[i].y = scrh - (plasma_foci[i].y - scrh); } } } /* * Start or stop the screensaver */ static int plasma_saver(video_adapter_t *adp, int blank) { int pl; if (blank) { /* switch to graphics mode */ if (blanked <= 0) { pl = splhigh(); vidd_set_mode(adp, scrmode); vidd_load_palette(adp, (unsigned char *)plasma_pal); vidd_set_border(adp, 0); blanked++; vid = (unsigned char *)adp->va_window; banksize = adp->va_window_size; splx(pl); vidd_clear(adp); } /* update display */ plasma_update(adp); } else { blanked = 0; } return (0); } /* * Initialize on module load */ static int plasma_init(video_adapter_t *adp) { video_info_t info; int i; /* select video mode */ for (i = 0; modes[i] >= 0; ++i) if (vidd_get_info(adp, modes[i], &info) == 0) break; if (modes[i] < 0) { log(LOG_NOTICE, "%s: no supported video modes\n", SAVER_NAME); return (ENODEV); } scrmode = modes[i]; scrw = info.vi_width; scrh = info.vi_height; /* initialize the palette */ for (i = 0; i < 256; ++i) plasma_pal[i].r = plasma_pal[i].g = plasma_pal[i].b = i; /* randomize the foci */ for (i = 0; 
i < FOCI; i++) { plasma_foci[i].x = random() % scrw; plasma_foci[i].y = random() % scrh; plasma_foci[i].vx = random() % 5 - 2; plasma_foci[i].vy = random() % 5 - 2; } return (0); } /* * Clean up before module unload */ static int plasma_term(video_adapter_t *adp) { return (0); } /* * Boilerplate */ static scrn_saver_t plasma_module = { SAVER_NAME, plasma_init, plasma_term, plasma_saver, NULL }; SAVER_MODULE(plasma_saver, plasma_module); diff --git a/sys/dev/tsec/if_tsec.c b/sys/dev/tsec/if_tsec.c index 7ec2f986ef65..3fd872659f50 100644 --- a/sys/dev/tsec/if_tsec.c +++ b/sys/dev/tsec/if_tsec.c @@ -1,1940 +1,1940 @@ /*- * Copyright (C) 2007-2008 Semihalf, Rafal Jaworowski * Copyright (C) 2006-2007 Semihalf, Piotr Kruszynski * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Freescale integrated Three-Speed Ethernet Controller (TSEC) driver. 
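* * Descriptive note (added for clarity, inferred from the code below): when * the controller is an enhanced TSEC (eTSEC, flagged by sc->is_etsec), the * driver also enables the TCP/IP checksum off-load engine (IFCAP_HWCSUM, * see tsec_offload_setup()).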
*/ #include __FBSDID("$FreeBSD$"); #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int tsec_alloc_dma_desc(device_t dev, bus_dma_tag_t *dtag, bus_dmamap_t *dmap, bus_size_t dsize, void **vaddr, void *raddr, const char *dname); static void tsec_dma_ctl(struct tsec_softc *sc, int state); static int tsec_encap(struct tsec_softc *sc, struct mbuf *m_head, int fcb_inserted); static void tsec_free_dma(struct tsec_softc *sc); static void tsec_free_dma_desc(bus_dma_tag_t dtag, bus_dmamap_t dmap, void *vaddr); static int tsec_ifmedia_upd(struct ifnet *ifp); static void tsec_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); static int tsec_new_rxbuf(bus_dma_tag_t tag, bus_dmamap_t map, struct mbuf **mbufp, uint32_t *paddr); static void tsec_map_dma_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error); static void tsec_intrs_ctl(struct tsec_softc *sc, int state); static void tsec_init(void *xsc); static void tsec_init_locked(struct tsec_softc *sc); static int tsec_ioctl(struct ifnet *ifp, u_long command, caddr_t data); static void tsec_reset_mac(struct tsec_softc *sc); static void tsec_setfilter(struct tsec_softc *sc); static void tsec_set_mac_address(struct tsec_softc *sc); static void tsec_start(struct ifnet *ifp); static void tsec_start_locked(struct ifnet *ifp); static void tsec_stop(struct tsec_softc *sc); static void tsec_tick(void *arg); static void tsec_watchdog(struct tsec_softc *sc); static void tsec_add_sysctls(struct tsec_softc *sc); static int tsec_sysctl_ic_time(SYSCTL_HANDLER_ARGS); static int tsec_sysctl_ic_count(SYSCTL_HANDLER_ARGS); static void tsec_set_rxic(struct tsec_softc *sc); static void tsec_set_txic(struct tsec_softc *sc); static int tsec_receive_intr_locked(struct tsec_softc *sc, int count); static void tsec_transmit_intr_locked(struct tsec_softc *sc); static void tsec_error_intr_locked(struct tsec_softc *sc, int count); static void tsec_offload_setup(struct tsec_softc *sc); static void tsec_offload_process_frame(struct tsec_softc *sc, struct mbuf *m); static void tsec_setup_multicast(struct tsec_softc *sc); static int tsec_set_mtu(struct tsec_softc *sc, unsigned int mtu); devclass_t tsec_devclass; DRIVER_MODULE(miibus, tsec, miibus_driver, miibus_devclass, 0, 0); MODULE_DEPEND(tsec, ether, 1, 1, 1); MODULE_DEPEND(tsec, miibus, 1, 1, 1); struct mtx tsec_phy_mtx; int tsec_attach(struct tsec_softc *sc) { uint8_t hwaddr[ETHER_ADDR_LEN]; struct ifnet *ifp; bus_dmamap_t *map_ptr; bus_dmamap_t **map_pptr; int error = 0; int i; /* Initialize global (because potentially shared) MII lock */ if (!mtx_initialized(&tsec_phy_mtx)) mtx_init(&tsec_phy_mtx, "tsec mii", NULL, MTX_DEF); /* Reset all TSEC counters */ TSEC_TX_RX_COUNTERS_INIT(sc); /* Stop DMA engine if enabled by firmware */ tsec_dma_ctl(sc, 0); /* Reset MAC */ tsec_reset_mac(sc); /* Disable interrupts for now */ tsec_intrs_ctl(sc, 0); /* Configure defaults for interrupts coalescing */ sc->rx_ic_time = 768; sc->rx_ic_count = 16; sc->tx_ic_time = 768; sc->tx_ic_count = 16; tsec_set_rxic(sc); tsec_set_txic(sc); tsec_add_sysctls(sc); /* Allocate a busdma tag and DMA safe memory for TX descriptors. 
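Each ring is a single physically contiguous, PAGE_SIZE-aligned buffer: tsec_alloc_dma_desc() creates the tag, allocates and loads the memory in one segment, and reports the bus address back through raddr (see tsec_map_dma_addr()).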
*/ error = tsec_alloc_dma_desc(sc->dev, &sc->tsec_tx_dtag, &sc->tsec_tx_dmap, sizeof(*sc->tsec_tx_vaddr) * TSEC_TX_NUM_DESC, (void **)&sc->tsec_tx_vaddr, &sc->tsec_tx_raddr, "TX"); if (error) { tsec_detach(sc); return (ENXIO); } /* Allocate a busdma tag and DMA safe memory for RX descriptors. */ error = tsec_alloc_dma_desc(sc->dev, &sc->tsec_rx_dtag, &sc->tsec_rx_dmap, sizeof(*sc->tsec_rx_vaddr) * TSEC_RX_NUM_DESC, (void **)&sc->tsec_rx_vaddr, &sc->tsec_rx_raddr, "RX"); if (error) { tsec_detach(sc); return (ENXIO); } /* Allocate a busdma tag for TX mbufs. */ error = bus_dma_tag_create(NULL, /* parent */ TSEC_TXBUFFER_ALIGNMENT, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filtfunc, filtfuncarg */ MCLBYTES * (TSEC_TX_NUM_DESC - 1), /* maxsize */ TSEC_TX_NUM_DESC - 1, /* nsegments */ MCLBYTES, 0, /* maxsegsz, flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &sc->tsec_tx_mtag); /* dmat */ if (error) { device_printf(sc->dev, "failed to allocate busdma tag " "(tx mbufs)\n"); tsec_detach(sc); return (ENXIO); } /* Allocate a busdma tag for RX mbufs. */ error = bus_dma_tag_create(NULL, /* parent */ TSEC_RXBUFFER_ALIGNMENT, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filtfunc, filtfuncarg */ MCLBYTES, /* maxsize */ 1, /* nsegments */ MCLBYTES, 0, /* maxsegsz, flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &sc->tsec_rx_mtag); /* dmat */ if (error) { device_printf(sc->dev, "failed to allocate busdma tag " "(rx mbufs)\n"); tsec_detach(sc); return (ENXIO); } /* Create TX busdma maps */ map_ptr = sc->tx_map_data; map_pptr = sc->tx_map_unused_data; for (i = 0; i < TSEC_TX_NUM_DESC; i++) { map_pptr[i] = &map_ptr[i]; error = bus_dmamap_create(sc->tsec_tx_mtag, 0, map_pptr[i]); if (error) { device_printf(sc->dev, "failed to init TX ring\n"); tsec_detach(sc); return (ENXIO); } } /* Create RX busdma maps and zero mbuf handlers */ for (i = 0; i < TSEC_RX_NUM_DESC; i++) { error = bus_dmamap_create(sc->tsec_rx_mtag, 0, &sc->rx_data[i].map); if (error) { device_printf(sc->dev, "failed to init RX ring\n"); tsec_detach(sc); return (ENXIO); } sc->rx_data[i].mbuf = NULL; } /* Create mbufs for RX buffers */ for (i = 0; i < TSEC_RX_NUM_DESC; i++) { error = tsec_new_rxbuf(sc->tsec_rx_mtag, sc->rx_data[i].map, &sc->rx_data[i].mbuf, &sc->rx_data[i].paddr); if (error) { device_printf(sc->dev, "can't load rx DMA map %d, " "error = %d\n", i, error); tsec_detach(sc); return (error); } } /* Create network interface for upper layers */ ifp = sc->tsec_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(sc->dev, "if_alloc() failed\n"); tsec_detach(sc); return (ENOMEM); } ifp->if_softc = sc; if_initname(ifp, device_get_name(sc->dev), device_get_unit(sc->dev)); ifp->if_flags = IFF_SIMPLEX | IFF_MULTICAST | IFF_BROADCAST; ifp->if_init = tsec_init; ifp->if_start = tsec_start; ifp->if_ioctl = tsec_ioctl; IFQ_SET_MAXLEN(&ifp->if_snd, TSEC_TX_NUM_DESC - 1); ifp->if_snd.ifq_drv_maxlen = TSEC_TX_NUM_DESC - 1; IFQ_SET_READY(&ifp->if_snd); ifp->if_capabilities = IFCAP_VLAN_MTU; if (sc->is_etsec) ifp->if_capabilities |= IFCAP_HWCSUM; ifp->if_capenable = ifp->if_capabilities; #ifdef DEVICE_POLLING /* Advertise that polling is supported */ ifp->if_capabilities |= IFCAP_POLLING; #endif /* Attach PHY(s) */ error = mii_attach(sc->dev, &sc->tsec_miibus, ifp, tsec_ifmedia_upd, tsec_ifmedia_sts, BMSR_DEFCAPMASK, sc->phyaddr, MII_OFFSET_ANY, 0); if (error) { device_printf(sc->dev, "attaching PHYs 
failed\n"); if_free(ifp); sc->tsec_ifp = NULL; tsec_detach(sc); return (error); } sc->tsec_mii = device_get_softc(sc->tsec_miibus); /* Set MAC address */ tsec_get_hwaddr(sc, hwaddr); ether_ifattach(ifp, hwaddr); return (0); } int tsec_detach(struct tsec_softc *sc) { if (sc->tsec_ifp != NULL) { #ifdef DEVICE_POLLING if (sc->tsec_ifp->if_capenable & IFCAP_POLLING) ether_poll_deregister(sc->tsec_ifp); #endif /* Stop TSEC controller and free TX queue */ if (sc->sc_rres) tsec_shutdown(sc->dev); /* Detach network interface */ ether_ifdetach(sc->tsec_ifp); if_free(sc->tsec_ifp); sc->tsec_ifp = NULL; } /* Free DMA resources */ tsec_free_dma(sc); return (0); } int tsec_shutdown(device_t dev) { struct tsec_softc *sc; sc = device_get_softc(dev); TSEC_GLOBAL_LOCK(sc); tsec_stop(sc); TSEC_GLOBAL_UNLOCK(sc); return (0); } int tsec_suspend(device_t dev) { /* TODO not implemented! */ return (0); } int tsec_resume(device_t dev) { /* TODO not implemented! */ return (0); } static void tsec_init(void *xsc) { struct tsec_softc *sc = xsc; TSEC_GLOBAL_LOCK(sc); tsec_init_locked(sc); TSEC_GLOBAL_UNLOCK(sc); } static void tsec_init_locked(struct tsec_softc *sc) { struct tsec_desc *tx_desc = sc->tsec_tx_vaddr; struct tsec_desc *rx_desc = sc->tsec_rx_vaddr; struct ifnet *ifp = sc->tsec_ifp; uint32_t timeout, val, i; if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; TSEC_GLOBAL_LOCK_ASSERT(sc); tsec_stop(sc); /* * These steps are according to the MPC8555E PowerQUICCIII RM: * 14.7 Initialization/Application Information */ /* Step 1: soft reset MAC */ tsec_reset_mac(sc); /* Step 2: Initialize MACCFG2 */ TSEC_WRITE(sc, TSEC_REG_MACCFG2, TSEC_MACCFG2_FULLDUPLEX | /* Full Duplex = 1 */ TSEC_MACCFG2_PADCRC | /* PAD/CRC append */ TSEC_MACCFG2_GMII | /* I/F Mode bit */ TSEC_MACCFG2_PRECNT /* Preamble count = 7 */ ); /* Step 3: Initialize ECNTRL * While the documentation states that R100M is ignored if RPM is * not set, it does seem to be needed to get the orange boxes to * work (which have a Marvell 88E1111 PHY). Go figure. */ /* * XXX kludge - use circumstancial evidence to program ECNTRL * correctly. Ideally we need some board information to guide * us here. */ i = TSEC_READ(sc, TSEC_REG_ID2); val = (i & 0xffff) ? (TSEC_ECNTRL_TBIM | TSEC_ECNTRL_SGMIIM) /* Sumatra */ : TSEC_ECNTRL_R100M; /* Orange + CDS */ TSEC_WRITE(sc, TSEC_REG_ECNTRL, TSEC_ECNTRL_STEN | val); /* Step 4: Initialize MAC station address */ tsec_set_mac_address(sc); /* * Step 5: Assign a Physical address to the TBI so as to not conflict * with the external PHY physical address */ TSEC_WRITE(sc, TSEC_REG_TBIPA, 5); TSEC_PHY_LOCK(sc); /* Step 6: Reset the management interface */ TSEC_PHY_WRITE(sc, TSEC_REG_MIIMCFG, TSEC_MIIMCFG_RESETMGMT); /* Step 7: Setup the MII Mgmt clock speed */ TSEC_PHY_WRITE(sc, TSEC_REG_MIIMCFG, TSEC_MIIMCFG_CLKDIV28); /* Step 8: Read MII Mgmt indicator register and check for Busy = 0 */ timeout = TSEC_READ_RETRY; while (--timeout && (TSEC_PHY_READ(sc, TSEC_REG_MIIMIND) & TSEC_MIIMIND_BUSY)) DELAY(TSEC_READ_DELAY); if (timeout == 0) { if_printf(ifp, "tsec_init_locked(): Mgmt busy timeout\n"); return; } TSEC_PHY_UNLOCK(sc); /* Step 9: Setup the MII Mgmt */ mii_mediachg(sc->tsec_mii); /* Step 10: Clear IEVENT register */ TSEC_WRITE(sc, TSEC_REG_IEVENT, 0xffffffff); /* Step 11: Enable interrupts */ #ifdef DEVICE_POLLING /* * ...only if polling is not turned on. Disable interrupts explicitly * if polling is enabled. 
*/ if (ifp->if_capenable & IFCAP_POLLING ) tsec_intrs_ctl(sc, 0); else #endif /* DEVICE_POLLING */ tsec_intrs_ctl(sc, 1); /* Step 12: Initialize IADDRn */ TSEC_WRITE(sc, TSEC_REG_IADDR0, 0); TSEC_WRITE(sc, TSEC_REG_IADDR1, 0); TSEC_WRITE(sc, TSEC_REG_IADDR2, 0); TSEC_WRITE(sc, TSEC_REG_IADDR3, 0); TSEC_WRITE(sc, TSEC_REG_IADDR4, 0); TSEC_WRITE(sc, TSEC_REG_IADDR5, 0); TSEC_WRITE(sc, TSEC_REG_IADDR6, 0); TSEC_WRITE(sc, TSEC_REG_IADDR7, 0); /* Step 13: Initialize GADDRn */ TSEC_WRITE(sc, TSEC_REG_GADDR0, 0); TSEC_WRITE(sc, TSEC_REG_GADDR1, 0); TSEC_WRITE(sc, TSEC_REG_GADDR2, 0); TSEC_WRITE(sc, TSEC_REG_GADDR3, 0); TSEC_WRITE(sc, TSEC_REG_GADDR4, 0); TSEC_WRITE(sc, TSEC_REG_GADDR5, 0); TSEC_WRITE(sc, TSEC_REG_GADDR6, 0); TSEC_WRITE(sc, TSEC_REG_GADDR7, 0); /* Step 14: Initialize RCTRL */ TSEC_WRITE(sc, TSEC_REG_RCTRL, 0); /* Step 15: Initialize DMACTRL */ tsec_dma_ctl(sc, 1); /* Step 16: Initialize FIFO_PAUSE_CTRL */ TSEC_WRITE(sc, TSEC_REG_FIFO_PAUSE_CTRL, TSEC_FIFO_PAUSE_CTRL_EN); /* * Step 17: Initialize transmit/receive descriptor rings. * Initialize TBASE and RBASE. */ TSEC_WRITE(sc, TSEC_REG_TBASE, sc->tsec_tx_raddr); TSEC_WRITE(sc, TSEC_REG_RBASE, sc->tsec_rx_raddr); for (i = 0; i < TSEC_TX_NUM_DESC; i++) { tx_desc[i].bufptr = 0; tx_desc[i].length = 0; tx_desc[i].flags = ((i == TSEC_TX_NUM_DESC - 1) ? TSEC_TXBD_W : 0); } bus_dmamap_sync(sc->tsec_tx_dtag, sc->tsec_tx_dmap, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); for (i = 0; i < TSEC_RX_NUM_DESC; i++) { rx_desc[i].bufptr = sc->rx_data[i].paddr; rx_desc[i].length = 0; rx_desc[i].flags = TSEC_RXBD_E | TSEC_RXBD_I | ((i == TSEC_RX_NUM_DESC - 1) ? TSEC_RXBD_W : 0); } bus_dmamap_sync(sc->tsec_rx_dtag, sc->tsec_rx_dmap, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Step 18: Initialize the maximum receive buffer length */ TSEC_WRITE(sc, TSEC_REG_MRBLR, MCLBYTES); /* Step 19: Configure ethernet frame sizes */ TSEC_WRITE(sc, TSEC_REG_MINFLR, TSEC_MIN_FRAME_SIZE); tsec_set_mtu(sc, ifp->if_mtu); /* Step 20: Enable Rx and RxBD sdata snooping */ TSEC_WRITE(sc, TSEC_REG_ATTR, TSEC_ATTR_RDSEN | TSEC_ATTR_RBDSEN); TSEC_WRITE(sc, TSEC_REG_ATTRELI, 0); /* Step 21: Reset collision counters in hardware */ TSEC_WRITE(sc, TSEC_REG_MON_TSCL, 0); TSEC_WRITE(sc, TSEC_REG_MON_TMCL, 0); TSEC_WRITE(sc, TSEC_REG_MON_TLCL, 0); TSEC_WRITE(sc, TSEC_REG_MON_TXCL, 0); TSEC_WRITE(sc, TSEC_REG_MON_TNCL, 0); /* Step 22: Mask all CAM interrupts */ TSEC_WRITE(sc, TSEC_REG_MON_CAM1, 0xffffffff); TSEC_WRITE(sc, TSEC_REG_MON_CAM2, 0xffffffff); /* Step 23: Enable Rx and Tx */ val = TSEC_READ(sc, TSEC_REG_MACCFG1); val |= (TSEC_MACCFG1_RX_EN | TSEC_MACCFG1_TX_EN); TSEC_WRITE(sc, TSEC_REG_MACCFG1, val); /* Step 24: Reset TSEC counters for Tx and Rx rings */ TSEC_TX_RX_COUNTERS_INIT(sc); /* Step 25: Setup TCP/IP Off-Load engine */ if (sc->is_etsec) tsec_offload_setup(sc); /* Step 26: Setup multicast filters */ tsec_setup_multicast(sc); /* Step 27: Activate network interface */ ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; sc->tsec_if_flags = ifp->if_flags; sc->tsec_watchdog = 0; /* Schedule watchdog timeout */ callout_reset(&sc->tsec_callout, hz, tsec_tick, sc); } static void tsec_set_mac_address(struct tsec_softc *sc) { uint32_t macbuf[2] = { 0, 0 }; char *macbufp, *curmac; int i; TSEC_GLOBAL_LOCK_ASSERT(sc); KASSERT((ETHER_ADDR_LEN <= sizeof(macbuf)), - ("tsec_set_mac_address: (%d <= %d", ETHER_ADDR_LEN, + ("tsec_set_mac_address: (%d <= %zd", ETHER_ADDR_LEN, sizeof(macbuf))); macbufp = (char *)macbuf; curmac = (char *)IF_LLADDR(sc->tsec_ifp); 
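/* * Worked example (illustrative only, not from the RM): for the station * address 00:04:9f:01:02:03, the loop below produces macbufp[] = * { 03, 02, 01, 9f, 04, 00 }, i.e. the octets in reverse order, which * is the layout MACSTNADDR1/MACSTNADDR2 expect. */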
/* Correct order of MAC address bytes */ for (i = 1; i <= ETHER_ADDR_LEN; i++) macbufp[ETHER_ADDR_LEN-i] = curmac[i-1]; /* Initialize MAC station address MACSTNADDR2 and MACSTNADDR1 */ TSEC_WRITE(sc, TSEC_REG_MACSTNADDR2, macbuf[1]); TSEC_WRITE(sc, TSEC_REG_MACSTNADDR1, macbuf[0]); } /* * DMA control function, if argument state is: * 0 - DMA engine will be disabled * 1 - DMA engine will be enabled */ static void tsec_dma_ctl(struct tsec_softc *sc, int state) { device_t dev; uint32_t dma_flags, timeout; dev = sc->dev; dma_flags = TSEC_READ(sc, TSEC_REG_DMACTRL); switch (state) { case 0: /* Temporarily clear the graceful stop bits. */ tsec_dma_ctl(sc, 1000); /* Set them again */ dma_flags |= (TSEC_DMACTRL_GRS | TSEC_DMACTRL_GTS); break; case 1000: case 1: /* Set write with response (WWR), wait (WOP) and snoop bits */ dma_flags |= (TSEC_DMACTRL_TDSEN | TSEC_DMACTRL_TBDSEN | DMACTRL_WWR | DMACTRL_WOP); /* Clear graceful stop bits */ dma_flags &= ~(TSEC_DMACTRL_GRS | TSEC_DMACTRL_GTS); break; default: device_printf(dev, "tsec_dma_ctl(): unknown state value: %d\n", state); } TSEC_WRITE(sc, TSEC_REG_DMACTRL, dma_flags); switch (state) { case 0: /* Wait for DMA stop */ timeout = TSEC_READ_RETRY; while (--timeout && (!(TSEC_READ(sc, TSEC_REG_IEVENT) & (TSEC_IEVENT_GRSC | TSEC_IEVENT_GTSC)))) DELAY(TSEC_READ_DELAY); if (timeout == 0) device_printf(dev, "tsec_dma_ctl(): timeout!\n"); break; case 1: /* Restart transmission function */ TSEC_WRITE(sc, TSEC_REG_TSTAT, TSEC_TSTAT_THLT); } } /* * Interrupts control function, if argument state is: * 0 - all TSEC interrupts will be masked * 1 - all TSEC interrupts will be unmasked */ static void tsec_intrs_ctl(struct tsec_softc *sc, int state) { device_t dev; dev = sc->dev; switch (state) { case 0: TSEC_WRITE(sc, TSEC_REG_IMASK, 0); break; case 1: TSEC_WRITE(sc, TSEC_REG_IMASK, TSEC_IMASK_BREN | TSEC_IMASK_RXCEN | TSEC_IMASK_BSYEN | TSEC_IMASK_EBERREN | TSEC_IMASK_BTEN | TSEC_IMASK_TXEEN | TSEC_IMASK_TXBEN | TSEC_IMASK_TXFEN | TSEC_IMASK_XFUNEN | TSEC_IMASK_RXFEN); break; default: device_printf(dev, "tsec_intrs_ctl(): unknown state value: %d\n", state); } } static void tsec_reset_mac(struct tsec_softc *sc) { uint32_t maccfg1_flags; /* Set soft reset bit */ maccfg1_flags = TSEC_READ(sc, TSEC_REG_MACCFG1); maccfg1_flags |= TSEC_MACCFG1_SOFT_RESET; TSEC_WRITE(sc, TSEC_REG_MACCFG1, maccfg1_flags); /* Clear soft reset bit */ maccfg1_flags = TSEC_READ(sc, TSEC_REG_MACCFG1); maccfg1_flags &= ~TSEC_MACCFG1_SOFT_RESET; TSEC_WRITE(sc, TSEC_REG_MACCFG1, maccfg1_flags); } static void tsec_watchdog(struct tsec_softc *sc) { struct ifnet *ifp; TSEC_GLOBAL_LOCK_ASSERT(sc); if (sc->tsec_watchdog == 0 || --sc->tsec_watchdog > 0) return; ifp = sc->tsec_ifp; if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); if_printf(ifp, "watchdog timeout\n"); tsec_stop(sc); tsec_init_locked(sc); } static void tsec_start(struct ifnet *ifp) { struct tsec_softc *sc = ifp->if_softc; TSEC_TRANSMIT_LOCK(sc); tsec_start_locked(ifp); TSEC_TRANSMIT_UNLOCK(sc); } static void tsec_start_locked(struct ifnet *ifp) { struct tsec_softc *sc; struct mbuf *m0, *mtmp; struct tsec_tx_fcb *tx_fcb; unsigned int queued = 0; int csum_flags, fcb_inserted = 0; sc = ifp->if_softc; TSEC_TRANSMIT_LOCK_ASSERT(sc); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return; if (sc->tsec_link == 0) return; bus_dmamap_sync(sc->tsec_tx_dtag, sc->tsec_tx_dmap, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { /* Get packet from the queue */
IFQ_DRV_DEQUEUE(&ifp->if_snd, m0); if (m0 == NULL) break; /* Insert TCP/IP Off-load frame control block */ csum_flags = m0->m_pkthdr.csum_flags; if (csum_flags) { M_PREPEND(m0, sizeof(struct tsec_tx_fcb), M_NOWAIT); if (m0 == NULL) break; tx_fcb = mtod(m0, struct tsec_tx_fcb *); tx_fcb->flags = 0; tx_fcb->l3_offset = ETHER_HDR_LEN; tx_fcb->l4_offset = sizeof(struct ip); if (csum_flags & CSUM_IP) tx_fcb->flags |= TSEC_TX_FCB_IP4 | TSEC_TX_FCB_CSUM_IP; if (csum_flags & CSUM_TCP) tx_fcb->flags |= TSEC_TX_FCB_TCP | TSEC_TX_FCB_CSUM_TCP_UDP; if (csum_flags & CSUM_UDP) tx_fcb->flags |= TSEC_TX_FCB_UDP | TSEC_TX_FCB_CSUM_TCP_UDP; fcb_inserted = 1; } mtmp = m_defrag(m0, M_NOWAIT); if (mtmp) m0 = mtmp; if (tsec_encap(sc, m0, fcb_inserted)) { IFQ_DRV_PREPEND(&ifp->if_snd, m0); ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } queued++; BPF_MTAP(ifp, m0); } bus_dmamap_sync(sc->tsec_tx_dtag, sc->tsec_tx_dmap, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); if (queued) { /* Enable transmitter and watchdog timer */ TSEC_WRITE(sc, TSEC_REG_TSTAT, TSEC_TSTAT_THLT); sc->tsec_watchdog = 5; } } static int tsec_encap(struct tsec_softc *sc, struct mbuf *m0, int fcb_inserted) { struct tsec_desc *tx_desc = NULL; struct ifnet *ifp; bus_dma_segment_t segs[TSEC_TX_NUM_DESC]; bus_dmamap_t *mapp; int csum_flag = 0, error, seg, nsegs; TSEC_TRANSMIT_LOCK_ASSERT(sc); ifp = sc->tsec_ifp; if (TSEC_FREE_TX_DESC(sc) == 0) { /* No free descriptors */ return (-1); } /* Fetch unused map */ mapp = TSEC_ALLOC_TX_MAP(sc); /* Create mapping in DMA memory */ error = bus_dmamap_load_mbuf_sg(sc->tsec_tx_mtag, *mapp, m0, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0 || nsegs > TSEC_FREE_TX_DESC(sc) || nsegs <= 0) { bus_dmamap_unload(sc->tsec_tx_mtag, *mapp); TSEC_FREE_TX_MAP(sc, mapp); return ((error != 0) ? error : -1); } bus_dmamap_sync(sc->tsec_tx_mtag, *mapp, BUS_DMASYNC_PREWRITE); if ((ifp->if_flags & IFF_DEBUG) && (nsegs > 1)) if_printf(ifp, "TX buffer has %d segments\n", nsegs); if (fcb_inserted) csum_flag = TSEC_TXBD_TOE; /* Everything is ok, now we can send buffers */ for (seg = 0; seg < nsegs; seg++) { tx_desc = TSEC_GET_CUR_TX_DESC(sc); tx_desc->length = segs[seg].ds_len; tx_desc->bufptr = segs[seg].ds_addr; /* * Set flags: * - wrap * - checksum * - ready to send * - transmit the CRC sequence after the last data byte * - interrupt after the last buffer */ tx_desc->flags = (tx_desc->flags & TSEC_TXBD_W) | ((seg == 0) ? csum_flag : 0) | TSEC_TXBD_R | TSEC_TXBD_TC | ((seg == nsegs - 1) ? 
TSEC_TXBD_L | TSEC_TXBD_I : 0); } /* Save mbuf and DMA mapping for release at a later stage */ TSEC_PUT_TX_MBUF(sc, m0); TSEC_PUT_TX_MAP(sc, mapp); return (0); } static void tsec_setfilter(struct tsec_softc *sc) { struct ifnet *ifp; uint32_t flags; ifp = sc->tsec_ifp; flags = TSEC_READ(sc, TSEC_REG_RCTRL); /* Promiscuous mode */ if (ifp->if_flags & IFF_PROMISC) flags |= TSEC_RCTRL_PROM; else flags &= ~TSEC_RCTRL_PROM; TSEC_WRITE(sc, TSEC_REG_RCTRL, flags); } #ifdef DEVICE_POLLING static poll_handler_t tsec_poll; static int tsec_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) { uint32_t ie; struct tsec_softc *sc = ifp->if_softc; int rx_npkts; rx_npkts = 0; TSEC_GLOBAL_LOCK(sc); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { TSEC_GLOBAL_UNLOCK(sc); return (rx_npkts); } if (cmd == POLL_AND_CHECK_STATUS) { tsec_error_intr_locked(sc, count); /* Clear all events reported */ ie = TSEC_READ(sc, TSEC_REG_IEVENT); TSEC_WRITE(sc, TSEC_REG_IEVENT, ie); } tsec_transmit_intr_locked(sc); TSEC_GLOBAL_TO_RECEIVE_LOCK(sc); rx_npkts = tsec_receive_intr_locked(sc, count); TSEC_RECEIVE_UNLOCK(sc); return (rx_npkts); } #endif /* DEVICE_POLLING */ static int tsec_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct tsec_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; device_t dev; int mask, error = 0; dev = sc->dev; switch (command) { case SIOCSIFMTU: TSEC_GLOBAL_LOCK(sc); if (tsec_set_mtu(sc, ifr->ifr_mtu)) ifp->if_mtu = ifr->ifr_mtu; else error = EINVAL; TSEC_GLOBAL_UNLOCK(sc); break; case SIOCSIFFLAGS: TSEC_GLOBAL_LOCK(sc); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if ((sc->tsec_if_flags ^ ifp->if_flags) & IFF_PROMISC) tsec_setfilter(sc); if ((sc->tsec_if_flags ^ ifp->if_flags) & IFF_ALLMULTI) tsec_setup_multicast(sc); } else tsec_init_locked(sc); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) tsec_stop(sc); sc->tsec_if_flags = ifp->if_flags; TSEC_GLOBAL_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: if (ifp->if_drv_flags & IFF_DRV_RUNNING) { TSEC_GLOBAL_LOCK(sc); tsec_setup_multicast(sc); TSEC_GLOBAL_UNLOCK(sc); } break; /* do not fall through into ifmedia_ioctl() */ case SIOCGIFMEDIA: case SIOCSIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->tsec_mii->mii_media, command); break; case SIOCSIFCAP: mask = ifp->if_capenable ^ ifr->ifr_reqcap; if ((mask & IFCAP_HWCSUM) && sc->is_etsec) { TSEC_GLOBAL_LOCK(sc); ifp->if_capenable &= ~IFCAP_HWCSUM; ifp->if_capenable |= IFCAP_HWCSUM & ifr->ifr_reqcap; tsec_offload_setup(sc); TSEC_GLOBAL_UNLOCK(sc); } #ifdef DEVICE_POLLING if (mask & IFCAP_POLLING) { if (ifr->ifr_reqcap & IFCAP_POLLING) { error = ether_poll_register(tsec_poll, ifp); if (error) return (error); TSEC_GLOBAL_LOCK(sc); /* Disable interrupts */ tsec_intrs_ctl(sc, 0); ifp->if_capenable |= IFCAP_POLLING; TSEC_GLOBAL_UNLOCK(sc); } else { error = ether_poll_deregister(ifp); TSEC_GLOBAL_LOCK(sc); /* Enable interrupts */ tsec_intrs_ctl(sc, 1); ifp->if_capenable &= ~IFCAP_POLLING; TSEC_GLOBAL_UNLOCK(sc); } } #endif break; default: error = ether_ioctl(ifp, command, data); } /* Flush buffers if not empty */ if (ifp->if_flags & IFF_UP) tsec_start(ifp); return (error); } static int tsec_ifmedia_upd(struct ifnet *ifp) { struct tsec_softc *sc = ifp->if_softc; struct mii_data *mii; TSEC_TRANSMIT_LOCK(sc); mii = sc->tsec_mii; mii_mediachg(mii); TSEC_TRANSMIT_UNLOCK(sc); return (0); } static void tsec_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct tsec_softc *sc = ifp->if_softc; struct mii_data *mii; TSEC_TRANSMIT_LOCK(sc); mii = sc->tsec_mii; mii_pollstat(mii); ifmr->ifm_active =
mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; TSEC_TRANSMIT_UNLOCK(sc); } static int tsec_new_rxbuf(bus_dma_tag_t tag, bus_dmamap_t map, struct mbuf **mbufp, uint32_t *paddr) { struct mbuf *new_mbuf; bus_dma_segment_t seg[1]; int error, nsegs; KASSERT(mbufp != NULL, ("NULL mbuf pointer!")); new_mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MCLBYTES); if (new_mbuf == NULL) return (ENOBUFS); new_mbuf->m_len = new_mbuf->m_pkthdr.len = new_mbuf->m_ext.ext_size; if (*mbufp) { bus_dmamap_sync(tag, map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(tag, map); } error = bus_dmamap_load_mbuf_sg(tag, map, new_mbuf, seg, &nsegs, BUS_DMA_NOWAIT); KASSERT(nsegs == 1, ("Too many segments returned!")); if (nsegs != 1 || error) panic("tsec_new_rxbuf(): nsegs(%d), error(%d)", nsegs, error); #if 0 if (error) { printf("tsec: bus_dmamap_load_mbuf_sg() returned: %d!\n", error); m_freem(new_mbuf); return (ENOBUFS); } #endif #if 0 KASSERT(((seg->ds_addr) & (TSEC_RXBUFFER_ALIGNMENT-1)) == 0, ("Wrong alignment of RX buffer!")); #endif bus_dmamap_sync(tag, map, BUS_DMASYNC_PREREAD); (*mbufp) = new_mbuf; (*paddr) = seg->ds_addr; return (0); } static void tsec_map_dma_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { u_int32_t *paddr; KASSERT(nseg == 1, ("wrong number of segments, should be 1")); paddr = arg; *paddr = segs->ds_addr; } static int tsec_alloc_dma_desc(device_t dev, bus_dma_tag_t *dtag, bus_dmamap_t *dmap, bus_size_t dsize, void **vaddr, void *raddr, const char *dname) { int error; /* Allocate a busdma tag and DMA safe memory for TX/RX descriptors. */ error = bus_dma_tag_create(NULL, /* parent */ PAGE_SIZE, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filtfunc, filtfuncarg */ dsize, 1, /* maxsize, nsegments */ dsize, 0, /* maxsegsz, flags */ NULL, NULL, /* lockfunc, lockfuncarg */ dtag); /* dmat */ if (error) { device_printf(dev, "failed to allocate busdma %s tag\n", dname); (*vaddr) = NULL; return (ENXIO); } error = bus_dmamem_alloc(*dtag, vaddr, BUS_DMA_NOWAIT | BUS_DMA_ZERO, dmap); if (error) { device_printf(dev, "failed to allocate %s DMA safe memory\n", dname); bus_dma_tag_destroy(*dtag); (*vaddr) = NULL; return (ENXIO); } error = bus_dmamap_load(*dtag, *dmap, *vaddr, dsize, tsec_map_dma_addr, raddr, BUS_DMA_NOWAIT); if (error) { device_printf(dev, "cannot get address of the %s " "descriptors\n", dname); bus_dmamem_free(*dtag, *vaddr, *dmap); bus_dma_tag_destroy(*dtag); (*vaddr) = NULL; return (ENXIO); } return (0); } static void tsec_free_dma_desc(bus_dma_tag_t dtag, bus_dmamap_t dmap, void *vaddr) { if (vaddr == NULL) return; /* Unmap descriptors from DMA memory */ bus_dmamap_sync(dtag, dmap, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dtag, dmap); /* Free descriptors memory */ bus_dmamem_free(dtag, vaddr, dmap); /* Destroy descriptors tag */ bus_dma_tag_destroy(dtag); } static void tsec_free_dma(struct tsec_softc *sc) { int i; /* Free TX maps */ for (i = 0; i < TSEC_TX_NUM_DESC; i++) if (sc->tx_map_data[i] != NULL) bus_dmamap_destroy(sc->tsec_tx_mtag, sc->tx_map_data[i]); /* Destroy tag for TX mbufs */ bus_dma_tag_destroy(sc->tsec_tx_mtag); /* Free RX mbufs and maps */ for (i = 0; i < TSEC_RX_NUM_DESC; i++) { if (sc->rx_data[i].mbuf) { /* Unload buffer from DMA */ bus_dmamap_sync(sc->tsec_rx_mtag, sc->rx_data[i].map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->tsec_rx_mtag, sc->rx_data[i].map); /* Free buffer */ m_freem(sc->rx_data[i].mbuf); } /* Destroy map for this buffer */ 
if (sc->rx_data[i].map != NULL) bus_dmamap_destroy(sc->tsec_rx_mtag, sc->rx_data[i].map); } /* Destroy tag for RX mbufs */ bus_dma_tag_destroy(sc->tsec_rx_mtag); /* Unload TX/RX descriptors */ tsec_free_dma_desc(sc->tsec_tx_dtag, sc->tsec_tx_dmap, sc->tsec_tx_vaddr); tsec_free_dma_desc(sc->tsec_rx_dtag, sc->tsec_rx_dmap, sc->tsec_rx_vaddr); } static void tsec_stop(struct tsec_softc *sc) { struct ifnet *ifp; struct mbuf *m0; bus_dmamap_t *mapp; uint32_t tmpval; TSEC_GLOBAL_LOCK_ASSERT(sc); ifp = sc->tsec_ifp; /* Disable interface and watchdog timer */ callout_stop(&sc->tsec_callout); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); sc->tsec_watchdog = 0; /* Disable all interrupts and stop DMA */ tsec_intrs_ctl(sc, 0); tsec_dma_ctl(sc, 0); /* Remove pending data from TX queue */ while (!TSEC_EMPTYQ_TX_MBUF(sc)) { m0 = TSEC_GET_TX_MBUF(sc); mapp = TSEC_GET_TX_MAP(sc); bus_dmamap_sync(sc->tsec_tx_mtag, *mapp, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->tsec_tx_mtag, *mapp); TSEC_FREE_TX_MAP(sc, mapp); m_freem(m0); } /* Disable RX and TX */ tmpval = TSEC_READ(sc, TSEC_REG_MACCFG1); tmpval &= ~(TSEC_MACCFG1_RX_EN | TSEC_MACCFG1_TX_EN); TSEC_WRITE(sc, TSEC_REG_MACCFG1, tmpval); DELAY(10); } static void tsec_tick(void *arg) { struct tsec_softc *sc = arg; struct ifnet *ifp; int link; TSEC_GLOBAL_LOCK(sc); tsec_watchdog(sc); ifp = sc->tsec_ifp; link = sc->tsec_link; mii_tick(sc->tsec_mii); if (link == 0 && sc->tsec_link == 1 && (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))) tsec_start_locked(ifp); /* Schedule another timeout one second from now. */ callout_reset(&sc->tsec_callout, hz, tsec_tick, sc); TSEC_GLOBAL_UNLOCK(sc); } /* * This is the core RX routine. It replenishes mbufs in the descriptor and * sends data which have been dma'ed into host memory to upper layer. * * Loops at most count times if count is > 0, or until done if count < 0. */ static int tsec_receive_intr_locked(struct tsec_softc *sc, int count) { struct tsec_desc *rx_desc; struct ifnet *ifp; struct rx_data_type *rx_data; struct mbuf *m; device_t dev; uint32_t i; int c, rx_npkts; uint16_t flags; TSEC_RECEIVE_LOCK_ASSERT(sc); ifp = sc->tsec_ifp; rx_data = sc->rx_data; dev = sc->dev; rx_npkts = 0; bus_dmamap_sync(sc->tsec_rx_dtag, sc->tsec_rx_dmap, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); for (c = 0; ; c++) { if (count >= 0 && count-- == 0) break; rx_desc = TSEC_GET_CUR_RX_DESC(sc); flags = rx_desc->flags; /* Check if there is anything to receive */ if ((flags & TSEC_RXBD_E) || (c >= TSEC_RX_NUM_DESC)) { /* * Avoid generating another interrupt */ if (flags & TSEC_RXBD_E) TSEC_WRITE(sc, TSEC_REG_IEVENT, TSEC_IEVENT_RXB | TSEC_IEVENT_RXF); /* * We didn't consume current descriptor and have to * return it to the queue */ TSEC_BACK_CUR_RX_DESC(sc); break; } if (flags & (TSEC_RXBD_LG | TSEC_RXBD_SH | TSEC_RXBD_NO | TSEC_RXBD_CR | TSEC_RXBD_OV | TSEC_RXBD_TR)) { rx_desc->length = 0; rx_desc->flags = (rx_desc->flags & ~TSEC_RXBD_ZEROONINIT) | TSEC_RXBD_E | TSEC_RXBD_I; if (sc->frame != NULL) { m_free(sc->frame); sc->frame = NULL; } continue; } /* Ok... 
process frame */ i = TSEC_GET_CUR_RX_DESC_CNT(sc); m = rx_data[i].mbuf; m->m_len = rx_desc->length; if (sc->frame != NULL) { if ((flags & TSEC_RXBD_L) != 0) m->m_len -= m_length(sc->frame, NULL); m->m_flags &= ~M_PKTHDR; m_cat(sc->frame, m); } else { sc->frame = m; } m = NULL; if ((flags & TSEC_RXBD_L) != 0) { m = sc->frame; sc->frame = NULL; } if (tsec_new_rxbuf(sc->tsec_rx_mtag, rx_data[i].map, &rx_data[i].mbuf, &rx_data[i].paddr)) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); /* * We ran out of mbufs; didn't consume current * descriptor and have to return it to the queue. */ TSEC_BACK_CUR_RX_DESC(sc); break; } /* Attach new buffer to descriptor and clear flags */ rx_desc->bufptr = rx_data[i].paddr; rx_desc->length = 0; rx_desc->flags = (rx_desc->flags & ~TSEC_RXBD_ZEROONINIT) | TSEC_RXBD_E | TSEC_RXBD_I; if (m != NULL) { m->m_pkthdr.rcvif = ifp; m_fixhdr(m); m_adj(m, -ETHER_CRC_LEN); if (sc->is_etsec) tsec_offload_process_frame(sc, m); TSEC_RECEIVE_UNLOCK(sc); (*ifp->if_input)(ifp, m); TSEC_RECEIVE_LOCK(sc); rx_npkts++; } } bus_dmamap_sync(sc->tsec_rx_dtag, sc->tsec_rx_dmap, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* * Make sure TSEC receiver is not halted. * * Various conditions can stop the TSEC receiver, but not all are * signaled and handled by error interrupt, so make sure the receiver * is running. Writing to TSEC_REG_RSTAT restarts the receiver when * halted, and is harmless if already running. */ TSEC_WRITE(sc, TSEC_REG_RSTAT, TSEC_RSTAT_QHLT); return (rx_npkts); } void tsec_receive_intr(void *arg) { struct tsec_softc *sc = arg; TSEC_RECEIVE_LOCK(sc); #ifdef DEVICE_POLLING if (sc->tsec_ifp->if_capenable & IFCAP_POLLING) { TSEC_RECEIVE_UNLOCK(sc); return; } #endif /* Confirm the interrupt was received by driver */ TSEC_WRITE(sc, TSEC_REG_IEVENT, TSEC_IEVENT_RXB | TSEC_IEVENT_RXF); tsec_receive_intr_locked(sc, -1); TSEC_RECEIVE_UNLOCK(sc); } static void tsec_transmit_intr_locked(struct tsec_softc *sc) { struct tsec_desc *tx_desc; struct ifnet *ifp; struct mbuf *m0; bus_dmamap_t *mapp; int send = 0; TSEC_TRANSMIT_LOCK_ASSERT(sc); ifp = sc->tsec_ifp; /* Update collision statistics */ if_inc_counter(ifp, IFCOUNTER_COLLISIONS, TSEC_READ(sc, TSEC_REG_MON_TNCL)); /* Reset collision counters in hardware */ TSEC_WRITE(sc, TSEC_REG_MON_TSCL, 0); TSEC_WRITE(sc, TSEC_REG_MON_TMCL, 0); TSEC_WRITE(sc, TSEC_REG_MON_TLCL, 0); TSEC_WRITE(sc, TSEC_REG_MON_TXCL, 0); TSEC_WRITE(sc, TSEC_REG_MON_TNCL, 0); bus_dmamap_sync(sc->tsec_tx_dtag, sc->tsec_tx_dmap, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); while (TSEC_CUR_DIFF_DIRTY_TX_DESC(sc)) { tx_desc = TSEC_GET_DIRTY_TX_DESC(sc); if (tx_desc->flags & TSEC_TXBD_R) { TSEC_BACK_DIRTY_TX_DESC(sc); break; } if ((tx_desc->flags & TSEC_TXBD_L) == 0) continue; /* * This is the last buf in this packet, so unmap and free it. 
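* The mbuf and its DMA map were queued by tsec_encap() in FIFO order, * so the TSEC_GET_TX_MBUF()/TSEC_GET_TX_MAP() pops below return the * pair that belongs to the descriptor just completed.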
*/ m0 = TSEC_GET_TX_MBUF(sc); mapp = TSEC_GET_TX_MAP(sc); bus_dmamap_sync(sc->tsec_tx_mtag, *mapp, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->tsec_tx_mtag, *mapp); TSEC_FREE_TX_MAP(sc, mapp); m_freem(m0); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); send = 1; } bus_dmamap_sync(sc->tsec_tx_dtag, sc->tsec_tx_dmap, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); if (send) { /* Now send anything that was pending */ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; tsec_start_locked(ifp); /* Stop watchdog if all sent */ if (TSEC_EMPTYQ_TX_MBUF(sc)) sc->tsec_watchdog = 0; } } void tsec_transmit_intr(void *arg) { struct tsec_softc *sc = arg; TSEC_TRANSMIT_LOCK(sc); #ifdef DEVICE_POLLING if (sc->tsec_ifp->if_capenable & IFCAP_POLLING) { TSEC_TRANSMIT_UNLOCK(sc); return; } #endif /* Confirm the interrupt was received by driver */ TSEC_WRITE(sc, TSEC_REG_IEVENT, TSEC_IEVENT_TXB | TSEC_IEVENT_TXF); tsec_transmit_intr_locked(sc); TSEC_TRANSMIT_UNLOCK(sc); } static void tsec_error_intr_locked(struct tsec_softc *sc, int count) { struct ifnet *ifp; uint32_t eflags; TSEC_GLOBAL_LOCK_ASSERT(sc); ifp = sc->tsec_ifp; eflags = TSEC_READ(sc, TSEC_REG_IEVENT); /* Clear event bits in hardware */ TSEC_WRITE(sc, TSEC_REG_IEVENT, TSEC_IEVENT_RXC | TSEC_IEVENT_BSY | TSEC_IEVENT_EBERR | TSEC_IEVENT_MSRO | TSEC_IEVENT_BABT | TSEC_IEVENT_TXC | TSEC_IEVENT_TXE | TSEC_IEVENT_LC | TSEC_IEVENT_CRL | TSEC_IEVENT_XFUN); /* Check transmitter errors */ if (eflags & TSEC_IEVENT_TXE) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); if (eflags & TSEC_IEVENT_LC) if_inc_counter(ifp, IFCOUNTER_COLLISIONS, 1); TSEC_WRITE(sc, TSEC_REG_TSTAT, TSEC_TSTAT_THLT); } /* Check receiver errors */ if (eflags & TSEC_IEVENT_BSY) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); /* Get data from RX buffers */ tsec_receive_intr_locked(sc, count); } if (ifp->if_flags & IFF_DEBUG) if_printf(ifp, "tsec_error_intr(): event flags: 0x%x\n", eflags); if (eflags & TSEC_IEVENT_EBERR) { if_printf(ifp, "System bus error occurred during " "DMA transaction (flags: 0x%x)\n", eflags); tsec_init_locked(sc); } if (eflags & TSEC_IEVENT_BABT) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); if (eflags & TSEC_IEVENT_BABR) if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } void tsec_error_intr(void *arg) { struct tsec_softc *sc = arg; TSEC_GLOBAL_LOCK(sc); tsec_error_intr_locked(sc, -1); TSEC_GLOBAL_UNLOCK(sc); } int tsec_miibus_readreg(device_t dev, int phy, int reg) { struct tsec_softc *sc; uint32_t timeout; int rv; sc = device_get_softc(dev); TSEC_PHY_LOCK(); TSEC_PHY_WRITE(sc, TSEC_REG_MIIMADD, (phy << 8) | reg); TSEC_PHY_WRITE(sc, TSEC_REG_MIIMCOM, 0); TSEC_PHY_WRITE(sc, TSEC_REG_MIIMCOM, TSEC_MIIMCOM_READCYCLE); timeout = TSEC_READ_RETRY; while (--timeout && TSEC_PHY_READ(sc, TSEC_REG_MIIMIND) & (TSEC_MIIMIND_NOTVALID | TSEC_MIIMIND_BUSY)) DELAY(TSEC_READ_DELAY); if (timeout == 0) device_printf(dev, "Timeout while reading from PHY!\n"); rv = TSEC_PHY_READ(sc, TSEC_REG_MIIMSTAT); TSEC_PHY_UNLOCK(); return (rv); } int tsec_miibus_writereg(device_t dev, int phy, int reg, int value) { struct tsec_softc *sc; uint32_t timeout; sc = device_get_softc(dev); TSEC_PHY_LOCK(); TSEC_PHY_WRITE(sc, TSEC_REG_MIIMADD, (phy << 8) | reg); TSEC_PHY_WRITE(sc, TSEC_REG_MIIMCON, value); timeout = TSEC_READ_RETRY; while (--timeout && (TSEC_READ(sc, TSEC_REG_MIIMIND) & TSEC_MIIMIND_BUSY)) DELAY(TSEC_READ_DELAY); TSEC_PHY_UNLOCK(); if (timeout == 0) device_printf(dev, "Timeout while writing to PHY!\n"); return (0); } void tsec_miibus_statchg(device_t dev) { struct tsec_softc
*sc; struct mii_data *mii; uint32_t ecntrl, id, tmp; int link; sc = device_get_softc(dev); mii = sc->tsec_mii; link = ((mii->mii_media_status & IFM_ACTIVE) ? 1 : 0); tmp = TSEC_READ(sc, TSEC_REG_MACCFG2) & ~TSEC_MACCFG2_IF; if ((mii->mii_media_active & IFM_GMASK) == IFM_FDX) tmp |= TSEC_MACCFG2_FULLDUPLEX; else tmp &= ~TSEC_MACCFG2_FULLDUPLEX; switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_1000_T: case IFM_1000_SX: tmp |= TSEC_MACCFG2_GMII; sc->tsec_link = link; break; case IFM_100_TX: case IFM_10_T: tmp |= TSEC_MACCFG2_MII; sc->tsec_link = link; break; case IFM_NONE: if (link) device_printf(dev, "No speed selected but link " "active!\n"); sc->tsec_link = 0; return; default: sc->tsec_link = 0; device_printf(dev, "Unknown speed (%d), link %s!\n", IFM_SUBTYPE(mii->mii_media_active), ((link) ? "up" : "down")); return; } TSEC_WRITE(sc, TSEC_REG_MACCFG2, tmp); /* XXX kludge - use circumstantial evidence for reduced mode. */ id = TSEC_READ(sc, TSEC_REG_ID2); if (id & 0xffff) { ecntrl = TSEC_READ(sc, TSEC_REG_ECNTRL) & ~TSEC_ECNTRL_R100M; ecntrl |= (tmp & TSEC_MACCFG2_MII) ? TSEC_ECNTRL_R100M : 0; TSEC_WRITE(sc, TSEC_REG_ECNTRL, ecntrl); } } static void tsec_add_sysctls(struct tsec_softc *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; struct sysctl_oid *tree; ctx = device_get_sysctl_ctx(sc->dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); tree = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "int_coal", CTLFLAG_RD, 0, "TSEC Interrupts coalescing"); children = SYSCTL_CHILDREN(tree); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rx_time", CTLTYPE_UINT | CTLFLAG_RW, sc, TSEC_IC_RX, tsec_sysctl_ic_time, "I", "IC RX time threshold (0-65535)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rx_count", CTLTYPE_UINT | CTLFLAG_RW, sc, TSEC_IC_RX, tsec_sysctl_ic_count, "I", "IC RX frame count threshold (0-255)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_time", CTLTYPE_UINT | CTLFLAG_RW, sc, TSEC_IC_TX, tsec_sysctl_ic_time, "I", "IC TX time threshold (0-65535)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_count", CTLTYPE_UINT | CTLFLAG_RW, sc, TSEC_IC_TX, tsec_sysctl_ic_count, "I", "IC TX frame count threshold (0-255)"); } /* * With Interrupt Coalescing (IC) active, a transmit/receive frame * interrupt is raised when either: * * - a threshold-defined period of time has elapsed, or * - a threshold-defined number of frames has been received/transmitted, * * whichever occurs first. * * The following sysctls regulate IC behaviour (for TX/RX separately): * * dev.tsec.<unit>.int_coal.rx_time * dev.tsec.<unit>.int_coal.rx_count * dev.tsec.<unit>.int_coal.tx_time * dev.tsec.<unit>.int_coal.tx_count * * Values: * * - 0 for either time or count disables IC on the given TX/RX path * * - count: 1-255 (expresses the frame count; note that a value of 1 * effectively disables IC) * * - time: 1-65535 (the value corresponds to a real time period and is * expressed in units equivalent to 64 TSEC interface clocks, i.e. one timer * threshold unit is 26.5 us, 2.56 us, or 512 ns at 10 Mbps, 100 Mbps, or * 1 Gbps, respectively; e.g. rx_time = 100 allows at most a 51.2 us delay * at 1 Gbps). For a detailed discussion consult the TSEC reference manual. */ static int tsec_sysctl_ic_time(SYSCTL_HANDLER_ARGS) { int error; uint32_t time; struct tsec_softc *sc = (struct tsec_softc *)arg1; time = (arg2 == TSEC_IC_RX) ?
sc->rx_ic_time : sc->tx_ic_time; error = sysctl_handle_int(oidp, &time, 0, req); if (error != 0) return (error); if (time > 65535) return (EINVAL); TSEC_IC_LOCK(sc); if (arg2 == TSEC_IC_RX) { sc->rx_ic_time = time; tsec_set_rxic(sc); } else { sc->tx_ic_time = time; tsec_set_txic(sc); } TSEC_IC_UNLOCK(sc); return (0); } static int tsec_sysctl_ic_count(SYSCTL_HANDLER_ARGS) { int error; uint32_t count; struct tsec_softc *sc = (struct tsec_softc *)arg1; count = (arg2 == TSEC_IC_RX) ? sc->rx_ic_count : sc->tx_ic_count; error = sysctl_handle_int(oidp, &count, 0, req); if (error != 0) return (error); if (count > 255) return (EINVAL); TSEC_IC_LOCK(sc); if (arg2 == TSEC_IC_RX) { sc->rx_ic_count = count; tsec_set_rxic(sc); } else { sc->tx_ic_count = count; tsec_set_txic(sc); } TSEC_IC_UNLOCK(sc); return (0); } static void tsec_set_rxic(struct tsec_softc *sc) { uint32_t rxic_val; if (sc->rx_ic_count == 0 || sc->rx_ic_time == 0) /* Disable RX IC */ rxic_val = 0; else { rxic_val = 0x80000000; rxic_val |= (sc->rx_ic_count << 21); rxic_val |= sc->rx_ic_time; } TSEC_WRITE(sc, TSEC_REG_RXIC, rxic_val); } static void tsec_set_txic(struct tsec_softc *sc) { uint32_t txic_val; if (sc->tx_ic_count == 0 || sc->tx_ic_time == 0) /* Disable TX IC */ txic_val = 0; else { txic_val = 0x80000000; txic_val |= (sc->tx_ic_count << 21); txic_val |= sc->tx_ic_time; } TSEC_WRITE(sc, TSEC_REG_TXIC, txic_val); } static void tsec_offload_setup(struct tsec_softc *sc) { struct ifnet *ifp = sc->tsec_ifp; uint32_t reg; TSEC_GLOBAL_LOCK_ASSERT(sc); reg = TSEC_READ(sc, TSEC_REG_TCTRL); reg |= TSEC_TCTRL_IPCSEN | TSEC_TCTRL_TUCSEN; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist = TSEC_CHECKSUM_FEATURES; else ifp->if_hwassist = 0; TSEC_WRITE(sc, TSEC_REG_TCTRL, reg); reg = TSEC_READ(sc, TSEC_REG_RCTRL); reg &= ~(TSEC_RCTRL_IPCSEN | TSEC_RCTRL_TUCSEN | TSEC_RCTRL_PRSDEP); reg |= TSEC_RCTRL_PRSDEP_PARSE_L2 | TSEC_RCTRL_VLEX; if (ifp->if_capenable & IFCAP_RXCSUM) reg |= TSEC_RCTRL_IPCSEN | TSEC_RCTRL_TUCSEN | TSEC_RCTRL_PRSDEP_PARSE_L234; TSEC_WRITE(sc, TSEC_REG_RCTRL, reg); } static void tsec_offload_process_frame(struct tsec_softc *sc, struct mbuf *m) { struct tsec_rx_fcb rx_fcb; int csum_flags = 0; int protocol, flags; TSEC_RECEIVE_LOCK_ASSERT(sc); m_copydata(m, 0, sizeof(struct tsec_rx_fcb), (caddr_t)(&rx_fcb)); flags = rx_fcb.flags; protocol = rx_fcb.protocol; if (TSEC_RX_FCB_IP_CSUM_CHECKED(flags)) { csum_flags |= CSUM_IP_CHECKED; if ((flags & TSEC_RX_FCB_IP_CSUM_ERROR) == 0) csum_flags |= CSUM_IP_VALID; } if ((protocol == IPPROTO_TCP || protocol == IPPROTO_UDP) && TSEC_RX_FCB_TCP_UDP_CSUM_CHECKED(flags) && (flags & TSEC_RX_FCB_TCP_UDP_CSUM_ERROR) == 0) { csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xFFFF; } m->m_pkthdr.csum_flags = csum_flags; if (flags & TSEC_RX_FCB_VLAN) { m->m_pkthdr.ether_vtag = rx_fcb.vlan; m->m_flags |= M_VLANTAG; } m_adj(m, sizeof(struct tsec_rx_fcb)); } static void tsec_setup_multicast(struct tsec_softc *sc) { uint32_t hashtable[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; struct ifnet *ifp = sc->tsec_ifp; struct ifmultiaddr *ifma; uint32_t h; int i; TSEC_GLOBAL_LOCK_ASSERT(sc); if (ifp->if_flags & IFF_ALLMULTI) { for (i = 0; i < 8; i++) TSEC_WRITE(sc, TSEC_REG_GADDR(i), 0xFFFFFFFF); return; } if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; h = (ether_crc32_be(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), ETHER_ADDR_LEN) >> 24) & 0xFF; hashtable[(h >> 5)] |= 1 << (0x1F - (h & 0x1F)); } 
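(An aside from the editor: the loop just above keys the filter on the top 8 bits of a big-endian CRC-32 of each multicast MAC, using the high 3 bits of that byte to select one of the eight 32-bit GADDR registers and the low 5 bits to select a bit, numbered from the MSB. A stand-alone model follows; the CRC routine is written to match libkern's ether_crc32_be() as I understand it, so treat it as an assumption rather than a reference implementation.)

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* Believed to match FreeBSD's ether_crc32_be(); the polynomial constant
 * is 0x04c11db6 rather than 0x04c11db7 because the "| carry" below
 * supplies the low bit. */
static uint32_t
crc32_be_model(const uint8_t *buf, size_t len)
{
    uint32_t crc = 0xffffffff, carry;
    uint8_t data;
    size_t i;
    int bit;

    for (i = 0; i < len; i++)
        for (data = buf[i], bit = 0; bit < 8; bit++, data >>= 1) {
            carry = (crc >> 31) ^ (data & 0x01);
            crc <<= 1;
            if (carry)
                crc = (crc ^ 0x04c11db6) | carry;
        }
    return (crc);
}

int
main(void)
{
    /* The all-hosts multicast group address, as a sample input. */
    const uint8_t mac[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };
    uint32_t h = (crc32_be_model(mac, 6) >> 24) & 0xff;

    /* Same mapping as the driver: register = h >> 5, bit MSB-first. */
    printf("GADDR(%u) |= 0x%08x\n", h >> 5, 1u << (0x1f - (h & 0x1f)));
    return (0);
}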
if_maddr_runlock(ifp); for (i = 0; i < 8; i++) TSEC_WRITE(sc, TSEC_REG_GADDR(i), hashtable[i]); } static int tsec_set_mtu(struct tsec_softc *sc, unsigned int mtu) { mtu += ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN; TSEC_GLOBAL_LOCK_ASSERT(sc); if (mtu >= TSEC_MIN_FRAME_SIZE && mtu <= TSEC_MAX_FRAME_SIZE) { TSEC_WRITE(sc, TSEC_REG_MAXFRM, mtu); return (mtu); } return (0); } diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c index eb2f98ad969f..8a6b0c516f4a 100644 --- a/sys/fs/devfs/devfs_vnops.c +++ b/sys/fs/devfs/devfs_vnops.c @@ -1,1925 +1,1925 @@ /*- * Copyright (c) 2000-2004 * Poul-Henning Kamp. All rights reserved. * Copyright (c) 1989, 1992-1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software donated to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kernfs_vnops.c 8.15 (Berkeley) 5/21/95 * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vnops.c 1.43 * * $FreeBSD$ */ /* * TODO: * mkdir: want it ? */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static struct vop_vector devfs_vnodeops; static struct vop_vector devfs_specops; static struct fileops devfs_ops_f; #include #include #include #include #include #include static MALLOC_DEFINE(M_CDEVPDATA, "DEVFSP", "Metainfo for cdev-fp data"); struct mtx devfs_de_interlock; MTX_SYSINIT(devfs_de_interlock, &devfs_de_interlock, "devfs interlock", MTX_DEF); struct sx clone_drain_lock; SX_SYSINIT(clone_drain_lock, &clone_drain_lock, "clone events drain lock"); struct mtx cdevpriv_mtx; MTX_SYSINIT(cdevpriv_mtx, &cdevpriv_mtx, "cdevpriv lock", MTX_DEF); SYSCTL_DECL(_vfs_devfs); static int devfs_dotimes; SYSCTL_INT(_vfs_devfs, OID_AUTO, dotimes, CTLFLAG_RW, &devfs_dotimes, 0, "Update timestamps on DEVFS with default precision"); /* * Update devfs node timestamp. Note that updates are unlocked and * stat(2) could see partially updated times. 
*/ static void devfs_timestamp(struct timespec *tsp) { time_t ts; if (devfs_dotimes) { vfs_timestamp(tsp); } else { ts = time_second; if (tsp->tv_sec != ts) { tsp->tv_sec = ts; tsp->tv_nsec = 0; } } } static int devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp, int *ref) { *dswp = devvn_refthread(fp->f_vnode, devp, ref); if (*devp != fp->f_data) { if (*dswp != NULL) dev_relthread(*devp, *ref); return (ENXIO); } KASSERT((*devp)->si_refcount > 0, ("devfs: un-referenced struct cdev *(%s)", devtoname(*devp))); if (*dswp == NULL) return (ENXIO); curthread->td_fpop = fp; return (0); } int devfs_get_cdevpriv(void **datap) { struct file *fp; struct cdev_privdata *p; int error; fp = curthread->td_fpop; if (fp == NULL) return (EBADF); p = fp->f_cdevpriv; if (p != NULL) { error = 0; *datap = p->cdpd_data; } else error = ENOENT; return (error); } int devfs_set_cdevpriv(void *priv, d_priv_dtor_t *priv_dtr) { struct file *fp; struct cdev_priv *cdp; struct cdev_privdata *p; int error; fp = curthread->td_fpop; if (fp == NULL) return (ENOENT); cdp = cdev2priv((struct cdev *)fp->f_data); p = malloc(sizeof(struct cdev_privdata), M_CDEVPDATA, M_WAITOK); p->cdpd_data = priv; p->cdpd_dtr = priv_dtr; p->cdpd_fp = fp; mtx_lock(&cdevpriv_mtx); if (fp->f_cdevpriv == NULL) { LIST_INSERT_HEAD(&cdp->cdp_fdpriv, p, cdpd_list); fp->f_cdevpriv = p; mtx_unlock(&cdevpriv_mtx); error = 0; } else { mtx_unlock(&cdevpriv_mtx); free(p, M_CDEVPDATA); error = EBUSY; } return (error); } void devfs_destroy_cdevpriv(struct cdev_privdata *p) { mtx_assert(&cdevpriv_mtx, MA_OWNED); p->cdpd_fp->f_cdevpriv = NULL; LIST_REMOVE(p, cdpd_list); mtx_unlock(&cdevpriv_mtx); (p->cdpd_dtr)(p->cdpd_data); free(p, M_CDEVPDATA); } void devfs_fpdrop(struct file *fp) { struct cdev_privdata *p; mtx_lock(&cdevpriv_mtx); if ((p = fp->f_cdevpriv) == NULL) { mtx_unlock(&cdevpriv_mtx); return; } devfs_destroy_cdevpriv(p); } void devfs_clear_cdevpriv(void) { struct file *fp; fp = curthread->td_fpop; if (fp == NULL) return; devfs_fpdrop(fp); } /* * On success devfs_populate_vp() returns with dmp->dm_lock held. */ static int devfs_populate_vp(struct vnode *vp) { struct devfs_dirent *de; struct devfs_mount *dmp; int locked; ASSERT_VOP_LOCKED(vp, "devfs_populate_vp"); dmp = VFSTODEVFS(vp->v_mount); locked = VOP_ISLOCKED(vp); sx_xlock(&dmp->dm_lock); DEVFS_DMP_HOLD(dmp); /* Can't call devfs_populate() with the vnode lock held. 
*/ VOP_UNLOCK(vp, 0); devfs_populate(dmp); sx_xunlock(&dmp->dm_lock); vn_lock(vp, locked | LK_RETRY); sx_xlock(&dmp->dm_lock); if (DEVFS_DMP_DROP(dmp)) { sx_xunlock(&dmp->dm_lock); devfs_unmount_final(dmp); - return (EBADF); + return (ERESTART); } if ((vp->v_iflag & VI_DOOMED) != 0) { sx_xunlock(&dmp->dm_lock); - return (EBADF); + return (ERESTART); } de = vp->v_data; KASSERT(de != NULL, ("devfs_populate_vp: vp->v_data == NULL but vnode not doomed")); if ((de->de_flags & DE_DOOMED) != 0) { sx_xunlock(&dmp->dm_lock); - return (EBADF); + return (ERESTART); } return (0); } static int devfs_vptocnp(struct vop_vptocnp_args *ap) { struct vnode *vp = ap->a_vp; struct vnode **dvp = ap->a_vpp; struct devfs_mount *dmp; char *buf = ap->a_buf; int *buflen = ap->a_buflen; struct devfs_dirent *dd, *de; int i, error; dmp = VFSTODEVFS(vp->v_mount); error = devfs_populate_vp(vp); if (error != 0) return (error); i = *buflen; dd = vp->v_data; if (vp->v_type == VCHR) { i -= strlen(dd->de_cdp->cdp_c.si_name); if (i < 0) { error = ENOMEM; goto finished; } bcopy(dd->de_cdp->cdp_c.si_name, buf + i, strlen(dd->de_cdp->cdp_c.si_name)); de = dd->de_dir; } else if (vp->v_type == VDIR) { if (dd == dmp->dm_rootdir) { *dvp = vp; vref(*dvp); goto finished; } i -= dd->de_dirent->d_namlen; if (i < 0) { error = ENOMEM; goto finished; } bcopy(dd->de_dirent->d_name, buf + i, dd->de_dirent->d_namlen); de = dd; } else { error = ENOENT; goto finished; } *buflen = i; de = devfs_parent_dirent(de); if (de == NULL) { error = ENOENT; goto finished; } mtx_lock(&devfs_de_interlock); *dvp = de->de_vnode; if (*dvp != NULL) { VI_LOCK(*dvp); mtx_unlock(&devfs_de_interlock); vholdl(*dvp); VI_UNLOCK(*dvp); vref(*dvp); vdrop(*dvp); } else { mtx_unlock(&devfs_de_interlock); error = ENOENT; } finished: sx_xunlock(&dmp->dm_lock); return (error); } /* * Construct the fully qualified path name relative to the mountpoint. * If a NULL cnp is provided, no '/' is appended to the resulting path. */ char * devfs_fqpn(char *buf, struct devfs_mount *dmp, struct devfs_dirent *dd, struct componentname *cnp) { int i; struct devfs_dirent *de; sx_assert(&dmp->dm_lock, SA_LOCKED); i = SPECNAMELEN; buf[i] = '\0'; if (cnp != NULL) i -= cnp->cn_namelen; if (i < 0) return (NULL); if (cnp != NULL) bcopy(cnp->cn_nameptr, buf + i, cnp->cn_namelen); de = dd; while (de != dmp->dm_rootdir) { if (cnp != NULL || i < SPECNAMELEN) { i--; if (i < 0) return (NULL); buf[i] = '/'; } i -= de->de_dirent->d_namlen; if (i < 0) return (NULL); bcopy(de->de_dirent->d_name, buf + i, de->de_dirent->d_namlen); de = devfs_parent_dirent(de); if (de == NULL) return (NULL); } return (buf + i); } static int devfs_allocv_drop_refs(int drop_dm_lock, struct devfs_mount *dmp, struct devfs_dirent *de) { int not_found; not_found = 0; if (de->de_flags & DE_DOOMED) not_found = 1; if (DEVFS_DE_DROP(de)) { KASSERT(not_found == 1, ("DEVFS de dropped but not doomed")); devfs_dirent_free(de); } if (DEVFS_DMP_DROP(dmp)) { KASSERT(not_found == 1, ("DEVFS mount struct freed before dirent")); not_found = 2; sx_xunlock(&dmp->dm_lock); devfs_unmount_final(dmp); } if (not_found == 1 || (drop_dm_lock && not_found != 2)) sx_unlock(&dmp->dm_lock); return (not_found); } static void devfs_insmntque_dtr(struct vnode *vp, void *arg) { struct devfs_dirent *de; de = (struct devfs_dirent *)arg; mtx_lock(&devfs_de_interlock); vp->v_data = NULL; de->de_vnode = NULL; mtx_unlock(&devfs_de_interlock); vgone(vp); vput(vp); } /* * devfs_allocv shall be entered with dmp->dm_lock held, and it drops * it on return. 
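(Editor's illustration: devfs_fqpn() above composes the path right-to-left, starting at the end of a SPECNAMELEN-sized buffer and prepending each component while walking toward dm_rootdir. A simplified stand-alone model of that technique; the component list and buffer size here are hypothetical, and the real function's cnp/pathopt details are left out.)

#include <stdio.h>
#include <string.h>

/*
 * Build "a/b/c" right-to-left in a fixed buffer, the way devfs_fqpn()
 * prepends dirent names while walking from the leaf toward the devfs
 * root. Returns a pointer into buf, or NULL if the name will not fit.
 */
static char *
fqpn_model(char *buf, int size, const char **comps, int ncomps)
{
    int i = size - 1;
    int c;

    buf[i] = '\0';
    for (c = ncomps - 1; c >= 0; c--) {
        int len = (int)strlen(comps[c]);

        if (c != ncomps - 1) {      /* separator before inner names */
            if (--i < 0)
                return (NULL);
            buf[i] = '/';
        }
        i -= len;
        if (i < 0)
            return (NULL);
        memcpy(buf + i, comps[c], len);
    }
    return (buf + i);
}

int
main(void)
{
    const char *comps[] = { "fd", "0" };    /* hypothetical entry */
    char buf[16];
    char *p = fqpn_model(buf, sizeof(buf), comps, 2);

    printf("%s\n", p != NULL ? p : "(does not fit)");
    return (0);
}

Building backwards avoids a second pass or a memmove: the total length is unknown until the walk reaches the root, but the end position is known from the start.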
*/ int devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode, struct vnode **vpp) { int error; struct vnode *vp; struct cdev *dev; struct devfs_mount *dmp; struct cdevsw *dsw; dmp = VFSTODEVFS(mp); if (de->de_flags & DE_DOOMED) { sx_xunlock(&dmp->dm_lock); return (ENOENT); } loop: DEVFS_DE_HOLD(de); DEVFS_DMP_HOLD(dmp); mtx_lock(&devfs_de_interlock); vp = de->de_vnode; if (vp != NULL) { VI_LOCK(vp); mtx_unlock(&devfs_de_interlock); sx_xunlock(&dmp->dm_lock); vget(vp, lockmode | LK_INTERLOCK | LK_RETRY, curthread); sx_xlock(&dmp->dm_lock); if (devfs_allocv_drop_refs(0, dmp, de)) { vput(vp); return (ENOENT); } else if ((vp->v_iflag & VI_DOOMED) != 0) { mtx_lock(&devfs_de_interlock); if (de->de_vnode == vp) { de->de_vnode = NULL; vp->v_data = NULL; } mtx_unlock(&devfs_de_interlock); vput(vp); goto loop; } sx_xunlock(&dmp->dm_lock); *vpp = vp; return (0); } mtx_unlock(&devfs_de_interlock); if (de->de_dirent->d_type == DT_CHR) { if (!(de->de_cdp->cdp_flags & CDP_ACTIVE)) { devfs_allocv_drop_refs(1, dmp, de); return (ENOENT); } dev = &de->de_cdp->cdp_c; } else { dev = NULL; } error = getnewvnode("devfs", mp, &devfs_vnodeops, &vp); if (error != 0) { devfs_allocv_drop_refs(1, dmp, de); printf("devfs_allocv: failed to allocate new vnode\n"); return (error); } if (de->de_dirent->d_type == DT_CHR) { vp->v_type = VCHR; VI_LOCK(vp); dev_lock(); dev_refl(dev); /* XXX: v_rdev should be protected by the vnode lock */ vp->v_rdev = dev; KASSERT(vp->v_usecount == 1, ("%s %d (%d)\n", __func__, __LINE__, vp->v_usecount)); dev->si_usecount += vp->v_usecount; /* Special casing of ttys for deadfs. Probably redundant. */ dsw = dev->si_devsw; if (dsw != NULL && (dsw->d_flags & D_TTY) != 0) vp->v_vflag |= VV_ISTTY; dev_unlock(); VI_UNLOCK(vp); if ((dev->si_flags & SI_ETERNAL) != 0) vp->v_vflag |= VV_ETERNALDEV; vp->v_op = &devfs_specops; } else if (de->de_dirent->d_type == DT_DIR) { vp->v_type = VDIR; } else if (de->de_dirent->d_type == DT_LNK) { vp->v_type = VLNK; } else { vp->v_type = VBAD; } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_NOWITNESS); VN_LOCK_ASHARE(vp); mtx_lock(&devfs_de_interlock); vp->v_data = de; de->de_vnode = vp; mtx_unlock(&devfs_de_interlock); error = insmntque1(vp, mp, devfs_insmntque_dtr, de); if (error != 0) { (void) devfs_allocv_drop_refs(1, dmp, de); return (error); } if (devfs_allocv_drop_refs(0, dmp, de)) { vput(vp); return (ENOENT); } #ifdef MAC mac_devfs_vnode_associate(mp, de, vp); #endif sx_xunlock(&dmp->dm_lock); *vpp = vp; return (0); } static int devfs_access(struct vop_access_args *ap) { struct vnode *vp = ap->a_vp; struct devfs_dirent *de; struct proc *p; int error; de = vp->v_data; if (vp->v_type == VDIR) de = de->de_dir; error = vaccess(vp->v_type, de->de_mode, de->de_uid, de->de_gid, ap->a_accmode, ap->a_cred, NULL); if (error == 0) return (0); if (error != EACCES) return (error); p = ap->a_td->td_proc; /* We do, however, allow access to the controlling terminal */ PROC_LOCK(p); if (!(p->p_flag & P_CONTROLT)) { PROC_UNLOCK(p); return (error); } if (p->p_session->s_ttydp == de->de_cdp) error = 0; PROC_UNLOCK(p); return (error); } _Static_assert(((FMASK | FCNTLFLAGS) & (FLASTCLOSE | FREVOKE)) == 0, "devfs-only flag reuse failed"); static int devfs_close(struct vop_close_args *ap) { struct vnode *vp = ap->a_vp, *oldvp; struct thread *td = ap->a_td; struct proc *p; struct cdev *dev = vp->v_rdev; struct cdevsw *dsw; int dflags, error, ref, vp_locked; /* * XXX: Don't call d_close() if we were called because of * XXX: insmntque1() failure.
*/ if (vp->v_data == NULL) return (0); /* * Hack: a tty device that is a controlling terminal * has a reference from the session structure. * We cannot easily tell that a character device is * a controlling terminal, unless it is the closing * process' controlling terminal. In that case, * if the reference count is 2 (this last descriptor * plus the session), release the reference from the session. */ if (td != NULL) { p = td->td_proc; PROC_LOCK(p); if (vp == p->p_session->s_ttyvp) { PROC_UNLOCK(p); oldvp = NULL; sx_xlock(&proctree_lock); if (vp == p->p_session->s_ttyvp) { SESS_LOCK(p->p_session); VI_LOCK(vp); if (count_dev(dev) == 2 && (vp->v_iflag & VI_DOOMED) == 0) { p->p_session->s_ttyvp = NULL; p->p_session->s_ttydp = NULL; oldvp = vp; } VI_UNLOCK(vp); SESS_UNLOCK(p->p_session); } sx_xunlock(&proctree_lock); if (oldvp != NULL) vrele(oldvp); } else PROC_UNLOCK(p); } /* * We do not want to really close the device if it * is still in use unless we are trying to close it * forcibly. Since every use (buffer, vnode, swap, cmap) * holds a reference to the vnode, and because we mark * any other vnodes that alias this device, when the * sum of the reference counts on all the aliased * vnodes descends to one, we are on last close. */ dsw = dev_refthread(dev, &ref); if (dsw == NULL) return (ENXIO); dflags = 0; VI_LOCK(vp); if (vp->v_iflag & VI_DOOMED) { /* Forced close. */ dflags |= FREVOKE | FNONBLOCK; } else if (dsw->d_flags & D_TRACKCLOSE) { /* Keep device updated on status. */ } else if (count_dev(dev) > 1) { VI_UNLOCK(vp); dev_relthread(dev, ref); return (0); } if (count_dev(dev) == 1) dflags |= FLASTCLOSE; vholdl(vp); VI_UNLOCK(vp); vp_locked = VOP_ISLOCKED(vp); VOP_UNLOCK(vp, 0); KASSERT(dev->si_refcount > 0, ("devfs_close() on un-referenced struct cdev *(%s)", devtoname(dev))); error = dsw->d_close(dev, ap->a_fflag | dflags, S_IFCHR, td); dev_relthread(dev, ref); vn_lock(vp, vp_locked | LK_RETRY); vdrop(vp); return (error); } static int devfs_close_f(struct file *fp, struct thread *td) { int error; struct file *fpop; /* * NB: td may be NULL if this descriptor is closed due to * garbage collection from a closed UNIX domain socket. */ fpop = curthread->td_fpop; curthread->td_fpop = fp; error = vnops.fo_close(fp, td); curthread->td_fpop = fpop; /* * The f_cdevpriv cannot be assigned non-NULL value while we * are destroying the file. 
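(Editor's aside: the devfs_close() logic above boils down to a small decision: a forced (doomed) close always reaches d_close() with revoke semantics, D_TRACKCLOSE devices see every close, and otherwise only the last device reference triggers d_close(). A stand-alone model of just that decision, with stand-in flag values rather than the kernel's FREVOKE/FNONBLOCK/FLASTCLOSE.)

#include <stdio.h>

#define F_REVOKE    0x1     /* models FREVOKE | FNONBLOCK on forced close */
#define F_LASTCLOSE 0x2     /* models FLASTCLOSE */
#define F_SKIP      0x4     /* models the early return: just drop our ref */

/* Mirror devfs_close()'s disposition: forced closes always reach
 * d_close(); D_TRACKCLOSE devices see every close; otherwise only
 * the last close does. */
static int
close_disposition(int doomed, int trackclose, int ndevrefs)
{
    int dflags = 0;

    if (doomed)
        dflags |= F_REVOKE;
    else if (trackclose)
        ;                       /* keep device updated on status */
    else if (ndevrefs > 1)
        return (F_SKIP);
    if (ndevrefs == 1)
        dflags |= F_LASTCLOSE;
    return (dflags);
}

int
main(void)
{
    printf("doomed, 3 refs     -> %d\n", close_disposition(1, 0, 3));
    printf("plain, 2 refs      -> %d\n", close_disposition(0, 0, 2));
    printf("trackclose, 2 refs -> %d\n", close_disposition(0, 1, 2));
    printf("plain, last ref    -> %d\n", close_disposition(0, 0, 1));
    return (0);
}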
*/ if (fp->f_cdevpriv != NULL) devfs_fpdrop(fp); return (error); } static int devfs_fsync(struct vop_fsync_args *ap) { int error; struct bufobj *bo; struct devfs_dirent *de; if (!vn_isdisk(ap->a_vp, &error)) { bo = &ap->a_vp->v_bufobj; de = ap->a_vp->v_data; if (error == ENXIO && bo->bo_dirty.bv_cnt > 0) { printf("Device %s went missing before all of the data " "could be written to it; expect data loss.\n", de->de_dirent->d_name); error = vop_stdfsync(ap); if (bo->bo_dirty.bv_cnt != 0 || error != 0) panic("devfs_fsync: vop_stdfsync failed."); } return (0); } return (vop_stdfsync(ap)); } static int devfs_getattr(struct vop_getattr_args *ap) { struct vnode *vp = ap->a_vp; struct vattr *vap = ap->a_vap; int error; struct devfs_dirent *de; struct devfs_mount *dmp; struct cdev *dev; error = devfs_populate_vp(vp); if (error != 0) return (error); dmp = VFSTODEVFS(vp->v_mount); sx_xunlock(&dmp->dm_lock); de = vp->v_data; KASSERT(de != NULL, ("Null dirent in devfs_getattr vp=%p", vp)); if (vp->v_type == VDIR) { de = de->de_dir; KASSERT(de != NULL, ("Null dir dirent in devfs_getattr vp=%p", vp)); } vap->va_uid = de->de_uid; vap->va_gid = de->de_gid; vap->va_mode = de->de_mode; if (vp->v_type == VLNK) vap->va_size = strlen(de->de_symlink); else if (vp->v_type == VDIR) vap->va_size = vap->va_bytes = DEV_BSIZE; else vap->va_size = 0; if (vp->v_type != VDIR) vap->va_bytes = 0; vap->va_blocksize = DEV_BSIZE; vap->va_type = vp->v_type; #define fix(aa) \ do { \ if ((aa).tv_sec <= 3600) { \ (aa).tv_sec = boottime.tv_sec; \ (aa).tv_nsec = boottime.tv_usec * 1000; \ } \ } while (0) if (vp->v_type != VCHR) { fix(de->de_atime); vap->va_atime = de->de_atime; fix(de->de_mtime); vap->va_mtime = de->de_mtime; fix(de->de_ctime); vap->va_ctime = de->de_ctime; } else { dev = vp->v_rdev; fix(dev->si_atime); vap->va_atime = dev->si_atime; fix(dev->si_mtime); vap->va_mtime = dev->si_mtime; fix(dev->si_ctime); vap->va_ctime = dev->si_ctime; vap->va_rdev = cdev2priv(dev)->cdp_inode; } vap->va_gen = 0; vap->va_flags = 0; vap->va_filerev = 0; vap->va_nlink = de->de_links; vap->va_fileid = de->de_inode; return (error); } /* ARGSUSED */ static int devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, struct thread *td) { struct cdev *dev; struct cdevsw *dsw; struct vnode *vp; struct vnode *vpold; int error, i, ref; const char *p; struct fiodgname_arg *fgn; struct file *fpop; fpop = td->td_fpop; error = devfs_fp_check(fp, &dev, &dsw, &ref); if (error != 0) { error = vnops.fo_ioctl(fp, com, data, cred, td); return (error); } if (com == FIODTYPE) { *(int *)data = dsw->d_flags & D_TYPEMASK; td->td_fpop = fpop; dev_relthread(dev, ref); return (0); } else if (com == FIODGNAME) { fgn = data; p = devtoname(dev); i = strlen(p) + 1; if (i > fgn->len) error = EINVAL; else error = copyout(p, fgn->buf, i); td->td_fpop = fpop; dev_relthread(dev, ref); return (error); } error = dsw->d_ioctl(dev, com, data, fp->f_flag, td); td->td_fpop = NULL; dev_relthread(dev, ref); if (error == ENOIOCTL) error = ENOTTY; if (error == 0 && com == TIOCSCTTY) { vp = fp->f_vnode; /* Do nothing if reassigning same control tty */ sx_slock(&proctree_lock); if (td->td_proc->p_session->s_ttyvp == vp) { sx_sunlock(&proctree_lock); return (0); } vpold = td->td_proc->p_session->s_ttyvp; VREF(vp); SESS_LOCK(td->td_proc->p_session); td->td_proc->p_session->s_ttyvp = vp; td->td_proc->p_session->s_ttydp = cdev2priv(dev); SESS_UNLOCK(td->td_proc->p_session); sx_sunlock(&proctree_lock); /* Get rid of reference to old control tty */ if (vpold) 
vrele(vpold); } return (error); } /* ARGSUSED */ static int devfs_kqfilter_f(struct file *fp, struct knote *kn) { struct cdev *dev; struct cdevsw *dsw; int error, ref; struct file *fpop; struct thread *td; td = curthread; fpop = td->td_fpop; error = devfs_fp_check(fp, &dev, &dsw, &ref); if (error) return (error); error = dsw->d_kqfilter(dev, kn); td->td_fpop = fpop; dev_relthread(dev, ref); return (error); } static inline int devfs_prison_check(struct devfs_dirent *de, struct thread *td) { struct cdev_priv *cdp; struct ucred *dcr; struct proc *p; int error; cdp = de->de_cdp; if (cdp == NULL) return (0); dcr = cdp->cdp_c.si_cred; if (dcr == NULL) return (0); error = prison_check(td->td_ucred, dcr); if (error == 0) return (0); /* We do, however, allow access to the controlling terminal */ p = td->td_proc; PROC_LOCK(p); if (!(p->p_flag & P_CONTROLT)) { PROC_UNLOCK(p); return (error); } if (p->p_session->s_ttydp == cdp) error = 0; PROC_UNLOCK(p); return (error); } static int devfs_lookupx(struct vop_lookup_args *ap, int *dm_unlock) { struct componentname *cnp; struct vnode *dvp, **vpp; struct thread *td; struct devfs_dirent *de, *dd; struct devfs_dirent **dde; struct devfs_mount *dmp; struct cdev *cdev; int error, flags, nameiop, dvplocked; char specname[SPECNAMELEN + 1], *pname; cnp = ap->a_cnp; vpp = ap->a_vpp; dvp = ap->a_dvp; pname = cnp->cn_nameptr; td = cnp->cn_thread; flags = cnp->cn_flags; nameiop = cnp->cn_nameiop; dmp = VFSTODEVFS(dvp->v_mount); dd = dvp->v_data; *vpp = NULLVP; if ((flags & ISLASTCN) && nameiop == RENAME) return (EOPNOTSUPP); if (dvp->v_type != VDIR) return (ENOTDIR); if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT)) return (EIO); error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td); if (error) return (error); if (cnp->cn_namelen == 1 && *pname == '.') { if ((flags & ISLASTCN) && nameiop != LOOKUP) return (EINVAL); *vpp = dvp; VREF(dvp); return (0); } if (flags & ISDOTDOT) { if ((flags & ISLASTCN) && nameiop != LOOKUP) return (EINVAL); de = devfs_parent_dirent(dd); if (de == NULL) return (ENOENT); dvplocked = VOP_ISLOCKED(dvp); VOP_UNLOCK(dvp, 0); error = devfs_allocv(de, dvp->v_mount, cnp->cn_lkflags & LK_TYPE_MASK, vpp); *dm_unlock = 0; vn_lock(dvp, dvplocked | LK_RETRY); return (error); } dd = dvp->v_data; de = devfs_find(dd, cnp->cn_nameptr, cnp->cn_namelen, 0); while (de == NULL) { /* While(...) so we can use break */ if (nameiop == DELETE) return (ENOENT); /* * OK, we didn't have an entry for the name we were asked for * so we try to see if anybody can create it on demand. 
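(Editor's sketch of the other side of the dev_clone handshake used below: a driver's clone handler, modeled loosely on the in-tree tun/tap handlers. This is kernel-only code; the "myclone" device name, my_cdevsw, and the elided duplicate tracking are hypothetical, so treat the details as an assumption rather than a recipe.)

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>

static struct cdevsw my_cdevsw = {
	.d_version =	D_VERSION,
	.d_name =	"myclone",
};

static void
my_clone(void *arg, struct ucred *cred, char *name, int namelen,
    struct cdev **dev)
{
	int unit;

	if (*dev != NULL)	/* another handler already matched */
		return;
	/* Accept "myclone<unit>" lookups only. */
	if (dev_stdclone(name, NULL, "myclone", &unit) != 1)
		return;
	*dev = make_dev(&my_cdevsw, unit, UID_ROOT, GID_WHEEL, 0600,
	    "myclone%d", unit);
	if (*dev != NULL)
		dev_ref(*dev);	/* the lookup drops one reference when done */
}

EVENTHANDLER_DEFINE(dev_clone, my_clone, NULL, 1000);

A real handler would also track already-created units (e.g. via clone_create()) so repeated lookups do not attempt duplicate make_dev() calls; that bookkeeping is omitted here.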
*/ pname = devfs_fqpn(specname, dmp, dd, cnp); if (pname == NULL) break; cdev = NULL; DEVFS_DMP_HOLD(dmp); sx_xunlock(&dmp->dm_lock); sx_slock(&clone_drain_lock); EVENTHANDLER_INVOKE(dev_clone, td->td_ucred, pname, strlen(pname), &cdev); sx_sunlock(&clone_drain_lock); if (cdev == NULL) sx_xlock(&dmp->dm_lock); else if (devfs_populate_vp(dvp) != 0) { *dm_unlock = 0; sx_xlock(&dmp->dm_lock); if (DEVFS_DMP_DROP(dmp)) { sx_xunlock(&dmp->dm_lock); devfs_unmount_final(dmp); } else sx_xunlock(&dmp->dm_lock); dev_rel(cdev); return (ENOENT); } if (DEVFS_DMP_DROP(dmp)) { *dm_unlock = 0; sx_xunlock(&dmp->dm_lock); devfs_unmount_final(dmp); if (cdev != NULL) dev_rel(cdev); return (ENOENT); } if (cdev == NULL) break; dev_lock(); dde = &cdev2priv(cdev)->cdp_dirents[dmp->dm_idx]; if (dde != NULL && *dde != NULL) de = *dde; dev_unlock(); dev_rel(cdev); break; } if (de == NULL || de->de_flags & DE_WHITEOUT) { if ((nameiop == CREATE || nameiop == RENAME) && (flags & (LOCKPARENT | WANTPARENT)) && (flags & ISLASTCN)) { cnp->cn_flags |= SAVENAME; return (EJUSTRETURN); } return (ENOENT); } if (devfs_prison_check(de, td)) return (ENOENT); if ((cnp->cn_nameiop == DELETE) && (flags & ISLASTCN)) { error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td); if (error) return (error); if (*vpp == dvp) { VREF(dvp); *vpp = dvp; return (0); } } error = devfs_allocv(de, dvp->v_mount, cnp->cn_lkflags & LK_TYPE_MASK, vpp); *dm_unlock = 0; return (error); } static int devfs_lookup(struct vop_lookup_args *ap) { int j; struct devfs_mount *dmp; int dm_unlock; if (devfs_populate_vp(ap->a_dvp) != 0) return (ENOTDIR); dmp = VFSTODEVFS(ap->a_dvp->v_mount); dm_unlock = 1; j = devfs_lookupx(ap, &dm_unlock); if (dm_unlock == 1) sx_xunlock(&dmp->dm_lock); return (j); } static int devfs_mknod(struct vop_mknod_args *ap) { struct componentname *cnp; struct vnode *dvp, **vpp; struct devfs_dirent *dd, *de; struct devfs_mount *dmp; int error; /* * The only type of node we should be creating here is a * character device, for anything else return EOPNOTSUPP. */ if (ap->a_vap->va_type != VCHR) return (EOPNOTSUPP); dvp = ap->a_dvp; dmp = VFSTODEVFS(dvp->v_mount); cnp = ap->a_cnp; vpp = ap->a_vpp; dd = dvp->v_data; error = ENOENT; sx_xlock(&dmp->dm_lock); TAILQ_FOREACH(de, &dd->de_dlist, de_list) { if (cnp->cn_namelen != de->de_dirent->d_namlen) continue; if (de->de_dirent->d_type == DT_CHR && (de->de_cdp->cdp_flags & CDP_ACTIVE) == 0) continue; if (bcmp(cnp->cn_nameptr, de->de_dirent->d_name, de->de_dirent->d_namlen) != 0) continue; if (de->de_flags & DE_WHITEOUT) break; goto notfound; } if (de == NULL) goto notfound; de->de_flags &= ~DE_WHITEOUT; error = devfs_allocv(de, dvp->v_mount, LK_EXCLUSIVE, vpp); return (error); notfound: sx_xunlock(&dmp->dm_lock); return (error); } /* ARGSUSED */ static int devfs_open(struct vop_open_args *ap) { struct thread *td = ap->a_td; struct vnode *vp = ap->a_vp; struct cdev *dev = vp->v_rdev; struct file *fp = ap->a_fp; int error, ref, vlocked; struct cdevsw *dsw; struct file *fpop; struct mtx *mtxp; if (vp->v_type == VBLK) return (ENXIO); if (dev == NULL) return (ENXIO); /* Make this field valid before any I/O in d_open. 
*/ if (dev->si_iosize_max == 0) dev->si_iosize_max = DFLTPHYS; dsw = dev_refthread(dev, &ref); if (dsw == NULL) return (ENXIO); if (fp == NULL && dsw->d_fdopen != NULL) { dev_relthread(dev, ref); return (ENXIO); } vlocked = VOP_ISLOCKED(vp); VOP_UNLOCK(vp, 0); fpop = td->td_fpop; td->td_fpop = fp; if (fp != NULL) { fp->f_data = dev; fp->f_vnode = vp; } if (dsw->d_fdopen != NULL) error = dsw->d_fdopen(dev, ap->a_mode, td, fp); else error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td); - /* cleanup any cdevpriv upon error */ + /* Clean up any cdevpriv upon error. */ if (error != 0) devfs_clear_cdevpriv(); td->td_fpop = fpop; vn_lock(vp, vlocked | LK_RETRY); dev_relthread(dev, ref); if (error != 0) { if (error == ERESTART) error = EINTR; return (error); } #if 0 /* /dev/console */ KASSERT(fp != NULL, ("Could not vnode bypass device on NULL fp")); #else if (fp == NULL) return (error); #endif if (fp->f_ops == &badfileops) finit(fp, fp->f_flag, DTYPE_VNODE, dev, &devfs_ops_f); mtxp = mtx_pool_find(mtxpool_sleep, fp); /* * Hint to the dofilewrite() to not force the buffer draining * on the writer to the file. Most likely, the write would * not need normal buffers. */ mtx_lock(mtxp); fp->f_vnread_flags |= FDEVFS_VNODE; mtx_unlock(mtxp); return (error); } static int devfs_pathconf(struct vop_pathconf_args *ap) { switch (ap->a_name) { case _PC_MAC_PRESENT: #ifdef MAC /* * If MAC is enabled, devfs automatically supports * trivial non-persistent label storage. */ *ap->a_retval = 1; #else *ap->a_retval = 0; #endif return (0); default: return (vop_stdpathconf(ap)); } /* NOTREACHED */ } /* ARGSUSED */ static int devfs_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td) { struct cdev *dev; struct cdevsw *dsw; int error, ref; struct file *fpop; fpop = td->td_fpop; error = devfs_fp_check(fp, &dev, &dsw, &ref); if (error != 0) { error = vnops.fo_poll(fp, events, cred, td); return (error); } error = dsw->d_poll(dev, events, td); td->td_fpop = fpop; dev_relthread(dev, ref); return(error); } /* * Print out the contents of a special device vnode. */ static int devfs_print(struct vop_print_args *ap) { printf("\tdev %s\n", devtoname(ap->a_vp->v_rdev)); return (0); } static int devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td) { struct cdev *dev; int ioflag, error, ref; ssize_t resid; struct cdevsw *dsw; struct file *fpop; if (uio->uio_resid > DEVFS_IOSIZE_MAX) return (EINVAL); fpop = td->td_fpop; error = devfs_fp_check(fp, &dev, &dsw, &ref); if (error != 0) { error = vnops.fo_read(fp, uio, cred, flags, td); return (error); } resid = uio->uio_resid; ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT); if (ioflag & O_DIRECT) ioflag |= IO_DIRECT; foffset_lock_uio(fp, uio, flags | FOF_NOLOCK); error = dsw->d_read(dev, uio, ioflag); if (uio->uio_resid != resid || (error == 0 && resid != 0)) devfs_timestamp(&dev->si_atime); td->td_fpop = fpop; dev_relthread(dev, ref); foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF); return (error); } static int devfs_readdir(struct vop_readdir_args *ap) { int error; struct uio *uio; struct dirent *dp; struct devfs_dirent *dd; struct devfs_dirent *de; struct devfs_mount *dmp; off_t off; int *tmp_ncookies = NULL; if (ap->a_vp->v_type != VDIR) return (ENOTDIR); uio = ap->a_uio; if (uio->uio_offset < 0) return (EINVAL); /* * XXX: This is a temporary hack to get around this filesystem not * supporting cookies.
We store the location of the ncookies pointer * in a temporary variable before calling vfs_subr.c:vfs_read_dirent() * and set the number of cookies to 0. We then set the pointer to * NULL so that vfs_read_dirent doesn't try to call realloc() on * ap->a_cookies. Later in this function, we restore the ap->a_ncookies * pointer to its original location before returning to the caller. */ if (ap->a_ncookies != NULL) { tmp_ncookies = ap->a_ncookies; *ap->a_ncookies = 0; ap->a_ncookies = NULL; } dmp = VFSTODEVFS(ap->a_vp->v_mount); if (devfs_populate_vp(ap->a_vp) != 0) { if (tmp_ncookies != NULL) ap->a_ncookies = tmp_ncookies; return (EIO); } error = 0; de = ap->a_vp->v_data; off = 0; TAILQ_FOREACH(dd, &de->de_dlist, de_list) { KASSERT(dd->de_cdp != (void *)0xdeadc0de, ("%s %d\n", __func__, __LINE__)); if (dd->de_flags & (DE_COVERED | DE_WHITEOUT)) continue; if (devfs_prison_check(dd, uio->uio_td)) continue; if (dd->de_dirent->d_type == DT_DIR) de = dd->de_dir; else de = dd; dp = dd->de_dirent; if (dp->d_reclen > uio->uio_resid) break; dp->d_fileno = de->de_inode; if (off >= uio->uio_offset) { error = vfs_read_dirent(ap, dp, off); if (error) break; } off += dp->d_reclen; } sx_xunlock(&dmp->dm_lock); uio->uio_offset = off; /* * Restore ap->a_ncookies if it wasn't originally NULL in the first * place. */ if (tmp_ncookies != NULL) ap->a_ncookies = tmp_ncookies; return (error); } static int devfs_readlink(struct vop_readlink_args *ap) { struct devfs_dirent *de; de = ap->a_vp->v_data; return (uiomove(de->de_symlink, strlen(de->de_symlink), ap->a_uio)); } static int devfs_reclaim(struct vop_reclaim_args *ap) { struct vnode *vp = ap->a_vp; struct devfs_dirent *de; struct cdev *dev; mtx_lock(&devfs_de_interlock); de = vp->v_data; if (de != NULL) { de->de_vnode = NULL; vp->v_data = NULL; } mtx_unlock(&devfs_de_interlock); vnode_destroy_vobject(vp); VI_LOCK(vp); dev_lock(); dev = vp->v_rdev; vp->v_rdev = NULL; if (dev == NULL) { dev_unlock(); VI_UNLOCK(vp); return (0); } dev->si_usecount -= vp->v_usecount; dev_unlock(); VI_UNLOCK(vp); dev_rel(dev); return (0); } static int devfs_remove(struct vop_remove_args *ap) { struct vnode *dvp = ap->a_dvp; struct vnode *vp = ap->a_vp; struct devfs_dirent *dd; struct devfs_dirent *de, *de_covered; struct devfs_mount *dmp = VFSTODEVFS(vp->v_mount); ASSERT_VOP_ELOCKED(dvp, "devfs_remove"); ASSERT_VOP_ELOCKED(vp, "devfs_remove"); sx_xlock(&dmp->dm_lock); dd = ap->a_dvp->v_data; de = vp->v_data; if (de->de_cdp == NULL) { TAILQ_REMOVE(&dd->de_dlist, de, de_list); if (de->de_dirent->d_type == DT_LNK) { de_covered = devfs_find(dd, de->de_dirent->d_name, de->de_dirent->d_namlen, 0); if (de_covered != NULL) de_covered->de_flags &= ~DE_COVERED; } /* We need to unlock dvp because devfs_delete() may lock it. */ VOP_UNLOCK(vp, 0); if (dvp != vp) VOP_UNLOCK(dvp, 0); devfs_delete(dmp, de, 0); sx_xunlock(&dmp->dm_lock); if (dvp != vp) vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); } else { de->de_flags |= DE_WHITEOUT; sx_xunlock(&dmp->dm_lock); } return (0); } /* * Revoke is called on a tty when a terminal session ends. The vnode * is orphaned by setting v_op to deadfs so we need to let go of it * as well so that we create a new one next time around. 
* */ static int devfs_revoke(struct vop_revoke_args *ap) { struct vnode *vp = ap->a_vp, *vp2; struct cdev *dev; struct cdev_priv *cdp; struct devfs_dirent *de; int i; KASSERT((ap->a_flags & REVOKEALL) != 0, ("devfs_revoke !REVOKEALL")); dev = vp->v_rdev; cdp = cdev2priv(dev); dev_lock(); cdp->cdp_inuse++; dev_unlock(); vhold(vp); vgone(vp); vdrop(vp); VOP_UNLOCK(vp,0); loop: for (;;) { mtx_lock(&devfs_de_interlock); dev_lock(); vp2 = NULL; for (i = 0; i <= cdp->cdp_maxdirent; i++) { de = cdp->cdp_dirents[i]; if (de == NULL) continue; vp2 = de->de_vnode; if (vp2 != NULL) { dev_unlock(); VI_LOCK(vp2); mtx_unlock(&devfs_de_interlock); if (vget(vp2, LK_EXCLUSIVE | LK_INTERLOCK, curthread)) goto loop; vhold(vp2); vgone(vp2); vdrop(vp2); vput(vp2); break; } } if (vp2 != NULL) { continue; } dev_unlock(); mtx_unlock(&devfs_de_interlock); break; } dev_lock(); cdp->cdp_inuse--; if (!(cdp->cdp_flags & CDP_ACTIVE) && cdp->cdp_inuse == 0) { TAILQ_REMOVE(&cdevp_list, cdp, cdp_list); dev_unlock(); dev_rel(&cdp->cdp_c); } else dev_unlock(); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); return (0); } static int devfs_rioctl(struct vop_ioctl_args *ap) { struct vnode *vp; struct devfs_mount *dmp; int error; vp = ap->a_vp; vn_lock(vp, LK_SHARED | LK_RETRY); if (vp->v_iflag & VI_DOOMED) { VOP_UNLOCK(vp, 0); return (EBADF); } dmp = VFSTODEVFS(vp->v_mount); sx_xlock(&dmp->dm_lock); VOP_UNLOCK(vp, 0); DEVFS_DMP_HOLD(dmp); devfs_populate(dmp); if (DEVFS_DMP_DROP(dmp)) { sx_xunlock(&dmp->dm_lock); devfs_unmount_final(dmp); return (ENOENT); } error = devfs_rules_ioctl(dmp, ap->a_command, ap->a_data, ap->a_td); sx_xunlock(&dmp->dm_lock); return (error); } static int devfs_rread(struct vop_read_args *ap) { if (ap->a_vp->v_type != VDIR) return (EINVAL); return (VOP_READDIR(ap->a_vp, ap->a_uio, ap->a_cred, NULL, NULL, NULL)); } static int devfs_setattr(struct vop_setattr_args *ap) { struct devfs_dirent *de; struct vattr *vap; struct vnode *vp; struct thread *td; int c, error; uid_t uid; gid_t gid; vap = ap->a_vap; vp = ap->a_vp; td = curthread; if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || (vap->va_blocksize != VNOVAL) || (vap->va_flags != VNOVAL && vap->va_flags != 0) || (vap->va_rdev != VNOVAL) || ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { return (EINVAL); } error = devfs_populate_vp(vp); if (error != 0) return (error); de = vp->v_data; if (vp->v_type == VDIR) de = de->de_dir; c = 0; if (vap->va_uid == (uid_t)VNOVAL) uid = de->de_uid; else uid = vap->va_uid; if (vap->va_gid == (gid_t)VNOVAL) gid = de->de_gid; else gid = vap->va_gid; if (uid != de->de_uid || gid != de->de_gid) { if ((ap->a_cred->cr_uid != de->de_uid) || uid != de->de_uid || (gid != de->de_gid && !groupmember(gid, ap->a_cred))) { error = priv_check(td, PRIV_VFS_CHOWN); if (error != 0) goto ret; } de->de_uid = uid; de->de_gid = gid; c = 1; } if (vap->va_mode != (mode_t)VNOVAL) { if (ap->a_cred->cr_uid != de->de_uid) { error = priv_check(td, PRIV_VFS_ADMIN); if (error != 0) goto ret; } de->de_mode = vap->va_mode; c = 1; } if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { error = vn_utimes_perm(vp, vap, ap->a_cred, td); if (error != 0) goto ret; if (vap->va_atime.tv_sec != VNOVAL) { if (vp->v_type == VCHR) vp->v_rdev->si_atime = vap->va_atime; else de->de_atime = vap->va_atime; } if (vap->va_mtime.tv_sec != VNOVAL) { if (vp->v_type == VCHR) vp->v_rdev->si_mtime = vap->va_mtime; else de->de_mtime = vap->va_mtime; } c = 1; } if (c) { if (vp->v_type == VCHR) 
vfs_timestamp(&vp->v_rdev->si_ctime); else vfs_timestamp(&de->de_mtime); } ret: sx_xunlock(&VFSTODEVFS(vp->v_mount)->dm_lock); return (error); } #ifdef MAC static int devfs_setlabel(struct vop_setlabel_args *ap) { struct vnode *vp; struct devfs_dirent *de; vp = ap->a_vp; de = vp->v_data; mac_vnode_relabel(ap->a_cred, vp, ap->a_label); mac_devfs_update(vp->v_mount, de, vp); return (0); } #endif static int devfs_stat_f(struct file *fp, struct stat *sb, struct ucred *cred, struct thread *td) { return (vnops.fo_stat(fp, sb, cred, td)); } static int devfs_symlink(struct vop_symlink_args *ap) { int i, error; struct devfs_dirent *dd; struct devfs_dirent *de, *de_covered, *de_dotdot; struct devfs_mount *dmp; error = priv_check(curthread, PRIV_DEVFS_SYMLINK); if (error) return(error); dmp = VFSTODEVFS(ap->a_dvp->v_mount); if (devfs_populate_vp(ap->a_dvp) != 0) return (ENOENT); dd = ap->a_dvp->v_data; de = devfs_newdirent(ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen); de->de_flags = DE_USER; de->de_uid = 0; de->de_gid = 0; de->de_mode = 0755; de->de_inode = alloc_unr(devfs_inos); de->de_dir = dd; de->de_dirent->d_type = DT_LNK; i = strlen(ap->a_target) + 1; de->de_symlink = malloc(i, M_DEVFS, M_WAITOK); bcopy(ap->a_target, de->de_symlink, i); #ifdef MAC mac_devfs_create_symlink(ap->a_cnp->cn_cred, dmp->dm_mount, dd, de); #endif de_covered = devfs_find(dd, de->de_dirent->d_name, de->de_dirent->d_namlen, 0); if (de_covered != NULL) { if ((de_covered->de_flags & DE_USER) != 0) { devfs_delete(dmp, de, DEVFS_DEL_NORECURSE); sx_xunlock(&dmp->dm_lock); return (EEXIST); } KASSERT((de_covered->de_flags & DE_COVERED) == 0, ("devfs_symlink: entry %p already covered", de_covered)); de_covered->de_flags |= DE_COVERED; } de_dotdot = TAILQ_FIRST(&dd->de_dlist); /* "." */ de_dotdot = TAILQ_NEXT(de_dotdot, de_list); /* ".." */ TAILQ_INSERT_AFTER(&dd->de_dlist, de_dotdot, de, de_list); devfs_dir_ref_de(dmp, dd); devfs_rules_apply(dmp, de); return (devfs_allocv(de, ap->a_dvp->v_mount, LK_EXCLUSIVE, ap->a_vpp)); } static int devfs_truncate_f(struct file *fp, off_t length, struct ucred *cred, struct thread *td) { return (vnops.fo_truncate(fp, length, cred, td)); } static int devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td) { struct cdev *dev; int error, ioflag, ref; ssize_t resid; struct cdevsw *dsw; struct file *fpop; if (uio->uio_resid > DEVFS_IOSIZE_MAX) return (EINVAL); fpop = td->td_fpop; error = devfs_fp_check(fp, &dev, &dsw, &ref); if (error != 0) { error = vnops.fo_write(fp, uio, cred, flags, td); return (error); } KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td)); ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT | O_FSYNC); if (ioflag & O_DIRECT) ioflag |= IO_DIRECT; foffset_lock_uio(fp, uio, flags | FOF_NOLOCK); resid = uio->uio_resid; error = dsw->d_write(dev, uio, ioflag); if (uio->uio_resid != resid || (error == 0 && resid != 0)) { devfs_timestamp(&dev->si_ctime); dev->si_mtime = dev->si_ctime; } td->td_fpop = fpop; dev_relthread(dev, ref); foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF); return (error); } static int devfs_mmap_f(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff, struct thread *td) { struct cdev *dev; struct cdevsw *dsw; struct mount *mp; struct vnode *vp; struct file *fpop; vm_object_t object; vm_prot_t maxprot; int error, ref; vp = fp->f_vnode; /* * Ensure that file and memory protections are * compatible. 
*/ mp = vp->v_mount; if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0) maxprot = VM_PROT_NONE; else maxprot = VM_PROT_EXECUTE; if ((fp->f_flag & FREAD) != 0) maxprot |= VM_PROT_READ; else if ((prot & VM_PROT_READ) != 0) return (EACCES); /* * If we are sharing potential changes via MAP_SHARED and we * are trying to get write permission although we opened it * without asking for it, bail out. * * Note that most character devices always share mappings. * The one exception is that D_MMAP_ANON devices * (i.e. /dev/zero) permit private writable mappings. * * Rely on vm_mmap_cdev() to fail invalid MAP_PRIVATE requests * as well as updating maxprot to permit writing for * D_MMAP_ANON devices rather than doing that here. */ if ((flags & MAP_SHARED) != 0) { if ((fp->f_flag & FWRITE) != 0) maxprot |= VM_PROT_WRITE; else if ((prot & VM_PROT_WRITE) != 0) return (EACCES); } maxprot &= cap_maxprot; fpop = td->td_fpop; error = devfs_fp_check(fp, &dev, &dsw, &ref); if (error != 0) return (error); error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, dev, dsw, &foff, &object); td->td_fpop = fpop; dev_relthread(dev, ref); if (error != 0) return (error); error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object, foff, FALSE, td); if (error != 0) vm_object_deallocate(object); return (error); } dev_t dev2udev(struct cdev *x) { if (x == NULL) return (NODEV); return (cdev2priv(x)->cdp_inode); } static struct fileops devfs_ops_f = { .fo_read = devfs_read_f, .fo_write = devfs_write_f, .fo_truncate = devfs_truncate_f, .fo_ioctl = devfs_ioctl_f, .fo_poll = devfs_poll_f, .fo_kqfilter = devfs_kqfilter_f, .fo_stat = devfs_stat_f, .fo_close = devfs_close_f, .fo_chmod = vn_chmod, .fo_chown = vn_chown, .fo_sendfile = vn_sendfile, .fo_seek = vn_seek, .fo_fill_kinfo = vn_fill_kinfo, .fo_mmap = devfs_mmap_f, .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE }; static struct vop_vector devfs_vnodeops = { .vop_default = &default_vnodeops, .vop_access = devfs_access, .vop_getattr = devfs_getattr, .vop_ioctl = devfs_rioctl, .vop_lookup = devfs_lookup, .vop_mknod = devfs_mknod, .vop_pathconf = devfs_pathconf, .vop_read = devfs_rread, .vop_readdir = devfs_readdir, .vop_readlink = devfs_readlink, .vop_reclaim = devfs_reclaim, .vop_remove = devfs_remove, .vop_revoke = devfs_revoke, .vop_setattr = devfs_setattr, #ifdef MAC .vop_setlabel = devfs_setlabel, #endif .vop_symlink = devfs_symlink, .vop_vptocnp = devfs_vptocnp, }; static struct vop_vector devfs_specops = { .vop_default = &default_vnodeops, .vop_access = devfs_access, .vop_bmap = VOP_PANIC, .vop_close = devfs_close, .vop_create = VOP_PANIC, .vop_fsync = devfs_fsync, .vop_getattr = devfs_getattr, .vop_link = VOP_PANIC, .vop_mkdir = VOP_PANIC, .vop_mknod = VOP_PANIC, .vop_open = devfs_open, .vop_pathconf = devfs_pathconf, .vop_poll = dead_poll, .vop_print = devfs_print, .vop_read = dead_read, .vop_readdir = VOP_PANIC, .vop_readlink = VOP_PANIC, .vop_reallocblks = VOP_PANIC, .vop_reclaim = devfs_reclaim, .vop_remove = devfs_remove, .vop_rename = VOP_PANIC, .vop_revoke = devfs_revoke, .vop_rmdir = VOP_PANIC, .vop_setattr = devfs_setattr, #ifdef MAC .vop_setlabel = devfs_setlabel, #endif .vop_strategy = VOP_PANIC, .vop_symlink = VOP_PANIC, .vop_vptocnp = devfs_vptocnp, .vop_write = dead_write, }; /* * Our calling convention to the device drivers used to be that we passed * vnode.h IO_* flags to read()/write(), but we're moving to fcntl.h O_ * flags instead since that's what open(), close() and ioctl() takes and * we don't really want vnode.h in device drivers. 
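(Editor's illustration of the protection derivation in devfs_mmap_f() above, reduced to a pure function: read access requires the file to have been opened FREAD, and a MAP_SHARED mapping may only gain write access if it was opened FWRITE. The noexec-mount and D_MMAP_ANON special cases, and the cap_maxprot mask, are deliberately left out; constants are redefined locally so the model runs in user space.)

#include <stdio.h>

#define PROT_READ  0x1
#define PROT_WRITE 0x2
#define PROT_EXEC  0x4
#define EACCES     13

/* Derive the maximum protections a mapping may ever gain from the
 * open mode; returns 0 and fills *maxprotp, or EACCES. */
static int
derive_maxprot(int fread, int fwrite, int shared, int prot, int *maxprotp)
{
    int maxprot = PROT_EXEC;    /* assume an exec-friendly mount */

    if (fread)
        maxprot |= PROT_READ;
    else if (prot & PROT_READ)
        return (EACCES);
    if (shared) {
        if (fwrite)
            maxprot |= PROT_WRITE;
        else if (prot & PROT_WRITE)
            return (EACCES);
    }
    *maxprotp = maxprot;
    return (0);
}

int
main(void)
{
    int mp;

    /* read-only open, MAP_SHARED PROT_WRITE request -> EACCES (13) */
    printf("%d\n", derive_maxprot(1, 0, 1, PROT_READ | PROT_WRITE, &mp));
    /* read-write open -> allowed, maxprot includes write */
    printf("%d 0x%x\n", derive_maxprot(1, 1, 1, PROT_WRITE, &mp), mp);
    return (0);
}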
* We solved the source compatibility by redefining some vnode flags to * be the same as the fcntl ones and by sending down the bitwise OR of * the respective fcntl/vnode flags. These CTASSERTS make sure nobody * pulls the rug out under this. */ CTASSERT(O_NONBLOCK == IO_NDELAY); CTASSERT(O_FSYNC == IO_SYNC); diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index 894244c97d06..67f6ee0b3636 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -1,3401 +1,3402 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_apic.h" #include "opt_atpic.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" #include "opt_mp_watchdog.h" #include "opt_npx.h" #include "opt_perfmon.h" #include "opt_platform.h" #include "opt_xbox.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #ifndef KDB #error KDB must be enabled in order for DDB to work! 
#endif #include #include #endif #ifdef PC98 #include #else #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PERFMON #include #endif #ifdef SMP #include #endif #ifdef FDT #include #endif #ifdef DEV_APIC #include #endif #ifdef DEV_ISA #include #endif #ifdef XBOX #include int arch_i386_is_xbox = 0; uint32_t arch_i386_xbox_memsize = 0; #endif /* Sanity check for __curthread() */ CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); extern register_t init386(int first); extern void dblfault_handler(void); #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) #define CPU_ENABLE_SSE #endif static void cpu_startup(void *); static void fpstate_drop(struct thread *td); static void get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, size_t xfpusave_len); static int set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, size_t xfpustate_len); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); /* Intel ICH registers */ #define ICH_PMBASE 0x400 #define ICH_SMI_EN ICH_PMBASE + 0x30 int _udatasel, _ucodesel; u_int basemem; #ifdef PC98 int need_pre_dma_flush; /* If 1, use wbinvd before DMA transfer. */ int need_post_dma_flush; /* If 1, use invd after DMA transfer. */ static int ispc98 = 1; SYSCTL_INT(_machdep, OID_AUTO, ispc98, CTLFLAG_RD, &ispc98, 0, ""); #endif int cold = 1; #ifdef COMPAT_43 static void osendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); #endif #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); #endif long Maxmem = 0; long realmem = 0; #ifdef PAE FEATURE(pae, "Physical Address Extensions"); #endif /* * The number of PHYSMAP entries must be one less than the number of * PHYSSEG entries because the PHYSMAP entry that spans the largest * physical address that is accessible by ISA DMA is split into two * PHYSSEG entries. */ #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t phys_avail[PHYSMAP_SIZE + 2]; vm_paddr_t dump_avail[PHYSMAP_SIZE + 2]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2) #define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2) struct kva_md_info kmi; static struct trapframe proc0_tf; struct pcpu __pcpu[MAXCPU]; struct mtx icu_lock; struct mem_range_softc mem_range_softc; /* Default init_ops implementation. */ struct init_ops init_ops = { .early_clock_source_init = i8254_init, .early_delay = i8254_delay, #ifdef DEV_APIC .msi_init = msi_init, #endif }; static void cpu_startup(dummy) void *dummy; { uintmax_t memsize; char *sysenv; #ifndef PC98 /* * On MacBooks, we need to disallow the legacy USB circuit to * generate an SMI# because this can cause several problems, * namely: incorrect CPU frequency detection and failure to * start the APs. * We do this by disabling a bit in the SMI_EN (SMI Control and * Enable register) of the Intel ICH LPC Interface Bridge.
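(Editor's note on the magic number used just below: per Intel ICH datasheets, the bit being cleared, 0x8, is LEGACY_USB_EN, bit 3 of SMI_EN; take the name as an assumption here. A tiny user-space model with stubbed port I/O shows the read-modify-write.)

#include <stdint.h>

#define ICH_PMBASE_M        0x400
#define ICH_SMI_EN_M        (ICH_PMBASE_M + 0x30)
#define SMI_EN_LEGACY_USB   0x00000008  /* bit 3, per ICH datasheets (assumed) */

/* Stubs standing in for the kernel's inl()/outl() port primitives. */
static uint32_t fake_reg = 0x0000000f;
static uint32_t inl_stub(int port) { (void)port; return (fake_reg); }
static void outl_stub(int port, uint32_t v) { (void)port; fake_reg = v; }

int
main(void)
{
    /* Same operation as cpu_startup(): clear only LEGACY_USB_EN. */
    outl_stub(ICH_SMI_EN_M, inl_stub(ICH_SMI_EN_M) & ~SMI_EN_LEGACY_USB);
    return (fake_reg == 0x00000007 ? 0 : 1);
}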
*/ sysenv = kern_getenv("smbios.system.product"); if (sysenv != NULL) { if (strncmp(sysenv, "MacBook1,1", 10) == 0 || strncmp(sysenv, "MacBook3,1", 10) == 0 || strncmp(sysenv, "MacBook4,1", 10) == 0 || strncmp(sysenv, "MacBookPro1,1", 13) == 0 || strncmp(sysenv, "MacBookPro1,2", 13) == 0 || strncmp(sysenv, "MacBookPro3,1", 13) == 0 || strncmp(sysenv, "MacBookPro4,1", 13) == 0 || strncmp(sysenv, "Macmini1,1", 10) == 0) { if (bootverbose) printf("Disabling LEGACY_USB_EN bit on " "Intel ICH.\n"); outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8); } freeenv(sysenv); } #endif /* !PC98 */ /* * Good {morning,afternoon,evening,night}. */ startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif /* * Display physical memory if SMBIOS reports reasonable amount. */ memsize = 0; sysenv = kern_getenv("smbios.memory.enabled"); if (sysenv != NULL) { memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10; freeenv(sysenv); } if (memsize < ptoa((uintmax_t)vm_cnt.v_free_count)) memsize = ptoa((uintmax_t)Maxmem); printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20); realmem = atop(memsize); /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)vm_cnt.v_free_count), ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); cpu_setregs(); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by call * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ #ifdef COMPAT_43 static void osendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct osigframe sf, *fp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct osigframe *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct osigframe)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else fp = (struct osigframe *)regs->tf_esp - 1; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; bzero(&sf.sf_siginfo, sizeof(sf.sf_siginfo)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_arg2 = (register_t)&fp->sf_siginfo; sf.sf_siginfo.si_signo = sig; sf.sf_siginfo.si_code = ksi->ksi_code; sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; sf.sf_addr = 0; } else { /* Old FreeBSD-style arguments. */ sf.sf_arg2 = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* Save most if not all of trap frame. 
*/ sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; sf.sf_siginfo.si_sc.sc_es = regs->tf_es; sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; sf.sf_siginfo.si_sc.sc_gs = rgs(); sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; /* Build the signal context to be used by osigreturn(). */ sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0; SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; sf.sf_siginfo.si_sc.sc_err = regs->tf_err; /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */ struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs; sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs; sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es; sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_siginfo.si_sc.sc_ps = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* See sendsig() for comments. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, fp, sizeof(*fp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)fp; if (p->p_sysent->sv_sigcode_base != 0) { regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode - szosigcode; } else { /* a.out sysentvec does not use shared page */ regs->tf_eip = p->p_sysent->sv_psstrings - szosigcode; } regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe4 sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); bzero(sf.sf_uc.uc_mcontext.mc_fpregs, sizeof(sf.sf_uc.uc_mcontext.mc_fpregs)); bzero(sf.sf_uc.uc_mcontext.__spare__, sizeof(sf.sf_uc.uc_mcontext.__spare__)); bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); /* Allocate space for the signal handler context. 
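* (The frame goes on the alternate signal stack only when TDP_ALTSTACK enabled one, the handler was registered with SA_ONSTACK, and we are not already running there; otherwise it is pushed just below the interrupted %esp.)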
*/ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe4 *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe4)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sfp = (struct sigframe4 *)regs->tf_esp - 1; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; bzero(&sf.sf_si, sizeof(sf.sf_si)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = ksi->ksi_code; sf.sf_si.si_addr = ksi->ksi_addr; } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode - szfreebsd4_sigcode; regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_FREEBSD4 */ void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; struct segment_descriptor *sdp; char *xfpusave; size_t xfpusave_len; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); #ifdef COMPAT_FREEBSD4 if (SIGISMEMBER(psp->ps_freebsd4, sig)) { freebsd4_sendsig(catcher, ksi, mask); return; } #endif #ifdef COMPAT_43 if (SIGISMEMBER(psp->ps_osigset, sig)) { osendsig(catcher, ksi, mask); return; } #endif regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); #ifdef CPU_ENABLE_SSE if (cpu_max_ext_state_size > sizeof(union savefpu) && use_xsave) { xfpusave_len = cpu_max_ext_state_size - sizeof(union savefpu); xfpusave = __builtin_alloca(xfpusave_len); } else { #else { #endif xfpusave_len = 0; xfpusave = NULL; } /* Save user context. 
*/ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); fpstate_drop(td); /* * Unconditionally fill the fsbase and gsbase into the mcontext. */ sdp = &td->td_pcb->pcb_fsd; sf.sf_uc.uc_mcontext.mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sdp = &td->td_pcb->pcb_gsd; sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; bzero(sf.sf_uc.uc_mcontext.mc_spare2, sizeof(sf.sf_uc.uc_mcontext.mc_spare2)); bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size; #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sp = (char *)regs->tf_esp - 128; if (xfpusave != NULL) { sp -= xfpusave_len; sp = (char *)((unsigned int)sp & ~0x3F); sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp; } sp -= sizeof(struct sigframe); /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned int)sp & ~0xF); /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; bzero(&sf.sf_si, sizeof(sf.sf_si)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* maybe a translated signal */ } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. 
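* (Both the sigframe and, when present, the extended FPU state land in the area carved out above: 128 spare bytes below the interrupted %esp, then the 64-byte-aligned xfpustate, then the 16-byte-aligned sigframe itself; a rough sketch of the pointer arithmetic, not an exact byte map.)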
*/ if (copyout(&sf, sfp, sizeof(*sfp)) != 0 || (xfpusave != NULL && copyout(xfpusave, (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len) != 0)) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = p->p_sysent->sv_sigcode_base; if (regs->tf_eip == 0) regs->tf_eip = p->p_sysent->sv_psstrings - szsigcode; regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to clean up state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. * * MPSAFE */ #ifdef COMPAT_43 int osigreturn(td, uap) struct thread *td; struct osigreturn_args /* { struct osigcontext *sigcntxp; } */ *uap; { struct osigcontext sc; struct trapframe *regs; struct osigcontext *scp; int eflags, error; ksiginfo_t ksi; regs = td->td_frame; error = copyin(uap->sigcntxp, &sc, sizeof(sc)); if (error != 0) return (error); scp = &sc; eflags = scp->sc_ps; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } tf->tf_vm86_ds = scp->sc_ds; tf->tf_vm86_es = scp->sc_es; tf->tf_vm86_fs = scp->sc_fs; tf->tf_vm86_gs = scp->sc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ if (!EFL_SECURE(eflags, regs->tf_eflags)) { return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ if (!CS_SECURE(scp->sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } regs->tf_ds = scp->sc_ds; regs->tf_es = scp->sc_es; regs->tf_fs = scp->sc_fs; } /* Restore remaining registers.
*/ regs->tf_eax = scp->sc_eax; regs->tf_ebx = scp->sc_ebx; regs->tf_ecx = scp->sc_ecx; regs->tf_edx = scp->sc_edx; regs->tf_esi = scp->sc_esi; regs->tf_edi = scp->sc_edi; regs->tf_cs = scp->sc_cs; regs->tf_ss = scp->sc_ss; regs->tf_isp = scp->sc_isp; regs->tf_ebp = scp->sc_fp; regs->tf_esp = scp->sc_sp; regs->tf_eip = scp->sc_pc; regs->tf_eflags = eflags; #if defined(COMPAT_43) if (scp->sc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, (sigset_t *)&scp->sc_mask, NULL, SIGPROCMASK_OLD); return (EJUSTRETURN); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 /* * MPSAFE */ int freebsd4_sigreturn(td, uap) struct thread *td; struct freebsd4_sigreturn_args /* { const ucontext4 *sigcntxp; } */ *uap; { struct ucontext4 uc; struct trapframe *regs; struct ucontext4 *ucp; int cs, eflags, error; ksiginfo_t ksi; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ if (!EFL_SECURE(eflags, regs->tf_eflags)) { uprintf("pid %d (%s): freebsd4_sigreturn eflags = 0x%x\n", td->td_proc->p_pid, td->td_name, eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. 
*/ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { uprintf("pid %d (%s): freebsd4_sigreturn cs = 0x%x\n", td->td_proc->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); return (EJUSTRETURN); } #endif /* COMPAT_FREEBSD4 */ /* * MPSAFE */ int sys_sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; struct proc *p; struct trapframe *regs; ucontext_t *ucp; char *xfpustate; size_t xfpustate_len; int cs, eflags, error, ret; ksiginfo_t ksi; p = td->td_proc; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) { uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid, td->td_name, ucp->uc_mcontext.mc_flags); return (EINVAL); } regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ if (!EFL_SECURE(eflags, regs->tf_eflags)) { uprintf("pid %d (%s): sigreturn eflags = 0x%x\n", td->td_proc->p_pid, td->td_name, eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. 
*/ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { uprintf("pid %d (%s): sigreturn cs = 0x%x\n", td->td_proc->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) { xfpustate_len = uc.uc_mcontext.mc_xfpustate_len; if (xfpustate_len > cpu_max_ext_state_size - sizeof(union savefpu)) { uprintf( "pid %d (%s): sigreturn xfpusave_len = 0x%zx\n", p->p_pid, td->td_name, xfpustate_len); return (EINVAL); } xfpustate = __builtin_alloca(xfpustate_len); error = copyin((const void *)uc.uc_mcontext.mc_xfpustate, xfpustate, xfpustate_len); if (error != 0) { uprintf( "pid %d (%s): sigreturn copying xfpustate failed\n", p->p_pid, td->td_name); return (error); } } else { xfpustate = NULL; xfpustate_len = 0; } ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len); if (ret != 0) return (ret); bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); return (EJUSTRETURN); } /* * Reset registers to default values on exec. */ void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; /* Reset pcb->pcb_gs and %gs before possibly invalidating it. */ pcb->pcb_gs = _udatasel; load_gs(_udatasel); mtx_lock_spin(&dt_lock); if (td->td_proc->p_md.md_ldt) user_ldt_free(td); else mtx_unlock_spin(&dt_lock); bzero((char *)regs, sizeof(struct trapframe)); regs->tf_eip = imgp->entry_addr; regs->tf_esp = stack; regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_cs = _ucodesel; /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ regs->tf_ebx = imgp->ps_strings; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == curpcb) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } pcb->pcb_flags &= ~PCB_DBREGS; } pcb->pcb_initial_npxcw = __INITIAL_NPXCW__; /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); /* * XXX - Linux emulator * Make sure edx is 0x0 on entry. Linux binaries depend * on it. */ td->td_retval[1] = 0; } void cpu_setregs(void) { unsigned int cr0; cr0 = rcr0(); /* * CR0_MP, CR0_NE and CR0_TS are set for NPX (FPU) support: * * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT * instructions. We must set the CR0_MP bit and use the CR0_TS * bit to control the trap, because setting the CR0_EM bit does * not cause WAIT instructions to trap. It's important to trap * WAIT instructions - otherwise the "wait" variants of no-wait * control instructions would degenerate to the "no-wait" variants * after FP context switches but work correctly otherwise. It's * particularly important to trap WAITs when there is no NPX - * otherwise the "wait" variants would always degenerate.
* * Try setting CR0_NE to get correct error reporting on 486DX's. * Setting it should fail or do nothing on lesser processors. */ cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; load_cr0(cr0); load_gs(_udatasel); } u_long bootdev; /* not a struct cdev *- encoding is different */ SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)"); static char bootmethod[16] = "BIOS"; SYSCTL_STRING(_machdep, OID_AUTO, bootmethod, CTLFLAG_RD, bootmethod, 0, "System firmware boot method"); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ int _default_ldt; union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */ union descriptor ldt[NLDT]; /* local descriptor table */ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ struct region_descriptor r_gdt, r_idt; /* table descriptors */ struct mtx dt_lock; /* lock for GDT and LDT */ static struct i386tss dblfault_tss; static char dblfault_stack[PAGE_SIZE]; extern vm_offset_t proc0kstack; /* * software prototypes -- in more palatable form. * * GCODE_SEL through GUDATA_SEL must be in this order for syscall/sysret * GUFS_SEL and GUGS_SEL must be in this order (swtch.s knows it) */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = SEL_KPL, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GPRIV_SEL 1 SMP Per-Processor Private Data Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUFS_SEL 2 %fs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUGS_SEL 3 %gs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GCODE_SEL 4 Code Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GDATA_SEL 5 Data Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUCODE_SEL 6 Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUDATA_SEL 7 Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */ { .ssd_base = 0x400, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ { .ssd_base = 0x0, .ssd_limit = sizeof(struct i386tss)-1, .ssd_type = SDT_SYS386TSS, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GLDT_SEL 10 LDT Descriptor */ { .ssd_base = (int) ldt, .ssd_limit = sizeof(ldt)-1, .ssd_type = SDT_SYSLDT, .ssd_dpl = SEL_UPL, .ssd_p = 
1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUSERLDT_SEL 11 User LDT Descriptor per process */ { .ssd_base = (int) ldt, .ssd_limit = (512 * sizeof(union descriptor)-1), .ssd_type = SDT_SYSLDT, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GPANIC_SEL 12 Panic Tss Descriptor */ { .ssd_base = (int) &dblfault_tss, .ssd_limit = sizeof(struct i386tss)-1, .ssd_type = SDT_SYS386TSS, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GBIOSCODE32_SEL 13 BIOS 32-bit interface (32bit Code) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSCODE16_SEL 14 BIOS 32-bit interface (16bit Code) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSDATA_SEL 15 BIOS 32-bit interface (Data) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GBIOSUTIL_SEL 16 BIOS 16-bit interface (Utility) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSARGS_SEL 17 BIOS 16-bit interface (Arguments) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GNDIS_SEL 18 NDIS Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, }; static struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, }; void setidt(idx, func, typ, dpl, selec) int idx; inthand_t *func; int typ; int dpl; int selec; { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (int)func; ip->gd_selector = selec; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((int)func)>>16 ; } extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), #ifdef KDTRACE_HOOKS IDTVEC(dtrace_ret), 
#endif #ifdef XENHVM IDTVEC(xen_intr_upcall), #endif IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); #ifdef DDB /* * Display the index and function name of any IDT entries that don't use * the default 'rsvd' entry point. */ DB_SHOW_COMMAND(idt, db_show_idt) { struct gate_descriptor *ip; int idx; uintptr_t func; ip = idt; for (idx = 0; idx < NIDT && !db_pager_quit; idx++) { func = (ip->gd_hioffset << 16 | ip->gd_looffset); if (func != (uintptr_t)&IDTVEC(rsvd)) { db_printf("%3d\t", idx); db_printsym(func, DB_STGY_PROC); db_printf("\n"); } ip++; } } /* Show privileged registers. */ DB_SHOW_COMMAND(sysregs, db_show_sysregs) { uint64_t idtr, gdtr; idtr = ridt(); db_printf("idtr\t0x%08x/%04x\n", (u_int)(idtr >> 16), (u_int)idtr & 0xffff); gdtr = rgdt(); db_printf("gdtr\t0x%08x/%04x\n", (u_int)(gdtr >> 16), (u_int)gdtr & 0xffff); db_printf("ldtr\t0x%04x\n", rldt()); db_printf("tr\t0x%04x\n", rtr()); db_printf("cr0\t0x%08x\n", rcr0()); db_printf("cr2\t0x%08x\n", rcr2()); db_printf("cr3\t0x%08x\n", rcr3()); db_printf("cr4\t0x%08x\n", rcr4()); if (rcr4() & CR4_XSAVE) db_printf("xcr0\t0x%016llx\n", rxcr(0)); if (amd_feature & (AMDID_NX | AMDID_LM)) db_printf("EFER\t0x%016llx\n", rdmsr(MSR_EFER)); if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX)) db_printf("FEATURES_CTL\t0x%016llx\n", rdmsr(MSR_IA32_FEATURE_CONTROL)); if ((cpu_vendor_id == CPU_VENDOR_INTEL || cpu_vendor_id == CPU_VENDOR_AMD) && CPUID_TO_FAMILY(cpu_id) >= 6) db_printf("DEBUG_CTL\t0x%016llx\n", rdmsr(MSR_DEBUGCTLMSR)); if (cpu_feature & CPUID_PAT) db_printf("PAT\t0x%016llx\n", rdmsr(MSR_PAT)); } DB_SHOW_COMMAND(dbregs, db_show_dbregs) { db_printf("dr0\t0x%08x\n", rdr0()); db_printf("dr1\t0x%08x\n", rdr1()); db_printf("dr2\t0x%08x\n", rdr2()); db_printf("dr3\t0x%08x\n", rdr3()); db_printf("dr6\t0x%08x\n", rdr6()); db_printf("dr7\t0x%08x\n", rdr7()); } #endif void sdtossd(sd, ssd) struct segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } #if !defined(PC98) static int add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, int *physmap_idxp) { int i, insert_idx, physmap_idx; physmap_idx = *physmap_idxp; if (length == 0) return (1); #ifndef PAE if (base > 0xffffffff) { printf("%uK of memory above 4GB ignored\n", (u_int)(length / 1024)); return (1); } #endif /* * Find insertion point while checking for overlap. Start off by * assuming the new entry will be added to the end. */ insert_idx = physmap_idx + 2; for (i = 0; i <= physmap_idx; i += 2) { if (base < physmap[i + 1]) { if (base + length <= physmap[i]) { insert_idx = i; break; } if (boothowto & RB_VERBOSE) printf( "Overlapping memory regions, ignoring second region\n"); return (1); } } /* See if we can prepend to the next entry. */ if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) { physmap[insert_idx] = base; return (1); } /* See if we can append to the previous entry. */ if (insert_idx > 0 && base == physmap[insert_idx - 1]) { physmap[insert_idx - 1] += length; return (1); } physmap_idx += 2; *physmap_idxp = physmap_idx; if (physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); return (0); } /* * Move the last 'N' entries down to make room for the new * entry if needed. 
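* As an illustrative walk-through: with existing pairs {0, 640K} and {1M, 256M}, adding [768K, 1M) makes insert_idx point at the second pair, and the prepend check above merges it into {768K, 256M}, so no entries move.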
*/ for (i = physmap_idx; i > insert_idx; i -= 2) { physmap[i] = physmap[i - 2]; physmap[i + 1] = physmap[i - 1]; } /* Insert the new entry. */ physmap[insert_idx] = base; physmap[insert_idx + 1] = base + length; return (1); } static int add_smap_entry(struct bios_smap *smap, vm_paddr_t *physmap, int *physmap_idxp) { if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%016llx len=%016llx\n", smap->type, smap->base, smap->length); if (smap->type != SMAP_TYPE_MEMORY) return (1); return (add_physmap_entry(smap->base, smap->length, physmap, physmap_idxp)); } static void add_smap_entries(struct bios_smap *smapbase, vm_paddr_t *physmap, int *physmap_idxp) { struct bios_smap *smap, *smapend; u_int32_t smapsize; /* * Memory map from INT 15:E820. * * subr_module.c says: * "Consumer may safely assume that size value precedes data." * ie: an int32_t immediately precedes SMAP. */ smapsize = *((u_int32_t *)smapbase - 1); smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); for (smap = smapbase; smap < smapend; smap++) if (!add_smap_entry(smap, physmap, physmap_idxp)) break; } #endif /* !PC98 */ static void basemem_setup(void) { vm_paddr_t pa; pt_entry_t *pte; int i; if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } /* * XXX if biosbasemem is now < 640, there is a `hole' * between the end of base memory and the start of * ISA memory. The hole may be empty or it may * contain BIOS code or data. Map it read/write so * that the BIOS can write to it. (Memory from 0 to * the physical end of the kernel is mapped read-only * to begin with and then parts of it are remapped. * The parts that aren't remapped form holes that * remain read-only and are unused by the kernel. * The base memory area is below the physical end of * the kernel and right now forms a read-only hole. * The part of it from PAGE_SIZE to * (trunc_page(biosbasemem * 1024) - 1) will be * remapped and used by the kernel later.) * * This code is similar to the code used in * pmap_mapdev, but since no memory needs to be * allocated we simply change the mapping. */ for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) pmap_kenter(KERNBASE + pa, pa); /* * Map pages between basemem and ISA_HOLE_START, if any, r/w into * the vm86 page table so that vm86 can scribble on them using * the vm86 map too. XXX: why 2 ways for this and only 1 way for * page 0, at least as initialized here? */ pte = (pt_entry_t *)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; } /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * If we cannot accurately determine the physical memory map, then use * value from the 0xE801 call, and failing that, the RTC. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. * * XXX first should be vm_paddr_t. 
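* An illustrative use of the tunable: "set hw.physmem=512m" at the loader prompt caps usable memory at 512MB via the TUNABLE_*_FETCH calls below (assuming the usual k/m/g suffix parsing in the kernel environment).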
*/ #ifdef PC98 static void getmemsize(int first) { int off, physmap_idx, pa_indx, da_indx; u_long physmem_tunable, memtest; vm_paddr_t physmap[PHYSMAP_SIZE]; pt_entry_t *pte; quad_t dcons_addr, dcons_size; int i; int pg_n; u_int extmem; u_int under16; vm_paddr_t pa; bzero(physmap, sizeof(physmap)); /* XXX - some EPSON machines can't use PG_N */ pg_n = PG_N; if (pc98_machine_type & M_EPSON_PC98) { switch (epson_machine_id) { #ifdef WB_CACHE default: #endif case EPSON_PC486_HX: case EPSON_PC486_HG: case EPSON_PC486_HA: pg_n = 0; break; } } under16 = pc98_getmemsize(&basemem, &extmem); basemem_setup(); physmap[0] = 0; physmap[1] = basemem * 1024; physmap_idx = 2; physmap[physmap_idx] = 0x100000; physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; /* * Now, physmap contains a map of physical memory. */ #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1]); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) Maxmem = atop(physmem_tunable); /* * By default keep the memtest enabled. Use a general name so that * one could eventually do more with the code than just disable it. */ memtest = 1; TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest); if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* * If Maxmem has been increased beyond what the system has detected, * extend the last memory segment to the new limit. */ if (atop(physmap[physmap_idx + 1]) < Maxmem) physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem); /* * We need to split the chunk if Maxmem is larger than 16MB and * the area under 16MB is not entirely memory. * (1) system area (15-16MB region) is cut off * (2) extended memory is only over 16MB area (ex. Melco "HYPERMEMORY") */ if ((under16 != 16 * 1024) && (extmem > 15 * 1024)) { /* 15M - 16M region is cut off, so we need to split the chunk */ physmap[physmap_idx + 1] = under16 * 1024; physmap_idx += 2; physmap[physmap_idx] = 0x1000000; physmap[physmap_idx + 1] = physmap[2] + extmem * 1024; } /* call pmap initialization to make new kernel address space */ pmap_bootstrap(first); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; da_indx = 1; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; dump_avail[da_indx] = physmap[0]; pte = CMAP3; /* * Get dcons buffer address */ if (getenv_quad("dcons.addr", &dcons_addr) == 0 || getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad, full; int *ptr = (int *)CADDR3; full = FALSE; /* * block out kernel memory as not available.
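* (Pages from KERNLOAD up to "first" hold the kernel image and early boot allocations; the test below skips them so they never enter phys_avail, though they are still recorded in dump_avail.)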
*/ if (pa >= KERNLOAD && pa < first) goto do_dump_avail; /* * block out dcons buffer */ if (dcons_addr > 0 && pa >= trunc_page(dcons_addr) && pa < dcons_addr + dcons_size) goto do_dump_avail; page_bad = FALSE; if (memtest == 0) goto skip_memtest; /* * map page into kernel: valid, read/write, non-cacheable */ *pte = pa | PG_V | PG_RW | pg_n; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) page_bad = TRUE; /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) page_bad = TRUE; /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) page_bad = TRUE; /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) page_bad = TRUE; /* * Restore original value. */ *(int *)ptr = tmp; skip_memtest: /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) continue; /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one past the last page, * making the range >= start and < end. * If we're also doing a speculative memory * test and we're at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; full = TRUE; goto do_dump_avail; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; do_dump_avail: if (dump_avail[da_indx] == pa) { dump_avail[da_indx] += PAGE_SIZE; } else { da_indx++; if (da_indx == DUMP_AVAIL_ARRAY_END) { da_indx--; goto do_next; } dump_avail[da_indx++] = pa; /* start */ dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ } do_next: if (full) break; } } *pte = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(msgbufsize) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(msgbufsize); /* Map the message buffer. */ for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] + off); } #else /* PC98 */ static void getmemsize(int first) { int has_smap, off, physmap_idx, pa_indx, da_indx; u_long memtest; vm_paddr_t physmap[PHYSMAP_SIZE]; pt_entry_t *pte; quad_t dcons_addr, dcons_size, physmem_tunable; int hasbrokenint12, i, res; u_int extmem; struct vm86frame vmf; struct vm86context vmc; vm_paddr_t pa; struct bios_smap *smap, *smapbase; caddr_t kmdp; has_smap = 0; #ifdef XBOX if (arch_i386_is_xbox) { /* * We queried the memory size before, so chop off 4MB for * the framebuffer and inform the OS of this. */ physmap[0] = 0; physmap[1] = (arch_i386_xbox_memsize * 1024 * 1024) - XBOX_FB_SIZE; physmap_idx = 0; goto physmap_done; } #endif bzero(&vmf, sizeof(vmf)); bzero(physmap, sizeof(physmap)); basemem = 0; /* * Check if the loader supplied an SMAP memory map. If so, * use that and do not make any VM86 calls.
*/ physmap_idx = 0; smapbase = NULL; kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf32 kernel"); smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase != NULL) { add_smap_entries(smapbase, physmap, &physmap_idx); has_smap = 1; goto have_smap; } /* * Some newer BIOSes have a broken INT 12H implementation * which causes a kernel panic immediately. In this case, we * need to use the SMAP to determine the base memory size. */ hasbrokenint12 = 0; TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12); if (hasbrokenint12 == 0) { /* Use INT12 to determine base memory size. */ vm86_intcall(0x12, &vmf); basemem = vmf.vmf_ax; basemem_setup(); } /* * Fetch the memory map with INT 15:E820. Map page 1 R/W into * the kernel page table so we can use it as a buffer. The * kernel will unmap this page later. */ pmap_kenter(KERNBASE + (1 << PAGE_SHIFT), 1 << PAGE_SHIFT); vmc.npages = 0; smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT)); res = vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di); KASSERT(res != 0, ("vm86_getptr() failed: address not found")); vmf.vmf_ebx = 0; do { vmf.vmf_eax = 0xE820; vmf.vmf_edx = SMAP_SIG; vmf.vmf_ecx = sizeof(struct bios_smap); i = vm86_datacall(0x15, &vmf, &vmc); if (i || vmf.vmf_eax != SMAP_SIG) break; has_smap = 1; if (!add_smap_entry(smap, physmap, &physmap_idx)) break; } while (vmf.vmf_ebx != 0); have_smap: /* * If we didn't fetch the "base memory" size from INT12, * figure it out from the SMAP (or just guess). */ if (basemem == 0) { for (i = 0; i <= physmap_idx; i += 2) { if (physmap[i] == 0x00000000) { basemem = physmap[i + 1] / 1024; break; } } /* XXX: If we couldn't find basemem from SMAP, just guess. */ if (basemem == 0) basemem = 640; basemem_setup(); } if (physmap[1] != 0) goto physmap_done; /* * If we failed to find an SMAP, figure out the extended * memory size. We will then build a simple memory map with * two segments, one for "base memory" and the second for * "extended memory". Note that "extended memory" starts at a * physical address of 1MB and that both basemem and extmem * are in units of 1KB. * * First, try to fetch the extended memory size via INT 15:E801. */ vmf.vmf_ax = 0xE801; if (vm86_intcall(0x15, &vmf) == 0) { extmem = vmf.vmf_cx + vmf.vmf_dx * 64; } else { /* * If INT15:E801 fails, this is our last ditch effort * to determine the extended memory size. Currently * we prefer the RTC value over INT15:88. */ #if 0 vmf.vmf_ah = 0x88; vm86_intcall(0x15, &vmf); extmem = vmf.vmf_ax; #else extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8); #endif } /* * Special hack for chipsets that still remap the 384k hole when * there's 16MB of memory - this really confuses people that * are trying to use bus mastering ISA controllers with the * "16MB limit"; they only have 16MB, but the remapping puts * them beyond the limit. * * If extended memory is between 15-16MB (16-17MB phys address range), * chop it to 15MB. */ if ((extmem > 15 * 1024) && (extmem < 16 * 1024)) extmem = 15 * 1024; physmap[0] = 0; physmap[1] = basemem * 1024; physmap_idx = 2; physmap[physmap_idx] = 0x100000; physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; physmap_done: /* * Now, physmap contains a map of physical memory. */ #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1]); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space.
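* As an illustration, with the highest byte of RAM at 0x7FFFFFFF, Maxmem becomes atop(0x80000000) = 0x80000 4K pages.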
It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif if (TUNABLE_QUAD_FETCH("hw.physmem", &physmem_tunable)) Maxmem = atop(physmem_tunable); /* * If we have an SMAP, don't allow MAXMEM or hw.physmem to extend * the amount of memory in the system. */ if (has_smap && Maxmem > atop(physmap[physmap_idx + 1])) Maxmem = atop(physmap[physmap_idx + 1]); /* * By default enable the memory test on real hardware, and disable * it if we appear to be running in a VM. This avoids touching all * pages unnecessarily, which doesn't matter on real hardware but is * bad for shared VM hosts. Use a general name so that * one could eventually do more with the code than just disable it. */ memtest = (vm_guest > VM_GUEST_NO) ? 0 : 1; TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest); if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* * If Maxmem has been increased beyond what the system has detected, * extend the last memory segment to the new limit. */ if (atop(physmap[physmap_idx + 1]) < Maxmem) physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(first); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; da_indx = 1; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; dump_avail[da_indx] = physmap[0]; pte = CMAP3; /* * Get dcons buffer address */ if (getenv_quad("dcons.addr", &dcons_addr) == 0 || getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad, full; int *ptr = (int *)CADDR3; full = FALSE; /* * block out kernel memory as not available. */ if (pa >= KERNLOAD && pa < first) goto do_dump_avail; /* * block out dcons buffer */ if (dcons_addr > 0 && pa >= trunc_page(dcons_addr) && pa < dcons_addr + dcons_size) goto do_dump_avail; page_bad = FALSE; if (memtest == 0) goto skip_memtest; /* * map page into kernel: valid, read/write, non-cacheable */ *pte = pa | PG_V | PG_RW | PG_N; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) page_bad = TRUE; /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) page_bad = TRUE; /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) page_bad = TRUE; /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) page_bad = TRUE; /* * Restore original value. */ *(int *)ptr = tmp; skip_memtest: /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) continue; /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one past the last page, * making the range >= start and < end.
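* As an illustration, a chunk recorded as start 0x1000, end 0x5000 covers exactly the four pages 0x1000-0x4fff.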
* If we're also doing a speculative memory * test and we're at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; full = TRUE; goto do_dump_avail; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; do_dump_avail: if (dump_avail[da_indx] == pa) { dump_avail[da_indx] += PAGE_SIZE; } else { da_indx++; if (da_indx == DUMP_AVAIL_ARRAY_END) { da_indx--; goto do_next; } dump_avail[da_indx++] = pa; /* start */ dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ } do_next: if (full) break; } } *pte = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(msgbufsize) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(msgbufsize); /* Map the message buffer. */ for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] + off); } #endif /* PC98 */ register_t init386(first) int first; { struct gate_descriptor *gdp; int gsel_tss, metadata_missing, x, pa; struct pcpu *pc; #ifdef CPU_ENABLE_SSE struct xstate_hdr *xhdr; #endif thread0.td_kstack = proc0kstack; thread0.td_kstack_pages = TD0_KSTACK_PAGES; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. */ proc_linkup0(&proc0, &thread0); #ifdef PC98 /* * Initialize DMAC */ pc98_init_dmac(); #endif metadata_missing = 0; if (bootinfo.bi_modulep) { preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE; preload_bootstrap_relocate(KERNBASE); } else { metadata_missing = 1; } - if (envmode == 1) - kern_envp = static_env; - else if (bootinfo.bi_envp) - kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE; + + if (bootinfo.bi_envp) + init_static_kenv((caddr_t)bootinfo.bi_envp + KERNBASE, 0); + else + init_static_kenv(NULL, 0); /* Init basic tunables, hz etc */ init_param1(); /* * Make gdt memory segments. All segments cover the full 4GB * of address space and permissions are enforced at page level. */ gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUFS_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUGS_SEL].ssd_limit = atop(0 - 1); pc = &__pcpu[0]; gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1); gdt_segs[GPRIV_SEL].ssd_base = (int) pc; gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &gdt[x].sd); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int) gdt; mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN); lgdt(&r_gdt); pcpu_init(pc, 0, sizeof(struct pcpu)); for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE) pmap_kenter(pa + KERNBASE, pa); dpcpu_init((void *)(first + KERNBASE), 0); first += DPCPU_SIZE; PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); /* * Initialize mutexes.
* * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. */ mutex_init(); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE); /* make ldt memory segments */ ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1); ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1); for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); _default_ldt = GSEL(GLDT_SEL, SEL_KPL); lldt(_default_ldt); PCPU_SET(currentldt, _default_ldt); /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DE, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DB, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_BP, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_OF, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_BR, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NM, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL , GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DF, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_TS, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NP, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_SS, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_PF, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_MF, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_AC, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_MC, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #ifdef KDTRACE_HOOKS setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #endif #ifdef XENHVM setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #endif r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (int) idt; lidt(&r_idt); #ifdef XBOX /* * The following code queries the PCI ID of 0:0:0. For the XBOX, * This should be 0x10de / 0x02a5. * * This is exactly what Linux does. */ outl(0xcf8, 0x80000000); if (inl(0xcfc) == 0x02a510de) { arch_i386_is_xbox = 1; pic16l_setled(XBOX_LED_GREEN); /* * We are an XBOX, but we may have either 64MB or 128MB of * memory. The PCI host bridge should be programmed for this, * so we just query it. */ outl(0xcf8, 0x80000084); arch_i386_xbox_memsize = (inl(0xcfc) == 0x7FFFFFF) ? 128 : 64; } #endif /* XBOX */ /* * Initialize the clock before the console so that console * initialization can use DELAY(). 
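* (DELAY() here is routed through init_ops.early_delay, which defaults to i8254_delay as set up near the top of this file.)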
*/ clock_init(); finishidentcpu(); /* Final stage of CPU initialization */ setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ initializecpucache(); /* pointer to selector slot for %fs/%gs */ PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); #if defined(PAE) || defined(PAE_TABLES) dblfault_tss.tss_cr3 = (int)IdlePDPT; #else dblfault_tss.tss_cr3 = (int)IdlePTD; #endif dblfault_tss.tss_eip = (int)dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); vm86_initialize(); getmemsize(first); init_param2(physmem); /* now running on new page tables, configured, and u/iom is accessible */ /* * Initialize the console before we print anything out. */ cninit(); if (metadata_missing) printf("WARNING: loader(8) metadata is missing!\n"); #ifdef DEV_ISA #ifdef DEV_ATPIC #ifndef PC98 elcr_probe(); #endif atpic_startup(); #else /* Reset and mask the atpics and leave them shut down. */ atpic_reset(); /* * Point the ICU spurious interrupt vectors at the APIC spurious * interrupt handler. */ setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); #endif #endif #ifdef DDB db_fetch_ksymtab(bootinfo.bi_symtab, bootinfo.bi_esymtab); #endif kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif msgbufinit(msgbufp, msgbufsize); #ifdef DEV_NPX npxinit(true); #endif /* * Set up thread0 pcb after npxinit calculated pcb + fpu save * area size. Zero out the extended state header in fpu save * area. */ thread0.td_pcb = get_pcb_td(&thread0); bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size); #ifdef CPU_ENABLE_SSE if (use_xsave) { xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) + 1); xhdr->xstate_bv = xsave_mask; } #endif PCPU_SET(curpcb, thread0.td_pcb); /* make an initial tss so cpu can get interrupt stack on syscall! */ /* Note: -16 is so we can grow the trapframe if we came from vm86 */ PCPU_SET(common_tss.tss_esp0, (vm_offset_t)thread0.td_pcb - 16); PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); ltr(gsel_tss); /* make a call gate to reenter kernel with */ gdp = &ldt[LSYS5CALLS_SEL].gd; x = (int) &IDTVEC(lcall_syscall); gdp->gd_looffset = x; gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); gdp->gd_stkcpy = 1; gdp->gd_type = SDT_SYS386CGT; gdp->gd_dpl = SEL_UPL; gdp->gd_p = 1; gdp->gd_hioffset = x >> 16; /* XXX does this work? */ /* XXX yes!
*/ ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL]; /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; #if defined(PAE) || defined(PAE_TABLES) thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; #else thread0.td_pcb->pcb_cr3 = (int)IdlePTD; #endif thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; cpu_probe_amdc1e(); #ifdef FDT x86_init_fdt(); #endif /* Location of kernel stack for locore */ return ((register_t)thread0.td_pcb); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; } #ifndef PC98 static int smap_sysctl_handler(SYSCTL_HANDLER_ARGS) { struct bios_smap *smapbase; struct bios_smap_xattr smap; caddr_t kmdp; uint32_t *smapattr; int count, error, i; /* Retrieve the system memory map from the loader. */ kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf32 kernel"); smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase == NULL) return (0); smapattr = (uint32_t *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP_XATTR); count = *((u_int32_t *)smapbase - 1) / sizeof(*smapbase); error = 0; for (i = 0; i < count; i++) { smap.base = smapbase[i].base; smap.length = smapbase[i].length; smap.type = smapbase[i].type; if (smapattr != NULL) smap.xattr = smapattr[i]; else smap.xattr = 0; error = SYSCTL_OUT(req, &smap, sizeof(smap)); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, smap, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0, smap_sysctl_handler, "S,bios_smap_xattr", "Raw BIOS SMAP data"); #endif /* !PC98 */ void spinlock_enter(void) { struct thread *td; register_t flags; td = curthread; if (td->td_md.md_spinlock_count == 0) { flags = intr_disable(); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_flags = flags; } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t flags; td = curthread; critical_exit(); flags = td->td_md.md_saved_flags; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(flags); } #if defined(I586_CPU) && !defined(NO_F00F_HACK) static void f00f_hack(void *unused); SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL); static void f00f_hack(void *unused) { struct gate_descriptor *new_idt; vm_offset_t tmp; if (!has_f00f_bug) return; GIANT_REQUIRED; printf("Intel Pentium detected, installing workaround for F00F bug\n"); tmp = kmem_malloc(kernel_arena, PAGE_SIZE * 2, M_WAITOK | M_ZERO); if (tmp == 0) panic("kmem_malloc returned 0"); /* Put the problematic entry (#6) at the end of the lower page. */ new_idt = (struct gate_descriptor*) (tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor)); bcopy(idt, new_idt, sizeof(idt0)); r_idt.rd_base = (u_int)new_idt; lidt(&r_idt); idt = new_idt; pmap_protect(kernel_pmap, tmp, tmp + PAGE_SIZE, VM_PROT_READ); } #endif /* defined(I586_CPU) && !NO_F00F_HACK */ /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. 
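 *
 * Only the callee-saved registers plus the instruction and stack
 * pointers matter for starting an unwind, which is why just
 * edi/esi/ebp/ebx/eip/esp (and %gs) are filled in below.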
*/ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_edi = tf->tf_edi; pcb->pcb_esi = tf->tf_esi; pcb->pcb_ebp = tf->tf_ebp; pcb->pcb_ebx = tf->tf_ebx; pcb->pcb_eip = tf->tf_eip; pcb->pcb_esp = (ISPL(tf->tf_cs)) ? tf->tf_esp : (int)(tf + 1) - 8; pcb->pcb_gs = rgs(); } int ptrace_set_pc(struct thread *td, u_long addr) { td->td_frame->tf_eip = addr; return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_eflags |= PSL_T; return (0); } int ptrace_clear_single_step(struct thread *td) { td->td_frame->tf_eflags &= ~PSL_T; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; pcb = td->td_pcb; regs->r_gs = pcb->pcb_gs; return (fill_frame_regs(tp, regs)); } int fill_frame_regs(struct trapframe *tp, struct reg *regs) { regs->r_fs = tp->tf_fs; regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; regs->r_edi = tp->tf_edi; regs->r_esi = tp->tf_esi; regs->r_ebp = tp->tf_ebp; regs->r_ebx = tp->tf_ebx; regs->r_edx = tp->tf_edx; regs->r_ecx = tp->tf_ecx; regs->r_eax = tp->tf_eax; regs->r_eip = tp->tf_eip; regs->r_cs = tp->tf_cs; regs->r_eflags = tp->tf_eflags; regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; return (0); } int set_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); pcb = td->td_pcb; tp->tf_fs = regs->r_fs; tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; tp->tf_edi = regs->r_edi; tp->tf_esi = regs->r_esi; tp->tf_ebp = regs->r_ebp; tp->tf_ebx = regs->r_ebx; tp->tf_edx = regs->r_edx; tp->tf_ecx = regs->r_ecx; tp->tf_eax = regs->r_eax; tp->tf_eip = regs->r_eip; tp->tf_cs = regs->r_cs; tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; pcb->pcb_gs = regs->r_gs; return (0); } int fill_fpregs(struct thread *td, struct fpreg *fpregs) { KASSERT(td == curthread || TD_IS_SUSPENDED(td) || P_SHOULDSTOP(td->td_proc), ("not suspended thread %p", td)); #ifdef DEV_NPX npxgetregs(td); #else bzero(fpregs, sizeof(*fpregs)); #endif #ifdef CPU_ENABLE_SSE if (cpu_fxsr) npx_fill_fpregs_xmm(&get_pcb_user_save_td(td)->sv_xmm, (struct save87 *)fpregs); else #endif /* CPU_ENABLE_SSE */ bcopy(&get_pcb_user_save_td(td)->sv_87, fpregs, sizeof(*fpregs)); return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) npx_set_fpregs_xmm((struct save87 *)fpregs, &get_pcb_user_save_td(td)->sv_xmm); else #endif /* CPU_ENABLE_SSE */ bcopy(fpregs, &get_pcb_user_save_td(td)->sv_87, sizeof(*fpregs)); #ifdef DEV_NPX npxuserinited(td); #endif return (0); } /* * Get machine context. 
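 *
 * The GET_MC_CLEAR_RET flag handled below zeroes %eax, %edx and PSL_C
 * in the saved context; getcontext(2)-style callers pass it so that a
 * context resumed later via set_mcontext() appears to return 0
 * (success) from the interrupted syscall.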
*/ int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct trapframe *tp; struct segment_descriptor *sdp; tp = td->td_frame; PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(tp->tf_esp); PROC_UNLOCK(curthread->td_proc); mcp->mc_gs = td->td_pcb->pcb_gs; mcp->mc_fs = tp->tf_fs; mcp->mc_es = tp->tf_es; mcp->mc_ds = tp->tf_ds; mcp->mc_edi = tp->tf_edi; mcp->mc_esi = tp->tf_esi; mcp->mc_ebp = tp->tf_ebp; mcp->mc_isp = tp->tf_isp; mcp->mc_eflags = tp->tf_eflags; if (flags & GET_MC_CLEAR_RET) { mcp->mc_eax = 0; mcp->mc_edx = 0; mcp->mc_eflags &= ~PSL_C; } else { mcp->mc_eax = tp->tf_eax; mcp->mc_edx = tp->tf_edx; } mcp->mc_ebx = tp->tf_ebx; mcp->mc_ecx = tp->tf_ecx; mcp->mc_eip = tp->tf_eip; mcp->mc_cs = tp->tf_cs; mcp->mc_esp = tp->tf_esp; mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp, NULL, 0); sdp = &td->td_pcb->pcb_fsd; mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sdp = &td->td_pcb->pcb_gsd; mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; mcp->mc_flags = 0; mcp->mc_xfpustate = 0; mcp->mc_xfpustate_len = 0; bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2)); return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. */ int set_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tp; char *xfpustate; int eflags, ret; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp) || (mcp->mc_flags & ~_MC_FLAG_MASK) != 0) return (EINVAL); eflags = (mcp->mc_eflags & PSL_USERCHANGE) | (tp->tf_eflags & ~PSL_USERCHANGE); if (mcp->mc_flags & _MC_HASFPXSTATE) { if (mcp->mc_xfpustate_len > cpu_max_ext_state_size - sizeof(union savefpu)) return (EINVAL); xfpustate = __builtin_alloca(mcp->mc_xfpustate_len); ret = copyin((void *)mcp->mc_xfpustate, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); } else xfpustate = NULL; ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); tp->tf_fs = mcp->mc_fs; tp->tf_es = mcp->mc_es; tp->tf_ds = mcp->mc_ds; tp->tf_edi = mcp->mc_edi; tp->tf_esi = mcp->mc_esi; tp->tf_ebp = mcp->mc_ebp; tp->tf_ebx = mcp->mc_ebx; tp->tf_edx = mcp->mc_edx; tp->tf_ecx = mcp->mc_ecx; tp->tf_eax = mcp->mc_eax; tp->tf_eip = mcp->mc_eip; tp->tf_eflags = eflags; tp->tf_esp = mcp->mc_esp; tp->tf_ss = mcp->mc_ss; td->td_pcb->pcb_gs = mcp->mc_gs; return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, size_t xfpusave_len) { #ifdef CPU_ENABLE_SSE size_t max_len, len; #endif #ifndef DEV_NPX mcp->mc_fpformat = _MC_FPFMT_NODEV; mcp->mc_ownedfp = _MC_FPOWNED_NONE; bzero(mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); #else mcp->mc_ownedfp = npxgetregs(td); bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0], sizeof(mcp->mc_fpstate)); mcp->mc_fpformat = npxformat(); #ifdef CPU_ENABLE_SSE if (!use_xsave || xfpusave_len == 0) return; max_len = cpu_max_ext_state_size - sizeof(union savefpu); len = xfpusave_len; if (len > max_len) { len = max_len; bzero(xfpusave + max_len, len - max_len); } mcp->mc_flags |= _MC_HASFPXSTATE; mcp->mc_xfpustate_len = len; bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len); #endif #endif } static int set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, size_t xfpustate_len) { union savefpu *fpstate; int error; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_387 && mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) { /* We don't care what state is 
left in the FPU or PCB. */ fpstate_drop(td); error = 0; } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { #ifdef DEV_NPX fpstate = (union savefpu *)&mcp->mc_fpstate; #ifdef CPU_ENABLE_SSE if (cpu_fxsr) fpstate->sv_xmm.sv_env.en_mxcsr &= cpu_mxcsr_mask; #endif error = npxsetregs(td, fpstate, xfpustate, xfpustate_len); #else error = EINVAL; #endif } else return (EINVAL); return (error); } static void fpstate_drop(struct thread *td) { KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu")); critical_enter(); #ifdef DEV_NPX if (PCPU_GET(fpcurthread) == td) npxdrop(); #endif /* * XXX force a full drop of the npx. The above only drops it if we * owned it. npxgetregs() has the same bug in the !cpu_fxsr case. * * XXX I don't much like npxgetregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But * sendsig() is the only caller of npxgetregs()... perhaps we just * have too many layers. */ curthread->td_pcb->pcb_flags &= ~(PCB_NPXINITDONE | PCB_NPXUSERINITDONE); critical_exit(); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; if (td == NULL) { dbregs->dr[0] = rdr0(); dbregs->dr[1] = rdr1(); dbregs->dr[2] = rdr2(); dbregs->dr[3] = rdr3(); dbregs->dr[4] = rdr4(); dbregs->dr[5] = rdr5(); dbregs->dr[6] = rdr6(); dbregs->dr[7] = rdr7(); } else { pcb = td->td_pcb; dbregs->dr[0] = pcb->pcb_dr0; dbregs->dr[1] = pcb->pcb_dr1; dbregs->dr[2] = pcb->pcb_dr2; dbregs->dr[3] = pcb->pcb_dr3; dbregs->dr[4] = 0; dbregs->dr[5] = 0; dbregs->dr[6] = pcb->pcb_dr6; dbregs->dr[7] = pcb->pcb_dr7; } return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; int i; if (td == NULL) { load_dr0(dbregs->dr[0]); load_dr1(dbregs->dr[1]); load_dr2(dbregs->dr[2]); load_dr3(dbregs->dr[3]); load_dr4(dbregs->dr[4]); load_dr5(dbregs->dr[5]); load_dr6(dbregs->dr[6]); load_dr7(dbregs->dr[7]); } else { /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns * results in undefined behaviour and can lead to an unexpected * TRCTRAP. */ for (i = 0; i < 4; i++) { if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02) return (EINVAL); if (DBREG_DR7_LEN(dbregs->dr[7], i) == 0x02) return (EINVAL); } pcb = td->td_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * the process's address space. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode?
*/ if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) { /* dr0 is enabled */ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) { /* dr1 is enabled */ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) { /* dr2 is enabled */ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) { /* dr3 is enabled */ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) return (EINVAL); } pcb->pcb_dr0 = dbregs->dr[0]; pcb->pcb_dr1 = dbregs->dr[1]; pcb->pcb_dr2 = dbregs->dr[2]; pcb->pcb_dr3 = dbregs->dr[3]; pcb->pcb_dr6 = dbregs->dr[6]; pcb->pcb_dr7 = dbregs->dr[7]; pcb->pcb_flags |= PCB_DBREGS; } return (0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. */ int user_dbreg_trap(void) { u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ u_int32_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; dr6 = rdr6(); bp = dr6 & 0x0000000f; if (!bp) { /* * None of the breakpoint bits are set, meaning this * trap was not caused by any of the debug registers */ return 0; } /* * at least one of the breakpoints was hit; check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i = 0; i < nbp; i++) { if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { /* * addr[i] is in user space */ return nbp; } } /* * None of the breakpoints are in user space. */ return 0; } #ifdef KDB /* * Provide inb() and outb() as functions. They are normally only available as * inline functions, and thus cannot be called from the debugger. */ /* silence compiler warnings */ u_char inb_(u_short); void outb_(u_short, u_char); u_char inb_(u_short port) { return inb(port); } void outb_(u_short port, u_char data) { outb(port, data); } #endif /* KDB */ diff --git a/sys/kern/kern_environment.c b/sys/kern/kern_environment.c index 03c722601979..cd6930b9cada 100644 --- a/sys/kern/kern_environment.c +++ b/sys/kern/kern_environment.c @@ -1,623 +1,655 @@ /*- * Copyright (c) 1998 Michael Smith * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * The unified bootloader passes us a pointer to a preserved copy of * bootstrap/kernel environment variables. We convert them to a * dynamic array of strings later when the VM subsystem is up. * * We make these available through the kenv(2) syscall for userland * and through kern_getenv()/freeenv(), kern_setenv(), kern_unsetenv() and * testenv() for the kernel. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_KENV, "kenv", "kernel environment"); #define KENV_SIZE 512 /* Maximum number of environment strings */ /* pointer to the static environment */ char *kern_envp; static int env_len; static int env_pos; static char *kernenv_next(char *); /* dynamic environment variables */ char **kenvp; struct mtx kenv_lock; /* * No need to protect this with a mutex since SYSINITs are single-threaded. */ int dynamic_kenv = 0; #define KENV_CHECK if (!dynamic_kenv) \ panic("%s: called before SI_SUB_KMEM", __func__) int sys_kenv(td, uap) struct thread *td; struct kenv_args /* { int what; const char *name; char *value; int len; } */ *uap; { char *name, *value, *buffer = NULL; size_t len, done, needed, buflen; int error, i; KASSERT(dynamic_kenv, ("kenv: dynamic_kenv = 0")); error = 0; if (uap->what == KENV_DUMP) { #ifdef MAC error = mac_kenv_check_dump(td->td_ucred); if (error) return (error); #endif done = needed = 0; buflen = uap->len; if (buflen > KENV_SIZE * (KENV_MNAMELEN + KENV_MVALLEN + 2)) buflen = KENV_SIZE * (KENV_MNAMELEN + KENV_MVALLEN + 2); if (uap->len > 0 && uap->value != NULL) buffer = malloc(buflen, M_TEMP, M_WAITOK|M_ZERO); mtx_lock(&kenv_lock); for (i = 0; kenvp[i] != NULL; i++) { len = strlen(kenvp[i]) + 1; needed += len; len = min(len, buflen - done); /* * If called with a NULL or insufficiently large * buffer, just keep computing the required size. */ if (uap->value != NULL && buffer != NULL && len > 0) { bcopy(kenvp[i], buffer + done, len); done += len; } } mtx_unlock(&kenv_lock); if (buffer != NULL) { error = copyout(buffer, uap->value, done); free(buffer, M_TEMP); } td->td_retval[0] = ((done == needed) ?
0 : needed); return (error); } switch (uap->what) { case KENV_SET: error = priv_check(td, PRIV_KENV_SET); if (error) return (error); break; case KENV_UNSET: error = priv_check(td, PRIV_KENV_UNSET); if (error) return (error); break; } name = malloc(KENV_MNAMELEN + 1, M_TEMP, M_WAITOK); error = copyinstr(uap->name, name, KENV_MNAMELEN + 1, NULL); if (error) goto done; switch (uap->what) { case KENV_GET: #ifdef MAC error = mac_kenv_check_get(td->td_ucred, name); if (error) goto done; #endif value = kern_getenv(name); if (value == NULL) { error = ENOENT; goto done; } len = strlen(value) + 1; if (len > uap->len) len = uap->len; error = copyout(value, uap->value, len); freeenv(value); if (error) goto done; td->td_retval[0] = len; break; case KENV_SET: len = uap->len; if (len < 1) { error = EINVAL; goto done; } if (len > KENV_MVALLEN + 1) len = KENV_MVALLEN + 1; value = malloc(len, M_TEMP, M_WAITOK); error = copyinstr(uap->value, value, len, NULL); if (error) { free(value, M_TEMP); goto done; } #ifdef MAC error = mac_kenv_check_set(td->td_ucred, name, value); if (error == 0) #endif kern_setenv(name, value); free(value, M_TEMP); break; case KENV_UNSET: #ifdef MAC error = mac_kenv_check_unset(td->td_ucred, name); if (error) goto done; #endif error = kern_unsetenv(name); if (error) error = ENOENT; break; default: error = EINVAL; break; } done: free(name, M_TEMP); return (error); } +/* + * Populate the initial kernel environment. + * + * This is called very early in MD startup, either to provide a copy of the + * environment obtained from a boot loader, or to provide an empty buffer into + * which MD code can store an initial environment using kern_setenv() calls. + * + * If the global envmode is 1, the environment is initialized from the global + * static_env[], regardless of the arguments passed. This implements the env + * keyword described in config(5). In this case env_pos is set to env_len, + * causing kern_setenv() to return -1 (if len > 0) or panic (if len == 0) until + * the dynamic environment is available. The envmode and static_env variables + * are defined in env.c which is generated by config(8). + * + * If len is non-zero, the caller is providing an empty buffer. The caller will + * subsequently use kern_setenv() to add up to len bytes of initial environment + * before the dynamic environment is available. + * + * If len is zero, the caller is providing a pre-loaded buffer containing + * environment strings. Additional strings cannot be added until the dynamic + * environment is available. The memory pointed to must remain stable at least + * until sysinit runs init_dynamic_kenv(). If no initial environment is + * available from the boot loader, passing a NULL pointer allows the static_env + * to be installed if it is configured. + */ void init_static_kenv(char *buf, size_t len) { - kern_envp = buf; - env_len = len; - env_pos = 0; + + if (envmode == 1) { + kern_envp = static_env; + env_len = len; + env_pos = len; + } else { + kern_envp = buf; + env_len = len; + env_pos = 0; + } } /* * Setup the dynamic kernel environment. 
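 *
 * A minimal sketch of the two-phase hand-off this completes (the
 * buffer and variable names here are hypothetical; the real callers
 * are the MD init functions touched elsewhere in this change):
 *
 *	static char early_env[256];
 *
 *	init_static_kenv(early_env, sizeof(early_env));
 *	kern_setenv("hw.example.debug", "1");
 *
 * The string first lands in early_env; once this SI_SUB_KMEM sysinit
 * has run, the same variable is served from the malloc'd kenvp[]
 * array and kern_setenv()/kern_unsetenv() operate on that instead.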
*/ static void init_dynamic_kenv(void *data __unused) { char *cp, *cpnext; size_t len; int i; kenvp = malloc((KENV_SIZE + 1) * sizeof(char *), M_KENV, M_WAITOK | M_ZERO); i = 0; if (kern_envp && *kern_envp != '\0') { for (cp = kern_envp; cp != NULL; cp = cpnext) { cpnext = kernenv_next(cp); len = strlen(cp) + 1; if (len > KENV_MNAMELEN + 1 + KENV_MVALLEN + 1) { printf( "WARNING: too long kenv string, ignoring %s\n", cp); continue; } if (i < KENV_SIZE) { kenvp[i] = malloc(len, M_KENV, M_WAITOK); strcpy(kenvp[i++], cp); memset(cp, 0, strlen(cp)); } else printf( "WARNING: too many kenv strings, ignoring %s\n", cp); } } kenvp[i] = NULL; mtx_init(&kenv_lock, "kernel environment", NULL, MTX_DEF); dynamic_kenv = 1; } SYSINIT(kenv, SI_SUB_KMEM, SI_ORDER_ANY, init_dynamic_kenv, NULL); void freeenv(char *env) { if (dynamic_kenv && env != NULL) { memset(env, 0, strlen(env)); free(env, M_KENV); } } /* * Internal functions for string lookup. */ static char * _getenv_dynamic(const char *name, int *idx) { char *cp; int len, i; mtx_assert(&kenv_lock, MA_OWNED); len = strlen(name); for (cp = kenvp[0], i = 0; cp != NULL; cp = kenvp[++i]) { if ((strncmp(cp, name, len) == 0) && (cp[len] == '=')) { if (idx != NULL) *idx = i; return (cp + len + 1); } } return (NULL); } static char * _getenv_static(const char *name) { char *cp, *ep; int len; for (cp = kern_envp; cp != NULL; cp = kernenv_next(cp)) { for (ep = cp; (*ep != '=') && (*ep != 0); ep++) ; if (*ep != '=') continue; len = ep - cp; ep++; if (!strncmp(name, cp, len) && name[len] == 0) return (ep); } return (NULL); } /* * Look up an environment variable by name. * Return a pointer to the string if found. * The pointer has to be freed with freeenv() * after use. */ char * kern_getenv(const char *name) { char buf[KENV_MNAMELEN + 1 + KENV_MVALLEN + 1]; char *ret; if (dynamic_kenv) { if (getenv_string(name, buf, sizeof(buf))) { ret = strdup(buf, M_KENV); } else { ret = NULL; WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "getenv"); } } else ret = _getenv_static(name); return (ret); } /* * Test if an environment variable is defined. */ int testenv(const char *name) { char *cp; if (dynamic_kenv) { mtx_lock(&kenv_lock); cp = _getenv_dynamic(name, NULL); mtx_unlock(&kenv_lock); } else cp = _getenv_static(name); if (cp != NULL) return (1); return (0); } static int setenv_static(const char *name, const char *value) { int len; if (env_pos >= env_len) return (-1); /* Check space for x=y and two nuls */ len = strlen(name) + strlen(value); if (len + 3 < env_len - env_pos) { len = sprintf(&kern_envp[env_pos], "%s=%s", name, value); env_pos += len+1; kern_envp[env_pos] = '\0'; return (0); } else return (-1); } /* * Set an environment variable by name. 
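 *
 * Before the dynamic environment exists, this appends to the static
 * buffer via setenv_static() above: "name=value\0" is written at
 * env_pos and a second NUL terminates the list, which is why the
 * space check reserves strlen(name) + strlen(value) + 3 bytes (the
 * '=' plus two NULs) and -1 is returned once the buffer is full.
 * After SI_SUB_KMEM, the variable goes into the malloc'd kenvp[]
 * array instead.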
*/ int kern_setenv(const char *name, const char *value) { char *buf, *cp, *oldenv; int namelen, vallen, i; if (dynamic_kenv == 0 && env_len > 0) return (setenv_static(name, value)); KENV_CHECK; namelen = strlen(name) + 1; if (namelen > KENV_MNAMELEN + 1) return (-1); vallen = strlen(value) + 1; if (vallen > KENV_MVALLEN + 1) return (-1); buf = malloc(namelen + vallen, M_KENV, M_WAITOK); sprintf(buf, "%s=%s", name, value); mtx_lock(&kenv_lock); cp = _getenv_dynamic(name, &i); if (cp != NULL) { oldenv = kenvp[i]; kenvp[i] = buf; mtx_unlock(&kenv_lock); free(oldenv, M_KENV); } else { /* We add the option if it wasn't found */ for (i = 0; (cp = kenvp[i]) != NULL; i++) ; /* Bounds checking */ if (i < 0 || i >= KENV_SIZE) { free(buf, M_KENV); mtx_unlock(&kenv_lock); return (-1); } kenvp[i] = buf; kenvp[i + 1] = NULL; mtx_unlock(&kenv_lock); } return (0); } /* * Unset an environment variable string. */ int kern_unsetenv(const char *name) { char *cp, *oldenv; int i, j; KENV_CHECK; mtx_lock(&kenv_lock); cp = _getenv_dynamic(name, &i); if (cp != NULL) { oldenv = kenvp[i]; for (j = i + 1; kenvp[j] != NULL; j++) kenvp[i++] = kenvp[j]; kenvp[i] = NULL; mtx_unlock(&kenv_lock); memset(oldenv, 0, strlen(oldenv)); free(oldenv, M_KENV); return (0); } mtx_unlock(&kenv_lock); return (-1); } /* * Return a string value from an environment variable. */ int getenv_string(const char *name, char *data, int size) { char *cp; if (dynamic_kenv) { mtx_lock(&kenv_lock); cp = _getenv_dynamic(name, NULL); if (cp != NULL) strlcpy(data, cp, size); mtx_unlock(&kenv_lock); } else { cp = _getenv_static(name); if (cp != NULL) strlcpy(data, cp, size); } return (cp != NULL); } /* * Return an integer value from an environment variable. */ int getenv_int(const char *name, int *data) { quad_t tmp; int rval; rval = getenv_quad(name, &tmp); if (rval) *data = (int) tmp; return (rval); } /* * Return an unsigned integer value from an environment variable. */ int getenv_uint(const char *name, unsigned int *data) { quad_t tmp; int rval; rval = getenv_quad(name, &tmp); if (rval) *data = (unsigned int) tmp; return (rval); } /* * Return a long value from an environment variable. */ int getenv_long(const char *name, long *data) { quad_t tmp; int rval; rval = getenv_quad(name, &tmp); if (rval) *data = (long) tmp; return (rval); } /* * Return an unsigned long value from an environment variable. */ int getenv_ulong(const char *name, unsigned long *data) { quad_t tmp; int rval; rval = getenv_quad(name, &tmp); if (rval) *data = (unsigned long) tmp; return (rval); } /* * Return a quad_t value from an environment variable. */ int getenv_quad(const char *name, quad_t *data) { char value[KENV_MNAMELEN + 1 + KENV_MVALLEN + 1]; char *vtp; quad_t iv; if (!getenv_string(name, value, sizeof(value))) return (0); iv = strtoq(value, &vtp, 0); if (vtp == value || (vtp[0] != '\0' && vtp[1] != '\0')) return (0); switch (vtp[0]) { case 't': case 'T': iv *= 1024; case 'g': case 'G': iv *= 1024; case 'm': case 'M': iv *= 1024; case 'k': case 'K': iv *= 1024; case '\0': break; default: return (0); } *data = iv; return (1); } /* * Find the next entry after the one which (cp) falls within, return a * pointer to its start or NULL if there are no more. 
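 *
 * The static environment is laid out as consecutive NUL-terminated
 * strings with an empty string marking the end, e.g. (hypothetical
 * contents):
 *
 *	"boot_verbose=1\0hw.foo.debug=3\0\0"
 *
 * so stepping past one terminating NUL lands either on the next entry
 * or on the final empty string, for which NULL is returned.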
*/ static char * kernenv_next(char *cp) { if (cp != NULL) { while (*cp != 0) cp++; cp++; if (*cp == 0) cp = NULL; } return (cp); } void tunable_int_init(void *data) { struct tunable_int *d = (struct tunable_int *)data; TUNABLE_INT_FETCH(d->path, d->var); } void tunable_long_init(void *data) { struct tunable_long *d = (struct tunable_long *)data; TUNABLE_LONG_FETCH(d->path, d->var); } void tunable_ulong_init(void *data) { struct tunable_ulong *d = (struct tunable_ulong *)data; TUNABLE_ULONG_FETCH(d->path, d->var); } void tunable_quad_init(void *data) { struct tunable_quad *d = (struct tunable_quad *)data; TUNABLE_QUAD_FETCH(d->path, d->var); } void tunable_str_init(void *data) { struct tunable_str *d = (struct tunable_str *)data; TUNABLE_STR_FETCH(d->path, d->var, d->size); } diff --git a/sys/kgssapi/gss_impl.c b/sys/kgssapi/gss_impl.c index ab6e55bc377d..3a427a5e8126 100644 --- a/sys/kgssapi/gss_impl.c +++ b/sys/kgssapi/gss_impl.c @@ -1,333 +1,336 @@ /*- * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ * Authors: Doug Rabson * Developed with Red Inc: Alfred Perlstein * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "gssd.h" #include "kgss_if.h" MALLOC_DEFINE(M_GSSAPI, "GSS-API", "GSS-API"); /* * Syscall hooks */ static int gssd_syscall_offset = SYS_gssd_syscall; static struct sysent gssd_syscall_prev_sysent; MAKE_SYSENT(gssd_syscall); static bool_t gssd_syscall_registered = FALSE; struct kgss_mech_list kgss_mechs; CLIENT *kgss_gssd_handle; struct mtx kgss_gssd_lock; static void kgss_init(void *dummy) { int error; LIST_INIT(&kgss_mechs); error = syscall_register(&gssd_syscall_offset, &gssd_syscall_sysent, &gssd_syscall_prev_sysent, SY_THR_STATIC_KLD); if (error) printf("Can't register GSSD syscall\n"); else gssd_syscall_registered = TRUE; } SYSINIT(kgss_init, SI_SUB_LOCK, SI_ORDER_FIRST, kgss_init, NULL); static void kgss_uninit(void *dummy) { if (gssd_syscall_registered) syscall_deregister(&gssd_syscall_offset, &gssd_syscall_prev_sysent); } SYSUNINIT(kgss_uninit, SI_SUB_LOCK, SI_ORDER_FIRST, kgss_uninit, NULL); int sys_gssd_syscall(struct thread *td, struct gssd_syscall_args *uap) { struct sockaddr_un sun; struct netconfig *nconf; char path[MAXPATHLEN]; int error; CLIENT *cl, *oldcl; error = priv_check(td, PRIV_NFS_DAEMON); if (error) return (error); error = copyinstr(uap->path, path, sizeof(path), NULL); if (error) return (error); - sun.sun_family = AF_LOCAL; - strcpy(sun.sun_path, path); - sun.sun_len = SUN_LEN(&sun); - - nconf = getnetconfigent("local"); - cl = clnt_reconnect_create(nconf, - (struct sockaddr *) &sun, GSSD, GSSDVERS, - RPC_MAXDATASIZE, RPC_MAXDATASIZE); + if (path[0] != '\0') { + sun.sun_family = AF_LOCAL; + strcpy(sun.sun_path, path); + sun.sun_len = SUN_LEN(&sun); + + nconf = getnetconfigent("local"); + cl = clnt_reconnect_create(nconf, + (struct sockaddr *) &sun, GSSD, GSSDVERS, + RPC_MAXDATASIZE, RPC_MAXDATASIZE); + } else + cl = NULL; mtx_lock(&kgss_gssd_lock); oldcl = kgss_gssd_handle; kgss_gssd_handle = cl; mtx_unlock(&kgss_gssd_lock); if (oldcl != NULL) { CLNT_CLOSE(oldcl); CLNT_RELEASE(oldcl); } return (0); } int kgss_oid_equal(const gss_OID oid1, const gss_OID oid2) { if (oid1 == oid2) return (1); if (!oid1 || !oid2) return (0); if (oid1->length != oid2->length) return (0); if (memcmp(oid1->elements, oid2->elements, oid1->length)) return (0); return (1); } void kgss_install_mech(gss_OID mech_type, const char *name, struct kobj_class *cls) { struct kgss_mech *km; km = malloc(sizeof(struct kgss_mech), M_GSSAPI, M_WAITOK); km->km_mech_type = mech_type; km->km_mech_name = name; km->km_class = cls; LIST_INSERT_HEAD(&kgss_mechs, km, km_link); } void kgss_uninstall_mech(gss_OID mech_type) { struct kgss_mech *km; LIST_FOREACH(km, &kgss_mechs, km_link) { if (kgss_oid_equal(km->km_mech_type, mech_type)) { LIST_REMOVE(km, km_link); free(km, M_GSSAPI); return; } } } gss_OID kgss_find_mech_by_name(const char *name) { struct kgss_mech *km; LIST_FOREACH(km, &kgss_mechs, km_link) { if (!strcmp(km->km_mech_name, name)) { return (km->km_mech_type); } } return (GSS_C_NO_OID); } const char * kgss_find_mech_by_oid(const gss_OID oid) { struct kgss_mech *km; LIST_FOREACH(km, &kgss_mechs, km_link) { if (kgss_oid_equal(km->km_mech_type, oid)) { return (km->km_mech_name); } } return (NULL); } gss_ctx_id_t kgss_create_context(gss_OID mech_type) { struct kgss_mech *km; gss_ctx_id_t ctx; LIST_FOREACH(km, &kgss_mechs, km_link) { if (kgss_oid_equal(km->km_mech_type, mech_type)) break; } if (!km) return (NULL); ctx = 
(gss_ctx_id_t) kobj_create(km->km_class, M_GSSAPI, M_WAITOK); KGSS_INIT(ctx); return (ctx); } void kgss_delete_context(gss_ctx_id_t ctx, gss_buffer_t output_token) { KGSS_DELETE(ctx, output_token); kobj_delete((kobj_t) ctx, M_GSSAPI); } OM_uint32 kgss_transfer_context(gss_ctx_id_t ctx) { struct export_sec_context_res res; struct export_sec_context_args args; enum clnt_stat stat; OM_uint32 maj_stat; if (!kgss_gssd_handle) return (GSS_S_FAILURE); args.ctx = ctx->handle; bzero(&res, sizeof(res)); stat = gssd_export_sec_context_1(&args, &res, kgss_gssd_handle); if (stat != RPC_SUCCESS) { return (GSS_S_FAILURE); } maj_stat = KGSS_IMPORT(ctx, res.format, &res.interprocess_token); ctx->handle = 0; xdr_free((xdrproc_t) xdr_export_sec_context_res, &res); return (maj_stat); } void kgss_copy_buffer(const gss_buffer_t from, gss_buffer_t to) { to->length = from->length; if (from->length) { to->value = malloc(from->length, M_GSSAPI, M_WAITOK); bcopy(from->value, to->value, from->length); } else { to->value = NULL; } } /* * Acquire the kgss_gssd_handle and return it with a reference count, * if it is available. */ CLIENT * kgss_gssd_client(void) { CLIENT *cl; mtx_lock(&kgss_gssd_lock); cl = kgss_gssd_handle; if (cl != NULL) CLNT_ACQUIRE(cl); mtx_unlock(&kgss_gssd_lock); return (cl); } /* * Kernel module glue */ static int kgssapi_modevent(module_t mod, int type, void *data) { int error = 0; switch (type) { case MOD_LOAD: rpc_gss_entries.rpc_gss_refresh_auth = rpc_gss_refresh_auth; rpc_gss_entries.rpc_gss_secfind = rpc_gss_secfind; rpc_gss_entries.rpc_gss_secpurge = rpc_gss_secpurge; rpc_gss_entries.rpc_gss_seccreate = rpc_gss_seccreate; rpc_gss_entries.rpc_gss_set_defaults = rpc_gss_set_defaults; rpc_gss_entries.rpc_gss_max_data_length = rpc_gss_max_data_length; rpc_gss_entries.rpc_gss_get_error = rpc_gss_get_error; rpc_gss_entries.rpc_gss_mech_to_oid = rpc_gss_mech_to_oid; rpc_gss_entries.rpc_gss_oid_to_mech = rpc_gss_oid_to_mech; rpc_gss_entries.rpc_gss_qop_to_num = rpc_gss_qop_to_num; rpc_gss_entries.rpc_gss_get_mechanisms = rpc_gss_get_mechanisms; rpc_gss_entries.rpc_gss_get_versions = rpc_gss_get_versions; rpc_gss_entries.rpc_gss_is_installed = rpc_gss_is_installed; rpc_gss_entries.rpc_gss_set_svc_name = rpc_gss_set_svc_name; rpc_gss_entries.rpc_gss_clear_svc_name = rpc_gss_clear_svc_name; rpc_gss_entries.rpc_gss_getcred = rpc_gss_getcred; rpc_gss_entries.rpc_gss_set_callback = rpc_gss_set_callback; rpc_gss_entries.rpc_gss_clear_callback = rpc_gss_clear_callback; rpc_gss_entries.rpc_gss_get_principal_name = rpc_gss_get_principal_name; rpc_gss_entries.rpc_gss_svc_max_data_length = rpc_gss_svc_max_data_length; mtx_init(&kgss_gssd_lock, "kgss_gssd_lock", NULL, MTX_DEF); break; case MOD_UNLOAD: /* * Unloading of the kgssapi module is not currently supported. * If somebody wants this, we would need to keep track of * currently executing threads and make sure the count is 0. */ /* FALLTHROUGH */ default: error = EOPNOTSUPP; }; return (error); } static moduledata_t kgssapi_mod = { "kgssapi", kgssapi_modevent, NULL, }; DECLARE_MODULE(kgssapi, kgssapi_mod, SI_SUB_VFS, SI_ORDER_ANY); MODULE_DEPEND(kgssapi, krpc, 1, 1, 1); MODULE_VERSION(kgssapi, 1); diff --git a/sys/mips/beri/beri_machdep.c b/sys/mips/beri/beri_machdep.c index 758083f681f8..54f61abaa811 100644 --- a/sys/mips/beri/beri_machdep.c +++ b/sys/mips/beri/beri_machdep.c @@ -1,317 +1,317 @@ /*- * Copyright (c) 2006 Wojciech A. Koszek * Copyright (c) 2012-2014 Robert N. M. Watson * All rights reserved. 
* * This software was developed by SRI International and the University of * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237) * ("CTSRD"), as part of the DARPA CRASH research programme. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef FDT #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include extern int *edata; extern int *end; void platform_cpu_init() { /* Nothing special */ } static void mips_init(void) { int i; #ifdef FDT struct mem_region mr[FDT_MEM_REGIONS]; int mr_cnt, val; int j; #endif for (i = 0; i < 10; i++) { phys_avail[i] = 0; } /* phys_avail regions are in bytes */ phys_avail[0] = MIPS_KSEG0_TO_PHYS(kernel_kseg0_end); phys_avail[1] = ctob(realmem); dump_avail[0] = phys_avail[0]; dump_avail[1] = phys_avail[1]; physmem = realmem; #ifdef FDT if (fdt_get_mem_regions(mr, &mr_cnt, &val) == 0) { physmem = btoc(val); KASSERT((phys_avail[0] >= mr[0].mr_start) && \ (phys_avail[0] < (mr[0].mr_start + mr[0].mr_size)), ("First region is not within FDT memory range")); /* Limit size of the first region */ phys_avail[1] = (mr[0].mr_start + MIN(mr[0].mr_size, ctob(realmem))); dump_avail[1] = phys_avail[1]; /* Add the rest of regions */ for (i = 1, j = 2; i < mr_cnt; i++, j+=2) { phys_avail[j] = mr[i].mr_start; phys_avail[j+1] = (mr[i].mr_start + mr[i].mr_size); dump_avail[j] = phys_avail[j]; dump_avail[j+1] = phys_avail[j+1]; } } #endif init_param1(); init_param2(physmem); mips_cpu_init(); pmap_bootstrap(); mips_proc0_init(); mutex_init(); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } /* * Perform a board-level soft-reset. */ void platform_reset(void) { /* XXX SMP will likely require us to do more. */ __asm__ __volatile__( "mfc0 $k0, $12\n\t" "li $k1, 0x00100000\n\t" "or $k0, $k0, $k1\n\t" "mtc0 $k0, $12\n"); for( ; ; ) __asm__ __volatile("wait"); } #ifdef FDT /* Parse cmd line args as env - copied from xlp_machdep. 
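 *
 * Dash-prefixed words set boothowto flags and name=value words become
 * kernel environment variables; a bare name is set to "1".  For
 * example, a bootargs string of "-sv hw.test=3" (hypothetical
 * variable) yields boothowto |= RB_SINGLE | RB_VERBOSE and
 * kern_setenv("hw.test", "3").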
*/ /* XXX-BZ this should really be centrally provided for all (boot) code. */ static void _parse_bootargs(char *cmdline) { char *n, *v; while ((v = strsep(&cmdline, " \n")) != NULL) { if (*v == '\0') continue; if (*v == '-') { while (*v != '\0') { v++; switch (*v) { case 'a': boothowto |= RB_ASKNAME; break; /* Someone should simulate that ;-) */ case 'C': boothowto |= RB_CDROM; break; case 'd': boothowto |= RB_KDB; break; case 'D': boothowto |= RB_MULTIPLE; break; case 'm': boothowto |= RB_MUTE; break; case 'g': boothowto |= RB_GDB; break; case 'h': boothowto |= RB_SERIAL; break; case 'p': boothowto |= RB_PAUSE; break; case 'r': boothowto |= RB_DFLTROOT; break; case 's': boothowto |= RB_SINGLE; break; case 'v': boothowto |= RB_VERBOSE; break; } } } else { n = strsep(&v, "="); if (v == NULL) kern_setenv(n, "1"); else kern_setenv(n, v); } } } #endif void platform_start(__register_t a0, __register_t a1, __register_t a2, __register_t a3) { struct bootinfo *bootinfop; vm_offset_t kernend; uint64_t platform_counter_freq; int argc = a0; char **argv = (char **)a1; char **envp = (char **)a2; long memsize; #ifdef FDT char buf[2048]; /* early stack supposedly big enough */ vm_offset_t dtbp; phandle_t chosen; void *kmdp; #endif int i; /* clear the BSS and SBSS segments */ kernend = (vm_offset_t)&end; memset(&edata, 0, kernend - (vm_offset_t)(&edata)); mips_postboot_fixup(); mips_pcpu0_init(); /* * Over time, we've changed our boot-time binary interface for the * kernel. Miniboot simply provides a 'memsize' in a3, whereas the * FreeBSD boot loader provides a 'bootinfo *' in a3. While slightly * grody, we support both here by detecting 'pointer-like' values in * a3 and assuming physical memory can never be that large. * * XXXRW: Pull more values than memsize out of bootinfop -- e.g., * module information. */ if (a3 >= 0x9800000000000000ULL) { bootinfop = (void *)a3; memsize = bootinfop->bi_memsize; preload_metadata = (caddr_t)bootinfop->bi_modulep; } else { bootinfop = NULL; memsize = a3; } #ifdef FDT /* * Find the dtb passed in by the boot loader (currently fictional). */ kmdp = preload_search_by_type("elf kernel"); dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t); #if defined(FDT_DTB_STATIC) /* * In case the device tree blob was not retrieved (from metadata), try * to use the statically embedded one. */ if (dtbp == (vm_offset_t)NULL) dtbp = (vm_offset_t)&fdt_static_dtb; #else #error "Non-static FDT not yet supported on BERI" #endif if (OF_install(OFW_FDT, 0) == FALSE) while (1); if (OF_init((void *)dtbp) != 0) while (1); /* * Configure more boot-time parameters passed in by loader. */ boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); - kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); + init_static_kenv(MD_FETCH(kmdp, MODINFOMD_ENVP, char *), 0); /* * Get bootargs from FDT if specified. */ chosen = OF_finddevice("/chosen"); if (OF_getprop(chosen, "bootargs", buf, sizeof(buf)) != -1) _parse_bootargs(buf); #endif /* * XXXRW: We have no way to compare wallclock time to cycle rate on * BERI, so for now assume we run at the MALTA default (100MHz).
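 *
 * (MIPS_DEFAULT_HZ below is that 100MHz placeholder; the early timer
 * setup only needs some plausible rate so that DELAY() is usable
 * before cninit() runs, mirroring the i386 comment about clock_init()
 * earlier in this change.)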
*/ platform_counter_freq = MIPS_DEFAULT_HZ; mips_timer_early_init(platform_counter_freq); cninit(); printf("entry: platform_start()\n"); bootverbose = 1; if (bootverbose) { printf("cmd line: "); for (i = 0; i < argc; i++) printf("%s ", argv[i]); printf("\n"); printf("envp:\n"); for (i = 0; envp[i]; i += 2) printf("\t%s = %s\n", envp[i], envp[i+1]); if (bootinfop != NULL) printf("bootinfo found at %p\n", bootinfop); printf("memsize = %p\n", (void *)memsize); } realmem = btoc(memsize); mips_init(); mips_timer_init_params(platform_counter_freq, 0); } diff --git a/sys/modules/Makefile b/sys/modules/Makefile index 0fad8aaceb5d..76a12d3c73c3 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -1,786 +1,787 @@ # $FreeBSD$ SYSDIR?=${.CURDIR}/.. .include "${SYSDIR}/conf/kern.opts.mk" SUBDIR_PARALLEL= # Modules that include binary-only blobs of microcode should be selectable by # MK_SOURCELESS_UCODE option (see below). .if defined(MODULES_OVERRIDE) && !defined(ALL_MODULES) SUBDIR=${MODULES_OVERRIDE} .else SUBDIR= \ ${_3dfx} \ ${_3dfx_linux} \ ${_aac} \ ${_aacraid} \ accf_data \ accf_dns \ accf_http \ acl_nfs4 \ acl_posix1e \ ${_acpi} \ ae \ ${_aesni} \ age \ ${_agp} \ aha \ ${_ahb} \ ahci \ ${_aic} \ aic7xxx \ aio \ alc \ ale \ alq \ ${_amdsbwd} \ ${_amdtemp} \ amr \ ${_an} \ ${_aout} \ ${_apm} \ ${_arcmsr} \ ${_arcnet} \ ${_asmc} \ ata \ ath \ ath_pci \ ${_autofs} \ ${_auxio} \ ${_bce} \ bfe \ bge \ ${_bxe} \ ${_bios} \ ${_bktr} \ ${_bm} \ bridgestp \ bwi \ bwn \ cam \ ${_canbepm} \ ${_canbus} \ ${_cardbus} \ ${_carp} \ cas \ ${_cbb} \ cc \ cd9660 \ cd9660_iconv \ ${_ce} \ ${_cfi} \ ${_ciss} \ cloudabi \ ${_cloudabi64} \ ${_cm} \ ${_cmx} \ ${_coff} \ ${_coretemp} \ ${_cp} \ ${_cpsw} \ ${_cpuctl} \ ${_cpufreq} \ ${_crypto} \ ${_cryptodev} \ ${_cs} \ ${_ct} \ ${_ctau} \ ctl \ ${_cxgb} \ ${_cxgbe} \ dc \ dcons \ dcons_crom \ de \ ${_dpms} \ ${_dpt} \ ${_drm} \ ${_drm2} \ dummynet \ ${_ed} \ ${_elink} \ ${_em} \ en \ ${_ep} \ ${_epic} \ esp \ ${_et} \ ${_ex} \ ${_exca} \ ext2fs \ ${_fatm} \ fdc \ fdescfs \ ${_fe} \ ${_filemon} \ firewire \ firmware \ fuse \ ${_fxp} \ gem \ geom \ ${_glxiic} \ ${_glxsb} \ hatm \ hifn \ hme \ ${_hpt27xx} \ ${_hptiop} \ ${_hptmv} \ ${_hptnr} \ ${_hptrr} \ hwpmc \ ${_hyperv} \ i2c \ ${_ibcore} \ ${_ibcs2} \ ${_ichwd} \ ${_ida} \ ${_ie} \ if_bridge \ if_disc \ if_edsc \ ${_if_enc} \ if_epair \ ${_if_gif} \ ${_if_gre} \ ${_if_me} \ if_lagg \ ${_if_ndis} \ ${_if_stf} \ if_tap \ if_tun \ if_vlan \ if_vxlan \ ${_igb} \ ${_iir} \ imgact_binmisc \ ${_io} \ ${_ioat} \ ${_ipoib} \ ${_ipdivert} \ ${_ipfilter} \ ${_ipfw} \ ipfw_nat \ ${_ipmi} \ ip6_mroute_mod \ ip_mroute_mod \ ${_ips} \ ${_ipw} \ ${_ipwfw} \ ${_isci} \ isp \ ${_ispfw} \ ${_iwi} \ ${_iwifw} \ ${_iwm} \ ${_iwmfw} \ ${_iwn} \ ${_iwnfw} \ ${_ix} \ ${_ixv} \ ${_ixgb} \ ${_ixl} \ ${_ixlv} \ jme \ joy \ kbdmux \ kgssapi \ kgssapi_krb5 \ khelp \ krpc \ ksyms \ le \ lge \ libalias \ libiconv \ libmbpool \ libmchain \ ${_linprocfs} \ ${_linsysfs} \ ${_linux} \ ${_linux_common} \ ${_linux64} \ linuxkpi \ lmc \ lpt \ mac_biba \ mac_bsdextended \ mac_ifoff \ mac_lomac \ mac_mls \ mac_none \ mac_partition \ mac_portacl \ mac_seeotheruids \ mac_stub \ mac_test \ malo \ mcd \ md \ mdio \ mem \ mfi \ mii \ mlx \ ${_mlx4} \ ${_mlx4ib} \ ${_mlxen} \ ${_mlx5} \ ${_mlx5en} \ ${_mly} \ mmc \ mmcsd \ mpr \ mps \ mpt \ mqueue \ mrsas \ msdosfs \ msdosfs_iconv \ ${_mse} \ msk \ ${_mthca} \ mvs \ mwl \ ${_mwlfw} \ mxge \ my \ ${_nandfs} \ ${_nandsim} \ ${_ncr} \ ${_ncv} \ ${_ndis} \ netfpga10g \ ${_netgraph} \ ${_nfe} \ nfscl \ 
nfscommon \ nfsd \ nfslock \ nfslockd \ nfssvc \ nge \ nmdm \ ${_nsp} \ nullfs \ ${_ntb} \ ${_nvd} \ ${_nvme} \ ${_nvram} \ ${_nxge} \ oce \ otus \ otusfw \ ow \ ${_padlock} \ ${_padlock_rng} \ patm \ ${_pccard} \ ${_pcfclock} \ pcn \ ${_pf} \ ${_pflog} \ ${_pfsync} \ plip \ ${_pmc} \ ${_pms} \ ppbus \ ppc \ ppi \ pps \ procfs \ proto \ pseudofs \ ${_pst} \ pty \ puc \ ${_qlxge} \ ${_qlxgb} \ ${_qlxgbe} \ ral \ ${_ralfw} \ ${_random_fortuna} \ ${_random_yarrow} \ ${_random_other} \ rc4 \ ${_rdma} \ ${_rdrand_rng} \ re \ reiserfs \ rl \ rtwn \ - rtwnfw \ + ${_rtwnfw} \ ${_s3} \ ${_safe} \ ${_sbni} \ scc \ scd \ ${_scsi_low} \ sdhci \ sdhci_pci \ sem \ send \ ${_sf} \ ${_sfxge} \ sge \ ${_si} \ siba_bwn \ siftr \ siis \ sis \ sk \ smbfs \ sn \ ${_snc} \ snp \ sound \ ${_speaker} \ ${_splash} \ ${_sppp} \ ste \ ${_stg} \ stge \ ${_streams} \ ${_svr4} \ ${_sym} \ ${_syscons} \ sysvipc \ ${_ti} \ tcp/fastpath \ tests/framework \ tests/callout_test \ tl \ tmpfs \ ${_toecore} \ ${_tpm} \ trm \ ${_twa} \ twe \ tws \ tx \ ${_txp} \ uart \ ubsec \ udf \ udf_iconv \ ufs \ unionfs \ usb \ utopia \ ${_vesa} \ ${_virtio} \ vge \ ${_viawd} \ videomode \ vkbd \ ${_vmm} \ ${_vmware} \ ${_vpo} \ vr \ vte \ vx \ ${_vxge} \ wb \ ${_wbwd} \ ${_wds} \ ${_wi} \ ${_wl} \ wlan \ wlan_acl \ wlan_amrr \ wlan_ccmp \ wlan_rssadapt \ wlan_tkip \ wlan_wep \ wlan_xauth \ ${_wpi} \ ${_wpifw} \ ${_x86bios} \ ${_xe} \ xl \ zlib .if ${MK_AUTOFS} != "no" || defined(ALL_MODULES) _autofs= autofs .endif .if ${MK_CDDL} != "no" || defined(ALL_MODULES) .if (${MACHINE_CPUARCH} != "arm" || ${MACHINE_ARCH:Marmv6*} != "") && \ ${MACHINE_CPUARCH} != "mips" && \ ${MACHINE_CPUARCH} != "sparc64" SUBDIR+= dtrace .endif SUBDIR+= opensolaris .endif .if ${MK_CRYPT} != "no" || defined(ALL_MODULES) .if exists(${.CURDIR}/../opencrypto) _crypto= crypto _cryptodev= cryptodev _random_fortuna=random_fortuna _random_yarrow= random_yarrow _random_other= random_other .endif .endif .if ${MK_CUSE} != "no" || defined(ALL_MODULES) SUBDIR+= cuse .endif .if (${MK_INET_SUPPORT} != "no" || ${MK_INET6_SUPPORT} != "no") || \ defined(ALL_MODULES) _carp= carp _toecore= toecore _if_enc= if_enc _if_gif= if_gif _if_gre= if_gre .endif .if (${MK_INET_SUPPORT} != "no" && ${MK_INET6_SUPPORT} != "no") || \ defined(ALL_MODULES) _if_stf= if_stf .endif .if ${MK_INET_SUPPORT} != "no" || defined(ALL_MODULES) _if_me= if_me _ipdivert= ipdivert _ipfw= ipfw .endif .if ${MK_IPFILTER} != "no" || defined(ALL_MODULES) _ipfilter= ipfilter .endif .if ${MK_ISCSI} != "no" || defined(ALL_MODULES) SUBDIR+= iscsi SUBDIR+= iscsi_initiator .endif .if ${MK_NAND} != "no" || defined(ALL_MODULES) _nandfs= nandfs _nandsim= nandsim .endif .if ${MK_NETGRAPH} != "no" || defined(ALL_MODULES) _netgraph= netgraph .endif .if (${MK_PF} != "no" && (${MK_INET_SUPPORT} != "no" || \ ${MK_INET6_SUPPORT} != "no")) || defined(ALL_MODULES) _pf= pf _pflog= pflog .if ${MK_INET_SUPPORT} != "no" _pfsync= pfsync .endif .endif .if ${MK_SOURCELESS_UCODE} != "no" _bce= bce _fatm= fatm _fxp= fxp _ispfw= ispfw _mwlfw= mwlfw _ralfw= ralfw +_rtwnfw= rtwnfw _sf= sf _ti= ti _txp= txp .endif .if ${MK_SOURCELESS_UCODE} != "no" && ${MACHINE_CPUARCH} != "arm" && \ ${MACHINE_ARCH:C/mips(el)?/mips/} != "mips" && \ ${MACHINE_ARCH} != "powerpc" _cxgbe= cxgbe .endif .if ${MK_ZFS} != "no" || defined(ALL_MODULES) SUBDIR+= zfs .endif .if ${MACHINE_CPUARCH} != "aarch64" && ${MACHINE_CPUARCH} != "arm" && \ ${MACHINE_CPUARCH} != "mips" && ${MACHINE_CPUARCH} != "powerpc" _syscons= syscons _vpo= vpo .endif .if ${MACHINE_CPUARCH} != "mips" # no 
BUS_SPACE_UNSPECIFIED # No barrier instruction support (specific to this driver) _sym= sym # intr_disable() is a macro, causes problems .if ${MK_SOURCELESS_UCODE} != "no" _cxgb= cxgb .endif .endif .if ${MACHINE_CPUARCH} == "aarch64" _em= em _igb= igb .endif .if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64" _agp= agp _an= an _aout= aout _bktr= bktr _bxe= bxe _cardbus= cardbus _cbb= cbb _cpuctl= cpuctl _cpufreq= cpufreq _cs= cs _dpms= dpms _drm= drm _drm2= drm2 _ed= ed _em= em _ep= ep _et= et _exca= exca _fe= fe _filemon= filemon .if ${MK_OFED} != "no" || defined(ALL_MODULES) _ibcore= ibcore .endif _if_ndis= if_ndis _igb= igb _io= io .if ${MK_OFED} != "no" || defined(ALL_MODULES) _ipoib= ipoib .endif _ix= ix _ixv= ixv _linprocfs= linprocfs _linsysfs= linsysfs _linux= linux _ndis= ndis _pccard= pccard .if ${MK_OFED} != "no" || defined(ALL_MODULES) _rdma= rdma .endif _safe= safe _scsi_low= scsi_low _si= si _speaker= speaker _splash= splash _sppp= sppp _vmware= vmware _vxge= vxge _wbwd= wbwd _wi= wi _xe= xe .if ${MACHINE} != "pc98" _aac= aac _aacraid= aacraid _acpi= acpi .if ${MK_CRYPT} != "no" || defined(ALL_MODULES) _aesni= aesni .endif _amdsbwd= amdsbwd _amdtemp= amdtemp _arcmsr= arcmsr _asmc= asmc _ciss= ciss _cmx= cmx _coretemp= coretemp .if ${MK_SOURCELESS_HOST} != "no" _hpt27xx= hpt27xx .endif _hptiop= hptiop .if ${MK_SOURCELESS_HOST} != "no" _hptmv= hptmv _hptnr= hptnr _hptrr= hptrr .endif _hyperv= hyperv _ichwd= ichwd _ida= ida _iir= iir _ipmi= ipmi _ips= ips _isci= isci _ipw= ipw _iwi= iwi _iwm= iwm _iwn= iwn _ixgb= ixgb .if ${MK_SOURCELESS_UCODE} != "no" _ipwfw= ipwfw _iwifw= iwifw _iwmfw= iwmfw _iwnfw= iwnfw .endif .if ${MK_OFED} != "no" || defined(ALL_MODULES) _mlx4= mlx4 _mlx4ib= mlx4ib _mlxen= mlxen .endif _mlx5= mlx5 .if (${MK_INET_SUPPORT} != "no" && ${MK_INET6_SUPPORT} != "no") || \ defined(ALL_MODULES) _mlx5en= mlx5en .endif _mly= mly .if ${MK_OFED} != "no" || defined(ALL_MODULES) _mthca= mthca .endif _nfe= nfe _nvd= nvd _nvme= nvme _nvram= nvram _nxge= nxge .if ${MK_CRYPT} != "no" || defined(ALL_MODULES) _padlock= padlock _padlock_rng= padlock_rng _rdrand_rng= rdrand_rng .endif _s3= s3 _tpm= tpm _twa= twa _vesa= vesa _viawd= viawd _virtio= virtio _wpi= wpi .if ${MK_SOURCELESS_UCODE} != "no" _wpifw= wpifw .endif _x86bios= x86bios .endif .endif .if ${MACHINE_CPUARCH} == "amd64" _ioat= ioat _ixl= ixl _ixlv= ixlv _linux64= linux64 _linux_common= linux_common _ntb= ntb _pms= pms _qlxge= qlxge _qlxgb= qlxgb _qlxgbe= qlxgbe _sfxge= sfxge .if ${MK_BHYVE} != "no" || defined(ALL_MODULES) _vmm= vmm .endif .endif .if ${MACHINE_CPUARCH} == "i386" # XXX some of these can move to the general case when de-i386'ed # XXX some of these can move now, but are untested on other architectures. 
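# Each _name variable here stays empty unless the architecture and
# build-option tests assign the module name, so an unset ${_name}
# expands to nothing in SUBDIR and the module is silently skipped;
# the rtwnfw hunk in this change uses the same pattern to gate that
# firmware module on MK_SOURCELESS_UCODE.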
_3dfx= 3dfx _3dfx_linux= 3dfx_linux _aic= aic _apm= apm _arcnet= arcnet .if ${MK_SOURCELESS_UCODE} != "no" _ce= ce .endif _coff= coff .if ${MK_SOURCELESS_UCODE} != "no" _cp= cp .endif _elink= elink _glxiic= glxiic _glxsb= glxsb #_ibcs2= ibcs2 _ie= ie _mse= mse _ncr= ncr _ncv= ncv _nsp= nsp _pcfclock= pcfclock _pst= pst _sbni= sbni _streams= streams _stg= stg _svr4= svr4 _wds= wds .if ${MACHINE} == "i386" .if ${MK_EISA} != "no" _ahb= ahb .endif _bios= bios _cm= cm .if ${MK_SOURCELESS_UCODE} != "no" _ctau= ctau .endif _dpt= dpt _ex= ex _wl= wl .elif ${MACHINE} == "pc98" _canbepm= canbepm _canbus= canbus _ct= ct _pmc= pmc _snc= snc .endif .endif .if ${MACHINE_CPUARCH} == "arm" _cfi= cfi _cpsw= cpsw .endif .if ${MACHINE_CPUARCH} == "powerpc" _agp= agp _an= an _bm= bm _cardbus= cardbus _cbb= cbb _cfi= cfi _cpufreq= cpufreq _drm= drm _exca= exca _nvram= powermac_nvram _pccard= pccard _wi= wi .endif .if ${MACHINE_ARCH} == "powerpc64" _drm2= drm2 .endif .if ${MACHINE_CPUARCH} == "sparc64" _auxio= auxio _em= em _epic= epic _igb= igb .endif .if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64" _cloudabi64= cloudabi64 .endif .endif SUBDIR+=${MODULES_EXTRA} .for reject in ${WITHOUT_MODULES} SUBDIR:= ${SUBDIR:N${reject}} .endfor # Calling kldxref(8) for each module is expensive. .if !defined(NO_XREF) .MAKEFLAGS+= -DNO_XREF afterinstall: @if type kldxref >/dev/null 2>&1; then \ ${ECHO} kldxref ${DESTDIR}${KMODDIR}; \ kldxref ${DESTDIR}${KMODDIR}; \ fi .endif .include "${SYSDIR}/conf/config.mk" # Use sys/conf/kmod.mk's MPATH to avoid redundantly running in every subdir. .if !defined(__MPATH) __MPATH!=find ${SYSDIR:tA}/ -name \*_if.m .export __MPATH .endif SUBDIR:= ${SUBDIR:u:O} .include diff --git a/sys/net/route.c b/sys/net/route.c index 86f99ee93df9..11a5b8da8483 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -1,2188 +1,2331 @@ /*- * Copyright (c) 1980, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)route.c 8.3.1.1 (Berkeley) 2/23/95 * $FreeBSD$ */ /************************************************************************ * Note: In this file a 'fib' is a "forwarding information base" * * Which is the new name for an in kernel routing (next hop) table. * ***********************************************************************/ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_route.h" #include "opt_sctp.h" #include "opt_mrouting.h" #include "opt_mpath.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RADIX_MPATH #include #endif #include #include #include #define RT_MAXFIBS UINT16_MAX /* Kernel config default option. */ #ifdef ROUTETABLES #if ROUTETABLES <= 0 #error "ROUTETABLES defined too low" #endif #if ROUTETABLES > RT_MAXFIBS #error "ROUTETABLES defined too big" #endif #define RT_NUMFIBS ROUTETABLES #endif /* ROUTETABLES */ /* Initialize to default if not otherwise set. */ #ifndef RT_NUMFIBS #define RT_NUMFIBS 1 #endif #if defined(INET) || defined(INET6) #ifdef SCTP extern void sctp_addr_change(struct ifaddr *ifa, int cmd); #endif /* SCTP */ #endif /* This is read-only.. */ u_int rt_numfibs = RT_NUMFIBS; SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RDTUN, &rt_numfibs, 0, ""); /* * By default add routes to all fibs for new interfaces. * Once this is set to 0 then only allocate routes on interface * changes for the FIB of the caller when adding a new set of addresses * to an interface. XXX this is a shotgun aproach to a problem that needs * a more fine grained solution.. that will come. * XXX also has the problems getting the FIB from curthread which will not * always work given the fib can be overridden and prefixes can be added * from the network stack context. */ VNET_DEFINE(u_int, rt_add_addr_allfibs) = 1; SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(rt_add_addr_allfibs), 0, ""); VNET_DEFINE(struct rtstat, rtstat); #define V_rtstat VNET(rtstat) VNET_DEFINE(struct radix_node_head *, rt_tables); #define V_rt_tables VNET(rt_tables) VNET_DEFINE(int, rttrash); /* routes not in table but not freed */ #define V_rttrash VNET(rttrash) /* * Convert a 'struct radix_node *' to a 'struct rtentry *'. * The operation can be done safely (in this code) because a * 'struct rtentry' starts with two 'struct radix_node''s, the first * one representing leaf nodes in the routing tree, which is * what the code in radix.c passes us as a 'struct radix_node'. * * But because there are a lot of assumptions in this conversion, * do not cast explicitly, but always use the macro below. */ #define RNTORT(p) ((struct rtentry *)(p)) static VNET_DEFINE(uma_zone_t, rtzone); /* Routing table UMA zone. 
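 * Being VNET_DEFINE'd, each vnet gets its own zone; it is created in
 * vnet_route_init() and destroyed in vnet_route_uninit() below.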
 */
#define	V_rtzone	VNET(rtzone)

static int rtrequest1_fib_change(struct radix_node_head *, struct rt_addrinfo *,
    struct rtentry **, u_int);
static void rt_setmetrics(const struct rt_addrinfo *, struct rtentry *);
static int rt_ifdelroute(const struct rtentry *rt, void *arg);
static struct rtentry *rt_unlinkrte(struct radix_node_head *rnh,
    struct rt_addrinfo *info, int *perror);
static void rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info);
#ifdef RADIX_MPATH
static struct radix_node *rt_mpath_unlink(struct radix_node_head *rnh,
    struct rt_addrinfo *info, struct rtentry *rto, int *perror);
#endif
+static int rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info,
+    int flags);

struct if_mtuinfo
{
	struct ifnet	*ifp;
	int		mtu;
};

static int	if_updatemtu_cb(struct radix_node *, void *);

/*
 * handler for net.my_fibnum
 */
static int
sysctl_my_fibnum(SYSCTL_HANDLER_ARGS)
{
	int fibnum;
	int error;

	fibnum = curthread->td_proc->p_fibnum;
	error = sysctl_handle_int(oidp, &fibnum, 0, req);
	return (error);
}

SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD,
	    NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller");

static __inline struct radix_node_head **
rt_tables_get_rnh_ptr(int table, int fam)
{
	struct radix_node_head **rnh;

	KASSERT(table >= 0 && table < rt_numfibs, ("%s: table out of bounds.",
	    __func__));
	KASSERT(fam >= 0 && fam < (AF_MAX+1), ("%s: fam out of bounds.",
	    __func__));

	/* rnh is [fib=0][af=0]. */
	rnh = (struct radix_node_head **)V_rt_tables;
	/* Get the offset to the requested table and fam. */
	rnh += table * (AF_MAX+1) + fam;

	return (rnh);
}

struct radix_node_head *
rt_tables_get_rnh(int table, int fam)
{

	return (*rt_tables_get_rnh_ptr(table, fam));
}

/*
 * route initialization must occur before ip6_init2(), which happens at
 * SI_ORDER_MIDDLE.
 */
static void
route_init(void)
{

	/* whack the tunable ints into line.
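	 * rt_numfibs normally arrives here from the "net.fibs" loader
	 * tunable or the ROUTETABLES kernel option, e.g. (a sketch):
	 *
	 *	echo 'net.fibs=4' >> /boot/loader.conf
	 *
	 * after which a process can bind to one of the tables with
	 * setfib(2).  All this function does is clamp out-of-range
	 * values into [1, RT_MAXFIBS].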
*/ if (rt_numfibs > RT_MAXFIBS) rt_numfibs = RT_MAXFIBS; if (rt_numfibs == 0) rt_numfibs = 1; } SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0); static int rtentry_zinit(void *mem, int size, int how) { struct rtentry *rt = mem; rt->rt_pksent = counter_u64_alloc(how); if (rt->rt_pksent == NULL) return (ENOMEM); RT_LOCK_INIT(rt); return (0); } static void rtentry_zfini(void *mem, int size) { struct rtentry *rt = mem; RT_LOCK_DESTROY(rt); counter_u64_free(rt->rt_pksent); } static int rtentry_ctor(void *mem, int size, void *arg, int how) { struct rtentry *rt = mem; bzero(rt, offsetof(struct rtentry, rt_endzero)); counter_u64_zero(rt->rt_pksent); rt->rt_chain = NULL; return (0); } static void rtentry_dtor(void *mem, int size, void *arg) { struct rtentry *rt = mem; RT_UNLOCK_COND(rt); } static void vnet_route_init(const void *unused __unused) { struct domain *dom; struct radix_node_head **rnh; int table; int fam; V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) * sizeof(struct radix_node_head *), M_RTABLE, M_WAITOK|M_ZERO); V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), rtentry_ctor, rtentry_dtor, rtentry_zinit, rtentry_zfini, UMA_ALIGN_PTR, 0); for (dom = domains; dom; dom = dom->dom_next) { if (dom->dom_rtattach == NULL) continue; for (table = 0; table < rt_numfibs; table++) { fam = dom->dom_family; if (table != 0 && fam != AF_INET6 && fam != AF_INET) break; rnh = rt_tables_get_rnh_ptr(table, fam); if (rnh == NULL) panic("%s: rnh NULL", __func__); dom->dom_rtattach((void **)rnh, 0); } } } VNET_SYSINIT(vnet_route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, vnet_route_init, 0); #ifdef VIMAGE static void vnet_route_uninit(const void *unused __unused) { int table; int fam; struct domain *dom; struct radix_node_head **rnh; for (dom = domains; dom; dom = dom->dom_next) { if (dom->dom_rtdetach == NULL) continue; for (table = 0; table < rt_numfibs; table++) { fam = dom->dom_family; if (table != 0 && fam != AF_INET6 && fam != AF_INET) break; rnh = rt_tables_get_rnh_ptr(table, fam); if (rnh == NULL) panic("%s: rnh NULL", __func__); dom->dom_rtdetach((void **)rnh, 0); } } free(V_rt_tables, M_RTABLE); uma_zdestroy(V_rtzone); } VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, vnet_route_uninit, 0); #endif #ifndef _SYS_SYSPROTO_H_ struct setfib_args { int fibnum; }; #endif int sys_setfib(struct thread *td, struct setfib_args *uap) { if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs) return EINVAL; td->td_proc->p_fibnum = uap->fibnum; return (0); } /* * Packet routing routines. */ void rtalloc(struct route *ro) { rtalloc_ign_fib(ro, 0UL, RT_DEFAULT_FIB); } void rtalloc_fib(struct route *ro, u_int fibnum) { rtalloc_ign_fib(ro, 0UL, fibnum); } void rtalloc_ign(struct route *ro, u_long ignore) { struct rtentry *rt; if ((rt = ro->ro_rt) != NULL) { if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP) return; RTFREE(rt); ro->ro_rt = NULL; } ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, RT_DEFAULT_FIB); if (ro->ro_rt) RT_UNLOCK(ro->ro_rt); } void rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum) { struct rtentry *rt; if ((rt = ro->ro_rt) != NULL) { if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP) return; RTFREE(rt); ro->ro_rt = NULL; } ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, fibnum); if (ro->ro_rt) RT_UNLOCK(ro->ro_rt); } /* * Look up the route that matches the address given * Or, at least try.. Create a cloned route if needed. * * The returned route, if any, is locked. 
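 *
 * A minimal caller sketch (assuming "dst" points at a filled-in
 * sockaddr); the entry comes back locked and referenced, so both
 * must be released:
 *
 *	struct rtentry *rt;
 *
 *	rt = rtalloc1_fib(dst, 1, 0UL, RT_DEFAULT_FIB);
 *	if (rt != NULL) {
 *		... inspect rt_key(rt), rt->rt_gateway, rt->rt_ifp ...
 *		RTFREE_LOCKED(rt);	drops the ref and the lock
 *	}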
*/ struct rtentry * rtalloc1(struct sockaddr *dst, int report, u_long ignflags) { return (rtalloc1_fib(dst, report, ignflags, RT_DEFAULT_FIB)); } struct rtentry * rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags, u_int fibnum) { struct radix_node_head *rnh; struct radix_node *rn; struct rtentry *newrt; struct rt_addrinfo info; int err = 0, msgtype = RTM_MISS; int needlock; KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum")); rnh = rt_tables_get_rnh(fibnum, dst->sa_family); newrt = NULL; if (rnh == NULL) goto miss; /* * Look up the address in the table for that Address Family */ needlock = !(ignflags & RTF_RNH_LOCKED); if (needlock) RADIX_NODE_HEAD_RLOCK(rnh); #ifdef INVARIANTS else RADIX_NODE_HEAD_LOCK_ASSERT(rnh); #endif rn = rnh->rnh_matchaddr(dst, rnh); if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) { newrt = RNTORT(rn); RT_LOCK(newrt); RT_ADDREF(newrt); if (needlock) RADIX_NODE_HEAD_RUNLOCK(rnh); goto done; } else if (needlock) RADIX_NODE_HEAD_RUNLOCK(rnh); /* * Either we hit the root or couldn't find any match, * Which basically means * "caint get there frm here" */ miss: V_rtstat.rts_unreach++; if (report) { /* * If required, report the failure to the supervising * Authorities. * For a delete, this is not an error. (report == 0) */ bzero(&info, sizeof(info)); info.rti_info[RTAX_DST] = dst; rt_missmsg_fib(msgtype, &info, 0, err, fibnum); } done: if (newrt) RT_LOCK_ASSERT(newrt); return (newrt); } /* * Remove a reference count from an rtentry. * If the count gets low enough, take it out of the routing table */ void rtfree(struct rtentry *rt) { struct radix_node_head *rnh; KASSERT(rt != NULL,("%s: NULL rt", __func__)); rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family); KASSERT(rnh != NULL,("%s: NULL rnh", __func__)); RT_LOCK_ASSERT(rt); /* * The callers should use RTFREE_LOCKED() or RTFREE(), so * we should come here exactly with the last reference. */ RT_REMREF(rt); if (rt->rt_refcnt > 0) { log(LOG_DEBUG, "%s: %p has %d refs\n", __func__, rt, rt->rt_refcnt); goto done; } /* * On last reference give the "close method" a chance * to cleanup private state. This also permits (for * IPv4 and IPv6) a chance to decide if the routing table * entry should be purged immediately or at a later time. * When an immediate purge is to happen the close routine * typically calls rtexpunge which clears the RTF_UP flag * on the entry so that the code below reclaims the storage. */ if (rt->rt_refcnt == 0 && rnh->rnh_close) rnh->rnh_close((struct radix_node *)rt, rnh); /* * If we are no longer "up" (and ref == 0) * then we can free the resources associated * with the route. */ if ((rt->rt_flags & RTF_UP) == 0) { if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)) panic("rtfree 2"); /* * the rtentry must have been removed from the routing table * so it is represented in rttrash.. remove that now. */ V_rttrash--; #ifdef DIAGNOSTIC if (rt->rt_refcnt < 0) { printf("rtfree: %p not freed (neg refs)\n", rt); goto done; } #endif /* * release references on items we hold them on.. * e.g other routes and ifaddrs. */ if (rt->rt_ifa) ifa_free(rt->rt_ifa); /* * The key is separatly alloc'd so free it (see rt_setgate()). * This also frees the gateway, as they are always malloc'd * together. */ R_Free(rt_key(rt)); /* * and the rtentry itself of course */ uma_zfree(V_rtzone, rt); return; } done: RT_UNLOCK(rt); } /* * Force a routing table entry to the specified * destination to go through the given gateway. 
* Normally called as a result of a routing redirect * message from the network layer. */ void rtredirect(struct sockaddr *dst, struct sockaddr *gateway, struct sockaddr *netmask, int flags, struct sockaddr *src) { rtredirect_fib(dst, gateway, netmask, flags, src, RT_DEFAULT_FIB); } void rtredirect_fib(struct sockaddr *dst, struct sockaddr *gateway, struct sockaddr *netmask, int flags, struct sockaddr *src, u_int fibnum) { struct rtentry *rt, *rt0 = NULL; int error = 0; short *stat = NULL; struct rt_addrinfo info; struct ifaddr *ifa; struct radix_node_head *rnh; ifa = NULL; rnh = rt_tables_get_rnh(fibnum, dst->sa_family); if (rnh == NULL) { error = EAFNOSUPPORT; goto out; } /* verify the gateway is directly reachable */ if ((ifa = ifa_ifwithnet(gateway, 0, fibnum)) == NULL) { error = ENETUNREACH; goto out; } rt = rtalloc1_fib(dst, 0, 0UL, fibnum); /* NB: rt is locked */ /* * If the redirect isn't from our current router for this dst, * it's either old or wrong. If it redirects us to ourselves, * we have a routing loop, perhaps as a result of an interface * going down recently. */ if (!(flags & RTF_DONE) && rt) { if (!sa_equal(src, rt->rt_gateway)) { error = EINVAL; goto done; } if (rt->rt_ifa != ifa && ifa->ifa_addr->sa_family != AF_LINK) { error = EINVAL; goto done; } } if ((flags & RTF_GATEWAY) && ifa_ifwithaddr_check(gateway)) { error = EHOSTUNREACH; goto done; } /* * Create a new entry if we just got back a wildcard entry * or the lookup failed. This is necessary for hosts * which use routing redirects generated by smart gateways * to dynamically build the routing tables. */ if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2)) goto create; /* * Don't listen to the redirect if it's * for a route to an interface. */ if (rt->rt_flags & RTF_GATEWAY) { if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) { /* * Changing from route to net => route to host. * Create new route, rather than smashing route to net. */ create: rt0 = rt; rt = NULL; flags |= RTF_DYNAMIC; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_info[RTAX_NETMASK] = netmask; info.rti_ifa = ifa; info.rti_flags = flags; if (rt0 != NULL) RT_UNLOCK(rt0); /* drop lock to avoid LOR with RNH */ error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum); if (rt != NULL) { RT_LOCK(rt); if (rt0 != NULL) EVENTHANDLER_INVOKE(route_redirect_event, rt0, rt, dst); flags = rt->rt_flags; } if (rt0 != NULL) RTFREE(rt0); stat = &V_rtstat.rts_dynamic; } else { struct rtentry *gwrt; /* * Smash the current notion of the gateway to * this destination. Should check about netmask!!! */ if ((flags & RTF_GATEWAY) == 0) rt->rt_flags &= ~RTF_GATEWAY; rt->rt_flags |= RTF_MODIFIED; flags |= RTF_MODIFIED; stat = &V_rtstat.rts_newgateway; /* * add the key and gateway (in one malloc'd chunk). 
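			 * The rtentry lock is dropped and re-taken around
			 * the radix node head lock below because the head
			 * lock must be acquired first; this mirrors the
			 * "drop lock to avoid LOR with RNH" note earlier
			 * in this function.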
*/ RT_UNLOCK(rt); RADIX_NODE_HEAD_LOCK(rnh); RT_LOCK(rt); rt_setgate(rt, rt_key(rt), gateway); gwrt = rtalloc1(gateway, 1, RTF_RNH_LOCKED); RADIX_NODE_HEAD_UNLOCK(rnh); EVENTHANDLER_INVOKE(route_redirect_event, rt, gwrt, dst); if (gwrt) RTFREE_LOCKED(gwrt); } } else error = EHOSTUNREACH; done: if (rt) RTFREE_LOCKED(rt); out: if (error) V_rtstat.rts_badredirect++; else if (stat != NULL) (*stat)++; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_info[RTAX_NETMASK] = netmask; info.rti_info[RTAX_AUTHOR] = src; rt_missmsg_fib(RTM_REDIRECT, &info, flags, error, fibnum); if (ifa != NULL) ifa_free(ifa); } int rtioctl(u_long req, caddr_t data) { return (rtioctl_fib(req, data, RT_DEFAULT_FIB)); } /* * Routing table ioctl interface. */ int rtioctl_fib(u_long req, caddr_t data, u_int fibnum) { /* * If more ioctl commands are added here, make sure the proper * super-user checks are being performed because it is possible for * prison-root to make it this far if raw sockets have been enabled * in jails. */ #ifdef INET /* Multicast goop, grrr... */ return mrt_ioctl ? mrt_ioctl(req, data, fibnum) : EOPNOTSUPP; #else /* INET */ return ENXIO; #endif /* INET */ } struct ifaddr * ifa_ifwithroute(int flags, const struct sockaddr *dst, struct sockaddr *gateway, u_int fibnum) { struct ifaddr *ifa; int not_found = 0; if ((flags & RTF_GATEWAY) == 0) { /* * If we are adding a route to an interface, * and the interface is a pt to pt link * we should search for the destination * as our clue to the interface. Otherwise * we can use the local address. */ ifa = NULL; if (flags & RTF_HOST) ifa = ifa_ifwithdstaddr(dst, fibnum); if (ifa == NULL) ifa = ifa_ifwithaddr(gateway); } else { /* * If we are adding a route to a remote net * or host, the gateway may still be on the * other end of a pt to pt link. */ ifa = ifa_ifwithdstaddr(gateway, fibnum); } if (ifa == NULL) ifa = ifa_ifwithnet(gateway, 0, fibnum); if (ifa == NULL) { struct rtentry *rt = rtalloc1_fib(gateway, 0, RTF_RNH_LOCKED, fibnum); if (rt == NULL) return (NULL); /* * dismiss a gateway that is reachable only * through the default router */ switch (gateway->sa_family) { case AF_INET: if (satosin(rt_key(rt))->sin_addr.s_addr == INADDR_ANY) not_found = 1; break; case AF_INET6: if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(rt))->sin6_addr)) not_found = 1; break; default: break; } if (!not_found && rt->rt_ifa != NULL) { ifa = rt->rt_ifa; ifa_ref(ifa); } RT_REMREF(rt); RT_UNLOCK(rt); if (not_found || ifa == NULL) return (NULL); } if (ifa->ifa_addr->sa_family != dst->sa_family) { struct ifaddr *oifa = ifa; ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); if (ifa == NULL) ifa = oifa; else ifa_free(oifa); } return (ifa); } /* * Do appropriate manipulations of a routing tree given * all the bits of info needed */ int rtrequest(int req, struct sockaddr *dst, struct sockaddr *gateway, struct sockaddr *netmask, int flags, struct rtentry **ret_nrt) { return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, RT_DEFAULT_FIB)); } int rtrequest_fib(int req, struct sockaddr *dst, struct sockaddr *gateway, struct sockaddr *netmask, int flags, struct rtentry **ret_nrt, u_int fibnum) { struct rt_addrinfo info; if (dst->sa_len == 0) return(EINVAL); bzero((caddr_t)&info, sizeof(info)); info.rti_flags = flags; info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_info[RTAX_NETMASK] = netmask; return rtrequest1_fib(req, &info, ret_nrt, fibnum); } +/* + * Copy most of @rt data into @info. 
+ *
+ * If @flags contains NHR_COPY, copies dst, netmask and gw to the
+ * pointers specified by the @info structure. Such pointers are assumed
+ * to be zeroed sockaddr-like structures with the sa_len field
+ * initialized to reflect the size of the provided buffer. If NHR_COPY
+ * is not specified, points the dst, netmask and gw @info fields at the
+ * appropriate @rt values.
+ *
+ * If @flags contains NHR_REF, do refcounting on rt_ifp.
+ *
+ * Returns 0 on success.
+ */
+int
+rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info, int flags)
+{
+	struct rt_metrics *rmx;
+	struct sockaddr *src, *dst;
+	int sa_len;
+
+	if (flags & NHR_COPY) {
+		/* Copy destination if dst is non-zero */
+		src = rt_key(rt);
+		dst = info->rti_info[RTAX_DST];
+		sa_len = src->sa_len;
+		if (src != NULL && dst != NULL) {
+			if (src->sa_len > dst->sa_len)
+				return (ENOMEM);
+			memcpy(dst, src, src->sa_len);
+			info->rti_addrs |= RTA_DST;
+		}
+
+		/* Copy mask if set && dst is non-zero */
+		src = rt_mask(rt);
+		dst = info->rti_info[RTAX_NETMASK];
+		if (src != NULL && dst != NULL) {
+
+			/*
+			 * Radix stores a different value in sa_len;
+			 * assume rt_mask() has the same length
+			 * as rt_key().
+			 */
+			if (sa_len > dst->sa_len)
+				return (ENOMEM);
+			memcpy(dst, src, src->sa_len);
+			info->rti_addrs |= RTA_NETMASK;
+		}
+
+		/* Copy gateway if set && dst is non-zero */
+		src = rt->rt_gateway;
+		dst = info->rti_info[RTAX_GATEWAY];
+		if ((rt->rt_flags & RTF_GATEWAY) && src != NULL && dst != NULL) {
+			if (src->sa_len > dst->sa_len)
+				return (ENOMEM);
+			memcpy(dst, src, src->sa_len);
+			info->rti_addrs |= RTA_GATEWAY;
+		}
+	} else {
+		info->rti_info[RTAX_DST] = rt_key(rt);
+		info->rti_addrs |= RTA_DST;
+		if (rt_mask(rt) != NULL) {
+			info->rti_info[RTAX_NETMASK] = rt_mask(rt);
+			info->rti_addrs |= RTA_NETMASK;
+		}
+		if (rt->rt_flags & RTF_GATEWAY) {
+			info->rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+			info->rti_addrs |= RTA_GATEWAY;
+		}
+	}
+
+	rmx = info->rti_rmx;
+	if (rmx != NULL) {
+		info->rti_mflags |= RTV_MTU;
+		rmx->rmx_mtu = rt->rt_mtu;
+	}
+
+	info->rti_flags = rt->rt_flags;
+	info->rti_ifp = rt->rt_ifp;
+	info->rti_ifa = rt->rt_ifa;
+
+	if (flags & NHR_REF) {
+		/* Do 'traditional' refcounting */
+		if_ref(info->rti_ifp);
+	}
+
+	return (0);
+}
+
+/*
+ * Looks up the route entry for @dst in the RIB database for fib @fibnum.
+ * Exports entry data to @info using rt_exportinfo().
+ *
+ * If @flags contains NHR_REF, refcounting is performed on rt_ifp.
+ * All references can be released later by calling rib_free_info().
+ *
+ * Returns 0 on success.
+ * Returns ENOENT for lookup failure, ENOMEM for export failure.
+ */
+int
+rib_lookup_info(uint32_t fibnum, const struct sockaddr *dst, uint32_t flags,
+    uint32_t flowid, struct rt_addrinfo *info)
+{
+	struct radix_node_head *rh;
+	struct radix_node *rn;
+	struct rtentry *rt;
+	int error;
+
+	KASSERT((fibnum < rt_numfibs), ("rib_lookup_info: bad fibnum"));
+	rh = rt_tables_get_rnh(fibnum, dst->sa_family);
+	if (rh == NULL)
+		return (ENOENT);
+
+	RADIX_NODE_HEAD_RLOCK(rh);
+	rn = rh->rnh_matchaddr(__DECONST(void *, dst), rh);
+	if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
+		rt = RNTORT(rn);
+		/* Ensure route & ifp are UP */
+		if (RT_LINK_IS_UP(rt->rt_ifp)) {
+			flags = (flags & NHR_REF) | NHR_COPY;
+			error = rt_exportinfo(rt, info, flags);
+			RADIX_NODE_HEAD_RUNLOCK(rh);
+
+			return (error);
+		}
+	}
+	RADIX_NODE_HEAD_RUNLOCK(rh);
+
+	return (ENOENT);
+}
+
+/*
+ * Releases all references acquired by rib_lookup_info() when
+ * called with NHR_REF flags.
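+ *
+ * A usage sketch for the pair (fibnum and dst assumed to be set up by
+ * the caller; with a zeroed info no sockaddrs are copied, only
+ * flags/ifp/ifa and the MTU are exported):
+ *
+ *	struct rt_addrinfo info;
+ *	struct rt_metrics rmx;
+ *
+ *	bzero(&info, sizeof(info));
+ *	bzero(&rmx, sizeof(rmx));
+ *	info.rti_rmx = &rmx;
+ *	if (rib_lookup_info(fibnum, dst, NHR_REF, 0, &info) == 0) {
+ *		... use info.rti_ifp, info.rti_flags, rmx.rmx_mtu ...
+ *		rib_free_info(&info);
+ *	}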
+ */ +void +rib_free_info(struct rt_addrinfo *info) +{ + + if_rele(info->rti_ifp); +} + /* * Iterates over all existing fibs in system calling * @setwa_f function prior to traversing each fib. * Calls @wa_f function for each element in current fib. * If af is not AF_UNSPEC, iterates over fibs in particular * address family. */ void rt_foreach_fib_walk(int af, rt_setwarg_t *setwa_f, rt_walktree_f_t *wa_f, void *arg) { struct radix_node_head *rnh; uint32_t fibnum; int i; for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { /* Do we want some specific family? */ if (af != AF_UNSPEC) { rnh = rt_tables_get_rnh(fibnum, af); if (rnh == NULL) continue; if (setwa_f != NULL) setwa_f(rnh, fibnum, af, arg); RADIX_NODE_HEAD_LOCK(rnh); rnh->rnh_walktree(rnh, (walktree_f_t *)wa_f, arg); RADIX_NODE_HEAD_UNLOCK(rnh); continue; } for (i = 1; i <= AF_MAX; i++) { rnh = rt_tables_get_rnh(fibnum, i); if (rnh == NULL) continue; if (setwa_f != NULL) setwa_f(rnh, fibnum, i, arg); RADIX_NODE_HEAD_LOCK(rnh); rnh->rnh_walktree(rnh, (walktree_f_t *)wa_f, arg); RADIX_NODE_HEAD_UNLOCK(rnh); } } } struct rt_delinfo { struct rt_addrinfo info; struct radix_node_head *rnh; struct rtentry *head; }; /* * Conditionally unlinks @rn from radix tree based * on info data passed in @arg. */ static int rt_checkdelroute(struct radix_node *rn, void *arg) { struct rt_delinfo *di; struct rt_addrinfo *info; struct rtentry *rt; int error; di = (struct rt_delinfo *)arg; rt = (struct rtentry *)rn; info = &di->info; error = 0; info->rti_info[RTAX_DST] = rt_key(rt); info->rti_info[RTAX_NETMASK] = rt_mask(rt); info->rti_info[RTAX_GATEWAY] = rt->rt_gateway; rt = rt_unlinkrte(di->rnh, info, &error); if (rt == NULL) { /* Either not allowed or not matched. Skip entry */ return (0); } /* Entry was unlinked. Add to the list and return */ rt->rt_chain = di->head; di->head = rt; return (0); } /* * Iterates over all existing fibs in system. * Deletes each element for which @filter_f function returned * non-zero value. * If @af is not AF_UNSPEC, iterates over fibs in particular * address family. */ void rt_foreach_fib_walk_del(int af, rt_filter_f_t *filter_f, void *arg) { struct radix_node_head *rnh; struct rt_delinfo di; struct rtentry *rt; uint32_t fibnum; int i, start, end; bzero(&di, sizeof(di)); di.info.rti_filter = filter_f; di.info.rti_filterdata = arg; for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { /* Do we want some specific family? */ if (af != AF_UNSPEC) { start = af; end = af; } else { start = 1; end = AF_MAX; } for (i = start; i <= end; i++) { rnh = rt_tables_get_rnh(fibnum, i); if (rnh == NULL) continue; di.rnh = rnh; RADIX_NODE_HEAD_LOCK(rnh); rnh->rnh_walktree(rnh, rt_checkdelroute, &di); RADIX_NODE_HEAD_UNLOCK(rnh); if (di.head == NULL) continue; /* We might have something to reclaim */ while (di.head != NULL) { rt = di.head; di.head = rt->rt_chain; rt->rt_chain = NULL; /* TODO std rt -> rt_addrinfo export */ di.info.rti_info[RTAX_DST] = rt_key(rt); di.info.rti_info[RTAX_NETMASK] = rt_mask(rt); rt_notifydelete(rt, &di.info); RTFREE_LOCKED(rt); } } } } /* * Delete Routes for a Network Interface * * Called for each routing entry via the rnh->rnh_walktree() call above * to delete all route entries referencing a detaching network interface. 
* * Arguments: * rt pointer to rtentry * arg argument passed to rnh->rnh_walktree() - detaching interface * * Returns: * 0 successful * errno failed - reason indicated */ static int rt_ifdelroute(const struct rtentry *rt, void *arg) { struct ifnet *ifp = arg; if (rt->rt_ifp != ifp) return (0); /* * Protect (sorta) against walktree recursion problems * with cloned routes */ if ((rt->rt_flags & RTF_UP) == 0) return (0); return (1); } /* * Delete all remaining routes using this interface * Unfortuneatly the only way to do this is to slog through * the entire routing table looking for routes which point * to this interface...oh well... */ void rt_flushifroutes(struct ifnet *ifp) { rt_foreach_fib_walk_del(AF_UNSPEC, rt_ifdelroute, ifp); } /* * Conditionally unlinks rtentry matching data inside @info from @rnh. * Returns unlinked, locked and referenced @rtentry on success, * Returns NULL and sets @perror to: * ESRCH - if prefix was not found, * EADDRINUSE - if trying to delete PINNED route without appropriate flag. * ENOENT - if supplied filter function returned 0 (not matched). */ static struct rtentry * rt_unlinkrte(struct radix_node_head *rnh, struct rt_addrinfo *info, int *perror) { struct sockaddr *dst, *netmask; struct rtentry *rt; struct radix_node *rn; dst = info->rti_info[RTAX_DST]; netmask = info->rti_info[RTAX_NETMASK]; rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, rnh); if (rt == NULL) { *perror = ESRCH; return (NULL); } if ((info->rti_flags & RTF_PINNED) == 0) { /* Check if target route can be deleted */ if (rt->rt_flags & RTF_PINNED) { *perror = EADDRINUSE; return (NULL); } } if (info->rti_filter != NULL) { if (info->rti_filter(rt, info->rti_filterdata) == 0) { /* Not matched */ *perror = ENOENT; return (NULL); } /* * Filter function requested rte deletion. * Ease the caller work by filling in remaining info * from that particular entry. */ info->rti_info[RTAX_GATEWAY] = rt->rt_gateway; } /* * Remove the item from the tree and return it. * Complain if it is not there and do no more processing. */ *perror = ESRCH; #ifdef RADIX_MPATH if (rn_mpath_capable(rnh)) rn = rt_mpath_unlink(rnh, info, rt, perror); else #endif rn = rnh->rnh_deladdr(dst, netmask, rnh); if (rn == NULL) return (NULL); if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) panic ("rtrequest delete"); rt = RNTORT(rn); RT_LOCK(rt); RT_ADDREF(rt); rt->rt_flags &= ~RTF_UP; *perror = 0; return (rt); } static void rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info) { struct ifaddr *ifa; /* * give the protocol a chance to keep things in sync. */ ifa = rt->rt_ifa; if (ifa != NULL && ifa->ifa_rtrequest != NULL) ifa->ifa_rtrequest(RTM_DELETE, rt, info); /* * One more rtentry floating around that is not * linked to the routing table. rttrash will be decremented * when RTFREE(rt) is eventually called. */ V_rttrash++; } /* * These (questionable) definitions of apparent local variables apply * to the next two functions. XXXXXX!!! */ #define dst info->rti_info[RTAX_DST] #define gateway info->rti_info[RTAX_GATEWAY] #define netmask info->rti_info[RTAX_NETMASK] #define ifaaddr info->rti_info[RTAX_IFA] #define ifpaddr info->rti_info[RTAX_IFP] #define flags info->rti_flags /* * Look up rt_addrinfo for a specific fib. Note that if rti_ifa is defined, * it will be referenced so the caller must free it. */ int rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum) { struct ifaddr *ifa; int error = 0; /* * ifp may be specified by sockaddr_dl * when protocol address is ambiguous. 
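	 * A caller can disambiguate by pointing info->rti_info[RTAX_IFP]
	 * at a struct sockaddr_dl whose sdl_family is AF_LINK and whose
	 * sdl_index names the desired interface (a sketch, not the only
	 * accepted form).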
*/ if (info->rti_ifp == NULL && ifpaddr != NULL && ifpaddr->sa_family == AF_LINK && (ifa = ifa_ifwithnet(ifpaddr, 0, fibnum)) != NULL) { info->rti_ifp = ifa->ifa_ifp; ifa_free(ifa); } if (info->rti_ifa == NULL && ifaaddr != NULL) info->rti_ifa = ifa_ifwithaddr(ifaaddr); if (info->rti_ifa == NULL) { struct sockaddr *sa; sa = ifaaddr != NULL ? ifaaddr : (gateway != NULL ? gateway : dst); if (sa != NULL && info->rti_ifp != NULL) info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp); else if (dst != NULL && gateway != NULL) info->rti_ifa = ifa_ifwithroute(flags, dst, gateway, fibnum); else if (sa != NULL) info->rti_ifa = ifa_ifwithroute(flags, sa, sa, fibnum); } if ((ifa = info->rti_ifa) != NULL) { if (info->rti_ifp == NULL) info->rti_ifp = ifa->ifa_ifp; } else error = ENETUNREACH; return (error); } static int if_updatemtu_cb(struct radix_node *rn, void *arg) { struct rtentry *rt; struct if_mtuinfo *ifmtu; rt = (struct rtentry *)rn; ifmtu = (struct if_mtuinfo *)arg; if (rt->rt_ifp != ifmtu->ifp) return (0); if (rt->rt_mtu >= ifmtu->mtu) { /* We have to decrease mtu regardless of flags */ rt->rt_mtu = ifmtu->mtu; return (0); } /* * New MTU is bigger. Check if are allowed to alter it */ if ((rt->rt_flags & (RTF_FIXEDMTU | RTF_GATEWAY | RTF_HOST)) != 0) { /* * Skip routes with user-supplied MTU and * non-interface routes */ return (0); } /* We are safe to update route MTU */ rt->rt_mtu = ifmtu->mtu; return (0); } void rt_updatemtu(struct ifnet *ifp) { struct if_mtuinfo ifmtu; struct radix_node_head *rnh; int i, j; ifmtu.ifp = ifp; /* * Try to update rt_mtu for all routes using this interface * Unfortunately the only way to do this is to traverse all * routing tables in all fibs/domains. */ for (i = 1; i <= AF_MAX; i++) { ifmtu.mtu = if_getmtu_family(ifp, i); for (j = 0; j < rt_numfibs; j++) { rnh = rt_tables_get_rnh(j, i); if (rnh == NULL) continue; RADIX_NODE_HEAD_LOCK(rnh); rnh->rnh_walktree(rnh, if_updatemtu_cb, &ifmtu); RADIX_NODE_HEAD_UNLOCK(rnh); } } } #if 0 int p_sockaddr(char *buf, int buflen, struct sockaddr *s); int rt_print(char *buf, int buflen, struct rtentry *rt); int p_sockaddr(char *buf, int buflen, struct sockaddr *s) { void *paddr = NULL; switch (s->sa_family) { case AF_INET: paddr = &((struct sockaddr_in *)s)->sin_addr; break; case AF_INET6: paddr = &((struct sockaddr_in6 *)s)->sin6_addr; break; } if (paddr == NULL) return (0); if (inet_ntop(s->sa_family, paddr, buf, buflen) == NULL) return (0); return (strlen(buf)); } int rt_print(char *buf, int buflen, struct rtentry *rt) { struct sockaddr *addr, *mask; int i = 0; addr = rt_key(rt); mask = rt_mask(rt); i = p_sockaddr(buf, buflen, addr); if (!(rt->rt_flags & RTF_HOST)) { buf[i++] = '/'; i += p_sockaddr(buf + i, buflen - i, mask); } if (rt->rt_flags & RTF_GATEWAY) { buf[i++] = '>'; i += p_sockaddr(buf + i, buflen - i, rt->rt_gateway); } return (i); } #endif #ifdef RADIX_MPATH /* * Deletes key for single-path routes, unlinks rtentry with * gateway specified in @info from multi-path routes. * * Returnes unlinked entry. In case of failure, returns NULL * and sets @perror to ESRCH. */ static struct radix_node * rt_mpath_unlink(struct radix_node_head *rnh, struct rt_addrinfo *info, struct rtentry *rto, int *perror) { /* * if we got multipath routes, we require users to specify * a matching RTAX_GATEWAY. 
*/ struct rtentry *rt; // *rto = NULL; struct radix_node *rn; struct sockaddr *gw; gw = info->rti_info[RTAX_GATEWAY]; rt = rt_mpath_matchgate(rto, gw); if (rt == NULL) { *perror = ESRCH; return (NULL); } /* * this is the first entry in the chain */ if (rto == rt) { rn = rn_mpath_next((struct radix_node *)rt); /* * there is another entry, now it's active */ if (rn) { rto = RNTORT(rn); RT_LOCK(rto); rto->rt_flags |= RTF_UP; RT_UNLOCK(rto); } else if (rt->rt_flags & RTF_GATEWAY) { /* * For gateway routes, we need to * make sure that we we are deleting * the correct gateway. * rt_mpath_matchgate() does not * check the case when there is only * one route in the chain. */ if (gw && (rt->rt_gateway->sa_len != gw->sa_len || memcmp(rt->rt_gateway, gw, gw->sa_len))) { *perror = ESRCH; return (NULL); } } /* * use the normal delete code to remove * the first entry */ rn = rnh->rnh_deladdr(dst, netmask, rnh); *perror = 0; return (rn); } /* * if the entry is 2nd and on up */ if (rt_mpath_deldup(rto, rt) == 0) panic ("rtrequest1: rt_mpath_deldup"); *perror = 0; rn = (struct radix_node *)rt; return (rn); } #endif #ifdef FLOWTABLE static struct rtentry * rt_flowtable_check_route(struct radix_node_head *rnh, struct rt_addrinfo *info) { #if defined(INET6) || defined(INET) struct radix_node *rn; #endif struct rtentry *rt0; rt0 = NULL; /* "flow-table" only supports IPv6 and IPv4 at the moment. */ switch (dst->sa_family) { #ifdef INET6 case AF_INET6: #endif #ifdef INET case AF_INET: #endif #if defined(INET6) || defined(INET) rn = rnh->rnh_matchaddr(dst, rnh); if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) { struct sockaddr *mask; u_char *m, *n; int len; /* * compare mask to see if the new route is * more specific than the existing one */ rt0 = RNTORT(rn); RT_LOCK(rt0); RT_ADDREF(rt0); RT_UNLOCK(rt0); /* * A host route is already present, so * leave the flow-table entries as is. */ if (rt0->rt_flags & RTF_HOST) { RTFREE(rt0); rt0 = NULL; } else if (!(flags & RTF_HOST) && netmask) { mask = rt_mask(rt0); len = mask->sa_len; m = (u_char *)mask; n = (u_char *)netmask; while (len-- > 0) { if (*n != *m) break; n++; m++; } if (len == 0 || (*n < *m)) { RTFREE(rt0); rt0 = NULL; } } } #endif/* INET6 || INET */ } return (rt0); } #endif int rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, u_int fibnum) { int error = 0, needlock = 0; struct rtentry *rt, *rt_old; #ifdef FLOWTABLE struct rtentry *rt0; #endif struct radix_node *rn; struct radix_node_head *rnh; struct ifaddr *ifa; struct sockaddr *ndst; struct sockaddr_storage mdst; #define senderr(x) { error = x ; goto bad; } KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum")); switch (dst->sa_family) { case AF_INET6: case AF_INET: /* We support multiple FIBs. */ break; default: fibnum = RT_DEFAULT_FIB; break; } /* * Find the correct routing tree to use for this Address Family */ rnh = rt_tables_get_rnh(fibnum, dst->sa_family); if (rnh == NULL) return (EAFNOSUPPORT); needlock = ((flags & RTF_RNH_LOCKED) == 0); flags &= ~RTF_RNH_LOCKED; if (needlock) RADIX_NODE_HEAD_LOCK(rnh); else RADIX_NODE_HEAD_LOCK_ASSERT(rnh); /* * If we are adding a host route then we don't want to put * a netmask in the tree, nor do we want to clone it. 
*/ if (flags & RTF_HOST) netmask = NULL; switch (req) { case RTM_DELETE: if (netmask) { rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask); dst = (struct sockaddr *)&mdst; } rt = rt_unlinkrte(rnh, info, &error); if (error != 0) goto bad; rt_notifydelete(rt, info); /* * If the caller wants it, then it can have it, * but it's up to it to free the rtentry as we won't be * doing it. */ if (ret_nrt) { *ret_nrt = rt; RT_UNLOCK(rt); } else RTFREE_LOCKED(rt); break; case RTM_RESOLVE: /* * resolve was only used for route cloning * here for compat */ break; case RTM_ADD: if ((flags & RTF_GATEWAY) && !gateway) senderr(EINVAL); if (dst && gateway && (dst->sa_family != gateway->sa_family) && (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) senderr(EINVAL); if (info->rti_ifa == NULL) { error = rt_getifa_fib(info, fibnum); if (error) senderr(error); } else ifa_ref(info->rti_ifa); ifa = info->rti_ifa; rt = uma_zalloc(V_rtzone, M_NOWAIT); if (rt == NULL) { ifa_free(ifa); senderr(ENOBUFS); } rt->rt_flags = RTF_UP | flags; rt->rt_fibnum = fibnum; /* * Add the gateway. Possibly re-malloc-ing the storage for it. */ RT_LOCK(rt); if ((error = rt_setgate(rt, dst, gateway)) != 0) { ifa_free(ifa); uma_zfree(V_rtzone, rt); senderr(error); } /* * point to the (possibly newly malloc'd) dest address. */ ndst = (struct sockaddr *)rt_key(rt); /* * make sure it contains the value we want (masked if needed). */ if (netmask) { rt_maskedcopy(dst, ndst, netmask); } else bcopy(dst, ndst, dst->sa_len); /* * We use the ifa reference returned by rt_getifa_fib(). * This moved from below so that rnh->rnh_addaddr() can * examine the ifa and ifa->ifa_ifp if it so desires. */ rt->rt_ifa = ifa; rt->rt_ifp = ifa->ifa_ifp; rt->rt_weight = 1; rt_setmetrics(info, rt); #ifdef RADIX_MPATH /* do not permit exactly the same dst/mask/gw pair */ if (rn_mpath_capable(rnh) && rt_mpath_conflict(rnh, rt, netmask)) { ifa_free(rt->rt_ifa); R_Free(rt_key(rt)); uma_zfree(V_rtzone, rt); senderr(EEXIST); } #endif #ifdef FLOWTABLE rt0 = rt_flowtable_check_route(rnh, info); #endif /* FLOWTABLE */ /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */ rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes); rt_old = NULL; if (rn == NULL && (info->rti_flags & RTF_PINNED) != 0) { /* * Force removal and re-try addition * TODO: better multipath&pinned support */ struct sockaddr *info_dst = info->rti_info[RTAX_DST]; info->rti_info[RTAX_DST] = ndst; /* Do not delete existing PINNED(interface) routes */ info->rti_flags &= ~RTF_PINNED; rt_old = rt_unlinkrte(rnh, info, &error); info->rti_flags |= RTF_PINNED; info->rti_info[RTAX_DST] = info_dst; if (rt_old != NULL) rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes); } if (rt_old != NULL) RT_UNLOCK(rt_old); /* * If it still failed to go into the tree, * then un-make it (this should be a function) */ if (rn == NULL) { ifa_free(rt->rt_ifa); R_Free(rt_key(rt)); uma_zfree(V_rtzone, rt); #ifdef FLOWTABLE if (rt0 != NULL) RTFREE(rt0); #endif senderr(EEXIST); } #ifdef FLOWTABLE else if (rt0 != NULL) { flowtable_route_flush(dst->sa_family, rt0); RTFREE(rt0); } #endif if (rt_old != NULL) { rt_notifydelete(rt_old, info); RTFREE(rt_old); } /* * If this protocol has something to add to this then * allow it to do that as well. */ if (ifa->ifa_rtrequest) ifa->ifa_rtrequest(req, rt, info); /* * actually return a resultant rtentry and * give the caller a single reference. 
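		 * A typical RTM_ADD caller sketch (dst/gw/mask assumed to
		 * be initialized sockaddrs); the reference handed back must
		 * eventually be dropped with RTFREE():
		 *
		 *	struct rt_addrinfo info;
		 *	struct rtentry *nrt = NULL;
		 *
		 *	bzero(&info, sizeof(info));
		 *	info.rti_flags = RTF_STATIC | RTF_GATEWAY;
		 *	info.rti_info[RTAX_DST] = dst;
		 *	info.rti_info[RTAX_GATEWAY] = gw;
		 *	info.rti_info[RTAX_NETMASK] = mask;
		 *	error = rtrequest1_fib(RTM_ADD, &info, &nrt, fibnum);
		 *	if (error == 0 && nrt != NULL)
		 *		RTFREE(nrt);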
*/ if (ret_nrt) { *ret_nrt = rt; RT_ADDREF(rt); } RT_UNLOCK(rt); break; case RTM_CHANGE: error = rtrequest1_fib_change(rnh, info, ret_nrt, fibnum); break; default: error = EOPNOTSUPP; } bad: if (needlock) RADIX_NODE_HEAD_UNLOCK(rnh); return (error); #undef senderr } #undef dst #undef gateway #undef netmask #undef ifaaddr #undef ifpaddr #undef flags static int rtrequest1_fib_change(struct radix_node_head *rnh, struct rt_addrinfo *info, struct rtentry **ret_nrt, u_int fibnum) { struct rtentry *rt = NULL; int error = 0; int free_ifa = 0; int family, mtu; struct if_mtuinfo ifmtu; rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], info->rti_info[RTAX_NETMASK], rnh); if (rt == NULL) return (ESRCH); #ifdef RADIX_MPATH /* * If we got multipath routes, * we require users to specify a matching RTAX_GATEWAY. */ if (rn_mpath_capable(rnh)) { rt = rt_mpath_matchgate(rt, info->rti_info[RTAX_GATEWAY]); if (rt == NULL) return (ESRCH); } #endif RT_LOCK(rt); rt_setmetrics(info, rt); /* * New gateway could require new ifaddr, ifp; * flags may also be different; ifp may be specified * by ll sockaddr when protocol address is ambiguous */ if (((rt->rt_flags & RTF_GATEWAY) && info->rti_info[RTAX_GATEWAY] != NULL) || info->rti_info[RTAX_IFP] != NULL || (info->rti_info[RTAX_IFA] != NULL && !sa_equal(info->rti_info[RTAX_IFA], rt->rt_ifa->ifa_addr))) { error = rt_getifa_fib(info, fibnum); if (info->rti_ifa != NULL) free_ifa = 1; if (error != 0) goto bad; } /* Check if outgoing interface has changed */ if (info->rti_ifa != NULL && info->rti_ifa != rt->rt_ifa && rt->rt_ifa != NULL && rt->rt_ifa->ifa_rtrequest != NULL) { rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt, info); ifa_free(rt->rt_ifa); } /* Update gateway address */ if (info->rti_info[RTAX_GATEWAY] != NULL) { error = rt_setgate(rt, rt_key(rt), info->rti_info[RTAX_GATEWAY]); if (error != 0) goto bad; rt->rt_flags &= ~RTF_GATEWAY; rt->rt_flags |= (RTF_GATEWAY & info->rti_flags); } if (info->rti_ifa != NULL && info->rti_ifa != rt->rt_ifa) { ifa_ref(info->rti_ifa); rt->rt_ifa = info->rti_ifa; rt->rt_ifp = info->rti_ifp; } /* Allow some flags to be toggled on change. */ rt->rt_flags &= ~RTF_FMASK; rt->rt_flags |= info->rti_flags & RTF_FMASK; if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest != NULL) rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, info); /* Alter route MTU if necessary */ if (rt->rt_ifp != NULL) { family = info->rti_info[RTAX_DST]->sa_family; mtu = if_getmtu_family(rt->rt_ifp, family); /* Set default MTU */ if (rt->rt_mtu == 0) rt->rt_mtu = mtu; if (rt->rt_mtu != mtu) { /* Check if we really need to update */ ifmtu.ifp = rt->rt_ifp; ifmtu.mtu = mtu; if_updatemtu_cb(rt->rt_nodes, &ifmtu); } } if (ret_nrt) { *ret_nrt = rt; RT_ADDREF(rt); } bad: RT_UNLOCK(rt); if (free_ifa != 0) ifa_free(info->rti_ifa); return (error); } static void rt_setmetrics(const struct rt_addrinfo *info, struct rtentry *rt) { if (info->rti_mflags & RTV_MTU) { if (info->rti_rmx->rmx_mtu != 0) { /* * MTU was explicitly provided by user. * Keep it. */ rt->rt_flags |= RTF_FIXEDMTU; } else { /* * User explicitly sets MTU to 0. * Assume rollback to default. */ rt->rt_flags &= ~RTF_FIXEDMTU; } rt->rt_mtu = info->rti_rmx->rmx_mtu; } if (info->rti_mflags & RTV_WEIGHT) rt->rt_weight = info->rti_rmx->rmx_weight; /* Kernel -> userland timebase conversion. */ if (info->rti_mflags & RTV_EXPIRE) rt->rt_expire = info->rti_rmx->rmx_expire ? 
info->rti_rmx->rmx_expire - time_second + time_uptime : 0; } int rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) { /* XXX dst may be overwritten, can we move this to below */ int dlen = SA_SIZE(dst), glen = SA_SIZE(gate); #ifdef INVARIANTS struct radix_node_head *rnh; rnh = rt_tables_get_rnh(rt->rt_fibnum, dst->sa_family); #endif RT_LOCK_ASSERT(rt); RADIX_NODE_HEAD_LOCK_ASSERT(rnh); /* * Prepare to store the gateway in rt->rt_gateway. * Both dst and gateway are stored one after the other in the same * malloc'd chunk. If we have room, we can reuse the old buffer, * rt_gateway already points to the right place. * Otherwise, malloc a new block and update the 'dst' address. */ if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway)) { caddr_t new; R_Malloc(new, caddr_t, dlen + glen); if (new == NULL) return ENOBUFS; /* * XXX note, we copy from *dst and not *rt_key(rt) because * rt_setgate() can be called to initialize a newly * allocated route entry, in which case rt_key(rt) == NULL * (and also rt->rt_gateway == NULL). * Free()/free() handle a NULL argument just fine. */ bcopy(dst, new, dlen); R_Free(rt_key(rt)); /* free old block, if any */ rt_key(rt) = (struct sockaddr *)new; rt->rt_gateway = (struct sockaddr *)(new + dlen); } /* * Copy the new gateway value into the memory chunk. */ bcopy(gate, rt->rt_gateway, glen); return (0); } void rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask) { u_char *cp1 = (u_char *)src; u_char *cp2 = (u_char *)dst; u_char *cp3 = (u_char *)netmask; u_char *cplim = cp2 + *cp3; u_char *cplim2 = cp2 + *cp1; *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */ cp3 += 2; if (cplim > cplim2) cplim = cplim2; while (cp2 < cplim) *cp2++ = *cp1++ & *cp3++; if (cp2 < cplim2) bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2)); } /* * Set up a routing table entry, normally * for an interface. */ #define _SOCKADDR_TMPSIZE 128 /* Not too big.. kernel stack size is limited */ static inline int rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) { struct sockaddr *dst; struct sockaddr *netmask; struct rtentry *rt = NULL; struct rt_addrinfo info; int error = 0; int startfib, endfib; char tempbuf[_SOCKADDR_TMPSIZE]; int didwork = 0; int a_failure = 0; static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; struct radix_node_head *rnh; if (flags & RTF_HOST) { dst = ifa->ifa_dstaddr; netmask = NULL; } else { dst = ifa->ifa_addr; netmask = ifa->ifa_netmask; } if (dst->sa_len == 0) return(EINVAL); switch (dst->sa_family) { case AF_INET6: case AF_INET: /* We support multiple FIBs. */ break; default: fibnum = RT_DEFAULT_FIB; break; } if (fibnum == RT_ALL_FIBS) { if (V_rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) startfib = endfib = ifa->ifa_ifp->if_fib; else { startfib = 0; endfib = rt_numfibs - 1; } } else { KASSERT((fibnum < rt_numfibs), ("rtinit1: bad fibnum")); startfib = fibnum; endfib = fibnum; } /* * If it's a delete, check that if it exists, * it's on the correct interface or we might scrub * a route to another ifa which would * be confusing at best and possibly worse. */ if (cmd == RTM_DELETE) { /* * It's a delete, so it should already exist.. * If it's a net, mask off the host bits * (Assuming we have a mask) * XXX this is kinda inet specific.. */ if (netmask != NULL) { rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask); dst = (struct sockaddr *)tempbuf; } } /* * Now go through all the requested tables (fibs) and do the * requested action. 
Realistically, this will either be fib 0 * for protocols that don't do multiple tables or all the * tables for those that do. */ for ( fibnum = startfib; fibnum <= endfib; fibnum++) { if (cmd == RTM_DELETE) { struct radix_node *rn; /* * Look up an rtentry that is in the routing tree and * contains the correct info. */ rnh = rt_tables_get_rnh(fibnum, dst->sa_family); if (rnh == NULL) /* this table doesn't exist but others might */ continue; RADIX_NODE_HEAD_RLOCK(rnh); rn = rnh->rnh_lookup(dst, netmask, rnh); #ifdef RADIX_MPATH if (rn_mpath_capable(rnh)) { if (rn == NULL) error = ESRCH; else { rt = RNTORT(rn); /* * for interface route the * rt->rt_gateway is sockaddr_intf * for cloning ARP entries, so * rt_mpath_matchgate must use the * interface address */ rt = rt_mpath_matchgate(rt, ifa->ifa_addr); if (rt == NULL) error = ESRCH; } } #endif error = (rn == NULL || (rn->rn_flags & RNF_ROOT) || RNTORT(rn)->rt_ifa != ifa); RADIX_NODE_HEAD_RUNLOCK(rnh); if (error) { /* this is only an error if bad on ALL tables */ continue; } } /* * Do the actual request */ bzero((caddr_t)&info, sizeof(info)); info.rti_ifa = ifa; info.rti_flags = flags | (ifa->ifa_flags & ~IFA_RTSELF) | RTF_PINNED; info.rti_info[RTAX_DST] = dst; /* * doing this for compatibility reasons */ if (cmd == RTM_ADD) info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl; else info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; info.rti_info[RTAX_NETMASK] = netmask; error = rtrequest1_fib(cmd, &info, &rt, fibnum); if (error == 0 && rt != NULL) { /* * notify any listening routing agents of the change */ RT_LOCK(rt); #ifdef RADIX_MPATH /* * in case address alias finds the first address * e.g. ifconfig bge0 192.0.2.246/24 * e.g. ifconfig bge0 192.0.2.247/24 * the address set in the route is 192.0.2.246 * so we need to replace it with 192.0.2.247 */ if (memcmp(rt->rt_ifa->ifa_addr, ifa->ifa_addr, ifa->ifa_addr->sa_len)) { ifa_free(rt->rt_ifa); ifa_ref(ifa); rt->rt_ifp = ifa->ifa_ifp; rt->rt_ifa = ifa; } #endif /* * doing this for compatibility reasons */ if (cmd == RTM_ADD) { ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type = rt->rt_ifp->if_type; ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index = rt->rt_ifp->if_index; } RT_ADDREF(rt); RT_UNLOCK(rt); rt_newaddrmsg_fib(cmd, ifa, error, rt, fibnum); RT_LOCK(rt); RT_REMREF(rt); if (cmd == RTM_DELETE) { /* * If we are deleting, and we found an entry, * then it's been removed from the tree.. * now throw it away. */ RTFREE_LOCKED(rt); } else { if (cmd == RTM_ADD) { /* * We just wanted to add it.. * we don't actually need a reference. */ RT_REMREF(rt); } RT_UNLOCK(rt); } didwork = 1; } if (error) a_failure = error; } if (cmd == RTM_DELETE) { if (didwork) { error = 0; } else { /* we only give an error if it wasn't in any table */ error = ((flags & RTF_HOST) ? EHOSTUNREACH : ENETUNREACH); } } else { if (a_failure) { /* return an error if any of them failed */ error = a_failure; } } return (error); } /* * Set up a routing table entry, normally * for an interface. */ int rtinit(struct ifaddr *ifa, int cmd, int flags) { struct sockaddr *dst; int fib = RT_DEFAULT_FIB; if (flags & RTF_HOST) { dst = ifa->ifa_dstaddr; } else { dst = ifa->ifa_addr; } switch (dst->sa_family) { case AF_INET6: case AF_INET: /* We do support multiple FIBs. */ fib = RT_ALL_FIBS; break; } return (rtinit1(ifa, cmd, flags, fib)); } /* * Announce interface address arrival/withdraw * Returns 0 on success. 
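 * The fibnum argument may be RT_ALL_FIBS (-1) to announce the event
 * on every table.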
*/ int rt_addrmsg(int cmd, struct ifaddr *ifa, int fibnum) { KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, ("unexpected cmd %d", cmd)); KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs), ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs)); #if defined(INET) || defined(INET6) #ifdef SCTP /* * notify the SCTP stack * this will only get called when an address is added/deleted * XXX pass the ifaddr struct instead if ifa->ifa_addr... */ sctp_addr_change(ifa, cmd); #endif /* SCTP */ #endif return (rtsock_addrmsg(cmd, ifa, fibnum)); } /* * Announce route addition/removal. * Users of this function MUST validate input data BEFORE calling. * However we have to be able to handle invalid data: * if some userland app sends us "invalid" route message (invalid mask, * no dst, wrong address families, etc...) we need to pass it back * to app (and any other rtsock consumers) with rtm_errno field set to * non-zero value. * Returns 0 on success. */ int rt_routemsg(int cmd, struct ifnet *ifp, int error, struct rtentry *rt, int fibnum) { KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, ("unexpected cmd %d", cmd)); KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs), ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs)); KASSERT(rt_key(rt) != NULL, (":%s: rt_key must be supplied", __func__)); return (rtsock_routemsg(cmd, ifp, error, rt, fibnum)); } void rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt) { rt_newaddrmsg_fib(cmd, ifa, error, rt, RT_ALL_FIBS); } /* * This is called to generate messages from the routing socket * indicating a network interface has had addresses associated with it. */ void rt_newaddrmsg_fib(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt, int fibnum) { KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, ("unexpected cmd %u", cmd)); KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs), ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs)); if (cmd == RTM_ADD) { rt_addrmsg(cmd, ifa, fibnum); if (rt != NULL) rt_routemsg(cmd, ifa->ifa_ifp, error, rt, fibnum); } else { if (rt != NULL) rt_routemsg(cmd, ifa->ifa_ifp, error, rt, fibnum); rt_addrmsg(cmd, ifa, fibnum); } } diff --git a/sys/net/route.h b/sys/net/route.h index 743c830f54f9..7c69e1c9a1c7 100644 --- a/sys/net/route.h +++ b/sys/net/route.h @@ -1,468 +1,475 @@ /*- * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE.
 *
 * @(#)route.h	8.4 (Berkeley) 1/9/95
 * $FreeBSD$
 */

#ifndef _NET_ROUTE_H_
#define _NET_ROUTE_H_

#include <sys/counter.h>
#include <net/vnet.h>

/*
 * Kernel resident routing tables.
 *
 * The routing tables are initialized when interface addresses
 * are set by making entries for all directly connected interfaces.
 */

/*
- * A route consists of a destination address, a reference
- * to a routing entry, and a reference to an llentry.
- * These are often held by protocols in their control
- * blocks, e.g. inpcb.
+ * Struct route consists of a destination address, a route entry
+ * pointer, and a link-layer prepend data pointer along with its
+ * length.
 */
struct route {
	struct	rtentry *ro_rt;
	char		*ro_prepend;
	uint16_t	ro_plen;
	uint16_t	ro_flags;
+	uint16_t	ro_mtu;		/* saved ro_rt mtu */
+	uint16_t	spare;
	struct	sockaddr ro_dst;
};

#define	RT_L2_ME_BIT		2	/* dst L2 addr is our address */
#define	RT_MAY_LOOP_BIT		3	/* dst may require loop copy */
#define	RT_HAS_HEADER_BIT	4	/* mbuf already has its header prepended */

#define	RT_CACHING_CONTEXT	0x1	/* XXX: not used anywhere */
#define	RT_NORTREF		0x2	/* doesn't hold reference on ro_rt */
#define	RT_L2_ME		(1 << RT_L2_ME_BIT)
#define	RT_MAY_LOOP		(1 << RT_MAY_LOOP_BIT)
#define	RT_HAS_HEADER		(1 << RT_HAS_HEADER_BIT)

struct rt_metrics {
	u_long	rmx_locks;	/* Kernel must leave these values alone */
	u_long	rmx_mtu;	/* MTU for this path */
	u_long	rmx_hopcount;	/* max hops expected */
	u_long	rmx_expire;	/* lifetime for route, e.g. redirect */
	u_long	rmx_recvpipe;	/* inbound delay-bandwidth product */
	u_long	rmx_sendpipe;	/* outbound delay-bandwidth product */
	u_long	rmx_ssthresh;	/* outbound gateway buffer limit */
	u_long	rmx_rtt;	/* estimated round trip time */
	u_long	rmx_rttvar;	/* estimated rtt variance */
	u_long	rmx_pksent;	/* packets sent using this route */
	u_long	rmx_weight;	/* route weight */
	u_long	rmx_filler[3];	/* will be used for T/TCP later */
};

/*
 * rmx_rtt and rmx_rttvar are stored as microseconds;
 * RTTTOPRHZ(rtt) converts to a value suitable for use
 * by a protocol slowtimo counter.
 */
#define	RTM_RTTUNIT	1000000	/* units for rtt, rttvar, as units per sec */
#define	RTTTOPRHZ(r)	((r) / (RTM_RTTUNIT / PR_SLOWHZ))

/* lle state is exported in rmx_state rt_metrics field */
#define	rmx_state	rmx_weight

#define	RT_DEFAULT_FIB	0	/* Explicitly mark fib=0 restricted cases */
#define	RT_ALL_FIBS	-1	/* Announce event for every fib */
#ifdef _KERNEL
extern u_int rt_numfibs;	/* number of usable routing tables */
VNET_DECLARE(u_int, rt_add_addr_allfibs); /* Announce interfaces to all fibs */
#define	V_rt_add_addr_allfibs	VNET(rt_add_addr_allfibs)
#endif

/*
 * We distinguish between routes to hosts and routes to networks,
 * preferring the former if available.  For each route we infer
 * the interface to use from the gateway address supplied when
 * the route was entered.  Routes that forward packets through
 * gateways are marked so that the output routines know to address the
 * gateway rather than the ultimate destination.
*/ #ifndef RNF_NORMAL #include #ifdef RADIX_MPATH #include #endif #endif #if defined(_KERNEL) || defined(_WANT_RTENTRY) struct rtentry { struct radix_node rt_nodes[2]; /* tree glue, and other values */ /* * XXX struct rtentry must begin with a struct radix_node (or two!) * because the code does some casts of a 'struct radix_node *' * to a 'struct rtentry *' */ #define rt_key(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_key))) #define rt_mask(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_mask))) struct sockaddr *rt_gateway; /* value */ struct ifnet *rt_ifp; /* the answer: interface to use */ struct ifaddr *rt_ifa; /* the answer: interface address to use */ int rt_flags; /* up/down?, host/net */ int rt_refcnt; /* # held references */ u_int rt_fibnum; /* which FIB */ u_long rt_mtu; /* MTU for this path */ u_long rt_weight; /* absolute weight */ u_long rt_expire; /* lifetime for route, e.g. redirect */ #define rt_endzero rt_pksent counter_u64_t rt_pksent; /* packets sent using this route */ struct mtx rt_mtx; /* mutex for routing entry */ struct rtentry *rt_chain; /* pointer to next rtentry to delete */ }; #endif /* _KERNEL || _WANT_RTENTRY */ #define RTF_UP 0x1 /* route usable */ #define RTF_GATEWAY 0x2 /* destination is a gateway */ #define RTF_HOST 0x4 /* host entry (net otherwise) */ #define RTF_REJECT 0x8 /* host or net unreachable */ #define RTF_DYNAMIC 0x10 /* created dynamically (by redirect) */ #define RTF_MODIFIED 0x20 /* modified dynamically (by redirect) */ #define RTF_DONE 0x40 /* message confirmed */ /* 0x80 unused, was RTF_DELCLONE */ /* 0x100 unused, was RTF_CLONING */ #define RTF_XRESOLVE 0x200 /* external daemon resolves name */ #define RTF_LLINFO 0x400 /* DEPRECATED - exists ONLY for backward compatibility */ #define RTF_LLDATA 0x400 /* used by apps to add/del L2 entries */ #define RTF_STATIC 0x800 /* manually added */ #define RTF_BLACKHOLE 0x1000 /* just discard pkts (during updates) */ #define RTF_PROTO2 0x4000 /* protocol specific routing flag */ #define RTF_PROTO1 0x8000 /* protocol specific routing flag */ /* 0x10000 unused, was RTF_PRCLONING */ /* 0x20000 unused, was RTF_WASCLONED */ #define RTF_PROTO3 0x40000 /* protocol specific routing flag */ #define RTF_FIXEDMTU 0x80000 /* MTU was explicitly specified */ #define RTF_PINNED 0x100000 /* route is immutable */ #define RTF_LOCAL 0x200000 /* route represents a local address */ #define RTF_BROADCAST 0x400000 /* route represents a bcast address */ #define RTF_MULTICAST 0x800000 /* route represents a mcast address */ /* 0x8000000 and up unassigned */ #define RTF_STICKY 0x10000000 /* always route dst->src */ #define RTF_RNH_LOCKED 0x40000000 /* radix node head is locked */ #define RTF_GWFLAG_COMPAT 0x80000000 /* a compatibility bit for interacting with existing routing apps */ /* Mask of RTF flags that are allowed to be modified by RTM_CHANGE. */ #define RTF_FMASK \ (RTF_PROTO1 | RTF_PROTO2 | RTF_PROTO3 | RTF_BLACKHOLE | \ RTF_REJECT | RTF_STATIC | RTF_STICKY) /* * fib_ nexthop API flags. 
*/ /* Consumer-visible nexthop info flags */ #define NHF_REJECT 0x0010 /* RTF_REJECT */ #define NHF_BLACKHOLE 0x0020 /* RTF_BLACKHOLE */ #define NHF_REDIRECT 0x0040 /* RTF_DYNAMIC|RTF_MODIFIED */ #define NHF_DEFAULT 0x0080 /* Default route */ #define NHF_BROADCAST 0x0100 /* RTF_BROADCAST */ #define NHF_GATEWAY 0x0200 /* RTF_GATEWAY */ /* Nexthop request flags */ #define NHR_IFAIF 0x01 /* Return ifa_ifp interface */ #define NHR_REF 0x02 /* For future use */ +/* Control plane route request flags */ +#define NHR_COPY 0x100 /* Copy rte data */ + /* rte<>nhop translation */ static inline uint16_t fib_rte_to_nh_flags(int rt_flags) { uint16_t res; res = (rt_flags & RTF_REJECT) ? NHF_REJECT : 0; res |= (rt_flags & RTF_BLACKHOLE) ? NHF_BLACKHOLE : 0; res |= (rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) ? NHF_REDIRECT : 0; res |= (rt_flags & RTF_BROADCAST) ? NHF_BROADCAST : 0; res |= (rt_flags & RTF_GATEWAY) ? NHF_GATEWAY : 0; return (res); } /* * Routing statistics. */ struct rtstat { short rts_badredirect; /* bogus redirect calls */ short rts_dynamic; /* routes created by redirects */ short rts_newgateway; /* routes modified by redirects */ short rts_unreach; /* lookups which failed */ short rts_wildcard; /* lookups satisfied by a wildcard */ }; /* * Structures for routing messages. */ struct rt_msghdr { u_short rtm_msglen; /* to skip over non-understood messages */ u_char rtm_version; /* future binary compatibility */ u_char rtm_type; /* message type */ u_short rtm_index; /* index for associated ifp */ int rtm_flags; /* flags, incl. kern & message, e.g. DONE */ int rtm_addrs; /* bitmask identifying sockaddrs in msg */ pid_t rtm_pid; /* identify sender */ int rtm_seq; /* for sender to identify action */ int rtm_errno; /* why failed */ int rtm_fmask; /* bitmask used in RTM_CHANGE message */ u_long rtm_inits; /* which metrics we are initializing */ struct rt_metrics rtm_rmx; /* metrics themselves */ }; #define RTM_VERSION 5 /* Up the ante and ignore older versions */ /* * Message types. */ #define RTM_ADD 0x1 /* Add Route */ #define RTM_DELETE 0x2 /* Delete Route */ #define RTM_CHANGE 0x3 /* Change Metrics or flags */ #define RTM_GET 0x4 /* Report Metrics */ #define RTM_LOSING 0x5 /* Kernel Suspects Partitioning */ #define RTM_REDIRECT 0x6 /* Told to use different route */ #define RTM_MISS 0x7 /* Lookup failed on this address */ #define RTM_LOCK 0x8 /* fix specified metrics */ /* 0x9 */ /* 0xa */ #define RTM_RESOLVE 0xb /* req to resolve dst to LL addr */ #define RTM_NEWADDR 0xc /* address being added to iface */ #define RTM_DELADDR 0xd /* address being removed from iface */ #define RTM_IFINFO 0xe /* iface going up/down etc. */ #define RTM_NEWMADDR 0xf /* mcast group membership being added to if */ #define RTM_DELMADDR 0x10 /* mcast group membership being deleted */ #define RTM_IFANNOUNCE 0x11 /* iface arrival/departure */ #define RTM_IEEE80211 0x12 /* IEEE80211 wireless event */ /* * Bitmask values for rtm_inits and rmx_locks. */ #define RTV_MTU 0x1 /* init or lock _mtu */ #define RTV_HOPCOUNT 0x2 /* init or lock _hopcount */ #define RTV_EXPIRE 0x4 /* init or lock _expire */ #define RTV_RPIPE 0x8 /* init or lock _recvpipe */ #define RTV_SPIPE 0x10 /* init or lock _sendpipe */ #define RTV_SSTHRESH 0x20 /* init or lock _ssthresh */ #define RTV_RTT 0x40 /* init or lock _rtt */ #define RTV_RTTVAR 0x80 /* init or lock _rttvar */ #define RTV_WEIGHT 0x100 /* init or lock _weight */ /* * Bitmask values for rtm_addrs. 
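 *
 * The RTA_* bits below equal (1 << RTAX_*), so a consumer typically
 * walks the sockaddrs packed behind a message header in RTAX_ order,
 * e.g. (sketch, validation omitted):
 *
 *	char *cp = (char *)(rtm + 1);
 *	for (i = 0; i < RTAX_MAX; i++) {
 *		if ((rtm->rtm_addrs & (1 << i)) == 0)
 *			continue;
 *		sa = (struct sockaddr *)cp;
 *		(... record 'sa' as address number 'i' ...)
 *		cp += SA_SIZE(sa);
 *	}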
*/ #define RTA_DST 0x1 /* destination sockaddr present */ #define RTA_GATEWAY 0x2 /* gateway sockaddr present */ #define RTA_NETMASK 0x4 /* netmask sockaddr present */ #define RTA_GENMASK 0x8 /* cloning mask sockaddr present */ #define RTA_IFP 0x10 /* interface name sockaddr present */ #define RTA_IFA 0x20 /* interface addr sockaddr present */ #define RTA_AUTHOR 0x40 /* sockaddr for author of redirect */ #define RTA_BRD 0x80 /* for NEWADDR, broadcast or p-p dest addr */ /* * Index offsets for sockaddr array for alternate internal encoding. */ #define RTAX_DST 0 /* destination sockaddr present */ #define RTAX_GATEWAY 1 /* gateway sockaddr present */ #define RTAX_NETMASK 2 /* netmask sockaddr present */ #define RTAX_GENMASK 3 /* cloning mask sockaddr present */ #define RTAX_IFP 4 /* interface name sockaddr present */ #define RTAX_IFA 5 /* interface addr sockaddr present */ #define RTAX_AUTHOR 6 /* sockaddr for author of redirect */ #define RTAX_BRD 7 /* for NEWADDR, broadcast or p-p dest addr */ #define RTAX_MAX 8 /* size of array to allocate */ typedef int rt_filter_f_t(const struct rtentry *, void *); struct rt_addrinfo { int rti_addrs; /* RTA_* bitmask of valid rti_info entries */ int rti_flags; /* Route RTF_ flags */ struct sockaddr *rti_info[RTAX_MAX]; /* Sockaddr data */ struct ifaddr *rti_ifa; /* value of rt_ifa addr */ struct ifnet *rti_ifp; /* route interface */ rt_filter_f_t *rti_filter; /* filter function */ void *rti_filterdata; /* filter parameters */ u_long rti_mflags; /* metrics RTV_ flags */ u_long rti_spare; /* Will be used for fib */ struct rt_metrics *rti_rmx; /* Pointer to route metrics */ }; /* * This macro returns the size of a struct sockaddr when passed * through a routing socket. Basically we round up sa_len to * a multiple of sizeof(long), with a minimum of sizeof(long). * The check for a NULL pointer is just a convenience, probably never used. * The case sa_len == 0 should only apply to empty structures. */ #define SA_SIZE(sa) \ ( (!(sa) || ((struct sockaddr *)(sa))->sa_len == 0) ?
\ sizeof(long) : \ 1 + ( (((struct sockaddr *)(sa))->sa_len - 1) | (sizeof(long) - 1) ) ) #define sa_equal(a, b) ( \ (((const struct sockaddr *)(a))->sa_len == ((const struct sockaddr *)(b))->sa_len) && \ (bcmp((a), (b), ((const struct sockaddr *)(b))->sa_len) == 0)) #ifdef _KERNEL #define RT_LINK_IS_UP(ifp) (!((ifp)->if_capabilities & IFCAP_LINKSTATE) \ || (ifp)->if_link_state == LINK_STATE_UP) #define RT_LOCK_INIT(_rt) \ mtx_init(&(_rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK) #define RT_LOCK(_rt) mtx_lock(&(_rt)->rt_mtx) #define RT_UNLOCK(_rt) mtx_unlock(&(_rt)->rt_mtx) #define RT_LOCK_DESTROY(_rt) mtx_destroy(&(_rt)->rt_mtx) #define RT_LOCK_ASSERT(_rt) mtx_assert(&(_rt)->rt_mtx, MA_OWNED) #define RT_UNLOCK_COND(_rt) do { \ if (mtx_owned(&(_rt)->rt_mtx)) \ mtx_unlock(&(_rt)->rt_mtx); \ } while (0) #define RT_ADDREF(_rt) do { \ RT_LOCK_ASSERT(_rt); \ KASSERT((_rt)->rt_refcnt >= 0, \ ("negative refcnt %d", (_rt)->rt_refcnt)); \ (_rt)->rt_refcnt++; \ } while (0) #define RT_REMREF(_rt) do { \ RT_LOCK_ASSERT(_rt); \ KASSERT((_rt)->rt_refcnt > 0, \ ("bogus refcnt %d", (_rt)->rt_refcnt)); \ (_rt)->rt_refcnt--; \ } while (0) #define RTFREE_LOCKED(_rt) do { \ if ((_rt)->rt_refcnt <= 1) \ rtfree(_rt); \ else { \ RT_REMREF(_rt); \ RT_UNLOCK(_rt); \ } \ /* guard against invalid refs */ \ _rt = 0; \ } while (0) #define RTFREE(_rt) do { \ RT_LOCK(_rt); \ RTFREE_LOCKED(_rt); \ } while (0) #define RO_RTFREE(_ro) do { \ if ((_ro)->ro_rt) { \ if ((_ro)->ro_flags & RT_NORTREF) { \ (_ro)->ro_flags &= ~RT_NORTREF; \ (_ro)->ro_rt = NULL; \ } else { \ RT_LOCK((_ro)->ro_rt); \ RTFREE_LOCKED((_ro)->ro_rt); \ } \ } \ } while (0) struct radix_node_head *rt_tables_get_rnh(int, int); struct ifmultiaddr; void rt_ieee80211msg(struct ifnet *, int, void *, size_t); void rt_ifannouncemsg(struct ifnet *, int); void rt_ifmsg(struct ifnet *); void rt_missmsg(int, struct rt_addrinfo *, int, int); void rt_missmsg_fib(int, struct rt_addrinfo *, int, int, int); void rt_newaddrmsg(int, struct ifaddr *, int, struct rtentry *); void rt_newaddrmsg_fib(int, struct ifaddr *, int, struct rtentry *, int); int rt_addrmsg(int, struct ifaddr *, int); int rt_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int); void rt_newmaddrmsg(int, struct ifmultiaddr *); int rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *); void rt_maskedcopy(struct sockaddr *, struct sockaddr *, struct sockaddr *); int rtsock_addrmsg(int, struct ifaddr *, int); int rtsock_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int); /* * Note the following locking behavior: * * rtalloc_ign() and rtalloc() return ro->ro_rt unlocked * * rtalloc1() returns a locked rtentry * * rtfree() and RTFREE_LOCKED() require a locked rtentry * * RTFREE() uses an unlocked entry. 
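 *
 * For example (sketch), an entry obtained via rtalloc1_fib() must be
 * released with RTFREE_LOCKED(), while one cached in a struct route
 * by rtalloc_ign_fib() is released unlocked via RTFREE() or RO_RTFREE():
 *
 *	rt = rtalloc1_fib(dst, 1, 0, fibnum);	(locked on return)
 *	if (rt != NULL)
 *		RTFREE_LOCKED(rt);		(unref, then unlock or free)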
*/ int rt_expunge(struct radix_node_head *, struct rtentry *); void rtfree(struct rtentry *); int rt_check(struct rtentry **, struct rtentry **, struct sockaddr *); void rt_updatemtu(struct ifnet *); typedef int rt_walktree_f_t(struct rtentry *, void *); typedef void rt_setwarg_t(struct radix_node_head *, uint32_t, int, void *); void rt_foreach_fib_walk(int af, rt_setwarg_t *, rt_walktree_f_t *, void *); void rt_foreach_fib_walk_del(int af, rt_filter_f_t *filter_f, void *arg); void rt_flushifroutes(struct ifnet *ifp); /* XXX MRT COMPAT VERSIONS THAT SET UNIVERSE to 0 */ /* These are used by old code not yet converted to use multiple FIBS */ void rtalloc_ign(struct route *ro, u_long ignflags); void rtalloc(struct route *ro); /* XXX deprecated, use rtalloc_ign(ro, 0) */ struct rtentry *rtalloc1(struct sockaddr *, int, u_long); int rtinit(struct ifaddr *, int, int); int rtioctl(u_long, caddr_t); void rtredirect(struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct sockaddr *); int rtrequest(int, struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct rtentry **); /* XXX MRT NEW VERSIONS THAT USE FIBs * For now the protocol independent versions are the same as the AF_INET ones * but this will change. */ int rt_getifa_fib(struct rt_addrinfo *, u_int fibnum); void rtalloc_ign_fib(struct route *ro, u_long ignflags, u_int fibnum); void rtalloc_fib(struct route *ro, u_int fibnum); struct rtentry *rtalloc1_fib(struct sockaddr *, int, u_long, u_int); int rtioctl_fib(u_long, caddr_t, u_int); void rtredirect_fib(struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct sockaddr *, u_int); int rtrequest_fib(int, struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int); int rtrequest1_fib(int, struct rt_addrinfo *, struct rtentry **, u_int); +int rib_lookup_info(uint32_t, const struct sockaddr *, uint32_t, uint32_t, + struct rt_addrinfo *); +void rib_free_info(struct rt_addrinfo *info); #include typedef void (*rtevent_redirect_fn)(void *, struct rtentry *, struct rtentry *, struct sockaddr *); EVENTHANDLER_DECLARE(route_redirect_event, rtevent_redirect_fn); #endif #endif diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index 6fcbbc6ec431..a5ca9faf4023 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -1,1920 +1,1925 @@ /*- * Copyright (c) 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)rtsock.c 8.7 (Berkeley) 10/12/95 * $FreeBSD$ */ #include "opt_compat.h" #include "opt_mpath.h" #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #endif #ifdef COMPAT_FREEBSD32 #include #include struct if_msghdr32 { uint16_t ifm_msglen; uint8_t ifm_version; uint8_t ifm_type; int32_t ifm_addrs; int32_t ifm_flags; uint16_t ifm_index; struct if_data ifm_data; }; struct if_msghdrl32 { uint16_t ifm_msglen; uint8_t ifm_version; uint8_t ifm_type; int32_t ifm_addrs; int32_t ifm_flags; uint16_t ifm_index; uint16_t _ifm_spare1; uint16_t ifm_len; uint16_t ifm_data_off; struct if_data ifm_data; }; struct ifa_msghdrl32 { uint16_t ifam_msglen; uint8_t ifam_version; uint8_t ifam_type; int32_t ifam_addrs; int32_t ifam_flags; uint16_t ifam_index; uint16_t _ifam_spare1; uint16_t ifam_len; uint16_t ifam_data_off; int32_t ifam_metric; struct if_data ifam_data; }; #endif /* COMPAT_FREEBSD32 */ MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables"); /* NB: these are not modified */ static struct sockaddr route_src = { 2, PF_ROUTE, }; static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, }; /* These are external hooks for CARP. */ int (*carp_get_vhid_p)(struct ifaddr *); /* * Used by rtsock/raw_input callback code to decide whether to filter the update * notification to a socket bound to a particular FIB. 
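 *
 * Producers mark such messages before dispatch using the flag
 * defined below, see e.g. rt_missmsg_fib():
 *
 *	M_SETFIB(m, fibnum);
 *	m->m_flags |= RTS_FILTER_FIB;
 *
 * raw_input_rts_cb() then skips delivery to any PF_ROUTE socket
 * whose so_fibnum does not match the fib of the mbuf.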
*/ #define RTS_FILTER_FIB M_PROTO8 typedef struct { int ip_count; /* attached w/ AF_INET */ int ip6_count; /* attached w/ AF_INET6 */ int any_count; /* total attached */ } route_cb_t; static VNET_DEFINE(route_cb_t, route_cb); #define V_route_cb VNET(route_cb) struct mtx rtsock_mtx; MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF); #define RTSOCK_LOCK() mtx_lock(&rtsock_mtx) #define RTSOCK_UNLOCK() mtx_unlock(&rtsock_mtx) #define RTSOCK_LOCK_ASSERT() mtx_assert(&rtsock_mtx, MA_OWNED) static SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD, 0, ""); struct walkarg { int w_tmemsize; int w_op, w_arg; caddr_t w_tmem; struct sysctl_req *w_req; }; static void rts_input(struct mbuf *m); static struct mbuf *rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo); static int rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo, struct walkarg *w, int *plen); static int rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo); static int sysctl_dumpentry(struct radix_node *rn, void *vw); static int sysctl_iflist(int af, struct walkarg *w); static int sysctl_ifmalist(int af, struct walkarg *w); static int route_output(struct mbuf *m, struct socket *so, ...); static void rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out); static void rt_dispatch(struct mbuf *, sa_family_t); static struct sockaddr *rtsock_fix_netmask(struct sockaddr *dst, struct sockaddr *smask, struct sockaddr_storage *dmask); static struct netisr_handler rtsock_nh = { .nh_name = "rtsock", .nh_handler = rts_input, .nh_proto = NETISR_ROUTE, .nh_policy = NETISR_POLICY_SOURCE, }; static int sysctl_route_netisr_maxqlen(SYSCTL_HANDLER_ARGS) { int error, qlimit; netisr_getqlimit(&rtsock_nh, &qlimit); error = sysctl_handle_int(oidp, &qlimit, 0, req); if (error || !req->newptr) return (error); if (qlimit < 1) return (EINVAL); return (netisr_setqlimit(&rtsock_nh, qlimit)); } SYSCTL_PROC(_net_route, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_route_netisr_maxqlen, "I", "maximum routing socket dispatch queue length"); static void rts_init(void) { int tmp; if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp)) rtsock_nh.nh_qlimit = tmp; netisr_register(&rtsock_nh); } SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0); static int raw_input_rts_cb(struct mbuf *m, struct sockproto *proto, struct sockaddr *src, struct rawcb *rp) { int fibnum; KASSERT(m != NULL, ("%s: m is NULL", __func__)); KASSERT(proto != NULL, ("%s: proto is NULL", __func__)); KASSERT(rp != NULL, ("%s: rp is NULL", __func__)); /* No filtering requested. */ if ((m->m_flags & RTS_FILTER_FIB) == 0) return (0); /* Check if it is a rts and the fib matches the one of the socket. */ fibnum = M_GETFIB(m); if (proto->sp_family != PF_ROUTE || rp->rcb_socket == NULL || rp->rcb_socket->so_fibnum == fibnum) return (0); /* Filtering requested and no match, the socket shall be skipped. */ return (1); } static void rts_input(struct mbuf *m) { struct sockproto route_proto; unsigned short *family; struct m_tag *tag; route_proto.sp_family = PF_ROUTE; tag = m_tag_find(m, PACKET_TAG_RTSOCKFAM, NULL); if (tag != NULL) { family = (unsigned short *)(tag + 1); route_proto.sp_protocol = *family; m_tag_delete(m, tag); } else route_proto.sp_protocol = 0; raw_input_ext(m, &route_proto, &route_src, raw_input_rts_cb); } /* * It really doesn't make any sense at all for this code to share much * with raw_usrreq.c, since its functionality is so restricted. 
XXX */ static void rts_abort(struct socket *so) { raw_usrreqs.pru_abort(so); } static void rts_close(struct socket *so) { raw_usrreqs.pru_close(so); } /* pru_accept is EOPNOTSUPP */ static int rts_attach(struct socket *so, int proto, struct thread *td) { struct rawcb *rp; int error; KASSERT(so->so_pcb == NULL, ("rts_attach: so_pcb != NULL")); /* XXX */ rp = malloc(sizeof *rp, M_PCB, M_WAITOK | M_ZERO); if (rp == NULL) return ENOBUFS; so->so_pcb = (caddr_t)rp; so->so_fibnum = td->td_proc->p_fibnum; error = raw_attach(so, proto); rp = sotorawcb(so); if (error) { so->so_pcb = NULL; free(rp, M_PCB); return error; } RTSOCK_LOCK(); switch(rp->rcb_proto.sp_protocol) { case AF_INET: V_route_cb.ip_count++; break; case AF_INET6: V_route_cb.ip6_count++; break; } V_route_cb.any_count++; RTSOCK_UNLOCK(); soisconnected(so); so->so_options |= SO_USELOOPBACK; return 0; } static int rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { return (raw_usrreqs.pru_bind(so, nam, td)); /* xxx just EINVAL */ } static int rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { return (raw_usrreqs.pru_connect(so, nam, td)); /* XXX just EINVAL */ } /* pru_connect2 is EOPNOTSUPP */ /* pru_control is EOPNOTSUPP */ static void rts_detach(struct socket *so) { struct rawcb *rp = sotorawcb(so); KASSERT(rp != NULL, ("rts_detach: rp == NULL")); RTSOCK_LOCK(); switch(rp->rcb_proto.sp_protocol) { case AF_INET: V_route_cb.ip_count--; break; case AF_INET6: V_route_cb.ip6_count--; break; } V_route_cb.any_count--; RTSOCK_UNLOCK(); raw_usrreqs.pru_detach(so); } static int rts_disconnect(struct socket *so) { return (raw_usrreqs.pru_disconnect(so)); } /* pru_listen is EOPNOTSUPP */ static int rts_peeraddr(struct socket *so, struct sockaddr **nam) { return (raw_usrreqs.pru_peeraddr(so, nam)); } /* pru_rcvd is EOPNOTSUPP */ /* pru_rcvoob is EOPNOTSUPP */ static int rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { return (raw_usrreqs.pru_send(so, flags, m, nam, control, td)); } /* pru_sense is null */ static int rts_shutdown(struct socket *so) { return (raw_usrreqs.pru_shutdown(so)); } static int rts_sockaddr(struct socket *so, struct sockaddr **nam) { return (raw_usrreqs.pru_sockaddr(so, nam)); } static struct pr_usrreqs route_usrreqs = { .pru_abort = rts_abort, .pru_attach = rts_attach, .pru_bind = rts_bind, .pru_connect = rts_connect, .pru_detach = rts_detach, .pru_disconnect = rts_disconnect, .pru_peeraddr = rts_peeraddr, .pru_send = rts_send, .pru_shutdown = rts_shutdown, .pru_sockaddr = rts_sockaddr, .pru_close = rts_close, }; #ifndef _SOCKADDR_UNION_DEFINED #define _SOCKADDR_UNION_DEFINED /* * The union of all possible address formats we handle. */ union sockaddr_union { struct sockaddr sa; struct sockaddr_in sin; struct sockaddr_in6 sin6; }; #endif /* _SOCKADDR_UNION_DEFINED */ static int rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp, struct rtentry *rt, union sockaddr_union *saun, struct ucred *cred) { /* First, see if the returned address is part of the jail. */ if (prison_if(cred, rt->rt_ifa->ifa_addr) == 0) { info->rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; return (0); } switch (info->rti_info[RTAX_DST]->sa_family) { #ifdef INET case AF_INET: { struct in_addr ia; struct ifaddr *ifa; int found; found = 0; /* * Try to find an address on the given outgoing interface * that belongs to the jail. 
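 *
 * (For example, a jail restricted to 192.0.2.5 on the outgoing
 *  interface should have that address reported as RTAX_IFA rather
 *  than the host's primary interface address.)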
*/ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct sockaddr *sa; sa = ifa->ifa_addr; if (sa->sa_family != AF_INET) continue; ia = ((struct sockaddr_in *)sa)->sin_addr; if (prison_check_ip4(cred, &ia) == 0) { found = 1; break; } } IF_ADDR_RUNLOCK(ifp); if (!found) { /* * As a last resort return the 'default' jail address. */ ia = ((struct sockaddr_in *)rt->rt_ifa->ifa_addr)-> sin_addr; if (prison_get_ip4(cred, &ia) != 0) return (ESRCH); } bzero(&saun->sin, sizeof(struct sockaddr_in)); saun->sin.sin_len = sizeof(struct sockaddr_in); saun->sin.sin_family = AF_INET; saun->sin.sin_addr.s_addr = ia.s_addr; info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin; break; } #endif #ifdef INET6 case AF_INET6: { struct in6_addr ia6; struct ifaddr *ifa; int found; found = 0; /* * Try to find an address on the given outgoing interface * that belongs to the jail. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct sockaddr *sa; sa = ifa->ifa_addr; if (sa->sa_family != AF_INET6) continue; bcopy(&((struct sockaddr_in6 *)sa)->sin6_addr, &ia6, sizeof(struct in6_addr)); if (prison_check_ip6(cred, &ia6) == 0) { found = 1; break; } } IF_ADDR_RUNLOCK(ifp); if (!found) { /* * As a last resort return the 'default' jail address. */ ia6 = ((struct sockaddr_in6 *)rt->rt_ifa->ifa_addr)-> sin6_addr; if (prison_get_ip6(cred, &ia6) != 0) return (ESRCH); } bzero(&saun->sin6, sizeof(struct sockaddr_in6)); saun->sin6.sin6_len = sizeof(struct sockaddr_in6); saun->sin6.sin6_family = AF_INET6; bcopy(&ia6, &saun->sin6.sin6_addr, sizeof(struct in6_addr)); if (sa6_recoverscope(&saun->sin6) != 0) return (ESRCH); info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin6; break; } #endif default: return (ESRCH); } return (0); } /*ARGSUSED*/ static int route_output(struct mbuf *m, struct socket *so, ...) { struct rt_msghdr *rtm = NULL; struct rtentry *rt = NULL; struct radix_node_head *rnh; struct rt_addrinfo info; struct sockaddr_storage ss; #ifdef INET6 struct sockaddr_in6 *sin6; int i, rti_need_deembed = 0; #endif int alloc_len = 0, len, error = 0, fibnum; struct ifnet *ifp = NULL; union sockaddr_union saun; sa_family_t saf = AF_UNSPEC; struct rawcb *rp = NULL; struct walkarg w; fibnum = so->so_fibnum; #define senderr(e) { error = e; goto flush;} if (m == NULL || ((m->m_len < sizeof(long)) && (m = m_pullup(m, sizeof(long))) == NULL)) return (ENOBUFS); if ((m->m_flags & M_PKTHDR) == 0) panic("route_output"); len = m->m_pkthdr.len; if (len < sizeof(*rtm) || len != mtod(m, struct rt_msghdr *)->rtm_msglen) senderr(EINVAL); /* * Most current messages are in the 200-240 byte range; * minimize possible re-allocation on reply by using a * larger buffer aligned on a 1k boundary. */ alloc_len = roundup2(len, 1024); if ((rtm = malloc(alloc_len, M_TEMP, M_NOWAIT)) == NULL) senderr(ENOBUFS); m_copydata(m, 0, len, (caddr_t)rtm); bzero(&info, sizeof(info)); bzero(&w, sizeof(w)); if (rtm->rtm_version != RTM_VERSION) { /* Do not touch message since format is unknown */ free(rtm, M_TEMP); rtm = NULL; senderr(EPROTONOSUPPORT); } /* * Starting from here, it is possible * to alter original message and insert * caller PID and error value. */ rtm->rtm_pid = curproc->p_pid; info.rti_addrs = rtm->rtm_addrs; info.rti_mflags = rtm->rtm_inits; info.rti_rmx = &rtm->rtm_rmx; /* * rt_xaddrs() performs s6_addr[2] := sin6_scope_id for AF_INET6 * link-local address because rtrequest requires addresses with * embedded scope id.
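 *
 * (KAME-style embedding: a link-local destination such as fe80::1
 * with sin6_scope_id 2 is carried internally as fe80:2::1, i.e.
 * the scope id lives in the second 16-bit word of the address.)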
*/ if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) senderr(EINVAL); info.rti_flags = rtm->rtm_flags; if (info.rti_info[RTAX_DST] == NULL || info.rti_info[RTAX_DST]->sa_family >= AF_MAX || (info.rti_info[RTAX_GATEWAY] != NULL && info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX)) senderr(EINVAL); saf = info.rti_info[RTAX_DST]->sa_family; /* * Verify that the caller has the appropriate privilege; RTM_GET * is the only operation the non-superuser is allowed. */ if (rtm->rtm_type != RTM_GET) { error = priv_check(curthread, PRIV_NET_ROUTE); if (error) senderr(error); } /* * The given gateway address may be an interface address. * For example, issuing a "route change" command on a route * entry that was created from a tunnel, and the gateway * address given is the local end point. In this case the * RTF_GATEWAY flag must be cleared or the destination will * not be reachable even though there is no error message. */ if (info.rti_info[RTAX_GATEWAY] != NULL && info.rti_info[RTAX_GATEWAY]->sa_family != AF_LINK) { - struct route gw_ro; + struct rt_addrinfo ginfo; + struct sockaddr *gdst; + + bzero(&ginfo, sizeof(ginfo)); + bzero(&ss, sizeof(ss)); + ss.ss_len = sizeof(ss); + + ginfo.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&ss; + gdst = info.rti_info[RTAX_GATEWAY]; - bzero(&gw_ro, sizeof(gw_ro)); - gw_ro.ro_dst = *info.rti_info[RTAX_GATEWAY]; - rtalloc_ign_fib(&gw_ro, 0, fibnum); /* * A host route through the loopback interface is * installed for each interface address. In pre 8.0 * releases the interface address of a PPP link type * is not reachable locally. This behavior is fixed as * part of the new L2/L3 redesign and rewrite work. The * signature of this interface address route is the * AF_LINK sa_family type of the rt_gateway, and the * rt_ifp has the IFF_LOOPBACK flag set. */ - if (gw_ro.ro_rt != NULL && - gw_ro.ro_rt->rt_gateway->sa_family == AF_LINK && - gw_ro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) { - info.rti_flags &= ~RTF_GATEWAY; - info.rti_flags |= RTF_GWFLAG_COMPAT; + if (rib_lookup_info(fibnum, gdst, NHR_REF, 0, &ginfo) == 0) { + if (ss.ss_family == AF_LINK && + ginfo.rti_ifp->if_flags & IFF_LOOPBACK) { + info.rti_flags &= ~RTF_GATEWAY; + info.rti_flags |= RTF_GWFLAG_COMPAT; + } + rib_free_info(&ginfo); } - if (gw_ro.ro_rt != NULL) - RTFREE(gw_ro.ro_rt); } switch (rtm->rtm_type) { struct rtentry *saved_nrt; case RTM_ADD: case RTM_CHANGE: if (info.rti_info[RTAX_GATEWAY] == NULL) senderr(EINVAL); saved_nrt = NULL; /* support for new ARP code */ if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK && (rtm->rtm_flags & RTF_LLDATA) != 0) { error = lla_rt_output(rtm, &info); #ifdef INET6 if (error == 0) rti_need_deembed = (V_deembed_scopeid) ? 1 : 0; #endif break; } error = rtrequest1_fib(rtm->rtm_type, &info, &saved_nrt, fibnum); if (error == 0 && saved_nrt != NULL) { #ifdef INET6 rti_need_deembed = (V_deembed_scopeid) ? 1 : 0; #endif RT_LOCK(saved_nrt); rtm->rtm_index = saved_nrt->rt_ifp->if_index; RT_REMREF(saved_nrt); RT_UNLOCK(saved_nrt); } break; case RTM_DELETE: saved_nrt = NULL; /* support for new ARP code */ if (info.rti_info[RTAX_GATEWAY] && (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK) && (rtm->rtm_flags & RTF_LLDATA) != 0) { error = lla_rt_output(rtm, &info); #ifdef INET6 if (error == 0) rti_need_deembed = (V_deembed_scopeid) ? 1 : 0; #endif break; } error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt, fibnum); if (error == 0) { RT_LOCK(saved_nrt); rt = saved_nrt; goto report; } #ifdef INET6 /* rt_msg2() will not be used when RTM_DELETE fails.
*/ rti_need_deembed = (V_deembed_scopeid) ? 1 : 0; #endif break; case RTM_GET: rnh = rt_tables_get_rnh(fibnum, saf); if (rnh == NULL) senderr(EAFNOSUPPORT); RADIX_NODE_HEAD_RLOCK(rnh); if (info.rti_info[RTAX_NETMASK] == NULL && rtm->rtm_type == RTM_GET) { /* * Provide longest prefix match for * address lookup (no mask), e.g. * 'route -n get addr'. */ rt = (struct rtentry *) rnh->rnh_matchaddr( info.rti_info[RTAX_DST], rnh); } else rt = (struct rtentry *) rnh->rnh_lookup( info.rti_info[RTAX_DST], info.rti_info[RTAX_NETMASK], rnh); if (rt == NULL) { RADIX_NODE_HEAD_RUNLOCK(rnh); senderr(ESRCH); } #ifdef RADIX_MPATH /* * for RTM_CHANGE/LOCK, if we got multipath routes, * we require users to specify a matching RTAX_GATEWAY. * * for RTM_GET, gate is optional even with multipath. * if gate == NULL the first match is returned. * (no need to call rt_mpath_matchgate if gate == NULL) */ if (rn_mpath_capable(rnh) && (rtm->rtm_type != RTM_GET || info.rti_info[RTAX_GATEWAY])) { rt = rt_mpath_matchgate(rt, info.rti_info[RTAX_GATEWAY]); if (!rt) { RADIX_NODE_HEAD_RUNLOCK(rnh); senderr(ESRCH); } } #endif /* * If performing proxied L2 entry insertion, and * the actual PPP host entry is found, perform * another search to retrieve the prefix route of * the local end point of the PPP link. */ if (rtm->rtm_flags & RTF_ANNOUNCE) { struct sockaddr laddr; if (rt->rt_ifp != NULL && rt->rt_ifp->if_type == IFT_PROPVIRTUAL) { struct ifaddr *ifa; ifa = ifa_ifwithnet(info.rti_info[RTAX_DST], 1, RT_ALL_FIBS); if (ifa != NULL) rt_maskedcopy(ifa->ifa_addr, &laddr, ifa->ifa_netmask); } else rt_maskedcopy(rt->rt_ifa->ifa_addr, &laddr, rt->rt_ifa->ifa_netmask); /* * Re-fetch rt; no additional lock operation necessary */ rt = (struct rtentry *)rnh->rnh_matchaddr(&laddr, rnh); if (rt == NULL) { RADIX_NODE_HEAD_RUNLOCK(rnh); senderr(ESRCH); } } RT_LOCK(rt); RT_ADDREF(rt); RADIX_NODE_HEAD_RUNLOCK(rnh); report: RT_LOCK_ASSERT(rt); if ((rt->rt_flags & RTF_HOST) == 0 ?
jailed_without_vnet(curthread->td_ucred) : prison_if(curthread->td_ucred, rt_key(rt)) != 0) { RT_UNLOCK(rt); senderr(ESRCH); } info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt), rt_mask(rt), &ss); info.rti_info[RTAX_GENMASK] = 0; if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) { ifp = rt->rt_ifp; if (ifp) { info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr; error = rtm_get_jailed(&info, ifp, rt, &saun, curthread->td_ucred); if (error != 0) { RT_UNLOCK(rt); senderr(error); } if (ifp->if_flags & IFF_POINTOPOINT) info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; rtm->rtm_index = ifp->if_index; } else { info.rti_info[RTAX_IFP] = NULL; info.rti_info[RTAX_IFA] = NULL; } } else if ((ifp = rt->rt_ifp) != NULL) { rtm->rtm_index = ifp->if_index; } /* Check if we need to realloc storage */ rtsock_msg_buffer(rtm->rtm_type, &info, NULL, &len); if (len > alloc_len) { struct rt_msghdr *new_rtm; new_rtm = malloc(len, M_TEMP, M_NOWAIT); if (new_rtm == NULL) { RT_UNLOCK(rt); senderr(ENOBUFS); } bcopy(rtm, new_rtm, rtm->rtm_msglen); free(rtm, M_TEMP); rtm = new_rtm; alloc_len = len; } w.w_tmem = (caddr_t)rtm; w.w_tmemsize = alloc_len; rtsock_msg_buffer(rtm->rtm_type, &info, &w, &len); if (rt->rt_flags & RTF_GWFLAG_COMPAT) rtm->rtm_flags = RTF_GATEWAY | (rt->rt_flags & ~RTF_GWFLAG_COMPAT); else rtm->rtm_flags = rt->rt_flags; rt_getmetrics(rt, &rtm->rtm_rmx); rtm->rtm_addrs = info.rti_addrs; RT_UNLOCK(rt); break; default: senderr(EOPNOTSUPP); } flush: if (rt != NULL) RTFREE(rt); /* * Check to see if we don't want our own messages. */ if ((so->so_options & SO_USELOOPBACK) == 0) { if (V_route_cb.any_count <= 1) { if (rtm != NULL) free(rtm, M_TEMP); m_freem(m); return (error); } /* There is another listener, so construct message */ rp = sotorawcb(so); } if (rtm != NULL) { #ifdef INET6 if (rti_need_deembed) { /* sin6_scope_id is recovered before sending rtm. */ sin6 = (struct sockaddr_in6 *)&ss; for (i = 0; i < RTAX_MAX; i++) { if (info.rti_info[i] == NULL) continue; if (info.rti_info[i]->sa_family != AF_INET6) continue; bcopy(info.rti_info[i], sin6, sizeof(*sin6)); if (sa6_recoverscope(sin6) == 0) bcopy(sin6, info.rti_info[i], sizeof(*sin6)); } } #endif if (error != 0) rtm->rtm_errno = error; else rtm->rtm_flags |= RTF_DONE; m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm); if (m->m_pkthdr.len < rtm->rtm_msglen) { m_freem(m); m = NULL; } else if (m->m_pkthdr.len > rtm->rtm_msglen) m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len); free(rtm, M_TEMP); } if (m != NULL) { M_SETFIB(m, fibnum); m->m_flags |= RTS_FILTER_FIB; if (rp) { /* * XXX insure we don't get a copy by * invalidating our protocol */ unsigned short family = rp->rcb_proto.sp_family; rp->rcb_proto.sp_family = 0; rt_dispatch(m, saf); rp->rcb_proto.sp_family = family; } else rt_dispatch(m, saf); } return (error); } static void rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out) { bzero(out, sizeof(*out)); out->rmx_mtu = rt->rt_mtu; out->rmx_weight = rt->rt_weight; out->rmx_pksent = counter_u64_fetch(rt->rt_pksent); /* Kernel -> userland timebase conversion. */ out->rmx_expire = rt->rt_expire ? rt->rt_expire - time_uptime + time_second : 0; } /* * Extract the addresses of the passed sockaddrs. * Do a little sanity checking so as to avoid bad memory references. * This data is derived straight from userland. 
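 *
 * For example, a minimal RTM_GET request from userland is a single
 * struct rt_msghdr with rtm_addrs = RTA_DST immediately followed by
 * one destination sockaddr; rt_xaddrs() below recovers it into
 * rtinfo->rti_info[RTAX_DST] for route_output() to look up.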
*/ static int rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) { struct sockaddr *sa; int i; for (i = 0; i < RTAX_MAX && cp < cplim; i++) { if ((rtinfo->rti_addrs & (1 << i)) == 0) continue; sa = (struct sockaddr *)cp; /* * It won't fit. */ if (cp + sa->sa_len > cplim) return (EINVAL); /* * There are no more.. quit now. * If there are more bits set, they are in error. * I've seen this; route(1) can evidently generate these, * and they used to make the kernel core dump. * For compatibility, if we see one, point to a safe address. */ if (sa->sa_len == 0) { rtinfo->rti_info[i] = &sa_zero; return (0); /* should be EINVAL but for compat */ } /* accept it */ #ifdef INET6 if (sa->sa_family == AF_INET6) sa6_embedscope((struct sockaddr_in6 *)sa, V_ip6_use_defzone); #endif rtinfo->rti_info[i] = sa; cp += SA_SIZE(sa); } return (0); } /* * Fill in @dmask with a valid netmask, leaving the original @smask * intact. Mostly used with radix netmasks. */ static struct sockaddr * rtsock_fix_netmask(struct sockaddr *dst, struct sockaddr *smask, struct sockaddr_storage *dmask) { if (dst == NULL || smask == NULL) return (NULL); memset(dmask, 0, dst->sa_len); memcpy(dmask, smask, smask->sa_len); dmask->ss_len = dst->sa_len; dmask->ss_family = dst->sa_family; return ((struct sockaddr *)dmask); } /* * Writes information related to @rtinfo object to newly-allocated mbuf. * Assumes MCLBYTES is enough to construct any message. * Used for OS notifications of various events (if/ifa announces, etc.) * * Returns allocated mbuf or NULL on failure. */ static struct mbuf * rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo) { struct rt_msghdr *rtm; struct mbuf *m; int i; struct sockaddr *sa; #ifdef INET6 struct sockaddr_storage ss; struct sockaddr_in6 *sin6; #endif int len, dlen; switch (type) { case RTM_DELADDR: case RTM_NEWADDR: len = sizeof(struct ifa_msghdr); break; case RTM_DELMADDR: case RTM_NEWMADDR: len = sizeof(struct ifma_msghdr); break; case RTM_IFINFO: len = sizeof(struct if_msghdr); break; case RTM_IFANNOUNCE: case RTM_IEEE80211: len = sizeof(struct if_announcemsghdr); break; default: len = sizeof(struct rt_msghdr); } /* XXXGL: can we use MJUMPAGESIZE cluster here? */ KASSERT(len <= MCLBYTES, ("%s: message too big", __func__)); if (len > MHLEN) m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); else m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return (m); m->m_pkthdr.len = m->m_len = len; rtm = mtod(m, struct rt_msghdr *); bzero((caddr_t)rtm, len); for (i = 0; i < RTAX_MAX; i++) { if ((sa = rtinfo->rti_info[i]) == NULL) continue; rtinfo->rti_addrs |= (1 << i); dlen = SA_SIZE(sa); #ifdef INET6 if (V_deembed_scopeid && sa->sa_family == AF_INET6) { sin6 = (struct sockaddr_in6 *)&ss; bcopy(sa, sin6, sizeof(*sin6)); if (sa6_recoverscope(sin6) == 0) sa = (struct sockaddr *)sin6; } #endif m_copyback(m, len, dlen, (caddr_t)sa); len += dlen; } if (m->m_pkthdr.len != len) { m_freem(m); return (NULL); } rtm->rtm_msglen = len; rtm->rtm_version = RTM_VERSION; rtm->rtm_type = type; return (m); } /* * Writes information related to @rtinfo object to preallocated buffer. * Stores needed size in @plen. If @w is NULL, calculates size without * writing. * Used for sysctl dumps and rtsock answers (RTM_DEL/RTM_GET) generation. * * Returns 0 on success.
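 *
 * Typical use is two-pass, as in route_output() above: call once
 * with @w == NULL to learn the required size, re-allocate the reply
 * buffer if it is too small, then call again with @w set to fill
 * the message.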
* */ static int rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo, struct walkarg *w, int *plen) { int i; int len, buflen = 0, dlen; caddr_t cp = NULL; struct rt_msghdr *rtm = NULL; #ifdef INET6 struct sockaddr_storage ss; struct sockaddr_in6 *sin6; #endif switch (type) { case RTM_DELADDR: case RTM_NEWADDR: if (w != NULL && w->w_op == NET_RT_IFLISTL) { #ifdef COMPAT_FREEBSD32 if (w->w_req->flags & SCTL_MASK32) len = sizeof(struct ifa_msghdrl32); else #endif len = sizeof(struct ifa_msghdrl); } else len = sizeof(struct ifa_msghdr); break; case RTM_IFINFO: #ifdef COMPAT_FREEBSD32 if (w != NULL && w->w_req->flags & SCTL_MASK32) { if (w->w_op == NET_RT_IFLISTL) len = sizeof(struct if_msghdrl32); else len = sizeof(struct if_msghdr32); break; } #endif if (w != NULL && w->w_op == NET_RT_IFLISTL) len = sizeof(struct if_msghdrl); else len = sizeof(struct if_msghdr); break; case RTM_NEWMADDR: len = sizeof(struct ifma_msghdr); break; default: len = sizeof(struct rt_msghdr); } if (w != NULL) { rtm = (struct rt_msghdr *)w->w_tmem; buflen = w->w_tmemsize - len; cp = (caddr_t)w->w_tmem + len; } rtinfo->rti_addrs = 0; for (i = 0; i < RTAX_MAX; i++) { struct sockaddr *sa; if ((sa = rtinfo->rti_info[i]) == NULL) continue; rtinfo->rti_addrs |= (1 << i); dlen = SA_SIZE(sa); if (cp != NULL && buflen >= dlen) { #ifdef INET6 if (V_deembed_scopeid && sa->sa_family == AF_INET6) { sin6 = (struct sockaddr_in6 *)&ss; bcopy(sa, sin6, sizeof(*sin6)); if (sa6_recoverscope(sin6) == 0) sa = (struct sockaddr *)sin6; } #endif bcopy((caddr_t)sa, cp, (unsigned)dlen); cp += dlen; buflen -= dlen; } else if (cp != NULL) { /* * Buffer too small. Count needed size * and return with error. */ cp = NULL; } len += dlen; } if (cp != NULL) { dlen = ALIGN(len) - len; if (buflen < dlen) cp = NULL; else buflen -= dlen; } len = ALIGN(len); if (cp != NULL) { /* fill header iff buffer is large enough */ rtm->rtm_version = RTM_VERSION; rtm->rtm_type = type; rtm->rtm_msglen = len; } *plen = len; if (w != NULL && cp == NULL) return (ENOBUFS); return (0); } /* * This routine is called to generate a message from the routing * socket indicating that a redirect has occurred, a routing lookup * has failed, or that a protocol has detected timeouts to a particular * destination. */ void rt_missmsg_fib(int type, struct rt_addrinfo *rtinfo, int flags, int error, int fibnum) { struct rt_msghdr *rtm; struct mbuf *m; struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; if (V_route_cb.any_count == 0) return; m = rtsock_msg_mbuf(type, rtinfo); if (m == NULL) return; if (fibnum != RT_ALL_FIBS) { KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: fibnum out " "of range 0 <= %d < %d", __func__, fibnum, rt_numfibs)); M_SETFIB(m, fibnum); m->m_flags |= RTS_FILTER_FIB; } rtm = mtod(m, struct rt_msghdr *); rtm->rtm_flags = RTF_DONE | flags; rtm->rtm_errno = error; rtm->rtm_addrs = rtinfo->rti_addrs; rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC); } void rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error) { rt_missmsg_fib(type, rtinfo, flags, error, RT_ALL_FIBS); } /* * This routine is called to generate a message from the routing * socket indicating that the status of a network interface has changed.
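 *
 * Userland observes these as RTM_IFINFO messages on a PF_ROUTE
 * socket, e.g. one opened with socket(PF_ROUTE, SOCK_RAW, 0).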
*/ void rt_ifmsg(struct ifnet *ifp) { struct if_msghdr *ifm; struct mbuf *m; struct rt_addrinfo info; if (V_route_cb.any_count == 0) return; bzero((caddr_t)&info, sizeof(info)); m = rtsock_msg_mbuf(RTM_IFINFO, &info); if (m == NULL) return; ifm = mtod(m, struct if_msghdr *); ifm->ifm_index = ifp->if_index; ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags; if_data_copy(ifp, &ifm->ifm_data); ifm->ifm_addrs = 0; rt_dispatch(m, AF_UNSPEC); } /* * Announce interface address arrival/withdraw. * Please do not call directly, use rt_addrmsg(). * Assume input data to be valid. * Returns 0 on success. */ int rtsock_addrmsg(int cmd, struct ifaddr *ifa, int fibnum) { struct rt_addrinfo info; struct sockaddr *sa; int ncmd; struct mbuf *m; struct ifa_msghdr *ifam; struct ifnet *ifp = ifa->ifa_ifp; struct sockaddr_storage ss; if (V_route_cb.any_count == 0) return (0); ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr; info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr; info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask( info.rti_info[RTAX_IFP], ifa->ifa_netmask, &ss); info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; if ((m = rtsock_msg_mbuf(ncmd, &info)) == NULL) return (ENOBUFS); ifam = mtod(m, struct ifa_msghdr *); ifam->ifam_index = ifp->if_index; ifam->ifam_metric = ifa->ifa_ifp->if_metric; ifam->ifam_flags = ifa->ifa_flags; ifam->ifam_addrs = info.rti_addrs; if (fibnum != RT_ALL_FIBS) { M_SETFIB(m, fibnum); m->m_flags |= RTS_FILTER_FIB; } rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC); return (0); } /* * Announce route addition/removal. * Please do not call directly, use rt_routemsg(). * Note that @rt data MAY be inconsistent/invalid: * if some userland app sends us "invalid" route message (invalid mask, * no dst, wrong address families, etc...) we need to pass it back * to app (and any other rtsock consumers) with rtm_errno field set to * non-zero value. * * Returns 0 on success. */ int rtsock_routemsg(int cmd, struct ifnet *ifp, int error, struct rtentry *rt, int fibnum) { struct rt_addrinfo info; struct sockaddr *sa; struct mbuf *m; struct rt_msghdr *rtm; struct sockaddr_storage ss; if (V_route_cb.any_count == 0) return (0); bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = sa = rt_key(rt); info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(sa, rt_mask(rt), &ss); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; if ((m = rtsock_msg_mbuf(cmd, &info)) == NULL) return (ENOBUFS); rtm = mtod(m, struct rt_msghdr *); rtm->rtm_index = ifp->if_index; rtm->rtm_flags |= rt->rt_flags; rtm->rtm_errno = error; rtm->rtm_addrs = info.rti_addrs; if (fibnum != RT_ALL_FIBS) { M_SETFIB(m, fibnum); m->m_flags |= RTS_FILTER_FIB; } rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC); return (0); } /* * This is the analogue of rt_newaddrmsg(), which performs the same * function but for multicast group memberships. This is easier since * there is no route state to worry about. */ void rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma) { struct rt_addrinfo info; struct mbuf *m = NULL; struct ifnet *ifp = ifma->ifma_ifp; struct ifma_msghdr *ifmam; if (V_route_cb.any_count == 0) return; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_IFA] = ifma->ifma_addr; info.rti_info[RTAX_IFP] = ifp ? ifp->if_addr->ifa_addr : NULL; /* * If a link-layer address is present, present it as a ``gateway'' * (similarly to how ARP entries, e.g., are presented).
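 *
 * (For instance, a join of 224.0.0.1 on an Ethernet interface would
 * carry the mapped link-layer address 01:00:5e:00:00:01 as the
 * RTAX_GATEWAY sockaddr.)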
*/ info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr; m = rtsock_msg_mbuf(cmd, &info); if (m == NULL) return; ifmam = mtod(m, struct ifma_msghdr *); KASSERT(ifp != NULL, ("%s: link-layer multicast address w/o ifp\n", __func__)); ifmam->ifmam_index = ifp->if_index; ifmam->ifmam_addrs = info.rti_addrs; rt_dispatch(m, ifma->ifma_addr ? ifma->ifma_addr->sa_family : AF_UNSPEC); } static struct mbuf * rt_makeifannouncemsg(struct ifnet *ifp, int type, int what, struct rt_addrinfo *info) { struct if_announcemsghdr *ifan; struct mbuf *m; if (V_route_cb.any_count == 0) return NULL; bzero((caddr_t)info, sizeof(*info)); m = rtsock_msg_mbuf(type, info); if (m != NULL) { ifan = mtod(m, struct if_announcemsghdr *); ifan->ifan_index = ifp->if_index; strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name)); ifan->ifan_what = what; } return m; } /* * This is called to generate routing socket messages indicating * IEEE80211 wireless events. * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way. */ void rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len) { struct mbuf *m; struct rt_addrinfo info; m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info); if (m != NULL) { /* * Append the ieee80211 data. Try to stick it in the * mbuf containing the ifannounce msg; otherwise allocate * a new mbuf and append. * * NB: we assume m is a single mbuf. */ if (data_len > M_TRAILINGSPACE(m)) { struct mbuf *n = m_get(M_NOWAIT, MT_DATA); if (n == NULL) { m_freem(m); return; } bcopy(data, mtod(n, void *), data_len); n->m_len = data_len; m->m_next = n; } else if (data_len > 0) { bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len); m->m_len += data_len; } if (m->m_flags & M_PKTHDR) m->m_pkthdr.len += data_len; mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len; rt_dispatch(m, AF_UNSPEC); } } /* * This is called to generate routing socket messages indicating * network interface arrival and departure. */ void rt_ifannouncemsg(struct ifnet *ifp, int what) { struct mbuf *m; struct rt_addrinfo info; m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info); if (m != NULL) rt_dispatch(m, AF_UNSPEC); } static void rt_dispatch(struct mbuf *m, sa_family_t saf) { struct m_tag *tag; /* * Preserve the family from the sockaddr, if any, in an m_tag for * use when injecting the mbuf into the routing socket buffer from * the netisr. */ if (saf != AF_UNSPEC) { tag = m_tag_get(PACKET_TAG_RTSOCKFAM, sizeof(unsigned short), M_NOWAIT); if (tag == NULL) { m_freem(m); return; } *(unsigned short *)(tag + 1) = saf; m_tag_prepend(m, tag); } #ifdef VIMAGE if (V_loif) m->m_pkthdr.rcvif = V_loif; else { m_freem(m); return; } #endif netisr_queue(NETISR_ROUTE, m); /* mbuf is free'd on failure. */ } /* * This is used in dumping the kernel table via sysctl(). */ static int sysctl_dumpentry(struct radix_node *rn, void *vw) { struct walkarg *w = vw; struct rtentry *rt = (struct rtentry *)rn; int error = 0, size; struct rt_addrinfo info; struct sockaddr_storage ss; if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) return 0; if ((rt->rt_flags & RTF_HOST) == 0 ? 
jailed_without_vnet(w->w_req->td->td_ucred) : prison_if(w->w_req->td->td_ucred, rt_key(rt)) != 0) return (0); bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt), rt_mask(rt), &ss); info.rti_info[RTAX_GENMASK] = 0; if (rt->rt_ifp) { info.rti_info[RTAX_IFP] = rt->rt_ifp->if_addr->ifa_addr; info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; if (rt->rt_ifp->if_flags & IFF_POINTOPOINT) info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; } if ((error = rtsock_msg_buffer(RTM_GET, &info, w, &size)) != 0) return (error); if (w->w_req && w->w_tmem) { struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; if (rt->rt_flags & RTF_GWFLAG_COMPAT) rtm->rtm_flags = RTF_GATEWAY | (rt->rt_flags & ~RTF_GWFLAG_COMPAT); else rtm->rtm_flags = rt->rt_flags; rt_getmetrics(rt, &rtm->rtm_rmx); rtm->rtm_index = rt->rt_ifp->if_index; rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0; rtm->rtm_addrs = info.rti_addrs; error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size); return (error); } return (error); } static int sysctl_iflist_ifml(struct ifnet *ifp, struct rt_addrinfo *info, struct walkarg *w, int len) { struct if_msghdrl *ifm; struct if_data *ifd; ifm = (struct if_msghdrl *)w->w_tmem; #ifdef COMPAT_FREEBSD32 if (w->w_req->flags & SCTL_MASK32) { struct if_msghdrl32 *ifm32; ifm32 = (struct if_msghdrl32 *)ifm; ifm32->ifm_addrs = info->rti_addrs; ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags; ifm32->ifm_index = ifp->if_index; ifm32->_ifm_spare1 = 0; ifm32->ifm_len = sizeof(*ifm32); ifm32->ifm_data_off = offsetof(struct if_msghdrl32, ifm_data); ifd = &ifm32->ifm_data; } else #endif { ifm->ifm_addrs = info->rti_addrs; ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags; ifm->ifm_index = ifp->if_index; ifm->_ifm_spare1 = 0; ifm->ifm_len = sizeof(*ifm); ifm->ifm_data_off = offsetof(struct if_msghdrl, ifm_data); ifd = &ifm->ifm_data; } if_data_copy(ifp, ifd); return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len)); } static int sysctl_iflist_ifm(struct ifnet *ifp, struct rt_addrinfo *info, struct walkarg *w, int len) { struct if_msghdr *ifm; struct if_data *ifd; ifm = (struct if_msghdr *)w->w_tmem; #ifdef COMPAT_FREEBSD32 if (w->w_req->flags & SCTL_MASK32) { struct if_msghdr32 *ifm32; ifm32 = (struct if_msghdr32 *)ifm; ifm32->ifm_addrs = info->rti_addrs; ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags; ifm32->ifm_index = ifp->if_index; ifd = &ifm32->ifm_data; } else #endif { ifm->ifm_addrs = info->rti_addrs; ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags; ifm->ifm_index = ifp->if_index; ifd = &ifm->ifm_data; } if_data_copy(ifp, ifd); return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len)); } static int sysctl_iflist_ifaml(struct ifaddr *ifa, struct rt_addrinfo *info, struct walkarg *w, int len) { struct ifa_msghdrl *ifam; struct if_data *ifd; ifam = (struct ifa_msghdrl *)w->w_tmem; #ifdef COMPAT_FREEBSD32 if (w->w_req->flags & SCTL_MASK32) { struct ifa_msghdrl32 *ifam32; ifam32 = (struct ifa_msghdrl32 *)ifam; ifam32->ifam_addrs = info->rti_addrs; ifam32->ifam_flags = ifa->ifa_flags; ifam32->ifam_index = ifa->ifa_ifp->if_index; ifam32->_ifam_spare1 = 0; ifam32->ifam_len = sizeof(*ifam32); ifam32->ifam_data_off = offsetof(struct ifa_msghdrl32, ifam_data); ifam32->ifam_metric = ifa->ifa_ifp->if_metric; ifd = &ifam32->ifam_data; } else #endif { ifam->ifam_addrs = info->rti_addrs; ifam->ifam_flags = ifa->ifa_flags; ifam->ifam_index = ifa->ifa_ifp->if_index; ifam->_ifam_spare1 = 0; ifam->ifam_len = 
sizeof(*ifam); ifam->ifam_data_off = offsetof(struct ifa_msghdrl, ifam_data); ifam->ifam_metric = ifa->ifa_ifp->if_metric; ifd = &ifam->ifam_data; } bzero(ifd, sizeof(*ifd)); ifd->ifi_datalen = sizeof(struct if_data); ifd->ifi_ipackets = counter_u64_fetch(ifa->ifa_ipackets); ifd->ifi_opackets = counter_u64_fetch(ifa->ifa_opackets); ifd->ifi_ibytes = counter_u64_fetch(ifa->ifa_ibytes); ifd->ifi_obytes = counter_u64_fetch(ifa->ifa_obytes); /* Fixup if_data carp(4) vhid. */ if (carp_get_vhid_p != NULL) ifd->ifi_vhid = (*carp_get_vhid_p)(ifa); return (SYSCTL_OUT(w->w_req, w->w_tmem, len)); } static int sysctl_iflist_ifam(struct ifaddr *ifa, struct rt_addrinfo *info, struct walkarg *w, int len) { struct ifa_msghdr *ifam; ifam = (struct ifa_msghdr *)w->w_tmem; ifam->ifam_addrs = info->rti_addrs; ifam->ifam_flags = ifa->ifa_flags; ifam->ifam_index = ifa->ifa_ifp->if_index; ifam->ifam_metric = ifa->ifa_ifp->if_metric; return (SYSCTL_OUT(w->w_req, w->w_tmem, len)); } static int sysctl_iflist(int af, struct walkarg *w) { struct ifnet *ifp; struct ifaddr *ifa; struct rt_addrinfo info; int len, error = 0; struct sockaddr_storage ss; bzero((caddr_t)&info, sizeof(info)); IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (w->w_arg && w->w_arg != ifp->if_index) continue; IF_ADDR_RLOCK(ifp); ifa = ifp->if_addr; info.rti_info[RTAX_IFP] = ifa->ifa_addr; error = rtsock_msg_buffer(RTM_IFINFO, &info, w, &len); if (error != 0) goto done; info.rti_info[RTAX_IFP] = NULL; if (w->w_req && w->w_tmem) { if (w->w_op == NET_RT_IFLISTL) error = sysctl_iflist_ifml(ifp, &info, w, len); else error = sysctl_iflist_ifm(ifp, &info, w, len); if (error) goto done; } while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != NULL) { if (af && af != ifa->ifa_addr->sa_family) continue; if (prison_if(w->w_req->td->td_ucred, ifa->ifa_addr) != 0) continue; info.rti_info[RTAX_IFA] = ifa->ifa_addr; info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask( ifa->ifa_addr, ifa->ifa_netmask, &ss); info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; error = rtsock_msg_buffer(RTM_NEWADDR, &info, w, &len); if (error != 0) goto done; if (w->w_req && w->w_tmem) { if (w->w_op == NET_RT_IFLISTL) error = sysctl_iflist_ifaml(ifa, &info, w, len); else error = sysctl_iflist_ifam(ifa, &info, w, len); if (error) goto done; } } IF_ADDR_RUNLOCK(ifp); info.rti_info[RTAX_IFA] = NULL; info.rti_info[RTAX_NETMASK] = NULL; info.rti_info[RTAX_BRD] = NULL; } done: if (ifp != NULL) IF_ADDR_RUNLOCK(ifp); IFNET_RUNLOCK_NOSLEEP(); return (error); } static int sysctl_ifmalist(int af, struct walkarg *w) { struct ifnet *ifp; struct ifmultiaddr *ifma; struct rt_addrinfo info; int len, error = 0; struct ifaddr *ifa; bzero((caddr_t)&info, sizeof(info)); IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (w->w_arg && w->w_arg != ifp->if_index) continue; ifa = ifp->if_addr; info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (af && af != ifma->ifma_addr->sa_family) continue; if (prison_if(w->w_req->td->td_ucred, ifma->ifma_addr) != 0) continue; info.rti_info[RTAX_IFA] = ifma->ifma_addr; info.rti_info[RTAX_GATEWAY] = (ifma->ifma_addr->sa_family != AF_LINK) ? 
ifma->ifma_lladdr : NULL; error = rtsock_msg_buffer(RTM_NEWMADDR, &info, w, &len); if (error != 0) goto done; if (w->w_req && w->w_tmem) { struct ifma_msghdr *ifmam; ifmam = (struct ifma_msghdr *)w->w_tmem; ifmam->ifmam_index = ifma->ifma_ifp->if_index; ifmam->ifmam_flags = 0; ifmam->ifmam_addrs = info.rti_addrs; error = SYSCTL_OUT(w->w_req, w->w_tmem, len); if (error) { IF_ADDR_RUNLOCK(ifp); goto done; } } } IF_ADDR_RUNLOCK(ifp); } done: IFNET_RUNLOCK_NOSLEEP(); return (error); } static int sysctl_rtsock(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct radix_node_head *rnh = NULL; /* silence compiler. */ int i, lim, error = EINVAL; int fib = 0; u_char af; struct walkarg w; name ++; namelen--; if (req->newptr) return (EPERM); if (name[1] == NET_RT_DUMP) { if (namelen == 3) fib = req->td->td_proc->p_fibnum; else if (namelen == 4) fib = (name[3] == RT_ALL_FIBS) ? req->td->td_proc->p_fibnum : name[3]; else return ((namelen < 3) ? EISDIR : ENOTDIR); if (fib < 0 || fib >= rt_numfibs) return (EINVAL); } else if (namelen != 3) return ((namelen < 3) ? EISDIR : ENOTDIR); af = name[0]; if (af > AF_MAX) return (EINVAL); bzero(&w, sizeof(w)); w.w_op = name[1]; w.w_arg = name[2]; w.w_req = req; error = sysctl_wire_old_buffer(req, 0); if (error) return (error); /* * Allocate reply buffer in advance. * All rtsock messages have a maximum length of u_short. */ w.w_tmemsize = 65536; w.w_tmem = malloc(w.w_tmemsize, M_TEMP, M_WAITOK); switch (w.w_op) { case NET_RT_DUMP: case NET_RT_FLAGS: if (af == 0) { /* dump all tables */ i = 1; lim = AF_MAX; } else /* dump only one table */ i = lim = af; /* * take care of llinfo entries, the caller must * specify an AF */ if (w.w_op == NET_RT_FLAGS && (w.w_arg == 0 || w.w_arg & RTF_LLINFO)) { if (af != 0) error = lltable_sysctl_dumparp(af, w.w_req); else error = EINVAL; break; } /* * take care of routing entries */ for (error = 0; error == 0 && i <= lim; i++) { rnh = rt_tables_get_rnh(fib, i); if (rnh != NULL) { RADIX_NODE_HEAD_RLOCK(rnh); error = rnh->rnh_walktree(rnh, sysctl_dumpentry, &w); RADIX_NODE_HEAD_RUNLOCK(rnh); } else if (af != 0) error = EAFNOSUPPORT; } break; case NET_RT_IFLIST: case NET_RT_IFLISTL: error = sysctl_iflist(af, &w); break; case NET_RT_IFMALIST: error = sysctl_ifmalist(af, &w); break; } free(w.w_tmem, M_TEMP); return (error); } static SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, ""); /* * Definitions of protocols supported in the ROUTE domain. */ static struct domain routedomain; /* or at least forward */ static struct protosw routesw[] = { { .pr_type = SOCK_RAW, .pr_domain = &routedomain, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_output = route_output, .pr_ctlinput = raw_ctlinput, .pr_init = raw_init, .pr_usrreqs = &route_usrreqs } }; static struct domain routedomain = { .dom_family = PF_ROUTE, .dom_name = "route", .dom_protosw = routesw, .dom_protoswNPROTOSW = &routesw[sizeof(routesw)/sizeof(routesw[0])] }; VNET_DOMAIN_SET(route); diff --git a/sys/net80211/ieee80211_scan_sw.c b/sys/net80211/ieee80211_scan_sw.c index 163633ccd20b..53e45203f10c 100644 --- a/sys/net80211/ieee80211_scan_sw.c +++ b/sys/net80211/ieee80211_scan_sw.c @@ -1,958 +1,958 @@ /*- * Copyright (c) 2002-2008 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * IEEE 802.11 scanning support. */ #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include struct scan_state { struct ieee80211_scan_state base; /* public state */ u_int ss_iflags; /* flags used internally */ #define ISCAN_MINDWELL 0x0001 /* min dwell time reached */ #define ISCAN_DISCARD 0x0002 /* discard rx'd frames */ #define ISCAN_CANCEL 0x0004 /* cancel current scan */ #define ISCAN_ABORT 0x0008 /* end the scan immediately */ unsigned long ss_chanmindwell; /* min dwell on curchan */ unsigned long ss_scanend; /* time scan must stop */ u_int ss_duration; /* duration for next scan */ struct task ss_scan_task; /* scan execution */ struct cv ss_scan_cv; /* scan signal */ struct callout ss_scan_timer; /* scan timer */ }; #define SCAN_PRIVATE(ss) ((struct scan_state *) ss) /* * Amount of time to go off-channel during a background * scan. This value should be large enough to catch most * APs but short enough that we can return on-channel * before our listen interval expires. * * XXX tunable * XXX check against configured listen interval */ #define IEEE80211_SCAN_OFFCHANNEL msecs_to_ticks(150) /* * Roaming-related defaults. RSSI thresholds are as returned by the * driver (.5dBm). Transmit rate thresholds are IEEE rate codes (i.e. * .5M units) or MCS.
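 *
 * For example, with the defaults below a station on an 11a bss
 * considers roaming when the driver-reported rssi drops below 14
 * (7 dBm in .5 dBm units) or the tx rate falls below 2*12
 * (12 Mb/s in .5 Mb/s units).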
*/ /* rssi thresholds */ #define ROAM_RSSI_11A_DEFAULT 14 /* 11a bss */ #define ROAM_RSSI_11B_DEFAULT 14 /* 11b bss */ #define ROAM_RSSI_11BONLY_DEFAULT 14 /* 11b-only bss */ /* transmit rate thresholds */ #define ROAM_RATE_11A_DEFAULT 2*12 /* 11a bss */ #define ROAM_RATE_11B_DEFAULT 2*5 /* 11b bss */ #define ROAM_RATE_11BONLY_DEFAULT 2*1 /* 11b-only bss */ #define ROAM_RATE_HALF_DEFAULT 2*6 /* half-width 11a/g bss */ #define ROAM_RATE_QUARTER_DEFAULT 2*3 /* quarter-width 11a/g bss */ #define ROAM_MCS_11N_DEFAULT (1 | IEEE80211_RATE_MCS) /* 11n bss */ static void scan_curchan(struct ieee80211_scan_state *, unsigned long); static void scan_mindwell(struct ieee80211_scan_state *); static void scan_signal(void *); static void scan_task(void *, int); MALLOC_DEFINE(M_80211_SCAN, "80211scan", "802.11 scan state"); static void ieee80211_swscan_detach(struct ieee80211com *ic) { struct ieee80211_scan_state *ss = ic->ic_scan; if (ss != NULL) { IEEE80211_LOCK(ic); SCAN_PRIVATE(ss)->ss_iflags |= ISCAN_ABORT; scan_signal(ss); IEEE80211_UNLOCK(ic); ieee80211_draintask(ic, &SCAN_PRIVATE(ss)->ss_scan_task); callout_drain(&SCAN_PRIVATE(ss)->ss_scan_timer); KASSERT((ic->ic_flags & IEEE80211_F_SCAN) == 0, ("scan still running")); /* * For now, do the ss_ops detach here rather * than ieee80211_scan_detach(). * * I'll figure out how to cleanly split things up * at a later date. */ if (ss->ss_ops != NULL) { ss->ss_ops->scan_detach(ss); ss->ss_ops = NULL; } ic->ic_scan = NULL; IEEE80211_FREE(SCAN_PRIVATE(ss), M_80211_SCAN); } } static void ieee80211_swscan_vattach(struct ieee80211vap *vap) { /* nothing to do for now */ /* * TODO: all of the vap scan calls should be methods! */ } static void ieee80211_swscan_vdetach(struct ieee80211vap *vap) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_scan_state *ss; IEEE80211_LOCK_ASSERT(ic); ss = ic->ic_scan; if (ss != NULL && ss->ss_vap == vap) { if (ic->ic_flags & IEEE80211_F_SCAN) { SCAN_PRIVATE(ss)->ss_iflags |= ISCAN_ABORT; scan_signal(ss); } } } static void ieee80211_swscan_set_scan_duration(struct ieee80211vap *vap, u_int duration) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_scan_state *ss = ic->ic_scan; IEEE80211_LOCK_ASSERT(ic); /* NB: flush frames rx'd before 1st channel change */ SCAN_PRIVATE(ss)->ss_iflags |= ISCAN_DISCARD; SCAN_PRIVATE(ss)->ss_duration = duration; } /* * Start a scan unless one is already going. */ static int ieee80211_swscan_start_scan_locked(const struct ieee80211_scanner *scan, struct ieee80211vap *vap, int flags, u_int duration, u_int mindwell, u_int maxdwell, u_int nssid, const struct ieee80211_scan_ssid ssids[]) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_scan_state *ss = ic->ic_scan; IEEE80211_LOCK_ASSERT(ic); if (ic->ic_flags & IEEE80211_F_CSAPENDING) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: scan inhibited by pending channel change\n", __func__); } else if ((ic->ic_flags & IEEE80211_F_SCAN) == 0) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: %s scan, duration %u mindwell %u maxdwell %u, desired mode %s, %s%s%s%s%s%s\n" , __func__ , flags & IEEE80211_SCAN_ACTIVE ? "active" : "passive" , duration, mindwell, maxdwell , ieee80211_phymode_name[vap->iv_des_mode] , flags & IEEE80211_SCAN_FLUSH ? "flush" : "append" , flags & IEEE80211_SCAN_NOPICK ? ", nopick" : "" , flags & IEEE80211_SCAN_NOJOIN ? ", nojoin" : "" , flags & IEEE80211_SCAN_NOBCAST ? ", nobcast" : "" , flags & IEEE80211_SCAN_PICK1ST ? ", pick1st" : "" , flags & IEEE80211_SCAN_ONCE ? 
", once" : "" ); ieee80211_scan_update_locked(vap, scan); if (ss->ss_ops != NULL) { if ((flags & IEEE80211_SCAN_NOSSID) == 0) ieee80211_scan_copy_ssid(vap, ss, nssid, ssids); /* NB: top 4 bits for internal use */ ss->ss_flags = flags & 0xfff; if (ss->ss_flags & IEEE80211_SCAN_ACTIVE) vap->iv_stats.is_scan_active++; else vap->iv_stats.is_scan_passive++; if (flags & IEEE80211_SCAN_FLUSH) ss->ss_ops->scan_flush(ss); if (flags & IEEE80211_SCAN_BGSCAN) ic->ic_flags_ext |= IEEE80211_FEXT_BGSCAN; /* Set duration for this particular scan */ ieee80211_swscan_set_scan_duration(vap, duration); ss->ss_next = 0; ss->ss_mindwell = mindwell; ss->ss_maxdwell = maxdwell; /* NB: scan_start must be before the scan runtask */ ss->ss_ops->scan_start(ss, vap); #ifdef IEEE80211_DEBUG if (ieee80211_msg_scan(vap)) ieee80211_scan_dump(ss); #endif /* IEEE80211_DEBUG */ ic->ic_flags |= IEEE80211_F_SCAN; /* Start scan task */ ieee80211_runtask(ic, &SCAN_PRIVATE(ss)->ss_scan_task); } return 1; } else { IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: %s scan already in progress\n", __func__, ss->ss_flags & IEEE80211_SCAN_ACTIVE ? "active" : "passive"); } return 0; } /* * Start a scan unless one is already going. * * Called without the comlock held; grab the comlock as appropriate. */ static int ieee80211_swscan_start_scan(const struct ieee80211_scanner *scan, struct ieee80211vap *vap, int flags, u_int duration, u_int mindwell, u_int maxdwell, u_int nssid, const struct ieee80211_scan_ssid ssids[]) { struct ieee80211com *ic = vap->iv_ic; int result; IEEE80211_UNLOCK_ASSERT(ic); IEEE80211_LOCK(ic); result = ieee80211_swscan_start_scan_locked(scan, vap, flags, duration, mindwell, maxdwell, nssid, ssids); IEEE80211_UNLOCK(ic); return result; } /* * Check the scan cache for an ap/channel to use; if that * fails then kick off a new scan. * * Called with the comlock held. * * XXX TODO: split out! */ static int ieee80211_swscan_check_scan(const struct ieee80211_scanner *scan, struct ieee80211vap *vap, int flags, u_int duration, u_int mindwell, u_int maxdwell, u_int nssid, const struct ieee80211_scan_ssid ssids[]) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_scan_state *ss = ic->ic_scan; int result; IEEE80211_LOCK_ASSERT(ic); if (ss->ss_ops != NULL) { /* XXX verify ss_ops matches vap->iv_opmode */ if ((flags & IEEE80211_SCAN_NOSSID) == 0) { /* * Update the ssid list and mark flags so if * we call start_scan it doesn't duplicate work. */ ieee80211_scan_copy_ssid(vap, ss, nssid, ssids); flags |= IEEE80211_SCAN_NOSSID; } if ((ic->ic_flags & IEEE80211_F_SCAN) == 0 && (flags & IEEE80211_SCAN_FLUSH) == 0 && time_before(ticks, ic->ic_lastscan + vap->iv_scanvalid)) { /* * We're not currently scanning and the cache is * deemed hot enough to consult. Lock out others * by marking IEEE80211_F_SCAN while we decide if * something is already in the scan cache we can * use. Also discard any frames that might come * in while temporarily marked as scanning. */ SCAN_PRIVATE(ss)->ss_iflags |= ISCAN_DISCARD; ic->ic_flags |= IEEE80211_F_SCAN; /* NB: need to use supplied flags in check */ ss->ss_flags = flags & 0xff; result = ss->ss_ops->scan_end(ss, vap); ic->ic_flags &= ~IEEE80211_F_SCAN; SCAN_PRIVATE(ss)->ss_iflags &= ~ISCAN_DISCARD; if (result) { ieee80211_notify_scan_done(vap); return 1; } } } result = ieee80211_swscan_start_scan_locked(scan, vap, flags, duration, mindwell, maxdwell, nssid, ssids); return result; } /* * Restart a previous scan. If the previous scan completed * then we start again using the existing channel list. 
*/ static int ieee80211_swscan_bg_scan(const struct ieee80211_scanner *scan, struct ieee80211vap *vap, int flags) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_scan_state *ss = ic->ic_scan; /* XXX assert unlocked? */ // IEEE80211_UNLOCK_ASSERT(ic); IEEE80211_LOCK(ic); if ((ic->ic_flags & IEEE80211_F_SCAN) == 0) { u_int duration; /* * Go off-channel for a fixed interval that is large * enough to catch most ap's but short enough that * we can return on-channel before our listen interval * expires. */ duration = IEEE80211_SCAN_OFFCHANNEL; IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, - "%s: %s scan, ticks %u duration %lu\n", __func__, + "%s: %s scan, ticks %u duration %u\n", __func__, ss->ss_flags & IEEE80211_SCAN_ACTIVE ? "active" : "passive", ticks, duration); ieee80211_scan_update_locked(vap, scan); if (ss->ss_ops != NULL) { ss->ss_vap = vap; /* * A background scan does not select a new sta; it * just refreshes the scan cache. Also, indicate * the scan logic should follow the beacon schedule: * we go off-channel and scan for a while, then * return to the bss channel to receive a beacon, * then go off-channel again. All during this time * we notify the ap we're in power save mode. When * the scan is complete we leave power save mode. * If any beacon indicates there are frames pending * for us then we drop out of power save mode * (and background scan) automatically by way of the * usual sta power save logic. */ ss->ss_flags |= IEEE80211_SCAN_NOPICK | IEEE80211_SCAN_BGSCAN | flags ; /* if previous scan completed, restart */ if (ss->ss_next >= ss->ss_last) { if (ss->ss_flags & IEEE80211_SCAN_ACTIVE) vap->iv_stats.is_scan_active++; else vap->iv_stats.is_scan_passive++; /* * NB: beware of the scan cache being flushed; * if the channel list is empty use the * scan_start method to populate it. */ ss->ss_next = 0; if (ss->ss_last != 0) ss->ss_ops->scan_restart(ss, vap); else { ss->ss_ops->scan_start(ss, vap); #ifdef IEEE80211_DEBUG if (ieee80211_msg_scan(vap)) ieee80211_scan_dump(ss); #endif /* IEEE80211_DEBUG */ } } ieee80211_swscan_set_scan_duration(vap, duration); ss->ss_maxdwell = duration; ic->ic_flags |= IEEE80211_F_SCAN; ic->ic_flags_ext |= IEEE80211_FEXT_BGSCAN; ieee80211_runtask(ic, &SCAN_PRIVATE(ss)->ss_scan_task); } else { /* XXX msg+stat */ } } else { IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: %s scan already in progress\n", __func__, ss->ss_flags & IEEE80211_SCAN_ACTIVE ? "active" : "passive"); } IEEE80211_UNLOCK(ic); /* NB: racey, does it matter? */ return (ic->ic_flags & IEEE80211_F_SCAN); } /* * Cancel any scan currently going on for the specified vap. */ static void ieee80211_swscan_cancel_scan(struct ieee80211vap *vap) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_scan_state *ss = ic->ic_scan; IEEE80211_LOCK(ic); if ((ic->ic_flags & IEEE80211_F_SCAN) && ss->ss_vap == vap && (SCAN_PRIVATE(ss)->ss_iflags & ISCAN_CANCEL) == 0) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: cancel %s scan\n", __func__, ss->ss_flags & IEEE80211_SCAN_ACTIVE ? "active" : "passive"); /* clear bg scan NOPICK and mark cancel request */ ss->ss_flags &= ~IEEE80211_SCAN_NOPICK; SCAN_PRIVATE(ss)->ss_iflags |= ISCAN_CANCEL; /* wake up the scan task */ scan_signal(ss); } else { IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: called; F_SCAN=%d, vap=%s, CANCEL=%d\n", __func__, !! (ic->ic_flags & IEEE80211_F_SCAN), (ss->ss_vap == vap ? "match" : "nomatch"), !! (SCAN_PRIVATE(ss)->ss_iflags & ISCAN_CANCEL)); } IEEE80211_UNLOCK(ic); } /* * Cancel any scan currently going on. 
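 * Unlike ieee80211_swscan_cancel_scan() above, this variant does not
 * require the scan to have been started by the requesting vap; any
 * in-progress scan is signalled to cancel.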
*/ static void ieee80211_swscan_cancel_anyscan(struct ieee80211vap *vap) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_scan_state *ss = ic->ic_scan; IEEE80211_LOCK(ic); if ((ic->ic_flags & IEEE80211_F_SCAN) && (SCAN_PRIVATE(ss)->ss_iflags & ISCAN_CANCEL) == 0) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: cancel %s scan\n", __func__, ss->ss_flags & IEEE80211_SCAN_ACTIVE ? "active" : "passive"); /* clear bg scan NOPICK and mark cancel request */ ss->ss_flags &= ~IEEE80211_SCAN_NOPICK; SCAN_PRIVATE(ss)->ss_iflags |= ISCAN_CANCEL; /* wake up the scan task */ scan_signal(ss); } else { IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: called; F_SCAN=%d, vap=%s, CANCEL=%d\n", __func__, !! (ic->ic_flags & IEEE80211_F_SCAN), (ss->ss_vap == vap ? "match" : "nomatch"), !! (SCAN_PRIVATE(ss)->ss_iflags & ISCAN_CANCEL)); } IEEE80211_UNLOCK(ic); } /* * Public access to scan_next for drivers that manage * scanning themselves (e.g. for firmware-based devices). */ static void ieee80211_swscan_scan_next(struct ieee80211vap *vap) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_scan_state *ss = ic->ic_scan; IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: called\n", __func__); /* wake up the scan task */ IEEE80211_LOCK(ic); scan_signal(ss); IEEE80211_UNLOCK(ic); } /* * Public access to scan_next for drivers that are not able to scan single * channels (e.g. for firmware-based devices). */ static void ieee80211_swscan_scan_done(struct ieee80211vap *vap) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_scan_state *ss; IEEE80211_LOCK_ASSERT(ic); ss = ic->ic_scan; scan_signal(ss); } /* * Probe the current channel, if allowed, while scanning. * If the channel is not marked passive-only then send * a probe request immediately. Otherwise mark state and * listen for beacons on the channel; if we receive something * then we'll transmit a probe request. */ static void ieee80211_swscan_probe_curchan(struct ieee80211vap *vap, int force) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_scan_state *ss = ic->ic_scan; struct ifnet *ifp = vap->iv_ifp; int i; /* * Send directed probe requests followed by any * broadcast probe request. * XXX remove dependence on ic/vap->iv_bss */ for (i = 0; i < ss->ss_nssid; i++) ieee80211_send_probereq(vap->iv_bss, vap->iv_myaddr, ifp->if_broadcastaddr, ifp->if_broadcastaddr, ss->ss_ssid[i].ssid, ss->ss_ssid[i].len); if ((ss->ss_flags & IEEE80211_SCAN_NOBCAST) == 0) ieee80211_send_probereq(vap->iv_bss, vap->iv_myaddr, ifp->if_broadcastaddr, ifp->if_broadcastaddr, "", 0); } /* * Scan curchan. If this is an active scan and the channel * is not marked passive then send probe request frame(s). * Arrange for the channel change after maxdwell ticks. */ static void scan_curchan(struct ieee80211_scan_state *ss, unsigned long maxdwell) { struct ieee80211vap *vap = ss->ss_vap; IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: calling; maxdwell=%lu\n", __func__, maxdwell); IEEE80211_LOCK(vap->iv_ic); if (ss->ss_flags & IEEE80211_SCAN_ACTIVE) ieee80211_probe_curchan(vap, 0); callout_reset(&SCAN_PRIVATE(ss)->ss_scan_timer, maxdwell, scan_signal, ss); IEEE80211_UNLOCK(vap->iv_ic); } static void scan_signal(void *arg) { struct ieee80211_scan_state *ss = (struct ieee80211_scan_state *) arg; IEEE80211_LOCK_ASSERT(ss->ss_ic); cv_signal(&SCAN_PRIVATE(ss)->ss_scan_cv); } /* * Handle a completed mindwell requirement; initiate a channel * change to the next channel asap.
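 * (No scan state is touched here; we merely wake scan_task() by
 * signalling the scan condition variable with the com lock held.)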
*/ static void scan_mindwell(struct ieee80211_scan_state *ss) { struct ieee80211com *ic = ss->ss_ic; IEEE80211_DPRINTF(ss->ss_vap, IEEE80211_MSG_SCAN, "%s: called\n", __func__); IEEE80211_LOCK(ic); scan_signal(ss); IEEE80211_UNLOCK(ic); } static void scan_task(void *arg, int pending) { #define ISCAN_REP (ISCAN_MINDWELL | ISCAN_DISCARD) struct ieee80211_scan_state *ss = (struct ieee80211_scan_state *) arg; struct ieee80211vap *vap = ss->ss_vap; struct ieee80211com *ic = ss->ss_ic; struct ieee80211_channel *chan; unsigned long maxdwell, scanend; int scandone = 0; IEEE80211_LOCK(ic); if (vap == NULL || (ic->ic_flags & IEEE80211_F_SCAN) == 0 || (SCAN_PRIVATE(ss)->ss_iflags & ISCAN_ABORT)) { /* Cancelled before we started */ goto done; } if (ss->ss_next == ss->ss_last) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: no channels to scan\n", __func__); scandone = 1; goto done; } if (vap->iv_opmode == IEEE80211_M_STA && vap->iv_state == IEEE80211_S_RUN) { if ((vap->iv_bss->ni_flags & IEEE80211_NODE_PWR_MGT) == 0) { /* Enable station power save mode */ vap->iv_sta_ps(vap, 1); /* * Use a 1ms delay so the null data frame has a chance * to go out. * XXX Should use M_TXCB mechanism to eliminate this. */ cv_timedwait(&SCAN_PRIVATE(ss)->ss_scan_cv, IEEE80211_LOCK_OBJ(ic), msecs_to_ticks(1)); if (SCAN_PRIVATE(ss)->ss_iflags & ISCAN_ABORT) goto done; } } scanend = ticks + SCAN_PRIVATE(ss)->ss_duration; /* XXX scan state can change! Re-validate scan state! */ IEEE80211_UNLOCK(ic); ic->ic_scan_start(ic); /* notify driver */ IEEE80211_LOCK(ic); for (;;) { scandone = (ss->ss_next >= ss->ss_last) || (SCAN_PRIVATE(ss)->ss_iflags & ISCAN_CANCEL) != 0; IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: loop start; scandone=%d\n", __func__, scandone); if (scandone || (ss->ss_flags & IEEE80211_SCAN_GOTPICK) || (SCAN_PRIVATE(ss)->ss_iflags & ISCAN_ABORT) || time_after(ticks + ss->ss_mindwell, scanend)) break; chan = ss->ss_chans[ss->ss_next++]; /* * Watch for truncation due to the scan end time. */ if (time_after(ticks + ss->ss_maxdwell, scanend)) maxdwell = scanend - ticks; else maxdwell = ss->ss_maxdwell; IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: chan %3d%c -> %3d%c [%s, dwell min %lums max %lums]\n", __func__, ieee80211_chan2ieee(ic, ic->ic_curchan), ieee80211_channel_type_char(ic->ic_curchan), ieee80211_chan2ieee(ic, chan), ieee80211_channel_type_char(chan), (ss->ss_flags & IEEE80211_SCAN_ACTIVE) && (chan->ic_flags & IEEE80211_CHAN_PASSIVE) == 0 ? "active" : "passive", ticks_to_msecs(ss->ss_mindwell), ticks_to_msecs(maxdwell)); /* * Potentially change channel and phy mode. */ ic->ic_curchan = chan; ic->ic_rt = ieee80211_get_ratetable(chan); IEEE80211_UNLOCK(ic); /* * Perform the channel change and scan unlocked so the driver * may sleep. Once set_channel returns the hardware has * completed the channel change. */ ic->ic_set_channel(ic); ieee80211_radiotap_chan_change(ic); /* * Scan curchan. Drivers for "intelligent hardware" * override ic_scan_curchan to tell the device to do * the work. Otherwise we manage the work ourselves; * sending a probe request (as needed), and arming the * timeout to switch channels after maxdwell ticks. * * scan_curchan should only pause for the time required to * prepare/initiate the hardware for the scan (if at all); the * condvar below is used to sleep for the channel's dwell time * and allows it to be signalled for abort. */ ic->ic_scan_curchan(ss, maxdwell); IEEE80211_LOCK(ic); /* XXX scan state can change! Re-validate scan state!
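 * After ic_scan_curchan() returns we recompute the minimum-dwell
 * deadline and, unless a cancel or abort is pending, sleep on
 * ss_scan_cv until scan_signal() wakes us for the next channel.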
*/ SCAN_PRIVATE(ss)->ss_chanmindwell = ticks + ss->ss_mindwell; /* clear mindwell lock and initial channel change flush */ SCAN_PRIVATE(ss)->ss_iflags &= ~ISCAN_REP; if ((SCAN_PRIVATE(ss)->ss_iflags & (ISCAN_CANCEL|ISCAN_ABORT))) continue; IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: waiting\n", __func__); /* Wait to be signalled to scan the next channel */ cv_wait(&SCAN_PRIVATE(ss)->ss_scan_cv, IEEE80211_LOCK_OBJ(ic)); } IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: out\n", __func__); if (SCAN_PRIVATE(ss)->ss_iflags & ISCAN_ABORT) goto done; IEEE80211_UNLOCK(ic); ic->ic_scan_end(ic); /* notify driver */ IEEE80211_LOCK(ic); /* XXX scan state can change! Re-validate scan state! */ /* * Since a cancellation may have occurred during one of the * driver calls (whilst unlocked), update scandone. */ if (scandone == 0 && ((SCAN_PRIVATE(ss)->ss_iflags & ISCAN_CANCEL) != 0)) { /* XXX printf? */ if_printf(vap->iv_ifp, "%s: OOPS! scan cancelled during driver call (1)!\n", __func__); scandone = 1; } /* * Record scan complete time. Note that we also do * this when canceled so any background scan will * not be restarted for a while. */ if (scandone) ic->ic_lastscan = ticks; /* return to the bss channel */ if (ic->ic_bsschan != IEEE80211_CHAN_ANYC && ic->ic_curchan != ic->ic_bsschan) { ieee80211_setupcurchan(ic, ic->ic_bsschan); IEEE80211_UNLOCK(ic); ic->ic_set_channel(ic); ieee80211_radiotap_chan_change(ic); IEEE80211_LOCK(ic); } /* clear internal flags and any indication of a pick */ SCAN_PRIVATE(ss)->ss_iflags &= ~ISCAN_REP; ss->ss_flags &= ~IEEE80211_SCAN_GOTPICK; /* * If not canceled and scan completed, do post-processing. * If the callback function returns 0, then it wants to * continue/restart scanning. Unfortunately we needed to * notify the driver to end the scan above to avoid having * rx frames alter the scan candidate list. */ if ((SCAN_PRIVATE(ss)->ss_iflags & ISCAN_CANCEL) == 0 && !ss->ss_ops->scan_end(ss, vap) && (ss->ss_flags & IEEE80211_SCAN_ONCE) == 0 && time_before(ticks + ss->ss_mindwell, scanend)) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: done, restart " "[ticks %u, dwell min %lu scanend %lu]\n", __func__, ticks, ss->ss_mindwell, scanend); ss->ss_next = 0; /* reset to beginning */ if (ss->ss_flags & IEEE80211_SCAN_ACTIVE) vap->iv_stats.is_scan_active++; else vap->iv_stats.is_scan_passive++; ss->ss_ops->scan_restart(ss, vap); /* XXX? */ ieee80211_runtask(ic, &SCAN_PRIVATE(ss)->ss_scan_task); IEEE80211_UNLOCK(ic); return; } /* past here, scandone is ``true'' if not in bg mode */ if ((ss->ss_flags & IEEE80211_SCAN_BGSCAN) == 0) scandone = 1; IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: %s, [ticks %u, dwell min %lu scanend %lu]\n", __func__, scandone ? "done" : "stopped", ticks, ss->ss_mindwell, scanend); /* * Since a cancellation may have occurred during one of the * driver calls (whilst unlocked), update scandone. */ if (scandone == 0 && ((SCAN_PRIVATE(ss)->ss_iflags & ISCAN_CANCEL) != 0)) { /* XXX printf? */ if_printf(vap->iv_ifp, "%s: OOPS! scan cancelled during driver call (2)!\n", __func__); scandone = 1; } /* * Clear the SCAN bit first in case frames are * pending on the station power save queue. If * we defer this then the dispatch of the frames * may generate a request to cancel scanning. */ done: ic->ic_flags &= ~IEEE80211_F_SCAN; /* * Drop out of power save mode when a scan has * completed.
If this scan was prematurely terminated * because it is a background scan then don't notify * the ap; we'll either return to scanning after we * receive the beacon frame or we'll drop out of power * save mode because the beacon indicates we have frames * waiting for us. */ if (scandone) { vap->iv_sta_ps(vap, 0); if (ss->ss_next >= ss->ss_last) { ieee80211_notify_scan_done(vap); ic->ic_flags_ext &= ~IEEE80211_FEXT_BGSCAN; } } SCAN_PRIVATE(ss)->ss_iflags &= ~(ISCAN_CANCEL|ISCAN_ABORT); ss->ss_flags &= ~(IEEE80211_SCAN_ONCE | IEEE80211_SCAN_PICK1ST); IEEE80211_UNLOCK(ic); #undef ISCAN_REP } /* * Process a beacon or probe response frame. */ static void ieee80211_swscan_add_scan(struct ieee80211vap *vap, struct ieee80211_channel *curchan, const struct ieee80211_scanparams *sp, const struct ieee80211_frame *wh, int subtype, int rssi, int noise) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_scan_state *ss = ic->ic_scan; /* XXX locking */ /* * Frames received during startup are discarded to avoid * using scan state setup on the initial entry to the timer * callback. This can occur because the device may enable * rx prior to our doing the initial channel change in the * timer routine. */ if (SCAN_PRIVATE(ss)->ss_iflags & ISCAN_DISCARD) return; #ifdef IEEE80211_DEBUG if (ieee80211_msg_scan(vap) && (ic->ic_flags & IEEE80211_F_SCAN)) ieee80211_scan_dump_probe_beacon(subtype, 1, wh->i_addr2, sp, rssi); #endif if (ss->ss_ops != NULL && ss->ss_ops->scan_add(ss, curchan, sp, wh, subtype, rssi, noise)) { /* * If we've reached the min dwell time terminate * the timer so we'll switch to the next channel. */ if ((SCAN_PRIVATE(ss)->ss_iflags & ISCAN_MINDWELL) == 0 && time_after_eq(ticks, SCAN_PRIVATE(ss)->ss_chanmindwell)) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: chan %3d%c min dwell met (%u > %lu)\n", __func__, ieee80211_chan2ieee(ic, ic->ic_curchan), ieee80211_channel_type_char(ic->ic_curchan), ticks, SCAN_PRIVATE(ss)->ss_chanmindwell); SCAN_PRIVATE(ss)->ss_iflags |= ISCAN_MINDWELL; /* * NB: trigger at next clock tick or wait for the * hardware. */ ic->ic_scan_mindwell(ss); } } } static struct ieee80211_scan_methods swscan_methods = { .sc_attach = ieee80211_swscan_attach, .sc_detach = ieee80211_swscan_detach, .sc_vattach = ieee80211_swscan_vattach, .sc_vdetach = ieee80211_swscan_vdetach, .sc_set_scan_duration = ieee80211_swscan_set_scan_duration, .sc_start_scan = ieee80211_swscan_start_scan, .sc_check_scan = ieee80211_swscan_check_scan, .sc_bg_scan = ieee80211_swscan_bg_scan, .sc_cancel_scan = ieee80211_swscan_cancel_scan, .sc_cancel_anyscan = ieee80211_swscan_cancel_anyscan, .sc_scan_next = ieee80211_swscan_scan_next, .sc_scan_done = ieee80211_swscan_scan_done, .sc_scan_probe_curchan = ieee80211_swscan_probe_curchan, .sc_add_scan = ieee80211_swscan_add_scan }; /* * Default scan attach method. 
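 * Installs the swscan_methods table above and allocates the software
 * scan state; the ic_scan_curchan/ic_scan_mindwell defaults set here
 * may be overridden by drivers that scan in firmware.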
*/ void ieee80211_swscan_attach(struct ieee80211com *ic) { struct scan_state *ss; /* * Setup the default methods */ ic->ic_scan_methods = &swscan_methods; /* Allocate initial scan state */ ss = (struct scan_state *) IEEE80211_MALLOC(sizeof(struct scan_state), M_80211_SCAN, IEEE80211_M_NOWAIT | IEEE80211_M_ZERO); if (ss == NULL) { ic->ic_scan = NULL; return; } callout_init_mtx(&ss->ss_scan_timer, IEEE80211_LOCK_OBJ(ic), 0); cv_init(&ss->ss_scan_cv, "scan"); TASK_INIT(&ss->ss_scan_task, 0, scan_task, ss); ic->ic_scan = &ss->base; ss->base.ss_ic = ic; ic->ic_scan_curchan = scan_curchan; ic->ic_scan_mindwell = scan_mindwell; } diff --git a/sys/netgraph/netflow/netflow.c b/sys/netgraph/netflow/netflow.c index 7422fbd7745d..9b3f7d839ce5 100644 --- a/sys/netgraph/netflow/netflow.c +++ b/sys/netgraph/netflow/netflow.c @@ -1,1195 +1,1195 @@ /*- * Copyright (c) 2010-2011 Alexander V. Chernikov * Copyright (c) 2004-2005 Gleb Smirnoff * Copyright (c) 2001-2003 Roman V. Palagin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $SourceForge: netflow.c,v 1.41 2004/09/05 11:41:10 glebius Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include "opt_route.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NBUCKETS (65536) /* must be power of 2 */ /* This hash is for TCP or UDP packets. */ #define FULL_HASH(addr1, addr2, port1, port2) \ (((addr1 ^ (addr1 >> 16) ^ \ htons(addr2 ^ (addr2 >> 16))) ^ \ port1 ^ htons(port2)) & \ (NBUCKETS - 1)) /* This hash is for all other IP packets. */ #define ADDR_HASH(addr1, addr2) \ ((addr1 ^ (addr1 >> 16) ^ \ htons(addr2 ^ (addr2 >> 16))) & \ (NBUCKETS - 1)) /* Macros to shorten logical constructions */ /* XXX: priv must exist in namespace */ #define INACTIVE(fle) (time_uptime - fle->f.last > priv->nfinfo_inact_t) #define AGED(fle) (time_uptime - fle->f.first > priv->nfinfo_act_t) #define ISFREE(fle) (fle->f.packets == 0) /* * 4 is a magical number: statistically the number of 4-packet flows is * bigger than 5,6,7...-packet flows by an order of magnitude. Most UDP/ICMP * scans are 1 packet (~ 90% of flow cache). TCP scans are 2-packet in the case * of a reachable host and 4-packet otherwise.
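 * Hence the SMALL() threshold of 4 below: flows at or under four
 * packets are treated as likely scan traffic and are expired earlier
 * when inactive (see ng_netflow_expire()).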
*/ #define SMALL(fle) (fle->f.packets <= 4) MALLOC_DEFINE(M_NETFLOW_HASH, "netflow_hash", "NetFlow hash"); static int export_add(item_p, struct flow_entry *); static int export_send(priv_p, fib_export_p, item_p, int); static int hash_insert(priv_p, struct flow_hash_entry *, struct flow_rec *, int, uint8_t, uint8_t); #ifdef INET6 static int hash6_insert(priv_p, struct flow_hash_entry *, struct flow6_rec *, int, uint8_t, uint8_t); #endif static void expire_flow(priv_p, fib_export_p, struct flow_entry *, int); /* * Generate hash for a given flow record. * * FIB is not used here, because: * most VRFs will carry public IPv4 addresses, which are unique even * without the FIB; private addresses can overlap, but this is worked out * via the flow_rec bcmp(), which includes the fib id. In the IPv6 world addresses are * all globally unique (not fully true, there is FC00::/7 for example, * but the chances of address overlap are MUCH smaller) */ static inline uint32_t ip_hash(struct flow_rec *r) { switch (r->r_ip_p) { case IPPROTO_TCP: case IPPROTO_UDP: return FULL_HASH(r->r_src.s_addr, r->r_dst.s_addr, r->r_sport, r->r_dport); default: return ADDR_HASH(r->r_src.s_addr, r->r_dst.s_addr); } } #ifdef INET6 /* Generate hash for a given flow6 record. Use lower 4 octets from v6 addresses */ static inline uint32_t ip6_hash(struct flow6_rec *r) { switch (r->r_ip_p) { case IPPROTO_TCP: case IPPROTO_UDP: return FULL_HASH(r->src.r_src6.__u6_addr.__u6_addr32[3], r->dst.r_dst6.__u6_addr.__u6_addr32[3], r->r_sport, r->r_dport); default: return ADDR_HASH(r->src.r_src6.__u6_addr.__u6_addr32[3], r->dst.r_dst6.__u6_addr.__u6_addr32[3]); } } #endif /* * Detach export datagram from priv, if there is any. * If there is none, allocate a new one. */ static item_p get_export_dgram(priv_p priv, fib_export_p fe) { item_p item = NULL; mtx_lock(&fe->export_mtx); if (fe->exp.item != NULL) { item = fe->exp.item; fe->exp.item = NULL; } mtx_unlock(&fe->export_mtx); if (item == NULL) { struct netflow_v5_export_dgram *dgram; struct mbuf *m; m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return (NULL); item = ng_package_data(m, NG_NOFLAGS); if (item == NULL) return (NULL); dgram = mtod(m, struct netflow_v5_export_dgram *); dgram->header.count = 0; dgram->header.version = htons(NETFLOW_V5); dgram->header.pad = 0; } return (item); } /* * Re-attach incomplete datagram back to priv. * If there is already another one, then send the incomplete one. */ static void return_export_dgram(priv_p priv, fib_export_p fe, item_p item, int flags) { /* * It may happen on SMP that some thread has already * put its item there; in that case we bail out and * send what we have to the collector. */ mtx_lock(&fe->export_mtx); if (fe->exp.item == NULL) { fe->exp.item = item; mtx_unlock(&fe->export_mtx); } else { mtx_unlock(&fe->export_mtx); export_send(priv, fe, item, flags); } } /* * The flow is over. Call export_add() and free it. If the datagram is * full, then call export_send(). */ static void expire_flow(priv_p priv, fib_export_p fe, struct flow_entry *fle, int flags) { struct netflow_export_item exp; uint16_t version = fle->f.version; if ((priv->export != NULL) && (version == IPVERSION)) { exp.item = get_export_dgram(priv, fe); if (exp.item == NULL) { priv->nfinfo_export_failed++; if (priv->export9 != NULL) priv->nfinfo_export9_failed++; /* fle definitely contains IPv4 flow.
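 * (the surrounding block runs only for version == IPVERSION), so it
 * is freed straight back to the IPv4 zone.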
*/ uma_zfree_arg(priv->zone, fle, priv); return; } if (export_add(exp.item, fle) > 0) export_send(priv, fe, exp.item, flags); else return_export_dgram(priv, fe, exp.item, NG_QUEUE); } if (priv->export9 != NULL) { exp.item9 = get_export9_dgram(priv, fe, &exp.item9_opt); if (exp.item9 == NULL) { priv->nfinfo_export9_failed++; if (version == IPVERSION) uma_zfree_arg(priv->zone, fle, priv); #ifdef INET6 else if (version == IP6VERSION) uma_zfree_arg(priv->zone6, fle, priv); #endif else panic("ng_netflow: Unknown IP proto: %d", version); return; } if (export9_add(exp.item9, exp.item9_opt, fle) > 0) export9_send(priv, fe, exp.item9, exp.item9_opt, flags); else return_export9_dgram(priv, fe, exp.item9, exp.item9_opt, NG_QUEUE); } if (version == IPVERSION) uma_zfree_arg(priv->zone, fle, priv); #ifdef INET6 else if (version == IP6VERSION) uma_zfree_arg(priv->zone6, fle, priv); #endif } /* Get a snapshot of node statistics */ void ng_netflow_copyinfo(priv_p priv, struct ng_netflow_info *i) { i->nfinfo_bytes = counter_u64_fetch(priv->nfinfo_bytes); i->nfinfo_packets = counter_u64_fetch(priv->nfinfo_packets); i->nfinfo_bytes6 = counter_u64_fetch(priv->nfinfo_bytes6); i->nfinfo_packets6 = counter_u64_fetch(priv->nfinfo_packets6); i->nfinfo_sbytes = counter_u64_fetch(priv->nfinfo_sbytes); i->nfinfo_spackets = counter_u64_fetch(priv->nfinfo_spackets); i->nfinfo_sbytes6 = counter_u64_fetch(priv->nfinfo_sbytes6); i->nfinfo_spackets6 = counter_u64_fetch(priv->nfinfo_spackets6); i->nfinfo_act_exp = counter_u64_fetch(priv->nfinfo_act_exp); i->nfinfo_inact_exp = counter_u64_fetch(priv->nfinfo_inact_exp); i->nfinfo_used = uma_zone_get_cur(priv->zone); #ifdef INET6 i->nfinfo_used6 = uma_zone_get_cur(priv->zone6); #endif i->nfinfo_alloc_failed = priv->nfinfo_alloc_failed; i->nfinfo_export_failed = priv->nfinfo_export_failed; i->nfinfo_export9_failed = priv->nfinfo_export9_failed; i->nfinfo_realloc_mbuf = priv->nfinfo_realloc_mbuf; i->nfinfo_alloc_fibs = priv->nfinfo_alloc_fibs; i->nfinfo_inact_t = priv->nfinfo_inact_t; i->nfinfo_act_t = priv->nfinfo_act_t; } /* * Insert a record into the defined slot. * * First we get a free flow entry, then fill in all * possible fields in it. * * TODO: consider dropping hash mutex while filling in datagram, * as was done in the previous version. Need to test & profile * to be sure. */ static int hash_insert(priv_p priv, struct flow_hash_entry *hsh, struct flow_rec *r, int plen, uint8_t flags, uint8_t tcp_flags) { struct flow_entry *fle; struct sockaddr_in sin; struct rtentry *rt; mtx_assert(&hsh->mtx, MA_OWNED); fle = uma_zalloc_arg(priv->zone, priv, M_NOWAIT); if (fle == NULL) { priv->nfinfo_alloc_failed++; return (ENOMEM); } /* * Now fle is totally ours. It is detached from all lists, * so we can safely edit it. */ fle->f.version = IPVERSION; bcopy(r, &fle->f.r, sizeof(struct flow_rec)); fle->f.bytes = plen; fle->f.packets = 1; fle->f.tcp_flags = tcp_flags; fle->f.first = fle->f.last = time_uptime; /* * First we do a route table lookup on the destination address, so we can * fill in out_ifx, dst_mask, nexthop, and dst_as in future releases.
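 * The prefix length is obtained by popcounting the route mask, e.g. a
 * 255.255.255.0 mask yields dst_mask = 24; host routes with no mask
 * (RTF_HOST) are reported as /32.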
*/ if ((flags & NG_NETFLOW_CONF_NODSTLOOKUP) == 0) { bzero(&sin, sizeof(sin)); sin.sin_len = sizeof(struct sockaddr_in); sin.sin_family = AF_INET; sin.sin_addr = fle->f.r.r_dst; rt = rtalloc1_fib((struct sockaddr *)&sin, 0, 0, r->fib); if (rt != NULL) { fle->f.fle_o_ifx = rt->rt_ifp->if_index; if (rt->rt_flags & RTF_GATEWAY && rt->rt_gateway->sa_family == AF_INET) fle->f.next_hop = ((struct sockaddr_in *)(rt->rt_gateway))->sin_addr; if (rt_mask(rt)) fle->f.dst_mask = bitcount32(((struct sockaddr_in *)rt_mask(rt))->sin_addr.s_addr); else if (rt->rt_flags & RTF_HOST) /* Give up. We can't determine mask :( */ fle->f.dst_mask = 32; RTFREE_LOCKED(rt); } } /* Do route lookup on source address, to fill in src_mask. */ if ((flags & NG_NETFLOW_CONF_NOSRCLOOKUP) == 0) { bzero(&sin, sizeof(sin)); sin.sin_len = sizeof(struct sockaddr_in); sin.sin_family = AF_INET; sin.sin_addr = fle->f.r.r_src; rt = rtalloc1_fib((struct sockaddr *)&sin, 0, 0, r->fib); if (rt != NULL) { if (rt_mask(rt)) fle->f.src_mask = bitcount32(((struct sockaddr_in *)rt_mask(rt))->sin_addr.s_addr); else if (rt->rt_flags & RTF_HOST) /* Give up. We can't determine mask :( */ fle->f.src_mask = 32; RTFREE_LOCKED(rt); } } /* Push the new flow at the end of the hash. */ TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash); return (0); } #ifdef INET6 /* XXX: make normal function, instead of.. */ #define ipv6_masklen(x) bitcount32((x).__u6_addr.__u6_addr32[0]) + \ bitcount32((x).__u6_addr.__u6_addr32[1]) + \ bitcount32((x).__u6_addr.__u6_addr32[2]) + \ bitcount32((x).__u6_addr.__u6_addr32[3]) #define RT_MASK6(x) (ipv6_masklen(((struct sockaddr_in6 *)rt_mask(x))->sin6_addr)) static int hash6_insert(priv_p priv, struct flow_hash_entry *hsh6, struct flow6_rec *r, int plen, uint8_t flags, uint8_t tcp_flags) { struct flow6_entry *fle6; struct sockaddr_in6 *src, *dst; struct rtentry *rt; struct route_in6 rin6; mtx_assert(&hsh6->mtx, MA_OWNED); fle6 = uma_zalloc_arg(priv->zone6, priv, M_NOWAIT); if (fle6 == NULL) { priv->nfinfo_alloc_failed++; return (ENOMEM); } /* * Now fle is totally ours. It is detached from all lists, * so we can safely edit it. */ fle6->f.version = IP6VERSION; bcopy(r, &fle6->f.r, sizeof(struct flow6_rec)); fle6->f.bytes = plen; fle6->f.packets = 1; fle6->f.tcp_flags = tcp_flags; fle6->f.first = fle6->f.last = time_uptime; /* * First we do a route table lookup on the destination address, so we can * fill in out_ifx, dst_mask, nexthop, and dst_as in future releases. */ if ((flags & NG_NETFLOW_CONF_NODSTLOOKUP) == 0) { bzero(&rin6, sizeof(struct route_in6)); dst = (struct sockaddr_in6 *)&rin6.ro_dst; dst->sin6_len = sizeof(struct sockaddr_in6); dst->sin6_family = AF_INET6; dst->sin6_addr = r->dst.r_dst6; rin6.ro_rt = rtalloc1_fib((struct sockaddr *)dst, 0, 0, r->fib); if (rin6.ro_rt != NULL) { rt = rin6.ro_rt; fle6->f.fle_o_ifx = rt->rt_ifp->if_index; if (rt->rt_flags & RTF_GATEWAY && rt->rt_gateway->sa_family == AF_INET6) fle6->f.n.next_hop6 = ((struct sockaddr_in6 *)(rt->rt_gateway))->sin6_addr; if (rt_mask(rt)) fle6->f.dst_mask = RT_MASK6(rt); else fle6->f.dst_mask = 128; RTFREE_LOCKED(rt); } } - if ((flags & NG_NETFLOW_CONF_NODSTLOOKUP) == 0) { + if ((flags & NG_NETFLOW_CONF_NOSRCLOOKUP) == 0) { /* Do route lookup on source address, to fill in src_mask.
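 * Same procedure as the destination lookup above, keyed on
 * r->src.r_src6 and defaulting to /128 when the route has no mask.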
*/ bzero(&rin6, sizeof(struct route_in6)); src = (struct sockaddr_in6 *)&rin6.ro_dst; src->sin6_len = sizeof(struct sockaddr_in6); src->sin6_family = AF_INET6; src->sin6_addr = r->src.r_src6; rin6.ro_rt = rtalloc1_fib((struct sockaddr *)src, 0, 0, r->fib); if (rin6.ro_rt != NULL) { rt = rin6.ro_rt; if (rt_mask(rt)) fle6->f.src_mask = RT_MASK6(rt); else fle6->f.src_mask = 128; RTFREE_LOCKED(rt); } } /* Push the new flow at the end of the hash. */ TAILQ_INSERT_TAIL(&hsh6->head, (struct flow_entry *)fle6, fle_hash); return (0); } #undef ipv6_masklen #undef RT_MASK6 #endif /* * Non-static functions called from ng_netflow.c */ /* Allocate memory and set up flow cache */ void ng_netflow_cache_init(priv_p priv) { struct flow_hash_entry *hsh; int i; /* Initialize cache UMA zone. */ priv->zone = uma_zcreate("NetFlow IPv4 cache", sizeof(struct flow_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); uma_zone_set_max(priv->zone, CACHESIZE); #ifdef INET6 priv->zone6 = uma_zcreate("NetFlow IPv6 cache", sizeof(struct flow6_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); uma_zone_set_max(priv->zone6, CACHESIZE); #endif /* Allocate hash. */ priv->hash = malloc(NBUCKETS * sizeof(struct flow_hash_entry), M_NETFLOW_HASH, M_WAITOK | M_ZERO); /* Initialize hash. */ for (i = 0, hsh = priv->hash; i < NBUCKETS; i++, hsh++) { mtx_init(&hsh->mtx, "hash mutex", NULL, MTX_DEF); TAILQ_INIT(&hsh->head); } #ifdef INET6 /* Allocate hash. */ priv->hash6 = malloc(NBUCKETS * sizeof(struct flow_hash_entry), M_NETFLOW_HASH, M_WAITOK | M_ZERO); /* Initialize hash. */ for (i = 0, hsh = priv->hash6; i < NBUCKETS; i++, hsh++) { mtx_init(&hsh->mtx, "hash mutex", NULL, MTX_DEF); TAILQ_INIT(&hsh->head); } #endif priv->nfinfo_bytes = counter_u64_alloc(M_WAITOK); priv->nfinfo_packets = counter_u64_alloc(M_WAITOK); priv->nfinfo_bytes6 = counter_u64_alloc(M_WAITOK); priv->nfinfo_packets6 = counter_u64_alloc(M_WAITOK); priv->nfinfo_sbytes = counter_u64_alloc(M_WAITOK); priv->nfinfo_spackets = counter_u64_alloc(M_WAITOK); priv->nfinfo_sbytes6 = counter_u64_alloc(M_WAITOK); priv->nfinfo_spackets6 = counter_u64_alloc(M_WAITOK); priv->nfinfo_act_exp = counter_u64_alloc(M_WAITOK); priv->nfinfo_inact_exp = counter_u64_alloc(M_WAITOK); ng_netflow_v9_cache_init(priv); CTR0(KTR_NET, "ng_netflow startup()"); } /* Initialize new FIB table for v5 and v9 */ int ng_netflow_fib_init(priv_p priv, int fib) { fib_export_p fe = priv_to_fib(priv, fib); CTR1(KTR_NET, "ng_netflow(): fib init: %d", fib); if (fe != NULL) return (0); if ((fe = malloc(sizeof(struct fib_export), M_NETGRAPH, M_NOWAIT | M_ZERO)) == NULL) return (ENOMEM); mtx_init(&fe->export_mtx, "export dgram lock", NULL, MTX_DEF); mtx_init(&fe->export9_mtx, "export9 dgram lock", NULL, MTX_DEF); fe->fib = fib; fe->domain_id = fib; if (atomic_cmpset_ptr((volatile uintptr_t *)&priv->fib_data[fib], (uintptr_t)NULL, (uintptr_t)fe) == 0) { /* FIB already set up by other ISR */ CTR3(KTR_NET, "ng_netflow(): fib init: %d setup %p but got %p", fib, fe, priv_to_fib(priv, fib)); mtx_destroy(&fe->export_mtx); mtx_destroy(&fe->export9_mtx); free(fe, M_NETGRAPH); } else { /* Increase counter for statistics */ CTR3(KTR_NET, "ng_netflow(): fib %d setup to %p (%p)", fib, fe, priv_to_fib(priv, fib)); priv->nfinfo_alloc_fibs++; } return (0); } /* Free all flow cache memory. Called from node close method.
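 * The expiry callout must already be drained by the caller; that is
 * what lets the walk below run without per-bucket locking.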
*/ void ng_netflow_cache_flush(priv_p priv) { struct flow_entry *fle, *fle1; struct flow_hash_entry *hsh; struct netflow_export_item exp; fib_export_p fe; int i; bzero(&exp, sizeof(exp)); /* * We are going to free probably billable data. * Expire everything before freeing it. * No locking is required since callout is already drained. */ for (hsh = priv->hash, i = 0; i < NBUCKETS; hsh++, i++) TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); fe = priv_to_fib(priv, fle->f.r.fib); expire_flow(priv, fe, fle, NG_QUEUE); } #ifdef INET6 for (hsh = priv->hash6, i = 0; i < NBUCKETS; hsh++, i++) TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); fe = priv_to_fib(priv, fle->f.r.fib); expire_flow(priv, fe, fle, NG_QUEUE); } #endif uma_zdestroy(priv->zone); /* Destroy hash mutexes. */ for (i = 0, hsh = priv->hash; i < NBUCKETS; i++, hsh++) mtx_destroy(&hsh->mtx); /* Free hash memory. */ if (priv->hash != NULL) free(priv->hash, M_NETFLOW_HASH); #ifdef INET6 uma_zdestroy(priv->zone6); /* Destroy hash mutexes. */ for (i = 0, hsh = priv->hash6; i < NBUCKETS; i++, hsh++) mtx_destroy(&hsh->mtx); /* Free hash memory. */ if (priv->hash6 != NULL) free(priv->hash6, M_NETFLOW_HASH); #endif for (i = 0; i < priv->maxfibs; i++) { if ((fe = priv_to_fib(priv, i)) == NULL) continue; if (fe->exp.item != NULL) export_send(priv, fe, fe->exp.item, NG_QUEUE); if (fe->exp.item9 != NULL) export9_send(priv, fe, fe->exp.item9, fe->exp.item9_opt, NG_QUEUE); mtx_destroy(&fe->export_mtx); mtx_destroy(&fe->export9_mtx); free(fe, M_NETGRAPH); } counter_u64_free(priv->nfinfo_bytes); counter_u64_free(priv->nfinfo_packets); counter_u64_free(priv->nfinfo_bytes6); counter_u64_free(priv->nfinfo_packets6); counter_u64_free(priv->nfinfo_sbytes); counter_u64_free(priv->nfinfo_spackets); counter_u64_free(priv->nfinfo_sbytes6); counter_u64_free(priv->nfinfo_spackets6); counter_u64_free(priv->nfinfo_act_exp); counter_u64_free(priv->nfinfo_inact_exp); ng_netflow_v9_cache_flush(priv); } /* Insert a packet into the flow cache. */ int ng_netflow_flow_add(priv_p priv, fib_export_p fe, struct ip *ip, caddr_t upper_ptr, uint8_t upper_proto, uint8_t flags, unsigned int src_if_index) { struct flow_entry *fle, *fle1; struct flow_hash_entry *hsh; struct flow_rec r; int hlen, plen; int error = 0; uint16_t eproto; uint8_t tcp_flags = 0; bzero(&r, sizeof(r)); if (ip->ip_v != IPVERSION) return (EINVAL); hlen = ip->ip_hl << 2; if (hlen < sizeof(struct ip)) return (EINVAL); eproto = ETHERTYPE_IP; /* Assume L4 template by default */ r.flow_type = NETFLOW_V9_FLOW_V4_L4; r.r_src = ip->ip_src; r.r_dst = ip->ip_dst; r.fib = fe->fib; plen = ntohs(ip->ip_len); r.r_ip_p = ip->ip_p; r.r_tos = ip->ip_tos; r.r_i_ifx = src_if_index; /* * XXX NOTE: only the first fragment of a fragmented TCP, UDP or * ICMP packet will be recorded with proper s_port and d_port. * Following fragments will be recorded simply as an IP packet with * ip_proto = ip->ip_p and s_port, d_port set to zero. * I know, it looks like a bug. But I don't want to re-implement * ip packet assembling here.
Anyway, (in)famous trafd works this way - * and nobody complains yet :) */ if ((ip->ip_off & htons(IP_OFFMASK)) == 0) switch(r.r_ip_p) { case IPPROTO_TCP: { struct tcphdr *tcp; tcp = (struct tcphdr *)((caddr_t )ip + hlen); r.r_sport = tcp->th_sport; r.r_dport = tcp->th_dport; tcp_flags = tcp->th_flags; break; } case IPPROTO_UDP: r.r_ports = *(uint32_t *)((caddr_t )ip + hlen); break; } counter_u64_add(priv->nfinfo_packets, 1); counter_u64_add(priv->nfinfo_bytes, plen); /* Find hash slot. */ hsh = &priv->hash[ip_hash(&r)]; mtx_lock(&hsh->mtx); /* * Go through hash and find our entry. If we encounter an * entry that should be expired, purge it. We do a reverse * search since most active entries are first, and most * searches are done on most active entries. */ TAILQ_FOREACH_REVERSE_SAFE(fle, &hsh->head, fhead, fle_hash, fle1) { if (bcmp(&r, &fle->f.r, sizeof(struct flow_rec)) == 0) break; if ((INACTIVE(fle) && SMALL(fle)) || AGED(fle)) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, NG_QUEUE); counter_u64_add(priv->nfinfo_act_exp, 1); } } if (fle) { /* An existing entry. */ fle->f.bytes += plen; fle->f.packets++; fle->f.tcp_flags |= tcp_flags; fle->f.last = time_uptime; /* * We have the following reasons to expire a flow in an active way: * - it hit active timeout * - a TCP connection closed * - it is going to overflow counter */ if (tcp_flags & TH_FIN || tcp_flags & TH_RST || AGED(fle) || (fle->f.bytes >= (CNTR_MAX - IF_MAXMTU)) ) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, NG_QUEUE); counter_u64_add(priv->nfinfo_act_exp, 1); } else { /* * It is the newest, move it to the tail, * if it isn't there already. Next search will * locate it quicker. */ if (fle != TAILQ_LAST(&hsh->head, fhead)) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash); } } } else /* A new flow entry. */ error = hash_insert(priv, hsh, &r, plen, flags, tcp_flags); mtx_unlock(&hsh->mtx); return (error); } #ifdef INET6 /* Insert an IPv6 packet into the flow cache. */ int ng_netflow_flow6_add(priv_p priv, fib_export_p fe, struct ip6_hdr *ip6, caddr_t upper_ptr, uint8_t upper_proto, uint8_t flags, unsigned int src_if_index) { struct flow_entry *fle = NULL, *fle1; struct flow6_entry *fle6; struct flow_hash_entry *hsh; struct flow6_rec r; int plen; int error = 0; uint8_t tcp_flags = 0; /* check version */ if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) return (EINVAL); bzero(&r, sizeof(r)); r.src.r_src6 = ip6->ip6_src; r.dst.r_dst6 = ip6->ip6_dst; r.fib = fe->fib; /* Assume L4 template by default */ r.flow_type = NETFLOW_V9_FLOW_V6_L4; plen = ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr); #if 0 /* XXX: set DSCP/CoS value */ r.r_tos = ip->ip_tos; #endif if ((flags & NG_NETFLOW_IS_FRAG) == 0) { switch(upper_proto) { case IPPROTO_TCP: { struct tcphdr *tcp; tcp = (struct tcphdr *)upper_ptr; r.r_ports = *(uint32_t *)upper_ptr; tcp_flags = tcp->th_flags; break; } case IPPROTO_UDP: case IPPROTO_SCTP: r.r_ports = *(uint32_t *)upper_ptr; break; } } r.r_ip_p = upper_proto; r.r_i_ifx = src_if_index; counter_u64_add(priv->nfinfo_packets6, 1); counter_u64_add(priv->nfinfo_bytes6, plen); /* Find hash slot. */ hsh = &priv->hash6[ip6_hash(&r)]; mtx_lock(&hsh->mtx); /* * Go through hash and find our entry. If we encounter an * entry that should be expired, purge it. We do a reverse * search since most active entries are first, and most * searches are done on most active entries.
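 * The strategy mirrors ng_netflow_flow_add() above, with an extra
 * version check before treating an entry as a flow6_entry.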
*/ TAILQ_FOREACH_REVERSE_SAFE(fle, &hsh->head, fhead, fle_hash, fle1) { if (fle->f.version != IP6VERSION) continue; fle6 = (struct flow6_entry *)fle; if (bcmp(&r, &fle6->f.r, sizeof(struct flow6_rec)) == 0) break; if ((INACTIVE(fle6) && SMALL(fle6)) || AGED(fle6)) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, NG_QUEUE); counter_u64_add(priv->nfinfo_act_exp, 1); } } if (fle != NULL) { /* An existing entry. */ fle6 = (struct flow6_entry *)fle; fle6->f.bytes += plen; fle6->f.packets++; fle6->f.tcp_flags |= tcp_flags; fle6->f.last = time_uptime; /* * We have the following reasons to expire a flow in an active way: * - it hit active timeout * - a TCP connection closed * - it is going to overflow counter */ if (tcp_flags & TH_FIN || tcp_flags & TH_RST || AGED(fle6) || (fle6->f.bytes >= (CNTR_MAX - IF_MAXMTU)) ) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, NG_QUEUE); counter_u64_add(priv->nfinfo_act_exp, 1); } else { /* * It is the newest, move it to the tail, * if it isn't there already. Next search will * locate it quicker. */ if (fle != TAILQ_LAST(&hsh->head, fhead)) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash); } } } else /* A new flow entry. */ error = hash6_insert(priv, hsh, &r, plen, flags, tcp_flags); mtx_unlock(&hsh->mtx); return (error); } #endif /* * Return records from cache to userland. * * TODO: matching particular IP should be done in kernel, here. */ int ng_netflow_flow_show(priv_p priv, struct ngnf_show_header *req, struct ngnf_show_header *resp) { struct flow_hash_entry *hsh; struct flow_entry *fle; struct flow_entry_data *data = (struct flow_entry_data *)(resp + 1); #ifdef INET6 struct flow6_entry_data *data6 = (struct flow6_entry_data *)(resp + 1); #endif int i, max; i = req->hash_id; if (i > NBUCKETS-1) return (EINVAL); #ifdef INET6 if (req->version == 6) { resp->version = 6; hsh = priv->hash6 + i; max = NREC6_AT_ONCE; } else #endif if (req->version == 4) { resp->version = 4; hsh = priv->hash + i; max = NREC_AT_ONCE; } else return (EINVAL); /* * We will transfer no more than NREC_AT_ONCE records. More data * will come in the next message. * We send the current hash index and the current record number in the * list to userland, and userland should return them back to us. * Then, we will restart with the new entry. * * The resulting cache snapshot can be inaccurate if flow expiration * is taking place on a hash item between userland data requests for * this hash item id. */ resp->nentries = 0; for (; i < NBUCKETS; hsh++, i++) { int list_id; if (mtx_trylock(&hsh->mtx) == 0) { /* * Requested hash index is not available; * relay the decision to skip or re-request the * data to userland. */ resp->hash_id = i; resp->list_id = 0; return (0); } list_id = 0; TAILQ_FOREACH(fle, &hsh->head, fle_hash) { if (hsh->mtx.mtx_lock & MTX_CONTESTED) { resp->hash_id = i; resp->list_id = list_id; mtx_unlock(&hsh->mtx); return (0); } list_id++; /* Search for the particular record in the list. */ if (req->list_id > 0) { if (list_id < req->list_id) continue; /* Requested list position found. */ req->list_id = 0; } #ifdef INET6 if (req->version == 6) { struct flow6_entry *fle6; fle6 = (struct flow6_entry *)fle; bcopy(&fle6->f, data6 + resp->nentries, sizeof(fle6->f)); } else #endif bcopy(&fle->f, data + resp->nentries, sizeof(fle->f)); resp->nentries++; if (resp->nentries == max) { resp->hash_id = i; /* * If it was the last item in the list * we simply skip to the next hash_id.
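 * On the follow-up request the loop above skips entries whose
 * list_id is below the returned value, resuming at the first record
 * not yet copied out.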
*/ resp->list_id = list_id + 1; mtx_unlock(&hsh->mtx); return (0); } } mtx_unlock(&hsh->mtx); } resp->hash_id = resp->list_id = 0; return (0); } /* We have a full datagram in privdata. Send it to the export hook. */ static int export_send(priv_p priv, fib_export_p fe, item_p item, int flags) { struct mbuf *m = NGI_M(item); struct netflow_v5_export_dgram *dgram = mtod(m, struct netflow_v5_export_dgram *); struct netflow_v5_header *header = &dgram->header; struct timespec ts; int error = 0; /* Fill mbuf header. */ m->m_len = m->m_pkthdr.len = sizeof(struct netflow_v5_record) * header->count + sizeof(struct netflow_v5_header); /* Fill export header. */ header->sys_uptime = htonl(MILLIUPTIME(time_uptime)); getnanotime(&ts); header->unix_secs = htonl(ts.tv_sec); header->unix_nsecs = htonl(ts.tv_nsec); header->engine_type = 0; header->engine_id = fe->domain_id; header->pad = 0; header->flow_seq = htonl(atomic_fetchadd_32(&fe->flow_seq, header->count)); header->count = htons(header->count); if (priv->export != NULL) NG_FWD_ITEM_HOOK_FLAGS(error, item, priv->export, flags); else NG_FREE_ITEM(item); return (error); } /* Add an export record to the dgram. */ static int export_add(item_p item, struct flow_entry *fle) { struct netflow_v5_export_dgram *dgram = mtod(NGI_M(item), struct netflow_v5_export_dgram *); struct netflow_v5_header *header = &dgram->header; struct netflow_v5_record *rec; rec = &dgram->r[header->count]; header->count++; KASSERT(header->count <= NETFLOW_V5_MAX_RECORDS, ("ng_netflow: export too big")); /* Fill in export record. */ rec->src_addr = fle->f.r.r_src.s_addr; rec->dst_addr = fle->f.r.r_dst.s_addr; rec->next_hop = fle->f.next_hop.s_addr; rec->i_ifx = htons(fle->f.fle_i_ifx); rec->o_ifx = htons(fle->f.fle_o_ifx); rec->packets = htonl(fle->f.packets); rec->octets = htonl(fle->f.bytes); rec->first = htonl(MILLIUPTIME(fle->f.first)); rec->last = htonl(MILLIUPTIME(fle->f.last)); rec->s_port = fle->f.r.r_sport; rec->d_port = fle->f.r.r_dport; rec->flags = fle->f.tcp_flags; rec->prot = fle->f.r.r_ip_p; rec->tos = fle->f.r.r_tos; rec->dst_mask = fle->f.dst_mask; rec->src_mask = fle->f.src_mask; rec->pad1 = 0; rec->pad2 = 0; /* Unsupported fields. */ rec->src_as = rec->dst_as = 0; if (header->count == NETFLOW_V5_MAX_RECORDS) return (1); /* end of datagram */ else return (0); } /* Periodic flow expiry run. */ void ng_netflow_expire(void *arg) { struct flow_entry *fle, *fle1; struct flow_hash_entry *hsh; priv_p priv = (priv_p )arg; int used, i; /* * Go through the whole cache. */ used = uma_zone_get_cur(priv->zone); for (hsh = priv->hash, i = 0; i < NBUCKETS; hsh++, i++) { /* * Skip entries that are already being worked on. */ if (mtx_trylock(&hsh->mtx) == 0) continue; TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) { /* * Interrupt thread wants this entry! * Quick! Quick! Bail out! */ if (hsh->mtx.mtx_lock & MTX_CONTESTED) break; /* * Don't expire aggressively while hash collision * ratio is predicted small. */ if (used <= (NBUCKETS*2) && !INACTIVE(fle)) break; if ((INACTIVE(fle) && (SMALL(fle) || (used > (NBUCKETS*2)))) || AGED(fle)) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, NG_NOFLAGS); used--; counter_u64_add(priv->nfinfo_inact_exp, 1); } } mtx_unlock(&hsh->mtx); } #ifdef INET6 used = uma_zone_get_cur(priv->zone6); for (hsh = priv->hash6, i = 0; i < NBUCKETS; hsh++, i++) { struct flow6_entry *fle6; /* * Skip entries that are already being worked on.
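 * mtx_trylock() keeps this periodic sweep from blocking the data
 * path; a contested bucket is simply revisited on the next callout
 * tick.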
*/ if (mtx_trylock(&hsh->mtx) == 0) continue; TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) { fle6 = (struct flow6_entry *)fle; /* * Interrupt thread wants this entry! * Quick! Quick! Bail out! */ if (hsh->mtx.mtx_lock & MTX_CONTESTED) break; /* * Don't expire aggressively while hash collision * ratio is predicted small. */ if (used <= (NBUCKETS*2) && !INACTIVE(fle6)) break; if ((INACTIVE(fle6) && (SMALL(fle6) || (used > (NBUCKETS*2)))) || AGED(fle6)) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, NG_NOFLAGS); used--; counter_u64_add(priv->nfinfo_inact_exp, 1); } } mtx_unlock(&hsh->mtx); } #endif /* Schedule next expire. */ callout_reset(&priv->exp_callout, (1*hz), &ng_netflow_expire, (void *)priv); } diff --git a/sys/netinet/in.c b/sys/netinet/in.c index c30c1f3fdf88..80b14483654a 100644 --- a/sys/netinet/in.c +++ b/sys/netinet/in.c @@ -1,1422 +1,1432 @@ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * Copyright (C) 2001 WIDE Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)in.c 8.4 (Berkeley) 1/9/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_mpath.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int in_aifaddr_ioctl(u_long, caddr_t, struct ifnet *, struct thread *); static int in_difaddr_ioctl(caddr_t, struct ifnet *, struct thread *); static void in_socktrim(struct sockaddr_in *); static void in_purgemaddrs(struct ifnet *); static VNET_DEFINE(int, nosameprefix); #define V_nosameprefix VNET(nosameprefix) SYSCTL_INT(_net_inet_ip, OID_AUTO, no_same_prefix, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nosameprefix), 0, "Refuse to create same prefixes on different interfaces"); VNET_DECLARE(struct inpcbinfo, ripcbinfo); #define V_ripcbinfo VNET(ripcbinfo) static struct sx in_control_sx; SX_SYSINIT(in_control_sx, &in_control_sx, "in_control"); /* * Return 1 if an internet address is for a ``local'' host * (one to which we have a connection). */ int in_localaddr(struct in_addr in) { struct rm_priotracker in_ifa_tracker; register u_long i = ntohl(in.s_addr); register struct in_ifaddr *ia; IN_IFADDR_RLOCK(&in_ifa_tracker); TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if ((i & ia->ia_subnetmask) == ia->ia_subnet) { IN_IFADDR_RUNLOCK(&in_ifa_tracker); return (1); } } IN_IFADDR_RUNLOCK(&in_ifa_tracker); return (0); } /* * Return 1 if an internet address is for the local host and configured * on one of its interfaces. */ int in_localip(struct in_addr in) { struct rm_priotracker in_ifa_tracker; struct in_ifaddr *ia; IN_IFADDR_RLOCK(&in_ifa_tracker); LIST_FOREACH(ia, INADDR_HASH(in.s_addr), ia_hash) { if (IA_SIN(ia)->sin_addr.s_addr == in.s_addr) { IN_IFADDR_RUNLOCK(&in_ifa_tracker); return (1); } } IN_IFADDR_RUNLOCK(&in_ifa_tracker); return (0); } /* * Return 1 if an internet address is configured on an interface. */ int in_ifhasaddr(struct ifnet *ifp, struct in_addr in) { struct ifaddr *ifa; struct in_ifaddr *ia; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET) continue; ia = (struct in_ifaddr *)ifa; if (ia->ia_addr.sin_addr.s_addr == in.s_addr) { IF_ADDR_RUNLOCK(ifp); return (1); } } IF_ADDR_RUNLOCK(ifp); return (0); } /* * Return a reference to the interface address which is different from * the supplied one but has the same IP address value. */ static struct in_ifaddr * in_localip_more(struct in_ifaddr *ia) { struct rm_priotracker in_ifa_tracker; in_addr_t in = IA_SIN(ia)->sin_addr.s_addr; struct in_ifaddr *it; IN_IFADDR_RLOCK(&in_ifa_tracker); LIST_FOREACH(it, INADDR_HASH(in), ia_hash) { if (it != ia && IA_SIN(it)->sin_addr.s_addr == in) { ifa_ref(&it->ia_ifa); IN_IFADDR_RUNLOCK(&in_ifa_tracker); return (it); } } IN_IFADDR_RUNLOCK(&in_ifa_tracker); return (NULL); } /* * Determine whether an IP address is in a reserved set of addresses * that may not be forwarded, or whether datagrams to that destination * may be forwarded.
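 * Experimental (class E), multicast and link-local addresses are never
 * forwardable, nor are class A net 0 and the loopback net.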
*/ int in_canforward(struct in_addr in) { register u_long i = ntohl(in.s_addr); register u_long net; if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i) || IN_LINKLOCAL(i)) return (0); if (IN_CLASSA(i)) { net = i & IN_CLASSA_NET; if (net == 0 || net == (IN_LOOPBACKNET << IN_CLASSA_NSHIFT)) return (0); } return (1); } /* * Trim a mask in a sockaddr */ static void in_socktrim(struct sockaddr_in *ap) { register char *cplim = (char *) &ap->sin_addr; register char *cp = (char *) (&ap->sin_addr + 1); ap->sin_len = 0; while (--cp >= cplim) if (*cp) { (ap)->sin_len = cp - (char *) (ap) + 1; break; } } /* * Generic internet control operations (ioctl's). */ int in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) { struct ifreq *ifr = (struct ifreq *)data; struct sockaddr_in *addr = (struct sockaddr_in *)&ifr->ifr_addr; struct ifaddr *ifa; struct in_ifaddr *ia; int error; if (ifp == NULL) return (EADDRNOTAVAIL); /* * Filter out 4 ioctls we implement directly. Forward the rest * to specific functions and ifp->if_ioctl(). */ switch (cmd) { case SIOCGIFADDR: case SIOCGIFBRDADDR: case SIOCGIFDSTADDR: case SIOCGIFNETMASK: break; case SIOCDIFADDR: sx_xlock(&in_control_sx); error = in_difaddr_ioctl(data, ifp, td); sx_xunlock(&in_control_sx); return (error); case OSIOCAIFADDR: /* 9.x compat */ case SIOCAIFADDR: sx_xlock(&in_control_sx); error = in_aifaddr_ioctl(cmd, data, ifp, td); sx_xunlock(&in_control_sx); return (error); case SIOCSIFADDR: case SIOCSIFBRDADDR: case SIOCSIFDSTADDR: case SIOCSIFNETMASK: /* We no longer support those old commands. */ return (EINVAL); default: if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); return ((*ifp->if_ioctl)(ifp, cmd, data)); } if (addr->sin_addr.s_addr != INADDR_ANY && prison_check_ip4(td->td_ucred, &addr->sin_addr) != 0) return (EADDRNOTAVAIL); /* * Find address for this interface, if it exists. If an * address was specified, find that one instead of the * first one on the interface, if possible. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET) continue; ia = (struct in_ifaddr *)ifa; if (ia->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr) break; } if (ifa == NULL) TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (ifa->ifa_addr->sa_family == AF_INET) { ia = (struct in_ifaddr *)ifa; if (prison_check_ip4(td->td_ucred, &ia->ia_addr.sin_addr) == 0) break; } if (ifa == NULL) { IF_ADDR_RUNLOCK(ifp); return (EADDRNOTAVAIL); } error = 0; switch (cmd) { case SIOCGIFADDR: *addr = ia->ia_addr; break; case SIOCGIFBRDADDR: if ((ifp->if_flags & IFF_BROADCAST) == 0) { error = EINVAL; break; } *addr = ia->ia_broadaddr; break; case SIOCGIFDSTADDR: if ((ifp->if_flags & IFF_POINTOPOINT) == 0) { error = EINVAL; break; } *addr = ia->ia_dstaddr; break; case SIOCGIFNETMASK: *addr = ia->ia_sockmask; break; } IF_ADDR_RUNLOCK(ifp); return (error); } static int in_aifaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) { const struct in_aliasreq *ifra = (struct in_aliasreq *)data; const struct sockaddr_in *addr = &ifra->ifra_addr; const struct sockaddr_in *broadaddr = &ifra->ifra_broadaddr; const struct sockaddr_in *mask = &ifra->ifra_mask; const struct sockaddr_in *dstaddr = &ifra->ifra_dstaddr; const int vhid = (cmd == SIOCAIFADDR) ? ifra->ifra_vhid : 0; struct ifaddr *ifa; struct in_ifaddr *ia; bool iaIsFirst; int error = 0; error = priv_check(td, PRIV_NET_ADDIFADDR); if (error) return (error); /* * ifra_addr must be present and be of INET family.
* ifra_broadaddr/ifra_dstaddr and ifra_mask are optional. */ if (addr->sin_len != sizeof(struct sockaddr_in) || addr->sin_family != AF_INET) return (EINVAL); if (broadaddr->sin_len != 0 && (broadaddr->sin_len != sizeof(struct sockaddr_in) || broadaddr->sin_family != AF_INET)) return (EINVAL); if (mask->sin_len != 0 && (mask->sin_len != sizeof(struct sockaddr_in) || mask->sin_family != AF_INET)) return (EINVAL); if ((ifp->if_flags & IFF_POINTOPOINT) && (dstaddr->sin_len != sizeof(struct sockaddr_in) || dstaddr->sin_addr.s_addr == INADDR_ANY)) return (EDESTADDRREQ); if (vhid > 0 && carp_attach_p == NULL) return (EPROTONOSUPPORT); /* * See whether the address already exists. */ iaIsFirst = true; ia = NULL; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct in_ifaddr *it; if (ifa->ifa_addr->sa_family != AF_INET) continue; it = (struct in_ifaddr *)ifa; iaIsFirst = false; if (it->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr && prison_check_ip4(td->td_ucred, &addr->sin_addr) == 0) ia = it; } IF_ADDR_RUNLOCK(ifp); if (ia != NULL) (void )in_difaddr_ioctl(data, ifp, td); ifa = ifa_alloc(sizeof(struct in_ifaddr), M_WAITOK); ia = (struct in_ifaddr *)ifa; ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr; ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr; ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask; ia->ia_ifp = ifp; ia->ia_addr = *addr; if (mask->sin_len != 0) { ia->ia_sockmask = *mask; ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr); } else { in_addr_t i = ntohl(addr->sin_addr.s_addr); /* * Be compatible with network classes: if no netmask is * supplied, guess one based on the address class. */ if (IN_CLASSA(i)) ia->ia_subnetmask = IN_CLASSA_NET; else if (IN_CLASSB(i)) ia->ia_subnetmask = IN_CLASSB_NET; else ia->ia_subnetmask = IN_CLASSC_NET; ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask); } ia->ia_subnet = ntohl(addr->sin_addr.s_addr) & ia->ia_subnetmask; in_socktrim(&ia->ia_sockmask); if (ifp->if_flags & IFF_BROADCAST) { if (broadaddr->sin_len != 0) { ia->ia_broadaddr = *broadaddr; } else if (ia->ia_subnetmask == IN_RFC3021_MASK) { ia->ia_broadaddr.sin_addr.s_addr = INADDR_BROADCAST; ia->ia_broadaddr.sin_len = sizeof(struct sockaddr_in); ia->ia_broadaddr.sin_family = AF_INET; } else { ia->ia_broadaddr.sin_addr.s_addr = htonl(ia->ia_subnet | ~ia->ia_subnetmask); ia->ia_broadaddr.sin_len = sizeof(struct sockaddr_in); ia->ia_broadaddr.sin_family = AF_INET; } } if (ifp->if_flags & IFF_POINTOPOINT) ia->ia_dstaddr = *dstaddr; /* XXXGL: rtinit() needs this strange assignment. */ if (ifp->if_flags & IFF_LOOPBACK) ia->ia_dstaddr = ia->ia_addr; if (vhid != 0) { error = (*carp_attach_p)(&ia->ia_ifa, vhid); if (error) return (error); } /* if_addrhead is already referenced by ifa_alloc() */ IF_ADDR_WLOCK(ifp); TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_ref(ifa); /* in_ifaddrhead */ IN_IFADDR_WLOCK(); TAILQ_INSERT_TAIL(&V_in_ifaddrhead, ia, ia_link); LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr), ia, ia_hash); IN_IFADDR_WUNLOCK(); /* * Give the interface a chance to initialize * if this is its first address, * and to validate the address if necessary. */ if (ifp->if_ioctl != NULL) { error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia); if (error) goto fail1; } /* * Add route for the network. */ if (vhid == 0) { int flags = RTF_UP; if (ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)) flags |= RTF_HOST; error = in_addprefix(ia, flags); if (error) goto fail1; } /* * Add a loopback route to self.
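 */

/*
 * When no mask is supplied above, the derivation is: pick a classful
 * default, then compute the directed broadcast as subnet | ~mask.  A
 * stand-alone sketch with hypothetical values (illustration only):
 */
#if 0
#include <arpa/inet.h>
#include <stdio.h>

int
main(void)
{
	in_addr_t a = ntohl(inet_addr("10.1.2.3"));
	in_addr_t mask = 0xff000000;		/* classful default for 10/8 */
	in_addr_t subnet = a & mask;		/* 0x0a000000 */
	in_addr_t bcast = subnet | ~mask;	/* 0x0affffff */
	struct in_addr tmp = { .s_addr = htonl(bcast) };

	printf("%s\n", inet_ntoa(tmp));		/* 10.255.255.255 */
	return (0);
}
#endif

/*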
*/ if (vhid == 0 && (ifp->if_flags & IFF_LOOPBACK) == 0 && ia->ia_addr.sin_addr.s_addr != INADDR_ANY && !((ifp->if_flags & IFF_POINTOPOINT) && ia->ia_dstaddr.sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr)) { struct in_ifaddr *eia; eia = in_localip_more(ia); if (eia == NULL) { error = ifa_add_loopback_route((struct ifaddr *)ia, (struct sockaddr *)&ia->ia_addr); if (error) goto fail2; } else ifa_free(&eia->ia_ifa); } if (iaIsFirst && (ifp->if_flags & IFF_MULTICAST)) { struct in_addr allhosts_addr; struct in_ifinfo *ii; ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]); allhosts_addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP); error = in_joingroup(ifp, &allhosts_addr, NULL, &ii->ii_allhosts); } EVENTHANDLER_INVOKE(ifaddr_event, ifp); return (error); fail2: if (vhid == 0) (void )in_scrubprefix(ia, LLE_STATIC); fail1: if (ia->ia_ifa.ifa_carp) (*carp_detach_p)(&ia->ia_ifa); IF_ADDR_WLOCK(ifp); TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_free(&ia->ia_ifa); /* if_addrhead */ IN_IFADDR_WLOCK(); TAILQ_REMOVE(&V_in_ifaddrhead, ia, ia_link); LIST_REMOVE(ia, ia_hash); IN_IFADDR_WUNLOCK(); ifa_free(&ia->ia_ifa); /* in_ifaddrhead */ return (error); } static int in_difaddr_ioctl(caddr_t data, struct ifnet *ifp, struct thread *td) { const struct ifreq *ifr = (struct ifreq *)data; const struct sockaddr_in *addr = (const struct sockaddr_in *) &ifr->ifr_addr; struct ifaddr *ifa; struct in_ifaddr *ia; bool deleteAny, iaIsLast; int error; if (td != NULL) { error = priv_check(td, PRIV_NET_DELIFADDR); if (error) return (error); } if (addr->sin_len != sizeof(struct sockaddr_in) || addr->sin_family != AF_INET) deleteAny = true; else deleteAny = false; iaIsLast = true; ia = NULL; IF_ADDR_WLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct in_ifaddr *it; if (ifa->ifa_addr->sa_family != AF_INET) continue; it = (struct in_ifaddr *)ifa; if (deleteAny && ia == NULL && (td == NULL || prison_check_ip4(td->td_ucred, &it->ia_addr.sin_addr) == 0)) ia = it; if (it->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr && (td == NULL || prison_check_ip4(td->td_ucred, &addr->sin_addr) == 0)) ia = it; if (it != ia) iaIsLast = false; } if (ia == NULL) { IF_ADDR_WUNLOCK(ifp); return (EADDRNOTAVAIL); } TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_free(&ia->ia_ifa); /* if_addrhead */ IN_IFADDR_WLOCK(); TAILQ_REMOVE(&V_in_ifaddrhead, ia, ia_link); LIST_REMOVE(ia, ia_hash); IN_IFADDR_WUNLOCK(); /* * in_scrubprefix() kills the interface route. */ in_scrubprefix(ia, LLE_STATIC); /* * in_ifadown gets rid of all the rest of * the routes. This is not quite the right * thing to do, but at least if we are running * a routing process they will come back. */ in_ifadown(&ia->ia_ifa, 1); if (ia->ia_ifa.ifa_carp) (*carp_detach_p)(&ia->ia_ifa); /* * If this is the last IPv4 address configured on this * interface, leave the all-hosts group. * No state-change report need be transmitted. */ if (iaIsLast && (ifp->if_flags & IFF_MULTICAST)) { struct in_ifinfo *ii; ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]); IN_MULTI_LOCK(); if (ii->ii_allhosts) { (void)in_leavegroup_locked(ii->ii_allhosts, NULL); ii->ii_allhosts = NULL; } IN_MULTI_UNLOCK(); } EVENTHANDLER_INVOKE(ifaddr_event, ifp); ifa_free(&ia->ia_ifa); /* in_ifaddrhead */ return (0); } #define rtinitflags(x) \ ((((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) \ ? RTF_HOST : 0) /* * Check if we have a route for the given prefix already or add one accordingly. 
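 */

/*
 * The two handlers above are reached through ioctl(2) on any AF_INET
 * socket.  A hedged userland sketch of driving SIOCAIFADDR; "em0" and
 * the addresses are hypothetical, and this is in essence what
 * ifconfig(8) does.  SIOCDIFADDR takes a plain struct ifreq instead.
 */
#if 0
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <net/if.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <arpa/inet.h>
#include <string.h>
#include <unistd.h>
#include <err.h>

int
main(void)
{
	struct in_aliasreq ifra;
	int s;

	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) == -1)
		err(1, "socket");
	memset(&ifra, 0, sizeof(ifra));
	strlcpy(ifra.ifra_name, "em0", sizeof(ifra.ifra_name));
	ifra.ifra_addr.sin_len = sizeof(struct sockaddr_in);
	ifra.ifra_addr.sin_family = AF_INET;
	ifra.ifra_addr.sin_addr.s_addr = inet_addr("192.0.2.1");
	ifra.ifra_mask.sin_len = sizeof(struct sockaddr_in);
	ifra.ifra_mask.sin_family = AF_INET;
	ifra.ifra_mask.sin_addr.s_addr = inet_addr("255.255.255.0");
	if (ioctl(s, SIOCAIFADDR, &ifra) == -1)
		err(1, "SIOCAIFADDR");
	close(s);
	return (0);
}
#endif

/*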
*/ int in_addprefix(struct in_ifaddr *target, int flags) { struct rm_priotracker in_ifa_tracker; struct in_ifaddr *ia; struct in_addr prefix, mask, p, m; int error; if ((flags & RTF_HOST) != 0) { prefix = target->ia_dstaddr.sin_addr; mask.s_addr = 0; } else { prefix = target->ia_addr.sin_addr; mask = target->ia_sockmask.sin_addr; prefix.s_addr &= mask.s_addr; } IN_IFADDR_RLOCK(&in_ifa_tracker); /* Look for an existing address with the same prefix, mask, and fib */ TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if (rtinitflags(ia)) { p = ia->ia_dstaddr.sin_addr; if (prefix.s_addr != p.s_addr) continue; } else { p = ia->ia_addr.sin_addr; m = ia->ia_sockmask.sin_addr; p.s_addr &= m.s_addr; if (prefix.s_addr != p.s_addr || mask.s_addr != m.s_addr) continue; } if (target->ia_ifp->if_fib != ia->ia_ifp->if_fib) continue; /* * If we got a matching prefix route inserted by another * interface address, we are done here. */ if (ia->ia_flags & IFA_ROUTE) { #ifdef RADIX_MPATH if (ia->ia_addr.sin_addr.s_addr == target->ia_addr.sin_addr.s_addr) { IN_IFADDR_RUNLOCK(&in_ifa_tracker); return (EEXIST); } else break; #endif if (V_nosameprefix) { IN_IFADDR_RUNLOCK(&in_ifa_tracker); return (EEXIST); } else { int fibnum; fibnum = V_rt_add_addr_allfibs ? RT_ALL_FIBS : target->ia_ifp->if_fib; rt_addrmsg(RTM_ADD, &target->ia_ifa, fibnum); IN_IFADDR_RUNLOCK(&in_ifa_tracker); return (0); } } } IN_IFADDR_RUNLOCK(&in_ifa_tracker); /* * No one seems to have this prefix route, so we try to insert it. */ error = rtinit(&target->ia_ifa, (int)RTM_ADD, flags); if (!error) target->ia_flags |= IFA_ROUTE; return (error); } /* * Removes either all lle entries for the given @ia, or the lle * corresponding to the @ia address. */ static void in_scrubprefixlle(struct in_ifaddr *ia, int all, u_int flags) { struct sockaddr_in addr, mask; struct sockaddr *saddr, *smask; struct ifnet *ifp; saddr = (struct sockaddr *)&addr; bzero(&addr, sizeof(addr)); addr.sin_len = sizeof(addr); addr.sin_family = AF_INET; smask = (struct sockaddr *)&mask; bzero(&mask, sizeof(mask)); mask.sin_len = sizeof(mask); mask.sin_family = AF_INET; mask.sin_addr.s_addr = ia->ia_subnetmask; ifp = ia->ia_ifp; if (all) { /* * Remove all L2 entries matching given prefix. * Convert address to host representation to avoid * doing this on every callback. ia_subnetmask is already * stored in host representation. */ addr.sin_addr.s_addr = ntohl(ia->ia_addr.sin_addr.s_addr); lltable_prefix_free(AF_INET, saddr, smask, flags); } else { /* Remove interface address only */ addr.sin_addr.s_addr = ia->ia_addr.sin_addr.s_addr; lltable_delete_addr(LLTABLE(ifp), LLE_IFADDR, saddr); } } /* * If there is no other address in the system that can serve a route to the * same prefix, remove the route. Hand over the route to the new address * otherwise. */ int in_scrubprefix(struct in_ifaddr *target, u_int flags) { struct rm_priotracker in_ifa_tracker; struct in_ifaddr *ia; struct in_addr prefix, mask, p, m; int error = 0; /* * Remove the loopback route to the interface address. */ if ((target->ia_addr.sin_addr.s_addr != INADDR_ANY) && !(target->ia_ifp->if_flags & IFF_LOOPBACK) && (flags & LLE_STATIC)) { struct in_ifaddr *eia; /* * XXXME: add fib-aware in_localip. * We definitely don't want to switch between * prefixes in different fibs.
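 */

/*
 * The duplicate test in the in_addprefix() loop above, isolated into a
 * pure helper (the function name is ours, for illustration only): two
 * addresses describe the same prefix iff the masks match and the
 * masked addresses match.
 */
#if 0
static int
same_prefix(struct in_addr a1, struct in_addr m1, struct in_addr a2,
    struct in_addr m2)
{

	return (m1.s_addr == m2.s_addr &&
	    (a1.s_addr & m1.s_addr) == (a2.s_addr & m2.s_addr));
}
#endif

/*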
*/ eia = in_localip_more(target); if (eia != NULL) { error = ifa_switch_loopback_route((struct ifaddr *)eia, (struct sockaddr *)&target->ia_addr); ifa_free(&eia->ia_ifa); } else { error = ifa_del_loopback_route((struct ifaddr *)target, (struct sockaddr *)&target->ia_addr); } } if (rtinitflags(target)) { prefix = target->ia_dstaddr.sin_addr; mask.s_addr = 0; } else { prefix = target->ia_addr.sin_addr; mask = target->ia_sockmask.sin_addr; prefix.s_addr &= mask.s_addr; } if ((target->ia_flags & IFA_ROUTE) == 0) { int fibnum; fibnum = V_rt_add_addr_allfibs ? RT_ALL_FIBS : target->ia_ifp->if_fib; rt_addrmsg(RTM_DELETE, &target->ia_ifa, fibnum); /* * Removing address from !IFF_UP interface or * a prefix which exists on another interface (along with the route). * No entries should exist here except target addr. * Given that, delete this entry only. */ in_scrubprefixlle(target, 0, flags); return (0); } IN_IFADDR_RLOCK(&in_ifa_tracker); TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if (rtinitflags(ia)) { p = ia->ia_dstaddr.sin_addr; if (prefix.s_addr != p.s_addr) continue; } else { p = ia->ia_addr.sin_addr; m = ia->ia_sockmask.sin_addr; p.s_addr &= m.s_addr; if (prefix.s_addr != p.s_addr || mask.s_addr != m.s_addr) continue; } if ((ia->ia_ifp->if_flags & IFF_UP) == 0) continue; /* * If we got a matching prefix address, move IFA_ROUTE and * the route itself to it. Make sure that routing daemons * get a heads-up. */ if ((ia->ia_flags & IFA_ROUTE) == 0) { ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(&in_ifa_tracker); error = rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target)); if (error == 0) target->ia_flags &= ~IFA_ROUTE; else log(LOG_INFO, "in_scrubprefix: err=%d, old prefix delete failed\n", error); /* Scrub all entries IFF interface is different */ in_scrubprefixlle(target, target->ia_ifp != ia->ia_ifp, flags); error = rtinit(&ia->ia_ifa, (int)RTM_ADD, rtinitflags(ia) | RTF_UP); if (error == 0) ia->ia_flags |= IFA_ROUTE; else log(LOG_INFO, "in_scrubprefix: err=%d, new prefix add failed\n", error); ifa_free(&ia->ia_ifa); return (error); } } IN_IFADDR_RUNLOCK(&in_ifa_tracker); /* * Remove all L2 entries on the given prefix. */ in_scrubprefixlle(target, 1, flags); /* * As no one seems to have this prefix, we can remove the route. */ error = rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target)); if (error == 0) target->ia_flags &= ~IFA_ROUTE; else log(LOG_INFO, "in_scrubprefix: err=%d, prefix delete failed\n", error); return (error); } #undef rtinitflags /* * Return 1 if the address might be a local broadcast address. */ int in_broadcast(struct in_addr in, struct ifnet *ifp) { register struct ifaddr *ifa; u_long t; if (in.s_addr == INADDR_BROADCAST || in.s_addr == INADDR_ANY) return (1); if ((ifp->if_flags & IFF_BROADCAST) == 0) return (0); t = ntohl(in.s_addr); /* * Look through the list of addresses for a match * with a broadcast address. */ #define ia ((struct in_ifaddr *)ifa) TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (ifa->ifa_addr->sa_family == AF_INET && (in.s_addr == ia->ia_broadaddr.sin_addr.s_addr || /* * Check for old-style (host 0) broadcast, but * taking into account that RFC 3021 obsoletes it. */ (ia->ia_subnetmask != IN_RFC3021_MASK && t == ia->ia_subnet)) && /* * Check for an all-ones subnetmask. These * only exist when an interface gets a secondary * address. */ ia->ia_subnetmask != (u_long)0xffffffff) return (1); return (0); #undef ia } /* * On interface removal, clean up IPv4 data structures hung off of the ifnet.
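 */

/*
 * The in_broadcast() predicate above accepts, per configured address:
 * the directed broadcast, and the old-style host-0 broadcast unless
 * the prefix is an RFC 3021 /31; an all-ones subnetmask never matches.
 * Restated as a stand-alone check on host-order values (the helper
 * name is ours, illustration only):
 */
#if 0
static int
is_dir_bcast(u_long t, u_long subnet, u_long submask)
{

	if (t == (subnet | ~submask))	/* directed broadcast */
		return (1);
	if (submask != IN_RFC3021_MASK && t == subnet)
		return (1);		/* old-style host-0 broadcast */
	return (0);
}
#endif

/*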
*/ void in_ifdetach(struct ifnet *ifp) { in_pcbpurgeif0(&V_ripcbinfo, ifp); in_pcbpurgeif0(&V_udbinfo, ifp); in_pcbpurgeif0(&V_ulitecbinfo, ifp); in_purgemaddrs(ifp); } /* * Delete all IPv4 multicast address records, and associated link-layer * multicast address records, associated with ifp. * XXX It looks like domifdetach runs AFTER the link layer cleanup. * XXX This should not race with ifma_protospec being set during * a new allocation, if it does, we have bigger problems. */ static void in_purgemaddrs(struct ifnet *ifp) { LIST_HEAD(,in_multi) purgeinms; struct in_multi *inm, *tinm; struct ifmultiaddr *ifma; LIST_INIT(&purgeinms); IN_MULTI_LOCK(); /* * Extract list of in_multi associated with the detaching ifp * which the PF_INET layer is about to release. * We need to do this as IF_ADDR_LOCK() may be re-acquired * by code further down. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_INET || ifma->ifma_protospec == NULL) continue; #if 0 KASSERT(ifma->ifma_protospec != NULL, ("%s: ifma_protospec is NULL", __func__)); #endif inm = (struct in_multi *)ifma->ifma_protospec; LIST_INSERT_HEAD(&purgeinms, inm, inm_link); } IF_ADDR_RUNLOCK(ifp); LIST_FOREACH_SAFE(inm, &purgeinms, inm_link, tinm) { LIST_REMOVE(inm, inm_link); inm_release_locked(inm); } igmp_ifdetach(ifp); IN_MULTI_UNLOCK(); } struct in_llentry { struct llentry base; }; #define IN_LLTBL_DEFAULT_HSIZE 32 #define IN_LLTBL_HASH(k, h) \ (((((((k >> 8) ^ k) >> 8) ^ k) >> 8) ^ k) & ((h) - 1)) /* * Do actual deallocation of @lle. * Called by LLE_FREE_LOCKED when number of references * drops to zero. */ static void in_lltable_destroy_lle(struct llentry *lle) { LLE_WUNLOCK(lle); LLE_LOCK_DESTROY(lle); LLE_REQ_DESTROY(lle); free(lle, M_LLTABLE); } static struct llentry * in_lltable_new(struct in_addr addr4, u_int flags) { struct in_llentry *lle; lle = malloc(sizeof(struct in_llentry), M_LLTABLE, M_NOWAIT | M_ZERO); if (lle == NULL) /* NB: caller generates msg */ return NULL; /* * For IPv4 this will trigger "arpresolve" to generate * an ARP request. */ lle->base.la_expire = time_uptime; /* mark expired */ lle->base.r_l3addr.addr4 = addr4; lle->base.lle_refcnt = 1; lle->base.lle_free = in_lltable_destroy_lle; LLE_LOCK_INIT(&lle->base); LLE_REQ_INIT(&lle->base); callout_init(&lle->base.lle_timer, 1); return (&lle->base); } #define IN_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \ ((((d).s_addr ^ (a).s_addr) & (m).s_addr)) == 0 ) static int in_lltable_match_prefix(const struct sockaddr *saddr, const struct sockaddr *smask, u_int flags, struct llentry *lle) { struct in_addr addr, mask, lle_addr; addr = ((const struct sockaddr_in *)saddr)->sin_addr; mask = ((const struct sockaddr_in *)smask)->sin_addr; lle_addr.s_addr = ntohl(lle->r_l3addr.addr4.s_addr); if (IN_ARE_MASKED_ADDR_EQUAL(lle_addr, addr, mask) == 0) return (0); if (lle->la_flags & LLE_IFADDR) { /* * Delete LLE_IFADDR records IFF address & flag matches. * Note that addr is the interface address within prefix * being matched. * Note also we should handle 'ifdown' cases without removing * ifaddr macs. 
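 */

/*
 * The deletion policy described above, restated: an LLE_STATIC request
 * sweeps every entry under the prefix, while a plain request spares
 * static entries.  As a pure helper (the name is ours, illustration
 * only; the real test is inline in in_lltable_match_prefix() below):
 */
#if 0
static int
lle_should_delete(u_int req_flags, u_int entry_flags)
{

	/* a static sweep takes all entries; a plain sweep spares static */
	return ((req_flags & LLE_STATIC) != 0 ||
	    (entry_flags & LLE_STATIC) == 0);
}
#endif

/*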
*/ if (addr.s_addr == lle_addr.s_addr && (flags & LLE_STATIC) != 0) return (1); return (0); } /* flags & LLE_STATIC means deleting both dynamic and static entries */ if ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC)) return (1); return (0); } static void in_lltable_free_entry(struct lltable *llt, struct llentry *lle) { struct ifnet *ifp; size_t pkts_dropped; LLE_WLOCK_ASSERT(lle); KASSERT(llt != NULL, ("lltable is NULL")); /* Unlink entry from table if not already */ if ((lle->la_flags & LLE_LINKED) != 0) { ifp = llt->llt_ifp; IF_AFDATA_WLOCK_ASSERT(ifp); lltable_unlink_entry(llt, lle); } /* cancel timer */ if (callout_stop(&lle->lle_timer) > 0) LLE_REMREF(lle); /* Drop hold queue */ pkts_dropped = llentry_free(lle); ARPSTAT_ADD(dropped, pkts_dropped); } static int in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr) { - struct rtentry *rt; + struct rt_addrinfo info; + struct sockaddr_in rt_key, rt_mask; + struct sockaddr rt_gateway; + int rt_flags; KASSERT(l3addr->sa_family == AF_INET, ("sin_family %d", l3addr->sa_family)); - /* XXX rtalloc1_fib should take a const param */ - rt = rtalloc1_fib(__DECONST(struct sockaddr *, l3addr), 0, 0, - ifp->if_fib); + bzero(&rt_key, sizeof(rt_key)); + rt_key.sin_len = sizeof(rt_key); + bzero(&rt_mask, sizeof(rt_mask)); + rt_mask.sin_len = sizeof(rt_mask); + bzero(&rt_gateway, sizeof(rt_gateway)); + rt_gateway.sa_len = sizeof(rt_gateway); - if (rt == NULL) + bzero(&info, sizeof(info)); + info.rti_info[RTAX_DST] = (struct sockaddr *)&rt_key; + info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&rt_mask; + info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&rt_gateway; + + if (rib_lookup_info(ifp->if_fib, l3addr, NHR_REF, 0, &info) != 0) return (EINVAL); + rt_flags = info.rti_flags; + /* * If the gateway for an existing host route matches the target L3 * address, which is a special route inserted by some implementation * such as MANET, and the interface is of the correct type, then * allow for ARP to proceed. */ - if (rt->rt_flags & RTF_GATEWAY) { - if (!(rt->rt_flags & RTF_HOST) || !rt->rt_ifp || - rt->rt_ifp->if_type != IFT_ETHER || - (rt->rt_ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) != 0 || - memcmp(rt->rt_gateway->sa_data, l3addr->sa_data, + if (rt_flags & RTF_GATEWAY) { + if (!(rt_flags & RTF_HOST) || !info.rti_ifp || + info.rti_ifp->if_type != IFT_ETHER || + (info.rti_ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) != 0 || + memcmp(rt_gateway.sa_data, l3addr->sa_data, sizeof(in_addr_t)) != 0) { - RTFREE_LOCKED(rt); + rib_free_info(&info); return (EINVAL); } } + rib_free_info(&info); /* * Make sure that at least the destination address is covered * by the route. This is for handling the case where 2 or more * interfaces have the same prefix. An incoming packet arrives * on one interface and the corresponding outgoing packet leaves * another interface. */ - if (!(rt->rt_flags & RTF_HOST) && rt->rt_ifp != ifp) { + if (!(rt_flags & RTF_HOST) && info.rti_ifp != ifp) { const char *sa, *mask, *addr, *lim; int len; - mask = (const char *)rt_mask(rt); + mask = (const char *)&rt_mask; /* * Just being extra cautious to avoid some custom * code getting into trouble. 
*/ - if (mask == NULL) { - RTFREE_LOCKED(rt); + if ((info.rti_addrs & RTA_NETMASK) == 0) return (EINVAL); - } - sa = (const char *)rt_key(rt); + sa = (const char *)&rt_key; addr = (const char *)l3addr; len = ((const struct sockaddr_in *)l3addr)->sin_len; lim = addr + len; for ( ; addr < lim; sa++, mask++, addr++) { if ((*sa ^ *addr) & *mask) { #ifdef DIAGNOSTIC log(LOG_INFO, "IPv4 address: \"%s\" is not on the network\n", inet_ntoa(((const struct sockaddr_in *)l3addr)->sin_addr)); #endif - RTFREE_LOCKED(rt); return (EINVAL); } } } - RTFREE_LOCKED(rt); return (0); } static inline uint32_t in_lltable_hash_dst(const struct in_addr dst, uint32_t hsize) { return (IN_LLTBL_HASH(dst.s_addr, hsize)); } static uint32_t in_lltable_hash(const struct llentry *lle, uint32_t hsize) { return (in_lltable_hash_dst(lle->r_l3addr.addr4, hsize)); } static void in_lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa) { struct sockaddr_in *sin; sin = (struct sockaddr_in *)sa; bzero(sin, sizeof(*sin)); sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); sin->sin_addr = lle->r_l3addr.addr4; } static inline struct llentry * in_lltable_find_dst(struct lltable *llt, struct in_addr dst) { struct llentry *lle; struct llentries *lleh; u_int hashidx; hashidx = in_lltable_hash_dst(dst, llt->llt_hsize); lleh = &llt->lle_head[hashidx]; LIST_FOREACH(lle, lleh, lle_next) { if (lle->la_flags & LLE_DELETED) continue; if (lle->r_l3addr.addr4.s_addr == dst.s_addr) break; } return (lle); } static void in_lltable_delete_entry(struct lltable *llt, struct llentry *lle) { lle->la_flags |= LLE_DELETED; EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED); #ifdef DIAGNOSTIC log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle); #endif llentry_free(lle); } static struct llentry * in_lltable_alloc(struct lltable *llt, u_int flags, const struct sockaddr *l3addr) { const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr; struct ifnet *ifp = llt->llt_ifp; struct llentry *lle; char linkhdr[LLE_MAX_LINKHDR]; size_t linkhdrsize; int lladdr_off; KASSERT(l3addr->sa_family == AF_INET, ("sin_family %d", l3addr->sa_family)); /* * A route that covers the given address must have * been installed 1st because we are doing a resolution, * verify this. */ if (!(flags & LLE_IFADDR) && in_lltable_rtcheck(ifp, flags, l3addr) != 0) return (NULL); lle = in_lltable_new(sin->sin_addr, flags); if (lle == NULL) { log(LOG_INFO, "lla_lookup: new lle malloc failed\n"); return (NULL); } lle->la_flags = flags; if (flags & LLE_STATIC) lle->r_flags |= RLLE_VALID; if ((flags & LLE_IFADDR) == LLE_IFADDR) { linkhdrsize = LLE_MAX_LINKHDR; if (lltable_calc_llheader(ifp, AF_INET, IF_LLADDR(ifp), linkhdr, &linkhdrsize, &lladdr_off) != 0) return (NULL); lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize, lladdr_off); lle->la_flags |= LLE_STATIC; lle->r_flags |= (RLLE_VALID | RLLE_IFADDR); } return (lle); } /* * Return NULL if not found or marked for deletion. * If found return lle read locked. 
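 */

/*
 * The heart of this patch is in in_lltable_rtcheck() above: instead of
 * taking a locked rtentry from rtalloc1_fib() and having to
 * RTFREE_LOCKED() it on every exit path, the caller now supplies its
 * own sockaddr storage, has rib_lookup_info() fill it, and releases
 * everything with one rib_free_info() call.  The skeleton of the new
 * pattern (sketch only; the sockaddrs must be zeroed with their
 * lengths set, as done above, and error handling is elided):
 */
#if 0
	struct rt_addrinfo info;
	struct sockaddr_in rt_key, rt_mask;
	struct sockaddr rt_gateway;

	bzero(&info, sizeof(info));
	info.rti_info[RTAX_DST] = (struct sockaddr *)&rt_key;
	info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&rt_mask;
	info.rti_info[RTAX_GATEWAY] = &rt_gateway;

	if (rib_lookup_info(fibnum, l3addr, NHR_REF, 0, &info) != 0)
		return (EINVAL);
	/* ... examine info.rti_flags, info.rti_ifp and rt_gateway ... */
	rib_free_info(&info);
#endif

/*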
*/ static struct llentry * in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr) { const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr; struct llentry *lle; IF_AFDATA_LOCK_ASSERT(llt->llt_ifp); KASSERT(l3addr->sa_family == AF_INET, ("sin_family %d", l3addr->sa_family)); lle = in_lltable_find_dst(llt, sin->sin_addr); if (lle == NULL) return (NULL); KASSERT((flags & (LLE_UNLOCKED|LLE_EXCLUSIVE)) != (LLE_UNLOCKED|LLE_EXCLUSIVE),("wrong lle request flags: 0x%X", flags)); if (flags & LLE_UNLOCKED) return (lle); if (flags & LLE_EXCLUSIVE) LLE_WLOCK(lle); else LLE_RLOCK(lle); return (lle); } static int in_lltable_dump_entry(struct lltable *llt, struct llentry *lle, struct sysctl_req *wr) { struct ifnet *ifp = llt->llt_ifp; /* XXX stack use */ struct { struct rt_msghdr rtm; struct sockaddr_in sin; struct sockaddr_dl sdl; } arpc; struct sockaddr_dl *sdl; int error; bzero(&arpc, sizeof(arpc)); /* skip deleted entries */ if ((lle->la_flags & LLE_DELETED) == LLE_DELETED) return (0); /* Skip if jailed and not a valid IP of the prison. */ lltable_fill_sa_entry(lle,(struct sockaddr *)&arpc.sin); if (prison_if(wr->td->td_ucred, (struct sockaddr *)&arpc.sin) != 0) return (0); /* * produce a msg made of: * struct rt_msghdr; * struct sockaddr_in; (IPv4) * struct sockaddr_dl; */ arpc.rtm.rtm_msglen = sizeof(arpc); arpc.rtm.rtm_version = RTM_VERSION; arpc.rtm.rtm_type = RTM_GET; arpc.rtm.rtm_flags = RTF_UP; arpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY; /* publish */ if (lle->la_flags & LLE_PUB) arpc.rtm.rtm_flags |= RTF_ANNOUNCE; sdl = &arpc.sdl; sdl->sdl_family = AF_LINK; sdl->sdl_len = sizeof(*sdl); sdl->sdl_index = ifp->if_index; sdl->sdl_type = ifp->if_type; if ((lle->la_flags & LLE_VALID) == LLE_VALID) { sdl->sdl_alen = ifp->if_addrlen; bcopy(lle->ll_addr, LLADDR(sdl), ifp->if_addrlen); } else { sdl->sdl_alen = 0; bzero(LLADDR(sdl), ifp->if_addrlen); } arpc.rtm.rtm_rmx.rmx_expire = lle->la_flags & LLE_STATIC ? 0 : lle->la_expire; arpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA); if (lle->la_flags & LLE_STATIC) arpc.rtm.rtm_flags |= RTF_STATIC; if (lle->la_flags & LLE_IFADDR) arpc.rtm.rtm_flags |= RTF_PINNED; arpc.rtm.rtm_index = ifp->if_index; error = SYSCTL_OUT(wr, &arpc, sizeof(arpc)); return (error); } static struct lltable * in_lltattach(struct ifnet *ifp) { struct lltable *llt; llt = lltable_allocate_htbl(IN_LLTBL_DEFAULT_HSIZE); llt->llt_af = AF_INET; llt->llt_ifp = ifp; llt->llt_lookup = in_lltable_lookup; llt->llt_alloc_entry = in_lltable_alloc; llt->llt_delete_entry = in_lltable_delete_entry; llt->llt_dump_entry = in_lltable_dump_entry; llt->llt_hash = in_lltable_hash; llt->llt_fill_sa_entry = in_lltable_fill_sa_entry; llt->llt_free_entry = in_lltable_free_entry; llt->llt_match_prefix = in_lltable_match_prefix; lltable_link(llt); return (llt); } void * in_domifattach(struct ifnet *ifp) { struct in_ifinfo *ii; ii = malloc(sizeof(struct in_ifinfo), M_IFADDR, M_WAITOK|M_ZERO); ii->ii_llt = in_lltattach(ifp); ii->ii_igmp = igmp_domifattach(ifp); return (ii); } void in_domifdetach(struct ifnet *ifp, void *aux) { struct in_ifinfo *ii = (struct in_ifinfo *)aux; igmp_domifdetach(ifp); lltable_free(ii->ii_llt); free(ii, M_IFADDR); } diff --git a/sys/netinet/in_fib.c b/sys/netinet/in_fib.c index b4101b615d2e..451b37474e32 100644 --- a/sys/netinet/in_fib.c +++ b/sys/netinet/in_fib.c @@ -1,222 +1,225 @@ /*- * Copyright (c) 2015 * Alexander V. 
Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_route.h" #include "opt_mpath.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RADIX_MPATH #include #endif #include #include #include #ifdef INET static void fib4_rte_to_nh_basic(struct rtentry *rte, struct in_addr dst, uint32_t flags, struct nhop4_basic *pnh4); static void fib4_rte_to_nh_extended(struct rtentry *rte, struct in_addr dst, uint32_t flags, struct nhop4_extended *pnh4); #define RNTORT(p) ((struct rtentry *)(p)) static void fib4_rte_to_nh_basic(struct rtentry *rte, struct in_addr dst, uint32_t flags, struct nhop4_basic *pnh4) { struct sockaddr_in *gw; if ((flags & NHR_IFAIF) != 0) pnh4->nh_ifp = rte->rt_ifa->ifa_ifp; else pnh4->nh_ifp = rte->rt_ifp; pnh4->nh_mtu = min(rte->rt_mtu, rte->rt_ifp->if_mtu); if (rte->rt_flags & RTF_GATEWAY) { gw = (struct sockaddr_in *)rte->rt_gateway; pnh4->nh_addr = gw->sin_addr; } else pnh4->nh_addr = dst; /* Set flags */ pnh4->nh_flags = fib_rte_to_nh_flags(rte->rt_flags); gw = (struct sockaddr_in *)rt_key(rte); if (gw->sin_addr.s_addr == 0) pnh4->nh_flags |= NHF_DEFAULT; /* TODO: Handle RTF_BROADCAST here */ } static void fib4_rte_to_nh_extended(struct rtentry *rte, struct in_addr dst, uint32_t flags, struct nhop4_extended *pnh4) { struct sockaddr_in *gw; struct in_ifaddr *ia; - pnh4->nh_ifp = rte->rt_ifa->ifa_ifp; + if ((flags & NHR_IFAIF) != 0) + pnh4->nh_ifp = rte->rt_ifa->ifa_ifp; + else + pnh4->nh_ifp = rte->rt_ifp; pnh4->nh_mtu = min(rte->rt_mtu, rte->rt_ifp->if_mtu); if (rte->rt_flags & RTF_GATEWAY) { gw = (struct sockaddr_in *)rte->rt_gateway; pnh4->nh_addr = gw->sin_addr; } else pnh4->nh_addr = dst; /* Set flags */ pnh4->nh_flags = fib_rte_to_nh_flags(rte->rt_flags); gw = (struct sockaddr_in *)rt_key(rte); if (gw->sin_addr.s_addr == 0) pnh4->nh_flags |= NHF_DEFAULT; /* XXX: Set RTF_BROADCAST if GW address is broadcast */ ia = ifatoia(rte->rt_ifa); pnh4->nh_src = IA_SIN(ia)->sin_addr; } /* * Performs IPv4 route table lookup on @dst. 
Returns 0 on success. * Stores nexthop info in the provided @pnh4 structure. * Note that * - nh_ifp cannot be safely dereferenced * - nh_ifp represents logical transmit interface (rt_ifp) (e.g. if * looking up an address on interface "ix0", a pointer to the "lo0" interface * will be returned instead of "ix0") * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed * - however, the mtu from the "transmit" interface will be returned. */ int fib4_lookup_nh_basic(uint32_t fibnum, struct in_addr dst, uint32_t flags, uint32_t flowid, struct nhop4_basic *pnh4) { struct radix_node_head *rh; struct radix_node *rn; struct sockaddr_in sin; struct rtentry *rte; KASSERT((fibnum < rt_numfibs), ("fib4_lookup_nh_basic: bad fibnum")); rh = rt_tables_get_rnh(fibnum, AF_INET); if (rh == NULL) return (ENOENT); /* Prepare lookup key */ memset(&sin, 0, sizeof(sin)); sin.sin_len = sizeof(struct sockaddr_in); sin.sin_addr = dst; RADIX_NODE_HEAD_RLOCK(rh); rn = rh->rnh_matchaddr((void *)&sin, rh); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { rte = RNTORT(rn); /* Ensure route & ifp is UP */ if (RT_LINK_IS_UP(rte->rt_ifp)) { fib4_rte_to_nh_basic(rte, dst, flags, pnh4); RADIX_NODE_HEAD_RUNLOCK(rh); return (0); } } RADIX_NODE_HEAD_RUNLOCK(rh); return (ENOENT); } /* * Performs IPv4 route table lookup on @dst. Returns 0 on success. * Stores extended nexthop info in the provided @pnh4 structure. * Note that * - nh_ifp cannot be safely dereferenced unless NHR_REF is specified. * - in that case you need to call fib4_free_nh_ext() * - nh_ifp represents logical transmit interface (rt_ifp) (e.g. if * looking up an address on interface "ix0", a pointer to the "lo0" interface * will be returned instead of "ix0") * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed * - however, the mtu from the "transmit" interface will be returned. */ int -fib4_lookup_nh_ext(uint32_t fibnum, struct in_addr dst, uint32_t flowid, - uint32_t flags, struct nhop4_extended *pnh4) +fib4_lookup_nh_ext(uint32_t fibnum, struct in_addr dst, uint32_t flags, + uint32_t flowid, struct nhop4_extended *pnh4) { struct radix_node_head *rh; struct radix_node *rn; struct sockaddr_in sin; struct rtentry *rte; KASSERT((fibnum < rt_numfibs), ("fib4_lookup_nh_ext: bad fibnum")); rh = rt_tables_get_rnh(fibnum, AF_INET); if (rh == NULL) return (ENOENT); /* Prepare lookup key */ memset(&sin, 0, sizeof(sin)); sin.sin_len = sizeof(struct sockaddr_in); sin.sin_addr = dst; RADIX_NODE_HEAD_RLOCK(rh); rn = rh->rnh_matchaddr((void *)&sin, rh); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { rte = RNTORT(rn); /* Ensure route & ifp is UP */ if (RT_LINK_IS_UP(rte->rt_ifp)) { fib4_rte_to_nh_extended(rte, dst, flags, pnh4); if ((flags & NHR_REF) != 0) { /* TODO: lwref on egress ifp's ? */ } RADIX_NODE_HEAD_RUNLOCK(rh); return (0); } } RADIX_NODE_HEAD_RUNLOCK(rh); return (ENOENT); } void fib4_free_nh_ext(uint32_t fibnum, struct nhop4_extended *pnh4) { } #endif diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c index a7bbac05ab03..54d7e1eb6513 100644 --- a/sys/netinet6/icmp6.c +++ b/sys/netinet6/icmp6.c @@ -1,2873 +1,2869 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: icmp6.c,v 1.211 2001/04/04 05:56:20 itojun Exp $ */ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #define MBUF_PRIVATE /* XXXRW: Optimisation tries to avoid M_EXT mbufs */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern struct domain inet6domain; VNET_PCPUSTAT_DEFINE(struct icmp6stat, icmp6stat); VNET_PCPUSTAT_SYSINIT(icmp6stat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(icmp6stat); #endif /* VIMAGE */ VNET_DECLARE(struct inpcbinfo, ripcbinfo); VNET_DECLARE(struct inpcbhead, ripcb); VNET_DECLARE(int, icmp6errppslim); static VNET_DEFINE(int, icmp6errpps_count) = 0; static VNET_DEFINE(struct timeval, icmp6errppslim_last); VNET_DECLARE(int, icmp6_nodeinfo); #define V_ripcbinfo VNET(ripcbinfo) #define V_ripcb VNET(ripcb) #define V_icmp6errppslim VNET(icmp6errppslim) #define V_icmp6errpps_count VNET(icmp6errpps_count) #define V_icmp6errppslim_last VNET(icmp6errppslim_last) #define V_icmp6_nodeinfo VNET(icmp6_nodeinfo) static void icmp6_errcount(int, int); static int icmp6_rip6_input(struct mbuf **, int); static int icmp6_ratelimit(const struct in6_addr *, const int, const int); static const char *icmp6_redirect_diag(struct in6_addr *, struct in6_addr *, struct in6_addr *); static struct mbuf *ni6_input(struct mbuf *, int); static struct mbuf *ni6_nametodns(const char *, int, int); static int ni6_dnsmatch(const char *, int, const char *, int); static int ni6_addrs(struct icmp6_nodeinfo *, struct mbuf *, struct ifnet **, struct in6_addr *); static int ni6_store_addrs(struct icmp6_nodeinfo *, struct icmp6_nodeinfo *, struct ifnet *, int); static int icmp6_notify_error(struct mbuf **, int, int, int); /* * Kernel module interface for updating icmp6stat. The argument is an index * into icmp6stat treated as an array of u_quad_t. While this encodes the * general layout of icmp6stat into the caller, it doesn't encode its * location, so that future changes to add, for example, per-CPU stats * support won't cause binary compatibility problems for kernel modules. 
*/ void kmod_icmp6stat_inc(int statnum) { counter_u64_add(VNET(icmp6stat)[statnum], 1); } static void icmp6_errcount(int type, int code) { switch (type) { case ICMP6_DST_UNREACH: switch (code) { case ICMP6_DST_UNREACH_NOROUTE: ICMP6STAT_INC(icp6s_odst_unreach_noroute); return; case ICMP6_DST_UNREACH_ADMIN: ICMP6STAT_INC(icp6s_odst_unreach_admin); return; case ICMP6_DST_UNREACH_BEYONDSCOPE: ICMP6STAT_INC(icp6s_odst_unreach_beyondscope); return; case ICMP6_DST_UNREACH_ADDR: ICMP6STAT_INC(icp6s_odst_unreach_addr); return; case ICMP6_DST_UNREACH_NOPORT: ICMP6STAT_INC(icp6s_odst_unreach_noport); return; } break; case ICMP6_PACKET_TOO_BIG: ICMP6STAT_INC(icp6s_opacket_too_big); return; case ICMP6_TIME_EXCEEDED: switch (code) { case ICMP6_TIME_EXCEED_TRANSIT: ICMP6STAT_INC(icp6s_otime_exceed_transit); return; case ICMP6_TIME_EXCEED_REASSEMBLY: ICMP6STAT_INC(icp6s_otime_exceed_reassembly); return; } break; case ICMP6_PARAM_PROB: switch (code) { case ICMP6_PARAMPROB_HEADER: ICMP6STAT_INC(icp6s_oparamprob_header); return; case ICMP6_PARAMPROB_NEXTHEADER: ICMP6STAT_INC(icp6s_oparamprob_nextheader); return; case ICMP6_PARAMPROB_OPTION: ICMP6STAT_INC(icp6s_oparamprob_option); return; } break; case ND_REDIRECT: ICMP6STAT_INC(icp6s_oredirect); return; } ICMP6STAT_INC(icp6s_ounknown); } /* * A wrapper function for icmp6_error() necessary when the erroneous packet * may not contain enough scope zone information. */ void icmp6_error2(struct mbuf *m, int type, int code, int param, struct ifnet *ifp) { struct ip6_hdr *ip6; if (ifp == NULL) return; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), ); #else if (m->m_len < sizeof(struct ip6_hdr)) { m = m_pullup(m, sizeof(struct ip6_hdr)); if (m == NULL) return; } #endif ip6 = mtod(m, struct ip6_hdr *); if (in6_setscope(&ip6->ip6_src, ifp, NULL) != 0) return; if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0) return; icmp6_error(m, type, code, param); } /* * Generate an error packet of type error in response to bad IP6 packet. */ void icmp6_error(struct mbuf *m, int type, int code, int param) { struct ip6_hdr *oip6, *nip6; struct icmp6_hdr *icmp6; u_int preplen; int off; int nxt; ICMP6STAT_INC(icp6s_error); /* count per-type-code statistics */ icmp6_errcount(type, code); #ifdef M_DECRYPTED /*not openbsd*/ if (m->m_flags & M_DECRYPTED) { ICMP6STAT_INC(icp6s_canterror); goto freeit; } #endif #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), ); #else if (m->m_len < sizeof(struct ip6_hdr)) { m = m_pullup(m, sizeof(struct ip6_hdr)); if (m == NULL) return; } #endif oip6 = mtod(m, struct ip6_hdr *); /* * If the destination address of the erroneous packet is a multicast * address, or the packet was sent using link-layer multicast, * we should basically suppress sending an error (RFC 2463, Section * 2.4). * We have two exceptions (the item e.2 in that section): * - the Packet Too Big message can be sent for path MTU discovery. * - the Parameter Problem Message that can be allowed an icmp6 error * in the option type field. This check has been done in * ip6_unknown_opt(), so we can just check the type and code. */ if ((m->m_flags & (M_BCAST|M_MCAST) || IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) && (type != ICMP6_PACKET_TOO_BIG && (type != ICMP6_PARAM_PROB || code != ICMP6_PARAMPROB_OPTION))) goto freeit; /* * RFC 2463, 2.4 (e.5): source address check. * XXX: the case of anycast source? 
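 */

/*
 * The multicast suppression test above (RFC 2463, section 2.4, item
 * e.2) allows exactly two error types in response to a multicast
 * packet.  Isolated as a predicate (the helper name is ours, for
 * clarity only):
 */
#if 0
static int
icmp6_mcast_error_ok(int type, int code)
{

	/* Packet Too Big serves path MTU discovery ... */
	if (type == ICMP6_PACKET_TOO_BIG)
		return (1);
	/* ... and Parameter Problem is allowed for the option type field */
	if (type == ICMP6_PARAM_PROB && code == ICMP6_PARAMPROB_OPTION)
		return (1);
	return (0);
}
#endif

/*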
*/ if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) || IN6_IS_ADDR_MULTICAST(&oip6->ip6_src)) goto freeit; /* * If we are about to send ICMPv6 against ICMPv6 error/redirect, * don't do it. */ nxt = -1; off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt); if (off >= 0 && nxt == IPPROTO_ICMPV6) { struct icmp6_hdr *icp; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, off + sizeof(struct icmp6_hdr), ); icp = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off, sizeof(*icp)); if (icp == NULL) { ICMP6STAT_INC(icp6s_tooshort); return; } #endif if (icp->icmp6_type < ICMP6_ECHO_REQUEST || icp->icmp6_type == ND_REDIRECT) { /* * ICMPv6 error * Special case: for redirect (which is * informational) we must not send icmp6 error. */ ICMP6STAT_INC(icp6s_canterror); goto freeit; } else { /* ICMPv6 informational - send the error */ } } else { /* non-ICMPv6 - send the error */ } oip6 = mtod(m, struct ip6_hdr *); /* adjust pointer */ /* Finally, do rate limitation check. */ if (icmp6_ratelimit(&oip6->ip6_src, type, code)) { ICMP6STAT_INC(icp6s_toofreq); goto freeit; } /* * OK, ICMP6 can be generated. */ if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN) m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len); preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); M_PREPEND(m, preplen, M_NOWAIT); /* FIB is also copied over. */ if (m == NULL) { nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__)); return; } nip6 = mtod(m, struct ip6_hdr *); nip6->ip6_src = oip6->ip6_src; nip6->ip6_dst = oip6->ip6_dst; in6_clearscope(&oip6->ip6_src); in6_clearscope(&oip6->ip6_dst); icmp6 = (struct icmp6_hdr *)(nip6 + 1); icmp6->icmp6_type = type; icmp6->icmp6_code = code; icmp6->icmp6_pptr = htonl((u_int32_t)param); /* * icmp6_reflect() is designed to be in the input path. * icmp6_error() can be called from both input and output path, * and if we are in output path rcvif could contain bogus value. * clear m->m_pkthdr.rcvif for safety, we should have enough scope * information in ip header (nip6). */ m->m_pkthdr.rcvif = NULL; ICMP6STAT_INC(icp6s_outhist[type]); icmp6_reflect(m, sizeof(struct ip6_hdr)); /* header order: IPv6 - ICMPv6 */ return; freeit: /* * If we can't tell whether or not we can generate ICMP6, free it. */ m_freem(m); } /* * Process a received ICMP6 message. */ int icmp6_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp, *n; struct ifnet *ifp; struct ip6_hdr *ip6, *nip6; struct icmp6_hdr *icmp6, *nicmp6; int off = *offp; int icmp6len = m->m_pkthdr.len - *offp; int code, sum, noff; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; int ip6len, error; ifp = m->m_pkthdr.rcvif; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr), IPPROTO_DONE); /* m might change if M_LOOP. So, call mtod after this */ #endif /* * Locate icmp6 structure in mbuf, and check * that not corrupted and of at least minimum length */ ip6 = mtod(m, struct ip6_hdr *); ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen); if (icmp6len < sizeof(struct icmp6_hdr)) { ICMP6STAT_INC(icp6s_tooshort); goto freeit; } /* * Check multicast group membership. * Note: SSM filters are not applied for ICMPv6 traffic. 
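 */

/*
 * A note on the truncation above: when the offending packet is larger
 * than ICMPV6_PLD_MAXLEN (commonly 1232, i.e. IPV6_MMTU less the IPv6
 * and ICMPv6 headers; an assumption here, see icmp6.h), m_adj() gets a
 * negative count, which trims bytes from the tail of the chain:
 */
#if 0
	m_adj(m, 1232 - 2000);	/* 2000-byte packet: cut the last 768 bytes */
#endif

/*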
*/ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { struct in6_multi *inm; inm = in6m_lookup(ifp, &ip6->ip6_dst); if (inm == NULL) { IP6STAT_INC(ip6s_notmember); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); goto freeit; } } /* * calculate the checksum */ #ifndef PULLDOWN_TEST icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6)); if (icmp6 == NULL) { ICMP6STAT_INC(icp6s_tooshort); return IPPROTO_DONE; } #endif code = icmp6->icmp6_code; if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) { nd6log((LOG_ERR, "ICMP6 checksum error(%d|%x) %s\n", icmp6->icmp6_type, sum, ip6_sprintf(ip6bufs, &ip6->ip6_src))); ICMP6STAT_INC(icp6s_checksum); goto freeit; } ICMP6STAT_INC(icp6s_inhist[icmp6->icmp6_type]); icmp6_ifstat_inc(ifp, ifs6_in_msg); if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK) icmp6_ifstat_inc(ifp, ifs6_in_error); switch (icmp6->icmp6_type) { case ICMP6_DST_UNREACH: icmp6_ifstat_inc(ifp, ifs6_in_dstunreach); switch (code) { case ICMP6_DST_UNREACH_NOROUTE: code = PRC_UNREACH_NET; break; case ICMP6_DST_UNREACH_ADMIN: icmp6_ifstat_inc(ifp, ifs6_in_adminprohib); code = PRC_UNREACH_PROTOCOL; /* is this a good code? */ break; case ICMP6_DST_UNREACH_ADDR: code = PRC_HOSTDEAD; break; case ICMP6_DST_UNREACH_BEYONDSCOPE: /* I mean "source address was incorrect." */ code = PRC_PARAMPROB; break; case ICMP6_DST_UNREACH_NOPORT: code = PRC_UNREACH_PORT; break; default: goto badcode; } goto deliver; break; case ICMP6_PACKET_TOO_BIG: icmp6_ifstat_inc(ifp, ifs6_in_pkttoobig); /* validation is made in icmp6_mtudisc_update */ code = PRC_MSGSIZE; /* * Updating the path MTU will be done after examining * intermediate extension headers. */ goto deliver; break; case ICMP6_TIME_EXCEEDED: icmp6_ifstat_inc(ifp, ifs6_in_timeexceed); switch (code) { case ICMP6_TIME_EXCEED_TRANSIT: code = PRC_TIMXCEED_INTRANS; break; case ICMP6_TIME_EXCEED_REASSEMBLY: code = PRC_TIMXCEED_REASS; break; default: goto badcode; } goto deliver; break; case ICMP6_PARAM_PROB: icmp6_ifstat_inc(ifp, ifs6_in_paramprob); switch (code) { case ICMP6_PARAMPROB_NEXTHEADER: code = PRC_UNREACH_PROTOCOL; break; case ICMP6_PARAMPROB_HEADER: case ICMP6_PARAMPROB_OPTION: code = PRC_PARAMPROB; break; default: goto badcode; } goto deliver; break; case ICMP6_ECHO_REQUEST: icmp6_ifstat_inc(ifp, ifs6_in_echo); if (code != 0) goto badcode; if ((n = m_copy(m, 0, M_COPYALL)) == NULL) { /* Give up remote */ break; } if (!M_WRITABLE(n) || n->m_len < off + sizeof(struct icmp6_hdr)) { struct mbuf *n0 = n; int n0len; CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) <= MHLEN); n = m_gethdr(M_NOWAIT, n0->m_type); if (n == NULL) { /* Give up remote */ m_freem(n0); break; } m_move_pkthdr(n, n0); /* FIB copied. */ n0len = n0->m_pkthdr.len; /* save for use below */ /* * Copy IPv6 and ICMPv6 only. */ nip6 = mtod(n, struct ip6_hdr *); bcopy(ip6, nip6, sizeof(struct ip6_hdr)); nicmp6 = (struct icmp6_hdr *)(nip6 + 1); bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr)); noff = sizeof(struct ip6_hdr); /* new mbuf contains only ipv6+icmpv6 headers */ n->m_len = noff + sizeof(struct icmp6_hdr); /* * Adjust mbuf. ip6_plen will be adjusted in * ip6_output(). 
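 */

/*
 * The in6_cksum() check above computes the RFC 2460 pseudo-header
 * checksum over the ICMPv6 payload; a nonzero result means corruption.
 * The final 16-bit one's-complement fold such checksums rely on looks
 * like this (generic sketch, not the kernel's implementation):
 */
#if 0
#include <stdint.h>

static uint16_t
cksum_fold(uint32_t sum)
{

	while (sum > 0xffff)
		sum = (sum & 0xffff) + (sum >> 16);
	return (~sum & 0xffff);
}
#endif

/*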
*/ m_adj(n0, off + sizeof(struct icmp6_hdr)); /* recalculate complete packet size */ n->m_pkthdr.len = n0len + (noff - off); n->m_next = n0; } else { nip6 = mtod(n, struct ip6_hdr *); IP6_EXTHDR_GET(nicmp6, struct icmp6_hdr *, n, off, sizeof(*nicmp6)); noff = off; } nicmp6->icmp6_type = ICMP6_ECHO_REPLY; nicmp6->icmp6_code = 0; if (n) { ICMP6STAT_INC(icp6s_reflect); ICMP6STAT_INC(icp6s_outhist[ICMP6_ECHO_REPLY]); icmp6_reflect(n, noff); } break; case ICMP6_ECHO_REPLY: icmp6_ifstat_inc(ifp, ifs6_in_echoreply); if (code != 0) goto badcode; break; case MLD_LISTENER_QUERY: case MLD_LISTENER_REPORT: case MLD_LISTENER_DONE: case MLDV2_LISTENER_REPORT: /* * Drop MLD traffic which is not link-local, has a hop limit * of greater than 1 hop, or which does not have the * IPv6 HBH Router Alert option. * As IPv6 HBH options are stripped in ip6_input() we must * check an mbuf header flag. * XXX Should we also sanity check that these messages * were directed to a link-local multicast prefix? */ if ((ip6->ip6_hlim != 1) || (m->m_flags & M_RTALERT_MLD) == 0) goto freeit; if (mld_input(m, off, icmp6len) != 0) return (IPPROTO_DONE); /* m stays. */ break; case ICMP6_WRUREQUEST: /* ICMP6_FQDN_QUERY */ { enum { WRU, FQDN } mode; if (!V_icmp6_nodeinfo) break; if (icmp6len == sizeof(struct icmp6_hdr) + 4) mode = WRU; else if (icmp6len >= sizeof(struct icmp6_nodeinfo)) mode = FQDN; else goto badlen; if (mode == FQDN) { #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_nodeinfo), IPPROTO_DONE); #endif n = m_copy(m, 0, M_COPYALL); if (n) n = ni6_input(n, off); /* XXX meaningless if n == NULL */ noff = sizeof(struct ip6_hdr); } else { struct prison *pr; u_char *p; int maxhlen, hlen; /* * XXX: this combination of flags is pointless, * but should we keep this for compatibility? */ if ((V_icmp6_nodeinfo & 5) != 5) break; if (code != 0) goto badcode; CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) + 4 <= MHLEN); n = m_gethdr(M_NOWAIT, m->m_type); if (n == NULL) { /* Give up remote */ break; } if (!m_dup_pkthdr(n, m, M_NOWAIT)) { /* * Previous code did a blind M_COPY_PKTHDR * and said "just for rcvif". If true, then * we could tolerate the dup failing (due to * the deep copy of the tag chain). For now * be conservative and just fail. */ m_free(n); n = NULL; } maxhlen = M_TRAILINGSPACE(n) - (sizeof(*nip6) + sizeof(*nicmp6) + 4); pr = curthread->td_ucred->cr_prison; mtx_lock(&pr->pr_mtx); hlen = strlen(pr->pr_hostname); if (maxhlen > hlen) maxhlen = hlen; /* * Copy IPv6 and ICMPv6 only. */ nip6 = mtod(n, struct ip6_hdr *); bcopy(ip6, nip6, sizeof(struct ip6_hdr)); nicmp6 = (struct icmp6_hdr *)(nip6 + 1); bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr)); p = (u_char *)(nicmp6 + 1); bzero(p, 4); /* meaningless TTL */ bcopy(pr->pr_hostname, p + 4, maxhlen); mtx_unlock(&pr->pr_mtx); noff = sizeof(struct ip6_hdr); n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) + 4 + maxhlen; nicmp6->icmp6_type = ICMP6_WRUREPLY; nicmp6->icmp6_code = 0; } if (n) { ICMP6STAT_INC(icp6s_reflect); ICMP6STAT_INC(icp6s_outhist[ICMP6_WRUREPLY]); icmp6_reflect(n, noff); } break; } case ICMP6_WRUREPLY: if (code != 0) goto badcode; break; case ND_ROUTER_SOLICIT: icmp6_ifstat_inc(ifp, ifs6_in_routersolicit); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_router_solicit)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { /* give up local */ /* Send incoming SeND packet to user space. 
*/ if (send_sendso_input_hook != NULL) { IP6_EXTHDR_CHECK(m, off, icmp6len, IPPROTO_DONE); error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); /* -1 == no app on SEND socket */ if (error == 0) return (IPPROTO_DONE); nd6_rs_input(m, off, icmp6len); } else nd6_rs_input(m, off, icmp6len); m = NULL; goto freeit; } if (send_sendso_input_hook != NULL) { IP6_EXTHDR_CHECK(n, off, icmp6len, IPPROTO_DONE); error = send_sendso_input_hook(n, ifp, SND_IN, ip6len); if (error == 0) goto freeit; /* -1 == no app on SEND socket */ nd6_rs_input(n, off, icmp6len); } else nd6_rs_input(n, off, icmp6len); /* m stays. */ break; case ND_ROUTER_ADVERT: icmp6_ifstat_inc(ifp, ifs6_in_routeradvert); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_router_advert)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { /* Send incoming SeND-protected/ND packet to user space. */ if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); if (error == 0) return (IPPROTO_DONE); nd6_ra_input(m, off, icmp6len); } else nd6_ra_input(m, off, icmp6len); m = NULL; goto freeit; } if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(n, ifp, SND_IN, ip6len); if (error == 0) goto freeit; nd6_ra_input(n, off, icmp6len); } else nd6_ra_input(n, off, icmp6len); /* m stays. */ break; case ND_NEIGHBOR_SOLICIT: icmp6_ifstat_inc(ifp, ifs6_in_neighborsolicit); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_neighbor_solicit)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); if (error == 0) return (IPPROTO_DONE); nd6_ns_input(m, off, icmp6len); } else nd6_ns_input(m, off, icmp6len); m = NULL; goto freeit; } if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(n, ifp, SND_IN, ip6len); if (error == 0) goto freeit; nd6_ns_input(n, off, icmp6len); } else nd6_ns_input(n, off, icmp6len); /* m stays. */ break; case ND_NEIGHBOR_ADVERT: icmp6_ifstat_inc(ifp, ifs6_in_neighboradvert); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_neighbor_advert)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { /* Send incoming SeND-protected/ND packet to user space. */ if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); if (error == 0) return (IPPROTO_DONE); nd6_na_input(m, off, icmp6len); } else nd6_na_input(m, off, icmp6len); m = NULL; goto freeit; } if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(n, ifp, SND_IN, ip6len); if (error == 0) goto freeit; nd6_na_input(n, off, icmp6len); } else nd6_na_input(n, off, icmp6len); /* m stays. */ break; case ND_REDIRECT: icmp6_ifstat_inc(ifp, ifs6_in_redirect); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_redirect)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); if (error == 0) return (IPPROTO_DONE); icmp6_redirect_input(m, off); } else icmp6_redirect_input(m, off); m = NULL; goto freeit; } if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(n, ifp, SND_IN, ip6len); if (error == 0) goto freeit; icmp6_redirect_input(n, off); } else icmp6_redirect_input(n, off); /* m stays. 
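 */

/*
 * Each ND case above repeats one pattern: duplicate the mbuf, offer
 * the copy (or, when the copy fails, the original) to the SeND hook,
 * and fall back to the native input routine when no hook is installed
 * or the hook declines.  Reduced to a sketch, where nd6_xx_input()
 * stands for any of the handlers and is not a real function:
 */
#if 0
	if (send_sendso_input_hook != NULL) {
		error = send_sendso_input_hook(n, ifp, SND_IN, ip6len);
		if (error == 0)
			goto freeit;	/* consumed by a SEND socket app */
		nd6_xx_input(n, off, icmp6len);
	} else
		nd6_xx_input(n, off, icmp6len);
#endif

/*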
*/ break; case ICMP6_ROUTER_RENUMBERING: if (code != ICMP6_ROUTER_RENUMBERING_COMMAND && code != ICMP6_ROUTER_RENUMBERING_RESULT) goto badcode; if (icmp6len < sizeof(struct icmp6_router_renum)) goto badlen; break; default: nd6log((LOG_DEBUG, "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n", icmp6->icmp6_type, ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), ifp ? ifp->if_index : 0)); if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) { /* ICMPv6 error: MUST deliver it by spec... */ code = PRC_NCMDS; /* deliver */ } else { /* ICMPv6 informational: MUST not deliver */ break; } deliver: if (icmp6_notify_error(&m, off, icmp6len, code) != 0) { /* In this case, m should've been freed. */ return (IPPROTO_DONE); } break; badcode: ICMP6STAT_INC(icp6s_badcode); break; badlen: ICMP6STAT_INC(icp6s_badlen); break; } /* deliver the packet to appropriate sockets */ icmp6_rip6_input(&m, *offp); return IPPROTO_DONE; freeit: m_freem(m); return IPPROTO_DONE; } static int icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code) { struct mbuf *m = *mp; struct icmp6_hdr *icmp6; struct ip6_hdr *eip6; u_int32_t notifymtu; struct sockaddr_in6 icmp6src, icmp6dst; if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) { ICMP6STAT_INC(icp6s_tooshort); goto freeit; } #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr), -1); icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6) + sizeof(struct ip6_hdr)); if (icmp6 == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif eip6 = (struct ip6_hdr *)(icmp6 + 1); /* Detect the upper level protocol */ { void (*ctlfunc)(int, struct sockaddr *, void *); u_int8_t nxt = eip6->ip6_nxt; int eoff = off + sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr); struct ip6ctlparam ip6cp; struct in6_addr *finaldst = NULL; int icmp6type = icmp6->icmp6_type; struct ip6_frag *fh; struct ip6_rthdr *rth; struct ip6_rthdr0 *rth0; int rthlen; while (1) { /* XXX: should avoid infinite loop explicitly? */ struct ip6_ext *eh; switch (nxt) { case IPPROTO_HOPOPTS: case IPPROTO_DSTOPTS: case IPPROTO_AH: #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(struct ip6_ext), -1); eh = (struct ip6_ext *)(mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(eh, struct ip6_ext *, m, eoff, sizeof(*eh)); if (eh == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif if (nxt == IPPROTO_AH) eoff += (eh->ip6e_len + 2) << 2; else eoff += (eh->ip6e_len + 1) << 3; nxt = eh->ip6e_nxt; break; case IPPROTO_ROUTING: /* * When the erroneous packet contains a * routing header, we should examine the * header to determine the final destination. * Otherwise, we can't properly update * information that depends on the final * destination (e.g. path MTU). */ #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(*rth), -1); rth = (struct ip6_rthdr *) (mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(rth, struct ip6_rthdr *, m, eoff, sizeof(*rth)); if (rth == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif rthlen = (rth->ip6r_len + 1) << 3; /* * XXX: currently there is no * officially defined type other * than type-0. * Note that if the segment left field * is 0, all intermediate hops must * have been passed. 
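* Worked example (illustrative values): a type-0 routing header with
* ip6r0_len == 4 carries 4 / 2 == 2 addresses and spans rthlen ==
* (4 + 1) << 3 == 40 bytes; with a non-zero segments-left field the
* final destination is the last listed address, i.e.
* (struct in6_addr *)(rth0 + 1) + (2 - 1).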
*/ if (rth->ip6r_segleft && rth->ip6r_type == IPV6_RTHDR_TYPE_0) { int hops; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + rthlen, -1); rth0 = (struct ip6_rthdr0 *) (mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(rth0, struct ip6_rthdr0 *, m, eoff, rthlen); if (rth0 == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif /* just ignore a bogus header */ if ((rth0->ip6r0_len % 2) == 0 && (hops = rth0->ip6r0_len/2)) finaldst = (struct in6_addr *)(rth0 + 1) + (hops - 1); } eoff += rthlen; nxt = rth->ip6r_nxt; break; case IPPROTO_FRAGMENT: #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(struct ip6_frag), -1); fh = (struct ip6_frag *)(mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(fh, struct ip6_frag *, m, eoff, sizeof(*fh)); if (fh == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif /* * Data after a fragment header is meaningless * unless it is the first fragment, but * we'll go to the notify label for path MTU * discovery. */ if (fh->ip6f_offlg & IP6F_OFF_MASK) goto notify; eoff += sizeof(struct ip6_frag); nxt = fh->ip6f_nxt; break; default: /* * This case includes ESP and the No Next * Header. In such cases going to the notify * label does not have any meaning * (i.e. ctlfunc will be NULL), but we go * anyway since we might have to update * path MTU information. */ goto notify; } } notify: #ifndef PULLDOWN_TEST icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6) + sizeof(struct ip6_hdr)); if (icmp6 == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif /* * retrieve parameters from the inner IPv6 header, and convert * them into sockaddr structures. * XXX: there is no guarantee that the source or destination * addresses of the inner packet are in the same scope as * the addresses of the icmp packet. But there is no other * way to determine the zone. */ eip6 = (struct ip6_hdr *)(icmp6 + 1); bzero(&icmp6dst, sizeof(icmp6dst)); icmp6dst.sin6_len = sizeof(struct sockaddr_in6); icmp6dst.sin6_family = AF_INET6; if (finaldst == NULL) icmp6dst.sin6_addr = eip6->ip6_dst; else icmp6dst.sin6_addr = *finaldst; if (in6_setscope(&icmp6dst.sin6_addr, m->m_pkthdr.rcvif, NULL)) goto freeit; bzero(&icmp6src, sizeof(icmp6src)); icmp6src.sin6_len = sizeof(struct sockaddr_in6); icmp6src.sin6_family = AF_INET6; icmp6src.sin6_addr = eip6->ip6_src; if (in6_setscope(&icmp6src.sin6_addr, m->m_pkthdr.rcvif, NULL)) goto freeit; icmp6src.sin6_flowinfo = (eip6->ip6_flow & IPV6_FLOWLABEL_MASK); if (finaldst == NULL) finaldst = &eip6->ip6_dst; ip6cp.ip6c_m = m; ip6cp.ip6c_icmp6 = icmp6; ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1); ip6cp.ip6c_off = eoff; ip6cp.ip6c_finaldst = finaldst; ip6cp.ip6c_src = &icmp6src; ip6cp.ip6c_nxt = nxt; if (icmp6type == ICMP6_PACKET_TOO_BIG) { notifymtu = ntohl(icmp6->icmp6_mtu); ip6cp.ip6c_cmdarg = (void *)&notifymtu; icmp6_mtudisc_update(&ip6cp, 1); /*XXX*/ } ctlfunc = (void (*)(int, struct sockaddr *, void *)) (inet6sw[ip6_protox[nxt]].pr_ctlinput); if (ctlfunc) { (void) (*ctlfunc)(code, (struct sockaddr *)&icmp6dst, &ip6cp); } } *mp = m; return (0); freeit: m_freem(m); return (-1); } void icmp6_mtudisc_update(struct ip6ctlparam *ip6cp, int validated) { struct in6_addr *dst = ip6cp->ip6c_finaldst; struct icmp6_hdr *icmp6 = ip6cp->ip6c_icmp6; struct mbuf *m = ip6cp->ip6c_m; /* will be necessary for scope issue */ u_int mtu = ntohl(icmp6->icmp6_mtu); struct in_conninfo inc; #if 0 /* * RFC2460 section 5, last paragraph.
* even though minimum link MTU for IPv6 is IPV6_MMTU, * we may see ICMPv6 too big with mtu < IPV6_MMTU * due to packet translator in the middle. * see ip6_output() and ip6_getpmtu() "alwaysfrag" case for * special handling. */ if (mtu < IPV6_MMTU) return; #endif /* * we reject ICMPv6 too big with abnormally small value. * XXX what is the good definition of "abnormally small"? */ if (mtu < sizeof(struct ip6_hdr) + sizeof(struct ip6_frag) + 8) return; if (!validated) return; /* * In case the suggested mtu is less than IPV6_MMTU, we * only need to remember that it was for above mentioned * "alwaysfrag" case. * Try to be as close to the spec as possible. */ if (mtu < IPV6_MMTU) mtu = IPV6_MMTU - 8; bzero(&inc, sizeof(inc)); inc.inc_fibnum = M_GETFIB(m); inc.inc_flags |= INC_ISIPV6; inc.inc6_faddr = *dst; if (in6_setscope(&inc.inc6_faddr, m->m_pkthdr.rcvif, NULL)) return; if (mtu < tcp_maxmtu6(&inc, NULL)) { tcp_hc_updatemtu(&inc, mtu); ICMP6STAT_INC(icp6s_pmtuchg); } } /* * Process a Node Information Query packet, based on * draft-ietf-ipngwg-icmp-name-lookups-07. * * Spec incompatibilities: * - IPv6 Subject address handling * - IPv4 Subject address handling support missing * - Proxy reply (answer even if it's not for me) * - joins NI group address at in6_ifattach() time only, does not cope * with hostname changes by sethostname(3) */ static struct mbuf * ni6_input(struct mbuf *m, int off) { struct icmp6_nodeinfo *ni6, *nni6; struct mbuf *n = NULL; struct prison *pr; u_int16_t qtype; int subjlen; int replylen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo); struct ni_reply_fqdn *fqdn; int addrs; /* for NI_QTYPE_NODEADDR */ struct ifnet *ifp = NULL; /* for NI_QTYPE_NODEADDR */ struct in6_addr in6_subj; /* subject address */ struct ip6_hdr *ip6; int oldfqdn = 0; /* if 1, return pascal string (03 draft) */ char *subj = NULL; struct in6_ifaddr *ia6 = NULL; ip6 = mtod(m, struct ip6_hdr *); #ifndef PULLDOWN_TEST ni6 = (struct icmp6_nodeinfo *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(ni6, struct icmp6_nodeinfo *, m, off, sizeof(*ni6)); if (ni6 == NULL) { /* m is already reclaimed */ return (NULL); } #endif /* * Validate IPv6 source address. * The default configuration MUST be to refuse answering queries from * global-scope addresses according to RFC4602. * Notes: * - it's not very clear what "refuse" means; this implementation * simply drops it. * - it's not very easy to identify global-scope (unicast) addresses * since there are many prefixes for them. It should be safer * and in practice sufficient to check "all" but loopback and * link-local (note that site-local unicast was deprecated and * ULA is defined as global scope-wise) */ if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_GLOBALOK) == 0 && !IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) && !IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src)) goto bad; /* * Validate IPv6 destination address. * * The Responder must discard the Query without further processing * unless it is one of the Responder's unicast or anycast addresses, or * a link-local scope multicast address which the Responder has joined. * [RFC4602, Section 5.] 
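* For example, a query sent to the all-nodes group ff02::1, or to the
* link-local NI group derived from the node's name (joined at
* in6_ifattach() time), passes the multicast test below, while a query
* to a site- or global-scope group is discarded.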
*/ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (!IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst)) goto bad; /* else it's a link-local multicast, fine */ } else { /* unicast or anycast */ ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); if (ia6 == NULL) goto bad; /* XXX impossible */ if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) && !(V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK)) { ifa_free(&ia6->ia_ifa); nd6log((LOG_DEBUG, "ni6_input: ignore node info to " "a temporary address in %s:%d", __FILE__, __LINE__)); goto bad; } ifa_free(&ia6->ia_ifa); } /* validate query Subject field. */ qtype = ntohs(ni6->ni_qtype); subjlen = m->m_pkthdr.len - off - sizeof(struct icmp6_nodeinfo); switch (qtype) { case NI_QTYPE_NOOP: case NI_QTYPE_SUPTYPES: /* 07 draft */ if (ni6->ni_code == ICMP6_NI_SUBJ_FQDN && subjlen == 0) break; /* FALLTHROUGH */ case NI_QTYPE_FQDN: case NI_QTYPE_NODEADDR: case NI_QTYPE_IPV4ADDR: switch (ni6->ni_code) { case ICMP6_NI_SUBJ_IPV6: #if ICMP6_NI_SUBJ_IPV6 != 0 case 0: #endif /* * backward compatibility - try to accept 03 draft * format, where no Subject is present. */ if (qtype == NI_QTYPE_FQDN && ni6->ni_code == 0 && subjlen == 0) { oldfqdn++; break; } #if ICMP6_NI_SUBJ_IPV6 != 0 if (ni6->ni_code != ICMP6_NI_SUBJ_IPV6) goto bad; #endif if (subjlen != sizeof(struct in6_addr)) goto bad; /* * Validate Subject address. * * Not sure what exactly "address belongs to the node" * means in the spec, is it just unicast, or what? * * At this moment we consider Subject address as * "belong to the node" if the Subject address equals * to the IPv6 destination address; validation for * IPv6 destination address should have done enough * check for us. * * We do not do proxy at this moment. */ /* m_pulldown instead of copy? */ m_copydata(m, off + sizeof(struct icmp6_nodeinfo), subjlen, (caddr_t)&in6_subj); if (in6_setscope(&in6_subj, m->m_pkthdr.rcvif, NULL)) goto bad; subj = (char *)&in6_subj; if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &in6_subj)) break; /* * XXX if we are to allow other cases, we should really * be careful about scope here. * basically, we should disallow queries toward IPv6 * destination X with subject Y, * if scope(X) > scope(Y). * if we allow scope(X) > scope(Y), it will result in * information leakage across scope boundary. */ goto bad; case ICMP6_NI_SUBJ_FQDN: /* * Validate Subject name with gethostname(3). * * The behavior may need some debate, since: * - we are not sure if the node has FQDN as * hostname (returned by gethostname(3)). * - the code does wildcard match for truncated names. * however, we are not sure if we want to perform * wildcard match, if gethostname(3) side has * truncated hostname. */ pr = curthread->td_ucred->cr_prison; mtx_lock(&pr->pr_mtx); n = ni6_nametodns(pr->pr_hostname, strlen(pr->pr_hostname), 0); mtx_unlock(&pr->pr_mtx); if (!n || n->m_next || n->m_len == 0) goto bad; IP6_EXTHDR_GET(subj, char *, m, off + sizeof(struct icmp6_nodeinfo), subjlen); if (subj == NULL) goto bad; if (!ni6_dnsmatch(subj, subjlen, mtod(n, const char *), n->m_len)) { goto bad; } m_freem(n); n = NULL; break; case ICMP6_NI_SUBJ_IPV4: /* XXX: to be implemented? */ default: goto bad; } break; } /* refuse based on configuration. XXX ICMP6_NI_REFUSED? 
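* V_icmp6_nodeinfo is a bitmask (the net.inet6.icmp6.nodeinfo sysctl):
* ICMP6_NODEINFO_FQDNOK permits FQDN replies, ICMP6_NODEINFO_NODEADDROK
* permits node-address replies, ICMP6_NODEINFO_TMPADDROK additionally
* allows temporary addresses in replies, and ICMP6_NODEINFO_GLOBALOK
* allows answering queriers beyond loopback/link-local scope.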
*/ switch (qtype) { case NI_QTYPE_FQDN: if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_FQDNOK) == 0) goto bad; break; case NI_QTYPE_NODEADDR: case NI_QTYPE_IPV4ADDR: if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_NODEADDROK) == 0) goto bad; break; } /* guess reply length */ switch (qtype) { case NI_QTYPE_NOOP: break; /* no reply data */ case NI_QTYPE_SUPTYPES: replylen += sizeof(u_int32_t); break; case NI_QTYPE_FQDN: /* XXX will append an mbuf */ replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen); break; case NI_QTYPE_NODEADDR: addrs = ni6_addrs(ni6, m, &ifp, (struct in6_addr *)subj); if ((replylen += addrs * (sizeof(struct in6_addr) + sizeof(u_int32_t))) > MCLBYTES) replylen = MCLBYTES; /* XXX: will truncate pkt later */ break; case NI_QTYPE_IPV4ADDR: /* unsupported - should respond with unknown Qtype? */ break; default: /* * XXX: We must return a reply with the ICMP6 code * `unknown Qtype' in this case. However we regard the case * as an FQDN query for backward compatibility. * Older versions set a random value to this field, * so it rarely varies in the defined qtypes. * But the mechanism is not reliable... * maybe we should obsolete older versions. */ qtype = NI_QTYPE_FQDN; /* XXX will append an mbuf */ replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen); oldfqdn++; break; } /* Allocate an mbuf to reply. */ if (replylen > MCLBYTES) { /* * XXX: should we try to allocate more? But MCLBYTES * is probably much larger than IPV6_MMTU... */ goto bad; } if (replylen > MHLEN) n = m_getcl(M_NOWAIT, m->m_type, M_PKTHDR); else n = m_gethdr(M_NOWAIT, m->m_type); if (n == NULL) { m_freem(m); return (NULL); } m_move_pkthdr(n, m); /* just for recvif and FIB */ n->m_pkthdr.len = n->m_len = replylen; /* copy mbuf header and IPv6 + Node Information base headers */ bcopy(mtod(m, caddr_t), mtod(n, caddr_t), sizeof(struct ip6_hdr)); nni6 = (struct icmp6_nodeinfo *)(mtod(n, struct ip6_hdr *) + 1); bcopy((caddr_t)ni6, (caddr_t)nni6, sizeof(struct icmp6_nodeinfo)); /* qtype dependent procedure */ switch (qtype) { case NI_QTYPE_NOOP: nni6->ni_code = ICMP6_NI_SUCCESS; nni6->ni_flags = 0; break; case NI_QTYPE_SUPTYPES: { u_int32_t v; nni6->ni_code = ICMP6_NI_SUCCESS; nni6->ni_flags = htons(0x0000); /* raw bitmap */ /* supports NOOP, SUPTYPES, FQDN, and NODEADDR */ v = (u_int32_t)htonl(0x0000000f); bcopy(&v, nni6 + 1, sizeof(u_int32_t)); break; } case NI_QTYPE_FQDN: nni6->ni_code = ICMP6_NI_SUCCESS; fqdn = (struct ni_reply_fqdn *)(mtod(n, caddr_t) + sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo)); nni6->ni_flags = 0; /* XXX: meaningless TTL */ fqdn->ni_fqdn_ttl = 0; /* ditto. */ /* * XXX do we really have FQDN in hostname? */ pr = curthread->td_ucred->cr_prison; mtx_lock(&pr->pr_mtx); n->m_next = ni6_nametodns(pr->pr_hostname, strlen(pr->pr_hostname), oldfqdn); mtx_unlock(&pr->pr_mtx); if (n->m_next == NULL) goto bad; /* XXX we assume that n->m_next is not a chain */ if (n->m_next->m_next != NULL) goto bad; n->m_pkthdr.len += n->m_next->m_len; break; case NI_QTYPE_NODEADDR: { int lenlim, copied; nni6->ni_code = ICMP6_NI_SUCCESS; n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo); lenlim = M_TRAILINGSPACE(n); copied = ni6_store_addrs(ni6, nni6, ifp, lenlim); /* XXX: reset mbuf length */ n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo) + copied; break; } default: break; /* XXX impossible! 
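* ("impossible" because the reply-length switch above already remapped
* any unknown qtype to NI_QTYPE_FQDN for backward compatibility, so no
* other value can reach this point.)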
*/ } nni6->ni_type = ICMP6_NI_REPLY; m_freem(m); return (n); bad: m_freem(m); if (n) m_freem(n); return (NULL); } /* * make a mbuf with DNS-encoded string. no compression support. * * XXX names with less than 2 dots (like "foo" or "foo.section") will be * treated as truncated name (two \0 at the end). this is a wild guess. * * old - return pascal string if non-zero */ static struct mbuf * ni6_nametodns(const char *name, int namelen, int old) { struct mbuf *m; char *cp, *ep; const char *p, *q; int i, len, nterm; if (old) len = namelen + 1; else len = MCLBYTES; /* Because MAXHOSTNAMELEN is usually 256, we use cluster mbuf. */ if (len > MLEN) m = m_getcl(M_NOWAIT, MT_DATA, 0); else m = m_get(M_NOWAIT, MT_DATA); if (m == NULL) goto fail; if (old) { m->m_len = len; *mtod(m, char *) = namelen; bcopy(name, mtod(m, char *) + 1, namelen); return m; } else { m->m_len = 0; cp = mtod(m, char *); ep = mtod(m, char *) + M_TRAILINGSPACE(m); /* if not certain about my name, return empty buffer */ if (namelen == 0) return m; /* * guess if it looks like shortened hostname, or FQDN. * shortened hostname needs two trailing "\0". */ i = 0; for (p = name; p < name + namelen; p++) { if (*p && *p == '.') i++; } if (i < 2) nterm = 2; else nterm = 1; p = name; while (cp < ep && p < name + namelen) { i = 0; for (q = p; q < name + namelen && *q && *q != '.'; q++) i++; /* result does not fit into mbuf */ if (cp + i + 1 >= ep) goto fail; /* * DNS label length restriction, RFC1035 page 8. * "i == 0" case is included here to avoid returning * 0-length label on "foo..bar". */ if (i <= 0 || i >= 64) goto fail; *cp++ = i; bcopy(p, cp, i); cp += i; p = q; if (p < name + namelen && *p == '.') p++; } /* termination */ if (cp + nterm >= ep) goto fail; while (nterm-- > 0) *cp++ = '\0'; m->m_len = cp - mtod(m, char *); return m; } panic("should not reach here"); /* NOTREACHED */ fail: if (m) m_freem(m); return NULL; } /* * check if two DNS-encoded string matches. takes care of truncated * form (with \0\0 at the end). no compression support. * XXX upper/lowercase match (see RFC2065) */ static int ni6_dnsmatch(const char *a, int alen, const char *b, int blen) { const char *a0, *b0; int l; /* simplest case - need validation? */ if (alen == blen && bcmp(a, b, alen) == 0) return 1; a0 = a; b0 = b; /* termination is mandatory */ if (alen < 2 || blen < 2) return 0; if (a0[alen - 1] != '\0' || b0[blen - 1] != '\0') return 0; alen--; blen--; while (a - a0 < alen && b - b0 < blen) { if (a - a0 + 1 > alen || b - b0 + 1 > blen) return 0; if ((signed char)a[0] < 0 || (signed char)b[0] < 0) return 0; /* we don't support compression yet */ if (a[0] >= 64 || b[0] >= 64) return 0; /* truncated case */ if (a[0] == 0 && a - a0 == alen - 1) return 1; if (b[0] == 0 && b - b0 == blen - 1) return 1; if (a[0] == 0 || b[0] == 0) return 0; if (a[0] != b[0]) return 0; l = a[0]; if (a - a0 + 1 + l > alen || b - b0 + 1 + l > blen) return 0; if (bcmp(a + 1, b + 1, l) != 0) return 0; a += 1 + l; b += 1 + l; } if (a - a0 == alen && b - b0 == blen) return 1; else return 0; } /* * calculate the number of addresses to be returned in the node info reply. */ static int ni6_addrs(struct icmp6_nodeinfo *ni6, struct mbuf *m, struct ifnet **ifpp, struct in6_addr *subj) { struct ifnet *ifp; struct in6_ifaddr *ifa6; struct ifaddr *ifa; int addrs = 0, addrsofif, iffound = 0; int niflags = ni6->ni_flags; if ((niflags & NI_NODEADDR_FLAG_ALL) == 0) { switch (ni6->ni_code) { case ICMP6_NI_SUBJ_IPV6: if (subj == NULL) /* must be impossible... 
*/ return (0); break; default: /* * XXX: we only support IPv6 subject address for * this Qtype. */ return (0); } } IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { addrsofif = 0; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ifa6 = (struct in6_ifaddr *)ifa; if ((niflags & NI_NODEADDR_FLAG_ALL) == 0 && IN6_ARE_ADDR_EQUAL(subj, &ifa6->ia_addr.sin6_addr)) iffound = 1; /* * IPv4-mapped addresses can only be returned by a * Node Information proxy, since they represent * addresses of IPv4-only nodes, which perforce do * not implement this protocol. * [icmp-name-lookups-07, Section 5.4] * So we don't support NI_NODEADDR_FLAG_COMPAT in * this function at this moment. */ /* What do we have to do about ::1? */ switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) { case IPV6_ADDR_SCOPE_LINKLOCAL: if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_SITELOCAL: if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_GLOBAL: if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0) continue; break; default: continue; } /* * check if anycast is okay. * XXX: just experimental. not in the spec. */ if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 && (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0) continue; /* we need only unicast addresses */ if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && (V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) { continue; } addrsofif++; /* count the address */ } IF_ADDR_RUNLOCK(ifp); if (iffound) { *ifpp = ifp; IFNET_RUNLOCK_NOSLEEP(); return (addrsofif); } addrs += addrsofif; } IFNET_RUNLOCK_NOSLEEP(); return (addrs); } static int ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6, struct ifnet *ifp0, int resid) { struct ifnet *ifp; struct in6_ifaddr *ifa6; struct ifaddr *ifa; struct ifnet *ifp_dep = NULL; int copied = 0, allow_deprecated = 0; u_char *cp = (u_char *)(nni6 + 1); int niflags = ni6->ni_flags; u_int32_t ltime; if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL)) return (0); /* no need to copy */ IFNET_RLOCK_NOSLEEP(); ifp = ifp0 ? ifp0 : TAILQ_FIRST(&V_ifnet); again: for (; ifp; ifp = TAILQ_NEXT(ifp, if_link)) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ifa6 = (struct in6_ifaddr *)ifa; if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) != 0 && allow_deprecated == 0) { /* * preferred addresses should be put before * deprecated addresses. */ /* record the interface for later search */ if (ifp_dep == NULL) ifp_dep = ifp; continue; } else if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) == 0 && allow_deprecated != 0) continue; /* we now collect deprecated addrs */ /* What do we have to do about ::1? */ switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) { case IPV6_ADDR_SCOPE_LINKLOCAL: if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_SITELOCAL: if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_GLOBAL: if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0) continue; break; default: continue; } /* * check if anycast is okay. * XXX: just experimental. not in the spec.
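* The TTL written with each address below encodes the remaining valid
* lifetime: e.g. an autoconfigured address with 3600 seconds left
* yields htonl(3600), an address past its expiry yields 0, and a
* permanent address (ia6t_expire == 0) yields ND6_INFINITE_LIFETIME.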
*/ if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 && (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0) continue; if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && (V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) { continue; } /* now we can copy the address */ if (resid < sizeof(struct in6_addr) + sizeof(u_int32_t)) { IF_ADDR_RUNLOCK(ifp); /* * We have no room to copy any more addresses. * Set the truncate flag and return. */ nni6->ni_flags |= NI_NODEADDR_FLAG_TRUNCATE; IFNET_RUNLOCK_NOSLEEP(); return (copied); } /* * Set the TTL of the address. * The TTL value should be one of the following * according to the specification: * * 1. The remaining lifetime of a DHCP lease on the * address, or * 2. The remaining Valid Lifetime of a prefix from * which the address was derived through Stateless * Autoconfiguration. * * Note that we currently do not support stateful * address configuration by DHCPv6, so the former * case can't happen. */ if (ifa6->ia6_lifetime.ia6t_expire == 0) ltime = ND6_INFINITE_LIFETIME; else { if (ifa6->ia6_lifetime.ia6t_expire > time_uptime) ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_uptime); else ltime = 0; } bcopy(&ltime, cp, sizeof(u_int32_t)); cp += sizeof(u_int32_t); /* copy the address itself */ bcopy(&ifa6->ia_addr.sin6_addr, cp, sizeof(struct in6_addr)); in6_clearscope((struct in6_addr *)cp); /* XXX */ cp += sizeof(struct in6_addr); resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t)); copied += (sizeof(struct in6_addr) + sizeof(u_int32_t)); } IF_ADDR_RUNLOCK(ifp); if (ifp0) /* we only need to search the specified interface */ break; } if (allow_deprecated == 0 && ifp_dep != NULL) { ifp = ifp_dep; allow_deprecated = 1; goto again; } IFNET_RUNLOCK_NOSLEEP(); return (copied); } /* * XXX almost dup'ed code with rip6_input. */ static int icmp6_rip6_input(struct mbuf **mp, int off) { struct mbuf *m = *mp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct inpcb *in6p; struct inpcb *last = NULL; struct sockaddr_in6 fromsa; struct icmp6_hdr *icmp6; struct mbuf *opts = NULL; #ifndef PULLDOWN_TEST /* this is assumed to be safe. */ icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6)); if (icmp6 == NULL) { /* m is already reclaimed */ return (IPPROTO_DONE); } #endif /* * XXX: the address may have embedded scope zone ID, which should be * hidden from applications. */ bzero(&fromsa, sizeof(fromsa)); fromsa.sin6_family = AF_INET6; fromsa.sin6_len = sizeof(struct sockaddr_in6); fromsa.sin6_addr = ip6->ip6_src; if (sa6_recoverscope(&fromsa)) { m_freem(m); return (IPPROTO_DONE); } INP_INFO_RLOCK(&V_ripcbinfo); LIST_FOREACH(in6p, &V_ripcb, inp_list) { if ((in6p->inp_vflag & INP_IPV6) == 0) continue; if (in6p->inp_ip_p != IPPROTO_ICMPV6) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst)) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src)) continue; INP_RLOCK(in6p); if (ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type, in6p->in6p_icmp6filt)) { INP_RUNLOCK(in6p); continue; } if (last != NULL) { struct mbuf *n = NULL; /* * Recent network drivers tend to allocate a single * mbuf cluster, rather than to make a couple of * mbufs without clusters. Also, since the IPv6 code * path tries to avoid m_pullup(), it is highly * probable that we still have an mbuf cluster here * even though the necessary length can be stored in an * mbuf's internal buffer.
* Meanwhile, the default size of the receive socket * buffer for raw sockets is not so large. This means * the possibility of packet loss is relatively higher * than before. To avoid this scenario, we copy the * received data to a separate mbuf that does not use * a cluster, if possible. * XXX: it is better to copy the data after stripping * intermediate headers. */ if ((m->m_flags & M_EXT) && m->m_next == NULL && m->m_len <= MHLEN) { n = m_get(M_NOWAIT, m->m_type); if (n != NULL) { if (m_dup_pkthdr(n, m, M_NOWAIT)) { bcopy(m->m_data, n->m_data, m->m_len); n->m_len = m->m_len; } else { m_free(n); n = NULL; } } } if (n != NULL || (n = m_copy(m, 0, (int)M_COPYALL)) != NULL) { if (last->inp_flags & INP_CONTROLOPTS) ip6_savecontrol(last, n, &opts); /* strip intermediate headers */ m_adj(n, off); SOCKBUF_LOCK(&last->inp_socket->so_rcv); if (sbappendaddr_locked( &last->inp_socket->so_rcv, (struct sockaddr *)&fromsa, n, opts) == 0) { /* should notify about lost packet */ m_freem(n); if (opts) { m_freem(opts); } SOCKBUF_UNLOCK( &last->inp_socket->so_rcv); } else sorwakeup_locked(last->inp_socket); opts = NULL; } INP_RUNLOCK(last); } last = in6p; } INP_INFO_RUNLOCK(&V_ripcbinfo); if (last != NULL) { if (last->inp_flags & INP_CONTROLOPTS) ip6_savecontrol(last, m, &opts); /* strip intermediate headers */ m_adj(m, off); /* avoid using mbuf clusters if possible (see above) */ if ((m->m_flags & M_EXT) && m->m_next == NULL && m->m_len <= MHLEN) { struct mbuf *n; n = m_get(M_NOWAIT, m->m_type); if (n != NULL) { if (m_dup_pkthdr(n, m, M_NOWAIT)) { bcopy(m->m_data, n->m_data, m->m_len); n->m_len = m->m_len; m_freem(m); m = n; } else { m_freem(n); n = NULL; } } } SOCKBUF_LOCK(&last->inp_socket->so_rcv); if (sbappendaddr_locked(&last->inp_socket->so_rcv, (struct sockaddr *)&fromsa, m, opts) == 0) { m_freem(m); if (opts) m_freem(opts); SOCKBUF_UNLOCK(&last->inp_socket->so_rcv); } else sorwakeup_locked(last->inp_socket); INP_RUNLOCK(last); } else { m_freem(m); IP6STAT_DEC(ip6s_delivered); } return IPPROTO_DONE; } /* * Reflect the ip6 packet back to the source. * OFF points to the icmp6 header, counted from the top of the mbuf. */ void icmp6_reflect(struct mbuf *m, size_t off) { struct in6_addr src, *srcp = NULL; struct ip6_hdr *ip6; struct icmp6_hdr *icmp6; struct in6_ifaddr *ia = NULL; struct ifnet *outif = NULL; int plen; int type, code; /* too short to reflect */ if (off < sizeof(struct ip6_hdr)) { nd6log((LOG_DEBUG, "sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n", (u_long)off, (u_long)sizeof(struct ip6_hdr), __FILE__, __LINE__)); goto bad; } /* * If there are extra headers between IPv6 and ICMPv6, strip * off that header first. */ #ifdef DIAGNOSTIC if (sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) > MHLEN) panic("assumption failed in icmp6_reflect"); #endif if (off > sizeof(struct ip6_hdr)) { size_t l; struct ip6_hdr nip6; l = off - sizeof(struct ip6_hdr); m_copydata(m, 0, sizeof(nip6), (caddr_t)&nip6); m_adj(m, l); l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); if (m->m_len < l) { if ((m = m_pullup(m, l)) == NULL) return; } bcopy((caddr_t)&nip6, mtod(m, caddr_t), sizeof(nip6)); } else /* off == sizeof(struct ip6_hdr) */ { size_t l; l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); if (m->m_len < l) { if ((m = m_pullup(m, l)) == NULL) return; } } plen = m->m_pkthdr.len - sizeof(struct ip6_hdr); ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_nxt = IPPROTO_ICMPV6; icmp6 = (struct icmp6_hdr *)(ip6 + 1); type = icmp6->icmp6_type; /* keep type for statistics */ code = icmp6->icmp6_code; /* ditto. 
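* (the values are latched because the mbuf is handed off to
* ip6_output() below and cannot be examined afterwards; they are only
* used to classify the reply for icmp6_ifoutstat_inc().)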
*/ /* * If the incoming packet was addressed directly to us (i.e. unicast), * use dst as the src for the reply. * The IN6_IFF_NOTREADY case should be VERY rare, but is possible * (for example) when we encounter an error while forwarding procedure * destined to a duplicated address of ours. */ if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); if (ia != NULL && !(ia->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) srcp = &ia->ia_addr.sin6_addr; } if (srcp == NULL) { int e; struct sockaddr_in6 sin6; - struct route_in6 ro; /* * This case matches to multicasts, our anycast, or unicasts * that we do not own. Select a source address based on the * source address of the erroneous packet. */ bzero(&sin6, sizeof(sin6)); sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(sin6); sin6.sin6_addr = ip6->ip6_dst; /* zone ID should be embedded */ - bzero(&ro, sizeof(ro)); - e = in6_selectsrc(&sin6, NULL, NULL, &ro, NULL, &outif, &src); - if (ro.ro_rt) - RTFREE(ro.ro_rt); /* XXX: we could use this */ + e = in6_selectsrc(&sin6, NULL, NULL, NULL, &outif, &src); if (e) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, "icmp6_reflect: source can't be determined: " "dst=%s, error=%d\n", ip6_sprintf(ip6buf, &sin6.sin6_addr), e)); goto bad; } srcp = &src; } /* * ip6_input() drops a packet if its src is multicast. * So, the src is never multicast. */ ip6->ip6_dst = ip6->ip6_src; ip6->ip6_src = *srcp; ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; ip6->ip6_nxt = IPPROTO_ICMPV6; if (outif) ip6->ip6_hlim = ND_IFINFO(outif)->chlim; else if (m->m_pkthdr.rcvif) { /* XXX: This may not be the outgoing interface */ ip6->ip6_hlim = ND_IFINFO(m->m_pkthdr.rcvif)->chlim; } else ip6->ip6_hlim = V_ip6_defhlim; icmp6->icmp6_cksum = 0; icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(struct ip6_hdr), plen); /* * XXX option handling */ m->m_flags &= ~(M_BCAST|M_MCAST); ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL); if (outif) icmp6_ifoutstat_inc(outif, type, code); if (ia != NULL) ifa_free(&ia->ia_ifa); return; bad: if (ia != NULL) ifa_free(&ia->ia_ifa); m_freem(m); return; } void icmp6_fasttimo(void) { mld_fasttimo(); } void icmp6_slowtimo(void) { mld_slowtimo(); } static const char * icmp6_redirect_diag(struct in6_addr *src6, struct in6_addr *dst6, struct in6_addr *tgt6) { static char buf[1024]; char ip6bufs[INET6_ADDRSTRLEN]; char ip6bufd[INET6_ADDRSTRLEN]; char ip6buft[INET6_ADDRSTRLEN]; snprintf(buf, sizeof(buf), "(src=%s dst=%s tgt=%s)", ip6_sprintf(ip6bufs, src6), ip6_sprintf(ip6bufd, dst6), ip6_sprintf(ip6buft, tgt6)); return buf; } void icmp6_redirect_input(struct mbuf *m, int off) { struct ifnet *ifp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct nd_redirect *nd_rd; int icmp6len = ntohs(ip6->ip6_plen); char *lladdr = NULL; int lladdrlen = 0; int is_router; int is_onlink; struct in6_addr src6 = ip6->ip6_src; struct in6_addr redtgt6; struct in6_addr reddst6; union nd_opts ndopts; char ip6buf[INET6_ADDRSTRLEN]; M_ASSERTPKTHDR(m); KASSERT(m->m_pkthdr.rcvif != NULL, ("%s: no rcvif", __func__)); ifp = m->m_pkthdr.rcvif; /* XXX if we are router, we don't update route by icmp6 redirect */ if (V_ip6_forwarding) goto freeit; if (!V_icmp6_rediraccept) goto freeit; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len,); nd_rd = (struct nd_redirect *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len); if (nd_rd == NULL) { ICMP6STAT_INC(icp6s_tooshort); return; } #endif redtgt6 = 
nd_rd->nd_rd_target; reddst6 = nd_rd->nd_rd_dst; if (in6_setscope(&redtgt6, m->m_pkthdr.rcvif, NULL) || in6_setscope(&reddst6, m->m_pkthdr.rcvif, NULL)) { goto freeit; } /* validation */ if (!IN6_IS_ADDR_LINKLOCAL(&src6)) { nd6log((LOG_ERR, "ICMP6 redirect sent from %s rejected; " "must be from linklocal\n", ip6_sprintf(ip6buf, &src6))); goto bad; } if (ip6->ip6_hlim != 255) { nd6log((LOG_ERR, "ICMP6 redirect sent from %s rejected; " "hlim=%d (must be 255)\n", ip6_sprintf(ip6buf, &src6), ip6->ip6_hlim)); goto bad; } { /* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */ struct nhop6_basic nh6; struct in6_addr kdst; uint32_t scopeid; in6_splitscope(&reddst6, &kdst, &scopeid); if (fib6_lookup_nh_basic(RT_DEFAULT_FIB, &kdst, scopeid, 0, 0,&nh6)==0){ if ((nh6.nh_flags & NHF_GATEWAY) == 0) { nd6log((LOG_ERR, "ICMP6 redirect rejected; no route " "with inet6 gateway found for redirect dst: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } if (IN6_ARE_ADDR_EQUAL(&src6, &nh6.nh_addr) == 0) { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "not equal to gw-for-src=%s (must be same): " "%s\n", ip6_sprintf(ip6buf, &nh6.nh_addr), icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } } else { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "no route found for redirect dst: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } } if (IN6_IS_ADDR_MULTICAST(&reddst6)) { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "redirect dst must be unicast: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } is_router = is_onlink = 0; if (IN6_IS_ADDR_LINKLOCAL(&redtgt6)) is_router = 1; /* router case */ if (bcmp(&redtgt6, &reddst6, sizeof(redtgt6)) == 0) is_onlink = 1; /* on-link destination case */ if (!is_router && !is_onlink) { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "neither router case nor onlink case: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } icmp6len -= sizeof(*nd_rd); nd6_option_init(nd_rd + 1, icmp6len, &ndopts); if (nd6_options(&ndopts) < 0) { nd6log((LOG_INFO, "%s: invalid ND option, rejected: %s\n", __func__, icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); /* nd6_options have incremented stats */ goto freeit; } if (ndopts.nd_opts_tgt_lladdr) { lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1); lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3; } if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { nd6log((LOG_INFO, "%s: lladdrlen mismatch for %s " "(if %d, icmp6 packet %d): %s\n", __func__, ip6_sprintf(ip6buf, &redtgt6), ifp->if_addrlen, lladdrlen - 2, icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } /* Validation passed. */ /* RFC 2461 8.3 */ nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT, is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER); /* * Install a gateway route in the better-router case or an interface * route in the on-link-destination case. 
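* Worked example (hypothetical addresses): a redirect with target
* fe80::1 differing from destination 2001:db8::1 installs, in every
* FIB, a host route to the destination via gateway fe80::1
* (RTF_HOST|RTF_GATEWAY); a redirect with target == destination
* installs an RTF_HOST interface route, using the receiving
* interface's own address as the "gateway".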
*/ { struct sockaddr_in6 sdst; struct sockaddr_in6 sgw; struct sockaddr_in6 ssrc; struct sockaddr *gw; int rt_flags; u_int fibnum; bzero(&sdst, sizeof(sdst)); bzero(&ssrc, sizeof(ssrc)); sdst.sin6_family = ssrc.sin6_family = AF_INET6; sdst.sin6_len = ssrc.sin6_len = sizeof(struct sockaddr_in6); bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr)); bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr)); rt_flags = RTF_HOST; if (is_router) { bzero(&sgw, sizeof(sgw)); sgw.sin6_family = AF_INET6; sgw.sin6_len = sizeof(struct sockaddr_in6); bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr)); gw = (struct sockaddr *)&sgw; rt_flags |= RTF_GATEWAY; } else gw = ifp->if_addr->ifa_addr; for (fibnum = 0; fibnum < rt_numfibs; fibnum++) in6_rtredirect((struct sockaddr *)&sdst, gw, (struct sockaddr *)NULL, rt_flags, (struct sockaddr *)&ssrc, fibnum); } /* finally update cached route in each socket via pfctlinput */ { struct sockaddr_in6 sdst; bzero(&sdst, sizeof(sdst)); sdst.sin6_family = AF_INET6; sdst.sin6_len = sizeof(struct sockaddr_in6); bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr)); pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst); } freeit: m_freem(m); return; bad: ICMP6STAT_INC(icp6s_badredirect); m_freem(m); } void icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt) { struct ifnet *ifp; /* my outgoing interface */ struct in6_addr *ifp_ll6; struct in6_addr *router_ll6; struct ip6_hdr *sip6; /* m0 as struct ip6_hdr */ struct mbuf *m = NULL; /* newly allocated one */ struct m_tag *mtag; struct ip6_hdr *ip6; /* m as struct ip6_hdr */ struct nd_redirect *nd_rd; struct llentry *ln = NULL; size_t maxlen; u_char *p; struct ifnet *outif = NULL; struct sockaddr_in6 src_sa; icmp6_errcount(ND_REDIRECT, 0); /* if we are not router, we don't send icmp6 redirect */ if (!V_ip6_forwarding) goto fail; /* sanity check */ if (!m0 || !rt || !(rt->rt_flags & RTF_UP) || !(ifp = rt->rt_ifp)) goto fail; /* * Address check: * the source address must identify a neighbor, and * the destination address must not be a multicast address * [RFC 2461, sec 8.2] */ sip6 = mtod(m0, struct ip6_hdr *); bzero(&src_sa, sizeof(src_sa)); src_sa.sin6_family = AF_INET6; src_sa.sin6_len = sizeof(src_sa); src_sa.sin6_addr = sip6->ip6_src; if (nd6_is_addr_neighbor(&src_sa, ifp) == 0) goto fail; if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst)) goto fail; /* what should we do here? */ /* rate limit */ if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0)) goto fail; /* * Since we are going to append up to 1280 bytes (= IPV6_MMTU), * we almost always ask for an mbuf cluster for simplicity. * (MHLEN < IPV6_MMTU is almost always true) */ #if IPV6_MMTU >= MCLBYTES # error assumption failed about IPV6_MMTU and MCLBYTES #endif m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) goto fail; M_SETFIB(m, rt->rt_fibnum); maxlen = M_TRAILINGSPACE(m); maxlen = min(IPV6_MMTU, maxlen); /* just for safety */ if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) + ((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) { goto fail; } { /* get ip6 linklocal address for ifp(my outgoing interface). */ struct in6_ifaddr *ia; if ((ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY| IN6_IFF_ANYCAST)) == NULL) goto fail; ifp_ll6 = &ia->ia_addr.sin6_addr; /* XXXRW: reference released prematurely. */ ifa_free(&ia->ia_ifa); } /* get ip6 linklocal address for the router. 
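* (per RFC 4861 the Target Address in the better-router case must be
* the router's link-local address, so a non-link-local gateway below
* leaves router_ll6 NULL and the redirect is abandoned later.)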
*/ if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)rt->rt_gateway; router_ll6 = &sin6->sin6_addr; if (!IN6_IS_ADDR_LINKLOCAL(router_ll6)) router_ll6 = (struct in6_addr *)NULL; } else router_ll6 = (struct in6_addr *)NULL; /* ip6 */ ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; /* ip6->ip6_plen will be set later */ ip6->ip6_nxt = IPPROTO_ICMPV6; ip6->ip6_hlim = 255; /* ip6->ip6_src must be linklocal addr for my outgoing if. */ bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr)); bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr)); /* ND Redirect */ nd_rd = (struct nd_redirect *)(ip6 + 1); nd_rd->nd_rd_type = ND_REDIRECT; nd_rd->nd_rd_code = 0; nd_rd->nd_rd_reserved = 0; if (rt->rt_flags & RTF_GATEWAY) { /* * nd_rd->nd_rd_target must be a link-local address in * better router cases. */ if (!router_ll6) goto fail; bcopy(router_ll6, &nd_rd->nd_rd_target, sizeof(nd_rd->nd_rd_target)); bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst, sizeof(nd_rd->nd_rd_dst)); } else { /* make sure redtgt == reddst */ bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target, sizeof(nd_rd->nd_rd_target)); bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst, sizeof(nd_rd->nd_rd_dst)); } p = (u_char *)(nd_rd + 1); if (!router_ll6) goto nolladdropt; { /* target lladdr option */ int len; struct nd_opt_hdr *nd_opt; char *lladdr; IF_AFDATA_RLOCK(ifp); ln = nd6_lookup(router_ll6, 0, ifp); IF_AFDATA_RUNLOCK(ifp); if (ln == NULL) goto nolladdropt; len = sizeof(*nd_opt) + ifp->if_addrlen; len = (len + 7) & ~7; /* round by 8 */ /* safety check */ if (len + (p - (u_char *)ip6) > maxlen) goto nolladdropt; if (ln->la_flags & LLE_VALID) { nd_opt = (struct nd_opt_hdr *)p; nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; nd_opt->nd_opt_len = len >> 3; lladdr = (char *)(nd_opt + 1); bcopy(ln->ll_addr, lladdr, ifp->if_addrlen); p += len; } } nolladdropt: if (ln != NULL) LLE_RUNLOCK(ln); m->m_pkthdr.len = m->m_len = p - (u_char *)ip6; /* just to be safe */ #ifdef M_DECRYPTED /*not openbsd*/ if (m0->m_flags & M_DECRYPTED) goto noredhdropt; #endif if (p - (u_char *)ip6 > maxlen) goto noredhdropt; { /* redirected header option */ int len; struct nd_opt_rd_hdr *nd_opt_rh; /* * compute the maximum size for icmp6 redirect header option. * XXX room for auth header? */ len = maxlen - (p - (u_char *)ip6); len &= ~7; /* This is just for simplicity. */ if (m0->m_pkthdr.len != m0->m_len) { if (m0->m_next) { m_freem(m0->m_next); m0->m_next = NULL; } m0->m_pkthdr.len = m0->m_len; } /* * Redirected header option spec (RFC2461 4.6.3) talks nothing * about padding/truncate rule for the original IP packet. * From the discussion on IPv6imp in Feb 1999, * the consensus was: * - "attach as much as possible" is the goal * - pad if not aligned (original size can be guessed by * original ip6 header) * Following code adds the padding if it is simple enough, * and truncates if not. 
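* Worked example: a 60-byte original packet with room to spare has
* extra == 60 % 8 == 4, so 4 bytes of padding grow it to 64; the
* option then covers 64 + 8 (its own header) == 72 bytes and
* nd_opt_rh_len == 72 >> 3 == 9. An original packet larger than the
* remaining room is instead truncated to len - 8 bytes.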
*/ if (m0->m_next || m0->m_pkthdr.len != m0->m_len) panic("assumption failed in %s:%d", __FILE__, __LINE__); if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) { /* not enough room, truncate */ m0->m_pkthdr.len = m0->m_len = len - sizeof(*nd_opt_rh); } else { /* enough room, pad or truncate */ size_t extra; extra = m0->m_pkthdr.len % 8; if (extra) { /* pad if easy enough, truncate if not */ if (8 - extra <= M_TRAILINGSPACE(m0)) { /* pad */ m0->m_len += (8 - extra); m0->m_pkthdr.len += (8 - extra); } else { /* truncate */ m0->m_pkthdr.len -= extra; m0->m_len -= extra; } } len = m0->m_pkthdr.len + sizeof(*nd_opt_rh); m0->m_pkthdr.len = m0->m_len = len - sizeof(*nd_opt_rh); } nd_opt_rh = (struct nd_opt_rd_hdr *)p; bzero(nd_opt_rh, sizeof(*nd_opt_rh)); nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER; nd_opt_rh->nd_opt_rh_len = len >> 3; p += sizeof(*nd_opt_rh); m->m_pkthdr.len = m->m_len = p - (u_char *)ip6; /* connect m0 to m */ m_tag_delete_chain(m0, NULL); m0->m_flags &= ~M_PKTHDR; m->m_next = m0; m->m_pkthdr.len = m->m_len + m0->m_len; m0 = NULL; } noredhdropt:; if (m0) { m_freem(m0); m0 = NULL; } /* XXX: clear embedded link IDs in the inner header */ in6_clearscope(&sip6->ip6_src); in6_clearscope(&sip6->ip6_dst); in6_clearscope(&nd_rd->nd_rd_target); in6_clearscope(&nd_rd->nd_rd_dst); ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); nd_rd->nd_rd_cksum = 0; nd_rd->nd_rd_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), ntohs(ip6->ip6_plen)); if (send_sendso_input_hook != NULL) { mtag = m_tag_get(PACKET_TAG_ND_OUTGOING, sizeof(unsigned short), M_NOWAIT); if (mtag == NULL) goto fail; *(unsigned short *)(mtag + 1) = nd_rd->nd_rd_type; m_tag_prepend(m, mtag); } /* send the packet to outside... */ ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL); if (outif) { icmp6_ifstat_inc(outif, ifs6_out_msg); icmp6_ifstat_inc(outif, ifs6_out_redirect); } ICMP6STAT_INC(icp6s_outhist[ND_REDIRECT]); return; fail: if (m) m_freem(m); if (m0) m_freem(m0); } /* * ICMPv6 socket option processing. */ int icmp6_ctloutput(struct socket *so, struct sockopt *sopt) { int error = 0; int optlen; struct inpcb *inp = sotoinpcb(so); int level, op, optname; if (sopt) { level = sopt->sopt_level; op = sopt->sopt_dir; optname = sopt->sopt_name; optlen = sopt->sopt_valsize; } else level = op = optname = optlen = 0; if (level != IPPROTO_ICMPV6) { return EINVAL; } switch (op) { case PRCO_SETOPT: switch (optname) { case ICMP6_FILTER: { struct icmp6_filter ic6f; if (optlen != sizeof(ic6f)) { error = EMSGSIZE; break; } error = sooptcopyin(sopt, &ic6f, optlen, optlen); if (error == 0) { INP_WLOCK(inp); *inp->in6p_icmp6filt = ic6f; INP_WUNLOCK(inp); } break; } default: error = ENOPROTOOPT; break; } break; case PRCO_GETOPT: switch (optname) { case ICMP6_FILTER: { struct icmp6_filter ic6f; INP_RLOCK(inp); ic6f = *inp->in6p_icmp6filt; INP_RUNLOCK(inp); error = sooptcopyout(sopt, &ic6f, sizeof(ic6f)); break; } default: error = ENOPROTOOPT; break; } break; } return (error); } /* * Perform rate limit check. * Returns 0 if it is okay to send the icmp6 packet. * Returns 1 if the router SHOULD NOT send this icmp6 packet due to rate * limitation. * * XXX per-destination/type check necessary? 
* * dst - not used at this moment * type - not used at this moment * code - not used at this moment */ static int icmp6_ratelimit(const struct in6_addr *dst, const int type, const int code) { int ret; ret = 0; /* okay to send */ /* PPS limit */ if (!ppsratecheck(&V_icmp6errppslim_last, &V_icmp6errpps_count, V_icmp6errppslim)) { /* The packet is subject to rate limit */ ret++; } return ret; } diff --git a/sys/netinet6/in6.h b/sys/netinet6/in6.h index 9bc142a0068c..ae049f0d1106 100644 --- a/sys/netinet6/in6.h +++ b/sys/netinet6/in6.h @@ -1,738 +1,740 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: in6.h,v 1.89 2001/05/27 13:28:35 itojun Exp $ */ /*- * Copyright (c) 1982, 1986, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in.h 8.3 (Berkeley) 1/3/94 * $FreeBSD$ */ #ifndef __KAME_NETINET_IN_H_INCLUDED_ #error "do not include netinet6/in6.h directly, include netinet/in.h. see RFC2553" #endif #ifndef _NETINET6_IN6_H_ #define _NETINET6_IN6_H_ /* * Identification of the network protocol stack * for *BSD-current/release: http://www.kame.net/dev/cvsweb.cgi/kame/COVERAGE * has the table of implementation/integration differences. */ #define __KAME__ #define __KAME_VERSION "FreeBSD" /* * IPv6 port allocation rules should mirror the IPv4 rules and are controlled * by the net.inet.ip.portrange sysctl tree. The following defines exist * for compatibility with userland applications that need them. */ #if __BSD_VISIBLE #define IPV6PORT_RESERVED 1024 #define IPV6PORT_ANONMIN 49152 #define IPV6PORT_ANONMAX 65535 #define IPV6PORT_RESERVEDMIN 600 #define IPV6PORT_RESERVEDMAX (IPV6PORT_RESERVED-1) #endif /* * IPv6 address */ struct in6_addr { union { uint8_t __u6_addr8[16]; uint16_t __u6_addr16[8]; uint32_t __u6_addr32[4]; } __u6_addr; /* 128-bit IP6 address */ }; #define s6_addr __u6_addr.__u6_addr8 #ifdef _KERNEL /* XXX nonstandard */ #define s6_addr8 __u6_addr.__u6_addr8 #define s6_addr16 __u6_addr.__u6_addr16 #define s6_addr32 __u6_addr.__u6_addr32 #endif #define INET6_ADDRSTRLEN 46 /* * XXX missing POSIX.1-2001 macro IPPROTO_IPV6. 
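* Note that the union above exposes the 128-bit address as bytes or as
* 16- or 32-bit words: the kernel-only s6_addr32 view lets the address
* tests below compare four 32-bit words at a time (e.g.
* IN6_IS_ADDR_UNSPECIFIED), while portable code must stick to the
* standard s6_addr byte array.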
*/ /* * Socket address for IPv6 */ #if __BSD_VISIBLE #define SIN6_LEN #endif struct sockaddr_in6 { uint8_t sin6_len; /* length of this struct */ sa_family_t sin6_family; /* AF_INET6 */ in_port_t sin6_port; /* Transport layer port # */ uint32_t sin6_flowinfo; /* IP6 flow information */ struct in6_addr sin6_addr; /* IP6 address */ uint32_t sin6_scope_id; /* scope zone index */ }; /* * Local definition for masks */ #ifdef _KERNEL /* XXX nonstandard */ #define IN6MASK0 {{{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }}} #define IN6MASK32 {{{ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}} #define IN6MASK64 {{{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}} #define IN6MASK96 {{{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, \ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 }}} #define IN6MASK128 {{{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, \ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }}} #endif #ifdef _KERNEL extern const struct sockaddr_in6 sa6_any; extern const struct in6_addr in6mask0; extern const struct in6_addr in6mask32; extern const struct in6_addr in6mask64; extern const struct in6_addr in6mask96; extern const struct in6_addr in6mask128; #endif /* _KERNEL */ /* * Macros started with IPV6_ADDR is KAME local */ #ifdef _KERNEL /* XXX nonstandard */ #if _BYTE_ORDER == _BIG_ENDIAN #define IPV6_ADDR_INT32_ONE 1 #define IPV6_ADDR_INT32_TWO 2 #define IPV6_ADDR_INT32_MNL 0xff010000 #define IPV6_ADDR_INT32_MLL 0xff020000 #define IPV6_ADDR_INT32_SMP 0x0000ffff #define IPV6_ADDR_INT16_ULL 0xfe80 #define IPV6_ADDR_INT16_USL 0xfec0 #define IPV6_ADDR_INT16_MLL 0xff02 #elif _BYTE_ORDER == _LITTLE_ENDIAN #define IPV6_ADDR_INT32_ONE 0x01000000 #define IPV6_ADDR_INT32_TWO 0x02000000 #define IPV6_ADDR_INT32_MNL 0x000001ff #define IPV6_ADDR_INT32_MLL 0x000002ff #define IPV6_ADDR_INT32_SMP 0xffff0000 #define IPV6_ADDR_INT16_ULL 0x80fe #define IPV6_ADDR_INT16_USL 0xc0fe #define IPV6_ADDR_INT16_MLL 0x02ff #endif #endif /* * Definition of some useful macros to handle IP6 addresses */ #if __BSD_VISIBLE #define IN6ADDR_ANY_INIT \ {{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}} #define IN6ADDR_LOOPBACK_INIT \ {{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}} #define IN6ADDR_NODELOCAL_ALLNODES_INIT \ {{{ 0xff, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}} #define IN6ADDR_INTFACELOCAL_ALLNODES_INIT \ {{{ 0xff, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}} #define IN6ADDR_LINKLOCAL_ALLNODES_INIT \ {{{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}} #define IN6ADDR_LINKLOCAL_ALLROUTERS_INIT \ {{{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 }}} #define IN6ADDR_LINKLOCAL_ALLV2ROUTERS_INIT \ {{{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16 }}} #endif extern const struct in6_addr in6addr_any; extern const struct in6_addr in6addr_loopback; #if __BSD_VISIBLE extern const struct in6_addr in6addr_nodelocal_allnodes; extern const struct in6_addr in6addr_linklocal_allnodes; extern const struct in6_addr in6addr_linklocal_allrouters; extern const struct in6_addr in6addr_linklocal_allv2routers; #endif /* * Equality * NOTE: Some of kernel programming 
environment (for example, openbsd/sparc) * does not supply memcmp(). For userland memcmp() is preferred as it is * in ANSI standard. */ #ifdef _KERNEL #define IN6_ARE_ADDR_EQUAL(a, b) \ (bcmp(&(a)->s6_addr[0], &(b)->s6_addr[0], sizeof(struct in6_addr)) == 0) #else #if __BSD_VISIBLE #define IN6_ARE_ADDR_EQUAL(a, b) \ (memcmp(&(a)->s6_addr[0], &(b)->s6_addr[0], sizeof(struct in6_addr)) == 0) #endif #endif /* * Unspecified */ #define IN6_IS_ADDR_UNSPECIFIED(a) \ ((a)->__u6_addr.__u6_addr32[0] == 0 && \ (a)->__u6_addr.__u6_addr32[1] == 0 && \ (a)->__u6_addr.__u6_addr32[2] == 0 && \ (a)->__u6_addr.__u6_addr32[3] == 0) /* * Loopback */ #define IN6_IS_ADDR_LOOPBACK(a) \ ((a)->__u6_addr.__u6_addr32[0] == 0 && \ (a)->__u6_addr.__u6_addr32[1] == 0 && \ (a)->__u6_addr.__u6_addr32[2] == 0 && \ (a)->__u6_addr.__u6_addr32[3] == ntohl(1)) /* * IPv4 compatible */ #define IN6_IS_ADDR_V4COMPAT(a) \ ((a)->__u6_addr.__u6_addr32[0] == 0 && \ (a)->__u6_addr.__u6_addr32[1] == 0 && \ (a)->__u6_addr.__u6_addr32[2] == 0 && \ (a)->__u6_addr.__u6_addr32[3] != 0 && \ (a)->__u6_addr.__u6_addr32[3] != ntohl(1)) /* * Mapped */ #define IN6_IS_ADDR_V4MAPPED(a) \ ((a)->__u6_addr.__u6_addr32[0] == 0 && \ (a)->__u6_addr.__u6_addr32[1] == 0 && \ (a)->__u6_addr.__u6_addr32[2] == ntohl(0x0000ffff)) /* * KAME Scope Values */ #ifdef _KERNEL /* XXX nonstandard */ #define IPV6_ADDR_SCOPE_NODELOCAL 0x01 #define IPV6_ADDR_SCOPE_INTFACELOCAL 0x01 #define IPV6_ADDR_SCOPE_LINKLOCAL 0x02 #define IPV6_ADDR_SCOPE_SITELOCAL 0x05 #define IPV6_ADDR_SCOPE_ORGLOCAL 0x08 /* just used in this file */ #define IPV6_ADDR_SCOPE_GLOBAL 0x0e #else #define __IPV6_ADDR_SCOPE_NODELOCAL 0x01 #define __IPV6_ADDR_SCOPE_INTFACELOCAL 0x01 #define __IPV6_ADDR_SCOPE_LINKLOCAL 0x02 #define __IPV6_ADDR_SCOPE_SITELOCAL 0x05 #define __IPV6_ADDR_SCOPE_ORGLOCAL 0x08 /* just used in this file */ #define __IPV6_ADDR_SCOPE_GLOBAL 0x0e #endif /* * Unicast Scope * Note that we must check topmost 10 bits only, not 16 bits (see RFC2373). 
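* fe80::/10 covers a first byte of 0xfe and a second byte in the range
* 0x80-0xbf, hence the (s6_addr[1] & 0xc0) == 0x80 test below: febf::1
* is still link-local, while fec0::1 is (deprecated) site-local.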
*/ #define IN6_IS_ADDR_LINKLOCAL(a) \ (((a)->s6_addr[0] == 0xfe) && (((a)->s6_addr[1] & 0xc0) == 0x80)) #define IN6_IS_ADDR_SITELOCAL(a) \ (((a)->s6_addr[0] == 0xfe) && (((a)->s6_addr[1] & 0xc0) == 0xc0)) /* * Multicast */ #define IN6_IS_ADDR_MULTICAST(a) ((a)->s6_addr[0] == 0xff) #ifdef _KERNEL /* XXX nonstandard */ #define IPV6_ADDR_MC_SCOPE(a) ((a)->s6_addr[1] & 0x0f) #else #define __IPV6_ADDR_MC_SCOPE(a) ((a)->s6_addr[1] & 0x0f) #endif /* * Multicast Scope */ #ifdef _KERNEL /* refers nonstandard items */ #define IN6_IS_ADDR_MC_NODELOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_NODELOCAL)) #define IN6_IS_ADDR_MC_INTFACELOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_INTFACELOCAL)) #define IN6_IS_ADDR_MC_LINKLOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_LINKLOCAL)) #define IN6_IS_ADDR_MC_SITELOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_SITELOCAL)) #define IN6_IS_ADDR_MC_ORGLOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_ORGLOCAL)) #define IN6_IS_ADDR_MC_GLOBAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_GLOBAL)) #else #define IN6_IS_ADDR_MC_NODELOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_NODELOCAL)) #define IN6_IS_ADDR_MC_LINKLOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_LINKLOCAL)) #define IN6_IS_ADDR_MC_SITELOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_SITELOCAL)) #define IN6_IS_ADDR_MC_ORGLOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_ORGLOCAL)) #define IN6_IS_ADDR_MC_GLOBAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_GLOBAL)) #endif #ifdef _KERNEL /* nonstandard */ /* * KAME Scope */ #define IN6_IS_SCOPE_LINKLOCAL(a) \ ((IN6_IS_ADDR_LINKLOCAL(a)) || \ (IN6_IS_ADDR_MC_LINKLOCAL(a))) #define IN6_IS_SCOPE_EMBED(a) \ ((IN6_IS_ADDR_LINKLOCAL(a)) || \ (IN6_IS_ADDR_MC_LINKLOCAL(a)) || \ (IN6_IS_ADDR_MC_INTFACELOCAL(a))) #define IFA6_IS_DEPRECATED(a) \ ((a)->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME && \ (u_int32_t)((time_uptime - (a)->ia6_updatetime)) > \ (a)->ia6_lifetime.ia6t_pltime) #define IFA6_IS_INVALID(a) \ ((a)->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME && \ (u_int32_t)((time_uptime - (a)->ia6_updatetime)) > \ (a)->ia6_lifetime.ia6t_vltime) #endif /* _KERNEL */ /* * IP6 route structure */ #if __BSD_VISIBLE struct route_in6 { struct rtentry *ro_rt; char *ro_prepend; uint16_t ro_plen; uint16_t ro_flags; + uint16_t ro_mtu; /* saved ro_rt mtu */ + uint16_t spare; struct sockaddr_in6 ro_dst; }; #endif #ifdef _KERNEL #define MTAG_ABI_IPV6 1444287380 /* IPv6 ABI */ #define IPV6_TAG_DIRECT 0 /* direct-dispatch IPv6 */ #endif /* _KERNEL */ /* * Options for use with [gs]etsockopt at the IPV6 level. * First word of comment is data type; bool is stored in int. 
*/ /* no hdrincl */ #if 0 /* the following are relics of IPv4 and hence are disabled */ #define IPV6_OPTIONS 1 /* buf/ip6_opts; set/get IP6 options */ #define IPV6_RECVOPTS 5 /* bool; receive all IP6 opts w/dgram */ #define IPV6_RECVRETOPTS 6 /* bool; receive IP6 opts for response */ #define IPV6_RECVDSTADDR 7 /* bool; receive IP6 dst addr w/dgram */ #define IPV6_RETOPTS 8 /* ip6_opts; set/get IP6 options */ #endif #define IPV6_SOCKOPT_RESERVED1 3 /* reserved for future use */ #define IPV6_UNICAST_HOPS 4 /* int; IP6 hops */ #define IPV6_MULTICAST_IF 9 /* u_int; set/get IP6 multicast i/f */ #define IPV6_MULTICAST_HOPS 10 /* int; set/get IP6 multicast hops */ #define IPV6_MULTICAST_LOOP 11 /* u_int; set/get IP6 multicast loopback */ #define IPV6_JOIN_GROUP 12 /* ipv6_mreq; join a group membership */ #define IPV6_LEAVE_GROUP 13 /* ipv6_mreq; leave a group membership */ #define IPV6_PORTRANGE 14 /* int; range to choose for unspec port */ #define ICMP6_FILTER 18 /* icmp6_filter; icmp6 filter */ /* RFC2292 options */ #ifdef _KERNEL #define IPV6_2292PKTINFO 19 /* bool; send/recv if, src/dst addr */ #define IPV6_2292HOPLIMIT 20 /* bool; hop limit */ #define IPV6_2292NEXTHOP 21 /* bool; next hop addr */ #define IPV6_2292HOPOPTS 22 /* bool; hop-by-hop option */ #define IPV6_2292DSTOPTS 23 /* bool; destination option */ #define IPV6_2292RTHDR 24 /* bool; routing header */ #define IPV6_2292PKTOPTIONS 25 /* buf/cmsghdr; set/get IPv6 options */ #endif #define IPV6_CHECKSUM 26 /* int; checksum offset for raw socket */ #define IPV6_V6ONLY 27 /* bool; make AF_INET6 sockets v6 only */ #ifndef _KERNEL #define IPV6_BINDV6ONLY IPV6_V6ONLY #endif #if 1 /* IPSEC */ #define IPV6_IPSEC_POLICY 28 /* struct; get/set security policy */ #endif /* IPSEC */ /* 29; unused; was IPV6_FAITH */ #if 1 /* IPV6FIREWALL */ #define IPV6_FW_ADD 30 /* add a firewall rule to chain */ #define IPV6_FW_DEL 31 /* delete a firewall rule from chain */ #define IPV6_FW_FLUSH 32 /* flush firewall rule chain */ #define IPV6_FW_ZERO 33 /* clear single/all firewall counter(s) */ #define IPV6_FW_GET 34 /* get entire firewall rule chain */ #endif /* new socket options introduced in RFC3542 */ #define IPV6_RTHDRDSTOPTS 35 /* ip6_dest; send dst option before rthdr */ #define IPV6_RECVPKTINFO 36 /* bool; recv if, dst addr */ #define IPV6_RECVHOPLIMIT 37 /* bool; recv hop limit */ #define IPV6_RECVRTHDR 38 /* bool; recv routing header */ #define IPV6_RECVHOPOPTS 39 /* bool; recv hop-by-hop option */ #define IPV6_RECVDSTOPTS 40 /* bool; recv dst option after rthdr */ #ifdef _KERNEL #define IPV6_RECVRTHDRDSTOPTS 41 /* bool; recv dst option before rthdr */ #endif #define IPV6_USE_MIN_MTU 42 /* bool; send packets at the minimum MTU */ #define IPV6_RECVPATHMTU 43 /* bool; notify of the path MTU */ #define IPV6_PATHMTU 44 /* mtuinfo; get the current path MTU (sopt), 4 bytes int; MTU notification (cmsg) */ #if 0 /*obsoleted during 2292bis -> 3542*/ #define IPV6_REACHCONF 45 /* no data; ND reachability confirm (cmsg only/not in RFC3542) */ #endif /* more new socket options introduced in RFC3542 */ #define IPV6_PKTINFO 46 /* in6_pktinfo; send if, src addr */ #define IPV6_HOPLIMIT 47 /* int; send hop limit */ #define IPV6_NEXTHOP 48 /* sockaddr; next hop addr */ #define IPV6_HOPOPTS 49 /* ip6_hbh; send hop-by-hop option */ #define IPV6_DSTOPTS 50 /* ip6_dest; send dst option before rthdr */ #define IPV6_RTHDR 51 /* ip6_rthdr; send routing header */ #if 0 #define IPV6_PKTOPTIONS 52 /* buf/cmsghdr; set/get IPv6 options */ /* obsoleted by RFC3542 */ #endif
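/*
 * Illustrative userland sketch (not part of this header): enabling
 * IPV6_RECVPKTINFO and reading the IPV6_PKTINFO ancillary object off a
 * received datagram, per RFC 3542.  Socket setup, msghdr/iovec wiring and
 * error handling are elided; "s" is assumed to be an AF_INET6 datagram
 * socket whose msg.msg_control points at a CMSG_SPACE()-sized buffer.
 *
 *	int on = 1;
 *	struct msghdr msg;
 *	struct cmsghdr *cm;
 *	struct in6_pktinfo *pi;
 *
 *	(void)setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on));
 *	if (recvmsg(s, &msg, 0) >= 0)
 *		for (cm = CMSG_FIRSTHDR(&msg); cm != NULL;
 *		    cm = CMSG_NXTHDR(&msg, cm))
 *			if (cm->cmsg_level == IPPROTO_IPV6 &&
 *			    cm->cmsg_type == IPV6_PKTINFO)
 *				pi = (struct in6_pktinfo *)CMSG_DATA(cm);
 */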
#define IPV6_RECVTCLASS 57 /* bool; recv traffic class values */ #define IPV6_AUTOFLOWLABEL 59 /* bool; attach flowlabel automagically */ #define IPV6_TCLASS 61 /* int; send traffic class value */ #define IPV6_DONTFRAG 62 /* bool; disable IPv6 fragmentation */ #define IPV6_PREFER_TEMPADDR 63 /* int; prefer temporary addresses as * the source address. */ #define IPV6_BINDANY 64 /* bool: allow bind to any address */ #define IPV6_BINDMULTI 65 /* bool; allow multibind to same addr/port */ #define IPV6_RSS_LISTEN_BUCKET 66 /* int; set RSS listen bucket */ #define IPV6_FLOWID 67 /* int; flowid of given socket */ #define IPV6_FLOWTYPE 68 /* int; flowtype of given socket */ #define IPV6_RSSBUCKETID 69 /* int; RSS bucket ID of given socket */ #define IPV6_RECVFLOWID 70 /* bool; receive IP6 flowid/flowtype w/ datagram */ #define IPV6_RECVRSSBUCKETID 71 /* bool; receive IP6 RSS bucket id w/ datagram */ /* * The following option is private; do not use it from user applications. * It is deliberately defined to the same value as IP_MSFILTER. */ #define IPV6_MSFILTER 74 /* struct __msfilterreq; * set/get multicast source filter list. */ /* to define items, should talk with KAME guys first, for *BSD compatibility */ #define IPV6_RTHDR_LOOSE 0 /* this hop need not be a neighbor. XXX old spec */ #define IPV6_RTHDR_STRICT 1 /* this hop must be a neighbor. XXX old spec */ #define IPV6_RTHDR_TYPE_0 0 /* IPv6 routing header type 0 */ /* * Defaults and limits for options */ #define IPV6_DEFAULT_MULTICAST_HOPS 1 /* normally limit m'casts to 1 hop */ #define IPV6_DEFAULT_MULTICAST_LOOP 1 /* normally hear sends if a member */ /* * The im6o_membership vector for each socket is now dynamically allocated at * run-time, bounded by USHRT_MAX, and is reallocated when needed, sized * according to a power-of-two increment. */ #define IPV6_MIN_MEMBERSHIPS 31 #define IPV6_MAX_MEMBERSHIPS 4095 /* * Default resource limits for IPv6 multicast source filtering. * These may be modified by sysctl. */ #define IPV6_MAX_GROUP_SRC_FILTER 512 /* sources per group */ #define IPV6_MAX_SOCK_SRC_FILTER 128 /* sources per socket/group */ /* * Argument structure for IPV6_JOIN_GROUP and IPV6_LEAVE_GROUP. */ struct ipv6_mreq { struct in6_addr ipv6mr_multiaddr; unsigned int ipv6mr_interface; }; /* * IPV6_PKTINFO: Packet information(RFC2292 sec 5) */ struct in6_pktinfo { struct in6_addr ipi6_addr; /* src/dst IPv6 address */ unsigned int ipi6_ifindex; /* send/recv interface index */ }; /* * Control structure for IPV6_RECVPATHMTU socket option. */ struct ip6_mtuinfo { struct sockaddr_in6 ip6m_addr; /* or sockaddr_storage? */ uint32_t ip6m_mtu; }; /* * Argument for IPV6_PORTRANGE: * - which range to search when port is unspecified at bind() or connect() */ #define IPV6_PORTRANGE_DEFAULT 0 /* default range */ #define IPV6_PORTRANGE_HIGH 1 /* "high" - request firewall bypass */ #define IPV6_PORTRANGE_LOW 2 /* "low" - vouchsafe security */ #if __BSD_VISIBLE /* * Definitions for inet6 sysctl operations. * * Third level is protocol number. * Fourth level is desired variable within that protocol. 
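* * For example, assuming the standard 4.4BSD sysctl layout, the forwarding knob below is reached via the MIB name { CTL_NET, PF_INET6, IPPROTO_IPV6, IPV6CTL_FORWARDING }, i.e. net.inet6.ip6.forwarding.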
*/ #define IPV6PROTO_MAXID (IPPROTO_PIM + 1) /* don't list to IPV6PROTO_MAX */ /* * Names for IP sysctl objects */ #define IPV6CTL_FORWARDING 1 /* act as router */ #define IPV6CTL_SENDREDIRECTS 2 /* may send redirects when forwarding*/ #define IPV6CTL_DEFHLIM 3 /* default Hop-Limit */ #ifdef notyet #define IPV6CTL_DEFMTU 4 /* default MTU */ #endif #define IPV6CTL_FORWSRCRT 5 /* forward source-routed dgrams */ #define IPV6CTL_STATS 6 /* stats */ #define IPV6CTL_MRTSTATS 7 /* multicast forwarding stats */ #define IPV6CTL_MRTPROTO 8 /* multicast routing protocol */ #define IPV6CTL_MAXFRAGPACKETS 9 /* max packets reassembly queue */ #define IPV6CTL_SOURCECHECK 10 /* verify source route and intf */ #define IPV6CTL_SOURCECHECK_LOGINT 11 /* minimum logging interval */ #define IPV6CTL_ACCEPT_RTADV 12 /* 13; unused; was: IPV6CTL_KEEPFAITH */ #define IPV6CTL_LOG_INTERVAL 14 #define IPV6CTL_HDRNESTLIMIT 15 #define IPV6CTL_DAD_COUNT 16 #define IPV6CTL_AUTO_FLOWLABEL 17 #define IPV6CTL_DEFMCASTHLIM 18 #define IPV6CTL_GIF_HLIM 19 /* default HLIM for gif encap packet */ #define IPV6CTL_KAME_VERSION 20 #define IPV6CTL_USE_DEPRECATED 21 /* use deprecated addr (RFC2462 5.5.4) */ #define IPV6CTL_RR_PRUNE 22 /* walk timer for router renumbering */ #if 0 /* obsolete */ #define IPV6CTL_MAPPED_ADDR 23 #endif #define IPV6CTL_V6ONLY 24 /* IPV6CTL_RTEXPIRE 25 deprecated */ /* IPV6CTL_RTMINEXPIRE 26 deprecated */ /* IPV6CTL_RTMAXCACHE 27 deprecated */ #define IPV6CTL_USETEMPADDR 32 /* use temporary addresses (RFC3041) */ #define IPV6CTL_TEMPPLTIME 33 /* preferred lifetime for tmpaddrs */ #define IPV6CTL_TEMPVLTIME 34 /* valid lifetime for tmpaddrs */ #define IPV6CTL_AUTO_LINKLOCAL 35 /* automatic link-local addr assign */ #define IPV6CTL_RIP6STATS 36 /* raw_ip6 stats */ #define IPV6CTL_PREFER_TEMPADDR 37 /* prefer temporary addr as src */ #define IPV6CTL_ADDRCTLPOLICY 38 /* get/set address selection policy */ #define IPV6CTL_USE_DEFAULTZONE 39 /* use default scope zone */ #define IPV6CTL_MAXFRAGS 41 /* max fragments */ #if 0 #define IPV6CTL_IFQ 42 /* ip6intrq node */ #define IPV6CTL_ISATAPRTR 43 /* isatap router */ #endif #define IPV6CTL_MCAST_PMTU 44 /* enable pMTU discovery for multicast? */ /* New entries should be added here from current IPV6CTL_MAXID value. */ /* to define items, should talk with KAME guys first, for *BSD compatibility */ #define IPV6CTL_STEALTH 45 #define ICMPV6CTL_ND6_ONLINKNSRFC4861 47 #define IPV6CTL_NO_RADR 48 /* No defroute from RA */ #define IPV6CTL_NORBIT_RAIF 49 /* Disable R-bit in NA on RA * receiving IF. */ #define IPV6CTL_RFC6204W3 50 /* Accept defroute even when forwarding enabled */ #define IPV6CTL_MAXID 51 #endif /* __BSD_VISIBLE */ /* * Since both netinet/ and netinet6/ call into netipsec/ and netpfil/, * the protocol-specific mbuf flags are shared between them.
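* For example, M_IP_NEXTHOP and M_IP6_NEXTHOP below both alias M_PROTO2; the overlap is safe because a given packet belongs to only one of the two protocols at a time.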
*/ #define M_FASTFWD_OURS M_PROTO1 /* changed dst to local */ #define M_IP6_NEXTHOP M_PROTO2 /* explicit ip nexthop */ #define M_IP_NEXTHOP M_PROTO2 /* explicit ip nexthop */ #define M_SKIP_FIREWALL M_PROTO3 /* skip firewall processing */ #define M_AUTHIPHDR M_PROTO4 #define M_DECRYPTED M_PROTO5 #define M_LOOP M_PROTO6 #define M_AUTHIPDGM M_PROTO7 #define M_RTALERT_MLD M_PROTO8 #ifdef _KERNEL struct cmsghdr; struct ip6_hdr; int in6_cksum_pseudo(struct ip6_hdr *, uint32_t, uint8_t, uint16_t); int in6_cksum(struct mbuf *, u_int8_t, u_int32_t, u_int32_t); int in6_cksum_partial(struct mbuf *, u_int8_t, u_int32_t, u_int32_t, u_int32_t); int in6_localaddr(struct in6_addr *); int in6_localip(struct in6_addr *); int in6_ifhasaddr(struct ifnet *, struct in6_addr *); int in6_addrscope(const struct in6_addr *); char *ip6_sprintf(char *, const struct in6_addr *); struct in6_ifaddr *in6_ifawithifp(struct ifnet *, struct in6_addr *); extern void in6_if_up(struct ifnet *); struct sockaddr; extern u_char ip6_protox[]; void in6_sin6_2_sin(struct sockaddr_in *sin, struct sockaddr_in6 *sin6); void in6_sin_2_v4mapsin6(struct sockaddr_in *sin, struct sockaddr_in6 *sin6); void in6_sin6_2_sin_in_sock(struct sockaddr *nam); void in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam); extern void addrsel_policy_init(void); #define satosin6(sa) ((struct sockaddr_in6 *)(sa)) #define sin6tosa(sin6) ((struct sockaddr *)(sin6)) #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) #endif /* _KERNEL */ #ifndef _SIZE_T_DECLARED typedef __size_t size_t; #define _SIZE_T_DECLARED #endif #ifndef _SOCKLEN_T_DECLARED typedef __socklen_t socklen_t; #define _SOCKLEN_T_DECLARED #endif #if __BSD_VISIBLE __BEGIN_DECLS struct cmsghdr; extern int inet6_option_space(int); extern int inet6_option_init(void *, struct cmsghdr **, int); extern int inet6_option_append(struct cmsghdr *, const uint8_t *, int, int); extern uint8_t *inet6_option_alloc(struct cmsghdr *, int, int, int); extern int inet6_option_next(const struct cmsghdr *, uint8_t **); extern int inet6_option_find(const struct cmsghdr *, uint8_t **, int); extern size_t inet6_rthdr_space(int, int); extern struct cmsghdr *inet6_rthdr_init(void *, int); extern int inet6_rthdr_add(struct cmsghdr *, const struct in6_addr *, unsigned int); extern int inet6_rthdr_lasthop(struct cmsghdr *, unsigned int); #if 0 /* not implemented yet */ extern int inet6_rthdr_reverse(const struct cmsghdr *, struct cmsghdr *); #endif extern int inet6_rthdr_segments(const struct cmsghdr *); extern struct in6_addr *inet6_rthdr_getaddr(struct cmsghdr *, int); extern int inet6_rthdr_getflags(const struct cmsghdr *, int); extern int inet6_opt_init(void *, socklen_t); extern int inet6_opt_append(void *, socklen_t, int, uint8_t, socklen_t, uint8_t, void **); extern int inet6_opt_finish(void *, socklen_t, int); extern int inet6_opt_set_val(void *, int, void *, socklen_t); extern int inet6_opt_next(void *, socklen_t, int, uint8_t *, socklen_t *, void **); extern int inet6_opt_find(void *, socklen_t, int, uint8_t, socklen_t *, void **); extern int inet6_opt_get_val(void *, int, void *, socklen_t); extern socklen_t inet6_rth_space(int, int); extern void *inet6_rth_init(void *, socklen_t, int, int); extern int inet6_rth_add(void *, const struct in6_addr *); extern int inet6_rth_reverse(const void *, void *); extern int inet6_rth_segments(const void *); extern struct in6_addr *inet6_rth_getaddr(const void *, int); __END_DECLS #endif /* __BSD_VISIBLE */ #endif /* !_NETINET6_IN6_H_ */ diff --git a/sys/netinet6/in6_pcb.c 
b/sys/netinet6/in6_pcb.c index 9f876a4cbb92..67ecb8ca0b69 100644 --- a/sys/netinet6/in6_pcb.c +++ b/sys/netinet6/in6_pcb.c @@ -1,1281 +1,1281 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * Copyright (c) 2010-2011 Juniper Networks, Inc. * All rights reserved. * * Portions of this software were developed by Robert N. M. Watson under * contract to Juniper Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: in6_pcb.c,v 1.31 2001/05/21 05:45:10 jinmei Exp $ */ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_pcbgroup.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static struct inpcb *in6_pcblookup_hash_locked(struct inpcbinfo *, struct in6_addr *, u_int, struct in6_addr *, u_int, int, struct ifnet *); int in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam, struct ucred *cred) { struct socket *so = inp->inp_socket; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)NULL; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; u_short lport = 0; int error, lookupflags = 0; int reuseport = (so->so_options & SO_REUSEPORT); INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); if (TAILQ_EMPTY(&V_in6_ifaddrhead)) /* XXX broken! */ return (EADDRNOTAVAIL); if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) return (EINVAL); if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) lookupflags = INPLOOKUP_WILDCARD; if (nam == NULL) { if ((error = prison_local_ip6(cred, &inp->in6p_laddr, ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) return (error); } else { sin6 = (struct sockaddr_in6 *)nam; if (nam->sa_len != sizeof(*sin6)) return (EINVAL); /* * family check. */ if (nam->sa_family != AF_INET6) return (EAFNOSUPPORT); if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) return(error); if ((error = prison_local_ip6(cred, &sin6->sin6_addr, ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) return (error); lport = sin6->sin6_port; if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { /* * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; * allow complete duplication of binding if * SO_REUSEPORT is set, or if SO_REUSEADDR is set * and a multicast address is bound on both * new and duplicated sockets. */ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0) reuseport = SO_REUSEADDR|SO_REUSEPORT; } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { struct ifaddr *ifa; sin6->sin6_port = 0; /* yech... */ if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin6)) == NULL && (inp->inp_flags & INP_BINDANY) == 0) { return (EADDRNOTAVAIL); } /* * XXX: binding to an anycast address might accidentally * cause sending a packet with an anycast source address. * We should allow binding to a deprecated address, since * the application dares to use it.
*/ if (ifa != NULL && ((struct in6_ifaddr *)ifa)->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) { ifa_free(ifa); return (EADDRNOTAVAIL); } if (ifa != NULL) ifa_free(ifa); } if (lport) { struct inpcb *t; struct tcptw *tw; /* GROSS */ if (ntohs(lport) <= V_ipport_reservedhigh && ntohs(lport) >= V_ipport_reservedlow && priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0)) return (EACCES); if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) && priv_check_cred(inp->inp_cred, PRIV_NETINET_REUSEPORT, 0) != 0) { t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr, lport, INPLOOKUP_WILDCARD, cred); if (t && ((inp->inp_flags2 & INP_BINDMULTI) == 0) && ((t->inp_flags & INP_TIMEWAIT) == 0) && (so->so_type != SOCK_STREAM || IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) && (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) || (t->inp_flags2 & INP_REUSEPORT) == 0) && (inp->inp_cred->cr_uid != t->inp_cred->cr_uid)) return (EADDRINUSE); /* * If the socket is a BINDMULTI socket, then * the credentials need to match and the * original socket also has to have been bound * with BINDMULTI. */ if (t && (! in_pcbbind_check_bindmulti(inp, t))) return (EADDRINUSE); #ifdef INET if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6); t = in_pcblookup_local(pcbinfo, sin.sin_addr, lport, INPLOOKUP_WILDCARD, cred); if (t && ((inp->inp_flags2 & INP_BINDMULTI) == 0) && ((t->inp_flags & INP_TIMEWAIT) == 0) && (so->so_type != SOCK_STREAM || ntohl(t->inp_faddr.s_addr) == INADDR_ANY) && (inp->inp_cred->cr_uid != t->inp_cred->cr_uid)) return (EADDRINUSE); if (t && (! in_pcbbind_check_bindmulti(inp, t))) return (EADDRINUSE); } #endif } t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr, lport, lookupflags, cred); if (t && (t->inp_flags & INP_TIMEWAIT)) { /* * XXXRW: If an inpcb has had its timewait * state recycled, we treat the address as * being in use (for now). This is better * than a panic, but not desirable. */ tw = intotw(t); if (tw == NULL || (reuseport & tw->tw_so_options) == 0) return (EADDRINUSE); } else if (t && (reuseport & inp_so_options(t)) == 0) { return (EADDRINUSE); } #ifdef INET if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6); t = in_pcblookup_local(pcbinfo, sin.sin_addr, lport, lookupflags, cred); if (t && t->inp_flags & INP_TIMEWAIT) { tw = intotw(t); if (tw == NULL) return (EADDRINUSE); if ((reuseport & tw->tw_so_options) == 0 && (ntohl(t->inp_laddr.s_addr) != INADDR_ANY || ((inp->inp_vflag & INP_IPV6PROTO) == (t->inp_vflag & INP_IPV6PROTO)))) return (EADDRINUSE); } else if (t && (reuseport & inp_so_options(t)) == 0 && (ntohl(t->inp_laddr.s_addr) != INADDR_ANY || (t->inp_vflag & INP_IPV6PROTO) != 0)) return (EADDRINUSE); } #endif } inp->in6p_laddr = sin6->sin6_addr; } if (lport == 0) { if ((error = in6_pcbsetport(&inp->in6p_laddr, inp, cred)) != 0) { /* Undo an address bind that may have occurred. */ inp->in6p_laddr = in6addr_any; return (error); } } else { inp->inp_lport = lport; if (in_pcbinshash(inp) != 0) { inp->in6p_laddr = in6addr_any; inp->inp_lport = 0; return (EAGAIN); } } return (0); } /* * Transform old in6_pcbconnect() into an inner subroutine for new * in6_pcbconnect(): Do some validity-checking on the remote * address (in mbuf 'nam') and then determine local host address * (i.e., which interface) to use to access that remote host.
* * This preserves definition of in6_pcbconnect(), while supporting a * slightly different version for T/TCP. (This is more than * a bit of a kludge, but cleaning up the internal interfaces would * have forced minor changes in every protocol). */ static int in6_pcbladdr(register struct inpcb *inp, struct sockaddr *nam, struct in6_addr *plocal_addr6) { register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; int error = 0; struct ifnet *ifp = NULL; int scope_ambiguous = 0; struct in6_addr in6a; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); /* XXXRW: why? */ if (nam->sa_len != sizeof (*sin6)) return (EINVAL); if (sin6->sin6_family != AF_INET6) return (EAFNOSUPPORT); if (sin6->sin6_port == 0) return (EADDRNOTAVAIL); if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone) scope_ambiguous = 1; if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) return(error); if (!TAILQ_EMPTY(&V_in6_ifaddrhead)) { /* * If the destination address is the UNSPECIFIED addr, * use the loopback addr, e.g., ::1. */ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) sin6->sin6_addr = in6addr_loopback; } if ((error = prison_remote_ip6(inp->inp_cred, &sin6->sin6_addr)) != 0) return (error); error = in6_selectsrc(sin6, inp->in6p_outputopts, - inp, NULL, inp->inp_cred, &ifp, &in6a); + inp, inp->inp_cred, &ifp, &in6a); if (error) return (error); if (ifp && scope_ambiguous && (error = in6_setscope(&sin6->sin6_addr, ifp, NULL)) != 0) { return(error); } /* * Do not update this earlier, in case we return with an error. * * XXX: this in6_selectsrc result might replace the bound local * address with the address specified by setsockopt(IPV6_PKTINFO). * Is it the intended behavior? */ *plocal_addr6 = in6a; /* * Don't do pcblookup call here; return interface in * plocal_addr6 * and exit to the caller, which will do the lookup. */ return (0); } /* * Outer subroutine: * Connect from a socket to a specified address. * Both address and port must be specified in argument sin. * If we don't have a local address for this socket yet, * then pick one. */ int in6_pcbconnect_mbuf(register struct inpcb *inp, struct sockaddr *nam, struct ucred *cred, struct mbuf *m) { struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; struct in6_addr addr6; int error; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); /* * Call inner routine, to assign local interface address. * in6_pcbladdr() may automatically fill in sin6_scope_id. */ if ((error = in6_pcbladdr(inp, nam, &addr6)) != 0) return (error); if (in6_pcblookup_hash_locked(pcbinfo, &sin6->sin6_addr, sin6->sin6_port, IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ?
&addr6 : &inp->in6p_laddr, inp->inp_lport, 0, NULL) != NULL) { return (EADDRINUSE); } if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { if (inp->inp_lport == 0) { error = in6_pcbbind(inp, (struct sockaddr *)0, cred); if (error) return (error); } inp->in6p_laddr = addr6; } inp->in6p_faddr = sin6->sin6_addr; inp->inp_fport = sin6->sin6_port; /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ inp->inp_flow &= ~IPV6_FLOWLABEL_MASK; if (inp->inp_flags & IN6P_AUTOFLOWLABEL) inp->inp_flow |= (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK); in_pcbrehash_mbuf(inp, m); return (0); } int in6_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred) { return (in6_pcbconnect_mbuf(inp, nam, cred, NULL)); } void in6_pcbdisconnect(struct inpcb *inp) { INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); bzero((caddr_t)&inp->in6p_faddr, sizeof(inp->in6p_faddr)); inp->inp_fport = 0; /* clear flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ inp->inp_flow &= ~IPV6_FLOWLABEL_MASK; in_pcbrehash(inp); } struct sockaddr * in6_sockaddr(in_port_t port, struct in6_addr *addr_p) { struct sockaddr_in6 *sin6; sin6 = malloc(sizeof *sin6, M_SONAME, M_WAITOK); bzero(sin6, sizeof *sin6); sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(*sin6); sin6->sin6_port = port; sin6->sin6_addr = *addr_p; (void)sa6_recoverscope(sin6); /* XXX: should catch errors */ return (struct sockaddr *)sin6; } struct sockaddr * in6_v4mapsin6_sockaddr(in_port_t port, struct in_addr *addr_p) { struct sockaddr_in sin; struct sockaddr_in6 *sin6_p; bzero(&sin, sizeof sin); sin.sin_family = AF_INET; sin.sin_len = sizeof(sin); sin.sin_port = port; sin.sin_addr = *addr_p; sin6_p = malloc(sizeof *sin6_p, M_SONAME, M_WAITOK); in6_sin_2_v4mapsin6(&sin, sin6_p); return (struct sockaddr *)sin6_p; } int in6_getsockaddr(struct socket *so, struct sockaddr **nam) { register struct inpcb *inp; struct in6_addr addr; in_port_t port; inp = sotoinpcb(so); KASSERT(inp != NULL, ("in6_getsockaddr: inp == NULL")); INP_RLOCK(inp); port = inp->inp_lport; addr = inp->in6p_laddr; INP_RUNLOCK(inp); *nam = in6_sockaddr(port, &addr); return 0; } int in6_getpeeraddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp; struct in6_addr addr; in_port_t port; inp = sotoinpcb(so); KASSERT(inp != NULL, ("in6_getpeeraddr: inp == NULL")); INP_RLOCK(inp); port = inp->inp_fport; addr = inp->in6p_faddr; INP_RUNLOCK(inp); *nam = in6_sockaddr(port, &addr); return 0; } int in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp; int error; inp = sotoinpcb(so); KASSERT(inp != NULL, ("in6_mapped_sockaddr: inp == NULL")); #ifdef INET if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) { error = in_getsockaddr(so, nam); if (error == 0) in6_sin_2_v4mapsin6_in_sock(nam); } else #endif { /* scope issues will be handled in in6_getsockaddr(). */ error = in6_getsockaddr(so, nam); } return error; } int in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp; int error; inp = sotoinpcb(so); KASSERT(inp != NULL, ("in6_mapped_peeraddr: inp == NULL")); #ifdef INET if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) { error = in_getpeeraddr(so, nam); if (error == 0) in6_sin_2_v4mapsin6_in_sock(nam); } else #endif /* scope issues will be handled in in6_getpeeraddr(). */ error = in6_getpeeraddr(so, nam); return error; } /* * Pass some notification to all connections of a protocol * associated with address dst. The local address and/or port numbers * may be specified to limit the search. 
The "usual action" will be * taken, depending on the ctlinput cmd. The caller must filter any * cmds that are uninteresting (e.g., no error in the map). * Call the protocol-specific routine (if any) to report * any errors for each matching socket. */ void in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst, u_int fport_arg, const struct sockaddr *src, u_int lport_arg, int cmd, void *cmdarg, struct inpcb *(*notify)(struct inpcb *, int)) { struct inpcb *inp, *inp_temp; struct sockaddr_in6 sa6_src, *sa6_dst; u_short fport = fport_arg, lport = lport_arg; u_int32_t flowinfo; int errno; if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET6) return; sa6_dst = (struct sockaddr_in6 *)dst; if (IN6_IS_ADDR_UNSPECIFIED(&sa6_dst->sin6_addr)) return; /* * note that src can be NULL when we get notified by local fragmentation. */ sa6_src = (src == NULL) ? sa6_any : *(const struct sockaddr_in6 *)src; flowinfo = sa6_src.sin6_flowinfo; /* * Redirects go to all references to the destination, * and use in6_rtchange to invalidate the route cache. * Dead host indications: also use in6_rtchange to invalidate * the cache, and deliver the error to all the sockets. * Otherwise, if we have knowledge of the local port and address, * deliver only to that socket. */ if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) { fport = 0; lport = 0; bzero((caddr_t)&sa6_src.sin6_addr, sizeof(sa6_src.sin6_addr)); if (cmd != PRC_HOSTDEAD) notify = in6_rtchange; } errno = inet6ctlerrmap[cmd]; INP_INFO_WLOCK(pcbinfo); LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) { INP_WLOCK(inp); if ((inp->inp_vflag & INP_IPV6) == 0) { INP_WUNLOCK(inp); continue; } /* * If the error designates a new path MTU for a destination * and the application (associated with this socket) wanted to * know the value, notify. * XXX: should we avoid notifying the value to TCP sockets? */ if (cmd == PRC_MSGSIZE && cmdarg != NULL) ip6_notify_pmtu(inp, (struct sockaddr_in6 *)dst, *(u_int32_t *)cmdarg); /* * Detect if we should notify the error. If no source and * destination ports are specified, but non-zero flowinfo and * local address match, notify the error. This is the case * when the error is delivered with an encrypted buffer * by ESP. Otherwise, just compare addresses and ports * as usual. */ if (lport == 0 && fport == 0 && flowinfo && inp->inp_socket != NULL && flowinfo == (inp->inp_flow & IPV6_FLOWLABEL_MASK) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr)) goto do_notify; else if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &sa6_dst->sin6_addr) || inp->inp_socket == 0 || (lport && inp->inp_lport != lport) || (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) && !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr)) || (fport && inp->inp_fport != fport)) { INP_WUNLOCK(inp); continue; } do_notify: if (notify) { if ((*notify)(inp, errno)) INP_WUNLOCK(inp); } else INP_WUNLOCK(inp); } INP_INFO_WUNLOCK(pcbinfo); } /* * Lookup a PCB based on the local address and port. Caller must hold the * hash lock. No inpcb locks or references are acquired.
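* * As a sketch of the wildcard branch below: each PCB bound to the port is scored by how many of its address components must be wildcarded to match the request (0-2), the lowest-scoring candidate wins, and a perfect score of 0 (an exact match) ends the scan early.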
*/ struct inpcb * in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr, u_short lport, int lookupflags, struct ucred *cred) { register struct inpcb *inp; int matchwild = 3, wildcard; KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); INP_HASH_WLOCK_ASSERT(pcbinfo); if ((lookupflags & INPLOOKUP_WILDCARD) == 0) { struct inpcbhead *head; /* * Look for an unconnected (wildcard foreign addr) PCB that * matches the local address and port we're looking for. */ head = &pcbinfo->ipi_hashbase[INP_PCBHASH( INP6_PCBHASHKEY(&in6addr_any), lport, 0, pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && inp->inp_lport == lport) { /* Found. */ if (cred == NULL || prison_equal_ip6(cred->cr_prison, inp->inp_cred->cr_prison)) return (inp); } } /* * Not found. */ return (NULL); } else { struct inpcbporthead *porthash; struct inpcbport *phd; struct inpcb *match = NULL; /* * Best fit PCB lookup. * * First see if this local port is in use by looking on the * port hash list. */ porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport, pcbinfo->ipi_porthashmask)]; LIST_FOREACH(phd, porthash, phd_hash) { if (phd->phd_port == lport) break; } if (phd != NULL) { /* * Port is in use by one or more PCBs. Look for best * fit. */ LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { wildcard = 0; if (cred != NULL && !prison_equal_ip6(cred->cr_prison, inp->inp_cred->cr_prison)) continue; /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) wildcard++; if (!IN6_IS_ADDR_UNSPECIFIED( &inp->in6p_laddr)) { if (IN6_IS_ADDR_UNSPECIFIED(laddr)) wildcard++; else if (!IN6_ARE_ADDR_EQUAL( &inp->in6p_laddr, laddr)) continue; } else { if (!IN6_IS_ADDR_UNSPECIFIED(laddr)) wildcard++; } if (wildcard < matchwild) { match = inp; matchwild = wildcard; if (matchwild == 0) break; } } } return (match); } } void in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp) { struct inpcb *in6p; struct ip6_moptions *im6o; int i, gap; INP_INFO_WLOCK(pcbinfo); LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) { INP_WLOCK(in6p); im6o = in6p->in6p_moptions; if ((in6p->inp_vflag & INP_IPV6) && im6o != NULL) { /* * Unselect the outgoing ifp for multicast if it * is being detached. */ if (im6o->im6o_multicast_ifp == ifp) im6o->im6o_multicast_ifp = NULL; /* * Drop multicast group membership if we joined * through the interface being detached. */ gap = 0; for (i = 0; i < im6o->im6o_num_memberships; i++) { if (im6o->im6o_membership[i]->in6m_ifp == ifp) { in6_mc_leave(im6o->im6o_membership[i], NULL); gap++; } else if (gap != 0) { im6o->im6o_membership[i - gap] = im6o->im6o_membership[i]; } } im6o->im6o_num_memberships -= gap; } INP_WUNLOCK(in6p); } INP_INFO_WUNLOCK(pcbinfo); } /* * Check for alternatives when higher level complains * about service problems. For now, invalidate cached * routing information. If the route was created dynamically * (by a redirect), time to try a default gateway again. */ void in6_losing(struct inpcb *in6p) { /* * We don't store route pointers in the routing table anymore */ return; } /* * After a routing change, flush old routing * and allocate a (hopefully) better one. 
*/ struct inpcb * in6_rtchange(struct inpcb *inp, int errno) { /* * We don't store route pointers in the routing table anymore */ return inp; } #ifdef PCBGROUP /* * Lookup PCB in hash list, using pcbgroup tables. */ static struct inpcb * in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, struct in6_addr *faddr, u_int fport_arg, struct in6_addr *laddr, u_int lport_arg, int lookupflags, struct ifnet *ifp) { struct inpcbhead *head; struct inpcb *inp, *tmpinp; u_short fport = fport_arg, lport = lport_arg; /* * First look for an exact match. */ tmpinp = NULL; INP_GROUP_LOCK(pcbgroup); head = &pcbgroup->ipg_hashbase[INP_PCBHASH( INP6_PCBHASHKEY(faddr), lport, fport, pcbgroup->ipg_hashmask)]; LIST_FOREACH(inp, head, inp_pcbgrouphash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && inp->inp_fport == fport && inp->inp_lport == lport) { /* * XXX We should be able to directly return * the inp here, without any checks. * Well unless both bound with SO_REUSEPORT? */ if (prison_flag(inp->inp_cred, PR_IP6)) goto found; if (tmpinp == NULL) tmpinp = inp; } } if (tmpinp != NULL) { inp = tmpinp; goto found; } /* * Then look for a wildcard match in the pcbgroup. */ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { struct inpcb *local_wild = NULL, *local_exact = NULL; struct inpcb *jail_wild = NULL; int injail; /* * Order of socket selection - we always prefer jails. * 1. jailed, non-wild. * 2. jailed, wild. * 3. non-jailed, non-wild. * 4. non-jailed, wild. */ head = &pcbgroup->ipg_hashbase[ INP_PCBHASH(INADDR_ANY, lport, 0, pcbgroup->ipg_hashmask)]; LIST_FOREACH(inp, head, inp_pcbgrouphash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || inp->inp_lport != lport) { continue; } injail = prison_flag(inp->inp_cred, PR_IP6); if (injail) { if (prison_check_ip6(inp->inp_cred, laddr) != 0) continue; } else { if (local_exact != NULL) continue; } if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) { if (injail) goto found; else local_exact = inp; } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { if (injail) jail_wild = inp; else local_wild = inp; } } /* LIST_FOREACH */ inp = jail_wild; if (inp == NULL) inp = local_exact; if (inp == NULL) inp = local_wild; if (inp != NULL) goto found; } /* * Then look for a wildcard match, if requested. */ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { struct inpcb *local_wild = NULL, *local_exact = NULL; struct inpcb *jail_wild = NULL; int injail; /* * Order of socket selection - we always prefer jails. * 1. jailed, non-wild. * 2. jailed, wild. * 3. non-jailed, non-wild. * 4. non-jailed, wild.
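* (A jailed exact match ends the scan immediately; the other candidates are remembered in jail_wild, local_exact and local_wild, and the best survivor is picked in the order above once the list has been walked.)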
*/ head = &pcbinfo->ipi_wildbase[INP_PCBHASH( INP6_PCBHASHKEY(&in6addr_any), lport, 0, pcbinfo->ipi_wildmask)]; LIST_FOREACH(inp, head, inp_pcbgroup_wild) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || inp->inp_lport != lport) { continue; } injail = prison_flag(inp->inp_cred, PR_IP6); if (injail) { if (prison_check_ip6(inp->inp_cred, laddr) != 0) continue; } else { if (local_exact != NULL) continue; } if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) { if (injail) goto found; else local_exact = inp; } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { if (injail) jail_wild = inp; else local_wild = inp; } } /* LIST_FOREACH */ inp = jail_wild; if (inp == NULL) inp = local_exact; if (inp == NULL) inp = local_wild; if (inp != NULL) goto found; } /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */ INP_GROUP_UNLOCK(pcbgroup); return (NULL); found: in_pcbref(inp); INP_GROUP_UNLOCK(pcbgroup); if (lookupflags & INPLOOKUP_WLOCKPCB) { INP_WLOCK(inp); if (in_pcbrele_wlocked(inp)) return (NULL); } else if (lookupflags & INPLOOKUP_RLOCKPCB) { INP_RLOCK(inp); if (in_pcbrele_rlocked(inp)) return (NULL); } else panic("%s: locking bug", __func__); return (inp); } #endif /* PCBGROUP */ /* * Lookup PCB in hash list. */ static struct inpcb * in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport_arg, struct in6_addr *laddr, u_int lport_arg, int lookupflags, struct ifnet *ifp) { struct inpcbhead *head; struct inpcb *inp, *tmpinp; u_short fport = fport_arg, lport = lport_arg; KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); INP_HASH_LOCK_ASSERT(pcbinfo); /* * First look for an exact match. */ tmpinp = NULL; head = &pcbinfo->ipi_hashbase[INP_PCBHASH( INP6_PCBHASHKEY(faddr), lport, fport, pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && inp->inp_fport == fport && inp->inp_lport == lport) { /* * XXX We should be able to directly return * the inp here, without any checks. * Well unless both bound with SO_REUSEPORT? */ if (prison_flag(inp->inp_cred, PR_IP6)) return (inp); if (tmpinp == NULL) tmpinp = inp; } } if (tmpinp != NULL) return (tmpinp); /* * Then look for a wildcard match, if requested. */ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { struct inpcb *local_wild = NULL, *local_exact = NULL; struct inpcb *jail_wild = NULL; int injail; /* * Order of socket selection - we always prefer jails. * 1. jailed, non-wild. * 2. jailed, wild. * 3. non-jailed, non-wild. * 4. non-jailed, wild.
*/ head = &pcbinfo->ipi_hashbase[INP_PCBHASH( INP6_PCBHASHKEY(&in6addr_any), lport, 0, pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || inp->inp_lport != lport) { continue; } injail = prison_flag(inp->inp_cred, PR_IP6); if (injail) { if (prison_check_ip6(inp->inp_cred, laddr) != 0) continue; } else { if (local_exact != NULL) continue; } if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) { if (injail) return (inp); else local_exact = inp; } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { if (injail) jail_wild = inp; else local_wild = inp; } } /* LIST_FOREACH */ if (jail_wild != NULL) return (jail_wild); if (local_exact != NULL) return (local_exact); if (local_wild != NULL) return (local_wild); } /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */ /* * Not found. */ return (NULL); } /* * Lookup PCB in hash list, using pcbinfo tables. This variation locks the * hash list lock, and will return the inpcb locked (i.e., requires * INPLOOKUP_LOCKPCB). */ static struct inpcb * in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp) { struct inpcb *inp; INP_HASH_RLOCK(pcbinfo); inp = in6_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport, (lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp); if (inp != NULL) { in_pcbref(inp); INP_HASH_RUNLOCK(pcbinfo); if (lookupflags & INPLOOKUP_WLOCKPCB) { INP_WLOCK(inp); if (in_pcbrele_wlocked(inp)) return (NULL); } else if (lookupflags & INPLOOKUP_RLOCKPCB) { INP_RLOCK(inp); if (in_pcbrele_rlocked(inp)) return (NULL); } else panic("%s: locking bug", __func__); } else INP_HASH_RUNLOCK(pcbinfo); return (inp); } /* * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf * from which a pre-calculated hash value may be extracted. * * Possibly more of this logic should be in in6_pcbgroup.c. */ struct inpcb * in6_pcblookup(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp) { #if defined(PCBGROUP) && !defined(RSS) struct inpcbgroup *pcbgroup; #endif KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, ("%s: LOCKPCB not set", __func__)); /* * When not using RSS, use connection groups in preference to the * reservation table when looking up 4-tuples. When using RSS, just * use the reservation table, due to the cost of the Toeplitz hash * in software. * * XXXRW: This policy belongs in the pcbgroup code, as in principle * we could be doing RSS with a non-Toeplitz hash that is affordable * in software. 
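* * An illustrative call, a sketch rather than a quote of any specific caller: for a received TCP segment whose ip6 and th headers have been validated, in6_pcblookup(&V_tcbinfo, &ip6->ip6_src, th->th_sport, &ip6->ip6_dst, th->th_dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif) returns the matching PCB read-locked, as requested by INPLOOKUP_RLOCKPCB.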
*/ #if defined(PCBGROUP) && !defined(RSS) if (in_pcbgroup_enabled(pcbinfo)) { pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr, fport); return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, laddr, lport, lookupflags, ifp)); } #endif return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, lookupflags, ifp)); } struct inpcb * in6_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp, struct mbuf *m) { #ifdef PCBGROUP struct inpcbgroup *pcbgroup; #endif KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, ("%s: LOCKPCB not set", __func__)); #ifdef PCBGROUP /* * If we can use a hardware-generated hash to look up the connection * group, use that connection group to find the inpcb. Otherwise * fall back on a software hash -- or the reservation table if we're * using RSS. * * XXXRW: As above, that policy belongs in the pcbgroup code. */ if (in_pcbgroup_enabled(pcbinfo) && M_HASHTYPE_TEST(m, M_HASHTYPE_NONE) == 0) { pcbgroup = in6_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m), m->m_pkthdr.flowid); if (pcbgroup != NULL) return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, laddr, lport, lookupflags, ifp)); #ifndef RSS pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr, fport); return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, laddr, lport, lookupflags, ifp)); #endif } #endif return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, lookupflags, ifp)); } void init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m) { struct ip6_hdr *ip; ip = mtod(m, struct ip6_hdr *); bzero(sin6, sizeof(*sin6)); sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; sin6->sin6_addr = ip->ip6_src; (void)sa6_recoverscope(sin6); /* XXX: should catch errors... */ return; } diff --git a/sys/netinet6/in6_src.c b/sys/netinet6/in6_src.c index c2e26f8fd2e2..fb362e25013a 100644 --- a/sys/netinet6/in6_src.c +++ b/sys/netinet6/in6_src.c @@ -1,1169 +1,1168 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: in6_src.c,v 1.132 2003/08/26 04:42:27 keiichi Exp $ */ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_mpath.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RADIX_MPATH #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static struct mtx addrsel_lock; #define ADDRSEL_LOCK_INIT() mtx_init(&addrsel_lock, "addrsel_lock", NULL, MTX_DEF) #define ADDRSEL_LOCK() mtx_lock(&addrsel_lock) #define ADDRSEL_UNLOCK() mtx_unlock(&addrsel_lock) #define ADDRSEL_LOCK_ASSERT() mtx_assert(&addrsel_lock, MA_OWNED) static struct sx addrsel_sxlock; #define ADDRSEL_SXLOCK_INIT() sx_init(&addrsel_sxlock, "addrsel_sxlock") #define ADDRSEL_SLOCK() sx_slock(&addrsel_sxlock) #define ADDRSEL_SUNLOCK() sx_sunlock(&addrsel_sxlock) #define ADDRSEL_XLOCK() sx_xlock(&addrsel_sxlock) #define ADDRSEL_XUNLOCK() sx_xunlock(&addrsel_sxlock) #define ADDR_LABEL_NOTAPP (-1) static VNET_DEFINE(struct in6_addrpolicy, defaultaddrpolicy); #define V_defaultaddrpolicy VNET(defaultaddrpolicy) VNET_DEFINE(int, ip6_prefer_tempaddr) = 0; static int selectroute(struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *, struct ifnet **, struct rtentry **, int, u_int); static int in6_selectif(struct sockaddr_in6 *, struct ip6_pktopts *, - struct ip6_moptions *, struct route_in6 *ro, struct ifnet **, + struct ip6_moptions *, struct ifnet **, struct ifnet *, u_int); static struct in6_addrpolicy *lookup_addrsel_policy(struct sockaddr_in6 *); static void init_policy_queue(void); static int add_addrsel_policyent(struct in6_addrpolicy *); static int delete_addrsel_policyent(struct in6_addrpolicy *); static int walk_addrsel_policy(int (*)(struct in6_addrpolicy *, void *), void *); static int dump_addrsel_policyent(struct in6_addrpolicy *, void *); static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *); /* * Return an IPv6 address, which is the most appropriate for a given * destination and user-specified options. * If necessary, this function looks up the routing table and returns * an entry to the caller for later use. */ #define REPLACE(r) do {\ IP6STAT_INC(ip6s_sources_rule[(r)]); \ rule = (r); \ /* { \ char ip6buf[INET6_ADDRSTRLEN], ip6b[INET6_ADDRSTRLEN]; \ printf("in6_selectsrc: replace %s with %s by %d\n", ia_best ? ip6_sprintf(ip6buf, &ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(ip6b, &ia->ia_addr.sin6_addr), (r)); \ } */ \ goto replace; \ } while(0) #define NEXT(r) do {\ /* { \ char ip6buf[INET6_ADDRSTRLEN], ip6b[INET6_ADDRSTRLEN]; \ printf("in6_selectsrc: keep %s against %s by %d\n", ia_best ?
ip6_sprintf(ip6buf, &ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(ip6b, &ia->ia_addr.sin6_addr), (r)); \ } */ \ goto next; /* XXX: we can't use 'continue' here */ \ } while(0) #define BREAK(r) do { \ IP6STAT_INC(ip6s_sources_rule[(r)]); \ rule = (r); \ goto out; /* XXX: we can't use 'break' here */ \ } while(0) int in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, - struct inpcb *inp, struct route_in6 *ro, struct ucred *cred, + struct inpcb *inp, struct ucred *cred, struct ifnet **ifpp, struct in6_addr *srcp) { struct rm_priotracker in6_ifa_tracker; struct in6_addr dst, tmp; struct ifnet *ifp = NULL, *oifp = NULL; struct in6_ifaddr *ia = NULL, *ia_best = NULL; struct in6_pktinfo *pi = NULL; int dst_scope = -1, best_scope = -1, best_matchlen = -1; struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL; u_int32_t odstzone; int prefer_tempaddr; int error, rule; struct ip6_moptions *mopts; KASSERT(srcp != NULL, ("%s: srcp is NULL", __func__)); dst = dstsock->sin6_addr; /* make a copy for local operation */ if (ifpp) { /* * Save a possibly passed in ifp for in6_selectsrc. Only * neighbor discovery code should use this feature, where * we may know the interface but not the FIB number holding * the connected subnet in case someone deleted it from the * default FIB and we need to check the interface. */ if (*ifpp != NULL) oifp = *ifpp; *ifpp = NULL; } if (inp != NULL) { INP_LOCK_ASSERT(inp); mopts = inp->in6p_moptions; } else { mopts = NULL; } /* * If the source address is explicitly specified by the caller, * check if the requested source address is indeed a unicast address * assigned to the node, and can be used as the packet's source * address. If everything is okay, use the address as source. */ if (opts && (pi = opts->ip6po_pktinfo) && !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) { struct sockaddr_in6 srcsock; struct in6_ifaddr *ia6; /* get the outgoing interface */ - if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp, oifp, + if ((error = in6_selectif(dstsock, opts, mopts, &ifp, oifp, (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB)) != 0) return (error); /* * determine the appropriate zone id of the source based on * the zone of the destination and the outgoing interface. * If the specified address is ambiguous wrt the scope zone, * the interface must be specified; otherwise, ifa_ifwithaddr() * will fail matching the address. */ bzero(&srcsock, sizeof(srcsock)); srcsock.sin6_family = AF_INET6; srcsock.sin6_len = sizeof(srcsock); srcsock.sin6_addr = pi->ipi6_addr; if (ifp) { error = in6_setscope(&srcsock.sin6_addr, ifp, NULL); if (error) return (error); } if (cred != NULL && (error = prison_local_ip6(cred, &srcsock.sin6_addr, (inp != NULL && (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) return (error); ia6 = (struct in6_ifaddr *)ifa_ifwithaddr( (struct sockaddr *)&srcsock); if (ia6 == NULL || (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) { if (ia6 != NULL) ifa_free(&ia6->ia_ifa); return (EADDRNOTAVAIL); } pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */ if (ifpp) *ifpp = ifp; bcopy(&ia6->ia_addr.sin6_addr, srcp, sizeof(*srcp)); ifa_free(&ia6->ia_ifa); return (0); } /* * Otherwise, if the socket has already bound the source, just use it. 
*/ if (inp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { if (cred != NULL && (error = prison_local_ip6(cred, &inp->in6p_laddr, ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) return (error); bcopy(&inp->in6p_laddr, srcp, sizeof(*srcp)); return (0); } /* * Bypass source address selection and use the primary jail IP * if requested. */ if (cred != NULL && !prison_saddrsel_ip6(cred, srcp)) return (0); /* * If the address is not specified, choose the best one based on * the outgoing interface and the destination address. */ /* get the outgoing interface */ - if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp, oifp, + if ((error = in6_selectif(dstsock, opts, mopts, &ifp, oifp, (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB)) != 0) return (error); #ifdef DIAGNOSTIC if (ifp == NULL) /* this should not happen */ panic("in6_selectsrc: NULL ifp"); #endif error = in6_setscope(&dst, ifp, &odstzone); if (error) return (error); rule = 0; IN6_IFADDR_RLOCK(&in6_ifa_tracker); TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { int new_scope = -1, new_matchlen = -1; struct in6_addrpolicy *new_policy = NULL; u_int32_t srczone, osrczone, dstzone; struct in6_addr src; struct ifnet *ifp1 = ia->ia_ifp; /* * We'll never take an address that breaks the scope zone * of the destination. We also skip an address if its zone * does not contain the outgoing interface. * XXX: we should probably use sin6_scope_id here. */ if (in6_setscope(&dst, ifp1, &dstzone) || odstzone != dstzone) { continue; } src = ia->ia_addr.sin6_addr; if (in6_setscope(&src, ifp, &osrczone) || in6_setscope(&src, ifp1, &srczone) || osrczone != srczone) { continue; } /* avoid unusable addresses */ if ((ia->ia6_flags & (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) { continue; } if (!V_ip6_use_deprecated && IFA6_IS_DEPRECATED(ia)) continue; /* If jailed only take addresses of the jail into account. */ if (cred != NULL && prison_check_ip6(cred, &ia->ia_addr.sin6_addr) != 0) continue; /* Rule 1: Prefer same address */ if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr)) { ia_best = ia; BREAK(1); /* there should be no better candidate */ } if (ia_best == NULL) REPLACE(0); /* Rule 2: Prefer appropriate scope */ if (dst_scope < 0) dst_scope = in6_addrscope(&dst); new_scope = in6_addrscope(&ia->ia_addr.sin6_addr); if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) { if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0) REPLACE(2); NEXT(2); } else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) { if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0) NEXT(2); REPLACE(2); } /* * Rule 3: Avoid deprecated addresses. Note that the case of * !ip6_use_deprecated is already rejected above. */ if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia)) NEXT(3); if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia)) REPLACE(3); /* Rule 4: Prefer home addresses */ /* * XXX: This is a TODO. We should probably merge the MIP6 * case above. */ /* Rule 5: Prefer outgoing interface */ if (!(ND_IFINFO(ifp)->flags & ND6_IFF_NO_PREFER_IFACE)) { if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp) NEXT(5); if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp) REPLACE(5); } /* * Rule 6: Prefer matching label * Note that best_policy should be non-NULL here. 
*/ if (dst_policy == NULL) dst_policy = lookup_addrsel_policy(dstsock); if (dst_policy->label != ADDR_LABEL_NOTAPP) { new_policy = lookup_addrsel_policy(&ia->ia_addr); if (dst_policy->label == best_policy->label && dst_policy->label != new_policy->label) NEXT(6); if (dst_policy->label != best_policy->label && dst_policy->label == new_policy->label) REPLACE(6); } /* * Rule 7: Prefer public addresses. * We allow users to reverse the logic by configuring * a sysctl variable, so that privacy-conscious users can * always prefer temporary addresses. */ if (opts == NULL || opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) { prefer_tempaddr = V_ip6_prefer_tempaddr; } else if (opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_NOTPREFER) { prefer_tempaddr = 0; } else prefer_tempaddr = 1; if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) && (ia->ia6_flags & IN6_IFF_TEMPORARY)) { if (prefer_tempaddr) REPLACE(7); else NEXT(7); } if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) && !(ia->ia6_flags & IN6_IFF_TEMPORARY)) { if (prefer_tempaddr) NEXT(7); else REPLACE(7); } /* * Rule 8: prefer addresses on alive interfaces. * This is a KAME-specific rule. */ if ((ia_best->ia_ifp->if_flags & IFF_UP) && !(ia->ia_ifp->if_flags & IFF_UP)) NEXT(8); if (!(ia_best->ia_ifp->if_flags & IFF_UP) && (ia->ia_ifp->if_flags & IFF_UP)) REPLACE(8); /* * Rule 9: prefer address with better virtual status. */ if (ifa_preferred(&ia_best->ia_ifa, &ia->ia_ifa)) REPLACE(9); if (ifa_preferred(&ia->ia_ifa, &ia_best->ia_ifa)) NEXT(9); /* * Rule 10: prefer address with `prefer_source' flag. */ if ((ia_best->ia6_flags & IN6_IFF_PREFER_SOURCE) == 0 && (ia->ia6_flags & IN6_IFF_PREFER_SOURCE) != 0) REPLACE(10); if ((ia_best->ia6_flags & IN6_IFF_PREFER_SOURCE) != 0 && (ia->ia6_flags & IN6_IFF_PREFER_SOURCE) == 0) NEXT(10); /* * Rule 14: Use longest matching prefix. * Note: in the address selection draft, this rule is * documented as "Rule 8". However, since it is also * documented that this rule can be overridden, we assign * a large number so that it is easy to assign smaller numbers * to more preferred rules. */ new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst); if (best_matchlen < new_matchlen) REPLACE(14); if (new_matchlen < best_matchlen) NEXT(14); /* Rule 15 is reserved. */ /* * Last resort: just keep the current candidate. * Or, do we need more rules? */ continue; replace: ia_best = ia; best_scope = (new_scope >= 0 ? new_scope : in6_addrscope(&ia_best->ia_addr.sin6_addr)); best_policy = (new_policy ? new_policy : lookup_addrsel_policy(&ia_best->ia_addr)); best_matchlen = (new_matchlen >= 0 ? new_matchlen : in6_matchlen(&ia_best->ia_addr.sin6_addr, &dst)); next: continue; out: break; } if ((ia = ia_best) == NULL) { IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); IP6STAT_INC(ip6s_sources_none); return (EADDRNOTAVAIL); } /* * At this point at least one of the addresses belonged to the jail * but it could still be that we want to further restrict it, e.g. * theoretically IN6_IS_ADDR_LOOPBACK. * It must not be IN6_IS_ADDR_UNSPECIFIED anymore. * prison_local_ip6() will fix an IN6_IS_ADDR_LOOPBACK but should * let all others previously selected pass. * Use tmp to not change ::1 on lo0 to the primary jail address.
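 * (A distilled sketch of the rule-driven loop above follows.)
 */

/*
 * The REPLACE()/NEXT()/BREAK() macros above reduce to the control
 * flow sketched here; plain continue/break are unusable inside the
 * macro bodies, hence the goto labels. A hypothetical, simplified
 * stand-alone version with a single integer score per candidate:
 */
static int
best_candidate(const int *score, int n)
{
	int i, best = -1;

	for (i = 0; i < n; i++) {
		if (score[i] == 100) {	/* BREAK(): no better candidate */
			best = i;
			goto out;
		}
		if (best >= 0 && score[i] <= score[best])
			goto next;	/* NEXT(): keep the incumbent */
		/* REPLACE(): adopt the candidate. */
		best = i;
next:
		continue;
	}
out:
	return (best);
}

/*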
*/ tmp = ia->ia_addr.sin6_addr; if (cred != NULL && prison_local_ip6(cred, &tmp, (inp != NULL && (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0) { IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); IP6STAT_INC(ip6s_sources_none); return (EADDRNOTAVAIL); } if (ifpp) *ifpp = ifp; bcopy(&tmp, srcp, sizeof(*srcp)); if (ia->ia_ifp == ifp) IP6STAT_INC(ip6s_sources_sameif[best_scope]); else IP6STAT_INC(ip6s_sources_otherif[best_scope]); if (dst_scope == best_scope) IP6STAT_INC(ip6s_sources_samescope[best_scope]); else IP6STAT_INC(ip6s_sources_otherscope[best_scope]); if (IFA6_IS_DEPRECATED(ia)) IP6STAT_INC(ip6s_sources_deprecated[best_scope]); IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); return (0); } /* * clone - meaningful only for bsdi and freebsd */ static int selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp, struct rtentry **retrt, int norouteok, u_int fibnum) { int error = 0; struct ifnet *ifp = NULL; struct rtentry *rt = NULL; struct sockaddr_in6 *sin6_next; struct in6_pktinfo *pi = NULL; struct in6_addr *dst = &dstsock->sin6_addr; uint32_t zoneid; #if 0 char ip6buf[INET6_ADDRSTRLEN]; if (dstsock->sin6_addr.s6_addr32[0] == 0 && dstsock->sin6_addr.s6_addr32[1] == 0 && !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) { printf("in6_selectroute: strange destination %s\n", ip6_sprintf(ip6buf, &dstsock->sin6_addr)); } else { printf("in6_selectroute: destination = %s%%%d\n", ip6_sprintf(ip6buf, &dstsock->sin6_addr), dstsock->sin6_scope_id); /* for debug */ } #endif /* If the caller specifies the outgoing interface explicitly, use it. */ if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) { /* XXX boundary check is assumed to be already done. */ ifp = ifnet_byindex(pi->ipi6_ifindex); if (ifp != NULL && (norouteok || retrt == NULL || IN6_IS_ADDR_MULTICAST(dst))) { /* * we do not have to check or get the route for * multicast. */ goto done; } else goto getroute; } /* * If the destination address is a multicast address and the outgoing * interface for the address is specified by the caller, use it. */ if (IN6_IS_ADDR_MULTICAST(dst) && mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) { goto done; /* we do not need a route for multicast. */ } /* * If destination address is LLA or link- or node-local multicast, * use its embedded scope zone id to determine outgoing interface. */ if (IN6_IS_ADDR_MC_LINKLOCAL(dst) || IN6_IS_ADDR_MC_NODELOCAL(dst)) { zoneid = ntohs(in6_getscope(dst)); if (zoneid > 0) { ifp = in6_getlinkifnet(zoneid); goto done; } } getroute: /* * If the next hop address for the packet is specified by the caller, * use it as the gateway. */ if (opts && opts->ip6po_nexthop) { struct route_in6 *ron; sin6_next = satosin6(opts->ip6po_nexthop); if (IN6_IS_ADDR_LINKLOCAL(&sin6_next->sin6_addr)) { /* * Next hop is LLA, thus it should be a neighbor. * Determine outgoing interface by zone index. */ zoneid = ntohs(in6_getscope(&sin6_next->sin6_addr)); if (zoneid > 0) { ifp = in6_getlinkifnet(zoneid); goto done; } } ron = &opts->ip6po_nextroute; /* Use a cached route if it exists and is valid. */ if (ron->ro_rt != NULL && ( (ron->ro_rt->rt_flags & RTF_UP) == 0 || ron->ro_dst.sin6_family != AF_INET6 || !IN6_ARE_ADDR_EQUAL(&ron->ro_dst.sin6_addr, &sin6_next->sin6_addr))) RO_RTFREE(ron); if (ron->ro_rt == NULL) { ron->ro_dst = *sin6_next; in6_rtalloc(ron, fibnum); /* multi path case? */ } /* * The node identified by that address must be a * neighbor of the sending host.
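 * (That requirement is enforced right below; first, a short userland
 * sketch of the embedded-zone rule used above.)
 */

/*
 * A link-local address alone does not identify a link, so callers
 * disambiguate it with a zone: in userland via sin6_scope_id, which
 * in6_getscope()/in6_getlinkifnet() above map back to an ifnet.
 * Illustrative sketch; "em0" is a placeholder interface name.
 */
#include <net/if.h>
#include <netinet/in.h>

static void
set_ll_zone(struct sockaddr_in6 *sin6)
{
	if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
	    sin6->sin6_scope_id == 0)
		sin6->sin6_scope_id = if_nametoindex("em0");
}

/*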
*/ if (ron->ro_rt == NULL || (ron->ro_rt->rt_flags & RTF_GATEWAY) != 0) error = EHOSTUNREACH; goto done; } /* * Use a cached route if it exists and is valid, else try to allocate * a new one. Note that we should check the address family of the * cached destination, in case of sharing the cache with IPv4. */ if (ro) { if (ro->ro_rt && (!(ro->ro_rt->rt_flags & RTF_UP) || ((struct sockaddr *)(&ro->ro_dst))->sa_family != AF_INET6 || !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, dst))) { RTFREE(ro->ro_rt); ro->ro_rt = (struct rtentry *)NULL; } if (ro->ro_rt == (struct rtentry *)NULL) { struct sockaddr_in6 *sa6; /* No route yet, so try to acquire one */ bzero(&ro->ro_dst, sizeof(struct sockaddr_in6)); sa6 = (struct sockaddr_in6 *)&ro->ro_dst; *sa6 = *dstsock; sa6->sin6_scope_id = 0; #ifdef RADIX_MPATH rtalloc_mpath_fib((struct route *)ro, ntohl(sa6->sin6_addr.s6_addr32[3]), fibnum); #else ro->ro_rt = in6_rtalloc1((struct sockaddr *) &ro->ro_dst, 0, 0UL, fibnum); if (ro->ro_rt) RT_UNLOCK(ro->ro_rt); #endif } /* * do not care about the result if we have the nexthop * explicitly specified. */ if (opts && opts->ip6po_nexthop) goto done; if (ro->ro_rt) { ifp = ro->ro_rt->rt_ifp; if (ifp == NULL) { /* can this really happen? */ RTFREE(ro->ro_rt); ro->ro_rt = NULL; } } if (ro->ro_rt == NULL) error = EHOSTUNREACH; rt = ro->ro_rt; /* * Check if the outgoing interface conflicts with * the interface specified by ipi6_ifindex (if specified). * Note that loopback interface is always okay. * (this may happen when we are sending a packet to one of * our own addresses.) */ if (ifp && opts && opts->ip6po_pktinfo && opts->ip6po_pktinfo->ipi6_ifindex) { if (!(ifp->if_flags & IFF_LOOPBACK) && ifp->if_index != opts->ip6po_pktinfo->ipi6_ifindex) { error = EHOSTUNREACH; goto done; } } } done: if (ifp == NULL && rt == NULL) { /* * This can happen if the caller did not pass a cached route * or any other hints. We treat this case as an error. */ error = EHOSTUNREACH; } if (error == EHOSTUNREACH) IP6STAT_INC(ip6s_noroute); if (retifp != NULL) { *retifp = ifp; /* * Adjust the "outgoing" interface. If we're going to loop * the packet back to ourselves, the ifp would be the loopback * interface. However, we'd rather know the interface associated * with the destination address (which should probably be one of * our own addresses.) */ if (rt) { if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) && (rt->rt_gateway->sa_family == AF_LINK)) *retifp = ifnet_byindex(((struct sockaddr_dl *) rt->rt_gateway)->sdl_index); } } if (retrt != NULL) *retrt = rt; /* rt may be NULL */ return (error); } static int in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, - struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp, + struct ip6_moptions *mopts, struct ifnet **retifp, struct ifnet *oifp, u_int fibnum) { int error; struct route_in6 sro; struct rtentry *rt = NULL; + int rt_flags; KASSERT(retifp != NULL, ("%s: retifp is NULL", __func__)); - if (ro == NULL) { - bzero(&sro, sizeof(sro)); - ro = &sro; - } + bzero(&sro, sizeof(sro)); + rt_flags = 0; + + error = selectroute(dstsock, opts, mopts, &sro, retifp, &rt, 1, fibnum); + + if (rt) + rt_flags = rt->rt_flags; + if (rt && rt == sro.ro_rt) + RTFREE(rt); - if ((error = selectroute(dstsock, opts, mopts, ro, retifp, - &rt, 1, fibnum)) != 0) { - if (ro == &sro && rt && rt == sro.ro_rt) - RTFREE(rt); + if (error != 0) { /* Help ND. See oifp comment in in6_selectsrc().
*/ if (oifp != NULL && fibnum == RT_DEFAULT_FIB) { *retifp = oifp; error = 0; } return (error); } /* * do not use a rejected or black hole route. * XXX: this check should be done in the L2 output routine. * However, if we skipped this check here, we'd see the following * scenario: * - install a rejected route for a scoped address prefix * (like fe80::/10) * - send a packet to a destination that matches the scoped prefix, * with ambiguity about the scope zone. * - pick the outgoing interface from the route, and disambiguate the * scope zone with the interface. * - ip6_output() would try to get another route with the "new" * destination, which may be valid. * - we'd see no error on output. * Although this may not be very harmful, it should still be confusing. * We thus reject the case here. */ - if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) { - int flags = (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); - if (ro == &sro && rt && rt == sro.ro_rt) - RTFREE(rt); - return (flags); + if (rt_flags & (RTF_REJECT | RTF_BLACKHOLE)) { + error = (rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); + return (error); } - if (ro == &sro && rt && rt == sro.ro_rt) - RTFREE(rt); return (0); } /* * Public wrapper function to selectroute(). * * XXX-BZ in6_selectroute() should and will grow the FIB argument. The * in6_selectroute_fib() function is only there for backward compat on stable. */ int in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp, struct rtentry **retrt) { return (selectroute(dstsock, opts, mopts, ro, retifp, retrt, 0, RT_DEFAULT_FIB)); } #ifndef BURN_BRIDGES int in6_selectroute_fib(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp, struct rtentry **retrt, u_int fibnum) { return (selectroute(dstsock, opts, mopts, ro, retifp, retrt, 0, fibnum)); } #endif /* * Default hop limit selection. The precedence is as follows: * 1. Hoplimit value specified via ioctl. * 2. (If the outgoing interface is detected) the current * hop limit of the interface specified by router advertisement. * 3. The system default hoplimit. */ int in6_selecthlim(struct inpcb *in6p, struct ifnet *ifp) { if (in6p && in6p->in6p_hops >= 0) return (in6p->in6p_hops); else if (ifp) return (ND_IFINFO(ifp)->chlim); else if (in6p && !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) { struct nhop6_basic nh6; struct in6_addr dst; uint32_t fibnum, scopeid; int hlim; fibnum = in6p->inp_inc.inc_fibnum; in6_splitscope(&in6p->in6p_faddr, &dst, &scopeid); if (fib6_lookup_nh_basic(fibnum, &dst, scopeid, 0, 0, &nh6)==0){ hlim = ND_IFINFO(nh6.nh_ifp)->chlim; return (hlim); } } return (V_ip6_defhlim); } /* * XXX: this is borrowed from in6_pcbbind(). If possible, we should * share this function by all *bsd*... 
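 * (A userland sketch of what triggers the port selection below
 * follows.)
 */

/*
 * in6_pcbsetport() below is reached when a socket binds without a
 * port: the kernel then assigns an anonymous (ephemeral) one via
 * in_pcb_lport(). A minimal userland counterpart; the helper name is
 * illustrative:
 */
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>

static int
bind_any_port(int s)
{
	struct sockaddr_in6 sin6;

	memset(&sin6, 0, sizeof(sin6));
	sin6.sin6_family = AF_INET6;
	sin6.sin6_len = sizeof(sin6);	/* BSD sockaddr length field */
	sin6.sin6_addr = in6addr_any;
	sin6.sin6_port = 0;		/* 0 = let the kernel choose */
	return (bind(s, (struct sockaddr *)&sin6, sizeof(sin6)));
}

/*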
*/ int in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct ucred *cred) { struct socket *so = inp->inp_socket; u_int16_t lport = 0; int error, lookupflags = 0; #ifdef INVARIANTS struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; #endif INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); error = prison_local_ip6(cred, laddr, ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)); if (error) return(error); /* XXX: this is redundant when called from in6_pcbbind */ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) lookupflags = INPLOOKUP_WILDCARD; inp->inp_flags |= INP_ANONPORT; error = in_pcb_lport(inp, NULL, &lport, cred, lookupflags); if (error != 0) return (error); inp->inp_lport = lport; if (in_pcbinshash(inp) != 0) { inp->in6p_laddr = in6addr_any; inp->inp_lport = 0; return (EAGAIN); } return (0); } void addrsel_policy_init(void) { init_policy_queue(); /* initialize the "last resort" policy */ bzero(&V_defaultaddrpolicy, sizeof(V_defaultaddrpolicy)); V_defaultaddrpolicy.label = ADDR_LABEL_NOTAPP; if (!IS_DEFAULT_VNET(curvnet)) return; ADDRSEL_LOCK_INIT(); ADDRSEL_SXLOCK_INIT(); } static struct in6_addrpolicy * lookup_addrsel_policy(struct sockaddr_in6 *key) { struct in6_addrpolicy *match = NULL; ADDRSEL_LOCK(); match = match_addrsel_policy(key); if (match == NULL) match = &V_defaultaddrpolicy; else match->use++; ADDRSEL_UNLOCK(); return (match); } /* * Subroutines to manage the address selection policy table via sysctl. */ struct walkarg { struct sysctl_req *w_req; }; static int in6_src_sysctl(SYSCTL_HANDLER_ARGS); SYSCTL_DECL(_net_inet6_ip6); static SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy, CTLFLAG_RD, in6_src_sysctl, ""); static int in6_src_sysctl(SYSCTL_HANDLER_ARGS) { struct walkarg w; if (req->newptr) return EPERM; bzero(&w, sizeof(w)); w.w_req = req; return (walk_addrsel_policy(dump_addrsel_policyent, &w)); } int in6_src_ioctl(u_long cmd, caddr_t data) { struct in6_addrpolicy ent0; if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY) return (EOPNOTSUPP); /* check for safety */ ent0 = *(struct in6_addrpolicy *)data; if (ent0.label == ADDR_LABEL_NOTAPP) return (EINVAL); /* check if the prefix mask is consecutive. */ if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0) return (EINVAL); /* clear trailing garbage (if any) from the prefix address. */ IN6_MASK_ADDR(&ent0.addr.sin6_addr, &ent0.addrmask.sin6_addr); ent0.use = 0; switch (cmd) { case SIOCAADDRCTL_POLICY: return (add_addrsel_policyent(&ent0)); case SIOCDADDRCTL_POLICY: return (delete_addrsel_policyent(&ent0)); } return (0); /* XXX: compromise compilers */ } /* * The following is an implementation of the policy table using a * simple tail queue. * XXX such details should be hidden. * XXX implementation using binary tree should be more efficient.
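 * (A sketch of the "consecutive mask" rule checked by in6_mask2len()
 * above follows.)
 */

/*
 * The ioctl handler above rejects prefix masks with holes: the mask
 * must be a run of 1-bits followed only by 0-bits. A hypothetical
 * stand-alone version of that check, returning the prefix length or
 * -1 for a non-contiguous mask:
 */
static int
contig_mask_len(const unsigned char *m, int nbytes)
{
	int i, j, len = 0;

	for (i = 0; i < nbytes; i++) {
		unsigned char b = m[i];

		if (b == 0xff) {
			len += 8;
			continue;
		}
		while (b & 0x80) {	/* count leading 1-bits */
			len++;
			b <<= 1;
		}
		if (b != 0)
			return (-1);	/* hole within this byte */
		for (j = i + 1; j < nbytes; j++)
			if (m[j] != 0)
				return (-1);	/* hole after this byte */
		break;
	}
	return (len);
}

/*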
*/ struct addrsel_policyent { TAILQ_ENTRY(addrsel_policyent) ape_entry; struct in6_addrpolicy ape_policy; }; TAILQ_HEAD(addrsel_policyhead, addrsel_policyent); static VNET_DEFINE(struct addrsel_policyhead, addrsel_policytab); #define V_addrsel_policytab VNET(addrsel_policytab) static void init_policy_queue(void) { TAILQ_INIT(&V_addrsel_policytab); } static int add_addrsel_policyent(struct in6_addrpolicy *newpolicy) { struct addrsel_policyent *new, *pol; new = malloc(sizeof(*new), M_IFADDR, M_WAITOK); ADDRSEL_XLOCK(); ADDRSEL_LOCK(); /* duplication check */ TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) { if (IN6_ARE_ADDR_EQUAL(&newpolicy->addr.sin6_addr, &pol->ape_policy.addr.sin6_addr) && IN6_ARE_ADDR_EQUAL(&newpolicy->addrmask.sin6_addr, &pol->ape_policy.addrmask.sin6_addr)) { ADDRSEL_UNLOCK(); ADDRSEL_XUNLOCK(); free(new, M_IFADDR); return (EEXIST); /* or override it? */ } } bzero(new, sizeof(*new)); /* XXX: should validate entry */ new->ape_policy = *newpolicy; TAILQ_INSERT_TAIL(&V_addrsel_policytab, new, ape_entry); ADDRSEL_UNLOCK(); ADDRSEL_XUNLOCK(); return (0); } static int delete_addrsel_policyent(struct in6_addrpolicy *key) { struct addrsel_policyent *pol; ADDRSEL_XLOCK(); ADDRSEL_LOCK(); /* search for the entry in the table */ TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) { if (IN6_ARE_ADDR_EQUAL(&key->addr.sin6_addr, &pol->ape_policy.addr.sin6_addr) && IN6_ARE_ADDR_EQUAL(&key->addrmask.sin6_addr, &pol->ape_policy.addrmask.sin6_addr)) { break; } } if (pol == NULL) { ADDRSEL_UNLOCK(); ADDRSEL_XUNLOCK(); return (ESRCH); } TAILQ_REMOVE(&V_addrsel_policytab, pol, ape_entry); ADDRSEL_UNLOCK(); ADDRSEL_XUNLOCK(); free(pol, M_IFADDR); return (0); } static int walk_addrsel_policy(int (*callback)(struct in6_addrpolicy *, void *), void *w) { struct addrsel_policyent *pol; int error = 0; ADDRSEL_SLOCK(); TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) { if ((error = (*callback)(&pol->ape_policy, w)) != 0) { ADDRSEL_SUNLOCK(); return (error); } } ADDRSEL_SUNLOCK(); return (error); } static int dump_addrsel_policyent(struct in6_addrpolicy *pol, void *arg) { int error = 0; struct walkarg *w = arg; error = SYSCTL_OUT(w->w_req, pol, sizeof(*pol)); return (error); } static struct in6_addrpolicy * match_addrsel_policy(struct sockaddr_in6 *key) { struct addrsel_policyent *pent; struct in6_addrpolicy *bestpol = NULL, *pol; int matchlen, bestmatchlen = -1; u_char *mp, *ep, *k, *p, m; TAILQ_FOREACH(pent, &V_addrsel_policytab, ape_entry) { matchlen = 0; pol = &pent->ape_policy; mp = (u_char *)&pol->addrmask.sin6_addr; ep = mp + 16; /* XXX: scope field? */ k = (u_char *)&key->sin6_addr; p = (u_char *)&pol->addr.sin6_addr; for (; mp < ep && *mp; mp++, k++, p++) { m = *mp; if ((*k & m) != *p) goto next; /* not match */ if (m == 0xff) /* short cut for a typical case */ matchlen += 8; else { while (m >= 0x80) { matchlen++; m <<= 1; } } } /* matched. check if this is better than the current best. */ if (bestpol == NULL || matchlen > bestmatchlen) { bestpol = pol; bestmatchlen = matchlen; } next: continue; } return (bestpol); } diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index af9fdbafe195..ccbac7d9f96c 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -1,2998 +1,3062 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $ */ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipfw.h" #include "opt_ipsec.h" #include "opt_sctp.h" #include "opt_route.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #include #include #endif /* IPSEC */ #ifdef SCTP #include #include #endif #include #include #ifdef FLOWTABLE #include #endif extern int in6_mcast_loop; struct ip6_exthdrs { struct mbuf *ip6e_ip6; struct mbuf *ip6e_hbh; struct mbuf *ip6e_dest1; struct mbuf *ip6e_rthdr; struct mbuf *ip6e_dest2; }; static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **, struct ucred *, int); static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *, struct socket *, struct sockopt *); static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *); static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, struct ucred *, int, int, int); static int ip6_copyexthdr(struct mbuf **, caddr_t, int); static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int, struct ip6_frag **); static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t); static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *); -static int ip6_getpmtu(struct route_in6 *, struct route_in6 *, +static int ip6_getpmtu(struct route_in6 *, int, struct ifnet *, struct in6_addr *, u_long *, int *, u_int); +static int ip6_calcmtu(struct ifnet *, const struct in6_addr *, u_long, + u_long *, int *); +static int ip6_getpmtu_ctl(u_int, struct in6_addr *, u_long *); static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int); /* * Make an extension header from option data. hp is the source, and * mp is the destination. */ #define MAKE_EXTHDR(hp, mp) \ do { \ if (hp) { \ struct ip6_ext *eh = (struct ip6_ext *)(hp); \ error = ip6_copyexthdr((mp), (caddr_t)(hp), \ ((eh)->ip6e_len + 1) << 3); \ if (error) \ goto freehdrs; \ } \ } while (/*CONSTCOND*/ 0) /* * Form a chain of extension headers. * m is the extension header mbuf * mp is the previous mbuf in the chain * p is the next header * i is the type of option. */ #define MAKE_CHAIN(m, mp, p, i)\ do {\ if (m) {\ if (!hdrsplit) \ panic("assumption failed: hdr not split"); \ *mtod((m), u_char *) = *(p);\ *(p) = (i);\ p = mtod((m), u_char *);\ (m)->m_next = (mp)->m_next;\ (mp)->m_next = (m);\ (mp) = (m);\ }\ } while (/*CONSTCOND*/ 0) void in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset) { u_short csum; csum = in_cksum_skip(m, offset + plen, offset); if (m->m_pkthdr.csum_flags & CSUM_UDP_IPV6 && csum == 0) csum = 0xffff; offset += m->m_pkthdr.csum_data; /* checksum offset */ if (offset + sizeof(u_short) > m->m_len) { printf("%s: delayed m_pullup, m->len: %d plen %u off %u " "csum_flags=%b\n", __func__, m->m_len, plen, offset, (int)m->m_pkthdr.csum_flags, CSUM_BITS); /* * XXX this should not happen, but if it does, the correct * behavior may be to insert the checksum in the appropriate * next mbuf in the chain. 
*/ return; } *(u_short *)(m->m_data + offset) = csum; } int ip6_fragment(struct ifnet *ifp, struct mbuf *m0, int hlen, u_char nextproto, int mtu, uint32_t id) { struct mbuf *m, **mnext, *m_frgpart; struct ip6_hdr *ip6, *mhip6; struct ip6_frag *ip6f; int off; int error; int tlen = m0->m_pkthdr.len; m = m0; ip6 = mtod(m, struct ip6_hdr *); mnext = &m->m_nextpkt; for (off = hlen; off < tlen; off += mtu) { m = m_gethdr(M_NOWAIT, MT_DATA); if (!m) { IP6STAT_INC(ip6s_odropped); return (ENOBUFS); } m->m_flags = m0->m_flags & M_COPYFLAGS; *mnext = m; mnext = &m->m_nextpkt; m->m_data += max_linkhdr; mhip6 = mtod(m, struct ip6_hdr *); *mhip6 = *ip6; m->m_len = sizeof(*mhip6); error = ip6_insertfraghdr(m0, m, hlen, &ip6f); if (error) { IP6STAT_INC(ip6s_odropped); return (error); } ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7)); if (off + mtu >= tlen) mtu = tlen - off; else ip6f->ip6f_offlg |= IP6F_MORE_FRAG; mhip6->ip6_plen = htons((u_short)(mtu + hlen + sizeof(*ip6f) - sizeof(struct ip6_hdr))); if ((m_frgpart = m_copy(m0, off, mtu)) == 0) { IP6STAT_INC(ip6s_odropped); return (ENOBUFS); } m_cat(m, m_frgpart); m->m_pkthdr.len = mtu + hlen + sizeof(*ip6f); m->m_pkthdr.fibnum = m0->m_pkthdr.fibnum; m->m_pkthdr.rcvif = NULL; ip6f->ip6f_reserved = 0; ip6f->ip6f_ident = id; ip6f->ip6f_nxt = nextproto; IP6STAT_INC(ip6s_ofragments); in6_ifstat_inc(ifp, ifs6_out_fragcreat); } return (0); } /* * IP6 output. The packet in mbuf chain m contains a skeletal IP6 * header (with pri, len, nxt, hlim, src, dst). * This function may modify ver and hlim only. * The mbuf chain containing the packet will be freed. * The mbuf opt, if present, will not be freed. * If route_in6 ro is present and has ro_rt initialized, route lookup would be * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL, * then result of route lookup is stored in ro->ro_rt. * * type of "mtu": rt_mtu is u_long, ifnet.ifr_mtu is int, and * nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one, * which is rt_mtu. * * ifpp - XXX: just for statistics */ /* * XXX TODO: no flowid is assigned for outbound flows? */ int ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, int flags, struct ip6_moptions *im6o, struct ifnet **ifpp, struct inpcb *inp) { struct ip6_hdr *ip6; struct ifnet *ifp, *origifp; struct mbuf *m = m0; struct mbuf *mprev = NULL; int hlen, tlen, len; struct route_in6 ip6route; struct rtentry *rt = NULL; struct sockaddr_in6 *dst, src_sa, dst_sa; struct in6_addr odst; int error = 0; struct in6_ifaddr *ia = NULL; u_long mtu; int alwaysfrag, dontfrag; u_int32_t optlen = 0, plen = 0, unfragpartlen = 0; struct ip6_exthdrs exthdrs; struct in6_addr finaldst, src0, dst0; u_int32_t zone; struct route_in6 *ro_pmtu = NULL; int hdrsplit = 0; int sw_csum, tso; int needfiblookup; uint32_t fibnum; struct m_tag *fwd_tag = NULL; uint32_t id; ip6 = mtod(m, struct ip6_hdr *); if (ip6 == NULL) { printf ("ip6 is NULL"); goto bad; } if (inp != NULL) { M_SETFIB(m, inp->inp_inc.inc_fibnum); if ((flags & IP_NODEFAULTFLOWID) == 0) { /* unconditionally set flowid */ m->m_pkthdr.flowid = inp->inp_flowid; M_HASHTYPE_SET(m, inp->inp_flowtype); } } finaldst = ip6->ip6_dst; bzero(&exthdrs, sizeof(exthdrs)); if (opt) { /* Hop-by-Hop options header */ MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh); /* Destination options header(1st part) */ if (opt->ip6po_rthdr) { /* * Destination options header(1st part) * This only makes sense with a routing header. * See Section 9.2 of RFC 3542. 
* Disabling this part just for MIP6 convenience is * a bad idea. We need to think carefully about a * way to make the advanced API coexist with MIP6 * options, which might automatically be inserted in * the kernel. */ MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1); } /* Routing header */ MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr); /* Destination options header(2nd part) */ MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2); } #ifdef IPSEC /* * IPSec checking which handles several cases. * FAST IPSEC: We re-injected the packet. * XXX: need scope argument. */ switch(ip6_ipsec_output(&m, inp, &error)) { case 1: /* Bad packet */ goto freehdrs; case -1: /* IPSec done */ goto done; case 0: /* No IPSec */ default: break; } #endif /* IPSEC */ /* * Calculate the total length of the extension header chain. * Keep the length of the unfragmentable part for fragmentation. */ optlen = 0; if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len; if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len; if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len; unfragpartlen = optlen + sizeof(struct ip6_hdr); /* NOTE: we don't add AH/ESP length here (done in ip6_ipsec_output) */ if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len; /* * If there is at least one extension header, * separate IP6 header from the payload. */ if (optlen && !hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; hdrsplit++; } /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); /* adjust mbuf packet header length */ m->m_pkthdr.len += optlen; plen = m->m_pkthdr.len - sizeof(*ip6); /* If this is a jumbo payload, insert a jumbo payload option. */ if (plen > IPV6_MAXPACKET) { if (!hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; hdrsplit++; } /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0) goto freehdrs; ip6->ip6_plen = 0; } else ip6->ip6_plen = htons(plen); /* * Concatenate headers and fill in next header fields. * Here we have, on "m" * IPv6 payload * and we insert headers accordingly. Finally, we should be getting: * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload] * * during the header composing process, "m" points to IPv6 header. * "mprev" points to an extension header prior to esp. */ u_char *nexthdrp = &ip6->ip6_nxt; mprev = m; /* * we treat dest2 specially. this makes IPsec processing * much easier. the goal here is to make mprev point the * mbuf prior to dest2. * * result: IPv6 dest2 payload * m and mprev will point to IPv6 header. */ if (exthdrs.ip6e_dest2) { if (!hdrsplit) panic("assumption failed: hdr not split"); exthdrs.ip6e_dest2->m_next = m->m_next; m->m_next = exthdrs.ip6e_dest2; *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt; ip6->ip6_nxt = IPPROTO_DSTOPTS; } /* * result: IPv6 hbh dest1 rthdr dest2 payload * m will point to IPv6 header. mprev will point to the * extension header prior to dest2 (rthdr in the above case). */ MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS); MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS); MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING); /* * If there is a routing header, discard the packet. 
*/ if (exthdrs.ip6e_rthdr) { error = EINVAL; goto bad; } /* Source address validation */ if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && (flags & IPV6_UNSPECSRC) == 0) { error = EOPNOTSUPP; IP6STAT_INC(ip6s_badscope); goto bad; } if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) { error = EOPNOTSUPP; IP6STAT_INC(ip6s_badscope); goto bad; } IP6STAT_INC(ip6s_localout); /* * Route packet. */ if (ro == 0) { ro = &ip6route; bzero((caddr_t)ro, sizeof(*ro)); } ro_pmtu = ro; if (opt && opt->ip6po_rthdr) ro = &opt->ip6po_route; dst = (struct sockaddr_in6 *)&ro->ro_dst; #ifdef FLOWTABLE if (ro->ro_rt == NULL) (void )flowtable_lookup(AF_INET6, m, (struct route *)ro); #endif fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m); again: /* * if specified, try to fill in the traffic class field. * do not override if a non-zero value is already set. * we check the diffserv field and the ecn field separately. */ if (opt && opt->ip6po_tclass >= 0) { int mask = 0; if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0) mask |= 0xfc; if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0) mask |= 0x03; if (mask != 0) ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20); } /* fill in or override the hop limit field, if necessary. */ if (opt && opt->ip6po_hlim != -1) ip6->ip6_hlim = opt->ip6po_hlim & 0xff; else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (im6o != NULL) ip6->ip6_hlim = im6o->im6o_multicast_hlim; else ip6->ip6_hlim = V_ip6_defmcasthlim; } /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); if (ro->ro_rt && fwd_tag == NULL) { rt = ro->ro_rt; ifp = ro->ro_rt->rt_ifp; } else { if (fwd_tag == NULL) { bzero(&dst_sa, sizeof(dst_sa)); dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(dst_sa); dst_sa.sin6_addr = ip6->ip6_dst; } error = in6_selectroute_fib(&dst_sa, opt, im6o, ro, &ifp, &rt, fibnum); if (error != 0) { if (ifp != NULL) in6_ifstat_inc(ifp, ifs6_out_discard); goto bad; } } if (rt == NULL) { /* * If in6_selectroute() does not return a route entry, * dst may not have been updated. */ *dst = dst_sa; /* XXX */ } /* * then rt (for unicast) and ifp must be non-NULL valid values. */ if ((flags & IPV6_FORWARDING) == 0) { /* XXX: the FORWARDING flag can be set for mrouting. */ in6_ifstat_inc(ifp, ifs6_out_request); } if (rt != NULL) { ia = (struct in6_ifaddr *)(rt->rt_ifa); counter_u64_add(rt->rt_pksent, 1); } /* * The outgoing interface must be in the zone of source and * destination addresses. */ origifp = ifp; src0 = ip6->ip6_src; if (in6_setscope(&src0, origifp, &zone)) goto badscope; bzero(&src_sa, sizeof(src_sa)); src_sa.sin6_family = AF_INET6; src_sa.sin6_len = sizeof(src_sa); src_sa.sin6_addr = ip6->ip6_src; if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id) goto badscope; dst0 = ip6->ip6_dst; if (in6_setscope(&dst0, origifp, &zone)) goto badscope; /* re-initialize to be sure */ bzero(&dst_sa, sizeof(dst_sa)); dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(dst_sa); dst_sa.sin6_addr = ip6->ip6_dst; if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) { goto badscope; } /* We should use ia_ifp to support the case of * sending packets to an address of our own. */ if (ia != NULL && ia->ia_ifp) ifp = ia->ia_ifp; /* scope check is done. */ goto routefound; badscope: IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(origifp, ifs6_out_discard); if (error == 0) error = EHOSTUNREACH; /* XXX */ goto bad; routefound: if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (opt && opt->ip6po_nextroute.ro_rt) { /* * The nexthop is explicitly specified by the * application. 
We assume the next hop is an IPv6 * address. */ dst = (struct sockaddr_in6 *)opt->ip6po_nexthop; } else if ((rt->rt_flags & RTF_GATEWAY)) dst = (struct sockaddr_in6 *)rt->rt_gateway; } if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */ } else { m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST; in6_ifstat_inc(ifp, ifs6_out_mcast); /* * Confirm that the outgoing interface supports multicast. */ if (!(ifp->if_flags & IFF_MULTICAST)) { IP6STAT_INC(ip6s_noroute); in6_ifstat_inc(ifp, ifs6_out_discard); error = ENETUNREACH; goto bad; } if ((im6o == NULL && in6_mcast_loop) || (im6o && im6o->im6o_multicast_loop)) { /* * Loop back multicast datagram if not expressly * forbidden to do so, even if we have not joined * the address; protocols will filter it later, * thus deferring a hash lookup and lock acquisition * at the expense of an m_copym(). */ ip6_mloopback(ifp, m); } else { /* * If we are acting as a multicast router, perform * multicast forwarding as if the packet had just * arrived on the interface to which we are about * to send. The multicast forwarding function * recursively calls this function, using the * IPV6_FORWARDING flag to prevent infinite recursion. * * Multicasts that are looped back by ip6_mloopback(), * above, will be forwarded by the ip6_input() routine, * if necessary. */ if (V_ip6_mrouter && (flags & IPV6_FORWARDING) == 0) { /* * XXX: ip6_mforward expects that rcvif is NULL * when it is called from the originating path. * However, it may not always be the case. */ m->m_pkthdr.rcvif = NULL; if (ip6_mforward(ip6, ifp, m) != 0) { m_freem(m); goto done; } } } /* * Multicasts with a hoplimit of zero may be looped back, * above, but must not be transmitted on a network. * Also, multicasts addressed to the loopback interface * are not sent -- the above call to ip6_mloopback() will * loop back a copy if this host actually belongs to the * destination group on the loopback interface. */ if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) || IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) { m_freem(m); goto done; } } /* * Fill the outgoing interface to tell the upper layer * to increment per-interface statistics. */ if (ifpp) *ifpp = ifp; /* Determine path MTU. */ - if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu, + if ((error = ip6_getpmtu(ro_pmtu, ro != ro_pmtu, ifp, &finaldst, &mtu, &alwaysfrag, fibnum)) != 0) goto bad; /* * The caller of this function may specify to use the minimum MTU * in some cases. * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU * setting. The logic is a bit complicated; by default, unicast * packets will follow path MTU while multicast packets will be sent at * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets * including unicast ones will be sent at the minimum MTU. Multicast * packets will always be sent at the minimum MTU unless * IP6PO_MINMTU_DISABLE is explicitly specified. * See RFC 3542 for more details. */ if (mtu > IPV6_MMTU) { if ((flags & IPV6_MINMTU)) mtu = IPV6_MMTU; else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL) mtu = IPV6_MMTU; else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && (opt == NULL || opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) { mtu = IPV6_MMTU; } } /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary.
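 * (A userland sketch of the RFC 3542 minimum-MTU knob described
 * above follows.)
 */

/*
 * The IP6PO_MINMTU_* values above are set from the IPV6_USE_MIN_MTU
 * socket option; per RFC 3542 it takes -1 (default: minimum MTU for
 * multicast destinations only), 0 (never) or 1 (always). An
 * illustrative helper:
 */
#include <netinet/in.h>
#include <sys/socket.h>

static int
use_min_mtu(int s, int mode)
{
	/* mode: -1 = default, 0 = follow path MTU, 1 = always 1280 bytes */
	return (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU, &mode,
	    sizeof(mode)));
}

/*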
*/ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); /* * If the outgoing packet contains a hop-by-hop options header, * it must be examined and processed even by the source node. * (RFC 2460, section 4.) */ if (exthdrs.ip6e_hbh) { struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *); u_int32_t dummy; /* XXX unused */ u_int32_t plen = 0; /* XXX: ip6_process will check the value */ #ifdef DIAGNOSTIC if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len) panic("ip6e_hbh is not contiguous"); #endif /* * XXX: if we have to send an ICMPv6 error to the sender, * we need the M_LOOP flag since icmp6_error() expects * the IPv6 and the hop-by-hop options header are * contiguous unless the flag is set. */ m->m_flags |= M_LOOP; m->m_pkthdr.rcvif = ifp; if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1), ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh), &dummy, &plen) < 0) { /* m was already freed at this point */ error = EINVAL;/* better error? */ goto done; } m->m_flags &= ~M_LOOP; /* XXX */ m->m_pkthdr.rcvif = NULL; } /* Jump over all PFIL processing if hooks are not active. */ if (!PFIL_HOOKED(&V_inet6_pfil_hook)) goto passout; odst = ip6->ip6_dst; /* Run through list of hooks for output packets. */ error = pfil_run_hooks(&V_inet6_pfil_hook, &m, ifp, PFIL_OUT, inp); if (error != 0 || m == NULL) goto done; ip6 = mtod(m, struct ip6_hdr *); needfiblookup = 0; /* See if destination IP address was changed by packet filter. */ if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) { m->m_flags |= M_SKIP_FIREWALL; /* If destination is now ourself drop to ip6_input(). */ if (in6_localip(&ip6->ip6_dst)) { m->m_flags |= M_FASTFWD_OURS; if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif error = netisr_queue(NETISR_IPV6, m); goto done; } else needfiblookup = 1; /* Redo the routing table lookup. */ } /* See if fib was changed by packet filter. */ if (fibnum != M_GETFIB(m)) { m->m_flags |= M_SKIP_FIREWALL; fibnum = M_GETFIB(m); RO_RTFREE(ro); needfiblookup = 1; } if (needfiblookup) goto again; /* See if local, if yes, send it to netisr. */ if (m->m_flags & M_FASTFWD_OURS) { if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif error = netisr_queue(NETISR_IPV6, m); goto done; } /* Or forward to some other address? */ if ((m->m_flags & M_IP6_NEXTHOP) && (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { dst = (struct sockaddr_in6 *)&ro->ro_dst; bcopy((fwd_tag+1), &dst_sa, sizeof(struct sockaddr_in6)); m->m_flags |= M_SKIP_FIREWALL; m->m_flags &= ~M_IP6_NEXTHOP; m_tag_delete(m, fwd_tag); goto again; } passout: /* * Send the packet to the outgoing interface. * If necessary, do IPv6 fragmentation before sending. 
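 * (The four cases enumerated in the comment below distill to the
 * helper sketched here.)
 */

/*
 * Hypothetical, simplified form of the dontfrag/alwaysfrag decision
 * documented below: returns 0 to send as is, 1 to fragment, and -1
 * where ip6_output() fails with EMSGSIZE.
 */
static int
frag_decision(int dontfrag, int alwaysfrag, unsigned long tlen,
    unsigned long pmtu, unsigned long ifmtu)
{
	if (dontfrag && alwaysfrag)
		return (-1);			/* case 4: conflict */
	if (dontfrag)
		return (tlen <= ifmtu ? 0 : -1);	/* cases 2-a / 2-b */
	if (alwaysfrag)
		return (1);			/* case 3 */
	return (tlen <= pmtu ? 0 : 1);		/* cases 1-a / 1-b */
}

/*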
* * the logic here is rather complex: * 1: normal case (dontfrag == 0, alwaysfrag == 0) * 1-a: send as is if tlen <= path mtu * 1-b: fragment if tlen > path mtu * * 2: if user asks us not to fragment (dontfrag == 1) * 2-a: send as is if tlen <= interface mtu * 2-b: error if tlen > interface mtu * * 3: if we always need to attach fragment header (alwaysfrag == 1) * always fragment * * 4: if dontfrag == 1 && alwaysfrag == 1 * error, as we cannot handle this conflicting request */ sw_csum = m->m_pkthdr.csum_flags; if (!hdrsplit) { tso = ((sw_csum & ifp->if_hwassist & CSUM_TSO) != 0) ? 1 : 0; sw_csum &= ~ifp->if_hwassist; } else tso = 0; /* * If we added extension headers, we will not do TSO and calculate the * checksums ourselves for now. * XXX-BZ Need a framework to know when the NIC can handle it, even * with ext. hdrs. */ if (sw_csum & CSUM_DELAY_DATA_IPV6) { sw_csum &= ~CSUM_DELAY_DATA_IPV6; in6_delayed_cksum(m, plen, sizeof(struct ip6_hdr)); } #ifdef SCTP if (sw_csum & CSUM_SCTP_IPV6) { sw_csum &= ~CSUM_SCTP_IPV6; sctp_delayed_cksum(m, sizeof(struct ip6_hdr)); } #endif m->m_pkthdr.csum_flags &= ifp->if_hwassist; tlen = m->m_pkthdr.len; if ((opt && (opt->ip6po_flags & IP6PO_DONTFRAG)) || tso) dontfrag = 1; else dontfrag = 0; if (dontfrag && alwaysfrag) { /* case 4 */ /* conflicting request - can't transmit */ error = EMSGSIZE; goto bad; } if (dontfrag && tlen > IN6_LINKMTU(ifp) && !tso) { /* case 2-b */ /* * Even if the DONTFRAG option is specified, we cannot send the * packet when the data length is larger than the MTU of the * outgoing interface. * Notify the error by sending IPV6_PATHMTU ancillary data if * application wanted to know the MTU value. Also return an * error code (this is not described in the API spec). */ if (inp != NULL) ip6_notify_pmtu(inp, &dst_sa, (u_int32_t)mtu); error = EMSGSIZE; goto bad; } /* * transmit packet without fragmentation */ if (dontfrag || (!alwaysfrag && tlen <= mtu)) { /* case 1-a and 2-a */ struct in6_ifaddr *ia6; ip6 = mtod(m, struct ip6_hdr *); ia6 = in6_ifawithifp(ifp, &ip6->ip6_src); if (ia6) { /* Record statistics for this interface address. */ counter_u64_add(ia6->ia_ifa.ifa_opackets, 1); counter_u64_add(ia6->ia_ifa.ifa_obytes, m->m_pkthdr.len); ifa_free(&ia6->ia_ifa); } error = nd6_output_ifp(ifp, origifp, m, dst, NULL); goto done; } /* * try to fragment the packet. case 1-b and 3 */ if (mtu < IPV6_MMTU) { /* path MTU cannot be less than IPV6_MMTU */ error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else if (ip6->ip6_plen == 0) { /* jumbo payload cannot be fragmented */ error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else { u_char nextproto; /* * Too large for the destination or interface; * fragment if possible. * Must be able to put at least 8 bytes per fragment. */ hlen = unfragpartlen; if (mtu > IPV6_MAXPACKET) mtu = IPV6_MAXPACKET; len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7; if (len < 8) { error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } /* * If the interface will not calculate checksums on * fragmented packets, then do it here. * XXX-BZ handle the hw offloading case. Need flags. */ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { in6_delayed_cksum(m, plen, hlen); m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) { sctp_delayed_cksum(m, hlen); m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6; } #endif /* * Change the next header field of the last header in the * unfragmentable part. 
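 * (A worked example of the fragment sizing above follows.)
 */

/*
 * The per-fragment payload computed above is
 * (mtu - hlen - sizeof(struct ip6_frag)) & ~7: with a 1500-byte path
 * MTU and a 40-byte unfragmentable part (bare IPv6 header), each
 * fragment carries (1500 - 40 - 8) & ~7 = 1448 bytes. Stand-alone
 * sketch:
 */
static unsigned long
frag_payload(unsigned long mtu, unsigned long hlen)
{
	/* 8 == sizeof(struct ip6_frag); result is 8-byte aligned. */
	return ((mtu - hlen - 8) & ~7UL);
}

/*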
*/ if (exthdrs.ip6e_rthdr) { nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *); *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT; } else if (exthdrs.ip6e_dest1) { nextproto = *mtod(exthdrs.ip6e_dest1, u_char *); *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT; } else if (exthdrs.ip6e_hbh) { nextproto = *mtod(exthdrs.ip6e_hbh, u_char *); *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT; } else { nextproto = ip6->ip6_nxt; ip6->ip6_nxt = IPPROTO_FRAGMENT; } /* * Loop through length of segment after first fragment, * make new header and copy data of each part and link onto * chain. */ m0 = m; id = htonl(ip6_randomid()); if ((error = ip6_fragment(ifp, m, hlen, nextproto, len, id))) goto sendorfree; in6_ifstat_inc(ifp, ifs6_out_fragok); } /* * Remove leading garbage. */ sendorfree: m = m0->m_nextpkt; m0->m_nextpkt = 0; m_freem(m0); for (m0 = m; m; m = m0) { m0 = m->m_nextpkt; m->m_nextpkt = 0; if (error == 0) { /* Record statistics for this interface address. */ if (ia) { counter_u64_add(ia->ia_ifa.ifa_opackets, 1); counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len); } error = nd6_output_ifp(ifp, origifp, m, dst, NULL); } else m_freem(m); } if (error == 0) IP6STAT_INC(ip6s_fragmented); done: if (ro == &ip6route) RO_RTFREE(ro); - if (ro_pmtu == &ip6route) - RO_RTFREE(ro_pmtu); return (error); freehdrs: m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */ m_freem(exthdrs.ip6e_dest1); m_freem(exthdrs.ip6e_rthdr); m_freem(exthdrs.ip6e_dest2); /* FALLTHROUGH */ bad: if (m) m_freem(m); goto done; } static int ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen) { struct mbuf *m; if (hlen > MCLBYTES) return (ENOBUFS); /* XXX */ if (hlen > MLEN) m = m_getcl(M_NOWAIT, MT_DATA, 0); else m = m_get(M_NOWAIT, MT_DATA); if (m == NULL) return (ENOBUFS); m->m_len = hlen; if (hdr) bcopy(hdr, mtod(m, caddr_t), hlen); *mp = m; return (0); } /* * Insert jumbo payload option. */ static int ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen) { struct mbuf *mopt; u_char *optbuf; u_int32_t v; #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */ /* * If there is no hop-by-hop options header, allocate a new one. * If there is one but it doesn't have enough space to store the * jumbo payload option, allocate a cluster to store the whole options. * Otherwise, use it to store the options. */ if (exthdrs->ip6e_hbh == 0) { mopt = m_get(M_NOWAIT, MT_DATA); if (mopt == NULL) return (ENOBUFS); mopt->m_len = JUMBOOPTLEN; optbuf = mtod(mopt, u_char *); optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */ exthdrs->ip6e_hbh = mopt; } else { struct ip6_hbh *hbh; mopt = exthdrs->ip6e_hbh; if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) { /* * XXX assumption: * - exthdrs->ip6e_hbh is not referenced from places * other than exthdrs. * - exthdrs->ip6e_hbh is not an mbuf chain. */ int oldoptlen = mopt->m_len; struct mbuf *n; /* * XXX: give up if the whole (new) hbh header does * not fit even in an mbuf cluster. */ if (oldoptlen + JUMBOOPTLEN > MCLBYTES) return (ENOBUFS); /* * As a consequence, we must always prepare a cluster * at this point.
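 * (A sketch of the RFC 2675 option laid out below follows.)
 */

/*
 * The jumbo payload option filled in below is defined by RFC 2675:
 * type 0xc2 (IP6OPT_JUMBO), option data length 4, then the 32-bit
 * payload length; PadN fill keeps the required 4n+2 alignment.
 * A hypothetical stand-alone encoder for just the option itself:
 */
#include <stdint.h>
#include <string.h>
#include <netinet/in.h>		/* htonl() */

static void
encode_jumbo_opt(uint8_t *optbuf, uint32_t plen)
{
	uint32_t v = htonl(plen);

	optbuf[0] = 0xc2;	/* IP6OPT_JUMBO */
	optbuf[1] = 4;		/* length of the option data */
	memcpy(&optbuf[2], &v, sizeof(v));
}

/*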
*/ n = m_getcl(M_NOWAIT, MT_DATA, 0); if (n == NULL) return (ENOBUFS); n->m_len = oldoptlen + JUMBOOPTLEN; bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t), oldoptlen); optbuf = mtod(n, caddr_t) + oldoptlen; m_freem(mopt); mopt = exthdrs->ip6e_hbh = n; } else { optbuf = mtod(mopt, u_char *) + mopt->m_len; mopt->m_len += JUMBOOPTLEN; } optbuf[0] = IP6OPT_PADN; optbuf[1] = 1; /* * Adjust the header length according to the pad and * the jumbo payload option. */ hbh = mtod(mopt, struct ip6_hbh *); hbh->ip6h_len += (JUMBOOPTLEN >> 3); } /* fill in the option. */ optbuf[2] = IP6OPT_JUMBO; optbuf[3] = 4; v = (u_int32_t)htonl(plen + JUMBOOPTLEN); bcopy(&v, &optbuf[4], sizeof(u_int32_t)); /* finally, adjust the packet header length */ exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN; return (0); #undef JUMBOOPTLEN } /* * Insert fragment header and copy unfragmentable header portions. */ static int ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen, struct ip6_frag **frghdrp) { struct mbuf *n, *mlast; if (hlen > sizeof(struct ip6_hdr)) { n = m_copym(m0, sizeof(struct ip6_hdr), hlen - sizeof(struct ip6_hdr), M_NOWAIT); if (n == 0) return (ENOBUFS); m->m_next = n; } else n = m; /* Search for the last mbuf of unfragmentable part. */ for (mlast = n; mlast->m_next; mlast = mlast->m_next) ; if (M_WRITABLE(mlast) && M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) { /* use the trailing space of the last mbuf for the fragment hdr */ *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) + mlast->m_len); mlast->m_len += sizeof(struct ip6_frag); m->m_pkthdr.len += sizeof(struct ip6_frag); } else { /* allocate a new mbuf for the fragment header */ struct mbuf *mfrg; mfrg = m_get(M_NOWAIT, MT_DATA); if (mfrg == NULL) return (ENOBUFS); mfrg->m_len = sizeof(struct ip6_frag); *frghdrp = mtod(mfrg, struct ip6_frag *); mlast->m_next = mfrg; } return (0); } +/* + * Calculates IPv6 path mtu for destination @dst. + * Resulting MTU is stored in @mtup. + * + * Returns 0 on success. + */ +static int +ip6_getpmtu_ctl(u_int fibnum, struct in6_addr *dst, u_long *mtup) +{ + struct nhop6_extended nh6; + struct in6_addr kdst; + uint32_t scopeid; + struct ifnet *ifp; + u_long mtu; + int error; + + in6_splitscope(dst, &kdst, &scopeid); + if (fib6_lookup_nh_ext(fibnum, &kdst, scopeid, NHR_REF, 0, &nh6) != 0) + return (EHOSTUNREACH); + + ifp = nh6.nh_ifp; + mtu = nh6.nh_mtu; + + error = ip6_calcmtu(ifp, dst, mtu, mtup, NULL); + fib6_free_nh_ext(fibnum, &nh6); + + return (error); +} + +/* + * Calculates IPv6 path MTU for @dst based on transmit @ifp, + * and cached data in @ro_pmtu. + * MTU from (successful) route lookup is saved (along with dst) + * inside @ro_pmtu to avoid subsequent route lookups after packet + * filter processing. + * + * Stores mtu and always-frag value into @mtup and @alwaysfragp. + * Returns 0 on success. + */ static int -ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro, +ip6_getpmtu(struct route_in6 *ro_pmtu, int do_lookup, struct ifnet *ifp, struct in6_addr *dst, u_long *mtup, int *alwaysfragp, u_int fibnum) { - u_int32_t mtu = 0; - int alwaysfrag = 0; - int error = 0; + struct nhop6_basic nh6; + struct in6_addr kdst; + uint32_t scopeid; + struct sockaddr_in6 *sa6_dst; + u_long mtu; - if (ro_pmtu != ro) { - /* The first hop and the final destination may differ. 
*/ - struct sockaddr_in6 *sa6_dst = - (struct sockaddr_in6 *)&ro_pmtu->ro_dst; - if (ro_pmtu->ro_rt && - ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 || - !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) { - RTFREE(ro_pmtu->ro_rt); - ro_pmtu->ro_rt = (struct rtentry *)NULL; - } - if (ro_pmtu->ro_rt == NULL) { + mtu = 0; + if (do_lookup) { + + /* + * Here ro_pmtu has final destination address, while + * ro might represent immediate destination. + * Use ro_pmtu destination since mtu might differ. + */ + sa6_dst = (struct sockaddr_in6 *)&ro_pmtu->ro_dst; + if (!IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst)) + ro_pmtu->ro_mtu = 0; + + if (ro_pmtu->ro_mtu == 0) { bzero(sa6_dst, sizeof(*sa6_dst)); sa6_dst->sin6_family = AF_INET6; sa6_dst->sin6_len = sizeof(struct sockaddr_in6); sa6_dst->sin6_addr = *dst; - in6_rtalloc(ro_pmtu, fibnum); + in6_splitscope(dst, &kdst, &scopeid); + if (fib6_lookup_nh_basic(fibnum, &kdst, scopeid, 0, 0, + &nh6) == 0) + ro_pmtu->ro_mtu = nh6.nh_mtu; } + + mtu = ro_pmtu->ro_mtu; } - if (ro_pmtu->ro_rt) { + + if (ro_pmtu->ro_rt) + mtu = ro_pmtu->ro_rt->rt_mtu; + + return (ip6_calcmtu(ifp, dst, mtu, mtup, alwaysfragp)); +} + +/* + * Calculate MTU based on transmit @ifp, route mtu @rt_mtu and + * hostcache data for @dst. + * Stores mtu and always-frag value into @mtup and @alwaysfragp. + * + * Returns 0 on success. + */ +static int +ip6_calcmtu(struct ifnet *ifp, const struct in6_addr *dst, u_long rt_mtu, + u_long *mtup, int *alwaysfragp) +{ + u_long mtu = 0; + int alwaysfrag = 0; + int error = 0; + + if (rt_mtu > 0) { u_int32_t ifmtu; struct in_conninfo inc; bzero(&inc, sizeof(inc)); inc.inc_flags |= INC_ISIPV6; inc.inc6_faddr = *dst; - if (ifp == NULL) - ifp = ro_pmtu->ro_rt->rt_ifp; ifmtu = IN6_LINKMTU(ifp); mtu = tcp_hc_getmtu(&inc); if (mtu) - mtu = min(mtu, ro_pmtu->ro_rt->rt_mtu); + mtu = min(mtu, rt_mtu); else - mtu = ro_pmtu->ro_rt->rt_mtu; + mtu = rt_mtu; if (mtu == 0) mtu = ifmtu; else if (mtu < IPV6_MMTU) { /* * RFC2460 section 5, last paragraph: * if we record an ICMPv6 too big message with * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU * or smaller, with a fragment header attached. * (fragment header is needed regardless of the * packet size, for translators to identify packets) */ alwaysfrag = 1; mtu = IPV6_MMTU; } } else if (ifp) { mtu = IN6_LINKMTU(ifp); } else error = EHOSTUNREACH; /* XXX */ *mtup = mtu; if (alwaysfragp) *alwaysfragp = alwaysfrag; return (error); } /* * IP6 socket option processing.
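 * (First, a distilled sketch of the minimum-MTU clamp in
 * ip6_calcmtu() above.)
 */

/*
 * Hypothetical, simplified version of the RFC 2460 section 5 rule
 * applied above: a recorded path MTU below the IPv6 minimum of 1280
 * bytes (IPV6_MMTU) does not shrink packets further, it only forces
 * a fragment header onto every packet sent on that path.
 */
static unsigned long
clamp_pmtu(unsigned long mtu, int *alwaysfrag)
{
	*alwaysfrag = 0;
	if (mtu != 0 && mtu < 1280) {	/* 1280 == IPV6_MMTU */
		*alwaysfrag = 1;
		return (1280);
	}
	return (mtu);
}

/*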
*/ int ip6_ctloutput(struct socket *so, struct sockopt *sopt) { int optdatalen, uproto; void *optdata; struct inpcb *in6p = sotoinpcb(so); int error, optval; int level, op, optname; int optlen; struct thread *td; #ifdef RSS uint32_t rss_bucket; int retval; #endif level = sopt->sopt_level; op = sopt->sopt_dir; optname = sopt->sopt_name; optlen = sopt->sopt_valsize; td = sopt->sopt_td; error = 0; optval = 0; uproto = (int)so->so_proto->pr_protocol; if (level != IPPROTO_IPV6) { error = EINVAL; if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_dir == SOPT_SET) { switch (sopt->sopt_name) { case SO_REUSEADDR: INP_WLOCK(in6p); if ((so->so_options & SO_REUSEADDR) != 0) in6p->inp_flags2 |= INP_REUSEADDR; else in6p->inp_flags2 &= ~INP_REUSEADDR; INP_WUNLOCK(in6p); error = 0; break; case SO_REUSEPORT: INP_WLOCK(in6p); if ((so->so_options & SO_REUSEPORT) != 0) in6p->inp_flags2 |= INP_REUSEPORT; else in6p->inp_flags2 &= ~INP_REUSEPORT; INP_WUNLOCK(in6p); error = 0; break; case SO_SETFIB: INP_WLOCK(in6p); in6p->inp_inc.inc_fibnum = so->so_fibnum; INP_WUNLOCK(in6p); error = 0; break; default: break; } } } else { /* level == IPPROTO_IPV6 */ switch (op) { case SOPT_SET: switch (optname) { case IPV6_2292PKTOPTIONS: #ifdef IPV6_PKTOPTIONS case IPV6_PKTOPTIONS: #endif { struct mbuf *m; error = soopt_getm(sopt, &m); /* XXX */ if (error != 0) break; error = soopt_mcopyin(sopt, m); /* XXX */ if (error != 0) break; error = ip6_pcbopts(&in6p->in6p_outputopts, m, so, sopt); m_freem(m); /* XXX */ break; } /* * Use of some Hop-by-Hop options or some * Destination options, might require special * privilege. That is, normal applications * (without special privilege) might be forbidden * from setting certain options in outgoing packets, * and might never see certain options in received * packets. [RFC 2292 Section 6] * KAME specific note: * KAME prevents non-privileged users from sending or * receiving ANY hbh/dst options in order to avoid * overhead of parsing options in the kernel. */ case IPV6_RECVHOPOPTS: case IPV6_RECVDSTOPTS: case IPV6_RECVRTHDRDSTOPTS: if (td != NULL) { error = priv_check(td, PRIV_NETINET_SETHDROPTS); if (error) break; } /* FALLTHROUGH */ case IPV6_UNICAST_HOPS: case IPV6_HOPLIMIT: case IPV6_RECVPKTINFO: case IPV6_RECVHOPLIMIT: case IPV6_RECVRTHDR: case IPV6_RECVPATHMTU: case IPV6_RECVTCLASS: case IPV6_RECVFLOWID: #ifdef RSS case IPV6_RECVRSSBUCKETID: #endif case IPV6_V6ONLY: case IPV6_AUTOFLOWLABEL: case IPV6_BINDANY: case IPV6_BINDMULTI: #ifdef RSS case IPV6_RSS_LISTEN_BUCKET: #endif if (optname == IPV6_BINDANY && td != NULL) { error = priv_check(td, PRIV_NETINET_BINDANY); if (error) break; } if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optname) { case IPV6_UNICAST_HOPS: if (optval < -1 || optval >= 256) error = EINVAL; else { /* -1 = kernel default */ in6p->in6p_hops = optval; if ((in6p->inp_vflag & INP_IPV4) != 0) in6p->inp_ip_ttl = optval; } break; #define OPTSET(bit) \ do { \ INP_WLOCK(in6p); \ if (optval) \ in6p->inp_flags |= (bit); \ else \ in6p->inp_flags &= ~(bit); \ INP_WUNLOCK(in6p); \ } while (/*CONSTCOND*/ 0) #define OPTSET2292(bit) \ do { \ INP_WLOCK(in6p); \ in6p->inp_flags |= IN6P_RFC2292; \ if (optval) \ in6p->inp_flags |= (bit); \ else \ in6p->inp_flags &= ~(bit); \ INP_WUNLOCK(in6p); \ } while (/*CONSTCOND*/ 0) #define OPTBIT(bit) (in6p->inp_flags & (bit) ? 
1 : 0) #define OPTSET2(bit, val) do { \ INP_WLOCK(in6p); \ if (val) \ in6p->inp_flags2 |= bit; \ else \ in6p->inp_flags2 &= ~bit; \ INP_WUNLOCK(in6p); \ } while (0) #define OPTBIT2(bit) (in6p->inp_flags2 & (bit) ? 1 : 0) case IPV6_RECVPKTINFO: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_PKTINFO); break; case IPV6_HOPLIMIT: { struct ip6_pktopts **optp; /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } optp = &in6p->in6p_outputopts; error = ip6_pcbopt(IPV6_HOPLIMIT, (u_char *)&optval, sizeof(optval), optp, (td != NULL) ? td->td_ucred : NULL, uproto); break; } case IPV6_RECVHOPLIMIT: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_HOPLIMIT); break; case IPV6_RECVHOPOPTS: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_HOPOPTS); break; case IPV6_RECVDSTOPTS: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_DSTOPTS); break; case IPV6_RECVRTHDRDSTOPTS: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_RTHDRDSTOPTS); break; case IPV6_RECVRTHDR: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_RTHDR); break; case IPV6_RECVPATHMTU: /* * We ignore this option for TCP * sockets. * (RFC3542 leaves this case * unspecified.) */ if (uproto != IPPROTO_TCP) OPTSET(IN6P_MTU); break; case IPV6_RECVFLOWID: OPTSET2(INP_RECVFLOWID, optval); break; #ifdef RSS case IPV6_RECVRSSBUCKETID: OPTSET2(INP_RECVRSSBUCKETID, optval); break; #endif case IPV6_V6ONLY: /* * make setsockopt(IPV6_V6ONLY) * available only prior to bind(2). * see ipng mailing list, Jun 22 2001. */ if (in6p->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) { error = EINVAL; break; } OPTSET(IN6P_IPV6_V6ONLY); if (optval) in6p->inp_vflag &= ~INP_IPV4; else in6p->inp_vflag |= INP_IPV4; break; case IPV6_RECVTCLASS: /* cannot mix with RFC2292 XXX */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_TCLASS); break; case IPV6_AUTOFLOWLABEL: OPTSET(IN6P_AUTOFLOWLABEL); break; case IPV6_BINDANY: OPTSET(INP_BINDANY); break; case IPV6_BINDMULTI: OPTSET2(INP_BINDMULTI, optval); break; #ifdef RSS case IPV6_RSS_LISTEN_BUCKET: if ((optval >= 0) && (optval < rss_getnumbuckets())) { in6p->inp_rss_listen_bucket = optval; OPTSET2(INP_RSS_BUCKET_SET, 1); } else { error = EINVAL; } break; #endif } break; case IPV6_TCLASS: case IPV6_DONTFRAG: case IPV6_USE_MIN_MTU: case IPV6_PREFER_TEMPADDR: if (optlen != sizeof(optval)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; { struct ip6_pktopts **optp; optp = &in6p->in6p_outputopts; error = ip6_pcbopt(optname, (u_char *)&optval, sizeof(optval), optp, (td != NULL) ? td->td_ucred : NULL, uproto); break; } case IPV6_2292PKTINFO: case IPV6_2292HOPLIMIT: case IPV6_2292HOPOPTS: case IPV6_2292DSTOPTS: case IPV6_2292RTHDR: /* RFC 2292 */ if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optname) { case IPV6_2292PKTINFO: OPTSET2292(IN6P_PKTINFO); break; case IPV6_2292HOPLIMIT: OPTSET2292(IN6P_HOPLIMIT); break; case IPV6_2292HOPOPTS: /* * Check super-user privilege. * See comments for IPV6_RECVHOPOPTS. 
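* (As with the RFC 2292 note above, hop-by-hop and destination
* options are gated by PRIV_NETINET_SETHDROPTS as a whole; there
* is no per-option granularity.)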
*/ if (td != NULL) { error = priv_check(td, PRIV_NETINET_SETHDROPTS); if (error) return (error); } OPTSET2292(IN6P_HOPOPTS); break; case IPV6_2292DSTOPTS: if (td != NULL) { error = priv_check(td, PRIV_NETINET_SETHDROPTS); if (error) return (error); } OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */ break; case IPV6_2292RTHDR: OPTSET2292(IN6P_RTHDR); break; } break; case IPV6_PKTINFO: case IPV6_HOPOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_NEXTHOP: { /* new advanced API (RFC3542) */ u_char *optbuf; u_char optbuf_storage[MCLBYTES]; int optlen; struct ip6_pktopts **optp; /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } /* * We only ensure valsize is not too large * here. Further validation will be done * later. */ error = sooptcopyin(sopt, optbuf_storage, sizeof(optbuf_storage), 0); if (error) break; optlen = sopt->sopt_valsize; optbuf = optbuf_storage; optp = &in6p->in6p_outputopts; error = ip6_pcbopt(optname, optbuf, optlen, optp, (td != NULL) ? td->td_ucred : NULL, uproto); break; } #undef OPTSET case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: case IPV6_JOIN_GROUP: case IPV6_LEAVE_GROUP: case IPV6_MSFILTER: case MCAST_BLOCK_SOURCE: case MCAST_UNBLOCK_SOURCE: case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: error = ip6_setmoptions(in6p, sopt); break; case IPV6_PORTRANGE: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; INP_WLOCK(in6p); switch (optval) { case IPV6_PORTRANGE_DEFAULT: in6p->inp_flags &= ~(INP_LOWPORT); in6p->inp_flags &= ~(INP_HIGHPORT); break; case IPV6_PORTRANGE_HIGH: in6p->inp_flags &= ~(INP_LOWPORT); in6p->inp_flags |= INP_HIGHPORT; break; case IPV6_PORTRANGE_LOW: in6p->inp_flags &= ~(INP_HIGHPORT); in6p->inp_flags |= INP_LOWPORT; break; default: error = EINVAL; break; } INP_WUNLOCK(in6p); break; #ifdef IPSEC case IPV6_IPSEC_POLICY: { caddr_t req; struct mbuf *m; if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ break; if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ break; req = mtod(m, caddr_t); error = ipsec_set_policy(in6p, optname, req, m->m_len, (sopt->sopt_td != NULL) ? sopt->sopt_td->td_ucred : NULL); m_freem(m); break; } #endif /* IPSEC */ default: error = ENOPROTOOPT; break; } break; case SOPT_GET: switch (optname) { case IPV6_2292PKTOPTIONS: #ifdef IPV6_PKTOPTIONS case IPV6_PKTOPTIONS: #endif /* * RFC3542 (effectively) deprecated the * semantics of the 2292-style pktoptions. * Since it was not reliable in nature (i.e., * applications had to expect the lack of some * information after all), it would make sense * to simplify this part by always returning * empty data. 
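* For example, a legacy caller doing
*
*	socklen_t len = sizeof(buf);
*	getsockopt(s, IPPROTO_IPV6, IPV6_2292PKTOPTIONS, buf, &len);
*
* (`s' and `buf' being hypothetical) now simply observes len == 0
* on success, as implemented below.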
*/ sopt->sopt_valsize = 0; break; case IPV6_RECVHOPOPTS: case IPV6_RECVDSTOPTS: case IPV6_RECVRTHDRDSTOPTS: case IPV6_UNICAST_HOPS: case IPV6_RECVPKTINFO: case IPV6_RECVHOPLIMIT: case IPV6_RECVRTHDR: case IPV6_RECVPATHMTU: case IPV6_V6ONLY: case IPV6_PORTRANGE: case IPV6_RECVTCLASS: case IPV6_AUTOFLOWLABEL: case IPV6_BINDANY: case IPV6_FLOWID: case IPV6_FLOWTYPE: case IPV6_RECVFLOWID: #ifdef RSS case IPV6_RSSBUCKETID: case IPV6_RECVRSSBUCKETID: #endif case IPV6_BINDMULTI: switch (optname) { case IPV6_RECVHOPOPTS: optval = OPTBIT(IN6P_HOPOPTS); break; case IPV6_RECVDSTOPTS: optval = OPTBIT(IN6P_DSTOPTS); break; case IPV6_RECVRTHDRDSTOPTS: optval = OPTBIT(IN6P_RTHDRDSTOPTS); break; case IPV6_UNICAST_HOPS: optval = in6p->in6p_hops; break; case IPV6_RECVPKTINFO: optval = OPTBIT(IN6P_PKTINFO); break; case IPV6_RECVHOPLIMIT: optval = OPTBIT(IN6P_HOPLIMIT); break; case IPV6_RECVRTHDR: optval = OPTBIT(IN6P_RTHDR); break; case IPV6_RECVPATHMTU: optval = OPTBIT(IN6P_MTU); break; case IPV6_V6ONLY: optval = OPTBIT(IN6P_IPV6_V6ONLY); break; case IPV6_PORTRANGE: { int flags; flags = in6p->inp_flags; if (flags & INP_HIGHPORT) optval = IPV6_PORTRANGE_HIGH; else if (flags & INP_LOWPORT) optval = IPV6_PORTRANGE_LOW; else optval = 0; break; } case IPV6_RECVTCLASS: optval = OPTBIT(IN6P_TCLASS); break; case IPV6_AUTOFLOWLABEL: optval = OPTBIT(IN6P_AUTOFLOWLABEL); break; case IPV6_BINDANY: optval = OPTBIT(INP_BINDANY); break; case IPV6_FLOWID: optval = in6p->inp_flowid; break; case IPV6_FLOWTYPE: optval = in6p->inp_flowtype; break; case IPV6_RECVFLOWID: optval = OPTBIT2(INP_RECVFLOWID); break; #ifdef RSS case IPV6_RSSBUCKETID: retval = rss_hash2bucket(in6p->inp_flowid, in6p->inp_flowtype, &rss_bucket); if (retval == 0) optval = rss_bucket; else error = EINVAL; break; case IPV6_RECVRSSBUCKETID: optval = OPTBIT2(INP_RECVRSSBUCKETID); break; #endif case IPV6_BINDMULTI: optval = OPTBIT2(INP_BINDMULTI); break; } if (error) break; error = sooptcopyout(sopt, &optval, sizeof optval); break; case IPV6_PATHMTU: { u_long pmtu = 0; struct ip6_mtuinfo mtuinfo; - struct route_in6 sro; - - bzero(&sro, sizeof(sro)); if (!(so->so_state & SS_ISCONNECTED)) return (ENOTCONN); /* * XXX: we do not consider the case of source * routing, or optional information to specify * the outgoing interface. 
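* An RFC 3542 application typically reads the value computed below
* as (`s' and use() being hypothetical):
*
*	struct ip6_mtuinfo mtuinfo;
*	socklen_t len = sizeof(mtuinfo);
*	if (getsockopt(s, IPPROTO_IPV6, IPV6_PATHMTU, &mtuinfo, &len) == 0)
*		use(mtuinfo.ip6m_mtu);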
*/ - error = ip6_getpmtu(&sro, NULL, NULL, - &in6p->in6p_faddr, &pmtu, NULL, - so->so_fibnum); - if (sro.ro_rt) - RTFREE(sro.ro_rt); + error = ip6_getpmtu_ctl(so->so_fibnum, + &in6p->in6p_faddr, &pmtu); if (error) break; if (pmtu > IPV6_MAXPACKET) pmtu = IPV6_MAXPACKET; bzero(&mtuinfo, sizeof(mtuinfo)); mtuinfo.ip6m_mtu = (u_int32_t)pmtu; optdata = (void *)&mtuinfo; optdatalen = sizeof(mtuinfo); error = sooptcopyout(sopt, optdata, optdatalen); break; } case IPV6_2292PKTINFO: case IPV6_2292HOPLIMIT: case IPV6_2292HOPOPTS: case IPV6_2292RTHDR: case IPV6_2292DSTOPTS: switch (optname) { case IPV6_2292PKTINFO: optval = OPTBIT(IN6P_PKTINFO); break; case IPV6_2292HOPLIMIT: optval = OPTBIT(IN6P_HOPLIMIT); break; case IPV6_2292HOPOPTS: optval = OPTBIT(IN6P_HOPOPTS); break; case IPV6_2292RTHDR: optval = OPTBIT(IN6P_RTHDR); break; case IPV6_2292DSTOPTS: optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); break; } error = sooptcopyout(sopt, &optval, sizeof optval); break; case IPV6_PKTINFO: case IPV6_HOPOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_NEXTHOP: case IPV6_TCLASS: case IPV6_DONTFRAG: case IPV6_USE_MIN_MTU: case IPV6_PREFER_TEMPADDR: error = ip6_getpcbopt(in6p->in6p_outputopts, optname, sopt); break; case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: case IPV6_MSFILTER: error = ip6_getmoptions(in6p, sopt); break; #ifdef IPSEC case IPV6_IPSEC_POLICY: { caddr_t req = NULL; size_t len = 0; struct mbuf *m = NULL; struct mbuf **mp = &m; size_t ovalsize = sopt->sopt_valsize; caddr_t oval = (caddr_t)sopt->sopt_val; error = soopt_getm(sopt, &m); /* XXX */ if (error != 0) break; error = soopt_mcopyin(sopt, m); /* XXX */ if (error != 0) break; sopt->sopt_valsize = ovalsize; sopt->sopt_val = oval; if (m) { req = mtod(m, caddr_t); len = m->m_len; } error = ipsec_get_policy(in6p, req, len, mp); if (error == 0) error = soopt_mcopyout(sopt, m); /* XXX */ if (error == 0 && m) m_freem(m); break; } #endif /* IPSEC */ default: error = ENOPROTOOPT; break; } break; } } return (error); } int ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt) { int error = 0, optval, optlen; const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum); struct inpcb *in6p = sotoinpcb(so); int level, op, optname; level = sopt->sopt_level; op = sopt->sopt_dir; optname = sopt->sopt_name; optlen = sopt->sopt_valsize; if (level != IPPROTO_IPV6) { return (EINVAL); } switch (optname) { case IPV6_CHECKSUM: /* * For ICMPv6 sockets, no modification allowed for checksum * offset, permit "no change" values to help existing apps. * * RFC3542 says: "An attempt to set IPV6_CHECKSUM * for an ICMPv6 socket will fail." * The current behavior does not meet RFC3542. */ switch (op) { case SOPT_SET: if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); if (error) break; if ((optval % 2) != 0) { /* the API assumes even offset values */ error = EINVAL; } else if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { if (optval != icmp6off) error = EINVAL; } else in6p->in6p_cksum = optval; break; case SOPT_GET: if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) optval = icmp6off; else optval = in6p->in6p_cksum; error = sooptcopyout(sopt, &optval, sizeof(optval)); break; default: error = EINVAL; break; } break; default: error = ENOPROTOOPT; break; } return (error); } /* * Set up IP6 options in pcb for insertion in output packets or * specifying behavior of outgoing packets. 
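* Passing a NULL or empty mbuf chain merely clears any previously
* set options; on success the new option block is hung off *pktopt.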
*/ static int ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m, struct socket *so, struct sockopt *sopt) { struct ip6_pktopts *opt = *pktopt; int error = 0; struct thread *td = sopt->sopt_td; /* turn off any old options. */ if (opt) { #ifdef DIAGNOSTIC if (opt->ip6po_pktinfo || opt->ip6po_nexthop || opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 || opt->ip6po_rhinfo.ip6po_rhi_rthdr) printf("ip6_pcbopts: all specified options are cleared.\n"); #endif ip6_clearpktopts(opt, -1); } else opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK); *pktopt = NULL; if (!m || m->m_len == 0) { /* * Only turning off any previous options, regardless of * whether the opt is just created or given. */ free(opt, M_IP6OPT); return (0); } /* set options specified by user. */ if ((error = ip6_setpktopts(m, opt, NULL, (td != NULL) ? td->td_ucred : NULL, so->so_proto->pr_protocol)) != 0) { ip6_clearpktopts(opt, -1); /* XXX: discard all options */ free(opt, M_IP6OPT); return (error); } *pktopt = opt; return (0); } /* * initialize ip6_pktopts. beware that there are non-zero default values in * the struct. */ void ip6_initpktopts(struct ip6_pktopts *opt) { bzero(opt, sizeof(*opt)); opt->ip6po_hlim = -1; /* -1 means default hop limit */ opt->ip6po_tclass = -1; /* -1 means default traffic class */ opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY; opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM; } static int ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, struct ucred *cred, int uproto) { struct ip6_pktopts *opt; if (*pktopt == NULL) { *pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT, M_WAITOK); ip6_initpktopts(*pktopt); } opt = *pktopt; return (ip6_setpktopt(optname, buf, len, opt, cred, 1, 0, uproto)); } static int ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt) { void *optdata = NULL; int optdatalen = 0; struct ip6_ext *ip6e; int error = 0; struct in6_pktinfo null_pktinfo; int deftclass = 0, on; int defminmtu = IP6PO_MINMTU_MCASTONLY; int defpreftemp = IP6PO_TEMPADDR_SYSTEM; switch (optname) { case IPV6_PKTINFO: optdata = (void *)&null_pktinfo; if (pktopt && pktopt->ip6po_pktinfo) { bcopy(pktopt->ip6po_pktinfo, &null_pktinfo, sizeof(null_pktinfo)); in6_clearscope(&null_pktinfo.ipi6_addr); } else { /* XXX: we don't have to do this every time... 
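* (presumably a single static, all-zero in6_pktinfo would do)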
*/ bzero(&null_pktinfo, sizeof(null_pktinfo)); } optdatalen = sizeof(struct in6_pktinfo); break; case IPV6_TCLASS: if (pktopt && pktopt->ip6po_tclass >= 0) optdata = (void *)&pktopt->ip6po_tclass; else optdata = (void *)&deftclass; optdatalen = sizeof(int); break; case IPV6_HOPOPTS: if (pktopt && pktopt->ip6po_hbh) { optdata = (void *)pktopt->ip6po_hbh; ip6e = (struct ip6_ext *)pktopt->ip6po_hbh; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_RTHDR: if (pktopt && pktopt->ip6po_rthdr) { optdata = (void *)pktopt->ip6po_rthdr; ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_RTHDRDSTOPTS: if (pktopt && pktopt->ip6po_dest1) { optdata = (void *)pktopt->ip6po_dest1; ip6e = (struct ip6_ext *)pktopt->ip6po_dest1; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_DSTOPTS: if (pktopt && pktopt->ip6po_dest2) { optdata = (void *)pktopt->ip6po_dest2; ip6e = (struct ip6_ext *)pktopt->ip6po_dest2; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_NEXTHOP: if (pktopt && pktopt->ip6po_nexthop) { optdata = (void *)pktopt->ip6po_nexthop; optdatalen = pktopt->ip6po_nexthop->sa_len; } break; case IPV6_USE_MIN_MTU: if (pktopt) optdata = (void *)&pktopt->ip6po_minmtu; else optdata = (void *)&defminmtu; optdatalen = sizeof(int); break; case IPV6_DONTFRAG: if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG)) on = 1; else on = 0; optdata = (void *)&on; optdatalen = sizeof(on); break; case IPV6_PREFER_TEMPADDR: if (pktopt) optdata = (void *)&pktopt->ip6po_prefer_tempaddr; else optdata = (void *)&defpreftemp; optdatalen = sizeof(int); break; default: /* should not happen */ #ifdef DIAGNOSTIC panic("ip6_getpcbopt: unexpected option\n"); #endif return (ENOPROTOOPT); } error = sooptcopyout(sopt, optdata, optdatalen); return (error); } void ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname) { if (pktopt == NULL) return; if (optname == -1 || optname == IPV6_PKTINFO) { if (pktopt->ip6po_pktinfo) free(pktopt->ip6po_pktinfo, M_IP6OPT); pktopt->ip6po_pktinfo = NULL; } if (optname == -1 || optname == IPV6_HOPLIMIT) pktopt->ip6po_hlim = -1; if (optname == -1 || optname == IPV6_TCLASS) pktopt->ip6po_tclass = -1; if (optname == -1 || optname == IPV6_NEXTHOP) { if (pktopt->ip6po_nextroute.ro_rt) { RTFREE(pktopt->ip6po_nextroute.ro_rt); pktopt->ip6po_nextroute.ro_rt = NULL; } if (pktopt->ip6po_nexthop) free(pktopt->ip6po_nexthop, M_IP6OPT); pktopt->ip6po_nexthop = NULL; } if (optname == -1 || optname == IPV6_HOPOPTS) { if (pktopt->ip6po_hbh) free(pktopt->ip6po_hbh, M_IP6OPT); pktopt->ip6po_hbh = NULL; } if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) { if (pktopt->ip6po_dest1) free(pktopt->ip6po_dest1, M_IP6OPT); pktopt->ip6po_dest1 = NULL; } if (optname == -1 || optname == IPV6_RTHDR) { if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr) free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT); pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL; if (pktopt->ip6po_route.ro_rt) { RTFREE(pktopt->ip6po_route.ro_rt); pktopt->ip6po_route.ro_rt = NULL; } } if (optname == -1 || optname == IPV6_DSTOPTS) { if (pktopt->ip6po_dest2) free(pktopt->ip6po_dest2, M_IP6OPT); pktopt->ip6po_dest2 = NULL; } } #define PKTOPT_EXTHDRCPY(type) \ do {\ if (src->type) {\ int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\ dst->type = malloc(hlen, M_IP6OPT, canwait);\ if (dst->type == NULL && canwait == M_NOWAIT)\ goto bad;\ bcopy(src->type, dst->type, hlen);\ }\ } while (/*CONSTCOND*/ 0) static int copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int 
canwait) { if (dst == NULL || src == NULL) { printf("copypktopts: invalid argument\n"); return (EINVAL); } dst->ip6po_hlim = src->ip6po_hlim; dst->ip6po_tclass = src->ip6po_tclass; dst->ip6po_flags = src->ip6po_flags; dst->ip6po_minmtu = src->ip6po_minmtu; dst->ip6po_prefer_tempaddr = src->ip6po_prefer_tempaddr; if (src->ip6po_pktinfo) { dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo), M_IP6OPT, canwait); if (dst->ip6po_pktinfo == NULL) goto bad; *dst->ip6po_pktinfo = *src->ip6po_pktinfo; } if (src->ip6po_nexthop) { dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len, M_IP6OPT, canwait); if (dst->ip6po_nexthop == NULL) goto bad; bcopy(src->ip6po_nexthop, dst->ip6po_nexthop, src->ip6po_nexthop->sa_len); } PKTOPT_EXTHDRCPY(ip6po_hbh); PKTOPT_EXTHDRCPY(ip6po_dest1); PKTOPT_EXTHDRCPY(ip6po_dest2); PKTOPT_EXTHDRCPY(ip6po_rthdr); /* do not copy the cached route */ return (0); bad: ip6_clearpktopts(dst, -1); return (ENOBUFS); } #undef PKTOPT_EXTHDRCPY struct ip6_pktopts * ip6_copypktopts(struct ip6_pktopts *src, int canwait) { int error; struct ip6_pktopts *dst; dst = malloc(sizeof(*dst), M_IP6OPT, canwait); if (dst == NULL) return (NULL); ip6_initpktopts(dst); if ((error = copypktopts(dst, src, canwait)) != 0) { free(dst, M_IP6OPT); return (NULL); } return (dst); } void ip6_freepcbopts(struct ip6_pktopts *pktopt) { if (pktopt == NULL) return; ip6_clearpktopts(pktopt, -1); free(pktopt, M_IP6OPT); } /* * Set IPv6 outgoing packet options based on advanced API. */ int ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt, struct ip6_pktopts *stickyopt, struct ucred *cred, int uproto) { struct cmsghdr *cm = 0; if (control == NULL || opt == NULL) return (EINVAL); ip6_initpktopts(opt); if (stickyopt) { int error; /* * If stickyopt is provided, make a local copy of the options * for this particular packet, then override them with ancillary * objects. * XXX: copypktopts() does not copy the cached route to a next * hop (if any). This is not very good in terms of efficiency, * but we can allow this since this option should be rarely * used. */ if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0) return (error); } /* * XXX: Currently, we assume all the optional information is stored * in a single mbuf. */ if (control->m_next) return (EINVAL); for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len), control->m_len -= CMSG_ALIGN(cm->cmsg_len)) { int error; if (control->m_len < CMSG_LEN(0)) return (EINVAL); cm = mtod(control, struct cmsghdr *); if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len) return (EINVAL); if (cm->cmsg_level != IPPROTO_IPV6) continue; error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm), cm->cmsg_len - CMSG_LEN(0), opt, cred, 0, 1, uproto); if (error) return (error); } return (0); } /* * Set a particular packet option, as a sticky option or an ancillary data * item. "len" can be 0 only when it's a sticky option. * We have 4 cases of combination of "sticky" and "cmsg": * "sticky=0, cmsg=0": impossible * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data * "sticky=1, cmsg=0": RFC3542 socket option * "sticky=1, cmsg=1": RFC2292 socket option */ static int ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, struct ucred *cred, int sticky, int cmsg, int uproto) { int minmtupolicy, preftemp; int error; if (!sticky && !cmsg) { #ifdef DIAGNOSTIC printf("ip6_setpktopt: impossible case\n"); #endif return (EINVAL); } /* * IPV6_2292xxx is for backward compatibility with RFC2292, and should * not be specified in the context of RFC3542. 
Conversely, * RFC3542 types should not be specified in the context of RFC2292. */ if (!cmsg) { switch (optname) { case IPV6_2292PKTINFO: case IPV6_2292HOPLIMIT: case IPV6_2292NEXTHOP: case IPV6_2292HOPOPTS: case IPV6_2292DSTOPTS: case IPV6_2292RTHDR: case IPV6_2292PKTOPTIONS: return (ENOPROTOOPT); } } if (sticky && cmsg) { switch (optname) { case IPV6_PKTINFO: case IPV6_HOPLIMIT: case IPV6_NEXTHOP: case IPV6_HOPOPTS: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_RTHDR: case IPV6_USE_MIN_MTU: case IPV6_DONTFRAG: case IPV6_TCLASS: case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */ return (ENOPROTOOPT); } } switch (optname) { case IPV6_2292PKTINFO: case IPV6_PKTINFO: { struct ifnet *ifp = NULL; struct in6_pktinfo *pktinfo; if (len != sizeof(struct in6_pktinfo)) return (EINVAL); pktinfo = (struct in6_pktinfo *)buf; /* * An application can clear any sticky IPV6_PKTINFO option by * doing a "regular" setsockopt with ipi6_addr being * in6addr_any and ipi6_ifindex being zero. * [RFC 3542, Section 6] */ if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo && pktinfo->ipi6_ifindex == 0 && IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { ip6_clearpktopts(opt, optname); break; } if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO && sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { return (EINVAL); } if (IN6_IS_ADDR_MULTICAST(&pktinfo->ipi6_addr)) return (EINVAL); /* validate the interface index if specified. */ if (pktinfo->ipi6_ifindex > V_if_index) return (ENXIO); if (pktinfo->ipi6_ifindex) { ifp = ifnet_byindex(pktinfo->ipi6_ifindex); if (ifp == NULL) return (ENXIO); } if (ifp != NULL && ( ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) return (ENETDOWN); if (ifp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { struct in6_ifaddr *ia; in6_setscope(&pktinfo->ipi6_addr, ifp, NULL); ia = in6ifa_ifpwithaddr(ifp, &pktinfo->ipi6_addr); if (ia == NULL) return (EADDRNOTAVAIL); ifa_free(&ia->ia_ifa); } /* * We store the address anyway, and let in6_selectsrc() * validate the specified address. This is because ipi6_addr * may not have enough information about its scope zone, and * we may need additional information (such as outgoing * interface or the scope zone of a destination address) to * disambiguate the scope. * XXX: the delay of the validation may confuse the * application when it is used as a sticky option. */ if (opt->ip6po_pktinfo == NULL) { opt->ip6po_pktinfo = malloc(sizeof(*pktinfo), M_IP6OPT, M_NOWAIT); if (opt->ip6po_pktinfo == NULL) return (ENOBUFS); } bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo)); break; } case IPV6_2292HOPLIMIT: case IPV6_HOPLIMIT: { int *hlimp; /* * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT * to simplify the ordering among hoplimit options. 
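* (An RFC 3542 application sets a sticky hop limit with the basic
* IPV6_UNICAST_HOPS socket option instead; per-packet values may
* still be passed as IPV6_HOPLIMIT ancillary data.)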
*/ if (optname == IPV6_HOPLIMIT && sticky) return (ENOPROTOOPT); if (len != sizeof(int)) return (EINVAL); hlimp = (int *)buf; if (*hlimp < -1 || *hlimp > 255) return (EINVAL); opt->ip6po_hlim = *hlimp; break; } case IPV6_TCLASS: { int tclass; if (len != sizeof(int)) return (EINVAL); tclass = *(int *)buf; if (tclass < -1 || tclass > 255) return (EINVAL); opt->ip6po_tclass = tclass; break; } case IPV6_2292NEXTHOP: case IPV6_NEXTHOP: if (cred != NULL) { error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS, 0); if (error) return (error); } if (len == 0) { /* just remove the option */ ip6_clearpktopts(opt, IPV6_NEXTHOP); break; } /* check if cmsg_len is large enough for sa_len */ if (len < sizeof(struct sockaddr) || len < *buf) return (EINVAL); switch (((struct sockaddr *)buf)->sa_family) { case AF_INET6: { struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf; int error; if (sa6->sin6_len != sizeof(struct sockaddr_in6)) return (EINVAL); if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) || IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) { return (EINVAL); } if ((error = sa6_embedscope(sa6, V_ip6_use_defzone)) != 0) { return (error); } break; } case AF_LINK: /* should eventually be supported */ default: return (EAFNOSUPPORT); } /* turn off the previous option, then set the new option. */ ip6_clearpktopts(opt, IPV6_NEXTHOP); opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT); if (opt->ip6po_nexthop == NULL) return (ENOBUFS); bcopy(buf, opt->ip6po_nexthop, *buf); break; case IPV6_2292HOPOPTS: case IPV6_HOPOPTS: { struct ip6_hbh *hbh; int hbhlen; /* * XXX: We don't allow a non-privileged user to set ANY HbH * options, since per-option restriction has too much * overhead. */ if (cred != NULL) { error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS, 0); if (error) return (error); } if (len == 0) { ip6_clearpktopts(opt, IPV6_HOPOPTS); break; /* just remove the option */ } /* message length validation */ if (len < sizeof(struct ip6_hbh)) return (EINVAL); hbh = (struct ip6_hbh *)buf; hbhlen = (hbh->ip6h_len + 1) << 3; if (len != hbhlen) return (EINVAL); /* turn off the previous option, then set the new option. */ ip6_clearpktopts(opt, IPV6_HOPOPTS); opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT); if (opt->ip6po_hbh == NULL) return (ENOBUFS); bcopy(hbh, opt->ip6po_hbh, hbhlen); break; } case IPV6_2292DSTOPTS: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: { struct ip6_dest *dest, **newdest = NULL; int destlen; if (cred != NULL) { /* XXX: see the comment for IPV6_HOPOPTS */ error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS, 0); if (error) return (error); } if (len == 0) { ip6_clearpktopts(opt, optname); break; /* just remove the option */ } /* message length validation */ if (len < sizeof(struct ip6_dest)) return (EINVAL); dest = (struct ip6_dest *)buf; destlen = (dest->ip6d_len + 1) << 3; if (len != destlen) return (EINVAL); /* * Determine the position at which the destination options header * should be inserted: before or after the routing header. */ switch (optname) { case IPV6_2292DSTOPTS: /* * The old advanced API is ambiguous on this point. * Our approach is to determine the position * according to the existence of a routing header. * Note, however, that this depends on the order of the * extension headers in the ancillary data; the 1st * part of the destination options header must appear * before the routing header in the ancillary data, * too. * RFC3542 solved the ambiguity by introducing * separate ancillary data or option types. 
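* With the RFC3542 types the mapping is fixed, as the cases below
* implement: IPV6_RTHDRDSTOPTS always selects ip6po_dest1 (before
* the routing header) and IPV6_DSTOPTS always selects ip6po_dest2
* (after it).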
*/ if (opt->ip6po_rthdr == NULL) newdest = &opt->ip6po_dest1; else newdest = &opt->ip6po_dest2; break; case IPV6_RTHDRDSTOPTS: newdest = &opt->ip6po_dest1; break; case IPV6_DSTOPTS: newdest = &opt->ip6po_dest2; break; } /* turn off the previous option, then set the new option. */ ip6_clearpktopts(opt, optname); *newdest = malloc(destlen, M_IP6OPT, M_NOWAIT); if (*newdest == NULL) return (ENOBUFS); bcopy(dest, *newdest, destlen); break; } case IPV6_2292RTHDR: case IPV6_RTHDR: { struct ip6_rthdr *rth; int rthlen; if (len == 0) { ip6_clearpktopts(opt, IPV6_RTHDR); break; /* just remove the option */ } /* message length validation */ if (len < sizeof(struct ip6_rthdr)) return (EINVAL); rth = (struct ip6_rthdr *)buf; rthlen = (rth->ip6r_len + 1) << 3; if (len != rthlen) return (EINVAL); switch (rth->ip6r_type) { case IPV6_RTHDR_TYPE_0: if (rth->ip6r_len == 0) /* must contain one addr */ return (EINVAL); if (rth->ip6r_len % 2) /* length must be even */ return (EINVAL); if (rth->ip6r_len / 2 != rth->ip6r_segleft) return (EINVAL); break; default: return (EINVAL); /* not supported */ } /* turn off the previous option */ ip6_clearpktopts(opt, IPV6_RTHDR); opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT); if (opt->ip6po_rthdr == NULL) return (ENOBUFS); bcopy(rth, opt->ip6po_rthdr, rthlen); break; } case IPV6_USE_MIN_MTU: if (len != sizeof(int)) return (EINVAL); minmtupolicy = *(int *)buf; if (minmtupolicy != IP6PO_MINMTU_MCASTONLY && minmtupolicy != IP6PO_MINMTU_DISABLE && minmtupolicy != IP6PO_MINMTU_ALL) { return (EINVAL); } opt->ip6po_minmtu = minmtupolicy; break; case IPV6_DONTFRAG: if (len != sizeof(int)) return (EINVAL); if (uproto == IPPROTO_TCP || *(int *)buf == 0) { /* * we ignore this option for TCP sockets. * (RFC3542 leaves this case unspecified.) */ opt->ip6po_flags &= ~IP6PO_DONTFRAG; } else opt->ip6po_flags |= IP6PO_DONTFRAG; break; case IPV6_PREFER_TEMPADDR: if (len != sizeof(int)) return (EINVAL); preftemp = *(int *)buf; if (preftemp != IP6PO_TEMPADDR_SYSTEM && preftemp != IP6PO_TEMPADDR_NOTPREFER && preftemp != IP6PO_TEMPADDR_PREFER) { return (EINVAL); } opt->ip6po_prefer_tempaddr = preftemp; break; default: return (ENOPROTOOPT); } /* end of switch */ return (0); } /* * Routine called from ip6_output() to loop back a copy of an IP6 multicast * packet to the input queue of a specified interface. Note that this * calls the output routine of the loopback "driver", but with an interface * pointer that might NOT be &loif -- easier than replicating that code here. */ void ip6_mloopback(struct ifnet *ifp, const struct mbuf *m) { struct mbuf *copym; struct ip6_hdr *ip6; copym = m_copy(m, 0, M_COPYALL); if (copym == NULL) return; /* * Make sure to deep-copy IPv6 header portion in case the data * is in an mbuf cluster, so that we can safely override the IPv6 * header portion later. */ if (!M_WRITABLE(copym) || copym->m_len < sizeof(struct ip6_hdr)) { copym = m_pullup(copym, sizeof(struct ip6_hdr)); if (copym == NULL) return; } ip6 = mtod(copym, struct ip6_hdr *); /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { copym->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; copym->m_pkthdr.csum_data = 0xffff; } if_simloop(ifp, copym, AF_INET6, 0); } /* * Chop IPv6 header off from the payload. 
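* On return the first mbuf of exthdrs->ip6e_ip6 holds only the IPv6
* header, so extension headers can later be chained in between the
* header and the payload without copying the payload.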
*/ static int ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs) { struct mbuf *mh; struct ip6_hdr *ip6; ip6 = mtod(m, struct ip6_hdr *); if (m->m_len > sizeof(*ip6)) { mh = m_gethdr(M_NOWAIT, MT_DATA); if (mh == NULL) { m_freem(m); return ENOBUFS; } m_move_pkthdr(mh, m); M_ALIGN(mh, sizeof(*ip6)); m->m_len -= sizeof(*ip6); m->m_data += sizeof(*ip6); mh->m_next = m; m = mh; m->m_len = sizeof(*ip6); bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6)); } exthdrs->ip6e_ip6 = m; return 0; } /* * Compute IPv6 extension header length. */ int ip6_optlen(struct inpcb *in6p) { int len; if (!in6p->in6p_outputopts) return 0; len = 0; #define elen(x) \ (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0) len += elen(in6p->in6p_outputopts->ip6po_hbh); if (in6p->in6p_outputopts->ip6po_rthdr) /* dest1 is valid with rthdr only */ len += elen(in6p->in6p_outputopts->ip6po_dest1); len += elen(in6p->in6p_outputopts->ip6po_rthdr); len += elen(in6p->in6p_outputopts->ip6po_dest2); return len; #undef elen } diff --git a/sys/netinet6/ip6_var.h b/sys/netinet6/ip6_var.h index 2159f29536e7..f43a28c199b1 100644 --- a/sys/netinet6/ip6_var.h +++ b/sys/netinet6/ip6_var.h @@ -1,435 +1,435 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: ip6_var.h,v 1.62 2001/05/03 14:51:48 itojun Exp $ */ /*- * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_var.h 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #ifndef _NETINET6_IP6_VAR_H_ #define _NETINET6_IP6_VAR_H_ /* * IP6 reassembly queue structure. Each fragment * being reassembled is attached to one of these structures. */ struct ip6q { struct ip6asfrag *ip6q_down; struct ip6asfrag *ip6q_up; u_int32_t ip6q_ident; u_int8_t ip6q_nxt; u_int8_t ip6q_ecn; u_int8_t ip6q_ttl; struct in6_addr ip6q_src, ip6q_dst; struct ip6q *ip6q_next; struct ip6q *ip6q_prev; int ip6q_unfrglen; /* len of unfragmentable part */ #ifdef notyet u_char *ip6q_nxtp; #endif int ip6q_nfrag; /* # of fragments */ struct label *ip6q_label; }; struct ip6asfrag { struct ip6asfrag *ip6af_down; struct ip6asfrag *ip6af_up; struct mbuf *ip6af_m; int ip6af_offset; /* offset in ip6af_m to next header */ int ip6af_frglen; /* fragmentable part length */ int ip6af_off; /* fragment offset */ u_int16_t ip6af_mff; /* more fragment bit in frag off */ }; #define IP6_REASS_MBUF(ip6af) (*(struct mbuf **)&((ip6af)->ip6af_m)) /* * IP6 reinjecting structure. */ struct ip6_direct_ctx { uint32_t ip6dc_nxt; /* next header to process */ uint32_t ip6dc_off; /* offset to next header */ }; /* * Structure attached to inpcb.in6p_moptions and * passed to ip6_output when IPv6 multicast options are in use. * This structure is lazy-allocated. */ struct ip6_moptions { struct ifnet *im6o_multicast_ifp; /* ifp for outgoing multicasts */ u_char im6o_multicast_hlim; /* hoplimit for outgoing multicasts */ u_char im6o_multicast_loop; /* 1 >= hear sends if a member */ u_short im6o_num_memberships; /* no. 
memberships this socket */ u_short im6o_max_memberships; /* max memberships this socket */ struct in6_multi **im6o_membership; /* group memberships */ struct in6_mfilter *im6o_mfilters; /* source filters */ }; /* * Control options for outgoing packets */ /* Routing header related info */ struct ip6po_rhinfo { struct ip6_rthdr *ip6po_rhi_rthdr; /* Routing header */ struct route_in6 ip6po_rhi_route; /* Route to the 1st hop */ }; #define ip6po_rthdr ip6po_rhinfo.ip6po_rhi_rthdr #define ip6po_route ip6po_rhinfo.ip6po_rhi_route /* Nexthop related info */ struct ip6po_nhinfo { struct sockaddr *ip6po_nhi_nexthop; struct route_in6 ip6po_nhi_route; /* Route to the nexthop */ }; #define ip6po_nexthop ip6po_nhinfo.ip6po_nhi_nexthop #define ip6po_nextroute ip6po_nhinfo.ip6po_nhi_route struct ip6_pktopts { struct mbuf *ip6po_m; /* Pointer to mbuf storing the data */ int ip6po_hlim; /* Hoplimit for outgoing packets */ /* Outgoing IF/address information */ struct in6_pktinfo *ip6po_pktinfo; /* Next-hop address information */ struct ip6po_nhinfo ip6po_nhinfo; struct ip6_hbh *ip6po_hbh; /* Hop-by-Hop options header */ /* Destination options header (before a routing header) */ struct ip6_dest *ip6po_dest1; /* Routing header related info. */ struct ip6po_rhinfo ip6po_rhinfo; /* Destination options header (after a routing header) */ struct ip6_dest *ip6po_dest2; int ip6po_tclass; /* traffic class */ int ip6po_minmtu; /* fragment vs PMTU discovery policy */ #define IP6PO_MINMTU_MCASTONLY -1 /* default; send at min MTU for multicast*/ #define IP6PO_MINMTU_DISABLE 0 /* always perform pmtu disc */ #define IP6PO_MINMTU_ALL 1 /* always send at min MTU */ int ip6po_prefer_tempaddr; /* whether temporary addresses are preferred as source address */ #define IP6PO_TEMPADDR_SYSTEM -1 /* follow the system default */ #define IP6PO_TEMPADDR_NOTPREFER 0 /* not prefer temporary address */ #define IP6PO_TEMPADDR_PREFER 1 /* prefer temporary address */ int ip6po_flags; #if 0 /* parameters in this block is obsolete. do not reuse the values. */ #define IP6PO_REACHCONF 0x01 /* upper-layer reachability confirmation. */ #define IP6PO_MINMTU 0x02 /* use minimum MTU (IPV6_USE_MIN_MTU) */ #endif #define IP6PO_DONTFRAG 0x04 /* disable fragmentation (IPV6_DONTFRAG) */ #define IP6PO_USECOA 0x08 /* use care of address */ }; /* * Control options for incoming packets */ struct ip6stat { uint64_t ip6s_total; /* total packets received */ uint64_t ip6s_tooshort; /* packet too short */ uint64_t ip6s_toosmall; /* not enough data */ uint64_t ip6s_fragments; /* fragments received */ uint64_t ip6s_fragdropped; /* frags dropped(dups, out of space) */ uint64_t ip6s_fragtimeout; /* fragments timed out */ uint64_t ip6s_fragoverflow; /* fragments that exceeded limit */ uint64_t ip6s_forward; /* packets forwarded */ uint64_t ip6s_cantforward; /* packets rcvd for unreachable dest */ uint64_t ip6s_redirectsent; /* packets forwarded on same net */ uint64_t ip6s_delivered; /* datagrams delivered to upper level*/ uint64_t ip6s_localout; /* total ip packets generated here */ uint64_t ip6s_odropped; /* lost packets due to nobufs, etc. */ uint64_t ip6s_reassembled; /* total packets reassembled ok */ uint64_t ip6s_fragmented; /* datagrams successfully fragmented */ uint64_t ip6s_ofragments; /* output fragments created */ uint64_t ip6s_cantfrag; /* don't fragment flag was set, etc. 
*/ uint64_t ip6s_badoptions; /* error in option processing */ uint64_t ip6s_noroute; /* packets discarded due to no route */ uint64_t ip6s_badvers; /* ip6 version != 6 */ uint64_t ip6s_rawout; /* total raw ip packets generated */ uint64_t ip6s_badscope; /* scope error */ uint64_t ip6s_notmember; /* don't join this multicast group */ #define IP6S_HDRCNT 256 /* headers count */ uint64_t ip6s_nxthist[IP6S_HDRCNT]; /* next header history */ uint64_t ip6s_m1; /* one mbuf */ #define IP6S_M2MMAX 32 uint64_t ip6s_m2m[IP6S_M2MMAX]; /* two or more mbuf */ uint64_t ip6s_mext1; /* one ext mbuf */ uint64_t ip6s_mext2m; /* two or more ext mbuf */ uint64_t ip6s_exthdrtoolong; /* ext hdr are not contiguous */ uint64_t ip6s_nogif; /* no match gif found */ uint64_t ip6s_toomanyhdr; /* discarded due to too many headers */ /* * statistics for improvement of the source address selection * algorithm: * XXX: hardcoded 16 = # of ip6 multicast scope types + 1 */ #define IP6S_RULESMAX 16 #define IP6S_SCOPECNT 16 /* number of times that address selection fails */ uint64_t ip6s_sources_none; /* number of times that an address on the outgoing I/F is chosen */ uint64_t ip6s_sources_sameif[IP6S_SCOPECNT]; /* number of times that an address on a non-outgoing I/F is chosen */ uint64_t ip6s_sources_otherif[IP6S_SCOPECNT]; /* * number of times that an address that has the same scope * from the destination is chosen. */ uint64_t ip6s_sources_samescope[IP6S_SCOPECNT]; /* * number of times that an address that has a different scope * from the destination is chosen. */ uint64_t ip6s_sources_otherscope[IP6S_SCOPECNT]; /* number of times that a deprecated address is chosen */ uint64_t ip6s_sources_deprecated[IP6S_SCOPECNT]; /* number of times that each rule of source selection is applied. */ uint64_t ip6s_sources_rule[IP6S_RULESMAX]; }; #ifdef _KERNEL #include VNET_PCPUSTAT_DECLARE(struct ip6stat, ip6stat); #define IP6STAT_ADD(name, val) \ VNET_PCPUSTAT_ADD(struct ip6stat, ip6stat, name, (val)) #define IP6STAT_SUB(name, val) IP6STAT_ADD(name, -(val)) #define IP6STAT_INC(name) IP6STAT_ADD(name, 1) #define IP6STAT_DEC(name) IP6STAT_SUB(name, 1) #endif #ifdef _KERNEL /* flags passed to ip6_output as last parameter */ #define IPV6_UNSPECSRC 0x01 /* allow :: as the source address */ #define IPV6_FORWARDING 0x02 /* most of IPv6 header exists */ #define IPV6_MINMTU 0x04 /* use minimum MTU (IPV6_USE_MIN_MTU) */ #ifdef __NO_STRICT_ALIGNMENT #define IP6_HDR_ALIGNED_P(ip) 1 #else #define IP6_HDR_ALIGNED_P(ip) ((((intptr_t) (ip)) & 3) == 0) #endif VNET_DECLARE(int, ip6_defhlim); /* default hop limit */ VNET_DECLARE(int, ip6_defmcasthlim); /* default multicast hop limit */ VNET_DECLARE(int, ip6_forwarding); /* act as router? */ VNET_DECLARE(int, ip6_use_deprecated); /* allow deprecated addr as source */ VNET_DECLARE(int, ip6_rr_prune); /* router renumbering prefix * walk list every 5 sec. */ VNET_DECLARE(int, ip6_mcast_pmtu); /* enable pMTU discovery for multicast? */ VNET_DECLARE(int, ip6_v6only); #define V_ip6_defhlim VNET(ip6_defhlim) #define V_ip6_defmcasthlim VNET(ip6_defmcasthlim) #define V_ip6_forwarding VNET(ip6_forwarding) #define V_ip6_use_deprecated VNET(ip6_use_deprecated) #define V_ip6_rr_prune VNET(ip6_rr_prune) #define V_ip6_mcast_pmtu VNET(ip6_mcast_pmtu) #define V_ip6_v6only VNET(ip6_v6only) VNET_DECLARE(struct socket *, ip6_mrouter); /* multicast routing daemon */ VNET_DECLARE(int, ip6_sendredirects); /* send IP redirects when forwarding? 
*/ VNET_DECLARE(int, ip6_maxfragpackets); /* Maximum packets in reassembly * queue */ VNET_DECLARE(int, ip6_maxfrags); /* Maximum fragments in reassembly * queue */ VNET_DECLARE(int, ip6_accept_rtadv); /* Acts as a host not a router */ VNET_DECLARE(int, ip6_no_radr); /* No defroute from RA */ VNET_DECLARE(int, ip6_norbit_raif); /* Disable R-bit in NA on RA * receiving IF. */ VNET_DECLARE(int, ip6_rfc6204w3); /* Accept defroute from RA even when forwarding enabled */ VNET_DECLARE(int, ip6_log_interval); VNET_DECLARE(time_t, ip6_log_time); VNET_DECLARE(int, ip6_hdrnestlimit); /* upper limit of # of extension * headers */ VNET_DECLARE(int, ip6_dad_count); /* DupAddrDetectionTransmits */ #define V_ip6_mrouter VNET(ip6_mrouter) #define V_ip6_sendredirects VNET(ip6_sendredirects) #define V_ip6_maxfragpackets VNET(ip6_maxfragpackets) #define V_ip6_maxfrags VNET(ip6_maxfrags) #define V_ip6_accept_rtadv VNET(ip6_accept_rtadv) #define V_ip6_no_radr VNET(ip6_no_radr) #define V_ip6_norbit_raif VNET(ip6_norbit_raif) #define V_ip6_rfc6204w3 VNET(ip6_rfc6204w3) #define V_ip6_log_interval VNET(ip6_log_interval) #define V_ip6_log_time VNET(ip6_log_time) #define V_ip6_hdrnestlimit VNET(ip6_hdrnestlimit) #define V_ip6_dad_count VNET(ip6_dad_count) VNET_DECLARE(int, ip6_auto_flowlabel); VNET_DECLARE(int, ip6_auto_linklocal); #define V_ip6_auto_flowlabel VNET(ip6_auto_flowlabel) #define V_ip6_auto_linklocal VNET(ip6_auto_linklocal) VNET_DECLARE(int, ip6_use_tempaddr); /* Whether to use temporary addresses */ VNET_DECLARE(int, ip6_prefer_tempaddr); /* Whether to prefer temporary * addresses in the source address * selection */ #define V_ip6_use_tempaddr VNET(ip6_use_tempaddr) #define V_ip6_prefer_tempaddr VNET(ip6_prefer_tempaddr) VNET_DECLARE(int, ip6_use_defzone); /* Whether to use the default scope * zone when unspecified */ #define V_ip6_use_defzone VNET(ip6_use_defzone) VNET_DECLARE (struct pfil_head, inet6_pfil_hook); /* packet filter hooks */ #define V_inet6_pfil_hook VNET(inet6_pfil_hook) #ifdef IPSTEALTH VNET_DECLARE(int, ip6stealth); #define V_ip6stealth VNET(ip6stealth) #endif extern struct pr_usrreqs rip6_usrreqs; struct sockopt; struct inpcb; int icmp6_ctloutput(struct socket *, struct sockopt *sopt); struct in6_ifaddr; void ip6_init(void); #ifdef VIMAGE void ip6_destroy(void); #endif int ip6proto_register(short); int ip6proto_unregister(short); void ip6_input(struct mbuf *); void ip6_direct_input(struct mbuf *); void ip6_freepcbopts(struct ip6_pktopts *); int ip6_unknown_opt(u_int8_t *, struct mbuf *, int); char * ip6_get_prevhdr(const struct mbuf *, int); int ip6_nexthdr(const struct mbuf *, int, int, int *); int ip6_lasthdr(const struct mbuf *, int, int, int *); extern int (*ip6_mforward)(struct ip6_hdr *, struct ifnet *, struct mbuf *); int ip6_process_hopopts(struct mbuf *, u_int8_t *, int, u_int32_t *, u_int32_t *); struct mbuf **ip6_savecontrol_v4(struct inpcb *, struct mbuf *, struct mbuf **, int *); void ip6_savecontrol(struct inpcb *, struct mbuf *, struct mbuf **); void ip6_notify_pmtu(struct inpcb *, struct sockaddr_in6 *, u_int32_t); int ip6_sysctl(int *, u_int, void *, size_t *, void *, size_t); void ip6_forward(struct mbuf *, int); void ip6_mloopback(struct ifnet *, const struct mbuf *); int ip6_output(struct mbuf *, struct ip6_pktopts *, struct route_in6 *, int, struct ip6_moptions *, struct ifnet **, struct inpcb *); int ip6_ctloutput(struct socket *, struct sockopt *); int ip6_raw_ctloutput(struct socket *, struct sockopt *); void ip6_initpktopts(struct ip6_pktopts *); int 
ip6_setpktopts(struct mbuf *, struct ip6_pktopts *, struct ip6_pktopts *, struct ucred *, int); void ip6_clearpktopts(struct ip6_pktopts *, int); struct ip6_pktopts *ip6_copypktopts(struct ip6_pktopts *, int); int ip6_optlen(struct inpcb *); int ip6_deletefraghdr(struct mbuf *, int, int); int ip6_fragment(struct ifnet *, struct mbuf *, int, u_char, int, uint32_t); int route6_input(struct mbuf **, int *, int); void frag6_init(void); int frag6_input(struct mbuf **, int *, int); void frag6_slowtimo(void); void frag6_drain(void); void rip6_init(void); int rip6_input(struct mbuf **, int *, int); void rip6_ctlinput(int, struct sockaddr *, void *); int rip6_ctloutput(struct socket *, struct sockopt *); int rip6_output(struct mbuf *, struct socket *, ...); int rip6_usrreq(struct socket *, int, struct mbuf *, struct mbuf *, struct mbuf *, struct thread *); int dest6_input(struct mbuf **, int *, int); int none_input(struct mbuf **, int *, int); int in6_selectsrc(struct sockaddr_in6 *, struct ip6_pktopts *, - struct inpcb *inp, struct route_in6 *, struct ucred *cred, + struct inpcb *inp, struct ucred *cred, struct ifnet **, struct in6_addr *); int in6_selectroute(struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *, struct ifnet **, struct rtentry **); int in6_selectroute_fib(struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *, struct ifnet **, struct rtentry **, u_int); u_int32_t ip6_randomid(void); u_int32_t ip6_randomflowlabel(void); void in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset); #endif /* _KERNEL */ #endif /* !_NETINET6_IP6_VAR_H_ */ diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c index fbbf4214776b..4371a024a7c0 100644 --- a/sys/netinet6/nd6.c +++ b/sys/netinet6/nd6.c @@ -1,2629 +1,2638 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */ #define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */ #define SIN6(s) ((const struct sockaddr_in6 *)(s)) /* timer values */ VNET_DEFINE(int, nd6_prune) = 1; /* walk list every 1 seconds */ VNET_DEFINE(int, nd6_delay) = 5; /* delay first probe time 5 second */ VNET_DEFINE(int, nd6_umaxtries) = 3; /* maximum unicast query */ VNET_DEFINE(int, nd6_mmaxtries) = 3; /* maximum multicast query */ VNET_DEFINE(int, nd6_useloopback) = 1; /* use loopback interface for * local traffic */ VNET_DEFINE(int, nd6_gctimer) = (60 * 60 * 24); /* 1 day: garbage * collection timer */ /* preventing too many loops in ND option parsing */ static VNET_DEFINE(int, nd6_maxndopt) = 10; /* max # of ND options allowed */ VNET_DEFINE(int, nd6_maxnudhint) = 0; /* max # of subsequent upper * layer hints */ static VNET_DEFINE(int, nd6_maxqueuelen) = 1; /* max pkts cached in unresolved * ND entries */ #define V_nd6_maxndopt VNET(nd6_maxndopt) #define V_nd6_maxqueuelen VNET(nd6_maxqueuelen) #ifdef ND6_DEBUG VNET_DEFINE(int, nd6_debug) = 1; #else VNET_DEFINE(int, nd6_debug) = 0; #endif static eventhandler_tag lle_event_eh, iflladdr_event_eh; /* for debugging? */ #if 0 static int nd6_inuse, nd6_allocated; #endif VNET_DEFINE(struct nd_drhead, nd_defrouter); VNET_DEFINE(struct nd_prhead, nd_prefix); VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL; #define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval) int (*send_sendso_input_hook)(struct mbuf *, struct ifnet *, int, int); static int nd6_is_new_addr_neighbor(const struct sockaddr_in6 *, struct ifnet *); static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *); static void nd6_slowtimo(void *); static int regen_tmpaddr(struct in6_ifaddr *); static void nd6_free(struct llentry *, int); static void nd6_free_redirect(const struct llentry *); static void nd6_llinfo_timer(void *); static void nd6_llinfo_settimer_locked(struct llentry *, long); static void clear_llinfo_pqueue(struct llentry *); static void nd6_rtrequest(int, struct rtentry *, struct rt_addrinfo *); static int nd6_resolve_slow(struct ifnet *, int, struct mbuf *, const struct sockaddr_in6 *, u_char *, uint32_t *); static int nd6_need_cache(struct ifnet *); static VNET_DEFINE(struct callout, nd6_slowtimo_ch); #define V_nd6_slowtimo_ch VNET(nd6_slowtimo_ch) VNET_DEFINE(struct callout, nd6_timer_ch); static void nd6_lle_event(void *arg __unused, struct llentry *lle, int evt) { struct rt_addrinfo rtinfo; struct sockaddr_in6 dst; struct sockaddr_dl gw; struct ifnet *ifp; int type; LLE_WLOCK_ASSERT(lle); if (lltable_get_af(lle->lle_tbl) != AF_INET6) return; switch (evt) { case LLENTRY_RESOLVED: type = RTM_ADD; KASSERT(lle->la_flags & LLE_VALID, ("%s: %p resolved but not valid?", __func__, lle)); break; case LLENTRY_EXPIRED: type = RTM_DELETE; break; default: return; } ifp = lltable_get_ifp(lle->lle_tbl); bzero(&dst, sizeof(dst)); bzero(&gw, sizeof(gw)); bzero(&rtinfo, sizeof(rtinfo)); lltable_fill_sa_entry(lle, (struct sockaddr 
*)&dst); dst.sin6_scope_id = in6_getscopezone(ifp, in6_addrscope(&dst.sin6_addr)); gw.sdl_len = sizeof(struct sockaddr_dl); gw.sdl_family = AF_LINK; gw.sdl_alen = ifp->if_addrlen; gw.sdl_index = ifp->if_index; gw.sdl_type = ifp->if_type; if (evt == LLENTRY_RESOLVED) bcopy(lle->ll_addr, gw.sdl_data, ifp->if_addrlen); rtinfo.rti_info[RTAX_DST] = (struct sockaddr *)&dst; rtinfo.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gw; rtinfo.rti_addrs = RTA_DST | RTA_GATEWAY; rt_missmsg_fib(type, &rtinfo, RTF_HOST | RTF_LLDATA | ( type == RTM_ADD ? RTF_UP: 0), 0, RT_DEFAULT_FIB); } /* * A handler for interface link layer address change event. */ static void nd6_iflladdr(void *arg __unused, struct ifnet *ifp) { lltable_update_ifaddr(LLTABLE6(ifp)); } void nd6_init(void) { LIST_INIT(&V_nd_prefix); /* initialization of the default router list */ TAILQ_INIT(&V_nd_defrouter); /* start timer */ callout_init(&V_nd6_slowtimo_ch, 0); callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz, nd6_slowtimo, curvnet); nd6_dad_init(); if (IS_DEFAULT_VNET(curvnet)) { lle_event_eh = EVENTHANDLER_REGISTER(lle_event, nd6_lle_event, NULL, EVENTHANDLER_PRI_ANY); iflladdr_event_eh = EVENTHANDLER_REGISTER(iflladdr_event, nd6_iflladdr, NULL, EVENTHANDLER_PRI_ANY); } } #ifdef VIMAGE void nd6_destroy() { callout_drain(&V_nd6_slowtimo_ch); callout_drain(&V_nd6_timer_ch); if (IS_DEFAULT_VNET(curvnet)) { EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh); EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_event_eh); } } #endif struct nd_ifinfo * nd6_ifattach(struct ifnet *ifp) { struct nd_ifinfo *nd; nd = (struct nd_ifinfo *)malloc(sizeof(*nd), M_IP6NDP, M_WAITOK|M_ZERO); nd->initialized = 1; nd->chlim = IPV6_DEFHLIM; nd->basereachable = REACHABLE_TIME; nd->reachable = ND_COMPUTE_RTIME(nd->basereachable); nd->retrans = RETRANS_TIMER; nd->flags = ND6_IFF_PERFORMNUD; /* A loopback interface always has ND6_IFF_AUTO_LINKLOCAL. * XXXHRS: Clear ND6_IFF_AUTO_LINKLOCAL on an IFT_BRIDGE interface by * default regardless of the V_ip6_auto_linklocal configuration to * give a reasonable default behavior. */ if ((V_ip6_auto_linklocal && ifp->if_type != IFT_BRIDGE) || (ifp->if_flags & IFF_LOOPBACK)) nd->flags |= ND6_IFF_AUTO_LINKLOCAL; /* * A loopback interface does not need to accept RTADV. * XXXHRS: Clear ND6_IFF_ACCEPT_RTADV on an IFT_BRIDGE interface by * default regardless of the V_ip6_accept_rtadv configuration to * prevent the interface from accepting RA messages arrived * on one of the member interfaces with ND6_IFF_ACCEPT_RTADV. */ if (V_ip6_accept_rtadv && !(ifp->if_flags & IFF_LOOPBACK) && (ifp->if_type != IFT_BRIDGE)) nd->flags |= ND6_IFF_ACCEPT_RTADV; if (V_ip6_no_radr && !(ifp->if_flags & IFF_LOOPBACK)) nd->flags |= ND6_IFF_NO_RADR; /* XXX: we cannot call nd6_setmtu since ifp is not fully initialized */ nd6_setmtu0(ifp, nd); return nd; } void nd6_ifdetach(struct nd_ifinfo *nd) { free(nd, M_IP6NDP); } /* * Reset ND level link MTU. This function is called when the physical MTU * changes, which means we might have to adjust the ND level MTU. 
*/ void nd6_setmtu(struct ifnet *ifp) { if (ifp->if_afdata[AF_INET6] == NULL) return; nd6_setmtu0(ifp, ND_IFINFO(ifp)); } /* XXX todo: do not maintain copy of ifp->if_mtu in ndi->maxmtu */ void nd6_setmtu0(struct ifnet *ifp, struct nd_ifinfo *ndi) { u_int32_t omaxmtu; omaxmtu = ndi->maxmtu; switch (ifp->if_type) { case IFT_ARCNET: ndi->maxmtu = MIN(ARC_PHDS_MAXMTU, ifp->if_mtu); /* RFC2497 */ break; case IFT_FDDI: ndi->maxmtu = MIN(FDDIIPMTU, ifp->if_mtu); /* RFC2467 */ break; case IFT_ISO88025: ndi->maxmtu = MIN(ISO88025_MAX_MTU, ifp->if_mtu); break; default: ndi->maxmtu = ifp->if_mtu; break; } /* * Decreasing the interface MTU under IPV6 minimum MTU may cause * undesirable situation. We thus notify the operator of the change * explicitly. The check for omaxmtu is necessary to restrict the * log to the case of changing the MTU, not initializing it. */ if (omaxmtu >= IPV6_MMTU && ndi->maxmtu < IPV6_MMTU) { log(LOG_NOTICE, "nd6_setmtu0: " "new link MTU on %s (%lu) is too small for IPv6\n", if_name(ifp), (unsigned long)ndi->maxmtu); } if (ndi->maxmtu > V_in6_maxmtu) in6_setmaxmtu(); /* check all interfaces just in case */ } void nd6_option_init(void *opt, int icmp6len, union nd_opts *ndopts) { bzero(ndopts, sizeof(*ndopts)); ndopts->nd_opts_search = (struct nd_opt_hdr *)opt; ndopts->nd_opts_last = (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len); if (icmp6len == 0) { ndopts->nd_opts_done = 1; ndopts->nd_opts_search = NULL; } } /* * Take one ND option. */ struct nd_opt_hdr * nd6_option(union nd_opts *ndopts) { struct nd_opt_hdr *nd_opt; int olen; KASSERT(ndopts != NULL, ("%s: ndopts == NULL", __func__)); KASSERT(ndopts->nd_opts_last != NULL, ("%s: uninitialized ndopts", __func__)); if (ndopts->nd_opts_search == NULL) return NULL; if (ndopts->nd_opts_done) return NULL; nd_opt = ndopts->nd_opts_search; /* make sure nd_opt_len is inside the buffer */ if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) { bzero(ndopts, sizeof(*ndopts)); return NULL; } olen = nd_opt->nd_opt_len << 3; if (olen == 0) { /* * Message validation requires that all included * options have a length that is greater than zero. */ bzero(ndopts, sizeof(*ndopts)); return NULL; } ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen); if (ndopts->nd_opts_search > ndopts->nd_opts_last) { /* option overruns the end of buffer, invalid */ bzero(ndopts, sizeof(*ndopts)); return NULL; } else if (ndopts->nd_opts_search == ndopts->nd_opts_last) { /* reached the end of options chain */ ndopts->nd_opts_done = 1; ndopts->nd_opts_search = NULL; } return nd_opt; } /* * Parse multiple ND options. * This function is much easier to use, for ND routines that do not need * multiple options of the same type. */ int nd6_options(union nd_opts *ndopts) { struct nd_opt_hdr *nd_opt; int i = 0; KASSERT(ndopts != NULL, ("%s: ndopts == NULL", __func__)); KASSERT(ndopts->nd_opts_last != NULL, ("%s: uninitialized ndopts", __func__)); if (ndopts->nd_opts_search == NULL) return 0; while (1) { nd_opt = nd6_option(ndopts); if (nd_opt == NULL && ndopts->nd_opts_last == NULL) { /* * Message validation requires that all included * options have a length that is greater than zero. 
*/ ICMP6STAT_INC(icp6s_nd_badopt); bzero(ndopts, sizeof(*ndopts)); return -1; } if (nd_opt == NULL) goto skip1; switch (nd_opt->nd_opt_type) { case ND_OPT_SOURCE_LINKADDR: case ND_OPT_TARGET_LINKADDR: case ND_OPT_MTU: case ND_OPT_REDIRECTED_HEADER: case ND_OPT_NONCE: if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) { nd6log((LOG_INFO, "duplicated ND6 option found (type=%d)\n", nd_opt->nd_opt_type)); /* XXX bark? */ } else { ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; } break; case ND_OPT_PREFIX_INFORMATION: if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) { ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; } ndopts->nd_opts_pi_end = (struct nd_opt_prefix_info *)nd_opt; break; /* What about ND_OPT_ROUTE_INFO? RFC 4191 */ case ND_OPT_RDNSS: /* RFC 6106 */ case ND_OPT_DNSSL: /* RFC 6106 */ /* * Silently ignore options we know and do not care about * in the kernel. */ break; default: /* * Unknown options must be silently ignored, * to accommodate future extension to the protocol. */ nd6log((LOG_DEBUG, "nd6_options: unsupported option %d - " "option ignored\n", nd_opt->nd_opt_type)); } skip1: i++; if (i > V_nd6_maxndopt) { ICMP6STAT_INC(icp6s_nd_toomanyopt); nd6log((LOG_INFO, "too many loop in nd opt\n")); break; } if (ndopts->nd_opts_done) break; } return 0; } /* * ND6 timer routine to handle ND6 entries */ static void nd6_llinfo_settimer_locked(struct llentry *ln, long tick) { int canceled; LLE_WLOCK_ASSERT(ln); if (tick < 0) { ln->la_expire = 0; ln->ln_ntick = 0; canceled = callout_stop(&ln->lle_timer); } else { ln->la_expire = time_uptime + tick / hz; LLE_ADDREF(ln); if (tick > INT_MAX) { ln->ln_ntick = tick - INT_MAX; canceled = callout_reset(&ln->lle_timer, INT_MAX, nd6_llinfo_timer, ln); } else { ln->ln_ntick = 0; canceled = callout_reset(&ln->lle_timer, tick, nd6_llinfo_timer, ln); } } if (canceled > 0) LLE_REMREF(ln); } /* * Gets the source address of the first packet in the hold queue * and stores it in @src. * Returns a pointer to @src (if the hold queue is not empty) or NULL. * * Set noinline to be dtrace-friendly */ static __noinline struct in6_addr * nd6_llinfo_get_holdsrc(struct llentry *ln, struct in6_addr *src) { struct ip6_hdr hdr; struct mbuf *m; if (ln->la_hold == NULL) return (NULL); /* * assume every packet in la_hold has the same IP header */ m = ln->la_hold; if (sizeof(hdr) > m->m_len) return (NULL); m_copydata(m, 0, sizeof(hdr), (caddr_t)&hdr); *src = hdr.ip6_src; return (src); } /* * Checks if we need to switch from STALE state. * * RFC 4861 requires switching from STALE to DELAY state * on the first packet matching the entry, waiting V_nd6_delay and * transitioning to PROBE state (if upper layer confirmation was * not received). * * This code performs a bit differently: * On packet hit we don't change state (but desired state * can be guessed by control plane). However, after V_nd6_delay * seconds code will transition to PROBE state (so DELAY state * is kinda skipped in most situations). * * Typically, V_nd6_gctimer is bigger than V_nd6_delay, so * we perform the following upon entering STALE state: * * 1) Arm timer to run each V_nd6_delay seconds to make sure that * if packet was transmitted at the start of given interval, we * would be able to switch to PROBE state in V_nd6_delay seconds * as user expects. * * 2) Reschedule timer until original V_nd6_gctimer expires keeping * lle in STALE state (remaining timer value stored in lle_remtime). * * 3) Reschedule timer if packet was transmitted less than V_nd6_delay * seconds ago.
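 *
 * The reschedule arithmetic in (1)-(3) can be read in isolation; the
 * sketch below mirrors it with hz and the two knobs passed in explicitly
 * (a minimal sketch, all names illustrative):
 */
#if 0
/* Returns the next interval in ticks; 0 once the gctimer budget is spent. */
static long
sketch_stale_resched(long *remtime, int nd_delay, int nd_gctimer, int hz)
{
	long delay;

	delay = (long)(nd_delay < nd_gctimer ? nd_delay : nd_gctimer) * hz;
	if (*remtime > delay)
		*remtime -= delay;
	else {
		/* Final, possibly shorter, interval of the STALE period. */
		delay = *remtime;
		*remtime = 0;
	}
	return (delay);
}
#endif
/*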
* * Returns a non-zero value if the entry is still STALE (storing * the next timer interval in @pdelay). * * Returns zero if the original timer expired or we need to switch to * PROBE (stored in the @do_switch variable). */ static int nd6_is_stale(struct llentry *lle, long *pdelay, int *do_switch) { int nd_delay, nd_gctimer, r_skip_req; time_t lle_hittime; long delay; *do_switch = 0; nd_gctimer = V_nd6_gctimer; nd_delay = V_nd6_delay; LLE_REQ_LOCK(lle); r_skip_req = lle->r_skip_req; lle_hittime = lle->lle_hittime; LLE_REQ_UNLOCK(lle); if (r_skip_req > 0) { /* * A nonzero r_skip_req value was set upon entering * STALE state. Since the value has not changed, no * packets were passed using this lle. Ask for * timer reschedule and keep STALE state. */ delay = (long)(MIN(nd_gctimer, nd_delay)); delay *= hz; if (lle->lle_remtime > delay) lle->lle_remtime -= delay; else { delay = lle->lle_remtime; lle->lle_remtime = 0; } if (delay == 0) { /* * The original nd6_gctimer timeout has ended; * no more rescheduling. */ return (0); } *pdelay = delay; return (1); } /* * Packet received. Verify the timestamp. */ delay = (long)(time_uptime - lle_hittime); if (delay < nd_delay) { /* * V_nd6_delay has not yet passed since the first * hit in STALE state. * Reschedule the timer and return. */ *pdelay = (long)(nd_delay - delay) * hz; return (1); } /* Request switching to probe */ *do_switch = 1; return (0); } /* * Switch @lle to a new state, optionally arming timers. * * Set noinline to be dtrace-friendly */ __noinline void nd6_llinfo_setstate(struct llentry *lle, int newstate) { struct ifnet *ifp; int nd_gctimer, nd_delay; long delay, remtime; delay = 0; remtime = 0; switch (newstate) { case ND6_LLINFO_INCOMPLETE: ifp = lle->lle_tbl->llt_ifp; delay = (long)ND_IFINFO(ifp)->retrans * hz / 1000; break; case ND6_LLINFO_REACHABLE: if (!ND6_LLINFO_PERMANENT(lle)) { ifp = lle->lle_tbl->llt_ifp; delay = (long)ND_IFINFO(ifp)->reachable * hz; } break; case ND6_LLINFO_STALE: /* * Notify fast path that we want to know if any packet * is transmitted by setting r_skip_req. */ LLE_REQ_LOCK(lle); lle->r_skip_req = 1; LLE_REQ_UNLOCK(lle); nd_delay = V_nd6_delay; nd_gctimer = V_nd6_gctimer; delay = (long)(MIN(nd_gctimer, nd_delay)) * hz; remtime = (long)nd_gctimer * hz - delay; break; case ND6_LLINFO_DELAY: lle->la_asked = 0; delay = (long)V_nd6_delay * hz; break; } if (delay > 0) nd6_llinfo_settimer_locked(lle, delay); lle->lle_remtime = remtime; lle->ln_state = newstate; } /* * Timer-dependent part of nd state machine. * * Set noinline to be dtrace-friendly */ static __noinline void nd6_llinfo_timer(void *arg) { struct llentry *ln; struct in6_addr *dst, *pdst, *psrc, src; struct ifnet *ifp; struct nd_ifinfo *ndi = NULL; int do_switch, send_ns; long delay; KASSERT(arg != NULL, ("%s: arg NULL", __func__)); ln = (struct llentry *)arg; LLE_WLOCK(ln); if (callout_pending(&ln->lle_timer)) { /* * The treatment of active/pending here is a bit odd. If the * pending bit is set, the callout was rescheduled before this * handler ran, so we just bail. The active bit we ignore: if * the callout was stopped in ll_tablefree() while it was * running, callout_stop() would have returned 0, the entry * would not have been deleted there, and we want to go through * with the delete here now. If the callout was instead * restarted, the pending bit is back on and we bail, since * callout_reset() would have returned 1 and our reference * would already have been dropped by * nd6_llinfo_settimer_locked() above (canceled would have * been 1).
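 *
 * A second detail worth calling out before the body below: callout ticks
 * are an int, so nd6_llinfo_settimer_locked() arms long intervals in
 * INT_MAX-sized chunks and the handler re-arms itself from ln_ntick.
 * A minimal sketch of that chunking (illustrative names):
 */
#if 0
#include <limits.h>

/* Ticks to arm now; *ntick carries what is left for the next round. */
static int
sketch_arm_chunk(long tick, long *ntick)
{
	if (tick > INT_MAX) {
		*ntick = tick - INT_MAX;
		return (INT_MAX);
	}
	*ntick = 0;
	return ((int)tick);
}
#endif
/*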
*/ LLE_WUNLOCK(ln); return; } ifp = ln->lle_tbl->llt_ifp; CURVNET_SET(ifp->if_vnet); ndi = ND_IFINFO(ifp); send_ns = 0; dst = &ln->r_l3addr.addr6; pdst = dst; if (ln->ln_ntick > 0) { if (ln->ln_ntick > INT_MAX) { ln->ln_ntick -= INT_MAX; nd6_llinfo_settimer_locked(ln, INT_MAX); } else { ln->ln_ntick = 0; nd6_llinfo_settimer_locked(ln, ln->ln_ntick); } goto done; } if (ln->la_flags & LLE_STATIC) { goto done; } if (ln->la_flags & LLE_DELETED) { nd6_free(ln, 0); ln = NULL; goto done; } switch (ln->ln_state) { case ND6_LLINFO_INCOMPLETE: if (ln->la_asked < V_nd6_mmaxtries) { ln->la_asked++; send_ns = 1; /* Send NS to multicast address */ pdst = NULL; } else { struct mbuf *m = ln->la_hold; if (m) { struct mbuf *m0; /* * assuming every packet in la_hold has the * same IP header. Send error after unlock. */ m0 = m->m_nextpkt; m->m_nextpkt = NULL; ln->la_hold = m0; clear_llinfo_pqueue(ln); } EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_TIMEDOUT); nd6_free(ln, 0); ln = NULL; if (m != NULL) icmp6_error2(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR, 0, ifp); } break; case ND6_LLINFO_REACHABLE: if (!ND6_LLINFO_PERMANENT(ln)) nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); break; case ND6_LLINFO_STALE: if (nd6_is_stale(ln, &delay, &do_switch) != 0) { /* * No packet has used this entry and the GC timeout * has not passed. Reschedule the timer and * return. */ nd6_llinfo_settimer_locked(ln, delay); break; } if (do_switch == 0) { /* * The GC timer has ended and the entry hasn't been * used. Run the garbage collector (RFC 4861, 5.3). */ if (!ND6_LLINFO_PERMANENT(ln)) { EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED); nd6_free(ln, 1); ln = NULL; } break; } /* Entry has been used AND delay timer has ended. */ /* FALLTHROUGH */ case ND6_LLINFO_DELAY: if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) { /* We need NUD */ ln->la_asked = 1; nd6_llinfo_setstate(ln, ND6_LLINFO_PROBE); send_ns = 1; } else nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); /* XXX */ break; case ND6_LLINFO_PROBE: if (ln->la_asked < V_nd6_umaxtries) { ln->la_asked++; send_ns = 1; } else { EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED); nd6_free(ln, 0); ln = NULL; } break; default: panic("%s: paths in a dark night can be confusing: %d", __func__, ln->ln_state); } done: if (send_ns != 0) { nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000); psrc = nd6_llinfo_get_holdsrc(ln, &src); LLE_FREE_LOCKED(ln); ln = NULL; nd6_ns_output(ifp, psrc, pdst, dst, NULL); } if (ln != NULL) LLE_FREE_LOCKED(ln); CURVNET_RESTORE(); } /* * ND6 timer routine to expire the default router list and prefix list */ void nd6_timer(void *arg) { CURVNET_SET((struct vnet *) arg); struct nd_defrouter *dr, *ndr; struct nd_prefix *pr, *npr; struct in6_ifaddr *ia6, *nia6; callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz, nd6_timer, curvnet); /* expire default router list */ TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) { if (dr->expire && dr->expire < time_uptime) defrtrlist_del(dr); } /* * Expire interface addresses. * In the past the loop was inside prefix expiry processing. * However, from a stricter spec-conformance standpoint, we should * rather separate address lifetimes and prefix lifetimes. * * XXXRW: in6_ifaddrhead locking. */ addrloop: TAILQ_FOREACH_SAFE(ia6, &V_in6_ifaddrhead, ia_link, nia6) { /* check address lifetime */ if (IFA6_IS_INVALID(ia6)) { int regen = 0; /* * If the expiring address is temporary, try * regenerating a new one.
This would be useful when * we suspended a laptop PC, then turned it on after a * period that could invalidate all temporary * addresses. Although we may have to restart the * loop (see below), it must be after purging the * address. Otherwise, we'd see an infinite loop of * regeneration. */ if (V_ip6_use_tempaddr && (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) { if (regen_tmpaddr(ia6) == 0) regen = 1; } in6_purgeaddr(&ia6->ia_ifa); if (regen) goto addrloop; /* XXX: see below */ } else if (IFA6_IS_DEPRECATED(ia6)) { int oldflags = ia6->ia6_flags; ia6->ia6_flags |= IN6_IFF_DEPRECATED; /* * If a temporary address has just become deprecated, * regenerate a new one if possible. */ if (V_ip6_use_tempaddr && (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && (oldflags & IN6_IFF_DEPRECATED) == 0) { if (regen_tmpaddr(ia6) == 0) { /* * A new temporary address is * generated. * XXX: this means the address chain * has changed while we are still in * the loop. Although the change * would not cause disaster (because * it's not a deletion, but an * addition,) we'd rather restart the * loop just for safety. Or does this * significantly reduce performance?? */ goto addrloop; } } } else if ((ia6->ia6_flags & IN6_IFF_TENTATIVE) != 0) { /* * Schedule DAD for a tentative address. This happens * if the interface was down or not running * when the address was configured. */ int delay; delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz); nd6_dad_start((struct ifaddr *)ia6, delay); } else { /* * Check status of the interface. If it is down, * mark the address as tentative for future DAD. */ if ((ia6->ia_ifp->if_flags & IFF_UP) == 0 || (ia6->ia_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || (ND_IFINFO(ia6->ia_ifp)->flags & ND6_IFF_IFDISABLED) != 0) { ia6->ia6_flags &= ~IN6_IFF_DUPLICATED; ia6->ia6_flags |= IN6_IFF_TENTATIVE; } /* * A new RA might have made a deprecated address * preferred. */ ia6->ia6_flags &= ~IN6_IFF_DEPRECATED; } } /* expire prefix list */ LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) { /* * check prefix lifetime. * since pltime is just for autoconf, pltime processing for * prefix is not necessary. */ if (pr->ndpr_vltime != ND6_INFINITE_LIFETIME && time_uptime - pr->ndpr_lastupdate > pr->ndpr_vltime) { /* * address expiration and prefix expiration are * separate. NEVER perform in6_purgeaddr here. */ prelist_remove(pr); } } CURVNET_RESTORE(); } /* * ia6 - deprecated/invalidated temporary address */ static int regen_tmpaddr(struct in6_ifaddr *ia6) { struct ifaddr *ifa; struct ifnet *ifp; struct in6_ifaddr *public_ifa6 = NULL; ifp = ia6->ia_ifa.ifa_ifp; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct in6_ifaddr *it6; if (ifa->ifa_addr->sa_family != AF_INET6) continue; it6 = (struct in6_ifaddr *)ifa; /* ignore no autoconf addresses. */ if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0) continue; /* ignore autoconf addresses with different prefixes. */ if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr) continue; /* * Now we are looking at an autoconf address with the same * prefix as ours. If the address is temporary and is still * preferred, do not create another one. It would be rare, but * could happen, for example, when we resume a laptop PC after * a long period. */ if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && !IFA6_IS_DEPRECATED(it6)) { public_ifa6 = NULL; break; } /* * This is a public autoconf address that has the same prefix * as ours. If it is preferred, keep it. 
We can't break the * loop here, because there may be a still-preferred temporary * address with the prefix. */ if (!IFA6_IS_DEPRECATED(it6)) public_ifa6 = it6; } if (public_ifa6 != NULL) ifa_ref(&public_ifa6->ia_ifa); IF_ADDR_RUNLOCK(ifp); if (public_ifa6 != NULL) { int e; if ((e = in6_tmpifadd(public_ifa6, 0, 0)) != 0) { ifa_free(&public_ifa6->ia_ifa); log(LOG_NOTICE, "regen_tmpaddr: failed to create a new" " tmp addr,errno=%d\n", e); return (-1); } ifa_free(&public_ifa6->ia_ifa); return (0); } return (-1); } /* * Nuke neighbor cache/prefix/default router management table, right before * ifp goes away. */ void nd6_purge(struct ifnet *ifp) { struct nd_defrouter *dr, *ndr; struct nd_prefix *pr, *npr; /* * Nuke default router list entries toward ifp. * We defer removal of default router list entries that is installed * in the routing table, in order to keep additional side effects as * small as possible. */ TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) { if (dr->installed) continue; if (dr->ifp == ifp) defrtrlist_del(dr); } TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) { if (!dr->installed) continue; if (dr->ifp == ifp) defrtrlist_del(dr); } /* Nuke prefix list entries toward ifp */ LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) { if (pr->ndpr_ifp == ifp) { /* * Because if_detach() does *not* release prefixes * while purging addresses the reference count will * still be above zero. We therefore reset it to * make sure that the prefix really gets purged. */ pr->ndpr_refcnt = 0; /* * Previously, pr->ndpr_addr is removed as well, * but I strongly believe we don't have to do it. * nd6_purge() is only called from in6_ifdetach(), * which removes all the associated interface addresses * by itself. * (jinmei@kame.net 20010129) */ prelist_remove(pr); } } /* cancel default outgoing interface setting */ if (V_nd6_defifindex == ifp->if_index) nd6_setdefaultiface(0); if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { /* Refresh default router list. */ defrouter_select(); } /* XXXXX * We do not nuke the neighbor cache entries here any more * because the neighbor cache is kept in if_afdata[AF_INET6]. * nd6_purge() is invoked by in6_ifdetach() which is called * from if_detach() where everything gets purged. So let * in6_domifdetach() do the actual L2 table purging work. */ } /* * the caller acquires and releases the lock on the lltbls * Returns the llentry locked */ struct llentry * nd6_lookup(const struct in6_addr *addr6, int flags, struct ifnet *ifp) { struct sockaddr_in6 sin6; struct llentry *ln; bzero(&sin6, sizeof(sin6)); sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_family = AF_INET6; sin6.sin6_addr = *addr6; IF_AFDATA_LOCK_ASSERT(ifp); ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)&sin6); return (ln); } struct llentry * nd6_alloc(const struct in6_addr *addr6, int flags, struct ifnet *ifp) { struct sockaddr_in6 sin6; struct llentry *ln; bzero(&sin6, sizeof(sin6)); sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_family = AF_INET6; sin6.sin6_addr = *addr6; ln = lltable_alloc_entry(LLTABLE6(ifp), 0, (struct sockaddr *)&sin6); if (ln != NULL) ln->ln_state = ND6_LLINFO_NOSTATE; return (ln); } /* * Test whether a given IPv6 address is a neighbor or not, ignoring * the actual neighbor cache. The neighbor cache is ignored in order * to not reenter the routing code from within itself. 
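 *
 * The prefix test this function applies is, at its core, a masked 128-bit
 * comparison; a self-contained sketch of the equivalent check (name is
 * illustrative):
 */
#if 0
#include <netinet/in.h>

/* Non-zero if addr and prefix agree on every bit covered by mask. */
static int
sketch_masked_equal(const struct in6_addr *addr,
    const struct in6_addr *prefix, const struct in6_addr *mask)
{
	int i;

	for (i = 0; i < 16; i++)
		if ((addr->s6_addr[i] & mask->s6_addr[i]) !=
		    (prefix->s6_addr[i] & mask->s6_addr[i]))
			return (0);
	return (1);
}
#endif
/*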
*/ static int nd6_is_new_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp) { struct nd_prefix *pr; struct ifaddr *dstaddr; + struct rt_addrinfo info; + struct sockaddr_in6 rt_key; + struct sockaddr *dst6; + int fibnum; /* * A link-local address is always a neighbor. * XXX: a link does not necessarily specify a single interface. */ if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) { struct sockaddr_in6 sin6_copy; u_int32_t zone; /* * We need sin6_copy since sa6_recoverscope() may modify the * content (XXX). */ sin6_copy = *addr; if (sa6_recoverscope(&sin6_copy)) return (0); /* XXX: should be impossible */ if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone)) return (0); if (sin6_copy.sin6_scope_id == zone) return (1); else return (0); } + bzero(&rt_key, sizeof(rt_key)); + bzero(&info, sizeof(info)); + info.rti_info[RTAX_DST] = (struct sockaddr *)&rt_key; + + /* Always use the default FIB here. XXME - why? */ + fibnum = RT_DEFAULT_FIB; + /* * If the address matches one of our addresses, * it should be a neighbor. * If the address matches one of our on-link prefixes, it should be a * neighbor. */ LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { if (pr->ndpr_ifp != ifp) continue; if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) { - struct rtentry *rt; /* Always use the default FIB here. */ - rt = in6_rtalloc1((struct sockaddr *)&pr->ndpr_prefix, - 0, 0, RT_DEFAULT_FIB); - if (rt == NULL) + dst6 = (struct sockaddr *)&pr->ndpr_prefix; + + /* Restore length field before retrying lookup */ + rt_key.sin6_len = sizeof(rt_key); + if (rib_lookup_info(fibnum, dst6, 0, 0, &info) != 0) continue; /* * This is the case where multiple interfaces * have the same prefix, but only one is installed * into the routing table and that prefix entry * is not the one being examined here. In the case * where RADIX_MPATH is enabled, multiple route * entries (of the same rt_key value) will be * installed because the interface addresses all * differ. */ if (!IN6_ARE_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr, - &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr)) { - RTFREE_LOCKED(rt); + &rt_key.sin6_addr)) continue; - } - RTFREE_LOCKED(rt); } if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr, &addr->sin6_addr, &pr->ndpr_mask)) return (1); } /* * If the address is assigned on the node of the other side of * a p2p interface, the address should be a neighbor. */ dstaddr = ifa_ifwithdstaddr((const struct sockaddr *)addr, RT_ALL_FIBS); if (dstaddr != NULL) { if (dstaddr->ifa_ifp == ifp) { ifa_free(dstaddr); return (1); } ifa_free(dstaddr); } /* * If the default router list is empty, all addresses are regarded * as on-link, and thus, as a neighbor. */ if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV && TAILQ_EMPTY(&V_nd_defrouter) && V_nd6_defifindex == ifp->if_index) { return (1); } return (0); } /* * Detect if a given IPv6 address identifies a neighbor on a given link. * XXX: should take care of the destination of a p2p link? */ int nd6_is_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp) { struct llentry *lle; int rc = 0; IF_AFDATA_UNLOCK_ASSERT(ifp); if (nd6_is_new_addr_neighbor(addr, ifp)) return (1); /* * Even if the address matches none of our addresses, it might be * in the neighbor cache. */ IF_AFDATA_RLOCK(ifp); if ((lle = nd6_lookup(&addr->sin6_addr, 0, ifp)) != NULL) { LLE_RUNLOCK(lle); rc = 1; } IF_AFDATA_RUNLOCK(ifp); return (rc); } /* * Free an nd6 llinfo entry. 
* Since the function would cause significant changes in the kernel, DO NOT * make it global, unless you have a strong reason for the change, and are sure * that the change is safe. * * Set noinline to be dtrace-friendly */ static __noinline void nd6_free(struct llentry *ln, int gc) { struct nd_defrouter *dr; struct ifnet *ifp; LLE_WLOCK_ASSERT(ln); /* * We used to have pfctlinput(PRC_HOSTDEAD) here. * Even though it is not harmful, it was not really necessary. */ /* cancel timer */ nd6_llinfo_settimer_locked(ln, -1); ifp = ln->lle_tbl->llt_ifp; if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { dr = defrouter_lookup(&ln->r_l3addr.addr6, ifp); if (dr != NULL && dr->expire && ln->ln_state == ND6_LLINFO_STALE && gc) { /* * If the reason for the deletion is just garbage * collection, and the neighbor is an active default * router, do not delete it. Instead, reset the GC * timer using the router's lifetime. * Simply deleting the entry would affect default * router selection, which is not necessarily a good * thing, especially when we're using router preference * values. * XXX: the check for ln_state would be redundant, * but we intentionally keep it just in case. */ if (dr->expire > time_uptime) nd6_llinfo_settimer_locked(ln, (dr->expire - time_uptime) * hz); else nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); LLE_REMREF(ln); LLE_WUNLOCK(ln); return; } if (dr) { /* * Unreachability of a router might affect the default * router selection and on-link detection of advertised * prefixes. */ /* * Temporarily fake the state to choose a new default * router and to perform on-link determination of * prefixes correctly. * Below the state will be set correctly, * or the entry itself will be deleted. */ ln->ln_state = ND6_LLINFO_INCOMPLETE; } if (ln->ln_router || dr) { /* * We need to unlock to avoid a LOR with rt6_flush() with the * rnh and for the calls to pfxlist_onlink_check() and * defrouter_select() in the block further down for calls * into nd6_lookup(). We still hold a ref. */ LLE_WUNLOCK(ln); /* * rt6_flush must be called whether or not the neighbor * is in the Default Router List. * See a corresponding comment in nd6_na_input(). */ rt6_flush(&ln->r_l3addr.addr6, ifp); } if (dr) { /* * Since defrouter_select() does not affect the * on-link determination and MIP6 needs the check * before the default router selection, we perform * the check now. */ pfxlist_onlink_check(); /* * Refresh default router list. */ defrouter_select(); } /* * If this entry was added by an on-link redirect, remove the * corresponding host route. */ if (ln->la_flags & LLE_REDIRECT) nd6_free_redirect(ln); if (ln->ln_router || dr) LLE_WLOCK(ln); } /* * Safe to unlock. We still hold an extra reference and will not * free(9) in llentry_free() if someone else holds one as well. */ LLE_WUNLOCK(ln); IF_AFDATA_LOCK(ifp); LLE_WLOCK(ln); /* Guard against race with other llentry_free(). */ if (ln->la_flags & LLE_LINKED) { /* Remove callout reference */ LLE_REMREF(ln); lltable_unlink_entry(ln->lle_tbl, ln); } IF_AFDATA_UNLOCK(ifp); llentry_free(ln); } static int nd6_isdynrte(const struct rtentry *rt, void *xap) { if (rt->rt_flags == (RTF_UP | RTF_HOST | RTF_DYNAMIC)) return (1); return (0); } /* * Remove the rtentry for the given llentry, * both of which were installed by a redirect.
*/ static void nd6_free_redirect(const struct llentry *ln) { int fibnum; struct sockaddr_in6 sin6; struct rt_addrinfo info; lltable_fill_sa_entry(ln, (struct sockaddr *)&sin6); memset(&info, 0, sizeof(info)); info.rti_info[RTAX_DST] = (struct sockaddr *)&sin6; info.rti_filter = nd6_isdynrte; for (fibnum = 0; fibnum < rt_numfibs; fibnum++) rtrequest1_fib(RTM_DELETE, &info, NULL, fibnum); } /* * Rejuvenate this function for processing related to routing * operations. */ void nd6_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info) { struct sockaddr_in6 *gateway; struct nd_defrouter *dr; struct ifnet *ifp; gateway = (struct sockaddr_in6 *)rt->rt_gateway; ifp = rt->rt_ifp; switch (req) { case RTM_ADD: break; case RTM_DELETE: if (!ifp) return; /* * Only indirect routes are interesting. */ if ((rt->rt_flags & RTF_GATEWAY) == 0) return; /* * check for default route */ if (IN6_ARE_ADDR_EQUAL(&in6addr_any, &SIN6(rt_key(rt))->sin6_addr)) { dr = defrouter_lookup(&gateway->sin6_addr, ifp); if (dr != NULL) dr->installed = 0; } break; } } int nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) { struct in6_ndireq *ndi = (struct in6_ndireq *)data; struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data; struct in6_ndifreq *ndif = (struct in6_ndifreq *)data; int error = 0; if (ifp->if_afdata[AF_INET6] == NULL) return (EPFNOSUPPORT); switch (cmd) { case OSIOCGIFINFO_IN6: #define ND ndi->ndi /* XXX: old ndp(8) assumes a positive value for linkmtu. */ bzero(&ND, sizeof(ND)); ND.linkmtu = IN6_LINKMTU(ifp); ND.maxmtu = ND_IFINFO(ifp)->maxmtu; ND.basereachable = ND_IFINFO(ifp)->basereachable; ND.reachable = ND_IFINFO(ifp)->reachable; ND.retrans = ND_IFINFO(ifp)->retrans; ND.flags = ND_IFINFO(ifp)->flags; ND.recalctm = ND_IFINFO(ifp)->recalctm; ND.chlim = ND_IFINFO(ifp)->chlim; break; case SIOCGIFINFO_IN6: ND = *ND_IFINFO(ifp); break; case SIOCSIFINFO_IN6: /* * Used to change host variables from userland. * Intended for use on a router to reflect RA configurations. */ /* 0 means 'unspecified' */ if (ND.linkmtu != 0) { if (ND.linkmtu < IPV6_MMTU || ND.linkmtu > IN6_LINKMTU(ifp)) { error = EINVAL; break; } ND_IFINFO(ifp)->linkmtu = ND.linkmtu; } if (ND.basereachable != 0) { int obasereachable = ND_IFINFO(ifp)->basereachable; ND_IFINFO(ifp)->basereachable = ND.basereachable; if (ND.basereachable != obasereachable) ND_IFINFO(ifp)->reachable = ND_COMPUTE_RTIME(ND.basereachable); } if (ND.retrans != 0) ND_IFINFO(ifp)->retrans = ND.retrans; if (ND.chlim != 0) ND_IFINFO(ifp)->chlim = ND.chlim; /* FALLTHROUGH */ case SIOCSIFINFO_FLAGS: { struct ifaddr *ifa; struct in6_ifaddr *ia; if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) && !(ND.flags & ND6_IFF_IFDISABLED)) { /* ifdisabled 1->0 transition */ /* * If the interface is marked as ND6_IFF_IFDISABLED and * has a link-local address with IN6_IFF_DUPLICATED, * do not clear ND6_IFF_IFDISABLED. * See RFC 4862, Section 5.4.5. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ia = (struct in6_ifaddr *)ifa; if ((ia->ia6_flags & IN6_IFF_DUPLICATED) && IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) break; } IF_ADDR_RUNLOCK(ifp); if (ifa != NULL) { /* LLA is duplicated.
*/ ND.flags |= ND6_IFF_IFDISABLED; log(LOG_ERR, "Cannot enable an interface" " with a link-local address marked" " duplicate.\n"); } else { ND_IFINFO(ifp)->flags &= ~ND6_IFF_IFDISABLED; if (ifp->if_flags & IFF_UP) in6_if_up(ifp); } } else if (!(ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) && (ND.flags & ND6_IFF_IFDISABLED)) { /* ifdisabled 0->1 transition */ /* Mark all IPv6 addresses as tentative. */ ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED; if (V_ip6_dad_count > 0 && (ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD) == 0) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ia = (struct in6_ifaddr *)ifa; ia->ia6_flags |= IN6_IFF_TENTATIVE; } IF_ADDR_RUNLOCK(ifp); } } if (ND.flags & ND6_IFF_AUTO_LINKLOCAL) { if (!(ND_IFINFO(ifp)->flags & ND6_IFF_AUTO_LINKLOCAL)) { /* auto_linklocal 0->1 transition */ /* If no link-local address on ifp, configure */ ND_IFINFO(ifp)->flags |= ND6_IFF_AUTO_LINKLOCAL; in6_ifattach(ifp, NULL); } else if (!(ND.flags & ND6_IFF_IFDISABLED) && ifp->if_flags & IFF_UP) { /* * When the IF already has * ND6_IFF_AUTO_LINKLOCAL, no link-local * address is assigned, and IFF_UP, try to * assign one. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ia = (struct in6_ifaddr *)ifa; if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) break; } IF_ADDR_RUNLOCK(ifp); if (ifa != NULL) /* No LLA is configured. */ in6_ifattach(ifp, NULL); } } } ND_IFINFO(ifp)->flags = ND.flags; break; #undef ND case SIOCSNDFLUSH_IN6: /* XXX: the ioctl name is confusing... */ /* sync kernel routing table with the default router list */ defrouter_reset(); defrouter_select(); break; case SIOCSPFXFLUSH_IN6: { /* flush all the prefixes advertised by routers */ struct nd_prefix *pr, *next; LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, next) { struct in6_ifaddr *ia, *ia_next; if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr)) continue; /* XXX */ /* do we really have to remove addresses as well? */ /* XXXRW: in6_ifaddrhead locking. */ TAILQ_FOREACH_SAFE(ia, &V_in6_ifaddrhead, ia_link, ia_next) { if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0) continue; if (ia->ia6_ndpr == pr) in6_purgeaddr(&ia->ia_ifa); } prelist_remove(pr); } break; } case SIOCSRTRFLUSH_IN6: { /* flush all the default routers */ struct nd_defrouter *dr, *next; defrouter_reset(); TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, next) { defrtrlist_del(dr); } defrouter_select(); break; } case SIOCGNBRINFO_IN6: { struct llentry *ln; struct in6_addr nb_addr = nbi->addr; /* make local for safety */ if ((error = in6_setscope(&nb_addr, ifp, NULL)) != 0) return (error); IF_AFDATA_RLOCK(ifp); ln = nd6_lookup(&nb_addr, 0, ifp); IF_AFDATA_RUNLOCK(ifp); if (ln == NULL) { error = EINVAL; break; } nbi->state = ln->ln_state; nbi->asked = ln->la_asked; nbi->isrouter = ln->ln_router; if (ln->la_expire == 0) nbi->expire = 0; else nbi->expire = ln->la_expire + ln->lle_remtime / hz + (time_second - time_uptime); LLE_RUNLOCK(ln); break; } case SIOCGDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */ ndif->ifindex = V_nd6_defifindex; break; case SIOCSDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */ return (nd6_setdefaultiface(ndif->ifindex)); } return (error); } /* * Calculates the new isRouter value based on the provided parameters and * returns it. */ static int nd6_is_router(int type, int code, int is_new, int old_addr, int new_addr, int ln_router) { /* * ICMP6 type-dependent behavior.
* * NS: clear IsRouter if new entry * RS: clear IsRouter * RA: set IsRouter if there's lladdr * redir: clear IsRouter if new entry * * RA case, (1): * The spec says that we must set IsRouter in the following cases: * - If lladdr exists, set IsRouter. This means (1-5). * - If it is an old entry (!newentry), set IsRouter. This means (7). * So, based on the spec, in (1-5) and (7) cases we must set IsRouter. * A question arises for case (1). Case (1) has no lladdr in the * neighbor cache; this is similar to (6). * This case is rare but we figured that we MUST NOT set IsRouter.
 *
 *	is_new	old_addr  new_addr		NS	RS	RA	redir
 *								D	R
 *	0	n	  n	  (1)		c	?	s
 *	0	y	  n	  (2)		c	s	s
 *	0	n	  y	  (3)		c	s	s
 *	0	y	  y	  (4)		c	s	s
 *	0	y	  y	  (5)		c	s	s
 *	1	--	  n	  (6)		c	c	c	s
 *	1	--	  y	  (7)		c	c	s	c	s
 *
 * (c=clear s=set) */ switch (type & 0xff) { case ND_NEIGHBOR_SOLICIT: /* * New entry must have is_router flag cleared. */ if (is_new) /* (6-7) */ ln_router = 0; break; case ND_REDIRECT: /* * If the icmp is a redirect to a better router, always set the * is_router flag. Otherwise, if the entry is newly created, * clear the flag. [RFC 2461, sec 8.3] */ if (code == ND_REDIRECT_ROUTER) ln_router = 1; else { if (is_new) /* (6-7) */ ln_router = 0; } break; case ND_ROUTER_SOLICIT: /* * is_router flag must always be cleared. */ ln_router = 0; break; case ND_ROUTER_ADVERT: /* * Mark an entry with lladdr as a router. */ if ((!is_new && (old_addr || new_addr)) || /* (2-5) */ (is_new && new_addr)) { /* (7) */ ln_router = 1; } break; } return (ln_router); } /* * Create a neighbor cache entry and cache the link-layer address, * on reception of inbound ND6 packets. (RS/RA/NS/redirect) * * type - ICMP6 type * code - type-dependent information * */ void nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, int lladdrlen, int type, int code) { struct llentry *ln = NULL, *ln_tmp; int is_newentry; int do_update; int olladdr; int llchange; int flags; uint16_t router = 0; struct sockaddr_in6 sin6; struct mbuf *chain = NULL; u_char linkhdr[LLE_MAX_LINKHDR]; size_t linkhdrsize; int lladdr_off; IF_AFDATA_UNLOCK_ASSERT(ifp); KASSERT(ifp != NULL, ("%s: ifp == NULL", __func__)); KASSERT(from != NULL, ("%s: from == NULL", __func__)); /* nothing must be updated for unspecified address */ if (IN6_IS_ADDR_UNSPECIFIED(from)) return; /* * Validation of ifp->if_addrlen and lladdrlen must be done by * the caller. * * XXX If the link does not have a link-layer address, what should * we do? (ifp->if_addrlen == 0) * Spec says nothing in sections for RA, RS and NA. There's a small * description of it in the NS section (RFC 2461 7.2.3). */ flags = lladdr ? LLE_EXCLUSIVE : 0; IF_AFDATA_RLOCK(ifp); ln = nd6_lookup(from, flags, ifp); IF_AFDATA_RUNLOCK(ifp); is_newentry = 0; if (ln == NULL) { flags |= LLE_EXCLUSIVE; ln = nd6_alloc(from, 0, ifp); if (ln == NULL) return; /* * Since we already know all the data for the new entry, * fill it before insertion.
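 *
 * The code below follows a common optimistic-insert pattern: build the
 * entry without the table lock, then take the lock, re-lookup, and either
 * link the new entry or throw it away in favor of one that raced in. The
 * same pattern in a generic, self-contained form (a pthread-based sketch;
 * all names here are illustrative):
 */
#if 0
#include <pthread.h>
#include <stdlib.h>

struct entry { struct entry *next; int key; };
static struct entry *table_head;
static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

static struct entry *
sketch_find_locked(int key)
{
	struct entry *e;

	for (e = table_head; e != NULL; e = e->next)
		if (e->key == key)
			return (e);
	return (NULL);
}

static struct entry *
sketch_insert(int key)
{
	struct entry *e, *tmp;

	if ((e = calloc(1, sizeof(*e))) == NULL)
		return (NULL);
	e->key = key;			/* fill before insertion */
	pthread_mutex_lock(&table_lock);
	tmp = sketch_find_locked(key);	/* prefer any existing entry */
	if (tmp == NULL) {
		e->next = table_head;
		table_head = e;
	}
	pthread_mutex_unlock(&table_lock);
	if (tmp != NULL) {
		free(e);		/* lost the race; use the winner */
		e = tmp;
	}
	return (e);
}
#endif
/*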
*/ if (lladdr != NULL) { linkhdrsize = sizeof(linkhdr); if (lltable_calc_llheader(ifp, AF_INET6, lladdr, linkhdr, &linkhdrsize, &lladdr_off) != 0) return; lltable_set_entry_addr(ifp, ln, linkhdr, linkhdrsize, lladdr_off); } IF_AFDATA_WLOCK(ifp); LLE_WLOCK(ln); /* Prefer any existing lle over newly-created one */ ln_tmp = nd6_lookup(from, LLE_EXCLUSIVE, ifp); if (ln_tmp == NULL) lltable_link_entry(LLTABLE6(ifp), ln); IF_AFDATA_WUNLOCK(ifp); if (ln_tmp == NULL) { /* No existing lle, mark as new entry (6,7) */ is_newentry = 1; nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); if (lladdr != NULL) /* (7) */ EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); } else { lltable_free_entry(LLTABLE6(ifp), ln); ln = ln_tmp; ln_tmp = NULL; } } /* do nothing if static ndp is set */ if ((ln->la_flags & LLE_STATIC)) { if (flags & LLE_EXCLUSIVE) LLE_WUNLOCK(ln); else LLE_RUNLOCK(ln); return; } olladdr = (ln->la_flags & LLE_VALID) ? 1 : 0; if (olladdr && lladdr) { llchange = bcmp(lladdr, ln->ll_addr, ifp->if_addrlen); } else if (!olladdr && lladdr) llchange = 1; else llchange = 0;
 *
 *	newentry  olladdr  lladdr  llchange	(*=record)
 *	0	  n	   n	   --		(1)
 *	0	  y	   n	   --		(2)
 *	0	  n	   y	   y		(3)	* STALE
 *	0	  y	   y	   n		(4)	*
 *	0	  y	   y	   y		(5)	* STALE
 *	1	  --	   n	   --		(6)	  NOSTATE(= PASSIVE)
 *	1	  --	   y	   --		(7)	* STALE
 *
 */ do_update = 0; if (is_newentry == 0 && llchange != 0) { do_update = 1; /* (3,5) */ /* * Record source link-layer address * XXX is it dependent on ifp->if_type? */ linkhdrsize = sizeof(linkhdr); if (lltable_calc_llheader(ifp, AF_INET6, lladdr, linkhdr, &linkhdrsize, &lladdr_off) != 0) return; if (lltable_try_set_entry_addr(ifp, ln, linkhdr, linkhdrsize, lladdr_off) == 0) { /* Entry was deleted */ return; } nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); if (ln->la_hold != NULL) nd6_grab_holdchain(ln, &chain, &sin6); } /* Calculates new router status */ router = nd6_is_router(type, code, is_newentry, olladdr, lladdr != NULL ? 1 : 0, ln->ln_router); ln->ln_router = router; /* Mark non-router redirects with special flag */ if ((type & 0xFF) == ND_REDIRECT && code != ND_REDIRECT_ROUTER) ln->la_flags |= LLE_REDIRECT; if (flags & LLE_EXCLUSIVE) LLE_WUNLOCK(ln); else LLE_RUNLOCK(ln); if (chain != NULL) nd6_flush_holdchain(ifp, ifp, chain, &sin6); /* * When the link-layer address of a router changes, select the * best router again. In particular, when the neighbor entry is newly * created, it might affect the selection policy. * Question: can we restrict the first condition to the "is_newentry" * case? * XXX: when we hear an RA from a new router with the link-layer * address option, defrouter_select() is called twice, since * defrtrlist_update() calls the function as well. However, I believe * we can tolerate the overhead, since it only happens the first * time. * XXX: although defrouter_select() should not have a bad effect * on hosts that are not autoconfigured, we explicitly avoid such * cases for safety.
*/ if ((do_update || is_newentry) && router && ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { /* * guaranteed recursion */ defrouter_select(); } } static void nd6_slowtimo(void *arg) { CURVNET_SET((struct vnet *) arg); struct nd_ifinfo *nd6if; struct ifnet *ifp; callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz, nd6_slowtimo, curvnet); IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (ifp->if_afdata[AF_INET6] == NULL) continue; nd6if = ND_IFINFO(ifp); if (nd6if->basereachable && /* already initialized */ (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) { /* * Since the reachable time rarely changes via Router * Advertisements, we SHOULD ensure that a new random * value gets recomputed at least once every few hours. * (RFC 2461, 6.3.4) */ nd6if->recalctm = V_nd6_recalc_reachtm_interval; nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable); } } IFNET_RUNLOCK_NOSLEEP(); CURVNET_RESTORE(); } void nd6_grab_holdchain(struct llentry *ln, struct mbuf **chain, struct sockaddr_in6 *sin6) { LLE_WLOCK_ASSERT(ln); *chain = ln->la_hold; ln->la_hold = NULL; lltable_fill_sa_entry(ln, (struct sockaddr *)sin6); if (ln->ln_state == ND6_LLINFO_STALE) { /* * The first time we send a packet to a * neighbor whose entry is STALE, we have * to change the state to DELAY and set * a timer to expire in DELAY_FIRST_PROBE_TIME * seconds, so that neighbor unreachability * detection is performed on expiration. * (RFC 2461 7.3.3) */ nd6_llinfo_setstate(ln, ND6_LLINFO_DELAY); } } int nd6_output_ifp(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m, struct sockaddr_in6 *dst, struct route *ro) { int error; int ip6len; struct ip6_hdr *ip6; struct m_tag *mtag; #ifdef MAC mac_netinet6_nd6_send(ifp, m); #endif /* * If called from nd6_ns_output() (NS), nd6_na_output() (NA), * icmp6_redirect_output() (REDIRECT) or from rip6_output() (RS, RA * as handled by rtsol and rtadvd), mbufs will be tagged for SeND * to be diverted to user space. When re-injected into the kernel, * send_output() will directly dispatch them to the outgoing interface. */ if (send_sendso_input_hook != NULL) { mtag = m_tag_find(m, PACKET_TAG_ND_OUTGOING, NULL); if (mtag != NULL) { ip6 = mtod(m, struct ip6_hdr *); ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen); /* Use the SEND socket */ error = send_sendso_input_hook(m, ifp, SND_OUT, ip6len); /* -1 == no app on SEND socket */ if (error == 0 || error != -1) return (error); } } m_clrprotoflags(m); /* Avoid confusing lower layers. */ IP_PROBE(send, NULL, NULL, mtod(m, struct ip6_hdr *), ifp, NULL, mtod(m, struct ip6_hdr *)); if ((ifp->if_flags & IFF_LOOPBACK) == 0) origifp = ifp; error = (*ifp->if_output)(origifp, m, (struct sockaddr *)dst, ro); return (error); } /* * Look up the link header for the @sa_dst address. Stores the found * data in the @desten buffer. A copy of the lle ln_flags can also be * saved in @pflags if @pflags is non-NULL. * * If the destination LLE does not exist or lle state modification * is required, the "slow" version is called. * * Return values: * - 0 on success (address copied to buffer).
* - EWOULDBLOCK (no local error, but address is still unresolved) * - other errors (alloc failure, etc) */ int nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m, const struct sockaddr *sa_dst, u_char *desten, uint32_t *pflags) { struct llentry *ln = NULL; const struct sockaddr_in6 *dst6; if (pflags != NULL) *pflags = 0; dst6 = (const struct sockaddr_in6 *)sa_dst; /* discard the packet if IPv6 operation is disabled on the interface */ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) { m_freem(m); return (ENETDOWN); /* better error? */ } if (m != NULL && m->m_flags & M_MCAST) { switch (ifp->if_type) { case IFT_ETHER: case IFT_FDDI: case IFT_L2VLAN: case IFT_IEEE80211: case IFT_BRIDGE: case IFT_ISO88025: ETHER_MAP_IPV6_MULTICAST(&dst6->sin6_addr, desten); return (0); default: m_freem(m); return (EAFNOSUPPORT); } } IF_AFDATA_RLOCK(ifp); ln = nd6_lookup(&dst6->sin6_addr, LLE_UNLOCKED, ifp); if (ln != NULL && (ln->r_flags & RLLE_VALID) != 0) { /* Entry found, let's copy lle info */ bcopy(ln->r_linkdata, desten, ln->r_hdrlen); if (pflags != NULL) *pflags = LLE_VALID | (ln->r_flags & RLLE_IFADDR); /* Check if we have feedback request from nd6 timer */ if (ln->r_skip_req != 0) { LLE_REQ_LOCK(ln); ln->r_skip_req = 0; /* Notify that entry was used */ ln->lle_hittime = time_uptime; LLE_REQ_UNLOCK(ln); } IF_AFDATA_RUNLOCK(ifp); return (0); } IF_AFDATA_RUNLOCK(ifp); return (nd6_resolve_slow(ifp, 0, m, dst6, desten, pflags)); } /* * Do L2 address resolution for the @sa_dst address. Stores the found * address in the @desten buffer. A copy of the lle ln_flags can also be * saved in @pflags if @pflags is non-NULL. * * Heavy version. * The function assumes that the destination LLE does not exist or * is invalid or stale, so the LLE_EXCLUSIVE lock needs to be acquired. * * Set noinline to be dtrace-friendly */ static __noinline int nd6_resolve_slow(struct ifnet *ifp, int flags, struct mbuf *m, const struct sockaddr_in6 *dst, u_char *desten, uint32_t *pflags) { struct llentry *lle = NULL, *lle_tmp; struct in6_addr *psrc, src; int send_ns, ll_len; char *lladdr; /* * Address resolution or Neighbor Unreachability Detection * for the next hop. * At this point, the destination of the packet must be a unicast * or an anycast address (i.e., not a multicast). */ if (lle == NULL) { IF_AFDATA_RLOCK(ifp); lle = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp); IF_AFDATA_RUNLOCK(ifp); if ((lle == NULL) && nd6_is_addr_neighbor(dst, ifp)) { /* * Since nd6_is_addr_neighbor() internally calls nd6_lookup(), * the condition below is not very efficient. But we believe * it is tolerable, because this should be a rare case.
*/ lle = nd6_alloc(&dst->sin6_addr, 0, ifp); if (lle == NULL) { char ip6buf[INET6_ADDRSTRLEN]; log(LOG_DEBUG, "nd6_output: can't allocate llinfo for %s " "(ln=%p)\n", ip6_sprintf(ip6buf, &dst->sin6_addr), lle); m_freem(m); return (ENOBUFS); } IF_AFDATA_WLOCK(ifp); LLE_WLOCK(lle); /* Prefer any existing entry over newly-created one */ lle_tmp = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp); if (lle_tmp == NULL) lltable_link_entry(LLTABLE6(ifp), lle); IF_AFDATA_WUNLOCK(ifp); if (lle_tmp != NULL) { lltable_free_entry(LLTABLE6(ifp), lle); lle = lle_tmp; lle_tmp = NULL; } } } if (lle == NULL) { if (!(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) { m_freem(m); return (ENOBUFS); } if (m != NULL) m_freem(m); return (ENOBUFS); } LLE_WLOCK_ASSERT(lle); /* * The first time we send a packet to a neighbor whose entry is * STALE, we have to change the state to DELAY and set a timer to * expire in DELAY_FIRST_PROBE_TIME seconds, so that neighbor * unreachability detection is performed on expiration. * (RFC 2461 7.3.3) */ if (lle->ln_state == ND6_LLINFO_STALE) nd6_llinfo_setstate(lle, ND6_LLINFO_DELAY); /* * If the neighbor cache entry has a state other than INCOMPLETE * (i.e. its link-layer address is already resolved), just * send the packet. */ if (lle->ln_state > ND6_LLINFO_INCOMPLETE) { if (flags & LLE_ADDRONLY) { lladdr = lle->ll_addr; ll_len = ifp->if_addrlen; } else { lladdr = lle->r_linkdata; ll_len = lle->r_hdrlen; } bcopy(lladdr, desten, ll_len); if (pflags != NULL) *pflags = lle->la_flags; LLE_WUNLOCK(lle); return (0); } /* * There is a neighbor cache entry, but no ethernet address * response yet. Append this latest packet to the end of the * hold queue. If the number of queued packets then exceeds * nd6_maxqueuelen, the oldest packets in the queue are removed. */ if (lle->la_hold != NULL) { struct mbuf *m_hold; int i; i = 0; for (m_hold = lle->la_hold; m_hold; m_hold = m_hold->m_nextpkt){ i++; if (m_hold->m_nextpkt == NULL) { m_hold->m_nextpkt = m; break; } } while (i >= V_nd6_maxqueuelen) { m_hold = lle->la_hold; lle->la_hold = lle->la_hold->m_nextpkt; m_freem(m_hold); i--; } } else { lle->la_hold = m; } /* * If there has been no NS for the neighbor after entering the * INCOMPLETE state, send the first solicitation. * Note that for newly-created lle la_asked will be 0, * so we will transition from ND6_LLINFO_NOSTATE to * ND6_LLINFO_INCOMPLETE state here. */ psrc = NULL; send_ns = 0; if (lle->la_asked == 0) { lle->la_asked++; send_ns = 1; psrc = nd6_llinfo_get_holdsrc(lle, &src); nd6_llinfo_setstate(lle, ND6_LLINFO_INCOMPLETE); } LLE_WUNLOCK(lle); if (send_ns != 0) nd6_ns_output(ifp, psrc, NULL, &dst->sin6_addr, NULL); return (EWOULDBLOCK); } /* * Do L2 address resolution for the @sa_dst address. Stores the found * address in the @desten buffer. A copy of the lle ln_flags can also be * saved in @pflags if @pflags is non-NULL. * * Return values: * - 0 on success (address copied to buffer).
* - EWOULDBLOCK (no local error, but address is still unresolved) * - other errors (alloc failure, etc) */ int nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst, char *desten, uint32_t *pflags) { int error; flags |= LLE_ADDRONLY; error = nd6_resolve_slow(ifp, flags, NULL, (const struct sockaddr_in6 *)dst, desten, pflags); return (error); } int nd6_flush_holdchain(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain, struct sockaddr_in6 *dst) { struct mbuf *m, *m_head; struct ifnet *outifp; int error = 0; m_head = chain; if ((ifp->if_flags & IFF_LOOPBACK) != 0) outifp = origifp; else outifp = ifp; while (m_head) { m = m_head; m_head = m_head->m_nextpkt; error = nd6_output_ifp(ifp, origifp, m, dst, NULL); } /* * XXX * note that intermediate errors are blindly ignored */ return (error); } static int nd6_need_cache(struct ifnet *ifp) { /* * XXX: we currently do not make a neighbor cache on any interface * other than ARCnet, Ethernet, FDDI and GIF. * * RFC2893 says: * - unidirectional tunnels need no ND */ switch (ifp->if_type) { case IFT_ARCNET: case IFT_ETHER: case IFT_FDDI: case IFT_IEEE1394: case IFT_L2VLAN: case IFT_IEEE80211: case IFT_INFINIBAND: case IFT_BRIDGE: case IFT_PROPVIRTUAL: return (1); default: return (0); } } /* * Add a permanent ND6 link-layer record for a given * interface address. * * Very similar to IPv4 arp_ifinit(), but: * 1) IPv6 DAD is performed in a different place * 2) It is called by the IPv6 protocol stack, in contrast to * arp_ifinit(), which is typically called in the SIOCSIFADDR * driver ioctl handler. * */ int nd6_add_ifa_lle(struct in6_ifaddr *ia) { struct ifnet *ifp; struct llentry *ln, *ln_tmp; struct sockaddr *dst; ifp = ia->ia_ifa.ifa_ifp; if (nd6_need_cache(ifp) == 0) return (0); ia->ia_ifa.ifa_rtrequest = nd6_rtrequest; dst = (struct sockaddr *)&ia->ia_addr; ln = lltable_alloc_entry(LLTABLE6(ifp), LLE_IFADDR, dst); if (ln == NULL) return (ENOBUFS); IF_AFDATA_WLOCK(ifp); LLE_WLOCK(ln); /* Unlink any existing entry */ ln_tmp = lla_lookup(LLTABLE6(ifp), LLE_EXCLUSIVE, dst); if (ln_tmp != NULL) lltable_unlink_entry(LLTABLE6(ifp), ln_tmp); lltable_link_entry(LLTABLE6(ifp), ln); IF_AFDATA_WUNLOCK(ifp); if (ln_tmp != NULL) EVENTHANDLER_INVOKE(lle_event, ln_tmp, LLENTRY_EXPIRED); EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); LLE_WUNLOCK(ln); if (ln_tmp != NULL) llentry_free(ln_tmp); return (0); } /* * Removes either all lle entries for a given @ia, or the lle * corresponding to the @ia address.
*/ void nd6_rem_ifa_lle(struct in6_ifaddr *ia, int all) { struct sockaddr_in6 mask, addr; struct sockaddr *saddr, *smask; struct ifnet *ifp; ifp = ia->ia_ifa.ifa_ifp; memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr)); memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask)); saddr = (struct sockaddr *)&addr; smask = (struct sockaddr *)&mask; if (all != 0) lltable_prefix_free(AF_INET6, saddr, smask, LLE_STATIC); else lltable_delete_addr(LLTABLE6(ifp), LLE_IFADDR, saddr); } static void clear_llinfo_pqueue(struct llentry *ln) { struct mbuf *m_hold, *m_hold_next; for (m_hold = ln->la_hold; m_hold; m_hold = m_hold_next) { m_hold_next = m_hold->m_nextpkt; m_freem(m_hold); } ln->la_hold = NULL; return; } static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS); static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS); #ifdef SYSCTL_DECL SYSCTL_DECL(_net_inet6_icmp6); #endif SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist, CTLFLAG_RD, nd6_sysctl_drlist, ""); SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist, CTLFLAG_RD, nd6_sysctl_prlist, ""); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXQLEN, nd6_maxqueuelen, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_maxqueuelen), 1, ""); SYSCTL_INT(_net_inet6_icmp6, OID_AUTO, nd6_gctimer, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_gctimer), (60 * 60 * 24), ""); static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS) { struct in6_defrouter d; struct nd_defrouter *dr; int error; if (req->newptr) return (EPERM); bzero(&d, sizeof(d)); d.rtaddr.sin6_family = AF_INET6; d.rtaddr.sin6_len = sizeof(d.rtaddr); /* * XXX locking */ TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { d.rtaddr.sin6_addr = dr->rtaddr; error = sa6_recoverscope(&d.rtaddr); if (error != 0) return (error); d.flags = dr->flags; d.rtlifetime = dr->rtlifetime; d.expire = dr->expire + (time_second - time_uptime); d.if_index = dr->ifp->if_index; error = SYSCTL_OUT(req, &d, sizeof(d)); if (error != 0) return (error); } return (0); } static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS) { struct in6_prefix p; struct sockaddr_in6 s6; struct nd_prefix *pr; struct nd_pfxrouter *pfr; time_t maxexpire; int error; char ip6buf[INET6_ADDRSTRLEN]; if (req->newptr) return (EPERM); bzero(&p, sizeof(p)); p.origin = PR_ORIG_RA; bzero(&s6, sizeof(s6)); s6.sin6_family = AF_INET6; s6.sin6_len = sizeof(s6); /* * XXX locking */ LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { p.prefix = pr->ndpr_prefix; if (sa6_recoverscope(&p.prefix)) { log(LOG_ERR, "scope error in prefix list (%s)\n", ip6_sprintf(ip6buf, &p.prefix.sin6_addr)); /* XXX: press on... */ } p.raflags = pr->ndpr_raf; p.prefixlen = pr->ndpr_plen; p.vltime = pr->ndpr_vltime; p.pltime = pr->ndpr_pltime; p.if_index = pr->ndpr_ifp->if_index; if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME) p.expire = 0; else { /* XXX: we assume time_t is signed. 
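 * The expression that follows builds the largest positive time_t by
 * taking an all-ones value and clearing its sign bit, then clamps the
 * expire sum so the addition cannot overflow. The same trick in
 * isolation (a minimal sketch; names are illustrative):
 */
#if 0
#include <time.h>

static time_t
sketch_clamped_expire(time_t lastupdate, time_t vltime)
{
	time_t maxexpire;

	/* All bits set, minus the sign bit: the maximum signed time_t. */
	maxexpire = (-1) & ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1));
	if (vltime < maxexpire - lastupdate)
		return (lastupdate + vltime);
	return (maxexpire);
}
#endif
/*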
*/ maxexpire = (-1) & ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1)); if (pr->ndpr_vltime < maxexpire - pr->ndpr_lastupdate) p.expire = pr->ndpr_lastupdate + pr->ndpr_vltime + (time_second - time_uptime); else p.expire = maxexpire; } p.refcnt = pr->ndpr_refcnt; p.flags = pr->ndpr_stateflags; p.advrtrs = 0; LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) p.advrtrs++; error = SYSCTL_OUT(req, &p, sizeof(p)); if (error != 0) return (error); LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) { s6.sin6_addr = pfr->router->rtaddr; if (sa6_recoverscope(&s6)) log(LOG_ERR, "scope error in prefix list (%s)\n", ip6_sprintf(ip6buf, &pfr->router->rtaddr)); error = SYSCTL_OUT(req, &s6, sizeof(s6)); if (error != 0) return (error); } } return (0); } diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c index a5ce2ec83c84..b7ba0b205f49 100644 --- a/sys/netinet6/nd6_nbr.c +++ b/sys/netinet6/nd6_nbr.c @@ -1,1611 +1,1592 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $KAME: nd6_nbr.c,v 1.86 2002/01/21 02:33:04 jinmei Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_mpath.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RADIX_MPATH #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #define SDL(s) ((struct sockaddr_dl *)s) struct dadq; static struct dadq *nd6_dad_find(struct ifaddr *, struct nd_opt_nonce *); static void nd6_dad_add(struct dadq *dp); static void nd6_dad_del(struct dadq *dp); static void nd6_dad_rele(struct dadq *); static void nd6_dad_starttimer(struct dadq *, int, int); static void nd6_dad_stoptimer(struct dadq *); static void nd6_dad_timer(struct dadq *); static void nd6_dad_duplicated(struct ifaddr *, struct dadq *); static void nd6_dad_ns_output(struct dadq *); static void nd6_dad_ns_input(struct ifaddr *, struct nd_opt_nonce *); static void nd6_dad_na_input(struct ifaddr *); static void nd6_na_output_fib(struct ifnet *, const struct in6_addr *, const struct in6_addr *, u_long, int, struct sockaddr *, u_int); static void nd6_ns_output_fib(struct ifnet *, const struct in6_addr *, const struct in6_addr *, const struct in6_addr *, uint8_t *, u_int); static VNET_DEFINE(int, dad_enhanced) = 1; #define V_dad_enhanced VNET(dad_enhanced) SYSCTL_DECL(_net_inet6_ip6); SYSCTL_INT(_net_inet6_ip6, OID_AUTO, dad_enhanced, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dad_enhanced), 0, "Enable Enhanced DAD, which adds a random nonce to NS messages for DAD."); static VNET_DEFINE(int, dad_maxtry) = 15; /* max # of *tries* to transmit DAD packet */ #define V_dad_maxtry VNET(dad_maxtry) /* * Input a Neighbor Solicitation Message. * * Based on RFC 2461 * Based on RFC 2462 (duplicate address detection) */ void nd6_ns_input(struct mbuf *m, int off, int icmp6len) { struct ifnet *ifp = m->m_pkthdr.rcvif; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct nd_neighbor_solicit *nd_ns; struct in6_addr saddr6 = ip6->ip6_src; struct in6_addr daddr6 = ip6->ip6_dst; struct in6_addr taddr6; struct in6_addr myaddr6; char *lladdr = NULL; struct ifaddr *ifa = NULL; int lladdrlen = 0; int anycast = 0, proxy = 0, tentative = 0; int tlladdr; int rflag; union nd_opts ndopts; struct sockaddr_dl proxydl; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; rflag = (V_ip6_forwarding) ? ND_NA_FLAG_ROUTER : 0; if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV && V_ip6_norbit_raif) rflag = 0; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len,); nd_ns = (struct nd_neighbor_solicit *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(nd_ns, struct nd_neighbor_solicit *, m, off, icmp6len); if (nd_ns == NULL) { ICMP6STAT_INC(icp6s_tooshort); return; } #endif ip6 = mtod(m, struct ip6_hdr *); /* adjust pointer for safety */ taddr6 = nd_ns->nd_ns_target; if (in6_setscope(&taddr6, ifp, NULL) != 0) goto bad; if (ip6->ip6_hlim != 255) { nd6log((LOG_ERR, "nd6_ns_input: invalid hlim (%d) from %s to %s on %s\n", ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp))); goto bad; } if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) { /* dst has to be a solicited node multicast address. 
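The destination check below accepts a DAD probe only when it is addressed to a solicited-node multicast group (ff02::1:ffXX:XXXX, RFC 4291), deliberately skipping the 16-bit word in which the KAME stack embeds the receiving zone's interface index. For comparison, a hedged userland-style equivalent of the test against the plain on-the-wire form (helper name is hypothetical):

#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <netinet/in.h>

/* true iff addr is ff02::1:ffXX:XXXX (solicited-node multicast). */
static bool
is_solicited_node_mcast(const struct in6_addr *a)
{
	static const uint8_t prefix[13] = {
		0xff, 0x02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01, 0xff
	};

	/* Bytes 0..12 are fixed; the last 3 bytes come from the target. */
	return (memcmp(a->s6_addr, prefix, sizeof(prefix)) == 0);
}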
*/ if (daddr6.s6_addr16[0] == IPV6_ADDR_INT16_MLL && /* don't check ifindex portion */ daddr6.s6_addr32[1] == 0 && daddr6.s6_addr32[2] == IPV6_ADDR_INT32_ONE && daddr6.s6_addr8[12] == 0xff) { ; /* good */ } else { nd6log((LOG_INFO, "nd6_ns_input: bad DAD packet " "(wrong ip6 dst)\n")); goto bad; } } else if (!V_nd6_onlink_ns_rfc4861) { struct sockaddr_in6 src_sa6; /* * According to recent IETF discussions, it is not a good idea * to accept a NS from an address which would not be deemed * to be a neighbor otherwise. This point is expected to be * clarified in future revisions of the specification. */ bzero(&src_sa6, sizeof(src_sa6)); src_sa6.sin6_family = AF_INET6; src_sa6.sin6_len = sizeof(src_sa6); src_sa6.sin6_addr = saddr6; if (nd6_is_addr_neighbor(&src_sa6, ifp) == 0) { nd6log((LOG_INFO, "nd6_ns_input: " "NS packet from non-neighbor\n")); goto bad; } } if (IN6_IS_ADDR_MULTICAST(&taddr6)) { nd6log((LOG_INFO, "nd6_ns_input: bad NS target (multicast)\n")); goto bad; } icmp6len -= sizeof(*nd_ns); nd6_option_init(nd_ns + 1, icmp6len, &ndopts); if (nd6_options(&ndopts) < 0) { nd6log((LOG_INFO, "nd6_ns_input: invalid ND option, ignored\n")); /* nd6_options have incremented stats */ goto freeit; } if (ndopts.nd_opts_src_lladdr) { lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1); lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3; } if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && lladdr) { nd6log((LOG_INFO, "nd6_ns_input: bad DAD packet " "(link-layer address option)\n")); goto bad; } /* * Attaching target link-layer address to the NA? * (RFC 2461 7.2.4) * * NS IP dst is unicast/anycast MUST NOT add * NS IP dst is solicited-node multicast MUST add * * In implementation, we add target link-layer address by default. * We do not add one in MUST NOT cases. */ if (!IN6_IS_ADDR_MULTICAST(&daddr6)) tlladdr = 0; else tlladdr = 1; /* * Target address (taddr6) must be either: * (1) Valid unicast/anycast address for my receiving interface, * (2) Unicast address for which I'm offering proxy service, or * (3) "tentative" address on which DAD is being performed. */ /* (1) and (3) check. */ if (ifp->if_carp) ifa = (*carp_iamatch6_p)(ifp, &taddr6); else ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6); /* (2) check. */ if (ifa == NULL) { - struct route_in6 ro; - int need_proxy; + struct sockaddr_dl rt_gateway; + struct rt_addrinfo info; + struct sockaddr_in6 dst6; - bzero(&ro, sizeof(ro)); - ro.ro_dst.sin6_len = sizeof(struct sockaddr_in6); - ro.ro_dst.sin6_family = AF_INET6; - ro.ro_dst.sin6_addr = taddr6; + bzero(&dst6, sizeof(dst6)); + dst6.sin6_len = sizeof(struct sockaddr_in6); + dst6.sin6_family = AF_INET6; + dst6.sin6_addr = taddr6; + + bzero(&rt_gateway, sizeof(rt_gateway)); + rt_gateway.sdl_len = sizeof(rt_gateway); + bzero(&info, sizeof(info)); + info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&rt_gateway; /* Always use the default FIB. 
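The hunk below replaces the old route allocation (in6_rtalloc()/RTFREE()) with rib_lookup_info(), which copies the answer into caller-owned storage, here an rt_addrinfo whose RTAX_GATEWAY slot points at a stack sockaddr_dl; no rtentry reference or lock leaves the routing table, so nothing needs to be freed on any exit path. Condensed shape of the new calling pattern (kernel context, so this is an illustration of the hunk rather than a standalone program):

	struct sockaddr_dl rt_gateway;	/* caller-owned gateway storage */
	struct rt_addrinfo info;
	struct sockaddr_in6 dst6;

	bzero(&dst6, sizeof(dst6));
	dst6.sin6_len = sizeof(dst6);
	dst6.sin6_family = AF_INET6;
	dst6.sin6_addr = taddr6;		/* address being looked up */

	bzero(&rt_gateway, sizeof(rt_gateway));
	rt_gateway.sdl_len = sizeof(rt_gateway);
	bzero(&info, sizeof(info));
	info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&rt_gateway;

	/* Results are copied out; no rtentry refcount is held afterward. */
	if (rib_lookup_info(RT_DEFAULT_FIB, (struct sockaddr *)&dst6,
	    0, 0, &info) == 0 &&
	    (info.rti_flags & RTF_ANNOUNCE) != 0 &&
	    rt_gateway.sdl_family == AF_LINK) {
		/* taddr6 is covered by a proxy-NDP (RTF_ANNOUNCE) entry. */
	}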
*/ -#ifdef RADIX_MPATH - rtalloc_mpath_fib((struct route *)&ro, ntohl(taddr6.s6_addr32[3]), - RT_DEFAULT_FIB); -#else - in6_rtalloc(&ro, RT_DEFAULT_FIB); -#endif - need_proxy = (ro.ro_rt && - (ro.ro_rt->rt_flags & RTF_ANNOUNCE) != 0 && - ro.ro_rt->rt_gateway->sa_family == AF_LINK); - if (ro.ro_rt != NULL) { - if (need_proxy) - proxydl = *SDL(ro.ro_rt->rt_gateway); - RTFREE(ro.ro_rt); - } - if (need_proxy) { - /* - * proxy NDP for single entry - */ - ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, - IN6_IFF_NOTREADY|IN6_IFF_ANYCAST); - if (ifa) - proxy = 1; + if (rib_lookup_info(RT_DEFAULT_FIB, (struct sockaddr *)&dst6, + 0, 0, &info) == 0) { + if ((info.rti_flags & RTF_ANNOUNCE) != 0 && + rt_gateway.sdl_family == AF_LINK) { + + /* + * proxy NDP for single entry + */ + proxydl = *SDL(&rt_gateway); + ifa = (struct ifaddr *)in6ifa_ifpforlinklocal( + ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST); + if (ifa) + proxy = 1; + } } } if (ifa == NULL) { /* * We've got an NS packet, and we don't have that address * assigned to us. We MUST silently ignore it. * See RFC2461 7.2.3. */ goto freeit; } myaddr6 = *IFA_IN6(ifa); anycast = ((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST; tentative = ((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_TENTATIVE; if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DUPLICATED) goto freeit; if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { nd6log((LOG_INFO, "nd6_ns_input: lladdrlen mismatch for %s " "(if %d, NS packet %d)\n", ip6_sprintf(ip6bufs, &taddr6), ifp->if_addrlen, lladdrlen - 2)); goto bad; } if (IN6_ARE_ADDR_EQUAL(&myaddr6, &saddr6)) { nd6log((LOG_INFO, "nd6_ns_input: duplicate IP6 address %s\n", ip6_sprintf(ip6bufs, &saddr6))); goto freeit; } /* * We have a neighbor solicitation packet whose target address equals * one of my tentative addresses. * * src addr how to process? * --- --- * multicast of course, invalid (rejected in ip6_input) * unicast somebody is doing address resolution -> ignore * unspec dup address detection * * The processing is defined in RFC 2462. */ if (tentative) { /* * If the source address is the unspecified address, it is for * duplicate address detection. * * If not, the packet is for address resolution; * silently ignore it. */ if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) nd6_dad_ns_input(ifa, ndopts.nd_opts_nonce); goto freeit; } /* * If the source address is the unspecified address, entries must not * be created or updated. * It looks like the sender is performing DAD. Output an NA toward * the all-nodes multicast address, to tell the sender that I'm using * the address. * The S bit ("solicited") must be zero. */ if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) { struct in6_addr in6_all; in6_all = in6addr_linklocal_allnodes; if (in6_setscope(&in6_all, ifp, NULL) != 0) goto bad; nd6_na_output_fib(ifp, &in6_all, &taddr6, ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) | rflag, tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL, M_GETFIB(m)); goto freeit; } nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_NEIGHBOR_SOLICIT, 0); nd6_na_output_fib(ifp, &saddr6, &taddr6, ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) | rflag | ND_NA_FLAG_SOLICITED, tlladdr, proxy ?
(struct sockaddr *)&proxydl : NULL, M_GETFIB(m)); freeit: if (ifa != NULL) ifa_free(ifa); m_freem(m); return; bad: nd6log((LOG_ERR, "nd6_ns_input: src=%s\n", ip6_sprintf(ip6bufs, &saddr6))); nd6log((LOG_ERR, "nd6_ns_input: dst=%s\n", ip6_sprintf(ip6bufs, &daddr6))); nd6log((LOG_ERR, "nd6_ns_input: tgt=%s\n", ip6_sprintf(ip6bufs, &taddr6))); ICMP6STAT_INC(icp6s_badns); if (ifa != NULL) ifa_free(ifa); m_freem(m); } /* * Output a Neighbor Solicitation Message. Caller specifies: * - ICMP6 header source IP6 address * - ND6 header target IP6 address * - ND6 header source datalink address * * Based on RFC 2461 * Based on RFC 2462 (duplicate address detection) * * ln - for source address determination * nonce - If non-NULL, NS is used for duplicate address detection and * the value (length is ND_OPT_NONCE_LEN) is used as a random nonce. */ static void nd6_ns_output_fib(struct ifnet *ifp, const struct in6_addr *saddr6, const struct in6_addr *daddr6, const struct in6_addr *taddr6, uint8_t *nonce, u_int fibnum) { struct mbuf *m; struct m_tag *mtag; struct ip6_hdr *ip6; struct nd_neighbor_solicit *nd_ns; struct ip6_moptions im6o; int icmp6len; int maxlen; caddr_t mac; - struct route_in6 ro; if (IN6_IS_ADDR_MULTICAST(taddr6)) return; /* estimate the size of message */ maxlen = sizeof(*ip6) + sizeof(*nd_ns); maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7; KASSERT(max_linkhdr + maxlen <= MCLBYTES, ( "%s: max_linkhdr + maxlen > MCLBYTES (%d + %d > %d)", __func__, max_linkhdr, maxlen, MCLBYTES)); if (max_linkhdr + maxlen > MHLEN) m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); else m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return; M_SETFIB(m, fibnum); - bzero(&ro, sizeof(ro)); - if (daddr6 == NULL || IN6_IS_ADDR_MULTICAST(daddr6)) { m->m_flags |= M_MCAST; im6o.im6o_multicast_ifp = ifp; im6o.im6o_multicast_hlim = 255; im6o.im6o_multicast_loop = 0; } icmp6len = sizeof(*nd_ns); m->m_pkthdr.len = m->m_len = sizeof(*ip6) + icmp6len; m->m_data += max_linkhdr; /* or M_ALIGN() equivalent? */ /* fill neighbor solicitation packet */ ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; /* ip6->ip6_plen will be set later */ ip6->ip6_nxt = IPPROTO_ICMPV6; ip6->ip6_hlim = 255; if (daddr6) ip6->ip6_dst = *daddr6; else { ip6->ip6_dst.s6_addr16[0] = IPV6_ADDR_INT16_MLL; ip6->ip6_dst.s6_addr16[1] = 0; ip6->ip6_dst.s6_addr32[1] = 0; ip6->ip6_dst.s6_addr32[2] = IPV6_ADDR_INT32_ONE; ip6->ip6_dst.s6_addr32[3] = taddr6->s6_addr32[3]; ip6->ip6_dst.s6_addr8[12] = 0xff; if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0) goto bad; } if (nonce == NULL) { struct ifaddr *ifa = NULL; /* * RFC2461 7.2.2: * "If the source address of the packet prompting the * solicitation is the same as one of the addresses assigned * to the outgoing interface, that address SHOULD be placed * in the IP Source Address of the outgoing solicitation. * Otherwise, any one of the addresses assigned to the * interface should be used." * * We use the source address for the prompting packet * (saddr6), if saddr6 belongs to the outgoing interface. * Otherwise, we perform the source address selection as usual. */ if (saddr6 != NULL) ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, saddr6); if (ifa != NULL) { /* ip6_src set already. 
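The destination fill-in above maps the NS target to its solicited-node group field by field. The same mapping as a compact helper (hypothetical name; plain network byte order, without the KAME scope embedding that in6_setscope() adds in the kernel):

#include <string.h>
#include <netinet/in.h>

/* Map a target address to its solicited-node multicast group. */
static void
solicited_node_of(const struct in6_addr *taddr, struct in6_addr *dst)
{
	memset(dst, 0, sizeof(*dst));
	dst->s6_addr[0] = 0xff;		/* ff02:: link-local scope */
	dst->s6_addr[1] = 0x02;
	dst->s6_addr[11] = 0x01;	/* ::1:ff00:0/104 */
	dst->s6_addr[12] = 0xff;
	memcpy(&dst->s6_addr[13], &taddr->s6_addr[13], 3); /* low 24 bits */
}

Together with the membership test sketched earlier, this is the pair of operations RFC 4861 relies on: senders derive the group from the target, and receivers verify that a DAD probe arrived on such a group.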
*/ ip6->ip6_src = *saddr6; ifa_free(ifa); } else { int error; struct sockaddr_in6 dst_sa; struct in6_addr src_in; struct ifnet *oifp; bzero(&dst_sa, sizeof(dst_sa)); dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(dst_sa); dst_sa.sin6_addr = ip6->ip6_dst; oifp = ifp; error = in6_selectsrc(&dst_sa, NULL, - NULL, &ro, NULL, &oifp, &src_in); + NULL, NULL, &oifp, &src_in); if (error) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, "%s: source can't be " "determined: dst=%s, error=%d\n", __func__, ip6_sprintf(ip6buf, &dst_sa.sin6_addr), error)); goto bad; } ip6->ip6_src = src_in; } } else { /* * Source address for DAD packet must always be IPv6 * unspecified address. (0::0) * We actually don't have to 0-clear the address (we did it * above), but we do so here explicitly to make the intention * clearer. */ bzero(&ip6->ip6_src, sizeof(ip6->ip6_src)); } nd_ns = (struct nd_neighbor_solicit *)(ip6 + 1); nd_ns->nd_ns_type = ND_NEIGHBOR_SOLICIT; nd_ns->nd_ns_code = 0; nd_ns->nd_ns_reserved = 0; nd_ns->nd_ns_target = *taddr6; in6_clearscope(&nd_ns->nd_ns_target); /* XXX */ /* * Add source link-layer address option. * * spec implementation * --- --- * DAD packet MUST NOT do not add the option * there's no link layer address: * impossible do not add the option * there's link layer address: * Multicast NS MUST add one add the option * Unicast NS SHOULD add one add the option */ if (nonce == NULL && (mac = nd6_ifptomac(ifp))) { int optlen = sizeof(struct nd_opt_hdr) + ifp->if_addrlen; struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_ns + 1); /* 8 byte alignments... */ optlen = (optlen + 7) & ~7; m->m_pkthdr.len += optlen; m->m_len += optlen; icmp6len += optlen; bzero((caddr_t)nd_opt, optlen); nd_opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; nd_opt->nd_opt_len = optlen >> 3; bcopy(mac, (caddr_t)(nd_opt + 1), ifp->if_addrlen); } /* * Add a Nonce option (RFC 3971) to detect looped back NS messages. * This behavior is documented as Enhanced Duplicate Address * Detection in RFC 7527. * net.inet6.ip6.dad_enhanced=0 disables this. */ if (V_dad_enhanced != 0 && nonce != NULL) { int optlen = sizeof(struct nd_opt_hdr) + ND_OPT_NONCE_LEN; struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_ns + 1); /* 8-byte alignment is required. */ optlen = (optlen + 7) & ~7; m->m_pkthdr.len += optlen; m->m_len += optlen; icmp6len += optlen; bzero((caddr_t)nd_opt, optlen); nd_opt->nd_opt_type = ND_OPT_NONCE; nd_opt->nd_opt_len = optlen >> 3; bcopy(nonce, (caddr_t)(nd_opt + 1), ND_OPT_NONCE_LEN); } ip6->ip6_plen = htons((u_short)icmp6len); nd_ns->nd_ns_cksum = 0; nd_ns->nd_ns_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), icmp6len); if (send_sendso_input_hook != NULL) { mtag = m_tag_get(PACKET_TAG_ND_OUTGOING, sizeof(unsigned short), M_NOWAIT); if (mtag == NULL) goto bad; *(unsigned short *)(mtag + 1) = nd_ns->nd_ns_type; m_tag_prepend(m, mtag); } - ip6_output(m, NULL, &ro, (nonce != NULL) ? IPV6_UNSPECSRC : 0, + ip6_output(m, NULL, NULL, (nonce != NULL) ? IPV6_UNSPECSRC : 0, &im6o, NULL, NULL); icmp6_ifstat_inc(ifp, ifs6_out_msg); icmp6_ifstat_inc(ifp, ifs6_out_neighborsolicit); ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_SOLICIT]); - /* We don't cache this route. 
*/ - RO_RTFREE(&ro); - return; bad: - if (ro.ro_rt) { - RTFREE(ro.ro_rt); - } m_freem(m); return; } #ifndef BURN_BRIDGES void nd6_ns_output(struct ifnet *ifp, const struct in6_addr *saddr6, const struct in6_addr *daddr6, const struct in6_addr *taddr6,uint8_t *nonce) { nd6_ns_output_fib(ifp, saddr6, daddr6, taddr6, nonce, RT_DEFAULT_FIB); } #endif /* * Neighbor advertisement input handling. * * Based on RFC 2461 * Based on RFC 2462 (duplicate address detection) * * the following items are not implemented yet: * - proxy advertisement delay rule (RFC2461 7.2.8, last paragraph, SHOULD) * - anycast advertisement delay rule (RFC2461 7.2.7, SHOULD) */ void nd6_na_input(struct mbuf *m, int off, int icmp6len) { struct ifnet *ifp = m->m_pkthdr.rcvif; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct nd_neighbor_advert *nd_na; struct in6_addr daddr6 = ip6->ip6_dst; struct in6_addr taddr6; int flags; int is_router; int is_solicited; int is_override; char *lladdr = NULL; int lladdrlen = 0; int checklink = 0; struct ifaddr *ifa; struct llentry *ln = NULL; union nd_opts ndopts; struct mbuf *chain = NULL; struct sockaddr_in6 sin6; u_char linkhdr[LLE_MAX_LINKHDR]; size_t linkhdrsize; int lladdr_off; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; if (ip6->ip6_hlim != 255) { nd6log((LOG_ERR, "nd6_na_input: invalid hlim (%d) from %s to %s on %s\n", ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp))); goto bad; } #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len,); nd_na = (struct nd_neighbor_advert *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(nd_na, struct nd_neighbor_advert *, m, off, icmp6len); if (nd_na == NULL) { ICMP6STAT_INC(icp6s_tooshort); return; } #endif flags = nd_na->nd_na_flags_reserved; is_router = ((flags & ND_NA_FLAG_ROUTER) != 0); is_solicited = ((flags & ND_NA_FLAG_SOLICITED) != 0); is_override = ((flags & ND_NA_FLAG_OVERRIDE) != 0); memset(&sin6, 0, sizeof(sin6)); taddr6 = nd_na->nd_na_target; if (in6_setscope(&taddr6, ifp, NULL)) goto bad; /* XXX: impossible */ if (IN6_IS_ADDR_MULTICAST(&taddr6)) { nd6log((LOG_ERR, "nd6_na_input: invalid target address %s\n", ip6_sprintf(ip6bufs, &taddr6))); goto bad; } if (IN6_IS_ADDR_MULTICAST(&daddr6)) if (is_solicited) { nd6log((LOG_ERR, "nd6_na_input: a solicited adv is multicasted\n")); goto bad; } icmp6len -= sizeof(*nd_na); nd6_option_init(nd_na + 1, icmp6len, &ndopts); if (nd6_options(&ndopts) < 0) { nd6log((LOG_INFO, "nd6_na_input: invalid ND option, ignored\n")); /* nd6_options have incremented stats */ goto freeit; } if (ndopts.nd_opts_tgt_lladdr) { lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1); lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3; } /* * This effectively disables the DAD check on a non-master CARP * address. */ if (ifp->if_carp) ifa = (*carp_iamatch6_p)(ifp, &taddr6); else ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6); /* * Target address matches one of my interface address. * * If my address is tentative, this means that there's somebody * already using the same address as mine. This indicates DAD failure. * This is defined in RFC 2462. * * Otherwise, process as defined in RFC 2461. */ if (ifa && (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_TENTATIVE)) { nd6_dad_na_input(ifa); ifa_free(ifa); goto freeit; } /* Just for safety, maybe unnecessary. 
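The lladdrlen consistency checks in both nd6_ns_input() and nd6_na_input() use ((ifp->if_addrlen + 2 + 7) & ~7): two bytes of option type/length header plus the link-layer address, rounded up to a multiple of 8 because nd_opt_len counts 8-byte units. A tiny worked check for the Ethernet case (6 + 2 = 8, already aligned, so nd_opt_len must be 1):

#include <assert.h>

int
main(void)
{
	int addrlen = 6;			/* Ethernet */
	int optbytes = (addrlen + 2 + 7) & ~7;	/* header + addr, 8-aligned */

	assert(optbytes == 8);
	assert((optbytes >> 3) == 1);	/* the value carried in nd_opt_len */
	return (0);
}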
*/ if (ifa) { ifa_free(ifa); log(LOG_ERR, "nd6_na_input: duplicate IP6 address %s\n", ip6_sprintf(ip6bufs, &taddr6)); goto freeit; } if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { nd6log((LOG_INFO, "nd6_na_input: lladdrlen mismatch for %s " "(if %d, NA packet %d)\n", ip6_sprintf(ip6bufs, &taddr6), ifp->if_addrlen, lladdrlen - 2)); goto bad; } /* * If no neighbor cache entry is found, NA SHOULD silently be * discarded. */ IF_AFDATA_RLOCK(ifp); ln = nd6_lookup(&taddr6, LLE_EXCLUSIVE, ifp); IF_AFDATA_RUNLOCK(ifp); if (ln == NULL) { goto freeit; } if (ln->ln_state == ND6_LLINFO_INCOMPLETE) { /* * If the link-layer has address, and no lladdr option came, * discard the packet. */ if (ifp->if_addrlen && lladdr == NULL) { goto freeit; } /* * Record link-layer address, and update the state. */ linkhdrsize = sizeof(linkhdr); if (lltable_calc_llheader(ifp, AF_INET6, lladdr, linkhdr, &linkhdrsize, &lladdr_off) != 0) return; if (lltable_try_set_entry_addr(ifp, ln, linkhdr, linkhdrsize, lladdr_off) == 0) { ln = NULL; goto freeit; } EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); if (is_solicited) nd6_llinfo_setstate(ln, ND6_LLINFO_REACHABLE); else nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); if ((ln->ln_router = is_router) != 0) { /* * This means a router's state has changed from * non-reachable to probably reachable, and might * affect the status of associated prefixes.. */ checklink = 1; } } else { int llchange; /* * Check if the link-layer address has changed or not. */ if (lladdr == NULL) llchange = 0; else { if (ln->la_flags & LLE_VALID) { if (bcmp(lladdr, ln->ll_addr, ifp->if_addrlen)) llchange = 1; else llchange = 0; } else llchange = 1; } /* * This is VERY complex. Look at it with care. * * override solicit lladdr llchange action * (L: record lladdr) * * 0 0 n -- (2c) * 0 0 y n (2b) L * 0 0 y y (1) REACHABLE->STALE * 0 1 n -- (2c) *->REACHABLE * 0 1 y n (2b) L *->REACHABLE * 0 1 y y (1) REACHABLE->STALE * 1 0 n -- (2a) * 1 0 y n (2a) L * 1 0 y y (2a) L *->STALE * 1 1 n -- (2a) *->REACHABLE * 1 1 y n (2a) L *->REACHABLE * 1 1 y y (2a) L *->REACHABLE */ if (!is_override && (lladdr != NULL && llchange)) { /* (1) */ /* * If state is REACHABLE, make it STALE. * no other updates should be done. */ if (ln->ln_state == ND6_LLINFO_REACHABLE) nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); goto freeit; } else if (is_override /* (2a) */ || (!is_override && (lladdr != NULL && !llchange)) /* (2b) */ || lladdr == NULL) { /* (2c) */ /* * Update link-local address, if any. */ if (lladdr != NULL) { linkhdrsize = sizeof(linkhdr); if (lltable_calc_llheader(ifp, AF_INET6, lladdr, linkhdr, &linkhdrsize, &lladdr_off) != 0) goto freeit; if (lltable_try_set_entry_addr(ifp, ln, linkhdr, linkhdrsize, lladdr_off) == 0) { ln = NULL; goto freeit; } EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); } /* * If solicited, make the state REACHABLE. * If not solicited and the link-layer address was * changed, make it STALE. */ if (is_solicited) nd6_llinfo_setstate(ln, ND6_LLINFO_REACHABLE); else { if (lladdr != NULL && llchange) nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); } } if (ln->ln_router && !is_router) { /* * The peer dropped the router flag. * Remove the sender from the Default Router List and * update the Destination Cache entries. */ struct nd_defrouter *dr; struct in6_addr *in6; + struct ifnet *nd6_ifp; in6 = &ln->r_l3addr.addr6; /* * Lock to protect the default router list. * XXX: this might be unnecessary, since this function * is only called under the network software interrupt * context. 
However, we keep it just for safety. */ - dr = defrouter_lookup(in6, ln->lle_tbl->llt_ifp); + nd6_ifp = lltable_get_ifp(ln->lle_tbl); + dr = defrouter_lookup(in6, nd6_ifp); if (dr) defrtrlist_del(dr); - else if (ND_IFINFO(ln->lle_tbl->llt_ifp)->flags & + else if (ND_IFINFO(nd6_ifp)->flags & ND6_IFF_ACCEPT_RTADV) { /* * Even if the neighbor is not in the default * router list, the neighbor may be used * as a next hop for some destinations * (e.g. redirect case). So we must * call rt6_flush explicitly. */ rt6_flush(&ip6->ip6_src, ifp); } } ln->ln_router = is_router; } /* XXX - QL * Does this matter? * rt->rt_flags &= ~RTF_REJECT; */ ln->la_asked = 0; if (ln->la_hold != NULL) nd6_grab_holdchain(ln, &chain, &sin6); freeit: if (ln != NULL) LLE_WUNLOCK(ln); if (chain != NULL) nd6_flush_holdchain(ifp, ifp, chain, &sin6); if (checklink) pfxlist_onlink_check(); m_freem(m); return; bad: if (ln != NULL) LLE_WUNLOCK(ln); ICMP6STAT_INC(icp6s_badna); m_freem(m); } /* * Neighbor advertisement output handling. * * Based on RFC 2461 * * the following items are not implemented yet: * - proxy advertisement delay rule (RFC2461 7.2.8, last paragraph, SHOULD) * - anycast advertisement delay rule (RFC2461 7.2.7, SHOULD) * * tlladdr - 1 if include target link-layer address * sdl0 - sockaddr_dl (= proxy NA) or NULL */ static void nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0, const struct in6_addr *taddr6, u_long flags, int tlladdr, struct sockaddr *sdl0, u_int fibnum) { struct mbuf *m; struct m_tag *mtag; struct ifnet *oifp; struct ip6_hdr *ip6; struct nd_neighbor_advert *nd_na; struct ip6_moptions im6o; struct in6_addr src, daddr6; struct sockaddr_in6 dst_sa; int icmp6len, maxlen, error; caddr_t mac = NULL; - struct route_in6 ro; - - bzero(&ro, sizeof(ro)); daddr6 = *daddr6_0; /* make a local copy for modification */ /* estimate the size of message */ maxlen = sizeof(*ip6) + sizeof(*nd_na); maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7; KASSERT(max_linkhdr + maxlen <= MCLBYTES, ( "%s: max_linkhdr + maxlen > MCLBYTES (%d + %d > %d)", __func__, max_linkhdr, maxlen, MCLBYTES)); if (max_linkhdr + maxlen > MHLEN) m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); else m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return; M_SETFIB(m, fibnum); if (IN6_IS_ADDR_MULTICAST(&daddr6)) { m->m_flags |= M_MCAST; im6o.im6o_multicast_ifp = ifp; im6o.im6o_multicast_hlim = 255; im6o.im6o_multicast_loop = 0; } icmp6len = sizeof(*nd_na); m->m_pkthdr.len = m->m_len = sizeof(struct ip6_hdr) + icmp6len; m->m_data += max_linkhdr; /* or M_ALIGN() equivalent? */ /* fill neighbor advertisement packet */ ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; ip6->ip6_nxt = IPPROTO_ICMPV6; ip6->ip6_hlim = 255; if (IN6_IS_ADDR_UNSPECIFIED(&daddr6)) { /* reply to DAD */ daddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL; daddr6.s6_addr16[1] = 0; daddr6.s6_addr32[1] = 0; daddr6.s6_addr32[2] = 0; daddr6.s6_addr32[3] = IPV6_ADDR_INT32_ONE; if (in6_setscope(&daddr6, ifp, NULL)) goto bad; flags &= ~ND_NA_FLAG_SOLICITED; } ip6->ip6_dst = daddr6; bzero(&dst_sa, sizeof(struct sockaddr_in6)); dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(struct sockaddr_in6); dst_sa.sin6_addr = daddr6; /* * Select a source whose scope is the same as that of the dest. 
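The hunk below makes the same mechanical change this commit applies throughout: in6_selectsrc() loses its struct route_in6 * parameter, so callers that only wanted source address selection no longer construct, pass, and free a throwaway route. The call-site transformation, using the argument names from this file:

	/* before: caller-managed route, freed on every exit path */
	error = in6_selectsrc(&dst_sa, NULL, NULL, &ro, NULL, &oifp, &src);

	/* after: no route cache involved */
	error = in6_selectsrc(&dst_sa, NULL, NULL, NULL, &oifp, &src);

This is also what lets the error paths below drop their RO_RTFREE()/RTFREE() cleanup.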
*/ - bcopy(&dst_sa, &ro.ro_dst, sizeof(dst_sa)); oifp = ifp; - error = in6_selectsrc(&dst_sa, NULL, NULL, &ro, NULL, &oifp, &src); + error = in6_selectsrc(&dst_sa, NULL, NULL, NULL, &oifp, &src); if (error) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, "nd6_na_output: source can't be " "determined: dst=%s, error=%d\n", ip6_sprintf(ip6buf, &dst_sa.sin6_addr), error)); goto bad; } ip6->ip6_src = src; nd_na = (struct nd_neighbor_advert *)(ip6 + 1); nd_na->nd_na_type = ND_NEIGHBOR_ADVERT; nd_na->nd_na_code = 0; nd_na->nd_na_target = *taddr6; in6_clearscope(&nd_na->nd_na_target); /* XXX */ /* * "tlladdr" indicates NS's condition for adding tlladdr or not. * see nd6_ns_input() for details. * Basically, if NS packet is sent to unicast/anycast addr, * target lladdr option SHOULD NOT be included. */ if (tlladdr) { /* * sdl0 != NULL indicates proxy NA. If we do proxy, use * lladdr in sdl0. If we are not proxying (sending NA for * my address) use lladdr configured for the interface. */ if (sdl0 == NULL) { if (ifp->if_carp) mac = (*carp_macmatch6_p)(ifp, m, taddr6); if (mac == NULL) mac = nd6_ifptomac(ifp); } else if (sdl0->sa_family == AF_LINK) { struct sockaddr_dl *sdl; sdl = (struct sockaddr_dl *)sdl0; if (sdl->sdl_alen == ifp->if_addrlen) mac = LLADDR(sdl); } } if (tlladdr && mac) { int optlen = sizeof(struct nd_opt_hdr) + ifp->if_addrlen; struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_na + 1); /* roundup to 8 bytes alignment! */ optlen = (optlen + 7) & ~7; m->m_pkthdr.len += optlen; m->m_len += optlen; icmp6len += optlen; bzero((caddr_t)nd_opt, optlen); nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; nd_opt->nd_opt_len = optlen >> 3; bcopy(mac, (caddr_t)(nd_opt + 1), ifp->if_addrlen); } else flags &= ~ND_NA_FLAG_OVERRIDE; ip6->ip6_plen = htons((u_short)icmp6len); nd_na->nd_na_flags_reserved = flags; nd_na->nd_na_cksum = 0; nd_na->nd_na_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(struct ip6_hdr), icmp6len); if (send_sendso_input_hook != NULL) { mtag = m_tag_get(PACKET_TAG_ND_OUTGOING, sizeof(unsigned short), M_NOWAIT); if (mtag == NULL) goto bad; *(unsigned short *)(mtag + 1) = nd_na->nd_na_type; m_tag_prepend(m, mtag); } - ip6_output(m, NULL, &ro, 0, &im6o, NULL, NULL); + ip6_output(m, NULL, NULL, 0, &im6o, NULL, NULL); icmp6_ifstat_inc(ifp, ifs6_out_msg); icmp6_ifstat_inc(ifp, ifs6_out_neighboradvert); ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_ADVERT]); - /* We don't cache this route. 
*/ - RO_RTFREE(&ro); - return; bad: - if (ro.ro_rt) { - RTFREE(ro.ro_rt); - } m_freem(m); return; } #ifndef BURN_BRIDGES void nd6_na_output(struct ifnet *ifp, const struct in6_addr *daddr6_0, const struct in6_addr *taddr6, u_long flags, int tlladdr, struct sockaddr *sdl0) { nd6_na_output_fib(ifp, daddr6_0, taddr6, flags, tlladdr, sdl0, RT_DEFAULT_FIB); } #endif caddr_t nd6_ifptomac(struct ifnet *ifp) { switch (ifp->if_type) { case IFT_ARCNET: case IFT_ETHER: case IFT_FDDI: case IFT_IEEE1394: case IFT_L2VLAN: case IFT_IEEE80211: case IFT_INFINIBAND: case IFT_BRIDGE: case IFT_ISO88025: return IF_LLADDR(ifp); default: return NULL; } } struct dadq { TAILQ_ENTRY(dadq) dad_list; struct ifaddr *dad_ifa; int dad_count; /* max NS to send */ int dad_ns_tcount; /* # of trials to send NS */ int dad_ns_ocount; /* NS sent so far */ int dad_ns_icount; int dad_na_icount; int dad_ns_lcount; /* looped back NS */ int dad_loopbackprobe; /* probing state for loopback detection */ struct callout dad_timer_ch; struct vnet *dad_vnet; u_int dad_refcnt; #define ND_OPT_NONCE_LEN32 \ ((ND_OPT_NONCE_LEN + sizeof(uint32_t) - 1)/sizeof(uint32_t)) uint32_t dad_nonce[ND_OPT_NONCE_LEN32]; }; static VNET_DEFINE(TAILQ_HEAD(, dadq), dadq); static VNET_DEFINE(struct rwlock, dad_rwlock); #define V_dadq VNET(dadq) #define V_dad_rwlock VNET(dad_rwlock) #define DADQ_RLOCK() rw_rlock(&V_dad_rwlock) #define DADQ_RUNLOCK() rw_runlock(&V_dad_rwlock) #define DADQ_WLOCK() rw_wlock(&V_dad_rwlock) #define DADQ_WUNLOCK() rw_wunlock(&V_dad_rwlock) static void nd6_dad_add(struct dadq *dp) { DADQ_WLOCK(); TAILQ_INSERT_TAIL(&V_dadq, dp, dad_list); DADQ_WUNLOCK(); } static void nd6_dad_del(struct dadq *dp) { DADQ_WLOCK(); TAILQ_REMOVE(&V_dadq, dp, dad_list); DADQ_WUNLOCK(); nd6_dad_rele(dp); } static struct dadq * nd6_dad_find(struct ifaddr *ifa, struct nd_opt_nonce *n) { struct dadq *dp; DADQ_RLOCK(); TAILQ_FOREACH(dp, &V_dadq, dad_list) { if (dp->dad_ifa != ifa) continue; /* * Skip if the nonce matches the received one. * +2 in the length is required because of type and * length fields are included in a header. */ if (n != NULL && n->nd_opt_nonce_len == (ND_OPT_NONCE_LEN + 2) / 8 && memcmp(&n->nd_opt_nonce[0], &dp->dad_nonce[0], ND_OPT_NONCE_LEN) == 0) { dp->dad_ns_lcount++; continue; } refcount_acquire(&dp->dad_refcnt); break; } DADQ_RUNLOCK(); return (dp); } static void nd6_dad_starttimer(struct dadq *dp, int ticks, int send_ns) { if (send_ns != 0) nd6_dad_ns_output(dp); callout_reset(&dp->dad_timer_ch, ticks, (void (*)(void *))nd6_dad_timer, (void *)dp); } static void nd6_dad_stoptimer(struct dadq *dp) { callout_drain(&dp->dad_timer_ch); } static void nd6_dad_rele(struct dadq *dp) { if (refcount_release(&dp->dad_refcnt)) { ifa_free(dp->dad_ifa); free(dp, M_IP6NDP); } } void nd6_dad_init(void) { rw_init(&V_dad_rwlock, "nd6 DAD queue"); TAILQ_INIT(&V_dadq); } /* * Start Duplicate Address Detection (DAD) for specified interface address. */ void nd6_dad_start(struct ifaddr *ifa, int delay) { struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; struct dadq *dp; char ip6buf[INET6_ADDRSTRLEN]; int send_ns; /* * If we don't need DAD, don't do it. * There are several cases: * - DAD is disabled (ip6_dad_count == 0) * - the interface address is anycast */ if (!(ia->ia6_flags & IN6_IFF_TENTATIVE)) { log(LOG_DEBUG, "nd6_dad_start: called with non-tentative address " "%s(%s)\n", ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), ifa->ifa_ifp ? 
if_name(ifa->ifa_ifp) : "???"); return; } if (ia->ia6_flags & IN6_IFF_ANYCAST) { ia->ia6_flags &= ~IN6_IFF_TENTATIVE; return; } if (!V_ip6_dad_count) { ia->ia6_flags &= ~IN6_IFF_TENTATIVE; return; } if (ifa->ifa_ifp == NULL) panic("nd6_dad_start: ifa->ifa_ifp == NULL"); if (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_NO_DAD) { ia->ia6_flags &= ~IN6_IFF_TENTATIVE; return; } if (!(ifa->ifa_ifp->if_flags & IFF_UP) || !(ifa->ifa_ifp->if_drv_flags & IFF_DRV_RUNNING) || (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_IFDISABLED)) { ia->ia6_flags |= IN6_IFF_TENTATIVE; return; } if ((dp = nd6_dad_find(ifa, NULL)) != NULL) { /* * DAD is already in progress. Let the existing entry * finish it. */ return; } dp = malloc(sizeof(*dp), M_IP6NDP, M_NOWAIT | M_ZERO); if (dp == NULL) { log(LOG_ERR, "nd6_dad_start: memory allocation failed for " "%s(%s)\n", ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???"); return; } callout_init(&dp->dad_timer_ch, 0); #ifdef VIMAGE dp->dad_vnet = curvnet; #endif nd6log((LOG_DEBUG, "%s: starting DAD for %s\n", if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr))); /* * Send an NS packet for DAD, ip6_dad_count times. * Note that we must delay the first transmission if this is the * first packet to be sent from the interface after interface * (re)initialization. */ dp->dad_ifa = ifa; ifa_ref(dp->dad_ifa); dp->dad_count = V_ip6_dad_count; dp->dad_ns_icount = dp->dad_na_icount = 0; dp->dad_ns_ocount = dp->dad_ns_tcount = 0; dp->dad_ns_lcount = dp->dad_loopbackprobe = 0; refcount_init(&dp->dad_refcnt, 1); nd6_dad_add(dp); send_ns = 0; if (delay == 0) { send_ns = 1; delay = (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000; } nd6_dad_starttimer(dp, delay, send_ns); } /* * Terminate DAD unconditionally. Used for address removals. */ void nd6_dad_stop(struct ifaddr *ifa) { struct dadq *dp; dp = nd6_dad_find(ifa, NULL); if (!dp) { /* DAD wasn't started yet */ return; } nd6_dad_stoptimer(dp); /* * The DAD queue entry may have been removed by nd6_dad_timer() while * we were waiting for it to stop, so re-do the lookup. */ nd6_dad_rele(dp); if (nd6_dad_find(ifa, NULL) == NULL) return; nd6_dad_del(dp); nd6_dad_rele(dp); } static void nd6_dad_timer(struct dadq *dp) { CURVNET_SET(dp->dad_vnet); struct ifaddr *ifa = dp->dad_ifa; struct ifnet *ifp = dp->dad_ifa->ifa_ifp; struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; char ip6buf[INET6_ADDRSTRLEN]; /* Sanity check */ if (ia == NULL) { log(LOG_ERR, "nd6_dad_timer: called with null parameter\n"); goto err; } if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) { /* No need for DAD on an ifdisabled interface. */ log(LOG_ERR, "nd6_dad_timer: cancel DAD on %s because of " "ND6_IFF_IFDISABLED.\n", ifp->if_xname); goto err; } if (ia->ia6_flags & IN6_IFF_DUPLICATED) { log(LOG_ERR, "nd6_dad_timer: called with duplicated address " "%s(%s)\n", ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???"); goto err; } if ((ia->ia6_flags & IN6_IFF_TENTATIVE) == 0) { log(LOG_ERR, "nd6_dad_timer: called with non-tentative address " "%s(%s)\n", ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???"); goto err; } /* Stop DAD if the interface is down even after dad_maxtry attempts.
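Both the initial delay chosen in nd6_dad_start() above and the timer re-arms below convert the interface's retransmission interval, kept in milliseconds (RFC 4861 RetransTimer), into callout ticks. A standalone sketch of the conversion (hz is a plain parameter here; in the kernel it is the global tick rate):

#include <stdio.h>

/* Convert a millisecond interval into scheduler ticks, as the DAD
 * code does; the cast to long avoids int overflow for large values. */
static long
ms_to_ticks(int ms, int hz)
{
	return ((long)ms * hz / 1000);
}

int
main(void)
{
	/* RFC 4861 default RetransTimer is 1000 ms; with hz=1000 -> 1000. */
	printf("%ld ticks\n", ms_to_ticks(1000, 1000));
	return (0);
}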
*/ if ((dp->dad_ns_tcount > V_dad_maxtry) && (((ifp->if_flags & IFF_UP) == 0) || ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0))) { nd6log((LOG_INFO, "%s: could not run DAD " "because the interface was down or not running.\n", if_name(ifa->ifa_ifp))); goto err; } /* Need more checks? */ if (dp->dad_ns_ocount < dp->dad_count) { /* * We have more NSes to go. Send an NS packet for DAD. */ nd6_dad_starttimer(dp, (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000, 1); goto done; } else { /* * We have transmitted a sufficient number of DAD packets. * See what we've got. */ if (dp->dad_ns_icount > 0 || dp->dad_na_icount > 0) /* We've seen an NS or NA, which means DAD has failed. */ nd6_dad_duplicated(ifa, dp); else if (V_dad_enhanced != 0 && dp->dad_ns_lcount > 0 && dp->dad_ns_lcount > dp->dad_loopbackprobe) { /* * Sec. 4.1 in RFC 7527 requires transmission of * additional probes until the loopback condition * becomes clear when a looped back probe is detected. */ log(LOG_ERR, "%s: a looped back NS message is " "detected during DAD for %s. " "More DAD probes are being sent.\n", if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, IFA_IN6(ifa))); dp->dad_loopbackprobe = dp->dad_ns_lcount; /* * Send an NS immediately and increase dad_count by * V_nd6_mmaxtries - 1. */ dp->dad_count = dp->dad_ns_ocount + V_nd6_mmaxtries - 1; nd6_dad_starttimer(dp, (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000, 1); goto done; } else { /* * We are done with DAD. No NA came, no NS came. * No duplicate address was found. Check the IFDISABLED * flag again in case it was changed between the * beginning of this function and here. */ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) == 0) ia->ia6_flags &= ~IN6_IFF_TENTATIVE; nd6log((LOG_DEBUG, "%s: DAD complete for %s - no duplicates found\n", if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr))); if (dp->dad_ns_lcount > 0) log(LOG_ERR, "%s: DAD completed while " "a looped back NS message was detected " "during DAD for %s.\n", if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, IFA_IN6(ifa))); } } err: nd6_dad_del(dp); done: CURVNET_RESTORE(); } static void nd6_dad_duplicated(struct ifaddr *ifa, struct dadq *dp) { struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; struct ifnet *ifp; char ip6buf[INET6_ADDRSTRLEN]; log(LOG_ERR, "%s: DAD detected duplicate IPv6 address %s: " "NS in/out/loopback=%d/%d/%d, NA in=%d\n", if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), dp->dad_ns_icount, dp->dad_ns_ocount, dp->dad_ns_lcount, dp->dad_na_icount); ia->ia6_flags &= ~IN6_IFF_TENTATIVE; ia->ia6_flags |= IN6_IFF_DUPLICATED; ifp = ifa->ifa_ifp; log(LOG_ERR, "%s: DAD complete for %s - duplicate found\n", if_name(ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)); log(LOG_ERR, "%s: manual intervention required\n", if_name(ifp)); /* * If the address is a link-local address formed from an interface * identifier based on the hardware address, which is supposed to be * uniquely assigned (e.g., EUI-64 for an Ethernet interface), IP * operation on the interface SHOULD be disabled. * [RFC 4862, Section 5.4.5] */ if (IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) { struct in6_addr in6; /* * To avoid over-reaction, we only apply this logic when we are * very sure that hardware addresses are supposed to be unique.
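For the interface types listed below, in6_get_hw_ifid() regenerates the interface identifier from the hardware address so it can be compared with the address that just failed DAD; for Ethernet that is the modified EUI-64 transform of RFC 4291 Appendix A. A sketch of that transform (hypothetical helper name; 48-bit MAC in, 8-byte identifier out):

#include <stdint.h>

/* Modified EUI-64: insert 0xfffe in the middle of the 48-bit MAC and
 * flip the universal/local bit, per RFC 4291 Appendix A. */
static void
mac_to_eui64(const uint8_t mac[6], uint8_t iid[8])
{
	iid[0] = mac[0] ^ 0x02;		/* flip the U/L bit */
	iid[1] = mac[1];
	iid[2] = mac[2];
	iid[3] = 0xff;
	iid[4] = 0xfe;
	iid[5] = mac[3];
	iid[6] = mac[4];
	iid[7] = mac[5];
}

If the regenerated identifier matches the duplicated link-local address, the duplicate very likely means a duplicated MAC address, which is why the code disables IPv6 on the whole interface rather than only marking the one address.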
*/ switch (ifp->if_type) { case IFT_ETHER: case IFT_FDDI: case IFT_ATM: case IFT_IEEE1394: case IFT_IEEE80211: case IFT_INFINIBAND: in6 = ia->ia_addr.sin6_addr; if (in6_get_hw_ifid(ifp, &in6) == 0 && IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, &in6)) { ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED; log(LOG_ERR, "%s: possible hardware address " "duplication detected, disable IPv6\n", if_name(ifp)); } break; } } } static void nd6_dad_ns_output(struct dadq *dp) { struct in6_ifaddr *ia = (struct in6_ifaddr *)dp->dad_ifa; struct ifnet *ifp = dp->dad_ifa->ifa_ifp; int i; dp->dad_ns_tcount++; if ((ifp->if_flags & IFF_UP) == 0) { return; } if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { return; } dp->dad_ns_ocount++; if (V_dad_enhanced != 0) { for (i = 0; i < ND_OPT_NONCE_LEN32; i++) dp->dad_nonce[i] = arc4random(); /* * XXXHRS: Note that in the case that * DupAddrDetectTransmits > 1, multiple NS messages with * different nonces can be looped back in an unexpected * order. The current implementation recognizes only * the latest nonce on the sender side. Practically it * should work well in almost all cases. */ } nd6_ns_output(ifp, NULL, NULL, &ia->ia_addr.sin6_addr, (uint8_t *)&dp->dad_nonce[0]); } static void nd6_dad_ns_input(struct ifaddr *ifa, struct nd_opt_nonce *ndopt_nonce) { struct in6_ifaddr *ia; struct ifnet *ifp; const struct in6_addr *taddr6; struct dadq *dp; if (ifa == NULL) panic("ifa == NULL in nd6_dad_ns_input"); ia = (struct in6_ifaddr *)ifa; ifp = ifa->ifa_ifp; taddr6 = &ia->ia_addr.sin6_addr; /* Ignore Nonce option when Enhanced DAD is disabled. */ if (V_dad_enhanced == 0) ndopt_nonce = NULL; dp = nd6_dad_find(ifa, ndopt_nonce); if (dp == NULL) return; dp->dad_ns_icount++; nd6_dad_rele(dp); } static void nd6_dad_na_input(struct ifaddr *ifa) { struct dadq *dp; if (ifa == NULL) panic("ifa == NULL in nd6_dad_na_input"); dp = nd6_dad_find(ifa, NULL); if (dp != NULL) { dp->dad_na_icount++; nd6_dad_rele(dp); } } diff --git a/sys/netinet6/raw_ip6.c b/sys/netinet6/raw_ip6.c index 390194459441..0725842cbe7e 100644 --- a/sys/netinet6/raw_ip6.c +++ b/sys/netinet6/raw_ip6.c @@ -1,924 +1,924 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)raw_ip.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_ipsec.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #endif /* IPSEC */ #include #define satosin6(sa) ((struct sockaddr_in6 *)(sa)) #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) /* * Raw interface to IP6 protocol. */ VNET_DECLARE(struct inpcbhead, ripcb); VNET_DECLARE(struct inpcbinfo, ripcbinfo); #define V_ripcb VNET(ripcb) #define V_ripcbinfo VNET(ripcbinfo) extern u_long rip_sendspace; extern u_long rip_recvspace; VNET_PCPUSTAT_DEFINE(struct rip6stat, rip6stat); VNET_PCPUSTAT_SYSINIT(rip6stat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(rip6stat); #endif /* VIMAGE */ /* * Hooks for multicast routing. They all default to NULL, so leave them not * initialized and rely on BSS being set to 0. */ /* * The socket used to communicate with the multicast routing daemon. */ VNET_DEFINE(struct socket *, ip6_mrouter); /* * The various mrouter functions. 
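The declarations below form the usual optional-module hook pattern: the pointers stay NULL until the IPv6 multicast routing code registers itself, and every caller must test before dispatching, as rip6_ctloutput() does further down with ip6_mrouter_get ? ip6_mrouter_get(so, sopt) : EOPNOTSUPP. A minimal runnable sketch of the pattern (all names hypothetical):

#include <stddef.h>
#include <errno.h>

/* Hook left NULL until an optional module registers itself. */
static int (*mod_op)(int) = NULL;

static int
call_mod_op(int arg)
{
	/* Callers must never assume the module is loaded. */
	return (mod_op != NULL ? mod_op(arg) : EOPNOTSUPP);
}

static int
real_op(int arg)
{
	return (arg * 2);
}

int
main(void)
{
	int r1 = call_mod_op(21);	/* EOPNOTSUPP: not registered yet */

	mod_op = real_op;		/* what a module load handler does */
	return (r1 == EOPNOTSUPP && call_mod_op(21) == 42) ? 0 : 1;
}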
*/ int (*ip6_mrouter_set)(struct socket *, struct sockopt *); int (*ip6_mrouter_get)(struct socket *, struct sockopt *); int (*ip6_mrouter_done)(void); int (*ip6_mforward)(struct ip6_hdr *, struct ifnet *, struct mbuf *); int (*mrt6_ioctl)(u_long, caddr_t); /* * Setup generic address and protocol structures for raw_input routine, then * pass them along with mbuf chain. */ int rip6_input(struct mbuf **mp, int *offp, int proto) { struct ifnet *ifp; struct mbuf *m = *mp; register struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); register struct inpcb *in6p; struct inpcb *last = 0; struct mbuf *opts = NULL; struct sockaddr_in6 fromsa; RIP6STAT_INC(rip6s_ipackets); init_sin6(&fromsa, m); /* general init */ ifp = m->m_pkthdr.rcvif; INP_INFO_RLOCK(&V_ripcbinfo); LIST_FOREACH(in6p, &V_ripcb, inp_list) { /* XXX inp locking */ if ((in6p->inp_vflag & INP_IPV6) == 0) continue; if (in6p->inp_ip_p && in6p->inp_ip_p != proto) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst)) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src)) continue; if (jailed_without_vnet(in6p->inp_cred)) { /* * Allow raw socket in jail to receive multicast; * assume process had PRIV_NETINET_RAW at attach, * and fall through into normal filter path if so. */ if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && prison_check_ip6(in6p->inp_cred, &ip6->ip6_dst) != 0) continue; } INP_RLOCK(in6p); if (in6p->in6p_cksum != -1) { RIP6STAT_INC(rip6s_isum); if (in6_cksum(m, proto, *offp, m->m_pkthdr.len - *offp)) { INP_RUNLOCK(in6p); RIP6STAT_INC(rip6s_badsum); continue; } } /* * If this raw socket has multicast state, and we * have received a multicast, check if this socket * should receive it, as multicast filtering is now * the responsibility of the transport layer. */ if (in6p->in6p_moptions && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { /* * If the incoming datagram is for MLD, allow it * through unconditionally to the raw socket. * * Use the M_RTALERT_MLD flag to check for MLD * traffic without having to inspect the mbuf chain * more deeply, as all MLDv1/v2 host messages MUST * contain the Router Alert option. * * In the case of MLDv1, we may not have explicitly * joined the group, and may have set IFF_ALLMULTI * on the interface. im6o_mc_filter() may discard * control traffic we actually need to see. * * Userland multicast routing daemons should continue * filter the control traffic appropriately. */ int blocked; blocked = MCAST_PASS; if ((m->m_flags & M_RTALERT_MLD) == 0) { struct sockaddr_in6 mcaddr; bzero(&mcaddr, sizeof(struct sockaddr_in6)); mcaddr.sin6_len = sizeof(struct sockaddr_in6); mcaddr.sin6_family = AF_INET6; mcaddr.sin6_addr = ip6->ip6_dst; blocked = im6o_mc_filter(in6p->in6p_moptions, ifp, (struct sockaddr *)&mcaddr, (struct sockaddr *)&fromsa); } if (blocked != MCAST_PASS) { IP6STAT_INC(ip6s_notmember); INP_RUNLOCK(in6p); continue; } } if (last != NULL) { struct mbuf *n = m_copy(m, 0, (int)M_COPYALL); #ifdef IPSEC /* * Check AH/ESP integrity. */ if (n && ipsec6_in_reject(n, last)) { m_freem(n); /* Do not inject data into pcb. 
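The delivery logic here is the classic raw-input fan-out: while walking the PCB list, each previously matched socket receives an m_copy() of the packet, and the socket remembered in last gets the original mbuf after the loop, saving one copy in the common single-listener case. A stripped-down model of the control flow (hypothetical types; no locking or mbufs):

#include <stdio.h>
#include <stddef.h>

struct pcb { int match; struct pcb *next; };

static void
deliver(struct pcb *p, const char *how)
{
	printf("pcb %p gets %s\n", (void *)p, how);
}

static void
raw_deliver_all(struct pcb *head)
{
	struct pcb *last = NULL;

	for (struct pcb *p = head; p != NULL; p = p->next) {
		if (!p->match)
			continue;
		if (last != NULL)
			deliver(last, "a copy");	/* m_copy() here */
		last = p;
	}
	if (last != NULL)
		deliver(last, "the original");		/* no copy needed */
}

int
main(void)
{
	struct pcb c = { 1, NULL }, b = { 0, &c }, a = { 1, &b };

	raw_deliver_all(&a);
	return (0);
}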
*/ } else #endif /* IPSEC */ if (n) { if (last->inp_flags & INP_CONTROLOPTS || last->inp_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(last, n, &opts); /* strip intermediate headers */ m_adj(n, *offp); if (sbappendaddr(&last->inp_socket->so_rcv, (struct sockaddr *)&fromsa, n, opts) == 0) { m_freem(n); if (opts) m_freem(opts); RIP6STAT_INC(rip6s_fullsock); } else sorwakeup(last->inp_socket); opts = NULL; } INP_RUNLOCK(last); } last = in6p; } INP_INFO_RUNLOCK(&V_ripcbinfo); #ifdef IPSEC /* * Check AH/ESP integrity. */ if ((last != NULL) && ipsec6_in_reject(m, last)) { m_freem(m); IP6STAT_DEC(ip6s_delivered); /* Do not inject data into pcb. */ INP_RUNLOCK(last); } else #endif /* IPSEC */ if (last != NULL) { if (last->inp_flags & INP_CONTROLOPTS || last->inp_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(last, m, &opts); /* Strip intermediate headers. */ m_adj(m, *offp); if (sbappendaddr(&last->inp_socket->so_rcv, (struct sockaddr *)&fromsa, m, opts) == 0) { m_freem(m); if (opts) m_freem(opts); RIP6STAT_INC(rip6s_fullsock); } else sorwakeup(last->inp_socket); INP_RUNLOCK(last); } else { RIP6STAT_INC(rip6s_nosock); if (m->m_flags & M_MCAST) RIP6STAT_INC(rip6s_nosockmcast); if (proto == IPPROTO_NONE) m_freem(m); else { char *prvnxtp = ip6_get_prevhdr(m, *offp); /* XXX */ icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, prvnxtp - mtod(m, char *)); } IP6STAT_DEC(ip6s_delivered); } return (IPPROTO_DONE); } void rip6_ctlinput(int cmd, struct sockaddr *sa, void *d) { struct ip6_hdr *ip6; struct mbuf *m; int off = 0; struct ip6ctlparam *ip6cp = NULL; const struct sockaddr_in6 *sa6_src = NULL; void *cmdarg; struct inpcb *(*notify)(struct inpcb *, int) = in6_rtchange; if (sa->sa_family != AF_INET6 || sa->sa_len != sizeof(struct sockaddr_in6)) return; if ((unsigned)cmd >= PRC_NCMDS) return; if (PRC_IS_REDIRECT(cmd)) notify = in6_rtchange, d = NULL; else if (cmd == PRC_HOSTDEAD) d = NULL; else if (inet6ctlerrmap[cmd] == 0) return; /* * If the parameter is from icmp6, decode it. */ if (d != NULL) { ip6cp = (struct ip6ctlparam *)d; m = ip6cp->ip6c_m; ip6 = ip6cp->ip6c_ip6; off = ip6cp->ip6c_off; cmdarg = ip6cp->ip6c_cmdarg; sa6_src = ip6cp->ip6c_src; } else { m = NULL; ip6 = NULL; cmdarg = NULL; sa6_src = &sa6_any; } (void) in6_pcbnotify(&V_ripcbinfo, sa, 0, (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify); } /* * Generate IPv6 header and pass packet to ip6_output. Tack on options user * may have setup with control call. */ int rip6_output(struct mbuf *m, struct socket *so, ...) { struct mbuf *control; struct m_tag *mtag; struct sockaddr_in6 *dstsock; struct in6_addr *dst; struct ip6_hdr *ip6; struct inpcb *in6p; u_int plen = m->m_pkthdr.len; int error = 0; struct ip6_pktopts opt, *optp; struct ifnet *oifp = NULL; int type = 0, code = 0; /* for ICMPv6 output statistics only */ int scope_ambiguous = 0; int use_defzone = 0; struct in6_addr in6a; va_list ap; va_start(ap, so); dstsock = va_arg(ap, struct sockaddr_in6 *); control = va_arg(ap, struct mbuf *); va_end(ap); in6p = sotoinpcb(so); INP_WLOCK(in6p); dst = &dstsock->sin6_addr; if (control != NULL) { if ((error = ip6_setpktopts(control, &opt, in6p->in6p_outputopts, so->so_cred, so->so_proto->pr_protocol)) != 0) { goto bad; } optp = &opt; } else optp = in6p->in6p_outputopts; /* * Check and convert scope zone ID into internal form. * * XXX: we may still need to determine the zone later. 
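sa6_embedscope(), called just below, converts the application form of a scoped address (zone carried in sin6_scope_id, as in fe80::1%em0) into the KAME-internal form the kernel compares with, where the zone index is stored in the second 16-bit word of a link-local address. A small demonstration of what that embedded form looks like (it never appears on the wire; shown purely for illustration):

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <netinet/in.h>
#include <arpa/inet.h>

int
main(void)
{
	struct in6_addr a;
	uint16_t zone = htons(3);	/* e.g. interface index 3 */
	char buf[INET6_ADDRSTRLEN];

	inet_pton(AF_INET6, "fe80::1", &a);
	/* KAME-internal form: stash the zone in the second 16-bit word,
	 * which is always zero on the wire for link-local addresses. */
	memcpy(&a.s6_addr[2], &zone, sizeof(zone));

	inet_ntop(AF_INET6, &a, buf, sizeof(buf));
	printf("embedded form: %s\n", buf);	/* prints fe80:3::1 */
	return (0);
}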
if (!(so->so_state & SS_ISCONNECTED)) { if (!optp || !optp->ip6po_pktinfo || !optp->ip6po_pktinfo->ipi6_ifindex) use_defzone = V_ip6_use_defzone; if (dstsock->sin6_scope_id == 0 && !use_defzone) scope_ambiguous = 1; if ((error = sa6_embedscope(dstsock, use_defzone)) != 0) goto bad; } /* * For an ICMPv6 packet, we should know its type and code to update * statistics. */ if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { struct icmp6_hdr *icmp6; if (m->m_len < sizeof(struct icmp6_hdr) && (m = m_pullup(m, sizeof(struct icmp6_hdr))) == NULL) { error = ENOBUFS; goto bad; } icmp6 = mtod(m, struct icmp6_hdr *); type = icmp6->icmp6_type; code = icmp6->icmp6_code; } M_PREPEND(m, sizeof(*ip6), M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto bad; } ip6 = mtod(m, struct ip6_hdr *); /* * Source address selection. */ - error = in6_selectsrc(dstsock, optp, in6p, NULL, so->so_cred, + error = in6_selectsrc(dstsock, optp, in6p, so->so_cred, &oifp, &in6a); if (error) goto bad; error = prison_check_ip6(in6p->inp_cred, &in6a); if (error != 0) goto bad; ip6->ip6_src = in6a; if (oifp && scope_ambiguous) { /* * Applications should provide a proper zone ID, or the use of * default zone IDs should be enabled. Unfortunately, some * applications do not behave as they should, so we need a * workaround. Even if an appropriate ID is not determined * (when it's required), we determine the zone ID based on the * outgoing interface, if one can be determined. */ error = in6_setscope(&dstsock->sin6_addr, oifp, NULL); if (error != 0) goto bad; } ip6->ip6_dst = dstsock->sin6_addr; /* * Fill in the rest of the IPv6 header fields. */ ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) | (in6p->inp_flow & IPV6_FLOWINFO_MASK); ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | (IPV6_VERSION & IPV6_VERSION_MASK); /* * ip6_plen will be filled in by ip6_output, so we don't fill it in here. */ ip6->ip6_nxt = in6p->inp_ip_p; ip6->ip6_hlim = in6_selecthlim(in6p, oifp); if (so->so_proto->pr_protocol == IPPROTO_ICMPV6 || in6p->in6p_cksum != -1) { struct mbuf *n; int off; u_int16_t *p; /* Compute checksum. */ if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) off = offsetof(struct icmp6_hdr, icmp6_cksum); else off = in6p->in6p_cksum; if (plen < off + 1) { error = EINVAL; goto bad; } off += sizeof(struct ip6_hdr); n = m; while (n && n->m_len <= off) { off -= n->m_len; n = n->m_next; } if (!n) goto bad; p = (u_int16_t *)(mtod(n, caddr_t) + off); *p = 0; *p = in6_cksum(m, ip6->ip6_nxt, sizeof(*ip6), plen); } /* * Send RA/RS messages to userland for protection, before sending * them to rtadvd/rtsol. */ if ((send_sendso_input_hook != NULL) && so->so_proto->pr_protocol == IPPROTO_ICMPV6) { switch (type) { case ND_ROUTER_ADVERT: case ND_ROUTER_SOLICIT: mtag = m_tag_get(PACKET_TAG_ND_OUTGOING, sizeof(unsigned short), M_NOWAIT); if (mtag == NULL) goto bad; m_tag_prepend(m, mtag); } } error = ip6_output(m, optp, NULL, 0, in6p->in6p_moptions, &oifp, in6p); if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { if (oifp) icmp6_ifoutstat_inc(oifp, type, code); ICMP6STAT_INC(icp6s_outhist[type]); } else RIP6STAT_INC(rip6s_opackets); goto freectl; bad: if (m) m_freem(m); freectl: if (control != NULL) { ip6_clearpktopts(&opt, -1); m_freem(control); } INP_WUNLOCK(in6p); return (error); } /* * Raw IPv6 socket option processing. */ int rip6_ctloutput(struct socket *so, struct sockopt *sopt) { struct inpcb *inp; int error; if (sopt->sopt_level == IPPROTO_ICMPV6) /* * XXX: is it better to call icmp6_ctloutput() directly * from protosw?
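The in6p_cksum offset consulted by rip6_output() above is set with the RFC 3542 IPV6_CHECKSUM socket option, routed below through ip6_raw_ctloutput(); once set, the kernel inserts the checksum at that offset on output and verifies it on input (the rip6s_isum path in rip6_input()). A short userland usage sketch for a hypothetical raw protocol whose 16-bit checksum sits at offset 2 of its header:

#include <sys/socket.h>
#include <netinet/in.h>

int
enable_kernel_checksum(int s)
{
	int off = 2;	/* checksum field offset in our protocol header */

	/* The offset must be even; a value of -1 disables checksumming. */
	return (setsockopt(s, IPPROTO_IPV6, IPV6_CHECKSUM, &off,
	    sizeof(off)));
}

ICMPv6 sockets are expected to reject this option (RFC 3542), since their checksum is mandatory, which is why the ICMPv6 case above is computed unconditionally.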
*/ return (icmp6_ctloutput(so, sopt)); else if (sopt->sopt_level != IPPROTO_IPV6) { if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_SETFIB) { inp = sotoinpcb(so); INP_WLOCK(inp); inp->inp_inc.inc_fibnum = so->so_fibnum; INP_WUNLOCK(inp); return (0); } return (EINVAL); } error = 0; switch (sopt->sopt_dir) { case SOPT_GET: switch (sopt->sopt_name) { case MRT6_INIT: case MRT6_DONE: case MRT6_ADD_MIF: case MRT6_DEL_MIF: case MRT6_ADD_MFC: case MRT6_DEL_MFC: case MRT6_PIM: error = ip6_mrouter_get ? ip6_mrouter_get(so, sopt) : EOPNOTSUPP; break; case IPV6_CHECKSUM: error = ip6_raw_ctloutput(so, sopt); break; default: error = ip6_ctloutput(so, sopt); break; } break; case SOPT_SET: switch (sopt->sopt_name) { case MRT6_INIT: case MRT6_DONE: case MRT6_ADD_MIF: case MRT6_DEL_MIF: case MRT6_ADD_MFC: case MRT6_DEL_MFC: case MRT6_PIM: error = ip6_mrouter_set ? ip6_mrouter_set(so, sopt) : EOPNOTSUPP; break; case IPV6_CHECKSUM: error = ip6_raw_ctloutput(so, sopt); break; default: error = ip6_ctloutput(so, sopt); break; } break; } return (error); } static int rip6_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; struct icmp6_filter *filter; int error; inp = sotoinpcb(so); KASSERT(inp == NULL, ("rip6_attach: inp != NULL")); error = priv_check(td, PRIV_NETINET_RAW); if (error) return (error); error = soreserve(so, rip_sendspace, rip_recvspace); if (error) return (error); filter = malloc(sizeof(struct icmp6_filter), M_PCB, M_NOWAIT); if (filter == NULL) return (ENOMEM); INP_INFO_WLOCK(&V_ripcbinfo); error = in_pcballoc(so, &V_ripcbinfo); if (error) { INP_INFO_WUNLOCK(&V_ripcbinfo); free(filter, M_PCB); return (error); } inp = (struct inpcb *)so->so_pcb; INP_INFO_WUNLOCK(&V_ripcbinfo); inp->inp_vflag |= INP_IPV6; inp->inp_ip_p = (long)proto; inp->in6p_hops = -1; /* use kernel default */ inp->in6p_cksum = -1; inp->in6p_icmp6filt = filter; ICMP6_FILTER_SETPASSALL(inp->in6p_icmp6filt); INP_WUNLOCK(inp); return (0); } static void rip6_detach(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_detach: inp == NULL")); if (so == V_ip6_mrouter && ip6_mrouter_done) ip6_mrouter_done(); /* xxx: RSVP */ INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); free(inp->in6p_icmp6filt, M_PCB); in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); } /* XXXRW: This can't ever be called. 
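rip6_attach() above initializes every new raw socket's ICMPv6 filter with ICMP6_FILTER_SETPASSALL; applications then narrow reception with the RFC 3542 ICMP6_FILTER option. A usage sketch that keeps only echo replies:

#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/icmp6.h>

int
filter_echo_replies(int s)
{
	struct icmp6_filter f;

	ICMP6_FILTER_SETBLOCKALL(&f);
	ICMP6_FILTER_SETPASS(ICMP6_ECHO_REPLY, &f);
	return (setsockopt(s, IPPROTO_ICMPV6, ICMP6_FILTER, &f, sizeof(f)));
}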
*/ static void rip6_abort(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_abort: inp == NULL")); soisdisconnected(so); } static void rip6_close(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_close: inp == NULL")); soisdisconnected(so); } static int rip6_disconnect(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_disconnect: inp == NULL")); if ((so->so_state & SS_ISCONNECTED) == 0) return (ENOTCONN); inp->in6p_faddr = in6addr_any; rip6_abort(so); return (0); } static int rip6_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam; struct ifaddr *ifa = NULL; int error = 0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_bind: inp == NULL")); if (nam->sa_len != sizeof(*addr)) return (EINVAL); if ((error = prison_check_ip6(td->td_ucred, &addr->sin6_addr)) != 0) return (error); if (TAILQ_EMPTY(&V_ifnet) || addr->sin6_family != AF_INET6) return (EADDRNOTAVAIL); if ((error = sa6_embedscope(addr, V_ip6_use_defzone)) != 0) return (error); if (!IN6_IS_ADDR_UNSPECIFIED(&addr->sin6_addr) && (ifa = ifa_ifwithaddr((struct sockaddr *)addr)) == NULL) return (EADDRNOTAVAIL); if (ifa != NULL && ((struct in6_ifaddr *)ifa)->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY| IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) { ifa_free(ifa); return (EADDRNOTAVAIL); } if (ifa != NULL) ifa_free(ifa); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); inp->in6p_laddr = addr->sin6_addr; INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (0); } static int rip6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam; struct in6_addr in6a; struct ifnet *ifp = NULL; int error = 0, scope_ambiguous = 0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_connect: inp == NULL")); if (nam->sa_len != sizeof(*addr)) return (EINVAL); if (TAILQ_EMPTY(&V_ifnet)) return (EADDRNOTAVAIL); if (addr->sin6_family != AF_INET6) return (EAFNOSUPPORT); /* * Application should provide a proper zone ID or the use of default * zone IDs should be enabled. Unfortunately, some applications do * not behave as they should, so we need a workaround. Even if an * appropriate ID is not determined, we'll see if we can determine * the outgoing interface. If we can, determine the zone ID based on * the interface below. */ if (addr->sin6_scope_id == 0 && !V_ip6_use_defzone) scope_ambiguous = 1; if ((error = sa6_embedscope(addr, V_ip6_use_defzone)) != 0) return (error); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); /* Source address selection. XXX: need pcblookup?
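 *
 * The call below uses the updated in6_selectsrc() contract; a sketch
 * of the signature as inferred from the call sites in this change
 * (the fourth argument, previously always passed as NULL here, has
 * been retired):
 *
 *	int in6_selectsrc(struct sockaddr_in6 *dstsock,
 *	    struct ip6_pktopts *opts, struct inpcb *inp,
 *	    struct ucred *cred, struct ifnet **ifpp,
 *	    struct in6_addr *srcp);
 *
 * On success, *srcp holds the chosen source address and *ifpp the
 * outgoing interface, which is then used to resolve an ambiguous
 * scope zone.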
*/ error = in6_selectsrc(addr, inp->in6p_outputopts, - inp, NULL, so->so_cred, &ifp, &in6a); + inp, so->so_cred, &ifp, &in6a); if (error) { INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (error); } /* XXX: see above */ if (ifp && scope_ambiguous && (error = in6_setscope(&addr->sin6_addr, ifp, NULL)) != 0) { INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (error); } inp->in6p_faddr = addr->sin6_addr; inp->in6p_laddr = in6a; soisconnected(so); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (0); } static int rip6_shutdown(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_shutdown: inp == NULL")); INP_WLOCK(inp); socantsendmore(so); INP_WUNLOCK(inp); return (0); } static int rip6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { struct inpcb *inp; struct sockaddr_in6 tmp; struct sockaddr_in6 *dst; int ret; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip6_send: inp == NULL")); /* Always copy sockaddr to avoid overwrites. */ /* Unlocked read. */ if (so->so_state & SS_ISCONNECTED) { if (nam) { m_freem(m); return (EISCONN); } /* XXX */ bzero(&tmp, sizeof(tmp)); tmp.sin6_family = AF_INET6; tmp.sin6_len = sizeof(struct sockaddr_in6); INP_RLOCK(inp); bcopy(&inp->in6p_faddr, &tmp.sin6_addr, sizeof(struct in6_addr)); INP_RUNLOCK(inp); dst = &tmp; } else { if (nam == NULL) { m_freem(m); return (ENOTCONN); } if (nam->sa_len != sizeof(struct sockaddr_in6)) { m_freem(m); return (EINVAL); } tmp = *(struct sockaddr_in6 *)nam; dst = &tmp; if (dst->sin6_family == AF_UNSPEC) { /* * XXX: we allow this case for backward * compatibility to buggy applications that * rely on old (and wrong) kernel behavior. */ log(LOG_INFO, "rip6 SEND: address family is " "unspec. Assume AF_INET6\n"); dst->sin6_family = AF_INET6; } else if (dst->sin6_family != AF_INET6) { m_freem(m); return(EAFNOSUPPORT); } } ret = rip6_output(m, so, dst, control); return (ret); } struct pr_usrreqs rip6_usrreqs = { .pru_abort = rip6_abort, .pru_attach = rip6_attach, .pru_bind = rip6_bind, .pru_connect = rip6_connect, .pru_control = in6_control, .pru_detach = rip6_detach, .pru_disconnect = rip6_disconnect, .pru_peeraddr = in6_getpeeraddr, .pru_send = rip6_send, .pru_shutdown = rip6_shutdown, .pru_sockaddr = in6_getsockaddr, .pru_close = rip6_close, }; diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c index 375c00d0a3e2..353b260d33ca 100644 --- a/sys/netinet6/udp6_usrreq.c +++ b/sys/netinet6/udp6_usrreq.c @@ -1,1285 +1,1285 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * Copyright (c) 2010-2011 Juniper Networks, Inc. * Copyright (c) 2014 Kevin Lo * All rights reserved. * * Portions of this software were developed by Robert N. M. Watson under * contract to Juniper Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: udp6_usrreq.c,v 1.27 2001/05/21 05:45:10 jinmei Exp $ * $KAME: udp6_output.c,v 1.31 2001/05/21 16:39:15 jinmei Exp $ */ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipfw.h" #include "opt_ipsec.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #endif /* IPSEC */ #include /* * UDP protocol implementation. * Per RFC 768, August, 1980. */ extern struct protosw inetsw[]; static void udp6_detach(struct socket *so); static int udp6_append(struct inpcb *inp, struct mbuf *n, int off, struct sockaddr_in6 *fromsa) { struct socket *so; struct mbuf *opts; struct udpcb *up; INP_LOCK_ASSERT(inp); /* * Engage the tunneling protocol. 
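 *
 * If a tunneling consumer registered a u_tun_func callback on this
 * pcb, the datagram is diverted to it instead of the socket buffer.
 * The inpcb lock is dropped around the callback, so a reference is
 * taken first and in_pcbrele_rlocked() reports whether the pcb was
 * freed while unlocked; an outline of the pattern used below:
 *
 *	in_pcbref(inp);			hold inp across the unlock
 *	INP_RUNLOCK(inp);
 *	(*up->u_tun_func)(n, off, inp, ...);
 *	INP_RLOCK(inp);
 *	return (in_pcbrele_rlocked(inp));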
*/ up = intoudpcb(inp); if (up->u_tun_func != NULL) { in_pcbref(inp); INP_RUNLOCK(inp); (*up->u_tun_func)(n, off, inp, (struct sockaddr *)fromsa, up->u_tun_ctx); INP_RLOCK(inp); return (in_pcbrele_rlocked(inp)); } #ifdef IPSEC /* Check AH/ESP integrity. */ if (ipsec6_in_reject(n, inp)) { m_freem(n); return (0); } #endif /* IPSEC */ #ifdef MAC if (mac_inpcb_check_deliver(inp, n) != 0) { m_freem(n); return (0); } #endif opts = NULL; if (inp->inp_flags & INP_CONTROLOPTS || inp->inp_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(inp, n, &opts); m_adj(n, off + sizeof(struct udphdr)); so = inp->inp_socket; SOCKBUF_LOCK(&so->so_rcv); if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)fromsa, n, opts) == 0) { SOCKBUF_UNLOCK(&so->so_rcv); m_freem(n); if (opts) m_freem(opts); UDPSTAT_INC(udps_fullsock); } else sorwakeup_locked(so); return (0); } int udp6_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct ifnet *ifp; struct ip6_hdr *ip6; struct udphdr *uh; struct inpcb *inp; struct inpcbinfo *pcbinfo; struct udpcb *up; int off = *offp; int cscov_partial; int plen, ulen; struct sockaddr_in6 fromsa; struct m_tag *fwd_tag; uint16_t uh_sum; uint8_t nxt; ifp = m->m_pkthdr.rcvif; ip6 = mtod(m, struct ip6_hdr *); #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct udphdr), IPPROTO_DONE); ip6 = mtod(m, struct ip6_hdr *); uh = (struct udphdr *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(uh, struct udphdr *, m, off, sizeof(*uh)); if (!uh) return (IPPROTO_DONE); #endif UDPSTAT_INC(udps_ipackets); /* * Destination port of 0 is illegal, based on RFC768. */ if (uh->uh_dport == 0) goto badunlocked; plen = ntohs(ip6->ip6_plen) - off + sizeof(*ip6); ulen = ntohs((u_short)uh->uh_ulen); nxt = proto; cscov_partial = (nxt == IPPROTO_UDPLITE) ? 1 : 0; if (nxt == IPPROTO_UDPLITE) { /* Zero means checksum over the complete packet. */ if (ulen == 0) ulen = plen; if (ulen == plen) cscov_partial = 0; if ((ulen < sizeof(struct udphdr)) || (ulen > plen)) { /* XXX: What is the right UDPLite MIB counter? */ goto badunlocked; } if (uh->uh_sum == 0) { /* XXX: What is the right UDPLite MIB counter? */ goto badunlocked; } } else { if ((ulen < sizeof(struct udphdr)) || (plen != ulen)) { UDPSTAT_INC(udps_badlen); goto badunlocked; } if (uh->uh_sum == 0) { UDPSTAT_INC(udps_nosum); goto badunlocked; } } if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) && !cscov_partial) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) uh_sum = m->m_pkthdr.csum_data; else uh_sum = in6_cksum_pseudo(ip6, ulen, nxt, m->m_pkthdr.csum_data); uh_sum ^= 0xffff; } else uh_sum = in6_cksum_partial(m, nxt, off, plen, ulen); if (uh_sum != 0) { UDPSTAT_INC(udps_badsum); goto badunlocked; } /* * Construct sockaddr format source address. */ init_sin6(&fromsa, m); fromsa.sin6_port = uh->uh_sport; pcbinfo = udp_get_inpcbinfo(nxt); if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { struct inpcb *last; struct inpcbhead *pcblist; struct ip6_moptions *imo; INP_INFO_RLOCK(pcbinfo); /* * In the event that laddr should be set to the link-local * address (this happens in RIPng), the multicast address * specified in the received packet will not match laddr. To * handle this situation, matching is relaxed if the * receiving interface is the same as one specified in the * socket and if the destination multicast address matches * one of the multicast groups specified in the socket. */ /* * KAME note: traditionally we dropped udpiphdr from mbuf * here. We need udphdr for IPsec processing so we do that * later. 
*/ pcblist = udp_get_pcblist(nxt); last = NULL; LIST_FOREACH(inp, pcblist, inp_list) { if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (inp->inp_lport != uh->uh_dport) continue; if (inp->inp_fport != 0 && inp->inp_fport != uh->uh_sport) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &ip6->ip6_dst)) continue; } if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &ip6->ip6_src) || inp->inp_fport != uh->uh_sport) continue; } /* * XXXRW: Because we weren't holding either the inpcb * or the hash lock when we checked for a match * before, we should probably recheck now that the * inpcb lock is (supposed to be) held. */ /* * Handle socket delivery policy for any-source * and source-specific multicast. [RFC3678] */ imo = inp->in6p_moptions; if (imo && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { struct sockaddr_in6 mcaddr; int blocked; INP_RLOCK(inp); bzero(&mcaddr, sizeof(struct sockaddr_in6)); mcaddr.sin6_len = sizeof(struct sockaddr_in6); mcaddr.sin6_family = AF_INET6; mcaddr.sin6_addr = ip6->ip6_dst; blocked = im6o_mc_filter(imo, ifp, (struct sockaddr *)&mcaddr, (struct sockaddr *)&fromsa); if (blocked != MCAST_PASS) { if (blocked == MCAST_NOTGMEMBER) IP6STAT_INC(ip6s_notmember); if (blocked == MCAST_NOTSMEMBER || blocked == MCAST_MUTED) UDPSTAT_INC(udps_filtermcast); INP_RUNLOCK(inp); /* XXX */ continue; } INP_RUNLOCK(inp); } if (last != NULL) { struct mbuf *n; if ((n = m_copy(m, 0, M_COPYALL)) != NULL) { INP_RLOCK(last); UDP_PROBE(receive, NULL, last, ip6, last, uh); if (udp6_append(last, n, off, &fromsa)) goto inp_lost; INP_RUNLOCK(last); } } last = inp; /* * Don't look for additional matches if this one does * not have either the SO_REUSEPORT or SO_REUSEADDR * socket options set. This heuristic avoids * searching through all pcbs in the common case of a * non-shared port. It assumes that an application * will never clear these options after setting them. */ if ((last->inp_socket->so_options & (SO_REUSEPORT|SO_REUSEADDR)) == 0) break; } if (last == NULL) { /* * No matching pcb found; discard datagram. (No need * to send an ICMP Port Unreachable for a broadcast * or multicast datagram.) */ UDPSTAT_INC(udps_noport); UDPSTAT_INC(udps_noportmcast); goto badheadlocked; } INP_RLOCK(last); INP_INFO_RUNLOCK(pcbinfo); UDP_PROBE(receive, NULL, last, ip6, last, uh); if (udp6_append(last, m, off, &fromsa) == 0) INP_RUNLOCK(last); inp_lost: return (IPPROTO_DONE); } /* * Locate pcb for datagram. */ /* * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */ if ((m->m_flags & M_IP6_NEXTHOP) && (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { struct sockaddr_in6 *next_hop6; next_hop6 = (struct sockaddr_in6 *)(fwd_tag + 1); /* * Transparently forwarded. Pretend to be the destination. * Already got one like this? */ inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src, uh->uh_sport, &ip6->ip6_dst, uh->uh_dport, INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m); if (!inp) { /* * It's new. Try to find the ambushing socket. * Because we've rewritten the destination address, * any hardware-generated hash is ignored. */ inp = in6_pcblookup(pcbinfo, &ip6->ip6_src, uh->uh_sport, &next_hop6->sin6_addr, next_hop6->sin6_port ? htons(next_hop6->sin6_port) : uh->uh_dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif); } /* Remove the tag from the packet. We don't need it anymore.
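 *
 * (Two lookups are attempted above for a transparently forwarded
 * datagram: an exact match on the original address pair first, in
 * case a pcb is already connected to this flow, and only then a
 * wildcard lookup against the next-hop address carried in the
 * PACKET_TAG_IPFORWARD tag.  The second lookup deliberately ignores
 * any hardware-generated flow hash, since that hash was computed
 * over the original, unrewritten header.)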
*/ m_tag_delete(m, fwd_tag); m->m_flags &= ~M_IP6_NEXTHOP; } else inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src, uh->uh_sport, &ip6->ip6_dst, uh->uh_dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m); if (inp == NULL) { if (udp_log_in_vain) { char ip6bufs[INET6_ADDRSTRLEN]; char ip6bufd[INET6_ADDRSTRLEN]; log(LOG_INFO, "Connection attempt to UDP [%s]:%d from [%s]:%d\n", ip6_sprintf(ip6bufd, &ip6->ip6_dst), ntohs(uh->uh_dport), ip6_sprintf(ip6bufs, &ip6->ip6_src), ntohs(uh->uh_sport)); } UDPSTAT_INC(udps_noport); if (m->m_flags & M_MCAST) { printf("UDP6: M_MCAST is set in a unicast packet.\n"); UDPSTAT_INC(udps_noportmcast); goto badunlocked; } if (V_udp_blackhole) goto badunlocked; if (badport_bandlim(BANDLIM_ICMP6_UNREACH) < 0) goto badunlocked; icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0); return (IPPROTO_DONE); } INP_RLOCK_ASSERT(inp); up = intoudpcb(inp); if (cscov_partial) { if (up->u_rxcslen == 0 || up->u_rxcslen > ulen) { INP_RUNLOCK(inp); m_freem(m); return (IPPROTO_DONE); } } UDP_PROBE(receive, NULL, inp, ip6, inp, uh); if (udp6_append(inp, m, off, &fromsa) == 0) INP_RUNLOCK(inp); return (IPPROTO_DONE); badheadlocked: INP_INFO_RUNLOCK(pcbinfo); badunlocked: if (m) m_freem(m); return (IPPROTO_DONE); } static void udp6_common_ctlinput(int cmd, struct sockaddr *sa, void *d, struct inpcbinfo *pcbinfo) { struct udphdr uh; struct ip6_hdr *ip6; struct mbuf *m; int off = 0; struct ip6ctlparam *ip6cp = NULL; const struct sockaddr_in6 *sa6_src = NULL; void *cmdarg; struct inpcb *(*notify)(struct inpcb *, int) = udp_notify; struct udp_portonly { u_int16_t uh_sport; u_int16_t uh_dport; } *uhp; if (sa->sa_family != AF_INET6 || sa->sa_len != sizeof(struct sockaddr_in6)) return; if ((unsigned)cmd >= PRC_NCMDS) return; if (PRC_IS_REDIRECT(cmd)) notify = in6_rtchange, d = NULL; else if (cmd == PRC_HOSTDEAD) d = NULL; else if (inet6ctlerrmap[cmd] == 0) return; /* if the parameter is from icmp6, decode it. */ if (d != NULL) { ip6cp = (struct ip6ctlparam *)d; m = ip6cp->ip6c_m; ip6 = ip6cp->ip6c_ip6; off = ip6cp->ip6c_off; cmdarg = ip6cp->ip6c_cmdarg; sa6_src = ip6cp->ip6c_src; } else { m = NULL; ip6 = NULL; cmdarg = NULL; sa6_src = &sa6_any; } if (ip6) { /* * XXX: We assume that when IPV6 is non NULL, * M and OFF are valid. */ /* Check if we can safely examine src and dst ports. 
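 *
 * Only the leading port pair of the quoted UDP header is required
 * here, and an ICMPv6 error is not guaranteed to quote the whole
 * offending datagram, so the bounds check below asks only for
 * sizeof(struct udp_portonly), i.e. the two 16-bit ports, before
 * m_copydata() pulls them out of the mbuf chain.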
*/ if (m->m_pkthdr.len < off + sizeof(*uhp)) return; bzero(&uh, sizeof(uh)); m_copydata(m, off, sizeof(*uhp), (caddr_t)&uh); (void)in6_pcbnotify(pcbinfo, sa, uh.uh_dport, (struct sockaddr *)ip6cp->ip6c_src, uh.uh_sport, cmd, cmdarg, notify); } else (void)in6_pcbnotify(pcbinfo, sa, 0, (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify); } void udp6_ctlinput(int cmd, struct sockaddr *sa, void *d) { return (udp6_common_ctlinput(cmd, sa, d, &V_udbinfo)); } void udplite6_ctlinput(int cmd, struct sockaddr *sa, void *d) { return (udp6_common_ctlinput(cmd, sa, d, &V_ulitecbinfo)); } static int udp6_getcred(SYSCTL_HANDLER_ARGS) { struct xucred xuc; struct sockaddr_in6 addrs[2]; struct inpcb *inp; int error; error = priv_check(req->td, PRIV_NETINET_GETCRED); if (error) return (error); if (req->newlen != sizeof(addrs)) return (EINVAL); if (req->oldlen != sizeof(struct xucred)) return (EINVAL); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); if ((error = sa6_embedscope(&addrs[0], V_ip6_use_defzone)) != 0 || (error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) { return (error); } inp = in6_pcblookup(&V_udbinfo, &addrs[1].sin6_addr, addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { INP_RLOCK_ASSERT(inp); if (inp->inp_socket == NULL) error = ENOENT; if (error == 0) error = cr_canseesocket(req->td->td_ucred, inp->inp_socket); if (error == 0) cru2x(inp->inp_cred, &xuc); INP_RUNLOCK(inp); } else error = ENOENT; if (error == 0) error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); return (error); } SYSCTL_PROC(_net_inet6_udp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 0, 0, udp6_getcred, "S,xucred", "Get the xucred of a UDP6 connection"); static int udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6, struct mbuf *control, struct thread *td) { u_int32_t ulen = m->m_pkthdr.len; u_int32_t plen = sizeof(struct udphdr) + ulen; struct ip6_hdr *ip6; struct udphdr *udp6; struct in6_addr *laddr, *faddr, in6a; struct sockaddr_in6 *sin6 = NULL; struct ifnet *oifp = NULL; int cscov_partial = 0; int scope_ambiguous = 0; u_short fport; int error = 0; uint8_t nxt; uint16_t cscov = 0; struct ip6_pktopts *optp, opt; int af = AF_INET6, hlen = sizeof(struct ip6_hdr); int flags; struct sockaddr_in6 tmp; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); if (addr6) { /* addr6 has been validated in udp6_send(). */ sin6 = (struct sockaddr_in6 *)addr6; /* protect *sin6 from overwrites */ tmp = *sin6; sin6 = &tmp; /* * Application should provide a proper zone ID or the use of * default zone IDs should be enabled. Unfortunately, some * applications do not behave as they should, so we need a * workaround. Even if an appropriate ID is not determined, * we'll see if we can determine the outgoing interface. If we * can, determine the zone ID based on the interface below. */ if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone) scope_ambiguous = 1; if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) return (error); } nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ?
IPPROTO_UDP : IPPROTO_UDPLITE; if (control) { if ((error = ip6_setpktopts(control, &opt, inp->in6p_outputopts, td->td_ucred, nxt)) != 0) goto release; optp = &opt; } else optp = inp->in6p_outputopts; if (sin6) { faddr = &sin6->sin6_addr; /* * Since we saw no essential reason for calling in_pcbconnect, * we get rid of such kind of logic, and call in6_selectsrc * and in6_pcbsetport in order to fill in the local address * and the local port. */ if (sin6->sin6_port == 0) { error = EADDRNOTAVAIL; goto release; } if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { /* how about ::ffff:0.0.0.0 case? */ error = EISCONN; goto release; } fport = sin6->sin6_port; /* allow 0 port */ if (IN6_IS_ADDR_V4MAPPED(faddr)) { if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) { /* * I believe we should explicitly discard the * packet when mapped addresses are disabled, * rather than send the packet as an IPv6 one. * If we chose the latter approach, the packet * might be sent out on the wire based on the * default route, the situation which we'd * probably want to avoid. * (20010421 jinmei@kame.net) */ error = EINVAL; goto release; } if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && !IN6_IS_ADDR_V4MAPPED(&inp->in6p_laddr)) { /* * when remote addr is an IPv4-mapped address, * local addr should not be an IPv6 address, * since you cannot determine how to map IPv6 * source address to IPv4. */ error = EINVAL; goto release; } af = AF_INET; } if (!IN6_IS_ADDR_V4MAPPED(faddr)) { - error = in6_selectsrc(sin6, optp, inp, NULL, + error = in6_selectsrc(sin6, optp, inp, td->td_ucred, &oifp, &in6a); if (error) goto release; if (oifp && scope_ambiguous && (error = in6_setscope(&sin6->sin6_addr, oifp, NULL))) { goto release; } laddr = &in6a; } else laddr = &inp->in6p_laddr; /* XXX */ if (laddr == NULL) { if (error == 0) error = EADDRNOTAVAIL; goto release; } if (inp->inp_lport == 0 && (error = in6_pcbsetport(laddr, inp, td->td_ucred)) != 0) { /* Undo an address bind that may have occurred. */ inp->in6p_laddr = in6addr_any; goto release; } } else { if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { error = ENOTCONN; goto release; } if (IN6_IS_ADDR_V4MAPPED(&inp->in6p_faddr)) { if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) { /* * XXX: this case would happen when the * application sets the V6ONLY flag after * connecting the foreign address. * Such applications should be fixed, * so we bark here. */ log(LOG_INFO, "udp6_output: IPV6_V6ONLY " "option was set for a connected socket\n"); error = EINVAL; goto release; } else af = AF_INET; } laddr = &inp->in6p_laddr; faddr = &inp->in6p_faddr; fport = inp->inp_fport; } if (af == AF_INET) hlen = sizeof(struct ip); /* * Calculate data length and get a mbuf * for UDP and IP6 headers. */ M_PREPEND(m, hlen + sizeof(struct udphdr), M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto release; } /* * Stuff checksum and output datagram. */ udp6 = (struct udphdr *)(mtod(m, caddr_t) + hlen); udp6->uh_sport = inp->inp_lport; /* lport is always set in the PCB */ udp6->uh_dport = fport; if (nxt == IPPROTO_UDPLITE) { struct udpcb *up; up = intoudpcb(inp); cscov = up->u_txcslen; if (cscov >= plen) cscov = 0; udp6->uh_ulen = htons(cscov); /* * For UDP-Lite, checksum coverage length of zero means * the entire UDPLite packet is covered by the checksum. */ cscov_partial = (cscov == 0) ? 
0 : 1; } else if (plen <= 0xffff) udp6->uh_ulen = htons((u_short)plen); else udp6->uh_ulen = 0; udp6->uh_sum = 0; switch (af) { case AF_INET6: ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = inp->inp_flow & IPV6_FLOWINFO_MASK; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; ip6->ip6_plen = htons((u_short)plen); ip6->ip6_nxt = nxt; ip6->ip6_hlim = in6_selecthlim(inp, NULL); ip6->ip6_src = *laddr; ip6->ip6_dst = *faddr; if (cscov_partial) { if ((udp6->uh_sum = in6_cksum_partial(m, nxt, sizeof(struct ip6_hdr), plen, cscov)) == 0) udp6->uh_sum = 0xffff; } else { udp6->uh_sum = in6_cksum_pseudo(ip6, plen, nxt, 0); m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); } #ifdef RSS { uint32_t hash_val, hash_type; uint8_t pr; pr = inp->inp_socket->so_proto->pr_protocol; /* * Calculate an appropriate RSS hash for UDP and * UDP Lite. * * The called function will take care of figuring out * whether a 2-tuple or 4-tuple hash is required based * on the currently configured scheme. * * Later on, connected socket values should be * cached in the inpcb and reused, rather than constantly * recalculated. * * UDP Lite is a different protocol number and will * likely end up being hashed as a 2-tuple until * RSS / NICs grow UDP Lite protocol awareness. */ if (rss_proto_software_hash_v6(faddr, laddr, fport, inp->inp_lport, pr, &hash_val, &hash_type) == 0) { m->m_pkthdr.flowid = hash_val; M_HASHTYPE_SET(m, hash_type); } } #endif flags = 0; #ifdef RSS /* * Don't override with the inp cached flowid. * * Until the whole UDP path is vetted, it may actually * be incorrect. */ flags |= IP_NODEFAULTFLOWID; #endif UDP_PROBE(send, NULL, inp, ip6, inp, udp6); UDPSTAT_INC(udps_opackets); error = ip6_output(m, optp, NULL, flags, inp->in6p_moptions, NULL, inp); break; case AF_INET: error = EAFNOSUPPORT; goto release; } goto releaseopt; release: m_freem(m); releaseopt: if (control) { ip6_clearpktopts(&opt, -1); m_freem(control); } return (error); } static void udp6_abort(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_abort: inp == NULL")); INP_WLOCK(inp); #ifdef INET if (inp->inp_vflag & INP_IPV4) { struct pr_usrreqs *pru; uint8_t nxt; nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; INP_WUNLOCK(inp); pru = inetsw[ip_protox[nxt]].pr_usrreqs; (*pru->pru_abort)(so); return; } #endif if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { INP_HASH_WLOCK(pcbinfo); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; INP_HASH_WUNLOCK(pcbinfo); soisdisconnected(so); } INP_WUNLOCK(inp); } static int udp6_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp == NULL, ("udp6_attach: inp != NULL")); if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { error = soreserve(so, udp_sendspace, udp_recvspace); if (error) return (error); } INP_INFO_WLOCK(pcbinfo); error = in_pcballoc(so, pcbinfo); if (error) { INP_INFO_WUNLOCK(pcbinfo); return (error); } inp = (struct inpcb *)so->so_pcb; inp->inp_vflag |= INP_IPV6; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) inp->inp_vflag |= INP_IPV4; inp->in6p_hops = -1; /* use kernel default */ inp->in6p_cksum = -1; /* just to be sure */ /* * XXX: ugly!!
* IPv4 TTL initialization is necessary for an IPv6 socket as well, * because the socket may be bound to an IPv6 wildcard address, * which may match an IPv4-mapped IPv6 address. */ inp->inp_ip_ttl = V_ip_defttl; error = udp_newudpcb(inp); if (error) { in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(pcbinfo); return (error); } INP_WUNLOCK(inp); INP_INFO_WUNLOCK(pcbinfo); return (0); } static int udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_bind: inp == NULL")); INP_WLOCK(inp); INP_HASH_WLOCK(pcbinfo); inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { struct sockaddr_in6 *sin6_p; sin6_p = (struct sockaddr_in6 *)nam; if (IN6_IS_ADDR_UNSPECIFIED(&sin6_p->sin6_addr)) inp->inp_vflag |= INP_IPV4; #ifdef INET else if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6_p); inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; error = in_pcbbind(inp, (struct sockaddr *)&sin, td->td_ucred); goto out; } #endif } error = in6_pcbbind(inp, nam, td->td_ucred); #ifdef INET out: #endif INP_HASH_WUNLOCK(pcbinfo); INP_WUNLOCK(inp); return (error); } static void udp6_close(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_close: inp == NULL")); INP_WLOCK(inp); #ifdef INET if (inp->inp_vflag & INP_IPV4) { struct pr_usrreqs *pru; uint8_t nxt; nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; INP_WUNLOCK(inp); pru = inetsw[ip_protox[nxt]].pr_usrreqs; (*pru->pru_disconnect)(so); return; } #endif if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { INP_HASH_WLOCK(pcbinfo); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; INP_HASH_WUNLOCK(pcbinfo); soisdisconnected(so); } INP_WUNLOCK(inp); } static int udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; struct sockaddr_in6 *sin6; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); sin6 = (struct sockaddr_in6 *)nam; KASSERT(inp != NULL, ("udp6_connect: inp == NULL")); /* * XXXRW: Need to clarify locking of v4/v6 flags. 
*/ INP_WLOCK(inp); #ifdef INET if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { struct sockaddr_in sin; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) { error = EINVAL; goto out; } if (inp->inp_faddr.s_addr != INADDR_ANY) { error = EISCONN; goto out; } in6_sin6_2_sin(&sin, sin6); inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; error = prison_remote_ip4(td->td_ucred, &sin.sin_addr); if (error != 0) goto out; INP_HASH_WLOCK(pcbinfo); error = in_pcbconnect(inp, (struct sockaddr *)&sin, td->td_ucred); INP_HASH_WUNLOCK(pcbinfo); if (error == 0) soisconnected(so); goto out; } #endif if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { error = EISCONN; goto out; } inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr); if (error != 0) goto out; INP_HASH_WLOCK(pcbinfo); error = in6_pcbconnect(inp, nam, td->td_ucred); INP_HASH_WUNLOCK(pcbinfo); if (error == 0) soisconnected(so); out: INP_WUNLOCK(inp); return (error); } static void udp6_detach(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; struct udpcb *up; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_detach: inp == NULL")); INP_INFO_WLOCK(pcbinfo); INP_WLOCK(inp); up = intoudpcb(inp); KASSERT(up != NULL, ("%s: up == NULL", __func__)); in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(pcbinfo); udp_discardcb(up); } static int udp6_disconnect(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_disconnect: inp == NULL")); INP_WLOCK(inp); #ifdef INET if (inp->inp_vflag & INP_IPV4) { struct pr_usrreqs *pru; uint8_t nxt; nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; INP_WUNLOCK(inp); pru = inetsw[ip_protox[nxt]].pr_usrreqs; (void)(*pru->pru_disconnect)(so); return (0); } #endif if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { error = ENOTCONN; goto out; } INP_HASH_WLOCK(pcbinfo); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; INP_HASH_WUNLOCK(pcbinfo); SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTED; /* XXX */ SOCK_UNLOCK(so); out: INP_WUNLOCK(inp); return (0); } static int udp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; int error = 0; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_send: inp == NULL")); INP_WLOCK(inp); if (addr) { if (addr->sa_len != sizeof(struct sockaddr_in6)) { error = EINVAL; goto bad; } if (addr->sa_family != AF_INET6) { error = EAFNOSUPPORT; goto bad; } } #ifdef INET if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { int hasv4addr; struct sockaddr_in6 *sin6 = 0; if (addr == 0) hasv4addr = (inp->inp_vflag & INP_IPV4); else { sin6 = (struct sockaddr_in6 *)addr; hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ? 1 : 0; } if (hasv4addr) { struct pr_usrreqs *pru; uint8_t nxt; nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; /* * XXXRW: We release UDP-layer locks before calling * udp_send() in order to avoid recursion. However, * this does mean there is a short window where inp's * fields are unstable. Could this lead to a * potential race in which the factors causing us to * select the UDPv4 output routine are invalidated? 
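 *
 * (For the v4-mapped case below, in6_sin6_2_sin_in_sock() rewrites
 * the sockaddr_in6 into an equivalent sockaddr_in within the same
 * storage, e.g. a destination of [::ffff:192.0.2.1]:9 becomes
 * 192.0.2.1:9, so the IPv4 pru_send handler can consume the very
 * same nam buffer.)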
*/ INP_WUNLOCK(inp); if (sin6) in6_sin6_2_sin_in_sock(addr); pru = inetsw[ip_protox[nxt]].pr_usrreqs; /* addr will just be freed in sendit(). */ return ((*pru->pru_send)(so, flags, m, addr, control, td)); } } #endif #ifdef MAC mac_inpcb_create_mbuf(inp, m); #endif INP_HASH_WLOCK(pcbinfo); error = udp6_output(inp, m, addr, control, td); INP_HASH_WUNLOCK(pcbinfo); INP_WUNLOCK(inp); return (error); bad: INP_WUNLOCK(inp); m_freem(m); return (error); } struct pr_usrreqs udp6_usrreqs = { .pru_abort = udp6_abort, .pru_attach = udp6_attach, .pru_bind = udp6_bind, .pru_connect = udp6_connect, .pru_control = in6_control, .pru_detach = udp6_detach, .pru_disconnect = udp6_disconnect, .pru_peeraddr = in6_mapped_peeraddr, .pru_send = udp6_send, .pru_shutdown = udp_shutdown, .pru_sockaddr = in6_mapped_sockaddr, .pru_soreceive = soreceive_dgram, .pru_sosend = sosend_dgram, .pru_sosetlabel = in_pcbsosetlabel, .pru_close = udp6_close }; diff --git a/sys/powerpc/aim/aim_machdep.c b/sys/powerpc/aim/aim_machdep.c index ab09ab6b5ad7..835783f972ec 100644 --- a/sys/powerpc/aim/aim_machdep.c +++ b/sys/powerpc/aim/aim_machdep.c @@ -1,616 +1,627 @@ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 2001 Benno Rice * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * $NetBSD: machdep.c,v 1.74.2.1 2000/11/01 16:13:48 tv Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ddb.h" #include "opt_kstack_pages.h" #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef __powerpc64__ #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __powerpc64__ extern int n_slbs; #endif #ifndef __powerpc64__ struct bat battable[16]; #endif #ifndef __powerpc64__ /* Bits for running on 64-bit systems in 32-bit mode. */ extern void *testppc64, *testppc64size; extern void *restorebridge, *restorebridgesize; extern void *rfid_patch, *rfi_patch1, *rfi_patch2; extern void *trapcode64; extern Elf_Addr _GLOBAL_OFFSET_TABLE_[]; #endif extern void *rstcode, *rstcodeend; extern void *trapcode, *trapcodeend; extern void *generictrap, *generictrap64; extern void *slbtrap, *slbtrapend; extern void *alitrap, *aliend; extern void *dsitrap, *dsiend; extern void *decrint, *decrsize; extern void *extint, *extsize; extern void *dblow, *dbend; extern void *imisstrap, *imisssize; extern void *dlmisstrap, *dlmisssize; extern void *dsmisstrap, *dsmisssize; extern void *ap_pcpu; void aim_cpu_init(vm_offset_t toc); void aim_cpu_init(vm_offset_t toc) { size_t trap_offset, trapsize; vm_offset_t trap; register_t msr, scratch; uint8_t *cache_check; int cacheline_warn; #ifndef __powerpc64__ int ppc64; #endif trap_offset = 0; cacheline_warn = 0; + #ifdef __powerpc64__ + /* + * Switch to 64-bit mode, if the bootloader didn't, before we start + * using memory beyond what the bootloader might have set up. + * Guaranteed not to cause an implicit branch since we either (a) + * started with a 32-bit bootloader below 4 GB or (b) were already in + * 64-bit mode, making this a no-op. + */ + mtmsrd(mfmsr() | PSL_SF); + #endif + /* Various very early CPU fix ups */ switch (mfpvr() >> 16) { /* * PowerPC 970 CPUs have a misfeature requested by Apple that * makes them pretend they have a 32-byte cacheline. Turn this * off before we measure the cacheline size. 
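 *
 * (The measurement that follows relies on dcbz zeroing exactly one
 * data cache line: the buffer is filled with 0xff, dcbz is executed
 * on its start, and the first byte left at 0xff marks the true line
 * size, so the 32-byte emulation must be off first.)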
*/ case IBM970: case IBM970FX: case IBM970MP: case IBM970GX: scratch = mfspr(SPR_HID5); scratch &= ~HID5_970_DCBZ_SIZE_HI; mtspr(SPR_HID5, scratch); break; #ifdef __powerpc64__ case IBMPOWER7: case IBMPOWER7PLUS: case IBMPOWER8: case IBMPOWER8E: /* XXX: get from ibm,slb-size in device tree */ n_slbs = 32; break; #endif } /* * Initialize the interrupt tables and figure out our cache line * size and whether or not we need the 64-bit bridge code. */ /* * Disable translation in case the vector area hasn't been * mapped (G5). Note that no OFW calls can be made until * translation is re-enabled. */ msr = mfmsr(); mtmsr((msr & ~(PSL_IR | PSL_DR)) | PSL_RI); /* * Measure the cacheline size using dcbz * * Use EXC_PGM as a playground. We are about to overwrite it * anyway, we know it exists, and we know it is cache-aligned. */ cache_check = (void *)EXC_PGM; for (cacheline_size = 0; cacheline_size < 0x100; cacheline_size++) cache_check[cacheline_size] = 0xff; __asm __volatile("dcbz 0,%0":: "r" (cache_check) : "memory"); /* Find the first byte dcbz did not zero to get the cache line size */ for (cacheline_size = 0; cacheline_size < 0x100 && cache_check[cacheline_size] == 0; cacheline_size++); /* Work around psim bug */ if (cacheline_size == 0) { cacheline_warn = 1; cacheline_size = 32; } #ifndef __powerpc64__ /* * Figure out whether we need to use the 64 bit PMAP. This works by * executing an instruction that is only legal on 64-bit PPC (mtmsrd), * and setting ppc64 = 0 if that causes a trap. */ ppc64 = 1; bcopy(&testppc64, (void *)EXC_PGM, (size_t)&testppc64size); __syncicache((void *)EXC_PGM, (size_t)&testppc64size); __asm __volatile("\ mfmsr %0; \ mtsprg2 %1; \ \ mtmsrd %0; \ mfsprg2 %1;" : "=r"(scratch), "=r"(ppc64)); if (ppc64) cpu_features |= PPC_FEATURE_64; /* * Now copy restorebridge into all the handlers, if necessary, * and set up the trap tables. */ if (cpu_features & PPC_FEATURE_64) { /* Patch the two instances of rfi -> rfid */ bcopy(&rfid_patch,&rfi_patch1,4); #ifdef KDB /* rfi_patch2 is at the end of dbleave */ bcopy(&rfid_patch,&rfi_patch2,4); #endif } #else /* powerpc64 */ cpu_features |= PPC_FEATURE_64; #endif trapsize = (size_t)&trapcodeend - (size_t)&trapcode; /* * Copy generic handler into every possible trap. Special cases will get * different ones in a minute. 
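 *
 * The loop below stamps the generic trapcode image at every 0x20-byte
 * step between EXC_RST and EXC_LAST, so that no vector is ever left
 * without a handler; the dedicated reset, ALI, DSI, SLB and debugger
 * entries are then copied over their slots afterwards.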
*/ for (trap = EXC_RST; trap < EXC_LAST; trap += 0x20) bcopy(&trapcode, (void *)trap, trapsize); #ifndef __powerpc64__ if (cpu_features & PPC_FEATURE_64) { /* * Copy a code snippet to restore 32-bit bridge mode * to the top of every non-generic trap handler */ trap_offset += (size_t)&restorebridgesize; bcopy(&restorebridge, (void *)EXC_RST, trap_offset); bcopy(&restorebridge, (void *)EXC_DSI, trap_offset); bcopy(&restorebridge, (void *)EXC_ALI, trap_offset); bcopy(&restorebridge, (void *)EXC_PGM, trap_offset); bcopy(&restorebridge, (void *)EXC_MCHK, trap_offset); bcopy(&restorebridge, (void *)EXC_TRC, trap_offset); bcopy(&restorebridge, (void *)EXC_BPT, trap_offset); } #endif bcopy(&rstcode, (void *)(EXC_RST + trap_offset), (size_t)&rstcodeend - (size_t)&rstcode); #ifdef KDB bcopy(&dblow, (void *)(EXC_MCHK + trap_offset), (size_t)&dbend - (size_t)&dblow); bcopy(&dblow, (void *)(EXC_PGM + trap_offset), (size_t)&dbend - (size_t)&dblow); bcopy(&dblow, (void *)(EXC_TRC + trap_offset), (size_t)&dbend - (size_t)&dblow); bcopy(&dblow, (void *)(EXC_BPT + trap_offset), (size_t)&dbend - (size_t)&dblow); #endif bcopy(&alitrap, (void *)(EXC_ALI + trap_offset), (size_t)&aliend - (size_t)&alitrap); bcopy(&dsitrap, (void *)(EXC_DSI + trap_offset), (size_t)&dsiend - (size_t)&dsitrap); #ifdef __powerpc64__ /* Set TOC base so that the interrupt code can get at it */ *((void **)TRAP_GENTRAP) = &generictrap; *((register_t *)TRAP_TOCBASE) = toc; bcopy(&slbtrap, (void *)EXC_DSE,(size_t)&slbtrapend - (size_t)&slbtrap); bcopy(&slbtrap, (void *)EXC_ISE,(size_t)&slbtrapend - (size_t)&slbtrap); #else /* Set branch address for trap code */ if (cpu_features & PPC_FEATURE_64) *((void **)TRAP_GENTRAP) = &generictrap64; else *((void **)TRAP_GENTRAP) = &generictrap; *((void **)TRAP_TOCBASE) = _GLOBAL_OFFSET_TABLE_; /* G2-specific TLB miss helper handlers */ bcopy(&imisstrap, (void *)EXC_IMISS, (size_t)&imisssize); bcopy(&dlmisstrap, (void *)EXC_DLMISS, (size_t)&dlmisssize); bcopy(&dsmisstrap, (void *)EXC_DSMISS, (size_t)&dsmisssize); #endif __syncicache(EXC_RSVD, EXC_LAST - EXC_RSVD); /* * Restore MSR */ mtmsr(msr); /* Warn if cacheline size was not determined */ if (cacheline_warn == 1) { printf("WARNING: cacheline size undetermined, setting to 32\n"); } /* * Initialise virtual memory. Use BUS_PROBE_GENERIC priority * in case the platform module had a better idea of what we * should do. */ if (cpu_features & PPC_FEATURE_64) pmap_mmu_install(MMU_TYPE_G5, BUS_PROBE_GENERIC); else pmap_mmu_install(MMU_TYPE_OEA, BUS_PROBE_GENERIC); } /* * Shutdown the CPU as much as possible. */ void cpu_halt(void) { OF_exit(); } int ptrace_single_step(struct thread *td) { struct trapframe *tf; tf = td->td_frame; tf->srr1 |= PSL_SE; return (0); } int ptrace_clear_single_step(struct thread *td) { struct trapframe *tf; tf = td->td_frame; tf->srr1 &= ~PSL_SE; return (0); } void kdb_cpu_clear_singlestep(void) { kdb_frame->srr1 &= ~PSL_SE; } void kdb_cpu_set_singlestep(void) { kdb_frame->srr1 |= PSL_SE; } /* * Initialise a struct pcpu.
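 *
 * (On powerpc64 the new pcpu inherits a copy of the current CPU's
 * software SLB cache, so an AP starts out with the same kernel
 * segment mappings as the CPU that created it.)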
*/ void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t sz) { #ifdef __powerpc64__ /* Copy the SLB contents from the current CPU */ memcpy(pcpu->pc_slb, PCPU_GET(slb), sizeof(pcpu->pc_slb)); #endif } #ifndef __powerpc64__ uint64_t va_to_vsid(pmap_t pm, vm_offset_t va) { return ((pm->pm_sr[(uintptr_t)va >> ADDR_SR_SHFT]) & SR_VSID_MASK); } #endif vm_offset_t pmap_early_io_map(vm_paddr_t pa, vm_size_t size) { return (pa); } /* From p3-53 of the MPC7450 RISC Microprocessor Family Reference Manual */ void flush_disable_caches(void) { register_t msr; register_t msscr0; register_t cache_reg; volatile uint32_t *memp; uint32_t temp; int i; int x; msr = mfmsr(); powerpc_sync(); mtmsr(msr & ~(PSL_EE | PSL_DR)); msscr0 = mfspr(SPR_MSSCR0); msscr0 &= ~MSSCR0_L2PFE; mtspr(SPR_MSSCR0, msscr0); powerpc_sync(); isync(); __asm__ __volatile__("dssall; sync"); powerpc_sync(); isync(); __asm__ __volatile__("dcbf 0,%0" :: "r"(0)); __asm__ __volatile__("dcbf 0,%0" :: "r"(0)); __asm__ __volatile__("dcbf 0,%0" :: "r"(0)); /* Lock the L1 Data cache. */ mtspr(SPR_LDSTCR, mfspr(SPR_LDSTCR) | 0xFF); powerpc_sync(); isync(); mtspr(SPR_LDSTCR, 0); /* * Perform this in two stages: Flush the cache starting in RAM, then do it * from ROM. */ memp = (volatile uint32_t *)0x00000000; for (i = 0; i < 128 * 1024; i++) { temp = *memp; __asm__ __volatile__("dcbf 0,%0" :: "r"(memp)); memp += 32/sizeof(*memp); } memp = (volatile uint32_t *)0xfff00000; x = 0xfe; for (; x != 0xff;) { mtspr(SPR_LDSTCR, x); for (i = 0; i < 128; i++) { temp = *memp; __asm__ __volatile__("dcbf 0,%0" :: "r"(memp)); memp += 32/sizeof(*memp); } x = ((x << 1) | 1) & 0xff; } mtspr(SPR_LDSTCR, 0); cache_reg = mfspr(SPR_L2CR); if (cache_reg & L2CR_L2E) { cache_reg &= ~(L2CR_L2IO_7450 | L2CR_L2DO_7450); mtspr(SPR_L2CR, cache_reg); powerpc_sync(); mtspr(SPR_L2CR, cache_reg | L2CR_L2HWF); while (mfspr(SPR_L2CR) & L2CR_L2HWF) ; /* Busy wait for cache to flush */ powerpc_sync(); cache_reg &= ~L2CR_L2E; mtspr(SPR_L2CR, cache_reg); powerpc_sync(); mtspr(SPR_L2CR, cache_reg | L2CR_L2I); powerpc_sync(); while (mfspr(SPR_L2CR) & L2CR_L2I) ; /* Busy wait for L2 cache invalidate */ powerpc_sync(); } cache_reg = mfspr(SPR_L3CR); if (cache_reg & L3CR_L3E) { cache_reg &= ~(L3CR_L3IO | L3CR_L3DO); mtspr(SPR_L3CR, cache_reg); powerpc_sync(); mtspr(SPR_L3CR, cache_reg | L3CR_L3HWF); while (mfspr(SPR_L3CR) & L3CR_L3HWF) ; /* Busy wait for cache to flush */ powerpc_sync(); cache_reg &= ~L3CR_L3E; mtspr(SPR_L3CR, cache_reg); powerpc_sync(); mtspr(SPR_L3CR, cache_reg | L3CR_L3I); powerpc_sync(); while (mfspr(SPR_L3CR) & L3CR_L3I) ; /* Busy wait for L3 cache invalidate */ powerpc_sync(); } mtspr(SPR_HID0, mfspr(SPR_HID0) & ~HID0_DCE); powerpc_sync(); isync(); mtmsr(msr); } void cpu_sleep() { static u_quad_t timebase = 0; static register_t sprgs[4]; static register_t srrs[2]; jmp_buf resetjb; struct thread *fputd; struct thread *vectd; register_t hid0; register_t msr; register_t saved_msr; ap_pcpu = pcpup; PCPU_SET(restore, &resetjb); saved_msr = mfmsr(); fputd = PCPU_GET(fputhread); vectd = PCPU_GET(vecthread); if (fputd != NULL) save_fpu(fputd); if (vectd != NULL) save_vec(vectd); if (setjmp(resetjb) == 0) { sprgs[0] = mfspr(SPR_SPRG0); sprgs[1] = mfspr(SPR_SPRG1); sprgs[2] = mfspr(SPR_SPRG2); sprgs[3] = mfspr(SPR_SPRG3); srrs[0] = mfspr(SPR_SRR0); srrs[1] = mfspr(SPR_SRR1); timebase = mftb(); powerpc_sync(); flush_disable_caches(); hid0 = mfspr(SPR_HID0); hid0 = (hid0 & ~(HID0_DOZE | HID0_NAP)) | HID0_SLEEP; powerpc_sync(); isync(); msr = mfmsr() | PSL_POW; mtspr(SPR_HID0, 
hid0); powerpc_sync(); while (1) mtmsr(msr); } mttb(timebase); PCPU_SET(curthread, curthread); PCPU_SET(curpcb, curthread->td_pcb); pmap_activate(curthread); powerpc_sync(); mtspr(SPR_SPRG0, sprgs[0]); mtspr(SPR_SPRG1, sprgs[1]); mtspr(SPR_SPRG2, sprgs[2]); mtspr(SPR_SPRG3, sprgs[3]); mtspr(SPR_SRR0, srrs[0]); mtspr(SPR_SRR1, srrs[1]); mtmsr(saved_msr); if (fputd == curthread) enable_fpu(curthread); if (vectd == curthread) enable_vec(curthread); powerpc_sync(); } diff --git a/sys/powerpc/aim/locore64.S b/sys/powerpc/aim/locore64.S index fc2fa8de3255..7f9379341db1 100644 --- a/sys/powerpc/aim/locore64.S +++ b/sys/powerpc/aim/locore64.S @@ -1,225 +1,218 @@ /* $FreeBSD$ */ /* $NetBSD: locore.S,v 1.24 2000/05/31 05:09:17 thorpej Exp $ */ /*- * Copyright (C) 2001 Benno Rice * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "assym.s" #include #include #include #include #include #ifdef _CALL_ELF .abiversion _CALL_ELF #endif /* Locate the per-CPU data structure */ #define GET_CPUINFO(r) \ mfsprg0 r #define GET_TOCBASE(r) \ li r,TRAP_TOCBASE; /* Magic address for TOC */ \ ld r,0(r) /* Glue for linker script */ .globl kernbase .set kernbase, KERNBASE /* * Globals */ .data .align 3 GLOBAL(__startkernel) .llong begin GLOBAL(__endkernel) .llong end .align 4 #define TMPSTKSZ 16384 /* 16K temporary stack */ GLOBAL(tmpstk) .space TMPSTKSZ TOC_ENTRY(tmpstk) .text .globl btext btext: /* * This symbol is here for the benefit of kvm_mkdb, and is supposed to * mark the start of kernel text. */ .globl kernel_text kernel_text: /* * Startup entry. Note, this must be the first thing in the text * segment! * * Calling convention: * r3: Flattened Device Tree pointer (or zero) * r4: ignored * r5: OF client interface pointer (or zero) * r6: Loader metadata pointer (or zero) */ .text ASENTRY_NOPROF(__start) /* Set up the TOC pointer */ b 0f .align 3 0: nop bl 1f .llong __tocbase + 0x8000 - . 1: mflr %r2 ld %r1,0(%r2) add %r2,%r1,%r2 /* Get load offset */ ld %r31,-0x8000(%r2) /* First TOC entry is TOC base */ subf %r31,%r31,%r2 /* Subtract from real TOC base to get base */ /* Set up the stack pointer */ ld %r1,TOC_REF(tmpstk)(%r2) addi %r1,%r1,TMPSTKSZ-96 add %r1,%r1,%r31 /* Relocate kernel */ std %r3,48(%r1) std %r4,56(%r1) std %r5,64(%r1) std %r6,72(%r1) bl 1f .llong _DYNAMIC-. 1: mflr %r3 ld %r4,0(%r3) add %r3,%r4,%r3 mr %r4,%r31 bl elf_reloc_self nop ld %r3,48(%r1) ld %r4,56(%r1) ld %r5,64(%r1) ld %r6,72(%r1) - /* Switch to 64-bit mode */ - mfmsr %r9 - li %r8,1 - insrdi %r9,%r8,1,0 - mtmsrd %r9 - isync - /* Begin CPU init */ mr %r4,%r2 /* Replace ignored r4 with tocbase for trap handlers */ bl powerpc_init nop /* Set stack pointer to new value and branch to mi_startup */ mr %r1, %r3 li %r3, 0 std %r3, 0(%r1) bl mi_startup nop /* If this returns (it won't), go back to firmware */ b OF_exit nop /* * int setfault() * * Similar to setjmp to setup for handling faults on accesses to user memory. * Any routine using this may only call bcopy, either the form below, * or the (currently used) C code optimized, so it doesn't use any non-volatile * registers. */ ASENTRY_NOPROF(setfault) mflr 0 mfcr 12 mfsprg 4,0 ld 4,TD_PCB(13) /* curthread = r13 */ std 3,PCB_ONFAULT(4) std 0,0(3) std 1,8(3) std 2,16(3) std %r12,24(%r3) /* Save the non-volatile GP regs. 
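 *
 * (setfault() follows the setjmp() convention: the direct call falls
 * through to the final "xor 3,3,3; blr" below and returns 0, while a
 * later fault on a user-memory access unwinds through pcb_onfault
 * back to this frame with a nonzero value in r3, letting the caller
 * return an EFAULT-style error instead of panicking.)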
*/ std %r13,24+1*8(%r3) std %r14,24+2*8(%r3) std %r15,24+3*8(%r3) std %r16,24+4*8(%r3) std %r17,24+5*8(%r3) std %r18,24+6*8(%r3) std %r19,24+7*8(%r3) std %r20,24+8*8(%r3) std %r21,24+9*8(%r3) std %r22,24+10*8(%r3) std %r23,24+11*8(%r3) std %r24,24+12*8(%r3) std %r25,24+13*8(%r3) std %r26,24+14*8(%r3) std %r27,24+15*8(%r3) std %r28,24+16*8(%r3) std %r29,24+17*8(%r3) std %r30,24+18*8(%r3) std %r31,24+19*8(%r3) xor 3,3,3 blr #include diff --git a/sys/powerpc/aim/mp_cpudep.c b/sys/powerpc/aim/mp_cpudep.c index 59a24e22e62d..cedf53bdaae2 100644 --- a/sys/powerpc/aim/mp_cpudep.c +++ b/sys/powerpc/aim/mp_cpudep.c @@ -1,392 +1,392 @@ /*- * Copyright (c) 2008 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include void *ap_pcpu; static register_t bsp_state[8] __aligned(8); static void cpudep_save_config(void *dummy); SYSINIT(cpu_save_config, SI_SUB_CPU, SI_ORDER_ANY, cpudep_save_config, NULL); void cpudep_ap_early_bootstrap(void) { #ifndef __powerpc64__ register_t reg; #endif __asm __volatile("mtsprg 0, %0" :: "r"(ap_pcpu)); powerpc_sync(); switch (mfpvr() >> 16) { case IBM970: case IBM970FX: case IBM970MP: /* Restore HID4 and HID5, which are necessary for the MMU */ #ifdef __powerpc64__ mtspr(SPR_HID4, bsp_state[2]); powerpc_sync(); isync(); mtspr(SPR_HID5, bsp_state[3]); powerpc_sync(); isync(); #else __asm __volatile("ld %0, 16(%2); sync; isync; \ mtspr %1, %0; sync; isync;" - : "=r"(reg) : "K"(SPR_HID4), "r"(bsp_state)); + : "=r"(reg) : "K"(SPR_HID4), "b"(bsp_state)); __asm __volatile("ld %0, 24(%2); sync; isync; \ mtspr %1, %0; sync; isync;" - : "=r"(reg) : "K"(SPR_HID5), "r"(bsp_state)); + : "=r"(reg) : "K"(SPR_HID5), "b"(bsp_state)); #endif powerpc_sync(); break; } } uintptr_t cpudep_ap_bootstrap(void) { register_t msr, sp; msr = PSL_KERNSET & ~PSL_EE; mtmsr(msr); pcpup->pc_curthread = pcpup->pc_idlethread; #ifdef __powerpc64__ __asm __volatile("mr 13,%0" :: "r"(pcpup->pc_curthread)); #else __asm __volatile("mr 2,%0" :: "r"(pcpup->pc_curthread)); #endif pcpup->pc_curpcb = pcpup->pc_curthread->td_pcb; sp = pcpup->pc_curpcb->pcb_sp; return (sp); } static register_t mpc74xx_l2_enable(register_t l2cr_config) { register_t ccr, bit; uint16_t vers; vers = mfpvr() >> 16; switch (vers) { case MPC7400: case MPC7410: bit = L2CR_L2IP; break; default: bit = L2CR_L2I; break; } ccr = mfspr(SPR_L2CR); if (ccr & L2CR_L2E) return (ccr); /* Configure L2 cache. */ ccr = l2cr_config & ~L2CR_L2E; mtspr(SPR_L2CR, ccr | L2CR_L2I); do { ccr = mfspr(SPR_L2CR); } while (ccr & bit); powerpc_sync(); mtspr(SPR_L2CR, l2cr_config); powerpc_sync(); return (l2cr_config); } static register_t mpc745x_l3_enable(register_t l3cr_config) { register_t ccr; ccr = mfspr(SPR_L3CR); if (ccr & L3CR_L3E) return (ccr); /* Configure L3 cache. */ ccr = l3cr_config & ~(L3CR_L3E | L3CR_L3I | L3CR_L3PE | L3CR_L3CLKEN); mtspr(SPR_L3CR, ccr); ccr |= 0x4000000; /* Magic, but documented. 
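 * (this follows the MPC745x L3 bring-up sequence: the code below
 * flash-invalidates via L3CR[L3I] and spins until the bit clears,
 * toggles L3CLKEN off and on with ~100us settling delays, and only
 * then sets L3CR[L3E] to enable the cache)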
*/ mtspr(SPR_L3CR, ccr); ccr |= L3CR_L3CLKEN; mtspr(SPR_L3CR, ccr); mtspr(SPR_L3CR, ccr | L3CR_L3I); while (mfspr(SPR_L3CR) & L3CR_L3I) ; mtspr(SPR_L3CR, ccr & ~L3CR_L3CLKEN); powerpc_sync(); DELAY(100); mtspr(SPR_L3CR, ccr); powerpc_sync(); DELAY(100); ccr |= L3CR_L3E; mtspr(SPR_L3CR, ccr); powerpc_sync(); return(ccr); } static register_t mpc74xx_l1d_enable(void) { register_t hid; hid = mfspr(SPR_HID0); if (hid & HID0_DCE) return (hid); /* Enable L1 D-cache */ hid |= HID0_DCE; powerpc_sync(); mtspr(SPR_HID0, hid | HID0_DCFI); powerpc_sync(); return (hid); } static register_t mpc74xx_l1i_enable(void) { register_t hid; hid = mfspr(SPR_HID0); if (hid & HID0_ICE) return (hid); /* Enable L1 I-cache */ hid |= HID0_ICE; isync(); mtspr(SPR_HID0, hid | HID0_ICFI); isync(); return (hid); } static void cpudep_save_config(void *dummy) { uint16_t vers; vers = mfpvr() >> 16; switch(vers) { case IBM970: case IBM970FX: case IBM970MP: #ifdef __powerpc64__ bsp_state[0] = mfspr(SPR_HID0); bsp_state[1] = mfspr(SPR_HID1); bsp_state[2] = mfspr(SPR_HID4); bsp_state[3] = mfspr(SPR_HID5); #else __asm __volatile ("mfspr %0,%2; mr %1,%0; srdi %0,%0,32" : "=r" (bsp_state[0]),"=r" (bsp_state[1]) : "K" (SPR_HID0)); __asm __volatile ("mfspr %0,%2; mr %1,%0; srdi %0,%0,32" : "=r" (bsp_state[2]),"=r" (bsp_state[3]) : "K" (SPR_HID1)); __asm __volatile ("mfspr %0,%2; mr %1,%0; srdi %0,%0,32" : "=r" (bsp_state[4]),"=r" (bsp_state[5]) : "K" (SPR_HID4)); __asm __volatile ("mfspr %0,%2; mr %1,%0; srdi %0,%0,32" : "=r" (bsp_state[6]),"=r" (bsp_state[7]) : "K" (SPR_HID5)); #endif powerpc_sync(); break; case IBMCELLBE: #ifdef NOTYET /* Causes problems if in instruction stream on 970 */ if (mfmsr() & PSL_HV) { bsp_state[0] = mfspr(SPR_HID0); bsp_state[1] = mfspr(SPR_HID1); bsp_state[2] = mfspr(SPR_HID4); bsp_state[3] = mfspr(SPR_HID6); bsp_state[4] = mfspr(SPR_CELL_TSCR); } #endif bsp_state[5] = mfspr(SPR_CELL_TSRL); break; case MPC7450: case MPC7455: case MPC7457: /* Only MPC745x CPUs have an L3 cache. */ bsp_state[3] = mfspr(SPR_L3CR); /* Fallthrough */ case MPC7400: case MPC7410: case MPC7447A: case MPC7448: bsp_state[2] = mfspr(SPR_L2CR); bsp_state[1] = mfspr(SPR_HID1); bsp_state[0] = mfspr(SPR_HID0); break; } } void cpudep_ap_setup() { register_t reg; uint16_t vers; vers = mfpvr() >> 16; /* The following is needed for restoring from sleep. */ #ifdef __powerpc64__ /* Writing to the time base register is hypervisor-privileged */ if (mfmsr() & PSL_HV) mttb(0); #else mttb(0); #endif switch(vers) { case IBM970: case IBM970FX: case IBM970MP: /* Set HIOR to 0 */ __asm __volatile("mtspr 311,%0" :: "r"(0)); powerpc_sync(); /* * The 970 has strange rules about how to update HID registers. 
* See Table 2-3, 970MP manual * * Note: HID4 and HID5 restored already in * cpudep_ap_early_bootstrap() */ __asm __volatile("mtasr %0; sync" :: "r"(0)); #ifdef __powerpc64__ __asm __volatile(" \ sync; isync; \ mtspr %1, %0; \ mfspr %0, %1; mfspr %0, %1; mfspr %0, %1; \ mfspr %0, %1; mfspr %0, %1; mfspr %0, %1; \ sync; isync" :: "r"(bsp_state[0]), "K"(SPR_HID0)); __asm __volatile("sync; isync; \ mtspr %1, %0; mtspr %1, %0; sync; isync" :: "r"(bsp_state[1]), "K"(SPR_HID1)); #else __asm __volatile(" \ ld %0,0(%2); \ sync; isync; \ mtspr %1, %0; \ mfspr %0, %1; mfspr %0, %1; mfspr %0, %1; \ mfspr %0, %1; mfspr %0, %1; mfspr %0, %1; \ sync; isync" - : "=r"(reg) : "K"(SPR_HID0), "r"(bsp_state)); + : "=r"(reg) : "K"(SPR_HID0), "b"(bsp_state)); __asm __volatile("ld %0, 8(%2); sync; isync; \ mtspr %1, %0; mtspr %1, %0; sync; isync" - : "=r"(reg) : "K"(SPR_HID1), "r"(bsp_state)); + : "=r"(reg) : "K"(SPR_HID1), "b"(bsp_state)); #endif powerpc_sync(); break; case IBMCELLBE: #ifdef NOTYET /* Causes problems if in instruction stream on 970 */ if (mfmsr() & PSL_HV) { mtspr(SPR_HID0, bsp_state[0]); mtspr(SPR_HID1, bsp_state[1]); mtspr(SPR_HID4, bsp_state[2]); mtspr(SPR_HID6, bsp_state[3]); mtspr(SPR_CELL_TSCR, bsp_state[4]); } #endif mtspr(SPR_CELL_TSRL, bsp_state[5]); break; case MPC7400: case MPC7410: case MPC7447A: case MPC7448: case MPC7450: case MPC7455: case MPC7457: /* XXX: Program the CPU ID into PIR */ __asm __volatile("mtspr 1023,%0" :: "r"(PCPU_GET(cpuid))); powerpc_sync(); isync(); mtspr(SPR_HID0, bsp_state[0]); isync(); mtspr(SPR_HID1, bsp_state[1]); isync(); /* Now enable the L3 cache. */ switch (vers) { case MPC7450: case MPC7455: case MPC7457: /* Only MPC745x CPUs have an L3 cache. */ reg = mpc745x_l3_enable(bsp_state[3]); default: break; } reg = mpc74xx_l2_enable(bsp_state[2]); reg = mpc74xx_l1d_enable(); reg = mpc74xx_l1i_enable(); break; default: #ifdef __powerpc64__ if (!(mfmsr() & PSL_HV)) /* Rely on HV to have set things up */ break; #endif printf("WARNING: Unknown CPU type. Cache performance may be " "suboptimal.\n"); break; } } diff --git a/sys/powerpc/booke/booke_machdep.c b/sys/powerpc/booke/booke_machdep.c index f102677202bd..a019597e842c 100644 --- a/sys/powerpc/booke/booke_machdep.c +++ b/sys/powerpc/booke/booke_machdep.c @@ -1,409 +1,411 @@ /*- * Copyright (C) 2006-2012 Semihalf * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 2001 Benno Rice * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * $NetBSD: machdep.c,v 1.74.2.1 2000/11/01 16:13:48 tv Exp $ */ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ddb.h" #include "opt_hwpmc_hooks.h" #include "opt_kstack_pages.h" #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(MPC85XX) || defined(QORIQ_DPAA) #include #endif #ifdef DDB #include #endif #ifdef DEBUG #define debugf(fmt, args...) printf(fmt, ##args) #else #define debugf(fmt, args...) #endif extern unsigned char kernel_text[]; extern unsigned char _etext[]; extern unsigned char _edata[]; extern unsigned char __bss_start[]; extern unsigned char __sbss_start[]; extern unsigned char __sbss_end[]; extern unsigned char _end[]; extern vm_offset_t __endkernel; /* * Bootinfo is passed to us by legacy loaders. Save the address of the * structure to handle backward compatibility. */ uint32_t *bootinfo; void print_kernel_section_addr(void); void print_kenv(void); -uintptr_t booke_init(uint32_t, uint32_t); +uintptr_t booke_init(u_long, u_long); void ivor_setup(void); extern void *interrupt_vector_base; extern void *int_critical_input; extern void *int_machine_check; extern void *int_data_storage; extern void *int_instr_storage; extern void *int_external_input; extern void *int_alignment; extern void *int_fpu; extern void *int_program; extern void *int_syscall; extern void *int_decrementer; extern void *int_fixed_interval_timer; extern void *int_watchdog; extern void *int_data_tlb_error; extern void *int_inst_tlb_error; extern void *int_debug; extern void *int_vec; extern void *int_vecast; #ifdef HWPMC_HOOKS extern void *int_performance_counter; #endif #define SET_TRAP(ivor, handler) \ KASSERT(((uintptr_t)(&handler) & ~0xffffUL) == \ ((uintptr_t)(&interrupt_vector_base) & ~0xffffUL), \ ("Handler " #handler " too far from interrupt vector base")); \ mtspr(ivor, (uintptr_t)(&handler) & 0xffffUL); uintptr_t powerpc_init(vm_offset_t fdt, vm_offset_t, vm_offset_t, void *mdp); void booke_cpu_init(void); void booke_cpu_init(void) { + cpu_features |= PPC_FEATURE_BOOKE; + pmap_mmu_install(MMU_TYPE_BOOKE, BUS_PROBE_GENERIC); } void ivor_setup(void) { mtspr(SPR_IVPR, ((uintptr_t)&interrupt_vector_base) & 0xffff0000); SET_TRAP(SPR_IVOR0, int_critical_input); SET_TRAP(SPR_IVOR1, int_machine_check); SET_TRAP(SPR_IVOR2, int_data_storage); SET_TRAP(SPR_IVOR3, int_instr_storage); SET_TRAP(SPR_IVOR4, int_external_input); SET_TRAP(SPR_IVOR5, int_alignment); SET_TRAP(SPR_IVOR6, int_program); SET_TRAP(SPR_IVOR8, int_syscall); SET_TRAP(SPR_IVOR10, int_decrementer); SET_TRAP(SPR_IVOR11, int_fixed_interval_timer); SET_TRAP(SPR_IVOR12, int_watchdog); SET_TRAP(SPR_IVOR13, int_data_tlb_error); SET_TRAP(SPR_IVOR14, int_inst_tlb_error); SET_TRAP(SPR_IVOR15, int_debug); #ifdef HWPMC_HOOKS SET_TRAP(SPR_IVOR35, int_performance_counter); #endif switch ((mfpvr() >> 16) & 0xffff) { case FSL_E6500: SET_TRAP(SPR_IVOR32, int_vec); SET_TRAP(SPR_IVOR33, int_vecast); /* FALLTHROUGH */ case FSL_E500mc: case FSL_E5500: SET_TRAP(SPR_IVOR7, int_fpu); } } static int booke_check_for_fdt(uint32_t arg1, vm_offset_t *dtbp) { void *ptr; if (arg1 % 8 != 0) return (-1); ptr = (void *)pmap_early_io_map(arg1, PAGE_SIZE); if (fdt_check_header(ptr) != 0) 
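/* not a valid flattened device tree: fdt_check_header() rejects blobs with a bad magic number or unsupported version */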
return (-1); *dtbp = (vm_offset_t)ptr; return (0); } uintptr_t -booke_init(uint32_t arg1, uint32_t arg2) +booke_init(u_long arg1, u_long arg2) { uintptr_t ret; void *mdp; vm_offset_t dtbp, end; end = (uintptr_t)_end; dtbp = (vm_offset_t)NULL; /* Set up TLB initially */ bootinfo = NULL; bzero(__sbss_start, __sbss_end - __sbss_start); bzero(__bss_start, _end - __bss_start); tlb1_init(); /* * Handle the various ways we can get loaded and started: * - FreeBSD's loader passes the pointer to the metadata * in arg1, with arg2 undefined. arg1 has a value that's * relative to the kernel's link address (i.e. larger * than 0xc0000000). * - Juniper's loader passes the metadata pointer in arg2 * and sets arg1 to zero. This is to signal that the * loader maps the kernel and starts it at its link * address (unlike the FreeBSD loader). * - U-Boot passes the standard argc and argv parameters * in arg1 and arg2 (resp). arg1 is between 1 and some * relatively small number, such as 64K. arg2 is the * physical address of the argv vector. * - ePAPR loaders pass an FDT blob in r3 (arg1) and the magic hex * string 0x45504150 ('EPAP') in r6 (which has been lost by now). * r4 (arg2) is supposed to be set to zero, but is not always. */ if (arg1 == 0) /* Juniper loader */ mdp = (void *)arg2; else if (booke_check_for_fdt(arg1, &dtbp) == 0) { /* ePAPR */ end = roundup(end, 8); memmove((void *)end, (void *)dtbp, fdt_totalsize((void *)dtbp)); dtbp = end; end += fdt_totalsize((void *)dtbp); __endkernel = end; mdp = NULL; } else if (arg1 > (uintptr_t)kernel_text) /* FreeBSD loader */ mdp = (void *)arg1; else /* U-Boot */ mdp = NULL; - ret = powerpc_init(dtbp, 0, 0, mdp); - /* Default to 32 byte cache line size. */ switch ((mfpvr()) >> 16) { case FSL_E500mc: case FSL_E5500: case FSL_E6500: cacheline_size = 64; break; } + ret = powerpc_init(dtbp, 0, 0, mdp); + /* Enable caches */ booke_enable_l1_cache(); booke_enable_l2_cache(); booke_enable_bpred(); return (ret); } #define RES_GRANULE 32 extern uint32_t tlb0_miss_locks[]; /* Initialise a struct pcpu. */ void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t sz) { pcpu->pc_tid_next = TID_MIN; #ifdef SMP uint32_t *ptr; int words_per_gran = RES_GRANULE / sizeof(uint32_t); ptr = &tlb0_miss_locks[cpuid * words_per_gran]; pcpu->pc_booke_tlb_lock = ptr; *ptr = TLB_UNLOCKED; *(ptr + 1) = 0; /* recurse counter */ #endif } /* Shutdown the CPU as much as possible. */ void cpu_halt(void) { mtmsr(mfmsr() & ~(PSL_CE | PSL_EE | PSL_ME | PSL_DE)); while (1) ; } int ptrace_single_step(struct thread *td) { struct trapframe *tf; tf = td->td_frame; tf->srr1 |= PSL_DE; tf->cpu.booke.dbcr0 |= (DBCR0_IDM | DBCR0_IC); return (0); } int ptrace_clear_single_step(struct thread *td) { struct trapframe *tf; tf = td->td_frame; tf->srr1 &= ~PSL_DE; tf->cpu.booke.dbcr0 &= ~(DBCR0_IDM | DBCR0_IC); return (0); } void kdb_cpu_clear_singlestep(void) { register_t r; r = mfspr(SPR_DBCR0); mtspr(SPR_DBCR0, r & ~DBCR0_IC); kdb_frame->srr1 &= ~PSL_DE; } void kdb_cpu_set_singlestep(void) { register_t r; r = mfspr(SPR_DBCR0); mtspr(SPR_DBCR0, r | DBCR0_IC | DBCR0_IDM); kdb_frame->srr1 |= PSL_DE; } diff --git a/sys/powerpc/include/cpu.h b/sys/powerpc/include/cpu.h index ba45eb0372e4..fffb2b4267ae 100644 --- a/sys/powerpc/include/cpu.h +++ b/sys/powerpc/include/cpu.h @@ -1,102 +1,115 @@ /*- * Copyright (C) 1995-1997 Wolfgang Solfrank. * Copyright (C) 1995-1997 TooLs GmbH. * All rights reserved. 
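The loader detection spelled out in booke_init()'s comment above reduces to a four-way test on arg1; a condensed restatement for orientation (a sketch only, with fdt_ok standing in for booke_check_for_fdt() succeeding):

	enum loader { JUNIPER, EPAPR, FREEBSD_LDR, UBOOT };

	static enum loader
	classify(u_long arg1, bool fdt_ok)
	{
		if (arg1 == 0)
			return (JUNIPER);	/* metadata pointer in arg2 */
		if (fdt_ok)
			return (EPAPR);		/* arg1 points at the DTB */
		if (arg1 > (u_long)kernel_text)
			return (FREEBSD_LDR);	/* arg1 is loader metadata */
		return (UBOOT);			/* arg1 is a small argc */
	}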
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: cpu.h,v 1.11 2000/05/26 21:19:53 thorpej Exp $ * $FreeBSD$ */ #ifndef _MACHINE_CPU_H_ #define _MACHINE_CPU_H_ #include #include #include /* * CPU Feature Attributes * * These are defined in the PowerPC ELF ABI for the AT_HWCAP vector, * and are exported to userland via the machdep.cpu_features * sysctl. */ extern int cpu_features; +extern int cpu_features2; #define PPC_FEATURE_32 0x80000000 /* Always true */ #define PPC_FEATURE_64 0x40000000 /* Defined on a 64-bit CPU */ #define PPC_FEATURE_HAS_ALTIVEC 0x10000000 #define PPC_FEATURE_HAS_FPU 0x08000000 #define PPC_FEATURE_HAS_MMU 0x04000000 -#define PPC_FEATURE_UNIFIED_CACHE 0x01000000 -#define PPC_FEATURE_HAS_VSX 0x00000080 +#define PPC_FEATURE_UNIFIED_CACHE 0x01000000 +#define PPC_FEATURE_BOOKE 0x00008000 +#define PPC_FEATURE_SMT 0x00004000 +#define PPC_FEATURE_ARCH_2_05 0x00001000 +#define PPC_FEATURE_HAS_DFP 0x00000400 +#define PPC_FEATURE_ARCH_2_06 0x00000100 +#define PPC_FEATURE_HAS_VSX 0x00000080 + +#define PPC_FEATURE2_ARCH_2_07 0x80000000 +#define PPC_FEATURE2_HAS_HTM 0x40000000 +#define PPC_FEATURE2_HAS_VCRYPTO 0x02000000 #define PPC_FEATURE_BITMASK \ "\20" \ "\040PPC32\037PPC64\035ALTIVEC\034FPU\033MMU\031UNIFIEDCACHE" \ - "\010VSX" + "\020BOOKE\017SMT\015ARCH205\013DFP\011ARCH206\010VSX" +#define PPC_FEATURE2_BITMASK \ + "\20" \ + "\040ARCH207\037HTM\032VCRYPTO" #define TRAPF_USERMODE(frame) (((frame)->srr1 & PSL_PR) != 0) #define TRAPF_PC(frame) ((frame)->srr0) /* * CTL_MACHDEP definitions. 
*/ #define CPU_CACHELINE 1 static __inline u_int64_t get_cyclecount(void) { u_int32_t _upper, _lower; u_int64_t _time; __asm __volatile( "mftb %0\n" "mftbu %1" : "=r" (_lower), "=r" (_upper)); _time = (u_int64_t)_upper; _time = (_time << 32) + _lower; return (_time); } #define cpu_getstack(td) ((td)->td_frame->fixreg[1]) #define cpu_spinwait() __asm __volatile("or 27,27,27") /* yield */ extern char btext[]; extern char etext[]; void cpu_halt(void); void cpu_reset(void); void cpu_sleep(void); void flush_disable_caches(void); void fork_trampoline(void); void swi_vm(void *); #endif /* _MACHINE_CPU_H_ */ diff --git a/sys/powerpc/include/intr_machdep.h b/sys/powerpc/include/intr_machdep.h index c2529d55b484..32a94be0220d 100644 --- a/sys/powerpc/include/intr_machdep.h +++ b/sys/powerpc/include/intr_machdep.h @@ -1,61 +1,61 @@ /*- * Copyright (C) 2002 Benno Rice. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_INTR_MACHDEP_H_ #define _MACHINE_INTR_MACHDEP_H_ #define INTR_VECTORS 256 -#define MAX_PICS 5 +#define MAX_PICS 16 #define MAP_IRQ(node, pin) powerpc_get_irq(node, pin) /* * Default base address for MSI messages on PowerPC */ #define MSI_INTEL_ADDR_BASE 0xfee00000 extern device_t root_pic; struct trapframe; driver_filter_t powerpc_ipi_handler; void intrcnt_add(const char *name, u_long **countp); void powerpc_register_pic(device_t, uint32_t, u_int, u_int, u_int); u_int powerpc_get_irq(uint32_t, u_int); void powerpc_dispatch_intr(u_int, struct trapframe *); int powerpc_enable_intr(void); int powerpc_setup_intr(const char *, u_int, driver_filter_t, driver_intr_t, void *, enum intr_type, void **); int powerpc_teardown_intr(void *); int powerpc_bind_intr(u_int irq, u_char cpu); int powerpc_config_intr(int, enum intr_trigger, enum intr_polarity); int powerpc_fw_config_intr(int irq, int sense_code); #endif /* _MACHINE_INTR_MACHDEP_H_ */ diff --git a/sys/powerpc/mpc85xx/lbc.c b/sys/powerpc/mpc85xx/lbc.c index 62b56c35acfc..8bb92b289963 100644 --- a/sys/powerpc/mpc85xx/lbc.c +++ b/sys/powerpc/mpc85xx/lbc.c @@ -1,825 +1,825 @@ /*- * Copyright (c) 2006-2008, Juniper Networks, Inc. * Copyright (c) 2008 Semihalf, Rafal Czubak * Copyright (c) 2009 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Semihalf * under sponsorship from the FreeBSD Foundation. 
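The PPC_FEATURE_BITMASK and PPC_FEATURE2_BITMASK strings added to cpu.h above are printf(9) "%b" descriptors: the leading \20 selects base-16 output, and each subsequent \NNN is an octal bit position (counted from 1) followed by that bit's name. A hedged usage sketch:

	/* Might print: cpu features 0xdc000000<PPC32,PPC64,ALTIVEC,FPU,MMU> */
	printf("cpu features %b\n", cpu_features, PPC_FEATURE_BITMASK);
	printf("cpu features2 %b\n", cpu_features2, PPC_FEATURE2_BITMASK);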
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ofw_bus_if.h" #include "lbc.h" #ifdef DEBUG #define debugf(fmt, args...) do { printf("%s(): ", __func__); \ printf(fmt,##args); } while (0) #else #define debugf(fmt, args...) #endif static MALLOC_DEFINE(M_LBC, "localbus", "localbus devices information"); static int lbc_probe(device_t); static int lbc_attach(device_t); static int lbc_shutdown(device_t); static struct resource *lbc_alloc_resource(device_t, device_t, int, int *, u_long, u_long, u_long, u_int); static int lbc_print_child(device_t, device_t); static int lbc_release_resource(device_t, device_t, int, int, struct resource *); static const struct ofw_bus_devinfo *lbc_get_devinfo(device_t, device_t); /* * Bus interface definition */ static device_method_t lbc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, lbc_probe), DEVMETHOD(device_attach, lbc_attach), DEVMETHOD(device_shutdown, lbc_shutdown), /* Bus interface */ DEVMETHOD(bus_print_child, lbc_print_child), DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), DEVMETHOD(bus_teardown_intr, NULL), DEVMETHOD(bus_alloc_resource, lbc_alloc_resource), DEVMETHOD(bus_release_resource, lbc_release_resource), DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), /* OFW bus interface */ DEVMETHOD(ofw_bus_get_devinfo, lbc_get_devinfo), DEVMETHOD(ofw_bus_get_compat, ofw_bus_gen_get_compat), DEVMETHOD(ofw_bus_get_model, ofw_bus_gen_get_model), DEVMETHOD(ofw_bus_get_name, ofw_bus_gen_get_name), DEVMETHOD(ofw_bus_get_node, ofw_bus_gen_get_node), DEVMETHOD(ofw_bus_get_type, ofw_bus_gen_get_type), { 0, 0 } }; static driver_t lbc_driver = { "lbc", lbc_methods, sizeof(struct lbc_softc) }; devclass_t lbc_devclass; DRIVER_MODULE(lbc, ofwbus, lbc_driver, lbc_devclass, 0, 0); /* * Calculate address mask used by OR(n) registers. Use memory region size to * determine mask value. The size must be a power of two and within the range * of 32KB - 4GB. Otherwise error code is returned. 
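* For a size of 2^n bytes the mask works out to 0xffff8000 << (n - 15): * 32KB gives 0xffff8000, 64KB gives 0xffff0000 and 1MB gives 0xfff00000.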
Value representing * 4GB size can be passed as 0xffffffff. */ static uint32_t lbc_address_mask(uint32_t size) { int n = 15; - if (size == ~0UL) + if (size == ~0) return (0); while (n < 32) { - if (size == (1UL << n)) + if (size == (1U << n)) break; n++; } if (n == 32) return (EINVAL); return (0xffff8000 << (n - 15)); } static void lbc_banks_unmap(struct lbc_softc *sc) { int r; r = 0; while (r < LBC_DEV_MAX) { if (sc->sc_range[r].size == 0) return; pmap_unmapdev(sc->sc_range[r].kva, sc->sc_range[r].size); law_disable(OCP85XX_TGTIF_LBC, sc->sc_range[r].addr, sc->sc_range[r].size); r++; } } static int lbc_banks_map(struct lbc_softc *sc) { vm_paddr_t end, start; vm_size_t size; u_int i, r, ranges, s; int error; bzero(sc->sc_range, sizeof(sc->sc_range)); /* * Determine number of discontiguous address ranges to program. */ ranges = 0; for (i = 0; i < LBC_DEV_MAX; i++) { size = sc->sc_banks[i].size; if (size == 0) continue; start = sc->sc_banks[i].addr; for (r = 0; r < ranges; r++) { /* Avoid wrap-around bugs. */ end = sc->sc_range[r].addr - 1 + sc->sc_range[r].size; if (start > 0 && end == start - 1) { sc->sc_range[r].size += size; break; } /* Avoid wrap-around bugs. */ end = start - 1 + size; if (sc->sc_range[r].addr > 0 && end == sc->sc_range[r].addr - 1) { sc->sc_range[r].addr = start; sc->sc_range[r].size += size; break; } } if (r == ranges) { /* New range; add using insertion sort */ r = 0; while (r < ranges && sc->sc_range[r].addr < start) r++; for (s = ranges; s > r; s--) sc->sc_range[s] = sc->sc_range[s-1]; sc->sc_range[r].addr = start; sc->sc_range[r].size = size; ranges++; } } /* * Ranges are sorted so quickly go over the list to merge ranges * that grew toward each other while building the ranges. */ r = 0; while (r < ranges - 1) { end = sc->sc_range[r].addr + sc->sc_range[r].size; if (end != sc->sc_range[r+1].addr) { r++; continue; } sc->sc_range[r].size += sc->sc_range[r+1].size; for (s = r + 1; s < ranges - 1; s++) sc->sc_range[s] = sc->sc_range[s+1]; bzero(&sc->sc_range[s], sizeof(sc->sc_range[s])); ranges--; } /* * Configure LAW for the LBC ranges and map the physical memory * range into KVA. */ for (r = 0; r < ranges; r++) { start = sc->sc_range[r].addr; size = sc->sc_range[r].size; error = law_enable(OCP85XX_TGTIF_LBC, start, size); if (error) return (error); sc->sc_range[r].kva = (vm_offset_t)pmap_mapdev(start, size); } /* XXX: need something better here? */ if (ranges == 0) return (EINVAL); /* Assign KVA to banks based on the enclosing range. */ for (i = 0; i < LBC_DEV_MAX; i++) { size = sc->sc_banks[i].size; if (size == 0) continue; start = sc->sc_banks[i].addr; for (r = 0; r < ranges; r++) { end = sc->sc_range[r].addr - 1 + sc->sc_range[r].size; if (start >= sc->sc_range[r].addr && start - 1 + size <= end) break; } if (r < ranges) { sc->sc_banks[i].kva = sc->sc_range[r].kva + (start - sc->sc_range[r].addr); } } return (0); } static int lbc_banks_enable(struct lbc_softc *sc) { - u_long size; + uint32_t size; uint32_t regval; int error, i; for (i = 0; i < LBC_DEV_MAX; i++) { size = sc->sc_banks[i].size; if (size == 0) continue; /* * Compute and program BR value. 
*/ regval = sc->sc_banks[i].addr; switch (sc->sc_banks[i].width) { case 8: regval |= (1 << 11); break; case 16: regval |= (2 << 11); break; case 32: regval |= (3 << 11); break; default: error = EINVAL; goto fail; } regval |= (sc->sc_banks[i].decc << 9); regval |= (sc->sc_banks[i].wp << 8); regval |= (sc->sc_banks[i].msel << 5); regval |= (sc->sc_banks[i].atom << 2); regval |= 1; bus_space_write_4(sc->sc_bst, sc->sc_bsh, LBC85XX_BR(i), regval); /* * Compute and program OR value. */ regval = lbc_address_mask(size); switch (sc->sc_banks[i].msel) { case LBCRES_MSEL_GPCM: /* TODO Add flag support for option registers */ regval |= 0x0ff7; break; case LBCRES_MSEL_FCM: /* TODO Add flag support for options register */ regval |= 0x0796; break; case LBCRES_MSEL_UPMA: case LBCRES_MSEL_UPMB: case LBCRES_MSEL_UPMC: printf("UPM mode not supported yet!"); error = ENOSYS; goto fail; } bus_space_write_4(sc->sc_bst, sc->sc_bsh, LBC85XX_OR(i), regval); } return (0); fail: lbc_banks_unmap(sc); return (error); } static void fdt_lbc_fixup(phandle_t node, struct lbc_softc *sc, struct lbc_devinfo *di) { pcell_t width; int bank; if (OF_getprop(node, "bank-width", (void *)&width, sizeof(width)) <= 0) return; bank = di->di_bank; if (sc->sc_banks[bank].size == 0) return; /* Express width in bits. */ sc->sc_banks[bank].width = width * 8; } static int fdt_lbc_reg_decode(phandle_t node, struct lbc_softc *sc, struct lbc_devinfo *di) { u_long start, end, count; pcell_t *reg, *regptr; pcell_t addr_cells, size_cells; int tuple_size, tuples; int i, rv, bank; if (fdt_addrsize_cells(OF_parent(node), &addr_cells, &size_cells) != 0) return (ENXIO); tuple_size = sizeof(pcell_t) * (addr_cells + size_cells); tuples = OF_getprop_alloc(node, "reg", tuple_size, (void **)®); debugf("addr_cells = %d, size_cells = %d\n", addr_cells, size_cells); debugf("tuples = %d, tuple size = %d\n", tuples, tuple_size); if (tuples <= 0) /* No 'reg' property in this node. */ return (0); regptr = reg; for (i = 0; i < tuples; i++) { bank = fdt_data_get((void *)reg, 1); di->di_bank = bank; reg += 1; /* Get address/size. */ rv = fdt_data_to_res(reg, addr_cells - 1, size_cells, &start, &count); if (rv != 0) { resource_list_free(&di->di_res); goto out; } reg += addr_cells - 1 + size_cells; /* Calculate address range relative to VA base. */ start = sc->sc_banks[bank].kva + start; end = start + count - 1; debugf("reg addr bank = %d, start = %lx, end = %lx, " "count = %lx\n", bank, start, end, count); /* Use bank (CS) cell as rid. 
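* (lbc_alloc_resource() later finds this entry again through the child's * di_bank when it serves a default allocation)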
*/ resource_list_add(&di->di_res, SYS_RES_MEMORY, bank, start, end, count); } rv = 0; out: free(regptr, M_OFWPROP); return (rv); } static void lbc_intr(void *arg) { struct lbc_softc *sc = arg; uint32_t ltesr; ltesr = bus_space_read_4(sc->sc_bst, sc->sc_bsh, LBC85XX_LTESR); sc->sc_ltesr = ltesr; bus_space_write_4(sc->sc_bst, sc->sc_bsh, LBC85XX_LTESR, ltesr); wakeup(sc->sc_dev); } static int lbc_probe(device_t dev) { if (!(ofw_bus_is_compatible(dev, "fsl,lbc") || ofw_bus_is_compatible(dev, "fsl,elbc"))) return (ENXIO); device_set_desc(dev, "Freescale Local Bus Controller"); return (BUS_PROBE_DEFAULT); } static int lbc_attach(device_t dev) { struct lbc_softc *sc; struct lbc_devinfo *di; struct rman *rm; u_long offset, start, size; device_t cdev; phandle_t node, child; pcell_t *ranges, *rangesptr; int tuple_size, tuples; int par_addr_cells; int bank, error, i; sc = device_get_softc(dev); sc->sc_dev = dev; sc->sc_mrid = 0; sc->sc_mres = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->sc_mrid, RF_ACTIVE); if (sc->sc_mres == NULL) return (ENXIO); sc->sc_bst = rman_get_bustag(sc->sc_mres); sc->sc_bsh = rman_get_bushandle(sc->sc_mres); for (bank = 0; bank < LBC_DEV_MAX; bank++) { bus_space_write_4(sc->sc_bst, sc->sc_bsh, LBC85XX_BR(bank), 0); bus_space_write_4(sc->sc_bst, sc->sc_bsh, LBC85XX_OR(bank), 0); } /* * Initialize configuration register: * - enable Local Bus * - set data buffer control signal function * - disable parity byte select * - set ECC parity type * - set bus monitor timing and timer prescale */ bus_space_write_4(sc->sc_bst, sc->sc_bsh, LBC85XX_LBCR, 0); /* * Initialize clock ratio register: * - disable PLL bypass mode * - configure LCLK delay cycles for the assertion of LALE * - set system clock divider */ bus_space_write_4(sc->sc_bst, sc->sc_bsh, LBC85XX_LCRR, 0x00030008); bus_space_write_4(sc->sc_bst, sc->sc_bsh, LBC85XX_LTEDR, 0); bus_space_write_4(sc->sc_bst, sc->sc_bsh, LBC85XX_LTESR, ~0); bus_space_write_4(sc->sc_bst, sc->sc_bsh, LBC85XX_LTEIR, 0x64080001); sc->sc_irid = 0; sc->sc_ires = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->sc_irid, RF_ACTIVE | RF_SHAREABLE); if (sc->sc_ires != NULL) { error = bus_setup_intr(dev, sc->sc_ires, INTR_TYPE_MISC | INTR_MPSAFE, NULL, lbc_intr, sc, &sc->sc_icookie); if (error) { device_printf(dev, "could not activate interrupt\n"); bus_release_resource(dev, SYS_RES_IRQ, sc->sc_irid, sc->sc_ires); sc->sc_ires = NULL; } } sc->sc_ltesr = ~0; rangesptr = NULL; rm = &sc->sc_rman; rm->rm_type = RMAN_ARRAY; rm->rm_descr = "Local Bus Space"; rm->rm_start = 0UL; rm->rm_end = ~0UL; error = rman_init(rm); if (error) goto fail; error = rman_manage_region(rm, rm->rm_start, rm->rm_end); if (error) { rman_fini(rm); goto fail; } /* * Process 'ranges' property. 
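* Each tuple decodes below as (bank, offset within the bank, parent bus * address, size); the chip-select number is the leading child address cell.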
*/ node = ofw_bus_get_node(dev); if ((fdt_addrsize_cells(node, &sc->sc_addr_cells, &sc->sc_size_cells)) != 0) { error = ENXIO; goto fail; } par_addr_cells = fdt_parent_addr_cells(node); if (par_addr_cells > 2) { device_printf(dev, "unsupported parent #addr-cells\n"); error = ERANGE; goto fail; } tuple_size = sizeof(pcell_t) * (sc->sc_addr_cells + par_addr_cells + sc->sc_size_cells); tuples = OF_getprop_alloc(node, "ranges", tuple_size, (void **)&ranges); if (tuples < 0) { device_printf(dev, "could not retrieve 'ranges' property\n"); error = ENXIO; goto fail; } rangesptr = ranges; debugf("par addr_cells = %d, addr_cells = %d, size_cells = %d, " "tuple_size = %d, tuples = %d\n", par_addr_cells, sc->sc_addr_cells, sc->sc_size_cells, tuple_size, tuples); start = 0; size = 0; for (i = 0; i < tuples; i++) { /* The first cell is the bank (chip select) number. */ bank = fdt_data_get((void *)ranges, 1); if (bank < 0 || bank > LBC_DEV_MAX) { device_printf(dev, "bank out of range: %d\n", bank); error = ERANGE; goto fail; } ranges += 1; /* * Remaining cells of the child address define offset into * this CS. */ offset = fdt_data_get((void *)ranges, sc->sc_addr_cells - 1); ranges += sc->sc_addr_cells - 1; /* Parent bus start address of this bank. */ start = fdt_data_get((void *)ranges, par_addr_cells); ranges += par_addr_cells; size = fdt_data_get((void *)ranges, sc->sc_size_cells); ranges += sc->sc_size_cells; debugf("bank = %d, start = %lx, size = %lx\n", bank, start, size); sc->sc_banks[bank].addr = start + offset; sc->sc_banks[bank].size = size; /* * Attributes for the bank. * * XXX Note there are no DT bindings defined for them at the * moment, so we need to provide some defaults. */ sc->sc_banks[bank].width = 16; sc->sc_banks[bank].msel = LBCRES_MSEL_GPCM; sc->sc_banks[bank].decc = LBCRES_DECC_DISABLED; sc->sc_banks[bank].atom = LBCRES_ATOM_DISABLED; sc->sc_banks[bank].wp = 0; } /* * Initialize mem-mappings for the LBC banks (i.e. chip selects). */ error = lbc_banks_map(sc); if (error) goto fail; /* * Walk the localbus and add direct subordinates as our children. */ for (child = OF_child(node); child != 0; child = OF_peer(child)) { di = malloc(sizeof(*di), M_LBC, M_WAITOK | M_ZERO); if (ofw_bus_gen_setup_devinfo(&di->di_ofw, child) != 0) { free(di, M_LBC); device_printf(dev, "could not set up devinfo\n"); continue; } resource_list_init(&di->di_res); if (fdt_lbc_reg_decode(child, sc, di)) { device_printf(dev, "could not process 'reg' " "property\n"); ofw_bus_gen_destroy_devinfo(&di->di_ofw); free(di, M_LBC); continue; } fdt_lbc_fixup(child, sc, di); /* Add newbus device for this FDT node */ cdev = device_add_child(dev, NULL, -1); if (cdev == NULL) { device_printf(dev, "could not add child: %s\n", di->di_ofw.obd_name); resource_list_free(&di->di_res); ofw_bus_gen_destroy_devinfo(&di->di_ofw); free(di, M_LBC); continue; } debugf("added child name='%s', node=%p\n", di->di_ofw.obd_name, (void *)child); device_set_ivars(cdev, di); } /* * Enable the LBC. 
*/ lbc_banks_enable(sc); free(rangesptr, M_OFWPROP); return (bus_generic_attach(dev)); fail: free(rangesptr, M_OFWPROP); bus_release_resource(dev, SYS_RES_MEMORY, sc->sc_mrid, sc->sc_mres); return (error); } static int lbc_shutdown(device_t dev) { /* TODO */ return(0); } static struct resource * lbc_alloc_resource(device_t bus, device_t child, int type, int *rid, u_long start, u_long end, u_long count, u_int flags) { struct lbc_softc *sc; struct lbc_devinfo *di; struct resource_list_entry *rle; struct resource *res; struct rman *rm; int needactivate; /* We only support default allocations. */ if (start != 0ul || end != ~0ul) return (NULL); sc = device_get_softc(bus); if (type == SYS_RES_IRQ) return (bus_alloc_resource(bus, type, rid, start, end, count, flags)); /* * Request for the default allocation with a given rid: use resource * list stored in the local device info. */ if ((di = device_get_ivars(child)) == NULL) return (NULL); if (type == SYS_RES_IOPORT) type = SYS_RES_MEMORY; rid = &di->di_bank; rle = resource_list_find(&di->di_res, type, *rid); if (rle == NULL) { device_printf(bus, "no default resources for " "rid = %d, type = %d\n", *rid, type); return (NULL); } start = rle->start; count = rle->count; end = start + count - 1; sc = device_get_softc(bus); needactivate = flags & RF_ACTIVE; flags &= ~RF_ACTIVE; rm = &sc->sc_rman; res = rman_reserve_resource(rm, start, end, count, flags, child); if (res == NULL) { device_printf(bus, "failed to reserve resource %#lx - %#lx " "(%#lx)\n", start, end, count); return (NULL); } rman_set_rid(res, *rid); rman_set_bustag(res, &bs_be_tag); rman_set_bushandle(res, rman_get_start(res)); if (needactivate) if (bus_activate_resource(child, type, *rid, res)) { device_printf(child, "resource activation failed\n"); rman_release_resource(res); return (NULL); } return (res); } static int lbc_print_child(device_t dev, device_t child) { struct lbc_devinfo *di; struct resource_list *rl; int rv; di = device_get_ivars(child); rl = &di->di_res; rv = 0; rv += bus_print_child_header(dev, child); rv += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx"); rv += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld"); rv += bus_print_child_footer(dev, child); return (rv); } static int lbc_release_resource(device_t dev, device_t child, int type, int rid, struct resource *res) { int err; if (rman_get_flags(res) & RF_ACTIVE) { err = bus_deactivate_resource(child, type, rid, res); if (err) return (err); } return (rman_release_resource(res)); } static const struct ofw_bus_devinfo * lbc_get_devinfo(device_t bus, device_t child) { struct lbc_devinfo *di; di = device_get_ivars(child); return (&di->di_ofw); } void lbc_write_reg(device_t child, u_int off, uint32_t val) { device_t dev; struct lbc_softc *sc; dev = device_get_parent(child); if (off >= 0x1000) { device_printf(dev, "%s(%s): invalid offset %#x\n", __func__, device_get_nameunit(child), off); return; } sc = device_get_softc(dev); if (off == LBC85XX_LTESR && sc->sc_ltesr != ~0u) { sc->sc_ltesr ^= (val & sc->sc_ltesr); return; } if (off == LBC85XX_LTEATR && (val & 1) == 0) sc->sc_ltesr = ~0u; bus_space_write_4(sc->sc_bst, sc->sc_bsh, off, val); } uint32_t lbc_read_reg(device_t child, u_int off) { device_t dev; struct lbc_softc *sc; uint32_t val; dev = device_get_parent(child); if (off >= 0x1000) { device_printf(dev, "%s(%s): invalid offset %#x\n", __func__, device_get_nameunit(child), off); return (~0U); } sc = device_get_softc(dev); if (off == LBC85XX_LTESR && sc->sc_ltesr != ~0U) val = sc->sc_ltesr; else val = 
bus_space_read_4(sc->sc_bst, sc->sc_bsh, off); return (val); } diff --git a/sys/powerpc/mpc85xx/pci_mpc85xx.c b/sys/powerpc/mpc85xx/pci_mpc85xx.c index 3c34c874db31..4397ac0ad778 100644 --- a/sys/powerpc/mpc85xx/pci_mpc85xx.c +++ b/sys/powerpc/mpc85xx/pci_mpc85xx.c @@ -1,716 +1,828 @@ /*- * Copyright 2006-2007 by Juniper Networks. * Copyright 2008 Semihalf. * Copyright 2010 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Semihalf * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * From: FreeBSD: src/sys/powerpc/mpc85xx/pci_ocp.c,v 1.9 2010/03/23 23:46:28 marcel */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ofw_bus_if.h" #include "pcib_if.h" #include #include #include #include #define REG_CFG_ADDR 0x0000 #define CONFIG_ACCESS_ENABLE 0x80000000 #define REG_CFG_DATA 0x0004 #define REG_INT_ACK 0x0008 #define REG_POTAR(n) (0x0c00 + 0x20 * (n)) #define REG_POTEAR(n) (0x0c04 + 0x20 * (n)) #define REG_POWBAR(n) (0x0c08 + 0x20 * (n)) #define REG_POWAR(n) (0x0c10 + 0x20 * (n)) #define REG_PITAR(n) (0x0e00 - 0x20 * (n)) #define REG_PIWBAR(n) (0x0e08 - 0x20 * (n)) #define REG_PIWBEAR(n) (0x0e0c - 0x20 * (n)) #define REG_PIWAR(n) (0x0e10 - 0x20 * (n)) #define REG_PEX_MES_DR 0x0020 #define REG_PEX_MES_IER 0x0028 #define REG_PEX_ERR_DR 0x0e00 #define REG_PEX_ERR_EN 0x0e08 +#define REG_PEX_ERR_DR 0x0e00 +#define REG_PEX_ERR_DR_ME 0x80000000 +#define REG_PEX_ERR_DR_PCT 0x800000 +#define REG_PEX_ERR_DR_PAT 0x400000 +#define REG_PEX_ERR_DR_PCAC 0x200000 +#define REG_PEX_ERR_DR_PNM 0x100000 +#define REG_PEX_ERR_DR_CDNSC 0x80000 +#define REG_PEX_ERR_DR_CRSNC 0x40000 +#define REG_PEX_ERR_DR_ICCA 0x20000 +#define REG_PEX_ERR_DR_IACA 0x10000 +#define REG_PEX_ERR_DR_CRST 0x8000 +#define REG_PEX_ERR_DR_MIS 0x4000 +#define REG_PEX_ERR_DR_IOIS 0x2000 +#define REG_PEX_ERR_DR_CIS 0x1000 +#define REG_PEX_ERR_DR_CIEP 0x800 +#define REG_PEX_ERR_DR_IOIEP 0x400 +#define REG_PEX_ERR_DR_OAC 0x200 +#define REG_PEX_ERR_DR_IOIA 0x100 +#define REG_PEX_ERR_DR_IMBA 0x80 +#define REG_PEX_ERR_DR_IIOBA 0x40 +#define REG_PEX_ERR_DR_LDDE 0x20 +#define REG_PEX_ERR_EN 0x0e08 + #define PCIR_LTSSM 0x404 #define LTSSM_STAT_L0 0x16 #define DEVFN(b, s, f) ((b << 16) | (s << 8) | f) struct fsl_pcib_softc { struct ofw_pci_softc pci_sc; device_t sc_dev; int sc_iomem_target; bus_addr_t sc_iomem_start, sc_iomem_end; int sc_ioport_target; bus_addr_t sc_ioport_start, sc_ioport_end; struct resource *sc_res; bus_space_handle_t sc_bsh; bus_space_tag_t sc_bst; int sc_rid; + struct resource *sc_irq_res; + void *sc_ih; + int sc_busnr; int sc_pcie; uint8_t sc_pcie_capreg; /* PCI-E Capability Reg Set */ /* Devices that need special attention. */ int sc_devfn_tundra; int sc_devfn_via_ide; }; +struct fsl_pcib_err_dr { + const char *msg; + uint32_t err_dr_mask; +}; + +static const struct fsl_pcib_err_dr pci_err[] = { + {"ME", REG_PEX_ERR_DR_ME}, + {"PCT", REG_PEX_ERR_DR_PCT}, + {"PAT", REG_PEX_ERR_DR_PAT}, + {"PCAC", REG_PEX_ERR_DR_PCAC}, + {"PNM", REG_PEX_ERR_DR_PNM}, + {"CDNSC", REG_PEX_ERR_DR_CDNSC}, + {"CRSNC", REG_PEX_ERR_DR_CRSNC}, + {"ICCA", REG_PEX_ERR_DR_ICCA}, + {"IACA", REG_PEX_ERR_DR_IACA}, + {"CRST", REG_PEX_ERR_DR_CRST}, + {"MIS", REG_PEX_ERR_DR_MIS}, + {"IOIS", REG_PEX_ERR_DR_IOIS}, + {"CIS", REG_PEX_ERR_DR_CIS}, + {"CIEP", REG_PEX_ERR_DR_CIEP}, + {"IOIEP", REG_PEX_ERR_DR_IOIEP}, + {"OAC", REG_PEX_ERR_DR_OAC}, + {"IOIA", REG_PEX_ERR_DR_IOIA}, + {"IMBA", REG_PEX_ERR_DR_IMBA}, + {"IIOBA", REG_PEX_ERR_DR_IIOBA}, + {"LDDE", REG_PEX_ERR_DR_LDDE} +}; + /* Local forward declarations.
*/ static uint32_t fsl_pcib_cfgread(struct fsl_pcib_softc *, u_int, u_int, u_int, u_int, int); static void fsl_pcib_cfgwrite(struct fsl_pcib_softc *, u_int, u_int, u_int, u_int, uint32_t, int); static int fsl_pcib_decode_win(phandle_t, struct fsl_pcib_softc *); static void fsl_pcib_err_init(device_t); static void fsl_pcib_inbound(struct fsl_pcib_softc *, int, int, uint64_t, uint64_t, uint64_t); static int fsl_pcib_init(struct fsl_pcib_softc *, int, int); static void fsl_pcib_outbound(struct fsl_pcib_softc *, int, int, uint64_t, uint64_t, uint64_t); /* Forward declarations. */ static int fsl_pcib_attach(device_t); static int fsl_pcib_detach(device_t); static int fsl_pcib_probe(device_t); static int fsl_pcib_maxslots(device_t); static uint32_t fsl_pcib_read_config(device_t, u_int, u_int, u_int, u_int, int); static void fsl_pcib_write_config(device_t, u_int, u_int, u_int, u_int, uint32_t, int); /* Configuration r/w mutex. */ struct mtx pcicfg_mtx; static int mtx_initialized = 0; /* * Bus interface definitions. */ static device_method_t fsl_pcib_methods[] = { /* Device interface */ DEVMETHOD(device_probe, fsl_pcib_probe), DEVMETHOD(device_attach, fsl_pcib_attach), DEVMETHOD(device_detach, fsl_pcib_detach), /* pcib interface */ DEVMETHOD(pcib_maxslots, fsl_pcib_maxslots), DEVMETHOD(pcib_read_config, fsl_pcib_read_config), DEVMETHOD(pcib_write_config, fsl_pcib_write_config), DEVMETHOD_END }; static devclass_t fsl_pcib_devclass; DEFINE_CLASS_1(pcib, fsl_pcib_driver, fsl_pcib_methods, sizeof(struct fsl_pcib_softc), ofw_pci_driver); DRIVER_MODULE(pcib, ofwbus, fsl_pcib_driver, fsl_pcib_devclass, 0, 0); +static int +fsl_pcib_err_intr(void *v) +{ + struct fsl_pcib_softc *sc; + device_t dev; + uint32_t err_reg, clear_reg; + uint8_t i; + + dev = (device_t)v; + sc = device_get_softc(dev); + + clear_reg = 0; + err_reg = bus_space_read_4(sc->sc_bst, sc->sc_bsh, REG_PEX_ERR_DR); + + /* Check which errors occurred */ + for (i = 0; i < sizeof(pci_err)/sizeof(struct fsl_pcib_err_dr); i++) { + if (err_reg & pci_err[i].err_dr_mask) { + device_printf(dev, "PCI %d: report %s error\n", + device_get_unit(dev), pci_err[i].msg); + clear_reg |= pci_err[i].err_dr_mask; + } + } + + /* Clear pending errors */ + bus_space_write_4(sc->sc_bst, sc->sc_bsh, REG_PEX_ERR_DR, clear_reg); + + return (0); +} + static int fsl_pcib_probe(device_t dev) { if (ofw_bus_get_type(dev) == NULL || strcmp(ofw_bus_get_type(dev), "pci") != 0) return (ENXIO); if (!(ofw_bus_is_compatible(dev, "fsl,mpc8540-pci") || ofw_bus_is_compatible(dev, "fsl,mpc8540-pcie") || ofw_bus_is_compatible(dev, "fsl,mpc8548-pcie") || ofw_bus_is_compatible(dev, "fsl,p5020-pcie") || ofw_bus_is_compatible(dev, "fsl,qoriq-pcie-v2.2") || ofw_bus_is_compatible(dev, "fsl,qoriq-pcie"))) return (ENXIO); device_set_desc(dev, "Freescale Integrated PCI/PCI-E Controller"); return (BUS_PROBE_DEFAULT); } static int fsl_pcib_attach(device_t dev) { struct fsl_pcib_softc *sc; phandle_t node; uint32_t cfgreg; - int maxslot, error; + int error, maxslot, rid; uint8_t ltssm, capptr; sc = device_get_softc(dev); sc->sc_dev = dev; sc->sc_rid = 0; sc->sc_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->sc_rid, RF_ACTIVE); if (sc->sc_res == NULL) { device_printf(dev, "could not map I/O memory\n"); return (ENXIO); } sc->sc_bst = rman_get_bustag(sc->sc_res); sc->sc_bsh = rman_get_bushandle(sc->sc_res); sc->sc_busnr = 0; if (!mtx_initialized) { mtx_init(&pcicfg_mtx, "pcicfg", NULL, MTX_SPIN); mtx_initialized = 1; } cfgreg = fsl_pcib_cfgread(sc, 0, 0, 0, PCIR_VENDOR, 2); if (cfgreg !=
0x1057 && cfgreg != 0x1957) goto err; capptr = fsl_pcib_cfgread(sc, 0, 0, 0, PCIR_CAP_PTR, 1); while (capptr != 0) { cfgreg = fsl_pcib_cfgread(sc, 0, 0, 0, capptr, 2); switch (cfgreg & 0xff) { case PCIY_PCIX: break; case PCIY_EXPRESS: sc->sc_pcie = 1; sc->sc_pcie_capreg = capptr; break; } capptr = (cfgreg >> 8) & 0xff; } node = ofw_bus_get_node(dev); /* * Initialize generic OF PCI interface (ranges, etc.) */ error = ofw_pci_init(dev); if (error) return (error); /* * Configure decode windows for PCI(E) access. */ if (fsl_pcib_decode_win(node, sc) != 0) goto err; cfgreg = fsl_pcib_cfgread(sc, 0, 0, 0, PCIR_COMMAND, 2); cfgreg |= PCIM_CMD_SERRESPEN | PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN | PCIM_CMD_PORTEN; fsl_pcib_cfgwrite(sc, 0, 0, 0, PCIR_COMMAND, cfgreg, 2); sc->sc_devfn_tundra = -1; sc->sc_devfn_via_ide = -1; /* * Scan bus using firmware configured, 0 based bus numbering. */ maxslot = (sc->sc_pcie) ? 0 : PCI_SLOTMAX; fsl_pcib_init(sc, sc->sc_busnr, maxslot); if (sc->sc_pcie) { ltssm = fsl_pcib_cfgread(sc, 0, 0, 0, PCIR_LTSSM, 1); if (ltssm < LTSSM_STAT_L0) { if (bootverbose) printf("PCI %d: no PCIE link, skipping\n", device_get_unit(dev)); return (0); } } + /* Allocate irq */ + rid = 0; + sc->sc_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, + RF_ACTIVE | RF_SHAREABLE); + if (sc->sc_irq_res == NULL) { + error = fsl_pcib_detach(dev); + if (error != 0) { + device_printf(dev, + "Detach of the driver failed with error %d\n", + error); + } + return (ENXIO); + } + + /* Setup interrupt handler */ + error = bus_setup_intr(dev, sc->sc_irq_res, INTR_TYPE_MISC | INTR_MPSAFE, + NULL, (driver_intr_t *)fsl_pcib_err_intr, dev, &sc->sc_ih); + if (error != 0) { + device_printf(dev, "Could not setup irq, %d\n", error); + sc->sc_ih = NULL; + error = fsl_pcib_detach(dev); + if (error != 0) { + device_printf(dev, + "Detach of the driver failed with error %d\n", + error); + } + return (ENXIO); + } + fsl_pcib_err_init(dev); return (ofw_pci_attach(dev)); err: return (ENXIO); } static uint32_t fsl_pcib_cfgread(struct fsl_pcib_softc *sc, u_int bus, u_int slot, u_int func, u_int reg, int bytes) { uint32_t addr, data; addr = CONFIG_ACCESS_ENABLE; addr |= (bus & 0xff) << 16; addr |= (slot & 0x1f) << 11; addr |= (func & 0x7) << 8; addr |= reg & 0xfc; if (sc->sc_pcie) addr |= (reg & 0xf00) << 16; mtx_lock_spin(&pcicfg_mtx); bus_space_write_4(sc->sc_bst, sc->sc_bsh, REG_CFG_ADDR, addr); switch (bytes) { case 1: data = bus_space_read_1(sc->sc_bst, sc->sc_bsh, REG_CFG_DATA + (reg & 3)); break; case 2: data = le16toh(bus_space_read_2(sc->sc_bst, sc->sc_bsh, REG_CFG_DATA + (reg & 2))); break; case 4: data = le32toh(bus_space_read_4(sc->sc_bst, sc->sc_bsh, REG_CFG_DATA)); break; default: data = ~0; break; } mtx_unlock_spin(&pcicfg_mtx); return (data); } static void fsl_pcib_cfgwrite(struct fsl_pcib_softc *sc, u_int bus, u_int slot, u_int func, u_int reg, uint32_t data, int bytes) { uint32_t addr; addr = CONFIG_ACCESS_ENABLE; addr |= (bus & 0xff) << 16; addr |= (slot & 0x1f) << 11; addr |= (func & 0x7) << 8; addr |= reg & 0xfc; if (sc->sc_pcie) addr |= (reg & 0xf00) << 16; mtx_lock_spin(&pcicfg_mtx); bus_space_write_4(sc->sc_bst, sc->sc_bsh, REG_CFG_ADDR, addr); switch (bytes) { case 1: bus_space_write_1(sc->sc_bst, sc->sc_bsh, REG_CFG_DATA + (reg & 3), data); break; case 2: bus_space_write_2(sc->sc_bst, sc->sc_bsh, REG_CFG_DATA + (reg & 2), htole16(data)); break; case 4: bus_space_write_4(sc->sc_bst, sc->sc_bsh, REG_CFG_DATA, htole32(data)); break; } mtx_unlock_spin(&pcicfg_mtx); } #if 0 static void dump(struct 
fsl_pcib_softc *sc) { unsigned int i; #define RD(o) bus_space_read_4(sc->sc_bst, sc->sc_bsh, o) for (i = 0; i < 5; i++) { printf("POTAR%u =0x%08x\n", i, RD(REG_POTAR(i))); printf("POTEAR%u =0x%08x\n", i, RD(REG_POTEAR(i))); printf("POWBAR%u =0x%08x\n", i, RD(REG_POWBAR(i))); printf("POWAR%u =0x%08x\n", i, RD(REG_POWAR(i))); } printf("\n"); for (i = 1; i < 4; i++) { printf("PITAR%u =0x%08x\n", i, RD(REG_PITAR(i))); printf("PIWBAR%u =0x%08x\n", i, RD(REG_PIWBAR(i))); printf("PIWBEAR%u=0x%08x\n", i, RD(REG_PIWBEAR(i))); printf("PIWAR%u =0x%08x\n", i, RD(REG_PIWAR(i))); } printf("\n"); #undef RD for (i = 0; i < 0x48; i += 4) { printf("cfg%02x=0x%08x\n", i, fsl_pcib_cfgread(sc, 0, 0, 0, i, 4)); } } #endif static int fsl_pcib_maxslots(device_t dev) { struct fsl_pcib_softc *sc = device_get_softc(dev); return ((sc->sc_pcie) ? 0 : PCI_SLOTMAX); } static uint32_t fsl_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, int bytes) { struct fsl_pcib_softc *sc = device_get_softc(dev); u_int devfn; if (bus == sc->sc_busnr && !sc->sc_pcie && slot < 10) return (~0); devfn = DEVFN(bus, slot, func); if (devfn == sc->sc_devfn_tundra) return (~0); if (devfn == sc->sc_devfn_via_ide && reg == PCIR_INTPIN) return (1); return (fsl_pcib_cfgread(sc, bus, slot, func, reg, bytes)); } static void fsl_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, uint32_t val, int bytes) { struct fsl_pcib_softc *sc = device_get_softc(dev); if (bus == sc->sc_busnr && !sc->sc_pcie && slot < 10) return; fsl_pcib_cfgwrite(sc, bus, slot, func, reg, val, bytes); } static void fsl_pcib_init_via(struct fsl_pcib_softc *sc, uint16_t device, int bus, int slot, int fn) { if (device == 0x0686) { fsl_pcib_write_config(sc->sc_dev, bus, slot, fn, 0x52, 0x34, 1); fsl_pcib_write_config(sc->sc_dev, bus, slot, fn, 0x77, 0x00, 1); fsl_pcib_write_config(sc->sc_dev, bus, slot, fn, 0x83, 0x98, 1); fsl_pcib_write_config(sc->sc_dev, bus, slot, fn, 0x85, 0x03, 1); } else if (device == 0x0571) { sc->sc_devfn_via_ide = DEVFN(bus, slot, fn); fsl_pcib_write_config(sc->sc_dev, bus, slot, fn, 0x40, 0x0b, 1); } } static int fsl_pcib_init(struct fsl_pcib_softc *sc, int bus, int maxslot) { int secbus; int old_pribus, old_secbus, old_subbus; int new_pribus, new_secbus, new_subbus; int slot, func, maxfunc; uint16_t vendor, device; uint8_t command, hdrtype, subclass; secbus = bus; for (slot = 0; slot <= maxslot; slot++) { maxfunc = 0; for (func = 0; func <= maxfunc; func++) { hdrtype = fsl_pcib_read_config(sc->sc_dev, bus, slot, func, PCIR_HDRTYPE, 1); if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE) continue; if (func == 0 && (hdrtype & PCIM_MFDEV)) maxfunc = PCI_FUNCMAX; vendor = fsl_pcib_read_config(sc->sc_dev, bus, slot, func, PCIR_VENDOR, 2); device = fsl_pcib_read_config(sc->sc_dev, bus, slot, func, PCIR_DEVICE, 2); if (vendor == 0x1957 && device == 0x3fff) { sc->sc_devfn_tundra = DEVFN(bus, slot, func); continue; } command = fsl_pcib_read_config(sc->sc_dev, bus, slot, func, PCIR_COMMAND, 1); command &= ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN); fsl_pcib_write_config(sc->sc_dev, bus, slot, func, PCIR_COMMAND, command, 1); if (vendor == 0x1106) fsl_pcib_init_via(sc, device, bus, slot, func); /* * Handle PCI-PCI bridges */ subclass = fsl_pcib_read_config(sc->sc_dev, bus, slot, func, PCIR_SUBCLASS, 1); /* Allow all DEVTYPE 1 devices */ if (hdrtype != PCIM_HDRTYPE_BRIDGE) continue; secbus++; /* Read current bus register configuration */ old_pribus = fsl_pcib_read_config(sc->sc_dev, bus, slot, func, PCIR_PRIBUS_1, 1);
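/* (these firmware-assigned values are only reported when bootverbose; * the writes further below replace them with the renumbering computed * by the recursive scan) */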
old_secbus = fsl_pcib_read_config(sc->sc_dev, bus, slot, func, PCIR_SECBUS_1, 1); old_subbus = fsl_pcib_read_config(sc->sc_dev, bus, slot, func, PCIR_SUBBUS_1, 1); if (bootverbose) printf("PCI: reading firmware bus numbers for " "secbus = %d (bus/sec/sub) = (%d/%d/%d)\n", secbus, old_pribus, old_secbus, old_subbus); new_pribus = bus; new_secbus = secbus; secbus = fsl_pcib_init(sc, secbus, (subclass == PCIS_BRIDGE_PCI) ? PCI_SLOTMAX : 0); new_subbus = secbus; if (bootverbose) printf("PCI: translate firmware bus numbers " "for secbus %d (%d/%d/%d) -> (%d/%d/%d)\n", secbus, old_pribus, old_secbus, old_subbus, new_pribus, new_secbus, new_subbus); fsl_pcib_write_config(sc->sc_dev, bus, slot, func, PCIR_PRIBUS_1, new_pribus, 1); fsl_pcib_write_config(sc->sc_dev, bus, slot, func, PCIR_SECBUS_1, new_secbus, 1); fsl_pcib_write_config(sc->sc_dev, bus, slot, func, PCIR_SUBBUS_1, new_subbus, 1); } } return (secbus); } static void fsl_pcib_inbound(struct fsl_pcib_softc *sc, int wnd, int tgt, uint64_t start, uint64_t size, uint64_t pci_start) { uint32_t attr, bar, tar; KASSERT(wnd > 0, ("%s: inbound window 0 is invalid", __func__)); switch (tgt) { /* XXX OCP85XX_TGTIF_RAM2, OCP85XX_TGTIF_RAM_INTL should be handled */ case OCP85XX_TGTIF_RAM1: attr = 0xa0f55000 | (ffsl(size) - 2); break; default: attr = 0; break; } tar = start >> 12; bar = pci_start >> 12; bus_space_write_4(sc->sc_bst, sc->sc_bsh, REG_PITAR(wnd), tar); bus_space_write_4(sc->sc_bst, sc->sc_bsh, REG_PIWBEAR(wnd), 0); bus_space_write_4(sc->sc_bst, sc->sc_bsh, REG_PIWBAR(wnd), bar); bus_space_write_4(sc->sc_bst, sc->sc_bsh, REG_PIWAR(wnd), attr); } static void fsl_pcib_outbound(struct fsl_pcib_softc *sc, int wnd, int res, uint64_t start, uint64_t size, uint64_t pci_start) { uint32_t attr, bar, tar; switch (res) { case SYS_RES_MEMORY: attr = 0x80044000 | (ffsll(size) - 2); break; case SYS_RES_IOPORT: attr = 0x80088000 | (ffsll(size) - 2); break; default: attr = 0x0004401f; break; } bar = start >> 12; tar = pci_start >> 12; bus_space_write_4(sc->sc_bst, sc->sc_bsh, REG_POTAR(wnd), tar); bus_space_write_4(sc->sc_bst, sc->sc_bsh, REG_POTEAR(wnd), 0); bus_space_write_4(sc->sc_bst, sc->sc_bsh, REG_POWBAR(wnd), bar); bus_space_write_4(sc->sc_bst, sc->sc_bsh, REG_POWAR(wnd), attr); } static void fsl_pcib_err_init(device_t dev) { struct fsl_pcib_softc *sc; uint16_t sec_stat, dsr; uint32_t dcr, err_en; sc = device_get_softc(dev); sec_stat = fsl_pcib_cfgread(sc, 0, 0, 0, PCIR_SECSTAT_1, 2); if (sec_stat) fsl_pcib_cfgwrite(sc, 0, 0, 0, PCIR_SECSTAT_1, 0xffff, 2); if (sc->sc_pcie) { /* Clear error bits */ bus_space_write_4(sc->sc_bst, sc->sc_bsh, REG_PEX_MES_IER, 0xffffffff); bus_space_write_4(sc->sc_bst, sc->sc_bsh, REG_PEX_MES_DR, 0xffffffff); bus_space_write_4(sc->sc_bst, sc->sc_bsh, REG_PEX_ERR_DR, 0xffffffff); dsr = fsl_pcib_cfgread(sc, 0, 0, 0, sc->sc_pcie_capreg + PCIER_DEVICE_STA, 2); if (dsr) fsl_pcib_cfgwrite(sc, 0, 0, 0, sc->sc_pcie_capreg + PCIER_DEVICE_STA, 0xffff, 2); /* Enable reporting of all errors */ err_en = 0x00bfff00; bus_space_write_4(sc->sc_bst, sc->sc_bsh, REG_PEX_ERR_EN, err_en); /* Enable error reporting: URR, FER, NFER */ dcr = fsl_pcib_cfgread(sc, 0, 0, 0, sc->sc_pcie_capreg + PCIER_DEVICE_CTL, 4); dcr |= PCIEM_CTL_URR_ENABLE | PCIEM_CTL_FER_ENABLE | PCIEM_CTL_NFER_ENABLE; fsl_pcib_cfgwrite(sc, 0, 0, 0, sc->sc_pcie_capreg + PCIER_DEVICE_CTL, dcr, 4); } } static int fsl_pcib_detach(device_t dev) { if (mtx_initialized) { mtx_destroy(&pcicfg_mtx); mtx_initialized = 0; } return (bus_generic_detach(dev)); } static int 
fsl_pcib_decode_win(phandle_t node, struct fsl_pcib_softc *sc) { device_t dev; int error, i, trgt; dev = sc->sc_dev; fsl_pcib_outbound(sc, 0, -1, 0, 0, 0); /* * Configure LAW decode windows. */ error = law_pci_target(sc->sc_res, &sc->sc_iomem_target, &sc->sc_ioport_target); if (error != 0) { device_printf(dev, "could not retrieve PCI LAW target info\n"); return (error); } for (i = 0; i < sc->pci_sc.sc_nrange; i++) { switch (sc->pci_sc.sc_range[i].pci_hi & OFW_PCI_PHYS_HI_SPACEMASK) { case OFW_PCI_PHYS_HI_SPACE_CONFIG: continue; case OFW_PCI_PHYS_HI_SPACE_IO: trgt = sc->sc_ioport_target; fsl_pcib_outbound(sc, 2, SYS_RES_IOPORT, sc->pci_sc.sc_range[i].host, sc->pci_sc.sc_range[i].size, sc->pci_sc.sc_range[i].pci); sc->sc_ioport_start = sc->pci_sc.sc_range[i].pci; sc->sc_ioport_end = sc->pci_sc.sc_range[i].pci + sc->pci_sc.sc_range[i].size - 1; break; case OFW_PCI_PHYS_HI_SPACE_MEM32: case OFW_PCI_PHYS_HI_SPACE_MEM64: trgt = sc->sc_iomem_target; fsl_pcib_outbound(sc, 1, SYS_RES_MEMORY, sc->pci_sc.sc_range[i].host, sc->pci_sc.sc_range[i].size, sc->pci_sc.sc_range[i].pci); sc->sc_iomem_start = sc->pci_sc.sc_range[i].pci; sc->sc_iomem_end = sc->pci_sc.sc_range[i].pci + sc->pci_sc.sc_range[i].size - 1; break; default: panic("Unknown range type %#x\n", sc->pci_sc.sc_range[i].pci_hi & OFW_PCI_PHYS_HI_SPACEMASK); } error = law_enable(trgt, sc->pci_sc.sc_range[i].host, sc->pci_sc.sc_range[i].size); if (error != 0) { device_printf(dev, "could not program LAW for range " "%d\n", i); return (error); } } /* * Set outbound and inbound windows. */ fsl_pcib_outbound(sc, 3, -1, 0, 0, 0); fsl_pcib_outbound(sc, 4, -1, 0, 0, 0); fsl_pcib_inbound(sc, 1, -1, 0, 0, 0); fsl_pcib_inbound(sc, 2, -1, 0, 0, 0); fsl_pcib_inbound(sc, 3, OCP85XX_TGTIF_RAM1, 0, 2U * 1024U * 1024U * 1024U, 0); return (0); } diff --git a/sys/powerpc/powerpc/cpu.c b/sys/powerpc/powerpc/cpu.c index 1d685315b496..88924171cced 100644 --- a/sys/powerpc/powerpc/cpu.c +++ b/sys/powerpc/powerpc/cpu.c @@ -1,653 +1,670 @@ /*- * Copyright (c) 2001 Matt Thomas. * Copyright (c) 2001 Tsubai Masanari. * Copyright (c) 1998, 1999, 2001 Internet Research Institute, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by * Internet Research Institute, Inc. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 2003 Benno Rice. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * from $NetBSD: cpu_subr.c,v 1.1 2003/02/03 17:10:09 matt Exp $ * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void cpu_6xx_setup(int cpuid, uint16_t vers); static void cpu_970_setup(int cpuid, uint16_t vers); static void cpu_booke_setup(int cpuid, uint16_t vers); int powerpc_pow_enabled; void (*cpu_idle_hook)(sbintime_t) = NULL; static void cpu_idle_60x(sbintime_t); static void cpu_idle_booke(sbintime_t); struct cputab { const char *name; uint16_t version; uint16_t revfmt; int features; /* Do not include PPC_FEATURE_32 or * PPC_FEATURE_HAS_MMU */ + int features2; void (*cpu_setup)(int cpuid, uint16_t vers); }; #define REVFMT_MAJMIN 1 /* %u.%u */ #define REVFMT_HEX 2 /* 0x%04x */ #define REVFMT_DEC 3 /* %u */ static const struct cputab models[] = { { "Motorola PowerPC 601", MPC601, REVFMT_DEC, - PPC_FEATURE_HAS_FPU | PPC_FEATURE_UNIFIED_CACHE, cpu_6xx_setup }, + PPC_FEATURE_HAS_FPU | PPC_FEATURE_UNIFIED_CACHE, 0, cpu_6xx_setup }, { "Motorola PowerPC 602", MPC602, REVFMT_DEC, - PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, + PPC_FEATURE_HAS_FPU, 0, cpu_6xx_setup }, { "Motorola PowerPC 603", MPC603, REVFMT_MAJMIN, - PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, + PPC_FEATURE_HAS_FPU, 0, cpu_6xx_setup }, { "Motorola PowerPC 603e", MPC603e, REVFMT_MAJMIN, - PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, + PPC_FEATURE_HAS_FPU, 0, cpu_6xx_setup }, { "Motorola PowerPC 603ev", MPC603ev, REVFMT_MAJMIN, - PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, + PPC_FEATURE_HAS_FPU, 0, cpu_6xx_setup }, { "Motorola PowerPC 604", MPC604, REVFMT_MAJMIN, - PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, + PPC_FEATURE_HAS_FPU, 0, cpu_6xx_setup }, { "Motorola PowerPC 604ev", MPC604ev, REVFMT_MAJMIN, - PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, + PPC_FEATURE_HAS_FPU, 0, cpu_6xx_setup }, { "Motorola PowerPC 620", MPC620, REVFMT_HEX, - PPC_FEATURE_64 | PPC_FEATURE_HAS_FPU, NULL }, + PPC_FEATURE_64 | PPC_FEATURE_HAS_FPU, 0, NULL }, { "Motorola PowerPC 750", MPC750, REVFMT_MAJMIN, - PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, + PPC_FEATURE_HAS_FPU, 0, cpu_6xx_setup }, { "IBM PowerPC 750FX", IBM750FX, REVFMT_MAJMIN, - PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, + PPC_FEATURE_HAS_FPU, 0, cpu_6xx_setup }, { "IBM PowerPC 970", IBM970, REVFMT_MAJMIN, PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, - cpu_970_setup }, + 0, cpu_970_setup }, { "IBM PowerPC 970FX", IBM970FX, REVFMT_MAJMIN, PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, - cpu_970_setup }, + 0, cpu_970_setup }, { "IBM PowerPC 970GX", IBM970GX, REVFMT_MAJMIN, PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, - cpu_970_setup }, + 0, cpu_970_setup }, { "IBM PowerPC 970MP", IBM970MP, REVFMT_MAJMIN, PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, - cpu_970_setup }, + 0, cpu_970_setup }, { "IBM POWER4", IBMPOWER4, REVFMT_MAJMIN, - PPC_FEATURE_64 | PPC_FEATURE_HAS_FPU, NULL }, + PPC_FEATURE_64 | PPC_FEATURE_HAS_FPU, 0, NULL }, { "IBM POWER4+", IBMPOWER4PLUS, REVFMT_MAJMIN, - PPC_FEATURE_64 | PPC_FEATURE_HAS_FPU, NULL }, + PPC_FEATURE_64 | PPC_FEATURE_HAS_FPU, 0, NULL }, { "IBM POWER5", IBMPOWER5, REVFMT_MAJMIN, - PPC_FEATURE_64 | PPC_FEATURE_HAS_FPU, NULL }, + PPC_FEATURE_64 | PPC_FEATURE_HAS_FPU | PPC_FEATURE_SMT, 0, NULL }, { "IBM POWER5+", IBMPOWER5PLUS, REVFMT_MAJMIN, - PPC_FEATURE_64 | PPC_FEATURE_HAS_FPU, NULL }, + PPC_FEATURE_64 | PPC_FEATURE_HAS_FPU | PPC_FEATURE_SMT, 0, NULL }, { "IBM POWER6", IBMPOWER6, REVFMT_MAJMIN, - PPC_FEATURE_64 | 
PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, - NULL }, + PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU | + PPC_FEATURE_SMT | PPC_FEATURE_ARCH_2_05, 0, NULL }, { "IBM POWER7", IBMPOWER7, REVFMT_MAJMIN, PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU | - PPC_FEATURE_HAS_VSX, NULL }, + PPC_FEATURE_SMT | PPC_FEATURE_ARCH_2_05 | PPC_FEATURE_ARCH_2_06 | + PPC_FEATURE_HAS_VSX, 0, NULL }, { "IBM POWER7+", IBMPOWER7PLUS, REVFMT_MAJMIN, PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU | - PPC_FEATURE_HAS_VSX, NULL }, + PPC_FEATURE_SMT | PPC_FEATURE_ARCH_2_05 | PPC_FEATURE_ARCH_2_06 | + PPC_FEATURE_HAS_VSX, 0, NULL }, { "IBM POWER8E", IBMPOWER8E, REVFMT_MAJMIN, PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU | - PPC_FEATURE_HAS_VSX, NULL }, + PPC_FEATURE_SMT | PPC_FEATURE_ARCH_2_05 | PPC_FEATURE_ARCH_2_06 | + PPC_FEATURE_HAS_VSX, + PPC_FEATURE2_ARCH_2_07 | PPC_FEATURE2_HAS_HTM | + PPC_FEATURE2_HAS_VCRYPTO, NULL }, { "IBM POWER8", IBMPOWER8, REVFMT_MAJMIN, PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU | - PPC_FEATURE_HAS_VSX, NULL }, + PPC_FEATURE_SMT | PPC_FEATURE_ARCH_2_05 | PPC_FEATURE_ARCH_2_06 | + PPC_FEATURE_HAS_VSX, + PPC_FEATURE2_ARCH_2_07 | PPC_FEATURE2_HAS_HTM | + PPC_FEATURE2_HAS_VCRYPTO, NULL }, { "Motorola PowerPC 7400", MPC7400, REVFMT_MAJMIN, - PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, + PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, 0, cpu_6xx_setup }, { "Motorola PowerPC 7410", MPC7410, REVFMT_MAJMIN, - PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, + PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, 0, cpu_6xx_setup }, { "Motorola PowerPC 7450", MPC7450, REVFMT_MAJMIN, - PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, + PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, 0, cpu_6xx_setup }, { "Motorola PowerPC 7455", MPC7455, REVFMT_MAJMIN, - PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, + PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, 0, cpu_6xx_setup }, { "Motorola PowerPC 7457", MPC7457, REVFMT_MAJMIN, - PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, + PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, 0, cpu_6xx_setup }, { "Motorola PowerPC 7447A", MPC7447A, REVFMT_MAJMIN, - PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, + PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, 0, cpu_6xx_setup }, { "Motorola PowerPC 7448", MPC7448, REVFMT_MAJMIN, - PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, + PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, 0, cpu_6xx_setup }, { "Motorola PowerPC 8240", MPC8240, REVFMT_MAJMIN, - PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, + PPC_FEATURE_HAS_FPU, 0, cpu_6xx_setup }, { "Motorola PowerPC 8245", MPC8245, REVFMT_MAJMIN, - PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, + PPC_FEATURE_HAS_FPU, 0, cpu_6xx_setup }, { "Freescale e500v1 core", FSL_E500v1, REVFMT_MAJMIN, - 0, cpu_booke_setup }, + PPC_FEATURE_BOOKE, 0, cpu_booke_setup }, { "Freescale e500v2 core", FSL_E500v2, REVFMT_MAJMIN, - 0, cpu_booke_setup }, + PPC_FEATURE_BOOKE, 0, cpu_booke_setup }, { "Freescale e500mc core", FSL_E500mc, REVFMT_MAJMIN, - PPC_FEATURE_HAS_FPU, cpu_booke_setup }, + PPC_FEATURE_BOOKE | PPC_FEATURE_HAS_FPU, 0, cpu_booke_setup }, { "Freescale e5500 core", FSL_E5500, REVFMT_MAJMIN, - PPC_FEATURE_64 | PPC_FEATURE_HAS_FPU, cpu_booke_setup }, - { "Freescale e6500 core", FSL_E6500, REVFMT_MAJMIN, - PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, + PPC_FEATURE_BOOKE | PPC_FEATURE_64 | 
PPC_FEATURE_HAS_FPU, 0, cpu_booke_setup }, + { "Freescale e6500 core", FSL_E6500, REVFMT_MAJMIN, + PPC_FEATURE_BOOKE | PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | + PPC_FEATURE_HAS_FPU, 0, cpu_booke_setup }, { "IBM Cell Broadband Engine", IBMCELLBE, REVFMT_MAJMIN, - PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, - NULL}, - { "Unknown PowerPC CPU", 0, REVFMT_HEX, 0, NULL }, + PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU | + PPC_FEATURE_SMT, 0, NULL}, + { "Unknown PowerPC CPU", 0, REVFMT_HEX, 0, 0, NULL }, }; static void cpu_6xx_print_cacheinfo(u_int, uint16_t); static int cpu_feature_bit(SYSCTL_HANDLER_ARGS); static char model[64]; SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, model, 0, ""); int cpu_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU; +int cpu_features2 = 0; SYSCTL_OPAQUE(_hw, OID_AUTO, cpu_features, CTLFLAG_RD, &cpu_features, sizeof(cpu_features), "IX", "PowerPC CPU features"); +SYSCTL_OPAQUE(_hw, OID_AUTO, cpu_features2, CTLFLAG_RD, + &cpu_features2, sizeof(cpu_features2), "IX", "PowerPC CPU features 2"); /* Provide some user-friendly aliases for bits in cpu_features */ SYSCTL_PROC(_hw, OID_AUTO, floatingpoint, CTLTYPE_INT | CTLFLAG_RD, 0, PPC_FEATURE_HAS_FPU, cpu_feature_bit, "I", "Floating point instructions executed in hardware"); SYSCTL_PROC(_hw, OID_AUTO, altivec, CTLTYPE_INT | CTLFLAG_RD, 0, PPC_FEATURE_HAS_ALTIVEC, cpu_feature_bit, "I", "CPU supports Altivec"); void cpu_setup(u_int cpuid) { u_int pvr, maj, min; uint16_t vers, rev, revfmt; uint64_t cps; const struct cputab *cp; const char *name; pvr = mfpvr(); vers = pvr >> 16; rev = pvr; switch (vers) { case MPC7410: min = (pvr >> 0) & 0xff; maj = min <= 4 ? 1 : 2; break; case FSL_E500v1: case FSL_E500v2: case FSL_E500mc: case FSL_E5500: maj = (pvr >> 4) & 0xf; min = (pvr >> 0) & 0xf; break; default: maj = (pvr >> 8) & 0xf; min = (pvr >> 0) & 0xf; } for (cp = models; cp->version != 0; cp++) { if (cp->version == vers) break; } revfmt = cp->revfmt; name = cp->name; if (rev == MPC750 && pvr == 15) { name = "Motorola MPC755"; revfmt = REVFMT_HEX; } strncpy(model, name, sizeof(model) - 1); printf("cpu%d: %s revision ", cpuid, name); switch (revfmt) { case REVFMT_MAJMIN: printf("%u.%u", maj, min); break; case REVFMT_HEX: printf("0x%04x", rev); break; case REVFMT_DEC: printf("%u", rev); break; } if (cpu_est_clockrate(0, &cps) == 0) printf(", %jd.%02jd MHz", cps / 1000000, (cps / 10000) % 100); printf("\n"); cpu_features |= cp->features; + cpu_features2 |= cp->features2; printf("cpu%d: Features %b\n", cpuid, cpu_features, PPC_FEATURE_BITMASK); + if (cpu_features2 != 0) + printf("cpu%d: Features2 %b\n", cpuid, cpu_features2, + PPC_FEATURE2_BITMASK); /* * Configure CPU */ if (cp->cpu_setup != NULL) cp->cpu_setup(cpuid, vers); } /* Get current clock frequency for the given cpu id. 
*/ int cpu_est_clockrate(int cpu_id, uint64_t *cps) { uint16_t vers; register_t msr; phandle_t cpu, dev, root; int res = 0; char buf[8]; vers = mfpvr() >> 16; msr = mfmsr(); mtmsr(msr & ~PSL_EE); switch (vers) { case MPC7450: case MPC7455: case MPC7457: case MPC750: case IBM750FX: case MPC7400: case MPC7410: case MPC7447A: case MPC7448: mtspr(SPR_MMCR0, SPR_MMCR0_FC); mtspr(SPR_PMC1, 0); mtspr(SPR_MMCR0, SPR_MMCR0_PMC1SEL(PMCN_CYCLES)); DELAY(1000); *cps = (mfspr(SPR_PMC1) * 1000) + 4999; mtspr(SPR_MMCR0, SPR_MMCR0_FC); mtmsr(msr); return (0); case IBM970: case IBM970FX: case IBM970MP: isync(); mtspr(SPR_970MMCR0, SPR_MMCR0_FC); isync(); mtspr(SPR_970MMCR1, 0); mtspr(SPR_970MMCRA, 0); mtspr(SPR_970PMC1, 0); mtspr(SPR_970MMCR0, SPR_970MMCR0_PMC1SEL(PMC970N_CYCLES)); isync(); DELAY(1000); powerpc_sync(); mtspr(SPR_970MMCR0, SPR_MMCR0_FC); *cps = (mfspr(SPR_970PMC1) * 1000) + 4999; mtmsr(msr); return (0); default: root = OF_peer(0); if (root == 0) return (ENXIO); dev = OF_child(root); while (dev != 0) { res = OF_getprop(dev, "name", buf, sizeof(buf)); if (res > 0 && strcmp(buf, "cpus") == 0) break; dev = OF_peer(dev); } cpu = OF_child(dev); while (cpu != 0) { res = OF_getprop(cpu, "device_type", buf, sizeof(buf)); if (res > 0 && strcmp(buf, "cpu") == 0) break; cpu = OF_peer(cpu); } if (cpu == 0) return (ENOENT); if (OF_getprop(cpu, "ibm,extended-clock-frequency", cps, sizeof(*cps)) >= 0) { return (0); } else if (OF_getprop(cpu, "clock-frequency", cps, sizeof(cell_t)) >= 0) { *cps >>= 32; return (0); } else { return (ENOENT); } } } void cpu_6xx_setup(int cpuid, uint16_t vers) { register_t hid0, pvr; const char *bitmask; hid0 = mfspr(SPR_HID0); pvr = mfpvr(); /* * Configure power-saving mode. */ switch (vers) { case MPC603: case MPC603e: case MPC603ev: case MPC604ev: case MPC750: case IBM750FX: case MPC7400: case MPC7410: case MPC8240: case MPC8245: /* Select DOZE mode. */ hid0 &= ~(HID0_DOZE | HID0_NAP | HID0_SLEEP); hid0 |= HID0_DOZE | HID0_DPM; powerpc_pow_enabled = 1; break; case MPC7448: case MPC7447A: case MPC7457: case MPC7455: case MPC7450: /* Enable the 7450 branch caches */ hid0 |= HID0_SGE | HID0_BTIC; hid0 |= HID0_LRSTK | HID0_FOLD | HID0_BHT; /* Disable BTIC on 7450 Rev 2.0 or earlier and on 7457 */ if (((pvr >> 16) == MPC7450 && (pvr & 0xFFFF) <= 0x0200) || (pvr >> 16) == MPC7457) hid0 &= ~HID0_BTIC; /* Select NAP mode. */ hid0 &= ~(HID0_DOZE | HID0_NAP | HID0_SLEEP); hid0 |= HID0_NAP | HID0_DPM; powerpc_pow_enabled = 1; break; default: /* No power-saving mode is available. */ ; } switch (vers) { case IBM750FX: case MPC750: hid0 &= ~HID0_DBP; /* XXX correct? */ hid0 |= HID0_EMCP | HID0_BTIC | HID0_SGE | HID0_BHT; break; case MPC7400: case MPC7410: hid0 &= ~HID0_SPD; hid0 |= HID0_EMCP | HID0_BTIC | HID0_SGE | HID0_BHT; hid0 |= HID0_EIEC; break; } mtspr(SPR_HID0, hid0); if (bootverbose) cpu_6xx_print_cacheinfo(cpuid, vers); switch (vers) { case MPC7447A: case MPC7448: case MPC7450: case MPC7455: case MPC7457: bitmask = HID0_7450_BITMASK; break; default: bitmask = HID0_BITMASK; break; } printf("cpu%d: HID0 %b\n", cpuid, (int)hid0, bitmask); if (cpu_idle_hook == NULL) cpu_idle_hook = cpu_idle_60x; } static void cpu_6xx_print_cacheinfo(u_int cpuid, uint16_t vers) { register_t hid; hid = mfspr(SPR_HID0); printf("cpu%u: ", cpuid); printf("L1 I-cache %sabled, ", (hid & HID0_ICE) ? "en" : "dis"); printf("L1 D-cache %sabled\n", (hid & HID0_DCE) ? 
"en" : "dis"); printf("cpu%u: ", cpuid); if (mfspr(SPR_L2CR) & L2CR_L2E) { switch (vers) { case MPC7450: case MPC7455: case MPC7457: printf("256KB L2 cache, "); if (mfspr(SPR_L3CR) & L3CR_L3E) printf("%cMB L3 backside cache", mfspr(SPR_L3CR) & L3CR_L3SIZ ? '2' : '1'); else printf("L3 cache disabled"); printf("\n"); break; case IBM750FX: printf("512KB L2 cache\n"); break; default: switch (mfspr(SPR_L2CR) & L2CR_L2SIZ) { case L2SIZ_256K: printf("256KB "); break; case L2SIZ_512K: printf("512KB "); break; case L2SIZ_1M: printf("1MB "); break; } printf("write-%s", (mfspr(SPR_L2CR) & L2CR_L2WT) ? "through" : "back"); if (mfspr(SPR_L2CR) & L2CR_L2PE) printf(", with parity"); printf(" backside cache\n"); break; } } else printf("L2 cache disabled\n"); } static void cpu_booke_setup(int cpuid, uint16_t vers) { #ifdef BOOKE_E500 register_t hid0; hid0 = mfspr(SPR_HID0); /* Programe power-management mode. */ hid0 &= ~(HID0_DOZE | HID0_NAP | HID0_SLEEP); hid0 |= HID0_DOZE; mtspr(SPR_HID0, hid0); printf("cpu%d: HID0 %b\n", cpuid, (int)hid0, HID0_E500_BITMASK); #endif if (cpu_idle_hook == NULL) cpu_idle_hook = cpu_idle_booke; } static void cpu_970_setup(int cpuid, uint16_t vers) { #ifdef AIM uint32_t hid0_hi, hid0_lo; __asm __volatile ("mfspr %0,%2; clrldi %1,%0,32; srdi %0,%0,32;" : "=r" (hid0_hi), "=r" (hid0_lo) : "K" (SPR_HID0)); /* Configure power-saving mode */ switch (vers) { case IBM970MP: hid0_hi |= (HID0_DEEPNAP | HID0_NAP | HID0_DPM); hid0_hi &= ~HID0_DOZE; break; default: hid0_hi |= (HID0_NAP | HID0_DPM); hid0_hi &= ~(HID0_DOZE | HID0_DEEPNAP); break; } powerpc_pow_enabled = 1; __asm __volatile (" \ sync; isync; \ sldi %0,%0,32; or %0,%0,%1; \ mtspr %2, %0; \ mfspr %0, %2; mfspr %0, %2; mfspr %0, %2; \ mfspr %0, %2; mfspr %0, %2; mfspr %0, %2; \ sync; isync" :: "r" (hid0_hi), "r"(hid0_lo), "K" (SPR_HID0)); __asm __volatile ("mfspr %0,%1; srdi %0,%0,32;" : "=r" (hid0_hi) : "K" (SPR_HID0)); printf("cpu%d: HID0 %b\n", cpuid, (int)(hid0_hi), HID0_970_BITMASK); #endif cpu_idle_hook = cpu_idle_60x; } static int cpu_feature_bit(SYSCTL_HANDLER_ARGS) { int result; result = (cpu_features & arg2) ? 1 : 0; return (sysctl_handle_int(oidp, &result, 0, req)); } void cpu_idle(int busy) { sbintime_t sbt = -1; #ifdef INVARIANTS if ((mfmsr() & PSL_EE) != PSL_EE) { struct thread *td = curthread; printf("td msr %#lx\n", (u_long)td->td_md.md_saved_msr); panic("ints disabled in idleproc!"); } #endif CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", busy, curcpu); if (cpu_idle_hook != NULL) { if (!busy) { critical_enter(); sbt = cpu_idleclock(); } cpu_idle_hook(sbt); if (!busy) { cpu_activeclock(); critical_exit(); } } CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done", busy, curcpu); } static void cpu_idle_60x(sbintime_t sbt) { register_t msr; uint16_t vers; if (!powerpc_pow_enabled) return; msr = mfmsr(); vers = mfpvr() >> 16; #ifdef AIM switch (vers) { case IBM970: case IBM970FX: case IBM970MP: case MPC7447A: case MPC7448: case MPC7450: case MPC7455: case MPC7457: __asm __volatile("\ dssall; sync; mtmsr %0; isync" :: "r"(msr | PSL_POW)); break; default: powerpc_sync(); mtmsr(msr | PSL_POW); isync(); break; } #endif } static void cpu_idle_booke(sbintime_t sbt) { #ifdef E500 platform_cpu_idle(PCPU_GET(cpuid)); #endif } diff --git a/sys/powerpc/powerpc/intr_machdep.c b/sys/powerpc/powerpc/intr_machdep.c index b481279d657d..a17d9ff6e14d 100644 --- a/sys/powerpc/powerpc/intr_machdep.c +++ b/sys/powerpc/powerpc/intr_machdep.c @@ -1,611 +1,617 @@ /*- * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. 
* * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /*- * Copyright (c) 2002 Benno Rice. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: @(#)isa.c 7.2 (Berkeley) 5/13/91 * form: src/sys/i386/isa/intr_machdep.c,v 1.57 2001/07/20 * * $FreeBSD$ */ #include "opt_isa.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pic_if.h" #define MAX_STRAY_LOG 5 static MALLOC_DEFINE(M_INTR, "intr", "interrupt handler data"); struct powerpc_intr { struct intr_event *event; long *cntp; u_int irq; device_t pic; u_int intline; u_int vector; u_int cntindex; cpuset_t cpu; enum intr_trigger trig; enum intr_polarity pol; int fwcode; int ipi; }; struct pic { device_t dev; uint32_t node; u_int irqs; u_int ipis; int base; }; static u_int intrcnt_index = 0; static struct mtx intr_table_lock; static struct powerpc_intr *powerpc_intrs[INTR_VECTORS]; static struct pic piclist[MAX_PICS]; static u_int nvectors; /* Allocated vectors */ static u_int npics; /* PICs registered */ #ifdef DEV_ISA static u_int nirqs = 16; /* Allocated IRQS (ISA pre-allocated). */ #else static u_int nirqs = 0; /* Allocated IRQs. */ #endif static u_int stray_count; u_long intrcnt[INTR_VECTORS]; char intrnames[INTR_VECTORS * MAXCOMLEN]; size_t sintrcnt = sizeof(intrcnt); size_t sintrnames = sizeof(intrnames); device_t root_pic; #ifdef SMP static void *ipi_cookie; #endif static void intr_init(void *dummy __unused) { mtx_init(&intr_table_lock, "intr sources lock", NULL, MTX_DEF); } SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL); #ifdef SMP static void smp_intr_init(void *dummy __unused) { struct powerpc_intr *i; int vector; for (vector = 0; vector < nvectors; vector++) { i = powerpc_intrs[vector]; if (i != NULL && i->pic == root_pic) PIC_BIND(i->pic, i->intline, i->cpu); } } SYSINIT(smp_intr_init, SI_SUB_SMP, SI_ORDER_ANY, smp_intr_init, NULL); #endif static void intrcnt_setname(const char *name, int index) { snprintf(intrnames + (MAXCOMLEN + 1) * index, MAXCOMLEN + 1, "%-*s", MAXCOMLEN, name); } void intrcnt_add(const char *name, u_long **countp) { int idx; idx = atomic_fetchadd_int(&intrcnt_index, 1); *countp = &intrcnt[idx]; intrcnt_setname(name, idx); } static struct powerpc_intr * intr_lookup(u_int irq) { char intrname[16]; struct powerpc_intr *i, *iscan; int vector; mtx_lock(&intr_table_lock); for (vector = 0; vector < nvectors; vector++) { i = powerpc_intrs[vector]; if (i != NULL && i->irq == irq) { mtx_unlock(&intr_table_lock); return (i); } } i = malloc(sizeof(*i), M_INTR, M_NOWAIT); if (i == NULL) { mtx_unlock(&intr_table_lock); return (NULL); } i->event = NULL; i->cntp = NULL; i->trig = INTR_TRIGGER_CONFORM; i->pol = INTR_POLARITY_CONFORM; i->irq = irq; i->pic = NULL; i->vector = -1; i->fwcode = 0; i->ipi = 0; #ifdef SMP i->cpu = all_cpus; #else CPU_SETOF(0, &i->cpu); #endif for (vector = 0; vector < INTR_VECTORS && vector <= nvectors; vector++) { iscan = powerpc_intrs[vector]; if (iscan != NULL && iscan->irq == irq) break; if (iscan == NULL && i->vector == -1) i->vector = vector; iscan = NULL; } if (iscan == NULL && i->vector != -1) { powerpc_intrs[i->vector] = i; i->cntindex = atomic_fetchadd_int(&intrcnt_index, 1); i->cntp = &intrcnt[i->cntindex]; sprintf(intrname, "irq%u:", i->irq); intrcnt_setname(intrname, i->cntindex); nvectors++; } mtx_unlock(&intr_table_lock); if (iscan != NULL || i->vector == -1) { free(i, M_INTR); i = iscan; } return (i); } static int powerpc_map_irq(struct powerpc_intr *i) { struct pic *p; u_int cnt; int idx; for (idx = 0; idx < npics; idx++) { p = &piclist[idx]; cnt = 
p->irqs + p->ipis; if (i->irq >= p->base && i->irq < p->base + cnt) break; } if (idx == npics) return (EINVAL); i->intline = i->irq - p->base; i->pic = p->dev; /* Try a best guess if that failed */ if (i->pic == NULL) i->pic = root_pic; return (0); } static void powerpc_intr_eoi(void *arg) { struct powerpc_intr *i = arg; PIC_EOI(i->pic, i->intline); } static void powerpc_intr_pre_ithread(void *arg) { struct powerpc_intr *i = arg; PIC_MASK(i->pic, i->intline); PIC_EOI(i->pic, i->intline); } static void powerpc_intr_post_ithread(void *arg) { struct powerpc_intr *i = arg; PIC_UNMASK(i->pic, i->intline); } static int powerpc_assign_intr_cpu(void *arg, int cpu) { #ifdef SMP struct powerpc_intr *i = arg; if (cpu == NOCPU) i->cpu = all_cpus; else CPU_SETOF(cpu, &i->cpu); if (!cold && i->pic != NULL && i->pic == root_pic) PIC_BIND(i->pic, i->intline, i->cpu); return (0); #else return (EOPNOTSUPP); #endif } void powerpc_register_pic(device_t dev, uint32_t node, u_int irqs, u_int ipis, u_int atpic) { struct pic *p; u_int irq; int idx; mtx_lock(&intr_table_lock); /* XXX see powerpc_get_irq(). */ for (idx = 0; idx < npics; idx++) { p = &piclist[idx]; if (p->node != node) continue; if (node != 0 || p->dev == dev) break; } p = &piclist[idx]; p->dev = dev; p->node = node; p->irqs = irqs; p->ipis = ipis; if (idx == npics) { #ifdef DEV_ISA p->base = (atpic) ? 0 : nirqs; #else p->base = nirqs; #endif irq = p->base + irqs + ipis; nirqs = MAX(nirqs, irq); npics++; } + KASSERT(npics < MAX_PICS, + ("Number of PICs exceeds maximum (%d)", MAX_PICS)); + mtx_unlock(&intr_table_lock); } u_int powerpc_get_irq(uint32_t node, u_int pin) { int idx; if (node == 0) return (pin); mtx_lock(&intr_table_lock); for (idx = 0; idx < npics; idx++) { if (piclist[idx].node == node) { mtx_unlock(&intr_table_lock); return (piclist[idx].base + pin); } } /* * XXX we should never encounter an unregistered PIC, but that * can only be done when we properly support bus enumeration * using multiple passes. Until then, fake an entry and give it * some adhoc maximum number of IRQs and IPIs. */ piclist[idx].dev = NULL; piclist[idx].node = node; piclist[idx].irqs = 124; piclist[idx].ipis = 4; piclist[idx].base = nirqs; nirqs += 128; npics++; + KASSERT(npics < MAX_PICS, + ("Number of PICs exceeds maximum (%d)", MAX_PICS)); + mtx_unlock(&intr_table_lock); return (piclist[idx].base + pin); } int powerpc_enable_intr(void) { struct powerpc_intr *i; int error, vector; #ifdef SMP int n; #endif if (npics == 0) panic("no PIC detected\n"); if (root_pic == NULL) root_pic = piclist[0].dev; #ifdef SMP /* Install an IPI handler. */ if (mp_ncpus > 1) { for (n = 0; n < npics; n++) { if (piclist[n].dev != root_pic) continue; KASSERT(piclist[n].ipis != 0, ("%s: SMP root PIC does not supply any IPIs", __func__)); error = powerpc_setup_intr("IPI", MAP_IRQ(piclist[n].node, piclist[n].irqs), powerpc_ipi_handler, NULL, NULL, INTR_TYPE_MISC | INTR_EXCL, &ipi_cookie); if (error) { printf("unable to setup IPI handler\n"); return (error); } /* * Some subterfuge: disable late EOI and mark this * as an IPI to the dispatch layer. 
*/ i = intr_lookup(MAP_IRQ(piclist[n].node, piclist[n].irqs)); i->event->ie_post_filter = NULL; i->ipi = 1; } } #endif for (vector = 0; vector < nvectors; vector++) { i = powerpc_intrs[vector]; if (i == NULL) continue; error = powerpc_map_irq(i); if (error) continue; if (i->trig == -1) PIC_TRANSLATE_CODE(i->pic, i->intline, i->fwcode, &i->trig, &i->pol); if (i->trig != INTR_TRIGGER_CONFORM || i->pol != INTR_POLARITY_CONFORM) PIC_CONFIG(i->pic, i->intline, i->trig, i->pol); if (i->event != NULL) PIC_ENABLE(i->pic, i->intline, vector); } return (0); } int powerpc_setup_intr(const char *name, u_int irq, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep) { struct powerpc_intr *i; int error, enable = 0; i = intr_lookup(irq); if (i == NULL) return (ENOMEM); if (i->event == NULL) { error = intr_event_create(&i->event, (void *)i, 0, irq, powerpc_intr_pre_ithread, powerpc_intr_post_ithread, powerpc_intr_eoi, powerpc_assign_intr_cpu, "irq%u:", irq); if (error) return (error); enable = 1; } error = intr_event_add_handler(i->event, name, filter, handler, arg, intr_priority(flags), flags, cookiep); mtx_lock(&intr_table_lock); intrcnt_setname(i->event->ie_fullname, i->cntindex); mtx_unlock(&intr_table_lock); if (!cold) { error = powerpc_map_irq(i); if (!error) { if (i->trig == -1) PIC_TRANSLATE_CODE(i->pic, i->intline, i->fwcode, &i->trig, &i->pol); if (i->trig != INTR_TRIGGER_CONFORM || i->pol != INTR_POLARITY_CONFORM) PIC_CONFIG(i->pic, i->intline, i->trig, i->pol); if (i->pic == root_pic) PIC_BIND(i->pic, i->intline, i->cpu); if (enable) PIC_ENABLE(i->pic, i->intline, i->vector); } } return (error); } int powerpc_teardown_intr(void *cookie) { return (intr_event_remove_handler(cookie)); } #ifdef SMP int powerpc_bind_intr(u_int irq, u_char cpu) { struct powerpc_intr *i; i = intr_lookup(irq); if (i == NULL) return (ENOMEM); return (intr_event_bind(i->event, cpu)); } #endif int powerpc_fw_config_intr(int irq, int sense_code) { struct powerpc_intr *i; i = intr_lookup(irq); if (i == NULL) return (ENOMEM); i->trig = -1; i->pol = INTR_POLARITY_CONFORM; i->fwcode = sense_code; if (!cold && i->pic != NULL) { PIC_TRANSLATE_CODE(i->pic, i->intline, i->fwcode, &i->trig, &i->pol); PIC_CONFIG(i->pic, i->intline, i->trig, i->pol); } return (0); } int powerpc_config_intr(int irq, enum intr_trigger trig, enum intr_polarity pol) { struct powerpc_intr *i; i = intr_lookup(irq); if (i == NULL) return (ENOMEM); i->trig = trig; i->pol = pol; if (!cold && i->pic != NULL) PIC_CONFIG(i->pic, i->intline, trig, pol); return (0); } void powerpc_dispatch_intr(u_int vector, struct trapframe *tf) { struct powerpc_intr *i; struct intr_event *ie; i = powerpc_intrs[vector]; if (i == NULL) goto stray; (*i->cntp)++; ie = i->event; KASSERT(ie != NULL, ("%s: interrupt without an event", __func__)); /* * IPIs are magical and need to be EOI'ed before filtering. * This prevents races in IPI handling. */ if (i->ipi) PIC_EOI(i->pic, i->intline); if (intr_event_handle(ie, tf) != 0) { goto stray; } return; stray: stray_count++; if (stray_count <= MAX_STRAY_LOG) { printf("stray irq %d\n", i ? 
i->irq : -1); if (stray_count >= MAX_STRAY_LOG) { printf("got %d stray interrupts, not logging anymore\n", MAX_STRAY_LOG); } } if (i != NULL) PIC_MASK(i->pic, i->intline); } diff --git a/sys/powerpc/powerpc/machdep.c b/sys/powerpc/powerpc/machdep.c index dea3461ad19e..0913a6078388 100644 --- a/sys/powerpc/powerpc/machdep.c +++ b/sys/powerpc/powerpc/machdep.c @@ -1,504 +1,505 @@ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 2001 Benno Rice * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* $NetBSD: machdep.c,v 1.74.2.1 2000/11/01 16:13:48 tv Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ddb.h" #include "opt_kstack_pages.h" #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef __powerpc64__ #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include int cold = 1; #ifdef __powerpc64__ int cacheline_size = 128; #else int cacheline_size = 32; #endif int hw_direct_map = 1; extern void *ap_pcpu; struct pcpu __pcpu[MAXCPU]; static struct trapframe frame0; char machine[] = "powerpc"; SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, ""); static void cpu_startup(void *); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); SYSCTL_INT(_machdep, CPU_CACHELINE, cacheline_size, CTLFLAG_RD, &cacheline_size, 0, ""); uintptr_t powerpc_init(vm_offset_t, vm_offset_t, vm_offset_t, void *); long Maxmem = 0; long realmem = 0; struct kva_md_info kmi; static void cpu_startup(void *dummy) { /* * Initialise the decrementer-based clock. */ decr_init(); /* * Good {morning,afternoon,evening,night}. */ cpu_setup(PCPU_GET(cpuid)); #ifdef PERFMON perfmon_init(); #endif printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)physmem), ptoa((uintmax_t)physmem) / 1048576); realmem = physmem; if (bootverbose) printf("available KVA = %zu (%zu MB)\n", virtual_end - virtual_avail, (virtual_end - virtual_avail) / 1048576); /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size1 = phys_avail[indx + 1] - phys_avail[indx]; #ifdef __powerpc64__ printf("0x%016jx - 0x%016jx, %jd bytes (%jd pages)\n", #else printf("0x%09jx - 0x%09jx, %ju bytes (%ju pages)\n", #endif (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size1, (uintmax_t)size1 / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)vm_cnt.v_free_count), ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); } extern vm_offset_t __startkernel, __endkernel; extern unsigned char __bss_start[]; extern unsigned char __sbss_start[]; extern unsigned char __sbss_end[]; extern unsigned char _end[]; void aim_cpu_init(vm_offset_t toc); void booke_cpu_init(void); uintptr_t powerpc_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry, void *mdp) { struct pcpu *pc; vm_offset_t startkernel, endkernel; void *kmdp; char *env; #ifdef DDB vm_offset_t ksym_start; vm_offset_t ksym_end; #endif kmdp = NULL; /* First guess at start/end kernel positions */ startkernel = __startkernel; endkernel = __endkernel; /* Check for ePAPR loader, which puts a magic value into r6 */ if (mdp == (void *)0x65504150) mdp = NULL; /* * Parse metadata if present and fetch parameters. Must be done * before console is inited so cninit gets the right value of * boothowto. 
*/ if (mdp != NULL) { preload_metadata = mdp; kmdp = preload_search_by_type("elf kernel"); if (kmdp != NULL) { boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); - kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); + init_static_kenv(MD_FETCH(kmdp, MODINFOMD_ENVP, char *), + 0); endkernel = ulmax(endkernel, MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t)); #ifdef DDB ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t); ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t); db_fetch_ksymtab(ksym_start, ksym_end); #endif } } else { bzero(__sbss_start, __sbss_end - __sbss_start); bzero(__bss_start, _end - __bss_start); } #ifdef BOOKE tlb1_init(); #endif /* Store boot environment state */ OF_initial_setup((void *)fdt, NULL, (int (*)(void *))ofentry); /* * Init params/tunables that can be overridden by the loader */ init_param1(); /* * Start initializing proc0 and thread0. */ proc_linkup0(&proc0, &thread0); thread0.td_frame = &frame0; /* * Set up per-cpu data. */ pc = __pcpu; pcpu_init(pc, 0, sizeof(struct pcpu)); pc->pc_curthread = &thread0; #ifdef __powerpc64__ __asm __volatile("mr 13,%0" :: "r"(pc->pc_curthread)); #else __asm __volatile("mr 2,%0" :: "r"(pc->pc_curthread)); #endif pc->pc_cpuid = 0; __asm __volatile("mtsprg 0, %0" :: "r"(pc)); /* * Init mutexes, which we use heavily in PMAP */ mutex_init(); /* * Install the OF client interface */ OF_bootstrap(); /* * Initialize the console before printing anything. */ cninit(); /* * Complain if there is no metadata. */ if (mdp == NULL || kmdp == NULL) { printf("powerpc_init: no loader metadata.\n"); } /* * Init KDB */ kdb_init(); #ifdef AIM aim_cpu_init(toc); #else /* BOOKE */ booke_cpu_init(); /* Make sure the kernel icache is valid before we go too much further */ __syncicache((caddr_t)startkernel, endkernel - startkernel); #endif /* * Choose a platform module so we can get the physical memory map. */ platform_probe_and_attach(); /* * Bring up MMU */ pmap_bootstrap(startkernel, endkernel); mtmsr(PSL_KERNSET & ~PSL_EE); /* * Initialize params/tunables that are derived from memsize */ init_param2(physmem); /* * Grab booted kernel's name */ env = kern_getenv("kernelname"); if (env != NULL) { strlcpy(kernelname, env, sizeof(kernelname)); freeenv(env); } /* * Finish setting up thread0. */ thread0.td_pcb = (struct pcb *) ((thread0.td_kstack + thread0.td_kstack_pages * PAGE_SIZE - sizeof(struct pcb)) & ~15UL); bzero((void *)thread0.td_pcb, sizeof(struct pcb)); pc->pc_curpcb = thread0.td_pcb; /* Initialise the message buffer. */ msgbufinit(msgbufp, msgbufsize); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif return (((uintptr_t)thread0.td_pcb - (sizeof(struct callframe) - 3*sizeof(register_t))) & ~15UL); } void bzero(void *buf, size_t len) { caddr_t p; p = buf; while (((vm_offset_t) p & (sizeof(u_long) - 1)) && len) { *p++ = 0; len--; } while (len >= sizeof(u_long) * 8) { *(u_long*) p = 0; *((u_long*) p + 1) = 0; *((u_long*) p + 2) = 0; *((u_long*) p + 3) = 0; len -= sizeof(u_long) * 8; *((u_long*) p + 4) = 0; *((u_long*) p + 5) = 0; *((u_long*) p + 6) = 0; *((u_long*) p + 7) = 0; p += sizeof(u_long) * 8; } while (len >= sizeof(u_long)) { *(u_long*) p = 0; len -= sizeof(u_long); p += sizeof(u_long); } while (len) { *p++ = 0; len--; } } /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { register_t addr, off; /* * Align the address to a cacheline and adjust the length * accordingly. 
Then round the length to a multiple of the * cacheline for easy looping. */ addr = (uintptr_t)ptr; off = addr & (cacheline_size - 1); addr -= off; len = (len + off + cacheline_size - 1) & ~(cacheline_size - 1); while (len > 0) { __asm __volatile ("dcbf 0,%0" :: "r"(addr)); __asm __volatile ("sync"); addr += cacheline_size; len -= cacheline_size; } } int ptrace_set_pc(struct thread *td, unsigned long addr) { struct trapframe *tf; tf = td->td_frame; tf->srr0 = (register_t)addr; return (0); } void spinlock_enter(void) { struct thread *td; register_t msr; td = curthread; if (td->td_md.md_spinlock_count == 0) { __asm __volatile("or 2,2,2"); /* Set high thread priority */ msr = intr_disable(); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_msr = msr; } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t msr; td = curthread; critical_exit(); msr = td->td_md.md_saved_msr; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) { intr_restore(msr); __asm __volatile("or 6,6,6"); /* Set normal thread priority */ } } diff --git a/sys/sparc64/include/ofw_machdep.h b/sys/sparc64/include/ofw_machdep.h index bc69b3b3773c..fcacc10d446d 100644 --- a/sys/sparc64/include/ofw_machdep.h +++ b/sys/sparc64/include/ofw_machdep.h @@ -1,45 +1,48 @@ /*- * Copyright (c) 2001 by Thomas Moestl . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_OFW_MACHDEP_H_ #define _MACHINE_OFW_MACHDEP_H_ #include #include #include typedef uint64_t cell_t; +/* sparc64 doesn't use the interrupt parent #address-cells in interrupt maps */ +#define OFW_IMAP_NO_IPARENT_ADDR_CELLS + int OF_decode_addr(phandle_t, int, int *, bus_addr_t *); void OF_getetheraddr(device_t, u_char *); u_int OF_getscsinitid(device_t); void OF_panic(const char *fmt, ...) __dead2 __printflike(1, 2); void cpu_shutdown(void *) __dead2; int ofw_entry(void *); void ofw_exit(void *) __dead2; #endif /* _MACHINE_OFW_MACHDEP_H_ */ diff --git a/sys/sparc64/sparc64/machdep.c b/sys/sparc64/sparc64/machdep.c index 95b10e3f1573..d3cd7a248543 100644 --- a/sys/sparc64/sparc64/machdep.c +++ b/sys/sparc64/sparc64/machdep.c @@ -1,1111 +1,1112 @@ /*- * Copyright (c) 2001 Jake Burkholder. * Copyright (c) 1992 Terrence R. Lambert. 
* Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 * from: FreeBSD: src/sys/i386/i386/machdep.c,v 1.477 2001/08/27 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ddb.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include typedef int ofw_vec_t(void *); int dtlb_slots; int itlb_slots; struct tlb_entry *kernel_tlbs; int kernel_tlb_slots; int cold = 1; long Maxmem; long realmem; void *dpcpu0; char pcpu0[PCPU_PAGES * PAGE_SIZE]; struct trapframe frame0; vm_offset_t kstack0; vm_paddr_t kstack0_phys; struct kva_md_info kmi; u_long ofw_vec; u_long ofw_tba; u_int tba_taken_over; char sparc64_model[32]; static int cpu_use_vis = 1; cpu_block_copy_t *cpu_block_copy; cpu_block_zero_t *cpu_block_zero; static phandle_t find_bsp(phandle_t node, uint32_t bspid, u_int cpu_impl); void sparc64_init(caddr_t mdp, u_long o1, u_long o2, u_long o3, ofw_vec_t *vec); static void sparc64_shutdown_final(void *dummy, int howto); static void cpu_startup(void *arg); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); CTASSERT((1 << INT_SHIFT) == sizeof(int)); CTASSERT((1 << PTR_SHIFT) == sizeof(char *)); CTASSERT(sizeof(struct reg) == 256); CTASSERT(sizeof(struct fpreg) == 272); CTASSERT(sizeof(struct __mcontext) == 512); CTASSERT((sizeof(struct pcb) & (64 - 1)) == 0); CTASSERT((offsetof(struct pcb, pcb_kfp) & (64 - 1)) == 0); 
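/*
 * [Editorial aside, not part of this change] The CTASSERT() lines here
 * are FreeBSD's compile-time assertions: when the predicate is false
 * they expand to an illegal declaration, so a broken layout assumption
 * fails the build instead of corrupting a running kernel. A minimal
 * sketch of the underlying idiom (the real macro lives in
 * <sys/systm.h> and pastes __LINE__ into the typedef name so each use
 * is unique; MY_CTASSERT below is an illustrative stand-in):
 *
 *	#define MY_CTASSERT(x)	typedef char my_ctassert_t[(x) ? 1 : -1]
 *
 *	MY_CTASSERT(sizeof(uint32_t) == 4);	// compiles
 *	MY_CTASSERT(sizeof(uint32_t) == 3);	// negative array size: error
 */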
CTASSERT((offsetof(struct pcb, pcb_ufp) & (64 - 1)) == 0); CTASSERT(sizeof(struct pcb) <= ((KSTACK_PAGES * PAGE_SIZE) / 8)); CTASSERT(sizeof(struct pcpu) <= ((PCPU_PAGES * PAGE_SIZE) / 2)); static void cpu_startup(void *arg) { vm_paddr_t physsz; int i; physsz = 0; for (i = 0; i < sparc64_nmemreg; i++) physsz += sparc64_memreg[i].mr_size; printf("real memory = %lu (%lu MB)\n", physsz, physsz / (1024 * 1024)); realmem = (long)physsz / PAGE_SIZE; vm_ksubmap_init(&kmi); bufinit(); vm_pager_bufferinit(); EVENTHANDLER_REGISTER(shutdown_final, sparc64_shutdown_final, NULL, SHUTDOWN_PRI_LAST); printf("avail memory = %lu (%lu MB)\n", vm_cnt.v_free_count * PAGE_SIZE, vm_cnt.v_free_count / ((1024 * 1024) / PAGE_SIZE)); if (bootverbose) printf("machine: %s\n", sparc64_model); cpu_identify(rdpr(ver), PCPU_GET(clock), curcpu); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { struct intr_request *ir; int i; pcpu->pc_irtail = &pcpu->pc_irhead; for (i = 0; i < IR_FREE; i++) { ir = &pcpu->pc_irpool[i]; ir->ir_next = pcpu->pc_irfree; pcpu->pc_irfree = ir; } } void spinlock_enter(void) { struct thread *td; register_t pil; td = curthread; if (td->td_md.md_spinlock_count == 0) { pil = rdpr(pil); wrpr(pil, 0, PIL_TICK); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_pil = pil; } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t pil; td = curthread; critical_exit(); pil = td->td_md.md_saved_pil; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) wrpr(pil, pil, 0); } static phandle_t find_bsp(phandle_t node, uint32_t bspid, u_int cpu_impl) { char type[sizeof("cpu")]; phandle_t child; uint32_t portid; for (; node != 0; node = OF_peer(node)) { child = OF_child(node); if (child > 0) { child = find_bsp(child, bspid, cpu_impl); if (child > 0) return (child); } else { if (OF_getprop(node, "device_type", type, sizeof(type)) <= 0) continue; if (strcmp(type, "cpu") != 0) continue; if (OF_getprop(node, cpu_portid_prop(cpu_impl), &portid, sizeof(portid)) <= 0) continue; if (portid == bspid) return (node); } } return (0); } const char * cpu_portid_prop(u_int cpu_impl) { switch (cpu_impl) { case CPU_IMPL_SPARC64: case CPU_IMPL_SPARC64V: case CPU_IMPL_ULTRASPARCI: case CPU_IMPL_ULTRASPARCII: case CPU_IMPL_ULTRASPARCIIi: case CPU_IMPL_ULTRASPARCIIe: return ("upa-portid"); case CPU_IMPL_ULTRASPARCIII: case CPU_IMPL_ULTRASPARCIIIp: case CPU_IMPL_ULTRASPARCIIIi: case CPU_IMPL_ULTRASPARCIIIip: return ("portid"); case CPU_IMPL_ULTRASPARCIV: case CPU_IMPL_ULTRASPARCIVp: return ("cpuid"); default: return (""); } } uint32_t cpu_get_mid(u_int cpu_impl) { switch (cpu_impl) { case CPU_IMPL_SPARC64: case CPU_IMPL_SPARC64V: case CPU_IMPL_ULTRASPARCI: case CPU_IMPL_ULTRASPARCII: case CPU_IMPL_ULTRASPARCIIi: case CPU_IMPL_ULTRASPARCIIe: return (UPA_CR_GET_MID(ldxa(0, ASI_UPA_CONFIG_REG))); case CPU_IMPL_ULTRASPARCIII: case CPU_IMPL_ULTRASPARCIIIp: return (FIREPLANE_CR_GET_AID(ldxa(AA_FIREPLANE_CONFIG, ASI_FIREPLANE_CONFIG_REG))); case CPU_IMPL_ULTRASPARCIIIi: case CPU_IMPL_ULTRASPARCIIIip: return (JBUS_CR_GET_JID(ldxa(0, ASI_JBUS_CONFIG_REG))); case CPU_IMPL_ULTRASPARCIV: case CPU_IMPL_ULTRASPARCIVp: return (INTR_ID_GET_ID(ldxa(AA_INTR_ID, ASI_INTR_ID))); default: return (0); } } void sparc64_init(caddr_t mdp, u_long o1, u_long o2, u_long o3, ofw_vec_t *vec) { char *env; struct pcpu *pc; vm_offset_t end; vm_offset_t va; caddr_t kmdp; phandle_t root; u_int cpu_impl; end = 0; kmdp = NULL; /* * Find out what kind of CPU we have first, for 
anything that changes * behaviour. */ cpu_impl = VER_IMPL(rdpr(ver)); /* * Do CPU-specific initialization. */ if (cpu_impl >= CPU_IMPL_ULTRASPARCIII) cheetah_init(cpu_impl); else if (cpu_impl == CPU_IMPL_SPARC64V) zeus_init(cpu_impl); /* * Clear (S)TICK timer (including NPT). */ tick_clear(cpu_impl); /* * UltraSparc II[e,i] based systems come up with the tick interrupt * enabled and a handler that resets the tick counter, causing DELAY() * to not work properly when used early in boot. * UltraSPARC III based systems come up with the system tick interrupt * enabled, causing an interrupt storm on startup since they are not * handled. */ tick_stop(cpu_impl); /* * Set up Open Firmware entry points. */ ofw_tba = rdpr(tba); ofw_vec = (u_long)vec; /* * Parse metadata if present and fetch parameters. Must be before the * console is inited so cninit() gets the right value of boothowto. */ if (mdp != NULL) { preload_metadata = mdp; kmdp = preload_search_by_type("elf kernel"); if (kmdp != NULL) { boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); - kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); + init_static_kenv(MD_FETCH(kmdp, MODINFOMD_ENVP, char *), + 0); end = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t); kernel_tlb_slots = MD_FETCH(kmdp, MODINFOMD_DTLB_SLOTS, int); kernel_tlbs = (void *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_DTLB); } } init_param1(); /* * Initialize Open Firmware (needed for console). */ OF_install(OFW_STD_DIRECT, 0); OF_init(ofw_entry); /* * Prime our per-CPU data page for use. Note, we are using it for * our stack, so don't pass the real size (PAGE_SIZE) to pcpu_init * or it'll zero it out from under us. */ pc = (struct pcpu *)(pcpu0 + (PCPU_PAGES * PAGE_SIZE)) - 1; pcpu_init(pc, 0, sizeof(struct pcpu)); pc->pc_addr = (vm_offset_t)pcpu0; pc->pc_impl = cpu_impl; pc->pc_mid = cpu_get_mid(cpu_impl); pc->pc_tlb_ctx = TLB_CTX_USER_MIN; pc->pc_tlb_ctx_min = TLB_CTX_USER_MIN; pc->pc_tlb_ctx_max = TLB_CTX_USER_MAX; /* * Determine the OFW node and frequency of the BSP (and ensure the * BSP is in the device tree in the first place). */ root = OF_peer(0); pc->pc_node = find_bsp(root, pc->pc_mid, cpu_impl); if (pc->pc_node == 0) OF_panic("%s: cannot find boot CPU node", __func__); if (OF_getprop(pc->pc_node, "clock-frequency", &pc->pc_clock, sizeof(pc->pc_clock)) <= 0) OF_panic("%s: cannot determine boot CPU clock", __func__); /* * Panic if there is no metadata. Most likely the kernel was booted * directly, instead of through loader(8). */ if (mdp == NULL || kmdp == NULL || end == 0 || kernel_tlb_slots == 0 || kernel_tlbs == NULL) OF_panic("%s: missing loader metadata.\nThis probably means " "you are not using loader(8).", __func__); /* * Work around the broken loader behavior of not demapping no * longer used kernel TLB slots when unloading the kernel or * modules. */ for (va = KERNBASE + (kernel_tlb_slots - 1) * PAGE_SIZE_4M; va >= roundup2(end, PAGE_SIZE_4M); va -= PAGE_SIZE_4M) { if (bootverbose) OF_printf("demapping unused kernel TLB slot " "(va %#lx - %#lx)\n", va, va + PAGE_SIZE_4M - 1); stxa(TLB_DEMAP_VA(va) | TLB_DEMAP_PRIMARY | TLB_DEMAP_PAGE, ASI_DMMU_DEMAP, 0); stxa(TLB_DEMAP_VA(va) | TLB_DEMAP_PRIMARY | TLB_DEMAP_PAGE, ASI_IMMU_DEMAP, 0); flush(KERNBASE); kernel_tlb_slots--; } /* * Determine the TLB slot maxima, which are expected to be * equal across all CPUs. * NB: for cheetah-class CPUs, these properties only refer * to the t16s. 
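 * (That is, only the small fully-associative t16 TLBs are counted
 * here; the larger set-associative TLBs of those CPUs are not
 * reflected in these OFW properties.)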
*/ if (OF_getprop(pc->pc_node, "#dtlb-entries", &dtlb_slots, sizeof(dtlb_slots)) == -1) OF_panic("%s: cannot determine number of dTLB slots", __func__); if (OF_getprop(pc->pc_node, "#itlb-entries", &itlb_slots, sizeof(itlb_slots)) == -1) OF_panic("%s: cannot determine number of iTLB slots", __func__); /* * Initialize and enable the caches. Note that this may include * applying workarounds. */ cache_init(pc); cache_enable(cpu_impl); uma_set_align(pc->pc_cache.dc_linesize - 1); cpu_block_copy = bcopy; cpu_block_zero = bzero; getenv_int("machdep.use_vis", &cpu_use_vis); if (cpu_use_vis) { switch (cpu_impl) { case CPU_IMPL_SPARC64: case CPU_IMPL_ULTRASPARCI: case CPU_IMPL_ULTRASPARCII: case CPU_IMPL_ULTRASPARCIIi: case CPU_IMPL_ULTRASPARCIIe: case CPU_IMPL_ULTRASPARCIII: /* NB: we've disabled P$. */ case CPU_IMPL_ULTRASPARCIIIp: case CPU_IMPL_ULTRASPARCIIIi: case CPU_IMPL_ULTRASPARCIV: case CPU_IMPL_ULTRASPARCIVp: case CPU_IMPL_ULTRASPARCIIIip: cpu_block_copy = spitfire_block_copy; cpu_block_zero = spitfire_block_zero; break; case CPU_IMPL_SPARC64V: cpu_block_copy = zeus_block_copy; cpu_block_zero = zeus_block_zero; break; } } #ifdef SMP mp_init(); #endif /* * Initialize virtual memory and calculate physmem. */ pmap_bootstrap(cpu_impl); /* * Initialize tunables. */ init_param2(physmem); env = kern_getenv("kernelname"); if (env != NULL) { strlcpy(kernelname, env, sizeof(kernelname)); freeenv(env); } /* * Initialize the interrupt tables. */ intr_init1(); /* * Initialize proc0, set kstack0, frame0, curthread and curpcb. */ proc_linkup0(&proc0, &thread0); proc0.p_md.md_sigtramp = NULL; proc0.p_md.md_utrap = NULL; thread0.td_kstack = kstack0; thread0.td_kstack_pages = KSTACK_PAGES; thread0.td_pcb = (struct pcb *) (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; frame0.tf_tstate = TSTATE_IE | TSTATE_PEF | TSTATE_PRIV; thread0.td_frame = &frame0; pc->pc_curthread = &thread0; pc->pc_curpcb = thread0.td_pcb; /* * Initialize global registers. */ cpu_setregs(pc); /* * Take over the trap table via the PROM. Using the PROM for this * is necessary in order to set obp-control-relinquished to true * within the PROM so obtaining /virtual-memory/translations doesn't * trigger a fatal reset error or worse things further down the road. * XXX it should be possible to use this solely instead of writing * %tba in cpu_setregs(). Doing so causes a hang however. * * NB: the low-level console drivers require a working DELAY() and * some compiler optimizations may cause the curthread accesses of * mutex(9) to be factored out even if the latter aren't actually * called. Both of these require PCPU_REG to be set. However, we * can't set PCPU_REG without also taking over the trap table or the * firmware will overwrite it. */ sun4u_set_traptable(tl0_base); /* * Initialize the dynamic per-CPU area for the BSP and the message * buffer (after setting the trap table). */ dpcpu_init(dpcpu0, 0); msgbufinit(msgbufp, msgbufsize); /* * Initialize mutexes. */ mutex_init(); /* * Initialize console now that we have a reasonable set of system * services. */ cninit(); /* * Finish the interrupt initialization now that mutexes work and * enable them. 
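 * (The wrpr(pil, 0, 0) below drops the processor interrupt level back
 * to 0, unmasking all interrupt levels now that the handlers and the
 * mutexes they depend on are in place.)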
*/ intr_init2(); wrpr(pil, 0, 0); wrpr(pstate, 0, PSTATE_KERNEL); OF_getprop(root, "name", sparc64_model, sizeof(sparc64_model) - 1); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct trapframe *tf; struct sigframe *sfp; struct sigacts *psp; struct sigframe sf; struct thread *td; struct frame *fp; struct proc *p; u_long sp; int oonstack; int sig; oonstack = 0; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; sp = tf->tf_sp + SPOFF; oonstack = sigonstack(sp); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Make sure we have a signal trampoline to return to. */ if (p->p_md.md_sigtramp == NULL) { /* * No signal trampoline... kill the process. */ CTR0(KTR_SIG, "sendsig: no sigtramp"); printf("sendsig: %s is too old, rebuild it\n", p->p_comm); sigexit(td, sig); /* NOTREACHED */ } /* Save user context. */ bzero(&sf, sizeof(sf)); get_mcontext(td, &sf.sf_uc.uc_mcontext, 0); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe)); } else sfp = (struct sigframe *)sp - 1; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); fp = (struct frame *)sfp - 1; /* Build the argument list for the signal handler. */ tf->tf_out[0] = sig; tf->tf_out[2] = (register_t)&sfp->sf_uc; tf->tf_out[4] = (register_t)catcher; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ tf->tf_out[1] = (register_t)&sfp->sf_si; /* Fill in POSIX parts. */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* maybe a translated signal */ } else { /* Old FreeBSD-style arguments. */ tf->tf_out[1] = ksi->ksi_code; tf->tf_out[3] = (register_t)ksi->ksi_addr; } /* Copy the sigframe out to the user's stack. */ if (rwindow_save(td) != 0 || copyout(&sf, sfp, sizeof(*sfp)) != 0 || suword(&fp->fr_in[6], tf->tf_out[6]) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. 
*/ CTR2(KTR_SIG, "sendsig: sigexit td=%p sfp=%p", td, sfp); PROC_LOCK(p); sigexit(td, SIGILL); /* NOTREACHED */ } tf->tf_tpc = (u_long)p->p_md.md_sigtramp; tf->tf_tnpc = tf->tf_tpc + 4; tf->tf_sp = (u_long)fp - SPOFF; CTR3(KTR_SIG, "sendsig: return td=%p pc=%#lx sp=%#lx", td, tf->tf_tpc, tf->tf_sp); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #ifndef _SYS_SYSPROTO_H_ struct sigreturn_args { ucontext_t *ucp; }; #endif /* * MPSAFE */ int sys_sigreturn(struct thread *td, struct sigreturn_args *uap) { struct proc *p; mcontext_t *mc; ucontext_t uc; int error; p = td->td_proc; if (rwindow_save(td)) { PROC_LOCK(p); sigexit(td, SIGILL); } CTR2(KTR_SIG, "sigreturn: td=%p ucp=%p", td, uap->sigcntxp); if (copyin(uap->sigcntxp, &uc, sizeof(uc)) != 0) { CTR1(KTR_SIG, "sigreturn: efault td=%p", td); return (EFAULT); } mc = &uc.uc_mcontext; error = set_mcontext(td, mc); if (error != 0) return (error); kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); CTR4(KTR_SIG, "sigreturn: return td=%p pc=%#lx sp=%#lx tstate=%#lx", td, mc->_mc_tpc, mc->_mc_sp, mc->_mc_tstate); return (EJUSTRETURN); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_pc = tf->tf_tpc; pcb->pcb_sp = tf->tf_sp; } int get_mcontext(struct thread *td, mcontext_t *mc, int flags) { struct trapframe *tf; struct pcb *pcb; tf = td->td_frame; pcb = td->td_pcb; /* * Copy the registers which will be restored by tl0_ret() from the * trapframe. * Note that we skip %g7 which is used as the userland TLS register * and %wstate. */ mc->_mc_flags = _MC_VERSION; mc->mc_global[1] = tf->tf_global[1]; mc->mc_global[2] = tf->tf_global[2]; mc->mc_global[3] = tf->tf_global[3]; mc->mc_global[4] = tf->tf_global[4]; mc->mc_global[5] = tf->tf_global[5]; mc->mc_global[6] = tf->tf_global[6]; if (flags & GET_MC_CLEAR_RET) { mc->mc_out[0] = 0; mc->mc_out[1] = 0; } else { mc->mc_out[0] = tf->tf_out[0]; mc->mc_out[1] = tf->tf_out[1]; } mc->mc_out[2] = tf->tf_out[2]; mc->mc_out[3] = tf->tf_out[3]; mc->mc_out[4] = tf->tf_out[4]; mc->mc_out[5] = tf->tf_out[5]; mc->mc_out[6] = tf->tf_out[6]; mc->mc_out[7] = tf->tf_out[7]; mc->_mc_fprs = tf->tf_fprs; mc->_mc_fsr = tf->tf_fsr; mc->_mc_gsr = tf->tf_gsr; mc->_mc_tnpc = tf->tf_tnpc; mc->_mc_tpc = tf->tf_tpc; mc->_mc_tstate = tf->tf_tstate; mc->_mc_y = tf->tf_y; critical_enter(); if ((tf->tf_fprs & FPRS_FEF) != 0) { savefpctx(pcb->pcb_ufp); tf->tf_fprs &= ~FPRS_FEF; pcb->pcb_flags |= PCB_FEF; } if ((pcb->pcb_flags & PCB_FEF) != 0) { bcopy(pcb->pcb_ufp, mc->mc_fp, sizeof(mc->mc_fp)); mc->_mc_fprs |= FPRS_FEF; } critical_exit(); return (0); } int set_mcontext(struct thread *td, mcontext_t *mc) { struct trapframe *tf; struct pcb *pcb; if (!TSTATE_SECURE(mc->_mc_tstate) || (mc->_mc_flags & ((1L << _MC_VERSION_BITS) - 1)) != _MC_VERSION) return (EINVAL); tf = td->td_frame; pcb = td->td_pcb; /* Make sure the windows are spilled first. */ flushw(); /* * Copy the registers which will be restored by tl0_ret() to the * trapframe. * Note that we skip %g7 which is used as the userland TLS register * and %wstate. 
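 * (%g7 holds the userland thread pointer, so honoring a
 * caller-supplied value would corrupt TLS, and %wstate belongs to the
 * register window spill/fill machinery.)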
*/ tf->tf_global[1] = mc->mc_global[1]; tf->tf_global[2] = mc->mc_global[2]; tf->tf_global[3] = mc->mc_global[3]; tf->tf_global[4] = mc->mc_global[4]; tf->tf_global[5] = mc->mc_global[5]; tf->tf_global[6] = mc->mc_global[6]; tf->tf_out[0] = mc->mc_out[0]; tf->tf_out[1] = mc->mc_out[1]; tf->tf_out[2] = mc->mc_out[2]; tf->tf_out[3] = mc->mc_out[3]; tf->tf_out[4] = mc->mc_out[4]; tf->tf_out[5] = mc->mc_out[5]; tf->tf_out[6] = mc->mc_out[6]; tf->tf_out[7] = mc->mc_out[7]; tf->tf_fprs = mc->_mc_fprs; tf->tf_fsr = mc->_mc_fsr; tf->tf_gsr = mc->_mc_gsr; tf->tf_tnpc = mc->_mc_tnpc; tf->tf_tpc = mc->_mc_tpc; tf->tf_tstate = mc->_mc_tstate; tf->tf_y = mc->_mc_y; if ((mc->_mc_fprs & FPRS_FEF) != 0) { tf->tf_fprs = 0; bcopy(mc->mc_fp, pcb->pcb_ufp, sizeof(pcb->pcb_ufp)); pcb->pcb_flags |= PCB_FEF; } return (0); } /* * Exit the kernel and execute a firmware call that will not return, as * specified by the arguments. */ void cpu_shutdown(void *args) { #ifdef SMP cpu_mp_shutdown(); #endif ofw_exit(args); } /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { /* TBD */ } /* Get current clock frequency for the given CPU ID. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { struct pcpu *pc; pc = pcpu_find(cpu_id); if (pc == NULL || rate == NULL) return (EINVAL); *rate = pc->pc_clock; return (0); } /* * Duplicate OF_exit() with a different firmware call function that restores * the trap table, otherwise a RED state exception is triggered in at least * some firmware versions. */ void cpu_halt(void) { static struct { cell_t name; cell_t nargs; cell_t nreturns; } args = { (cell_t)"exit", 0, 0 }; cpu_shutdown(&args); } static void sparc64_shutdown_final(void *dummy, int howto) { static struct { cell_t name; cell_t nargs; cell_t nreturns; } args = { (cell_t)"SUNW,power-off", 0, 0 }; /* Turn the power off? */ if ((howto & RB_POWEROFF) != 0) cpu_shutdown(&args); /* In case of halt, return to the firmware. */ if ((howto & RB_HALT) != 0) cpu_halt(); } void cpu_idle(int busy) { /* Insert code to halt (until next interrupt) for the idle loop. */ } int cpu_idle_wakeup(int cpu) { return (1); } int ptrace_set_pc(struct thread *td, u_long addr) { td->td_frame->tf_tpc = addr; td->td_frame->tf_tnpc = addr + 4; return (0); } int ptrace_single_step(struct thread *td) { /* TODO; */ return (0); } int ptrace_clear_single_step(struct thread *td) { /* TODO; */ return (0); } void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *tf; struct pcb *pcb; struct proc *p; u_long sp; /* XXX no cpu_exec */ p = td->td_proc; p->p_md.md_sigtramp = NULL; if (p->p_md.md_utrap != NULL) { utrap_free(p->p_md.md_utrap); p->p_md.md_utrap = NULL; } pcb = td->td_pcb; tf = td->td_frame; sp = rounddown(stack, 16); bzero(pcb, sizeof(*pcb)); bzero(tf, sizeof(*tf)); tf->tf_out[0] = stack; tf->tf_out[3] = p->p_sysent->sv_psstrings; tf->tf_out[6] = sp - SPOFF - sizeof(struct frame); tf->tf_tnpc = imgp->entry_addr + 4; tf->tf_tpc = imgp->entry_addr; /* * While we could adhere to the memory model indicated in the ELF * header, it turns out that just always using TSO performs best. 
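 * (TSO is the strongest of the SPARC V9 memory models, so forcing it
 * is always correct even for binaries marked PSO or RMO; the only
 * possible cost is performance, which evidently is not a problem in
 * practice.)
 */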
tf->tf_tstate = TSTATE_IE | TSTATE_PEF | TSTATE_MM_TSO; td->td_retval[0] = tf->tf_out[0]; td->td_retval[1] = tf->tf_out[1]; } int fill_regs(struct thread *td, struct reg *regs) { bcopy(td->td_frame, regs, sizeof(*regs)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tf; if (!TSTATE_SECURE(regs->r_tstate)) return (EINVAL); tf = td->td_frame; regs->r_wstate = tf->tf_wstate; bcopy(regs, tf, sizeof(*regs)); return (0); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { return (ENOSYS); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { return (ENOSYS); } int fill_fpregs(struct thread *td, struct fpreg *fpregs) { struct trapframe *tf; struct pcb *pcb; pcb = td->td_pcb; tf = td->td_frame; bcopy(pcb->pcb_ufp, fpregs->fr_regs, sizeof(fpregs->fr_regs)); fpregs->fr_fsr = tf->tf_fsr; fpregs->fr_gsr = tf->tf_gsr; return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { struct trapframe *tf; struct pcb *pcb; pcb = td->td_pcb; tf = td->td_frame; tf->tf_fprs &= ~FPRS_FEF; bcopy(fpregs->fr_regs, pcb->pcb_ufp, sizeof(pcb->pcb_ufp)); tf->tf_fsr = fpregs->fr_fsr; tf->tf_gsr = fpregs->fr_gsr; return (0); } struct md_utrap * utrap_alloc(void) { struct md_utrap *ut; ut = malloc(sizeof(struct md_utrap), M_SUBPROC, M_WAITOK | M_ZERO); ut->ut_refcnt = 1; return (ut); } void utrap_free(struct md_utrap *ut) { int refcnt; if (ut == NULL) return; mtx_pool_lock(mtxpool_sleep, ut); ut->ut_refcnt--; refcnt = ut->ut_refcnt; mtx_pool_unlock(mtxpool_sleep, ut); if (refcnt == 0) free(ut, M_SUBPROC); } struct md_utrap * utrap_hold(struct md_utrap *ut) { if (ut == NULL) return (NULL); mtx_pool_lock(mtxpool_sleep, ut); ut->ut_refcnt++; mtx_pool_unlock(mtxpool_sleep, ut); return (ut); } diff --git a/sys/x86/xen/pv.c b/sys/x86/xen/pv.c index 58eb7363f9b4..1cea294ae825 100644 --- a/sys/x86/xen/pv.c +++ b/sys/x86/xen/pv.c @@ -1,438 +1,441 @@ /* * Copyright (c) 2004 Christian Limpach. * Copyright (c) 2004-2006,2008 Kip Macy * Copyright (c) 2008 The NetBSD Foundation, Inc. * Copyright (c) 2013 Roger Pau Monné * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE.
*/ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif /* Native initial function */ extern u_int64_t hammer_time(u_int64_t, u_int64_t); /* Xen initial function */ uint64_t hammer_time_xen(start_info_t *, uint64_t); #define MAX_E820_ENTRIES 128 /*--------------------------- Forward Declarations ---------------------------*/ static caddr_t xen_pv_parse_preload_data(u_int64_t); static void xen_pv_parse_memmap(caddr_t, vm_paddr_t *, int *); #ifdef SMP static int xen_pv_start_all_aps(void); #endif /*---------------------------- Extern Declarations ---------------------------*/ #ifdef SMP /* Variables used by amd64 mp_machdep to start APs */ extern char *doublefault_stack; extern char *nmi_stack; #endif /* * Placed by the linker at the end of the bss section, which is the last * section loaded by Xen before loading the symtab and strtab. */ extern uint32_t end; /*-------------------------------- Global Data -------------------------------*/ /* Xen init_ops implementation. */ struct init_ops xen_init_ops = { .parse_preload_data = xen_pv_parse_preload_data, .early_clock_source_init = xen_clock_init, .early_delay = xen_delay, .parse_memmap = xen_pv_parse_memmap, #ifdef SMP .start_all_aps = xen_pv_start_all_aps, #endif .msi_init = xen_msi_init, }; static struct bios_smap xen_smap[MAX_E820_ENTRIES]; /*-------------------------------- Xen PV init -------------------------------*/ /* * First function called by the Xen PVH boot sequence. * * Set some Xen global variables and prepare the environment so it is * as similar as possible to what native FreeBSD init function expects. */ uint64_t hammer_time_xen(start_info_t *si, uint64_t xenstack) { uint64_t physfree; uint64_t *PT4 = (u_int64_t *)xenstack; uint64_t *PT3 = (u_int64_t *)(xenstack + PAGE_SIZE); uint64_t *PT2 = (u_int64_t *)(xenstack + 2 * PAGE_SIZE); int i; xen_domain_type = XEN_PV_DOMAIN; vm_guest = VM_GUEST_XEN; if ((si == NULL) || (xenstack == 0)) { xc_printf("ERROR: invalid start_info or xen stack, halting\n"); HYPERVISOR_shutdown(SHUTDOWN_crash); } xc_printf("FreeBSD PVH running on %s\n", si->magic); /* We use 3 pages of xen stack for the boot pagetables */ physfree = xenstack + 3 * PAGE_SIZE - KERNBASE; /* Setup Xen global variables */ HYPERVISOR_start_info = si; HYPERVISOR_shared_info = (shared_info_t *)(si->shared_info + KERNBASE); /* * Setup some misc global variables for Xen devices * * XXX: Devices that need these specific variables should * be rewritten to fetch this info by themselves from the * start_info page. */ xen_store = (struct xenstore_domain_interface *) (ptoa(si->store_mfn) + KERNBASE); console_page = (char *)(ptoa(si->console.domU.mfn) + KERNBASE); /* * Use the stack Xen gives us to build the page tables * as native FreeBSD expects to find them (created * by the boot trampoline). 
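 * (Every PT4 slot points at the same PT3 page and every PT3 slot at
 * the same PT2 page, so the first 1GB of physical memory, mapped by
 * the 2MB entries in PT2, recurs at each 1GB-aligned virtual
 * address -- including KERNBASE, which is all that is needed until
 * pmap takes over.)
 */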
*/ for (i = 0; i < (PAGE_SIZE / sizeof(uint64_t)); i++) { /* * Each slot of the level 4 pages points * to the same level 3 page */ PT4[i] = ((uint64_t)&PT3[0]) - KERNBASE; PT4[i] |= PG_V | PG_RW | PG_U; /* * Each slot of the level 3 pages points * to the same level 2 page */ PT3[i] = ((uint64_t)&PT2[0]) - KERNBASE; PT3[i] |= PG_V | PG_RW | PG_U; /* * The level 2 page slots are mapped with * 2MB pages for 1GB. */ PT2[i] = i * (2 * 1024 * 1024); PT2[i] |= PG_V | PG_RW | PG_PS | PG_U; } load_cr3(((uint64_t)&PT4[0]) - KERNBASE); /* Set the hooks for early functions that diverge from bare metal */ init_ops = xen_init_ops; apic_ops = xen_apic_ops; /* Now we can jump into the native init function */ return (hammer_time(0, physfree)); } /*-------------------------------- PV specific -------------------------------*/ #ifdef SMP static bool start_xen_ap(int cpu) { struct vcpu_guest_context *ctxt; int ms, cpus = mp_naps; const size_t stacksize = kstack_pages * PAGE_SIZE; /* allocate and set up an idle stack data page */ bootstacks[cpu] = (void *)kmem_malloc(kernel_arena, stacksize, M_WAITOK | M_ZERO); doublefault_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO); nmi_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO); dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, M_WAITOK | M_ZERO); bootSTK = (char *)bootstacks[cpu] + kstack_pages * PAGE_SIZE - 8; bootAP = cpu; ctxt = malloc(sizeof(*ctxt), M_TEMP, M_WAITOK | M_ZERO); if (ctxt == NULL) panic("unable to allocate memory"); ctxt->flags = VGCF_IN_KERNEL; ctxt->user_regs.rip = (unsigned long) init_secondary; ctxt->user_regs.rsp = (unsigned long) bootSTK; /* Set the AP to use the same page tables */ ctxt->ctrlreg[3] = KPML4phys; if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) panic("unable to initialize AP#%d", cpu); free(ctxt, M_TEMP); /* Launch the vCPU */ if (HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) panic("unable to start AP#%d", cpu); /* Wait up to 5 seconds for it to start. */ for (ms = 0; ms < 5000; ms++) { if (mp_naps > cpus) return (true); DELAY(1000); } return (false); } static int xen_pv_start_all_aps(void) { int cpu; mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); for (cpu = 1; cpu < mp_ncpus; cpu++) { /* attempt to start the Application Processor */ if (!start_xen_ap(cpu)) panic("AP #%d failed to start!", cpu); CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } return (mp_naps); } #endif /* SMP */ /* * Functions to convert the "extra" parameters passed by Xen * into FreeBSD boot options. */ static void xen_pv_set_env(void) { char *cmd_line_next, *cmd_line; size_t env_size; cmd_line = HYPERVISOR_start_info->cmd_line; env_size = sizeof(HYPERVISOR_start_info->cmd_line); /* Skip leading spaces */ for (; isspace(*cmd_line) && (env_size != 0); cmd_line++) env_size--; /* Replace ',' with '\0' */ for (cmd_line_next = cmd_line; strsep(&cmd_line_next, ",") != NULL;) ; - init_static_kenv(cmd_line, env_size); + init_static_kenv(cmd_line, 0); } static void xen_pv_set_boothowto(void) { int i; char *env; /* get equivalents from the environment */ for (i = 0; howto_names[i].ev != NULL; i++) { if ((env = kern_getenv(howto_names[i].ev)) != NULL) { boothowto |= howto_names[i].mask; freeenv(env); } } } #ifdef DDB /* * The way Xen loads the symtab is different from the native boot loader, * because it's tailored for NetBSD. So we have to adapt and use the same * method as NetBSD. Portions of the code below have been picked from NetBSD: * sys/kern/kern_ksyms.c CVS Revision 1.71. 
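 * (Unlike loader(8), which hands over symbol metadata in the
 * preloaded module list, Xen simply places the ELF headers, symtab
 * and strtab after the kernel image, so the section headers are
 * walked by hand below to find SHT_SYMTAB and its linked string
 * table.)
 */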
static void xen_pv_parse_symtab(void) { Elf_Ehdr *ehdr; Elf_Shdr *shdr; vm_offset_t sym_end; uint32_t size; int i, j; size = end; sym_end = HYPERVISOR_start_info->mod_start != 0 ? HYPERVISOR_start_info->mod_start : HYPERVISOR_start_info->mfn_list; /* * Sanity-check the size; sym_end is only an upper boundary, but * it at least allows us to fail earlier. */ if ((vm_offset_t)&end + size > sym_end) { xc_printf("Unable to load ELF symtab: size mismatch\n"); return; } ehdr = (Elf_Ehdr *)(&end + 1); if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) || ehdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || ehdr->e_version > 1) { xc_printf("Unable to load ELF symtab: invalid symbol table\n"); return; } shdr = (Elf_Shdr *)((uint8_t *)ehdr + ehdr->e_shoff); /* Find the symbol table and the corresponding string table. */ for (i = 1; i < ehdr->e_shnum; i++) { if (shdr[i].sh_type != SHT_SYMTAB) continue; if (shdr[i].sh_offset == 0) continue; ksymtab = (uintptr_t)((uint8_t *)ehdr + shdr[i].sh_offset); ksymtab_size = shdr[i].sh_size; j = shdr[i].sh_link; if (shdr[j].sh_offset == 0) continue; /* Can this happen? */ kstrtab = (uintptr_t)((uint8_t *)ehdr + shdr[j].sh_offset); break; } if (ksymtab == 0 || kstrtab == 0) { xc_printf( "Unable to load ELF symtab: could not find symtab or strtab\n"); return; } } #endif static caddr_t xen_pv_parse_preload_data(u_int64_t modulep) { caddr_t kmdp; vm_ooffset_t off; vm_paddr_t metadata; + char *envp; if (HYPERVISOR_start_info->mod_start != 0) { preload_metadata = (caddr_t)(HYPERVISOR_start_info->mod_start); kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); KASSERT(kmdp != NULL, ("unable to find kernel")); /* * Xen has relocated the metadata and the modules, * so we need to recalculate its position. This is * done by saving the original modulep address and * then calculating the offset with mod_start, * which contains the relocated modulep address. */ metadata = MD_FETCH(kmdp, MODINFOMD_MODULEP, vm_paddr_t); off = HYPERVISOR_start_info->mod_start - metadata; preload_bootstrap_relocate(off); boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); - kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); - kern_envp += off; + envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); + if (envp != NULL) + envp += off; + init_static_kenv(envp, 0); } else { /* Parse the extra boot information given by Xen */ xen_pv_set_env(); xen_pv_set_boothowto(); kmdp = NULL; } #ifdef DDB xen_pv_parse_symtab(); #endif return (kmdp); } static void xen_pv_parse_memmap(caddr_t kmdp, vm_paddr_t *physmap, int *physmap_idx) { struct xen_memory_map memmap; u_int32_t size; int rc; /* Fetch the E820 map from Xen */ memmap.nr_entries = MAX_E820_ENTRIES; set_xen_guest_handle(memmap.buffer, xen_smap); rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); if (rc) panic("unable to fetch Xen E820 memory map"); size = memmap.nr_entries * sizeof(xen_smap[0]); bios_add_smap_entries(xen_smap, size, physmap, physmap_idx); } diff --git a/tools/regression/geom_mirror/conf.sh b/tools/regression/geom_mirror/conf.sh index 5e7e15a1d4d9..d8595f199bf3 100644 --- a/tools/regression/geom_mirror/conf.sh +++ b/tools/regression/geom_mirror/conf.sh @@ -1,8 +1,15 @@ #!/bin/sh # $FreeBSD$ name="$(mktemp -u mirror.XXXXXX)" class="mirror" base=`basename $0` +gmirror_test_cleanup() +{ + [ -c /dev/$class/$name ] && gmirror destroy $name + geom_test_cleanup +} +trap gmirror_test_cleanup ABRT EXIT INT TERM + .
`dirname $0`/../geom_subr.sh diff --git a/tools/regression/geom_mirror/test-1.t b/tools/regression/geom_mirror/test-1.t index b07a8d1cb1de..af82a14730c2 100644 --- a/tools/regression/geom_mirror/test-1.t +++ b/tools/regression/geom_mirror/test-1.t @@ -1,34 +1,23 @@ #!/bin/sh # $FreeBSD$ . `dirname $0`/conf.sh echo "1..1" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` +us0=$(attach_md -t malloc -s 1M) || exit 1 +us1=$(attach_md -t malloc -s 2M) || exit 1 +us2=$(attach_md -t malloc -s 3M) || exit 1 -mdconfig -a -t malloc -s 1M -u $us0 || exit 1 -mdconfig -a -t malloc -s 2M -u $us1 || exit 1 -mdconfig -a -t malloc -s 3M -u $us2 || exit 1 - -gmirror label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +gmirror label $name /dev/$us0 /dev/$us1 /dev/$us2 || exit 1 devwait # Size of created device should be 1MB - 512b. size=`diskinfo /dev/mirror/${name} | awk '{print $3}'` if [ $size -eq 1048064 ]; then echo "ok 1" else echo "not ok 1" fi - -gmirror remove $name md${us0} -gmirror remove $name md${us1} -gmirror remove $name md${us2} -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 diff --git a/tools/regression/geom_mirror/test-2.t b/tools/regression/geom_mirror/test-2.t index 3fb08224caf7..5605c44eb464 100644 --- a/tools/regression/geom_mirror/test-2.t +++ b/tools/regression/geom_mirror/test-2.t @@ -1,59 +1,52 @@ #!/bin/sh # $FreeBSD$ . `dirname $0`/conf.sh echo "1..4" balance="round-robin" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` ddbs=2048 nblocks1=1024 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1 +us0=$(attach_md -t malloc -s `expr $nblocks1 + 1`) || exit 1 +us1=$(attach_md -t malloc -s `expr $nblocks1 + 1`) || exit 1 +us2=$(attach_md -t malloc -s `expr $nblocks1 + 1`) || exit 1 -gmirror label -b $balance $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +gmirror label -b $balance $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1 devwait dd if=${src} of=/dev/mirror/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 dd if=/dev/mirror/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 1" else echo "ok 1" fi -dd if=/dev/md${us0} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 +dd if=/dev/${us0} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 2" else echo "ok 2" fi -dd if=/dev/md${us1} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 +dd if=/dev/${us1} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 3" else echo "ok 3" fi -dd if=/dev/md${us2} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 +dd if=/dev/${us2} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 4" else echo "ok 4" fi -gmirror remove $name md${us0} md${us1} md${us2} -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} diff --git a/tools/regression/geom_mirror/test-3.t b/tools/regression/geom_mirror/test-3.t index 5bdd2ab354b8..b7f85255e076 100644 --- 
a/tools/regression/geom_mirror/test-3.t +++ b/tools/regression/geom_mirror/test-3.t @@ -1,70 +1,64 @@ #!/bin/sh # $FreeBSD$ . `dirname $0`/conf.sh echo "1..5" balance="round-robin" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` ddbs=2048 nblocks1=1024 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1 +us0=$(attach_md -t malloc -s `expr $nblocks1 + 1`) || exit 1 +us1=$(attach_md -t malloc -s `expr $nblocks1 + 1`) || exit 1 +us2=$(attach_md -t malloc -s `expr $nblocks1 + 1`) || exit 1 -gmirror label -b $balance $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +gmirror label -b $balance $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1 devwait dd if=${src} of=/dev/mirror/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 dd if=/dev/mirror/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 1" else echo "ok 1" fi -gmirror remove $name md${us0} +gmirror remove $name ${us0} dd if=/dev/mirror/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 2" else echo "ok 2" fi -gmirror remove $name md${us1} +gmirror remove $name ${us1} dd if=/dev/mirror/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 3" else echo "ok 3" fi -gmirror remove $name md${us2} +gmirror remove $name ${us2} dd if=/dev/mirror/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 4" else echo "ok 4" fi # mirror/${name} should be removed. if [ -c /dev/${name} ]; then echo "not ok 5" else echo "ok 5" fi -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} diff --git a/tools/regression/geom_mirror/test-4.t b/tools/regression/geom_mirror/test-4.t index 0786bdb1fadb..6efcc021ef65 100644 --- a/tools/regression/geom_mirror/test-4.t +++ b/tools/regression/geom_mirror/test-4.t @@ -1,70 +1,66 @@ #!/bin/sh # $FreeBSD$ . 
`dirname $0`/conf.sh echo "1..5" balance="load" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` ddbs=2048 nblocks1=1024 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1 +us0=$(attach_md -t malloc -s `expr $nblocks1 + 1`) || exit 1 +us1=$(attach_md -t malloc -s `expr $nblocks1 + 1`) || exit 1 +us2=$(attach_md -t malloc -s `expr $nblocks1 + 1`) || exit 1 -gmirror label -b $balance $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +gmirror label -b $balance $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1 devwait dd if=${src} of=/dev/mirror/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 dd if=/dev/mirror/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 1" else echo "ok 1" fi -gmirror remove $name md${us0} +gmirror remove $name ${us0} dd if=/dev/mirror/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 2" else echo "ok 2" fi -gmirror remove $name md${us1} +gmirror remove $name ${us1} dd if=/dev/mirror/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 3" else echo "ok 3" fi -gmirror remove $name md${us2} +gmirror remove $name ${us2} dd if=/dev/mirror/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 4" else echo "ok 4" fi +gmirror destroy $name + # mirror/${name} should be removed. if [ -c /dev/${name} ]; then echo "not ok 5" else echo "ok 5" fi -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} diff --git a/tools/regression/geom_mirror/test-5.t b/tools/regression/geom_mirror/test-5.t index 5b0a5ed9f23f..3a176b55531f 100644 --- a/tools/regression/geom_mirror/test-5.t +++ b/tools/regression/geom_mirror/test-5.t @@ -1,70 +1,64 @@ #!/bin/sh # $FreeBSD$ . 
`dirname $0`/conf.sh echo "1..5" balance="split" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` ddbs=8192 nblocks1=1024 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1 +us0=$(attach_md -t malloc -s `expr $nblocks1 + 1`) || exit 1 +us1=$(attach_md -t malloc -s `expr $nblocks1 + 1`) || exit 1 +us2=$(attach_md -t malloc -s `expr $nblocks1 + 1`) || exit 1 -gmirror label -b $balance -s `expr $ddbs / 2` $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +gmirror label -b $balance -s `expr $ddbs / 2` $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1 devwait dd if=${src} of=/dev/mirror/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 dd if=/dev/mirror/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 1" else echo "ok 1" fi -gmirror remove $name md${us0} +gmirror remove $name ${us0} dd if=/dev/mirror/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 2" else echo "ok 2" fi -gmirror remove $name md${us1} +gmirror remove $name ${us1} dd if=/dev/mirror/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 3" else echo "ok 3" fi -gmirror remove $name md${us2} +gmirror remove $name ${us2} dd if=/dev/mirror/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 4" else echo "ok 4" fi # mirror/${name} should be removed. if [ -c /dev/${name} ]; then echo "not ok 5" else echo "ok 5" fi -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} diff --git a/tools/regression/geom_mirror/test-6.t b/tools/regression/geom_mirror/test-6.t index ec86d06f79e2..68036e9c8c6a 100644 --- a/tools/regression/geom_mirror/test-6.t +++ b/tools/regression/geom_mirror/test-6.t @@ -1,52 +1,45 @@ #!/bin/sh # $FreeBSD$ . 
`dirname $0`/conf.sh echo "1..2" balance="split" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` ddbs=8192 nblocks1=1024 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1 +us0=$(attach_md -t malloc -s `expr $nblocks1 + 1`) || exit 1 +us1=$(attach_md -t malloc -s `expr $nblocks1 + 1`) || exit 1 +us2=$(attach_md -t malloc -s `expr $nblocks1 + 1`) || exit 1 -gmirror label -b $balance -s `expr $ddbs / 2` $name /dev/md${us0} /dev/md${us1} || exit 1 +gmirror label -b $balance -s `expr $ddbs / 2` $name /dev/${us0} /dev/${us1} || exit 1 devwait dd if=${src} of=/dev/mirror/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -dd if=/dev/zero of=/dev/md${us2} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 +dd if=/dev/zero of=/dev/${us2} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 dd if=/dev/mirror/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 1" else echo "ok 1" fi # Connect disk to the mirror. -gmirror insert ${name} md${us2} +gmirror insert ${name} ${us2} # Wait for synchronization. sleep 1 -dd if=/dev/md${us2} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 +dd if=/dev/${us2} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 2" else echo "ok 2" fi -gmirror remove $name md${us0} md${us1} md${us2} -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} diff --git a/tools/regression/geom_mirror/test-7.t b/tools/regression/geom_mirror/test-7.t index c911c03c3df1..f5bf71ab7a3b 100644 --- a/tools/regression/geom_mirror/test-7.t +++ b/tools/regression/geom_mirror/test-7.t @@ -1,70 +1,64 @@ #!/bin/sh # $FreeBSD$ . 
`dirname $0`/conf.sh echo "1..5" balance="prefer" -us0=45 -us1=`expr $us0 + 1` -us2=`expr $us0 + 2` ddbs=2048 nblocks1=1024 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)` -src=`mktemp /tmp/$base.XXXXXX` || exit 1 -dst=`mktemp /tmp/$base.XXXXXX` || exit 1 +src=`mktemp $base.XXXXXX` || exit 1 +dst=`mktemp $base.XXXXXX` || exit 1 dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1 -mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1 +us0=$(attach_md -t malloc -s `expr $nblocks1 + 1`) || exit 1 +us1=$(attach_md -t malloc -s `expr $nblocks1 + 1`) || exit 1 +us2=$(attach_md -t malloc -s `expr $nblocks1 + 1`) || exit 1 -gmirror label -b $balance $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1 +gmirror label -b $balance $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1 devwait dd if=${src} of=/dev/mirror/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 dd if=/dev/mirror/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 1" else echo "ok 1" fi -gmirror remove $name md${us0} +gmirror remove $name ${us0} dd if=/dev/mirror/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 2" else echo "ok 2" fi -gmirror remove $name md${us1} +gmirror remove $name ${us1} dd if=/dev/mirror/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 3" else echo "ok 3" fi -gmirror remove $name md${us2} +gmirror remove $name ${us2} dd if=/dev/mirror/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then echo "not ok 4" else echo "ok 4" fi # mirror/${name} should be removed. if [ -c /dev/${name} ]; then echo "not ok 5" else echo "ok 5" fi -mdconfig -d -u $us0 -mdconfig -d -u $us1 -mdconfig -d -u $us2 rm -f ${src} ${dst} diff --git a/tools/regression/geom_subr.sh b/tools/regression/geom_subr.sh index 0ffb8c8c7049..58dc31dd5b43 100644 --- a/tools/regression/geom_subr.sh +++ b/tools/regression/geom_subr.sh @@ -1,48 +1,49 @@ #!/bin/sh # $FreeBSD$ if [ $(id -u) -ne 0 ]; then echo 'Tests must be run as root' echo 'Bail out!' 
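# ("Bail out!" is TAP syntax: it tells the test harness to abort the
# entire run rather than merely record a single failed test.)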
exit 1 fi kldstat -q -m g_${class} || geom ${class} load || exit 1 devwait() { while :; do if [ -c /dev/${class}/${name} ]; then return fi sleep 0.2 done } # Keep track of the test md(4) devices so that a single failing test does not # make the remaining tests bomb out, and so that a failed test does not leave # a large number of md(4) devices lingering around. : ${TMPDIR=/tmp} export TMPDIR -TEST_MDS_FILE=${TMPDIR}/test_mds +TEST_MDS_FILE=$(mktemp ${TMPDIR}/test_mds.XXXXXX) || exit 1 attach_md() { local test_md test_md=$(mdconfig -a "$@") || exit echo $test_md >> $TEST_MDS_FILE || exit echo $test_md } geom_test_cleanup() { local test_md if [ -f $TEST_MDS_FILE ]; then while read test_md; do # The "#" tells the TAP parser this is a comment echo "# Removing test memory disk: $test_md" mdconfig -d -u $test_md done < $TEST_MDS_FILE fi + rm -f $TEST_MDS_FILE } diff --git a/usr.sbin/gssd/gssd.c b/usr.sbin/gssd/gssd.c index e85dfd7676e6..f3e5ce969edb 100644 --- a/usr.sbin/gssd/gssd.c +++ b/usr.sbin/gssd/gssd.c @@ -1,1290 +1,1292 @@ /*- * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ * Authors: Doug Rabson * Developed with Red Inc: Alfred Perlstein * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE.
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #ifndef WITHOUT_KERBEROS #include #endif #include #include #include #include #include #include #include #include #include #include #include "gssd.h" #ifndef _PATH_GSS_MECH #define _PATH_GSS_MECH "/etc/gss/mech" #endif #ifndef _PATH_GSSDSOCK #define _PATH_GSSDSOCK "/var/run/gssd.sock" #endif #define GSSD_CREDENTIAL_CACHE_FILE "/tmp/krb5cc_gssd" struct gss_resource { LIST_ENTRY(gss_resource) gr_link; uint64_t gr_id; /* identifier exported to kernel */ void* gr_res; /* GSS-API resource pointer */ }; LIST_HEAD(gss_resource_list, gss_resource) gss_resources; int gss_resource_count; uint32_t gss_next_id; uint32_t gss_start_time; int debug_level; static char ccfile_dirlist[PATH_MAX + 1], ccfile_substring[NAME_MAX + 1]; static char pref_realm[1024]; static int verbose; static int use_old_des; static int hostbased_initiator_cred; #ifndef WITHOUT_KERBEROS /* 1.2.752.43.13.14 */ static gss_OID_desc gss_krb5_set_allowable_enctypes_x_desc = {6, (void *) "\x2a\x85\x70\x2b\x0d\x0e"}; static gss_OID GSS_KRB5_SET_ALLOWABLE_ENCTYPES_X = &gss_krb5_set_allowable_enctypes_x_desc; static gss_OID_desc gss_krb5_mech_oid_x_desc = {9, (void *) "\x2a\x86\x48\x86\xf7\x12\x01\x02\x02" }; static gss_OID GSS_KRB5_MECH_OID_X = &gss_krb5_mech_oid_x_desc; #endif static void gssd_load_mech(void); static int find_ccache_file(const char *, uid_t, char *); static int is_a_valid_tgt_cache(const char *, uid_t, int *, time_t *); static void gssd_verbose_out(const char *, ...); #ifndef WITHOUT_KERBEROS static krb5_error_code gssd_get_cc_from_keytab(const char *); static OM_uint32 gssd_get_user_cred(OM_uint32 *, uid_t, gss_cred_id_t *); #endif void gssd_terminate(int); extern void gssd_1(struct svc_req *rqstp, SVCXPRT *transp); extern int gssd_syscall(char *path); int main(int argc, char **argv) { /* * We provide an RPC service on a local-domain socket. The * kernel's GSS-API code will pass what it can't handle * directly to us. */ struct sockaddr_un sun; int fd, oldmask, ch, debug; SVCXPRT *xprt; /* * Initialize the credential cache file name substring and the * search directory list. */ strlcpy(ccfile_substring, "krb5cc_", sizeof(ccfile_substring)); ccfile_dirlist[0] = '\0'; pref_realm[0] = '\0'; debug = 0; verbose = 0; while ((ch = getopt(argc, argv, "dhovs:c:r:")) != -1) { switch (ch) { case 'd': debug_level++; break; case 'h': #ifndef WITHOUT_KERBEROS /* * Enable use of a host based initiator credential * in the default keytab file. */ hostbased_initiator_cred = 1; #else errx(1, "This option is not available when built" " without MK_KERBEROS"); #endif break; case 'o': #ifndef WITHOUT_KERBEROS /* * Force use of DES and the old type of GSSAPI token. */ use_old_des = 1; #else errx(1, "This option is not available when built" " without MK_KERBEROS"); #endif break; case 'v': verbose = 1; break; case 's': #ifndef WITHOUT_KERBEROS /* * Set the directory search list. This enables use of * find_ccache_file() to search the directories for a * suitable credentials cache file. */ strlcpy(ccfile_dirlist, optarg, sizeof(ccfile_dirlist)); #else errx(1, "This option is not available when built" " without MK_KERBEROS"); #endif break; case 'c': /* * Specify a non-default credential cache file * substring. */ strlcpy(ccfile_substring, optarg, sizeof(ccfile_substring)); break; case 'r': /* * Set the preferred realm for the credential cache tgt.
*/ strlcpy(pref_realm, optarg, sizeof(pref_realm)); break; default: fprintf(stderr, "usage: %s [-d] [-h] [-o] [-v] [-s dir-list] [-c file-substring]" " [-r preferred-realm]\n", argv[0]); exit(1); break; } } gssd_load_mech(); if (!debug_level) { if (daemon(0, 0) != 0) err(1, "Can't daemonize"); signal(SIGINT, SIG_IGN); signal(SIGQUIT, SIG_IGN); signal(SIGHUP, SIG_IGN); } signal(SIGTERM, gssd_terminate); memset(&sun, 0, sizeof sun); sun.sun_family = AF_LOCAL; unlink(_PATH_GSSDSOCK); strcpy(sun.sun_path, _PATH_GSSDSOCK); sun.sun_len = SUN_LEN(&sun); fd = socket(AF_LOCAL, SOCK_STREAM, 0); if (fd < 0) { if (debug_level == 0) { syslog(LOG_ERR, "Can't create local gssd socket"); exit(1); } err(1, "Can't create local gssd socket"); } oldmask = umask(S_IXUSR|S_IRWXG|S_IRWXO); if (bind(fd, (struct sockaddr *) &sun, sun.sun_len) < 0) { if (debug_level == 0) { syslog(LOG_ERR, "Can't bind local gssd socket"); exit(1); } err(1, "Can't bind local gssd socket"); } umask(oldmask); if (listen(fd, SOMAXCONN) < 0) { if (debug_level == 0) { syslog(LOG_ERR, "Can't listen on local gssd socket"); exit(1); } err(1, "Can't listen on local gssd socket"); } xprt = svc_vc_create(fd, RPC_MAXDATASIZE, RPC_MAXDATASIZE); if (!xprt) { if (debug_level == 0) { syslog(LOG_ERR, "Can't create transport for local gssd socket"); exit(1); } err(1, "Can't create transport for local gssd socket"); } if (!svc_reg(xprt, GSSD, GSSDVERS, gssd_1, NULL)) { if (debug_level == 0) { syslog(LOG_ERR, "Can't register service for local gssd socket"); exit(1); } err(1, "Can't register service for local gssd socket"); } LIST_INIT(&gss_resources); gss_next_id = 1; gss_start_time = time(0); gssd_syscall(_PATH_GSSDSOCK); svc_run(); + gssd_syscall(""); return (0); } static void gssd_load_mech(void) { FILE *fp; char buf[256]; char *p; char *name, *oid, *lib, *kobj; fp = fopen(_PATH_GSS_MECH, "r"); if (!fp) return; while (fgets(buf, sizeof(buf), fp)) { if (*buf == '#') continue; p = buf; name = strsep(&p, "\t\n "); if (p) while (isspace(*p)) p++; oid = strsep(&p, "\t\n "); if (p) while (isspace(*p)) p++; lib = strsep(&p, "\t\n "); if (p) while (isspace(*p)) p++; kobj = strsep(&p, "\t\n "); if (!name || !oid || !lib || !kobj) continue; if (strcmp(kobj, "-")) { /* * Attempt to load the kernel module if it is * not already present. */ if (modfind(kobj) < 0) { if (kldload(kobj) < 0) { fprintf(stderr, "%s: can't find or load kernel module %s for %s\n", getprogname(), kobj, name); } } } } fclose(fp); } static void * gssd_find_resource(uint64_t id) { struct gss_resource *gr; if (!id) return (NULL); LIST_FOREACH(gr, &gss_resources, gr_link) if (gr->gr_id == id) return (gr->gr_res); return (NULL); } static uint64_t gssd_make_resource(void *res) { struct gss_resource *gr; if (!res) return (0); gr = malloc(sizeof(struct gss_resource)); if (!gr) return (0); gr->gr_id = (gss_next_id++) + ((uint64_t) gss_start_time << 32); gr->gr_res = res; LIST_INSERT_HEAD(&gss_resources, gr, gr_link); gss_resource_count++; if (debug_level > 1) printf("%d resources allocated\n", gss_resource_count); return (gr->gr_id); } static void gssd_delete_resource(uint64_t id) { struct gss_resource *gr; LIST_FOREACH(gr, &gss_resources, gr_link) { if (gr->gr_id == id) { LIST_REMOVE(gr, gr_link); free(gr); gss_resource_count--; if (debug_level > 1) printf("%d resources allocated\n", gss_resource_count); return; } } } static void gssd_verbose_out(const char *fmt, ...)
{ va_list ap; if (verbose != 0) { va_start(ap, fmt); if (debug_level == 0) vsyslog(LOG_INFO | LOG_DAEMON, fmt, ap); else vfprintf(stderr, fmt, ap); va_end(ap); } } bool_t gssd_null_1_svc(void *argp, void *result, struct svc_req *rqstp) { gssd_verbose_out("gssd_null: done\n"); return (TRUE); } bool_t gssd_init_sec_context_1_svc(init_sec_context_args *argp, init_sec_context_res *result, struct svc_req *rqstp) { gss_cred_id_t cred = GSS_C_NO_CREDENTIAL; gss_ctx_id_t ctx = GSS_C_NO_CONTEXT; gss_name_t name = GSS_C_NO_NAME; char ccname[PATH_MAX + 5 + 1], *cp, *cp2; int gotone, gotcred; OM_uint32 min_stat; #ifndef WITHOUT_KERBEROS gss_buffer_desc principal_desc; char enctype[sizeof(uint32_t)]; int key_enctype; OM_uint32 maj_stat; #endif memset(result, 0, sizeof(*result)); if (hostbased_initiator_cred != 0 && argp->cred != 0 && argp->uid == 0) { /* * These credentials are for a host based initiator name * in a keytab file, which should now have credentials * in /tmp/krb5cc_gssd, because gss_acquire_cred() did * the equivalent of "kinit -k". */ snprintf(ccname, sizeof(ccname), "FILE:%s", GSSD_CREDENTIAL_CACHE_FILE); } else if (ccfile_dirlist[0] != '\0' && argp->cred == 0) { /* * For the "-s" case and no credentials provided as an * argument, search the directory list for an appropriate * credential cache file. If the search fails, return failure. */ gotone = 0; cp = ccfile_dirlist; do { cp2 = strchr(cp, ':'); if (cp2 != NULL) *cp2 = '\0'; gotone = find_ccache_file(cp, argp->uid, ccname); if (gotone != 0) break; if (cp2 != NULL) *cp2++ = ':'; cp = cp2; } while (cp != NULL && *cp != '\0'); if (gotone == 0) { result->major_status = GSS_S_CREDENTIALS_EXPIRED; gssd_verbose_out("gssd_init_sec_context: -s no" " credential cache file found for uid=%d\n", (int)argp->uid); return (TRUE); } } else { /* * If there wasn't a "-s" option or the credentials have * been provided as an argument, do it the old way. * When credentials are provided, the uid should be root. */ if (argp->cred != 0 && argp->uid != 0) { if (debug_level == 0) syslog(LOG_ERR, "gss_init_sec_context:" " cred for non-root"); else fprintf(stderr, "gss_init_sec_context:" " cred for non-root\n"); } snprintf(ccname, sizeof(ccname), "FILE:/tmp/krb5cc_%d", (int) argp->uid); } setenv("KRB5CCNAME", ccname, TRUE); if (argp->cred) { cred = gssd_find_resource(argp->cred); if (!cred) { result->major_status = GSS_S_CREDENTIALS_EXPIRED; gssd_verbose_out("gssd_init_sec_context: cred" " resource not found\n"); return (TRUE); } } if (argp->ctx) { ctx = gssd_find_resource(argp->ctx); if (!ctx) { result->major_status = GSS_S_CONTEXT_EXPIRED; gssd_verbose_out("gssd_init_sec_context: context" " resource not found\n"); return (TRUE); } } if (argp->name) { name = gssd_find_resource(argp->name); if (!name) { result->major_status = GSS_S_BAD_NAME; gssd_verbose_out("gssd_init_sec_context: name" " resource not found\n"); return (TRUE); } } gotcred = 0; #ifndef WITHOUT_KERBEROS if (use_old_des != 0) { if (cred == GSS_C_NO_CREDENTIAL) { /* Acquire a credential for the uid. 
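 * (This path is only taken in the use_old_des case: the daemon must
 * hold the credential itself so that it can restrict the allowable
 * enctypes to DES immediately below.)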
*/ maj_stat = gssd_get_user_cred(&min_stat, argp->uid, &cred); if (maj_stat == GSS_S_COMPLETE) gotcred = 1; else gssd_verbose_out("gssd_init_sec_context: " "get user cred failed uid=%d major=0x%x " "minor=%d\n", (int)argp->uid, (unsigned int)maj_stat, (int)min_stat); } if (cred != GSS_C_NO_CREDENTIAL) { key_enctype = ETYPE_DES_CBC_CRC; enctype[0] = (key_enctype >> 24) & 0xff; enctype[1] = (key_enctype >> 16) & 0xff; enctype[2] = (key_enctype >> 8) & 0xff; enctype[3] = key_enctype & 0xff; principal_desc.length = sizeof(enctype); principal_desc.value = enctype; result->major_status = gss_set_cred_option( &result->minor_status, &cred, GSS_KRB5_SET_ALLOWABLE_ENCTYPES_X, &principal_desc); gssd_verbose_out("gssd_init_sec_context: set allowable " "enctype major=0x%x minor=%d\n", (unsigned int)result->major_status, (int)result->minor_status); if (result->major_status != GSS_S_COMPLETE) { if (gotcred != 0) gss_release_cred(&min_stat, &cred); return (TRUE); } } } #endif result->major_status = gss_init_sec_context(&result->minor_status, cred, &ctx, name, argp->mech_type, argp->req_flags, argp->time_req, argp->input_chan_bindings, &argp->input_token, &result->actual_mech_type, &result->output_token, &result->ret_flags, &result->time_rec); gssd_verbose_out("gssd_init_sec_context: done major=0x%x minor=%d" " uid=%d\n", (unsigned int)result->major_status, (int)result->minor_status, (int)argp->uid); if (gotcred != 0) gss_release_cred(&min_stat, &cred); if (result->major_status == GSS_S_COMPLETE || result->major_status == GSS_S_CONTINUE_NEEDED) { if (argp->ctx) result->ctx = argp->ctx; else result->ctx = gssd_make_resource(ctx); } return (TRUE); } bool_t gssd_accept_sec_context_1_svc(accept_sec_context_args *argp, accept_sec_context_res *result, struct svc_req *rqstp) { gss_ctx_id_t ctx = GSS_C_NO_CONTEXT; gss_cred_id_t cred = GSS_C_NO_CREDENTIAL; gss_name_t src_name; gss_cred_id_t delegated_cred_handle; memset(result, 0, sizeof(*result)); if (argp->ctx) { ctx = gssd_find_resource(argp->ctx); if (!ctx) { result->major_status = GSS_S_CONTEXT_EXPIRED; gssd_verbose_out("gssd_accept_sec_context: ctx" " resource not found\n"); return (TRUE); } } if (argp->cred) { cred = gssd_find_resource(argp->cred); if (!cred) { result->major_status = GSS_S_CREDENTIALS_EXPIRED; gssd_verbose_out("gssd_accept_sec_context: cred" " resource not found\n"); return (TRUE); } } memset(result, 0, sizeof(*result)); result->major_status = gss_accept_sec_context(&result->minor_status, &ctx, cred, &argp->input_token, argp->input_chan_bindings, &src_name, &result->mech_type, &result->output_token, &result->ret_flags, &result->time_rec, &delegated_cred_handle); gssd_verbose_out("gssd_accept_sec_context: done major=0x%x minor=%d\n", (unsigned int)result->major_status, (int)result->minor_status); if (result->major_status == GSS_S_COMPLETE || result->major_status == GSS_S_CONTINUE_NEEDED) { if (argp->ctx) result->ctx = argp->ctx; else result->ctx = gssd_make_resource(ctx); result->src_name = gssd_make_resource(src_name); result->delegated_cred_handle = gssd_make_resource(delegated_cred_handle); } return (TRUE); } bool_t gssd_delete_sec_context_1_svc(delete_sec_context_args *argp, delete_sec_context_res *result, struct svc_req *rqstp) { gss_ctx_id_t ctx = gssd_find_resource(argp->ctx); if (ctx) { result->major_status = gss_delete_sec_context( &result->minor_status, &ctx, &result->output_token); gssd_delete_resource(argp->ctx); } else { result->major_status = GSS_S_COMPLETE; result->minor_status = 0; } gssd_verbose_out("gssd_delete_sec_context: 
bool_t
gssd_export_sec_context_1_svc(export_sec_context_args *argp,
    export_sec_context_res *result, struct svc_req *rqstp)
{
	gss_ctx_id_t ctx = gssd_find_resource(argp->ctx);

	if (ctx) {
		result->major_status = gss_export_sec_context(
		    &result->minor_status, &ctx,
		    &result->interprocess_token);
		result->format = KGSS_HEIMDAL_1_1;
		gssd_delete_resource(argp->ctx);
	} else {
		result->major_status = GSS_S_FAILURE;
		result->minor_status = 0;
		result->interprocess_token.length = 0;
		result->interprocess_token.value = NULL;
	}
	gssd_verbose_out("gssd_export_sec_context: done major=0x%x minor=%d\n",
	    (unsigned int)result->major_status, (int)result->minor_status);

	return (TRUE);
}

bool_t
gssd_import_name_1_svc(import_name_args *argp, import_name_res *result,
    struct svc_req *rqstp)
{
	gss_name_t name;

	result->major_status = gss_import_name(&result->minor_status,
	    &argp->input_name_buffer, argp->input_name_type, &name);
	gssd_verbose_out("gssd_import_name: done major=0x%x minor=%d\n",
	    (unsigned int)result->major_status, (int)result->minor_status);

	if (result->major_status == GSS_S_COMPLETE)
		result->output_name = gssd_make_resource(name);
	else
		result->output_name = 0;

	return (TRUE);
}

bool_t
gssd_canonicalize_name_1_svc(canonicalize_name_args *argp,
    canonicalize_name_res *result, struct svc_req *rqstp)
{
	gss_name_t name = gssd_find_resource(argp->input_name);
	gss_name_t output_name;

	memset(result, 0, sizeof(*result));
	if (!name) {
		result->major_status = GSS_S_BAD_NAME;
		return (TRUE);
	}

	result->major_status = gss_canonicalize_name(&result->minor_status,
	    name, argp->mech_type, &output_name);
	gssd_verbose_out("gssd_canonicalize_name: done major=0x%x minor=%d\n",
	    (unsigned int)result->major_status, (int)result->minor_status);

	if (result->major_status == GSS_S_COMPLETE)
		result->output_name = gssd_make_resource(output_name);
	else
		result->output_name = 0;

	return (TRUE);
}

bool_t
gssd_export_name_1_svc(export_name_args *argp, export_name_res *result,
    struct svc_req *rqstp)
{
	gss_name_t name = gssd_find_resource(argp->input_name);

	memset(result, 0, sizeof(*result));
	if (!name) {
		result->major_status = GSS_S_BAD_NAME;
		gssd_verbose_out("gssd_export_name: name resource not found\n");
		return (TRUE);
	}

	result->major_status = gss_export_name(&result->minor_status, name,
	    &result->exported_name);
	gssd_verbose_out("gssd_export_name: done major=0x%x minor=%d\n",
	    (unsigned int)result->major_status, (int)result->minor_status);

	return (TRUE);
}

bool_t
gssd_release_name_1_svc(release_name_args *argp, release_name_res *result,
    struct svc_req *rqstp)
{
	gss_name_t name = gssd_find_resource(argp->input_name);

	if (name) {
		result->major_status = gss_release_name(&result->minor_status,
		    &name);
		gssd_delete_resource(argp->input_name);
	} else {
		result->major_status = GSS_S_COMPLETE;
		result->minor_status = 0;
	}
	gssd_verbose_out("gssd_release_name: done major=0x%x minor=%d\n",
	    (unsigned int)result->major_status, (int)result->minor_status);

	return (TRUE);
}
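/*
 * Sketch only, not part of the original source: a local GSS-API caller
 * would chain the four name operations above like this; gssd performs
 * the same steps on behalf of the kernel, one RPC per step.  The
 * function name is hypothetical, and GSS_KRB5_MECH_OID_X is assumed to
 * be the Kerberos mechanism OID used elsewhere in this file.
 */
static int
example_name_roundtrip(const char *user, gss_buffer_t exported)
{
	OM_uint32 maj, min;
	gss_buffer_desc nb;
	gss_name_t name, mn;

	nb.value = __DECONST(char *, user);
	nb.length = strlen(user);
	maj = gss_import_name(&min, &nb, GSS_C_NT_USER_NAME, &name);
	if (maj != GSS_S_COMPLETE)
		return (-1);
	/* Export requires a mechanism name, so canonicalize first. */
	maj = gss_canonicalize_name(&min, name, GSS_KRB5_MECH_OID_X, &mn);
	if (maj == GSS_S_COMPLETE) {
		maj = gss_export_name(&min, mn, exported);
		gss_release_name(&min, &mn);
	}
	gss_release_name(&min, &name);
	return (maj == GSS_S_COMPLETE ? 0 : -1);
}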
bool_t
gssd_pname_to_uid_1_svc(pname_to_uid_args *argp, pname_to_uid_res *result,
    struct svc_req *rqstp)
{
	gss_name_t name = gssd_find_resource(argp->pname);
	uid_t uid;
	char buf[1024], *bufp;
	struct passwd pwd, *pw;
	size_t buflen;
	int error;
	static size_t buflen_hint = 1024;

	memset(result, 0, sizeof(*result));
	if (name) {
		result->major_status = gss_pname_to_uid(&result->minor_status,
		    name, argp->mech, &uid);
		if (result->major_status == GSS_S_COMPLETE) {
			result->uid = uid;
			buflen = buflen_hint;
			for (;;) {
				pw = NULL;
				bufp = buf;
				if (buflen > sizeof(buf))
					bufp = malloc(buflen);
				if (bufp == NULL)
					break;
				error = getpwuid_r(uid, &pwd, bufp, buflen,
				    &pw);
				if (error != ERANGE)
					break;
				if (buflen > sizeof(buf))
					free(bufp);
				buflen += 1024;
				if (buflen > buflen_hint)
					buflen_hint = buflen;
			}
			if (pw) {
				int len = NGROUPS;
				int groups[NGROUPS];
				result->gid = pw->pw_gid;
				getgrouplist(pw->pw_name, pw->pw_gid,
				    groups, &len);
				result->gidlist.gidlist_len = len;
				result->gidlist.gidlist_val =
				    mem_alloc(len * sizeof(int));
				memcpy(result->gidlist.gidlist_val, groups,
				    len * sizeof(int));
				gssd_verbose_out("gssd_pname_to_uid: mapped"
				    " to uid=%d, gid=%d\n", (int)result->uid,
				    (int)result->gid);
			} else {
				result->gid = 65534;	/* gid of "nobody" */
				result->gidlist.gidlist_len = 0;
				result->gidlist.gidlist_val = NULL;
				gssd_verbose_out("gssd_pname_to_uid: mapped"
				    " to uid=%d, but no groups\n",
				    (int)result->uid);
			}
			if (bufp != NULL && buflen > sizeof(buf))
				free(bufp);
		} else
			gssd_verbose_out("gssd_pname_to_uid: failed major=0x%x"
			    " minor=%d\n", (unsigned int)result->major_status,
			    (int)result->minor_status);
	} else {
		result->major_status = GSS_S_BAD_NAME;
		result->minor_status = 0;
		gssd_verbose_out("gssd_pname_to_uid: no name\n");
	}

	return (TRUE);
}
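/*
 * Standalone sketch of the ERANGE retry idiom used above (illustration
 * only, not part of the original source).  On success the caller owns
 * *bufp and must free it once it is done with the returned passwd.
 */
static struct passwd *
example_getpw(uid_t uid, struct passwd *pwd, char **bufp)
{
	struct passwd *pw;
	size_t buflen = 1024;
	char *buf;

	for (;;) {
		buf = malloc(buflen);
		if (buf == NULL)
			return (NULL);
		if (getpwuid_r(uid, pwd, buf, buflen, &pw) != ERANGE)
			break;
		free(buf);		/* buffer too small; grow and retry */
		buflen += 1024;
	}
	if (pw == NULL) {
		free(buf);		/* lookup failed outright */
		return (NULL);
	}
	*bufp = buf;
	return (pw);
}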
bool_t
gssd_acquire_cred_1_svc(acquire_cred_args *argp, acquire_cred_res *result,
    struct svc_req *rqstp)
{
	gss_name_t desired_name = GSS_C_NO_NAME;
	gss_cred_id_t cred;
	char ccname[PATH_MAX + 5 + 1], *cp, *cp2;
	int gotone;
#ifndef WITHOUT_KERBEROS
	gss_buffer_desc namebuf;
	uint32_t minstat;
	krb5_error_code kret;
#endif

	memset(result, 0, sizeof(*result));
	if (argp->desired_name) {
		desired_name = gssd_find_resource(argp->desired_name);
		if (!desired_name) {
			result->major_status = GSS_S_BAD_NAME;
			gssd_verbose_out("gssd_acquire_cred: no desired name"
			    " found\n");
			return (TRUE);
		}
	}

#ifndef WITHOUT_KERBEROS
	if (hostbased_initiator_cred != 0 && argp->desired_name != 0 &&
	    argp->uid == 0 && argp->cred_usage == GSS_C_INITIATE) {
		/* This is a host based initiator name in the keytab file. */
		snprintf(ccname, sizeof(ccname), "FILE:%s",
		    GSSD_CREDENTIAL_CACHE_FILE);
		setenv("KRB5CCNAME", ccname, TRUE);
		result->major_status = gss_display_name(&result->minor_status,
		    desired_name, &namebuf, NULL);
		gssd_verbose_out("gssd_acquire_cred: desired name for host "
		    "based initiator cred major=0x%x minor=%d\n",
		    (unsigned int)result->major_status,
		    (int)result->minor_status);
		if (result->major_status != GSS_S_COMPLETE)
			return (TRUE);
		if (namebuf.length > PATH_MAX + 5) {
			result->minor_status = 0;
			result->major_status = GSS_S_FAILURE;
			return (TRUE);
		}
		memcpy(ccname, namebuf.value, namebuf.length);
		ccname[namebuf.length] = '\0';
		if ((cp = strchr(ccname, '@')) != NULL)
			*cp = '/';
		kret = gssd_get_cc_from_keytab(ccname);
		gssd_verbose_out("gssd_acquire_cred: using keytab entry for "
		    "%s, kerberos ret=%d\n", ccname, (int)kret);
		gss_release_buffer(&minstat, &namebuf);
		if (kret != 0) {
			result->minor_status = kret;
			result->major_status = GSS_S_FAILURE;
			return (TRUE);
		}
	} else
#endif /* !WITHOUT_KERBEROS */
	if (ccfile_dirlist[0] != '\0' && argp->desired_name == 0) {
		/*
		 * For the "-s" case and no name provided as an
		 * argument, search the directory list for an appropriate
		 * credential cache file.  If the search fails, return
		 * failure.
		 */
		gotone = 0;
		cp = ccfile_dirlist;
		do {
			cp2 = strchr(cp, ':');
			if (cp2 != NULL)
				*cp2 = '\0';
			gotone = find_ccache_file(cp, argp->uid, ccname);
			if (gotone != 0)
				break;
			if (cp2 != NULL)
				*cp2++ = ':';
			cp = cp2;
		} while (cp != NULL && *cp != '\0');
		if (gotone == 0) {
			result->major_status = GSS_S_CREDENTIALS_EXPIRED;
			gssd_verbose_out("gssd_acquire_cred: no cred cache"
			    " file found\n");
			return (TRUE);
		}
		setenv("KRB5CCNAME", ccname, TRUE);
	} else {
		/*
		 * If there wasn't a "-s" option or the name has
		 * been provided as an argument, do it the old way.
		 * When a name is provided, it will normally exist in the
		 * default keytab file and the uid will be root.
		 */
		if (argp->desired_name != 0 && argp->uid != 0) {
			if (debug_level == 0)
				syslog(LOG_ERR, "gss_acquire_cred:"
				    " principal_name for non-root");
			else
				fprintf(stderr, "gss_acquire_cred:"
				    " principal_name for non-root\n");
		}
		snprintf(ccname, sizeof(ccname), "FILE:/tmp/krb5cc_%d",
		    (int)argp->uid);
		setenv("KRB5CCNAME", ccname, TRUE);
	}

	result->major_status = gss_acquire_cred(&result->minor_status,
	    desired_name, argp->time_req, argp->desired_mechs,
	    argp->cred_usage, &cred, &result->actual_mechs,
	    &result->time_rec);
	gssd_verbose_out("gssd_acquire_cred: done major=0x%x minor=%d\n",
	    (unsigned int)result->major_status, (int)result->minor_status);

	if (result->major_status == GSS_S_COMPLETE)
		result->output_cred = gssd_make_resource(cred);
	else
		result->output_cred = 0;

	return (TRUE);
}

bool_t
gssd_set_cred_option_1_svc(set_cred_option_args *argp,
    set_cred_option_res *result, struct svc_req *rqstp)
{
	gss_cred_id_t cred = gssd_find_resource(argp->cred);

	memset(result, 0, sizeof(*result));
	if (!cred) {
		result->major_status = GSS_S_CREDENTIALS_EXPIRED;
		gssd_verbose_out("gssd_set_cred: no credentials\n");
		return (TRUE);
	}

	result->major_status = gss_set_cred_option(&result->minor_status,
	    &cred, argp->option_name, &argp->option_value);
	gssd_verbose_out("gssd_set_cred: done major=0x%x minor=%d\n",
	    (unsigned int)result->major_status, (int)result->minor_status);

	return (TRUE);
}

bool_t
gssd_release_cred_1_svc(release_cred_args *argp, release_cred_res *result,
    struct svc_req *rqstp)
{
	gss_cred_id_t cred = gssd_find_resource(argp->cred);

	if (cred) {
		result->major_status = gss_release_cred(&result->minor_status,
		    &cred);
		gssd_delete_resource(argp->cred);
	} else {
		result->major_status = GSS_S_COMPLETE;
		result->minor_status = 0;
	}
	gssd_verbose_out("gssd_release_cred: done major=0x%x minor=%d\n",
	    (unsigned int)result->major_status, (int)result->minor_status);

	return (TRUE);
}

bool_t
gssd_display_status_1_svc(display_status_args *argp,
    display_status_res *result, struct svc_req *rqstp)
{

	result->message_context = argp->message_context;
	result->major_status = gss_display_status(&result->minor_status,
	    argp->status_value, argp->status_type, argp->mech_type,
	    &result->message_context, &result->status_string);
	gssd_verbose_out("gssd_display_status: done major=0x%x minor=%d\n",
	    (unsigned int)result->major_status, (int)result->minor_status);

	return (TRUE);
}
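/*
 * Illustration only, not part of the original source: a local caller
 * drains gss_display_status() by looping until message_context comes
 * back zero, which is the same exchange a client of the stub above
 * performs across repeated RPCs.
 */
static void
example_show_status(OM_uint32 code, int type)
{
	OM_uint32 maj, min, msgctx = 0;
	gss_buffer_desc msg;

	do {
		maj = gss_display_status(&min, code, type, GSS_C_NO_OID,
		    &msgctx, &msg);
		if (maj != GSS_S_COMPLETE)
			break;
		fprintf(stderr, "%.*s\n", (int)msg.length,
		    (char *)msg.value);
		gss_release_buffer(&min, &msg);
	} while (msgctx != 0);
}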
int
gssd_1_freeresult(SVCXPRT *transp, xdrproc_t xdr_result, caddr_t result)
{
	/*
	 * We don't use XDR to free the results - anything which was
	 * allocated came from GSS-API.  We use xdr_result to figure
	 * out what to do.
	 */
	OM_uint32 junk;

	if (xdr_result == (xdrproc_t)xdr_init_sec_context_res) {
		init_sec_context_res *p = (init_sec_context_res *)result;
		gss_release_buffer(&junk, &p->output_token);
	} else if (xdr_result == (xdrproc_t)xdr_accept_sec_context_res) {
		accept_sec_context_res *p = (accept_sec_context_res *)result;
		gss_release_buffer(&junk, &p->output_token);
	} else if (xdr_result == (xdrproc_t)xdr_delete_sec_context_res) {
		delete_sec_context_res *p = (delete_sec_context_res *)result;
		gss_release_buffer(&junk, &p->output_token);
	} else if (xdr_result == (xdrproc_t)xdr_export_sec_context_res) {
		export_sec_context_res *p = (export_sec_context_res *)result;
		if (p->interprocess_token.length)
			memset(p->interprocess_token.value, 0,
			    p->interprocess_token.length);
		gss_release_buffer(&junk, &p->interprocess_token);
	} else if (xdr_result == (xdrproc_t)xdr_export_name_res) {
		export_name_res *p = (export_name_res *)result;
		gss_release_buffer(&junk, &p->exported_name);
	} else if (xdr_result == (xdrproc_t)xdr_acquire_cred_res) {
		acquire_cred_res *p = (acquire_cred_res *)result;
		gss_release_oid_set(&junk, &p->actual_mechs);
	} else if (xdr_result == (xdrproc_t)xdr_pname_to_uid_res) {
		pname_to_uid_res *p = (pname_to_uid_res *)result;
		if (p->gidlist.gidlist_val)
			free(p->gidlist.gidlist_val);
	} else if (xdr_result == (xdrproc_t)xdr_display_status_res) {
		display_status_res *p = (display_status_res *)result;
		gss_release_buffer(&junk, &p->status_string);
	}

	return (TRUE);
}

/*
 * Search a directory for the most likely candidate to be used as the
 * credential cache for a uid.  If successful, return 1 and fill the
 * file's path into "rpath".  Otherwise, return 0.
 */
static int
find_ccache_file(const char *dirpath, uid_t uid, char *rpath)
{
	DIR *dirp;
	struct dirent *dp;
	struct stat sb;
	time_t exptime, oexptime;
	int gotone, len, rating, orating;
	char namepath[PATH_MAX + 5 + 1];
	char retpath[PATH_MAX + 5 + 1];

	dirp = opendir(dirpath);
	if (dirp == NULL)
		return (0);
	gotone = 0;
	orating = 0;
	oexptime = 0;
	while ((dp = readdir(dirp)) != NULL) {
		len = snprintf(namepath, sizeof(namepath), "%s/%s", dirpath,
		    dp->d_name);
		if (len < sizeof(namepath) &&
		    (hostbased_initiator_cred == 0 || strcmp(namepath,
		     GSSD_CREDENTIAL_CACHE_FILE) != 0) &&
		    strstr(dp->d_name, ccfile_substring) != NULL &&
		    lstat(namepath, &sb) >= 0 &&
		    sb.st_uid == uid &&
		    S_ISREG(sb.st_mode)) {
			len = snprintf(namepath, sizeof(namepath), "FILE:%s/%s",
			    dirpath, dp->d_name);
			if (len < sizeof(namepath) &&
			    is_a_valid_tgt_cache(namepath, uid, &rating,
			    &exptime) != 0) {
				if (gotone == 0 || rating > orating ||
				    (rating == orating &&
				     exptime > oexptime)) {
					orating = rating;
					oexptime = exptime;
					strcpy(retpath, namepath);
					gotone = 1;
				}
			}
		}
	}
	closedir(dirp);
	if (gotone != 0) {
		strcpy(rpath, retpath);
		return (1);
	}
	return (0);
}
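/*
 * Sketch of the selection rule used above, pulled out as a predicate
 * for illustration (hypothetical helper, not in the original source):
 * a higher rating always wins, and between equal ratings the cache
 * whose TGT expires later is preferred.
 */
static int
example_better_candidate(int rating, time_t exptime, int best_rating,
    time_t best_exptime)
{

	return (rating > best_rating ||
	    (rating == best_rating && exptime > best_exptime));
}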
/*
 * Try to determine if the file is a valid tgt cache file.
 * Check that the file has a valid tgt for a principal.
 * If it does, return 1, otherwise return 0.
 * It also returns a "rating" and the expiry time for the TGT, when found.
 * This "rating" is higher based on heuristics that make it more
 * likely to be the correct credential cache file to use.  It can
 * be used by the caller, along with expiry time, to select from
 * multiple credential cache files.
 */
static int
is_a_valid_tgt_cache(const char *filepath, uid_t uid, int *retrating,
    time_t *retexptime)
{
#ifndef WITHOUT_KERBEROS
	krb5_context context;
	krb5_principal princ;
	krb5_ccache ccache;
	krb5_error_code retval;
	krb5_cc_cursor curse;
	krb5_creds krbcred;
	int gotone, orating, rating, ret;
	struct passwd *pw;
	char *cp, *cp2, *pname;
	time_t exptime;

	/* Find a likely name for the uid principal. */
	pw = getpwuid(uid);

	/*
	 * Do a bunch of krb5 library stuff to try and determine if
	 * this file is a credentials cache with an appropriate TGT
	 * in it.
	 */
	retval = krb5_init_context(&context);
	if (retval != 0)
		return (0);
	retval = krb5_cc_resolve(context, filepath, &ccache);
	if (retval != 0) {
		krb5_free_context(context);
		return (0);
	}
	ret = 0;
	orating = 0;
	exptime = 0;
	retval = krb5_cc_start_seq_get(context, ccache, &curse);
	if (retval == 0) {
		while ((retval = krb5_cc_next_cred(context, ccache, &curse,
		    &krbcred)) == 0) {
			gotone = 0;
			rating = 0;
			retval = krb5_unparse_name(context, krbcred.server,
			    &pname);
			if (retval == 0) {
				cp = strchr(pname, '/');
				if (cp != NULL) {
					*cp++ = '\0';
					if (strcmp(pname, "krbtgt") == 0 &&
					    krbcred.times.endtime >
					    time(NULL)) {
						gotone = 1;
						/*
						 * Test to see if this is a
						 * tgt for cross-realm auth.
						 * Rate it higher, if it is
						 * not.
						 */
						cp2 = strchr(cp, '@');
						if (cp2 != NULL) {
							*cp2++ = '\0';
							if (strcmp(cp,
							    cp2) == 0)
								rating++;
						}
					}
				}
				free(pname);
			}
			if (gotone != 0) {
				retval = krb5_unparse_name(context,
				    krbcred.client, &pname);
				if (retval == 0) {
					cp = strchr(pname, '@');
					if (cp != NULL) {
						*cp++ = '\0';
						if (pw != NULL &&
						    strcmp(pname,
						    pw->pw_name) == 0)
							rating++;
						if (strchr(pname, '/') == NULL)
							rating++;
						if (pref_realm[0] != '\0' &&
						    strcmp(cp,
						    pref_realm) == 0)
							rating++;
					}
				}
				free(pname);
				if (rating > orating) {
					orating = rating;
					exptime = krbcred.times.endtime;
				} else if (rating == orating &&
				    krbcred.times.endtime > exptime)
					exptime = krbcred.times.endtime;
				ret = 1;
			}
			krb5_free_cred_contents(context, &krbcred);
		}
		krb5_cc_end_seq_get(context, ccache, &curse);
	}
	krb5_cc_close(context, ccache);
	krb5_free_context(context);

	if (ret != 0) {
		*retrating = orating;
		*retexptime = exptime;
	}
	return (ret);
#else /* WITHOUT_KERBEROS */
	return (0);
#endif /* !WITHOUT_KERBEROS */
}
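/*
 * Sketch (illustration only, not in the original source): the rating
 * test above treats a ticket for "krbtgt/REALM@REALM" as a local TGT
 * and anything of the form "krbtgt/OTHER@REALM" as cross-realm, which
 * scores lower.  A hypothetical predicate expressing the same parse:
 */
static int
example_is_local_tgt(char *server)	/* modified in place, as above */
{
	char *instance, *realm;

	instance = strchr(server, '/');
	if (instance == NULL)
		return (0);
	*instance++ = '\0';
	if (strcmp(server, "krbtgt") != 0)
		return (0);
	realm = strchr(instance, '@');
	if (realm == NULL)
		return (0);
	*realm++ = '\0';
	return (strcmp(instance, realm) == 0);
}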
#ifndef WITHOUT_KERBEROS
/*
 * This function attempts to do essentially a "kinit -k" for the principal
 * name provided as the argument, so that there will be a TGT in the
 * credential cache.
 */
static krb5_error_code
gssd_get_cc_from_keytab(const char *name)
{
	krb5_error_code ret, opt_ret, princ_ret, cc_ret, kt_ret, cred_ret;
	krb5_context context;
	krb5_principal principal;
	krb5_keytab kt;
	krb5_creds cred;
	krb5_get_init_creds_opt *opt;
	krb5_deltat start_time = 0;
	krb5_ccache ccache;

	ret = krb5_init_context(&context);
	if (ret != 0)
		return (ret);
	opt_ret = cc_ret = kt_ret = cred_ret = 1;	/* anything non-zero */
	princ_ret = ret = krb5_parse_name(context, name, &principal);
	if (ret == 0)
		opt_ret = ret = krb5_get_init_creds_opt_alloc(context, &opt);
	if (ret == 0)
		cc_ret = ret = krb5_cc_default(context, &ccache);
	if (ret == 0)
		ret = krb5_cc_initialize(context, ccache, principal);
	if (ret == 0) {
		krb5_get_init_creds_opt_set_default_flags(context, "gssd",
		    krb5_principal_get_realm(context, principal), opt);
		kt_ret = ret = krb5_kt_default(context, &kt);
	}
	if (ret == 0)
		cred_ret = ret = krb5_get_init_creds_keytab(context, &cred,
		    principal, kt, start_time, NULL, opt);
	if (ret == 0)
		ret = krb5_cc_store_cred(context, ccache, &cred);
	if (kt_ret == 0)
		krb5_kt_close(context, kt);
	if (cc_ret == 0)
		krb5_cc_close(context, ccache);
	if (opt_ret == 0)
		krb5_get_init_creds_opt_free(context, opt);
	if (princ_ret == 0)
		krb5_free_principal(context, principal);
	if (cred_ret == 0)
		krb5_free_cred_contents(context, &cred);
	krb5_free_context(context);
	return (ret);
}

/*
 * Acquire a gss credential for a uid.
 */
static OM_uint32
gssd_get_user_cred(OM_uint32 *min_statp, uid_t uid, gss_cred_id_t *credp)
{
	gss_buffer_desc principal_desc;
	gss_name_t name;
	OM_uint32 maj_stat, min_stat;
	gss_OID_set mechlist;
	struct passwd *pw;

	pw = getpwuid(uid);
	if (pw == NULL) {
		*min_statp = 0;
		return (GSS_S_FAILURE);
	}

	/*
	 * The mechanism must be set to KerberosV for acquisition
	 * of credentials to work reliably.
	 */
	maj_stat = gss_create_empty_oid_set(min_statp, &mechlist);
	if (maj_stat != GSS_S_COMPLETE)
		return (maj_stat);
	maj_stat = gss_add_oid_set_member(min_statp, GSS_KRB5_MECH_OID_X,
	    &mechlist);
	if (maj_stat != GSS_S_COMPLETE) {
		gss_release_oid_set(&min_stat, &mechlist);
		return (maj_stat);
	}

	principal_desc.value = (void *)pw->pw_name;
	principal_desc.length = strlen(pw->pw_name);
	maj_stat = gss_import_name(min_statp, &principal_desc,
	    GSS_C_NT_USER_NAME, &name);
	if (maj_stat != GSS_S_COMPLETE) {
		gss_release_oid_set(&min_stat, &mechlist);
		return (maj_stat);
	}

	/* Acquire the credentials. */
	maj_stat = gss_acquire_cred(min_statp, name, 0, mechlist,
	    GSS_C_INITIATE, credp, NULL, NULL);
	gss_release_name(&min_stat, &name);
	gss_release_oid_set(&min_stat, &mechlist);
	return (maj_stat);
}
#endif /* !WITHOUT_KERBEROS */

void
gssd_terminate(int sig __unused)
{

#ifndef WITHOUT_KERBEROS
	if (hostbased_initiator_cred != 0)
		unlink(GSSD_CREDENTIAL_CACHE_FILE);
#endif
+	gssd_syscall("");
	exit(0);
}
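#ifndef WITHOUT_KERBEROS
/*
 * Usage sketch only, not part of the original source: acquiring an
 * initiator credential for a uid with the helper above and releasing
 * it when done.  The function name and the placeholder comment are
 * hypothetical.
 */
static int
example_user_cred(uid_t uid)
{
	gss_cred_id_t cred;
	OM_uint32 maj, min;

	maj = gssd_get_user_cred(&min, uid, &cred);
	if (maj != GSS_S_COMPLETE)
		return (-1);
	/* ... use cred with gss_init_sec_context() ... */
	gss_release_cred(&min, &cred);
	return (0);
}
#endif /* !WITHOUT_KERBEROS */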
.\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd January 29, 2008 +.Dd January 2, 2016 .Dt KBDMAP 5 .Os .Sh NAME .Nm kbdmap .Nd keyboard map file format for kbdcontrol .Sh SYNOPSIS .Nm .Sh DESCRIPTION A .Nm file describes how the keys on a keyboard should behave. These files can be loaded using .Xr kbdcontrol 1 , or .Xr kbdmap 1 can be used to select one of the default .Nm files interactively. A .Nm file can be specified in .Xr rc.conf 5 , to be loaded at boot time. The current keymap may also be printed using .Xr kbdcontrol 1 . .Pp Each line in the file can describe a key or an accent. A .Ql # character begins a comment, which extends to the end of the line. .Pp The description of a key begins with the scancode for that key. Then the effect of the key under combinations of shift, control and alt are listed in the following order: no modifier, shift, control, control and shift, alt, alt and shift, alt and control, alt and control and shift. The action of the key under each modifier can be: .Bl -tag -width Ar .It ' Ns Ar symbol Ns No ' The symbol the key should produce, in single quotes. .It Ar decnum The .Tn Unicode value to produce as a decimal number (see .Xr ascii 7 ) . For example, 32 for space. .It 0x Ns Ar hexnum The .Tn Unicode value to produce as a hexadecimal number. For example, 0x20 for space. .It Ar ctrlname One of the standard names for the .Tn ASCII control characters: nul, soh, stx, etx, eot, enq, ack, bel, bs, ht, -nl, +lf, vt, -np, +ff, cr, so, si, dle, dc1, dc2, dc3, dc4, nak, syn, etb, can, em, sub, esc, fs, gs, rs, -ns, us, sp, del. +.It Ar control-alias +One of the historical aliases for certain +.Tn ASCII +control characters: +nl, +np, +ns. .It Ar accentname By giving one of the accent names, the next key pressed will produce an accented character in accordance with that accent. See the description of accents below. The accent names are: dgra, dacu, dcir, dtil, dmac, dbre, ddot, duml, ddia, dsla, drin, dced, dapo, ddac, dogo, dcar. .It fkey Ns Ar N Act as the .Ar N Ns No th function key, where .Ar N is a decimal number in the range from 1 to 96. Refer to the .Xr atkbd 4 manual page for a list of predefined function keys. You can use the .Fl f option of the .Xr kbdcontrol 1 utility to assign arbitrary strings to function keys. .It lshift Act as left shift key. .It rshift Act as right shift key. .It clock Act as caps lock key. .It nlock Act as num lock key. .It slock Act as scroll lock key. .It lalt|alt Act as left alt key. .It btab Act as backwards tab. .It lctrl|ctrl Act as left control key. .It rctrl Act as right control key. .It ralt Act as right alt (altgr) key. .It alock Act as alt lock key. .It ashift Act as alt shift key. .It meta Act as meta key. .It lshifta|shifta Act as left shift key / alt lock. .It rshifta Act as right shift key / alt lock. 
.Pp
An accent key works by modifying the behavior of the next key pressed.
The description of an accent begins with one of the accent names given
above.
This is followed by the symbol for the accent, given in single quotes
or as a decimal or hexadecimal
.Tn Unicode
value.
This symbol will be produced if the accent key is pressed and then the
space key is pressed.
.Pp
The description of the accent key continues with a list showing how it
modifies various symbols, by giving pairs made up of the normal symbol
and the modified symbol enclosed in parentheses.
Both symbols in a pair can be given in either single quotes or as
decimal or hexadecimal
.Tn Unicode
values.
.Pp
For example, consider the following extract from a
.Nm :
.Bd -literal -offset indent
041 dgra 172 nop nop '|' '|' nop nop O

dgra '`' ( 'a' 224 ) ( 'A' 192 ) ( 'e' 232 ) ( 'E' 200 )
         ( 'i' 236 ) ( 'I' 204 ) ( 'o' 242 ) ( 'O' 210 )
         ( 'u' 249 ) ( 'U' 217 )
.Ed
.Pp
This extract configures the backtick key on a UK keyboard to act as a
grave accent key.
Pressing backtick followed by space produces a backtick, and pressing
a backtick followed by a vowel produces the ISO-8859-1 symbol for that
vowel with a grave accent.
.Sh FILES
.Bl -tag -width /usr/share/syscons/keymaps/* -compact
.It Pa /usr/share/syscons/keymaps/*
standard keyboard map files for syscons
.It Pa /usr/share/vt/keymaps/*
standard keyboard map files for vt
.El
.Sh SEE ALSO
.Xr kbdcontrol 1 ,
.Xr kbdmap 1 ,
.Xr keyboard 4 ,
.Xr syscons 4 ,
.Xr vt 4 ,
.Xr ascii 7
.Sh HISTORY
This manual page first appeared in
.Fx 4.2 .