Index: projects/binutils-2.17/bin/sh/arith_lex.l =================================================================== --- projects/binutils-2.17/bin/sh/arith_lex.l (revision 215829) +++ projects/binutils-2.17/bin/sh/arith_lex.l (revision 215830) @@ -1,135 +1,143 @@ %{ /*- * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Kenneth Almquist. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)arith_lex.l 8.3 (Berkeley) 5/4/95"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #include #include "arith.h" #include "shell.h" #include "y.tab.h" #include "error.h" #include "memalloc.h" #include "var.h" int yylex(void); #undef YY_INPUT #define YY_INPUT(buf,result,max) \ - result = (*buf = *arith_buf++) ? 1 : YY_NULL; + do { \ + result = strnlen(arith_buf, max); \ + if (result == 0) \ + result = YY_NULL; \ + else { \ + memcpy(buf, arith_buf, result); \ + arith_buf += result; \ + } \ + } while (0); #define YY_NO_UNPUT #define YY_NO_INPUT %} %% [ \t\n] { ; } 0x[a-fA-F0-9]+ { yylval.l_value = strtoarith_t(yytext, NULL, 16); return ARITH_NUM; } 0[0-7]+ { yylval.l_value = strtoarith_t(yytext, NULL, 8); return ARITH_NUM; } [0-9]+ { yylval.l_value = strtoarith_t(yytext, NULL, 10); return ARITH_NUM; } [A-Za-z][A-Za-z0-9_]* { /* * If variable doesn't exist, we should initialize * it to zero. */ char *temp; if (lookupvar(yytext) == NULL) setvarsafe(yytext, "0", 0); temp = stalloc(strlen(yytext) + 1); yylval.s_value = strcpy(temp, yytext); return ARITH_VAR; } "(" { return ARITH_LPAREN; } ")" { return ARITH_RPAREN; } "||" { return ARITH_OR; } "&&" { return ARITH_AND; } "|" { return ARITH_BOR; } "^" { return ARITH_BXOR; } "&" { return ARITH_BAND; } "==" { return ARITH_EQ; } "!=" { return ARITH_NE; } ">" { return ARITH_GT; } ">=" { return ARITH_GE; } "<" { return ARITH_LT; } "<=" { return ARITH_LE; } "<<" { return ARITH_LSHIFT; } ">>" { return ARITH_RSHIFT; } "*" { return ARITH_MUL; } "/" { return ARITH_DIV; } "%" { return ARITH_REM; } "+" { return ARITH_ADD; } "-" { return ARITH_SUB; } "~" { return ARITH_BNOT; } "!" { return ARITH_NOT; } "=" { return ARITH_ASSIGN; } "+=" { return ARITH_ADDASSIGN; } "-=" { return ARITH_SUBASSIGN; } "*=" { return ARITH_MULASSIGN; } "/=" { return ARITH_DIVASSIGN; } "%=" { return ARITH_REMASSIGN; } ">>=" { return ARITH_RSHASSIGN; } "<<=" { return ARITH_LSHASSIGN; } "&=" { return ARITH_BANDASSIGN; } "^=" { return ARITH_BXORASSIGN; } "|=" { return ARITH_BORASSIGN; } . { error("arith: syntax error: \"%s\"\n", arith_startbuf); } %% void arith_lex_reset(void) { YY_NEW_FILE; } Index: projects/binutils-2.17/bin/sh/cd.c =================================================================== --- projects/binutils-2.17/bin/sh/cd.c (revision 215829) +++ projects/binutils-2.17/bin/sh/cd.c (revision 215830) @@ -1,415 +1,414 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Kenneth Almquist. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint #if 0 static char sccsid[] = "@(#)cd.c 8.2 (Berkeley) 5/4/95"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include /* * The cd and pwd commands. */ #include "shell.h" #include "var.h" #include "nodes.h" /* for jobs.h */ #include "jobs.h" #include "options.h" #include "output.h" #include "memalloc.h" #include "error.h" #include "exec.h" #include "redir.h" #include "mystring.h" #include "show.h" #include "cd.h" static int cdlogical(char *); static int cdphysical(char *); static int docd(char *, int, int); static char *getcomponent(void); static char *findcwd(char *); static void updatepwd(char *); static char *getpwd(void); static char *getpwd2(void); static char *curdir = NULL; /* current working directory */ static char *prevdir; /* previous working directory */ static char *cdcomppath; int cdcmd(int argc, char **argv) { const char *dest; const char *path; char *p; struct stat statb; int ch, phys, print = 0; optreset = 1; optind = 1; opterr = 0; /* initialize getopt */ phys = Pflag; while ((ch = getopt(argc, argv, "LP")) != -1) { switch (ch) { case 'L': phys = 0; break; case 'P': phys = 1; break; default: error("unknown option: -%c", optopt); break; } } argc -= optind; argv += optind; if (argc > 1) error("too many arguments"); if ((dest = *argv) == NULL && (dest = bltinlookup("HOME", 1)) == NULL) error("HOME not set"); if (*dest == '\0') dest = "."; if (dest[0] == '-' && dest[1] == '\0') { dest = prevdir ? prevdir : curdir; if (dest) print = 1; else dest = "."; } if (*dest == '/' || (path = bltinlookup("CDPATH", 1)) == NULL) path = nullstr; while ((p = padvance(&path, dest)) != NULL) { if (stat(p, &statb) >= 0 && S_ISDIR(statb.st_mode)) { if (!print) { /* * XXX - rethink */ if (p[0] == '.' && p[1] == '/' && p[2] != '\0') print = strcmp(p + 2, dest); else print = strcmp(p, dest); } if (docd(p, print, phys) >= 0) return 0; } } error("can't cd to %s", dest); /*NOTREACHED*/ return 0; } /* * Actually change the directory. In an interactive shell, print the * directory name if "print" is nonzero. */ static int docd(char *dest, int print, int phys) { TRACE(("docd(\"%s\", %d, %d) called\n", dest, print, phys)); /* If logical cd fails, fall back to physical. */ if ((phys || cdlogical(dest) < 0) && cdphysical(dest) < 0) return (-1); if (print && iflag && curdir) out1fmt("%s\n", curdir); return 0; } static int cdlogical(char *dest) { char *p; char *q; char *component; struct stat statb; int first; int badstat; /* * Check each component of the path. If we find a symlink or * something we can't stat, clear curdir to force a getcwd() * next time we get the value of the current directory. */ badstat = 0; cdcomppath = stalloc(strlen(dest) + 1); scopy(dest, cdcomppath); STARTSTACKSTR(p); if (*dest == '/') { STPUTC('/', p); cdcomppath++; } first = 1; while ((q = getcomponent()) != NULL) { if (q[0] == '\0' || (q[0] == '.' && q[1] == '\0')) continue; if (! first) STPUTC('/', p); first = 0; component = q; - while (*q) - STPUTC(*q++, p); + STPUTS(q, p); if (equal(component, "..")) continue; STACKSTRNUL(p); if (lstat(stackblock(), &statb) < 0) { badstat = 1; break; } } INTOFF; if ((p = findcwd(badstat ? NULL : dest)) == NULL || chdir(p) < 0) { INTON; return (-1); } updatepwd(p); INTON; return (0); } static int cdphysical(char *dest) { char *p; INTOFF; - if (chdir(dest) < 0 || (p = findcwd(NULL)) == NULL) { + if (chdir(dest) < 0) { INTON; return (-1); } + p = findcwd(NULL); + if (p == NULL) + out2fmt_flush("cd: warning: failed to get name of current directory\n"); updatepwd(p); INTON; return (0); } /* * Get the next component of the path name pointed to by cdcomppath. * This routine overwrites the string pointed to by cdcomppath. */ static char * getcomponent(void) { char *p; char *start; if ((p = cdcomppath) == NULL) return NULL; start = cdcomppath; while (*p != '/' && *p != '\0') p++; if (*p == '\0') { cdcomppath = NULL; } else { *p++ = '\0'; cdcomppath = p; } return start; } static char * findcwd(char *dir) { char *new; char *p; /* * If our argument is NULL, we don't know the current directory * any more because we traversed a symbolic link or something * we couldn't stat(). */ if (dir == NULL || curdir == NULL) return getpwd2(); cdcomppath = stalloc(strlen(dir) + 1); scopy(dir, cdcomppath); STARTSTACKSTR(new); if (*dir != '/') { - p = curdir; - while (*p) - STPUTC(*p++, new); - if (p[-1] == '/') + STPUTS(curdir, new); + if (STTOPC(new) == '/') STUNPUTC(new); } while ((p = getcomponent()) != NULL) { if (equal(p, "..")) { while (new > stackblock() && (STUNPUTC(new), *new) != '/'); } else if (*p != '\0' && ! equal(p, ".")) { STPUTC('/', new); - while (*p) - STPUTC(*p++, new); + STPUTS(p, new); } } if (new == stackblock()) STPUTC('/', new); STACKSTRNUL(new); return stackblock(); } /* * Update curdir (the name of the current directory) in response to a * cd command. We also call hashcd to let the routines in exec.c know * that the current directory has changed. */ static void updatepwd(char *dir) { hashcd(); /* update command hash table */ if (prevdir) ckfree(prevdir); prevdir = curdir; - curdir = savestr(dir); + curdir = dir ? savestr(dir) : NULL; setvar("PWD", curdir, VEXPORT); setvar("OLDPWD", prevdir, VEXPORT); } int pwdcmd(int argc, char **argv) { char *p; int ch, phys; optreset = 1; optind = 1; opterr = 0; /* initialize getopt */ phys = Pflag; while ((ch = getopt(argc, argv, "LP")) != -1) { switch (ch) { case 'L': phys = 0; break; case 'P': phys = 1; break; default: error("unknown option: -%c", optopt); break; } } argc -= optind; argv += optind; if (argc != 0) error("too many arguments"); if (!phys && getpwd()) { out1str(curdir); out1c('\n'); } else { if ((p = getpwd2()) == NULL) error(".: %s", strerror(errno)); out1str(p); out1c('\n'); } return 0; } /* * Get the current directory and cache the result in curdir. */ static char * getpwd(void) { char *p; if (curdir) return curdir; p = getpwd2(); if (p != NULL) curdir = savestr(p); return curdir; } #define MAXPWD 256 /* * Return the current directory. */ static char * getpwd2(void) { char *pwd; int i; for (i = MAXPWD;; i *= 2) { pwd = stalloc(i); if (getcwd(pwd, i) != NULL) return pwd; stunalloc(pwd); if (errno != ERANGE) break; } return NULL; } /* * Initialize PWD in a new shell. * If the shell is interactive, we need to warn if this fails. */ void pwd_init(int warn) { char *pwd; struct stat stdot, stpwd; pwd = lookupvar("PWD"); if (pwd && *pwd == '/' && stat(".", &stdot) != -1 && stat(pwd, &stpwd) != -1 && stdot.st_dev == stpwd.st_dev && stdot.st_ino == stpwd.st_ino) { if (curdir) ckfree(curdir); curdir = savestr(pwd); } if (getpwd() == NULL && warn) out2fmt_flush("sh: cannot determine working directory\n"); setvar("PWD", curdir, VEXPORT); } Index: projects/binutils-2.17/bin/sh/eval.c =================================================================== --- projects/binutils-2.17/bin/sh/eval.c (revision 215829) +++ projects/binutils-2.17/bin/sh/eval.c (revision 215830) @@ -1,1229 +1,1228 @@ /*- * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Kenneth Almquist. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint #if 0 static char sccsid[] = "@(#)eval.c 8.9 (Berkeley) 6/8/95"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include /* For WIFSIGNALED(status) */ #include /* * Evaluate a command. */ #include "shell.h" #include "nodes.h" #include "syntax.h" #include "expand.h" #include "parser.h" #include "jobs.h" #include "eval.h" #include "builtins.h" #include "options.h" #include "exec.h" #include "redir.h" #include "input.h" #include "output.h" #include "trap.h" #include "var.h" #include "memalloc.h" #include "error.h" #include "show.h" #include "mystring.h" #ifndef NO_HISTORY #include "myhistedit.h" #endif int evalskip; /* set if we are skipping commands */ static int skipcount; /* number of levels to skip */ MKINIT int loopnest; /* current loop nesting level */ int funcnest; /* depth of function calls */ static int builtin_flags; /* evalcommand flags for builtins */ char *commandname; struct strlist *cmdenviron; int exitstatus; /* exit status of last command */ int oexitstatus; /* saved exit status */ static void evalloop(union node *, int); static void evalfor(union node *, int); static void evalcase(union node *, int); static void evalsubshell(union node *, int); static void evalredir(union node *, int); static void expredir(union node *); static void evalpipe(union node *); static void evalcommand(union node *, int, struct backcmd *); static void prehash(union node *); /* * Called to reset things after an exception. */ #ifdef mkinit INCLUDE "eval.h" RESET { evalskip = 0; loopnest = 0; funcnest = 0; } SHELLPROC { exitstatus = 0; } #endif /* * The eval command. */ int evalcmd(int argc, char **argv) { char *p; char *concat; char **ap; if (argc > 1) { p = argv[1]; if (argc > 2) { STARTSTACKSTR(concat); ap = argv + 2; for (;;) { - while (*p) - STPUTC(*p++, concat); + STPUTS(p, concat); if ((p = *ap++) == NULL) break; STPUTC(' ', concat); } STPUTC('\0', concat); p = grabstackstr(concat); } evalstring(p, builtin_flags & EV_TESTED); } else exitstatus = 0; return exitstatus; } /* * Execute a command or commands contained in a string. */ void evalstring(char *s, int flags) { union node *n; struct stackmark smark; int flags_exit; int any; flags_exit = flags & EV_EXIT; flags &= ~EV_EXIT; any = 0; setstackmark(&smark); setinputstring(s, 1); while ((n = parsecmd(0)) != NEOF) { if (n != NULL) { if (flags_exit && preadateof()) evaltree(n, flags | EV_EXIT); else evaltree(n, flags); any = 1; } popstackmark(&smark); } popfile(); popstackmark(&smark); if (!any) exitstatus = 0; if (flags_exit) exitshell(exitstatus); } /* * Evaluate a parse tree. The value is left in the global variable * exitstatus. */ void evaltree(union node *n, int flags) { int do_etest; union node *next; do_etest = 0; if (n == NULL) { TRACE(("evaltree(NULL) called\n")); exitstatus = 0; goto out; } do { next = NULL; #ifndef NO_HISTORY displayhist = 1; /* show history substitutions done with fc */ #endif TRACE(("evaltree(%p: %d) called\n", (void *)n, n->type)); switch (n->type) { case NSEMI: evaltree(n->nbinary.ch1, flags & ~EV_EXIT); if (evalskip) goto out; next = n->nbinary.ch2; break; case NAND: evaltree(n->nbinary.ch1, EV_TESTED); if (evalskip || exitstatus != 0) { goto out; } next = n->nbinary.ch2; break; case NOR: evaltree(n->nbinary.ch1, EV_TESTED); if (evalskip || exitstatus == 0) goto out; next = n->nbinary.ch2; break; case NREDIR: evalredir(n, flags); break; case NSUBSHELL: evalsubshell(n, flags); do_etest = !(flags & EV_TESTED); break; case NBACKGND: evalsubshell(n, flags); break; case NIF: { evaltree(n->nif.test, EV_TESTED); if (evalskip) goto out; if (exitstatus == 0) next = n->nif.ifpart; else if (n->nif.elsepart) next = n->nif.elsepart; else exitstatus = 0; break; } case NWHILE: case NUNTIL: evalloop(n, flags & ~EV_EXIT); break; case NFOR: evalfor(n, flags & ~EV_EXIT); break; case NCASE: evalcase(n, flags); break; case NDEFUN: defun(n->narg.text, n->narg.next); exitstatus = 0; break; case NNOT: evaltree(n->nnot.com, EV_TESTED); exitstatus = !exitstatus; break; case NPIPE: evalpipe(n); do_etest = !(flags & EV_TESTED); break; case NCMD: evalcommand(n, flags, (struct backcmd *)NULL); do_etest = !(flags & EV_TESTED); break; default: out1fmt("Node type = %d\n", n->type); flushout(&output); break; } n = next; } while (n != NULL); out: if (pendingsigs) dotrap(); if ((flags & EV_EXIT) || (eflag && exitstatus != 0 && do_etest)) exitshell(exitstatus); } static void evalloop(union node *n, int flags) { int status; loopnest++; status = 0; for (;;) { evaltree(n->nbinary.ch1, EV_TESTED); if (evalskip) { skipping: if (evalskip == SKIPCONT && --skipcount <= 0) { evalskip = 0; continue; } if (evalskip == SKIPBREAK && --skipcount <= 0) evalskip = 0; if (evalskip == SKIPFUNC || evalskip == SKIPFILE) status = exitstatus; break; } if (n->type == NWHILE) { if (exitstatus != 0) break; } else { if (exitstatus == 0) break; } evaltree(n->nbinary.ch2, flags); status = exitstatus; if (evalskip) goto skipping; } loopnest--; exitstatus = status; } static void evalfor(union node *n, int flags) { struct arglist arglist; union node *argp; struct strlist *sp; struct stackmark smark; setstackmark(&smark); arglist.lastp = &arglist.list; for (argp = n->nfor.args ; argp ; argp = argp->narg.next) { oexitstatus = exitstatus; expandarg(argp, &arglist, EXP_FULL | EXP_TILDE); if (evalskip) goto out; } *arglist.lastp = NULL; exitstatus = 0; loopnest++; for (sp = arglist.list ; sp ; sp = sp->next) { setvar(n->nfor.var, sp->text, 0); evaltree(n->nfor.body, flags); if (evalskip) { if (evalskip == SKIPCONT && --skipcount <= 0) { evalskip = 0; continue; } if (evalskip == SKIPBREAK && --skipcount <= 0) evalskip = 0; break; } } loopnest--; out: popstackmark(&smark); } static void evalcase(union node *n, int flags) { union node *cp; union node *patp; struct arglist arglist; struct stackmark smark; setstackmark(&smark); arglist.lastp = &arglist.list; oexitstatus = exitstatus; exitstatus = 0; expandarg(n->ncase.expr, &arglist, EXP_TILDE); for (cp = n->ncase.cases ; cp && evalskip == 0 ; cp = cp->nclist.next) { for (patp = cp->nclist.pattern ; patp ; patp = patp->narg.next) { if (casematch(patp, arglist.list->text)) { if (evalskip == 0) { evaltree(cp->nclist.body, flags); } goto out; } } } out: popstackmark(&smark); } /* * Kick off a subshell to evaluate a tree. */ static void evalsubshell(union node *n, int flags) { struct job *jp; int backgnd = (n->type == NBACKGND); expredir(n->nredir.redirect); if ((!backgnd && flags & EV_EXIT && !have_traps()) || forkshell(jp = makejob(n, 1), n, backgnd) == 0) { if (backgnd) flags &=~ EV_TESTED; redirect(n->nredir.redirect, 0); evaltree(n->nredir.n, flags | EV_EXIT); /* never returns */ } else if (! backgnd) { INTOFF; exitstatus = waitforjob(jp, (int *)NULL); INTON; } } /* * Evaluate a redirected compound command. */ static void evalredir(union node *n, int flags) { struct jmploc jmploc; struct jmploc *savehandler; volatile int in_redirect = 1; expredir(n->nredir.redirect); savehandler = handler; if (setjmp(jmploc.loc)) { int e; handler = savehandler; e = exception; if (e == EXERROR || e == EXEXEC) { popredir(); if (in_redirect) { exitstatus = 2; return; } } longjmp(handler->loc, 1); } else { INTOFF; handler = &jmploc; redirect(n->nredir.redirect, REDIR_PUSH); in_redirect = 0; INTON; evaltree(n->nredir.n, flags); } INTOFF; handler = savehandler; popredir(); INTON; } /* * Compute the names of the files in a redirection list. */ static void expredir(union node *n) { union node *redir; for (redir = n ; redir ; redir = redir->nfile.next) { struct arglist fn; fn.lastp = &fn.list; oexitstatus = exitstatus; switch (redir->type) { case NFROM: case NTO: case NFROMTO: case NAPPEND: case NCLOBBER: expandarg(redir->nfile.fname, &fn, EXP_TILDE | EXP_REDIR); redir->nfile.expfname = fn.list->text; break; case NFROMFD: case NTOFD: if (redir->ndup.vname) { expandarg(redir->ndup.vname, &fn, EXP_TILDE | EXP_REDIR); fixredir(redir, fn.list->text, 1); } break; } } } /* * Evaluate a pipeline. All the processes in the pipeline are children * of the process creating the pipeline. (This differs from some versions * of the shell, which make the last process in a pipeline the parent * of all the rest.) */ static void evalpipe(union node *n) { struct job *jp; struct nodelist *lp; int pipelen; int prevfd; int pip[2]; TRACE(("evalpipe(%p) called\n", (void *)n)); pipelen = 0; for (lp = n->npipe.cmdlist ; lp ; lp = lp->next) pipelen++; INTOFF; jp = makejob(n, pipelen); prevfd = -1; for (lp = n->npipe.cmdlist ; lp ; lp = lp->next) { prehash(lp->n); pip[1] = -1; if (lp->next) { if (pipe(pip) < 0) { close(prevfd); error("Pipe call failed: %s", strerror(errno)); } } if (forkshell(jp, lp->n, n->npipe.backgnd) == 0) { INTON; if (prevfd > 0) { dup2(prevfd, 0); close(prevfd); } if (pip[1] >= 0) { if (!(prevfd >= 0 && pip[0] == 0)) close(pip[0]); if (pip[1] != 1) { dup2(pip[1], 1); close(pip[1]); } } evaltree(lp->n, EV_EXIT); } if (prevfd >= 0) close(prevfd); prevfd = pip[0]; close(pip[1]); } INTON; if (n->npipe.backgnd == 0) { INTOFF; exitstatus = waitforjob(jp, (int *)NULL); TRACE(("evalpipe: job done exit status %d\n", exitstatus)); INTON; } } /* * Execute a command inside back quotes. If it's a builtin command, we * want to save its output in a block obtained from malloc. Otherwise * we fork off a subprocess and get the output of the command via a pipe. * Should be called with interrupts off. */ void evalbackcmd(union node *n, struct backcmd *result) { int pip[2]; struct job *jp; struct stackmark smark; /* unnecessary */ setstackmark(&smark); result->fd = -1; result->buf = NULL; result->nleft = 0; result->jp = NULL; if (n == NULL) { exitstatus = 0; goto out; } if (n->type == NCMD) { exitstatus = oexitstatus; evalcommand(n, EV_BACKCMD, result); } else { exitstatus = 0; if (pipe(pip) < 0) error("Pipe call failed: %s", strerror(errno)); jp = makejob(n, 1); if (forkshell(jp, n, FORK_NOJOB) == 0) { FORCEINTON; close(pip[0]); if (pip[1] != 1) { dup2(pip[1], 1); close(pip[1]); } evaltree(n, EV_EXIT); } close(pip[1]); result->fd = pip[0]; result->jp = jp; } out: popstackmark(&smark); TRACE(("evalbackcmd done: fd=%d buf=%p nleft=%d jp=%p\n", result->fd, result->buf, result->nleft, result->jp)); } /* * Execute a simple command. */ static void evalcommand(union node *cmd, int flags, struct backcmd *backcmd) { struct stackmark smark; union node *argp; struct arglist arglist; struct arglist varlist; char **argv; int argc; char **envp; int varflag; struct strlist *sp; int mode; int pip[2]; struct cmdentry cmdentry; struct job *jp; struct jmploc jmploc; struct jmploc *savehandler; char *savecmdname; struct shparam saveparam; struct localvar *savelocalvars; struct parsefile *savetopfile; volatile int e; char *lastarg; int realstatus; int do_clearcmdentry; const char *path = pathval(); /* First expand the arguments. */ TRACE(("evalcommand(%p, %d) called\n", (void *)cmd, flags)); setstackmark(&smark); arglist.lastp = &arglist.list; varlist.lastp = &varlist.list; varflag = 1; do_clearcmdentry = 0; oexitstatus = exitstatus; exitstatus = 0; for (argp = cmd->ncmd.args ; argp ; argp = argp->narg.next) { char *p = argp->narg.text; if (varflag && is_name(*p)) { do { p++; } while (is_in_name(*p)); if (*p == '=') { expandarg(argp, &varlist, EXP_VARTILDE); continue; } } expandarg(argp, &arglist, EXP_FULL | EXP_TILDE); varflag = 0; } *arglist.lastp = NULL; *varlist.lastp = NULL; expredir(cmd->ncmd.redirect); argc = 0; for (sp = arglist.list ; sp ; sp = sp->next) argc++; argv = stalloc(sizeof (char *) * (argc + 1)); for (sp = arglist.list ; sp ; sp = sp->next) { TRACE(("evalcommand arg: %s\n", sp->text)); *argv++ = sp->text; } *argv = NULL; lastarg = NULL; if (iflag && funcnest == 0 && argc > 0) lastarg = argv[-1]; argv -= argc; /* Print the command if xflag is set. */ if (xflag) { char sep = 0; const char *p; out2str(ps4val()); for (sp = varlist.list ; sp ; sp = sp->next) { if (sep != 0) out2c(' '); p = strchr(sp->text, '='); if (p != NULL) { p++; outbin(sp->text, p - sp->text, out2); out2qstr(p); } else out2qstr(sp->text); sep = ' '; } for (sp = arglist.list ; sp ; sp = sp->next) { if (sep != 0) out2c(' '); /* Disambiguate command looking like assignment. */ if (sp == arglist.list && strchr(sp->text, '=') != NULL && strchr(sp->text, '\'') == NULL) { out2c('\''); out2str(sp->text); out2c('\''); } else out2qstr(sp->text); sep = ' '; } out2c('\n'); flushout(&errout); } /* Now locate the command. */ if (argc == 0) { /* Variable assignment(s) without command */ cmdentry.cmdtype = CMDBUILTIN; cmdentry.u.index = BLTINCMD; cmdentry.special = 0; } else { static const char PATH[] = "PATH="; int cmd_flags = 0, bltinonly = 0; /* * Modify the command lookup path, if a PATH= assignment * is present */ for (sp = varlist.list ; sp ; sp = sp->next) if (strncmp(sp->text, PATH, sizeof(PATH) - 1) == 0) { path = sp->text + sizeof(PATH) - 1; /* * On `PATH=... command`, we need to make * sure that the command isn't using the * non-updated hash table of the outer PATH * setting and we need to make sure that * the hash table isn't filled with items * from the temporary setting. * * It would be better to forbit using and * updating the table while this command * runs, by the command finding mechanism * is heavily integrated with hash handling, * so we just delete the hash before and after * the command runs. Partly deleting like * changepatch() does doesn't seem worth the * bookinging effort, since most such runs add * directories in front of the new PATH. */ clearcmdentry(0); do_clearcmdentry = 1; } for (;;) { if (bltinonly) { cmdentry.u.index = find_builtin(*argv, &cmdentry.special); if (cmdentry.u.index < 0) { cmdentry.u.index = BLTINCMD; argv--; argc++; break; } } else find_command(argv[0], &cmdentry, cmd_flags, path); /* implement the bltin and command builtins here */ if (cmdentry.cmdtype != CMDBUILTIN) break; if (cmdentry.u.index == BLTINCMD) { if (argc == 1) break; argv++; argc--; bltinonly = 1; } else if (cmdentry.u.index == COMMANDCMD) { if (argc == 1) break; if (!strcmp(argv[1], "-p")) { if (argc == 2) break; if (argv[2][0] == '-') { if (strcmp(argv[2], "--")) break; if (argc == 3) break; argv += 3; argc -= 3; } else { argv += 2; argc -= 2; } path = _PATH_STDPATH; clearcmdentry(0); do_clearcmdentry = 1; } else if (!strcmp(argv[1], "--")) { if (argc == 2) break; argv += 2; argc -= 2; } else if (argv[1][0] == '-') break; else { argv++; argc--; } cmd_flags |= DO_NOFUNC; bltinonly = 0; } else break; } /* * Special builtins lose their special properties when * called via 'command'. */ if (cmd_flags & DO_NOFUNC) cmdentry.special = 0; } /* Fork off a child process if necessary. */ if (cmd->ncmd.backgnd || ((cmdentry.cmdtype == CMDNORMAL || cmdentry.cmdtype == CMDUNKNOWN) && ((flags & EV_EXIT) == 0 || have_traps())) || ((flags & EV_BACKCMD) != 0 && (cmdentry.cmdtype != CMDBUILTIN || cmdentry.u.index == CDCMD || cmdentry.u.index == DOTCMD || cmdentry.u.index == EVALCMD))) { jp = makejob(cmd, 1); mode = cmd->ncmd.backgnd; if (flags & EV_BACKCMD) { mode = FORK_NOJOB; if (pipe(pip) < 0) error("Pipe call failed: %s", strerror(errno)); } if (forkshell(jp, cmd, mode) != 0) goto parent; /* at end of routine */ if (flags & EV_BACKCMD) { FORCEINTON; close(pip[0]); if (pip[1] != 1) { dup2(pip[1], 1); close(pip[1]); } } flags |= EV_EXIT; } /* This is the child process if a fork occurred. */ /* Execute the command. */ if (cmdentry.cmdtype == CMDFUNCTION) { #ifdef DEBUG trputs("Shell function: "); trargs(argv); #endif saveparam = shellparam; shellparam.malloc = 0; shellparam.reset = 1; shellparam.nparam = argc - 1; shellparam.p = argv + 1; shellparam.optnext = NULL; INTOFF; savelocalvars = localvars; localvars = NULL; reffunc(cmdentry.u.func); savehandler = handler; if (setjmp(jmploc.loc)) { if (exception == EXSHELLPROC) freeparam(&saveparam); else { freeparam(&shellparam); shellparam = saveparam; if (exception == EXERROR || exception == EXEXEC) popredir(); } unreffunc(cmdentry.u.func); poplocalvars(); localvars = savelocalvars; funcnest--; handler = savehandler; longjmp(handler->loc, 1); } handler = &jmploc; funcnest++; redirect(cmd->ncmd.redirect, REDIR_PUSH); INTON; for (sp = varlist.list ; sp ; sp = sp->next) mklocal(sp->text); exitstatus = oexitstatus; if (flags & EV_TESTED) evaltree(getfuncnode(cmdentry.u.func), EV_TESTED); else evaltree(getfuncnode(cmdentry.u.func), 0); INTOFF; unreffunc(cmdentry.u.func); poplocalvars(); localvars = savelocalvars; freeparam(&shellparam); shellparam = saveparam; handler = savehandler; funcnest--; popredir(); INTON; if (evalskip == SKIPFUNC) { evalskip = 0; skipcount = 0; } if (flags & EV_EXIT) exitshell(exitstatus); } else if (cmdentry.cmdtype == CMDBUILTIN) { #ifdef DEBUG trputs("builtin command: "); trargs(argv); #endif mode = (cmdentry.u.index == EXECCMD)? 0 : REDIR_PUSH; if (flags == EV_BACKCMD) { memout.nleft = 0; memout.nextc = memout.buf; memout.bufsize = 64; mode |= REDIR_BACKQ; cmdentry.special = 0; } savecmdname = commandname; savetopfile = getcurrentfile(); cmdenviron = varlist.list; e = -1; savehandler = handler; if (setjmp(jmploc.loc)) { e = exception; exitstatus = (e == EXINT)? SIGINT+128 : 2; goto cmddone; } handler = &jmploc; redirect(cmd->ncmd.redirect, mode); /* * If there is no command word, redirection errors should * not be fatal but assignment errors should. */ if (argc == 0 && !(flags & EV_BACKCMD)) cmdentry.special = 1; if (cmdentry.special) listsetvar(cmdenviron); if (argc > 0) bltinsetlocale(); commandname = argv[0]; argptr = argv + 1; nextopt_optptr = NULL; /* initialize nextopt */ builtin_flags = flags; exitstatus = (*builtinfunc[cmdentry.u.index])(argc, argv); flushall(); cmddone: if (argc > 0) bltinunsetlocale(); cmdenviron = NULL; out1 = &output; out2 = &errout; freestdout(); if (e != EXSHELLPROC) { commandname = savecmdname; if (flags & EV_EXIT) { exitshell(exitstatus); } } handler = savehandler; if (flags == EV_BACKCMD) { backcmd->buf = memout.buf; backcmd->nleft = memout.nextc - memout.buf; memout.buf = NULL; } if (cmdentry.u.index != EXECCMD && (e == -1 || e == EXERROR || e == EXEXEC)) popredir(); if (e != -1) { if ((e != EXERROR && e != EXEXEC) || cmdentry.special) exraise(e); popfilesupto(savetopfile); if (flags != EV_BACKCMD) FORCEINTON; } } else { #ifdef DEBUG trputs("normal command: "); trargs(argv); #endif redirect(cmd->ncmd.redirect, 0); for (sp = varlist.list ; sp ; sp = sp->next) setvareq(sp->text, VEXPORT|VSTACK); envp = environment(); shellexec(argv, envp, path, cmdentry.u.index); /*NOTREACHED*/ } goto out; parent: /* parent process gets here (if we forked) */ if (mode == FORK_FG) { /* argument to fork */ INTOFF; exitstatus = waitforjob(jp, &realstatus); INTON; if (iflag && loopnest > 0 && WIFSIGNALED(realstatus)) { evalskip = SKIPBREAK; skipcount = loopnest; } } else if (mode == FORK_NOJOB) { backcmd->fd = pip[0]; close(pip[1]); backcmd->jp = jp; } out: if (lastarg) setvar("_", lastarg, 0); if (do_clearcmdentry) clearcmdentry(0); popstackmark(&smark); } /* * Search for a command. This is called before we fork so that the * location of the command will be available in the parent as well as * the child. The check for "goodname" is an overly conservative * check that the name will not be subject to expansion. */ static void prehash(union node *n) { struct cmdentry entry; if (n && n->type == NCMD && n->ncmd.args) if (goodname(n->ncmd.args->narg.text)) find_command(n->ncmd.args->narg.text, &entry, 0, pathval()); } /* * Builtin commands. Builtin commands whose functions are closely * tied to evaluation are implemented here. */ /* * No command given, a bltin command with no arguments, or a bltin command * with an invalid name. */ int bltincmd(int argc, char **argv) { if (argc > 1) { out2fmt_flush("%s: not found\n", argv[1]); return 127; } /* * Preserve exitstatus of a previous possible redirection * as POSIX mandates */ return exitstatus; } /* * Handle break and continue commands. Break, continue, and return are * all handled by setting the evalskip flag. The evaluation routines * above all check this flag, and if it is set they start skipping * commands rather than executing them. The variable skipcount is * the number of loops to break/continue, or the number of function * levels to return. (The latter is always 1.) It should probably * be an error to break out of more loops than exist, but it isn't * in the standard shell so we don't make it one here. */ int breakcmd(int argc, char **argv) { int n = argc > 1 ? number(argv[1]) : 1; if (n > loopnest) n = loopnest; if (n > 0) { evalskip = (**argv == 'c')? SKIPCONT : SKIPBREAK; skipcount = n; } return 0; } /* * The `command' command. */ int commandcmd(int argc, char **argv) { const char *path; int ch; int cmd = -1; path = bltinlookup("PATH", 1); optind = optreset = 1; opterr = 0; while ((ch = getopt(argc, argv, "pvV")) != -1) { switch (ch) { case 'p': path = _PATH_STDPATH; break; case 'v': cmd = TYPECMD_SMALLV; break; case 'V': cmd = TYPECMD_BIGV; break; case '?': default: error("unknown option: -%c", optopt); } } argc -= optind; argv += optind; if (cmd != -1) { if (argc != 1) error("wrong number of arguments"); return typecmd_impl(2, argv - 1, cmd, path); } if (argc != 0) error("commandcmd bad call"); /* * Do nothing successfully if no command was specified; * ksh also does this. */ return 0; } /* * The return command. */ int returncmd(int argc, char **argv) { int ret = argc > 1 ? number(argv[1]) : oexitstatus; if (funcnest) { evalskip = SKIPFUNC; skipcount = 1; } else { /* skip the rest of the file */ evalskip = SKIPFILE; skipcount = 1; } return ret; } int falsecmd(int argc __unused, char **argv __unused) { return 1; } int truecmd(int argc __unused, char **argv __unused) { return 0; } int execcmd(int argc, char **argv) { /* * Because we have historically not supported any options, * only treat "--" specially. */ if (argc > 1 && strcmp(argv[1], "--") == 0) argc--, argv++; if (argc > 1) { struct strlist *sp; iflag = 0; /* exit on error */ mflag = 0; optschanged(); for (sp = cmdenviron; sp ; sp = sp->next) setvareq(sp->text, VEXPORT|VSTACK); shellexec(argv + 1, environment(), pathval(), 0); } return 0; } int timescmd(int argc __unused, char **argv __unused) { struct rusage ru; long shumins, shsmins, chumins, chsmins; double shusecs, shssecs, chusecs, chssecs; if (getrusage(RUSAGE_SELF, &ru) < 0) return 1; shumins = ru.ru_utime.tv_sec / 60; shusecs = ru.ru_utime.tv_sec % 60 + ru.ru_utime.tv_usec / 1000000.; shsmins = ru.ru_stime.tv_sec / 60; shssecs = ru.ru_stime.tv_sec % 60 + ru.ru_stime.tv_usec / 1000000.; if (getrusage(RUSAGE_CHILDREN, &ru) < 0) return 1; chumins = ru.ru_utime.tv_sec / 60; chusecs = ru.ru_utime.tv_sec % 60 + ru.ru_utime.tv_usec / 1000000.; chsmins = ru.ru_stime.tv_sec / 60; chssecs = ru.ru_stime.tv_sec % 60 + ru.ru_stime.tv_usec / 1000000.; out1fmt("%ldm%.3fs %ldm%.3fs\n%ldm%.3fs %ldm%.3fs\n", shumins, shusecs, shsmins, shssecs, chumins, chusecs, chsmins, chssecs); return 0; } Index: projects/binutils-2.17/bin/sh/expand.c =================================================================== --- projects/binutils-2.17/bin/sh/expand.c (revision 215829) +++ projects/binutils-2.17/bin/sh/expand.c (revision 215830) @@ -1,1598 +1,1597 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * Copyright (c) 1997-2005 * Herbert Xu . All rights reserved. * * This code is derived from software contributed to Berkeley by * Kenneth Almquist. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint #if 0 static char sccsid[] = "@(#)expand.c 8.5 (Berkeley) 5/15/95"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include /* * Routines to expand arguments to commands. We have to deal with * backquotes, shell variables, and file metacharacters. */ #include "shell.h" #include "main.h" #include "nodes.h" #include "eval.h" #include "expand.h" #include "syntax.h" #include "parser.h" #include "jobs.h" #include "options.h" #include "var.h" #include "input.h" #include "output.h" #include "memalloc.h" #include "error.h" #include "mystring.h" #include "arith.h" #include "show.h" /* * Structure specifying which parts of the string should be searched * for IFS characters. */ struct ifsregion { struct ifsregion *next; /* next region in list */ int begoff; /* offset of start of region */ int endoff; /* offset of end of region */ int inquotes; /* search for nul bytes only */ }; static char *expdest; /* output of current string */ static struct nodelist *argbackq; /* list of back quote expressions */ static struct ifsregion ifsfirst; /* first struct in list of ifs regions */ static struct ifsregion *ifslastp; /* last struct in list */ static struct arglist exparg; /* holds expanded arg list */ static void argstr(char *, int); static char *exptilde(char *, int); static void expbackq(union node *, int, int); static int subevalvar(char *, char *, int, int, int, int, int); static char *evalvar(char *, int); static int varisset(char *, int); static void varvalue(char *, int, int, int); static void recordregion(int, int, int); static void removerecordregions(int); static void ifsbreakup(char *, struct arglist *); static void expandmeta(struct strlist *, int); static void expmeta(char *, char *); static void addfname(char *); static struct strlist *expsort(struct strlist *); static struct strlist *msort(struct strlist *, int); static char *cvtnum(int, char *); static int collate_range_cmp(int, int); static int collate_range_cmp(int c1, int c2) { static char s1[2], s2[2]; s1[0] = c1; s2[0] = c2; return (strcoll(s1, s2)); } /* * Expand shell variables and backquotes inside a here document. * union node *arg the document * int fd; where to write the expanded version */ void expandhere(union node *arg, int fd) { herefd = fd; expandarg(arg, (struct arglist *)NULL, 0); xwrite(fd, stackblock(), expdest - stackblock()); } /* * Perform expansions on an argument, placing the resulting list of arguments * in arglist. Parameter expansion, command substitution and arithmetic * expansion are always performed; additional expansions can be requested * via flag (EXP_*). * The result is left in the stack string. * When arglist is NULL, perform here document expansion. A partial result * may be written to herefd, which is then not included in the stack string. * * Caution: this function uses global state and is not reentrant. * However, a new invocation after an interrupted invocation is safe * and will reset the global state for the new call. */ void expandarg(union node *arg, struct arglist *arglist, int flag) { struct strlist *sp; char *p; argbackq = arg->narg.backquote; STARTSTACKSTR(expdest); ifsfirst.next = NULL; ifslastp = NULL; argstr(arg->narg.text, flag); if (arglist == NULL) { return; /* here document expanded */ } STPUTC('\0', expdest); p = grabstackstr(expdest); exparg.lastp = &exparg.list; /* * TODO - EXP_REDIR */ if (flag & EXP_FULL) { ifsbreakup(p, &exparg); *exparg.lastp = NULL; exparg.lastp = &exparg.list; expandmeta(exparg.list, flag); } else { if (flag & EXP_REDIR) /*XXX - for now, just remove escapes */ rmescapes(p); sp = (struct strlist *)stalloc(sizeof (struct strlist)); sp->text = p; *exparg.lastp = sp; exparg.lastp = &sp->next; } while (ifsfirst.next != NULL) { struct ifsregion *ifsp; INTOFF; ifsp = ifsfirst.next->next; ckfree(ifsfirst.next); ifsfirst.next = ifsp; INTON; } *exparg.lastp = NULL; if (exparg.list) { *arglist->lastp = exparg.list; arglist->lastp = exparg.lastp; } } /* * Perform parameter expansion, command substitution and arithmetic * expansion, and tilde expansion if requested via EXP_TILDE/EXP_VARTILDE. * Processing ends at a CTLENDVAR character as well as '\0'. * This is used to expand word in ${var+word} etc. * If EXP_FULL, EXP_CASE or EXP_REDIR are set, keep and/or generate CTLESC * characters to allow for further processing. * If EXP_FULL is set, also preserve CTLQUOTEMARK characters. */ static void argstr(char *p, int flag) { char c; int quotes = flag & (EXP_FULL | EXP_CASE | EXP_REDIR); /* do CTLESC */ int firsteq = 1; int split_lit; int lit_quoted; split_lit = flag & EXP_SPLIT_LIT; lit_quoted = flag & EXP_LIT_QUOTED; flag &= ~(EXP_SPLIT_LIT | EXP_LIT_QUOTED); if (*p == '~' && (flag & (EXP_TILDE | EXP_VARTILDE))) p = exptilde(p, flag); for (;;) { + CHECKSTRSPACE(2, expdest); switch (c = *p++) { case '\0': case CTLENDVAR: goto breakloop; case CTLQUOTEMARK: lit_quoted = 1; /* "$@" syntax adherence hack */ if (p[0] == CTLVAR && p[2] == '@' && p[3] == '=') break; if ((flag & EXP_FULL) != 0) - STPUTC(c, expdest); + USTPUTC(c, expdest); break; case CTLQUOTEEND: lit_quoted = 0; break; case CTLESC: if (quotes) - STPUTC(c, expdest); + USTPUTC(c, expdest); c = *p++; - STPUTC(c, expdest); + USTPUTC(c, expdest); if (split_lit && !lit_quoted) recordregion(expdest - stackblock() - (quotes ? 2 : 1), expdest - stackblock(), 0); break; case CTLVAR: p = evalvar(p, flag); break; case CTLBACKQ: case CTLBACKQ|CTLQUOTE: expbackq(argbackq->n, c & CTLQUOTE, flag); argbackq = argbackq->next; break; case CTLENDARI: expari(flag); break; case ':': case '=': /* * sort of a hack - expand tildes in variable * assignments (after the first '=' and after ':'s). */ - STPUTC(c, expdest); + USTPUTC(c, expdest); if (split_lit && !lit_quoted) recordregion(expdest - stackblock() - 1, expdest - stackblock(), 0); if (flag & EXP_VARTILDE && *p == '~' && (c != '=' || firsteq)) { if (c == '=') firsteq = 0; p = exptilde(p, flag); } break; default: - STPUTC(c, expdest); + USTPUTC(c, expdest); if (split_lit && !lit_quoted) recordregion(expdest - stackblock() - 1, expdest - stackblock(), 0); } } breakloop:; } /* * Perform tilde expansion, placing the result in the stack string and * returning the next position in the input string to process. */ static char * exptilde(char *p, int flag) { char c, *startp = p; struct passwd *pw; char *home; int quotes = flag & (EXP_FULL | EXP_CASE | EXP_REDIR); while ((c = *p) != '\0') { switch(c) { case CTLESC: /* This means CTL* are always considered quoted. */ case CTLVAR: case CTLBACKQ: case CTLBACKQ | CTLQUOTE: case CTLARI: case CTLENDARI: case CTLQUOTEMARK: return (startp); case ':': if (flag & EXP_VARTILDE) goto done; break; case '/': case CTLENDVAR: goto done; } p++; } done: *p = '\0'; if (*(startp+1) == '\0') { if ((home = lookupvar("HOME")) == NULL) goto lose; } else { if ((pw = getpwnam(startp+1)) == NULL) goto lose; home = pw->pw_dir; } if (*home == '\0') goto lose; *p = c; while ((c = *home++) != '\0') { if (quotes && SQSYNTAX[(int)c] == CCTL) STPUTC(CTLESC, expdest); STPUTC(c, expdest); } return (p); lose: *p = c; return (startp); } static void removerecordregions(int endoff) { if (ifslastp == NULL) return; if (ifsfirst.endoff > endoff) { while (ifsfirst.next != NULL) { struct ifsregion *ifsp; INTOFF; ifsp = ifsfirst.next->next; ckfree(ifsfirst.next); ifsfirst.next = ifsp; INTON; } if (ifsfirst.begoff > endoff) ifslastp = NULL; else { ifslastp = &ifsfirst; ifsfirst.endoff = endoff; } return; } ifslastp = &ifsfirst; while (ifslastp->next && ifslastp->next->begoff < endoff) ifslastp=ifslastp->next; while (ifslastp->next != NULL) { struct ifsregion *ifsp; INTOFF; ifsp = ifslastp->next->next; ckfree(ifslastp->next); ifslastp->next = ifsp; INTON; } if (ifslastp->endoff > endoff) ifslastp->endoff = endoff; } /* * Expand arithmetic expression. Backup to start of expression, * evaluate, place result in (backed up) result, adjust string position. */ void expari(int flag) { char *p, *q, *start; arith_t result; int begoff; int quotes = flag & (EXP_FULL | EXP_CASE | EXP_REDIR); int quoted; /* * This routine is slightly over-complicated for * efficiency. First we make sure there is * enough space for the result, which may be bigger * than the expression. Next we * scan backwards looking for the start of arithmetic. If the * next previous character is a CTLESC character, then we * have to rescan starting from the beginning since CTLESC * characters have to be processed left to right. */ CHECKSTRSPACE(DIGITS(result) - 2, expdest); USTPUTC('\0', expdest); start = stackblock(); p = expdest - 2; while (p >= start && *p != CTLARI) --p; if (p < start || *p != CTLARI) error("missing CTLARI (shouldn't happen)"); if (p > start && *(p - 1) == CTLESC) for (p = start; *p != CTLARI; p++) if (*p == CTLESC) p++; if (p[1] == '"') quoted=1; else quoted=0; begoff = p - start; removerecordregions(begoff); if (quotes) rmescapes(p+2); q = grabstackstr(expdest); result = arith(p+2); ungrabstackstr(q, expdest); fmtstr(p, DIGITS(result), ARITH_FORMAT_STR, result); while (*p++) ; if (quoted == 0) recordregion(begoff, p - 1 - start, 0); result = expdest - p + 1; STADJUST(-result, expdest); } /* * Perform command substitution. */ static void expbackq(union node *cmd, int quoted, int flag) { struct backcmd in; int i; char buf[128]; char *p; char *dest = expdest; struct ifsregion saveifs, *savelastp; struct nodelist *saveargbackq; char lastc; int startloc = dest - stackblock(); char const *syntax = quoted? DQSYNTAX : BASESYNTAX; int saveherefd; int quotes = flag & (EXP_FULL | EXP_CASE | EXP_REDIR); int nnl; INTOFF; saveifs = ifsfirst; savelastp = ifslastp; saveargbackq = argbackq; saveherefd = herefd; herefd = -1; p = grabstackstr(dest); evalbackcmd(cmd, &in); ungrabstackstr(p, dest); ifsfirst = saveifs; ifslastp = savelastp; argbackq = saveargbackq; herefd = saveherefd; p = in.buf; lastc = '\0'; nnl = 0; /* Don't copy trailing newlines */ for (;;) { if (--in.nleft < 0) { if (in.fd < 0) break; while ((i = read(in.fd, buf, sizeof buf)) < 0 && errno == EINTR); TRACE(("expbackq: read returns %d\n", i)); if (i <= 0) break; p = buf; in.nleft = i - 1; } lastc = *p++; if (lastc != '\0') { if (quotes && syntax[(int)lastc] == CCTL) STPUTC(CTLESC, dest); if (lastc == '\n') { nnl++; } else { while (nnl > 0) { nnl--; STPUTC('\n', dest); } STPUTC(lastc, dest); } } } if (in.fd >= 0) close(in.fd); if (in.buf) ckfree(in.buf); if (in.jp) exitstatus = waitforjob(in.jp, (int *)NULL); if (quoted == 0) recordregion(startloc, dest - stackblock(), 0); TRACE(("expbackq: size=%td: \"%.*s\"\n", ((dest - stackblock()) - startloc), (int)((dest - stackblock()) - startloc), stackblock() + startloc)); expdest = dest; INTON; } static int subevalvar(char *p, char *str, int strloc, int subtype, int startloc, int varflags, int quotes) { char *startp; char *loc = NULL; char *q; int c = 0; int saveherefd = herefd; struct nodelist *saveargbackq = argbackq; int amount; herefd = -1; argstr(p, (subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX || subtype == VSTRIMRIGHT || subtype == VSTRIMRIGHTMAX ? EXP_CASE : 0) | EXP_TILDE); STACKSTRNUL(expdest); herefd = saveherefd; argbackq = saveargbackq; startp = stackblock() + startloc; if (str == NULL) str = stackblock() + strloc; switch (subtype) { case VSASSIGN: setvar(str, startp, 0); amount = startp - expdest; STADJUST(amount, expdest); varflags &= ~VSNUL; if (c != 0) *loc = c; return 1; case VSQUESTION: if (*p != CTLENDVAR) { outfmt(out2, "%s\n", startp); error((char *)NULL); } error("%.*s: parameter %snot set", (int)(p - str - 1), str, (varflags & VSNUL) ? "null or " : nullstr); return 0; case VSTRIMLEFT: for (loc = startp; loc < str; loc++) { c = *loc; *loc = '\0'; if (patmatch(str, startp, quotes)) { *loc = c; goto recordleft; } *loc = c; if (quotes && *loc == CTLESC) loc++; } return 0; case VSTRIMLEFTMAX: for (loc = str - 1; loc >= startp;) { c = *loc; *loc = '\0'; if (patmatch(str, startp, quotes)) { *loc = c; goto recordleft; } *loc = c; loc--; if (quotes && loc > startp && *(loc - 1) == CTLESC) { for (q = startp; q < loc; q++) if (*q == CTLESC) q++; if (q > loc) loc--; } } return 0; case VSTRIMRIGHT: for (loc = str - 1; loc >= startp;) { if (patmatch(str, loc, quotes)) { amount = loc - expdest; STADJUST(amount, expdest); return 1; } loc--; if (quotes && loc > startp && *(loc - 1) == CTLESC) { for (q = startp; q < loc; q++) if (*q == CTLESC) q++; if (q > loc) loc--; } } return 0; case VSTRIMRIGHTMAX: for (loc = startp; loc < str - 1; loc++) { if (patmatch(str, loc, quotes)) { amount = loc - expdest; STADJUST(amount, expdest); return 1; } if (quotes && *loc == CTLESC) loc++; } return 0; default: abort(); } recordleft: amount = ((str - 1) - (loc - startp)) - expdest; STADJUST(amount, expdest); while (loc != str - 1) *startp++ = *loc++; return 1; } /* * Expand a variable, and return a pointer to the next character in the * input string. */ static char * evalvar(char *p, int flag) { int subtype; int varflags; char *var; char *val; int patloc; int c; int set; int special; int startloc; int varlen; int easy; int quotes = flag & (EXP_FULL | EXP_CASE | EXP_REDIR); varflags = (unsigned char)*p++; subtype = varflags & VSTYPE; var = p; special = 0; if (! is_name(*p)) special = 1; p = strchr(p, '=') + 1; again: /* jump here after setting a variable with ${var=text} */ if (varflags & VSLINENO) { set = 1; special = 0; val = var; p[-1] = '\0'; /* temporarily overwrite '=' to have \0 terminated string */ } else if (special) { set = varisset(var, varflags & VSNUL); val = NULL; } else { val = bltinlookup(var, 1); if (val == NULL || ((varflags & VSNUL) && val[0] == '\0')) { val = NULL; set = 0; } else set = 1; } varlen = 0; startloc = expdest - stackblock(); if (!set && uflag && *var != '@' && *var != '*') { switch (subtype) { case VSNORMAL: case VSTRIMLEFT: case VSTRIMLEFTMAX: case VSTRIMRIGHT: case VSTRIMRIGHTMAX: case VSLENGTH: error("%.*s: parameter not set", (int)(p - var - 1), var); } } if (set && subtype != VSPLUS) { /* insert the value of the variable */ if (special) { varvalue(var, varflags & VSQUOTE, subtype, flag); if (subtype == VSLENGTH) { varlen = expdest - stackblock() - startloc; STADJUST(-varlen, expdest); } } else { char const *syntax = (varflags & VSQUOTE) ? DQSYNTAX : BASESYNTAX; if (subtype == VSLENGTH) { for (;*val; val++) varlen++; } else { while (*val) { if (quotes && syntax[(int)*val] == CCTL) STPUTC(CTLESC, expdest); STPUTC(*val++, expdest); } } } } if (subtype == VSPLUS) set = ! set; easy = ((varflags & VSQUOTE) == 0 || (*var == '@' && shellparam.nparam != 1)); switch (subtype) { case VSLENGTH: expdest = cvtnum(varlen, expdest); goto record; case VSNORMAL: if (!easy) break; record: recordregion(startloc, expdest - stackblock(), varflags & VSQUOTE); break; case VSPLUS: case VSMINUS: if (!set) { argstr(p, flag | (flag & EXP_FULL ? EXP_SPLIT_LIT : 0) | (varflags & VSQUOTE ? EXP_LIT_QUOTED : 0)); break; } if (easy) goto record; break; case VSTRIMLEFT: case VSTRIMLEFTMAX: case VSTRIMRIGHT: case VSTRIMRIGHTMAX: if (!set) break; /* * Terminate the string and start recording the pattern * right after it */ STPUTC('\0', expdest); patloc = expdest - stackblock(); if (subevalvar(p, NULL, patloc, subtype, startloc, varflags, quotes) == 0) { int amount = (expdest - stackblock() - patloc) + 1; STADJUST(-amount, expdest); } /* Remove any recorded regions beyond start of variable */ removerecordregions(startloc); goto record; case VSASSIGN: case VSQUESTION: if (!set) { if (subevalvar(p, var, 0, subtype, startloc, varflags, quotes)) { varflags &= ~VSNUL; /* * Remove any recorded regions beyond * start of variable */ removerecordregions(startloc); goto again; } break; } if (easy) goto record; break; case VSERROR: c = p - var - 1; error("${%.*s%s}: Bad substitution", c, var, (c > 0 && *p != CTLENDVAR) ? "..." : ""); default: abort(); } p[-1] = '='; /* recover overwritten '=' */ if (subtype != VSNORMAL) { /* skip to end of alternative */ int nesting = 1; for (;;) { if ((c = *p++) == CTLESC) p++; else if (c == CTLBACKQ || c == (CTLBACKQ|CTLQUOTE)) { if (set) argbackq = argbackq->next; } else if (c == CTLVAR) { if ((*p++ & VSTYPE) != VSNORMAL) nesting++; } else if (c == CTLENDVAR) { if (--nesting == 0) break; } } } return p; } /* * Test whether a specialized variable is set. */ static int varisset(char *name, int nulok) { if (*name == '!') return backgndpidset(); else if (*name == '@' || *name == '*') { if (*shellparam.p == NULL) return 0; if (nulok) { char **av; for (av = shellparam.p; *av; av++) if (**av != '\0') return 1; return 0; } } else if (is_digit(*name)) { char *ap; int num = atoi(name); if (num > shellparam.nparam) return 0; if (num == 0) ap = arg0; else ap = shellparam.p[num - 1]; if (nulok && (ap == NULL || *ap == '\0')) return 0; } return 1; } /* * Add the value of a specialized variable to the stack string. */ static void varvalue(char *name, int quoted, int subtype, int flag) { int num; char *p; int i; char sep; char **ap; char const *syntax; #define STRTODEST(p) \ do {\ if (flag & (EXP_FULL | EXP_CASE) && subtype != VSLENGTH) { \ syntax = quoted? DQSYNTAX : BASESYNTAX; \ while (*p) { \ if (syntax[(int)*p] == CCTL) \ STPUTC(CTLESC, expdest); \ STPUTC(*p++, expdest); \ } \ } else \ - while (*p) \ - STPUTC(*p++, expdest); \ + STPUTS(p, expdest); \ } while (0) switch (*name) { case '$': num = rootpid; goto numvar; case '?': num = oexitstatus; goto numvar; case '#': num = shellparam.nparam; goto numvar; case '!': num = backgndpidval(); numvar: expdest = cvtnum(num, expdest); break; case '-': for (i = 0 ; i < NOPTS ; i++) { if (optlist[i].val) STPUTC(optlist[i].letter, expdest); } break; case '@': if (flag & EXP_FULL && quoted) { for (ap = shellparam.p ; (p = *ap++) != NULL ; ) { STRTODEST(p); if (*ap) STPUTC('\0', expdest); } break; } /* FALLTHROUGH */ case '*': if (ifsset()) sep = ifsval()[0]; else sep = ' '; for (ap = shellparam.p ; (p = *ap++) != NULL ; ) { STRTODEST(p); if (*ap && sep) STPUTC(sep, expdest); } break; case '0': p = arg0; STRTODEST(p); break; default: if (is_digit(*name)) { num = atoi(name); if (num > 0 && num <= shellparam.nparam) { p = shellparam.p[num - 1]; STRTODEST(p); } } break; } } /* * Record the the fact that we have to scan this region of the * string for IFS characters. */ static void recordregion(int start, int end, int inquotes) { struct ifsregion *ifsp; if (ifslastp == NULL) { ifsp = &ifsfirst; } else { if (ifslastp->endoff == start && ifslastp->inquotes == inquotes) { /* extend previous area */ ifslastp->endoff = end; return; } ifsp = (struct ifsregion *)ckmalloc(sizeof (struct ifsregion)); ifslastp->next = ifsp; } ifslastp = ifsp; ifslastp->next = NULL; ifslastp->begoff = start; ifslastp->endoff = end; ifslastp->inquotes = inquotes; } /* * Break the argument string into pieces based upon IFS and add the * strings to the argument list. The regions of the string to be * searched for IFS characters have been stored by recordregion. * CTLESC characters are preserved but have little effect in this pass * other than escaping CTL* characters. In particular, they do not escape * IFS characters: that should be done with the ifsregion mechanism. * CTLQUOTEMARK characters are used to preserve empty quoted strings. * This pass treats them as a regular character, making the string non-empty. * Later, they are removed along with the other CTL* characters. */ static void ifsbreakup(char *string, struct arglist *arglist) { struct ifsregion *ifsp; struct strlist *sp; char *start; char *p; char *q; const char *ifs; const char *ifsspc; int had_param_ch = 0; start = string; if (ifslastp == NULL) { /* Return entire argument, IFS doesn't apply to any of it */ sp = (struct strlist *)stalloc(sizeof *sp); sp->text = start; *arglist->lastp = sp; arglist->lastp = &sp->next; return; } ifs = ifsset() ? ifsval() : " \t\n"; for (ifsp = &ifsfirst; ifsp != NULL; ifsp = ifsp->next) { p = string + ifsp->begoff; while (p < string + ifsp->endoff) { q = p; if (*p == CTLESC) p++; if (ifsp->inquotes) { /* Only NULs (should be from "$@") end args */ had_param_ch = 1; if (*p != 0) { p++; continue; } ifsspc = NULL; } else { if (!strchr(ifs, *p)) { had_param_ch = 1; p++; continue; } ifsspc = strchr(" \t\n", *p); /* Ignore IFS whitespace at start */ if (q == start && ifsspc != NULL) { p++; start = p; continue; } had_param_ch = 0; } /* Save this argument... */ *q = '\0'; sp = (struct strlist *)stalloc(sizeof *sp); sp->text = start; *arglist->lastp = sp; arglist->lastp = &sp->next; p++; if (ifsspc != NULL) { /* Ignore further trailing IFS whitespace */ for (; p < string + ifsp->endoff; p++) { q = p; if (*p == CTLESC) p++; if (strchr(ifs, *p) == NULL) { p = q; break; } if (strchr(" \t\n", *p) == NULL) { p++; break; } } } start = p; } } /* * Save anything left as an argument. * Traditionally we have treated 'IFS=':'; set -- x$IFS' as * generating 2 arguments, the second of which is empty. * Some recent clarification of the Posix spec say that it * should only generate one.... */ if (had_param_ch || *start != 0) { sp = (struct strlist *)stalloc(sizeof *sp); sp->text = start; *arglist->lastp = sp; arglist->lastp = &sp->next; } } static char expdir[PATH_MAX]; #define expdir_end (expdir + sizeof(expdir)) /* * Perform pathname generation and remove control characters. * At this point, the only control characters should be CTLESC and CTLQUOTEMARK. * The results are stored in the list exparg. */ static void expandmeta(struct strlist *str, int flag __unused) { char *p; struct strlist **savelastp; struct strlist *sp; char c; /* TODO - EXP_REDIR */ while (str) { if (fflag) goto nometa; p = str->text; for (;;) { /* fast check for meta chars */ if ((c = *p++) == '\0') goto nometa; if (c == '*' || c == '?' || c == '[') break; } savelastp = exparg.lastp; INTOFF; expmeta(expdir, str->text); INTON; if (exparg.lastp == savelastp) { /* * no matches */ nometa: *exparg.lastp = str; rmescapes(str->text); exparg.lastp = &str->next; } else { *exparg.lastp = NULL; *savelastp = sp = expsort(*savelastp); while (sp->next != NULL) sp = sp->next; exparg.lastp = &sp->next; } str = str->next; } } /* * Do metacharacter (i.e. *, ?, [...]) expansion. */ static void expmeta(char *enddir, char *name) { char *p; char *q; char *start; char *endname; int metaflag; struct stat statb; DIR *dirp; struct dirent *dp; int atend; int matchdot; int esc; metaflag = 0; start = name; for (p = name; esc = 0, *p; p += esc + 1) { if (*p == '*' || *p == '?') metaflag = 1; else if (*p == '[') { q = p + 1; if (*q == '!' || *q == '^') q++; for (;;) { while (*q == CTLQUOTEMARK) q++; if (*q == CTLESC) q++; if (*q == '/' || *q == '\0') break; if (*++q == ']') { metaflag = 1; break; } } } else if (*p == '\0') break; else if (*p == CTLQUOTEMARK) continue; else { if (*p == CTLESC) esc++; if (p[esc] == '/') { if (metaflag) break; start = p + esc + 1; } } } if (metaflag == 0) { /* we've reached the end of the file name */ if (enddir != expdir) metaflag++; for (p = name ; ; p++) { if (*p == CTLQUOTEMARK) continue; if (*p == CTLESC) p++; *enddir++ = *p; if (*p == '\0') break; if (enddir == expdir_end) return; } if (metaflag == 0 || lstat(expdir, &statb) >= 0) addfname(expdir); return; } endname = p; if (start != name) { p = name; while (p < start) { while (*p == CTLQUOTEMARK) p++; if (*p == CTLESC) p++; *enddir++ = *p++; if (enddir == expdir_end) return; } } if (enddir == expdir) { p = "."; } else if (enddir == expdir + 1 && *expdir == '/') { p = "/"; } else { p = expdir; enddir[-1] = '\0'; } if ((dirp = opendir(p)) == NULL) return; if (enddir != expdir) enddir[-1] = '/'; if (*endname == 0) { atend = 1; } else { atend = 0; *endname = '\0'; endname += esc + 1; } matchdot = 0; p = start; while (*p == CTLQUOTEMARK) p++; if (*p == CTLESC) p++; if (*p == '.') matchdot++; while (! int_pending() && (dp = readdir(dirp)) != NULL) { if (dp->d_name[0] == '.' && ! matchdot) continue; if (patmatch(start, dp->d_name, 0)) { if (enddir + dp->d_namlen + 1 > expdir_end) continue; memcpy(enddir, dp->d_name, dp->d_namlen + 1); if (atend) addfname(expdir); else { if (enddir + dp->d_namlen + 2 > expdir_end) continue; enddir[dp->d_namlen] = '/'; enddir[dp->d_namlen + 1] = '\0'; expmeta(enddir + dp->d_namlen + 1, endname); } } } closedir(dirp); if (! atend) endname[-esc - 1] = esc ? CTLESC : '/'; } /* * Add a file name to the list. */ static void addfname(char *name) { char *p; struct strlist *sp; p = stalloc(strlen(name) + 1); scopy(name, p); sp = (struct strlist *)stalloc(sizeof *sp); sp->text = p; *exparg.lastp = sp; exparg.lastp = &sp->next; } /* * Sort the results of file name expansion. It calculates the number of * strings to sort and then calls msort (short for merge sort) to do the * work. */ static struct strlist * expsort(struct strlist *str) { int len; struct strlist *sp; len = 0; for (sp = str ; sp ; sp = sp->next) len++; return msort(str, len); } static struct strlist * msort(struct strlist *list, int len) { struct strlist *p, *q = NULL; struct strlist **lpp; int half; int n; if (len <= 1) return list; half = len >> 1; p = list; for (n = half ; --n >= 0 ; ) { q = p; p = p->next; } q->next = NULL; /* terminate first half of list */ q = msort(list, half); /* sort first half of list */ p = msort(p, len - half); /* sort second half */ lpp = &list; for (;;) { if (strcmp(p->text, q->text) < 0) { *lpp = p; lpp = &p->next; if ((p = *lpp) == NULL) { *lpp = q; break; } } else { *lpp = q; lpp = &q->next; if ((q = *lpp) == NULL) { *lpp = p; break; } } } return list; } /* * Returns true if the pattern matches the string. */ int patmatch(const char *pattern, const char *string, int squoted) { const char *p, *q; char c; p = pattern; q = string; for (;;) { switch (c = *p++) { case '\0': goto breakloop; case CTLESC: if (squoted && *q == CTLESC) q++; if (*q++ != *p++) return 0; break; case CTLQUOTEMARK: continue; case '?': if (squoted && *q == CTLESC) q++; if (*q++ == '\0') return 0; break; case '*': c = *p; while (c == CTLQUOTEMARK || c == '*') c = *++p; if (c != CTLESC && c != CTLQUOTEMARK && c != '?' && c != '*' && c != '[') { while (*q != c) { if (squoted && *q == CTLESC && q[1] == c) break; if (*q == '\0') return 0; if (squoted && *q == CTLESC) q++; q++; } } do { if (patmatch(p, q, squoted)) return 1; if (squoted && *q == CTLESC) q++; } while (*q++ != '\0'); return 0; case '[': { const char *endp; int invert, found; char chr; endp = p; if (*endp == '!' || *endp == '^') endp++; for (;;) { while (*endp == CTLQUOTEMARK) endp++; if (*endp == '\0') goto dft; /* no matching ] */ if (*endp == CTLESC) endp++; if (*++endp == ']') break; } invert = 0; if (*p == '!' || *p == '^') { invert++; p++; } found = 0; chr = *q++; if (squoted && chr == CTLESC) chr = *q++; if (chr == '\0') return 0; c = *p++; do { if (c == CTLQUOTEMARK) continue; if (c == CTLESC) c = *p++; if (*p == '-' && p[1] != ']') { p++; while (*p == CTLQUOTEMARK) p++; if (*p == CTLESC) p++; if ( collate_range_cmp(chr, c) >= 0 && collate_range_cmp(chr, *p) <= 0 ) found = 1; p++; } else { if (chr == c) found = 1; } } while ((c = *p++) != ']'); if (found == invert) return 0; break; } dft: default: if (squoted && *q == CTLESC) q++; if (*q++ != c) return 0; break; } } breakloop: if (*q != '\0') return 0; return 1; } /* * Remove any CTLESC and CTLQUOTEMARK characters from a string. */ void rmescapes(char *str) { char *p, *q; p = str; while (*p != CTLESC && *p != CTLQUOTEMARK && *p != CTLQUOTEEND) { if (*p++ == '\0') return; } q = p; while (*p) { if (*p == CTLQUOTEMARK || *p == CTLQUOTEEND) { p++; continue; } if (*p == CTLESC) p++; *q++ = *p++; } *q = '\0'; } /* * See if a pattern matches in a case statement. */ int casematch(union node *pattern, const char *val) { struct stackmark smark; int result; char *p; setstackmark(&smark); argbackq = pattern->narg.backquote; STARTSTACKSTR(expdest); ifslastp = NULL; argstr(pattern->narg.text, EXP_TILDE | EXP_CASE); STPUTC('\0', expdest); p = grabstackstr(expdest); result = patmatch(p, val, 0); popstackmark(&smark); return result; } /* * Our own itoa(). */ static char * cvtnum(int num, char *buf) { char temp[32]; int neg = num < 0; char *p = temp + 31; temp[31] = '\0'; do { *--p = num % 10 + '0'; } while ((num /= 10) != 0); if (neg) *--p = '-'; - while (*p) - STPUTC(*p++, buf); + STPUTS(p, buf); return buf; } /* * Do most of the work for wordexp(3). */ int wordexpcmd(int argc, char **argv) { size_t len; int i; out1fmt("%08x", argc - 1); for (i = 1, len = 0; i < argc; i++) len += strlen(argv[i]); out1fmt("%08x", (int)len); for (i = 1; i < argc; i++) outbin(argv[i], strlen(argv[i]) + 1, out1); return (0); } Index: projects/binutils-2.17/bin/sh/histedit.c =================================================================== --- projects/binutils-2.17/bin/sh/histedit.c (revision 215829) +++ projects/binutils-2.17/bin/sh/histedit.c (revision 215830) @@ -1,513 +1,512 @@ /*- * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Kenneth Almquist. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint #if 0 static char sccsid[] = "@(#)histedit.c 8.2 (Berkeley) 5/4/95"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include /* * Editline and history functions (and glue). */ #include "shell.h" #include "parser.h" #include "var.h" #include "options.h" #include "main.h" #include "output.h" #include "mystring.h" #ifndef NO_HISTORY #include "myhistedit.h" #include "error.h" #include "eval.h" #include "memalloc.h" #define MAXHISTLOOPS 4 /* max recursions through fc */ #define DEFEDITOR "ed" /* default editor *should* be $EDITOR */ History *hist; /* history cookie */ EditLine *el; /* editline cookie */ int displayhist; static FILE *el_in, *el_out, *el_err; static char *fc_replace(const char *, char *, char *); /* * Set history and editing status. Called whenever the status may * have changed (figures out what to do). */ void histedit(void) { #define editing (Eflag || Vflag) if (iflag) { if (!hist) { /* * turn history on */ INTOFF; hist = history_init(); INTON; if (hist != NULL) sethistsize(histsizeval()); else out2fmt_flush("sh: can't initialize history\n"); } if (editing && !el && isatty(0)) { /* && isatty(2) ??? */ /* * turn editing on */ char *term; INTOFF; if (el_in == NULL) el_in = fdopen(0, "r"); if (el_err == NULL) el_err = fdopen(1, "w"); if (el_out == NULL) el_out = fdopen(2, "w"); if (el_in == NULL || el_err == NULL || el_out == NULL) goto bad; term = lookupvar("TERM"); if (term) setenv("TERM", term, 1); else unsetenv("TERM"); el = el_init(arg0, el_in, el_out, el_err); if (el != NULL) { if (hist) el_set(el, EL_HIST, history, hist); el_set(el, EL_PROMPT, getprompt); el_set(el, EL_ADDFN, "sh-complete", "Filename completion", _el_fn_sh_complete); } else { bad: out2fmt_flush("sh: can't initialize editing\n"); } INTON; } else if (!editing && el) { INTOFF; el_end(el); el = NULL; INTON; } if (el) { if (Vflag) el_set(el, EL_EDITOR, "vi"); else if (Eflag) el_set(el, EL_EDITOR, "emacs"); el_set(el, EL_BIND, "^I", "sh-complete", NULL); el_source(el, NULL); } } else { INTOFF; if (el) { /* no editing if not interactive */ el_end(el); el = NULL; } if (hist) { history_end(hist); hist = NULL; } INTON; } } void sethistsize(hs) const char *hs; { int histsize; HistEvent he; if (hist != NULL) { if (hs == NULL || *hs == '\0' || (histsize = atoi(hs)) < 0) histsize = 100; history(hist, &he, H_SETSIZE, histsize); history(hist, &he, H_SETUNIQUE, 1); } } void setterm(const char *term) { if (rootshell && el != NULL && term != NULL) el_set(el, EL_TERMINAL, term); } int histcmd(int argc, char **argv) { int ch; const char *editor = NULL; HistEvent he; int lflg = 0, nflg = 0, rflg = 0, sflg = 0; int i, retval; const char *firststr, *laststr; int first, last, direction; char *pat = NULL, *repl = NULL; static int active = 0; struct jmploc jmploc; struct jmploc *savehandler; char editfilestr[PATH_MAX]; char *volatile editfile; FILE *efp = NULL; int oldhistnum; if (hist == NULL) error("history not active"); if (argc == 1) error("missing history argument"); optreset = 1; optind = 1; /* initialize getopt */ opterr = 0; while (not_fcnumber(argv[optind]) && (ch = getopt(argc, argv, ":e:lnrs")) != -1) switch ((char)ch) { case 'e': editor = optarg; break; case 'l': lflg = 1; break; case 'n': nflg = 1; break; case 'r': rflg = 1; break; case 's': sflg = 1; break; case ':': error("option -%c expects argument", optopt); case '?': default: error("unknown option: -%c", optopt); } argc -= optind, argv += optind; /* * If executing... */ if (lflg == 0 || editor || sflg) { lflg = 0; /* ignore */ editfile = NULL; /* * Catch interrupts to reset active counter and * cleanup temp files. */ savehandler = handler; if (setjmp(jmploc.loc)) { active = 0; if (editfile) unlink(editfile); handler = savehandler; longjmp(handler->loc, 1); } handler = &jmploc; if (++active > MAXHISTLOOPS) { active = 0; displayhist = 0; error("called recursively too many times"); } /* * Set editor. */ if (sflg == 0) { if (editor == NULL && (editor = bltinlookup("FCEDIT", 1)) == NULL && (editor = bltinlookup("EDITOR", 1)) == NULL) editor = DEFEDITOR; if (editor[0] == '-' && editor[1] == '\0') { sflg = 1; /* no edit */ editor = NULL; } } } /* * If executing, parse [old=new] now */ if (lflg == 0 && argc > 0 && ((repl = strchr(argv[0], '=')) != NULL)) { pat = argv[0]; *repl++ = '\0'; argc--, argv++; } /* * determine [first] and [last] */ switch (argc) { case 0: firststr = lflg ? "-16" : "-1"; laststr = "-1"; break; case 1: firststr = argv[0]; laststr = lflg ? "-1" : argv[0]; break; case 2: firststr = argv[0]; laststr = argv[1]; break; default: error("too many arguments"); } /* * Turn into event numbers. */ first = str_to_event(firststr, 0); last = str_to_event(laststr, 1); if (rflg) { i = last; last = first; first = i; } /* * XXX - this should not depend on the event numbers * always increasing. Add sequence numbers or offset * to the history element in next (diskbased) release. */ direction = first < last ? H_PREV : H_NEXT; /* * If editing, grab a temp file. */ if (editor) { int fd; INTOFF; /* easier */ sprintf(editfilestr, "%s/_shXXXXXX", _PATH_TMP); if ((fd = mkstemp(editfilestr)) < 0) error("can't create temporary file %s", editfile); editfile = editfilestr; if ((efp = fdopen(fd, "w")) == NULL) { close(fd); error("Out of space"); } } /* * Loop through selected history events. If listing or executing, * do it now. Otherwise, put into temp file and call the editor * after. * * The history interface needs rethinking, as the following * convolutions will demonstrate. */ history(hist, &he, H_FIRST); retval = history(hist, &he, H_NEXT_EVENT, first); for (;retval != -1; retval = history(hist, &he, direction)) { if (lflg) { if (!nflg) out1fmt("%5d ", he.num); out1str(he.str); } else { char *s = pat ? fc_replace(he.str, pat, repl) : (char *)he.str; if (sflg) { if (displayhist) { out2str(s); flushout(out2); } evalstring(s, 0); if (displayhist && hist) { /* * XXX what about recursive and * relative histnums. */ oldhistnum = he.num; history(hist, &he, H_ENTER, s); /* * XXX H_ENTER moves the internal * cursor, set it back to the current * entry. */ retval = history(hist, &he, H_NEXT_EVENT, oldhistnum); } } else fputs(s, efp); } /* * At end? (if we were to lose last, we'd sure be * messed up). */ if (he.num == last) break; } if (editor) { char *editcmd; fclose(efp); editcmd = stalloc(strlen(editor) + strlen(editfile) + 2); sprintf(editcmd, "%s %s", editor, editfile); evalstring(editcmd, 0); /* XXX - should use no JC command */ INTON; readcmdfile(editfile); /* XXX - should read back - quick tst */ unlink(editfile); } if (lflg == 0 && active > 0) --active; if (displayhist) displayhist = 0; return 0; } static char * fc_replace(const char *s, char *p, char *r) { char *dest; int plen = strlen(p); STARTSTACKSTR(dest); while (*s) { if (*s == *p && strncmp(s, p, plen) == 0) { - while (*r) - STPUTC(*r++, dest); + STPUTS(r, dest); s += plen; *p = '\0'; /* so no more matches */ } else STPUTC(*s++, dest); } STPUTC('\0', dest); dest = grabstackstr(dest); return (dest); } int not_fcnumber(const char *s) { if (s == NULL) return (0); if (*s == '-') s++; return (!is_number(s)); } int str_to_event(const char *str, int last) { HistEvent he; const char *s = str; int relative = 0; int i, retval; retval = history(hist, &he, H_FIRST); switch (*s) { case '-': relative = 1; /*FALLTHROUGH*/ case '+': s++; } if (is_number(s)) { i = atoi(s); if (relative) { while (retval != -1 && i--) { retval = history(hist, &he, H_NEXT); } if (retval == -1) retval = history(hist, &he, H_LAST); } else { retval = history(hist, &he, H_NEXT_EVENT, i); if (retval == -1) { /* * the notion of first and last is * backwards to that of the history package */ retval = history(hist, &he, last ? H_FIRST : H_LAST); } } if (retval == -1) error("history number %s not found (internal error)", str); } else { /* * pattern */ retval = history(hist, &he, H_PREV_STR, str); if (retval == -1) error("history pattern not found: %s", str); } return (he.num); } int bindcmd(int argc, char **argv) { if (el == NULL) error("line editing is disabled"); return (el_parse(el, argc, (const char **)argv)); } #else #include "error.h" int histcmd(int argc, char **argv) { error("not compiled with history support"); /*NOTREACHED*/ return (0); } int bindcmd(int argc, char **argv) { error("not compiled with line editing support"); return (0); } #endif Index: projects/binutils-2.17/bin/sh/memalloc.c =================================================================== --- projects/binutils-2.17/bin/sh/memalloc.c (revision 215829) +++ projects/binutils-2.17/bin/sh/memalloc.c (revision 215830) @@ -1,342 +1,359 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Kenneth Almquist. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint #if 0 static char sccsid[] = "@(#)memalloc.c 8.3 (Berkeley) 5/4/95"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #include #include "shell.h" #include "output.h" #include "memalloc.h" #include "error.h" #include "mystring.h" #include "expand.h" #include #include /* * Like malloc, but returns an error when out of space. */ pointer ckmalloc(size_t nbytes) { pointer p; INTOFF; p = malloc(nbytes); INTON; if (p == NULL) error("Out of space"); return p; } /* * Same for realloc. */ pointer ckrealloc(pointer p, int nbytes) { INTOFF; p = realloc(p, nbytes); INTON; if (p == NULL) error("Out of space"); return p; } void ckfree(pointer p) { INTOFF; free(p); INTON; } /* * Make a copy of a string in safe storage. */ char * savestr(const char *s) { char *p; p = ckmalloc(strlen(s) + 1); scopy(s, p); return p; } /* * Parse trees for commands are allocated in lifo order, so we use a stack * to make this more efficient, and also to avoid all sorts of exception * handling code to handle interrupts in the middle of a parse. * * The size 496 was chosen because with 16-byte alignment the total size * for the allocated block is 512. */ #define MINSIZE 496 /* minimum size of a block. */ struct stack_block { struct stack_block *prev; /* Data follows */ }; #define SPACE(sp) ((char*)(sp) + ALIGN(sizeof(struct stack_block))) static struct stack_block *stackp; static struct stackmark *markp; char *stacknxt; int stacknleft; int sstrnleft; int herefd = -1; static void stnewblock(int nbytes) { struct stack_block *sp; int allocsize; if (nbytes < MINSIZE) nbytes = MINSIZE; allocsize = ALIGN(sizeof(struct stack_block)) + ALIGN(nbytes); INTOFF; sp = ckmalloc(allocsize); sp->prev = stackp; stacknxt = SPACE(sp); stacknleft = allocsize - (stacknxt - (char*)sp); stackp = sp; INTON; } pointer stalloc(int nbytes) { char *p; nbytes = ALIGN(nbytes); if (nbytes > stacknleft) stnewblock(nbytes); p = stacknxt; stacknxt += nbytes; stacknleft -= nbytes; return p; } void stunalloc(pointer p) { if (p == NULL) { /*DEBUG */ write(STDERR_FILENO, "stunalloc\n", 10); abort(); } stacknleft += stacknxt - (char *)p; stacknxt = p; } void setstackmark(struct stackmark *mark) { mark->stackp = stackp; mark->stacknxt = stacknxt; mark->stacknleft = stacknleft; mark->marknext = markp; markp = mark; } void popstackmark(struct stackmark *mark) { struct stack_block *sp; INTOFF; markp = mark->marknext; while (stackp != mark->stackp) { sp = stackp; stackp = sp->prev; ckfree(sp); } stacknxt = mark->stacknxt; stacknleft = mark->stacknleft; INTON; } /* * When the parser reads in a string, it wants to stick the string on the * stack and only adjust the stack pointer when it knows how big the * string is. Stackblock (defined in stack.h) returns a pointer to a block * of space on top of the stack and stackblocklen returns the length of * this block. Growstackblock will grow this space by at least one byte, * possibly moving it (like realloc). Grabstackblock actually allocates the * part of the block that has been used. */ void growstackblock(void) { char *p; int newlen; char *oldspace; int oldlen; struct stack_block *sp; struct stack_block *oldstackp; struct stackmark *xmark; newlen = (stacknleft == 0) ? MINSIZE : stacknleft * 2 + 100; newlen = ALIGN(newlen); oldspace = stacknxt; oldlen = stacknleft; if (stackp != NULL && stacknxt == SPACE(stackp)) { INTOFF; oldstackp = stackp; stackp = oldstackp->prev; sp = ckrealloc((pointer)oldstackp, newlen); sp->prev = stackp; stackp = sp; stacknxt = SPACE(sp); stacknleft = newlen - (stacknxt - (char*)sp); /* * Stack marks pointing to the start of the old block * must be relocated to point to the new block */ xmark = markp; while (xmark != NULL && xmark->stackp == oldstackp) { xmark->stackp = stackp; xmark->stacknxt = stacknxt; xmark->stacknleft = stacknleft; xmark = xmark->marknext; } INTON; } else { p = stalloc(newlen); if (oldlen != 0) memcpy(p, oldspace, oldlen); stunalloc(p); } } void grabstackblock(int len) { len = ALIGN(len); stacknxt += len; stacknleft -= len; } /* * The following routines are somewhat easier to use that the above. * The user declares a variable of type STACKSTR, which may be declared * to be a register. The macro STARTSTACKSTR initializes things. Then * the user uses the macro STPUTC to add characters to the string. In * effect, STPUTC(c, p) is the same as *p++ = c except that the stack is * grown as necessary. When the user is done, she can just leave the * string there and refer to it using stackblock(). Or she can allocate * the space for it using grabstackstr(). If it is necessary to allow * someone else to use the stack temporarily and then continue to grow * the string, the user should use grabstack to allocate the space, and * then call ungrabstr(p) to return to the previous mode of operation. * * USTPUTC is like STPUTC except that it doesn't check for overflow. * CHECKSTACKSPACE can be called before USTPUTC to ensure that there * is space for at least one character. */ static char * growstrstackblock(int n) { growstackblock(); sstrnleft = stackblocksize() - n; return stackblock() + n; } char * growstackstr(void) { int len; len = stackblocksize(); if (herefd >= 0 && len >= 1024) { xwrite(herefd, stackblock(), len); sstrnleft = len; return stackblock(); } return growstrstackblock(len); } /* * Called from CHECKSTRSPACE. */ char * makestrspace(void) { int len; len = stackblocksize() - sstrnleft; return growstrstackblock(len); } void ungrabstackstr(char *s, char *p) { stacknleft += stacknxt - s; stacknxt = s; sstrnleft = stacknleft - (p - s); } + + +char * +stputbin(const char *data, int len, char *p) +{ + int i; + + for (i = 0; i < len; i++) + STPUTC(data[i], p); + return (p); +} + +char * +stputs(const char *data, char *p) +{ + return (stputbin(data, strlen(data), p)); +} Index: projects/binutils-2.17/bin/sh/memalloc.h =================================================================== --- projects/binutils-2.17/bin/sh/memalloc.h (revision 215829) +++ projects/binutils-2.17/bin/sh/memalloc.h (revision 215830) @@ -1,84 +1,88 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Kenneth Almquist. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)memalloc.h 8.2 (Berkeley) 5/4/95 * $FreeBSD$ */ #include struct stackmark { struct stack_block *stackp; char *stacknxt; int stacknleft; struct stackmark *marknext; }; extern char *stacknxt; extern int stacknleft; extern int sstrnleft; extern int herefd; pointer ckmalloc(size_t); pointer ckrealloc(pointer, int); void ckfree(pointer); char *savestr(const char *); pointer stalloc(int); void stunalloc(pointer); void setstackmark(struct stackmark *); void popstackmark(struct stackmark *); void growstackblock(void); void grabstackblock(int); char *growstackstr(void); char *makestrspace(void); void ungrabstackstr(char *, char *); +char *stputbin(const char *data, int len, char *p); +char *stputs(const char *data, char *p); #define stackblock() stacknxt #define stackblocksize() stacknleft #define STARTSTACKSTR(p) p = stackblock(), sstrnleft = stackblocksize() #define STPUTC(c, p) (--sstrnleft >= 0? (*p++ = (c)) : (p = growstackstr(), --sstrnleft, *p++ = (c))) #define CHECKSTRSPACE(n, p) { if (sstrnleft < n) p = makestrspace(); } #define USTPUTC(c, p) (--sstrnleft, *p++ = (c)) /* * STACKSTRNUL's use is where we want to be able to turn a stack * (non-sentinel, character counting string) into a C string, * and later pretend the NUL is not there. * Note: Because of STACKSTRNUL's semantics, STACKSTRNUL cannot be used * on a stack that will grabstackstr()ed. */ #define STACKSTRNUL(p) (sstrnleft == 0? (p = growstackstr(), *p = '\0') : (*p = '\0')) #define STUNPUTC(p) (++sstrnleft, --p) #define STTOPC(p) p[-1] #define STADJUST(amount, p) (p += (amount), sstrnleft -= (amount)) #define grabstackstr(p) stalloc(stackblocksize() - sstrnleft) +#define STPUTBIN(s, len, p) p = stputbin((s), (len), p) +#define STPUTS(s, p) p = stputs((s), p) Index: projects/binutils-2.17/bin/sh/miscbltin.c =================================================================== --- projects/binutils-2.17/bin/sh/miscbltin.c (revision 215829) +++ projects/binutils-2.17/bin/sh/miscbltin.c (revision 215830) @@ -1,501 +1,502 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Kenneth Almquist. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint #if 0 static char sccsid[] = "@(#)miscbltin.c 8.4 (Berkeley) 5/4/95"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); /* * Miscellaneous builtins. */ #include #include #include #include #include #include #include #include #include #include #include #include "shell.h" #include "options.h" #include "var.h" #include "output.h" #include "memalloc.h" #include "error.h" #include "mystring.h" #undef eflag int readcmd(int, char **); int umaskcmd(int, char **); int ulimitcmd(int, char **); /* * The read builtin. The -r option causes backslashes to be treated like * ordinary characters. * * This uses unbuffered input, which may be avoidable in some cases. * * Note that if IFS=' :' then read x y should work so that: * 'a b' x='a', y='b' * ' a b ' x='a', y='b' * ':b' x='', y='b' * ':' x='', y='' * '::' x='', y='' * ': :' x='', y='' * ':::' x='', y='::' * ':b c:' x='', y='b c:' */ int readcmd(int argc __unused, char **argv __unused) { char **ap; int backslash; char c; int rflag; char *prompt; const char *ifs; char *p; int startword; int status; int i; int is_ifs; int saveall = 0; struct timeval tv; char *tvptr; fd_set ifds; rflag = 0; prompt = NULL; tv.tv_sec = -1; tv.tv_usec = 0; while ((i = nextopt("erp:t:")) != '\0') { switch(i) { case 'p': prompt = shoptarg; break; case 'e': break; case 'r': rflag = 1; break; case 't': tv.tv_sec = strtol(shoptarg, &tvptr, 0); if (tvptr == shoptarg) error("timeout value"); switch(*tvptr) { case 0: case 's': break; case 'h': tv.tv_sec *= 60; /* FALLTHROUGH */ case 'm': tv.tv_sec *= 60; break; default: error("timeout unit"); } break; } } if (prompt && isatty(0)) { out2str(prompt); flushall(); } if (*(ap = argptr) == NULL) error("arg count"); if ((ifs = bltinlookup("IFS", 1)) == NULL) ifs = " \t\n"; if (tv.tv_sec >= 0) { /* * Wait for something to become available. */ FD_ZERO(&ifds); FD_SET(0, &ifds); status = select(1, &ifds, NULL, NULL, &tv); /* * If there's nothing ready, return an error. */ if (status <= 0) return(1); } status = 0; startword = 2; backslash = 0; STARTSTACKSTR(p); for (;;) { if (read(STDIN_FILENO, &c, 1) != 1) { status = 1; break; } if (c == '\0') continue; + CHECKSTRSPACE(1, p); if (backslash) { backslash = 0; startword = 0; if (c != '\n') - STPUTC(c, p); + USTPUTC(c, p); continue; } if (!rflag && c == '\\') { backslash++; continue; } if (c == '\n') break; if (strchr(ifs, c)) is_ifs = strchr(" \t\n", c) ? 1 : 2; else is_ifs = 0; if (startword != 0) { if (is_ifs == 1) { /* Ignore leading IFS whitespace */ if (saveall) - STPUTC(c, p); + USTPUTC(c, p); continue; } if (is_ifs == 2 && startword == 1) { /* Only one non-whitespace IFS per word */ startword = 2; if (saveall) - STPUTC(c, p); + USTPUTC(c, p); continue; } } if (is_ifs == 0) { /* append this character to the current variable */ startword = 0; if (saveall) /* Not just a spare terminator */ saveall++; - STPUTC(c, p); + USTPUTC(c, p); continue; } /* end of variable... */ startword = is_ifs; if (ap[1] == NULL) { /* Last variable needs all IFS chars */ saveall++; - STPUTC(c, p); + USTPUTC(c, p); continue; } STACKSTRNUL(p); setvar(*ap, stackblock(), 0); ap++; STARTSTACKSTR(p); } STACKSTRNUL(p); /* Remove trailing IFS chars */ for (; stackblock() <= --p; *p = 0) { if (!strchr(ifs, *p)) break; if (strchr(" \t\n", *p)) /* Always remove whitespace */ continue; if (saveall > 1) /* Don't remove non-whitespace unless it was naked */ break; } setvar(*ap, stackblock(), 0); /* Set any remaining args to "" */ while (*++ap != NULL) setvar(*ap, nullstr, 0); return status; } int umaskcmd(int argc __unused, char **argv __unused) { char *ap; int mask; int i; int symbolic_mode = 0; while ((i = nextopt("S")) != '\0') { symbolic_mode = 1; } INTOFF; mask = umask(0); umask(mask); INTON; if ((ap = *argptr) == NULL) { if (symbolic_mode) { char u[4], g[4], o[4]; i = 0; if ((mask & S_IRUSR) == 0) u[i++] = 'r'; if ((mask & S_IWUSR) == 0) u[i++] = 'w'; if ((mask & S_IXUSR) == 0) u[i++] = 'x'; u[i] = '\0'; i = 0; if ((mask & S_IRGRP) == 0) g[i++] = 'r'; if ((mask & S_IWGRP) == 0) g[i++] = 'w'; if ((mask & S_IXGRP) == 0) g[i++] = 'x'; g[i] = '\0'; i = 0; if ((mask & S_IROTH) == 0) o[i++] = 'r'; if ((mask & S_IWOTH) == 0) o[i++] = 'w'; if ((mask & S_IXOTH) == 0) o[i++] = 'x'; o[i] = '\0'; out1fmt("u=%s,g=%s,o=%s\n", u, g, o); } else { out1fmt("%.4o\n", mask); } } else { if (isdigit(*ap)) { mask = 0; do { if (*ap >= '8' || *ap < '0') error("Illegal number: %s", *argptr); mask = (mask << 3) + (*ap - '0'); } while (*++ap != '\0'); umask(mask); } else { void *set; INTOFF; if ((set = setmode (ap)) == 0) error("Illegal number: %s", ap); mask = getmode (set, ~mask & 0777); umask(~mask & 0777); free(set); INTON; } } return 0; } /* * ulimit builtin * * This code, originally by Doug Gwyn, Doug Kingston, Eric Gisin, and * Michael Rendell was ripped from pdksh 5.0.8 and hacked for use with * ash by J.T. Conklin. * * Public domain. */ struct limits { const char *name; const char *units; int cmd; int factor; /* multiply by to get rlim_{cur,max} values */ char option; }; static const struct limits limits[] = { #ifdef RLIMIT_CPU { "cpu time", "seconds", RLIMIT_CPU, 1, 't' }, #endif #ifdef RLIMIT_FSIZE { "file size", "512-blocks", RLIMIT_FSIZE, 512, 'f' }, #endif #ifdef RLIMIT_DATA { "data seg size", "kbytes", RLIMIT_DATA, 1024, 'd' }, #endif #ifdef RLIMIT_STACK { "stack size", "kbytes", RLIMIT_STACK, 1024, 's' }, #endif #ifdef RLIMIT_CORE { "core file size", "512-blocks", RLIMIT_CORE, 512, 'c' }, #endif #ifdef RLIMIT_RSS { "max memory size", "kbytes", RLIMIT_RSS, 1024, 'm' }, #endif #ifdef RLIMIT_MEMLOCK { "locked memory", "kbytes", RLIMIT_MEMLOCK, 1024, 'l' }, #endif #ifdef RLIMIT_NPROC { "max user processes", (char *)0, RLIMIT_NPROC, 1, 'u' }, #endif #ifdef RLIMIT_NOFILE { "open files", (char *)0, RLIMIT_NOFILE, 1, 'n' }, #endif #ifdef RLIMIT_VMEM { "virtual mem size", "kbytes", RLIMIT_VMEM, 1024, 'v' }, #endif #ifdef RLIMIT_SWAP { "swap limit", "kbytes", RLIMIT_SWAP, 1024, 'w' }, #endif #ifdef RLIMIT_SBSIZE { "sbsize", "bytes", RLIMIT_SBSIZE, 1, 'b' }, #endif #ifdef RLIMIT_NPTS { "pseudo-terminals", (char *)0, RLIMIT_NPTS, 1, 'p' }, #endif { (char *) 0, (char *)0, 0, 0, '\0' } }; int ulimitcmd(int argc __unused, char **argv __unused) { int c; rlim_t val = 0; enum { SOFT = 0x1, HARD = 0x2 } how = SOFT | HARD; const struct limits *l; int set, all = 0; int optc, what; struct rlimit limit; what = 'f'; while ((optc = nextopt("HSatfdsmcnuvlbpw")) != '\0') switch (optc) { case 'H': how = HARD; break; case 'S': how = SOFT; break; case 'a': all = 1; break; default: what = optc; } for (l = limits; l->name && l->option != what; l++) ; if (!l->name) error("internal error (%c)", what); set = *argptr ? 1 : 0; if (set) { char *p = *argptr; if (all || argptr[1]) error("too many arguments"); if (strcmp(p, "unlimited") == 0) val = RLIM_INFINITY; else { val = 0; while ((c = *p++) >= '0' && c <= '9') { val = (val * 10) + (long)(c - '0'); if (val < 0) break; } if (c) error("bad number"); val *= l->factor; } } if (all) { for (l = limits; l->name; l++) { char optbuf[40]; if (getrlimit(l->cmd, &limit) < 0) error("can't get limit: %s", strerror(errno)); if (how & SOFT) val = limit.rlim_cur; else if (how & HARD) val = limit.rlim_max; if (l->units) snprintf(optbuf, sizeof(optbuf), "(%s, -%c) ", l->units, l->option); else snprintf(optbuf, sizeof(optbuf), "(-%c) ", l->option); out1fmt("%-18s %18s ", l->name, optbuf); if (val == RLIM_INFINITY) out1fmt("unlimited\n"); else { val /= l->factor; out1fmt("%jd\n", (intmax_t)val); } } return 0; } if (getrlimit(l->cmd, &limit) < 0) error("can't get limit: %s", strerror(errno)); if (set) { if (how & SOFT) limit.rlim_cur = val; if (how & HARD) limit.rlim_max = val; if (setrlimit(l->cmd, &limit) < 0) error("bad limit: %s", strerror(errno)); } else { if (how & SOFT) val = limit.rlim_cur; else if (how & HARD) val = limit.rlim_max; if (val == RLIM_INFINITY) out1fmt("unlimited\n"); else { val /= l->factor; out1fmt("%jd\n", (intmax_t)val); } } return 0; } Index: projects/binutils-2.17/bin/sh/parser.c =================================================================== --- projects/binutils-2.17/bin/sh/parser.c (revision 215829) +++ projects/binutils-2.17/bin/sh/parser.c (revision 215830) @@ -1,1839 +1,1838 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Kenneth Almquist. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint #if 0 static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include #include "shell.h" #include "parser.h" #include "nodes.h" #include "expand.h" /* defines rmescapes() */ #include "syntax.h" #include "options.h" #include "input.h" #include "output.h" #include "var.h" #include "error.h" #include "memalloc.h" #include "mystring.h" #include "alias.h" #include "show.h" #include "eval.h" #include "exec.h" /* to check for special builtins */ #ifndef NO_HISTORY #include "myhistedit.h" #endif /* * Shell command parser. */ #define EOFMARKLEN 79 #define PROMPTLEN 128 /* values of checkkwd variable */ #define CHKALIAS 0x1 #define CHKKWD 0x2 #define CHKNL 0x4 /* values returned by readtoken */ #include "token.h" struct heredoc { struct heredoc *next; /* next here document in list */ union node *here; /* redirection node */ char *eofmark; /* string indicating end of input */ int striptabs; /* if set, strip leading tabs */ }; struct parser_temp { struct parser_temp *next; void *data; }; static struct heredoc *heredoclist; /* list of here documents to read */ static int doprompt; /* if set, prompt the user */ static int needprompt; /* true if interactive and at start of line */ static int lasttoken; /* last token read */ MKINIT int tokpushback; /* last token pushed back */ static char *wordtext; /* text of last word returned by readtoken */ MKINIT int checkkwd; /* 1 == check for kwds, 2 == also eat newlines */ static struct nodelist *backquotelist; static union node *redirnode; static struct heredoc *heredoc; static int quoteflag; /* set if (part of) last token was quoted */ static int startlinno; /* line # where last token started */ static int funclinno; /* line # where the current function started */ static struct parser_temp *parser_temp; static union node *list(int, int); static union node *andor(void); static union node *pipeline(void); static union node *command(void); static union node *simplecmd(union node **, union node *); static union node *makename(void); static void parsefname(void); static void parseheredoc(void); static int peektoken(void); static int readtoken(void); static int xxreadtoken(void); static int readtoken1(int, char const *, char *, int); static int noexpand(char *); static void synexpect(int) __dead2; static void synerror(const char *) __dead2; static void setprompt(int); static void * parser_temp_alloc(size_t len) { struct parser_temp *t; INTOFF; t = ckmalloc(sizeof(*t)); t->data = NULL; t->next = parser_temp; parser_temp = t; t->data = ckmalloc(len); INTON; return t->data; } static void * parser_temp_realloc(void *ptr, size_t len) { struct parser_temp *t; INTOFF; t = parser_temp; if (ptr != t->data) error("bug: parser_temp_realloc misused"); t->data = ckrealloc(t->data, len); INTON; return t->data; } static void parser_temp_free_upto(void *ptr) { struct parser_temp *t; int done = 0; INTOFF; while (parser_temp != NULL && !done) { t = parser_temp; parser_temp = t->next; done = t->data == ptr; ckfree(t->data); ckfree(t); } INTON; if (!done) error("bug: parser_temp_free_upto misused"); } static void parser_temp_free_all(void) { struct parser_temp *t; INTOFF; while (parser_temp != NULL) { t = parser_temp; parser_temp = t->next; ckfree(t->data); ckfree(t); } INTON; } /* * Read and parse a command. Returns NEOF on end of file. (NULL is a * valid parse tree indicating a blank line.) */ union node * parsecmd(int interact) { int t; /* This assumes the parser is not re-entered, * which could happen if we add command substitution on PS1/PS2. */ parser_temp_free_all(); heredoclist = NULL; tokpushback = 0; doprompt = interact; if (doprompt) setprompt(1); else setprompt(0); needprompt = 0; t = readtoken(); if (t == TEOF) return NEOF; if (t == TNL) return NULL; tokpushback++; return list(1, 1); } static union node * list(int nlflag, int erflag) { union node *ntop, *n1, *n2, *n3; int tok; checkkwd = CHKNL | CHKKWD | CHKALIAS; if (!nlflag && !erflag && tokendlist[peektoken()]) return NULL; ntop = n1 = NULL; for (;;) { n2 = andor(); tok = readtoken(); if (tok == TBACKGND) { if (n2->type == NCMD || n2->type == NPIPE) { n2->ncmd.backgnd = 1; } else if (n2->type == NREDIR) { n2->type = NBACKGND; } else { n3 = (union node *)stalloc(sizeof (struct nredir)); n3->type = NBACKGND; n3->nredir.n = n2; n3->nredir.redirect = NULL; n2 = n3; } } if (ntop == NULL) ntop = n2; else if (n1 == NULL) { n1 = (union node *)stalloc(sizeof (struct nbinary)); n1->type = NSEMI; n1->nbinary.ch1 = ntop; n1->nbinary.ch2 = n2; ntop = n1; } else { n3 = (union node *)stalloc(sizeof (struct nbinary)); n3->type = NSEMI; n3->nbinary.ch1 = n1->nbinary.ch2; n3->nbinary.ch2 = n2; n1->nbinary.ch2 = n3; n1 = n3; } switch (tok) { case TBACKGND: case TSEMI: tok = readtoken(); /* FALLTHROUGH */ case TNL: if (tok == TNL) { parseheredoc(); if (nlflag) return ntop; } else if (tok == TEOF && nlflag) { parseheredoc(); return ntop; } else { tokpushback++; } checkkwd = CHKNL | CHKKWD | CHKALIAS; if (!nlflag && !erflag && tokendlist[peektoken()]) return ntop; break; case TEOF: if (heredoclist) parseheredoc(); else pungetc(); /* push back EOF on input */ return ntop; default: if (nlflag || erflag) synexpect(-1); tokpushback++; return ntop; } } } static union node * andor(void) { union node *n1, *n2, *n3; int t; n1 = pipeline(); for (;;) { if ((t = readtoken()) == TAND) { t = NAND; } else if (t == TOR) { t = NOR; } else { tokpushback++; return n1; } n2 = pipeline(); n3 = (union node *)stalloc(sizeof (struct nbinary)); n3->type = t; n3->nbinary.ch1 = n1; n3->nbinary.ch2 = n2; n1 = n3; } } static union node * pipeline(void) { union node *n1, *n2, *pipenode; struct nodelist *lp, *prev; int negate, t; negate = 0; checkkwd = CHKNL | CHKKWD | CHKALIAS; TRACE(("pipeline: entered\n")); while (readtoken() == TNOT) negate = !negate; tokpushback++; n1 = command(); if (readtoken() == TPIPE) { pipenode = (union node *)stalloc(sizeof (struct npipe)); pipenode->type = NPIPE; pipenode->npipe.backgnd = 0; lp = (struct nodelist *)stalloc(sizeof (struct nodelist)); pipenode->npipe.cmdlist = lp; lp->n = n1; do { prev = lp; lp = (struct nodelist *)stalloc(sizeof (struct nodelist)); checkkwd = CHKNL | CHKKWD | CHKALIAS; t = readtoken(); tokpushback++; if (t == TNOT) lp->n = pipeline(); else lp->n = command(); prev->next = lp; } while (readtoken() == TPIPE); lp->next = NULL; n1 = pipenode; } tokpushback++; if (negate) { n2 = (union node *)stalloc(sizeof (struct nnot)); n2->type = NNOT; n2->nnot.com = n1; return n2; } else return n1; } static union node * command(void) { union node *n1, *n2; union node *ap, **app; union node *cp, **cpp; union node *redir, **rpp; int t; checkkwd = CHKNL | CHKKWD | CHKALIAS; redir = NULL; n1 = NULL; rpp = &redir; /* Check for redirection which may precede command */ while (readtoken() == TREDIR) { *rpp = n2 = redirnode; rpp = &n2->nfile.next; parsefname(); } tokpushback++; switch (readtoken()) { case TIF: n1 = (union node *)stalloc(sizeof (struct nif)); n1->type = NIF; if ((n1->nif.test = list(0, 0)) == NULL) synexpect(-1); if (readtoken() != TTHEN) synexpect(TTHEN); n1->nif.ifpart = list(0, 0); n2 = n1; while (readtoken() == TELIF) { n2->nif.elsepart = (union node *)stalloc(sizeof (struct nif)); n2 = n2->nif.elsepart; n2->type = NIF; if ((n2->nif.test = list(0, 0)) == NULL) synexpect(-1); if (readtoken() != TTHEN) synexpect(TTHEN); n2->nif.ifpart = list(0, 0); } if (lasttoken == TELSE) n2->nif.elsepart = list(0, 0); else { n2->nif.elsepart = NULL; tokpushback++; } if (readtoken() != TFI) synexpect(TFI); checkkwd = CHKKWD | CHKALIAS; break; case TWHILE: case TUNTIL: { int got; n1 = (union node *)stalloc(sizeof (struct nbinary)); n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL; if ((n1->nbinary.ch1 = list(0, 0)) == NULL) synexpect(-1); if ((got=readtoken()) != TDO) { TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : "")); synexpect(TDO); } n1->nbinary.ch2 = list(0, 0); if (readtoken() != TDONE) synexpect(TDONE); checkkwd = CHKKWD | CHKALIAS; break; } case TFOR: if (readtoken() != TWORD || quoteflag || ! goodname(wordtext)) synerror("Bad for loop variable"); n1 = (union node *)stalloc(sizeof (struct nfor)); n1->type = NFOR; n1->nfor.var = wordtext; while (readtoken() == TNL) ; if (lasttoken == TWORD && ! quoteflag && equal(wordtext, "in")) { app = ≈ while (readtoken() == TWORD) { n2 = (union node *)stalloc(sizeof (struct narg)); n2->type = NARG; n2->narg.text = wordtext; n2->narg.backquote = backquotelist; *app = n2; app = &n2->narg.next; } *app = NULL; n1->nfor.args = ap; if (lasttoken != TNL && lasttoken != TSEMI) synexpect(-1); } else { static char argvars[5] = { CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0' }; n2 = (union node *)stalloc(sizeof (struct narg)); n2->type = NARG; n2->narg.text = argvars; n2->narg.backquote = NULL; n2->narg.next = NULL; n1->nfor.args = n2; /* * Newline or semicolon here is optional (but note * that the original Bourne shell only allowed NL). */ if (lasttoken != TNL && lasttoken != TSEMI) tokpushback++; } checkkwd = CHKNL | CHKKWD | CHKALIAS; if ((t = readtoken()) == TDO) t = TDONE; else if (t == TBEGIN) t = TEND; else synexpect(-1); n1->nfor.body = list(0, 0); if (readtoken() != t) synexpect(t); checkkwd = CHKKWD | CHKALIAS; break; case TCASE: n1 = (union node *)stalloc(sizeof (struct ncase)); n1->type = NCASE; if (readtoken() != TWORD) synexpect(TWORD); n1->ncase.expr = n2 = (union node *)stalloc(sizeof (struct narg)); n2->type = NARG; n2->narg.text = wordtext; n2->narg.backquote = backquotelist; n2->narg.next = NULL; while (readtoken() == TNL); if (lasttoken != TWORD || ! equal(wordtext, "in")) synerror("expecting \"in\""); cpp = &n1->ncase.cases; checkkwd = CHKNL | CHKKWD, readtoken(); while (lasttoken != TESAC) { *cpp = cp = (union node *)stalloc(sizeof (struct nclist)); cp->type = NCLIST; app = &cp->nclist.pattern; if (lasttoken == TLP) readtoken(); for (;;) { *app = ap = (union node *)stalloc(sizeof (struct narg)); ap->type = NARG; ap->narg.text = wordtext; ap->narg.backquote = backquotelist; checkkwd = CHKNL | CHKKWD; if (readtoken() != TPIPE) break; app = &ap->narg.next; readtoken(); } ap->narg.next = NULL; if (lasttoken != TRP) synexpect(TRP); cp->nclist.body = list(0, 0); checkkwd = CHKNL | CHKKWD | CHKALIAS; if ((t = readtoken()) != TESAC) { if (t != TENDCASE) synexpect(TENDCASE); else checkkwd = CHKNL | CHKKWD, readtoken(); } cpp = &cp->nclist.next; } *cpp = NULL; checkkwd = CHKKWD | CHKALIAS; break; case TLP: n1 = (union node *)stalloc(sizeof (struct nredir)); n1->type = NSUBSHELL; n1->nredir.n = list(0, 0); n1->nredir.redirect = NULL; if (readtoken() != TRP) synexpect(TRP); checkkwd = CHKKWD | CHKALIAS; break; case TBEGIN: n1 = list(0, 0); if (readtoken() != TEND) synexpect(TEND); checkkwd = CHKKWD | CHKALIAS; break; /* Handle an empty command like other simple commands. */ case TBACKGND: case TSEMI: case TAND: case TOR: /* * An empty command before a ; doesn't make much sense, and * should certainly be disallowed in the case of `if ;'. */ if (!redir) synexpect(-1); case TNL: case TEOF: case TWORD: case TRP: tokpushback++; n1 = simplecmd(rpp, redir); return n1; default: synexpect(-1); } /* Now check for redirection which may follow command */ while (readtoken() == TREDIR) { *rpp = n2 = redirnode; rpp = &n2->nfile.next; parsefname(); } tokpushback++; *rpp = NULL; if (redir) { if (n1->type != NSUBSHELL) { n2 = (union node *)stalloc(sizeof (struct nredir)); n2->type = NREDIR; n2->nredir.n = n1; n1 = n2; } n1->nredir.redirect = redir; } return n1; } static union node * simplecmd(union node **rpp, union node *redir) { union node *args, **app; union node **orig_rpp = rpp; union node *n = NULL; int special; /* If we don't have any redirections already, then we must reset */ /* rpp to be the address of the local redir variable. */ if (redir == 0) rpp = &redir; args = NULL; app = &args; /* * We save the incoming value, because we need this for shell * functions. There can not be a redirect or an argument between * the function name and the open parenthesis. */ orig_rpp = rpp; for (;;) { if (readtoken() == TWORD) { n = (union node *)stalloc(sizeof (struct narg)); n->type = NARG; n->narg.text = wordtext; n->narg.backquote = backquotelist; *app = n; app = &n->narg.next; } else if (lasttoken == TREDIR) { *rpp = n = redirnode; rpp = &n->nfile.next; parsefname(); /* read name of redirection file */ } else if (lasttoken == TLP && app == &args->narg.next && rpp == orig_rpp) { /* We have a function */ if (readtoken() != TRP) synexpect(TRP); funclinno = plinno; /* * - Require plain text. * - Functions with '/' cannot be called. * - Reject name=(). * - Reject ksh extended glob patterns. */ if (!noexpand(n->narg.text) || quoteflag || strchr(n->narg.text, '/') || strchr("!%*+-=?@}~", n->narg.text[strlen(n->narg.text) - 1])) synerror("Bad function name"); rmescapes(n->narg.text); if (find_builtin(n->narg.text, &special) >= 0 && special) synerror("Cannot override a special builtin with a function"); n->type = NDEFUN; n->narg.next = command(); funclinno = 0; return n; } else { tokpushback++; break; } } *app = NULL; *rpp = NULL; n = (union node *)stalloc(sizeof (struct ncmd)); n->type = NCMD; n->ncmd.backgnd = 0; n->ncmd.args = args; n->ncmd.redirect = redir; return n; } static union node * makename(void) { union node *n; n = (union node *)stalloc(sizeof (struct narg)); n->type = NARG; n->narg.next = NULL; n->narg.text = wordtext; n->narg.backquote = backquotelist; return n; } void fixredir(union node *n, const char *text, int err) { TRACE(("Fix redir %s %d\n", text, err)); if (!err) n->ndup.vname = NULL; if (is_digit(text[0]) && text[1] == '\0') n->ndup.dupfd = digit_val(text[0]); else if (text[0] == '-' && text[1] == '\0') n->ndup.dupfd = -1; else { if (err) synerror("Bad fd number"); else n->ndup.vname = makename(); } } static void parsefname(void) { union node *n = redirnode; if (readtoken() != TWORD) synexpect(-1); if (n->type == NHERE) { struct heredoc *here = heredoc; struct heredoc *p; int i; if (quoteflag == 0) n->type = NXHERE; TRACE(("Here document %d\n", n->type)); if (here->striptabs) { while (*wordtext == '\t') wordtext++; } if (! noexpand(wordtext) || (i = strlen(wordtext)) == 0 || i > EOFMARKLEN) synerror("Illegal eof marker for << redirection"); rmescapes(wordtext); here->eofmark = wordtext; here->next = NULL; if (heredoclist == NULL) heredoclist = here; else { for (p = heredoclist ; p->next ; p = p->next); p->next = here; } } else if (n->type == NTOFD || n->type == NFROMFD) { fixredir(n, wordtext, 0); } else { n->nfile.fname = makename(); } } /* * Input any here documents. */ static void parseheredoc(void) { struct heredoc *here; union node *n; while (heredoclist) { here = heredoclist; heredoclist = here->next; if (needprompt) { setprompt(2); needprompt = 0; } readtoken1(pgetc(), here->here->type == NHERE? SQSYNTAX : DQSYNTAX, here->eofmark, here->striptabs); n = (union node *)stalloc(sizeof (struct narg)); n->narg.type = NARG; n->narg.next = NULL; n->narg.text = wordtext; n->narg.backquote = backquotelist; here->here->nhere.doc = n; } } static int peektoken(void) { int t; t = readtoken(); tokpushback++; return (t); } static int readtoken(void) { int t; struct alias *ap; #ifdef DEBUG int alreadyseen = tokpushback; #endif top: t = xxreadtoken(); /* * eat newlines */ if (checkkwd & CHKNL) { while (t == TNL) { parseheredoc(); t = xxreadtoken(); } } /* * check for keywords and aliases */ if (t == TWORD && !quoteflag) { const char * const *pp; if (checkkwd & CHKKWD) for (pp = parsekwd; *pp; pp++) { if (**pp == *wordtext && equal(*pp, wordtext)) { lasttoken = t = pp - parsekwd + KWDOFFSET; TRACE(("keyword %s recognized\n", tokname[t])); goto out; } } if (checkkwd & CHKALIAS && (ap = lookupalias(wordtext, 1)) != NULL) { pushstring(ap->val, strlen(ap->val), ap); goto top; } } out: if (t != TNOT) checkkwd = 0; #ifdef DEBUG if (!alreadyseen) TRACE(("token %s %s\n", tokname[t], t == TWORD ? wordtext : "")); else TRACE(("reread token %s %s\n", tokname[t], t == TWORD ? wordtext : "")); #endif return (t); } /* * Read the next input token. * If the token is a word, we set backquotelist to the list of cmds in * backquotes. We set quoteflag to true if any part of the word was * quoted. * If the token is TREDIR, then we set redirnode to a structure containing * the redirection. * In all cases, the variable startlinno is set to the number of the line * on which the token starts. * * [Change comment: here documents and internal procedures] * [Readtoken shouldn't have any arguments. Perhaps we should make the * word parsing code into a separate routine. In this case, readtoken * doesn't need to have any internal procedures, but parseword does. * We could also make parseoperator in essence the main routine, and * have parseword (readtoken1?) handle both words and redirection.] */ #define RETURN(token) return lasttoken = token static int xxreadtoken(void) { int c; if (tokpushback) { tokpushback = 0; return lasttoken; } if (needprompt) { setprompt(2); needprompt = 0; } startlinno = plinno; for (;;) { /* until token or start of word found */ c = pgetc_macro(); if (c == ' ' || c == '\t') continue; /* quick check for white space first */ switch (c) { case ' ': case '\t': continue; case '#': while ((c = pgetc()) != '\n' && c != PEOF); pungetc(); continue; case '\\': if (pgetc() == '\n') { startlinno = ++plinno; if (doprompt) setprompt(2); else setprompt(0); continue; } pungetc(); goto breakloop; case '\n': plinno++; needprompt = doprompt; RETURN(TNL); case PEOF: RETURN(TEOF); case '&': if (pgetc() == '&') RETURN(TAND); pungetc(); RETURN(TBACKGND); case '|': if (pgetc() == '|') RETURN(TOR); pungetc(); RETURN(TPIPE); case ';': if (pgetc() == ';') RETURN(TENDCASE); pungetc(); RETURN(TSEMI); case '(': RETURN(TLP); case ')': RETURN(TRP); default: goto breakloop; } } breakloop: return readtoken1(c, BASESYNTAX, (char *)NULL, 0); #undef RETURN } #define MAXNEST_static 8 struct tokenstate { const char *syntax; /* *SYNTAX */ int parenlevel; /* levels of parentheses in arithmetic */ enum tokenstate_category { TSTATE_TOP, TSTATE_VAR_OLD, /* ${var+-=?}, inherits dquotes */ TSTATE_VAR_NEW, /* other ${var...}, own dquote state */ TSTATE_ARITH } category; }; /* * Called to parse command substitutions. */ static char * parsebackq(char *out, struct nodelist **pbqlist, int oldstyle, int dblquote, int quoted) { struct nodelist **nlpp; union node *n; char *volatile str; struct jmploc jmploc; struct jmploc *const savehandler = handler; int savelen; int saveprompt; const int bq_startlinno = plinno; char *volatile ostr = NULL; struct parsefile *const savetopfile = getcurrentfile(); struct heredoc *const saveheredoclist = heredoclist; struct heredoc *here; str = NULL; if (setjmp(jmploc.loc)) { popfilesupto(savetopfile); if (str) ckfree(str); if (ostr) ckfree(ostr); heredoclist = saveheredoclist; handler = savehandler; if (exception == EXERROR) { startlinno = bq_startlinno; synerror("Error in command substitution"); } longjmp(handler->loc, 1); } INTOFF; savelen = out - stackblock(); if (savelen > 0) { str = ckmalloc(savelen); memcpy(str, stackblock(), savelen); } handler = &jmploc; heredoclist = NULL; INTON; if (oldstyle) { /* We must read until the closing backquote, giving special treatment to some slashes, and then push the string and reread it as input, interpreting it normally. */ char *oout; int c; int olen; STARTSTACKSTR(oout); for (;;) { if (needprompt) { setprompt(2); needprompt = 0; } + CHECKSTRSPACE(2, oout); switch (c = pgetc()) { case '`': goto done; case '\\': if ((c = pgetc()) == '\n') { plinno++; if (doprompt) setprompt(2); else setprompt(0); /* * If eating a newline, avoid putting * the newline into the new character - * stream (via the STPUTC after the + * stream (via the USTPUTC after the * switch). */ continue; } if (c != '\\' && c != '`' && c != '$' && (!dblquote || c != '"')) - STPUTC('\\', oout); + USTPUTC('\\', oout); break; case '\n': plinno++; needprompt = doprompt; break; case PEOF: startlinno = plinno; synerror("EOF in backquote substitution"); break; default: break; } - STPUTC(c, oout); + USTPUTC(c, oout); } done: - STPUTC('\0', oout); + USTPUTC('\0', oout); olen = oout - stackblock(); INTOFF; ostr = ckmalloc(olen); memcpy(ostr, stackblock(), olen); setinputstring(ostr, 1); INTON; } nlpp = pbqlist; while (*nlpp) nlpp = &(*nlpp)->next; *nlpp = (struct nodelist *)stalloc(sizeof (struct nodelist)); (*nlpp)->next = NULL; if (oldstyle) { saveprompt = doprompt; doprompt = 0; } n = list(0, oldstyle); if (oldstyle) doprompt = saveprompt; else { if (readtoken() != TRP) synexpect(TRP); } (*nlpp)->n = n; if (oldstyle) { /* * Start reading from old file again, ignoring any pushed back * tokens left from the backquote parsing */ popfile(); tokpushback = 0; } while (stackblocksize() <= savelen) growstackblock(); STARTSTACKSTR(out); INTOFF; if (str) { memcpy(out, str, savelen); STADJUST(savelen, out); ckfree(str); str = NULL; } if (ostr) { ckfree(ostr); ostr = NULL; } here = saveheredoclist; if (here != NULL) { while (here->next != NULL) here = here->next; here->next = heredoclist; heredoclist = saveheredoclist; } handler = savehandler; INTON; if (quoted) USTPUTC(CTLBACKQ | CTLQUOTE, out); else USTPUTC(CTLBACKQ, out); return out; } /* * If eofmark is NULL, read a word or a redirection symbol. If eofmark * is not NULL, read a here document. In the latter case, eofmark is the * word which marks the end of the document and striptabs is true if * leading tabs should be stripped from the document. The argument firstc * is the first character of the input token or document. * * Because C does not have internal subroutines, I have simulated them * using goto's to implement the subroutine linkage. The following macros * will run code that appears at the end of readtoken1. */ #define CHECKEND() {goto checkend; checkend_return:;} #define PARSEREDIR() {goto parseredir; parseredir_return:;} #define PARSESUB() {goto parsesub; parsesub_return:;} #define PARSEARITH() {goto parsearith; parsearith_return:;} static int readtoken1(int firstc, char const *initialsyntax, char *eofmark, int striptabs) { int c = firstc; char *out; int len; char line[EOFMARKLEN + 1]; struct nodelist *bqlist; int quotef; int newvarnest; int level; int synentry; struct tokenstate state_static[MAXNEST_static]; int maxnest = MAXNEST_static; struct tokenstate *state = state_static; startlinno = plinno; quotef = 0; bqlist = NULL; newvarnest = 0; level = 0; state[level].syntax = initialsyntax; state[level].parenlevel = 0; state[level].category = TSTATE_TOP; STARTSTACKSTR(out); loop: { /* for each line, until end of word */ CHECKEND(); /* set c to PEOF if at end of here document */ for (;;) { /* until end of line or end of word */ CHECKSTRSPACE(4, out); /* permit 4 calls to USTPUTC */ synentry = state[level].syntax[c]; switch(synentry) { case CNL: /* '\n' */ if (state[level].syntax == BASESYNTAX) goto endword; /* exit outer loop */ USTPUTC(c, out); plinno++; if (doprompt) setprompt(2); else setprompt(0); c = pgetc(); goto loop; /* continue outer loop */ case CWORD: USTPUTC(c, out); break; case CCTL: if (eofmark == NULL || initialsyntax != SQSYNTAX) USTPUTC(CTLESC, out); USTPUTC(c, out); break; case CBACK: /* backslash */ c = pgetc(); if (c == PEOF) { USTPUTC('\\', out); pungetc(); } else if (c == '\n') { plinno++; if (doprompt) setprompt(2); else setprompt(0); } else { if (state[level].syntax == DQSYNTAX && c != '\\' && c != '`' && c != '$' && (c != '"' || (eofmark != NULL && newvarnest == 0)) && (c != '}' || state[level].category != TSTATE_VAR_OLD)) USTPUTC('\\', out); if ((eofmark == NULL || newvarnest > 0) && state[level].syntax == BASESYNTAX) USTPUTC(CTLQUOTEMARK, out); if (SQSYNTAX[c] == CCTL) USTPUTC(CTLESC, out); USTPUTC(c, out); if ((eofmark == NULL || newvarnest > 0) && state[level].syntax == BASESYNTAX && state[level].category == TSTATE_VAR_OLD) USTPUTC(CTLQUOTEEND, out); quotef++; } break; case CSQUOTE: USTPUTC(CTLQUOTEMARK, out); state[level].syntax = SQSYNTAX; break; case CDQUOTE: USTPUTC(CTLQUOTEMARK, out); state[level].syntax = DQSYNTAX; break; case CENDQUOTE: if (eofmark != NULL && newvarnest == 0) USTPUTC(c, out); else { if (state[level].category == TSTATE_VAR_OLD) USTPUTC(CTLQUOTEEND, out); state[level].syntax = BASESYNTAX; quotef++; } break; case CVAR: /* '$' */ PARSESUB(); /* parse substitution */ break; case CENDVAR: /* '}' */ if (level > 0 && ((state[level].category == TSTATE_VAR_OLD && state[level].syntax == state[level - 1].syntax) || (state[level].category == TSTATE_VAR_NEW && state[level].syntax == BASESYNTAX))) { if (state[level].category == TSTATE_VAR_NEW) newvarnest--; level--; USTPUTC(CTLENDVAR, out); } else { USTPUTC(c, out); } break; case CLP: /* '(' in arithmetic */ state[level].parenlevel++; USTPUTC(c, out); break; case CRP: /* ')' in arithmetic */ if (state[level].parenlevel > 0) { USTPUTC(c, out); --state[level].parenlevel; } else { if (pgetc() == ')') { if (level > 0 && state[level].category == TSTATE_ARITH) { level--; USTPUTC(CTLENDARI, out); } else USTPUTC(')', out); } else { /* * unbalanced parens * (don't 2nd guess - no error) */ pungetc(); USTPUTC(')', out); } } break; case CBQUOTE: /* '`' */ out = parsebackq(out, &bqlist, 1, state[level].syntax == DQSYNTAX && (eofmark == NULL || newvarnest > 0), state[level].syntax == DQSYNTAX || state[level].syntax == ARISYNTAX); break; case CEOF: goto endword; /* exit outer loop */ case CIGN: break; default: if (level == 0) goto endword; /* exit outer loop */ USTPUTC(c, out); } c = pgetc_macro(); } } endword: if (state[level].syntax == ARISYNTAX) synerror("Missing '))'"); if (state[level].syntax != BASESYNTAX && eofmark == NULL) synerror("Unterminated quoted string"); if (state[level].category == TSTATE_VAR_OLD || state[level].category == TSTATE_VAR_NEW) { startlinno = plinno; synerror("Missing '}'"); } if (state != state_static) parser_temp_free_upto(state); USTPUTC('\0', out); len = out - stackblock(); out = stackblock(); if (eofmark == NULL) { if ((c == '>' || c == '<') && quotef == 0 && len <= 2 && (*out == '\0' || is_digit(*out))) { PARSEREDIR(); return lasttoken = TREDIR; } else { pungetc(); } } quoteflag = quotef; backquotelist = bqlist; grabstackblock(len); wordtext = out; return lasttoken = TWORD; /* end of readtoken routine */ /* * Check to see whether we are at the end of the here document. When this * is called, c is set to the first character of the next input line. If * we are at the end of the here document, this routine sets the c to PEOF. */ checkend: { if (eofmark) { if (striptabs) { while (c == '\t') c = pgetc(); } if (c == *eofmark) { if (pfgets(line, sizeof line) != NULL) { char *p, *q; p = line; for (q = eofmark + 1 ; *q && *p == *q ; p++, q++); if (*p == '\n' && *q == '\0') { c = PEOF; plinno++; needprompt = doprompt; } else { pushstring(line, strlen(line), NULL); } } } } goto checkend_return; } /* * Parse a redirection operator. The variable "out" points to a string * specifying the fd to be redirected. The variable "c" contains the * first character of the redirection operator. */ parseredir: { char fd = *out; union node *np; np = (union node *)stalloc(sizeof (struct nfile)); if (c == '>') { np->nfile.fd = 1; c = pgetc(); if (c == '>') np->type = NAPPEND; else if (c == '&') np->type = NTOFD; else if (c == '|') np->type = NCLOBBER; else { np->type = NTO; pungetc(); } } else { /* c == '<' */ np->nfile.fd = 0; c = pgetc(); if (c == '<') { if (sizeof (struct nfile) != sizeof (struct nhere)) { np = (union node *)stalloc(sizeof (struct nhere)); np->nfile.fd = 0; } np->type = NHERE; heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc)); heredoc->here = np; if ((c = pgetc()) == '-') { heredoc->striptabs = 1; } else { heredoc->striptabs = 0; pungetc(); } } else if (c == '&') np->type = NFROMFD; else if (c == '>') np->type = NFROMTO; else { np->type = NFROM; pungetc(); } } if (fd != '\0') np->nfile.fd = digit_val(fd); redirnode = np; goto parseredir_return; } /* * Parse a substitution. At this point, we have read the dollar sign * and nothing else. */ parsesub: { char buf[10]; int subtype; int typeloc; int flags; char *p; static const char types[] = "}-+?="; int bracketed_name = 0; /* used to handle ${[0-9]*} variables */ - int i; int linno; int length; c = pgetc(); if (c != '(' && c != '{' && (is_eof(c) || !is_name(c)) && !is_special(c)) { USTPUTC('$', out); pungetc(); } else if (c == '(') { /* $(command) or $((arith)) */ if (pgetc() == '(') { PARSEARITH(); } else { pungetc(); out = parsebackq(out, &bqlist, 0, state[level].syntax == DQSYNTAX && (eofmark == NULL || newvarnest > 0), state[level].syntax == DQSYNTAX || state[level].syntax == ARISYNTAX); } } else { USTPUTC(CTLVAR, out); typeloc = out - stackblock(); USTPUTC(VSNORMAL, out); subtype = VSNORMAL; flags = 0; if (c == '{') { bracketed_name = 1; c = pgetc(); if (c == '#') { if ((c = pgetc()) == '}') c = '#'; else subtype = VSLENGTH; } else subtype = 0; } if (!is_eof(c) && is_name(c)) { length = 0; do { STPUTC(c, out); c = pgetc(); length++; } while (!is_eof(c) && is_in_name(c)); if (length == 6 && strncmp(out - length, "LINENO", length) == 0) { /* Replace the variable name with the * current line number. */ linno = plinno; if (funclinno != 0) linno -= funclinno - 1; snprintf(buf, sizeof(buf), "%d", linno); STADJUST(-6, out); - for (i = 0; buf[i] != '\0'; i++) - STPUTC(buf[i], out); + STPUTS(buf, out); flags |= VSLINENO; } } else if (is_digit(c)) { if (bracketed_name) { do { STPUTC(c, out); c = pgetc(); } while (is_digit(c)); } else { STPUTC(c, out); c = pgetc(); } } else { if (! is_special(c)) { subtype = VSERROR; if (c == '}') pungetc(); else if (c == '\n' || c == PEOF) synerror("Unexpected end of line in substitution"); else USTPUTC(c, out); } else { USTPUTC(c, out); c = pgetc(); } } if (subtype == 0) { switch (c) { case ':': flags |= VSNUL; c = pgetc(); /*FALLTHROUGH*/ default: p = strchr(types, c); if (p == NULL) { if (c == '\n' || c == PEOF) synerror("Unexpected end of line in substitution"); if (flags == VSNUL) STPUTC(':', out); STPUTC(c, out); subtype = VSERROR; } else subtype = p - types + VSNORMAL; break; case '%': case '#': { int cc = c; subtype = c == '#' ? VSTRIMLEFT : VSTRIMRIGHT; c = pgetc(); if (c == cc) subtype++; else pungetc(); break; } } } else if (subtype != VSERROR) { pungetc(); } STPUTC('=', out); if (subtype != VSLENGTH && (state[level].syntax == DQSYNTAX || state[level].syntax == ARISYNTAX)) flags |= VSQUOTE; *(stackblock() + typeloc) = subtype | flags; if (subtype != VSNORMAL) { if (level + 1 >= maxnest) { maxnest *= 2; if (state == state_static) { state = parser_temp_alloc( maxnest * sizeof(*state)); memcpy(state, state_static, MAXNEST_static * sizeof(*state)); } else state = parser_temp_realloc(state, maxnest * sizeof(*state)); } level++; state[level].parenlevel = 0; if (subtype == VSMINUS || subtype == VSPLUS || subtype == VSQUESTION || subtype == VSASSIGN) { /* * For operators that were in the Bourne shell, * inherit the double-quote state. */ state[level].syntax = state[level - 1].syntax; state[level].category = TSTATE_VAR_OLD; } else { /* * The other operators take a pattern, * so go to BASESYNTAX. * Also, ' and " are now special, even * in here documents. */ state[level].syntax = BASESYNTAX; state[level].category = TSTATE_VAR_NEW; newvarnest++; } } } goto parsesub_return; } /* * Parse an arithmetic expansion (indicate start of one and set state) */ parsearith: { if (level + 1 >= maxnest) { maxnest *= 2; if (state == state_static) { state = parser_temp_alloc( maxnest * sizeof(*state)); memcpy(state, state_static, MAXNEST_static * sizeof(*state)); } else state = parser_temp_realloc(state, maxnest * sizeof(*state)); } level++; state[level].syntax = ARISYNTAX; state[level].parenlevel = 0; state[level].category = TSTATE_ARITH; USTPUTC(CTLARI, out); if (state[level - 1].syntax == DQSYNTAX) USTPUTC('"',out); else USTPUTC(' ',out); goto parsearith_return; } } /* end of readtoken */ #ifdef mkinit RESET { tokpushback = 0; checkkwd = 0; } #endif /* * Returns true if the text contains nothing to expand (no dollar signs * or backquotes). */ static int noexpand(char *text) { char *p; char c; p = text; while ((c = *p++) != '\0') { if ( c == CTLQUOTEMARK) continue; if (c == CTLESC) p++; else if (BASESYNTAX[(int)c] == CCTL) return 0; } return 1; } /* * Return true if the argument is a legal variable name (a letter or * underscore followed by zero or more letters, underscores, and digits). */ int goodname(const char *name) { const char *p; p = name; if (! is_name(*p)) return 0; while (*++p) { if (! is_in_name(*p)) return 0; } return 1; } /* * Called when an unexpected token is read during the parse. The argument * is the token that is expected, or -1 if more than one type of token can * occur at this point. */ static void synexpect(int token) { char msg[64]; if (token >= 0) { fmtstr(msg, 64, "%s unexpected (expecting %s)", tokname[lasttoken], tokname[token]); } else { fmtstr(msg, 64, "%s unexpected", tokname[lasttoken]); } synerror(msg); } static void synerror(const char *msg) { if (commandname) outfmt(out2, "%s: %d: ", commandname, startlinno); outfmt(out2, "Syntax error: %s\n", msg); error((char *)NULL); } static void setprompt(int which) { whichprompt = which; #ifndef NO_HISTORY if (!el) #endif { out2str(getprompt(NULL)); flushout(out2); } } /* * called by editline -- any expansions to the prompt * should be added here. */ char * getprompt(void *unused __unused) { static char ps[PROMPTLEN]; char *fmt; const char *pwd; int i, trim; static char internal_error[] = "??"; /* * Select prompt format. */ switch (whichprompt) { case 0: fmt = nullstr; break; case 1: fmt = ps1val(); break; case 2: fmt = ps2val(); break; default: return internal_error; } /* * Format prompt string. */ for (i = 0; (i < 127) && (*fmt != '\0'); i++, fmt++) if (*fmt == '\\') switch (*++fmt) { /* * Hostname. * * \h specifies just the local hostname, * \H specifies fully-qualified hostname. */ case 'h': case 'H': ps[i] = '\0'; gethostname(&ps[i], PROMPTLEN - i); /* Skip to end of hostname. */ trim = (*fmt == 'h') ? '.' : '\0'; while ((ps[i+1] != '\0') && (ps[i+1] != trim)) i++; break; /* * Working directory. * * \W specifies just the final component, * \w specifies the entire path. */ case 'W': case 'w': pwd = lookupvar("PWD"); if (pwd == NULL) pwd = "?"; if (*fmt == 'W' && *pwd == '/' && pwd[1] != '\0') strlcpy(&ps[i], strrchr(pwd, '/') + 1, PROMPTLEN - i); else strlcpy(&ps[i], pwd, PROMPTLEN - i); /* Skip to end of path. */ while (ps[i + 1] != '\0') i++; break; /* * Superuser status. * * '$' for normal users, '#' for root. */ case '$': ps[i] = (geteuid() != 0) ? '$' : '#'; break; /* * A literal \. */ case '\\': ps[i] = '\\'; break; /* * Emit unrecognized formats verbatim. */ default: ps[i++] = '\\'; ps[i] = *fmt; break; } else ps[i] = *fmt; ps[i] = '\0'; return (ps); } Index: projects/binutils-2.17/cddl/contrib/opensolaris =================================================================== --- projects/binutils-2.17/cddl/contrib/opensolaris (revision 215829) +++ projects/binutils-2.17/cddl/contrib/opensolaris (revision 215830) Property changes on: projects/binutils-2.17/cddl/contrib/opensolaris ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/cddl/contrib/opensolaris:r215709-215824 Index: projects/binutils-2.17/contrib/bind9 =================================================================== --- projects/binutils-2.17/contrib/bind9 (revision 215829) +++ projects/binutils-2.17/contrib/bind9 (revision 215830) Property changes on: projects/binutils-2.17/contrib/bind9 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/bind9:r215709-215824 Index: projects/binutils-2.17/contrib/binutils =================================================================== --- projects/binutils-2.17/contrib/binutils (revision 215829) +++ projects/binutils-2.17/contrib/binutils (revision 215830) Property changes on: projects/binutils-2.17/contrib/binutils ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/binutils:r215709-215824 Index: projects/binutils-2.17/contrib/bzip2 =================================================================== --- projects/binutils-2.17/contrib/bzip2 (revision 215829) +++ projects/binutils-2.17/contrib/bzip2 (revision 215830) Property changes on: projects/binutils-2.17/contrib/bzip2 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/bzip2:r215709-215824 Index: projects/binutils-2.17/contrib/ee =================================================================== --- projects/binutils-2.17/contrib/ee (revision 215829) +++ projects/binutils-2.17/contrib/ee (revision 215830) Property changes on: projects/binutils-2.17/contrib/ee ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/ee:r215709-215824 Index: projects/binutils-2.17/contrib/expat =================================================================== --- projects/binutils-2.17/contrib/expat (revision 215829) +++ projects/binutils-2.17/contrib/expat (revision 215830) Property changes on: projects/binutils-2.17/contrib/expat ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/expat:r215709-215824 Index: projects/binutils-2.17/contrib/file =================================================================== --- projects/binutils-2.17/contrib/file (revision 215829) +++ projects/binutils-2.17/contrib/file (revision 215830) Property changes on: projects/binutils-2.17/contrib/file ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/file:r215709-215824 Index: projects/binutils-2.17/contrib/gdb =================================================================== --- projects/binutils-2.17/contrib/gdb (revision 215829) +++ projects/binutils-2.17/contrib/gdb (revision 215830) Property changes on: projects/binutils-2.17/contrib/gdb ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/gdb:r215709-215824 Index: projects/binutils-2.17/contrib/gdtoa =================================================================== --- projects/binutils-2.17/contrib/gdtoa (revision 215829) +++ projects/binutils-2.17/contrib/gdtoa (revision 215830) Property changes on: projects/binutils-2.17/contrib/gdtoa ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/gdtoa:r215709-215824 Index: projects/binutils-2.17/contrib/gnu-sort =================================================================== --- projects/binutils-2.17/contrib/gnu-sort (revision 215829) +++ projects/binutils-2.17/contrib/gnu-sort (revision 215830) Property changes on: projects/binutils-2.17/contrib/gnu-sort ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/gnu-sort:r215709-215824 Index: projects/binutils-2.17/contrib/groff =================================================================== --- projects/binutils-2.17/contrib/groff (revision 215829) +++ projects/binutils-2.17/contrib/groff (revision 215830) Property changes on: projects/binutils-2.17/contrib/groff ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/groff:r215709-215824 Index: projects/binutils-2.17/contrib/less =================================================================== --- projects/binutils-2.17/contrib/less (revision 215829) +++ projects/binutils-2.17/contrib/less (revision 215830) Property changes on: projects/binutils-2.17/contrib/less ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/less:r215709-215824 Index: projects/binutils-2.17/contrib/libpcap =================================================================== --- projects/binutils-2.17/contrib/libpcap (revision 215829) +++ projects/binutils-2.17/contrib/libpcap (revision 215830) Property changes on: projects/binutils-2.17/contrib/libpcap ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/libpcap:r215709-215824 Index: projects/binutils-2.17/contrib/llvm/tools/clang =================================================================== --- projects/binutils-2.17/contrib/llvm/tools/clang (revision 215829) +++ projects/binutils-2.17/contrib/llvm/tools/clang (revision 215830) Property changes on: projects/binutils-2.17/contrib/llvm/tools/clang ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/llvm/tools/clang:r215709-215824 Index: projects/binutils-2.17/contrib/llvm =================================================================== --- projects/binutils-2.17/contrib/llvm (revision 215829) +++ projects/binutils-2.17/contrib/llvm (revision 215830) Property changes on: projects/binutils-2.17/contrib/llvm ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/llvm:r215709-215824 Index: projects/binutils-2.17/contrib/ncurses =================================================================== --- projects/binutils-2.17/contrib/ncurses (revision 215829) +++ projects/binutils-2.17/contrib/ncurses (revision 215830) Property changes on: projects/binutils-2.17/contrib/ncurses ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/ncurses:r215709-215824 Index: projects/binutils-2.17/contrib/netcat =================================================================== --- projects/binutils-2.17/contrib/netcat (revision 215829) +++ projects/binutils-2.17/contrib/netcat (revision 215830) Property changes on: projects/binutils-2.17/contrib/netcat ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/netcat:r215709-215824 Index: projects/binutils-2.17/contrib/ntp =================================================================== --- projects/binutils-2.17/contrib/ntp (revision 215829) +++ projects/binutils-2.17/contrib/ntp (revision 215830) Property changes on: projects/binutils-2.17/contrib/ntp ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/ntp:r215709-215824 Index: projects/binutils-2.17/contrib/one-true-awk =================================================================== --- projects/binutils-2.17/contrib/one-true-awk (revision 215829) +++ projects/binutils-2.17/contrib/one-true-awk (revision 215830) Property changes on: projects/binutils-2.17/contrib/one-true-awk ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/one-true-awk:r215709-215824 Index: projects/binutils-2.17/contrib/openbsm =================================================================== --- projects/binutils-2.17/contrib/openbsm (revision 215829) +++ projects/binutils-2.17/contrib/openbsm (revision 215830) Property changes on: projects/binutils-2.17/contrib/openbsm ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/openbsm:r215709-215824 Index: projects/binutils-2.17/contrib/openpam =================================================================== --- projects/binutils-2.17/contrib/openpam (revision 215829) +++ projects/binutils-2.17/contrib/openpam (revision 215830) Property changes on: projects/binutils-2.17/contrib/openpam ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/openpam:r215709-215824 Index: projects/binutils-2.17/contrib/pf =================================================================== --- projects/binutils-2.17/contrib/pf (revision 215829) +++ projects/binutils-2.17/contrib/pf (revision 215830) Property changes on: projects/binutils-2.17/contrib/pf ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/pf:r215709-215824 Index: projects/binutils-2.17/contrib/sendmail =================================================================== --- projects/binutils-2.17/contrib/sendmail (revision 215829) +++ projects/binutils-2.17/contrib/sendmail (revision 215830) Property changes on: projects/binutils-2.17/contrib/sendmail ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/sendmail:r215709-215824 Index: projects/binutils-2.17/contrib/tcpdump =================================================================== --- projects/binutils-2.17/contrib/tcpdump (revision 215829) +++ projects/binutils-2.17/contrib/tcpdump (revision 215830) Property changes on: projects/binutils-2.17/contrib/tcpdump ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/tcpdump:r215709-215824 Index: projects/binutils-2.17/contrib/tcsh =================================================================== --- projects/binutils-2.17/contrib/tcsh (revision 215829) +++ projects/binutils-2.17/contrib/tcsh (revision 215830) Property changes on: projects/binutils-2.17/contrib/tcsh ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/tcsh:r215709-215824 Index: projects/binutils-2.17/contrib/top/install-sh =================================================================== --- projects/binutils-2.17/contrib/top/install-sh (revision 215829) +++ projects/binutils-2.17/contrib/top/install-sh (revision 215830) Property changes on: projects/binutils-2.17/contrib/top/install-sh ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/top/install-sh:r215709-215824 Index: projects/binutils-2.17/contrib/top =================================================================== --- projects/binutils-2.17/contrib/top (revision 215829) +++ projects/binutils-2.17/contrib/top (revision 215830) Property changes on: projects/binutils-2.17/contrib/top ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/top:r215709-215824 Index: projects/binutils-2.17/contrib/tzcode/stdtime =================================================================== --- projects/binutils-2.17/contrib/tzcode/stdtime (revision 215829) +++ projects/binutils-2.17/contrib/tzcode/stdtime (revision 215830) Property changes on: projects/binutils-2.17/contrib/tzcode/stdtime ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/tzcode/stdtime:r215709-215824 Index: projects/binutils-2.17/contrib/tzcode/zic =================================================================== --- projects/binutils-2.17/contrib/tzcode/zic (revision 215829) +++ projects/binutils-2.17/contrib/tzcode/zic (revision 215830) Property changes on: projects/binutils-2.17/contrib/tzcode/zic ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/tzcode/zic:r215709-215824 Index: projects/binutils-2.17/contrib/tzdata =================================================================== --- projects/binutils-2.17/contrib/tzdata (revision 215829) +++ projects/binutils-2.17/contrib/tzdata (revision 215830) Property changes on: projects/binutils-2.17/contrib/tzdata ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/tzdata:r215709-215824 Index: projects/binutils-2.17/contrib/wpa =================================================================== --- projects/binutils-2.17/contrib/wpa (revision 215829) +++ projects/binutils-2.17/contrib/wpa (revision 215830) Property changes on: projects/binutils-2.17/contrib/wpa ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/wpa:r215709-215824 Index: projects/binutils-2.17/contrib/xz =================================================================== --- projects/binutils-2.17/contrib/xz (revision 215829) +++ projects/binutils-2.17/contrib/xz (revision 215830) Property changes on: projects/binutils-2.17/contrib/xz ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/xz:r215709-215824 Index: projects/binutils-2.17/crypto/openssh =================================================================== --- projects/binutils-2.17/crypto/openssh (revision 215829) +++ projects/binutils-2.17/crypto/openssh (revision 215830) Property changes on: projects/binutils-2.17/crypto/openssh ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/crypto/openssh:r215709-215824 Index: projects/binutils-2.17/crypto/openssl =================================================================== --- projects/binutils-2.17/crypto/openssl (revision 215829) +++ projects/binutils-2.17/crypto/openssl (revision 215830) Property changes on: projects/binutils-2.17/crypto/openssl ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/crypto/openssl:r215709-215824 Index: projects/binutils-2.17/etc/defaults/rc.conf =================================================================== --- projects/binutils-2.17/etc/defaults/rc.conf (revision 215829) +++ projects/binutils-2.17/etc/defaults/rc.conf (revision 215830) @@ -1,710 +1,712 @@ #!/bin/sh # This is rc.conf - a file full of useful variables that you can set # to change the default startup behavior of your system. You should # not edit this file! Put any overrides into one of the ${rc_conf_files} # instead and you will be able to update these defaults later without # spamming your local configuration information. # # The ${rc_conf_files} files should only contain values which override # values set in this file. This eases the upgrade path when defaults # are changed and new features are added. # # All arguments must be in double or single quotes. # # For a more detailed explanation of all the rc.conf variables, please # refer to the rc.conf(5) manual page. # # $FreeBSD$ ############################################################## ### Important initial Boot-time options #################### ############################################################## rc_debug="NO" # Set to YES to enable debugging output from rc.d rc_info="NO" # Enables display of informational messages at boot. rc_startmsgs="YES" # Show "Starting foo:" messages at boot rcshutdown_timeout="30" # Seconds to wait before terminating rc.shutdown early_late_divider="FILESYSTEMS" # Script that separates early/late # stages of the boot process. Make sure you know # the ramifications if you change this. # See rc.conf(5) for more details. swapfile="NO" # Set to name of swapfile if aux swapfile desired. apm_enable="NO" # Set to YES to enable APM BIOS functions (or NO). apmd_enable="NO" # Run apmd to handle APM event from userland. apmd_flags="" # Flags to apmd (if enabled). ddb_enable="NO" # Set to YES to load ddb scripts at boot. ddb_config="/etc/ddb.conf" # ddb(8) config file. devd_enable="YES" # Run devd, to trigger programs on device tree changes. devd_flags="" # Additional flags for devd(8). kldxref_enable="NO" # Build linker.hints files with kldxref(8). kldxref_clobber="NO" # Overwrite old linker.hints at boot. kldxref_module_path="" # Override kern.module_path. A ';'-delimited list. powerd_enable="NO" # Run powerd to lower our power usage. powerd_flags="" # Flags to powerd (if enabled). tmpmfs="AUTO" # Set to YES to always create an mfs /tmp, NO to never tmpsize="20m" # Size of mfs /tmp if created tmpmfs_flags="-S" # Extra mdmfs options for the mfs /tmp varmfs="AUTO" # Set to YES to always create an mfs /var, NO to never varsize="32m" # Size of mfs /var if created varmfs_flags="-S" # Extra mount options for the mfs /var populate_var="AUTO" # Set to YES to always (re)populate /var, NO to never cleanvar_enable="YES" # Clean the /var directory local_startup="/usr/local/etc/rc.d" # startup script dirs. script_name_sep=" " # Change if your startup scripts' names contain spaces rc_conf_files="/etc/rc.conf /etc/rc.conf.local" # ZFS support zfs_enable="NO" # Set to YES to automatically mount ZFS file systems +gptboot_enable="YES" # GPT boot success/failure reporting. + # Experimental - test before enabling gbde_autoattach_all="NO" # YES automatically mounts gbde devices from fstab gbde_devices="NO" # Devices to automatically attach (list, or AUTO) gbde_attach_attempts="3" # Number of times to attempt attaching gbde devices gbde_lockdir="/etc" # Where to look for gbde lockfiles # GELI disk encryption configuration. geli_devices="" # List of devices to automatically attach in addition to # GELI devices listed in /etc/fstab. geli_tries="" # Number of times to attempt attaching geli device. # If empty, kern.geom.eli.tries will be used. geli_default_flags="" # Default flags for geli(8). geli_autodetach="YES" # Automatically detach on last close. # Providers are marked as such when all file systems are # mounted. # Example use. #geli_devices="da1 mirror/home" #geli_da1_flags="-p -k /etc/geli/da1.keys" #geli_da1_autodetach="NO" #geli_mirror_home_flags="-k /etc/geli/home.keys" geli_swap_flags="-e aes -l 256 -s 4096 -d" # Options for GELI-encrypted # swap partitions. root_rw_mount="YES" # Set to NO to inhibit remounting root read-write. fsck_y_enable="NO" # Set to YES to do fsck -y if the initial preen fails. fsck_y_flags="" # Additional flags for fsck -y background_fsck="YES" # Attempt to run fsck in the background where possible. background_fsck_delay="60" # Time to wait (seconds) before starting the fsck. netfs_types="nfs:NFS nfs4:NFS4 smbfs:SMB portalfs:PORTAL nwfs:NWFS" # Net filesystems. extra_netfs_types="NO" # List of network extra filesystem types for delayed # mount at startup (or NO). ############################################################## ### Network configuration sub-section ###################### ############################################################## ### Basic network and firewall/security options: ### hostname="" # Set this! hostid_enable="YES" # Set host UUID. hostid_file="/etc/hostid" # File with hostuuid. nisdomainname="NO" # Set to NIS domain if using NIS (or NO). dhclient_program="/sbin/dhclient" # Path to dhcp client program. dhclient_flags="" # Extra flags to pass to dhcp client. #dhclient_flags_fxp0="" # Extra dhclient flags for fxp0 only background_dhclient="NO" # Start dhcp client in the background. #background_dhclient_fxp0="YES" # Start dhcp client on fxp0 in the background. synchronous_dhclient="NO" # Start dhclient directly on configured # interfaces during startup. defaultroute_delay="30" # Time to wait for a default route on a DHCP interface. defaultroute_carrier_delay="5" # Time to wait for carrier while waiting for a default route. wpa_supplicant_program="/usr/sbin/wpa_supplicant" wpa_supplicant_flags="-s" # Extra flags to pass to wpa_supplicant wpa_supplicant_conf_file="/etc/wpa_supplicant.conf" # firewall_enable="NO" # Set to YES to enable firewall functionality firewall_script="/etc/rc.firewall" # Which script to run to set up the firewall firewall_type="UNKNOWN" # Firewall type (see /etc/rc.firewall) firewall_quiet="NO" # Set to YES to suppress rule display firewall_logging="NO" # Set to YES to enable events logging firewall_flags="" # Flags passed to ipfw when type is a file firewall_coscripts="" # List of executables/scripts to run after # firewall starts/stops firewall_client_net="192.0.2.0/24" # IPv4 Network address for "client" # firewall. #firewall_client_net_ipv6="2001:db8:2:1::/64" # IPv6 network prefix for # "client" firewall. firewall_simple_iif="ed1" # Inside network interface for "simple" # firewall. firewall_simple_inet="192.0.2.16/28" # Inside network address for "simple" # firewall. firewall_simple_oif="ed0" # Outside network interface for "simple" # firewall. firewall_simple_onet="192.0.2.0/28" # Outside network address for "simple" # firewall. #firewall_simple_iif_ipv6="ed1" # Inside IPv6 network interface for "simple" # firewall. #firewall_simple_inet_ipv6="2001:db8:2:800::/56" # Inside IPv6 network prefix # for "simple" firewall. #firewall_simple_oif_ipv6="ed0" # Outside IPv6 network interface for "simple" # firewall. #firewall_simple_onet_ipv6="2001:db8:2:0::/56" # Outside IPv6 network prefix # for "simple" firewall. firewall_myservices="" # List of TCP ports on which this host # offers services for "workstation" firewall. firewall_allowservices="" # List of IPs which have access to # $firewall_myservices for "workstation" # firewall. firewall_trusted="" # List of IPs which have full access to this # host for "workstation" firewall. firewall_logdeny="NO" # Set to YES to log default denied incoming # packets for "workstation" firewall. firewall_nologports="135-139,445 1026,1027 1433,1434" # List of TCP/UDP ports # for which denied incoming packets are not # logged for "workstation" firewall. firewall_nat_enable="NO" # Enable kernel NAT (if firewall_enable == YES) firewall_nat_interface="" # Public interface or IPaddress to use firewall_nat_flags="" # Additional configuration parameters dummynet_enable="NO" # Load the dummynet(4) module ip_portrange_first="NO" # Set first dynamically allocated port ip_portrange_last="NO" # Set last dynamically allocated port ike_enable="NO" # Enable IKE daemon (usually racoon or isakmpd) ike_program="/usr/local/sbin/isakmpd" # Path to IKE daemon ike_flags="" # Additional flags for IKE daemon ipsec_enable="NO" # Set to YES to run setkey on ipsec_file ipsec_file="/etc/ipsec.conf" # Name of config file for setkey natd_program="/sbin/natd" # path to natd, if you want a different one. natd_enable="NO" # Enable natd (if firewall_enable == YES). natd_interface="" # Public interface or IPaddress to use. natd_flags="" # Additional flags for natd. ipfilter_enable="NO" # Set to YES to enable ipfilter functionality ipfilter_program="/sbin/ipf" # where the ipfilter program lives ipfilter_rules="/etc/ipf.rules" # rules definition file for ipfilter, see # /usr/src/contrib/ipfilter/rules for examples ipfilter_flags="" # additional flags for ipfilter ipnat_enable="NO" # Set to YES to enable ipnat functionality ipnat_program="/sbin/ipnat" # where the ipnat program lives ipnat_rules="/etc/ipnat.rules" # rules definition file for ipnat ipnat_flags="" # additional flags for ipnat ipmon_enable="NO" # Set to YES for ipmon; needs ipfilter or ipnat ipmon_program="/sbin/ipmon" # where the ipfilter monitor program lives ipmon_flags="-Ds" # typically "-Ds" or "-D /var/log/ipflog" ipfs_enable="NO" # Set to YES to enable saving and restoring # of state tables at shutdown and boot ipfs_program="/sbin/ipfs" # where the ipfs program lives ipfs_flags="" # additional flags for ipfs pf_enable="NO" # Set to YES to enable packet filter (pf) pf_rules="/etc/pf.conf" # rules definition file for pf pf_program="/sbin/pfctl" # where the pfctl program lives pf_flags="" # additional flags for pfctl pflog_enable="NO" # Set to YES to enable packet filter logging pflog_logfile="/var/log/pflog" # where pflogd should store the logfile pflog_program="/sbin/pflogd" # where the pflogd program lives pflog_flags="" # additional flags for pflogd ftpproxy_enable="NO" # Set to YES to enable ftp-proxy(8) for pf ftpproxy_flags="" # additional flags for ftp-proxy(8) pfsync_enable="NO" # Expose pf state to other hosts for syncing pfsync_syncdev="" # Interface for pfsync to work through pfsync_syncpeer="" # IP address of pfsync peer host pfsync_ifconfig="" # Additional options to ifconfig(8) for pfsync tcp_extensions="YES" # Set to NO to turn off RFC1323 extensions. log_in_vain="0" # >=1 to log connects to ports w/o listeners. tcp_keepalive="YES" # Enable stale TCP connection timeout (or NO). tcp_drop_synfin="NO" # Set to YES to drop TCP packets with SYN+FIN # NOTE: this violates the TCP specification icmp_drop_redirect="NO" # Set to YES to ignore ICMP REDIRECT packets icmp_log_redirect="NO" # Set to YES to log ICMP REDIRECT packets network_interfaces="auto" # List of network interfaces (or "auto"). cloned_interfaces="" # List of cloned network interfaces to create. #cloned_interfaces="gif0 gif1 gif2 gif3" # Pre-cloning GENERIC config. ifconfig_lo0="inet 127.0.0.1" # default loopback device configuration. #ifconfig_lo0_alias0="inet 127.0.0.254 netmask 0xffffffff" # Sample alias entry. #ifconfig_ed0_ipx="ipx 0x00010010" # Sample IPX address family entry. #ifconfig_ed0_ipv6="inet6 2001:db8:1::1 prefixlen 64" # Sample IPv6 addr entry #ifconfig_ed0_alias0="inet6 2001:db8:2::1 prefixlen 64" # Sample IPv6 alias #ifconfig_fxp0_name="net0" # Change interface name from fxp0 to net0. #vlans_fxp0="101 vlan0" # vlan(4) interfaces for fxp0 device #create_args_vlan0="vlan 102" # vlan tag for vlan0 device #wlans_ath0="wlan0" # wlan(4) interfaces for ath0 device #wlandebug_wlan0="scan+auth+assoc" # Set debug flags with wlanddebug(8) #ipv4_addrs_fxp0="192.168.0.1/24 192.168.1.1-5/28" # example IPv4 address entry. # #autobridge_interfaces="bridge0" # List of bridges to check #autobridge_bridge0="tap* vlan0" # Interface glob to automatically add to the bridge # # If you have any sppp(4) interfaces above, you might also want to set # the following parameters. Refer to spppcontrol(8) for their meaning. sppp_interfaces="" # List of sppp interfaces. #sppp_interfaces="...0" # example: sppp over ... #spppconfig_...0="authproto=chap myauthname=foo myauthsecret='top secret' hisauthname=some-gw hisauthsecret='another secret'" gif_interfaces="" # List of GIF tunnels. #gif_interfaces="gif0 gif1" # Examples typically for a router. # Choose correct tunnel addrs. #gifconfig_gif0="10.1.1.1 10.1.2.1" # Examples typically for a router. #gifconfig_gif1="10.1.1.2 10.1.2.2" # Examples typically for a router. fec_interfaces="" # List of Fast EtherChannels. #fec_interfaces="fec0 fec1" #fecconfig_fec0="fxp0 dc0" # Examples typically for two NICs #fecconfig_fec1="em0 em1 bge0 bge1" # Examples typically for four NICs # User ppp configuration. ppp_enable="NO" # Start user-ppp (or NO). ppp_program="/usr/sbin/ppp" # Path to user-ppp program. ppp_mode="auto" # Choice of "auto", "ddial", "direct" or "dedicated". # For details see man page for ppp(8). Default is auto. ppp_nat="YES" # Use PPP's internal network address translation or NO. ppp_profile="papchap" # Which profile to use from /etc/ppp/ppp.conf. ppp_user="root" # Which user to run ppp as # Start multiple instances of ppp at boot time #ppp_profile="profile1 profile2 profile3" # Which profiles to use #ppp_profile1_mode="ddial" # Override ppp mode for profile1 #ppp_profile2_nat="NO" # Override nat mode for profile2 # profile3 uses default ppp_mode and ppp_nat ### Network daemon (miscellaneous) ### hostapd_enable="NO" # Run hostap daemon. syslogd_enable="YES" # Run syslog daemon (or NO). syslogd_program="/usr/sbin/syslogd" # path to syslogd, if you want a different one. syslogd_flags="-s" # Flags to syslogd (if enabled). inetd_enable="NO" # Run the network daemon dispatcher (YES/NO). inetd_program="/usr/sbin/inetd" # path to inetd, if you want a different one. inetd_flags="-wW -C 60" # Optional flags to inetd hastd_enable="NO" # Run the HAST daemon (YES/NO). hastd_program="/sbin/hastd" # path to hastd, if you want a different one. hastd_flags="" # Optional flags to hastd. # # named. It may be possible to run named in a sandbox, man security for # details. # named_enable="NO" # Run named, the DNS server (or NO). named_program="/usr/sbin/named" # Path to named, if you want a different one. named_conf="/etc/namedb/named.conf" # Path to the configuration file #named_flags="" # Use this for flags OTHER than -u and -c named_pidfile="/var/run/named/pid" # Must set this in named.conf as well named_uid="bind" # User to run named as named_chrootdir="/var/named" # Chroot directory (or "" not to auto-chroot it) named_chroot_autoupdate="YES" # Automatically install/update chrooted # components of named. See /etc/rc.d/named. named_symlink_enable="YES" # Symlink the chrooted pid file named_wait="NO" # Wait for working name service before exiting named_wait_host="localhost" # Hostname to check if named_wait is enabled named_auto_forward="NO" # Set up forwarders from /etc/resolv.conf named_auto_forward_only="NO" # Do "forward only" instead of "forward first" # # kerberos. Do not run the admin daemons on slave servers # kerberos5_server_enable="NO" # Run a kerberos 5 master server (or NO). kerberos5_server="/usr/libexec/kdc" # path to kerberos 5 KDC kerberos5_server_flags="--detach" # Additional flags to the kerberos 5 server kadmind5_server_enable="NO" # Run kadmind (or NO) kadmind5_server="/usr/libexec/kadmind" # path to kerberos 5 admin daemon kpasswdd_server_enable="NO" # Run kpasswdd (or NO) kpasswdd_server="/usr/libexec/kpasswdd" # path to kerberos 5 passwd daemon gssd_enable="NO" # Run the gssd daemon (or NO). gssd_flags="" # Flags for gssd. rwhod_enable="NO" # Run the rwho daemon (or NO). rwhod_flags="" # Flags for rwhod rarpd_enable="NO" # Run rarpd (or NO). rarpd_flags="-a" # Flags to rarpd. bootparamd_enable="NO" # Run bootparamd (or NO). bootparamd_flags="" # Flags to bootparamd pppoed_enable="NO" # Run the PPP over Ethernet daemon. pppoed_provider="*" # Provider and ppp(8) config file entry. pppoed_flags="-P /var/run/pppoed.pid" # Flags to pppoed (if enabled). pppoed_interface="fxp0" # The interface that pppoed runs on. sshd_enable="NO" # Enable sshd sshd_program="/usr/sbin/sshd" # path to sshd, if you want a different one. sshd_flags="" # Additional flags for sshd. ftpd_enable="NO" # Enable stand-alone ftpd. ftpd_program="/usr/libexec/ftpd" # Path to ftpd, if you want a different one. ftpd_flags="" # Additional flags to stand-alone ftpd. ### Network daemon (NFS): All need rpcbind_enable="YES" ### amd_enable="NO" # Run amd service with $amd_flags (or NO). amd_program="/usr/sbin/amd" # path to amd, if you want a different one. amd_flags="-a /.amd_mnt -l syslog /host /etc/amd.map /net /etc/amd.map" amd_map_program="NO" # Can be set to "ypcat -k amd.master" nfs_client_enable="NO" # This host is an NFS client (or NO). nfs_access_cache="60" # Client cache timeout in seconds nfs_server_enable="NO" # This host is an NFS server (or NO). nfs_server_flags="-u -t -n 4" # Flags to nfsd (if enabled). mountd_enable="NO" # Run mountd (or NO). mountd_flags="-r" # Flags to mountd (if NFS server enabled). weak_mountd_authentication="NO" # Allow non-root mount requests to be served. nfs_reserved_port_only="NO" # Provide NFS only on secure port (or NO). nfs_bufpackets="" # bufspace (in packets) for client rpc_lockd_enable="NO" # Run NFS rpc.lockd needed for client/server. rpc_lockd_flags="" # Flags to rpc.lockd (if enabled). rpc_statd_enable="NO" # Run NFS rpc.statd needed for client/server. rpc_statd_flags="" # Flags to rpc.statd (if enabled). rpcbind_enable="NO" # Run the portmapper service (YES/NO). rpcbind_program="/usr/sbin/rpcbind" # path to rpcbind, if you want a different one. rpcbind_flags="" # Flags to rpcbind (if enabled). rpc_ypupdated_enable="NO" # Run if NIS master and SecureRPC (or NO). keyserv_enable="NO" # Run the SecureRPC keyserver (or NO). keyserv_flags="" # Flags to keyserv (if enabled). nfsv4_server_enable="NO" # Enable support for NFSv4 nfscbd_enable="NO" # NFSv4 client side callback daemon nfscbd_flags="" # Flags for nfscbd nfsuserd_enable="NO" # NFSv4 user/group name mapping daemon nfsuserd_flags="" # Flags for nfsuserd ### Network Time Services options: ### timed_enable="NO" # Run the time daemon (or NO). timed_flags="" # Flags to timed (if enabled). ntpdate_enable="NO" # Run ntpdate to sync time on boot (or NO). ntpdate_program="/usr/sbin/ntpdate" # path to ntpdate, if you want a different one. ntpdate_flags="-b" # Flags to ntpdate (if enabled). ntpdate_config="/etc/ntp.conf" # ntpdate(8) configuration file ntpdate_hosts="" # Whitespace-separated list of ntpdate(8) servers. ntpd_enable="NO" # Run ntpd Network Time Protocol (or NO). ntpd_program="/usr/sbin/ntpd" # path to ntpd, if you want a different one. ntpd_config="/etc/ntp.conf" # ntpd(8) configuration file ntpd_sync_on_start="NO" # Sync time on ntpd startup, even if offset is high ntpd_flags="-p /var/run/ntpd.pid -f /var/db/ntpd.drift" # Flags to ntpd (if enabled). # Network Information Services (NIS) options: All need rpcbind_enable="YES" ### nis_client_enable="NO" # We're an NIS client (or NO). nis_client_flags="" # Flags to ypbind (if enabled). nis_ypset_enable="NO" # Run ypset at boot time (or NO). nis_ypset_flags="" # Flags to ypset (if enabled). nis_server_enable="NO" # We're an NIS server (or NO). nis_server_flags="" # Flags to ypserv (if enabled). nis_ypxfrd_enable="NO" # Run rpc.ypxfrd at boot time (or NO). nis_ypxfrd_flags="" # Flags to rpc.ypxfrd (if enabled). nis_yppasswdd_enable="NO" # Run rpc.yppasswdd at boot time (or NO). nis_yppasswdd_flags="" # Flags to rpc.yppasswdd (if enabled). ### SNMP daemon ### # Be sure to understand the security implications of running SNMP v1/v2 # in your network. bsnmpd_enable="NO" # Run the SNMP daemon (or NO). bsnmpd_flags="" # Flags for bsnmpd. ### Network routing options: ### defaultrouter="NO" # Set to default gateway (or NO). static_arp_pairs="" # Set to static ARP list (or leave empty). static_routes="" # Set to static route list (or leave empty). natm_static_routes="" # Set to static route list for NATM (or leave empty). gateway_enable="NO" # Set to YES if this host will be a gateway. routed_enable="NO" # Set to YES to enable a routing daemon. routed_program="/sbin/routed" # Name of routing daemon to use if enabled. routed_flags="-q" # Flags for routing daemon. mrouted_enable="NO" # Do IPv4 multicast routing. mrouted_program="/usr/local/sbin/mrouted" # Name of IPv4 multicast # routing daemon. You need to # install it from package or # port. mrouted_flags="" # Flags for multicast routing daemon. ipxgateway_enable="NO" # Set to YES to enable IPX routing. ipxrouted_enable="NO" # Set to YES to run the IPX routing daemon. ipxrouted_flags="" # Flags for IPX routing daemon. arpproxy_all="NO" # replaces obsolete kernel option ARP_PROXYALL. forward_sourceroute="NO" # do source routing (only if gateway_enable is set to "YES") accept_sourceroute="NO" # accept source routed packets to us ### ATM interface options: ### atm_enable="NO" # Configure ATM interfaces (or NO). #atm_netif_hea0="atm 1" # Network interfaces for physical interface. #atm_sigmgr_hea0="uni31" # Signalling manager for physical interface. #atm_prefix_hea0="ILMI" # NSAP prefix (UNI interfaces only) (or ILMI). #atm_macaddr_hea0="NO" # Override physical MAC address (or NO). #atm_arpserver_atm0="0x47.0005.80.999999.9999.9999.9999.999999999999.00" # ATMARP server address (or local). #atm_scsparp_atm0="NO" # Run SCSP/ATMARP on network interface (or NO). atm_pvcs="" # Set to PVC list (or leave empty). atm_arps="" # Set to permanent ARP list (or leave empty). ### Bluetooth ### hcsecd_enable="NO" # Enable hcsecd(8) (or NO) hcsecd_config="/etc/bluetooth/hcsecd.conf" # hcsecd(8) configuration file sdpd_enable="NO" # Enable sdpd(8) (or NO) sdpd_control="/var/run/sdp" # sdpd(8) control socket sdpd_groupname="nobody" # set spdp(8) user/group to run as after sdpd_username="nobody" # it initializes bthidd_enable="NO" # Enable bthidd(8) (or NO) bthidd_config="/etc/bluetooth/bthidd.conf" # bthidd(8) configuration file bthidd_hids="/var/db/bthidd.hids" # bthidd(8) known HID devices file rfcomm_pppd_server_enable="NO" # Enable rfcomm_pppd(8) in server mode (or NO) rfcomm_pppd_server_profile="one two" # Profile to use from /etc/ppp/ppp.conf # #rfcomm_pppd_server_one_bdaddr="" # Override local bdaddr for 'one' rfcomm_pppd_server_one_channel="1" # Override local channel for 'one' #rfcomm_pppd_server_one_register_sp="NO" # Override SP and DUN register #rfcomm_pppd_server_one_register_dun="NO" # for 'one' # #rfcomm_pppd_server_two_bdaddr="" # Override local bdaddr for 'two' rfcomm_pppd_server_two_channel="3" # Override local channel for 'two' #rfcomm_pppd_server_two_register_sp="NO" # Override SP and DUN register #rfcomm_pppd_server_two_register_dun="NO" # for 'two' ubthidhci_enable="NO" # Switch an USB BT controller present on #ubthidhci_busnum="3" # bus 3 and addr 2 from HID mode to HCI mode. #ubthidhci_addr="2" # Check usbconfig list to find the correct # numbers for your system. ### Miscellaneous network options: ### icmp_bmcastecho="NO" # respond to broadcast ping packets ### IPv6 options: ### ipv6_network_interfaces="auto" # List of IPv6 network interfaces # (or "auto" or "none"). ipv6_activate_all_interfaces="NO" # If NO, interfaces which have no # corresponding $ifconfig_IF_ipv6 is # marked as IFDISABLED for security # reason. ipv6_defaultrouter="NO" # Set to IPv6 default gateway (or NO). #ipv6_defaultrouter="2002:c058:6301::" # Use this for 6to4 (RFC 3068) ipv6_static_routes="" # Set to static route list (or leave empty). #ipv6_static_routes="xxx" # An example to set fec0:0000:0000:0006::/64 # route toward loopback interface. #ipv6_route_xxx="fec0:0000:0000:0006:: -prefixlen 64 ::1" ipv6_gateway_enable="NO" # Set to YES if this host will be a gateway. ipv6_privacy="NO" # Use privacy address on RA-receiving IFs # (RFC 4193) route6d_enable="NO" # Set to YES to enable an IPv6 routing daemon. route6d_program="/usr/sbin/route6d" # Name of IPv6 routing daemon. route6d_flags="" # Flags to IPv6 routing daemon. #route6d_flags="-l" # Example for route6d with only IPv6 site local # addrs. #route6d_flags="-q" # If you want to run a routing daemon on an end # node, you should stop advertisement. #ipv6_network_interfaces="ed0 ep0" # Examples for router # or static configuration for end node. # Choose correct prefix value. #ipv6_prefix_ed0="fec0:0000:0000:0001 fec0:0000:0000:0002" # Examples for rtr. #ipv6_prefix_ep0="fec0:0000:0000:0003 fec0:0000:0000:0004" # Examples for rtr. ipv6_default_interface="NO" # Default output interface for scoped addrs. # This works only with # ipv6_gateway_enable="NO". rtsol_flags="" # Flags to IPv6 router solicitation. rtsold_enable="NO" # Set to YES to enable an IPv6 router # solicitation daemon. rtsold_flags="-a" # Flags to an IPv6 router solicitation # daemon. rtadvd_enable="NO" # Set to YES to enable an IPv6 router # advertisement daemon. If set to YES, # this router becomes a possible candidate # IPv6 default router for local subnets. rtadvd_interfaces="" # Interfaces rtadvd sends RA packets. mroute6d_enable="NO" # Do IPv6 multicast routing. mroute6d_program="/usr/local/sbin/pim6dd" # Name of IPv6 multicast # routing daemon. You need to # install it from package or # port. mroute6d_flags="" # Flags to IPv6 multicast routing daemon. stf_interface_ipv4addr="" # Local IPv4 addr for 6to4 IPv6 over IPv4 # tunneling interface. Specify this entry # to enable 6to4 interface. stf_interface_ipv4plen="0" # Prefix length for 6to4 IPv4 addr, # to limit peer addr range. Effective value # is 0-31. stf_interface_ipv6_ifid="0:0:0:1" # IPv6 interface id for stf0. # If you like, you can set "AUTO" for this. stf_interface_ipv6_slaid="0000" # IPv6 Site Level Aggregator for stf0 ipv6_faith_prefix="NO" # Set faith prefix to enable a FAITH # IPv6-to-IPv4 TCP translator. You also need # faithd(8) setup. ipv6_ipv4mapping="NO" # Set to "YES" to enable IPv4 mapped IPv6 addr # communication. (like ::ffff:a.b.c.d) ipv6_ipfilter_rules="/etc/ipf6.rules" # rules definition file for ipfilter, # see /usr/src/contrib/ipfilter/rules # for examples ip6addrctl_enable="YES" # Set to YES to enable default address selection ip6addrctl_verbose="NO" # Set to YES to enable verbose configuration messages ip6addrctl_policy="AUTO" # A pre-defined address selection policy # (ipv4_prefer, ipv6_prefer, or AUTO) ############################################################## ### System console options ################################# ############################################################## keyboard="" # keyboard device to use (default /dev/kbd0). keymap="NO" # keymap in /usr/share/syscons/keymaps/* (or NO). keyrate="NO" # keyboard rate to: slow, normal, fast (or NO). keybell="NO" # See kbdcontrol(1) for options. Use "off" to disable. keychange="NO" # function keys default values (or NO). cursor="NO" # cursor type {normal|blink|destructive} (or NO). scrnmap="NO" # screen map in /usr/share/syscons/scrnmaps/* (or NO). font8x16="NO" # font 8x16 from /usr/share/syscons/fonts/* (or NO). font8x14="NO" # font 8x14 from /usr/share/syscons/fonts/* (or NO). font8x8="NO" # font 8x8 from /usr/share/syscons/fonts/* (or NO). blanktime="300" # blank time (in seconds) or "NO" to turn it off. saver="NO" # screen saver: Uses /boot/kernel/${saver}_saver.ko moused_nondefault_enable="YES" # Treat non-default mice as enabled unless # specifically overriden in rc.conf(5). moused_enable="NO" # Run the mouse daemon. moused_type="auto" # See man page for rc.conf(5) for available settings. moused_port="/dev/psm0" # Set to your mouse port. moused_flags="" # Any additional flags to moused. mousechar_start="NO" # if 0xd0-0xd3 default range is occupied in your # language code table, specify alternative range # start like mousechar_start=3, see vidcontrol(1) allscreens_flags="" # Set this vidcontrol mode for all virtual screens allscreens_kbdflags="" # Set this kbdcontrol mode for all virtual screens ############################################################## ### Mail Transfer Agent (MTA) options ###################### ############################################################## mta_start_script="/etc/rc.sendmail" # Script to start your chosen MTA, called by /etc/rc. # Settings for /etc/rc.sendmail and /etc/rc.d/sendmail: sendmail_enable="NO" # Run the sendmail inbound daemon (YES/NO). sendmail_pidfile="/var/run/sendmail.pid" # sendmail pid file sendmail_procname="/usr/sbin/sendmail" # sendmail process name sendmail_flags="-L sm-mta -bd -q30m" # Flags to sendmail (as a server) sendmail_submit_enable="YES" # Start a localhost-only MTA for mail submission sendmail_submit_flags="-L sm-mta -bd -q30m -ODaemonPortOptions=Addr=localhost" # Flags for localhost-only MTA sendmail_outbound_enable="YES" # Dequeue stuck mail (YES/NO). sendmail_outbound_flags="-L sm-queue -q30m" # Flags to sendmail (outbound only) sendmail_msp_queue_enable="YES" # Dequeue stuck clientmqueue mail (YES/NO). sendmail_msp_queue_flags="-L sm-msp-queue -Ac -q30m" # Flags for sendmail_msp_queue daemon. sendmail_rebuild_aliases="NO" # Run newaliases if necessary (YES/NO). ############################################################## ### Miscellaneous administrative options ################### ############################################################## auditd_enable="NO" # Run the audit daemon. auditd_program="/usr/sbin/auditd" # Path to the audit daemon. auditd_flags="" # Which options to pass to the audit daemon. cron_enable="YES" # Run the periodic job daemon. cron_program="/usr/sbin/cron" # Which cron executable to run (if enabled). cron_dst="YES" # Handle DST transitions intelligently (YES/NO) cron_flags="" # Which options to pass to the cron daemon. lpd_enable="NO" # Run the line printer daemon. lpd_program="/usr/sbin/lpd" # path to lpd, if you want a different one. lpd_flags="" # Flags to lpd (if enabled). nscd_enable="NO" # Run the nsswitch caching daemon. chkprintcap_enable="NO" # Run chkprintcap(8) before running lpd. chkprintcap_flags="-d" # Create missing directories by default. dumpdev="AUTO" # Device to crashdump to (device name, AUTO, or NO). dumpdir="/var/crash" # Directory where crash dumps are to be stored savecore_flags="" # Used if dumpdev is enabled above, and present. crashinfo_enable="YES" # Automatically generate crash dump summary. crashinfo_program="/usr/sbin/crashinfo" # Script to generate crash dump summary. quota_enable="NO" # turn on quotas on startup (or NO). check_quotas="YES" # Check quotas on startup (or NO). quotaon_flags="-a" # Turn quotas on for all file systems (if enabled) quotaoff_flags="-a" # Turn quotas off for all file systems at shutdown quotacheck_flags="-a" # Check all file system quotas (if enabled) accounting_enable="NO" # Turn on process accounting (or NO). ibcs2_enable="NO" # Ibcs2 (SCO) emulation loaded at startup (or NO). ibcs2_loaders="coff" # List of additional Ibcs2 loaders (or NO). # Emulation/compatibility services provided by /etc/rc.d/abi sysvipc_enable="NO" # Load System V IPC primitives at startup (or NO). linux_enable="NO" # Linux binary compatibility loaded at startup (or NO). svr4_enable="NO" # SysVR4 emulation loaded at startup (or NO). clear_tmp_enable="NO" # Clear /tmp at startup. clear_tmp_X="YES" # Clear and recreate X11-related directories in /tmp ldconfig_insecure="NO" # Set to YES to disable ldconfig security checks ldconfig_paths="/usr/lib/compat /usr/local/lib /usr/local/lib/compat/pkg" # shared library search paths ldconfig32_paths="/usr/lib32" # 32-bit compatibility shared library search paths ldconfig_paths_aout="/usr/lib/compat/aout /usr/local/lib/aout" # a.out shared library search paths ldconfig_local_dirs="/usr/local/libdata/ldconfig" # Local directories with ldconfig configuration files. ldconfig_local32_dirs="/usr/local/libdata/ldconfig32" # Local directories with 32-bit compatibility ldconfig # configuration files. kern_securelevel_enable="NO" # kernel security level (see security(7)) kern_securelevel="-1" # range: -1..3 ; `-1' is the most insecure # Note that setting securelevel to 0 will result # in the system booting with securelevel set to 1, as # init(8) will raise the level when rc(8) completes. update_motd="YES" # update version info in /etc/motd (or NO) entropy_file="/entropy" # Set to NO to disable caching entropy through reboots. # /var/db/entropy-file is preferred if / is not avail. entropy_dir="/var/db/entropy" # Set to NO to disable caching entropy via cron. entropy_save_sz="2048" # Size of the entropy cache files. entropy_save_num="8" # Number of entropy cache files to save. harvest_interrupt="YES" # Entropy device harvests interrupt randomness harvest_ethernet="YES" # Entropy device harvests ethernet randomness harvest_p_to_p="YES" # Entropy device harvests point-to-point randomness dmesg_enable="YES" # Save dmesg(8) to /var/run/dmesg.boot watchdogd_enable="NO" # Start the software watchdog daemon watchdogd_flags="" # Flags to watchdogd (if enabled) devfs_rulesets="/etc/defaults/devfs.rules /etc/devfs.rules" # Files containing # devfs(8) rules. devfs_system_ruleset="" # The name (NOT number) of a ruleset to apply to /dev devfs_set_rulesets="" # A list of /mount/dev=ruleset_name settings to # apply (must be mounted already, i.e. fstab(5)) performance_cx_lowest="HIGH" # Online CPU idle state performance_cpu_freq="NONE" # Online CPU frequency economy_cx_lowest="HIGH" # Offline CPU idle state economy_cpu_freq="NONE" # Offline CPU frequency virecover_enable="YES" # Perform housekeeping for the vi(1) editor ugidfw_enable="NO" # Load mac_bsdextended(4) rules on boot bsdextended_script="/etc/rc.bsdextended" # Default mac_bsdextended(4) # ruleset file. newsyslog_enable="YES" # Run newsyslog at startup. newsyslog_flags="-CN" # Newsyslog flags to create marked files mixer_enable="YES" # Run the sound mixer. ############################################################## ### Jail Configuration ####################################### ############################################################## jail_enable="NO" # Set to NO to disable starting of any jails jail_parallel_start="NO" # Start jails in the background jail_list="" # Space separated list of names of jails jail_set_hostname_allow="YES" # Allow root user in a jail to change its hostname jail_socket_unixiproute_only="YES" # Route only TCP/IP within a jail jail_sysvipc_allow="NO" # Allow SystemV IPC use from within a jail # # To use rc's built-in jail infrastructure create entries for # each jail, specified in jail_list, with the following variables. # NOTES: # - replace 'example' with the jail's name. # - except rootdir, hostname, ip and the _multi addresses, # all of the following variables may be made global jail variables # if you don't specify a jail name (ie. jail_interface, jail_devfs_ruleset). # #jail_example_rootdir="/usr/jail/default" # Jail's root directory #jail_example_hostname="default.domain.com" # Jail's hostname #jail_example_interface="" # Jail's interface variable to create IP aliases on #jail_example_fib="0" # Routing table for setfib(1) #jail_example_ip="192.0.2.10,2001:db8::17" # Jail's primary IPv4 and IPv6 address #jail_example_ip_multi0="2001:db8::10" # and another IPv6 address #jail_example_exec_start="/bin/sh /etc/rc" # command to execute in jail for starting #jail_example_exec_afterstart0="/bin/sh command" # command to execute after the one for # starting the jail. More than one can be # specified using a trailing number #jail_example_exec_stop="/bin/sh /etc/rc.shutdown" # command to execute in jail for stopping #jail_example_devfs_enable="NO" # mount devfs in the jail #jail_example_devfs_ruleset="ruleset_name" # devfs ruleset to apply to jail - # usually you want "devfsrules_jail". #jail_example_fdescfs_enable="NO" # mount fdescfs in the jail #jail_example_procfs_enable="NO" # mount procfs in jail #jail_example_mount_enable="NO" # mount/umount jail's fs #jail_example_fstab="" # fstab(5) for mount/umount #jail_example_flags="-l -U root" # flags for jail(8) ############################################################## ### Define source_rc_confs, the mechanism used by /etc/rc.* ## ### scripts to source rc_conf_files overrides safely. ## ############################################################## if [ -z "${source_rc_confs_defined}" ]; then source_rc_confs_defined=yes source_rc_confs () { local i sourced_files for i in ${rc_conf_files}; do case ${sourced_files} in *:$i:*) ;; *) sourced_files="${sourced_files}:$i:" if [ -r $i ]; then . $i fi ;; esac done } fi Index: projects/binutils-2.17/etc/rc.d/gptboot =================================================================== --- projects/binutils-2.17/etc/rc.d/gptboot (revision 215829) +++ projects/binutils-2.17/etc/rc.d/gptboot (revision 215830) @@ -1,76 +1,77 @@ #!/bin/sh # # Copyright (c) 2010 Pawel Jakub Dawidek # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ # # PROVIDE: gptboot # REQUIRE: mountcritremote # KEYWORD: nojail . /etc/rc.subr name="gptboot" +rcvar=`set_rcvar` start_cmd="gptboot_report" gptboot_report() { gpart show | \ egrep '(^=>| freebsd-ufs .*(\[|,)(bootfailed|bootonce)(,|\]))' | \ sed 's/^=>//' | \ egrep -v '(\[|,)bootme(,|\])' | \ while read start size pos type attrs rest; do case "${pos}" in [0-9]*) if [ -n "${disk}" ]; then part="${disk}p${pos}" echo "${attrs}" | egrep -q '(\[|,)bootfailed(,|\])' bootfailed=$? echo "${attrs}" | egrep -q '(\[|,)bootonce(,|\])' bootonce=$? if [ ${bootfailed} -eq 0 ]; then logger -t gptboot -p local0.notice "Boot from ${part} failed." gpart unset -a bootfailed -i ${pos} ${disk} >/dev/null elif [ ${bootonce} -eq 0 ]; then # We want to log success after all failures. echo -n "Boot from ${part} succeeded." gpart unset -a bootonce -i ${pos} ${disk} >/dev/null fi fi ;; *) if [ "${type}" = "GPT" ]; then disk="${pos}" else disk="" fi ;; esac done | logger -t gptboot -p local0.notice } load_rc_config $name run_rc_command "$1" Index: projects/binutils-2.17/etc/rc.d/mountcritlocal =================================================================== --- projects/binutils-2.17/etc/rc.d/mountcritlocal (revision 215829) +++ projects/binutils-2.17/etc/rc.d/mountcritlocal (revision 215830) @@ -1,54 +1,54 @@ #!/bin/sh # # $FreeBSD$ # # PROVIDE: mountcritlocal # REQUIRE: root hostid_save mdconfig -# KEYWORD: nojail +# KEYWORD: nojail shutdown . /etc/rc.subr name="mountcritlocal" start_cmd="mountcritlocal_start" -stop_cmd=":" +stop_cmd=sync mountcritlocal_start() { local err # Set up the list of network filesystem types for which mounting # should be delayed until after network initialization. case ${extra_netfs_types} in [Nn][Oo]) ;; *) netfs_types="${netfs_types} ${extra_netfs_types}" ;; esac # Mount everything except nfs filesystems. check_startmsgs && echo -n 'Mounting local file systems:' mount_excludes='no' for i in ${netfs_types}; do fstype=${i%:*} mount_excludes="${mount_excludes}${fstype}," done mount_excludes=${mount_excludes%,} mount -a -t ${mount_excludes} err=$? check_startmsgs && echo '.' case ${err} in 0) ;; *) echo 'Mounting /etc/fstab filesystems failed,' \ ' startup aborted' stop_boot true ;; esac } load_rc_config $name run_rc_command "$1" Index: projects/binutils-2.17/lib/libc/stdtime =================================================================== --- projects/binutils-2.17/lib/libc/stdtime (revision 215829) +++ projects/binutils-2.17/lib/libc/stdtime (revision 215830) Property changes on: projects/binutils-2.17/lib/libc/stdtime ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/lib/libc/stdtime:r215709-215824 Index: projects/binutils-2.17/lib/libc =================================================================== --- projects/binutils-2.17/lib/libc (revision 215829) +++ projects/binutils-2.17/lib/libc (revision 215830) Property changes on: projects/binutils-2.17/lib/libc ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/lib/libc:r215709-215824 Index: projects/binutils-2.17/lib/libutil =================================================================== --- projects/binutils-2.17/lib/libutil (revision 215829) +++ projects/binutils-2.17/lib/libutil (revision 215830) Property changes on: projects/binutils-2.17/lib/libutil ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/lib/libutil:r215709-215824 Index: projects/binutils-2.17/lib/libz =================================================================== --- projects/binutils-2.17/lib/libz (revision 215829) +++ projects/binutils-2.17/lib/libz (revision 215830) Property changes on: projects/binutils-2.17/lib/libz ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/lib/libz:r215709-215824 Index: projects/binutils-2.17/sbin/ipfw =================================================================== --- projects/binutils-2.17/sbin/ipfw (revision 215829) +++ projects/binutils-2.17/sbin/ipfw (revision 215830) Property changes on: projects/binutils-2.17/sbin/ipfw ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sbin/ipfw:r215709-215824 Index: projects/binutils-2.17/sbin =================================================================== --- projects/binutils-2.17/sbin (revision 215829) +++ projects/binutils-2.17/sbin (revision 215830) Property changes on: projects/binutils-2.17/sbin ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sbin:r215709-215824 Index: projects/binutils-2.17/share/man/man4/bge.4 =================================================================== --- projects/binutils-2.17/share/man/man4/bge.4 (revision 215829) +++ projects/binutils-2.17/share/man/man4/bge.4 (revision 215830) @@ -1,282 +1,261 @@ .\" Copyright (c) 2001 Wind River Systems .\" Copyright (c) 1997, 1998, 1999, 2000, 2001 .\" Bill Paul . All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. All advertising materials mentioning features or use of this software .\" must display the following acknowledgement: .\" This product includes software developed by Bill Paul. .\" 4. Neither the name of the author nor the names of any co-contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD .\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF .\" THE POSSIBILITY OF SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd November 7, 2010 +.Dd November 23, 2010 .Dt BGE 4 .Os .Sh NAME .Nm bge .Nd "Broadcom BCM57xx/BCM590x Gigabit/Fast Ethernet driver" .Sh SYNOPSIS To compile this driver into the kernel, place the following lines in your kernel configuration file: .Bd -ragged -offset indent .Cd "device miibus" .Cd "device bge" .Ed .Pp Alternatively, to load the driver as a module at boot time, place the following line in .Xr loader.conf 5 : .Bd -literal -offset indent if_bge_load="YES" .Ed .Sh DESCRIPTION The .Nm driver provides support for various NICs based on the Broadcom BCM570x, 571x, 572x, 575x, 576x, 578x, 5776x and 5778x Gigabit Ethernet controller chips and the 590x and 5779x Fast Ethernet controller chips. .Pp All of these NICs are capable of 10, 100 and 1000Mbps speeds over CAT5 copper cable, except for the SysKonnect SK-9D41 which supports only 1000Mbps over multimode fiber. The BCM570x builds upon the technology of the Alteon Tigon II. It has two R4000 CPU cores and is PCI v2.2 and PCI-X v1.0 compliant. It supports IP, TCP and UDP checksum offload for both receive and transmit, multiple RX and TX DMA rings for QoS applications, rules-based receive filtering, and VLAN tag stripping/insertion as well as a 256-bit multicast hash filter. Additional features may be provided via value-add firmware updates. The BCM570x supports TBI (ten bit interface) and GMII transceivers, which means it can be used with either copper or 1000baseX fiber applications. Note however the device only supports a single speed in TBI mode. .Pp Most BCM5700-based cards also use the Broadcom BCM5401 or BCM5411 10/100/1000 copper gigabit transceivers, which support autonegotiation of 10, 100 and 1000Mbps modes in full or half duplex. .Pp The BCM5700, BCM5701, BCM5702, BCM5703, BCM5704 and BCM5717 also support jumbo frames, which can be configured via the interface MTU setting. Selecting an MTU larger than 1500 bytes with the .Xr ifconfig 8 utility configures the adapter to receive and transmit jumbo frames. Using jumbo frames can greatly improve performance for certain tasks, such as file transfers and data streaming. .Pp The .Nm driver supports the following media types: .Bl -tag -width ".Cm 10baseT/UTP" .It Cm autoselect Enable autoselection of the media type and options. The user can manually override the autoselected mode by adding media options to .Xr rc.conf 5 . .It Cm 10baseT/UTP Set 10Mbps operation. The .Xr ifconfig 8 .Ic mediaopt option can also be used to select either .Cm full-duplex or .Cm half-duplex modes. .It Cm 100baseTX Set 100Mbps (Fast Ethernet) operation. The .Xr ifconfig 8 .Ic mediaopt option can also be used to select either .Cm full-duplex or .Cm half-duplex modes. .It Cm 1000baseTX Set 1000baseTX operation over twisted pair. Only .Cm full-duplex mode is supported. .It Cm 1000baseSX Set 1000Mbps (Gigabit Ethernet) operation. Both .Cm full-duplex and .Cm half-duplex modes are supported. .El .Pp The .Nm driver supports the following media options: .Bl -tag -width ".Cm full-duplex" .It Cm full-duplex Force full duplex operation. .It Cm half-duplex Force half duplex operation. -.El -.Pp -The -.Nm -driver also supports one special link option for 1000baseTX cards: -.Bl -tag -width ".Cm link0" -.It Cm link0 -With 1000baseTX cards, establishing a link between two ports requires -that one port be configured as a master and the other a slave. -With autonegotiation, -the master/slave settings will be chosen automatically. -However when manually selecting the link state, it is necessary to -force one side of the link to be a master and the other a slave. -The -.Nm -driver configures the ports as slaves by default. -Setting the -.Cm link0 -flag with -.Xr ifconfig 8 -will set a port as a master instead. .El .Pp For more information on configuring this device, see .Xr ifconfig 8 . .Sh HARDWARE The .Nm driver provides support for various NICs based on the Broadcom BCM570x family of Gigabit Ethernet controller chips, including the following: .Pp .Bl -bullet -compact .It 3Com 3c996-SX (1000baseSX) .It 3Com 3c996-T (10/100/1000baseTX) .It Dell PowerEdge 1750 integrated BCM5704C NIC (10/100/1000baseTX) .It Dell PowerEdge 2550 integrated BCM5700 NIC (10/100/1000baseTX) .It Dell PowerEdge 2650 integrated BCM5703 NIC (10/100/1000baseTX) .It Dell PowerEdge R200 integrated BCM5750 NIC (10/100/1000baseTX) .It Dell PowerEdge R300 integrated BCM5722 NIC (10/100/1000baseTX) .It IBM x235 server integrated BCM5703x NIC (10/100/1000baseTX) .It HP Compaq dc7600 integrated BCM5752 NIC (10/100/1000baseTX) .It HP ProLiant NC7760 embedded Gigabit NIC (10/100/1000baseTX) .It HP ProLiant NC7770 PCI-X Gigabit NIC (10/100/1000baseTX) .It HP ProLiant NC7771 PCI-X Gigabit NIC (10/100/1000baseTX) .It HP ProLiant NC7781 embedded PCI-X Gigabit NIC (10/100/1000baseTX) .It Netgear GA302T (10/100/1000baseTX) .It SysKonnect SK-9D21 (10/100/1000baseTX) .It SysKonnect SK-9D41 (1000baseSX) .El .Sh LOADER TUNABLES The following tunable can be set at the .Xr loader 8 prompt before booting the kernel, or stored in .Xr loader.conf 5 . .Bl -tag -width indent .It Va hw.bge.allow_asf Allow the ASF feature for cooperating with IPMI. Can cause system lockup problems on a small number of systems. Enabled by default. .El .Sh SYSCTL VARIABLES The following variables are available as both .Xr sysctl 8 variables and .Xr loader 8 tunables: .Bl -tag -width indent .It Va dev.bge.%d.forced_collapse Allow collapsing multiple transmit buffers into a single buffer to increase transmit performance with the cost of CPU cycles. The default value is 0 to disable transmit buffer collapsing. .It Va dev.bge.%d.forced_udpcsum Enable UDP transmit checksum offloading even if controller can generate UDP datagrams with checksum value 0. UDP datagrams with checksum value 0 can confuse receiver host as it means sender did not compute UDP checksum. The default value is 0 which disables UDP transmit checksum offloading. The interface need to be brought down and up again before a change takes effect. .El .Sh DIAGNOSTICS .Bl -diag .It "bge%d: couldn't map memory" A fatal initialization error has occurred. .It "bge%d: couldn't map ports" A fatal initialization error has occurred. .It "bge%d: couldn't map interrupt" A fatal initialization error has occurred. .It "bge%d: no memory for softc struct!" The driver failed to allocate memory for per-device instance information during initialization. .It "bge%d: failed to enable memory mapping!" The driver failed to initialize PCI shared memory mapping. This might happen if the card is not in a bus-master slot. .It "bge%d: no memory for jumbo buffers!" The driver failed to allocate memory for jumbo frames during initialization. .It "bge%d: watchdog timeout" The device has stopped responding to the network, or there is a problem with the network connection (cable). .El .Sh SEE ALSO .Xr altq 4 , .Xr arp 4 , .Xr miibus 4 , .Xr netintro 4 , .Xr ng_ether 4 , .Xr polling 4 , .Xr vlan 4 , .Xr ifconfig 8 .Sh HISTORY The .Nm device driver first appeared in .Fx 4.5 . .Sh AUTHORS The .Nm driver was written by .An Bill Paul Aq wpaul@windriver.com . Index: projects/binutils-2.17/share/man/man4/msk.4 =================================================================== --- projects/binutils-2.17/share/man/man4/msk.4 (revision 215829) +++ projects/binutils-2.17/share/man/man4/msk.4 (revision 215830) @@ -1,275 +1,254 @@ .\" Copyright (c) 2006 Pyun YongHyeon .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd April 30, 2010 +.Dd November 23, 2010 .Dt MSK 4 .Os .Sh NAME .Nm msk .Nd Marvell/SysKonnect Yukon II Gigabit Ethernet adapter driver .Sh SYNOPSIS To compile this driver into the kernel, place the following lines in your kernel configuration file: .Bd -ragged -offset indent .Cd "device miibus" .Cd "device msk" .Ed .Pp Alternatively, to load the driver as a module at boot time, place the following line in .Xr loader.conf 5 : .Bd -literal -offset indent if_msk_load="YES" .Ed .Sh DESCRIPTION The .Nm device driver provides support for various NICs based on the Marvell/SysKonnect Yukon II Gigabit Ethernet controller chip. .Pp All NICs supported by the .Nm driver have TCP/UDP/IP checksum offload for transmit, TCP segmentation offload (TSO), hardware VLAN tag stripping/insertion features and an interrupt moderation mechanism as well as a 64-bit multicast hash filter. The Yukon II supports TBI (ten bit interface) and GMII transceivers, which means it can be used with either copper or 1000baseX fiber applications. .Pp The Yukon II also supports Jumbo Frames (up to 9022 bytes), which can be configured via the interface MTU setting. Selecting an MTU larger than 1500 bytes with the .Xr ifconfig 8 utility configures the adapter to receive and transmit Jumbo Frames. .Pp The .Nm driver supports the following media types: .Bl -tag -width ".Cm 10baseT/UTP" .It Cm autoselect Enable autoselection of the media type and options. The user can manually override the autoselected mode by adding media options to .Xr rc.conf 5 . .It Cm 10baseT/UTP Set 10Mbps operation. The .Xr ifconfig 8 .Cm mediaopt option can also be used to select either .Cm full-duplex or .Cm half-duplex modes. .It Cm 100baseTX Set 100Mbps (Fast Ethernet) operation. The .Xr ifconfig 8 .Cm mediaopt option can also be used to select either .Cm full-duplex or .Cm half-duplex modes. .It Cm 1000baseTX Set 1000baseTX operation over twisted pair. The .Xr ifconfig 8 .Cm mediaopt option can also be used to select either .Cm full-duplex or .Cm half-duplex modes. .It Cm 1000baseSX Set 1000Mbps (Gigabit Ethernet) operation. Both .Cm full-duplex and .Cm half-duplex modes are supported. .El .Pp The .Nm driver supports the following media options: .Bl -tag -width ".Cm full-duplex" .It Cm full-duplex Force full duplex operation. .It Cm half-duplex Force half duplex operation. -.El -.Pp -The -.Nm -driver also supports one special link option for 1000baseTX cards: -.Bl -tag -width ".Cm link0" -.It Cm link0 -With 1000baseTX cards, establishing a link between two ports requires -that one port is configured as master and the other one as slave. -With autonegotiation, -the master/slave settings will be chosen automatically. -However when manually selecting the link state, it is necessary to -force one side of the link to be a master and the other a slave. -The -.Nm -driver configures the ports as slaves by default. -Setting the -.Cm link0 -flag with -.Xr ifconfig 8 -will set a port as a master instead. .El .Pp For more information on configuring this device, see .Xr ifconfig 8 . .Sh HARDWARE The .Nm driver provides support for various NICs based on the Marvell/SysKonnect Yukon II based Gigabit Ethernet controller chips, including: .Pp .Bl -bullet -compact .It D-Link 550SX Gigabit Ethernet .It D-Link 560SX Gigabit Ethernet .It D-Link 560T Gigabit Ethernet .It Marvell Yukon 88E8021CU Gigabit Ethernet .It Marvell Yukon 88E8021 SX/LX Gigabit Ethernet .It Marvell Yukon 88E8022CU Gigabit Ethernet .It Marvell Yukon 88E8022 SX/LX Gigabit Ethernet .It Marvell Yukon 88E8061CU Gigabit Ethernet .It Marvell Yukon 88E8061 SX/LX Gigabit Ethernet .It Marvell Yukon 88E8062CU Gigabit Ethernet .It Marvell Yukon 88E8062 SX/LX Gigabit Ethernet .It Marvell Yukon 88E8035 Fast Ethernet .It Marvell Yukon 88E8036 Fast Ethernet .It Marvell Yukon 88E8038 Fast Ethernet .It Marvell Yukon 88E8039 Fast Ethernet .It Marvell Yukon 88E8040 Fast Ethernet .It Marvell Yukon 88E8040T Fast Ethernet .It Marvell Yukon 88E8042 Fast Ethernet .It Marvell Yukon 88E8048 Fast Ethernet .It Marvell Yukon 88E8050 Gigabit Ethernet .It Marvell Yukon 88E8052 Gigabit Ethernet .It Marvell Yukon 88E8053 Gigabit Ethernet .It Marvell Yukon 88E8055 Gigabit Ethernet .It Marvell Yukon 88E8056 Gigabit Ethernet .It Marvell Yukon 88E8057 Gigabit Ethernet .It Marvell Yukon 88E8058 Gigabit Ethernet .It Marvell Yukon 88E8059 Gigabit Ethernet .It Marvell Yukon 88E8070 Gigabit Ethernet .It Marvell Yukon 88E8071 Gigabit Ethernet .It Marvell Yukon 88E8072 Gigabit Ethernet .It SysKonnect SK-9Sxx Gigabit Ethernet .It SysKonnect SK-9Exx Gigabit Ethernet .El .Sh LOADER TUNABLES Tunables can be set at the .Xr loader 8 prompt before booting the kernel or stored in .Xr loader.conf 5 . .Bl -tag -width indent .It Va hw.msk.msi_disable This tunable disables MSI support on the Ethernet hardware. The default value is 0. .El .Sh SYSCTL VARIABLES The following variables are available as both .Xr sysctl 8 variables and .Xr loader 8 tunables: .Bl -tag -width indent .It Va dev.mskc.%d.int_holdoff Maximum number of time to delay interrupts. The valid range is 0 to 34359738 for 125MHz clock in units of 1us, the default is 100 (100us). The interface need to be brought down and up again before a change takes effect. .It Va dev.mskc.%d.process_limit Maximum amount of Rx events to be processed in the event loop before rescheduling a taskqueue. The accepted range is 30 to 256, the default value is 128 events. The interface does not need to be brought down and up again before a change takes effect. .El .Sh SEE ALSO .Xr altq 4 , .Xr arp 4 , .Xr miibus 4 , .Xr netintro 4 , .Xr ng_ether 4 , .Xr vlan 4 , .Xr ifconfig 8 .Sh HISTORY The .Nm driver was written by .An Pyun YongHyeon .Aq yongari@FreeBSD.org and it is based on .Xr sk 4 and Marvell's .Fx driver. It first appeared in .Fx 7.0 and .Fx 6.3 . Index: projects/binutils-2.17/share/man/man4/nge.4 =================================================================== --- projects/binutils-2.17/share/man/man4/nge.4 (revision 215829) +++ projects/binutils-2.17/share/man/man4/nge.4 (revision 215830) @@ -1,248 +1,227 @@ .\" Copyright (c) 2001 Wind River Systems .\" Copyright (c) 1997, 1998, 1999, 2000, 2001 .\" Bill Paul . All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. All advertising materials mentioning features or use of this software .\" must display the following acknowledgement: .\" This product includes software developed by Bill Paul. .\" 4. Neither the name of the author nor the names of any co-contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD .\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF .\" THE POSSIBILITY OF SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd May 21, 2009 +.Dd November 23, 2010 .Dt NGE 4 .Os .Sh NAME .Nm nge .Nd "National Semiconductor PCI Gigabit Ethernet adapter driver" .Sh SYNOPSIS To compile this driver into the kernel, place the following lines in your kernel configuration file: .Bd -ragged -offset indent .Cd "device miibus" .Cd "device nge" .Ed .Pp Alternatively, to load the driver as a module at boot time, place the following line in .Xr loader.conf 5 : .Bd -literal -offset indent if_nge_load="YES" .Ed .Sh DESCRIPTION The .Nm driver provides support for various NICs based on the National Semiconductor DP83820 and DP83821 Gigabit Ethernet controller chips. .Pp The DP83820 supports TBI (ten bit interface) and GMII transceivers, which means it can be used with either copper or 1000baseX fiber applications. The DP83820 supports TCP/IP checksum offload and VLAN tagging/insertion as well as a 2048-bit multicast hash filter and up to 4 pattern match buffers. .Pp Most cards also use the DP83861 10/100/1000 copper gigabit transceiver chip, which supports autonegotiation of 10, 100 and 1000Mbps modes in full or half duplex. .Pp The DP83820 and DP83821 also support jumbo frames, which can be configured via the interface MTU setting. Selecting an MTU larger than 1500 bytes with the .Xr ifconfig 8 utility configures the adapter to receive and transmit jumbo frames. Using jumbo frames can greatly improve performance for certain tasks, such as file transfers and data streaming. .Pp The .Nm driver supports the following media types: .Bl -tag -width 10baseTXUTP .It Cm autoselect Enable autoselection of the media type and options. The user can manually override the autoselected mode by adding media options to .Xr rc.conf 5 . .It Cm 10baseT/UTP Set 10Mbps operation. The .Xr ifconfig 8 .Ic mediaopt option can also be used to select either .Cm full-duplex or .Cm half-duplex modes. .It Cm 100baseTX Set 100Mbps (Fast Ethernet) operation. The .Xr ifconfig 8 .Ic mediaopt option can also be used to select either .Cm full-duplex or .Cm half-duplex modes. .It Cm 1000baseTX Set 1000baseTX operation over twisted pair. .Cm full-duplex and .Cm half-duplex modes are supported. .It Cm 1000baseSX Set 1000Mbps (Gigabit Ethernet) operation. Both .Cm full-duplex and .Cm half-duplex modes are supported. .El .Pp The .Nm driver supports the following media options: .Bl -tag -width full-duplex .It Cm full-duplex Force full duplex operation. .It Cm half-duplex Force half duplex operation. -.El -.Pp -The -.Nm -driver also supports one special link option for 1000baseTX cards: -.Bl -tag -width link0 -.It Cm link0 -With 1000baseTX cards, establishing a link between two ports requires -that one port be configured as a master and the other a slave. -With autonegotiation, -the master/slave settings will be chosen automatically. -However when manually selecting the link state, it is necessary to -force one side of the link to be a master and the other a slave. -The -.Nm -driver configures the ports as slaves by default. -Setting the -.Cm link0 -flag with -.Xr ifconfig 8 -will set a port as a master instead. .El .Pp For more information on configuring this device, see .Xr ifconfig 8 . .Sh HARDWARE The .Nm driver supports National Semiconductor DP83820 and DP83821 based Gigabit Ethernet adapters including: .Pp .Bl -bullet -compact .It Addtron AEG320T .It Ark PC SOHO-GA2500T (32-bit PCI) and SOHO-GA2000T (64-bit PCI) .It Asante FriendlyNet GigaNIX 1000TA and 1000TPC .It D-Link DGE-500T .It Linksys EG1032, revision 1 .It Netgear GA621 .It Netgear GA622T .It SMC EZ Card 1000 (SMC9462TX) .It Surecom Technology EP-320G-TX .It Trendware TEG-PCITX (32-bit PCI) and TEG-PCITX2 (64-bit PCI) .El .Sh SYSCTL VARIABLES The following variables are available as both .Xr sysctl 8 variables and .Xr loader 8 tunables: .Bl -tag -width "xxxxxx" .It Va dev.nge.%d.int_holdoff Maximum amount of time to delay interrupt processing in units of 100us. The accepted range is 0 to 255, the default is 1(100us). Value 0 completely disables the interrupt moderation. The interface has to be brought down and up again before a change takes effect. .El .Sh DIAGNOSTICS .Bl -diag .It "nge%d: couldn't map memory" A fatal initialization error has occurred. .It "nge%d: couldn't map ports" A fatal initialization error has occurred. .It "nge%d: couldn't map interrupt" A fatal initialization error has occurred. .It "nge%d: no memory for softc struct!" The driver failed to allocate memory for per-device instance information during initialization. .It "nge%d: failed to enable memory mapping!" The driver failed to initialize PCI shared memory mapping. This might happen if the card is not in a bus-master slot. .It "nge%d: no memory for jumbo buffers!" The driver failed to allocate memory for jumbo frames during initialization. .It "nge%d: watchdog timeout" The device has stopped responding to the network, or there is a problem with the network connection (cable). .El .Sh SEE ALSO .Xr altq 4 , .Xr arp 4 , .Xr miibus 4 , .Xr netintro 4 , .Xr ng_ether 4 , .Xr polling 4 , .Xr vlan 4 , .Xr ifconfig 8 .Rs .%T National Semiconductor DP83820 datasheet .%U http://www.national.com .Re .Rs .%T National Semiconductor DP83861 datasheet .%U http://www.national.com .Re .Sh HISTORY The .Nm device driver first appeared in .Fx 4.4 . .Sh AUTHORS The .Nm driver was written by .An Bill Paul Aq wpaul@bsdi.com . Index: projects/binutils-2.17/share/man/man4/sk.4 =================================================================== --- projects/binutils-2.17/share/man/man4/sk.4 (revision 215829) +++ projects/binutils-2.17/share/man/man4/sk.4 (revision 215830) @@ -1,262 +1,241 @@ .\" Copyright (c) 1997, 1998, 1999 .\" Bill Paul . All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. All advertising materials mentioning features or use of this software .\" must display the following acknowledgement: .\" This product includes software developed by Bill Paul. .\" 4. Neither the name of the author nor the names of any co-contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD .\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF .\" THE POSSIBILITY OF SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd January 23, 2009 +.Dd November 23, 2010 .Dt SK 4 .Os .Sh NAME .Nm sk .Nd "SysKonnect SK-984x and SK-982x PCI Gigabit Ethernet adapter driver" .Sh SYNOPSIS To compile this driver into the kernel, place the following lines in your kernel configuration file: .Bd -ragged -offset indent .Cd "device miibus" .Cd "device sk" .Ed .Pp Alternatively, to load the driver as a module at boot time, place the following line in .Xr loader.conf 5 : .Bd -literal -offset indent if_sk_load="YES" .Ed .Sh DESCRIPTION The .Nm driver provides support for the SysKonnect SK-984x and SK-982x series PCI Gigabit Ethernet adapters. .Pp The SysKonnect adapters consist of two main components: the XaQti Corp. XMAC II gigabit MAC and the SysKonnect GEnesis controller ASIC. The XMAC provides the gigabit MAC and PHY support while the GEnesis provides an interface to the PCI bus, DMA support, packet buffering and arbitration. The GEnesis can control up to two XMACs simultaneously, allowing dual-port NIC configurations. .Pp The SK-982x 1000baseT adapters also include a Broadcom BCM5400 1000baseTX PHY which is used in place of the XMAC's internal PHY. The Broadcom PHY is connected to the XMAC via its GMII port. .Pp The .Nm driver configures dual port SysKonnect adapters such that each XMAC is treated as a separate logical network interface. Both ports can operate independently of each other and can be connected to separate networks. The SysKonnect driver software currently only uses the second port on dual port adapters for failover purposes: if the link on the primary port fails, the SysKonnect driver will automatically switch traffic onto the second port. .Pp Also supported is the Marvell Semiconductor 88E100* gigabit PHY. .Pp The XaQti XMAC II supports full and half duplex operation with autonegotiation. The XMAC also supports unlimited frame sizes. Support for jumbo frames is provided via the interface MTU setting. Selecting an MTU larger than 1500 bytes with the .Xr ifconfig 8 utility configures the adapter to receive and transmit jumbo frames. Using jumbo frames can greatly improve performance for certain tasks, such as file transfers and data streaming. .Pp The .Nm driver supports the following media types: .Bl -tag -width xxxxxxxxxxxxxxxxxxxx .It autoselect Enable autoselection of the media type and options. The user can manually override the autoselected mode by adding media options to the .Pa /etc/rc.conf file. .It 1000baseTX Set 1000baseTX operation over twisted pair. This is only available for SK-982x series adapters with 1000baseT ports. Both .Ar full-duplex and .Ar half-duplex modes are supported. .It 1000baseSX Set 1000Mbps (Gigabit Ethernet) operation. Both .Ar full-duplex and .Ar half-duplex modes are supported. .El .Pp The .Nm driver supports the following media options: .Bl -tag -width xxxxxxxxxxxxxxxxxxxx .It full-duplex Force full duplex operation .It half-duplex Force half duplex operation. -.El -.Pp -The -.Nm -driver also supports one special link option for 1000baseTX cards: -.Bl -tag -width xxxxxxxxxxxxxxxxxxxx -.It link0 -With 1000baseTX cards, establishing a link between two ports requires -that one port is configured as master and the other one as slave. -With autonegotiation, -the master/slave settings will be chosen automatically. -However when manually selecting the link state, it is necessary to -force one side of the link to be a master and the other a slave. -The -.Nm -driver configures the ports as slaves by default. -Setting the -.Ar link0 -flag with -.Xr ifconfig 8 -will set a port as a master instead. .El .Pp For more information on configuring this device, see .Xr ifconfig 8 . .Sh HARDWARE Adapters supported by the .Nm driver include: .Pp .Bl -bullet -compact .It 3Com 3C940 single port, 1000baseT adapter .It 3Com 3C2000-T single port, 1000baseT adapter .It Belkin F5D5005 single port, 1000baseT adapter .It D-Link DGE-530T single port, 1000baseT adapter .It Linksys (revision 2) single port, 1000baseT adapter .It SK-9521 SK-NET GE-T single port, 1000baseT adapter .It SK-9821 SK-NET GE-T single port, 1000baseT adapter .It SK-9822 SK-NET GE-T dual port, 1000baseT adapter .It SK-9841 SK-NET GE-LX single port, single mode fiber adapter .It SK-9842 SK-NET GE-LX dual port, single mode fiber adapter .It SK-9843 SK-NET GE-SX single port, multimode fiber adapter .It SK-9844 SK-NET GE-SX dual port, multimode fiber adapter .It SMC 9452TX single port, 1000baseT adapter .El .Sh LOADER TUNABLES Tunables can be set at the .Xr loader 8 prompt before booting the kernel or stored in .Xr loader.conf 5 . .Bl -tag -width xxxxxx .It Va hw.skc.jumbo_disable Disable jumbo frame support. Systems with less memory can set it to a non-zero value to save memory. The default value is 0. .El .Sh SYSCTL VARIABLES The following variable is available as both .Xr sysctl 8 variable and .Xr loader 8 tunable: .Bl -tag -width xxxxxx .It Va dev.skc.%d.int_mod This variable controls interrupt moderation. The accepted range is 10 to 10000. The default value is 100 microseconds. The interface has to be brought down and up again before a change takes effect. .El .Sh DIAGNOSTICS .Bl -diag .It "sk%d: couldn't map memory" A fatal initialization error has occurred. .It "sk%d: couldn't map ports" A fatal initialization error has occurred. .It "sk%d: couldn't map interrupt" A fatal initialization error has occurred. .It "sk%d: no memory for softc struct!" The driver failed to allocate memory for per-device instance information during initialization. .It "sk%d: failed to enable memory mapping!" The driver failed to initialize PCI shared memory mapping. This might happen if the card is not in a bus-master slot. .It "sk%d: no memory for jumbo buffers!" The driver failed to allocate memory for jumbo frames during initialization. .It "sk%d: watchdog timeout" The device has stopped responding to the network, or there is a problem with the network connection (cable). .El .Sh SEE ALSO .Xr altq 4 , .Xr arp 4 , .Xr miibus 4 , .Xr netintro 4 , .Xr ng_ether 4 , .Xr vlan 4 , .Xr ifconfig 8 .Rs .%T XaQti XMAC II datasheet .%U http://www.xaqti.com .Re .Rs .%T SysKonnect GEnesis programming manual .%U http://www.syskonnect.com .Re .Sh HISTORY The .Nm device driver first appeared in .Fx 3.0 . .Sh AUTHORS The .Nm driver was written by .An Bill Paul Aq wpaul@ctr.columbia.edu . Index: projects/binutils-2.17/share/man/man4/stge.4 =================================================================== --- projects/binutils-2.17/share/man/man4/stge.4 (revision 215829) +++ projects/binutils-2.17/share/man/man4/stge.4 (revision 215830) @@ -1,223 +1,202 @@ .\" $NetBSD: stge.4,v 1.7 2003/02/14 15:20:20 grant Exp $ .\" .\" Copyright (c) 2001 The NetBSD Foundation, Inc. .\" All rights reserved. .\" .\" This code is derived from software contributed to The NetBSD Foundation .\" by Jason R. Thorpe. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS .\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED .\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR .\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS .\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd July 25, 2006 +.Dd November 23, 2010 .Dt STGE 4 .Os .Sh NAME .Nm stge .Nd Sundance/Tamarack TC9021 Gigabit Ethernet adapter driver .Sh SYNOPSIS To compile this driver into the kernel, place the following lines in your kernel configuration file: .Bd -ragged -offset indent .Cd "device miibus" .Cd "device stge" .Ed .Pp Alternatively, to load the driver as a module at boot time, place the following line in .Xr loader.conf 5 : .Bd -literal -offset indent if_stge_load="YES" .Ed .Sh DESCRIPTION The .Nm device driver provides support for various NICs based on the Sundance/Tamarack TC9021 Gigabit Ethernet controller chip. .Pp The Sundance/Tamarack TC9021 is found on the D-Link DGE-550T and the Antares Microsystems Gigabit Ethernet board. It uses an external PHY or an external 10-bit interface. .Pp All NICs supported by the .Nm driver have TCP/UDP/IP checksum offload for both receive and transmit, hardware VLAN tag stripping/insertion features, and receive interrupt moderation mechanism as well as a 64-bit multicast hash filter. The Sundance/Tamarack TC9021 supports TBI (ten bit interface) and GMII transceivers, which means it can be used with either copper or 1000baseX fiber applications. .Pp The Sundance/Tamarack TC9021 also supports jumbo frames, which can be configured via the interface MTU setting. Selecting an MTU larger than 1500 bytes with the .Xr ifconfig 8 utility configures the adapter to receive and transmit jumbo frames. .Pp The .Nm driver supports the following media types: .Bl -tag -width ".Cm 10baseT/UTP" .It Cm autoselect Enable autoselection of the media type and options. The user can manually override the autoselected mode by adding media options to .Xr rc.conf 5 . .It Cm 10baseT/UTP Set 10Mbps operation. The .Xr ifconfig 8 .Cm mediaopt option can also be used to select either .Cm full-duplex or .Cm half-duplex modes. .It Cm 100baseTX Set 100Mbps (Fast Ethernet) operation. The .Xr ifconfig 8 .Cm mediaopt option can also be used to select either .Cm full-duplex or .Cm half-duplex modes. .It Cm 1000baseTX Set 1000baseTX operation over twisted pair. The Sundance/Tamarack supports 1000Mbps in .Cm autoselect mode only. .\" .It Cm 1000baseSX .\" Set 1000Mbps (Gigabit Ethernet) operation. .\" Both .\" .Cm full-duplex .\" and .\" .Cm half-duplex .\" modes are supported. .El .Pp The .Nm driver supports the following media options: .Bl -tag -width ".Cm full-duplex" .It Cm full-duplex Force full duplex operation. .It Cm half-duplex Force half duplex operation. -.El -.Pp -The -.Nm -driver also supports one special link option for 1000baseTX cards: -.Bl -tag -width ".Cm link0" -.It Cm link0 -With 1000baseTX cards, establishing a link between two ports requires -that one port is configured as master and the other one as slave. -With autonegotiation, -the master/slave settings will be chosen automatically. -However when manually selecting the link state, it is necessary to -force one side of the link to be a master and the other a slave. -The -.Nm -driver configures the ports as slaves by default. -Setting the -.Cm link0 -flag with -.Xr ifconfig 8 -will set a port as a master instead. .El .Pp For more information on configuring this device, see .Xr ifconfig 8 . .Sh HARDWARE The .Nm driver provides support for various NICs based on the Sundance/Tamarack TC9021 based Gigabit Ethernet controller chips, including: .Pp .Bl -bullet -compact .It Antares Microsystems Gigabit Ethernet .It ASUS NX1101 Gigabit Ethernet .It D-Link DL-4000 Gigabit Ethernet .It IC Plus IP1000A Gigabit Ethernet .It Sundance ST-2021 Gigabit Ethernet .It Sundance ST-2023 Gigabit Ethernet .It Sundance TC9021 Gigabit Ethernet .It Tamarack TC9021 Gigabit Ethernet .El .Sh SYSCTL VARIABLES The following variables are available as both .Xr sysctl 8 variables and .Xr loader 8 tunables: .Bl -tag -width indent .It Va dev.stge.%d.rxint_nframe Number of frames between RxDMAComplete interrupts. The accepted range is 1 to 255, default value is 8 frames. The interface has to be brought down and up again before a change takes effect. .It Va dev.stge.%d.rxint_dmawait Maximum amount of time to wait in 1us increments before issuing an Rx interrupt if the number of frames received is less than .Va rxint_nframe . The accepted range is 0 to 4194, default value is 30 microseconds. The interface has to be brought down and up again before a change takes effect. .El .Sh SEE ALSO .Xr altq 4 , .Xr arp 4 , .Xr miibus 4 , .Xr netintro 4 , .Xr ng_ether 4 , .Xr polling 4 , .Xr vlan 4 , .Xr ifconfig 8 .Sh HISTORY The .Nm driver was ported from .Nx and first appeared in .Fx 6.2 . The .Nx version was written by .An Jason R. Thorpe .Aq thorpej@NetBSD.org . .Sh AUTHORS The .Nm driver was ported by .An Pyun YongHyeon .Aq yongari@FreeBSD.org . Index: projects/binutils-2.17/share/man/man4/vge.4 =================================================================== --- projects/binutils-2.17/share/man/man4/vge.4 (revision 215829) +++ projects/binutils-2.17/share/man/man4/vge.4 (revision 215830) @@ -1,243 +1,222 @@ .\" Copyright (c) 2004 .\" Bill Paul . All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. All advertising materials mentioning features or use of this software .\" must display the following acknowledgement: .\" This product includes software developed by Bill Paul. .\" 4. Neither the name of the author nor the names of any co-contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD .\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF .\" THE POSSIBILITY OF SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd December 18, 2009 +.Dd November 23, 2010 .Dt VGE 4 .Os .Sh NAME .Nm vge .Nd "VIA Networking Technologies Velocity Gigabit Ethernet adapter driver" .Sh SYNOPSIS To compile this driver into the kernel, place the following lines in your kernel configuration file: .Bd -ragged -offset indent .Cd "device miibus" .Cd "device vge" .Ed .Pp Alternatively, to load the driver as a module at boot time, place the following line in .Xr loader.conf 5 : .Bd -literal -offset indent if_vge_load="YES" .Ed .Sh DESCRIPTION The .Nm driver provides support for various NICs and embedded Ethernet interfaces based on the VIA Technologies VT6120, VT6122, VT6130 and VT6132 Velocity Family Gigabit Ethernet controller chips. .Pp The VT6120/VT6122 is a 33/66MHz 64-bit PCI device which combines a tri-speed MAC with an integrated 10/100/1000 copper PHY. (Some older cards use an external PHY.) The VT6130/VT6132 is the PCI express version of Velocity family. The MAC supports TCP/IP hardware checksums (IPv4 only), TCP large send, VLAN tag insertion and stripping, as well as VLAN filtering, a 64-entry CAM filter and a 64-entry VLAN filter, 64-bit multicast hash filter, 4 separate transmit DMA queues, flow control and jumbo frames up to 16K in size. The Velocity family controllers have a 16K receive FIFO and 48K transmit FIFO. .Pp The .Nm driver takes advantage of the controller's checksum offload and VLAN tagging features, as well as the jumbo frame and CAM filter support. The CAM filter is used for multicast address filtering to provide 64 perfect multicast address filter support. If it is necessary for the interface to join more than 64 multicast groups, the driver will switch over to using the hash filter. .Pp The jumbo frame support can be enabled by setting the interface MTU to any value larger than the default of 1500 bytes, up to a maximum of 9000 bytes. The receive and transmit checksum offload support can be toggled on and off using the .Xr ifconfig 8 utility. .Pp The .Nm driver supports the following media types: .Bl -tag -width ".Cm 10baseT/UTP" .It Cm autoselect Enable autoselection of the media type and options. The user can manually override the autoselected mode by adding media options to .Xr rc.conf 5 . .It Cm 10baseT/UTP Set 10Mbps operation. The .Xr ifconfig 8 .Cm mediaopt option can also be used to select either .Cm full-duplex or .Cm half-duplex modes. .It Cm 100baseTX Set 100Mbps (Fast Ethernet) operation. The .Xr ifconfig 8 .Cm mediaopt option can also be used to select either .Cm full-duplex or .Cm half-duplex modes. .It Cm 1000baseTX Set 1000baseTX operation over twisted pair. The .Xr ifconfig 8 .Cm mediaopt option can also be used to select either .Cm full-duplex or .Cm half-duplex modes. .El .Pp The .Nm driver supports the following media options: .Bl -tag -width ".Cm full-duplex" .It Cm full-duplex Force full duplex operation. .It Cm half-duplex Force half duplex operation. -.El -.Pp -The -.Nm -driver also supports one special link option for 1000baseTX cards: -.Bl -tag -width ".Cm link0" -.It Cm link0 -With 1000baseTX cards, establishing a link between two ports requires -that one port be configured as a master and the other a slave. -With autonegotiation, -the master/slave settings will be chosen automatically. -However when manually selecting the link state, it is necessary to -force one side of the link to be a master and the other a slave. -The -.Nm -driver configures the ports as slaves by default. -Setting the -.Cm link0 -flag with -.Xr ifconfig 8 -will set a port as a master instead. .El .Pp For more information on configuring this device, see .Xr ifconfig 8 . .Sh HARDWARE The .Nm driver supports VIA Networking VT6120, VT6122, VT6130 and VT6132 based Gigabit Ethernet adapters including: .Pp .Bl -bullet -compact .It VIA Networking LAN-on-motherboard Gigabit Ethernet .It ZyXEL GN650-T 64-bit PCI Gigabit Ethernet NIC (ZX1701) .It ZyXEL GN670-T 32-bit PCI Gigabit Ethernet NIC (ZX1702) .El .Sh LOADER TUNABLES Tunables can be set at the .Xr loader 8 prompt before booting the kernel or stored in .Xr loader.conf 5 . .Bl -tag -width "xxxxxx" .It Va hw.vge.msi_disable This tunable disables MSI support on the Ethernet hardware. The default value is 0. .El .Sh SYSCTL VARIABLES The following variables are available as both .Xr sysctl 8 variables and .Xr loader 8 tunables: .Bl -tag -width "xxxxxx" .It Va dev.vge.%d.int_holdoff Maximum number of time to delay interrupts. The valid range is 0 to 5100 in units of 1us, the default is 150 (150us). The resolution of of timer is about 20us so finer tuning than 20us wouldn't be available. The interface should be brought down and up again before a change takes effect. .It Va dev.vge.%d.rx_coal_pkt Maximum number of packets to fire Rx completion interrupt. The valid range is 1 to 255, the default is 64. .It Va dev.vge.%d.tx_coal_pkt Maximum number of packets to fire Tx completion interrupt. The valid range is 1 to 255, the default is 128. .El .Sh DIAGNOSTICS .Bl -diag .It "vge%d: couldn't map memory" A fatal initialization error has occurred. .It "vge%d: couldn't map ports" A fatal initialization error has occurred. .It "vge%d: couldn't map interrupt" A fatal initialization error has occurred. .It "vge%d: failed to enable memory mapping!" The driver failed to initialize PCI shared memory mapping. This might happen if the card is not in a bus-master slot. .It "vge%d: watchdog timeout" The device has stopped responding to the network, or there is a problem with the network connection (cable). .El .Sh SEE ALSO .Xr altq 4 , .Xr arp 4 , .Xr miibus 4 , .Xr netintro 4 , .Xr ng_ether 4 , .Xr polling 4 , .Xr vlan 4 , .Xr ifconfig 8 .Sh HISTORY The .Nm device driver first appeared in .Fx 5.3 . .Sh AUTHORS The .Nm driver was written by .An Bill Paul Aq wpaul@windriver.com . Index: projects/binutils-2.17/share/man/man5/rc.conf.5 =================================================================== --- projects/binutils-2.17/share/man/man5/rc.conf.5 (revision 215829) +++ projects/binutils-2.17/share/man/man5/rc.conf.5 (revision 215830) @@ -1,4331 +1,4342 @@ .\" Copyright (c) 1995 .\" Jordan K. Hubbard .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd November 13, 2010 +.Dd November 24, 2010 .Dt RC.CONF 5 .Os .Sh NAME .Nm rc.conf .Nd system configuration information .Sh DESCRIPTION The file .Nm contains descriptive information about the local host name, configuration details for any potential network interfaces and which services should be started up at system initial boot time. In new installations, the .Nm file is generally initialized by the system installation utility, .Xr sysinstall 8 . .Pp The purpose of .Nm is not to run commands or perform system startup actions directly. Instead, it is included by the various generic startup scripts in .Pa /etc which conditionalize their internal actions according to the settings found there. .Pp The .Pa /etc/rc.conf file is included from the file .Pa /etc/defaults/rc.conf , which specifies the default settings for all the available options. Options need only be specified in .Pa /etc/rc.conf when the system administrator wishes to override these defaults. The file .Pa /etc/rc.conf.local is used to override settings in .Pa /etc/rc.conf for historical reasons. See the .Va rc_conf_files variable below. .Pp Options are set with .Dq Ar name Ns Li = Ns Ar value assignments that use .Xr sh 1 syntax. The following list provides a name and short description for each variable that can be set in the .Nm file: .Bl -tag -width indent-two .It Va rc_debug .Pq Vt bool If set to .Dq Li YES , enable output of debug messages from rc scripts. This variable can be helpful in diagnosing mistakes when editing or integrating new scripts. Beware that this produces copious output to the terminal and .Xr syslog 3 . .It Va rc_info .Pq Vt bool If set to .Dq Li NO , disable informational messages from the rc scripts. Informational messages are displayed when a condition that is not serious enough to warrant a warning or an error occurs. .It Va rc_startmsgs .Pq Vt bool If set to .Dq Li YES , show .Dq Starting foo: when faststart is used (e.g., at boot time). .It Va early_late_divider .Pq Vt str The name of the script that should be used as the delimiter between the .Dq early and .Dq late stages of the boot process. The early stage should contain all the services needed to get the disks (local or remote) mounted so that the late stage can include scripts contained in the directories listed in the .Va local_startup variable (see below). Thus, the two likely candidates for this value are .Pa mountcritlocal for the typical system, and .Pa mountcritremote if the system needs remote file systems mounted to get access to the .Va local_startup directories; for example when .Pa /usr/local is NFS mounted. For .Pa rc.conf within a .Xr jail 8 .Pa NETWORKING is likely to be an appropriate value. Extreme care should be taken when changing this value, and before changing it one should ensure that there are adequate provisions to recover from a failed boot (such as physical contact with the machine, or reliable remote console access). .It Va swapfile .Pq Vt str If set to .Dq Li NO , no swapfile is installed, otherwise the value is used as the full pathname to a file to use for additional swap space. .It Va apm_enable .Pq Vt bool If set to .Dq Li YES , enable support for Automatic Power Management with the .Xr apm 8 command. .It Va apmd_enable .Pq Vt bool Run .Xr apmd 8 to handle APM event from userland. This also enables support for APM. .It Va apmd_flags .Pq Vt str If .Va apmd_enable is set to .Dq Li YES , these are the flags to pass to the .Xr apmd 8 daemon. .It Va devd_enable .Pq Vt bool Run .Xr devd 8 to handle device added, removed or unknown events from the kernel. .It Va ddb_enable .Pq Vt bool Run .Xr ddb 8 to install .Xr ddb 4 scripts at boot time. .It Va ddb_config .Pq Vt str Configuration file for .Xr ddb 8 . Default .Pa /etc/ddb.conf . .It Va kldxref_enable .Pq Vt bool Set to .Dq Li NO by default. Set to .Dq Li YES to automatically rebuild .Pa linker.hints files with .Xr kldxref 8 at boot time. .It Va kldxref_clobber .Pq Vt bool Set to .Dq Li NO by default. If .Va kldxref_enable is true, setting to .Dq Li YES will overwrite existing .Pa linker.hints files at boot time. Otherwise, only missing .Pa linker.hints files are generated. .It Va kldxref_module_path .Pq Vt str Empty by default. A semi-colon .Pq Ql \&; delimited list of paths containing .Xr kld 4 modules. If empty, the contents of the .Va kern.module_path .Xr sysctl 8 are used. .It Va powerd_enable .Pq Vt bool If set to .Dq Li YES , enable the system power control facility with the .Xr powerd 8 daemon. .It Va powerd_flags .Pq Vt str If .Va powerd_enable is set to .Dq Li YES , these are the flags to pass to the .Xr powerd 8 daemon. .It Va tmpmfs Controls the creation of a .Pa /tmp memory file system. Always happens if set to .Dq Li YES and never happens if set to .Dq Li NO . If set to anything else, a memory file system is created if .Pa /tmp is not writable. .It Va tmpsize Controls the size of a created .Pa /tmp memory file system. .It Va tmpmfs_flags Extra options passed to the .Xr mdmfs 8 utility when the memory file system for .Pa /tmp is created. The default is .Dq Li "-S" , which inhibits the use of softupdates on .Pa /tmp so that file system space is freed without delay after file truncation or deletion. See .Xr mdmfs 8 for other options you can use in .Va tmpmfs_flags . .It Va varmfs Controls the creation of a .Pa /var memory file system. Always happens if set to .Dq Li YES and never happens if set to .Dq Li NO . If set to anything else, a memory file system is created if .Pa /var is not writable. .It Va varsize Controls the size of a created .Pa /var memory file system. .It Va varmfs_flags Extra options passed to the .Xr mdmfs 8 utility when the memory file system for .Pa /var is created. The default is .Dq Li "-S" , which inhibits the use of softupdates on .Pa /var so that file system space is freed without delay after file truncation or deletion. See .Xr mdmfs 8 for other options you can use in .Va varmfs_flags . .It Va populate_var Controls the automatic population of the .Pa /var file system. Always happens if set to .Dq Li YES and never happens if set to .Dq Li NO . If set to anything else, a memory file system is created if .Pa /var is not writable. Note that this process requires access to certain commands in .Pa /usr before .Pa /usr is mounted on normal systems. .It Va cleanvar_enable .Pq Vt bool Clean the .Pa /var directory. .It Va local_startup .Pq Vt str List of directories to search for startup script files. .It Va script_name_sep .Pq Vt str The field separator to use for breaking down the list of startup script files into individual filenames. The default is a space. It is not necessary to change this unless there are startup scripts with names containing spaces. .It Va hostapd_enable .Pq Vt bool Set to .Dq Li YES to start .Xr hostapd 8 at system boot time. .It Va hostname .Pq Vt str The fully qualified domain name (FQDN) of this host on the network. This should almost certainly be set to something meaningful, even if there is no network connection. If .Xr dhclient 8 is used to set the hostname via DHCP, this variable should be set to an empty string. If this value remains unset when the system is done booting your console login will display the default hostname of .Dq Amnesiac. .It Va nisdomainname .Pq Vt str The NIS domain name of this host, or .Dq Li NO if NIS is not used. .It Va dhclient_program .Pq Vt str Path to the DHCP client program .Pa ( /sbin/dhclient , the .Ox DHCP client, is the default). .It Va dhclient_flags .Pq Vt str Additional flags to pass to the DHCP client program. For the .Ox DHCP client, see the .Xr dhclient 8 manpage for a description of the command line options available. .It Va dhclient_flags_ Ns Aq Ar iface Additional flags to pass to the DHCP client program running on .Ar iface only. When specified, this variable overrides .Va dhclient_flags . .It Va background_dhclient .Pq Vt bool Set to .Dq Li YES to start the DHCP client in background. This can cause trouble with applications depending on a working network, but it will provide a faster startup in many cases. .It Va background_dhclient_ Ns Aq Ar iface When specified, this variable overrides the .Va background_dhclient variable for interface .Ar iface only. .It Va synchronous_dhclient .Pq Vt bool Set to .Dq Li YES to start .Xr dhclient 8 synchronously at startup. This behavior can be overridden on a per-interface basis by replacing the .Dq Li DHCP keyword in the .Va ifconfig_ Ns Aq Ar interface variable with .Dq Li SYNCDHCP or .Dq Li NOSYNCDHCP . .It Va defaultroute_delay .Pq Vt int When set to a positive value, wait up to this long after configuring DHCP interfaces at startup to give the interfaces time to receive a lease. .It Va firewall_enable .Pq Vt bool Set to .Dq Li YES to load firewall rules at startup. If the kernel was not built with .Cd "options IPFIREWALL" , the .Pa ipfw.ko kernel module will be loaded. See also .Va ipfilter_enable . .It Va firewall_script .Pq Vt str This variable specifies the full path to the firewall script to run. The default is .Pa /etc/rc.firewall . .It Va firewall_type .Pq Vt str Names the firewall type from the selection in .Pa /etc/rc.firewall , or the file which contains the local firewall ruleset. Valid selections from .Pa /etc/rc.firewall are: .Pp .Bl -tag -width ".Li simple" -compact .It Li open unrestricted IP access .It Li closed all IP services disabled, except via .Dq Li lo0 .It Li client basic protection for a workstation .It Li simple basic protection for a LAN. .El .Pp If a filename is specified, the full path must be given. .It Va firewall_quiet .Pq Vt bool Set to .Dq Li YES to disable the display of firewall rules on the console during boot. .It Va firewall_logging .Pq Vt bool Set to .Dq Li YES to enable firewall event logging. This is equivalent to the .Dv IPFIREWALL_VERBOSE kernel option. .It Va firewall_flags .Pq Vt str Flags passed to .Xr ipfw 8 if .Va firewall_type specifies a filename. .It Va firewall_coscripts .Pq Vt str List of executables and/or rc scripts to run after firewall starts/stops. Default is empty. .\" ----- firewall_nat_enable setting -------------------------------- .It Va firewall_nat_enable .Pq Vt bool The .Xr ipfw 8 equivalent of .Va natd_enable . Setting this to .Dq Li YES enables kernel NAT. .Va firewall_enable must also be set to .Dq Li YES . .It Va firewall_nat_interface .Pq Vt str The .Xr ipfw 8 equivalent of .Va natd_interface . This is the name of the public interface or IP address on which kernel NAT should run. .It Va firewall_nat_flags .Pq Vt str Additional configuration parameters for kernel NAT should be placed here. .It Va dummynet_enable .Pq Vt bool Setting this to .Dq Li YES will automatically load the .Xr dummynet 4 module if .Va firewall_enable is also set to .Dq Li YES . .\" ------------------------------------------------------------------- .It Va natd_program .Pq Vt str Path to .Xr natd 8 . .It Va natd_enable .Pq Vt bool Set to .Dq Li YES to enable .Xr natd 8 . .Va firewall_enable must also be set to .Dq Li YES , and .Xr divert 4 sockets must be enabled in the kernel. If the kernel was not built with .Cd "options IPDIVERT" , the .Pa ipdivert.ko kernel module will be loaded. .It Va natd_interface .Pq Vt str This is the name of the public interface on which .Xr natd 8 should run. The interface may be given as an interface name or as an IP address. .It Va natd_flags .Pq Vt str Additional .Xr natd 8 flags should be placed here. The .Fl n or .Fl a flag is automatically added with the above .Va natd_interface as an argument. .\" ----- ipfilter_enable setting -------------------------------- .It Va ipfilter_enable .Pq Vt bool Set to .Dq Li NO by default. Setting this to .Dq Li YES enables .Xr ipf 8 packet filtering. .Pp Typical usage will require putting .Bd -literal ipfilter_enable="YES" ipnat_enable="YES" ipmon_enable="YES" ipfs_enable="YES" .Ed .Pp into .Pa /etc/rc.conf and editing .Pa /etc/ipf.rules and .Pa /etc/ipnat.rules appropriately. .Pp Note that .Va ipfilter_enable and .Va ipnat_enable can be enabled independently. .Va ipmon_enable and .Va ipfs_enable both require at least one of .Va ipfilter_enable and .Va ipnat_enable to be enabled. .Pp Having .Bd -literal options IPFILTER options IPFILTER_LOG options IPFILTER_DEFAULT_BLOCK .Ed .Pp in the kernel configuration file is a good idea, too. .\" ----- ipfilter_program setting ------------------------------ .It Va ipfilter_program .Pq Vt str Path to .Xr ipf 8 (default .Pa /sbin/ipf ) . .\" ----- ipfilter_rules setting -------------------------------- .It Va ipfilter_rules .Pq Vt str Set to .Pa /etc/ipf.rules by default. This variable contains the name of the filter rule definition file. The file is expected to be readable for the .Xr ipf 8 command to execute. .\" ----- ipv6_ipfilter_rules setting --------------------------- .It Va ipv6_ipfilter_rules .Pq Vt str Set to .Pa /etc/ipf6.rules by default. This variable contains the IPv6 filter rule definition file. The file is expected to be readable for the .Xr ipf 8 command to execute. .\" ----- ipfilter_flags setting -------------------------------- .It Va ipfilter_flags .Pq Vt str Empty by default. This variable contains flags passed to the .Xr ipf 8 program. .\" ----- ipnat_enable setting ---------------------------------- .It Va ipnat_enable .Pq Vt bool Set to .Dq Li NO by default. Set it to .Dq Li YES to enable .Xr ipnat 8 network address translation. See .Va ipfilter_enable for a detailed discussion. .\" ----- ipnat_program setting --------------------------------- .It Va ipnat_program .Pq Vt str Path to .Xr ipnat 8 (default .Pa /sbin/ipnat ) . .\" ----- ipnat_rules setting ----------------------------------- .It Va ipnat_rules .Pq Vt str Set to .Pa /etc/ipnat.rules by default. This variable contains the name of the file holding the network address translation definition. This file is expected to be readable for the .Xr ipnat 8 command to execute. .\" ----- ipnat_flags setting ----------------------------------- .It Va ipnat_flags .Pq Vt str Empty by default. This variable contains flags passed to the .Xr ipnat 8 program. .\" ----- ipmon_enable setting ---------------------------------- .It Va ipmon_enable .Pq Vt bool Set to .Dq Li NO by default. Set it to .Dq Li YES to enable .Xr ipmon 8 monitoring (logging .Xr ipf 8 and .Xr ipnat 8 events). Setting this variable needs setting .Va ipfilter_enable or .Va ipnat_enable too. See .Va ipfilter_enable for a detailed discussion. .\" ----- ipmon_program setting --------------------------------- .It Va ipmon_program .Pq Vt str Path to .Xr ipmon 8 (default .Pa /sbin/ipmon ) . .\" ----- ipmon_flags setting ----------------------------------- .It Va ipmon_flags .Pq Vt str Set to .Dq Li -Ds by default. This variable contains flags passed to the .Xr ipmon 8 program. Another typical example would be .Dq Fl D Pa /var/log/ipflog to have .Xr ipmon 8 log directly to a file bypassing .Xr syslogd 8 . Make sure to adjust .Pa /etc/newsyslog.conf in such case like this: .Bd -literal /var/log/ipflog 640 10 100 * Z /var/run/ipmon.pid .Ed .\" ----- ipfs_enable setting ----------------------------------- .It Va ipfs_enable .Pq Vt bool Set to .Dq Li NO by default. Set it to .Dq Li YES to enable .Xr ipfs 8 saving the filter and NAT state tables during shutdown and reloading them during startup again. Setting this variable needs setting .Va ipfilter_enable or .Va ipnat_enable to .Dq Li YES too. See .Va ipfilter_enable for a detailed discussion. Note that if .Va kern_securelevel is set to 3, .Va ipfs_enable cannot be used because the raised securelevel will prevent .Xr ipfs 8 from saving the state tables at shutdown time. .\" ----- ipfs_program setting ---------------------------------- .It Va ipfs_program .Pq Vt str Path to .Xr ipfs 8 (default .Pa /sbin/ipfs ) . .\" ----- ipfs_flags setting ------------------------------------ .It Va ipfs_flags .Pq Vt str Empty by default. This variable contains flags passed to the .Xr ipfs 8 program. .\" ----- end of added ipf hook --------------------------------- .It Va pf_enable .Pq Vt bool Set to .Dq Li NO by default. Setting this to .Dq Li YES enables .Xr pf 4 packet filtering. .Pp Typical usage will require putting .Pp .Dl pf_enable="YES" .Pp into .Pa /etc/rc.conf and editing .Pa /etc/pf.conf appropriately. Adding .Pp .Dl "device pf" .Pp builds support for .Xr pf 4 into the kernel, otherwise the kernel module will be loaded. .It Va pf_rules .Pq Vt str Path to .Xr pf 4 ruleset configuration file (default .Pa /etc/pf.conf ) . .It Va pf_program .Pq Vt str Path to .Xr pfctl 8 (default .Pa /sbin/pfctl ) . .It Va pf_flags .Pq Vt str If .Va pf_enable is set to .Dq Li YES , these flags are passed to the .Xr pfctl 8 program when loading the ruleset. .It Va pflog_enable .Pq Vt bool Set to .Dq Li NO by default. Setting this to .Dq Li YES enables .Xr pflogd 8 which logs packets from the .Xr pf 4 packet filter. .It Va pflog_logfile .Pq Vt str If .Va pflog_enable is set to .Dq Li YES this controls where .Xr pflogd 8 stores the logfile (default .Pa /var/log/pflog ) . Check .Pa /etc/newsyslog.conf to adjust logfile rotation for this. .It Va pflog_program .Pq Vt str Path to .Xr pflogd 8 (default .Pa /sbin/pflogd ) . .It Va pflog_flags .Pq Vt str Empty by default. This variable contains additional flags passed to the .Xr pflogd 8 program. .It Va ftpproxy_enable .Pq Vt bool Set to .Dq Li NO by default. Setting this to .Dq Li YES enables .Xr ftp-proxy 8 which supports the .Xr pf 4 packet filter in translating ftp connections. .It Va ftpproxy_flags .Pq Vt str Empty by default. This variable contains additional flags passed to the .Xr ftp-proxy 8 program. .It Va pfsync_enable .Pq Vt bool Set to .Dq Li NO by default. Setting this to .Dq Li YES enables exposing .Xr pf 4 state changes to other hosts over the network by means of .Xr pfsync 4 . The .Va pfsync_syncdev variable must also be set then. .It Va pfsync_syncdev .Pq Vt str Empty by default. This variable specifies the name of the network interface .Xr pfsync 4 should operate through. It must be set accordingly if .Va pfsync_enable is set to .Dq Li YES . .It Va pfsync_syncpeer .Pq Vt str Empty by default. This variable is optional. By default, state change messages are sent out on the synchronisation interface using IP multicast packets. The protocol is IP protocol 240, PFSYNC, and the multicast group used is 224.0.0.240. When a peer address is specified using the .Va pfsync_syncpeer option, the peer address is used as a destination for the pfsync traffic, and the traffic can then be protected using .Xr ipsec 4 . See the .Xr pfsync 4 manpage for more details about using .Xr ipsec 4 with .Xr pfsync 4 interfaces. .It Va pfsync_ifconfig .Pq Vt str Empty by default. This variable can contain additional options to be passed to the .Xr ifconfig 8 command used to set up .Xr pfsync 4 . .It Va tcp_extensions .Pq Vt bool Set to .Dq Li YES by default. Setting this to .Dq Li NO disables certain TCP options as described by .Rs .%T "RFC 1323" .Re Setting this to .Dq Li NO might help remedy such problems with connections as randomly hanging or other weird behavior. Some network devices are known to be broken with respect to these options. .It Va log_in_vain .Pq Vt int Set to 0 by default. The .Xr sysctl 8 variables, .Va net.inet.tcp.log_in_vain and .Va net.inet.udp.log_in_vain , as described in .Xr tcp 4 and .Xr udp 4 , are set to the given value. .It Va tcp_keepalive .Pq Vt bool Set to .Dq Li YES by default. Setting to .Dq Li NO will disable probing idle TCP connections to verify that the peer is still up and reachable. .It Va tcp_drop_synfin .Pq Vt bool Set to .Dq Li NO by default. Setting to .Dq Li YES will cause the kernel to ignore TCP frames that have both the SYN and FIN flags set. This prevents OS fingerprinting, but may break some legitimate applications. .It Va icmp_drop_redirect .Pq Vt bool Set to .Dq Li NO by default. Setting to .Dq Li YES will cause the kernel to ignore ICMP REDIRECT packets. Refer to .Xr icmp 4 for more information. .It Va icmp_log_redirect .Pq Vt bool Set to .Dq Li NO by default. Setting to .Dq Li YES will cause the kernel to log ICMP REDIRECT packets. Note that the log messages are not rate-limited, so this option should only be used for troubleshooting networks. Refer to .Xr icmp 4 for more information. .It Va icmp_bmcastecho .Pq Vt bool Set to .Dq Li YES to respond to broadcast or multicast ICMP ping packets. Refer to .Xr icmp 4 for more information. .It Va ip_portrange_first .Pq Vt int If not set to .Dq Li NO , this is the first port in the default portrange. Refer to .Xr ip 4 for more information. .It Va ip_portrange_last .Pq Vt int If not set to .Dq Li NO , this is the last port in the default portrange. Refer to .Xr ip 4 for more information. .It Va network_interfaces .Pq Vt str Set to the list of network interfaces to configure on this host or .Dq Li AUTO (the default) for all current interfaces. Setting the .Va network_interfaces variable to anything other than the default is deprecated. Interfaces that the administrator wishes to store configuration for, but not start at boot should be configured with the .Dq Li NOAUTO keyword in their .Va ifconfig_ Ns Aq Ar interface variables as described below. .Pp An .Va ifconfig_ Ns Aq Ar interface variable is also assumed to exist for each value of .Ar interface . When an interface name contains any of the characters .Dq Li .-/+ they are translated to .Dq Li _ before lookup. The variable can contain arguments to .Xr ifconfig 8 , as well as special case-insensitive keywords described below. Such keywords are removed before passing the value to .Xr ifconfig 8 while the order of the other arguments is preserved. .Pp One can configure more than one IPv4 address with the .Va ipv4_addrs_ Ns Aq Ar interface variable. One or more IP addresses must be provided in Classless Inter-Domain Routing (CIDR) address notation, whose last byte can be a range like 192.0.2.5-23/24. In this case the address 192.0.2.5 will be configured with the netmask /24 and the addresses 192.0.2.6 to 192.0.2.23 with the non-conflicting netmask /32 as explained in the .Xr ifconfig 8 alias section. With the interface in question being .Li ed0 , an example could look like: .Bd -literal ipv4_addrs_ed0="192.0.2.129/27 192.0.2.1-5/28" .Ed .Pp It is also possible to add IP alias entries using .Xr ifconfig 8 syntax. Assuming that the interface in question was .Li ed0 , it might look something like this: .Bd -literal ifconfig_ed0_alias0="inet 127.0.0.253 netmask 0xffffffff" ifconfig_ed0_alias1="inet 127.0.0.254 netmask 0xffffffff" .Ed .Pp And so on. For each .Va ifconfig_ Ns Ao Ar interface Ac Ns Va _alias Ns Aq Ar n entry that is found, its contents are passed to .Xr ifconfig 8 . Execution stops at the first unsuccessful access, so if something like this is present: .Bd -literal ifconfig_ed0_alias0="inet 127.0.0.251 netmask 0xffffffff" ifconfig_ed0_alias1="inet 127.0.0.252 netmask 0xffffffff" ifconfig_ed0_alias2="inet 127.0.0.253 netmask 0xffffffff" ifconfig_ed0_alias4="inet 127.0.0.254 netmask 0xffffffff" .Ed .Pp Then note that alias4 would .Em not be added since the search would stop with the missing .Dq Li alias3 entry. Due to this difficult to manage behavior, the .Va ifconfig_ Ns Ao Ar interface Ac Ns Va _alias Ns Aq Ar n form is deprecated. .Pp If the .Pa /etc/start_if. Ns Aq Ar interface file is present, it is read and executed by the .Xr sh 1 interpreter before configuring the interface as specified in the .Va ifconfig_ Ns Aq Ar interface and .Va ifconfig_ Ns Ao Ar interface Ac Ns Va _alias Ns Aq Ar n variables. .Pp If a .Va vlans_ Ns Aq Ar interface variable is set, a .Xr vlan 4 interface will be created for each item in the list with the .Ar vlandev argument set to .Ar interface . If a vlan interface's name is a number, then that number is used as the vlan tag and the new vlan interface is named .Ar interface . Ns Ar tag . Otherwise, the vlan tag must be specified via a .Va vlan parameter in the .Va create_args_ Ns Aq Ar interface variable. .Pp To create a vlan device named .Li em0.101 on .Li em0 with the vlan tag 101 and the optional the IPv4 address 192.0.2.1/24: .Bd -literal vlans_em0="101" ifconfig_em0_101="inet 192.0.2.1/24" .Ed .Pp To create a vlan device named .Li myvlan on .Li em0 with the vlan tag 102: .Bd -literal vlans_em0="myvlan" create_args_myvlan="vlan 102" .Ed .Pp If a .Va wlans_ Ns Aq Ar interface variable is set, an .Xr wlan 4 interface will be created for each item in the list with the .Ar wlandev argument set to .Ar interface . Further wlan cloning arguments may be passed to the .Xr ifconfig 8 .Cm create command by setting the .Va create_args_ Ns Aq Ar interface variable. One or more .Xr wlan 4 devices must be created for each wireless devices as of .Fx 8.0 . Debugging flags for .Xr wlan 4 devices as set by .Xr wlandebug 8 may be specified with an .Va wlandebug_ Ns Aq Ar interface variable. The contents of this variable will be passed directly to .Xr wlandebug 8 . .Pp If the .Va ifconfig_ Ns Aq Ar interface contains the keyword .Dq Li NOAUTO then the interface will not be configured at boot or by .Pa /etc/pccard_ether when .Va network_interfaces is set to .Dq Li AUTO . .Pp It is possible to bring up an interface with DHCP by adding .Dq Li DHCP to the .Va ifconfig_ Ns Aq Ar interface variable. For instance, to initialize the .Li ed0 device via DHCP, it is possible to use something like: .Bd -literal ifconfig_ed0="DHCP" .Ed .Pp Also, if you want to configure your wireless interface with .Xr wpa_supplicant 8 for use with WPA, EAP/LEAP or WEP, you need to add .Dq Li WPA to the .Va ifconfig_ Ns Aq Ar interface variable. .Pp Finally, you can add .Xr ifconfig 8 options in this variable, in addition to the .Pa /etc/start_if. Ns Aq Ar interface file. For instance, to configure an .Xr ath 4 wireless device in station mode with an address obtained via DHCP, using WPA authentication and 802.11b mode, it is possible to use something like: .Bd -literal wlans_ath0="wlan0" ifconfig_wlan0="DHCP WPA mode 11b" .Ed .Pp In addition to the .Va ifconfig_ Ns Aq Ar interface form, a fallback variable .Va ifconfig_DEFAULT may be configured. It will be used for all interfaces with no .Va ifconfig_ Ns Aq Ar interface variable. This is intended to replace the no longer supported .Va pccard_ifconfig variable. .Pp It is also possible to rename an interface by doing: .Bd -literal ifconfig_ed0_name="net0" ifconfig_net0="inet 192.0.2.1 netmask 0xffffff00" .Ed .It Va ipv6_enable .Pq Vt bool If the variable is .Dq Li YES , .Dq Li inet6 accept_rtadv is added to all of .Va ifconfig_ Ns Ao Ar interface Ac Ns _ipv6 and the .Va ipv6_activate_all_interfaces is defined as .Dq Li YES . .Pp This variable is deprecated. Use .Va ifconfig_ Ns Ao Ar interface Ac Ns _ipv6 and .Va ipv6_activate_all_interfaces if necessary. .It Va ipv6_prefer .Pq Vt bool If the variable is .Dq Li YES , the default address selection policy table set by .Xr ip6addrctl 8 will be IPv6-preferred. .Pp If the variable is .Dq Li NO , the default address selection policy table set by .Xr ip6addrctl 8 will be IPv4-preferred. .Pp This variable is deprecated. Use .Va ip6addtctl_policy instead. .It Va ipv6_activate_all_interfaces If the variable is .Dq Li NO , all of interfaces which do not have the corrsponding .Va ifconfig_ Ns Ao Ar interface Ac Ns _ipv6 variable will be marked as .Dq Li IFDISABLED for security reason. This means only IPv6 functionality on that interface is completely disabled. For more details of .Dq Li IFDISABLED flag and keywords .Dq Li inet6 ifdisabled , see .Xr ifconfig 8 . .Pp Default is .Dq Li NO . .It Va ipv6_privacy .Pq Vt bool If the variable is .Dq Li YES privacy addresses will be generated for each IPv6 interface as described in RFC 4193. .It Va ipv6_network_interfaces .Pq Vt str This is the IPv6 equivalent of .Va network_interfaces . Normally manual configuration of this variable is not needed. .Pp .It Va ifconfig_ Ns Ao Ar interface Ac Ns _ipv6 .Pq Vt str IPv6 functionality on an interface should be configured by .Va ifconfig_ Ns Ao Ar interface Ac Ns _ipv6 , instead of setting ifconfig parameters in .Va ifconfig_ Ns Aq Ar interface . Aliases should be set by .Va ifconfig_ Ns Ao Ar interface Ac Ns Va _alias Ns Aq Ar n with .Dq Li inet6 keyword. For example: .Bd -literal ifconfig_ed0_ipv6="inet6 2001:db8:1::1 prefixlen 64" ifconfig_ed0_alias0="inet6 2001:db8:2::1 prefixlen 64" .Ed .Pp Interfaces that have an .Dq Li inet6 accept_rtadv keyword in .Va ifconfig_ Ns Ao Ar interface Ac Ns _ipv6 setting will be automatically configured by .Xr rtsol 8 . Note that this automatic configuration is disabled if the .Va ipv6_gateway_enable is set to .Dq Li YES . .It Va ipv6_prefix_ Ns Aq Ar interface .Pq Vt str If one or more prefixes are defined in .Va ipv6_prefix_ Ns Aq Ar interface addresses based on each prefix and the EUI-64 interface index will be configured on that interface. .It Va ipv6_default_interface .Pq Vt str If not set to .Dq Li NO , this is the default output interface for scoped addresses. This works only with ipv6_gateway_enable="NO". .It Va ip6addrctl_enable .Pq Vt bool This variable is to enable configuring default address selection policy table .Pq RFC 3484 . The table can be specified in another variable .Va ip6addrctl_policy . For .Va ip6addrctl_policy the following keywords can be specified: .Dq Li ipv4_prefer , .Dq Li ipv6_prefer , or .Dq Li AUTO . .Pp If .Dq Li ipv4_prefer or .Dq Li ipv6_prefer is specified, .Xr ip6addrctl 8 installs a pre-defined policy table described in Section 2.1 .Pq IPv6-preferred or 10.3 .Pq IPv4-preferred of RFC 3484. .Pp If .Dq Li AUTO is specified, it attempts to read a file .Pa /etc/ip6addrctl.conf first. If this file is found, .Xr ip6addrctl 8 reads and installs it. If not found, a policy is automatically set according to .Va ipv6_activate_all_interfaces variable; if the variable is set to .Dq Li YES the IPv6-preferred one is used. Otherwise IPv4-preferred. .Pp The default value of .Va ip6addrctl_enable and .Va ip6addrctl_policy are .Dq Li YES and .Dq Li AUTO , respectively. .It Va cloned_interfaces .Pq Vt str Set to the list of clonable network interfaces to create on this host. Further cloning arguments may be passed to the .Xr ifconfig 8 .Cm create command for each interface by setting the .Va create_args_ Ns Aq Ar interface variable. Entries in .Va cloned_interfaces are automatically appended to .Va network_interfaces for configuration. .It Va fec_interfaces .Pq Vt str Set to the list of .Xr ng_fec 4 Fast EtherChannel interfaces to configure on this host. A .Va fecconfig_ Ns Aq Ar interface variable is assumed to exist for each value of .Ar interface . The value of this variable is used to configure link aggregated interfaces according to the syntax of the .Cm NGM_FEC_ADD_IFACE to .Xr ngctl 8 msg. Additionally, this option ensures that each listed interface is created via the .Cm mkpeer command to .Xr ngctl 8 before attempting to configure it. For example: .Bd -literal fec_interfaces="fec0" fecconfig_fec0="em0 em1" ifconfig_fec0="DHCP" .Ed .It Va gif_interfaces .Pq Vt str Set to the list of .Xr gif 4 tunnel interfaces to configure on this host. A .Va gifconfig_ Ns Aq Ar interface variable is assumed to exist for each value of .Ar interface . The value of this variable is used to configure the link layer of the tunnel according to the syntax of the .Cm tunnel option to .Xr ifconfig 8 . Additionally, this option ensures that each listed interface is created via the .Cm create option to .Xr ifconfig 8 before attempting to configure it. .It Va sppp_interfaces .Pq Vt str Set to the list of .Xr sppp 4 interfaces to configure on this host. A .Va spppconfig_ Ns Aq Ar interface variable is assumed to exist for each value of .Ar interface . Each interface should also be configured by a general .Va ifconfig_ Ns Aq Ar interface setting. Refer to .Xr spppcontrol 8 for more information about available options. .It Va ppp_enable .Pq Vt bool If set to .Dq Li YES , run the .Xr ppp 8 daemon. .It Va ppp_profile .Pq Vt str The name of the profile to use from .Pa /etc/ppp/ppp.conf . Also used for per-profile overrides of .Va ppp_mode and .Va ppp_nat , and .Va ppp_ Ns Ao Ar profile Ac Ns _unit . When the profile name contains any of the characters .Dq Li .-/+ they are translated to .Dq Li _ for the proposes of the override variable names. .It Va ppp_mode .Pq Vt str Mode in which to run the .Xr ppp 8 daemon. .It Va ppp_ Ns Ao Ar profile Ac Ns _mode .Pq Vt str Overrides the global .Va ppp_mode for .Ar profile . Accepted modes are .Dq Li auto , .Dq Li ddial , .Dq Li direct and .Dq Li dedicated . See the manual for a full description. .It Va ppp_nat .Pq Vt bool If set to .Dq Li YES , enables network address translation. Used in conjunction with .Va gateway_enable allows hosts on private network addresses access to the Internet using this host as a network address translating router. .It Va ppp_ Ns Ao Ar profile Ac Ns _nat .Pq Vt str Overrides the global .Va ppp_nat for .Ar profile . .It Va ppp_ Ns Ao Ar profile Ac Ns _unit .Pq Vt int Set the unit number to be used for this profile. See the manual description of .Fl unit Ns Ar N for details. .It Va ppp_user .Pq Vt str The name of the user under which .Xr ppp 8 should be started. By default, .Xr ppp 8 is started as .Dq Li root . .It Va rc_conf_files .Pq Vt str This option is used to specify a list of files that will override the settings in .Pa /etc/defaults/rc.conf . The files will be read in the order in which they are specified and should include the full path to the file. By default, the files specified are .Pa /etc/rc.conf and .Pa /etc/rc.conf.local .It Va zfs_enable .Pq Vt bool If set to .Dq Li YES , .Pa /etc/rc.d/zfs will attempt to automatically mount ZFS file systems and initialize ZFS volumes (ZVOLs). +.It Va gptboot_enable +.Pq Vt bool +If set to +.Dq Li YES , +.Pa /etc/rc.d/gptboot +will log if the system successfully (or not) booted from a GPT partition, +which had the +.Ar bootonce +attribute set using +.Xr gpart 8 +utility. .It Va gbde_autoattach_all .Pq Vt bool If set to .Dq Li YES , .Pa /etc/rc.d/gbde will attempt to automatically initialize your .bde devices in .Pa /etc/fstab . .It Va gbde_devices .Pq Vt str List the devices that the script should try to attach, or .Dq Li AUTO . .It Va gbde_lockdir .Pq Vt str The directory where the .Xr gbde 4 lockfiles are located. The default lockfile directory is .Pa /etc . .Pp The lockfile for each individual .Xr gbde 4 device can be overridden by setting the variable .Va gbde_lock_ Ns Aq Ar device , where .Ar device is the encrypted device without the .Dq Pa /dev/ and .Dq Pa .bde parts. .It Va gbde_attach_attempts .Pq Vt int Number of times to attempt attaching to a .Xr gbde 4 device, i.e., how many times the user is asked for the pass-phrase. Default is 3. .It Va geli_devices .Pq Vt str List of devices to automatically attach on boot. Note that .eli devices from .Pa /etc/fstab are automatically appended to this list. .It Va geli_tries .Pq Vt int Number of times user is asked for the pass-phrase. If empty, it will be taken from .Va kern.geom.eli.tries sysctl variable. .It Va geli_default_flags .Pq Vt str Default flags to use by .Xr geli 8 when configuring disk encryption. Flags can be configured for every device separately by defining .Va geli_ Ns Ao Ar device Ac Ns Va _flags variable. .It Va geli_autodetach .Pq Vt str Specifies if GELI devices should be marked for detach on last close after file systems are mounted. Default is .Dq Li YES . This can be changed for every device separately by defining .Va geli_ Ns Ao Ar device Ac Ns Va _autodetach variable. .It Va geli_swap_flags Options passed to the .Xr geli 8 utility when encrypted GEOM providers for swap partitions are created. The default is .Dq Li "-e aes -l 256 -s 4096 -d" . .It Va root_rw_mount .Pq Vt bool Set to .Dq Li YES by default. After the file systems are checked at boot time, the root file system is remounted as read-write if this is set to .Dq Li YES . Diskless systems that mount their root file system from a read-only remote NFS share should set this to .Dq Li NO in their .Pa rc.conf . .It Va fsck_y_enable .Pq Vt bool If set to .Dq Li YES , .Xr fsck 8 will be run with the .Fl y flag if the initial preen of the file systems fails. .It Va background_fsck .Pq Vt bool If set to .Dq Li YES , the system will attempt to run .Xr fsck 8 in the background where possible. .It Va background_fsck_delay .Pq Vt int The amount of time in seconds to sleep before starting a background .Xr fsck 8 . It defaults to sixty seconds to allow large applications such as the X server to start before disk I/O bandwidth is monopolized by .Xr fsck 8 . If set to a negative number, the background file system check will be delayed indefinitely to allow the administrator to run it at a more convenient time. For example it may be run from .Xr cron 8 by adding a line like .Pp .Dl "0 4 * * * root /etc/rc.d/bgfsck forcestart" .Pp to .Pa /etc/crontab . .It Va netfs_types .Pq Vt str List of file system types that are network-based. This list should generally not be modified by end users. Use .Va extra_netfs_types instead. .It Va extra_netfs_types .Pq Vt str If set to something other than .Dq Li NO (the default), this variable extends the list of file system types for which automatic mounting at startup by .Xr rc 8 should be delayed until the network is initialized. It should contain a whitespace-separated list of network file system descriptor pairs, each consisting of a file system type as passed to .Xr mount 8 and a human-readable, one-word description, joined with a colon .Pq Ql \&: . Extending the default list in this way is only necessary when third party file system types are used. .It Va syslogd_enable .Pq Vt bool If set to .Dq Li YES , run the .Xr syslogd 8 daemon. .It Va syslogd_program .Pq Vt str Path to .Xr syslogd 8 (default .Pa /usr/sbin/syslogd ) . .It Va syslogd_flags .Pq Vt str If .Va syslogd_enable is set to .Dq Li YES , these are the flags to pass to .Xr syslogd 8 . .It Va inetd_enable .Pq Vt bool If set to .Dq Li YES , run the .Xr inetd 8 daemon. .It Va inetd_program .Pq Vt str Path to .Xr inetd 8 (default .Pa /usr/sbin/inetd ) . .It Va inetd_flags .Pq Vt str If .Va inetd_enable is set to .Dq Li YES , these are the flags to pass to .Xr inetd 8 . .It Va hastd_enable .Pq Vt bool If set to .Dq Li YES , run the .Xr hastd 8 daemon. .It Va hastd_program .Pq Vt str Path to .Xr hastd 8 (default .Pa /sbin/hastd ) . .It Va hastd_flags .Pq Vt str If .Va hastd_enable is set to .Dq Li YES , these are the flags to pass to .Xr hastd 8 . .It Va named_enable .Pq Vt bool If set to .Dq Li YES , run the .Xr named 8 daemon. .It Va named_program .Pq Vt str Path to .Xr named 8 (default .Pa /usr/sbin/named ) . .It Va named_conf .Pq Vt str Path to .Xr named 8 configuration file, (default .Pa /etc/namedb/named.conf ) . .It Va named_flags .Pq Vt str If .Va named_enable is set to .Dq Li YES , these are the flags to pass to .Xr named 8 . .It Va named_pidfile .Pq Vt str This is the default path to the .Xr named 8 daemon's PID file. This must match the location in .Xr named.conf 5 . .It Va named_uid .Pq Vt str The user that the .Xr named 8 process should be run as. .It Va named_chrootdir .Pq Vt str The root directory for a name server run in a .Xr chroot 8 environment (default .Pa /var/named ) . If left empty .Xr named 8 will not be run in a .Xr chroot 8 environment. .It Va named_chroot_autoupdate .Pq Vt bool Set to .Dq Li NO to disable automatic update of the .Xr chroot 8 environment. .It Va named_symlink_enable .Pq Vt bool Set to .Dq Li NO to disable symlinking of daemon's PID file into the .Xr chroot 8 environment. .It Va named_wait .Pq Vt bool Set to have .Pa /etc/rc.d/named loop until working name service is established. .It Va named_wait_host .Pq Vt str Name of host to lookup for the named_wait option. (Default localhost) .It Va named_auto_forward .Pq Vt bool Set to enable automatic creation of a forwarder configuration file derived from .Pa /etc/resolv.conf . .It Va named_auto_forward_only .Pq Vt bool Set to change the default forwarder configuration from .Dq forward first to .Dq forward only . .It Va kerberos5_server_enable .Pq Vt bool Set to .Dq Li YES to start a Kerberos 5 authentication server at boot time. .It Va kerberos5_server .Pq Vt str If .Va kerberos5_server_enable is set to .Dq Li YES this is the path to Kerberos 5 Authentication Server. .It Va kerberos5_server_flags .Pq Vt str Empty by default. This variable contains additional flags to be passed to the Kerberos 5 authentication server. .It Va kadmind5_server_enable .Pq Vt bool Set to .Dq Li YES to start .Xr kadmind 8 , the Kerberos 5 Administration Daemon; set to .Dq Li NO on a slave server. .It Va kadmind5_server .Pq Vt str If .Va kadmind5_server_enable is set to .Dq Li YES this is the path to Kerberos 5 Administration Daemon. .It Va kpasswdd_server_enable .Pq Vt bool Set to .Dq Li YES to start .Xr kpasswdd 8 , the Kerberos 5 Password-Changing Daemon; set to .Dq Li NO on a slave server. .It Va kpasswdd_server .Pq Vt str If .Va kpasswdd_server_enable is set to .Dq Li YES this is the path to Kerberos 5 Password-Changing Daemon. .It Va rwhod_enable .Pq Vt bool If set to .Dq Li YES , run the .Xr rwhod 8 daemon at boot time. .It Va rwhod_flags .Pq Vt str If .Va rwhod_enable is set to .Dq Li YES , these are the flags to pass to it. .It Va amd_enable .Pq Vt bool If set to .Dq Li YES , run the .Xr amd 8 daemon at boot time. .It Va amd_flags .Pq Vt str If .Va amd_enable is set to .Dq Li YES , these are the flags to pass to it. See the .Xr amd 8 manpage for more information. .It Va amd_map_program .Pq Vt str If set, the specified program is run to get the list of .Xr amd 8 maps. For example, if the .Xr amd 8 maps are stored in NIS, one can set this to run .Xr ypcat 1 to get a list of .Xr amd 8 maps from the .Pa amd.master NIS map. .It Va update_motd .Pq Vt bool If set to .Dq Li YES , .Pa /etc/motd will be updated at boot time to reflect the kernel release being run. If set to .Dq Li NO , .Pa /etc/motd will not be updated. .It Va nfs_client_enable .Pq Vt bool If set to .Dq Li YES , run the NFS client daemons at boot time. .It Va nfs_access_cache .Pq Vt int If .Va nfs_client_enable is set to .Dq Li YES , this can be set to .Dq Li 0 to disable NFS ACCESS RPC caching, or to the number of seconds for which NFS ACCESS results should be cached. A value of 2-10 seconds will substantially reduce network traffic for many NFS operations. .It Va nfs_server_enable .Pq Vt bool If set to .Dq Li YES , run the NFS server daemons at boot time. .It Va nfs_server_flags .Pq Vt str If .Va nfs_server_enable is set to .Dq Li YES , these are the flags to pass to the .Xr nfsd 8 daemon. .It Va idmapd_enable .Pq Vt bool If set to .Dq Li YES , run the ID mapping daemon for NFS version 4. .It Va idmapd_flags .Pq Vt str If .Va idmapd_enable is set to .Dq Li YES , these are the flags to pass to the .Xr idmapd 8 daemon. .It Va mountd_enable .Pq Vt bool If set to .Dq Li YES , and no .Va nfs_server_enable is set, start .Xr mountd 8 , but not .Xr nfsd 8 daemon. It is commonly needed to run CFS without real NFS used. .It Va mountd_flags .Pq Vt str If .Va mountd_enable is set to .Dq Li YES , these are the flags to pass to the .Xr mountd 8 daemon. .It Va weak_mountd_authentication .Pq Vt bool If set to .Dq Li YES , allow services like PCNFSD to make non-privileged mount requests. .It Va nfs_reserved_port_only .Pq Vt bool If set to .Dq Li YES , provide NFS services only on a secure port. .It Va nfs_bufpackets .Pq Vt int If set to a number, indicates the number of packets worth of socket buffer space to reserve on an NFS client. The kernel default is typically 4. Using a higher number may be useful on gigabit networks to improve performance. The minimum value is 2 and the maximum is 64. .It Va rpc_lockd_enable .Pq Vt bool If set to .Dq Li YES and also an NFS server or client, run .Xr rpc.lockd 8 at boot time. .It Va rpc_lockd_flags .Pq Vt str If .Va rpc_lockd_enable is set to .Dq Li YES , these are the flags to pass to the .Xr rpc.lockd 8 daemon. .It Va rpc_statd_enable .Pq Vt bool If set to .Dq Li YES and also an NFS server or client, run .Xr rpc.statd 8 at boot time. .It Va rpc_statd_flags .Pq Vt str If .Va rpc_statd_enable is set to .Dq Li YES , these are the flags to pass to the .Xr rpc.statd 8 daemon. .It Va rpcbind_program .Pq Vt str Path to .Xr rpcbind 8 (default .Pa /usr/sbin/rpcbind ) . .It Va rpcbind_enable .Pq Vt bool If set to .Dq Li YES , run the .Xr rpcbind 8 service at boot time. .It Va rpcbind_flags .Pq Vt str If .Va rpcbind_enable is set to .Dq Li YES , these are the flags to pass to the .Xr rpcbind 8 daemon. .It Va keyserv_enable .Pq Vt bool If set to .Dq Li YES , run the .Xr keyserv 8 daemon on boot for running Secure RPC. .It Va keyserv_flags .Pq Vt str If .Va keyserv_enable is set to .Dq Li YES , these are the flags to pass to .Xr keyserv 8 daemon. .It Va pppoed_enable .Pq Vt bool If set to .Dq Li YES , run the .Xr pppoed 8 daemon at boot time to provide PPP over Ethernet services. .It Va pppoed_ Ns Aq Ar provider .Pq Vt str .Xr pppoed 8 listens to requests to this .Ar provider and ultimately runs .Xr ppp 8 with a .Ar system argument of the same name. .It Va pppoed_flags .Pq Vt str Additional flags to pass to .Xr pppoed 8 . .It Va pppoed_interface .Pq Vt str The network interface to run .Xr pppoed 8 on. This is mandatory when .Va pppoed_enable is set to .Dq Li YES . .It Va timed_enable .Pq Vt bool If set to .Dq Li YES , run the .Xr timed 8 service at boot time. This command is intended for networks of machines where a consistent .Dq "network time" for all hosts must be established. This is often useful in large NFS environments where time stamps on files are expected to be consistent network-wide. .It Va timed_flags .Pq Vt str If .Va timed_enable is set to .Dq Li YES , these are the flags to pass to the .Xr timed 8 service. .It Va ntpdate_enable .Pq Vt bool If set to .Dq Li YES , run .Xr ntpdate 8 at system startup. This command is intended to synchronize the system clock only .Em once from some standard reference. An option to set this up initially (from a list of known servers) is also provided by the .Xr sysinstall 8 program when the system is first installed. .It Va ntpdate_config .Pq Vt str Configuration file for .Xr ntpdate 8 . Default .Pa /etc/ntp.conf . .It Va ntpdate_hosts .Pq Vt str A whitespace-separated list of NTP servers to synchronize with at startup. The default is to use the servers listed in .Va ntpdate_config , if that file exists. .It Va ntpdate_program .Pq Vt str Path to .Xr ntpdate 8 (default .Pa /usr/sbin/ntpdate ) . .It Va ntpdate_flags .Pq Vt str If .Va ntpdate_enable is set to .Dq Li YES , these are the flags to pass to the .Xr ntpdate 8 command (typically a hostname). .It Va ntpd_enable .Pq Vt bool If set to .Dq Li YES , run the .Xr ntpd 8 command at boot time. .It Va ntpd_program .Pq Vt str Path to .Xr ntpd 8 (default .Pa /usr/sbin/ntpd ) . .It Va ntpd_config .Pq Vt str Path to .Xr ntpd 8 configuration file. Default .Pa /etc/ntp.conf . .It Va ntpd_flags .Pq Vt str If .Va ntpd_enable is set to .Dq Li YES , these are the flags to pass to the .Xr ntpd 8 daemon. .It Va ntpd_sync_on_start .Pq Vt bool If set to .Dq Li YES , .Xr ntpd 8 is run with the .Fl g flag, which syncs the system's clock on startup. See .Xr ntpd 8 for more information regarding the .Fl g option. This is a preferred alternative to using .Xr ntpdate 8 or specifying the .Va ntpdate_enable variable. .It Va nis_client_enable .Pq Vt bool If set to .Dq Li YES , run the .Xr ypbind 8 service at system boot time. .It Va nis_client_flags .Pq Vt str If .Va nis_client_enable is set to .Dq Li YES , these are the flags to pass to the .Xr ypbind 8 service. .It Va nis_ypset_enable .Pq Vt bool If set to .Dq Li YES , run the .Xr ypset 8 daemon at system boot time. .It Va nis_ypset_flags .Pq Vt str If .Va nis_ypset_enable is set to .Dq Li YES , these are the flags to pass to the .Xr ypset 8 daemon. .It Va nis_server_enable .Pq Vt bool If set to .Dq Li YES , run the .Xr ypserv 8 daemon at system boot time. .It Va nis_server_flags .Pq Vt str If .Va nis_server_enable is set to .Dq Li YES , these are the flags to pass to the .Xr ypserv 8 daemon. .It Va nis_ypxfrd_enable .Pq Vt bool If set to .Dq Li YES , run the .Xr rpc.ypxfrd 8 daemon at system boot time. .It Va nis_ypxfrd_flags .Pq Vt str If .Va nis_ypxfrd_enable is set to .Dq Li YES , these are the flags to pass to the .Xr rpc.ypxfrd 8 daemon. .It Va nis_yppasswdd_enable .Pq Vt bool If set to .Dq Li YES , run the .Xr rpc.yppasswdd 8 daemon at system boot time. .It Va nis_yppasswdd_flags .Pq Vt str If .Va nis_yppasswdd_enable is set to .Dq Li YES , these are the flags to pass to the .Xr rpc.yppasswdd 8 daemon. .It Va rpc_ypupdated_enable .Pq Vt bool If set to .Dq Li YES , run the .Nm rpc.ypupdated daemon at system boot time. .It Va bsnmpd_enable .Pq Vt bool If set to .Dq Li YES , run the .Xr bsnmpd 1 daemon at system boot time. Be sure to understand the security implications of running SNMP daemon on your host. .It Va bsnmpd_flags .Pq Vt str If .Va bsnmpd_enable is set to .Dq Li YES , these are the flags to pass to the .Xr bsnmpd 1 daemon. .It Va defaultrouter .Pq Vt str If not set to .Dq Li NO , create a default route to this host name or IP address (use an IP address if this router is also required to get to the name server!). .It Va ipv6_defaultrouter .Pq Vt str The IPv6 equivalent of .Va defaultrouter . .It Va static_arp_pairs .Pq Vt str Set to the list of static ARP pairs that are to be added at system boot time. For each whitespace separated .Ar element in the value, a .Va static_arp_ Ns Aq Ar element variable is assumed to exist whose contents will later be passed to a .Dq Nm arp Cm -S operation. For example .Bd -literal static_arp_pairs="gw" static_arp_gw="192.168.1.1 00:01:02:03:04:05" .Ed .It Va static_routes .Pq Vt str Set to the list of static routes that are to be added at system boot time. If not set to .Dq Li NO then for each whitespace separated .Ar element in the value, a .Va route_ Ns Aq Ar element variable is assumed to exist whose contents will later be passed to a .Dq Nm route Cm add operation. For example: .Bd -literal static_routes="mcast gif0local" route_mcast="-net 224.0.0.0/4 -iface gif0" route_gif0local="-host 169.254.1.1 -iface lo0" .Ed .It Va ipv6_static_routes .Pq Vt str The IPv6 equivalent of .Va static_routes . If not set to .Dq Li NO then for each whitespace separated .Ar element in the value, a .Va ipv6_route_ Ns Aq Ar element variable is assumed to exist whose contents will later be passed to a .Dq Nm route Cm add Fl inet6 operation. .It Va natm_static_routes .Pq Vt str The .Xr natmip 4 equivalent of .Va static_routes . If not empty then for each whitespace separated .Ar element in the value, a .Va route_ Ns Aq Ar element variable is assumed to exist whose contents will later be passed to a .Dq Nm atmconfig Cm natm Cm add operation. .It Va gateway_enable .Pq Vt bool If set to .Dq Li YES , configure host to act as an IP router, e.g.\& to forward packets between interfaces. .It Va ipv6_gateway_enable .Pq Vt bool The IPv6 equivalent of .Va gateway_enable . .It Va routed_enable .Pq Vt bool If set to .Dq Li YES , run a routing daemon of some sort, based on the settings of .Va routed_program and .Va routed_flags . .It Va route6d_enable .Pq Vt bool The IPv6 equivalent of .Va routed_enable . If set to .Dq Li YES , run a routing daemon of some sort, based on the settings of .Va route6d_program and .Va route6d_flags . .It Va routed_program .Pq Vt str If .Va routed_enable is set to .Dq Li YES , this is the name of the routing daemon to use. .It Va route6d_program .Pq Vt str The IPv6 equivalent of .Va routed_program . .It Va routed_flags .Pq Vt str If .Va routed_enable is set to .Dq Li YES , these are the flags to pass to the routing daemon. .It Va route6d_flags .Pq Vt str The IPv6 equivalent of .Va routed_flags . .It Va mrouted_enable .Pq Vt bool If set to .Dq Li YES , run the multicast routing daemon, .Xr mrouted 8 . .It Va mroute6d_enable .Pq Vt bool The IPv6 equivalent of .Va mrouted_enable . If set to .Dq Li YES , run the IPv6 multicast routing daemon. .Pp Note that multicast routing daemons are no longer included in the .Fx base system, however, both .Xr mrouted 8 and .Xr pim6dd 8 may be installed from the .Fx Ports Collection. .It Va mrouted_flags .Pq Vt str If .Va mrouted_enable is set to .Dq Li YES , these are the flags to pass to the .Xr mrouted 8 daemon. .It Va mroute6d_flags .Pq Vt str The IPv6 equivalent of .Va mrouted_flags . If .Va mroute6d_enable is set to .Dq Li YES , these are the flags passed to the IPv6 multicast routing daemon. .It Va mroute6d_program .Pq Vt str If .Va mroute6d_enable is set to .Dq Li YES , this is the path to the IPv6 multicast routing daemon. .It Va rtadvd_enable .Pq Vt bool If set to .Dq Li YES , run the .Xr rtadvd 8 daemon at boot time. .Xr rtadvd 8 will only run if .Va ipv6_gateway_enable is also set to .Dq Li YES . The .Xr rtadvd 8 utility sends router advertisement packets to the interfaces specified in .Va rtadvd_interfaces and should only be enabled with great care. You may want to fine-tune .Xr rtadvd.conf 5 . .It Va rtadvd_interfaces .Pq Vt str If .Va rtadvd_enable is set to .Dq Li YES this is the list of interfaces to use. .It Va ipxgateway_enable .Pq Vt bool If set to .Dq Li YES , enable the routing of IPX traffic. .It Va ipxrouted_enable .Pq Vt bool If set to .Dq Li YES , run the .Xr IPXrouted 8 daemon at system boot time. .It Va ipxrouted_flags .Pq Vt str If .Va ipxrouted_enable is set to .Dq Li YES , these are the flags to pass to the .Xr IPXrouted 8 daemon. .It Va arpproxy_all .Pq Vt bool If set to .Dq Li YES , enable global proxy ARP. .It Va forward_sourceroute .Pq Vt bool If set to .Dq Li YES and .Va gateway_enable is also set to .Dq Li YES , source-routed packets are forwarded. .It Va accept_sourceroute .Pq Vt bool If set to .Dq Li YES , the system will accept source-routed packets directed at it. .It Va rarpd_enable .Pq Vt bool If set to .Dq Li YES , run the .Xr rarpd 8 daemon at system boot time. .It Va rarpd_flags .Pq Vt str If .Va rarpd_enable is set to .Dq Li YES , these are the flags to pass to the .Xr rarpd 8 daemon. .It Va bootparamd_enable .Pq Vt bool If set to .Dq Li YES , run the .Xr bootparamd 8 daemon at system boot time. .It Va bootparamd_flags .Pq Vt str If .Va bootparamd_enable is set to .Dq Li YES , these are the flags to pass to the .Xr bootparamd 8 daemon. .It Va stf_interface_ipv4addr .Pq Vt str If not set to .Dq Li NO , this is the local IPv4 address for 6to4 (IPv6 over IPv4 tunneling interface). Specify this entry to enable the 6to4 interface. .It Va stf_interface_ipv4plen .Pq Vt int Prefix length for 6to4 IPv4 addresses, to limit peer address range. An effective value is 0-31. .It Va stf_interface_ipv6_ifid .Pq Vt str IPv6 interface ID for .Xr stf 4 . This can be set to .Dq Li AUTO . .It Va stf_interface_ipv6_slaid .Pq Vt str IPv6 Site Level Aggregator for .Xr stf 4 . .It Va ipv6_faith_prefix .Pq Vt str If not set to .Dq Li NO , this is the faith prefix to enable a FAITH IPv6-to-IPv4 TCP translator. You also need .Xr faithd 8 setup. .It Va ipv6_ipv4mapping .Pq Vt bool If set to .Dq Li YES this enables IPv4 mapped IPv6 address communication (like .Li ::ffff:a.b.c.d ) . .It Va atm_enable .Pq Vt bool Set to .Dq Li YES to enable the configuration of ATM interfaces at system boot time. For all of the ATM variables described below, please refer to the .Xr atm 8 manual page for further details on the available command parameters. Also refer to the files in .Pa /usr/share/examples/atm for more detailed configuration information. .It Va atm_load .Pq Vt str This is a list of physical ATM interface drivers to load. Typical values are .Dq Li hfa_pci and/or .Dq Li hea_pci . .It Va atm_netif_ Ns Aq Ar intf .Pq Vt str For the ATM physical interface .Ar intf , this variable defines the name prefix and count for the ATM network interfaces to be created. The value will be passed as the parameters of an .Dq Nm atm Cm "set netif" Ar intf command. .It Va atm_sigmgr_ Ns Aq Ar intf .Pq Vt str For the ATM physical interface .Ar intf , this variable defines the ATM signalling manager to be used. The value will be passed as the parameters of an .Dq Nm atm Cm attach Ar intf command. .It Va atm_prefix_ Ns Aq Ar intf .Pq Vt str For the ATM physical interface .Ar intf , this variable defines the NSAP prefix for interfaces using a UNI signalling manager. If set to .Dq Li ILMI , the prefix will automatically be set via the .Xr ilmid 8 daemon. Otherwise, the value will be passed as the parameters of an .Dq Nm atm Cm "set prefix" Ar intf command. .It Va atm_macaddr_ Ns Aq Ar intf .Pq Vt str For the ATM physical interface .Ar intf , this variable defines the MAC address for interfaces using a UNI signalling manager. If set to .Dq Li NO , the hardware MAC address contained in the ATM interface card will be used. Otherwise, the value will be passed as the parameters of an .Dq Nm atm Cm "set mac" Ar intf command. .It Va atm_arpserver_ Ns Aq Ar netif .Pq Vt str For the ATM network interface .Ar netif , this variable defines the ATM address for a host which is to provide ATMARP service. This variable is only applicable to interfaces using a UNI signalling manager. If set to .Dq Li local , this host will become an ATMARP server. The value will be passed as the parameters of an .Dq Nm atm Cm "set arpserver" Ar netif command. .It Va atm_scsparp_ Ns Aq Ar netif .Pq Vt bool If set to .Dq Li YES , SCSP/ATMARP service for the network interface .Ar netif will be initiated using the .Xr scspd 8 and .Xr atmarpd 8 daemons. This variable is only applicable if .Va atm_arpserver_ Ns Aq Ar netif is set to .Dq Li local . .It Va atm_pvcs .Pq Vt str Set to the list of ATM PVCs to be added at system boot time. For each whitespace separated .Ar element in the value, an .Va atm_pvc_ Ns Aq Ar element variable is assumed to exist. The value of each of these variables will be passed as the parameters of an .Dq Nm atm Cm "add pvc" command. .It Va atm_arps .Pq Vt str Set to the list of permanent ATM ARP entries to be added at system boot time. For each whitespace separated .Ar element in the value, an .Va atm_arp_ Ns Aq Ar element variable is assumed to exist. The value of each of these variables will be passed as the parameters of an .Dq Nm atm Cm "add arp" command. .It Va natm_interfaces .Pq Vt str Set to the list of .Xr natm 4 interfaces that will also be used for HARP through .Xr harp 4 . If this list is not empty all interfaces in the list will be brought up with .Xr ifconfig 8 and .Xr harp 4 will be loaded. For this to work the interface drivers must be either compiled into the kernel or must reside on the root partition. .It Va keybell .Pq Vt str The keyboard bell sound. Set to .Dq Li normal , .Dq Li visual , .Dq Li off , or .Dq Li NO if the default behavior is desired. For details, refer to the .Xr kbdcontrol 1 manpage. .It Va keyboard .Pq Vt str If set to a non-null string, the virtual console's keyboard input is set to this device. .It Va keymap .Pq Vt str If set to .Dq Li NO , no keymap is installed, otherwise the value is used to install the keymap file in .Pa /usr/share/syscons/keymaps/ Ns Ao Ar value Ac Ns Pa .kbd . .It Va keyrate .Pq Vt str The keyboard repeat speed. Set to .Dq Li slow , .Dq Li normal , .Dq Li fast , or .Dq Li NO if the default behavior is desired. .It Va keychange .Pq Vt str If not set to .Dq Li NO , attempt to program the function keys with the value. The value should be a single string of the form: .Dq Ar funkey_number new_value Op Ar funkey_number new_value ... . .It Va cursor .Pq Vt str Can be set to the value of .Dq Li normal , .Dq Li blink , .Dq Li destructive , or .Dq Li NO to set the cursor behavior explicitly or choose the default behavior. .It Va scrnmap .Pq Vt str If set to .Dq Li NO , no screen map is installed, otherwise the value is used to install the screen map file in .Pa /usr/share/syscons/scrnmaps/ Ns Aq Ar value . .It Va font8x16 .Pq Vt str If set to .Dq Li NO , the default 8x16 font value is used for screen size requests, otherwise the value in .Pa /usr/share/syscons/fonts/ Ns Aq Ar value is used. .It Va font8x14 .Pq Vt str If set to .Dq Li NO , the default 8x14 font value is used for screen size requests, otherwise the value in .Pa /usr/share/syscons/fonts/ Ns Aq Ar value is used. .It Va font8x8 .Pq Vt str If set to .Dq Li NO , the default 8x8 font value is used for screen size requests, otherwise the value in .Pa /usr/share/syscons/fonts/ Ns Aq Ar value is used. .It Va blanktime .Pq Vt int If set to .Dq Li NO , the default screen blanking interval is used, otherwise it is set to .Ar value seconds. .It Va saver .Pq Vt str If not set to .Dq Li NO , this is the actual screen saver to use .Li ( blank , snake , daemon , etc). .It Va moused_nondefault_enable .Pq Vt str If set to .Dq Li NO , the mouse device specified on the command line is not automatically treated as enabled by the .Pa /etc/rc.d/moused script. Having this variable set to .Dq Li YES allows a .Xr usb 4 mouse, for example, to be enabled as soon as it is plugged in. .It Va moused_enable .Pq Vt str If set to .Dq Li YES , the .Xr moused 8 daemon is started for doing cut/paste selection on the console. .It Va moused_type .Pq Vt str This is the protocol type of the mouse connected to this host. This variable must be set if .Va moused_enable is set to .Dq Li YES . The .Xr moused 8 daemon is able to detect the appropriate mouse type automatically in many cases. Set this variable to .Dq Li auto to let the daemon detect it, or select one from the following list if the automatic detection fails. .Pp If the mouse is attached to the PS/2 mouse port, choose .Dq Li auto or .Dq Li ps/2 , regardless of the brand and model of the mouse. Likewise, if the mouse is attached to the bus mouse port, choose .Dq Li auto or .Dq Li busmouse . All other protocols are for serial mice and will not work with the PS/2 and bus mice. If this is a USB mouse, .Dq Li auto is the only protocol type which will work. .Pp .Bl -tag -width ".Li x10mouseremote" -compact .It Li microsoft Microsoft mouse (serial) .It Li intellimouse Microsoft IntelliMouse (serial) .It Li mousesystems Mouse systems Corp.\& mouse (serial) .It Li mmseries MM Series mouse (serial) .It Li logitech Logitech mouse (serial) .It Li busmouse A bus mouse .It Li mouseman Logitech MouseMan and TrackMan (serial) .It Li glidepoint ALPS GlidePoint (serial) .It Li thinkingmouse Kensington ThinkingMouse (serial) .It Li ps/2 PS/2 mouse .It Li mmhittab MM HitTablet (serial) .It Li x10mouseremote X10 MouseRemote (serial) .It Li versapad Interlink VersaPad (serial) .El .Pp Even if the mouse is not in the above list, it may be compatible with one in the list. Refer to the manual page for .Xr moused 8 for compatibility information. .Pp It should also be noted that while this is enabled, any other client of the mouse (such as an X server) should access the mouse through the virtual mouse device, .Pa /dev/sysmouse , and configure it as a .Dq Li sysmouse type mouse, since all mouse data is converted to this single canonical format when using .Xr moused 8 . If the client program does not support the .Dq Li sysmouse type, specify the .Dq Li mousesystems type. It is the second preferred type. .It Va moused_port .Pq Vt str If .Va moused_enable is set to .Dq Li YES , this is the actual port the mouse is on. It might be .Pa /dev/cuad0 for a COM1 serial mouse, .Pa /dev/psm0 for a PS/2 mouse or .Pa /dev/mse0 for a bus mouse, for example. .It Va moused_flags .Pq Vt str If .Va moused_flags is set, its value is used as an additional set of flags to pass to the .Xr moused 8 daemon. .It Va "moused_" Ns Ar XXX Ns Va "_flags" When .Va moused_nondefault_enable is enabled, and a .Xr moused 8 daemon is started for a non-default port, the .Va "moused_" Ns Ar XXX Ns Va "_flags" set of options has precedence over and replaces the default .Va moused_flags (where .Ar XXX is the name of the non-default port, i.e.\& .Ar ums0 ) . By setting .Va "moused_" Ns Ar XXX Ns Va "_flags" it is possible to set up a different set of default flags for each .Xr moused 8 instance. For example, you can use .Dq Li "-3" for the default .Va moused_flags to make your laptop's touchpad more comfortable to use, but an empty set of options for .Va moused_ums0_flags when your .Xr usb 4 mouse has three or more buttons. .It Va mousechar_start .Pq Vt int If set to .Dq Li NO , the default mouse cursor character range .Li 0xd0 Ns - Ns Li 0xd3 is used, otherwise the range start is set to .Ar value character, see .Xr vidcontrol 1 . Use if the default range is occupied in the language code table. .It Va allscreens_flags .Pq Vt str If set, .Xr vidcontrol 1 is run with these options for each of the virtual terminals .Pq Pa /dev/ttyv* . For example, .Dq Fl m Cm on will enable the mouse pointer on all virtual terminals if .Va moused_enable is set to .Dq Li YES . .It Va allscreens_kbdflags .Pq Vt str If set, .Xr kbdcontrol 1 is run with these options for each of the virtual terminals .Pq Pa /dev/ttyv* . For example, .Dq Fl h Li 200 will set the .Xr syscons 4 scrollback (history) buffer to 200 lines. .It Va cron_enable .Pq Vt bool If set to .Dq Li YES , run the .Xr cron 8 daemon at system boot time. .It Va cron_program .Pq Vt str Path to .Xr cron 8 (default .Pa /usr/sbin/cron ) . .It Va cron_flags .Pq Vt str If .Va cron_enable is set to .Dq Li YES , these are the flags to pass to .Xr cron 8 . .It Va cron_dst .Pq Vt bool If set to .Dq Li YES , enable the special handling of transitions to and from the Daylight Saving Time in .Xr cron 8 (equivalent to using the flag .Fl s ) . .It Va lpd_program .Pq Vt str Path to .Xr lpd 8 (default .Pa /usr/sbin/lpd ) . .It Va lpd_enable .Pq Vt bool If set to .Dq Li YES , run the .Xr lpd 8 daemon at system boot time. .It Va lpd_flags .Pq Vt str If .Va lpd_enable is set to .Dq Li YES , these are the flags to pass to the .Xr lpd 8 daemon. .It Va chkprintcap_enable .Pq Vt bool If set to .Dq Li YES , run the .Xr chkprintcap 8 command before starting the .Xr lpd 8 daemon. .It Va chkprintcap_flags .Pq Vt str If .Va lpd_enable and .Va chkprintcap_enable are set to .Dq Li YES , these are the flags to pass to the .Xr chkprintcap 8 program. The default is .Dq Li -d , which causes missing directories to be created. .It Va mta_start_script .Pq Vt str This variable specifies the full path to the script to run to start a mail transfer agent. The default is .Pa /etc/rc.sendmail . The .Va sendmail_* variables which .Pa /etc/rc.sendmail uses are documented in the .Xr rc.sendmail 8 manual page. .It Va dumpdev .Pq Vt str Indicates the device (usually a swap partition) to which a crash dump should be written in the event of a system crash. If the value of this variable is .Dq Li AUTO , the first suitable swap device listed in .Pa /etc/fstab will be used as dump device. Otherwise, the value of this variable is passed as the argument to .Xr dumpon 8 . To disable crash dumps, set this variable to .Dq Li NO . .It Va dumpdir .Pq Vt str When the system reboots after a crash and a crash dump is found on the device specified by the .Va dumpdev variable, .Xr savecore 8 will save that crash dump and a copy of the kernel to the directory specified by the .Va dumpdir variable. The default value is .Pa /var/crash . Set to .Dq Li NO to not run .Xr savecore 8 at boot time when .Va dumpdir is set. .It Va savecore_flags .Pq Vt str If crash dumps are enabled, these are the flags to pass to the .Xr savecore 8 utility. .It Va quota_enable .Pq Vt bool Set to .Dq Li YES to turn on user and group disk quotas on system startup via the .Xr quotaon 8 command for all file systems marked as having quotas enabled in .Pa /etc/fstab . The kernel must be built with .Cd "options QUOTA" for disk quotas to function. .It Va check_quotas .Pq Vt bool Set to .Dq Li YES to enable user and group disk quota checking via the .Xr quotacheck 8 command. .It Va quotacheck_flags .Pq Vt str If .Va quota_enable is set to .Dq Li YES , and .Va check_quotas is set to .Dq Li YES , these are the flags to pass to the .Xr quotacheck 8 utility. The default is .Dq Li "-a" , which checks quotas for all file systems with quotas enabled in .Pa /etc/fstab . .It Va quotaon_flags .Pq Vt str If .Va quota_enable is set to .Dq Li YES , these are the flags to pass to the .Xr quotaon 8 utility. The default is .Dq Li "-a" , which enables quotas for all file systems with quotas enabled in .Pa /etc/fstab . .It Va quotaoff_flags .Pq Vt str If .Va quota_enable is set to .Dq Li YES , these are the flags to pass to the .Xr quotaoff 8 utility when shutting down the quota system. The default is .Dq Li "-a" , which disables quotas for all file systems with quotas enabled in .Pa /etc/fstab . .It Va accounting_enable .Pq Vt bool Set to .Dq Li YES to enable system accounting through the .Xr accton 8 facility. .It Va ibcs2_enable .Pq Vt bool Set to .Dq Li YES to enable iBCS2 (SCO) binary emulation at system initial boot time. .It Va ibcs2_loaders .Pq Vt str If not set to .Dq Li NO and if .Va ibcs2_enable is set to .Dq Li YES , this specifies a list of additional iBCS2 loaders to enable. .It Va linux_enable .Pq Vt bool Set to .Dq Li YES to enable Linux/ELF binary emulation at system initial boot time. .It Va svr4_enable .Pq Vt bool If set to .Dq Li YES , enable SysVR4 emulation at boot time. .It Va sysvipc_enable .Pq Vt bool If set to .Dq Li YES , load System V IPC primitives at boot time. .It Va clear_tmp_enable .Pq Vt bool Set to .Dq Li YES to have .Pa /tmp cleaned at startup. .It Va clear_tmp_X .Pq Vt bool Set to .Dq Li NO to disable removing of X11 lock files, and the removal and (secure) recreation of the various socket directories for X11 related programs. .It Va ldconfig_paths .Pq Vt str Set to the list of shared library paths to use with .Xr ldconfig 8 . NOTE: .Pa /usr/lib will always be added first, so it need not appear in this list. .It Va ldconfig32_paths .Pq Vt str Set to the list of 32-bit compatibility shared library paths to use with .Xr ldconfig 8 . .It Va ldconfig_paths_aout .Pq Vt str Set to the list of shared library paths to use with .Xr ldconfig 8 legacy .Xr a.out 5 support. .It Va ldconfig_insecure .Pq Vt bool The .Xr ldconfig 8 utility normally refuses to use directories which are writable by anyone except root. Set this variable to .Dq Li YES to disable that security check during system startup. .It Va ldconfig_local_dirs .Pq Vt str Set to the list of local .Xr ldconfig 8 directories. The names of all files in the directories listed will be passed as arguments to .Xr ldconfig 8 . .It Va ldconfig_local32_dirs .Pq Vt str Set to the list of local 32-bit compatibility .Xr ldconfig 8 directories. The names of all files in the directories listed will be passed as arguments to .Dq Nm ldconfig Fl 32 . .It Va kern_securelevel_enable .Pq Vt bool Set to .Dq Li YES to set the kernel security level at system startup. .It Va kern_securelevel .Pq Vt int The kernel security level to set at startup. The allowed range of .Ar value ranges from \-1 (the compile time default) to 3 (the most secure). See .Xr security 7 for the list of possible security levels and their effect on system operation. .It Va sshd_program .Pq Vt str Path to the SSH server program .Pa ( /usr/sbin/sshd is the default). .It Va sshd_enable .Pq Vt bool Set to .Dq Li YES to start .Xr sshd 8 at system boot time. .It Va sshd_flags .Pq Vt str If .Va sshd_enable is set to .Dq Li YES , these are the flags to pass to the .Xr sshd 8 daemon. .It Va ftpd_program .Pq Vt str Path to the FTP server program .Pa ( /usr/libexec/ftpd is the default). .It Va ftpd_enable .Pq Vt bool Set to .Dq Li YES to start .Xr ftpd 8 as a stand-alone daemon at system boot time. .It Va ftpd_flags .Pq Vt str If .Va ftpd_enable is set to .Dq Li YES , these are the additional flags to pass to the .Xr ftpd 8 daemon. .It Va watchdogd_enable .Pq Vt bool If set to .Dq Li YES , start the .Xr watchdogd 8 daemon at boot time. This requires that the kernel have been compiled with a .Xr watchdog 4 compatible device. .It Va watchdogd_flags .Pq Vt str If .Va watchdogd_enable is set to .Dq Li YES , these are the flags passed to the .Xr watchdogd 8 daemon. .It Va performance_cx_lowest .Pq Vt str CPU idle state to use while on AC power. The string .Dq Li LOW indicates that .Xr acpi 4 should use the lowest power state available while .Dq Li HIGH indicates that the lowest latency state (less power savings) should be used. .It Va performance_cpu_freq .Pq Vt str CPU clock frequency to use while on AC power. The string .Dq Li LOW indicates that .Xr cpufreq 4 should use the lowest frequency available while .Dq Li HIGH indicates that the highest frequency (less power savings) should be used. .It Va economy_cx_lowest .Pq Vt str CPU idle state to use when off AC power. The string .Dq Li LOW indicates that .Xr acpi 4 should use the lowest power state available while .Dq Li HIGH indicates that the lowest latency state (less power savings) should be used. .It Va economy_cpu_freq .Pq Vt str CPU clock frequency to use when off AC power. The string .Dq Li LOW indicates that .Xr cpufreq 4 should use the lowest frequency available while .Dq Li HIGH indicates that the highest frequency (less power savings) should be used. .It Va jail_enable .Pq Vt bool If set to .Dq Li NO , any configured jails will not be started. .It jail_parallel_start .Pq Vt bool If set to .Dq Li YES all configured jails will be started in the background (= in parallel). .It Va jail_list .Pq Vt str A space separated list of names for jails. This is purely a configuration aid to help identify and configure multiple jails. The names specified in this list will be used to identify settings common to an instance of a jail, and should contain alphanumeric characters only. Assuming that the jail in question was named .Li vjail , you would have the following dependent variables: .Bd -literal jail_vjail_hostname="jail.example.com" jail_vjail_ip="192.0.2.100" jail_vjail_rootdir="/var/jails/vjail/root" .Ed .Pp .It Va jail_flags .Pq Vt str Unset by default. When set, use as default value for .Va jail_ Ns Ao Ar jname Ac Ns Va _flags for every jail in .Va jail_list . .It Va jail_interface .Pq Vt str Unset by default. When set, use as default value for .Va jail_ Ns Ao Ar jname Ac Ns Va _interface for every jail in .Va jail_list . .It Va jail_fstab .Pq Vt str Unset by default. When set, use as default value for .Va jail_ Ns Ao Ar jname Ac Ns Va _fstab for every jail in .Va jail_list . .It Va jail_mount_enable .Pq Vt bool Set to .Dq Li NO by default. When set to .Dq Li YES , sets .Va jail_ Ns Ao Ar jname Ac Ns Va _mount_enable to .Dq Li YES by default for every jail in .Va jail_list . .It Va jail_devfs_ruleset .Pq Vt str Unset by default. When set, sets .Va jail_ Ns Ao Ar jname Ac Ns Va _devfs_ruleset to given value for every jail in .Va jail_list . .It Va jail_devfs_enable .Pq Vt bool Set to .Dq Li NO by default. When set to .Dq Li YES , sets .Va jail_ Ns Ao Ar jname Ac Ns Va _devfs_enable to .Dq Li YES by default for every jail in .Va jail_list . .It Va jail_fdescfs_enable .Pq Vt bool Set to .Dq Li NO by default. When set to .Dq Li YES , sets .Va jail_ Ns Ao Ar jname Ac Ns Va _fdescfs_enable to .Dq Li YES by default for every jail in .Va jail_list . .It Va jail_procfs_enable .Pq Vt bool Set to .Dq Li NO by default. When set to .Dq Li YES , sets .Va jail_ Ns Ao Ar jname Ac Ns Va _fdescfs_enable to .Dq Li YES by default for every jail in .Va jail_list . .It Va jail_exec_prestart Ns Aq Ar N .Pq Vt str Unset by default. When set, use as default value for .Va jail_ Ns Ao Ar jname Ac Ns Va _exec_prestart Ns Aq Ar N for every jail in .Va jail_list . .It Va jail_exec_start .Pq Vt str Unset by default. When set, use as default value for .Va jail_ Ns Ao Ar jname Ac Ns Va _exec_start for every jail in .Va jail_list . .It Va jail_exec_afterstart Ns Aq Ar N .Pq Vt str Unset by default. When set, use as default value for .Va jail_ Ns Ao Ar jname Ac Ns Va _exec_afterstart Ns Aq Ar N for every jail in .Va jail_list . .It Va jail_exec_poststart Ns Aq Ar N .Pq Vt str Unset by default. When set, use as default value for .Va jail_ Ns Ao Ar jname Ac Ns Va _exec_poststart Ns Aq Ar N for every jail in .Va jail_list . .It Va jail_exec_prestop Ns Aq Ar N .Pq Vt str Unset by default. When set, use as default value for .Va jail_ Ns Ao Ar jname Ac Ns Va _exec_prestop Ns Aq Ar N for every jail in .Va jail_list . .It Va jail_exec_stop Unset by default. When set, use as default value for .Va jail_ Ns Ao Ar jname Ac Ns Va _exec_stop for every jail in .Va jail_list . .It Va jail_exec_poststop Ns Aq Ar N .Pq Vt str Unset by default. When set, use as default value for .Va jail_ Ns Ao Ar jname Ac Ns Va _exec_poststop Ns Aq Ar N for every jail in .Va jail_list . .It Va jail_ Ns Ao Ar jname Ac Ns Va _rootdir .Pq Vt str Unset by default. Set to the root directory used by jail .Va jname . .It Va jail_ Ns Ao Ar jname Ac Ns Va _hostname .Pq Vt str Unset by default. Set to the fully qualified domain name (FQDN) assigned to jail .Va jname . .It Va jail_ Ns Ao Ar jname Ac Ns Va _ip .Pq Vt str Unset by default. Set to the (primary) IPv4 and/or IPv6 address(es) assigned to the jail. The argument can be a sole address or a comma separated list of addresses. Additionally each address can be prefixed by the name of an interface followed by a pipe to overwrite .Va jail_ Ns Ao Ar jname Ac Ns Va _interface or .Va jail_interface and/or suffixed by a netmask, prefixlen or prefix. In case no netmask, prefixlen or prefix is given, .Sq /32 will be used for IPv4 and .Sq /128 will be used for an IPv6 address. If no address is given for the jail then the jail will be started with no networking support. .It Va jail_ Ns Ao Ar jname Ac Ns Va _ip_multi Ns Aq Ar n .Pq Vt str Unset by default. Set additional IPv4 and/or IPv6 address(es) assigned to the jail. The sequence starts with .Dq Li _multi0 and the numbers have to be strictly ascending. These entries follow the same syntax as their primary .Va jail_ Ns Ao Ar jname Ac Ns Va _ip entry. The order of the entries can be important as the first address for each address family found will be the primary address of the jail. See .Va ip-addresses option in .Xr jail 8 for more details. .It Va jail_ Ns Ao Ar jname Ac Ns Va _flags .Pq Vt str Set to .Dq Li -l -U root by default. These are flags to pass to .Xr jail . .It Va jail_ Ns Ao Ar jname Ac Ns Va _interface .Pq Vt str Unset by default. When set, sets the interface to use when setting IP address alias. Note that the alias is created at jail startup and removed at jail shutdown. .It Va jail_ Ns Ao Ar jname Ac Ns Va _fib .Pq Vt str Unset by default. When set, the jail is started with the specified forwarding table (sometimes referred to as a routing table) via .Xr setfib 1 . .It Va jail_ Ns Ao Ar jname Ac Ns Va _fstab .Pq Vt str Set to .Pa /etc/fstab. Ns Aq Ar jname by default. This is the file system information file to use for jail .Va jname . .It Va jail_ Ns Ao Ar jname Ac Ns Va _mount_enable .Pq Vt bool Set to .Dq Li NO by default. When set to .Dq Li YES , mount all file systems from .Va jail_ Ns Ao Ar jname Ac Ns Va _fstab at jail startup. .It Va jail_ Ns Ao Ar jname Ac Ns Va _devfs_ruleset .Pq Vt str Unset by default. When set, defines the device file system ruleset file to use for jail .Va jname . .It Va jail_ Ns Ao Ar jname Ac Ns Va _devfs_enable .Pq Vt bool Set to .Dq Li NO by default. When set to .Dq Li YES , mount the device file system inside jail .Ar jname at jail startup. .It Va jail_ Ns Ao Ar jname Ac Ns Va _fdescfs_enable .Pq Vt bool Set to .Dq Li NO by default. When set to .Dq Li YES , mount the file-descriptor file system inside jail .Ar jname at jail startup. .It Va jail_ Ns Ao Ar jname Ac Ns Va _procfs_enable .Pq Vt bool Set to .Dq Li NO by default. When set to .Dq Li YES , mount the process file system inside jail .Ar jname at jail startup. .It Va jail_ Ns Ao Ar jname Ac Ns Va _exec_prestart Ns Aq Ar N .Pq Vt str Unset by default. This is the command run as .Ar N Ns th command before jail startup, where .Ar N is 0, 1, and so on. It is run outside the jail. .It Va jail_ Ns Ao Ar jname Ac Ns Va _exec_start .Pq Vt str Set to .Dq Li /bin/sh /etc/rc by default. This is the command executed in a jail at jail startup. .It Va jail_ Ns Ao Ar jname Ac Ns Va _exec_afterstart Ns Aq Ar N .Pq Vt str Unset by default. This is the command run as .Ar N Ns th command in a jail after jail startup, where .Ar N is 1, 2, and so on. .It Va jail_ Ns Ao Ar jname Ac Ns Va _exec_poststart Ns Aq Ar N .Pq Vt str Unset by default. This is the command run as .Ar N Ns th command after jail startup, where .Ar N is 0, 1, and so on. It is run outside the jail. .It Va jail_ Ns Ao Ar jname Ac Ns Va _exec_prestop Ns Aq Ar N .Pq Vt str Unset by default. This is the command run as .Ar N Ns th command before jail shutdown, where .Ar N is 0, 1, and so on. It is run outside the jail. .It Va jail_ Ns Ao Ar jname Ac Ns Va _exec_stop .Pq Vt str Set to .Dq Li /bin/sh /etc/rc.shutdown by default. This is the command executed in a jail at jail shutdown. .It Va jail_ Ns Ao Ar jname Ac Ns Va _exec_poststop Ns Aq Ar N .Pq Vt str Unset by default. This is the command run as .Ar N Ns th command after jail shutdown, where .Ar N is 0, 1, and so on. It is run outside the jail. .It Va jail_set_hostname_allow .Pq Vt bool If set to .Dq Li NO , do not allow the root user in a jail to set its hostname. .It Va jail_socket_unixiproute_only .Pq Vt bool If set to .Dq Li YES , do not allow any sockets, besides UNIX/IP/route sockets, to be used within a jail. .It Va jail_sysvipc_allow .Pq Vt bool If set to .Dq Li YES , allow applications within a jail to use System V IPC. .\" ----------------------------------------------------- .It Va harvest_interrupt .Pq Vt bool Set to .Dq Li YES to use hardware interrupts as an entropy source. Refer to .Xr random 4 for more information. .It Va harvest_ethernet .Pq Vt bool Set to .Dq Li YES to use LAN traffic as an entropy source. Refer to .Xr random 4 for more information. .It Va harvest_p_to_p .Pq Vt bool Set to .Dq Li YES to use serial line traffic as an entropy source. Refer to .Xr random 4 for more information. .It Va entropy_dir .Pq Vt str Set to .Dq Li NO to disable caching entropy via .Xr cron 8 . Otherwise set to the directory used to store entropy files in. .It Va entropy_file .Pq Vt str Set to .Dq Li NO to disable caching entropy through reboots. Otherwise set to the filename used to store cached entropy through reboots. This file should be located on the root file system to seed the .Xr random 4 device as early as possible in the boot process. .It Va entropy_save_sz .Pq Vt int Size of the entropy cache files saved by .Nm save-entropy periodically. .It Va entropy_save_num .Pq Vt int Number of entropy cache files to save by .Nm save-entropy periodically. .It Va ipsec_enable .Pq Vt bool Set to .Dq Li YES to run .Xr setkey 8 on .Va ipsec_file at boot time. .It Va ipsec_file .Pq Vt str Configuration file for .Xr setkey 8 . .It Va dmesg_enable .Pq Vt bool Set to .Dq Li YES to save .Xr dmesg 8 to .Pa /var/run/dmesg.boot on boot. .It Va rcshutdown_timeout .Pq Vt int If set, start a watchdog timer in the background which will terminate .Pa rc.shutdown if .Xr shutdown 8 has not completed within the specified time (in seconds). Notice that in addition to this soft timeout, .Xr init 8 also applies a hard timeout for the execution of .Pa rc.shutdown . This is configured via .Xr sysctl 8 variable .Va kern.init_shutdown_timeout and defaults to 120 seconds. Setting the value of .Va rcshutdown_timeout to more than 120 seconds will have no effect until the .Xr sysctl 8 variable .Va kern.init_shutdown_timeout is also increased. .It Va virecover_enable .Pq Vt bool Set to .Dq Li NO to prevent the system from trying to recover pre-maturely terminated .Xr vi 1 sessions. .It Va ugidfw_enable .Pq Vt bool Set to .Dq Li YES to load the .Xr mac_bsdextended 4 module upon system initialization and load a default ruleset file. .It Va bsdextended_script .Pq Vt str The default .Xr mac_bsdextended 4 ruleset file to load. The default value of this variable is .Pa /etc/rc.bsdextended . .It Va newsyslog_enable .Pq Vt bool If set to .Dq Li YES , run .Xr newsyslog 8 command at startup. .It Va newsyslog_flags .Pq Vt str If .Va newsyslog_enable is set to .Dq Li YES , these are the flags to pass to the .Xr newsyslog 8 program. The default is .Dq Li -CN , which causes log files flagged with a .Cm C to be created. .It Va mdconfig_md Ns Aq Ar X .Pq Vt str Arguments to .Xr mdconfig 8 for .Xr md 4 device .Ar X . At minimum a .Fl t Ar type must be specified and either a .Fl s Ar size for malloc or swap backed .Xr md 4 devices or a .Fl f Ar file for vnode backed .Xr md 4 devices. Note that .Va mdconfig_md Ns Aq Ar X variables are evaluated until one variable is unset or null. .It Va mdconfig_md Ns Ao Ar X Ac Ns Va _newfs .Pq Vt str Optional arguments passed to .Xr newfs 8 to initialize .Xr md 4 device .Ar X . .It Va mdconfig_md Ns Ao Ar X Ac Ns Va _owner .Pq Vt str An ownership specification passed to .Xr chown 8 after the specified .Xr md 4 device .Ar X has been mounted. Both the .Xr md 4 device and the mount point will be changed. .It Va mdconfig_md Ns Ao Ar X Ac Ns Va _perms .Pq Vt str A mode string passed to .Xr chmod 1 after the specified .Xr md 4 device .Ar X has been mounted. Both the .Xr md 4 device and the mount point will be changed. .It Va mdconfig_md Ns Ao Ar X Ac Ns Va _files .Pq Vt str Files to be copied to the mount point of the .Xr md 4 device .Ar X after it has been mounted. .It Va mdconfig_md Ns Ao Ar X Ac Ns Va _cmd .Pq Vt str Command to execute after the specified .Xr md 4 device .Ar X has been mounted. Note that the command is passed to .Ic eval and that both .Va _dev and .Va _mp variables can be used to reference respectively the .Xr md 4 device and the mount point. Assuming that the .Xr md 4 device is .Li md0 , one could set the following: .Bd -literal mdconfig_md0_cmd="tar xfzC /var/file.tgz \e${_mp}" .Ed .It Va autobridge_interfaces .Pq Vt str Set to the list of bridge interfaces that will have newly arriving interfaces checked against to be automatically added. If not set to .Dq Li NO then for each whitespace separated .Ar element in the value, a .Va autobridge_ Ns Aq Ar element variable is assumed to exist which has a whitespace separated list of interface names to match, these names can use wildcards. For example: .Bd -literal autobridge_interfaces="bridge0" autobridge_bridge0="tap* dc0 vlan[345]" .Ed .It Va mixer_enable .Pq Vt bool If set to .Dq Li YES , enable support for sound mixer. .It Va hcsecd_enable .Pq Vt bool If set to .Dq Li YES , enable Bluetooth security daemon. .It Va hcsecd_config .Pq Vt str Configuration file for .Xr hcsecd 8 . Default .Pa /etc/bluetooth/hcsecd.conf . .It Va sdpd_enable .Pq Vt bool If set to .Dq Li YES , enable Bluetooth Service Discovery Protocol daemon. .It Va sdpd_control .Pq Vt str Path to .Xr sdpd 8 control socket. Default .Pa /var/run/sdp . .It Va sdpd_groupname .Pq Vt str Sets .Xr sdpd 8 group to run as after it initializes. Default .Dq Li nobody . .It Va sdpd_username .Pq Vt str Sets .Xr sdpd 8 user to run as after it initializes. Default .Dq Li nobody . .It Va bthidd_enable .Pq Vt bool If set to .Dq Li YES , enable Bluetooth Human Interface Device daemon. .It Va bthidd_config .Pq Vt str Configuration file for .Xr bthidd 8 . Default .Pa /etc/bluetooth/bthidd.conf . .It Va bthidd_hids .Pq Vt str Path to a file, where .Xr bthidd 8 will store information about known HID devices. Default .Pa /var/db/bthidd.hids . .It Va rfcomm_pppd_server_enable .Pq Vt bool If set to .Dq Li YES , enable Bluetooth RFCOMM PPP wrapper daemon. .It Va rfcomm_pppd_server_profile .Pq Vt str The name of the profile to use from .Pa /etc/ppp/ppp.conf . Multiple profiles can be specified here. Also used to specify per-profile overrides. When the profile name contains any of the characters .Dq Li .-/+ they are translated to .Dq Li _ for the proposes of the override variable names. .It Va rfcomm_pppd_server_ Ns Ao Ar profile Ac Ns _bdaddr .Pq Vt str Overrides local address to listen on. By default .Xr rfcomm_pppd 8 will listen on .Dq Li ANY address. The address can be specified as BD_ADDR or name. .It Va rfcomm_pppd_server_ Ns Ao Ar profile Ac Ns _channel .Pq Vt str Overrides local RFCOMM channel to listen on. By default .Xr rfcomm_pppd 8 will listen on RFCOMM channel 1. Must set properly if multiple profiles used in the same time. .It Va rfcomm_pppd_server_ Ns Ao Ar profile Ac Ns _register_sp .Pq Vt bool Tells .Xr rfcomm_pppd 8 if it should register Serial Port service on the specified RFCOMM channel. Default .Dq Li NO . .It Va rfcomm_pppd_server_ Ns Ao Ar profile Ac Ns _register_dun .Pq Vt bool Tells .Xr rfcomm_pppd 8 if it should register Dial-Up Networking service on the specified RFCOMM channel. Default .Dq Li NO . .It Va ubthidhci_enable .Pq Vt bool If set to .Dq Li YES , change the USB Bluetooth controller from HID mode to HCI mode. You also need to specify the location of USB Bluetooth controller with the .Va ubthidhci_busnum and .Va ubthidhci_addr variables. .It Va ubthidhci_busnum Bus number where the USB Bluetooth controller is located. Check the output of .Xr usbconfig 8 on your system to find this information. .It Va ubthidhci_addr Bus address of the USB Bluetooth controller. Check the output of .Xr usbconfig 8 on your system to find this information. .El .Sh FILES .Bl -tag -width ".Pa /etc/defaults/rc.conf" -compact .It Pa /etc/defaults/rc.conf .It Pa /etc/rc.conf .It Pa /etc/rc.conf.local .El .Sh SEE ALSO .Xr catman 1 , .Xr chmod 1 , .Xr gdb 1 , .Xr info 1 , .Xr kbdcontrol 1 , .Xr makewhatis 1 , .Xr sh 1 , .Xr vi 1 , .Xr vidcontrol 1 , .Xr bridge 4 , .Xr dummynet 4 , .Xr ip 4 , .Xr ipf 4 , .Xr ipfw 4 , .Xr ipnat 4 , .Xr kld 4 , .Xr pf 4 , .Xr pflog 4 , .Xr pfsync 4 , .Xr tcp 4 , .Xr udp 4 , .Xr exports 5 , .Xr fstab 5 , .Xr ipf 5 , .Xr ipnat 5 , .Xr motd 5 , .Xr newsyslog.conf 5 , .Xr pf.conf 5 , .Xr security 7 , .Xr accton 8 , .Xr amd 8 , .Xr apm 8 , .Xr atm 8 , .Xr bthidd 8 , .Xr chkprintcap 8 , .Xr chown 8 , .Xr cron 8 , .Xr dhclient 8 , .Xr ftpd 8 , .Xr geli 8 , .Xr hcsecd 8 , .Xr ifconfig 8 , .Xr inetd 8 , .Xr ipf 8 , .Xr ipfw 8 , .Xr ipnat 8 , .Xr jail 8 , .Xr kldxref 8 , .Xr lpd 8 , .Xr mdconfig 8 , .Xr mdmfs 8 , .Xr mixer 8 , .Xr mountd 8 , .Xr moused 8 , .Xr mrouted 8 , .Xr named 8 , .Xr newfs 8 , .Xr newsyslog 8 , .Xr nfsd 8 , .Xr ntpd 8 , .Xr ntpdate 8 , .Xr pfctl 8 , .Xr pflogd 8 , .Xr powerd 8 , .Xr quotacheck 8 , .Xr quotaon 8 , .Xr rc 8 , .Xr rc.sendmail 8 , .Xr rfcomm_pppd 8 , .Xr route 8 , .Xr routed 8 , .Xr rpcbind 8 , .Xr rpc.lockd 8 , .Xr rpc.statd 8 , .Xr rwhod 8 , .Xr savecore 8 , .Xr sdpd 8 , .Xr sshd 8 , .Xr swapon 8 , .Xr sysctl 8 , .Xr syslogd 8 , .Xr timed 8 , .Xr usbconfig 8 , .Xr wlandebug 8 , .Xr yp 8 , .Xr ypbind 8 , .Xr ypserv 8 , .Xr ypset 8 .Sh HISTORY The .Nm file appeared in .Fx 2.2.2 . .Sh AUTHORS .An Jordan K. Hubbard . Index: projects/binutils-2.17/share/man/man9/vrele.9 =================================================================== --- projects/binutils-2.17/share/man/man9/vrele.9 (revision 215829) +++ projects/binutils-2.17/share/man/man9/vrele.9 (revision 215830) @@ -1,105 +1,101 @@ .\" -*- nroff -*- .\" .\" Copyright (c) 1996 Doug Rabson .\" Copyright (c) 2010 Konstantin Belousov .\" .\" All rights reserved. .\" .\" This program is free software. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE DEVELOPERS ``AS IS'' AND ANY EXPRESS OR .\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES .\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. .\" IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT, .\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT .\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, .\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY .\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd October 17, 2010 +.Dd November 20, 2010 .Dt VRELE 9 .Os .Sh NAME .Nm vput , .Nm vrele , .Nm vunref .Nd decrement the use count for a vnode .Sh SYNOPSIS .In sys/param.h .In sys/vnode.h .Ft void .Fn vput "struct vnode *vp" .Ft void .Fn vrele "struct vnode *vp" .Ft void .Fn vunref "struct vnode *vp" .Sh DESCRIPTION Decrement the .Va v_usecount field of a vnode. .Bl -tag -width 2n .It Fa vp the vnode to decrement .El .Pp The .Fn vrele function takes an unlocked vnode and returns with the vnode unlocked. .Pp The .Fn vput function should be given a locked vnode as argument, the vnode is unlocked after the function returned. The .Fn vput is operationally equivalent to calling .Xr VOP_UNLOCK 9 followed by .Xr vrele 9 , with less overhead. .Pp The .Fn vunref function takes a locked vnode as argument, and returns with the vnode locked. -Nonetheless, the -.Fn vunref -might drop the vnode lock during the operation, so caller should not expect -that non-doomed vnode is still non-doomed after the function returned. .Pp Any code in the system which signified its use of a vnode by usecount should call one of the listed function to decrement use counter. If the .Va v_usecount field of the non-doomed vnode reaches zero, then it will be inactivated and placed on the free list. Since the functions might need to call VOPs for the vnode, the .Va Giant mutex should be conditionally locked around the call. .Pp The hold count for the vnode is always greater or equal to the usecount. Non-forced unmount fails when mount point owns a vnode that has non-zero usecount, see .Xr vflush 9 . .Sh SEE ALSO .Xr vget 9 , .Xr vnode 9 , .Xr vref 9 , .Xr vrefcnt 9 .Sh AUTHORS This manual page was written by .An Doug Rabson and .An Konstantin Belousov . Index: projects/binutils-2.17/share/misc/bsd-family-tree =================================================================== --- projects/binutils-2.17/share/misc/bsd-family-tree (revision 215829) +++ projects/binutils-2.17/share/misc/bsd-family-tree (revision 215830) @@ -1,586 +1,587 @@ The UNIX system family tree: Research and BSD --------------------------------------------- First Edition (V1) | Second Edition (V2) | Third Edition (V3) | Fourth Edition (V4) | Fifth Edition (V5) | Sixth Edition (V6) -----* \ | \ | \ | Seventh Edition (V7) | \ | \ 1BSD 32V | \ 2BSD---------------* \ / | \ / | \/ | 3BSD | | | 4.0BSD 2.79BSD | | 4.1BSD --------------> 2.8BSD | | 4.1aBSD -----------\ | | \ | 4.1bBSD \ | | \ | *------ 4.1cBSD --------------> 2.9BSD / | | Eighth Edition | 2.9BSD-Seismo | | | +----<--- 4.2BSD 2.9.1BSD | | | +----<--- 4.3BSD -------------> 2.10BSD | | / | Ninth Edition | / 2.10.1BSD | 4.3BSD Tahoe-----+ | | | \ | | | \ | v | 2.11BSD Tenth Edition | | | 2.11BSD rev #430 4.3BSD NET/1 | | v 4.3BSD Reno | *---------- 4.3BSD NET/2 -------------------+-------------* | | | | 386BSD 0.0 | | BSD/386 ALPHA | | | | 386BSD 0.1 ------------>+ | BSD/386 0.3.[13] | \ | 4.4BSD Alpha | | 386BSD 1.0 | | BSD/386 0.9.[34] | | 4.4BSD | | | / | | | | 4.4BSD-Encumbered | | | NetBSD 0.8 | BSD/386 1.0 | | | | FreeBSD 1.0 NetBSD 0.9 | BSD/386 1.1 | | .----- 4.4BSD Lite | FreeBSD 1.1 | / / | \ | | | / / | \ | FreeBSD 1.1.5 .---|--------' / | \ | | / | / | \ | FreeBSD 1.1.5.1 / | / | \ | | / NetBSD 1.0 <-' | \ | | / | | \ | FreeBSD 2.0 <--' | | BSD/OS 2.0 | \ | | FreeBSD 2.0.5 \ | BSD/OS 2.0.1 | .-----\------------- 4.4BSD Lite2 | | | \ | | | | | | | .-----|------Rhapsody | | | | | | | | NetBSD 1.3 | | | | | | | OpenBSD 2.3 | | | | | | BSD/OS 3.0 | FreeBSD 2.1 | | | | | | | | NetBSD 1.1 ------. BSD/OS 2.1 | FreeBSD 2.1.5 | | | \ | | | | | NetBSD 1.2 \ BSD/OS 3.0 | FreeBSD 2.1.6 | | | \ OpenBSD 2.0 | | | | | | \ | | | FreeBSD 2.1.6.1 | | | \ | | | | | | | \ | | | FreeBSD 2.1.7 | | | | | | | | | | | NetBSD 1.2.1 | | | FreeBSD 2.1.7.1 | | | | | | | | | | | | | | | | | *-FreeBSD 2.2 | | | | | | \ | | | | | | FreeBSD 2.2.1 | | | | | | | | | | | | | FreeBSD 2.2.2 | | | OpenBSD 2.1 | | | | | | | | | FreeBSD 2.2.5 | | | | | | | | | | OpenBSD 2.2 | | | | | NetBSD 1.3 | | | FreeBSD 2.2.6 | | | | | | | | | | | NetBSD 1.3.1 | BSD/OS 3.1 | | | | | | OpenBSD 2.3 | | | | | | NetBSD 1.3.2 | | | FreeBSD 2.2.7 | | | | | | | | | | | | | BSD/OS 4.0 | v | | | | | | | FreeBSD 2.2.8 | | | | | | | | | | | OpenBSD 2.4 | FreeBSD 3.0 <--------* | | v | | | | | NetBSD 1.3.3 | | *---FreeBSD 3.1 | | | | | | | | | BSD/OS 4.0.1 | FreeBSD 3.2----* | NetBSD 1.4 OpenBSD 2.5 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | FreeBSD 3.3 | | | | NetBSD 1.4.1 | | | | | | | | | OpenBSD 2.6 | | FreeBSD 3.4 | | | | | | | | | | | | | | | BSD/OS 4.1 FreeBSD 4.0 | | | | | NetBSD 1.4.2 | | | | | | | | | | | | | | | | | | | | | FreeBSD 3.5 | | | | | OpenBSD 2.7 | | | | | | | | | | | FreeBSD 3.5.1 | | | | | | | | | | | | | | | *---FreeBSD 4.1 | | | | | | | | | | | (?) | | | | | FreeBSD 4.1.1 | | / | | | | | | | | / | | | | | FreeBSD 4.2 Darwin/ | NetBSD 1.4.3 | | | | Mac OS X | OpenBSD 2.8 BSD/OS 4.2 | | | | | | | | | | | | | | 10.0 NetBSD 1.5 | | | FreeBSD 4.3 | | | | | | | | | | OpenBSD 2.9 | | | | | NetBSD 1.5.1 | | | | | | | | | | FreeBSD 4.4-. | | NetBSD 1.5.2 | | | | | Mac OS X | | | | | | | 10.1 | | OpenBSD 3.0 | | FreeBSD 4.5 | | | | | | | | \ | | | | BSD/OS 4.3 | FreeBSD 4.6 \ | | | OpenBSD 3.1 | | | \ | | NetBSD 1.5.3 | | | FreeBSD 4.6.2 Mac OS X | | | | | 10.2 | | | | FreeBSD 4.7 | | | | | | | NetBSD 1.6 OpenBSD 3.2 | | FreeBSD 4.8 | | | | | | | | | NetBSD 1.6.1 | | | |--------. | | | OpenBSD 3.3 BSD/OS 5.0 | | \ | | | | | | FreeBSD 4.9 | | | | OpenBSD 3.4 BSD/OS 5.1 ISE | | | | | | | | | | | | NetBSD 1.6.2 | | | | | | | | | | | | | | OpenBSD 3.5 | | | | | v | | FreeBSD 4.10 | | | | | | | | | | | FreeBSD 4.11 | | | | | | | | | | `-|------|-----------------|---------------------. | | | | \ FreeBSD 5.0 | | | | | | | | | FreeBSD 5.1 | | | DragonFly 1.0 | \ | | | | | ----- Mac OS X | | | | 10.3 | | | FreeBSD 5.2 | | | | | | | | | | | FreeBSD 5.2.1 | | | | | | | | | *-------FreeBSD 5.3 | | | | | | | | OpenBSD 3.6 | | | | NetBSD 2.0 | | | | | | | | | DragonFly 1.2.0 | | Mac OS X | | NetBSD 2.0.2 | | | | 10.4 | | | | | | FreeBSD 5.4 | | | | | | | | | | | | OpenBSD 3.7 | | | | | | NetBSD 2.0.3 | | | | | | | | | | *--FreeBSD | | | | v OpenBSD 3.8 | | 6.0 | | | | | | | | | | | \ | | | | | | | NetBSD 2.1 | | | | | | | | | | | | | NetBSD 3.0 | | | | | | | | | | DragonFly 1.4.0 | | | | | | | OpenBSD 3.9 | | FreeBSD | | | | | | | | 6.1 | | | | | | | | | FreeBSD 5.5 | | | | | | | | | | | NetBSD 3.0.1 | DragonFly 1.6.0 | | | | | | | | | | | | | | OpenBSD 4.0 | | | | | | NetBSD 3.0.2 | | | | | | NetBSD 3.1 | | | FreeBSD 6.2 | | | | | | | | | DragonFly 1.8.0 | | | | OpenBSD 4.1 | | | | | | DragonFly 1.10.0 | | Mac OS X | | | | | 10.5 | | | | | | | OpenBSD 4.2 | | | | NetBSD 4.0 | | | FreeBSD 6.3 | | | | | \ | | | | *--FreeBSD | | | | DragonFly 1.12.0 | 7.0 | | | | | | | | | | OpenBSD 4.3 | | | | | | | DragonFly 2.0.0 | | FreeBSD | | OpenBSD 4.4 | | | 6.4 | | | | | | | | | | | FreeBSD 7.1 | | | | | | | | | DragonFly 2.2.0 | FreeBSD 7.2 | NetBSD 5.0 OpenBSD 4.5 | - | \ | | | | - | | | | | DragonFly 2.4.0 - | | | | OpenBSD 4.6 | - | | | | | | - *--FreeBSD | | | | | - | 8.0 | | | | | - | | FreeBSD | | | | - | | 7.3 | | | DragonFly 2.6.0 - | | | | OpenBSD 4.7 | - | FreeBSD | | | | - | 8.1 | | | | - | | | | | DragonFly 2.8.0 - | | | | OpenBSD 4.8 | - | V | | | | + | \ | | | | | + | | | | | | DragonFly 2.4.0 + | | | | | OpenBSD 4.6 | + | | | | | | | + *--FreeBSD | | | | | | + | 8.0 | | | | | | + | | FreeBSD | | | | | + | | 7.3 | | | | DragonFly 2.6.0 + | | | | | OpenBSD 4.7 | + | FreeBSD | | | | | + | 8.1 | | | | | + | | | | | | DragonFly 2.8.0 + | | | | | OpenBSD 4.8 | + | V | | NetBSD 5.1 | | | | | | | FreeBSD 9 -current | NetBSD -current OpenBSD -current | | | | | | v v v v v Time ---------------- Time tolerance +/- 6 months, depending on which book/article you read; if it was the announcement in Usenet or if it was available as tape. [44B] McKusick, Marshall Kirk, Keith Bostic, Michael J Karels, and John Quarterman. The Design and Implementation of the 4.4BSD Operating System. [APL] Apple website [http://www.apple.com/macosx/] [BSDI] Berkeley Software Design, Inc. [DFB] DragonFlyBSD Project, The. [DOC] README, COPYRIGHT on tape. [FBD] FreeBSD Project, The. [KB] Keith Bostic. BSD2.10 available from Usenix. comp.unix.sources, Volume 11, Info 4, April, 1987. [KKK] Mike Karels, Kirk McKusick, and Keith Bostic. tahoe announcement. comp.bugs.4bsd.ucb-fixes, June 15, 1988. [KSJ] Michael J. Karels, Carl F. Smith, and William F. Jolitz. Changes in the Kernel in 2.9BSD. Second Berkeley Software Distribution UNIX Version 2.9, July, 1983. [NBD] NetBSD Project, The. [OBD] OpenBSD Project, The. [QCU] Salus, Peter H. A quarter century of UNIX. [SMS] Steven M. Schultz. 2.11BSD, UNIX for the PDP-11. [TUHS] The Unix Historical Society. http://minnie.tuhs.org/Unix_History/. [USE] Usenet announcement. [WRS] Wind River Systems, Inc. [dmr] Dennis Ritchie, via E-Mail Multics 1965 UNIX Summer 1969 DEC PDP-7 First Edition 1971-11-03 [QCU] DEC PDP-11/20, Assembler Second Edition 1972-06-12 [QCU] 10 UNIX installations Third Edition 1973-02-xx [QCU] Pipes, 16 installations Fourth Edition 1973-11-xx [QCU] rewriting in C effected, above 30 installations Fifth Edition 1974-06-xx [QCU] above 50 installations Sixth Edition 1975-05-xx [QCU] port to DEC Vax Seventh Edition 1979-01-xx [QCU] first portable UNIX Eighth Edition 1985-02-xx [QCU] VAX 11/750, VAX 11/780 [dmr] descended from 4.1c BSD [dmr] descended from 4.1 BSD [44B] scooping-out and replacement of the character-device and networking part by the streams mechanism Ninth Edition 1986-09-xx [QCU] Tenth Edition 1989-10-xx [QCU] 1BSD late 1977 1978-03-09 [QCU] PDP-11, Pascal, ex(1) 30 free copies of 1BSD sent out 35 tapes sold for 50 USD [QCU] 2BSD mid 1978 [QCU] 1979-05-10 [TUHS] 75 2BSD tapes shipped 2.79BSD 1980-04-xx [TUHS] 2.8BSD 1981-07-xx [KSJ] 2.8.1BSD 1982-01-xx [QCU] set of performance improvements 2.9BSD 1983-07-xx [KSJ] 2.9.1BSD 1983-11-xx [TUHS] 2.9BSD-Seismo 1985-08-xx [SMS] 2.10BSD 1987-04-xx [KKK] 2.10.1BSD 1989-01-xx [SMS] 2.11BSD 1992-02-xx [SMS] 2.11BSD rev #430 1999-12-13 [SMS] 32V 1978-1[01]-xx [QCU] 3BSD late 1979 [QCU] March 1980 [TUHS] virtual memory, page replacement, demand paging 4.0BSD 1980-10-xx 4.1BSD 1981-07-08 [DOC] 4.1aBSD 1982-04-xx alpha release, 100 sites, networking [44B] 4.1bBSD internal release, fast filesystem [44B] 4.1cBSD late 1982 beta release, IPC [44B] 4.2BSD 1983-09-xx [QCU] 1983-08-03 [DOC] 4.3BSD 1986-06-xx [QCU] 1986-04-05 [KB], [DOC] 4.3BSD Tahoe 1988-06-15 [QCU], [DOC] 4.3BSD NET/1 1988-11-xx [QCU] 1989-01-01 [DOC] 4.3BSD Reno 1990-06-29 [QCU], [DOC] 4.3BSD NET/2 1991-06-28 [QCU], [DOC] BSD/386 ALPHA 1991-12-xx [BSDI] first code released to people outside BSDI 386BSD 0.0 1992-02-xx [DOC] BSD/386 0.3.1 1992-04-xx [BSDI] first ext. beta; B customers BSD/386 0.3.3 1992-06-xx [BSDI] first CDROM version 386BSD 0.1 1992-07-28 [DOC] 4.4BSD Alpha 1992-07-07 BSD/386 0.9.3 1992-10-xx [BSDI] first external gamma; G customers BSD/386 0.9.4 1992-12-xx [BSDI] would have been 1.0 except for request for preliminary injunction BSD/386 1.0 1993-03-xx [BSDI] injunction denied; first official release NetBSD 0.8 1993-04-20 [NBD] 4.4BSD 1993-06-01 [USE] NetBSD 0.9 1993-08-23 [NBD] FreeBSD 1.0 1993-11-01 [FBD] FreeBSD 1.0.2 1993-11-14 [FBD] supersedes 1.0 13 days after release. BSD/386 1.1 1994-02-xx [BSDI] 4.4BSD Lite 1994-03-01 [USE] FreeBSD 1.1 1994-05-07 [FBD] FreeBSD 1.1.5 1994-06-30 [FBD] FreeBSD 1.1.5.1 1994-07-05 [FBD] supersedes 1.1.5 5 days after release. NetBSD 1.0 1994-10-26 [NBD] 386BSD 1.0 1994-11-12 [USE] FreeBSD 2.0 1994-11-23 [FBD] BSD/OS 2.0 1995-01-xx [BSDI] 4.4 lite based FreeBSD 2.0.5 1995-06-10 [FBD] BSD/OS 2.0.1 1995-06-xx [BSDI] 4.4BSD Lite Release 2 1995-06-xx [44B] the true final distribution from the CSRG FreeBSD 2.1.0 1995-11-19 [FBD] NetBSD 1.1 1995-11-26 [NBD] BSD/OS 2.1 1996-01-xx [BSDI] FreeBSD 2.1.5 1996-07-14 [FBD] NetBSD 1.2 1996-10-04 [NBD] OpenBSD 2.0 1996-10-18 [OBD] FreeBSD 2.1.6 1996-11-16 [FBD] FreeBSD 2.1.6.1 1996-11-25 [FBD] (sendmail security release) Rhapsody 1997-xx-xx FreeBSD 2.1.7 1997-02-20 [FBD] BSD/OS 3.0 1997-02-xx [BSDI] 4.4 lite2 based FreeBSD 2.2.0 1997-03-16 [FBD] FreeBSD 2.2.1 1997-03-25 [FBD] FreeBSD 2.2.2 1997-05-16 [FBD] NetBSD 1.2.1 1997-05-20 [NBD] (patch release) OpenBSD 2.1 1997-06-01 [OBD] FreeBSD 2.2.5 1997-10-22 [FBD] OpenBSD 2.2 1997-12-01 [OBD] NetBSD 1.3 1998-01-04 [NBD] FreeBSD 2.2.6 1998-03-25 [FBD] NetBSD 1.3.1 1998-03-09 [NBD] (patch release) BSD/OS 3.1 1998-03-xx [BSDI] OpenBSD 2.3 1998-05-19 [OBD] NetBSD 1.3.2 1998-05-29 [NBD] (patch release) FreeBSD 2.2.7 1998-07-22 [FBD] BSD/OS 4.0 1998-08-xx [BSDI] 2-lock MP support, ELF executables FreeBSD 3.0 1998-10-16 [FBD] FreeBSD-3.0 is a snapshot from -current, while 3.1 and 3.2 are from 3.x-stable which was branched quite some time after 3.0-release FreeBSD 2.2.8 1998-11-29 [FBD] OpenBSD 2.4 1998-12-01 [OBD] NetBSD 1.3.3 1998-12-23 [NBD] (patch release) FreeBSD 3.1 1999-02-15 [FBD] BSD/OS 4.0.1 1999-03-xx [BSDI] NetBSD 1.4 1999-05-12 [NBD] FreeBSD 3.2 1999-05-17 [FBD] OpenBSD 2.5 1999-05-19 [OBD] NetBSD 1.4.1 1999-08-26 [NBD] (patch release) FreeBSD 3.3 1999-09-17 [FBD] OpenBSD 2.6 1999-12-01 [OBD] FreeBSD 3.4 1999-12-20 [FBD] BSD/OS 4.1 1999-12-xx [BSDI] FreeBSD 4.0 2000-03-13 [FBD] NetBSD 1.4.2 2000-03-19 [NBD] (patch release) OpenBSD 2.7 2000-06-15 [OBD] FreeBSD 3.5 2000-06-24 [FBD] FreeBSD 4.1 2000-07-27 [FBD] FreeBSD 3.5.1 2000-07-28 [FBD] FreeBSD 4.1.1 2000-09-25 [FBD] (a network-only patch release) FreeBSD 4.2 2000-11-21 [FBD] NetBSD 1.4.3 2000-11-25 [NBD] (patch release) BSD/OS 4.2 2000-11-29 [BSDI] OpenBSD 2.8 2000-12-01 [OBD] NetBSD 1.5 2000-12-06 [NBD] Mac OS X 10.0 2001-03-24 [APL] FreeBSD 4.3 2001-04-20 [FBD] OpenBSD 2.9 2001-06-01 [OBD] NetBSD 1.5.1 2001-07-11 [NBD] (patch release) NetBSD 1.5.2 2001-09-13 [NBD] (patch release) FreeBSD 4.4 2001-09-18 [FBD] Mac OS X 10.1 2001-09-29 [APL] OpenBSD 3.0 2001-12-01 [OBD] FreeBSD 4.5 2002-01-29 [FBD] BSD/OS 4.3 2002-03-14 [WRS] OpenBSD 3.1 2002-05-19 [OBD] FreeBSD 4.6 2002-06-15 [FBD] NetBSD 1.5.3 2002-07-22 [NBD] (patch release) FreeBSD 4.6.2 2002-08-15 [FBD] (patch release) Mac OS X 10.2 2002-08-23 [APL] NetBSD 1.6 2002-09-14 [NBD] FreeBSD 4.7 2002-10-08 [FBD] OpenBSD 3.2 2002-11-01 [OBD] FreeBSD 5.0 2003-01-17 [FBD] FreeBSD 5.0 is a separate branch off of -current, similar to 3.0. FreeBSD 4.8 2003-04-03 [FBD] NetBSD 1.6.1 2003-04-21 [NBD] (patch release) OpenBSD 3.3 2003-05-01 [OBD] BSD/OS 5.0 2003-05-?? [WRS] FreeBSD 5.1 2003-06-09 [FBD] Mac OS X 10.3 2003-10-24 [APL] FreeBSD 4.9 2003-10-28 [FBD] BSD/OS 5.1 ISE 2003-10-?? [WRS] (final version) OpenBSD 3.4 2003-11-01 [OBD] FreeBSD 5.2 2004-01-12 [FBD] FreeBSD 5.2.1 2004-02-22 [FBD] (patch release) NetBSD 1.6.2 2004-03-01 [NBD] (patch release) OpenBSD 3.5 2004-04-01 [OBD] FreeBSD 4.10 2004-05-27 [FBD] DragonFly 1.0 2004-07-12 [DFB] OpenBSD 3.6 2004-10-29 [OBD] FreeBSD 5.3 2004-11-06 [FBD] NetBSD 2.0 2004-12-09 [NBD] FreeBSD 4.11 2005-01-25 [FBD] DragonFly 1.2.0 2005-04-08 [DFB] NetBSD 2.0.2 2005-04-14 [NBD] (security/critical release) Mac OS X 10.4 2005-04-29 [APL] FreeBSD 5.4 2005-05-09 [FBD] OpenBSD 3.7 2005-05-19 [OBD] NetBSD 2.0.3 2005-10-31 [NBD] (security/critical release) OpenBSD 3.8 2005-11-01 [OBD] FreeBSD 6.0 2005-11-01 [FBD] NetBSD 2.1 2005-11-02 [NBD] NetBSD 3.0 2005-12-23 [NBD] DragonFly 1.4.0 2006-01-08 [DFB] OpenBSD 3.9 2006-05-01 [OBD] FreeBSD 6.1 2006-05-08 [FBD] FreeBSD 5.5 2006-05-25 [FBD] NetBSD 3.0.1 2006-07-24 [NBD] (security/critical release) DragonFly 1.6.0 2006-07-24 [DFB] OpenBSD 4.0 2006-11-01 [OBD] NetBSD 3.0.2 2006-11-04 [NBD] (security/critical release) NetBSD 3.1 2006-11-04 [NBD] FreeBSD 6.2 2007-01-15 [FBD] DragonFly 1.8.0 2007-01-30 [DFB] OpenBSD 4.1 2007-05-01 [OBD] DragonFly 1.10.0 2007-08-06 [DFB] Mac OS X 10.5 2007-10-26 [APL] OpenBSD 4.2 2007-11-01 [OBD] NetBSD 4.0 2007-12-19 [NBD] FreeBSD 6.3 2008-01-18 [FBD] DragonFly 1.12.0 2008-02-26 [DFB] FreeBSD 7.0 2008-02-27 [FBD] OpenBSD 4.3 2008-05-01 [OBD] DragonFly 2.0.0 2008-07-21 [DFB] OpenBSD 4.4 2008-11-01 [OBD] FreeBSD 6.4 2008-11-28 [FBD] FreeBSD 7.1 2009-01-04 [FBD] DragonFly 2.2.0 2009-02-17 [DFB] NetBSD 5.0 2009-04-29 [NBD] OpenBSD 4.5 2009-05-01 [OBD] FreeBSD 7.2 2009-05-04 [FBD] DragonFly 2.4.0 2009-09-16 [DFB] OpenBSD 4.6 2009-10-18 [OBD] FreeBSD 8.0 2009-11-26 [FBD] FreeBSD 7.3 2010-03-23 [FBD] DragonFly 2.6.0 2010-03-28 [DFB] OpenBSD 4.7 2010-05-19 [OBD] FreeBSD 8.1 2010-07-24 [FBD] DragonFly 2.8.0 2010-10-30 [DFB] OpenBSD 4.8 2010-11-01 [OBD] +NetBSD 5.1 2010-11-19 [NBD] Bibliography ------------------------ Leffler, Samuel J., Marshall Kirk McKusick, Michael J Karels and John Quarterman. The Design and Implementation of the 4.3BSD UNIX Operating System. Reading, Mass. Addison-Wesley, 1989. ISBN 0-201-06196-1 Salus, Peter H. A quarter century of UNIX. Addison-Wesley Publishing Company, Inc., 1994. ISBN 0-201-54777-5 McKusick, Marshall Kirk, Keith Bostic, Michael J Karels, and John Quarterman. The Design and Implementation of the 4.4BSD Operating System. Reading, Mass. Addison-Wesley, 1996. ISBN 0-201-54979-4 McKusick, Marshall Kirk, George Neville-Neil. The Design and Implementation of the FreeBSD Operating System. Addison-Wesley Professional, Published: Aug 2, 2004. ISBN 0-201-70245-2 Doug McIlroy. Research Unix Reader. Michael G. Brown. The Role of BSD in the Development of Unix. Presented to the Tasmanian Unix Special Interest Group of the Australian Computer Society, Hobart, August 1993. Peter H. Salus. Unix at 25. Byte Magazine, October 1994. URL: http://www.byte.com/art/9410/sec8/art3.htm Andreas Klemm, Lars Köller. If you're going to San Francisco ... Die freien BSD-Varianten von Unix. c't April 1997, page 368ff. BSD Release Announcements collection. URL: http://www.FreeBSD.org/releases/ BSD Hypertext Man Pages URL: http://www.FreeBSD.org/cgi/man.cgi UNIX history graphing project URL: http://minnie.tuhs.org/Unix_History/index.html UNIX history URL: http://www.levenez.com/unix/ James Howard: The BSD Family Tree URL: http://ezine.daemonnews.org/200104/bsd_family.html ("what are the differences between FreeBSD, NetBSD, and OpenBSD?") Acknowledgments --------------- Josh Gilliam for suggestions, bug fixes, and finding very old original BSD announcements from Usenet or tapes. Steven M. Schultz for providing 2.8BSD, 2.10BSD, 2.11BSD manual pages. -- Copyright (c) 1997-2007 Wolfram Schneider URL: http://cvsweb.freebsd.org/src/share/misc/bsd-family-tree $FreeBSD$ Index: projects/binutils-2.17/share/mk/bsd.arch.inc.mk =================================================================== --- projects/binutils-2.17/share/mk/bsd.arch.inc.mk (revision 215829) +++ projects/binutils-2.17/share/mk/bsd.arch.inc.mk (revision 215830) Property changes on: projects/binutils-2.17/share/mk/bsd.arch.inc.mk ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/share/mk/bsd.arch.inc.mk:r215709-215824 Index: projects/binutils-2.17/share/zoneinfo =================================================================== --- projects/binutils-2.17/share/zoneinfo (revision 215829) +++ projects/binutils-2.17/share/zoneinfo (revision 215830) Property changes on: projects/binutils-2.17/share/zoneinfo ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/share/zoneinfo:r215709-215824 Index: projects/binutils-2.17/sys/amd64/acpica/acpi_switch.S =================================================================== --- projects/binutils-2.17/sys/amd64/acpica/acpi_switch.S (revision 215829) +++ projects/binutils-2.17/sys/amd64/acpica/acpi_switch.S (revision 215830) @@ -1,169 +1,163 @@ /*- * Copyright (c) 2001 Takanori Watanabe * Copyright (c) 2001 Mitsuru IWASAKI * Copyright (c) 2008-2010 Jung-uk Kim * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include "acpi_wakedata.h" #include "assym.s" #define WAKEUP_CTX(member) wakeup_ ## member - wakeup_ctx(%rsi) ENTRY(acpi_restorecpu) /* Switch to KPML4phys. */ movq %rdi, %cr3 /* Restore GDT. */ lgdt WAKEUP_CTX(gdt) jmp 1f 1: /* Fetch PCB. */ movq WAKEUP_CTX(pcb), %rdi /* Force kernel segment registers. */ movl $KDSEL, %eax movw %ax, %ds movw %ax, %es movw %ax, %ss movl $KUF32SEL, %eax movw %ax, %fs movl $KUG32SEL, %eax movw %ax, %gs movl $MSR_FSBASE, %ecx movl PCB_FSBASE(%rdi), %eax movl 4 + PCB_FSBASE(%rdi), %edx wrmsr movl $MSR_GSBASE, %ecx movl PCB_GSBASE(%rdi), %eax movl 4 + PCB_GSBASE(%rdi), %edx wrmsr movl $MSR_KGSBASE, %ecx movl PCB_KGSBASE(%rdi), %eax movl 4 + PCB_KGSBASE(%rdi), %edx wrmsr /* Restore EFER. */ movl $MSR_EFER, %ecx movl WAKEUP_CTX(efer), %eax wrmsr - /* Restore PAT. */ - movl $MSR_PAT, %ecx - movl WAKEUP_CTX(pat), %eax - movl 4 + WAKEUP_CTX(pat), %edx - wrmsr - /* Restore fast syscall stuff. */ movl $MSR_STAR, %ecx movl WAKEUP_CTX(star), %eax movl 4 + WAKEUP_CTX(star), %edx wrmsr movl $MSR_LSTAR, %ecx movl WAKEUP_CTX(lstar), %eax movl 4 + WAKEUP_CTX(lstar), %edx wrmsr movl $MSR_CSTAR, %ecx movl WAKEUP_CTX(cstar), %eax movl 4 + WAKEUP_CTX(cstar), %edx wrmsr movl $MSR_SF_MASK, %ecx movl WAKEUP_CTX(sfmask), %eax wrmsr /* Restore CR0 except for FPU mode. */ movq PCB_CR0(%rdi), %rax movq %rax, %rcx andq $~(CR0_EM | CR0_TS), %rax movq %rax, %cr0 /* Restore CR2 and CR4. */ movq PCB_CR2(%rdi), %rax movq %rax, %cr2 movq PCB_CR4(%rdi), %rax movq %rax, %cr4 /* Restore descriptor tables. */ lidt PCB_IDT(%rdi) lldt PCB_LDT(%rdi) #define SDT_SYSTSS 9 #define SDT_SYSBSY 11 /* Clear "task busy" bit and reload TR. */ movq PCPU(TSS), %rax andb $(~SDT_SYSBSY | SDT_SYSTSS), 5(%rax) movw PCB_TR(%rdi), %ax ltr %ax #undef SDT_SYSTSS #undef SDT_SYSBSY /* Restore other callee saved registers. */ movq PCB_R15(%rdi), %r15 movq PCB_R14(%rdi), %r14 movq PCB_R13(%rdi), %r13 movq PCB_R12(%rdi), %r12 movq PCB_RBP(%rdi), %rbp movq PCB_RSP(%rdi), %rsp movq PCB_RBX(%rdi), %rbx /* Restore debug registers. */ movq PCB_DR0(%rdi), %rax movq %rax, %dr0 movq PCB_DR1(%rdi), %rax movq %rax, %dr1 movq PCB_DR2(%rdi), %rax movq %rax, %dr2 movq PCB_DR3(%rdi), %rax movq %rax, %dr3 movq PCB_DR6(%rdi), %rax movq %rax, %dr6 movq PCB_DR7(%rdi), %rax movq %rax, %dr7 /* Restore FPU state. */ fninit fxrstor PCB_USERFPU(%rdi) /* Reload CR0. */ movq %rcx, %cr0 /* Restore return address. */ movq PCB_RIP(%rdi), %rax movq %rax, (%rsp) /* Indicate the CPU is resumed. */ xorl %eax, %eax movl %eax, WAKEUP_CTX(cpu) ret END(acpi_restorecpu) Index: projects/binutils-2.17/sys/amd64/acpica/acpi_wakecode.S =================================================================== --- projects/binutils-2.17/sys/amd64/acpica/acpi_wakecode.S (revision 215829) +++ projects/binutils-2.17/sys/amd64/acpica/acpi_wakecode.S (revision 215830) @@ -1,289 +1,287 @@ /*- * Copyright (c) 2001 Takanori Watanabe * Copyright (c) 2001 Mitsuru IWASAKI * Copyright (c) 2003 Peter Wemm * Copyright (c) 2008-2010 Jung-uk Kim * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include "assym.s" /* * Resume entry point for real mode. * * If XFirmwareWakingVector is zero and FirmwareWakingVector is non-zero * in FACS, the BIOS enters here in real mode after POST with CS set to * (FirmwareWakingVector >> 4) and IP set to (FirmwareWakingVector & 0xf). * Depending on the previous sleep state, we may need to initialize more * of the system (i.e., S3 suspend-to-RAM vs. S4 suspend-to-disk). * * Note: If XFirmwareWakingVector is non-zero, it should disable address * translation/paging and interrupts, load all segment registers with * a flat 4 GB address space, and set EFLAGS.IF to zero. Currently * this mode is not supported by this code. */ .data /* So we can modify it */ ALIGN_TEXT .code16 wakeup_start: /* * Set up segment registers for real mode, a small stack for * any calls we make, and clear any flags. */ cli /* make sure no interrupts */ mov %cs, %ax /* copy %cs to %ds. Remember these */ mov %ax, %ds /* are offsets rather than selectors */ mov %ax, %ss movw $PAGE_SIZE, %sp xorw %ax, %ax pushw %ax popfw /* To debug resume hangs, beep the speaker if the user requested. */ testb $~0, resume_beep - wakeup_start jz 1f movb $0, resume_beep - wakeup_start movb $0xc0, %al outb %al, $0x42 movb $0x04, %al outb %al, $0x42 inb $0x61, %al orb $0x3, %al outb %al, $0x61 1: /* Re-initialize video BIOS if the reset_video tunable is set. */ testb $~0, reset_video - wakeup_start jz 1f movb $0, reset_video - wakeup_start lcall $0xc000, $3 /* When we reach here, int 0x10 should be ready. Hide cursor. */ movb $0x01, %ah movb $0x20, %ch int $0x10 /* Re-start in case the previous BIOS call clobbers them. */ jmp wakeup_start 1: /* * Find relocation base and patch the gdt descript and ljmp targets */ xorl %ebx, %ebx mov %cs, %bx sall $4, %ebx /* %ebx is now our relocation base */ /* * Load the descriptor table pointer. We'll need it when running * in 16-bit protected mode. */ lgdtl bootgdtdesc - wakeup_start /* Enable protected mode */ movl $CR0_PE, %eax mov %eax, %cr0 /* * Now execute a far jump to turn on protected mode. This * causes the segment registers to turn into selectors and causes * %cs to be loaded from the gdt. * * The following instruction is: * ljmpl $bootcode32 - bootgdt, $wakeup_32 - wakeup_start * but gas cannot assemble that. And besides, we patch the targets * in early startup and its a little clearer what we are patching. */ wakeup_sw32: .byte 0x66 /* size override to 32 bits */ .byte 0xea /* opcode for far jump */ .long wakeup_32 - wakeup_start /* offset in segment */ .word bootcode32 - bootgdt /* index in gdt for 32 bit code */ /* * At this point, we are running in 32 bit legacy protected mode. */ ALIGN_TEXT .code32 wakeup_32: mov $bootdata32 - bootgdt, %eax mov %ax, %ds /* Turn on the PAE and PSE bits for when paging is enabled */ mov %cr4, %eax orl $(CR4_PAE | CR4_PSE), %eax mov %eax, %cr4 /* * Enable EFER.LME so that we get long mode when all the prereqs are * in place. In this case, it turns on when CR0_PG is finally enabled. * Pick up a few other EFER bits that we'll use need we're here. */ movl $MSR_EFER, %ecx rdmsr orl $EFER_LME | EFER_SCE, %eax wrmsr /* * Point to the embedded page tables for startup. Note that this * only gets accessed after we're actually in 64 bit mode, however * we can only set the bottom 32 bits of %cr3 in this state. This * means we are required to use a temporary page table that is below * the 4GB limit. %ebx is still our relocation base. We could just * subtract 3 * PAGE_SIZE, but that would be too easy. */ leal wakeup_pagetables - wakeup_start(%ebx), %eax movl (%eax), %eax mov %eax, %cr3 /* * Finally, switch to long bit mode by enabling paging. We have * to be very careful here because all the segmentation disappears * out from underneath us. The spec says we can depend on the * subsequent pipelined branch to execute, but *only if* everthing * is still identity mapped. If any mappings change, the pipeline * will flush. */ mov %cr0, %eax orl $CR0_PG, %eax mov %eax, %cr0 /* * At this point paging is enabled, and we are in "compatability" mode. * We do another far jump to reload %cs with the 64 bit selector. * %cr3 points to a 4-level page table page. * We cannot yet jump all the way to the kernel because we can only * specify a 32 bit linear address. So, yet another trampoline. * * The following instruction is: * ljmp $bootcode64 - bootgdt, $wakeup_64 - wakeup_start * but gas cannot assemble that. And besides, we patch the targets * in early startup and its a little clearer what we are patching. */ wakeup_sw64: .byte 0xea /* opcode for far jump */ .long wakeup_64 - wakeup_start /* offset in segment */ .word bootcode64 - bootgdt /* index in gdt for 64 bit code */ /* * Yeehar! We're running in 64-bit mode! We can mostly ignore our * segment registers, and get on with it. * Note that we are running at the correct virtual address, but with * a 1:1 1GB mirrored mapping over entire address space. We had better * switch to a real %cr3 promptly so that we can get to the direct map * space. Remember that jmp is relative and that we've been relocated, * so use an indirect jump. */ ALIGN_TEXT .code64 wakeup_64: mov $bootdata64 - bootgdt, %eax mov %ax, %ds /* Restore arguments and return. */ movq wakeup_kpml4 - wakeup_start(%rbx), %rdi movq wakeup_ctx - wakeup_start(%rbx), %rsi movq wakeup_retaddr - wakeup_start(%rbx), %rax jmp *%rax .data resume_beep: .byte 0 reset_video: .byte 0 ALIGN_DATA bootgdt: .long 0x00000000 .long 0x00000000 .long 0x00000000 .long 0x00000000 .long 0x00000000 .long 0x00000000 .long 0x00000000 .long 0x00000000 bootcode64: .long 0x0000ffff .long 0x00af9b00 bootdata64: .long 0x0000ffff .long 0x00af9300 bootcode32: .long 0x0000ffff .long 0x00cf9b00 bootdata32: .long 0x0000ffff .long 0x00cf9300 bootgdtend: wakeup_pagetables: .long 0 bootgdtdesc: .word bootgdtend - bootgdt /* Length */ .long bootgdt - wakeup_start /* Offset plus %ds << 4 */ ALIGN_DATA wakeup_retaddr: .quad 0 wakeup_kpml4: .quad 0 wakeup_ctx: .quad 0 wakeup_pcb: .quad 0 wakeup_gdt: .word 0 .quad 0 ALIGN_DATA wakeup_efer: .quad 0 -wakeup_pat: - .quad 0 wakeup_star: .quad 0 wakeup_lstar: .quad 0 wakeup_cstar: .quad 0 wakeup_sfmask: .quad 0 wakeup_cpu: .long 0 dummy: Index: projects/binutils-2.17/sys/amd64/acpica/acpi_wakeup.c =================================================================== --- projects/binutils-2.17/sys/amd64/acpica/acpi_wakeup.c (revision 215829) +++ projects/binutils-2.17/sys/amd64/acpica/acpi_wakeup.c (revision 215830) @@ -1,410 +1,410 @@ /*- * Copyright (c) 2001 Takanori Watanabe * Copyright (c) 2001 Mitsuru IWASAKI * Copyright (c) 2003 Peter Wemm * Copyright (c) 2008-2010 Jung-uk Kim * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #include #include #endif #include #include #include "acpi_wakecode.h" #include "acpi_wakedata.h" /* Make sure the code is less than a page and leave room for the stack. */ CTASSERT(sizeof(wakecode) < PAGE_SIZE - 1024); extern int acpi_resume_beep; extern int acpi_reset_video; #ifdef SMP extern struct pcb **susppcbs; #else static struct pcb **susppcbs; #endif int acpi_restorecpu(vm_offset_t, struct pcb *); static void *acpi_alloc_wakeup_handler(void); static void acpi_stop_beep(void *); #ifdef SMP static int acpi_wakeup_ap(struct acpi_softc *, int); static void acpi_wakeup_cpus(struct acpi_softc *, cpumask_t); #endif #define WAKECODE_VADDR(sc) ((sc)->acpi_wakeaddr + (3 * PAGE_SIZE)) #define WAKECODE_PADDR(sc) ((sc)->acpi_wakephys + (3 * PAGE_SIZE)) #define WAKECODE_FIXUP(offset, type, val) do { \ type *addr; \ addr = (type *)(WAKECODE_VADDR(sc) + offset); \ *addr = val; \ } while (0) /* Turn off bits 1&2 of the PIT, stopping the beep. */ static void acpi_stop_beep(void *arg) { outb(0x61, inb(0x61) & ~0x3); } #ifdef SMP static int acpi_wakeup_ap(struct acpi_softc *sc, int cpu) { int vector = (WAKECODE_PADDR(sc) >> 12) & 0xff; int apic_id = cpu_apic_ids[cpu]; int ms; WAKECODE_FIXUP(wakeup_pcb, struct pcb *, susppcbs[cpu]); WAKECODE_FIXUP(wakeup_gdt, uint16_t, susppcbs[cpu]->pcb_gdt.rd_limit); WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t, susppcbs[cpu]->pcb_gdt.rd_base); WAKECODE_FIXUP(wakeup_cpu, int, cpu); /* do an INIT IPI: assert RESET */ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id); /* wait for pending status end */ lapic_ipi_wait(-1); /* do an INIT IPI: deassert RESET */ lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL | APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0); /* wait for pending status end */ DELAY(10000); /* wait ~10mS */ lapic_ipi_wait(-1); /* * next we do a STARTUP IPI: the previous INIT IPI might still be * latched, (P5 bug) this 1st STARTUP would then terminate * immediately, and the previously started INIT IPI would continue. OR * the previous INIT IPI has already run. and this STARTUP IPI will * run. OR the previous INIT IPI was ignored. and this STARTUP IPI * will run. */ /* do a STARTUP IPI */ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | vector, apic_id); lapic_ipi_wait(-1); DELAY(200); /* wait ~200uS */ /* * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is * recognized after hardware RESET or INIT IPI. */ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | vector, apic_id); lapic_ipi_wait(-1); DELAY(200); /* wait ~200uS */ /* Wait up to 5 seconds for it to start. */ for (ms = 0; ms < 5000; ms++) { if (*(int *)(WAKECODE_VADDR(sc) + wakeup_cpu) == 0) return (1); /* return SUCCESS */ DELAY(1000); } return (0); /* return FAILURE */ } #define WARMBOOT_TARGET 0 #define WARMBOOT_OFF (KERNBASE + 0x0467) #define WARMBOOT_SEG (KERNBASE + 0x0469) #define CMOS_REG (0x70) #define CMOS_DATA (0x71) #define BIOS_RESET (0x0f) #define BIOS_WARM (0x0a) static void acpi_wakeup_cpus(struct acpi_softc *sc, cpumask_t wakeup_cpus) { uint32_t mpbioswarmvec; int cpu; u_char mpbiosreason; /* save the current value of the warm-start vector */ mpbioswarmvec = *((uint32_t *)WARMBOOT_OFF); outb(CMOS_REG, BIOS_RESET); mpbiosreason = inb(CMOS_DATA); /* setup a vector to our boot code */ *((volatile u_short *)WARMBOOT_OFF) = WARMBOOT_TARGET; *((volatile u_short *)WARMBOOT_SEG) = WAKECODE_PADDR(sc) >> 4; outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ /* Wake up each AP. */ for (cpu = 1; cpu < mp_ncpus; cpu++) { if ((wakeup_cpus & (1 << cpu)) == 0) continue; if (acpi_wakeup_ap(sc, cpu) == 0) { /* restore the warmstart vector */ *(uint32_t *)WARMBOOT_OFF = mpbioswarmvec; panic("acpi_wakeup: failed to resume AP #%d (PHY #%d)", cpu, cpu_apic_ids[cpu]); } } /* restore the warmstart vector */ *(uint32_t *)WARMBOOT_OFF = mpbioswarmvec; outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, mpbiosreason); } #endif int acpi_sleep_machdep(struct acpi_softc *sc, int state) { #ifdef SMP cpumask_t wakeup_cpus; #endif register_t cr3, rf; ACPI_STATUS status; int ret; ret = -1; if (sc->acpi_wakeaddr == 0ul) return (ret); #ifdef SMP wakeup_cpus = PCPU_GET(other_cpus); #endif AcpiSetFirmwareWakingVector(WAKECODE_PADDR(sc)); rf = intr_disable(); intr_suspend(); /* * Temporarily switch to the kernel pmap because it provides * an identity mapping (setup at boot) for the low physical * memory region containing the wakeup code. */ cr3 = rcr3(); load_cr3(KPML4phys); if (savectx(susppcbs[0])) { #ifdef SMP if (wakeup_cpus != 0 && suspend_cpus(wakeup_cpus) == 0) { device_printf(sc->acpi_dev, "Failed to suspend APs: CPU mask = 0x%jx\n", (uintmax_t)(wakeup_cpus & ~stopped_cpus)); goto out; } #endif WAKECODE_FIXUP(resume_beep, uint8_t, (acpi_resume_beep != 0)); WAKECODE_FIXUP(reset_video, uint8_t, (acpi_reset_video != 0)); WAKECODE_FIXUP(wakeup_pcb, struct pcb *, susppcbs[0]); WAKECODE_FIXUP(wakeup_gdt, uint16_t, susppcbs[0]->pcb_gdt.rd_limit); WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t, susppcbs[0]->pcb_gdt.rd_base); WAKECODE_FIXUP(wakeup_cpu, int, 0); /* Call ACPICA to enter the desired sleep state */ if (state == ACPI_STATE_S4 && sc->acpi_s4bios) status = AcpiEnterSleepStateS4bios(); else status = AcpiEnterSleepState(state); if (status != AE_OK) { device_printf(sc->acpi_dev, "AcpiEnterSleepState failed - %s\n", AcpiFormatException(status)); goto out; } for (;;) ia32_pause(); } else { + pmap_init_pat(); PCPU_SET(switchtime, 0); PCPU_SET(switchticks, ticks); #ifdef SMP if (wakeup_cpus != 0) acpi_wakeup_cpus(sc, wakeup_cpus); #endif acpi_resync_clock(sc); ret = 0; } out: #ifdef SMP if (wakeup_cpus != 0) restart_cpus(wakeup_cpus); #endif load_cr3(cr3); mca_resume(); intr_resume(); intr_restore(rf); AcpiSetFirmwareWakingVector(0); if (ret == 0 && mem_range_softc.mr_op != NULL && mem_range_softc.mr_op->reinit != NULL) mem_range_softc.mr_op->reinit(&mem_range_softc); /* If we beeped, turn it off after a delay. */ if (acpi_resume_beep) timeout(acpi_stop_beep, NULL, 3 * hz); return (ret); } static void * acpi_alloc_wakeup_handler(void) { void *wakeaddr; int i; /* * Specify the region for our wakeup code. We want it in the low 1 MB * region, excluding real mode IVT (0-0x3ff), BDA (0x400-0x4ff), EBDA * (less than 128KB, below 0xa0000, must be excluded by SMAP and DSDT), * and ROM area (0xa0000 and above). The temporary page tables must be * page-aligned. */ wakeaddr = contigmalloc(4 * PAGE_SIZE, M_DEVBUF, M_NOWAIT, 0x500, 0xa0000, PAGE_SIZE, 0ul); if (wakeaddr == NULL) { printf("%s: can't alloc wake memory\n", __func__); return (NULL); } susppcbs = malloc(mp_ncpus * sizeof(*susppcbs), M_DEVBUF, M_WAITOK); for (i = 0; i < mp_ncpus; i++) susppcbs[i] = malloc(sizeof(**susppcbs), M_DEVBUF, M_WAITOK); return (wakeaddr); } void acpi_install_wakeup_handler(struct acpi_softc *sc) { static void *wakeaddr = NULL; uint64_t *pt4, *pt3, *pt2; int i; if (wakeaddr != NULL) return; wakeaddr = acpi_alloc_wakeup_handler(); if (wakeaddr == NULL) return; sc->acpi_wakeaddr = (vm_offset_t)wakeaddr; sc->acpi_wakephys = vtophys(wakeaddr); bcopy(wakecode, (void *)WAKECODE_VADDR(sc), sizeof(wakecode)); /* Patch GDT base address, ljmp targets and page table base address. */ WAKECODE_FIXUP((bootgdtdesc + 2), uint32_t, WAKECODE_PADDR(sc) + bootgdt); WAKECODE_FIXUP((wakeup_sw32 + 2), uint32_t, WAKECODE_PADDR(sc) + wakeup_32); WAKECODE_FIXUP((wakeup_sw64 + 1), uint32_t, WAKECODE_PADDR(sc) + wakeup_64); WAKECODE_FIXUP(wakeup_pagetables, uint32_t, sc->acpi_wakephys); /* Save pointers to some global data. */ WAKECODE_FIXUP(wakeup_retaddr, void *, acpi_restorecpu); WAKECODE_FIXUP(wakeup_kpml4, uint64_t, KPML4phys); WAKECODE_FIXUP(wakeup_ctx, vm_offset_t, WAKECODE_VADDR(sc) + wakeup_ctx); WAKECODE_FIXUP(wakeup_efer, uint64_t, rdmsr(MSR_EFER)); - WAKECODE_FIXUP(wakeup_pat, uint64_t, rdmsr(MSR_PAT)); WAKECODE_FIXUP(wakeup_star, uint64_t, rdmsr(MSR_STAR)); WAKECODE_FIXUP(wakeup_lstar, uint64_t, rdmsr(MSR_LSTAR)); WAKECODE_FIXUP(wakeup_cstar, uint64_t, rdmsr(MSR_CSTAR)); WAKECODE_FIXUP(wakeup_sfmask, uint64_t, rdmsr(MSR_SF_MASK)); /* Build temporary page tables below realmode code. */ pt4 = wakeaddr; pt3 = pt4 + (PAGE_SIZE) / sizeof(uint64_t); pt2 = pt3 + (PAGE_SIZE) / sizeof(uint64_t); /* Create the initial 1GB replicated page tables */ for (i = 0; i < 512; i++) { /* * Each slot of the level 4 pages points * to the same level 3 page */ pt4[i] = (uint64_t)(sc->acpi_wakephys + PAGE_SIZE); pt4[i] |= PG_V | PG_RW | PG_U; /* * Each slot of the level 3 pages points * to the same level 2 page */ pt3[i] = (uint64_t)(sc->acpi_wakephys + (2 * PAGE_SIZE)); pt3[i] |= PG_V | PG_RW | PG_U; /* The level 2 page slots are mapped with 2MB pages for 1GB. */ pt2[i] = i * (2 * 1024 * 1024); pt2[i] |= PG_V | PG_RW | PG_PS | PG_U; } if (bootverbose) device_printf(sc->acpi_dev, "wakeup code va %p pa %p\n", (void *)sc->acpi_wakeaddr, (void *)sc->acpi_wakephys); } Index: projects/binutils-2.17/sys/amd64/amd64/cpu_switch.S =================================================================== --- projects/binutils-2.17/sys/amd64/amd64/cpu_switch.S (revision 215829) +++ projects/binutils-2.17/sys/amd64/amd64/cpu_switch.S (revision 215830) @@ -1,363 +1,363 @@ /*- * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include "assym.s" #include "opt_sched.h" /*****************************************************************************/ /* Scheduling */ /*****************************************************************************/ .text #ifdef SMP #define LK lock ; #else #define LK #endif #if defined(SCHED_ULE) && defined(SMP) #define SETLK xchgq #else #define SETLK movq #endif /* * cpu_throw() * * This is the second half of cpu_switch(). It is used when the current * thread is either a dummy or slated to die, and we no longer care * about its state. This is only a slight optimization and is probably * not worth it anymore. Note that we need to clear the pm_active bits so * we do need the old proc if it still exists. * %rdi = oldtd * %rsi = newtd */ ENTRY(cpu_throw) movl PCPU(CPUID),%eax testq %rdi,%rdi jz 1f /* release bit from old pm_active */ movq PCPU(CURPMAP),%rdx LK btrl %eax,PM_ACTIVE(%rdx) /* clear old */ 1: movq TD_PCB(%rsi),%r8 /* newtd->td_proc */ movq PCB_CR3(%r8),%rdx movq %rdx,%cr3 /* new address space */ jmp swact END(cpu_throw) /* * cpu_switch(old, new, mtx) * * Save the current thread state, then select the next thread to run * and load its state. * %rdi = oldtd * %rsi = newtd * %rdx = mtx */ ENTRY(cpu_switch) /* Switch to new thread. First, save context. */ movq TD_PCB(%rdi),%r8 movb $1,PCB_FULL_IRET(%r8) movq (%rsp),%rax /* Hardware registers */ movq %r15,PCB_R15(%r8) movq %r14,PCB_R14(%r8) movq %r13,PCB_R13(%r8) movq %r12,PCB_R12(%r8) movq %rbp,PCB_RBP(%r8) movq %rsp,PCB_RSP(%r8) movq %rbx,PCB_RBX(%r8) movq %rax,PCB_RIP(%r8) testl $PCB_DBREGS,PCB_FLAGS(%r8) jnz store_dr /* static predict not taken */ done_store_dr: /* have we used fp, and need a save? */ cmpq %rdi,PCPU(FPCURTHREAD) jne 1f movq PCB_SAVEFPU(%r8),%r8 clts fxsave (%r8) smsw %ax orb $CR0_TS,%al lmsw %ax xorl %eax,%eax movq %rax,PCPU(FPCURTHREAD) 1: /* Save is done. Now fire up new thread. Leave old vmspace. */ movq TD_PCB(%rsi),%r8 /* switch address space */ movq PCB_CR3(%r8),%rcx movq %cr3,%rax cmpq %rcx,%rax /* Same address space? */ jne swinact SETLK %rdx, TD_LOCK(%rdi) /* Release the old thread */ jmp sw1 swinact: movq %rcx,%cr3 /* new address space */ movl PCPU(CPUID), %eax /* Release bit from old pmap->pm_active */ movq PCPU(CURPMAP),%rcx LK btrl %eax,PM_ACTIVE(%rcx) /* clear old */ SETLK %rdx, TD_LOCK(%rdi) /* Release the old thread */ swact: /* Set bit in new pmap->pm_active */ movq TD_PROC(%rsi),%rdx /* newproc */ movq P_VMSPACE(%rdx), %rdx addq $VM_PMAP,%rdx LK btsl %eax,PM_ACTIVE(%rdx) /* set new */ movq %rdx,PCPU(CURPMAP) sw1: #if defined(SCHED_ULE) && defined(SMP) /* Wait for the new thread to become unblocked */ movq $blocked_lock, %rdx 1: movq TD_LOCK(%rsi),%rcx cmpq %rcx, %rdx pause je 1b #endif /* * At this point, we've switched address spaces and are ready * to load up the rest of the next context. */ /* Skip loading user fsbase/gsbase for kthreads */ testl $TDP_KTHREAD,TD_PFLAGS(%rsi) jnz do_kthread /* * Load ldt register */ movq TD_PROC(%rsi),%rcx cmpq $0, P_MD+MD_LDT(%rcx) jne do_ldt xorl %eax,%eax ld_ldt: lldt %ax /* Restore fs base in GDT */ movl PCB_FSBASE(%r8),%eax movq PCPU(FS32P),%rdx movw %ax,2(%rdx) shrl $16,%eax movb %al,4(%rdx) shrl $8,%eax movb %al,7(%rdx) /* Restore gs base in GDT */ movl PCB_GSBASE(%r8),%eax movq PCPU(GS32P),%rdx movw %ax,2(%rdx) shrl $16,%eax movb %al,4(%rdx) shrl $8,%eax movb %al,7(%rdx) do_kthread: /* Do we need to reload tss ? */ movq PCPU(TSSP),%rax movq PCB_TSSP(%r8),%rdx testq %rdx,%rdx cmovzq PCPU(COMMONTSSP),%rdx cmpq %rax,%rdx jne do_tss done_tss: movq %r8,PCPU(RSP0) movq %r8,PCPU(CURPCB) /* Update the TSS_RSP0 pointer for the next interrupt */ movq %r8,COMMON_TSS_RSP0(%rdx) movq %rsi,PCPU(CURTHREAD) /* into next thread */ /* Test if debug registers should be restored. */ testl $PCB_DBREGS,PCB_FLAGS(%r8) jnz load_dr /* static predict not taken */ done_load_dr: /* Restore context. */ movq PCB_R15(%r8),%r15 movq PCB_R14(%r8),%r14 movq PCB_R13(%r8),%r13 movq PCB_R12(%r8),%r12 movq PCB_RBP(%r8),%rbp movq PCB_RSP(%r8),%rsp movq PCB_RBX(%r8),%rbx movq PCB_RIP(%r8),%rax movq %rax,(%rsp) ret /* * We order these strangely for several reasons. * 1: I wanted to use static branch prediction hints * 2: Most athlon64/opteron cpus don't have them. They define * a forward branch as 'predict not taken'. Intel cores have * the 'rep' prefix to invert this. * So, to make it work on both forms of cpu we do the detour. * We use jumps rather than call in order to avoid the stack. */ store_dr: movq %dr7,%rax /* yes, do the save */ movq %dr0,%r15 movq %dr1,%r14 movq %dr2,%r13 movq %dr3,%r12 movq %dr6,%r11 movq %r15,PCB_DR0(%r8) movq %r14,PCB_DR1(%r8) movq %r13,PCB_DR2(%r8) movq %r12,PCB_DR3(%r8) movq %r11,PCB_DR6(%r8) movq %rax,PCB_DR7(%r8) andq $0x0000fc00, %rax /* disable all watchpoints */ movq %rax,%dr7 jmp done_store_dr load_dr: movq %dr7,%rax movq PCB_DR0(%r8),%r15 movq PCB_DR1(%r8),%r14 movq PCB_DR2(%r8),%r13 movq PCB_DR3(%r8),%r12 movq PCB_DR6(%r8),%r11 movq PCB_DR7(%r8),%rcx movq %r15,%dr0 movq %r14,%dr1 /* Preserve reserved bits in %dr7 */ andq $0x0000fc00,%rax andq $~0x0000fc00,%rcx movq %r13,%dr2 movq %r12,%dr3 orq %rcx,%rax movq %r11,%dr6 movq %rax,%dr7 jmp done_load_dr do_tss: movq %rdx,PCPU(TSSP) movq %rdx,%rcx movq PCPU(TSS),%rax - movw %rcx,2(%rax) + movw %cx,2(%rax) shrq $16,%rcx movb %cl,4(%rax) shrq $8,%rcx movb %cl,7(%rax) shrq $8,%rcx movl %ecx,8(%rax) movb $0x89,5(%rax) /* unset busy */ movl $TSSSEL,%eax ltr %ax jmp done_tss do_ldt: movq PCPU(LDT),%rax movq P_MD+MD_LDT_SD(%rcx),%rdx movq %rdx,(%rax) movq P_MD+MD_LDT_SD+8(%rcx),%rdx movq %rdx,8(%rax) movl $LDTSEL,%eax jmp ld_ldt END(cpu_switch) /* * savectx(pcb) * Update pcb, saving current processor state. */ ENTRY(savectx) /* Save caller's return address. */ movq (%rsp),%rax movq %rax,PCB_RIP(%rdi) movq %rbx,PCB_RBX(%rdi) movq %rsp,PCB_RSP(%rdi) movq %rbp,PCB_RBP(%rdi) movq %r12,PCB_R12(%rdi) movq %r13,PCB_R13(%rdi) movq %r14,PCB_R14(%rdi) movq %r15,PCB_R15(%rdi) movq %cr0,%rsi movq %rsi,PCB_CR0(%rdi) movq %cr2,%rax movq %rax,PCB_CR2(%rdi) movq %cr3,%rax movq %rax,PCB_CR3(%rdi) movq %cr4,%rax movq %rax,PCB_CR4(%rdi) movq %dr0,%rax movq %rax,PCB_DR0(%rdi) movq %dr1,%rax movq %rax,PCB_DR1(%rdi) movq %dr2,%rax movq %rax,PCB_DR2(%rdi) movq %dr3,%rax movq %rax,PCB_DR3(%rdi) movq %dr6,%rax movq %rax,PCB_DR6(%rdi) movq %dr7,%rax movq %rax,PCB_DR7(%rdi) movl $MSR_FSBASE,%ecx rdmsr movl %eax,PCB_FSBASE(%rdi) movl %edx,PCB_FSBASE+4(%rdi) movl $MSR_GSBASE,%ecx rdmsr movl %eax,PCB_GSBASE(%rdi) movl %edx,PCB_GSBASE+4(%rdi) movl $MSR_KGSBASE,%ecx rdmsr movl %eax,PCB_KGSBASE(%rdi) movl %edx,PCB_KGSBASE+4(%rdi) sgdt PCB_GDT(%rdi) sidt PCB_IDT(%rdi) sldt PCB_LDT(%rdi) str PCB_TR(%rdi) clts fxsave PCB_USERFPU(%rdi) movq %rsi,%cr0 /* The previous %cr0 is saved in %rsi. */ movl $1,%eax ret END(savectx) Index: projects/binutils-2.17/sys/amd64/amd64/mp_machdep.c =================================================================== --- projects/binutils-2.17/sys/amd64/amd64/mp_machdep.c (revision 215829) +++ projects/binutils-2.17/sys/amd64/amd64/mp_machdep.c (revision 215830) @@ -1,1630 +1,1631 @@ /*- * Copyright (c) 1996, by Steve Passe * Copyright (c) 2003, by Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include "opt_kstack_pages.h" #include "opt_mp_watchdog.h" #include "opt_sched.h" #include "opt_smp.h" #include #include #include #ifdef GPROF #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define WARMBOOT_TARGET 0 #define WARMBOOT_OFF (KERNBASE + 0x0467) #define WARMBOOT_SEG (KERNBASE + 0x0469) #define CMOS_REG (0x70) #define CMOS_DATA (0x71) #define BIOS_RESET (0x0f) #define BIOS_WARM (0x0a) /* lock region used by kernel profiling */ int mcount_lock; int mp_naps; /* # of Applications processors */ int boot_cpu_id = -1; /* designated BSP */ extern struct pcpu __pcpu[]; /* AP uses this during bootstrap. Do not staticize. */ char *bootSTK; static int bootAP; /* Free these after use */ void *bootstacks[MAXCPU]; /* Temporary variables for init_secondary() */ char *doublefault_stack; char *nmi_stack; void *dpcpu; struct pcb stoppcbs[MAXCPU]; struct pcb **susppcbs = NULL; /* Variables needed for SMP tlb shootdown. */ vm_offset_t smp_tlb_addr1; vm_offset_t smp_tlb_addr2; volatile int smp_tlb_wait; #ifdef COUNT_IPIS /* Interrupt counts. */ static u_long *ipi_preempt_counts[MAXCPU]; static u_long *ipi_ast_counts[MAXCPU]; u_long *ipi_invltlb_counts[MAXCPU]; u_long *ipi_invlrng_counts[MAXCPU]; u_long *ipi_invlpg_counts[MAXCPU]; u_long *ipi_invlcache_counts[MAXCPU]; u_long *ipi_rendezvous_counts[MAXCPU]; u_long *ipi_lazypmap_counts[MAXCPU]; static u_long *ipi_hardclock_counts[MAXCPU]; #endif extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32); /* * Local data and functions. */ static volatile cpumask_t ipi_nmi_pending; /* used to hold the AP's until we are ready to release them */ static struct mtx ap_boot_mtx; /* Set to 1 once we're ready to let the APs out of the pen. */ static volatile int aps_ready = 0; /* * Store data from cpu_add() until later in the boot when we actually setup * the APs. */ struct cpu_info { int cpu_present:1; int cpu_bsp:1; int cpu_disabled:1; int cpu_hyperthread:1; } static cpu_info[MAX_APIC_ID + 1]; int cpu_apic_ids[MAXCPU]; int apic_cpuids[MAX_APIC_ID + 1]; /* Holds pending bitmap based IPIs per CPU */ static volatile u_int cpu_ipi_pending[MAXCPU]; static u_int boot_address; static int cpu_logical; /* logical cpus per core */ static int cpu_cores; /* cores per package */ static void assign_cpu_ids(void); static void set_interrupt_apic_ids(void); static int start_all_aps(void); static int start_ap(int apic_id); static void release_aps(void *dummy); static int hlt_logical_cpus; static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */ static cpumask_t hyperthreading_cpus_mask; static int hyperthreading_allowed = 1; static struct sysctl_ctx_list logical_cpu_clist; static u_int bootMP_size; static void mem_range_AP_init(void) { if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP) mem_range_softc.mr_op->initAP(&mem_range_softc); } static void topo_probe_amd(void) { /* AMD processors do not support HTT. */ cpu_cores = (amd_feature2 & AMDID2_CMP) != 0 ? (cpu_procinfo2 & AMDID_CMP_CORES) + 1 : 1; cpu_logical = 1; } /* * Round up to the next power of two, if necessary, and then * take log2. * Returns -1 if argument is zero. */ static __inline int mask_width(u_int x) { return (fls(x << (1 - powerof2(x))) - 1); } static void topo_probe_0x4(void) { u_int p[4]; int pkg_id_bits; int core_id_bits; int max_cores; int max_logical; int id; /* Both zero and one here mean one logical processor per package. */ max_logical = (cpu_feature & CPUID_HTT) != 0 ? (cpu_procinfo & CPUID_HTT_CORES) >> 16 : 1; if (max_logical <= 1) return; /* * Because of uniformity assumption we examine only * those logical processors that belong to the same * package as BSP. Further, we count number of * logical processors that belong to the same core * as BSP thus deducing number of threads per core. */ cpuid_count(0x04, 0, p); max_cores = ((p[0] >> 26) & 0x3f) + 1; core_id_bits = mask_width(max_logical/max_cores); if (core_id_bits < 0) return; pkg_id_bits = core_id_bits + mask_width(max_cores); for (id = 0; id <= MAX_APIC_ID; id++) { /* Check logical CPU availability. */ if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled) continue; /* Check if logical CPU has the same package ID. */ if ((id >> pkg_id_bits) != (boot_cpu_id >> pkg_id_bits)) continue; cpu_cores++; /* Check if logical CPU has the same package and core IDs. */ if ((id >> core_id_bits) == (boot_cpu_id >> core_id_bits)) cpu_logical++; } KASSERT(cpu_cores >= 1 && cpu_logical >= 1, ("topo_probe_0x4 couldn't find BSP")); cpu_cores /= cpu_logical; hyperthreading_cpus = cpu_logical; } static void topo_probe_0xb(void) { u_int p[4]; int bits; int cnt; int i; int logical; int type; int x; /* We only support three levels for now. */ for (i = 0; i < 3; i++) { cpuid_count(0x0b, i, p); /* Fall back if CPU leaf 11 doesn't really exist. */ if (i == 0 && p[1] == 0) { topo_probe_0x4(); return; } bits = p[0] & 0x1f; logical = p[1] &= 0xffff; type = (p[2] >> 8) & 0xff; if (type == 0 || logical == 0) break; /* * Because of uniformity assumption we examine only * those logical processors that belong to the same * package as BSP. */ for (cnt = 0, x = 0; x <= MAX_APIC_ID; x++) { if (!cpu_info[x].cpu_present || cpu_info[x].cpu_disabled) continue; if (x >> bits == boot_cpu_id >> bits) cnt++; } if (type == CPUID_TYPE_SMT) cpu_logical = cnt; else if (type == CPUID_TYPE_CORE) cpu_cores = cnt; } if (cpu_logical == 0) cpu_logical = 1; cpu_cores /= cpu_logical; } /* * Both topology discovery code and code that consumes topology * information assume top-down uniformity of the topology. * That is, all physical packages must be identical and each * core in a package must have the same number of threads. * Topology information is queried only on BSP, on which this * code runs and for which it can query CPUID information. * Then topology is extrapolated on all packages using the * uniformity assumption. */ static void topo_probe(void) { static int cpu_topo_probed = 0; if (cpu_topo_probed) return; logical_cpus_mask = 0; if (mp_ncpus <= 1) cpu_cores = cpu_logical = 1; else if (cpu_vendor_id == CPU_VENDOR_AMD) topo_probe_amd(); else if (cpu_vendor_id == CPU_VENDOR_INTEL) { /* * See Intel(R) 64 Architecture Processor * Topology Enumeration article for details. * * Note that 0x1 <= cpu_high < 4 case should be * compatible with topo_probe_0x4() logic when * CPUID.1:EBX[23:16] > 0 (cpu_cores will be 1) * or it should trigger the fallback otherwise. */ if (cpu_high >= 0xb) topo_probe_0xb(); else if (cpu_high >= 0x1) topo_probe_0x4(); } /* * Fallback: assume each logical CPU is in separate * physical package. That is, no multi-core, no SMT. */ if (cpu_cores == 0 || cpu_logical == 0) cpu_cores = cpu_logical = 1; cpu_topo_probed = 1; } struct cpu_group * cpu_topo(void) { int cg_flags; /* * Determine whether any threading flags are * necessry. */ topo_probe(); if (cpu_logical > 1 && hyperthreading_cpus) cg_flags = CG_FLAG_HTT; else if (cpu_logical > 1) cg_flags = CG_FLAG_SMT; else cg_flags = 0; if (mp_ncpus % (cpu_cores * cpu_logical) != 0) { printf("WARNING: Non-uniform processors.\n"); printf("WARNING: Using suboptimal topology.\n"); return (smp_topo_none()); } /* * No multi-core or hyper-threaded. */ if (cpu_logical * cpu_cores == 1) return (smp_topo_none()); /* * Only HTT no multi-core. */ if (cpu_logical > 1 && cpu_cores == 1) return (smp_topo_1level(CG_SHARE_L1, cpu_logical, cg_flags)); /* * Only multi-core no HTT. */ if (cpu_cores > 1 && cpu_logical == 1) return (smp_topo_1level(CG_SHARE_L2, cpu_cores, cg_flags)); /* * Both HTT and multi-core. */ return (smp_topo_2level(CG_SHARE_L2, cpu_cores, CG_SHARE_L1, cpu_logical, cg_flags)); } /* * Calculate usable address in base memory for AP trampoline code. */ u_int mp_bootaddress(u_int basemem) { bootMP_size = mptramp_end - mptramp_start; boot_address = trunc_page(basemem * 1024); /* round down to 4k boundary */ if (((basemem * 1024) - boot_address) < bootMP_size) boot_address -= PAGE_SIZE; /* not enough, lower by 4k */ /* 3 levels of page table pages */ mptramp_pagetables = boot_address - (PAGE_SIZE * 3); return mptramp_pagetables; } void cpu_add(u_int apic_id, char boot_cpu) { if (apic_id > MAX_APIC_ID) { panic("SMP: APIC ID %d too high", apic_id); return; } KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice", apic_id)); cpu_info[apic_id].cpu_present = 1; if (boot_cpu) { KASSERT(boot_cpu_id == -1, ("CPU %d claims to be BSP, but CPU %d already is", apic_id, boot_cpu_id)); boot_cpu_id = apic_id; cpu_info[apic_id].cpu_bsp = 1; } if (mp_ncpus < MAXCPU) { mp_ncpus++; mp_maxid = mp_ncpus - 1; } if (bootverbose) printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" : "AP"); } void cpu_mp_setmaxid(void) { /* * mp_maxid should be already set by calls to cpu_add(). * Just sanity check its value here. */ if (mp_ncpus == 0) KASSERT(mp_maxid == 0, ("%s: mp_ncpus is zero, but mp_maxid is not", __func__)); else if (mp_ncpus == 1) mp_maxid = 0; else KASSERT(mp_maxid >= mp_ncpus - 1, ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid, mp_ncpus)); } int cpu_mp_probe(void) { /* * Always record BSP in CPU map so that the mbuf init code works * correctly. */ all_cpus = 1; if (mp_ncpus == 0) { /* * No CPUs were found, so this must be a UP system. Setup * the variables to represent a system with a single CPU * with an id of 0. */ mp_ncpus = 1; return (0); } /* At least one CPU was found. */ if (mp_ncpus == 1) { /* * One CPU was found, so this must be a UP system with * an I/O APIC. */ mp_maxid = 0; return (0); } /* At least two CPUs were found. */ return (1); } /* * Initialize the IPI handlers and start up the AP's. */ void cpu_mp_start(void) { int i; /* Initialize the logical ID to APIC ID table. */ for (i = 0; i < MAXCPU; i++) { cpu_apic_ids[i] = -1; cpu_ipi_pending[i] = 0; } /* Install an inter-CPU IPI for TLB invalidation */ setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYSIGT, SEL_KPL, 0); setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0); setidt(IPI_INVLRNG, IDTVEC(invlrng), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for cache invalidation. */ setidt(IPI_INVLCACHE, IDTVEC(invlcache), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for all-CPU rendezvous */ setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), SDT_SYSIGT, SEL_KPL, 0); /* Install generic inter-CPU IPI handler */ setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for CPU stop/restart */ setidt(IPI_STOP, IDTVEC(cpustop), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for CPU suspend/resume */ setidt(IPI_SUSPEND, IDTVEC(cpususpend), SDT_SYSIGT, SEL_KPL, 0); /* Set boot_cpu_id if needed. */ if (boot_cpu_id == -1) { boot_cpu_id = PCPU_GET(apic_id); cpu_info[boot_cpu_id].cpu_bsp = 1; } else KASSERT(boot_cpu_id == PCPU_GET(apic_id), ("BSP's APIC ID doesn't match boot_cpu_id")); /* Probe logical/physical core configuration. */ topo_probe(); assign_cpu_ids(); /* Start each Application Processor */ start_all_aps(); set_interrupt_apic_ids(); } /* * Print various information about the SMP system hardware and setup. */ void cpu_mp_announce(void) { const char *hyperthread; int i; printf("FreeBSD/SMP: %d package(s) x %d core(s)", mp_ncpus / (cpu_cores * cpu_logical), cpu_cores); if (hyperthreading_cpus > 1) printf(" x %d HTT threads", cpu_logical); else if (cpu_logical > 1) printf(" x %d SMT threads", cpu_logical); printf("\n"); /* List active CPUs first. */ printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id); for (i = 1; i < mp_ncpus; i++) { if (cpu_info[cpu_apic_ids[i]].cpu_hyperthread) hyperthread = "/HT"; else hyperthread = ""; printf(" cpu%d (AP%s): APIC ID: %2d\n", i, hyperthread, cpu_apic_ids[i]); } /* List disabled CPUs last. */ for (i = 0; i <= MAX_APIC_ID; i++) { if (!cpu_info[i].cpu_present || !cpu_info[i].cpu_disabled) continue; if (cpu_info[i].cpu_hyperthread) hyperthread = "/HT"; else hyperthread = ""; printf(" cpu (AP%s): APIC ID: %2d (disabled)\n", hyperthread, i); } } /* * AP CPU's call this to initialize themselves. */ void init_secondary(void) { struct pcpu *pc; struct nmi_pcpu *np; u_int64_t msr, cr0; int cpu, gsel_tss, x; struct region_descriptor ap_gdt; /* Set by the startup code for us to use */ cpu = bootAP; /* Init tss */ common_tss[cpu] = common_tss[0]; common_tss[cpu].tss_rsp0 = 0; /* not used until after switch */ common_tss[cpu].tss_iobase = sizeof(struct amd64tss) + IOPAGES * PAGE_SIZE; common_tss[cpu].tss_ist1 = (long)&doublefault_stack[PAGE_SIZE]; /* The NMI stack runs on IST2. */ np = ((struct nmi_pcpu *) &nmi_stack[PAGE_SIZE]) - 1; common_tss[cpu].tss_ist2 = (long) np; /* Prepare private GDT */ gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu]; for (x = 0; x < NGDT; x++) { if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) && x != GUSERLDT_SEL && x != (GUSERLDT_SEL + 1)) ssdtosd(&gdt_segs[x], &gdt[NGDT * cpu + x]); } ssdtosyssd(&gdt_segs[GPROC0_SEL], (struct system_segment_descriptor *)&gdt[NGDT * cpu + GPROC0_SEL]); ap_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; ap_gdt.rd_base = (long) &gdt[NGDT * cpu]; lgdt(&ap_gdt); /* does magic intra-segment return */ /* Get per-cpu data */ pc = &__pcpu[cpu]; /* prime data page for it to use */ pcpu_init(pc, cpu, sizeof(struct pcpu)); dpcpu_init(dpcpu, cpu); pc->pc_apic_id = cpu_apic_ids[cpu]; pc->pc_prvspace = pc; pc->pc_curthread = 0; pc->pc_tssp = &common_tss[cpu]; pc->pc_commontssp = &common_tss[cpu]; pc->pc_rsp0 = 0; pc->pc_tss = (struct system_segment_descriptor *)&gdt[NGDT * cpu + GPROC0_SEL]; pc->pc_fs32p = &gdt[NGDT * cpu + GUFS32_SEL]; pc->pc_gs32p = &gdt[NGDT * cpu + GUGS32_SEL]; pc->pc_ldt = (struct system_segment_descriptor *)&gdt[NGDT * cpu + GUSERLDT_SEL]; /* Save the per-cpu pointer for use by the NMI handler. */ np->np_pcpu = (register_t) pc; wrmsr(MSR_FSBASE, 0); /* User value */ wrmsr(MSR_GSBASE, (u_int64_t)pc); wrmsr(MSR_KGSBASE, (u_int64_t)pc); /* XXX User value while we're in the kernel */ lidt(&r_idt); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); ltr(gsel_tss); /* * Set to a known state: * Set by mpboot.s: CR0_PG, CR0_PE * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM */ cr0 = rcr0(); cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); load_cr0(cr0); /* Set up the fast syscall stuff */ msr = rdmsr(MSR_EFER) | EFER_SCE; wrmsr(MSR_EFER, msr); wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall)); wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32)); msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48); wrmsr(MSR_STAR, msr); wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D); /* Disable local APIC just to be sure. */ lapic_disable(); /* signal our startup to the BSP. */ mp_naps++; /* Spin until the BSP releases the AP's. */ while (!aps_ready) ia32_pause(); /* Initialize the PAT MSR. */ pmap_init_pat(); /* set up CPU registers and state */ cpu_setregs(); /* set up SSE/NX registers */ initializecpu(); /* set up FPU state on the AP */ fpuinit(); /* A quick check from sanity claus */ if (PCPU_GET(apic_id) != lapic_id()) { printf("SMP: cpuid = %d\n", PCPU_GET(cpuid)); printf("SMP: actual apic_id = %d\n", lapic_id()); printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id)); panic("cpuid mismatch! boom!!"); } /* Initialize curthread. */ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); PCPU_SET(curthread, PCPU_GET(idlethread)); mca_init(); mtx_lock_spin(&ap_boot_mtx); /* Init local apic for irq's */ lapic_setup(1); /* Set memory range attributes for this CPU to match the BSP */ mem_range_AP_init(); smp_cpus++; CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid)); printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); /* Determine if we are a logical CPU. */ /* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */ if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0) logical_cpus_mask |= PCPU_GET(cpumask); /* Determine if we are a hyperthread. */ if (hyperthreading_cpus > 1 && PCPU_GET(apic_id) % hyperthreading_cpus != 0) hyperthreading_cpus_mask |= PCPU_GET(cpumask); /* Build our map of 'other' CPUs. */ PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); if (bootverbose) lapic_dump("AP"); if (smp_cpus == mp_ncpus) { /* enable IPI's, tlb shootdown, freezes etc */ atomic_store_rel_int(&smp_started, 1); smp_active = 1; /* historic */ } /* * Enable global pages TLB extension * This also implicitly flushes the TLB */ load_cr4(rcr4() | CR4_PGE); load_ds(_udatasel); load_es(_udatasel); load_fs(_ufssel); mtx_unlock_spin(&ap_boot_mtx); /* Wait until all the AP's are up. */ while (smp_started == 0) ia32_pause(); /* Start per-CPU event timers. */ cpu_initclocks_ap(); sched_throw(NULL); panic("scheduler returned us to %s", __func__); /* NOTREACHED */ } /******************************************************************* * local functions and data */ /* * We tell the I/O APIC code about all the CPUs we want to receive * interrupts. If we don't want certain CPUs to receive IRQs we * can simply not tell the I/O APIC code about them in this function. * We also do not tell it about the BSP since it tells itself about * the BSP internally to work with UP kernels and on UP machines. */ static void set_interrupt_apic_ids(void) { u_int i, apic_id; for (i = 0; i < MAXCPU; i++) { apic_id = cpu_apic_ids[i]; if (apic_id == -1) continue; if (cpu_info[apic_id].cpu_bsp) continue; if (cpu_info[apic_id].cpu_disabled) continue; /* Don't let hyperthreads service interrupts. */ if (hyperthreading_cpus > 1 && apic_id % hyperthreading_cpus != 0) continue; intr_add_cpu(i); } } /* * Assign logical CPU IDs to local APICs. */ static void assign_cpu_ids(void) { u_int i; TUNABLE_INT_FETCH("machdep.hyperthreading_allowed", &hyperthreading_allowed); /* Check for explicitly disabled CPUs. */ for (i = 0; i <= MAX_APIC_ID; i++) { if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp) continue; if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) { cpu_info[i].cpu_hyperthread = 1; #if defined(SCHED_ULE) /* * Don't use HT CPU if it has been disabled by a * tunable. */ if (hyperthreading_allowed == 0) { cpu_info[i].cpu_disabled = 1; continue; } #endif } /* Don't use this CPU if it has been disabled by a tunable. */ if (resource_disabled("lapic", i)) { cpu_info[i].cpu_disabled = 1; continue; } } /* * Assign CPU IDs to local APIC IDs and disable any CPUs * beyond MAXCPU. CPU 0 is always assigned to the BSP. * * To minimize confusion for userland, we attempt to number * CPUs such that all threads and cores in a package are * grouped together. For now we assume that the BSP is always * the first thread in a package and just start adding APs * starting with the BSP's APIC ID. */ mp_ncpus = 1; cpu_apic_ids[0] = boot_cpu_id; apic_cpuids[boot_cpu_id] = 0; for (i = boot_cpu_id + 1; i != boot_cpu_id; i == MAX_APIC_ID ? i = 0 : i++) { if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp || cpu_info[i].cpu_disabled) continue; if (mp_ncpus < MAXCPU) { cpu_apic_ids[mp_ncpus] = i; apic_cpuids[i] = mp_ncpus; mp_ncpus++; } else cpu_info[i].cpu_disabled = 1; } KASSERT(mp_maxid >= mp_ncpus - 1, ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid, mp_ncpus)); } /* * start each AP in our list */ static int start_all_aps(void) { vm_offset_t va = boot_address + KERNBASE; u_int64_t *pt4, *pt3, *pt2; u_int32_t mpbioswarmvec; int apic_id, cpu, i; u_char mpbiosreason; mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); /* install the AP 1st level boot code */ pmap_kenter(va, boot_address); pmap_invalidate_page(kernel_pmap, va); bcopy(mptramp_start, (void *)va, bootMP_size); /* Locate the page tables, they'll be below the trampoline */ pt4 = (u_int64_t *)(uintptr_t)(mptramp_pagetables + KERNBASE); pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t); pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t); /* Create the initial 1GB replicated page tables */ for (i = 0; i < 512; i++) { /* Each slot of the level 4 pages points to the same level 3 page */ pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + PAGE_SIZE); pt4[i] |= PG_V | PG_RW | PG_U; /* Each slot of the level 3 pages points to the same level 2 page */ pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + (2 * PAGE_SIZE)); pt3[i] |= PG_V | PG_RW | PG_U; /* The level 2 page slots are mapped with 2MB pages for 1GB. */ pt2[i] = i * (2 * 1024 * 1024); pt2[i] |= PG_V | PG_RW | PG_PS | PG_U; } /* save the current value of the warm-start vector */ mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF); outb(CMOS_REG, BIOS_RESET); mpbiosreason = inb(CMOS_DATA); /* setup a vector to our boot code */ *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ /* start each AP */ for (cpu = 1; cpu < mp_ncpus; cpu++) { apic_id = cpu_apic_ids[cpu]; /* allocate and set up an idle stack data page */ bootstacks[cpu] = (void *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); doublefault_stack = (char *)kmem_alloc(kernel_map, PAGE_SIZE); nmi_stack = (char *)kmem_alloc(kernel_map, PAGE_SIZE); dpcpu = (void *)kmem_alloc(kernel_map, DPCPU_SIZE); bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 8; bootAP = cpu; /* attempt to start the Application Processor */ if (!start_ap(apic_id)) { /* restore the warmstart vector */ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; panic("AP #%d (PHY# %d) failed!", cpu, apic_id); } all_cpus |= (1 << cpu); /* record AP in CPU map */ } /* build our map of 'other' CPUs */ PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); /* restore the warmstart vector */ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, mpbiosreason); /* number of APs actually started */ return mp_naps; } /* * This function starts the AP (application processor) identified * by the APIC ID 'physicalCpu'. It does quite a "song and dance" * to accomplish this. This is necessary because of the nuances * of the different hardware we might encounter. It isn't pretty, * but it seems to work. */ static int start_ap(int apic_id) { int vector, ms; int cpus; /* calculate the vector */ vector = (boot_address >> 12) & 0xff; /* used as a watchpoint to signal AP startup */ cpus = mp_naps; /* * first we do an INIT/RESET IPI this INIT IPI might be run, reseting * and running the target CPU. OR this INIT IPI might be latched (P5 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be * ignored. */ /* do an INIT IPI: assert RESET */ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id); /* wait for pending status end */ lapic_ipi_wait(-1); /* do an INIT IPI: deassert RESET */ lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL | APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0); /* wait for pending status end */ DELAY(10000); /* wait ~10mS */ lapic_ipi_wait(-1); /* * next we do a STARTUP IPI: the previous INIT IPI might still be * latched, (P5 bug) this 1st STARTUP would then terminate * immediately, and the previously started INIT IPI would continue. OR * the previous INIT IPI has already run. and this STARTUP IPI will * run. OR the previous INIT IPI was ignored. and this STARTUP IPI * will run. */ /* do a STARTUP IPI */ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | vector, apic_id); lapic_ipi_wait(-1); DELAY(200); /* wait ~200uS */ /* * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is * recognized after hardware RESET or INIT IPI. */ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | vector, apic_id); lapic_ipi_wait(-1); DELAY(200); /* wait ~200uS */ /* Wait up to 5 seconds for it to start. */ for (ms = 0; ms < 5000; ms++) { if (mp_naps > cpus) return 1; /* return SUCCESS */ DELAY(1000); } return 0; /* return FAILURE */ } #ifdef COUNT_XINVLTLB_HITS u_int xhits_gbl[MAXCPU]; u_int xhits_pg[MAXCPU]; u_int xhits_rng[MAXCPU]; SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, ""); SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl, sizeof(xhits_gbl), "IU", ""); SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg, sizeof(xhits_pg), "IU", ""); SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng, sizeof(xhits_rng), "IU", ""); u_int ipi_global; u_int ipi_page; u_int ipi_range; u_int ipi_range_size; SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, ""); SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, ""); SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, ""); SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size, 0, ""); u_int ipi_masked_global; u_int ipi_masked_page; u_int ipi_masked_range; u_int ipi_masked_range_size; SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW, &ipi_masked_global, 0, ""); SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW, &ipi_masked_page, 0, ""); SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW, &ipi_masked_range, 0, ""); SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, &ipi_masked_range_size, 0, ""); #endif /* COUNT_XINVLTLB_HITS */ /* * Flush the TLB on all other CPU's */ static void smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) { u_int ncpu; ncpu = mp_ncpus - 1; /* does not shootdown self */ if (ncpu < 1) return; /* no other cpus */ if (!(read_rflags() & PSL_I)) panic("%s: interrupts disabled", __func__); mtx_lock_spin(&smp_ipi_mtx); smp_tlb_addr1 = addr1; smp_tlb_addr2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); ipi_all_but_self(vector); while (smp_tlb_wait < ncpu) ia32_pause(); mtx_unlock_spin(&smp_ipi_mtx); } static void smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) { int ncpu, othercpus; othercpus = mp_ncpus - 1; if (mask == (cpumask_t)-1) { ncpu = othercpus; if (ncpu < 1) return; } else { mask &= ~PCPU_GET(cpumask); if (mask == 0) return; ncpu = bitcount32(mask); if (ncpu > othercpus) { /* XXX this should be a panic offence */ printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", ncpu, othercpus); ncpu = othercpus; } /* XXX should be a panic, implied by mask == 0 above */ if (ncpu < 1) return; } if (!(read_rflags() & PSL_I)) panic("%s: interrupts disabled", __func__); mtx_lock_spin(&smp_ipi_mtx); smp_tlb_addr1 = addr1; smp_tlb_addr2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); if (mask == (cpumask_t)-1) ipi_all_but_self(vector); else ipi_selected(mask, vector); while (smp_tlb_wait < ncpu) ia32_pause(); mtx_unlock_spin(&smp_ipi_mtx); } /* * Send an IPI to specified CPU handling the bitmap logic. */ static void ipi_send_cpu(int cpu, u_int ipi) { u_int bitmap, old_pending, new_pending; KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu)); if (IPI_IS_BITMAPED(ipi)) { bitmap = 1 << ipi; ipi = IPI_BITMAP_VECTOR; do { old_pending = cpu_ipi_pending[cpu]; new_pending = old_pending | bitmap; } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], old_pending, new_pending)); if (old_pending) return; } lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); } void smp_cache_flush(void) { if (smp_started) smp_tlb_shootdown(IPI_INVLCACHE, 0, 0); } void smp_invltlb(void) { if (smp_started) { smp_tlb_shootdown(IPI_INVLTLB, 0, 0); #ifdef COUNT_XINVLTLB_HITS ipi_global++; #endif } } void smp_invlpg(vm_offset_t addr) { if (smp_started) { smp_tlb_shootdown(IPI_INVLPG, addr, 0); #ifdef COUNT_XINVLTLB_HITS ipi_page++; #endif } } void smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); #ifdef COUNT_XINVLTLB_HITS ipi_range++; ipi_range_size += (addr2 - addr1) / PAGE_SIZE; #endif } } void smp_masked_invltlb(cpumask_t mask) { if (smp_started) { smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); #ifdef COUNT_XINVLTLB_HITS ipi_masked_global++; #endif } } void smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) { if (smp_started) { smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); #ifdef COUNT_XINVLTLB_HITS ipi_masked_page++; #endif } } void smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); #ifdef COUNT_XINVLTLB_HITS ipi_masked_range++; ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE; #endif } } void ipi_bitmap_handler(struct trapframe frame) { struct trapframe *oldframe; struct thread *td; int cpu = PCPU_GET(cpuid); u_int ipi_bitmap; critical_enter(); td = curthread; td->td_intr_nesting_level++; oldframe = td->td_intr_frame; td->td_intr_frame = &frame; ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]); if (ipi_bitmap & (1 << IPI_PREEMPT)) { #ifdef COUNT_IPIS (*ipi_preempt_counts[cpu])++; #endif sched_preempt(td); } if (ipi_bitmap & (1 << IPI_AST)) { #ifdef COUNT_IPIS (*ipi_ast_counts[cpu])++; #endif /* Nothing to do for AST */ } if (ipi_bitmap & (1 << IPI_HARDCLOCK)) { #ifdef COUNT_IPIS (*ipi_hardclock_counts[cpu])++; #endif hardclockintr(); } td->td_intr_frame = oldframe; td->td_intr_nesting_level--; critical_exit(); } /* * send an IPI to a set of cpus. */ void ipi_selected(cpumask_t cpus, u_int ipi) { int cpu; /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) atomic_set_int(&ipi_nmi_pending, cpus); CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi); while ((cpu = ffs(cpus)) != 0) { cpu--; cpus &= ~(1 << cpu); ipi_send_cpu(cpu, ipi); } } /* * send an IPI to a specific CPU. */ void ipi_cpu(int cpu, u_int ipi) { /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) atomic_set_int(&ipi_nmi_pending, 1 << cpu); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } /* * send an IPI to all CPUs EXCEPT myself */ void ipi_all_but_self(u_int ipi) { if (IPI_IS_BITMAPED(ipi)) { ipi_selected(PCPU_GET(other_cpus), ipi); return; } /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus)); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); } int ipi_nmi_handler() { cpumask_t cpumask; /* * As long as there is not a simple way to know about a NMI's * source, if the bitmask for the current CPU is present in * the global pending bitword an IPI_STOP_HARD has been issued * and should be handled. */ cpumask = PCPU_GET(cpumask); if ((ipi_nmi_pending & cpumask) == 0) return (1); atomic_clear_int(&ipi_nmi_pending, cpumask); cpustop_handler(); return (0); } /* * Handle an IPI_STOP by saving our current context and spinning until we * are resumed. */ void cpustop_handler(void) { cpumask_t cpumask; u_int cpu; cpu = PCPU_GET(cpuid); cpumask = PCPU_GET(cpumask); savectx(&stoppcbs[cpu]); /* Indicate that we are stopped */ atomic_set_int(&stopped_cpus, cpumask); /* Wait for restart */ while (!(started_cpus & cpumask)) ia32_pause(); atomic_clear_int(&started_cpus, cpumask); atomic_clear_int(&stopped_cpus, cpumask); if (cpu == 0 && cpustop_restartfunc != NULL) { cpustop_restartfunc(); cpustop_restartfunc = NULL; } } /* * Handle an IPI_SUSPEND by saving our current context and spinning until we * are resumed. */ void cpususpend_handler(void) { cpumask_t cpumask; register_t cr3, rf; u_int cpu; cpu = PCPU_GET(cpuid); cpumask = PCPU_GET(cpumask); rf = intr_disable(); cr3 = rcr3(); if (savectx(susppcbs[cpu])) { wbinvd(); atomic_set_int(&stopped_cpus, cpumask); } else { + pmap_init_pat(); PCPU_SET(switchtime, 0); PCPU_SET(switchticks, ticks); } /* Wait for resume */ while (!(started_cpus & cpumask)) ia32_pause(); atomic_clear_int(&started_cpus, cpumask); atomic_clear_int(&stopped_cpus, cpumask); /* Restore CR3 and enable interrupts */ load_cr3(cr3); mca_resume(); lapic_setup(0); intr_restore(rf); } /* * This is called once the rest of the system is up and running and we're * ready to let the AP's out of the pen. */ static void release_aps(void *dummy __unused) { if (mp_ncpus == 1) return; atomic_store_rel_int(&aps_ready, 1); while (smp_started == 0) ia32_pause(); } SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); static int sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS) { cpumask_t mask; int error; mask = hlt_cpus_mask; error = sysctl_handle_int(oidp, &mask, 0, req); if (error || !req->newptr) return (error); if (logical_cpus_mask != 0 && (mask & logical_cpus_mask) == logical_cpus_mask) hlt_logical_cpus = 1; else hlt_logical_cpus = 0; if (! hyperthreading_allowed) mask |= hyperthreading_cpus_mask; if ((mask & all_cpus) == all_cpus) mask &= ~(1<<0); hlt_cpus_mask = mask; return (error); } SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_hlt_cpus, "IU", "Bitmap of CPUs to halt. 101 (binary) will halt CPUs 0 and 2."); static int sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS) { int disable, error; disable = hlt_logical_cpus; error = sysctl_handle_int(oidp, &disable, 0, req); if (error || !req->newptr) return (error); if (disable) hlt_cpus_mask |= logical_cpus_mask; else hlt_cpus_mask &= ~logical_cpus_mask; if (! hyperthreading_allowed) hlt_cpus_mask |= hyperthreading_cpus_mask; if ((hlt_cpus_mask & all_cpus) == all_cpus) hlt_cpus_mask &= ~(1<<0); hlt_logical_cpus = disable; return (error); } static int sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS) { int allowed, error; allowed = hyperthreading_allowed; error = sysctl_handle_int(oidp, &allowed, 0, req); if (error || !req->newptr) return (error); #ifdef SCHED_ULE /* * SCHED_ULE doesn't allow enabling/disabling HT cores at * run-time. */ if (allowed != hyperthreading_allowed) return (ENOTSUP); return (error); #endif if (allowed) hlt_cpus_mask &= ~hyperthreading_cpus_mask; else hlt_cpus_mask |= hyperthreading_cpus_mask; if (logical_cpus_mask != 0 && (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask) hlt_logical_cpus = 1; else hlt_logical_cpus = 0; if ((hlt_cpus_mask & all_cpus) == all_cpus) hlt_cpus_mask &= ~(1<<0); hyperthreading_allowed = allowed; return (error); } static void cpu_hlt_setup(void *dummy __unused) { if (logical_cpus_mask != 0) { TUNABLE_INT_FETCH("machdep.hlt_logical_cpus", &hlt_logical_cpus); sysctl_ctx_init(&logical_cpu_clist); SYSCTL_ADD_PROC(&logical_cpu_clist, SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_hlt_logical_cpus, "IU", ""); SYSCTL_ADD_UINT(&logical_cpu_clist, SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD, &logical_cpus_mask, 0, ""); if (hlt_logical_cpus) hlt_cpus_mask |= logical_cpus_mask; /* * If necessary for security purposes, force * hyperthreading off, regardless of the value * of hlt_logical_cpus. */ if (hyperthreading_cpus_mask) { SYSCTL_ADD_PROC(&logical_cpu_clist, SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_hyperthreading_allowed, "IU", ""); if (! hyperthreading_allowed) hlt_cpus_mask |= hyperthreading_cpus_mask; } } } SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL); int mp_grab_cpu_hlt(void) { cpumask_t mask; #ifdef MP_WATCHDOG u_int cpuid; #endif int retval; mask = PCPU_GET(cpumask); #ifdef MP_WATCHDOG cpuid = PCPU_GET(cpuid); ap_watchdog(cpuid); #endif retval = 0; while (mask & hlt_cpus_mask) { retval = 1; __asm __volatile("sti; hlt" : : : "memory"); } return (retval); } #ifdef COUNT_IPIS /* * Setup interrupt counters for IPI handlers. */ static void mp_ipi_intrcnt(void *dummy) { char buf[64]; int i; CPU_FOREACH(i) { snprintf(buf, sizeof(buf), "cpu%d:invltlb", i); intrcnt_add(buf, &ipi_invltlb_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:invlrng", i); intrcnt_add(buf, &ipi_invlrng_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:invlpg", i); intrcnt_add(buf, &ipi_invlpg_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:preempt", i); intrcnt_add(buf, &ipi_preempt_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:ast", i); intrcnt_add(buf, &ipi_ast_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:rendezvous", i); intrcnt_add(buf, &ipi_rendezvous_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:lazypmap", i); intrcnt_add(buf, &ipi_lazypmap_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:hardclock", i); intrcnt_add(buf, &ipi_hardclock_counts[i]); } } SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL); #endif Index: projects/binutils-2.17/sys/amd64/amd64/pmap.c =================================================================== --- projects/binutils-2.17/sys/amd64/amd64/pmap.c (revision 215829) +++ projects/binutils-2.17/sys/amd64/amd64/pmap.c (revision 215830) @@ -1,5092 +1,5091 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 2003 Peter Wemm * All rights reserved. * Copyright (c) 2005-2010 Alan L. Cox * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 */ /*- * Copyright (c) 2003 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Jake Burkholder, * Safeport Network Services, and Network Associates Laboratories, the * Security Research Division of Network Associates, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * In addition to hardware address maps, this * module is called upon to provide software-use-only * maps which may or may not be stored in the same * form as hardware maps. These pseudo-maps are * used to store intermediate results from copy * operations to and from address spaces. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. */ #include "opt_pmap.h" #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #ifndef PMAP_SHPGPERPROC #define PMAP_SHPGPERPROC 200 #endif #if !defined(DIAGNOSTIC) #ifdef __GNUC_GNU_INLINE__ #define PMAP_INLINE __attribute__((__gnu_inline__)) inline #else #define PMAP_INLINE extern inline #endif #else #define PMAP_INLINE #endif #define PV_STATS #ifdef PV_STATS #define PV_STAT(x) do { x ; } while (0) #else #define PV_STAT(x) do { } while (0) #endif #define pa_index(pa) ((pa) >> PDRSHIFT) #define pa_to_pvh(pa) (&pv_table[pa_index(pa)]) struct pmap kernel_pmap_store; vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ static int ndmpdp; static vm_paddr_t dmaplimit; vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS; pt_entry_t pg_nx; SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); static int pat_works = 1; -TUNABLE_INT("vm.pmap.pat_works", &pat_works); -SYSCTL_INT(_vm_pmap, OID_AUTO, pat_works, CTLFLAG_RDTUN, &pat_works, 1, +SYSCTL_INT(_vm_pmap, OID_AUTO, pat_works, CTLFLAG_RD, &pat_works, 1, "Is page attribute table fully functional?"); static int pg_ps_enabled = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN, &pg_ps_enabled, 0, "Are large page mappings enabled?"); #define PAT_INDEX_SIZE 8 static int pat_index[PAT_INDEX_SIZE]; /* cache mode to PAT index conversion */ static u_int64_t KPTphys; /* phys addr of kernel level 1 */ static u_int64_t KPDphys; /* phys addr of kernel level 2 */ u_int64_t KPDPphys; /* phys addr of kernel level 3 */ u_int64_t KPML4phys; /* phys addr of kernel level 4 */ static u_int64_t DMPDphys; /* phys addr of direct mapped level 2 */ static u_int64_t DMPDPphys; /* phys addr of direct mapped level 3 */ /* * Data for the pv entry allocation mechanism */ static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; static struct md_page *pv_table; static int shpgperproc = PMAP_SHPGPERPROC; /* * All those kernel PT submaps that BSD is so fond of */ pt_entry_t *CMAP1 = 0; caddr_t CADDR1 = 0; /* * Crashdump maps. */ static caddr_t crashdumpmap; static void free_pv_entry(pmap_t pmap, pv_entry_t pv); static pv_entry_t get_pv_entry(pmap_t locked_pmap, int try); static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa); static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa); static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa); static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static int pmap_pvh_wired_mappings(struct md_page *pvh, int count); static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode); static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); static boolean_t pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va); static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte); static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte); static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte); static void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva); static boolean_t pmap_is_modified_pvh(struct md_page *pvh); static boolean_t pmap_is_referenced_pvh(struct md_page *pvh); static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode); static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va); static void pmap_pde_attr(pd_entry_t *pde, int cache_bits); static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot); static void pmap_pte_attr(pt_entry_t *pte, int cache_bits); static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, vm_page_t *free); static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva, pd_entry_t ptepde, vm_page_t *free); static void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte); static void pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, vm_page_t *free); static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va); static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde); static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde); static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va, int flags); static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags); static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, int flags); static int _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t* free); static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, vm_page_t *); static vm_offset_t pmap_kmem_choose(vm_offset_t addr); CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t)); CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t)); /* * Move the kernel virtual free pointer to the next * 2MB. This is used to help improve performance * by using a large (2MB) page for much of the kernel * (.text, .data, .bss) */ static vm_offset_t pmap_kmem_choose(vm_offset_t addr) { vm_offset_t newaddr = addr; newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); return (newaddr); } /********************/ /* Inline functions */ /********************/ /* Return a non-clipped PD index for a given VA */ static __inline vm_pindex_t pmap_pde_pindex(vm_offset_t va) { return (va >> PDRSHIFT); } /* Return various clipped indexes for a given VA */ static __inline vm_pindex_t pmap_pte_index(vm_offset_t va) { return ((va >> PAGE_SHIFT) & ((1ul << NPTEPGSHIFT) - 1)); } static __inline vm_pindex_t pmap_pde_index(vm_offset_t va) { return ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1)); } static __inline vm_pindex_t pmap_pdpe_index(vm_offset_t va) { return ((va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1)); } static __inline vm_pindex_t pmap_pml4e_index(vm_offset_t va) { return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1)); } /* Return a pointer to the PML4 slot that corresponds to a VA */ static __inline pml4_entry_t * pmap_pml4e(pmap_t pmap, vm_offset_t va) { return (&pmap->pm_pml4[pmap_pml4e_index(va)]); } /* Return a pointer to the PDP slot that corresponds to a VA */ static __inline pdp_entry_t * pmap_pml4e_to_pdpe(pml4_entry_t *pml4e, vm_offset_t va) { pdp_entry_t *pdpe; pdpe = (pdp_entry_t *)PHYS_TO_DMAP(*pml4e & PG_FRAME); return (&pdpe[pmap_pdpe_index(va)]); } /* Return a pointer to the PDP slot that corresponds to a VA */ static __inline pdp_entry_t * pmap_pdpe(pmap_t pmap, vm_offset_t va) { pml4_entry_t *pml4e; pml4e = pmap_pml4e(pmap, va); if ((*pml4e & PG_V) == 0) return (NULL); return (pmap_pml4e_to_pdpe(pml4e, va)); } /* Return a pointer to the PD slot that corresponds to a VA */ static __inline pd_entry_t * pmap_pdpe_to_pde(pdp_entry_t *pdpe, vm_offset_t va) { pd_entry_t *pde; pde = (pd_entry_t *)PHYS_TO_DMAP(*pdpe & PG_FRAME); return (&pde[pmap_pde_index(va)]); } /* Return a pointer to the PD slot that corresponds to a VA */ static __inline pd_entry_t * pmap_pde(pmap_t pmap, vm_offset_t va) { pdp_entry_t *pdpe; pdpe = pmap_pdpe(pmap, va); if (pdpe == NULL || (*pdpe & PG_V) == 0) return (NULL); return (pmap_pdpe_to_pde(pdpe, va)); } /* Return a pointer to the PT slot that corresponds to a VA */ static __inline pt_entry_t * pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va) { pt_entry_t *pte; pte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME); return (&pte[pmap_pte_index(va)]); } /* Return a pointer to the PT slot that corresponds to a VA */ static __inline pt_entry_t * pmap_pte(pmap_t pmap, vm_offset_t va) { pd_entry_t *pde; pde = pmap_pde(pmap, va); if (pde == NULL || (*pde & PG_V) == 0) return (NULL); if ((*pde & PG_PS) != 0) /* compat with i386 pmap_pte() */ return ((pt_entry_t *)pde); return (pmap_pde_to_pte(pde, va)); } static __inline void pmap_resident_count_inc(pmap_t pmap, int count) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); pmap->pm_stats.resident_count += count; } static __inline void pmap_resident_count_dec(pmap_t pmap, int count) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); pmap->pm_stats.resident_count -= count; } PMAP_INLINE pt_entry_t * vtopte(vm_offset_t va) { u_int64_t mask = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1); return (PTmap + ((va >> PAGE_SHIFT) & mask)); } static __inline pd_entry_t * vtopde(vm_offset_t va) { u_int64_t mask = ((1ul << (NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1); return (PDmap + ((va >> PDRSHIFT) & mask)); } static u_int64_t allocpages(vm_paddr_t *firstaddr, int n) { u_int64_t ret; ret = *firstaddr; bzero((void *)ret, n * PAGE_SIZE); *firstaddr += n * PAGE_SIZE; return (ret); } static void create_pagetables(vm_paddr_t *firstaddr) { int i; /* Allocate pages */ KPTphys = allocpages(firstaddr, NKPT); KPML4phys = allocpages(firstaddr, 1); KPDPphys = allocpages(firstaddr, NKPML4E); KPDphys = allocpages(firstaddr, NKPDPE); ndmpdp = (ptoa(Maxmem) + NBPDP - 1) >> PDPSHIFT; if (ndmpdp < 4) /* Minimum 4GB of dirmap */ ndmpdp = 4; DMPDPphys = allocpages(firstaddr, NDMPML4E); if ((amd_feature & AMDID_PAGE1GB) == 0) DMPDphys = allocpages(firstaddr, ndmpdp); dmaplimit = (vm_paddr_t)ndmpdp << PDPSHIFT; /* Fill in the underlying page table pages */ /* Read-only from zero to physfree */ /* XXX not fully used, underneath 2M pages */ for (i = 0; (i << PAGE_SHIFT) < *firstaddr; i++) { ((pt_entry_t *)KPTphys)[i] = i << PAGE_SHIFT; ((pt_entry_t *)KPTphys)[i] |= PG_RW | PG_V | PG_G; } /* Now map the page tables at their location within PTmap */ for (i = 0; i < NKPT; i++) { ((pd_entry_t *)KPDphys)[i] = KPTphys + (i << PAGE_SHIFT); ((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V; } /* Map from zero to end of allocations under 2M pages */ /* This replaces some of the KPTphys entries above */ for (i = 0; (i << PDRSHIFT) < *firstaddr; i++) { ((pd_entry_t *)KPDphys)[i] = i << PDRSHIFT; ((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V | PG_PS | PG_G; } /* And connect up the PD to the PDP */ for (i = 0; i < NKPDPE; i++) { ((pdp_entry_t *)KPDPphys)[i + KPDPI] = KPDphys + (i << PAGE_SHIFT); ((pdp_entry_t *)KPDPphys)[i + KPDPI] |= PG_RW | PG_V | PG_U; } /* * Now, set up the direct map region using either 2MB or 1GB pages. * Later, if pmap_mapdev{_attr}() uses the direct map for non-write- * back memory, pmap_change_attr() will demote any 2MB or 1GB page * mappings that are partially used. */ if ((amd_feature & AMDID_PAGE1GB) == 0) { for (i = 0; i < NPDEPG * ndmpdp; i++) { ((pd_entry_t *)DMPDphys)[i] = (vm_paddr_t)i << PDRSHIFT; /* Preset PG_M and PG_A because demotion expects it. */ ((pd_entry_t *)DMPDphys)[i] |= PG_RW | PG_V | PG_PS | PG_G | PG_M | PG_A; } /* And the direct map space's PDP */ for (i = 0; i < ndmpdp; i++) { ((pdp_entry_t *)DMPDPphys)[i] = DMPDphys + (i << PAGE_SHIFT); ((pdp_entry_t *)DMPDPphys)[i] |= PG_RW | PG_V | PG_U; } } else { for (i = 0; i < ndmpdp; i++) { ((pdp_entry_t *)DMPDPphys)[i] = (vm_paddr_t)i << PDPSHIFT; /* Preset PG_M and PG_A because demotion expects it. */ ((pdp_entry_t *)DMPDPphys)[i] |= PG_RW | PG_V | PG_PS | PG_G | PG_M | PG_A; } } /* And recursively map PML4 to itself in order to get PTmap */ ((pdp_entry_t *)KPML4phys)[PML4PML4I] = KPML4phys; ((pdp_entry_t *)KPML4phys)[PML4PML4I] |= PG_RW | PG_V | PG_U; /* Connect the Direct Map slot up to the PML4 */ ((pdp_entry_t *)KPML4phys)[DMPML4I] = DMPDPphys; ((pdp_entry_t *)KPML4phys)[DMPML4I] |= PG_RW | PG_V | PG_U; /* Connect the KVA slot up to the PML4 */ ((pdp_entry_t *)KPML4phys)[KPML4I] = KPDPphys; ((pdp_entry_t *)KPML4phys)[KPML4I] |= PG_RW | PG_V | PG_U; } /* * Bootstrap the system enough to run with virtual memory. * * On amd64 this is called after mapping has already been enabled * and just syncs the pmap module with what has already been done. * [We can't call it easily with mapping off since the kernel is not * mapped with PA == VA, hence we would have to relocate every address * from the linked base (virtual) address "KERNBASE" to the actual * (physical) address starting relative to 0] */ void pmap_bootstrap(vm_paddr_t *firstaddr) { vm_offset_t va; pt_entry_t *pte, *unused; /* * Create an initial set of page tables to run the kernel in. */ create_pagetables(firstaddr); virtual_avail = (vm_offset_t) KERNBASE + *firstaddr; virtual_avail = pmap_kmem_choose(virtual_avail); virtual_end = VM_MAX_KERNEL_ADDRESS; /* XXX do %cr0 as well */ load_cr4(rcr4() | CR4_PGE | CR4_PSE); load_cr3(KPML4phys); /* * Initialize the kernel pmap (which is statically allocated). */ PMAP_LOCK_INIT(kernel_pmap); kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys); kernel_pmap->pm_root = NULL; kernel_pmap->pm_active = -1; /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); /* * Reserve some special page table entries/VA space for temporary * mapping of pages. */ #define SYSMAP(c, p, v, n) \ v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); va = virtual_avail; pte = vtopte(va); /* * CMAP1 is only used for the memory test. */ SYSMAP(caddr_t, CMAP1, CADDR1, 1) /* * Crashdump maps. */ SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS) virtual_avail = va; /* Initialize the PAT MSR. */ pmap_init_pat(); } /* * Setup the PAT MSR. */ void pmap_init_pat(void) { int pat_table[PAT_INDEX_SIZE]; uint64_t pat_msr; u_long cr0, cr4; int i; /* Bail if this CPU doesn't implement PAT. */ if ((cpu_feature & CPUID_PAT) == 0) panic("no PAT??"); /* Set default PAT index table. */ for (i = 0; i < PAT_INDEX_SIZE; i++) pat_table[i] = -1; pat_table[PAT_WRITE_BACK] = 0; pat_table[PAT_WRITE_THROUGH] = 1; pat_table[PAT_UNCACHEABLE] = 3; pat_table[PAT_WRITE_COMBINING] = 3; pat_table[PAT_WRITE_PROTECTED] = 3; pat_table[PAT_UNCACHED] = 3; /* Initialize default PAT entries. */ pat_msr = PAT_VALUE(0, PAT_WRITE_BACK) | PAT_VALUE(1, PAT_WRITE_THROUGH) | PAT_VALUE(2, PAT_UNCACHED) | PAT_VALUE(3, PAT_UNCACHEABLE) | PAT_VALUE(4, PAT_WRITE_BACK) | PAT_VALUE(5, PAT_WRITE_THROUGH) | PAT_VALUE(6, PAT_UNCACHED) | PAT_VALUE(7, PAT_UNCACHEABLE); if (pat_works) { /* * Leave the indices 0-3 at the default of WB, WT, UC-, and UC. * Program 5 and 6 as WP and WC. * Leave 4 and 7 as WB and UC. */ pat_msr &= ~(PAT_MASK(5) | PAT_MASK(6)); pat_msr |= PAT_VALUE(5, PAT_WRITE_PROTECTED) | PAT_VALUE(6, PAT_WRITE_COMBINING); pat_table[PAT_UNCACHED] = 2; pat_table[PAT_WRITE_PROTECTED] = 5; pat_table[PAT_WRITE_COMBINING] = 6; } else { /* * Just replace PAT Index 2 with WC instead of UC-. */ pat_msr &= ~PAT_MASK(2); pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING); pat_table[PAT_WRITE_COMBINING] = 2; } /* Disable PGE. */ cr4 = rcr4(); load_cr4(cr4 & ~CR4_PGE); /* Disable caches (CD = 1, NW = 0). */ cr0 = rcr0(); load_cr0((cr0 & ~CR0_NW) | CR0_CD); /* Flushes caches and TLBs. */ wbinvd(); invltlb(); /* Update PAT and index table. */ wrmsr(MSR_PAT, pat_msr); for (i = 0; i < PAT_INDEX_SIZE; i++) pat_index[i] = pat_table[i]; /* Flush caches and TLBs again. */ wbinvd(); invltlb(); /* Restore caches and PGE. */ load_cr0(cr0); load_cr4(cr4); } /* * Initialize a vm_page's machine-dependent fields. */ void pmap_page_init(vm_page_t m) { TAILQ_INIT(&m->md.pv_list); m->md.pat_mode = PAT_WRITE_BACK; } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. */ void pmap_init(void) { vm_page_t mpte; vm_size_t s; int i, pv_npg; /* * Initialize the vm page array entries for the kernel pmap's * page table pages. */ for (i = 0; i < NKPT; i++) { mpte = PHYS_TO_VM_PAGE(KPTphys + (i << PAGE_SHIFT)); KASSERT(mpte >= vm_page_array && mpte < &vm_page_array[vm_page_array_size], ("pmap_init: page table page is out of range")); mpte->pindex = pmap_pde_pindex(KERNBASE) + i; mpte->phys_addr = KPTphys + (i << PAGE_SHIFT); } /* * Initialize the address space (zone) for the pv entries. Set a * high water mark so that the system can recover from excessive * numbers of pv entries. */ TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); pv_entry_max = shpgperproc * maxproc + cnt.v_page_count; TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); pv_entry_high_water = 9 * (pv_entry_max / 10); /* * If the kernel is running in a virtual machine on an AMD Family 10h * processor, then it must assume that MCA is enabled by the virtual * machine monitor. */ if (vm_guest == VM_GUEST_VM && cpu_vendor_id == CPU_VENDOR_AMD && CPUID_TO_FAMILY(cpu_id) == 0x10) workaround_erratum383 = 1; /* * Are large page mappings enabled? */ TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled); if (pg_ps_enabled) { KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0, ("pmap_init: can't assign to pagesizes[1]")); pagesizes[1] = NBPDR; } /* * Calculate the size of the pv head table for superpages. */ for (i = 0; phys_avail[i + 1]; i += 2); pv_npg = round_2mpage(phys_avail[(i - 2) + 1]) / NBPDR; /* * Allocate memory for the pv head table for superpages. */ s = (vm_size_t)(pv_npg * sizeof(struct md_page)); s = round_page(s); pv_table = (struct md_page *)kmem_alloc(kernel_map, s); for (i = 0; i < pv_npg; i++) TAILQ_INIT(&pv_table[i].pv_list); } static int pmap_pventry_proc(SYSCTL_HANDLER_ARGS) { int error; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (error == 0 && req->newptr) { shpgperproc = (pv_entry_max - cnt.v_page_count) / maxproc; pv_entry_high_water = 9 * (pv_entry_max / 10); } return (error); } SYSCTL_PROC(_vm_pmap, OID_AUTO, pv_entry_max, CTLTYPE_INT|CTLFLAG_RW, &pv_entry_max, 0, pmap_pventry_proc, "IU", "Max number of PV entries"); static int pmap_shpgperproc_proc(SYSCTL_HANDLER_ARGS) { int error; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (error == 0 && req->newptr) { pv_entry_max = shpgperproc * maxproc + cnt.v_page_count; pv_entry_high_water = 9 * (pv_entry_max / 10); } return (error); } SYSCTL_PROC(_vm_pmap, OID_AUTO, shpgperproc, CTLTYPE_INT|CTLFLAG_RW, &shpgperproc, 0, pmap_shpgperproc_proc, "IU", "Page share factor per proc"); SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0, "2MB page mapping counters"); static u_long pmap_pde_demotions; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, demotions, CTLFLAG_RD, &pmap_pde_demotions, 0, "2MB page demotions"); static u_long pmap_pde_mappings; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD, &pmap_pde_mappings, 0, "2MB page mappings"); static u_long pmap_pde_p_failures; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, p_failures, CTLFLAG_RD, &pmap_pde_p_failures, 0, "2MB page promotion failures"); static u_long pmap_pde_promotions; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, promotions, CTLFLAG_RD, &pmap_pde_promotions, 0, "2MB page promotions"); SYSCTL_NODE(_vm_pmap, OID_AUTO, pdpe, CTLFLAG_RD, 0, "1GB page mapping counters"); static u_long pmap_pdpe_demotions; SYSCTL_ULONG(_vm_pmap_pdpe, OID_AUTO, demotions, CTLFLAG_RD, &pmap_pdpe_demotions, 0, "1GB page demotions"); /*************************************************** * Low level helper routines..... ***************************************************/ /* * Determine the appropriate bits to set in a PTE or PDE for a specified * caching mode. */ static int pmap_cache_bits(int mode, boolean_t is_pde) { int cache_bits, pat_flag, pat_idx; if (mode < 0 || mode >= PAT_INDEX_SIZE || pat_index[mode] < 0) panic("Unknown caching mode %d\n", mode); /* The PAT bit is different for PTE's and PDE's. */ pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT; /* Map the caching mode to a PAT index. */ pat_idx = pat_index[mode]; /* Map the 3-bit index value into the PAT, PCD, and PWT bits. */ cache_bits = 0; if (pat_idx & 0x4) cache_bits |= pat_flag; if (pat_idx & 0x2) cache_bits |= PG_NC_PCD; if (pat_idx & 0x1) cache_bits |= PG_NC_PWT; return (cache_bits); } /* * After changing the page size for the specified virtual address in the page * table, flush the corresponding entries from the processor's TLB. Only the * calling processor's TLB is affected. * * The calling thread must be pinned to a processor. */ static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde) { u_long cr4; if ((newpde & PG_PS) == 0) /* Demotion: flush a specific 2MB page mapping. */ invlpg(va); else if ((newpde & PG_G) == 0) /* * Promotion: flush every 4KB page mapping from the TLB * because there are too many to flush individually. */ invltlb(); else { /* * Promotion: flush every 4KB page mapping from the TLB, * including any global (PG_G) mappings. */ cr4 = rcr4(); load_cr4(cr4 & ~CR4_PGE); /* * Although preemption at this point could be detrimental to * performance, it would not lead to an error. PG_G is simply * ignored if CR4.PGE is clear. Moreover, in case this block * is re-entered, the load_cr4() either above or below will * modify CR4.PGE flushing the TLB. */ load_cr4(cr4 | CR4_PGE); } } #ifdef SMP /* * For SMP, these functions have to use the IPI mechanism for coherence. * * N.B.: Before calling any of the following TLB invalidation functions, * the calling processor must ensure that all stores updating a non- * kernel page table are globally performed. Otherwise, another * processor could cache an old, pre-update entry without being * invalidated. This can happen one of two ways: (1) The pmap becomes * active on another processor after its pm_active field is checked by * one of the following functions but before a store updating the page * table is globally performed. (2) The pmap becomes active on another * processor before its pm_active field is checked but due to * speculative loads one of the following functions stills reads the * pmap as inactive on the other processor. * * The kernel page table is exempt because its pm_active field is * immutable. The kernel page table is always active on every * processor. */ void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { cpumask_t cpumask, other_cpus; sched_pin(); if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { invlpg(va); smp_invlpg(va); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); if (pmap->pm_active & cpumask) invlpg(va); if (pmap->pm_active & other_cpus) smp_masked_invlpg(pmap->pm_active & other_cpus, va); } sched_unpin(); } void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { cpumask_t cpumask, other_cpus; vm_offset_t addr; sched_pin(); if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); smp_invlpg_range(sva, eva); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); if (pmap->pm_active & cpumask) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); if (pmap->pm_active & other_cpus) smp_masked_invlpg_range(pmap->pm_active & other_cpus, sva, eva); } sched_unpin(); } void pmap_invalidate_all(pmap_t pmap) { cpumask_t cpumask, other_cpus; sched_pin(); if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { invltlb(); smp_invltlb(); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); if (pmap->pm_active & cpumask) invltlb(); if (pmap->pm_active & other_cpus) smp_masked_invltlb(pmap->pm_active & other_cpus); } sched_unpin(); } void pmap_invalidate_cache(void) { sched_pin(); wbinvd(); smp_cache_flush(); sched_unpin(); } struct pde_action { cpumask_t store; /* processor that updates the PDE */ cpumask_t invalidate; /* processors that invalidate their TLB */ vm_offset_t va; pd_entry_t *pde; pd_entry_t newpde; }; static void pmap_update_pde_action(void *arg) { struct pde_action *act = arg; if (act->store == PCPU_GET(cpumask)) pde_store(act->pde, act->newpde); } static void pmap_update_pde_teardown(void *arg) { struct pde_action *act = arg; if ((act->invalidate & PCPU_GET(cpumask)) != 0) pmap_update_pde_invalidate(act->va, act->newpde); } /* * Change the page size for the specified virtual address in a way that * prevents any possibility of the TLB ever having two entries that map the * same virtual address using different page sizes. This is the recommended * workaround for Erratum 383 on AMD Family 10h processors. It prevents a * machine check exception for a TLB state that is improperly diagnosed as a * hardware error. */ static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { struct pde_action act; cpumask_t active, cpumask; sched_pin(); cpumask = PCPU_GET(cpumask); if (pmap == kernel_pmap) active = all_cpus; else active = pmap->pm_active; if ((active & PCPU_GET(other_cpus)) != 0) { act.store = cpumask; act.invalidate = active; act.va = va; act.pde = pde; act.newpde = newpde; smp_rendezvous_cpus(cpumask | active, smp_no_rendevous_barrier, pmap_update_pde_action, pmap_update_pde_teardown, &act); } else { pde_store(pde, newpde); if ((active & cpumask) != 0) pmap_update_pde_invalidate(va, newpde); } sched_unpin(); } #else /* !SMP */ /* * Normal, non-SMP, invalidation functions. * We inline these within pmap.c for speed. */ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { if (pmap == kernel_pmap || pmap->pm_active) invlpg(va); } PMAP_INLINE void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; if (pmap == kernel_pmap || pmap->pm_active) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); } PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { if (pmap == kernel_pmap || pmap->pm_active) invltlb(); } PMAP_INLINE void pmap_invalidate_cache(void) { wbinvd(); } static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { pde_store(pde, newpde); if (pmap == kernel_pmap || pmap->pm_active) pmap_update_pde_invalidate(va, newpde); } #endif /* !SMP */ static void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva) { KASSERT((sva & PAGE_MASK) == 0, ("pmap_invalidate_cache_range: sva not page-aligned")); KASSERT((eva & PAGE_MASK) == 0, ("pmap_invalidate_cache_range: eva not page-aligned")); if (cpu_feature & CPUID_SS) ; /* If "Self Snoop" is supported, do nothing. */ else if ((cpu_feature & CPUID_CLFSH) != 0 && eva - sva < 2 * 1024 * 1024) { /* * Otherwise, do per-cache line flush. Use the mfence * instruction to insure that previous stores are * included in the write-back. The processor * propagates flush to other processors in the cache * coherence domain. */ mfence(); for (; sva < eva; sva += cpu_clflush_line_size) clflush(sva); mfence(); } else { /* * No targeted cache flush methods are supported by CPU, * or the supplied range is bigger than 2MB. * Globally invalidate cache. */ pmap_invalidate_cache(); } } /* * Are we current address space or kernel? */ static __inline int pmap_is_current(pmap_t pmap) { return (pmap == kernel_pmap || (pmap->pm_pml4[PML4PML4I] & PG_FRAME) == (PML4pml4e[0] & PG_FRAME)); } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_paddr_t pmap_extract(pmap_t pmap, vm_offset_t va) { pdp_entry_t *pdpe; pd_entry_t *pde; pt_entry_t *pte; vm_paddr_t pa; pa = 0; PMAP_LOCK(pmap); pdpe = pmap_pdpe(pmap, va); if (pdpe != NULL && (*pdpe & PG_V) != 0) { if ((*pdpe & PG_PS) != 0) pa = (*pdpe & PG_PS_FRAME) | (va & PDPMASK); else { pde = pmap_pdpe_to_pde(pdpe, va); if ((*pde & PG_V) != 0) { if ((*pde & PG_PS) != 0) { pa = (*pde & PG_PS_FRAME) | (va & PDRMASK); } else { pte = pmap_pde_to_pte(pde, va); pa = (*pte & PG_FRAME) | (va & PAGE_MASK); } } } } PMAP_UNLOCK(pmap); return (pa); } /* * Routine: pmap_extract_and_hold * Function: * Atomically extract and hold the physical page * with the given pmap and virtual address pair * if that mapping permits the given protection. */ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pd_entry_t pde, *pdep; pt_entry_t pte; vm_paddr_t pa; vm_page_t m; pa = 0; m = NULL; PMAP_LOCK(pmap); retry: pdep = pmap_pde(pmap, va); if (pdep != NULL && (pde = *pdep)) { if (pde & PG_PS) { if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) | (va & PDRMASK), &pa)) goto retry; m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | (va & PDRMASK)); vm_page_hold(m); } } else { pte = *pmap_pde_to_pte(pdep, va); if ((pte & PG_V) && ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa)) goto retry; m = PHYS_TO_VM_PAGE(pte & PG_FRAME); vm_page_hold(m); } } } PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } vm_paddr_t pmap_kextract(vm_offset_t va) { pd_entry_t pde; vm_paddr_t pa; if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { pa = DMAP_TO_PHYS(va); } else { pde = *vtopde(va); if (pde & PG_PS) { pa = (pde & PG_PS_FRAME) | (va & PDRMASK); } else { /* * Beware of a concurrent promotion that changes the * PDE at this point! For example, vtopte() must not * be used to access the PTE because it would use the * new PDE. It is, however, safe to use the old PDE * because the page table page is preserved by the * promotion. */ pa = *pmap_pde_to_pte(&pde, va); pa = (pa & PG_FRAME) | (va & PAGE_MASK); } } return (pa); } /*************************************************** * Low level mapping routines..... ***************************************************/ /* * Add a wired page to the kva. * Note: not SMP coherent. */ PMAP_INLINE void pmap_kenter(vm_offset_t va, vm_paddr_t pa) { pt_entry_t *pte; pte = vtopte(va); pte_store(pte, pa | PG_RW | PG_V | PG_G); } static __inline void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode) { pt_entry_t *pte; pte = vtopte(va); pte_store(pte, pa | PG_RW | PG_V | PG_G | pmap_cache_bits(mode, 0)); } /* * Remove a page from the kernel pagetables. * Note: not SMP coherent. */ PMAP_INLINE void pmap_kremove(vm_offset_t va) { pt_entry_t *pte; pte = vtopte(va); pte_clear(pte); } /* * Used to map a range of physical addresses into kernel * virtual address space. * * The value passed in '*virt' is a suggested virtual address for * the mapping. Architectures which can support a direct-mapped * physical to virtual region can return the appropriate address * within that region, leaving '*virt' unchanged. Other * architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped * region. */ vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) { return PHYS_TO_DMAP(start); } /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) { pt_entry_t *endpte, oldpte, pa, *pte; vm_page_t m; oldpte = 0; pte = vtopte(sva); endpte = pte + count; while (pte < endpte) { m = *ma++; pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0); if ((*pte & (PG_FRAME | PG_PTE_CACHE)) != pa) { oldpte |= *pte; pte_store(pte, pa | PG_G | PG_RW | PG_V); } pte++; } if (__predict_false((oldpte & PG_V) != 0)) pmap_invalidate_range(kernel_pmap, sva, sva + count * PAGE_SIZE); } /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qremove(vm_offset_t sva, int count) { vm_offset_t va; va = sva; while (count-- > 0) { pmap_kremove(va); va += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /*************************************************** * Page table page management routines..... ***************************************************/ static __inline void pmap_free_zero_pages(vm_page_t free) { vm_page_t m; while (free != NULL) { m = free; free = m->right; /* Preserve the page's PG_ZERO setting. */ vm_page_free_toq(m); } } /* * Schedule the specified unused page table page to be freed. Specifically, * add the page to the specified list of pages that will be released to the * physical memory manager after the TLB has been updated. */ static __inline void pmap_add_delayed_free_list(vm_page_t m, vm_page_t *free, boolean_t set_PG_ZERO) { if (set_PG_ZERO) m->flags |= PG_ZERO; else m->flags &= ~PG_ZERO; m->right = *free; *free = m; } /* * Inserts the specified page table page into the specified pmap's collection * of idle page table pages. Each of a pmap's page table pages is responsible * for mapping a distinct range of virtual addresses. The pmap's collection is * ordered by this virtual address range. */ static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte) { vm_page_t root; PMAP_LOCK_ASSERT(pmap, MA_OWNED); root = pmap->pm_root; if (root == NULL) { mpte->left = NULL; mpte->right = NULL; } else { root = vm_page_splay(mpte->pindex, root); if (mpte->pindex < root->pindex) { mpte->left = root->left; mpte->right = root; root->left = NULL; } else if (mpte->pindex == root->pindex) panic("pmap_insert_pt_page: pindex already inserted"); else { mpte->right = root->right; mpte->left = root; root->right = NULL; } } pmap->pm_root = mpte; } /* * Looks for a page table page mapping the specified virtual address in the * specified pmap's collection of idle page table pages. Returns NULL if there * is no page table page corresponding to the specified virtual address. */ static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va) { vm_page_t mpte; vm_pindex_t pindex = pmap_pde_pindex(va); PMAP_LOCK_ASSERT(pmap, MA_OWNED); if ((mpte = pmap->pm_root) != NULL && mpte->pindex != pindex) { mpte = vm_page_splay(pindex, mpte); if ((pmap->pm_root = mpte)->pindex != pindex) mpte = NULL; } return (mpte); } /* * Removes the specified page table page from the specified pmap's collection * of idle page table pages. The specified page table page must be a member of * the pmap's collection. */ static void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte) { vm_page_t root; PMAP_LOCK_ASSERT(pmap, MA_OWNED); if (mpte != pmap->pm_root) { root = vm_page_splay(mpte->pindex, pmap->pm_root); KASSERT(mpte == root, ("pmap_remove_pt_page: mpte %p is missing from pmap %p", mpte, pmap)); } if (mpte->left == NULL) root = mpte->right; else { root = vm_page_splay(mpte->pindex, mpte->left); root->right = mpte->right; } pmap->pm_root = root; } /* * This routine unholds page table pages, and if the hold count * drops to zero, then it decrements the wire count. */ static __inline int pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t *free) { --m->wire_count; if (m->wire_count == 0) return (_pmap_unwire_pte_hold(pmap, va, m, free)); else return (0); } static int _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t *free) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * unmap the page table page */ if (m->pindex >= (NUPDE + NUPDPE)) { /* PDP page */ pml4_entry_t *pml4; pml4 = pmap_pml4e(pmap, va); *pml4 = 0; } else if (m->pindex >= NUPDE) { /* PD page */ pdp_entry_t *pdp; pdp = pmap_pdpe(pmap, va); *pdp = 0; } else { /* PTE page */ pd_entry_t *pd; pd = pmap_pde(pmap, va); *pd = 0; } pmap_resident_count_dec(pmap, 1); if (m->pindex < NUPDE) { /* We just released a PT, unhold the matching PD */ vm_page_t pdpg; pdpg = PHYS_TO_VM_PAGE(*pmap_pdpe(pmap, va) & PG_FRAME); pmap_unwire_pte_hold(pmap, va, pdpg, free); } if (m->pindex >= NUPDE && m->pindex < (NUPDE + NUPDPE)) { /* We just released a PD, unhold the matching PDP */ vm_page_t pdppg; pdppg = PHYS_TO_VM_PAGE(*pmap_pml4e(pmap, va) & PG_FRAME); pmap_unwire_pte_hold(pmap, va, pdppg, free); } /* * This is a release store so that the ordinary store unmapping * the page table page is globally performed before TLB shoot- * down is begun. */ atomic_subtract_rel_int(&cnt.v_wire_count, 1); /* * Put page on a list so that it is released after * *ALL* TLB shootdown is done */ pmap_add_delayed_free_list(m, free, TRUE); return (1); } /* * After removing a page table entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ static int pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, vm_page_t *free) { vm_page_t mpte; if (va >= VM_MAXUSER_ADDRESS) return (0); KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME); return (pmap_unwire_pte_hold(pmap, va, mpte, free)); } void pmap_pinit0(pmap_t pmap) { PMAP_LOCK_INIT(pmap); pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); pmap->pm_root = NULL; pmap->pm_active = 0; PCPU_SET(curpmap, pmap); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ int pmap_pinit(pmap_t pmap) { vm_page_t pml4pg; static vm_pindex_t color; PMAP_LOCK_INIT(pmap); /* * allocate the page directory page */ while ((pml4pg = vm_page_alloc(NULL, color++, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) VM_WAIT; pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4pg)); if ((pml4pg->flags & PG_ZERO) == 0) pagezero(pmap->pm_pml4); /* Wire in kernel global address entries. */ pmap->pm_pml4[KPML4I] = KPDPphys | PG_RW | PG_V | PG_U; pmap->pm_pml4[DMPML4I] = DMPDPphys | PG_RW | PG_V | PG_U; /* install self-referential address mapping entry(s) */ pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | PG_V | PG_RW | PG_A | PG_M; pmap->pm_root = NULL; pmap->pm_active = 0; TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); return (1); } /* * this routine is called if the page table page is not * mapped correctly. * * Note: If a page allocation fails at page table level two or three, * one or two pages may be held during the wait, only to be released * afterwards. This conservative approach is easily argued to avoid * race conditions. */ static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, int flags) { vm_page_t m, pdppg, pdpg; KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * Allocate a page table page. */ if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { if (flags & M_WAITOK) { PMAP_UNLOCK(pmap); vm_page_unlock_queues(); VM_WAIT; vm_page_lock_queues(); PMAP_LOCK(pmap); } /* * Indicate the need to retry. While waiting, the page table * page may have been allocated. */ return (NULL); } if ((m->flags & PG_ZERO) == 0) pmap_zero_page(m); /* * Map the pagetable page into the process address space, if * it isn't already there. */ if (ptepindex >= (NUPDE + NUPDPE)) { pml4_entry_t *pml4; vm_pindex_t pml4index; /* Wire up a new PDPE page */ pml4index = ptepindex - (NUPDE + NUPDPE); pml4 = &pmap->pm_pml4[pml4index]; *pml4 = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; } else if (ptepindex >= NUPDE) { vm_pindex_t pml4index; vm_pindex_t pdpindex; pml4_entry_t *pml4; pdp_entry_t *pdp; /* Wire up a new PDE page */ pdpindex = ptepindex - NUPDE; pml4index = pdpindex >> NPML4EPGSHIFT; pml4 = &pmap->pm_pml4[pml4index]; if ((*pml4 & PG_V) == 0) { /* Have to allocate a new pdp, recurse */ if (_pmap_allocpte(pmap, NUPDE + NUPDPE + pml4index, flags) == NULL) { --m->wire_count; atomic_subtract_int(&cnt.v_wire_count, 1); vm_page_free_zero(m); return (NULL); } } else { /* Add reference to pdp page */ pdppg = PHYS_TO_VM_PAGE(*pml4 & PG_FRAME); pdppg->wire_count++; } pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); /* Now find the pdp page */ pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; *pdp = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; } else { vm_pindex_t pml4index; vm_pindex_t pdpindex; pml4_entry_t *pml4; pdp_entry_t *pdp; pd_entry_t *pd; /* Wire up a new PTE page */ pdpindex = ptepindex >> NPDPEPGSHIFT; pml4index = pdpindex >> NPML4EPGSHIFT; /* First, find the pdp and check that its valid. */ pml4 = &pmap->pm_pml4[pml4index]; if ((*pml4 & PG_V) == 0) { /* Have to allocate a new pd, recurse */ if (_pmap_allocpte(pmap, NUPDE + pdpindex, flags) == NULL) { --m->wire_count; atomic_subtract_int(&cnt.v_wire_count, 1); vm_page_free_zero(m); return (NULL); } pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; } else { pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; if ((*pdp & PG_V) == 0) { /* Have to allocate a new pd, recurse */ if (_pmap_allocpte(pmap, NUPDE + pdpindex, flags) == NULL) { --m->wire_count; atomic_subtract_int(&cnt.v_wire_count, 1); vm_page_free_zero(m); return (NULL); } } else { /* Add reference to the pd page */ pdpg = PHYS_TO_VM_PAGE(*pdp & PG_FRAME); pdpg->wire_count++; } } pd = (pd_entry_t *)PHYS_TO_DMAP(*pdp & PG_FRAME); /* Now we know where the page directory page is */ pd = &pd[ptepindex & ((1ul << NPDEPGSHIFT) - 1)]; *pd = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; } pmap_resident_count_inc(pmap, 1); return (m); } static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va, int flags) { vm_pindex_t pdpindex, ptepindex; pdp_entry_t *pdpe; vm_page_t pdpg; KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, ("pmap_allocpde: flags is neither M_NOWAIT nor M_WAITOK")); retry: pdpe = pmap_pdpe(pmap, va); if (pdpe != NULL && (*pdpe & PG_V) != 0) { /* Add a reference to the pd page. */ pdpg = PHYS_TO_VM_PAGE(*pdpe & PG_FRAME); pdpg->wire_count++; } else { /* Allocate a pd page. */ ptepindex = pmap_pde_pindex(va); pdpindex = ptepindex >> NPDPEPGSHIFT; pdpg = _pmap_allocpte(pmap, NUPDE + pdpindex, flags); if (pdpg == NULL && (flags & M_WAITOK)) goto retry; } return (pdpg); } static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags) { vm_pindex_t ptepindex; pd_entry_t *pd; vm_page_t m; KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); /* * Calculate pagetable page index */ ptepindex = pmap_pde_pindex(va); retry: /* * Get the page directory entry */ pd = pmap_pde(pmap, va); /* * This supports switching from a 2MB page to a * normal 4K page. */ if (pd != NULL && (*pd & (PG_PS | PG_V)) == (PG_PS | PG_V)) { if (!pmap_demote_pde(pmap, pd, va)) { /* * Invalidation of the 2MB page mapping may have caused * the deallocation of the underlying PD page. */ pd = NULL; } } /* * If the page table page is mapped, we just increment the * hold count, and activate it. */ if (pd != NULL && (*pd & PG_V) != 0) { m = PHYS_TO_VM_PAGE(*pd & PG_FRAME); m->wire_count++; } else { /* * Here if the pte page isn't mapped, or if it has been * deallocated. */ m = _pmap_allocpte(pmap, ptepindex, flags); if (m == NULL && (flags & M_WAITOK)) goto retry; } return (m); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. */ void pmap_release(pmap_t pmap) { vm_page_t m; KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); KASSERT(pmap->pm_root == NULL, ("pmap_release: pmap has reserved page table page(s)")); m = PHYS_TO_VM_PAGE(pmap->pm_pml4[PML4PML4I] & PG_FRAME); pmap->pm_pml4[KPML4I] = 0; /* KVA */ pmap->pm_pml4[DMPML4I] = 0; /* Direct Map */ pmap->pm_pml4[PML4PML4I] = 0; /* Recursive Mapping */ m->wire_count--; atomic_subtract_int(&cnt.v_wire_count, 1); vm_page_free_zero(m); PMAP_LOCK_DESTROY(pmap); } static int kvm_size(SYSCTL_HANDLER_ARGS) { unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; return sysctl_handle_long(oidp, &ksize, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_size, "LU", "Size of KVM"); static int kvm_free(SYSCTL_HANDLER_ARGS) { unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; return sysctl_handle_long(oidp, &kfree, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "LU", "Amount of KVM free"); /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { vm_paddr_t paddr; vm_page_t nkpg; pd_entry_t *pde, newpdir; pdp_entry_t *pdpe; mtx_assert(&kernel_map->system_mtx, MA_OWNED); /* * Return if "addr" is within the range of kernel page table pages * that were preallocated during pmap bootstrap. Moreover, leave * "kernel_vm_end" and the kernel page table as they were. * * The correctness of this action is based on the following * argument: vm_map_findspace() allocates contiguous ranges of the * kernel virtual address space. It calls this function if a range * ends after "kernel_vm_end". If the kernel is mapped between * "kernel_vm_end" and "addr", then the range cannot begin at * "kernel_vm_end". In fact, its beginning address cannot be less * than the kernel. Thus, there is no immediate need to allocate * any new kernel page table pages between "kernel_vm_end" and * "KERNBASE". */ if (KERNBASE < addr && addr <= KERNBASE + NKPT * NBPDR) return; addr = roundup2(addr, NBPDR); if (addr - 1 >= kernel_map->max_offset) addr = kernel_map->max_offset; while (kernel_vm_end < addr) { pdpe = pmap_pdpe(kernel_pmap, kernel_vm_end); if ((*pdpe & PG_V) == 0) { /* We need a new PDP entry */ nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDPSHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); if ((nkpg->flags & PG_ZERO) == 0) pmap_zero_page(nkpg); paddr = VM_PAGE_TO_PHYS(nkpg); *pdpe = (pdp_entry_t) (paddr | PG_V | PG_RW | PG_A | PG_M); continue; /* try again */ } pde = pmap_pdpe_to_pde(pdpe, kernel_vm_end); if ((*pde & PG_V) != 0) { kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } continue; } nkpg = vm_page_alloc(NULL, pmap_pde_pindex(kernel_vm_end), VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); if ((nkpg->flags & PG_ZERO) == 0) pmap_zero_page(nkpg); paddr = VM_PAGE_TO_PHYS(nkpg); newpdir = (pd_entry_t) (paddr | PG_V | PG_RW | PG_A | PG_M); pde_store(pde, newpdir); kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } } } /*************************************************** * page management routines. ***************************************************/ CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); CTASSERT(_NPCM == 3); CTASSERT(_NPCPV == 168); static __inline struct pv_chunk * pv_to_chunk(pv_entry_t pv) { return (struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK); } #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) #define PC_FREE0 0xfffffffffffffffful #define PC_FREE1 0xfffffffffffffffful #define PC_FREE2 0x000000fffffffffful static uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 }; SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, "Current number of pv entries"); #ifdef PV_STATS static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, "Current number of pv entry chunks"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, "Current number of pv entry chunks allocated"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, "Current number of pv entry chunks frees"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, "Number of times tried to get a chunk page but failed."); static long pv_entry_frees, pv_entry_allocs; static int pv_entry_spare; SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, "Current number of pv entry frees"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, "Current number of pv entry allocs"); SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, "Current number of spare pv entries"); static int pmap_collect_inactive, pmap_collect_active; SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_inactive, CTLFLAG_RD, &pmap_collect_inactive, 0, "Current number times pmap_collect called on inactive queue"); SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_active, CTLFLAG_RD, &pmap_collect_active, 0, "Current number times pmap_collect called on active queue"); #endif /* * We are in a serious low memory condition. Resort to * drastic measures to free some pages so we can allocate * another pv entry chunk. This is normally called to * unmap inactive pages, and if necessary, active pages. * * We do not, however, unmap 2mpages because subsequent accesses will * allocate per-page pv entries until repromotion occurs, thereby * exacerbating the shortage of free pv entries. */ static void pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq) { pd_entry_t *pde; pmap_t pmap; pt_entry_t *pte, tpte; pv_entry_t next_pv, pv; vm_offset_t va; vm_page_t m, free; TAILQ_FOREACH(m, &vpq->pl, pageq) { if (m->hold_count || m->busy) continue; TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); /* Avoid deadlock and lock recursion. */ if (pmap > locked_pmap) PMAP_LOCK(pmap); else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) continue; pmap_resident_count_dec(pmap, 1); pde = pmap_pde(pmap, va); KASSERT((*pde & PG_PS) == 0, ("pmap_collect: found" " a 2mpage in page %p's pv list", m)); pte = pmap_pde_to_pte(pde, va); tpte = pte_load_clear(pte); KASSERT((tpte & PG_W) == 0, ("pmap_collect: wired pte %#lx", tpte)); if (tpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); free = NULL; pmap_unuse_pt(pmap, va, *pde, &free); pmap_invalidate_page(pmap, va); pmap_free_zero_pages(free); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); free_pv_entry(pmap, pv); if (pmap != locked_pmap) PMAP_UNLOCK(pmap); } if (TAILQ_EMPTY(&m->md.pv_list) && TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list)) vm_page_flag_clear(m, PG_WRITEABLE); } } /* * free the pv_entry back to the free list */ static void free_pv_entry(pmap_t pmap, pv_entry_t pv) { vm_page_t m; struct pv_chunk *pc; int idx, field, bit; mtx_assert(&vm_page_queue_mtx, MA_OWNED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(pv_entry_frees++); PV_STAT(pv_entry_spare++); pv_entry_count--; pc = pv_to_chunk(pv); idx = pv - &pc->pc_pventry[0]; field = idx / 64; bit = idx % 64; pc->pc_map[field] |= 1ul << bit; /* move to head of list */ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 || pc->pc_map[2] != PC_FREE2) { TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); return; } PV_STAT(pv_entry_spare -= _NPCPV); PV_STAT(pc_chunk_count--); PV_STAT(pc_chunk_frees++); /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); dump_drop_page(m->phys_addr); vm_page_unwire(m, 0); vm_page_free(m); } /* * get a new pv_entry, allocating a block from the system * when needed. */ static pv_entry_t get_pv_entry(pmap_t pmap, int try) { static const struct timeval printinterval = { 60, 0 }; static struct timeval lastprint; static vm_pindex_t colour; struct vpgqueues *pq; int bit, field; pv_entry_t pv; struct pv_chunk *pc; vm_page_t m; PMAP_LOCK_ASSERT(pmap, MA_OWNED); mtx_assert(&vm_page_queue_mtx, MA_OWNED); PV_STAT(pv_entry_allocs++); pv_entry_count++; if (pv_entry_count > pv_entry_high_water) if (ratecheck(&lastprint, &printinterval)) printf("Approaching the limit on PV entries, consider " "increasing either the vm.pmap.shpgperproc or the " "vm.pmap.pv_entry_max sysctl.\n"); pq = NULL; retry: pc = TAILQ_FIRST(&pmap->pm_pvchunk); if (pc != NULL) { for (field = 0; field < _NPCM; field++) { if (pc->pc_map[field]) { bit = bsfq(pc->pc_map[field]); break; } } if (field < _NPCM) { pv = &pc->pc_pventry[field * 64 + bit]; pc->pc_map[field] &= ~(1ul << bit); /* If this was the last item, move it to tail */ if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); } PV_STAT(pv_entry_spare--); return (pv); } } /* No free items, allocate another chunk */ m = vm_page_alloc(NULL, colour, (pq == &vm_page_queues[PQ_ACTIVE] ? VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); if (m == NULL) { if (try) { pv_entry_count--; PV_STAT(pc_chunk_tryfail++); return (NULL); } /* * Reclaim pv entries: At first, destroy mappings to inactive * pages. After that, if a pv chunk entry is still needed, * destroy mappings to active pages. */ if (pq == NULL) { PV_STAT(pmap_collect_inactive++); pq = &vm_page_queues[PQ_INACTIVE]; } else if (pq == &vm_page_queues[PQ_INACTIVE]) { PV_STAT(pmap_collect_active++); pq = &vm_page_queues[PQ_ACTIVE]; } else panic("get_pv_entry: increase vm.pmap.shpgperproc"); pmap_collect(pmap, pq); goto retry; } PV_STAT(pc_chunk_count++); PV_STAT(pc_chunk_allocs++); colour++; dump_add_page(m->phys_addr); pc = (void *)PHYS_TO_DMAP(m->phys_addr); pc->pc_pmap = pmap; pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */ pc->pc_map[1] = PC_FREE1; pc->pc_map[2] = PC_FREE2; pv = &pc->pc_pventry[0]; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(pv_entry_spare += _NPCPV - 1); return (pv); } /* * First find and then remove the pv entry for the specified pmap and virtual * address from the specified pv list. Returns the pv entry if found and NULL * otherwise. This operation can be performed on pv lists for either 4KB or * 2MB page mappings. */ static __inline pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; mtx_assert(&vm_page_queue_mtx, MA_OWNED); TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); break; } } return (pv); } /* * After demotion from a 2MB page mapping to 512 4KB page mappings, * destroy the pv entry for the 2MB page mapping and reinstantiate the pv * entries for each of the 4KB page mappings. */ static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) { struct md_page *pvh; pv_entry_t pv; vm_offset_t va_last; vm_page_t m; mtx_assert(&vm_page_queue_mtx, MA_OWNED); KASSERT((pa & PDRMASK) == 0, ("pmap_pv_demote_pde: pa is not 2mpage aligned")); /* * Transfer the 2mpage's pv entry for this mapping to the first * page's pv list. */ pvh = pa_to_pvh(pa); va = trunc_2mpage(va); pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found")); m = PHYS_TO_VM_PAGE(pa); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); /* Instantiate the remaining NPTEPG - 1 pv entries. */ va_last = va + NBPDR - PAGE_SIZE; do { m++; KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, ("pmap_pv_demote_pde: page %p is not managed", m)); va += PAGE_SIZE; pmap_insert_entry(pmap, va, m); } while (va < va_last); } /* * After promotion from 512 4KB page mappings to a single 2MB page mapping, * replace the many pv entries for the 4KB page mappings by a single pv entry * for the 2MB page mapping. */ static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) { struct md_page *pvh; pv_entry_t pv; vm_offset_t va_last; vm_page_t m; mtx_assert(&vm_page_queue_mtx, MA_OWNED); KASSERT((pa & PDRMASK) == 0, ("pmap_pv_promote_pde: pa is not 2mpage aligned")); /* * Transfer the first page's pv entry for this mapping to the * 2mpage's pv list. Aside from avoiding the cost of a call * to get_pv_entry(), a transfer avoids the possibility that * get_pv_entry() calls pmap_collect() and that pmap_collect() * removes one of the mappings that is being promoted. */ m = PHYS_TO_VM_PAGE(pa); va = trunc_2mpage(va); pv = pmap_pvh_remove(&m->md, pmap, va); KASSERT(pv != NULL, ("pmap_pv_promote_pde: pv not found")); pvh = pa_to_pvh(pa); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); /* Free the remaining NPTEPG - 1 pv entries. */ va_last = va + NBPDR - PAGE_SIZE; do { m++; va += PAGE_SIZE; pmap_pvh_free(&m->md, pmap, va); } while (va < va_last); } /* * First find and then destroy the pv entry for the specified pmap and virtual * address. This operation can be performed on pv lists for either 4KB or 2MB * page mappings. */ static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); free_pv_entry(pmap, pv); } static void pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) { struct md_page *pvh; mtx_assert(&vm_page_queue_mtx, MA_OWNED); pmap_pvh_free(&m->md, pmap, va); if (TAILQ_EMPTY(&m->md.pv_list)) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) vm_page_flag_clear(m, PG_WRITEABLE); } } /* * Create a pv entry for page at pa for * (pmap, va). */ static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) { pv_entry_t pv; PMAP_LOCK_ASSERT(pmap, MA_OWNED); mtx_assert(&vm_page_queue_mtx, MA_OWNED); pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); } /* * Conditionally create a pv entry. */ static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) { pv_entry_t pv; PMAP_LOCK_ASSERT(pmap, MA_OWNED); mtx_assert(&vm_page_queue_mtx, MA_OWNED); if (pv_entry_count < pv_entry_high_water && (pv = get_pv_entry(pmap, TRUE)) != NULL) { pv->pv_va = va; TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); return (TRUE); } else return (FALSE); } /* * Create the pv entry for a 2MB page mapping. */ static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) { struct md_page *pvh; pv_entry_t pv; mtx_assert(&vm_page_queue_mtx, MA_OWNED); if (pv_entry_count < pv_entry_high_water && (pv = get_pv_entry(pmap, TRUE)) != NULL) { pv->pv_va = va; pvh = pa_to_pvh(pa); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); return (TRUE); } else return (FALSE); } /* * Fills a page table page with mappings to consecutive physical pages. */ static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte) { pt_entry_t *pte; for (pte = firstpte; pte < firstpte + NPTEPG; pte++) { *pte = newpte; newpte += PAGE_SIZE; } } /* * Tries to demote a 2MB page mapping. If demotion fails, the 2MB page * mapping is invalidated. */ static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { pd_entry_t newpde, oldpde; pt_entry_t *firstpte, newpte; vm_paddr_t mptepa; vm_page_t free, mpte; PMAP_LOCK_ASSERT(pmap, MA_OWNED); oldpde = *pde; KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V), ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V")); mpte = pmap_lookup_pt_page(pmap, va); if (mpte != NULL) pmap_remove_pt_page(pmap, mpte); else { KASSERT((oldpde & PG_W) == 0, ("pmap_demote_pde: page table page for a wired mapping" " is missing")); /* * Invalidate the 2MB page mapping and return "failure" if the * mapping was never accessed or the allocation of the new * page table page fails. If the 2MB page mapping belongs to * the direct map region of the kernel's address space, then * the page allocation request specifies the highest possible * priority (VM_ALLOC_INTERRUPT). Otherwise, the priority is * normal. Page table pages are preallocated for every other * part of the kernel address space, so the direct map region * is the only part of the kernel address space that must be * handled here. */ if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL, pmap_pde_pindex(va), (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { free = NULL; pmap_remove_pde(pmap, pde, trunc_2mpage(va), &free); pmap_invalidate_page(pmap, trunc_2mpage(va)); pmap_free_zero_pages(free); CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#lx" " in pmap %p", va, pmap); return (FALSE); } if (va < VM_MAXUSER_ADDRESS) pmap_resident_count_inc(pmap, 1); } mptepa = VM_PAGE_TO_PHYS(mpte); firstpte = (pt_entry_t *)PHYS_TO_DMAP(mptepa); newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V; KASSERT((oldpde & PG_A) != 0, ("pmap_demote_pde: oldpde is missing PG_A")); KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW, ("pmap_demote_pde: oldpde is missing PG_M")); newpte = oldpde & ~PG_PS; if ((newpte & PG_PDE_PAT) != 0) newpte ^= PG_PDE_PAT | PG_PTE_PAT; /* * If the page table page is new, initialize it. */ if (mpte->wire_count == 1) { mpte->wire_count = NPTEPG; pmap_fill_ptp(firstpte, newpte); } KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME), ("pmap_demote_pde: firstpte and newpte map different physical" " addresses")); /* * If the mapping has changed attributes, update the page table * entries. */ if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE)) pmap_fill_ptp(firstpte, newpte); /* * Demote the mapping. This pmap is locked. The old PDE has * PG_A set. If the old PDE has PG_RW set, it also has PG_M * set. Thus, there is no danger of a race with another * processor changing the setting of PG_A and/or PG_M between * the read above and the store below. */ if (workaround_erratum383) pmap_update_pde(pmap, va, pde, newpde); else pde_store(pde, newpde); /* * Invalidate a stale recursive mapping of the page table page. */ if (va >= VM_MAXUSER_ADDRESS) pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); /* * Demote the pv entry. This depends on the earlier demotion * of the mapping. Specifically, the (re)creation of a per- * page pv entry might trigger the execution of pmap_collect(), * which might reclaim a newly (re)created per-page pv entry * and destroy the associated mapping. In order to destroy * the mapping, the PDE must have already changed from mapping * the 2mpage to referencing the page table page. */ if ((oldpde & PG_MANAGED) != 0) pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME); pmap_pde_demotions++; CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#lx" " in pmap %p", va, pmap); return (TRUE); } /* * pmap_remove_pde: do the things to unmap a superpage in a process */ static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, vm_page_t *free) { struct md_page *pvh; pd_entry_t oldpde; vm_offset_t eva, va; vm_page_t m, mpte; PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((sva & PDRMASK) == 0, ("pmap_remove_pde: sva is not 2mpage aligned")); oldpde = pte_load_clear(pdq); if (oldpde & PG_W) pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE; /* * Machines that don't support invlpg, also don't support * PG_G. */ if (oldpde & PG_G) pmap_invalidate_page(kernel_pmap, sva); pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE); if (oldpde & PG_MANAGED) { pvh = pa_to_pvh(oldpde & PG_PS_FRAME); pmap_pvh_free(pvh, pmap, sva); eva = sva + NBPDR; for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); va < eva; va += PAGE_SIZE, m++) { if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if (oldpde & PG_A) vm_page_flag_set(m, PG_REFERENCED); if (TAILQ_EMPTY(&m->md.pv_list) && TAILQ_EMPTY(&pvh->pv_list)) vm_page_flag_clear(m, PG_WRITEABLE); } } if (pmap == kernel_pmap) { if (!pmap_demote_pde(pmap, pdq, sva)) panic("pmap_remove_pde: failed demotion"); } else { mpte = pmap_lookup_pt_page(pmap, sva); if (mpte != NULL) { pmap_remove_pt_page(pmap, mpte); pmap_resident_count_dec(pmap, 1); KASSERT(mpte->wire_count == NPTEPG, ("pmap_remove_pde: pte page wire count error")); mpte->wire_count = 0; pmap_add_delayed_free_list(mpte, free, FALSE); atomic_subtract_int(&cnt.v_wire_count, 1); } } return (pmap_unuse_pt(pmap, sva, *pmap_pdpe(pmap, sva), free)); } /* * pmap_remove_pte: do the things to unmap a page in a process */ static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, pd_entry_t ptepde, vm_page_t *free) { pt_entry_t oldpte; vm_page_t m; PMAP_LOCK_ASSERT(pmap, MA_OWNED); oldpte = pte_load_clear(ptq); if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; pmap_resident_count_dec(pmap, 1); if (oldpte & PG_MANAGED) { m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME); if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if (oldpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); pmap_remove_entry(pmap, m, va); } return (pmap_unuse_pt(pmap, va, ptepde, free)); } /* * Remove a single page from a process address space */ static void pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, vm_page_t *free) { pt_entry_t *pte; PMAP_LOCK_ASSERT(pmap, MA_OWNED); if ((*pde & PG_V) == 0) return; pte = pmap_pde_to_pte(pde, va); if ((*pte & PG_V) == 0) return; pmap_remove_pte(pmap, pte, va, *pde, free); pmap_invalidate_page(pmap, va); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t va, va_next; pml4_entry_t *pml4e; pdp_entry_t *pdpe; pd_entry_t ptpaddr, *pde; pt_entry_t *pte; vm_page_t free = NULL; int anyvalid; /* * Perform an unsynchronized read. This is, however, safe. */ if (pmap->pm_stats.resident_count == 0) return; anyvalid = 0; vm_page_lock_queues(); PMAP_LOCK(pmap); /* * special handling of removing one page. a very * common operation and easy to short circuit some * code. */ if (sva + PAGE_SIZE == eva) { pde = pmap_pde(pmap, sva); if (pde && (*pde & PG_PS) == 0) { pmap_remove_page(pmap, sva, pde, &free); goto out; } } for (; sva < eva; sva = va_next) { if (pmap->pm_stats.resident_count == 0) break; pml4e = pmap_pml4e(pmap, sva); if ((*pml4e & PG_V) == 0) { va_next = (sva + NBPML4) & ~PML4MASK; if (va_next < sva) va_next = eva; continue; } pdpe = pmap_pml4e_to_pdpe(pml4e, sva); if ((*pdpe & PG_V) == 0) { va_next = (sva + NBPDP) & ~PDPMASK; if (va_next < sva) va_next = eva; continue; } /* * Calculate index for next page table. */ va_next = (sva + NBPDR) & ~PDRMASK; if (va_next < sva) va_next = eva; pde = pmap_pdpe_to_pde(pdpe, sva); ptpaddr = *pde; /* * Weed out invalid mappings. */ if (ptpaddr == 0) continue; /* * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { /* * Are we removing the entire large page? If not, * demote the mapping and fall through. */ if (sva + NBPDR == va_next && eva >= va_next) { /* * The TLB entry for a PG_G mapping is * invalidated by pmap_remove_pde(). */ if ((ptpaddr & PG_G) == 0) anyvalid = 1; pmap_remove_pde(pmap, pde, sva, &free); continue; } else if (!pmap_demote_pde(pmap, pde, sva)) { /* The large page mapping was destroyed. */ continue; } else ptpaddr = *pde; } /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. */ if (va_next > eva) va_next = eva; va = va_next; for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, sva += PAGE_SIZE) { if (*pte == 0) { if (va != va_next) { pmap_invalidate_range(pmap, va, sva); va = va_next; } continue; } if ((*pte & PG_G) == 0) anyvalid = 1; else if (va == va_next) va = sva; if (pmap_remove_pte(pmap, pte, sva, ptpaddr, &free)) { sva += PAGE_SIZE; break; } } if (va != va_next) pmap_invalidate_range(pmap, va, sva); } out: if (anyvalid) pmap_invalidate_all(pmap); vm_page_unlock_queues(); PMAP_UNLOCK(pmap); pmap_free_zero_pages(free); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ void pmap_remove_all(vm_page_t m) { struct md_page *pvh; pv_entry_t pv; pmap_t pmap; pt_entry_t *pte, tpte; pd_entry_t *pde; vm_offset_t va; vm_page_t free; KASSERT((m->flags & PG_FICTITIOUS) == 0, ("pmap_remove_all: page %p is fictitious", m)); free = NULL; vm_page_lock_queues(); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); va = pv->pv_va; pde = pmap_pde(pmap, va); (void)pmap_demote_pde(pmap, pde, va); PMAP_UNLOCK(pmap); } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pmap_resident_count_dec(pmap, 1); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_remove_all: found" " a 2mpage in page %p's pv list", m)); pte = pmap_pde_to_pte(pde, pv->pv_va); tpte = pte_load_clear(pte); if (tpte & PG_W) pmap->pm_stats.wired_count--; if (tpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); /* * Update the vm_page_t clean and reference bits. */ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); pmap_unuse_pt(pmap, pv->pv_va, *pde, &free); pmap_invalidate_page(pmap, pv->pv_va); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } vm_page_flag_clear(m, PG_WRITEABLE); vm_page_unlock_queues(); pmap_free_zero_pages(free); } /* * pmap_protect_pde: do the things to protect a 2mpage in a process */ static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot) { pd_entry_t newpde, oldpde; vm_offset_t eva, va; vm_page_t m; boolean_t anychanged; PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((sva & PDRMASK) == 0, ("pmap_protect_pde: sva is not 2mpage aligned")); anychanged = FALSE; retry: oldpde = newpde = *pde; if (oldpde & PG_MANAGED) { eva = sva + NBPDR; for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); va < eva; va += PAGE_SIZE, m++) if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); } if ((prot & VM_PROT_WRITE) == 0) newpde &= ~(PG_RW | PG_M); if ((prot & VM_PROT_EXECUTE) == 0) newpde |= pg_nx; if (newpde != oldpde) { if (!atomic_cmpset_long(pde, oldpde, newpde)) goto retry; if (oldpde & PG_G) pmap_invalidate_page(pmap, sva); else anychanged = TRUE; } return (anychanged); } /* * Set the physical protection on the * specified range of this map as requested. */ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { vm_offset_t va_next; pml4_entry_t *pml4e; pdp_entry_t *pdpe; pd_entry_t ptpaddr, *pde; pt_entry_t *pte; int anychanged; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) == (VM_PROT_WRITE|VM_PROT_EXECUTE)) return; anychanged = 0; vm_page_lock_queues(); PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { pml4e = pmap_pml4e(pmap, sva); if ((*pml4e & PG_V) == 0) { va_next = (sva + NBPML4) & ~PML4MASK; if (va_next < sva) va_next = eva; continue; } pdpe = pmap_pml4e_to_pdpe(pml4e, sva); if ((*pdpe & PG_V) == 0) { va_next = (sva + NBPDP) & ~PDPMASK; if (va_next < sva) va_next = eva; continue; } va_next = (sva + NBPDR) & ~PDRMASK; if (va_next < sva) va_next = eva; pde = pmap_pdpe_to_pde(pdpe, sva); ptpaddr = *pde; /* * Weed out invalid mappings. */ if (ptpaddr == 0) continue; /* * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { /* * Are we protecting the entire large page? If not, * demote the mapping and fall through. */ if (sva + NBPDR == va_next && eva >= va_next) { /* * The TLB entry for a PG_G mapping is * invalidated by pmap_protect_pde(). */ if (pmap_protect_pde(pmap, pde, sva, prot)) anychanged = 1; continue; } else if (!pmap_demote_pde(pmap, pde, sva)) { /* The large page mapping was destroyed. */ continue; } } if (va_next > eva) va_next = eva; for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, sva += PAGE_SIZE) { pt_entry_t obits, pbits; vm_page_t m; retry: obits = pbits = *pte; if ((pbits & PG_V) == 0) continue; if ((prot & VM_PROT_WRITE) == 0) { if ((pbits & (PG_MANAGED | PG_M | PG_RW)) == (PG_MANAGED | PG_M | PG_RW)) { m = PHYS_TO_VM_PAGE(pbits & PG_FRAME); vm_page_dirty(m); } pbits &= ~(PG_RW | PG_M); } if ((prot & VM_PROT_EXECUTE) == 0) pbits |= pg_nx; if (pbits != obits) { if (!atomic_cmpset_long(pte, obits, pbits)) goto retry; if (obits & PG_G) pmap_invalidate_page(pmap, sva); else anychanged = 1; } } } if (anychanged) pmap_invalidate_all(pmap); vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } /* * Tries to promote the 512, contiguous 4KB page mappings that are within a * single page table page (PTP) to a single 2MB page mapping. For promotion * to occur, two conditions must be met: (1) the 4KB page mappings must map * aligned, contiguous physical memory and (2) the 4KB page mappings must have * identical characteristics. */ static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { pd_entry_t newpde; pt_entry_t *firstpte, oldpte, pa, *pte; vm_offset_t oldpteva; vm_page_t mpte; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * Examine the first PTE in the specified PTP. Abort if this PTE is * either invalid, unused, or does not map the first 4KB physical page * within a 2MB page. */ firstpte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME); setpde: newpde = *firstpte; if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V)) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx" " in pmap %p", va, pmap); return; } if ((newpde & (PG_M | PG_RW)) == PG_RW) { /* * When PG_M is already clear, PG_RW can be cleared without * a TLB invalidation. */ if (!atomic_cmpset_long(firstpte, newpde, newpde & ~PG_RW)) goto setpde; newpde &= ~PG_RW; } /* * Examine each of the other PTEs in the specified PTP. Abort if this * PTE maps an unexpected 4KB physical page or does not have identical * characteristics to the first PTE. */ pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE; for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) { setpte: oldpte = *pte; if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx" " in pmap %p", va, pmap); return; } if ((oldpte & (PG_M | PG_RW)) == PG_RW) { /* * When PG_M is already clear, PG_RW can be cleared * without a TLB invalidation. */ if (!atomic_cmpset_long(pte, oldpte, oldpte & ~PG_RW)) goto setpte; oldpte &= ~PG_RW; oldpteva = (oldpte & PG_FRAME & PDRMASK) | (va & ~PDRMASK); CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#lx" " in pmap %p", oldpteva, pmap); } if ((oldpte & PG_PTE_PROMOTE) != (newpde & PG_PTE_PROMOTE)) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx" " in pmap %p", va, pmap); return; } pa -= PAGE_SIZE; } /* * Save the page table page in its current state until the PDE * mapping the superpage is demoted by pmap_demote_pde() or * destroyed by pmap_remove_pde(). */ mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME); KASSERT(mpte >= vm_page_array && mpte < &vm_page_array[vm_page_array_size], ("pmap_promote_pde: page table page is out of range")); KASSERT(mpte->pindex == pmap_pde_pindex(va), ("pmap_promote_pde: page table page's pindex is wrong")); pmap_insert_pt_page(pmap, mpte); /* * Promote the pv entries. */ if ((newpde & PG_MANAGED) != 0) pmap_pv_promote_pde(pmap, va, newpde & PG_PS_FRAME); /* * Propagate the PAT index to its proper position. */ if ((newpde & PG_PTE_PAT) != 0) newpde ^= PG_PDE_PAT | PG_PTE_PAT; /* * Map the superpage. */ if (workaround_erratum383) pmap_update_pde(pmap, va, pde, PG_PS | newpde); else pde_store(pde, PG_PS | newpde); pmap_pde_promotions++; CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#lx" " in pmap %p", va, pmap); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ void pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, vm_prot_t prot, boolean_t wired) { pd_entry_t *pde; pt_entry_t *pte; pt_entry_t newpte, origpte; pv_entry_t pv; vm_paddr_t opa, pa; vm_page_t mpte, om; boolean_t invlva; va = trunc_page(va); KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS, ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%lx)", va)); KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 || (m->oflags & VPO_BUSY) != 0, ("pmap_enter: page %p is not busy", m)); mpte = NULL; vm_page_lock_queues(); PMAP_LOCK(pmap); /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) mpte = pmap_allocpte(pmap, va, M_WAITOK); pde = pmap_pde(pmap, va); if (pde != NULL && (*pde & PG_V) != 0) { if ((*pde & PG_PS) != 0) panic("pmap_enter: attempted pmap_enter on 2MB page"); pte = pmap_pde_to_pte(pde, va); } else panic("pmap_enter: invalid page directory va=%#lx", va); pa = VM_PAGE_TO_PHYS(m); om = NULL; origpte = *pte; opa = origpte & PG_FRAME; /* * Mapping has not changed, must be protection or wiring change. */ if (origpte && (opa == pa)) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if (wired && ((origpte & PG_W) == 0)) pmap->pm_stats.wired_count++; else if (!wired && (origpte & PG_W)) pmap->pm_stats.wired_count--; /* * Remove extra pte reference */ if (mpte) mpte->wire_count--; if (origpte & PG_MANAGED) { om = m; pa |= PG_MANAGED; } goto validate; } pv = NULL; /* * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. */ if (opa) { if (origpte & PG_W) pmap->pm_stats.wired_count--; if (origpte & PG_MANAGED) { om = PHYS_TO_VM_PAGE(opa); pv = pmap_pvh_remove(&om->md, pmap, va); } if (mpte != NULL) { mpte->wire_count--; KASSERT(mpte->wire_count > 0, ("pmap_enter: missing reference to page table page," " va: 0x%lx", va)); } } else pmap_resident_count_inc(pmap, 1); /* * Enter on the PV list if part of our managed memory. */ if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) { KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, ("pmap_enter: managed mapping within the clean submap")); if (pv == NULL) pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); pa |= PG_MANAGED; } else if (pv != NULL) free_pv_entry(pmap, pv); /* * Increment counters */ if (wired) pmap->pm_stats.wired_count++; validate: /* * Now validate mapping with desired protection/wiring. */ newpte = (pt_entry_t)(pa | pmap_cache_bits(m->md.pat_mode, 0) | PG_V); if ((prot & VM_PROT_WRITE) != 0) { newpte |= PG_RW; if ((newpte & PG_MANAGED) != 0) vm_page_flag_set(m, PG_WRITEABLE); } if ((prot & VM_PROT_EXECUTE) == 0) newpte |= pg_nx; if (wired) newpte |= PG_W; if (va < VM_MAXUSER_ADDRESS) newpte |= PG_U; if (pmap == kernel_pmap) newpte |= PG_G; /* * if the mapping or permission bits are different, we need * to update the pte. */ if ((origpte & ~(PG_M|PG_A)) != newpte) { newpte |= PG_A; if ((access & VM_PROT_WRITE) != 0) newpte |= PG_M; if (origpte & PG_V) { invlva = FALSE; origpte = pte_load_store(pte, newpte); if (origpte & PG_A) { if (origpte & PG_MANAGED) vm_page_flag_set(om, PG_REFERENCED); if (opa != VM_PAGE_TO_PHYS(m) || ((origpte & PG_NX) == 0 && (newpte & PG_NX))) invlva = TRUE; } if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if ((origpte & PG_MANAGED) != 0) vm_page_dirty(om); if ((newpte & PG_RW) == 0) invlva = TRUE; } if ((origpte & PG_MANAGED) != 0 && TAILQ_EMPTY(&om->md.pv_list) && TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list)) vm_page_flag_clear(om, PG_WRITEABLE); if (invlva) pmap_invalidate_page(pmap, va); } else pte_store(pte, newpte); } /* * If both the page table page and the reservation are fully * populated, then attempt promotion. */ if ((mpte == NULL || mpte->wire_count == NPTEPG) && pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0) pmap_promote_pde(pmap, pde, va); vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } /* * Tries to create a 2MB page mapping. Returns TRUE if successful and FALSE * otherwise. Fails if (1) a page table page cannot be allocated without * blocking, (2) a mapping already exists at the specified virtual address, or * (3) a pv entry cannot be allocated without reclaiming another pv entry. */ static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { pd_entry_t *pde, newpde; vm_page_t free, mpde; mtx_assert(&vm_page_queue_mtx, MA_OWNED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); if ((mpde = pmap_allocpde(pmap, va, M_NOWAIT)) == NULL) { CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" " in pmap %p", va, pmap); return (FALSE); } pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpde)); pde = &pde[pmap_pde_index(va)]; if ((*pde & PG_V) != 0) { KASSERT(mpde->wire_count > 1, ("pmap_enter_pde: mpde's wire count is too low")); mpde->wire_count--; CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" " in pmap %p", va, pmap); return (FALSE); } newpde = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 1) | PG_PS | PG_V; if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) { newpde |= PG_MANAGED; /* * Abort this mapping if its PV entry could not be created. */ if (!pmap_pv_insert_pde(pmap, va, VM_PAGE_TO_PHYS(m))) { free = NULL; if (pmap_unwire_pte_hold(pmap, va, mpde, &free)) { pmap_invalidate_page(pmap, va); pmap_free_zero_pages(free); } CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" " in pmap %p", va, pmap); return (FALSE); } } if ((prot & VM_PROT_EXECUTE) == 0) newpde |= pg_nx; if (va < VM_MAXUSER_ADDRESS) newpde |= PG_U; /* * Increment counters. */ pmap_resident_count_inc(pmap, NBPDR / PAGE_SIZE); /* * Map the superpage. */ pde_store(pde, newpde); pmap_pde_mappings++; CTR2(KTR_PMAP, "pmap_enter_pde: success for va %#lx" " in pmap %p", va, pmap); return (TRUE); } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ void pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { vm_offset_t va; vm_page_t m, mpte; vm_pindex_t diff, psize; VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED); psize = atop(end - start); mpte = NULL; m = m_start; vm_page_lock_queues(); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { va = start + ptoa(diff); if ((va & PDRMASK) == 0 && va + NBPDR <= end && (VM_PAGE_TO_PHYS(m) & PDRMASK) == 0 && pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0 && pmap_enter_pde(pmap, va, m, prot)) m = &m[NBPDR / PAGE_SIZE - 1]; else mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte); m = TAILQ_NEXT(m, listq); } vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * but is *MUCH* faster than pmap_enter... */ void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { vm_page_lock_queues(); PMAP_LOCK(pmap); (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL); vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte) { vm_page_t free; pt_entry_t *pte; vm_paddr_t pa; KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0, ("pmap_enter_quick_locked: managed mapping within the clean submap")); mtx_assert(&vm_page_queue_mtx, MA_OWNED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { vm_pindex_t ptepindex; pd_entry_t *ptepa; /* * Calculate pagetable page index */ ptepindex = pmap_pde_pindex(va); if (mpte && (mpte->pindex == ptepindex)) { mpte->wire_count++; } else { /* * Get the page directory entry */ ptepa = pmap_pde(pmap, va); /* * If the page table page is mapped, we just increment * the hold count, and activate it. */ if (ptepa && (*ptepa & PG_V) != 0) { if (*ptepa & PG_PS) return (NULL); mpte = PHYS_TO_VM_PAGE(*ptepa & PG_FRAME); mpte->wire_count++; } else { mpte = _pmap_allocpte(pmap, ptepindex, M_NOWAIT); if (mpte == NULL) return (mpte); } } pte = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte)); pte = &pte[pmap_pte_index(va)]; } else { mpte = NULL; pte = vtopte(va); } if (*pte) { if (mpte != NULL) { mpte->wire_count--; mpte = NULL; } return (mpte); } /* * Enter on the PV list if part of our managed memory. */ if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 && !pmap_try_insert_pv_entry(pmap, va, m)) { if (mpte != NULL) { free = NULL; if (pmap_unwire_pte_hold(pmap, va, mpte, &free)) { pmap_invalidate_page(pmap, va); pmap_free_zero_pages(free); } mpte = NULL; } return (mpte); } /* * Increment counters */ pmap_resident_count_inc(pmap, 1); pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0); if ((prot & VM_PROT_EXECUTE) == 0) pa |= pg_nx; /* * Now validate mapping with RO protection */ if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) pte_store(pte, pa | PG_V | PG_U); else pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); return (mpte); } /* * Make a temporary mapping for a physical address. This is only intended * to be used for panic dumps. */ void * pmap_kenter_temporary(vm_paddr_t pa, int i) { vm_offset_t va; va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); pmap_kenter(va, pa); invlpg(va); return ((void *)crashdumpmap); } /* * This code maps large physical mmap regions into the * processor address space. Note that some shortcuts * are taken, but the code works. */ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { pd_entry_t *pde; vm_paddr_t pa, ptepa; vm_page_t p, pdpg; int pat_mode; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, ("pmap_object_init_pt: non-device object")); if ((addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) { if (!vm_object_populate(object, pindex, pindex + atop(size))) return; p = vm_page_lookup(object, pindex); KASSERT(p->valid == VM_PAGE_BITS_ALL, ("pmap_object_init_pt: invalid page %p", p)); pat_mode = p->md.pat_mode; /* * Abort the mapping if the first page is not physically * aligned to a 2MB page boundary. */ ptepa = VM_PAGE_TO_PHYS(p); if (ptepa & (NBPDR - 1)) return; /* * Skip the first page. Abort the mapping if the rest of * the pages are not physically contiguous or have differing * memory attributes. */ p = TAILQ_NEXT(p, listq); for (pa = ptepa + PAGE_SIZE; pa < ptepa + size; pa += PAGE_SIZE) { KASSERT(p->valid == VM_PAGE_BITS_ALL, ("pmap_object_init_pt: invalid page %p", p)); if (pa != VM_PAGE_TO_PHYS(p) || pat_mode != p->md.pat_mode) return; p = TAILQ_NEXT(p, listq); } /* * Map using 2MB pages. Since "ptepa" is 2M aligned and * "size" is a multiple of 2M, adding the PAT setting to "pa" * will not affect the termination of this loop. */ PMAP_LOCK(pmap); for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa + size; pa += NBPDR) { pdpg = pmap_allocpde(pmap, addr, M_NOWAIT); if (pdpg == NULL) { /* * The creation of mappings below is only an * optimization. If a page directory page * cannot be allocated without blocking, * continue on to the next mapping rather than * blocking. */ addr += NBPDR; continue; } pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pdpg)); pde = &pde[pmap_pde_index(addr)]; if ((*pde & PG_V) == 0) { pde_store(pde, pa | PG_PS | PG_M | PG_A | PG_U | PG_RW | PG_V); pmap_resident_count_inc(pmap, NBPDR / PAGE_SIZE); pmap_pde_mappings++; } else { /* Continue on if the PDE is already valid. */ pdpg->wire_count--; KASSERT(pdpg->wire_count > 0, ("pmap_object_init_pt: missing reference " "to page directory page, va: 0x%lx", addr)); } addr += NBPDR; } PMAP_UNLOCK(pmap); } } /* * Routine: pmap_change_wiring * Function: Change the wiring attribute for a map/virtual-address * pair. * In/out conditions: * The mapping must already exist in the pmap. */ void pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) { pd_entry_t *pde; pt_entry_t *pte; boolean_t are_queues_locked; are_queues_locked = FALSE; /* * Wiring is not a hardware characteristic so there is no need to * invalidate TLB. */ retry: PMAP_LOCK(pmap); pde = pmap_pde(pmap, va); if ((*pde & PG_PS) != 0) { if (!wired != ((*pde & PG_W) == 0)) { if (!are_queues_locked) { are_queues_locked = TRUE; if (!mtx_trylock(&vm_page_queue_mtx)) { PMAP_UNLOCK(pmap); vm_page_lock_queues(); goto retry; } } if (!pmap_demote_pde(pmap, pde, va)) panic("pmap_change_wiring: demotion failed"); } else goto out; } pte = pmap_pde_to_pte(pde, va); if (wired && (*pte & PG_W) == 0) { pmap->pm_stats.wired_count++; atomic_set_long(pte, PG_W); } else if (!wired && (*pte & PG_W) != 0) { pmap->pm_stats.wired_count--; atomic_clear_long(pte, PG_W); } out: if (are_queues_locked) vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. */ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { vm_page_t free; vm_offset_t addr; vm_offset_t end_addr = src_addr + len; vm_offset_t va_next; if (dst_addr != src_addr) return; vm_page_lock_queues(); if (dst_pmap < src_pmap) { PMAP_LOCK(dst_pmap); PMAP_LOCK(src_pmap); } else { PMAP_LOCK(src_pmap); PMAP_LOCK(dst_pmap); } for (addr = src_addr; addr < end_addr; addr = va_next) { pt_entry_t *src_pte, *dst_pte; vm_page_t dstmpde, dstmpte, srcmpte; pml4_entry_t *pml4e; pdp_entry_t *pdpe; pd_entry_t srcptepaddr, *pde; KASSERT(addr < UPT_MIN_ADDRESS, ("pmap_copy: invalid to pmap_copy page tables")); pml4e = pmap_pml4e(src_pmap, addr); if ((*pml4e & PG_V) == 0) { va_next = (addr + NBPML4) & ~PML4MASK; if (va_next < addr) va_next = end_addr; continue; } pdpe = pmap_pml4e_to_pdpe(pml4e, addr); if ((*pdpe & PG_V) == 0) { va_next = (addr + NBPDP) & ~PDPMASK; if (va_next < addr) va_next = end_addr; continue; } va_next = (addr + NBPDR) & ~PDRMASK; if (va_next < addr) va_next = end_addr; pde = pmap_pdpe_to_pde(pdpe, addr); srcptepaddr = *pde; if (srcptepaddr == 0) continue; if (srcptepaddr & PG_PS) { dstmpde = pmap_allocpde(dst_pmap, addr, M_NOWAIT); if (dstmpde == NULL) break; pde = (pd_entry_t *) PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dstmpde)); pde = &pde[pmap_pde_index(addr)]; if (*pde == 0 && ((srcptepaddr & PG_MANAGED) == 0 || pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr & PG_PS_FRAME))) { *pde = srcptepaddr & ~PG_W; pmap_resident_count_inc(dst_pmap, NBPDR / PAGE_SIZE); } else dstmpde->wire_count--; continue; } srcptepaddr &= PG_FRAME; srcmpte = PHYS_TO_VM_PAGE(srcptepaddr); KASSERT(srcmpte->wire_count > 0, ("pmap_copy: source page table page is unused")); if (va_next > end_addr) va_next = end_addr; src_pte = (pt_entry_t *)PHYS_TO_DMAP(srcptepaddr); src_pte = &src_pte[pmap_pte_index(addr)]; dstmpte = NULL; while (addr < va_next) { pt_entry_t ptetemp; ptetemp = *src_pte; /* * we only virtual copy managed pages */ if ((ptetemp & PG_MANAGED) != 0) { if (dstmpte != NULL && dstmpte->pindex == pmap_pde_pindex(addr)) dstmpte->wire_count++; else if ((dstmpte = pmap_allocpte(dst_pmap, addr, M_NOWAIT)) == NULL) goto out; dst_pte = (pt_entry_t *) PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dstmpte)); dst_pte = &dst_pte[pmap_pte_index(addr)]; if (*dst_pte == 0 && pmap_try_insert_pv_entry(dst_pmap, addr, PHYS_TO_VM_PAGE(ptetemp & PG_FRAME))) { /* * Clear the wired, modified, and * accessed (referenced) bits * during the copy. */ *dst_pte = ptetemp & ~(PG_W | PG_M | PG_A); pmap_resident_count_inc(dst_pmap, 1); } else { free = NULL; if (pmap_unwire_pte_hold(dst_pmap, addr, dstmpte, &free)) { pmap_invalidate_page(dst_pmap, addr); pmap_free_zero_pages(free); } goto out; } if (dstmpte->wire_count >= srcmpte->wire_count) break; } addr += PAGE_SIZE; src_pte++; } } out: vm_page_unlock_queues(); PMAP_UNLOCK(src_pmap); PMAP_UNLOCK(dst_pmap); } /* * pmap_zero_page zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. */ void pmap_zero_page(vm_page_t m) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); pagezero((void *)va); } /* * pmap_zero_page_area zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. * * off and size may not cover an area beyond a single hardware page. */ void pmap_zero_page_area(vm_page_t m, int off, int size) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); if (off == 0 && size == PAGE_SIZE) pagezero((void *)va); else bzero((char *)va + off, size); } /* * pmap_zero_page_idle zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. This * is intended to be called from the vm_pagezero process only and * outside of Giant. */ void pmap_zero_page_idle(vm_page_t m) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); pagezero((void *)va); } /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. */ void pmap_copy_page(vm_page_t msrc, vm_page_t mdst) { vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); pagecopy((void *)src, (void *)dst); } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. */ boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { struct md_page *pvh; pv_entry_t pv; int loops = 0; boolean_t rv; KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; vm_page_lock_queues(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } if (!rv && loops < 16) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } } vm_page_unlock_queues(); return (rv); } /* * pmap_page_wired_mappings: * * Return the number of managed mappings to the given physical page * that are wired. */ int pmap_page_wired_mappings(vm_page_t m) { int count; count = 0; if ((m->flags & PG_FICTITIOUS) != 0) return (count); vm_page_lock_queues(); count = pmap_pvh_wired_mappings(&m->md, count); count = pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)), count); vm_page_unlock_queues(); return (count); } /* * pmap_pvh_wired_mappings: * * Return the updated number "count" of managed mappings that are wired. */ static int pmap_pvh_wired_mappings(struct md_page *pvh, int count) { pmap_t pmap; pt_entry_t *pte; pv_entry_t pv; mtx_assert(&vm_page_queue_mtx, MA_OWNED); TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte(pmap, pv->pv_va); if ((*pte & PG_W) != 0) count++; PMAP_UNLOCK(pmap); } return (count); } /* * Returns TRUE if the given page is mapped individually or as part of * a 2mpage. Otherwise, returns FALSE. */ boolean_t pmap_page_is_mapped(vm_page_t m) { boolean_t rv; if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) return (FALSE); vm_page_lock_queues(); rv = !TAILQ_EMPTY(&m->md.pv_list) || !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list); vm_page_unlock_queues(); return (rv); } /* * Remove all pages from specified address space * this aids process exit speeds. Also, this code * is special cased for current process only, but * can have the more generic (and slightly slower) * mode enabled. This is much faster than pmap_remove * in the case of running down an entire address space. */ void pmap_remove_pages(pmap_t pmap) { pd_entry_t ptepde; pt_entry_t *pte, tpte; vm_page_t free = NULL; vm_page_t m, mpte, mt; pv_entry_t pv; struct md_page *pvh; struct pv_chunk *pc, *npc; int field, idx; int64_t bit; uint64_t inuse, bitmask; int allfree; if (pmap != PCPU_GET(curpmap)) { printf("warning: pmap_remove_pages called with non-current pmap\n"); return; } vm_page_lock_queues(); PMAP_LOCK(pmap); TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { allfree = 1; for (field = 0; field < _NPCM; field++) { inuse = (~(pc->pc_map[field])) & pc_freemask[field]; while (inuse != 0) { bit = bsfq(inuse); bitmask = 1UL << bit; idx = field * 64 + bit; pv = &pc->pc_pventry[idx]; inuse &= ~bitmask; pte = pmap_pdpe(pmap, pv->pv_va); ptepde = *pte; pte = pmap_pdpe_to_pde(pte, pv->pv_va); tpte = *pte; if ((tpte & (PG_PS | PG_V)) == PG_V) { ptepde = tpte; pte = (pt_entry_t *)PHYS_TO_DMAP(tpte & PG_FRAME); pte = &pte[pmap_pte_index(pv->pv_va)]; tpte = *pte & ~PG_PTE_PAT; } if ((tpte & PG_V) == 0) panic("bad pte"); /* * We cannot remove wired pages from a process' mapping at this time */ if (tpte & PG_W) { allfree = 0; continue; } m = PHYS_TO_VM_PAGE(tpte & PG_FRAME); KASSERT(m->phys_addr == (tpte & PG_FRAME), ("vm_page_t %p phys_addr mismatch %016jx %016jx", m, (uintmax_t)m->phys_addr, (uintmax_t)tpte)); KASSERT(m < &vm_page_array[vm_page_array_size], ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte)); pte_clear(pte); /* * Update the vm_page_t clean/reference bits. */ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if ((tpte & PG_PS) != 0) { for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) vm_page_dirty(mt); } else vm_page_dirty(m); } /* Mark free */ PV_STAT(pv_entry_frees++); PV_STAT(pv_entry_spare++); pv_entry_count--; pc->pc_map[field] |= bitmask; if ((tpte & PG_PS) != 0) { pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE); pvh = pa_to_pvh(tpte & PG_PS_FRAME); TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); if (TAILQ_EMPTY(&pvh->pv_list)) { for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) if (TAILQ_EMPTY(&mt->md.pv_list)) vm_page_flag_clear(mt, PG_WRITEABLE); } mpte = pmap_lookup_pt_page(pmap, pv->pv_va); if (mpte != NULL) { pmap_remove_pt_page(pmap, mpte); pmap_resident_count_dec(pmap, 1); KASSERT(mpte->wire_count == NPTEPG, ("pmap_remove_pages: pte page wire count error")); mpte->wire_count = 0; pmap_add_delayed_free_list(mpte, &free, FALSE); atomic_subtract_int(&cnt.v_wire_count, 1); } } else { pmap_resident_count_dec(pmap, 1); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); if (TAILQ_EMPTY(&m->md.pv_list)) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) vm_page_flag_clear(m, PG_WRITEABLE); } } pmap_unuse_pt(pmap, pv->pv_va, ptepde, &free); } } if (allfree) { PV_STAT(pv_entry_spare -= _NPCPV); PV_STAT(pc_chunk_count--); PV_STAT(pc_chunk_frees++); TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); dump_drop_page(m->phys_addr); vm_page_unwire(m, 0); vm_page_free(m); } } pmap_invalidate_all(pmap); vm_page_unlock_queues(); PMAP_UNLOCK(pmap); pmap_free_zero_pages(free); } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. */ boolean_t pmap_is_modified(vm_page_t m) { boolean_t rv; KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, ("pmap_is_modified: page %p is not managed", m)); /* * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PG_WRITEABLE * is clear, no PTEs can have PG_M set. */ VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); if ((m->oflags & VPO_BUSY) == 0 && (m->flags & PG_WRITEABLE) == 0) return (FALSE); vm_page_lock_queues(); rv = pmap_is_modified_pvh(&m->md) || pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m))); vm_page_unlock_queues(); return (rv); } /* * Returns TRUE if any of the given mappings were used to modify * physical memory. Otherwise, returns FALSE. Both page and 2mpage * mappings are supported. */ static boolean_t pmap_is_modified_pvh(struct md_page *pvh) { pv_entry_t pv; pt_entry_t *pte; pmap_t pmap; boolean_t rv; mtx_assert(&vm_page_queue_mtx, MA_OWNED); rv = FALSE; TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte(pmap, pv->pv_va); rv = (*pte & (PG_M | PG_RW)) == (PG_M | PG_RW); PMAP_UNLOCK(pmap); if (rv) break; } return (rv); } /* * pmap_is_prefaultable: * * Return whether or not the specified virtual address is elgible * for prefault. */ boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) { pd_entry_t *pde; pt_entry_t *pte; boolean_t rv; rv = FALSE; PMAP_LOCK(pmap); pde = pmap_pde(pmap, addr); if (pde != NULL && (*pde & (PG_PS | PG_V)) == PG_V) { pte = pmap_pde_to_pte(pde, addr); rv = (*pte & PG_V) == 0; } PMAP_UNLOCK(pmap); return (rv); } /* * pmap_is_referenced: * * Return whether or not the specified physical page was referenced * in any physical maps. */ boolean_t pmap_is_referenced(vm_page_t m) { boolean_t rv; KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, ("pmap_is_referenced: page %p is not managed", m)); vm_page_lock_queues(); rv = pmap_is_referenced_pvh(&m->md) || pmap_is_referenced_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m))); vm_page_unlock_queues(); return (rv); } /* * Returns TRUE if any of the given mappings were referenced and FALSE * otherwise. Both page and 2mpage mappings are supported. */ static boolean_t pmap_is_referenced_pvh(struct md_page *pvh) { pv_entry_t pv; pt_entry_t *pte; pmap_t pmap; boolean_t rv; mtx_assert(&vm_page_queue_mtx, MA_OWNED); rv = FALSE; TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte(pmap, pv->pv_va); rv = (*pte & (PG_A | PG_V)) == (PG_A | PG_V); PMAP_UNLOCK(pmap); if (rv) break; } return (rv); } /* * Clear the write and modified bits in each of the given page's mappings. */ void pmap_remove_write(vm_page_t m) { struct md_page *pvh; pmap_t pmap; pv_entry_t next_pv, pv; pd_entry_t *pde; pt_entry_t oldpte, *pte; vm_offset_t va; KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, ("pmap_remove_write: page %p is not managed", m)); /* * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by * another thread while the object is locked. Thus, if PG_WRITEABLE * is clear, no page table entries need updating. */ VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); if ((m->oflags & VPO_BUSY) == 0 && (m->flags & PG_WRITEABLE) == 0) return; vm_page_lock_queues(); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); va = pv->pv_va; pde = pmap_pde(pmap, va); if ((*pde & PG_RW) != 0) (void)pmap_demote_pde(pmap, pde, va); PMAP_UNLOCK(pmap); } TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_clear_write: found" " a 2mpage in page %p's pv list", m)); pte = pmap_pde_to_pte(pde, pv->pv_va); retry: oldpte = *pte; if (oldpte & PG_RW) { if (!atomic_cmpset_long(pte, oldpte, oldpte & ~(PG_RW | PG_M))) goto retry; if ((oldpte & PG_M) != 0) vm_page_dirty(m); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } vm_page_flag_clear(m, PG_WRITEABLE); vm_page_unlock_queues(); } /* * pmap_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * XXX: The exact number of bits to check and clear is a matter that * should be tested and standardized at some point in the future for * optimal aging of shared pages. */ int pmap_ts_referenced(vm_page_t m) { struct md_page *pvh; pv_entry_t pv, pvf, pvn; pmap_t pmap; pd_entry_t oldpde, *pde; pt_entry_t *pte; vm_offset_t va; int rtval = 0; KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, ("pmap_ts_referenced: page %p is not managed", m)); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); vm_page_lock_queues(); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, pvn) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); va = pv->pv_va; pde = pmap_pde(pmap, va); oldpde = *pde; if ((oldpde & PG_A) != 0) { if (pmap_demote_pde(pmap, pde, va)) { if ((oldpde & PG_W) == 0) { /* * Remove the mapping to a single page * so that a subsequent access may * repromote. Since the underlying * page table page is fully populated, * this removal never frees a page * table page. */ va += VM_PAGE_TO_PHYS(m) - (oldpde & PG_PS_FRAME); pmap_remove_page(pmap, va, pde, NULL); rtval++; if (rtval > 4) { PMAP_UNLOCK(pmap); goto out; } } } } PMAP_UNLOCK(pmap); } if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pvf = pv; do { pvn = TAILQ_NEXT(pv, pv_list); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_ts_referenced:" " found a 2mpage in page %p's pv list", m)); pte = pmap_pde_to_pte(pde, pv->pv_va); if ((*pte & PG_A) != 0) { atomic_clear_long(pte, PG_A); pmap_invalidate_page(pmap, pv->pv_va); rtval++; if (rtval > 4) pvn = NULL; } PMAP_UNLOCK(pmap); } while ((pv = pvn) != NULL && pv != pvf); } out: vm_page_unlock_queues(); return (rtval); } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { struct md_page *pvh; pmap_t pmap; pv_entry_t next_pv, pv; pd_entry_t oldpde, *pde; pt_entry_t oldpte, *pte; vm_offset_t va; KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); KASSERT((m->oflags & VPO_BUSY) == 0, ("pmap_clear_modify: page %p is busy", m)); /* * If the page is not PG_WRITEABLE, then no PTEs can have PG_M set. * If the object containing the page is locked and the page is not * VPO_BUSY, then PG_WRITEABLE cannot be concurrently set. */ if ((m->flags & PG_WRITEABLE) == 0) return; vm_page_lock_queues(); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); va = pv->pv_va; pde = pmap_pde(pmap, va); oldpde = *pde; if ((oldpde & PG_RW) != 0) { if (pmap_demote_pde(pmap, pde, va)) { if ((oldpde & PG_W) == 0) { /* * Write protect the mapping to a * single page so that a subsequent * write access may repromote. */ va += VM_PAGE_TO_PHYS(m) - (oldpde & PG_PS_FRAME); pte = pmap_pde_to_pte(pde, va); oldpte = *pte; if ((oldpte & PG_V) != 0) { while (!atomic_cmpset_long(pte, oldpte, oldpte & ~(PG_M | PG_RW))) oldpte = *pte; vm_page_dirty(m); pmap_invalidate_page(pmap, va); } } } } PMAP_UNLOCK(pmap); } TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_clear_modify: found" " a 2mpage in page %p's pv list", m)); pte = pmap_pde_to_pte(pde, pv->pv_va); if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { atomic_clear_long(pte, PG_M); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } vm_page_unlock_queues(); } /* * pmap_clear_reference: * * Clear the reference bit on the specified physical page. */ void pmap_clear_reference(vm_page_t m) { struct md_page *pvh; pmap_t pmap; pv_entry_t next_pv, pv; pd_entry_t oldpde, *pde; pt_entry_t *pte; vm_offset_t va; KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, ("pmap_clear_reference: page %p is not managed", m)); vm_page_lock_queues(); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); va = pv->pv_va; pde = pmap_pde(pmap, va); oldpde = *pde; if ((oldpde & PG_A) != 0) { if (pmap_demote_pde(pmap, pde, va)) { /* * Remove the mapping to a single page so * that a subsequent access may repromote. * Since the underlying page table page is * fully populated, this removal never frees * a page table page. */ va += VM_PAGE_TO_PHYS(m) - (oldpde & PG_PS_FRAME); pmap_remove_page(pmap, va, pde, NULL); } } PMAP_UNLOCK(pmap); } TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_clear_reference: found" " a 2mpage in page %p's pv list", m)); pte = pmap_pde_to_pte(pde, pv->pv_va); if (*pte & PG_A) { atomic_clear_long(pte, PG_A); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } vm_page_unlock_queues(); } /* * Miscellaneous support routines follow */ /* Adjust the cache mode for a 4KB page mapped via a PTE. */ static __inline void pmap_pte_attr(pt_entry_t *pte, int cache_bits) { u_int opte, npte; /* * The cache mode bits are all in the low 32-bits of the * PTE, so we can just spin on updating the low 32-bits. */ do { opte = *(u_int *)pte; npte = opte & ~PG_PTE_CACHE; npte |= cache_bits; } while (npte != opte && !atomic_cmpset_int((u_int *)pte, opte, npte)); } /* Adjust the cache mode for a 2MB page mapped via a PDE. */ static __inline void pmap_pde_attr(pd_entry_t *pde, int cache_bits) { u_int opde, npde; /* * The cache mode bits are all in the low 32-bits of the * PDE, so we can just spin on updating the low 32-bits. */ do { opde = *(u_int *)pde; npde = opde & ~PG_PDE_CACHE; npde |= cache_bits; } while (npde != opde && !atomic_cmpset_int((u_int *)pde, opde, npde)); } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. */ void * pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode) { vm_offset_t va, offset; vm_size_t tmpsize; /* * If the specified range of physical addresses fits within the direct * map window, use the direct map. */ if (pa < dmaplimit && pa + size < dmaplimit) { va = PHYS_TO_DMAP(pa); if (!pmap_change_attr(va, size, mode)) return ((void *)va); } offset = pa & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); va = kmem_alloc_nofault(kernel_map, size); if (!va) panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); pa = trunc_page(pa); for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE) pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode); pmap_invalidate_range(kernel_pmap, va, va + tmpsize); pmap_invalidate_cache_range(va, va + tmpsize); return ((void *)(va + offset)); } void * pmap_mapdev(vm_paddr_t pa, vm_size_t size) { return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE)); } void * pmap_mapbios(vm_paddr_t pa, vm_size_t size) { return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK)); } void pmap_unmapdev(vm_offset_t va, vm_size_t size) { vm_offset_t base, offset, tmpva; /* If we gave a direct map region in pmap_mapdev, do nothing */ if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) return; base = trunc_page(va); offset = va & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) pmap_kremove(tmpva); pmap_invalidate_range(kernel_pmap, va, tmpva); kmem_free(kernel_map, base, size); } /* * Tries to demote a 1GB page mapping. */ static boolean_t pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va) { pdp_entry_t newpdpe, oldpdpe; pd_entry_t *firstpde, newpde, *pde; vm_paddr_t mpdepa; vm_page_t mpde; PMAP_LOCK_ASSERT(pmap, MA_OWNED); oldpdpe = *pdpe; KASSERT((oldpdpe & (PG_PS | PG_V)) == (PG_PS | PG_V), ("pmap_demote_pdpe: oldpdpe is missing PG_PS and/or PG_V")); if ((mpde = vm_page_alloc(NULL, va >> PDPSHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { CTR2(KTR_PMAP, "pmap_demote_pdpe: failure for va %#lx" " in pmap %p", va, pmap); return (FALSE); } mpdepa = VM_PAGE_TO_PHYS(mpde); firstpde = (pd_entry_t *)PHYS_TO_DMAP(mpdepa); newpdpe = mpdepa | PG_M | PG_A | (oldpdpe & PG_U) | PG_RW | PG_V; KASSERT((oldpdpe & PG_A) != 0, ("pmap_demote_pdpe: oldpdpe is missing PG_A")); KASSERT((oldpdpe & (PG_M | PG_RW)) != PG_RW, ("pmap_demote_pdpe: oldpdpe is missing PG_M")); newpde = oldpdpe; /* * Initialize the page directory page. */ for (pde = firstpde; pde < firstpde + NPDEPG; pde++) { *pde = newpde; newpde += NBPDR; } /* * Demote the mapping. */ *pdpe = newpdpe; /* * Invalidate a stale recursive mapping of the page directory page. */ pmap_invalidate_page(pmap, (vm_offset_t)vtopde(va)); pmap_pdpe_demotions++; CTR2(KTR_PMAP, "pmap_demote_pdpe: success for va %#lx" " in pmap %p", va, pmap); return (TRUE); } /* * Sets the memory attribute for the specified page. */ void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { m->md.pat_mode = ma; /* * If "m" is a normal page, update its direct mapping. This update * can be relied upon to perform any cache operations that are * required for data coherence. */ if ((m->flags & PG_FICTITIOUS) == 0 && pmap_change_attr(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)), PAGE_SIZE, m->md.pat_mode)) panic("memory attribute change on the direct map failed"); } /* * Changes the specified virtual address range's memory type to that given by * the parameter "mode". The specified virtual address range must be * completely contained within either the direct map or the kernel map. If * the virtual address range is contained within the kernel map, then the * memory type for each of the corresponding ranges of the direct map is also * changed. (The corresponding ranges of the direct map are those ranges that * map the same physical pages as the specified virtual address range.) These * changes to the direct map are necessary because Intel describes the * behavior of their processors as "undefined" if two or more mappings to the * same physical page have different memory types. * * Returns zero if the change completed successfully, and either EINVAL or * ENOMEM if the change failed. Specifically, EINVAL is returned if some part * of the virtual address range was not mapped, and ENOMEM is returned if * there was insufficient memory available to complete the change. In the * latter case, the memory type may have been changed on some part of the * virtual address range or the direct map. */ int pmap_change_attr(vm_offset_t va, vm_size_t size, int mode) { int error; PMAP_LOCK(kernel_pmap); error = pmap_change_attr_locked(va, size, mode); PMAP_UNLOCK(kernel_pmap); return (error); } static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode) { vm_offset_t base, offset, tmpva; vm_paddr_t pa_start, pa_end; pdp_entry_t *pdpe; pd_entry_t *pde; pt_entry_t *pte; int cache_bits_pte, cache_bits_pde, error; boolean_t changed; PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED); base = trunc_page(va); offset = va & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); /* * Only supported on kernel virtual addresses, including the direct * map but excluding the recursive map. */ if (base < DMAP_MIN_ADDRESS) return (EINVAL); cache_bits_pde = pmap_cache_bits(mode, 1); cache_bits_pte = pmap_cache_bits(mode, 0); changed = FALSE; /* * Pages that aren't mapped aren't supported. Also break down 2MB pages * into 4KB pages if required. */ for (tmpva = base; tmpva < base + size; ) { pdpe = pmap_pdpe(kernel_pmap, tmpva); if (*pdpe == 0) return (EINVAL); if (*pdpe & PG_PS) { /* * If the current 1GB page already has the required * memory type, then we need not demote this page. Just * increment tmpva to the next 1GB page frame. */ if ((*pdpe & PG_PDE_CACHE) == cache_bits_pde) { tmpva = trunc_1gpage(tmpva) + NBPDP; continue; } /* * If the current offset aligns with a 1GB page frame * and there is at least 1GB left within the range, then * we need not break down this page into 2MB pages. */ if ((tmpva & PDPMASK) == 0 && tmpva + PDPMASK < base + size) { tmpva += NBPDP; continue; } if (!pmap_demote_pdpe(kernel_pmap, pdpe, tmpva)) return (ENOMEM); } pde = pmap_pdpe_to_pde(pdpe, tmpva); if (*pde == 0) return (EINVAL); if (*pde & PG_PS) { /* * If the current 2MB page already has the required * memory type, then we need not demote this page. Just * increment tmpva to the next 2MB page frame. */ if ((*pde & PG_PDE_CACHE) == cache_bits_pde) { tmpva = trunc_2mpage(tmpva) + NBPDR; continue; } /* * If the current offset aligns with a 2MB page frame * and there is at least 2MB left within the range, then * we need not break down this page into 4KB pages. */ if ((tmpva & PDRMASK) == 0 && tmpva + PDRMASK < base + size) { tmpva += NBPDR; continue; } if (!pmap_demote_pde(kernel_pmap, pde, tmpva)) return (ENOMEM); } pte = pmap_pde_to_pte(pde, tmpva); if (*pte == 0) return (EINVAL); tmpva += PAGE_SIZE; } error = 0; /* * Ok, all the pages exist, so run through them updating their * cache mode if required. */ pa_start = pa_end = 0; for (tmpva = base; tmpva < base + size; ) { pdpe = pmap_pdpe(kernel_pmap, tmpva); if (*pdpe & PG_PS) { if ((*pdpe & PG_PDE_CACHE) != cache_bits_pde) { pmap_pde_attr(pdpe, cache_bits_pde); changed = TRUE; } if (tmpva >= VM_MIN_KERNEL_ADDRESS) { if (pa_start == pa_end) { /* Start physical address run. */ pa_start = *pdpe & PG_PS_FRAME; pa_end = pa_start + NBPDP; } else if (pa_end == (*pdpe & PG_PS_FRAME)) pa_end += NBPDP; else { /* Run ended, update direct map. */ error = pmap_change_attr_locked( PHYS_TO_DMAP(pa_start), pa_end - pa_start, mode); if (error != 0) break; /* Start physical address run. */ pa_start = *pdpe & PG_PS_FRAME; pa_end = pa_start + NBPDP; } } tmpva = trunc_1gpage(tmpva) + NBPDP; continue; } pde = pmap_pdpe_to_pde(pdpe, tmpva); if (*pde & PG_PS) { if ((*pde & PG_PDE_CACHE) != cache_bits_pde) { pmap_pde_attr(pde, cache_bits_pde); changed = TRUE; } if (tmpva >= VM_MIN_KERNEL_ADDRESS) { if (pa_start == pa_end) { /* Start physical address run. */ pa_start = *pde & PG_PS_FRAME; pa_end = pa_start + NBPDR; } else if (pa_end == (*pde & PG_PS_FRAME)) pa_end += NBPDR; else { /* Run ended, update direct map. */ error = pmap_change_attr_locked( PHYS_TO_DMAP(pa_start), pa_end - pa_start, mode); if (error != 0) break; /* Start physical address run. */ pa_start = *pde & PG_PS_FRAME; pa_end = pa_start + NBPDR; } } tmpva = trunc_2mpage(tmpva) + NBPDR; } else { pte = pmap_pde_to_pte(pde, tmpva); if ((*pte & PG_PTE_CACHE) != cache_bits_pte) { pmap_pte_attr(pte, cache_bits_pte); changed = TRUE; } if (tmpva >= VM_MIN_KERNEL_ADDRESS) { if (pa_start == pa_end) { /* Start physical address run. */ pa_start = *pte & PG_FRAME; pa_end = pa_start + PAGE_SIZE; } else if (pa_end == (*pte & PG_FRAME)) pa_end += PAGE_SIZE; else { /* Run ended, update direct map. */ error = pmap_change_attr_locked( PHYS_TO_DMAP(pa_start), pa_end - pa_start, mode); if (error != 0) break; /* Start physical address run. */ pa_start = *pte & PG_FRAME; pa_end = pa_start + PAGE_SIZE; } } tmpva += PAGE_SIZE; } } if (error == 0 && pa_start != pa_end) error = pmap_change_attr_locked(PHYS_TO_DMAP(pa_start), pa_end - pa_start, mode); /* * Flush CPU caches if required to make sure any data isn't cached that * shouldn't be, etc. */ if (changed) { pmap_invalidate_range(kernel_pmap, base, tmpva); pmap_invalidate_cache_range(base, tmpva); } return (error); } /* * Demotes any mapping within the direct map region that covers more than the * specified range of physical addresses. This range's size must be a power * of two and its starting address must be a multiple of its size. Since the * demotion does not change any attributes of the mapping, a TLB invalidation * is not mandatory. The caller may, however, request a TLB invalidation. */ void pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, boolean_t invalidate) { pdp_entry_t *pdpe; pd_entry_t *pde; vm_offset_t va; boolean_t changed; if (len == 0) return; KASSERT(powerof2(len), ("pmap_demote_DMAP: len is not a power of 2")); KASSERT((base & (len - 1)) == 0, ("pmap_demote_DMAP: base is not a multiple of len")); if (len < NBPDP && base < dmaplimit) { va = PHYS_TO_DMAP(base); changed = FALSE; PMAP_LOCK(kernel_pmap); pdpe = pmap_pdpe(kernel_pmap, va); if ((*pdpe & PG_V) == 0) panic("pmap_demote_DMAP: invalid PDPE"); if ((*pdpe & PG_PS) != 0) { if (!pmap_demote_pdpe(kernel_pmap, pdpe, va)) panic("pmap_demote_DMAP: PDPE failed"); changed = TRUE; } if (len < NBPDR) { pde = pmap_pdpe_to_pde(pdpe, va); if ((*pde & PG_V) == 0) panic("pmap_demote_DMAP: invalid PDE"); if ((*pde & PG_PS) != 0) { if (!pmap_demote_pde(kernel_pmap, pde, va)) panic("pmap_demote_DMAP: PDE failed"); changed = TRUE; } } if (changed && invalidate) pmap_invalidate_page(kernel_pmap, va); PMAP_UNLOCK(kernel_pmap); } } /* * perform the pmap work for mincore */ int pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { pd_entry_t *pdep; pt_entry_t pte; vm_paddr_t pa; int val; PMAP_LOCK(pmap); retry: pdep = pmap_pde(pmap, addr); if (pdep != NULL && (*pdep & PG_V)) { if (*pdep & PG_PS) { pte = *pdep; /* Compute the physical address of the 4KB page. */ pa = ((*pdep & PG_PS_FRAME) | (addr & PDRMASK)) & PG_FRAME; val = MINCORE_SUPER; } else { pte = *pmap_pde_to_pte(pdep, addr); pa = pte & PG_FRAME; val = 0; } } else { pte = 0; pa = 0; val = 0; } if ((pte & PG_V) != 0) { val |= MINCORE_INCORE; if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if ((pte & PG_A) != 0) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; } if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && (pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) { /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) goto retry; } else PA_UNLOCK_COND(*locked_pa); PMAP_UNLOCK(pmap); return (val); } void pmap_activate(struct thread *td) { pmap_t pmap, oldpmap; u_int64_t cr3; critical_enter(); pmap = vmspace_pmap(td->td_proc->p_vmspace); oldpmap = PCPU_GET(curpmap); #ifdef SMP atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask)); atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); #else oldpmap->pm_active &= ~PCPU_GET(cpumask); pmap->pm_active |= PCPU_GET(cpumask); #endif cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4); td->td_pcb->pcb_cr3 = cr3; load_cr3(cr3); PCPU_SET(curpmap, pmap); critical_exit(); } void pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) { } /* * Increase the starting virtual address of the given mapping if a * different alignment might result in more superpage mappings. */ void pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { vm_offset_t superpage_offset; if (size < NBPDR) return; if (object != NULL && (object->flags & OBJ_COLORED) != 0) offset += ptoa(object->pg_color); superpage_offset = offset & PDRMASK; if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR || (*addr & PDRMASK) == superpage_offset) return; if ((*addr & PDRMASK) < superpage_offset) *addr = (*addr & ~PDRMASK) + superpage_offset; else *addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset; } Index: projects/binutils-2.17/sys/amd64/include/specialreg.h =================================================================== --- projects/binutils-2.17/sys/amd64/include/specialreg.h (revision 215829) +++ projects/binutils-2.17/sys/amd64/include/specialreg.h (revision 215830) @@ -1,555 +1,564 @@ /*- * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)specialreg.h 7.1 (Berkeley) 5/9/91 * $FreeBSD$ */ #ifndef _MACHINE_SPECIALREG_H_ #define _MACHINE_SPECIALREG_H_ /* * Bits in 386 special registers: */ #define CR0_PE 0x00000001 /* Protected mode Enable */ #define CR0_MP 0x00000002 /* "Math" (fpu) Present */ #define CR0_EM 0x00000004 /* EMulate FPU instructions. (trap ESC only) */ #define CR0_TS 0x00000008 /* Task Switched (if MP, trap ESC and WAIT) */ #define CR0_PG 0x80000000 /* PaGing enable */ /* * Bits in 486 special registers: */ #define CR0_NE 0x00000020 /* Numeric Error enable (EX16 vs IRQ13) */ #define CR0_WP 0x00010000 /* Write Protect (honor page protect in all modes) */ #define CR0_AM 0x00040000 /* Alignment Mask (set to enable AC flag) */ #define CR0_NW 0x20000000 /* Not Write-through */ #define CR0_CD 0x40000000 /* Cache Disable */ /* * Bits in PPro special registers */ #define CR4_VME 0x00000001 /* Virtual 8086 mode extensions */ #define CR4_PVI 0x00000002 /* Protected-mode virtual interrupts */ #define CR4_TSD 0x00000004 /* Time stamp disable */ #define CR4_DE 0x00000008 /* Debugging extensions */ #define CR4_PSE 0x00000010 /* Page size extensions */ #define CR4_PAE 0x00000020 /* Physical address extension */ #define CR4_MCE 0x00000040 /* Machine check enable */ #define CR4_PGE 0x00000080 /* Page global enable */ #define CR4_PCE 0x00000100 /* Performance monitoring counter enable */ #define CR4_FXSR 0x00000200 /* Fast FPU save/restore used by OS */ #define CR4_XMM 0x00000400 /* enable SIMD/MMX2 to use except 16 */ /* * Bits in AMD64 special registers. EFER is 64 bits wide. */ #define EFER_SCE 0x000000001 /* System Call Extensions (R/W) */ #define EFER_LME 0x000000100 /* Long mode enable (R/W) */ #define EFER_LMA 0x000000400 /* Long mode active (R) */ #define EFER_NXE 0x000000800 /* PTE No-Execute bit enable (R/W) */ /* * CPUID instruction features register */ #define CPUID_FPU 0x00000001 #define CPUID_VME 0x00000002 #define CPUID_DE 0x00000004 #define CPUID_PSE 0x00000008 #define CPUID_TSC 0x00000010 #define CPUID_MSR 0x00000020 #define CPUID_PAE 0x00000040 #define CPUID_MCE 0x00000080 #define CPUID_CX8 0x00000100 #define CPUID_APIC 0x00000200 #define CPUID_B10 0x00000400 #define CPUID_SEP 0x00000800 #define CPUID_MTRR 0x00001000 #define CPUID_PGE 0x00002000 #define CPUID_MCA 0x00004000 #define CPUID_CMOV 0x00008000 #define CPUID_PAT 0x00010000 #define CPUID_PSE36 0x00020000 #define CPUID_PSN 0x00040000 #define CPUID_CLFSH 0x00080000 #define CPUID_B20 0x00100000 #define CPUID_DS 0x00200000 #define CPUID_ACPI 0x00400000 #define CPUID_MMX 0x00800000 #define CPUID_FXSR 0x01000000 #define CPUID_SSE 0x02000000 #define CPUID_XMM 0x02000000 #define CPUID_SSE2 0x04000000 #define CPUID_SS 0x08000000 #define CPUID_HTT 0x10000000 #define CPUID_TM 0x20000000 #define CPUID_IA64 0x40000000 #define CPUID_PBE 0x80000000 #define CPUID2_SSE3 0x00000001 #define CPUID2_PCLMULQDQ 0x00000002 #define CPUID2_DTES64 0x00000004 #define CPUID2_MON 0x00000008 #define CPUID2_DS_CPL 0x00000010 #define CPUID2_VMX 0x00000020 #define CPUID2_SMX 0x00000040 #define CPUID2_EST 0x00000080 #define CPUID2_TM2 0x00000100 #define CPUID2_SSSE3 0x00000200 #define CPUID2_CNXTID 0x00000400 #define CPUID2_CX16 0x00002000 #define CPUID2_XTPR 0x00004000 #define CPUID2_PDCM 0x00008000 #define CPUID2_PCID 0x00020000 #define CPUID2_DCA 0x00040000 #define CPUID2_SSE41 0x00080000 #define CPUID2_SSE42 0x00100000 #define CPUID2_X2APIC 0x00200000 #define CPUID2_MOVBE 0x00400000 #define CPUID2_POPCNT 0x00800000 #define CPUID2_AESNI 0x02000000 /* + * Important bits in the Thermal and Power Management flags + * CPUID.6 EAX and ECX. + */ +#define CPUTPM1_SENSOR 0x00000001 +#define CPUTPM1_TURBO 0x00000002 +#define CPUTPM1_ARAT 0x00000004 +#define CPUTPM2_EFFREQ 0x00000001 + +/* * Important bits in the AMD extended cpuid flags */ #define AMDID_SYSCALL 0x00000800 #define AMDID_MP 0x00080000 #define AMDID_NX 0x00100000 #define AMDID_EXT_MMX 0x00400000 #define AMDID_FFXSR 0x01000000 #define AMDID_PAGE1GB 0x04000000 #define AMDID_RDTSCP 0x08000000 #define AMDID_LM 0x20000000 #define AMDID_EXT_3DNOW 0x40000000 #define AMDID_3DNOW 0x80000000 #define AMDID2_LAHF 0x00000001 #define AMDID2_CMP 0x00000002 #define AMDID2_SVM 0x00000004 #define AMDID2_EXT_APIC 0x00000008 #define AMDID2_CR8 0x00000010 #define AMDID2_ABM 0x00000020 #define AMDID2_SSE4A 0x00000040 #define AMDID2_MAS 0x00000080 #define AMDID2_PREFETCH 0x00000100 #define AMDID2_OSVW 0x00000200 #define AMDID2_IBS 0x00000400 #define AMDID2_SSE5 0x00000800 #define AMDID2_SKINIT 0x00001000 #define AMDID2_WDT 0x00002000 /* * CPUID instruction 1 eax info */ #define CPUID_STEPPING 0x0000000f #define CPUID_MODEL 0x000000f0 #define CPUID_FAMILY 0x00000f00 #define CPUID_EXT_MODEL 0x000f0000 #define CPUID_EXT_FAMILY 0x0ff00000 #define CPUID_TO_MODEL(id) \ ((((id) & CPUID_MODEL) >> 4) | \ (((id) & CPUID_EXT_MODEL) >> 12)) #define CPUID_TO_FAMILY(id) \ ((((id) & CPUID_FAMILY) >> 8) + \ (((id) & CPUID_EXT_FAMILY) >> 20)) /* * CPUID instruction 1 ebx info */ #define CPUID_BRAND_INDEX 0x000000ff #define CPUID_CLFUSH_SIZE 0x0000ff00 #define CPUID_HTT_CORES 0x00ff0000 #define CPUID_LOCAL_APIC_ID 0xff000000 /* * CPUID instruction 0xb ebx info. */ #define CPUID_TYPE_INVAL 0 #define CPUID_TYPE_SMT 1 #define CPUID_TYPE_CORE 2 /* * AMD extended function 8000_0007h edx info */ #define AMDPM_TS 0x00000001 #define AMDPM_FID 0x00000002 #define AMDPM_VID 0x00000004 #define AMDPM_TTP 0x00000008 #define AMDPM_TM 0x00000010 #define AMDPM_STC 0x00000020 #define AMDPM_100MHZ_STEPS 0x00000040 #define AMDPM_HW_PSTATE 0x00000080 #define AMDPM_TSC_INVARIANT 0x00000100 #define AMDPM_CPB 0x00000200 /* * AMD extended function 8000_0008h ecx info */ #define AMDID_CMP_CORES 0x000000ff /* * CPUID manufacturers identifiers */ #define AMD_VENDOR_ID "AuthenticAMD" #define CENTAUR_VENDOR_ID "CentaurHauls" #define INTEL_VENDOR_ID "GenuineIntel" /* * Model-specific registers for the i386 family */ #define MSR_P5_MC_ADDR 0x000 #define MSR_P5_MC_TYPE 0x001 #define MSR_TSC 0x010 #define MSR_P5_CESR 0x011 #define MSR_P5_CTR0 0x012 #define MSR_P5_CTR1 0x013 #define MSR_IA32_PLATFORM_ID 0x017 #define MSR_APICBASE 0x01b #define MSR_EBL_CR_POWERON 0x02a #define MSR_TEST_CTL 0x033 #define MSR_BIOS_UPDT_TRIG 0x079 #define MSR_BBL_CR_D0 0x088 #define MSR_BBL_CR_D1 0x089 #define MSR_BBL_CR_D2 0x08a #define MSR_BIOS_SIGN 0x08b #define MSR_PERFCTR0 0x0c1 #define MSR_PERFCTR1 0x0c2 #define MSR_MPERF 0x0e7 #define MSR_APERF 0x0e8 #define MSR_IA32_EXT_CONFIG 0x0ee /* Undocumented. Core Solo/Duo only */ #define MSR_MTRRcap 0x0fe #define MSR_BBL_CR_ADDR 0x116 #define MSR_BBL_CR_DECC 0x118 #define MSR_BBL_CR_CTL 0x119 #define MSR_BBL_CR_TRIG 0x11a #define MSR_BBL_CR_BUSY 0x11b #define MSR_BBL_CR_CTL3 0x11e #define MSR_SYSENTER_CS_MSR 0x174 #define MSR_SYSENTER_ESP_MSR 0x175 #define MSR_SYSENTER_EIP_MSR 0x176 #define MSR_MCG_CAP 0x179 #define MSR_MCG_STATUS 0x17a #define MSR_MCG_CTL 0x17b #define MSR_EVNTSEL0 0x186 #define MSR_EVNTSEL1 0x187 #define MSR_THERM_CONTROL 0x19a #define MSR_THERM_INTERRUPT 0x19b #define MSR_THERM_STATUS 0x19c #define MSR_IA32_MISC_ENABLE 0x1a0 #define MSR_IA32_TEMPERATURE_TARGET 0x1a2 #define MSR_DEBUGCTLMSR 0x1d9 #define MSR_LASTBRANCHFROMIP 0x1db #define MSR_LASTBRANCHTOIP 0x1dc #define MSR_LASTINTFROMIP 0x1dd #define MSR_LASTINTTOIP 0x1de #define MSR_ROB_CR_BKUPTMPDR6 0x1e0 #define MSR_MTRRVarBase 0x200 #define MSR_MTRR64kBase 0x250 #define MSR_MTRR16kBase 0x258 #define MSR_MTRR4kBase 0x268 #define MSR_PAT 0x277 #define MSR_MC0_CTL2 0x280 #define MSR_MTRRdefType 0x2ff #define MSR_MC0_CTL 0x400 #define MSR_MC0_STATUS 0x401 #define MSR_MC0_ADDR 0x402 #define MSR_MC0_MISC 0x403 #define MSR_MC1_CTL 0x404 #define MSR_MC1_STATUS 0x405 #define MSR_MC1_ADDR 0x406 #define MSR_MC1_MISC 0x407 #define MSR_MC2_CTL 0x408 #define MSR_MC2_STATUS 0x409 #define MSR_MC2_ADDR 0x40a #define MSR_MC2_MISC 0x40b #define MSR_MC3_CTL 0x40c #define MSR_MC3_STATUS 0x40d #define MSR_MC3_ADDR 0x40e #define MSR_MC3_MISC 0x40f #define MSR_MC4_CTL 0x410 #define MSR_MC4_STATUS 0x411 #define MSR_MC4_ADDR 0x412 #define MSR_MC4_MISC 0x413 /* * Constants related to MSR's. */ #define APICBASE_RESERVED 0x000006ff #define APICBASE_BSP 0x00000100 #define APICBASE_ENABLED 0x00000800 #define APICBASE_ADDRESS 0xfffff000 /* * PAT modes. */ #define PAT_UNCACHEABLE 0x00 #define PAT_WRITE_COMBINING 0x01 #define PAT_WRITE_THROUGH 0x04 #define PAT_WRITE_PROTECTED 0x05 #define PAT_WRITE_BACK 0x06 #define PAT_UNCACHED 0x07 #define PAT_VALUE(i, m) ((long)(m) << (8 * (i))) #define PAT_MASK(i) PAT_VALUE(i, 0xff) /* * Constants related to MTRRs */ #define MTRR_UNCACHEABLE 0x00 #define MTRR_WRITE_COMBINING 0x01 #define MTRR_WRITE_THROUGH 0x04 #define MTRR_WRITE_PROTECTED 0x05 #define MTRR_WRITE_BACK 0x06 #define MTRR_N64K 8 /* numbers of fixed-size entries */ #define MTRR_N16K 16 #define MTRR_N4K 64 #define MTRR_CAP_WC 0x0000000000000400 #define MTRR_CAP_FIXED 0x0000000000000100 #define MTRR_CAP_VCNT 0x00000000000000ff #define MTRR_DEF_ENABLE 0x0000000000000800 #define MTRR_DEF_FIXED_ENABLE 0x0000000000000400 #define MTRR_DEF_TYPE 0x00000000000000ff #define MTRR_PHYSBASE_PHYSBASE 0x000ffffffffff000 #define MTRR_PHYSBASE_TYPE 0x00000000000000ff #define MTRR_PHYSMASK_PHYSMASK 0x000ffffffffff000 #define MTRR_PHYSMASK_VALID 0x0000000000000800 /* Performance Control Register (5x86 only). */ #define PCR0 0x20 #define PCR0_RSTK 0x01 /* Enables return stack */ #define PCR0_BTB 0x02 /* Enables branch target buffer */ #define PCR0_LOOP 0x04 /* Enables loop */ #define PCR0_AIS 0x08 /* Enables all instrcutions stalled to serialize pipe. */ #define PCR0_MLR 0x10 /* Enables reordering of misaligned loads */ #define PCR0_BTBRT 0x40 /* Enables BTB test register. */ #define PCR0_LSSER 0x80 /* Disable reorder */ /* Device Identification Registers */ #define DIR0 0xfe #define DIR1 0xff /* * Machine Check register constants. */ #define MCG_CAP_COUNT 0x000000ff #define MCG_CAP_CTL_P 0x00000100 #define MCG_CAP_EXT_P 0x00000200 #define MCG_CAP_CMCI_P 0x00000400 #define MCG_CAP_TES_P 0x00000800 #define MCG_CAP_EXT_CNT 0x00ff0000 #define MCG_CAP_SER_P 0x01000000 #define MCG_STATUS_RIPV 0x00000001 #define MCG_STATUS_EIPV 0x00000002 #define MCG_STATUS_MCIP 0x00000004 #define MCG_CTL_ENABLE 0xffffffffffffffff #define MCG_CTL_DISABLE 0x0000000000000000 #define MSR_MC_CTL(x) (MSR_MC0_CTL + (x) * 4) #define MSR_MC_STATUS(x) (MSR_MC0_STATUS + (x) * 4) #define MSR_MC_ADDR(x) (MSR_MC0_ADDR + (x) * 4) #define MSR_MC_MISC(x) (MSR_MC0_MISC + (x) * 4) #define MSR_MC_CTL2(x) (MSR_MC0_CTL2 + (x)) /* If MCG_CAP_CMCI_P */ #define MC_STATUS_MCA_ERROR 0x000000000000ffff #define MC_STATUS_MODEL_ERROR 0x00000000ffff0000 #define MC_STATUS_OTHER_INFO 0x01ffffff00000000 #define MC_STATUS_COR_COUNT 0x001fffc000000000 /* If MCG_CAP_CMCI_P */ #define MC_STATUS_TES_STATUS 0x0060000000000000 /* If MCG_CAP_TES_P */ #define MC_STATUS_AR 0x0080000000000000 /* If MCG_CAP_TES_P */ #define MC_STATUS_S 0x0100000000000000 /* If MCG_CAP_TES_P */ #define MC_STATUS_PCC 0x0200000000000000 #define MC_STATUS_ADDRV 0x0400000000000000 #define MC_STATUS_MISCV 0x0800000000000000 #define MC_STATUS_EN 0x1000000000000000 #define MC_STATUS_UC 0x2000000000000000 #define MC_STATUS_OVER 0x4000000000000000 #define MC_STATUS_VAL 0x8000000000000000 #define MC_MISC_RA_LSB 0x000000000000003f /* If MCG_CAP_SER_P */ #define MC_MISC_ADDRESS_MODE 0x00000000000001c0 /* If MCG_CAP_SER_P */ #define MC_CTL2_THRESHOLD 0x0000000000007fff #define MC_CTL2_CMCI_EN 0x0000000040000000 /* * The following four 3-byte registers control the non-cacheable regions. * These registers must be written as three separate bytes. * * NCRx+0: A31-A24 of starting address * NCRx+1: A23-A16 of starting address * NCRx+2: A15-A12 of starting address | NCR_SIZE_xx. * * The non-cacheable region's starting address must be aligned to the * size indicated by the NCR_SIZE_xx field. */ #define NCR1 0xc4 #define NCR2 0xc7 #define NCR3 0xca #define NCR4 0xcd #define NCR_SIZE_0K 0 #define NCR_SIZE_4K 1 #define NCR_SIZE_8K 2 #define NCR_SIZE_16K 3 #define NCR_SIZE_32K 4 #define NCR_SIZE_64K 5 #define NCR_SIZE_128K 6 #define NCR_SIZE_256K 7 #define NCR_SIZE_512K 8 #define NCR_SIZE_1M 9 #define NCR_SIZE_2M 10 #define NCR_SIZE_4M 11 #define NCR_SIZE_8M 12 #define NCR_SIZE_16M 13 #define NCR_SIZE_32M 14 #define NCR_SIZE_4G 15 /* * The address region registers are used to specify the location and * size for the eight address regions. * * ARRx + 0: A31-A24 of start address * ARRx + 1: A23-A16 of start address * ARRx + 2: A15-A12 of start address | ARR_SIZE_xx */ #define ARR0 0xc4 #define ARR1 0xc7 #define ARR2 0xca #define ARR3 0xcd #define ARR4 0xd0 #define ARR5 0xd3 #define ARR6 0xd6 #define ARR7 0xd9 #define ARR_SIZE_0K 0 #define ARR_SIZE_4K 1 #define ARR_SIZE_8K 2 #define ARR_SIZE_16K 3 #define ARR_SIZE_32K 4 #define ARR_SIZE_64K 5 #define ARR_SIZE_128K 6 #define ARR_SIZE_256K 7 #define ARR_SIZE_512K 8 #define ARR_SIZE_1M 9 #define ARR_SIZE_2M 10 #define ARR_SIZE_4M 11 #define ARR_SIZE_8M 12 #define ARR_SIZE_16M 13 #define ARR_SIZE_32M 14 #define ARR_SIZE_4G 15 /* * The region control registers specify the attributes associated with * the ARRx addres regions. */ #define RCR0 0xdc #define RCR1 0xdd #define RCR2 0xde #define RCR3 0xdf #define RCR4 0xe0 #define RCR5 0xe1 #define RCR6 0xe2 #define RCR7 0xe3 #define RCR_RCD 0x01 /* Disables caching for ARRx (x = 0-6). */ #define RCR_RCE 0x01 /* Enables caching for ARR7. */ #define RCR_WWO 0x02 /* Weak write ordering. */ #define RCR_WL 0x04 /* Weak locking. */ #define RCR_WG 0x08 /* Write gathering. */ #define RCR_WT 0x10 /* Write-through. */ #define RCR_NLB 0x20 /* LBA# pin is not asserted. */ /* AMD Write Allocate Top-Of-Memory and Control Register */ #define AMD_WT_ALLOC_TME 0x40000 /* top-of-memory enable */ #define AMD_WT_ALLOC_PRE 0x20000 /* programmable range enable */ #define AMD_WT_ALLOC_FRE 0x10000 /* fixed (A0000-FFFFF) range enable */ /* AMD64 MSR's */ #define MSR_EFER 0xc0000080 /* extended features */ #define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target/cs/ss */ #define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target rip */ #define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target rip */ #define MSR_SF_MASK 0xc0000084 /* syscall flags mask */ #define MSR_FSBASE 0xc0000100 /* base address of the %fs "segment" */ #define MSR_GSBASE 0xc0000101 /* base address of the %gs "segment" */ #define MSR_KGSBASE 0xc0000102 /* base address of the kernel %gs */ #define MSR_PERFEVSEL0 0xc0010000 #define MSR_PERFEVSEL1 0xc0010001 #define MSR_PERFEVSEL2 0xc0010002 #define MSR_PERFEVSEL3 0xc0010003 #undef MSR_PERFCTR0 #undef MSR_PERFCTR1 #define MSR_PERFCTR0 0xc0010004 #define MSR_PERFCTR1 0xc0010005 #define MSR_PERFCTR2 0xc0010006 #define MSR_PERFCTR3 0xc0010007 #define MSR_SYSCFG 0xc0010010 #define MSR_HWCR 0xc0010015 #define MSR_IORRBASE0 0xc0010016 #define MSR_IORRMASK0 0xc0010017 #define MSR_IORRBASE1 0xc0010018 #define MSR_IORRMASK1 0xc0010019 #define MSR_TOP_MEM 0xc001001a /* boundary for ram below 4G */ #define MSR_TOP_MEM2 0xc001001d /* boundary for ram above 4G */ #define MSR_K8_UCODE_UPDATE 0xc0010020 /* update microcode */ #define MSR_MC0_CTL_MASK 0xc0010044 /* VIA ACE crypto featureset: for via_feature_rng */ #define VIA_HAS_RNG 1 /* cpu has RNG */ /* VIA ACE crypto featureset: for via_feature_xcrypt */ #define VIA_HAS_AES 1 /* cpu has AES */ #define VIA_HAS_SHA 2 /* cpu has SHA1 & SHA256 */ #define VIA_HAS_MM 4 /* cpu has RSA instructions */ #define VIA_HAS_AESCTR 8 /* cpu has AES-CTR instructions */ /* Centaur Extended Feature flags */ #define VIA_CPUID_HAS_RNG 0x000004 #define VIA_CPUID_DO_RNG 0x000008 #define VIA_CPUID_HAS_ACE 0x000040 #define VIA_CPUID_DO_ACE 0x000080 #define VIA_CPUID_HAS_ACE2 0x000100 #define VIA_CPUID_DO_ACE2 0x000200 #define VIA_CPUID_HAS_PHE 0x000400 #define VIA_CPUID_DO_PHE 0x000800 #define VIA_CPUID_HAS_PMM 0x001000 #define VIA_CPUID_DO_PMM 0x002000 /* VIA ACE xcrypt-* instruction context control options */ #define VIA_CRYPT_CWLO_ROUND_M 0x0000000f #define VIA_CRYPT_CWLO_ALG_M 0x00000070 #define VIA_CRYPT_CWLO_ALG_AES 0x00000000 #define VIA_CRYPT_CWLO_KEYGEN_M 0x00000080 #define VIA_CRYPT_CWLO_KEYGEN_HW 0x00000000 #define VIA_CRYPT_CWLO_KEYGEN_SW 0x00000080 #define VIA_CRYPT_CWLO_NORMAL 0x00000000 #define VIA_CRYPT_CWLO_INTERMEDIATE 0x00000100 #define VIA_CRYPT_CWLO_ENCRYPT 0x00000000 #define VIA_CRYPT_CWLO_DECRYPT 0x00000200 #define VIA_CRYPT_CWLO_KEY128 0x0000000a /* 128bit, 10 rds */ #define VIA_CRYPT_CWLO_KEY192 0x0000040c /* 192bit, 12 rds */ #define VIA_CRYPT_CWLO_KEY256 0x0000080e /* 256bit, 15 rds */ #endif /* !_MACHINE_SPECIALREG_H_ */ Index: projects/binutils-2.17/sys/amd64/include/xen =================================================================== --- projects/binutils-2.17/sys/amd64/include/xen (revision 215829) +++ projects/binutils-2.17/sys/amd64/include/xen (revision 215830) Property changes on: projects/binutils-2.17/sys/amd64/include/xen ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/amd64/include/xen:r215709-215824 Index: projects/binutils-2.17/sys/boot/common/load_elf.c =================================================================== --- projects/binutils-2.17/sys/boot/common/load_elf.c (revision 215829) +++ projects/binutils-2.17/sys/boot/common/load_elf.c (revision 215830) @@ -1,788 +1,789 @@ /*- * Copyright (c) 1998 Michael Smith * Copyright (c) 1998 Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #define FREEBSD_ELF #include #include "bootstrap.h" #define COPYOUT(s,d,l) archsw.arch_copyout((vm_offset_t)(s), d, l) #if defined(__i386__) && __ELF_WORD_SIZE == 64 #undef ELF_TARG_CLASS #undef ELF_TARG_MACH #define ELF_TARG_CLASS ELFCLASS64 #define ELF_TARG_MACH EM_X86_64 #endif typedef struct elf_file { Elf_Phdr *ph; Elf_Ehdr *ehdr; Elf_Sym *symtab; Elf_Hashelt *hashtab; Elf_Hashelt nbuckets; Elf_Hashelt nchains; Elf_Hashelt *buckets; Elf_Hashelt *chains; Elf_Rel *rel; size_t relsz; Elf_Rela *rela; size_t relasz; char *strtab; size_t strsz; int fd; caddr_t firstpage; size_t firstlen; int kernel; u_int64_t off; } *elf_file_t; static int __elfN(loadimage)(struct preloaded_file *mp, elf_file_t ef, u_int64_t loadaddr); static int __elfN(lookup_symbol)(struct preloaded_file *mp, elf_file_t ef, const char* name, Elf_Sym* sym); static int __elfN(reloc_ptr)(struct preloaded_file *mp, elf_file_t ef, Elf_Addr p, void *val, size_t len); static int __elfN(parse_modmetadata)(struct preloaded_file *mp, elf_file_t ef); static symaddr_fn __elfN(symaddr); static char *fake_modname(const char *name); const char *__elfN(kerneltype) = "elf kernel"; const char *__elfN(moduletype) = "elf module"; u_int64_t __elfN(relocation_offset) = 0; /* * Attempt to load the file (file) as an ELF module. It will be stored at * (dest), and a pointer to a module structure describing the loaded object * will be saved in (result). */ int __elfN(loadfile)(char *filename, u_int64_t dest, struct preloaded_file **result) { struct preloaded_file *fp, *kfp; struct elf_file ef; Elf_Ehdr *ehdr; int err; u_int pad; ssize_t bytes_read; fp = NULL; bzero(&ef, sizeof(struct elf_file)); /* * Open the image, read and validate the ELF header */ if (filename == NULL) /* can't handle nameless */ return(EFTYPE); if ((ef.fd = open(filename, O_RDONLY)) == -1) return(errno); ef.firstpage = malloc(PAGE_SIZE); if (ef.firstpage == NULL) { close(ef.fd); return(ENOMEM); } bytes_read = read(ef.fd, ef.firstpage, PAGE_SIZE); ef.firstlen = (size_t)bytes_read; if (bytes_read < 0 || ef.firstlen <= sizeof(Elf_Ehdr)) { err = EFTYPE; /* could be EIO, but may be small file */ goto oerr; } ehdr = ef.ehdr = (Elf_Ehdr *)ef.firstpage; /* Is it ELF? */ if (!IS_ELF(*ehdr)) { err = EFTYPE; goto oerr; } if (ehdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || /* Layout ? */ ehdr->e_ident[EI_DATA] != ELF_TARG_DATA || ehdr->e_ident[EI_VERSION] != EV_CURRENT || /* Version ? */ ehdr->e_version != EV_CURRENT || ehdr->e_machine != ELF_TARG_MACH) { /* Machine ? */ err = EFTYPE; goto oerr; } /* * Check to see what sort of module we are. */ kfp = file_findfile(NULL, NULL); if (ehdr->e_type == ET_DYN) { /* Looks like a kld module */ if (kfp == NULL) { printf("elf" __XSTRING(__ELF_WORD_SIZE) "_loadfile: can't load module before kernel\n"); err = EPERM; goto oerr; } if (strcmp(__elfN(kerneltype), kfp->f_type)) { printf("elf" __XSTRING(__ELF_WORD_SIZE) "_loadfile: can't load module with kernel type '%s'\n", kfp->f_type); err = EPERM; goto oerr; } /* Looks OK, got ahead */ ef.kernel = 0; /* Page-align the load address */ pad = (u_int)dest & PAGE_MASK; if (pad != 0) { pad = PAGE_SIZE - pad; dest += pad; } } else if (ehdr->e_type == ET_EXEC) { /* Looks like a kernel */ if (kfp != NULL) { printf("elf" __XSTRING(__ELF_WORD_SIZE) "_loadfile: kernel already loaded\n"); err = EPERM; goto oerr; } /* * Calculate destination address based on kernel entrypoint */ dest = ehdr->e_entry; if (dest == 0) { printf("elf" __XSTRING(__ELF_WORD_SIZE) "_loadfile: not a kernel (maybe static binary?)\n"); err = EPERM; goto oerr; } ef.kernel = 1; } else { err = EFTYPE; goto oerr; } /* * Ok, we think we should handle this. */ fp = file_alloc(); if (fp == NULL) { printf("elf" __XSTRING(__ELF_WORD_SIZE) "_loadfile: cannot allocate module info\n"); err = EPERM; goto out; } if (ef.kernel) setenv("kernelname", filename, 1); fp->f_name = strdup(filename); fp->f_type = strdup(ef.kernel ? __elfN(kerneltype) : __elfN(moduletype)); #ifdef ELF_VERBOSE if (ef.kernel) printf("%s entry at 0x%jx\n", filename, (uintmax_t)dest); #else printf("%s ", filename); #endif fp->f_size = __elfN(loadimage)(fp, &ef, dest); if (fp->f_size == 0 || fp->f_addr == 0) goto ioerr; /* save exec header as metadata */ file_addmetadata(fp, MODINFOMD_ELFHDR, sizeof(*ehdr), ehdr); /* Load OK, return module pointer */ *result = (struct preloaded_file *)fp; err = 0; goto out; ioerr: err = EIO; oerr: file_discard(fp); out: if (ef.firstpage) free(ef.firstpage); close(ef.fd); return(err); } /* * With the file (fd) open on the image, and (ehdr) containing * the Elf header, load the image at (off) */ static int __elfN(loadimage)(struct preloaded_file *fp, elf_file_t ef, u_int64_t off) { int i; u_int j; Elf_Ehdr *ehdr; Elf_Phdr *phdr, *php; Elf_Shdr *shdr; int ret; vm_offset_t firstaddr; vm_offset_t lastaddr; size_t chunk; ssize_t result; Elf_Addr ssym, esym; Elf_Dyn *dp; Elf_Addr adp; int ndp; int symstrindex; int symtabindex; Elf_Size size; u_int fpcopy; dp = NULL; shdr = NULL; ret = 0; firstaddr = lastaddr = 0; ehdr = ef->ehdr; if (ef->kernel) { #ifdef __i386__ #if __ELF_WORD_SIZE == 64 off = - (off & 0xffffffffff000000ull);/* x86_64 relocates after locore */ #else off = - (off & 0xff000000u); /* i386 relocates after locore */ #endif #elif defined(__powerpc__) /* * On the purely virtual memory machines like e500, the kernel is * linked against its final VA range, which is most often not * available at the loader stage, but only after kernel initializes * and completes its VM settings. In such cases we cannot use p_vaddr * field directly to load ELF segments, but put them at some * 'load-time' locations. */ if (off & 0xf0000000u) { off = -(off & 0xf0000000u); /* * XXX the physical load address should not be hardcoded. Note * that the Book-E kernel assumes that it's loaded at a 16MB * boundary for now... */ off += 0x01000000; ehdr->e_entry += off; #ifdef ELF_VERBOSE printf("Converted entry 0x%08x\n", ehdr->e_entry); #endif } else off = 0; #elif defined(__arm__) if (off & 0xf0000000u) { off = -(off & 0xf0000000u); ehdr->e_entry += off; #ifdef ELF_VERBOSE printf("Converted entry 0x%08x\n", ehdr->e_entry); #endif } else off = 0; #else off = 0; /* other archs use direct mapped kernels */ #endif __elfN(relocation_offset) = off; } ef->off = off; if ((ehdr->e_phoff + ehdr->e_phnum * sizeof(*phdr)) > ef->firstlen) { printf("elf" __XSTRING(__ELF_WORD_SIZE) "_loadimage: program header not within first page\n"); goto out; } phdr = (Elf_Phdr *)(ef->firstpage + ehdr->e_phoff); for (i = 0; i < ehdr->e_phnum; i++) { /* We want to load PT_LOAD segments only.. */ if (phdr[i].p_type != PT_LOAD) continue; #ifdef ELF_VERBOSE printf("Segment: 0x%lx@0x%lx -> 0x%lx-0x%lx", (long)phdr[i].p_filesz, (long)phdr[i].p_offset, (long)(phdr[i].p_vaddr + off), (long)(phdr[i].p_vaddr + off + phdr[i].p_memsz - 1)); #else if ((phdr[i].p_flags & PF_W) == 0) { printf("text=0x%lx ", (long)phdr[i].p_filesz); } else { printf("data=0x%lx", (long)phdr[i].p_filesz); if (phdr[i].p_filesz < phdr[i].p_memsz) printf("+0x%lx", (long)(phdr[i].p_memsz -phdr[i].p_filesz)); printf(" "); } #endif fpcopy = 0; if (ef->firstlen > phdr[i].p_offset) { fpcopy = ef->firstlen - phdr[i].p_offset; archsw.arch_copyin(ef->firstpage + phdr[i].p_offset, phdr[i].p_vaddr + off, fpcopy); } if (phdr[i].p_filesz > fpcopy) { if (kern_pread(ef->fd, phdr[i].p_vaddr + off + fpcopy, phdr[i].p_filesz - fpcopy, phdr[i].p_offset + fpcopy) != 0) { printf("\nelf" __XSTRING(__ELF_WORD_SIZE) "_loadimage: read failed\n"); goto out; } } /* clear space from oversized segments; eg: bss */ if (phdr[i].p_filesz < phdr[i].p_memsz) { #ifdef ELF_VERBOSE printf(" (bss: 0x%lx-0x%lx)", (long)(phdr[i].p_vaddr + off + phdr[i].p_filesz), (long)(phdr[i].p_vaddr + off + phdr[i].p_memsz - 1)); #endif kern_bzero(phdr[i].p_vaddr + off + phdr[i].p_filesz, phdr[i].p_memsz - phdr[i].p_filesz); } #ifdef ELF_VERBOSE printf("\n"); #endif if (firstaddr == 0 || firstaddr > (phdr[i].p_vaddr + off)) firstaddr = phdr[i].p_vaddr + off; if (lastaddr == 0 || lastaddr < (phdr[i].p_vaddr + off + phdr[i].p_memsz)) lastaddr = phdr[i].p_vaddr + off + phdr[i].p_memsz; } lastaddr = roundup(lastaddr, sizeof(long)); /* * Now grab the symbol tables. This isn't easy if we're reading a * .gz file. I think the rule is going to have to be that you must * strip a file to remove symbols before gzipping it so that we do not * try to lseek() on it. */ chunk = ehdr->e_shnum * ehdr->e_shentsize; if (chunk == 0 || ehdr->e_shoff == 0) goto nosyms; shdr = alloc_pread(ef->fd, ehdr->e_shoff, chunk); if (shdr == NULL) { printf("\nelf" __XSTRING(__ELF_WORD_SIZE) "_loadimage: failed to read section headers"); goto nosyms; } symtabindex = -1; symstrindex = -1; for (i = 0; i < ehdr->e_shnum; i++) { if (shdr[i].sh_type != SHT_SYMTAB) continue; for (j = 0; j < ehdr->e_phnum; j++) { if (phdr[j].p_type != PT_LOAD) continue; if (shdr[i].sh_offset >= phdr[j].p_offset && (shdr[i].sh_offset + shdr[i].sh_size <= phdr[j].p_offset + phdr[j].p_filesz)) { shdr[i].sh_offset = 0; shdr[i].sh_size = 0; break; } } if (shdr[i].sh_offset == 0 || shdr[i].sh_size == 0) continue; /* alread loaded in a PT_LOAD above */ /* Save it for loading below */ symtabindex = i; symstrindex = shdr[i].sh_link; } if (symtabindex < 0 || symstrindex < 0) goto nosyms; /* Ok, committed to a load. */ #ifndef ELF_VERBOSE printf("syms=["); #endif ssym = lastaddr; for (i = symtabindex; i >= 0; i = symstrindex) { #ifdef ELF_VERBOSE char *secname; switch(shdr[i].sh_type) { case SHT_SYMTAB: /* Symbol table */ secname = "symtab"; break; case SHT_STRTAB: /* String table */ secname = "strtab"; break; default: secname = "WHOA!!"; break; } #endif size = shdr[i].sh_size; archsw.arch_copyin(&size, lastaddr, sizeof(size)); lastaddr += sizeof(size); #ifdef ELF_VERBOSE printf("\n%s: 0x%jx@0x%jx -> 0x%jx-0x%jx", secname, (uintmax_t)shdr[i].sh_size, (uintmax_t)shdr[i].sh_offset, (uintmax_t)lastaddr, (uintmax_t)(lastaddr + shdr[i].sh_size)); #else if (i == symstrindex) printf("+"); printf("0x%lx+0x%lx", (long)sizeof(size), (long)size); #endif if (lseek(ef->fd, (off_t)shdr[i].sh_offset, SEEK_SET) == -1) { printf("\nelf" __XSTRING(__ELF_WORD_SIZE) "_loadimage: could not seek for symbols - skipped!"); lastaddr = ssym; ssym = 0; goto nosyms; } result = archsw.arch_readin(ef->fd, lastaddr, shdr[i].sh_size); if (result < 0 || (size_t)result != shdr[i].sh_size) { - printf("\nelf" __XSTRING(__ELF_WORD_SIZE) "_loadimage: could not read symbols - skipped!"); + printf("\nelf" __XSTRING(__ELF_WORD_SIZE) "_loadimage: could not read symbols - skipped! (%ju != %ju)", (uintmax_t)result, + (uintmax_t)shdr[i].sh_size); lastaddr = ssym; ssym = 0; goto nosyms; } /* Reset offsets relative to ssym */ lastaddr += shdr[i].sh_size; lastaddr = roundup(lastaddr, sizeof(size)); if (i == symtabindex) symtabindex = -1; else if (i == symstrindex) symstrindex = -1; } esym = lastaddr; #ifndef ELF_VERBOSE printf("]"); #endif file_addmetadata(fp, MODINFOMD_SSYM, sizeof(ssym), &ssym); file_addmetadata(fp, MODINFOMD_ESYM, sizeof(esym), &esym); nosyms: printf("\n"); ret = lastaddr - firstaddr; fp->f_addr = firstaddr; php = NULL; for (i = 0; i < ehdr->e_phnum; i++) { if (phdr[i].p_type == PT_DYNAMIC) { php = phdr + i; adp = php->p_vaddr; file_addmetadata(fp, MODINFOMD_DYNAMIC, sizeof(adp), &adp); break; } } if (php == NULL) /* this is bad, we cannot get to symbols or _DYNAMIC */ goto out; ndp = php->p_filesz / sizeof(Elf_Dyn); if (ndp == 0) goto out; dp = malloc(php->p_filesz); if (dp == NULL) goto out; archsw.arch_copyout(php->p_vaddr + off, dp, php->p_filesz); ef->strsz = 0; for (i = 0; i < ndp; i++) { if (dp[i].d_tag == 0) break; switch (dp[i].d_tag) { case DT_HASH: ef->hashtab = (Elf_Hashelt*)(uintptr_t)(dp[i].d_un.d_ptr + off); break; case DT_STRTAB: ef->strtab = (char *)(uintptr_t)(dp[i].d_un.d_ptr + off); break; case DT_STRSZ: ef->strsz = dp[i].d_un.d_val; break; case DT_SYMTAB: ef->symtab = (Elf_Sym*)(uintptr_t)(dp[i].d_un.d_ptr + off); break; case DT_REL: ef->rel = (Elf_Rel *)(uintptr_t)(dp[i].d_un.d_ptr + off); break; case DT_RELSZ: ef->relsz = dp[i].d_un.d_val; break; case DT_RELA: ef->rela = (Elf_Rela *)(uintptr_t)(dp[i].d_un.d_ptr + off); break; case DT_RELASZ: ef->relasz = dp[i].d_un.d_val; break; default: break; } } if (ef->hashtab == NULL || ef->symtab == NULL || ef->strtab == NULL || ef->strsz == 0) goto out; COPYOUT(ef->hashtab, &ef->nbuckets, sizeof(ef->nbuckets)); COPYOUT(ef->hashtab + 1, &ef->nchains, sizeof(ef->nchains)); ef->buckets = ef->hashtab + 2; ef->chains = ef->buckets + ef->nbuckets; if (__elfN(parse_modmetadata)(fp, ef) == 0) goto out; if (ef->kernel) /* kernel must not depend on anything */ goto out; out: if (dp) free(dp); if (shdr) free(shdr); return ret; } static char invalid_name[] = "bad"; char * fake_modname(const char *name) { const char *sp, *ep; char *fp; size_t len; sp = strrchr(name, '/'); if (sp) sp++; else sp = name; ep = strrchr(name, '.'); if (ep) { if (ep == name) { sp = invalid_name; ep = invalid_name + sizeof(invalid_name) - 1; } } else ep = name + strlen(name); len = ep - sp; fp = malloc(len + 1); if (fp == NULL) return NULL; memcpy(fp, sp, len); fp[len] = '\0'; return fp; } #if defined(__i386__) && __ELF_WORD_SIZE == 64 struct mod_metadata64 { int md_version; /* structure version MDTV_* */ int md_type; /* type of entry MDT_* */ u_int64_t md_data; /* specific data */ u_int64_t md_cval; /* common string label */ }; #endif int __elfN(parse_modmetadata)(struct preloaded_file *fp, elf_file_t ef) { struct mod_metadata md; #if defined(__i386__) && __ELF_WORD_SIZE == 64 struct mod_metadata64 md64; #endif struct mod_depend *mdepend; struct mod_version mver; Elf_Sym sym; char *s; int error, modcnt, minfolen; Elf_Addr v, p, p_stop; if (__elfN(lookup_symbol)(fp, ef, "__start_set_modmetadata_set", &sym) != 0) return ENOENT; p = sym.st_value + ef->off; if (__elfN(lookup_symbol)(fp, ef, "__stop_set_modmetadata_set", &sym) != 0) return ENOENT; p_stop = sym.st_value + ef->off; modcnt = 0; while (p < p_stop) { COPYOUT(p, &v, sizeof(v)); error = __elfN(reloc_ptr)(fp, ef, p, &v, sizeof(v)); if (error == EOPNOTSUPP) v += ef->off; else if (error != 0) return (error); #if defined(__i386__) && __ELF_WORD_SIZE == 64 COPYOUT(v, &md64, sizeof(md64)); error = __elfN(reloc_ptr)(fp, ef, v, &md64, sizeof(md64)); if (error == EOPNOTSUPP) { md64.md_cval += ef->off; md64.md_data += ef->off; } else if (error != 0) return (error); md.md_version = md64.md_version; md.md_type = md64.md_type; md.md_cval = (const char *)(uintptr_t)md64.md_cval; md.md_data = (void *)(uintptr_t)md64.md_data; #else COPYOUT(v, &md, sizeof(md)); error = __elfN(reloc_ptr)(fp, ef, v, &md, sizeof(md)); if (error == EOPNOTSUPP) { md.md_cval += ef->off; md.md_data += ef->off; } else if (error != 0) return (error); #endif p += sizeof(Elf_Addr); switch(md.md_type) { case MDT_DEPEND: if (ef->kernel) /* kernel must not depend on anything */ break; s = strdupout((vm_offset_t)md.md_cval); minfolen = sizeof(*mdepend) + strlen(s) + 1; mdepend = malloc(minfolen); if (mdepend == NULL) return ENOMEM; COPYOUT((vm_offset_t)md.md_data, mdepend, sizeof(*mdepend)); strcpy((char*)(mdepend + 1), s); free(s); file_addmetadata(fp, MODINFOMD_DEPLIST, minfolen, mdepend); free(mdepend); break; case MDT_VERSION: s = strdupout((vm_offset_t)md.md_cval); COPYOUT((vm_offset_t)md.md_data, &mver, sizeof(mver)); file_addmodule(fp, s, mver.mv_version, NULL); free(s); modcnt++; break; } } if (modcnt == 0) { s = fake_modname(fp->f_name); file_addmodule(fp, s, 1, NULL); free(s); } return 0; } static unsigned long elf_hash(const char *name) { const unsigned char *p = (const unsigned char *) name; unsigned long h = 0; unsigned long g; while (*p != '\0') { h = (h << 4) + *p++; if ((g = h & 0xf0000000) != 0) h ^= g >> 24; h &= ~g; } return h; } static const char __elfN(bad_symtable)[] = "elf" __XSTRING(__ELF_WORD_SIZE) "_lookup_symbol: corrupt symbol table\n"; int __elfN(lookup_symbol)(struct preloaded_file *fp, elf_file_t ef, const char* name, Elf_Sym *symp) { Elf_Hashelt symnum; Elf_Sym sym; char *strp; unsigned long hash; hash = elf_hash(name); COPYOUT(&ef->buckets[hash % ef->nbuckets], &symnum, sizeof(symnum)); while (symnum != STN_UNDEF) { if (symnum >= ef->nchains) { printf(__elfN(bad_symtable)); return ENOENT; } COPYOUT(ef->symtab + symnum, &sym, sizeof(sym)); if (sym.st_name == 0) { printf(__elfN(bad_symtable)); return ENOENT; } strp = strdupout((vm_offset_t)(ef->strtab + sym.st_name)); if (strcmp(name, strp) == 0) { free(strp); if (sym.st_shndx != SHN_UNDEF || (sym.st_value != 0 && ELF_ST_TYPE(sym.st_info) == STT_FUNC)) { *symp = sym; return 0; } return ENOENT; } free(strp); COPYOUT(&ef->chains[symnum], &symnum, sizeof(symnum)); } return ENOENT; } /* * Apply any intra-module relocations to the value. p is the load address * of the value and val/len is the value to be modified. This does NOT modify * the image in-place, because this is done by kern_linker later on. * * Returns EOPNOTSUPP if no relocation method is supplied. */ static int __elfN(reloc_ptr)(struct preloaded_file *mp, elf_file_t ef, Elf_Addr p, void *val, size_t len) { size_t n; Elf_Rela a; Elf_Rel r; int error; /* * The kernel is already relocated, but we still want to apply * offset adjustments. */ if (ef->kernel) return (EOPNOTSUPP); for (n = 0; n < ef->relsz / sizeof(r); n++) { COPYOUT(ef->rel + n, &r, sizeof(r)); error = __elfN(reloc)(ef, __elfN(symaddr), &r, ELF_RELOC_REL, ef->off, p, val, len); if (error != 0) return (error); } for (n = 0; n < ef->relasz / sizeof(a); n++) { COPYOUT(ef->rela + n, &a, sizeof(a)); error = __elfN(reloc)(ef, __elfN(symaddr), &a, ELF_RELOC_RELA, ef->off, p, val, len); if (error != 0) return (error); } return (0); } static Elf_Addr __elfN(symaddr)(struct elf_file *ef, Elf_Size symidx) { /* Symbol lookup by index not required here. */ return (0); } Index: projects/binutils-2.17/sys/cddl/contrib/opensolaris =================================================================== --- projects/binutils-2.17/sys/cddl/contrib/opensolaris (revision 215829) +++ projects/binutils-2.17/sys/cddl/contrib/opensolaris (revision 215830) Property changes on: projects/binutils-2.17/sys/cddl/contrib/opensolaris ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/cddl/contrib/opensolaris:r215709-215824 Index: projects/binutils-2.17/sys/compat/freebsd32/freebsd32_misc.c =================================================================== --- projects/binutils-2.17/sys/compat/freebsd32/freebsd32_misc.c (revision 215829) +++ projects/binutils-2.17/sys/compat/freebsd32/freebsd32_misc.c (revision 215830) @@ -1,2667 +1,2668 @@ /*- * Copyright (c) 2002 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_inet.h" #include "opt_inet6.h" #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include /* Must come after sys/malloc.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include /* Must come after sys/selinfo.h */ #include /* Must come after sys/selinfo.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include CTASSERT(sizeof(struct timeval32) == 8); CTASSERT(sizeof(struct timespec32) == 8); CTASSERT(sizeof(struct itimerval32) == 16); CTASSERT(sizeof(struct statfs32) == 256); CTASSERT(sizeof(struct rusage32) == 72); CTASSERT(sizeof(struct sigaltstack32) == 12); CTASSERT(sizeof(struct kevent32) == 20); CTASSERT(sizeof(struct iovec32) == 8); CTASSERT(sizeof(struct msghdr32) == 28); CTASSERT(sizeof(struct stat32) == 96); CTASSERT(sizeof(struct sigaction32) == 24); static int freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count); static int freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count); #if BYTE_ORDER == BIG_ENDIAN #define PAIR32TO64(type, name) ((name ## 2) | ((type)(name ## 1) << 32)) #define RETVAL_HI 0 #define RETVAL_LO 1 #else #define PAIR32TO64(type, name) ((name ## 1) | ((type)(name ## 2) << 32)) #define RETVAL_HI 1 #define RETVAL_LO 0 #endif void freebsd32_rusage_out(const struct rusage *s, struct rusage32 *s32) { TV_CP(*s, *s32, ru_utime); TV_CP(*s, *s32, ru_stime); CP(*s, *s32, ru_maxrss); CP(*s, *s32, ru_ixrss); CP(*s, *s32, ru_idrss); CP(*s, *s32, ru_isrss); CP(*s, *s32, ru_minflt); CP(*s, *s32, ru_majflt); CP(*s, *s32, ru_nswap); CP(*s, *s32, ru_inblock); CP(*s, *s32, ru_oublock); CP(*s, *s32, ru_msgsnd); CP(*s, *s32, ru_msgrcv); CP(*s, *s32, ru_nsignals); CP(*s, *s32, ru_nvcsw); CP(*s, *s32, ru_nivcsw); } int freebsd32_wait4(struct thread *td, struct freebsd32_wait4_args *uap) { int error, status; struct rusage32 ru32; struct rusage ru, *rup; if (uap->rusage != NULL) rup = &ru; else rup = NULL; error = kern_wait(td, uap->pid, &status, uap->options, rup); if (error) return (error); if (uap->status != NULL) error = copyout(&status, uap->status, sizeof(status)); if (uap->rusage != NULL && error == 0) { freebsd32_rusage_out(&ru, &ru32); error = copyout(&ru32, uap->rusage, sizeof(ru32)); } return (error); } #ifdef COMPAT_FREEBSD4 static void copy_statfs(struct statfs *in, struct statfs32 *out) { statfs_scale_blocks(in, INT32_MAX); bzero(out, sizeof(*out)); CP(*in, *out, f_bsize); out->f_iosize = MIN(in->f_iosize, INT32_MAX); CP(*in, *out, f_blocks); CP(*in, *out, f_bfree); CP(*in, *out, f_bavail); out->f_files = MIN(in->f_files, INT32_MAX); out->f_ffree = MIN(in->f_ffree, INT32_MAX); CP(*in, *out, f_fsid); CP(*in, *out, f_owner); CP(*in, *out, f_type); CP(*in, *out, f_flags); out->f_syncwrites = MIN(in->f_syncwrites, INT32_MAX); out->f_asyncwrites = MIN(in->f_asyncwrites, INT32_MAX); strlcpy(out->f_fstypename, in->f_fstypename, MFSNAMELEN); strlcpy(out->f_mntonname, in->f_mntonname, min(MNAMELEN, FREEBSD4_MNAMELEN)); out->f_syncreads = MIN(in->f_syncreads, INT32_MAX); out->f_asyncreads = MIN(in->f_asyncreads, INT32_MAX); strlcpy(out->f_mntfromname, in->f_mntfromname, min(MNAMELEN, FREEBSD4_MNAMELEN)); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_getfsstat(struct thread *td, struct freebsd4_freebsd32_getfsstat_args *uap) { struct statfs *buf, *sp; struct statfs32 stat32; size_t count, size; int error; count = uap->bufsize / sizeof(struct statfs32); size = count * sizeof(struct statfs); error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags); if (size > 0) { count = td->td_retval[0]; sp = buf; while (count > 0 && error == 0) { copy_statfs(sp, &stat32); error = copyout(&stat32, uap->buf, sizeof(stat32)); sp++; uap->buf++; count--; } free(buf, M_TEMP); } return (error); } #endif int freebsd32_sigaltstack(struct thread *td, struct freebsd32_sigaltstack_args *uap) { struct sigaltstack32 s32; struct sigaltstack ss, oss, *ssp; int error; if (uap->ss != NULL) { error = copyin(uap->ss, &s32, sizeof(s32)); if (error) return (error); PTRIN_CP(s32, ss, ss_sp); CP(s32, ss, ss_size); CP(s32, ss, ss_flags); ssp = &ss; } else ssp = NULL; error = kern_sigaltstack(td, ssp, &oss); if (error == 0 && uap->oss != NULL) { PTROUT_CP(oss, s32, ss_sp); CP(oss, s32, ss_size); CP(oss, s32, ss_flags); error = copyout(&s32, uap->oss, sizeof(s32)); } return (error); } /* * Custom version of exec_copyin_args() so that we can translate * the pointers. */ int freebsd32_exec_copyin_args(struct image_args *args, char *fname, enum uio_seg segflg, u_int32_t *argv, u_int32_t *envv) { char *argp, *envp; u_int32_t *p32, arg; size_t length; int error; bzero(args, sizeof(*args)); if (argv == NULL) return (EFAULT); /* * Allocate demand-paged memory for the file name, argument, and * environment strings. */ error = exec_alloc_args(args); if (error != 0) return (error); /* * Copy the file name. */ if (fname != NULL) { args->fname = args->buf; error = (segflg == UIO_SYSSPACE) ? copystr(fname, args->fname, PATH_MAX, &length) : copyinstr(fname, args->fname, PATH_MAX, &length); if (error != 0) goto err_exit; } else length = 0; args->begin_argv = args->buf + length; args->endp = args->begin_argv; args->stringspace = ARG_MAX; /* * extract arguments first */ p32 = argv; for (;;) { error = copyin(p32++, &arg, sizeof(arg)); if (error) goto err_exit; if (arg == 0) break; argp = PTRIN(arg); error = copyinstr(argp, args->endp, args->stringspace, &length); if (error) { if (error == ENAMETOOLONG) error = E2BIG; goto err_exit; } args->stringspace -= length; args->endp += length; args->argc++; } args->begin_envv = args->endp; /* * extract environment strings */ if (envv) { p32 = envv; for (;;) { error = copyin(p32++, &arg, sizeof(arg)); if (error) goto err_exit; if (arg == 0) break; envp = PTRIN(arg); error = copyinstr(envp, args->endp, args->stringspace, &length); if (error) { if (error == ENAMETOOLONG) error = E2BIG; goto err_exit; } args->stringspace -= length; args->endp += length; args->envc++; } } return (0); err_exit: exec_free_args(args); return (error); } int freebsd32_execve(struct thread *td, struct freebsd32_execve_args *uap) { struct image_args eargs; int error; error = freebsd32_exec_copyin_args(&eargs, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &eargs, NULL); return (error); } int freebsd32_fexecve(struct thread *td, struct freebsd32_fexecve_args *uap) { struct image_args eargs; int error; error = freebsd32_exec_copyin_args(&eargs, NULL, UIO_SYSSPACE, uap->argv, uap->envv); if (error == 0) { eargs.fd = uap->fd; error = kern_execve(td, &eargs, NULL); } return (error); } #ifdef __ia64__ static int freebsd32_mmap_partial(struct thread *td, vm_offset_t start, vm_offset_t end, int prot, int fd, off_t pos) { vm_map_t map; vm_map_entry_t entry; int rv; map = &td->td_proc->p_vmspace->vm_map; if (fd != -1) prot |= VM_PROT_WRITE; if (vm_map_lookup_entry(map, start, &entry)) { if ((entry->protection & prot) != prot) { rv = vm_map_protect(map, trunc_page(start), round_page(end), entry->protection | prot, FALSE); if (rv != KERN_SUCCESS) return (EINVAL); } } else { vm_offset_t addr = trunc_page(start); rv = vm_map_find(map, 0, 0, &addr, PAGE_SIZE, FALSE, prot, VM_PROT_ALL, 0); if (rv != KERN_SUCCESS) return (EINVAL); } if (fd != -1) { struct pread_args r; r.fd = fd; r.buf = (void *) start; r.nbyte = end - start; r.offset = pos; return (pread(td, &r)); } else { while (start < end) { subyte((void *) start, 0); start++; } return (0); } } #endif int freebsd32_mmap(struct thread *td, struct freebsd32_mmap_args *uap) { struct mmap_args ap; vm_offset_t addr = (vm_offset_t) uap->addr; vm_size_t len = uap->len; int prot = uap->prot; int flags = uap->flags; int fd = uap->fd; off_t pos = PAIR32TO64(off_t,uap->pos); #ifdef __ia64__ vm_size_t pageoff; int error; /* * Attempt to handle page size hassles. */ pageoff = (pos & PAGE_MASK); if (flags & MAP_FIXED) { vm_offset_t start, end; start = addr; end = addr + len; if (start != trunc_page(start)) { error = freebsd32_mmap_partial(td, start, round_page(start), prot, fd, pos); if (fd != -1) pos += round_page(start) - start; start = round_page(start); } if (end != round_page(end)) { vm_offset_t t = trunc_page(end); error = freebsd32_mmap_partial(td, t, end, prot, fd, pos + t - start); end = trunc_page(end); } if (end > start && fd != -1 && (pos & PAGE_MASK)) { /* * We can't map this region at all. The specified * address doesn't have the same alignment as the file * position. Fake the mapping by simply reading the * entire region into memory. First we need to make * sure the region exists. */ vm_map_t map; struct pread_args r; int rv; prot |= VM_PROT_WRITE; map = &td->td_proc->p_vmspace->vm_map; rv = vm_map_remove(map, start, end); if (rv != KERN_SUCCESS) return (EINVAL); rv = vm_map_find(map, 0, 0, &start, end - start, FALSE, prot, VM_PROT_ALL, 0); if (rv != KERN_SUCCESS) return (EINVAL); r.fd = fd; r.buf = (void *) start; r.nbyte = end - start; r.offset = pos; error = pread(td, &r); if (error) return (error); td->td_retval[0] = addr; return (0); } if (end == start) { /* * After dealing with the ragged ends, there * might be none left. */ td->td_retval[0] = addr; return (0); } addr = start; len = end - start; } #endif ap.addr = (void *) addr; ap.len = len; ap.prot = prot; ap.flags = flags; ap.fd = fd; ap.pos = pos; return (mmap(td, &ap)); } #ifdef COMPAT_FREEBSD6 int freebsd6_freebsd32_mmap(struct thread *td, struct freebsd6_freebsd32_mmap_args *uap) { struct freebsd32_mmap_args ap; ap.addr = uap->addr; ap.len = uap->len; ap.prot = uap->prot; ap.flags = uap->flags; ap.fd = uap->fd; ap.pos1 = uap->pos1; ap.pos2 = uap->pos2; return (freebsd32_mmap(td, &ap)); } #endif int freebsd32_setitimer(struct thread *td, struct freebsd32_setitimer_args *uap) { struct itimerval itv, oitv, *itvp; struct itimerval32 i32; int error; if (uap->itv != NULL) { error = copyin(uap->itv, &i32, sizeof(i32)); if (error) return (error); TV_CP(i32, itv, it_interval); TV_CP(i32, itv, it_value); itvp = &itv; } else itvp = NULL; error = kern_setitimer(td, uap->which, itvp, &oitv); if (error || uap->oitv == NULL) return (error); TV_CP(oitv, i32, it_interval); TV_CP(oitv, i32, it_value); return (copyout(&i32, uap->oitv, sizeof(i32))); } int freebsd32_getitimer(struct thread *td, struct freebsd32_getitimer_args *uap) { struct itimerval itv; struct itimerval32 i32; int error; error = kern_getitimer(td, uap->which, &itv); if (error || uap->itv == NULL) return (error); TV_CP(itv, i32, it_interval); TV_CP(itv, i32, it_value); return (copyout(&i32, uap->itv, sizeof(i32))); } int freebsd32_select(struct thread *td, struct freebsd32_select_args *uap) { struct timeval32 tv32; struct timeval tv, *tvp; int error; if (uap->tv != NULL) { error = copyin(uap->tv, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, tv, tv_sec); CP(tv32, tv, tv_usec); tvp = &tv; } else tvp = NULL; /* * XXX Do pointers need PTRIN()? */ return (kern_select(td, uap->nd, uap->in, uap->ou, uap->ex, tvp, sizeof(int32_t) * 8)); } int freebsd32_pselect(struct thread *td, struct freebsd32_pselect_args *uap) { struct timespec32 ts32; struct timespec ts; struct timeval tv, *tvp; sigset_t set, *uset; int error; if (uap->ts != NULL) { error = copyin(uap->ts, &ts32, sizeof(ts32)); if (error != 0) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); TIMESPEC_TO_TIMEVAL(&tv, &ts); tvp = &tv; } else tvp = NULL; if (uap->sm != NULL) { error = copyin(uap->sm, &set, sizeof(set)); if (error != 0) return (error); uset = &set; } else uset = NULL; /* * XXX Do pointers need PTRIN()? */ error = kern_pselect(td, uap->nd, uap->in, uap->ou, uap->ex, tvp, uset, sizeof(int32_t) * 8); return (error); } /* * Copy 'count' items into the destination list pointed to by uap->eventlist. */ static int freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count) { struct freebsd32_kevent_args *uap; struct kevent32 ks32[KQ_NEVENTS]; int i, error = 0; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd32_kevent_args *)arg; for (i = 0; i < count; i++) { CP(kevp[i], ks32[i], ident); CP(kevp[i], ks32[i], filter); CP(kevp[i], ks32[i], flags); CP(kevp[i], ks32[i], fflags); CP(kevp[i], ks32[i], data); PTROUT_CP(kevp[i], ks32[i], udata); } error = copyout(ks32, uap->eventlist, count * sizeof *ks32); if (error == 0) uap->eventlist += count; return (error); } /* * Copy 'count' items from the list pointed to by uap->changelist. */ static int freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count) { struct freebsd32_kevent_args *uap; struct kevent32 ks32[KQ_NEVENTS]; int i, error = 0; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd32_kevent_args *)arg; error = copyin(uap->changelist, ks32, count * sizeof *ks32); if (error) goto done; uap->changelist += count; for (i = 0; i < count; i++) { CP(ks32[i], kevp[i], ident); CP(ks32[i], kevp[i], filter); CP(ks32[i], kevp[i], flags); CP(ks32[i], kevp[i], fflags); CP(ks32[i], kevp[i], data); PTRIN_CP(ks32[i], kevp[i], udata); } done: return (error); } int freebsd32_kevent(struct thread *td, struct freebsd32_kevent_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; struct kevent_copyops k_ops = { uap, freebsd32_kevent_copyout, freebsd32_kevent_copyin}; int error; if (uap->timeout) { error = copyin(uap->timeout, &ts32, sizeof(ts32)); if (error) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); tsp = &ts; } else tsp = NULL; error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents, &k_ops, tsp); return (error); } int freebsd32_gettimeofday(struct thread *td, struct freebsd32_gettimeofday_args *uap) { struct timeval atv; struct timeval32 atv32; struct timezone rtz; int error = 0; if (uap->tp) { microtime(&atv); CP(atv, atv32, tv_sec); CP(atv, atv32, tv_usec); error = copyout(&atv32, uap->tp, sizeof (atv32)); } if (error == 0 && uap->tzp != NULL) { rtz.tz_minuteswest = tz_minuteswest; rtz.tz_dsttime = tz_dsttime; error = copyout(&rtz, uap->tzp, sizeof (rtz)); } return (error); } int freebsd32_getrusage(struct thread *td, struct freebsd32_getrusage_args *uap) { struct rusage32 s32; struct rusage s; int error; error = kern_getrusage(td, uap->who, &s); if (error) return (error); if (uap->rusage != NULL) { freebsd32_rusage_out(&s, &s32); error = copyout(&s32, uap->rusage, sizeof(s32)); } return (error); } static int freebsd32_copyinuio(struct iovec32 *iovp, u_int iovcnt, struct uio **uiop) { struct iovec32 iov32; struct iovec *iov; struct uio *uio; u_int iovlen; int error, i; *uiop = NULL; if (iovcnt > UIO_MAXIOV) return (EINVAL); iovlen = iovcnt * sizeof(struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); iov = (struct iovec *)(uio + 1); for (i = 0; i < iovcnt; i++) { error = copyin(&iovp[i], &iov32, sizeof(struct iovec32)); if (error) { free(uio, M_IOV); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } uio->uio_iov = iov; uio->uio_iovcnt = iovcnt; uio->uio_segflg = UIO_USERSPACE; uio->uio_offset = -1; uio->uio_resid = 0; for (i = 0; i < iovcnt; i++) { if (iov->iov_len > INT_MAX - uio->uio_resid) { free(uio, M_IOV); return (EINVAL); } uio->uio_resid += iov->iov_len; iov++; } *uiop = uio; return (0); } int freebsd32_readv(struct thread *td, struct freebsd32_readv_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_readv(td, uap->fd, auio); free(auio, M_IOV); return (error); } int freebsd32_writev(struct thread *td, struct freebsd32_writev_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_writev(td, uap->fd, auio); free(auio, M_IOV); return (error); } int freebsd32_preadv(struct thread *td, struct freebsd32_preadv_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_preadv(td, uap->fd, auio, PAIR32TO64(off_t,uap->offset)); free(auio, M_IOV); return (error); } int freebsd32_pwritev(struct thread *td, struct freebsd32_pwritev_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_pwritev(td, uap->fd, auio, PAIR32TO64(off_t,uap->offset)); free(auio, M_IOV); return (error); } int freebsd32_copyiniov(struct iovec32 *iovp32, u_int iovcnt, struct iovec **iovp, int error) { struct iovec32 iov32; struct iovec *iov; u_int iovlen; int i; *iovp = NULL; if (iovcnt > UIO_MAXIOV) return (error); iovlen = iovcnt * sizeof(struct iovec); iov = malloc(iovlen, M_IOV, M_WAITOK); for (i = 0; i < iovcnt; i++) { error = copyin(&iovp32[i], &iov32, sizeof(struct iovec32)); if (error) { free(iov, M_IOV); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } *iovp = iov; return (0); } static int freebsd32_copyinmsghdr(struct msghdr32 *msg32, struct msghdr *msg) { struct msghdr32 m32; int error; error = copyin(msg32, &m32, sizeof(m32)); if (error) return (error); msg->msg_name = PTRIN(m32.msg_name); msg->msg_namelen = m32.msg_namelen; msg->msg_iov = PTRIN(m32.msg_iov); msg->msg_iovlen = m32.msg_iovlen; msg->msg_control = PTRIN(m32.msg_control); msg->msg_controllen = m32.msg_controllen; msg->msg_flags = m32.msg_flags; return (0); } static int freebsd32_copyoutmsghdr(struct msghdr *msg, struct msghdr32 *msg32) { struct msghdr32 m32; int error; m32.msg_name = PTROUT(msg->msg_name); m32.msg_namelen = msg->msg_namelen; m32.msg_iov = PTROUT(msg->msg_iov); m32.msg_iovlen = msg->msg_iovlen; m32.msg_control = PTROUT(msg->msg_control); m32.msg_controllen = msg->msg_controllen; m32.msg_flags = msg->msg_flags; error = copyout(&m32, msg32, sizeof(m32)); return (error); } #define FREEBSD32_ALIGNBYTES (sizeof(int) - 1) #define FREEBSD32_ALIGN(p) \ (((u_long)(p) + FREEBSD32_ALIGNBYTES) & ~FREEBSD32_ALIGNBYTES) #define FREEBSD32_CMSG_SPACE(l) \ (FREEBSD32_ALIGN(sizeof(struct cmsghdr)) + FREEBSD32_ALIGN(l)) #define FREEBSD32_CMSG_DATA(cmsg) ((unsigned char *)(cmsg) + \ FREEBSD32_ALIGN(sizeof(struct cmsghdr))) static int freebsd32_copy_msg_out(struct msghdr *msg, struct mbuf *control) { struct cmsghdr *cm; void *data; socklen_t clen, datalen; int error; caddr_t ctlbuf; int len, maxlen, copylen; struct mbuf *m; error = 0; len = msg->msg_controllen; maxlen = msg->msg_controllen; msg->msg_controllen = 0; m = control; ctlbuf = msg->msg_control; while (m && len > 0) { cm = mtod(m, struct cmsghdr *); clen = m->m_len; while (cm != NULL) { if (sizeof(struct cmsghdr) > clen || cm->cmsg_len > clen) { error = EINVAL; break; } data = CMSG_DATA(cm); datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; /* Adjust message length */ cm->cmsg_len = FREEBSD32_ALIGN(sizeof(struct cmsghdr)) + datalen; /* Copy cmsghdr */ copylen = sizeof(struct cmsghdr); if (len < copylen) { msg->msg_flags |= MSG_CTRUNC; copylen = len; } error = copyout(cm,ctlbuf,copylen); if (error) goto exit; ctlbuf += FREEBSD32_ALIGN(copylen); len -= FREEBSD32_ALIGN(copylen); if (len <= 0) break; /* Copy data */ copylen = datalen; if (len < copylen) { msg->msg_flags |= MSG_CTRUNC; copylen = len; } error = copyout(data,ctlbuf,copylen); if (error) goto exit; ctlbuf += FREEBSD32_ALIGN(copylen); len -= FREEBSD32_ALIGN(copylen); if (CMSG_SPACE(datalen) < clen) { clen -= CMSG_SPACE(datalen); cm = (struct cmsghdr *) ((caddr_t)cm + CMSG_SPACE(datalen)); } else { clen = 0; cm = NULL; } } m = m->m_next; } msg->msg_controllen = (len <= 0) ? maxlen : ctlbuf - (caddr_t)msg->msg_control; exit: return (error); } int freebsd32_recvmsg(td, uap) struct thread *td; struct freebsd32_recvmsg_args /* { int s; struct msghdr32 *msg; int flags; } */ *uap; { struct msghdr msg; struct msghdr32 m32; struct iovec *uiov, *iov; struct mbuf *control = NULL; struct mbuf **controlp; int error; error = copyin(uap->msg, &m32, sizeof(m32)); if (error) return (error); error = freebsd32_copyinmsghdr(uap->msg, &msg); if (error) return (error); error = freebsd32_copyiniov(PTRIN(m32.msg_iov), m32.msg_iovlen, &iov, EMSGSIZE); if (error) return (error); msg.msg_flags = uap->flags; uiov = msg.msg_iov; msg.msg_iov = iov; controlp = (msg.msg_control != NULL) ? &control : NULL; error = kern_recvit(td, uap->s, &msg, UIO_USERSPACE, controlp); if (error == 0) { msg.msg_iov = uiov; if (control != NULL) error = freebsd32_copy_msg_out(&msg, control); else msg.msg_controllen = 0; if (error == 0) error = freebsd32_copyoutmsghdr(&msg, uap->msg); } free(iov, M_IOV); if (control != NULL) m_freem(control); return (error); } static int freebsd32_convert_msg_in(struct mbuf **controlp) { struct mbuf *control = *controlp; struct cmsghdr *cm = mtod(control, struct cmsghdr *); void *data; socklen_t clen = control->m_len, datalen; int error; error = 0; *controlp = NULL; while (cm != NULL) { if (sizeof(struct cmsghdr) > clen || cm->cmsg_len > clen) { error = EINVAL; break; } data = FREEBSD32_CMSG_DATA(cm); datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; *controlp = sbcreatecontrol(data, datalen, cm->cmsg_type, cm->cmsg_level); controlp = &(*controlp)->m_next; if (FREEBSD32_CMSG_SPACE(datalen) < clen) { clen -= FREEBSD32_CMSG_SPACE(datalen); cm = (struct cmsghdr *) ((caddr_t)cm + FREEBSD32_CMSG_SPACE(datalen)); } else { clen = 0; cm = NULL; } } m_freem(control); return (error); } int freebsd32_sendmsg(struct thread *td, struct freebsd32_sendmsg_args *uap) { struct msghdr msg; struct msghdr32 m32; struct iovec *iov; struct mbuf *control = NULL; struct sockaddr *to = NULL; int error; error = copyin(uap->msg, &m32, sizeof(m32)); if (error) return (error); error = freebsd32_copyinmsghdr(uap->msg, &msg); if (error) return (error); error = freebsd32_copyiniov(PTRIN(m32.msg_iov), m32.msg_iovlen, &iov, EMSGSIZE); if (error) return (error); msg.msg_iov = iov; if (msg.msg_name != NULL) { error = getsockaddr(&to, msg.msg_name, msg.msg_namelen); if (error) { to = NULL; goto out; } msg.msg_name = to; } if (msg.msg_control) { if (msg.msg_controllen < sizeof(struct cmsghdr)) { error = EINVAL; goto out; } error = sockargs(&control, msg.msg_control, msg.msg_controllen, MT_CONTROL); if (error) goto out; error = freebsd32_convert_msg_in(&control); if (error) goto out; } error = kern_sendit(td, uap->s, &msg, uap->flags, control, UIO_USERSPACE); out: free(iov, M_IOV); if (to) free(to, M_SONAME); return (error); } int freebsd32_recvfrom(struct thread *td, struct freebsd32_recvfrom_args *uap) { struct msghdr msg; struct iovec aiov; int error; if (uap->fromlenaddr) { error = copyin(PTRIN(uap->fromlenaddr), &msg.msg_namelen, sizeof(msg.msg_namelen)); if (error) return (error); } else { msg.msg_namelen = 0; } msg.msg_name = PTRIN(uap->from); msg.msg_iov = &aiov; msg.msg_iovlen = 1; aiov.iov_base = PTRIN(uap->buf); aiov.iov_len = uap->len; msg.msg_control = NULL; msg.msg_flags = uap->flags; error = kern_recvit(td, uap->s, &msg, UIO_USERSPACE, NULL); if (error == 0 && uap->fromlenaddr) error = copyout(&msg.msg_namelen, PTRIN(uap->fromlenaddr), sizeof (msg.msg_namelen)); return (error); } int freebsd32_settimeofday(struct thread *td, struct freebsd32_settimeofday_args *uap) { struct timeval32 tv32; struct timeval tv, *tvp; struct timezone tz, *tzp; int error; if (uap->tv) { error = copyin(uap->tv, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, tv, tv_sec); CP(tv32, tv, tv_usec); tvp = &tv; } else tvp = NULL; if (uap->tzp) { error = copyin(uap->tzp, &tz, sizeof(tz)); if (error) return (error); tzp = &tz; } else tzp = NULL; return (kern_settimeofday(td, tvp, tzp)); } int freebsd32_utimes(struct thread *td, struct freebsd32_utimes_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->tptr != NULL) { error = copyin(uap->tptr, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_utimes(td, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE)); } int freebsd32_lutimes(struct thread *td, struct freebsd32_lutimes_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->tptr != NULL) { error = copyin(uap->tptr, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_lutimes(td, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE)); } int freebsd32_futimes(struct thread *td, struct freebsd32_futimes_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->tptr != NULL) { error = copyin(uap->tptr, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_futimes(td, uap->fd, sp, UIO_SYSSPACE)); } int freebsd32_futimesat(struct thread *td, struct freebsd32_futimesat_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->times != NULL) { error = copyin(uap->times, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE)); } int freebsd32_adjtime(struct thread *td, struct freebsd32_adjtime_args *uap) { struct timeval32 tv32; struct timeval delta, olddelta, *deltap; int error; if (uap->delta) { error = copyin(uap->delta, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, delta, tv_sec); CP(tv32, delta, tv_usec); deltap = δ } else deltap = NULL; error = kern_adjtime(td, deltap, &olddelta); if (uap->olddelta && error == 0) { CP(olddelta, tv32, tv_sec); CP(olddelta, tv32, tv_usec); error = copyout(&tv32, uap->olddelta, sizeof(tv32)); } return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_statfs(struct thread *td, struct freebsd4_freebsd32_statfs_args *uap) { struct statfs32 s32; struct statfs s; int error; error = kern_statfs(td, uap->path, UIO_USERSPACE, &s); if (error) return (error); copy_statfs(&s, &s32); return (copyout(&s32, uap->buf, sizeof(s32))); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_fstatfs(struct thread *td, struct freebsd4_freebsd32_fstatfs_args *uap) { struct statfs32 s32; struct statfs s; int error; error = kern_fstatfs(td, uap->fd, &s); if (error) return (error); copy_statfs(&s, &s32); return (copyout(&s32, uap->buf, sizeof(s32))); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_fhstatfs(struct thread *td, struct freebsd4_freebsd32_fhstatfs_args *uap) { struct statfs32 s32; struct statfs s; fhandle_t fh; int error; if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) return (error); error = kern_fhstatfs(td, fh, &s); if (error) return (error); copy_statfs(&s, &s32); return (copyout(&s32, uap->buf, sizeof(s32))); } #endif int freebsd32_pread(struct thread *td, struct freebsd32_pread_args *uap) { struct pread_args ap; ap.fd = uap->fd; ap.buf = uap->buf; ap.nbyte = uap->nbyte; ap.offset = PAIR32TO64(off_t,uap->offset); return (pread(td, &ap)); } int freebsd32_pwrite(struct thread *td, struct freebsd32_pwrite_args *uap) { struct pwrite_args ap; ap.fd = uap->fd; ap.buf = uap->buf; ap.nbyte = uap->nbyte; ap.offset = PAIR32TO64(off_t,uap->offset); return (pwrite(td, &ap)); } int freebsd32_lseek(struct thread *td, struct freebsd32_lseek_args *uap) { int error; struct lseek_args ap; off_t pos; ap.fd = uap->fd; ap.offset = PAIR32TO64(off_t,uap->offset); ap.whence = uap->whence; error = lseek(td, &ap); /* Expand the quad return into two parts for eax and edx */ pos = *(off_t *)(td->td_retval); td->td_retval[RETVAL_LO] = pos & 0xffffffff; /* %eax */ td->td_retval[RETVAL_HI] = pos >> 32; /* %edx */ return error; } int freebsd32_truncate(struct thread *td, struct freebsd32_truncate_args *uap) { struct truncate_args ap; ap.path = uap->path; ap.length = PAIR32TO64(off_t,uap->length); return (truncate(td, &ap)); } int freebsd32_ftruncate(struct thread *td, struct freebsd32_ftruncate_args *uap) { struct ftruncate_args ap; ap.fd = uap->fd; ap.length = PAIR32TO64(off_t,uap->length); return (ftruncate(td, &ap)); } int freebsd32_getdirentries(struct thread *td, struct freebsd32_getdirentries_args *uap) { long base; int32_t base32; int error; error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base); if (error) return (error); if (uap->basep != NULL) { base32 = base; error = copyout(&base32, uap->basep, sizeof(int32_t)); } return (error); } #ifdef COMPAT_FREEBSD6 /* versions with the 'int pad' argument */ int freebsd6_freebsd32_pread(struct thread *td, struct freebsd6_freebsd32_pread_args *uap) { struct pread_args ap; ap.fd = uap->fd; ap.buf = uap->buf; ap.nbyte = uap->nbyte; ap.offset = PAIR32TO64(off_t,uap->offset); return (pread(td, &ap)); } int freebsd6_freebsd32_pwrite(struct thread *td, struct freebsd6_freebsd32_pwrite_args *uap) { struct pwrite_args ap; ap.fd = uap->fd; ap.buf = uap->buf; ap.nbyte = uap->nbyte; ap.offset = PAIR32TO64(off_t,uap->offset); return (pwrite(td, &ap)); } int freebsd6_freebsd32_lseek(struct thread *td, struct freebsd6_freebsd32_lseek_args *uap) { int error; struct lseek_args ap; off_t pos; ap.fd = uap->fd; ap.offset = PAIR32TO64(off_t,uap->offset); ap.whence = uap->whence; error = lseek(td, &ap); /* Expand the quad return into two parts for eax and edx */ pos = *(off_t *)(td->td_retval); td->td_retval[RETVAL_LO] = pos & 0xffffffff; /* %eax */ td->td_retval[RETVAL_HI] = pos >> 32; /* %edx */ return error; } int freebsd6_freebsd32_truncate(struct thread *td, struct freebsd6_freebsd32_truncate_args *uap) { struct truncate_args ap; ap.path = uap->path; ap.length = PAIR32TO64(off_t,uap->length); return (truncate(td, &ap)); } int freebsd6_freebsd32_ftruncate(struct thread *td, struct freebsd6_freebsd32_ftruncate_args *uap) { struct ftruncate_args ap; ap.fd = uap->fd; ap.length = PAIR32TO64(off_t,uap->length); return (ftruncate(td, &ap)); } #endif /* COMPAT_FREEBSD6 */ struct sf_hdtr32 { uint32_t headers; int hdr_cnt; uint32_t trailers; int trl_cnt; }; static int freebsd32_do_sendfile(struct thread *td, struct freebsd32_sendfile_args *uap, int compat) { struct sendfile_args ap; struct sf_hdtr32 hdtr32; struct sf_hdtr hdtr; struct uio *hdr_uio, *trl_uio; struct iovec32 *iov32; int error; hdr_uio = trl_uio = NULL; ap.fd = uap->fd; ap.s = uap->s; ap.offset = PAIR32TO64(off_t,uap->offset); ap.nbytes = uap->nbytes; ap.hdtr = (struct sf_hdtr *)uap->hdtr; /* XXX not used */ ap.sbytes = uap->sbytes; ap.flags = uap->flags; if (uap->hdtr != NULL) { error = copyin(uap->hdtr, &hdtr32, sizeof(hdtr32)); if (error) goto out; PTRIN_CP(hdtr32, hdtr, headers); CP(hdtr32, hdtr, hdr_cnt); PTRIN_CP(hdtr32, hdtr, trailers); CP(hdtr32, hdtr, trl_cnt); if (hdtr.headers != NULL) { iov32 = PTRIN(hdtr32.headers); error = freebsd32_copyinuio(iov32, hdtr32.hdr_cnt, &hdr_uio); if (error) goto out; } if (hdtr.trailers != NULL) { iov32 = PTRIN(hdtr32.trailers); error = freebsd32_copyinuio(iov32, hdtr32.trl_cnt, &trl_uio); if (error) goto out; } } error = kern_sendfile(td, &ap, hdr_uio, trl_uio, compat); out: if (hdr_uio) free(hdr_uio, M_IOV); if (trl_uio) free(trl_uio, M_IOV); return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_sendfile(struct thread *td, struct freebsd4_freebsd32_sendfile_args *uap) { return (freebsd32_do_sendfile(td, (struct freebsd32_sendfile_args *)uap, 1)); } #endif int freebsd32_sendfile(struct thread *td, struct freebsd32_sendfile_args *uap) { return (freebsd32_do_sendfile(td, uap, 0)); } static void copy_stat(struct stat *in, struct stat32 *out) { CP(*in, *out, st_dev); CP(*in, *out, st_ino); CP(*in, *out, st_mode); CP(*in, *out, st_nlink); CP(*in, *out, st_uid); CP(*in, *out, st_gid); CP(*in, *out, st_rdev); TS_CP(*in, *out, st_atim); TS_CP(*in, *out, st_mtim); TS_CP(*in, *out, st_ctim); CP(*in, *out, st_size); CP(*in, *out, st_blocks); CP(*in, *out, st_blksize); CP(*in, *out, st_flags); CP(*in, *out, st_gen); TS_CP(*in, *out, st_birthtim); } int freebsd32_stat(struct thread *td, struct freebsd32_stat_args *uap) { struct stat sb; struct stat32 sb32; int error; error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); if (error) return (error); copy_stat(&sb, &sb32); error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } int freebsd32_fstat(struct thread *td, struct freebsd32_fstat_args *uap) { struct stat ub; struct stat32 ub32; int error; error = kern_fstat(td, uap->fd, &ub); if (error) return (error); copy_stat(&ub, &ub32); error = copyout(&ub32, uap->ub, sizeof(ub32)); return (error); } int freebsd32_fstatat(struct thread *td, struct freebsd32_fstatat_args *uap) { struct stat ub; struct stat32 ub32; int error; error = kern_statat(td, uap->flag, uap->fd, uap->path, UIO_USERSPACE, &ub); if (error) return (error); copy_stat(&ub, &ub32); error = copyout(&ub32, uap->buf, sizeof(ub32)); return (error); } int freebsd32_lstat(struct thread *td, struct freebsd32_lstat_args *uap) { struct stat sb; struct stat32 sb32; int error; error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); if (error) return (error); copy_stat(&sb, &sb32); error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } /* * MPSAFE */ int freebsd32_sysctl(struct thread *td, struct freebsd32_sysctl_args *uap) { int error, name[CTL_MAXNAME]; size_t j, oldlen; if (uap->namelen > CTL_MAXNAME || uap->namelen < 2) return (EINVAL); error = copyin(uap->name, name, uap->namelen * sizeof(int)); if (error) return (error); if (uap->oldlenp) oldlen = fuword32(uap->oldlenp); else oldlen = 0; error = userland_sysctl(td, name, uap->namelen, uap->old, &oldlen, 1, uap->new, uap->newlen, &j, SCTL_MASK32); if (error && error != ENOMEM) return (error); if (uap->oldlenp) suword32(uap->oldlenp, j); return (0); } int freebsd32_jail(struct thread *td, struct freebsd32_jail_args *uap) { uint32_t version; int error; struct jail j; error = copyin(uap->jail, &version, sizeof(uint32_t)); if (error) return (error); switch (version) { case 0: { /* FreeBSD single IPv4 jails. */ struct jail32_v0 j32_v0; bzero(&j, sizeof(struct jail)); error = copyin(uap->jail, &j32_v0, sizeof(struct jail32_v0)); if (error) return (error); CP(j32_v0, j, version); PTRIN_CP(j32_v0, j, path); PTRIN_CP(j32_v0, j, hostname); j.ip4s = j32_v0.ip_number; break; } case 1: /* * Version 1 was used by multi-IPv4 jail implementations * that never made it into the official kernel. */ return (EINVAL); case 2: /* JAIL_API_VERSION */ { /* FreeBSD multi-IPv4/IPv6,noIP jails. */ struct jail32 j32; error = copyin(uap->jail, &j32, sizeof(struct jail32)); if (error) return (error); CP(j32, j, version); PTRIN_CP(j32, j, path); PTRIN_CP(j32, j, hostname); PTRIN_CP(j32, j, jailname); CP(j32, j, ip4s); CP(j32, j, ip6s); PTRIN_CP(j32, j, ip4); PTRIN_CP(j32, j, ip6); break; } default: /* Sci-Fi jails are not supported, sorry. */ return (EINVAL); } return (kern_jail(td, &j)); } int freebsd32_jail_set(struct thread *td, struct freebsd32_jail_set_args *uap) { struct uio *auio; int error; /* Check that we have an even number of iovecs. */ if (uap->iovcnt & 1) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_set(td, auio, uap->flags); free(auio, M_IOV); return (error); } int freebsd32_jail_get(struct thread *td, struct freebsd32_jail_get_args *uap) { struct iovec32 iov32; struct uio *auio; int error, i; /* Check that we have an even number of iovecs. */ if (uap->iovcnt & 1) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_get(td, auio, uap->flags); if (error == 0) for (i = 0; i < uap->iovcnt; i++) { PTROUT_CP(auio->uio_iov[i], iov32, iov_base); CP(auio->uio_iov[i], iov32, iov_len); error = copyout(&iov32, uap->iovp + i, sizeof(iov32)); if (error != 0) break; } free(auio, M_IOV); return (error); } int freebsd32_sigaction(struct thread *td, struct freebsd32_sigaction_args *uap) { struct sigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->act) { error = copyin(uap->act, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); CP(s32, sa, sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->sig, sap, &osa, 0); if (error == 0 && uap->oact != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); CP(osa, s32, sa_mask); error = copyout(&s32, uap->oact, sizeof(s32)); } return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_sigaction(struct thread *td, struct freebsd4_freebsd32_sigaction_args *uap) { struct sigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->act) { error = copyin(uap->act, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); CP(s32, sa, sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->sig, sap, &osa, KSA_FREEBSD4); if (error == 0 && uap->oact != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); CP(osa, s32, sa_mask); error = copyout(&s32, uap->oact, sizeof(s32)); } return (error); } #endif #ifdef COMPAT_43 struct osigaction32 { u_int32_t sa_u; osigset_t sa_mask; int sa_flags; }; #define ONSIG 32 int ofreebsd32_sigaction(struct thread *td, struct ofreebsd32_sigaction_args *uap) { struct osigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->signum <= 0 || uap->signum >= ONSIG) return (EINVAL); if (uap->nsa) { error = copyin(uap->nsa, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); OSIG2SIG(s32.sa_mask, sa.sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->signum, sap, &osa, KSA_OSIGSET); if (error == 0 && uap->osa != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); SIG2OSIG(osa.sa_mask, s32.sa_mask); error = copyout(&s32, uap->osa, sizeof(s32)); } return (error); } int ofreebsd32_sigprocmask(struct thread *td, struct ofreebsd32_sigprocmask_args *uap) { sigset_t set, oset; int error; OSIG2SIG(uap->mask, set); error = kern_sigprocmask(td, uap->how, &set, &oset, SIGPROCMASK_OLD); SIG2OSIG(oset, td->td_retval[0]); return (error); } int ofreebsd32_sigpending(struct thread *td, struct ofreebsd32_sigpending_args *uap) { struct proc *p = td->td_proc; sigset_t siglist; PROC_LOCK(p); siglist = p->p_siglist; SIGSETOR(siglist, td->td_siglist); PROC_UNLOCK(p); SIG2OSIG(siglist, td->td_retval[0]); return (0); } struct sigvec32 { u_int32_t sv_handler; int sv_mask; int sv_flags; }; int ofreebsd32_sigvec(struct thread *td, struct ofreebsd32_sigvec_args *uap) { struct sigvec32 vec; struct sigaction sa, osa, *sap; int error; if (uap->signum <= 0 || uap->signum >= ONSIG) return (EINVAL); if (uap->nsv) { error = copyin(uap->nsv, &vec, sizeof(vec)); if (error) return (error); sa.sa_handler = PTRIN(vec.sv_handler); OSIG2SIG(vec.sv_mask, sa.sa_mask); sa.sa_flags = vec.sv_flags; sa.sa_flags ^= SA_RESTART; sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->signum, sap, &osa, KSA_OSIGSET); if (error == 0 && uap->osv != NULL) { vec.sv_handler = PTROUT(osa.sa_handler); SIG2OSIG(osa.sa_mask, vec.sv_mask); vec.sv_flags = osa.sa_flags; vec.sv_flags &= ~SA_NOCLDWAIT; vec.sv_flags ^= SA_RESTART; error = copyout(&vec, uap->osv, sizeof(vec)); } return (error); } int ofreebsd32_sigblock(struct thread *td, struct ofreebsd32_sigblock_args *uap) { sigset_t set, oset; OSIG2SIG(uap->mask, set); kern_sigprocmask(td, SIG_BLOCK, &set, &oset, 0); SIG2OSIG(oset, td->td_retval[0]); return (0); } int ofreebsd32_sigsetmask(struct thread *td, struct ofreebsd32_sigsetmask_args *uap) { sigset_t set, oset; OSIG2SIG(uap->mask, set); kern_sigprocmask(td, SIG_SETMASK, &set, &oset, 0); SIG2OSIG(oset, td->td_retval[0]); return (0); } int ofreebsd32_sigsuspend(struct thread *td, struct ofreebsd32_sigsuspend_args *uap) { sigset_t mask; OSIG2SIG(uap->mask, mask); return (kern_sigsuspend(td, mask)); } struct sigstack32 { u_int32_t ss_sp; int ss_onstack; }; int ofreebsd32_sigstack(struct thread *td, struct ofreebsd32_sigstack_args *uap) { struct sigstack32 s32; struct sigstack nss, oss; int error = 0, unss; if (uap->nss != NULL) { error = copyin(uap->nss, &s32, sizeof(s32)); if (error) return (error); nss.ss_sp = PTRIN(s32.ss_sp); CP(s32, nss, ss_onstack); unss = 1; } else { unss = 0; } oss.ss_sp = td->td_sigstk.ss_sp; oss.ss_onstack = sigonstack(cpu_getstack(td)); if (unss) { td->td_sigstk.ss_sp = nss.ss_sp; td->td_sigstk.ss_size = 0; td->td_sigstk.ss_flags |= (nss.ss_onstack & SS_ONSTACK); td->td_pflags |= TDP_ALTSTACK; } if (uap->oss != NULL) { s32.ss_sp = PTROUT(oss.ss_sp); CP(oss, s32, ss_onstack); error = copyout(&s32, uap->oss, sizeof(s32)); } return (error); } #endif int freebsd32_nanosleep(struct thread *td, struct freebsd32_nanosleep_args *uap) { struct timespec32 rmt32, rqt32; struct timespec rmt, rqt; int error; error = copyin(uap->rqtp, &rqt32, sizeof(rqt32)); if (error) return (error); CP(rqt32, rqt, tv_sec); CP(rqt32, rqt, tv_nsec); if (uap->rmtp && !useracc((caddr_t)uap->rmtp, sizeof(rmt), VM_PROT_WRITE)) return (EFAULT); error = kern_nanosleep(td, &rqt, &rmt); if (error && uap->rmtp) { int error2; CP(rmt, rmt32, tv_sec); CP(rmt, rmt32, tv_nsec); error2 = copyout(&rmt32, uap->rmtp, sizeof(rmt32)); if (error2) error = error2; } return (error); } int freebsd32_clock_gettime(struct thread *td, struct freebsd32_clock_gettime_args *uap) { struct timespec ats; struct timespec32 ats32; int error; error = kern_clock_gettime(td, uap->clock_id, &ats); if (error == 0) { CP(ats, ats32, tv_sec); CP(ats, ats32, tv_nsec); error = copyout(&ats32, uap->tp, sizeof(ats32)); } return (error); } int freebsd32_clock_settime(struct thread *td, struct freebsd32_clock_settime_args *uap) { struct timespec ats; struct timespec32 ats32; int error; error = copyin(uap->tp, &ats32, sizeof(ats32)); if (error) return (error); CP(ats32, ats, tv_sec); CP(ats32, ats, tv_nsec); return (kern_clock_settime(td, uap->clock_id, &ats)); } int freebsd32_clock_getres(struct thread *td, struct freebsd32_clock_getres_args *uap) { struct timespec ts; struct timespec32 ts32; int error; if (uap->tp == NULL) return (0); error = kern_clock_getres(td, uap->clock_id, &ts); if (error == 0) { CP(ts, ts32, tv_sec); CP(ts, ts32, tv_nsec); error = copyout(&ts32, uap->tp, sizeof(ts32)); } return (error); } int freebsd32_thr_new(struct thread *td, struct freebsd32_thr_new_args *uap) { struct thr_param32 param32; struct thr_param param; int error; if (uap->param_size < 0 || uap->param_size > sizeof(struct thr_param32)) return (EINVAL); bzero(¶m, sizeof(struct thr_param)); bzero(¶m32, sizeof(struct thr_param32)); error = copyin(uap->param, ¶m32, uap->param_size); if (error != 0) return (error); param.start_func = PTRIN(param32.start_func); param.arg = PTRIN(param32.arg); param.stack_base = PTRIN(param32.stack_base); param.stack_size = param32.stack_size; param.tls_base = PTRIN(param32.tls_base); param.tls_size = param32.tls_size; param.child_tid = PTRIN(param32.child_tid); param.parent_tid = PTRIN(param32.parent_tid); param.flags = param32.flags; param.rtp = PTRIN(param32.rtp); param.spare[0] = PTRIN(param32.spare[0]); param.spare[1] = PTRIN(param32.spare[1]); param.spare[2] = PTRIN(param32.spare[2]); return (kern_thr_new(td, ¶m)); } int freebsd32_thr_suspend(struct thread *td, struct freebsd32_thr_suspend_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; int error; error = 0; tsp = NULL; if (uap->timeout != NULL) { error = copyin((const void *)uap->timeout, (void *)&ts32, sizeof(struct timespec32)); if (error != 0) return (error); ts.tv_sec = ts32.tv_sec; ts.tv_nsec = ts32.tv_nsec; tsp = &ts; } return (kern_thr_suspend(td, tsp)); } void siginfo_to_siginfo32(const siginfo_t *src, struct siginfo32 *dst) { bzero(dst, sizeof(*dst)); dst->si_signo = src->si_signo; dst->si_errno = src->si_errno; dst->si_code = src->si_code; dst->si_pid = src->si_pid; dst->si_uid = src->si_uid; dst->si_status = src->si_status; dst->si_addr = (uintptr_t)src->si_addr; dst->si_value.sigval_int = src->si_value.sival_int; dst->si_timerid = src->si_timerid; dst->si_overrun = src->si_overrun; } int freebsd32_sigtimedwait(struct thread *td, struct freebsd32_sigtimedwait_args *uap) { struct timespec32 ts32; struct timespec ts; struct timespec *timeout; sigset_t set; ksiginfo_t ksi; struct siginfo32 si32; int error; if (uap->timeout) { error = copyin(uap->timeout, &ts32, sizeof(ts32)); if (error) return (error); ts.tv_sec = ts32.tv_sec; ts.tv_nsec = ts32.tv_nsec; timeout = &ts; } else timeout = NULL; error = copyin(uap->set, &set, sizeof(set)); if (error) return (error); error = kern_sigtimedwait(td, set, &ksi, timeout); if (error) return (error); if (uap->info) { siginfo_to_siginfo32(&ksi.ksi_info, &si32); error = copyout(&si32, uap->info, sizeof(struct siginfo32)); } if (error == 0) td->td_retval[0] = ksi.ksi_signo; return (error); } /* * MPSAFE */ int freebsd32_sigwaitinfo(struct thread *td, struct freebsd32_sigwaitinfo_args *uap) { ksiginfo_t ksi; struct siginfo32 si32; sigset_t set; int error; error = copyin(uap->set, &set, sizeof(set)); if (error) return (error); error = kern_sigtimedwait(td, set, &ksi, NULL); if (error) return (error); if (uap->info) { siginfo_to_siginfo32(&ksi.ksi_info, &si32); error = copyout(&si32, uap->info, sizeof(struct siginfo32)); } if (error == 0) td->td_retval[0] = ksi.ksi_signo; return (error); } int freebsd32_cpuset_setid(struct thread *td, struct freebsd32_cpuset_setid_args *uap) { struct cpuset_setid_args ap; ap.which = uap->which; ap.id = PAIR32TO64(id_t,uap->id); ap.setid = uap->setid; return (cpuset_setid(td, &ap)); } int freebsd32_cpuset_getid(struct thread *td, struct freebsd32_cpuset_getid_args *uap) { struct cpuset_getid_args ap; ap.level = uap->level; ap.which = uap->which; ap.id = PAIR32TO64(id_t,uap->id); ap.setid = uap->setid; return (cpuset_getid(td, &ap)); } int freebsd32_cpuset_getaffinity(struct thread *td, struct freebsd32_cpuset_getaffinity_args *uap) { struct cpuset_getaffinity_args ap; ap.level = uap->level; ap.which = uap->which; ap.id = PAIR32TO64(id_t,uap->id); ap.cpusetsize = uap->cpusetsize; ap.mask = uap->mask; return (cpuset_getaffinity(td, &ap)); } int freebsd32_cpuset_setaffinity(struct thread *td, struct freebsd32_cpuset_setaffinity_args *uap) { struct cpuset_setaffinity_args ap; ap.level = uap->level; ap.which = uap->which; ap.id = PAIR32TO64(id_t,uap->id); ap.cpusetsize = uap->cpusetsize; ap.mask = uap->mask; return (cpuset_setaffinity(td, &ap)); } int freebsd32_nmount(struct thread *td, struct freebsd32_nmount_args /* { struct iovec *iovp; unsigned int iovcnt; int flags; } */ *uap) { struct uio *auio; int error; AUDIT_ARG_FFLAGS(uap->flags); /* * Filter out MNT_ROOTFS. We do not want clients of nmount() in * userspace to set this flag, but we must filter it out if we want * MNT_UPDATE on the root file system to work. - * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try(). + * MNT_ROOTFS should only be set by the kernel when mounting its + * root file system. */ uap->flags &= ~MNT_ROOTFS; /* * check that we have an even number of iovec's * and that we have at least two options. */ if ((uap->iovcnt & 1) || (uap->iovcnt < 4)) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = vfs_donmount(td, uap->flags, auio); free(auio, M_IOV); return error; } #if 0 int freebsd32_xxx(struct thread *td, struct freebsd32_xxx_args *uap) { struct yyy32 *p32, s32; struct yyy *p = NULL, s; struct xxx_arg ap; int error; if (uap->zzz) { error = copyin(uap->zzz, &s32, sizeof(s32)); if (error) return (error); /* translate in */ p = &s; } error = kern_xxx(td, p); if (error) return (error); if (uap->zzz) { /* translate out */ error = copyout(&s32, p32, sizeof(s32)); } return (error); } #endif int syscall32_register(int *offset, struct sysent *new_sysent, struct sysent *old_sysent) { if (*offset == NO_SYSCALL) { int i; for (i = 1; i < SYS_MAXSYSCALL; ++i) if (freebsd32_sysent[i].sy_call == (sy_call_t *)lkmnosys) break; if (i == SYS_MAXSYSCALL) return (ENFILE); *offset = i; } else if (*offset < 0 || *offset >= SYS_MAXSYSCALL) return (EINVAL); else if (freebsd32_sysent[*offset].sy_call != (sy_call_t *)lkmnosys && freebsd32_sysent[*offset].sy_call != (sy_call_t *)lkmressys) return (EEXIST); *old_sysent = freebsd32_sysent[*offset]; freebsd32_sysent[*offset] = *new_sysent; return 0; } int syscall32_deregister(int *offset, struct sysent *old_sysent) { if (*offset) freebsd32_sysent[*offset] = *old_sysent; return 0; } int syscall32_module_handler(struct module *mod, int what, void *arg) { struct syscall_module_data *data = (struct syscall_module_data*)arg; modspecific_t ms; int error; switch (what) { case MOD_LOAD: error = syscall32_register(data->offset, data->new_sysent, &data->old_sysent); if (error) { /* Leave a mark so we know to safely unload below. */ data->offset = NULL; return error; } ms.intval = *data->offset; MOD_XLOCK; module_setspecific(mod, &ms); MOD_XUNLOCK; if (data->chainevh) error = data->chainevh(mod, what, data->chainarg); return (error); case MOD_UNLOAD: /* * MOD_LOAD failed, so just return without calling the * chained handler since we didn't pass along the MOD_LOAD * event. */ if (data->offset == NULL) return (0); if (data->chainevh) { error = data->chainevh(mod, what, data->chainarg); if (error) return (error); } error = syscall32_deregister(data->offset, &data->old_sysent); return (error); default: error = EOPNOTSUPP; if (data->chainevh) error = data->chainevh(mod, what, data->chainarg); return (error); } } int syscall32_helper_register(struct syscall_helper_data *sd) { struct syscall_helper_data *sd1; int error; for (sd1 = sd; sd1->syscall_no != NO_SYSCALL; sd1++) { error = syscall32_register(&sd1->syscall_no, &sd1->new_sysent, &sd1->old_sysent); if (error != 0) { syscall32_helper_unregister(sd); return (error); } sd1->registered = 1; } return (0); } int syscall32_helper_unregister(struct syscall_helper_data *sd) { struct syscall_helper_data *sd1; for (sd1 = sd; sd1->registered != 0; sd1++) { syscall32_deregister(&sd1->syscall_no, &sd1->old_sysent); sd1->registered = 0; } return (0); } register_t * freebsd32_copyout_strings(struct image_params *imgp) { int argc, envc, i; u_int32_t *vectp; char *stringp, *destp; u_int32_t *stack_base; struct freebsd32_ps_strings *arginfo; char canary[sizeof(long) * 8]; int32_t pagesizes32[MAXPAGESIZES]; size_t execpath_len; int szsigcode; /* * Calculate string base and vector table pointers. * Also deal with signal trampoline code for this exec type. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; arginfo = (struct freebsd32_ps_strings *)curproc->p_sysent-> sv_psstrings; szsigcode = *(imgp->proc->p_sysent->sv_szsigcode); destp = (caddr_t)arginfo - szsigcode - SPARE_USRSPACE - roundup(execpath_len, sizeof(char *)) - roundup(sizeof(canary), sizeof(char *)) - roundup(sizeof(pagesizes32), sizeof(char *)) - roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *)); /* * install sigcode */ if (szsigcode) copyout(imgp->proc->p_sysent->sv_sigcode, ((caddr_t)arginfo - szsigcode), szsigcode); /* * Copy the image path for the rtld. */ if (execpath_len != 0) { imgp->execpathp = (uintptr_t)arginfo - szsigcode - execpath_len; copyout(imgp->execpath, (void *)imgp->execpathp, execpath_len); } /* * Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); imgp->canary = (uintptr_t)arginfo - szsigcode - execpath_len - sizeof(canary); copyout(canary, (void *)imgp->canary, sizeof(canary)); imgp->canarylen = sizeof(canary); /* * Prepare the pagesizes array. */ for (i = 0; i < MAXPAGESIZES; i++) pagesizes32[i] = (uint32_t)pagesizes[i]; imgp->pagesizes = (uintptr_t)arginfo - szsigcode - execpath_len - roundup(sizeof(canary), sizeof(char *)) - sizeof(pagesizes32); copyout(pagesizes32, (void *)imgp->pagesizes, sizeof(pagesizes32)); imgp->pagesizeslen = sizeof(pagesizes32); /* * If we have a valid auxargs ptr, prepare some room * on the stack. */ if (imgp->auxargs) { /* * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for * lower compatibility. */ imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : (AT_COUNT * 2); /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets,and imgp->auxarg_size is room * for argument of Runtime loader. */ vectp = (u_int32_t *) (destp - (imgp->args->argc + imgp->args->envc + 2 + imgp->auxarg_size + execpath_len) * sizeof(u_int32_t)); } else /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets */ vectp = (u_int32_t *) (destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(u_int32_t)); /* * vectp also becomes our initial stack base */ stack_base = vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* * Copy out strings - arguments and environment. */ copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); /* * Fill in "ps_strings" struct for ps, w, etc. */ suword32(&arginfo->ps_argvstr, (u_int32_t)(intptr_t)vectp); suword32(&arginfo->ps_nargvstr, argc); /* * Fill in argument portion of vector table. */ for (; argc > 0; --argc) { suword32(vectp++, (u_int32_t)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* a null vector table pointer separates the argp's from the envp's */ suword32(vectp++, 0); suword32(&arginfo->ps_envstr, (u_int32_t)(intptr_t)vectp); suword32(&arginfo->ps_nenvstr, envc); /* * Fill in environment portion of vector table. */ for (; envc > 0; --envc) { suword32(vectp++, (u_int32_t)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* end of vector table is a null pointer */ suword32(vectp, 0); return ((register_t *)stack_base); } Index: projects/binutils-2.17/sys/compat/ia32/ia32_signal.h =================================================================== --- projects/binutils-2.17/sys/compat/ia32/ia32_signal.h (revision 215829) +++ projects/binutils-2.17/sys/compat/ia32/ia32_signal.h (revision 215830) @@ -1,190 +1,195 @@ /*- * Copyright (c) 1999 Marcel Moolenaar * Copyright (c) 2003 Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ +#ifndef _COMPAT_IA32_IA32_SIGNAL_H +#define _COMPAT_IA32_IA32_SIGNAL_H + struct ia32_mcontext { u_int32_t mc_onstack; /* XXX - sigcontext compat. */ u_int32_t mc_gs; /* machine state (struct trapframe) */ u_int32_t mc_fs; u_int32_t mc_es; u_int32_t mc_ds; u_int32_t mc_edi; u_int32_t mc_esi; u_int32_t mc_ebp; u_int32_t mc_isp; u_int32_t mc_ebx; u_int32_t mc_edx; u_int32_t mc_ecx; u_int32_t mc_eax; u_int32_t mc_trapno; u_int32_t mc_err; u_int32_t mc_eip; u_int32_t mc_cs; u_int32_t mc_eflags; u_int32_t mc_esp; u_int32_t mc_ss; u_int32_t mc_len; /* sizeof(struct ia32_mcontext) */ /* We use the same values for fpformat and ownedfp */ u_int32_t mc_fpformat; u_int32_t mc_ownedfp; u_int32_t mc_spare1[1]; /* align next field to 16 bytes */ /* * See for the internals of mc_fpstate[]. */ u_int32_t mc_fpstate[128] __aligned(16); u_int32_t mc_fsbase; u_int32_t mc_gsbase; u_int32_t mc_spare2[6]; }; struct ia32_ucontext { sigset_t uc_sigmask; struct ia32_mcontext uc_mcontext; u_int32_t uc_link; struct sigaltstack32 uc_stack; u_int32_t uc_flags; u_int32_t __spare__[4]; }; #if defined(COMPAT_FREEBSD4) struct ia32_mcontext4 { u_int32_t mc_onstack; /* XXX - sigcontext compat. */ u_int32_t mc_gs; /* machine state (struct trapframe) */ u_int32_t mc_fs; u_int32_t mc_es; u_int32_t mc_ds; u_int32_t mc_edi; u_int32_t mc_esi; u_int32_t mc_ebp; u_int32_t mc_isp; u_int32_t mc_ebx; u_int32_t mc_edx; u_int32_t mc_ecx; u_int32_t mc_eax; u_int32_t mc_trapno; u_int32_t mc_err; u_int32_t mc_eip; u_int32_t mc_cs; u_int32_t mc_eflags; u_int32_t mc_esp; u_int32_t mc_ss; u_int32_t mc_fpregs[28]; u_int32_t __spare__[17]; }; struct ia32_ucontext4 { sigset_t uc_sigmask; struct ia32_mcontext4 uc_mcontext; u_int32_t uc_link; struct sigaltstack32 uc_stack; u_int32_t __spare__[8]; }; #endif #ifdef COMPAT_FREEBSD3 struct ia32_sigcontext3 { u_int32_t sc_onstack; u_int32_t sc_mask; u_int32_t sc_esp; u_int32_t sc_ebp; u_int32_t sc_isp; u_int32_t sc_eip; u_int32_t sc_eflags; u_int32_t sc_es; u_int32_t sc_ds; u_int32_t sc_cs; u_int32_t sc_ss; u_int32_t sc_edi; u_int32_t sc_esi; u_int32_t sc_ebx; u_int32_t sc_edx; u_int32_t sc_ecx; u_int32_t sc_eax; u_int32_t sc_gs; u_int32_t sc_fs; u_int32_t sc_trapno; u_int32_t sc_err; }; #endif /* * Signal frames, arguments passed to application signal handlers. */ #ifdef COMPAT_FREEBSD4 struct ia32_sigframe4 { u_int32_t sf_signum; u_int32_t sf_siginfo; /* code or pointer to sf_si */ u_int32_t sf_ucontext; /* points to sf_uc */ u_int32_t sf_addr; /* undocumented 4th arg */ u_int32_t sf_ah; /* action/handler pointer */ struct ia32_ucontext4 sf_uc; /* = *sf_ucontext */ struct siginfo32 sf_si; /* = *sf_siginfo (SA_SIGINFO case) */ }; #endif struct ia32_sigframe { u_int32_t sf_signum; u_int32_t sf_siginfo; /* code or pointer to sf_si */ u_int32_t sf_ucontext; /* points to sf_uc */ u_int32_t sf_addr; /* undocumented 4th arg */ u_int32_t sf_ah; /* action/handler pointer */ /* Beware, hole due to ucontext being 16 byte aligned! */ struct ia32_ucontext sf_uc; /* = *sf_ucontext */ struct siginfo32 sf_si; /* = *sf_siginfo (SA_SIGINFO case) */ }; #ifdef COMPAT_FREEBSD3 struct ia32_siginfo3 { struct ia32_sigcontext3 si_sc; int si_signo; int si_code; union sigval32 si_value; }; struct ia32_sigframe3 { int sf_signum; u_int32_t sf_arg2; /* int or siginfo_t */ u_int32_t sf_scp; u_int32_t sf_addr; u_int32_t sf_ah; /* action/handler pointer */ struct ia32_siginfo3 sf_siginfo; }; #endif struct ksiginfo; struct image_params; extern char ia32_sigcode[]; extern char freebsd4_ia32_sigcode[]; extern int sz_ia32_sigcode; extern int sz_freebsd4_ia32_sigcode; extern void ia32_sendsig(sig_t, struct ksiginfo *, sigset_t *); extern void ia32_setregs(struct thread *td, struct image_params *imgp, u_long stack); + +#endif Index: projects/binutils-2.17/sys/compat/ia32/ia32_util.h =================================================================== --- projects/binutils-2.17/sys/compat/ia32/ia32_util.h (revision 215829) +++ projects/binutils-2.17/sys/compat/ia32/ia32_util.h (revision 215830) @@ -1,53 +1,58 @@ /*- * Copyright (c) 1998-1999 Andrew Gallatin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software withough specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ +#ifndef _COMPAT_IA32_IA32_UTIL_H +#define _COMPAT_IA32_IA32_UTIL_H + #include #include #include #include #include #include #ifdef __ia64__ #define FREEBSD32_USRSTACK ((1ul << 32) - IA32_PAGE_SIZE * 2) #else #define FREEBSD32_USRSTACK ((1ul << 32) - IA32_PAGE_SIZE) #endif #define IA32_PAGE_SIZE 4096 #define IA32_MAXDSIZ (512*1024*1024) /* 512MB */ #define IA32_MAXSSIZ (64*1024*1024) /* 64MB */ #define IA32_MAXVMEM 0 /* Unlimited */ struct syscall_args; int ia32_fetch_syscall_args(struct thread *td, struct syscall_args *sa); void ia32_set_syscall_retval(struct thread *, int); + +#endif Index: projects/binutils-2.17/sys/compat/ndis/ntoskrnl_var.h =================================================================== --- projects/binutils-2.17/sys/compat/ndis/ntoskrnl_var.h (revision 215829) +++ projects/binutils-2.17/sys/compat/ndis/ntoskrnl_var.h (revision 215830) @@ -1,1527 +1,1528 @@ /*- * Copyright (c) 2003 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NTOSKRNL_VAR_H_ #define _NTOSKRNL_VAR_H_ #define MTX_NTOSKRNL_SPIN_LOCK "NDIS thread lock" /* * us_buf is really a wchar_t *, but it's inconvenient to include * all the necessary header goop needed to define it, and it's a * pointer anyway, so for now, just make it a uint16_t *. */ struct unicode_string { uint16_t us_len; uint16_t us_maxlen; uint16_t *us_buf; }; typedef struct unicode_string unicode_string; struct ansi_string { uint16_t as_len; uint16_t as_maxlen; char *as_buf; }; typedef struct ansi_string ansi_string; /* * Windows memory descriptor list. In Windows, it's possible for * buffers to be passed between user and kernel contexts without * copying. Buffers may also be allocated in either paged or * non-paged memory regions. An MDL describes the pages of memory * used to contain a particular buffer. Note that a single MDL * may describe a buffer that spans multiple pages. An array of * page addresses appears immediately after the MDL structure itself. * MDLs are therefore implicitly variably sized, even though they * don't look it. * * Note that in FreeBSD, we can take many shortcuts in the way * we handle MDLs because: * * - We are only concerned with pages in kernel context. This means * we will only ever use the kernel's memory map, and remapping * of buffers is never needed. * * - Kernel pages can never be paged out, so we don't have to worry * about whether or not a page is actually mapped before going to * touch it. */ struct mdl { struct mdl *mdl_next; uint16_t mdl_size; uint16_t mdl_flags; void *mdl_process; void *mdl_mappedsystemva; void *mdl_startva; uint32_t mdl_bytecount; uint32_t mdl_byteoffset; }; typedef struct mdl mdl, ndis_buffer; /* MDL flags */ #define MDL_MAPPED_TO_SYSTEM_VA 0x0001 #define MDL_PAGES_LOCKED 0x0002 #define MDL_SOURCE_IS_NONPAGED_POOL 0x0004 #define MDL_ALLOCATED_FIXED_SIZE 0x0008 #define MDL_PARTIAL 0x0010 #define MDL_PARTIAL_HAS_BEEN_MAPPED 0x0020 #define MDL_IO_PAGE_READ 0x0040 #define MDL_WRITE_OPERATION 0x0080 #define MDL_PARENT_MAPPED_SYSTEM_VA 0x0100 #define MDL_FREE_EXTRA_PTES 0x0200 #define MDL_IO_SPACE 0x0800 #define MDL_NETWORK_HEADER 0x1000 #define MDL_MAPPING_CAN_FAIL 0x2000 #define MDL_ALLOCATED_MUST_SUCCEED 0x4000 #define MDL_ZONE_ALLOCED 0x8000 /* BSD private */ #define MDL_ZONE_PAGES 16 #define MDL_ZONE_SIZE (sizeof(mdl) + (sizeof(vm_offset_t) * MDL_ZONE_PAGES)) /* Note: assumes x86 page size of 4K. */ #ifndef PAGE_SHIFT #if PAGE_SIZE == 4096 #define PAGE_SHIFT 12 #elif PAGE_SIZE == 8192 #define PAGE_SHIFT 13 #else #error PAGE_SHIFT undefined! #endif #endif #define SPAN_PAGES(ptr, len) \ ((uint32_t)((((uintptr_t)(ptr) & (PAGE_SIZE - 1)) + \ (len) + (PAGE_SIZE - 1)) >> PAGE_SHIFT)) #define PAGE_ALIGN(ptr) \ ((void *)((uintptr_t)(ptr) & ~(PAGE_SIZE - 1))) #define BYTE_OFFSET(ptr) \ ((uint32_t)((uintptr_t)(ptr) & (PAGE_SIZE - 1))) #define MDL_PAGES(m) (vm_offset_t *)(m + 1) #define MmInitializeMdl(b, baseva, len) \ (b)->mdl_next = NULL; \ (b)->mdl_size = (uint16_t)(sizeof(mdl) + \ (sizeof(vm_offset_t) * SPAN_PAGES((baseva), (len)))); \ (b)->mdl_flags = 0; \ (b)->mdl_startva = (void *)PAGE_ALIGN((baseva)); \ (b)->mdl_byteoffset = BYTE_OFFSET((baseva)); \ (b)->mdl_bytecount = (uint32_t)(len); #define MmGetMdlByteOffset(mdl) ((mdl)->mdl_byteoffset) #define MmGetMdlByteCount(mdl) ((mdl)->mdl_bytecount) #define MmGetMdlVirtualAddress(mdl) \ ((void *)((char *)((mdl)->mdl_startva) + (mdl)->mdl_byteoffset)) #define MmGetMdlStartVa(mdl) ((mdl)->mdl_startva) #define MmGetMdlPfnArray(mdl) MDL_PAGES(mdl) #define WDM_MAJOR 1 #define WDM_MINOR_WIN98 0x00 #define WDM_MINOR_WINME 0x05 #define WDM_MINOR_WIN2000 0x10 #define WDM_MINOR_WINXP 0x20 #define WDM_MINOR_WIN2003 0x30 enum nt_caching_type { MmNonCached = 0, MmCached = 1, MmWriteCombined = 2, MmHardwareCoherentCached = 3, MmNonCachedUnordered = 4, MmUSWCCached = 5, MmMaximumCacheType = 6 }; /*- * The ndis_kspin_lock type is called KSPIN_LOCK in MS-Windows. * According to the Windows DDK header files, KSPIN_LOCK is defined like this: * typedef ULONG_PTR KSPIN_LOCK; * * From basetsd.h (SDK, Feb. 2003): * typedef [public] unsigned __int3264 ULONG_PTR, *PULONG_PTR; * typedef unsigned __int64 ULONG_PTR, *PULONG_PTR; * typedef _W64 unsigned long ULONG_PTR, *PULONG_PTR; * * The keyword __int3264 specifies an integral type that has the following * properties: * + It is 32-bit on 32-bit platforms * + It is 64-bit on 64-bit platforms * + It is 32-bit on the wire for backward compatibility. * It gets truncated on the sending side and extended appropriately * (signed or unsigned) on the receiving side. * * Thus register_t seems the proper mapping onto FreeBSD for spin locks. */ typedef register_t kspin_lock; struct slist_entry { struct slist_entry *sl_next; }; typedef struct slist_entry slist_entry; union slist_header { uint64_t slh_align; struct { struct slist_entry *slh_next; uint16_t slh_depth; uint16_t slh_seq; } slh_list; }; typedef union slist_header slist_header; struct list_entry { struct list_entry *nle_flink; struct list_entry *nle_blink; }; typedef struct list_entry list_entry; #define InitializeListHead(l) \ (l)->nle_flink = (l)->nle_blink = (l) #define IsListEmpty(h) \ ((h)->nle_flink == (h)) #define RemoveEntryList(e) \ do { \ list_entry *b; \ list_entry *f; \ \ f = (e)->nle_flink; \ b = (e)->nle_blink; \ b->nle_flink = f; \ f->nle_blink = b; \ } while (0) /* These two have to be inlined since they return things. */ static __inline__ list_entry * RemoveHeadList(list_entry *l) { list_entry *f; list_entry *e; e = l->nle_flink; f = e->nle_flink; l->nle_flink = f; f->nle_blink = l; return (e); } static __inline__ list_entry * RemoveTailList(list_entry *l) { list_entry *b; list_entry *e; e = l->nle_blink; b = e->nle_blink; l->nle_blink = b; b->nle_flink = l; return (e); } #define InsertTailList(l, e) \ do { \ list_entry *b; \ \ b = l->nle_blink; \ e->nle_flink = l; \ e->nle_blink = b; \ b->nle_flink = (e); \ l->nle_blink = (e); \ } while (0) #define InsertHeadList(l, e) \ do { \ list_entry *f; \ \ f = l->nle_flink; \ e->nle_flink = f; \ e->nle_blink = l; \ f->nle_blink = e; \ l->nle_flink = e; \ } while (0) #define CONTAINING_RECORD(addr, type, field) \ ((type *)((vm_offset_t)(addr) - (vm_offset_t)(&((type *)0)->field))) struct nt_dispatch_header { uint8_t dh_type; uint8_t dh_abs; uint8_t dh_size; uint8_t dh_inserted; int32_t dh_sigstate; list_entry dh_waitlisthead; }; typedef struct nt_dispatch_header nt_dispatch_header; /* Dispatcher object types */ #define DISP_TYPE_NOTIFICATION_EVENT 0 /* KEVENT */ #define DISP_TYPE_SYNCHRONIZATION_EVENT 1 /* KEVENT */ #define DISP_TYPE_MUTANT 2 /* KMUTANT/KMUTEX */ #define DISP_TYPE_PROCESS 3 /* KPROCESS */ #define DISP_TYPE_QUEUE 4 /* KQUEUE */ #define DISP_TYPE_SEMAPHORE 5 /* KSEMAPHORE */ #define DISP_TYPE_THREAD 6 /* KTHREAD */ #define DISP_TYPE_NOTIFICATION_TIMER 8 /* KTIMER */ #define DISP_TYPE_SYNCHRONIZATION_TIMER 9 /* KTIMER */ #define OTYPE_EVENT 0 #define OTYPE_MUTEX 1 #define OTYPE_THREAD 2 #define OTYPE_TIMER 3 /* Windows dispatcher levels. */ #define PASSIVE_LEVEL 0 #define LOW_LEVEL 0 #define APC_LEVEL 1 #define DISPATCH_LEVEL 2 #define DEVICE_LEVEL (DISPATCH_LEVEL + 1) #define PROFILE_LEVEL 27 #define CLOCK1_LEVEL 28 #define CLOCK2_LEVEL 28 #define IPI_LEVEL 29 #define POWER_LEVEL 30 #define HIGH_LEVEL 31 #define SYNC_LEVEL_UP DISPATCH_LEVEL #define SYNC_LEVEL_MP (IPI_LEVEL - 1) #define AT_PASSIVE_LEVEL(td) \ ((td)->td_proc->p_flag & P_KTHREAD == FALSE) #define AT_DISPATCH_LEVEL(td) \ ((td)->td_base_pri == PI_REALTIME) #define AT_DIRQL_LEVEL(td) \ ((td)->td_priority <= PI_NET) #define AT_HIGH_LEVEL(td) \ ((td)->td_critnest != 0) struct nt_objref { nt_dispatch_header no_dh; void *no_obj; TAILQ_ENTRY(nt_objref) link; }; TAILQ_HEAD(nt_objref_head, nt_objref); typedef struct nt_objref nt_objref; #define EVENT_TYPE_NOTIFY 0 #define EVENT_TYPE_SYNC 1 /* * We need to use the timeout()/untimeout() API for ktimers * since timers can be initialized, but not destroyed (so * malloc()ing our own callout structures would mean a leak, * since there'd be no way to free() them). This means we * need to use struct callout_handle, which is really just a * pointer. To make it easier to deal with, we use a union * to overlay the callout_handle over the k_timerlistentry. * The latter is a list_entry, which is two pointers, so * there's enough space available to hide a callout_handle * there. */ struct ktimer { nt_dispatch_header k_header; uint64_t k_duetime; union { list_entry k_timerlistentry; struct callout *k_callout; } u; void *k_dpc; uint32_t k_period; }; #define k_timerlistentry u.k_timerlistentry #define k_callout u.k_callout typedef struct ktimer ktimer; struct nt_kevent { nt_dispatch_header k_header; }; typedef struct nt_kevent nt_kevent; /* Kernel defered procedure call (i.e. timer callback) */ struct kdpc; typedef void (*kdpc_func)(struct kdpc *, void *, void *, void *); struct kdpc { uint16_t k_type; uint8_t k_num; /* CPU number */ uint8_t k_importance; /* priority */ list_entry k_dpclistentry; void *k_deferedfunc; void *k_deferredctx; void *k_sysarg1; void *k_sysarg2; void *k_lock; }; #define KDPC_IMPORTANCE_LOW 0 #define KDPC_IMPORTANCE_MEDIUM 1 #define KDPC_IMPORTANCE_HIGH 2 #define KDPC_CPU_DEFAULT 255 typedef struct kdpc kdpc; /* * Note: the acquisition count is BSD-specific. The Microsoft * documentation says that mutexes can be acquired recursively * by a given thread, but that you must release the mutex as * many times as you acquired it before it will be set to the * signalled state (i.e. before any other threads waiting on * the object will be woken up). However the Windows KMUTANT * structure has no field for keeping track of the number of * acquisitions, so we need to add one ourselves. As long as * driver code treats the mutex as opaque, we should be ok. */ struct kmutant { nt_dispatch_header km_header; list_entry km_listentry; void *km_ownerthread; uint8_t km_abandoned; uint8_t km_apcdisable; }; typedef struct kmutant kmutant; #define LOOKASIDE_DEPTH 256 struct general_lookaside { slist_header gl_listhead; uint16_t gl_depth; uint16_t gl_maxdepth; uint32_t gl_totallocs; union { uint32_t gl_allocmisses; uint32_t gl_allochits; } u_a; uint32_t gl_totalfrees; union { uint32_t gl_freemisses; uint32_t gl_freehits; } u_m; uint32_t gl_type; uint32_t gl_tag; uint32_t gl_size; void *gl_allocfunc; void *gl_freefunc; list_entry gl_listent; uint32_t gl_lasttotallocs; union { uint32_t gl_lastallocmisses; uint32_t gl_lastallochits; } u_l; uint32_t gl_rsvd[2]; }; typedef struct general_lookaside general_lookaside; struct npaged_lookaside_list { general_lookaside nll_l; #ifdef __i386__ kspin_lock nll_obsoletelock; #endif }; typedef struct npaged_lookaside_list npaged_lookaside_list; typedef struct npaged_lookaside_list paged_lookaside_list; typedef void * (*lookaside_alloc_func)(uint32_t, size_t, uint32_t); typedef void (*lookaside_free_func)(void *); struct irp; struct kdevice_qentry { list_entry kqe_devlistent; uint32_t kqe_sortkey; uint8_t kqe_inserted; }; typedef struct kdevice_qentry kdevice_qentry; struct kdevice_queue { uint16_t kq_type; uint16_t kq_size; list_entry kq_devlisthead; kspin_lock kq_lock; uint8_t kq_busy; }; typedef struct kdevice_queue kdevice_queue; struct wait_ctx_block { kdevice_qentry wcb_waitqueue; void *wcb_devfunc; void *wcb_devctx; uint32_t wcb_mapregcnt; void *wcb_devobj; void *wcb_curirp; void *wcb_bufchaindpc; }; typedef struct wait_ctx_block wait_ctx_block; struct wait_block { list_entry wb_waitlist; void *wb_kthread; nt_dispatch_header *wb_object; struct wait_block *wb_next; #ifdef notdef uint16_t wb_waitkey; uint16_t wb_waittype; #endif uint8_t wb_waitkey; uint8_t wb_waittype; uint8_t wb_awakened; uint8_t wb_oldpri; }; typedef struct wait_block wait_block; #define wb_ext wb_kthread #define THREAD_WAIT_OBJECTS 3 #define MAX_WAIT_OBJECTS 64 #define WAITTYPE_ALL 0 #define WAITTYPE_ANY 1 #define WAITKEY_VALID 0x8000 /* kthread priority */ #define LOW_PRIORITY 0 #define LOW_REALTIME_PRIORITY 16 #define HIGH_PRIORITY 31 struct thread_context { void *tc_thrctx; void *tc_thrfunc; }; typedef struct thread_context thread_context; /* Forward declaration */ struct driver_object; struct devobj_extension; struct driver_extension { struct driver_object *dre_driverobj; void *dre_adddevicefunc; uint32_t dre_reinitcnt; unicode_string dre_srvname; /* * Drivers are allowed to add one or more custom extensions * to the driver object, but there's no special pointer * for them. Hang them off here for now. */ list_entry dre_usrext; }; typedef struct driver_extension driver_extension; struct custom_extension { list_entry ce_list; void *ce_clid; }; typedef struct custom_extension custom_extension; /* * The KINTERRUPT structure in Windows is opaque to drivers. * We define our own custom version with things we need. */ struct kinterrupt { list_entry ki_list; device_t ki_dev; int ki_rid; void *ki_cookie; struct resource *ki_irq; kspin_lock ki_lock_priv; kspin_lock *ki_lock; void *ki_svcfunc; void *ki_svcctx; }; typedef struct kinterrupt kinterrupt; struct ksystem_time { uint32_t low_part; int32_t high1_time; int32_t high2_time; }; enum nt_product_type { NT_PRODUCT_WIN_NT = 1, NT_PRODUCT_LAN_MAN_NT, NT_PRODUCT_SERVER }; enum alt_arch_type { STANDARD_DESIGN, NEC98x86, END_ALTERNATIVES }; struct kuser_shared_data { uint32_t tick_count; uint32_t tick_count_multiplier; volatile struct ksystem_time interrupt_time; volatile struct ksystem_time system_time; volatile struct ksystem_time time_zone_bias; uint16_t image_number_low; uint16_t image_number_high; int16_t nt_system_root[260]; uint32_t max_stack_trace_depth; uint32_t crypto_exponent; uint32_t time_zone_id; uint32_t large_page_min; uint32_t reserved2[7]; enum nt_product_type nt_product_type; uint8_t product_type_is_valid; uint32_t nt_major_version; uint32_t nt_minor_version; uint8_t processor_features[64]; uint32_t reserved1; uint32_t reserved3; volatile uint32_t time_slip; enum alt_arch_type alt_arch_type; int64_t system_expiration_date; uint32_t suite_mask; uint8_t kdbg_enabled; volatile uint32_t active_console; volatile uint32_t dismount_count; uint32_t com_plus_package; uint32_t last_system_rit_event_tick_count; uint32_t num_phys_pages; uint8_t safe_boot_mode; uint32_t trace_log; uint64_t fill0; uint64_t sys_call[4]; union { volatile struct ksystem_time tick_count; volatile uint64_t tick_count_quad; } tick; }; /* * In Windows, there are Physical Device Objects (PDOs) and * Functional Device Objects (FDOs). Physical Device Objects are * created and maintained by bus drivers. For example, the PCI * bus driver might detect two PCI ethernet cards on a given * bus. The PCI bus driver will then allocate two device_objects * for its own internal bookeeping purposes. This is analagous * to the device_t that the FreeBSD PCI code allocates and passes * into each PCI driver's probe and attach routines. * * When an ethernet driver claims one of the ethernet cards * on the bus, it will create its own device_object. This is * the Functional Device Object. This object is analagous to the * device-specific softc structure. */ struct device_object { uint16_t do_type; uint16_t do_size; uint32_t do_refcnt; struct driver_object *do_drvobj; struct device_object *do_nextdev; struct device_object *do_attacheddev; struct irp *do_currirp; void *do_iotimer; uint32_t do_flags; uint32_t do_characteristics; void *do_vpb; void *do_devext; uint32_t do_devtype; uint8_t do_stacksize; union { list_entry do_listent; wait_ctx_block do_wcb; } queue; uint32_t do_alignreq; kdevice_queue do_devqueue; struct kdpc do_dpc; uint32_t do_activethreads; void *do_securitydesc; struct nt_kevent do_devlock; uint16_t do_sectorsz; uint16_t do_spare1; struct devobj_extension *do_devobj_ext; void *do_rsvd; }; typedef struct device_object device_object; struct devobj_extension { uint16_t dve_type; uint16_t dve_size; device_object *dve_devobj; }; typedef struct devobj_extension devobj_extension; /* Device object flags */ #define DO_VERIFY_VOLUME 0x00000002 #define DO_BUFFERED_IO 0x00000004 #define DO_EXCLUSIVE 0x00000008 #define DO_DIRECT_IO 0x00000010 #define DO_MAP_IO_BUFFER 0x00000020 #define DO_DEVICE_HAS_NAME 0x00000040 #define DO_DEVICE_INITIALIZING 0x00000080 #define DO_SYSTEM_BOOT_PARTITION 0x00000100 #define DO_LONG_TERM_REQUESTS 0x00000200 #define DO_NEVER_LAST_DEVICE 0x00000400 #define DO_SHUTDOWN_REGISTERED 0x00000800 #define DO_BUS_ENUMERATED_DEVICE 0x00001000 #define DO_POWER_PAGABLE 0x00002000 #define DO_POWER_INRUSH 0x00004000 #define DO_LOW_PRIORITY_FILESYSTEM 0x00010000 /* Priority boosts */ #define IO_NO_INCREMENT 0 #define IO_CD_ROM_INCREMENT 1 #define IO_DISK_INCREMENT 1 #define IO_KEYBOARD_INCREMENT 6 #define IO_MAILSLOT_INCREMENT 2 #define IO_MOUSE_INCREMENT 6 #define IO_NAMED_PIPE_INCREMENT 2 #define IO_NETWORK_INCREMENT 2 #define IO_PARALLEL_INCREMENT 1 #define IO_SERIAL_INCREMENT 2 #define IO_SOUND_INCREMENT 8 #define IO_VIDEO_INCREMENT 1 /* IRP major codes */ #define IRP_MJ_CREATE 0x00 #define IRP_MJ_CREATE_NAMED_PIPE 0x01 #define IRP_MJ_CLOSE 0x02 #define IRP_MJ_READ 0x03 #define IRP_MJ_WRITE 0x04 #define IRP_MJ_QUERY_INFORMATION 0x05 #define IRP_MJ_SET_INFORMATION 0x06 #define IRP_MJ_QUERY_EA 0x07 #define IRP_MJ_SET_EA 0x08 #define IRP_MJ_FLUSH_BUFFERS 0x09 #define IRP_MJ_QUERY_VOLUME_INFORMATION 0x0a #define IRP_MJ_SET_VOLUME_INFORMATION 0x0b #define IRP_MJ_DIRECTORY_CONTROL 0x0c #define IRP_MJ_FILE_SYSTEM_CONTROL 0x0d #define IRP_MJ_DEVICE_CONTROL 0x0e #define IRP_MJ_INTERNAL_DEVICE_CONTROL 0x0f #define IRP_MJ_SHUTDOWN 0x10 #define IRP_MJ_LOCK_CONTROL 0x11 #define IRP_MJ_CLEANUP 0x12 #define IRP_MJ_CREATE_MAILSLOT 0x13 #define IRP_MJ_QUERY_SECURITY 0x14 #define IRP_MJ_SET_SECURITY 0x15 #define IRP_MJ_POWER 0x16 #define IRP_MJ_SYSTEM_CONTROL 0x17 #define IRP_MJ_DEVICE_CHANGE 0x18 #define IRP_MJ_QUERY_QUOTA 0x19 #define IRP_MJ_SET_QUOTA 0x1a #define IRP_MJ_PNP 0x1b #define IRP_MJ_PNP_POWER IRP_MJ_PNP // Obsolete.... #define IRP_MJ_MAXIMUM_FUNCTION 0x1b #define IRP_MJ_SCSI IRP_MJ_INTERNAL_DEVICE_CONTROL /* IRP minor codes */ #define IRP_MN_QUERY_DIRECTORY 0x01 #define IRP_MN_NOTIFY_CHANGE_DIRECTORY 0x02 #define IRP_MN_USER_FS_REQUEST 0x00 #define IRP_MN_MOUNT_VOLUME 0x01 #define IRP_MN_VERIFY_VOLUME 0x02 #define IRP_MN_LOAD_FILE_SYSTEM 0x03 #define IRP_MN_TRACK_LINK 0x04 #define IRP_MN_KERNEL_CALL 0x04 #define IRP_MN_LOCK 0x01 #define IRP_MN_UNLOCK_SINGLE 0x02 #define IRP_MN_UNLOCK_ALL 0x03 #define IRP_MN_UNLOCK_ALL_BY_KEY 0x04 #define IRP_MN_NORMAL 0x00 #define IRP_MN_DPC 0x01 #define IRP_MN_MDL 0x02 #define IRP_MN_COMPLETE 0x04 #define IRP_MN_COMPRESSED 0x08 #define IRP_MN_MDL_DPC (IRP_MN_MDL | IRP_MN_DPC) #define IRP_MN_COMPLETE_MDL (IRP_MN_COMPLETE | IRP_MN_MDL) #define IRP_MN_COMPLETE_MDL_DPC (IRP_MN_COMPLETE_MDL | IRP_MN_DPC) #define IRP_MN_SCSI_CLASS 0x01 #define IRP_MN_START_DEVICE 0x00 #define IRP_MN_QUERY_REMOVE_DEVICE 0x01 #define IRP_MN_REMOVE_DEVICE 0x02 #define IRP_MN_CANCEL_REMOVE_DEVICE 0x03 #define IRP_MN_STOP_DEVICE 0x04 #define IRP_MN_QUERY_STOP_DEVICE 0x05 #define IRP_MN_CANCEL_STOP_DEVICE 0x06 #define IRP_MN_QUERY_DEVICE_RELATIONS 0x07 #define IRP_MN_QUERY_INTERFACE 0x08 #define IRP_MN_QUERY_CAPABILITIES 0x09 #define IRP_MN_QUERY_RESOURCES 0x0A #define IRP_MN_QUERY_RESOURCE_REQUIREMENTS 0x0B #define IRP_MN_QUERY_DEVICE_TEXT 0x0C #define IRP_MN_FILTER_RESOURCE_REQUIREMENTS 0x0D #define IRP_MN_READ_CONFIG 0x0F #define IRP_MN_WRITE_CONFIG 0x10 #define IRP_MN_EJECT 0x11 #define IRP_MN_SET_LOCK 0x12 #define IRP_MN_QUERY_ID 0x13 #define IRP_MN_QUERY_PNP_DEVICE_STATE 0x14 #define IRP_MN_QUERY_BUS_INFORMATION 0x15 #define IRP_MN_DEVICE_USAGE_NOTIFICATION 0x16 #define IRP_MN_SURPRISE_REMOVAL 0x17 #define IRP_MN_QUERY_LEGACY_BUS_INFORMATION 0x18 #define IRP_MN_WAIT_WAKE 0x00 #define IRP_MN_POWER_SEQUENCE 0x01 #define IRP_MN_SET_POWER 0x02 #define IRP_MN_QUERY_POWER 0x03 #define IRP_MN_QUERY_ALL_DATA 0x00 #define IRP_MN_QUERY_SINGLE_INSTANCE 0x01 #define IRP_MN_CHANGE_SINGLE_INSTANCE 0x02 #define IRP_MN_CHANGE_SINGLE_ITEM 0x03 #define IRP_MN_ENABLE_EVENTS 0x04 #define IRP_MN_DISABLE_EVENTS 0x05 #define IRP_MN_ENABLE_COLLECTION 0x06 #define IRP_MN_DISABLE_COLLECTION 0x07 #define IRP_MN_REGINFO 0x08 #define IRP_MN_EXECUTE_METHOD 0x09 #define IRP_MN_REGINFO_EX 0x0b /* IRP flags */ #define IRP_NOCACHE 0x00000001 #define IRP_PAGING_IO 0x00000002 #define IRP_MOUNT_COMPLETION 0x00000002 #define IRP_SYNCHRONOUS_API 0x00000004 #define IRP_ASSOCIATED_IRP 0x00000008 #define IRP_BUFFERED_IO 0x00000010 #define IRP_DEALLOCATE_BUFFER 0x00000020 #define IRP_INPUT_OPERATION 0x00000040 #define IRP_SYNCHRONOUS_PAGING_IO 0x00000040 #define IRP_CREATE_OPERATION 0x00000080 #define IRP_READ_OPERATION 0x00000100 #define IRP_WRITE_OPERATION 0x00000200 #define IRP_CLOSE_OPERATION 0x00000400 #define IRP_DEFER_IO_COMPLETION 0x00000800 #define IRP_OB_QUERY_NAME 0x00001000 #define IRP_HOLD_DEVICE_QUEUE 0x00002000 #define IRP_RETRY_IO_COMPLETION 0x00004000 #define IRP_CLASS_CACHE_OPERATION 0x00008000 #define IRP_SET_USER_EVENT IRP_CLOSE_OPERATION /* IRP I/O control flags */ #define IRP_QUOTA_CHARGED 0x01 #define IRP_ALLOCATED_MUST_SUCCEED 0x02 #define IRP_ALLOCATED_FIXED_SIZE 0x04 #define IRP_LOOKASIDE_ALLOCATION 0x08 /* I/O method types */ #define METHOD_BUFFERED 0 #define METHOD_IN_DIRECT 1 #define METHOD_OUT_DIRECT 2 #define METHOD_NEITHER 3 /* File access types */ #define FILE_ANY_ACCESS 0x0000 #define FILE_SPECIAL_ACCESS FILE_ANY_ACCESS #define FILE_READ_ACCESS 0x0001 #define FILE_WRITE_ACCESS 0x0002 /* Recover I/O access method from IOCTL code. */ #define IO_METHOD(x) ((x) & 0xFFFFFFFC) /* Recover function code from IOCTL code */ #define IO_FUNC(x) (((x) & 0x7FFC) >> 2) /* Macro to construct an IOCTL code. */ #define IOCTL_CODE(dev, func, iomethod, acc) \ ((dev) << 16) | (acc << 14) | (func << 2) | (iomethod)) struct io_status_block { union { uint32_t isb_status; void *isb_ptr; } u; register_t isb_info; }; #define isb_status u.isb_status #define isb_ptr u.isb_ptr typedef struct io_status_block io_status_block; struct kapc { uint16_t apc_type; uint16_t apc_size; uint32_t apc_spare0; void *apc_thread; list_entry apc_list; void *apc_kernfunc; void *apc_rundownfunc; void *apc_normalfunc; void *apc_normctx; void *apc_sysarg1; void *apc_sysarg2; uint8_t apc_stateidx; uint8_t apc_cpumode; uint8_t apc_inserted; }; typedef struct kapc kapc; typedef uint32_t (*completion_func)(device_object *, struct irp *, void *); typedef uint32_t (*cancel_func)(device_object *, struct irp *); struct io_stack_location { uint8_t isl_major; uint8_t isl_minor; uint8_t isl_flags; uint8_t isl_ctl; /* * There's a big-ass union here in the actual Windows * definition of the stucture, but it contains stuff * that doesn't really apply to BSD, and defining it * all properly would require duplicating over a dozen * other structures that we'll never use. Since the * io_stack_location structure is opaque to drivers * anyway, I'm not going to bother with the extra crap. */ union { struct { uint32_t isl_len; uint32_t *isl_key; uint64_t isl_byteoff; } isl_read; struct { uint32_t isl_len; uint32_t *isl_key; uint64_t isl_byteoff; } isl_write; struct { uint32_t isl_obuflen; uint32_t isl_ibuflen; uint32_t isl_iocode; void *isl_type3ibuf; } isl_ioctl; struct { void *isl_arg1; void *isl_arg2; void *isl_arg3; void *isl_arg4; } isl_others; } isl_parameters __attribute__((packed)); void *isl_devobj; void *isl_fileobj; completion_func isl_completionfunc; void *isl_completionctx; }; typedef struct io_stack_location io_stack_location; /* Stack location control flags */ #define SL_PENDING_RETURNED 0x01 #define SL_INVOKE_ON_CANCEL 0x20 #define SL_INVOKE_ON_SUCCESS 0x40 #define SL_INVOKE_ON_ERROR 0x80 struct irp { uint16_t irp_type; uint16_t irp_size; mdl *irp_mdl; uint32_t irp_flags; union { struct irp *irp_master; uint32_t irp_irpcnt; void *irp_sysbuf; } irp_assoc; list_entry irp_thlist; io_status_block irp_iostat; uint8_t irp_reqmode; uint8_t irp_pendingreturned; uint8_t irp_stackcnt; uint8_t irp_currentstackloc; uint8_t irp_cancel; uint8_t irp_cancelirql; uint8_t irp_apcenv; uint8_t irp_allocflags; io_status_block *irp_usriostat; nt_kevent *irp_usrevent; union { struct { void *irp_apcfunc; void *irp_apcctx; } irp_asyncparms; uint64_t irp_allocsz; } irp_overlay; cancel_func irp_cancelfunc; void *irp_userbuf; /* Windows kernel info */ union { struct { union { kdevice_qentry irp_dqe; struct { void *irp_drvctx[4]; } s1; } u1; void *irp_thread; char *irp_auxbuf; struct { list_entry irp_list; union { io_stack_location *irp_csl; uint32_t irp_pkttype; } u2; } s2; void *irp_fileobj; } irp_overlay; union { kapc irp_apc; struct { void *irp_ep; void *irp_dev; } irp_usb; } irp_misc; void *irp_compkey; } irp_tail; }; #define irp_csl s2.u2.irp_csl #define irp_pkttype s2.u2.irp_pkttype #define IRP_NDIS_DEV(irp) (irp)->irp_tail.irp_misc.irp_usb.irp_dev #define IRP_NDISUSB_EP(irp) (irp)->irp_tail.irp_misc.irp_usb.irp_ep typedef struct irp irp; #define InterlockedExchangePointer(dst, val) \ (void *)InterlockedExchange((uint32_t *)(dst), (uintptr_t)(val)) #define IoSizeOfIrp(ssize) \ ((uint16_t) (sizeof(irp) + ((ssize) * (sizeof(io_stack_location))))) #define IoSetCancelRoutine(irp, func) \ (cancel_func)InterlockedExchangePointer( \ (void *)&(ip)->irp_cancelfunc, (void *)(func)) #define IoSetCancelValue(irp, val) \ (u_long)InterlockedExchangePointer( \ (void *)&(ip)->irp_cancel, (void *)(val)) #define IoGetCurrentIrpStackLocation(irp) \ (irp)->irp_tail.irp_overlay.irp_csl #define IoGetNextIrpStackLocation(irp) \ ((irp)->irp_tail.irp_overlay.irp_csl - 1) #define IoSetNextIrpStackLocation(irp) \ do { \ irp->irp_currentstackloc--; \ irp->irp_tail.irp_overlay.irp_csl--; \ } while(0) #define IoSetCompletionRoutine(irp, func, ctx, ok, err, cancel) \ do { \ io_stack_location *s; \ s = IoGetNextIrpStackLocation((irp)); \ s->isl_completionfunc = (func); \ s->isl_completionctx = (ctx); \ s->isl_ctl = 0; \ if (ok) s->isl_ctl = SL_INVOKE_ON_SUCCESS; \ if (err) s->isl_ctl |= SL_INVOKE_ON_ERROR; \ if (cancel) s->isl_ctl |= SL_INVOKE_ON_CANCEL; \ } while(0) #define IoMarkIrpPending(irp) \ IoGetCurrentIrpStackLocation(irp)->isl_ctl |= SL_PENDING_RETURNED #define IoUnmarkIrpPending(irp) \ IoGetCurrentIrpStackLocation(irp)->isl_ctl &= ~SL_PENDING_RETURNED #define IoCopyCurrentIrpStackLocationToNext(irp) \ do { \ io_stack_location *src, *dst; \ src = IoGetCurrentIrpStackLocation(irp); \ dst = IoGetNextIrpStackLocation(irp); \ bcopy((char *)src, (char *)dst, \ offsetof(io_stack_location, isl_completionfunc)); \ } while(0) #define IoSkipCurrentIrpStackLocation(irp) \ do { \ (irp)->irp_currentstackloc++; \ (irp)->irp_tail.irp_overlay.irp_csl++; \ } while(0) #define IoInitializeDpcRequest(dobj, dpcfunc) \ KeInitializeDpc(&(dobj)->do_dpc, dpcfunc, dobj) #define IoRequestDpc(dobj, irp, ctx) \ KeInsertQueueDpc(&(dobj)->do_dpc, irp, ctx) typedef uint32_t (*driver_dispatch)(device_object *, irp *); /* * The driver_object is allocated once for each driver that's loaded * into the system. A new one is allocated for each driver and * populated a bit via the driver's DriverEntry function. * In general, a Windows DriverEntry() function will provide a pointer * to its AddDevice() method and set up the dispatch table. * For NDIS drivers, this is all done behind the scenes in the * NdisInitializeWrapper() and/or NdisMRegisterMiniport() routines. */ struct driver_object { uint16_t dro_type; uint16_t dro_size; device_object *dro_devobj; uint32_t dro_flags; void *dro_driverstart; uint32_t dro_driversize; void *dro_driversection; driver_extension *dro_driverext; unicode_string dro_drivername; unicode_string *dro_hwdb; void *dro_pfastiodispatch; void *dro_driverinitfunc; void *dro_driverstartiofunc; void *dro_driverunloadfunc; driver_dispatch dro_dispatch[IRP_MJ_MAXIMUM_FUNCTION + 1]; }; typedef struct driver_object driver_object; #define DEVPROP_DEVICE_DESCRIPTION 0x00000000 #define DEVPROP_HARDWARE_ID 0x00000001 #define DEVPROP_COMPATIBLE_IDS 0x00000002 #define DEVPROP_BOOTCONF 0x00000003 #define DEVPROP_BOOTCONF_TRANSLATED 0x00000004 #define DEVPROP_CLASS_NAME 0x00000005 #define DEVPROP_CLASS_GUID 0x00000006 #define DEVPROP_DRIVER_KEYNAME 0x00000007 #define DEVPROP_MANUFACTURER 0x00000008 #define DEVPROP_FRIENDLYNAME 0x00000009 #define DEVPROP_LOCATION_INFO 0x0000000A #define DEVPROP_PHYSDEV_NAME 0x0000000B #define DEVPROP_BUSTYPE_GUID 0x0000000C #define DEVPROP_LEGACY_BUSTYPE 0x0000000D #define DEVPROP_BUS_NUMBER 0x0000000E #define DEVPROP_ENUMERATOR_NAME 0x0000000F #define DEVPROP_ADDRESS 0x00000010 #define DEVPROP_UINUMBER 0x00000011 #define DEVPROP_INSTALL_STATE 0x00000012 #define DEVPROP_REMOVAL_POLICY 0x00000013 /* Various supported device types (used with IoCreateDevice()) */ #define FILE_DEVICE_BEEP 0x00000001 #define FILE_DEVICE_CD_ROM 0x00000002 #define FILE_DEVICE_CD_ROM_FILE_SYSTEM 0x00000003 #define FILE_DEVICE_CONTROLLER 0x00000004 #define FILE_DEVICE_DATALINK 0x00000005 #define FILE_DEVICE_DFS 0x00000006 #define FILE_DEVICE_DISK 0x00000007 #define FILE_DEVICE_DISK_FILE_SYSTEM 0x00000008 #define FILE_DEVICE_FILE_SYSTEM 0x00000009 #define FILE_DEVICE_INPORT_PORT 0x0000000A #define FILE_DEVICE_KEYBOARD 0x0000000B #define FILE_DEVICE_MAILSLOT 0x0000000C #define FILE_DEVICE_MIDI_IN 0x0000000D #define FILE_DEVICE_MIDI_OUT 0x0000000E #define FILE_DEVICE_MOUSE 0x0000000F #define FILE_DEVICE_MULTI_UNC_PROVIDER 0x00000010 #define FILE_DEVICE_NAMED_PIPE 0x00000011 #define FILE_DEVICE_NETWORK 0x00000012 #define FILE_DEVICE_NETWORK_BROWSER 0x00000013 #define FILE_DEVICE_NETWORK_FILE_SYSTEM 0x00000014 #define FILE_DEVICE_NULL 0x00000015 #define FILE_DEVICE_PARALLEL_PORT 0x00000016 #define FILE_DEVICE_PHYSICAL_NETCARD 0x00000017 #define FILE_DEVICE_PRINTER 0x00000018 #define FILE_DEVICE_SCANNER 0x00000019 #define FILE_DEVICE_SERIAL_MOUSE_PORT 0x0000001A #define FILE_DEVICE_SERIAL_PORT 0x0000001B #define FILE_DEVICE_SCREEN 0x0000001C #define FILE_DEVICE_SOUND 0x0000001D #define FILE_DEVICE_STREAMS 0x0000001E #define FILE_DEVICE_TAPE 0x0000001F #define FILE_DEVICE_TAPE_FILE_SYSTEM 0x00000020 #define FILE_DEVICE_TRANSPORT 0x00000021 #define FILE_DEVICE_UNKNOWN 0x00000022 #define FILE_DEVICE_VIDEO 0x00000023 #define FILE_DEVICE_VIRTUAL_DISK 0x00000024 #define FILE_DEVICE_WAVE_IN 0x00000025 #define FILE_DEVICE_WAVE_OUT 0x00000026 #define FILE_DEVICE_8042_PORT 0x00000027 #define FILE_DEVICE_NETWORK_REDIRECTOR 0x00000028 #define FILE_DEVICE_BATTERY 0x00000029 #define FILE_DEVICE_BUS_EXTENDER 0x0000002A #define FILE_DEVICE_MODEM 0x0000002B #define FILE_DEVICE_VDM 0x0000002C #define FILE_DEVICE_MASS_STORAGE 0x0000002D #define FILE_DEVICE_SMB 0x0000002E #define FILE_DEVICE_KS 0x0000002F #define FILE_DEVICE_CHANGER 0x00000030 #define FILE_DEVICE_SMARTCARD 0x00000031 #define FILE_DEVICE_ACPI 0x00000032 #define FILE_DEVICE_DVD 0x00000033 #define FILE_DEVICE_FULLSCREEN_VIDEO 0x00000034 #define FILE_DEVICE_DFS_FILE_SYSTEM 0x00000035 #define FILE_DEVICE_DFS_VOLUME 0x00000036 #define FILE_DEVICE_SERENUM 0x00000037 #define FILE_DEVICE_TERMSRV 0x00000038 #define FILE_DEVICE_KSEC 0x00000039 #define FILE_DEVICE_FIPS 0x0000003A /* Device characteristics */ #define FILE_REMOVABLE_MEDIA 0x00000001 #define FILE_READ_ONLY_DEVICE 0x00000002 #define FILE_FLOPPY_DISKETTE 0x00000004 #define FILE_WRITE_ONCE_MEDIA 0x00000008 #define FILE_REMOTE_DEVICE 0x00000010 #define FILE_DEVICE_IS_MOUNTED 0x00000020 #define FILE_VIRTUAL_VOLUME 0x00000040 #define FILE_AUTOGENERATED_DEVICE_NAME 0x00000080 #define FILE_DEVICE_SECURE_OPEN 0x00000100 /* Status codes */ #define STATUS_SUCCESS 0x00000000 #define STATUS_USER_APC 0x000000C0 #define STATUS_KERNEL_APC 0x00000100 #define STATUS_ALERTED 0x00000101 #define STATUS_TIMEOUT 0x00000102 #define STATUS_PENDING 0x00000103 #define STATUS_FAILURE 0xC0000001 #define STATUS_NOT_IMPLEMENTED 0xC0000002 +#define STATUS_ACCESS_VIOLATION 0xC0000005 #define STATUS_INVALID_PARAMETER 0xC000000D #define STATUS_INVALID_DEVICE_REQUEST 0xC0000010 #define STATUS_MORE_PROCESSING_REQUIRED 0xC0000016 #define STATUS_NO_MEMORY 0xC0000017 #define STATUS_BUFFER_TOO_SMALL 0xC0000023 #define STATUS_MUTANT_NOT_OWNED 0xC0000046 #define STATUS_NOT_SUPPORTED 0xC00000BB #define STATUS_INVALID_PARAMETER_2 0xC00000F0 #define STATUS_INSUFFICIENT_RESOURCES 0xC000009A #define STATUS_DEVICE_NOT_CONNECTED 0xC000009D #define STATUS_CANCELLED 0xC0000120 #define STATUS_NOT_FOUND 0xC0000225 #define STATUS_DEVICE_REMOVED 0xC00002B6 #define STATUS_WAIT_0 0x00000000 /* Memory pool types, for ExAllocatePoolWithTag() */ #define NonPagedPool 0x00000000 #define PagedPool 0x00000001 #define NonPagedPoolMustSucceed 0x00000002 #define DontUseThisType 0x00000003 #define NonPagedPoolCacheAligned 0x00000004 #define PagedPoolCacheAligned 0x00000005 #define NonPagedPoolCacheAlignedMustS 0x00000006 #define MaxPoolType 0x00000007 /* * IO_WORKITEM is an opaque structures that must be allocated * via IoAllocateWorkItem() and released via IoFreeWorkItem(). * Consequently, we can define it any way we want. */ typedef void (*io_workitem_func)(device_object *, void *); struct io_workitem { io_workitem_func iw_func; void *iw_ctx; list_entry iw_listentry; device_object *iw_dobj; int iw_idx; }; typedef struct io_workitem io_workitem; #define WORKQUEUE_CRITICAL 0 #define WORKQUEUE_DELAYED 1 #define WORKQUEUE_HYPERCRITICAL 2 #define WORKITEM_THREADS 4 #define WORKITEM_LEGACY_THREAD 3 #define WORKIDX_INC(x) (x) = (x + 1) % WORKITEM_LEGACY_THREAD /* * Older, deprecated work item API, needed to support NdisQueueWorkItem(). */ struct work_queue_item; typedef void (*work_item_func)(struct work_queue_item *, void *); struct work_queue_item { list_entry wqi_entry; work_item_func wqi_func; void *wqi_ctx; }; typedef struct work_queue_item work_queue_item; #define ExInitializeWorkItem(w, func, ctx) \ do { \ (w)->wqi_func = (func); \ (w)->wqi_ctx = (ctx); \ InitializeListHead(&((w)->wqi_entry)); \ } while (0) /* * FreeBSD's kernel stack is 2 pages in size by default. The * Windows stack is larger, so we need to give our threads more * stack pages. 4 should be enough, we use 8 just to extra safe. */ #define NDIS_KSTACK_PAGES 8 /* * Different kinds of function wrapping we can do. */ #define WINDRV_WRAP_STDCALL 1 #define WINDRV_WRAP_FASTCALL 2 #define WINDRV_WRAP_REGPARM 3 #define WINDRV_WRAP_CDECL 4 #define WINDRV_WRAP_AMD64 5 struct drvdb_ent { driver_object *windrv_object; void *windrv_devlist; ndis_cfg *windrv_regvals; interface_type windrv_bustype; STAILQ_ENTRY(drvdb_ent) link; }; extern image_patch_table ntoskrnl_functbl[]; #ifdef __amd64__ extern struct kuser_shared_data kuser_shared_data; #endif typedef void (*funcptr)(void); typedef int (*matchfuncptr)(interface_type, void *, void *); __BEGIN_DECLS extern int windrv_libinit(void); extern int windrv_libfini(void); extern driver_object *windrv_lookup(vm_offset_t, char *); extern struct drvdb_ent *windrv_match(matchfuncptr, void *); extern int windrv_load(module_t, vm_offset_t, int, interface_type, void *, ndis_cfg *); extern int windrv_unload(module_t, vm_offset_t, int); extern int windrv_create_pdo(driver_object *, device_t); extern void windrv_destroy_pdo(driver_object *, device_t); extern device_object *windrv_find_pdo(driver_object *, device_t); extern int windrv_bus_attach(driver_object *, char *); extern int windrv_wrap(funcptr, funcptr *, int, int); extern int windrv_unwrap(funcptr); extern void ctxsw_utow(void); extern void ctxsw_wtou(void); extern int ntoskrnl_libinit(void); extern int ntoskrnl_libfini(void); extern void ntoskrnl_intr(void *); extern void ntoskrnl_time(uint64_t *); extern uint16_t ExQueryDepthSList(slist_header *); extern slist_entry *InterlockedPushEntrySList(slist_header *, slist_entry *); extern slist_entry *InterlockedPopEntrySList(slist_header *); extern uint32_t RtlUnicodeStringToAnsiString(ansi_string *, unicode_string *, uint8_t); extern uint32_t RtlAnsiStringToUnicodeString(unicode_string *, ansi_string *, uint8_t); extern void RtlInitAnsiString(ansi_string *, char *); extern void RtlInitUnicodeString(unicode_string *, uint16_t *); extern void RtlFreeUnicodeString(unicode_string *); extern void RtlFreeAnsiString(ansi_string *); extern void KeInitializeDpc(kdpc *, void *, void *); extern uint8_t KeInsertQueueDpc(kdpc *, void *, void *); extern uint8_t KeRemoveQueueDpc(kdpc *); extern void KeSetImportanceDpc(kdpc *, uint32_t); extern void KeSetTargetProcessorDpc(kdpc *, uint8_t); extern void KeFlushQueuedDpcs(void); extern uint32_t KeGetCurrentProcessorNumber(void); extern void KeInitializeTimer(ktimer *); extern void KeInitializeTimerEx(ktimer *, uint32_t); extern uint8_t KeSetTimer(ktimer *, int64_t, kdpc *); extern uint8_t KeSetTimerEx(ktimer *, int64_t, uint32_t, kdpc *); extern uint8_t KeCancelTimer(ktimer *); extern uint8_t KeReadStateTimer(ktimer *); extern uint32_t KeWaitForSingleObject(void *, uint32_t, uint32_t, uint8_t, int64_t *); extern void KeInitializeEvent(nt_kevent *, uint32_t, uint8_t); extern void KeClearEvent(nt_kevent *); extern uint32_t KeReadStateEvent(nt_kevent *); extern uint32_t KeSetEvent(nt_kevent *, uint32_t, uint8_t); extern uint32_t KeResetEvent(nt_kevent *); #ifdef __i386__ extern void KefAcquireSpinLockAtDpcLevel(kspin_lock *); extern void KefReleaseSpinLockFromDpcLevel(kspin_lock *); extern uint8_t KeAcquireSpinLockRaiseToDpc(kspin_lock *); #else extern void KeAcquireSpinLockAtDpcLevel(kspin_lock *); extern void KeReleaseSpinLockFromDpcLevel(kspin_lock *); #endif extern void KeInitializeSpinLock(kspin_lock *); extern uint8_t KeAcquireInterruptSpinLock(kinterrupt *); extern void KeReleaseInterruptSpinLock(kinterrupt *, uint8_t); extern uint8_t KeSynchronizeExecution(kinterrupt *, void *, void *); extern uintptr_t InterlockedExchange(volatile uint32_t *, uintptr_t); extern void *ExAllocatePoolWithTag(uint32_t, size_t, uint32_t); extern void ExFreePool(void *); extern uint32_t IoConnectInterrupt(kinterrupt **, void *, void *, kspin_lock *, uint32_t, uint8_t, uint8_t, uint8_t, uint8_t, uint32_t, uint8_t); extern uint8_t MmIsAddressValid(void *); extern void *MmMapIoSpace(uint64_t, uint32_t, uint32_t); extern void MmUnmapIoSpace(void *, size_t); extern void MmBuildMdlForNonPagedPool(mdl *); extern void IoDisconnectInterrupt(kinterrupt *); extern uint32_t IoAllocateDriverObjectExtension(driver_object *, void *, uint32_t, void **); extern void *IoGetDriverObjectExtension(driver_object *, void *); extern uint32_t IoCreateDevice(driver_object *, uint32_t, unicode_string *, uint32_t, uint32_t, uint8_t, device_object **); extern void IoDeleteDevice(device_object *); extern device_object *IoGetAttachedDevice(device_object *); extern uint32_t IofCallDriver(device_object *, irp *); extern void IofCompleteRequest(irp *, uint8_t); extern void IoAcquireCancelSpinLock(uint8_t *); extern void IoReleaseCancelSpinLock(uint8_t); extern uint8_t IoCancelIrp(irp *); extern void IoDetachDevice(device_object *); extern device_object *IoAttachDeviceToDeviceStack(device_object *, device_object *); extern mdl *IoAllocateMdl(void *, uint32_t, uint8_t, uint8_t, irp *); extern void IoFreeMdl(mdl *); extern io_workitem *IoAllocateWorkItem(device_object *); extern void ExQueueWorkItem(work_queue_item *, u_int32_t); extern void IoFreeWorkItem(io_workitem *); extern void IoQueueWorkItem(io_workitem *, io_workitem_func, uint32_t, void *); #define IoCallDriver(a, b) IofCallDriver(a, b) #define IoCompleteRequest(a, b) IofCompleteRequest(a, b) /* * On the Windows x86 arch, KeAcquireSpinLock() and KeReleaseSpinLock() * routines live in the HAL. We try to imitate this behavior. */ #ifdef __i386__ #define KI_USER_SHARED_DATA 0xffdf0000 #define KeAcquireSpinLock(a, b) *(b) = KfAcquireSpinLock(a) #define KeReleaseSpinLock(a, b) KfReleaseSpinLock(a, b) #define KeRaiseIrql(a, b) *(b) = KfRaiseIrql(a) #define KeLowerIrql(a) KfLowerIrql(a) #define KeAcquireSpinLockAtDpcLevel(a) KefAcquireSpinLockAtDpcLevel(a) #define KeReleaseSpinLockFromDpcLevel(a) KefReleaseSpinLockFromDpcLevel(a) #endif /* __i386__ */ #ifdef __amd64__ #define KI_USER_SHARED_DATA 0xfffff78000000000UL #define KeAcquireSpinLock(a, b) *(b) = KfAcquireSpinLock(a) #define KeReleaseSpinLock(a, b) KfReleaseSpinLock(a, b) /* * These may need to be redefined later; * not sure where they live on amd64 yet. */ #define KeRaiseIrql(a, b) *(b) = KfRaiseIrql(a) #define KeLowerIrql(a) KfLowerIrql(a) #endif /* __amd64__ */ __END_DECLS #endif /* _NTOSKRNL_VAR_H_ */ Index: projects/binutils-2.17/sys/compat/ndis/subr_ndis.c =================================================================== --- projects/binutils-2.17/sys/compat/ndis/subr_ndis.c (revision 215829) +++ projects/binutils-2.17/sys/compat/ndis/subr_ndis.c (revision 215830) @@ -1,3340 +1,3364 @@ /*- * Copyright (c) 2003 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * This file implements a translation layer between the BSD networking * infrasturcture and Windows(R) NDIS network driver modules. A Windows * NDIS driver calls into several functions in the NDIS.SYS Windows * kernel module and exports a table of functions designed to be called * by the NDIS subsystem. Using the PE loader, we can patch our own * versions of the NDIS routines into a given Windows driver module and * convince the driver that it is in fact running on Windows. * * We provide a table of all our implemented NDIS routines which is patched * into the driver object code. All our exported routines must use the * _stdcall calling convention, since that's what the Windows object code * expects. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static char ndis_filepath[MAXPATHLEN]; SYSCTL_STRING(_hw, OID_AUTO, ndis_filepath, CTLFLAG_RW, ndis_filepath, MAXPATHLEN, "Path used by NdisOpenFile() to search for files"); static void NdisInitializeWrapper(ndis_handle *, driver_object *, void *, void *); static ndis_status NdisMRegisterMiniport(ndis_handle, ndis_miniport_characteristics *, int); static ndis_status NdisAllocateMemoryWithTag(void **, uint32_t, uint32_t); static ndis_status NdisAllocateMemory(void **, uint32_t, uint32_t, ndis_physaddr); static void NdisFreeMemory(void *, uint32_t, uint32_t); static ndis_status NdisMSetAttributesEx(ndis_handle, ndis_handle, uint32_t, uint32_t, ndis_interface_type); static void NdisOpenConfiguration(ndis_status *, ndis_handle *, ndis_handle); static void NdisOpenConfigurationKeyByIndex(ndis_status *, ndis_handle, uint32_t, unicode_string *, ndis_handle *); static void NdisOpenConfigurationKeyByName(ndis_status *, ndis_handle, unicode_string *, ndis_handle *); static ndis_status ndis_encode_parm(ndis_miniport_block *, struct sysctl_oid *, ndis_parm_type, ndis_config_parm **); static ndis_status ndis_decode_parm(ndis_miniport_block *, ndis_config_parm *, char *); static void NdisReadConfiguration(ndis_status *, ndis_config_parm **, ndis_handle, unicode_string *, ndis_parm_type); static void NdisWriteConfiguration(ndis_status *, ndis_handle, unicode_string *, ndis_config_parm *); static void NdisCloseConfiguration(ndis_handle); static void NdisAllocateSpinLock(ndis_spin_lock *); static void NdisFreeSpinLock(ndis_spin_lock *); static void NdisAcquireSpinLock(ndis_spin_lock *); static void NdisReleaseSpinLock(ndis_spin_lock *); static void NdisDprAcquireSpinLock(ndis_spin_lock *); static void NdisDprReleaseSpinLock(ndis_spin_lock *); static void NdisInitializeReadWriteLock(ndis_rw_lock *); static void NdisAcquireReadWriteLock(ndis_rw_lock *, uint8_t, ndis_lock_state *); static void NdisReleaseReadWriteLock(ndis_rw_lock *, ndis_lock_state *); static uint32_t NdisReadPciSlotInformation(ndis_handle, uint32_t, uint32_t, void *, uint32_t); static uint32_t NdisWritePciSlotInformation(ndis_handle, uint32_t, uint32_t, void *, uint32_t); static void NdisWriteErrorLogEntry(ndis_handle, ndis_error_code, uint32_t, ...); static void ndis_map_cb(void *, bus_dma_segment_t *, int, int); static void NdisMStartBufferPhysicalMapping(ndis_handle, ndis_buffer *, uint32_t, uint8_t, ndis_paddr_unit *, uint32_t *); static void NdisMCompleteBufferPhysicalMapping(ndis_handle, ndis_buffer *, uint32_t); static void NdisMInitializeTimer(ndis_miniport_timer *, ndis_handle, ndis_timer_function, void *); static void NdisInitializeTimer(ndis_timer *, ndis_timer_function, void *); static void NdisSetTimer(ndis_timer *, uint32_t); static void NdisMSetPeriodicTimer(ndis_miniport_timer *, uint32_t); static void NdisMCancelTimer(ndis_timer *, uint8_t *); static void ndis_timercall(kdpc *, ndis_miniport_timer *, void *, void *); static void NdisMQueryAdapterResources(ndis_status *, ndis_handle, ndis_resource_list *, uint32_t *); static ndis_status NdisMRegisterIoPortRange(void **, ndis_handle, uint32_t, uint32_t); static void NdisMDeregisterIoPortRange(ndis_handle, uint32_t, uint32_t, void *); static void NdisReadNetworkAddress(ndis_status *, void **, uint32_t *, ndis_handle); static ndis_status NdisQueryMapRegisterCount(uint32_t, uint32_t *); static ndis_status NdisMAllocateMapRegisters(ndis_handle, uint32_t, uint8_t, uint32_t, uint32_t); static void NdisMFreeMapRegisters(ndis_handle); static void ndis_mapshared_cb(void *, bus_dma_segment_t *, int, int); static void NdisMAllocateSharedMemory(ndis_handle, uint32_t, uint8_t, void **, ndis_physaddr *); static void ndis_asyncmem_complete(device_object *, void *); static ndis_status NdisMAllocateSharedMemoryAsync(ndis_handle, uint32_t, uint8_t, void *); static void NdisMFreeSharedMemory(ndis_handle, uint32_t, uint8_t, void *, ndis_physaddr); static ndis_status NdisMMapIoSpace(void **, ndis_handle, ndis_physaddr, uint32_t); static void NdisMUnmapIoSpace(ndis_handle, void *, uint32_t); static uint32_t NdisGetCacheFillSize(void); static uint32_t NdisMGetDmaAlignment(ndis_handle); static ndis_status NdisMInitializeScatterGatherDma(ndis_handle, uint8_t, uint32_t); static void NdisUnchainBufferAtFront(ndis_packet *, ndis_buffer **); static void NdisUnchainBufferAtBack(ndis_packet *, ndis_buffer **); static void NdisAllocateBufferPool(ndis_status *, ndis_handle *, uint32_t); static void NdisFreeBufferPool(ndis_handle); static void NdisAllocateBuffer(ndis_status *, ndis_buffer **, ndis_handle, void *, uint32_t); static void NdisFreeBuffer(ndis_buffer *); static uint32_t NdisBufferLength(ndis_buffer *); static void NdisQueryBuffer(ndis_buffer *, void **, uint32_t *); static void NdisQueryBufferSafe(ndis_buffer *, void **, uint32_t *, uint32_t); static void *NdisBufferVirtualAddress(ndis_buffer *); static void *NdisBufferVirtualAddressSafe(ndis_buffer *, uint32_t); static void NdisAdjustBufferLength(ndis_buffer *, int); static uint32_t NdisInterlockedIncrement(uint32_t *); static uint32_t NdisInterlockedDecrement(uint32_t *); static void NdisInitializeEvent(ndis_event *); static void NdisSetEvent(ndis_event *); static void NdisResetEvent(ndis_event *); static uint8_t NdisWaitEvent(ndis_event *, uint32_t); static ndis_status NdisUnicodeStringToAnsiString(ansi_string *, unicode_string *); static ndis_status NdisAnsiStringToUnicodeString(unicode_string *, ansi_string *); static ndis_status NdisMPciAssignResources(ndis_handle, uint32_t, ndis_resource_list **); static ndis_status NdisMRegisterInterrupt(ndis_miniport_interrupt *, ndis_handle, uint32_t, uint32_t, uint8_t, uint8_t, ndis_interrupt_mode); static void NdisMDeregisterInterrupt(ndis_miniport_interrupt *); static void NdisMRegisterAdapterShutdownHandler(ndis_handle, void *, ndis_shutdown_handler); static void NdisMDeregisterAdapterShutdownHandler(ndis_handle); static uint32_t NDIS_BUFFER_TO_SPAN_PAGES(ndis_buffer *); static void NdisGetBufferPhysicalArraySize(ndis_buffer *, uint32_t *); static void NdisQueryBufferOffset(ndis_buffer *, uint32_t *, uint32_t *); static uint32_t NdisReadPcmciaAttributeMemory(ndis_handle, uint32_t, void *, uint32_t); static uint32_t NdisWritePcmciaAttributeMemory(ndis_handle, uint32_t, void *, uint32_t); static list_entry *NdisInterlockedInsertHeadList(list_entry *, list_entry *, ndis_spin_lock *); static list_entry *NdisInterlockedRemoveHeadList(list_entry *, ndis_spin_lock *); static list_entry *NdisInterlockedInsertTailList(list_entry *, list_entry *, ndis_spin_lock *); static uint8_t NdisMSynchronizeWithInterrupt(ndis_miniport_interrupt *, void *, void *); static void NdisGetCurrentSystemTime(uint64_t *); static void NdisGetSystemUpTime(uint32_t *); +static uint32_t NdisGetVersion(void); static void NdisInitializeString(unicode_string *, char *); static void NdisInitAnsiString(ansi_string *, char *); static void NdisInitUnicodeString(unicode_string *, uint16_t *); static void NdisFreeString(unicode_string *); static ndis_status NdisMRemoveMiniport(ndis_handle *); static void NdisTerminateWrapper(ndis_handle, void *); static void NdisMGetDeviceProperty(ndis_handle, device_object **, device_object **, device_object **, cm_resource_list *, cm_resource_list *); static void NdisGetFirstBufferFromPacket(ndis_packet *, ndis_buffer **, void **, uint32_t *, uint32_t *); static void NdisGetFirstBufferFromPacketSafe(ndis_packet *, ndis_buffer **, void **, uint32_t *, uint32_t *, uint32_t); static int ndis_find_sym(linker_file_t, char *, char *, caddr_t *); static void NdisOpenFile(ndis_status *, ndis_handle *, uint32_t *, unicode_string *, ndis_physaddr); static void NdisMapFile(ndis_status *, void **, ndis_handle); static void NdisUnmapFile(ndis_handle); static void NdisCloseFile(ndis_handle); static uint8_t NdisSystemProcessorCount(void); +static void NdisGetCurrentProcessorCounts(uint32_t *, uint32_t *, uint32_t *); static void NdisMIndicateStatusComplete(ndis_handle); static void NdisMIndicateStatus(ndis_handle, ndis_status, void *, uint32_t); static uint8_t ndis_intr(kinterrupt *, void *); static void ndis_intrhand(kdpc *, ndis_miniport_interrupt *, void *, void *); static funcptr ndis_findwrap(funcptr); static void NdisCopyFromPacketToPacket(ndis_packet *, uint32_t, uint32_t, ndis_packet *, uint32_t, uint32_t *); static void NdisCopyFromPacketToPacketSafe(ndis_packet *, uint32_t, uint32_t, ndis_packet *, uint32_t, uint32_t *, uint32_t); static void NdisIMCopySendPerPacketInfo(ndis_packet *, ndis_packet *); static ndis_status NdisMRegisterDevice(ndis_handle, unicode_string *, unicode_string *, driver_dispatch **, void **, ndis_handle *); static ndis_status NdisMDeregisterDevice(ndis_handle); static ndis_status NdisMQueryAdapterInstanceName(unicode_string *, ndis_handle); static void NdisMRegisterUnloadHandler(ndis_handle, void *); static void dummy(void); /* * Some really old drivers do not properly check the return value * from NdisAllocatePacket() and NdisAllocateBuffer() and will * sometimes allocate few more buffers/packets that they originally * requested when they created the pool. To prevent this from being * a problem, we allocate a few extra buffers/packets beyond what * the driver asks for. This #define controls how many. */ #define NDIS_POOL_EXTRA 16 int ndis_libinit() { image_patch_table *patch; strcpy(ndis_filepath, "/compat/ndis"); patch = ndis_functbl; while (patch->ipt_func != NULL) { windrv_wrap((funcptr)patch->ipt_func, (funcptr *)&patch->ipt_wrap, patch->ipt_argcnt, patch->ipt_ftype); patch++; } return (0); } int ndis_libfini() { image_patch_table *patch; patch = ndis_functbl; while (patch->ipt_func != NULL) { windrv_unwrap(patch->ipt_wrap); patch++; } return (0); } static funcptr ndis_findwrap(func) funcptr func; { image_patch_table *patch; patch = ndis_functbl; while (patch->ipt_func != NULL) { if ((funcptr)patch->ipt_func == func) return ((funcptr)patch->ipt_wrap); patch++; } return (NULL); } /* * This routine does the messy Windows Driver Model device attachment * stuff on behalf of NDIS drivers. We register our own AddDevice * routine here */ static void NdisInitializeWrapper(wrapper, drv, path, unused) ndis_handle *wrapper; driver_object *drv; void *path; void *unused; { /* * As of yet, I haven't come up with a compelling * reason to define a private NDIS wrapper structure, * so we use a pointer to the driver object as the * wrapper handle. The driver object has the miniport * characteristics struct for this driver hung off it * via IoAllocateDriverObjectExtension(), and that's * really all the private data we need. */ *wrapper = drv; /* * If this was really Windows, we'd be registering dispatch * routines for the NDIS miniport module here, but we're * not Windows so all we really need to do is set up an * AddDevice function that'll be invoked when a new device * instance appears. */ drv->dro_driverext->dre_adddevicefunc = NdisAddDevice; } static void NdisTerminateWrapper(handle, syspec) ndis_handle handle; void *syspec; { /* Nothing to see here, move along. */ } static ndis_status NdisMRegisterMiniport(handle, characteristics, len) ndis_handle handle; ndis_miniport_characteristics *characteristics; int len; { ndis_miniport_characteristics *ch = NULL; driver_object *drv; drv = (driver_object *)handle; /* * We need to save the NDIS miniport characteristics * somewhere. This data is per-driver, not per-device * (all devices handled by the same driver have the * same characteristics) so we hook it onto the driver * object using IoAllocateDriverObjectExtension(). * The extra extension info is automagically deleted when * the driver is unloaded (see windrv_unload()). */ if (IoAllocateDriverObjectExtension(drv, (void *)1, sizeof(ndis_miniport_characteristics), (void **)&ch) != STATUS_SUCCESS) { return (NDIS_STATUS_RESOURCES); } bzero((char *)ch, sizeof(ndis_miniport_characteristics)); bcopy((char *)characteristics, (char *)ch, len); if (ch->nmc_version_major < 5 || ch->nmc_version_minor < 1) { ch->nmc_shutdown_handler = NULL; ch->nmc_canceltxpkts_handler = NULL; ch->nmc_pnpevent_handler = NULL; } return (NDIS_STATUS_SUCCESS); } static ndis_status NdisAllocateMemoryWithTag(vaddr, len, tag) void **vaddr; uint32_t len; uint32_t tag; { void *mem; mem = ExAllocatePoolWithTag(NonPagedPool, len, tag); if (mem == NULL) { return (NDIS_STATUS_RESOURCES); } *vaddr = mem; return (NDIS_STATUS_SUCCESS); } static ndis_status NdisAllocateMemory(vaddr, len, flags, highaddr) void **vaddr; uint32_t len; uint32_t flags; ndis_physaddr highaddr; { void *mem; mem = ExAllocatePoolWithTag(NonPagedPool, len, 0); if (mem == NULL) return (NDIS_STATUS_RESOURCES); *vaddr = mem; return (NDIS_STATUS_SUCCESS); } static void NdisFreeMemory(vaddr, len, flags) void *vaddr; uint32_t len; uint32_t flags; { if (len == 0) return; ExFreePool(vaddr); } static ndis_status NdisMSetAttributesEx(adapter_handle, adapter_ctx, hangsecs, flags, iftype) ndis_handle adapter_handle; ndis_handle adapter_ctx; uint32_t hangsecs; uint32_t flags; ndis_interface_type iftype; { ndis_miniport_block *block; /* * Save the adapter context, we need it for calling * the driver's internal functions. */ block = (ndis_miniport_block *)adapter_handle; block->nmb_miniportadapterctx = adapter_ctx; block->nmb_checkforhangsecs = hangsecs; block->nmb_flags = flags; return (NDIS_STATUS_SUCCESS); } static void NdisOpenConfiguration(status, cfg, wrapctx) ndis_status *status; ndis_handle *cfg; ndis_handle wrapctx; { *cfg = wrapctx; *status = NDIS_STATUS_SUCCESS; } static void NdisOpenConfigurationKeyByName(status, cfg, subkey, subhandle) ndis_status *status; ndis_handle cfg; unicode_string *subkey; ndis_handle *subhandle; { *subhandle = cfg; *status = NDIS_STATUS_SUCCESS; } static void NdisOpenConfigurationKeyByIndex(status, cfg, idx, subkey, subhandle) ndis_status *status; ndis_handle cfg; uint32_t idx; unicode_string *subkey; ndis_handle *subhandle; { *status = NDIS_STATUS_FAILURE; } static ndis_status ndis_encode_parm(block, oid, type, parm) ndis_miniport_block *block; struct sysctl_oid *oid; ndis_parm_type type; ndis_config_parm **parm; { ndis_config_parm *p; ndis_parmlist_entry *np; unicode_string *us; ansi_string as; int base = 0; uint32_t val; char tmp[32]; np = ExAllocatePoolWithTag(NonPagedPool, sizeof(ndis_parmlist_entry), 0); if (np == NULL) return (NDIS_STATUS_RESOURCES); InsertHeadList((&block->nmb_parmlist), (&np->np_list)); *parm = p = &np->np_parm; switch(type) { case ndis_parm_string: /* See if this might be a number. */ val = strtoul((char *)oid->oid_arg1, NULL, 10); us = &p->ncp_parmdata.ncp_stringdata; p->ncp_type = ndis_parm_string; if (val) { snprintf(tmp, 32, "%x", val); RtlInitAnsiString(&as, tmp); } else { RtlInitAnsiString(&as, (char *)oid->oid_arg1); } if (RtlAnsiStringToUnicodeString(us, &as, TRUE)) { ExFreePool(np); return (NDIS_STATUS_RESOURCES); } break; case ndis_parm_int: if (strncmp((char *)oid->oid_arg1, "0x", 2) == 0) base = 16; else base = 10; p->ncp_type = ndis_parm_int; p->ncp_parmdata.ncp_intdata = strtol((char *)oid->oid_arg1, NULL, base); break; case ndis_parm_hexint: #ifdef notdef if (strncmp((char *)oid->oid_arg1, "0x", 2) == 0) base = 16; else base = 10; #endif base = 16; p->ncp_type = ndis_parm_hexint; p->ncp_parmdata.ncp_intdata = strtoul((char *)oid->oid_arg1, NULL, base); break; default: return (NDIS_STATUS_FAILURE); break; } return (NDIS_STATUS_SUCCESS); } static void NdisReadConfiguration(status, parm, cfg, key, type) ndis_status *status; ndis_config_parm **parm; ndis_handle cfg; unicode_string *key; ndis_parm_type type; { char *keystr = NULL; ndis_miniport_block *block; struct ndis_softc *sc; struct sysctl_oid *oidp; struct sysctl_ctx_entry *e; ansi_string as; block = (ndis_miniport_block *)cfg; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); if (key->us_len == 0 || key->us_buf == NULL) { *status = NDIS_STATUS_FAILURE; return; } if (RtlUnicodeStringToAnsiString(&as, key, TRUE)) { *status = NDIS_STATUS_RESOURCES; return; } keystr = as.as_buf; /* * See if registry key is already in a list of known keys * included with the driver. */ TAILQ_FOREACH(e, device_get_sysctl_ctx(sc->ndis_dev), link) { oidp = e->entry; if (strcasecmp(oidp->oid_name, keystr) == 0) { if (strcmp((char *)oidp->oid_arg1, "UNSET") == 0) { RtlFreeAnsiString(&as); *status = NDIS_STATUS_FAILURE; return; } *status = ndis_encode_parm(block, oidp, type, parm); RtlFreeAnsiString(&as); return; } } /* * If the key didn't match, add it to the list of dynamically * created ones. Sometimes, drivers refer to registry keys * that aren't documented in their .INF files. These keys * are supposed to be created by some sort of utility or * control panel snap-in that comes with the driver software. * Sometimes it's useful to be able to manipulate these. * If the driver requests the key in the form of a string, * make its default value an empty string, otherwise default * it to "0". */ if (type == ndis_parm_int || type == ndis_parm_hexint) ndis_add_sysctl(sc, keystr, "(dynamic integer key)", "UNSET", CTLFLAG_RW); else ndis_add_sysctl(sc, keystr, "(dynamic string key)", "UNSET", CTLFLAG_RW); RtlFreeAnsiString(&as); *status = NDIS_STATUS_FAILURE; } static ndis_status ndis_decode_parm(block, parm, val) ndis_miniport_block *block; ndis_config_parm *parm; char *val; { unicode_string *ustr; ansi_string as; switch(parm->ncp_type) { case ndis_parm_string: ustr = &parm->ncp_parmdata.ncp_stringdata; if (RtlUnicodeStringToAnsiString(&as, ustr, TRUE)) return (NDIS_STATUS_RESOURCES); bcopy(as.as_buf, val, as.as_len); RtlFreeAnsiString(&as); break; case ndis_parm_int: sprintf(val, "%d", parm->ncp_parmdata.ncp_intdata); break; case ndis_parm_hexint: sprintf(val, "%xu", parm->ncp_parmdata.ncp_intdata); break; default: return (NDIS_STATUS_FAILURE); break; } return (NDIS_STATUS_SUCCESS); } static void NdisWriteConfiguration(status, cfg, key, parm) ndis_status *status; ndis_handle cfg; unicode_string *key; ndis_config_parm *parm; { ansi_string as; char *keystr = NULL; ndis_miniport_block *block; struct ndis_softc *sc; struct sysctl_oid *oidp; struct sysctl_ctx_entry *e; char val[256]; block = (ndis_miniport_block *)cfg; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); if (RtlUnicodeStringToAnsiString(&as, key, TRUE)) { *status = NDIS_STATUS_RESOURCES; return; } keystr = as.as_buf; /* Decode the parameter into a string. */ bzero(val, sizeof(val)); *status = ndis_decode_parm(block, parm, val); if (*status != NDIS_STATUS_SUCCESS) { RtlFreeAnsiString(&as); return; } /* See if the key already exists. */ TAILQ_FOREACH(e, device_get_sysctl_ctx(sc->ndis_dev), link) { oidp = e->entry; if (strcasecmp(oidp->oid_name, keystr) == 0) { /* Found it, set the value. */ strcpy((char *)oidp->oid_arg1, val); RtlFreeAnsiString(&as); return; } } /* Not found, add a new key with the specified value. */ ndis_add_sysctl(sc, keystr, "(dynamically set key)", val, CTLFLAG_RW); RtlFreeAnsiString(&as); *status = NDIS_STATUS_SUCCESS; } static void NdisCloseConfiguration(cfg) ndis_handle cfg; { list_entry *e; ndis_parmlist_entry *pe; ndis_miniport_block *block; ndis_config_parm *p; block = (ndis_miniport_block *)cfg; while (!IsListEmpty(&block->nmb_parmlist)) { e = RemoveHeadList(&block->nmb_parmlist); pe = CONTAINING_RECORD(e, ndis_parmlist_entry, np_list); p = &pe->np_parm; if (p->ncp_type == ndis_parm_string) RtlFreeUnicodeString(&p->ncp_parmdata.ncp_stringdata); ExFreePool(e); } } /* * Initialize a Windows spinlock. */ static void NdisAllocateSpinLock(lock) ndis_spin_lock *lock; { KeInitializeSpinLock(&lock->nsl_spinlock); lock->nsl_kirql = 0; } /* * Destroy a Windows spinlock. This is a no-op for now. There are two reasons * for this. One is that it's sort of superfluous: we don't have to do anything * special to deallocate the spinlock. The other is that there are some buggy * drivers which call NdisFreeSpinLock() _after_ calling NdisFreeMemory() on * the block of memory in which the spinlock resides. (Yes, ADMtek, I'm * talking to you.) */ static void NdisFreeSpinLock(lock) ndis_spin_lock *lock; { #ifdef notdef KeInitializeSpinLock(&lock->nsl_spinlock); lock->nsl_kirql = 0; #endif } /* * Acquire a spinlock from IRQL <= DISPATCH_LEVEL. */ static void NdisAcquireSpinLock(lock) ndis_spin_lock *lock; { KeAcquireSpinLock(&lock->nsl_spinlock, &lock->nsl_kirql); } /* * Release a spinlock from IRQL == DISPATCH_LEVEL. */ static void NdisReleaseSpinLock(lock) ndis_spin_lock *lock; { KeReleaseSpinLock(&lock->nsl_spinlock, lock->nsl_kirql); } /* * Acquire a spinlock when already running at IRQL == DISPATCH_LEVEL. */ static void NdisDprAcquireSpinLock(lock) ndis_spin_lock *lock; { KeAcquireSpinLockAtDpcLevel(&lock->nsl_spinlock); } /* * Release a spinlock without leaving IRQL == DISPATCH_LEVEL. */ static void NdisDprReleaseSpinLock(lock) ndis_spin_lock *lock; { KeReleaseSpinLockFromDpcLevel(&lock->nsl_spinlock); } static void NdisInitializeReadWriteLock(lock) ndis_rw_lock *lock; { KeInitializeSpinLock(&lock->nrl_spinlock); bzero((char *)&lock->nrl_rsvd, sizeof(lock->nrl_rsvd)); } static void NdisAcquireReadWriteLock(ndis_rw_lock *lock, uint8_t writeacc, ndis_lock_state *state) { if (writeacc == TRUE) { KeAcquireSpinLock(&lock->nrl_spinlock, &state->nls_oldirql); lock->nrl_rsvd[0]++; } else lock->nrl_rsvd[1]++; } static void NdisReleaseReadWriteLock(lock, state) ndis_rw_lock *lock; ndis_lock_state *state; { if (lock->nrl_rsvd[0]) { lock->nrl_rsvd[0]--; KeReleaseSpinLock(&lock->nrl_spinlock, state->nls_oldirql); } else lock->nrl_rsvd[1]--; } static uint32_t NdisReadPciSlotInformation(adapter, slot, offset, buf, len) ndis_handle adapter; uint32_t slot; uint32_t offset; void *buf; uint32_t len; { ndis_miniport_block *block; int i; char *dest; device_t dev; block = (ndis_miniport_block *)adapter; dest = buf; if (block == NULL) return (0); dev = block->nmb_physdeviceobj->do_devext; /* * I have a test system consisting of a Sun w2100z * dual 2.4Ghz Opteron machine and an Atheros 802.11a/b/g * "Aries" miniPCI NIC. (The NIC is installed in the * machine using a miniPCI to PCI bus adapter card.) * When running in SMP mode, I found that * performing a large number of consecutive calls to * NdisReadPciSlotInformation() would result in a * sudden system reset (or in some cases a freeze). * My suspicion is that the multiple reads are somehow * triggering a fatal PCI bus error that leads to a * machine check. The 1us delay in the loop below * seems to prevent this problem. */ for (i = 0; i < len; i++) { DELAY(1); dest[i] = pci_read_config(dev, i + offset, 1); } return (len); } static uint32_t NdisWritePciSlotInformation(adapter, slot, offset, buf, len) ndis_handle adapter; uint32_t slot; uint32_t offset; void *buf; uint32_t len; { ndis_miniport_block *block; int i; char *dest; device_t dev; block = (ndis_miniport_block *)adapter; dest = buf; if (block == NULL) return (0); dev = block->nmb_physdeviceobj->do_devext; for (i = 0; i < len; i++) { DELAY(1); pci_write_config(dev, i + offset, dest[i], 1); } return (len); } /* * The errorlog routine uses a variable argument list, so we * have to declare it this way. */ #define ERRMSGLEN 512 static void NdisWriteErrorLogEntry(ndis_handle adapter, ndis_error_code code, uint32_t numerrors, ...) { ndis_miniport_block *block; va_list ap; int i, error; char *str = NULL; uint16_t flags; device_t dev; driver_object *drv; struct ndis_softc *sc; struct ifnet *ifp; unicode_string us; ansi_string as = { 0, 0, NULL }; block = (ndis_miniport_block *)adapter; dev = block->nmb_physdeviceobj->do_devext; drv = block->nmb_deviceobj->do_drvobj; sc = device_get_softc(dev); ifp = sc->ifp; if (ifp != NULL && ifp->if_flags & IFF_DEBUG) { error = pe_get_message((vm_offset_t)drv->dro_driverstart, code, &str, &i, &flags); if (error == 0) { if (flags & MESSAGE_RESOURCE_UNICODE) { RtlInitUnicodeString(&us, (uint16_t *)str); if (RtlUnicodeStringToAnsiString(&as, &us, TRUE) == STATUS_SUCCESS) str = as.as_buf; else str = NULL; } } } device_printf(dev, "NDIS ERROR: %x (%s)\n", code, str == NULL ? "unknown error" : str); if (ifp != NULL && ifp->if_flags & IFF_DEBUG) { device_printf(dev, "NDIS NUMERRORS: %x\n", numerrors); va_start(ap, numerrors); for (i = 0; i < numerrors; i++) device_printf(dev, "argptr: %p\n", va_arg(ap, void *)); va_end(ap); } if (as.as_len) RtlFreeAnsiString(&as); } static void ndis_map_cb(arg, segs, nseg, error) void *arg; bus_dma_segment_t *segs; int nseg; int error; { struct ndis_map_arg *ctx; int i; if (error) return; ctx = arg; for (i = 0; i < nseg; i++) { ctx->nma_fraglist[i].npu_physaddr.np_quad = segs[i].ds_addr; ctx->nma_fraglist[i].npu_len = segs[i].ds_len; } ctx->nma_cnt = nseg; } static void NdisMStartBufferPhysicalMapping(ndis_handle adapter, ndis_buffer *buf, uint32_t mapreg, uint8_t writedev, ndis_paddr_unit *addrarray, uint32_t *arraysize) { ndis_miniport_block *block; struct ndis_softc *sc; struct ndis_map_arg nma; bus_dmamap_t map; int error; if (adapter == NULL) return; block = (ndis_miniport_block *)adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); if (mapreg > sc->ndis_mmapcnt) return; map = sc->ndis_mmaps[mapreg]; nma.nma_fraglist = addrarray; error = bus_dmamap_load(sc->ndis_mtag, map, MmGetMdlVirtualAddress(buf), MmGetMdlByteCount(buf), ndis_map_cb, (void *)&nma, BUS_DMA_NOWAIT); if (error) return; bus_dmamap_sync(sc->ndis_mtag, map, writedev ? BUS_DMASYNC_PREWRITE : BUS_DMASYNC_PREREAD); *arraysize = nma.nma_cnt; } static void NdisMCompleteBufferPhysicalMapping(adapter, buf, mapreg) ndis_handle adapter; ndis_buffer *buf; uint32_t mapreg; { ndis_miniport_block *block; struct ndis_softc *sc; bus_dmamap_t map; if (adapter == NULL) return; block = (ndis_miniport_block *)adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); if (mapreg > sc->ndis_mmapcnt) return; map = sc->ndis_mmaps[mapreg]; bus_dmamap_sync(sc->ndis_mtag, map, BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->ndis_mtag, map); } /* * This is an older (?) timer init routine which doesn't * accept a miniport context handle. Serialized miniports should * never call this function. */ static void NdisInitializeTimer(timer, func, ctx) ndis_timer *timer; ndis_timer_function func; void *ctx; { KeInitializeTimer(&timer->nt_ktimer); KeInitializeDpc(&timer->nt_kdpc, func, ctx); KeSetImportanceDpc(&timer->nt_kdpc, KDPC_IMPORTANCE_LOW); } static void ndis_timercall(dpc, timer, sysarg1, sysarg2) kdpc *dpc; ndis_miniport_timer *timer; void *sysarg1; void *sysarg2; { /* * Since we're called as a DPC, we should be running * at DISPATCH_LEVEL here. This means to acquire the * spinlock, we can use KeAcquireSpinLockAtDpcLevel() * rather than KeAcquireSpinLock(). */ if (NDIS_SERIALIZED(timer->nmt_block)) KeAcquireSpinLockAtDpcLevel(&timer->nmt_block->nmb_lock); MSCALL4(timer->nmt_timerfunc, dpc, timer->nmt_timerctx, sysarg1, sysarg2); if (NDIS_SERIALIZED(timer->nmt_block)) KeReleaseSpinLockFromDpcLevel(&timer->nmt_block->nmb_lock); } /* * For a long time I wondered why there were two NDIS timer initialization * routines, and why this one needed an NDIS_MINIPORT_TIMER and the * MiniportAdapterHandle. The NDIS_MINIPORT_TIMER has its own callout * function and context pointers separate from those in the DPC, which * allows for another level of indirection: when the timer fires, we * can have our own timer function invoked, and from there we can call * the driver's function. But why go to all that trouble? Then it hit * me: for serialized miniports, the timer callouts are not re-entrant. * By trapping the callouts and having access to the MiniportAdapterHandle, * we can protect the driver callouts by acquiring the NDIS serialization * lock. This is essential for allowing serialized miniports to work * correctly on SMP systems. On UP hosts, setting IRQL to DISPATCH_LEVEL * is enough to prevent other threads from pre-empting you, but with * SMP, you must acquire a lock as well, otherwise the other CPU is * free to clobber you. */ static void NdisMInitializeTimer(timer, handle, func, ctx) ndis_miniport_timer *timer; ndis_handle handle; ndis_timer_function func; void *ctx; { ndis_miniport_block *block; struct ndis_softc *sc; block = (ndis_miniport_block *)handle; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); /* Save the driver's funcptr and context */ timer->nmt_timerfunc = func; timer->nmt_timerctx = ctx; timer->nmt_block = handle; /* * Set up the timer so it will call our intermediate DPC. * Be sure to use the wrapped entry point, since * ntoskrnl_run_dpc() expects to invoke a function with * Microsoft calling conventions. */ KeInitializeTimer(&timer->nmt_ktimer); KeInitializeDpc(&timer->nmt_kdpc, ndis_findwrap((funcptr)ndis_timercall), timer); timer->nmt_ktimer.k_dpc = &timer->nmt_kdpc; } /* * In Windows, there's both an NdisMSetTimer() and an NdisSetTimer(), * but the former is just a macro wrapper around the latter. */ static void NdisSetTimer(timer, msecs) ndis_timer *timer; uint32_t msecs; { /* * KeSetTimer() wants the period in * hundred nanosecond intervals. */ KeSetTimer(&timer->nt_ktimer, ((int64_t)msecs * -10000), &timer->nt_kdpc); } static void NdisMSetPeriodicTimer(timer, msecs) ndis_miniport_timer *timer; uint32_t msecs; { KeSetTimerEx(&timer->nmt_ktimer, ((int64_t)msecs * -10000), msecs, &timer->nmt_kdpc); } /* * Technically, this is really NdisCancelTimer(), but we also * (ab)use it for NdisMCancelTimer(), since in our implementation * we don't need the extra info in the ndis_miniport_timer * structure just to cancel a timer. */ static void NdisMCancelTimer(timer, cancelled) ndis_timer *timer; uint8_t *cancelled; { *cancelled = KeCancelTimer(&timer->nt_ktimer); } static void NdisMQueryAdapterResources(status, adapter, list, buflen) ndis_status *status; ndis_handle adapter; ndis_resource_list *list; uint32_t *buflen; { ndis_miniport_block *block; struct ndis_softc *sc; int rsclen; block = (ndis_miniport_block *)adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); rsclen = sizeof(ndis_resource_list) + (sizeof(cm_partial_resource_desc) * (sc->ndis_rescnt - 1)); if (*buflen < rsclen) { *buflen = rsclen; *status = NDIS_STATUS_INVALID_LENGTH; return; } bcopy((char *)block->nmb_rlist, (char *)list, rsclen); *status = NDIS_STATUS_SUCCESS; } static ndis_status NdisMRegisterIoPortRange(offset, adapter, port, numports) void **offset; ndis_handle adapter; uint32_t port; uint32_t numports; { struct ndis_miniport_block *block; struct ndis_softc *sc; if (adapter == NULL) return (NDIS_STATUS_FAILURE); block = (ndis_miniport_block *)adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); if (sc->ndis_res_io == NULL) return (NDIS_STATUS_FAILURE); /* Don't let the device map more ports than we have. */ if (rman_get_size(sc->ndis_res_io) < numports) return (NDIS_STATUS_INVALID_LENGTH); *offset = (void *)rman_get_start(sc->ndis_res_io); return (NDIS_STATUS_SUCCESS); } static void NdisMDeregisterIoPortRange(adapter, port, numports, offset) ndis_handle adapter; uint32_t port; uint32_t numports; void *offset; { } static void NdisReadNetworkAddress(status, addr, addrlen, adapter) ndis_status *status; void **addr; uint32_t *addrlen; ndis_handle adapter; { struct ndis_softc *sc; ndis_miniport_block *block; uint8_t empty[] = { 0, 0, 0, 0, 0, 0 }; block = (ndis_miniport_block *)adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); if (sc->ifp == NULL) { *status = NDIS_STATUS_FAILURE; return; } if (sc->ifp->if_addr == NULL || bcmp(IF_LLADDR(sc->ifp), empty, ETHER_ADDR_LEN) == 0) *status = NDIS_STATUS_FAILURE; else { *addr = IF_LLADDR(sc->ifp); *addrlen = ETHER_ADDR_LEN; *status = NDIS_STATUS_SUCCESS; } } static ndis_status NdisQueryMapRegisterCount(bustype, cnt) uint32_t bustype; uint32_t *cnt; { *cnt = 8192; return (NDIS_STATUS_SUCCESS); } static ndis_status NdisMAllocateMapRegisters(ndis_handle adapter, uint32_t dmachannel, uint8_t dmasize, uint32_t physmapneeded, uint32_t maxmap) { struct ndis_softc *sc; ndis_miniport_block *block; int error, i, nseg = NDIS_MAXSEG; block = (ndis_miniport_block *)adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); sc->ndis_mmaps = malloc(sizeof(bus_dmamap_t) * physmapneeded, M_DEVBUF, M_NOWAIT|M_ZERO); if (sc->ndis_mmaps == NULL) return (NDIS_STATUS_RESOURCES); error = bus_dma_tag_create(sc->ndis_parent_tag, ETHER_ALIGN, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, maxmap * nseg, nseg, maxmap, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->ndis_mtag); if (error) { free(sc->ndis_mmaps, M_DEVBUF); return (NDIS_STATUS_RESOURCES); } for (i = 0; i < physmapneeded; i++) bus_dmamap_create(sc->ndis_mtag, 0, &sc->ndis_mmaps[i]); sc->ndis_mmapcnt = physmapneeded; return (NDIS_STATUS_SUCCESS); } static void NdisMFreeMapRegisters(adapter) ndis_handle adapter; { struct ndis_softc *sc; ndis_miniport_block *block; int i; block = (ndis_miniport_block *)adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); for (i = 0; i < sc->ndis_mmapcnt; i++) bus_dmamap_destroy(sc->ndis_mtag, sc->ndis_mmaps[i]); free(sc->ndis_mmaps, M_DEVBUF); bus_dma_tag_destroy(sc->ndis_mtag); } static void ndis_mapshared_cb(arg, segs, nseg, error) void *arg; bus_dma_segment_t *segs; int nseg; int error; { ndis_physaddr *p; if (error || nseg > 1) return; p = arg; p->np_quad = segs[0].ds_addr; } /* * This maps to bus_dmamem_alloc(). */ static void NdisMAllocateSharedMemory(ndis_handle adapter, uint32_t len, uint8_t cached, void **vaddr, ndis_physaddr *paddr) { ndis_miniport_block *block; struct ndis_softc *sc; struct ndis_shmem *sh; int error; if (adapter == NULL) return; block = (ndis_miniport_block *)adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); sh = malloc(sizeof(struct ndis_shmem), M_DEVBUF, M_NOWAIT|M_ZERO); if (sh == NULL) return; InitializeListHead(&sh->ndis_list); /* * When performing shared memory allocations, create a tag * with a lowaddr limit that restricts physical memory mappings * so that they all fall within the first 1GB of memory. * At least one device/driver combination (Linksys Instant * Wireless PCI Card V2.7, Broadcom 802.11b) seems to have * problems with performing DMA operations with physical * addresses that lie above the 1GB mark. I don't know if this * is a hardware limitation or if the addresses are being * truncated within the driver, but this seems to be the only * way to make these cards work reliably in systems with more * than 1GB of physical memory. */ error = bus_dma_tag_create(sc->ndis_parent_tag, 64, 0, NDIS_BUS_SPACE_SHARED_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, BUS_DMA_ALLOCNOW, NULL, NULL, &sh->ndis_stag); if (error) { free(sh, M_DEVBUF); return; } error = bus_dmamem_alloc(sh->ndis_stag, vaddr, BUS_DMA_NOWAIT | BUS_DMA_ZERO, &sh->ndis_smap); if (error) { bus_dma_tag_destroy(sh->ndis_stag); free(sh, M_DEVBUF); return; } error = bus_dmamap_load(sh->ndis_stag, sh->ndis_smap, *vaddr, len, ndis_mapshared_cb, (void *)paddr, BUS_DMA_NOWAIT); if (error) { bus_dmamem_free(sh->ndis_stag, *vaddr, sh->ndis_smap); bus_dma_tag_destroy(sh->ndis_stag); free(sh, M_DEVBUF); return; } /* * Save the physical address along with the source address. * The AirGo MIMO driver will call NdisMFreeSharedMemory() * with a bogus virtual address sometimes, but with a valid * physical address. To keep this from causing trouble, we * use the physical address to as a sanity check in case * searching based on the virtual address fails. */ NDIS_LOCK(sc); sh->ndis_paddr.np_quad = paddr->np_quad; sh->ndis_saddr = *vaddr; InsertHeadList((&sc->ndis_shlist), (&sh->ndis_list)); NDIS_UNLOCK(sc); } struct ndis_allocwork { uint32_t na_len; uint8_t na_cached; void *na_ctx; io_workitem *na_iw; }; static void ndis_asyncmem_complete(dobj, arg) device_object *dobj; void *arg; { ndis_miniport_block *block; struct ndis_softc *sc; struct ndis_allocwork *w; void *vaddr; ndis_physaddr paddr; ndis_allocdone_handler donefunc; w = arg; block = (ndis_miniport_block *)dobj->do_devext; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); vaddr = NULL; paddr.np_quad = 0; donefunc = sc->ndis_chars->nmc_allocate_complete_func; NdisMAllocateSharedMemory(block, w->na_len, w->na_cached, &vaddr, &paddr); MSCALL5(donefunc, block, vaddr, &paddr, w->na_len, w->na_ctx); IoFreeWorkItem(w->na_iw); free(w, M_DEVBUF); } static ndis_status NdisMAllocateSharedMemoryAsync(ndis_handle adapter, uint32_t len, uint8_t cached, void *ctx) { ndis_miniport_block *block; struct ndis_allocwork *w; io_workitem *iw; io_workitem_func ifw; if (adapter == NULL) return (NDIS_STATUS_FAILURE); block = adapter; iw = IoAllocateWorkItem(block->nmb_deviceobj); if (iw == NULL) return (NDIS_STATUS_FAILURE); w = malloc(sizeof(struct ndis_allocwork), M_TEMP, M_NOWAIT); if (w == NULL) return (NDIS_STATUS_FAILURE); w->na_cached = cached; w->na_len = len; w->na_ctx = ctx; w->na_iw = iw; ifw = (io_workitem_func)ndis_findwrap((funcptr)ndis_asyncmem_complete); IoQueueWorkItem(iw, ifw, WORKQUEUE_DELAYED, w); return (NDIS_STATUS_PENDING); } static void NdisMFreeSharedMemory(ndis_handle adapter, uint32_t len, uint8_t cached, void *vaddr, ndis_physaddr paddr) { ndis_miniport_block *block; struct ndis_softc *sc; struct ndis_shmem *sh = NULL; list_entry *l; if (vaddr == NULL || adapter == NULL) return; block = (ndis_miniport_block *)adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); /* Sanity check: is list empty? */ if (IsListEmpty(&sc->ndis_shlist)) return; NDIS_LOCK(sc); l = sc->ndis_shlist.nle_flink; while (l != &sc->ndis_shlist) { sh = CONTAINING_RECORD(l, struct ndis_shmem, ndis_list); if (sh->ndis_saddr == vaddr) break; /* * Check the physaddr too, just in case the driver lied * about the virtual address. */ if (sh->ndis_paddr.np_quad == paddr.np_quad) break; l = l->nle_flink; } if (sh == NULL) { NDIS_UNLOCK(sc); printf("NDIS: buggy driver tried to free " "invalid shared memory: vaddr: %p paddr: 0x%jx\n", vaddr, (uintmax_t)paddr.np_quad); return; } RemoveEntryList(&sh->ndis_list); NDIS_UNLOCK(sc); bus_dmamap_unload(sh->ndis_stag, sh->ndis_smap); bus_dmamem_free(sh->ndis_stag, sh->ndis_saddr, sh->ndis_smap); bus_dma_tag_destroy(sh->ndis_stag); free(sh, M_DEVBUF); } static ndis_status NdisMMapIoSpace(vaddr, adapter, paddr, len) void **vaddr; ndis_handle adapter; ndis_physaddr paddr; uint32_t len; { if (adapter == NULL) return (NDIS_STATUS_FAILURE); *vaddr = MmMapIoSpace(paddr.np_quad, len, 0); if (*vaddr == NULL) return (NDIS_STATUS_FAILURE); return (NDIS_STATUS_SUCCESS); } static void NdisMUnmapIoSpace(adapter, vaddr, len) ndis_handle adapter; void *vaddr; uint32_t len; { MmUnmapIoSpace(vaddr, len); } static uint32_t NdisGetCacheFillSize(void) { return (128); } static uint32_t NdisMGetDmaAlignment(handle) ndis_handle handle; { return (16); } /* * NDIS has two methods for dealing with NICs that support DMA. * One is to just pass packets to the driver and let it call * NdisMStartBufferPhysicalMapping() to map each buffer in the packet * all by itself, and the other is to let the NDIS library handle the * buffer mapping internally, and hand the driver an already populated * scatter/gather fragment list. If the driver calls * NdisMInitializeScatterGatherDma(), it wants to use the latter * method. */ static ndis_status NdisMInitializeScatterGatherDma(ndis_handle adapter, uint8_t is64, uint32_t maxphysmap) { struct ndis_softc *sc; ndis_miniport_block *block; int error; if (adapter == NULL) return (NDIS_STATUS_FAILURE); block = (ndis_miniport_block *)adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); /* Don't do this twice. */ if (sc->ndis_sc == 1) return (NDIS_STATUS_SUCCESS); error = bus_dma_tag_create(sc->ndis_parent_tag, ETHER_ALIGN, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES * NDIS_MAXSEG, NDIS_MAXSEG, MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->ndis_ttag); sc->ndis_sc = 1; return (NDIS_STATUS_SUCCESS); } void NdisAllocatePacketPool(status, pool, descnum, protrsvdlen) ndis_status *status; ndis_handle *pool; uint32_t descnum; uint32_t protrsvdlen; { ndis_packet_pool *p; ndis_packet *packets; int i; p = ExAllocatePoolWithTag(NonPagedPool, sizeof(ndis_packet_pool), 0); if (p == NULL) { *status = NDIS_STATUS_RESOURCES; return; } p->np_cnt = descnum + NDIS_POOL_EXTRA; p->np_protrsvd = protrsvdlen; p->np_len = sizeof(ndis_packet) + protrsvdlen; packets = ExAllocatePoolWithTag(NonPagedPool, p->np_cnt * p->np_len, 0); if (packets == NULL) { ExFreePool(p); *status = NDIS_STATUS_RESOURCES; return; } p->np_pktmem = packets; for (i = 0; i < p->np_cnt; i++) InterlockedPushEntrySList(&p->np_head, (struct slist_entry *)&packets[i]); #ifdef NDIS_DEBUG_PACKETS p->np_dead = 0; KeInitializeSpinLock(&p->np_lock); KeInitializeEvent(&p->np_event, EVENT_TYPE_NOTIFY, TRUE); #endif *pool = p; *status = NDIS_STATUS_SUCCESS; } void NdisAllocatePacketPoolEx(status, pool, descnum, oflowdescnum, protrsvdlen) ndis_status *status; ndis_handle *pool; uint32_t descnum; uint32_t oflowdescnum; uint32_t protrsvdlen; { return (NdisAllocatePacketPool(status, pool, descnum + oflowdescnum, protrsvdlen)); } uint32_t NdisPacketPoolUsage(pool) ndis_handle pool; { ndis_packet_pool *p; p = (ndis_packet_pool *)pool; return (p->np_cnt - ExQueryDepthSList(&p->np_head)); } void NdisFreePacketPool(pool) ndis_handle pool; { ndis_packet_pool *p; int usage; #ifdef NDIS_DEBUG_PACKETS uint8_t irql; #endif p = (ndis_packet_pool *)pool; #ifdef NDIS_DEBUG_PACKETS KeAcquireSpinLock(&p->np_lock, &irql); #endif usage = NdisPacketPoolUsage(pool); #ifdef NDIS_DEBUG_PACKETS if (usage) { p->np_dead = 1; KeResetEvent(&p->np_event); KeReleaseSpinLock(&p->np_lock, irql); KeWaitForSingleObject(&p->np_event, 0, 0, FALSE, NULL); } else KeReleaseSpinLock(&p->np_lock, irql); #endif ExFreePool(p->np_pktmem); ExFreePool(p); } void NdisAllocatePacket(status, packet, pool) ndis_status *status; ndis_packet **packet; ndis_handle pool; { ndis_packet_pool *p; ndis_packet *pkt; #ifdef NDIS_DEBUG_PACKETS uint8_t irql; #endif p = (ndis_packet_pool *)pool; #ifdef NDIS_DEBUG_PACKETS KeAcquireSpinLock(&p->np_lock, &irql); if (p->np_dead) { KeReleaseSpinLock(&p->np_lock, irql); printf("NDIS: tried to allocate packet from dead pool %p\n", pool); *status = NDIS_STATUS_RESOURCES; return; } #endif pkt = (ndis_packet *)InterlockedPopEntrySList(&p->np_head); #ifdef NDIS_DEBUG_PACKETS KeReleaseSpinLock(&p->np_lock, irql); #endif if (pkt == NULL) { *status = NDIS_STATUS_RESOURCES; return; } bzero((char *)pkt, sizeof(ndis_packet)); /* Save pointer to the pool. */ pkt->np_private.npp_pool = pool; /* Set the oob offset pointer. Lots of things expect this. */ pkt->np_private.npp_packetooboffset = offsetof(ndis_packet, np_oob); /* * We must initialize the packet flags correctly in order * for the NDIS_SET_PACKET_MEDIA_SPECIFIC_INFO() and * NDIS_GET_PACKET_MEDIA_SPECIFIC_INFO() macros to work * correctly. */ pkt->np_private.npp_ndispktflags = NDIS_PACKET_ALLOCATED_BY_NDIS; pkt->np_private.npp_validcounts = FALSE; *packet = pkt; *status = NDIS_STATUS_SUCCESS; } void NdisFreePacket(packet) ndis_packet *packet; { ndis_packet_pool *p; #ifdef NDIS_DEBUG_PACKETS uint8_t irql; #endif p = (ndis_packet_pool *)packet->np_private.npp_pool; #ifdef NDIS_DEBUG_PACKETS KeAcquireSpinLock(&p->np_lock, &irql); #endif InterlockedPushEntrySList(&p->np_head, (slist_entry *)packet); #ifdef NDIS_DEBUG_PACKETS if (p->np_dead) { if (ExQueryDepthSList(&p->np_head) == p->np_cnt) KeSetEvent(&p->np_event, IO_NO_INCREMENT, FALSE); } KeReleaseSpinLock(&p->np_lock, irql); #endif } static void NdisUnchainBufferAtFront(packet, buf) ndis_packet *packet; ndis_buffer **buf; { ndis_packet_private *priv; if (packet == NULL || buf == NULL) return; priv = &packet->np_private; priv->npp_validcounts = FALSE; if (priv->npp_head == priv->npp_tail) { *buf = priv->npp_head; priv->npp_head = priv->npp_tail = NULL; } else { *buf = priv->npp_head; priv->npp_head = (*buf)->mdl_next; } } static void NdisUnchainBufferAtBack(packet, buf) ndis_packet *packet; ndis_buffer **buf; { ndis_packet_private *priv; ndis_buffer *tmp; if (packet == NULL || buf == NULL) return; priv = &packet->np_private; priv->npp_validcounts = FALSE; if (priv->npp_head == priv->npp_tail) { *buf = priv->npp_head; priv->npp_head = priv->npp_tail = NULL; } else { *buf = priv->npp_tail; tmp = priv->npp_head; while (tmp->mdl_next != priv->npp_tail) tmp = tmp->mdl_next; priv->npp_tail = tmp; tmp->mdl_next = NULL; } } /* * The NDIS "buffer" is really an MDL (memory descriptor list) * which is used to describe a buffer in a way that allows it * to mapped into different contexts. We have to be careful how * we handle them: in some versions of Windows, the NdisFreeBuffer() * routine is an actual function in the NDIS API, but in others * it's just a macro wrapper around IoFreeMdl(). There's really * no way to use the 'descnum' parameter to count how many * "buffers" are allocated since in order to use IoFreeMdl() to * dispose of a buffer, we have to use IoAllocateMdl() to allocate * them, and IoAllocateMdl() just grabs them out of the heap. */ static void NdisAllocateBufferPool(status, pool, descnum) ndis_status *status; ndis_handle *pool; uint32_t descnum; { /* * The only thing we can really do here is verify that descnum * is a reasonable value, but I really don't know what to check * it against. */ *pool = NonPagedPool; *status = NDIS_STATUS_SUCCESS; } static void NdisFreeBufferPool(pool) ndis_handle pool; { } static void NdisAllocateBuffer(status, buffer, pool, vaddr, len) ndis_status *status; ndis_buffer **buffer; ndis_handle pool; void *vaddr; uint32_t len; { ndis_buffer *buf; buf = IoAllocateMdl(vaddr, len, FALSE, FALSE, NULL); if (buf == NULL) { *status = NDIS_STATUS_RESOURCES; return; } MmBuildMdlForNonPagedPool(buf); *buffer = buf; *status = NDIS_STATUS_SUCCESS; } static void NdisFreeBuffer(buf) ndis_buffer *buf; { IoFreeMdl(buf); } /* Aw c'mon. */ static uint32_t NdisBufferLength(buf) ndis_buffer *buf; { return (MmGetMdlByteCount(buf)); } /* * Get the virtual address and length of a buffer. * Note: the vaddr argument is optional. */ static void NdisQueryBuffer(buf, vaddr, len) ndis_buffer *buf; void **vaddr; uint32_t *len; { if (vaddr != NULL) *vaddr = MmGetMdlVirtualAddress(buf); *len = MmGetMdlByteCount(buf); } /* Same as above -- we don't care about the priority. */ static void NdisQueryBufferSafe(buf, vaddr, len, prio) ndis_buffer *buf; void **vaddr; uint32_t *len; uint32_t prio; { if (vaddr != NULL) *vaddr = MmGetMdlVirtualAddress(buf); *len = MmGetMdlByteCount(buf); } /* Damnit Microsoft!! How many ways can you do the same thing?! */ static void * NdisBufferVirtualAddress(buf) ndis_buffer *buf; { return (MmGetMdlVirtualAddress(buf)); } static void * NdisBufferVirtualAddressSafe(buf, prio) ndis_buffer *buf; uint32_t prio; { return (MmGetMdlVirtualAddress(buf)); } static void NdisAdjustBufferLength(buf, len) ndis_buffer *buf; int len; { MmGetMdlByteCount(buf) = len; } static uint32_t NdisInterlockedIncrement(addend) uint32_t *addend; { atomic_add_long((u_long *)addend, 1); return (*addend); } static uint32_t NdisInterlockedDecrement(addend) uint32_t *addend; { atomic_subtract_long((u_long *)addend, 1); return (*addend); } +static uint32_t +NdisGetVersion(void) +{ + return (0x00050001); +} + static void NdisInitializeEvent(event) ndis_event *event; { /* * NDIS events are always notification * events, and should be initialized to the * not signaled state. */ KeInitializeEvent(&event->ne_event, EVENT_TYPE_NOTIFY, FALSE); } static void NdisSetEvent(event) ndis_event *event; { KeSetEvent(&event->ne_event, IO_NO_INCREMENT, FALSE); } static void NdisResetEvent(event) ndis_event *event; { KeResetEvent(&event->ne_event); } static uint8_t NdisWaitEvent(event, msecs) ndis_event *event; uint32_t msecs; { int64_t duetime; uint32_t rval; duetime = ((int64_t)msecs * -10000); rval = KeWaitForSingleObject(event, 0, 0, TRUE, msecs ? & duetime : NULL); if (rval == STATUS_TIMEOUT) return (FALSE); return (TRUE); } static ndis_status NdisUnicodeStringToAnsiString(dstr, sstr) ansi_string *dstr; unicode_string *sstr; { uint32_t rval; rval = RtlUnicodeStringToAnsiString(dstr, sstr, FALSE); if (rval == STATUS_INSUFFICIENT_RESOURCES) return (NDIS_STATUS_RESOURCES); if (rval) return (NDIS_STATUS_FAILURE); return (NDIS_STATUS_SUCCESS); } static ndis_status NdisAnsiStringToUnicodeString(dstr, sstr) unicode_string *dstr; ansi_string *sstr; { uint32_t rval; rval = RtlAnsiStringToUnicodeString(dstr, sstr, FALSE); if (rval == STATUS_INSUFFICIENT_RESOURCES) return (NDIS_STATUS_RESOURCES); if (rval) return (NDIS_STATUS_FAILURE); return (NDIS_STATUS_SUCCESS); } static ndis_status NdisMPciAssignResources(adapter, slot, list) ndis_handle adapter; uint32_t slot; ndis_resource_list **list; { ndis_miniport_block *block; if (adapter == NULL || list == NULL) return (NDIS_STATUS_FAILURE); block = (ndis_miniport_block *)adapter; *list = block->nmb_rlist; return (NDIS_STATUS_SUCCESS); } static uint8_t ndis_intr(iobj, arg) kinterrupt *iobj; void *arg; { struct ndis_softc *sc; uint8_t is_our_intr = FALSE; int call_isr = 0; ndis_miniport_interrupt *intr; sc = arg; intr = sc->ndis_block->nmb_interrupt; if (intr == NULL || sc->ndis_block->nmb_miniportadapterctx == NULL) return (FALSE); if (sc->ndis_block->nmb_interrupt->ni_isrreq == TRUE) MSCALL3(intr->ni_isrfunc, &is_our_intr, &call_isr, sc->ndis_block->nmb_miniportadapterctx); else { MSCALL1(sc->ndis_chars->nmc_disable_interrupts_func, sc->ndis_block->nmb_miniportadapterctx); call_isr = 1; } if (call_isr) IoRequestDpc(sc->ndis_block->nmb_deviceobj, NULL, sc); return (is_our_intr); } static void ndis_intrhand(dpc, intr, sysarg1, sysarg2) kdpc *dpc; ndis_miniport_interrupt *intr; void *sysarg1; void *sysarg2; { struct ndis_softc *sc; ndis_miniport_block *block; ndis_handle adapter; block = intr->ni_block; adapter = block->nmb_miniportadapterctx; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); if (NDIS_SERIALIZED(sc->ndis_block)) KeAcquireSpinLockAtDpcLevel(&block->nmb_lock); MSCALL1(intr->ni_dpcfunc, adapter); /* If there's a MiniportEnableInterrupt() routine, call it. */ if (sc->ndis_chars->nmc_enable_interrupts_func != NULL) MSCALL1(sc->ndis_chars->nmc_enable_interrupts_func, adapter); if (NDIS_SERIALIZED(sc->ndis_block)) KeReleaseSpinLockFromDpcLevel(&block->nmb_lock); /* * Set the completion event if we've drained all * pending interrupts. */ KeAcquireSpinLockAtDpcLevel(&intr->ni_dpccountlock); intr->ni_dpccnt--; if (intr->ni_dpccnt == 0) KeSetEvent(&intr->ni_dpcevt, IO_NO_INCREMENT, FALSE); KeReleaseSpinLockFromDpcLevel(&intr->ni_dpccountlock); } static ndis_status NdisMRegisterInterrupt(ndis_miniport_interrupt *intr, ndis_handle adapter, uint32_t ivec, uint32_t ilevel, uint8_t reqisr, uint8_t shared, ndis_interrupt_mode imode) { ndis_miniport_block *block; ndis_miniport_characteristics *ch; struct ndis_softc *sc; int error; block = adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ch = IoGetDriverObjectExtension(block->nmb_deviceobj->do_drvobj, (void *)1); intr->ni_rsvd = ExAllocatePoolWithTag(NonPagedPool, sizeof(struct mtx), 0); if (intr->ni_rsvd == NULL) return (NDIS_STATUS_RESOURCES); intr->ni_block = adapter; intr->ni_isrreq = reqisr; intr->ni_shared = shared; intr->ni_dpccnt = 0; intr->ni_isrfunc = ch->nmc_isr_func; intr->ni_dpcfunc = ch->nmc_interrupt_func; KeInitializeEvent(&intr->ni_dpcevt, EVENT_TYPE_NOTIFY, TRUE); KeInitializeDpc(&intr->ni_dpc, ndis_findwrap((funcptr)ndis_intrhand), intr); KeSetImportanceDpc(&intr->ni_dpc, KDPC_IMPORTANCE_LOW); error = IoConnectInterrupt(&intr->ni_introbj, ndis_findwrap((funcptr)ndis_intr), sc, NULL, ivec, ilevel, 0, imode, shared, 0, FALSE); if (error != STATUS_SUCCESS) return (NDIS_STATUS_FAILURE); block->nmb_interrupt = intr; return (NDIS_STATUS_SUCCESS); } static void NdisMDeregisterInterrupt(intr) ndis_miniport_interrupt *intr; { ndis_miniport_block *block; uint8_t irql; block = intr->ni_block; /* Should really be KeSynchronizeExecution() */ KeAcquireSpinLock(intr->ni_introbj->ki_lock, &irql); block->nmb_interrupt = NULL; KeReleaseSpinLock(intr->ni_introbj->ki_lock, irql); /* KeFlushQueuedDpcs(); */ /* Disconnect our ISR */ IoDisconnectInterrupt(intr->ni_introbj); KeWaitForSingleObject(&intr->ni_dpcevt, 0, 0, FALSE, NULL); KeResetEvent(&intr->ni_dpcevt); } static void NdisMRegisterAdapterShutdownHandler(adapter, shutdownctx, shutdownfunc) ndis_handle adapter; void *shutdownctx; ndis_shutdown_handler shutdownfunc; { ndis_miniport_block *block; ndis_miniport_characteristics *chars; struct ndis_softc *sc; if (adapter == NULL) return; block = (ndis_miniport_block *)adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); chars = sc->ndis_chars; chars->nmc_shutdown_handler = shutdownfunc; chars->nmc_rsvd0 = shutdownctx; } static void NdisMDeregisterAdapterShutdownHandler(adapter) ndis_handle adapter; { ndis_miniport_block *block; ndis_miniport_characteristics *chars; struct ndis_softc *sc; if (adapter == NULL) return; block = (ndis_miniport_block *)adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); chars = sc->ndis_chars; chars->nmc_shutdown_handler = NULL; chars->nmc_rsvd0 = NULL; } static uint32_t NDIS_BUFFER_TO_SPAN_PAGES(buf) ndis_buffer *buf; { if (buf == NULL) return (0); if (MmGetMdlByteCount(buf) == 0) return (1); return (SPAN_PAGES(MmGetMdlVirtualAddress(buf), MmGetMdlByteCount(buf))); } static void NdisGetBufferPhysicalArraySize(buf, pages) ndis_buffer *buf; uint32_t *pages; { if (buf == NULL) return; *pages = NDIS_BUFFER_TO_SPAN_PAGES(buf); } static void NdisQueryBufferOffset(buf, off, len) ndis_buffer *buf; uint32_t *off; uint32_t *len; { if (buf == NULL) return; *off = MmGetMdlByteOffset(buf); *len = MmGetMdlByteCount(buf); } void NdisMSleep(usecs) uint32_t usecs; { ktimer timer; /* * During system bootstrap, (i.e. cold == 1), we aren't * allowed to sleep, so we have to do a hard DELAY() * instead. */ if (cold) DELAY(usecs); else { KeInitializeTimer(&timer); KeSetTimer(&timer, ((int64_t)usecs * -10), NULL); KeWaitForSingleObject(&timer, 0, 0, FALSE, NULL); } } static uint32_t NdisReadPcmciaAttributeMemory(handle, offset, buf, len) ndis_handle handle; uint32_t offset; void *buf; uint32_t len; { struct ndis_softc *sc; ndis_miniport_block *block; bus_space_handle_t bh; bus_space_tag_t bt; char *dest; int i; if (handle == NULL) return (0); block = (ndis_miniport_block *)handle; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); dest = buf; bh = rman_get_bushandle(sc->ndis_res_am); bt = rman_get_bustag(sc->ndis_res_am); for (i = 0; i < len; i++) dest[i] = bus_space_read_1(bt, bh, (offset + i) * 2); return (i); } static uint32_t NdisWritePcmciaAttributeMemory(handle, offset, buf, len) ndis_handle handle; uint32_t offset; void *buf; uint32_t len; { struct ndis_softc *sc; ndis_miniport_block *block; bus_space_handle_t bh; bus_space_tag_t bt; char *src; int i; if (handle == NULL) return (0); block = (ndis_miniport_block *)handle; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); src = buf; bh = rman_get_bushandle(sc->ndis_res_am); bt = rman_get_bustag(sc->ndis_res_am); for (i = 0; i < len; i++) bus_space_write_1(bt, bh, (offset + i) * 2, src[i]); return (i); } static list_entry * NdisInterlockedInsertHeadList(head, entry, lock) list_entry *head; list_entry *entry; ndis_spin_lock *lock; { list_entry *flink; KeAcquireSpinLock(&lock->nsl_spinlock, &lock->nsl_kirql); flink = head->nle_flink; entry->nle_flink = flink; entry->nle_blink = head; flink->nle_blink = entry; head->nle_flink = entry; KeReleaseSpinLock(&lock->nsl_spinlock, lock->nsl_kirql); return (flink); } static list_entry * NdisInterlockedRemoveHeadList(head, lock) list_entry *head; ndis_spin_lock *lock; { list_entry *flink; list_entry *entry; KeAcquireSpinLock(&lock->nsl_spinlock, &lock->nsl_kirql); entry = head->nle_flink; flink = entry->nle_flink; head->nle_flink = flink; flink->nle_blink = head; KeReleaseSpinLock(&lock->nsl_spinlock, lock->nsl_kirql); return (entry); } static list_entry * NdisInterlockedInsertTailList(head, entry, lock) list_entry *head; list_entry *entry; ndis_spin_lock *lock; { list_entry *blink; KeAcquireSpinLock(&lock->nsl_spinlock, &lock->nsl_kirql); blink = head->nle_blink; entry->nle_flink = head; entry->nle_blink = blink; blink->nle_flink = entry; head->nle_blink = entry; KeReleaseSpinLock(&lock->nsl_spinlock, lock->nsl_kirql); return (blink); } static uint8_t NdisMSynchronizeWithInterrupt(intr, syncfunc, syncctx) ndis_miniport_interrupt *intr; void *syncfunc; void *syncctx; { return (KeSynchronizeExecution(intr->ni_introbj, syncfunc, syncctx)); } static void NdisGetCurrentSystemTime(tval) uint64_t *tval; { ntoskrnl_time(tval); } /* * Return the number of milliseconds since the system booted. */ static void NdisGetSystemUpTime(tval) uint32_t *tval; { struct timespec ts; nanouptime(&ts); *tval = ts.tv_nsec / 1000000 + ts.tv_sec * 1000; } static void NdisInitializeString(dst, src) unicode_string *dst; char *src; { ansi_string as; RtlInitAnsiString(&as, src); RtlAnsiStringToUnicodeString(dst, &as, TRUE); } static void NdisFreeString(str) unicode_string *str; { RtlFreeUnicodeString(str); } static ndis_status NdisMRemoveMiniport(adapter) ndis_handle *adapter; { return (NDIS_STATUS_SUCCESS); } static void NdisInitAnsiString(dst, src) ansi_string *dst; char *src; { RtlInitAnsiString(dst, src); } static void NdisInitUnicodeString(dst, src) unicode_string *dst; uint16_t *src; { RtlInitUnicodeString(dst, src); } static void NdisMGetDeviceProperty(adapter, phydevobj, funcdevobj, nextdevobj, resources, transresources) ndis_handle adapter; device_object **phydevobj; device_object **funcdevobj; device_object **nextdevobj; cm_resource_list *resources; cm_resource_list *transresources; { ndis_miniport_block *block; block = (ndis_miniport_block *)adapter; if (phydevobj != NULL) *phydevobj = block->nmb_physdeviceobj; if (funcdevobj != NULL) *funcdevobj = block->nmb_deviceobj; if (nextdevobj != NULL) *nextdevobj = block->nmb_nextdeviceobj; } static void NdisGetFirstBufferFromPacket(packet, buf, firstva, firstlen, totlen) ndis_packet *packet; ndis_buffer **buf; void **firstva; uint32_t *firstlen; uint32_t *totlen; { ndis_buffer *tmp; tmp = packet->np_private.npp_head; *buf = tmp; if (tmp == NULL) { *firstva = NULL; *firstlen = *totlen = 0; } else { *firstva = MmGetMdlVirtualAddress(tmp); *firstlen = *totlen = MmGetMdlByteCount(tmp); for (tmp = tmp->mdl_next; tmp != NULL; tmp = tmp->mdl_next) *totlen += MmGetMdlByteCount(tmp); } } static void NdisGetFirstBufferFromPacketSafe(packet, buf, firstva, firstlen, totlen, prio) ndis_packet *packet; ndis_buffer **buf; void **firstva; uint32_t *firstlen; uint32_t *totlen; uint32_t prio; { NdisGetFirstBufferFromPacket(packet, buf, firstva, firstlen, totlen); } static int ndis_find_sym(lf, filename, suffix, sym) linker_file_t lf; char *filename; char *suffix; caddr_t *sym; { char *fullsym; char *suf; int i; fullsym = ExAllocatePoolWithTag(NonPagedPool, MAXPATHLEN, 0); if (fullsym == NULL) return (ENOMEM); bzero(fullsym, MAXPATHLEN); strncpy(fullsym, filename, MAXPATHLEN); if (strlen(filename) < 4) { ExFreePool(fullsym); return (EINVAL); } /* If the filename has a .ko suffix, strip if off. */ suf = fullsym + (strlen(filename) - 3); if (strcmp(suf, ".ko") == 0) *suf = '\0'; for (i = 0; i < strlen(fullsym); i++) { if (fullsym[i] == '.') fullsym[i] = '_'; else fullsym[i] = tolower(fullsym[i]); } strcat(fullsym, suffix); *sym = linker_file_lookup_symbol(lf, fullsym, 0); ExFreePool(fullsym); if (*sym == 0) return (ENOENT); return (0); } struct ndis_checkmodule { char *afilename; ndis_fh *fh; }; /* * See if a single module contains the symbols for a specified file. */ static int NdisCheckModule(linker_file_t lf, void *context) { struct ndis_checkmodule *nc; caddr_t kldstart, kldend; nc = (struct ndis_checkmodule *)context; if (ndis_find_sym(lf, nc->afilename, "_start", &kldstart)) return (0); if (ndis_find_sym(lf, nc->afilename, "_end", &kldend)) return (0); nc->fh->nf_vp = lf; nc->fh->nf_map = NULL; nc->fh->nf_type = NDIS_FH_TYPE_MODULE; nc->fh->nf_maplen = (kldend - kldstart) & 0xFFFFFFFF; return (1); } /* can also return NDIS_STATUS_RESOURCES/NDIS_STATUS_ERROR_READING_FILE */ static void NdisOpenFile(status, filehandle, filelength, filename, highestaddr) ndis_status *status; ndis_handle *filehandle; uint32_t *filelength; unicode_string *filename; ndis_physaddr highestaddr; { ansi_string as; char *afilename = NULL; struct thread *td = curthread; struct nameidata nd; int flags, error, vfslocked; struct vattr vat; struct vattr *vap = &vat; ndis_fh *fh; char *path; struct ndis_checkmodule nc; if (RtlUnicodeStringToAnsiString(&as, filename, TRUE)) { *status = NDIS_STATUS_RESOURCES; return; } afilename = strdup(as.as_buf, M_DEVBUF); RtlFreeAnsiString(&as); fh = ExAllocatePoolWithTag(NonPagedPool, sizeof(ndis_fh), 0); if (fh == NULL) { free(afilename, M_DEVBUF); *status = NDIS_STATUS_RESOURCES; return; } fh->nf_name = afilename; /* * During system bootstrap, it's impossible to load files * from the rootfs since it's not mounted yet. We therefore * offer the possibility of opening files that have been * preloaded as modules instead. Both choices will work * when kldloading a module from multiuser, but only the * module option will work during bootstrap. The module * loading option works by using the ndiscvt(8) utility * to convert the arbitrary file into a .ko using objcopy(1). * This file will contain two special symbols: filename_start * and filename_end. All we have to do is traverse the KLD * list in search of those symbols and we've found the file * data. As an added bonus, ndiscvt(8) will also generate * a normal .o file which can be linked statically with * the kernel. This means that the symbols will actual reside * in the kernel's symbol table, but that doesn't matter to * us since the kernel appears to us as just another module. */ nc.afilename = afilename; nc.fh = fh; if (linker_file_foreach(NdisCheckModule, &nc)) { *filelength = fh->nf_maplen; *filehandle = fh; *status = NDIS_STATUS_SUCCESS; return; } if (TAILQ_EMPTY(&mountlist)) { ExFreePool(fh); *status = NDIS_STATUS_FILE_NOT_FOUND; printf("NDIS: could not find file %s in linker list\n", afilename); printf("NDIS: and no filesystems mounted yet, " "aborting NdisOpenFile()\n"); free(afilename, M_DEVBUF); return; } path = ExAllocatePoolWithTag(NonPagedPool, MAXPATHLEN, 0); if (path == NULL) { ExFreePool(fh); free(afilename, M_DEVBUF); *status = NDIS_STATUS_RESOURCES; return; } snprintf(path, MAXPATHLEN, "%s/%s", ndis_filepath, afilename); /* Some threads don't have a current working directory. */ if (td->td_proc->p_fd->fd_rdir == NULL) td->td_proc->p_fd->fd_rdir = rootvnode; if (td->td_proc->p_fd->fd_cdir == NULL) td->td_proc->p_fd->fd_cdir = rootvnode; NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, path, td); flags = FREAD; error = vn_open(&nd, &flags, 0, NULL); if (error) { *status = NDIS_STATUS_FILE_NOT_FOUND; ExFreePool(fh); printf("NDIS: open file %s failed: %d\n", path, error); ExFreePool(path); free(afilename, M_DEVBUF); return; } vfslocked = NDHASGIANT(&nd); ExFreePool(path); NDFREE(&nd, NDF_ONLY_PNBUF); /* Get the file size. */ VOP_GETATTR(nd.ni_vp, vap, td->td_ucred); VOP_UNLOCK(nd.ni_vp, 0); VFS_UNLOCK_GIANT(vfslocked); fh->nf_vp = nd.ni_vp; fh->nf_map = NULL; fh->nf_type = NDIS_FH_TYPE_VFS; *filehandle = fh; *filelength = fh->nf_maplen = vap->va_size & 0xFFFFFFFF; *status = NDIS_STATUS_SUCCESS; } static void NdisMapFile(status, mappedbuffer, filehandle) ndis_status *status; void **mappedbuffer; ndis_handle filehandle; { ndis_fh *fh; struct thread *td = curthread; linker_file_t lf; caddr_t kldstart; int error, resid, vfslocked; struct vnode *vp; if (filehandle == NULL) { *status = NDIS_STATUS_FAILURE; return; } fh = (ndis_fh *)filehandle; if (fh->nf_vp == NULL) { *status = NDIS_STATUS_FAILURE; return; } if (fh->nf_map != NULL) { *status = NDIS_STATUS_ALREADY_MAPPED; return; } if (fh->nf_type == NDIS_FH_TYPE_MODULE) { lf = fh->nf_vp; if (ndis_find_sym(lf, fh->nf_name, "_start", &kldstart)) { *status = NDIS_STATUS_FAILURE; return; } fh->nf_map = kldstart; *status = NDIS_STATUS_SUCCESS; *mappedbuffer = fh->nf_map; return; } fh->nf_map = ExAllocatePoolWithTag(NonPagedPool, fh->nf_maplen, 0); if (fh->nf_map == NULL) { *status = NDIS_STATUS_RESOURCES; return; } vp = fh->nf_vp; vfslocked = VFS_LOCK_GIANT(vp->v_mount); error = vn_rdwr(UIO_READ, vp, fh->nf_map, fh->nf_maplen, 0, UIO_SYSSPACE, 0, td->td_ucred, NOCRED, &resid, td); VFS_UNLOCK_GIANT(vfslocked); if (error) *status = NDIS_STATUS_FAILURE; else { *status = NDIS_STATUS_SUCCESS; *mappedbuffer = fh->nf_map; } } static void NdisUnmapFile(filehandle) ndis_handle filehandle; { ndis_fh *fh; fh = (ndis_fh *)filehandle; if (fh->nf_map == NULL) return; if (fh->nf_type == NDIS_FH_TYPE_VFS) ExFreePool(fh->nf_map); fh->nf_map = NULL; } static void NdisCloseFile(filehandle) ndis_handle filehandle; { struct thread *td = curthread; ndis_fh *fh; int vfslocked; struct vnode *vp; if (filehandle == NULL) return; fh = (ndis_fh *)filehandle; if (fh->nf_map != NULL) { if (fh->nf_type == NDIS_FH_TYPE_VFS) ExFreePool(fh->nf_map); fh->nf_map = NULL; } if (fh->nf_vp == NULL) return; if (fh->nf_type == NDIS_FH_TYPE_VFS) { vp = fh->nf_vp; vfslocked = VFS_LOCK_GIANT(vp->v_mount); vn_close(vp, FREAD, td->td_ucred, td); VFS_UNLOCK_GIANT(vfslocked); } fh->nf_vp = NULL; free(fh->nf_name, M_DEVBUF); ExFreePool(fh); } static uint8_t NdisSystemProcessorCount() { return (mp_ncpus); } +static void +NdisGetCurrentProcessorCounts(idle_count, kernel_and_user, index) + uint32_t *idle_count; + uint32_t *kernel_and_user; + uint32_t *index; +{ + struct pcpu *pcpu; + + pcpu = pcpu_find(curthread->td_oncpu); + *index = pcpu->pc_cpuid; + *idle_count = pcpu->pc_cp_time[CP_IDLE]; + *kernel_and_user = pcpu->pc_cp_time[CP_INTR]; +} + typedef void (*ndis_statusdone_handler)(ndis_handle); typedef void (*ndis_status_handler)(ndis_handle, ndis_status, void *, uint32_t); static void NdisMIndicateStatusComplete(adapter) ndis_handle adapter; { ndis_miniport_block *block; ndis_statusdone_handler statusdonefunc; block = (ndis_miniport_block *)adapter; statusdonefunc = block->nmb_statusdone_func; MSCALL1(statusdonefunc, adapter); } static void NdisMIndicateStatus(adapter, status, sbuf, slen) ndis_handle adapter; ndis_status status; void *sbuf; uint32_t slen; { ndis_miniport_block *block; ndis_status_handler statusfunc; block = (ndis_miniport_block *)adapter; statusfunc = block->nmb_status_func; MSCALL4(statusfunc, adapter, status, sbuf, slen); } /* * The DDK documentation says that you should use IoQueueWorkItem() * instead of ExQueueWorkItem(). The problem is, IoQueueWorkItem() * is fundamentally incompatible with NdisScheduleWorkItem(), which * depends on the API semantics of ExQueueWorkItem(). In our world, * ExQueueWorkItem() is implemented on top of IoAllocateQueueItem() * anyway. * * There are actually three distinct APIs here. NdisScheduleWorkItem() * takes a pointer to an NDIS_WORK_ITEM. ExQueueWorkItem() takes a pointer * to a WORK_QUEUE_ITEM. And finally, IoQueueWorkItem() takes a pointer * to an opaque work item thingie which you get from IoAllocateWorkItem(). * An NDIS_WORK_ITEM is not the same as a WORK_QUEUE_ITEM. However, * the NDIS_WORK_ITEM has some opaque storage at the end of it, and we * (ab)use this storage as a WORK_QUEUE_ITEM, which is what we submit * to ExQueueWorkItem(). * * Got all that? (Sheesh.) */ ndis_status NdisScheduleWorkItem(work) ndis_work_item *work; { work_queue_item *wqi; wqi = (work_queue_item *)work->nwi_wraprsvd; ExInitializeWorkItem(wqi, (work_item_func)work->nwi_func, work->nwi_ctx); ExQueueWorkItem(wqi, WORKQUEUE_DELAYED); return (NDIS_STATUS_SUCCESS); } static void NdisCopyFromPacketToPacket(dpkt, doff, reqlen, spkt, soff, cpylen) ndis_packet *dpkt; uint32_t doff; uint32_t reqlen; ndis_packet *spkt; uint32_t soff; uint32_t *cpylen; { ndis_buffer *src, *dst; char *sptr, *dptr; int resid, copied, len, scnt, dcnt; *cpylen = 0; src = spkt->np_private.npp_head; dst = dpkt->np_private.npp_head; sptr = MmGetMdlVirtualAddress(src); dptr = MmGetMdlVirtualAddress(dst); scnt = MmGetMdlByteCount(src); dcnt = MmGetMdlByteCount(dst); while (soff) { if (MmGetMdlByteCount(src) > soff) { sptr += soff; scnt = MmGetMdlByteCount(src)- soff; break; } soff -= MmGetMdlByteCount(src); src = src->mdl_next; if (src == NULL) return; sptr = MmGetMdlVirtualAddress(src); } while (doff) { if (MmGetMdlByteCount(dst) > doff) { dptr += doff; dcnt = MmGetMdlByteCount(dst) - doff; break; } doff -= MmGetMdlByteCount(dst); dst = dst->mdl_next; if (dst == NULL) return; dptr = MmGetMdlVirtualAddress(dst); } resid = reqlen; copied = 0; while(1) { if (resid < scnt) len = resid; else len = scnt; if (dcnt < len) len = dcnt; bcopy(sptr, dptr, len); copied += len; resid -= len; if (resid == 0) break; dcnt -= len; if (dcnt == 0) { dst = dst->mdl_next; if (dst == NULL) break; dptr = MmGetMdlVirtualAddress(dst); dcnt = MmGetMdlByteCount(dst); } scnt -= len; if (scnt == 0) { src = src->mdl_next; if (src == NULL) break; sptr = MmGetMdlVirtualAddress(src); scnt = MmGetMdlByteCount(src); } } *cpylen = copied; } static void NdisCopyFromPacketToPacketSafe(dpkt, doff, reqlen, spkt, soff, cpylen, prio) ndis_packet *dpkt; uint32_t doff; uint32_t reqlen; ndis_packet *spkt; uint32_t soff; uint32_t *cpylen; uint32_t prio; { NdisCopyFromPacketToPacket(dpkt, doff, reqlen, spkt, soff, cpylen); } static void NdisIMCopySendPerPacketInfo(dpkt, spkt) ndis_packet *dpkt; ndis_packet *spkt; { memcpy(&dpkt->np_ext, &spkt->np_ext, sizeof(ndis_packet_extension)); } static ndis_status NdisMRegisterDevice(handle, devname, symname, majorfuncs, devobj, devhandle) ndis_handle handle; unicode_string *devname; unicode_string *symname; driver_dispatch *majorfuncs[]; void **devobj; ndis_handle *devhandle; { uint32_t status; device_object *dobj; status = IoCreateDevice(handle, 0, devname, FILE_DEVICE_UNKNOWN, 0, FALSE, &dobj); if (status == STATUS_SUCCESS) { *devobj = dobj; *devhandle = dobj; } return (status); } static ndis_status NdisMDeregisterDevice(handle) ndis_handle handle; { IoDeleteDevice(handle); return (NDIS_STATUS_SUCCESS); } static ndis_status NdisMQueryAdapterInstanceName(name, handle) unicode_string *name; ndis_handle handle; { ndis_miniport_block *block; device_t dev; ansi_string as; block = (ndis_miniport_block *)handle; dev = block->nmb_physdeviceobj->do_devext; RtlInitAnsiString(&as, __DECONST(char *, device_get_nameunit(dev))); if (RtlAnsiStringToUnicodeString(name, &as, TRUE)) return (NDIS_STATUS_RESOURCES); return (NDIS_STATUS_SUCCESS); } static void NdisMRegisterUnloadHandler(handle, func) ndis_handle handle; void *func; { } static void dummy() { printf("NDIS dummy called...\n"); } /* * Note: a couple of entries in this table specify the * number of arguments as "foo + 1". These are routines * that accept a 64-bit argument, passed by value. On * x86, these arguments consume two longwords on the stack, * so we lie and say there's one additional argument so * that the wrapping routines will do the right thing. */ image_patch_table ndis_functbl[] = { IMPORT_SFUNC(NdisCopyFromPacketToPacket, 6), IMPORT_SFUNC(NdisCopyFromPacketToPacketSafe, 7), IMPORT_SFUNC(NdisIMCopySendPerPacketInfo, 2), IMPORT_SFUNC(NdisScheduleWorkItem, 1), IMPORT_SFUNC(NdisMIndicateStatusComplete, 1), IMPORT_SFUNC(NdisMIndicateStatus, 4), IMPORT_SFUNC(NdisSystemProcessorCount, 0), + IMPORT_SFUNC(NdisGetCurrentProcessorCounts, 3), IMPORT_SFUNC(NdisUnchainBufferAtBack, 2), IMPORT_SFUNC(NdisGetFirstBufferFromPacket, 5), IMPORT_SFUNC(NdisGetFirstBufferFromPacketSafe, 6), IMPORT_SFUNC(NdisGetBufferPhysicalArraySize, 2), IMPORT_SFUNC(NdisMGetDeviceProperty, 6), IMPORT_SFUNC(NdisInitAnsiString, 2), IMPORT_SFUNC(NdisInitUnicodeString, 2), IMPORT_SFUNC(NdisWriteConfiguration, 4), IMPORT_SFUNC(NdisAnsiStringToUnicodeString, 2), IMPORT_SFUNC(NdisTerminateWrapper, 2), IMPORT_SFUNC(NdisOpenConfigurationKeyByName, 4), IMPORT_SFUNC(NdisOpenConfigurationKeyByIndex, 5), IMPORT_SFUNC(NdisMRemoveMiniport, 1), IMPORT_SFUNC(NdisInitializeString, 2), IMPORT_SFUNC(NdisFreeString, 1), IMPORT_SFUNC(NdisGetCurrentSystemTime, 1), IMPORT_SFUNC(NdisGetSystemUpTime, 1), + IMPORT_SFUNC(NdisGetVersion, 0), IMPORT_SFUNC(NdisMSynchronizeWithInterrupt, 3), IMPORT_SFUNC(NdisMAllocateSharedMemoryAsync, 4), IMPORT_SFUNC(NdisInterlockedInsertHeadList, 3), IMPORT_SFUNC(NdisInterlockedInsertTailList, 3), IMPORT_SFUNC(NdisInterlockedRemoveHeadList, 2), IMPORT_SFUNC(NdisInitializeWrapper, 4), IMPORT_SFUNC(NdisMRegisterMiniport, 3), IMPORT_SFUNC(NdisAllocateMemoryWithTag, 3), IMPORT_SFUNC(NdisAllocateMemory, 4 + 1), IMPORT_SFUNC(NdisMSetAttributesEx, 5), IMPORT_SFUNC(NdisCloseConfiguration, 1), IMPORT_SFUNC(NdisReadConfiguration, 5), IMPORT_SFUNC(NdisOpenConfiguration, 3), IMPORT_SFUNC(NdisAcquireSpinLock, 1), IMPORT_SFUNC(NdisReleaseSpinLock, 1), IMPORT_SFUNC(NdisDprAcquireSpinLock, 1), IMPORT_SFUNC(NdisDprReleaseSpinLock, 1), IMPORT_SFUNC(NdisAllocateSpinLock, 1), IMPORT_SFUNC(NdisInitializeReadWriteLock, 1), IMPORT_SFUNC(NdisAcquireReadWriteLock, 3), IMPORT_SFUNC(NdisReleaseReadWriteLock, 2), IMPORT_SFUNC(NdisFreeSpinLock, 1), IMPORT_SFUNC(NdisFreeMemory, 3), IMPORT_SFUNC(NdisReadPciSlotInformation, 5), IMPORT_SFUNC(NdisWritePciSlotInformation, 5), IMPORT_SFUNC_MAP(NdisImmediateReadPciSlotInformation, NdisReadPciSlotInformation, 5), IMPORT_SFUNC_MAP(NdisImmediateWritePciSlotInformation, NdisWritePciSlotInformation, 5), IMPORT_CFUNC(NdisWriteErrorLogEntry, 0), IMPORT_SFUNC(NdisMStartBufferPhysicalMapping, 6), IMPORT_SFUNC(NdisMCompleteBufferPhysicalMapping, 3), IMPORT_SFUNC(NdisMInitializeTimer, 4), IMPORT_SFUNC(NdisInitializeTimer, 3), IMPORT_SFUNC(NdisSetTimer, 2), IMPORT_SFUNC(NdisMCancelTimer, 2), IMPORT_SFUNC_MAP(NdisCancelTimer, NdisMCancelTimer, 2), IMPORT_SFUNC(NdisMSetPeriodicTimer, 2), IMPORT_SFUNC(NdisMQueryAdapterResources, 4), IMPORT_SFUNC(NdisMRegisterIoPortRange, 4), IMPORT_SFUNC(NdisMDeregisterIoPortRange, 4), IMPORT_SFUNC(NdisReadNetworkAddress, 4), IMPORT_SFUNC(NdisQueryMapRegisterCount, 2), IMPORT_SFUNC(NdisMAllocateMapRegisters, 5), IMPORT_SFUNC(NdisMFreeMapRegisters, 1), IMPORT_SFUNC(NdisMAllocateSharedMemory, 5), IMPORT_SFUNC(NdisMMapIoSpace, 4 + 1), IMPORT_SFUNC(NdisMUnmapIoSpace, 3), IMPORT_SFUNC(NdisGetCacheFillSize, 0), IMPORT_SFUNC(NdisMGetDmaAlignment, 1), IMPORT_SFUNC(NdisMInitializeScatterGatherDma, 3), IMPORT_SFUNC(NdisAllocatePacketPool, 4), IMPORT_SFUNC(NdisAllocatePacketPoolEx, 5), IMPORT_SFUNC(NdisAllocatePacket, 3), IMPORT_SFUNC(NdisFreePacket, 1), IMPORT_SFUNC(NdisFreePacketPool, 1), IMPORT_SFUNC_MAP(NdisDprAllocatePacket, NdisAllocatePacket, 3), IMPORT_SFUNC_MAP(NdisDprFreePacket, NdisFreePacket, 1), IMPORT_SFUNC(NdisAllocateBufferPool, 3), IMPORT_SFUNC(NdisAllocateBuffer, 5), IMPORT_SFUNC(NdisQueryBuffer, 3), IMPORT_SFUNC(NdisQueryBufferSafe, 4), IMPORT_SFUNC(NdisBufferVirtualAddress, 1), IMPORT_SFUNC(NdisBufferVirtualAddressSafe, 2), IMPORT_SFUNC(NdisBufferLength, 1), IMPORT_SFUNC(NdisFreeBuffer, 1), IMPORT_SFUNC(NdisFreeBufferPool, 1), IMPORT_SFUNC(NdisInterlockedIncrement, 1), IMPORT_SFUNC(NdisInterlockedDecrement, 1), IMPORT_SFUNC(NdisInitializeEvent, 1), IMPORT_SFUNC(NdisSetEvent, 1), IMPORT_SFUNC(NdisResetEvent, 1), IMPORT_SFUNC(NdisWaitEvent, 2), IMPORT_SFUNC(NdisUnicodeStringToAnsiString, 2), IMPORT_SFUNC(NdisMPciAssignResources, 3), IMPORT_SFUNC(NdisMFreeSharedMemory, 5 + 1), IMPORT_SFUNC(NdisMRegisterInterrupt, 7), IMPORT_SFUNC(NdisMDeregisterInterrupt, 1), IMPORT_SFUNC(NdisMRegisterAdapterShutdownHandler, 3), IMPORT_SFUNC(NdisMDeregisterAdapterShutdownHandler, 1), IMPORT_SFUNC(NDIS_BUFFER_TO_SPAN_PAGES, 1), IMPORT_SFUNC(NdisQueryBufferOffset, 3), IMPORT_SFUNC(NdisAdjustBufferLength, 2), IMPORT_SFUNC(NdisPacketPoolUsage, 1), IMPORT_SFUNC(NdisMSleep, 1), IMPORT_SFUNC(NdisUnchainBufferAtFront, 2), IMPORT_SFUNC(NdisReadPcmciaAttributeMemory, 4), IMPORT_SFUNC(NdisWritePcmciaAttributeMemory, 4), IMPORT_SFUNC(NdisOpenFile, 5 + 1), IMPORT_SFUNC(NdisMapFile, 3), IMPORT_SFUNC(NdisUnmapFile, 1), IMPORT_SFUNC(NdisCloseFile, 1), IMPORT_SFUNC(NdisMRegisterDevice, 6), IMPORT_SFUNC(NdisMDeregisterDevice, 1), IMPORT_SFUNC(NdisMQueryAdapterInstanceName, 2), IMPORT_SFUNC(NdisMRegisterUnloadHandler, 2), IMPORT_SFUNC(ndis_timercall, 4), IMPORT_SFUNC(ndis_asyncmem_complete, 2), IMPORT_SFUNC(ndis_intr, 2), IMPORT_SFUNC(ndis_intrhand, 4), /* * This last entry is a catch-all for any function we haven't * implemented yet. The PE import list patching routine will * use it for any function that doesn't have an explicit match * in this table. */ { NULL, (FUNC)dummy, NULL, 0, WINDRV_WRAP_STDCALL }, /* End of list. */ { NULL, NULL, NULL } }; Index: projects/binutils-2.17/sys/compat/ndis/subr_ntoskrnl.c =================================================================== --- projects/binutils-2.17/sys/compat/ndis/subr_ntoskrnl.c (revision 215829) +++ projects/binutils-2.17/sys/compat/ndis/subr_ntoskrnl.c (revision 215830) @@ -1,4317 +1,4436 @@ /*- * Copyright (c) 2003 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef NTOSKRNL_DEBUG_TIMERS static int sysctl_show_timers(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_debug, OID_AUTO, ntoskrnl_timers, CTLFLAG_RW, 0, 0, sysctl_show_timers, "I", "Show ntoskrnl timer stats"); #endif struct kdpc_queue { list_entry kq_disp; struct thread *kq_td; int kq_cpu; int kq_exit; int kq_running; kspin_lock kq_lock; nt_kevent kq_proc; nt_kevent kq_done; }; typedef struct kdpc_queue kdpc_queue; struct wb_ext { struct cv we_cv; struct thread *we_td; }; typedef struct wb_ext wb_ext; #define NTOSKRNL_TIMEOUTS 256 #ifdef NTOSKRNL_DEBUG_TIMERS static uint64_t ntoskrnl_timer_fires; static uint64_t ntoskrnl_timer_sets; static uint64_t ntoskrnl_timer_reloads; static uint64_t ntoskrnl_timer_cancels; #endif struct callout_entry { struct callout ce_callout; list_entry ce_list; }; typedef struct callout_entry callout_entry; static struct list_entry ntoskrnl_calllist; static struct mtx ntoskrnl_calllock; struct kuser_shared_data kuser_shared_data; static struct list_entry ntoskrnl_intlist; static kspin_lock ntoskrnl_intlock; static uint8_t RtlEqualUnicodeString(unicode_string *, unicode_string *, uint8_t); +static void RtlCopyString(ansi_string *, const ansi_string *); static void RtlCopyUnicodeString(unicode_string *, unicode_string *); static irp *IoBuildSynchronousFsdRequest(uint32_t, device_object *, void *, uint32_t, uint64_t *, nt_kevent *, io_status_block *); static irp *IoBuildAsynchronousFsdRequest(uint32_t, device_object *, void *, uint32_t, uint64_t *, io_status_block *); static irp *IoBuildDeviceIoControlRequest(uint32_t, device_object *, void *, uint32_t, void *, uint32_t, uint8_t, nt_kevent *, io_status_block *); static irp *IoAllocateIrp(uint8_t, uint8_t); static void IoReuseIrp(irp *, uint32_t); static void IoFreeIrp(irp *); static void IoInitializeIrp(irp *, uint16_t, uint8_t); static irp *IoMakeAssociatedIrp(irp *, uint8_t); static uint32_t KeWaitForMultipleObjects(uint32_t, nt_dispatch_header **, uint32_t, uint32_t, uint32_t, uint8_t, int64_t *, wait_block *); static void ntoskrnl_waittest(nt_dispatch_header *, uint32_t); static void ntoskrnl_satisfy_wait(nt_dispatch_header *, struct thread *); static void ntoskrnl_satisfy_multiple_waits(wait_block *); static int ntoskrnl_is_signalled(nt_dispatch_header *, struct thread *); static void ntoskrnl_insert_timer(ktimer *, int); static void ntoskrnl_remove_timer(ktimer *); #ifdef NTOSKRNL_DEBUG_TIMERS static void ntoskrnl_show_timers(void); #endif static void ntoskrnl_timercall(void *); static void ntoskrnl_dpc_thread(void *); static void ntoskrnl_destroy_dpc_threads(void); static void ntoskrnl_destroy_workitem_threads(void); static void ntoskrnl_workitem_thread(void *); static void ntoskrnl_workitem(device_object *, void *); static void ntoskrnl_unicode_to_ascii(uint16_t *, char *, int); static void ntoskrnl_ascii_to_unicode(char *, uint16_t *, int); static uint8_t ntoskrnl_insert_dpc(list_entry *, kdpc *); static void WRITE_REGISTER_USHORT(uint16_t *, uint16_t); static uint16_t READ_REGISTER_USHORT(uint16_t *); static void WRITE_REGISTER_ULONG(uint32_t *, uint32_t); static uint32_t READ_REGISTER_ULONG(uint32_t *); static void WRITE_REGISTER_UCHAR(uint8_t *, uint8_t); static uint8_t READ_REGISTER_UCHAR(uint8_t *); static int64_t _allmul(int64_t, int64_t); static int64_t _alldiv(int64_t, int64_t); static int64_t _allrem(int64_t, int64_t); static int64_t _allshr(int64_t, uint8_t); static int64_t _allshl(int64_t, uint8_t); static uint64_t _aullmul(uint64_t, uint64_t); static uint64_t _aulldiv(uint64_t, uint64_t); static uint64_t _aullrem(uint64_t, uint64_t); static uint64_t _aullshr(uint64_t, uint8_t); static uint64_t _aullshl(uint64_t, uint8_t); static slist_entry *ntoskrnl_pushsl(slist_header *, slist_entry *); +static void InitializeSListHead(slist_header *); static slist_entry *ntoskrnl_popsl(slist_header *); +static void ExFreePoolWithTag(void *, uint32_t); static void ExInitializePagedLookasideList(paged_lookaside_list *, lookaside_alloc_func *, lookaside_free_func *, uint32_t, size_t, uint32_t, uint16_t); static void ExDeletePagedLookasideList(paged_lookaside_list *); static void ExInitializeNPagedLookasideList(npaged_lookaside_list *, lookaside_alloc_func *, lookaside_free_func *, uint32_t, size_t, uint32_t, uint16_t); static void ExDeleteNPagedLookasideList(npaged_lookaside_list *); static slist_entry *ExInterlockedPushEntrySList(slist_header *, slist_entry *, kspin_lock *); static slist_entry *ExInterlockedPopEntrySList(slist_header *, kspin_lock *); static uint32_t InterlockedIncrement(volatile uint32_t *); static uint32_t InterlockedDecrement(volatile uint32_t *); static void ExInterlockedAddLargeStatistic(uint64_t *, uint32_t); static void *MmAllocateContiguousMemory(uint32_t, uint64_t); static void *MmAllocateContiguousMemorySpecifyCache(uint32_t, uint64_t, uint64_t, uint64_t, enum nt_caching_type); static void MmFreeContiguousMemory(void *); static void MmFreeContiguousMemorySpecifyCache(void *, uint32_t, enum nt_caching_type); static uint32_t MmSizeOfMdl(void *, size_t); static void *MmMapLockedPages(mdl *, uint8_t); static void *MmMapLockedPagesSpecifyCache(mdl *, uint8_t, uint32_t, void *, uint32_t, uint32_t); static void MmUnmapLockedPages(void *, mdl *); static device_t ntoskrnl_finddev(device_t, uint64_t, struct resource **); static void RtlZeroMemory(void *, size_t); +static void RtlSecureZeroMemory(void *, size_t); +static void RtlFillMemory(void *, size_t, uint8_t); +static void RtlMoveMemory(void *, const void *, size_t); +static ndis_status RtlCharToInteger(const char *, uint32_t, uint32_t *); static void RtlCopyMemory(void *, const void *, size_t); static size_t RtlCompareMemory(const void *, const void *, size_t); static ndis_status RtlUnicodeStringToInteger(unicode_string *, uint32_t, uint32_t *); static int atoi (const char *); static long atol (const char *); static int rand(void); static void srand(unsigned int); static void KeQuerySystemTime(uint64_t *); static uint32_t KeTickCount(void); static uint8_t IoIsWdmVersionAvailable(uint8_t, uint8_t); static void ntoskrnl_thrfunc(void *); static ndis_status PsCreateSystemThread(ndis_handle *, uint32_t, void *, ndis_handle, void *, void *, void *); static ndis_status PsTerminateSystemThread(ndis_status); static ndis_status IoGetDeviceObjectPointer(unicode_string *, uint32_t, void *, device_object *); static ndis_status IoGetDeviceProperty(device_object *, uint32_t, uint32_t, void *, uint32_t *); static void KeInitializeMutex(kmutant *, uint32_t); static uint32_t KeReleaseMutex(kmutant *, uint8_t); static uint32_t KeReadStateMutex(kmutant *); static ndis_status ObReferenceObjectByHandle(ndis_handle, uint32_t, void *, uint8_t, void **, void **); static void ObfDereferenceObject(void *); static uint32_t ZwClose(ndis_handle); static uint32_t WmiQueryTraceInformation(uint32_t, void *, uint32_t, uint32_t, void *); static uint32_t WmiTraceMessage(uint64_t, uint32_t, void *, uint16_t, ...); static uint32_t IoWMIRegistrationControl(device_object *, uint32_t); static void *ntoskrnl_memset(void *, int, size_t); static void *ntoskrnl_memmove(void *, void *, size_t); static void *ntoskrnl_memchr(void *, unsigned char, size_t); static char *ntoskrnl_strstr(char *, char *); static char *ntoskrnl_strncat(char *, char *, size_t); static int ntoskrnl_toupper(int); static int ntoskrnl_tolower(int); static funcptr ntoskrnl_findwrap(funcptr); static uint32_t DbgPrint(char *, ...); static void DbgBreakPoint(void); static void KeBugCheckEx(uint32_t, u_long, u_long, u_long, u_long); static int32_t KeDelayExecutionThread(uint8_t, uint8_t, int64_t *); static int32_t KeSetPriorityThread(struct thread *, int32_t); static void dummy(void); static struct mtx ntoskrnl_dispatchlock; static struct mtx ntoskrnl_interlock; static kspin_lock ntoskrnl_cancellock; static int ntoskrnl_kth = 0; static struct nt_objref_head ntoskrnl_reflist; static uma_zone_t mdl_zone; static uma_zone_t iw_zone; static struct kdpc_queue *kq_queues; static struct kdpc_queue *wq_queues; static int wq_idx = 0; int ntoskrnl_libinit() { image_patch_table *patch; int error; struct proc *p; kdpc_queue *kq; callout_entry *e; int i; mtx_init(&ntoskrnl_dispatchlock, "ntoskrnl dispatch lock", MTX_NDIS_LOCK, MTX_DEF|MTX_RECURSE); mtx_init(&ntoskrnl_interlock, MTX_NTOSKRNL_SPIN_LOCK, NULL, MTX_SPIN); KeInitializeSpinLock(&ntoskrnl_cancellock); KeInitializeSpinLock(&ntoskrnl_intlock); TAILQ_INIT(&ntoskrnl_reflist); InitializeListHead(&ntoskrnl_calllist); InitializeListHead(&ntoskrnl_intlist); mtx_init(&ntoskrnl_calllock, MTX_NTOSKRNL_SPIN_LOCK, NULL, MTX_SPIN); kq_queues = ExAllocatePoolWithTag(NonPagedPool, #ifdef NTOSKRNL_MULTIPLE_DPCS sizeof(kdpc_queue) * mp_ncpus, 0); #else sizeof(kdpc_queue), 0); #endif if (kq_queues == NULL) return (ENOMEM); wq_queues = ExAllocatePoolWithTag(NonPagedPool, sizeof(kdpc_queue) * WORKITEM_THREADS, 0); if (wq_queues == NULL) return (ENOMEM); #ifdef NTOSKRNL_MULTIPLE_DPCS bzero((char *)kq_queues, sizeof(kdpc_queue) * mp_ncpus); #else bzero((char *)kq_queues, sizeof(kdpc_queue)); #endif bzero((char *)wq_queues, sizeof(kdpc_queue) * WORKITEM_THREADS); /* * Launch the DPC threads. */ #ifdef NTOSKRNL_MULTIPLE_DPCS for (i = 0; i < mp_ncpus; i++) { #else for (i = 0; i < 1; i++) { #endif kq = kq_queues + i; kq->kq_cpu = i; error = kproc_create(ntoskrnl_dpc_thread, kq, &p, RFHIGHPID, NDIS_KSTACK_PAGES, "Windows DPC %d", i); if (error) panic("failed to launch DPC thread"); } /* * Launch the workitem threads. */ for (i = 0; i < WORKITEM_THREADS; i++) { kq = wq_queues + i; error = kproc_create(ntoskrnl_workitem_thread, kq, &p, RFHIGHPID, NDIS_KSTACK_PAGES, "Windows Workitem %d", i); if (error) panic("failed to launch workitem thread"); } patch = ntoskrnl_functbl; while (patch->ipt_func != NULL) { windrv_wrap((funcptr)patch->ipt_func, (funcptr *)&patch->ipt_wrap, patch->ipt_argcnt, patch->ipt_ftype); patch++; } for (i = 0; i < NTOSKRNL_TIMEOUTS; i++) { e = ExAllocatePoolWithTag(NonPagedPool, sizeof(callout_entry), 0); if (e == NULL) panic("failed to allocate timeouts"); mtx_lock_spin(&ntoskrnl_calllock); InsertHeadList((&ntoskrnl_calllist), (&e->ce_list)); mtx_unlock_spin(&ntoskrnl_calllock); } /* * MDLs are supposed to be variable size (they describe * buffers containing some number of pages, but we don't * know ahead of time how many pages that will be). But * always allocating them off the heap is very slow. As * a compromise, we create an MDL UMA zone big enough to * handle any buffer requiring up to 16 pages, and we * use those for any MDLs for buffers of 16 pages or less * in size. For buffers larger than that (which we assume * will be few and far between, we allocate the MDLs off * the heap. */ mdl_zone = uma_zcreate("Windows MDL", MDL_ZONE_SIZE, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); iw_zone = uma_zcreate("Windows WorkItem", sizeof(io_workitem), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); return (0); } int ntoskrnl_libfini() { image_patch_table *patch; callout_entry *e; list_entry *l; patch = ntoskrnl_functbl; while (patch->ipt_func != NULL) { windrv_unwrap(patch->ipt_wrap); patch++; } /* Stop the workitem queues. */ ntoskrnl_destroy_workitem_threads(); /* Stop the DPC queues. */ ntoskrnl_destroy_dpc_threads(); ExFreePool(kq_queues); ExFreePool(wq_queues); uma_zdestroy(mdl_zone); uma_zdestroy(iw_zone); mtx_lock_spin(&ntoskrnl_calllock); while(!IsListEmpty(&ntoskrnl_calllist)) { l = RemoveHeadList(&ntoskrnl_calllist); e = CONTAINING_RECORD(l, callout_entry, ce_list); mtx_unlock_spin(&ntoskrnl_calllock); ExFreePool(e); mtx_lock_spin(&ntoskrnl_calllock); } mtx_unlock_spin(&ntoskrnl_calllock); mtx_destroy(&ntoskrnl_dispatchlock); mtx_destroy(&ntoskrnl_interlock); mtx_destroy(&ntoskrnl_calllock); return (0); } /* * We need to be able to reference this externally from the wrapper; * GCC only generates a local implementation of memset. */ static void * ntoskrnl_memset(buf, ch, size) void *buf; int ch; size_t size; { return (memset(buf, ch, size)); } static void * ntoskrnl_memmove(dst, src, size) void *src; void *dst; size_t size; { bcopy(src, dst, size); return (dst); } static void * ntoskrnl_memchr(void *buf, unsigned char ch, size_t len) { if (len != 0) { unsigned char *p = buf; do { if (*p++ == ch) return (p - 1); } while (--len != 0); } return (NULL); } static char * ntoskrnl_strstr(s, find) char *s, *find; { char c, sc; size_t len; if ((c = *find++) != 0) { len = strlen(find); do { do { if ((sc = *s++) == 0) return (NULL); } while (sc != c); } while (strncmp(s, find, len) != 0); s--; } return ((char *)s); } /* Taken from libc */ static char * ntoskrnl_strncat(dst, src, n) char *dst; char *src; size_t n; { if (n != 0) { char *d = dst; const char *s = src; while (*d != 0) d++; do { if ((*d = *s++) == 0) break; d++; } while (--n != 0); *d = 0; } return (dst); } static int ntoskrnl_toupper(c) int c; { return (toupper(c)); } static int ntoskrnl_tolower(c) int c; { return (tolower(c)); } static uint8_t RtlEqualUnicodeString(unicode_string *str1, unicode_string *str2, uint8_t caseinsensitive) { int i; if (str1->us_len != str2->us_len) return (FALSE); for (i = 0; i < str1->us_len; i++) { if (caseinsensitive == TRUE) { if (toupper((char)(str1->us_buf[i] & 0xFF)) != toupper((char)(str2->us_buf[i] & 0xFF))) return (FALSE); } else { if (str1->us_buf[i] != str2->us_buf[i]) return (FALSE); } } return (TRUE); } static void +RtlCopyString(dst, src) + ansi_string *dst; + const ansi_string *src; +{ + if (src != NULL && src->as_buf != NULL && dst->as_buf != NULL) { + dst->as_len = min(src->as_len, dst->as_maxlen); + memcpy(dst->as_buf, src->as_buf, dst->as_len); + if (dst->as_len < dst->as_maxlen) + dst->as_buf[dst->as_len] = 0; + } else + dst->as_len = 0; +} + +static void RtlCopyUnicodeString(dest, src) unicode_string *dest; unicode_string *src; { if (dest->us_maxlen >= src->us_len) dest->us_len = src->us_len; else dest->us_len = dest->us_maxlen; memcpy(dest->us_buf, src->us_buf, dest->us_len); } static void ntoskrnl_ascii_to_unicode(ascii, unicode, len) char *ascii; uint16_t *unicode; int len; { int i; uint16_t *ustr; ustr = unicode; for (i = 0; i < len; i++) { *ustr = (uint16_t)ascii[i]; ustr++; } } static void ntoskrnl_unicode_to_ascii(unicode, ascii, len) uint16_t *unicode; char *ascii; int len; { int i; uint8_t *astr; astr = ascii; for (i = 0; i < len / 2; i++) { *astr = (uint8_t)unicode[i]; astr++; } } uint32_t RtlUnicodeStringToAnsiString(ansi_string *dest, unicode_string *src, uint8_t allocate) { if (dest == NULL || src == NULL) return (STATUS_INVALID_PARAMETER); dest->as_len = src->us_len / 2; if (dest->as_maxlen < dest->as_len) dest->as_len = dest->as_maxlen; if (allocate == TRUE) { dest->as_buf = ExAllocatePoolWithTag(NonPagedPool, (src->us_len / 2) + 1, 0); if (dest->as_buf == NULL) return (STATUS_INSUFFICIENT_RESOURCES); dest->as_len = dest->as_maxlen = src->us_len / 2; } else { dest->as_len = src->us_len / 2; /* XXX */ if (dest->as_maxlen < dest->as_len) dest->as_len = dest->as_maxlen; } ntoskrnl_unicode_to_ascii(src->us_buf, dest->as_buf, dest->as_len * 2); return (STATUS_SUCCESS); } uint32_t RtlAnsiStringToUnicodeString(unicode_string *dest, ansi_string *src, uint8_t allocate) { if (dest == NULL || src == NULL) return (STATUS_INVALID_PARAMETER); if (allocate == TRUE) { dest->us_buf = ExAllocatePoolWithTag(NonPagedPool, src->as_len * 2, 0); if (dest->us_buf == NULL) return (STATUS_INSUFFICIENT_RESOURCES); dest->us_len = dest->us_maxlen = strlen(src->as_buf) * 2; } else { dest->us_len = src->as_len * 2; /* XXX */ if (dest->us_maxlen < dest->us_len) dest->us_len = dest->us_maxlen; } ntoskrnl_ascii_to_unicode(src->as_buf, dest->us_buf, dest->us_len / 2); return (STATUS_SUCCESS); } void * ExAllocatePoolWithTag(pooltype, len, tag) uint32_t pooltype; size_t len; uint32_t tag; { void *buf; buf = malloc(len, M_DEVBUF, M_NOWAIT|M_ZERO); if (buf == NULL) return (NULL); return (buf); } +static void +ExFreePoolWithTag(buf, tag) + void *buf; + uint32_t tag; +{ + ExFreePool(buf); +} + void ExFreePool(buf) void *buf; { free(buf, M_DEVBUF); } uint32_t IoAllocateDriverObjectExtension(drv, clid, extlen, ext) driver_object *drv; void *clid; uint32_t extlen; void **ext; { custom_extension *ce; ce = ExAllocatePoolWithTag(NonPagedPool, sizeof(custom_extension) + extlen, 0); if (ce == NULL) return (STATUS_INSUFFICIENT_RESOURCES); ce->ce_clid = clid; InsertTailList((&drv->dro_driverext->dre_usrext), (&ce->ce_list)); *ext = (void *)(ce + 1); return (STATUS_SUCCESS); } void * IoGetDriverObjectExtension(drv, clid) driver_object *drv; void *clid; { list_entry *e; custom_extension *ce; /* * Sanity check. Our dummy bus drivers don't have * any driver extentions. */ if (drv->dro_driverext == NULL) return (NULL); e = drv->dro_driverext->dre_usrext.nle_flink; while (e != &drv->dro_driverext->dre_usrext) { ce = (custom_extension *)e; if (ce->ce_clid == clid) return ((void *)(ce + 1)); e = e->nle_flink; } return (NULL); } uint32_t IoCreateDevice(driver_object *drv, uint32_t devextlen, unicode_string *devname, uint32_t devtype, uint32_t devchars, uint8_t exclusive, device_object **newdev) { device_object *dev; dev = ExAllocatePoolWithTag(NonPagedPool, sizeof(device_object), 0); if (dev == NULL) return (STATUS_INSUFFICIENT_RESOURCES); dev->do_type = devtype; dev->do_drvobj = drv; dev->do_currirp = NULL; dev->do_flags = 0; if (devextlen) { dev->do_devext = ExAllocatePoolWithTag(NonPagedPool, devextlen, 0); if (dev->do_devext == NULL) { ExFreePool(dev); return (STATUS_INSUFFICIENT_RESOURCES); } bzero(dev->do_devext, devextlen); } else dev->do_devext = NULL; dev->do_size = sizeof(device_object) + devextlen; dev->do_refcnt = 1; dev->do_attacheddev = NULL; dev->do_nextdev = NULL; dev->do_devtype = devtype; dev->do_stacksize = 1; dev->do_alignreq = 1; dev->do_characteristics = devchars; dev->do_iotimer = NULL; KeInitializeEvent(&dev->do_devlock, EVENT_TYPE_SYNC, TRUE); /* * Vpd is used for disk/tape devices, * but we don't support those. (Yet.) */ dev->do_vpb = NULL; dev->do_devobj_ext = ExAllocatePoolWithTag(NonPagedPool, sizeof(devobj_extension), 0); if (dev->do_devobj_ext == NULL) { if (dev->do_devext != NULL) ExFreePool(dev->do_devext); ExFreePool(dev); return (STATUS_INSUFFICIENT_RESOURCES); } dev->do_devobj_ext->dve_type = 0; dev->do_devobj_ext->dve_size = sizeof(devobj_extension); dev->do_devobj_ext->dve_devobj = dev; /* * Attach this device to the driver object's list * of devices. Note: this is not the same as attaching * the device to the device stack. The driver's AddDevice * routine must explicitly call IoAddDeviceToDeviceStack() * to do that. */ if (drv->dro_devobj == NULL) { drv->dro_devobj = dev; dev->do_nextdev = NULL; } else { dev->do_nextdev = drv->dro_devobj; drv->dro_devobj = dev; } *newdev = dev; return (STATUS_SUCCESS); } void IoDeleteDevice(dev) device_object *dev; { device_object *prev; if (dev == NULL) return; if (dev->do_devobj_ext != NULL) ExFreePool(dev->do_devobj_ext); if (dev->do_devext != NULL) ExFreePool(dev->do_devext); /* Unlink the device from the driver's device list. */ prev = dev->do_drvobj->dro_devobj; if (prev == dev) dev->do_drvobj->dro_devobj = dev->do_nextdev; else { while (prev->do_nextdev != dev) prev = prev->do_nextdev; prev->do_nextdev = dev->do_nextdev; } ExFreePool(dev); } device_object * IoGetAttachedDevice(dev) device_object *dev; { device_object *d; if (dev == NULL) return (NULL); d = dev; while (d->do_attacheddev != NULL) d = d->do_attacheddev; return (d); } static irp * IoBuildSynchronousFsdRequest(func, dobj, buf, len, off, event, status) uint32_t func; device_object *dobj; void *buf; uint32_t len; uint64_t *off; nt_kevent *event; io_status_block *status; { irp *ip; ip = IoBuildAsynchronousFsdRequest(func, dobj, buf, len, off, status); if (ip == NULL) return (NULL); ip->irp_usrevent = event; return (ip); } static irp * IoBuildAsynchronousFsdRequest(func, dobj, buf, len, off, status) uint32_t func; device_object *dobj; void *buf; uint32_t len; uint64_t *off; io_status_block *status; { irp *ip; io_stack_location *sl; ip = IoAllocateIrp(dobj->do_stacksize, TRUE); if (ip == NULL) return (NULL); ip->irp_usriostat = status; ip->irp_tail.irp_overlay.irp_thread = NULL; sl = IoGetNextIrpStackLocation(ip); sl->isl_major = func; sl->isl_minor = 0; sl->isl_flags = 0; sl->isl_ctl = 0; sl->isl_devobj = dobj; sl->isl_fileobj = NULL; sl->isl_completionfunc = NULL; ip->irp_userbuf = buf; if (dobj->do_flags & DO_BUFFERED_IO) { ip->irp_assoc.irp_sysbuf = ExAllocatePoolWithTag(NonPagedPool, len, 0); if (ip->irp_assoc.irp_sysbuf == NULL) { IoFreeIrp(ip); return (NULL); } bcopy(buf, ip->irp_assoc.irp_sysbuf, len); } if (dobj->do_flags & DO_DIRECT_IO) { ip->irp_mdl = IoAllocateMdl(buf, len, FALSE, FALSE, ip); if (ip->irp_mdl == NULL) { if (ip->irp_assoc.irp_sysbuf != NULL) ExFreePool(ip->irp_assoc.irp_sysbuf); IoFreeIrp(ip); return (NULL); } ip->irp_userbuf = NULL; ip->irp_assoc.irp_sysbuf = NULL; } if (func == IRP_MJ_READ) { sl->isl_parameters.isl_read.isl_len = len; if (off != NULL) sl->isl_parameters.isl_read.isl_byteoff = *off; else sl->isl_parameters.isl_read.isl_byteoff = 0; } if (func == IRP_MJ_WRITE) { sl->isl_parameters.isl_write.isl_len = len; if (off != NULL) sl->isl_parameters.isl_write.isl_byteoff = *off; else sl->isl_parameters.isl_write.isl_byteoff = 0; } return (ip); } static irp * IoBuildDeviceIoControlRequest(uint32_t iocode, device_object *dobj, void *ibuf, uint32_t ilen, void *obuf, uint32_t olen, uint8_t isinternal, nt_kevent *event, io_status_block *status) { irp *ip; io_stack_location *sl; uint32_t buflen; ip = IoAllocateIrp(dobj->do_stacksize, TRUE); if (ip == NULL) return (NULL); ip->irp_usrevent = event; ip->irp_usriostat = status; ip->irp_tail.irp_overlay.irp_thread = NULL; sl = IoGetNextIrpStackLocation(ip); sl->isl_major = isinternal == TRUE ? IRP_MJ_INTERNAL_DEVICE_CONTROL : IRP_MJ_DEVICE_CONTROL; sl->isl_minor = 0; sl->isl_flags = 0; sl->isl_ctl = 0; sl->isl_devobj = dobj; sl->isl_fileobj = NULL; sl->isl_completionfunc = NULL; sl->isl_parameters.isl_ioctl.isl_iocode = iocode; sl->isl_parameters.isl_ioctl.isl_ibuflen = ilen; sl->isl_parameters.isl_ioctl.isl_obuflen = olen; switch(IO_METHOD(iocode)) { case METHOD_BUFFERED: if (ilen > olen) buflen = ilen; else buflen = olen; if (buflen) { ip->irp_assoc.irp_sysbuf = ExAllocatePoolWithTag(NonPagedPool, buflen, 0); if (ip->irp_assoc.irp_sysbuf == NULL) { IoFreeIrp(ip); return (NULL); } } if (ilen && ibuf != NULL) { bcopy(ibuf, ip->irp_assoc.irp_sysbuf, ilen); bzero((char *)ip->irp_assoc.irp_sysbuf + ilen, buflen - ilen); } else bzero(ip->irp_assoc.irp_sysbuf, ilen); ip->irp_userbuf = obuf; break; case METHOD_IN_DIRECT: case METHOD_OUT_DIRECT: if (ilen && ibuf != NULL) { ip->irp_assoc.irp_sysbuf = ExAllocatePoolWithTag(NonPagedPool, ilen, 0); if (ip->irp_assoc.irp_sysbuf == NULL) { IoFreeIrp(ip); return (NULL); } bcopy(ibuf, ip->irp_assoc.irp_sysbuf, ilen); } if (olen && obuf != NULL) { ip->irp_mdl = IoAllocateMdl(obuf, olen, FALSE, FALSE, ip); /* * Normally we would MmProbeAndLockPages() * here, but we don't have to in our * imlementation. */ } break; case METHOD_NEITHER: ip->irp_userbuf = obuf; sl->isl_parameters.isl_ioctl.isl_type3ibuf = ibuf; break; default: break; } /* * Ideally, we should associate this IRP with the calling * thread here. */ return (ip); } static irp * IoAllocateIrp(uint8_t stsize, uint8_t chargequota) { irp *i; i = ExAllocatePoolWithTag(NonPagedPool, IoSizeOfIrp(stsize), 0); if (i == NULL) return (NULL); IoInitializeIrp(i, IoSizeOfIrp(stsize), stsize); return (i); } static irp * IoMakeAssociatedIrp(irp *ip, uint8_t stsize) { irp *associrp; associrp = IoAllocateIrp(stsize, FALSE); if (associrp == NULL) return (NULL); mtx_lock(&ntoskrnl_dispatchlock); associrp->irp_flags |= IRP_ASSOCIATED_IRP; associrp->irp_tail.irp_overlay.irp_thread = ip->irp_tail.irp_overlay.irp_thread; associrp->irp_assoc.irp_master = ip; mtx_unlock(&ntoskrnl_dispatchlock); return (associrp); } static void IoFreeIrp(ip) irp *ip; { ExFreePool(ip); } static void IoInitializeIrp(irp *io, uint16_t psize, uint8_t ssize) { bzero((char *)io, IoSizeOfIrp(ssize)); io->irp_size = psize; io->irp_stackcnt = ssize; io->irp_currentstackloc = ssize; InitializeListHead(&io->irp_thlist); io->irp_tail.irp_overlay.irp_csl = (io_stack_location *)(io + 1) + ssize; } static void IoReuseIrp(ip, status) irp *ip; uint32_t status; { uint8_t allocflags; allocflags = ip->irp_allocflags; IoInitializeIrp(ip, ip->irp_size, ip->irp_stackcnt); ip->irp_iostat.isb_status = status; ip->irp_allocflags = allocflags; } void IoAcquireCancelSpinLock(uint8_t *irql) { KeAcquireSpinLock(&ntoskrnl_cancellock, irql); } void IoReleaseCancelSpinLock(uint8_t irql) { KeReleaseSpinLock(&ntoskrnl_cancellock, irql); } uint8_t IoCancelIrp(irp *ip) { cancel_func cfunc; uint8_t cancelirql; IoAcquireCancelSpinLock(&cancelirql); cfunc = IoSetCancelRoutine(ip, NULL); ip->irp_cancel = TRUE; if (cfunc == NULL) { IoReleaseCancelSpinLock(cancelirql); return (FALSE); } ip->irp_cancelirql = cancelirql; MSCALL2(cfunc, IoGetCurrentIrpStackLocation(ip)->isl_devobj, ip); return (uint8_t)IoSetCancelValue(ip, TRUE); } uint32_t IofCallDriver(dobj, ip) device_object *dobj; irp *ip; { driver_object *drvobj; io_stack_location *sl; uint32_t status; driver_dispatch disp; drvobj = dobj->do_drvobj; if (ip->irp_currentstackloc <= 0) panic("IoCallDriver(): out of stack locations"); IoSetNextIrpStackLocation(ip); sl = IoGetCurrentIrpStackLocation(ip); sl->isl_devobj = dobj; disp = drvobj->dro_dispatch[sl->isl_major]; status = MSCALL2(disp, dobj, ip); return (status); } void IofCompleteRequest(irp *ip, uint8_t prioboost) { uint32_t status; device_object *dobj; io_stack_location *sl; completion_func cf; KASSERT(ip->irp_iostat.isb_status != STATUS_PENDING, ("incorrect IRP(%p) status (STATUS_PENDING)", ip)); sl = IoGetCurrentIrpStackLocation(ip); IoSkipCurrentIrpStackLocation(ip); do { if (sl->isl_ctl & SL_PENDING_RETURNED) ip->irp_pendingreturned = TRUE; if (ip->irp_currentstackloc != (ip->irp_stackcnt + 1)) dobj = IoGetCurrentIrpStackLocation(ip)->isl_devobj; else dobj = NULL; if (sl->isl_completionfunc != NULL && ((ip->irp_iostat.isb_status == STATUS_SUCCESS && sl->isl_ctl & SL_INVOKE_ON_SUCCESS) || (ip->irp_iostat.isb_status != STATUS_SUCCESS && sl->isl_ctl & SL_INVOKE_ON_ERROR) || (ip->irp_cancel == TRUE && sl->isl_ctl & SL_INVOKE_ON_CANCEL))) { cf = sl->isl_completionfunc; status = MSCALL3(cf, dobj, ip, sl->isl_completionctx); if (status == STATUS_MORE_PROCESSING_REQUIRED) return; } else { if ((ip->irp_currentstackloc <= ip->irp_stackcnt) && (ip->irp_pendingreturned == TRUE)) IoMarkIrpPending(ip); } /* move to the next. */ IoSkipCurrentIrpStackLocation(ip); sl++; } while (ip->irp_currentstackloc <= (ip->irp_stackcnt + 1)); if (ip->irp_usriostat != NULL) *ip->irp_usriostat = ip->irp_iostat; if (ip->irp_usrevent != NULL) KeSetEvent(ip->irp_usrevent, prioboost, FALSE); /* Handle any associated IRPs. */ if (ip->irp_flags & IRP_ASSOCIATED_IRP) { uint32_t masterirpcnt; irp *masterirp; mdl *m; masterirp = ip->irp_assoc.irp_master; masterirpcnt = InterlockedDecrement(&masterirp->irp_assoc.irp_irpcnt); while ((m = ip->irp_mdl) != NULL) { ip->irp_mdl = m->mdl_next; IoFreeMdl(m); } IoFreeIrp(ip); if (masterirpcnt == 0) IoCompleteRequest(masterirp, IO_NO_INCREMENT); return; } /* With any luck, these conditions will never arise. */ if (ip->irp_flags & IRP_PAGING_IO) { if (ip->irp_mdl != NULL) IoFreeMdl(ip->irp_mdl); IoFreeIrp(ip); } } void ntoskrnl_intr(arg) void *arg; { kinterrupt *iobj; uint8_t irql; uint8_t claimed; list_entry *l; KeAcquireSpinLock(&ntoskrnl_intlock, &irql); l = ntoskrnl_intlist.nle_flink; while (l != &ntoskrnl_intlist) { iobj = CONTAINING_RECORD(l, kinterrupt, ki_list); claimed = MSCALL2(iobj->ki_svcfunc, iobj, iobj->ki_svcctx); if (claimed == TRUE) break; l = l->nle_flink; } KeReleaseSpinLock(&ntoskrnl_intlock, irql); } uint8_t KeAcquireInterruptSpinLock(iobj) kinterrupt *iobj; { uint8_t irql; KeAcquireSpinLock(&ntoskrnl_intlock, &irql); return (irql); } void KeReleaseInterruptSpinLock(kinterrupt *iobj, uint8_t irql) { KeReleaseSpinLock(&ntoskrnl_intlock, irql); } uint8_t KeSynchronizeExecution(iobj, syncfunc, syncctx) kinterrupt *iobj; void *syncfunc; void *syncctx; { uint8_t irql; KeAcquireSpinLock(&ntoskrnl_intlock, &irql); MSCALL1(syncfunc, syncctx); KeReleaseSpinLock(&ntoskrnl_intlock, irql); return (TRUE); } /* * IoConnectInterrupt() is passed only the interrupt vector and * irql that a device wants to use, but no device-specific tag * of any kind. This conflicts rather badly with FreeBSD's * bus_setup_intr(), which needs the device_t for the device * requesting interrupt delivery. In order to bypass this * inconsistency, we implement a second level of interrupt * dispatching on top of bus_setup_intr(). All devices use * ntoskrnl_intr() as their ISR, and any device requesting * interrupts will be registered with ntoskrnl_intr()'s interrupt * dispatch list. When an interrupt arrives, we walk the list * and invoke all the registered ISRs. This effectively makes all * interrupts shared, but it's the only way to duplicate the * semantics of IoConnectInterrupt() and IoDisconnectInterrupt() properly. */ uint32_t IoConnectInterrupt(kinterrupt **iobj, void *svcfunc, void *svcctx, kspin_lock *lock, uint32_t vector, uint8_t irql, uint8_t syncirql, uint8_t imode, uint8_t shared, uint32_t affinity, uint8_t savefloat) { uint8_t curirql; *iobj = ExAllocatePoolWithTag(NonPagedPool, sizeof(kinterrupt), 0); if (*iobj == NULL) return (STATUS_INSUFFICIENT_RESOURCES); (*iobj)->ki_svcfunc = svcfunc; (*iobj)->ki_svcctx = svcctx; if (lock == NULL) { KeInitializeSpinLock(&(*iobj)->ki_lock_priv); (*iobj)->ki_lock = &(*iobj)->ki_lock_priv; } else (*iobj)->ki_lock = lock; KeAcquireSpinLock(&ntoskrnl_intlock, &curirql); InsertHeadList((&ntoskrnl_intlist), (&(*iobj)->ki_list)); KeReleaseSpinLock(&ntoskrnl_intlock, curirql); return (STATUS_SUCCESS); } void IoDisconnectInterrupt(iobj) kinterrupt *iobj; { uint8_t irql; if (iobj == NULL) return; KeAcquireSpinLock(&ntoskrnl_intlock, &irql); RemoveEntryList((&iobj->ki_list)); KeReleaseSpinLock(&ntoskrnl_intlock, irql); ExFreePool(iobj); } device_object * IoAttachDeviceToDeviceStack(src, dst) device_object *src; device_object *dst; { device_object *attached; mtx_lock(&ntoskrnl_dispatchlock); attached = IoGetAttachedDevice(dst); attached->do_attacheddev = src; src->do_attacheddev = NULL; src->do_stacksize = attached->do_stacksize + 1; mtx_unlock(&ntoskrnl_dispatchlock); return (attached); } void IoDetachDevice(topdev) device_object *topdev; { device_object *tail; mtx_lock(&ntoskrnl_dispatchlock); /* First, break the chain. */ tail = topdev->do_attacheddev; if (tail == NULL) { mtx_unlock(&ntoskrnl_dispatchlock); return; } topdev->do_attacheddev = tail->do_attacheddev; topdev->do_refcnt--; /* Now reduce the stacksize count for the takm_il objects. */ tail = topdev->do_attacheddev; while (tail != NULL) { tail->do_stacksize--; tail = tail->do_attacheddev; } mtx_unlock(&ntoskrnl_dispatchlock); } /* * For the most part, an object is considered signalled if * dh_sigstate == TRUE. The exception is for mutant objects * (mutexes), where the logic works like this: * * - If the thread already owns the object and sigstate is * less than or equal to 0, then the object is considered * signalled (recursive acquisition). * - If dh_sigstate == 1, the object is also considered * signalled. */ static int ntoskrnl_is_signalled(obj, td) nt_dispatch_header *obj; struct thread *td; { kmutant *km; if (obj->dh_type == DISP_TYPE_MUTANT) { km = (kmutant *)obj; if ((obj->dh_sigstate <= 0 && km->km_ownerthread == td) || obj->dh_sigstate == 1) return (TRUE); return (FALSE); } if (obj->dh_sigstate > 0) return (TRUE); return (FALSE); } static void ntoskrnl_satisfy_wait(obj, td) nt_dispatch_header *obj; struct thread *td; { kmutant *km; switch (obj->dh_type) { case DISP_TYPE_MUTANT: km = (struct kmutant *)obj; obj->dh_sigstate--; /* * If sigstate reaches 0, the mutex is now * non-signalled (the new thread owns it). */ if (obj->dh_sigstate == 0) { km->km_ownerthread = td; if (km->km_abandoned == TRUE) km->km_abandoned = FALSE; } break; /* Synchronization objects get reset to unsignalled. */ case DISP_TYPE_SYNCHRONIZATION_EVENT: case DISP_TYPE_SYNCHRONIZATION_TIMER: obj->dh_sigstate = 0; break; case DISP_TYPE_SEMAPHORE: obj->dh_sigstate--; break; default: break; } } static void ntoskrnl_satisfy_multiple_waits(wb) wait_block *wb; { wait_block *cur; struct thread *td; cur = wb; td = wb->wb_kthread; do { ntoskrnl_satisfy_wait(wb->wb_object, td); cur->wb_awakened = TRUE; cur = cur->wb_next; } while (cur != wb); } /* Always called with dispatcher lock held. */ static void ntoskrnl_waittest(obj, increment) nt_dispatch_header *obj; uint32_t increment; { wait_block *w, *next; list_entry *e; struct thread *td; wb_ext *we; int satisfied; /* * Once an object has been signalled, we walk its list of * wait blocks. If a wait block can be awakened, then satisfy * waits as necessary and wake the thread. * * The rules work like this: * * If a wait block is marked as WAITTYPE_ANY, then * we can satisfy the wait conditions on the current * object and wake the thread right away. Satisfying * the wait also has the effect of breaking us out * of the search loop. * * If the object is marked as WAITTYLE_ALL, then the * wait block will be part of a circularly linked * list of wait blocks belonging to a waiting thread * that's sleeping in KeWaitForMultipleObjects(). In * order to wake the thread, all the objects in the * wait list must be in the signalled state. If they * are, we then satisfy all of them and wake the * thread. * */ e = obj->dh_waitlisthead.nle_flink; while (e != &obj->dh_waitlisthead && obj->dh_sigstate > 0) { w = CONTAINING_RECORD(e, wait_block, wb_waitlist); we = w->wb_ext; td = we->we_td; satisfied = FALSE; if (w->wb_waittype == WAITTYPE_ANY) { /* * Thread can be awakened if * any wait is satisfied. */ ntoskrnl_satisfy_wait(obj, td); satisfied = TRUE; w->wb_awakened = TRUE; } else { /* * Thread can only be woken up * if all waits are satisfied. * If the thread is waiting on multiple * objects, they should all be linked * through the wb_next pointers in the * wait blocks. */ satisfied = TRUE; next = w->wb_next; while (next != w) { if (ntoskrnl_is_signalled(obj, td) == FALSE) { satisfied = FALSE; break; } next = next->wb_next; } ntoskrnl_satisfy_multiple_waits(w); } if (satisfied == TRUE) cv_broadcastpri(&we->we_cv, (w->wb_oldpri - (increment * 4)) > PRI_MIN_KERN ? w->wb_oldpri - (increment * 4) : PRI_MIN_KERN); e = e->nle_flink; } } /* * Return the number of 100 nanosecond intervals since * January 1, 1601. (?!?!) */ void ntoskrnl_time(tval) uint64_t *tval; { struct timespec ts; nanotime(&ts); *tval = (uint64_t)ts.tv_nsec / 100 + (uint64_t)ts.tv_sec * 10000000 + 11644473600 * 10000000; /* 100ns ticks from 1601 to 1970 */ } static void KeQuerySystemTime(current_time) uint64_t *current_time; { ntoskrnl_time(current_time); } static uint32_t KeTickCount(void) { struct timeval tv; getmicrouptime(&tv); return tvtohz(&tv); } /* * KeWaitForSingleObject() is a tricky beast, because it can be used * with several different object types: semaphores, timers, events, * mutexes and threads. Semaphores don't appear very often, but the * other object types are quite common. KeWaitForSingleObject() is * what's normally used to acquire a mutex, and it can be used to * wait for a thread termination. * * The Windows NDIS API is implemented in terms of Windows kernel * primitives, and some of the object manipulation is duplicated in * NDIS. For example, NDIS has timers and events, which are actually * Windows kevents and ktimers. Now, you're supposed to only use the * NDIS variants of these objects within the confines of the NDIS API, * but there are some naughty developers out there who will use * KeWaitForSingleObject() on NDIS timer and event objects, so we * have to support that as well. Conseqently, our NDIS timer and event * code has to be closely tied into our ntoskrnl timer and event code, * just as it is in Windows. * * KeWaitForSingleObject() may do different things for different kinds * of objects: * * - For events, we check if the event has been signalled. If the * event is already in the signalled state, we just return immediately, * otherwise we wait for it to be set to the signalled state by someone * else calling KeSetEvent(). Events can be either synchronization or * notification events. * * - For timers, if the timer has already fired and the timer is in * the signalled state, we just return, otherwise we wait on the * timer. Unlike an event, timers get signalled automatically when * they expire rather than someone having to trip them manually. * Timers initialized with KeInitializeTimer() are always notification * events: KeInitializeTimerEx() lets you initialize a timer as * either a notification or synchronization event. * * - For mutexes, we try to acquire the mutex and if we can't, we wait * on the mutex until it's available and then grab it. When a mutex is * released, it enters the signalled state, which wakes up one of the * threads waiting to acquire it. Mutexes are always synchronization * events. * * - For threads, the only thing we do is wait until the thread object * enters a signalled state, which occurs when the thread terminates. * Threads are always notification events. * * A notification event wakes up all threads waiting on an object. A * synchronization event wakes up just one. Also, a synchronization event * is auto-clearing, which means we automatically set the event back to * the non-signalled state once the wakeup is done. */ uint32_t KeWaitForSingleObject(void *arg, uint32_t reason, uint32_t mode, uint8_t alertable, int64_t *duetime) { wait_block w; struct thread *td = curthread; struct timeval tv; int error = 0; uint64_t curtime; wb_ext we; nt_dispatch_header *obj; obj = arg; if (obj == NULL) return (STATUS_INVALID_PARAMETER); mtx_lock(&ntoskrnl_dispatchlock); cv_init(&we.we_cv, "KeWFS"); we.we_td = td; /* * Check to see if this object is already signalled, * and just return without waiting if it is. */ if (ntoskrnl_is_signalled(obj, td) == TRUE) { /* Sanity check the signal state value. */ if (obj->dh_sigstate != INT32_MIN) { ntoskrnl_satisfy_wait(obj, curthread); mtx_unlock(&ntoskrnl_dispatchlock); return (STATUS_SUCCESS); } else { /* * There's a limit to how many times we can * recursively acquire a mutant. If we hit * the limit, something is very wrong. */ if (obj->dh_type == DISP_TYPE_MUTANT) { mtx_unlock(&ntoskrnl_dispatchlock); panic("mutant limit exceeded"); } } } bzero((char *)&w, sizeof(wait_block)); w.wb_object = obj; w.wb_ext = &we; w.wb_waittype = WAITTYPE_ANY; w.wb_next = &w; w.wb_waitkey = 0; w.wb_awakened = FALSE; w.wb_oldpri = td->td_priority; InsertTailList((&obj->dh_waitlisthead), (&w.wb_waitlist)); /* * The timeout value is specified in 100 nanosecond units * and can be a positive or negative number. If it's positive, * then the duetime is absolute, and we need to convert it * to an absolute offset relative to now in order to use it. * If it's negative, then the duetime is relative and we * just have to convert the units. */ if (duetime != NULL) { if (*duetime < 0) { tv.tv_sec = - (*duetime) / 10000000; tv.tv_usec = (- (*duetime) / 10) - (tv.tv_sec * 1000000); } else { ntoskrnl_time(&curtime); if (*duetime < curtime) tv.tv_sec = tv.tv_usec = 0; else { tv.tv_sec = ((*duetime) - curtime) / 10000000; tv.tv_usec = ((*duetime) - curtime) / 10 - (tv.tv_sec * 1000000); } } } if (duetime == NULL) cv_wait(&we.we_cv, &ntoskrnl_dispatchlock); else error = cv_timedwait(&we.we_cv, &ntoskrnl_dispatchlock, tvtohz(&tv)); RemoveEntryList(&w.wb_waitlist); cv_destroy(&we.we_cv); /* We timed out. Leave the object alone and return status. */ if (error == EWOULDBLOCK) { mtx_unlock(&ntoskrnl_dispatchlock); return (STATUS_TIMEOUT); } mtx_unlock(&ntoskrnl_dispatchlock); return (STATUS_SUCCESS); /* return (KeWaitForMultipleObjects(1, &obj, WAITTYPE_ALL, reason, mode, alertable, duetime, &w)); */ } static uint32_t KeWaitForMultipleObjects(uint32_t cnt, nt_dispatch_header *obj[], uint32_t wtype, uint32_t reason, uint32_t mode, uint8_t alertable, int64_t *duetime, wait_block *wb_array) { struct thread *td = curthread; wait_block *whead, *w; wait_block _wb_array[MAX_WAIT_OBJECTS]; nt_dispatch_header *cur; struct timeval tv; int i, wcnt = 0, error = 0; uint64_t curtime; struct timespec t1, t2; uint32_t status = STATUS_SUCCESS; wb_ext we; if (cnt > MAX_WAIT_OBJECTS) return (STATUS_INVALID_PARAMETER); if (cnt > THREAD_WAIT_OBJECTS && wb_array == NULL) return (STATUS_INVALID_PARAMETER); mtx_lock(&ntoskrnl_dispatchlock); cv_init(&we.we_cv, "KeWFM"); we.we_td = td; if (wb_array == NULL) whead = _wb_array; else whead = wb_array; bzero((char *)whead, sizeof(wait_block) * cnt); /* First pass: see if we can satisfy any waits immediately. */ wcnt = 0; w = whead; for (i = 0; i < cnt; i++) { InsertTailList((&obj[i]->dh_waitlisthead), (&w->wb_waitlist)); w->wb_ext = &we; w->wb_object = obj[i]; w->wb_waittype = wtype; w->wb_waitkey = i; w->wb_awakened = FALSE; w->wb_oldpri = td->td_priority; w->wb_next = w + 1; w++; wcnt++; if (ntoskrnl_is_signalled(obj[i], td)) { /* * There's a limit to how many times * we can recursively acquire a mutant. * If we hit the limit, something * is very wrong. */ if (obj[i]->dh_sigstate == INT32_MIN && obj[i]->dh_type == DISP_TYPE_MUTANT) { mtx_unlock(&ntoskrnl_dispatchlock); panic("mutant limit exceeded"); } /* * If this is a WAITTYPE_ANY wait, then * satisfy the waited object and exit * right now. */ if (wtype == WAITTYPE_ANY) { ntoskrnl_satisfy_wait(obj[i], td); status = STATUS_WAIT_0 + i; goto wait_done; } else { w--; wcnt--; w->wb_object = NULL; RemoveEntryList(&w->wb_waitlist); } } } /* * If this is a WAITTYPE_ALL wait and all objects are * already signalled, satisfy the waits and exit now. */ if (wtype == WAITTYPE_ALL && wcnt == 0) { for (i = 0; i < cnt; i++) ntoskrnl_satisfy_wait(obj[i], td); status = STATUS_SUCCESS; goto wait_done; } /* * Create a circular waitblock list. The waitcount * must always be non-zero when we get here. */ (w - 1)->wb_next = whead; /* Wait on any objects that aren't yet signalled. */ /* Calculate timeout, if any. */ if (duetime != NULL) { if (*duetime < 0) { tv.tv_sec = - (*duetime) / 10000000; tv.tv_usec = (- (*duetime) / 10) - (tv.tv_sec * 1000000); } else { ntoskrnl_time(&curtime); if (*duetime < curtime) tv.tv_sec = tv.tv_usec = 0; else { tv.tv_sec = ((*duetime) - curtime) / 10000000; tv.tv_usec = ((*duetime) - curtime) / 10 - (tv.tv_sec * 1000000); } } } while (wcnt) { nanotime(&t1); if (duetime == NULL) cv_wait(&we.we_cv, &ntoskrnl_dispatchlock); else error = cv_timedwait(&we.we_cv, &ntoskrnl_dispatchlock, tvtohz(&tv)); /* Wait with timeout expired. */ if (error) { status = STATUS_TIMEOUT; goto wait_done; } nanotime(&t2); /* See what's been signalled. */ w = whead; do { cur = w->wb_object; if (ntoskrnl_is_signalled(cur, td) == TRUE || w->wb_awakened == TRUE) { /* Sanity check the signal state value. */ if (cur->dh_sigstate == INT32_MIN && cur->dh_type == DISP_TYPE_MUTANT) { mtx_unlock(&ntoskrnl_dispatchlock); panic("mutant limit exceeded"); } wcnt--; if (wtype == WAITTYPE_ANY) { status = w->wb_waitkey & STATUS_WAIT_0; goto wait_done; } } w = w->wb_next; } while (w != whead); /* * If all objects have been signalled, or if this * is a WAITTYPE_ANY wait and we were woke up by * someone, we can bail. */ if (wcnt == 0) { status = STATUS_SUCCESS; goto wait_done; } /* * If this is WAITTYPE_ALL wait, and there's still * objects that haven't been signalled, deduct the * time that's elapsed so far from the timeout and * wait again (or continue waiting indefinitely if * there's no timeout). */ if (duetime != NULL) { tv.tv_sec -= (t2.tv_sec - t1.tv_sec); tv.tv_usec -= (t2.tv_nsec - t1.tv_nsec) / 1000; } } wait_done: cv_destroy(&we.we_cv); for (i = 0; i < cnt; i++) { if (whead[i].wb_object != NULL) RemoveEntryList(&whead[i].wb_waitlist); } mtx_unlock(&ntoskrnl_dispatchlock); return (status); } static void WRITE_REGISTER_USHORT(uint16_t *reg, uint16_t val) { bus_space_write_2(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg, val); } static uint16_t READ_REGISTER_USHORT(reg) uint16_t *reg; { return (bus_space_read_2(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg)); } static void WRITE_REGISTER_ULONG(reg, val) uint32_t *reg; uint32_t val; { bus_space_write_4(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg, val); } static uint32_t READ_REGISTER_ULONG(reg) uint32_t *reg; { return (bus_space_read_4(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg)); } static uint8_t READ_REGISTER_UCHAR(uint8_t *reg) { return (bus_space_read_1(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg)); } static void WRITE_REGISTER_UCHAR(uint8_t *reg, uint8_t val) { bus_space_write_1(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg, val); } static int64_t _allmul(a, b) int64_t a; int64_t b; { return (a * b); } static int64_t _alldiv(a, b) int64_t a; int64_t b; { return (a / b); } static int64_t _allrem(a, b) int64_t a; int64_t b; { return (a % b); } static uint64_t _aullmul(a, b) uint64_t a; uint64_t b; { return (a * b); } static uint64_t _aulldiv(a, b) uint64_t a; uint64_t b; { return (a / b); } static uint64_t _aullrem(a, b) uint64_t a; uint64_t b; { return (a % b); } static int64_t _allshl(int64_t a, uint8_t b) { return (a << b); } static uint64_t _aullshl(uint64_t a, uint8_t b) { return (a << b); } static int64_t _allshr(int64_t a, uint8_t b) { return (a >> b); } static uint64_t _aullshr(uint64_t a, uint8_t b) { return (a >> b); } static slist_entry * ntoskrnl_pushsl(head, entry) slist_header *head; slist_entry *entry; { slist_entry *oldhead; oldhead = head->slh_list.slh_next; entry->sl_next = head->slh_list.slh_next; head->slh_list.slh_next = entry; head->slh_list.slh_depth++; head->slh_list.slh_seq++; return (oldhead); } +static void +InitializeSListHead(head) + slist_header *head; +{ + memset(head, 0, sizeof(*head)); +} + static slist_entry * ntoskrnl_popsl(head) slist_header *head; { slist_entry *first; first = head->slh_list.slh_next; if (first != NULL) { head->slh_list.slh_next = first->sl_next; head->slh_list.slh_depth--; head->slh_list.slh_seq++; } return (first); } /* * We need this to make lookaside lists work for amd64. * We pass a pointer to ExAllocatePoolWithTag() the lookaside * list structure. For amd64 to work right, this has to be a * pointer to the wrapped version of the routine, not the * original. Letting the Windows driver invoke the original * function directly will result in a convention calling * mismatch and a pretty crash. On x86, this effectively * becomes a no-op since ipt_func and ipt_wrap are the same. */ static funcptr ntoskrnl_findwrap(func) funcptr func; { image_patch_table *patch; patch = ntoskrnl_functbl; while (patch->ipt_func != NULL) { if ((funcptr)patch->ipt_func == func) return ((funcptr)patch->ipt_wrap); patch++; } return (NULL); } static void ExInitializePagedLookasideList(paged_lookaside_list *lookaside, lookaside_alloc_func *allocfunc, lookaside_free_func *freefunc, uint32_t flags, size_t size, uint32_t tag, uint16_t depth) { bzero((char *)lookaside, sizeof(paged_lookaside_list)); if (size < sizeof(slist_entry)) lookaside->nll_l.gl_size = sizeof(slist_entry); else lookaside->nll_l.gl_size = size; lookaside->nll_l.gl_tag = tag; if (allocfunc == NULL) lookaside->nll_l.gl_allocfunc = ntoskrnl_findwrap((funcptr)ExAllocatePoolWithTag); else lookaside->nll_l.gl_allocfunc = allocfunc; if (freefunc == NULL) lookaside->nll_l.gl_freefunc = ntoskrnl_findwrap((funcptr)ExFreePool); else lookaside->nll_l.gl_freefunc = freefunc; #ifdef __i386__ KeInitializeSpinLock(&lookaside->nll_obsoletelock); #endif lookaside->nll_l.gl_type = NonPagedPool; lookaside->nll_l.gl_depth = depth; lookaside->nll_l.gl_maxdepth = LOOKASIDE_DEPTH; } static void ExDeletePagedLookasideList(lookaside) paged_lookaside_list *lookaside; { void *buf; void (*freefunc)(void *); freefunc = lookaside->nll_l.gl_freefunc; while((buf = ntoskrnl_popsl(&lookaside->nll_l.gl_listhead)) != NULL) MSCALL1(freefunc, buf); } static void ExInitializeNPagedLookasideList(npaged_lookaside_list *lookaside, lookaside_alloc_func *allocfunc, lookaside_free_func *freefunc, uint32_t flags, size_t size, uint32_t tag, uint16_t depth) { bzero((char *)lookaside, sizeof(npaged_lookaside_list)); if (size < sizeof(slist_entry)) lookaside->nll_l.gl_size = sizeof(slist_entry); else lookaside->nll_l.gl_size = size; lookaside->nll_l.gl_tag = tag; if (allocfunc == NULL) lookaside->nll_l.gl_allocfunc = ntoskrnl_findwrap((funcptr)ExAllocatePoolWithTag); else lookaside->nll_l.gl_allocfunc = allocfunc; if (freefunc == NULL) lookaside->nll_l.gl_freefunc = ntoskrnl_findwrap((funcptr)ExFreePool); else lookaside->nll_l.gl_freefunc = freefunc; #ifdef __i386__ KeInitializeSpinLock(&lookaside->nll_obsoletelock); #endif lookaside->nll_l.gl_type = NonPagedPool; lookaside->nll_l.gl_depth = depth; lookaside->nll_l.gl_maxdepth = LOOKASIDE_DEPTH; } static void ExDeleteNPagedLookasideList(lookaside) npaged_lookaside_list *lookaside; { void *buf; void (*freefunc)(void *); freefunc = lookaside->nll_l.gl_freefunc; while((buf = ntoskrnl_popsl(&lookaside->nll_l.gl_listhead)) != NULL) MSCALL1(freefunc, buf); } slist_entry * InterlockedPushEntrySList(head, entry) slist_header *head; slist_entry *entry; { slist_entry *oldhead; mtx_lock_spin(&ntoskrnl_interlock); oldhead = ntoskrnl_pushsl(head, entry); mtx_unlock_spin(&ntoskrnl_interlock); return (oldhead); } slist_entry * InterlockedPopEntrySList(head) slist_header *head; { slist_entry *first; mtx_lock_spin(&ntoskrnl_interlock); first = ntoskrnl_popsl(head); mtx_unlock_spin(&ntoskrnl_interlock); return (first); } static slist_entry * ExInterlockedPushEntrySList(head, entry, lock) slist_header *head; slist_entry *entry; kspin_lock *lock; { return (InterlockedPushEntrySList(head, entry)); } static slist_entry * ExInterlockedPopEntrySList(head, lock) slist_header *head; kspin_lock *lock; { return (InterlockedPopEntrySList(head)); } uint16_t ExQueryDepthSList(head) slist_header *head; { uint16_t depth; mtx_lock_spin(&ntoskrnl_interlock); depth = head->slh_list.slh_depth; mtx_unlock_spin(&ntoskrnl_interlock); return (depth); } void KeInitializeSpinLock(lock) kspin_lock *lock; { *lock = 0; } #ifdef __i386__ void KefAcquireSpinLockAtDpcLevel(lock) kspin_lock *lock; { #ifdef NTOSKRNL_DEBUG_SPINLOCKS int i = 0; #endif while (atomic_cmpset_acq_int((volatile u_int *)lock, 0, 1) == 0) { /* sit and spin */; #ifdef NTOSKRNL_DEBUG_SPINLOCKS i++; if (i > 200000000) panic("DEADLOCK!"); #endif } } void KefReleaseSpinLockFromDpcLevel(lock) kspin_lock *lock; { atomic_store_rel_int((volatile u_int *)lock, 0); } uint8_t KeAcquireSpinLockRaiseToDpc(kspin_lock *lock) { uint8_t oldirql; if (KeGetCurrentIrql() > DISPATCH_LEVEL) panic("IRQL_NOT_LESS_THAN_OR_EQUAL"); KeRaiseIrql(DISPATCH_LEVEL, &oldirql); KeAcquireSpinLockAtDpcLevel(lock); return (oldirql); } #else void KeAcquireSpinLockAtDpcLevel(kspin_lock *lock) { while (atomic_cmpset_acq_int((volatile u_int *)lock, 0, 1) == 0) /* sit and spin */; } void KeReleaseSpinLockFromDpcLevel(kspin_lock *lock) { atomic_store_rel_int((volatile u_int *)lock, 0); } #endif /* __i386__ */ uintptr_t InterlockedExchange(dst, val) volatile uint32_t *dst; uintptr_t val; { uintptr_t r; mtx_lock_spin(&ntoskrnl_interlock); r = *dst; *dst = val; mtx_unlock_spin(&ntoskrnl_interlock); return (r); } static uint32_t InterlockedIncrement(addend) volatile uint32_t *addend; { atomic_add_long((volatile u_long *)addend, 1); return (*addend); } static uint32_t InterlockedDecrement(addend) volatile uint32_t *addend; { atomic_subtract_long((volatile u_long *)addend, 1); return (*addend); } static void ExInterlockedAddLargeStatistic(addend, inc) uint64_t *addend; uint32_t inc; { mtx_lock_spin(&ntoskrnl_interlock); *addend += inc; mtx_unlock_spin(&ntoskrnl_interlock); }; mdl * IoAllocateMdl(void *vaddr, uint32_t len, uint8_t secondarybuf, uint8_t chargequota, irp *iopkt) { mdl *m; int zone = 0; if (MmSizeOfMdl(vaddr, len) > MDL_ZONE_SIZE) m = ExAllocatePoolWithTag(NonPagedPool, MmSizeOfMdl(vaddr, len), 0); else { m = uma_zalloc(mdl_zone, M_NOWAIT | M_ZERO); zone++; } if (m == NULL) return (NULL); MmInitializeMdl(m, vaddr, len); /* * MmInitializMdl() clears the flags field, so we * have to set this here. If the MDL came from the * MDL UMA zone, tag it so we can release it to * the right place later. */ if (zone) m->mdl_flags = MDL_ZONE_ALLOCED; if (iopkt != NULL) { if (secondarybuf == TRUE) { mdl *last; last = iopkt->irp_mdl; while (last->mdl_next != NULL) last = last->mdl_next; last->mdl_next = m; } else { if (iopkt->irp_mdl != NULL) panic("leaking an MDL in IoAllocateMdl()"); iopkt->irp_mdl = m; } } return (m); } void IoFreeMdl(m) mdl *m; { if (m == NULL) return; if (m->mdl_flags & MDL_ZONE_ALLOCED) uma_zfree(mdl_zone, m); else ExFreePool(m); } static void * MmAllocateContiguousMemory(size, highest) uint32_t size; uint64_t highest; { void *addr; size_t pagelength = roundup(size, PAGE_SIZE); addr = ExAllocatePoolWithTag(NonPagedPool, pagelength, 0); return (addr); } static void * MmAllocateContiguousMemorySpecifyCache(size, lowest, highest, boundary, cachetype) uint32_t size; uint64_t lowest; uint64_t highest; uint64_t boundary; enum nt_caching_type cachetype; { vm_memattr_t memattr; void *ret; switch (cachetype) { case MmNonCached: memattr = VM_MEMATTR_UNCACHEABLE; break; case MmWriteCombined: memattr = VM_MEMATTR_WRITE_COMBINING; break; case MmNonCachedUnordered: memattr = VM_MEMATTR_UNCACHEABLE; break; case MmCached: case MmHardwareCoherentCached: case MmUSWCCached: default: memattr = VM_MEMATTR_DEFAULT; break; } ret = (void *)kmem_alloc_contig(kernel_map, size, M_ZERO | M_NOWAIT, lowest, highest, PAGE_SIZE, boundary, memattr); if (ret != NULL) malloc_type_allocated(M_DEVBUF, round_page(size)); return (ret); } static void MmFreeContiguousMemory(base) void *base; { ExFreePool(base); } static void MmFreeContiguousMemorySpecifyCache(base, size, cachetype) void *base; uint32_t size; enum nt_caching_type cachetype; { contigfree(base, size, M_DEVBUF); } static uint32_t MmSizeOfMdl(vaddr, len) void *vaddr; size_t len; { uint32_t l; l = sizeof(struct mdl) + (sizeof(vm_offset_t *) * SPAN_PAGES(vaddr, len)); return (l); } /* * The Microsoft documentation says this routine fills in the * page array of an MDL with the _physical_ page addresses that * comprise the buffer, but we don't really want to do that here. * Instead, we just fill in the page array with the kernel virtual * addresses of the buffers. */ void MmBuildMdlForNonPagedPool(m) mdl *m; { vm_offset_t *mdl_pages; int pagecnt, i; pagecnt = SPAN_PAGES(m->mdl_byteoffset, m->mdl_bytecount); if (pagecnt > (m->mdl_size - sizeof(mdl)) / sizeof(vm_offset_t *)) panic("not enough pages in MDL to describe buffer"); mdl_pages = MmGetMdlPfnArray(m); for (i = 0; i < pagecnt; i++) *mdl_pages = (vm_offset_t)m->mdl_startva + (i * PAGE_SIZE); m->mdl_flags |= MDL_SOURCE_IS_NONPAGED_POOL; m->mdl_mappedsystemva = MmGetMdlVirtualAddress(m); } static void * MmMapLockedPages(mdl *buf, uint8_t accessmode) { buf->mdl_flags |= MDL_MAPPED_TO_SYSTEM_VA; return (MmGetMdlVirtualAddress(buf)); } static void * MmMapLockedPagesSpecifyCache(mdl *buf, uint8_t accessmode, uint32_t cachetype, void *vaddr, uint32_t bugcheck, uint32_t prio) { return (MmMapLockedPages(buf, accessmode)); } static void MmUnmapLockedPages(vaddr, buf) void *vaddr; mdl *buf; { buf->mdl_flags &= ~MDL_MAPPED_TO_SYSTEM_VA; } /* * This function has a problem in that it will break if you * compile this module without PAE and try to use it on a PAE * kernel. Unfortunately, there's no way around this at the * moment. It's slightly less broken that using pmap_kextract(). * You'd think the virtual memory subsystem would help us out * here, but it doesn't. */ static uint64_t MmGetPhysicalAddress(void *base) { return (pmap_extract(kernel_map->pmap, (vm_offset_t)base)); } uint8_t MmIsAddressValid(vaddr) void *vaddr; { if (pmap_extract(kernel_map->pmap, (vm_offset_t)vaddr)) return (TRUE); return (FALSE); } void * MmMapIoSpace(paddr, len, cachetype) uint64_t paddr; uint32_t len; uint32_t cachetype; { devclass_t nexus_class; device_t *nexus_devs, devp; int nexus_count = 0; device_t matching_dev = NULL; struct resource *res; int i; vm_offset_t v; /* There will always be at least one nexus. */ nexus_class = devclass_find("nexus"); devclass_get_devices(nexus_class, &nexus_devs, &nexus_count); for (i = 0; i < nexus_count; i++) { devp = nexus_devs[i]; matching_dev = ntoskrnl_finddev(devp, paddr, &res); if (matching_dev) break; } free(nexus_devs, M_TEMP); if (matching_dev == NULL) return (NULL); v = (vm_offset_t)rman_get_virtual(res); if (paddr > rman_get_start(res)) v += paddr - rman_get_start(res); return ((void *)v); } void MmUnmapIoSpace(vaddr, len) void *vaddr; size_t len; { } static device_t ntoskrnl_finddev(dev, paddr, res) device_t dev; uint64_t paddr; struct resource **res; { device_t *children = NULL; device_t matching_dev; int childcnt; struct resource *r; struct resource_list *rl; struct resource_list_entry *rle; uint32_t flags; int i; /* We only want devices that have been successfully probed. */ if (device_is_alive(dev) == FALSE) return (NULL); rl = BUS_GET_RESOURCE_LIST(device_get_parent(dev), dev); if (rl != NULL) { STAILQ_FOREACH(rle, rl, link) { r = rle->res; if (r == NULL) continue; flags = rman_get_flags(r); if (rle->type == SYS_RES_MEMORY && paddr >= rman_get_start(r) && paddr <= rman_get_end(r)) { if (!(flags & RF_ACTIVE)) bus_activate_resource(dev, SYS_RES_MEMORY, 0, r); *res = r; return (dev); } } } /* * If this device has children, do another * level of recursion to inspect them. */ device_get_children(dev, &children, &childcnt); for (i = 0; i < childcnt; i++) { matching_dev = ntoskrnl_finddev(children[i], paddr, res); if (matching_dev != NULL) { free(children, M_TEMP); return (matching_dev); } } /* Won't somebody please think of the children! */ if (children != NULL) free(children, M_TEMP); return (NULL); } /* * Workitems are unlike DPCs, in that they run in a user-mode thread * context rather than at DISPATCH_LEVEL in kernel context. In our * case we run them in kernel context anyway. */ static void ntoskrnl_workitem_thread(arg) void *arg; { kdpc_queue *kq; list_entry *l; io_workitem *iw; uint8_t irql; kq = arg; InitializeListHead(&kq->kq_disp); kq->kq_td = curthread; kq->kq_exit = 0; KeInitializeSpinLock(&kq->kq_lock); KeInitializeEvent(&kq->kq_proc, EVENT_TYPE_SYNC, FALSE); while (1) { KeWaitForSingleObject(&kq->kq_proc, 0, 0, TRUE, NULL); KeAcquireSpinLock(&kq->kq_lock, &irql); if (kq->kq_exit) { kq->kq_exit = 0; KeReleaseSpinLock(&kq->kq_lock, irql); break; } while (!IsListEmpty(&kq->kq_disp)) { l = RemoveHeadList(&kq->kq_disp); iw = CONTAINING_RECORD(l, io_workitem, iw_listentry); InitializeListHead((&iw->iw_listentry)); if (iw->iw_func == NULL) continue; KeReleaseSpinLock(&kq->kq_lock, irql); MSCALL2(iw->iw_func, iw->iw_dobj, iw->iw_ctx); KeAcquireSpinLock(&kq->kq_lock, &irql); } KeReleaseSpinLock(&kq->kq_lock, irql); } kproc_exit(0); return; /* notreached */ } +static ndis_status +RtlCharToInteger(src, base, val) + const char *src; + uint32_t base; + uint32_t *val; +{ + int negative = 0; + uint32_t res; + + if (!src || !val) + return (STATUS_ACCESS_VIOLATION); + while (*src != '\0' && *src <= ' ') + src++; + if (*src == '+') + src++; + else if (*src == '-') { + src++; + negative = 1; + } + if (base == 0) { + base = 10; + if (*src == '0') { + src++; + if (*src == 'b') { + base = 2; + src++; + } else if (*src == 'o') { + base = 8; + src++; + } else if (*src == 'x') { + base = 16; + src++; + } + } + } else if (!(base == 2 || base == 8 || base == 10 || base == 16)) + return (STATUS_INVALID_PARAMETER); + + for (res = 0; *src; src++) { + int v; + if (isdigit(*src)) + v = *src - '0'; + else if (isxdigit(*src)) + v = tolower(*src) - 'a' + 10; + else + v = base; + if (v >= base) + return (STATUS_INVALID_PARAMETER); + res = res * base + v; + } + *val = negative ? -res : res; + return (STATUS_SUCCESS); +} + static void ntoskrnl_destroy_workitem_threads(void) { kdpc_queue *kq; int i; for (i = 0; i < WORKITEM_THREADS; i++) { kq = wq_queues + i; kq->kq_exit = 1; KeSetEvent(&kq->kq_proc, IO_NO_INCREMENT, FALSE); while (kq->kq_exit) tsleep(kq->kq_td->td_proc, PWAIT, "waitiw", hz/10); } } io_workitem * IoAllocateWorkItem(dobj) device_object *dobj; { io_workitem *iw; iw = uma_zalloc(iw_zone, M_NOWAIT); if (iw == NULL) return (NULL); InitializeListHead(&iw->iw_listentry); iw->iw_dobj = dobj; mtx_lock(&ntoskrnl_dispatchlock); iw->iw_idx = wq_idx; WORKIDX_INC(wq_idx); mtx_unlock(&ntoskrnl_dispatchlock); return (iw); } void IoFreeWorkItem(iw) io_workitem *iw; { uma_zfree(iw_zone, iw); } void IoQueueWorkItem(iw, iw_func, qtype, ctx) io_workitem *iw; io_workitem_func iw_func; uint32_t qtype; void *ctx; { kdpc_queue *kq; list_entry *l; io_workitem *cur; uint8_t irql; kq = wq_queues + iw->iw_idx; KeAcquireSpinLock(&kq->kq_lock, &irql); /* * Traverse the list and make sure this workitem hasn't * already been inserted. Queuing the same workitem * twice will hose the list but good. */ l = kq->kq_disp.nle_flink; while (l != &kq->kq_disp) { cur = CONTAINING_RECORD(l, io_workitem, iw_listentry); if (cur == iw) { /* Already queued -- do nothing. */ KeReleaseSpinLock(&kq->kq_lock, irql); return; } l = l->nle_flink; } iw->iw_func = iw_func; iw->iw_ctx = ctx; InsertTailList((&kq->kq_disp), (&iw->iw_listentry)); KeReleaseSpinLock(&kq->kq_lock, irql); KeSetEvent(&kq->kq_proc, IO_NO_INCREMENT, FALSE); } static void ntoskrnl_workitem(dobj, arg) device_object *dobj; void *arg; { io_workitem *iw; work_queue_item *w; work_item_func f; iw = arg; w = (work_queue_item *)dobj; f = (work_item_func)w->wqi_func; uma_zfree(iw_zone, iw); MSCALL2(f, w, w->wqi_ctx); } /* * The ExQueueWorkItem() API is deprecated in Windows XP. Microsoft * warns that it's unsafe and to use IoQueueWorkItem() instead. The * problem with ExQueueWorkItem() is that it can't guard against * the condition where a driver submits a job to the work queue and * is then unloaded before the job is able to run. IoQueueWorkItem() * acquires a reference to the device's device_object via the * object manager and retains it until after the job has completed, * which prevents the driver from being unloaded before the job * runs. (We don't currently support this behavior, though hopefully * that will change once the object manager API is fleshed out a bit.) * * Having said all that, the ExQueueWorkItem() API remains, because * there are still other parts of Windows that use it, including * NDIS itself: NdisScheduleWorkItem() calls ExQueueWorkItem(). * We fake up the ExQueueWorkItem() API on top of our implementation * of IoQueueWorkItem(). Workitem thread #3 is reserved exclusively * for ExQueueWorkItem() jobs, and we pass a pointer to the work * queue item (provided by the caller) in to IoAllocateWorkItem() * instead of the device_object. We need to save this pointer so * we can apply a sanity check: as with the DPC queue and other * workitem queues, we can't allow the same work queue item to * be queued twice. If it's already pending, we silently return */ void ExQueueWorkItem(w, qtype) work_queue_item *w; uint32_t qtype; { io_workitem *iw; io_workitem_func iwf; kdpc_queue *kq; list_entry *l; io_workitem *cur; uint8_t irql; /* * We need to do a special sanity test to make sure * the ExQueueWorkItem() API isn't used to queue * the same workitem twice. Rather than checking the * io_workitem pointer itself, we test the attached * device object, which is really a pointer to the * legacy work queue item structure. */ kq = wq_queues + WORKITEM_LEGACY_THREAD; KeAcquireSpinLock(&kq->kq_lock, &irql); l = kq->kq_disp.nle_flink; while (l != &kq->kq_disp) { cur = CONTAINING_RECORD(l, io_workitem, iw_listentry); if (cur->iw_dobj == (device_object *)w) { /* Already queued -- do nothing. */ KeReleaseSpinLock(&kq->kq_lock, irql); return; } l = l->nle_flink; } KeReleaseSpinLock(&kq->kq_lock, irql); iw = IoAllocateWorkItem((device_object *)w); if (iw == NULL) return; iw->iw_idx = WORKITEM_LEGACY_THREAD; iwf = (io_workitem_func)ntoskrnl_findwrap((funcptr)ntoskrnl_workitem); IoQueueWorkItem(iw, iwf, qtype, iw); } static void RtlZeroMemory(dst, len) void *dst; size_t len; { bzero(dst, len); } static void +RtlSecureZeroMemory(dst, len) + void *dst; + size_t len; +{ + memset(dst, 0, len); +} + +static void +RtlFillMemory(dst, len, c) + void *dst; + size_t len; + uint8_t c; +{ + memset(dst, c, len); +} + +static void +RtlMoveMemory(dst, src, len) + void *dst; + const void *src; + size_t len; +{ + memmove(dst, src, len); +} + +static void RtlCopyMemory(dst, src, len) void *dst; const void *src; size_t len; { bcopy(src, dst, len); } static size_t RtlCompareMemory(s1, s2, len) const void *s1; const void *s2; size_t len; { - size_t i, total = 0; + size_t i; uint8_t *m1, *m2; m1 = __DECONST(char *, s1); m2 = __DECONST(char *, s2); - for (i = 0; i < len; i++) { - if (m1[i] == m2[i]) - total++; - } - return (total); + for (i = 0; i < len && m1[i] == m2[i]; i++); + return (i); } void RtlInitAnsiString(dst, src) ansi_string *dst; char *src; { ansi_string *a; a = dst; if (a == NULL) return; if (src == NULL) { a->as_len = a->as_maxlen = 0; a->as_buf = NULL; } else { a->as_buf = src; a->as_len = a->as_maxlen = strlen(src); } } void RtlInitUnicodeString(dst, src) unicode_string *dst; uint16_t *src; { unicode_string *u; int i; u = dst; if (u == NULL) return; if (src == NULL) { u->us_len = u->us_maxlen = 0; u->us_buf = NULL; } else { i = 0; while(src[i] != 0) i++; u->us_buf = src; u->us_len = u->us_maxlen = i * 2; } } ndis_status RtlUnicodeStringToInteger(ustr, base, val) unicode_string *ustr; uint32_t base; uint32_t *val; { uint16_t *uchr; int len, neg = 0; char abuf[64]; char *astr; uchr = ustr->us_buf; len = ustr->us_len; bzero(abuf, sizeof(abuf)); if ((char)((*uchr) & 0xFF) == '-') { neg = 1; uchr++; len -= 2; } else if ((char)((*uchr) & 0xFF) == '+') { neg = 0; uchr++; len -= 2; } if (base == 0) { if ((char)((*uchr) & 0xFF) == 'b') { base = 2; uchr++; len -= 2; } else if ((char)((*uchr) & 0xFF) == 'o') { base = 8; uchr++; len -= 2; } else if ((char)((*uchr) & 0xFF) == 'x') { base = 16; uchr++; len -= 2; } else base = 10; } astr = abuf; if (neg) { strcpy(astr, "-"); astr++; } ntoskrnl_unicode_to_ascii(uchr, astr, len); *val = strtoul(abuf, NULL, base); return (STATUS_SUCCESS); } void RtlFreeUnicodeString(ustr) unicode_string *ustr; { if (ustr->us_buf == NULL) return; ExFreePool(ustr->us_buf); ustr->us_buf = NULL; } void RtlFreeAnsiString(astr) ansi_string *astr; { if (astr->as_buf == NULL) return; ExFreePool(astr->as_buf); astr->as_buf = NULL; } static int atoi(str) const char *str; { return (int)strtol(str, (char **)NULL, 10); } static long atol(str) const char *str; { return strtol(str, (char **)NULL, 10); } static int rand(void) { struct timeval tv; microtime(&tv); srandom(tv.tv_usec); return ((int)random()); } static void srand(seed) unsigned int seed; { srandom(seed); } static uint8_t IoIsWdmVersionAvailable(uint8_t major, uint8_t minor) { if (major == WDM_MAJOR && minor == WDM_MINOR_WINXP) return (TRUE); return (FALSE); } static ndis_status IoGetDeviceObjectPointer(name, reqaccess, fileobj, devobj) unicode_string *name; uint32_t reqaccess; void *fileobj; device_object *devobj; { return (STATUS_SUCCESS); } static ndis_status IoGetDeviceProperty(devobj, regprop, buflen, prop, reslen) device_object *devobj; uint32_t regprop; uint32_t buflen; void *prop; uint32_t *reslen; { driver_object *drv; uint16_t **name; drv = devobj->do_drvobj; switch (regprop) { case DEVPROP_DRIVER_KEYNAME: name = prop; *name = drv->dro_drivername.us_buf; *reslen = drv->dro_drivername.us_len; break; default: return (STATUS_INVALID_PARAMETER_2); break; } return (STATUS_SUCCESS); } static void KeInitializeMutex(kmutex, level) kmutant *kmutex; uint32_t level; { InitializeListHead((&kmutex->km_header.dh_waitlisthead)); kmutex->km_abandoned = FALSE; kmutex->km_apcdisable = 1; kmutex->km_header.dh_sigstate = 1; kmutex->km_header.dh_type = DISP_TYPE_MUTANT; kmutex->km_header.dh_size = sizeof(kmutant) / sizeof(uint32_t); kmutex->km_ownerthread = NULL; } static uint32_t KeReleaseMutex(kmutant *kmutex, uint8_t kwait) { uint32_t prevstate; mtx_lock(&ntoskrnl_dispatchlock); prevstate = kmutex->km_header.dh_sigstate; if (kmutex->km_ownerthread != curthread) { mtx_unlock(&ntoskrnl_dispatchlock); return (STATUS_MUTANT_NOT_OWNED); } kmutex->km_header.dh_sigstate++; kmutex->km_abandoned = FALSE; if (kmutex->km_header.dh_sigstate == 1) { kmutex->km_ownerthread = NULL; ntoskrnl_waittest(&kmutex->km_header, IO_NO_INCREMENT); } mtx_unlock(&ntoskrnl_dispatchlock); return (prevstate); } static uint32_t KeReadStateMutex(kmutex) kmutant *kmutex; { return (kmutex->km_header.dh_sigstate); } void KeInitializeEvent(nt_kevent *kevent, uint32_t type, uint8_t state) { InitializeListHead((&kevent->k_header.dh_waitlisthead)); kevent->k_header.dh_sigstate = state; if (type == EVENT_TYPE_NOTIFY) kevent->k_header.dh_type = DISP_TYPE_NOTIFICATION_EVENT; else kevent->k_header.dh_type = DISP_TYPE_SYNCHRONIZATION_EVENT; kevent->k_header.dh_size = sizeof(nt_kevent) / sizeof(uint32_t); } uint32_t KeResetEvent(kevent) nt_kevent *kevent; { uint32_t prevstate; mtx_lock(&ntoskrnl_dispatchlock); prevstate = kevent->k_header.dh_sigstate; kevent->k_header.dh_sigstate = FALSE; mtx_unlock(&ntoskrnl_dispatchlock); return (prevstate); } uint32_t KeSetEvent(nt_kevent *kevent, uint32_t increment, uint8_t kwait) { uint32_t prevstate; wait_block *w; nt_dispatch_header *dh; struct thread *td; wb_ext *we; mtx_lock(&ntoskrnl_dispatchlock); prevstate = kevent->k_header.dh_sigstate; dh = &kevent->k_header; if (IsListEmpty(&dh->dh_waitlisthead)) /* * If there's nobody in the waitlist, just set * the state to signalled. */ dh->dh_sigstate = 1; else { /* * Get the first waiter. If this is a synchronization * event, just wake up that one thread (don't bother * setting the state to signalled since we're supposed * to automatically clear synchronization events anyway). * * If it's a notification event, or the the first * waiter is doing a WAITTYPE_ALL wait, go through * the full wait satisfaction process. */ w = CONTAINING_RECORD(dh->dh_waitlisthead.nle_flink, wait_block, wb_waitlist); we = w->wb_ext; td = we->we_td; if (kevent->k_header.dh_type == DISP_TYPE_NOTIFICATION_EVENT || w->wb_waittype == WAITTYPE_ALL) { if (prevstate == 0) { dh->dh_sigstate = 1; ntoskrnl_waittest(dh, increment); } } else { w->wb_awakened |= TRUE; cv_broadcastpri(&we->we_cv, (w->wb_oldpri - (increment * 4)) > PRI_MIN_KERN ? w->wb_oldpri - (increment * 4) : PRI_MIN_KERN); } } mtx_unlock(&ntoskrnl_dispatchlock); return (prevstate); } void KeClearEvent(kevent) nt_kevent *kevent; { kevent->k_header.dh_sigstate = FALSE; } uint32_t KeReadStateEvent(kevent) nt_kevent *kevent; { return (kevent->k_header.dh_sigstate); } /* * The object manager in Windows is responsible for managing * references and access to various types of objects, including * device_objects, events, threads, timers and so on. However, * there's a difference in the way objects are handled in user * mode versus kernel mode. * * In user mode (i.e. Win32 applications), all objects are * managed by the object manager. For example, when you create * a timer or event object, you actually end up with an * object_header (for the object manager's bookkeeping * purposes) and an object body (which contains the actual object * structure, e.g. ktimer, kevent, etc...). This allows Windows * to manage resource quotas and to enforce access restrictions * on basically every kind of system object handled by the kernel. * * However, in kernel mode, you only end up using the object * manager some of the time. For example, in a driver, you create * a timer object by simply allocating the memory for a ktimer * structure and initializing it with KeInitializeTimer(). Hence, * the timer has no object_header and no reference counting or * security/resource checks are done on it. The assumption in * this case is that if you're running in kernel mode, you know * what you're doing, and you're already at an elevated privilege * anyway. * * There are some exceptions to this. The two most important ones * for our purposes are device_objects and threads. We need to use * the object manager to do reference counting on device_objects, * and for threads, you can only get a pointer to a thread's * dispatch header by using ObReferenceObjectByHandle() on the * handle returned by PsCreateSystemThread(). */ static ndis_status ObReferenceObjectByHandle(ndis_handle handle, uint32_t reqaccess, void *otype, uint8_t accessmode, void **object, void **handleinfo) { nt_objref *nr; nr = malloc(sizeof(nt_objref), M_DEVBUF, M_NOWAIT|M_ZERO); if (nr == NULL) return (STATUS_INSUFFICIENT_RESOURCES); InitializeListHead((&nr->no_dh.dh_waitlisthead)); nr->no_obj = handle; nr->no_dh.dh_type = DISP_TYPE_THREAD; nr->no_dh.dh_sigstate = 0; nr->no_dh.dh_size = (uint8_t)(sizeof(struct thread) / sizeof(uint32_t)); TAILQ_INSERT_TAIL(&ntoskrnl_reflist, nr, link); *object = nr; return (STATUS_SUCCESS); } static void ObfDereferenceObject(object) void *object; { nt_objref *nr; nr = object; TAILQ_REMOVE(&ntoskrnl_reflist, nr, link); free(nr, M_DEVBUF); } static uint32_t ZwClose(handle) ndis_handle handle; { return (STATUS_SUCCESS); } static uint32_t WmiQueryTraceInformation(traceclass, traceinfo, infolen, reqlen, buf) uint32_t traceclass; void *traceinfo; uint32_t infolen; uint32_t reqlen; void *buf; { return (STATUS_NOT_FOUND); } static uint32_t WmiTraceMessage(uint64_t loghandle, uint32_t messageflags, void *guid, uint16_t messagenum, ...) { return (STATUS_SUCCESS); } static uint32_t IoWMIRegistrationControl(dobj, action) device_object *dobj; uint32_t action; { return (STATUS_SUCCESS); } /* * This is here just in case the thread returns without calling * PsTerminateSystemThread(). */ static void ntoskrnl_thrfunc(arg) void *arg; { thread_context *thrctx; uint32_t (*tfunc)(void *); void *tctx; uint32_t rval; thrctx = arg; tfunc = thrctx->tc_thrfunc; tctx = thrctx->tc_thrctx; free(thrctx, M_TEMP); rval = MSCALL1(tfunc, tctx); PsTerminateSystemThread(rval); return; /* notreached */ } static ndis_status PsCreateSystemThread(handle, reqaccess, objattrs, phandle, clientid, thrfunc, thrctx) ndis_handle *handle; uint32_t reqaccess; void *objattrs; ndis_handle phandle; void *clientid; void *thrfunc; void *thrctx; { int error; thread_context *tc; struct proc *p; tc = malloc(sizeof(thread_context), M_TEMP, M_NOWAIT); if (tc == NULL) return (STATUS_INSUFFICIENT_RESOURCES); tc->tc_thrctx = thrctx; tc->tc_thrfunc = thrfunc; error = kproc_create(ntoskrnl_thrfunc, tc, &p, RFHIGHPID, NDIS_KSTACK_PAGES, "Windows Kthread %d", ntoskrnl_kth); if (error) { free(tc, M_TEMP); return (STATUS_INSUFFICIENT_RESOURCES); } *handle = p; ntoskrnl_kth++; return (STATUS_SUCCESS); } /* * In Windows, the exit of a thread is an event that you're allowed * to wait on, assuming you've obtained a reference to the thread using * ObReferenceObjectByHandle(). Unfortunately, the only way we can * simulate this behavior is to register each thread we create in a * reference list, and if someone holds a reference to us, we poke * them. */ static ndis_status PsTerminateSystemThread(status) ndis_status status; { struct nt_objref *nr; mtx_lock(&ntoskrnl_dispatchlock); TAILQ_FOREACH(nr, &ntoskrnl_reflist, link) { if (nr->no_obj != curthread->td_proc) continue; nr->no_dh.dh_sigstate = 1; ntoskrnl_waittest(&nr->no_dh, IO_NO_INCREMENT); break; } mtx_unlock(&ntoskrnl_dispatchlock); ntoskrnl_kth--; kproc_exit(0); return (0); /* notreached */ } static uint32_t DbgPrint(char *fmt, ...) { va_list ap; if (bootverbose) { va_start(ap, fmt); vprintf(fmt, ap); } return (STATUS_SUCCESS); } static void DbgBreakPoint(void) { kdb_enter(KDB_WHY_NDIS, "DbgBreakPoint(): breakpoint"); } static void KeBugCheckEx(code, param1, param2, param3, param4) uint32_t code; u_long param1; u_long param2; u_long param3; u_long param4; { panic("KeBugCheckEx: STOP 0x%X", code); } static void ntoskrnl_timercall(arg) void *arg; { ktimer *timer; struct timeval tv; kdpc *dpc; mtx_lock(&ntoskrnl_dispatchlock); timer = arg; #ifdef NTOSKRNL_DEBUG_TIMERS ntoskrnl_timer_fires++; #endif ntoskrnl_remove_timer(timer); /* * This should never happen, but complain * if it does. */ if (timer->k_header.dh_inserted == FALSE) { mtx_unlock(&ntoskrnl_dispatchlock); printf("NTOS: timer %p fired even though " "it was canceled\n", timer); return; } /* Mark the timer as no longer being on the timer queue. */ timer->k_header.dh_inserted = FALSE; /* Now signal the object and satisfy any waits on it. */ timer->k_header.dh_sigstate = 1; ntoskrnl_waittest(&timer->k_header, IO_NO_INCREMENT); /* * If this is a periodic timer, re-arm it * so it will fire again. We do this before * calling any deferred procedure calls because * it's possible the DPC might cancel the timer, * in which case it would be wrong for us to * re-arm it again afterwards. */ if (timer->k_period) { tv.tv_sec = 0; tv.tv_usec = timer->k_period * 1000; timer->k_header.dh_inserted = TRUE; ntoskrnl_insert_timer(timer, tvtohz(&tv)); #ifdef NTOSKRNL_DEBUG_TIMERS ntoskrnl_timer_reloads++; #endif } dpc = timer->k_dpc; mtx_unlock(&ntoskrnl_dispatchlock); /* If there's a DPC associated with the timer, queue it up. */ if (dpc != NULL) KeInsertQueueDpc(dpc, NULL, NULL); } #ifdef NTOSKRNL_DEBUG_TIMERS static int sysctl_show_timers(SYSCTL_HANDLER_ARGS) { int ret; ret = 0; ntoskrnl_show_timers(); return (sysctl_handle_int(oidp, &ret, 0, req)); } static void ntoskrnl_show_timers() { int i = 0; list_entry *l; mtx_lock_spin(&ntoskrnl_calllock); l = ntoskrnl_calllist.nle_flink; while(l != &ntoskrnl_calllist) { i++; l = l->nle_flink; } mtx_unlock_spin(&ntoskrnl_calllock); printf("\n"); printf("%d timers available (out of %d)\n", i, NTOSKRNL_TIMEOUTS); printf("timer sets: %qu\n", ntoskrnl_timer_sets); printf("timer reloads: %qu\n", ntoskrnl_timer_reloads); printf("timer cancels: %qu\n", ntoskrnl_timer_cancels); printf("timer fires: %qu\n", ntoskrnl_timer_fires); printf("\n"); } #endif /* * Must be called with dispatcher lock held. */ static void ntoskrnl_insert_timer(timer, ticks) ktimer *timer; int ticks; { callout_entry *e; list_entry *l; struct callout *c; /* * Try and allocate a timer. */ mtx_lock_spin(&ntoskrnl_calllock); if (IsListEmpty(&ntoskrnl_calllist)) { mtx_unlock_spin(&ntoskrnl_calllock); #ifdef NTOSKRNL_DEBUG_TIMERS ntoskrnl_show_timers(); #endif panic("out of timers!"); } l = RemoveHeadList(&ntoskrnl_calllist); mtx_unlock_spin(&ntoskrnl_calllock); e = CONTAINING_RECORD(l, callout_entry, ce_list); c = &e->ce_callout; timer->k_callout = c; callout_init(c, CALLOUT_MPSAFE); callout_reset(c, ticks, ntoskrnl_timercall, timer); } static void ntoskrnl_remove_timer(timer) ktimer *timer; { callout_entry *e; e = (callout_entry *)timer->k_callout; callout_stop(timer->k_callout); mtx_lock_spin(&ntoskrnl_calllock); InsertHeadList((&ntoskrnl_calllist), (&e->ce_list)); mtx_unlock_spin(&ntoskrnl_calllock); } void KeInitializeTimer(timer) ktimer *timer; { if (timer == NULL) return; KeInitializeTimerEx(timer, EVENT_TYPE_NOTIFY); } void KeInitializeTimerEx(timer, type) ktimer *timer; uint32_t type; { if (timer == NULL) return; bzero((char *)timer, sizeof(ktimer)); InitializeListHead((&timer->k_header.dh_waitlisthead)); timer->k_header.dh_sigstate = FALSE; timer->k_header.dh_inserted = FALSE; if (type == EVENT_TYPE_NOTIFY) timer->k_header.dh_type = DISP_TYPE_NOTIFICATION_TIMER; else timer->k_header.dh_type = DISP_TYPE_SYNCHRONIZATION_TIMER; timer->k_header.dh_size = sizeof(ktimer) / sizeof(uint32_t); } /* * DPC subsystem. A Windows Defered Procedure Call has the following * properties: * - It runs at DISPATCH_LEVEL. * - It can have one of 3 importance values that control when it * runs relative to other DPCs in the queue. * - On SMP systems, it can be set to run on a specific processor. * In order to satisfy the last property, we create a DPC thread for * each CPU in the system and bind it to that CPU. Each thread * maintains three queues with different importance levels, which * will be processed in order from lowest to highest. * * In Windows, interrupt handlers run as DPCs. (Not to be confused * with ISRs, which run in interrupt context and can preempt DPCs.) * ISRs are given the highest importance so that they'll take * precedence over timers and other things. */ static void ntoskrnl_dpc_thread(arg) void *arg; { kdpc_queue *kq; kdpc *d; list_entry *l; uint8_t irql; kq = arg; InitializeListHead(&kq->kq_disp); kq->kq_td = curthread; kq->kq_exit = 0; kq->kq_running = FALSE; KeInitializeSpinLock(&kq->kq_lock); KeInitializeEvent(&kq->kq_proc, EVENT_TYPE_SYNC, FALSE); KeInitializeEvent(&kq->kq_done, EVENT_TYPE_SYNC, FALSE); /* * Elevate our priority. DPCs are used to run interrupt * handlers, and they should trigger as soon as possible * once scheduled by an ISR. */ thread_lock(curthread); #ifdef NTOSKRNL_MULTIPLE_DPCS sched_bind(curthread, kq->kq_cpu); #endif sched_prio(curthread, PRI_MIN_KERN); thread_unlock(curthread); while (1) { KeWaitForSingleObject(&kq->kq_proc, 0, 0, TRUE, NULL); KeAcquireSpinLock(&kq->kq_lock, &irql); if (kq->kq_exit) { kq->kq_exit = 0; KeReleaseSpinLock(&kq->kq_lock, irql); break; } kq->kq_running = TRUE; while (!IsListEmpty(&kq->kq_disp)) { l = RemoveHeadList((&kq->kq_disp)); d = CONTAINING_RECORD(l, kdpc, k_dpclistentry); InitializeListHead((&d->k_dpclistentry)); KeReleaseSpinLockFromDpcLevel(&kq->kq_lock); MSCALL4(d->k_deferedfunc, d, d->k_deferredctx, d->k_sysarg1, d->k_sysarg2); KeAcquireSpinLockAtDpcLevel(&kq->kq_lock); } kq->kq_running = FALSE; KeReleaseSpinLock(&kq->kq_lock, irql); KeSetEvent(&kq->kq_done, IO_NO_INCREMENT, FALSE); } kproc_exit(0); return; /* notreached */ } static void ntoskrnl_destroy_dpc_threads(void) { kdpc_queue *kq; kdpc dpc; int i; kq = kq_queues; #ifdef NTOSKRNL_MULTIPLE_DPCS for (i = 0; i < mp_ncpus; i++) { #else for (i = 0; i < 1; i++) { #endif kq += i; kq->kq_exit = 1; KeInitializeDpc(&dpc, NULL, NULL); KeSetTargetProcessorDpc(&dpc, i); KeInsertQueueDpc(&dpc, NULL, NULL); while (kq->kq_exit) tsleep(kq->kq_td->td_proc, PWAIT, "dpcw", hz/10); } } static uint8_t ntoskrnl_insert_dpc(head, dpc) list_entry *head; kdpc *dpc; { list_entry *l; kdpc *d; l = head->nle_flink; while (l != head) { d = CONTAINING_RECORD(l, kdpc, k_dpclistentry); if (d == dpc) return (FALSE); l = l->nle_flink; } if (dpc->k_importance == KDPC_IMPORTANCE_LOW) InsertTailList((head), (&dpc->k_dpclistentry)); else InsertHeadList((head), (&dpc->k_dpclistentry)); return (TRUE); } void KeInitializeDpc(dpc, dpcfunc, dpcctx) kdpc *dpc; void *dpcfunc; void *dpcctx; { if (dpc == NULL) return; dpc->k_deferedfunc = dpcfunc; dpc->k_deferredctx = dpcctx; dpc->k_num = KDPC_CPU_DEFAULT; dpc->k_importance = KDPC_IMPORTANCE_MEDIUM; InitializeListHead((&dpc->k_dpclistentry)); } uint8_t KeInsertQueueDpc(dpc, sysarg1, sysarg2) kdpc *dpc; void *sysarg1; void *sysarg2; { kdpc_queue *kq; uint8_t r; uint8_t irql; if (dpc == NULL) return (FALSE); kq = kq_queues; #ifdef NTOSKRNL_MULTIPLE_DPCS KeRaiseIrql(DISPATCH_LEVEL, &irql); /* * By default, the DPC is queued to run on the same CPU * that scheduled it. */ if (dpc->k_num == KDPC_CPU_DEFAULT) kq += curthread->td_oncpu; else kq += dpc->k_num; KeAcquireSpinLockAtDpcLevel(&kq->kq_lock); #else KeAcquireSpinLock(&kq->kq_lock, &irql); #endif r = ntoskrnl_insert_dpc(&kq->kq_disp, dpc); if (r == TRUE) { dpc->k_sysarg1 = sysarg1; dpc->k_sysarg2 = sysarg2; } KeReleaseSpinLock(&kq->kq_lock, irql); if (r == FALSE) return (r); KeSetEvent(&kq->kq_proc, IO_NO_INCREMENT, FALSE); return (r); } uint8_t KeRemoveQueueDpc(dpc) kdpc *dpc; { kdpc_queue *kq; uint8_t irql; if (dpc == NULL) return (FALSE); #ifdef NTOSKRNL_MULTIPLE_DPCS KeRaiseIrql(DISPATCH_LEVEL, &irql); kq = kq_queues + dpc->k_num; KeAcquireSpinLockAtDpcLevel(&kq->kq_lock); #else kq = kq_queues; KeAcquireSpinLock(&kq->kq_lock, &irql); #endif if (dpc->k_dpclistentry.nle_flink == &dpc->k_dpclistentry) { KeReleaseSpinLockFromDpcLevel(&kq->kq_lock); KeLowerIrql(irql); return (FALSE); } RemoveEntryList((&dpc->k_dpclistentry)); InitializeListHead((&dpc->k_dpclistentry)); KeReleaseSpinLock(&kq->kq_lock, irql); return (TRUE); } void KeSetImportanceDpc(dpc, imp) kdpc *dpc; uint32_t imp; { if (imp != KDPC_IMPORTANCE_LOW && imp != KDPC_IMPORTANCE_MEDIUM && imp != KDPC_IMPORTANCE_HIGH) return; dpc->k_importance = (uint8_t)imp; } void KeSetTargetProcessorDpc(kdpc *dpc, uint8_t cpu) { if (cpu > mp_ncpus) return; dpc->k_num = cpu; } void KeFlushQueuedDpcs(void) { kdpc_queue *kq; int i; /* * Poke each DPC queue and wait * for them to drain. */ #ifdef NTOSKRNL_MULTIPLE_DPCS for (i = 0; i < mp_ncpus; i++) { #else for (i = 0; i < 1; i++) { #endif kq = kq_queues + i; KeSetEvent(&kq->kq_proc, IO_NO_INCREMENT, FALSE); KeWaitForSingleObject(&kq->kq_done, 0, 0, TRUE, NULL); } } uint32_t KeGetCurrentProcessorNumber(void) { return ((uint32_t)curthread->td_oncpu); } uint8_t KeSetTimerEx(timer, duetime, period, dpc) ktimer *timer; int64_t duetime; uint32_t period; kdpc *dpc; { struct timeval tv; uint64_t curtime; uint8_t pending; if (timer == NULL) return (FALSE); mtx_lock(&ntoskrnl_dispatchlock); if (timer->k_header.dh_inserted == TRUE) { ntoskrnl_remove_timer(timer); #ifdef NTOSKRNL_DEBUG_TIMERS ntoskrnl_timer_cancels++; #endif timer->k_header.dh_inserted = FALSE; pending = TRUE; } else pending = FALSE; timer->k_duetime = duetime; timer->k_period = period; timer->k_header.dh_sigstate = FALSE; timer->k_dpc = dpc; if (duetime < 0) { tv.tv_sec = - (duetime) / 10000000; tv.tv_usec = (- (duetime) / 10) - (tv.tv_sec * 1000000); } else { ntoskrnl_time(&curtime); if (duetime < curtime) tv.tv_sec = tv.tv_usec = 0; else { tv.tv_sec = ((duetime) - curtime) / 10000000; tv.tv_usec = ((duetime) - curtime) / 10 - (tv.tv_sec * 1000000); } } timer->k_header.dh_inserted = TRUE; ntoskrnl_insert_timer(timer, tvtohz(&tv)); #ifdef NTOSKRNL_DEBUG_TIMERS ntoskrnl_timer_sets++; #endif mtx_unlock(&ntoskrnl_dispatchlock); return (pending); } uint8_t KeSetTimer(timer, duetime, dpc) ktimer *timer; int64_t duetime; kdpc *dpc; { return (KeSetTimerEx(timer, duetime, 0, dpc)); } /* * The Windows DDK documentation seems to say that cancelling * a timer that has a DPC will result in the DPC also being * cancelled, but this isn't really the case. */ uint8_t KeCancelTimer(timer) ktimer *timer; { uint8_t pending; if (timer == NULL) return (FALSE); mtx_lock(&ntoskrnl_dispatchlock); pending = timer->k_header.dh_inserted; if (timer->k_header.dh_inserted == TRUE) { timer->k_header.dh_inserted = FALSE; ntoskrnl_remove_timer(timer); #ifdef NTOSKRNL_DEBUG_TIMERS ntoskrnl_timer_cancels++; #endif } mtx_unlock(&ntoskrnl_dispatchlock); return (pending); } uint8_t KeReadStateTimer(timer) ktimer *timer; { return (timer->k_header.dh_sigstate); } static int32_t KeDelayExecutionThread(uint8_t wait_mode, uint8_t alertable, int64_t *interval) { ktimer timer; if (wait_mode != 0) panic("invalid wait_mode %d", wait_mode); KeInitializeTimer(&timer); KeSetTimer(&timer, *interval, NULL); KeWaitForSingleObject(&timer, 0, 0, alertable, NULL); return STATUS_SUCCESS; } static uint64_t KeQueryInterruptTime(void) { int ticks; struct timeval tv; getmicrouptime(&tv); ticks = tvtohz(&tv); return ticks * ((10000000 + hz - 1) / hz); } static struct thread * KeGetCurrentThread(void) { return curthread; } static int32_t KeSetPriorityThread(td, pri) struct thread *td; int32_t pri; { int32_t old; if (td == NULL) return LOW_REALTIME_PRIORITY; if (td->td_priority <= PRI_MIN_KERN) old = HIGH_PRIORITY; else if (td->td_priority >= PRI_MAX_KERN) old = LOW_PRIORITY; else old = LOW_REALTIME_PRIORITY; thread_lock(td); if (pri == HIGH_PRIORITY) sched_prio(td, PRI_MIN_KERN); if (pri == LOW_REALTIME_PRIORITY) sched_prio(td, PRI_MIN_KERN + (PRI_MAX_KERN - PRI_MIN_KERN) / 2); if (pri == LOW_PRIORITY) sched_prio(td, PRI_MAX_KERN); thread_unlock(td); return old; } static void dummy() { printf("ntoskrnl dummy called...\n"); } image_patch_table ntoskrnl_functbl[] = { IMPORT_SFUNC(RtlZeroMemory, 2), + IMPORT_SFUNC(RtlSecureZeroMemory, 2), + IMPORT_SFUNC(RtlFillMemory, 3), + IMPORT_SFUNC(RtlMoveMemory, 3), + IMPORT_SFUNC(RtlCharToInteger, 3), IMPORT_SFUNC(RtlCopyMemory, 3), + IMPORT_SFUNC(RtlCopyString, 2), IMPORT_SFUNC(RtlCompareMemory, 3), IMPORT_SFUNC(RtlEqualUnicodeString, 3), IMPORT_SFUNC(RtlCopyUnicodeString, 2), IMPORT_SFUNC(RtlUnicodeStringToAnsiString, 3), IMPORT_SFUNC(RtlAnsiStringToUnicodeString, 3), IMPORT_SFUNC(RtlInitAnsiString, 2), IMPORT_SFUNC_MAP(RtlInitString, RtlInitAnsiString, 2), IMPORT_SFUNC(RtlInitUnicodeString, 2), IMPORT_SFUNC(RtlFreeAnsiString, 1), IMPORT_SFUNC(RtlFreeUnicodeString, 1), IMPORT_SFUNC(RtlUnicodeStringToInteger, 3), IMPORT_CFUNC(sprintf, 0), IMPORT_CFUNC(vsprintf, 0), IMPORT_CFUNC_MAP(_snprintf, snprintf, 0), IMPORT_CFUNC_MAP(_vsnprintf, vsnprintf, 0), IMPORT_CFUNC(DbgPrint, 0), IMPORT_SFUNC(DbgBreakPoint, 0), IMPORT_SFUNC(KeBugCheckEx, 5), IMPORT_CFUNC(strncmp, 0), IMPORT_CFUNC(strcmp, 0), IMPORT_CFUNC_MAP(stricmp, strcasecmp, 0), IMPORT_CFUNC(strncpy, 0), IMPORT_CFUNC(strcpy, 0), IMPORT_CFUNC(strlen, 0), IMPORT_CFUNC_MAP(toupper, ntoskrnl_toupper, 0), IMPORT_CFUNC_MAP(tolower, ntoskrnl_tolower, 0), IMPORT_CFUNC_MAP(strstr, ntoskrnl_strstr, 0), IMPORT_CFUNC_MAP(strncat, ntoskrnl_strncat, 0), IMPORT_CFUNC_MAP(strchr, index, 0), IMPORT_CFUNC_MAP(strrchr, rindex, 0), IMPORT_CFUNC(memcpy, 0), IMPORT_CFUNC_MAP(memmove, ntoskrnl_memmove, 0), IMPORT_CFUNC_MAP(memset, ntoskrnl_memset, 0), IMPORT_CFUNC_MAP(memchr, ntoskrnl_memchr, 0), IMPORT_SFUNC(IoAllocateDriverObjectExtension, 4), IMPORT_SFUNC(IoGetDriverObjectExtension, 2), IMPORT_FFUNC(IofCallDriver, 2), IMPORT_FFUNC(IofCompleteRequest, 2), IMPORT_SFUNC(IoAcquireCancelSpinLock, 1), IMPORT_SFUNC(IoReleaseCancelSpinLock, 1), IMPORT_SFUNC(IoCancelIrp, 1), IMPORT_SFUNC(IoConnectInterrupt, 11), IMPORT_SFUNC(IoDisconnectInterrupt, 1), IMPORT_SFUNC(IoCreateDevice, 7), IMPORT_SFUNC(IoDeleteDevice, 1), IMPORT_SFUNC(IoGetAttachedDevice, 1), IMPORT_SFUNC(IoAttachDeviceToDeviceStack, 2), IMPORT_SFUNC(IoDetachDevice, 1), IMPORT_SFUNC(IoBuildSynchronousFsdRequest, 7), IMPORT_SFUNC(IoBuildAsynchronousFsdRequest, 6), IMPORT_SFUNC(IoBuildDeviceIoControlRequest, 9), IMPORT_SFUNC(IoAllocateIrp, 2), IMPORT_SFUNC(IoReuseIrp, 2), IMPORT_SFUNC(IoMakeAssociatedIrp, 2), IMPORT_SFUNC(IoFreeIrp, 1), IMPORT_SFUNC(IoInitializeIrp, 3), IMPORT_SFUNC(KeAcquireInterruptSpinLock, 1), IMPORT_SFUNC(KeReleaseInterruptSpinLock, 2), IMPORT_SFUNC(KeSynchronizeExecution, 3), IMPORT_SFUNC(KeWaitForSingleObject, 5), IMPORT_SFUNC(KeWaitForMultipleObjects, 8), IMPORT_SFUNC(_allmul, 4), IMPORT_SFUNC(_alldiv, 4), IMPORT_SFUNC(_allrem, 4), IMPORT_RFUNC(_allshr, 0), IMPORT_RFUNC(_allshl, 0), IMPORT_SFUNC(_aullmul, 4), IMPORT_SFUNC(_aulldiv, 4), IMPORT_SFUNC(_aullrem, 4), IMPORT_RFUNC(_aullshr, 0), IMPORT_RFUNC(_aullshl, 0), IMPORT_CFUNC(atoi, 0), IMPORT_CFUNC(atol, 0), IMPORT_CFUNC(rand, 0), IMPORT_CFUNC(srand, 0), IMPORT_SFUNC(WRITE_REGISTER_USHORT, 2), IMPORT_SFUNC(READ_REGISTER_USHORT, 1), IMPORT_SFUNC(WRITE_REGISTER_ULONG, 2), IMPORT_SFUNC(READ_REGISTER_ULONG, 1), IMPORT_SFUNC(READ_REGISTER_UCHAR, 1), IMPORT_SFUNC(WRITE_REGISTER_UCHAR, 2), IMPORT_SFUNC(ExInitializePagedLookasideList, 7), IMPORT_SFUNC(ExDeletePagedLookasideList, 1), IMPORT_SFUNC(ExInitializeNPagedLookasideList, 7), IMPORT_SFUNC(ExDeleteNPagedLookasideList, 1), IMPORT_FFUNC(InterlockedPopEntrySList, 1), + IMPORT_FFUNC(InitializeSListHead, 1), IMPORT_FFUNC(InterlockedPushEntrySList, 2), IMPORT_SFUNC(ExQueryDepthSList, 1), IMPORT_FFUNC_MAP(ExpInterlockedPopEntrySList, InterlockedPopEntrySList, 1), IMPORT_FFUNC_MAP(ExpInterlockedPushEntrySList, InterlockedPushEntrySList, 2), IMPORT_FFUNC(ExInterlockedPopEntrySList, 2), IMPORT_FFUNC(ExInterlockedPushEntrySList, 3), IMPORT_SFUNC(ExAllocatePoolWithTag, 3), + IMPORT_SFUNC(ExFreePoolWithTag, 2), IMPORT_SFUNC(ExFreePool, 1), #ifdef __i386__ IMPORT_FFUNC(KefAcquireSpinLockAtDpcLevel, 1), IMPORT_FFUNC(KefReleaseSpinLockFromDpcLevel,1), IMPORT_FFUNC(KeAcquireSpinLockRaiseToDpc, 1), #else /* * For AMD64, we can get away with just mapping * KeAcquireSpinLockRaiseToDpc() directly to KfAcquireSpinLock() * because the calling conventions end up being the same. * On i386, we have to be careful because KfAcquireSpinLock() * is _fastcall but KeAcquireSpinLockRaiseToDpc() isn't. */ IMPORT_SFUNC(KeAcquireSpinLockAtDpcLevel, 1), IMPORT_SFUNC(KeReleaseSpinLockFromDpcLevel, 1), IMPORT_SFUNC_MAP(KeAcquireSpinLockRaiseToDpc, KfAcquireSpinLock, 1), #endif IMPORT_SFUNC_MAP(KeReleaseSpinLock, KfReleaseSpinLock, 1), IMPORT_FFUNC(InterlockedIncrement, 1), IMPORT_FFUNC(InterlockedDecrement, 1), IMPORT_FFUNC(InterlockedExchange, 2), IMPORT_FFUNC(ExInterlockedAddLargeStatistic, 2), IMPORT_SFUNC(IoAllocateMdl, 5), IMPORT_SFUNC(IoFreeMdl, 1), IMPORT_SFUNC(MmAllocateContiguousMemory, 2 + 1), IMPORT_SFUNC(MmAllocateContiguousMemorySpecifyCache, 5 + 3), IMPORT_SFUNC(MmFreeContiguousMemory, 1), IMPORT_SFUNC(MmFreeContiguousMemorySpecifyCache, 3), IMPORT_SFUNC(MmSizeOfMdl, 1), IMPORT_SFUNC(MmMapLockedPages, 2), IMPORT_SFUNC(MmMapLockedPagesSpecifyCache, 6), IMPORT_SFUNC(MmUnmapLockedPages, 2), IMPORT_SFUNC(MmBuildMdlForNonPagedPool, 1), IMPORT_SFUNC(MmGetPhysicalAddress, 1), IMPORT_SFUNC(MmIsAddressValid, 1), IMPORT_SFUNC(MmMapIoSpace, 3 + 1), IMPORT_SFUNC(MmUnmapIoSpace, 2), IMPORT_SFUNC(KeInitializeSpinLock, 1), IMPORT_SFUNC(IoIsWdmVersionAvailable, 2), IMPORT_SFUNC(IoGetDeviceObjectPointer, 4), IMPORT_SFUNC(IoGetDeviceProperty, 5), IMPORT_SFUNC(IoAllocateWorkItem, 1), IMPORT_SFUNC(IoFreeWorkItem, 1), IMPORT_SFUNC(IoQueueWorkItem, 4), IMPORT_SFUNC(ExQueueWorkItem, 2), IMPORT_SFUNC(ntoskrnl_workitem, 2), IMPORT_SFUNC(KeInitializeMutex, 2), IMPORT_SFUNC(KeReleaseMutex, 2), IMPORT_SFUNC(KeReadStateMutex, 1), IMPORT_SFUNC(KeInitializeEvent, 3), IMPORT_SFUNC(KeSetEvent, 3), IMPORT_SFUNC(KeResetEvent, 1), IMPORT_SFUNC(KeClearEvent, 1), IMPORT_SFUNC(KeReadStateEvent, 1), IMPORT_SFUNC(KeInitializeTimer, 1), IMPORT_SFUNC(KeInitializeTimerEx, 2), IMPORT_SFUNC(KeSetTimer, 3), IMPORT_SFUNC(KeSetTimerEx, 4), IMPORT_SFUNC(KeCancelTimer, 1), IMPORT_SFUNC(KeReadStateTimer, 1), IMPORT_SFUNC(KeInitializeDpc, 3), IMPORT_SFUNC(KeInsertQueueDpc, 3), IMPORT_SFUNC(KeRemoveQueueDpc, 1), IMPORT_SFUNC(KeSetImportanceDpc, 2), IMPORT_SFUNC(KeSetTargetProcessorDpc, 2), IMPORT_SFUNC(KeFlushQueuedDpcs, 0), IMPORT_SFUNC(KeGetCurrentProcessorNumber, 1), IMPORT_SFUNC(ObReferenceObjectByHandle, 6), IMPORT_FFUNC(ObfDereferenceObject, 1), IMPORT_SFUNC(ZwClose, 1), IMPORT_SFUNC(PsCreateSystemThread, 7), IMPORT_SFUNC(PsTerminateSystemThread, 1), IMPORT_SFUNC(IoWMIRegistrationControl, 2), IMPORT_SFUNC(WmiQueryTraceInformation, 5), IMPORT_CFUNC(WmiTraceMessage, 0), IMPORT_SFUNC(KeQuerySystemTime, 1), IMPORT_CFUNC(KeTickCount, 0), IMPORT_SFUNC(KeDelayExecutionThread, 3), IMPORT_SFUNC(KeQueryInterruptTime, 0), IMPORT_SFUNC(KeGetCurrentThread, 0), IMPORT_SFUNC(KeSetPriorityThread, 2), /* * This last entry is a catch-all for any function we haven't * implemented yet. The PE import list patching routine will * use it for any function that doesn't have an explicit match * in this table. */ { NULL, (FUNC)dummy, NULL, 0, WINDRV_WRAP_STDCALL }, /* End of list. */ { NULL, NULL, NULL } }; Index: projects/binutils-2.17/sys/contrib/dev/acpica =================================================================== --- projects/binutils-2.17/sys/contrib/dev/acpica (revision 215829) +++ projects/binutils-2.17/sys/contrib/dev/acpica (revision 215830) Property changes on: projects/binutils-2.17/sys/contrib/dev/acpica ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/contrib/dev/acpica:r215709-215824 Index: projects/binutils-2.17/sys/contrib/pf =================================================================== --- projects/binutils-2.17/sys/contrib/pf (revision 215829) +++ projects/binutils-2.17/sys/contrib/pf (revision 215830) Property changes on: projects/binutils-2.17/sys/contrib/pf ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/contrib/pf:r215709-215824 Index: projects/binutils-2.17/sys/contrib/x86emu =================================================================== --- projects/binutils-2.17/sys/contrib/x86emu (revision 215829) +++ projects/binutils-2.17/sys/contrib/x86emu (revision 215830) Property changes on: projects/binutils-2.17/sys/contrib/x86emu ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/contrib/x86emu:r215709-215824 Index: projects/binutils-2.17/sys/dev/ahci/ahci.c =================================================================== --- projects/binutils-2.17/sys/dev/ahci/ahci.c (revision 215829) +++ projects/binutils-2.17/sys/dev/ahci/ahci.c (revision 215830) @@ -1,2672 +1,2672 @@ /*- * Copyright (c) 2009 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ahci.h" #include #include #include #include #include /* local prototypes */ static int ahci_setup_interrupt(device_t dev); static void ahci_intr(void *data); static void ahci_intr_one(void *data); static int ahci_suspend(device_t dev); static int ahci_resume(device_t dev); static int ahci_ch_init(device_t dev); static int ahci_ch_deinit(device_t dev); static int ahci_ch_suspend(device_t dev); static int ahci_ch_resume(device_t dev); static void ahci_ch_pm(void *arg); static void ahci_ch_intr_locked(void *data); static void ahci_ch_intr(void *data); static int ahci_ctlr_reset(device_t dev); static int ahci_ctlr_setup(device_t dev); static void ahci_begin_transaction(device_t dev, union ccb *ccb); static void ahci_dmasetprd(void *arg, bus_dma_segment_t *segs, int nsegs, int error); static void ahci_execute_transaction(struct ahci_slot *slot); static void ahci_timeout(struct ahci_slot *slot); static void ahci_end_transaction(struct ahci_slot *slot, enum ahci_err_type et); static int ahci_setup_fis(device_t dev, struct ahci_cmd_tab *ctp, union ccb *ccb, int tag); static void ahci_dmainit(device_t dev); static void ahci_dmasetupc_cb(void *xsc, bus_dma_segment_t *segs, int nsegs, int error); static void ahci_dmafini(device_t dev); static void ahci_slotsalloc(device_t dev); static void ahci_slotsfree(device_t dev); static void ahci_reset(device_t dev); static void ahci_start(device_t dev, int fbs); static void ahci_stop(device_t dev); static void ahci_clo(device_t dev); static void ahci_start_fr(device_t dev); static void ahci_stop_fr(device_t dev); static int ahci_sata_connect(struct ahci_channel *ch); static int ahci_sata_phy_reset(device_t dev); static int ahci_wait_ready(device_t dev, int t); static void ahci_issue_read_log(device_t dev); static void ahci_process_read_log(device_t dev, union ccb *ccb); static void ahciaction(struct cam_sim *sim, union ccb *ccb); static void ahcipoll(struct cam_sim *sim); MALLOC_DEFINE(M_AHCI, "AHCI driver", "AHCI driver data buffers"); static struct { uint32_t id; uint8_t rev; const char *name; int quirks; #define AHCI_Q_NOFORCE 1 #define AHCI_Q_NOPMP 2 #define AHCI_Q_NONCQ 4 #define AHCI_Q_1CH 8 #define AHCI_Q_2CH 16 #define AHCI_Q_4CH 32 #define AHCI_Q_EDGEIS 64 #define AHCI_Q_SATA2 128 #define AHCI_Q_NOBSYRES 256 #define AHCI_Q_NOAA 512 } ahci_ids[] = { {0x43801002, 0x00, "ATI IXP600", 0}, {0x43901002, 0x00, "ATI IXP700", 0}, {0x43911002, 0x00, "ATI IXP700", 0}, {0x43921002, 0x00, "ATI IXP700", 0}, {0x43931002, 0x00, "ATI IXP700", 0}, {0x43941002, 0x00, "ATI IXP800", 0}, {0x43951002, 0x00, "ATI IXP800", 0}, {0x26528086, 0x00, "Intel ICH6", AHCI_Q_NOFORCE}, {0x26538086, 0x00, "Intel ICH6M", AHCI_Q_NOFORCE}, {0x26818086, 0x00, "Intel ESB2", 0}, {0x26828086, 0x00, "Intel ESB2", 0}, {0x26838086, 0x00, "Intel ESB2", 0}, {0x27c18086, 0x00, "Intel ICH7", 0}, {0x27c38086, 0x00, "Intel ICH7", 0}, {0x27c58086, 0x00, "Intel ICH7M", 0}, {0x27c68086, 0x00, "Intel ICH7M", 0}, {0x28218086, 0x00, "Intel ICH8", 0}, {0x28228086, 0x00, "Intel ICH8", 0}, {0x28248086, 0x00, "Intel ICH8", 0}, {0x28298086, 0x00, "Intel ICH8M", 0}, {0x282a8086, 0x00, "Intel ICH8M", 0}, {0x29228086, 0x00, "Intel ICH9", 0}, {0x29238086, 0x00, "Intel ICH9", 0}, {0x29248086, 0x00, "Intel ICH9", 0}, {0x29258086, 0x00, "Intel ICH9", 0}, {0x29278086, 0x00, "Intel ICH9", 0}, {0x29298086, 0x00, "Intel ICH9M", 0}, {0x292a8086, 0x00, "Intel ICH9M", 0}, {0x292b8086, 0x00, "Intel ICH9M", 0}, {0x292c8086, 0x00, "Intel ICH9M", 0}, {0x292f8086, 0x00, "Intel ICH9M", 0}, {0x294d8086, 0x00, "Intel ICH9", 0}, {0x294e8086, 0x00, "Intel ICH9M", 0}, {0x3a058086, 0x00, "Intel ICH10", 0}, {0x3a228086, 0x00, "Intel ICH10", 0}, {0x3a258086, 0x00, "Intel ICH10", 0}, {0x3b228086, 0x00, "Intel 5 Series/3400 Series", 0}, {0x3b238086, 0x00, "Intel 5 Series/3400 Series", 0}, {0x3b258086, 0x00, "Intel 5 Series/3400 Series", 0}, {0x3b298086, 0x00, "Intel 5 Series/3400 Series", 0}, {0x3b2c8086, 0x00, "Intel 5 Series/3400 Series", 0}, {0x3b2f8086, 0x00, "Intel 5 Series/3400 Series", 0}, {0x1c028086, 0x00, "Intel Cougar Point", 0}, {0x1c038086, 0x00, "Intel Cougar Point", 0}, {0x1c048086, 0x00, "Intel Cougar Point", 0}, {0x1c058086, 0x00, "Intel Cougar Point", 0}, {0x2361197b, 0x00, "JMicron JMB361", AHCI_Q_NOFORCE}, {0x2363197b, 0x00, "JMicron JMB363", AHCI_Q_NOFORCE}, {0x2365197b, 0x00, "JMicron JMB365", AHCI_Q_NOFORCE}, {0x2366197b, 0x00, "JMicron JMB366", AHCI_Q_NOFORCE}, {0x2368197b, 0x00, "JMicron JMB368", AHCI_Q_NOFORCE}, {0x611111ab, 0x00, "Marvell 88SX6111", AHCI_Q_NOFORCE|AHCI_Q_1CH|AHCI_Q_EDGEIS}, {0x612111ab, 0x00, "Marvell 88SX6121", AHCI_Q_NOFORCE|AHCI_Q_2CH|AHCI_Q_EDGEIS}, {0x614111ab, 0x00, "Marvell 88SX6141", AHCI_Q_NOFORCE|AHCI_Q_4CH|AHCI_Q_EDGEIS}, {0x614511ab, 0x00, "Marvell 88SX6145", AHCI_Q_NOFORCE|AHCI_Q_4CH|AHCI_Q_EDGEIS}, {0x91231b4b, 0x11, "Marvell 88SE912x", AHCI_Q_NOBSYRES}, {0x91231b4b, 0x00, "Marvell 88SE912x", AHCI_Q_EDGEIS|AHCI_Q_SATA2|AHCI_Q_NOBSYRES}, {0x044c10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x044d10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x044e10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x044f10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x045c10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x045d10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x045e10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x045f10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x055010de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055110de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055210de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055310de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055410de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055510de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055610de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055710de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055810de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055910de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055A10de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055B10de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x058410de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x07f010de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f110de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f210de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f310de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f410de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f510de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f610de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f710de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f810de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f910de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07fa10de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07fb10de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x0ad010de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad110de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad210de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad310de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad410de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad510de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad610de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad710de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad810de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad910de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ada10de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0adb10de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ab410de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0ab510de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0ab610de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0ab710de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0ab810de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0ab910de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0aba10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0abb10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0abc10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0abd10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0abe10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0abf10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0d8410de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8510de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8610de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8710de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8810de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8910de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8a10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8b10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8c10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8d10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8e10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8f10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x33491106, 0x00, "VIA VT8251", AHCI_Q_NOPMP|AHCI_Q_NONCQ}, {0x62871106, 0x00, "VIA VT8251", AHCI_Q_NOPMP|AHCI_Q_NONCQ}, {0x11841039, 0x00, "SiS 966", 0}, {0x11851039, 0x00, "SiS 968", 0}, {0x01861039, 0x00, "SiS 968", 0}, {0x00000000, 0x00, NULL, 0} }; static int ahci_probe(device_t dev) { char buf[64]; int i, valid = 0; uint32_t devid = pci_get_devid(dev); uint8_t revid = pci_get_revid(dev); /* Is this a possible AHCI candidate? */ if (pci_get_class(dev) == PCIC_STORAGE && pci_get_subclass(dev) == PCIS_STORAGE_SATA && pci_get_progif(dev) == PCIP_STORAGE_SATA_AHCI_1_0) valid = 1; /* Is this a known AHCI chip? */ for (i = 0; ahci_ids[i].id != 0; i++) { if (ahci_ids[i].id == devid && ahci_ids[i].rev <= revid && (valid || !(ahci_ids[i].quirks & AHCI_Q_NOFORCE))) { /* Do not attach JMicrons with single PCI function. */ if (pci_get_vendor(dev) == 0x197b && (pci_read_config(dev, 0xdf, 1) & 0x40) == 0) return (ENXIO); snprintf(buf, sizeof(buf), "%s AHCI SATA controller", ahci_ids[i].name); device_set_desc_copy(dev, buf); return (BUS_PROBE_VENDOR); } } if (!valid) return (ENXIO); device_set_desc_copy(dev, "AHCI SATA controller"); return (BUS_PROBE_VENDOR); } static int ahci_ata_probe(device_t dev) { char buf[64]; int i; uint32_t devid = pci_get_devid(dev); uint8_t revid = pci_get_revid(dev); if ((intptr_t)device_get_ivars(dev) >= 0) return (ENXIO); /* Is this a known AHCI chip? */ for (i = 0; ahci_ids[i].id != 0; i++) { if (ahci_ids[i].id == devid && ahci_ids[i].rev <= revid) { snprintf(buf, sizeof(buf), "%s AHCI SATA controller", ahci_ids[i].name); device_set_desc_copy(dev, buf); return (BUS_PROBE_VENDOR); } } device_set_desc_copy(dev, "AHCI SATA controller"); return (BUS_PROBE_VENDOR); } static int ahci_attach(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); device_t child; int error, unit, speed, i; uint32_t devid = pci_get_devid(dev); uint8_t revid = pci_get_revid(dev); u_int32_t version; ctlr->dev = dev; i = 0; while (ahci_ids[i].id != 0 && (ahci_ids[i].id != devid || ahci_ids[i].rev > revid)) i++; ctlr->quirks = ahci_ids[i].quirks; resource_int_value(device_get_name(dev), device_get_unit(dev), "ccc", &ctlr->ccc); /* if we have a memory BAR(5) we are likely on an AHCI part */ ctlr->r_rid = PCIR_BAR(5); if (!(ctlr->r_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ctlr->r_rid, RF_ACTIVE))) return ENXIO; /* Setup our own memory management for channels. */ ctlr->sc_iomem.rm_start = rman_get_start(ctlr->r_mem); ctlr->sc_iomem.rm_end = rman_get_end(ctlr->r_mem); ctlr->sc_iomem.rm_type = RMAN_ARRAY; ctlr->sc_iomem.rm_descr = "I/O memory addresses"; if ((error = rman_init(&ctlr->sc_iomem)) != 0) { bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_rid, ctlr->r_mem); return (error); } if ((error = rman_manage_region(&ctlr->sc_iomem, rman_get_start(ctlr->r_mem), rman_get_end(ctlr->r_mem))) != 0) { bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_rid, ctlr->r_mem); rman_fini(&ctlr->sc_iomem); return (error); } pci_enable_busmaster(dev); /* Reset controller */ if ((error = ahci_ctlr_reset(dev)) != 0) { bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_rid, ctlr->r_mem); rman_fini(&ctlr->sc_iomem); return (error); }; /* Get the HW capabilities */ version = ATA_INL(ctlr->r_mem, AHCI_VS); ctlr->caps = ATA_INL(ctlr->r_mem, AHCI_CAP); if (version >= 0x00010020) ctlr->caps2 = ATA_INL(ctlr->r_mem, AHCI_CAP2); if (ctlr->caps & AHCI_CAP_EMS) ctlr->capsem = ATA_INL(ctlr->r_mem, AHCI_EM_CTL); ctlr->ichannels = ATA_INL(ctlr->r_mem, AHCI_PI); if (ctlr->quirks & AHCI_Q_1CH) { ctlr->caps &= ~AHCI_CAP_NPMASK; ctlr->ichannels &= 0x01; } if (ctlr->quirks & AHCI_Q_2CH) { ctlr->caps &= ~AHCI_CAP_NPMASK; ctlr->caps |= 1; ctlr->ichannels &= 0x03; } if (ctlr->quirks & AHCI_Q_4CH) { ctlr->caps &= ~AHCI_CAP_NPMASK; ctlr->caps |= 3; ctlr->ichannels &= 0x0f; } ctlr->channels = MAX(flsl(ctlr->ichannels), (ctlr->caps & AHCI_CAP_NPMASK) + 1); if (ctlr->quirks & AHCI_Q_NOPMP) ctlr->caps &= ~AHCI_CAP_SPM; if (ctlr->quirks & AHCI_Q_NONCQ) ctlr->caps &= ~AHCI_CAP_SNCQ; if ((ctlr->caps & AHCI_CAP_CCCS) == 0) ctlr->ccc = 0; ahci_ctlr_setup(dev); /* Setup interrupts. */ if (ahci_setup_interrupt(dev)) { bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_rid, ctlr->r_mem); rman_fini(&ctlr->sc_iomem); return ENXIO; } /* Announce HW capabilities. */ speed = (ctlr->caps & AHCI_CAP_ISS) >> AHCI_CAP_ISS_SHIFT; device_printf(dev, "AHCI v%x.%02x with %d %sGbps ports, Port Multiplier %s%s\n", ((version >> 20) & 0xf0) + ((version >> 16) & 0x0f), ((version >> 4) & 0xf0) + (version & 0x0f), (ctlr->caps & AHCI_CAP_NPMASK) + 1, ((speed == 1) ? "1.5":((speed == 2) ? "3": ((speed == 3) ? "6":"?"))), (ctlr->caps & AHCI_CAP_SPM) ? "supported" : "not supported", (ctlr->caps & AHCI_CAP_FBSS) ? " with FBS" : ""); if (bootverbose) { device_printf(dev, "Caps:%s%s%s%s%s%s%s%s %sGbps", (ctlr->caps & AHCI_CAP_64BIT) ? " 64bit":"", (ctlr->caps & AHCI_CAP_SNCQ) ? " NCQ":"", (ctlr->caps & AHCI_CAP_SSNTF) ? " SNTF":"", (ctlr->caps & AHCI_CAP_SMPS) ? " MPS":"", (ctlr->caps & AHCI_CAP_SSS) ? " SS":"", (ctlr->caps & AHCI_CAP_SALP) ? " ALP":"", (ctlr->caps & AHCI_CAP_SAL) ? " AL":"", (ctlr->caps & AHCI_CAP_SCLO) ? " CLO":"", ((speed == 1) ? "1.5":((speed == 2) ? "3": ((speed == 3) ? "6":"?")))); printf("%s%s%s%s%s%s %dcmd%s%s%s %dports\n", (ctlr->caps & AHCI_CAP_SAM) ? " AM":"", (ctlr->caps & AHCI_CAP_SPM) ? " PM":"", (ctlr->caps & AHCI_CAP_FBSS) ? " FBS":"", (ctlr->caps & AHCI_CAP_PMD) ? " PMD":"", (ctlr->caps & AHCI_CAP_SSC) ? " SSC":"", (ctlr->caps & AHCI_CAP_PSC) ? " PSC":"", ((ctlr->caps & AHCI_CAP_NCS) >> AHCI_CAP_NCS_SHIFT) + 1, (ctlr->caps & AHCI_CAP_CCCS) ? " CCC":"", (ctlr->caps & AHCI_CAP_EMS) ? " EM":"", (ctlr->caps & AHCI_CAP_SXS) ? " eSATA":"", (ctlr->caps & AHCI_CAP_NPMASK) + 1); } if (bootverbose && version >= 0x00010020) { device_printf(dev, "Caps2:%s%s%s\n", (ctlr->caps2 & AHCI_CAP2_APST) ? " APST":"", (ctlr->caps2 & AHCI_CAP2_NVMP) ? " NVMP":"", (ctlr->caps2 & AHCI_CAP2_BOH) ? " BOH":""); } if (bootverbose && (ctlr->caps & AHCI_CAP_EMS)) { device_printf(dev, "EM Caps:%s%s%s%s%s%s%s%s\n", (ctlr->capsem & AHCI_EM_PM) ? " PM":"", (ctlr->capsem & AHCI_EM_ALHD) ? " ALHD":"", (ctlr->capsem & AHCI_EM_XMT) ? " XMT":"", (ctlr->capsem & AHCI_EM_SMB) ? " SMB":"", (ctlr->capsem & AHCI_EM_SGPIO) ? " SGPIO":"", (ctlr->capsem & AHCI_EM_SES2) ? " SES-2":"", (ctlr->capsem & AHCI_EM_SAFTE) ? " SAF-TE":"", (ctlr->capsem & AHCI_EM_LED) ? " LED":""); } /* Attach all channels on this controller */ for (unit = 0; unit < ctlr->channels; unit++) { if ((ctlr->ichannels & (1 << unit)) == 0) continue; child = device_add_child(dev, "ahcich", -1); if (child == NULL) device_printf(dev, "failed to add channel device\n"); else device_set_ivars(child, (void *)(intptr_t)unit); } bus_generic_attach(dev); return 0; } static int ahci_detach(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); device_t *children; int nchildren, i; /* Detach & delete all children */ if (!device_get_children(dev, &children, &nchildren)) { for (i = 0; i < nchildren; i++) device_delete_child(dev, children[i]); free(children, M_TEMP); } /* Free interrupts. */ for (i = 0; i < ctlr->numirqs; i++) { if (ctlr->irqs[i].r_irq) { bus_teardown_intr(dev, ctlr->irqs[i].r_irq, ctlr->irqs[i].handle); bus_release_resource(dev, SYS_RES_IRQ, ctlr->irqs[i].r_irq_rid, ctlr->irqs[i].r_irq); } } pci_release_msi(dev); /* Free memory. */ rman_fini(&ctlr->sc_iomem); if (ctlr->r_mem) bus_release_resource(dev, SYS_RES_MEMORY, ctlr->r_rid, ctlr->r_mem); return (0); } static int ahci_ctlr_reset(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); int timeout; if (pci_read_config(dev, 0x00, 4) == 0x28298086 && (pci_read_config(dev, 0x92, 1) & 0xfe) == 0x04) pci_write_config(dev, 0x92, 0x01, 1); /* Enable AHCI mode */ ATA_OUTL(ctlr->r_mem, AHCI_GHC, AHCI_GHC_AE); /* Reset AHCI controller */ ATA_OUTL(ctlr->r_mem, AHCI_GHC, AHCI_GHC_AE|AHCI_GHC_HR); for (timeout = 1000; timeout > 0; timeout--) { DELAY(1000); if ((ATA_INL(ctlr->r_mem, AHCI_GHC) & AHCI_GHC_HR) == 0) break; } if (timeout == 0) { device_printf(dev, "AHCI controller reset failure\n"); return ENXIO; } /* Reenable AHCI mode */ ATA_OUTL(ctlr->r_mem, AHCI_GHC, AHCI_GHC_AE); return (0); } static int ahci_ctlr_setup(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); /* Clear interrupts */ ATA_OUTL(ctlr->r_mem, AHCI_IS, ATA_INL(ctlr->r_mem, AHCI_IS)); /* Configure CCC */ if (ctlr->ccc) { ATA_OUTL(ctlr->r_mem, AHCI_CCCP, ATA_INL(ctlr->r_mem, AHCI_PI)); ATA_OUTL(ctlr->r_mem, AHCI_CCCC, (ctlr->ccc << AHCI_CCCC_TV_SHIFT) | (4 << AHCI_CCCC_CC_SHIFT) | AHCI_CCCC_EN); ctlr->cccv = (ATA_INL(ctlr->r_mem, AHCI_CCCC) & AHCI_CCCC_INT_MASK) >> AHCI_CCCC_INT_SHIFT; if (bootverbose) { device_printf(dev, "CCC with %dms/4cmd enabled on vector %d\n", ctlr->ccc, ctlr->cccv); } } /* Enable AHCI interrupts */ ATA_OUTL(ctlr->r_mem, AHCI_GHC, ATA_INL(ctlr->r_mem, AHCI_GHC) | AHCI_GHC_IE); return (0); } static int ahci_suspend(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); bus_generic_suspend(dev); /* Disable interupts, so the state change(s) doesn't trigger */ ATA_OUTL(ctlr->r_mem, AHCI_GHC, ATA_INL(ctlr->r_mem, AHCI_GHC) & (~AHCI_GHC_IE)); return 0; } static int ahci_resume(device_t dev) { int res; if ((res = ahci_ctlr_reset(dev)) != 0) return (res); ahci_ctlr_setup(dev); return (bus_generic_resume(dev)); } static int ahci_setup_interrupt(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); int i, msi = 1; /* Process hints. */ resource_int_value(device_get_name(dev), device_get_unit(dev), "msi", &msi); if (msi < 0) msi = 0; else if (msi == 1) msi = min(1, pci_msi_count(dev)); else if (msi > 1) msi = pci_msi_count(dev); /* Allocate MSI if needed/present. */ if (msi && pci_alloc_msi(dev, &msi) == 0) { ctlr->numirqs = msi; } else { msi = 0; ctlr->numirqs = 1; } /* Check for single MSI vector fallback. */ if (ctlr->numirqs > 1 && (ATA_INL(ctlr->r_mem, AHCI_GHC) & AHCI_GHC_MRSM) != 0) { device_printf(dev, "Falling back to one MSI\n"); ctlr->numirqs = 1; } /* Allocate all IRQs. */ for (i = 0; i < ctlr->numirqs; i++) { ctlr->irqs[i].ctlr = ctlr; ctlr->irqs[i].r_irq_rid = i + (msi ? 1 : 0); if (ctlr->numirqs == 1 || i >= ctlr->channels || (ctlr->ccc && i == ctlr->cccv)) ctlr->irqs[i].mode = AHCI_IRQ_MODE_ALL; else if (i == ctlr->numirqs - 1) ctlr->irqs[i].mode = AHCI_IRQ_MODE_AFTER; else ctlr->irqs[i].mode = AHCI_IRQ_MODE_ONE; if (!(ctlr->irqs[i].r_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &ctlr->irqs[i].r_irq_rid, RF_SHAREABLE | RF_ACTIVE))) { device_printf(dev, "unable to map interrupt\n"); return ENXIO; } if ((bus_setup_intr(dev, ctlr->irqs[i].r_irq, ATA_INTR_FLAGS, NULL, (ctlr->irqs[i].mode == AHCI_IRQ_MODE_ONE) ? ahci_intr_one : ahci_intr, &ctlr->irqs[i], &ctlr->irqs[i].handle))) { /* SOS XXX release r_irq */ device_printf(dev, "unable to setup interrupt\n"); return ENXIO; } if (ctlr->numirqs > 1) { bus_describe_intr(dev, ctlr->irqs[i].r_irq, ctlr->irqs[i].handle, ctlr->irqs[i].mode == AHCI_IRQ_MODE_ONE ? "ch%d" : "%d", i); } } return (0); } /* * Common case interrupt handler. */ static void ahci_intr(void *data) { struct ahci_controller_irq *irq = data; struct ahci_controller *ctlr = irq->ctlr; u_int32_t is, ise = 0; void *arg; int unit; if (irq->mode == AHCI_IRQ_MODE_ALL) { unit = 0; if (ctlr->ccc) is = ctlr->ichannels; else is = ATA_INL(ctlr->r_mem, AHCI_IS); } else { /* AHCI_IRQ_MODE_AFTER */ unit = irq->r_irq_rid - 1; is = ATA_INL(ctlr->r_mem, AHCI_IS); } /* CCC interrupt is edge triggered. */ if (ctlr->ccc) ise = 1 << ctlr->cccv; /* Some controllers have edge triggered IS. */ if (ctlr->quirks & AHCI_Q_EDGEIS) ise |= is; if (ise != 0) ATA_OUTL(ctlr->r_mem, AHCI_IS, ise); for (; unit < ctlr->channels; unit++) { if ((is & (1 << unit)) != 0 && (arg = ctlr->interrupt[unit].argument)) { ctlr->interrupt[unit].function(arg); } } /* AHCI declares level triggered IS. */ if (!(ctlr->quirks & AHCI_Q_EDGEIS)) ATA_OUTL(ctlr->r_mem, AHCI_IS, is); } /* * Simplified interrupt handler for multivector MSI mode. */ static void ahci_intr_one(void *data) { struct ahci_controller_irq *irq = data; struct ahci_controller *ctlr = irq->ctlr; void *arg; int unit; unit = irq->r_irq_rid - 1; /* Some controllers have edge triggered IS. */ if (ctlr->quirks & AHCI_Q_EDGEIS) ATA_OUTL(ctlr->r_mem, AHCI_IS, 1 << unit); if ((arg = ctlr->interrupt[unit].argument)) ctlr->interrupt[unit].function(arg); /* AHCI declares level triggered IS. */ if (!(ctlr->quirks & AHCI_Q_EDGEIS)) ATA_OUTL(ctlr->r_mem, AHCI_IS, 1 << unit); } static struct resource * ahci_alloc_resource(device_t dev, device_t child, int type, int *rid, u_long start, u_long end, u_long count, u_int flags) { struct ahci_controller *ctlr = device_get_softc(dev); int unit = ((struct ahci_channel *)device_get_softc(child))->unit; struct resource *res = NULL; int offset = AHCI_OFFSET + (unit << 7); long st; switch (type) { case SYS_RES_MEMORY: st = rman_get_start(ctlr->r_mem); res = rman_reserve_resource(&ctlr->sc_iomem, st + offset, st + offset + 127, 128, RF_ACTIVE, child); if (res) { bus_space_handle_t bsh; bus_space_tag_t bst; bsh = rman_get_bushandle(ctlr->r_mem); bst = rman_get_bustag(ctlr->r_mem); bus_space_subregion(bst, bsh, offset, 128, &bsh); rman_set_bushandle(res, bsh); rman_set_bustag(res, bst); } break; case SYS_RES_IRQ: if (*rid == ATA_IRQ_RID) res = ctlr->irqs[0].r_irq; break; } return (res); } static int ahci_release_resource(device_t dev, device_t child, int type, int rid, struct resource *r) { switch (type) { case SYS_RES_MEMORY: rman_release_resource(r); return (0); case SYS_RES_IRQ: if (rid != ATA_IRQ_RID) return ENOENT; return (0); } return (EINVAL); } static int ahci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags, driver_filter_t *filter, driver_intr_t *function, void *argument, void **cookiep) { struct ahci_controller *ctlr = device_get_softc(dev); int unit = (intptr_t)device_get_ivars(child); if (filter != NULL) { printf("ahci.c: we cannot use a filter here\n"); return (EINVAL); } ctlr->interrupt[unit].function = function; ctlr->interrupt[unit].argument = argument; return (0); } static int ahci_teardown_intr(device_t dev, device_t child, struct resource *irq, void *cookie) { struct ahci_controller *ctlr = device_get_softc(dev); int unit = (intptr_t)device_get_ivars(child); ctlr->interrupt[unit].function = NULL; ctlr->interrupt[unit].argument = NULL; return (0); } static int ahci_print_child(device_t dev, device_t child) { int retval; retval = bus_print_child_header(dev, child); retval += printf(" at channel %d", (int)(intptr_t)device_get_ivars(child)); retval += bus_print_child_footer(dev, child); return (retval); } static int ahci_child_location_str(device_t dev, device_t child, char *buf, size_t buflen) { snprintf(buf, buflen, "channel=%d", (int)(intptr_t)device_get_ivars(child)); return (0); } devclass_t ahci_devclass; static device_method_t ahci_methods[] = { DEVMETHOD(device_probe, ahci_probe), DEVMETHOD(device_attach, ahci_attach), DEVMETHOD(device_detach, ahci_detach), DEVMETHOD(device_suspend, ahci_suspend), DEVMETHOD(device_resume, ahci_resume), DEVMETHOD(bus_print_child, ahci_print_child), DEVMETHOD(bus_alloc_resource, ahci_alloc_resource), DEVMETHOD(bus_release_resource, ahci_release_resource), DEVMETHOD(bus_setup_intr, ahci_setup_intr), DEVMETHOD(bus_teardown_intr,ahci_teardown_intr), DEVMETHOD(bus_child_location_str, ahci_child_location_str), { 0, 0 } }; static driver_t ahci_driver = { "ahci", ahci_methods, sizeof(struct ahci_controller) }; DRIVER_MODULE(ahci, pci, ahci_driver, ahci_devclass, 0, 0); static device_method_t ahci_ata_methods[] = { DEVMETHOD(device_probe, ahci_ata_probe), DEVMETHOD(device_attach, ahci_attach), DEVMETHOD(device_detach, ahci_detach), DEVMETHOD(device_suspend, ahci_suspend), DEVMETHOD(device_resume, ahci_resume), DEVMETHOD(bus_print_child, ahci_print_child), DEVMETHOD(bus_alloc_resource, ahci_alloc_resource), DEVMETHOD(bus_release_resource, ahci_release_resource), DEVMETHOD(bus_setup_intr, ahci_setup_intr), DEVMETHOD(bus_teardown_intr,ahci_teardown_intr), DEVMETHOD(bus_child_location_str, ahci_child_location_str), { 0, 0 } }; static driver_t ahci_ata_driver = { "ahci", ahci_ata_methods, sizeof(struct ahci_controller) }; DRIVER_MODULE(ahci, atapci, ahci_ata_driver, ahci_devclass, 0, 0); MODULE_VERSION(ahci, 1); MODULE_DEPEND(ahci, cam, 1, 1, 1); static int ahci_ch_probe(device_t dev) { device_set_desc_copy(dev, "AHCI channel"); return (0); } static int ahci_ch_attach(device_t dev) { struct ahci_controller *ctlr = device_get_softc(device_get_parent(dev)); struct ahci_channel *ch = device_get_softc(dev); struct cam_devq *devq; int rid, error, i, sata_rev = 0; u_int32_t version; ch->dev = dev; ch->unit = (intptr_t)device_get_ivars(dev); ch->caps = ctlr->caps; ch->caps2 = ctlr->caps2; ch->quirks = ctlr->quirks; - ch->numslots = ((ch->caps & AHCI_CAP_NCS) >> AHCI_CAP_NCS_SHIFT) + 1, + ch->numslots = ((ch->caps & AHCI_CAP_NCS) >> AHCI_CAP_NCS_SHIFT) + 1; mtx_init(&ch->mtx, "AHCI channel lock", NULL, MTX_DEF); resource_int_value(device_get_name(dev), device_get_unit(dev), "pm_level", &ch->pm_level); if (ch->pm_level > 3) callout_init_mtx(&ch->pm_timer, &ch->mtx, 0); /* Limit speed for my onboard JMicron external port. * It is not eSATA really. */ if (pci_get_devid(ctlr->dev) == 0x2363197b && pci_get_subvendor(ctlr->dev) == 0x1043 && pci_get_subdevice(ctlr->dev) == 0x81e4 && ch->unit == 0) sata_rev = 1; if (ch->quirks & AHCI_Q_SATA2) sata_rev = 2; resource_int_value(device_get_name(dev), device_get_unit(dev), "sata_rev", &sata_rev); for (i = 0; i < 16; i++) { ch->user[i].revision = sata_rev; ch->user[i].mode = 0; ch->user[i].bytecount = 8192; ch->user[i].tags = ch->numslots; ch->user[i].caps = 0; ch->curr[i] = ch->user[i]; if (ch->pm_level) { ch->user[i].caps = CTS_SATA_CAPS_H_PMREQ | CTS_SATA_CAPS_H_APST | CTS_SATA_CAPS_D_PMREQ | CTS_SATA_CAPS_D_APST; } ch->user[i].caps |= CTS_SATA_CAPS_H_DMAAA; } rid = ch->unit; if (!(ch->r_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE))) return (ENXIO); ahci_dmainit(dev); ahci_slotsalloc(dev); ahci_ch_init(dev); mtx_lock(&ch->mtx); rid = ATA_IRQ_RID; if (!(ch->r_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE))) { device_printf(dev, "Unable to map interrupt\n"); error = ENXIO; goto err0; } if ((bus_setup_intr(dev, ch->r_irq, ATA_INTR_FLAGS, NULL, ahci_ch_intr_locked, dev, &ch->ih))) { device_printf(dev, "Unable to setup interrupt\n"); error = ENXIO; goto err1; } ch->chcaps = ATA_INL(ch->r_mem, AHCI_P_CMD); version = ATA_INL(ctlr->r_mem, AHCI_VS); if (version < 0x00010020 && (ctlr->caps & AHCI_CAP_FBSS)) ch->chcaps |= AHCI_P_CMD_FBSCP; if (bootverbose) { device_printf(dev, "Caps:%s%s%s%s%s\n", (ch->chcaps & AHCI_P_CMD_HPCP) ? " HPCP":"", (ch->chcaps & AHCI_P_CMD_MPSP) ? " MPSP":"", (ch->chcaps & AHCI_P_CMD_CPD) ? " CPD":"", (ch->chcaps & AHCI_P_CMD_ESP) ? " ESP":"", (ch->chcaps & AHCI_P_CMD_FBSCP) ? " FBSCP":""); } /* Create the device queue for our SIM. */ devq = cam_simq_alloc(ch->numslots); if (devq == NULL) { device_printf(dev, "Unable to allocate simq\n"); error = ENOMEM; goto err1; } /* Construct SIM entry */ ch->sim = cam_sim_alloc(ahciaction, ahcipoll, "ahcich", ch, device_get_unit(dev), &ch->mtx, min(2, ch->numslots), (ch->caps & AHCI_CAP_SNCQ) ? ch->numslots : 0, devq); if (ch->sim == NULL) { cam_simq_free(devq); device_printf(dev, "unable to allocate sim\n"); error = ENOMEM; goto err1; } if (xpt_bus_register(ch->sim, dev, 0) != CAM_SUCCESS) { device_printf(dev, "unable to register xpt bus\n"); error = ENXIO; goto err2; } if (xpt_create_path(&ch->path, /*periph*/NULL, cam_sim_path(ch->sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { device_printf(dev, "unable to create path\n"); error = ENXIO; goto err3; } if (ch->pm_level > 3) { callout_reset(&ch->pm_timer, (ch->pm_level == 4) ? hz / 1000 : hz / 8, ahci_ch_pm, dev); } mtx_unlock(&ch->mtx); return (0); err3: xpt_bus_deregister(cam_sim_path(ch->sim)); err2: cam_sim_free(ch->sim, /*free_devq*/TRUE); err1: bus_release_resource(dev, SYS_RES_IRQ, ATA_IRQ_RID, ch->r_irq); err0: bus_release_resource(dev, SYS_RES_MEMORY, ch->unit, ch->r_mem); mtx_unlock(&ch->mtx); mtx_destroy(&ch->mtx); return (error); } static int ahci_ch_detach(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); mtx_lock(&ch->mtx); xpt_async(AC_LOST_DEVICE, ch->path, NULL); xpt_free_path(ch->path); xpt_bus_deregister(cam_sim_path(ch->sim)); cam_sim_free(ch->sim, /*free_devq*/TRUE); mtx_unlock(&ch->mtx); if (ch->pm_level > 3) callout_drain(&ch->pm_timer); bus_teardown_intr(dev, ch->r_irq, ch->ih); bus_release_resource(dev, SYS_RES_IRQ, ATA_IRQ_RID, ch->r_irq); ahci_ch_deinit(dev); ahci_slotsfree(dev); ahci_dmafini(dev); bus_release_resource(dev, SYS_RES_MEMORY, ch->unit, ch->r_mem); mtx_destroy(&ch->mtx); return (0); } static int ahci_ch_init(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); uint64_t work; /* Disable port interrupts */ ATA_OUTL(ch->r_mem, AHCI_P_IE, 0); /* Setup work areas */ work = ch->dma.work_bus + AHCI_CL_OFFSET; ATA_OUTL(ch->r_mem, AHCI_P_CLB, work & 0xffffffff); ATA_OUTL(ch->r_mem, AHCI_P_CLBU, work >> 32); work = ch->dma.rfis_bus; ATA_OUTL(ch->r_mem, AHCI_P_FB, work & 0xffffffff); ATA_OUTL(ch->r_mem, AHCI_P_FBU, work >> 32); /* Activate the channel and power/spin up device */ ATA_OUTL(ch->r_mem, AHCI_P_CMD, (AHCI_P_CMD_ACTIVE | AHCI_P_CMD_POD | AHCI_P_CMD_SUD | ((ch->pm_level == 2 || ch->pm_level == 3) ? AHCI_P_CMD_ALPE : 0) | ((ch->pm_level > 2) ? AHCI_P_CMD_ASP : 0 ))); ahci_start_fr(dev); ahci_start(dev, 1); return (0); } static int ahci_ch_deinit(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); /* Disable port interrupts. */ ATA_OUTL(ch->r_mem, AHCI_P_IE, 0); /* Reset command register. */ ahci_stop(dev); ahci_stop_fr(dev); ATA_OUTL(ch->r_mem, AHCI_P_CMD, 0); /* Allow everything, including partial and slumber modes. */ ATA_OUTL(ch->r_mem, AHCI_P_SCTL, 0); /* Request slumber mode transition and give some time to get there. */ ATA_OUTL(ch->r_mem, AHCI_P_CMD, AHCI_P_CMD_SLUMBER); DELAY(100); /* Disable PHY. */ ATA_OUTL(ch->r_mem, AHCI_P_SCTL, ATA_SC_DET_DISABLE); return (0); } static int ahci_ch_suspend(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); mtx_lock(&ch->mtx); xpt_freeze_simq(ch->sim, 1); while (ch->oslots) msleep(ch, &ch->mtx, PRIBIO, "ahcisusp", hz/100); ahci_ch_deinit(dev); mtx_unlock(&ch->mtx); return (0); } static int ahci_ch_resume(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); mtx_lock(&ch->mtx); ahci_ch_init(dev); ahci_reset(dev); xpt_release_simq(ch->sim, TRUE); mtx_unlock(&ch->mtx); return (0); } devclass_t ahcich_devclass; static device_method_t ahcich_methods[] = { DEVMETHOD(device_probe, ahci_ch_probe), DEVMETHOD(device_attach, ahci_ch_attach), DEVMETHOD(device_detach, ahci_ch_detach), DEVMETHOD(device_suspend, ahci_ch_suspend), DEVMETHOD(device_resume, ahci_ch_resume), { 0, 0 } }; static driver_t ahcich_driver = { "ahcich", ahcich_methods, sizeof(struct ahci_channel) }; DRIVER_MODULE(ahcich, ahci, ahcich_driver, ahcich_devclass, 0, 0); struct ahci_dc_cb_args { bus_addr_t maddr; int error; }; static void ahci_dmainit(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); struct ahci_dc_cb_args dcba; size_t rfsize; if (ch->caps & AHCI_CAP_64BIT) ch->dma.max_address = BUS_SPACE_MAXADDR; else ch->dma.max_address = BUS_SPACE_MAXADDR_32BIT; /* Command area. */ if (bus_dma_tag_create(bus_get_dma_tag(dev), 1024, 0, ch->dma.max_address, BUS_SPACE_MAXADDR, NULL, NULL, AHCI_WORK_SIZE, 1, AHCI_WORK_SIZE, 0, NULL, NULL, &ch->dma.work_tag)) goto error; if (bus_dmamem_alloc(ch->dma.work_tag, (void **)&ch->dma.work, 0, &ch->dma.work_map)) goto error; if (bus_dmamap_load(ch->dma.work_tag, ch->dma.work_map, ch->dma.work, AHCI_WORK_SIZE, ahci_dmasetupc_cb, &dcba, 0) || dcba.error) { bus_dmamem_free(ch->dma.work_tag, ch->dma.work, ch->dma.work_map); goto error; } ch->dma.work_bus = dcba.maddr; /* FIS receive area. */ if (ch->chcaps & AHCI_P_CMD_FBSCP) rfsize = 4096; else rfsize = 256; if (bus_dma_tag_create(bus_get_dma_tag(dev), rfsize, 0, ch->dma.max_address, BUS_SPACE_MAXADDR, NULL, NULL, rfsize, 1, rfsize, 0, NULL, NULL, &ch->dma.rfis_tag)) goto error; if (bus_dmamem_alloc(ch->dma.rfis_tag, (void **)&ch->dma.rfis, 0, &ch->dma.rfis_map)) goto error; if (bus_dmamap_load(ch->dma.rfis_tag, ch->dma.rfis_map, ch->dma.rfis, rfsize, ahci_dmasetupc_cb, &dcba, 0) || dcba.error) { bus_dmamem_free(ch->dma.rfis_tag, ch->dma.rfis, ch->dma.rfis_map); goto error; } ch->dma.rfis_bus = dcba.maddr; /* Data area. */ if (bus_dma_tag_create(bus_get_dma_tag(dev), 2, 0, ch->dma.max_address, BUS_SPACE_MAXADDR, NULL, NULL, AHCI_SG_ENTRIES * PAGE_SIZE * ch->numslots, AHCI_SG_ENTRIES, AHCI_PRD_MAX, 0, busdma_lock_mutex, &ch->mtx, &ch->dma.data_tag)) { goto error; } return; error: device_printf(dev, "WARNING - DMA initialization failed\n"); ahci_dmafini(dev); } static void ahci_dmasetupc_cb(void *xsc, bus_dma_segment_t *segs, int nsegs, int error) { struct ahci_dc_cb_args *dcba = (struct ahci_dc_cb_args *)xsc; if (!(dcba->error = error)) dcba->maddr = segs[0].ds_addr; } static void ahci_dmafini(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); if (ch->dma.data_tag) { bus_dma_tag_destroy(ch->dma.data_tag); ch->dma.data_tag = NULL; } if (ch->dma.rfis_bus) { bus_dmamap_unload(ch->dma.rfis_tag, ch->dma.rfis_map); bus_dmamem_free(ch->dma.rfis_tag, ch->dma.rfis, ch->dma.rfis_map); ch->dma.rfis_bus = 0; ch->dma.rfis_map = NULL; ch->dma.rfis = NULL; } if (ch->dma.work_bus) { bus_dmamap_unload(ch->dma.work_tag, ch->dma.work_map); bus_dmamem_free(ch->dma.work_tag, ch->dma.work, ch->dma.work_map); ch->dma.work_bus = 0; ch->dma.work_map = NULL; ch->dma.work = NULL; } if (ch->dma.work_tag) { bus_dma_tag_destroy(ch->dma.work_tag); ch->dma.work_tag = NULL; } } static void ahci_slotsalloc(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); int i; /* Alloc and setup command/dma slots */ bzero(ch->slot, sizeof(ch->slot)); for (i = 0; i < ch->numslots; i++) { struct ahci_slot *slot = &ch->slot[i]; slot->dev = dev; slot->slot = i; slot->state = AHCI_SLOT_EMPTY; slot->ccb = NULL; callout_init_mtx(&slot->timeout, &ch->mtx, 0); if (bus_dmamap_create(ch->dma.data_tag, 0, &slot->dma.data_map)) device_printf(ch->dev, "FAILURE - create data_map\n"); } } static void ahci_slotsfree(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); int i; /* Free all dma slots */ for (i = 0; i < ch->numslots; i++) { struct ahci_slot *slot = &ch->slot[i]; callout_drain(&slot->timeout); if (slot->dma.data_map) { bus_dmamap_destroy(ch->dma.data_tag, slot->dma.data_map); slot->dma.data_map = NULL; } } } static void ahci_phy_check_events(device_t dev, u_int32_t serr) { struct ahci_channel *ch = device_get_softc(dev); if ((serr & ATA_SE_PHY_CHANGED) && (ch->pm_level == 0)) { u_int32_t status = ATA_INL(ch->r_mem, AHCI_P_SSTS); union ccb *ccb; if (bootverbose) { if (((status & ATA_SS_DET_MASK) == ATA_SS_DET_PHY_ONLINE) && ((status & ATA_SS_SPD_MASK) != ATA_SS_SPD_NO_SPEED) && ((status & ATA_SS_IPM_MASK) == ATA_SS_IPM_ACTIVE)) { device_printf(dev, "CONNECT requested\n"); } else device_printf(dev, "DISCONNECT requested\n"); } ahci_reset(dev); if ((ccb = xpt_alloc_ccb_nowait()) == NULL) return; if (xpt_create_path(&ccb->ccb_h.path, NULL, cam_sim_path(ch->sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_free_ccb(ccb); return; } xpt_rescan(ccb); } } static void ahci_notify_events(device_t dev, u_int32_t status) { struct ahci_channel *ch = device_get_softc(dev); struct cam_path *dpath; int i; if (ch->caps & AHCI_CAP_SSNTF) ATA_OUTL(ch->r_mem, AHCI_P_SNTF, status); if (bootverbose) device_printf(dev, "SNTF 0x%04x\n", status); for (i = 0; i < 16; i++) { if ((status & (1 << i)) == 0) continue; if (xpt_create_path(&dpath, NULL, xpt_path_path_id(ch->path), i, 0) == CAM_REQ_CMP) { xpt_async(AC_SCSI_AEN, dpath, NULL); xpt_free_path(dpath); } } } static void ahci_ch_intr_locked(void *data) { device_t dev = (device_t)data; struct ahci_channel *ch = device_get_softc(dev); mtx_lock(&ch->mtx); ahci_ch_intr(data); mtx_unlock(&ch->mtx); } static void ahci_ch_pm(void *arg) { device_t dev = (device_t)arg; struct ahci_channel *ch = device_get_softc(dev); uint32_t work; if (ch->numrslots != 0) return; work = ATA_INL(ch->r_mem, AHCI_P_CMD); if (ch->pm_level == 4) work |= AHCI_P_CMD_PARTIAL; else work |= AHCI_P_CMD_SLUMBER; ATA_OUTL(ch->r_mem, AHCI_P_CMD, work); } static void ahci_ch_intr(void *data) { device_t dev = (device_t)data; struct ahci_channel *ch = device_get_softc(dev); uint32_t istatus, sstatus, cstatus, serr = 0, sntf = 0, ok, err; enum ahci_err_type et; int i, ccs, port; /* Read and clear interrupt statuses. */ istatus = ATA_INL(ch->r_mem, AHCI_P_IS); if (istatus == 0) return; ATA_OUTL(ch->r_mem, AHCI_P_IS, istatus); /* Read command statuses. */ sstatus = ATA_INL(ch->r_mem, AHCI_P_SACT); cstatus = ATA_INL(ch->r_mem, AHCI_P_CI); if (istatus & AHCI_P_IX_SDB) { if (ch->caps & AHCI_CAP_SSNTF) sntf = ATA_INL(ch->r_mem, AHCI_P_SNTF); else if (ch->fbs_enabled) { u_int8_t *fis = ch->dma.rfis + 0x58; for (i = 0; i < 16; i++) { if (fis[1] & 0x80) { fis[1] &= 0x7f; sntf |= 1 << i; } fis += 256; } } else { u_int8_t *fis = ch->dma.rfis + 0x58; if (fis[1] & 0x80) sntf = (1 << (fis[1] & 0x0f)); } } /* Process PHY events */ if (istatus & (AHCI_P_IX_PC | AHCI_P_IX_PRC | AHCI_P_IX_OF | AHCI_P_IX_IF | AHCI_P_IX_HBD | AHCI_P_IX_HBF | AHCI_P_IX_TFE)) { serr = ATA_INL(ch->r_mem, AHCI_P_SERR); if (serr) { ATA_OUTL(ch->r_mem, AHCI_P_SERR, serr); ahci_phy_check_events(dev, serr); } } /* Process command errors */ if (istatus & (AHCI_P_IX_OF | AHCI_P_IX_IF | AHCI_P_IX_HBD | AHCI_P_IX_HBF | AHCI_P_IX_TFE)) { ccs = (ATA_INL(ch->r_mem, AHCI_P_CMD) & AHCI_P_CMD_CCS_MASK) >> AHCI_P_CMD_CCS_SHIFT; //device_printf(dev, "%s ERROR is %08x cs %08x ss %08x rs %08x tfd %02x serr %08x fbs %08x ccs %d\n", // __func__, istatus, cstatus, sstatus, ch->rslots, ATA_INL(ch->r_mem, AHCI_P_TFD), // serr, ATA_INL(ch->r_mem, AHCI_P_FBS), ccs); port = -1; if (ch->fbs_enabled) { uint32_t fbs = ATA_INL(ch->r_mem, AHCI_P_FBS); if (fbs & AHCI_P_FBS_SDE) { port = (fbs & AHCI_P_FBS_DWE) >> AHCI_P_FBS_DWE_SHIFT; } else { for (i = 0; i < 16; i++) { if (ch->numrslotspd[i] == 0) continue; if (port == -1) port = i; else if (port != i) { port = -2; break; } } } } err = ch->rslots & (cstatus | sstatus); } else { ccs = 0; err = 0; port = -1; } /* Complete all successfull commands. */ ok = ch->rslots & ~(cstatus | sstatus); for (i = 0; i < ch->numslots; i++) { if ((ok >> i) & 1) ahci_end_transaction(&ch->slot[i], AHCI_ERR_NONE); } /* On error, complete the rest of commands with error statuses. */ if (err) { if (ch->frozen) { union ccb *fccb = ch->frozen; ch->frozen = NULL; fccb->ccb_h.status = CAM_REQUEUE_REQ | CAM_RELEASE_SIMQ; if (!(fccb->ccb_h.status & CAM_DEV_QFRZN)) { xpt_freeze_devq(fccb->ccb_h.path, 1); fccb->ccb_h.status |= CAM_DEV_QFRZN; } xpt_done(fccb); } for (i = 0; i < ch->numslots; i++) { /* XXX: reqests in loading state. */ if (((err >> i) & 1) == 0) continue; if (port >= 0 && ch->slot[i].ccb->ccb_h.target_id != port) continue; if (istatus & AHCI_P_IX_TFE) { if (port != -2) { /* Task File Error */ if (ch->numtslotspd[ ch->slot[i].ccb->ccb_h.target_id] == 0) { /* Untagged operation. */ if (i == ccs) et = AHCI_ERR_TFE; else et = AHCI_ERR_INNOCENT; } else { /* Tagged operation. */ et = AHCI_ERR_NCQ; } } else { et = AHCI_ERR_TFE; ch->fatalerr = 1; } } else if (istatus & AHCI_P_IX_IF) { if (ch->numtslots == 0 && i != ccs && port != -2) et = AHCI_ERR_INNOCENT; else et = AHCI_ERR_SATA; } else et = AHCI_ERR_INVALID; ahci_end_transaction(&ch->slot[i], et); } /* * We can't reinit port if there are some other * commands active, use resume to complete them. */ if (ch->rslots != 0) ATA_OUTL(ch->r_mem, AHCI_P_FBS, AHCI_P_FBS_EN | AHCI_P_FBS_DEC); } /* Process NOTIFY events */ if (sntf) ahci_notify_events(dev, sntf); } /* Must be called with channel locked. */ static int ahci_check_collision(device_t dev, union ccb *ccb) { struct ahci_channel *ch = device_get_softc(dev); int t = ccb->ccb_h.target_id; if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA)) { /* Tagged command while we have no supported tag free. */ if (((~ch->oslots) & (0xffffffff >> (32 - ch->curr[t].tags))) == 0) return (1); /* If we have FBS */ if (ch->fbs_enabled) { /* Tagged command while untagged are active. */ if (ch->numrslotspd[t] != 0 && ch->numtslotspd[t] == 0) return (1); } else { /* Tagged command while untagged are active. */ if (ch->numrslots != 0 && ch->numtslots == 0) return (1); /* Tagged command while tagged to other target is active. */ if (ch->numtslots != 0 && ch->taggedtarget != ccb->ccb_h.target_id) return (1); } } else { /* If we have FBS */ if (ch->fbs_enabled) { /* Untagged command while tagged are active. */ if (ch->numrslotspd[t] != 0 && ch->numtslotspd[t] != 0) return (1); } else { /* Untagged command while tagged are active. */ if (ch->numrslots != 0 && ch->numtslots != 0) return (1); } } if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & (CAM_ATAIO_CONTROL | CAM_ATAIO_NEEDRESULT))) { /* Atomic command while anything active. */ if (ch->numrslots != 0) return (1); } /* We have some atomic command running. */ if (ch->aslots != 0) return (1); return (0); } /* Must be called with channel locked. */ static void ahci_begin_transaction(device_t dev, union ccb *ccb) { struct ahci_channel *ch = device_get_softc(dev); struct ahci_slot *slot; int tag, tags; /* Choose empty slot. */ tags = ch->numslots; if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA)) tags = ch->curr[ccb->ccb_h.target_id].tags; tag = ch->lastslot; while (1) { if (tag >= tags) tag = 0; if (ch->slot[tag].state == AHCI_SLOT_EMPTY) break; tag++; }; ch->lastslot = tag; /* Occupy chosen slot. */ slot = &ch->slot[tag]; slot->ccb = ccb; /* Stop PM timer. */ if (ch->numrslots == 0 && ch->pm_level > 3) callout_stop(&ch->pm_timer); /* Update channel stats. */ ch->oslots |= (1 << slot->slot); ch->numrslots++; ch->numrslotspd[ccb->ccb_h.target_id]++; if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA)) { ch->numtslots++; ch->numtslotspd[ccb->ccb_h.target_id]++; ch->taggedtarget = ccb->ccb_h.target_id; } if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & (CAM_ATAIO_CONTROL | CAM_ATAIO_NEEDRESULT))) ch->aslots |= (1 << slot->slot); slot->dma.nsegs = 0; /* If request moves data, setup and load SG list */ if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) { void *buf; bus_size_t size; slot->state = AHCI_SLOT_LOADING; if (ccb->ccb_h.func_code == XPT_ATA_IO) { buf = ccb->ataio.data_ptr; size = ccb->ataio.dxfer_len; } else { buf = ccb->csio.data_ptr; size = ccb->csio.dxfer_len; } bus_dmamap_load(ch->dma.data_tag, slot->dma.data_map, buf, size, ahci_dmasetprd, slot, 0); } else ahci_execute_transaction(slot); } /* Locked by busdma engine. */ static void ahci_dmasetprd(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct ahci_slot *slot = arg; struct ahci_channel *ch = device_get_softc(slot->dev); struct ahci_cmd_tab *ctp; struct ahci_dma_prd *prd; int i; if (error) { device_printf(slot->dev, "DMA load error\n"); ahci_end_transaction(slot, AHCI_ERR_INVALID); return; } KASSERT(nsegs <= AHCI_SG_ENTRIES, ("too many DMA segment entries\n")); /* Get a piece of the workspace for this request */ ctp = (struct ahci_cmd_tab *) (ch->dma.work + AHCI_CT_OFFSET + (AHCI_CT_SIZE * slot->slot)); /* Fill S/G table */ prd = &ctp->prd_tab[0]; for (i = 0; i < nsegs; i++) { prd[i].dba = htole64(segs[i].ds_addr); prd[i].dbc = htole32((segs[i].ds_len - 1) & AHCI_PRD_MASK); } slot->dma.nsegs = nsegs; bus_dmamap_sync(ch->dma.data_tag, slot->dma.data_map, ((slot->ccb->ccb_h.flags & CAM_DIR_IN) ? BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE)); ahci_execute_transaction(slot); } /* Must be called with channel locked. */ static void ahci_execute_transaction(struct ahci_slot *slot) { device_t dev = slot->dev; struct ahci_channel *ch = device_get_softc(dev); struct ahci_cmd_tab *ctp; struct ahci_cmd_list *clp; union ccb *ccb = slot->ccb; int port = ccb->ccb_h.target_id & 0x0f; int fis_size, i; uint8_t *fis = ch->dma.rfis + 0x40; uint8_t val; /* Get a piece of the workspace for this request */ ctp = (struct ahci_cmd_tab *) (ch->dma.work + AHCI_CT_OFFSET + (AHCI_CT_SIZE * slot->slot)); /* Setup the FIS for this request */ if (!(fis_size = ahci_setup_fis(dev, ctp, ccb, slot->slot))) { device_printf(ch->dev, "Setting up SATA FIS failed\n"); ahci_end_transaction(slot, AHCI_ERR_INVALID); return; } /* Setup the command list entry */ clp = (struct ahci_cmd_list *) (ch->dma.work + AHCI_CL_OFFSET + (AHCI_CL_SIZE * slot->slot)); clp->cmd_flags = htole16( (ccb->ccb_h.flags & CAM_DIR_OUT ? AHCI_CMD_WRITE : 0) | (ccb->ccb_h.func_code == XPT_SCSI_IO ? (AHCI_CMD_ATAPI | AHCI_CMD_PREFETCH) : 0) | (fis_size / sizeof(u_int32_t)) | (port << 12)); clp->prd_length = htole16(slot->dma.nsegs); /* Special handling for Soft Reset command. */ if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL)) { if (ccb->ataio.cmd.control & ATA_A_RESET) { /* Kick controller into sane state */ ahci_stop(dev); ahci_clo(dev); ahci_start(dev, 0); clp->cmd_flags |= AHCI_CMD_RESET | AHCI_CMD_CLR_BUSY; } else { /* Prepare FIS receive area for check. */ for (i = 0; i < 20; i++) fis[i] = 0xff; } } clp->bytecount = 0; clp->cmd_table_phys = htole64(ch->dma.work_bus + AHCI_CT_OFFSET + (AHCI_CT_SIZE * slot->slot)); bus_dmamap_sync(ch->dma.work_tag, ch->dma.work_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); bus_dmamap_sync(ch->dma.rfis_tag, ch->dma.rfis_map, BUS_DMASYNC_PREREAD); /* Set ACTIVE bit for NCQ commands. */ if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA)) { ATA_OUTL(ch->r_mem, AHCI_P_SACT, 1 << slot->slot); } /* If FBS is enabled, set PMP port. */ if (ch->fbs_enabled) { ATA_OUTL(ch->r_mem, AHCI_P_FBS, AHCI_P_FBS_EN | (port << AHCI_P_FBS_DEV_SHIFT)); } /* Issue command to the controller. */ slot->state = AHCI_SLOT_RUNNING; ch->rslots |= (1 << slot->slot); ATA_OUTL(ch->r_mem, AHCI_P_CI, (1 << slot->slot)); /* Device reset commands doesn't interrupt. Poll them. */ if (ccb->ccb_h.func_code == XPT_ATA_IO && (ccb->ataio.cmd.command == ATA_DEVICE_RESET || (ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL))) { int count, timeout = ccb->ccb_h.timeout; enum ahci_err_type et = AHCI_ERR_NONE; for (count = 0; count < timeout; count++) { DELAY(1000); if (!(ATA_INL(ch->r_mem, AHCI_P_CI) & (1 << slot->slot))) break; if (ATA_INL(ch->r_mem, AHCI_P_TFD) & ATA_S_ERROR) { device_printf(ch->dev, "Poll error on slot %d, TFD: %04x\n", slot->slot, ATA_INL(ch->r_mem, AHCI_P_TFD)); et = AHCI_ERR_TFE; break; } /* Workaround for ATI SB600/SB700 chipsets. */ if (ccb->ccb_h.target_id == 15 && pci_get_vendor(device_get_parent(dev)) == 0x1002 && (ATA_INL(ch->r_mem, AHCI_P_IS) & AHCI_P_IX_IPM)) { et = AHCI_ERR_TIMEOUT; break; } } if (timeout && (count >= timeout)) { device_printf(ch->dev, "Poll timeout on slot %d\n", slot->slot); device_printf(dev, "is %08x cs %08x ss %08x " "rs %08x tfd %02x serr %08x\n", ATA_INL(ch->r_mem, AHCI_P_IS), ATA_INL(ch->r_mem, AHCI_P_CI), ATA_INL(ch->r_mem, AHCI_P_SACT), ch->rslots, ATA_INL(ch->r_mem, AHCI_P_TFD), ATA_INL(ch->r_mem, AHCI_P_SERR)); et = AHCI_ERR_TIMEOUT; } /* Marvell controllers do not wait for readyness. */ if ((ch->quirks & AHCI_Q_NOBSYRES) && (ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL) && (ccb->ataio.cmd.control & ATA_A_RESET) == 0) { while ((val = fis[2]) & (ATA_S_BUSY | ATA_S_DRQ)) { DELAY(1000); if (count++ >= timeout) { device_printf(dev, "device is not " "ready after soft-reset: " "tfd = %08x\n", val); et = AHCI_ERR_TIMEOUT; break; } } } ahci_end_transaction(slot, et); /* Kick controller into sane state and enable FBS. */ if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL) && (ccb->ataio.cmd.control & ATA_A_RESET) == 0) { ahci_stop(ch->dev); ahci_start(ch->dev, 1); } return; } /* Start command execution timeout */ callout_reset(&slot->timeout, (int)ccb->ccb_h.timeout * hz / 2000, (timeout_t*)ahci_timeout, slot); return; } /* Must be called with channel locked. */ static void ahci_process_timeout(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); int i; mtx_assert(&ch->mtx, MA_OWNED); /* Handle the rest of commands. */ for (i = 0; i < ch->numslots; i++) { /* Do we have a running request on slot? */ if (ch->slot[i].state < AHCI_SLOT_RUNNING) continue; ahci_end_transaction(&ch->slot[i], AHCI_ERR_TIMEOUT); } } /* Must be called with channel locked. */ static void ahci_rearm_timeout(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); int i; mtx_assert(&ch->mtx, MA_OWNED); for (i = 0; i < ch->numslots; i++) { struct ahci_slot *slot = &ch->slot[i]; /* Do we have a running request on slot? */ if (slot->state < AHCI_SLOT_RUNNING) continue; if ((ch->toslots & (1 << i)) == 0) continue; callout_reset(&slot->timeout, (int)slot->ccb->ccb_h.timeout * hz / 2000, (timeout_t*)ahci_timeout, slot); } } /* Locked by callout mechanism. */ static void ahci_timeout(struct ahci_slot *slot) { device_t dev = slot->dev; struct ahci_channel *ch = device_get_softc(dev); uint32_t sstatus; int ccs; int i; /* Check for stale timeout. */ if (slot->state < AHCI_SLOT_RUNNING) return; /* Check if slot was not being executed last time we checked. */ if (slot->state < AHCI_SLOT_EXECUTING) { /* Check if slot started executing. */ sstatus = ATA_INL(ch->r_mem, AHCI_P_SACT); ccs = (ATA_INL(ch->r_mem, AHCI_P_CMD) & AHCI_P_CMD_CCS_MASK) >> AHCI_P_CMD_CCS_SHIFT; if ((sstatus & (1 << slot->slot)) != 0 || ccs == slot->slot || ch->fbs_enabled) slot->state = AHCI_SLOT_EXECUTING; callout_reset(&slot->timeout, (int)slot->ccb->ccb_h.timeout * hz / 2000, (timeout_t*)ahci_timeout, slot); return; } device_printf(dev, "Timeout on slot %d\n", slot->slot); device_printf(dev, "is %08x cs %08x ss %08x rs %08x tfd %02x serr %08x\n", ATA_INL(ch->r_mem, AHCI_P_IS), ATA_INL(ch->r_mem, AHCI_P_CI), ATA_INL(ch->r_mem, AHCI_P_SACT), ch->rslots, ATA_INL(ch->r_mem, AHCI_P_TFD), ATA_INL(ch->r_mem, AHCI_P_SERR)); /* Handle frozen command. */ if (ch->frozen) { union ccb *fccb = ch->frozen; ch->frozen = NULL; fccb->ccb_h.status = CAM_REQUEUE_REQ | CAM_RELEASE_SIMQ; if (!(fccb->ccb_h.status & CAM_DEV_QFRZN)) { xpt_freeze_devq(fccb->ccb_h.path, 1); fccb->ccb_h.status |= CAM_DEV_QFRZN; } xpt_done(fccb); } if (!ch->fbs_enabled) { /* Without FBS we know real timeout source. */ ch->fatalerr = 1; /* Handle command with timeout. */ ahci_end_transaction(&ch->slot[slot->slot], AHCI_ERR_TIMEOUT); /* Handle the rest of commands. */ for (i = 0; i < ch->numslots; i++) { /* Do we have a running request on slot? */ if (ch->slot[i].state < AHCI_SLOT_RUNNING) continue; ahci_end_transaction(&ch->slot[i], AHCI_ERR_INNOCENT); } } else { /* With FBS we wait for other commands timeout and pray. */ if (ch->toslots == 0) xpt_freeze_simq(ch->sim, 1); ch->toslots |= (1 << slot->slot); if ((ch->rslots & ~ch->toslots) == 0) ahci_process_timeout(dev); else device_printf(dev, " ... waiting for slots %08x\n", ch->rslots & ~ch->toslots); } } /* Must be called with channel locked. */ static void ahci_end_transaction(struct ahci_slot *slot, enum ahci_err_type et) { device_t dev = slot->dev; struct ahci_channel *ch = device_get_softc(dev); union ccb *ccb = slot->ccb; struct ahci_cmd_list *clp; int lastto; bus_dmamap_sync(ch->dma.work_tag, ch->dma.work_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); clp = (struct ahci_cmd_list *) (ch->dma.work + AHCI_CL_OFFSET + (AHCI_CL_SIZE * slot->slot)); /* Read result registers to the result struct * May be incorrect if several commands finished same time, * so read only when sure or have to. */ if (ccb->ccb_h.func_code == XPT_ATA_IO) { struct ata_res *res = &ccb->ataio.res; if ((et == AHCI_ERR_TFE) || (ccb->ataio.cmd.flags & CAM_ATAIO_NEEDRESULT)) { u_int8_t *fis = ch->dma.rfis + 0x40; bus_dmamap_sync(ch->dma.rfis_tag, ch->dma.rfis_map, BUS_DMASYNC_POSTREAD); if (ch->fbs_enabled) { fis += ccb->ccb_h.target_id * 256; res->status = fis[2]; res->error = fis[3]; } else { uint16_t tfd = ATA_INL(ch->r_mem, AHCI_P_TFD); res->status = tfd; res->error = tfd >> 8; } res->lba_low = fis[4]; res->lba_mid = fis[5]; res->lba_high = fis[6]; res->device = fis[7]; res->lba_low_exp = fis[8]; res->lba_mid_exp = fis[9]; res->lba_high_exp = fis[10]; res->sector_count = fis[12]; res->sector_count_exp = fis[13]; } else bzero(res, sizeof(*res)); if ((ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA) == 0 && (ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) { ccb->ataio.resid = ccb->ataio.dxfer_len - le32toh(clp->bytecount); } } else { if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) { ccb->csio.resid = ccb->csio.dxfer_len - le32toh(clp->bytecount); } } if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE) { bus_dmamap_sync(ch->dma.data_tag, slot->dma.data_map, (ccb->ccb_h.flags & CAM_DIR_IN) ? BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ch->dma.data_tag, slot->dma.data_map); } if (et != AHCI_ERR_NONE) ch->eslots |= (1 << slot->slot); /* In case of error, freeze device for proper recovery. */ if ((et != AHCI_ERR_NONE) && (!ch->readlog) && !(ccb->ccb_h.status & CAM_DEV_QFRZN)) { xpt_freeze_devq(ccb->ccb_h.path, 1); ccb->ccb_h.status |= CAM_DEV_QFRZN; } /* Set proper result status. */ ccb->ccb_h.status &= ~CAM_STATUS_MASK; switch (et) { case AHCI_ERR_NONE: ccb->ccb_h.status |= CAM_REQ_CMP; if (ccb->ccb_h.func_code == XPT_SCSI_IO) ccb->csio.scsi_status = SCSI_STATUS_OK; break; case AHCI_ERR_INVALID: ch->fatalerr = 1; ccb->ccb_h.status |= CAM_REQ_INVALID; break; case AHCI_ERR_INNOCENT: ccb->ccb_h.status |= CAM_REQUEUE_REQ; break; case AHCI_ERR_TFE: case AHCI_ERR_NCQ: if (ccb->ccb_h.func_code == XPT_SCSI_IO) { ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR; ccb->csio.scsi_status = SCSI_STATUS_CHECK_COND; } else { ccb->ccb_h.status |= CAM_ATA_STATUS_ERROR; } break; case AHCI_ERR_SATA: ch->fatalerr = 1; if (!ch->readlog) { xpt_freeze_simq(ch->sim, 1); ccb->ccb_h.status &= ~CAM_STATUS_MASK; ccb->ccb_h.status |= CAM_RELEASE_SIMQ; } ccb->ccb_h.status |= CAM_UNCOR_PARITY; break; case AHCI_ERR_TIMEOUT: if (!ch->readlog) { xpt_freeze_simq(ch->sim, 1); ccb->ccb_h.status &= ~CAM_STATUS_MASK; ccb->ccb_h.status |= CAM_RELEASE_SIMQ; } ccb->ccb_h.status |= CAM_CMD_TIMEOUT; break; default: ch->fatalerr = 1; ccb->ccb_h.status |= CAM_REQ_CMP_ERR; } /* Free slot. */ ch->oslots &= ~(1 << slot->slot); ch->rslots &= ~(1 << slot->slot); ch->aslots &= ~(1 << slot->slot); slot->state = AHCI_SLOT_EMPTY; slot->ccb = NULL; /* Update channel stats. */ ch->numrslots--; ch->numrslotspd[ccb->ccb_h.target_id]--; if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA)) { ch->numtslots--; ch->numtslotspd[ccb->ccb_h.target_id]--; } /* Cancel timeout state if request completed normally. */ if (et != AHCI_ERR_TIMEOUT) { lastto = (ch->toslots == (1 << slot->slot)); ch->toslots &= ~(1 << slot->slot); if (lastto) xpt_release_simq(ch->sim, TRUE); } /* If it was first request of reset sequence and there is no error, * proceed to second request. */ if ((ccb->ccb_h.func_code == XPT_ATA_IO) && (ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL) && (ccb->ataio.cmd.control & ATA_A_RESET) && et == AHCI_ERR_NONE) { ccb->ataio.cmd.control &= ~ATA_A_RESET; ahci_begin_transaction(dev, ccb); return; } /* If it was our READ LOG command - process it. */ if (ch->readlog) { ahci_process_read_log(dev, ccb); /* If it was NCQ command error, put result on hold. */ } else if (et == AHCI_ERR_NCQ) { ch->hold[slot->slot] = ccb; ch->numhslots++; } else xpt_done(ccb); /* Unfreeze frozen command. */ if (ch->frozen && !ahci_check_collision(dev, ch->frozen)) { union ccb *fccb = ch->frozen; ch->frozen = NULL; ahci_begin_transaction(dev, fccb); xpt_release_simq(ch->sim, TRUE); } /* If we have no other active commands, ... */ if (ch->rslots == 0) { /* if there was fatal error - reset port. */ if (ch->toslots != 0 || ch->fatalerr) { ahci_reset(dev); } else { /* if we have slots in error, we can reinit port. */ if (ch->eslots != 0) { ahci_stop(dev); ahci_start(dev, 1); } /* if there commands on hold, we can do READ LOG. */ if (!ch->readlog && ch->numhslots) ahci_issue_read_log(dev); } /* If all the rest of commands are in timeout - give them chance. */ } else if ((ch->rslots & ~ch->toslots) == 0 && et != AHCI_ERR_TIMEOUT) ahci_rearm_timeout(dev); /* Start PM timer. */ if (ch->numrslots == 0 && ch->pm_level > 3 && (ch->curr[ch->pm_present ? 15 : 0].caps & CTS_SATA_CAPS_D_PMREQ)) { callout_schedule(&ch->pm_timer, (ch->pm_level == 4) ? hz / 1000 : hz / 8); } } static void ahci_issue_read_log(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); union ccb *ccb; struct ccb_ataio *ataio; int i; ch->readlog = 1; /* Find some holden command. */ for (i = 0; i < ch->numslots; i++) { if (ch->hold[i]) break; } ccb = xpt_alloc_ccb_nowait(); if (ccb == NULL) { device_printf(dev, "Unable allocate READ LOG command"); return; /* XXX */ } ccb->ccb_h = ch->hold[i]->ccb_h; /* Reuse old header. */ ccb->ccb_h.func_code = XPT_ATA_IO; ccb->ccb_h.flags = CAM_DIR_IN; ccb->ccb_h.timeout = 1000; /* 1s should be enough. */ ataio = &ccb->ataio; ataio->data_ptr = malloc(512, M_AHCI, M_NOWAIT); if (ataio->data_ptr == NULL) { xpt_free_ccb(ccb); device_printf(dev, "Unable allocate memory for READ LOG command"); return; /* XXX */ } ataio->dxfer_len = 512; bzero(&ataio->cmd, sizeof(ataio->cmd)); ataio->cmd.flags = CAM_ATAIO_48BIT; ataio->cmd.command = 0x2F; /* READ LOG EXT */ ataio->cmd.sector_count = 1; ataio->cmd.sector_count_exp = 0; ataio->cmd.lba_low = 0x10; ataio->cmd.lba_mid = 0; ataio->cmd.lba_mid_exp = 0; /* Freeze SIM while doing READ LOG EXT. */ xpt_freeze_simq(ch->sim, 1); ahci_begin_transaction(dev, ccb); } static void ahci_process_read_log(device_t dev, union ccb *ccb) { struct ahci_channel *ch = device_get_softc(dev); uint8_t *data; struct ata_res *res; int i; ch->readlog = 0; data = ccb->ataio.data_ptr; if ((ccb->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP && (data[0] & 0x80) == 0) { for (i = 0; i < ch->numslots; i++) { if (!ch->hold[i]) continue; if ((data[0] & 0x1F) == i) { res = &ch->hold[i]->ataio.res; res->status = data[2]; res->error = data[3]; res->lba_low = data[4]; res->lba_mid = data[5]; res->lba_high = data[6]; res->device = data[7]; res->lba_low_exp = data[8]; res->lba_mid_exp = data[9]; res->lba_high_exp = data[10]; res->sector_count = data[12]; res->sector_count_exp = data[13]; } else { ch->hold[i]->ccb_h.status &= ~CAM_STATUS_MASK; ch->hold[i]->ccb_h.status |= CAM_REQUEUE_REQ; } xpt_done(ch->hold[i]); ch->hold[i] = NULL; ch->numhslots--; } } else { if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) device_printf(dev, "Error while READ LOG EXT\n"); else if ((data[0] & 0x80) == 0) { device_printf(dev, "Non-queued command error in READ LOG EXT\n"); } for (i = 0; i < ch->numslots; i++) { if (!ch->hold[i]) continue; xpt_done(ch->hold[i]); ch->hold[i] = NULL; ch->numhslots--; } } free(ccb->ataio.data_ptr, M_AHCI); xpt_free_ccb(ccb); xpt_release_simq(ch->sim, TRUE); } static void ahci_start(device_t dev, int fbs) { struct ahci_channel *ch = device_get_softc(dev); u_int32_t cmd; /* Clear SATA error register */ ATA_OUTL(ch->r_mem, AHCI_P_SERR, 0xFFFFFFFF); /* Clear any interrupts pending on this channel */ ATA_OUTL(ch->r_mem, AHCI_P_IS, 0xFFFFFFFF); /* Configure FIS-based switching if supported. */ if (ch->chcaps & AHCI_P_CMD_FBSCP) { ch->fbs_enabled = (fbs && ch->pm_present) ? 1 : 0; ATA_OUTL(ch->r_mem, AHCI_P_FBS, ch->fbs_enabled ? AHCI_P_FBS_EN : 0); } /* Start operations on this channel */ cmd = ATA_INL(ch->r_mem, AHCI_P_CMD); cmd &= ~AHCI_P_CMD_PMA; ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd | AHCI_P_CMD_ST | (ch->pm_present ? AHCI_P_CMD_PMA : 0)); } static void ahci_stop(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); u_int32_t cmd; int timeout; /* Kill all activity on this channel */ cmd = ATA_INL(ch->r_mem, AHCI_P_CMD); ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd & ~AHCI_P_CMD_ST); /* Wait for activity stop. */ timeout = 0; do { DELAY(1000); if (timeout++ > 1000) { device_printf(dev, "stopping AHCI engine failed\n"); break; } } while (ATA_INL(ch->r_mem, AHCI_P_CMD) & AHCI_P_CMD_CR); ch->eslots = 0; } static void ahci_clo(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); u_int32_t cmd; int timeout; /* Issue Command List Override if supported */ if (ch->caps & AHCI_CAP_SCLO) { cmd = ATA_INL(ch->r_mem, AHCI_P_CMD); cmd |= AHCI_P_CMD_CLO; ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd); timeout = 0; do { DELAY(1000); if (timeout++ > 1000) { device_printf(dev, "executing CLO failed\n"); break; } } while (ATA_INL(ch->r_mem, AHCI_P_CMD) & AHCI_P_CMD_CLO); } } static void ahci_stop_fr(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); u_int32_t cmd; int timeout; /* Kill all FIS reception on this channel */ cmd = ATA_INL(ch->r_mem, AHCI_P_CMD); ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd & ~AHCI_P_CMD_FRE); /* Wait for FIS reception stop. */ timeout = 0; do { DELAY(1000); if (timeout++ > 1000) { device_printf(dev, "stopping AHCI FR engine failed\n"); break; } } while (ATA_INL(ch->r_mem, AHCI_P_CMD) & AHCI_P_CMD_FR); } static void ahci_start_fr(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); u_int32_t cmd; /* Start FIS reception on this channel */ cmd = ATA_INL(ch->r_mem, AHCI_P_CMD); ATA_OUTL(ch->r_mem, AHCI_P_CMD, cmd | AHCI_P_CMD_FRE); } static int ahci_wait_ready(device_t dev, int t) { struct ahci_channel *ch = device_get_softc(dev); int timeout = 0; uint32_t val; while ((val = ATA_INL(ch->r_mem, AHCI_P_TFD)) & (ATA_S_BUSY | ATA_S_DRQ)) { DELAY(1000); if (timeout++ > t) { device_printf(dev, "device is not ready (timeout %dms) " "tfd = %08x\n", t, val); return (EBUSY); } } if (bootverbose) device_printf(dev, "ready wait time=%dms\n", timeout); return (0); } static void ahci_reset(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); struct ahci_controller *ctlr = device_get_softc(device_get_parent(dev)); int i; xpt_freeze_simq(ch->sim, 1); if (bootverbose) device_printf(dev, "AHCI reset...\n"); /* Requeue freezed command. */ if (ch->frozen) { union ccb *fccb = ch->frozen; ch->frozen = NULL; fccb->ccb_h.status = CAM_REQUEUE_REQ | CAM_RELEASE_SIMQ; if (!(fccb->ccb_h.status & CAM_DEV_QFRZN)) { xpt_freeze_devq(fccb->ccb_h.path, 1); fccb->ccb_h.status |= CAM_DEV_QFRZN; } xpt_done(fccb); } /* Kill the engine and requeue all running commands. */ ahci_stop(dev); for (i = 0; i < ch->numslots; i++) { /* Do we have a running request on slot? */ if (ch->slot[i].state < AHCI_SLOT_RUNNING) continue; /* XXX; Commands in loading state. */ ahci_end_transaction(&ch->slot[i], AHCI_ERR_INNOCENT); } for (i = 0; i < ch->numslots; i++) { if (!ch->hold[i]) continue; xpt_done(ch->hold[i]); ch->hold[i] = NULL; ch->numhslots--; } if (ch->toslots != 0) xpt_release_simq(ch->sim, TRUE); ch->eslots = 0; ch->toslots = 0; ch->fatalerr = 0; /* Tell the XPT about the event */ xpt_async(AC_BUS_RESET, ch->path, NULL); /* Disable port interrupts */ ATA_OUTL(ch->r_mem, AHCI_P_IE, 0); /* Reset and reconnect PHY, */ if (!ahci_sata_phy_reset(dev)) { if (bootverbose) device_printf(dev, "AHCI reset done: phy reset found no device\n"); ch->devices = 0; /* Enable wanted port interrupts */ ATA_OUTL(ch->r_mem, AHCI_P_IE, (AHCI_P_IX_CPD | AHCI_P_IX_PRC | AHCI_P_IX_PC)); xpt_release_simq(ch->sim, TRUE); return; } /* Wait for clearing busy status. */ if (ahci_wait_ready(dev, 15000)) ahci_clo(dev); ahci_start(dev, 1); ch->devices = 1; /* Enable wanted port interrupts */ ATA_OUTL(ch->r_mem, AHCI_P_IE, (AHCI_P_IX_CPD | AHCI_P_IX_TFE | AHCI_P_IX_HBF | AHCI_P_IX_HBD | AHCI_P_IX_IF | AHCI_P_IX_OF | ((ch->pm_level == 0) ? AHCI_P_IX_PRC | AHCI_P_IX_PC : 0) | AHCI_P_IX_DP | AHCI_P_IX_UF | (ctlr->ccc ? 0 : AHCI_P_IX_SDB) | AHCI_P_IX_DS | AHCI_P_IX_PS | (ctlr->ccc ? 0 : AHCI_P_IX_DHR))); if (bootverbose) device_printf(dev, "AHCI reset done: device found\n"); xpt_release_simq(ch->sim, TRUE); } static int ahci_setup_fis(device_t dev, struct ahci_cmd_tab *ctp, union ccb *ccb, int tag) { struct ahci_channel *ch = device_get_softc(dev); u_int8_t *fis = &ctp->cfis[0]; bzero(ctp->cfis, 64); fis[0] = 0x27; /* host to device */ fis[1] = (ccb->ccb_h.target_id & 0x0f); if (ccb->ccb_h.func_code == XPT_SCSI_IO) { fis[1] |= 0x80; fis[2] = ATA_PACKET_CMD; if ((ccb->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_NONE && ch->curr[ccb->ccb_h.target_id].mode >= ATA_DMA) fis[3] = ATA_F_DMA; else { fis[5] = ccb->csio.dxfer_len; fis[6] = ccb->csio.dxfer_len >> 8; } fis[7] = ATA_D_LBA; fis[15] = ATA_A_4BIT; bzero(ctp->acmd, 32); bcopy((ccb->ccb_h.flags & CAM_CDB_POINTER) ? ccb->csio.cdb_io.cdb_ptr : ccb->csio.cdb_io.cdb_bytes, ctp->acmd, ccb->csio.cdb_len); } else if ((ccb->ataio.cmd.flags & CAM_ATAIO_CONTROL) == 0) { fis[1] |= 0x80; fis[2] = ccb->ataio.cmd.command; fis[3] = ccb->ataio.cmd.features; fis[4] = ccb->ataio.cmd.lba_low; fis[5] = ccb->ataio.cmd.lba_mid; fis[6] = ccb->ataio.cmd.lba_high; fis[7] = ccb->ataio.cmd.device; fis[8] = ccb->ataio.cmd.lba_low_exp; fis[9] = ccb->ataio.cmd.lba_mid_exp; fis[10] = ccb->ataio.cmd.lba_high_exp; fis[11] = ccb->ataio.cmd.features_exp; if (ccb->ataio.cmd.flags & CAM_ATAIO_FPDMA) { fis[12] = tag << 3; fis[13] = 0; } else { fis[12] = ccb->ataio.cmd.sector_count; fis[13] = ccb->ataio.cmd.sector_count_exp; } fis[15] = ATA_A_4BIT; } else { fis[15] = ccb->ataio.cmd.control; } return (20); } static int ahci_sata_connect(struct ahci_channel *ch) { u_int32_t status; int timeout; /* Wait up to 100ms for "connect well" */ for (timeout = 0; timeout < 100 ; timeout++) { status = ATA_INL(ch->r_mem, AHCI_P_SSTS); if (((status & ATA_SS_DET_MASK) == ATA_SS_DET_PHY_ONLINE) && ((status & ATA_SS_SPD_MASK) != ATA_SS_SPD_NO_SPEED) && ((status & ATA_SS_IPM_MASK) == ATA_SS_IPM_ACTIVE)) break; if ((status & ATA_SS_DET_MASK) == ATA_SS_DET_PHY_OFFLINE) { if (bootverbose) { device_printf(ch->dev, "SATA offline status=%08x\n", status); } return (0); } DELAY(1000); } if (timeout >= 100) { if (bootverbose) { device_printf(ch->dev, "SATA connect timeout status=%08x\n", status); } return (0); } if (bootverbose) { device_printf(ch->dev, "SATA connect time=%dms status=%08x\n", timeout, status); } /* Clear SATA error register */ ATA_OUTL(ch->r_mem, AHCI_P_SERR, 0xffffffff); return (1); } static int ahci_sata_phy_reset(device_t dev) { struct ahci_channel *ch = device_get_softc(dev); int sata_rev; uint32_t val; sata_rev = ch->user[ch->pm_present ? 15 : 0].revision; if (sata_rev == 1) val = ATA_SC_SPD_SPEED_GEN1; else if (sata_rev == 2) val = ATA_SC_SPD_SPEED_GEN2; else if (sata_rev == 3) val = ATA_SC_SPD_SPEED_GEN3; else val = 0; ATA_OUTL(ch->r_mem, AHCI_P_SCTL, ATA_SC_DET_RESET | val | ATA_SC_IPM_DIS_PARTIAL | ATA_SC_IPM_DIS_SLUMBER); DELAY(5000); ATA_OUTL(ch->r_mem, AHCI_P_SCTL, ATA_SC_DET_IDLE | val | ((ch->pm_level > 0) ? 0 : (ATA_SC_IPM_DIS_PARTIAL | ATA_SC_IPM_DIS_SLUMBER))); DELAY(5000); if (!ahci_sata_connect(ch)) { if (ch->pm_level > 0) ATA_OUTL(ch->r_mem, AHCI_P_SCTL, ATA_SC_DET_DISABLE); return (0); } return (1); } static int ahci_check_ids(device_t dev, union ccb *ccb) { struct ahci_channel *ch = device_get_softc(dev); if (ccb->ccb_h.target_id > ((ch->caps & AHCI_CAP_SPM) ? 15 : 0)) { ccb->ccb_h.status = CAM_TID_INVALID; xpt_done(ccb); return (-1); } if (ccb->ccb_h.target_lun != 0) { ccb->ccb_h.status = CAM_LUN_INVALID; xpt_done(ccb); return (-1); } return (0); } static void ahciaction(struct cam_sim *sim, union ccb *ccb) { device_t dev, parent; struct ahci_channel *ch; CAM_DEBUG(ccb->ccb_h.path, CAM_DEBUG_TRACE, ("ahciaction func_code=%x\n", ccb->ccb_h.func_code)); ch = (struct ahci_channel *)cam_sim_softc(sim); dev = ch->dev; switch (ccb->ccb_h.func_code) { /* Common cases first */ case XPT_ATA_IO: /* Execute the requested I/O operation */ case XPT_SCSI_IO: if (ahci_check_ids(dev, ccb)) return; if (ch->devices == 0 || (ch->pm_present == 0 && ccb->ccb_h.target_id > 0 && ccb->ccb_h.target_id < 15)) { ccb->ccb_h.status = CAM_SEL_TIMEOUT; break; } /* Check for command collision. */ if (ahci_check_collision(dev, ccb)) { /* Freeze command. */ ch->frozen = ccb; /* We have only one frozen slot, so freeze simq also. */ xpt_freeze_simq(ch->sim, 1); return; } ahci_begin_transaction(dev, ccb); return; case XPT_EN_LUN: /* Enable LUN as a target */ case XPT_TARGET_IO: /* Execute target I/O request */ case XPT_ACCEPT_TARGET_IO: /* Accept Host Target Mode CDB */ case XPT_CONT_TARGET_IO: /* Continue Host Target I/O Connection*/ case XPT_ABORT: /* Abort the specified CCB */ /* XXX Implement */ ccb->ccb_h.status = CAM_REQ_INVALID; break; case XPT_SET_TRAN_SETTINGS: { struct ccb_trans_settings *cts = &ccb->cts; struct ahci_device *d; if (ahci_check_ids(dev, ccb)) return; if (cts->type == CTS_TYPE_CURRENT_SETTINGS) d = &ch->curr[ccb->ccb_h.target_id]; else d = &ch->user[ccb->ccb_h.target_id]; if (cts->xport_specific.sata.valid & CTS_SATA_VALID_REVISION) d->revision = cts->xport_specific.sata.revision; if (cts->xport_specific.sata.valid & CTS_SATA_VALID_MODE) d->mode = cts->xport_specific.sata.mode; if (cts->xport_specific.sata.valid & CTS_SATA_VALID_BYTECOUNT) d->bytecount = min(8192, cts->xport_specific.sata.bytecount); if (cts->xport_specific.sata.valid & CTS_SATA_VALID_TAGS) d->tags = min(ch->numslots, cts->xport_specific.sata.tags); if (cts->xport_specific.sata.valid & CTS_SATA_VALID_PM) ch->pm_present = cts->xport_specific.sata.pm_present; if (cts->xport_specific.sata.valid & CTS_SATA_VALID_ATAPI) d->atapi = cts->xport_specific.sata.atapi; if (cts->xport_specific.sata.valid & CTS_SATA_VALID_CAPS) d->caps = cts->xport_specific.sata.caps; ccb->ccb_h.status = CAM_REQ_CMP; break; } case XPT_GET_TRAN_SETTINGS: /* Get default/user set transfer settings for the target */ { struct ccb_trans_settings *cts = &ccb->cts; struct ahci_device *d; uint32_t status; if (ahci_check_ids(dev, ccb)) return; if (cts->type == CTS_TYPE_CURRENT_SETTINGS) d = &ch->curr[ccb->ccb_h.target_id]; else d = &ch->user[ccb->ccb_h.target_id]; cts->protocol = PROTO_ATA; cts->protocol_version = PROTO_VERSION_UNSPECIFIED; cts->transport = XPORT_SATA; cts->transport_version = XPORT_VERSION_UNSPECIFIED; cts->proto_specific.valid = 0; cts->xport_specific.sata.valid = 0; if (cts->type == CTS_TYPE_CURRENT_SETTINGS && (ccb->ccb_h.target_id == 15 || (ccb->ccb_h.target_id == 0 && !ch->pm_present))) { status = ATA_INL(ch->r_mem, AHCI_P_SSTS) & ATA_SS_SPD_MASK; if (status & 0x0f0) { cts->xport_specific.sata.revision = (status & 0x0f0) >> 4; cts->xport_specific.sata.valid |= CTS_SATA_VALID_REVISION; } cts->xport_specific.sata.caps = d->caps & CTS_SATA_CAPS_D; if (ch->pm_level) { if (ch->caps & (AHCI_CAP_PSC | AHCI_CAP_SSC)) cts->xport_specific.sata.caps |= CTS_SATA_CAPS_H_PMREQ; if (ch->caps2 & AHCI_CAP2_APST) cts->xport_specific.sata.caps |= CTS_SATA_CAPS_H_APST; } if ((ch->caps & AHCI_CAP_SNCQ) && (ch->quirks & AHCI_Q_NOAA) == 0) cts->xport_specific.sata.caps |= CTS_SATA_CAPS_H_DMAAA; cts->xport_specific.sata.caps &= ch->user[ccb->ccb_h.target_id].caps; cts->xport_specific.sata.valid |= CTS_SATA_VALID_CAPS; } else { cts->xport_specific.sata.revision = d->revision; cts->xport_specific.sata.valid |= CTS_SATA_VALID_REVISION; cts->xport_specific.sata.caps = d->caps; cts->xport_specific.sata.valid |= CTS_SATA_VALID_CAPS; } cts->xport_specific.sata.mode = d->mode; cts->xport_specific.sata.valid |= CTS_SATA_VALID_MODE; cts->xport_specific.sata.bytecount = d->bytecount; cts->xport_specific.sata.valid |= CTS_SATA_VALID_BYTECOUNT; cts->xport_specific.sata.pm_present = ch->pm_present; cts->xport_specific.sata.valid |= CTS_SATA_VALID_PM; cts->xport_specific.sata.tags = d->tags; cts->xport_specific.sata.valid |= CTS_SATA_VALID_TAGS; cts->xport_specific.sata.atapi = d->atapi; cts->xport_specific.sata.valid |= CTS_SATA_VALID_ATAPI; ccb->ccb_h.status = CAM_REQ_CMP; break; } case XPT_RESET_BUS: /* Reset the specified SCSI bus */ case XPT_RESET_DEV: /* Bus Device Reset the specified SCSI device */ ahci_reset(dev); ccb->ccb_h.status = CAM_REQ_CMP; break; case XPT_TERM_IO: /* Terminate the I/O process */ /* XXX Implement */ ccb->ccb_h.status = CAM_REQ_INVALID; break; case XPT_PATH_INQ: /* Path routing inquiry */ { struct ccb_pathinq *cpi = &ccb->cpi; parent = device_get_parent(dev); cpi->version_num = 1; /* XXX??? */ cpi->hba_inquiry = PI_SDTR_ABLE; if (ch->caps & AHCI_CAP_SNCQ) cpi->hba_inquiry |= PI_TAG_ABLE; if (ch->caps & AHCI_CAP_SPM) cpi->hba_inquiry |= PI_SATAPM; cpi->target_sprt = 0; cpi->hba_misc = PIM_SEQSCAN; cpi->hba_eng_cnt = 0; if (ch->caps & AHCI_CAP_SPM) cpi->max_target = 15; else cpi->max_target = 0; cpi->max_lun = 0; cpi->initiator_id = 0; cpi->bus_id = cam_sim_bus(sim); cpi->base_transfer_speed = 150000; strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strncpy(cpi->hba_vid, "AHCI", HBA_IDLEN); strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); cpi->transport = XPORT_SATA; cpi->transport_version = XPORT_VERSION_UNSPECIFIED; cpi->protocol = PROTO_ATA; cpi->protocol_version = PROTO_VERSION_UNSPECIFIED; cpi->maxio = MAXPHYS; /* ATI SB600 can't handle 256 sectors with FPDMA (NCQ). */ if (pci_get_devid(parent) == 0x43801002) cpi->maxio = min(cpi->maxio, 128 * 512); cpi->hba_vendor = pci_get_vendor(parent); cpi->hba_device = pci_get_device(parent); cpi->hba_subvendor = pci_get_subvendor(parent); cpi->hba_subdevice = pci_get_subdevice(parent); cpi->ccb_h.status = CAM_REQ_CMP; break; } default: ccb->ccb_h.status = CAM_REQ_INVALID; break; } xpt_done(ccb); } static void ahcipoll(struct cam_sim *sim) { struct ahci_channel *ch = (struct ahci_channel *)cam_sim_softc(sim); ahci_ch_intr(ch->dev); } Index: projects/binutils-2.17/sys/dev/cas/if_cas.c =================================================================== --- projects/binutils-2.17/sys/dev/cas/if_cas.c (revision 215829) +++ projects/binutils-2.17/sys/dev/cas/if_cas.c (revision 215830) @@ -1,2939 +1,2936 @@ /*- * Copyright (C) 2001 Eduardo Horvath. * Copyright (c) 2001-2003 Thomas Moestl * Copyright (c) 2007-2009 Marius Strobl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: NetBSD: gem.c,v 1.21 2002/06/01 23:50:58 lukem Exp * from: FreeBSD: if_gem.c 182060 2008-08-23 15:03:26Z marius */ #include __FBSDID("$FreeBSD$"); /* * driver for Sun Cassini/Cassini+ and National Semiconductor DP83065 * Saturn Gigabit Ethernet controllers */ #if 0 #define CAS_DEBUG #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__powerpc__) || defined(__sparc64__) #include #include #include #endif #include #include #include #include #include #include #include #include "miibus_if.h" #define RINGASSERT(n , min, max) \ CTASSERT(powerof2(n) && (n) >= (min) && (n) <= (max)) RINGASSERT(CAS_NRXCOMP, 128, 32768); RINGASSERT(CAS_NRXDESC, 32, 8192); RINGASSERT(CAS_NRXDESC2, 32, 8192); RINGASSERT(CAS_NTXDESC, 32, 8192); #undef RINGASSERT #define CCDASSERT(m, a) \ CTASSERT((offsetof(struct cas_control_data, m) & ((a) - 1)) == 0) CCDASSERT(ccd_rxcomps, CAS_RX_COMP_ALIGN); CCDASSERT(ccd_rxdescs, CAS_RX_DESC_ALIGN); CCDASSERT(ccd_rxdescs2, CAS_RX_DESC_ALIGN); #undef CCDASSERT #define CAS_TRIES 10000 /* * According to documentation, the hardware has support for basic TCP * checksum offloading only, in practice this can be also used for UDP * however (i.e. the problem of previous Sun NICs that a checksum of 0x0 * is not converted to 0xffff no longer exists). */ #define CAS_CSUM_FEATURES (CSUM_TCP | CSUM_UDP) static inline void cas_add_rxdesc(struct cas_softc *sc, u_int idx); static int cas_attach(struct cas_softc *sc); static int cas_bitwait(struct cas_softc *sc, bus_addr_t r, uint32_t clr, uint32_t set); static void cas_cddma_callback(void *xsc, bus_dma_segment_t *segs, int nsegs, int error); static void cas_detach(struct cas_softc *sc); static int cas_disable_rx(struct cas_softc *sc); static int cas_disable_tx(struct cas_softc *sc); static void cas_eint(struct cas_softc *sc, u_int status); static void cas_free(void *arg1, void* arg2); static void cas_init(void *xsc); static void cas_init_locked(struct cas_softc *sc); static void cas_init_regs(struct cas_softc *sc); static int cas_intr(void *v); static void cas_intr_task(void *arg, int pending __unused); static int cas_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static int cas_load_txmbuf(struct cas_softc *sc, struct mbuf **m_head); static int cas_mediachange(struct ifnet *ifp); static void cas_mediastatus(struct ifnet *ifp, struct ifmediareq *ifmr); static void cas_meminit(struct cas_softc *sc); static void cas_mifinit(struct cas_softc *sc); static int cas_mii_readreg(device_t dev, int phy, int reg); static void cas_mii_statchg(device_t dev); static int cas_mii_writereg(device_t dev, int phy, int reg, int val); static void cas_reset(struct cas_softc *sc); static int cas_reset_rx(struct cas_softc *sc); static int cas_reset_tx(struct cas_softc *sc); static void cas_resume(struct cas_softc *sc); static u_int cas_descsize(u_int sz); static void cas_rint(struct cas_softc *sc); static void cas_rint_timeout(void *arg); static inline void cas_rxcksum(struct mbuf *m, uint16_t cksum); static inline void cas_rxcompinit(struct cas_rx_comp *rxcomp); static u_int cas_rxcompsize(u_int sz); static void cas_rxdma_callback(void *xsc, bus_dma_segment_t *segs, int nsegs, int error); static void cas_setladrf(struct cas_softc *sc); static void cas_start(struct ifnet *ifp); static void cas_stop(struct ifnet *ifp); static void cas_suspend(struct cas_softc *sc); static void cas_tick(void *arg); static void cas_tint(struct cas_softc *sc); static void cas_tx_task(void *arg, int pending __unused); static inline void cas_txkick(struct cas_softc *sc); static void cas_watchdog(struct cas_softc *sc); static devclass_t cas_devclass; MODULE_DEPEND(cas, ether, 1, 1, 1); MODULE_DEPEND(cas, miibus, 1, 1, 1); #ifdef CAS_DEBUG #include #define KTR_CAS KTR_SPARE2 #endif static int cas_attach(struct cas_softc *sc) { struct cas_txsoft *txs; struct ifnet *ifp; int error, i; uint32_t v; /* Set up ifnet structure. */ ifp = sc->sc_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) return (ENOSPC); ifp->if_softc = sc; if_initname(ifp, device_get_name(sc->sc_dev), device_get_unit(sc->sc_dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_start = cas_start; ifp->if_ioctl = cas_ioctl; ifp->if_init = cas_init; IFQ_SET_MAXLEN(&ifp->if_snd, CAS_TXQUEUELEN); ifp->if_snd.ifq_drv_maxlen = CAS_TXQUEUELEN; IFQ_SET_READY(&ifp->if_snd); callout_init_mtx(&sc->sc_tick_ch, &sc->sc_mtx, 0); callout_init(&sc->sc_rx_ch, 1); /* Create local taskq. */ TASK_INIT(&sc->sc_intr_task, 0, cas_intr_task, sc); TASK_INIT(&sc->sc_tx_task, 1, cas_tx_task, ifp); sc->sc_tq = taskqueue_create_fast("cas_taskq", M_WAITOK, taskqueue_thread_enqueue, &sc->sc_tq); if (sc->sc_tq == NULL) { device_printf(sc->sc_dev, "could not create taskqueue\n"); error = ENXIO; goto fail_ifnet; } taskqueue_start_threads(&sc->sc_tq, 1, PI_NET, "%s taskq", device_get_nameunit(sc->sc_dev)); /* Make sure the chip is stopped. */ cas_reset(sc); error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE, 0, BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_pdmatag); if (error != 0) goto fail_taskq; error = bus_dma_tag_create(sc->sc_pdmatag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, CAS_PAGE_SIZE, 1, CAS_PAGE_SIZE, 0, NULL, NULL, &sc->sc_rdmatag); if (error != 0) goto fail_ptag; error = bus_dma_tag_create(sc->sc_pdmatag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES * CAS_NTXSEGS, CAS_NTXSEGS, MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->sc_tdmatag); if (error != 0) goto fail_rtag; error = bus_dma_tag_create(sc->sc_pdmatag, CAS_TX_DESC_ALIGN, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, sizeof(struct cas_control_data), 1, sizeof(struct cas_control_data), 0, NULL, NULL, &sc->sc_cdmatag); if (error != 0) goto fail_ttag; /* * Allocate the control data structures, create and load the * DMA map for it. */ if ((error = bus_dmamem_alloc(sc->sc_cdmatag, (void **)&sc->sc_control_data, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc->sc_cddmamap)) != 0) { device_printf(sc->sc_dev, "unable to allocate control data, error = %d\n", error); goto fail_ctag; } sc->sc_cddma = 0; if ((error = bus_dmamap_load(sc->sc_cdmatag, sc->sc_cddmamap, sc->sc_control_data, sizeof(struct cas_control_data), cas_cddma_callback, sc, 0)) != 0 || sc->sc_cddma == 0) { device_printf(sc->sc_dev, "unable to load control data DMA map, error = %d\n", error); goto fail_cmem; } /* * Initialize the transmit job descriptors. */ STAILQ_INIT(&sc->sc_txfreeq); STAILQ_INIT(&sc->sc_txdirtyq); /* * Create the transmit buffer DMA maps. */ error = ENOMEM; for (i = 0; i < CAS_TXQUEUELEN; i++) { txs = &sc->sc_txsoft[i]; txs->txs_mbuf = NULL; txs->txs_ndescs = 0; if ((error = bus_dmamap_create(sc->sc_tdmatag, 0, &txs->txs_dmamap)) != 0) { device_printf(sc->sc_dev, "unable to create TX DMA map %d, error = %d\n", i, error); goto fail_txd; } STAILQ_INSERT_TAIL(&sc->sc_txfreeq, txs, txs_q); } /* * Allocate the receive buffers, create and load the DMA maps * for them. */ for (i = 0; i < CAS_NRXDESC; i++) { if ((error = bus_dmamem_alloc(sc->sc_rdmatag, &sc->sc_rxdsoft[i].rxds_buf, BUS_DMA_WAITOK, &sc->sc_rxdsoft[i].rxds_dmamap)) != 0) { device_printf(sc->sc_dev, "unable to allocate RX buffer %d, error = %d\n", i, error); goto fail_rxmem; } sc->sc_rxdptr = i; sc->sc_rxdsoft[i].rxds_paddr = 0; if ((error = bus_dmamap_load(sc->sc_rdmatag, sc->sc_rxdsoft[i].rxds_dmamap, sc->sc_rxdsoft[i].rxds_buf, CAS_PAGE_SIZE, cas_rxdma_callback, sc, 0)) != 0 || sc->sc_rxdsoft[i].rxds_paddr == 0) { device_printf(sc->sc_dev, "unable to load RX DMA map %d, error = %d\n", i, error); goto fail_rxmap; } } if ((sc->sc_flags & CAS_SERDES) == 0) { CAS_WRITE_4(sc, CAS_PCS_DATAPATH, CAS_PCS_DATAPATH_MII); CAS_BARRIER(sc, CAS_PCS_DATAPATH, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); cas_mifinit(sc); /* * Look for an external PHY. */ error = ENXIO; v = CAS_READ_4(sc, CAS_MIF_CONF); if ((v & CAS_MIF_CONF_MDI1) != 0) { v |= CAS_MIF_CONF_PHY_SELECT; CAS_WRITE_4(sc, CAS_MIF_CONF, v); CAS_BARRIER(sc, CAS_MIF_CONF, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); /* Enable/unfreeze the GMII pins of Saturn. */ if (sc->sc_variant == CAS_SATURN) { CAS_WRITE_4(sc, CAS_SATURN_PCFG, 0); CAS_BARRIER(sc, CAS_SATURN_PCFG, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); } error = mii_attach(sc->sc_dev, &sc->sc_miibus, ifp, cas_mediachange, cas_mediastatus, BMSR_DEFCAPMASK, - MII_PHY_ANY, MII_OFFSET_ANY, 0); + MII_PHY_ANY, MII_OFFSET_ANY, MIIF_DOPAUSE); } /* * Fall back on an internal PHY if no external PHY was found. */ if (error != 0 && (v & CAS_MIF_CONF_MDI0) != 0) { v &= ~CAS_MIF_CONF_PHY_SELECT; CAS_WRITE_4(sc, CAS_MIF_CONF, v); CAS_BARRIER(sc, CAS_MIF_CONF, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); /* Freeze the GMII pins of Saturn for saving power. */ if (sc->sc_variant == CAS_SATURN) { CAS_WRITE_4(sc, CAS_SATURN_PCFG, CAS_SATURN_PCFG_FSI); CAS_BARRIER(sc, CAS_SATURN_PCFG, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); } error = mii_attach(sc->sc_dev, &sc->sc_miibus, ifp, cas_mediachange, cas_mediastatus, BMSR_DEFCAPMASK, - MII_PHY_ANY, MII_OFFSET_ANY, 0); + MII_PHY_ANY, MII_OFFSET_ANY, MIIF_DOPAUSE); } } else { /* * Use the external PCS SERDES. */ CAS_WRITE_4(sc, CAS_PCS_DATAPATH, CAS_PCS_DATAPATH_SERDES); CAS_BARRIER(sc, CAS_PCS_DATAPATH, 4, BUS_SPACE_BARRIER_WRITE); /* Enable/unfreeze the SERDES pins of Saturn. */ if (sc->sc_variant == CAS_SATURN) { CAS_WRITE_4(sc, CAS_SATURN_PCFG, 0); CAS_BARRIER(sc, CAS_SATURN_PCFG, 4, BUS_SPACE_BARRIER_WRITE); } CAS_WRITE_4(sc, CAS_PCS_SERDES_CTRL, CAS_PCS_SERDES_CTRL_ESD); CAS_BARRIER(sc, CAS_PCS_SERDES_CTRL, 4, BUS_SPACE_BARRIER_WRITE); CAS_WRITE_4(sc, CAS_PCS_CONF, CAS_PCS_CONF_EN); CAS_BARRIER(sc, CAS_PCS_CONF, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); error = mii_attach(sc->sc_dev, &sc->sc_miibus, ifp, cas_mediachange, cas_mediastatus, BMSR_DEFCAPMASK, - CAS_PHYAD_EXTERNAL, MII_OFFSET_ANY, 0); + CAS_PHYAD_EXTERNAL, MII_OFFSET_ANY, MIIF_DOPAUSE); } if (error != 0) { device_printf(sc->sc_dev, "attaching PHYs failed\n"); goto fail_rxmap; } sc->sc_mii = device_get_softc(sc->sc_miibus); /* * From this point forward, the attachment cannot fail. A failure * before this point releases all resources that may have been * allocated. */ /* Announce FIFO sizes. */ v = CAS_READ_4(sc, CAS_TX_FIFO_SIZE); device_printf(sc->sc_dev, "%ukB RX FIFO, %ukB TX FIFO\n", CAS_RX_FIFO_SIZE / 1024, v / 16); /* Attach the interface. */ ether_ifattach(ifp, sc->sc_enaddr); /* * Tell the upper layer(s) we support long frames/checksum offloads. */ ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities = IFCAP_VLAN_MTU; if ((sc->sc_flags & CAS_NO_CSUM) == 0) { ifp->if_capabilities |= IFCAP_HWCSUM; ifp->if_hwassist = CAS_CSUM_FEATURES; } ifp->if_capenable = ifp->if_capabilities; return (0); /* * Free any resources we've allocated during the failed attach * attempt. Do this in reverse order and fall through. */ fail_rxmap: for (i = 0; i < CAS_NRXDESC; i++) if (sc->sc_rxdsoft[i].rxds_paddr != 0) bus_dmamap_unload(sc->sc_rdmatag, sc->sc_rxdsoft[i].rxds_dmamap); fail_rxmem: for (i = 0; i < CAS_NRXDESC; i++) if (sc->sc_rxdsoft[i].rxds_buf != NULL) bus_dmamem_free(sc->sc_rdmatag, sc->sc_rxdsoft[i].rxds_buf, sc->sc_rxdsoft[i].rxds_dmamap); fail_txd: for (i = 0; i < CAS_TXQUEUELEN; i++) if (sc->sc_txsoft[i].txs_dmamap != NULL) bus_dmamap_destroy(sc->sc_tdmatag, sc->sc_txsoft[i].txs_dmamap); bus_dmamap_unload(sc->sc_cdmatag, sc->sc_cddmamap); fail_cmem: bus_dmamem_free(sc->sc_cdmatag, sc->sc_control_data, sc->sc_cddmamap); fail_ctag: bus_dma_tag_destroy(sc->sc_cdmatag); fail_ttag: bus_dma_tag_destroy(sc->sc_tdmatag); fail_rtag: bus_dma_tag_destroy(sc->sc_rdmatag); fail_ptag: bus_dma_tag_destroy(sc->sc_pdmatag); fail_taskq: taskqueue_free(sc->sc_tq); fail_ifnet: if_free(ifp); return (error); } static void cas_detach(struct cas_softc *sc) { struct ifnet *ifp = sc->sc_ifp; int i; ether_ifdetach(ifp); CAS_LOCK(sc); cas_stop(ifp); CAS_UNLOCK(sc); callout_drain(&sc->sc_tick_ch); callout_drain(&sc->sc_rx_ch); taskqueue_drain(sc->sc_tq, &sc->sc_intr_task); taskqueue_drain(sc->sc_tq, &sc->sc_tx_task); if_free(ifp); taskqueue_free(sc->sc_tq); device_delete_child(sc->sc_dev, sc->sc_miibus); for (i = 0; i < CAS_NRXDESC; i++) if (sc->sc_rxdsoft[i].rxds_dmamap != NULL) bus_dmamap_sync(sc->sc_rdmatag, sc->sc_rxdsoft[i].rxds_dmamap, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); for (i = 0; i < CAS_NRXDESC; i++) if (sc->sc_rxdsoft[i].rxds_paddr != 0) bus_dmamap_unload(sc->sc_rdmatag, sc->sc_rxdsoft[i].rxds_dmamap); for (i = 0; i < CAS_NRXDESC; i++) if (sc->sc_rxdsoft[i].rxds_buf != NULL) bus_dmamem_free(sc->sc_rdmatag, sc->sc_rxdsoft[i].rxds_buf, sc->sc_rxdsoft[i].rxds_dmamap); for (i = 0; i < CAS_TXQUEUELEN; i++) if (sc->sc_txsoft[i].txs_dmamap != NULL) bus_dmamap_destroy(sc->sc_tdmatag, sc->sc_txsoft[i].txs_dmamap); CAS_CDSYNC(sc, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_cdmatag, sc->sc_cddmamap); bus_dmamem_free(sc->sc_cdmatag, sc->sc_control_data, sc->sc_cddmamap); bus_dma_tag_destroy(sc->sc_cdmatag); bus_dma_tag_destroy(sc->sc_tdmatag); bus_dma_tag_destroy(sc->sc_rdmatag); bus_dma_tag_destroy(sc->sc_pdmatag); } static void cas_suspend(struct cas_softc *sc) { struct ifnet *ifp = sc->sc_ifp; CAS_LOCK(sc); cas_stop(ifp); CAS_UNLOCK(sc); } static void cas_resume(struct cas_softc *sc) { struct ifnet *ifp = sc->sc_ifp; CAS_LOCK(sc); /* * On resume all registers have to be initialized again like * after power-on. */ sc->sc_flags &= ~CAS_INITED; if (ifp->if_flags & IFF_UP) cas_init_locked(sc); CAS_UNLOCK(sc); } static inline void cas_rxcksum(struct mbuf *m, uint16_t cksum) { struct ether_header *eh; struct ip *ip; struct udphdr *uh; uint16_t *opts; int32_t hlen, len, pktlen; uint32_t temp32; pktlen = m->m_pkthdr.len; if (pktlen < sizeof(struct ether_header) + sizeof(struct ip)) return; eh = mtod(m, struct ether_header *); if (eh->ether_type != htons(ETHERTYPE_IP)) return; ip = (struct ip *)(eh + 1); if (ip->ip_v != IPVERSION) return; hlen = ip->ip_hl << 2; pktlen -= sizeof(struct ether_header); if (hlen < sizeof(struct ip)) return; if (ntohs(ip->ip_len) < hlen) return; if (ntohs(ip->ip_len) != pktlen) return; if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) return; /* Cannot handle fragmented packet. */ switch (ip->ip_p) { case IPPROTO_TCP: if (pktlen < (hlen + sizeof(struct tcphdr))) return; break; case IPPROTO_UDP: if (pktlen < (hlen + sizeof(struct udphdr))) return; uh = (struct udphdr *)((uint8_t *)ip + hlen); if (uh->uh_sum == 0) return; /* no checksum */ break; default: return; } cksum = ~cksum; /* checksum fixup for IP options */ len = hlen - sizeof(struct ip); if (len > 0) { opts = (uint16_t *)(ip + 1); for (; len > 0; len -= sizeof(uint16_t), opts++) { temp32 = cksum - *opts; temp32 = (temp32 >> 16) + (temp32 & 65535); cksum = temp32 & 65535; } } m->m_pkthdr.csum_flags |= CSUM_DATA_VALID; m->m_pkthdr.csum_data = cksum; } static void cas_cddma_callback(void *xsc, bus_dma_segment_t *segs, int nsegs, int error) { struct cas_softc *sc = xsc; if (error != 0) return; if (nsegs != 1) panic("%s: bad control buffer segment count", __func__); sc->sc_cddma = segs[0].ds_addr; } static void cas_rxdma_callback(void *xsc, bus_dma_segment_t *segs, int nsegs, int error) { struct cas_softc *sc = xsc; if (error != 0) return; if (nsegs != 1) panic("%s: bad RX buffer segment count", __func__); sc->sc_rxdsoft[sc->sc_rxdptr].rxds_paddr = segs[0].ds_addr; } static void cas_tick(void *arg) { struct cas_softc *sc = arg; struct ifnet *ifp = sc->sc_ifp; uint32_t v; CAS_LOCK_ASSERT(sc, MA_OWNED); /* * Unload collision and error counters. */ ifp->if_collisions += CAS_READ_4(sc, CAS_MAC_NORM_COLL_CNT) + CAS_READ_4(sc, CAS_MAC_FIRST_COLL_CNT); v = CAS_READ_4(sc, CAS_MAC_EXCESS_COLL_CNT) + CAS_READ_4(sc, CAS_MAC_LATE_COLL_CNT); ifp->if_collisions += v; ifp->if_oerrors += v; ifp->if_ierrors += CAS_READ_4(sc, CAS_MAC_RX_LEN_ERR_CNT) + CAS_READ_4(sc, CAS_MAC_RX_ALIGN_ERR) + CAS_READ_4(sc, CAS_MAC_RX_CRC_ERR_CNT) + CAS_READ_4(sc, CAS_MAC_RX_CODE_VIOL); /* * Then clear the hardware counters. */ CAS_WRITE_4(sc, CAS_MAC_NORM_COLL_CNT, 0); CAS_WRITE_4(sc, CAS_MAC_FIRST_COLL_CNT, 0); CAS_WRITE_4(sc, CAS_MAC_EXCESS_COLL_CNT, 0); CAS_WRITE_4(sc, CAS_MAC_LATE_COLL_CNT, 0); CAS_WRITE_4(sc, CAS_MAC_RX_LEN_ERR_CNT, 0); CAS_WRITE_4(sc, CAS_MAC_RX_ALIGN_ERR, 0); CAS_WRITE_4(sc, CAS_MAC_RX_CRC_ERR_CNT, 0); CAS_WRITE_4(sc, CAS_MAC_RX_CODE_VIOL, 0); mii_tick(sc->sc_mii); if (sc->sc_txfree != CAS_MAXTXFREE) cas_tint(sc); cas_watchdog(sc); callout_reset(&sc->sc_tick_ch, hz, cas_tick, sc); } static int cas_bitwait(struct cas_softc *sc, bus_addr_t r, uint32_t clr, uint32_t set) { int i; uint32_t reg; for (i = CAS_TRIES; i--; DELAY(100)) { reg = CAS_READ_4(sc, r); if ((reg & clr) == 0 && (reg & set) == set) return (1); } return (0); } static void cas_reset(struct cas_softc *sc) { #ifdef CAS_DEBUG CTR2(KTR_CAS, "%s: %s", device_get_name(sc->sc_dev), __func__); #endif /* Disable all interrupts in order to avoid spurious ones. */ CAS_WRITE_4(sc, CAS_INTMASK, 0xffffffff); cas_reset_rx(sc); cas_reset_tx(sc); /* * Do a full reset modulo the result of the last auto-negotiation * when using the SERDES. */ CAS_WRITE_4(sc, CAS_RESET, CAS_RESET_RX | CAS_RESET_TX | ((sc->sc_flags & CAS_SERDES) != 0 ? CAS_RESET_PCS_DIS : 0)); CAS_BARRIER(sc, CAS_RESET, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); DELAY(3000); if (!cas_bitwait(sc, CAS_RESET, CAS_RESET_RX | CAS_RESET_TX, 0)) device_printf(sc->sc_dev, "cannot reset device\n"); } static void cas_stop(struct ifnet *ifp) { struct cas_softc *sc = ifp->if_softc; struct cas_txsoft *txs; #ifdef CAS_DEBUG CTR2(KTR_CAS, "%s: %s", device_get_name(sc->sc_dev), __func__); #endif callout_stop(&sc->sc_tick_ch); callout_stop(&sc->sc_rx_ch); /* Disable all interrupts in order to avoid spurious ones. */ CAS_WRITE_4(sc, CAS_INTMASK, 0xffffffff); cas_reset_tx(sc); cas_reset_rx(sc); /* * Release any queued transmit buffers. */ while ((txs = STAILQ_FIRST(&sc->sc_txdirtyq)) != NULL) { STAILQ_REMOVE_HEAD(&sc->sc_txdirtyq, txs_q); if (txs->txs_ndescs != 0) { bus_dmamap_sync(sc->sc_tdmatag, txs->txs_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_tdmatag, txs->txs_dmamap); if (txs->txs_mbuf != NULL) { m_freem(txs->txs_mbuf); txs->txs_mbuf = NULL; } } STAILQ_INSERT_TAIL(&sc->sc_txfreeq, txs, txs_q); } /* * Mark the interface down and cancel the watchdog timer. */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); sc->sc_flags &= ~CAS_LINK; sc->sc_wdog_timer = 0; } static int cas_reset_rx(struct cas_softc *sc) { /* * Resetting while DMA is in progress can cause a bus hang, so we * disable DMA first. */ cas_disable_rx(sc); CAS_WRITE_4(sc, CAS_RX_CONF, 0); CAS_BARRIER(sc, CAS_RX_CONF, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (!cas_bitwait(sc, CAS_RX_CONF, CAS_RX_CONF_RXDMA_EN, 0)) device_printf(sc->sc_dev, "cannot disable RX DMA\n"); /* Finally, reset the ERX. */ CAS_WRITE_4(sc, CAS_RESET, CAS_RESET_RX | ((sc->sc_flags & CAS_SERDES) != 0 ? CAS_RESET_PCS_DIS : 0)); CAS_BARRIER(sc, CAS_RESET, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (!cas_bitwait(sc, CAS_RESET, CAS_RESET_RX | CAS_RESET_TX, 0)) { device_printf(sc->sc_dev, "cannot reset receiver\n"); return (1); } return (0); } static int cas_reset_tx(struct cas_softc *sc) { /* * Resetting while DMA is in progress can cause a bus hang, so we * disable DMA first. */ cas_disable_tx(sc); CAS_WRITE_4(sc, CAS_TX_CONF, 0); CAS_BARRIER(sc, CAS_TX_CONF, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (!cas_bitwait(sc, CAS_TX_CONF, CAS_TX_CONF_TXDMA_EN, 0)) device_printf(sc->sc_dev, "cannot disable TX DMA\n"); /* Finally, reset the ETX. */ CAS_WRITE_4(sc, CAS_RESET, CAS_RESET_TX | ((sc->sc_flags & CAS_SERDES) != 0 ? CAS_RESET_PCS_DIS : 0)); CAS_BARRIER(sc, CAS_RESET, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (!cas_bitwait(sc, CAS_RESET, CAS_RESET_RX | CAS_RESET_TX, 0)) { device_printf(sc->sc_dev, "cannot reset transmitter\n"); return (1); } return (0); } static int cas_disable_rx(struct cas_softc *sc) { CAS_WRITE_4(sc, CAS_MAC_RX_CONF, CAS_READ_4(sc, CAS_MAC_RX_CONF) & ~CAS_MAC_RX_CONF_EN); CAS_BARRIER(sc, CAS_MAC_RX_CONF, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return (cas_bitwait(sc, CAS_MAC_RX_CONF, CAS_MAC_RX_CONF_EN, 0)); } static int cas_disable_tx(struct cas_softc *sc) { CAS_WRITE_4(sc, CAS_MAC_TX_CONF, CAS_READ_4(sc, CAS_MAC_TX_CONF) & ~CAS_MAC_TX_CONF_EN); CAS_BARRIER(sc, CAS_MAC_TX_CONF, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return (cas_bitwait(sc, CAS_MAC_TX_CONF, CAS_MAC_TX_CONF_EN, 0)); } static inline void cas_rxcompinit(struct cas_rx_comp *rxcomp) { rxcomp->crc_word1 = 0; rxcomp->crc_word2 = 0; rxcomp->crc_word3 = htole64(CAS_SET(ETHER_HDR_LEN + sizeof(struct ip), CAS_RC3_CSO)); rxcomp->crc_word4 = htole64(CAS_RC4_ZERO); } static void cas_meminit(struct cas_softc *sc) { int i; CAS_LOCK_ASSERT(sc, MA_OWNED); /* * Initialize the transmit descriptor ring. */ for (i = 0; i < CAS_NTXDESC; i++) { sc->sc_txdescs[i].cd_flags = 0; sc->sc_txdescs[i].cd_buf_ptr = 0; } sc->sc_txfree = CAS_MAXTXFREE; sc->sc_txnext = 0; sc->sc_txwin = 0; /* * Initialize the receive completion ring. */ for (i = 0; i < CAS_NRXCOMP; i++) cas_rxcompinit(&sc->sc_rxcomps[i]); sc->sc_rxcptr = 0; /* * Initialize the first receive descriptor ring. We leave * the second one zeroed as we don't actually use it. */ for (i = 0; i < CAS_NRXDESC; i++) CAS_INIT_RXDESC(sc, i, i); sc->sc_rxdptr = 0; CAS_CDSYNC(sc, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } static u_int cas_descsize(u_int sz) { switch (sz) { case 32: return (CAS_DESC_32); case 64: return (CAS_DESC_64); case 128: return (CAS_DESC_128); case 256: return (CAS_DESC_256); case 512: return (CAS_DESC_512); case 1024: return (CAS_DESC_1K); case 2048: return (CAS_DESC_2K); case 4096: return (CAS_DESC_4K); case 8192: return (CAS_DESC_8K); default: printf("%s: invalid descriptor ring size %d\n", __func__, sz); return (CAS_DESC_32); } } static u_int cas_rxcompsize(u_int sz) { switch (sz) { case 128: return (CAS_RX_CONF_COMP_128); case 256: return (CAS_RX_CONF_COMP_256); case 512: return (CAS_RX_CONF_COMP_512); case 1024: return (CAS_RX_CONF_COMP_1K); case 2048: return (CAS_RX_CONF_COMP_2K); case 4096: return (CAS_RX_CONF_COMP_4K); case 8192: return (CAS_RX_CONF_COMP_8K); case 16384: return (CAS_RX_CONF_COMP_16K); case 32768: return (CAS_RX_CONF_COMP_32K); default: printf("%s: invalid dcompletion ring size %d\n", __func__, sz); return (CAS_RX_CONF_COMP_128); } } static void cas_init(void *xsc) { struct cas_softc *sc = xsc; CAS_LOCK(sc); cas_init_locked(sc); CAS_UNLOCK(sc); } /* * Initialization of interface; set up initialization block * and transmit/receive descriptor rings. */ static void cas_init_locked(struct cas_softc *sc) { struct ifnet *ifp = sc->sc_ifp; uint32_t v; CAS_LOCK_ASSERT(sc, MA_OWNED); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) return; #ifdef CAS_DEBUG CTR2(KTR_CAS, "%s: %s: calling stop", device_get_name(sc->sc_dev), __func__); #endif /* * Initialization sequence. The numbered steps below correspond * to the sequence outlined in section 6.3.5.1 in the Ethernet * Channel Engine manual (part of the PCIO manual). * See also the STP2002-STQ document from Sun Microsystems. */ /* step 1 & 2. Reset the Ethernet Channel. */ cas_stop(ifp); cas_reset(sc); #ifdef CAS_DEBUG CTR2(KTR_CAS, "%s: %s: restarting", device_get_name(sc->sc_dev), __func__); #endif if ((sc->sc_flags & CAS_SERDES) == 0) /* Re-initialize the MIF. */ cas_mifinit(sc); /* step 3. Setup data structures in host memory. */ cas_meminit(sc); /* step 4. TX MAC registers & counters */ cas_init_regs(sc); /* step 5. RX MAC registers & counters */ cas_setladrf(sc); /* step 6 & 7. Program Ring Base Addresses. */ CAS_WRITE_4(sc, CAS_TX_DESC3_BASE_HI, (((uint64_t)CAS_CDTXDADDR(sc, 0)) >> 32)); CAS_WRITE_4(sc, CAS_TX_DESC3_BASE_LO, CAS_CDTXDADDR(sc, 0) & 0xffffffff); CAS_WRITE_4(sc, CAS_RX_COMP_BASE_HI, (((uint64_t)CAS_CDRXCADDR(sc, 0)) >> 32)); CAS_WRITE_4(sc, CAS_RX_COMP_BASE_LO, CAS_CDRXCADDR(sc, 0) & 0xffffffff); CAS_WRITE_4(sc, CAS_RX_DESC_BASE_HI, (((uint64_t)CAS_CDRXDADDR(sc, 0)) >> 32)); CAS_WRITE_4(sc, CAS_RX_DESC_BASE_LO, CAS_CDRXDADDR(sc, 0) & 0xffffffff); if ((sc->sc_flags & CAS_REG_PLUS) != 0) { CAS_WRITE_4(sc, CAS_RX_DESC2_BASE_HI, (((uint64_t)CAS_CDRXD2ADDR(sc, 0)) >> 32)); CAS_WRITE_4(sc, CAS_RX_DESC2_BASE_LO, CAS_CDRXD2ADDR(sc, 0) & 0xffffffff); } #ifdef CAS_DEBUG CTR5(KTR_CAS, "loading TXDR %lx, RXCR %lx, RXDR %lx, RXD2R %lx, cddma %lx", CAS_CDTXDADDR(sc, 0), CAS_CDRXCADDR(sc, 0), CAS_CDRXDADDR(sc, 0), CAS_CDRXD2ADDR(sc, 0), sc->sc_cddma); #endif /* step 8. Global Configuration & Interrupt Masks */ /* Disable weighted round robin. */ CAS_WRITE_4(sc, CAS_CAW, CAS_CAW_RR_DIS); /* * Enable infinite bursts for revisions without PCI issues if * applicable. Doing so greatly improves the TX performance on * !__sparc64__. */ CAS_WRITE_4(sc, CAS_INF_BURST, #if !defined(__sparc64__) (sc->sc_flags & CAS_TABORT) == 0 ? CAS_INF_BURST_EN : #endif 0); /* Set up interrupts. */ CAS_WRITE_4(sc, CAS_INTMASK, ~(CAS_INTR_TX_INT_ME | CAS_INTR_TX_TAG_ERR | CAS_INTR_RX_DONE | CAS_INTR_RX_BUF_NA | CAS_INTR_RX_TAG_ERR | CAS_INTR_RX_COMP_FULL | CAS_INTR_RX_BUF_AEMPTY | CAS_INTR_RX_COMP_AFULL | CAS_INTR_RX_LEN_MMATCH | CAS_INTR_PCI_ERROR_INT #ifdef CAS_DEBUG | CAS_INTR_PCS_INT | CAS_INTR_MIF #endif )); /* Don't clear top level interrupts when CAS_STATUS_ALIAS is read. */ CAS_WRITE_4(sc, CAS_CLEAR_ALIAS, 0); CAS_WRITE_4(sc, CAS_MAC_RX_MASK, ~CAS_MAC_RX_OVERFLOW); CAS_WRITE_4(sc, CAS_MAC_TX_MASK, ~(CAS_MAC_TX_UNDERRUN | CAS_MAC_TX_MAX_PKT_ERR)); #ifdef CAS_DEBUG CAS_WRITE_4(sc, CAS_MAC_CTRL_MASK, ~(CAS_MAC_CTRL_PAUSE_RCVD | CAS_MAC_CTRL_PAUSE | CAS_MAC_CTRL_NON_PAUSE)); #else CAS_WRITE_4(sc, CAS_MAC_CTRL_MASK, CAS_MAC_CTRL_PAUSE_RCVD | CAS_MAC_CTRL_PAUSE | CAS_MAC_CTRL_NON_PAUSE); #endif /* Enable PCI error interrupts. */ CAS_WRITE_4(sc, CAS_ERROR_MASK, ~(CAS_ERROR_DTRTO | CAS_ERROR_OTHER | CAS_ERROR_DMAW_ZERO | CAS_ERROR_DMAR_ZERO | CAS_ERROR_RTRTO)); /* Enable PCI error interrupts in BIM configuration. */ CAS_WRITE_4(sc, CAS_BIM_CONF, CAS_BIM_CONF_DPAR_EN | CAS_BIM_CONF_RMA_EN | CAS_BIM_CONF_RTA_EN); /* * step 9. ETX Configuration: encode receive descriptor ring size, * enable DMA and disable pre-interrupt writeback completion. */ v = cas_descsize(CAS_NTXDESC) << CAS_TX_CONF_DESC3_SHFT; CAS_WRITE_4(sc, CAS_TX_CONF, v | CAS_TX_CONF_TXDMA_EN | CAS_TX_CONF_RDPP_DIS | CAS_TX_CONF_PICWB_DIS); /* step 10. ERX Configuration */ /* * Encode receive completion and descriptor ring sizes, set the * swivel offset. */ v = cas_rxcompsize(CAS_NRXCOMP) << CAS_RX_CONF_COMP_SHFT; v |= cas_descsize(CAS_NRXDESC) << CAS_RX_CONF_DESC_SHFT; if ((sc->sc_flags & CAS_REG_PLUS) != 0) v |= cas_descsize(CAS_NRXDESC2) << CAS_RX_CONF_DESC2_SHFT; CAS_WRITE_4(sc, CAS_RX_CONF, v | (ETHER_ALIGN << CAS_RX_CONF_SOFF_SHFT)); /* Set the PAUSE thresholds. We use the maximum OFF threshold. */ CAS_WRITE_4(sc, CAS_RX_PTHRS, - ((111 * 64) << CAS_RX_PTHRS_XOFF_SHFT) | - ((15 * 64) << CAS_RX_PTHRS_XON_SHFT)); + (111 << CAS_RX_PTHRS_XOFF_SHFT) | (15 << CAS_RX_PTHRS_XON_SHFT)); /* RX blanking */ CAS_WRITE_4(sc, CAS_RX_BLANK, (15 << CAS_RX_BLANK_TIME_SHFT) | (5 << CAS_RX_BLANK_PKTS_SHFT)); /* Set RX_COMP_AFULL threshold to half of the RX completions. */ CAS_WRITE_4(sc, CAS_RX_AEMPTY_THRS, (CAS_NRXCOMP / 2) << CAS_RX_AEMPTY_COMP_SHFT); /* Initialize the RX page size register as appropriate for 8k. */ CAS_WRITE_4(sc, CAS_RX_PSZ, (CAS_RX_PSZ_8K << CAS_RX_PSZ_SHFT) | (4 << CAS_RX_PSZ_MB_CNT_SHFT) | (CAS_RX_PSZ_MB_STRD_2K << CAS_RX_PSZ_MB_STRD_SHFT) | (CAS_RX_PSZ_MB_OFF_64 << CAS_RX_PSZ_MB_OFF_SHFT)); /* Disable RX random early detection. */ CAS_WRITE_4(sc, CAS_RX_RED, 0); /* Zero the RX reassembly DMA table. */ for (v = 0; v <= CAS_RX_REAS_DMA_ADDR_LC; v++) { CAS_WRITE_4(sc, CAS_RX_REAS_DMA_ADDR, v); CAS_WRITE_4(sc, CAS_RX_REAS_DMA_DATA_LO, 0); CAS_WRITE_4(sc, CAS_RX_REAS_DMA_DATA_MD, 0); CAS_WRITE_4(sc, CAS_RX_REAS_DMA_DATA_HI, 0); } /* Ensure the RX control FIFO and RX IPP FIFO addresses are zero. */ CAS_WRITE_4(sc, CAS_RX_CTRL_FIFO, 0); CAS_WRITE_4(sc, CAS_RX_IPP_ADDR, 0); /* Finally, enable RX DMA. */ CAS_WRITE_4(sc, CAS_RX_CONF, CAS_READ_4(sc, CAS_RX_CONF) | CAS_RX_CONF_RXDMA_EN); /* step 11. Configure Media. */ /* step 12. RX_MAC Configuration Register */ v = CAS_READ_4(sc, CAS_MAC_RX_CONF) & ~CAS_MAC_RX_CONF_STRPPAD; v |= CAS_MAC_RX_CONF_EN | CAS_MAC_RX_CONF_STRPFCS; CAS_WRITE_4(sc, CAS_MAC_RX_CONF, 0); CAS_BARRIER(sc, CAS_MAC_RX_CONF, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (!cas_bitwait(sc, CAS_MAC_RX_CONF, CAS_MAC_RX_CONF_EN, 0)) device_printf(sc->sc_dev, "cannot configure RX MAC\n"); CAS_WRITE_4(sc, CAS_MAC_RX_CONF, v); /* step 13. TX_MAC Configuration Register */ v = CAS_READ_4(sc, CAS_MAC_TX_CONF); v |= CAS_MAC_TX_CONF_EN; CAS_WRITE_4(sc, CAS_MAC_TX_CONF, 0); CAS_BARRIER(sc, CAS_MAC_TX_CONF, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (!cas_bitwait(sc, CAS_MAC_TX_CONF, CAS_MAC_TX_CONF_EN, 0)) device_printf(sc->sc_dev, "cannot configure TX MAC\n"); CAS_WRITE_4(sc, CAS_MAC_TX_CONF, v); /* step 14. Issue Transmit Pending command. */ /* step 15. Give the reciever a swift kick. */ CAS_WRITE_4(sc, CAS_RX_KICK, CAS_NRXDESC - 4); CAS_WRITE_4(sc, CAS_RX_COMP_TAIL, 0); if ((sc->sc_flags & CAS_REG_PLUS) != 0) CAS_WRITE_4(sc, CAS_RX_KICK2, CAS_NRXDESC2 - 4); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; mii_mediachg(sc->sc_mii); /* Start the one second timer. */ sc->sc_wdog_timer = 0; callout_reset(&sc->sc_tick_ch, hz, cas_tick, sc); } static int cas_load_txmbuf(struct cas_softc *sc, struct mbuf **m_head) { bus_dma_segment_t txsegs[CAS_NTXSEGS]; struct cas_txsoft *txs; struct ip *ip; struct mbuf *m; uint64_t cflags; int error, nexttx, nsegs, offset, seg; CAS_LOCK_ASSERT(sc, MA_OWNED); /* Get a work queue entry. */ if ((txs = STAILQ_FIRST(&sc->sc_txfreeq)) == NULL) { /* Ran out of descriptors. */ return (ENOBUFS); } cflags = 0; if (((*m_head)->m_pkthdr.csum_flags & CAS_CSUM_FEATURES) != 0) { if (M_WRITABLE(*m_head) == 0) { m = m_dup(*m_head, M_DONTWAIT); m_freem(*m_head); *m_head = m; if (m == NULL) return (ENOBUFS); } offset = sizeof(struct ether_header); m = m_pullup(*m_head, offset + sizeof(struct ip)); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } ip = (struct ip *)(mtod(m, caddr_t) + offset); offset += (ip->ip_hl << 2); cflags = (offset << CAS_TD_CKSUM_START_SHFT) | ((offset + m->m_pkthdr.csum_data) << CAS_TD_CKSUM_STUFF_SHFT) | CAS_TD_CKSUM_EN; *m_head = m; } error = bus_dmamap_load_mbuf_sg(sc->sc_tdmatag, txs->txs_dmamap, *m_head, txsegs, &nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { m = m_collapse(*m_head, M_DONTWAIT, CAS_NTXSEGS); if (m == NULL) { m_freem(*m_head); *m_head = NULL; return (ENOBUFS); } *m_head = m; error = bus_dmamap_load_mbuf_sg(sc->sc_tdmatag, txs->txs_dmamap, *m_head, txsegs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { m_freem(*m_head); *m_head = NULL; return (error); } } else if (error != 0) return (error); /* If nsegs is wrong then the stack is corrupt. */ KASSERT(nsegs <= CAS_NTXSEGS, ("%s: too many DMA segments (%d)", __func__, nsegs)); if (nsegs == 0) { m_freem(*m_head); *m_head = NULL; return (EIO); } /* * Ensure we have enough descriptors free to describe * the packet. Note, we always reserve one descriptor * at the end of the ring as a termination point, in * order to prevent wrap-around. */ if (nsegs > sc->sc_txfree - 1) { txs->txs_ndescs = 0; bus_dmamap_unload(sc->sc_tdmatag, txs->txs_dmamap); return (ENOBUFS); } txs->txs_ndescs = nsegs; txs->txs_firstdesc = sc->sc_txnext; nexttx = txs->txs_firstdesc; for (seg = 0; seg < nsegs; seg++, nexttx = CAS_NEXTTX(nexttx)) { #ifdef CAS_DEBUG CTR6(KTR_CAS, "%s: mapping seg %d (txd %d), len %lx, addr %#lx (%#lx)", __func__, seg, nexttx, txsegs[seg].ds_len, txsegs[seg].ds_addr, htole64(txsegs[seg].ds_addr)); #endif sc->sc_txdescs[nexttx].cd_buf_ptr = htole64(txsegs[seg].ds_addr); KASSERT(txsegs[seg].ds_len < CAS_TD_BUF_LEN_MASK >> CAS_TD_BUF_LEN_SHFT, ("%s: segment size too large!", __func__)); sc->sc_txdescs[nexttx].cd_flags = htole64(txsegs[seg].ds_len << CAS_TD_BUF_LEN_SHFT); txs->txs_lastdesc = nexttx; } /* Set EOF on the last descriptor. */ #ifdef CAS_DEBUG CTR3(KTR_CAS, "%s: end of frame at segment %d, TX %d", __func__, seg, nexttx); #endif sc->sc_txdescs[txs->txs_lastdesc].cd_flags |= htole64(CAS_TD_END_OF_FRAME); /* Lastly set SOF on the first descriptor. */ #ifdef CAS_DEBUG CTR3(KTR_CAS, "%s: start of frame at segment %d, TX %d", __func__, seg, nexttx); #endif if (sc->sc_txwin += nsegs > CAS_MAXTXFREE * 2 / 3) { sc->sc_txwin = 0; sc->sc_txdescs[txs->txs_firstdesc].cd_flags |= htole64(cflags | CAS_TD_START_OF_FRAME | CAS_TD_INT_ME); } else sc->sc_txdescs[txs->txs_firstdesc].cd_flags |= htole64(cflags | CAS_TD_START_OF_FRAME); /* Sync the DMA map. */ bus_dmamap_sync(sc->sc_tdmatag, txs->txs_dmamap, BUS_DMASYNC_PREWRITE); #ifdef CAS_DEBUG CTR4(KTR_CAS, "%s: setting firstdesc=%d, lastdesc=%d, ndescs=%d", __func__, txs->txs_firstdesc, txs->txs_lastdesc, txs->txs_ndescs); #endif STAILQ_REMOVE_HEAD(&sc->sc_txfreeq, txs_q); STAILQ_INSERT_TAIL(&sc->sc_txdirtyq, txs, txs_q); txs->txs_mbuf = *m_head; sc->sc_txnext = CAS_NEXTTX(txs->txs_lastdesc); sc->sc_txfree -= txs->txs_ndescs; return (0); } static void cas_init_regs(struct cas_softc *sc) { int i; const u_char *laddr = IF_LLADDR(sc->sc_ifp); CAS_LOCK_ASSERT(sc, MA_OWNED); /* These registers are not cleared on reset. */ if ((sc->sc_flags & CAS_INITED) == 0) { /* magic values */ CAS_WRITE_4(sc, CAS_MAC_IPG0, 0); CAS_WRITE_4(sc, CAS_MAC_IPG1, 8); CAS_WRITE_4(sc, CAS_MAC_IPG2, 4); /* min frame length */ CAS_WRITE_4(sc, CAS_MAC_MIN_FRAME, ETHER_MIN_LEN); /* max frame length and max burst size */ CAS_WRITE_4(sc, CAS_MAC_MAX_BF, ((ETHER_MAX_LEN_JUMBO + ETHER_VLAN_ENCAP_LEN) << CAS_MAC_MAX_BF_FRM_SHFT) | (0x2000 << CAS_MAC_MAX_BF_BST_SHFT)); /* more magic values */ CAS_WRITE_4(sc, CAS_MAC_PREAMBLE_LEN, 0x7); CAS_WRITE_4(sc, CAS_MAC_JAM_SIZE, 0x4); CAS_WRITE_4(sc, CAS_MAC_ATTEMPT_LIMIT, 0x10); - CAS_WRITE_4(sc, CAS_MAC_CTRL_TYPE, 0x8088); + CAS_WRITE_4(sc, CAS_MAC_CTRL_TYPE, 0x8808); /* random number seed */ CAS_WRITE_4(sc, CAS_MAC_RANDOM_SEED, ((laddr[5] << 8) | laddr[4]) & 0x3ff); /* secondary MAC addresses: 0:0:0:0:0:0 */ for (i = CAS_MAC_ADDR3; i <= CAS_MAC_ADDR41; i += CAS_MAC_ADDR4 - CAS_MAC_ADDR3) CAS_WRITE_4(sc, i, 0); /* MAC control address: 01:80:c2:00:00:01 */ CAS_WRITE_4(sc, CAS_MAC_ADDR42, 0x0001); CAS_WRITE_4(sc, CAS_MAC_ADDR43, 0xc200); CAS_WRITE_4(sc, CAS_MAC_ADDR44, 0x0180); /* MAC filter address: 0:0:0:0:0:0 */ CAS_WRITE_4(sc, CAS_MAC_AFILTER0, 0); CAS_WRITE_4(sc, CAS_MAC_AFILTER1, 0); CAS_WRITE_4(sc, CAS_MAC_AFILTER2, 0); CAS_WRITE_4(sc, CAS_MAC_AFILTER_MASK1_2, 0); CAS_WRITE_4(sc, CAS_MAC_AFILTER_MASK0, 0); /* Zero the hash table. */ for (i = CAS_MAC_HASH0; i <= CAS_MAC_HASH15; i += CAS_MAC_HASH1 - CAS_MAC_HASH0) CAS_WRITE_4(sc, i, 0); sc->sc_flags |= CAS_INITED; } /* Counters need to be zeroed. */ CAS_WRITE_4(sc, CAS_MAC_NORM_COLL_CNT, 0); CAS_WRITE_4(sc, CAS_MAC_FIRST_COLL_CNT, 0); CAS_WRITE_4(sc, CAS_MAC_EXCESS_COLL_CNT, 0); CAS_WRITE_4(sc, CAS_MAC_LATE_COLL_CNT, 0); CAS_WRITE_4(sc, CAS_MAC_DEFER_TMR_CNT, 0); CAS_WRITE_4(sc, CAS_MAC_PEAK_ATTEMPTS, 0); CAS_WRITE_4(sc, CAS_MAC_RX_FRAME_COUNT, 0); CAS_WRITE_4(sc, CAS_MAC_RX_LEN_ERR_CNT, 0); CAS_WRITE_4(sc, CAS_MAC_RX_ALIGN_ERR, 0); CAS_WRITE_4(sc, CAS_MAC_RX_CRC_ERR_CNT, 0); CAS_WRITE_4(sc, CAS_MAC_RX_CODE_VIOL, 0); /* Set XOFF PAUSE time. */ CAS_WRITE_4(sc, CAS_MAC_SPC, 0x1BF0 << CAS_MAC_SPC_TIME_SHFT); /* Set the station address. */ CAS_WRITE_4(sc, CAS_MAC_ADDR0, (laddr[4] << 8) | laddr[5]); CAS_WRITE_4(sc, CAS_MAC_ADDR1, (laddr[2] << 8) | laddr[3]); CAS_WRITE_4(sc, CAS_MAC_ADDR2, (laddr[0] << 8) | laddr[1]); /* Enable MII outputs. */ CAS_WRITE_4(sc, CAS_MAC_XIF_CONF, CAS_MAC_XIF_CONF_TX_OE); } static void cas_tx_task(void *arg, int pending __unused) { struct ifnet *ifp; ifp = (struct ifnet *)arg; cas_start(ifp); } static inline void cas_txkick(struct cas_softc *sc) { /* * Update the TX kick register. This register has to point to the * descriptor after the last valid one and for optimum performance * should be incremented in multiples of 4 (the DMA engine fetches/ * updates descriptors in batches of 4). */ #ifdef CAS_DEBUG CTR3(KTR_CAS, "%s: %s: kicking TX %d", device_get_name(sc->sc_dev), __func__, sc->sc_txnext); #endif CAS_CDSYNC(sc, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); CAS_WRITE_4(sc, CAS_TX_KICK3, sc->sc_txnext); } static void cas_start(struct ifnet *ifp) { struct cas_softc *sc = ifp->if_softc; struct mbuf *m; int kicked, ntx; CAS_LOCK(sc); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || (sc->sc_flags & CAS_LINK) == 0) { CAS_UNLOCK(sc); return; } if (sc->sc_txfree < CAS_MAXTXFREE / 4) cas_tint(sc); #ifdef CAS_DEBUG CTR4(KTR_CAS, "%s: %s: txfree %d, txnext %d", device_get_name(sc->sc_dev), __func__, sc->sc_txfree, sc->sc_txnext); #endif ntx = 0; kicked = 0; for (; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) && sc->sc_txfree > 1;) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m); if (m == NULL) break; if (cas_load_txmbuf(sc, &m) != 0) { if (m == NULL) break; ifp->if_drv_flags |= IFF_DRV_OACTIVE; IFQ_DRV_PREPEND(&ifp->if_snd, m); break; } if ((sc->sc_txnext % 4) == 0) { cas_txkick(sc); kicked = 1; } else kicked = 0; ntx++; BPF_MTAP(ifp, m); } if (ntx > 0) { if (kicked == 0) cas_txkick(sc); #ifdef CAS_DEBUG CTR2(KTR_CAS, "%s: packets enqueued, OWN on %d", device_get_name(sc->sc_dev), sc->sc_txnext); #endif /* Set a watchdog timer in case the chip flakes out. */ sc->sc_wdog_timer = 5; #ifdef CAS_DEBUG CTR3(KTR_CAS, "%s: %s: watchdog %d", device_get_name(sc->sc_dev), __func__, sc->sc_wdog_timer); #endif } CAS_UNLOCK(sc); } static void cas_tint(struct cas_softc *sc) { struct ifnet *ifp = sc->sc_ifp; struct cas_txsoft *txs; int progress; uint32_t txlast; #ifdef CAS_DEBUG int i; CAS_LOCK_ASSERT(sc, MA_OWNED); CTR2(KTR_CAS, "%s: %s", device_get_name(sc->sc_dev), __func__); #endif /* * Go through our TX list and free mbufs for those * frames that have been transmitted. */ progress = 0; CAS_CDSYNC(sc, BUS_DMASYNC_POSTREAD); while ((txs = STAILQ_FIRST(&sc->sc_txdirtyq)) != NULL) { #ifdef CAS_DEBUG if ((ifp->if_flags & IFF_DEBUG) != 0) { printf(" txsoft %p transmit chain:\n", txs); for (i = txs->txs_firstdesc;; i = CAS_NEXTTX(i)) { printf("descriptor %d: ", i); printf("cd_flags: 0x%016llx\t", (long long)le64toh( sc->sc_txdescs[i].cd_flags)); printf("cd_buf_ptr: 0x%016llx\n", (long long)le64toh( sc->sc_txdescs[i].cd_buf_ptr)); if (i == txs->txs_lastdesc) break; } } #endif /* * In theory, we could harvest some descriptors before * the ring is empty, but that's a bit complicated. * * CAS_TX_COMPn points to the last descriptor * processed + 1. */ txlast = CAS_READ_4(sc, CAS_TX_COMP3); #ifdef CAS_DEBUG CTR4(KTR_CAS, "%s: txs->txs_firstdesc = %d, " "txs->txs_lastdesc = %d, txlast = %d", __func__, txs->txs_firstdesc, txs->txs_lastdesc, txlast); #endif if (txs->txs_firstdesc <= txs->txs_lastdesc) { if ((txlast >= txs->txs_firstdesc) && (txlast <= txs->txs_lastdesc)) break; } else { /* Ick -- this command wraps. */ if ((txlast >= txs->txs_firstdesc) || (txlast <= txs->txs_lastdesc)) break; } #ifdef CAS_DEBUG CTR1(KTR_CAS, "%s: releasing a descriptor", __func__); #endif STAILQ_REMOVE_HEAD(&sc->sc_txdirtyq, txs_q); sc->sc_txfree += txs->txs_ndescs; bus_dmamap_sync(sc->sc_tdmatag, txs->txs_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_tdmatag, txs->txs_dmamap); if (txs->txs_mbuf != NULL) { m_freem(txs->txs_mbuf); txs->txs_mbuf = NULL; } STAILQ_INSERT_TAIL(&sc->sc_txfreeq, txs, txs_q); ifp->if_opackets++; progress = 1; } #ifdef CAS_DEBUG - CTR4(KTR_CAS, "%s: CAS_TX_STATE_MACHINE %x CAS_TX_DESC_BASE %llx " + CTR5(KTR_CAS, "%s: CAS_TX_SM1 %x CAS_TX_SM2 %x CAS_TX_DESC_BASE %llx " "CAS_TX_COMP3 %x", - __func__, CAS_READ_4(sc, CAS_TX_STATE_MACHINE), - ((long long)CAS_READ_4(sc, CAS_TX_DESC_BASE_HI3) << 32) | - CAS_READ_4(sc, CAS_TX_DESC_BASE_LO3), + __func__, CAS_READ_4(sc, CAS_TX_SM1), CAS_READ_4(sc, CAS_TX_SM2), + ((long long)CAS_READ_4(sc, CAS_TX_DESC3_BASE_HI) << 32) | + CAS_READ_4(sc, CAS_TX_DESC3_BASE_LO), CAS_READ_4(sc, CAS_TX_COMP3)); #endif if (progress) { /* We freed some descriptors, so reset IFF_DRV_OACTIVE. */ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if (STAILQ_EMPTY(&sc->sc_txdirtyq)) sc->sc_wdog_timer = 0; } #ifdef CAS_DEBUG CTR3(KTR_CAS, "%s: %s: watchdog %d", device_get_name(sc->sc_dev), __func__, sc->sc_wdog_timer); #endif } static void cas_rint_timeout(void *arg) { struct cas_softc *sc = arg; CAS_LOCK_ASSERT(sc, MA_NOTOWNED); cas_rint(sc); } static void cas_rint(struct cas_softc *sc) { struct cas_rxdsoft *rxds, *rxds2; struct ifnet *ifp = sc->sc_ifp; struct mbuf *m, *m2; uint64_t word1, word2, word3, word4; uint32_t rxhead; u_int idx, idx2, len, off, skip; CAS_LOCK_ASSERT(sc, MA_NOTOWNED); callout_stop(&sc->sc_rx_ch); #ifdef CAS_DEBUG CTR2(KTR_CAS, "%s: %s", device_get_name(sc->sc_dev), __func__); #endif #define PRINTWORD(n, delimiter) \ printf("word ## n: 0x%016llx%c", (long long)word ## n, delimiter) #define SKIPASSERT(n) \ KASSERT(sc->sc_rxcomps[sc->sc_rxcptr].crc_word ## n == 0, \ ("%s: word ## n not 0", __func__)) #define WORDTOH(n) \ word ## n = le64toh(sc->sc_rxcomps[sc->sc_rxcptr].crc_word ## n) /* * Read the completion head register once. This limits * how long the following loop can execute. */ rxhead = CAS_READ_4(sc, CAS_RX_COMP_HEAD); #ifdef CAS_DEBUG CTR4(KTR_CAS, "%s: sc->sc_rxcptr %d, sc->sc_rxdptr %d, head %d", - __func__, sc->rxcptr, sc->sc_rxdptr, rxhead); + __func__, sc->sc_rxcptr, sc->sc_rxdptr, rxhead); #endif skip = 0; CAS_CDSYNC(sc, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); for (; sc->sc_rxcptr != rxhead; sc->sc_rxcptr = CAS_NEXTRXCOMP(sc->sc_rxcptr)) { if (skip != 0) { SKIPASSERT(1); SKIPASSERT(2); SKIPASSERT(3); --skip; goto skip; } WORDTOH(1); WORDTOH(2); WORDTOH(3); WORDTOH(4); #ifdef CAS_DEBUG if ((ifp->if_flags & IFF_DEBUG) != 0) { printf(" completion %d: ", sc->sc_rxcptr); PRINTWORD(1, '\t'); PRINTWORD(2, '\t'); PRINTWORD(3, '\t'); PRINTWORD(4, '\n'); } #endif if (__predict_false( (word1 & CAS_RC1_TYPE_MASK) == CAS_RC1_TYPE_HW || (word4 & CAS_RC4_ZERO) != 0)) { /* * The descriptor is still marked as owned, although * it is supposed to have completed. This has been * observed on some machines. Just exiting here * might leave the packet sitting around until another * one arrives to trigger a new interrupt, which is * generally undesirable, so set up a timeout. */ callout_reset(&sc->sc_rx_ch, CAS_RXOWN_TICKS, cas_rint_timeout, sc); break; } if (__predict_false( (word4 & (CAS_RC4_BAD | CAS_RC4_LEN_MMATCH)) != 0)) { ifp->if_ierrors++; device_printf(sc->sc_dev, "receive error: CRC error\n"); continue; } KASSERT(CAS_GET(word1, CAS_RC1_DATA_SIZE) == 0 || CAS_GET(word2, CAS_RC2_HDR_SIZE) == 0, ("%s: data and header present", __func__)); KASSERT((word1 & CAS_RC1_SPLIT_PKT) == 0 || CAS_GET(word2, CAS_RC2_HDR_SIZE) == 0, ("%s: split and header present", __func__)); KASSERT(CAS_GET(word1, CAS_RC1_DATA_SIZE) == 0 || (word1 & CAS_RC1_RELEASE_HDR) == 0, ("%s: data present but header release", __func__)); KASSERT(CAS_GET(word2, CAS_RC2_HDR_SIZE) == 0 || (word1 & CAS_RC1_RELEASE_DATA) == 0, ("%s: header present but data release", __func__)); if ((len = CAS_GET(word2, CAS_RC2_HDR_SIZE)) != 0) { idx = CAS_GET(word2, CAS_RC2_HDR_INDEX); off = CAS_GET(word2, CAS_RC2_HDR_OFF); #ifdef CAS_DEBUG CTR4(KTR_CAS, "%s: hdr at idx %d, off %d, len %d", __func__, idx, off, len); #endif rxds = &sc->sc_rxdsoft[idx]; MGETHDR(m, M_DONTWAIT, MT_DATA); if (m != NULL) { refcount_acquire(&rxds->rxds_refcount); bus_dmamap_sync(sc->sc_rdmatag, rxds->rxds_dmamap, BUS_DMASYNC_POSTREAD); #if __FreeBSD_version < 800016 MEXTADD(m, (caddr_t)rxds->rxds_buf + off * 256 + ETHER_ALIGN, len, cas_free, rxds, M_RDONLY, EXT_NET_DRV); #else MEXTADD(m, (caddr_t)rxds->rxds_buf + off * 256 + ETHER_ALIGN, len, cas_free, sc, (void *)(uintptr_t)idx, M_RDONLY, EXT_NET_DRV); #endif if ((m->m_flags & M_EXT) == 0) { m_freem(m); m = NULL; } } if (m != NULL) { m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len = len; ifp->if_ipackets++; if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) cas_rxcksum(m, CAS_GET(word4, CAS_RC4_TCP_CSUM)); /* Pass it on. */ (*ifp->if_input)(ifp, m); } else ifp->if_ierrors++; if ((word1 & CAS_RC1_RELEASE_HDR) != 0 && refcount_release(&rxds->rxds_refcount) != 0) cas_add_rxdesc(sc, idx); } else if ((len = CAS_GET(word1, CAS_RC1_DATA_SIZE)) != 0) { idx = CAS_GET(word1, CAS_RC1_DATA_INDEX); off = CAS_GET(word1, CAS_RC1_DATA_OFF); #ifdef CAS_DEBUG CTR4(KTR_CAS, "%s: data at idx %d, off %d, len %d", __func__, idx, off, len); #endif rxds = &sc->sc_rxdsoft[idx]; MGETHDR(m, M_DONTWAIT, MT_DATA); if (m != NULL) { refcount_acquire(&rxds->rxds_refcount); off += ETHER_ALIGN; m->m_len = min(CAS_PAGE_SIZE - off, len); bus_dmamap_sync(sc->sc_rdmatag, rxds->rxds_dmamap, BUS_DMASYNC_POSTREAD); #if __FreeBSD_version < 800016 MEXTADD(m, (caddr_t)rxds->rxds_buf + off, m->m_len, cas_free, rxds, M_RDONLY, EXT_NET_DRV); #else MEXTADD(m, (caddr_t)rxds->rxds_buf + off, m->m_len, cas_free, sc, (void *)(uintptr_t)idx, M_RDONLY, EXT_NET_DRV); #endif if ((m->m_flags & M_EXT) == 0) { m_freem(m); m = NULL; } } idx2 = 0; m2 = NULL; rxds2 = NULL; if ((word1 & CAS_RC1_SPLIT_PKT) != 0) { KASSERT((word1 & CAS_RC1_RELEASE_NEXT) != 0, ("%s: split but no release next", __func__)); idx2 = CAS_GET(word2, CAS_RC2_NEXT_INDEX); #ifdef CAS_DEBUG CTR2(KTR_CAS, "%s: split at idx %d", __func__, idx2); #endif rxds2 = &sc->sc_rxdsoft[idx2]; if (m != NULL) { MGET(m2, M_DONTWAIT, MT_DATA); if (m2 != NULL) { refcount_acquire( &rxds2->rxds_refcount); m2->m_len = len - m->m_len; bus_dmamap_sync( sc->sc_rdmatag, rxds2->rxds_dmamap, BUS_DMASYNC_POSTREAD); #if __FreeBSD_version < 800016 MEXTADD(m2, (caddr_t)rxds2->rxds_buf, m2->m_len, cas_free, rxds2, M_RDONLY, EXT_NET_DRV); #else MEXTADD(m2, (caddr_t)rxds2->rxds_buf, m2->m_len, cas_free, sc, (void *)(uintptr_t)idx2, M_RDONLY, EXT_NET_DRV); #endif if ((m2->m_flags & M_EXT) == 0) { m_freem(m2); m2 = NULL; } } } if (m2 != NULL) m->m_next = m2; else if (m != NULL) { m_freem(m); m = NULL; } } if (m != NULL) { m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = len; ifp->if_ipackets++; if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) cas_rxcksum(m, CAS_GET(word4, CAS_RC4_TCP_CSUM)); /* Pass it on. */ (*ifp->if_input)(ifp, m); } else ifp->if_ierrors++; if ((word1 & CAS_RC1_RELEASE_DATA) != 0 && refcount_release(&rxds->rxds_refcount) != 0) cas_add_rxdesc(sc, idx); if ((word1 & CAS_RC1_SPLIT_PKT) != 0 && refcount_release(&rxds2->rxds_refcount) != 0) cas_add_rxdesc(sc, idx2); } skip = CAS_GET(word1, CAS_RC1_SKIP); skip: cas_rxcompinit(&sc->sc_rxcomps[sc->sc_rxcptr]); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; } CAS_CDSYNC(sc, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); CAS_WRITE_4(sc, CAS_RX_COMP_TAIL, sc->sc_rxcptr); #undef PRINTWORD #undef SKIPASSERT #undef WORDTOH #ifdef CAS_DEBUG CTR4(KTR_CAS, "%s: done sc->sc_rxcptr %d, sc->sc_rxdptr %d, head %d", - __func__, sc->rxcptr, sc->sc_rxdptr, + __func__, sc->sc_rxcptr, sc->sc_rxdptr, CAS_READ_4(sc, CAS_RX_COMP_HEAD)); #endif } static void cas_free(void *arg1, void *arg2) { struct cas_rxdsoft *rxds; struct cas_softc *sc; u_int idx; #if __FreeBSD_version < 800016 rxds = arg2; sc = rxds->rxds_sc; idx = rxds->rxds_idx; #else sc = arg1; idx = (uintptr_t)arg2; rxds = &sc->sc_rxdsoft[idx]; #endif if (refcount_release(&rxds->rxds_refcount) == 0) return; /* * NB: this function can be called via m_freem(9) within * this driver! */ cas_add_rxdesc(sc, idx); } static inline void cas_add_rxdesc(struct cas_softc *sc, u_int idx) { u_int locked; if ((locked = CAS_LOCK_OWNED(sc)) == 0) CAS_LOCK(sc); bus_dmamap_sync(sc->sc_rdmatag, sc->sc_rxdsoft[idx].rxds_dmamap, BUS_DMASYNC_PREREAD); CAS_UPDATE_RXDESC(sc, sc->sc_rxdptr, idx); sc->sc_rxdptr = CAS_NEXTRXDESC(sc->sc_rxdptr); /* * Update the RX kick register. This register has to point to the * descriptor after the last valid one (before the current batch) * and for optimum performance should be incremented in multiples * of 4 (the DMA engine fetches/updates descriptors in batches of 4). */ if ((sc->sc_rxdptr % 4) == 0) { CAS_CDSYNC(sc, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); CAS_WRITE_4(sc, CAS_RX_KICK, (sc->sc_rxdptr + CAS_NRXDESC - 4) & CAS_NRXDESC_MASK); } if (locked == 0) CAS_UNLOCK(sc); } static void cas_eint(struct cas_softc *sc, u_int status) { struct ifnet *ifp = sc->sc_ifp; CAS_LOCK_ASSERT(sc, MA_NOTOWNED); ifp->if_ierrors++; device_printf(sc->sc_dev, "%s: status 0x%x", __func__, status); if ((status & CAS_INTR_PCI_ERROR_INT) != 0) { status = CAS_READ_4(sc, CAS_ERROR_STATUS); printf(", PCI bus error 0x%x", status); if ((status & CAS_ERROR_OTHER) != 0) { status = pci_read_config(sc->sc_dev, PCIR_STATUS, 2); printf(", PCI status 0x%x", status); pci_write_config(sc->sc_dev, PCIR_STATUS, status, 2); } } printf("\n"); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; cas_init(sc); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) taskqueue_enqueue(sc->sc_tq, &sc->sc_tx_task); } static int cas_intr(void *v) { struct cas_softc *sc = v; if (__predict_false((CAS_READ_4(sc, CAS_STATUS_ALIAS) & CAS_INTR_SUMMARY) == 0)) return (FILTER_STRAY); /* Disable interrupts. */ CAS_WRITE_4(sc, CAS_INTMASK, 0xffffffff); taskqueue_enqueue(sc->sc_tq, &sc->sc_intr_task); return (FILTER_HANDLED); } static void cas_intr_task(void *arg, int pending __unused) { struct cas_softc *sc = arg; struct ifnet *ifp = sc->sc_ifp; uint32_t status, status2; CAS_LOCK_ASSERT(sc, MA_NOTOWNED); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; status = CAS_READ_4(sc, CAS_STATUS); if (__predict_false((status & CAS_INTR_SUMMARY) == 0)) goto done; #ifdef CAS_DEBUG CTR4(KTR_CAS, "%s: %s: cplt %x, status %x", device_get_name(sc->sc_dev), __func__, - (status >> CAS_STATUS_TX_COMP3_SHIFT), (u_int)status); + (status >> CAS_STATUS_TX_COMP3_SHFT), (u_int)status); /* * PCS interrupts must be cleared, otherwise no traffic is passed! */ if ((status & CAS_INTR_PCS_INT) != 0) { status2 = CAS_READ_4(sc, CAS_PCS_INTR_STATUS) | CAS_READ_4(sc, CAS_PCS_INTR_STATUS); if ((status2 & CAS_PCS_INTR_LINK) != 0) device_printf(sc->sc_dev, "%s: PCS link status changed\n", __func__); } if ((status & CAS_MAC_CTRL_STATUS) != 0) { status2 = CAS_READ_4(sc, CAS_MAC_CTRL_STATUS); if ((status2 & CAS_MAC_CTRL_PAUSE) != 0) device_printf(sc->sc_dev, "%s: PAUSE received (PAUSE time %d slots)\n", __func__, (status2 & CAS_MAC_CTRL_STATUS_PT_MASK) >> CAS_MAC_CTRL_STATUS_PT_SHFT); if ((status2 & CAS_MAC_CTRL_PAUSE) != 0) device_printf(sc->sc_dev, "%s: transited to PAUSE state\n", __func__); if ((status2 & CAS_MAC_CTRL_NON_PAUSE) != 0) device_printf(sc->sc_dev, "%s: transited to non-PAUSE state\n", __func__); } if ((status & CAS_INTR_MIF) != 0) device_printf(sc->sc_dev, "%s: MIF interrupt\n", __func__); #endif if (__predict_false((status & (CAS_INTR_TX_TAG_ERR | CAS_INTR_RX_TAG_ERR | CAS_INTR_RX_LEN_MMATCH | CAS_INTR_PCI_ERROR_INT)) != 0)) { cas_eint(sc, status); return; } if (__predict_false(status & CAS_INTR_TX_MAC_INT)) { status2 = CAS_READ_4(sc, CAS_MAC_TX_STATUS); if ((status2 & (CAS_MAC_TX_UNDERRUN | CAS_MAC_TX_MAX_PKT_ERR)) != 0) sc->sc_ifp->if_oerrors++; else if ((status2 & ~CAS_MAC_TX_FRAME_XMTD) != 0) device_printf(sc->sc_dev, "MAC TX fault, status %x\n", status2); } if (__predict_false(status & CAS_INTR_RX_MAC_INT)) { status2 = CAS_READ_4(sc, CAS_MAC_RX_STATUS); if ((status2 & CAS_MAC_RX_OVERFLOW) != 0) sc->sc_ifp->if_ierrors++; else if ((status2 & ~CAS_MAC_RX_FRAME_RCVD) != 0) device_printf(sc->sc_dev, "MAC RX fault, status %x\n", status2); } if ((status & (CAS_INTR_RX_DONE | CAS_INTR_RX_BUF_NA | CAS_INTR_RX_COMP_FULL | CAS_INTR_RX_BUF_AEMPTY | CAS_INTR_RX_COMP_AFULL)) != 0) { cas_rint(sc); #ifdef CAS_DEBUG if (__predict_false((status & (CAS_INTR_RX_BUF_NA | CAS_INTR_RX_COMP_FULL | CAS_INTR_RX_BUF_AEMPTY | CAS_INTR_RX_COMP_AFULL)) != 0)) device_printf(sc->sc_dev, "RX fault, status %x\n", status); #endif } if ((status & (CAS_INTR_TX_INT_ME | CAS_INTR_TX_ALL | CAS_INTR_TX_DONE)) != 0) { CAS_LOCK(sc); cas_tint(sc); CAS_UNLOCK(sc); } if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) taskqueue_enqueue(sc->sc_tq, &sc->sc_tx_task); status = CAS_READ_4(sc, CAS_STATUS_ALIAS); if (__predict_false((status & CAS_INTR_SUMMARY) != 0)) { taskqueue_enqueue(sc->sc_tq, &sc->sc_intr_task); return; } done: /* Re-enable interrupts. */ CAS_WRITE_4(sc, CAS_INTMASK, ~(CAS_INTR_TX_INT_ME | CAS_INTR_TX_TAG_ERR | CAS_INTR_RX_DONE | CAS_INTR_RX_BUF_NA | CAS_INTR_RX_TAG_ERR | CAS_INTR_RX_COMP_FULL | CAS_INTR_RX_BUF_AEMPTY | CAS_INTR_RX_COMP_AFULL | CAS_INTR_RX_LEN_MMATCH | CAS_INTR_PCI_ERROR_INT #ifdef CAS_DEBUG | CAS_INTR_PCS_INT | CAS_INTR_MIF #endif )); } static void cas_watchdog(struct cas_softc *sc) { struct ifnet *ifp = sc->sc_ifp; CAS_LOCK_ASSERT(sc, MA_OWNED); #ifdef CAS_DEBUG CTR4(KTR_CAS, - "%s: CAS_RX_CONFIG %x CAS_MAC_RX_STATUS %x CAS_MAC_RX_CONFIG %x", - __func__, CAS_READ_4(sc, CAS_RX_CONFIG), + "%s: CAS_RX_CONF %x CAS_MAC_RX_STATUS %x CAS_MAC_RX_CONF %x", + __func__, CAS_READ_4(sc, CAS_RX_CONF), CAS_READ_4(sc, CAS_MAC_RX_STATUS), - CAS_READ_4(sc, CAS_MAC_RX_CONFIG)); + CAS_READ_4(sc, CAS_MAC_RX_CONF)); CTR4(KTR_CAS, - "%s: CAS_TX_CONFIG %x CAS_MAC_TX_STATUS %x CAS_MAC_TX_CONFIG %x", - __func__, CAS_READ_4(sc, CAS_TX_CONFIG), + "%s: CAS_TX_CONF %x CAS_MAC_TX_STATUS %x CAS_MAC_TX_CONF %x", + __func__, CAS_READ_4(sc, CAS_TX_CONF), CAS_READ_4(sc, CAS_MAC_TX_STATUS), - CAS_READ_4(sc, CAS_MAC_TX_CONFIG)); + CAS_READ_4(sc, CAS_MAC_TX_CONF)); #endif if (sc->sc_wdog_timer == 0 || --sc->sc_wdog_timer != 0) return; if ((sc->sc_flags & CAS_LINK) != 0) device_printf(sc->sc_dev, "device timeout\n"); else if (bootverbose) device_printf(sc->sc_dev, "device timeout (no link)\n"); ++ifp->if_oerrors; /* Try to get more packets going. */ ifp->if_drv_flags &= ~IFF_DRV_RUNNING; cas_init_locked(sc); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) taskqueue_enqueue(sc->sc_tq, &sc->sc_tx_task); } static void cas_mifinit(struct cas_softc *sc) { /* Configure the MIF in frame mode. */ CAS_WRITE_4(sc, CAS_MIF_CONF, CAS_READ_4(sc, CAS_MIF_CONF) & ~CAS_MIF_CONF_BB_MODE); CAS_BARRIER(sc, CAS_MIF_CONF, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); } /* * MII interface * * The MII interface supports at least three different operating modes: * * Bitbang mode is implemented using data, clock and output enable registers. * * Frame mode is implemented by loading a complete frame into the frame * register and polling the valid bit for completion. * * Polling mode uses the frame register but completion is indicated by * an interrupt. * */ static int cas_mii_readreg(device_t dev, int phy, int reg) { struct cas_softc *sc; int n; uint32_t v; #ifdef CAS_DEBUG_PHY printf("%s: phy %d reg %d\n", __func__, phy, reg); #endif sc = device_get_softc(dev); if ((sc->sc_flags & CAS_SERDES) != 0) { switch (reg) { case MII_BMCR: reg = CAS_PCS_CTRL; break; case MII_BMSR: reg = CAS_PCS_STATUS; break; case MII_PHYIDR1: case MII_PHYIDR2: return (0); case MII_ANAR: reg = CAS_PCS_ANAR; break; case MII_ANLPAR: reg = CAS_PCS_ANLPAR; break; case MII_EXTSR: return (EXTSR_1000XFDX | EXTSR_1000XHDX); default: device_printf(sc->sc_dev, "%s: unhandled register %d\n", __func__, reg); return (0); } return (CAS_READ_4(sc, reg)); } /* Construct the frame command. */ v = CAS_MIF_FRAME_READ | (phy << CAS_MIF_FRAME_PHY_SHFT) | (reg << CAS_MIF_FRAME_REG_SHFT); CAS_WRITE_4(sc, CAS_MIF_FRAME, v); CAS_BARRIER(sc, CAS_MIF_FRAME, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); for (n = 0; n < 100; n++) { DELAY(1); v = CAS_READ_4(sc, CAS_MIF_FRAME); if (v & CAS_MIF_FRAME_TA_LSB) return (v & CAS_MIF_FRAME_DATA); } device_printf(sc->sc_dev, "%s: timed out\n", __func__); return (0); } static int cas_mii_writereg(device_t dev, int phy, int reg, int val) { struct cas_softc *sc; int n; uint32_t v; #ifdef CAS_DEBUG_PHY printf("%s: phy %d reg %d val %x\n", phy, reg, val, __func__); #endif sc = device_get_softc(dev); if ((sc->sc_flags & CAS_SERDES) != 0) { switch (reg) { case MII_BMSR: reg = CAS_PCS_STATUS; break; case MII_BMCR: reg = CAS_PCS_CTRL; if ((val & CAS_PCS_CTRL_RESET) == 0) break; CAS_WRITE_4(sc, CAS_PCS_CTRL, val); CAS_BARRIER(sc, CAS_PCS_CTRL, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (!cas_bitwait(sc, CAS_PCS_CTRL, CAS_PCS_CTRL_RESET, 0)) device_printf(sc->sc_dev, "cannot reset PCS\n"); /* FALLTHROUGH */ case MII_ANAR: CAS_WRITE_4(sc, CAS_PCS_CONF, 0); CAS_BARRIER(sc, CAS_PCS_CONF, 4, BUS_SPACE_BARRIER_WRITE); CAS_WRITE_4(sc, CAS_PCS_ANAR, val); CAS_BARRIER(sc, CAS_PCS_ANAR, 4, BUS_SPACE_BARRIER_WRITE); CAS_WRITE_4(sc, CAS_PCS_SERDES_CTRL, CAS_PCS_SERDES_CTRL_ESD); CAS_BARRIER(sc, CAS_PCS_CONF, 4, BUS_SPACE_BARRIER_WRITE); CAS_WRITE_4(sc, CAS_PCS_CONF, CAS_PCS_CONF_EN); CAS_BARRIER(sc, CAS_PCS_CONF, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return (0); case MII_ANLPAR: reg = CAS_PCS_ANLPAR; break; default: device_printf(sc->sc_dev, "%s: unhandled register %d\n", __func__, reg); return (0); } CAS_WRITE_4(sc, reg, val); CAS_BARRIER(sc, reg, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return (0); } /* Construct the frame command. */ v = CAS_MIF_FRAME_WRITE | (phy << CAS_MIF_FRAME_PHY_SHFT) | (reg << CAS_MIF_FRAME_REG_SHFT) | (val & CAS_MIF_FRAME_DATA); CAS_WRITE_4(sc, CAS_MIF_FRAME, v); CAS_BARRIER(sc, CAS_MIF_FRAME, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); for (n = 0; n < 100; n++) { DELAY(1); v = CAS_READ_4(sc, CAS_MIF_FRAME); if (v & CAS_MIF_FRAME_TA_LSB) return (1); } device_printf(sc->sc_dev, "%s: timed out\n", __func__); return (0); } static void cas_mii_statchg(device_t dev) { struct cas_softc *sc; struct ifnet *ifp; int gigabit; uint32_t rxcfg, txcfg, v; sc = device_get_softc(dev); ifp = sc->sc_ifp; CAS_LOCK_ASSERT(sc, MA_OWNED); #ifdef CAS_DEBUG if ((ifp->if_flags & IFF_DEBUG) != 0) device_printf(sc->sc_dev, "%s: status changen", __func__); #endif if ((sc->sc_mii->mii_media_status & IFM_ACTIVE) != 0 && IFM_SUBTYPE(sc->sc_mii->mii_media_active) != IFM_NONE) sc->sc_flags |= CAS_LINK; else sc->sc_flags &= ~CAS_LINK; switch (IFM_SUBTYPE(sc->sc_mii->mii_media_active)) { case IFM_1000_SX: case IFM_1000_LX: case IFM_1000_CX: case IFM_1000_T: gigabit = 1; break; default: gigabit = 0; } /* * The configuration done here corresponds to the steps F) and * G) and as far as enabling of RX and TX MAC goes also step H) * of the initialization sequence outlined in section 11.2.1 of * the Cassini+ ASIC Specification. */ rxcfg = CAS_READ_4(sc, CAS_MAC_RX_CONF); rxcfg &= ~(CAS_MAC_RX_CONF_EN | CAS_MAC_RX_CONF_CARR); txcfg = CAS_MAC_TX_CONF_EN_IPG0 | CAS_MAC_TX_CONF_NGU | CAS_MAC_TX_CONF_NGUL; if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) != 0) txcfg |= CAS_MAC_TX_CONF_ICARR | CAS_MAC_TX_CONF_ICOLLIS; else if (gigabit != 0) { rxcfg |= CAS_MAC_RX_CONF_CARR; txcfg |= CAS_MAC_TX_CONF_CARR; } CAS_WRITE_4(sc, CAS_MAC_TX_CONF, 0); CAS_BARRIER(sc, CAS_MAC_TX_CONF, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (!cas_bitwait(sc, CAS_MAC_TX_CONF, CAS_MAC_TX_CONF_EN, 0)) device_printf(sc->sc_dev, "cannot disable TX MAC\n"); CAS_WRITE_4(sc, CAS_MAC_TX_CONF, txcfg); CAS_WRITE_4(sc, CAS_MAC_RX_CONF, 0); CAS_BARRIER(sc, CAS_MAC_RX_CONF, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (!cas_bitwait(sc, CAS_MAC_RX_CONF, CAS_MAC_RX_CONF_EN, 0)) device_printf(sc->sc_dev, "cannot disable RX MAC\n"); CAS_WRITE_4(sc, CAS_MAC_RX_CONF, rxcfg); v = CAS_READ_4(sc, CAS_MAC_CTRL_CONF) & ~(CAS_MAC_CTRL_CONF_TXP | CAS_MAC_CTRL_CONF_RXP); -#ifdef notyet if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_ETH_RXPAUSE) != 0) v |= CAS_MAC_CTRL_CONF_RXP; if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_ETH_TXPAUSE) != 0) v |= CAS_MAC_CTRL_CONF_TXP; -#endif CAS_WRITE_4(sc, CAS_MAC_CTRL_CONF, v); /* * All supported chips have a bug causing incorrect checksum * to be calculated when letting them strip the FCS in half- * duplex mode. In theory we could disable FCS stripping and * manually adjust the checksum accordingly. It seems to make * more sense to optimze for the common case and just disable * hardware checksumming in half-duplex mode though. */ if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) == 0) { ifp->if_capenable &= ~IFCAP_HWCSUM; ifp->if_hwassist = 0; } else if ((sc->sc_flags & CAS_NO_CSUM) == 0) { ifp->if_capenable = ifp->if_capabilities; ifp->if_hwassist = CAS_CSUM_FEATURES; } if (sc->sc_variant == CAS_SATURN) { if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) == 0) /* silicon bug workaround */ CAS_WRITE_4(sc, CAS_MAC_PREAMBLE_LEN, 0x41); else CAS_WRITE_4(sc, CAS_MAC_PREAMBLE_LEN, 0x7); } if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) == 0 && gigabit != 0) CAS_WRITE_4(sc, CAS_MAC_SLOT_TIME, CAS_MAC_SLOT_TIME_CARR); else CAS_WRITE_4(sc, CAS_MAC_SLOT_TIME, CAS_MAC_SLOT_TIME_NORM); /* XIF Configuration */ v = CAS_MAC_XIF_CONF_TX_OE | CAS_MAC_XIF_CONF_LNKLED; if ((sc->sc_flags & CAS_SERDES) == 0) { if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) == 0) v |= CAS_MAC_XIF_CONF_NOECHO; v |= CAS_MAC_XIF_CONF_BUF_OE; } if (gigabit != 0) v |= CAS_MAC_XIF_CONF_GMII; if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) != 0) v |= CAS_MAC_XIF_CONF_FDXLED; CAS_WRITE_4(sc, CAS_MAC_XIF_CONF, v); if ((sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) != 0 && (sc->sc_flags & CAS_LINK) != 0) { CAS_WRITE_4(sc, CAS_MAC_TX_CONF, txcfg | CAS_MAC_TX_CONF_EN); CAS_WRITE_4(sc, CAS_MAC_RX_CONF, rxcfg | CAS_MAC_RX_CONF_EN); } } static int cas_mediachange(struct ifnet *ifp) { struct cas_softc *sc = ifp->if_softc; int error; /* XXX add support for serial media. */ CAS_LOCK(sc); error = mii_mediachg(sc->sc_mii); CAS_UNLOCK(sc); return (error); } static void cas_mediastatus(struct ifnet *ifp, struct ifmediareq *ifmr) { struct cas_softc *sc = ifp->if_softc; CAS_LOCK(sc); if ((ifp->if_flags & IFF_UP) == 0) { CAS_UNLOCK(sc); return; } mii_pollstat(sc->sc_mii); ifmr->ifm_active = sc->sc_mii->mii_media_active; ifmr->ifm_status = sc->sc_mii->mii_media_status; CAS_UNLOCK(sc); } static int cas_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct cas_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; int error; error = 0; switch (cmd) { case SIOCSIFFLAGS: CAS_LOCK(sc); if ((ifp->if_flags & IFF_UP) != 0) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0 && ((ifp->if_flags ^ sc->sc_ifflags) & (IFF_ALLMULTI | IFF_PROMISC)) != 0) cas_setladrf(sc); else cas_init_locked(sc); } else if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) cas_stop(ifp); sc->sc_ifflags = ifp->if_flags; CAS_UNLOCK(sc); break; case SIOCSIFCAP: CAS_LOCK(sc); if ((sc->sc_flags & CAS_NO_CSUM) != 0) { error = EINVAL; CAS_UNLOCK(sc); break; } ifp->if_capenable = ifr->ifr_reqcap; if ((ifp->if_capenable & IFCAP_TXCSUM) != 0) ifp->if_hwassist = CAS_CSUM_FEATURES; else ifp->if_hwassist = 0; CAS_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: CAS_LOCK(sc); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) cas_setladrf(sc); CAS_UNLOCK(sc); break; case SIOCSIFMTU: if ((ifr->ifr_mtu < ETHERMIN) || (ifr->ifr_mtu > ETHERMTU_JUMBO)) error = EINVAL; else ifp->if_mtu = ifr->ifr_mtu; break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->sc_mii->mii_media, cmd); break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } static void cas_setladrf(struct cas_softc *sc) { struct ifnet *ifp = sc->sc_ifp; struct ifmultiaddr *inm; int i; uint32_t hash[16]; uint32_t crc, v; CAS_LOCK_ASSERT(sc, MA_OWNED); /* Get the current RX configuration. */ v = CAS_READ_4(sc, CAS_MAC_RX_CONF); /* * Turn off promiscuous mode, promiscuous group mode (all multicast), * and hash filter. Depending on the case, the right bit will be * enabled. */ v &= ~(CAS_MAC_RX_CONF_PROMISC | CAS_MAC_RX_CONF_HFILTER | CAS_MAC_RX_CONF_PGRP); CAS_WRITE_4(sc, CAS_MAC_RX_CONF, v); CAS_BARRIER(sc, CAS_MAC_RX_CONF, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (!cas_bitwait(sc, CAS_MAC_RX_CONF, CAS_MAC_RX_CONF_HFILTER, 0)) device_printf(sc->sc_dev, "cannot disable RX hash filter\n"); if ((ifp->if_flags & IFF_PROMISC) != 0) { v |= CAS_MAC_RX_CONF_PROMISC; goto chipit; } if ((ifp->if_flags & IFF_ALLMULTI) != 0) { v |= CAS_MAC_RX_CONF_PGRP; goto chipit; } /* * Set up multicast address filter by passing all multicast * addresses through a crc generator, and then using the high * order 8 bits as an index into the 256 bit logical address * filter. The high order 4 bits selects the word, while the * other 4 bits select the bit within the word (where bit 0 * is the MSB). */ /* Clear the hash table. */ memset(hash, 0, sizeof(hash)); if_maddr_rlock(ifp); TAILQ_FOREACH(inm, &ifp->if_multiaddrs, ifma_link) { if (inm->ifma_addr->sa_family != AF_LINK) continue; crc = ether_crc32_le(LLADDR((struct sockaddr_dl *) inm->ifma_addr), ETHER_ADDR_LEN); /* We just want the 8 most significant bits. */ crc >>= 24; /* Set the corresponding bit in the filter. */ hash[crc >> 4] |= 1 << (15 - (crc & 15)); } if_maddr_runlock(ifp); v |= CAS_MAC_RX_CONF_HFILTER; /* Now load the hash table into the chip (if we are using it). */ for (i = 0; i < 16; i++) CAS_WRITE_4(sc, CAS_MAC_HASH0 + i * (CAS_MAC_HASH1 - CAS_MAC_HASH0), hash[i]); chipit: CAS_WRITE_4(sc, CAS_MAC_RX_CONF, v); } static int cas_pci_attach(device_t dev); static int cas_pci_detach(device_t dev); static int cas_pci_probe(device_t dev); static int cas_pci_resume(device_t dev); static int cas_pci_suspend(device_t dev); static device_method_t cas_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, cas_pci_probe), DEVMETHOD(device_attach, cas_pci_attach), DEVMETHOD(device_detach, cas_pci_detach), DEVMETHOD(device_suspend, cas_pci_suspend), DEVMETHOD(device_resume, cas_pci_resume), /* Use the suspend handler here, it is all that is required. */ DEVMETHOD(device_shutdown, cas_pci_suspend), /* bus interface */ DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_driver_added, bus_generic_driver_added), /* MII interface */ DEVMETHOD(miibus_readreg, cas_mii_readreg), DEVMETHOD(miibus_writereg, cas_mii_writereg), DEVMETHOD(miibus_statchg, cas_mii_statchg), KOBJMETHOD_END }; static driver_t cas_pci_driver = { "cas", cas_pci_methods, sizeof(struct cas_softc) }; DRIVER_MODULE(cas, pci, cas_pci_driver, cas_devclass, 0, 0); DRIVER_MODULE(miibus, cas, miibus_driver, miibus_devclass, 0, 0); MODULE_DEPEND(cas, pci, 1, 1, 1); static const struct cas_pci_dev { uint32_t cpd_devid; uint8_t cpd_revid; int cpd_variant; const char *cpd_desc; } const cas_pci_devlist[] = { { 0x0035100b, 0x0, CAS_SATURN, "NS DP83065 Saturn Gigabit Ethernet" }, { 0xabba108e, 0x10, CAS_CASPLUS, "Sun Cassini+ Gigabit Ethernet" }, { 0xabba108e, 0x0, CAS_CAS, "Sun Cassini Gigabit Ethernet" }, { 0, 0, 0, NULL } }; static int cas_pci_probe(device_t dev) { int i; for (i = 0; cas_pci_devlist[i].cpd_desc != NULL; i++) { if (pci_get_devid(dev) == cas_pci_devlist[i].cpd_devid && pci_get_revid(dev) >= cas_pci_devlist[i].cpd_revid) { device_set_desc(dev, cas_pci_devlist[i].cpd_desc); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static struct resource_spec cas_pci_res_spec[] = { { SYS_RES_IRQ, 0, RF_SHAREABLE | RF_ACTIVE }, /* CAS_RES_INTR */ { SYS_RES_MEMORY, PCIR_BAR(0), RF_ACTIVE }, /* CAS_RES_MEM */ { -1, 0 } }; #define CAS_LOCAL_MAC_ADDRESS "local-mac-address" #define CAS_PHY_INTERFACE "phy-interface" #define CAS_PHY_TYPE "phy-type" #define CAS_PHY_TYPE_PCS "pcs" static int cas_pci_attach(device_t dev) { char buf[sizeof(CAS_LOCAL_MAC_ADDRESS)]; struct cas_softc *sc; int i; #if !(defined(__powerpc__) || defined(__sparc64__)) u_char enaddr[4][ETHER_ADDR_LEN]; u_int j, k, lma, pcs[4], phy; #endif sc = device_get_softc(dev); sc->sc_variant = CAS_UNKNOWN; for (i = 0; cas_pci_devlist[i].cpd_desc != NULL; i++) { if (pci_get_devid(dev) == cas_pci_devlist[i].cpd_devid && pci_get_revid(dev) >= cas_pci_devlist[i].cpd_revid) { sc->sc_variant = cas_pci_devlist[i].cpd_variant; break; } } if (sc->sc_variant == CAS_UNKNOWN) { device_printf(dev, "unknown adaptor\n"); return (ENXIO); } pci_enable_busmaster(dev); sc->sc_dev = dev; if (sc->sc_variant == CAS_CAS && pci_get_devid(dev) < 0x02) /* Hardware checksumming may hang TX. */ sc->sc_flags |= CAS_NO_CSUM; if (sc->sc_variant == CAS_CASPLUS || sc->sc_variant == CAS_SATURN) sc->sc_flags |= CAS_REG_PLUS; if (sc->sc_variant == CAS_CAS || (sc->sc_variant == CAS_CASPLUS && pci_get_revid(dev) < 0x11)) sc->sc_flags |= CAS_TABORT; if (bootverbose) device_printf(dev, "flags=0x%x\n", sc->sc_flags); if (bus_alloc_resources(dev, cas_pci_res_spec, sc->sc_res)) { device_printf(dev, "failed to allocate resources\n"); bus_release_resources(dev, cas_pci_res_spec, sc->sc_res); return (ENXIO); } CAS_LOCK_INIT(sc, device_get_nameunit(dev)); #if defined(__powerpc__) || defined(__sparc64__) OF_getetheraddr(dev, sc->sc_enaddr); if (OF_getprop(ofw_bus_get_node(dev), CAS_PHY_INTERFACE, buf, sizeof(buf)) > 0 || OF_getprop(ofw_bus_get_node(dev), CAS_PHY_TYPE, buf, sizeof(buf)) > 0) { buf[sizeof(buf) - 1] = '\0'; if (strcmp(buf, CAS_PHY_TYPE_PCS) == 0) sc->sc_flags |= CAS_SERDES; } #else /* * Dig out VPD (vital product data) and read the MAC address as well * as the PHY type. The VPD resides in the PCI Expansion ROM (PCI * FCode) and can't be accessed via the PCI capability pointer. * SUNW,pci-ce and SUNW,pci-qge use the Enhanced VPD format described * in the free US Patent 7149820. */ #define PCI_ROMHDR_SIZE 0x1c #define PCI_ROMHDR_SIG 0x00 #define PCI_ROMHDR_SIG_MAGIC 0xaa55 /* little endian */ #define PCI_ROMHDR_PTR_DATA 0x18 #define PCI_ROM_SIZE 0x18 #define PCI_ROM_SIG 0x00 #define PCI_ROM_SIG_MAGIC 0x52494350 /* "PCIR", endian */ /* reversed */ #define PCI_ROM_VENDOR 0x04 #define PCI_ROM_DEVICE 0x06 #define PCI_ROM_PTR_VPD 0x08 #define PCI_VPDRES_BYTE0 0x00 #define PCI_VPDRES_ISLARGE(x) ((x) & 0x80) #define PCI_VPDRES_LARGE_NAME(x) ((x) & 0x7f) #define PCI_VPDRES_LARGE_LEN_LSB 0x01 #define PCI_VPDRES_LARGE_LEN_MSB 0x02 #define PCI_VPDRES_LARGE_SIZE 0x03 #define PCI_VPDRES_TYPE_ID_STRING 0x02 /* large */ #define PCI_VPDRES_TYPE_VPD 0x10 /* large */ #define PCI_VPD_KEY0 0x00 #define PCI_VPD_KEY1 0x01 #define PCI_VPD_LEN 0x02 #define PCI_VPD_SIZE 0x03 #define CAS_ROM_READ_1(sc, offs) \ CAS_READ_1((sc), CAS_PCI_ROM_OFFSET + (offs)) #define CAS_ROM_READ_2(sc, offs) \ CAS_READ_2((sc), CAS_PCI_ROM_OFFSET + (offs)) #define CAS_ROM_READ_4(sc, offs) \ CAS_READ_4((sc), CAS_PCI_ROM_OFFSET + (offs)) lma = phy = 0; memset(enaddr, 0, sizeof(enaddr)); memset(pcs, 0, sizeof(pcs)); /* Enable PCI Expansion ROM access. */ CAS_WRITE_4(sc, CAS_BIM_LDEV_OEN, CAS_BIM_LDEV_OEN_PAD | CAS_BIM_LDEV_OEN_PROM); /* Read PCI Expansion ROM header. */ if (CAS_ROM_READ_2(sc, PCI_ROMHDR_SIG) != PCI_ROMHDR_SIG_MAGIC || (i = CAS_ROM_READ_2(sc, PCI_ROMHDR_PTR_DATA)) < PCI_ROMHDR_SIZE) { device_printf(dev, "unexpected PCI Expansion ROM header\n"); goto fail_prom; } /* Read PCI Expansion ROM data. */ if (CAS_ROM_READ_4(sc, i + PCI_ROM_SIG) != PCI_ROM_SIG_MAGIC || CAS_ROM_READ_2(sc, i + PCI_ROM_VENDOR) != pci_get_vendor(dev) || CAS_ROM_READ_2(sc, i + PCI_ROM_DEVICE) != pci_get_device(dev) || (j = CAS_ROM_READ_2(sc, i + PCI_ROM_PTR_VPD)) < i + PCI_ROM_SIZE) { device_printf(dev, "unexpected PCI Expansion ROM data\n"); goto fail_prom; } /* Read PCI VPD. */ next: if (PCI_VPDRES_ISLARGE(CAS_ROM_READ_1(sc, j + PCI_VPDRES_BYTE0)) == 0) { device_printf(dev, "no large PCI VPD\n"); goto fail_prom; } i = (CAS_ROM_READ_1(sc, j + PCI_VPDRES_LARGE_LEN_MSB) << 8) | CAS_ROM_READ_1(sc, j + PCI_VPDRES_LARGE_LEN_LSB); switch (PCI_VPDRES_LARGE_NAME(CAS_ROM_READ_1(sc, j + PCI_VPDRES_BYTE0))) { case PCI_VPDRES_TYPE_ID_STRING: /* Skip identifier string. */ j += PCI_VPDRES_LARGE_SIZE + i; goto next; case PCI_VPDRES_TYPE_VPD: for (j += PCI_VPDRES_LARGE_SIZE; i > 0; i -= PCI_VPD_SIZE + CAS_ROM_READ_1(sc, j + PCI_VPD_LEN), j += PCI_VPD_SIZE + CAS_ROM_READ_1(sc, j + PCI_VPD_LEN)) { if (CAS_ROM_READ_1(sc, j + PCI_VPD_KEY0) != 'Z') /* no Enhanced VPD */ continue; if (CAS_ROM_READ_1(sc, j + PCI_VPD_SIZE) != 'I') /* no instance property */ continue; if (CAS_ROM_READ_1(sc, j + PCI_VPD_SIZE + 3) == 'B') { /* byte array */ if (CAS_ROM_READ_1(sc, j + PCI_VPD_SIZE + 4) != ETHER_ADDR_LEN) continue; bus_read_region_1(sc->sc_res[CAS_RES_MEM], CAS_PCI_ROM_OFFSET + j + PCI_VPD_SIZE + 5, buf, sizeof(buf)); buf[sizeof(buf) - 1] = '\0'; if (strcmp(buf, CAS_LOCAL_MAC_ADDRESS) != 0) continue; bus_read_region_1(sc->sc_res[CAS_RES_MEM], CAS_PCI_ROM_OFFSET + j + PCI_VPD_SIZE + 5 + sizeof(CAS_LOCAL_MAC_ADDRESS), enaddr[lma], sizeof(enaddr[lma])); lma++; if (lma == 4 && phy == 4) break; } else if (CAS_ROM_READ_1(sc, j + PCI_VPD_SIZE + 3) == 'S') { /* string */ if (CAS_ROM_READ_1(sc, j + PCI_VPD_SIZE + 4) != sizeof(CAS_PHY_TYPE_PCS)) continue; bus_read_region_1(sc->sc_res[CAS_RES_MEM], CAS_PCI_ROM_OFFSET + j + PCI_VPD_SIZE + 5, buf, sizeof(buf)); buf[sizeof(buf) - 1] = '\0'; if (strcmp(buf, CAS_PHY_INTERFACE) == 0) k = sizeof(CAS_PHY_INTERFACE); else if (strcmp(buf, CAS_PHY_TYPE) == 0) k = sizeof(CAS_PHY_TYPE); else continue; bus_read_region_1(sc->sc_res[CAS_RES_MEM], CAS_PCI_ROM_OFFSET + j + PCI_VPD_SIZE + 5 + k, buf, sizeof(buf)); buf[sizeof(buf) - 1] = '\0'; if (strcmp(buf, CAS_PHY_TYPE_PCS) == 0) pcs[phy] = 1; phy++; if (lma == 4 && phy == 4) break; } } break; default: device_printf(dev, "unexpected PCI VPD\n"); goto fail_prom; } fail_prom: CAS_WRITE_4(sc, CAS_BIM_LDEV_OEN, 0); if (lma == 0) { device_printf(dev, "could not determine Ethernet address\n"); goto fail; } i = 0; if (lma > 1 && pci_get_slot(dev) < sizeof(enaddr) / sizeof(*enaddr)) i = pci_get_slot(dev); memcpy(sc->sc_enaddr, enaddr[i], ETHER_ADDR_LEN); if (phy == 0) { device_printf(dev, "could not determine PHY type\n"); goto fail; } i = 0; if (phy > 1 && pci_get_slot(dev) < sizeof(pcs) / sizeof(*pcs)) i = pci_get_slot(dev); if (pcs[i] != 0) sc->sc_flags |= CAS_SERDES; #endif if (cas_attach(sc) != 0) { device_printf(dev, "could not be attached\n"); goto fail; } if (bus_setup_intr(dev, sc->sc_res[CAS_RES_INTR], INTR_TYPE_NET | INTR_MPSAFE, cas_intr, NULL, sc, &sc->sc_ih) != 0) { device_printf(dev, "failed to set up interrupt\n"); cas_detach(sc); goto fail; } return (0); fail: CAS_LOCK_DESTROY(sc); bus_release_resources(dev, cas_pci_res_spec, sc->sc_res); return (ENXIO); } static int cas_pci_detach(device_t dev) { struct cas_softc *sc; sc = device_get_softc(dev); bus_teardown_intr(dev, sc->sc_res[CAS_RES_INTR], sc->sc_ih); cas_detach(sc); CAS_LOCK_DESTROY(sc); bus_release_resources(dev, cas_pci_res_spec, sc->sc_res); return (0); } static int cas_pci_suspend(device_t dev) { cas_suspend(device_get_softc(dev)); return (0); } static int cas_pci_resume(device_t dev) { cas_resume(device_get_softc(dev)); return (0); } Index: projects/binutils-2.17/sys/dev/e1000/e1000_api.c =================================================================== --- projects/binutils-2.17/sys/dev/e1000/e1000_api.c (revision 215829) +++ projects/binutils-2.17/sys/dev/e1000/e1000_api.c (revision 215830) @@ -1,1334 +1,1336 @@ /****************************************************************************** Copyright (c) 2001-2010, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "e1000_api.h" /** * e1000_init_mac_params - Initialize MAC function pointers * @hw: pointer to the HW structure * * This function initializes the function pointers for the MAC * set of functions. Called by drivers or by e1000_setup_init_funcs. **/ s32 e1000_init_mac_params(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; if (hw->mac.ops.init_params) { ret_val = hw->mac.ops.init_params(hw); if (ret_val) { DEBUGOUT("MAC Initialization Error\n"); goto out; } } else { DEBUGOUT("mac.init_mac_params was NULL\n"); ret_val = -E1000_ERR_CONFIG; } out: return ret_val; } /** * e1000_init_nvm_params - Initialize NVM function pointers * @hw: pointer to the HW structure * * This function initializes the function pointers for the NVM * set of functions. Called by drivers or by e1000_setup_init_funcs. **/ s32 e1000_init_nvm_params(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; if (hw->nvm.ops.init_params) { ret_val = hw->nvm.ops.init_params(hw); if (ret_val) { DEBUGOUT("NVM Initialization Error\n"); goto out; } } else { DEBUGOUT("nvm.init_nvm_params was NULL\n"); ret_val = -E1000_ERR_CONFIG; } out: return ret_val; } /** * e1000_init_phy_params - Initialize PHY function pointers * @hw: pointer to the HW structure * * This function initializes the function pointers for the PHY * set of functions. Called by drivers or by e1000_setup_init_funcs. **/ s32 e1000_init_phy_params(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; if (hw->phy.ops.init_params) { ret_val = hw->phy.ops.init_params(hw); if (ret_val) { DEBUGOUT("PHY Initialization Error\n"); goto out; } } else { DEBUGOUT("phy.init_phy_params was NULL\n"); ret_val = -E1000_ERR_CONFIG; } out: return ret_val; } /** * e1000_init_mbx_params - Initialize mailbox function pointers * @hw: pointer to the HW structure * * This function initializes the function pointers for the PHY * set of functions. Called by drivers or by e1000_setup_init_funcs. **/ s32 e1000_init_mbx_params(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; if (hw->mbx.ops.init_params) { ret_val = hw->mbx.ops.init_params(hw); if (ret_val) { DEBUGOUT("Mailbox Initialization Error\n"); goto out; } } else { DEBUGOUT("mbx.init_mbx_params was NULL\n"); ret_val = -E1000_ERR_CONFIG; } out: return ret_val; } /** * e1000_set_mac_type - Sets MAC type * @hw: pointer to the HW structure * * This function sets the mac type of the adapter based on the * device ID stored in the hw structure. * MUST BE FIRST FUNCTION CALLED (explicitly or through * e1000_setup_init_funcs()). **/ s32 e1000_set_mac_type(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_set_mac_type"); switch (hw->device_id) { case E1000_DEV_ID_82542: mac->type = e1000_82542; break; case E1000_DEV_ID_82543GC_FIBER: case E1000_DEV_ID_82543GC_COPPER: mac->type = e1000_82543; break; case E1000_DEV_ID_82544EI_COPPER: case E1000_DEV_ID_82544EI_FIBER: case E1000_DEV_ID_82544GC_COPPER: case E1000_DEV_ID_82544GC_LOM: mac->type = e1000_82544; break; case E1000_DEV_ID_82540EM: case E1000_DEV_ID_82540EM_LOM: case E1000_DEV_ID_82540EP: case E1000_DEV_ID_82540EP_LOM: case E1000_DEV_ID_82540EP_LP: mac->type = e1000_82540; break; case E1000_DEV_ID_82545EM_COPPER: case E1000_DEV_ID_82545EM_FIBER: mac->type = e1000_82545; break; case E1000_DEV_ID_82545GM_COPPER: case E1000_DEV_ID_82545GM_FIBER: case E1000_DEV_ID_82545GM_SERDES: mac->type = e1000_82545_rev_3; break; case E1000_DEV_ID_82546EB_COPPER: case E1000_DEV_ID_82546EB_FIBER: case E1000_DEV_ID_82546EB_QUAD_COPPER: mac->type = e1000_82546; break; case E1000_DEV_ID_82546GB_COPPER: case E1000_DEV_ID_82546GB_FIBER: case E1000_DEV_ID_82546GB_SERDES: case E1000_DEV_ID_82546GB_PCIE: case E1000_DEV_ID_82546GB_QUAD_COPPER: case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: mac->type = e1000_82546_rev_3; break; case E1000_DEV_ID_82541EI: case E1000_DEV_ID_82541EI_MOBILE: case E1000_DEV_ID_82541ER_LOM: mac->type = e1000_82541; break; case E1000_DEV_ID_82541ER: case E1000_DEV_ID_82541GI: case E1000_DEV_ID_82541GI_LF: case E1000_DEV_ID_82541GI_MOBILE: mac->type = e1000_82541_rev_2; break; case E1000_DEV_ID_82547EI: case E1000_DEV_ID_82547EI_MOBILE: mac->type = e1000_82547; break; case E1000_DEV_ID_82547GI: mac->type = e1000_82547_rev_2; break; case E1000_DEV_ID_82571EB_COPPER: case E1000_DEV_ID_82571EB_FIBER: case E1000_DEV_ID_82571EB_SERDES: case E1000_DEV_ID_82571EB_SERDES_DUAL: case E1000_DEV_ID_82571EB_SERDES_QUAD: case E1000_DEV_ID_82571EB_QUAD_COPPER: case E1000_DEV_ID_82571PT_QUAD_COPPER: case E1000_DEV_ID_82571EB_QUAD_FIBER: case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: mac->type = e1000_82571; break; case E1000_DEV_ID_82572EI: case E1000_DEV_ID_82572EI_COPPER: case E1000_DEV_ID_82572EI_FIBER: case E1000_DEV_ID_82572EI_SERDES: mac->type = e1000_82572; break; case E1000_DEV_ID_82573E: case E1000_DEV_ID_82573E_IAMT: case E1000_DEV_ID_82573L: mac->type = e1000_82573; break; case E1000_DEV_ID_82574L: case E1000_DEV_ID_82574LA: mac->type = e1000_82574; break; case E1000_DEV_ID_82583V: mac->type = e1000_82583; break; case E1000_DEV_ID_80003ES2LAN_COPPER_DPT: case E1000_DEV_ID_80003ES2LAN_SERDES_DPT: case E1000_DEV_ID_80003ES2LAN_COPPER_SPT: case E1000_DEV_ID_80003ES2LAN_SERDES_SPT: mac->type = e1000_80003es2lan; break; case E1000_DEV_ID_ICH8_IFE: case E1000_DEV_ID_ICH8_IFE_GT: case E1000_DEV_ID_ICH8_IFE_G: case E1000_DEV_ID_ICH8_IGP_M: case E1000_DEV_ID_ICH8_IGP_M_AMT: case E1000_DEV_ID_ICH8_IGP_AMT: case E1000_DEV_ID_ICH8_IGP_C: case E1000_DEV_ID_ICH8_82567V_3: mac->type = e1000_ich8lan; break; case E1000_DEV_ID_ICH9_IFE: case E1000_DEV_ID_ICH9_IFE_GT: case E1000_DEV_ID_ICH9_IFE_G: case E1000_DEV_ID_ICH9_IGP_M: case E1000_DEV_ID_ICH9_IGP_M_AMT: case E1000_DEV_ID_ICH9_IGP_M_V: case E1000_DEV_ID_ICH9_IGP_AMT: case E1000_DEV_ID_ICH9_BM: case E1000_DEV_ID_ICH9_IGP_C: case E1000_DEV_ID_ICH10_R_BM_LM: case E1000_DEV_ID_ICH10_R_BM_LF: case E1000_DEV_ID_ICH10_R_BM_V: mac->type = e1000_ich9lan; break; case E1000_DEV_ID_ICH10_D_BM_LM: case E1000_DEV_ID_ICH10_D_BM_LF: case E1000_DEV_ID_ICH10_D_BM_V: case E1000_DEV_ID_ICH10_HANKSVILLE: mac->type = e1000_ich10lan; break; case E1000_DEV_ID_PCH_D_HV_DM: case E1000_DEV_ID_PCH_D_HV_DC: case E1000_DEV_ID_PCH_M_HV_LM: case E1000_DEV_ID_PCH_M_HV_LC: mac->type = e1000_pchlan; break; case E1000_DEV_ID_PCH2_LV_LM: case E1000_DEV_ID_PCH2_LV_V: mac->type = e1000_pch2lan; break; case E1000_DEV_ID_82575EB_COPPER: case E1000_DEV_ID_82575EB_FIBER_SERDES: case E1000_DEV_ID_82575GB_QUAD_COPPER: case E1000_DEV_ID_82575GB_QUAD_COPPER_PM: mac->type = e1000_82575; break; case E1000_DEV_ID_82576: case E1000_DEV_ID_82576_FIBER: case E1000_DEV_ID_82576_SERDES: case E1000_DEV_ID_82576_QUAD_COPPER: case E1000_DEV_ID_82576_QUAD_COPPER_ET2: case E1000_DEV_ID_82576_NS: case E1000_DEV_ID_82576_NS_SERDES: case E1000_DEV_ID_82576_SERDES_QUAD: mac->type = e1000_82576; break; case E1000_DEV_ID_82580_COPPER: case E1000_DEV_ID_82580_FIBER: case E1000_DEV_ID_82580_SERDES: case E1000_DEV_ID_82580_SGMII: case E1000_DEV_ID_82580_COPPER_DUAL: case E1000_DEV_ID_82580_QUAD_FIBER: + case E1000_DEV_ID_DH89XXCC_SGMII: + case E1000_DEV_ID_DH89XXCC_SERDES: mac->type = e1000_82580; break; case E1000_DEV_ID_82576_VF: mac->type = e1000_vfadapt; break; default: /* Should never have loaded on this device */ ret_val = -E1000_ERR_MAC_INIT; break; } return ret_val; } /** * e1000_setup_init_funcs - Initializes function pointers * @hw: pointer to the HW structure * @init_device: TRUE will initialize the rest of the function pointers * getting the device ready for use. FALSE will only set * MAC type and the function pointers for the other init * functions. Passing FALSE will not generate any hardware * reads or writes. * * This function must be called by a driver in order to use the rest * of the 'shared' code files. Called by drivers only. **/ s32 e1000_setup_init_funcs(struct e1000_hw *hw, bool init_device) { s32 ret_val; /* Can't do much good without knowing the MAC type. */ ret_val = e1000_set_mac_type(hw); if (ret_val) { DEBUGOUT("ERROR: MAC type could not be set properly.\n"); goto out; } if (!hw->hw_addr) { DEBUGOUT("ERROR: Registers not mapped\n"); ret_val = -E1000_ERR_CONFIG; goto out; } /* * Init function pointers to generic implementations. We do this first * allowing a driver module to override it afterward. */ e1000_init_mac_ops_generic(hw); e1000_init_phy_ops_generic(hw); e1000_init_nvm_ops_generic(hw); e1000_init_mbx_ops_generic(hw); /* * Set up the init function pointers. These are functions within the * adapter family file that sets up function pointers for the rest of * the functions in that family. */ switch (hw->mac.type) { case e1000_82542: e1000_init_function_pointers_82542(hw); break; case e1000_82543: case e1000_82544: e1000_init_function_pointers_82543(hw); break; case e1000_82540: case e1000_82545: case e1000_82545_rev_3: case e1000_82546: case e1000_82546_rev_3: e1000_init_function_pointers_82540(hw); break; case e1000_82541: case e1000_82541_rev_2: case e1000_82547: case e1000_82547_rev_2: e1000_init_function_pointers_82541(hw); break; case e1000_82571: case e1000_82572: case e1000_82573: case e1000_82574: case e1000_82583: e1000_init_function_pointers_82571(hw); break; case e1000_80003es2lan: e1000_init_function_pointers_80003es2lan(hw); break; case e1000_ich8lan: case e1000_ich9lan: case e1000_ich10lan: case e1000_pchlan: case e1000_pch2lan: e1000_init_function_pointers_ich8lan(hw); break; case e1000_82575: case e1000_82576: case e1000_82580: e1000_init_function_pointers_82575(hw); break; case e1000_vfadapt: e1000_init_function_pointers_vf(hw); break; default: DEBUGOUT("Hardware not supported\n"); ret_val = -E1000_ERR_CONFIG; break; } /* * Initialize the rest of the function pointers. These require some * register reads/writes in some cases. */ if (!(ret_val) && init_device) { ret_val = e1000_init_mac_params(hw); if (ret_val) goto out; ret_val = e1000_init_nvm_params(hw); if (ret_val) goto out; ret_val = e1000_init_phy_params(hw); if (ret_val) goto out; ret_val = e1000_init_mbx_params(hw); if (ret_val) goto out; } out: return ret_val; } /** * e1000_get_bus_info - Obtain bus information for adapter * @hw: pointer to the HW structure * * This will obtain information about the HW bus for which the * adapter is attached and stores it in the hw structure. This is a * function pointer entry point called by drivers. **/ s32 e1000_get_bus_info(struct e1000_hw *hw) { if (hw->mac.ops.get_bus_info) return hw->mac.ops.get_bus_info(hw); return E1000_SUCCESS; } /** * e1000_clear_vfta - Clear VLAN filter table * @hw: pointer to the HW structure * * This clears the VLAN filter table on the adapter. This is a function * pointer entry point called by drivers. **/ void e1000_clear_vfta(struct e1000_hw *hw) { if (hw->mac.ops.clear_vfta) hw->mac.ops.clear_vfta(hw); } /** * e1000_write_vfta - Write value to VLAN filter table * @hw: pointer to the HW structure * @offset: the 32-bit offset in which to write the value to. * @value: the 32-bit value to write at location offset. * * This writes a 32-bit value to a 32-bit offset in the VLAN filter * table. This is a function pointer entry point called by drivers. **/ void e1000_write_vfta(struct e1000_hw *hw, u32 offset, u32 value) { if (hw->mac.ops.write_vfta) hw->mac.ops.write_vfta(hw, offset, value); } /** * e1000_update_mc_addr_list - Update Multicast addresses * @hw: pointer to the HW structure * @mc_addr_list: array of multicast addresses to program * @mc_addr_count: number of multicast addresses to program * * Updates the Multicast Table Array. * The caller must have a packed mc_addr_list of multicast addresses. **/ void e1000_update_mc_addr_list(struct e1000_hw *hw, u8 *mc_addr_list, u32 mc_addr_count) { if (hw->mac.ops.update_mc_addr_list) hw->mac.ops.update_mc_addr_list(hw, mc_addr_list, mc_addr_count); } /** * e1000_force_mac_fc - Force MAC flow control * @hw: pointer to the HW structure * * Force the MAC's flow control settings. Currently no func pointer exists * and all implementations are handled in the generic version of this * function. **/ s32 e1000_force_mac_fc(struct e1000_hw *hw) { return e1000_force_mac_fc_generic(hw); } /** * e1000_check_for_link - Check/Store link connection * @hw: pointer to the HW structure * * This checks the link condition of the adapter and stores the * results in the hw->mac structure. This is a function pointer entry * point called by drivers. **/ s32 e1000_check_for_link(struct e1000_hw *hw) { if (hw->mac.ops.check_for_link) return hw->mac.ops.check_for_link(hw); return -E1000_ERR_CONFIG; } /** * e1000_check_mng_mode - Check management mode * @hw: pointer to the HW structure * * This checks if the adapter has manageability enabled. * This is a function pointer entry point called by drivers. **/ bool e1000_check_mng_mode(struct e1000_hw *hw) { if (hw->mac.ops.check_mng_mode) return hw->mac.ops.check_mng_mode(hw); return FALSE; } /** * e1000_mng_write_dhcp_info - Writes DHCP info to host interface * @hw: pointer to the HW structure * @buffer: pointer to the host interface * @length: size of the buffer * * Writes the DHCP information to the host interface. **/ s32 e1000_mng_write_dhcp_info(struct e1000_hw *hw, u8 *buffer, u16 length) { return e1000_mng_write_dhcp_info_generic(hw, buffer, length); } /** * e1000_reset_hw - Reset hardware * @hw: pointer to the HW structure * * This resets the hardware into a known state. This is a function pointer * entry point called by drivers. **/ s32 e1000_reset_hw(struct e1000_hw *hw) { if (hw->mac.ops.reset_hw) return hw->mac.ops.reset_hw(hw); return -E1000_ERR_CONFIG; } /** * e1000_init_hw - Initialize hardware * @hw: pointer to the HW structure * * This inits the hardware readying it for operation. This is a function * pointer entry point called by drivers. **/ s32 e1000_init_hw(struct e1000_hw *hw) { if (hw->mac.ops.init_hw) return hw->mac.ops.init_hw(hw); return -E1000_ERR_CONFIG; } /** * e1000_setup_link - Configures link and flow control * @hw: pointer to the HW structure * * This configures link and flow control settings for the adapter. This * is a function pointer entry point called by drivers. While modules can * also call this, they probably call their own version of this function. **/ s32 e1000_setup_link(struct e1000_hw *hw) { if (hw->mac.ops.setup_link) return hw->mac.ops.setup_link(hw); return -E1000_ERR_CONFIG; } /** * e1000_get_speed_and_duplex - Returns current speed and duplex * @hw: pointer to the HW structure * @speed: pointer to a 16-bit value to store the speed * @duplex: pointer to a 16-bit value to store the duplex. * * This returns the speed and duplex of the adapter in the two 'out' * variables passed in. This is a function pointer entry point called * by drivers. **/ s32 e1000_get_speed_and_duplex(struct e1000_hw *hw, u16 *speed, u16 *duplex) { if (hw->mac.ops.get_link_up_info) return hw->mac.ops.get_link_up_info(hw, speed, duplex); return -E1000_ERR_CONFIG; } /** * e1000_setup_led - Configures SW controllable LED * @hw: pointer to the HW structure * * This prepares the SW controllable LED for use and saves the current state * of the LED so it can be later restored. This is a function pointer entry * point called by drivers. **/ s32 e1000_setup_led(struct e1000_hw *hw) { if (hw->mac.ops.setup_led) return hw->mac.ops.setup_led(hw); return E1000_SUCCESS; } /** * e1000_cleanup_led - Restores SW controllable LED * @hw: pointer to the HW structure * * This restores the SW controllable LED to the value saved off by * e1000_setup_led. This is a function pointer entry point called by drivers. **/ s32 e1000_cleanup_led(struct e1000_hw *hw) { if (hw->mac.ops.cleanup_led) return hw->mac.ops.cleanup_led(hw); return E1000_SUCCESS; } /** * e1000_blink_led - Blink SW controllable LED * @hw: pointer to the HW structure * * This starts the adapter LED blinking. Request the LED to be setup first * and cleaned up after. This is a function pointer entry point called by * drivers. **/ s32 e1000_blink_led(struct e1000_hw *hw) { if (hw->mac.ops.blink_led) return hw->mac.ops.blink_led(hw); return E1000_SUCCESS; } /** * e1000_id_led_init - store LED configurations in SW * @hw: pointer to the HW structure * * Initializes the LED config in SW. This is a function pointer entry point * called by drivers. **/ s32 e1000_id_led_init(struct e1000_hw *hw) { if (hw->mac.ops.id_led_init) return hw->mac.ops.id_led_init(hw); return E1000_SUCCESS; } /** * e1000_led_on - Turn on SW controllable LED * @hw: pointer to the HW structure * * Turns the SW defined LED on. This is a function pointer entry point * called by drivers. **/ s32 e1000_led_on(struct e1000_hw *hw) { if (hw->mac.ops.led_on) return hw->mac.ops.led_on(hw); return E1000_SUCCESS; } /** * e1000_led_off - Turn off SW controllable LED * @hw: pointer to the HW structure * * Turns the SW defined LED off. This is a function pointer entry point * called by drivers. **/ s32 e1000_led_off(struct e1000_hw *hw) { if (hw->mac.ops.led_off) return hw->mac.ops.led_off(hw); return E1000_SUCCESS; } /** * e1000_reset_adaptive - Reset adaptive IFS * @hw: pointer to the HW structure * * Resets the adaptive IFS. Currently no func pointer exists and all * implementations are handled in the generic version of this function. **/ void e1000_reset_adaptive(struct e1000_hw *hw) { e1000_reset_adaptive_generic(hw); } /** * e1000_update_adaptive - Update adaptive IFS * @hw: pointer to the HW structure * * Updates adapter IFS. Currently no func pointer exists and all * implementations are handled in the generic version of this function. **/ void e1000_update_adaptive(struct e1000_hw *hw) { e1000_update_adaptive_generic(hw); } /** * e1000_disable_pcie_master - Disable PCI-Express master access * @hw: pointer to the HW structure * * Disables PCI-Express master access and verifies there are no pending * requests. Currently no func pointer exists and all implementations are * handled in the generic version of this function. **/ s32 e1000_disable_pcie_master(struct e1000_hw *hw) { return e1000_disable_pcie_master_generic(hw); } /** * e1000_config_collision_dist - Configure collision distance * @hw: pointer to the HW structure * * Configures the collision distance to the default value and is used * during link setup. **/ void e1000_config_collision_dist(struct e1000_hw *hw) { if (hw->mac.ops.config_collision_dist) hw->mac.ops.config_collision_dist(hw); } /** * e1000_rar_set - Sets a receive address register * @hw: pointer to the HW structure * @addr: address to set the RAR to * @index: the RAR to set * * Sets a Receive Address Register (RAR) to the specified address. **/ void e1000_rar_set(struct e1000_hw *hw, u8 *addr, u32 index) { if (hw->mac.ops.rar_set) hw->mac.ops.rar_set(hw, addr, index); } /** * e1000_validate_mdi_setting - Ensures valid MDI/MDIX SW state * @hw: pointer to the HW structure * * Ensures that the MDI/MDIX SW state is valid. **/ s32 e1000_validate_mdi_setting(struct e1000_hw *hw) { if (hw->mac.ops.validate_mdi_setting) return hw->mac.ops.validate_mdi_setting(hw); return E1000_SUCCESS; } /** * e1000_hash_mc_addr - Determines address location in multicast table * @hw: pointer to the HW structure * @mc_addr: Multicast address to hash. * * This hashes an address to determine its location in the multicast * table. Currently no func pointer exists and all implementations * are handled in the generic version of this function. **/ u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr) { return e1000_hash_mc_addr_generic(hw, mc_addr); } /** * e1000_enable_tx_pkt_filtering - Enable packet filtering on TX * @hw: pointer to the HW structure * * Enables packet filtering on transmit packets if manageability is enabled * and host interface is enabled. * Currently no func pointer exists and all implementations are handled in the * generic version of this function. **/ bool e1000_enable_tx_pkt_filtering(struct e1000_hw *hw) { return e1000_enable_tx_pkt_filtering_generic(hw); } /** * e1000_mng_host_if_write - Writes to the manageability host interface * @hw: pointer to the HW structure * @buffer: pointer to the host interface buffer * @length: size of the buffer * @offset: location in the buffer to write to * @sum: sum of the data (not checksum) * * This function writes the buffer content at the offset given on the host if. * It also does alignment considerations to do the writes in most efficient * way. Also fills up the sum of the buffer in *buffer parameter. **/ s32 e1000_mng_host_if_write(struct e1000_hw * hw, u8 *buffer, u16 length, u16 offset, u8 *sum) { if (hw->mac.ops.mng_host_if_write) return hw->mac.ops.mng_host_if_write(hw, buffer, length, offset, sum); return E1000_NOT_IMPLEMENTED; } /** * e1000_mng_write_cmd_header - Writes manageability command header * @hw: pointer to the HW structure * @hdr: pointer to the host interface command header * * Writes the command header after does the checksum calculation. **/ s32 e1000_mng_write_cmd_header(struct e1000_hw *hw, struct e1000_host_mng_command_header *hdr) { if (hw->mac.ops.mng_write_cmd_header) return hw->mac.ops.mng_write_cmd_header(hw, hdr); return E1000_NOT_IMPLEMENTED; } /** * e1000_mng_enable_host_if - Checks host interface is enabled * @hw: pointer to the HW structure * * Returns E1000_success upon success, else E1000_ERR_HOST_INTERFACE_COMMAND * * This function checks whether the HOST IF is enabled for command operation * and also checks whether the previous command is completed. It busy waits * in case of previous command is not completed. **/ s32 e1000_mng_enable_host_if(struct e1000_hw * hw) { if (hw->mac.ops.mng_enable_host_if) return hw->mac.ops.mng_enable_host_if(hw); return E1000_NOT_IMPLEMENTED; } /** * e1000_wait_autoneg - Waits for autonegotiation completion * @hw: pointer to the HW structure * * Waits for autoneg to complete. Currently no func pointer exists and all * implementations are handled in the generic version of this function. **/ s32 e1000_wait_autoneg(struct e1000_hw *hw) { if (hw->mac.ops.wait_autoneg) return hw->mac.ops.wait_autoneg(hw); return E1000_SUCCESS; } /** * e1000_check_reset_block - Verifies PHY can be reset * @hw: pointer to the HW structure * * Checks if the PHY is in a state that can be reset or if manageability * has it tied up. This is a function pointer entry point called by drivers. **/ s32 e1000_check_reset_block(struct e1000_hw *hw) { if (hw->phy.ops.check_reset_block) return hw->phy.ops.check_reset_block(hw); return E1000_SUCCESS; } /** * e1000_read_phy_reg - Reads PHY register * @hw: pointer to the HW structure * @offset: the register to read * @data: the buffer to store the 16-bit read. * * Reads the PHY register and returns the value in data. * This is a function pointer entry point called by drivers. **/ s32 e1000_read_phy_reg(struct e1000_hw *hw, u32 offset, u16 *data) { if (hw->phy.ops.read_reg) return hw->phy.ops.read_reg(hw, offset, data); return E1000_SUCCESS; } /** * e1000_write_phy_reg - Writes PHY register * @hw: pointer to the HW structure * @offset: the register to write * @data: the value to write. * * Writes the PHY register at offset with the value in data. * This is a function pointer entry point called by drivers. **/ s32 e1000_write_phy_reg(struct e1000_hw *hw, u32 offset, u16 data) { if (hw->phy.ops.write_reg) return hw->phy.ops.write_reg(hw, offset, data); return E1000_SUCCESS; } /** * e1000_release_phy - Generic release PHY * @hw: pointer to the HW structure * * Return if silicon family does not require a semaphore when accessing the * PHY. **/ void e1000_release_phy(struct e1000_hw *hw) { if (hw->phy.ops.release) hw->phy.ops.release(hw); } /** * e1000_acquire_phy - Generic acquire PHY * @hw: pointer to the HW structure * * Return success if silicon family does not require a semaphore when * accessing the PHY. **/ s32 e1000_acquire_phy(struct e1000_hw *hw) { if (hw->phy.ops.acquire) return hw->phy.ops.acquire(hw); return E1000_SUCCESS; } /** * e1000_cfg_on_link_up - Configure PHY upon link up * @hw: pointer to the HW structure **/ s32 e1000_cfg_on_link_up(struct e1000_hw *hw) { if (hw->phy.ops.cfg_on_link_up) return hw->phy.ops.cfg_on_link_up(hw); return E1000_SUCCESS; } /** * e1000_read_kmrn_reg - Reads register using Kumeran interface * @hw: pointer to the HW structure * @offset: the register to read * @data: the location to store the 16-bit value read. * * Reads a register out of the Kumeran interface. Currently no func pointer * exists and all implementations are handled in the generic version of * this function. **/ s32 e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data) { return e1000_read_kmrn_reg_generic(hw, offset, data); } /** * e1000_write_kmrn_reg - Writes register using Kumeran interface * @hw: pointer to the HW structure * @offset: the register to write * @data: the value to write. * * Writes a register to the Kumeran interface. Currently no func pointer * exists and all implementations are handled in the generic version of * this function. **/ s32 e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data) { return e1000_write_kmrn_reg_generic(hw, offset, data); } /** * e1000_get_cable_length - Retrieves cable length estimation * @hw: pointer to the HW structure * * This function estimates the cable length and stores them in * hw->phy.min_length and hw->phy.max_length. This is a function pointer * entry point called by drivers. **/ s32 e1000_get_cable_length(struct e1000_hw *hw) { if (hw->phy.ops.get_cable_length) return hw->phy.ops.get_cable_length(hw); return E1000_SUCCESS; } /** * e1000_get_phy_info - Retrieves PHY information from registers * @hw: pointer to the HW structure * * This function gets some information from various PHY registers and * populates hw->phy values with it. This is a function pointer entry * point called by drivers. **/ s32 e1000_get_phy_info(struct e1000_hw *hw) { if (hw->phy.ops.get_info) return hw->phy.ops.get_info(hw); return E1000_SUCCESS; } /** * e1000_phy_hw_reset - Hard PHY reset * @hw: pointer to the HW structure * * Performs a hard PHY reset. This is a function pointer entry point called * by drivers. **/ s32 e1000_phy_hw_reset(struct e1000_hw *hw) { if (hw->phy.ops.reset) return hw->phy.ops.reset(hw); return E1000_SUCCESS; } /** * e1000_phy_commit - Soft PHY reset * @hw: pointer to the HW structure * * Performs a soft PHY reset on those that apply. This is a function pointer * entry point called by drivers. **/ s32 e1000_phy_commit(struct e1000_hw *hw) { if (hw->phy.ops.commit) return hw->phy.ops.commit(hw); return E1000_SUCCESS; } /** * e1000_set_d0_lplu_state - Sets low power link up state for D0 * @hw: pointer to the HW structure * @active: boolean used to enable/disable lplu * * Success returns 0, Failure returns 1 * * The low power link up (lplu) state is set to the power management level D0 * and SmartSpeed is disabled when active is TRUE, else clear lplu for D0 * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU * is used during Dx states where the power conservation is most important. * During driver activity, SmartSpeed should be enabled so performance is * maintained. This is a function pointer entry point called by drivers. **/ s32 e1000_set_d0_lplu_state(struct e1000_hw *hw, bool active) { if (hw->phy.ops.set_d0_lplu_state) return hw->phy.ops.set_d0_lplu_state(hw, active); return E1000_SUCCESS; } /** * e1000_set_d3_lplu_state - Sets low power link up state for D3 * @hw: pointer to the HW structure * @active: boolean used to enable/disable lplu * * Success returns 0, Failure returns 1 * * The low power link up (lplu) state is set to the power management level D3 * and SmartSpeed is disabled when active is TRUE, else clear lplu for D3 * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU * is used during Dx states where the power conservation is most important. * During driver activity, SmartSpeed should be enabled so performance is * maintained. This is a function pointer entry point called by drivers. **/ s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active) { if (hw->phy.ops.set_d3_lplu_state) return hw->phy.ops.set_d3_lplu_state(hw, active); return E1000_SUCCESS; } /** * e1000_read_mac_addr - Reads MAC address * @hw: pointer to the HW structure * * Reads the MAC address out of the adapter and stores it in the HW structure. * Currently no func pointer exists and all implementations are handled in the * generic version of this function. **/ s32 e1000_read_mac_addr(struct e1000_hw *hw) { if (hw->mac.ops.read_mac_addr) return hw->mac.ops.read_mac_addr(hw); return e1000_read_mac_addr_generic(hw); } /** * e1000_read_pba_string - Read device part number string * @hw: pointer to the HW structure * @pba_num: pointer to device part number * @pba_num_size: size of part number buffer * * Reads the product board assembly (PBA) number from the EEPROM and stores * the value in pba_num. * Currently no func pointer exists and all implementations are handled in the * generic version of this function. **/ s32 e1000_read_pba_string(struct e1000_hw *hw, u8 *pba_num, u32 pba_num_size) { return e1000_read_pba_string_generic(hw, pba_num, pba_num_size); } /** * e1000_read_pba_length - Read device part number string length * @hw: pointer to the HW structure * @pba_num_size: size of part number buffer * * Reads the product board assembly (PBA) number length from the EEPROM and * stores the value in pba_num. * Currently no func pointer exists and all implementations are handled in the * generic version of this function. **/ s32 e1000_read_pba_length(struct e1000_hw *hw, u32 *pba_num_size) { return e1000_read_pba_length_generic(hw, pba_num_size); } /** * e1000_read_pba_num - Read device part number * @hw: pointer to the HW structure * @pba_num: pointer to device part number * * Reads the product board assembly (PBA) number from the EEPROM and stores * the value in pba_num. * Currently no func pointer exists and all implementations are handled in the * generic version of this function. **/ s32 e1000_read_pba_num(struct e1000_hw *hw, u32 *pba_num) { return e1000_read_pba_num_generic(hw, pba_num); } /** * e1000_validate_nvm_checksum - Verifies NVM (EEPROM) checksum * @hw: pointer to the HW structure * * Validates the NVM checksum is correct. This is a function pointer entry * point called by drivers. **/ s32 e1000_validate_nvm_checksum(struct e1000_hw *hw) { if (hw->nvm.ops.validate) return hw->nvm.ops.validate(hw); return -E1000_ERR_CONFIG; } /** * e1000_update_nvm_checksum - Updates NVM (EEPROM) checksum * @hw: pointer to the HW structure * * Updates the NVM checksum. Currently no func pointer exists and all * implementations are handled in the generic version of this function. **/ s32 e1000_update_nvm_checksum(struct e1000_hw *hw) { if (hw->nvm.ops.update) return hw->nvm.ops.update(hw); return -E1000_ERR_CONFIG; } /** * e1000_reload_nvm - Reloads EEPROM * @hw: pointer to the HW structure * * Reloads the EEPROM by setting the "Reinitialize from EEPROM" bit in the * extended control register. **/ void e1000_reload_nvm(struct e1000_hw *hw) { if (hw->nvm.ops.reload) hw->nvm.ops.reload(hw); } /** * e1000_read_nvm - Reads NVM (EEPROM) * @hw: pointer to the HW structure * @offset: the word offset to read * @words: number of 16-bit words to read * @data: pointer to the properly sized buffer for the data. * * Reads 16-bit chunks of data from the NVM (EEPROM). This is a function * pointer entry point called by drivers. **/ s32 e1000_read_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { if (hw->nvm.ops.read) return hw->nvm.ops.read(hw, offset, words, data); return -E1000_ERR_CONFIG; } /** * e1000_write_nvm - Writes to NVM (EEPROM) * @hw: pointer to the HW structure * @offset: the word offset to read * @words: number of 16-bit words to write * @data: pointer to the properly sized buffer for the data. * * Writes 16-bit chunks of data to the NVM (EEPROM). This is a function * pointer entry point called by drivers. **/ s32 e1000_write_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { if (hw->nvm.ops.write) return hw->nvm.ops.write(hw, offset, words, data); return E1000_SUCCESS; } /** * e1000_write_8bit_ctrl_reg - Writes 8bit Control register * @hw: pointer to the HW structure * @reg: 32bit register offset * @offset: the register to write * @data: the value to write. * * Writes the PHY register at offset with the value in data. * This is a function pointer entry point called by drivers. **/ s32 e1000_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, u32 offset, u8 data) { return e1000_write_8bit_ctrl_reg_generic(hw, reg, offset, data); } /** * e1000_power_up_phy - Restores link in case of PHY power down * @hw: pointer to the HW structure * * The phy may be powered down to save power, to turn off link when the * driver is unloaded, or wake on lan is not enabled (among others). **/ void e1000_power_up_phy(struct e1000_hw *hw) { if (hw->phy.ops.power_up) hw->phy.ops.power_up(hw); e1000_setup_link(hw); } /** * e1000_power_down_phy - Power down PHY * @hw: pointer to the HW structure * * The phy may be powered down to save power, to turn off link when the * driver is unloaded, or wake on lan is not enabled (among others). **/ void e1000_power_down_phy(struct e1000_hw *hw) { if (hw->phy.ops.power_down) hw->phy.ops.power_down(hw); } /** * e1000_power_up_fiber_serdes_link - Power up serdes link * @hw: pointer to the HW structure * * Power on the optics and PCS. **/ void e1000_power_up_fiber_serdes_link(struct e1000_hw *hw) { if (hw->mac.ops.power_up_serdes) hw->mac.ops.power_up_serdes(hw); } /** * e1000_shutdown_fiber_serdes_link - Remove link during power down * @hw: pointer to the HW structure * * Shutdown the optics and PCS on driver unload. **/ void e1000_shutdown_fiber_serdes_link(struct e1000_hw *hw) { if (hw->mac.ops.shutdown_serdes) hw->mac.ops.shutdown_serdes(hw); } Index: projects/binutils-2.17/sys/dev/e1000/e1000_hw.h =================================================================== --- projects/binutils-2.17/sys/dev/e1000/e1000_hw.h (revision 215829) +++ projects/binutils-2.17/sys/dev/e1000/e1000_hw.h (revision 215830) @@ -1,943 +1,945 @@ /****************************************************************************** Copyright (c) 2001-2010, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifndef _E1000_HW_H_ #define _E1000_HW_H_ #include "e1000_osdep.h" #include "e1000_regs.h" #include "e1000_defines.h" struct e1000_hw; #define E1000_DEV_ID_82542 0x1000 #define E1000_DEV_ID_82543GC_FIBER 0x1001 #define E1000_DEV_ID_82543GC_COPPER 0x1004 #define E1000_DEV_ID_82544EI_COPPER 0x1008 #define E1000_DEV_ID_82544EI_FIBER 0x1009 #define E1000_DEV_ID_82544GC_COPPER 0x100C #define E1000_DEV_ID_82544GC_LOM 0x100D #define E1000_DEV_ID_82540EM 0x100E #define E1000_DEV_ID_82540EM_LOM 0x1015 #define E1000_DEV_ID_82540EP_LOM 0x1016 #define E1000_DEV_ID_82540EP 0x1017 #define E1000_DEV_ID_82540EP_LP 0x101E #define E1000_DEV_ID_82545EM_COPPER 0x100F #define E1000_DEV_ID_82545EM_FIBER 0x1011 #define E1000_DEV_ID_82545GM_COPPER 0x1026 #define E1000_DEV_ID_82545GM_FIBER 0x1027 #define E1000_DEV_ID_82545GM_SERDES 0x1028 #define E1000_DEV_ID_82546EB_COPPER 0x1010 #define E1000_DEV_ID_82546EB_FIBER 0x1012 #define E1000_DEV_ID_82546EB_QUAD_COPPER 0x101D #define E1000_DEV_ID_82546GB_COPPER 0x1079 #define E1000_DEV_ID_82546GB_FIBER 0x107A #define E1000_DEV_ID_82546GB_SERDES 0x107B #define E1000_DEV_ID_82546GB_PCIE 0x108A #define E1000_DEV_ID_82546GB_QUAD_COPPER 0x1099 #define E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3 0x10B5 #define E1000_DEV_ID_82541EI 0x1013 #define E1000_DEV_ID_82541EI_MOBILE 0x1018 #define E1000_DEV_ID_82541ER_LOM 0x1014 #define E1000_DEV_ID_82541ER 0x1078 #define E1000_DEV_ID_82541GI 0x1076 #define E1000_DEV_ID_82541GI_LF 0x107C #define E1000_DEV_ID_82541GI_MOBILE 0x1077 #define E1000_DEV_ID_82547EI 0x1019 #define E1000_DEV_ID_82547EI_MOBILE 0x101A #define E1000_DEV_ID_82547GI 0x1075 #define E1000_DEV_ID_82571EB_COPPER 0x105E #define E1000_DEV_ID_82571EB_FIBER 0x105F #define E1000_DEV_ID_82571EB_SERDES 0x1060 #define E1000_DEV_ID_82571EB_SERDES_DUAL 0x10D9 #define E1000_DEV_ID_82571EB_SERDES_QUAD 0x10DA #define E1000_DEV_ID_82571EB_QUAD_COPPER 0x10A4 #define E1000_DEV_ID_82571PT_QUAD_COPPER 0x10D5 #define E1000_DEV_ID_82571EB_QUAD_FIBER 0x10A5 #define E1000_DEV_ID_82571EB_QUAD_COPPER_LP 0x10BC #define E1000_DEV_ID_82572EI_COPPER 0x107D #define E1000_DEV_ID_82572EI_FIBER 0x107E #define E1000_DEV_ID_82572EI_SERDES 0x107F #define E1000_DEV_ID_82572EI 0x10B9 #define E1000_DEV_ID_82573E 0x108B #define E1000_DEV_ID_82573E_IAMT 0x108C #define E1000_DEV_ID_82573L 0x109A #define E1000_DEV_ID_82574L 0x10D3 #define E1000_DEV_ID_82574LA 0x10F6 #define E1000_DEV_ID_82583V 0x150C #define E1000_DEV_ID_80003ES2LAN_COPPER_DPT 0x1096 #define E1000_DEV_ID_80003ES2LAN_SERDES_DPT 0x1098 #define E1000_DEV_ID_80003ES2LAN_COPPER_SPT 0x10BA #define E1000_DEV_ID_80003ES2LAN_SERDES_SPT 0x10BB #define E1000_DEV_ID_ICH8_82567V_3 0x1501 #define E1000_DEV_ID_ICH8_IGP_M_AMT 0x1049 #define E1000_DEV_ID_ICH8_IGP_AMT 0x104A #define E1000_DEV_ID_ICH8_IGP_C 0x104B #define E1000_DEV_ID_ICH8_IFE 0x104C #define E1000_DEV_ID_ICH8_IFE_GT 0x10C4 #define E1000_DEV_ID_ICH8_IFE_G 0x10C5 #define E1000_DEV_ID_ICH8_IGP_M 0x104D #define E1000_DEV_ID_ICH9_IGP_M 0x10BF #define E1000_DEV_ID_ICH9_IGP_M_AMT 0x10F5 #define E1000_DEV_ID_ICH9_IGP_M_V 0x10CB #define E1000_DEV_ID_ICH9_IGP_AMT 0x10BD #define E1000_DEV_ID_ICH9_BM 0x10E5 #define E1000_DEV_ID_ICH9_IGP_C 0x294C #define E1000_DEV_ID_ICH9_IFE 0x10C0 #define E1000_DEV_ID_ICH9_IFE_GT 0x10C3 #define E1000_DEV_ID_ICH9_IFE_G 0x10C2 #define E1000_DEV_ID_ICH10_R_BM_LM 0x10CC #define E1000_DEV_ID_ICH10_R_BM_LF 0x10CD #define E1000_DEV_ID_ICH10_R_BM_V 0x10CE #define E1000_DEV_ID_ICH10_HANKSVILLE 0xF0FE #define E1000_DEV_ID_ICH10_D_BM_LM 0x10DE #define E1000_DEV_ID_ICH10_D_BM_LF 0x10DF #define E1000_DEV_ID_ICH10_D_BM_V 0x1525 #define E1000_DEV_ID_PCH_M_HV_LM 0x10EA #define E1000_DEV_ID_PCH_M_HV_LC 0x10EB #define E1000_DEV_ID_PCH_D_HV_DM 0x10EF #define E1000_DEV_ID_PCH_D_HV_DC 0x10F0 #define E1000_DEV_ID_PCH2_LV_LM 0x1502 #define E1000_DEV_ID_PCH2_LV_V 0x1503 #define E1000_DEV_ID_82576 0x10C9 #define E1000_DEV_ID_82576_FIBER 0x10E6 #define E1000_DEV_ID_82576_SERDES 0x10E7 #define E1000_DEV_ID_82576_QUAD_COPPER 0x10E8 #define E1000_DEV_ID_82576_QUAD_COPPER_ET2 0x1526 #define E1000_DEV_ID_82576_NS 0x150A #define E1000_DEV_ID_82576_NS_SERDES 0x1518 #define E1000_DEV_ID_82576_SERDES_QUAD 0x150D #define E1000_DEV_ID_82576_VF 0x10CA #define E1000_DEV_ID_82575EB_COPPER 0x10A7 #define E1000_DEV_ID_82575EB_FIBER_SERDES 0x10A9 #define E1000_DEV_ID_82575GB_QUAD_COPPER 0x10D6 #define E1000_DEV_ID_82575GB_QUAD_COPPER_PM 0x10E2 #define E1000_DEV_ID_82580_COPPER 0x150E #define E1000_DEV_ID_82580_FIBER 0x150F #define E1000_DEV_ID_82580_SERDES 0x1510 #define E1000_DEV_ID_82580_SGMII 0x1511 #define E1000_DEV_ID_82580_COPPER_DUAL 0x1516 #define E1000_DEV_ID_82580_QUAD_FIBER 0x1527 +#define E1000_DEV_ID_DH89XXCC_SGMII 0x0436 +#define E1000_DEV_ID_DH89XXCC_SERDES 0x0438 #define E1000_REVISION_0 0 #define E1000_REVISION_1 1 #define E1000_REVISION_2 2 #define E1000_REVISION_3 3 #define E1000_REVISION_4 4 #define E1000_FUNC_0 0 #define E1000_FUNC_1 1 #define E1000_FUNC_2 2 #define E1000_FUNC_3 3 #define E1000_ALT_MAC_ADDRESS_OFFSET_LAN0 0 #define E1000_ALT_MAC_ADDRESS_OFFSET_LAN1 3 #define E1000_ALT_MAC_ADDRESS_OFFSET_LAN2 6 #define E1000_ALT_MAC_ADDRESS_OFFSET_LAN3 9 enum e1000_mac_type { e1000_undefined = 0, e1000_82542, e1000_82543, e1000_82544, e1000_82540, e1000_82545, e1000_82545_rev_3, e1000_82546, e1000_82546_rev_3, e1000_82541, e1000_82541_rev_2, e1000_82547, e1000_82547_rev_2, e1000_82571, e1000_82572, e1000_82573, e1000_82574, e1000_82583, e1000_80003es2lan, e1000_ich8lan, e1000_ich9lan, e1000_ich10lan, e1000_pchlan, e1000_pch2lan, e1000_82575, e1000_82576, e1000_82580, e1000_vfadapt, e1000_num_macs /* List is 1-based, so subtract 1 for TRUE count. */ }; enum e1000_media_type { e1000_media_type_unknown = 0, e1000_media_type_copper = 1, e1000_media_type_fiber = 2, e1000_media_type_internal_serdes = 3, e1000_num_media_types }; enum e1000_nvm_type { e1000_nvm_unknown = 0, e1000_nvm_none, e1000_nvm_eeprom_spi, e1000_nvm_eeprom_microwire, e1000_nvm_flash_hw, e1000_nvm_flash_sw }; enum e1000_nvm_override { e1000_nvm_override_none = 0, e1000_nvm_override_spi_small, e1000_nvm_override_spi_large, e1000_nvm_override_microwire_small, e1000_nvm_override_microwire_large }; enum e1000_phy_type { e1000_phy_unknown = 0, e1000_phy_none, e1000_phy_m88, e1000_phy_igp, e1000_phy_igp_2, e1000_phy_gg82563, e1000_phy_igp_3, e1000_phy_ife, e1000_phy_bm, e1000_phy_82578, e1000_phy_82577, e1000_phy_82579, e1000_phy_82580, e1000_phy_vf, }; enum e1000_bus_type { e1000_bus_type_unknown = 0, e1000_bus_type_pci, e1000_bus_type_pcix, e1000_bus_type_pci_express, e1000_bus_type_reserved }; enum e1000_bus_speed { e1000_bus_speed_unknown = 0, e1000_bus_speed_33, e1000_bus_speed_66, e1000_bus_speed_100, e1000_bus_speed_120, e1000_bus_speed_133, e1000_bus_speed_2500, e1000_bus_speed_5000, e1000_bus_speed_reserved }; enum e1000_bus_width { e1000_bus_width_unknown = 0, e1000_bus_width_pcie_x1, e1000_bus_width_pcie_x2, e1000_bus_width_pcie_x4 = 4, e1000_bus_width_pcie_x8 = 8, e1000_bus_width_32, e1000_bus_width_64, e1000_bus_width_reserved }; enum e1000_1000t_rx_status { e1000_1000t_rx_status_not_ok = 0, e1000_1000t_rx_status_ok, e1000_1000t_rx_status_undefined = 0xFF }; enum e1000_rev_polarity { e1000_rev_polarity_normal = 0, e1000_rev_polarity_reversed, e1000_rev_polarity_undefined = 0xFF }; enum e1000_fc_mode { e1000_fc_none = 0, e1000_fc_rx_pause, e1000_fc_tx_pause, e1000_fc_full, e1000_fc_default = 0xFF }; enum e1000_ffe_config { e1000_ffe_config_enabled = 0, e1000_ffe_config_active, e1000_ffe_config_blocked }; enum e1000_dsp_config { e1000_dsp_config_disabled = 0, e1000_dsp_config_enabled, e1000_dsp_config_activated, e1000_dsp_config_undefined = 0xFF }; enum e1000_ms_type { e1000_ms_hw_default = 0, e1000_ms_force_master, e1000_ms_force_slave, e1000_ms_auto }; enum e1000_smart_speed { e1000_smart_speed_default = 0, e1000_smart_speed_on, e1000_smart_speed_off }; enum e1000_serdes_link_state { e1000_serdes_link_down = 0, e1000_serdes_link_autoneg_progress, e1000_serdes_link_autoneg_complete, e1000_serdes_link_forced_up }; #define __le16 u16 #define __le32 u32 #define __le64 u64 /* Receive Descriptor */ struct e1000_rx_desc { __le64 buffer_addr; /* Address of the descriptor's data buffer */ __le16 length; /* Length of data DMAed into data buffer */ __le16 csum; /* Packet checksum */ u8 status; /* Descriptor status */ u8 errors; /* Descriptor Errors */ __le16 special; }; /* Receive Descriptor - Extended */ union e1000_rx_desc_extended { struct { __le64 buffer_addr; __le64 reserved; } read; struct { struct { __le32 mrq; /* Multiple Rx Queues */ union { __le32 rss; /* RSS Hash */ struct { __le16 ip_id; /* IP id */ __le16 csum; /* Packet Checksum */ } csum_ip; } hi_dword; } lower; struct { __le32 status_error; /* ext status/error */ __le16 length; __le16 vlan; /* VLAN tag */ } upper; } wb; /* writeback */ }; #define MAX_PS_BUFFERS 4 /* Receive Descriptor - Packet Split */ union e1000_rx_desc_packet_split { struct { /* one buffer for protocol header(s), three data buffers */ __le64 buffer_addr[MAX_PS_BUFFERS]; } read; struct { struct { __le32 mrq; /* Multiple Rx Queues */ union { __le32 rss; /* RSS Hash */ struct { __le16 ip_id; /* IP id */ __le16 csum; /* Packet Checksum */ } csum_ip; } hi_dword; } lower; struct { __le32 status_error; /* ext status/error */ __le16 length0; /* length of buffer 0 */ __le16 vlan; /* VLAN tag */ } middle; struct { __le16 header_status; __le16 length[3]; /* length of buffers 1-3 */ } upper; __le64 reserved; } wb; /* writeback */ }; /* Transmit Descriptor */ struct e1000_tx_desc { __le64 buffer_addr; /* Address of the descriptor's data buffer */ union { __le32 data; struct { __le16 length; /* Data buffer length */ u8 cso; /* Checksum offset */ u8 cmd; /* Descriptor control */ } flags; } lower; union { __le32 data; struct { u8 status; /* Descriptor status */ u8 css; /* Checksum start */ __le16 special; } fields; } upper; }; /* Offload Context Descriptor */ struct e1000_context_desc { union { __le32 ip_config; struct { u8 ipcss; /* IP checksum start */ u8 ipcso; /* IP checksum offset */ __le16 ipcse; /* IP checksum end */ } ip_fields; } lower_setup; union { __le32 tcp_config; struct { u8 tucss; /* TCP checksum start */ u8 tucso; /* TCP checksum offset */ __le16 tucse; /* TCP checksum end */ } tcp_fields; } upper_setup; __le32 cmd_and_length; union { __le32 data; struct { u8 status; /* Descriptor status */ u8 hdr_len; /* Header length */ __le16 mss; /* Maximum segment size */ } fields; } tcp_seg_setup; }; /* Offload data descriptor */ struct e1000_data_desc { __le64 buffer_addr; /* Address of the descriptor's buffer address */ union { __le32 data; struct { __le16 length; /* Data buffer length */ u8 typ_len_ext; u8 cmd; } flags; } lower; union { __le32 data; struct { u8 status; /* Descriptor status */ u8 popts; /* Packet Options */ __le16 special; } fields; } upper; }; /* Statistics counters collected by the MAC */ struct e1000_hw_stats { u64 crcerrs; u64 algnerrc; u64 symerrs; u64 rxerrc; u64 mpc; u64 scc; u64 ecol; u64 mcc; u64 latecol; u64 colc; u64 dc; u64 tncrs; u64 sec; u64 cexterr; u64 rlec; u64 xonrxc; u64 xontxc; u64 xoffrxc; u64 xofftxc; u64 fcruc; u64 prc64; u64 prc127; u64 prc255; u64 prc511; u64 prc1023; u64 prc1522; u64 gprc; u64 bprc; u64 mprc; u64 gptc; u64 gorc; u64 gotc; u64 rnbc; u64 ruc; u64 rfc; u64 roc; u64 rjc; u64 mgprc; u64 mgpdc; u64 mgptc; u64 tor; u64 tot; u64 tpr; u64 tpt; u64 ptc64; u64 ptc127; u64 ptc255; u64 ptc511; u64 ptc1023; u64 ptc1522; u64 mptc; u64 bptc; u64 tsctc; u64 tsctfc; u64 iac; u64 icrxptc; u64 icrxatc; u64 ictxptc; u64 ictxatc; u64 ictxqec; u64 ictxqmtc; u64 icrxdmtc; u64 icrxoc; u64 cbtmpc; u64 htdpmc; u64 cbrdpc; u64 cbrmpc; u64 rpthc; u64 hgptc; u64 htcbdpc; u64 hgorc; u64 hgotc; u64 lenerrs; u64 scvpc; u64 hrmpc; u64 doosync; }; struct e1000_vf_stats { u64 base_gprc; u64 base_gptc; u64 base_gorc; u64 base_gotc; u64 base_mprc; u64 base_gotlbc; u64 base_gptlbc; u64 base_gorlbc; u64 base_gprlbc; u32 last_gprc; u32 last_gptc; u32 last_gorc; u32 last_gotc; u32 last_mprc; u32 last_gotlbc; u32 last_gptlbc; u32 last_gorlbc; u32 last_gprlbc; u64 gprc; u64 gptc; u64 gorc; u64 gotc; u64 mprc; u64 gotlbc; u64 gptlbc; u64 gorlbc; u64 gprlbc; }; struct e1000_phy_stats { u32 idle_errors; u32 receive_errors; }; struct e1000_host_mng_dhcp_cookie { u32 signature; u8 status; u8 reserved0; u16 vlan_id; u32 reserved1; u16 reserved2; u8 reserved3; u8 checksum; }; /* Host Interface "Rev 1" */ struct e1000_host_command_header { u8 command_id; u8 command_length; u8 command_options; u8 checksum; }; #define E1000_HI_MAX_DATA_LENGTH 252 struct e1000_host_command_info { struct e1000_host_command_header command_header; u8 command_data[E1000_HI_MAX_DATA_LENGTH]; }; /* Host Interface "Rev 2" */ struct e1000_host_mng_command_header { u8 command_id; u8 checksum; u16 reserved1; u16 reserved2; u16 command_length; }; #define E1000_HI_MAX_MNG_DATA_LENGTH 0x6F8 struct e1000_host_mng_command_info { struct e1000_host_mng_command_header command_header; u8 command_data[E1000_HI_MAX_MNG_DATA_LENGTH]; }; #include "e1000_mac.h" #include "e1000_phy.h" #include "e1000_nvm.h" #include "e1000_manage.h" #include "e1000_mbx.h" struct e1000_mac_operations { /* Function pointers for the MAC. */ s32 (*init_params)(struct e1000_hw *); s32 (*id_led_init)(struct e1000_hw *); s32 (*blink_led)(struct e1000_hw *); s32 (*check_for_link)(struct e1000_hw *); bool (*check_mng_mode)(struct e1000_hw *hw); s32 (*cleanup_led)(struct e1000_hw *); void (*clear_hw_cntrs)(struct e1000_hw *); void (*clear_vfta)(struct e1000_hw *); s32 (*get_bus_info)(struct e1000_hw *); void (*set_lan_id)(struct e1000_hw *); s32 (*get_link_up_info)(struct e1000_hw *, u16 *, u16 *); s32 (*led_on)(struct e1000_hw *); s32 (*led_off)(struct e1000_hw *); void (*update_mc_addr_list)(struct e1000_hw *, u8 *, u32); s32 (*reset_hw)(struct e1000_hw *); s32 (*init_hw)(struct e1000_hw *); void (*shutdown_serdes)(struct e1000_hw *); void (*power_up_serdes)(struct e1000_hw *); s32 (*setup_link)(struct e1000_hw *); s32 (*setup_physical_interface)(struct e1000_hw *); s32 (*setup_led)(struct e1000_hw *); void (*write_vfta)(struct e1000_hw *, u32, u32); void (*config_collision_dist)(struct e1000_hw *); void (*rar_set)(struct e1000_hw *, u8*, u32); s32 (*read_mac_addr)(struct e1000_hw *); s32 (*validate_mdi_setting)(struct e1000_hw *); s32 (*mng_host_if_write)(struct e1000_hw *, u8*, u16, u16, u8*); s32 (*mng_write_cmd_header)(struct e1000_hw *hw, struct e1000_host_mng_command_header*); s32 (*mng_enable_host_if)(struct e1000_hw *); s32 (*wait_autoneg)(struct e1000_hw *); }; struct e1000_phy_operations { s32 (*init_params)(struct e1000_hw *); s32 (*acquire)(struct e1000_hw *); s32 (*cfg_on_link_up)(struct e1000_hw *); s32 (*check_polarity)(struct e1000_hw *); s32 (*check_reset_block)(struct e1000_hw *); s32 (*commit)(struct e1000_hw *); s32 (*force_speed_duplex)(struct e1000_hw *); s32 (*get_cfg_done)(struct e1000_hw *hw); s32 (*get_cable_length)(struct e1000_hw *); s32 (*get_info)(struct e1000_hw *); s32 (*read_reg)(struct e1000_hw *, u32, u16 *); s32 (*read_reg_locked)(struct e1000_hw *, u32, u16 *); void (*release)(struct e1000_hw *); s32 (*reset)(struct e1000_hw *); s32 (*set_d0_lplu_state)(struct e1000_hw *, bool); s32 (*set_d3_lplu_state)(struct e1000_hw *, bool); s32 (*write_reg)(struct e1000_hw *, u32, u16); s32 (*write_reg_locked)(struct e1000_hw *, u32, u16); void (*power_up)(struct e1000_hw *); void (*power_down)(struct e1000_hw *); }; struct e1000_nvm_operations { s32 (*init_params)(struct e1000_hw *); s32 (*acquire)(struct e1000_hw *); s32 (*read)(struct e1000_hw *, u16, u16, u16 *); void (*release)(struct e1000_hw *); void (*reload)(struct e1000_hw *); s32 (*update)(struct e1000_hw *); s32 (*valid_led_default)(struct e1000_hw *, u16 *); s32 (*validate)(struct e1000_hw *); s32 (*write)(struct e1000_hw *, u16, u16, u16 *); }; struct e1000_mac_info { struct e1000_mac_operations ops; u8 addr[6]; u8 perm_addr[6]; enum e1000_mac_type type; u32 collision_delta; u32 ledctl_default; u32 ledctl_mode1; u32 ledctl_mode2; u32 mc_filter_type; u32 tx_packet_delta; u32 txcw; u16 current_ifs_val; u16 ifs_max_val; u16 ifs_min_val; u16 ifs_ratio; u16 ifs_step_size; u16 mta_reg_count; u16 uta_reg_count; /* Maximum size of the MTA register table in all supported adapters */ #define MAX_MTA_REG 128 u32 mta_shadow[MAX_MTA_REG]; u16 rar_entry_count; u8 forced_speed_duplex; bool adaptive_ifs; bool has_fwsm; bool arc_subsystem_valid; bool asf_firmware_present; bool autoneg; bool autoneg_failed; bool get_link_status; bool in_ifs_mode; bool report_tx_early; enum e1000_serdes_link_state serdes_link_state; bool serdes_has_link; bool tx_pkt_filtering; }; struct e1000_phy_info { struct e1000_phy_operations ops; enum e1000_phy_type type; enum e1000_1000t_rx_status local_rx; enum e1000_1000t_rx_status remote_rx; enum e1000_ms_type ms_type; enum e1000_ms_type original_ms_type; enum e1000_rev_polarity cable_polarity; enum e1000_smart_speed smart_speed; u32 addr; u32 id; u32 reset_delay_us; /* in usec */ u32 revision; enum e1000_media_type media_type; u16 autoneg_advertised; u16 autoneg_mask; u16 cable_length; u16 max_cable_length; u16 min_cable_length; u8 mdix; bool disable_polarity_correction; bool is_mdix; bool polarity_correction; bool reset_disable; bool speed_downgraded; bool autoneg_wait_to_complete; }; struct e1000_nvm_info { struct e1000_nvm_operations ops; enum e1000_nvm_type type; enum e1000_nvm_override override; u32 flash_bank_size; u32 flash_base_addr; u16 word_size; u16 delay_usec; u16 address_bits; u16 opcode_bits; u16 page_size; }; struct e1000_bus_info { enum e1000_bus_type type; enum e1000_bus_speed speed; enum e1000_bus_width width; u16 func; u16 pci_cmd_word; }; struct e1000_fc_info { u32 high_water; /* Flow control high-water mark */ u32 low_water; /* Flow control low-water mark */ u16 pause_time; /* Flow control pause timer */ u16 refresh_time; /* Flow control refresh timer */ bool send_xon; /* Flow control send XON */ bool strict_ieee; /* Strict IEEE mode */ enum e1000_fc_mode current_mode; /* FC mode in effect */ enum e1000_fc_mode requested_mode; /* FC mode requested by caller */ }; struct e1000_mbx_operations { s32 (*init_params)(struct e1000_hw *hw); s32 (*read)(struct e1000_hw *, u32 *, u16, u16); s32 (*write)(struct e1000_hw *, u32 *, u16, u16); s32 (*read_posted)(struct e1000_hw *, u32 *, u16, u16); s32 (*write_posted)(struct e1000_hw *, u32 *, u16, u16); s32 (*check_for_msg)(struct e1000_hw *, u16); s32 (*check_for_ack)(struct e1000_hw *, u16); s32 (*check_for_rst)(struct e1000_hw *, u16); }; struct e1000_mbx_stats { u32 msgs_tx; u32 msgs_rx; u32 acks; u32 reqs; u32 rsts; }; struct e1000_mbx_info { struct e1000_mbx_operations ops; struct e1000_mbx_stats stats; u32 timeout; u32 usec_delay; u16 size; }; struct e1000_dev_spec_82541 { enum e1000_dsp_config dsp_config; enum e1000_ffe_config ffe_config; u16 spd_default; bool phy_init_script; }; struct e1000_dev_spec_82542 { bool dma_fairness; }; struct e1000_dev_spec_82543 { u32 tbi_compatibility; bool dma_fairness; bool init_phy_disabled; }; struct e1000_dev_spec_82571 { bool laa_is_present; u32 smb_counter; E1000_MUTEX swflag_mutex; }; struct e1000_dev_spec_80003es2lan { bool mdic_wa_enable; }; struct e1000_shadow_ram { u16 value; bool modified; }; #define E1000_SHADOW_RAM_WORDS 2048 struct e1000_dev_spec_ich8lan { bool kmrn_lock_loss_workaround_enabled; struct e1000_shadow_ram shadow_ram[E1000_SHADOW_RAM_WORDS]; E1000_MUTEX nvm_mutex; E1000_MUTEX swflag_mutex; bool nvm_k1_enabled; bool eee_disable; }; struct e1000_dev_spec_82575 { bool sgmii_active; bool global_device_reset; }; struct e1000_dev_spec_vf { u32 vf_number; u32 v2p_mailbox; }; struct e1000_hw { void *back; u8 *hw_addr; u8 *flash_address; unsigned long io_base; struct e1000_mac_info mac; struct e1000_fc_info fc; struct e1000_phy_info phy; struct e1000_nvm_info nvm; struct e1000_bus_info bus; struct e1000_mbx_info mbx; struct e1000_host_mng_dhcp_cookie mng_cookie; union { struct e1000_dev_spec_82541 _82541; struct e1000_dev_spec_82542 _82542; struct e1000_dev_spec_82543 _82543; struct e1000_dev_spec_82571 _82571; struct e1000_dev_spec_80003es2lan _80003es2lan; struct e1000_dev_spec_ich8lan ich8lan; struct e1000_dev_spec_82575 _82575; struct e1000_dev_spec_vf vf; } dev_spec; u16 device_id; u16 subsystem_vendor_id; u16 subsystem_device_id; u16 vendor_id; u8 revision_id; }; #include "e1000_82541.h" #include "e1000_82543.h" #include "e1000_82571.h" #include "e1000_80003es2lan.h" #include "e1000_ich8lan.h" #include "e1000_82575.h" /* These functions must be implemented by drivers */ void e1000_pci_clear_mwi(struct e1000_hw *hw); void e1000_pci_set_mwi(struct e1000_hw *hw); s32 e1000_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value); s32 e1000_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value); void e1000_read_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value); void e1000_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value); #endif Index: projects/binutils-2.17/sys/dev/e1000/if_em.c =================================================================== --- projects/binutils-2.17/sys/dev/e1000/if_em.c (revision 215829) +++ projects/binutils-2.17/sys/dev/e1000/if_em.c (revision 215830) @@ -1,5480 +1,5508 @@ /****************************************************************************** Copyright (c) 2001-2010, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #include "opt_inet.h" #endif #include #include #if __FreeBSD_version >= 800000 #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "e1000_api.h" #include "e1000_82571.h" #include "if_em.h" /********************************************************************* * Set this to one to display debug statistics *********************************************************************/ int em_display_debug_stats = 0; /********************************************************************* * Driver version: *********************************************************************/ -char em_driver_version[] = "7.1.7"; +char em_driver_version[] = "7.1.8"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into e1000_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static em_vendor_info_t em_vendor_info_array[] = { /* Intel(R) PRO/1000 Network Connection */ { 0x8086, E1000_DEV_ID_82571EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82572EI_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82572EI_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82572EI_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82572EI, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82573E, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82573E_IAMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82573L, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82583V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IFE, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_82567V_3, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IFE, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_BM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82574L, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82574LA, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_R_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_D_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_M_HV_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_M_HV_LC, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_D_HV_DM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_D_HV_DC, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH2_LV_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH2_LV_V, PCI_ANY_ID, PCI_ANY_ID, 0}, /* required last entry */ { 0, 0, 0, 0, 0} }; /********************************************************************* * Table of branding strings for all supported NICs. *********************************************************************/ static char *em_strings[] = { "Intel(R) PRO/1000 Network Connection" }; /********************************************************************* * Function prototypes *********************************************************************/ static int em_probe(device_t); static int em_attach(device_t); static int em_detach(device_t); static int em_shutdown(device_t); static int em_suspend(device_t); static int em_resume(device_t); static void em_start(struct ifnet *); static void em_start_locked(struct ifnet *, struct tx_ring *); #ifdef EM_MULTIQUEUE static int em_mq_start(struct ifnet *, struct mbuf *); static int em_mq_start_locked(struct ifnet *, struct tx_ring *, struct mbuf *); static void em_qflush(struct ifnet *); #endif static int em_ioctl(struct ifnet *, u_long, caddr_t); static void em_init(void *); static void em_init_locked(struct adapter *); static void em_stop(void *); static void em_media_status(struct ifnet *, struct ifmediareq *); static int em_media_change(struct ifnet *); static void em_identify_hardware(struct adapter *); static int em_allocate_pci_resources(struct adapter *); static int em_allocate_legacy(struct adapter *); static int em_allocate_msix(struct adapter *); static int em_allocate_queues(struct adapter *); static int em_setup_msix(struct adapter *); static void em_free_pci_resources(struct adapter *); static void em_local_timer(void *); static void em_reset(struct adapter *); static int em_setup_interface(device_t, struct adapter *); static void em_setup_transmit_structures(struct adapter *); static void em_initialize_transmit_unit(struct adapter *); static int em_allocate_transmit_buffers(struct tx_ring *); static void em_free_transmit_structures(struct adapter *); static void em_free_transmit_buffers(struct tx_ring *); static int em_setup_receive_structures(struct adapter *); static int em_allocate_receive_buffers(struct rx_ring *); static void em_initialize_receive_unit(struct adapter *); static void em_free_receive_structures(struct adapter *); static void em_free_receive_buffers(struct rx_ring *); static void em_enable_intr(struct adapter *); static void em_disable_intr(struct adapter *); static void em_update_stats_counters(struct adapter *); static void em_add_hw_stats(struct adapter *adapter); static bool em_txeof(struct tx_ring *); static bool em_rxeof(struct rx_ring *, int, int *); #ifndef __NO_STRICT_ALIGNMENT static int em_fixup_rx(struct rx_ring *); #endif static void em_receive_checksum(struct e1000_rx_desc *, struct mbuf *); static void em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int, struct ip *, u32 *, u32 *); static void em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *, struct tcphdr *, u32 *, u32 *); static void em_set_promisc(struct adapter *); static void em_disable_promisc(struct adapter *); static void em_set_multi(struct adapter *); static void em_update_link_status(struct adapter *); static void em_refresh_mbufs(struct rx_ring *, int); static void em_register_vlan(void *, struct ifnet *, u16); static void em_unregister_vlan(void *, struct ifnet *, u16); static void em_setup_vlan_hw_support(struct adapter *); static int em_xmit(struct tx_ring *, struct mbuf **); static int em_dma_malloc(struct adapter *, bus_size_t, struct em_dma_alloc *, int); static void em_dma_free(struct adapter *, struct em_dma_alloc *); static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS); static void em_print_nvm_info(struct adapter *); static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS); static void em_print_debug_info(struct adapter *); static int em_is_valid_ether_addr(u8 *); static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS); static void em_add_int_delay_sysctl(struct adapter *, const char *, const char *, struct em_int_delay_info *, int, int); /* Management and WOL Support */ static void em_init_manageability(struct adapter *); static void em_release_manageability(struct adapter *); static void em_get_hw_control(struct adapter *); static void em_release_hw_control(struct adapter *); static void em_get_wakeup(device_t); static void em_enable_wakeup(device_t); static int em_enable_phy_wakeup(struct adapter *); static void em_led_func(void *, int); +static void em_disable_aspm(struct adapter *); static int em_irq_fast(void *); /* MSIX handlers */ static void em_msix_tx(void *); static void em_msix_rx(void *); static void em_msix_link(void *); static void em_handle_tx(void *context, int pending); static void em_handle_rx(void *context, int pending); static void em_handle_link(void *context, int pending); static void em_add_rx_process_limit(struct adapter *, const char *, const char *, int *, int); static void em_set_flow_cntrl(struct adapter *, const char *, const char *, int *, int); static __inline void em_rx_discard(struct rx_ring *, int); #ifdef DEVICE_POLLING static poll_handler_t em_poll; #endif /* POLLING */ /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t em_methods[] = { /* Device interface */ DEVMETHOD(device_probe, em_probe), DEVMETHOD(device_attach, em_attach), DEVMETHOD(device_detach, em_detach), DEVMETHOD(device_shutdown, em_shutdown), DEVMETHOD(device_suspend, em_suspend), DEVMETHOD(device_resume, em_resume), {0, 0} }; static driver_t em_driver = { "em", em_methods, sizeof(struct adapter), }; devclass_t em_devclass; DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0); MODULE_DEPEND(em, pci, 1, 1, 1); MODULE_DEPEND(em, ether, 1, 1, 1); /********************************************************************* * Tunable default values. *********************************************************************/ #define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000) #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) #define M_TSO_LEN 66 /* Allow common code without TSO */ #ifndef CSUM_TSO #define CSUM_TSO 0 #endif static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV); static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR); TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt); TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt); static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV); static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV); TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt); TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt); static int em_rxd = EM_DEFAULT_RXD; static int em_txd = EM_DEFAULT_TXD; TUNABLE_INT("hw.em.rxd", &em_rxd); TUNABLE_INT("hw.em.txd", &em_txd); static int em_smart_pwr_down = FALSE; TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down); /* Controls whether promiscuous also shows bad packets */ static int em_debug_sbp = FALSE; TUNABLE_INT("hw.em.sbp", &em_debug_sbp); static int em_enable_msix = TRUE; TUNABLE_INT("hw.em.enable_msix", &em_enable_msix); /* How many packets rxeof tries to clean at a time */ static int em_rx_process_limit = 100; TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit); /* Flow control setting - default to FULL */ static int em_fc_setting = e1000_fc_full; TUNABLE_INT("hw.em.fc_setting", &em_fc_setting); /* Global used in WOL setup with multiport cards */ static int global_quad_port_a = 0; /********************************************************************* * Device identification routine * * em_probe determines if the driver should be loaded on * adapter based on PCI vendor/device id of the adapter. * * return BUS_PROBE_DEFAULT on success, positive on failure *********************************************************************/ static int em_probe(device_t dev) { char adapter_name[60]; u16 pci_vendor_id = 0; u16 pci_device_id = 0; u16 pci_subvendor_id = 0; u16 pci_subdevice_id = 0; em_vendor_info_t *ent; INIT_DEBUGOUT("em_probe: begin"); pci_vendor_id = pci_get_vendor(dev); if (pci_vendor_id != EM_VENDOR_ID) return (ENXIO); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); ent = em_vendor_info_array; while (ent->vendor_id != 0) { if ((pci_vendor_id == ent->vendor_id) && (pci_device_id == ent->device_id) && ((pci_subvendor_id == ent->subvendor_id) || (ent->subvendor_id == PCI_ANY_ID)) && ((pci_subdevice_id == ent->subdevice_id) || (ent->subdevice_id == PCI_ANY_ID))) { sprintf(adapter_name, "%s %s", em_strings[ent->index], em_driver_version); device_set_desc_copy(dev, adapter_name); return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded. * This routine identifies the type of hardware, allocates all resources * and initializes the hardware. * * return 0 on success, positive on failure *********************************************************************/ static int em_attach(device_t dev) { struct adapter *adapter; int error = 0; INIT_DEBUGOUT("em_attach: begin"); adapter = device_get_softc(dev); adapter->dev = adapter->osdep.dev = dev; EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); /* SYSCTL stuff */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_nvm_info, "I", "NVM Information"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_debug_info, "I", "Debug Information"); callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); /* Determine hardware and mac info */ em_identify_hardware(adapter); /* Setup PCI resources */ if (em_allocate_pci_resources(adapter)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_pci; } /* ** For ICH8 and family we need to ** map the flash memory, and this ** must happen after the MAC is ** identified */ if ((adapter->hw.mac.type == e1000_ich8lan) || (adapter->hw.mac.type == e1000_ich9lan) || (adapter->hw.mac.type == e1000_ich10lan) || (adapter->hw.mac.type == e1000_pchlan) || (adapter->hw.mac.type == e1000_pch2lan)) { int rid = EM_BAR_TYPE_FLASH; adapter->flash = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->flash == NULL) { device_printf(dev, "Mapping of Flash failed\n"); error = ENXIO; goto err_pci; } /* This is used in the shared code */ adapter->hw.flash_address = (u8 *)adapter->flash; adapter->osdep.flash_bus_space_tag = rman_get_bustag(adapter->flash); adapter->osdep.flash_bus_space_handle = rman_get_bushandle(adapter->flash); } /* Do Shared Code initialization */ if (e1000_setup_init_funcs(&adapter->hw, TRUE)) { device_printf(dev, "Setup of Shared code failed\n"); error = ENXIO; goto err_pci; } e1000_get_bus_info(&adapter->hw); /* Set up some sysctls for the tunable interrupt delays */ em_add_int_delay_sysctl(adapter, "rx_int_delay", "receive interrupt delay in usecs", &adapter->rx_int_delay, E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt); em_add_int_delay_sysctl(adapter, "tx_int_delay", "transmit interrupt delay in usecs", &adapter->tx_int_delay, E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt); em_add_int_delay_sysctl(adapter, "rx_abs_int_delay", "receive interrupt delay limit in usecs", &adapter->rx_abs_int_delay, E1000_REGISTER(&adapter->hw, E1000_RADV), em_rx_abs_int_delay_dflt); em_add_int_delay_sysctl(adapter, "tx_abs_int_delay", "transmit interrupt delay limit in usecs", &adapter->tx_abs_int_delay, E1000_REGISTER(&adapter->hw, E1000_TADV), em_tx_abs_int_delay_dflt); /* Sysctl for limiting the amount of work done in the taskqueue */ em_add_rx_process_limit(adapter, "rx_processing_limit", "max number of rx packets to process", &adapter->rx_process_limit, em_rx_process_limit); /* Sysctl for setting the interface flow control */ em_set_flow_cntrl(adapter, "flow_control", "max number of rx packets to process", &adapter->fc_setting, em_fc_setting); /* * Validate number of transmit and receive descriptors. It * must not exceed hardware maximum, and must be multiple * of E1000_DBA_ALIGN. */ if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 || (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) { device_printf(dev, "Using %d TX descriptors instead of %d!\n", EM_DEFAULT_TXD, em_txd); adapter->num_tx_desc = EM_DEFAULT_TXD; } else adapter->num_tx_desc = em_txd; if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 || (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) { device_printf(dev, "Using %d RX descriptors instead of %d!\n", EM_DEFAULT_RXD, em_rxd); adapter->num_rx_desc = EM_DEFAULT_RXD; } else adapter->num_rx_desc = em_rxd; adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_wait_to_complete = FALSE; adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; /* Copper options */ if (adapter->hw.phy.media_type == e1000_media_type_copper) { adapter->hw.phy.mdix = AUTO_ALL_MODES; adapter->hw.phy.disable_polarity_correction = FALSE; adapter->hw.phy.ms_type = EM_MASTER_SLAVE; } /* * Set the frame limits assuming * standard ethernet sized frames. */ adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE; adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE; /* * This controls when hardware reports transmit completion * status. */ adapter->hw.mac.report_tx_early = 1; /* ** Get queue/ring memory */ if (em_allocate_queues(adapter)) { error = ENOMEM; goto err_pci; } /* Allocate multicast array memory. */ adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); if (adapter->mta == NULL) { device_printf(dev, "Can not allocate multicast setup array\n"); error = ENOMEM; goto err_late; } /* Check SOL/IDER usage */ if (e1000_check_reset_block(&adapter->hw)) device_printf(dev, "PHY reset is blocked" " due to SOL/IDER session.\n"); /* ** Start from a known state, this is ** important in reading the nvm and ** mac from that. */ e1000_reset_hw(&adapter->hw); /* Make sure we have a good EEPROM before we read from it */ if (e1000_validate_nvm_checksum(&adapter->hw) < 0) { /* ** Some PCI-E parts fail the first check due to ** the link being in sleep state, call it again, ** if it fails a second time its a real issue. */ if (e1000_validate_nvm_checksum(&adapter->hw) < 0) { device_printf(dev, "The EEPROM Checksum Is Not Valid\n"); error = EIO; goto err_late; } } /* Copy the permanent MAC address out of the EEPROM */ if (e1000_read_mac_addr(&adapter->hw) < 0) { device_printf(dev, "EEPROM read error while reading MAC" " address\n"); error = EIO; goto err_late; } if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) { device_printf(dev, "Invalid MAC address\n"); error = EIO; goto err_late; } /* ** Do interrupt configuration */ if (adapter->msix > 1) /* Do MSIX */ error = em_allocate_msix(adapter); else /* MSI or Legacy */ error = em_allocate_legacy(adapter); if (error) goto err_late; /* * Get Wake-on-Lan and Management info for later use */ em_get_wakeup(dev); /* Setup OS specific network interface */ if (em_setup_interface(dev, adapter) != 0) goto err_late; em_reset(adapter); /* Initialize statistics */ em_update_stats_counters(adapter); adapter->hw.mac.get_link_status = 1; em_update_link_status(adapter); /* Register for VLAN events */ adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); em_add_hw_stats(adapter); /* Non-AMT based hardware can now take control from firmware */ if (adapter->has_manage && !adapter->has_amt) em_get_hw_control(adapter); /* Tell the stack that the interface is not active */ adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); adapter->led_dev = led_create(em_led_func, adapter, device_get_nameunit(dev)); INIT_DEBUGOUT("em_attach: end"); return (0); err_late: em_free_transmit_structures(adapter); em_free_receive_structures(adapter); em_release_hw_control(adapter); if (adapter->ifp != NULL) if_free(adapter->ifp); err_pci: em_free_pci_resources(adapter); free(adapter->mta, M_DEVBUF); EM_CORE_LOCK_DESTROY(adapter); return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. * * return 0 on success, positive on failure *********************************************************************/ static int em_detach(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct ifnet *ifp = adapter->ifp; INIT_DEBUGOUT("em_detach: begin"); /* Make sure VLANS are not using driver */ if (adapter->ifp->if_vlantrunk != NULL) { device_printf(dev,"Vlan in use, detach first\n"); return (EBUSY); } #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) ether_poll_deregister(ifp); #endif if (adapter->led_dev != NULL) led_destroy(adapter->led_dev); EM_CORE_LOCK(adapter); adapter->in_detach = 1; em_stop(adapter); EM_CORE_UNLOCK(adapter); EM_CORE_LOCK_DESTROY(adapter); e1000_phy_hw_reset(&adapter->hw); em_release_manageability(adapter); em_release_hw_control(adapter); /* Unregister VLAN events */ if (adapter->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); if (adapter->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); ether_ifdetach(adapter->ifp); callout_drain(&adapter->timer); em_free_pci_resources(adapter); bus_generic_detach(dev); if_free(ifp); em_free_transmit_structures(adapter); em_free_receive_structures(adapter); em_release_hw_control(adapter); free(adapter->mta, M_DEVBUF); return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int em_shutdown(device_t dev) { return em_suspend(dev); } /* * Suspend/resume device methods. */ static int em_suspend(device_t dev) { struct adapter *adapter = device_get_softc(dev); EM_CORE_LOCK(adapter); em_release_manageability(adapter); em_release_hw_control(adapter); em_enable_wakeup(dev); EM_CORE_UNLOCK(adapter); return bus_generic_suspend(dev); } static int em_resume(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct ifnet *ifp = adapter->ifp; EM_CORE_LOCK(adapter); em_init_locked(adapter); em_init_manageability(adapter); EM_CORE_UNLOCK(adapter); em_start(ifp); return bus_generic_resume(dev); } /********************************************************************* * Transmit entry point * * em_start is called by the stack to initiate a transmit. * The driver will remain in this routine as long as there are * packets to transmit and transmit resources are available. * In case resources are not available stack is notified and * the packet is requeued. **********************************************************************/ #ifdef EM_MULTIQUEUE static int em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m) { struct adapter *adapter = txr->adapter; struct mbuf *next; int err = 0, enq = 0; if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || adapter->link_active == 0) { if (m != NULL) err = drbr_enqueue(ifp, txr->br, m); return (err); } /* Call cleanup if number of TX descriptors low */ if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD) em_txeof(txr); enq = 0; if (m == NULL) { next = drbr_dequeue(ifp, txr->br); } else if (drbr_needs_enqueue(ifp, txr->br)) { if ((err = drbr_enqueue(ifp, txr->br, m)) != 0) return (err); next = drbr_dequeue(ifp, txr->br); } else next = m; /* Process the queue */ while (next != NULL) { if ((err = em_xmit(txr, &next)) != 0) { if (next != NULL) err = drbr_enqueue(ifp, txr->br, next); break; } enq++; drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags); ETHER_BPF_MTAP(ifp, next); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; if (txr->tx_avail < EM_MAX_SCATTER) { ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } next = drbr_dequeue(ifp, txr->br); } if (enq > 0) { /* Set the watchdog */ txr->queue_status = EM_QUEUE_WORKING; txr->watchdog_time = ticks; } return (err); } /* ** Multiqueue capable stack interface */ static int em_mq_start(struct ifnet *ifp, struct mbuf *m) { struct adapter *adapter = ifp->if_softc; struct tx_ring *txr = adapter->tx_rings; int error; if (EM_TX_TRYLOCK(txr)) { error = em_mq_start_locked(ifp, txr, m); EM_TX_UNLOCK(txr); } else error = drbr_enqueue(ifp, txr->br, m); return (error); } /* ** Flush all ring buffers */ static void em_qflush(struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; struct tx_ring *txr = adapter->tx_rings; struct mbuf *m; for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) m_freem(m); EM_TX_UNLOCK(txr); } if_qflush(ifp); } #endif /* EM_MULTIQUEUE */ static void em_start_locked(struct ifnet *ifp, struct tx_ring *txr) { struct adapter *adapter = ifp->if_softc; struct mbuf *m_head; EM_TX_LOCK_ASSERT(txr); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return; if (!adapter->link_active) return; /* Call cleanup if number of TX descriptors low */ if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD) em_txeof(txr); while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { if (txr->tx_avail < EM_MAX_SCATTER) { ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; /* * Encapsulation can modify our pointer, and or make it * NULL on failure. In that event, we can't requeue. */ if (em_xmit(txr, &m_head)) { if (m_head == NULL) break; ifp->if_drv_flags |= IFF_DRV_OACTIVE; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); break; } /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, m_head); /* Set timeout in case hardware has problems transmitting. */ txr->watchdog_time = ticks; txr->queue_status = EM_QUEUE_WORKING; } return; } static void em_start(struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; struct tx_ring *txr = adapter->tx_rings; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { EM_TX_LOCK(txr); em_start_locked(ifp, txr); EM_TX_UNLOCK(txr); } return; } /********************************************************************* * Ioctl entry point * * em_ioctl is called when the user wants to configure the * interface. * * return 0 on success, positive on failure **********************************************************************/ static int em_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct adapter *adapter = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; #ifdef INET struct ifaddr *ifa = (struct ifaddr *)data; #endif int error = 0; if (adapter->in_detach) return (error); switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) { /* * XXX * Since resetting hardware takes a very long time * and results in link renegotiation we only * initialize the hardware only when it is absolutely * required. */ ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { EM_CORE_LOCK(adapter); em_init_locked(adapter); EM_CORE_UNLOCK(adapter); } arp_ifinit(ifp, ifa); } else #endif error = ether_ioctl(ifp, command, data); break; case SIOCSIFMTU: { int max_frame_size; IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); EM_CORE_LOCK(adapter); switch (adapter->hw.mac.type) { case e1000_82571: case e1000_82572: case e1000_ich9lan: case e1000_ich10lan: case e1000_pch2lan: case e1000_82574: case e1000_80003es2lan: /* 9K Jumbo Frame size */ max_frame_size = 9234; break; case e1000_pchlan: max_frame_size = 4096; break; /* Adapters that do not support jumbo frames */ case e1000_82583: case e1000_ich8lan: max_frame_size = ETHER_MAX_LEN; break; default: max_frame_size = MAX_JUMBO_FRAME_SIZE; } if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN) { EM_CORE_UNLOCK(adapter); error = EINVAL; break; } ifp->if_mtu = ifr->ifr_mtu; adapter->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; em_init_locked(adapter); EM_CORE_UNLOCK(adapter); break; } case SIOCSIFFLAGS: IOCTL_DEBUGOUT("ioctl rcv'd:\ SIOCSIFFLAGS (Set Interface Flags)"); EM_CORE_LOCK(adapter); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ adapter->if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { em_disable_promisc(adapter); em_set_promisc(adapter); } } else em_init_locked(adapter); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) em_stop(adapter); adapter->if_flags = ifp->if_flags; EM_CORE_UNLOCK(adapter); break; case SIOCADDMULTI: case SIOCDELMULTI: IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { EM_CORE_LOCK(adapter); em_disable_intr(adapter); em_set_multi(adapter); #ifdef DEVICE_POLLING if (!(ifp->if_capenable & IFCAP_POLLING)) #endif em_enable_intr(adapter); EM_CORE_UNLOCK(adapter); } break; case SIOCSIFMEDIA: /* ** As the speed/duplex settings are being ** changed, we need to reset the PHY. */ adapter->hw.phy.reset_disable = FALSE; /* Check SOL/IDER usage */ EM_CORE_LOCK(adapter); if (e1000_check_reset_block(&adapter->hw)) { EM_CORE_UNLOCK(adapter); device_printf(adapter->dev, "Media change is" " blocked due to SOL/IDER session.\n"); break; } EM_CORE_UNLOCK(adapter); /* falls thru */ case SIOCGIFMEDIA: IOCTL_DEBUGOUT("ioctl rcv'd: \ SIOCxIFMEDIA (Get/Set Interface Media)"); error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); break; case SIOCSIFCAP: { int mask, reinit; IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)"); reinit = 0; mask = ifr->ifr_reqcap ^ ifp->if_capenable; #ifdef DEVICE_POLLING if (mask & IFCAP_POLLING) { if (ifr->ifr_reqcap & IFCAP_POLLING) { error = ether_poll_register(em_poll, ifp); if (error) return (error); EM_CORE_LOCK(adapter); em_disable_intr(adapter); ifp->if_capenable |= IFCAP_POLLING; EM_CORE_UNLOCK(adapter); } else { error = ether_poll_deregister(ifp); /* Enable interrupt even in error case */ EM_CORE_LOCK(adapter); em_enable_intr(adapter); ifp->if_capenable &= ~IFCAP_POLLING; EM_CORE_UNLOCK(adapter); } } #endif if (mask & IFCAP_HWCSUM) { ifp->if_capenable ^= IFCAP_HWCSUM; reinit = 1; } if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; reinit = 1; } if (mask & IFCAP_VLAN_HWTAGGING) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; reinit = 1; } if (mask & IFCAP_VLAN_HWFILTER) { ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; reinit = 1; } if ((mask & IFCAP_WOL) && (ifp->if_capabilities & IFCAP_WOL) != 0) { if (mask & IFCAP_WOL_MCAST) ifp->if_capenable ^= IFCAP_WOL_MCAST; if (mask & IFCAP_WOL_MAGIC) ifp->if_capenable ^= IFCAP_WOL_MAGIC; } if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) em_init(adapter); VLAN_CAPABILITIES(ifp); break; } default: error = ether_ioctl(ifp, command, data); break; } return (error); } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * * return 0 on success, positive on failure **********************************************************************/ static void em_init_locked(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; u32 pba; INIT_DEBUGOUT("em_init: begin"); EM_CORE_LOCK_ASSERT(adapter); em_disable_intr(adapter); callout_stop(&adapter->timer); /* * Packet Buffer Allocation (PBA) * Writing PBA sets the receive portion of the buffer * the remainder is used for the transmit buffer. */ switch (adapter->hw.mac.type) { /* Total Packet Buffer on these is 48K */ case e1000_82571: case e1000_82572: case e1000_80003es2lan: pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */ break; case e1000_82573: /* 82573: Total Packet Buffer is 32K */ pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */ break; case e1000_82574: case e1000_82583: pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */ break; case e1000_ich8lan: pba = E1000_PBA_8K; break; case e1000_ich9lan: case e1000_ich10lan: - case e1000_pchlan: pba = E1000_PBA_10K; break; + case e1000_pchlan: case e1000_pch2lan: pba = E1000_PBA_26K; break; default: if (adapter->max_frame_size > 8192) pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */ else pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */ } INIT_DEBUGOUT1("em_init: pba=%dK",pba); E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba); /* Get the latest mac address, User can use a LAA */ bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr, ETHER_ADDR_LEN); /* Put the address into the Receive Address Array */ e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); /* * With the 82571 adapter, RAR[0] may be overwritten * when the other port is reset, we make a duplicate * in RAR[14] for that eventuality, this assures * the interface continues to function. */ if (adapter->hw.mac.type == e1000_82571) { e1000_set_laa_state_82571(&adapter->hw, TRUE); e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, E1000_RAR_ENTRIES - 1); } /* Initialize the hardware */ em_reset(adapter); em_update_link_status(adapter); /* Setup VLAN support, basic and offload if available */ E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); /* Set hardware offload abilities */ ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_TSO; /* Configure for OS presence */ em_init_manageability(adapter); /* Prepare transmit descriptors and buffers */ em_setup_transmit_structures(adapter); em_initialize_transmit_unit(adapter); /* Setup Multicast table */ em_set_multi(adapter); /* ** Figure out the desired mbuf ** pool for doing jumbos */ if (adapter->max_frame_size <= 2048) adapter->rx_mbuf_sz = MCLBYTES; else if (adapter->max_frame_size <= 4096) adapter->rx_mbuf_sz = MJUMPAGESIZE; else adapter->rx_mbuf_sz = MJUM9BYTES; /* Prepare receive descriptors and buffers */ if (em_setup_receive_structures(adapter)) { device_printf(dev, "Could not setup receive structures\n"); em_stop(adapter); return; } em_initialize_receive_unit(adapter); /* Use real VLAN Filter support? */ if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) { if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) /* Use real VLAN Filter support */ em_setup_vlan_hw_support(adapter); else { u32 ctrl; ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ctrl |= E1000_CTRL_VME; E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); } } /* Don't lose promiscuous settings */ em_set_promisc(adapter); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; callout_reset(&adapter->timer, hz, em_local_timer, adapter); e1000_clear_hw_cntrs_base_generic(&adapter->hw); /* MSI/X configuration for 82574 */ if (adapter->hw.mac.type == e1000_82574) { int tmp; tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); tmp |= E1000_CTRL_EXT_PBA_CLR; E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp); /* Set the IVAR - interrupt vector routing. */ E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars); } #ifdef DEVICE_POLLING /* * Only enable interrupts if we are not polling, make sure * they are off otherwise. */ if (ifp->if_capenable & IFCAP_POLLING) em_disable_intr(adapter); else #endif /* DEVICE_POLLING */ em_enable_intr(adapter); /* AMT based hardware can now take control from firmware */ if (adapter->has_manage && adapter->has_amt) em_get_hw_control(adapter); /* Don't reset the phy next time init gets called */ adapter->hw.phy.reset_disable = TRUE; } static void em_init(void *arg) { struct adapter *adapter = arg; EM_CORE_LOCK(adapter); em_init_locked(adapter); EM_CORE_UNLOCK(adapter); } #ifdef DEVICE_POLLING /********************************************************************* * * Legacy polling routine: note this only works with single queue * *********************************************************************/ static int em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) { struct adapter *adapter = ifp->if_softc; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; u32 reg_icr; int rx_done; EM_CORE_LOCK(adapter); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { EM_CORE_UNLOCK(adapter); return (0); } if (cmd == POLL_AND_CHECK_STATUS) { reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { callout_stop(&adapter->timer); adapter->hw.mac.get_link_status = 1; em_update_link_status(adapter); callout_reset(&adapter->timer, hz, em_local_timer, adapter); } } EM_CORE_UNLOCK(adapter); em_rxeof(rxr, count, &rx_done); EM_TX_LOCK(txr); em_txeof(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr, NULL); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) em_start_locked(ifp, txr); #endif EM_TX_UNLOCK(txr); return (rx_done); } #endif /* DEVICE_POLLING */ /********************************************************************* * * Fast Legacy/MSI Combined Interrupt Service routine * *********************************************************************/ static int em_irq_fast(void *arg) { struct adapter *adapter = arg; struct ifnet *ifp; u32 reg_icr; ifp = adapter->ifp; reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); /* Hot eject? */ if (reg_icr == 0xffffffff) return FILTER_STRAY; /* Definitely not our interrupt. */ if (reg_icr == 0x0) return FILTER_STRAY; /* * Starting with the 82571 chip, bit 31 should be used to * determine whether the interrupt belongs to us. */ if (adapter->hw.mac.type >= e1000_82571 && (reg_icr & E1000_ICR_INT_ASSERTED) == 0) return FILTER_STRAY; em_disable_intr(adapter); taskqueue_enqueue(adapter->tq, &adapter->que_task); /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { adapter->hw.mac.get_link_status = 1; taskqueue_enqueue(taskqueue_fast, &adapter->link_task); } if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; return FILTER_HANDLED; } /* Combined RX/TX handler, used by Legacy and MSI */ static void em_handle_que(void *context, int pending) { struct adapter *adapter = context; struct ifnet *ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; bool more; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { more = em_rxeof(rxr, adapter->rx_process_limit, NULL); EM_TX_LOCK(txr); em_txeof(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr, NULL); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) em_start_locked(ifp, txr); #endif em_txeof(txr); EM_TX_UNLOCK(txr); if (more) { taskqueue_enqueue(adapter->tq, &adapter->que_task); return; } } em_enable_intr(adapter); return; } /********************************************************************* * * MSIX Interrupt Service Routines * **********************************************************************/ static void em_msix_tx(void *arg) { struct tx_ring *txr = arg; struct adapter *adapter = txr->adapter; bool more; ++txr->tx_irq; EM_TX_LOCK(txr); more = em_txeof(txr); EM_TX_UNLOCK(txr); if (more) taskqueue_enqueue(txr->tq, &txr->tx_task); else /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims); return; } /********************************************************************* * * MSIX RX Interrupt Service routine * **********************************************************************/ static void em_msix_rx(void *arg) { struct rx_ring *rxr = arg; struct adapter *adapter = rxr->adapter; bool more; ++rxr->rx_irq; more = em_rxeof(rxr, adapter->rx_process_limit, NULL); if (more) taskqueue_enqueue(rxr->tq, &rxr->rx_task); else /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims); return; } /********************************************************************* * * MSIX Link Fast Interrupt Service routine * **********************************************************************/ static void em_msix_link(void *arg) { struct adapter *adapter = arg; u32 reg_icr; ++adapter->link_irq; reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { adapter->hw.mac.get_link_status = 1; em_handle_link(adapter, 0); } else E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC); return; } static void em_handle_rx(void *context, int pending) { struct rx_ring *rxr = context; struct adapter *adapter = rxr->adapter; bool more; more = em_rxeof(rxr, adapter->rx_process_limit, NULL); if (more) taskqueue_enqueue(rxr->tq, &rxr->rx_task); else /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims); } static void em_handle_tx(void *context, int pending) { struct tx_ring *txr = context; struct adapter *adapter = txr->adapter; struct ifnet *ifp = adapter->ifp; EM_TX_LOCK(txr); em_txeof(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr, NULL); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) em_start_locked(ifp, txr); #endif em_txeof(txr); E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims); EM_TX_UNLOCK(txr); } static void em_handle_link(void *context, int pending) { struct adapter *adapter = context; struct ifnet *ifp = adapter->ifp; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return; EM_CORE_LOCK(adapter); callout_stop(&adapter->timer); em_update_link_status(adapter); callout_reset(&adapter->timer, hz, em_local_timer, adapter); E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC); EM_CORE_UNLOCK(adapter); } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. * **********************************************************************/ static void em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { struct adapter *adapter = ifp->if_softc; u_char fiber_type = IFM_1000_SX; INIT_DEBUGOUT("em_media_status: begin"); EM_CORE_LOCK(adapter); em_update_link_status(adapter); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!adapter->link_active) { EM_CORE_UNLOCK(adapter); return; } ifmr->ifm_status |= IFM_ACTIVE; if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { ifmr->ifm_active |= fiber_type | IFM_FDX; } else { switch (adapter->link_speed) { case 10: ifmr->ifm_active |= IFM_10_T; break; case 100: ifmr->ifm_active |= IFM_100_TX; break; case 1000: ifmr->ifm_active |= IFM_1000_T; break; } if (adapter->link_duplex == FULL_DUPLEX) ifmr->ifm_active |= IFM_FDX; else ifmr->ifm_active |= IFM_HDX; } EM_CORE_UNLOCK(adapter); } /********************************************************************* * * Media Ioctl callback * * This routine is called when the user changes speed/duplex using * media/mediopt option with ifconfig. * **********************************************************************/ static int em_media_change(struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; struct ifmedia *ifm = &adapter->media; INIT_DEBUGOUT("em_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); EM_CORE_LOCK(adapter); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; break; case IFM_1000_LX: case IFM_1000_SX: case IFM_1000_T: adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; break; case IFM_100_TX: adapter->hw.mac.autoneg = FALSE; adapter->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL; else adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF; break; case IFM_10_T: adapter->hw.mac.autoneg = FALSE; adapter->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL; else adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF; break; default: device_printf(adapter->dev, "Unsupported media type\n"); } em_init_locked(adapter); EM_CORE_UNLOCK(adapter); return (0); } /********************************************************************* * * This routine maps the mbufs to tx descriptors. * * return 0 on success, positive on failure **********************************************************************/ static int em_xmit(struct tx_ring *txr, struct mbuf **m_headp) { struct adapter *adapter = txr->adapter; bus_dma_segment_t segs[EM_MAX_SCATTER]; bus_dmamap_t map; struct em_buffer *tx_buffer, *tx_buffer_mapped; struct e1000_tx_desc *ctxd = NULL; struct mbuf *m_head; struct ether_header *eh; struct ip *ip = NULL; struct tcphdr *tp = NULL; u32 txd_upper, txd_lower, txd_used, txd_saved; int ip_off, poff; int nsegs, i, j, first, last = 0; int error, do_tso, tso_desc = 0; m_head = *m_headp; txd_upper = txd_lower = txd_used = txd_saved = 0; do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0); ip_off = poff = 0; /* * Intel recommends entire IP/TCP header length reside in a single * buffer. If multiple descriptors are used to describe the IP and * TCP header, each descriptor should describe one or more * complete headers; descriptors referencing only parts of headers * are not supported. If all layer headers are not coalesced into * a single buffer, each buffer should not cross a 4KB boundary, * or be larger than the maximum read request size. * Controller also requires modifing IP/TCP header to make TSO work * so we firstly get a writable mbuf chain then coalesce ethernet/ * IP/TCP header into a single buffer to meet the requirement of * controller. This also simplifies IP/TCP/UDP checksum offloading * which also has similiar restrictions. */ if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) { if (do_tso || (m_head->m_next != NULL && m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) { if (M_WRITABLE(*m_headp) == 0) { m_head = m_dup(*m_headp, M_DONTWAIT); m_freem(*m_headp); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } *m_headp = m_head; } } /* * XXX * Assume IPv4, we don't have TSO/checksum offload support * for IPv6 yet. */ ip_off = sizeof(struct ether_header); m_head = m_pullup(m_head, ip_off); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } eh = mtod(m_head, struct ether_header *); if (eh->ether_type == htons(ETHERTYPE_VLAN)) { ip_off = sizeof(struct ether_vlan_header); m_head = m_pullup(m_head, ip_off); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } } m_head = m_pullup(m_head, ip_off + sizeof(struct ip)); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } ip = (struct ip *)(mtod(m_head, char *) + ip_off); poff = ip_off + (ip->ip_hl << 2); m_head = m_pullup(m_head, poff + sizeof(struct tcphdr)); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } if (do_tso) { tp = (struct tcphdr *)(mtod(m_head, char *) + poff); /* * TSO workaround: * pull 4 more bytes of data into it. */ m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } ip = (struct ip *)(mtod(m_head, char *) + ip_off); ip->ip_len = 0; ip->ip_sum = 0; /* * The pseudo TCP checksum does not include TCP payload * length so driver should recompute the checksum here * what hardware expect to see. This is adherence of * Microsoft's Large Send specification. */ tp = (struct tcphdr *)(mtod(m_head, char *) + poff); tp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) { tp = (struct tcphdr *)(mtod(m_head, char *) + poff); m_head = m_pullup(m_head, poff + (tp->th_off << 2)); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } ip = (struct ip *)(mtod(m_head, char *) + ip_off); tp = (struct tcphdr *)(mtod(m_head, char *) + poff); } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) { m_head = m_pullup(m_head, poff + sizeof(struct udphdr)); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } ip = (struct ip *)(mtod(m_head, char *) + ip_off); } *m_headp = m_head; } /* * Map the packet for DMA * * Capture the first descriptor index, * this descriptor will have the index * of the EOP which is the only one that * now gets a DONE bit writeback. */ first = txr->next_avail_desc; tx_buffer = &txr->tx_buffers[first]; tx_buffer_mapped = tx_buffer; map = tx_buffer->map; error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); /* * There are two types of errors we can (try) to handle: * - EFBIG means the mbuf chain was too long and bus_dma ran * out of segments. Defragment the mbuf chain and try again. * - ENOMEM means bus_dma could not obtain enough bounce buffers * at this point in time. Defer sending and try again later. * All other errors, in particular EINVAL, are fatal and prevent the * mbuf chain from ever going through. Drop it and report error. */ if (error == EFBIG) { struct mbuf *m; m = m_defrag(*m_headp, M_DONTWAIT); if (m == NULL) { adapter->mbuf_alloc_failed++; m_freem(*m_headp); *m_headp = NULL; return (ENOBUFS); } *m_headp = m; /* Try it again */ error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); if (error) { adapter->no_tx_dma_setup++; m_freem(*m_headp); *m_headp = NULL; return (error); } } else if (error != 0) { adapter->no_tx_dma_setup++; return (error); } /* * TSO Hardware workaround, if this packet is not * TSO, and is only a single descriptor long, and * it follows a TSO burst, then we need to add a * sentinel descriptor to prevent premature writeback. */ if ((do_tso == 0) && (txr->tx_tso == TRUE)) { if (nsegs == 1) tso_desc = TRUE; txr->tx_tso = FALSE; } if (nsegs > (txr->tx_avail - 2)) { txr->no_desc_avail++; bus_dmamap_unload(txr->txtag, map); return (ENOBUFS); } m_head = *m_headp; /* Do hardware assists */ if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { em_tso_setup(txr, m_head, ip_off, ip, tp, &txd_upper, &txd_lower); /* we need to make a final sentinel transmit desc */ tso_desc = TRUE; } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) em_transmit_checksum_setup(txr, m_head, ip_off, ip, &txd_upper, &txd_lower); i = txr->next_avail_desc; /* Set up our transmit descriptors */ for (j = 0; j < nsegs; j++) { bus_size_t seg_len; bus_addr_t seg_addr; tx_buffer = &txr->tx_buffers[i]; ctxd = &txr->tx_base[i]; seg_addr = segs[j].ds_addr; seg_len = segs[j].ds_len; /* ** TSO Workaround: ** If this is the last descriptor, we want to ** split it so we have a small final sentinel */ if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) { seg_len -= 4; ctxd->buffer_addr = htole64(seg_addr); ctxd->lower.data = htole32( adapter->txd_cmd | txd_lower | seg_len); ctxd->upper.data = htole32(txd_upper); if (++i == adapter->num_tx_desc) i = 0; /* Now make the sentinel */ ++txd_used; /* using an extra txd */ ctxd = &txr->tx_base[i]; tx_buffer = &txr->tx_buffers[i]; ctxd->buffer_addr = htole64(seg_addr + seg_len); ctxd->lower.data = htole32( adapter->txd_cmd | txd_lower | 4); ctxd->upper.data = htole32(txd_upper); last = i; if (++i == adapter->num_tx_desc) i = 0; } else { ctxd->buffer_addr = htole64(seg_addr); ctxd->lower.data = htole32( adapter->txd_cmd | txd_lower | seg_len); ctxd->upper.data = htole32(txd_upper); last = i; if (++i == adapter->num_tx_desc) i = 0; } tx_buffer->m_head = NULL; tx_buffer->next_eop = -1; } txr->next_avail_desc = i; txr->tx_avail -= nsegs; if (tso_desc) /* TSO used an extra for sentinel */ txr->tx_avail -= txd_used; if (m_head->m_flags & M_VLANTAG) { /* Set the vlan id. */ ctxd->upper.fields.special = htole16(m_head->m_pkthdr.ether_vtag); /* Tell hardware to add tag */ ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE); } tx_buffer->m_head = m_head; tx_buffer_mapped->map = tx_buffer->map; tx_buffer->map = map; bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); /* * Last Descriptor of Packet * needs End Of Packet (EOP) * and Report Status (RS) */ ctxd->lower.data |= htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS); /* * Keep track in the first buffer which * descriptor will be written back */ tx_buffer = &txr->tx_buffers[first]; tx_buffer->next_eop = last; /* Update the watchdog time early and often */ txr->watchdog_time = ticks; /* * Advance the Transmit Descriptor Tail (TDT), this tells the E1000 * that this frame is available to transmit. */ bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i); return (0); } static void em_set_promisc(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; u32 reg_rctl; reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); if (ifp->if_flags & IFF_PROMISC) { reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); /* Turn this on if you want to see bad packets */ if (em_debug_sbp) reg_rctl |= E1000_RCTL_SBP; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else if (ifp->if_flags & IFF_ALLMULTI) { reg_rctl |= E1000_RCTL_MPE; reg_rctl &= ~E1000_RCTL_UPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } } static void em_disable_promisc(struct adapter *adapter) { u32 reg_rctl; reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl &= (~E1000_RCTL_UPE); reg_rctl &= (~E1000_RCTL_MPE); reg_rctl &= (~E1000_RCTL_SBP); E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } /********************************************************************* * Multicast Update * * This routine is called whenever multicast address list is updated. * **********************************************************************/ static void em_set_multi(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; struct ifmultiaddr *ifma; u32 reg_rctl = 0; u8 *mta; /* Multicast array memory */ int mcnt = 0; IOCTL_DEBUGOUT("em_set_multi: begin"); mta = adapter->mta; bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES); if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_clear_mwi(&adapter->hw); reg_rctl |= E1000_RCTL_RST; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); msec_delay(5); } #if __FreeBSD_version < 800000 IF_ADDR_LOCK(ifp); #else if_maddr_rlock(ifp); #endif TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) break; bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN); mcnt++; } #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_maddr_runlock(ifp); #endif if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl &= ~E1000_RCTL_RST; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); msec_delay(5); if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_set_mwi(&adapter->hw); } } /********************************************************************* * Timer routine * * This routine checks for link status and updates statistics. * **********************************************************************/ static void em_local_timer(void *arg) { struct adapter *adapter = arg; struct ifnet *ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; EM_CORE_LOCK_ASSERT(adapter); em_update_link_status(adapter); em_update_stats_counters(adapter); /* Reset LAA into RAR[0] on 82571 */ if ((adapter->hw.mac.type == e1000_82571) && e1000_get_laa_state_82571(&adapter->hw)) e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); /* ** Don't do TX watchdog check if we've been paused */ if (adapter->pause_frames) { adapter->pause_frames = 0; goto out; } /* ** Check on the state of the TX queue(s), this ** can be done without the lock because its RO ** and the HUNG state will be static if set. */ for (int i = 0; i < adapter->num_queues; i++, txr++) if (txr->queue_status == EM_QUEUE_HUNG) goto hung; out: callout_reset(&adapter->timer, hz, em_local_timer, adapter); return; hung: /* Looks like we're hung */ device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); device_printf(adapter->dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me, E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)), E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me))); device_printf(adapter->dev,"TX(%d) desc avail = %d," "Next TX to Clean = %d\n", txr->me, txr->tx_avail, txr->next_to_clean); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; adapter->watchdog_events++; EM_TX_UNLOCK(txr); em_init_locked(adapter); } static void em_update_link_status(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; u32 link_check = 0; /* Get the cached link value or read phy for real */ switch (hw->phy.media_type) { case e1000_media_type_copper: if (hw->mac.get_link_status) { /* Do the work to read phy */ e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; if (link_check) /* ESB2 fix */ e1000_cfg_on_link_up(hw); } else link_check = TRUE; break; case e1000_media_type_fiber: e1000_check_for_link(hw); link_check = (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU); break; case e1000_media_type_internal_serdes: e1000_check_for_link(hw); link_check = adapter->hw.mac.serdes_has_link; break; default: case e1000_media_type_unknown: break; } /* Now check for a transition */ if (link_check && (adapter->link_active == 0)) { e1000_get_speed_and_duplex(hw, &adapter->link_speed, &adapter->link_duplex); /* Check if we must disable SPEED_MODE bit on PCI-E */ if ((adapter->link_speed != SPEED_1000) && ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572))) { int tarc0; tarc0 = E1000_READ_REG(hw, E1000_TARC(0)); tarc0 &= ~SPEED_MODE_BIT; E1000_WRITE_REG(hw, E1000_TARC(0), tarc0); } if (bootverbose) device_printf(dev, "Link is up %d Mbps %s\n", adapter->link_speed, ((adapter->link_duplex == FULL_DUPLEX) ? "Full Duplex" : "Half Duplex")); adapter->link_active = 1; adapter->smartspeed = 0; ifp->if_baudrate = adapter->link_speed * 1000000; if_link_state_change(ifp, LINK_STATE_UP); } else if (!link_check && (adapter->link_active == 1)) { ifp->if_baudrate = adapter->link_speed = 0; adapter->link_duplex = 0; if (bootverbose) device_printf(dev, "Link is Down\n"); adapter->link_active = 0; /* Link down, disable watchdog */ for (int i = 0; i < adapter->num_queues; i++, txr++) txr->queue_status = EM_QUEUE_IDLE; if_link_state_change(ifp, LINK_STATE_DOWN); } } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. * * This routine should always be called with BOTH the CORE * and TX locks. **********************************************************************/ static void em_stop(void *arg) { struct adapter *adapter = arg; struct ifnet *ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; EM_CORE_LOCK_ASSERT(adapter); INIT_DEBUGOUT("em_stop: begin"); em_disable_intr(adapter); callout_stop(&adapter->timer); /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); /* Unarm watchdog timer. */ for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); txr->queue_status = EM_QUEUE_IDLE; EM_TX_UNLOCK(txr); } e1000_reset_hw(&adapter->hw); E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0); e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } /********************************************************************* * * Determine hardware revision. * **********************************************************************/ static void em_identify_hardware(struct adapter *adapter) { device_t dev = adapter->dev; /* Make sure our PCI config space has the necessary stuff set */ adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) && (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) { device_printf(dev, "Memory Access and/or Bus Master bits " "were not set!\n"); adapter->hw.bus.pci_cmd_word |= (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN); pci_write_config(dev, PCIR_COMMAND, adapter->hw.bus.pci_cmd_word, 2); } /* Save off the information about this board */ adapter->hw.vendor_id = pci_get_vendor(dev); adapter->hw.device_id = pci_get_device(dev); adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); adapter->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); adapter->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); /* Do Shared Code Init and Setup */ if (e1000_set_mac_type(&adapter->hw)) { device_printf(dev, "Setup init failure\n"); return; } } static int em_allocate_pci_resources(struct adapter *adapter) { device_t dev = adapter->dev; int rid; rid = PCIR_BAR(0); adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->memory == NULL) { device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->memory); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->memory); adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; /* Default to a single queue */ adapter->num_queues = 1; /* * Setup MSI/X or MSI if PCI Express */ adapter->msix = em_setup_msix(adapter); adapter->hw.back = &adapter->osdep; return (0); } /********************************************************************* * * Setup the Legacy or MSI Interrupt handler * **********************************************************************/ int em_allocate_legacy(struct adapter *adapter) { device_t dev = adapter->dev; int error, rid = 0; /* Manually turn off all interrupts */ E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); if (adapter->msix == 1) /* using MSI */ rid = 1; /* We allocate a single interrupt resource */ adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (adapter->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "interrupt\n"); return (ENXIO); } /* * Allocate a fast interrupt and the associated * deferred processing contexts. */ TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter); TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter); adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT, taskqueue_thread_enqueue, &adapter->tq); taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq", device_get_nameunit(adapter->dev)); if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET, em_irq_fast, NULL, adapter, &adapter->tag)) != 0) { device_printf(dev, "Failed to register fast interrupt " "handler: %d\n", error); taskqueue_free(adapter->tq); adapter->tq = NULL; return (error); } return (0); } /********************************************************************* * * Setup the MSIX Interrupt handlers * This is not really Multiqueue, rather * its just multiple interrupt vectors. * **********************************************************************/ int em_allocate_msix(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; int error, rid, vector = 0; /* Make sure all interrupts are disabled */ E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); /* First set up ring resources */ for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) { /* RX ring */ rid = vector + 1; rxr->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (rxr->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "RX MSIX Interrupt %d\n", i); return (ENXIO); } if ((error = bus_setup_intr(dev, rxr->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx, rxr, &rxr->tag)) != 0) { device_printf(dev, "Failed to register RX handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i); #endif rxr->msix = vector++; /* NOTE increment vector for TX */ TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr); rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT, taskqueue_thread_enqueue, &rxr->tq); taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq", device_get_nameunit(adapter->dev)); /* ** Set the bit to enable interrupt ** in E1000_IMS -- bits 20 and 21 ** are for RX0 and RX1, note this has ** NOTHING to do with the MSIX vector */ rxr->ims = 1 << (20 + i); adapter->ivars |= (8 | rxr->msix) << (i * 4); /* TX ring */ rid = vector + 1; txr->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (txr->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "TX MSIX Interrupt %d\n", i); return (ENXIO); } if ((error = bus_setup_intr(dev, txr->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx, txr, &txr->tag)) != 0) { device_printf(dev, "Failed to register TX handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i); #endif txr->msix = vector++; /* Increment vector for next pass */ TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr); txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT, taskqueue_thread_enqueue, &txr->tq); taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq", device_get_nameunit(adapter->dev)); /* ** Set the bit to enable interrupt ** in E1000_IMS -- bits 22 and 23 ** are for TX0 and TX1, note this has ** NOTHING to do with the MSIX vector */ txr->ims = 1 << (22 + i); adapter->ivars |= (8 | txr->msix) << (8 + (i * 4)); } /* Link interrupt */ ++rid; adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (!adapter->res) { device_printf(dev,"Unable to allocate " "bus resource: Link interrupt [%d]\n", rid); return (ENXIO); } /* Set the link handler function */ error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_link, adapter, &adapter->tag); if (error) { adapter->res = NULL; device_printf(dev, "Failed to register LINK handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, adapter->res, adapter->tag, "link"); #endif adapter->linkvec = vector; adapter->ivars |= (8 | vector) << 16; adapter->ivars |= 0x80000000; return (0); } static void em_free_pci_resources(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr; struct rx_ring *rxr; int rid; /* ** Release all the queue interrupt resources: */ for (int i = 0; i < adapter->num_queues; i++) { txr = &adapter->tx_rings[i]; rxr = &adapter->rx_rings[i]; /* an early abort? */ if ((txr == NULL) || (rxr == NULL)) break; rid = txr->msix +1; if (txr->tag != NULL) { bus_teardown_intr(dev, txr->res, txr->tag); txr->tag = NULL; } if (txr->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, txr->res); rid = rxr->msix +1; if (rxr->tag != NULL) { bus_teardown_intr(dev, rxr->res, rxr->tag); rxr->tag = NULL; } if (rxr->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, rxr->res); } if (adapter->linkvec) /* we are doing MSIX */ rid = adapter->linkvec + 1; else (adapter->msix != 0) ? (rid = 1):(rid = 0); if (adapter->tag != NULL) { bus_teardown_intr(dev, adapter->res, adapter->tag); adapter->tag = NULL; } if (adapter->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); if (adapter->msix) pci_release_msi(dev); if (adapter->msix_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem); if (adapter->memory != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), adapter->memory); if (adapter->flash != NULL) bus_release_resource(dev, SYS_RES_MEMORY, EM_FLASH, adapter->flash); } /* * Setup MSI or MSI/X */ static int em_setup_msix(struct adapter *adapter) { device_t dev = adapter->dev; int val = 0; /* ** Setup MSI/X for Hartwell: tests have shown ** use of two queues to be unstable, and to ** provide no great gain anyway, so we simply ** seperate the interrupts and use a single queue. */ if ((adapter->hw.mac.type == e1000_82574) && (em_enable_msix == TRUE)) { /* Map the MSIX BAR */ int rid = PCIR_BAR(EM_MSIX_BAR); adapter->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!adapter->msix_mem) { /* May not be enabled */ device_printf(adapter->dev, "Unable to map MSIX table \n"); goto msi; } val = pci_msix_count(dev); if (val < 3) { bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem); adapter->msix_mem = NULL; device_printf(adapter->dev, "MSIX: insufficient vectors, using MSI\n"); goto msi; } val = 3; adapter->num_queues = 1; if (pci_alloc_msix(dev, &val) == 0) { device_printf(adapter->dev, "Using MSIX interrupts " "with %d vectors\n", val); } return (val); } msi: val = pci_msi_count(dev); if (val == 1 && pci_alloc_msi(dev, &val) == 0) { adapter->msix = 1; device_printf(adapter->dev,"Using an MSI interrupt\n"); return (val); } /* Should only happen due to manual configuration */ device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n"); return (0); } /********************************************************************* * * Initialize the hardware to a configuration * as specified by the adapter structure. * **********************************************************************/ static void em_reset(struct adapter *adapter) { device_t dev = adapter->dev; struct ifnet *ifp = adapter->ifp; struct e1000_hw *hw = &adapter->hw; u16 rx_buffer_size; INIT_DEBUGOUT("em_reset: begin"); /* Set up smart power down as default off on newer adapters. */ if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 || hw->mac.type == e1000_82572)) { u16 phy_tmp = 0; /* Speed up time to link by disabling smart power down. */ e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp); phy_tmp &= ~IGP02E1000_PM_SPD; e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp); } /* * These parameters control the automatic generation (Tx) and * response (Rx) to Ethernet PAUSE frames. * - High water mark should allow for at least two frames to be * received after sending an XOFF. * - Low water mark works best when it is very near the high water mark. * This allows the receiver to restart by sending XON when it has * drained a bit. Here we use an arbitary value of 1500 which will * restart after one full frame is pulled from the buffer. There * could be several smaller frames in the buffer and if so they will * not trigger the XON until their total number reduces the buffer * by 1500. * - The pause time is fairly large at 1000 x 512ns = 512 usec. */ rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 ); hw->fc.high_water = rx_buffer_size - roundup2(adapter->max_frame_size, 1024); hw->fc.low_water = hw->fc.high_water - 1500; if (hw->mac.type == e1000_80003es2lan) hw->fc.pause_time = 0xFFFF; else hw->fc.pause_time = EM_FC_PAUSE_TIME; hw->fc.send_xon = TRUE; /* Set Flow control, use the tunable location if sane */ hw->fc.requested_mode = adapter->fc_setting; /* Workaround: no TX flow ctrl for PCH */ if (hw->mac.type == e1000_pchlan) hw->fc.requested_mode = e1000_fc_rx_pause; /* Override - settings for PCH2LAN, ya its magic :) */ if (hw->mac.type == e1000_pch2lan) { hw->fc.high_water = 0x5C20; hw->fc.low_water = 0x5048; hw->fc.pause_time = 0x0650; hw->fc.refresh_time = 0x0400; /* Jumbos need adjusted PBA */ if (ifp->if_mtu > ETHERMTU) E1000_WRITE_REG(hw, E1000_PBA, 12); else E1000_WRITE_REG(hw, E1000_PBA, 26); } /* Issue a global reset */ e1000_reset_hw(hw); E1000_WRITE_REG(hw, E1000_WUC, 0); + em_disable_aspm(adapter); if (e1000_init_hw(hw) < 0) { device_printf(dev, "Hardware Initialization Failed\n"); return; } E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN); e1000_get_phy_info(hw); e1000_check_for_link(hw); return; } /********************************************************************* * * Setup networking device structure and register an interface. * **********************************************************************/ static int em_setup_interface(device_t dev, struct adapter *adapter) { struct ifnet *ifp; INIT_DEBUGOUT("em_setup_interface: begin"); ifp = adapter->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not allocate ifnet structure\n"); return (-1); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_mtu = ETHERMTU; ifp->if_init = em_init; ifp->if_softc = adapter; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = em_ioctl; ifp->if_start = em_start; IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1); ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1; IFQ_SET_READY(&ifp->if_snd); ether_ifattach(ifp, adapter->hw.mac.addr); ifp->if_capabilities = ifp->if_capenable = 0; #ifdef EM_MULTIQUEUE /* Multiqueue tx functions */ ifp->if_transmit = em_mq_start; ifp->if_qflush = em_qflush; #endif ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; /* Enable TSO by default, can disable with ifconfig */ ifp->if_capabilities |= IFCAP_TSO4; ifp->if_capenable |= IFCAP_TSO4; /* * Tell the upper layer(s) we * support full VLAN capability */ ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; /* ** Dont turn this on by default, if vlans are ** created on another pseudo device (eg. lagg) ** then vlan events are not passed thru, breaking ** operation, but with HW FILTER off it works. If ** using vlans directly on the em driver you can ** enable this and get full hardware tag filtering. */ ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; #ifdef DEVICE_POLLING ifp->if_capabilities |= IFCAP_POLLING; #endif /* Enable only WOL MAGIC by default */ if (adapter->wol) { ifp->if_capabilities |= IFCAP_WOL; ifp->if_capenable |= IFCAP_WOL_MAGIC; } /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&adapter->media, IFM_IMASK, em_media_change, em_media_status); if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { u_char fiber_type = IFM_1000_SX; /* default type */ ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL); } else { ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL); if (adapter->hw.phy.type != e1000_phy_ife) { ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); } } ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); return (0); } /* * Manage DMA'able memory. */ static void em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { if (error) return; *(bus_addr_t *) arg = segs[0].ds_addr; } static int em_dma_malloc(struct adapter *adapter, bus_size_t size, struct em_dma_alloc *dma, int mapflags) { int error; error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ EM_DBA_ALIGN, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dma->dma_tag); if (error) { device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n", __func__, error); goto fail_0; } error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map); if (error) { device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n", __func__, (uintmax_t)size, error); goto fail_2; } dma->dma_paddr = 0; error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); if (error || dma->dma_paddr == 0) { device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n", __func__, error); goto fail_3; } return (0); fail_3: bus_dmamap_unload(dma->dma_tag, dma->dma_map); fail_2: bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); bus_dma_tag_destroy(dma->dma_tag); fail_0: dma->dma_map = NULL; dma->dma_tag = NULL; return (error); } static void em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma) { if (dma->dma_tag == NULL) return; if (dma->dma_map != NULL) { bus_dmamap_sync(dma->dma_tag, dma->dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->dma_tag, dma->dma_map); bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); dma->dma_map = NULL; } bus_dma_tag_destroy(dma->dma_tag); dma->dma_tag = NULL; } /********************************************************************* * * Allocate memory for the transmit and receive rings, and then * the descriptors associated with each, called only once at attach. * **********************************************************************/ static int em_allocate_queues(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = NULL; struct rx_ring *rxr = NULL; int rsize, tsize, error = E1000_SUCCESS; int txconf = 0, rxconf = 0; /* Allocate the TX ring struct memory */ if (!(adapter->tx_rings = (struct tx_ring *) malloc(sizeof(struct tx_ring) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate TX ring memory\n"); error = ENOMEM; goto fail; } /* Now allocate the RX */ if (!(adapter->rx_rings = (struct rx_ring *) malloc(sizeof(struct rx_ring) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX ring memory\n"); error = ENOMEM; goto rx_fail; } tsize = roundup2(adapter->num_tx_desc * sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); /* * Now set up the TX queues, txconf is needed to handle the * possibility that things fail midcourse and we need to * undo memory gracefully */ for (int i = 0; i < adapter->num_queues; i++, txconf++) { /* Set up some basics */ txr = &adapter->tx_rings[i]; txr->adapter = adapter; txr->me = i; /* Initialize the TX lock */ snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", device_get_nameunit(dev), txr->me); mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF); if (em_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate TX Descriptor memory\n"); error = ENOMEM; goto err_tx_desc; } txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr; bzero((void *)txr->tx_base, tsize); if (em_allocate_transmit_buffers(txr)) { device_printf(dev, "Critical Failure setting up transmit buffers\n"); error = ENOMEM; goto err_tx_desc; } #if __FreeBSD_version >= 800000 /* Allocate a buf ring */ txr->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &txr->tx_mtx); #endif } /* * Next the RX queues... */ rsize = roundup2(adapter->num_rx_desc * sizeof(struct e1000_rx_desc), EM_DBA_ALIGN); for (int i = 0; i < adapter->num_queues; i++, rxconf++) { rxr = &adapter->rx_rings[i]; rxr->adapter = adapter; rxr->me = i; /* Initialize the RX lock */ snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", device_get_nameunit(dev), txr->me); mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF); if (em_dma_malloc(adapter, rsize, &rxr->rxdma, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate RxDescriptor memory\n"); error = ENOMEM; goto err_rx_desc; } rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr; bzero((void *)rxr->rx_base, rsize); /* Allocate receive buffers for the ring*/ if (em_allocate_receive_buffers(rxr)) { device_printf(dev, "Critical Failure setting up receive buffers\n"); error = ENOMEM; goto err_rx_desc; } } return (0); err_rx_desc: for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--) em_dma_free(adapter, &rxr->rxdma); err_tx_desc: for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) em_dma_free(adapter, &txr->txdma); free(adapter->rx_rings, M_DEVBUF); rx_fail: #if __FreeBSD_version >= 800000 buf_ring_free(txr->br, M_DEVBUF); #endif free(adapter->tx_rings, M_DEVBUF); fail: return (error); } /********************************************************************* * * Allocate memory for tx_buffer structures. The tx_buffer stores all * the information needed to transmit a packet on the wire. This is * called only once at attach, setup is done every reset. * **********************************************************************/ static int em_allocate_transmit_buffers(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; device_t dev = adapter->dev; struct em_buffer *txbuf; int error, i; /* * Setup DMA descriptor areas. */ if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ EM_TSO_SIZE, /* maxsize */ EM_MAX_SCATTER, /* nsegments */ PAGE_SIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->txtag))) { device_printf(dev,"Unable to allocate TX DMA tag\n"); goto fail; } if (!(txr->tx_buffers = (struct em_buffer *) malloc(sizeof(struct em_buffer) * adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); error = ENOMEM; goto fail; } /* Create the descriptor buffer dma maps */ txbuf = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); if (error != 0) { device_printf(dev, "Unable to create TX DMA map\n"); goto fail; } } return 0; fail: /* We free all, it handles case where we are in the middle */ em_free_transmit_structures(adapter); return (error); } /********************************************************************* * * Initialize a transmit ring. * **********************************************************************/ static void em_setup_transmit_ring(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct em_buffer *txbuf; int i; /* Clear the old descriptor contents */ EM_TX_LOCK(txr); bzero((void *)txr->tx_base, (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc); /* Reset indices */ txr->next_avail_desc = 0; txr->next_to_clean = 0; /* Free any existing tx buffers. */ txbuf = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { if (txbuf->m_head != NULL) { bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, txbuf->map); m_freem(txbuf->m_head); txbuf->m_head = NULL; } /* clear the watch index */ txbuf->next_eop = -1; } /* Set number of descriptors available */ txr->tx_avail = adapter->num_tx_desc; txr->queue_status = EM_QUEUE_IDLE; /* Clear checksum offload context. */ txr->last_hw_offload = 0; txr->last_hw_ipcss = 0; txr->last_hw_ipcso = 0; txr->last_hw_tucss = 0; txr->last_hw_tucso = 0; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); EM_TX_UNLOCK(txr); } /********************************************************************* * * Initialize all transmit rings. * **********************************************************************/ static void em_setup_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) em_setup_transmit_ring(txr); return; } /********************************************************************* * * Enable transmit unit. * **********************************************************************/ static void em_initialize_transmit_unit(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; struct e1000_hw *hw = &adapter->hw; u32 tctl, tarc, tipg = 0; INIT_DEBUGOUT("em_initialize_transmit_unit: begin"); for (int i = 0; i < adapter->num_queues; i++, txr++) { u64 bus_addr = txr->txdma.dma_paddr; /* Base and Len of TX Ring */ E1000_WRITE_REG(hw, E1000_TDLEN(i), adapter->num_tx_desc * sizeof(struct e1000_tx_desc)); E1000_WRITE_REG(hw, E1000_TDBAH(i), (u32)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_TDBAL(i), (u32)bus_addr); /* Init the HEAD/TAIL indices */ E1000_WRITE_REG(hw, E1000_TDT(i), 0); E1000_WRITE_REG(hw, E1000_TDH(i), 0); HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)), E1000_READ_REG(&adapter->hw, E1000_TDLEN(i))); txr->queue_status = EM_QUEUE_IDLE; } /* Set the default values for the Tx Inter Packet Gap timer */ switch (adapter->hw.mac.type) { case e1000_82542: tipg = DEFAULT_82542_TIPG_IPGT; tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; break; case e1000_80003es2lan: tipg = DEFAULT_82543_TIPG_IPGR1; tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; break; default: if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) tipg = DEFAULT_82543_TIPG_IPGT_FIBER; else tipg = DEFAULT_82543_TIPG_IPGT_COPPER; tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; } E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg); E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value); if(adapter->hw.mac.type >= e1000_82540) E1000_WRITE_REG(&adapter->hw, E1000_TADV, adapter->tx_abs_int_delay.value); if ((adapter->hw.mac.type == e1000_82571) || (adapter->hw.mac.type == e1000_82572)) { tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= SPEED_MODE_BIT; E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); } else if (adapter->hw.mac.type == e1000_80003es2lan) { tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= 1; E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1)); tarc |= 1; E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); } adapter->txd_cmd = E1000_TXD_CMD_IFCS; if (adapter->tx_int_delay.value > 0) adapter->txd_cmd |= E1000_TXD_CMD_IDE; /* Program the Transmit Control Register */ tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); tctl &= ~E1000_TCTL_CT; tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); if (adapter->hw.mac.type >= e1000_82571) tctl |= E1000_TCTL_MULR; /* This write will effectively turn on the transmit unit. */ E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); } /********************************************************************* * * Free all transmit rings. * **********************************************************************/ static void em_free_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); em_free_transmit_buffers(txr); em_dma_free(adapter, &txr->txdma); EM_TX_UNLOCK(txr); EM_TX_LOCK_DESTROY(txr); } free(adapter->tx_rings, M_DEVBUF); } /********************************************************************* * * Free transmit ring related data structures. * **********************************************************************/ static void em_free_transmit_buffers(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct em_buffer *txbuf; INIT_DEBUGOUT("free_transmit_ring: begin"); if (txr->tx_buffers == NULL) return; for (int i = 0; i < adapter->num_tx_desc; i++) { txbuf = &txr->tx_buffers[i]; if (txbuf->m_head != NULL) { bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, txbuf->map); m_freem(txbuf->m_head); txbuf->m_head = NULL; if (txbuf->map != NULL) { bus_dmamap_destroy(txr->txtag, txbuf->map); txbuf->map = NULL; } } else if (txbuf->map != NULL) { bus_dmamap_unload(txr->txtag, txbuf->map); bus_dmamap_destroy(txr->txtag, txbuf->map); txbuf->map = NULL; } } #if __FreeBSD_version >= 800000 if (txr->br != NULL) buf_ring_free(txr->br, M_DEVBUF); #endif if (txr->tx_buffers != NULL) { free(txr->tx_buffers, M_DEVBUF); txr->tx_buffers = NULL; } if (txr->txtag != NULL) { bus_dma_tag_destroy(txr->txtag); txr->txtag = NULL; } return; } /********************************************************************* * The offload context is protocol specific (TCP/UDP) and thus * only needs to be set when the protocol changes. The occasion * of a context change can be a performance detriment, and * might be better just disabled. The reason arises in the way * in which the controller supports pipelined requests from the * Tx data DMA. Up to four requests can be pipelined, and they may * belong to the same packet or to multiple packets. However all * requests for one packet are issued before a request is issued * for a subsequent packet and if a request for the next packet * requires a context change, that request will be stalled * until the previous request completes. This means setting up * a new context effectively disables pipelined Tx data DMA which * in turn greatly slow down performance to send small sized * frames. **********************************************************************/ static void em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off, struct ip *ip, u32 *txd_upper, u32 *txd_lower) { struct adapter *adapter = txr->adapter; struct e1000_context_desc *TXD = NULL; struct em_buffer *tx_buffer; int cur, hdr_len; u32 cmd = 0; u16 offload = 0; u8 ipcso, ipcss, tucso, tucss; ipcss = ipcso = tucss = tucso = 0; hdr_len = ip_off + (ip->ip_hl << 2); cur = txr->next_avail_desc; /* Setup of IP header checksum. */ if (mp->m_pkthdr.csum_flags & CSUM_IP) { *txd_upper |= E1000_TXD_POPTS_IXSM << 8; offload |= CSUM_IP; ipcss = ip_off; ipcso = ip_off + offsetof(struct ip, ip_sum); /* * Start offset for header checksum calculation. * End offset for header checksum calculation. * Offset of place to put the checksum. */ TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; TXD->lower_setup.ip_fields.ipcss = ipcss; TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len); TXD->lower_setup.ip_fields.ipcso = ipcso; cmd |= E1000_TXD_CMD_IP; } if (mp->m_pkthdr.csum_flags & CSUM_TCP) { *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; *txd_upper |= E1000_TXD_POPTS_TXSM << 8; offload |= CSUM_TCP; tucss = hdr_len; tucso = hdr_len + offsetof(struct tcphdr, th_sum); /* * Setting up new checksum offload context for every frames * takes a lot of processing time for hardware. This also * reduces performance a lot for small sized frames so avoid * it if driver can use previously configured checksum * offload context. */ if (txr->last_hw_offload == offload) { if (offload & CSUM_IP) { if (txr->last_hw_ipcss == ipcss && txr->last_hw_ipcso == ipcso && txr->last_hw_tucss == tucss && txr->last_hw_tucso == tucso) return; } else { if (txr->last_hw_tucss == tucss && txr->last_hw_tucso == tucso) return; } } txr->last_hw_offload = offload; txr->last_hw_tucss = tucss; txr->last_hw_tucso = tucso; /* * Start offset for payload checksum calculation. * End offset for payload checksum calculation. * Offset of place to put the checksum. */ TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; TXD->upper_setup.tcp_fields.tucss = hdr_len; TXD->upper_setup.tcp_fields.tucse = htole16(0); TXD->upper_setup.tcp_fields.tucso = tucso; cmd |= E1000_TXD_CMD_TCP; } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) { *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; *txd_upper |= E1000_TXD_POPTS_TXSM << 8; tucss = hdr_len; tucso = hdr_len + offsetof(struct udphdr, uh_sum); /* * Setting up new checksum offload context for every frames * takes a lot of processing time for hardware. This also * reduces performance a lot for small sized frames so avoid * it if driver can use previously configured checksum * offload context. */ if (txr->last_hw_offload == offload) { if (offload & CSUM_IP) { if (txr->last_hw_ipcss == ipcss && txr->last_hw_ipcso == ipcso && txr->last_hw_tucss == tucss && txr->last_hw_tucso == tucso) return; } else { if (txr->last_hw_tucss == tucss && txr->last_hw_tucso == tucso) return; } } txr->last_hw_offload = offload; txr->last_hw_tucss = tucss; txr->last_hw_tucso = tucso; /* * Start offset for header checksum calculation. * End offset for header checksum calculation. * Offset of place to put the checksum. */ TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; TXD->upper_setup.tcp_fields.tucss = tucss; TXD->upper_setup.tcp_fields.tucse = htole16(0); TXD->upper_setup.tcp_fields.tucso = tucso; } if (offload & CSUM_IP) { txr->last_hw_ipcss = ipcss; txr->last_hw_ipcso = ipcso; } TXD->tcp_seg_setup.data = htole32(0); TXD->cmd_and_length = htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd); tx_buffer = &txr->tx_buffers[cur]; tx_buffer->m_head = NULL; tx_buffer->next_eop = -1; if (++cur == adapter->num_tx_desc) cur = 0; txr->tx_avail--; txr->next_avail_desc = cur; } /********************************************************************** * * Setup work for hardware segmentation offload (TSO) * **********************************************************************/ static void em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off, struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower) { struct adapter *adapter = txr->adapter; struct e1000_context_desc *TXD; struct em_buffer *tx_buffer; int cur, hdr_len; /* * In theory we can use the same TSO context if and only if * frame is the same type(IP/TCP) and the same MSS. However * checking whether a frame has the same IP/TCP structure is * hard thing so just ignore that and always restablish a * new TSO context. */ hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2); *txd_lower = (E1000_TXD_CMD_DEXT | /* Extended descr type */ E1000_TXD_DTYP_D | /* Data descr type */ E1000_TXD_CMD_TSE); /* Do TSE on this packet */ /* IP and/or TCP header checksum calculation and insertion. */ *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8; cur = txr->next_avail_desc; tx_buffer = &txr->tx_buffers[cur]; TXD = (struct e1000_context_desc *) &txr->tx_base[cur]; /* * Start offset for header checksum calculation. * End offset for header checksum calculation. * Offset of place put the checksum. */ TXD->lower_setup.ip_fields.ipcss = ip_off; TXD->lower_setup.ip_fields.ipcse = htole16(ip_off + (ip->ip_hl << 2) - 1); TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum); /* * Start offset for payload checksum calculation. * End offset for payload checksum calculation. * Offset of place to put the checksum. */ TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2); TXD->upper_setup.tcp_fields.tucse = 0; TXD->upper_setup.tcp_fields.tucso = ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum); /* * Payload size per packet w/o any headers. * Length of all headers up to payload. */ TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz); TXD->tcp_seg_setup.fields.hdr_len = hdr_len; TXD->cmd_and_length = htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | /* Extended descr */ E1000_TXD_CMD_TSE | /* TSE context */ E1000_TXD_CMD_IP | /* Do IP csum */ E1000_TXD_CMD_TCP | /* Do TCP checksum */ (mp->m_pkthdr.len - (hdr_len))); /* Total len */ tx_buffer->m_head = NULL; tx_buffer->next_eop = -1; if (++cur == adapter->num_tx_desc) cur = 0; txr->tx_avail--; txr->next_avail_desc = cur; txr->tx_tso = TRUE; } /********************************************************************** * * Examine each tx_buffer in the used queue. If the hardware is done * processing the packet then free associated resources. The * tx_buffer is put back on the free queue. * **********************************************************************/ static bool em_txeof(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; int first, last, done, processed; struct em_buffer *tx_buffer; struct e1000_tx_desc *tx_desc, *eop_desc; struct ifnet *ifp = adapter->ifp; EM_TX_LOCK_ASSERT(txr); /* No work, make sure watchdog is off */ if (txr->tx_avail == adapter->num_tx_desc) { txr->queue_status = EM_QUEUE_IDLE; return (FALSE); } processed = 0; first = txr->next_to_clean; tx_desc = &txr->tx_base[first]; tx_buffer = &txr->tx_buffers[first]; last = tx_buffer->next_eop; eop_desc = &txr->tx_base[last]; /* * What this does is get the index of the * first descriptor AFTER the EOP of the * first packet, that way we can do the * simple comparison on the inner while loop. */ if (++last == adapter->num_tx_desc) last = 0; done = last; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_POSTREAD); while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) { /* We clean the range of the packet */ while (first != done) { tx_desc->upper.data = 0; tx_desc->lower.data = 0; tx_desc->buffer_addr = 0; ++txr->tx_avail; ++processed; if (tx_buffer->m_head) { bus_dmamap_sync(txr->txtag, tx_buffer->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, tx_buffer->map); m_freem(tx_buffer->m_head); tx_buffer->m_head = NULL; } tx_buffer->next_eop = -1; txr->watchdog_time = ticks; if (++first == adapter->num_tx_desc) first = 0; tx_buffer = &txr->tx_buffers[first]; tx_desc = &txr->tx_base[first]; } ++ifp->if_opackets; /* See if we can continue to the next packet */ last = tx_buffer->next_eop; if (last != -1) { eop_desc = &txr->tx_base[last]; /* Get new done point */ if (++last == adapter->num_tx_desc) last = 0; done = last; } else break; } bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); txr->next_to_clean = first; /* ** Watchdog calculation, we know there's ** work outstanding or the first return ** would have been taken, so none processed ** for too long indicates a hang. local timer ** will examine this and do a reset if needed. */ if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG)) txr->queue_status = EM_QUEUE_HUNG; /* * If we have enough room, clear IFF_DRV_OACTIVE * to tell the stack that it is OK to send packets. */ if (txr->tx_avail > EM_TX_CLEANUP_THRESHOLD) { ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; /* Disable watchdog if all clean */ if (txr->tx_avail == adapter->num_tx_desc) { txr->queue_status = EM_QUEUE_IDLE; return (FALSE); } } return (TRUE); } /********************************************************************* * * Refresh RX descriptor mbufs from system mbuf buffer pool. * **********************************************************************/ static void em_refresh_mbufs(struct rx_ring *rxr, int limit) { struct adapter *adapter = rxr->adapter; struct mbuf *m; bus_dma_segment_t segs[1]; struct em_buffer *rxbuf; int i, error, nsegs, cleaned; i = rxr->next_to_refresh; cleaned = -1; while (i != limit) { rxbuf = &rxr->rx_buffers[i]; /* ** Just skip entries with a buffer, ** they can only be due to an error ** and are to be reused. */ if (rxbuf->m_head != NULL) goto reuse; m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); /* ** If we have a temporary resource shortage ** that causes a failure, just abort refresh ** for now, we will return to this point when ** reinvoked from em_rxeof. */ if (m == NULL) goto update; m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz; /* Use bus_dma machinery to setup the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { m_free(m); goto update; } /* If nsegs is wrong then the stack is corrupt. */ KASSERT(nsegs == 1, ("Too many segments returned!")); bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_PREREAD); rxbuf->m_head = m; rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr); reuse: cleaned = i; /* Calculate next index */ if (++i == adapter->num_rx_desc) i = 0; /* This is the work marker for refresh */ rxr->next_to_refresh = i; } update: /* ** Update the tail pointer only if, ** and as far as we have refreshed. */ if (cleaned != -1) /* Update tail index */ E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), cleaned); return; } /********************************************************************* * * Allocate memory for rx_buffer structures. Since we use one * rx_buffer per received packet, the maximum number of rx_buffer's * that we'll need is equal to the number of receive descriptors * that we've allocated. * **********************************************************************/ static int em_allocate_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; device_t dev = adapter->dev; struct em_buffer *rxbuf; int error; rxr->rx_buffers = malloc(sizeof(struct em_buffer) * adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO); if (rxr->rx_buffers == NULL) { device_printf(dev, "Unable to allocate rx_buffer memory\n"); return (ENOMEM); } error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUM9BYTES, /* maxsize */ 1, /* nsegments */ MJUM9BYTES, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &rxr->rxtag); if (error) { device_printf(dev, "%s: bus_dma_tag_create failed %d\n", __func__, error); goto fail; } rxbuf = rxr->rx_buffers; for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) { rxbuf = &rxr->rx_buffers[i]; error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT, &rxbuf->map); if (error) { device_printf(dev, "%s: bus_dmamap_create failed: %d\n", __func__, error); goto fail; } } return (0); fail: em_free_receive_structures(adapter); return (error); } /********************************************************************* * * Initialize a receive ring and its buffers. * **********************************************************************/ static int em_setup_receive_ring(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct em_buffer *rxbuf; bus_dma_segment_t seg[1]; int rsize, nsegs, error; /* Clear the ring contents */ EM_RX_LOCK(rxr); rsize = roundup2(adapter->num_rx_desc * sizeof(struct e1000_rx_desc), EM_DBA_ALIGN); bzero((void *)rxr->rx_base, rsize); /* ** Free current RX buffer structs and their mbufs */ for (int i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->m_head != NULL) { bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->rxtag, rxbuf->map); m_freem(rxbuf->m_head); } } /* Now replenish the mbufs */ for (int j = 0; j != adapter->num_rx_desc; ++j) { rxbuf = &rxr->rx_buffers[j]; rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); if (rxbuf->m_head == NULL) return (ENOBUFS); rxbuf->m_head->m_len = adapter->rx_mbuf_sz; rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */ rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map, rxbuf->m_head, seg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { m_freem(rxbuf->m_head); rxbuf->m_head = NULL; return (error); } bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_PREREAD); /* Update descriptor */ rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr); } /* Setup our descriptor indices */ rxr->next_to_check = 0; rxr->next_to_refresh = 0; bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); EM_RX_UNLOCK(rxr); return (0); } /********************************************************************* * * Initialize all receive rings. * **********************************************************************/ static int em_setup_receive_structures(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; int j; for (j = 0; j < adapter->num_queues; j++, rxr++) if (em_setup_receive_ring(rxr)) goto fail; return (0); fail: /* * Free RX buffers allocated so far, we will only handle * the rings that completed, the failing case will have * cleaned up for itself. 'j' failed, so its the terminus. */ for (int i = 0; i < j; ++i) { rxr = &adapter->rx_rings[i]; for (int n = 0; n < adapter->num_rx_desc; n++) { struct em_buffer *rxbuf; rxbuf = &rxr->rx_buffers[n]; if (rxbuf->m_head != NULL) { bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->rxtag, rxbuf->map); m_freem(rxbuf->m_head); rxbuf->m_head = NULL; } } } return (ENOBUFS); } /********************************************************************* * * Free all receive rings. * **********************************************************************/ static void em_free_receive_structures(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; for (int i = 0; i < adapter->num_queues; i++, rxr++) { em_free_receive_buffers(rxr); /* Free the ring memory as well */ em_dma_free(adapter, &rxr->rxdma); EM_RX_LOCK_DESTROY(rxr); } free(adapter->rx_rings, M_DEVBUF); } /********************************************************************* * * Free receive ring data structures * **********************************************************************/ static void em_free_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct em_buffer *rxbuf = NULL; INIT_DEBUGOUT("free_receive_buffers: begin"); if (rxr->rx_buffers != NULL) { for (int i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->map != NULL) { bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->rxtag, rxbuf->map); bus_dmamap_destroy(rxr->rxtag, rxbuf->map); } if (rxbuf->m_head != NULL) { m_freem(rxbuf->m_head); rxbuf->m_head = NULL; } } free(rxr->rx_buffers, M_DEVBUF); rxr->rx_buffers = NULL; } if (rxr->rxtag != NULL) { bus_dma_tag_destroy(rxr->rxtag); rxr->rxtag = NULL; } return; } /********************************************************************* * * Enable receive unit. * **********************************************************************/ #define MAX_INTS_PER_SEC 8000 #define DEFAULT_ITR 1000000000/(MAX_INTS_PER_SEC * 256) static void em_initialize_receive_unit(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; struct ifnet *ifp = adapter->ifp; struct e1000_hw *hw = &adapter->hw; u64 bus_addr; u32 rctl, rxcsum; INIT_DEBUGOUT("em_initialize_receive_units: begin"); /* * Make sure receives are disabled while setting * up the descriptor ring */ rctl = E1000_READ_REG(hw, E1000_RCTL); E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); E1000_WRITE_REG(&adapter->hw, E1000_RADV, adapter->rx_abs_int_delay.value); /* * Set the interrupt throttling rate. Value is calculated * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) */ E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR); /* ** When using MSIX interrupts we need to throttle ** using the EITR register (82574 only) */ if (hw->mac.type == e1000_82574) for (int i = 0; i < 4; i++) E1000_WRITE_REG(hw, E1000_EITR_82574(i), DEFAULT_ITR); /* Disable accelerated ackknowledge */ if (adapter->hw.mac.type == e1000_82574) E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS); if (ifp->if_capenable & IFCAP_RXCSUM) { rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL); E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); } /* ** XXX TEMPORARY WORKAROUND: on some systems with 82573 ** long latencies are observed, like Lenovo X60. This ** change eliminates the problem, but since having positive ** values in RDTR is a known source of problems on other ** platforms another solution is being sought. */ if (hw->mac.type == e1000_82573) E1000_WRITE_REG(hw, E1000_RDTR, 0x20); for (int i = 0; i < adapter->num_queues; i++, rxr++) { /* Setup the Base and Length of the Rx Descriptor Ring */ bus_addr = rxr->rxdma.dma_paddr; E1000_WRITE_REG(hw, E1000_RDLEN(i), adapter->num_rx_desc * sizeof(struct e1000_rx_desc)); E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr); /* Setup the Head and Tail Descriptor Pointers */ E1000_WRITE_REG(hw, E1000_RDH(i), 0); E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1); } /* Set early receive threshold on appropriate hw */ if (((adapter->hw.mac.type == e1000_ich9lan) || (adapter->hw.mac.type == e1000_pch2lan) || (adapter->hw.mac.type == e1000_ich10lan)) && (ifp->if_mtu > ETHERMTU)) { u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0)); E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3); E1000_WRITE_REG(hw, E1000_ERT, 0x100 | (1 << 13)); } if (adapter->hw.mac.type == e1000_pch2lan) { if (ifp->if_mtu > ETHERMTU) e1000_lv_jumbo_workaround_ich8lan(hw, TRUE); else e1000_lv_jumbo_workaround_ich8lan(hw, FALSE); } /* Setup the Receive Control Register */ rctl &= ~(3 << E1000_RCTL_MO_SHIFT); rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); /* Strip the CRC */ rctl |= E1000_RCTL_SECRC; /* Make sure VLAN Filters are off */ rctl &= ~E1000_RCTL_VFE; rctl &= ~E1000_RCTL_SBP; if (adapter->rx_mbuf_sz == MCLBYTES) rctl |= E1000_RCTL_SZ_2048; else if (adapter->rx_mbuf_sz == MJUMPAGESIZE) rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; if (ifp->if_mtu > ETHERMTU) rctl |= E1000_RCTL_LPE; else rctl &= ~E1000_RCTL_LPE; /* Write out the settings */ E1000_WRITE_REG(hw, E1000_RCTL, rctl); return; } /********************************************************************* * * This routine executes in interrupt context. It replenishes * the mbufs in the descriptor and sends data which has been * dma'ed into host memory to upper layer. * * We loop at most count times if count is > 0, or until done if * count < 0. * * For polling we also now return the number of cleaned packets *********************************************************************/ static bool em_rxeof(struct rx_ring *rxr, int count, int *done) { struct adapter *adapter = rxr->adapter; struct ifnet *ifp = adapter->ifp; struct mbuf *mp, *sendmp; u8 status = 0; u16 len; int i, processed, rxdone = 0; bool eop; struct e1000_rx_desc *cur; EM_RX_LOCK(rxr); for (i = rxr->next_to_check, processed = 0; count != 0;) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); cur = &rxr->rx_base[i]; status = cur->status; mp = sendmp = NULL; if ((status & E1000_RXD_STAT_DD) == 0) break; len = le16toh(cur->length); eop = (status & E1000_RXD_STAT_EOP) != 0; - count--; - if (((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) == 0) && - (rxr->discard == FALSE)) { + if ((rxr->discard == TRUE) || (cur->errors & + E1000_RXD_ERR_FRAME_ERR_MASK)) { + ifp->if_ierrors++; + ++rxr->rx_discarded; + if (!eop) /* Catch subsequent segs */ + rxr->discard = TRUE; + else + rxr->discard = FALSE; + em_rx_discard(rxr, i); + goto next_desc; + } - /* Assign correct length to the current fragment */ - mp = rxr->rx_buffers[i].m_head; - mp->m_len = len; + /* Assign correct length to the current fragment */ + mp = rxr->rx_buffers[i].m_head; + mp->m_len = len; - /* Trigger for refresh */ - rxr->rx_buffers[i].m_head = NULL; + /* Trigger for refresh */ + rxr->rx_buffers[i].m_head = NULL; - if (rxr->fmp == NULL) { - mp->m_pkthdr.len = len; - rxr->fmp = mp; /* Store the first mbuf */ - rxr->lmp = mp; - } else { - /* Chain mbuf's together */ - mp->m_flags &= ~M_PKTHDR; - rxr->lmp->m_next = mp; - rxr->lmp = rxr->lmp->m_next; - rxr->fmp->m_pkthdr.len += len; - } + /* First segment? */ + if (rxr->fmp == NULL) { + mp->m_pkthdr.len = len; + rxr->fmp = rxr->lmp = mp; + } else { + /* Chain mbuf's together */ + mp->m_flags &= ~M_PKTHDR; + rxr->lmp->m_next = mp; + rxr->lmp = mp; + rxr->fmp->m_pkthdr.len += len; + } - if (eop) { - rxr->fmp->m_pkthdr.rcvif = ifp; - ifp->if_ipackets++; - em_receive_checksum(cur, rxr->fmp); + if (eop) { + --count; + sendmp = rxr->fmp; + sendmp->m_pkthdr.rcvif = ifp; + ifp->if_ipackets++; + em_receive_checksum(cur, sendmp); #ifndef __NO_STRICT_ALIGNMENT - if (adapter->max_frame_size > - (MCLBYTES - ETHER_ALIGN) && - em_fixup_rx(rxr) != 0) - goto skip; + if (adapter->max_frame_size > + (MCLBYTES - ETHER_ALIGN) && + em_fixup_rx(rxr) != 0) + goto skip; #endif - if (status & E1000_RXD_STAT_VP) { - rxr->fmp->m_pkthdr.ether_vtag = - (le16toh(cur->special) & - E1000_RXD_SPC_VLAN_MASK); - rxr->fmp->m_flags |= M_VLANTAG; - } + if (status & E1000_RXD_STAT_VP) { + sendmp->m_pkthdr.ether_vtag = + (le16toh(cur->special) & + E1000_RXD_SPC_VLAN_MASK); + sendmp->m_flags |= M_VLANTAG; + } #ifdef EM_MULTIQUEUE - rxr->fmp->m_pkthdr.flowid = rxr->msix; - rxr->fmp->m_flags |= M_FLOWID; + sendmp->m_pkthdr.flowid = rxr->msix; + sendmp->m_flags |= M_FLOWID; #endif #ifndef __NO_STRICT_ALIGNMENT skip: #endif - sendmp = rxr->fmp; - rxr->fmp = NULL; - rxr->lmp = NULL; - } - } else { - ifp->if_ierrors++; - ++rxr->rx_discarded; - if (!eop) /* Catch subsequent segs */ - rxr->discard = TRUE; - else - rxr->discard = FALSE; - em_rx_discard(rxr, i); - sendmp = NULL; + rxr->fmp = rxr->lmp = NULL; } - +next_desc: /* Zero out the receive descriptors status. */ cur->status = 0; ++rxdone; /* cumulative for POLL */ ++processed; /* Advance our pointers to the next descriptor. */ if (++i == adapter->num_rx_desc) i = 0; /* Send to the stack */ if (sendmp != NULL) { rxr->next_to_check = i; EM_RX_UNLOCK(rxr); (*ifp->if_input)(ifp, sendmp); EM_RX_LOCK(rxr); i = rxr->next_to_check; } /* Only refresh mbufs every 8 descriptors */ if (processed == 8) { em_refresh_mbufs(rxr, i); processed = 0; } } /* Catch any remaining refresh work */ - if (processed != 0) { - em_refresh_mbufs(rxr, i); - processed = 0; - } + em_refresh_mbufs(rxr, i); rxr->next_to_check = i; if (done != NULL) *done = rxdone; EM_RX_UNLOCK(rxr); return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE); } static __inline void em_rx_discard(struct rx_ring *rxr, int i) { struct adapter *adapter = rxr->adapter; struct em_buffer *rbuf; struct mbuf *m; rbuf = &rxr->rx_buffers[i]; /* Free any previous pieces */ if (rxr->fmp != NULL) { rxr->fmp->m_flags |= M_PKTHDR; m_freem(rxr->fmp); rxr->fmp = NULL; rxr->lmp = NULL; } /* Reset state, keep loaded DMA map and reuse */ m = rbuf->m_head; m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz; m->m_flags |= M_PKTHDR; m->m_data = m->m_ext.ext_buf; m->m_next = NULL; return; } #ifndef __NO_STRICT_ALIGNMENT /* * When jumbo frames are enabled we should realign entire payload on * architecures with strict alignment. This is serious design mistake of 8254x * as it nullifies DMA operations. 8254x just allows RX buffer size to be * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its * payload. On architecures without strict alignment restrictions 8254x still * performs unaligned memory access which would reduce the performance too. * To avoid copying over an entire frame to align, we allocate a new mbuf and * copy ethernet header to the new mbuf. The new mbuf is prepended into the * existing mbuf chain. * * Be aware, best performance of the 8254x is achived only when jumbo frame is * not used at all on architectures with strict alignment. */ static int em_fixup_rx(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct mbuf *m, *n; int error; error = 0; m = rxr->fmp; if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) { bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len); m->m_data += ETHER_HDR_LEN; } else { MGETHDR(n, M_DONTWAIT, MT_DATA); if (n != NULL) { bcopy(m->m_data, n->m_data, ETHER_HDR_LEN); m->m_data += ETHER_HDR_LEN; m->m_len -= ETHER_HDR_LEN; n->m_len = ETHER_HDR_LEN; M_MOVE_PKTHDR(n, m); n->m_next = m; rxr->fmp = n; } else { adapter->dropped_pkts++; m_freem(rxr->fmp); rxr->fmp = NULL; error = ENOMEM; } } return (error); } #endif /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of checksum so that stack * doesn't spend time verifying the checksum. * *********************************************************************/ static void em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp) { /* Ignore Checksum bit is set */ if (rx_desc->status & E1000_RXD_STAT_IXSM) { mp->m_pkthdr.csum_flags = 0; return; } if (rx_desc->status & E1000_RXD_STAT_IPCS) { /* Did it pass? */ if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) { /* IP Checksum Good */ mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED; mp->m_pkthdr.csum_flags |= CSUM_IP_VALID; } else { mp->m_pkthdr.csum_flags = 0; } } if (rx_desc->status & E1000_RXD_STAT_TCPCS) { /* Did it pass? */ if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) { mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); mp->m_pkthdr.csum_data = htons(0xffff); } } } /* * This routine is run via an vlan * config EVENT */ static void em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct adapter *adapter = ifp->if_softc; u32 index, bit; if (ifp->if_softc != arg) /* Not our event */ return; if ((vtag == 0) || (vtag > 4095)) /* Invalid ID */ return; EM_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; /* Re-init to load the changes */ if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) em_init_locked(adapter); EM_CORE_UNLOCK(adapter); } /* * This routine is run via an vlan * unconfig EVENT */ static void em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct adapter *adapter = ifp->if_softc; u32 index, bit; if (ifp->if_softc != arg) return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; EM_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; /* Re-init to load the changes */ if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) em_init_locked(adapter); EM_CORE_UNLOCK(adapter); } static void em_setup_vlan_hw_support(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 reg; /* ** We get here thru init_locked, meaning ** a soft reset, this has already cleared ** the VFTA and other state, so if there ** have been no vlan's registered do nothing. */ if (adapter->num_vlans == 0) return; /* ** A soft reset zero's out the VFTA, so ** we need to repopulate it now. */ for (int i = 0; i < EM_VFTA_SIZE; i++) if (adapter->shadow_vfta[i] != 0) E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, i, adapter->shadow_vfta[i]); reg = E1000_READ_REG(hw, E1000_CTRL); reg |= E1000_CTRL_VME; E1000_WRITE_REG(hw, E1000_CTRL, reg); /* Enable the Filter Table */ reg = E1000_READ_REG(hw, E1000_RCTL); reg &= ~E1000_RCTL_CFIEN; reg |= E1000_RCTL_VFE; E1000_WRITE_REG(hw, E1000_RCTL, reg); } static void em_enable_intr(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 ims_mask = IMS_ENABLE_MASK; if (hw->mac.type == e1000_82574) { E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK); ims_mask |= EM_MSIX_MASK; } E1000_WRITE_REG(hw, E1000_IMS, ims_mask); } static void em_disable_intr(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; if (hw->mac.type == e1000_82574) E1000_WRITE_REG(hw, EM_EIAC, 0); E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); } /* * Bit of a misnomer, what this really means is * to enable OS management of the system... aka * to disable special hardware management features */ static void em_init_manageability(struct adapter *adapter) { /* A shared code workaround */ #define E1000_82542_MANC2H E1000_MANC2H if (adapter->has_manage) { int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H); int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* disable hardware interception of ARP */ manc &= ~(E1000_MANC_ARP_EN); /* enable receiving management packets to the host */ manc |= E1000_MANC_EN_MNG2HOST; #define E1000_MNG2HOST_PORT_623 (1 << 5) #define E1000_MNG2HOST_PORT_664 (1 << 6) manc2h |= E1000_MNG2HOST_PORT_623; manc2h |= E1000_MNG2HOST_PORT_664; E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h); E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * Give control back to hardware management * controller if there is one. */ static void em_release_manageability(struct adapter *adapter) { if (adapter->has_manage) { int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* re-enable hardware interception of ARP */ manc |= E1000_MANC_ARP_EN; manc &= ~E1000_MANC_EN_MNG2HOST; E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means * that the driver is loaded. For AMT version type f/w * this means that the network i/f is open. */ static void em_get_hw_control(struct adapter *adapter) { u32 ctrl_ext, swsm; if (adapter->hw.mac.type == e1000_82573) { swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); E1000_WRITE_REG(&adapter->hw, E1000_SWSM, swsm | E1000_SWSM_DRV_LOAD); return; } /* else */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); return; } /* * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means that * the driver is no longer loaded. For AMT versions of the * f/w this means that the network i/f is closed. */ static void em_release_hw_control(struct adapter *adapter) { u32 ctrl_ext, swsm; if (!adapter->has_manage) return; if (adapter->hw.mac.type == e1000_82573) { swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); E1000_WRITE_REG(&adapter->hw, E1000_SWSM, swsm & ~E1000_SWSM_DRV_LOAD); return; } /* else */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); return; } static int em_is_valid_ether_addr(u8 *addr) { char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { return (FALSE); } return (TRUE); } /* ** Parse the interface capabilities with regard ** to both system management and wake-on-lan for ** later use. */ static void em_get_wakeup(device_t dev) { struct adapter *adapter = device_get_softc(dev); u16 eeprom_data = 0, device_id, apme_mask; adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw); apme_mask = EM_EEPROM_APME; switch (adapter->hw.mac.type) { case e1000_82573: case e1000_82583: adapter->has_amt = TRUE; /* Falls thru */ case e1000_82571: case e1000_82572: case e1000_80003es2lan: if (adapter->hw.bus.func == 1) { e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); break; } else e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; case e1000_ich8lan: case e1000_ich9lan: case e1000_ich10lan: case e1000_pchlan: case e1000_pch2lan: apme_mask = E1000_WUC_APME; adapter->has_amt = TRUE; eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC); break; default: e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; } if (eeprom_data & apme_mask) adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC); /* * We have the eeprom settings, now apply the special cases * where the eeprom may be wrong or the board won't support * wake on lan on a particular port */ device_id = pci_get_device(dev); switch (device_id) { case E1000_DEV_ID_82571EB_FIBER: /* Wake events only supported on port A for dual fiber * regardless of eeprom setting */ if (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_FUNC_1) adapter->wol = 0; break; case E1000_DEV_ID_82571EB_QUAD_COPPER: case E1000_DEV_ID_82571EB_QUAD_FIBER: case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: /* if quad port adapter, disable WoL on all but port A */ if (global_quad_port_a != 0) adapter->wol = 0; /* Reset for multiple quad port adapters */ if (++global_quad_port_a == 4) global_quad_port_a = 0; break; } return; } /* * Enable PCI Wake On Lan capability */ static void em_enable_wakeup(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct ifnet *ifp = adapter->ifp; u32 pmc, ctrl, ctrl_ext, rctl; u16 status; if ((pci_find_extcap(dev, PCIY_PMG, &pmc) != 0)) return; /* Advertise the wakeup capability */ ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3); E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); if ((adapter->hw.mac.type == e1000_ich8lan) || (adapter->hw.mac.type == e1000_pchlan) || (adapter->hw.mac.type == e1000_ich9lan) || (adapter->hw.mac.type == e1000_ich10lan)) { e1000_disable_gig_wol_ich8lan(&adapter->hw); e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw); } /* Keep the laser running on Fiber adapters */ if (adapter->hw.phy.media_type == e1000_media_type_fiber || adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA; E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext); } /* ** Determine type of Wakeup: note that wol ** is set with all bits on by default. */ if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0) adapter->wol &= ~E1000_WUFC_MAG; if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0) adapter->wol &= ~E1000_WUFC_MC; else { rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); } if ((adapter->hw.mac.type == e1000_pchlan) || (adapter->hw.mac.type == e1000_pch2lan)) { if (em_enable_phy_wakeup(adapter)) return; } else { E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); } if (adapter->hw.phy.type == e1000_phy_igp_3) e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw); /* Request PME */ status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2); status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); if (ifp->if_capenable & IFCAP_WOL) status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2); return; } /* ** WOL in the newer chipset interfaces (pchlan) ** require thing to be copied into the phy */ static int em_enable_phy_wakeup(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 mreg, ret = 0; u16 preg; /* copy MAC RARs to PHY RARs */ e1000_copy_rx_addrs_to_phy_ich8lan(hw); /* copy MAC MTA to PHY MTA */ for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) { mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i); e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF)); e1000_write_phy_reg(hw, BM_MTA(i) + 1, (u16)((mreg >> 16) & 0xFFFF)); } /* configure PHY Rx Control register */ e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg); mreg = E1000_READ_REG(hw, E1000_RCTL); if (mreg & E1000_RCTL_UPE) preg |= BM_RCTL_UPE; if (mreg & E1000_RCTL_MPE) preg |= BM_RCTL_MPE; preg &= ~(BM_RCTL_MO_MASK); if (mreg & E1000_RCTL_MO_3) preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT) << BM_RCTL_MO_SHIFT); if (mreg & E1000_RCTL_BAM) preg |= BM_RCTL_BAM; if (mreg & E1000_RCTL_PMCF) preg |= BM_RCTL_PMCF; mreg = E1000_READ_REG(hw, E1000_CTRL); if (mreg & E1000_CTRL_RFCE) preg |= BM_RCTL_RFCE; e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg); /* enable PHY wakeup in MAC register */ E1000_WRITE_REG(hw, E1000_WUC, E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN); E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol); /* configure and enable PHY wakeup in PHY registers */ e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol); e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN); /* activate PHY wakeup */ ret = hw->phy.ops.acquire(hw); if (ret) { printf("Could not acquire PHY\n"); return ret; } e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT)); ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg); if (ret) { printf("Could not read PHY page 769\n"); goto out; } preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT; ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg); if (ret) printf("Could not set PHY Host Wakeup bit\n"); out: hw->phy.ops.release(hw); return ret; } static void em_led_func(void *arg, int onoff) { struct adapter *adapter = arg; EM_CORE_LOCK(adapter); if (onoff) { e1000_setup_led(&adapter->hw); e1000_led_on(&adapter->hw); } else { e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } EM_CORE_UNLOCK(adapter); +} + +/* +** Disable the L0S and L1 LINK states +*/ +static void +em_disable_aspm(struct adapter *adapter) +{ + int base, reg; + u16 link_cap,link_ctrl; + device_t dev = adapter->dev; + + switch (adapter->hw.mac.type) { + case e1000_82573: + case e1000_82574: + case e1000_82583: + break; + default: + return; + } + if (pci_find_extcap(dev, PCIY_EXPRESS, &base) != 0) + return; + reg = base + PCIR_EXPRESS_LINK_CAP; + link_cap = pci_read_config(dev, reg, 2); + if ((link_cap & PCIM_LINK_CAP_ASPM) == 0) + return; + reg = base + PCIR_EXPRESS_LINK_CTL; + link_ctrl = pci_read_config(dev, reg, 2); + link_ctrl &= 0xFFFC; /* turn off bit 1 and 2 */ + pci_write_config(dev, reg, link_ctrl, 2); + return; } /********************************************************************** * * Update the board statistics counters. * **********************************************************************/ static void em_update_stats_counters(struct adapter *adapter) { struct ifnet *ifp; if(adapter->hw.phy.media_type == e1000_media_type_copper || (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) { adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS); adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC); } adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS); adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC); adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC); adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL); adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC); adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL); adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC); adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC); adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC); adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC); adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC); /* ** For watchdog management we need to know if we have been ** paused during the last interval, so capture that here. */ adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); adapter->stats.xoffrxc += adapter->pause_frames; adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC); adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC); adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64); adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127); adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255); adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511); adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023); adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522); adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC); adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC); adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC); adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC); /* For the 64-bit byte counters the low dword must be read first. */ /* Both registers clear on the read of the high dword */ adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32); adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32); adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC); adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC); adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC); adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC); adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC); adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH); adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH); adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR); adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT); adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64); adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127); adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255); adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511); adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023); adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522); adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC); adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC); /* Interrupt Counts */ adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC); adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC); adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC); adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC); adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC); adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC); adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC); adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC); adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC); if (adapter->hw.mac.type >= e1000_82543) { adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, E1000_ALGNERRC); adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, E1000_RXERRC); adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, E1000_TNCRS); adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, E1000_CEXTERR); adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, E1000_TSCTC); adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, E1000_TSCTFC); } ifp = adapter->ifp; ifp->if_collisions = adapter->stats.colc; /* Rx Errors */ ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc + adapter->stats.crcerrs + adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc + adapter->stats.mpc + adapter->stats.cexterr; /* Tx Errors */ ifp->if_oerrors = adapter->stats.ecol + adapter->stats.latecol + adapter->watchdog_events; } /* Export a single 32-bit register via a read-only sysctl. */ static int em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; u_int val; adapter = oidp->oid_arg1; val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2); return (sysctl_handle_int(oidp, &val, 0, req)); } /* * Add sysctl variables, one per statistic, to the system. */ static void em_add_hw_stats(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct e1000_hw_stats *stats = &adapter->stats; struct sysctl_oid *stat_node, *queue_node, *int_node; struct sysctl_oid_list *stat_list, *queue_list, *int_list; #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, 0, "Link MSIX IRQ Handled"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", CTLFLAG_RD, &adapter->mbuf_alloc_failed, "Std mbuf failed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", CTLFLAG_RD, &adapter->mbuf_cluster_failed, "Std mbuf cluster failed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", CTLFLAG_RD, &adapter->no_tx_dma_setup, "Driver tx dma failure in xmit"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", CTLFLAG_RD, &adapter->rx_overruns, "RX overruns"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control", CTLFLAG_RD, adapter, E1000_CTRL, em_sysctl_reg_handler, "IU", "Device Control Register"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control", CTLFLAG_RD, adapter, E1000_RCTL, em_sysctl_reg_handler, "IU", "Receiver Control Register"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", CTLFLAG_RD, &adapter->hw.fc.high_water, 0, "Flow Control High Watermark"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", CTLFLAG_RD, &adapter->hw.fc.low_water, 0, "Flow Control Low Watermark"); for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) { snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", CTLFLAG_RD, adapter, E1000_TDH(txr->me), em_sysctl_reg_handler, "IU", "Transmit Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", CTLFLAG_RD, adapter, E1000_TDT(txr->me), em_sysctl_reg_handler, "IU", "Transmit Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq", CTLFLAG_RD, &txr->tx_irq, "Queue MSI-X Transmit Interrupts"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &txr->no_desc_avail, "Queue No Descriptor Available"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", CTLFLAG_RD, adapter, E1000_RDH(rxr->me), em_sysctl_reg_handler, "IU", "Receive Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", CTLFLAG_RD, adapter, E1000_RDT(rxr->me), em_sysctl_reg_handler, "IU", "Receive Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq", CTLFLAG_RD, &rxr->rx_irq, "Queue MSI-X Receive Interrupts"); } /* MAC stats get their own sub node */ stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", CTLFLAG_RD, NULL, "Statistics"); stat_list = SYSCTL_CHILDREN(stat_node); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", CTLFLAG_RD, &stats->ecol, "Excessive collisions"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", CTLFLAG_RD, &stats->scc, "Single collisions"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", CTLFLAG_RD, &stats->mcc, "Multiple collisions"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", CTLFLAG_RD, &stats->latecol, "Late collisions"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", CTLFLAG_RD, &stats->colc, "Collision Count"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors", CTLFLAG_RD, &adapter->stats.symerrs, "Symbol Errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors", CTLFLAG_RD, &adapter->stats.sec, "Sequence Errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count", CTLFLAG_RD, &adapter->stats.dc, "Defer Count"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets", CTLFLAG_RD, &adapter->stats.mpc, "Missed Packets"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff", CTLFLAG_RD, &adapter->stats.rnbc, "Receive No Buffers"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize", CTLFLAG_RD, &adapter->stats.ruc, "Receive Undersize"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", CTLFLAG_RD, &adapter->stats.rfc, "Fragmented Packets Received "); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize", CTLFLAG_RD, &adapter->stats.roc, "Oversized Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber", CTLFLAG_RD, &adapter->stats.rjc, "Recevied Jabber"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs", CTLFLAG_RD, &adapter->stats.rxerrc, "Receive Errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs", CTLFLAG_RD, &adapter->stats.crcerrs, "CRC errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs", CTLFLAG_RD, &adapter->stats.algnerrc, "Alignment Errors"); /* On 82575 these are collision counts */ SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs", CTLFLAG_RD, &adapter->stats.cexterr, "Collision/Carrier extension errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd", CTLFLAG_RD, &adapter->stats.xonrxc, "XON Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd", CTLFLAG_RD, &adapter->stats.xontxc, "XON Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", CTLFLAG_RD, &adapter->stats.xoffrxc, "XOFF Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd", CTLFLAG_RD, &adapter->stats.xofftxc, "XOFF Transmitted"); /* Packet Reception Stats */ SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd", CTLFLAG_RD, &adapter->stats.tpr, "Total Packets Received "); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", CTLFLAG_RD, &adapter->stats.gprc, "Good Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd", CTLFLAG_RD, &adapter->stats.bprc, "Broadcast Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", CTLFLAG_RD, &adapter->stats.mprc, "Multicast Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", CTLFLAG_RD, &adapter->stats.prc64, "64 byte frames received "); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", CTLFLAG_RD, &adapter->stats.prc127, "65-127 byte frames received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", CTLFLAG_RD, &adapter->stats.prc255, "128-255 byte frames received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", CTLFLAG_RD, &adapter->stats.prc511, "256-511 byte frames received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", CTLFLAG_RD, &adapter->stats.prc1023, "512-1023 byte frames received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", CTLFLAG_RD, &adapter->stats.prc1522, "1023-1522 byte frames received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", CTLFLAG_RD, &adapter->stats.gorc, "Good Octets Received"); /* Packet Transmission Stats */ SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &adapter->stats.gotc, "Good Octets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", CTLFLAG_RD, &adapter->stats.tpt, "Total Packets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &adapter->stats.gptc, "Good Packets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", CTLFLAG_RD, &adapter->stats.bptc, "Broadcast Packets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", CTLFLAG_RD, &adapter->stats.mptc, "Multicast Packets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", CTLFLAG_RD, &adapter->stats.ptc64, "64 byte frames transmitted "); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", CTLFLAG_RD, &adapter->stats.ptc127, "65-127 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", CTLFLAG_RD, &adapter->stats.ptc255, "128-255 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", CTLFLAG_RD, &adapter->stats.ptc511, "256-511 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", CTLFLAG_RD, &adapter->stats.ptc1023, "512-1023 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", CTLFLAG_RD, &adapter->stats.ptc1522, "1024-1522 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd", CTLFLAG_RD, &adapter->stats.tsctc, "TSO Contexts Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", CTLFLAG_RD, &adapter->stats.tsctfc, "TSO Contexts Failed"); /* Interrupt Stats */ int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", CTLFLAG_RD, NULL, "Interrupt Statistics"); int_list = SYSCTL_CHILDREN(int_node); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts", CTLFLAG_RD, &adapter->stats.iac, "Interrupt Assertion Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer", CTLFLAG_RD, &adapter->stats.icrxptc, "Interrupt Cause Rx Pkt Timer Expire Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer", CTLFLAG_RD, &adapter->stats.icrxatc, "Interrupt Cause Rx Abs Timer Expire Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer", CTLFLAG_RD, &adapter->stats.ictxptc, "Interrupt Cause Tx Pkt Timer Expire Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer", CTLFLAG_RD, &adapter->stats.ictxatc, "Interrupt Cause Tx Abs Timer Expire Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty", CTLFLAG_RD, &adapter->stats.ictxqec, "Interrupt Cause Tx Queue Empty Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh", CTLFLAG_RD, &adapter->stats.ictxqmtc, "Interrupt Cause Tx Queue Min Thresh Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh", CTLFLAG_RD, &adapter->stats.icrxdmtc, "Interrupt Cause Rx Desc Min Thresh Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun", CTLFLAG_RD, &adapter->stats.icrxoc, "Interrupt Cause Receiver Overrun Count"); } /********************************************************************** * * This routine provides a way to dump out the adapter eeprom, * often a useful debug/service tool. This only dumps the first * 32 words, stuff that matters is in that extent. * **********************************************************************/ static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); /* * This value will cause a hex dump of the * first 32 16-bit words of the EEPROM to * the screen. */ if (result == 1) { adapter = (struct adapter *)arg1; em_print_nvm_info(adapter); } return (error); } static void em_print_nvm_info(struct adapter *adapter) { u16 eeprom_data; int i, j, row = 0; /* Its a bit crude, but it gets the job done */ printf("\nInterface EEPROM Dump:\n"); printf("Offset\n0x0000 "); for (i = 0, j = 0; i < 32; i++, j++) { if (j == 8) { /* Make the offset block */ j = 0; ++row; printf("\n0x00%x0 ",row); } e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); printf("%04x ", eeprom_data); } printf("\n"); } static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS) { struct em_int_delay_info *info; struct adapter *adapter; u32 regval; int error, usecs, ticks; info = (struct em_int_delay_info *)arg1; usecs = info->value; error = sysctl_handle_int(oidp, &usecs, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535)) return (EINVAL); info->value = usecs; ticks = EM_USECS_TO_TICKS(usecs); adapter = info->adapter; EM_CORE_LOCK(adapter); regval = E1000_READ_OFFSET(&adapter->hw, info->offset); regval = (regval & ~0xffff) | (ticks & 0xffff); /* Handle a few special cases. */ switch (info->offset) { case E1000_RDTR: break; case E1000_TIDV: if (ticks == 0) { adapter->txd_cmd &= ~E1000_TXD_CMD_IDE; /* Don't write 0 into the TIDV register. */ regval++; } else adapter->txd_cmd |= E1000_TXD_CMD_IDE; break; } E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval); EM_CORE_UNLOCK(adapter); return (0); } static void em_add_int_delay_sysctl(struct adapter *adapter, const char *name, const char *description, struct em_int_delay_info *info, int offset, int value) { info->adapter = adapter; info->offset = offset; info->value = value; SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, info, 0, em_sysctl_int_delay, "I", description); } static void em_add_rx_process_limit(struct adapter *adapter, const char *name, const char *description, int *limit, int value) { *limit = value; SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description); } static void em_set_flow_cntrl(struct adapter *adapter, const char *name, const char *description, int *limit, int value) { *limit = value; SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description); } static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); if (result == 1) { adapter = (struct adapter *)arg1; em_print_debug_info(adapter); } return (error); } /* ** This routine is meant to be fluid, add whatever is ** needed for debugging a problem. -jfv */ static void em_print_debug_info(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING) printf("Interface is RUNNING "); else printf("Interface is NOT RUNNING\n"); if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE) printf("and ACTIVE\n"); else printf("and INACTIVE\n"); device_printf(dev, "hw tdh = %d, hw tdt = %d\n", E1000_READ_REG(&adapter->hw, E1000_TDH(0)), E1000_READ_REG(&adapter->hw, E1000_TDT(0))); device_printf(dev, "hw rdh = %d, hw rdt = %d\n", E1000_READ_REG(&adapter->hw, E1000_RDH(0)), E1000_READ_REG(&adapter->hw, E1000_RDT(0))); device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status); device_printf(dev, "TX descriptors avail = %d\n", txr->tx_avail); device_printf(dev, "Tx Descriptors avail failure = %ld\n", txr->no_desc_avail); device_printf(dev, "RX discarded packets = %ld\n", rxr->rx_discarded); device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check); device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh); } Index: projects/binutils-2.17/sys/dev/e1000/if_igb.c =================================================================== --- projects/binutils-2.17/sys/dev/e1000/if_igb.c (revision 215829) +++ projects/binutils-2.17/sys/dev/e1000/if_igb.c (revision 215830) @@ -1,5439 +1,5501 @@ /****************************************************************************** Copyright (c) 2001-2010, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #include "opt_inet.h" #include "opt_altq.h" #endif #include #include #if __FreeBSD_version >= 800000 #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "e1000_api.h" #include "e1000_82575.h" #include "if_igb.h" /********************************************************************* * Set this to one to display debug statistics *********************************************************************/ int igb_display_debug_stats = 0; /********************************************************************* * Driver version: *********************************************************************/ -char igb_driver_version[] = "version - 2.0.4"; +char igb_driver_version[] = "version - 2.0.7"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into e1000_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static igb_vendor_info_t igb_vendor_info_array[] = { { 0x8086, E1000_DEV_ID_82575EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82576, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82576_NS, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82576_NS_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82576_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82576_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82576_VF, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82580_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82580_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82580_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82580_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82580_COPPER_DUAL, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82580_QUAD_FIBER, + PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_DH89XXCC_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_DH89XXCC_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0}, /* required last entry */ { 0, 0, 0, 0, 0} }; /********************************************************************* * Table of branding strings for all supported NICs. *********************************************************************/ static char *igb_strings[] = { "Intel(R) PRO/1000 Network Connection" }; /********************************************************************* * Function prototypes *********************************************************************/ static int igb_probe(device_t); static int igb_attach(device_t); static int igb_detach(device_t); static int igb_shutdown(device_t); static int igb_suspend(device_t); static int igb_resume(device_t); static void igb_start(struct ifnet *); static void igb_start_locked(struct tx_ring *, struct ifnet *ifp); #if __FreeBSD_version >= 800000 static int igb_mq_start(struct ifnet *, struct mbuf *); static int igb_mq_start_locked(struct ifnet *, struct tx_ring *, struct mbuf *); static void igb_qflush(struct ifnet *); #endif static int igb_ioctl(struct ifnet *, u_long, caddr_t); static void igb_init(void *); static void igb_init_locked(struct adapter *); static void igb_stop(void *); static void igb_media_status(struct ifnet *, struct ifmediareq *); static int igb_media_change(struct ifnet *); static void igb_identify_hardware(struct adapter *); static int igb_allocate_pci_resources(struct adapter *); static int igb_allocate_msix(struct adapter *); static int igb_allocate_legacy(struct adapter *); static int igb_setup_msix(struct adapter *); static void igb_free_pci_resources(struct adapter *); static void igb_local_timer(void *); static void igb_reset(struct adapter *); static int igb_setup_interface(device_t, struct adapter *); static int igb_allocate_queues(struct adapter *); static void igb_configure_queues(struct adapter *); static int igb_allocate_transmit_buffers(struct tx_ring *); static void igb_setup_transmit_structures(struct adapter *); static void igb_setup_transmit_ring(struct tx_ring *); static void igb_initialize_transmit_units(struct adapter *); static void igb_free_transmit_structures(struct adapter *); static void igb_free_transmit_buffers(struct tx_ring *); static int igb_allocate_receive_buffers(struct rx_ring *); static int igb_setup_receive_structures(struct adapter *); static int igb_setup_receive_ring(struct rx_ring *); static void igb_initialize_receive_units(struct adapter *); static void igb_free_receive_structures(struct adapter *); static void igb_free_receive_buffers(struct rx_ring *); static void igb_free_receive_ring(struct rx_ring *); static void igb_enable_intr(struct adapter *); static void igb_disable_intr(struct adapter *); static void igb_update_stats_counters(struct adapter *); static bool igb_txeof(struct tx_ring *); static __inline void igb_rx_discard(struct rx_ring *, int); static __inline void igb_rx_input(struct rx_ring *, struct ifnet *, struct mbuf *, u32); static bool igb_rxeof(struct igb_queue *, int, int *); static void igb_rx_checksum(u32, struct mbuf *, u32); static int igb_tx_ctx_setup(struct tx_ring *, struct mbuf *); static bool igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *); static void igb_set_promisc(struct adapter *); static void igb_disable_promisc(struct adapter *); static void igb_set_multi(struct adapter *); static void igb_update_link_status(struct adapter *); static void igb_refresh_mbufs(struct rx_ring *, int); static void igb_register_vlan(void *, struct ifnet *, u16); static void igb_unregister_vlan(void *, struct ifnet *, u16); static void igb_setup_vlan_hw_support(struct adapter *); static int igb_xmit(struct tx_ring *, struct mbuf **); static int igb_dma_malloc(struct adapter *, bus_size_t, struct igb_dma_alloc *, int); static void igb_dma_free(struct adapter *, struct igb_dma_alloc *); static int igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS); static void igb_print_nvm_info(struct adapter *); static int igb_is_valid_ether_addr(u8 *); static void igb_add_hw_stats(struct adapter *); static void igb_vf_init_stats(struct adapter *); static void igb_update_vf_stats_counters(struct adapter *); /* Management and WOL Support */ static void igb_init_manageability(struct adapter *); static void igb_release_manageability(struct adapter *); static void igb_get_hw_control(struct adapter *); static void igb_release_hw_control(struct adapter *); static void igb_enable_wakeup(device_t); static void igb_led_func(void *, int); static int igb_irq_fast(void *); static void igb_add_rx_process_limit(struct adapter *, const char *, const char *, int *, int); static void igb_handle_que(void *context, int pending); static void igb_handle_link(void *context, int pending); /* These are MSIX only irq handlers */ static void igb_msix_que(void *); static void igb_msix_link(void *); #ifdef DEVICE_POLLING static poll_handler_t igb_poll; #endif /* POLLING */ /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t igb_methods[] = { /* Device interface */ DEVMETHOD(device_probe, igb_probe), DEVMETHOD(device_attach, igb_attach), DEVMETHOD(device_detach, igb_detach), DEVMETHOD(device_shutdown, igb_shutdown), DEVMETHOD(device_suspend, igb_suspend), DEVMETHOD(device_resume, igb_resume), {0, 0} }; static driver_t igb_driver = { "igb", igb_methods, sizeof(struct adapter), }; static devclass_t igb_devclass; DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0); MODULE_DEPEND(igb, pci, 1, 1, 1); MODULE_DEPEND(igb, ether, 1, 1, 1); /********************************************************************* * Tunable default values. *********************************************************************/ /* Descriptor defaults */ static int igb_rxd = IGB_DEFAULT_RXD; static int igb_txd = IGB_DEFAULT_TXD; TUNABLE_INT("hw.igb.rxd", &igb_rxd); TUNABLE_INT("hw.igb.txd", &igb_txd); /* ** AIM: Adaptive Interrupt Moderation ** which means that the interrupt rate ** is varied over time based on the ** traffic for that interrupt vector */ static int igb_enable_aim = TRUE; TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim); /* * MSIX should be the default for best performance, * but this allows it to be forced off for testing. */ static int igb_enable_msix = 1; TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix); /* - * Header split has seemed to be beneficial in - * many circumstances tested, however there have - * been some stability issues, so the default is - * off. - */ +** Tuneable Interrupt rate +*/ +static int igb_max_interrupt_rate = 8000; +TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate); + +/* +** Header split causes the packet header to +** be dma'd to a seperate mbuf from the payload. +** this can have memory alignment benefits. But +** another plus is that small packets often fit +** into the header and thus use no cluster. Its +** a very workload dependent type feature. +*/ static bool igb_header_split = FALSE; TUNABLE_INT("hw.igb.hdr_split", &igb_header_split); /* ** This will autoconfigure based on ** the number of CPUs if left at 0. */ static int igb_num_queues = 0; TUNABLE_INT("hw.igb.num_queues", &igb_num_queues); /* How many packets rxeof tries to clean at a time */ static int igb_rx_process_limit = 100; TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit); /* Flow control setting - default to FULL */ static int igb_fc_setting = e1000_fc_full; TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting); -/* -** Shadow VFTA table, this is needed because -** the real filter table gets cleared during -** a soft reset and the driver needs to be able -** to repopulate it. -*/ -static u32 igb_shadow_vfta[IGB_VFTA_SIZE]; - - /********************************************************************* * Device identification routine * * igb_probe determines if the driver should be loaded on * adapter based on PCI vendor/device id of the adapter. * * return BUS_PROBE_DEFAULT on success, positive on failure *********************************************************************/ static int igb_probe(device_t dev) { char adapter_name[60]; uint16_t pci_vendor_id = 0; uint16_t pci_device_id = 0; uint16_t pci_subvendor_id = 0; uint16_t pci_subdevice_id = 0; igb_vendor_info_t *ent; INIT_DEBUGOUT("igb_probe: begin"); pci_vendor_id = pci_get_vendor(dev); if (pci_vendor_id != IGB_VENDOR_ID) return (ENXIO); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); ent = igb_vendor_info_array; while (ent->vendor_id != 0) { if ((pci_vendor_id == ent->vendor_id) && (pci_device_id == ent->device_id) && ((pci_subvendor_id == ent->subvendor_id) || (ent->subvendor_id == PCI_ANY_ID)) && ((pci_subdevice_id == ent->subdevice_id) || (ent->subdevice_id == PCI_ANY_ID))) { sprintf(adapter_name, "%s %s", igb_strings[ent->index], igb_driver_version); device_set_desc_copy(dev, adapter_name); return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded. * This routine identifies the type of hardware, allocates all resources * and initializes the hardware. * * return 0 on success, positive on failure *********************************************************************/ static int igb_attach(device_t dev) { struct adapter *adapter; int error = 0; u16 eeprom_data; INIT_DEBUGOUT("igb_attach: begin"); adapter = device_get_softc(dev); adapter->dev = adapter->osdep.dev = dev; IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); /* SYSCTL stuff */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, igb_sysctl_nvm_info, "I", "NVM Information"); SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW, &igb_fc_setting, 0, "Flow Control"); SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW, &igb_enable_aim, 1, "Interrupt Moderation"); callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); /* Determine hardware and mac info */ igb_identify_hardware(adapter); /* Setup PCI resources */ if (igb_allocate_pci_resources(adapter)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_pci; } /* Do Shared Code initialization */ if (e1000_setup_init_funcs(&adapter->hw, TRUE)) { device_printf(dev, "Setup of Shared code failed\n"); error = ENXIO; goto err_pci; } e1000_get_bus_info(&adapter->hw); /* Sysctls for limiting the amount of work done in the taskqueue */ igb_add_rx_process_limit(adapter, "rx_processing_limit", "max number of rx packets to process", &adapter->rx_process_limit, igb_rx_process_limit); /* * Validate number of transmit and receive descriptors. It * must not exceed hardware maximum, and must be multiple * of E1000_DBA_ALIGN. */ if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 || (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) { device_printf(dev, "Using %d TX descriptors instead of %d!\n", IGB_DEFAULT_TXD, igb_txd); adapter->num_tx_desc = IGB_DEFAULT_TXD; } else adapter->num_tx_desc = igb_txd; if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 || (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) { device_printf(dev, "Using %d RX descriptors instead of %d!\n", IGB_DEFAULT_RXD, igb_rxd); adapter->num_rx_desc = IGB_DEFAULT_RXD; } else adapter->num_rx_desc = igb_rxd; adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_wait_to_complete = FALSE; adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; /* Copper options */ if (adapter->hw.phy.media_type == e1000_media_type_copper) { adapter->hw.phy.mdix = AUTO_ALL_MODES; adapter->hw.phy.disable_polarity_correction = FALSE; adapter->hw.phy.ms_type = IGB_MASTER_SLAVE; } /* * Set the frame limits assuming * standard ethernet sized frames. */ adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE; adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE; /* ** Allocate and Setup Queues */ if (igb_allocate_queues(adapter)) { error = ENOMEM; goto err_pci; } /* Allocate the appropriate stats memory */ if (adapter->hw.mac.type == e1000_vfadapt) { adapter->stats = (struct e1000_vf_stats *)malloc(sizeof \ (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO); igb_vf_init_stats(adapter); } else adapter->stats = (struct e1000_hw_stats *)malloc(sizeof \ (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO); if (adapter->stats == NULL) { device_printf(dev, "Can not allocate stats memory\n"); error = ENOMEM; goto err_late; } /* Allocate multicast array memory. */ adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); if (adapter->mta == NULL) { device_printf(dev, "Can not allocate multicast setup array\n"); error = ENOMEM; goto err_late; } /* ** Start from a known state, this is ** important in reading the nvm and ** mac from that. */ e1000_reset_hw(&adapter->hw); /* Make sure we have a good EEPROM before we read from it */ if (e1000_validate_nvm_checksum(&adapter->hw) < 0) { /* ** Some PCI-E parts fail the first check due to ** the link being in sleep state, call it again, ** if it fails a second time its a real issue. */ if (e1000_validate_nvm_checksum(&adapter->hw) < 0) { device_printf(dev, "The EEPROM Checksum Is Not Valid\n"); error = EIO; goto err_late; } } /* ** Copy the permanent MAC address out of the EEPROM */ if (e1000_read_mac_addr(&adapter->hw) < 0) { device_printf(dev, "EEPROM read error while reading MAC" " address\n"); error = EIO; goto err_late; } /* Check its sanity */ if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) { device_printf(dev, "Invalid MAC address\n"); error = EIO; goto err_late; } /* ** Configure Interrupts */ if ((adapter->msix > 1) && (igb_enable_msix)) error = igb_allocate_msix(adapter); else /* MSI or Legacy */ error = igb_allocate_legacy(adapter); if (error) goto err_late; /* Setup OS specific network interface */ if (igb_setup_interface(dev, adapter) != 0) goto err_late; /* Now get a good starting state */ igb_reset(adapter); /* Initialize statistics */ igb_update_stats_counters(adapter); adapter->hw.mac.get_link_status = 1; igb_update_link_status(adapter); /* Indicate SOL/IDER usage */ if (e1000_check_reset_block(&adapter->hw)) device_printf(dev, "PHY reset is blocked due to SOL/IDER session.\n"); /* Determine if we have to control management hardware */ adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw); /* * Setup Wake-on-Lan */ /* APME bit in EEPROM is mapped to WUC.APME */ eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME; if (eeprom_data) adapter->wol = E1000_WUFC_MAG; /* Register for VLAN events */ adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); igb_add_hw_stats(adapter); /* Tell the stack that the interface is not active */ adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); adapter->led_dev = led_create(igb_led_func, adapter, device_get_nameunit(dev)); INIT_DEBUGOUT("igb_attach: end"); return (0); err_late: igb_free_transmit_structures(adapter); igb_free_receive_structures(adapter); igb_release_hw_control(adapter); if (adapter->ifp != NULL) if_free(adapter->ifp); err_pci: igb_free_pci_resources(adapter); free(adapter->mta, M_DEVBUF); IGB_CORE_LOCK_DESTROY(adapter); return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. * * return 0 on success, positive on failure *********************************************************************/ static int igb_detach(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct ifnet *ifp = adapter->ifp; INIT_DEBUGOUT("igb_detach: begin"); /* Make sure VLANS are not using driver */ if (adapter->ifp->if_vlantrunk != NULL) { device_printf(dev,"Vlan in use, detach first\n"); return (EBUSY); } if (adapter->led_dev != NULL) led_destroy(adapter->led_dev); #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) ether_poll_deregister(ifp); #endif IGB_CORE_LOCK(adapter); adapter->in_detach = 1; igb_stop(adapter); IGB_CORE_UNLOCK(adapter); e1000_phy_hw_reset(&adapter->hw); /* Give control back to firmware */ igb_release_manageability(adapter); igb_release_hw_control(adapter); if (adapter->wol) { E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); igb_enable_wakeup(dev); } /* Unregister VLAN events */ if (adapter->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); if (adapter->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); ether_ifdetach(adapter->ifp); callout_drain(&adapter->timer); igb_free_pci_resources(adapter); bus_generic_detach(dev); if_free(ifp); igb_free_transmit_structures(adapter); igb_free_receive_structures(adapter); free(adapter->mta, M_DEVBUF); IGB_CORE_LOCK_DESTROY(adapter); return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int igb_shutdown(device_t dev) { return igb_suspend(dev); } /* * Suspend/resume device methods. */ static int igb_suspend(device_t dev) { struct adapter *adapter = device_get_softc(dev); IGB_CORE_LOCK(adapter); igb_stop(adapter); igb_release_manageability(adapter); igb_release_hw_control(adapter); if (adapter->wol) { E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); igb_enable_wakeup(dev); } IGB_CORE_UNLOCK(adapter); return bus_generic_suspend(dev); } static int igb_resume(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct ifnet *ifp = adapter->ifp; IGB_CORE_LOCK(adapter); igb_init_locked(adapter); igb_init_manageability(adapter); if ((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING)) igb_start(ifp); IGB_CORE_UNLOCK(adapter); return bus_generic_resume(dev); } /********************************************************************* * Transmit entry point * * igb_start is called by the stack to initiate a transmit. * The driver will remain in this routine as long as there are * packets to transmit and transmit resources are available. * In case resources are not available stack is notified and * the packet is requeued. **********************************************************************/ static void igb_start_locked(struct tx_ring *txr, struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; struct mbuf *m_head; IGB_TX_LOCK_ASSERT(txr); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return; if (!adapter->link_active) return; /* Call cleanup if number of TX descriptors low */ if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) igb_txeof(txr); while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) { ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; /* * Encapsulation can modify our pointer, and or make it * NULL on failure. In that event, we can't requeue. */ if (igb_xmit(txr, &m_head)) { if (m_head == NULL) break; ifp->if_drv_flags |= IFF_DRV_OACTIVE; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); break; } /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, m_head); /* Set watchdog on */ txr->watchdog_time = ticks; - txr->watchdog_check = TRUE; + txr->queue_status = IGB_QUEUE_WORKING; } } /* * Legacy TX driver routine, called from the * stack, always uses tx[0], and spins for it. * Should not be used with multiqueue tx */ static void igb_start(struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; struct tx_ring *txr = adapter->tx_rings; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IGB_TX_LOCK(txr); igb_start_locked(txr, ifp); IGB_TX_UNLOCK(txr); } return; } #if __FreeBSD_version >= 800000 /* ** Multiqueue Transmit driver ** */ static int igb_mq_start(struct ifnet *ifp, struct mbuf *m) { struct adapter *adapter = ifp->if_softc; struct igb_queue *que; struct tx_ring *txr; int i = 0, err = 0; /* Which queue to use */ if ((m->m_flags & M_FLOWID) != 0) i = m->m_pkthdr.flowid % adapter->num_queues; txr = &adapter->tx_rings[i]; que = &adapter->queues[i]; if (IGB_TX_TRYLOCK(txr)) { err = igb_mq_start_locked(ifp, txr, m); IGB_TX_UNLOCK(txr); } else { err = drbr_enqueue(ifp, txr->br, m); taskqueue_enqueue(que->tq, &que->que_task); } return (err); } static int igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m) { struct adapter *adapter = txr->adapter; struct mbuf *next; int err = 0, enq; IGB_TX_LOCK_ASSERT(txr); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || adapter->link_active == 0) { if (m != NULL) err = drbr_enqueue(ifp, txr->br, m); return (err); } /* Call cleanup if number of TX descriptors low */ if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) igb_txeof(txr); enq = 0; if (m == NULL) { next = drbr_dequeue(ifp, txr->br); } else if (drbr_needs_enqueue(ifp, txr->br)) { if ((err = drbr_enqueue(ifp, txr->br, m)) != 0) return (err); next = drbr_dequeue(ifp, txr->br); } else next = m; /* Process the queue */ while (next != NULL) { if ((err = igb_xmit(txr, &next)) != 0) { if (next != NULL) err = drbr_enqueue(ifp, txr->br, next); break; } enq++; drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags); ETHER_BPF_MTAP(ifp, next); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) { ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } next = drbr_dequeue(ifp, txr->br); } if (enq > 0) { /* Set the watchdog */ - txr->watchdog_check = TRUE; + txr->queue_status = IGB_QUEUE_WORKING; txr->watchdog_time = ticks; } return (err); } /* ** Flush all ring buffers */ static void igb_qflush(struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; struct tx_ring *txr = adapter->tx_rings; struct mbuf *m; for (int i = 0; i < adapter->num_queues; i++, txr++) { IGB_TX_LOCK(txr); while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) m_freem(m); IGB_TX_UNLOCK(txr); } if_qflush(ifp); } #endif /* __FreeBSD_version >= 800000 */ /********************************************************************* * Ioctl entry point * * igb_ioctl is called when the user wants to configure the * interface. * * return 0 on success, positive on failure **********************************************************************/ static int igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct adapter *adapter = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; #ifdef INET struct ifaddr *ifa = (struct ifaddr *)data; #endif int error = 0; if (adapter->in_detach) return (error); switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) { /* * XXX * Since resetting hardware takes a very long time * and results in link renegotiation we only * initialize the hardware only when it is absolutely * required. */ ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { IGB_CORE_LOCK(adapter); igb_init_locked(adapter); IGB_CORE_UNLOCK(adapter); } if (!(ifp->if_flags & IFF_NOARP)) arp_ifinit(ifp, ifa); } else #endif error = ether_ioctl(ifp, command, data); break; case SIOCSIFMTU: { int max_frame_size; IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); IGB_CORE_LOCK(adapter); max_frame_size = 9234; if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN) { IGB_CORE_UNLOCK(adapter); error = EINVAL; break; } ifp->if_mtu = ifr->ifr_mtu; adapter->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; igb_init_locked(adapter); IGB_CORE_UNLOCK(adapter); break; } case SIOCSIFFLAGS: IOCTL_DEBUGOUT("ioctl rcv'd:\ SIOCSIFFLAGS (Set Interface Flags)"); IGB_CORE_LOCK(adapter); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ adapter->if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { igb_disable_promisc(adapter); igb_set_promisc(adapter); } } else igb_init_locked(adapter); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) igb_stop(adapter); adapter->if_flags = ifp->if_flags; IGB_CORE_UNLOCK(adapter); break; case SIOCADDMULTI: case SIOCDELMULTI: IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IGB_CORE_LOCK(adapter); igb_disable_intr(adapter); igb_set_multi(adapter); #ifdef DEVICE_POLLING if (!(ifp->if_capenable & IFCAP_POLLING)) #endif igb_enable_intr(adapter); IGB_CORE_UNLOCK(adapter); } break; case SIOCSIFMEDIA: + /* + ** As the speed/duplex settings are being + ** changed, we need toreset the PHY. + */ + adapter->hw.phy.reset_disable = FALSE; /* Check SOL/IDER usage */ IGB_CORE_LOCK(adapter); if (e1000_check_reset_block(&adapter->hw)) { IGB_CORE_UNLOCK(adapter); device_printf(adapter->dev, "Media change is" " blocked due to SOL/IDER session.\n"); break; } IGB_CORE_UNLOCK(adapter); case SIOCGIFMEDIA: IOCTL_DEBUGOUT("ioctl rcv'd: \ SIOCxIFMEDIA (Get/Set Interface Media)"); error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); break; case SIOCSIFCAP: { int mask, reinit; IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)"); reinit = 0; mask = ifr->ifr_reqcap ^ ifp->if_capenable; #ifdef DEVICE_POLLING if (mask & IFCAP_POLLING) { if (ifr->ifr_reqcap & IFCAP_POLLING) { error = ether_poll_register(igb_poll, ifp); if (error) return (error); IGB_CORE_LOCK(adapter); igb_disable_intr(adapter); ifp->if_capenable |= IFCAP_POLLING; IGB_CORE_UNLOCK(adapter); } else { error = ether_poll_deregister(ifp); /* Enable interrupt even in error case */ IGB_CORE_LOCK(adapter); igb_enable_intr(adapter); ifp->if_capenable &= ~IFCAP_POLLING; IGB_CORE_UNLOCK(adapter); } } #endif if (mask & IFCAP_HWCSUM) { ifp->if_capenable ^= IFCAP_HWCSUM; reinit = 1; } if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; reinit = 1; } if (mask & IFCAP_VLAN_HWTAGGING) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; reinit = 1; } if (mask & IFCAP_VLAN_HWFILTER) { ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; reinit = 1; } if (mask & IFCAP_LRO) { ifp->if_capenable ^= IFCAP_LRO; reinit = 1; } if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) igb_init(adapter); VLAN_CAPABILITIES(ifp); break; } default: error = ether_ioctl(ifp, command, data); break; } return (error); } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * * return 0 on success, positive on failure **********************************************************************/ static void igb_init_locked(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; INIT_DEBUGOUT("igb_init: begin"); IGB_CORE_LOCK_ASSERT(adapter); igb_disable_intr(adapter); callout_stop(&adapter->timer); /* Get the latest mac address, User can use a LAA */ bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr, ETHER_ADDR_LEN); /* Put the address into the Receive Address Array */ e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); igb_reset(adapter); igb_update_link_status(adapter); E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); - /* Use real VLAN Filter support? */ - if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) { - if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) - /* Use real VLAN Filter support */ - igb_setup_vlan_hw_support(adapter); - else { - u32 ctrl; - ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); - ctrl |= E1000_CTRL_VME; - E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); - } - } - /* Set hardware offload abilities */ ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TXCSUM) { ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); #if __FreeBSD_version >= 800000 if (adapter->hw.mac.type == e1000_82576) ifp->if_hwassist |= CSUM_SCTP; #endif } if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_TSO; /* Configure for OS presence */ igb_init_manageability(adapter); /* Prepare transmit descriptors and buffers */ igb_setup_transmit_structures(adapter); igb_initialize_transmit_units(adapter); /* Setup Multicast table */ igb_set_multi(adapter); /* ** Figure out the desired mbuf pool ** for doing jumbo/packetsplit */ - if (ifp->if_mtu > ETHERMTU) + if (adapter->max_frame_size <= 2048) + adapter->rx_mbuf_sz = MCLBYTES; + else if (adapter->max_frame_size <= 4096) adapter->rx_mbuf_sz = MJUMPAGESIZE; else - adapter->rx_mbuf_sz = MCLBYTES; + adapter->rx_mbuf_sz = MJUM9BYTES; /* Prepare receive descriptors and buffers */ if (igb_setup_receive_structures(adapter)) { device_printf(dev, "Could not setup receive structures\n"); return; } igb_initialize_receive_units(adapter); + /* Use real VLAN Filter support? */ + if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) { + if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) + /* Use real VLAN Filter support */ + igb_setup_vlan_hw_support(adapter); + else { + u32 ctrl; + ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); + ctrl |= E1000_CTRL_VME; + E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); + } + } + /* Don't lose promiscuous settings */ igb_set_promisc(adapter); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; callout_reset(&adapter->timer, hz, igb_local_timer, adapter); e1000_clear_hw_cntrs_base_generic(&adapter->hw); if (adapter->msix > 1) /* Set up queue routing */ igb_configure_queues(adapter); - /* Set up VLAN tag offload and filter */ - igb_setup_vlan_hw_support(adapter); - /* this clears any pending interrupts */ E1000_READ_REG(&adapter->hw, E1000_ICR); #ifdef DEVICE_POLLING /* * Only enable interrupts if we are not polling, make sure * they are off otherwise. */ if (ifp->if_capenable & IFCAP_POLLING) igb_disable_intr(adapter); else #endif /* DEVICE_POLLING */ { igb_enable_intr(adapter); E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC); } /* Don't reset the phy next time init gets called */ adapter->hw.phy.reset_disable = TRUE; } static void igb_init(void *arg) { struct adapter *adapter = arg; IGB_CORE_LOCK(adapter); igb_init_locked(adapter); IGB_CORE_UNLOCK(adapter); } static void igb_handle_que(void *context, int pending) { struct igb_queue *que = context; struct adapter *adapter = que->adapter; struct tx_ring *txr = que->txr; struct ifnet *ifp = adapter->ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { bool more; more = igb_rxeof(que, -1, NULL); IGB_TX_LOCK(txr); if (igb_txeof(txr)) more = TRUE; #if __FreeBSD_version >= 800000 if (!drbr_empty(ifp, txr->br)) igb_mq_start_locked(ifp, txr, NULL); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) igb_start_locked(txr, ifp); #endif IGB_TX_UNLOCK(txr); if (more) { taskqueue_enqueue(que->tq, &que->que_task); return; } } #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) return; #endif /* Reenable this interrupt */ if (que->eims) E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims); else igb_enable_intr(adapter); } /* Deal with link in a sleepable context */ static void igb_handle_link(void *context, int pending) { struct adapter *adapter = context; adapter->hw.mac.get_link_status = 1; igb_update_link_status(adapter); } /********************************************************************* * * MSI/Legacy Deferred * Interrupt Service routine * *********************************************************************/ static int igb_irq_fast(void *arg) { struct adapter *adapter = arg; struct igb_queue *que = adapter->queues; u32 reg_icr; reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); /* Hot eject? */ if (reg_icr == 0xffffffff) return FILTER_STRAY; /* Definitely not our interrupt. */ if (reg_icr == 0x0) return FILTER_STRAY; if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0) return FILTER_STRAY; /* * Mask interrupts until the taskqueue is finished running. This is * cheap, just assume that it is needed. This also works around the * MSI message reordering errata on certain systems. */ igb_disable_intr(adapter); taskqueue_enqueue(que->tq, &que->que_task); /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) taskqueue_enqueue(que->tq, &adapter->link_task); if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; return FILTER_HANDLED; } #ifdef DEVICE_POLLING /********************************************************************* * * Legacy polling routine : if using this code you MUST be sure that * multiqueue is not defined, ie, set igb_num_queues to 1. * *********************************************************************/ #if __FreeBSD_version >= 800000 #define POLL_RETURN_COUNT(a) (a) static int #else #define POLL_RETURN_COUNT(a) static void #endif igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) { struct adapter *adapter = ifp->if_softc; struct igb_queue *que = adapter->queues; struct tx_ring *txr = adapter->tx_rings; u32 reg_icr, rx_done = 0; u32 loop = IGB_MAX_LOOP; bool more; IGB_CORE_LOCK(adapter); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { IGB_CORE_UNLOCK(adapter); return POLL_RETURN_COUNT(rx_done); } if (cmd == POLL_AND_CHECK_STATUS) { reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) igb_handle_link(adapter, 0); if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; } IGB_CORE_UNLOCK(adapter); igb_rxeof(que, count, &rx_done); IGB_TX_LOCK(txr); do { more = igb_txeof(txr); } while (loop-- && more); #if __FreeBSD_version >= 800000 if (!drbr_empty(ifp, txr->br)) igb_mq_start_locked(ifp, txr, NULL); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) igb_start_locked(txr, ifp); #endif IGB_TX_UNLOCK(txr); return POLL_RETURN_COUNT(rx_done); } #endif /* DEVICE_POLLING */ /********************************************************************* * * MSIX TX Interrupt Service routine * **********************************************************************/ static void igb_msix_que(void *arg) { struct igb_queue *que = arg; struct adapter *adapter = que->adapter; struct tx_ring *txr = que->txr; struct rx_ring *rxr = que->rxr; u32 newitr = 0; bool more_tx, more_rx; E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims); ++que->irqs; IGB_TX_LOCK(txr); more_tx = igb_txeof(txr); IGB_TX_UNLOCK(txr); more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL); if (igb_enable_aim == FALSE) goto no_calc; /* ** Do Adaptive Interrupt Moderation: ** - Write out last calculated setting ** - Calculate based on average size over ** the last interval. */ if (que->eitr_setting) E1000_WRITE_REG(&adapter->hw, E1000_EITR(que->msix), que->eitr_setting); que->eitr_setting = 0; /* Idle, do nothing */ if ((txr->bytes == 0) && (rxr->bytes == 0)) goto no_calc; /* Used half Default if sub-gig */ if (adapter->link_speed != 1000) newitr = IGB_DEFAULT_ITR / 2; else { if ((txr->bytes) && (txr->packets)) newitr = txr->bytes/txr->packets; if ((rxr->bytes) && (rxr->packets)) newitr = max(newitr, (rxr->bytes / rxr->packets)); newitr += 24; /* account for hardware frame, crc */ /* set an upper boundary */ newitr = min(newitr, 3000); /* Be nice to the mid range */ if ((newitr > 300) && (newitr < 1200)) newitr = (newitr / 3); else newitr = (newitr / 2); } newitr &= 0x7FFC; /* Mask invalid bits */ if (adapter->hw.mac.type == e1000_82575) newitr |= newitr << 16; else newitr |= E1000_EITR_CNT_IGNR; /* save for next interrupt */ que->eitr_setting = newitr; /* Reset state */ txr->bytes = 0; txr->packets = 0; rxr->bytes = 0; rxr->packets = 0; no_calc: /* Schedule a clean task if needed*/ if (more_tx || more_rx) taskqueue_enqueue(que->tq, &que->que_task); else /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims); return; } /********************************************************************* * * MSIX Link Interrupt Service routine * **********************************************************************/ static void igb_msix_link(void *arg) { struct adapter *adapter = arg; u32 icr; ++adapter->link_irq; icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (!(icr & E1000_ICR_LSC)) goto spurious; igb_handle_link(adapter, 0); spurious: /* Rearm */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC); E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask); return; } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. * **********************************************************************/ static void igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { struct adapter *adapter = ifp->if_softc; u_char fiber_type = IFM_1000_SX; INIT_DEBUGOUT("igb_media_status: begin"); IGB_CORE_LOCK(adapter); igb_update_link_status(adapter); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!adapter->link_active) { IGB_CORE_UNLOCK(adapter); return; } ifmr->ifm_status |= IFM_ACTIVE; if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) ifmr->ifm_active |= fiber_type | IFM_FDX; else { switch (adapter->link_speed) { case 10: ifmr->ifm_active |= IFM_10_T; break; case 100: ifmr->ifm_active |= IFM_100_TX; break; case 1000: ifmr->ifm_active |= IFM_1000_T; break; } if (adapter->link_duplex == FULL_DUPLEX) ifmr->ifm_active |= IFM_FDX; else ifmr->ifm_active |= IFM_HDX; } IGB_CORE_UNLOCK(adapter); } /********************************************************************* * * Media Ioctl callback * * This routine is called when the user changes speed/duplex using * media/mediopt option with ifconfig. * **********************************************************************/ static int igb_media_change(struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; struct ifmedia *ifm = &adapter->media; INIT_DEBUGOUT("igb_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); IGB_CORE_LOCK(adapter); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; break; case IFM_1000_LX: case IFM_1000_SX: case IFM_1000_T: adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; break; case IFM_100_TX: adapter->hw.mac.autoneg = FALSE; adapter->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL; else adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF; break; case IFM_10_T: adapter->hw.mac.autoneg = FALSE; adapter->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL; else adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF; break; default: device_printf(adapter->dev, "Unsupported media type\n"); } - /* As the speed/duplex settings my have changed we need to - * reset the PHY. - */ - adapter->hw.phy.reset_disable = FALSE; - igb_init_locked(adapter); IGB_CORE_UNLOCK(adapter); return (0); } /********************************************************************* * * This routine maps the mbufs to Advanced TX descriptors. * used by the 82575 adapter. * **********************************************************************/ static int igb_xmit(struct tx_ring *txr, struct mbuf **m_headp) { struct adapter *adapter = txr->adapter; bus_dma_segment_t segs[IGB_MAX_SCATTER]; bus_dmamap_t map; struct igb_tx_buffer *tx_buffer, *tx_buffer_mapped; union e1000_adv_tx_desc *txd = NULL; struct mbuf *m_head; u32 olinfo_status = 0, cmd_type_len = 0; int nsegs, i, j, error, first, last = 0; u32 hdrlen = 0; m_head = *m_headp; /* Set basic descriptor constants */ cmd_type_len |= E1000_ADVTXD_DTYP_DATA; cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT; if (m_head->m_flags & M_VLANTAG) cmd_type_len |= E1000_ADVTXD_DCMD_VLE; /* * Force a cleanup if number of TX descriptors * available hits the threshold */ if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) { igb_txeof(txr); /* Now do we at least have a minimal? */ if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) { txr->no_desc_avail++; return (ENOBUFS); } } /* * Map the packet for DMA. * * Capture the first descriptor index, * this descriptor will have the index * of the EOP which is the only one that * now gets a DONE bit writeback. */ first = txr->next_avail_desc; tx_buffer = &txr->tx_buffers[first]; tx_buffer_mapped = tx_buffer; map = tx_buffer->map; error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { struct mbuf *m; m = m_defrag(*m_headp, M_DONTWAIT); if (m == NULL) { adapter->mbuf_defrag_failed++; m_freem(*m_headp); *m_headp = NULL; return (ENOBUFS); } *m_headp = m; /* Try it again */ error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); if (error == ENOMEM) { adapter->no_tx_dma_setup++; return (error); } else if (error != 0) { adapter->no_tx_dma_setup++; m_freem(*m_headp); *m_headp = NULL; return (error); } } else if (error == ENOMEM) { adapter->no_tx_dma_setup++; return (error); } else if (error != 0) { adapter->no_tx_dma_setup++; m_freem(*m_headp); *m_headp = NULL; return (error); } /* Check again to be sure we have enough descriptors */ if (nsegs > (txr->tx_avail - 2)) { txr->no_desc_avail++; bus_dmamap_unload(txr->txtag, map); return (ENOBUFS); } m_head = *m_headp; /* * Set up the context descriptor: * used when any hardware offload is done. * This includes CSUM, VLAN, and TSO. It * will use the first descriptor. */ if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { if (igb_tso_setup(txr, m_head, &hdrlen)) { cmd_type_len |= E1000_ADVTXD_DCMD_TSE; olinfo_status |= E1000_TXD_POPTS_IXSM << 8; olinfo_status |= E1000_TXD_POPTS_TXSM << 8; } else return (ENXIO); } else if (igb_tx_ctx_setup(txr, m_head)) olinfo_status |= E1000_TXD_POPTS_TXSM << 8; /* Calculate payload length */ olinfo_status |= ((m_head->m_pkthdr.len - hdrlen) << E1000_ADVTXD_PAYLEN_SHIFT); /* 82575 needs the queue index added */ if (adapter->hw.mac.type == e1000_82575) olinfo_status |= txr->me << 4; /* Set up our transmit descriptors */ i = txr->next_avail_desc; for (j = 0; j < nsegs; j++) { bus_size_t seg_len; bus_addr_t seg_addr; tx_buffer = &txr->tx_buffers[i]; txd = (union e1000_adv_tx_desc *)&txr->tx_base[i]; seg_addr = segs[j].ds_addr; seg_len = segs[j].ds_len; txd->read.buffer_addr = htole64(seg_addr); txd->read.cmd_type_len = htole32(cmd_type_len | seg_len); txd->read.olinfo_status = htole32(olinfo_status); last = i; if (++i == adapter->num_tx_desc) i = 0; tx_buffer->m_head = NULL; tx_buffer->next_eop = -1; } txr->next_avail_desc = i; txr->tx_avail -= nsegs; tx_buffer->m_head = m_head; tx_buffer_mapped->map = tx_buffer->map; tx_buffer->map = map; bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); /* * Last Descriptor of Packet * needs End Of Packet (EOP) * and Report Status (RS) */ txd->read.cmd_type_len |= htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS); /* * Keep track in the first buffer which * descriptor will be written back */ tx_buffer = &txr->tx_buffers[first]; tx_buffer->next_eop = last; txr->watchdog_time = ticks; /* * Advance the Transmit Descriptor Tail (TDT), this tells the E1000 * that this frame is available to transmit. */ bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i); ++txr->tx_packets; return (0); } static void igb_set_promisc(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; struct e1000_hw *hw = &adapter->hw; u32 reg; if (hw->mac.type == e1000_vfadapt) { e1000_promisc_set_vf(hw, e1000_promisc_enabled); return; } reg = E1000_READ_REG(hw, E1000_RCTL); if (ifp->if_flags & IFF_PROMISC) { reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE); E1000_WRITE_REG(hw, E1000_RCTL, reg); } else if (ifp->if_flags & IFF_ALLMULTI) { reg |= E1000_RCTL_MPE; reg &= ~E1000_RCTL_UPE; E1000_WRITE_REG(hw, E1000_RCTL, reg); } } static void igb_disable_promisc(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 reg; if (hw->mac.type == e1000_vfadapt) { e1000_promisc_set_vf(hw, e1000_promisc_disabled); return; } reg = E1000_READ_REG(hw, E1000_RCTL); reg &= (~E1000_RCTL_UPE); reg &= (~E1000_RCTL_MPE); E1000_WRITE_REG(hw, E1000_RCTL, reg); } /********************************************************************* * Multicast Update * * This routine is called whenever multicast address list is updated. * **********************************************************************/ static void igb_set_multi(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; struct ifmultiaddr *ifma; u32 reg_rctl = 0; u8 *mta; int mcnt = 0; IOCTL_DEBUGOUT("igb_set_multi: begin"); mta = adapter->mta; bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES); #if __FreeBSD_version < 800000 IF_ADDR_LOCK(ifp); #else if_maddr_rlock(ifp); #endif TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) break; bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN); mcnt++; } #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_maddr_runlock(ifp); #endif if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); } /********************************************************************* * Timer routine: * This routine checks for link status, * updates statistics, and does the watchdog. * **********************************************************************/ static void igb_local_timer(void *arg) { struct adapter *adapter = arg; device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; IGB_CORE_LOCK_ASSERT(adapter); igb_update_link_status(adapter); igb_update_stats_counters(adapter); /* ** If flow control has paused us since last checking ** it invalidates the watchdog timing, so dont run it. */ if (adapter->pause_frames) { adapter->pause_frames = 0; goto out; } /* ** Watchdog: check for time since any descriptor was cleaned */ - for (int i = 0; i < adapter->num_queues; i++, txr++) { - IGB_TX_LOCK(txr); - if ((txr->watchdog_check == FALSE) || - (txr->tx_avail == adapter->num_tx_desc)) { - IGB_TX_UNLOCK(txr); - continue; - } - if ((ticks - txr->watchdog_time) > IGB_WATCHDOG) + for (int i = 0; i < adapter->num_queues; i++, txr++) + if (txr->queue_status == IGB_QUEUE_HUNG) goto timeout; - IGB_TX_UNLOCK(txr); - } - out: callout_reset(&adapter->timer, hz, igb_local_timer, adapter); return; timeout: device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me, E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)), E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me))); device_printf(dev,"TX(%d) desc avail = %d," "Next TX to Clean = %d\n", txr->me, txr->tx_avail, txr->next_to_clean); adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; adapter->watchdog_events++; - IGB_TX_UNLOCK(txr); igb_init_locked(adapter); } static void igb_update_link_status(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; u32 link_check = 0; /* Get the cached link value or read for real */ switch (hw->phy.media_type) { case e1000_media_type_copper: if (hw->mac.get_link_status) { /* Do the work to read phy */ e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; } else link_check = TRUE; break; case e1000_media_type_fiber: e1000_check_for_link(hw); link_check = (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU); break; case e1000_media_type_internal_serdes: e1000_check_for_link(hw); link_check = adapter->hw.mac.serdes_has_link; break; /* VF device is type_unknown */ case e1000_media_type_unknown: e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; /* Fall thru */ default: break; } /* Now we check if a transition has happened */ if (link_check && (adapter->link_active == 0)) { e1000_get_speed_and_duplex(&adapter->hw, &adapter->link_speed, &adapter->link_duplex); if (bootverbose) device_printf(dev, "Link is up %d Mbps %s\n", adapter->link_speed, ((adapter->link_duplex == FULL_DUPLEX) ? "Full Duplex" : "Half Duplex")); adapter->link_active = 1; ifp->if_baudrate = adapter->link_speed * 1000000; /* This can sleep */ if_link_state_change(ifp, LINK_STATE_UP); } else if (!link_check && (adapter->link_active == 1)) { ifp->if_baudrate = adapter->link_speed = 0; adapter->link_duplex = 0; if (bootverbose) device_printf(dev, "Link is Down\n"); adapter->link_active = 0; /* This can sleep */ if_link_state_change(ifp, LINK_STATE_DOWN); /* Turn off watchdogs */ for (int i = 0; i < adapter->num_queues; i++, txr++) - txr->watchdog_check = FALSE; + txr->queue_status = IGB_QUEUE_IDLE; } } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. * **********************************************************************/ static void igb_stop(void *arg) { struct adapter *adapter = arg; struct ifnet *ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; IGB_CORE_LOCK_ASSERT(adapter); INIT_DEBUGOUT("igb_stop: begin"); igb_disable_intr(adapter); callout_stop(&adapter->timer); /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); /* Unarm watchdog timer. */ for (int i = 0; i < adapter->num_queues; i++, txr++) { IGB_TX_LOCK(txr); - txr->watchdog_check = FALSE; + txr->queue_status = IGB_QUEUE_IDLE; IGB_TX_UNLOCK(txr); } e1000_reset_hw(&adapter->hw); E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0); e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } /********************************************************************* * * Determine hardware revision. * **********************************************************************/ static void igb_identify_hardware(struct adapter *adapter) { device_t dev = adapter->dev; /* Make sure our PCI config space has the necessary stuff set */ adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) && (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) { INIT_DEBUGOUT("Memory Access and/or Bus Master " "bits were not set!\n"); adapter->hw.bus.pci_cmd_word |= (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN); pci_write_config(dev, PCIR_COMMAND, adapter->hw.bus.pci_cmd_word, 2); } /* Save off the information about this board */ adapter->hw.vendor_id = pci_get_vendor(dev); adapter->hw.device_id = pci_get_device(dev); adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); adapter->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); adapter->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); /* Set MAC type early for PCI setup */ e1000_set_mac_type(&adapter->hw); } static int igb_allocate_pci_resources(struct adapter *adapter) { device_t dev = adapter->dev; int rid; rid = PCIR_BAR(0); adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->pci_mem == NULL) { device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->pci_mem); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->pci_mem); adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; adapter->num_queues = 1; /* Defaults for Legacy or MSI */ /* This will setup either MSI/X or MSI */ adapter->msix = igb_setup_msix(adapter); adapter->hw.back = &adapter->osdep; return (0); } /********************************************************************* * * Setup the Legacy or MSI Interrupt handler * **********************************************************************/ static int igb_allocate_legacy(struct adapter *adapter) { device_t dev = adapter->dev; struct igb_queue *que = adapter->queues; int error, rid = 0; /* Turn off all interrupts */ E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); /* MSI RID is 1 */ if (adapter->msix == 1) rid = 1; /* We allocate a single interrupt resource */ adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (adapter->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "interrupt\n"); return (ENXIO); } /* * Try allocating a fast interrupt and the associated deferred * processing contexts. */ TASK_INIT(&que->que_task, 0, igb_handle_que, que); /* Make tasklet for deferred link handling */ TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter); que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT, taskqueue_thread_enqueue, &que->tq); taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq", device_get_nameunit(adapter->dev)); if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL, adapter, &adapter->tag)) != 0) { device_printf(dev, "Failed to register fast interrupt " "handler: %d\n", error); taskqueue_free(que->tq); que->tq = NULL; return (error); } return (0); } /********************************************************************* * * Setup the MSIX Queue Interrupt handlers: * **********************************************************************/ static int igb_allocate_msix(struct adapter *adapter) { device_t dev = adapter->dev; struct igb_queue *que = adapter->queues; int error, rid, vector = 0; for (int i = 0; i < adapter->num_queues; i++, vector++, que++) { rid = vector +1; que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (que->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "MSIX Queue Interrupt\n"); return (ENXIO); } error = bus_setup_intr(dev, que->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_que, que, &que->tag); if (error) { que->res = NULL; device_printf(dev, "Failed to register Queue handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, que->res, que->tag, "que %d", i); #endif que->msix = vector; if (adapter->hw.mac.type == e1000_82575) que->eims = E1000_EICR_TX_QUEUE0 << i; else que->eims = 1 << vector; /* ** Bind the msix vector, and thus the ** rings to the corresponding cpu. */ if (adapter->num_queues > 1) bus_bind_intr(dev, que->res, i); /* Make tasklet for deferred handling */ TASK_INIT(&que->que_task, 0, igb_handle_que, que); que->tq = taskqueue_create_fast("igb_que", M_NOWAIT, taskqueue_thread_enqueue, &que->tq); taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que", device_get_nameunit(adapter->dev)); } /* And Link */ rid = vector + 1; adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (adapter->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "MSIX Link Interrupt\n"); return (ENXIO); } if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_link, adapter, &adapter->tag)) != 0) { device_printf(dev, "Failed to register Link handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, adapter->res, adapter->tag, "link"); #endif adapter->linkvec = vector; return (0); } static void igb_configure_queues(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct igb_queue *que; - u32 tmp, ivar = 0; - u32 newitr = IGB_DEFAULT_ITR; + u32 tmp, ivar = 0, newitr = 0; /* First turn on RSS capability */ if (adapter->hw.mac.type > e1000_82575) E1000_WRITE_REG(hw, E1000_GPIE, E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME | E1000_GPIE_PBA | E1000_GPIE_NSICR); /* Turn on MSIX */ switch (adapter->hw.mac.type) { case e1000_82580: case e1000_vfadapt: /* RX entries */ for (int i = 0; i < adapter->num_queues; i++) { u32 index = i >> 1; ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); que = &adapter->queues[i]; if (i & 1) { ivar &= 0xFF00FFFF; ivar |= (que->msix | E1000_IVAR_VALID) << 16; } else { ivar &= 0xFFFFFF00; ivar |= que->msix | E1000_IVAR_VALID; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); } /* TX entries */ for (int i = 0; i < adapter->num_queues; i++) { u32 index = i >> 1; ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); que = &adapter->queues[i]; if (i & 1) { ivar &= 0x00FFFFFF; ivar |= (que->msix | E1000_IVAR_VALID) << 24; } else { ivar &= 0xFFFF00FF; ivar |= (que->msix | E1000_IVAR_VALID) << 8; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->eims_mask |= que->eims; } /* And for the link interrupt */ ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; adapter->link_mask = 1 << adapter->linkvec; adapter->eims_mask |= adapter->link_mask; E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); break; case e1000_82576: /* RX entries */ for (int i = 0; i < adapter->num_queues; i++) { u32 index = i & 0x7; /* Each IVAR has two entries */ ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); que = &adapter->queues[i]; if (i < 8) { ivar &= 0xFFFFFF00; ivar |= que->msix | E1000_IVAR_VALID; } else { ivar &= 0xFF00FFFF; ivar |= (que->msix | E1000_IVAR_VALID) << 16; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->eims_mask |= que->eims; } /* TX entries */ for (int i = 0; i < adapter->num_queues; i++) { u32 index = i & 0x7; /* Each IVAR has two entries */ ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); que = &adapter->queues[i]; if (i < 8) { ivar &= 0xFFFF00FF; ivar |= (que->msix | E1000_IVAR_VALID) << 8; } else { ivar &= 0x00FFFFFF; ivar |= (que->msix | E1000_IVAR_VALID) << 24; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->eims_mask |= que->eims; } /* And for the link interrupt */ ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; adapter->link_mask = 1 << adapter->linkvec; adapter->eims_mask |= adapter->link_mask; E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); break; case e1000_82575: /* enable MSI-X support*/ tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); tmp |= E1000_CTRL_EXT_PBA_CLR; /* Auto-Mask interrupts upon ICR read. */ tmp |= E1000_CTRL_EXT_EIAME; tmp |= E1000_CTRL_EXT_IRCA; E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); /* Queues */ for (int i = 0; i < adapter->num_queues; i++) { que = &adapter->queues[i]; tmp = E1000_EICR_RX_QUEUE0 << i; tmp |= E1000_EICR_TX_QUEUE0 << i; que->eims = tmp; E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), i, que->eims); adapter->eims_mask |= que->eims; } /* Link */ E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec), E1000_EIMS_OTHER); adapter->link_mask |= E1000_EIMS_OTHER; adapter->eims_mask |= adapter->link_mask; default: break; } /* Set the starting interrupt rate */ + if (igb_max_interrupt_rate > 0) + newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC; + if (hw->mac.type == e1000_82575) newitr |= newitr << 16; else newitr |= E1000_EITR_CNT_IGNR; for (int i = 0; i < adapter->num_queues; i++) { que = &adapter->queues[i]; E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr); } return; } static void igb_free_pci_resources(struct adapter *adapter) { struct igb_queue *que = adapter->queues; device_t dev = adapter->dev; int rid; /* ** There is a slight possibility of a failure mode ** in attach that will result in entering this function ** before interrupt resources have been initialized, and ** in that case we do not want to execute the loops below ** We can detect this reliably by the state of the adapter ** res pointer. */ if (adapter->res == NULL) goto mem; /* * First release all the interrupt resources: */ for (int i = 0; i < adapter->num_queues; i++, que++) { rid = que->msix + 1; if (que->tag != NULL) { bus_teardown_intr(dev, que->res, que->tag); que->tag = NULL; } if (que->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); } /* Clean the Legacy or Link interrupt last */ if (adapter->linkvec) /* we are doing MSIX */ rid = adapter->linkvec + 1; else (adapter->msix != 0) ? (rid = 1):(rid = 0); if (adapter->tag != NULL) { bus_teardown_intr(dev, adapter->res, adapter->tag); adapter->tag = NULL; } if (adapter->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); mem: if (adapter->msix) pci_release_msi(dev); if (adapter->msix_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem); if (adapter->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), adapter->pci_mem); } /* * Setup Either MSI/X or MSI */ static int igb_setup_msix(struct adapter *adapter) { device_t dev = adapter->dev; int rid, want, queues, msgs; /* tuneable override */ if (igb_enable_msix == 0) goto msi; /* First try MSI/X */ rid = PCIR_BAR(IGB_MSIX_BAR); adapter->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!adapter->msix_mem) { /* May not be enabled */ device_printf(adapter->dev, "Unable to map MSIX table \n"); goto msi; } msgs = pci_msix_count(dev); if (msgs == 0) { /* system has msix disabled */ bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem); adapter->msix_mem = NULL; goto msi; } /* Figure out a reasonable auto config value */ queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus; /* Manual override */ if (igb_num_queues != 0) queues = igb_num_queues; + if (queues > 8) /* max queues */ + queues = 8; /* Can have max of 4 queues on 82575 */ if ((adapter->hw.mac.type == e1000_82575) && (queues > 4)) queues = 4; /* Limit the VF adapter to one queue */ if (adapter->hw.mac.type == e1000_vfadapt) queues = 1; /* ** One vector (RX/TX pair) per queue ** plus an additional for Link interrupt */ want = queues + 1; if (msgs >= want) msgs = want; else { device_printf(adapter->dev, "MSIX Configuration Problem, " "%d vectors configured, but %d queues wanted!\n", msgs, want); return (ENXIO); } if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) { device_printf(adapter->dev, "Using MSIX interrupts with %d vectors\n", msgs); adapter->num_queues = queues; return (msgs); } msi: msgs = pci_msi_count(dev); if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0) device_printf(adapter->dev,"Using MSI interrupt\n"); return (msgs); } /********************************************************************* * * Set up an fresh starting state * **********************************************************************/ static void igb_reset(struct adapter *adapter) { device_t dev = adapter->dev; struct e1000_hw *hw = &adapter->hw; struct e1000_fc_info *fc = &hw->fc; struct ifnet *ifp = adapter->ifp; u32 pba = 0; u16 hwm; INIT_DEBUGOUT("igb_reset: begin"); /* Let the firmware know the OS is in control */ igb_get_hw_control(adapter); /* * Packet Buffer Allocation (PBA) * Writing PBA sets the receive portion of the buffer * the remainder is used for the transmit buffer. */ switch (hw->mac.type) { case e1000_82575: pba = E1000_PBA_32K; break; case e1000_82576: case e1000_vfadapt: pba = E1000_PBA_64K; break; case e1000_82580: pba = E1000_PBA_35K; default: break; } /* Special needs in case of Jumbo frames */ if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) { u32 tx_space, min_tx, min_rx; pba = E1000_READ_REG(hw, E1000_PBA); tx_space = pba >> 16; pba &= 0xffff; min_tx = (adapter->max_frame_size + sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2; min_tx = roundup2(min_tx, 1024); min_tx >>= 10; min_rx = adapter->max_frame_size; min_rx = roundup2(min_rx, 1024); min_rx >>= 10; if (tx_space < min_tx && ((min_tx - tx_space) < pba)) { pba = pba - (min_tx - tx_space); /* * if short on rx space, rx wins * and must trump tx adjustment */ if (pba < min_rx) pba = min_rx; } E1000_WRITE_REG(hw, E1000_PBA, pba); } INIT_DEBUGOUT1("igb_init: pba=%dK",pba); /* * These parameters control the automatic generation (Tx) and * response (Rx) to Ethernet PAUSE frames. * - High water mark should allow for at least two frames to be * received after sending an XOFF. * - Low water mark works best when it is very near the high water mark. * This allows the receiver to restart by sending XON when it has * drained a bit. */ hwm = min(((pba << 10) * 9 / 10), ((pba << 10) - 2 * adapter->max_frame_size)); if (hw->mac.type < e1000_82576) { fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */ fc->low_water = fc->high_water - 8; } else { fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */ fc->low_water = fc->high_water - 16; } fc->pause_time = IGB_FC_PAUSE_TIME; fc->send_xon = TRUE; /* Set Flow control, use the tunable location if sane */ - if ((igb_fc_setting >= 0) || (igb_fc_setting < 4)) + if ((igb_fc_setting >= 0) && (igb_fc_setting < 4)) fc->requested_mode = igb_fc_setting; else fc->requested_mode = e1000_fc_none; fc->current_mode = fc->requested_mode; /* Issue a global reset */ e1000_reset_hw(hw); E1000_WRITE_REG(hw, E1000_WUC, 0); if (e1000_init_hw(hw) < 0) device_printf(dev, "Hardware Initialization Failed\n"); if (hw->mac.type == e1000_82580) { u32 reg; hwm = (pba << 10) - (2 * adapter->max_frame_size); /* * 0x80000000 - enable DMA COAL * 0x10000000 - use L0s as low power * 0x20000000 - use L1 as low power * X << 16 - exit dma coal when rx data exceeds X kB * Y - upper limit to stay in dma coal in units of 32usecs */ E1000_WRITE_REG(hw, E1000_DMACR, 0xA0000006 | ((hwm << 6) & 0x00FF0000)); /* set hwm to PBA - 2 * max frame size */ E1000_WRITE_REG(hw, E1000_FCRTC, hwm); /* * This sets the time to wait before requesting transition to * low power state to number of usecs needed to receive 1 512 * byte frame at gigabit line rate */ E1000_WRITE_REG(hw, E1000_DMCTLX, 4); /* free space in tx packet buffer to wake from DMA coal */ E1000_WRITE_REG(hw, E1000_DMCTXTH, (20480 - (2 * adapter->max_frame_size)) >> 6); /* make low power state decision controlled by DMA coal */ reg = E1000_READ_REG(hw, E1000_PCIEMISC); E1000_WRITE_REG(hw, E1000_PCIEMISC, reg | E1000_PCIEMISC_LX_DECISION); } E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); e1000_get_phy_info(hw); e1000_check_for_link(hw); return; } /********************************************************************* * * Setup networking device structure and register an interface. * **********************************************************************/ static int igb_setup_interface(device_t dev, struct adapter *adapter) { struct ifnet *ifp; INIT_DEBUGOUT("igb_setup_interface: begin"); ifp = adapter->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not allocate ifnet structure\n"); return (-1); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_mtu = ETHERMTU; ifp->if_init = igb_init; ifp->if_softc = adapter; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = igb_ioctl; ifp->if_start = igb_start; #if __FreeBSD_version >= 800000 ifp->if_transmit = igb_mq_start; ifp->if_qflush = igb_qflush; #endif IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1); ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1; IFQ_SET_READY(&ifp->if_snd); ether_ifattach(ifp, adapter->hw.mac.addr); ifp->if_capabilities = ifp->if_capenable = 0; ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; ifp->if_capabilities |= IFCAP_TSO4; ifp->if_capabilities |= IFCAP_JUMBO_MTU; - if (igb_header_split) - ifp->if_capabilities |= IFCAP_LRO; - ifp->if_capenable = ifp->if_capabilities; + + /* Don't enable LRO by default */ + ifp->if_capabilities |= IFCAP_LRO; + #ifdef DEVICE_POLLING ifp->if_capabilities |= IFCAP_POLLING; #endif /* * Tell the upper layer(s) we * support full VLAN capability. */ ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; /* ** Dont turn this on by default, if vlans are ** created on another pseudo device (eg. lagg) ** then vlan events are not passed thru, breaking ** operation, but with HW FILTER off it works. If ** using vlans directly on the em driver you can ** enable this and get full hardware tag filtering. */ ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&adapter->media, IFM_IMASK, igb_media_change, igb_media_status); if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); } else { ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL); if (adapter->hw.phy.type != e1000_phy_ife) { ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); } } ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); return (0); } /* * Manage DMA'able memory. */ static void igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { if (error) return; *(bus_addr_t *) arg = segs[0].ds_addr; } static int igb_dma_malloc(struct adapter *adapter, bus_size_t size, struct igb_dma_alloc *dma, int mapflags) { int error; error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ IGB_DBA_ALIGN, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dma->dma_tag); if (error) { device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n", __func__, error); goto fail_0; } error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr, BUS_DMA_NOWAIT, &dma->dma_map); if (error) { device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n", __func__, (uintmax_t)size, error); goto fail_2; } dma->dma_paddr = 0; error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); if (error || dma->dma_paddr == 0) { device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n", __func__, error); goto fail_3; } return (0); fail_3: bus_dmamap_unload(dma->dma_tag, dma->dma_map); fail_2: bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); bus_dma_tag_destroy(dma->dma_tag); fail_0: dma->dma_map = NULL; dma->dma_tag = NULL; return (error); } static void igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma) { if (dma->dma_tag == NULL) return; if (dma->dma_map != NULL) { bus_dmamap_sync(dma->dma_tag, dma->dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->dma_tag, dma->dma_map); bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); dma->dma_map = NULL; } bus_dma_tag_destroy(dma->dma_tag); dma->dma_tag = NULL; } /********************************************************************* * * Allocate memory for the transmit and receive rings, and then * the descriptors associated with each, called only once at attach. * **********************************************************************/ static int igb_allocate_queues(struct adapter *adapter) { device_t dev = adapter->dev; struct igb_queue *que = NULL; struct tx_ring *txr = NULL; struct rx_ring *rxr = NULL; int rsize, tsize, error = E1000_SUCCESS; int txconf = 0, rxconf = 0; /* First allocate the top level queue structs */ if (!(adapter->queues = (struct igb_queue *) malloc(sizeof(struct igb_queue) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate queue memory\n"); error = ENOMEM; goto fail; } /* Next allocate the TX ring struct memory */ if (!(adapter->tx_rings = (struct tx_ring *) malloc(sizeof(struct tx_ring) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate TX ring memory\n"); error = ENOMEM; goto tx_fail; } /* Now allocate the RX */ if (!(adapter->rx_rings = (struct rx_ring *) malloc(sizeof(struct rx_ring) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX ring memory\n"); error = ENOMEM; goto rx_fail; } tsize = roundup2(adapter->num_tx_desc * sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN); /* * Now set up the TX queues, txconf is needed to handle the * possibility that things fail midcourse and we need to * undo memory gracefully */ for (int i = 0; i < adapter->num_queues; i++, txconf++) { /* Set up some basics */ txr = &adapter->tx_rings[i]; txr->adapter = adapter; txr->me = i; /* Initialize the TX lock */ snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", device_get_nameunit(dev), txr->me); mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF); if (igb_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate TX Descriptor memory\n"); error = ENOMEM; goto err_tx_desc; } txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr; bzero((void *)txr->tx_base, tsize); /* Now allocate transmit buffers for the ring */ if (igb_allocate_transmit_buffers(txr)) { device_printf(dev, "Critical Failure setting up transmit buffers\n"); error = ENOMEM; goto err_tx_desc; } #if __FreeBSD_version >= 800000 /* Allocate a buf ring */ txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF, M_WAITOK, &txr->tx_mtx); #endif } /* * Next the RX queues... */ rsize = roundup2(adapter->num_rx_desc * sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN); for (int i = 0; i < adapter->num_queues; i++, rxconf++) { rxr = &adapter->rx_rings[i]; rxr->adapter = adapter; rxr->me = i; /* Initialize the RX lock */ snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", device_get_nameunit(dev), txr->me); mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF); if (igb_dma_malloc(adapter, rsize, &rxr->rxdma, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate RxDescriptor memory\n"); error = ENOMEM; goto err_rx_desc; } rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr; bzero((void *)rxr->rx_base, rsize); /* Allocate receive buffers for the ring*/ if (igb_allocate_receive_buffers(rxr)) { device_printf(dev, "Critical Failure setting up receive buffers\n"); error = ENOMEM; goto err_rx_desc; } } /* ** Finally set up the queue holding structs */ for (int i = 0; i < adapter->num_queues; i++) { que = &adapter->queues[i]; que->adapter = adapter; que->txr = &adapter->tx_rings[i]; que->rxr = &adapter->rx_rings[i]; } return (0); err_rx_desc: for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--) igb_dma_free(adapter, &rxr->rxdma); err_tx_desc: for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) igb_dma_free(adapter, &txr->txdma); free(adapter->rx_rings, M_DEVBUF); rx_fail: #if __FreeBSD_version >= 800000 buf_ring_free(txr->br, M_DEVBUF); #endif free(adapter->tx_rings, M_DEVBUF); tx_fail: free(adapter->queues, M_DEVBUF); fail: return (error); } /********************************************************************* * * Allocate memory for tx_buffer structures. The tx_buffer stores all * the information needed to transmit a packet on the wire. This is * called only once at attach, setup is done every reset. * **********************************************************************/ static int igb_allocate_transmit_buffers(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; device_t dev = adapter->dev; struct igb_tx_buffer *txbuf; int error, i; /* * Setup DMA descriptor areas. */ if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ IGB_TSO_SIZE, /* maxsize */ IGB_MAX_SCATTER, /* nsegments */ PAGE_SIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->txtag))) { device_printf(dev,"Unable to allocate TX DMA tag\n"); goto fail; } if (!(txr->tx_buffers = (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) * adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); error = ENOMEM; goto fail; } /* Create the descriptor buffer dma maps */ txbuf = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); if (error != 0) { device_printf(dev, "Unable to create TX DMA map\n"); goto fail; } } return 0; fail: /* We free all, it handles case where we are in the middle */ igb_free_transmit_structures(adapter); return (error); } /********************************************************************* * * Initialize a transmit ring. * **********************************************************************/ static void igb_setup_transmit_ring(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct igb_tx_buffer *txbuf; int i; /* Clear the old descriptor contents */ IGB_TX_LOCK(txr); bzero((void *)txr->tx_base, (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc); /* Reset indices */ txr->next_avail_desc = 0; txr->next_to_clean = 0; /* Free any existing tx buffers. */ txbuf = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { if (txbuf->m_head != NULL) { bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, txbuf->map); m_freem(txbuf->m_head); txbuf->m_head = NULL; } /* clear the watch index */ txbuf->next_eop = -1; } /* Set number of descriptors available */ txr->tx_avail = adapter->num_tx_desc; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); IGB_TX_UNLOCK(txr); } /********************************************************************* * * Initialize all transmit rings. * **********************************************************************/ static void igb_setup_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) igb_setup_transmit_ring(txr); return; } /********************************************************************* * * Enable transmit unit. * **********************************************************************/ static void igb_initialize_transmit_units(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; struct e1000_hw *hw = &adapter->hw; u32 tctl, txdctl; INIT_DEBUGOUT("igb_initialize_transmit_units: begin"); tctl = txdctl = 0; /* Setup the Tx Descriptor Rings */ for (int i = 0; i < adapter->num_queues; i++, txr++) { u64 bus_addr = txr->txdma.dma_paddr; E1000_WRITE_REG(hw, E1000_TDLEN(i), adapter->num_tx_desc * sizeof(struct e1000_tx_desc)); E1000_WRITE_REG(hw, E1000_TDBAH(i), (uint32_t)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_TDBAL(i), (uint32_t)bus_addr); /* Setup the HW Tx Head and Tail descriptor pointers */ E1000_WRITE_REG(hw, E1000_TDT(i), 0); E1000_WRITE_REG(hw, E1000_TDH(i), 0); HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(hw, E1000_TDBAL(i)), E1000_READ_REG(hw, E1000_TDLEN(i))); - txr->watchdog_check = FALSE; + txr->queue_status = IGB_QUEUE_IDLE; txdctl |= IGB_TX_PTHRESH; txdctl |= IGB_TX_HTHRESH << 8; txdctl |= IGB_TX_WTHRESH << 16; txdctl |= E1000_TXDCTL_QUEUE_ENABLE; E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); } if (adapter->hw.mac.type == e1000_vfadapt) return; + e1000_config_collision_dist(hw); + /* Program the Transmit Control Register */ tctl = E1000_READ_REG(hw, E1000_TCTL); tctl &= ~E1000_TCTL_CT; tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); - e1000_config_collision_dist(hw); - /* This write will effectively turn on the transmit unit. */ E1000_WRITE_REG(hw, E1000_TCTL, tctl); } /********************************************************************* * * Free all transmit rings. * **********************************************************************/ static void igb_free_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) { IGB_TX_LOCK(txr); igb_free_transmit_buffers(txr); igb_dma_free(adapter, &txr->txdma); IGB_TX_UNLOCK(txr); IGB_TX_LOCK_DESTROY(txr); } free(adapter->tx_rings, M_DEVBUF); } /********************************************************************* * * Free transmit ring related data structures. * **********************************************************************/ static void igb_free_transmit_buffers(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct igb_tx_buffer *tx_buffer; int i; INIT_DEBUGOUT("free_transmit_ring: begin"); if (txr->tx_buffers == NULL) return; tx_buffer = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { if (tx_buffer->m_head != NULL) { bus_dmamap_sync(txr->txtag, tx_buffer->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, tx_buffer->map); m_freem(tx_buffer->m_head); tx_buffer->m_head = NULL; if (tx_buffer->map != NULL) { bus_dmamap_destroy(txr->txtag, tx_buffer->map); tx_buffer->map = NULL; } } else if (tx_buffer->map != NULL) { bus_dmamap_unload(txr->txtag, tx_buffer->map); bus_dmamap_destroy(txr->txtag, tx_buffer->map); tx_buffer->map = NULL; } } #if __FreeBSD_version >= 800000 if (txr->br != NULL) buf_ring_free(txr->br, M_DEVBUF); #endif if (txr->tx_buffers != NULL) { free(txr->tx_buffers, M_DEVBUF); txr->tx_buffers = NULL; } if (txr->txtag != NULL) { bus_dma_tag_destroy(txr->txtag); txr->txtag = NULL; } return; } /********************************************************************** * * Setup work for hardware segmentation offload (TSO) * **********************************************************************/ static boolean_t igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen) { struct adapter *adapter = txr->adapter; struct e1000_adv_tx_context_desc *TXD; struct igb_tx_buffer *tx_buffer; u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; u32 mss_l4len_idx = 0; u16 vtag = 0; int ctxd, ehdrlen, ip_hlen, tcp_hlen; struct ether_vlan_header *eh; struct ip *ip; struct tcphdr *th; /* * Determine where frame payload starts. * Jump over vlan headers if already present */ eh = mtod(mp, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; else ehdrlen = ETHER_HDR_LEN; /* Ensure we have at least the IP+TCP header in the first mbuf. */ if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr)) return FALSE; /* Only supports IPV4 for now */ ctxd = txr->next_avail_desc; tx_buffer = &txr->tx_buffers[ctxd]; TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd]; ip = (struct ip *)(mp->m_data + ehdrlen); if (ip->ip_p != IPPROTO_TCP) return FALSE; /* 0 */ ip->ip_sum = 0; ip_hlen = ip->ip_hl << 2; th = (struct tcphdr *)((caddr_t)ip + ip_hlen); th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); tcp_hlen = th->th_off << 2; /* * Calculate header length, this is used * in the transmit desc in igb_xmit */ *hdrlen = ehdrlen + ip_hlen + tcp_hlen; /* VLAN MACLEN IPLEN */ if (mp->m_flags & M_VLANTAG) { vtag = htole16(mp->m_pkthdr.ether_vtag); vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT); } vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT); vlan_macip_lens |= ip_hlen; TXD->vlan_macip_lens |= htole32(vlan_macip_lens); /* ADV DTYPE TUCMD */ type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl); /* MSS L4LEN IDX */ mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT); mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT); /* 82575 needs the queue index added */ if (adapter->hw.mac.type == e1000_82575) mss_l4len_idx |= txr->me << 4; TXD->mss_l4len_idx = htole32(mss_l4len_idx); TXD->seqnum_seed = htole32(0); tx_buffer->m_head = NULL; tx_buffer->next_eop = -1; if (++ctxd == adapter->num_tx_desc) ctxd = 0; txr->tx_avail--; txr->next_avail_desc = ctxd; return TRUE; } /********************************************************************* * * Context Descriptor setup for VLAN or CSUM * **********************************************************************/ static bool igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp) { struct adapter *adapter = txr->adapter; struct e1000_adv_tx_context_desc *TXD; struct igb_tx_buffer *tx_buffer; u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx; struct ether_vlan_header *eh; struct ip *ip = NULL; struct ip6_hdr *ip6; int ehdrlen, ctxd, ip_hlen = 0; u16 etype, vtag = 0; u8 ipproto = 0; bool offload = TRUE; if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0) offload = FALSE; vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0; ctxd = txr->next_avail_desc; tx_buffer = &txr->tx_buffers[ctxd]; TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd]; /* ** In advanced descriptors the vlan tag must ** be placed into the context descriptor, thus ** we need to be here just for that setup. */ if (mp->m_flags & M_VLANTAG) { vtag = htole16(mp->m_pkthdr.ether_vtag); vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT); } else if (offload == FALSE) return FALSE; /* * Determine where frame payload starts. * Jump over vlan headers if already present, * helpful for QinQ too. */ eh = mtod(mp, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { etype = ntohs(eh->evl_proto); ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { etype = ntohs(eh->evl_encap_proto); ehdrlen = ETHER_HDR_LEN; } /* Set the ether header length */ vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; switch (etype) { case ETHERTYPE_IP: ip = (struct ip *)(mp->m_data + ehdrlen); ip_hlen = ip->ip_hl << 2; if (mp->m_len < ehdrlen + ip_hlen) { offload = FALSE; break; } ipproto = ip->ip_p; type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; break; case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); ip_hlen = sizeof(struct ip6_hdr); if (mp->m_len < ehdrlen + ip_hlen) return (FALSE); ipproto = ip6->ip6_nxt; type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; break; default: offload = FALSE; break; } vlan_macip_lens |= ip_hlen; type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; switch (ipproto) { case IPPROTO_TCP: if (mp->m_pkthdr.csum_flags & CSUM_TCP) type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; break; case IPPROTO_UDP: if (mp->m_pkthdr.csum_flags & CSUM_UDP) type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP; break; #if __FreeBSD_version >= 800000 case IPPROTO_SCTP: if (mp->m_pkthdr.csum_flags & CSUM_SCTP) type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP; break; #endif default: offload = FALSE; break; } /* 82575 needs the queue index added */ if (adapter->hw.mac.type == e1000_82575) mss_l4len_idx = txr->me << 4; /* Now copy bits into descriptor */ TXD->vlan_macip_lens |= htole32(vlan_macip_lens); TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl); TXD->seqnum_seed = htole32(0); TXD->mss_l4len_idx = htole32(mss_l4len_idx); tx_buffer->m_head = NULL; tx_buffer->next_eop = -1; /* We've consumed the first desc, adjust counters */ if (++ctxd == adapter->num_tx_desc) ctxd = 0; txr->next_avail_desc = ctxd; --txr->tx_avail; return (offload); } /********************************************************************** * * Examine each tx_buffer in the used queue. If the hardware is done * processing the packet then free associated resources. The * tx_buffer is put back on the free queue. * * TRUE return means there's work in the ring to clean, FALSE its empty. **********************************************************************/ static bool igb_txeof(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; - int first, last, done; + int first, last, done, processed; struct igb_tx_buffer *tx_buffer; struct e1000_tx_desc *tx_desc, *eop_desc; struct ifnet *ifp = adapter->ifp; IGB_TX_LOCK_ASSERT(txr); - if (txr->tx_avail == adapter->num_tx_desc) + if (txr->tx_avail == adapter->num_tx_desc) { + txr->queue_status = IGB_QUEUE_IDLE; return FALSE; + } + processed = 0; first = txr->next_to_clean; tx_desc = &txr->tx_base[first]; tx_buffer = &txr->tx_buffers[first]; last = tx_buffer->next_eop; eop_desc = &txr->tx_base[last]; /* * What this does is get the index of the * first descriptor AFTER the EOP of the * first packet, that way we can do the * simple comparison on the inner while loop. */ if (++last == adapter->num_tx_desc) last = 0; done = last; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) { /* We clean the range of the packet */ while (first != done) { tx_desc->upper.data = 0; tx_desc->lower.data = 0; tx_desc->buffer_addr = 0; ++txr->tx_avail; + ++processed; if (tx_buffer->m_head) { txr->bytes += tx_buffer->m_head->m_pkthdr.len; bus_dmamap_sync(txr->txtag, tx_buffer->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, tx_buffer->map); m_freem(tx_buffer->m_head); tx_buffer->m_head = NULL; } tx_buffer->next_eop = -1; txr->watchdog_time = ticks; if (++first == adapter->num_tx_desc) first = 0; tx_buffer = &txr->tx_buffers[first]; tx_desc = &txr->tx_base[first]; } ++txr->packets; ++ifp->if_opackets; /* See if we can continue to the next packet */ last = tx_buffer->next_eop; if (last != -1) { eop_desc = &txr->tx_base[last]; /* Get new done point */ if (++last == adapter->num_tx_desc) last = 0; done = last; } else break; } bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); txr->next_to_clean = first; + /* + ** Watchdog calculation, we know there's + ** work outstanding or the first return + ** would have been taken, so none processed + ** for too long indicates a hang. + */ + if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG)) + txr->queue_status = IGB_QUEUE_HUNG; + /* * If we have enough room, clear IFF_DRV_OACTIVE * to tell the stack that it is OK to send packets. */ if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) { ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; /* All clean, turn off the watchdog */ if (txr->tx_avail == adapter->num_tx_desc) { - txr->watchdog_check = FALSE; + txr->queue_status = IGB_QUEUE_IDLE; return (FALSE); } } return (TRUE); } /********************************************************************* * * Refresh mbuf buffers for RX descriptor rings * - now keeps its own state so discards due to resource * exhaustion are unnecessary, if an mbuf cannot be obtained * it just returns, keeping its placeholder, thus it can simply * be recalled to try again. * **********************************************************************/ static void igb_refresh_mbufs(struct rx_ring *rxr, int limit) { struct adapter *adapter = rxr->adapter; bus_dma_segment_t hseg[1]; bus_dma_segment_t pseg[1]; struct igb_rx_buf *rxbuf; struct mbuf *mh, *mp; int i, nsegs, error, cleaned; i = rxr->next_to_refresh; cleaned = -1; /* Signify no completions */ while (i != limit) { rxbuf = &rxr->rx_buffers[i]; - if ((rxbuf->m_head == NULL) && (rxr->hdr_split)) { + /* No hdr mbuf used with header split off */ + if (rxr->hdr_split == FALSE) + goto no_split; + if (rxbuf->m_head == NULL) { mh = m_gethdr(M_DONTWAIT, MT_DATA); if (mh == NULL) goto update; - mh->m_pkthdr.len = mh->m_len = MHLEN; - mh->m_len = MHLEN; - mh->m_flags |= M_PKTHDR; - m_adj(mh, ETHER_ALIGN); - /* Get the memory mapping */ - error = bus_dmamap_load_mbuf_sg(rxr->htag, - rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT); - if (error != 0) { - printf("GET BUF: dmamap load" - " failure - %d\n", error); - m_free(mh); - goto update; - } - rxbuf->m_head = mh; - bus_dmamap_sync(rxr->htag, rxbuf->hmap, - BUS_DMASYNC_PREREAD); - rxr->rx_base[i].read.hdr_addr = - htole64(hseg[0].ds_addr); - } + } else + mh = rxbuf->m_head; + mh->m_pkthdr.len = mh->m_len = MHLEN; + mh->m_len = MHLEN; + mh->m_flags |= M_PKTHDR; + /* Get the memory mapping */ + error = bus_dmamap_load_mbuf_sg(rxr->htag, + rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT); + if (error != 0) { + printf("Refresh mbufs: hdr dmamap load" + " failure - %d\n", error); + m_free(mh); + rxbuf->m_head = NULL; + goto update; + } + rxbuf->m_head = mh; + bus_dmamap_sync(rxr->htag, rxbuf->hmap, + BUS_DMASYNC_PREREAD); + rxr->rx_base[i].read.hdr_addr = + htole64(hseg[0].ds_addr); +no_split: if (rxbuf->m_pack == NULL) { mp = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); if (mp == NULL) goto update; - mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz; - /* Get the memory mapping */ - error = bus_dmamap_load_mbuf_sg(rxr->ptag, - rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT); - if (error != 0) { - printf("GET BUF: dmamap load" - " failure - %d\n", error); - m_free(mp); - goto update; - } - rxbuf->m_pack = mp; - bus_dmamap_sync(rxr->ptag, rxbuf->pmap, - BUS_DMASYNC_PREREAD); - rxr->rx_base[i].read.pkt_addr = - htole64(pseg[0].ds_addr); + } else + mp = rxbuf->m_pack; + + mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz; + /* Get the memory mapping */ + error = bus_dmamap_load_mbuf_sg(rxr->ptag, + rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT); + if (error != 0) { + printf("Refresh mbufs: payload dmamap load" + " failure - %d\n", error); + m_free(mp); + rxbuf->m_pack = NULL; + goto update; } + rxbuf->m_pack = mp; + bus_dmamap_sync(rxr->ptag, rxbuf->pmap, + BUS_DMASYNC_PREREAD); + rxr->rx_base[i].read.pkt_addr = + htole64(pseg[0].ds_addr); cleaned = i; /* Calculate next index */ if (++i == adapter->num_rx_desc) i = 0; /* This is the work marker for refresh */ rxr->next_to_refresh = i; } update: if (cleaned != -1) /* If we refreshed some, bump tail */ E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), cleaned); return; } /********************************************************************* * * Allocate memory for rx_buffer structures. Since we use one * rx_buffer per received packet, the maximum number of rx_buffer's * that we'll need is equal to the number of receive descriptors * that we've allocated. * **********************************************************************/ static int igb_allocate_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; device_t dev = adapter->dev; struct igb_rx_buf *rxbuf; int i, bsize, error; bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc; if (!(rxr->rx_buffers = (struct igb_rx_buf *) malloc(bsize, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate rx_buffer memory\n"); error = ENOMEM; goto fail; } if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MSIZE, /* maxsize */ 1, /* nsegments */ MSIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &rxr->htag))) { device_printf(dev, "Unable to create RX DMA tag\n"); goto fail; } if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ - MJUMPAGESIZE, /* maxsize */ + MJUM9BYTES, /* maxsize */ 1, /* nsegments */ - MJUMPAGESIZE, /* maxsegsize */ + MJUM9BYTES, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &rxr->ptag))) { device_printf(dev, "Unable to create RX payload DMA tag\n"); goto fail; } for (i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; error = bus_dmamap_create(rxr->htag, BUS_DMA_NOWAIT, &rxbuf->hmap); if (error) { device_printf(dev, "Unable to create RX head DMA maps\n"); goto fail; } error = bus_dmamap_create(rxr->ptag, BUS_DMA_NOWAIT, &rxbuf->pmap); if (error) { device_printf(dev, "Unable to create RX packet DMA maps\n"); goto fail; } } return (0); fail: /* Frees all, but can handle partial completion */ igb_free_receive_structures(adapter); return (error); } static void igb_free_receive_ring(struct rx_ring *rxr) { struct adapter *adapter; struct igb_rx_buf *rxbuf; int i; adapter = rxr->adapter; for (i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->m_head != NULL) { bus_dmamap_sync(rxr->htag, rxbuf->hmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->htag, rxbuf->hmap); rxbuf->m_head->m_flags |= M_PKTHDR; m_freem(rxbuf->m_head); } if (rxbuf->m_pack != NULL) { bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->ptag, rxbuf->pmap); rxbuf->m_pack->m_flags |= M_PKTHDR; m_freem(rxbuf->m_pack); } rxbuf->m_head = NULL; rxbuf->m_pack = NULL; } } /********************************************************************* * * Initialize a receive ring and its buffers. * **********************************************************************/ static int igb_setup_receive_ring(struct rx_ring *rxr) { struct adapter *adapter; struct ifnet *ifp; device_t dev; struct igb_rx_buf *rxbuf; bus_dma_segment_t pseg[1], hseg[1]; struct lro_ctrl *lro = &rxr->lro; int rsize, nsegs, error = 0; adapter = rxr->adapter; dev = adapter->dev; ifp = adapter->ifp; /* Clear the ring contents */ IGB_RX_LOCK(rxr); rsize = roundup2(adapter->num_rx_desc * sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN); bzero((void *)rxr->rx_base, rsize); /* ** Free current RX buffer structures and their mbufs */ igb_free_receive_ring(rxr); /* Configure for header split? */ if (igb_header_split) rxr->hdr_split = TRUE; /* Now replenish the ring mbufs */ - for (int j = 0; j != adapter->num_rx_desc; ++j) { + for (int j = 0; j < adapter->num_rx_desc; ++j) { struct mbuf *mh, *mp; rxbuf = &rxr->rx_buffers[j]; if (rxr->hdr_split == FALSE) goto skip_head; /* First the header */ rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA); if (rxbuf->m_head == NULL) { error = ENOBUFS; goto fail; } m_adj(rxbuf->m_head, ETHER_ALIGN); mh = rxbuf->m_head; mh->m_len = mh->m_pkthdr.len = MHLEN; mh->m_flags |= M_PKTHDR; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->htag, rxbuf->hmap, rxbuf->m_head, hseg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) /* Nothing elegant to do here */ goto fail; bus_dmamap_sync(rxr->htag, rxbuf->hmap, BUS_DMASYNC_PREREAD); /* Update descriptor */ rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr); skip_head: /* Now the payload cluster */ rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); if (rxbuf->m_pack == NULL) { error = ENOBUFS; goto fail; } mp = rxbuf->m_pack; mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) goto fail; bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD); /* Update descriptor */ rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr); } /* Setup our descriptor indices */ rxr->next_to_check = 0; rxr->next_to_refresh = 0; rxr->lro_enabled = FALSE; rxr->rx_split_packets = 0; rxr->rx_bytes = 0; rxr->fmp = NULL; rxr->lmp = NULL; rxr->discard = FALSE; bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* ** Now set up the LRO interface, we ** also only do head split when LRO ** is enabled, since so often they ** are undesireable in similar setups. */ if (ifp->if_capenable & IFCAP_LRO) { - int err = tcp_lro_init(lro); - if (err) { + error = tcp_lro_init(lro); + if (error) { device_printf(dev, "LRO Initialization failed!\n"); goto fail; } INIT_DEBUGOUT("RX LRO Initialized\n"); rxr->lro_enabled = TRUE; lro->ifp = adapter->ifp; } IGB_RX_UNLOCK(rxr); return (0); fail: igb_free_receive_ring(rxr); IGB_RX_UNLOCK(rxr); return (error); } /********************************************************************* * * Initialize all receive rings. * **********************************************************************/ static int igb_setup_receive_structures(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; int i; for (i = 0; i < adapter->num_queues; i++, rxr++) if (igb_setup_receive_ring(rxr)) goto fail; return (0); fail: /* * Free RX buffers allocated so far, we will only handle * the rings that completed, the failing case will have * cleaned up for itself. 'i' is the endpoint. */ for (int j = 0; j > i; ++j) { rxr = &adapter->rx_rings[i]; + IGB_RX_LOCK(rxr); igb_free_receive_ring(rxr); + IGB_RX_UNLOCK(rxr); } return (ENOBUFS); } /********************************************************************* * * Enable receive unit. * **********************************************************************/ static void igb_initialize_receive_units(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; struct ifnet *ifp = adapter->ifp; struct e1000_hw *hw = &adapter->hw; u32 rctl, rxcsum, psize, srrctl = 0; INIT_DEBUGOUT("igb_initialize_receive_unit: begin"); /* * Make sure receives are disabled while setting * up the descriptor ring */ rctl = E1000_READ_REG(hw, E1000_RCTL); E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); /* ** Set up for header split */ if (rxr->hdr_split) { /* Use a standard mbuf for the header */ srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; } else srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; /* ** Set up for jumbo frames */ if (ifp->if_mtu > ETHERMTU) { rctl |= E1000_RCTL_LPE; - srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT; - rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; - + if (adapter->rx_mbuf_sz == MJUMPAGESIZE) { + srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT; + rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; + } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) { + srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT; + rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; + } /* Set maximum packet len */ psize = adapter->max_frame_size; /* are we on a vlan? */ if (adapter->ifp->if_vlantrunk != NULL) psize += VLAN_TAG_SIZE; E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize); } else { rctl &= ~E1000_RCTL_LPE; srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; rctl |= E1000_RCTL_SZ_2048; } /* Setup the Base and Length of the Rx Descriptor Rings */ for (int i = 0; i < adapter->num_queues; i++, rxr++) { u64 bus_addr = rxr->rxdma.dma_paddr; u32 rxdctl; E1000_WRITE_REG(hw, E1000_RDLEN(i), adapter->num_rx_desc * sizeof(struct e1000_rx_desc)); E1000_WRITE_REG(hw, E1000_RDBAH(i), (uint32_t)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_RDBAL(i), (uint32_t)bus_addr); E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl); /* Enable this Queue */ rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; rxdctl &= 0xFFF00000; rxdctl |= IGB_RX_PTHRESH; rxdctl |= IGB_RX_HTHRESH << 8; rxdctl |= IGB_RX_WTHRESH << 16; E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); } /* ** Setup for RX MultiQueue */ rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); if (adapter->num_queues >1) { u32 random[10], mrqc, shift = 0; union igb_reta { u32 dword; u8 bytes[4]; } reta; arc4rand(&random, sizeof(random), 0); if (adapter->hw.mac.type == e1000_82575) shift = 6; /* Warning FM follows */ for (int i = 0; i < 128; i++) { reta.bytes[i & 3] = (i % adapter->num_queues) << shift; if ((i & 3) == 3) E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta.dword); } /* Now fill in hash table */ mrqc = E1000_MRQC_ENABLE_RSS_4Q; for (int i = 0; i < 10; i++) E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), i, random[i]); mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 | E1000_MRQC_RSS_FIELD_IPV4_TCP); mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 | E1000_MRQC_RSS_FIELD_IPV6_TCP); mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP | E1000_MRQC_RSS_FIELD_IPV6_UDP); mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX | E1000_MRQC_RSS_FIELD_IPV6_TCP_EX); E1000_WRITE_REG(hw, E1000_MRQC, mrqc); /* ** NOTE: Receive Full-Packet Checksum Offload ** is mutually exclusive with Multiqueue. However ** this is not the same as TCP/IP checksums which ** still work. */ rxcsum |= E1000_RXCSUM_PCSD; #if __FreeBSD_version >= 800000 /* For SCTP Offload */ if ((hw->mac.type == e1000_82576) && (ifp->if_capenable & IFCAP_RXCSUM)) rxcsum |= E1000_RXCSUM_CRCOFL; #endif } else { /* Non RSS setup */ if (ifp->if_capenable & IFCAP_RXCSUM) { rxcsum |= E1000_RXCSUM_IPPCSE; #if __FreeBSD_version >= 800000 if (adapter->hw.mac.type == e1000_82576) rxcsum |= E1000_RXCSUM_CRCOFL; #endif } else rxcsum &= ~E1000_RXCSUM_TUOFL; } E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); /* Setup the Receive Control Register */ rctl &= ~(3 << E1000_RCTL_MO_SHIFT); rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); /* Strip CRC bytes. */ rctl |= E1000_RCTL_SECRC; /* Make sure VLAN Filters are off */ rctl &= ~E1000_RCTL_VFE; /* Don't store bad packets */ rctl &= ~E1000_RCTL_SBP; /* Enable Receives */ E1000_WRITE_REG(hw, E1000_RCTL, rctl); /* * Setup the HW Rx Head and Tail Descriptor Pointers * - needs to be after enable */ for (int i = 0; i < adapter->num_queues; i++) { E1000_WRITE_REG(hw, E1000_RDH(i), 0); E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1); } return; } /********************************************************************* * * Free receive rings. * **********************************************************************/ static void igb_free_receive_structures(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; for (int i = 0; i < adapter->num_queues; i++, rxr++) { struct lro_ctrl *lro = &rxr->lro; igb_free_receive_buffers(rxr); tcp_lro_free(lro); igb_dma_free(adapter, &rxr->rxdma); } free(adapter->rx_rings, M_DEVBUF); } /********************************************************************* * * Free receive ring data structures. * **********************************************************************/ static void igb_free_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct igb_rx_buf *rxbuf; int i; INIT_DEBUGOUT("free_receive_structures: begin"); /* Cleanup any existing buffers */ if (rxr->rx_buffers != NULL) { for (i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->m_head != NULL) { bus_dmamap_sync(rxr->htag, rxbuf->hmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->htag, rxbuf->hmap); rxbuf->m_head->m_flags |= M_PKTHDR; m_freem(rxbuf->m_head); } if (rxbuf->m_pack != NULL) { bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->ptag, rxbuf->pmap); rxbuf->m_pack->m_flags |= M_PKTHDR; m_freem(rxbuf->m_pack); } rxbuf->m_head = NULL; rxbuf->m_pack = NULL; if (rxbuf->hmap != NULL) { bus_dmamap_destroy(rxr->htag, rxbuf->hmap); rxbuf->hmap = NULL; } if (rxbuf->pmap != NULL) { bus_dmamap_destroy(rxr->ptag, rxbuf->pmap); rxbuf->pmap = NULL; } } if (rxr->rx_buffers != NULL) { free(rxr->rx_buffers, M_DEVBUF); rxr->rx_buffers = NULL; } } if (rxr->htag != NULL) { bus_dma_tag_destroy(rxr->htag); rxr->htag = NULL; } if (rxr->ptag != NULL) { bus_dma_tag_destroy(rxr->ptag); rxr->ptag = NULL; } } static __inline void igb_rx_discard(struct rx_ring *rxr, int i) { - struct adapter *adapter = rxr->adapter; struct igb_rx_buf *rbuf; - struct mbuf *mh, *mp; rbuf = &rxr->rx_buffers[i]; + + /* Partially received? Free the chain */ if (rxr->fmp != NULL) { rxr->fmp->m_flags |= M_PKTHDR; m_freem(rxr->fmp); rxr->fmp = NULL; rxr->lmp = NULL; } - mh = rbuf->m_head; - mp = rbuf->m_pack; + /* + ** With advanced descriptors the writeback + ** clobbers the buffer addrs, so its easier + ** to just free the existing mbufs and take + ** the normal refresh path to get new buffers + ** and mapping. + */ + if (rbuf->m_head) { + m_free(rbuf->m_head); + rbuf->m_head = NULL; + } - /* Reuse loaded DMA map and just update mbuf chain */ - if (mh) { /* with no hdr split would be null */ - mh->m_len = MHLEN; - mh->m_flags |= M_PKTHDR; - mh->m_next = NULL; + if (rbuf->m_pack) { + m_free(rbuf->m_pack); + rbuf->m_pack = NULL; } - mp->m_len = mp->m_pkthdr.len = adapter->rx_mbuf_sz; - mp->m_data = mp->m_ext.ext_buf; - mp->m_next = NULL; return; } static __inline void igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype) { /* * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet * should be computed by hardware. Also it should not have VLAN tag in * ethernet header. */ if (rxr->lro_enabled && (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 && (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) == (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) && (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) { /* * Send to the stack if: ** - LRO not enabled, or ** - no LRO resources, or ** - lro enqueue fails */ if (rxr->lro.lro_cnt != 0) if (tcp_lro_rx(&rxr->lro, m, 0) == 0) return; } IGB_RX_UNLOCK(rxr); (*ifp->if_input)(ifp, m); IGB_RX_LOCK(rxr); } /********************************************************************* * * This routine executes in interrupt context. It replenishes * the mbufs in the descriptor and sends data which has been * dma'ed into host memory to upper layer. * * We loop at most count times if count is > 0, or until done if * count < 0. * * Return TRUE if more to clean, FALSE otherwise *********************************************************************/ static bool igb_rxeof(struct igb_queue *que, int count, int *done) { struct adapter *adapter = que->adapter; struct rx_ring *rxr = que->rxr; struct ifnet *ifp = adapter->ifp; struct lro_ctrl *lro = &rxr->lro; struct lro_entry *queued; int i, processed = 0, rxdone = 0; u32 ptype, staterr = 0; union e1000_adv_rx_desc *cur; IGB_RX_LOCK(rxr); /* Sync the ring. */ bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* Main clean loop */ for (i = rxr->next_to_check; count != 0;) { struct mbuf *sendmp, *mh, *mp; struct igb_rx_buf *rxbuf; u16 hlen, plen, hdr, vtag; bool eop = FALSE; cur = &rxr->rx_base[i]; staterr = le32toh(cur->wb.upper.status_error); if ((staterr & E1000_RXD_STAT_DD) == 0) break; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; count--; sendmp = mh = mp = NULL; cur->wb.upper.status_error = 0; rxbuf = &rxr->rx_buffers[i]; plen = le16toh(cur->wb.upper.length); ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK; vtag = le16toh(cur->wb.upper.vlan); hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info); eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP); /* Make sure all segments of a bad packet are discarded */ if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) || (rxr->discard)) { ifp->if_ierrors++; ++rxr->rx_discarded; if (!eop) /* Catch subsequent segs */ rxr->discard = TRUE; else rxr->discard = FALSE; igb_rx_discard(rxr, i); goto next_desc; } /* ** The way the hardware is configured to ** split, it will ONLY use the header buffer ** when header split is enabled, otherwise we ** get normal behavior, ie, both header and ** payload are DMA'd into the payload buffer. ** ** The fmp test is to catch the case where a ** packet spans multiple descriptors, in that ** case only the first header is valid. */ if (rxr->hdr_split && rxr->fmp == NULL) { hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT; if (hlen > IGB_HDR_BUF) hlen = IGB_HDR_BUF; - /* Handle the header mbuf */ mh = rxr->rx_buffers[i].m_head; mh->m_len = hlen; - /* clear buf info for refresh */ + /* clear buf pointer for refresh */ rxbuf->m_head = NULL; /* ** Get the payload length, this ** could be zero if its a small ** packet. */ if (plen > 0) { mp = rxr->rx_buffers[i].m_pack; mp->m_len = plen; mh->m_next = mp; - /* clear buf info for refresh */ + /* clear buf pointer */ rxbuf->m_pack = NULL; rxr->rx_split_packets++; } } else { /* ** Either no header split, or a ** secondary piece of a fragmented ** split packet. */ mh = rxr->rx_buffers[i].m_pack; mh->m_len = plen; /* clear buf info for refresh */ rxbuf->m_pack = NULL; } ++processed; /* So we know when to refresh */ /* Initial frame - setup */ if (rxr->fmp == NULL) { mh->m_pkthdr.len = mh->m_len; - /* Store the first mbuf */ + /* Save the head of the chain */ rxr->fmp = mh; rxr->lmp = mh; if (mp != NULL) { /* Add payload if split */ mh->m_pkthdr.len += mp->m_len; rxr->lmp = mh->m_next; } } else { /* Chain mbuf's together */ rxr->lmp->m_next = mh; rxr->lmp = rxr->lmp->m_next; rxr->fmp->m_pkthdr.len += mh->m_len; } if (eop) { rxr->fmp->m_pkthdr.rcvif = ifp; ifp->if_ipackets++; rxr->rx_packets++; /* capture data for AIM */ rxr->packets++; rxr->bytes += rxr->fmp->m_pkthdr.len; rxr->rx_bytes += rxr->fmp->m_pkthdr.len; if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) igb_rx_checksum(staterr, rxr->fmp, ptype); if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && (staterr & E1000_RXD_STAT_VP) != 0) { rxr->fmp->m_pkthdr.ether_vtag = vtag; rxr->fmp->m_flags |= M_VLANTAG; } #if __FreeBSD_version >= 800000 rxr->fmp->m_pkthdr.flowid = que->msix; rxr->fmp->m_flags |= M_FLOWID; #endif sendmp = rxr->fmp; /* Make sure to set M_PKTHDR. */ sendmp->m_flags |= M_PKTHDR; rxr->fmp = NULL; rxr->lmp = NULL; } next_desc: bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Advance our pointers to the next descriptor. */ if (++i == adapter->num_rx_desc) i = 0; /* ** Send to the stack or LRO */ if (sendmp != NULL) { rxr->next_to_check = i; igb_rx_input(rxr, ifp, sendmp, ptype); i = rxr->next_to_check; rxdone++; } /* Every 8 descriptors we go to refresh mbufs */ if (processed == 8) { igb_refresh_mbufs(rxr, i); processed = 0; } } /* Catch any remainders */ if (processed != 0) { igb_refresh_mbufs(rxr, i); processed = 0; } rxr->next_to_check = i; /* * Flush any outstanding LRO work */ while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) { SLIST_REMOVE_HEAD(&lro->lro_active, next); tcp_lro_flush(lro, queued); } IGB_RX_UNLOCK(rxr); if (done != NULL) *done = rxdone; /* ** We still have cleaning to do? ** Schedule another interrupt if so. */ if ((staterr & E1000_RXD_STAT_DD) != 0) return (TRUE); return (FALSE); } /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of checksum so that stack * doesn't spend time verifying the checksum. * *********************************************************************/ static void igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype) { u16 status = (u16)staterr; u8 errors = (u8) (staterr >> 24); int sctp; /* Ignore Checksum bit is set */ if (status & E1000_RXD_STAT_IXSM) { mp->m_pkthdr.csum_flags = 0; return; } if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 && (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0) sctp = 1; else sctp = 0; if (status & E1000_RXD_STAT_IPCS) { /* Did it pass? */ if (!(errors & E1000_RXD_ERR_IPE)) { /* IP Checksum Good */ mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED; mp->m_pkthdr.csum_flags |= CSUM_IP_VALID; } else mp->m_pkthdr.csum_flags = 0; } if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) { u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); #if __FreeBSD_version >= 800000 if (sctp) /* reassign */ type = CSUM_SCTP_VALID; #endif /* Did it pass? */ if (!(errors & E1000_RXD_ERR_TCPE)) { mp->m_pkthdr.csum_flags |= type; if (sctp == 0) mp->m_pkthdr.csum_data = htons(0xffff); } } return; } /* * This routine is run via an vlan * config EVENT */ static void igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct adapter *adapter = ifp->if_softc; u32 index, bit; if (ifp->if_softc != arg) /* Not our event */ return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; + IGB_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; - igb_shadow_vfta[index] |= (1 << bit); + adapter->shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; /* Re-init to load the changes */ - igb_init(adapter); + if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) + igb_init_locked(adapter); + IGB_CORE_UNLOCK(adapter); } /* * This routine is run via an vlan * unconfig EVENT */ static void igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct adapter *adapter = ifp->if_softc; u32 index, bit; if (ifp->if_softc != arg) return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; + IGB_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; - igb_shadow_vfta[index] &= ~(1 << bit); + adapter->shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; /* Re-init to load the changes */ - igb_init(adapter); + if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) + igb_init_locked(adapter); + IGB_CORE_UNLOCK(adapter); } static void igb_setup_vlan_hw_support(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 reg; /* ** We get here thru init_locked, meaning ** a soft reset, this has already cleared ** the VFTA and other state, so if there ** have been no vlan's registered do nothing. */ if (adapter->num_vlans == 0) return; /* ** A soft reset zero's out the VFTA, so ** we need to repopulate it now. */ for (int i = 0; i < IGB_VFTA_SIZE; i++) - if (igb_shadow_vfta[i] != 0) { + if (adapter->shadow_vfta[i] != 0) { if (hw->mac.type == e1000_vfadapt) - e1000_vfta_set_vf(hw, igb_shadow_vfta[i], TRUE); + e1000_vfta_set_vf(hw, + adapter->shadow_vfta[i], TRUE); else E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, - i, igb_shadow_vfta[i]); + i, adapter->shadow_vfta[i]); } if (hw->mac.type == e1000_vfadapt) e1000_rlpml_set_vf(hw, adapter->max_frame_size + VLAN_TAG_SIZE); else { reg = E1000_READ_REG(hw, E1000_CTRL); reg |= E1000_CTRL_VME; E1000_WRITE_REG(hw, E1000_CTRL, reg); /* Enable the Filter Table */ reg = E1000_READ_REG(hw, E1000_RCTL); reg &= ~E1000_RCTL_CFIEN; reg |= E1000_RCTL_VFE; E1000_WRITE_REG(hw, E1000_RCTL, reg); /* Update the frame size */ E1000_WRITE_REG(&adapter->hw, E1000_RLPML, adapter->max_frame_size + VLAN_TAG_SIZE); } } static void igb_enable_intr(struct adapter *adapter) { /* With RSS set up what to auto clear */ if (adapter->msix_mem) { E1000_WRITE_REG(&adapter->hw, E1000_EIAC, adapter->eims_mask); E1000_WRITE_REG(&adapter->hw, E1000_EIAM, adapter->eims_mask); E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->eims_mask); E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC); } else { E1000_WRITE_REG(&adapter->hw, E1000_IMS, IMS_ENABLE_MASK); } E1000_WRITE_FLUSH(&adapter->hw); return; } static void igb_disable_intr(struct adapter *adapter) { if (adapter->msix_mem) { E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0); E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0); } E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0); E1000_WRITE_FLUSH(&adapter->hw); return; } /* * Bit of a misnomer, what this really means is * to enable OS management of the system... aka * to disable special hardware management features */ static void igb_init_manageability(struct adapter *adapter) { if (adapter->has_manage) { int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H); int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* disable hardware interception of ARP */ manc &= ~(E1000_MANC_ARP_EN); /* enable receiving management packets to the host */ manc |= E1000_MANC_EN_MNG2HOST; manc2h |= 1 << 5; /* Mng Port 623 */ manc2h |= 1 << 6; /* Mng Port 664 */ E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h); E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * Give control back to hardware management * controller if there is one. */ static void igb_release_manageability(struct adapter *adapter) { if (adapter->has_manage) { int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* re-enable hardware interception of ARP */ manc |= E1000_MANC_ARP_EN; manc &= ~E1000_MANC_EN_MNG2HOST; E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means that * the driver is loaded. * */ static void igb_get_hw_control(struct adapter *adapter) { u32 ctrl_ext; if (adapter->hw.mac.type == e1000_vfadapt) return; /* Let firmware know the driver has taken over */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); } /* * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means that the * driver is no longer loaded. * */ static void igb_release_hw_control(struct adapter *adapter) { u32 ctrl_ext; if (adapter->hw.mac.type == e1000_vfadapt) return; /* Let firmware taken over control of h/w */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); } static int igb_is_valid_ether_addr(uint8_t *addr) { char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { return (FALSE); } return (TRUE); } /* * Enable PCI Wake On Lan capability */ static void igb_enable_wakeup(device_t dev) { u16 cap, status; u8 id; /* First find the capabilities pointer*/ cap = pci_read_config(dev, PCIR_CAP_PTR, 2); /* Read the PM Capabilities */ id = pci_read_config(dev, cap, 1); if (id != PCIY_PMG) /* Something wrong */ return; /* OK, we have the power capabilities, so now get the status register */ cap += PCIR_POWER_STATUS; status = pci_read_config(dev, cap, 2); status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; pci_write_config(dev, cap, status, 2); return; } static void igb_led_func(void *arg, int onoff) { struct adapter *adapter = arg; IGB_CORE_LOCK(adapter); if (onoff) { e1000_setup_led(&adapter->hw); e1000_led_on(&adapter->hw); } else { e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } IGB_CORE_UNLOCK(adapter); } /********************************************************************** * * Update the board statistics counters. * **********************************************************************/ static void igb_update_stats_counters(struct adapter *adapter) { struct ifnet *ifp; struct e1000_hw *hw = &adapter->hw; struct e1000_hw_stats *stats; /* ** The virtual function adapter has only a ** small controlled set of stats, do only ** those and return. */ if (adapter->hw.mac.type == e1000_vfadapt) { igb_update_vf_stats_counters(adapter); return; } stats = (struct e1000_hw_stats *)adapter->stats; if(adapter->hw.phy.media_type == e1000_media_type_copper || (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) { stats->symerrs += E1000_READ_REG(hw,E1000_SYMERRS); stats->sec += E1000_READ_REG(hw, E1000_SEC); } stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS); stats->mpc += E1000_READ_REG(hw, E1000_MPC); stats->scc += E1000_READ_REG(hw, E1000_SCC); stats->ecol += E1000_READ_REG(hw, E1000_ECOL); stats->mcc += E1000_READ_REG(hw, E1000_MCC); stats->latecol += E1000_READ_REG(hw, E1000_LATECOL); stats->colc += E1000_READ_REG(hw, E1000_COLC); stats->dc += E1000_READ_REG(hw, E1000_DC); stats->rlec += E1000_READ_REG(hw, E1000_RLEC); stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC); stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC); /* ** For watchdog management we need to know if we have been ** paused during the last interval, so capture that here. */ adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); stats->xoffrxc += adapter->pause_frames; stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC); stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC); stats->prc64 += E1000_READ_REG(hw, E1000_PRC64); stats->prc127 += E1000_READ_REG(hw, E1000_PRC127); stats->prc255 += E1000_READ_REG(hw, E1000_PRC255); stats->prc511 += E1000_READ_REG(hw, E1000_PRC511); stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023); stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522); stats->gprc += E1000_READ_REG(hw, E1000_GPRC); stats->bprc += E1000_READ_REG(hw, E1000_BPRC); stats->mprc += E1000_READ_REG(hw, E1000_MPRC); stats->gptc += E1000_READ_REG(hw, E1000_GPTC); /* For the 64-bit byte counters the low dword must be read first. */ /* Both registers clear on the read of the high dword */ stats->gorc += E1000_READ_REG(hw, E1000_GORCL) + ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32); stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) + ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32); stats->rnbc += E1000_READ_REG(hw, E1000_RNBC); stats->ruc += E1000_READ_REG(hw, E1000_RUC); stats->rfc += E1000_READ_REG(hw, E1000_RFC); stats->roc += E1000_READ_REG(hw, E1000_ROC); stats->rjc += E1000_READ_REG(hw, E1000_RJC); stats->tor += E1000_READ_REG(hw, E1000_TORH); stats->tot += E1000_READ_REG(hw, E1000_TOTH); stats->tpr += E1000_READ_REG(hw, E1000_TPR); stats->tpt += E1000_READ_REG(hw, E1000_TPT); stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64); stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127); stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255); stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511); stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023); stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522); stats->mptc += E1000_READ_REG(hw, E1000_MPTC); stats->bptc += E1000_READ_REG(hw, E1000_BPTC); /* Interrupt Counts */ stats->iac += E1000_READ_REG(hw, E1000_IAC); stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC); stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC); stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC); stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC); stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC); stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC); stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC); stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC); /* Host to Card Statistics */ stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC); stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC); stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC); stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC); stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC); stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC); stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC); stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) + ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32)); stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) + ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32)); stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS); stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC); stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC); stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC); stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC); stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS); stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR); stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC); stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC); ifp = adapter->ifp; ifp->if_collisions = stats->colc; /* Rx Errors */ ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc + stats->crcerrs + stats->algnerrc + stats->ruc + stats->roc + stats->mpc + stats->cexterr; /* Tx Errors */ ifp->if_oerrors = stats->ecol + stats->latecol + adapter->watchdog_events; /* Driver specific counters */ adapter->device_control = E1000_READ_REG(hw, E1000_CTRL); adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL); adapter->int_mask = E1000_READ_REG(hw, E1000_IMS); adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS); adapter->packet_buf_alloc_tx = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16); adapter->packet_buf_alloc_rx = (E1000_READ_REG(hw, E1000_PBA) & 0xffff); } /********************************************************************** * * Initialize the VF board statistics counters. * **********************************************************************/ static void igb_vf_init_stats(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct e1000_vf_stats *stats; stats = (struct e1000_vf_stats *)adapter->stats; if (stats == NULL) return; stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC); stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC); stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC); stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC); stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC); } /********************************************************************** * * Update the VF board statistics counters. * **********************************************************************/ static void igb_update_vf_stats_counters(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct e1000_vf_stats *stats; if (adapter->link_speed == 0) return; stats = (struct e1000_vf_stats *)adapter->stats; UPDATE_VF_REG(E1000_VFGPRC, stats->last_gprc, stats->gprc); UPDATE_VF_REG(E1000_VFGORC, stats->last_gorc, stats->gorc); UPDATE_VF_REG(E1000_VFGPTC, stats->last_gptc, stats->gptc); UPDATE_VF_REG(E1000_VFGOTC, stats->last_gotc, stats->gotc); UPDATE_VF_REG(E1000_VFMPRC, stats->last_mprc, stats->mprc); } /* Export a single 32-bit register via a read-only sysctl. */ static int igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; u_int val; adapter = oidp->oid_arg1; val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2); return (sysctl_handle_int(oidp, &val, 0, req)); } /* +** Tuneable interrupt rate handler +*/ +static int +igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS) +{ + struct igb_queue *que = ((struct igb_queue *)oidp->oid_arg1); + int error; + u32 reg, usec, rate; + + reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix)); + usec = ((reg & 0x7FFC) >> 2); + if (usec > 0) + rate = 1000000 / usec; + else + rate = 0; + error = sysctl_handle_int(oidp, &rate, 0, req); + if (error || !req->newptr) + return error; + return 0; +} + +/* * Add sysctl variables, one per statistic, to the system. */ static void igb_add_hw_stats(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct e1000_hw_stats *stats = adapter->stats; struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node; struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list; #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, 0, "Link MSIX IRQ Handled"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", CTLFLAG_RD, &adapter->no_tx_dma_setup, "Driver tx dma failure in xmit"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", CTLFLAG_RD, &adapter->rx_overruns, "RX overruns"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", CTLFLAG_RD, &adapter->device_control, "Device Control Register"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", CTLFLAG_RD, &adapter->rx_control, "Receiver Control Register"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", CTLFLAG_RD, &adapter->int_mask, "Interrupt Mask"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", CTLFLAG_RD, &adapter->eint_mask, "Extended Interrupt Mask"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", CTLFLAG_RD, &adapter->packet_buf_alloc_tx, "Transmit Buffer Packet Allocation"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", CTLFLAG_RD, &adapter->packet_buf_alloc_rx, "Receive Buffer Packet Allocation"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", CTLFLAG_RD, &adapter->hw.fc.high_water, 0, "Flow Control High Watermark"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", CTLFLAG_RD, &adapter->hw.fc.low_water, 0, "Flow Control Low Watermark"); for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) { struct lro_ctrl *lro = &rxr->lro; snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); + + SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", + CTLFLAG_RD, &adapter->queues[i], + sizeof(&adapter->queues[i]), + igb_sysctl_interrupt_rate_handler, + "IU", "Interrupt Rate"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", CTLFLAG_RD, adapter, E1000_TDH(txr->me), igb_sysctl_reg_handler, "IU", "Transmit Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", CTLFLAG_RD, adapter, E1000_TDT(txr->me), igb_sysctl_reg_handler, "IU", "Transmit Descriptor Tail"); SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &txr->no_desc_avail, "Queue No Descriptor Available"); SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets", CTLFLAG_RD, &txr->tx_packets, "Queue Packets Transmitted"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", CTLFLAG_RD, adapter, E1000_RDH(rxr->me), igb_sysctl_reg_handler, "IU", "Receive Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", CTLFLAG_RD, adapter, E1000_RDT(rxr->me), igb_sysctl_reg_handler, "IU", "Receive Descriptor Tail"); SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets", CTLFLAG_RD, &rxr->rx_packets, "Queue Packets Received"); SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &rxr->rx_bytes, "Queue Bytes Received"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued", CTLFLAG_RD, &lro->lro_queued, 0, "LRO Queued"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed", CTLFLAG_RD, &lro->lro_flushed, 0, "LRO Flushed"); } /* MAC stats get their own sub node */ stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", CTLFLAG_RD, NULL, "MAC Statistics"); stat_list = SYSCTL_CHILDREN(stat_node); /* ** VF adapter has a very limited set of stats ** since its not managing the metal, so to speak. */ if (adapter->hw.mac.type == e1000_vfadapt) { SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", CTLFLAG_RD, &stats->gprc, "Good Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &stats->gptc, "Good Packets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", CTLFLAG_RD, &stats->gorc, "Good Octets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &stats->gotc, "Good Octets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", CTLFLAG_RD, &stats->mprc, "Multicast Packets Received"); return; } SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", CTLFLAG_RD, &stats->ecol, "Excessive collisions"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", CTLFLAG_RD, &stats->scc, "Single collisions"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", CTLFLAG_RD, &stats->mcc, "Multiple collisions"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", CTLFLAG_RD, &stats->latecol, "Late collisions"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", CTLFLAG_RD, &stats->colc, "Collision Count"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors", CTLFLAG_RD, &stats->symerrs, "Symbol Errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors", CTLFLAG_RD, &stats->sec, "Sequence Errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count", CTLFLAG_RD, &stats->dc, "Defer Count"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets", CTLFLAG_RD, &stats->mpc, "Missed Packets"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff", CTLFLAG_RD, &stats->rnbc, "Receive No Buffers"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize", CTLFLAG_RD, &stats->ruc, "Receive Undersize"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", CTLFLAG_RD, &stats->rfc, "Fragmented Packets Received "); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize", CTLFLAG_RD, &stats->roc, "Oversized Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber", CTLFLAG_RD, &stats->rjc, "Recevied Jabber"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs", CTLFLAG_RD, &stats->rxerrc, "Receive Errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs", CTLFLAG_RD, &stats->crcerrs, "CRC errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs", CTLFLAG_RD, &stats->algnerrc, "Alignment Errors"); /* On 82575 these are collision counts */ SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs", CTLFLAG_RD, &stats->cexterr, "Collision/Carrier extension errors"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd", CTLFLAG_RD, &stats->xonrxc, "XON Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd", CTLFLAG_RD, &stats->xontxc, "XON Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", CTLFLAG_RD, &stats->xoffrxc, "XOFF Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd", CTLFLAG_RD, &stats->xofftxc, "XOFF Transmitted"); /* Packet Reception Stats */ SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd", CTLFLAG_RD, &stats->tpr, "Total Packets Received "); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", CTLFLAG_RD, &stats->gprc, "Good Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd", CTLFLAG_RD, &stats->bprc, "Broadcast Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", CTLFLAG_RD, &stats->mprc, "Multicast Packets Received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", CTLFLAG_RD, &stats->prc64, "64 byte frames received "); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", CTLFLAG_RD, &stats->prc127, "65-127 byte frames received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", CTLFLAG_RD, &stats->prc255, "128-255 byte frames received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", CTLFLAG_RD, &stats->prc511, "256-511 byte frames received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", CTLFLAG_RD, &stats->prc1023, "512-1023 byte frames received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", CTLFLAG_RD, &stats->prc1522, "1023-1522 byte frames received"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", CTLFLAG_RD, &stats->gorc, "Good Octets Received"); /* Packet Transmission Stats */ SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &stats->gotc, "Good Octets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", CTLFLAG_RD, &stats->tpt, "Total Packets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &stats->gptc, "Good Packets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", CTLFLAG_RD, &stats->bptc, "Broadcast Packets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", CTLFLAG_RD, &stats->mptc, "Multicast Packets Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", CTLFLAG_RD, &stats->ptc64, "64 byte frames transmitted "); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", CTLFLAG_RD, &stats->ptc127, "65-127 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", CTLFLAG_RD, &stats->ptc255, "128-255 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", CTLFLAG_RD, &stats->ptc511, "256-511 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", CTLFLAG_RD, &stats->ptc1023, "512-1023 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", CTLFLAG_RD, &stats->ptc1522, "1024-1522 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd", CTLFLAG_RD, &stats->tsctc, "TSO Contexts Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", CTLFLAG_RD, &stats->tsctfc, "TSO Contexts Failed"); /* Interrupt Stats */ int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", CTLFLAG_RD, NULL, "Interrupt Statistics"); int_list = SYSCTL_CHILDREN(int_node); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts", CTLFLAG_RD, &stats->iac, "Interrupt Assertion Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer", CTLFLAG_RD, &stats->icrxptc, "Interrupt Cause Rx Pkt Timer Expire Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer", CTLFLAG_RD, &stats->icrxatc, "Interrupt Cause Rx Abs Timer Expire Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer", CTLFLAG_RD, &stats->ictxptc, "Interrupt Cause Tx Pkt Timer Expire Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer", CTLFLAG_RD, &stats->ictxatc, "Interrupt Cause Tx Abs Timer Expire Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty", CTLFLAG_RD, &stats->ictxqec, "Interrupt Cause Tx Queue Empty Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh", CTLFLAG_RD, &stats->ictxqmtc, "Interrupt Cause Tx Queue Min Thresh Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh", CTLFLAG_RD, &stats->icrxdmtc, "Interrupt Cause Rx Desc Min Thresh Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun", CTLFLAG_RD, &stats->icrxoc, "Interrupt Cause Receiver Overrun Count"); /* Host to Card Stats */ host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", CTLFLAG_RD, NULL, "Host to Card Statistics"); host_list = SYSCTL_CHILDREN(host_node); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt", CTLFLAG_RD, &stats->cbtmpc, "Circuit Breaker Tx Packet Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard", CTLFLAG_RD, &stats->htdpmc, "Host Transmit Discarded Packets"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt", CTLFLAG_RD, &stats->rpthc, "Rx Packets To Host"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts", CTLFLAG_RD, &stats->cbrmpc, "Circuit Breaker Rx Packet Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop", CTLFLAG_RD, &stats->cbrdpc, "Circuit Breaker Rx Dropped Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt", CTLFLAG_RD, &stats->hgptc, "Host Good Packets Tx Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop", CTLFLAG_RD, &stats->htcbdpc, "Host Tx Circuit Breaker Dropped Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes", CTLFLAG_RD, &stats->hgorc, "Host Good Octets Received Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes", CTLFLAG_RD, &stats->hgotc, "Host Good Octets Transmit Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors", CTLFLAG_RD, &stats->lenerrs, "Length Errors"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt", CTLFLAG_RD, &stats->scvpc, "SerDes/SGMII Code Violation Pkt Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed", CTLFLAG_RD, &stats->hrmpc, "Header Redirection Missed Packet Count"); } /********************************************************************** * * This routine provides a way to dump out the adapter eeprom, * often a useful debug/service tool. This only dumps the first * 32 words, stuff that matters is in that extent. * **********************************************************************/ static int igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); /* * This value will cause a hex dump of the * first 32 16-bit words of the EEPROM to * the screen. */ if (result == 1) { adapter = (struct adapter *)arg1; igb_print_nvm_info(adapter); } return (error); } static void igb_print_nvm_info(struct adapter *adapter) { u16 eeprom_data; int i, j, row = 0; /* Its a bit crude, but it gets the job done */ printf("\nInterface EEPROM Dump:\n"); printf("Offset\n0x0000 "); for (i = 0, j = 0; i < 32; i++, j++) { if (j == 8) { /* Make the offset block */ j = 0; ++row; printf("\n0x00%x0 ",row); } e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); printf("%04x ", eeprom_data); } printf("\n"); } static void igb_add_rx_process_limit(struct adapter *adapter, const char *name, const char *description, int *limit, int value) { *limit = value; SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description); } Index: projects/binutils-2.17/sys/dev/e1000/if_igb.h =================================================================== --- projects/binutils-2.17/sys/dev/e1000/if_igb.h (revision 215829) +++ projects/binutils-2.17/sys/dev/e1000/if_igb.h (revision 215830) @@ -1,515 +1,526 @@ /****************************************************************************** Copyright (c) 2001-2010, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifndef _IGB_H_DEFINED_ #define _IGB_H_DEFINED_ /* Tunables */ /* * IGB_TXD: Maximum number of Transmit Descriptors * * This value is the number of transmit descriptors allocated by the driver. * Increasing this value allows the driver to queue more transmits. Each * descriptor is 16 bytes. * Since TDLEN should be multiple of 128bytes, the number of transmit * desscriptors should meet the following condition. * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 */ #define IGB_MIN_TXD 256 #define IGB_DEFAULT_TXD 1024 #define IGB_MAX_TXD 4096 /* * IGB_RXD: Maximum number of Transmit Descriptors * * This value is the number of receive descriptors allocated by the driver. * Increasing this value allows the driver to buffer more incoming packets. * Each descriptor is 16 bytes. A receive buffer is also allocated for each * descriptor. The maximum MTU size is 16110. * Since TDLEN should be multiple of 128bytes, the number of transmit * desscriptors should meet the following condition. * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 */ #define IGB_MIN_RXD 256 #define IGB_DEFAULT_RXD 1024 #define IGB_MAX_RXD 4096 /* * IGB_TIDV - Transmit Interrupt Delay Value * Valid Range: 0-65535 (0=off) * Default Value: 64 * This value delays the generation of transmit interrupts in units of * 1.024 microseconds. Transmit interrupt reduction can improve CPU * efficiency if properly tuned for specific network traffic. If the * system is reporting dropped transmits, this value may be set too high * causing the driver to run out of available transmit descriptors. */ #define IGB_TIDV 64 /* * IGB_TADV - Transmit Absolute Interrupt Delay Value * Valid Range: 0-65535 (0=off) * Default Value: 64 * This value, in units of 1.024 microseconds, limits the delay in which a * transmit interrupt is generated. Useful only if IGB_TIDV is non-zero, * this value ensures that an interrupt is generated after the initial * packet is sent on the wire within the set amount of time. Proper tuning, * along with IGB_TIDV, may improve traffic throughput in specific * network conditions. */ #define IGB_TADV 64 /* * IGB_RDTR - Receive Interrupt Delay Timer (Packet Timer) * Valid Range: 0-65535 (0=off) * Default Value: 0 * This value delays the generation of receive interrupts in units of 1.024 * microseconds. Receive interrupt reduction can improve CPU efficiency if * properly tuned for specific network traffic. Increasing this value adds * extra latency to frame reception and can end up decreasing the throughput * of TCP traffic. If the system is reporting dropped receives, this value * may be set too high, causing the driver to run out of available receive * descriptors. * * CAUTION: When setting IGB_RDTR to a value other than 0, adapters * may hang (stop transmitting) under certain network conditions. * If this occurs a WATCHDOG message is logged in the system * event log. In addition, the controller is automatically reset, * restoring the network connection. To eliminate the potential * for the hang ensure that IGB_RDTR is set to 0. */ #define IGB_RDTR 0 /* * Receive Interrupt Absolute Delay Timer (Not valid for 82542/82543/82544) * Valid Range: 0-65535 (0=off) * Default Value: 64 * This value, in units of 1.024 microseconds, limits the delay in which a * receive interrupt is generated. Useful only if IGB_RDTR is non-zero, * this value ensures that an interrupt is generated after the initial * packet is received within the set amount of time. Proper tuning, * along with IGB_RDTR, may improve traffic throughput in specific network * conditions. */ #define IGB_RADV 64 /* * This parameter controls the duration of transmit watchdog timer. */ #define IGB_WATCHDOG (10 * hz) /* * This parameter controls when the driver calls the routine to reclaim * transmit descriptors. */ #define IGB_TX_CLEANUP_THRESHOLD (adapter->num_tx_desc / 8) #define IGB_TX_OP_THRESHOLD (adapter->num_tx_desc / 32) /* * This parameter controls whether or not autonegotation is enabled. * 0 - Disable autonegotiation * 1 - Enable autonegotiation */ #define DO_AUTO_NEG 1 /* * This parameter control whether or not the driver will wait for * autonegotiation to complete. * 1 - Wait for autonegotiation to complete * 0 - Don't wait for autonegotiation to complete */ #define WAIT_FOR_AUTO_NEG_DEFAULT 0 /* Tunables -- End */ #define AUTONEG_ADV_DEFAULT (ADVERTISE_10_HALF | ADVERTISE_10_FULL | \ ADVERTISE_100_HALF | ADVERTISE_100_FULL | \ ADVERTISE_1000_FULL) #define AUTO_ALL_MODES 0 /* PHY master/slave setting */ #define IGB_MASTER_SLAVE e1000_ms_hw_default /* * Micellaneous constants */ #define IGB_VENDOR_ID 0x8086 #define IGB_JUMBO_PBA 0x00000028 #define IGB_DEFAULT_PBA 0x00000030 #define IGB_SMARTSPEED_DOWNSHIFT 3 #define IGB_SMARTSPEED_MAX 15 #define IGB_MAX_LOOP 10 #define IGB_RX_PTHRESH (hw->mac.type <= e1000_82576 ? 16 : 8) #define IGB_RX_HTHRESH 8 #define IGB_RX_WTHRESH 1 #define IGB_TX_PTHRESH 8 #define IGB_TX_HTHRESH 1 #define IGB_TX_WTHRESH (((hw->mac.type == e1000_82576 || \ hw->mac.type == e1000_vfadapt) && \ adapter->msix_mem) ? 1 : 16) #define MAX_NUM_MULTICAST_ADDRESSES 128 #define PCI_ANY_ID (~0U) #define ETHER_ALIGN 2 #define IGB_TX_BUFFER_SIZE ((uint32_t) 1514) #define IGB_FC_PAUSE_TIME 0x0680 #define IGB_EEPROM_APME 0x400; +#define IGB_QUEUE_IDLE 0 +#define IGB_QUEUE_WORKING 1 +#define IGB_QUEUE_HUNG 2 /* * TDBA/RDBA should be aligned on 16 byte boundary. But TDLEN/RDLEN should be * multiple of 128 bytes. So we align TDBA/RDBA on 128 byte boundary. This will * also optimize cache line size effect. H/W supports up to cache line size 128. */ #define IGB_DBA_ALIGN 128 #define SPEED_MODE_BIT (1<<21) /* On PCI-E MACs only */ /* PCI Config defines */ #define IGB_MSIX_BAR 3 /* Defines for printing debug information */ #define DEBUG_INIT 0 #define DEBUG_IOCTL 0 #define DEBUG_HW 0 #define INIT_DEBUGOUT(S) if (DEBUG_INIT) printf(S "\n") #define INIT_DEBUGOUT1(S, A) if (DEBUG_INIT) printf(S "\n", A) #define INIT_DEBUGOUT2(S, A, B) if (DEBUG_INIT) printf(S "\n", A, B) #define IOCTL_DEBUGOUT(S) if (DEBUG_IOCTL) printf(S "\n") #define IOCTL_DEBUGOUT1(S, A) if (DEBUG_IOCTL) printf(S "\n", A) #define IOCTL_DEBUGOUT2(S, A, B) if (DEBUG_IOCTL) printf(S "\n", A, B) #define HW_DEBUGOUT(S) if (DEBUG_HW) printf(S "\n") #define HW_DEBUGOUT1(S, A) if (DEBUG_HW) printf(S "\n", A) #define HW_DEBUGOUT2(S, A, B) if (DEBUG_HW) printf(S "\n", A, B) #define IGB_MAX_SCATTER 64 #define IGB_VFTA_SIZE 128 #define IGB_BR_SIZE 4096 /* ring buf size */ #define IGB_TSO_SIZE (65535 + sizeof(struct ether_vlan_header)) #define IGB_TSO_SEG_SIZE 4096 /* Max dma segment size */ #define IGB_HDR_BUF 128 #define IGB_PKTTYPE_MASK 0x0000FFF0 #define ETH_ZLEN 60 #define ETH_ADDR_LEN 6 /* Offload bits in mbuf flag */ #if __FreeBSD_version >= 800000 #define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP|CSUM_SCTP) #else #define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP) #endif /* Define the starting Interrupt rate per Queue */ #define IGB_INTS_PER_SEC 8000 -#define IGB_DEFAULT_ITR 1000000000/(IGB_INTS_PER_SEC * 256) +#define IGB_DEFAULT_ITR ((1000000/IGB_INTS_PER_SEC) << 2) #define IGB_LINK_ITR 2000 /* Precision Time Sync (IEEE 1588) defines */ #define ETHERTYPE_IEEE1588 0x88F7 #define PICOSECS_PER_TICK 20833 #define TSYNC_PORT 319 /* UDP port for the protocol */ /* * Bus dma allocation structure used by * e1000_dma_malloc and e1000_dma_free. */ struct igb_dma_alloc { bus_addr_t dma_paddr; caddr_t dma_vaddr; bus_dma_tag_t dma_tag; bus_dmamap_t dma_map; bus_dma_segment_t dma_seg; int dma_nseg; }; /* ** Driver queue struct: this is the interrupt container ** for the associated tx and rx ring. */ struct igb_queue { struct adapter *adapter; u32 msix; /* This queue's MSIX vector */ u32 eims; /* This queue's EIMS bit */ u32 eitr_setting; struct resource *res; void *tag; struct tx_ring *txr; struct rx_ring *rxr; struct task que_task; struct taskqueue *tq; u64 irqs; }; /* * Transmit ring: one per queue */ struct tx_ring { struct adapter *adapter; u32 me; struct mtx tx_mtx; char mtx_name[16]; struct igb_dma_alloc txdma; struct e1000_tx_desc *tx_base; u32 next_avail_desc; u32 next_to_clean; volatile u16 tx_avail; struct igb_tx_buffer *tx_buffers; #if __FreeBSD_version >= 800000 struct buf_ring *br; #endif bus_dma_tag_t txtag; u32 bytes; u32 packets; - bool watchdog_check; + int queue_status; int watchdog_time; int tdt; int tdh; u64 no_desc_avail; u64 tx_packets; }; /* * Receive ring: one per queue */ struct rx_ring { struct adapter *adapter; u32 me; struct igb_dma_alloc rxdma; union e1000_adv_rx_desc *rx_base; struct lro_ctrl lro; bool lro_enabled; bool hdr_split; bool discard; struct mtx rx_mtx; char mtx_name[16]; u32 next_to_refresh; u32 next_to_check; struct igb_rx_buf *rx_buffers; bus_dma_tag_t htag; /* dma tag for rx head */ bus_dma_tag_t ptag; /* dma tag for rx packet */ /* * First/last mbuf pointers, for * collecting multisegment RX packets. */ struct mbuf *fmp; struct mbuf *lmp; u32 bytes; u32 packets; int rdt; int rdh; /* Soft stats */ u64 rx_split_packets; u64 rx_discarded; u64 rx_packets; u64 rx_bytes; }; struct adapter { struct ifnet *ifp; struct e1000_hw hw; struct e1000_osdep osdep; struct device *dev; struct cdev *led_dev; struct resource *pci_mem; struct resource *msix_mem; struct resource *res; void *tag; u32 eims_mask; int linkvec; int link_mask; struct task link_task; int link_irq; struct ifmedia media; struct callout timer; int msix; /* total vectors allocated */ int if_flags; int max_frame_size; int min_frame_size; int pause_frames; struct mtx core_mtx; int igb_insert_vlan_header; u16 num_queues; eventhandler_tag vlan_attach; eventhandler_tag vlan_detach; u32 num_vlans; /* Management and WOL features */ int wol; int has_manage; - /* Info about the board itself */ + /* + ** Shadow VFTA table, this is needed because + ** the real vlan filter table gets cleared during + ** a soft reset and the driver needs to be able + ** to repopulate it. + */ + u32 shadow_vfta[IGB_VFTA_SIZE]; + + /* Info about the interface */ u8 link_active; u16 link_speed; u16 link_duplex; u32 smartspeed; /* Interface queues */ struct igb_queue *queues; /* * Transmit rings */ struct tx_ring *tx_rings; u16 num_tx_desc; /* Multicast array pointer */ u8 *mta; /* * Receive rings */ struct rx_ring *rx_rings; bool rx_hdr_split; u16 num_rx_desc; int rx_process_limit; u32 rx_mbuf_sz; u32 rx_mask; /* Misc stats maintained by the driver */ unsigned long dropped_pkts; unsigned long mbuf_defrag_failed; unsigned long mbuf_header_failed; unsigned long mbuf_packet_failed; unsigned long no_tx_map_avail; unsigned long no_tx_dma_setup; unsigned long watchdog_events; unsigned long rx_overruns; unsigned long device_control; unsigned long rx_control; unsigned long int_mask; unsigned long eint_mask; unsigned long packet_buf_alloc_rx; unsigned long packet_buf_alloc_tx; boolean_t in_detach; #ifdef IGB_IEEE1588 /* IEEE 1588 precision time support */ struct cyclecounter cycles; struct nettimer clock; struct nettime_compare compare; struct hwtstamp_ctrl hwtstamp; #endif void *stats; }; /* ****************************************************************************** * vendor_info_array * * This array contains the list of Subvendor/Subdevice IDs on which the driver * should load. * * ******************************************************************************/ typedef struct _igb_vendor_info_t { unsigned int vendor_id; unsigned int device_id; unsigned int subvendor_id; unsigned int subdevice_id; unsigned int index; } igb_vendor_info_t; struct igb_tx_buffer { int next_eop; /* Index of the desc to watch */ struct mbuf *m_head; bus_dmamap_t map; /* bus_dma map for packet */ }; struct igb_rx_buf { struct mbuf *m_head; struct mbuf *m_pack; bus_dmamap_t hmap; /* bus_dma map for header */ bus_dmamap_t pmap; /* bus_dma map for packet */ }; #define IGB_CORE_LOCK_INIT(_sc, _name) \ mtx_init(&(_sc)->core_mtx, _name, "IGB Core Lock", MTX_DEF) #define IGB_CORE_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->core_mtx) #define IGB_CORE_LOCK(_sc) mtx_lock(&(_sc)->core_mtx) #define IGB_CORE_UNLOCK(_sc) mtx_unlock(&(_sc)->core_mtx) #define IGB_CORE_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->core_mtx, MA_OWNED) #define IGB_TX_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->tx_mtx) #define IGB_TX_LOCK(_sc) mtx_lock(&(_sc)->tx_mtx) #define IGB_TX_UNLOCK(_sc) mtx_unlock(&(_sc)->tx_mtx) #define IGB_TX_TRYLOCK(_sc) mtx_trylock(&(_sc)->tx_mtx) #define IGB_TX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->tx_mtx, MA_OWNED) #define IGB_RX_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->rx_mtx) #define IGB_RX_LOCK(_sc) mtx_lock(&(_sc)->rx_mtx) #define IGB_RX_UNLOCK(_sc) mtx_unlock(&(_sc)->rx_mtx) #define IGB_RX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->rx_mtx, MA_OWNED) #define UPDATE_VF_REG(reg, last, cur) \ { \ u32 new = E1000_READ_REG(hw, reg); \ if (new < last) \ cur += 0x100000000LL; \ last = new; \ cur &= 0xFFFFFFFF00000000LL; \ cur |= new; \ } #if __FreeBSD_version < 800504 static __inline int drbr_needs_enqueue(struct ifnet *ifp, struct buf_ring *br) { #ifdef ALTQ if (ALTQ_IS_ENABLED(&ifp->if_snd)) return (1); #endif return (!buf_ring_empty(br)); } #endif #endif /* _IGB_H_DEFINED_ */ Index: projects/binutils-2.17/sys/dev/fxp/if_fxp.c =================================================================== --- projects/binutils-2.17/sys/dev/fxp/if_fxp.c (revision 215829) +++ projects/binutils-2.17/sys/dev/fxp/if_fxp.c (revision 215830) @@ -1,3187 +1,3175 @@ /*- * Copyright (c) 1995, David Greenman * Copyright (c) 2001 Jonathan Lemon * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * Intel EtherExpress Pro/100B PCI Fast Ethernet driver */ #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for PCIM_CMD_xxx */ #include #include #include #include #include MODULE_DEPEND(fxp, pci, 1, 1, 1); MODULE_DEPEND(fxp, ether, 1, 1, 1); MODULE_DEPEND(fxp, miibus, 1, 1, 1); #include "miibus_if.h" /* * NOTE! On the Alpha, we have an alignment constraint. The * card DMAs the packet immediately following the RFA. However, * the first thing in the packet is a 14-byte Ethernet header. * This means that the packet is misaligned. To compensate, * we actually offset the RFA 2 bytes into the cluster. This * alignes the packet after the Ethernet header at a 32-bit * boundary. HOWEVER! This means that the RFA is misaligned! */ #define RFA_ALIGNMENT_FUDGE 2 /* * Set initial transmit threshold at 64 (512 bytes). This is * increased by 64 (512 bytes) at a time, to maximum of 192 * (1536 bytes), if an underrun occurs. */ static int tx_threshold = 64; /* * The configuration byte map has several undefined fields which * must be one or must be zero. Set up a template for these bits. - * The actual configuration is performed in fxp_init. + * The actual configuration is performed in fxp_init_body. * * See struct fxp_cb_config for the bit definitions. */ -static u_char fxp_cb_config_template[] = { +static const u_char const fxp_cb_config_template[] = { 0x0, 0x0, /* cb_status */ 0x0, 0x0, /* cb_command */ 0x0, 0x0, 0x0, 0x0, /* link_addr */ 0x0, /* 0 */ 0x0, /* 1 */ 0x0, /* 2 */ 0x0, /* 3 */ 0x0, /* 4 */ 0x0, /* 5 */ 0x32, /* 6 */ 0x0, /* 7 */ 0x0, /* 8 */ 0x0, /* 9 */ 0x6, /* 10 */ 0x0, /* 11 */ 0x0, /* 12 */ 0x0, /* 13 */ 0xf2, /* 14 */ 0x48, /* 15 */ 0x0, /* 16 */ 0x40, /* 17 */ 0xf0, /* 18 */ 0x0, /* 19 */ 0x3f, /* 20 */ 0x5, /* 21 */ 0x0, /* 22 */ 0x0, /* 23 */ 0x0, /* 24 */ 0x0, /* 25 */ 0x0, /* 26 */ 0x0, /* 27 */ 0x0, /* 28 */ 0x0, /* 29 */ 0x0, /* 30 */ 0x0 /* 31 */ }; /* * Claim various Intel PCI device identifiers for this driver. The * sub-vendor and sub-device field are extensively used to identify * particular variants, but we don't currently differentiate between * them. */ -static struct fxp_ident fxp_ident_table[] = { +static const struct fxp_ident const fxp_ident_table[] = { { 0x1029, -1, 0, "Intel 82559 PCI/CardBus Pro/100" }, { 0x1030, -1, 0, "Intel 82559 Pro/100 Ethernet" }, { 0x1031, -1, 3, "Intel 82801CAM (ICH3) Pro/100 VE Ethernet" }, { 0x1032, -1, 3, "Intel 82801CAM (ICH3) Pro/100 VE Ethernet" }, { 0x1033, -1, 3, "Intel 82801CAM (ICH3) Pro/100 VM Ethernet" }, { 0x1034, -1, 3, "Intel 82801CAM (ICH3) Pro/100 VM Ethernet" }, { 0x1035, -1, 3, "Intel 82801CAM (ICH3) Pro/100 Ethernet" }, { 0x1036, -1, 3, "Intel 82801CAM (ICH3) Pro/100 Ethernet" }, { 0x1037, -1, 3, "Intel 82801CAM (ICH3) Pro/100 Ethernet" }, { 0x1038, -1, 3, "Intel 82801CAM (ICH3) Pro/100 VM Ethernet" }, { 0x1039, -1, 4, "Intel 82801DB (ICH4) Pro/100 VE Ethernet" }, { 0x103A, -1, 4, "Intel 82801DB (ICH4) Pro/100 Ethernet" }, { 0x103B, -1, 4, "Intel 82801DB (ICH4) Pro/100 VM Ethernet" }, { 0x103C, -1, 4, "Intel 82801DB (ICH4) Pro/100 Ethernet" }, { 0x103D, -1, 4, "Intel 82801DB (ICH4) Pro/100 VE Ethernet" }, { 0x103E, -1, 4, "Intel 82801DB (ICH4) Pro/100 VM Ethernet" }, { 0x1050, -1, 5, "Intel 82801BA (D865) Pro/100 VE Ethernet" }, { 0x1051, -1, 5, "Intel 82562ET (ICH5/ICH5R) Pro/100 VE Ethernet" }, { 0x1059, -1, 0, "Intel 82551QM Pro/100 M Mobile Connection" }, { 0x1064, -1, 6, "Intel 82562EZ (ICH6)" }, { 0x1065, -1, 6, "Intel 82562ET/EZ/GT/GZ PRO/100 VE Ethernet" }, { 0x1068, -1, 6, "Intel 82801FBM (ICH6-M) Pro/100 VE Ethernet" }, { 0x1069, -1, 6, "Intel 82562EM/EX/GX Pro/100 Ethernet" }, { 0x1091, -1, 7, "Intel 82562GX Pro/100 Ethernet" }, { 0x1092, -1, 7, "Intel Pro/100 VE Network Connection" }, { 0x1093, -1, 7, "Intel Pro/100 VM Network Connection" }, { 0x1094, -1, 7, "Intel Pro/100 946GZ (ICH7) Network Connection" }, { 0x1209, -1, 0, "Intel 82559ER Embedded 10/100 Ethernet" }, { 0x1229, 0x01, 0, "Intel 82557 Pro/100 Ethernet" }, { 0x1229, 0x02, 0, "Intel 82557 Pro/100 Ethernet" }, { 0x1229, 0x03, 0, "Intel 82557 Pro/100 Ethernet" }, { 0x1229, 0x04, 0, "Intel 82558 Pro/100 Ethernet" }, { 0x1229, 0x05, 0, "Intel 82558 Pro/100 Ethernet" }, { 0x1229, 0x06, 0, "Intel 82559 Pro/100 Ethernet" }, { 0x1229, 0x07, 0, "Intel 82559 Pro/100 Ethernet" }, { 0x1229, 0x08, 0, "Intel 82559 Pro/100 Ethernet" }, { 0x1229, 0x09, 0, "Intel 82559ER Pro/100 Ethernet" }, { 0x1229, 0x0c, 0, "Intel 82550 Pro/100 Ethernet" }, { 0x1229, 0x0d, 0, "Intel 82550 Pro/100 Ethernet" }, { 0x1229, 0x0e, 0, "Intel 82550 Pro/100 Ethernet" }, { 0x1229, 0x0f, 0, "Intel 82551 Pro/100 Ethernet" }, { 0x1229, 0x10, 0, "Intel 82551 Pro/100 Ethernet" }, { 0x1229, -1, 0, "Intel 82557/8/9 Pro/100 Ethernet" }, { 0x2449, -1, 2, "Intel 82801BA/CAM (ICH2/3) Pro/100 Ethernet" }, { 0x27dc, -1, 7, "Intel 82801GB (ICH7) 10/100 Ethernet" }, { 0, -1, 0, NULL }, }; #ifdef FXP_IP_CSUM_WAR #define FXP_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP) #else #define FXP_CSUM_FEATURES (CSUM_TCP | CSUM_UDP) #endif static int fxp_probe(device_t dev); static int fxp_attach(device_t dev); static int fxp_detach(device_t dev); static int fxp_shutdown(device_t dev); static int fxp_suspend(device_t dev); static int fxp_resume(device_t dev); -static struct fxp_ident *fxp_find_ident(device_t dev); +static const struct fxp_ident *fxp_find_ident(device_t dev); static void fxp_intr(void *xsc); static void fxp_rxcsum(struct fxp_softc *sc, struct ifnet *ifp, struct mbuf *m, uint16_t status, int pos); static int fxp_intr_body(struct fxp_softc *sc, struct ifnet *ifp, uint8_t statack, int count); static void fxp_init(void *xsc); static void fxp_init_body(struct fxp_softc *sc); static void fxp_tick(void *xsc); static void fxp_start(struct ifnet *ifp); static void fxp_start_body(struct ifnet *ifp); static int fxp_encap(struct fxp_softc *sc, struct mbuf **m_head); static void fxp_txeof(struct fxp_softc *sc); static void fxp_stop(struct fxp_softc *sc); static void fxp_release(struct fxp_softc *sc); static int fxp_ioctl(struct ifnet *ifp, u_long command, caddr_t data); static void fxp_watchdog(struct fxp_softc *sc); static void fxp_add_rfabuf(struct fxp_softc *sc, struct fxp_rx *rxp); static void fxp_discard_rfabuf(struct fxp_softc *sc, struct fxp_rx *rxp); static int fxp_new_rfabuf(struct fxp_softc *sc, struct fxp_rx *rxp); static int fxp_mc_addrs(struct fxp_softc *sc); static void fxp_mc_setup(struct fxp_softc *sc); static uint16_t fxp_eeprom_getword(struct fxp_softc *sc, int offset, int autosize); static void fxp_eeprom_putword(struct fxp_softc *sc, int offset, uint16_t data); static void fxp_autosize_eeprom(struct fxp_softc *sc); static void fxp_read_eeprom(struct fxp_softc *sc, u_short *data, int offset, int words); static void fxp_write_eeprom(struct fxp_softc *sc, u_short *data, int offset, int words); static int fxp_ifmedia_upd(struct ifnet *ifp); static void fxp_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); static int fxp_serial_ifmedia_upd(struct ifnet *ifp); static void fxp_serial_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); static int fxp_miibus_readreg(device_t dev, int phy, int reg); static int fxp_miibus_writereg(device_t dev, int phy, int reg, int value); static void fxp_load_ucode(struct fxp_softc *sc); static void fxp_update_stats(struct fxp_softc *sc); static void fxp_sysctl_node(struct fxp_softc *sc); static int sysctl_int_range(SYSCTL_HANDLER_ARGS, int low, int high); static int sysctl_hw_fxp_bundle_max(SYSCTL_HANDLER_ARGS); static int sysctl_hw_fxp_int_delay(SYSCTL_HANDLER_ARGS); static void fxp_scb_wait(struct fxp_softc *sc); static void fxp_scb_cmd(struct fxp_softc *sc, int cmd); static void fxp_dma_wait(struct fxp_softc *sc, volatile uint16_t *status, bus_dma_tag_t dmat, bus_dmamap_t map); static device_method_t fxp_methods[] = { /* Device interface */ DEVMETHOD(device_probe, fxp_probe), DEVMETHOD(device_attach, fxp_attach), DEVMETHOD(device_detach, fxp_detach), DEVMETHOD(device_shutdown, fxp_shutdown), DEVMETHOD(device_suspend, fxp_suspend), DEVMETHOD(device_resume, fxp_resume), /* MII interface */ DEVMETHOD(miibus_readreg, fxp_miibus_readreg), DEVMETHOD(miibus_writereg, fxp_miibus_writereg), { 0, 0 } }; static driver_t fxp_driver = { "fxp", fxp_methods, sizeof(struct fxp_softc), }; static devclass_t fxp_devclass; DRIVER_MODULE(fxp, pci, fxp_driver, fxp_devclass, 0, 0); DRIVER_MODULE(miibus, fxp, miibus_driver, miibus_devclass, 0, 0); static struct resource_spec fxp_res_spec_mem[] = { { SYS_RES_MEMORY, FXP_PCI_MMBA, RF_ACTIVE }, { SYS_RES_IRQ, 0, RF_ACTIVE | RF_SHAREABLE }, { -1, 0 } }; static struct resource_spec fxp_res_spec_io[] = { { SYS_RES_IOPORT, FXP_PCI_IOBA, RF_ACTIVE }, { SYS_RES_IRQ, 0, RF_ACTIVE | RF_SHAREABLE }, { -1, 0 } }; /* * Wait for the previous command to be accepted (but not necessarily * completed). */ static void fxp_scb_wait(struct fxp_softc *sc) { union { uint16_t w; uint8_t b[2]; } flowctl; int i = 10000; while (CSR_READ_1(sc, FXP_CSR_SCB_COMMAND) && --i) DELAY(2); if (i == 0) { flowctl.b[0] = CSR_READ_1(sc, FXP_CSR_FLOWCONTROL); flowctl.b[1] = CSR_READ_1(sc, FXP_CSR_FLOWCONTROL + 1); device_printf(sc->dev, "SCB timeout: 0x%x 0x%x 0x%x 0x%x\n", CSR_READ_1(sc, FXP_CSR_SCB_COMMAND), CSR_READ_1(sc, FXP_CSR_SCB_STATACK), CSR_READ_1(sc, FXP_CSR_SCB_RUSCUS), flowctl.w); } } static void fxp_scb_cmd(struct fxp_softc *sc, int cmd) { if (cmd == FXP_SCB_COMMAND_CU_RESUME && sc->cu_resume_bug) { CSR_WRITE_1(sc, FXP_CSR_SCB_COMMAND, FXP_CB_COMMAND_NOP); fxp_scb_wait(sc); } CSR_WRITE_1(sc, FXP_CSR_SCB_COMMAND, cmd); } static void fxp_dma_wait(struct fxp_softc *sc, volatile uint16_t *status, bus_dma_tag_t dmat, bus_dmamap_t map) { int i; for (i = 10000; i > 0; i--) { DELAY(2); bus_dmamap_sync(dmat, map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); if ((le16toh(*status) & FXP_CB_STATUS_C) != 0) break; } if (i == 0) device_printf(sc->dev, "DMA timeout\n"); } -static struct fxp_ident * +static const struct fxp_ident * fxp_find_ident(device_t dev) { uint16_t devid; uint8_t revid; - struct fxp_ident *ident; + const struct fxp_ident *ident; if (pci_get_vendor(dev) == FXP_VENDORID_INTEL) { devid = pci_get_device(dev); revid = pci_get_revid(dev); for (ident = fxp_ident_table; ident->name != NULL; ident++) { if (ident->devid == devid && (ident->revid == revid || ident->revid == -1)) { return (ident); } } } return (NULL); } /* * Return identification string if this device is ours. */ static int fxp_probe(device_t dev) { - struct fxp_ident *ident; + const struct fxp_ident *ident; ident = fxp_find_ident(dev); if (ident != NULL) { device_set_desc(dev, ident->name); return (BUS_PROBE_DEFAULT); } return (ENXIO); } static void fxp_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { uint32_t *addr; if (error) return; KASSERT(nseg == 1, ("too many DMA segments, %d should be 1", nseg)); addr = arg; *addr = segs->ds_addr; } static int fxp_attach(device_t dev) { struct fxp_softc *sc; struct fxp_cb_tx *tcbp; struct fxp_tx *txp; struct fxp_rx *rxp; struct ifnet *ifp; uint32_t val; uint16_t data, myea[ETHER_ADDR_LEN / 2]; u_char eaddr[ETHER_ADDR_LEN]; int i, pmc, prefer_iomap; int error; error = 0; sc = device_get_softc(dev); sc->dev = dev; mtx_init(&sc->sc_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->stat_ch, &sc->sc_mtx, 0); ifmedia_init(&sc->sc_media, 0, fxp_serial_ifmedia_upd, fxp_serial_ifmedia_sts); ifp = sc->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not if_alloc()\n"); error = ENOSPC; goto fail; } /* * Enable bus mastering. */ pci_enable_busmaster(dev); val = pci_read_config(dev, PCIR_COMMAND, 2); /* * Figure out which we should try first - memory mapping or i/o mapping? * We default to memory mapping. Then we accept an override from the * command line. Then we check to see which one is enabled. */ prefer_iomap = 0; resource_int_value(device_get_name(dev), device_get_unit(dev), "prefer_iomap", &prefer_iomap); if (prefer_iomap) sc->fxp_spec = fxp_res_spec_io; else sc->fxp_spec = fxp_res_spec_mem; error = bus_alloc_resources(dev, sc->fxp_spec, sc->fxp_res); if (error) { if (sc->fxp_spec == fxp_res_spec_mem) sc->fxp_spec = fxp_res_spec_io; else sc->fxp_spec = fxp_res_spec_mem; error = bus_alloc_resources(dev, sc->fxp_spec, sc->fxp_res); } if (error) { device_printf(dev, "could not allocate resources\n"); error = ENXIO; goto fail; } if (bootverbose) { device_printf(dev, "using %s space register mapping\n", sc->fxp_spec == fxp_res_spec_mem ? "memory" : "I/O"); } /* * Put CU/RU idle state and prepare full reset. */ CSR_WRITE_4(sc, FXP_CSR_PORT, FXP_PORT_SELECTIVE_RESET); DELAY(10); /* Full reset and disable interrupts. */ CSR_WRITE_4(sc, FXP_CSR_PORT, FXP_PORT_SOFTWARE_RESET); DELAY(10); CSR_WRITE_1(sc, FXP_CSR_SCB_INTRCNTL, FXP_SCB_INTR_DISABLE); /* * Find out how large of an SEEPROM we have. */ fxp_autosize_eeprom(sc); /* * Find out the chip revision; lump all 82557 revs together. */ sc->ident = fxp_find_ident(dev); if (sc->ident->ich > 0) { /* Assume ICH controllers are 82559. */ sc->revision = FXP_REV_82559_A0; } else { fxp_read_eeprom(sc, &data, 5, 1); if ((data >> 8) == 1) sc->revision = FXP_REV_82557; else sc->revision = pci_get_revid(dev); } /* * Check availability of WOL. 82559ER does not support WOL. */ if (sc->revision >= FXP_REV_82558_A4 && sc->revision != FXP_REV_82559S_A) { fxp_read_eeprom(sc, &data, 10, 1); if ((data & 0x20) != 0 && pci_find_extcap(sc->dev, PCIY_PMG, &pmc) == 0) sc->flags |= FXP_FLAG_WOLCAP; } /* Receiver lock-up workaround detection. */ fxp_read_eeprom(sc, &data, 3, 1); if ((data & 0x03) != 0x03) { sc->flags |= FXP_FLAG_RXBUG; device_printf(dev, "Enabling Rx lock-up workaround\n"); } /* * Determine whether we must use the 503 serial interface. */ fxp_read_eeprom(sc, &data, 6, 1); if (sc->revision == FXP_REV_82557 && (data & FXP_PHY_DEVICE_MASK) != 0 && (data & FXP_PHY_SERIAL_ONLY)) sc->flags |= FXP_FLAG_SERIAL_MEDIA; fxp_sysctl_node(sc); /* * Enable workarounds for certain chip revision deficiencies. * * Systems based on the ICH2/ICH2-M chip from Intel, and possibly * some systems based a normal 82559 design, have a defect where * the chip can cause a PCI protocol violation if it receives * a CU_RESUME command when it is entering the IDLE state. The * workaround is to disable Dynamic Standby Mode, so the chip never * deasserts CLKRUN#, and always remains in an active state. * * See Intel 82801BA/82801BAM Specification Update, Errata #30. */ if ((sc->ident->ich >= 2 && sc->ident->ich <= 3) || (sc->ident->ich == 0 && sc->revision >= FXP_REV_82559_A0)) { fxp_read_eeprom(sc, &data, 10, 1); if (data & 0x02) { /* STB enable */ uint16_t cksum; int i; device_printf(dev, "Disabling dynamic standby mode in EEPROM\n"); data &= ~0x02; fxp_write_eeprom(sc, &data, 10, 1); device_printf(dev, "New EEPROM ID: 0x%x\n", data); cksum = 0; for (i = 0; i < (1 << sc->eeprom_size) - 1; i++) { fxp_read_eeprom(sc, &data, i, 1); cksum += data; } i = (1 << sc->eeprom_size) - 1; cksum = 0xBABA - cksum; fxp_read_eeprom(sc, &data, i, 1); fxp_write_eeprom(sc, &cksum, i, 1); device_printf(dev, "EEPROM checksum @ 0x%x: 0x%x -> 0x%x\n", i, data, cksum); #if 1 /* * If the user elects to continue, try the software * workaround, as it is better than nothing. */ sc->flags |= FXP_FLAG_CU_RESUME_BUG; #endif } } /* * If we are not a 82557 chip, we can enable extended features. */ if (sc->revision != FXP_REV_82557) { /* * If MWI is enabled in the PCI configuration, and there * is a valid cacheline size (8 or 16 dwords), then tell * the board to turn on MWI. */ if (val & PCIM_CMD_MWRICEN && pci_read_config(dev, PCIR_CACHELNSZ, 1) != 0) sc->flags |= FXP_FLAG_MWI_ENABLE; /* turn on the extended TxCB feature */ sc->flags |= FXP_FLAG_EXT_TXCB; /* enable reception of long frames for VLAN */ sc->flags |= FXP_FLAG_LONG_PKT_EN; } else { /* a hack to get long VLAN frames on a 82557 */ sc->flags |= FXP_FLAG_SAVE_BAD; } /* For 82559 or later chips, Rx checksum offload is supported. */ if (sc->revision >= FXP_REV_82559_A0) { /* 82559ER does not support Rx checksum offloading. */ if (sc->ident->devid != 0x1209) sc->flags |= FXP_FLAG_82559_RXCSUM; } /* * Enable use of extended RFDs and TCBs for 82550 * and later chips. Note: we need extended TXCB support * too, but that's already enabled by the code above. * Be careful to do this only on the right devices. */ if (sc->revision == FXP_REV_82550 || sc->revision == FXP_REV_82550_C || sc->revision == FXP_REV_82551_E || sc->revision == FXP_REV_82551_F || sc->revision == FXP_REV_82551_10) { sc->rfa_size = sizeof (struct fxp_rfa); sc->tx_cmd = FXP_CB_COMMAND_IPCBXMIT; sc->flags |= FXP_FLAG_EXT_RFA; /* Use extended RFA instead of 82559 checksum mode. */ sc->flags &= ~FXP_FLAG_82559_RXCSUM; } else { sc->rfa_size = sizeof (struct fxp_rfa) - FXP_RFAX_LEN; sc->tx_cmd = FXP_CB_COMMAND_XMIT; } /* * Allocate DMA tags and DMA safe memory. */ sc->maxtxseg = FXP_NTXSEG; sc->maxsegsize = MCLBYTES; if (sc->flags & FXP_FLAG_EXT_RFA) { sc->maxtxseg--; sc->maxsegsize = FXP_TSO_SEGSIZE; } error = bus_dma_tag_create(bus_get_dma_tag(dev), 2, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, sc->maxsegsize * sc->maxtxseg + sizeof(struct ether_vlan_header), sc->maxtxseg, sc->maxsegsize, 0, busdma_lock_mutex, &Giant, &sc->fxp_txmtag); if (error) { device_printf(dev, "could not create TX DMA tag\n"); goto fail; } error = bus_dma_tag_create(bus_get_dma_tag(dev), 2, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, MCLBYTES, 0, busdma_lock_mutex, &Giant, &sc->fxp_rxmtag); if (error) { device_printf(dev, "could not create RX DMA tag\n"); goto fail; } error = bus_dma_tag_create(bus_get_dma_tag(dev), 4, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, sizeof(struct fxp_stats), 1, sizeof(struct fxp_stats), 0, busdma_lock_mutex, &Giant, &sc->fxp_stag); if (error) { device_printf(dev, "could not create stats DMA tag\n"); goto fail; } error = bus_dmamem_alloc(sc->fxp_stag, (void **)&sc->fxp_stats, BUS_DMA_NOWAIT | BUS_DMA_ZERO, &sc->fxp_smap); if (error) { device_printf(dev, "could not allocate stats DMA memory\n"); goto fail; } error = bus_dmamap_load(sc->fxp_stag, sc->fxp_smap, sc->fxp_stats, sizeof(struct fxp_stats), fxp_dma_map_addr, &sc->stats_addr, 0); if (error) { device_printf(dev, "could not load the stats DMA buffer\n"); goto fail; } error = bus_dma_tag_create(bus_get_dma_tag(dev), 4, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, FXP_TXCB_SZ, 1, FXP_TXCB_SZ, 0, busdma_lock_mutex, &Giant, &sc->cbl_tag); if (error) { device_printf(dev, "could not create TxCB DMA tag\n"); goto fail; } error = bus_dmamem_alloc(sc->cbl_tag, (void **)&sc->fxp_desc.cbl_list, BUS_DMA_NOWAIT | BUS_DMA_ZERO, &sc->cbl_map); if (error) { device_printf(dev, "could not allocate TxCB DMA memory\n"); goto fail; } error = bus_dmamap_load(sc->cbl_tag, sc->cbl_map, sc->fxp_desc.cbl_list, FXP_TXCB_SZ, fxp_dma_map_addr, &sc->fxp_desc.cbl_addr, 0); if (error) { device_printf(dev, "could not load TxCB DMA buffer\n"); goto fail; } error = bus_dma_tag_create(bus_get_dma_tag(dev), 4, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, sizeof(struct fxp_cb_mcs), 1, sizeof(struct fxp_cb_mcs), 0, busdma_lock_mutex, &Giant, &sc->mcs_tag); if (error) { device_printf(dev, "could not create multicast setup DMA tag\n"); goto fail; } error = bus_dmamem_alloc(sc->mcs_tag, (void **)&sc->mcsp, BUS_DMA_NOWAIT | BUS_DMA_ZERO, &sc->mcs_map); if (error) { device_printf(dev, "could not allocate multicast setup DMA memory\n"); goto fail; } error = bus_dmamap_load(sc->mcs_tag, sc->mcs_map, sc->mcsp, sizeof(struct fxp_cb_mcs), fxp_dma_map_addr, &sc->mcs_addr, 0); if (error) { device_printf(dev, "can't load the multicast setup DMA buffer\n"); goto fail; } /* * Pre-allocate the TX DMA maps and setup the pointers to * the TX command blocks. */ txp = sc->fxp_desc.tx_list; tcbp = sc->fxp_desc.cbl_list; for (i = 0; i < FXP_NTXCB; i++) { txp[i].tx_cb = tcbp + i; error = bus_dmamap_create(sc->fxp_txmtag, 0, &txp[i].tx_map); if (error) { device_printf(dev, "can't create DMA map for TX\n"); goto fail; } } error = bus_dmamap_create(sc->fxp_rxmtag, 0, &sc->spare_map); if (error) { device_printf(dev, "can't create spare DMA map\n"); goto fail; } /* * Pre-allocate our receive buffers. */ sc->fxp_desc.rx_head = sc->fxp_desc.rx_tail = NULL; for (i = 0; i < FXP_NRFABUFS; i++) { rxp = &sc->fxp_desc.rx_list[i]; error = bus_dmamap_create(sc->fxp_rxmtag, 0, &rxp->rx_map); if (error) { device_printf(dev, "can't create DMA map for RX\n"); goto fail; } if (fxp_new_rfabuf(sc, rxp) != 0) { error = ENOMEM; goto fail; } fxp_add_rfabuf(sc, rxp); } /* * Read MAC address. */ fxp_read_eeprom(sc, myea, 0, 3); eaddr[0] = myea[0] & 0xff; eaddr[1] = myea[0] >> 8; eaddr[2] = myea[1] & 0xff; eaddr[3] = myea[1] >> 8; eaddr[4] = myea[2] & 0xff; eaddr[5] = myea[2] >> 8; if (bootverbose) { device_printf(dev, "PCI IDs: %04x %04x %04x %04x %04x\n", pci_get_vendor(dev), pci_get_device(dev), pci_get_subvendor(dev), pci_get_subdevice(dev), pci_get_revid(dev)); fxp_read_eeprom(sc, &data, 10, 1); device_printf(dev, "Dynamic Standby mode is %s\n", data & 0x02 ? "enabled" : "disabled"); } /* * If this is only a 10Mbps device, then there is no MII, and * the PHY will use a serial interface instead. * * The Seeq 80c24 AutoDUPLEX(tm) Ethernet Interface Adapter * doesn't have a programming interface of any sort. The * media is sensed automatically based on how the link partner * is configured. This is, in essence, manual configuration. */ if (sc->flags & FXP_FLAG_SERIAL_MEDIA) { ifmedia_add(&sc->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL); ifmedia_set(&sc->sc_media, IFM_ETHER|IFM_MANUAL); } else { /* * i82557 wedge when isolating all of their PHYs. */ error = mii_attach(dev, &sc->miibus, ifp, fxp_ifmedia_upd, fxp_ifmedia_sts, BMSR_DEFCAPMASK, MII_PHY_ANY, MII_OFFSET_ANY, MIIF_NOISOLATE); if (error != 0) { device_printf(dev, "attaching PHYs failed\n"); goto fail; } } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_init = fxp_init; ifp->if_softc = sc; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = fxp_ioctl; ifp->if_start = fxp_start; ifp->if_capabilities = ifp->if_capenable = 0; /* Enable checksum offload/TSO for 82550 or better chips */ if (sc->flags & FXP_FLAG_EXT_RFA) { ifp->if_hwassist = FXP_CSUM_FEATURES | CSUM_TSO; ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO4; ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_TSO4; } if (sc->flags & FXP_FLAG_82559_RXCSUM) { ifp->if_capabilities |= IFCAP_RXCSUM; ifp->if_capenable |= IFCAP_RXCSUM; } if (sc->flags & FXP_FLAG_WOLCAP) { ifp->if_capabilities |= IFCAP_WOL_MAGIC; ifp->if_capenable |= IFCAP_WOL_MAGIC; } #ifdef DEVICE_POLLING /* Inform the world we support polling. */ ifp->if_capabilities |= IFCAP_POLLING; #endif /* * Attach the interface. */ ether_ifattach(ifp, eaddr); /* * Tell the upper layer(s) we support long frames. * Must appear after the call to ether_ifattach() because * ether_ifattach() sets ifi_hdrlen to the default value. */ ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_VLAN_MTU; ifp->if_capenable |= IFCAP_VLAN_MTU; /* the hw bits already set */ if ((sc->flags & FXP_FLAG_EXT_RFA) != 0) { ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO; ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO; } /* * Let the system queue as many packets as we have available * TX descriptors. */ IFQ_SET_MAXLEN(&ifp->if_snd, FXP_NTXCB - 1); ifp->if_snd.ifq_drv_maxlen = FXP_NTXCB - 1; IFQ_SET_READY(&ifp->if_snd); /* * Hook our interrupt after all initialization is complete. */ error = bus_setup_intr(dev, sc->fxp_res[1], INTR_TYPE_NET | INTR_MPSAFE, NULL, fxp_intr, sc, &sc->ih); if (error) { device_printf(dev, "could not setup irq\n"); ether_ifdetach(sc->ifp); goto fail; } /* * Configure hardware to reject magic frames otherwise * system will hang on recipt of magic frames. */ if ((sc->flags & FXP_FLAG_WOLCAP) != 0) { FXP_LOCK(sc); /* Clear wakeup events. */ CSR_WRITE_1(sc, FXP_CSR_PMDR, CSR_READ_1(sc, FXP_CSR_PMDR)); fxp_init_body(sc); fxp_stop(sc); FXP_UNLOCK(sc); } fail: if (error) fxp_release(sc); return (error); } /* * Release all resources. The softc lock should not be held and the * interrupt should already be torn down. */ static void fxp_release(struct fxp_softc *sc) { struct fxp_rx *rxp; struct fxp_tx *txp; int i; FXP_LOCK_ASSERT(sc, MA_NOTOWNED); KASSERT(sc->ih == NULL, ("fxp_release() called with intr handle still active")); if (sc->miibus) device_delete_child(sc->dev, sc->miibus); bus_generic_detach(sc->dev); ifmedia_removeall(&sc->sc_media); if (sc->fxp_desc.cbl_list) { bus_dmamap_unload(sc->cbl_tag, sc->cbl_map); bus_dmamem_free(sc->cbl_tag, sc->fxp_desc.cbl_list, sc->cbl_map); } if (sc->fxp_stats) { bus_dmamap_unload(sc->fxp_stag, sc->fxp_smap); bus_dmamem_free(sc->fxp_stag, sc->fxp_stats, sc->fxp_smap); } if (sc->mcsp) { bus_dmamap_unload(sc->mcs_tag, sc->mcs_map); bus_dmamem_free(sc->mcs_tag, sc->mcsp, sc->mcs_map); } bus_release_resources(sc->dev, sc->fxp_spec, sc->fxp_res); if (sc->fxp_rxmtag) { for (i = 0; i < FXP_NRFABUFS; i++) { rxp = &sc->fxp_desc.rx_list[i]; if (rxp->rx_mbuf != NULL) { bus_dmamap_sync(sc->fxp_rxmtag, rxp->rx_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->fxp_rxmtag, rxp->rx_map); m_freem(rxp->rx_mbuf); } bus_dmamap_destroy(sc->fxp_rxmtag, rxp->rx_map); } bus_dmamap_destroy(sc->fxp_rxmtag, sc->spare_map); bus_dma_tag_destroy(sc->fxp_rxmtag); } if (sc->fxp_txmtag) { for (i = 0; i < FXP_NTXCB; i++) { txp = &sc->fxp_desc.tx_list[i]; if (txp->tx_mbuf != NULL) { bus_dmamap_sync(sc->fxp_txmtag, txp->tx_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->fxp_txmtag, txp->tx_map); m_freem(txp->tx_mbuf); } bus_dmamap_destroy(sc->fxp_txmtag, txp->tx_map); } bus_dma_tag_destroy(sc->fxp_txmtag); } if (sc->fxp_stag) bus_dma_tag_destroy(sc->fxp_stag); if (sc->cbl_tag) bus_dma_tag_destroy(sc->cbl_tag); if (sc->mcs_tag) bus_dma_tag_destroy(sc->mcs_tag); if (sc->ifp) if_free(sc->ifp); mtx_destroy(&sc->sc_mtx); } /* * Detach interface. */ static int fxp_detach(device_t dev) { struct fxp_softc *sc = device_get_softc(dev); #ifdef DEVICE_POLLING if (sc->ifp->if_capenable & IFCAP_POLLING) ether_poll_deregister(sc->ifp); #endif FXP_LOCK(sc); /* * Stop DMA and drop transmit queue, but disable interrupts first. */ CSR_WRITE_1(sc, FXP_CSR_SCB_INTRCNTL, FXP_SCB_INTR_DISABLE); fxp_stop(sc); FXP_UNLOCK(sc); callout_drain(&sc->stat_ch); /* * Close down routes etc. */ ether_ifdetach(sc->ifp); /* * Unhook interrupt before dropping lock. This is to prevent * races with fxp_intr(). */ bus_teardown_intr(sc->dev, sc->fxp_res[1], sc->ih); sc->ih = NULL; /* Release our allocated resources. */ fxp_release(sc); return (0); } /* * Device shutdown routine. Called at system shutdown after sync. The * main purpose of this routine is to shut off receiver DMA so that * kernel memory doesn't get clobbered during warmboot. */ static int fxp_shutdown(device_t dev) { /* * Make sure that DMA is disabled prior to reboot. Not doing * do could allow DMA to corrupt kernel memory during the * reboot before the driver initializes. */ return (fxp_suspend(dev)); } /* * Device suspend routine. Stop the interface and save some PCI * settings in case the BIOS doesn't restore them properly on * resume. */ static int fxp_suspend(device_t dev) { struct fxp_softc *sc = device_get_softc(dev); struct ifnet *ifp; int pmc; uint16_t pmstat; FXP_LOCK(sc); ifp = sc->ifp; if (pci_find_extcap(sc->dev, PCIY_PMG, &pmc) == 0) { pmstat = pci_read_config(sc->dev, pmc + PCIR_POWER_STATUS, 2); pmstat &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); if ((ifp->if_capenable & IFCAP_WOL_MAGIC) != 0) { /* Request PME. */ pmstat |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; sc->flags |= FXP_FLAG_WOL; /* Reconfigure hardware to accept magic frames. */ fxp_init_body(sc); } pci_write_config(sc->dev, pmc + PCIR_POWER_STATUS, pmstat, 2); } fxp_stop(sc); sc->suspended = 1; FXP_UNLOCK(sc); return (0); } /* * Device resume routine. re-enable busmastering, and restart the interface if * appropriate. */ static int fxp_resume(device_t dev) { struct fxp_softc *sc = device_get_softc(dev); struct ifnet *ifp = sc->ifp; int pmc; uint16_t pmstat; FXP_LOCK(sc); if (pci_find_extcap(sc->dev, PCIY_PMG, &pmc) == 0) { sc->flags &= ~FXP_FLAG_WOL; pmstat = pci_read_config(sc->dev, pmc + PCIR_POWER_STATUS, 2); /* Disable PME and clear PME status. */ pmstat &= ~PCIM_PSTAT_PMEENABLE; pci_write_config(sc->dev, pmc + PCIR_POWER_STATUS, pmstat, 2); if ((sc->flags & FXP_FLAG_WOLCAP) != 0) CSR_WRITE_1(sc, FXP_CSR_PMDR, CSR_READ_1(sc, FXP_CSR_PMDR)); } CSR_WRITE_4(sc, FXP_CSR_PORT, FXP_PORT_SELECTIVE_RESET); DELAY(10); /* reinitialize interface if necessary */ if (ifp->if_flags & IFF_UP) fxp_init_body(sc); sc->suspended = 0; FXP_UNLOCK(sc); return (0); } static void fxp_eeprom_shiftin(struct fxp_softc *sc, int data, int length) { uint16_t reg; int x; /* * Shift in data. */ for (x = 1 << (length - 1); x; x >>= 1) { if (data & x) reg = FXP_EEPROM_EECS | FXP_EEPROM_EEDI; else reg = FXP_EEPROM_EECS; CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, reg); DELAY(1); CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, reg | FXP_EEPROM_EESK); DELAY(1); CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, reg); DELAY(1); } } /* * Read from the serial EEPROM. Basically, you manually shift in * the read opcode (one bit at a time) and then shift in the address, * and then you shift out the data (all of this one bit at a time). * The word size is 16 bits, so you have to provide the address for * every 16 bits of data. */ static uint16_t fxp_eeprom_getword(struct fxp_softc *sc, int offset, int autosize) { uint16_t reg, data; int x; CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, FXP_EEPROM_EECS); /* * Shift in read opcode. */ fxp_eeprom_shiftin(sc, FXP_EEPROM_OPC_READ, 3); /* * Shift in address. */ data = 0; for (x = 1 << (sc->eeprom_size - 1); x; x >>= 1) { if (offset & x) reg = FXP_EEPROM_EECS | FXP_EEPROM_EEDI; else reg = FXP_EEPROM_EECS; CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, reg); DELAY(1); CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, reg | FXP_EEPROM_EESK); DELAY(1); CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, reg); DELAY(1); reg = CSR_READ_2(sc, FXP_CSR_EEPROMCONTROL) & FXP_EEPROM_EEDO; data++; if (autosize && reg == 0) { sc->eeprom_size = data; break; } } /* * Shift out data. */ data = 0; reg = FXP_EEPROM_EECS; for (x = 1 << 15; x; x >>= 1) { CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, reg | FXP_EEPROM_EESK); DELAY(1); if (CSR_READ_2(sc, FXP_CSR_EEPROMCONTROL) & FXP_EEPROM_EEDO) data |= x; CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, reg); DELAY(1); } CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, 0); DELAY(1); return (data); } static void fxp_eeprom_putword(struct fxp_softc *sc, int offset, uint16_t data) { int i; /* * Erase/write enable. */ CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, FXP_EEPROM_EECS); fxp_eeprom_shiftin(sc, 0x4, 3); fxp_eeprom_shiftin(sc, 0x03 << (sc->eeprom_size - 2), sc->eeprom_size); CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, 0); DELAY(1); /* * Shift in write opcode, address, data. */ CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, FXP_EEPROM_EECS); fxp_eeprom_shiftin(sc, FXP_EEPROM_OPC_WRITE, 3); fxp_eeprom_shiftin(sc, offset, sc->eeprom_size); fxp_eeprom_shiftin(sc, data, 16); CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, 0); DELAY(1); /* * Wait for EEPROM to finish up. */ CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, FXP_EEPROM_EECS); DELAY(1); for (i = 0; i < 1000; i++) { if (CSR_READ_2(sc, FXP_CSR_EEPROMCONTROL) & FXP_EEPROM_EEDO) break; DELAY(50); } CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, 0); DELAY(1); /* * Erase/write disable. */ CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, FXP_EEPROM_EECS); fxp_eeprom_shiftin(sc, 0x4, 3); fxp_eeprom_shiftin(sc, 0, sc->eeprom_size); CSR_WRITE_2(sc, FXP_CSR_EEPROMCONTROL, 0); DELAY(1); } /* * From NetBSD: * * Figure out EEPROM size. * * 559's can have either 64-word or 256-word EEPROMs, the 558 * datasheet only talks about 64-word EEPROMs, and the 557 datasheet * talks about the existance of 16 to 256 word EEPROMs. * * The only known sizes are 64 and 256, where the 256 version is used * by CardBus cards to store CIS information. * * The address is shifted in msb-to-lsb, and after the last * address-bit the EEPROM is supposed to output a `dummy zero' bit, * after which follows the actual data. We try to detect this zero, by * probing the data-out bit in the EEPROM control register just after * having shifted in a bit. If the bit is zero, we assume we've * shifted enough address bits. The data-out should be tri-state, * before this, which should translate to a logical one. */ static void fxp_autosize_eeprom(struct fxp_softc *sc) { /* guess maximum size of 256 words */ sc->eeprom_size = 8; /* autosize */ (void) fxp_eeprom_getword(sc, 0, 1); } static void fxp_read_eeprom(struct fxp_softc *sc, u_short *data, int offset, int words) { int i; for (i = 0; i < words; i++) data[i] = fxp_eeprom_getword(sc, offset + i, 0); } static void fxp_write_eeprom(struct fxp_softc *sc, u_short *data, int offset, int words) { int i; for (i = 0; i < words; i++) fxp_eeprom_putword(sc, offset + i, data[i]); } /* * Grab the softc lock and call the real fxp_start_body() routine */ static void fxp_start(struct ifnet *ifp) { struct fxp_softc *sc = ifp->if_softc; FXP_LOCK(sc); fxp_start_body(ifp); FXP_UNLOCK(sc); } /* * Start packet transmission on the interface. * This routine must be called with the softc lock held, and is an * internal entry point only. */ static void fxp_start_body(struct ifnet *ifp) { struct fxp_softc *sc = ifp->if_softc; struct mbuf *mb_head; int txqueued; FXP_LOCK_ASSERT(sc, MA_OWNED); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return; if (sc->tx_queued > FXP_NTXCB_HIWAT) fxp_txeof(sc); /* * We're finished if there is nothing more to add to the list or if * we're all filled up with buffers to transmit. * NOTE: One TxCB is reserved to guarantee that fxp_mc_setup() can add * a NOP command when needed. */ txqueued = 0; while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd) && sc->tx_queued < FXP_NTXCB - 1) { /* * Grab a packet to transmit. */ IFQ_DRV_DEQUEUE(&ifp->if_snd, mb_head); if (mb_head == NULL) break; if (fxp_encap(sc, &mb_head)) { if (mb_head == NULL) break; IFQ_DRV_PREPEND(&ifp->if_snd, mb_head); ifp->if_drv_flags |= IFF_DRV_OACTIVE; } txqueued++; /* * Pass packet to bpf if there is a listener. */ BPF_MTAP(ifp, mb_head); } /* * We're finished. If we added to the list, issue a RESUME to get DMA * going again if suspended. */ if (txqueued > 0) { bus_dmamap_sync(sc->cbl_tag, sc->cbl_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); fxp_scb_wait(sc); fxp_scb_cmd(sc, FXP_SCB_COMMAND_CU_RESUME); /* * Set a 5 second timer just in case we don't hear * from the card again. */ sc->watchdog_timer = 5; } } static int fxp_encap(struct fxp_softc *sc, struct mbuf **m_head) { struct ifnet *ifp; struct mbuf *m; struct fxp_tx *txp; struct fxp_cb_tx *cbp; struct tcphdr *tcp; bus_dma_segment_t segs[FXP_NTXSEG]; int error, i, nseg, tcp_payload; FXP_LOCK_ASSERT(sc, MA_OWNED); ifp = sc->ifp; tcp_payload = 0; tcp = NULL; /* * Get pointer to next available tx desc. */ txp = sc->fxp_desc.tx_last->tx_next; /* * A note in Appendix B of the Intel 8255x 10/100 Mbps * Ethernet Controller Family Open Source Software * Developer Manual says: * Using software parsing is only allowed with legal * TCP/IP or UDP/IP packets. * ... * For all other datagrams, hardware parsing must * be used. * Software parsing appears to truncate ICMP and * fragmented UDP packets that contain one to three * bytes in the second (and final) mbuf of the packet. */ if (sc->flags & FXP_FLAG_EXT_RFA) txp->tx_cb->ipcb_ip_activation_high = FXP_IPCB_HARDWAREPARSING_ENABLE; m = *m_head; if (m->m_pkthdr.csum_flags & CSUM_TSO) { /* * 82550/82551 requires ethernet/IP/TCP headers must be * contained in the first active transmit buffer. */ struct ether_header *eh; struct ip *ip; uint32_t ip_off, poff; if (M_WRITABLE(*m_head) == 0) { /* Get a writable copy. */ m = m_dup(*m_head, M_DONTWAIT); m_freem(*m_head); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } *m_head = m; } ip_off = sizeof(struct ether_header); m = m_pullup(*m_head, ip_off); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } eh = mtod(m, struct ether_header *); /* Check the existence of VLAN tag. */ if (eh->ether_type == htons(ETHERTYPE_VLAN)) { ip_off = sizeof(struct ether_vlan_header); m = m_pullup(m, ip_off); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } } m = m_pullup(m, ip_off + sizeof(struct ip)); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } ip = (struct ip *)(mtod(m, char *) + ip_off); poff = ip_off + (ip->ip_hl << 2); m = m_pullup(m, poff + sizeof(struct tcphdr)); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } tcp = (struct tcphdr *)(mtod(m, char *) + poff); m = m_pullup(m, poff + sizeof(struct tcphdr) + tcp->th_off); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } /* * Since 82550/82551 doesn't modify IP length and pseudo * checksum in the first frame driver should compute it. */ ip = (struct ip *)(mtod(m, char *) + ip_off); tcp = (struct tcphdr *)(mtod(m, char *) + poff); ip->ip_sum = 0; ip->ip_len = htons(m->m_pkthdr.tso_segsz + (ip->ip_hl << 2) + (tcp->th_off << 2)); tcp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP + (tcp->th_off << 2) + m->m_pkthdr.tso_segsz)); /* Compute total TCP payload. */ tcp_payload = m->m_pkthdr.len - ip_off - (ip->ip_hl << 2); tcp_payload -= tcp->th_off << 2; *m_head = m; } else if (m->m_pkthdr.csum_flags & FXP_CSUM_FEATURES) { /* * Deal with TCP/IP checksum offload. Note that * in order for TCP checksum offload to work, * the pseudo header checksum must have already * been computed and stored in the checksum field * in the TCP header. The stack should have * already done this for us. */ txp->tx_cb->ipcb_ip_schedule = FXP_IPCB_TCPUDP_CHECKSUM_ENABLE; if (m->m_pkthdr.csum_flags & CSUM_TCP) txp->tx_cb->ipcb_ip_schedule |= FXP_IPCB_TCP_PACKET; #ifdef FXP_IP_CSUM_WAR /* * XXX The 82550 chip appears to have trouble * dealing with IP header checksums in very small * datagrams, namely fragments from 1 to 3 bytes * in size. For example, say you want to transmit * a UDP packet of 1473 bytes. The packet will be * fragmented over two IP datagrams, the latter * containing only one byte of data. The 82550 will * botch the header checksum on the 1-byte fragment. * As long as the datagram contains 4 or more bytes * of data, you're ok. * * The following code attempts to work around this * problem: if the datagram is less than 38 bytes * in size (14 bytes ether header, 20 bytes IP header, * plus 4 bytes of data), we punt and compute the IP * header checksum by hand. This workaround doesn't * work very well, however, since it can be fooled * by things like VLAN tags and IP options that make * the header sizes/offsets vary. */ if (m->m_pkthdr.csum_flags & CSUM_IP) { if (m->m_pkthdr.len < 38) { struct ip *ip; m->m_data += ETHER_HDR_LEN; ip = mtod(m, struct ip *); ip->ip_sum = in_cksum(m, ip->ip_hl << 2); m->m_data -= ETHER_HDR_LEN; m->m_pkthdr.csum_flags &= ~CSUM_IP; } else { txp->tx_cb->ipcb_ip_activation_high = FXP_IPCB_HARDWAREPARSING_ENABLE; txp->tx_cb->ipcb_ip_schedule |= FXP_IPCB_IP_CHECKSUM_ENABLE; } } #endif } error = bus_dmamap_load_mbuf_sg(sc->fxp_txmtag, txp->tx_map, *m_head, segs, &nseg, 0); if (error == EFBIG) { m = m_collapse(*m_head, M_DONTWAIT, sc->maxtxseg); if (m == NULL) { m_freem(*m_head); *m_head = NULL; return (ENOMEM); } *m_head = m; error = bus_dmamap_load_mbuf_sg(sc->fxp_txmtag, txp->tx_map, *m_head, segs, &nseg, 0); if (error != 0) { m_freem(*m_head); *m_head = NULL; return (ENOMEM); } } else if (error != 0) return (error); if (nseg == 0) { m_freem(*m_head); *m_head = NULL; return (EIO); } KASSERT(nseg <= sc->maxtxseg, ("too many DMA segments")); bus_dmamap_sync(sc->fxp_txmtag, txp->tx_map, BUS_DMASYNC_PREWRITE); cbp = txp->tx_cb; for (i = 0; i < nseg; i++) { /* * If this is an 82550/82551, then we're using extended * TxCBs _and_ we're using checksum offload. This means * that the TxCB is really an IPCB. One major difference * between the two is that with plain extended TxCBs, * the bottom half of the TxCB contains two entries from * the TBD array, whereas IPCBs contain just one entry: * one entry (8 bytes) has been sacrificed for the TCP/IP * checksum offload control bits. So to make things work * right, we have to start filling in the TBD array * starting from a different place depending on whether * the chip is an 82550/82551 or not. */ if (sc->flags & FXP_FLAG_EXT_RFA) { cbp->tbd[i + 1].tb_addr = htole32(segs[i].ds_addr); cbp->tbd[i + 1].tb_size = htole32(segs[i].ds_len); } else { cbp->tbd[i].tb_addr = htole32(segs[i].ds_addr); cbp->tbd[i].tb_size = htole32(segs[i].ds_len); } } if (sc->flags & FXP_FLAG_EXT_RFA) { /* Configure dynamic TBD for 82550/82551. */ cbp->tbd_number = 0xFF; cbp->tbd[nseg].tb_size |= htole32(0x8000); } else cbp->tbd_number = nseg; /* Configure TSO. */ if (m->m_pkthdr.csum_flags & CSUM_TSO) { cbp->tbd[-1].tb_size = htole32(m->m_pkthdr.tso_segsz << 16); cbp->tbd[1].tb_size |= htole32(tcp_payload << 16); cbp->ipcb_ip_schedule |= FXP_IPCB_LARGESEND_ENABLE | FXP_IPCB_IP_CHECKSUM_ENABLE | FXP_IPCB_TCP_PACKET | FXP_IPCB_TCPUDP_CHECKSUM_ENABLE; } /* Configure VLAN hardware tag insertion. */ if ((m->m_flags & M_VLANTAG) != 0) { cbp->ipcb_vlan_id = htons(m->m_pkthdr.ether_vtag); txp->tx_cb->ipcb_ip_activation_high |= FXP_IPCB_INSERTVLAN_ENABLE; } txp->tx_mbuf = m; txp->tx_cb->cb_status = 0; txp->tx_cb->byte_count = 0; if (sc->tx_queued != FXP_CXINT_THRESH - 1) txp->tx_cb->cb_command = htole16(sc->tx_cmd | FXP_CB_COMMAND_SF | FXP_CB_COMMAND_S); else txp->tx_cb->cb_command = htole16(sc->tx_cmd | FXP_CB_COMMAND_SF | FXP_CB_COMMAND_S | FXP_CB_COMMAND_I); if ((m->m_pkthdr.csum_flags & CSUM_TSO) == 0) txp->tx_cb->tx_threshold = tx_threshold; /* * Advance the end of list forward. */ - -#ifdef __alpha__ - /* - * On platforms which can't access memory in 16-bit - * granularities, we must prevent the card from DMA'ing - * up the status while we update the command field. - * This could cause us to overwrite the completion status. - * XXX This is probably bogus and we're _not_ looking - * for atomicity here. - */ - atomic_clear_16(&sc->fxp_desc.tx_last->tx_cb->cb_command, - htole16(FXP_CB_COMMAND_S)); -#else sc->fxp_desc.tx_last->tx_cb->cb_command &= htole16(~FXP_CB_COMMAND_S); -#endif /*__alpha__*/ sc->fxp_desc.tx_last = txp; /* * Advance the beginning of the list forward if there are * no other packets queued (when nothing is queued, tx_first * sits on the last TxCB that was sent out). */ if (sc->tx_queued == 0) sc->fxp_desc.tx_first = txp; sc->tx_queued++; return (0); } #ifdef DEVICE_POLLING static poll_handler_t fxp_poll; static int fxp_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) { struct fxp_softc *sc = ifp->if_softc; uint8_t statack; int rx_npkts = 0; FXP_LOCK(sc); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { FXP_UNLOCK(sc); return (rx_npkts); } statack = FXP_SCB_STATACK_CXTNO | FXP_SCB_STATACK_CNA | FXP_SCB_STATACK_FR; if (cmd == POLL_AND_CHECK_STATUS) { uint8_t tmp; tmp = CSR_READ_1(sc, FXP_CSR_SCB_STATACK); if (tmp == 0xff || tmp == 0) { FXP_UNLOCK(sc); return (rx_npkts); /* nothing to do */ } tmp &= ~statack; /* ack what we can */ if (tmp != 0) CSR_WRITE_1(sc, FXP_CSR_SCB_STATACK, tmp); statack |= tmp; } rx_npkts = fxp_intr_body(sc, ifp, statack, count); FXP_UNLOCK(sc); return (rx_npkts); } #endif /* DEVICE_POLLING */ /* * Process interface interrupts. */ static void fxp_intr(void *xsc) { struct fxp_softc *sc = xsc; struct ifnet *ifp = sc->ifp; uint8_t statack; FXP_LOCK(sc); if (sc->suspended) { FXP_UNLOCK(sc); return; } #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) { FXP_UNLOCK(sc); return; } #endif while ((statack = CSR_READ_1(sc, FXP_CSR_SCB_STATACK)) != 0) { /* * It should not be possible to have all bits set; the * FXP_SCB_INTR_SWI bit always returns 0 on a read. If * all bits are set, this may indicate that the card has * been physically ejected, so ignore it. */ if (statack == 0xff) { FXP_UNLOCK(sc); return; } /* * First ACK all the interrupts in this pass. */ CSR_WRITE_1(sc, FXP_CSR_SCB_STATACK, statack); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) fxp_intr_body(sc, ifp, statack, -1); } FXP_UNLOCK(sc); } static void fxp_txeof(struct fxp_softc *sc) { struct ifnet *ifp; struct fxp_tx *txp; ifp = sc->ifp; bus_dmamap_sync(sc->cbl_tag, sc->cbl_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); for (txp = sc->fxp_desc.tx_first; sc->tx_queued && (le16toh(txp->tx_cb->cb_status) & FXP_CB_STATUS_C) != 0; txp = txp->tx_next) { if (txp->tx_mbuf != NULL) { bus_dmamap_sync(sc->fxp_txmtag, txp->tx_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->fxp_txmtag, txp->tx_map); m_freem(txp->tx_mbuf); txp->tx_mbuf = NULL; /* clear this to reset csum offload bits */ txp->tx_cb->tbd[0].tb_addr = 0; } sc->tx_queued--; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } sc->fxp_desc.tx_first = txp; bus_dmamap_sync(sc->cbl_tag, sc->cbl_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); if (sc->tx_queued == 0) sc->watchdog_timer = 0; } static void fxp_rxcsum(struct fxp_softc *sc, struct ifnet *ifp, struct mbuf *m, uint16_t status, int pos) { struct ether_header *eh; struct ip *ip; struct udphdr *uh; int32_t hlen, len, pktlen, temp32; uint16_t csum, *opts; if ((sc->flags & FXP_FLAG_82559_RXCSUM) == 0) { if ((status & FXP_RFA_STATUS_PARSE) != 0) { if (status & FXP_RFDX_CS_IP_CSUM_BIT_VALID) m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if (status & FXP_RFDX_CS_IP_CSUM_VALID) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; if ((status & FXP_RFDX_CS_TCPUDP_CSUM_BIT_VALID) && (status & FXP_RFDX_CS_TCPUDP_CSUM_VALID)) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } } return; } pktlen = m->m_pkthdr.len; if (pktlen < sizeof(struct ether_header) + sizeof(struct ip)) return; eh = mtod(m, struct ether_header *); if (eh->ether_type != htons(ETHERTYPE_IP)) return; ip = (struct ip *)(eh + 1); if (ip->ip_v != IPVERSION) return; hlen = ip->ip_hl << 2; pktlen -= sizeof(struct ether_header); if (hlen < sizeof(struct ip)) return; if (ntohs(ip->ip_len) < hlen) return; if (ntohs(ip->ip_len) != pktlen) return; if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) return; /* can't handle fragmented packet */ switch (ip->ip_p) { case IPPROTO_TCP: if (pktlen < (hlen + sizeof(struct tcphdr))) return; break; case IPPROTO_UDP: if (pktlen < (hlen + sizeof(struct udphdr))) return; uh = (struct udphdr *)((caddr_t)ip + hlen); if (uh->uh_sum == 0) return; /* no checksum */ break; default: return; } /* Extract computed checksum. */ csum = be16dec(mtod(m, char *) + pos); /* checksum fixup for IP options */ len = hlen - sizeof(struct ip); if (len > 0) { opts = (uint16_t *)(ip + 1); for (; len > 0; len -= sizeof(uint16_t), opts++) { temp32 = csum - *opts; temp32 = (temp32 >> 16) + (temp32 & 65535); csum = temp32 & 65535; } } m->m_pkthdr.csum_flags |= CSUM_DATA_VALID; m->m_pkthdr.csum_data = csum; } static int fxp_intr_body(struct fxp_softc *sc, struct ifnet *ifp, uint8_t statack, int count) { struct mbuf *m; struct fxp_rx *rxp; struct fxp_rfa *rfa; int rnr = (statack & FXP_SCB_STATACK_RNR) ? 1 : 0; int rx_npkts; uint16_t status; rx_npkts = 0; FXP_LOCK_ASSERT(sc, MA_OWNED); if (rnr) sc->rnr++; #ifdef DEVICE_POLLING /* Pick up a deferred RNR condition if `count' ran out last time. */ if (sc->flags & FXP_FLAG_DEFERRED_RNR) { sc->flags &= ~FXP_FLAG_DEFERRED_RNR; rnr = 1; } #endif /* * Free any finished transmit mbuf chains. * * Handle the CNA event likt a CXTNO event. It used to * be that this event (control unit not ready) was not * encountered, but it is now with the SMPng modifications. * The exact sequence of events that occur when the interface * is brought up are different now, and if this event * goes unhandled, the configuration/rxfilter setup sequence * can stall for several seconds. The result is that no * packets go out onto the wire for about 5 to 10 seconds * after the interface is ifconfig'ed for the first time. */ if (statack & (FXP_SCB_STATACK_CXTNO | FXP_SCB_STATACK_CNA)) fxp_txeof(sc); /* * Try to start more packets transmitting. */ if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) fxp_start_body(ifp); /* * Just return if nothing happened on the receive side. */ if (!rnr && (statack & FXP_SCB_STATACK_FR) == 0) return (rx_npkts); /* * Process receiver interrupts. If a no-resource (RNR) * condition exists, get whatever packets we can and * re-start the receiver. * * When using polling, we do not process the list to completion, * so when we get an RNR interrupt we must defer the restart * until we hit the last buffer with the C bit set. * If we run out of cycles and rfa_headm has the C bit set, * record the pending RNR in the FXP_FLAG_DEFERRED_RNR flag so * that the info will be used in the subsequent polling cycle. */ for (;;) { rxp = sc->fxp_desc.rx_head; m = rxp->rx_mbuf; rfa = (struct fxp_rfa *)(m->m_ext.ext_buf + RFA_ALIGNMENT_FUDGE); bus_dmamap_sync(sc->fxp_rxmtag, rxp->rx_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); #ifdef DEVICE_POLLING /* loop at most count times if count >=0 */ if (count >= 0 && count-- == 0) { if (rnr) { /* Defer RNR processing until the next time. */ sc->flags |= FXP_FLAG_DEFERRED_RNR; rnr = 0; } break; } #endif /* DEVICE_POLLING */ status = le16toh(rfa->rfa_status); if ((status & FXP_RFA_STATUS_C) == 0) break; if ((status & FXP_RFA_STATUS_RNR) != 0) rnr++; /* * Advance head forward. */ sc->fxp_desc.rx_head = rxp->rx_next; /* * Add a new buffer to the receive chain. * If this fails, the old buffer is recycled * instead. */ if (fxp_new_rfabuf(sc, rxp) == 0) { int total_len; /* * Fetch packet length (the top 2 bits of * actual_size are flags set by the controller * upon completion), and drop the packet in case * of bogus length or CRC errors. */ total_len = le16toh(rfa->actual_size) & 0x3fff; if ((sc->flags & FXP_FLAG_82559_RXCSUM) != 0 && (ifp->if_capenable & IFCAP_RXCSUM) != 0) { /* Adjust for appended checksum bytes. */ total_len -= 2; } if (total_len < sizeof(struct ether_header) || total_len > (MCLBYTES - RFA_ALIGNMENT_FUDGE - sc->rfa_size) || status & (FXP_RFA_STATUS_CRC | FXP_RFA_STATUS_ALIGN)) { m_freem(m); fxp_add_rfabuf(sc, rxp); continue; } m->m_pkthdr.len = m->m_len = total_len; m->m_pkthdr.rcvif = ifp; /* Do IP checksum checking. */ if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) fxp_rxcsum(sc, ifp, m, status, total_len); if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && (status & FXP_RFA_STATUS_VLAN) != 0) { m->m_pkthdr.ether_vtag = ntohs(rfa->rfax_vlan_id); m->m_flags |= M_VLANTAG; } /* * Drop locks before calling if_input() since it * may re-enter fxp_start() in the netisr case. * This would result in a lock reversal. Better * performance might be obtained by chaining all * packets received, dropping the lock, and then * calling if_input() on each one. */ FXP_UNLOCK(sc); (*ifp->if_input)(ifp, m); FXP_LOCK(sc); rx_npkts++; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return (rx_npkts); } else { /* Reuse RFA and loaded DMA map. */ ifp->if_iqdrops++; fxp_discard_rfabuf(sc, rxp); } fxp_add_rfabuf(sc, rxp); } if (rnr) { fxp_scb_wait(sc); CSR_WRITE_4(sc, FXP_CSR_SCB_GENERAL, sc->fxp_desc.rx_head->rx_addr); fxp_scb_cmd(sc, FXP_SCB_COMMAND_RU_START); } return (rx_npkts); } static void fxp_update_stats(struct fxp_softc *sc) { struct ifnet *ifp = sc->ifp; struct fxp_stats *sp = sc->fxp_stats; struct fxp_hwstats *hsp; uint32_t *status; FXP_LOCK_ASSERT(sc, MA_OWNED); bus_dmamap_sync(sc->fxp_stag, sc->fxp_smap, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* Update statistical counters. */ if (sc->revision >= FXP_REV_82559_A0) status = &sp->completion_status; else if (sc->revision >= FXP_REV_82558_A4) status = (uint32_t *)&sp->tx_tco; else status = &sp->tx_pause; if (*status == htole32(FXP_STATS_DR_COMPLETE)) { hsp = &sc->fxp_hwstats; hsp->tx_good += le32toh(sp->tx_good); hsp->tx_maxcols += le32toh(sp->tx_maxcols); hsp->tx_latecols += le32toh(sp->tx_latecols); hsp->tx_underruns += le32toh(sp->tx_underruns); hsp->tx_lostcrs += le32toh(sp->tx_lostcrs); hsp->tx_deffered += le32toh(sp->tx_deffered); hsp->tx_single_collisions += le32toh(sp->tx_single_collisions); hsp->tx_multiple_collisions += le32toh(sp->tx_multiple_collisions); hsp->tx_total_collisions += le32toh(sp->tx_total_collisions); hsp->rx_good += le32toh(sp->rx_good); hsp->rx_crc_errors += le32toh(sp->rx_crc_errors); hsp->rx_alignment_errors += le32toh(sp->rx_alignment_errors); hsp->rx_rnr_errors += le32toh(sp->rx_rnr_errors); hsp->rx_overrun_errors += le32toh(sp->rx_overrun_errors); hsp->rx_cdt_errors += le32toh(sp->rx_cdt_errors); hsp->rx_shortframes += le32toh(sp->rx_shortframes); hsp->tx_pause += le32toh(sp->tx_pause); hsp->rx_pause += le32toh(sp->rx_pause); hsp->rx_controls += le32toh(sp->rx_controls); hsp->tx_tco += le16toh(sp->tx_tco); hsp->rx_tco += le16toh(sp->rx_tco); ifp->if_opackets += le32toh(sp->tx_good); ifp->if_collisions += le32toh(sp->tx_total_collisions); if (sp->rx_good) { ifp->if_ipackets += le32toh(sp->rx_good); sc->rx_idle_secs = 0; } else if (sc->flags & FXP_FLAG_RXBUG) { /* * Receiver's been idle for another second. */ sc->rx_idle_secs++; } ifp->if_ierrors += le32toh(sp->rx_crc_errors) + le32toh(sp->rx_alignment_errors) + le32toh(sp->rx_rnr_errors) + le32toh(sp->rx_overrun_errors); /* * If any transmit underruns occured, bump up the transmit * threshold by another 512 bytes (64 * 8). */ if (sp->tx_underruns) { ifp->if_oerrors += le32toh(sp->tx_underruns); if (tx_threshold < 192) tx_threshold += 64; } *status = 0; bus_dmamap_sync(sc->fxp_stag, sc->fxp_smap, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } } /* * Update packet in/out/collision statistics. The i82557 doesn't * allow you to access these counters without doing a fairly * expensive DMA to get _all_ of the statistics it maintains, so * we do this operation here only once per second. The statistics * counters in the kernel are updated from the previous dump-stats * DMA and then a new dump-stats DMA is started. The on-chip * counters are zeroed when the DMA completes. If we can't start * the DMA immediately, we don't wait - we just prepare to read * them again next time. */ static void fxp_tick(void *xsc) { struct fxp_softc *sc = xsc; struct ifnet *ifp = sc->ifp; FXP_LOCK_ASSERT(sc, MA_OWNED); /* Update statistical counters. */ fxp_update_stats(sc); /* * Release any xmit buffers that have completed DMA. This isn't * strictly necessary to do here, but it's advantagous for mbufs * with external storage to be released in a timely manner rather * than being defered for a potentially long time. This limits * the delay to a maximum of one second. */ fxp_txeof(sc); /* * If we haven't received any packets in FXP_MAC_RX_IDLE seconds, * then assume the receiver has locked up and attempt to clear * the condition by reprogramming the multicast filter. This is * a work-around for a bug in the 82557 where the receiver locks * up if it gets certain types of garbage in the syncronization * bits prior to the packet header. This bug is supposed to only * occur in 10Mbps mode, but has been seen to occur in 100Mbps * mode as well (perhaps due to a 10/100 speed transition). */ if (sc->rx_idle_secs > FXP_MAX_RX_IDLE) { sc->rx_idle_secs = 0; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) fxp_init_body(sc); return; } /* * If there is no pending command, start another stats * dump. Otherwise punt for now. */ if (CSR_READ_1(sc, FXP_CSR_SCB_COMMAND) == 0) { /* * Start another stats dump. */ fxp_scb_cmd(sc, FXP_SCB_COMMAND_CU_DUMPRESET); } if (sc->miibus != NULL) mii_tick(device_get_softc(sc->miibus)); /* * Check that chip hasn't hung. */ fxp_watchdog(sc); /* * Schedule another timeout one second from now. */ callout_reset(&sc->stat_ch, hz, fxp_tick, sc); } /* * Stop the interface. Cancels the statistics updater and resets * the interface. */ static void fxp_stop(struct fxp_softc *sc) { struct ifnet *ifp = sc->ifp; struct fxp_tx *txp; int i; ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); sc->watchdog_timer = 0; /* * Cancel stats updater. */ callout_stop(&sc->stat_ch); /* * Preserve PCI configuration, configure, IA/multicast * setup and put RU and CU into idle state. */ CSR_WRITE_4(sc, FXP_CSR_PORT, FXP_PORT_SELECTIVE_RESET); DELAY(50); /* Disable interrupts. */ CSR_WRITE_1(sc, FXP_CSR_SCB_INTRCNTL, FXP_SCB_INTR_DISABLE); fxp_update_stats(sc); /* * Release any xmit buffers. */ txp = sc->fxp_desc.tx_list; if (txp != NULL) { for (i = 0; i < FXP_NTXCB; i++) { if (txp[i].tx_mbuf != NULL) { bus_dmamap_sync(sc->fxp_txmtag, txp[i].tx_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->fxp_txmtag, txp[i].tx_map); m_freem(txp[i].tx_mbuf); txp[i].tx_mbuf = NULL; /* clear this to reset csum offload bits */ txp[i].tx_cb->tbd[0].tb_addr = 0; } } } bus_dmamap_sync(sc->cbl_tag, sc->cbl_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); sc->tx_queued = 0; } /* * Watchdog/transmission transmit timeout handler. Called when a * transmission is started on the interface, but no interrupt is * received before the timeout. This usually indicates that the * card has wedged for some reason. */ static void fxp_watchdog(struct fxp_softc *sc) { FXP_LOCK_ASSERT(sc, MA_OWNED); if (sc->watchdog_timer == 0 || --sc->watchdog_timer) return; device_printf(sc->dev, "device timeout\n"); sc->ifp->if_oerrors++; fxp_init_body(sc); } /* * Acquire locks and then call the real initialization function. This * is necessary because ether_ioctl() calls if_init() and this would * result in mutex recursion if the mutex was held. */ static void fxp_init(void *xsc) { struct fxp_softc *sc = xsc; FXP_LOCK(sc); fxp_init_body(sc); FXP_UNLOCK(sc); } /* * Perform device initialization. This routine must be called with the * softc lock held. */ static void fxp_init_body(struct fxp_softc *sc) { struct ifnet *ifp = sc->ifp; struct fxp_cb_config *cbp; struct fxp_cb_ias *cb_ias; struct fxp_cb_tx *tcbp; struct fxp_tx *txp; int i, prm; FXP_LOCK_ASSERT(sc, MA_OWNED); /* * Cancel any pending I/O */ fxp_stop(sc); /* * Issue software reset, which also unloads the microcode. */ sc->flags &= ~FXP_FLAG_UCODE; CSR_WRITE_4(sc, FXP_CSR_PORT, FXP_PORT_SOFTWARE_RESET); DELAY(50); prm = (ifp->if_flags & IFF_PROMISC) ? 1 : 0; /* * Initialize base of CBL and RFA memory. Loading with zero * sets it up for regular linear addressing. */ CSR_WRITE_4(sc, FXP_CSR_SCB_GENERAL, 0); fxp_scb_cmd(sc, FXP_SCB_COMMAND_CU_BASE); fxp_scb_wait(sc); fxp_scb_cmd(sc, FXP_SCB_COMMAND_RU_BASE); /* * Initialize base of dump-stats buffer. */ fxp_scb_wait(sc); bzero(sc->fxp_stats, sizeof(struct fxp_stats)); bus_dmamap_sync(sc->fxp_stag, sc->fxp_smap, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); CSR_WRITE_4(sc, FXP_CSR_SCB_GENERAL, sc->stats_addr); fxp_scb_cmd(sc, FXP_SCB_COMMAND_CU_DUMP_ADR); /* * Attempt to load microcode if requested. * For ICH based controllers do not load microcode. */ if (sc->ident->ich == 0) { if (ifp->if_flags & IFF_LINK0 && (sc->flags & FXP_FLAG_UCODE) == 0) fxp_load_ucode(sc); } /* * Set IFF_ALLMULTI status. It's needed in configure action * command. */ fxp_mc_addrs(sc); /* * We temporarily use memory that contains the TxCB list to * construct the config CB. The TxCB list memory is rebuilt * later. */ cbp = (struct fxp_cb_config *)sc->fxp_desc.cbl_list; /* * This bcopy is kind of disgusting, but there are a bunch of must be * zero and must be one bits in this structure and this is the easiest * way to initialize them all to proper values. */ bcopy(fxp_cb_config_template, cbp, sizeof(fxp_cb_config_template)); cbp->cb_status = 0; cbp->cb_command = htole16(FXP_CB_COMMAND_CONFIG | FXP_CB_COMMAND_EL); cbp->link_addr = 0xffffffff; /* (no) next command */ cbp->byte_count = sc->flags & FXP_FLAG_EXT_RFA ? 32 : 22; cbp->rx_fifo_limit = 8; /* rx fifo threshold (32 bytes) */ cbp->tx_fifo_limit = 0; /* tx fifo threshold (0 bytes) */ cbp->adaptive_ifs = 0; /* (no) adaptive interframe spacing */ cbp->mwi_enable = sc->flags & FXP_FLAG_MWI_ENABLE ? 1 : 0; cbp->type_enable = 0; /* actually reserved */ cbp->read_align_en = sc->flags & FXP_FLAG_READ_ALIGN ? 1 : 0; cbp->end_wr_on_cl = sc->flags & FXP_FLAG_WRITE_ALIGN ? 1 : 0; cbp->rx_dma_bytecount = 0; /* (no) rx DMA max */ cbp->tx_dma_bytecount = 0; /* (no) tx DMA max */ cbp->dma_mbce = 0; /* (disable) dma max counters */ cbp->late_scb = 0; /* (don't) defer SCB update */ cbp->direct_dma_dis = 1; /* disable direct rcv dma mode */ cbp->tno_int_or_tco_en =0; /* (disable) tx not okay interrupt */ cbp->ci_int = 1; /* interrupt on CU idle */ cbp->ext_txcb_dis = sc->flags & FXP_FLAG_EXT_TXCB ? 0 : 1; cbp->ext_stats_dis = 1; /* disable extended counters */ cbp->keep_overrun_rx = 0; /* don't pass overrun frames to host */ cbp->save_bf = sc->flags & FXP_FLAG_SAVE_BAD ? 1 : prm; cbp->disc_short_rx = !prm; /* discard short packets */ cbp->underrun_retry = 1; /* retry mode (once) on DMA underrun */ cbp->two_frames = 0; /* do not limit FIFO to 2 frames */ cbp->dyn_tbd = sc->flags & FXP_FLAG_EXT_RFA ? 1 : 0; cbp->ext_rfa = sc->flags & FXP_FLAG_EXT_RFA ? 1 : 0; cbp->mediatype = sc->flags & FXP_FLAG_SERIAL_MEDIA ? 0 : 1; cbp->csma_dis = 0; /* (don't) disable link */ cbp->tcp_udp_cksum = ((sc->flags & FXP_FLAG_82559_RXCSUM) != 0 && (ifp->if_capenable & IFCAP_RXCSUM) != 0) ? 1 : 0; cbp->vlan_tco = 0; /* (don't) enable vlan wakeup */ cbp->link_wake_en = 0; /* (don't) assert PME# on link change */ cbp->arp_wake_en = 0; /* (don't) assert PME# on arp */ cbp->mc_wake_en = 0; /* (don't) enable PME# on mcmatch */ cbp->nsai = 1; /* (don't) disable source addr insert */ cbp->preamble_length = 2; /* (7 byte) preamble */ cbp->loopback = 0; /* (don't) loopback */ cbp->linear_priority = 0; /* (normal CSMA/CD operation) */ cbp->linear_pri_mode = 0; /* (wait after xmit only) */ cbp->interfrm_spacing = 6; /* (96 bits of) interframe spacing */ cbp->promiscuous = prm; /* promiscuous mode */ cbp->bcast_disable = 0; /* (don't) disable broadcasts */ cbp->wait_after_win = 0; /* (don't) enable modified backoff alg*/ cbp->ignore_ul = 0; /* consider U/L bit in IA matching */ cbp->crc16_en = 0; /* (don't) enable crc-16 algorithm */ cbp->crscdt = sc->flags & FXP_FLAG_SERIAL_MEDIA ? 1 : 0; cbp->stripping = !prm; /* truncate rx packet to byte count */ cbp->padding = 1; /* (do) pad short tx packets */ cbp->rcv_crc_xfer = 0; /* (don't) xfer CRC to host */ cbp->long_rx_en = sc->flags & FXP_FLAG_LONG_PKT_EN ? 1 : 0; cbp->ia_wake_en = 0; /* (don't) wake up on address match */ cbp->magic_pkt_dis = sc->flags & FXP_FLAG_WOL ? 0 : 1; cbp->force_fdx = 0; /* (don't) force full duplex */ cbp->fdx_pin_en = 1; /* (enable) FDX# pin */ cbp->multi_ia = 0; /* (don't) accept multiple IAs */ cbp->mc_all = ifp->if_flags & IFF_ALLMULTI ? 1 : prm; cbp->gamla_rx = sc->flags & FXP_FLAG_EXT_RFA ? 1 : 0; cbp->vlan_strip_en = ((sc->flags & FXP_FLAG_EXT_RFA) != 0 && (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) ? 1 : 0; if (sc->tunable_noflow || sc->revision == FXP_REV_82557) { /* * The 82557 has no hardware flow control, the values * below are the defaults for the chip. */ cbp->fc_delay_lsb = 0; cbp->fc_delay_msb = 0x40; cbp->pri_fc_thresh = 3; cbp->tx_fc_dis = 0; cbp->rx_fc_restop = 0; cbp->rx_fc_restart = 0; cbp->fc_filter = 0; cbp->pri_fc_loc = 1; } else { cbp->fc_delay_lsb = 0x1f; cbp->fc_delay_msb = 0x01; cbp->pri_fc_thresh = 3; cbp->tx_fc_dis = 0; /* enable transmit FC */ cbp->rx_fc_restop = 1; /* enable FC restop frames */ cbp->rx_fc_restart = 1; /* enable FC restart frames */ cbp->fc_filter = !prm; /* drop FC frames to host */ cbp->pri_fc_loc = 1; /* FC pri location (byte31) */ } /* Enable 82558 and 82559 extended statistics functionality. */ if (sc->revision >= FXP_REV_82558_A4) { if (sc->revision >= FXP_REV_82559_A0) { /* * Extend configuration table size to 32 * to include TCO configuration. */ cbp->byte_count = 32; cbp->ext_stats_dis = 1; /* Enable TCO stats. */ cbp->tno_int_or_tco_en = 1; cbp->gamla_rx = 1; } else cbp->ext_stats_dis = 0; } /* * Start the config command/DMA. */ fxp_scb_wait(sc); bus_dmamap_sync(sc->cbl_tag, sc->cbl_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); CSR_WRITE_4(sc, FXP_CSR_SCB_GENERAL, sc->fxp_desc.cbl_addr); fxp_scb_cmd(sc, FXP_SCB_COMMAND_CU_START); /* ...and wait for it to complete. */ fxp_dma_wait(sc, &cbp->cb_status, sc->cbl_tag, sc->cbl_map); /* * Now initialize the station address. Temporarily use the TxCB * memory area like we did above for the config CB. */ cb_ias = (struct fxp_cb_ias *)sc->fxp_desc.cbl_list; cb_ias->cb_status = 0; cb_ias->cb_command = htole16(FXP_CB_COMMAND_IAS | FXP_CB_COMMAND_EL); cb_ias->link_addr = 0xffffffff; bcopy(IF_LLADDR(sc->ifp), cb_ias->macaddr, ETHER_ADDR_LEN); /* * Start the IAS (Individual Address Setup) command/DMA. */ fxp_scb_wait(sc); bus_dmamap_sync(sc->cbl_tag, sc->cbl_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); CSR_WRITE_4(sc, FXP_CSR_SCB_GENERAL, sc->fxp_desc.cbl_addr); fxp_scb_cmd(sc, FXP_SCB_COMMAND_CU_START); /* ...and wait for it to complete. */ fxp_dma_wait(sc, &cb_ias->cb_status, sc->cbl_tag, sc->cbl_map); /* * Initialize the multicast address list. */ fxp_mc_setup(sc); /* * Initialize transmit control block (TxCB) list. */ txp = sc->fxp_desc.tx_list; tcbp = sc->fxp_desc.cbl_list; bzero(tcbp, FXP_TXCB_SZ); for (i = 0; i < FXP_NTXCB; i++) { txp[i].tx_mbuf = NULL; tcbp[i].cb_status = htole16(FXP_CB_STATUS_C | FXP_CB_STATUS_OK); tcbp[i].cb_command = htole16(FXP_CB_COMMAND_NOP); tcbp[i].link_addr = htole32(sc->fxp_desc.cbl_addr + (((i + 1) & FXP_TXCB_MASK) * sizeof(struct fxp_cb_tx))); if (sc->flags & FXP_FLAG_EXT_TXCB) tcbp[i].tbd_array_addr = htole32(FXP_TXCB_DMA_ADDR(sc, &tcbp[i].tbd[2])); else tcbp[i].tbd_array_addr = htole32(FXP_TXCB_DMA_ADDR(sc, &tcbp[i].tbd[0])); txp[i].tx_next = &txp[(i + 1) & FXP_TXCB_MASK]; } /* * Set the suspend flag on the first TxCB and start the control * unit. It will execute the NOP and then suspend. */ tcbp->cb_command = htole16(FXP_CB_COMMAND_NOP | FXP_CB_COMMAND_S); bus_dmamap_sync(sc->cbl_tag, sc->cbl_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); sc->fxp_desc.tx_first = sc->fxp_desc.tx_last = txp; sc->tx_queued = 1; fxp_scb_wait(sc); CSR_WRITE_4(sc, FXP_CSR_SCB_GENERAL, sc->fxp_desc.cbl_addr); fxp_scb_cmd(sc, FXP_SCB_COMMAND_CU_START); /* * Initialize receiver buffer area - RFA. */ fxp_scb_wait(sc); CSR_WRITE_4(sc, FXP_CSR_SCB_GENERAL, sc->fxp_desc.rx_head->rx_addr); fxp_scb_cmd(sc, FXP_SCB_COMMAND_RU_START); /* * Set current media. */ if (sc->miibus != NULL) mii_mediachg(device_get_softc(sc->miibus)); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; /* * Enable interrupts. */ #ifdef DEVICE_POLLING /* * ... but only do that if we are not polling. And because (presumably) * the default is interrupts on, we need to disable them explicitly! */ if (ifp->if_capenable & IFCAP_POLLING ) CSR_WRITE_1(sc, FXP_CSR_SCB_INTRCNTL, FXP_SCB_INTR_DISABLE); else #endif /* DEVICE_POLLING */ CSR_WRITE_1(sc, FXP_CSR_SCB_INTRCNTL, 0); /* * Start stats updater. */ callout_reset(&sc->stat_ch, hz, fxp_tick, sc); } static int fxp_serial_ifmedia_upd(struct ifnet *ifp) { return (0); } static void fxp_serial_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { ifmr->ifm_active = IFM_ETHER|IFM_MANUAL; } /* * Change media according to request. */ static int fxp_ifmedia_upd(struct ifnet *ifp) { struct fxp_softc *sc = ifp->if_softc; struct mii_data *mii; mii = device_get_softc(sc->miibus); FXP_LOCK(sc); if (mii->mii_instance) { struct mii_softc *miisc; LIST_FOREACH(miisc, &mii->mii_phys, mii_list) mii_phy_reset(miisc); } mii_mediachg(mii); FXP_UNLOCK(sc); return (0); } /* * Notify the world which media we're using. */ static void fxp_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct fxp_softc *sc = ifp->if_softc; struct mii_data *mii; mii = device_get_softc(sc->miibus); FXP_LOCK(sc); mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; if (IFM_SUBTYPE(ifmr->ifm_active) == IFM_10_T && sc->flags & FXP_FLAG_CU_RESUME_BUG) sc->cu_resume_bug = 1; else sc->cu_resume_bug = 0; FXP_UNLOCK(sc); } /* * Add a buffer to the end of the RFA buffer list. * Return 0 if successful, 1 for failure. A failure results in * reusing the RFA buffer. * The RFA struct is stuck at the beginning of mbuf cluster and the * data pointer is fixed up to point just past it. */ static int fxp_new_rfabuf(struct fxp_softc *sc, struct fxp_rx *rxp) { struct mbuf *m; struct fxp_rfa *rfa; bus_dmamap_t tmp_map; int error; m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return (ENOBUFS); /* * Move the data pointer up so that the incoming data packet * will be 32-bit aligned. */ m->m_data += RFA_ALIGNMENT_FUDGE; /* * Get a pointer to the base of the mbuf cluster and move * data start past it. */ rfa = mtod(m, struct fxp_rfa *); m->m_data += sc->rfa_size; rfa->size = htole16(MCLBYTES - sc->rfa_size - RFA_ALIGNMENT_FUDGE); rfa->rfa_status = 0; rfa->rfa_control = htole16(FXP_RFA_CONTROL_EL); rfa->actual_size = 0; m->m_len = m->m_pkthdr.len = MCLBYTES - RFA_ALIGNMENT_FUDGE - sc->rfa_size; /* * Initialize the rest of the RFA. Note that since the RFA * is misaligned, we cannot store values directly. We're thus * using the le32enc() function which handles endianness and * is also alignment-safe. */ le32enc(&rfa->link_addr, 0xffffffff); le32enc(&rfa->rbd_addr, 0xffffffff); /* Map the RFA into DMA memory. */ error = bus_dmamap_load(sc->fxp_rxmtag, sc->spare_map, rfa, MCLBYTES - RFA_ALIGNMENT_FUDGE, fxp_dma_map_addr, &rxp->rx_addr, BUS_DMA_NOWAIT); if (error) { m_freem(m); return (error); } if (rxp->rx_mbuf != NULL) bus_dmamap_unload(sc->fxp_rxmtag, rxp->rx_map); tmp_map = sc->spare_map; sc->spare_map = rxp->rx_map; rxp->rx_map = tmp_map; rxp->rx_mbuf = m; bus_dmamap_sync(sc->fxp_rxmtag, rxp->rx_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } static void fxp_add_rfabuf(struct fxp_softc *sc, struct fxp_rx *rxp) { struct fxp_rfa *p_rfa; struct fxp_rx *p_rx; /* * If there are other buffers already on the list, attach this * one to the end by fixing up the tail to point to this one. */ if (sc->fxp_desc.rx_head != NULL) { p_rx = sc->fxp_desc.rx_tail; p_rfa = (struct fxp_rfa *) (p_rx->rx_mbuf->m_ext.ext_buf + RFA_ALIGNMENT_FUDGE); p_rx->rx_next = rxp; le32enc(&p_rfa->link_addr, rxp->rx_addr); p_rfa->rfa_control = 0; bus_dmamap_sync(sc->fxp_rxmtag, p_rx->rx_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } else { rxp->rx_next = NULL; sc->fxp_desc.rx_head = rxp; } sc->fxp_desc.rx_tail = rxp; } static void fxp_discard_rfabuf(struct fxp_softc *sc, struct fxp_rx *rxp) { struct mbuf *m; struct fxp_rfa *rfa; m = rxp->rx_mbuf; m->m_data = m->m_ext.ext_buf; /* * Move the data pointer up so that the incoming data packet * will be 32-bit aligned. */ m->m_data += RFA_ALIGNMENT_FUDGE; /* * Get a pointer to the base of the mbuf cluster and move * data start past it. */ rfa = mtod(m, struct fxp_rfa *); m->m_data += sc->rfa_size; rfa->size = htole16(MCLBYTES - sc->rfa_size - RFA_ALIGNMENT_FUDGE); rfa->rfa_status = 0; rfa->rfa_control = htole16(FXP_RFA_CONTROL_EL); rfa->actual_size = 0; /* * Initialize the rest of the RFA. Note that since the RFA * is misaligned, we cannot store values directly. We're thus * using the le32enc() function which handles endianness and * is also alignment-safe. */ le32enc(&rfa->link_addr, 0xffffffff); le32enc(&rfa->rbd_addr, 0xffffffff); bus_dmamap_sync(sc->fxp_rxmtag, rxp->rx_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } static int fxp_miibus_readreg(device_t dev, int phy, int reg) { struct fxp_softc *sc = device_get_softc(dev); int count = 10000; int value; CSR_WRITE_4(sc, FXP_CSR_MDICONTROL, (FXP_MDI_READ << 26) | (reg << 16) | (phy << 21)); while (((value = CSR_READ_4(sc, FXP_CSR_MDICONTROL)) & 0x10000000) == 0 && count--) DELAY(10); if (count <= 0) device_printf(dev, "fxp_miibus_readreg: timed out\n"); return (value & 0xffff); } static int fxp_miibus_writereg(device_t dev, int phy, int reg, int value) { struct fxp_softc *sc = device_get_softc(dev); int count = 10000; CSR_WRITE_4(sc, FXP_CSR_MDICONTROL, (FXP_MDI_WRITE << 26) | (reg << 16) | (phy << 21) | (value & 0xffff)); while ((CSR_READ_4(sc, FXP_CSR_MDICONTROL) & 0x10000000) == 0 && count--) DELAY(10); if (count <= 0) device_printf(dev, "fxp_miibus_writereg: timed out\n"); return (0); } static int fxp_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct fxp_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; struct mii_data *mii; int flag, mask, error = 0, reinit; switch (command) { case SIOCSIFFLAGS: FXP_LOCK(sc); /* * If interface is marked up and not running, then start it. * If it is marked down and running, stop it. * XXX If it's up then re-initialize it. This is so flags * such as IFF_PROMISC are handled. */ if (ifp->if_flags & IFF_UP) { if (((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) && ((ifp->if_flags ^ sc->if_flags) & (IFF_PROMISC | IFF_ALLMULTI | IFF_LINK0)) != 0) fxp_init_body(sc); else if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) fxp_init_body(sc); } else { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) fxp_stop(sc); } sc->if_flags = ifp->if_flags; FXP_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) fxp_init(sc); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: if (sc->miibus != NULL) { mii = device_get_softc(sc->miibus); error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); } else { error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, command); } break; case SIOCSIFCAP: reinit = 0; mask = ifp->if_capenable ^ ifr->ifr_reqcap; #ifdef DEVICE_POLLING if (mask & IFCAP_POLLING) { if (ifr->ifr_reqcap & IFCAP_POLLING) { error = ether_poll_register(fxp_poll, ifp); if (error) return(error); FXP_LOCK(sc); CSR_WRITE_1(sc, FXP_CSR_SCB_INTRCNTL, FXP_SCB_INTR_DISABLE); ifp->if_capenable |= IFCAP_POLLING; FXP_UNLOCK(sc); } else { error = ether_poll_deregister(ifp); /* Enable interrupts in any case */ FXP_LOCK(sc); CSR_WRITE_1(sc, FXP_CSR_SCB_INTRCNTL, 0); ifp->if_capenable &= ~IFCAP_POLLING; FXP_UNLOCK(sc); } } #endif FXP_LOCK(sc); if ((mask & IFCAP_TXCSUM) != 0 && (ifp->if_capabilities & IFCAP_TXCSUM) != 0) { ifp->if_capenable ^= IFCAP_TXCSUM; if ((ifp->if_capenable & IFCAP_TXCSUM) != 0) ifp->if_hwassist |= FXP_CSUM_FEATURES; else ifp->if_hwassist &= ~FXP_CSUM_FEATURES; } if ((mask & IFCAP_RXCSUM) != 0 && (ifp->if_capabilities & IFCAP_RXCSUM) != 0) { ifp->if_capenable ^= IFCAP_RXCSUM; if ((sc->flags & FXP_FLAG_82559_RXCSUM) != 0) reinit++; } if ((mask & IFCAP_TSO4) != 0 && (ifp->if_capabilities & IFCAP_TSO4) != 0) { ifp->if_capenable ^= IFCAP_TSO4; if ((ifp->if_capenable & IFCAP_TSO4) != 0) ifp->if_hwassist |= CSUM_TSO; else ifp->if_hwassist &= ~CSUM_TSO; } if ((mask & IFCAP_WOL_MAGIC) != 0 && (ifp->if_capabilities & IFCAP_WOL_MAGIC) != 0) ifp->if_capenable ^= IFCAP_WOL_MAGIC; if ((mask & IFCAP_VLAN_MTU) != 0 && (ifp->if_capabilities & IFCAP_VLAN_MTU) != 0) { ifp->if_capenable ^= IFCAP_VLAN_MTU; if (sc->revision != FXP_REV_82557) flag = FXP_FLAG_LONG_PKT_EN; else /* a hack to get long frames on the old chip */ flag = FXP_FLAG_SAVE_BAD; sc->flags ^= flag; if (ifp->if_flags & IFF_UP) reinit++; } if ((mask & IFCAP_VLAN_HWCSUM) != 0 && (ifp->if_capabilities & IFCAP_VLAN_HWCSUM) != 0) ifp->if_capenable ^= IFCAP_VLAN_HWCSUM; if ((mask & IFCAP_VLAN_HWTSO) != 0 && (ifp->if_capabilities & IFCAP_VLAN_HWTSO) != 0) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if ((mask & IFCAP_VLAN_HWTAGGING) != 0 && (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING) != 0) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) ifp->if_capenable &= ~(IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWCSUM); reinit++; } if (reinit > 0 && ifp->if_flags & IFF_UP) fxp_init_body(sc); FXP_UNLOCK(sc); VLAN_CAPABILITIES(ifp); break; default: error = ether_ioctl(ifp, command, data); } return (error); } /* * Fill in the multicast address list and return number of entries. */ static int fxp_mc_addrs(struct fxp_softc *sc) { struct fxp_cb_mcs *mcsp = sc->mcsp; struct ifnet *ifp = sc->ifp; struct ifmultiaddr *ifma; int nmcasts; nmcasts = 0; if ((ifp->if_flags & IFF_ALLMULTI) == 0) { if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (nmcasts >= MAXMCADDR) { ifp->if_flags |= IFF_ALLMULTI; nmcasts = 0; break; } bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), &sc->mcsp->mc_addr[nmcasts][0], ETHER_ADDR_LEN); nmcasts++; } if_maddr_runlock(ifp); } mcsp->mc_cnt = htole16(nmcasts * ETHER_ADDR_LEN); return (nmcasts); } /* * Program the multicast filter. * * We have an artificial restriction that the multicast setup command * must be the first command in the chain, so we take steps to ensure * this. By requiring this, it allows us to keep up the performance of * the pre-initialized command ring (esp. link pointers) by not actually * inserting the mcsetup command in the ring - i.e. its link pointer * points to the TxCB ring, but the mcsetup descriptor itself is not part * of it. We then can do 'CU_START' on the mcsetup descriptor and have it * lead into the regular TxCB ring when it completes. */ static void fxp_mc_setup(struct fxp_softc *sc) { struct fxp_cb_mcs *mcsp; int count; FXP_LOCK_ASSERT(sc, MA_OWNED); mcsp = sc->mcsp; mcsp->cb_status = 0; mcsp->cb_command = htole16(FXP_CB_COMMAND_MCAS | FXP_CB_COMMAND_EL); mcsp->link_addr = 0xffffffff; fxp_mc_addrs(sc); /* * Wait until command unit is idle. This should never be the * case when nothing is queued, but make sure anyway. */ count = 100; while ((CSR_READ_1(sc, FXP_CSR_SCB_RUSCUS) >> 6) != FXP_SCB_CUS_IDLE && --count) DELAY(10); if (count == 0) { device_printf(sc->dev, "command queue timeout\n"); return; } /* * Start the multicast setup command. */ fxp_scb_wait(sc); bus_dmamap_sync(sc->mcs_tag, sc->mcs_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); CSR_WRITE_4(sc, FXP_CSR_SCB_GENERAL, sc->mcs_addr); fxp_scb_cmd(sc, FXP_SCB_COMMAND_CU_START); /* ...and wait for it to complete. */ fxp_dma_wait(sc, &mcsp->cb_status, sc->mcs_tag, sc->mcs_map); } static uint32_t fxp_ucode_d101a[] = D101_A_RCVBUNDLE_UCODE; static uint32_t fxp_ucode_d101b0[] = D101_B0_RCVBUNDLE_UCODE; static uint32_t fxp_ucode_d101ma[] = D101M_B_RCVBUNDLE_UCODE; static uint32_t fxp_ucode_d101s[] = D101S_RCVBUNDLE_UCODE; static uint32_t fxp_ucode_d102[] = D102_B_RCVBUNDLE_UCODE; static uint32_t fxp_ucode_d102c[] = D102_C_RCVBUNDLE_UCODE; static uint32_t fxp_ucode_d102e[] = D102_E_RCVBUNDLE_UCODE; #define UCODE(x) x, sizeof(x)/sizeof(uint32_t) -struct ucode { +static const struct ucode { uint32_t revision; uint32_t *ucode; int length; u_short int_delay_offset; u_short bundle_max_offset; -} ucode_table[] = { +} const ucode_table[] = { { FXP_REV_82558_A4, UCODE(fxp_ucode_d101a), D101_CPUSAVER_DWORD, 0 }, { FXP_REV_82558_B0, UCODE(fxp_ucode_d101b0), D101_CPUSAVER_DWORD, 0 }, { FXP_REV_82559_A0, UCODE(fxp_ucode_d101ma), D101M_CPUSAVER_DWORD, D101M_CPUSAVER_BUNDLE_MAX_DWORD }, { FXP_REV_82559S_A, UCODE(fxp_ucode_d101s), D101S_CPUSAVER_DWORD, D101S_CPUSAVER_BUNDLE_MAX_DWORD }, { FXP_REV_82550, UCODE(fxp_ucode_d102), D102_B_CPUSAVER_DWORD, D102_B_CPUSAVER_BUNDLE_MAX_DWORD }, { FXP_REV_82550_C, UCODE(fxp_ucode_d102c), D102_C_CPUSAVER_DWORD, D102_C_CPUSAVER_BUNDLE_MAX_DWORD }, { FXP_REV_82551_F, UCODE(fxp_ucode_d102e), D102_E_CPUSAVER_DWORD, D102_E_CPUSAVER_BUNDLE_MAX_DWORD }, { 0, NULL, 0, 0, 0 } }; static void fxp_load_ucode(struct fxp_softc *sc) { - struct ucode *uc; + const struct ucode *uc; struct fxp_cb_ucode *cbp; int i; for (uc = ucode_table; uc->ucode != NULL; uc++) if (sc->revision == uc->revision) break; if (uc->ucode == NULL) return; cbp = (struct fxp_cb_ucode *)sc->fxp_desc.cbl_list; cbp->cb_status = 0; cbp->cb_command = htole16(FXP_CB_COMMAND_UCODE | FXP_CB_COMMAND_EL); cbp->link_addr = 0xffffffff; /* (no) next command */ for (i = 0; i < uc->length; i++) cbp->ucode[i] = htole32(uc->ucode[i]); if (uc->int_delay_offset) *(uint16_t *)&cbp->ucode[uc->int_delay_offset] = htole16(sc->tunable_int_delay + sc->tunable_int_delay / 2); if (uc->bundle_max_offset) *(uint16_t *)&cbp->ucode[uc->bundle_max_offset] = htole16(sc->tunable_bundle_max); /* * Download the ucode to the chip. */ fxp_scb_wait(sc); bus_dmamap_sync(sc->cbl_tag, sc->cbl_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); CSR_WRITE_4(sc, FXP_CSR_SCB_GENERAL, sc->fxp_desc.cbl_addr); fxp_scb_cmd(sc, FXP_SCB_COMMAND_CU_START); /* ...and wait for it to complete. */ fxp_dma_wait(sc, &cbp->cb_status, sc->cbl_tag, sc->cbl_map); device_printf(sc->dev, "Microcode loaded, int_delay: %d usec bundle_max: %d\n", sc->tunable_int_delay, uc->bundle_max_offset == 0 ? 0 : sc->tunable_bundle_max); sc->flags |= FXP_FLAG_UCODE; } #define FXP_SYSCTL_STAT_ADD(c, h, n, p, d) \ SYSCTL_ADD_UINT(c, h, OID_AUTO, n, CTLFLAG_RD, p, 0, d) static void fxp_sysctl_node(struct fxp_softc *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *child, *parent; struct sysctl_oid *tree; struct fxp_hwstats *hsp; ctx = device_get_sysctl_ctx(sc->dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "int_delay", CTLTYPE_INT | CTLFLAG_RW, &sc->tunable_int_delay, 0, sysctl_hw_fxp_int_delay, "I", "FXP driver receive interrupt microcode bundling delay"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "bundle_max", CTLTYPE_INT | CTLFLAG_RW, &sc->tunable_bundle_max, 0, sysctl_hw_fxp_bundle_max, "I", "FXP driver receive interrupt microcode bundle size limit"); SYSCTL_ADD_INT(ctx, child,OID_AUTO, "rnr", CTLFLAG_RD, &sc->rnr, 0, "FXP RNR events"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "noflow", CTLFLAG_RW, &sc->tunable_noflow, 0, "FXP flow control disabled"); /* * Pull in device tunables. */ sc->tunable_int_delay = TUNABLE_INT_DELAY; sc->tunable_bundle_max = TUNABLE_BUNDLE_MAX; sc->tunable_noflow = 1; (void) resource_int_value(device_get_name(sc->dev), device_get_unit(sc->dev), "int_delay", &sc->tunable_int_delay); (void) resource_int_value(device_get_name(sc->dev), device_get_unit(sc->dev), "bundle_max", &sc->tunable_bundle_max); (void) resource_int_value(device_get_name(sc->dev), device_get_unit(sc->dev), "noflow", &sc->tunable_noflow); sc->rnr = 0; hsp = &sc->fxp_hwstats; tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "stats", CTLFLAG_RD, NULL, "FXP statistics"); parent = SYSCTL_CHILDREN(tree); /* Rx MAC statistics. */ tree = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, "rx", CTLFLAG_RD, NULL, "Rx MAC statistics"); child = SYSCTL_CHILDREN(tree); FXP_SYSCTL_STAT_ADD(ctx, child, "good_frames", &hsp->rx_good, "Good frames"); FXP_SYSCTL_STAT_ADD(ctx, child, "crc_errors", &hsp->rx_crc_errors, "CRC errors"); FXP_SYSCTL_STAT_ADD(ctx, child, "alignment_errors", &hsp->rx_alignment_errors, "Alignment errors"); FXP_SYSCTL_STAT_ADD(ctx, child, "rnr_errors", &hsp->rx_rnr_errors, "RNR errors"); FXP_SYSCTL_STAT_ADD(ctx, child, "overrun_errors", &hsp->rx_overrun_errors, "Overrun errors"); FXP_SYSCTL_STAT_ADD(ctx, child, "cdt_errors", &hsp->rx_cdt_errors, "Collision detect errors"); FXP_SYSCTL_STAT_ADD(ctx, child, "shortframes", &hsp->rx_shortframes, "Short frame errors"); if (sc->revision >= FXP_REV_82558_A4) { FXP_SYSCTL_STAT_ADD(ctx, child, "pause", &hsp->rx_pause, "Pause frames"); FXP_SYSCTL_STAT_ADD(ctx, child, "controls", &hsp->rx_controls, "Unsupported control frames"); } if (sc->revision >= FXP_REV_82559_A0) FXP_SYSCTL_STAT_ADD(ctx, child, "tco", &hsp->rx_tco, "TCO frames"); /* Tx MAC statistics. */ tree = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, "tx", CTLFLAG_RD, NULL, "Tx MAC statistics"); child = SYSCTL_CHILDREN(tree); FXP_SYSCTL_STAT_ADD(ctx, child, "good_frames", &hsp->tx_good, "Good frames"); FXP_SYSCTL_STAT_ADD(ctx, child, "maxcols", &hsp->tx_maxcols, "Maximum collisions errors"); FXP_SYSCTL_STAT_ADD(ctx, child, "latecols", &hsp->tx_latecols, "Late collisions errors"); FXP_SYSCTL_STAT_ADD(ctx, child, "underruns", &hsp->tx_underruns, "Underrun errors"); FXP_SYSCTL_STAT_ADD(ctx, child, "lostcrs", &hsp->tx_lostcrs, "Lost carrier sense"); FXP_SYSCTL_STAT_ADD(ctx, child, "deffered", &hsp->tx_deffered, "Deferred"); FXP_SYSCTL_STAT_ADD(ctx, child, "single_collisions", &hsp->tx_single_collisions, "Single collisions"); FXP_SYSCTL_STAT_ADD(ctx, child, "multiple_collisions", &hsp->tx_multiple_collisions, "Multiple collisions"); FXP_SYSCTL_STAT_ADD(ctx, child, "total_collisions", &hsp->tx_total_collisions, "Total collisions"); if (sc->revision >= FXP_REV_82558_A4) FXP_SYSCTL_STAT_ADD(ctx, child, "pause", &hsp->tx_pause, "Pause frames"); if (sc->revision >= FXP_REV_82559_A0) FXP_SYSCTL_STAT_ADD(ctx, child, "tco", &hsp->tx_tco, "TCO frames"); } #undef FXP_SYSCTL_STAT_ADD static int sysctl_int_range(SYSCTL_HANDLER_ARGS, int low, int high) { int error, value; value = *(int *)arg1; error = sysctl_handle_int(oidp, &value, 0, req); if (error || !req->newptr) return (error); if (value < low || value > high) return (EINVAL); *(int *)arg1 = value; return (0); } /* * Interrupt delay is expressed in microseconds, a multiplier is used * to convert this to the appropriate clock ticks before using. */ static int sysctl_hw_fxp_int_delay(SYSCTL_HANDLER_ARGS) { + return (sysctl_int_range(oidp, arg1, arg2, req, 300, 3000)); } static int sysctl_hw_fxp_bundle_max(SYSCTL_HANDLER_ARGS) { + return (sysctl_int_range(oidp, arg1, arg2, req, 1, 0xffff)); } Index: projects/binutils-2.17/sys/dev/fxp/if_fxpvar.h =================================================================== --- projects/binutils-2.17/sys/dev/fxp/if_fxpvar.h (revision 215829) +++ projects/binutils-2.17/sys/dev/fxp/if_fxpvar.h (revision 215830) @@ -1,247 +1,247 @@ /*- * Copyright (c) 1995, David Greenman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Misc. defintions for the Intel EtherExpress Pro/100B PCI Fast * Ethernet driver */ /* * Number of transmit control blocks. This determines the number * of transmit buffers that can be chained in the CB list. * This must be a power of two. */ #define FXP_NTXCB 128 #define FXP_NTXCB_HIWAT ((FXP_NTXCB * 7) / 10) /* * Maximum size of a DMA segment. */ #define FXP_TSO_SEGSIZE 4096 /* * Size of the TxCB list. */ #define FXP_TXCB_SZ (FXP_NTXCB * sizeof(struct fxp_cb_tx)) /* * Macro to obtain the DMA address of a virtual address in the * TxCB list based on the base DMA address of the TxCB list. */ #define FXP_TXCB_DMA_ADDR(sc, addr) \ (sc->fxp_desc.cbl_addr + (uintptr_t)addr - \ (uintptr_t)sc->fxp_desc.cbl_list) /* * Number of completed TX commands at which point an interrupt * will be generated to garbage collect the attached buffers. * Must be at least one less than FXP_NTXCB, and should be * enough less so that the transmitter doesn't becomes idle * during the buffer rundown (which would reduce performance). */ #define FXP_CXINT_THRESH 120 /* * TxCB list index mask. This is used to do list wrap-around. */ #define FXP_TXCB_MASK (FXP_NTXCB - 1) /* * Number of receive frame area buffers. These are large so chose * wisely. */ #ifdef DEVICE_POLLING #define FXP_NRFABUFS 192 #else #define FXP_NRFABUFS 64 #endif /* * Maximum number of seconds that the receiver can be idle before we * assume it's dead and attempt to reset it by reprogramming the * multicast filter. This is part of a work-around for a bug in the * NIC. See fxp_stats_update(). */ #define FXP_MAX_RX_IDLE 15 /* * Default maximum time, in microseconds, that an interrupt may be delayed * in an attempt to coalesce interrupts. This is only effective if the Intel * microcode is loaded, and may be changed via either loader tunables or * sysctl. See also the CPUSAVER_DWORD entry in rcvbundl.h. */ #define TUNABLE_INT_DELAY 1000 /* * Default number of packets that will be bundled, before an interrupt is * generated. This is only effective if the Intel microcode is loaded, and * may be changed via either loader tunables or sysctl. This may not be * present in all microcode revisions, see also the CPUSAVER_BUNDLE_MAX_DWORD * entry in rcvbundl.h. */ #define TUNABLE_BUNDLE_MAX 6 #define FXP_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx) #define FXP_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx) #define FXP_LOCK_ASSERT(_sc, _what) mtx_assert(&(_sc)->sc_mtx, (_what)) /* * Structures to handle TX and RX descriptors. */ struct fxp_rx { struct fxp_rx *rx_next; struct mbuf *rx_mbuf; bus_dmamap_t rx_map; uint32_t rx_addr; }; struct fxp_tx { struct fxp_tx *tx_next; struct fxp_cb_tx *tx_cb; struct mbuf *tx_mbuf; bus_dmamap_t tx_map; }; struct fxp_desc_list { struct fxp_rx rx_list[FXP_NRFABUFS]; struct fxp_tx tx_list[FXP_NTXCB]; struct fxp_tx mcs_tx; struct fxp_rx *rx_head; struct fxp_rx *rx_tail; struct fxp_tx *tx_first; struct fxp_tx *tx_last; struct fxp_rfa *rfa_list; struct fxp_cb_tx *cbl_list; uint32_t cbl_addr; bus_dma_tag_t rx_tag; }; struct fxp_ident { uint16_t devid; int16_t revid; /* -1 matches anything */ uint8_t ich; - char *name; + const char *name; }; struct fxp_hwstats { uint32_t tx_good; uint32_t tx_maxcols; uint32_t tx_latecols; uint32_t tx_underruns; uint32_t tx_lostcrs; uint32_t tx_deffered; uint32_t tx_single_collisions; uint32_t tx_multiple_collisions; uint32_t tx_total_collisions; uint32_t tx_pause; uint32_t tx_tco; uint32_t rx_good; uint32_t rx_crc_errors; uint32_t rx_alignment_errors; uint32_t rx_rnr_errors; uint32_t rx_overrun_errors; uint32_t rx_cdt_errors; uint32_t rx_shortframes; uint32_t rx_pause; uint32_t rx_controls; uint32_t rx_tco; }; /* * NOTE: Elements are ordered for optimal cacheline behavior, and NOT * for functional grouping. */ struct fxp_softc { struct ifnet *ifp; /* per-interface network data */ struct resource *fxp_res[2]; /* I/O and IRQ resources */ struct resource_spec *fxp_spec; /* the resource spec we used */ void *ih; /* interrupt handler cookie */ - struct fxp_ident *ident; + const struct fxp_ident *ident; struct mtx sc_mtx; bus_dma_tag_t fxp_txmtag; /* bus DMA tag for Tx mbufs */ bus_dma_tag_t fxp_rxmtag; /* bus DMA tag for Rx mbufs */ bus_dma_tag_t fxp_stag; /* bus DMA tag for stats */ bus_dmamap_t fxp_smap; /* bus DMA map for stats */ bus_dma_tag_t cbl_tag; /* DMA tag for the TxCB list */ bus_dmamap_t cbl_map; /* DMA map for the TxCB list */ bus_dma_tag_t mcs_tag; /* DMA tag for the multicast setup */ bus_dmamap_t mcs_map; /* DMA map for the multicast setup */ bus_dmamap_t spare_map; /* spare DMA map */ struct fxp_desc_list fxp_desc; /* descriptors management struct */ int maxtxseg; /* maximum # of TX segments */ int maxsegsize; /* maximum size of a TX segment */ int tx_queued; /* # of active TxCB's */ struct fxp_stats *fxp_stats; /* Pointer to interface stats */ uint32_t stats_addr; /* DMA address of the stats structure */ struct fxp_hwstats fxp_hwstats; int rx_idle_secs; /* # of seconds RX has been idle */ struct callout stat_ch; /* stat callout */ int watchdog_timer; /* seconds until chip reset */ struct fxp_cb_mcs *mcsp; /* Pointer to mcast setup descriptor */ uint32_t mcs_addr; /* DMA address of the multicast cmd */ struct ifmedia sc_media; /* media information */ device_t miibus; device_t dev; int tunable_int_delay; /* interrupt delay value for ucode */ int tunable_bundle_max; /* max # frames per interrupt (ucode) */ int tunable_noflow; /* flow control disabled */ int rnr; /* RNR events */ int eeprom_size; /* size of serial EEPROM */ int suspended; /* 0 = normal 1 = suspended or dead */ int cu_resume_bug; int revision; int flags; int if_flags; uint8_t rfa_size; uint32_t tx_cmd; }; #define FXP_FLAG_MWI_ENABLE 0x0001 /* MWI enable */ #define FXP_FLAG_READ_ALIGN 0x0002 /* align read access with cacheline */ #define FXP_FLAG_WRITE_ALIGN 0x0004 /* end write on cacheline */ #define FXP_FLAG_EXT_TXCB 0x0008 /* enable use of extended TXCB */ #define FXP_FLAG_SERIAL_MEDIA 0x0010 /* 10Mbps serial interface */ #define FXP_FLAG_LONG_PKT_EN 0x0020 /* enable long packet reception */ #define FXP_FLAG_CU_RESUME_BUG 0x0080 /* requires workaround for CU_RESUME */ #define FXP_FLAG_UCODE 0x0100 /* ucode is loaded */ #define FXP_FLAG_DEFERRED_RNR 0x0200 /* DEVICE_POLLING deferred RNR */ #define FXP_FLAG_EXT_RFA 0x0400 /* extended RFDs for csum offload */ #define FXP_FLAG_SAVE_BAD 0x0800 /* save bad pkts: bad size, CRC, etc */ #define FXP_FLAG_82559_RXCSUM 0x1000 /* 82559 compatible RX checksum */ #define FXP_FLAG_WOLCAP 0x2000 /* WOL capability */ #define FXP_FLAG_WOL 0x4000 /* WOL active */ #define FXP_FLAG_RXBUG 0x8000 /* Rx lock-up bug */ /* Macros to ease CSR access. */ #define CSR_READ_1(sc, reg) bus_read_1(sc->fxp_res[0], reg) #define CSR_READ_2(sc, reg) bus_read_2(sc->fxp_res[0], reg) #define CSR_READ_4(sc, reg) bus_read_4(sc->fxp_res[0], reg) #define CSR_WRITE_1(sc, reg, val) bus_write_1(sc->fxp_res[0], reg, val) #define CSR_WRITE_2(sc, reg, val) bus_write_2(sc->fxp_res[0], reg, val) #define CSR_WRITE_4(sc, reg, val) bus_write_4(sc->fxp_res[0], reg, val) Index: projects/binutils-2.17/sys/dev/gem/if_gem.c =================================================================== --- projects/binutils-2.17/sys/dev/gem/if_gem.c (revision 215829) +++ projects/binutils-2.17/sys/dev/gem/if_gem.c (revision 215830) @@ -1,2255 +1,2259 @@ /*- * Copyright (C) 2001 Eduardo Horvath. * Copyright (c) 2001-2003 Thomas Moestl * Copyright (c) 2007 Marius Strobl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: NetBSD: gem.c,v 1.21 2002/06/01 23:50:58 lukem Exp */ #include __FBSDID("$FreeBSD$"); /* * Driver for Apple GMAC, Sun ERI and Sun GEM Ethernet controllers */ #if 0 #define GEM_DEBUG #endif #if 0 /* XXX: In case of emergency, re-enable this. */ #define GEM_RINT_TIMEOUT #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include CTASSERT(powerof2(GEM_NRXDESC) && GEM_NRXDESC >= 32 && GEM_NRXDESC <= 8192); CTASSERT(powerof2(GEM_NTXDESC) && GEM_NTXDESC >= 32 && GEM_NTXDESC <= 8192); #define GEM_TRIES 10000 /* * The hardware supports basic TCP/UDP checksum offloading. However, * the hardware doesn't compensate the checksum for UDP datagram which * can yield to 0x0. As a safe guard, UDP checksum offload is disabled * by default. It can be reactivated by setting special link option * link0 with ifconfig(8). */ #define GEM_CSUM_FEATURES (CSUM_TCP) static int gem_add_rxbuf(struct gem_softc *sc, int idx); static int gem_bitwait(struct gem_softc *sc, u_int bank, bus_addr_t r, uint32_t clr, uint32_t set); static void gem_cddma_callback(void *xsc, bus_dma_segment_t *segs, int nsegs, int error); static int gem_disable_rx(struct gem_softc *sc); static int gem_disable_tx(struct gem_softc *sc); static void gem_eint(struct gem_softc *sc, u_int status); static void gem_init(void *xsc); static void gem_init_locked(struct gem_softc *sc); static void gem_init_regs(struct gem_softc *sc); static int gem_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static int gem_load_txmbuf(struct gem_softc *sc, struct mbuf **m_head); static int gem_meminit(struct gem_softc *sc); static void gem_mifinit(struct gem_softc *sc); static void gem_reset(struct gem_softc *sc); static int gem_reset_rx(struct gem_softc *sc); static void gem_reset_rxdma(struct gem_softc *sc); static int gem_reset_tx(struct gem_softc *sc); static u_int gem_ringsize(u_int sz); static void gem_rint(struct gem_softc *sc); #ifdef GEM_RINT_TIMEOUT static void gem_rint_timeout(void *arg); #endif static inline void gem_rxcksum(struct mbuf *m, uint64_t flags); static void gem_rxdrain(struct gem_softc *sc); static void gem_setladrf(struct gem_softc *sc); static void gem_start(struct ifnet *ifp); static void gem_start_locked(struct ifnet *ifp); static void gem_stop(struct ifnet *ifp, int disable); static void gem_tick(void *arg); static void gem_tint(struct gem_softc *sc); static inline void gem_txkick(struct gem_softc *sc); static int gem_watchdog(struct gem_softc *sc); devclass_t gem_devclass; DRIVER_MODULE(miibus, gem, miibus_driver, miibus_devclass, 0, 0); MODULE_DEPEND(gem, miibus, 1, 1, 1); #ifdef GEM_DEBUG #include #define KTR_GEM KTR_SPARE2 #endif #define GEM_BANK1_BITWAIT(sc, r, clr, set) \ gem_bitwait((sc), GEM_RES_BANK1, (r), (clr), (set)) #define GEM_BANK2_BITWAIT(sc, r, clr, set) \ gem_bitwait((sc), GEM_RES_BANK2, (r), (clr), (set)) int gem_attach(struct gem_softc *sc) { struct gem_txsoft *txs; struct ifnet *ifp; int error, i, phy; uint32_t v; if (bootverbose) device_printf(sc->sc_dev, "flags=0x%x\n", sc->sc_flags); /* Set up ifnet structure. */ ifp = sc->sc_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) return (ENOSPC); sc->sc_csum_features = GEM_CSUM_FEATURES; ifp->if_softc = sc; if_initname(ifp, device_get_name(sc->sc_dev), device_get_unit(sc->sc_dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_start = gem_start; ifp->if_ioctl = gem_ioctl; ifp->if_init = gem_init; IFQ_SET_MAXLEN(&ifp->if_snd, GEM_TXQUEUELEN); ifp->if_snd.ifq_drv_maxlen = GEM_TXQUEUELEN; IFQ_SET_READY(&ifp->if_snd); callout_init_mtx(&sc->sc_tick_ch, &sc->sc_mtx, 0); #ifdef GEM_RINT_TIMEOUT callout_init_mtx(&sc->sc_rx_ch, &sc->sc_mtx, 0); #endif /* Make sure the chip is stopped. */ gem_reset(sc); error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0, NULL, NULL, &sc->sc_pdmatag); if (error != 0) goto fail_ifnet; error = bus_dma_tag_create(sc->sc_pdmatag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->sc_rdmatag); if (error != 0) goto fail_ptag; error = bus_dma_tag_create(sc->sc_pdmatag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES * GEM_NTXSEGS, GEM_NTXSEGS, MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->sc_tdmatag); if (error != 0) goto fail_rtag; error = bus_dma_tag_create(sc->sc_pdmatag, PAGE_SIZE, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, sizeof(struct gem_control_data), 1, sizeof(struct gem_control_data), 0, NULL, NULL, &sc->sc_cdmatag); if (error != 0) goto fail_ttag; /* * Allocate the control data structures, create and load the * DMA map for it. */ if ((error = bus_dmamem_alloc(sc->sc_cdmatag, (void **)&sc->sc_control_data, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &sc->sc_cddmamap)) != 0) { device_printf(sc->sc_dev, "unable to allocate control data, error = %d\n", error); goto fail_ctag; } sc->sc_cddma = 0; if ((error = bus_dmamap_load(sc->sc_cdmatag, sc->sc_cddmamap, sc->sc_control_data, sizeof(struct gem_control_data), gem_cddma_callback, sc, 0)) != 0 || sc->sc_cddma == 0) { device_printf(sc->sc_dev, "unable to load control data DMA map, error = %d\n", error); goto fail_cmem; } /* * Initialize the transmit job descriptors. */ STAILQ_INIT(&sc->sc_txfreeq); STAILQ_INIT(&sc->sc_txdirtyq); /* * Create the transmit buffer DMA maps. */ error = ENOMEM; for (i = 0; i < GEM_TXQUEUELEN; i++) { txs = &sc->sc_txsoft[i]; txs->txs_mbuf = NULL; txs->txs_ndescs = 0; if ((error = bus_dmamap_create(sc->sc_tdmatag, 0, &txs->txs_dmamap)) != 0) { device_printf(sc->sc_dev, "unable to create TX DMA map %d, error = %d\n", i, error); goto fail_txd; } STAILQ_INSERT_TAIL(&sc->sc_txfreeq, txs, txs_q); } /* * Create the receive buffer DMA maps. */ for (i = 0; i < GEM_NRXDESC; i++) { if ((error = bus_dmamap_create(sc->sc_rdmatag, 0, &sc->sc_rxsoft[i].rxs_dmamap)) != 0) { device_printf(sc->sc_dev, "unable to create RX DMA map %d, error = %d\n", i, error); goto fail_rxd; } sc->sc_rxsoft[i].rxs_mbuf = NULL; } /* Bypass probing PHYs if we already know for sure to use a SERDES. */ if ((sc->sc_flags & GEM_SERDES) != 0) goto serdes; /* Bad things will happen when touching this register on ERI. */ if (sc->sc_variant != GEM_SUN_ERI) { GEM_BANK1_WRITE_4(sc, GEM_MII_DATAPATH_MODE, GEM_MII_DATAPATH_MII); GEM_BANK1_BARRIER(sc, GEM_MII_DATAPATH_MODE, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); } gem_mifinit(sc); /* * Look for an external PHY. */ error = ENXIO; v = GEM_BANK1_READ_4(sc, GEM_MIF_CONFIG); if ((v & GEM_MIF_CONFIG_MDI1) != 0) { v |= GEM_MIF_CONFIG_PHY_SEL; GEM_BANK1_WRITE_4(sc, GEM_MIF_CONFIG, v); GEM_BANK1_BARRIER(sc, GEM_MIF_CONFIG, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); switch (sc->sc_variant) { case GEM_SUN_ERI: phy = GEM_PHYAD_EXTERNAL; break; default: phy = MII_PHY_ANY; break; } error = mii_attach(sc->sc_dev, &sc->sc_miibus, ifp, gem_mediachange, gem_mediastatus, BMSR_DEFCAPMASK, phy, - MII_OFFSET_ANY, 0); + MII_OFFSET_ANY, MIIF_DOPAUSE); } /* * Fall back on an internal PHY if no external PHY was found. * Note that with Apple (K2) GMACs GEM_MIF_CONFIG_MDI0 can't be * trusted when the firmware has powered down the chip. */ if (error != 0 && ((v & GEM_MIF_CONFIG_MDI0) != 0 || GEM_IS_APPLE(sc))) { v &= ~GEM_MIF_CONFIG_PHY_SEL; GEM_BANK1_WRITE_4(sc, GEM_MIF_CONFIG, v); GEM_BANK1_BARRIER(sc, GEM_MIF_CONFIG, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); switch (sc->sc_variant) { case GEM_SUN_ERI: case GEM_APPLE_K2_GMAC: phy = GEM_PHYAD_INTERNAL; break; case GEM_APPLE_GMAC: phy = GEM_PHYAD_EXTERNAL; break; default: phy = MII_PHY_ANY; break; } error = mii_attach(sc->sc_dev, &sc->sc_miibus, ifp, gem_mediachange, gem_mediastatus, BMSR_DEFCAPMASK, phy, - MII_OFFSET_ANY, 0); + MII_OFFSET_ANY, MIIF_DOPAUSE); } /* * Try the external PCS SERDES if we didn't find any PHYs. */ if (error != 0 && sc->sc_variant == GEM_SUN_GEM) { serdes: GEM_BANK1_WRITE_4(sc, GEM_MII_DATAPATH_MODE, GEM_MII_DATAPATH_SERDES); GEM_BANK1_BARRIER(sc, GEM_MII_DATAPATH_MODE, 4, BUS_SPACE_BARRIER_WRITE); GEM_BANK1_WRITE_4(sc, GEM_MII_SLINK_CONTROL, GEM_MII_SLINK_LOOPBACK | GEM_MII_SLINK_EN_SYNC_D); GEM_BANK1_BARRIER(sc, GEM_MII_SLINK_CONTROL, 4, BUS_SPACE_BARRIER_WRITE); GEM_BANK1_WRITE_4(sc, GEM_MII_CONFIG, GEM_MII_CONFIG_ENABLE); GEM_BANK1_BARRIER(sc, GEM_MII_CONFIG, 4, BUS_SPACE_BARRIER_WRITE); sc->sc_flags |= GEM_SERDES; error = mii_attach(sc->sc_dev, &sc->sc_miibus, ifp, gem_mediachange, gem_mediastatus, BMSR_DEFCAPMASK, - GEM_PHYAD_EXTERNAL, MII_OFFSET_ANY, 0); + GEM_PHYAD_EXTERNAL, MII_OFFSET_ANY, MIIF_DOPAUSE); } if (error != 0) { device_printf(sc->sc_dev, "attaching PHYs failed\n"); goto fail_rxd; } sc->sc_mii = device_get_softc(sc->sc_miibus); /* * From this point forward, the attachment cannot fail. A failure * before this point releases all resources that may have been * allocated. */ /* Get RX FIFO size. */ sc->sc_rxfifosize = 64 * GEM_BANK1_READ_4(sc, GEM_RX_FIFO_SIZE); /* Get TX FIFO size. */ v = GEM_BANK1_READ_4(sc, GEM_TX_FIFO_SIZE); device_printf(sc->sc_dev, "%ukB RX FIFO, %ukB TX FIFO\n", sc->sc_rxfifosize / 1024, v / 16); /* Attach the interface. */ ether_ifattach(ifp, sc->sc_enaddr); /* * Tell the upper layer(s) we support long frames/checksum offloads. */ ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_HWCSUM; ifp->if_hwassist |= sc->sc_csum_features; ifp->if_capenable |= IFCAP_VLAN_MTU | IFCAP_HWCSUM; return (0); /* * Free any resources we've allocated during the failed attach * attempt. Do this in reverse order and fall through. */ fail_rxd: for (i = 0; i < GEM_NRXDESC; i++) if (sc->sc_rxsoft[i].rxs_dmamap != NULL) bus_dmamap_destroy(sc->sc_rdmatag, sc->sc_rxsoft[i].rxs_dmamap); fail_txd: for (i = 0; i < GEM_TXQUEUELEN; i++) if (sc->sc_txsoft[i].txs_dmamap != NULL) bus_dmamap_destroy(sc->sc_tdmatag, sc->sc_txsoft[i].txs_dmamap); bus_dmamap_unload(sc->sc_cdmatag, sc->sc_cddmamap); fail_cmem: bus_dmamem_free(sc->sc_cdmatag, sc->sc_control_data, sc->sc_cddmamap); fail_ctag: bus_dma_tag_destroy(sc->sc_cdmatag); fail_ttag: bus_dma_tag_destroy(sc->sc_tdmatag); fail_rtag: bus_dma_tag_destroy(sc->sc_rdmatag); fail_ptag: bus_dma_tag_destroy(sc->sc_pdmatag); fail_ifnet: if_free(ifp); return (error); } void gem_detach(struct gem_softc *sc) { struct ifnet *ifp = sc->sc_ifp; int i; ether_ifdetach(ifp); GEM_LOCK(sc); gem_stop(ifp, 1); GEM_UNLOCK(sc); callout_drain(&sc->sc_tick_ch); #ifdef GEM_RINT_TIMEOUT callout_drain(&sc->sc_rx_ch); #endif if_free(ifp); device_delete_child(sc->sc_dev, sc->sc_miibus); for (i = 0; i < GEM_NRXDESC; i++) if (sc->sc_rxsoft[i].rxs_dmamap != NULL) bus_dmamap_destroy(sc->sc_rdmatag, sc->sc_rxsoft[i].rxs_dmamap); for (i = 0; i < GEM_TXQUEUELEN; i++) if (sc->sc_txsoft[i].txs_dmamap != NULL) bus_dmamap_destroy(sc->sc_tdmatag, sc->sc_txsoft[i].txs_dmamap); GEM_CDSYNC(sc, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_cdmatag, sc->sc_cddmamap); bus_dmamem_free(sc->sc_cdmatag, sc->sc_control_data, sc->sc_cddmamap); bus_dma_tag_destroy(sc->sc_cdmatag); bus_dma_tag_destroy(sc->sc_tdmatag); bus_dma_tag_destroy(sc->sc_rdmatag); bus_dma_tag_destroy(sc->sc_pdmatag); } void gem_suspend(struct gem_softc *sc) { struct ifnet *ifp = sc->sc_ifp; GEM_LOCK(sc); gem_stop(ifp, 0); GEM_UNLOCK(sc); } void gem_resume(struct gem_softc *sc) { struct ifnet *ifp = sc->sc_ifp; GEM_LOCK(sc); /* * On resume all registers have to be initialized again like * after power-on. */ sc->sc_flags &= ~GEM_INITED; if (ifp->if_flags & IFF_UP) gem_init_locked(sc); GEM_UNLOCK(sc); } static inline void gem_rxcksum(struct mbuf *m, uint64_t flags) { struct ether_header *eh; struct ip *ip; struct udphdr *uh; uint16_t *opts; int32_t hlen, len, pktlen; uint32_t temp32; uint16_t cksum; pktlen = m->m_pkthdr.len; if (pktlen < sizeof(struct ether_header) + sizeof(struct ip)) return; eh = mtod(m, struct ether_header *); if (eh->ether_type != htons(ETHERTYPE_IP)) return; ip = (struct ip *)(eh + 1); if (ip->ip_v != IPVERSION) return; hlen = ip->ip_hl << 2; pktlen -= sizeof(struct ether_header); if (hlen < sizeof(struct ip)) return; if (ntohs(ip->ip_len) < hlen) return; if (ntohs(ip->ip_len) != pktlen) return; if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) return; /* Cannot handle fragmented packet. */ switch (ip->ip_p) { case IPPROTO_TCP: if (pktlen < (hlen + sizeof(struct tcphdr))) return; break; case IPPROTO_UDP: if (pktlen < (hlen + sizeof(struct udphdr))) return; uh = (struct udphdr *)((uint8_t *)ip + hlen); if (uh->uh_sum == 0) return; /* no checksum */ break; default: return; } cksum = ~(flags & GEM_RD_CHECKSUM); /* checksum fixup for IP options */ len = hlen - sizeof(struct ip); if (len > 0) { opts = (uint16_t *)(ip + 1); for (; len > 0; len -= sizeof(uint16_t), opts++) { temp32 = cksum - *opts; temp32 = (temp32 >> 16) + (temp32 & 65535); cksum = temp32 & 65535; } } m->m_pkthdr.csum_flags |= CSUM_DATA_VALID; m->m_pkthdr.csum_data = cksum; } static void gem_cddma_callback(void *xsc, bus_dma_segment_t *segs, int nsegs, int error) { struct gem_softc *sc = xsc; if (error != 0) return; if (nsegs != 1) panic("%s: bad control buffer segment count", __func__); sc->sc_cddma = segs[0].ds_addr; } static void gem_tick(void *arg) { struct gem_softc *sc = arg; struct ifnet *ifp = sc->sc_ifp; uint32_t v; GEM_LOCK_ASSERT(sc, MA_OWNED); /* * Unload collision and error counters. */ ifp->if_collisions += GEM_BANK1_READ_4(sc, GEM_MAC_NORM_COLL_CNT) + GEM_BANK1_READ_4(sc, GEM_MAC_FIRST_COLL_CNT); v = GEM_BANK1_READ_4(sc, GEM_MAC_EXCESS_COLL_CNT) + GEM_BANK1_READ_4(sc, GEM_MAC_LATE_COLL_CNT); ifp->if_collisions += v; ifp->if_oerrors += v; ifp->if_ierrors += GEM_BANK1_READ_4(sc, GEM_MAC_RX_LEN_ERR_CNT) + GEM_BANK1_READ_4(sc, GEM_MAC_RX_ALIGN_ERR) + GEM_BANK1_READ_4(sc, GEM_MAC_RX_CRC_ERR_CNT) + GEM_BANK1_READ_4(sc, GEM_MAC_RX_CODE_VIOL); /* * Then clear the hardware counters. */ GEM_BANK1_WRITE_4(sc, GEM_MAC_NORM_COLL_CNT, 0); GEM_BANK1_WRITE_4(sc, GEM_MAC_FIRST_COLL_CNT, 0); GEM_BANK1_WRITE_4(sc, GEM_MAC_EXCESS_COLL_CNT, 0); GEM_BANK1_WRITE_4(sc, GEM_MAC_LATE_COLL_CNT, 0); GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_LEN_ERR_CNT, 0); GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_ALIGN_ERR, 0); GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_CRC_ERR_CNT, 0); GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_CODE_VIOL, 0); mii_tick(sc->sc_mii); if (gem_watchdog(sc) == EJUSTRETURN) return; callout_reset(&sc->sc_tick_ch, hz, gem_tick, sc); } static int gem_bitwait(struct gem_softc *sc, u_int bank, bus_addr_t r, uint32_t clr, uint32_t set) { int i; uint32_t reg; for (i = GEM_TRIES; i--; DELAY(100)) { reg = GEM_BANKN_READ_M(bank, 4, sc, r); if ((reg & clr) == 0 && (reg & set) == set) return (1); } return (0); } static void gem_reset(struct gem_softc *sc) { #ifdef GEM_DEBUG CTR2(KTR_GEM, "%s: %s", device_get_name(sc->sc_dev), __func__); #endif gem_reset_rx(sc); gem_reset_tx(sc); /* Do a full reset. */ GEM_BANK2_WRITE_4(sc, GEM_RESET, GEM_RESET_RX | GEM_RESET_TX); GEM_BANK2_BARRIER(sc, GEM_RESET, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (!GEM_BANK2_BITWAIT(sc, GEM_RESET, GEM_RESET_RX | GEM_RESET_TX, 0)) device_printf(sc->sc_dev, "cannot reset device\n"); } static void gem_rxdrain(struct gem_softc *sc) { struct gem_rxsoft *rxs; int i; for (i = 0; i < GEM_NRXDESC; i++) { rxs = &sc->sc_rxsoft[i]; if (rxs->rxs_mbuf != NULL) { bus_dmamap_sync(sc->sc_rdmatag, rxs->rxs_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->sc_rdmatag, rxs->rxs_dmamap); m_freem(rxs->rxs_mbuf); rxs->rxs_mbuf = NULL; } } } static void gem_stop(struct ifnet *ifp, int disable) { struct gem_softc *sc = ifp->if_softc; struct gem_txsoft *txs; #ifdef GEM_DEBUG CTR2(KTR_GEM, "%s: %s", device_get_name(sc->sc_dev), __func__); #endif callout_stop(&sc->sc_tick_ch); #ifdef GEM_RINT_TIMEOUT callout_stop(&sc->sc_rx_ch); #endif gem_reset_tx(sc); gem_reset_rx(sc); /* * Release any queued transmit buffers. */ while ((txs = STAILQ_FIRST(&sc->sc_txdirtyq)) != NULL) { STAILQ_REMOVE_HEAD(&sc->sc_txdirtyq, txs_q); if (txs->txs_ndescs != 0) { bus_dmamap_sync(sc->sc_tdmatag, txs->txs_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_tdmatag, txs->txs_dmamap); if (txs->txs_mbuf != NULL) { m_freem(txs->txs_mbuf); txs->txs_mbuf = NULL; } } STAILQ_INSERT_TAIL(&sc->sc_txfreeq, txs, txs_q); } if (disable) gem_rxdrain(sc); /* * Mark the interface down and cancel the watchdog timer. */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); sc->sc_flags &= ~GEM_LINK; sc->sc_wdog_timer = 0; } static int gem_reset_rx(struct gem_softc *sc) { /* * Resetting while DMA is in progress can cause a bus hang, so we * disable DMA first. */ gem_disable_rx(sc); GEM_BANK1_WRITE_4(sc, GEM_RX_CONFIG, 0); GEM_BANK1_BARRIER(sc, GEM_RX_CONFIG, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (!GEM_BANK1_BITWAIT(sc, GEM_RX_CONFIG, GEM_RX_CONFIG_RXDMA_EN, 0)) device_printf(sc->sc_dev, "cannot disable RX DMA\n"); + /* Wait 5ms extra. */ + DELAY(5000); + /* Finally, reset the ERX. */ GEM_BANK2_WRITE_4(sc, GEM_RESET, GEM_RESET_RX); GEM_BANK2_BARRIER(sc, GEM_RESET, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (!GEM_BANK2_BITWAIT(sc, GEM_RESET, GEM_RESET_RX | GEM_RESET_TX, 0)) { device_printf(sc->sc_dev, "cannot reset receiver\n"); return (1); } return (0); } /* * Reset the receiver DMA engine. * * Intended to be used in case of GEM_INTR_RX_TAG_ERR, GEM_MAC_RX_OVERFLOW * etc in order to reset the receiver DMA engine only and not do a full * reset which amongst others also downs the link and clears the FIFOs. */ static void gem_reset_rxdma(struct gem_softc *sc) { int i; if (gem_reset_rx(sc) != 0) return (gem_init_locked(sc)); for (i = 0; i < GEM_NRXDESC; i++) if (sc->sc_rxsoft[i].rxs_mbuf != NULL) GEM_UPDATE_RXDESC(sc, i); sc->sc_rxptr = 0; GEM_CDSYNC(sc, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* NOTE: we use only 32-bit DMA addresses here. */ GEM_BANK1_WRITE_4(sc, GEM_RX_RING_PTR_HI, 0); GEM_BANK1_WRITE_4(sc, GEM_RX_RING_PTR_LO, GEM_CDRXADDR(sc, 0)); GEM_BANK1_WRITE_4(sc, GEM_RX_KICK, GEM_NRXDESC - 4); GEM_BANK1_WRITE_4(sc, GEM_RX_CONFIG, gem_ringsize(GEM_NRXDESC /* XXX */) | ((ETHER_HDR_LEN + sizeof(struct ip)) << GEM_RX_CONFIG_CXM_START_SHFT) | (GEM_THRSH_1024 << GEM_RX_CONFIG_FIFO_THRS_SHIFT) | (ETHER_ALIGN << GEM_RX_CONFIG_FBOFF_SHFT)); /* Adjust for the SBus clock probably isn't worth the fuzz. */ GEM_BANK1_WRITE_4(sc, GEM_RX_BLANKING, ((6 * (sc->sc_flags & GEM_PCI66) != 0 ? 2 : 1) << GEM_RX_BLANKING_TIME_SHIFT) | 6); GEM_BANK1_WRITE_4(sc, GEM_RX_PAUSE_THRESH, (3 * sc->sc_rxfifosize / 256) | ((sc->sc_rxfifosize / 256) << 12)); GEM_BANK1_WRITE_4(sc, GEM_RX_CONFIG, GEM_BANK1_READ_4(sc, GEM_RX_CONFIG) | GEM_RX_CONFIG_RXDMA_EN); GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_MASK, GEM_MAC_RX_DONE | GEM_MAC_RX_FRAME_CNT); GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_CONFIG, GEM_BANK1_READ_4(sc, GEM_MAC_RX_CONFIG) | GEM_MAC_RX_ENABLE); } static int gem_reset_tx(struct gem_softc *sc) { /* * Resetting while DMA is in progress can cause a bus hang, so we * disable DMA first. */ gem_disable_tx(sc); GEM_BANK1_WRITE_4(sc, GEM_TX_CONFIG, 0); GEM_BANK1_BARRIER(sc, GEM_TX_CONFIG, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (!GEM_BANK1_BITWAIT(sc, GEM_TX_CONFIG, GEM_TX_CONFIG_TXDMA_EN, 0)) device_printf(sc->sc_dev, "cannot disable TX DMA\n"); + /* Wait 5ms extra. */ + DELAY(5000); + /* Finally, reset the ETX. */ GEM_BANK2_WRITE_4(sc, GEM_RESET, GEM_RESET_TX); GEM_BANK2_BARRIER(sc, GEM_RESET, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (!GEM_BANK2_BITWAIT(sc, GEM_RESET, GEM_RESET_RX | GEM_RESET_TX, 0)) { device_printf(sc->sc_dev, "cannot reset transmitter\n"); return (1); } return (0); } static int gem_disable_rx(struct gem_softc *sc) { GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_CONFIG, GEM_BANK1_READ_4(sc, GEM_MAC_RX_CONFIG) & ~GEM_MAC_RX_ENABLE); GEM_BANK1_BARRIER(sc, GEM_MAC_RX_CONFIG, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return (GEM_BANK1_BITWAIT(sc, GEM_MAC_RX_CONFIG, GEM_MAC_RX_ENABLE, 0)); } static int gem_disable_tx(struct gem_softc *sc) { GEM_BANK1_WRITE_4(sc, GEM_MAC_TX_CONFIG, GEM_BANK1_READ_4(sc, GEM_MAC_TX_CONFIG) & ~GEM_MAC_TX_ENABLE); GEM_BANK1_BARRIER(sc, GEM_MAC_TX_CONFIG, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return (GEM_BANK1_BITWAIT(sc, GEM_MAC_TX_CONFIG, GEM_MAC_TX_ENABLE, 0)); } static int gem_meminit(struct gem_softc *sc) { struct gem_rxsoft *rxs; int error, i; GEM_LOCK_ASSERT(sc, MA_OWNED); /* * Initialize the transmit descriptor ring. */ for (i = 0; i < GEM_NTXDESC; i++) { sc->sc_txdescs[i].gd_flags = 0; sc->sc_txdescs[i].gd_addr = 0; } sc->sc_txfree = GEM_MAXTXFREE; sc->sc_txnext = 0; sc->sc_txwin = 0; /* * Initialize the receive descriptor and receive job * descriptor rings. */ for (i = 0; i < GEM_NRXDESC; i++) { rxs = &sc->sc_rxsoft[i]; if (rxs->rxs_mbuf == NULL) { if ((error = gem_add_rxbuf(sc, i)) != 0) { device_printf(sc->sc_dev, "unable to allocate or map RX buffer %d, " "error = %d\n", i, error); /* * XXX we should attempt to run with fewer * receive buffers instead of just failing. */ gem_rxdrain(sc); return (1); } } else GEM_INIT_RXDESC(sc, i); } sc->sc_rxptr = 0; GEM_CDSYNC(sc, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } static u_int gem_ringsize(u_int sz) { switch (sz) { case 32: return (GEM_RING_SZ_32); case 64: return (GEM_RING_SZ_64); case 128: return (GEM_RING_SZ_128); case 256: return (GEM_RING_SZ_256); case 512: return (GEM_RING_SZ_512); case 1024: return (GEM_RING_SZ_1024); case 2048: return (GEM_RING_SZ_2048); case 4096: return (GEM_RING_SZ_4096); case 8192: return (GEM_RING_SZ_8192); default: printf("%s: invalid ring size %d\n", __func__, sz); return (GEM_RING_SZ_32); } } static void gem_init(void *xsc) { struct gem_softc *sc = xsc; GEM_LOCK(sc); gem_init_locked(sc); GEM_UNLOCK(sc); } /* * Initialization of interface; set up initialization block * and transmit/receive descriptor rings. */ static void gem_init_locked(struct gem_softc *sc) { struct ifnet *ifp = sc->sc_ifp; uint32_t v; GEM_LOCK_ASSERT(sc, MA_OWNED); #ifdef GEM_DEBUG CTR2(KTR_GEM, "%s: %s: calling stop", device_get_name(sc->sc_dev), __func__); #endif /* * Initialization sequence. The numbered steps below correspond * to the sequence outlined in section 6.3.5.1 in the Ethernet * Channel Engine manual (part of the PCIO manual). * See also the STP2002-STQ document from Sun Microsystems. */ /* step 1 & 2. Reset the Ethernet Channel. */ gem_stop(ifp, 0); gem_reset(sc); #ifdef GEM_DEBUG CTR2(KTR_GEM, "%s: %s: restarting", device_get_name(sc->sc_dev), __func__); #endif if ((sc->sc_flags & GEM_SERDES) == 0) /* Re-initialize the MIF. */ gem_mifinit(sc); /* step 3. Setup data structures in host memory. */ if (gem_meminit(sc) != 0) return; /* step 4. TX MAC registers & counters */ gem_init_regs(sc); /* step 5. RX MAC registers & counters */ gem_setladrf(sc); /* step 6 & 7. Program Descriptor Ring Base Addresses. */ /* NOTE: we use only 32-bit DMA addresses here. */ GEM_BANK1_WRITE_4(sc, GEM_TX_RING_PTR_HI, 0); GEM_BANK1_WRITE_4(sc, GEM_TX_RING_PTR_LO, GEM_CDTXADDR(sc, 0)); GEM_BANK1_WRITE_4(sc, GEM_RX_RING_PTR_HI, 0); GEM_BANK1_WRITE_4(sc, GEM_RX_RING_PTR_LO, GEM_CDRXADDR(sc, 0)); #ifdef GEM_DEBUG CTR3(KTR_GEM, "loading RX ring %lx, TX ring %lx, cddma %lx", GEM_CDRXADDR(sc, 0), GEM_CDTXADDR(sc, 0), sc->sc_cddma); #endif /* step 8. Global Configuration & Interrupt Mask */ /* * Set the internal arbitration to "infinite" bursts of the * maximum length of 31 * 64 bytes so DMA transfers aren't * split up in cache line size chunks. This greatly improves * RX performance. * Enable silicon bug workarounds for the Apple variants. */ GEM_BANK1_WRITE_4(sc, GEM_CONFIG, GEM_CONFIG_TXDMA_LIMIT | GEM_CONFIG_RXDMA_LIMIT | ((sc->sc_flags & GEM_PCI) != 0 ? GEM_CONFIG_BURST_INF : GEM_CONFIG_BURST_64) | (GEM_IS_APPLE(sc) ? GEM_CONFIG_RONPAULBIT | GEM_CONFIG_BUG2FIX : 0)); GEM_BANK1_WRITE_4(sc, GEM_INTMASK, ~(GEM_INTR_TX_INTME | GEM_INTR_TX_EMPTY | GEM_INTR_RX_DONE | GEM_INTR_RX_NOBUF | GEM_INTR_RX_TAG_ERR | GEM_INTR_PERR | GEM_INTR_BERR #ifdef GEM_DEBUG | GEM_INTR_PCS | GEM_INTR_MIF #endif )); GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_MASK, GEM_MAC_RX_DONE | GEM_MAC_RX_FRAME_CNT); GEM_BANK1_WRITE_4(sc, GEM_MAC_TX_MASK, GEM_MAC_TX_XMIT_DONE | GEM_MAC_TX_DEFER_EXP | GEM_MAC_TX_PEAK_EXP); #ifdef GEM_DEBUG GEM_BANK1_WRITE_4(sc, GEM_MAC_CONTROL_MASK, ~(GEM_MAC_PAUSED | GEM_MAC_PAUSE | GEM_MAC_RESUME)); #else GEM_BANK1_WRITE_4(sc, GEM_MAC_CONTROL_MASK, GEM_MAC_PAUSED | GEM_MAC_PAUSE | GEM_MAC_RESUME); #endif /* step 9. ETX Configuration: use mostly default values. */ /* Enable DMA. */ v = gem_ringsize(GEM_NTXDESC); /* Set TX FIFO threshold and enable DMA. */ v |= ((sc->sc_variant == GEM_SUN_ERI ? 0x100 : 0x4ff) << 10) & GEM_TX_CONFIG_TXFIFO_TH; GEM_BANK1_WRITE_4(sc, GEM_TX_CONFIG, v | GEM_TX_CONFIG_TXDMA_EN); /* step 10. ERX Configuration */ /* Encode Receive Descriptor ring size. */ v = gem_ringsize(GEM_NRXDESC /* XXX */); /* RX TCP/UDP checksum offset */ v |= ((ETHER_HDR_LEN + sizeof(struct ip)) << GEM_RX_CONFIG_CXM_START_SHFT); /* Set RX FIFO threshold, set first byte offset and enable DMA. */ GEM_BANK1_WRITE_4(sc, GEM_RX_CONFIG, v | (GEM_THRSH_1024 << GEM_RX_CONFIG_FIFO_THRS_SHIFT) | (ETHER_ALIGN << GEM_RX_CONFIG_FBOFF_SHFT) | GEM_RX_CONFIG_RXDMA_EN); /* Adjust for the SBus clock probably isn't worth the fuzz. */ GEM_BANK1_WRITE_4(sc, GEM_RX_BLANKING, ((6 * (sc->sc_flags & GEM_PCI66) != 0 ? 2 : 1) << GEM_RX_BLANKING_TIME_SHIFT) | 6); /* * The following value is for an OFF Threshold of about 3/4 full * and an ON Threshold of 1/4 full. */ GEM_BANK1_WRITE_4(sc, GEM_RX_PAUSE_THRESH, (3 * sc->sc_rxfifosize / 256) | ((sc->sc_rxfifosize / 256) << 12)); /* step 11. Configure Media. */ /* step 12. RX_MAC Configuration Register */ v = GEM_BANK1_READ_4(sc, GEM_MAC_RX_CONFIG); v |= GEM_MAC_RX_ENABLE | GEM_MAC_RX_STRIP_CRC; GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_CONFIG, 0); GEM_BANK1_BARRIER(sc, GEM_MAC_RX_CONFIG, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (!GEM_BANK1_BITWAIT(sc, GEM_MAC_RX_CONFIG, GEM_MAC_RX_ENABLE, 0)) device_printf(sc->sc_dev, "cannot configure RX MAC\n"); GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_CONFIG, v); /* step 13. TX_MAC Configuration Register */ v = GEM_BANK1_READ_4(sc, GEM_MAC_TX_CONFIG); v |= GEM_MAC_TX_ENABLE; GEM_BANK1_WRITE_4(sc, GEM_MAC_TX_CONFIG, 0); GEM_BANK1_BARRIER(sc, GEM_MAC_TX_CONFIG, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (!GEM_BANK1_BITWAIT(sc, GEM_MAC_TX_CONFIG, GEM_MAC_TX_ENABLE, 0)) device_printf(sc->sc_dev, "cannot configure TX MAC\n"); GEM_BANK1_WRITE_4(sc, GEM_MAC_TX_CONFIG, v); /* step 14. Issue Transmit Pending command. */ /* step 15. Give the reciever a swift kick. */ GEM_BANK1_WRITE_4(sc, GEM_RX_KICK, GEM_NRXDESC - 4); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; mii_mediachg(sc->sc_mii); /* Start the one second timer. */ sc->sc_wdog_timer = 0; callout_reset(&sc->sc_tick_ch, hz, gem_tick, sc); } static int gem_load_txmbuf(struct gem_softc *sc, struct mbuf **m_head) { bus_dma_segment_t txsegs[GEM_NTXSEGS]; struct gem_txsoft *txs; struct ip *ip; struct mbuf *m; uint64_t cflags, flags; int error, nexttx, nsegs, offset, seg; GEM_LOCK_ASSERT(sc, MA_OWNED); /* Get a work queue entry. */ if ((txs = STAILQ_FIRST(&sc->sc_txfreeq)) == NULL) { /* Ran out of descriptors. */ return (ENOBUFS); } cflags = 0; if (((*m_head)->m_pkthdr.csum_flags & sc->sc_csum_features) != 0) { if (M_WRITABLE(*m_head) == 0) { m = m_dup(*m_head, M_DONTWAIT); m_freem(*m_head); *m_head = m; if (m == NULL) return (ENOBUFS); } offset = sizeof(struct ether_header); m = m_pullup(*m_head, offset + sizeof(struct ip)); if (m == NULL) { *m_head = NULL; return (ENOBUFS); } ip = (struct ip *)(mtod(m, caddr_t) + offset); offset += (ip->ip_hl << 2); cflags = offset << GEM_TD_CXSUM_STARTSHFT | ((offset + m->m_pkthdr.csum_data) << GEM_TD_CXSUM_STUFFSHFT) | GEM_TD_CXSUM_ENABLE; *m_head = m; } error = bus_dmamap_load_mbuf_sg(sc->sc_tdmatag, txs->txs_dmamap, *m_head, txsegs, &nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { m = m_collapse(*m_head, M_DONTWAIT, GEM_NTXSEGS); if (m == NULL) { m_freem(*m_head); *m_head = NULL; return (ENOBUFS); } *m_head = m; error = bus_dmamap_load_mbuf_sg(sc->sc_tdmatag, txs->txs_dmamap, *m_head, txsegs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { m_freem(*m_head); *m_head = NULL; return (error); } } else if (error != 0) return (error); /* If nsegs is wrong then the stack is corrupt. */ KASSERT(nsegs <= GEM_NTXSEGS, ("%s: too many DMA segments (%d)", __func__, nsegs)); if (nsegs == 0) { m_freem(*m_head); *m_head = NULL; return (EIO); } /* * Ensure we have enough descriptors free to describe * the packet. Note, we always reserve one descriptor * at the end of the ring as a termination point, in * order to prevent wrap-around. */ if (nsegs > sc->sc_txfree - 1) { txs->txs_ndescs = 0; bus_dmamap_unload(sc->sc_tdmatag, txs->txs_dmamap); return (ENOBUFS); } txs->txs_ndescs = nsegs; txs->txs_firstdesc = sc->sc_txnext; nexttx = txs->txs_firstdesc; for (seg = 0; seg < nsegs; seg++, nexttx = GEM_NEXTTX(nexttx)) { #ifdef GEM_DEBUG CTR6(KTR_GEM, "%s: mapping seg %d (txd %d), len %lx, addr %#lx (%#lx)", __func__, seg, nexttx, txsegs[seg].ds_len, txsegs[seg].ds_addr, GEM_DMA_WRITE(sc, txsegs[seg].ds_addr)); #endif sc->sc_txdescs[nexttx].gd_addr = GEM_DMA_WRITE(sc, txsegs[seg].ds_addr); KASSERT(txsegs[seg].ds_len < GEM_TD_BUFSIZE, ("%s: segment size too large!", __func__)); flags = txsegs[seg].ds_len & GEM_TD_BUFSIZE; sc->sc_txdescs[nexttx].gd_flags = GEM_DMA_WRITE(sc, flags | cflags); txs->txs_lastdesc = nexttx; } /* Set EOP on the last descriptor. */ #ifdef GEM_DEBUG CTR3(KTR_GEM, "%s: end of packet at segment %d, TX %d", __func__, seg, nexttx); #endif sc->sc_txdescs[txs->txs_lastdesc].gd_flags |= GEM_DMA_WRITE(sc, GEM_TD_END_OF_PACKET); /* Lastly set SOP on the first descriptor. */ #ifdef GEM_DEBUG CTR3(KTR_GEM, "%s: start of packet at segment %d, TX %d", __func__, seg, nexttx); #endif if (++sc->sc_txwin > GEM_NTXSEGS * 2 / 3) { sc->sc_txwin = 0; sc->sc_txdescs[txs->txs_firstdesc].gd_flags |= GEM_DMA_WRITE(sc, GEM_TD_INTERRUPT_ME | GEM_TD_START_OF_PACKET); } else sc->sc_txdescs[txs->txs_firstdesc].gd_flags |= GEM_DMA_WRITE(sc, GEM_TD_START_OF_PACKET); /* Sync the DMA map. */ bus_dmamap_sync(sc->sc_tdmatag, txs->txs_dmamap, BUS_DMASYNC_PREWRITE); #ifdef GEM_DEBUG CTR4(KTR_GEM, "%s: setting firstdesc=%d, lastdesc=%d, ndescs=%d", __func__, txs->txs_firstdesc, txs->txs_lastdesc, txs->txs_ndescs); #endif STAILQ_REMOVE_HEAD(&sc->sc_txfreeq, txs_q); STAILQ_INSERT_TAIL(&sc->sc_txdirtyq, txs, txs_q); txs->txs_mbuf = *m_head; sc->sc_txnext = GEM_NEXTTX(txs->txs_lastdesc); sc->sc_txfree -= txs->txs_ndescs; return (0); } static void gem_init_regs(struct gem_softc *sc) { const u_char *laddr = IF_LLADDR(sc->sc_ifp); GEM_LOCK_ASSERT(sc, MA_OWNED); /* These registers are not cleared on reset. */ if ((sc->sc_flags & GEM_INITED) == 0) { /* magic values */ GEM_BANK1_WRITE_4(sc, GEM_MAC_IPG0, 0); GEM_BANK1_WRITE_4(sc, GEM_MAC_IPG1, 8); GEM_BANK1_WRITE_4(sc, GEM_MAC_IPG2, 4); /* min frame length */ GEM_BANK1_WRITE_4(sc, GEM_MAC_MAC_MIN_FRAME, ETHER_MIN_LEN); /* max frame length and max burst size */ GEM_BANK1_WRITE_4(sc, GEM_MAC_MAC_MAX_FRAME, (ETHER_MAX_LEN + ETHER_VLAN_ENCAP_LEN) | (0x2000 << 16)); /* more magic values */ GEM_BANK1_WRITE_4(sc, GEM_MAC_PREAMBLE_LEN, 0x7); GEM_BANK1_WRITE_4(sc, GEM_MAC_JAM_SIZE, 0x4); GEM_BANK1_WRITE_4(sc, GEM_MAC_ATTEMPT_LIMIT, 0x10); - GEM_BANK1_WRITE_4(sc, GEM_MAC_CONTROL_TYPE, 0x8088); + GEM_BANK1_WRITE_4(sc, GEM_MAC_CONTROL_TYPE, 0x8808); /* random number seed */ GEM_BANK1_WRITE_4(sc, GEM_MAC_RANDOM_SEED, ((laddr[5] << 8) | laddr[4]) & 0x3ff); /* secondary MAC address: 0:0:0:0:0:0 */ GEM_BANK1_WRITE_4(sc, GEM_MAC_ADDR3, 0); GEM_BANK1_WRITE_4(sc, GEM_MAC_ADDR4, 0); GEM_BANK1_WRITE_4(sc, GEM_MAC_ADDR5, 0); /* MAC control address: 01:80:c2:00:00:01 */ GEM_BANK1_WRITE_4(sc, GEM_MAC_ADDR6, 0x0001); GEM_BANK1_WRITE_4(sc, GEM_MAC_ADDR7, 0xc200); GEM_BANK1_WRITE_4(sc, GEM_MAC_ADDR8, 0x0180); /* MAC filter address: 0:0:0:0:0:0 */ GEM_BANK1_WRITE_4(sc, GEM_MAC_ADDR_FILTER0, 0); GEM_BANK1_WRITE_4(sc, GEM_MAC_ADDR_FILTER1, 0); GEM_BANK1_WRITE_4(sc, GEM_MAC_ADDR_FILTER2, 0); GEM_BANK1_WRITE_4(sc, GEM_MAC_ADR_FLT_MASK1_2, 0); GEM_BANK1_WRITE_4(sc, GEM_MAC_ADR_FLT_MASK0, 0); sc->sc_flags |= GEM_INITED; } /* Counters need to be zeroed. */ GEM_BANK1_WRITE_4(sc, GEM_MAC_NORM_COLL_CNT, 0); GEM_BANK1_WRITE_4(sc, GEM_MAC_FIRST_COLL_CNT, 0); GEM_BANK1_WRITE_4(sc, GEM_MAC_EXCESS_COLL_CNT, 0); GEM_BANK1_WRITE_4(sc, GEM_MAC_LATE_COLL_CNT, 0); GEM_BANK1_WRITE_4(sc, GEM_MAC_DEFER_TMR_CNT, 0); GEM_BANK1_WRITE_4(sc, GEM_MAC_PEAK_ATTEMPTS, 0); GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_FRAME_COUNT, 0); GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_LEN_ERR_CNT, 0); GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_ALIGN_ERR, 0); GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_CRC_ERR_CNT, 0); GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_CODE_VIOL, 0); /* Set XOFF PAUSE time. */ GEM_BANK1_WRITE_4(sc, GEM_MAC_SEND_PAUSE_CMD, 0x1BF0); /* Set the station address. */ GEM_BANK1_WRITE_4(sc, GEM_MAC_ADDR0, (laddr[4] << 8) | laddr[5]); GEM_BANK1_WRITE_4(sc, GEM_MAC_ADDR1, (laddr[2] << 8) | laddr[3]); GEM_BANK1_WRITE_4(sc, GEM_MAC_ADDR2, (laddr[0] << 8) | laddr[1]); /* Enable MII outputs. */ GEM_BANK1_WRITE_4(sc, GEM_MAC_XIF_CONFIG, GEM_MAC_XIF_TX_MII_ENA); } static void gem_start(struct ifnet *ifp) { struct gem_softc *sc = ifp->if_softc; GEM_LOCK(sc); gem_start_locked(ifp); GEM_UNLOCK(sc); } static inline void gem_txkick(struct gem_softc *sc) { /* * Update the TX kick register. This register has to point to the * descriptor after the last valid one and for optimum performance * should be incremented in multiples of 4 (the DMA engine fetches/ * updates descriptors in batches of 4). */ #ifdef GEM_DEBUG CTR3(KTR_GEM, "%s: %s: kicking TX %d", device_get_name(sc->sc_dev), __func__, sc->sc_txnext); #endif GEM_CDSYNC(sc, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); GEM_BANK1_WRITE_4(sc, GEM_TX_KICK, sc->sc_txnext); } static void gem_start_locked(struct ifnet *ifp) { struct gem_softc *sc = ifp->if_softc; struct mbuf *m; int kicked, ntx; GEM_LOCK_ASSERT(sc, MA_OWNED); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || (sc->sc_flags & GEM_LINK) == 0) return; #ifdef GEM_DEBUG CTR4(KTR_GEM, "%s: %s: txfree %d, txnext %d", device_get_name(sc->sc_dev), __func__, sc->sc_txfree, sc->sc_txnext); #endif ntx = 0; kicked = 0; for (; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) && sc->sc_txfree > 1;) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m); if (m == NULL) break; if (gem_load_txmbuf(sc, &m) != 0) { if (m == NULL) break; ifp->if_drv_flags |= IFF_DRV_OACTIVE; IFQ_DRV_PREPEND(&ifp->if_snd, m); break; } if ((sc->sc_txnext % 4) == 0) { gem_txkick(sc); kicked = 1; } else kicked = 0; ntx++; BPF_MTAP(ifp, m); } if (ntx > 0) { if (kicked == 0) gem_txkick(sc); #ifdef GEM_DEBUG CTR2(KTR_GEM, "%s: packets enqueued, OWN on %d", device_get_name(sc->sc_dev), sc->sc_txnext); #endif /* Set a watchdog timer in case the chip flakes out. */ sc->sc_wdog_timer = 5; #ifdef GEM_DEBUG CTR3(KTR_GEM, "%s: %s: watchdog %d", device_get_name(sc->sc_dev), __func__, sc->sc_wdog_timer); #endif } } static void gem_tint(struct gem_softc *sc) { struct ifnet *ifp = sc->sc_ifp; struct gem_txsoft *txs; int progress; uint32_t txlast; #ifdef GEM_DEBUG int i; GEM_LOCK_ASSERT(sc, MA_OWNED); CTR2(KTR_GEM, "%s: %s", device_get_name(sc->sc_dev), __func__); #endif /* * Go through our TX list and free mbufs for those * frames that have been transmitted. */ progress = 0; GEM_CDSYNC(sc, BUS_DMASYNC_POSTREAD); while ((txs = STAILQ_FIRST(&sc->sc_txdirtyq)) != NULL) { #ifdef GEM_DEBUG if ((ifp->if_flags & IFF_DEBUG) != 0) { printf(" txsoft %p transmit chain:\n", txs); for (i = txs->txs_firstdesc;; i = GEM_NEXTTX(i)) { printf("descriptor %d: ", i); printf("gd_flags: 0x%016llx\t", (long long)GEM_DMA_READ(sc, sc->sc_txdescs[i].gd_flags)); printf("gd_addr: 0x%016llx\n", (long long)GEM_DMA_READ(sc, sc->sc_txdescs[i].gd_addr)); if (i == txs->txs_lastdesc) break; } } #endif /* * In theory, we could harvest some descriptors before * the ring is empty, but that's a bit complicated. * * GEM_TX_COMPLETION points to the last descriptor * processed + 1. */ txlast = GEM_BANK1_READ_4(sc, GEM_TX_COMPLETION); #ifdef GEM_DEBUG CTR4(KTR_GEM, "%s: txs->txs_firstdesc = %d, " "txs->txs_lastdesc = %d, txlast = %d", __func__, txs->txs_firstdesc, txs->txs_lastdesc, txlast); #endif if (txs->txs_firstdesc <= txs->txs_lastdesc) { if ((txlast >= txs->txs_firstdesc) && (txlast <= txs->txs_lastdesc)) break; } else { /* Ick -- this command wraps. */ if ((txlast >= txs->txs_firstdesc) || (txlast <= txs->txs_lastdesc)) break; } #ifdef GEM_DEBUG CTR1(KTR_GEM, "%s: releasing a descriptor", __func__); #endif STAILQ_REMOVE_HEAD(&sc->sc_txdirtyq, txs_q); sc->sc_txfree += txs->txs_ndescs; bus_dmamap_sync(sc->sc_tdmatag, txs->txs_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_tdmatag, txs->txs_dmamap); if (txs->txs_mbuf != NULL) { m_freem(txs->txs_mbuf); txs->txs_mbuf = NULL; } STAILQ_INSERT_TAIL(&sc->sc_txfreeq, txs, txs_q); ifp->if_opackets++; progress = 1; } #ifdef GEM_DEBUG CTR4(KTR_GEM, "%s: GEM_TX_STATE_MACHINE %x GEM_TX_DATA_PTR %llx " "GEM_TX_COMPLETION %x", __func__, GEM_BANK1_READ_4(sc, GEM_TX_STATE_MACHINE), ((long long)GEM_BANK1_READ_4(sc, GEM_TX_DATA_PTR_HI) << 32) | GEM_BANK1_READ_4(sc, GEM_TX_DATA_PTR_LO), GEM_BANK1_READ_4(sc, GEM_TX_COMPLETION)); #endif if (progress) { if (sc->sc_txfree == GEM_NTXDESC - 1) sc->sc_txwin = 0; /* * We freed some descriptors, so reset IFF_DRV_OACTIVE * and restart. */ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if (STAILQ_EMPTY(&sc->sc_txdirtyq)) sc->sc_wdog_timer = 0; gem_start_locked(ifp); } #ifdef GEM_DEBUG CTR3(KTR_GEM, "%s: %s: watchdog %d", device_get_name(sc->sc_dev), __func__, sc->sc_wdog_timer); #endif } #ifdef GEM_RINT_TIMEOUT static void gem_rint_timeout(void *arg) { struct gem_softc *sc = arg; GEM_LOCK_ASSERT(sc, MA_OWNED); gem_rint(sc); } #endif static void gem_rint(struct gem_softc *sc) { struct ifnet *ifp = sc->sc_ifp; struct mbuf *m; uint64_t rxstat; uint32_t rxcomp; GEM_LOCK_ASSERT(sc, MA_OWNED); #ifdef GEM_RINT_TIMEOUT callout_stop(&sc->sc_rx_ch); #endif #ifdef GEM_DEBUG CTR2(KTR_GEM, "%s: %s", device_get_name(sc->sc_dev), __func__); #endif /* * Read the completion register once. This limits * how long the following loop can execute. */ rxcomp = GEM_BANK1_READ_4(sc, GEM_RX_COMPLETION); #ifdef GEM_DEBUG CTR3(KTR_GEM, "%s: sc->sc_rxptr %d, complete %d", __func__, sc->sc_rxptr, rxcomp); #endif GEM_CDSYNC(sc, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); for (; sc->sc_rxptr != rxcomp;) { m = sc->sc_rxsoft[sc->sc_rxptr].rxs_mbuf; rxstat = GEM_DMA_READ(sc, sc->sc_rxdescs[sc->sc_rxptr].gd_flags); if (rxstat & GEM_RD_OWN) { #ifdef GEM_RINT_TIMEOUT /* * The descriptor is still marked as owned, although * it is supposed to have completed. This has been * observed on some machines. Just exiting here * might leave the packet sitting around until another * one arrives to trigger a new interrupt, which is * generally undesirable, so set up a timeout. */ callout_reset(&sc->sc_rx_ch, GEM_RXOWN_TICKS, gem_rint_timeout, sc); #endif m = NULL; goto kickit; } if (rxstat & GEM_RD_BAD_CRC) { ifp->if_ierrors++; device_printf(sc->sc_dev, "receive error: CRC error\n"); GEM_INIT_RXDESC(sc, sc->sc_rxptr); m = NULL; goto kickit; } #ifdef GEM_DEBUG if ((ifp->if_flags & IFF_DEBUG) != 0) { printf(" rxsoft %p descriptor %d: ", &sc->sc_rxsoft[sc->sc_rxptr], sc->sc_rxptr); printf("gd_flags: 0x%016llx\t", (long long)GEM_DMA_READ(sc, sc->sc_rxdescs[sc->sc_rxptr].gd_flags)); printf("gd_addr: 0x%016llx\n", (long long)GEM_DMA_READ(sc, sc->sc_rxdescs[sc->sc_rxptr].gd_addr)); } #endif /* * Allocate a new mbuf cluster. If that fails, we are * out of memory, and must drop the packet and recycle * the buffer that's already attached to this descriptor. */ if (gem_add_rxbuf(sc, sc->sc_rxptr) != 0) { ifp->if_ierrors++; GEM_INIT_RXDESC(sc, sc->sc_rxptr); m = NULL; } kickit: /* * Update the RX kick register. This register has to point * to the descriptor after the last valid one (before the * current batch) and for optimum performance should be * incremented in multiples of 4 (the DMA engine fetches/ * updates descriptors in batches of 4). */ sc->sc_rxptr = GEM_NEXTRX(sc->sc_rxptr); if ((sc->sc_rxptr % 4) == 0) { GEM_CDSYNC(sc, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); GEM_BANK1_WRITE_4(sc, GEM_RX_KICK, (sc->sc_rxptr + GEM_NRXDESC - 4) & GEM_NRXDESC_MASK); } if (m == NULL) { if (rxstat & GEM_RD_OWN) break; continue; } ifp->if_ipackets++; m->m_data += ETHER_ALIGN; /* first byte offset */ m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len = GEM_RD_BUFLEN(rxstat); if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) gem_rxcksum(m, rxstat); /* Pass it on. */ GEM_UNLOCK(sc); (*ifp->if_input)(ifp, m); GEM_LOCK(sc); } #ifdef GEM_DEBUG CTR3(KTR_GEM, "%s: done sc->sc_rxptr %d, complete %d", __func__, sc->sc_rxptr, GEM_BANK1_READ_4(sc, GEM_RX_COMPLETION)); #endif } static int gem_add_rxbuf(struct gem_softc *sc, int idx) { struct gem_rxsoft *rxs = &sc->sc_rxsoft[idx]; struct mbuf *m; bus_dma_segment_t segs[1]; int error, nsegs; GEM_LOCK_ASSERT(sc, MA_OWNED); m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = m->m_ext.ext_size; #ifdef GEM_DEBUG /* Bzero the packet to check DMA. */ memset(m->m_ext.ext_buf, 0, m->m_ext.ext_size); #endif if (rxs->rxs_mbuf != NULL) { bus_dmamap_sync(sc->sc_rdmatag, rxs->rxs_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->sc_rdmatag, rxs->rxs_dmamap); } error = bus_dmamap_load_mbuf_sg(sc->sc_rdmatag, rxs->rxs_dmamap, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->sc_dev, "cannot load RS DMA map %d, error = %d\n", idx, error); m_freem(m); return (error); } /* If nsegs is wrong then the stack is corrupt. */ KASSERT(nsegs == 1, ("%s: too many DMA segments (%d)", __func__, nsegs)); rxs->rxs_mbuf = m; rxs->rxs_paddr = segs[0].ds_addr; bus_dmamap_sync(sc->sc_rdmatag, rxs->rxs_dmamap, BUS_DMASYNC_PREREAD); GEM_INIT_RXDESC(sc, idx); return (0); } static void gem_eint(struct gem_softc *sc, u_int status) { sc->sc_ifp->if_ierrors++; if ((status & GEM_INTR_RX_TAG_ERR) != 0) { gem_reset_rxdma(sc); return; } device_printf(sc->sc_dev, "%s: status 0x%x", __func__, status); if ((status & GEM_INTR_BERR) != 0) { if ((sc->sc_flags & GEM_PCI) != 0) printf(", PCI bus error 0x%x\n", GEM_BANK1_READ_4(sc, GEM_PCI_ERROR_STATUS)); else printf(", SBus error 0x%x\n", GEM_BANK1_READ_4(sc, GEM_SBUS_STATUS)); } } void gem_intr(void *v) { struct gem_softc *sc = v; uint32_t status, status2; GEM_LOCK(sc); status = GEM_BANK1_READ_4(sc, GEM_STATUS); #ifdef GEM_DEBUG CTR4(KTR_GEM, "%s: %s: cplt %x, status %x", device_get_name(sc->sc_dev), __func__, (status >> GEM_STATUS_TX_COMPLETION_SHFT), (u_int)status); /* * PCS interrupts must be cleared, otherwise no traffic is passed! */ if ((status & GEM_INTR_PCS) != 0) { status2 = GEM_BANK1_READ_4(sc, GEM_MII_INTERRUP_STATUS) | GEM_BANK1_READ_4(sc, GEM_MII_INTERRUP_STATUS); if ((status2 & GEM_MII_INTERRUP_LINK) != 0) device_printf(sc->sc_dev, "%s: PCS link status changed\n", __func__); } if ((status & GEM_MAC_CONTROL_STATUS) != 0) { status2 = GEM_BANK1_READ_4(sc, GEM_MAC_CONTROL_STATUS); if ((status2 & GEM_MAC_PAUSED) != 0) device_printf(sc->sc_dev, "%s: PAUSE received (PAUSE time %d slots)\n", __func__, GEM_MAC_PAUSE_TIME(status2)); if ((status2 & GEM_MAC_PAUSE) != 0) device_printf(sc->sc_dev, "%s: transited to PAUSE state\n", __func__); if ((status2 & GEM_MAC_RESUME) != 0) device_printf(sc->sc_dev, "%s: transited to non-PAUSE state\n", __func__); } if ((status & GEM_INTR_MIF) != 0) device_printf(sc->sc_dev, "%s: MIF interrupt\n", __func__); #endif if (__predict_false(status & (GEM_INTR_RX_TAG_ERR | GEM_INTR_PERR | GEM_INTR_BERR)) != 0) gem_eint(sc, status); if ((status & (GEM_INTR_RX_DONE | GEM_INTR_RX_NOBUF)) != 0) gem_rint(sc); if ((status & (GEM_INTR_TX_EMPTY | GEM_INTR_TX_INTME)) != 0) gem_tint(sc); if (__predict_false((status & GEM_INTR_TX_MAC) != 0)) { status2 = GEM_BANK1_READ_4(sc, GEM_MAC_TX_STATUS); if ((status2 & ~(GEM_MAC_TX_XMIT_DONE | GEM_MAC_TX_DEFER_EXP | GEM_MAC_TX_PEAK_EXP)) != 0) device_printf(sc->sc_dev, "MAC TX fault, status %x\n", status2); if ((status2 & (GEM_MAC_TX_UNDERRUN | GEM_MAC_TX_PKT_TOO_LONG)) != 0) { sc->sc_ifp->if_oerrors++; gem_init_locked(sc); } } if (__predict_false((status & GEM_INTR_RX_MAC) != 0)) { status2 = GEM_BANK1_READ_4(sc, GEM_MAC_RX_STATUS); /* * At least with GEM_SUN_GEM and some GEM_SUN_ERI * revisions GEM_MAC_RX_OVERFLOW happen often due to a * silicon bug so handle them silently. Moreover, it's * likely that the receiver has hung so we reset it. */ if ((status2 & GEM_MAC_RX_OVERFLOW) != 0) { sc->sc_ifp->if_ierrors++; gem_reset_rxdma(sc); } else if ((status2 & ~(GEM_MAC_RX_DONE | GEM_MAC_RX_FRAME_CNT)) != 0) device_printf(sc->sc_dev, "MAC RX fault, status %x\n", status2); } GEM_UNLOCK(sc); } static int gem_watchdog(struct gem_softc *sc) { struct ifnet *ifp = sc->sc_ifp; GEM_LOCK_ASSERT(sc, MA_OWNED); #ifdef GEM_DEBUG CTR4(KTR_GEM, "%s: GEM_RX_CONFIG %x GEM_MAC_RX_STATUS %x GEM_MAC_RX_CONFIG %x", __func__, GEM_BANK1_READ_4(sc, GEM_RX_CONFIG), GEM_BANK1_READ_4(sc, GEM_MAC_RX_STATUS), GEM_BANK1_READ_4(sc, GEM_MAC_RX_CONFIG)); CTR4(KTR_GEM, "%s: GEM_TX_CONFIG %x GEM_MAC_TX_STATUS %x GEM_MAC_TX_CONFIG %x", __func__, GEM_BANK1_READ_4(sc, GEM_TX_CONFIG), GEM_BANK1_READ_4(sc, GEM_MAC_TX_STATUS), GEM_BANK1_READ_4(sc, GEM_MAC_TX_CONFIG)); #endif if (sc->sc_wdog_timer == 0 || --sc->sc_wdog_timer != 0) return (0); if ((sc->sc_flags & GEM_LINK) != 0) device_printf(sc->sc_dev, "device timeout\n"); else if (bootverbose) device_printf(sc->sc_dev, "device timeout (no link)\n"); ++ifp->if_oerrors; /* Try to get more packets going. */ gem_init_locked(sc); gem_start_locked(ifp); return (EJUSTRETURN); } static void gem_mifinit(struct gem_softc *sc) { /* Configure the MIF in frame mode. */ GEM_BANK1_WRITE_4(sc, GEM_MIF_CONFIG, GEM_BANK1_READ_4(sc, GEM_MIF_CONFIG) & ~GEM_MIF_CONFIG_BB_ENA); GEM_BANK1_BARRIER(sc, GEM_MIF_CONFIG, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); } /* * MII interface * * The MII interface supports at least three different operating modes: * * Bitbang mode is implemented using data, clock and output enable registers. * * Frame mode is implemented by loading a complete frame into the frame * register and polling the valid bit for completion. * * Polling mode uses the frame register but completion is indicated by * an interrupt. * */ int gem_mii_readreg(device_t dev, int phy, int reg) { struct gem_softc *sc; int n; uint32_t v; #ifdef GEM_DEBUG_PHY printf("%s: phy %d reg %d\n", __func__, phy, reg); #endif sc = device_get_softc(dev); if ((sc->sc_flags & GEM_SERDES) != 0) { switch (reg) { case MII_BMCR: reg = GEM_MII_CONTROL; break; case MII_BMSR: reg = GEM_MII_STATUS; break; case MII_PHYIDR1: case MII_PHYIDR2: return (0); case MII_ANAR: reg = GEM_MII_ANAR; break; case MII_ANLPAR: reg = GEM_MII_ANLPAR; break; case MII_EXTSR: return (EXTSR_1000XFDX | EXTSR_1000XHDX); default: device_printf(sc->sc_dev, "%s: unhandled register %d\n", __func__, reg); return (0); } return (GEM_BANK1_READ_4(sc, reg)); } /* Construct the frame command. */ v = GEM_MIF_FRAME_READ | (phy << GEM_MIF_PHY_SHIFT) | (reg << GEM_MIF_REG_SHIFT); GEM_BANK1_WRITE_4(sc, GEM_MIF_FRAME, v); GEM_BANK1_BARRIER(sc, GEM_MIF_FRAME, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); for (n = 0; n < 100; n++) { DELAY(1); v = GEM_BANK1_READ_4(sc, GEM_MIF_FRAME); if (v & GEM_MIF_FRAME_TA0) return (v & GEM_MIF_FRAME_DATA); } device_printf(sc->sc_dev, "%s: timed out\n", __func__); return (0); } int gem_mii_writereg(device_t dev, int phy, int reg, int val) { struct gem_softc *sc; int n; uint32_t v; #ifdef GEM_DEBUG_PHY printf("%s: phy %d reg %d val %x\n", phy, reg, val, __func__); #endif sc = device_get_softc(dev); if ((sc->sc_flags & GEM_SERDES) != 0) { switch (reg) { case MII_BMSR: reg = GEM_MII_STATUS; break; case MII_BMCR: reg = GEM_MII_CONTROL; if ((val & GEM_MII_CONTROL_RESET) == 0) break; GEM_BANK1_WRITE_4(sc, GEM_MII_CONTROL, val); GEM_BANK1_BARRIER(sc, GEM_MII_CONTROL, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (!GEM_BANK1_BITWAIT(sc, GEM_MII_CONTROL, GEM_MII_CONTROL_RESET, 0)) device_printf(sc->sc_dev, "cannot reset PCS\n"); /* FALLTHROUGH */ case MII_ANAR: GEM_BANK1_WRITE_4(sc, GEM_MII_CONFIG, 0); GEM_BANK1_BARRIER(sc, GEM_MII_CONFIG, 4, BUS_SPACE_BARRIER_WRITE); GEM_BANK1_WRITE_4(sc, GEM_MII_ANAR, val); GEM_BANK1_BARRIER(sc, GEM_MII_ANAR, 4, BUS_SPACE_BARRIER_WRITE); GEM_BANK1_WRITE_4(sc, GEM_MII_SLINK_CONTROL, GEM_MII_SLINK_LOOPBACK | GEM_MII_SLINK_EN_SYNC_D); GEM_BANK1_BARRIER(sc, GEM_MII_SLINK_CONTROL, 4, BUS_SPACE_BARRIER_WRITE); GEM_BANK1_WRITE_4(sc, GEM_MII_CONFIG, GEM_MII_CONFIG_ENABLE); GEM_BANK1_BARRIER(sc, GEM_MII_CONFIG, 4, BUS_SPACE_BARRIER_WRITE); return (0); case MII_ANLPAR: reg = GEM_MII_ANLPAR; break; default: device_printf(sc->sc_dev, "%s: unhandled register %d\n", __func__, reg); return (0); } GEM_BANK1_WRITE_4(sc, reg, val); GEM_BANK1_BARRIER(sc, reg, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return (0); } /* Construct the frame command. */ v = GEM_MIF_FRAME_WRITE | (phy << GEM_MIF_PHY_SHIFT) | (reg << GEM_MIF_REG_SHIFT) | (val & GEM_MIF_FRAME_DATA); GEM_BANK1_WRITE_4(sc, GEM_MIF_FRAME, v); GEM_BANK1_BARRIER(sc, GEM_MIF_FRAME, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); for (n = 0; n < 100; n++) { DELAY(1); v = GEM_BANK1_READ_4(sc, GEM_MIF_FRAME); if (v & GEM_MIF_FRAME_TA0) return (1); } device_printf(sc->sc_dev, "%s: timed out\n", __func__); return (0); } void gem_mii_statchg(device_t dev) { struct gem_softc *sc; int gigabit; uint32_t rxcfg, txcfg, v; sc = device_get_softc(dev); GEM_LOCK_ASSERT(sc, MA_OWNED); #ifdef GEM_DEBUG if ((sc->sc_ifp->if_flags & IFF_DEBUG) != 0) device_printf(sc->sc_dev, "%s: status change\n", __func__); #endif if ((sc->sc_mii->mii_media_status & IFM_ACTIVE) != 0 && IFM_SUBTYPE(sc->sc_mii->mii_media_active) != IFM_NONE) sc->sc_flags |= GEM_LINK; else sc->sc_flags &= ~GEM_LINK; switch (IFM_SUBTYPE(sc->sc_mii->mii_media_active)) { case IFM_1000_SX: case IFM_1000_LX: case IFM_1000_CX: case IFM_1000_T: gigabit = 1; break; default: gigabit = 0; } /* * The configuration done here corresponds to the steps F) and * G) and as far as enabling of RX and TX MAC goes also step H) * of the initialization sequence outlined in section 3.2.1 of * the GEM Gigabit Ethernet ASIC Specification. */ rxcfg = GEM_BANK1_READ_4(sc, GEM_MAC_RX_CONFIG); rxcfg &= ~(GEM_MAC_RX_CARR_EXTEND | GEM_MAC_RX_ENABLE); txcfg = GEM_MAC_TX_ENA_IPG0 | GEM_MAC_TX_NGU | GEM_MAC_TX_NGU_LIMIT; if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) != 0) txcfg |= GEM_MAC_TX_IGN_CARRIER | GEM_MAC_TX_IGN_COLLIS; else if (gigabit != 0) { rxcfg |= GEM_MAC_RX_CARR_EXTEND; txcfg |= GEM_MAC_TX_CARR_EXTEND; } GEM_BANK1_WRITE_4(sc, GEM_MAC_TX_CONFIG, 0); GEM_BANK1_BARRIER(sc, GEM_MAC_TX_CONFIG, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (!GEM_BANK1_BITWAIT(sc, GEM_MAC_TX_CONFIG, GEM_MAC_TX_ENABLE, 0)) device_printf(sc->sc_dev, "cannot disable TX MAC\n"); GEM_BANK1_WRITE_4(sc, GEM_MAC_TX_CONFIG, txcfg); GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_CONFIG, 0); GEM_BANK1_BARRIER(sc, GEM_MAC_RX_CONFIG, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (!GEM_BANK1_BITWAIT(sc, GEM_MAC_RX_CONFIG, GEM_MAC_RX_ENABLE, 0)) device_printf(sc->sc_dev, "cannot disable RX MAC\n"); GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_CONFIG, rxcfg); v = GEM_BANK1_READ_4(sc, GEM_MAC_CONTROL_CONFIG) & ~(GEM_MAC_CC_RX_PAUSE | GEM_MAC_CC_TX_PAUSE); -#ifdef notyet if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_ETH_RXPAUSE) != 0) v |= GEM_MAC_CC_RX_PAUSE; if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_ETH_TXPAUSE) != 0) v |= GEM_MAC_CC_TX_PAUSE; -#endif GEM_BANK1_WRITE_4(sc, GEM_MAC_CONTROL_CONFIG, v); if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) == 0 && gigabit != 0) GEM_BANK1_WRITE_4(sc, GEM_MAC_SLOT_TIME, GEM_MAC_SLOT_TIME_CARR_EXTEND); else GEM_BANK1_WRITE_4(sc, GEM_MAC_SLOT_TIME, GEM_MAC_SLOT_TIME_NORMAL); /* XIF Configuration */ v = GEM_MAC_XIF_LINK_LED; v |= GEM_MAC_XIF_TX_MII_ENA; if ((sc->sc_flags & GEM_SERDES) == 0) { if ((GEM_BANK1_READ_4(sc, GEM_MIF_CONFIG) & GEM_MIF_CONFIG_PHY_SEL) != 0) { /* External MII needs echo disable if half duplex. */ if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) == 0) v |= GEM_MAC_XIF_ECHO_DISABL; } else /* * Internal MII needs buffer enable. * XXX buffer enable makes only sense for an * external PHY. */ v |= GEM_MAC_XIF_MII_BUF_ENA; } if (gigabit != 0) v |= GEM_MAC_XIF_GMII_MODE; if ((IFM_OPTIONS(sc->sc_mii->mii_media_active) & IFM_FDX) != 0) v |= GEM_MAC_XIF_FDPLX_LED; GEM_BANK1_WRITE_4(sc, GEM_MAC_XIF_CONFIG, v); if ((sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) != 0 && (sc->sc_flags & GEM_LINK) != 0) { GEM_BANK1_WRITE_4(sc, GEM_MAC_TX_CONFIG, txcfg | GEM_MAC_TX_ENABLE); GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_CONFIG, rxcfg | GEM_MAC_RX_ENABLE); } } int gem_mediachange(struct ifnet *ifp) { struct gem_softc *sc = ifp->if_softc; int error; /* XXX add support for serial media. */ GEM_LOCK(sc); error = mii_mediachg(sc->sc_mii); GEM_UNLOCK(sc); return (error); } void gem_mediastatus(struct ifnet *ifp, struct ifmediareq *ifmr) { struct gem_softc *sc = ifp->if_softc; GEM_LOCK(sc); if ((ifp->if_flags & IFF_UP) == 0) { GEM_UNLOCK(sc); return; } mii_pollstat(sc->sc_mii); ifmr->ifm_active = sc->sc_mii->mii_media_active; ifmr->ifm_status = sc->sc_mii->mii_media_status; GEM_UNLOCK(sc); } static int gem_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct gem_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; int error; error = 0; switch (cmd) { case SIOCSIFFLAGS: GEM_LOCK(sc); if ((ifp->if_flags & IFF_UP) != 0) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0 && ((ifp->if_flags ^ sc->sc_ifflags) & (IFF_ALLMULTI | IFF_PROMISC)) != 0) gem_setladrf(sc); else gem_init_locked(sc); } else if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) gem_stop(ifp, 0); if ((ifp->if_flags & IFF_LINK0) != 0) sc->sc_csum_features |= CSUM_UDP; else sc->sc_csum_features &= ~CSUM_UDP; if ((ifp->if_capenable & IFCAP_TXCSUM) != 0) ifp->if_hwassist = sc->sc_csum_features; sc->sc_ifflags = ifp->if_flags; GEM_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: GEM_LOCK(sc); gem_setladrf(sc); GEM_UNLOCK(sc); break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->sc_mii->mii_media, cmd); break; case SIOCSIFCAP: GEM_LOCK(sc); ifp->if_capenable = ifr->ifr_reqcap; if ((ifp->if_capenable & IFCAP_TXCSUM) != 0) ifp->if_hwassist = sc->sc_csum_features; else ifp->if_hwassist = 0; GEM_UNLOCK(sc); break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } static void gem_setladrf(struct gem_softc *sc) { struct ifnet *ifp = sc->sc_ifp; struct ifmultiaddr *inm; int i; uint32_t hash[16]; uint32_t crc, v; GEM_LOCK_ASSERT(sc, MA_OWNED); /* Get the current RX configuration. */ v = GEM_BANK1_READ_4(sc, GEM_MAC_RX_CONFIG); /* * Turn off promiscuous mode, promiscuous group mode (all multicast), * and hash filter. Depending on the case, the right bit will be * enabled. */ v &= ~(GEM_MAC_RX_PROMISCUOUS | GEM_MAC_RX_HASH_FILTER | GEM_MAC_RX_PROMISC_GRP); GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_CONFIG, v); GEM_BANK1_BARRIER(sc, GEM_MAC_RX_CONFIG, 4, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); if (!GEM_BANK1_BITWAIT(sc, GEM_MAC_RX_CONFIG, GEM_MAC_RX_HASH_FILTER, 0)) device_printf(sc->sc_dev, "cannot disable RX hash filter\n"); if ((ifp->if_flags & IFF_PROMISC) != 0) { v |= GEM_MAC_RX_PROMISCUOUS; goto chipit; } if ((ifp->if_flags & IFF_ALLMULTI) != 0) { v |= GEM_MAC_RX_PROMISC_GRP; goto chipit; } /* * Set up multicast address filter by passing all multicast * addresses through a crc generator, and then using the high * order 8 bits as an index into the 256 bit logical address * filter. The high order 4 bits selects the word, while the * other 4 bits select the bit within the word (where bit 0 * is the MSB). */ /* Clear the hash table. */ memset(hash, 0, sizeof(hash)); if_maddr_rlock(ifp); TAILQ_FOREACH(inm, &ifp->if_multiaddrs, ifma_link) { if (inm->ifma_addr->sa_family != AF_LINK) continue; crc = ether_crc32_le(LLADDR((struct sockaddr_dl *) inm->ifma_addr), ETHER_ADDR_LEN); /* We just want the 8 most significant bits. */ crc >>= 24; /* Set the corresponding bit in the filter. */ hash[crc >> 4] |= 1 << (15 - (crc & 15)); } if_maddr_runlock(ifp); v |= GEM_MAC_RX_HASH_FILTER; /* Now load the hash table into the chip (if we are using it). */ for (i = 0; i < 16; i++) GEM_BANK1_WRITE_4(sc, GEM_MAC_HASH0 + i * (GEM_MAC_HASH1 - GEM_MAC_HASH0), hash[i]); chipit: GEM_BANK1_WRITE_4(sc, GEM_MAC_RX_CONFIG, v); } Index: projects/binutils-2.17/sys/dev/mii/bmtphy.c =================================================================== --- projects/binutils-2.17/sys/dev/mii/bmtphy.c (revision 215829) +++ projects/binutils-2.17/sys/dev/mii/bmtphy.c (revision 215830) @@ -1,251 +1,294 @@ /*- * Copyright (c) 1998, 1999, 2000, 2001 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1997 Manuel Bouyer. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * from: NetBSD: bmtphy.c,v 1.8 2002/07/03 06:25:50 simonb Exp * */ #include __FBSDID("$FreeBSD$"); /* * Driver for the Broadcom BCM5201/BCM5202 "Mini-Theta" PHYs. This also * drives the PHY on the 3Com 3c905C. The 3c905C's PHY is described in * the 3c905C data sheet. */ #include #include #include #include #include #include #include #include #include #include #include "miidevs.h" #include #include "miibus_if.h" static int bmtphy_probe(device_t); static int bmtphy_attach(device_t); +struct bmtphy_softc { + struct mii_softc mii_sc; + int mii_model; +}; + static device_method_t bmtphy_methods[] = { /* Device interface */ DEVMETHOD(device_probe, bmtphy_probe), DEVMETHOD(device_attach, bmtphy_attach), DEVMETHOD(device_detach, mii_phy_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), { 0, 0 } }; static devclass_t bmtphy_devclass; static driver_t bmtphy_driver = { "bmtphy", bmtphy_methods, - sizeof(struct mii_softc) + sizeof(struct bmtphy_softc) }; DRIVER_MODULE(bmtphy, miibus, bmtphy_driver, bmtphy_devclass, 0, 0); static int bmtphy_service(struct mii_softc *, struct mii_data *, int); static void bmtphy_status(struct mii_softc *); +static void bmtphy_reset(struct mii_softc *); static const struct mii_phydesc bmtphys_dp[] = { MII_PHY_DESC(BROADCOM, BCM4401), MII_PHY_DESC(BROADCOM, BCM5201), + MII_PHY_DESC(BROADCOM, BCM5214), MII_PHY_DESC(BROADCOM, BCM5221), + MII_PHY_DESC(BROADCOM, BCM5222), MII_PHY_END }; static const struct mii_phydesc bmtphys_lp[] = { MII_PHY_DESC(BROADCOM, 3C905B), MII_PHY_DESC(BROADCOM, 3C905C), MII_PHY_END }; static int bmtphy_probe(device_t dev) { int rval; /* Let exphy(4) take precedence for these. */ rval = mii_phy_dev_probe(dev, bmtphys_lp, BUS_PROBE_LOW_PRIORITY); if (rval <= 0) return (rval); return (mii_phy_dev_probe(dev, bmtphys_dp, BUS_PROBE_DEFAULT)); } static int bmtphy_attach(device_t dev) { - struct mii_softc *sc; - struct mii_attach_args *ma; - struct mii_data *mii; + struct bmtphy_softc *bsc; + struct mii_softc *sc; + struct mii_attach_args *ma; + struct mii_data *mii; - sc = device_get_softc(dev); + bsc = device_get_softc(dev); + sc = &bsc->mii_sc; ma = device_get_ivars(dev); sc->mii_dev = device_get_parent(dev); mii = ma->mii_data; LIST_INSERT_HEAD(&mii->mii_phys, sc, mii_list); sc->mii_flags = miibus_get_flags(dev); sc->mii_inst = mii->mii_instance++; sc->mii_phy = ma->mii_phyno; sc->mii_service = bmtphy_service; sc->mii_pdata = mii; - mii_phy_reset(sc); + sc->mii_flags |= MIIF_NOMANPAUSE; + bsc->mii_model = MII_MODEL(ma->mii_id2); + + bmtphy_reset(sc); + sc->mii_capabilities = PHY_READ(sc, MII_BMSR) & ma->mii_capmask; device_printf(dev, " "); mii_phy_add_media(sc); printf("\n"); MIIBUS_MEDIAINIT(sc->mii_dev); return (0); } static int bmtphy_service(struct mii_softc *sc, struct mii_data *mii, int cmd) { switch (cmd) { case MII_POLLSTAT: break; case MII_MEDIACHG: /* * If the interface is not up, don't do anything. */ if ((mii->mii_ifp->if_flags & IFF_UP) == 0) break; mii_phy_setmedia(sc); break; case MII_TICK: if (mii_phy_tick(sc) == EJUSTRETURN) return (0); break; } /* Update the media status. */ bmtphy_status(sc); /* Callback if something changed. */ mii_phy_update(sc, cmd); - return (0); } static void bmtphy_status(struct mii_softc *sc) { - struct mii_data *mii; - struct ifmedia_entry *ife; - int bmsr, bmcr, aux_csr; + struct mii_data *mii; + struct ifmedia_entry *ife; + int bmsr, bmcr, aux_csr; mii = sc->mii_pdata; ife = mii->mii_media.ifm_cur; mii->mii_media_status = IFM_AVALID; mii->mii_media_active = IFM_ETHER; bmsr = PHY_READ(sc, MII_BMSR) | PHY_READ(sc, MII_BMSR); - aux_csr = PHY_READ(sc, MII_BMTPHY_AUX_CSR); if (bmsr & BMSR_LINK) mii->mii_media_status |= IFM_ACTIVE; bmcr = PHY_READ(sc, MII_BMCR); if (bmcr & BMCR_ISO) { mii->mii_media_active |= IFM_NONE; mii->mii_media_status = 0; return; } if (bmcr & BMCR_LOOP) mii->mii_media_active |= IFM_LOOP; if (bmcr & BMCR_AUTOEN) { /* * The media status bits are only valid if autonegotiation * has completed (or it's disabled). */ if ((bmsr & BMSR_ACOMP) == 0) { /* Erg, still trying, I guess... */ mii->mii_media_active |= IFM_NONE; return; } + aux_csr = PHY_READ(sc, MII_BMTPHY_AUX_CSR); if (aux_csr & AUX_CSR_SPEED) mii->mii_media_active |= IFM_100_TX; else mii->mii_media_active |= IFM_10_T; if (aux_csr & AUX_CSR_FDX) - mii->mii_media_active |= IFM_FDX; + mii->mii_media_active |= + IFM_FDX | mii_phy_flowstatus(sc); else mii->mii_media_active |= IFM_HDX; } else mii->mii_media_active = ife->ifm_media; +} + +static void +bmtphy_reset(struct mii_softc *sc) +{ + struct bmtphy_softc *bsc; + u_int16_t data; + + bsc = (struct bmtphy_softc *)sc; + + mii_phy_reset(sc); + + if (bsc->mii_model == MII_MODEL_BROADCOM_BCM5221) { + /* Enable shadow register mode. */ + data = PHY_READ(sc, 0x1f); + PHY_WRITE(sc, 0x1f, data | 0x0080); + + /* Enable APD (Auto PowerDetect). */ + data = PHY_READ(sc, MII_BMTPHY_AUX2); + PHY_WRITE(sc, MII_BMTPHY_AUX2, data | 0x0020); + + /* Enable clocks across APD for Auto-MDIX functionality. */ + data = PHY_READ(sc, MII_BMTPHY_INTR); + PHY_WRITE(sc, MII_BMTPHY_INTR, data | 0x0004); + + /* Disable shadow register mode. */ + data = PHY_READ(sc, 0x1f); + PHY_WRITE(sc, 0x1f, data & ~0x0080); + } } Index: projects/binutils-2.17/sys/dev/mii/gentbi.c =================================================================== --- projects/binutils-2.17/sys/dev/mii/gentbi.c (revision 215829) +++ projects/binutils-2.17/sys/dev/mii/gentbi.c (revision 215830) @@ -1,276 +1,278 @@ /* $NetBSD: gentbi.c,v 1.15 2006/03/29 07:05:24 thorpej Exp $ */ /*- * Copyright (c) 1998, 1999, 2000, 2001 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1997 Manuel Bouyer. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Driver for generic ten-bit (1000BASE-SX) interfaces, built into * many Gigabit Ethernet chips. * * All we have to do here is correctly report speed and duplex. */ #include __FBSDID("$FreeBSD$"); /* * Driver for generic unknown ten-bit interfaces(1000BASE-{LX,SX} * fiber interfaces). */ #include #include #include #include #include #include #include #include #include #include #include #include "miidevs.h" #include "miibus_if.h" static int gentbi_probe(device_t); static int gentbi_attach(device_t); static device_method_t gentbi_methods[] = { /* device interface */ DEVMETHOD(device_probe, gentbi_probe), DEVMETHOD(device_attach, gentbi_attach), DEVMETHOD(device_detach, mii_phy_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), {0, 0} }; static devclass_t gentbi_devclass; static driver_t gentbi_driver = { "gentbi", gentbi_methods, sizeof(struct mii_softc) }; DRIVER_MODULE(gentbi, miibus, gentbi_driver, gentbi_devclass, 0, 0); static int gentbi_service(struct mii_softc *, struct mii_data *, int); static void gentbi_status(struct mii_softc *); static int gentbi_probe(device_t dev) { device_t parent; struct mii_attach_args *ma; int bmsr, extsr; parent = device_get_parent(dev); ma = device_get_ivars(dev); /* * We match as a generic TBI if: * * - There is no media in the BMSR. * - EXTSR has only 1000X. */ bmsr = MIIBUS_READREG(parent, ma->mii_phyno, MII_BMSR); if ((bmsr & BMSR_EXTSTAT) == 0 || (bmsr & BMSR_MEDIAMASK) != 0) return (ENXIO); extsr = MIIBUS_READREG(parent, ma->mii_phyno, MII_EXTSR); if (extsr & (EXTSR_1000TFDX|EXTSR_1000THDX)) return (ENXIO); if (extsr & (EXTSR_1000XFDX|EXTSR_1000XHDX)) { /* * We think this is a generic TBI. Return a match * priority higher than ukphy, but lower than what * specific drivers will return. */ device_set_desc(dev, "Generic ten-bit interface"); return (BUS_PROBE_LOW_PRIORITY); } return (ENXIO); } static int gentbi_attach(device_t dev) { struct mii_softc *sc; struct mii_attach_args *ma; struct mii_data *mii; sc = device_get_softc(dev); ma = device_get_ivars(dev); sc->mii_dev = device_get_parent(dev); mii = ma->mii_data; LIST_INSERT_HEAD(&mii->mii_phys, sc, mii_list); if (bootverbose) device_printf(dev, "OUI 0x%06x, model 0x%04x, rev. %d\n", MII_OUI(ma->mii_id1, ma->mii_id2), MII_MODEL(ma->mii_id2), MII_REV(ma->mii_id2)); sc->mii_flags = miibus_get_flags(dev); sc->mii_inst = mii->mii_instance++; sc->mii_phy = ma->mii_phyno; sc->mii_service = gentbi_service; sc->mii_pdata = mii; + sc->mii_flags |= MIIF_NOMANPAUSE; + mii_phy_reset(sc); /* * Mask out all media in the BMSR. We only are really interested * in "auto". */ sc->mii_capabilities = PHY_READ(sc, MII_BMSR) & ma->mii_capmask & ~BMSR_MEDIAMASK; if (sc->mii_capabilities & BMSR_EXTSTAT) sc->mii_extcapabilities = PHY_READ(sc, MII_EXTSR); device_printf(dev, " "); mii_phy_add_media(sc); printf("\n"); MIIBUS_MEDIAINIT(sc->mii_dev); return (0); } static int gentbi_service(struct mii_softc *sc, struct mii_data *mii, int cmd) { switch (cmd) { case MII_POLLSTAT: break; case MII_MEDIACHG: /* * If the interface is not up, don't do anything. */ if ((mii->mii_ifp->if_flags & IFF_UP) == 0) break; mii_phy_setmedia(sc); break; case MII_TICK: if (mii_phy_tick(sc) == EJUSTRETURN) return (0); break; } /* Update the media status. */ gentbi_status(sc); /* Callback if something changed. */ mii_phy_update(sc, cmd); return (0); } static void gentbi_status(struct mii_softc *sc) { struct mii_data *mii = sc->mii_pdata; struct ifmedia_entry *ife = mii->mii_media.ifm_cur; int bmsr, bmcr, anlpar; mii->mii_media_status = IFM_AVALID; mii->mii_media_active = IFM_ETHER; bmsr = PHY_READ(sc, MII_BMSR) | PHY_READ(sc, MII_BMSR); if (bmsr & BMSR_LINK) mii->mii_media_status |= IFM_ACTIVE; bmcr = PHY_READ(sc, MII_BMCR); if (bmcr & BMCR_ISO) { mii->mii_media_active |= IFM_NONE; mii->mii_media_status = 0; return; } if (bmcr & BMCR_LOOP) mii->mii_media_active |= IFM_LOOP; if (bmcr & BMCR_AUTOEN) { /* * The media status bits are only valid if autonegotiation * has completed (or it's disabled). */ if ((bmsr & BMSR_ACOMP) == 0) { /* Erg, still trying, I guess... */ mii->mii_media_active |= IFM_NONE; return; } /* * The media is always 1000baseSX. Check the ANLPAR to * see if we're doing full-duplex. */ mii->mii_media_active |= IFM_1000_SX; anlpar = PHY_READ(sc, MII_ANLPAR); if ((sc->mii_extcapabilities & EXTSR_1000XFDX) != 0 && (anlpar & ANLPAR_X_FD) != 0) mii->mii_media_active |= IFM_FDX | mii_phy_flowstatus(sc); else mii->mii_media_active |= IFM_HDX; } else mii->mii_media_active = ife->ifm_media; } Index: projects/binutils-2.17/sys/dev/mii/inphy.c =================================================================== --- projects/binutils-2.17/sys/dev/mii/inphy.c (revision 215829) +++ projects/binutils-2.17/sys/dev/mii/inphy.c (revision 215830) @@ -1,205 +1,208 @@ /*- * Copyright (c) 2001 Jonathan Lemon * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * driver for Intel 82553 and 82555 PHYs */ #include #include #include #include #include #include #include #include #include #include #include "miidevs.h" #include #include "miibus_if.h" static int inphy_probe(device_t dev); static int inphy_attach(device_t dev); static device_method_t inphy_methods[] = { /* device interface */ DEVMETHOD(device_probe, inphy_probe), DEVMETHOD(device_attach, inphy_attach), DEVMETHOD(device_detach, mii_phy_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), { 0, 0 } }; static devclass_t inphy_devclass; static driver_t inphy_driver = { "inphy", inphy_methods, sizeof(struct mii_softc) }; DRIVER_MODULE(inphy, miibus, inphy_driver, inphy_devclass, 0, 0); static int inphy_service(struct mii_softc *, struct mii_data *, int); static void inphy_status(struct mii_softc *); static const struct mii_phydesc inphys[] = { MII_PHY_DESC(INTEL, I82553C), MII_PHY_DESC(INTEL, I82555), MII_PHY_DESC(INTEL, I82562EM), MII_PHY_DESC(INTEL, I82562ET), MII_PHY_DESC(xxINTEL, I82553AB), MII_PHY_END }; static int inphy_probe(device_t dev) { return (mii_phy_dev_probe(dev, inphys, BUS_PROBE_DEFAULT)); } static int inphy_attach(device_t dev) { struct mii_softc *sc; struct mii_attach_args *ma; struct mii_data *mii; sc = device_get_softc(dev); ma = device_get_ivars(dev); sc->mii_dev = device_get_parent(dev); mii = ma->mii_data; LIST_INSERT_HEAD(&mii->mii_phys, sc, mii_list); sc->mii_flags = miibus_get_flags(dev); sc->mii_inst = mii->mii_instance++; sc->mii_phy = ma->mii_phyno; sc->mii_service = inphy_service; sc->mii_pdata = mii; + sc->mii_flags |= MIIF_NOMANPAUSE; + ifmedia_add(&mii->mii_media, IFM_MAKEWORD(IFM_ETHER, IFM_100_TX, IFM_LOOP, sc->mii_inst), MII_MEDIA_100_TX, NULL); mii_phy_reset(sc); sc->mii_capabilities = PHY_READ(sc, MII_BMSR) & ma->mii_capmask; device_printf(dev, " "); mii_phy_add_media(sc); printf("\n"); MIIBUS_MEDIAINIT(sc->mii_dev); return (0); } static int inphy_service(struct mii_softc *sc, struct mii_data *mii, int cmd) { switch (cmd) { case MII_POLLSTAT: break; case MII_MEDIACHG: /* * If the interface is not up, don't do anything. */ if ((mii->mii_ifp->if_flags & IFF_UP) == 0) break; mii_phy_setmedia(sc); break; case MII_TICK: if (mii_phy_tick(sc) == EJUSTRETURN) return (0); break; } /* Update the media status. */ inphy_status(sc); /* Callback if something changed. */ mii_phy_update(sc, cmd); return (0); } static void inphy_status(struct mii_softc *sc) { struct mii_data *mii = sc->mii_pdata; struct ifmedia_entry *ife = mii->mii_media.ifm_cur; int bmsr, bmcr, scr; mii->mii_media_status = IFM_AVALID; mii->mii_media_active = IFM_ETHER; bmsr = PHY_READ(sc, MII_BMSR) | PHY_READ(sc, MII_BMSR); if (bmsr & BMSR_LINK) mii->mii_media_status |= IFM_ACTIVE; bmcr = PHY_READ(sc, MII_BMCR); if (bmcr & BMCR_ISO) { mii->mii_media_active |= IFM_NONE; mii->mii_media_status = 0; return; } if (bmcr & BMCR_LOOP) mii->mii_media_active |= IFM_LOOP; if (bmcr & BMCR_AUTOEN) { if ((bmsr & BMSR_ACOMP) == 0) { mii->mii_media_active |= IFM_NONE; return; } scr = PHY_READ(sc, MII_INPHY_SCR); if (scr & SCR_S100) mii->mii_media_active |= IFM_100_TX; else mii->mii_media_active |= IFM_10_T; if (scr & SCR_FDX) - mii->mii_media_active |= IFM_FDX; + mii->mii_media_active |= + IFM_FDX | mii_phy_flowstatus(sc); else mii->mii_media_active |= IFM_HDX; } else mii->mii_media_active = ife->ifm_media; } Index: projects/binutils-2.17/sys/dev/mii/mii.c =================================================================== --- projects/binutils-2.17/sys/dev/mii/mii.c (revision 215829) +++ projects/binutils-2.17/sys/dev/mii/mii.c (revision 215830) @@ -1,548 +1,548 @@ /* $NetBSD: mii.c,v 1.12 1999/08/03 19:41:49 drochner Exp $ */ /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * MII bus layer, glues MII-capable network interface drivers to sharable * PHY drivers. This exports an interface compatible with BSD/OS 3.0's, * plus some NetBSD extensions. */ #include #include #include #include #include #include #include #include #include #include #include MODULE_VERSION(miibus, 1); #include "miibus_if.h" static int miibus_print_child(device_t dev, device_t child); static int miibus_read_ivar(device_t dev, device_t child, int which, uintptr_t *result); static int miibus_child_location_str(device_t bus, device_t child, char *buf, size_t buflen); static int miibus_child_pnpinfo_str(device_t bus, device_t child, char *buf, size_t buflen); static int miibus_readreg(device_t, int, int); static int miibus_writereg(device_t, int, int, int); static void miibus_statchg(device_t); static void miibus_linkchg(device_t); static void miibus_mediainit(device_t); static device_method_t miibus_methods[] = { /* device interface */ DEVMETHOD(device_probe, miibus_probe), DEVMETHOD(device_attach, miibus_attach), DEVMETHOD(device_detach, miibus_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), /* bus interface */ DEVMETHOD(bus_print_child, miibus_print_child), DEVMETHOD(bus_read_ivar, miibus_read_ivar), DEVMETHOD(bus_driver_added, bus_generic_driver_added), DEVMETHOD(bus_child_pnpinfo_str, miibus_child_pnpinfo_str), DEVMETHOD(bus_child_location_str, miibus_child_location_str), /* MII interface */ DEVMETHOD(miibus_readreg, miibus_readreg), DEVMETHOD(miibus_writereg, miibus_writereg), DEVMETHOD(miibus_statchg, miibus_statchg), DEVMETHOD(miibus_linkchg, miibus_linkchg), DEVMETHOD(miibus_mediainit, miibus_mediainit), { 0, 0 } }; devclass_t miibus_devclass; driver_t miibus_driver = { "miibus", miibus_methods, sizeof(struct mii_data) }; struct miibus_ivars { struct ifnet *ifp; ifm_change_cb_t ifmedia_upd; ifm_stat_cb_t ifmedia_sts; int mii_flags; }; int miibus_probe(device_t dev) { device_set_desc(dev, "MII bus"); return (BUS_PROBE_SPECIFIC); } int miibus_attach(device_t dev) { struct miibus_ivars *ivars; struct mii_attach_args *ma; struct mii_data *mii; device_t *children; int i, nchildren; mii = device_get_softc(dev); nchildren = 0; if (device_get_children(dev, &children, &nchildren) == 0) { for (i = 0; i < nchildren; i++) { ma = device_get_ivars(children[i]); ma->mii_data = mii; } free(children, M_TEMP); } if (nchildren == 0) { - device_printf(dev, "cannot get children"); + device_printf(dev, "cannot get children\n"); return (ENXIO); } ivars = device_get_ivars(dev); ifmedia_init(&mii->mii_media, IFM_IMASK, ivars->ifmedia_upd, ivars->ifmedia_sts); mii->mii_ifp = ivars->ifp; mii->mii_ifp->if_capabilities |= IFCAP_LINKSTATE; mii->mii_ifp->if_capenable |= IFCAP_LINKSTATE; LIST_INIT(&mii->mii_phys); return (bus_generic_attach(dev)); } int miibus_detach(device_t dev) { struct mii_data *mii; bus_generic_detach(dev); mii = device_get_softc(dev); ifmedia_removeall(&mii->mii_media); mii->mii_ifp = NULL; return (0); } static int miibus_print_child(device_t dev, device_t child) { struct mii_attach_args *ma; int retval; ma = device_get_ivars(child); retval = bus_print_child_header(dev, child); retval += printf(" PHY %d", ma->mii_phyno); retval += bus_print_child_footer(dev, child); return (retval); } static int miibus_read_ivar(device_t dev, device_t child __unused, int which, uintptr_t *result) { struct miibus_ivars *ivars; /* * NB: this uses the instance variables of the miibus rather than * its PHY children. */ ivars = device_get_ivars(dev); switch (which) { case MIIBUS_IVAR_FLAGS: *result = ivars->mii_flags; break; default: return (ENOENT); } return (0); } static int miibus_child_pnpinfo_str(device_t bus __unused, device_t child, char *buf, size_t buflen) { struct mii_attach_args *ma; ma = device_get_ivars(child); snprintf(buf, buflen, "oui=0x%x model=0x%x rev=0x%x", MII_OUI(ma->mii_id1, ma->mii_id2), MII_MODEL(ma->mii_id2), MII_REV(ma->mii_id2)); return (0); } static int miibus_child_location_str(device_t bus __unused, device_t child, char *buf, size_t buflen) { struct mii_attach_args *ma; ma = device_get_ivars(child); snprintf(buf, buflen, "phyno=%d", ma->mii_phyno); return (0); } static int miibus_readreg(device_t dev, int phy, int reg) { device_t parent; parent = device_get_parent(dev); return (MIIBUS_READREG(parent, phy, reg)); } static int miibus_writereg(device_t dev, int phy, int reg, int data) { device_t parent; parent = device_get_parent(dev); return (MIIBUS_WRITEREG(parent, phy, reg, data)); } static void miibus_statchg(device_t dev) { device_t parent; struct mii_data *mii; parent = device_get_parent(dev); MIIBUS_STATCHG(parent); mii = device_get_softc(dev); mii->mii_ifp->if_baudrate = ifmedia_baudrate(mii->mii_media_active); } static void miibus_linkchg(device_t dev) { struct mii_data *mii; device_t parent; int link_state; parent = device_get_parent(dev); MIIBUS_LINKCHG(parent); mii = device_get_softc(dev); if (mii->mii_media_status & IFM_AVALID) { if (mii->mii_media_status & IFM_ACTIVE) link_state = LINK_STATE_UP; else link_state = LINK_STATE_DOWN; } else link_state = LINK_STATE_UNKNOWN; if_link_state_change(mii->mii_ifp, link_state); } static void miibus_mediainit(device_t dev) { struct mii_data *mii; struct ifmedia_entry *m; int media = 0; /* Poke the parent in case it has any media of its own to add. */ MIIBUS_MEDIAINIT(device_get_parent(dev)); mii = device_get_softc(dev); LIST_FOREACH(m, &mii->mii_media.ifm_list, ifm_list) { media = m->ifm_media; if (media == (IFM_ETHER | IFM_AUTO)) break; } ifmedia_set(&mii->mii_media, media); } /* * Helper function used by network interface drivers, attaches the miibus and * the PHYs to the network interface driver parent. */ int mii_attach(device_t dev, device_t *miibus, struct ifnet *ifp, ifm_change_cb_t ifmedia_upd, ifm_stat_cb_t ifmedia_sts, int capmask, int phyloc, int offloc, int flags) { struct miibus_ivars *ivars; struct mii_attach_args ma, *args; device_t *children, phy; int bmsr, first, i, nchildren, offset, phymax, phymin, rv; if (phyloc != MII_PHY_ANY && offloc != MII_OFFSET_ANY) { - printf("%s: phyloc and offloc specified", __func__); + printf("%s: phyloc and offloc specified\n", __func__); return (EINVAL); } if (offloc != MII_OFFSET_ANY && (offloc < 0 || offloc >= MII_NPHY)) { - printf("%s: ivalid offloc %d", __func__, offloc); + printf("%s: ivalid offloc %d\n", __func__, offloc); return (EINVAL); } if (phyloc == MII_PHY_ANY) { phymin = 0; phymax = MII_NPHY - 1; } else { if (phyloc < 0 || phyloc >= MII_NPHY) { - printf("%s: ivalid phyloc %d", __func__, phyloc); + printf("%s: ivalid phyloc %d\n", __func__, phyloc); return (EINVAL); } phymin = phymax = phyloc; } first = 0; if (*miibus == NULL) { first = 1; ivars = malloc(sizeof(*ivars), M_DEVBUF, M_NOWAIT); if (ivars == NULL) return (ENOMEM); ivars->ifp = ifp; ivars->ifmedia_upd = ifmedia_upd; ivars->ifmedia_sts = ifmedia_sts; ivars->mii_flags = flags; *miibus = device_add_child(dev, "miibus", -1); if (*miibus == NULL) { rv = ENXIO; goto fail; } device_set_ivars(*miibus, ivars); } else { ivars = device_get_ivars(*miibus); if (ivars->ifp != ifp || ivars->ifmedia_upd != ifmedia_upd || ivars->ifmedia_sts != ifmedia_sts || ivars->mii_flags != flags) { - printf("%s: non-matching invariant", __func__); + printf("%s: non-matching invariant\n", __func__); return (EINVAL); } /* * Assignment of the attach arguments mii_data for the first * pass is done in miibus_attach(), i.e. once the miibus softc * has been allocated. */ ma.mii_data = device_get_softc(*miibus); } ma.mii_capmask = capmask; phy = NULL; offset = 0; for (ma.mii_phyno = phymin; ma.mii_phyno <= phymax; ma.mii_phyno++) { /* * Make sure we haven't already configured a PHY at this * address. This allows mii_attach() to be called * multiple times. */ if (device_get_children(*miibus, &children, &nchildren) == 0) { for (i = 0; i < nchildren; i++) { args = device_get_ivars(children[i]); if (args->mii_phyno == ma.mii_phyno) { /* * Yes, there is already something * configured at this address. */ free(children, M_TEMP); goto skip; } } free(children, M_TEMP); } /* * Check to see if there is a PHY at this address. Note, * many braindead PHYs report 0/0 in their ID registers, * so we test for media in the BMSR. */ bmsr = MIIBUS_READREG(dev, ma.mii_phyno, MII_BMSR); if (bmsr == 0 || bmsr == 0xffff || (bmsr & (BMSR_EXTSTAT | BMSR_MEDIAMASK)) == 0) { /* Assume no PHY at this address. */ continue; } /* * There is a PHY at this address. If we were given an * `offset' locator, skip this PHY if it doesn't match. */ if (offloc != MII_OFFSET_ANY && offloc != offset) goto skip; /* * Extract the IDs. Braindead PHYs will be handled by * the `ukphy' driver, as we have no ID information to * match on. */ ma.mii_id1 = MIIBUS_READREG(dev, ma.mii_phyno, MII_PHYIDR1); ma.mii_id2 = MIIBUS_READREG(dev, ma.mii_phyno, MII_PHYIDR2); args = malloc(sizeof(struct mii_attach_args), M_DEVBUF, M_NOWAIT); if (args == NULL) goto skip; bcopy((char *)&ma, (char *)args, sizeof(ma)); phy = device_add_child(*miibus, NULL, -1); if (phy == NULL) { free(args, M_DEVBUF); goto skip; } device_set_ivars(phy, args); skip: offset++; } if (first != 0) { if (phy == NULL) { rv = ENXIO; goto fail; } rv = bus_generic_attach(dev); if (rv != 0) goto fail; /* Attaching of the PHY drivers is done in miibus_attach(). */ return (0); } rv = bus_generic_attach(*miibus); if (rv != 0) goto fail; return (0); fail: if (*miibus != NULL) device_delete_child(dev, *miibus); free(ivars, M_DEVBUF); if (first != 0) *miibus = NULL; return (rv); } /* * Media changed; notify all PHYs. */ int mii_mediachg(struct mii_data *mii) { struct mii_softc *child; struct ifmedia_entry *ife = mii->mii_media.ifm_cur; int rv; mii->mii_media_status = 0; mii->mii_media_active = IFM_NONE; LIST_FOREACH(child, &mii->mii_phys, mii_list) { /* * If the media indicates a different PHY instance, * isolate this one. */ if (IFM_INST(ife->ifm_media) != child->mii_inst) { if ((child->mii_flags & MIIF_NOISOLATE) != 0) { device_printf(child->mii_dev, "%s: " "can't handle non-zero PHY instance %d\n", __func__, child->mii_inst); continue; } PHY_WRITE(child, MII_BMCR, PHY_READ(child, MII_BMCR) | BMCR_ISO); continue; } rv = (*child->mii_service)(child, mii, MII_MEDIACHG); if (rv) return (rv); } return (0); } /* * Call the PHY tick routines, used during autonegotiation. */ void mii_tick(struct mii_data *mii) { struct mii_softc *child; struct ifmedia_entry *ife = mii->mii_media.ifm_cur; LIST_FOREACH(child, &mii->mii_phys, mii_list) { /* * If this PHY instance isn't currently selected, just skip * it. */ if (IFM_INST(ife->ifm_media) != child->mii_inst) continue; (void)(*child->mii_service)(child, mii, MII_TICK); } } /* * Get media status from PHYs. */ void mii_pollstat(struct mii_data *mii) { struct mii_softc *child; struct ifmedia_entry *ife = mii->mii_media.ifm_cur; mii->mii_media_status = 0; mii->mii_media_active = IFM_NONE; LIST_FOREACH(child, &mii->mii_phys, mii_list) { /* * If we're not polling this PHY instance, just skip it. */ if (IFM_INST(ife->ifm_media) != child->mii_inst) continue; (void)(*child->mii_service)(child, mii, MII_POLLSTAT); } } /* * Inform the PHYs that the interface is down. */ void mii_down(struct mii_data *mii) { struct mii_softc *child; LIST_FOREACH(child, &mii->mii_phys, mii_list) mii_phy_down(child); } Index: projects/binutils-2.17/sys/dev/mii/miidevs =================================================================== --- projects/binutils-2.17/sys/dev/mii/miidevs (revision 215829) +++ projects/binutils-2.17/sys/dev/mii/miidevs (revision 215830) @@ -1,259 +1,261 @@ $FreeBSD$ /*$NetBSD: miidevs,v 1.6 1999/05/14 11:37:30 drochner Exp $*/ /*- * Copyright (c) 1998, 1999 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * List of known MII OUIs. * For a complete list see http://standards.ieee.org/regauth/oui/ * * XXX Vendors do obviously not agree how OUIs (18 bit) are mapped * to the 16 bits available in the id registers. The MII_OUI() macro * in "mii.h" reflects the most obvious way. If a vendor uses a * different mapping, an "xx" prefixed OUI is defined here which is * mangled accordingly to compensate. */ oui AGERE 0x00a0bc Agere Systems oui ALTIMA 0x0010a9 Altima Communications oui AMD 0x00001a Advanced Micro Devices oui ASIX 0x00602e Asix Semiconductor oui ATHEROS 0x001374 Atheros Communications oui BROADCOM 0x001018 Broadcom Corporation oui BROADCOM2 0x000af7 Broadcom Corporation oui CICADA 0x0003F1 Cicada Semiconductor oui DAVICOM 0x00606e Davicom Semiconductor oui ICPLUS 0x0090c3 IC Plus Corp. oui ICS 0x00a0be Integrated Circuit Systems oui INTEL 0x00aa00 Intel oui JATO 0x00e083 Jato Technologies oui JMICRON 0x001b8c JMicron Technologies oui LEVEL1 0x00207b Level 1 oui NATSEMI 0x080017 National Semiconductor oui QUALSEMI 0x006051 Quality Semiconductor oui REALTEK 0x000020 RealTek Semicondctor oui SEEQ 0x00a07d Seeq oui SIS 0x00e006 Silicon Integrated Systems oui SMSC 0x0005be SMSC oui TDK 0x00c039 TDK oui TI 0x080028 Texas Instruments oui VITESSE 0x0001c1 Vitesse Semiconductor oui XAQTI 0x00e0ae XaQti Corp. oui MARVELL 0x005043 Marvell Semiconductor oui xxMARVELL 0x000ac2 Marvell Semiconductor /* in the 79c873, AMD uses another OUI (which matches Davicom!) */ oui xxAMD 0x00606e Advanced Micro Devices /* Intel 82553 A/B steppings */ oui xxINTEL 0x00f800 Intel /* some vendors have the bits swapped within bytes (ie, ordered as on the wire) */ oui xxALTIMA 0x000895 Altima Communications oui xxBROADCOM 0x000818 Broadcom Corporation oui xxBROADCOM_ALT1 0x0050ef Broadcom Corporation oui xxBROADCOM_ALT2 0x00d897 Broadcom Corporation oui xxICS 0x00057d Integrated Circuit Systems oui xxSEEQ 0x0005be Seeq oui xxSIS 0x000760 Silicon Integrated Systems oui xxTI 0x100014 Texas Instruments oui xxXAQTI 0x350700 XaQti Corp. /* Level 1 is completely different - from right to left. (Two bits get lost in the third OUI byte.) */ oui xxLEVEL1 0x1e0400 Level 1 /* Don't know what's going on here. */ oui xxDAVICOM 0x006040 Davicom Semiconductor /* This is the OUI of the gigE PHY in the RealTek 8169S/8110S/8211B chips */ oui xxREALTEK 0x000732 /* * List of known models. Grouped by oui. */ /* Agere Systems PHYs */ model AGERE ET1011 0x0001 ET1011 10/100/1000baseT PHY model AGERE ET1011C 0x0004 ET1011C 10/100/1000baseT PHY /* Altima Communications PHYs */ model xxALTIMA AC101 0x0021 AC101 10/100 media interface model xxALTIMA AC101L 0x0012 AC101L 10/100 media interface model xxALTIMA ACXXX 0x0001 ACXXX 10/100 media interface /* Advanced Micro Devices PHYs */ model AMD 79c973phy 0x0036 Am79c973 internal PHY model AMD 79c978 0x0039 Am79c978 HomePNA PHY model xxAMD 79C873 0x0000 Am79C873/DM9101 10/100 media interface /* Asix semiconductor PHYs. */ model ASIX AX88X9X 0x0031 Ax88x9x internal PHY /* Atheros Communications/Attansic PHYs. */ model ATHEROS F1 0x0001 Atheros F1 10/100/1000 PHY model ATHEROS F2 0x0002 Atheros F2 10/100 PHY model ATHEROS F1_7 0x0007 Atheros F1 10/100/1000 PHY /* Broadcom Corp. PHYs. */ model BROADCOM 3C905B 0x0012 3c905B 10/100 internal PHY model BROADCOM 3C905C 0x0017 3c905C 10/100 internal PHY model BROADCOM BCM5201 0x0021 BCM5201 10/100baseTX PHY +model BROADCOM BCM5214 0x0028 BCM5214 Quad 10/100 PHY model BROADCOM BCM5221 0x001e BCM5221 10/100baseTX PHY +model BROADCOM BCM5222 0x0032 BCM5222 Dual 10/100 PHY model BROADCOM BCM4401 0x0036 BCM4401 10/100baseTX PHY model xxBROADCOM BCM5400 0x0004 Broadcom 1000baseTX PHY model xxBROADCOM BCM5401 0x0005 BCM5401 10/100/1000baseTX PHY model xxBROADCOM BCM5411 0x0007 BCM5411 10/100/1000baseTX PHY model xxBROADCOM BCM5754 0x000e BCM5754 10/100/1000baseTX PHY model xxBROADCOM BCM5752 0x0010 BCM5752 10/100/1000baseTX PHY model xxBROADCOM BCM5701 0x0011 BCM5701 10/100/1000baseTX PHY model xxBROADCOM BCM5706 0x0015 BCM5706 10/100/1000baseTX/SX PHY model xxBROADCOM BCM5703 0x0016 BCM5703 10/100/1000baseTX PHY model xxBROADCOM BCM5704 0x0019 BCM5704 10/100/1000baseTX PHY model xxBROADCOM BCM5705 0x001a BCM5705 10/100/1000baseTX PHY model xxBROADCOM BCM5750 0x0018 BCM5750 10/100/1000baseTX PHY model xxBROADCOM BCM54K2 0x002e BCM54K2 10/100/1000baseTX PHY model xxBROADCOM BCM5714 0x0034 BCM5714 10/100/1000baseTX PHY model xxBROADCOM BCM5780 0x0035 BCM5780 10/100/1000baseTX PHY model xxBROADCOM BCM5708C 0x0036 BCM5708C 10/100/1000baseTX PHY model xxBROADCOM_ALT1 BCM5482S 0x000b BCM5482S Dual-Port 10/100/1000baseX/FX PHY model xxBROADCOM_ALT1 BCM5755 0x000c BCM5755 10/100/1000baseTX PHY model xxBROADCOM_ALT1 BCM5787 0x000e BCM5787 10/100/1000baseTX PHY model xxBROADCOM_ALT1 BCM5708S 0x0015 BCM5708S 1000/2500BaseSX PHY model xxBROADCOM_ALT1 BCM5709CAX 0x002c BCM5709C(AX) 10/100/1000baseTX PHY model xxBROADCOM_ALT1 BCM5722 0x002d BCM5722 10/100/1000baseTX PHY model xxBROADCOM_ALT1 BCM5784 0x003a BCM5784 10/100/1000baseTX PHY model xxBROADCOM_ALT1 BCM5709C 0x003c BCM5709C 10/100/1000baseTX PHY model xxBROADCOM_ALT1 BCM5761 0x003d BCM5761 10/100/1000baseTX PHY model xxBROADCOM_ALT1 BCM5709S 0x003f BCM5709S 1000/2500baseSX PHY model xxBROADCOM_ALT2 BCM5717C 0x0020 BCM5717C 10/100/1000baseTX PHY model BROADCOM2 BCM5906 0x0004 BCM5906 10/100baseTX PHY /* Cicada Semiconductor PHYs (now owned by Vitesse?) */ model CICADA CS8201 0x0001 Cicada CS8201 10/100/1000TX PHY model CICADA CS8204 0x0004 Cicada CS8204 10/100/1000TX PHY model CICADA VSC8211 0x000b Cicada VSC8211 10/100/1000TX PHY model CICADA CS8201A 0x0020 Cicada CS8201 10/100/1000TX PHY model CICADA CS8201B 0x0021 Cicada CS8201 10/100/1000TX PHY model CICADA CS8244 0x002c Cicada CS8244 10/100/1000TX PHY model VITESSE VSC8601 0x0002 Vitesse VSC8601 10/100/1000TX PHY /* Davicom Semiconductor PHYs */ model DAVICOM DM9102 0x0004 DM9102 10/100 media interface model xxDAVICOM DM9101 0x0000 DM9101 10/100 media interface /* Integrated Circuit Systems PHYs */ model xxICS 1889 0x0001 ICS1889 10/100 media interface model xxICS 1890 0x0002 ICS1890 10/100 media interface model xxICS 1892 0x0003 ICS1892 10/100 media interface model xxICS 1893 0x0004 ICS1893 10/100 media interface /* IC Plus Corp. PHYs */ model ICPLUS IP101 0x0005 IC Plus 10/100 PHY model ICPLUS IP1000A 0x0008 IC Plus 10/100/1000 media interface model ICPLUS IP1001 0x0019 IC Plus IP1001 10/100/1000 media interface /* Intel PHYs */ model xxINTEL I82553AB 0x0000 i83553 10/100 media interface model INTEL I82555 0x0015 i82555 10/100 media interface model INTEL I82562EM 0x0032 i82562EM 10/100 media interface model INTEL I82562ET 0x0033 i82562ET 10/100 media interface model INTEL I82553C 0x0035 i82553 10/100 media interface /* Jato Technologies PHYs */ model JATO BASEX 0x0000 Jato 1000baseX media interface /* JMicron Technologies PHYs */ model JMICRON JMP211 0x0021 JMP211 10/100/1000 media interface model JMICRON JMP202 0x0022 JMP202 10/100 media interface /* Level 1 PHYs */ model xxLEVEL1 LXT970 0x0000 LXT970 10/100 media interface /* National Semiconductor PHYs */ model NATSEMI DP83840 0x0000 DP83840 10/100 media interface model NATSEMI DP83843 0x0001 DP83843 10/100 media interface model NATSEMI DP83815 0x0002 DP83815 10/100 media interface model NATSEMI DP83847 0x0003 DP83847 10/100 media interface model NATSEMI DP83891 0x0005 DP83891 10/100/1000 media interface model NATSEMI DP83861 0x0006 DP83861 10/100/1000 media interface model NATSEMI DP83865 0x0007 DP83865 10/100/1000 media interface /* Quality Semiconductor PHYs */ model QUALSEMI QS6612 0x0000 QS6612 10/100 media interface /* RealTek Semiconductor PHYs */ model REALTEK RTL8201L 0x0020 RTL8201L 10/100 media interface model xxREALTEK RTL8305SC 0x0005 RTL8305SC 10/100 802.1q switch model xxREALTEK RTL8169S 0x0011 RTL8169S/8110S/8211B media interface /* Seeq PHYs */ model xxSEEQ 80220 0x0003 Seeq 80220 10/100 media interface model xxSEEQ 84220 0x0004 Seeq 84220 10/100 media interface /* Silicon Integrated Systems PHYs */ model xxSIS 900 0x0000 SiS 900 10/100 media interface /* SMSC PHYs */ model SMSC LAN83C183 0x0004 SMSC LAN83C183 10/100 media interface /* TDK */ model TDK 78Q2120 0x0014 TDK 78Q2120 media interface /* Texas Instruments PHYs */ model xxTI TLAN10T 0x0001 ThunderLAN 10baseT media interface model xxTI 100VGPMI 0x0002 ThunderLAN 100VG-AnyLan media interface /* XaQti Corp. PHYs. */ model XAQTI XMACII 0x0000 XaQti Corp. XMAC II gigabit interface /* Marvell Semiconductor PHYs */ model MARVELL E1000 0x0000 Marvell 88E1000 Gigabit PHY model MARVELL E1011 0x0002 Marvell 88E1011 Gigabit PHY model MARVELL E1000_3 0x0003 Marvell 88E1000 Gigabit PHY model MARVELL E1000S 0x0004 Marvell 88E1000S Gigabit PHY model MARVELL E1000_5 0x0005 Marvell 88E1000 Gigabit PHY model MARVELL E1101 0x0006 Marvell 88E1101 Gigabit PHY model MARVELL E3082 0x0008 Marvell 88E3082 10/100 Fast Ethernet PHY model MARVELL E1112 0x0009 Marvell 88E1112 Gigabit PHY model MARVELL E1149 0x000b Marvell 88E1149 Gigabit PHY model MARVELL E1111 0x000c Marvell 88E1111 Gigabit PHY model MARVELL E1116 0x0021 Marvell 88E1116 Gigabit PHY model MARVELL E1116R 0x0024 Marvell 88E1116R Gigabit PHY model MARVELL E1118 0x0022 Marvell 88E1118 Gigabit PHY model MARVELL E3016 0x0026 Marvell 88E3016 10/100 Fast Ethernet PHY model MARVELL PHYG65G 0x0027 Marvell PHYG65G Gigabit PHY model xxMARVELL E1000 0x0005 Marvell 88E1000 Gigabit PHY model xxMARVELL E1011 0x0002 Marvell 88E1011 Gigabit PHY model xxMARVELL E1000_3 0x0003 Marvell 88E1000 Gigabit PHY model xxMARVELL E1000_5 0x0005 Marvell 88E1000 Gigabit PHY model xxMARVELL E1111 0x000c Marvell 88E1111 Gigabit PHY Index: projects/binutils-2.17/sys/dev/mii/nsgphy.c =================================================================== --- projects/binutils-2.17/sys/dev/mii/nsgphy.c (revision 215829) +++ projects/binutils-2.17/sys/dev/mii/nsgphy.c (revision 215830) @@ -1,255 +1,257 @@ /*- * Copyright (c) 2001 Wind River Systems * Copyright (c) 2001 * Bill Paul . All rights reserved. * Copyright (c) 1998, 1999, 2000, 2001 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Driver for the National Semiconductor DP83861, DP83865 and DP83891 * 10/100/1000 PHYs. * Datasheet available at: http://www.national.com/ds/DP/DP83861.pdf * and at: http://www.national.com/ds/DP/DP83865.pdf * * The DP83891 is the older NS GigE PHY which isn't being sold * anymore. The DP83861 is its replacement, which is an 'enhanced' * firmware driven component. The major difference between the * two is that the DP83891 can't generate interrupts, while the * 83861 can (probably it wasn't originally designed to do this, but * it can now thanks to firmware updates). The DP83861 also allows * access to its internal RAM via indirect register access. The * DP83865 is an ultra low power version of the DP83861 and DP83891. */ #include #include #include #include #include #include #include #include #include #include #include "miidevs.h" #include #include "miibus_if.h" static int nsgphy_probe(device_t); static int nsgphy_attach(device_t); static device_method_t nsgphy_methods[] = { /* device interface */ DEVMETHOD(device_probe, nsgphy_probe), DEVMETHOD(device_attach, nsgphy_attach), DEVMETHOD(device_detach, mii_phy_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), { 0, 0 } }; static devclass_t nsgphy_devclass; static driver_t nsgphy_driver = { "nsgphy", nsgphy_methods, sizeof(struct mii_softc) }; DRIVER_MODULE(nsgphy, miibus, nsgphy_driver, nsgphy_devclass, 0, 0); static int nsgphy_service(struct mii_softc *, struct mii_data *,int); static void nsgphy_status(struct mii_softc *); static const struct mii_phydesc nsgphys[] = { MII_PHY_DESC(NATSEMI, DP83861), MII_PHY_DESC(NATSEMI, DP83865), MII_PHY_DESC(NATSEMI, DP83891), MII_PHY_END }; static int nsgphy_probe(device_t dev) { return (mii_phy_dev_probe(dev, nsgphys, BUS_PROBE_DEFAULT)); } static int nsgphy_attach(device_t dev) { struct mii_softc *sc; struct mii_attach_args *ma; struct mii_data *mii; sc = device_get_softc(dev); ma = device_get_ivars(dev); if (bootverbose) device_printf(dev, "\n", MII_REV(ma->mii_id2)); device_printf(dev, " "); sc->mii_dev = device_get_parent(dev); mii = ma->mii_data; LIST_INSERT_HEAD(&mii->mii_phys, sc, mii_list); sc->mii_flags = miibus_get_flags(dev); sc->mii_inst = mii->mii_instance++; sc->mii_phy = ma->mii_phyno; sc->mii_service = nsgphy_service; sc->mii_pdata = mii; + sc->mii_flags |= MIIF_NOMANPAUSE; + mii_phy_reset(sc); /* * NB: the PHY has the 10baseT BMSR bits hard-wired to 0, * even though it supports 10baseT. */ sc->mii_capabilities = (PHY_READ(sc, MII_BMSR) | (BMSR_10TFDX | BMSR_10THDX)) & ma->mii_capmask; if (sc->mii_capabilities & BMSR_EXTSTAT) sc->mii_extcapabilities = PHY_READ(sc, MII_EXTSR); mii_phy_add_media(sc); printf("\n"); MIIBUS_MEDIAINIT(sc->mii_dev); return (0); } static int nsgphy_service(struct mii_softc *sc, struct mii_data *mii, int cmd) { switch (cmd) { case MII_POLLSTAT: break; case MII_MEDIACHG: /* * If the interface is not up, don't do anything. */ if ((mii->mii_ifp->if_flags & IFF_UP) == 0) break; mii_phy_setmedia(sc); break; case MII_TICK: if (mii_phy_tick(sc) == EJUSTRETURN) return (0); break; } /* Update the media status. */ nsgphy_status(sc); /* Callback if something changed. */ mii_phy_update(sc, cmd); return (0); } static void nsgphy_status(struct mii_softc *sc) { struct mii_data *mii = sc->mii_pdata; struct ifmedia_entry *ife = mii->mii_media.ifm_cur; int bmsr, bmcr, physup, gtsr; mii->mii_media_status = IFM_AVALID; mii->mii_media_active = IFM_ETHER; bmsr = PHY_READ(sc, MII_BMSR) | PHY_READ(sc, MII_BMSR); physup = PHY_READ(sc, NSGPHY_MII_PHYSUP); if (physup & PHY_SUP_LINK) mii->mii_media_status |= IFM_ACTIVE; bmcr = PHY_READ(sc, MII_BMCR); if (bmcr & BMCR_ISO) { mii->mii_media_active |= IFM_NONE; mii->mii_media_status = 0; return; } if (bmcr & BMCR_LOOP) mii->mii_media_active |= IFM_LOOP; if (bmcr & BMCR_AUTOEN) { /* * The media status bits are only valid if autonegotiation * has completed (or it's disabled). */ if ((bmsr & BMSR_ACOMP) == 0) { /* Erg, still trying, I guess... */ mii->mii_media_active |= IFM_NONE; return; } switch (physup & (PHY_SUP_SPEED1 | PHY_SUP_SPEED0)) { case PHY_SUP_SPEED1: mii->mii_media_active |= IFM_1000_T; gtsr = PHY_READ(sc, MII_100T2SR); if (gtsr & GTSR_MS_RES) mii->mii_media_active |= IFM_ETH_MASTER; break; case PHY_SUP_SPEED0: mii->mii_media_active |= IFM_100_TX; break; case 0: mii->mii_media_active |= IFM_10_T; break; default: mii->mii_media_active |= IFM_NONE; mii->mii_media_status = 0; return; } if (physup & PHY_SUP_DUPLEX) mii->mii_media_active |= IFM_FDX | mii_phy_flowstatus(sc); else mii->mii_media_active |= IFM_HDX; } else mii->mii_media_active = ife->ifm_media; } Index: projects/binutils-2.17/sys/dev/mii/nsphyter.c =================================================================== --- projects/binutils-2.17/sys/dev/mii/nsphyter.c (revision 215829) +++ projects/binutils-2.17/sys/dev/mii/nsphyter.c (revision 215830) @@ -1,300 +1,298 @@ /* $NetBSD: nsphyter.c,v 1.28 2008/01/20 07:58:19 msaitoh Exp $ */ /*- * Copyright (c) 1998, 1999, 2000, 2001 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 1997 Manuel Bouyer. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * driver for National Semiconductor's DP83843 `PHYTER' ethernet 10/100 PHY * Data Sheet available from www.national.com * * We also support the DP83815 `MacPHYTER' internal PHY since, for our * purposes, they are compatible. */ #include #include #include #include #include #include #include #include #include #include #include #include "miidevs.h" #include #include "miibus_if.h" static device_probe_t nsphyter_probe; static device_attach_t nsphyter_attach; static device_method_t nsphyter_methods[] = { /* device interface */ DEVMETHOD(device_probe, nsphyter_probe), DEVMETHOD(device_attach, nsphyter_attach), DEVMETHOD(device_detach, mii_phy_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), { 0, 0 } }; static devclass_t nsphyter_devclass; static driver_t nsphyter_driver = { "nsphyter", nsphyter_methods, sizeof(struct mii_softc) }; DRIVER_MODULE(nsphyter, miibus, nsphyter_driver, nsphyter_devclass, 0, 0); static int nsphyter_service(struct mii_softc *, struct mii_data *, int); static void nsphyter_status(struct mii_softc *); static void nsphyter_reset(struct mii_softc *); static const struct mii_phydesc nsphys[] = { MII_PHY_DESC(NATSEMI, DP83815), MII_PHY_DESC(NATSEMI, DP83843), MII_PHY_DESC(NATSEMI, DP83847), MII_PHY_END }; static int nsphyter_probe(device_t dev) { return (mii_phy_dev_probe(dev, nsphys, BUS_PROBE_DEFAULT)); } static int nsphyter_attach(device_t dev) { struct mii_softc *sc; struct mii_attach_args *ma; struct mii_data *mii; sc = device_get_softc(dev); ma = device_get_ivars(dev); sc->mii_dev = device_get_parent(dev); mii = ma->mii_data; LIST_INSERT_HEAD(&mii->mii_phys, sc, mii_list); sc->mii_flags = miibus_get_flags(dev); sc->mii_inst = mii->mii_instance++; sc->mii_phy = ma->mii_phyno; sc->mii_service = nsphyter_service; sc->mii_pdata = mii; + sc->mii_flags |= MIIF_NOMANPAUSE; + #if 1 #define ADD(m, c) ifmedia_add(&mii->mii_media, (m), (c), NULL) /* * XXX IFM_LOOP should be handled by mii_phy_add_media() based * on MIIF_NOLOOP. */ if ((sc->mii_flags & MIIF_NOLOOP) == 0) ADD(IFM_MAKEWORD(IFM_ETHER, IFM_100_TX, IFM_LOOP, sc->mii_inst), MII_MEDIA_100_TX); #endif nsphyter_reset(sc); sc->mii_capabilities = PHY_READ(sc, MII_BMSR) & ma->mii_capmask; device_printf(dev, " "); mii_phy_add_media(sc); printf("\n"); MIIBUS_MEDIAINIT(sc->mii_dev); return (0); } static int nsphyter_service(struct mii_softc *sc, struct mii_data *mii, int cmd) { switch (cmd) { case MII_POLLSTAT: break; case MII_MEDIACHG: /* * If the interface is not up, don't do anything. */ if ((mii->mii_ifp->if_flags & IFF_UP) == 0) break; mii_phy_setmedia(sc); break; case MII_TICK: if (mii_phy_tick(sc) == EJUSTRETURN) return (0); break; } /* Update the media status. */ nsphyter_status(sc); /* Callback if something changed. */ mii_phy_update(sc, cmd); return (0); } static void nsphyter_status(struct mii_softc *sc) { struct mii_data *mii = sc->mii_pdata; struct ifmedia_entry *ife = mii->mii_media.ifm_cur; int bmsr, bmcr, physts; mii->mii_media_status = IFM_AVALID; mii->mii_media_active = IFM_ETHER; bmsr = PHY_READ(sc, MII_BMSR) | PHY_READ(sc, MII_BMSR); physts = PHY_READ(sc, MII_NSPHYTER_PHYSTS); if ((physts & PHYSTS_LINK) != 0) mii->mii_media_status |= IFM_ACTIVE; bmcr = PHY_READ(sc, MII_BMCR); if ((bmcr & BMCR_ISO) != 0) { mii->mii_media_active |= IFM_NONE; mii->mii_media_status = 0; return; } if ((bmcr & BMCR_LOOP) != 0) mii->mii_media_active |= IFM_LOOP; if ((bmcr & BMCR_AUTOEN) != 0) { /* * The media status bits are only valid if autonegotiation * has completed (or it's disabled). */ if ((bmsr & BMSR_ACOMP) == 0) { /* Erg, still trying, I guess... */ mii->mii_media_active |= IFM_NONE; return; } if ((physts & PHYSTS_SPEED10) != 0) mii->mii_media_active |= IFM_10_T; else mii->mii_media_active |= IFM_100_TX; if ((physts & PHYSTS_DUPLEX) != 0) -#ifdef notyet mii->mii_media_active |= IFM_FDX | mii_phy_flowstatus(sc); -#else - mii->mii_media_active |= IFM_FDX; -#endif else mii->mii_media_active |= IFM_HDX; } else mii->mii_media_active = ife->ifm_media; } static void nsphyter_reset(struct mii_softc *sc) { struct ifmedia_entry *ife = sc->mii_pdata->mii_media.ifm_cur; int reg, i; if ((sc->mii_flags & MIIF_NOISOLATE) != 0) reg = BMCR_RESET; else reg = BMCR_RESET | BMCR_ISO; PHY_WRITE(sc, MII_BMCR, reg); /* * It is best to allow a little time for the reset to settle * in before we start polling the BMCR again. Notably, the * DP8384{3,7} manuals state that there should be a 500us delay * between asserting software reset and attempting MII serial * operations. Be conservative. Also, a DP83815 can get into * a bad state on cable removal and reinsertion if we do not * delay here. */ DELAY(1000); /* * Wait another 2s for it to complete. * This is only a little overkill as under normal circumstances * the PHY can take up to 1s to complete reset. * This is also a bit odd because after a reset, the BMCR will * clear the reset bit and simply reports 0 even though the reset * is not yet complete. */ for (i = 0; i < 1000; i++) { reg = PHY_READ(sc, MII_BMCR); if (reg != 0 && (reg & BMCR_RESET) == 0) break; DELAY(2000); } if ((sc->mii_flags & MIIF_NOISOLATE) == 0) { if ((ife == NULL && sc->mii_inst != 0) || (ife != NULL && IFM_INST(ife->ifm_media) != sc->mii_inst)) PHY_WRITE(sc, MII_BMCR, reg | BMCR_ISO); } } Index: projects/binutils-2.17/sys/dev/mii/ukphy.c =================================================================== --- projects/binutils-2.17/sys/dev/mii/ukphy.c (revision 215829) +++ projects/binutils-2.17/sys/dev/mii/ukphy.c (revision 215830) @@ -1,184 +1,186 @@ /* $NetBSD: ukphy.c,v 1.2 1999/04/23 04:24:32 thorpej Exp $ */ /*- * Copyright (c) 1998, 1999 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center, and by Frank van der Linden. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 1997 Manuel Bouyer. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * driver for generic unknown PHYs */ #include #include #include #include #include #include #include #include #include #include #include #include "miibus_if.h" static int ukphy_probe(device_t); static int ukphy_attach(device_t); static device_method_t ukphy_methods[] = { /* device interface */ DEVMETHOD(device_probe, ukphy_probe), DEVMETHOD(device_attach, ukphy_attach), DEVMETHOD(device_detach, mii_phy_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), { 0, 0 } }; static devclass_t ukphy_devclass; static driver_t ukphy_driver = { "ukphy", ukphy_methods, sizeof(struct mii_softc) }; DRIVER_MODULE(ukphy, miibus, ukphy_driver, ukphy_devclass, 0, 0); static int ukphy_service(struct mii_softc *, struct mii_data *, int); static int ukphy_probe(device_t dev) { /* * We know something is here, so always match at a low priority. */ device_set_desc(dev, "Generic IEEE 802.3u media interface"); return (BUS_PROBE_GENERIC); } static int ukphy_attach(device_t dev) { struct mii_softc *sc; struct mii_attach_args *ma; struct mii_data *mii; sc = device_get_softc(dev); ma = device_get_ivars(dev); sc->mii_dev = device_get_parent(dev); mii = ma->mii_data; LIST_INSERT_HEAD(&mii->mii_phys, sc, mii_list); if (bootverbose) device_printf(dev, "OUI 0x%06x, model 0x%04x, rev. %d\n", MII_OUI(ma->mii_id1, ma->mii_id2), MII_MODEL(ma->mii_id2), MII_REV(ma->mii_id2)); sc->mii_flags = miibus_get_flags(dev); sc->mii_inst = mii->mii_instance++; sc->mii_phy = ma->mii_phyno; sc->mii_service = ukphy_service; sc->mii_pdata = mii; + sc->mii_flags |= MIIF_NOMANPAUSE; + mii_phy_reset(sc); sc->mii_capabilities = PHY_READ(sc, MII_BMSR) & ma->mii_capmask; if (sc->mii_capabilities & BMSR_EXTSTAT) sc->mii_extcapabilities = PHY_READ(sc, MII_EXTSR); device_printf(dev, " "); mii_phy_add_media(sc); printf("\n"); MIIBUS_MEDIAINIT(sc->mii_dev); mii_phy_setmedia(sc); return (0); } static int ukphy_service(struct mii_softc *sc, struct mii_data *mii, int cmd) { switch (cmd) { case MII_POLLSTAT: break; case MII_MEDIACHG: /* * If the interface is not up, don't do anything. */ if ((mii->mii_ifp->if_flags & IFF_UP) == 0) break; mii_phy_setmedia(sc); break; case MII_TICK: if (mii_phy_tick(sc) == EJUSTRETURN) return (0); break; } /* Update the media status. */ ukphy_status(sc); /* Callback if something changed. */ mii_phy_update(sc, cmd); return (0); } Index: projects/binutils-2.17/sys/dev/pci/pci.c =================================================================== --- projects/binutils-2.17/sys/dev/pci/pci.c (revision 215829) +++ projects/binutils-2.17/sys/dev/pci/pci.c (revision 215830) @@ -1,4163 +1,4190 @@ /*- * Copyright (c) 1997, Stefan Esser * Copyright (c) 2000, Michael Smith * Copyright (c) 2000, BSDi * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_bus.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__) #include #endif #include #include #include #include #include #include #include #include "pcib_if.h" #include "pci_if.h" static pci_addr_t pci_mapbase(uint64_t mapreg); static const char *pci_maptype(uint64_t mapreg); static int pci_mapsize(uint64_t testval); static int pci_maprange(uint64_t mapreg); static pci_addr_t pci_rombase(uint64_t mapreg); static int pci_romsize(uint64_t testval); static void pci_fixancient(pcicfgregs *cfg); static int pci_printf(pcicfgregs *cfg, const char *fmt, ...); static int pci_porten(device_t dev); static int pci_memen(device_t dev); static void pci_assign_interrupt(device_t bus, device_t dev, int force_route); static int pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl, int force, int prefetch); static int pci_probe(device_t dev); static int pci_attach(device_t dev); static void pci_load_vendor_data(void); static int pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc); static char *pci_describe_device(device_t dev); static int pci_modevent(module_t mod, int what, void *arg); static void pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg); static void pci_read_extcap(device_t pcib, pcicfgregs *cfg); static int pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data); #if 0 static int pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data); #endif static void pci_read_vpd(device_t pcib, pcicfgregs *cfg); static void pci_disable_msi(device_t dev); static void pci_enable_msi(device_t dev, uint64_t address, uint16_t data); static void pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data); static void pci_mask_msix(device_t dev, u_int index); static void pci_unmask_msix(device_t dev, u_int index); static int pci_msi_blacklisted(void); static void pci_resume_msi(device_t dev); static void pci_resume_msix(device_t dev); static int pci_remap_intr_method(device_t bus, device_t dev, u_int irq); static device_method_t pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, pci_probe), DEVMETHOD(device_attach, pci_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, pci_suspend), DEVMETHOD(device_resume, pci_resume), /* Bus interface */ DEVMETHOD(bus_print_child, pci_print_child), DEVMETHOD(bus_probe_nomatch, pci_probe_nomatch), DEVMETHOD(bus_read_ivar, pci_read_ivar), DEVMETHOD(bus_write_ivar, pci_write_ivar), DEVMETHOD(bus_driver_added, pci_driver_added), DEVMETHOD(bus_setup_intr, pci_setup_intr), DEVMETHOD(bus_teardown_intr, pci_teardown_intr), DEVMETHOD(bus_get_resource_list,pci_get_resource_list), DEVMETHOD(bus_set_resource, bus_generic_rl_set_resource), DEVMETHOD(bus_get_resource, bus_generic_rl_get_resource), DEVMETHOD(bus_delete_resource, pci_delete_resource), DEVMETHOD(bus_alloc_resource, pci_alloc_resource), DEVMETHOD(bus_release_resource, bus_generic_rl_release_resource), DEVMETHOD(bus_activate_resource, pci_activate_resource), DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource), DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method), DEVMETHOD(bus_child_location_str, pci_child_location_str_method), DEVMETHOD(bus_remap_intr, pci_remap_intr_method), /* PCI interface */ DEVMETHOD(pci_read_config, pci_read_config_method), DEVMETHOD(pci_write_config, pci_write_config_method), DEVMETHOD(pci_enable_busmaster, pci_enable_busmaster_method), DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method), DEVMETHOD(pci_enable_io, pci_enable_io_method), DEVMETHOD(pci_disable_io, pci_disable_io_method), DEVMETHOD(pci_get_vpd_ident, pci_get_vpd_ident_method), DEVMETHOD(pci_get_vpd_readonly, pci_get_vpd_readonly_method), DEVMETHOD(pci_get_powerstate, pci_get_powerstate_method), DEVMETHOD(pci_set_powerstate, pci_set_powerstate_method), DEVMETHOD(pci_assign_interrupt, pci_assign_interrupt_method), DEVMETHOD(pci_find_extcap, pci_find_extcap_method), DEVMETHOD(pci_alloc_msi, pci_alloc_msi_method), DEVMETHOD(pci_alloc_msix, pci_alloc_msix_method), DEVMETHOD(pci_remap_msix, pci_remap_msix_method), DEVMETHOD(pci_release_msi, pci_release_msi_method), DEVMETHOD(pci_msi_count, pci_msi_count_method), DEVMETHOD(pci_msix_count, pci_msix_count_method), { 0, 0 } }; DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0); static devclass_t pci_devclass; DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0); MODULE_VERSION(pci, 1); static char *pci_vendordata; static size_t pci_vendordata_size; struct pci_quirk { uint32_t devid; /* Vendor/device of the card */ int type; #define PCI_QUIRK_MAP_REG 1 /* PCI map register in weird place */ #define PCI_QUIRK_DISABLE_MSI 2 /* MSI/MSI-X doesn't work */ #define PCI_QUIRK_ENABLE_MSI_VM 3 /* Older chipset in VM where MSI works */ int arg1; int arg2; }; struct pci_quirk pci_quirks[] = { /* The Intel 82371AB and 82443MX has a map register at offset 0x90. */ { 0x71138086, PCI_QUIRK_MAP_REG, 0x90, 0 }, { 0x719b8086, PCI_QUIRK_MAP_REG, 0x90, 0 }, /* As does the Serverworks OSB4 (the SMBus mapping register) */ { 0x02001166, PCI_QUIRK_MAP_REG, 0x90, 0 }, /* * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge * or the CMIC-SL (AKA ServerWorks GC_LE). */ { 0x00141166, PCI_QUIRK_DISABLE_MSI, 0, 0 }, { 0x00171166, PCI_QUIRK_DISABLE_MSI, 0, 0 }, /* * MSI doesn't work on earlier Intel chipsets including * E7500, E7501, E7505, 845, 865, 875/E7210, and 855. */ { 0x25408086, PCI_QUIRK_DISABLE_MSI, 0, 0 }, { 0x254c8086, PCI_QUIRK_DISABLE_MSI, 0, 0 }, { 0x25508086, PCI_QUIRK_DISABLE_MSI, 0, 0 }, { 0x25608086, PCI_QUIRK_DISABLE_MSI, 0, 0 }, { 0x25708086, PCI_QUIRK_DISABLE_MSI, 0, 0 }, { 0x25788086, PCI_QUIRK_DISABLE_MSI, 0, 0 }, { 0x35808086, PCI_QUIRK_DISABLE_MSI, 0, 0 }, /* * MSI doesn't work with devices behind the AMD 8131 HT-PCIX * bridge. */ { 0x74501022, PCI_QUIRK_DISABLE_MSI, 0, 0 }, /* * Some virtualization environments emulate an older chipset * but support MSI just fine. QEMU uses the Intel 82440. */ { 0x12378086, PCI_QUIRK_ENABLE_MSI_VM, 0, 0 }, { 0 } }; /* map register information */ #define PCI_MAPMEM 0x01 /* memory map */ #define PCI_MAPMEMP 0x02 /* prefetchable memory map */ #define PCI_MAPPORT 0x04 /* port map */ struct devlist pci_devq; uint32_t pci_generation; uint32_t pci_numdevs = 0; static int pcie_chipset, pcix_chipset; /* sysctl vars */ SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters"); static int pci_enable_io_modes = 1; TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes); SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW, &pci_enable_io_modes, 1, "Enable I/O and memory bits in the config register. Some BIOSes do not\n\ enable these bits correctly. We'd like to do this all the time, but there\n\ are some peripherals that this causes problems with."); static int pci_do_power_nodriver = 0; TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver); SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW, &pci_do_power_nodriver, 0, "Place a function into D3 state when no driver attaches to it. 0 means\n\ disable. 1 means conservatively place devices into D3 state. 2 means\n\ agressively place devices into D3 state. 3 means put absolutely everything\n\ in D3 state."); int pci_do_power_resume = 1; TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume); SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW, &pci_do_power_resume, 1, "Transition from D3 -> D0 on resume."); int pci_do_power_suspend = 1; TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend); SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW, &pci_do_power_suspend, 1, "Transition from D0 -> D3 on suspend."); static int pci_do_msi = 1; TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi); SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1, "Enable support for MSI interrupts"); static int pci_do_msix = 1; TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix); SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1, "Enable support for MSI-X interrupts"); static int pci_honor_msi_blacklist = 1; TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist); SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD, &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI"); #if defined(__i386__) || defined(__amd64__) static int pci_usb_takeover = 1; #else static int pci_usb_takeover = 0; #endif TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover); SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD | CTLFLAG_TUN, &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\ Disable this if you depend on BIOS emulation of USB devices, that is\n\ you use USB devices (like keyboard or mouse) but do not load USB drivers"); /* Find a device_t by bus/slot/function in domain 0 */ device_t pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func) { return (pci_find_dbsf(0, bus, slot, func)); } /* Find a device_t by domain/bus/slot/function */ device_t pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func) { struct pci_devinfo *dinfo; STAILQ_FOREACH(dinfo, &pci_devq, pci_links) { if ((dinfo->cfg.domain == domain) && (dinfo->cfg.bus == bus) && (dinfo->cfg.slot == slot) && (dinfo->cfg.func == func)) { return (dinfo->cfg.dev); } } return (NULL); } /* Find a device_t by vendor/device ID */ device_t pci_find_device(uint16_t vendor, uint16_t device) { struct pci_devinfo *dinfo; STAILQ_FOREACH(dinfo, &pci_devq, pci_links) { if ((dinfo->cfg.vendor == vendor) && (dinfo->cfg.device == device)) { return (dinfo->cfg.dev); } } return (NULL); } static int pci_printf(pcicfgregs *cfg, const char *fmt, ...) { va_list ap; int retval; retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot, cfg->func); va_start(ap, fmt); retval += vprintf(fmt, ap); va_end(ap); return (retval); } /* return base address of memory or port map */ static pci_addr_t pci_mapbase(uint64_t mapreg) { if (PCI_BAR_MEM(mapreg)) return (mapreg & PCIM_BAR_MEM_BASE); else return (mapreg & PCIM_BAR_IO_BASE); } /* return map type of memory or port map */ static const char * pci_maptype(uint64_t mapreg) { if (PCI_BAR_IO(mapreg)) return ("I/O Port"); if (mapreg & PCIM_BAR_MEM_PREFETCH) return ("Prefetchable Memory"); return ("Memory"); } /* return log2 of map size decoded for memory or port map */ static int pci_mapsize(uint64_t testval) { int ln2size; testval = pci_mapbase(testval); ln2size = 0; if (testval != 0) { while ((testval & 1) == 0) { ln2size++; testval >>= 1; } } return (ln2size); } /* return base address of device ROM */ static pci_addr_t pci_rombase(uint64_t mapreg) { return (mapreg & PCIM_BIOS_ADDR_MASK); } /* return log2 of map size decided for device ROM */ static int pci_romsize(uint64_t testval) { int ln2size; testval = pci_rombase(testval); ln2size = 0; if (testval != 0) { while ((testval & 1) == 0) { ln2size++; testval >>= 1; } } return (ln2size); } /* return log2 of address range supported by map register */ static int pci_maprange(uint64_t mapreg) { int ln2range = 0; if (PCI_BAR_IO(mapreg)) ln2range = 32; else switch (mapreg & PCIM_BAR_MEM_TYPE) { case PCIM_BAR_MEM_32: ln2range = 32; break; case PCIM_BAR_MEM_1MB: ln2range = 20; break; case PCIM_BAR_MEM_64: ln2range = 64; break; } return (ln2range); } /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */ static void pci_fixancient(pcicfgregs *cfg) { if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL) return; /* PCI to PCI bridges use header type 1 */ if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI) cfg->hdrtype = PCIM_HDRTYPE_BRIDGE; } /* extract header type specific config data */ static void pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg) { #define REG(n, w) PCIB_READ_CONFIG(pcib, b, s, f, n, w) switch (cfg->hdrtype & PCIM_HDRTYPE) { case PCIM_HDRTYPE_NORMAL: cfg->subvendor = REG(PCIR_SUBVEND_0, 2); cfg->subdevice = REG(PCIR_SUBDEV_0, 2); cfg->nummaps = PCI_MAXMAPS_0; break; case PCIM_HDRTYPE_BRIDGE: cfg->nummaps = PCI_MAXMAPS_1; break; case PCIM_HDRTYPE_CARDBUS: cfg->subvendor = REG(PCIR_SUBVEND_2, 2); cfg->subdevice = REG(PCIR_SUBDEV_2, 2); cfg->nummaps = PCI_MAXMAPS_2; break; } #undef REG } /* read configuration header into pcicfgregs structure */ struct pci_devinfo * pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size) { #define REG(n, w) PCIB_READ_CONFIG(pcib, b, s, f, n, w) pcicfgregs *cfg = NULL; struct pci_devinfo *devlist_entry; struct devlist *devlist_head; devlist_head = &pci_devq; devlist_entry = NULL; if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) { devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO); if (devlist_entry == NULL) return (NULL); cfg = &devlist_entry->cfg; cfg->domain = d; cfg->bus = b; cfg->slot = s; cfg->func = f; cfg->vendor = REG(PCIR_VENDOR, 2); cfg->device = REG(PCIR_DEVICE, 2); cfg->cmdreg = REG(PCIR_COMMAND, 2); cfg->statreg = REG(PCIR_STATUS, 2); cfg->baseclass = REG(PCIR_CLASS, 1); cfg->subclass = REG(PCIR_SUBCLASS, 1); cfg->progif = REG(PCIR_PROGIF, 1); cfg->revid = REG(PCIR_REVID, 1); cfg->hdrtype = REG(PCIR_HDRTYPE, 1); cfg->cachelnsz = REG(PCIR_CACHELNSZ, 1); cfg->lattimer = REG(PCIR_LATTIMER, 1); cfg->intpin = REG(PCIR_INTPIN, 1); cfg->intline = REG(PCIR_INTLINE, 1); cfg->mingnt = REG(PCIR_MINGNT, 1); cfg->maxlat = REG(PCIR_MAXLAT, 1); cfg->mfdev = (cfg->hdrtype & PCIM_MFDEV) != 0; cfg->hdrtype &= ~PCIM_MFDEV; pci_fixancient(cfg); pci_hdrtypedata(pcib, b, s, f, cfg); if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT) pci_read_extcap(pcib, cfg); STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links); devlist_entry->conf.pc_sel.pc_domain = cfg->domain; devlist_entry->conf.pc_sel.pc_bus = cfg->bus; devlist_entry->conf.pc_sel.pc_dev = cfg->slot; devlist_entry->conf.pc_sel.pc_func = cfg->func; devlist_entry->conf.pc_hdr = cfg->hdrtype; devlist_entry->conf.pc_subvendor = cfg->subvendor; devlist_entry->conf.pc_subdevice = cfg->subdevice; devlist_entry->conf.pc_vendor = cfg->vendor; devlist_entry->conf.pc_device = cfg->device; devlist_entry->conf.pc_class = cfg->baseclass; devlist_entry->conf.pc_subclass = cfg->subclass; devlist_entry->conf.pc_progif = cfg->progif; devlist_entry->conf.pc_revid = cfg->revid; pci_numdevs++; pci_generation++; } return (devlist_entry); #undef REG } static void pci_read_extcap(device_t pcib, pcicfgregs *cfg) { #define REG(n, w) PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w) #define WREG(n, v, w) PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w) #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__) uint64_t addr; #endif uint32_t val; int ptr, nextptr, ptrptr; switch (cfg->hdrtype & PCIM_HDRTYPE) { case PCIM_HDRTYPE_NORMAL: case PCIM_HDRTYPE_BRIDGE: ptrptr = PCIR_CAP_PTR; break; case PCIM_HDRTYPE_CARDBUS: ptrptr = PCIR_CAP_PTR_2; /* cardbus capabilities ptr */ break; default: return; /* no extended capabilities support */ } nextptr = REG(ptrptr, 1); /* sanity check? */ /* * Read capability entries. */ while (nextptr != 0) { /* Sanity check */ if (nextptr > 255) { printf("illegal PCI extended capability offset %d\n", nextptr); return; } /* Find the next entry */ ptr = nextptr; nextptr = REG(ptr + PCICAP_NEXTPTR, 1); /* Process this entry */ switch (REG(ptr + PCICAP_ID, 1)) { case PCIY_PMG: /* PCI power management */ if (cfg->pp.pp_cap == 0) { cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2); cfg->pp.pp_status = ptr + PCIR_POWER_STATUS; cfg->pp.pp_bse = ptr + PCIR_POWER_BSE; if ((nextptr - ptr) > PCIR_POWER_DATA) cfg->pp.pp_data = ptr + PCIR_POWER_DATA; } break; #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__) case PCIY_HT: /* HyperTransport */ /* Determine HT-specific capability type. */ val = REG(ptr + PCIR_HT_COMMAND, 2); switch (val & PCIM_HTCMD_CAP_MASK) { case PCIM_HTCAP_MSI_MAPPING: if (!(val & PCIM_HTCMD_MSI_FIXED)) { /* Sanity check the mapping window. */ addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI, 4); addr <<= 32; addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO, 4); if (addr != MSI_INTEL_ADDR_BASE) device_printf(pcib, "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n", cfg->domain, cfg->bus, cfg->slot, cfg->func, (long long)addr); } else addr = MSI_INTEL_ADDR_BASE; cfg->ht.ht_msimap = ptr; cfg->ht.ht_msictrl = val; cfg->ht.ht_msiaddr = addr; break; } break; #endif case PCIY_MSI: /* PCI MSI */ cfg->msi.msi_location = ptr; cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2); cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl & PCIM_MSICTRL_MMC_MASK)>>1); break; case PCIY_MSIX: /* PCI MSI-X */ cfg->msix.msix_location = ptr; cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2); cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl & PCIM_MSIXCTRL_TABLE_SIZE) + 1; val = REG(ptr + PCIR_MSIX_TABLE, 4); cfg->msix.msix_table_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK); cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK; val = REG(ptr + PCIR_MSIX_PBA, 4); cfg->msix.msix_pba_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK); cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK; break; case PCIY_VPD: /* PCI Vital Product Data */ cfg->vpd.vpd_reg = ptr; break; case PCIY_SUBVENDOR: /* Should always be true. */ if ((cfg->hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_BRIDGE) { val = REG(ptr + PCIR_SUBVENDCAP_ID, 4); cfg->subvendor = val & 0xffff; cfg->subdevice = val >> 16; } break; case PCIY_PCIX: /* PCI-X */ /* * Assume we have a PCI-X chipset if we have * at least one PCI-PCI bridge with a PCI-X * capability. Note that some systems with * PCI-express or HT chipsets might match on * this check as well. */ if ((cfg->hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_BRIDGE) pcix_chipset = 1; break; case PCIY_EXPRESS: /* PCI-express */ /* * Assume we have a PCI-express chipset if we have * at least one PCI-express device. */ pcie_chipset = 1; break; default: break; } } /* REG and WREG use carry through to next functions */ } /* * PCI Vital Product Data */ #define PCI_VPD_TIMEOUT 1000000 static int pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data) { int count = PCI_VPD_TIMEOUT; KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned")); WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2); while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) { if (--count < 0) return (ENXIO); DELAY(1); /* limit looping */ } *data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4)); return (0); } #if 0 static int pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data) { int count = PCI_VPD_TIMEOUT; KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned")); WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4); WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2); while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) { if (--count < 0) return (ENXIO); DELAY(1); /* limit looping */ } return (0); } #endif #undef PCI_VPD_TIMEOUT struct vpd_readstate { device_t pcib; pcicfgregs *cfg; uint32_t val; int bytesinval; int off; uint8_t cksum; }; static int vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data) { uint32_t reg; uint8_t byte; if (vrs->bytesinval == 0) { if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, ®)) return (ENXIO); vrs->val = le32toh(reg); vrs->off += 4; byte = vrs->val & 0xff; vrs->bytesinval = 3; } else { vrs->val = vrs->val >> 8; byte = vrs->val & 0xff; vrs->bytesinval--; } vrs->cksum += byte; *data = byte; return (0); } static void pci_read_vpd(device_t pcib, pcicfgregs *cfg) { struct vpd_readstate vrs; int state; int name; int remain; int i; int alloc, off; /* alloc/off for RO/W arrays */ int cksumvalid; int dflen; uint8_t byte; uint8_t byte2; /* init vpd reader */ vrs.bytesinval = 0; vrs.off = 0; vrs.pcib = pcib; vrs.cfg = cfg; vrs.cksum = 0; state = 0; name = remain = i = 0; /* shut up stupid gcc */ alloc = off = 0; /* shut up stupid gcc */ dflen = 0; /* shut up stupid gcc */ cksumvalid = -1; while (state >= 0) { if (vpd_nextbyte(&vrs, &byte)) { state = -2; break; } #if 0 printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \ "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val, vrs.off, vrs.bytesinval, byte, state, remain, name, i); #endif switch (state) { case 0: /* item name */ if (byte & 0x80) { if (vpd_nextbyte(&vrs, &byte2)) { state = -2; break; } remain = byte2; if (vpd_nextbyte(&vrs, &byte2)) { state = -2; break; } remain |= byte2 << 8; if (remain > (0x7f*4 - vrs.off)) { state = -1; printf( "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n", cfg->domain, cfg->bus, cfg->slot, cfg->func, remain); } name = byte & 0x7f; } else { remain = byte & 0x7; name = (byte >> 3) & 0xf; } switch (name) { case 0x2: /* String */ cfg->vpd.vpd_ident = malloc(remain + 1, M_DEVBUF, M_WAITOK); i = 0; state = 1; break; case 0xf: /* End */ state = -1; break; case 0x10: /* VPD-R */ alloc = 8; off = 0; cfg->vpd.vpd_ros = malloc(alloc * sizeof(*cfg->vpd.vpd_ros), M_DEVBUF, M_WAITOK | M_ZERO); state = 2; break; case 0x11: /* VPD-W */ alloc = 8; off = 0; cfg->vpd.vpd_w = malloc(alloc * sizeof(*cfg->vpd.vpd_w), M_DEVBUF, M_WAITOK | M_ZERO); state = 5; break; default: /* Invalid data, abort */ state = -1; break; } break; case 1: /* Identifier String */ cfg->vpd.vpd_ident[i++] = byte; remain--; if (remain == 0) { cfg->vpd.vpd_ident[i] = '\0'; state = 0; } break; case 2: /* VPD-R Keyword Header */ if (off == alloc) { cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros, (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros), M_DEVBUF, M_WAITOK | M_ZERO); } cfg->vpd.vpd_ros[off].keyword[0] = byte; if (vpd_nextbyte(&vrs, &byte2)) { state = -2; break; } cfg->vpd.vpd_ros[off].keyword[1] = byte2; if (vpd_nextbyte(&vrs, &byte2)) { state = -2; break; } dflen = byte2; if (dflen == 0 && strncmp(cfg->vpd.vpd_ros[off].keyword, "RV", 2) == 0) { /* * if this happens, we can't trust the rest * of the VPD. */ printf( "pci%d:%d:%d:%d: bad keyword length: %d\n", cfg->domain, cfg->bus, cfg->slot, cfg->func, dflen); cksumvalid = 0; state = -1; break; } else if (dflen == 0) { cfg->vpd.vpd_ros[off].value = malloc(1 * sizeof(*cfg->vpd.vpd_ros[off].value), M_DEVBUF, M_WAITOK); cfg->vpd.vpd_ros[off].value[0] = '\x00'; } else cfg->vpd.vpd_ros[off].value = malloc( (dflen + 1) * sizeof(*cfg->vpd.vpd_ros[off].value), M_DEVBUF, M_WAITOK); remain -= 3; i = 0; /* keep in sync w/ state 3's transistions */ if (dflen == 0 && remain == 0) state = 0; else if (dflen == 0) state = 2; else state = 3; break; case 3: /* VPD-R Keyword Value */ cfg->vpd.vpd_ros[off].value[i++] = byte; if (strncmp(cfg->vpd.vpd_ros[off].keyword, "RV", 2) == 0 && cksumvalid == -1) { if (vrs.cksum == 0) cksumvalid = 1; else { if (bootverbose) printf( "pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n", cfg->domain, cfg->bus, cfg->slot, cfg->func, vrs.cksum); cksumvalid = 0; state = -1; break; } } dflen--; remain--; /* keep in sync w/ state 2's transistions */ if (dflen == 0) cfg->vpd.vpd_ros[off++].value[i++] = '\0'; if (dflen == 0 && remain == 0) { cfg->vpd.vpd_rocnt = off; cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros, off * sizeof(*cfg->vpd.vpd_ros), M_DEVBUF, M_WAITOK | M_ZERO); state = 0; } else if (dflen == 0) state = 2; break; case 4: remain--; if (remain == 0) state = 0; break; case 5: /* VPD-W Keyword Header */ if (off == alloc) { cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w, (alloc *= 2) * sizeof(*cfg->vpd.vpd_w), M_DEVBUF, M_WAITOK | M_ZERO); } cfg->vpd.vpd_w[off].keyword[0] = byte; if (vpd_nextbyte(&vrs, &byte2)) { state = -2; break; } cfg->vpd.vpd_w[off].keyword[1] = byte2; if (vpd_nextbyte(&vrs, &byte2)) { state = -2; break; } cfg->vpd.vpd_w[off].len = dflen = byte2; cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval; cfg->vpd.vpd_w[off].value = malloc((dflen + 1) * sizeof(*cfg->vpd.vpd_w[off].value), M_DEVBUF, M_WAITOK); remain -= 3; i = 0; /* keep in sync w/ state 6's transistions */ if (dflen == 0 && remain == 0) state = 0; else if (dflen == 0) state = 5; else state = 6; break; case 6: /* VPD-W Keyword Value */ cfg->vpd.vpd_w[off].value[i++] = byte; dflen--; remain--; /* keep in sync w/ state 5's transistions */ if (dflen == 0) cfg->vpd.vpd_w[off++].value[i++] = '\0'; if (dflen == 0 && remain == 0) { cfg->vpd.vpd_wcnt = off; cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w, off * sizeof(*cfg->vpd.vpd_w), M_DEVBUF, M_WAITOK | M_ZERO); state = 0; } else if (dflen == 0) state = 5; break; default: printf("pci%d:%d:%d:%d: invalid state: %d\n", cfg->domain, cfg->bus, cfg->slot, cfg->func, state); state = -1; break; } } if (cksumvalid == 0 || state < -1) { /* read-only data bad, clean up */ if (cfg->vpd.vpd_ros != NULL) { for (off = 0; cfg->vpd.vpd_ros[off].value; off++) free(cfg->vpd.vpd_ros[off].value, M_DEVBUF); free(cfg->vpd.vpd_ros, M_DEVBUF); cfg->vpd.vpd_ros = NULL; } } if (state < -1) { /* I/O error, clean up */ printf("pci%d:%d:%d:%d: failed to read VPD data.\n", cfg->domain, cfg->bus, cfg->slot, cfg->func); if (cfg->vpd.vpd_ident != NULL) { free(cfg->vpd.vpd_ident, M_DEVBUF); cfg->vpd.vpd_ident = NULL; } if (cfg->vpd.vpd_w != NULL) { for (off = 0; cfg->vpd.vpd_w[off].value; off++) free(cfg->vpd.vpd_w[off].value, M_DEVBUF); free(cfg->vpd.vpd_w, M_DEVBUF); cfg->vpd.vpd_w = NULL; } } cfg->vpd.vpd_cached = 1; #undef REG #undef WREG } int pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr) { struct pci_devinfo *dinfo = device_get_ivars(child); pcicfgregs *cfg = &dinfo->cfg; if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0) pci_read_vpd(device_get_parent(dev), cfg); *identptr = cfg->vpd.vpd_ident; if (*identptr == NULL) return (ENXIO); return (0); } int pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw, const char **vptr) { struct pci_devinfo *dinfo = device_get_ivars(child); pcicfgregs *cfg = &dinfo->cfg; int i; if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0) pci_read_vpd(device_get_parent(dev), cfg); for (i = 0; i < cfg->vpd.vpd_rocnt; i++) if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword, sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) { *vptr = cfg->vpd.vpd_ros[i].value; } if (i != cfg->vpd.vpd_rocnt) return (0); *vptr = NULL; return (ENXIO); } /* * Find the requested extended capability and return the offset in * configuration space via the pointer provided. The function returns * 0 on success and error code otherwise. */ int pci_find_extcap_method(device_t dev, device_t child, int capability, int *capreg) { struct pci_devinfo *dinfo = device_get_ivars(child); pcicfgregs *cfg = &dinfo->cfg; u_int32_t status; u_int8_t ptr; /* * Check the CAP_LIST bit of the PCI status register first. */ status = pci_read_config(child, PCIR_STATUS, 2); if (!(status & PCIM_STATUS_CAPPRESENT)) return (ENXIO); /* * Determine the start pointer of the capabilities list. */ switch (cfg->hdrtype & PCIM_HDRTYPE) { case PCIM_HDRTYPE_NORMAL: case PCIM_HDRTYPE_BRIDGE: ptr = PCIR_CAP_PTR; break; case PCIM_HDRTYPE_CARDBUS: ptr = PCIR_CAP_PTR_2; break; default: /* XXX: panic? */ return (ENXIO); /* no extended capabilities support */ } ptr = pci_read_config(child, ptr, 1); /* * Traverse the capabilities list. */ while (ptr != 0) { if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) { if (capreg != NULL) *capreg = ptr; return (0); } ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1); } return (ENOENT); } /* * Support for MSI-X message interrupts. */ void pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data) { struct pci_devinfo *dinfo = device_get_ivars(dev); struct pcicfg_msix *msix = &dinfo->cfg.msix; uint32_t offset; KASSERT(msix->msix_table_len > index, ("bogus index")); offset = msix->msix_table_offset + index * 16; bus_write_4(msix->msix_table_res, offset, address & 0xffffffff); bus_write_4(msix->msix_table_res, offset + 4, address >> 32); bus_write_4(msix->msix_table_res, offset + 8, data); /* Enable MSI -> HT mapping. */ pci_ht_map_msi(dev, address); } void pci_mask_msix(device_t dev, u_int index) { struct pci_devinfo *dinfo = device_get_ivars(dev); struct pcicfg_msix *msix = &dinfo->cfg.msix; uint32_t offset, val; KASSERT(msix->msix_msgnum > index, ("bogus index")); offset = msix->msix_table_offset + index * 16 + 12; val = bus_read_4(msix->msix_table_res, offset); if (!(val & PCIM_MSIX_VCTRL_MASK)) { val |= PCIM_MSIX_VCTRL_MASK; bus_write_4(msix->msix_table_res, offset, val); } } void pci_unmask_msix(device_t dev, u_int index) { struct pci_devinfo *dinfo = device_get_ivars(dev); struct pcicfg_msix *msix = &dinfo->cfg.msix; uint32_t offset, val; KASSERT(msix->msix_table_len > index, ("bogus index")); offset = msix->msix_table_offset + index * 16 + 12; val = bus_read_4(msix->msix_table_res, offset); if (val & PCIM_MSIX_VCTRL_MASK) { val &= ~PCIM_MSIX_VCTRL_MASK; bus_write_4(msix->msix_table_res, offset, val); } } int pci_pending_msix(device_t dev, u_int index) { struct pci_devinfo *dinfo = device_get_ivars(dev); struct pcicfg_msix *msix = &dinfo->cfg.msix; uint32_t offset, bit; KASSERT(msix->msix_table_len > index, ("bogus index")); offset = msix->msix_pba_offset + (index / 32) * 4; bit = 1 << index % 32; return (bus_read_4(msix->msix_pba_res, offset) & bit); } /* * Restore MSI-X registers and table during resume. If MSI-X is * enabled then walk the virtual table to restore the actual MSI-X * table. */ static void pci_resume_msix(device_t dev) { struct pci_devinfo *dinfo = device_get_ivars(dev); struct pcicfg_msix *msix = &dinfo->cfg.msix; struct msix_table_entry *mte; struct msix_vector *mv; int i; if (msix->msix_alloc > 0) { /* First, mask all vectors. */ for (i = 0; i < msix->msix_msgnum; i++) pci_mask_msix(dev, i); /* Second, program any messages with at least one handler. */ for (i = 0; i < msix->msix_table_len; i++) { mte = &msix->msix_table[i]; if (mte->mte_vector == 0 || mte->mte_handlers == 0) continue; mv = &msix->msix_vectors[mte->mte_vector - 1]; pci_enable_msix(dev, i, mv->mv_address, mv->mv_data); pci_unmask_msix(dev, i); } } pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL, msix->msix_ctrl, 2); } /* * Attempt to allocate *count MSI-X messages. The actual number allocated is * returned in *count. After this function returns, each message will be * available to the driver as SYS_RES_IRQ resources starting at rid 1. */ int pci_alloc_msix_method(device_t dev, device_t child, int *count) { struct pci_devinfo *dinfo = device_get_ivars(child); pcicfgregs *cfg = &dinfo->cfg; struct resource_list_entry *rle; int actual, error, i, irq, max; /* Don't let count == 0 get us into trouble. */ if (*count == 0) return (EINVAL); /* If rid 0 is allocated, then fail. */ rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0); if (rle != NULL && rle->res != NULL) return (ENXIO); /* Already have allocated messages? */ if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0) return (ENXIO); /* If MSI is blacklisted for this system, fail. */ if (pci_msi_blacklisted()) return (ENXIO); /* MSI-X capability present? */ if (cfg->msix.msix_location == 0 || !pci_do_msix) return (ENODEV); /* Make sure the appropriate BARs are mapped. */ rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY, cfg->msix.msix_table_bar); if (rle == NULL || rle->res == NULL || !(rman_get_flags(rle->res) & RF_ACTIVE)) return (ENXIO); cfg->msix.msix_table_res = rle->res; if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) { rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY, cfg->msix.msix_pba_bar); if (rle == NULL || rle->res == NULL || !(rman_get_flags(rle->res) & RF_ACTIVE)) return (ENXIO); } cfg->msix.msix_pba_res = rle->res; if (bootverbose) device_printf(child, "attempting to allocate %d MSI-X vectors (%d supported)\n", *count, cfg->msix.msix_msgnum); max = min(*count, cfg->msix.msix_msgnum); for (i = 0; i < max; i++) { /* Allocate a message. */ error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq); if (error) break; resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq, irq, 1); } actual = i; if (bootverbose) { rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1); if (actual == 1) device_printf(child, "using IRQ %lu for MSI-X\n", rle->start); else { int run; /* * Be fancy and try to print contiguous runs of * IRQ values as ranges. 'irq' is the previous IRQ. * 'run' is true if we are in a range. */ device_printf(child, "using IRQs %lu", rle->start); irq = rle->start; run = 0; for (i = 1; i < actual; i++) { rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1); /* Still in a run? */ if (rle->start == irq + 1) { run = 1; irq++; continue; } /* Finish previous range. */ if (run) { printf("-%d", irq); run = 0; } /* Start new range. */ printf(",%lu", rle->start); irq = rle->start; } /* Unfinished range? */ if (run) printf("-%d", irq); printf(" for MSI-X\n"); } } /* Mask all vectors. */ for (i = 0; i < cfg->msix.msix_msgnum; i++) pci_mask_msix(child, i); /* Allocate and initialize vector data and virtual table. */ cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual, M_DEVBUF, M_WAITOK | M_ZERO); cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual, M_DEVBUF, M_WAITOK | M_ZERO); for (i = 0; i < actual; i++) { rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1); cfg->msix.msix_vectors[i].mv_irq = rle->start; cfg->msix.msix_table[i].mte_vector = i + 1; } /* Update control register to enable MSI-X. */ cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL, cfg->msix.msix_ctrl, 2); /* Update counts of alloc'd messages. */ cfg->msix.msix_alloc = actual; cfg->msix.msix_table_len = actual; *count = actual; return (0); } /* * By default, pci_alloc_msix() will assign the allocated IRQ * resources consecutively to the first N messages in the MSI-X table. * However, device drivers may want to use different layouts if they * either receive fewer messages than they asked for, or they wish to * populate the MSI-X table sparsely. This method allows the driver * to specify what layout it wants. It must be called after a * successful pci_alloc_msix() but before any of the associated * SYS_RES_IRQ resources are allocated via bus_alloc_resource(). * * The 'vectors' array contains 'count' message vectors. The array * maps directly to the MSI-X table in that index 0 in the array * specifies the vector for the first message in the MSI-X table, etc. * The vector value in each array index can either be 0 to indicate * that no vector should be assigned to a message slot, or it can be a * number from 1 to N (where N is the count returned from a * succcessful call to pci_alloc_msix()) to indicate which message * vector (IRQ) to be used for the corresponding message. * * On successful return, each message with a non-zero vector will have * an associated SYS_RES_IRQ whose rid is equal to the array index + * 1. Additionally, if any of the IRQs allocated via the previous * call to pci_alloc_msix() are not used in the mapping, those IRQs * will be freed back to the system automatically. * * For example, suppose a driver has a MSI-X table with 6 messages and * asks for 6 messages, but pci_alloc_msix() only returns a count of * 3. Call the three vectors allocated by pci_alloc_msix() A, B, and * C. After the call to pci_alloc_msix(), the device will be setup to * have an MSI-X table of ABC--- (where - means no vector assigned). * If the driver ten passes a vector array of { 1, 0, 1, 2, 0, 2 }, * then the MSI-X table will look like A-AB-B, and the 'C' vector will * be freed back to the system. This device will also have valid * SYS_RES_IRQ rids of 1, 3, 4, and 6. * * In any case, the SYS_RES_IRQ rid X will always map to the message * at MSI-X table index X - 1 and will only be valid if a vector is * assigned to that table entry. */ int pci_remap_msix_method(device_t dev, device_t child, int count, const u_int *vectors) { struct pci_devinfo *dinfo = device_get_ivars(child); struct pcicfg_msix *msix = &dinfo->cfg.msix; struct resource_list_entry *rle; int i, irq, j, *used; /* * Have to have at least one message in the table but the * table can't be bigger than the actual MSI-X table in the * device. */ if (count == 0 || count > msix->msix_msgnum) return (EINVAL); /* Sanity check the vectors. */ for (i = 0; i < count; i++) if (vectors[i] > msix->msix_alloc) return (EINVAL); /* * Make sure there aren't any holes in the vectors to be used. * It's a big pain to support it, and it doesn't really make * sense anyway. Also, at least one vector must be used. */ used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK | M_ZERO); for (i = 0; i < count; i++) if (vectors[i] != 0) used[vectors[i] - 1] = 1; for (i = 0; i < msix->msix_alloc - 1; i++) if (used[i] == 0 && used[i + 1] == 1) { free(used, M_DEVBUF); return (EINVAL); } if (used[0] != 1) { free(used, M_DEVBUF); return (EINVAL); } /* Make sure none of the resources are allocated. */ for (i = 0; i < msix->msix_table_len; i++) { if (msix->msix_table[i].mte_vector == 0) continue; if (msix->msix_table[i].mte_handlers > 0) return (EBUSY); rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1); KASSERT(rle != NULL, ("missing resource")); if (rle->res != NULL) return (EBUSY); } /* Free the existing resource list entries. */ for (i = 0; i < msix->msix_table_len; i++) { if (msix->msix_table[i].mte_vector == 0) continue; resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1); } /* * Build the new virtual table keeping track of which vectors are * used. */ free(msix->msix_table, M_DEVBUF); msix->msix_table = malloc(sizeof(struct msix_table_entry) * count, M_DEVBUF, M_WAITOK | M_ZERO); for (i = 0; i < count; i++) msix->msix_table[i].mte_vector = vectors[i]; msix->msix_table_len = count; /* Free any unused IRQs and resize the vectors array if necessary. */ j = msix->msix_alloc - 1; if (used[j] == 0) { struct msix_vector *vec; while (used[j] == 0) { PCIB_RELEASE_MSIX(device_get_parent(dev), child, msix->msix_vectors[j].mv_irq); j--; } vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF, M_WAITOK); bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) * (j + 1)); free(msix->msix_vectors, M_DEVBUF); msix->msix_vectors = vec; msix->msix_alloc = j + 1; } free(used, M_DEVBUF); /* Map the IRQs onto the rids. */ for (i = 0; i < count; i++) { if (vectors[i] == 0) continue; irq = msix->msix_vectors[vectors[i]].mv_irq; resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq, irq, 1); } if (bootverbose) { device_printf(child, "Remapped MSI-X IRQs as: "); for (i = 0; i < count; i++) { if (i != 0) printf(", "); if (vectors[i] == 0) printf("---"); else printf("%d", msix->msix_vectors[vectors[i]].mv_irq); } printf("\n"); } return (0); } static int pci_release_msix(device_t dev, device_t child) { struct pci_devinfo *dinfo = device_get_ivars(child); struct pcicfg_msix *msix = &dinfo->cfg.msix; struct resource_list_entry *rle; int i; /* Do we have any messages to release? */ if (msix->msix_alloc == 0) return (ENODEV); /* Make sure none of the resources are allocated. */ for (i = 0; i < msix->msix_table_len; i++) { if (msix->msix_table[i].mte_vector == 0) continue; if (msix->msix_table[i].mte_handlers > 0) return (EBUSY); rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1); KASSERT(rle != NULL, ("missing resource")); if (rle->res != NULL) return (EBUSY); } /* Update control register to disable MSI-X. */ msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE; pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL, msix->msix_ctrl, 2); /* Free the resource list entries. */ for (i = 0; i < msix->msix_table_len; i++) { if (msix->msix_table[i].mte_vector == 0) continue; resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1); } free(msix->msix_table, M_DEVBUF); msix->msix_table_len = 0; /* Release the IRQs. */ for (i = 0; i < msix->msix_alloc; i++) PCIB_RELEASE_MSIX(device_get_parent(dev), child, msix->msix_vectors[i].mv_irq); free(msix->msix_vectors, M_DEVBUF); msix->msix_alloc = 0; return (0); } /* * Return the max supported MSI-X messages this device supports. * Basically, assuming the MD code can alloc messages, this function * should return the maximum value that pci_alloc_msix() can return. * Thus, it is subject to the tunables, etc. */ int pci_msix_count_method(device_t dev, device_t child) { struct pci_devinfo *dinfo = device_get_ivars(child); struct pcicfg_msix *msix = &dinfo->cfg.msix; if (pci_do_msix && msix->msix_location != 0) return (msix->msix_msgnum); return (0); } /* * HyperTransport MSI mapping control */ void pci_ht_map_msi(device_t dev, uint64_t addr) { struct pci_devinfo *dinfo = device_get_ivars(dev); struct pcicfg_ht *ht = &dinfo->cfg.ht; if (!ht->ht_msimap) return; if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) && ht->ht_msiaddr >> 20 == addr >> 20) { /* Enable MSI -> HT mapping. */ ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE; pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND, ht->ht_msictrl, 2); } if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) { /* Disable MSI -> HT mapping. */ ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE; pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND, ht->ht_msictrl, 2); } } int pci_get_max_read_req(device_t dev) { int cap; uint16_t val; if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0) return (0); val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2); val &= PCIM_EXP_CTL_MAX_READ_REQUEST; val >>= 12; return (1 << (val + 7)); } int pci_set_max_read_req(device_t dev, int size) { int cap; uint16_t val; if (pci_find_extcap(dev, PCIY_EXPRESS, &cap) != 0) return (0); if (size < 128) size = 128; if (size > 4096) size = 4096; size = (1 << (fls(size) - 1)); val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2); val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST; val |= (fls(size) - 8) << 12; pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2); return (size); } /* * Support for MSI message signalled interrupts. */ void pci_enable_msi(device_t dev, uint64_t address, uint16_t data) { struct pci_devinfo *dinfo = device_get_ivars(dev); struct pcicfg_msi *msi = &dinfo->cfg.msi; /* Write data and address values. */ pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR, address & 0xffffffff, 4); if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) { pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH, address >> 32, 4); pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT, data, 2); } else pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data, 2); /* Enable MSI in the control register. */ msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE; pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl, 2); /* Enable MSI -> HT mapping. */ pci_ht_map_msi(dev, address); } void pci_disable_msi(device_t dev) { struct pci_devinfo *dinfo = device_get_ivars(dev); struct pcicfg_msi *msi = &dinfo->cfg.msi; /* Disable MSI -> HT mapping. */ pci_ht_map_msi(dev, 0); /* Disable MSI in the control register. */ msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE; pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl, 2); } /* * Restore MSI registers during resume. If MSI is enabled then * restore the data and address registers in addition to the control * register. */ static void pci_resume_msi(device_t dev) { struct pci_devinfo *dinfo = device_get_ivars(dev); struct pcicfg_msi *msi = &dinfo->cfg.msi; uint64_t address; uint16_t data; if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) { address = msi->msi_addr; data = msi->msi_data; pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR, address & 0xffffffff, 4); if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) { pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH, address >> 32, 4); pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT, data, 2); } else pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data, 2); } pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl, 2); } static int pci_remap_intr_method(device_t bus, device_t dev, u_int irq) { struct pci_devinfo *dinfo = device_get_ivars(dev); pcicfgregs *cfg = &dinfo->cfg; struct resource_list_entry *rle; struct msix_table_entry *mte; struct msix_vector *mv; uint64_t addr; uint32_t data; int error, i, j; /* * Handle MSI first. We try to find this IRQ among our list * of MSI IRQs. If we find it, we request updated address and * data registers and apply the results. */ if (cfg->msi.msi_alloc > 0) { /* If we don't have any active handlers, nothing to do. */ if (cfg->msi.msi_handlers == 0) return (0); for (i = 0; i < cfg->msi.msi_alloc; i++) { rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1); if (rle->start == irq) { error = PCIB_MAP_MSI(device_get_parent(bus), dev, irq, &addr, &data); if (error) return (error); pci_disable_msi(dev); dinfo->cfg.msi.msi_addr = addr; dinfo->cfg.msi.msi_data = data; pci_enable_msi(dev, addr, data); return (0); } } return (ENOENT); } /* * For MSI-X, we check to see if we have this IRQ. If we do, * we request the updated mapping info. If that works, we go * through all the slots that use this IRQ and update them. */ if (cfg->msix.msix_alloc > 0) { for (i = 0; i < cfg->msix.msix_alloc; i++) { mv = &cfg->msix.msix_vectors[i]; if (mv->mv_irq == irq) { error = PCIB_MAP_MSI(device_get_parent(bus), dev, irq, &addr, &data); if (error) return (error); mv->mv_address = addr; mv->mv_data = data; for (j = 0; j < cfg->msix.msix_table_len; j++) { mte = &cfg->msix.msix_table[j]; if (mte->mte_vector != i + 1) continue; if (mte->mte_handlers == 0) continue; pci_mask_msix(dev, j); pci_enable_msix(dev, j, addr, data); pci_unmask_msix(dev, j); } } } return (ENOENT); } return (ENOENT); } /* * Returns true if the specified device is blacklisted because MSI * doesn't work. */ int pci_msi_device_blacklisted(device_t dev) { struct pci_quirk *q; if (!pci_honor_msi_blacklist) return (0); for (q = &pci_quirks[0]; q->devid; q++) { if (q->devid == pci_get_devid(dev) && q->type == PCI_QUIRK_DISABLE_MSI) return (1); } return (0); } /* * Returns true if a specified chipset supports MSI when it is * emulated hardware in a virtual machine. */ static int pci_msi_vm_chipset(device_t dev) { struct pci_quirk *q; for (q = &pci_quirks[0]; q->devid; q++) { if (q->devid == pci_get_devid(dev) && q->type == PCI_QUIRK_ENABLE_MSI_VM) return (1); } return (0); } /* * Determine if MSI is blacklisted globally on this sytem. Currently, * we just check for blacklisted chipsets as represented by the * host-PCI bridge at device 0:0:0. In the future, it may become * necessary to check other system attributes, such as the kenv values * that give the motherboard manufacturer and model number. */ static int pci_msi_blacklisted(void) { device_t dev; if (!pci_honor_msi_blacklist) return (0); /* Blacklist all non-PCI-express and non-PCI-X chipsets. */ if (!(pcie_chipset || pcix_chipset)) { if (vm_guest != VM_GUEST_NO) { dev = pci_find_bsf(0, 0, 0); if (dev != NULL) return (pci_msi_vm_chipset(dev) == 0); } return (1); } dev = pci_find_bsf(0, 0, 0); if (dev != NULL) return (pci_msi_device_blacklisted(dev)); return (0); } /* * Attempt to allocate *count MSI messages. The actual number allocated is * returned in *count. After this function returns, each message will be * available to the driver as SYS_RES_IRQ resources starting at a rid 1. */ int pci_alloc_msi_method(device_t dev, device_t child, int *count) { struct pci_devinfo *dinfo = device_get_ivars(child); pcicfgregs *cfg = &dinfo->cfg; struct resource_list_entry *rle; int actual, error, i, irqs[32]; uint16_t ctrl; /* Don't let count == 0 get us into trouble. */ if (*count == 0) return (EINVAL); /* If rid 0 is allocated, then fail. */ rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0); if (rle != NULL && rle->res != NULL) return (ENXIO); /* Already have allocated messages? */ if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0) return (ENXIO); /* If MSI is blacklisted for this system, fail. */ if (pci_msi_blacklisted()) return (ENXIO); /* MSI capability present? */ if (cfg->msi.msi_location == 0 || !pci_do_msi) return (ENODEV); if (bootverbose) device_printf(child, "attempting to allocate %d MSI vectors (%d supported)\n", *count, cfg->msi.msi_msgnum); /* Don't ask for more than the device supports. */ actual = min(*count, cfg->msi.msi_msgnum); /* Don't ask for more than 32 messages. */ actual = min(actual, 32); /* MSI requires power of 2 number of messages. */ if (!powerof2(actual)) return (EINVAL); for (;;) { /* Try to allocate N messages. */ error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual, cfg->msi.msi_msgnum, irqs); if (error == 0) break; if (actual == 1) return (error); /* Try N / 2. */ actual >>= 1; } /* * We now have N actual messages mapped onto SYS_RES_IRQ * resources in the irqs[] array, so add new resources * starting at rid 1. */ for (i = 0; i < actual; i++) resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irqs[i], irqs[i], 1); if (bootverbose) { if (actual == 1) device_printf(child, "using IRQ %d for MSI\n", irqs[0]); else { int run; /* * Be fancy and try to print contiguous runs * of IRQ values as ranges. 'run' is true if * we are in a range. */ device_printf(child, "using IRQs %d", irqs[0]); run = 0; for (i = 1; i < actual; i++) { /* Still in a run? */ if (irqs[i] == irqs[i - 1] + 1) { run = 1; continue; } /* Finish previous range. */ if (run) { printf("-%d", irqs[i - 1]); run = 0; } /* Start new range. */ printf(",%d", irqs[i]); } /* Unfinished range? */ if (run) printf("-%d", irqs[actual - 1]); printf(" for MSI\n"); } } /* Update control register with actual count. */ ctrl = cfg->msi.msi_ctrl; ctrl &= ~PCIM_MSICTRL_MME_MASK; ctrl |= (ffs(actual) - 1) << 4; cfg->msi.msi_ctrl = ctrl; pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2); /* Update counts of alloc'd messages. */ cfg->msi.msi_alloc = actual; cfg->msi.msi_handlers = 0; *count = actual; return (0); } /* Release the MSI messages associated with this device. */ int pci_release_msi_method(device_t dev, device_t child) { struct pci_devinfo *dinfo = device_get_ivars(child); struct pcicfg_msi *msi = &dinfo->cfg.msi; struct resource_list_entry *rle; int error, i, irqs[32]; /* Try MSI-X first. */ error = pci_release_msix(dev, child); if (error != ENODEV) return (error); /* Do we have any messages to release? */ if (msi->msi_alloc == 0) return (ENODEV); KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages")); /* Make sure none of the resources are allocated. */ if (msi->msi_handlers > 0) return (EBUSY); for (i = 0; i < msi->msi_alloc; i++) { rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1); KASSERT(rle != NULL, ("missing MSI resource")); if (rle->res != NULL) return (EBUSY); irqs[i] = rle->start; } /* Update control register with 0 count. */ KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE), ("%s: MSI still enabled", __func__)); msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK; pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl, 2); /* Release the messages. */ PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs); for (i = 0; i < msi->msi_alloc; i++) resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1); /* Update alloc count. */ msi->msi_alloc = 0; msi->msi_addr = 0; msi->msi_data = 0; return (0); } /* * Return the max supported MSI messages this device supports. * Basically, assuming the MD code can alloc messages, this function * should return the maximum value that pci_alloc_msi() can return. * Thus, it is subject to the tunables, etc. */ int pci_msi_count_method(device_t dev, device_t child) { struct pci_devinfo *dinfo = device_get_ivars(child); struct pcicfg_msi *msi = &dinfo->cfg.msi; if (pci_do_msi && msi->msi_location != 0) return (msi->msi_msgnum); return (0); } /* free pcicfgregs structure and all depending data structures */ int pci_freecfg(struct pci_devinfo *dinfo) { struct devlist *devlist_head; int i; devlist_head = &pci_devq; if (dinfo->cfg.vpd.vpd_reg) { free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF); for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++) free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF); free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF); for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++) free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF); free(dinfo->cfg.vpd.vpd_w, M_DEVBUF); } STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links); free(dinfo, M_DEVBUF); /* increment the generation count */ pci_generation++; /* we're losing one device */ pci_numdevs--; return (0); } /* * PCI power manangement */ int pci_set_powerstate_method(device_t dev, device_t child, int state) { struct pci_devinfo *dinfo = device_get_ivars(child); pcicfgregs *cfg = &dinfo->cfg; uint16_t status; int result, oldstate, highest, delay; if (cfg->pp.pp_cap == 0) return (EOPNOTSUPP); /* * Optimize a no state change request away. While it would be OK to * write to the hardware in theory, some devices have shown odd * behavior when going from D3 -> D3. */ oldstate = pci_get_powerstate(child); if (oldstate == state) return (0); /* * The PCI power management specification states that after a state * transition between PCI power states, system software must * guarantee a minimal delay before the function accesses the device. * Compute the worst case delay that we need to guarantee before we * access the device. Many devices will be responsive much more * quickly than this delay, but there are some that don't respond * instantly to state changes. Transitions to/from D3 state require * 10ms, while D2 requires 200us, and D0/1 require none. The delay * is done below with DELAY rather than a sleeper function because * this function can be called from contexts where we cannot sleep. */ highest = (oldstate > state) ? oldstate : state; if (highest == PCI_POWERSTATE_D3) delay = 10000; else if (highest == PCI_POWERSTATE_D2) delay = 200; else delay = 0; status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2) & ~PCIM_PSTAT_DMASK; result = 0; switch (state) { case PCI_POWERSTATE_D0: status |= PCIM_PSTAT_D0; break; case PCI_POWERSTATE_D1: if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0) return (EOPNOTSUPP); status |= PCIM_PSTAT_D1; break; case PCI_POWERSTATE_D2: if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0) return (EOPNOTSUPP); status |= PCIM_PSTAT_D2; break; case PCI_POWERSTATE_D3: status |= PCIM_PSTAT_D3; break; default: return (EINVAL); } if (bootverbose) pci_printf(cfg, "Transition from D%d to D%d\n", oldstate, state); PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2); if (delay) DELAY(delay); return (0); } int pci_get_powerstate_method(device_t dev, device_t child) { struct pci_devinfo *dinfo = device_get_ivars(child); pcicfgregs *cfg = &dinfo->cfg; uint16_t status; int result; if (cfg->pp.pp_cap != 0) { status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2); switch (status & PCIM_PSTAT_DMASK) { case PCIM_PSTAT_D0: result = PCI_POWERSTATE_D0; break; case PCIM_PSTAT_D1: result = PCI_POWERSTATE_D1; break; case PCIM_PSTAT_D2: result = PCI_POWERSTATE_D2; break; case PCIM_PSTAT_D3: result = PCI_POWERSTATE_D3; break; default: result = PCI_POWERSTATE_UNKNOWN; break; } } else { /* No support, device is always at D0 */ result = PCI_POWERSTATE_D0; } return (result); } /* * Some convenience functions for PCI device drivers. */ static __inline void pci_set_command_bit(device_t dev, device_t child, uint16_t bit) { uint16_t command; command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2); command |= bit; PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2); } static __inline void pci_clear_command_bit(device_t dev, device_t child, uint16_t bit) { uint16_t command; command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2); command &= ~bit; PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2); } int pci_enable_busmaster_method(device_t dev, device_t child) { pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN); return (0); } int pci_disable_busmaster_method(device_t dev, device_t child) { pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN); return (0); } int pci_enable_io_method(device_t dev, device_t child, int space) { uint16_t bit; switch(space) { case SYS_RES_IOPORT: bit = PCIM_CMD_PORTEN; break; case SYS_RES_MEMORY: bit = PCIM_CMD_MEMEN; break; default: return (EINVAL); } pci_set_command_bit(dev, child, bit); return (0); } int pci_disable_io_method(device_t dev, device_t child, int space) { uint16_t bit; switch(space) { case SYS_RES_IOPORT: bit = PCIM_CMD_PORTEN; break; case SYS_RES_MEMORY: bit = PCIM_CMD_MEMEN; break; default: return (EINVAL); } pci_clear_command_bit(dev, child, bit); return (0); } /* * New style pci driver. Parent device is either a pci-host-bridge or a * pci-pci-bridge. Both kinds are represented by instances of pcib. */ void pci_print_verbose(struct pci_devinfo *dinfo) { if (bootverbose) { pcicfgregs *cfg = &dinfo->cfg; printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n", cfg->vendor, cfg->device, cfg->revid); printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n", cfg->domain, cfg->bus, cfg->slot, cfg->func); printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n", cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype, cfg->mfdev); printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n", cfg->cmdreg, cfg->statreg, cfg->cachelnsz); printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n", cfg->lattimer, cfg->lattimer * 30, cfg->mingnt, cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250); if (cfg->intpin > 0) printf("\tintpin=%c, irq=%d\n", cfg->intpin +'a' -1, cfg->intline); if (cfg->pp.pp_cap) { uint16_t status; status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2); printf("\tpowerspec %d supports D0%s%s D3 current D%d\n", cfg->pp.pp_cap & PCIM_PCAP_SPEC, cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "", cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "", status & PCIM_PSTAT_DMASK); } if (cfg->msi.msi_location) { int ctrl; ctrl = cfg->msi.msi_ctrl; printf("\tMSI supports %d message%s%s%s\n", cfg->msi.msi_msgnum, (cfg->msi.msi_msgnum == 1) ? "" : "s", (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "", (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":""); } if (cfg->msix.msix_location) { printf("\tMSI-X supports %d message%s ", cfg->msix.msix_msgnum, (cfg->msix.msix_msgnum == 1) ? "" : "s"); if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar) printf("in map 0x%x\n", cfg->msix.msix_table_bar); else printf("in maps 0x%x and 0x%x\n", cfg->msix.msix_table_bar, cfg->msix.msix_pba_bar); } } } static int pci_porten(device_t dev) { return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0; } static int pci_memen(device_t dev) { return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0; } static void pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp) { pci_addr_t map, testval; int ln2range; uint16_t cmd; /* * The device ROM BAR is special. It is always a 32-bit * memory BAR. Bit 0 is special and should not be set when * sizing the BAR. */ if (reg == PCIR_BIOS) { map = pci_read_config(dev, reg, 4); pci_write_config(dev, reg, 0xfffffffe, 4); testval = pci_read_config(dev, reg, 4); pci_write_config(dev, reg, map, 4); *mapp = map; *testvalp = testval; return; } map = pci_read_config(dev, reg, 4); ln2range = pci_maprange(map); if (ln2range == 64) map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32; /* * Disable decoding via the command register before * determining the BAR's length since we will be placing it in * a weird state. */ cmd = pci_read_config(dev, PCIR_COMMAND, 2); pci_write_config(dev, PCIR_COMMAND, cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2); /* * Determine the BAR's length by writing all 1's. The bottom * log_2(size) bits of the BAR will stick as 0 when we read * the value back. */ pci_write_config(dev, reg, 0xffffffff, 4); testval = pci_read_config(dev, reg, 4); if (ln2range == 64) { pci_write_config(dev, reg + 4, 0xffffffff, 4); testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32; } /* * Restore the original value of the BAR. We may have reprogrammed * the BAR of the low-level console device and when booting verbose, * we need the console device addressable. */ pci_write_config(dev, reg, map, 4); if (ln2range == 64) pci_write_config(dev, reg + 4, map >> 32, 4); pci_write_config(dev, PCIR_COMMAND, cmd, 2); *mapp = map; *testvalp = testval; } static void pci_write_bar(device_t dev, int reg, pci_addr_t base) { pci_addr_t map; int ln2range; map = pci_read_config(dev, reg, 4); /* The device ROM BAR is always 32-bits. */ if (reg == PCIR_BIOS) return; ln2range = pci_maprange(map); pci_write_config(dev, reg, base, 4); if (ln2range == 64) pci_write_config(dev, reg + 4, base >> 32, 4); } /* * Add a resource based on a pci map register. Return 1 if the map * register is a 32bit map register or 2 if it is a 64bit register. */ static int pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl, int force, int prefetch) { pci_addr_t base, map, testval; pci_addr_t start, end, count; int barlen, basezero, maprange, mapsize, type; uint16_t cmd; struct resource *res; pci_read_bar(dev, reg, &map, &testval); if (PCI_BAR_MEM(map)) { type = SYS_RES_MEMORY; if (map & PCIM_BAR_MEM_PREFETCH) prefetch = 1; } else type = SYS_RES_IOPORT; mapsize = pci_mapsize(testval); base = pci_mapbase(map); #ifdef __PCI_BAR_ZERO_VALID basezero = 0; #else basezero = base == 0; #endif maprange = pci_maprange(map); barlen = maprange == 64 ? 2 : 1; /* * For I/O registers, if bottom bit is set, and the next bit up * isn't clear, we know we have a BAR that doesn't conform to the * spec, so ignore it. Also, sanity check the size of the data * areas to the type of memory involved. Memory must be at least * 16 bytes in size, while I/O ranges must be at least 4. */ if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0) return (barlen); if ((type == SYS_RES_MEMORY && mapsize < 4) || (type == SYS_RES_IOPORT && mapsize < 2)) return (barlen); if (bootverbose) { printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d", reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize); if (type == SYS_RES_IOPORT && !pci_porten(dev)) printf(", port disabled\n"); else if (type == SYS_RES_MEMORY && !pci_memen(dev)) printf(", memory disabled\n"); else printf(", enabled\n"); } /* * If base is 0, then we have problems if this architecture does * not allow that. It is best to ignore such entries for the * moment. These will be allocated later if the driver specifically * requests them. However, some removable busses look better when * all resources are allocated, so allow '0' to be overriden. * * Similarly treat maps whose values is the same as the test value * read back. These maps have had all f's written to them by the * BIOS in an attempt to disable the resources. */ if (!force && (basezero || map == testval)) return (barlen); if ((u_long)base != base) { device_printf(bus, "pci%d:%d:%d:%d bar %#x too many address bits", pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev), reg); return (barlen); } /* * This code theoretically does the right thing, but has * undesirable side effects in some cases where peripherals * respond oddly to having these bits enabled. Let the user * be able to turn them off (since pci_enable_io_modes is 1 by * default). */ if (pci_enable_io_modes) { /* Turn on resources that have been left off by a lazy BIOS */ if (type == SYS_RES_IOPORT && !pci_porten(dev)) { cmd = pci_read_config(dev, PCIR_COMMAND, 2); cmd |= PCIM_CMD_PORTEN; pci_write_config(dev, PCIR_COMMAND, cmd, 2); } if (type == SYS_RES_MEMORY && !pci_memen(dev)) { cmd = pci_read_config(dev, PCIR_COMMAND, 2); cmd |= PCIM_CMD_MEMEN; pci_write_config(dev, PCIR_COMMAND, cmd, 2); } } else { if (type == SYS_RES_IOPORT && !pci_porten(dev)) return (barlen); if (type == SYS_RES_MEMORY && !pci_memen(dev)) return (barlen); } count = 1 << mapsize; if (basezero || base == pci_mapbase(testval)) { start = 0; /* Let the parent decide. */ end = ~0ULL; } else { start = base; end = base + (1 << mapsize) - 1; } resource_list_add(rl, type, reg, start, end, count); /* * Try to allocate the resource for this BAR from our parent * so that this resource range is already reserved. The * driver for this device will later inherit this resource in * pci_alloc_resource(). */ res = resource_list_reserve(rl, bus, dev, type, ®, start, end, count, prefetch ? RF_PREFETCHABLE : 0); if (res == NULL) { /* * If the allocation fails, clear the BAR and delete * the resource list entry to force * pci_alloc_resource() to allocate resources from the * parent. */ resource_list_delete(rl, type, reg); start = 0; } else start = rman_get_start(res); pci_write_bar(dev, reg, start); return (barlen); } /* * For ATA devices we need to decide early what addressing mode to use. * Legacy demands that the primary and secondary ATA ports sits on the * same addresses that old ISA hardware did. This dictates that we use * those addresses and ignore the BAR's if we cannot set PCI native * addressing mode. */ static void pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force, uint32_t prefetchmask) { struct resource *r; int rid, type, progif; #if 0 /* if this device supports PCI native addressing use it */ progif = pci_read_config(dev, PCIR_PROGIF, 1); if ((progif & 0x8a) == 0x8a) { if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) && pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) { printf("Trying ATA native PCI addressing mode\n"); pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1); } } #endif progif = pci_read_config(dev, PCIR_PROGIF, 1); type = SYS_RES_IOPORT; if (progif & PCIP_STORAGE_IDE_MODEPRIM) { pci_add_map(bus, dev, PCIR_BAR(0), rl, force, prefetchmask & (1 << 0)); pci_add_map(bus, dev, PCIR_BAR(1), rl, force, prefetchmask & (1 << 1)); } else { rid = PCIR_BAR(0); resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8); r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8, 0); rid = PCIR_BAR(1); resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1); r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1, 0); } if (progif & PCIP_STORAGE_IDE_MODESEC) { pci_add_map(bus, dev, PCIR_BAR(2), rl, force, prefetchmask & (1 << 2)); pci_add_map(bus, dev, PCIR_BAR(3), rl, force, prefetchmask & (1 << 3)); } else { rid = PCIR_BAR(2); resource_list_add(rl, type, rid, 0x170, 0x177, 8); r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170, 0x177, 8, 0); rid = PCIR_BAR(3); resource_list_add(rl, type, rid, 0x376, 0x376, 1); r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376, 0x376, 1, 0); } pci_add_map(bus, dev, PCIR_BAR(4), rl, force, prefetchmask & (1 << 4)); pci_add_map(bus, dev, PCIR_BAR(5), rl, force, prefetchmask & (1 << 5)); } static void pci_assign_interrupt(device_t bus, device_t dev, int force_route) { struct pci_devinfo *dinfo = device_get_ivars(dev); pcicfgregs *cfg = &dinfo->cfg; char tunable_name[64]; int irq; /* Has to have an intpin to have an interrupt. */ if (cfg->intpin == 0) return; /* Let the user override the IRQ with a tunable. */ irq = PCI_INVALID_IRQ; snprintf(tunable_name, sizeof(tunable_name), "hw.pci%d.%d.%d.INT%c.irq", cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1); if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0)) irq = PCI_INVALID_IRQ; /* * If we didn't get an IRQ via the tunable, then we either use the * IRQ value in the intline register or we ask the bus to route an * interrupt for us. If force_route is true, then we only use the * value in the intline register if the bus was unable to assign an * IRQ. */ if (!PCI_INTERRUPT_VALID(irq)) { if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route) irq = PCI_ASSIGN_INTERRUPT(bus, dev); if (!PCI_INTERRUPT_VALID(irq)) irq = cfg->intline; } /* If after all that we don't have an IRQ, just bail. */ if (!PCI_INTERRUPT_VALID(irq)) return; /* Update the config register if it changed. */ if (irq != cfg->intline) { cfg->intline = irq; pci_write_config(dev, PCIR_INTLINE, irq, 1); } /* Add this IRQ as rid 0 interrupt resource. */ resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1); } /* Perform early OHCI takeover from SMM. */ static void ohci_early_takeover(device_t self) { struct resource *res; uint32_t ctl; int rid; int i; rid = PCIR_BAR(0); res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (res == NULL) return; ctl = bus_read_4(res, OHCI_CONTROL); if (ctl & OHCI_IR) { if (bootverbose) printf("ohci early: " "SMM active, request owner change\n"); bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR); for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) { DELAY(1000); ctl = bus_read_4(res, OHCI_CONTROL); } if (ctl & OHCI_IR) { if (bootverbose) printf("ohci early: " "SMM does not respond, resetting\n"); bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET); } /* Disable interrupts */ bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS); } bus_release_resource(self, SYS_RES_MEMORY, rid, res); } /* Perform early UHCI takeover from SMM. */ static void uhci_early_takeover(device_t self) { struct resource *res; int rid; /* * Set the PIRQD enable bit and switch off all the others. We don't * want legacy support to interfere with us XXX Does this also mean * that the BIOS won't touch the keyboard anymore if it is connected * to the ports of the root hub? */ pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2); /* Disable interrupts */ rid = PCI_UHCI_BASE_REG; res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE); if (res != NULL) { bus_write_2(res, UHCI_INTR, 0); bus_release_resource(self, SYS_RES_IOPORT, rid, res); } } /* Perform early EHCI takeover from SMM. */ static void ehci_early_takeover(device_t self) { struct resource *res; uint32_t cparams; uint32_t eec; uint8_t eecp; uint8_t bios_sem; uint8_t offs; int rid; int i; rid = PCIR_BAR(0); res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (res == NULL) return; cparams = bus_read_4(res, EHCI_HCCPARAMS); /* Synchronise with the BIOS if it owns the controller. */ for (eecp = EHCI_HCC_EECP(cparams); eecp != 0; eecp = EHCI_EECP_NEXT(eec)) { eec = pci_read_config(self, eecp, 4); if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) { continue; } bios_sem = pci_read_config(self, eecp + EHCI_LEGSUP_BIOS_SEM, 1); if (bios_sem == 0) { continue; } if (bootverbose) printf("ehci early: " "SMM active, request owner change\n"); pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1); for (i = 0; (i < 100) && (bios_sem != 0); i++) { DELAY(1000); bios_sem = pci_read_config(self, eecp + EHCI_LEGSUP_BIOS_SEM, 1); } if (bios_sem != 0) { if (bootverbose) printf("ehci early: " "SMM does not respond\n"); } /* Disable interrupts */ offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION)); bus_write_4(res, offs + EHCI_USBINTR, 0); } bus_release_resource(self, SYS_RES_MEMORY, rid, res); } void pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask) { struct pci_devinfo *dinfo = device_get_ivars(dev); pcicfgregs *cfg = &dinfo->cfg; struct resource_list *rl = &dinfo->resources; struct pci_quirk *q; int i; /* ATA devices needs special map treatment */ if ((pci_get_class(dev) == PCIC_STORAGE) && (pci_get_subclass(dev) == PCIS_STORAGE_IDE) && ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) || (!pci_read_config(dev, PCIR_BAR(0), 4) && !pci_read_config(dev, PCIR_BAR(2), 4))) ) pci_ata_maps(bus, dev, rl, force, prefetchmask); else for (i = 0; i < cfg->nummaps;) i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force, prefetchmask & (1 << i)); /* * Add additional, quirked resources. */ for (q = &pci_quirks[0]; q->devid; q++) { if (q->devid == ((cfg->device << 16) | cfg->vendor) && q->type == PCI_QUIRK_MAP_REG) pci_add_map(bus, dev, q->arg1, rl, force, 0); } if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) { #ifdef __PCI_REROUTE_INTERRUPT /* * Try to re-route interrupts. Sometimes the BIOS or * firmware may leave bogus values in these registers. * If the re-route fails, then just stick with what we * have. */ pci_assign_interrupt(bus, dev, 1); #else pci_assign_interrupt(bus, dev, 0); #endif } if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS && pci_get_subclass(dev) == PCIS_SERIALBUS_USB) { if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI) ehci_early_takeover(dev); else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI) ohci_early_takeover(dev); else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI) uhci_early_takeover(dev); } } void pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size) { #define REG(n, w) PCIB_READ_CONFIG(pcib, busno, s, f, n, w) device_t pcib = device_get_parent(dev); struct pci_devinfo *dinfo; int maxslots; int s, f, pcifunchigh; uint8_t hdrtype; KASSERT(dinfo_size >= sizeof(struct pci_devinfo), ("dinfo_size too small")); maxslots = PCIB_MAXSLOTS(pcib); for (s = 0; s <= maxslots; s++) { pcifunchigh = 0; f = 0; DELAY(1); hdrtype = REG(PCIR_HDRTYPE, 1); if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE) continue; if (hdrtype & PCIM_MFDEV) pcifunchigh = PCI_FUNCMAX; for (f = 0; f <= pcifunchigh; f++) { dinfo = pci_read_device(pcib, domain, busno, s, f, dinfo_size); if (dinfo != NULL) { pci_add_child(dev, dinfo); } } } #undef REG } void pci_add_child(device_t bus, struct pci_devinfo *dinfo) { dinfo->cfg.dev = device_add_child(bus, NULL, -1); device_set_ivars(dinfo->cfg.dev, dinfo); resource_list_init(&dinfo->resources); pci_cfg_save(dinfo->cfg.dev, dinfo, 0); pci_cfg_restore(dinfo->cfg.dev, dinfo); pci_print_verbose(dinfo); pci_add_resources(bus, dinfo->cfg.dev, 0, 0); } static int pci_probe(device_t dev) { device_set_desc(dev, "PCI bus"); /* Allow other subclasses to override this driver. */ return (BUS_PROBE_GENERIC); } static int pci_attach(device_t dev) { int busno, domain; /* * Since there can be multiple independantly numbered PCI * busses on systems with multiple PCI domains, we can't use * the unit number to decide which bus we are probing. We ask * the parent pcib what our domain and bus numbers are. */ domain = pcib_get_domain(dev); busno = pcib_get_bus(dev); if (bootverbose) device_printf(dev, "domain=%d, physical bus=%d\n", domain, busno); pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo)); return (bus_generic_attach(dev)); } static void pci_set_power_children(device_t dev, device_t *devlist, int numdevs, int state) { device_t child, pcib; struct pci_devinfo *dinfo; int dstate, i; /* * Set the device to the given state. If the firmware suggests * a different power state, use it instead. If power management * is not present, the firmware is responsible for managing * device power. Skip children who aren't attached since they * are handled separately. */ pcib = device_get_parent(dev); for (i = 0; i < numdevs; i++) { child = devlist[i]; dinfo = device_get_ivars(child); dstate = state; if (device_is_attached(child) && PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0) pci_set_powerstate(child, dstate); } } int pci_suspend(device_t dev) { device_t child, *devlist; struct pci_devinfo *dinfo; int error, i, numdevs; /* * Save the PCI configuration space for each child and set the * device in the appropriate power state for this sleep state. */ if ((error = device_get_children(dev, &devlist, &numdevs)) != 0) return (error); for (i = 0; i < numdevs; i++) { child = devlist[i]; dinfo = device_get_ivars(child); pci_cfg_save(child, dinfo, 0); } /* Suspend devices before potentially powering them down. */ error = bus_generic_suspend(dev); if (error) { free(devlist, M_TEMP); return (error); } if (pci_do_power_suspend) pci_set_power_children(dev, devlist, numdevs, PCI_POWERSTATE_D3); free(devlist, M_TEMP); return (0); } int pci_resume(device_t dev) { device_t child, *devlist; struct pci_devinfo *dinfo; int error, i, numdevs; /* * Set each child to D0 and restore its PCI configuration space. */ if ((error = device_get_children(dev, &devlist, &numdevs)) != 0) return (error); if (pci_do_power_resume) pci_set_power_children(dev, devlist, numdevs, PCI_POWERSTATE_D0); /* Now the device is powered up, restore its config space. */ for (i = 0; i < numdevs; i++) { child = devlist[i]; dinfo = device_get_ivars(child); pci_cfg_restore(child, dinfo); if (!device_is_attached(child)) pci_cfg_save(child, dinfo, 1); } + + /* + * Resume critical devices first, then everything else later. + */ + for (i = 0; i < numdevs; i++) { + child = devlist[i]; + switch (pci_get_class(child)) { + case PCIC_DISPLAY: + case PCIC_MEMORY: + case PCIC_BRIDGE: + case PCIC_BASEPERIPH: + DEVICE_RESUME(child); + break; + } + } + for (i = 0; i < numdevs; i++) { + child = devlist[i]; + switch (pci_get_class(child)) { + case PCIC_DISPLAY: + case PCIC_MEMORY: + case PCIC_BRIDGE: + case PCIC_BASEPERIPH: + break; + default: + DEVICE_RESUME(child); + } + } free(devlist, M_TEMP); - return (bus_generic_resume(dev)); + return (0); } static void pci_load_vendor_data(void) { caddr_t vendordata, info; if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) { info = preload_search_info(vendordata, MODINFO_ADDR); pci_vendordata = *(char **)info; info = preload_search_info(vendordata, MODINFO_SIZE); pci_vendordata_size = *(size_t *)info; /* terminate the database */ pci_vendordata[pci_vendordata_size] = '\n'; } } void pci_driver_added(device_t dev, driver_t *driver) { int numdevs; device_t *devlist; device_t child; struct pci_devinfo *dinfo; int i; if (bootverbose) device_printf(dev, "driver added\n"); DEVICE_IDENTIFY(driver, dev); if (device_get_children(dev, &devlist, &numdevs) != 0) return; for (i = 0; i < numdevs; i++) { child = devlist[i]; if (device_get_state(child) != DS_NOTPRESENT) continue; dinfo = device_get_ivars(child); pci_print_verbose(dinfo); if (bootverbose) pci_printf(&dinfo->cfg, "reprobing on driver added\n"); pci_cfg_restore(child, dinfo); if (device_probe_and_attach(child) != 0) pci_cfg_save(child, dinfo, 1); } free(devlist, M_TEMP); } int pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags, driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep) { struct pci_devinfo *dinfo; struct msix_table_entry *mte; struct msix_vector *mv; uint64_t addr; uint32_t data; void *cookie; int error, rid; error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr, arg, &cookie); if (error) return (error); /* If this is not a direct child, just bail out. */ if (device_get_parent(child) != dev) { *cookiep = cookie; return(0); } rid = rman_get_rid(irq); if (rid == 0) { /* Make sure that INTx is enabled */ pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS); } else { /* * Check to see if the interrupt is MSI or MSI-X. * Ask our parent to map the MSI and give * us the address and data register values. * If we fail for some reason, teardown the * interrupt handler. */ dinfo = device_get_ivars(child); if (dinfo->cfg.msi.msi_alloc > 0) { if (dinfo->cfg.msi.msi_addr == 0) { KASSERT(dinfo->cfg.msi.msi_handlers == 0, ("MSI has handlers, but vectors not mapped")); error = PCIB_MAP_MSI(device_get_parent(dev), child, rman_get_start(irq), &addr, &data); if (error) goto bad; dinfo->cfg.msi.msi_addr = addr; dinfo->cfg.msi.msi_data = data; } if (dinfo->cfg.msi.msi_handlers == 0) pci_enable_msi(child, dinfo->cfg.msi.msi_addr, dinfo->cfg.msi.msi_data); dinfo->cfg.msi.msi_handlers++; } else { KASSERT(dinfo->cfg.msix.msix_alloc > 0, ("No MSI or MSI-X interrupts allocated")); KASSERT(rid <= dinfo->cfg.msix.msix_table_len, ("MSI-X index too high")); mte = &dinfo->cfg.msix.msix_table[rid - 1]; KASSERT(mte->mte_vector != 0, ("no message vector")); mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1]; KASSERT(mv->mv_irq == rman_get_start(irq), ("IRQ mismatch")); if (mv->mv_address == 0) { KASSERT(mte->mte_handlers == 0, ("MSI-X table entry has handlers, but vector not mapped")); error = PCIB_MAP_MSI(device_get_parent(dev), child, rman_get_start(irq), &addr, &data); if (error) goto bad; mv->mv_address = addr; mv->mv_data = data; } if (mte->mte_handlers == 0) { pci_enable_msix(child, rid - 1, mv->mv_address, mv->mv_data); pci_unmask_msix(child, rid - 1); } mte->mte_handlers++; } /* Make sure that INTx is disabled if we are using MSI/MSIX */ pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS); bad: if (error) { (void)bus_generic_teardown_intr(dev, child, irq, cookie); return (error); } } *cookiep = cookie; return (0); } int pci_teardown_intr(device_t dev, device_t child, struct resource *irq, void *cookie) { struct msix_table_entry *mte; struct resource_list_entry *rle; struct pci_devinfo *dinfo; int error, rid; if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE)) return (EINVAL); /* If this isn't a direct child, just bail out */ if (device_get_parent(child) != dev) return(bus_generic_teardown_intr(dev, child, irq, cookie)); rid = rman_get_rid(irq); if (rid == 0) { /* Mask INTx */ pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS); } else { /* * Check to see if the interrupt is MSI or MSI-X. If so, * decrement the appropriate handlers count and mask the * MSI-X message, or disable MSI messages if the count * drops to 0. */ dinfo = device_get_ivars(child); rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid); if (rle->res != irq) return (EINVAL); if (dinfo->cfg.msi.msi_alloc > 0) { KASSERT(rid <= dinfo->cfg.msi.msi_alloc, ("MSI-X index too high")); if (dinfo->cfg.msi.msi_handlers == 0) return (EINVAL); dinfo->cfg.msi.msi_handlers--; if (dinfo->cfg.msi.msi_handlers == 0) pci_disable_msi(child); } else { KASSERT(dinfo->cfg.msix.msix_alloc > 0, ("No MSI or MSI-X interrupts allocated")); KASSERT(rid <= dinfo->cfg.msix.msix_table_len, ("MSI-X index too high")); mte = &dinfo->cfg.msix.msix_table[rid - 1]; if (mte->mte_handlers == 0) return (EINVAL); mte->mte_handlers--; if (mte->mte_handlers == 0) pci_mask_msix(child, rid - 1); } } error = bus_generic_teardown_intr(dev, child, irq, cookie); if (rid > 0) KASSERT(error == 0, ("%s: generic teardown failed for MSI/MSI-X", __func__)); return (error); } int pci_print_child(device_t dev, device_t child) { struct pci_devinfo *dinfo; struct resource_list *rl; int retval = 0; dinfo = device_get_ivars(child); rl = &dinfo->resources; retval += bus_print_child_header(dev, child); retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx"); retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx"); retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld"); if (device_get_flags(dev)) retval += printf(" flags %#x", device_get_flags(dev)); retval += printf(" at device %d.%d", pci_get_slot(child), pci_get_function(child)); retval += bus_print_child_footer(dev, child); return (retval); } static struct { int class; int subclass; char *desc; } pci_nomatch_tab[] = { {PCIC_OLD, -1, "old"}, {PCIC_OLD, PCIS_OLD_NONVGA, "non-VGA display device"}, {PCIC_OLD, PCIS_OLD_VGA, "VGA-compatible display device"}, {PCIC_STORAGE, -1, "mass storage"}, {PCIC_STORAGE, PCIS_STORAGE_SCSI, "SCSI"}, {PCIC_STORAGE, PCIS_STORAGE_IDE, "ATA"}, {PCIC_STORAGE, PCIS_STORAGE_FLOPPY, "floppy disk"}, {PCIC_STORAGE, PCIS_STORAGE_IPI, "IPI"}, {PCIC_STORAGE, PCIS_STORAGE_RAID, "RAID"}, {PCIC_STORAGE, PCIS_STORAGE_ATA_ADMA, "ATA (ADMA)"}, {PCIC_STORAGE, PCIS_STORAGE_SATA, "SATA"}, {PCIC_STORAGE, PCIS_STORAGE_SAS, "SAS"}, {PCIC_NETWORK, -1, "network"}, {PCIC_NETWORK, PCIS_NETWORK_ETHERNET, "ethernet"}, {PCIC_NETWORK, PCIS_NETWORK_TOKENRING, "token ring"}, {PCIC_NETWORK, PCIS_NETWORK_FDDI, "fddi"}, {PCIC_NETWORK, PCIS_NETWORK_ATM, "ATM"}, {PCIC_NETWORK, PCIS_NETWORK_ISDN, "ISDN"}, {PCIC_DISPLAY, -1, "display"}, {PCIC_DISPLAY, PCIS_DISPLAY_VGA, "VGA"}, {PCIC_DISPLAY, PCIS_DISPLAY_XGA, "XGA"}, {PCIC_DISPLAY, PCIS_DISPLAY_3D, "3D"}, {PCIC_MULTIMEDIA, -1, "multimedia"}, {PCIC_MULTIMEDIA, PCIS_MULTIMEDIA_VIDEO, "video"}, {PCIC_MULTIMEDIA, PCIS_MULTIMEDIA_AUDIO, "audio"}, {PCIC_MULTIMEDIA, PCIS_MULTIMEDIA_TELE, "telephony"}, {PCIC_MULTIMEDIA, PCIS_MULTIMEDIA_HDA, "HDA"}, {PCIC_MEMORY, -1, "memory"}, {PCIC_MEMORY, PCIS_MEMORY_RAM, "RAM"}, {PCIC_MEMORY, PCIS_MEMORY_FLASH, "flash"}, {PCIC_BRIDGE, -1, "bridge"}, {PCIC_BRIDGE, PCIS_BRIDGE_HOST, "HOST-PCI"}, {PCIC_BRIDGE, PCIS_BRIDGE_ISA, "PCI-ISA"}, {PCIC_BRIDGE, PCIS_BRIDGE_EISA, "PCI-EISA"}, {PCIC_BRIDGE, PCIS_BRIDGE_MCA, "PCI-MCA"}, {PCIC_BRIDGE, PCIS_BRIDGE_PCI, "PCI-PCI"}, {PCIC_BRIDGE, PCIS_BRIDGE_PCMCIA, "PCI-PCMCIA"}, {PCIC_BRIDGE, PCIS_BRIDGE_NUBUS, "PCI-NuBus"}, {PCIC_BRIDGE, PCIS_BRIDGE_CARDBUS, "PCI-CardBus"}, {PCIC_BRIDGE, PCIS_BRIDGE_RACEWAY, "PCI-RACEway"}, {PCIC_SIMPLECOMM, -1, "simple comms"}, {PCIC_SIMPLECOMM, PCIS_SIMPLECOMM_UART, "UART"}, /* could detect 16550 */ {PCIC_SIMPLECOMM, PCIS_SIMPLECOMM_PAR, "parallel port"}, {PCIC_SIMPLECOMM, PCIS_SIMPLECOMM_MULSER, "multiport serial"}, {PCIC_SIMPLECOMM, PCIS_SIMPLECOMM_MODEM, "generic modem"}, {PCIC_BASEPERIPH, -1, "base peripheral"}, {PCIC_BASEPERIPH, PCIS_BASEPERIPH_PIC, "interrupt controller"}, {PCIC_BASEPERIPH, PCIS_BASEPERIPH_DMA, "DMA controller"}, {PCIC_BASEPERIPH, PCIS_BASEPERIPH_TIMER, "timer"}, {PCIC_BASEPERIPH, PCIS_BASEPERIPH_RTC, "realtime clock"}, {PCIC_BASEPERIPH, PCIS_BASEPERIPH_PCIHOT, "PCI hot-plug controller"}, {PCIC_BASEPERIPH, PCIS_BASEPERIPH_SDHC, "SD host controller"}, {PCIC_INPUTDEV, -1, "input device"}, {PCIC_INPUTDEV, PCIS_INPUTDEV_KEYBOARD, "keyboard"}, {PCIC_INPUTDEV, PCIS_INPUTDEV_DIGITIZER,"digitizer"}, {PCIC_INPUTDEV, PCIS_INPUTDEV_MOUSE, "mouse"}, {PCIC_INPUTDEV, PCIS_INPUTDEV_SCANNER, "scanner"}, {PCIC_INPUTDEV, PCIS_INPUTDEV_GAMEPORT, "gameport"}, {PCIC_DOCKING, -1, "docking station"}, {PCIC_PROCESSOR, -1, "processor"}, {PCIC_SERIALBUS, -1, "serial bus"}, {PCIC_SERIALBUS, PCIS_SERIALBUS_FW, "FireWire"}, {PCIC_SERIALBUS, PCIS_SERIALBUS_ACCESS, "AccessBus"}, {PCIC_SERIALBUS, PCIS_SERIALBUS_SSA, "SSA"}, {PCIC_SERIALBUS, PCIS_SERIALBUS_USB, "USB"}, {PCIC_SERIALBUS, PCIS_SERIALBUS_FC, "Fibre Channel"}, {PCIC_SERIALBUS, PCIS_SERIALBUS_SMBUS, "SMBus"}, {PCIC_WIRELESS, -1, "wireless controller"}, {PCIC_WIRELESS, PCIS_WIRELESS_IRDA, "iRDA"}, {PCIC_WIRELESS, PCIS_WIRELESS_IR, "IR"}, {PCIC_WIRELESS, PCIS_WIRELESS_RF, "RF"}, {PCIC_INTELLIIO, -1, "intelligent I/O controller"}, {PCIC_INTELLIIO, PCIS_INTELLIIO_I2O, "I2O"}, {PCIC_SATCOM, -1, "satellite communication"}, {PCIC_SATCOM, PCIS_SATCOM_TV, "sat TV"}, {PCIC_SATCOM, PCIS_SATCOM_AUDIO, "sat audio"}, {PCIC_SATCOM, PCIS_SATCOM_VOICE, "sat voice"}, {PCIC_SATCOM, PCIS_SATCOM_DATA, "sat data"}, {PCIC_CRYPTO, -1, "encrypt/decrypt"}, {PCIC_CRYPTO, PCIS_CRYPTO_NETCOMP, "network/computer crypto"}, {PCIC_CRYPTO, PCIS_CRYPTO_ENTERTAIN, "entertainment crypto"}, {PCIC_DASP, -1, "dasp"}, {PCIC_DASP, PCIS_DASP_DPIO, "DPIO module"}, {0, 0, NULL} }; void pci_probe_nomatch(device_t dev, device_t child) { int i; char *cp, *scp, *device; /* * Look for a listing for this device in a loaded device database. */ if ((device = pci_describe_device(child)) != NULL) { device_printf(dev, "<%s>", device); free(device, M_DEVBUF); } else { /* * Scan the class/subclass descriptions for a general * description. */ cp = "unknown"; scp = NULL; for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) { if (pci_nomatch_tab[i].class == pci_get_class(child)) { if (pci_nomatch_tab[i].subclass == -1) { cp = pci_nomatch_tab[i].desc; } else if (pci_nomatch_tab[i].subclass == pci_get_subclass(child)) { scp = pci_nomatch_tab[i].desc; } } } device_printf(dev, "<%s%s%s>", cp ? cp : "", ((cp != NULL) && (scp != NULL)) ? ", " : "", scp ? scp : ""); } printf(" at device %d.%d (no driver attached)\n", pci_get_slot(child), pci_get_function(child)); pci_cfg_save(child, device_get_ivars(child), 1); return; } /* * Parse the PCI device database, if loaded, and return a pointer to a * description of the device. * * The database is flat text formatted as follows: * * Any line not in a valid format is ignored. * Lines are terminated with newline '\n' characters. * * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then * the vendor name. * * A DEVICE line is entered immediately below the corresponding VENDOR ID. * - devices cannot be listed without a corresponding VENDOR line. * A DEVICE line consists of a TAB, the 4 digit (hex) device code, * another TAB, then the device name. */ /* * Assuming (ptr) points to the beginning of a line in the database, * return the vendor or device and description of the next entry. * The value of (vendor) or (device) inappropriate for the entry type * is set to -1. Returns nonzero at the end of the database. * * Note that this is slightly unrobust in the face of corrupt data; * we attempt to safeguard against this by spamming the end of the * database with a newline when we initialise. */ static int pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc) { char *cp = *ptr; int left; *device = -1; *vendor = -1; **desc = '\0'; for (;;) { left = pci_vendordata_size - (cp - pci_vendordata); if (left <= 0) { *ptr = cp; return(1); } /* vendor entry? */ if (*cp != '\t' && sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2) break; /* device entry? */ if (*cp == '\t' && sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2) break; /* skip to next line */ while (*cp != '\n' && left > 0) { cp++; left--; } if (*cp == '\n') { cp++; left--; } } /* skip to next line */ while (*cp != '\n' && left > 0) { cp++; left--; } if (*cp == '\n' && left > 0) cp++; *ptr = cp; return(0); } static char * pci_describe_device(device_t dev) { int vendor, device; char *desc, *vp, *dp, *line; desc = vp = dp = NULL; /* * If we have no vendor data, we can't do anything. */ if (pci_vendordata == NULL) goto out; /* * Scan the vendor data looking for this device */ line = pci_vendordata; if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL) goto out; for (;;) { if (pci_describe_parse_line(&line, &vendor, &device, &vp)) goto out; if (vendor == pci_get_vendor(dev)) break; } if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL) goto out; for (;;) { if (pci_describe_parse_line(&line, &vendor, &device, &dp)) { *dp = 0; break; } if (vendor != -1) { *dp = 0; break; } if (device == pci_get_device(dev)) break; } if (dp[0] == '\0') snprintf(dp, 80, "0x%x", pci_get_device(dev)); if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) != NULL) sprintf(desc, "%s, %s", vp, dp); out: if (vp != NULL) free(vp, M_DEVBUF); if (dp != NULL) free(dp, M_DEVBUF); return(desc); } int pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) { struct pci_devinfo *dinfo; pcicfgregs *cfg; dinfo = device_get_ivars(child); cfg = &dinfo->cfg; switch (which) { case PCI_IVAR_ETHADDR: /* * The generic accessor doesn't deal with failure, so * we set the return value, then return an error. */ *((uint8_t **) result) = NULL; return (EINVAL); case PCI_IVAR_SUBVENDOR: *result = cfg->subvendor; break; case PCI_IVAR_SUBDEVICE: *result = cfg->subdevice; break; case PCI_IVAR_VENDOR: *result = cfg->vendor; break; case PCI_IVAR_DEVICE: *result = cfg->device; break; case PCI_IVAR_DEVID: *result = (cfg->device << 16) | cfg->vendor; break; case PCI_IVAR_CLASS: *result = cfg->baseclass; break; case PCI_IVAR_SUBCLASS: *result = cfg->subclass; break; case PCI_IVAR_PROGIF: *result = cfg->progif; break; case PCI_IVAR_REVID: *result = cfg->revid; break; case PCI_IVAR_INTPIN: *result = cfg->intpin; break; case PCI_IVAR_IRQ: *result = cfg->intline; break; case PCI_IVAR_DOMAIN: *result = cfg->domain; break; case PCI_IVAR_BUS: *result = cfg->bus; break; case PCI_IVAR_SLOT: *result = cfg->slot; break; case PCI_IVAR_FUNCTION: *result = cfg->func; break; case PCI_IVAR_CMDREG: *result = cfg->cmdreg; break; case PCI_IVAR_CACHELNSZ: *result = cfg->cachelnsz; break; case PCI_IVAR_MINGNT: *result = cfg->mingnt; break; case PCI_IVAR_MAXLAT: *result = cfg->maxlat; break; case PCI_IVAR_LATTIMER: *result = cfg->lattimer; break; default: return (ENOENT); } return (0); } int pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value) { struct pci_devinfo *dinfo; dinfo = device_get_ivars(child); switch (which) { case PCI_IVAR_INTPIN: dinfo->cfg.intpin = value; return (0); case PCI_IVAR_ETHADDR: case PCI_IVAR_SUBVENDOR: case PCI_IVAR_SUBDEVICE: case PCI_IVAR_VENDOR: case PCI_IVAR_DEVICE: case PCI_IVAR_DEVID: case PCI_IVAR_CLASS: case PCI_IVAR_SUBCLASS: case PCI_IVAR_PROGIF: case PCI_IVAR_REVID: case PCI_IVAR_IRQ: case PCI_IVAR_DOMAIN: case PCI_IVAR_BUS: case PCI_IVAR_SLOT: case PCI_IVAR_FUNCTION: return (EINVAL); /* disallow for now */ default: return (ENOENT); } } #include "opt_ddb.h" #ifdef DDB #include #include /* * List resources based on pci map registers, used for within ddb */ DB_SHOW_COMMAND(pciregs, db_pci_dump) { struct pci_devinfo *dinfo; struct devlist *devlist_head; struct pci_conf *p; const char *name; int i, error, none_count; none_count = 0; /* get the head of the device queue */ devlist_head = &pci_devq; /* * Go through the list of devices and print out devices */ for (error = 0, i = 0, dinfo = STAILQ_FIRST(devlist_head); (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit; dinfo = STAILQ_NEXT(dinfo, pci_links), i++) { /* Populate pd_name and pd_unit */ name = NULL; if (dinfo->cfg.dev) name = device_get_name(dinfo->cfg.dev); p = &dinfo->conf; db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x " "chip=0x%08x rev=0x%02x hdr=0x%02x\n", (name && *name) ? name : "none", (name && *name) ? (int)device_get_unit(dinfo->cfg.dev) : none_count++, p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev, p->pc_sel.pc_func, (p->pc_class << 16) | (p->pc_subclass << 8) | p->pc_progif, (p->pc_subdevice << 16) | p->pc_subvendor, (p->pc_device << 16) | p->pc_vendor, p->pc_revid, p->pc_hdr); } } #endif /* DDB */ static struct resource * pci_reserve_map(device_t dev, device_t child, int type, int *rid, u_long start, u_long end, u_long count, u_int flags) { struct pci_devinfo *dinfo = device_get_ivars(child); struct resource_list *rl = &dinfo->resources; struct resource_list_entry *rle; struct resource *res; pci_addr_t map, testval; int mapsize; /* * Weed out the bogons, and figure out how large the BAR/map * is. Bars that read back 0 here are bogus and unimplemented. * Note: atapci in legacy mode are special and handled elsewhere * in the code. If you have a atapci device in legacy mode and * it fails here, that other code is broken. */ res = NULL; pci_read_bar(child, *rid, &map, &testval); /* * Determine the size of the BAR and ignore BARs with a size * of 0. Device ROM BARs use a different mask value. */ if (*rid == PCIR_BIOS) mapsize = pci_romsize(testval); else mapsize = pci_mapsize(testval); if (mapsize == 0) goto out; if (PCI_BAR_MEM(testval) || *rid == PCIR_BIOS) { if (type != SYS_RES_MEMORY) { if (bootverbose) device_printf(dev, "child %s requested type %d for rid %#x," " but the BAR says it is an memio\n", device_get_nameunit(child), type, *rid); goto out; } } else { if (type != SYS_RES_IOPORT) { if (bootverbose) device_printf(dev, "child %s requested type %d for rid %#x," " but the BAR says it is an ioport\n", device_get_nameunit(child), type, *rid); goto out; } } /* * For real BARs, we need to override the size that * the driver requests, because that's what the BAR * actually uses and we would otherwise have a * situation where we might allocate the excess to * another driver, which won't work. */ count = 1UL << mapsize; if (RF_ALIGNMENT(flags) < mapsize) flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize); if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH)) flags |= RF_PREFETCHABLE; /* * Allocate enough resource, and then write back the * appropriate bar for that resource. */ res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid, start, end, count, flags & ~RF_ACTIVE); if (res == NULL) { device_printf(child, "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n", count, *rid, type, start, end); goto out; } resource_list_add(rl, type, *rid, start, end, count); rle = resource_list_find(rl, type, *rid); if (rle == NULL) panic("pci_reserve_map: unexpectedly can't find resource."); rle->res = res; rle->start = rman_get_start(res); rle->end = rman_get_end(res); rle->count = count; rle->flags = RLE_RESERVED; if (bootverbose) device_printf(child, "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n", count, *rid, type, rman_get_start(res)); map = rman_get_start(res); pci_write_bar(child, *rid, map); out:; return (res); } struct resource * pci_alloc_resource(device_t dev, device_t child, int type, int *rid, u_long start, u_long end, u_long count, u_int flags) { struct pci_devinfo *dinfo = device_get_ivars(child); struct resource_list *rl = &dinfo->resources; struct resource_list_entry *rle; struct resource *res; pcicfgregs *cfg = &dinfo->cfg; if (device_get_parent(child) != dev) return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid, start, end, count, flags)); /* * Perform lazy resource allocation */ switch (type) { case SYS_RES_IRQ: /* * Can't alloc legacy interrupt once MSI messages have * been allocated. */ if (*rid == 0 && (cfg->msi.msi_alloc > 0 || cfg->msix.msix_alloc > 0)) return (NULL); /* * If the child device doesn't have an interrupt * routed and is deserving of an interrupt, try to * assign it one. */ if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) && (cfg->intpin != 0)) pci_assign_interrupt(dev, child, 0); break; case SYS_RES_IOPORT: case SYS_RES_MEMORY: /* Reserve resources for this BAR if needed. */ rle = resource_list_find(rl, type, *rid); if (rle == NULL) { res = pci_reserve_map(dev, child, type, rid, start, end, count, flags); if (res == NULL) return (NULL); } } return (resource_list_alloc(rl, dev, child, type, rid, start, end, count, flags)); } int pci_activate_resource(device_t dev, device_t child, int type, int rid, struct resource *r) { int error; error = bus_generic_activate_resource(dev, child, type, rid, r); if (error) return (error); /* Enable decoding in the command register when activating BARs. */ if (device_get_parent(child) == dev) { /* Device ROMs need their decoding explicitly enabled. */ if (rid == PCIR_BIOS) pci_write_config(child, rid, rman_get_start(r) | PCIM_BIOS_ENABLE, 4); switch (type) { case SYS_RES_IOPORT: case SYS_RES_MEMORY: error = PCI_ENABLE_IO(dev, child, type); break; } } return (error); } int pci_deactivate_resource(device_t dev, device_t child, int type, int rid, struct resource *r) { int error; error = bus_generic_deactivate_resource(dev, child, type, rid, r); if (error) return (error); /* Disable decoding for device ROMs. */ if (rid == PCIR_BIOS) pci_write_config(child, rid, rman_get_start(r), 4); return (0); } void pci_delete_child(device_t dev, device_t child) { struct resource_list_entry *rle; struct resource_list *rl; struct pci_devinfo *dinfo; dinfo = device_get_ivars(child); rl = &dinfo->resources; if (device_is_attached(child)) device_detach(child); /* Turn off access to resources we're about to free */ pci_write_config(child, PCIR_COMMAND, pci_read_config(child, PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2); /* Free all allocated resources */ STAILQ_FOREACH(rle, rl, link) { if (rle->res) { if (rman_get_flags(rle->res) & RF_ACTIVE || resource_list_busy(rl, rle->type, rle->rid)) { pci_printf(&dinfo->cfg, "Resource still owned, oops. " "(type=%d, rid=%d, addr=%lx)\n", rle->type, rle->rid, rman_get_start(rle->res)); bus_release_resource(child, rle->type, rle->rid, rle->res); } resource_list_unreserve(rl, dev, child, rle->type, rle->rid); } } resource_list_free(rl); device_delete_child(dev, child); pci_freecfg(dinfo); } void pci_delete_resource(device_t dev, device_t child, int type, int rid) { struct pci_devinfo *dinfo; struct resource_list *rl; struct resource_list_entry *rle; if (device_get_parent(child) != dev) return; dinfo = device_get_ivars(child); rl = &dinfo->resources; rle = resource_list_find(rl, type, rid); if (rle == NULL) return; if (rle->res) { if (rman_get_flags(rle->res) & RF_ACTIVE || resource_list_busy(rl, type, rid)) { device_printf(dev, "delete_resource: " "Resource still owned by child, oops. " "(type=%d, rid=%d, addr=%lx)\n", type, rid, rman_get_start(rle->res)); return; } #ifndef __PCI_BAR_ZERO_VALID /* * If this is a BAR, clear the BAR so it stops * decoding before releasing the resource. */ switch (type) { case SYS_RES_IOPORT: case SYS_RES_MEMORY: pci_write_bar(child, rid, 0); break; } #endif resource_list_unreserve(rl, dev, child, type, rid); } resource_list_delete(rl, type, rid); } struct resource_list * pci_get_resource_list (device_t dev, device_t child) { struct pci_devinfo *dinfo = device_get_ivars(child); return (&dinfo->resources); } uint32_t pci_read_config_method(device_t dev, device_t child, int reg, int width) { struct pci_devinfo *dinfo = device_get_ivars(child); pcicfgregs *cfg = &dinfo->cfg; return (PCIB_READ_CONFIG(device_get_parent(dev), cfg->bus, cfg->slot, cfg->func, reg, width)); } void pci_write_config_method(device_t dev, device_t child, int reg, uint32_t val, int width) { struct pci_devinfo *dinfo = device_get_ivars(child); pcicfgregs *cfg = &dinfo->cfg; PCIB_WRITE_CONFIG(device_get_parent(dev), cfg->bus, cfg->slot, cfg->func, reg, val, width); } int pci_child_location_str_method(device_t dev, device_t child, char *buf, size_t buflen) { snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child), pci_get_function(child)); return (0); } int pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf, size_t buflen) { struct pci_devinfo *dinfo; pcicfgregs *cfg; dinfo = device_get_ivars(child); cfg = &dinfo->cfg; snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x " "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device, cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass, cfg->progif); return (0); } int pci_assign_interrupt_method(device_t dev, device_t child) { struct pci_devinfo *dinfo = device_get_ivars(child); pcicfgregs *cfg = &dinfo->cfg; return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child, cfg->intpin)); } static int pci_modevent(module_t mod, int what, void *arg) { static struct cdev *pci_cdev; switch (what) { case MOD_LOAD: STAILQ_INIT(&pci_devq); pci_generation = 0; pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644, "pci"); pci_load_vendor_data(); break; case MOD_UNLOAD: destroy_dev(pci_cdev); break; } return (0); } void pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo) { int i; /* * Only do header type 0 devices. Type 1 devices are bridges, * which we know need special treatment. Type 2 devices are * cardbus bridges which also require special treatment. * Other types are unknown, and we err on the side of safety * by ignoring them. */ if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL) return; /* * Restore the device to full power mode. We must do this * before we restore the registers because moving from D3 to * D0 will cause the chip's BARs and some other registers to * be reset to some unknown power on reset values. Cut down * the noise on boot by doing nothing if we are already in * state D0. */ if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) pci_set_powerstate(dev, PCI_POWERSTATE_D0); for (i = 0; i < dinfo->cfg.nummaps; i++) pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4); pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4); pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2); pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1); pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1); pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1); pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1); pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1); pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1); pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1); pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1); /* Restore MSI and MSI-X configurations if they are present. */ if (dinfo->cfg.msi.msi_location != 0) pci_resume_msi(dev); if (dinfo->cfg.msix.msix_location != 0) pci_resume_msix(dev); } void pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate) { int i; uint32_t cls; int ps; /* * Only do header type 0 devices. Type 1 devices are bridges, which * we know need special treatment. Type 2 devices are cardbus bridges * which also require special treatment. Other types are unknown, and * we err on the side of safety by ignoring them. Powering down * bridges should not be undertaken lightly. */ if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL) return; for (i = 0; i < dinfo->cfg.nummaps; i++) dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4); dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4); /* * Some drivers apparently write to these registers w/o updating our * cached copy. No harm happens if we update the copy, so do so here * so we can restore them. The COMMAND register is modified by the * bus w/o updating the cache. This should represent the normally * writable portion of the 'defined' part of type 0 headers. In * theory we also need to save/restore the PCI capability structures * we know about, but apart from power we don't know any that are * writable. */ dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2); dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2); dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2); dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2); dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2); dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1); dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1); dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1); dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1); dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1); dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1); dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1); dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1); dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1); dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1); /* * don't set the state for display devices, base peripherals and * memory devices since bad things happen when they are powered down. * We should (a) have drivers that can easily detach and (b) use * generic drivers for these devices so that some device actually * attaches. We need to make sure that when we implement (a) we don't * power the device down on a reattach. */ cls = pci_get_class(dev); if (!setstate) return; switch (pci_do_power_nodriver) { case 0: /* NO powerdown at all */ return; case 1: /* Conservative about what to power down */ if (cls == PCIC_STORAGE) return; /*FALLTHROUGH*/ case 2: /* Agressive about what to power down */ if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY || cls == PCIC_BASEPERIPH) return; /*FALLTHROUGH*/ case 3: /* Power down everything */ break; } /* * PCI spec says we can only go into D3 state from D0 state. * Transition from D[12] into D0 before going to D3 state. */ ps = pci_get_powerstate(dev); if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3) pci_set_powerstate(dev, PCI_POWERSTATE_D0); if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3) pci_set_powerstate(dev, PCI_POWERSTATE_D3); } Index: projects/binutils-2.17/sys/dev/pci/pci_pci.c =================================================================== --- projects/binutils-2.17/sys/dev/pci/pci_pci.c (revision 215829) +++ projects/binutils-2.17/sys/dev/pci/pci_pci.c (revision 215830) @@ -1,878 +1,880 @@ /*- * Copyright (c) 1994,1995 Stefan Esser, Wolfgang StanglMeier * Copyright (c) 2000 Michael Smith * Copyright (c) 2000 BSDi * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * PCI:PCI bridge support. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pcib_if.h" static int pcib_probe(device_t dev); static int pcib_suspend(device_t dev); static int pcib_resume(device_t dev); static int pcib_power_for_sleep(device_t pcib, device_t dev, int *pstate); static device_method_t pcib_methods[] = { /* Device interface */ DEVMETHOD(device_probe, pcib_probe), DEVMETHOD(device_attach, pcib_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, pcib_suspend), DEVMETHOD(device_resume, pcib_resume), /* Bus interface */ DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_read_ivar, pcib_read_ivar), DEVMETHOD(bus_write_ivar, pcib_write_ivar), DEVMETHOD(bus_alloc_resource, pcib_alloc_resource), DEVMETHOD(bus_release_resource, bus_generic_release_resource), DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), /* pcib interface */ DEVMETHOD(pcib_maxslots, pcib_maxslots), DEVMETHOD(pcib_read_config, pcib_read_config), DEVMETHOD(pcib_write_config, pcib_write_config), DEVMETHOD(pcib_route_interrupt, pcib_route_interrupt), DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi), DEVMETHOD(pcib_release_msi, pcib_release_msi), DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix), DEVMETHOD(pcib_release_msix, pcib_release_msix), DEVMETHOD(pcib_map_msi, pcib_map_msi), DEVMETHOD(pcib_power_for_sleep, pcib_power_for_sleep), { 0, 0 } }; static devclass_t pcib_devclass; DEFINE_CLASS_0(pcib, pcib_driver, pcib_methods, sizeof(struct pcib_softc)); DRIVER_MODULE(pcib, pci, pcib_driver, pcib_devclass, 0, 0); /* * Is the prefetch window open (eg, can we allocate memory in it?) */ static int pcib_is_prefetch_open(struct pcib_softc *sc) { return (sc->pmembase > 0 && sc->pmembase < sc->pmemlimit); } /* * Is the nonprefetch window open (eg, can we allocate memory in it?) */ static int pcib_is_nonprefetch_open(struct pcib_softc *sc) { return (sc->membase > 0 && sc->membase < sc->memlimit); } /* * Is the io window open (eg, can we allocate ports in it?) */ static int pcib_is_io_open(struct pcib_softc *sc) { return (sc->iobase > 0 && sc->iobase < sc->iolimit); } /* * Get current I/O decode. */ static void pcib_get_io_decode(struct pcib_softc *sc) { device_t dev; uint32_t iolow; dev = sc->dev; iolow = pci_read_config(dev, PCIR_IOBASEL_1, 1); if ((iolow & PCIM_BRIO_MASK) == PCIM_BRIO_32) sc->iobase = PCI_PPBIOBASE( pci_read_config(dev, PCIR_IOBASEH_1, 2), iolow); else sc->iobase = PCI_PPBIOBASE(0, iolow); iolow = pci_read_config(dev, PCIR_IOLIMITL_1, 1); if ((iolow & PCIM_BRIO_MASK) == PCIM_BRIO_32) sc->iolimit = PCI_PPBIOLIMIT( pci_read_config(dev, PCIR_IOLIMITH_1, 2), iolow); else sc->iolimit = PCI_PPBIOLIMIT(0, iolow); } /* * Get current memory decode. */ static void pcib_get_mem_decode(struct pcib_softc *sc) { device_t dev; pci_addr_t pmemlow; dev = sc->dev; sc->membase = PCI_PPBMEMBASE(0, pci_read_config(dev, PCIR_MEMBASE_1, 2)); sc->memlimit = PCI_PPBMEMLIMIT(0, pci_read_config(dev, PCIR_MEMLIMIT_1, 2)); pmemlow = pci_read_config(dev, PCIR_PMBASEL_1, 2); if ((pmemlow & PCIM_BRPM_MASK) == PCIM_BRPM_64) sc->pmembase = PCI_PPBMEMBASE( pci_read_config(dev, PCIR_PMBASEH_1, 4), pmemlow); else sc->pmembase = PCI_PPBMEMBASE(0, pmemlow); pmemlow = pci_read_config(dev, PCIR_PMLIMITL_1, 2); if ((pmemlow & PCIM_BRPM_MASK) == PCIM_BRPM_64) sc->pmemlimit = PCI_PPBMEMLIMIT( pci_read_config(dev, PCIR_PMLIMITH_1, 4), pmemlow); else sc->pmemlimit = PCI_PPBMEMLIMIT(0, pmemlow); } /* * Restore previous I/O decode. */ static void pcib_set_io_decode(struct pcib_softc *sc) { device_t dev; uint32_t iohi; dev = sc->dev; iohi = sc->iobase >> 16; if (iohi > 0) pci_write_config(dev, PCIR_IOBASEH_1, iohi, 2); pci_write_config(dev, PCIR_IOBASEL_1, sc->iobase >> 8, 1); iohi = sc->iolimit >> 16; if (iohi > 0) pci_write_config(dev, PCIR_IOLIMITH_1, iohi, 2); pci_write_config(dev, PCIR_IOLIMITL_1, sc->iolimit >> 8, 1); } /* * Restore previous memory decode. */ static void pcib_set_mem_decode(struct pcib_softc *sc) { device_t dev; pci_addr_t pmemhi; dev = sc->dev; pci_write_config(dev, PCIR_MEMBASE_1, sc->membase >> 16, 2); pci_write_config(dev, PCIR_MEMLIMIT_1, sc->memlimit >> 16, 2); pmemhi = sc->pmembase >> 32; if (pmemhi > 0) pci_write_config(dev, PCIR_PMBASEH_1, pmemhi, 4); pci_write_config(dev, PCIR_PMBASEL_1, sc->pmembase >> 16, 2); pmemhi = sc->pmemlimit >> 32; if (pmemhi > 0) pci_write_config(dev, PCIR_PMLIMITH_1, pmemhi, 4); pci_write_config(dev, PCIR_PMLIMITL_1, sc->pmemlimit >> 16, 2); } /* * Get current bridge configuration. */ static void pcib_cfg_save(struct pcib_softc *sc) { device_t dev; dev = sc->dev; sc->command = pci_read_config(dev, PCIR_COMMAND, 2); sc->pribus = pci_read_config(dev, PCIR_PRIBUS_1, 1); sc->secbus = pci_read_config(dev, PCIR_SECBUS_1, 1); sc->subbus = pci_read_config(dev, PCIR_SUBBUS_1, 1); sc->bridgectl = pci_read_config(dev, PCIR_BRIDGECTL_1, 2); sc->seclat = pci_read_config(dev, PCIR_SECLAT_1, 1); if (sc->command & PCIM_CMD_PORTEN) pcib_get_io_decode(sc); if (sc->command & PCIM_CMD_MEMEN) pcib_get_mem_decode(sc); } /* * Restore previous bridge configuration. */ static void pcib_cfg_restore(struct pcib_softc *sc) { device_t dev; dev = sc->dev; pci_write_config(dev, PCIR_COMMAND, sc->command, 2); pci_write_config(dev, PCIR_PRIBUS_1, sc->pribus, 1); pci_write_config(dev, PCIR_SECBUS_1, sc->secbus, 1); pci_write_config(dev, PCIR_SUBBUS_1, sc->subbus, 1); pci_write_config(dev, PCIR_BRIDGECTL_1, sc->bridgectl, 2); pci_write_config(dev, PCIR_SECLAT_1, sc->seclat, 1); if (sc->command & PCIM_CMD_PORTEN) pcib_set_io_decode(sc); if (sc->command & PCIM_CMD_MEMEN) pcib_set_mem_decode(sc); } /* * Generic device interface */ static int pcib_probe(device_t dev) { if ((pci_get_class(dev) == PCIC_BRIDGE) && (pci_get_subclass(dev) == PCIS_BRIDGE_PCI)) { device_set_desc(dev, "PCI-PCI bridge"); return(-10000); } return(ENXIO); } void pcib_attach_common(device_t dev) { struct pcib_softc *sc; struct sysctl_ctx_list *sctx; struct sysctl_oid *soid; sc = device_get_softc(dev); sc->dev = dev; /* * Get current bridge configuration. */ sc->domain = pci_get_domain(dev); sc->secstat = pci_read_config(dev, PCIR_SECSTAT_1, 2); pcib_cfg_save(sc); /* * Setup sysctl reporting nodes */ sctx = device_get_sysctl_ctx(dev); soid = device_get_sysctl_tree(dev); SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "domain", CTLFLAG_RD, &sc->domain, 0, "Domain number"); SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "pribus", CTLFLAG_RD, &sc->pribus, 0, "Primary bus number"); SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "secbus", CTLFLAG_RD, &sc->secbus, 0, "Secondary bus number"); SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "subbus", CTLFLAG_RD, &sc->subbus, 0, "Subordinate bus number"); /* * Quirk handling. */ switch (pci_get_devid(dev)) { case 0x12258086: /* Intel 82454KX/GX (Orion) */ { uint8_t supbus; supbus = pci_read_config(dev, 0x41, 1); if (supbus != 0xff) { sc->secbus = supbus + 1; sc->subbus = supbus + 1; } break; } /* * The i82380FB mobile docking controller is a PCI-PCI bridge, * and it is a subtractive bridge. However, the ProgIf is wrong * so the normal setting of PCIB_SUBTRACTIVE bit doesn't * happen. There's also a Toshiba bridge that behaves this * way. */ case 0x124b8086: /* Intel 82380FB Mobile */ case 0x060513d7: /* Toshiba ???? */ sc->flags |= PCIB_SUBTRACTIVE; break; /* Compaq R3000 BIOS sets wrong subordinate bus number. */ case 0x00dd10de: { char *cp; if ((cp = getenv("smbios.planar.maker")) == NULL) break; if (strncmp(cp, "Compal", 6) != 0) { freeenv(cp); break; } freeenv(cp); if ((cp = getenv("smbios.planar.product")) == NULL) break; if (strncmp(cp, "08A0", 4) != 0) { freeenv(cp); break; } freeenv(cp); if (sc->subbus < 0xa) { pci_write_config(dev, PCIR_SUBBUS_1, 0xa, 1); sc->subbus = pci_read_config(dev, PCIR_SUBBUS_1, 1); } break; } } if (pci_msi_device_blacklisted(dev)) sc->flags |= PCIB_DISABLE_MSI; /* * Intel 815, 845 and other chipsets say they are PCI-PCI bridges, * but have a ProgIF of 0x80. The 82801 family (AA, AB, BAM/CAM, * BA/CA/DB and E) PCI bridges are HUB-PCI bridges, in Intelese. * This means they act as if they were subtractively decoding * bridges and pass all transactions. Mark them and real ProgIf 1 * parts as subtractive. */ if ((pci_get_devid(dev) & 0xff00ffff) == 0x24008086 || pci_read_config(dev, PCIR_PROGIF, 1) == PCIP_BRIDGE_PCI_SUBTRACTIVE) sc->flags |= PCIB_SUBTRACTIVE; if (bootverbose) { device_printf(dev, " domain %d\n", sc->domain); device_printf(dev, " secondary bus %d\n", sc->secbus); device_printf(dev, " subordinate bus %d\n", sc->subbus); device_printf(dev, " I/O decode 0x%x-0x%x\n", sc->iobase, sc->iolimit); if (pcib_is_nonprefetch_open(sc)) device_printf(dev, " memory decode 0x%jx-0x%jx\n", (uintmax_t)sc->membase, (uintmax_t)sc->memlimit); if (pcib_is_prefetch_open(sc)) device_printf(dev, " prefetched decode 0x%jx-0x%jx\n", (uintmax_t)sc->pmembase, (uintmax_t)sc->pmemlimit); else device_printf(dev, " no prefetched decode\n"); if (sc->flags & PCIB_SUBTRACTIVE) device_printf(dev, " Subtractively decoded bridge.\n"); } /* * XXX If the secondary bus number is zero, we should assign a bus number * since the BIOS hasn't, then initialise the bridge. A simple * bus_alloc_resource with the a couple of busses seems like the right * approach, but we don't know what busses the BIOS might have already * assigned to other bridges on this bus that probe later than we do. * * If the subordinate bus number is less than the secondary bus number, * we should pick a better value. One sensible alternative would be to * pick 255; the only tradeoff here is that configuration transactions * would be more widely routed than absolutely necessary. We could * then do a walk of the tree later and fix it. */ } int pcib_attach(device_t dev) { struct pcib_softc *sc; device_t child; pcib_attach_common(dev); sc = device_get_softc(dev); if (sc->secbus != 0) { child = device_add_child(dev, "pci", sc->secbus); if (child != NULL) return(bus_generic_attach(dev)); } /* no secondary bus; we should have fixed this */ return(0); } int pcib_suspend(device_t dev) { device_t pcib; int dstate, error; pcib_cfg_save(device_get_softc(dev)); error = bus_generic_suspend(dev); if (error == 0 && pci_do_power_suspend) { dstate = PCI_POWERSTATE_D3; pcib = device_get_parent(device_get_parent(dev)); if (PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0) pci_set_powerstate(dev, dstate); } return (error); } int pcib_resume(device_t dev) { device_t pcib; if (pci_do_power_resume) { pcib = device_get_parent(device_get_parent(dev)); if (PCIB_POWER_FOR_SLEEP(pcib, dev, NULL) == 0) pci_set_powerstate(dev, PCI_POWERSTATE_D0); } pcib_cfg_restore(device_get_softc(dev)); return (bus_generic_resume(dev)); } int pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) { struct pcib_softc *sc = device_get_softc(dev); switch (which) { case PCIB_IVAR_DOMAIN: *result = sc->domain; return(0); case PCIB_IVAR_BUS: *result = sc->secbus; return(0); } return(ENOENT); } int pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t value) { struct pcib_softc *sc = device_get_softc(dev); switch (which) { case PCIB_IVAR_DOMAIN: return(EINVAL); case PCIB_IVAR_BUS: sc->secbus = value; return(0); } return(ENOENT); } /* * We have to trap resource allocation requests and ensure that the bridge * is set up to, or capable of handling them. */ struct resource * pcib_alloc_resource(device_t dev, device_t child, int type, int *rid, u_long start, u_long end, u_long count, u_int flags) { struct pcib_softc *sc = device_get_softc(dev); const char *name, *suffix; int ok; /* * Fail the allocation for this range if it's not supported. */ name = device_get_nameunit(child); if (name == NULL) { name = ""; suffix = ""; } else suffix = " "; switch (type) { case SYS_RES_IOPORT: ok = 0; if (!pcib_is_io_open(sc)) break; ok = (start >= sc->iobase && end <= sc->iolimit); /* * Make sure we allow access to VGA I/O addresses when the * bridge has the "VGA Enable" bit set. */ if (!ok && pci_is_vga_ioport_range(start, end)) ok = (sc->bridgectl & PCIB_BCR_VGA_ENABLE) ? 1 : 0; if ((sc->flags & PCIB_SUBTRACTIVE) == 0) { if (!ok) { if (start < sc->iobase) start = sc->iobase; if (end > sc->iolimit) end = sc->iolimit; if (start < end) ok = 1; } } else { ok = 1; #if 0 /* * If we overlap with the subtractive range, then * pick the upper range to use. */ if (start < sc->iolimit && end > sc->iobase) start = sc->iolimit + 1; #endif } if (end < start) { device_printf(dev, "ioport: end (%lx) < start (%lx)\n", end, start); start = 0; end = 0; ok = 0; } if (!ok) { device_printf(dev, "%s%srequested unsupported I/O " "range 0x%lx-0x%lx (decoding 0x%x-0x%x)\n", name, suffix, start, end, sc->iobase, sc->iolimit); return (NULL); } if (bootverbose) device_printf(dev, "%s%srequested I/O range 0x%lx-0x%lx: in range\n", name, suffix, start, end); break; case SYS_RES_MEMORY: ok = 0; if (pcib_is_nonprefetch_open(sc)) ok = ok || (start >= sc->membase && end <= sc->memlimit); if (pcib_is_prefetch_open(sc)) ok = ok || (start >= sc->pmembase && end <= sc->pmemlimit); /* * Make sure we allow access to VGA memory addresses when the * bridge has the "VGA Enable" bit set. */ if (!ok && pci_is_vga_memory_range(start, end)) ok = (sc->bridgectl & PCIB_BCR_VGA_ENABLE) ? 1 : 0; if ((sc->flags & PCIB_SUBTRACTIVE) == 0) { if (!ok) { ok = 1; if (flags & RF_PREFETCHABLE) { if (pcib_is_prefetch_open(sc)) { if (start < sc->pmembase) start = sc->pmembase; if (end > sc->pmemlimit) end = sc->pmemlimit; } else { ok = 0; } } else { /* non-prefetchable */ if (pcib_is_nonprefetch_open(sc)) { if (start < sc->membase) start = sc->membase; if (end > sc->memlimit) end = sc->memlimit; } else { ok = 0; } } } } else if (!ok) { ok = 1; /* subtractive bridge: always ok */ #if 0 if (pcib_is_nonprefetch_open(sc)) { if (start < sc->memlimit && end > sc->membase) start = sc->memlimit + 1; } if (pcib_is_prefetch_open(sc)) { if (start < sc->pmemlimit && end > sc->pmembase) start = sc->pmemlimit + 1; } #endif } if (end < start) { device_printf(dev, "memory: end (%lx) < start (%lx)\n", end, start); start = 0; end = 0; ok = 0; } if (!ok && bootverbose) device_printf(dev, "%s%srequested unsupported memory range %#lx-%#lx " "(decoding %#jx-%#jx, %#jx-%#jx)\n", name, suffix, start, end, (uintmax_t)sc->membase, (uintmax_t)sc->memlimit, (uintmax_t)sc->pmembase, (uintmax_t)sc->pmemlimit); if (!ok) return (NULL); if (bootverbose) device_printf(dev,"%s%srequested memory range " "0x%lx-0x%lx: good\n", name, suffix, start, end); break; default: break; } /* * Bridge is OK decoding this resource, so pass it up. */ return (bus_generic_alloc_resource(dev, child, type, rid, start, end, count, flags)); } /* * PCIB interface. */ int pcib_maxslots(device_t dev) { return(PCI_SLOTMAX); } /* * Since we are a child of a PCI bus, its parent must support the pcib interface. */ uint32_t pcib_read_config(device_t dev, u_int b, u_int s, u_int f, u_int reg, int width) { return(PCIB_READ_CONFIG(device_get_parent(device_get_parent(dev)), b, s, f, reg, width)); } void pcib_write_config(device_t dev, u_int b, u_int s, u_int f, u_int reg, uint32_t val, int width) { PCIB_WRITE_CONFIG(device_get_parent(device_get_parent(dev)), b, s, f, reg, val, width); } /* * Route an interrupt across a PCI bridge. */ int pcib_route_interrupt(device_t pcib, device_t dev, int pin) { device_t bus; int parent_intpin; int intnum; /* * * The PCI standard defines a swizzle of the child-side device/intpin to * the parent-side intpin as follows. * * device = device on child bus * child_intpin = intpin on child bus slot (0-3) * parent_intpin = intpin on parent bus slot (0-3) * * parent_intpin = (device + child_intpin) % 4 */ parent_intpin = (pci_get_slot(dev) + (pin - 1)) % 4; /* * Our parent is a PCI bus. Its parent must export the pcib interface * which includes the ability to route interrupts. */ bus = device_get_parent(pcib); intnum = PCIB_ROUTE_INTERRUPT(device_get_parent(bus), pcib, parent_intpin + 1); if (PCI_INTERRUPT_VALID(intnum) && bootverbose) { device_printf(pcib, "slot %d INT%c is routed to irq %d\n", pci_get_slot(dev), 'A' + pin - 1, intnum); } return(intnum); } /* Pass request to alloc MSI/MSI-X messages up to the parent bridge. */ int pcib_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs) { struct pcib_softc *sc = device_get_softc(pcib); device_t bus; if (sc->flags & PCIB_DISABLE_MSI) return (ENXIO); bus = device_get_parent(pcib); return (PCIB_ALLOC_MSI(device_get_parent(bus), dev, count, maxcount, irqs)); } /* Pass request to release MSI/MSI-X messages up to the parent bridge. */ int pcib_release_msi(device_t pcib, device_t dev, int count, int *irqs) { device_t bus; bus = device_get_parent(pcib); return (PCIB_RELEASE_MSI(device_get_parent(bus), dev, count, irqs)); } /* Pass request to alloc an MSI-X message up to the parent bridge. */ int pcib_alloc_msix(device_t pcib, device_t dev, int *irq) { struct pcib_softc *sc = device_get_softc(pcib); device_t bus; if (sc->flags & PCIB_DISABLE_MSI) return (ENXIO); bus = device_get_parent(pcib); return (PCIB_ALLOC_MSIX(device_get_parent(bus), dev, irq)); } /* Pass request to release an MSI-X message up to the parent bridge. */ int pcib_release_msix(device_t pcib, device_t dev, int irq) { device_t bus; bus = device_get_parent(pcib); return (PCIB_RELEASE_MSIX(device_get_parent(bus), dev, irq)); } /* Pass request to map MSI/MSI-X message up to parent bridge. */ int pcib_map_msi(device_t pcib, device_t dev, int irq, uint64_t *addr, uint32_t *data) { device_t bus; int error; bus = device_get_parent(pcib); error = PCIB_MAP_MSI(device_get_parent(bus), dev, irq, addr, data); if (error) return (error); pci_ht_map_msi(pcib, *addr); return (0); } /* Pass request for device power state up to parent bridge. */ int pcib_power_for_sleep(device_t pcib, device_t dev, int *pstate) { device_t bus; bus = device_get_parent(pcib); return (PCIB_POWER_FOR_SLEEP(bus, dev, pstate)); } /* * Try to read the bus number of a host-PCI bridge using appropriate config * registers. */ int host_pcib_get_busno(pci_read_config_fn read_config, int bus, int slot, int func, uint8_t *busnum) { uint32_t id; id = read_config(bus, slot, func, PCIR_DEVVENDOR, 4); if (id == 0xffffffff) return (0); switch (id) { case 0x12258086: /* Intel 824?? */ /* XXX This is a guess */ /* *busnum = read_config(bus, slot, func, 0x41, 1); */ *busnum = bus; break; case 0x84c48086: /* Intel 82454KX/GX (Orion) */ *busnum = read_config(bus, slot, func, 0x4a, 1); break; case 0x84ca8086: /* * For the 450nx chipset, there is a whole bundle of * things pretending to be host bridges. The MIOC will * be seen first and isn't really a pci bridge (the * actual busses are attached to the PXB's). We need to * read the registers of the MIOC to figure out the * bus numbers for the PXB channels. * * Since the MIOC doesn't have a pci bus attached, we * pretend it wasn't there. */ return (0); case 0x84cb8086: switch (slot) { case 0x12: /* Intel 82454NX PXB#0, Bus#A */ *busnum = read_config(bus, 0x10, func, 0xd0, 1); break; case 0x13: /* Intel 82454NX PXB#0, Bus#B */ *busnum = read_config(bus, 0x10, func, 0xd1, 1) + 1; break; case 0x14: /* Intel 82454NX PXB#1, Bus#A */ *busnum = read_config(bus, 0x10, func, 0xd3, 1); break; case 0x15: /* Intel 82454NX PXB#1, Bus#B */ *busnum = read_config(bus, 0x10, func, 0xd4, 1) + 1; break; } break; /* ServerWorks -- vendor 0x1166 */ case 0x00051166: case 0x00061166: case 0x00081166: case 0x00091166: case 0x00101166: case 0x00111166: case 0x00171166: case 0x01011166: case 0x010f1014: + case 0x01101166: case 0x02011166: + case 0x02251166: case 0x03021014: *busnum = read_config(bus, slot, func, 0x44, 1); break; /* Compaq/HP -- vendor 0x0e11 */ case 0x60100e11: *busnum = read_config(bus, slot, func, 0xc8, 1); break; default: /* Don't know how to read bus number. */ return 0; } return 1; } Index: projects/binutils-2.17/sys/dev/usb/controller/usb_controller.c =================================================================== --- projects/binutils-2.17/sys/dev/usb/controller/usb_controller.c (revision 215829) +++ projects/binutils-2.17/sys/dev/usb/controller/usb_controller.c (revision 215830) @@ -1,630 +1,630 @@ /* $FreeBSD$ */ /*- * Copyright (c) 2008 Hans Petter Selasky. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define USB_DEBUG_VAR usb_ctrl_debug #include #include #include #include #include #include #include #include #include #include /* function prototypes */ static device_probe_t usb_probe; static device_attach_t usb_attach; static device_detach_t usb_detach; static void usb_attach_sub(device_t, struct usb_bus *); /* static variables */ #ifdef USB_DEBUG static int usb_ctrl_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, ctrl, CTLFLAG_RW, 0, "USB controller"); SYSCTL_INT(_hw_usb_ctrl, OID_AUTO, debug, CTLFLAG_RW, &usb_ctrl_debug, 0, "Debug level"); #endif static int usb_no_boot_wait = 0; TUNABLE_INT("hw.usb.no_boot_wait", &usb_no_boot_wait); SYSCTL_INT(_hw_usb, OID_AUTO, no_boot_wait, CTLFLAG_RDTUN, &usb_no_boot_wait, 0, "No device enumerate waiting at boot."); static devclass_t usb_devclass; static device_method_t usb_methods[] = { DEVMETHOD(device_probe, usb_probe), DEVMETHOD(device_attach, usb_attach), DEVMETHOD(device_detach, usb_detach), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), DEVMETHOD(device_shutdown, bus_generic_shutdown), {0, 0} }; static driver_t usb_driver = { .name = "usbus", .methods = usb_methods, .size = 0, }; /* Host Only Drivers */ DRIVER_MODULE(usbus, ohci, usb_driver, usb_devclass, 0, 0); DRIVER_MODULE(usbus, uhci, usb_driver, usb_devclass, 0, 0); DRIVER_MODULE(usbus, ehci, usb_driver, usb_devclass, 0, 0); DRIVER_MODULE(usbus, xhci, usb_driver, usb_devclass, 0, 0); /* Device Only Drivers */ DRIVER_MODULE(usbus, at91_udp, usb_driver, usb_devclass, 0, 0); DRIVER_MODULE(usbus, musbotg, usb_driver, usb_devclass, 0, 0); DRIVER_MODULE(usbus, uss820, usb_driver, usb_devclass, 0, 0); /*------------------------------------------------------------------------* * usb_probe * * This function is called from "{ehci,ohci,uhci}_pci_attach()". *------------------------------------------------------------------------*/ static int usb_probe(device_t dev) { DPRINTF("\n"); return (0); } static void usb_root_mount_rel(struct usb_bus *bus) { if (bus->bus_roothold != NULL) { DPRINTF("Releasing root mount hold %p\n", bus->bus_roothold); root_mount_rel(bus->bus_roothold); bus->bus_roothold = NULL; } } /*------------------------------------------------------------------------* * usb_attach *------------------------------------------------------------------------*/ static int usb_attach(device_t dev) { struct usb_bus *bus = device_get_ivars(dev); DPRINTF("\n"); if (bus == NULL) { device_printf(dev, "USB device has no ivars\n"); return (ENXIO); } if (usb_no_boot_wait == 0) { /* delay vfs_mountroot until the bus is explored */ bus->bus_roothold = root_mount_hold(device_get_nameunit(dev)); } usb_attach_sub(dev, bus); return (0); /* return success */ } /*------------------------------------------------------------------------* * usb_detach *------------------------------------------------------------------------*/ static int usb_detach(device_t dev) { struct usb_bus *bus = device_get_softc(dev); DPRINTF("\n"); if (bus == NULL) { /* was never setup properly */ return (0); } /* Stop power watchdog */ usb_callout_drain(&bus->power_wdog); /* Let the USB explore process detach all devices. */ usb_root_mount_rel(bus); USB_BUS_LOCK(bus); if (usb_proc_msignal(&bus->explore_proc, &bus->detach_msg[0], &bus->detach_msg[1])) { /* ignore */ } /* Wait for detach to complete */ usb_proc_mwait(&bus->explore_proc, &bus->detach_msg[0], &bus->detach_msg[1]); USB_BUS_UNLOCK(bus); /* Get rid of USB callback processes */ usb_proc_free(&bus->giant_callback_proc); usb_proc_free(&bus->non_giant_callback_proc); /* Get rid of USB explore process */ usb_proc_free(&bus->explore_proc); /* Get rid of control transfer process */ usb_proc_free(&bus->control_xfer_proc); + usbpf_detach(bus); + return (0); } /*------------------------------------------------------------------------* * usb_bus_explore * * This function is used to explore the device tree from the root. *------------------------------------------------------------------------*/ static void usb_bus_explore(struct usb_proc_msg *pm) { struct usb_bus *bus; struct usb_device *udev; bus = ((struct usb_bus_msg *)pm)->bus; udev = bus->devices[USB_ROOT_HUB_ADDR]; if (udev && udev->hub) { if (bus->do_probe) { bus->do_probe = 0; bus->driver_added_refcount++; } if (bus->driver_added_refcount == 0) { /* avoid zero, hence that is memory default */ bus->driver_added_refcount = 1; } #ifdef DDB /* * The following three lines of code are only here to * recover from DDB: */ usb_proc_rewakeup(&bus->control_xfer_proc); usb_proc_rewakeup(&bus->giant_callback_proc); usb_proc_rewakeup(&bus->non_giant_callback_proc); #endif USB_BUS_UNLOCK(bus); #if USB_HAVE_POWERD /* * First update the USB power state! */ usb_bus_powerd(bus); #endif /* Explore the Root USB HUB. */ (udev->hub->explore) (udev); USB_BUS_LOCK(bus); } usb_root_mount_rel(bus); } /*------------------------------------------------------------------------* * usb_bus_detach * * This function is used to detach the device tree from the root. *------------------------------------------------------------------------*/ static void usb_bus_detach(struct usb_proc_msg *pm) { struct usb_bus *bus; struct usb_device *udev; device_t dev; bus = ((struct usb_bus_msg *)pm)->bus; udev = bus->devices[USB_ROOT_HUB_ADDR]; dev = bus->bdev; /* clear the softc */ device_set_softc(dev, NULL); USB_BUS_UNLOCK(bus); /* detach children first */ mtx_lock(&Giant); bus_generic_detach(dev); mtx_unlock(&Giant); /* * Free USB device and all subdevices, if any. */ usb_free_device(udev, 0); USB_BUS_LOCK(bus); /* clear bdev variable last */ bus->bdev = NULL; } static void usb_power_wdog(void *arg) { struct usb_bus *bus = arg; USB_BUS_LOCK_ASSERT(bus, MA_OWNED); usb_callout_reset(&bus->power_wdog, 4 * hz, usb_power_wdog, arg); #ifdef DDB /* * The following line of code is only here to recover from * DDB: */ usb_proc_rewakeup(&bus->explore_proc); /* recover from DDB */ #endif #if USB_HAVE_POWERD USB_BUS_UNLOCK(bus); usb_bus_power_update(bus); USB_BUS_LOCK(bus); #endif } /*------------------------------------------------------------------------* * usb_bus_attach * * This function attaches USB in context of the explore thread. *------------------------------------------------------------------------*/ static void usb_bus_attach(struct usb_proc_msg *pm) { struct usb_bus *bus; struct usb_device *child; device_t dev; usb_error_t err; enum usb_dev_speed speed; bus = ((struct usb_bus_msg *)pm)->bus; dev = bus->bdev; DPRINTF("\n"); switch (bus->usbrev) { case USB_REV_1_0: speed = USB_SPEED_FULL; device_printf(bus->bdev, "12Mbps Full Speed USB v1.0\n"); break; case USB_REV_1_1: speed = USB_SPEED_FULL; device_printf(bus->bdev, "12Mbps Full Speed USB v1.1\n"); break; case USB_REV_2_0: speed = USB_SPEED_HIGH; device_printf(bus->bdev, "480Mbps High Speed USB v2.0\n"); break; case USB_REV_2_5: speed = USB_SPEED_VARIABLE; device_printf(bus->bdev, "480Mbps Wireless USB v2.5\n"); break; case USB_REV_3_0: speed = USB_SPEED_SUPER; device_printf(bus->bdev, "4.8Gbps Super Speed USB v3.0\n"); break; default: device_printf(bus->bdev, "Unsupported USB revision\n"); usb_root_mount_rel(bus); return; } USB_BUS_UNLOCK(bus); /* default power_mask value */ bus->hw_power_state = USB_HW_POWER_CONTROL | USB_HW_POWER_BULK | USB_HW_POWER_INTERRUPT | USB_HW_POWER_ISOC | USB_HW_POWER_NON_ROOT_HUB; /* make sure power is set at least once */ if (bus->methods->set_hw_power != NULL) { (bus->methods->set_hw_power) (bus); } /* Allocate the Root USB device */ child = usb_alloc_device(bus->bdev, bus, NULL, 0, 0, 1, speed, USB_MODE_HOST); if (child) { err = usb_probe_and_attach(child, USB_IFACE_INDEX_ANY); if (!err) { if ((bus->devices[USB_ROOT_HUB_ADDR] == NULL) || (bus->devices[USB_ROOT_HUB_ADDR]->hub == NULL)) { err = USB_ERR_NO_ROOT_HUB; } } } else { err = USB_ERR_NOMEM; } USB_BUS_LOCK(bus); if (err) { device_printf(bus->bdev, "Root HUB problem, error=%s\n", usbd_errstr(err)); usb_root_mount_rel(bus); } /* set softc - we are ready */ device_set_softc(dev, bus); /* start watchdog */ usb_power_wdog(bus); } /*------------------------------------------------------------------------* * usb_attach_sub * * This function creates a thread which runs the USB attach code. *------------------------------------------------------------------------*/ static void usb_attach_sub(device_t dev, struct usb_bus *bus) { const char *pname = device_get_nameunit(dev); mtx_lock(&Giant); if (usb_devclass_ptr == NULL) usb_devclass_ptr = devclass_find("usbus"); mtx_unlock(&Giant); + usbpf_attach(bus); + /* Initialise USB process messages */ bus->explore_msg[0].hdr.pm_callback = &usb_bus_explore; bus->explore_msg[0].bus = bus; bus->explore_msg[1].hdr.pm_callback = &usb_bus_explore; bus->explore_msg[1].bus = bus; bus->detach_msg[0].hdr.pm_callback = &usb_bus_detach; bus->detach_msg[0].bus = bus; bus->detach_msg[1].hdr.pm_callback = &usb_bus_detach; bus->detach_msg[1].bus = bus; bus->attach_msg[0].hdr.pm_callback = &usb_bus_attach; bus->attach_msg[0].bus = bus; bus->attach_msg[1].hdr.pm_callback = &usb_bus_attach; bus->attach_msg[1].bus = bus; /* Create USB explore and callback processes */ if (usb_proc_create(&bus->giant_callback_proc, &bus->bus_mtx, pname, USB_PRI_MED)) { printf("WARNING: Creation of USB Giant " "callback process failed.\n"); } else if (usb_proc_create(&bus->non_giant_callback_proc, &bus->bus_mtx, pname, USB_PRI_HIGH)) { printf("WARNING: Creation of USB non-Giant " "callback process failed.\n"); } else if (usb_proc_create(&bus->explore_proc, &bus->bus_mtx, pname, USB_PRI_MED)) { printf("WARNING: Creation of USB explore " "process failed.\n"); } else if (usb_proc_create(&bus->control_xfer_proc, &bus->bus_mtx, pname, USB_PRI_MED)) { printf("WARNING: Creation of USB control transfer " "process failed.\n"); } else { /* Get final attach going */ USB_BUS_LOCK(bus); if (usb_proc_msignal(&bus->explore_proc, &bus->attach_msg[0], &bus->attach_msg[1])) { /* ignore */ } USB_BUS_UNLOCK(bus); /* Do initial explore */ usb_needs_explore(bus, 1); } } SYSUNINIT(usb_bus_unload, SI_SUB_KLD, SI_ORDER_ANY, usb_bus_unload, NULL); /*------------------------------------------------------------------------* * usb_bus_mem_flush_all_cb *------------------------------------------------------------------------*/ #if USB_HAVE_BUSDMA static void usb_bus_mem_flush_all_cb(struct usb_bus *bus, struct usb_page_cache *pc, struct usb_page *pg, usb_size_t size, usb_size_t align) { usb_pc_cpu_flush(pc); } #endif /*------------------------------------------------------------------------* * usb_bus_mem_flush_all - factored out code *------------------------------------------------------------------------*/ #if USB_HAVE_BUSDMA void usb_bus_mem_flush_all(struct usb_bus *bus, usb_bus_mem_cb_t *cb) { if (cb) { cb(bus, &usb_bus_mem_flush_all_cb); } } #endif /*------------------------------------------------------------------------* * usb_bus_mem_alloc_all_cb *------------------------------------------------------------------------*/ #if USB_HAVE_BUSDMA static void usb_bus_mem_alloc_all_cb(struct usb_bus *bus, struct usb_page_cache *pc, struct usb_page *pg, usb_size_t size, usb_size_t align) { /* need to initialize the page cache */ pc->tag_parent = bus->dma_parent_tag; if (usb_pc_alloc_mem(pc, pg, size, align)) { bus->alloc_failed = 1; } } #endif /*------------------------------------------------------------------------* * usb_bus_mem_alloc_all - factored out code * * Returns: * 0: Success * Else: Failure *------------------------------------------------------------------------*/ uint8_t usb_bus_mem_alloc_all(struct usb_bus *bus, bus_dma_tag_t dmat, usb_bus_mem_cb_t *cb) { bus->alloc_failed = 0; mtx_init(&bus->bus_mtx, device_get_nameunit(bus->parent), NULL, MTX_DEF | MTX_RECURSE); usb_callout_init_mtx(&bus->power_wdog, &bus->bus_mtx, 0); TAILQ_INIT(&bus->intr_q.head); - usbpf_attach(bus, &bus->uif); - #if USB_HAVE_BUSDMA usb_dma_tag_setup(bus->dma_parent_tag, bus->dma_tags, dmat, &bus->bus_mtx, NULL, 32, USB_BUS_DMA_TAG_MAX); #endif if ((bus->devices_max > USB_MAX_DEVICES) || (bus->devices_max < USB_MIN_DEVICES) || (bus->devices == NULL)) { DPRINTFN(0, "Devices field has not been " "initialised properly\n"); bus->alloc_failed = 1; /* failure */ } #if USB_HAVE_BUSDMA if (cb) { cb(bus, &usb_bus_mem_alloc_all_cb); } #endif if (bus->alloc_failed) { usb_bus_mem_free_all(bus, cb); } return (bus->alloc_failed); } /*------------------------------------------------------------------------* * usb_bus_mem_free_all_cb *------------------------------------------------------------------------*/ #if USB_HAVE_BUSDMA static void usb_bus_mem_free_all_cb(struct usb_bus *bus, struct usb_page_cache *pc, struct usb_page *pg, usb_size_t size, usb_size_t align) { usb_pc_free_mem(pc); } #endif /*------------------------------------------------------------------------* * usb_bus_mem_free_all - factored out code *------------------------------------------------------------------------*/ void usb_bus_mem_free_all(struct usb_bus *bus, usb_bus_mem_cb_t *cb) { #if USB_HAVE_BUSDMA if (cb) { cb(bus, &usb_bus_mem_free_all_cb); } usb_dma_tag_unsetup(bus->dma_parent_tag); #endif - - usbpf_detach(bus); mtx_destroy(&bus->bus_mtx); } struct usb_bus * usb_bus_find(const char *name) { struct usb_bus *ubus; devclass_t dc; device_t *devlist; int devcount, error, i; const char *nameunit; dc = devclass_find("usbus"); if (dc == NULL) return (NULL); error = devclass_get_devices(dc, &devlist, &devcount); if (error != 0) return (NULL); for (i = 0; i < devcount; i++) { nameunit = device_get_nameunit(devlist[i]); if (!strncmp(name, nameunit, strlen(nameunit))) { ubus = device_get_ivars(devlist[i]); free(devlist, M_TEMP); return (ubus); } } free(devlist, M_TEMP); return (NULL); } Index: projects/binutils-2.17/sys/dev/usb/serial/u3g.c =================================================================== --- projects/binutils-2.17/sys/dev/usb/serial/u3g.c (revision 215829) +++ projects/binutils-2.17/sys/dev/usb/serial/u3g.c (revision 215830) @@ -1,982 +1,982 @@ /* * Copyright (c) 2008 AnyWi Technologies * Author: Andrea Guzzo * * based on uark.c 1.1 2006/08/14 08:30:22 jsg * * * parts from ubsa.c 183348 2008-09-25 12:00:56Z phk * * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * * $FreeBSD$ */ /* * NOTE: * * - The detour through the tty layer is ridiculously expensive wrt * buffering due to the high speeds. * * We should consider adding a simple r/w device which allows * attaching of PPP in a more efficient way. * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "usbdevs.h" #define USB_DEBUG_VAR u3g_debug #include #include #include #include #include #ifdef USB_DEBUG static int u3g_debug = 0; SYSCTL_NODE(_hw_usb, OID_AUTO, u3g, CTLFLAG_RW, 0, "USB 3g"); SYSCTL_INT(_hw_usb_u3g, OID_AUTO, debug, CTLFLAG_RW, &u3g_debug, 0, "Debug level"); #endif #define U3G_MAXPORTS 12 #define U3G_CONFIG_INDEX 0 #define U3G_BSIZE 2048 #define U3GSP_GPRS 0 #define U3GSP_EDGE 1 #define U3GSP_CDMA 2 #define U3GSP_UMTS 3 #define U3GSP_HSDPA 4 #define U3GSP_HSUPA 5 #define U3GSP_HSPA 6 #define U3GSP_MAX 7 /* Eject methods; See also usb_quirks.h:UQ_MSC_EJECT_* */ #define U3GINIT_HUAWEI 1 /* Requires Huawei init command */ #define U3GINIT_SIERRA 2 /* Requires Sierra init command */ #define U3GINIT_SCSIEJECT 3 /* Requires SCSI eject command */ #define U3GINIT_REZERO 4 /* Requires SCSI rezero command */ #define U3GINIT_ZTESTOR 5 /* Requires ZTE SCSI command */ #define U3GINIT_CMOTECH 6 /* Requires CMOTECH SCSI command */ #define U3GINIT_WAIT 7 /* Device reappears after a delay */ #define U3GINIT_SAEL_M460 8 /* Requires vendor init */ #define U3GINIT_HUAWEISCSI 9 /* Requires Huawei SCSI init command */ #define U3GINIT_TCT 10 /* Requires TCT Mobile init command */ enum { U3G_BULK_WR, U3G_BULK_RD, U3G_N_TRANSFER, }; struct u3g_softc { struct ucom_super_softc sc_super_ucom; struct ucom_softc sc_ucom[U3G_MAXPORTS]; struct usb_xfer *sc_xfer[U3G_MAXPORTS][U3G_N_TRANSFER]; struct usb_device *sc_udev; struct mtx sc_mtx; uint8_t sc_lsr; /* local status register */ uint8_t sc_msr; /* U3G status register */ uint8_t sc_numports; }; static device_probe_t u3g_probe; static device_attach_t u3g_attach; static device_detach_t u3g_detach; static usb_callback_t u3g_write_callback; static usb_callback_t u3g_read_callback; static void u3g_start_read(struct ucom_softc *ucom); static void u3g_stop_read(struct ucom_softc *ucom); static void u3g_start_write(struct ucom_softc *ucom); static void u3g_stop_write(struct ucom_softc *ucom); static void u3g_test_autoinst(void *, struct usb_device *, struct usb_attach_arg *); static int u3g_driver_loaded(struct module *mod, int what, void *arg); static eventhandler_tag u3g_etag; static const struct usb_config u3g_config[U3G_N_TRANSFER] = { [U3G_BULK_WR] = { .type = UE_BULK, .endpoint = UE_ADDR_ANY, .direction = UE_DIR_OUT, .bufsize = U3G_BSIZE,/* bytes */ .flags = {.pipe_bof = 1,.force_short_xfer = 1,}, .callback = &u3g_write_callback, }, [U3G_BULK_RD] = { .type = UE_BULK, .endpoint = UE_ADDR_ANY, .direction = UE_DIR_IN, .bufsize = U3G_BSIZE,/* bytes */ .flags = {.pipe_bof = 1,.short_xfer_ok = 1,}, .callback = &u3g_read_callback, }, }; static const struct ucom_callback u3g_callback = { .ucom_start_read = &u3g_start_read, .ucom_stop_read = &u3g_stop_read, .ucom_start_write = &u3g_start_write, .ucom_stop_write = &u3g_stop_write, }; static device_method_t u3g_methods[] = { DEVMETHOD(device_probe, u3g_probe), DEVMETHOD(device_attach, u3g_attach), DEVMETHOD(device_detach, u3g_detach), {0, 0} }; static devclass_t u3g_devclass; static driver_t u3g_driver = { .name = "u3g", .methods = u3g_methods, .size = sizeof(struct u3g_softc), }; DRIVER_MODULE(u3g, uhub, u3g_driver, u3g_devclass, u3g_driver_loaded, 0); MODULE_DEPEND(u3g, ucom, 1, 1, 1); MODULE_DEPEND(u3g, usb, 1, 1, 1); MODULE_VERSION(u3g, 1); static const struct usb_device_id u3g_devs[] = { #define U3G_DEV(v,p,i) { USB_VPI(USB_VENDOR_##v, USB_PRODUCT_##v##_##p, i) } U3G_DEV(ACERP, H10, 0), U3G_DEV(AIRPLUS, MCD650, 0), U3G_DEV(AIRPRIME, PC5220, 0), U3G_DEV(ALINK, 3G, 0), U3G_DEV(ALINK, 3GU, 0), U3G_DEV(ALINK, DWM652U5, 0), U3G_DEV(AMOI, H01, 0), U3G_DEV(AMOI, H01A, 0), U3G_DEV(AMOI, H02, 0), U3G_DEV(ANYDATA, ADU_500A, 0), U3G_DEV(ANYDATA, ADU_620UW, 0), U3G_DEV(ANYDATA, ADU_E100X, 0), U3G_DEV(AXESSTEL, DATAMODEM, 0), U3G_DEV(CMOTECH, CDMA_MODEM1, 0), U3G_DEV(CMOTECH, CGU628, U3GINIT_CMOTECH), U3G_DEV(DELL, U5500, 0), U3G_DEV(DELL, U5505, 0), U3G_DEV(DELL, U5510, 0), U3G_DEV(DELL, U5520, 0), U3G_DEV(DELL, U5520_2, 0), U3G_DEV(DELL, U5520_3, 0), U3G_DEV(DELL, U5700, 0), U3G_DEV(DELL, U5700_2, 0), U3G_DEV(DELL, U5700_3, 0), U3G_DEV(DELL, U5700_4, 0), U3G_DEV(DELL, U5720, 0), U3G_DEV(DELL, U5720_2, 0), U3G_DEV(DELL, U5730, 0), U3G_DEV(DELL, U5730_2, 0), U3G_DEV(DELL, U5730_3, 0), U3G_DEV(DELL, U740, 0), U3G_DEV(DLINK3, DWM652, 0), U3G_DEV(HP, EV2200, 0), U3G_DEV(HP, HS2300, 0), U3G_DEV(HUAWEI, E1401, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1402, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1403, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1404, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1405, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1406, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1407, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1408, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1409, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E140A, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E140B, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E140D, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E140E, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E140F, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1410, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1411, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1412, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1413, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1414, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1415, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1416, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1417, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1418, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1419, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E141A, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E141B, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E141C, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E141D, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E141E, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E141F, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1420, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1421, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1422, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1423, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1424, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1425, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1426, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1427, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1428, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1429, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E142A, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E142B, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E142C, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E142D, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E142E, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E142F, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1430, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1431, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1432, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1433, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1434, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1435, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1436, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1437, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1438, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1439, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E143A, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E143B, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E143C, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E143D, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E143E, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E143F, U3GINIT_HUAWEI), - U3G_DEV(HUAWEI, E14AC, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E180V, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E220, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E220BIS, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, MOBILE, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, E1752, U3GINIT_HUAWEISCSI), + U3G_DEV(HUAWEI, E1820, U3GINIT_HUAWEISCSI), U3G_DEV(HUAWEI, K3765, U3GINIT_HUAWEI), U3G_DEV(HUAWEI, K3765_INIT, U3GINIT_HUAWEISCSI), U3G_DEV(KYOCERA2, CDMA_MSM_K, 0), U3G_DEV(KYOCERA2, KPC680, 0), U3G_DEV(LONGCHEER, WM66, U3GINIT_HUAWEI), U3G_DEV(LONGCHEER, DISK, U3GINIT_TCT), U3G_DEV(LONGCHEER, W14, 0), U3G_DEV(MERLIN, V620, 0), U3G_DEV(NEOTEL, PRIME, 0), U3G_DEV(NOVATEL, E725, 0), U3G_DEV(NOVATEL, ES620, 0), U3G_DEV(NOVATEL, ES620_2, 0), U3G_DEV(NOVATEL, EU730, 0), U3G_DEV(NOVATEL, EU740, 0), U3G_DEV(NOVATEL, EU870D, 0), U3G_DEV(NOVATEL, MC760, 0), U3G_DEV(NOVATEL, MC950D, 0), U3G_DEV(NOVATEL, U720, 0), U3G_DEV(NOVATEL, U727, 0), U3G_DEV(NOVATEL, U727_2, 0), U3G_DEV(NOVATEL, U740, 0), U3G_DEV(NOVATEL, U740_2, 0), U3G_DEV(NOVATEL, U760, U3GINIT_SCSIEJECT), U3G_DEV(NOVATEL, U870, 0), U3G_DEV(NOVATEL, V620, 0), U3G_DEV(NOVATEL, V640, 0), U3G_DEV(NOVATEL, V720, 0), U3G_DEV(NOVATEL, V740, 0), U3G_DEV(NOVATEL, X950D, 0), U3G_DEV(NOVATEL, XU870, 0), U3G_DEV(OPTION, E6500, 0), U3G_DEV(OPTION, E6501, 0), U3G_DEV(OPTION, E6601, 0), U3G_DEV(OPTION, E6721, 0), U3G_DEV(OPTION, E6741, 0), U3G_DEV(OPTION, E6761, 0), U3G_DEV(OPTION, E6800, 0), U3G_DEV(OPTION, E7021, 0), U3G_DEV(OPTION, E7041, 0), U3G_DEV(OPTION, E7061, 0), U3G_DEV(OPTION, E7100, 0), U3G_DEV(OPTION, GE40X, 0), U3G_DEV(OPTION, GT3G, 0), U3G_DEV(OPTION, GT3GPLUS, 0), U3G_DEV(OPTION, GT3GQUAD, 0), U3G_DEV(OPTION, GT3G_1, 0), U3G_DEV(OPTION, GT3G_2, 0), U3G_DEV(OPTION, GT3G_3, 0), U3G_DEV(OPTION, GT3G_4, 0), U3G_DEV(OPTION, GT3G_5, 0), U3G_DEV(OPTION, GT3G_6, 0), U3G_DEV(OPTION, GTHSDPA, 0), U3G_DEV(OPTION, GTM380, 0), U3G_DEV(OPTION, GTMAX36, 0), U3G_DEV(OPTION, GTMAX380HSUPAE, 0), U3G_DEV(OPTION, GTMAXHSUPA, 0), U3G_DEV(OPTION, GTMAXHSUPAE, 0), U3G_DEV(OPTION, VODAFONEMC3G, 0), U3G_DEV(QISDA, H20_1, 0), U3G_DEV(QISDA, H20_2, 0), U3G_DEV(QISDA, H21_1, 0), U3G_DEV(QISDA, H21_2, 0), U3G_DEV(QUALCOMM2, AC8700, 0), U3G_DEV(QUALCOMM2, MF330, 0), U3G_DEV(QUALCOMMINC, AC2726, 0), U3G_DEV(QUALCOMMINC, AC8700, 0), U3G_DEV(QUALCOMMINC, AC8710, 0), U3G_DEV(QUALCOMMINC, CDMA_MSM, U3GINIT_SCSIEJECT), U3G_DEV(QUALCOMMINC, E0002, 0), U3G_DEV(QUALCOMMINC, E0003, 0), U3G_DEV(QUALCOMMINC, E0004, 0), U3G_DEV(QUALCOMMINC, E0005, 0), U3G_DEV(QUALCOMMINC, E0006, 0), U3G_DEV(QUALCOMMINC, E0007, 0), U3G_DEV(QUALCOMMINC, E0008, 0), U3G_DEV(QUALCOMMINC, E0009, 0), U3G_DEV(QUALCOMMINC, E000A, 0), U3G_DEV(QUALCOMMINC, E000B, 0), U3G_DEV(QUALCOMMINC, E000C, 0), U3G_DEV(QUALCOMMINC, E000D, 0), U3G_DEV(QUALCOMMINC, E000E, 0), U3G_DEV(QUALCOMMINC, E000F, 0), U3G_DEV(QUALCOMMINC, E0010, 0), U3G_DEV(QUALCOMMINC, E0011, 0), U3G_DEV(QUALCOMMINC, E0012, 0), U3G_DEV(QUALCOMMINC, E0013, 0), U3G_DEV(QUALCOMMINC, E0014, 0), U3G_DEV(QUALCOMMINC, E0017, 0), U3G_DEV(QUALCOMMINC, E0018, 0), U3G_DEV(QUALCOMMINC, E0019, 0), U3G_DEV(QUALCOMMINC, E0020, 0), U3G_DEV(QUALCOMMINC, E0021, 0), U3G_DEV(QUALCOMMINC, E0022, 0), U3G_DEV(QUALCOMMINC, E0023, 0), U3G_DEV(QUALCOMMINC, E0024, 0), U3G_DEV(QUALCOMMINC, E0025, 0), U3G_DEV(QUALCOMMINC, E0026, 0), U3G_DEV(QUALCOMMINC, E0027, 0), U3G_DEV(QUALCOMMINC, E0028, 0), U3G_DEV(QUALCOMMINC, E0029, 0), U3G_DEV(QUALCOMMINC, E0030, 0), U3G_DEV(QUALCOMMINC, E0032, 0), U3G_DEV(QUALCOMMINC, E0033, 0), U3G_DEV(QUALCOMMINC, E0037, 0), U3G_DEV(QUALCOMMINC, E0039, 0), U3G_DEV(QUALCOMMINC, E0042, 0), U3G_DEV(QUALCOMMINC, E0043, 0), U3G_DEV(QUALCOMMINC, E0048, 0), U3G_DEV(QUALCOMMINC, E0049, 0), U3G_DEV(QUALCOMMINC, E0051, 0), U3G_DEV(QUALCOMMINC, E0052, 0), U3G_DEV(QUALCOMMINC, E0054, 0), U3G_DEV(QUALCOMMINC, E0055, 0), U3G_DEV(QUALCOMMINC, E0057, 0), U3G_DEV(QUALCOMMINC, E0058, 0), U3G_DEV(QUALCOMMINC, E0059, 0), U3G_DEV(QUALCOMMINC, E0060, 0), U3G_DEV(QUALCOMMINC, E0061, 0), U3G_DEV(QUALCOMMINC, E0062, 0), U3G_DEV(QUALCOMMINC, E0063, 0), U3G_DEV(QUALCOMMINC, E0064, 0), U3G_DEV(QUALCOMMINC, E0066, 0), U3G_DEV(QUALCOMMINC, E0069, 0), U3G_DEV(QUALCOMMINC, E0070, 0), U3G_DEV(QUALCOMMINC, E0073, 0), U3G_DEV(QUALCOMMINC, E0076, 0), U3G_DEV(QUALCOMMINC, E0078, 0), U3G_DEV(QUALCOMMINC, E0082, 0), U3G_DEV(QUALCOMMINC, E0086, 0), U3G_DEV(QUALCOMMINC, E2002, 0), U3G_DEV(QUALCOMMINC, E2003, 0), U3G_DEV(QUALCOMMINC, MF626, 0), U3G_DEV(QUALCOMMINC, MF628, 0), U3G_DEV(QUALCOMMINC, MF633R, 0), U3G_DEV(QUANTA, GKE, 0), U3G_DEV(QUANTA, GLE, 0), U3G_DEV(QUANTA, GLX, 0), U3G_DEV(QUANTA, Q101, 0), U3G_DEV(QUANTA, Q111, 0), U3G_DEV(SIERRA, AC402, 0), U3G_DEV(SIERRA, AC595U, 0), U3G_DEV(SIERRA, AC597E, 0), U3G_DEV(SIERRA, AC875E, 0), U3G_DEV(SIERRA, AC875U, 0), U3G_DEV(SIERRA, AC875U_2, 0), U3G_DEV(SIERRA, AC880, 0), U3G_DEV(SIERRA, AC880E, 0), U3G_DEV(SIERRA, AC880U, 0), U3G_DEV(SIERRA, AC881, 0), U3G_DEV(SIERRA, AC881E, 0), U3G_DEV(SIERRA, AC881U, 0), U3G_DEV(SIERRA, AC885E, 0), U3G_DEV(SIERRA, AC885E_2, 0), U3G_DEV(SIERRA, AC885U, 0), U3G_DEV(SIERRA, AIRCARD580, 0), U3G_DEV(SIERRA, AIRCARD595, 0), U3G_DEV(SIERRA, AIRCARD875, 0), U3G_DEV(SIERRA, C22, 0), U3G_DEV(SIERRA, C597, 0), U3G_DEV(SIERRA, C888, 0), U3G_DEV(SIERRA, E0029, 0), U3G_DEV(SIERRA, E6892, 0), U3G_DEV(SIERRA, E6893, 0), U3G_DEV(SIERRA, EM5625, 0), U3G_DEV(SIERRA, EM5725, 0), U3G_DEV(SIERRA, MC5720, 0), U3G_DEV(SIERRA, MC5720_2, 0), U3G_DEV(SIERRA, MC5725, 0), U3G_DEV(SIERRA, MC5727, 0), U3G_DEV(SIERRA, MC5727_2, 0), U3G_DEV(SIERRA, MC5728, 0), U3G_DEV(SIERRA, MC8700, 0), U3G_DEV(SIERRA, MC8755, 0), U3G_DEV(SIERRA, MC8755_2, 0), U3G_DEV(SIERRA, MC8755_3, 0), U3G_DEV(SIERRA, MC8755_4, 0), U3G_DEV(SIERRA, MC8765, 0), U3G_DEV(SIERRA, MC8765_2, 0), U3G_DEV(SIERRA, MC8765_3, 0), U3G_DEV(SIERRA, MC8775, 0), U3G_DEV(SIERRA, MC8775_2, 0), U3G_DEV(SIERRA, MC8780, 0), U3G_DEV(SIERRA, MC8780_2, 0), U3G_DEV(SIERRA, MC8780_3, 0), U3G_DEV(SIERRA, MC8781, 0), U3G_DEV(SIERRA, MC8781_2, 0), U3G_DEV(SIERRA, MC8781_3, 0), U3G_DEV(SIERRA, MC8785, 0), U3G_DEV(SIERRA, MC8785_2, 0), U3G_DEV(SIERRA, MC8790, 0), U3G_DEV(SIERRA, MC8791, 0), U3G_DEV(SIERRA, MC8792, 0), U3G_DEV(SIERRA, MINI5725, 0), U3G_DEV(SIERRA, T11, 0), U3G_DEV(SIERRA, T598, 0), U3G_DEV(SILABS, SAEL, U3GINIT_SAEL_M460), U3G_DEV(STELERA, C105, 0), U3G_DEV(STELERA, E1003, 0), U3G_DEV(STELERA, E1004, 0), U3G_DEV(STELERA, E1005, 0), U3G_DEV(STELERA, E1006, 0), U3G_DEV(STELERA, E1007, 0), U3G_DEV(STELERA, E1008, 0), U3G_DEV(STELERA, E1009, 0), U3G_DEV(STELERA, E100A, 0), U3G_DEV(STELERA, E100B, 0), U3G_DEV(STELERA, E100C, 0), U3G_DEV(STELERA, E100D, 0), U3G_DEV(STELERA, E100E, 0), U3G_DEV(STELERA, E100F, 0), U3G_DEV(STELERA, E1010, 0), U3G_DEV(STELERA, E1011, 0), U3G_DEV(STELERA, E1012, 0), U3G_DEV(TCTMOBILE, X060S, 0), U3G_DEV(TCTMOBILE, X080S, U3GINIT_TCT), U3G_DEV(TELIT, UC864E, 0), U3G_DEV(TELIT, UC864G, 0), U3G_DEV(TLAYTECH, TEU800, 0), U3G_DEV(TOSHIBA, G450, 0), U3G_DEV(TOSHIBA, HSDPA, 0), U3G_DEV(YISO, C893, 0), /* Autoinstallers */ U3G_DEV(NOVATEL, ZEROCD, U3GINIT_SCSIEJECT), U3G_DEV(OPTION, GTICON322, U3GINIT_REZERO), U3G_DEV(QUALCOMMINC, ZTE_STOR, U3GINIT_ZTESTOR), U3G_DEV(QUALCOMMINC, ZTE_STOR2, U3GINIT_SCSIEJECT), U3G_DEV(QUANTA, Q101_STOR, U3GINIT_SCSIEJECT), U3G_DEV(SIERRA, TRUINSTALL, U3GINIT_SIERRA), #undef U3G_DEV }; static int u3g_sierra_init(struct usb_device *udev) { struct usb_device_request req; req.bmRequestType = UT_VENDOR; req.bRequest = UR_SET_INTERFACE; USETW(req.wValue, UF_DEVICE_REMOTE_WAKEUP); USETW(req.wIndex, UHF_PORT_CONNECTION); USETW(req.wLength, 0); if (usbd_do_request_flags(udev, NULL, &req, NULL, 0, NULL, USB_MS_HZ)) { /* ignore any errors */ } return (0); } static int u3g_huawei_init(struct usb_device *udev) { struct usb_device_request req; req.bmRequestType = UT_WRITE_DEVICE; req.bRequest = UR_SET_FEATURE; USETW(req.wValue, UF_DEVICE_REMOTE_WAKEUP); USETW(req.wIndex, UHF_PORT_SUSPEND); USETW(req.wLength, 0); if (usbd_do_request_flags(udev, NULL, &req, NULL, 0, NULL, USB_MS_HZ)) { /* ignore any errors */ } return (0); } static void u3g_sael_m460_init(struct usb_device *udev) { static const uint8_t setup[][24] = { { 0x41, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, { 0x41, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00 }, { 0x41, 0x13, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, { 0xc1, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x40, 0x02 }, { 0xc1, 0x08, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00 }, { 0x41, 0x07, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00 }, { 0xc1, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 }, { 0x41, 0x01, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00 }, { 0x41, 0x07, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00 }, { 0x41, 0x03, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00 }, { 0x41, 0x19, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x13 }, { 0x41, 0x13, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x09, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00 }, { 0x41, 0x12, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00 }, { 0x41, 0x01, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00 }, { 0x41, 0x07, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00 }, { 0x41, 0x03, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00 }, { 0x41, 0x19, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x13 }, { 0x41, 0x13, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x09, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00 }, { 0x41, 0x07, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00 }, }; struct usb_device_request req; usb_error_t err; uint16_t len; uint8_t buf[0x300]; uint8_t n; DPRINTFN(1, "\n"); if (usbd_req_set_alt_interface_no(udev, NULL, 0, 0)) { DPRINTFN(0, "Alt setting 0 failed\n"); return; } for (n = 0; n != (sizeof(setup)/sizeof(setup[0])); n++) { memcpy(&req, setup[n], sizeof(req)); len = UGETW(req.wLength); if (req.bmRequestType & UE_DIR_IN) { if (len > sizeof(buf)) { DPRINTFN(0, "too small buffer\n"); continue; } err = usbd_do_request(udev, NULL, &req, buf); } else { if (len > (sizeof(setup[0]) - 8)) { DPRINTFN(0, "too small buffer\n"); continue; } err = usbd_do_request(udev, NULL, &req, __DECONST(uint8_t *, &setup[n][8])); } if (err) { DPRINTFN(1, "request %u failed\n", (unsigned int)n); /* * Some of the requests will fail. Stop doing * requests when we are getting timeouts so * that we don't block the explore/attach * thread forever. */ if (err == USB_ERR_TIMEOUT) break; } } } /* * The following function handles 3G modem devices (E220, Mobile, * etc.) with auto-install flash disks for Windows/MacOSX on the first * interface. After some command or some delay they change appearance * to a modem. */ static void u3g_test_autoinst(void *arg, struct usb_device *udev, struct usb_attach_arg *uaa) { struct usb_interface *iface; struct usb_interface_descriptor *id; int error; unsigned long method; if (uaa->dev_state != UAA_DEV_READY) return; iface = usbd_get_iface(udev, 0); if (iface == NULL) return; id = iface->idesc; if (id == NULL || id->bInterfaceClass != UICLASS_MASS) return; if (usb_test_quirk(uaa, UQ_MSC_EJECT_HUAWEI)) method = U3GINIT_HUAWEI; else if (usb_test_quirk(uaa, UQ_MSC_EJECT_SIERRA)) method = U3GINIT_SIERRA; else if (usb_test_quirk(uaa, UQ_MSC_EJECT_SCSIEJECT)) method = U3GINIT_SCSIEJECT; else if (usb_test_quirk(uaa, UQ_MSC_EJECT_REZERO)) method = U3GINIT_REZERO; else if (usb_test_quirk(uaa, UQ_MSC_EJECT_ZTESTOR)) method = U3GINIT_ZTESTOR; else if (usb_test_quirk(uaa, UQ_MSC_EJECT_CMOTECH)) method = U3GINIT_CMOTECH; else if (usb_test_quirk(uaa, UQ_MSC_EJECT_WAIT)) method = U3GINIT_WAIT; else if (usb_test_quirk(uaa, UQ_MSC_EJECT_HUAWEISCSI)) method = U3GINIT_HUAWEISCSI; else if (usb_test_quirk(uaa, UQ_MSC_EJECT_TCT)) method = U3GINIT_TCT; else if (usbd_lookup_id_by_uaa(u3g_devs, sizeof(u3g_devs), uaa) == 0) method = USB_GET_DRIVER_INFO(uaa); else return; /* no device match */ if (bootverbose) { printf("Ejecting %s %s using method %ld\n", usb_get_manufacturer(udev), usb_get_product(udev), method); } switch (method) { case U3GINIT_HUAWEI: error = u3g_huawei_init(udev); break; case U3GINIT_HUAWEISCSI: error = usb_msc_eject(udev, 0, MSC_EJECT_HUAWEI); break; case U3GINIT_SCSIEJECT: error = usb_msc_eject(udev, 0, MSC_EJECT_STOPUNIT); break; case U3GINIT_REZERO: error = usb_msc_eject(udev, 0, MSC_EJECT_REZERO); break; case U3GINIT_ZTESTOR: error = usb_msc_eject(udev, 0, MSC_EJECT_STOPUNIT); error |= usb_msc_eject(udev, 0, MSC_EJECT_ZTESTOR); break; case U3GINIT_CMOTECH: error = usb_msc_eject(udev, 0, MSC_EJECT_CMOTECH); break; case U3GINIT_TCT: error = usb_msc_eject(udev, 0, MSC_EJECT_TCT); break; case U3GINIT_SIERRA: error = u3g_sierra_init(udev); break; case U3GINIT_WAIT: /* Just pretend we ejected, the card will timeout */ error = 0; break; default: /* no 3G eject quirks */ error = EOPNOTSUPP; break; } if (error == 0) { /* success, mark the udev as disappearing */ uaa->dev_state = UAA_DEV_EJECTING; } } static int u3g_driver_loaded(struct module *mod, int what, void *arg) { switch (what) { case MOD_LOAD: /* register our autoinstall handler */ u3g_etag = EVENTHANDLER_REGISTER(usb_dev_configured, u3g_test_autoinst, NULL, EVENTHANDLER_PRI_ANY); break; case MOD_UNLOAD: EVENTHANDLER_DEREGISTER(usb_dev_configured, u3g_etag); break; default: return (EOPNOTSUPP); } return (0); } static int u3g_probe(device_t self) { struct usb_attach_arg *uaa = device_get_ivars(self); if (uaa->usb_mode != USB_MODE_HOST) { return (ENXIO); } if (uaa->info.bConfigIndex != U3G_CONFIG_INDEX) { return (ENXIO); } if (uaa->info.bInterfaceClass != UICLASS_VENDOR) { return (ENXIO); } return (usbd_lookup_id_by_uaa(u3g_devs, sizeof(u3g_devs), uaa)); } static int u3g_attach(device_t dev) { struct usb_config u3g_config_tmp[U3G_N_TRANSFER]; struct usb_attach_arg *uaa = device_get_ivars(dev); struct u3g_softc *sc = device_get_softc(dev); struct usb_interface *iface; struct usb_interface_descriptor *id; uint32_t iface_valid; int error, type, nports; int ep, n; uint8_t i; DPRINTF("sc=%p\n", sc); type = USB_GET_DRIVER_INFO(uaa); if (type == U3GINIT_SAEL_M460 || usb_test_quirk(uaa, UQ_MSC_EJECT_SAEL_M460)) { u3g_sael_m460_init(uaa->device); } /* copy in USB config */ for (n = 0; n != U3G_N_TRANSFER; n++) u3g_config_tmp[n] = u3g_config[n]; device_set_usb_desc(dev); mtx_init(&sc->sc_mtx, "u3g", NULL, MTX_DEF); sc->sc_udev = uaa->device; /* Claim all interfaces on the device */ iface_valid = 0; for (i = uaa->info.bIfaceIndex; i < USB_IFACE_MAX; i++) { iface = usbd_get_iface(uaa->device, i); if (iface == NULL) break; id = usbd_get_interface_descriptor(iface); if (id == NULL || id->bInterfaceClass != UICLASS_VENDOR) continue; usbd_set_parent_iface(uaa->device, i, uaa->info.bIfaceIndex); iface_valid |= (1<device, &i, sc->sc_xfer[nports], u3g_config_tmp, U3G_N_TRANSFER, &sc->sc_ucom[nports], &sc->sc_mtx); if (error) { /* next interface */ i++; ep = 0; continue; } /* set stall by default */ mtx_lock(&sc->sc_mtx); usbd_xfer_set_stall(sc->sc_xfer[nports][U3G_BULK_WR]); usbd_xfer_set_stall(sc->sc_xfer[nports][U3G_BULK_RD]); mtx_unlock(&sc->sc_mtx); nports++; /* found one port */ ep++; if (nports == U3G_MAXPORTS) break; } if (nports == 0) { device_printf(dev, "no ports found\n"); goto detach; } sc->sc_numports = nports; error = ucom_attach(&sc->sc_super_ucom, sc->sc_ucom, sc->sc_numports, sc, &u3g_callback, &sc->sc_mtx); if (error) { DPRINTF("ucom_attach failed\n"); goto detach; } ucom_set_pnpinfo_usb(&sc->sc_super_ucom, dev); device_printf(dev, "Found %u port%s.\n", sc->sc_numports, sc->sc_numports > 1 ? "s":""); return (0); detach: u3g_detach(dev); return (ENXIO); } static int u3g_detach(device_t dev) { struct u3g_softc *sc = device_get_softc(dev); uint8_t subunit; DPRINTF("sc=%p\n", sc); /* NOTE: It is not dangerous to detach more ports than attached! */ ucom_detach(&sc->sc_super_ucom, sc->sc_ucom); for (subunit = 0; subunit != U3G_MAXPORTS; subunit++) usbd_transfer_unsetup(sc->sc_xfer[subunit], U3G_N_TRANSFER); mtx_destroy(&sc->sc_mtx); return (0); } static void u3g_start_read(struct ucom_softc *ucom) { struct u3g_softc *sc = ucom->sc_parent; /* start read endpoint */ usbd_transfer_start(sc->sc_xfer[ucom->sc_subunit][U3G_BULK_RD]); return; } static void u3g_stop_read(struct ucom_softc *ucom) { struct u3g_softc *sc = ucom->sc_parent; /* stop read endpoint */ usbd_transfer_stop(sc->sc_xfer[ucom->sc_subunit][U3G_BULK_RD]); return; } static void u3g_start_write(struct ucom_softc *ucom) { struct u3g_softc *sc = ucom->sc_parent; usbd_transfer_start(sc->sc_xfer[ucom->sc_subunit][U3G_BULK_WR]); return; } static void u3g_stop_write(struct ucom_softc *ucom) { struct u3g_softc *sc = ucom->sc_parent; usbd_transfer_stop(sc->sc_xfer[ucom->sc_subunit][U3G_BULK_WR]); return; } static void u3g_write_callback(struct usb_xfer *xfer, usb_error_t error) { struct ucom_softc *ucom = usbd_xfer_softc(xfer); struct usb_page_cache *pc; uint32_t actlen; switch (USB_GET_STATE(xfer)) { case USB_ST_TRANSFERRED: case USB_ST_SETUP: tr_setup: pc = usbd_xfer_get_frame(xfer, 0); if (ucom_get_data(ucom, pc, 0, U3G_BSIZE, &actlen)) { usbd_xfer_set_frame_len(xfer, 0, actlen); usbd_transfer_submit(xfer); } break; default: /* Error */ if (error != USB_ERR_CANCELLED) { /* do a builtin clear-stall */ usbd_xfer_set_stall(xfer); goto tr_setup; } break; } return; } static void u3g_read_callback(struct usb_xfer *xfer, usb_error_t error) { struct ucom_softc *ucom = usbd_xfer_softc(xfer); struct usb_page_cache *pc; int actlen; usbd_xfer_status(xfer, &actlen, NULL, NULL, NULL); switch (USB_GET_STATE(xfer)) { case USB_ST_TRANSFERRED: pc = usbd_xfer_get_frame(xfer, 0); ucom_put_data(ucom, pc, 0, actlen); case USB_ST_SETUP: tr_setup: usbd_xfer_set_frame_len(xfer, 0, usbd_xfer_max_len(xfer)); usbd_transfer_submit(xfer); break; default: /* Error */ if (error != USB_ERR_CANCELLED) { /* do a builtin clear-stall */ usbd_xfer_set_stall(xfer); goto tr_setup; } break; } return; } Index: projects/binutils-2.17/sys/dev/usb/usb_bus.h =================================================================== --- projects/binutils-2.17/sys/dev/usb/usb_bus.h (revision 215829) +++ projects/binutils-2.17/sys/dev/usb/usb_bus.h (revision 215830) @@ -1,114 +1,114 @@ /* $FreeBSD$ */ /*- * Copyright (c) 2008 Hans Petter Selasky. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _USB_BUS_H_ #define _USB_BUS_H_ /* * The following structure defines the USB explore message sent to the USB * explore process. */ struct usb_bus_msg { struct usb_proc_msg hdr; struct usb_bus *bus; }; /* * The following structure defines the USB statistics structure. */ struct usb_bus_stat { uint32_t uds_requests[4]; }; /* * The following structure defines an USB BUS. There is one USB BUS * for every Host or Device controller. */ struct usb_bus { struct usb_bus_stat stats_err; struct usb_bus_stat stats_ok; struct root_hold_token *bus_roothold; /* * There are two callback processes. One for Giant locked * callbacks. One for non-Giant locked callbacks. This should * avoid congestion and reduce response time in most cases. */ struct usb_process giant_callback_proc; struct usb_process non_giant_callback_proc; /* Explore process */ struct usb_process explore_proc; /* Control request process */ struct usb_process control_xfer_proc; struct usb_bus_msg explore_msg[2]; struct usb_bus_msg detach_msg[2]; struct usb_bus_msg attach_msg[2]; /* * This mutex protects the USB hardware: */ struct mtx bus_mtx; struct usb_xfer_queue intr_q; struct usb_callout power_wdog; /* power management */ device_t parent; device_t bdev; /* filled by HC driver */ #if USB_HAVE_BUSDMA struct usb_dma_parent_tag dma_parent_tag[1]; struct usb_dma_tag dma_tags[USB_BUS_DMA_TAG_MAX]; #endif struct usb_bus_methods *methods; /* filled by HC driver */ struct usb_device **devices; - struct usbpf_if *uif; /* USB Packet Filter */ + struct ifnet *ifp; /* only for USB Packet Filter */ usb_power_mask_t hw_power_state; /* see USB_HW_POWER_XXX */ usb_size_t uframe_usage[USB_HS_MICRO_FRAMES_MAX]; uint16_t isoc_time_last; /* in milliseconds */ uint8_t alloc_failed; /* Set if memory allocation failed. */ uint8_t driver_added_refcount; /* Current driver generation count */ enum usb_revision usbrev; /* USB revision. See "USB_REV_XXX". */ uint8_t devices_max; /* maximum number of USB devices */ uint8_t do_probe; /* set if USB BUS should be re-probed */ /* * The scratch area can only be used inside the explore thread * belonging to the give serial bus. */ union { struct usb_hw_ep_scratch hw_ep_scratch[1]; struct usb_temp_setup temp_setup[1]; uint8_t data[255]; } scratch[1]; }; #endif /* _USB_BUS_H_ */ Index: projects/binutils-2.17/sys/dev/usb/usb_pf.c =================================================================== --- projects/binutils-2.17/sys/dev/usb/usb_pf.c (revision 215829) +++ projects/binutils-2.17/sys/dev/usb/usb_pf.c (revision 215830) @@ -1,1862 +1,247 @@ /*- * Copyright (c) 1990, 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from the Stanford/CMU enet packet filter, * (net/enet.c) distributed as part of 4.3BSD, and code contributed * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence * Berkeley Laboratory. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include +#include +#include #include #include #include #include #include #include #include #include #include #include -/* - * All usbpf implementations are extracted from bpf(9) APIs and it's - * specialized for USB packet filtering between the driver and the host - * controller. - */ - -MALLOC_DEFINE(M_USBPF, "USBPktFilter", "USB Packet Filter"); - -/* - * Rotate the packet buffers in descriptor ud. Move the store buffer into the - * hold slot, and the free buffer ino the store slot. Zero the length of the - * new store buffer. Descriptor lock should be held. - */ -#define USBPF_ROTATE_BUFFERS(ud) do { \ - (ud)->ud_hbuf = (ud)->ud_sbuf; \ - (ud)->ud_hlen = (ud)->ud_slen; \ - (ud)->ud_sbuf = (ud)->ud_fbuf; \ - (ud)->ud_slen = 0; \ - (ud)->ud_fbuf = NULL; \ - usbpf_bufheld(ud); \ -} while (0) - -#ifndef __i386__ -#define USBPF_ALIGN -#endif - -#ifndef USBPF_ALIGN -#define USBPF_EXTRACT_SHORT(p) ((u_int16_t)ntohs(*(u_int16_t *)p)) -#define USBPF_EXTRACT_LONG(p) (ntohl(*(u_int32_t *)p)) -#else -#define USBPF_EXTRACT_SHORT(p) \ - ((u_int16_t) \ - ((u_int16_t)*((u_char *)p+0)<<8| \ - (u_int16_t)*((u_char *)p+1)<<0)) -#define USBPF_EXTRACT_LONG(p) \ - ((u_int32_t)*((u_char *)p+0)<<24| \ - (u_int32_t)*((u_char *)p+1)<<16| \ - (u_int32_t)*((u_char *)p+2)<<8| \ - (u_int32_t)*((u_char *)p+3)<<0) -#endif - -/* - * Number of scratch memory words (for USBPF_LD|USBPF_MEM and USBPF_ST). - */ -#define USBPF_MEMWORDS 16 - -/* Values for ud_state */ -#define USBPF_IDLE 0 /* no select in progress */ -#define USBPF_WAITING 1 /* waiting for read timeout in select */ -#define USBPF_TIMED_OUT 2 /* read timeout has expired in select */ - -#define PRIUSB 26 /* interruptible */ - -/* Frame directions */ -enum usbpf_direction { - USBPF_D_IN, /* See incoming frames */ - USBPF_D_INOUT, /* See incoming and outgoing frames */ - USBPF_D_OUT /* See outgoing frames */ -}; - -static void usbpf_append_bytes(struct usbpf_d *, caddr_t, u_int, void *, - u_int); -static void usbpf_attachd(struct usbpf_d *, struct usbpf_if *); -static void usbpf_detachd(struct usbpf_d *); -static int usbpf_canfreebuf(struct usbpf_d *); -static void usbpf_buf_reclaimed(struct usbpf_d *); -static int usbpf_canwritebuf(struct usbpf_d *); - -static d_open_t usbpf_open; -static d_read_t usbpf_read; -static d_write_t usbpf_write; -static d_ioctl_t usbpf_ioctl; -static d_poll_t usbpf_poll; -static d_kqfilter_t usbpf_kqfilter; - -static struct cdevsw usbpf_cdevsw = { - .d_version = D_VERSION, - .d_open = usbpf_open, - .d_read = usbpf_read, - .d_write = usbpf_write, - .d_ioctl = usbpf_ioctl, - .d_poll = usbpf_poll, - .d_name = "usbpf", - .d_kqfilter = usbpf_kqfilter, -}; - -static LIST_HEAD(, usbpf_if) usbpf_iflist; -static struct mtx usbpf_mtx; /* global lock */ -static int usbpf_uifd_cnt; - -static int usbpf_bufsize = 4096; -#define USBPF_MINBUFSIZE 32 -#define USBPF_MAXBUFSIZE 0x80000 -static int usbpf_maxbufsize = USBPF_MAXBUFSIZE; -#define USBPF_MAXINSNS 512 -static int usbpf_maxinsns = USBPF_MAXINSNS; - -static void -usbpf_buffer_init(struct usbpf_d *ud) +void +usbpf_attach(struct usb_bus *ubus) { + struct ifnet *ifp; - ud->ud_bufsize = usbpf_bufsize; -} + ifp = ubus->ifp = if_alloc(IFT_USB); + if_initname(ifp, "usbus", device_get_unit(ubus->bdev)); + if_attach(ifp); -/* - * Free USBPF kernel buffers on device close. - */ -static void -usbpf_buffer_free(struct usbpf_d *ud) -{ + KASSERT(sizeof(struct usbpf_pkthdr) == USBPF_HDR_LEN, + ("wrong USB pf header length (%zd)", sizeof(struct usbpf_pkthdr))); - if (ud->ud_sbuf != NULL) - free(ud->ud_sbuf, M_USBPF); - if (ud->ud_hbuf != NULL) - free(ud->ud_hbuf, M_USBPF); - if (ud->ud_fbuf != NULL) - free(ud->ud_fbuf, M_USBPF); - -#ifdef INVARIANTS - ud->ud_sbuf = ud->ud_hbuf = ud->ud_fbuf = (caddr_t)~0; -#endif -} - -static void -usbpf_buffer_alloc(struct usbpf_d *ud) -{ - - KASSERT(ud->ud_fbuf == NULL, ("%s: ud_fbuf != NULL", __func__)); - KASSERT(ud->ud_sbuf == NULL, ("%s: ud_sbuf != NULL", __func__)); - KASSERT(ud->ud_hbuf == NULL, ("%s: ud_hbuf != NULL", __func__)); - - ud->ud_fbuf = (caddr_t)malloc(ud->ud_bufsize, M_USBPF, M_WAITOK); - ud->ud_sbuf = (caddr_t)malloc(ud->ud_bufsize, M_USBPF, M_WAITOK); - ud->ud_hbuf = NULL; - ud->ud_slen = 0; - ud->ud_hlen = 0; -} - -/* - * Copy buffer storage to user space in read(). - */ -static int -usbpf_buffer_uiomove(struct usbpf_d *ud, caddr_t buf, u_int len, - struct uio *uio) -{ - - return (uiomove(buf, len, uio)); -} - -/* - * Simple data copy to the current kernel buffer. - */ -static void -usbpf_buffer_append_bytes(struct usbpf_d *ud, caddr_t buf, u_int offset, - void *src, u_int len) -{ - u_char *src_bytes; - - src_bytes = (u_char *)src; - bcopy(src_bytes, buf + offset, len); -} - -/* - * Allocate or resize buffers. - */ -static int -usbpf_buffer_ioctl_sblen(struct usbpf_d *ud, u_int *i) -{ - u_int size; - - USBPFD_LOCK(ud); - if (ud->ud_bif != NULL) { - USBPFD_UNLOCK(ud); - return (EINVAL); - } - size = *i; - if (size > usbpf_maxbufsize) - *i = size = usbpf_maxbufsize; - else if (size < USBPF_MINBUFSIZE) - *i = size = USBPF_MINBUFSIZE; - ud->ud_bufsize = size; - USBPFD_UNLOCK(ud); - return (0); -} - -static const u_short usbpf_code_map[] = { - 0x10ff, /* 0x00-0x0f: 1111111100001000 */ - 0x3070, /* 0x10-0x1f: 0000111000001100 */ - 0x3131, /* 0x20-0x2f: 1000110010001100 */ - 0x3031, /* 0x30-0x3f: 1000110000001100 */ - 0x3131, /* 0x40-0x4f: 1000110010001100 */ - 0x1011, /* 0x50-0x5f: 1000100000001000 */ - 0x1013, /* 0x60-0x6f: 1100100000001000 */ - 0x1010, /* 0x70-0x7f: 0000100000001000 */ - 0x0093, /* 0x80-0x8f: 1100100100000000 */ - 0x0000, /* 0x90-0x9f: 0000000000000000 */ - 0x0000, /* 0xa0-0xaf: 0000000000000000 */ - 0x0002, /* 0xb0-0xbf: 0100000000000000 */ - 0x0000, /* 0xc0-0xcf: 0000000000000000 */ - 0x0000, /* 0xd0-0xdf: 0000000000000000 */ - 0x0000, /* 0xe0-0xef: 0000000000000000 */ - 0x0000 /* 0xf0-0xff: 0000000000000000 */ -}; - -#define USBPF_VALIDATE_CODE(c) \ - ((c) <= 0xff && (usbpf_code_map[(c) >> 4] & (1 << ((c) & 0xf))) != 0) - -/* - * Return true if the 'fcode' is a valid filter program. - * The constraints are that each jump be forward and to a valid - * code. The code must terminate with either an accept or reject. - * - * The kernel needs to be able to verify an application's filter code. - * Otherwise, a bogus program could easily crash the system. - */ -static int -usbpf_validate(const struct usbpf_insn *f, int len) -{ - register int i; - register const struct usbpf_insn *p; - - /* Do not accept negative length filter. */ - if (len < 0) - return (0); - - /* An empty filter means accept all. */ - if (len == 0) - return (1); - - for (i = 0; i < len; ++i) { - p = &f[i]; - /* - * Check that the code is valid. - */ - if (!USBPF_VALIDATE_CODE(p->code)) - return (0); - /* - * Check that that jumps are forward, and within - * the code block. - */ - if (USBPF_CLASS(p->code) == USBPF_JMP) { - register u_int offset; - - if (p->code == (USBPF_JMP|USBPF_JA)) - offset = p->k; - else - offset = p->jt > p->jf ? p->jt : p->jf; - if (offset >= (u_int)(len - i) - 1) - return (0); - continue; - } - /* - * Check that memory operations use valid addresses. - */ - if (p->code == USBPF_ST || p->code == USBPF_STX || - p->code == (USBPF_LD|USBPF_MEM) || - p->code == (USBPF_LDX|USBPF_MEM)) { - if (p->k >= USBPF_MEMWORDS) - return (0); - continue; - } - /* - * Check for constant division by 0. - */ - if (p->code == (USBPF_ALU|USBPF_DIV|USBPF_K) && p->k == 0) - return (0); - } - return (USBPF_CLASS(f[len - 1].code) == USBPF_RET); -} - -#ifdef _KERNEL -#define MINDEX(m, k) \ -{ \ - register int len = m->m_len; \ - \ - while (k >= len) { \ - k -= len; \ - m = m->m_next; \ - if (m == 0) \ - return (0); \ - len = m->m_len; \ - } \ -} - -static u_int16_t m_xhalf(struct mbuf *m, usbpf_u_int32 k, int *err); -static u_int32_t m_xword(struct mbuf *m, usbpf_u_int32 k, int *err); - -static u_int32_t -m_xword(struct mbuf *m, usbpf_u_int32 k, int *err) -{ - size_t len; - u_char *cp, *np; - struct mbuf *m0; - - len = m->m_len; - while (k >= len) { - k -= len; - m = m->m_next; - if (m == 0) - goto bad; - len = m->m_len; - } - cp = mtod(m, u_char *) + k; - if (len - k >= 4) { - *err = 0; - return (USBPF_EXTRACT_LONG(cp)); - } - m0 = m->m_next; - if (m0 == 0 || m0->m_len + len - k < 4) - goto bad; - *err = 0; - np = mtod(m0, u_char *); - switch (len - k) { - case 1: - return (((u_int32_t)cp[0] << 24) | - ((u_int32_t)np[0] << 16) | - ((u_int32_t)np[1] << 8) | - (u_int32_t)np[2]); - - case 2: - return (((u_int32_t)cp[0] << 24) | - ((u_int32_t)cp[1] << 16) | - ((u_int32_t)np[0] << 8) | - (u_int32_t)np[1]); - - default: - return (((u_int32_t)cp[0] << 24) | - ((u_int32_t)cp[1] << 16) | - ((u_int32_t)cp[2] << 8) | - (u_int32_t)np[0]); - } - bad: - *err = 1; - return (0); -} - -static u_int16_t -m_xhalf(struct mbuf *m, usbpf_u_int32 k, int *err) -{ - size_t len; - u_char *cp; - struct mbuf *m0; - - len = m->m_len; - while (k >= len) { - k -= len; - m = m->m_next; - if (m == 0) - goto bad; - len = m->m_len; - } - cp = mtod(m, u_char *) + k; - if (len - k >= 2) { - *err = 0; - return (USBPF_EXTRACT_SHORT(cp)); - } - m0 = m->m_next; - if (m0 == 0) - goto bad; - *err = 0; - return ((cp[0] << 8) | mtod(m0, u_char *)[0]); - bad: - *err = 1; - return (0); -} -#endif - -/* - * Execute the filter program starting at pc on the packet p - * wirelen is the length of the original packet - * buflen is the amount of data present - */ -static u_int -usbpf_filter(const struct usbpf_insn *pc, u_char *p, u_int wirelen, - u_int buflen) -{ - u_int32_t A = 0, X = 0; - usbpf_u_int32 k; - u_int32_t mem[USBPF_MEMWORDS]; - /* - * XXX temporarily the filter system is disabled because currently it - * could not handle the some machine code properly that leads to - * kernel crash by invalid usage. + * XXX According to the specification of DLT_USB, it indicates packets + * beginning with USB setup header. But not sure all packets would be. */ - return ((u_int)-1); + bpfattach(ifp, DLT_USB, USBPF_HDR_LEN); - if (pc == NULL) - /* - * No filter means accept all. - */ - return ((u_int)-1); - - --pc; - while (1) { - ++pc; - switch (pc->code) { - default: -#ifdef _KERNEL - return (0); -#else - abort(); -#endif - - case USBPF_RET|USBPF_K: - return ((u_int)pc->k); - - case USBPF_RET|USBPF_A: - return ((u_int)A); - - case USBPF_LD|USBPF_W|USBPF_ABS: - k = pc->k; - if (k > buflen || sizeof(int32_t) > buflen - k) { -#ifdef _KERNEL - int merr; - - if (buflen != 0) - return (0); - A = m_xword((struct mbuf *)p, k, &merr); - if (merr != 0) - return (0); - continue; -#else - return (0); -#endif - } -#ifdef USBPF_ALIGN - if (((intptr_t)(p + k) & 3) != 0) - A = USBPF_EXTRACT_LONG(&p[k]); - else -#endif - A = ntohl(*(int32_t *)(p + k)); - continue; - - case USBPF_LD|USBPF_H|USBPF_ABS: - k = pc->k; - if (k > buflen || sizeof(int16_t) > buflen - k) { -#ifdef _KERNEL - int merr; - - if (buflen != 0) - return (0); - A = m_xhalf((struct mbuf *)p, k, &merr); - continue; -#else - return (0); -#endif - } - A = USBPF_EXTRACT_SHORT(&p[k]); - continue; - - case USBPF_LD|USBPF_B|USBPF_ABS: - k = pc->k; - if (k >= buflen) { -#ifdef _KERNEL - struct mbuf *m; - - if (buflen != 0) - return (0); - m = (struct mbuf *)p; - MINDEX(m, k); - A = mtod(m, u_char *)[k]; - continue; -#else - return (0); -#endif - } - A = p[k]; - continue; - - case USBPF_LD|USBPF_W|USBPF_LEN: - A = wirelen; - continue; - - case USBPF_LDX|USBPF_W|USBPF_LEN: - X = wirelen; - continue; - - case USBPF_LD|USBPF_W|USBPF_IND: - k = X + pc->k; - if (pc->k > buflen || X > buflen - pc->k || - sizeof(int32_t) > buflen - k) { -#ifdef _KERNEL - int merr; - - if (buflen != 0) - return (0); - A = m_xword((struct mbuf *)p, k, &merr); - if (merr != 0) - return (0); - continue; -#else - return (0); -#endif - } -#ifdef USBPF_ALIGN - if (((intptr_t)(p + k) & 3) != 0) - A = USBPF_EXTRACT_LONG(&p[k]); - else -#endif - A = ntohl(*(int32_t *)(p + k)); - continue; - - case USBPF_LD|USBPF_H|USBPF_IND: - k = X + pc->k; - if (X > buflen || pc->k > buflen - X || - sizeof(int16_t) > buflen - k) { -#ifdef _KERNEL - int merr; - - if (buflen != 0) - return (0); - A = m_xhalf((struct mbuf *)p, k, &merr); - if (merr != 0) - return (0); - continue; -#else - return (0); -#endif - } - A = USBPF_EXTRACT_SHORT(&p[k]); - continue; - - case USBPF_LD|USBPF_B|USBPF_IND: - k = X + pc->k; - if (pc->k >= buflen || X >= buflen - pc->k) { -#ifdef _KERNEL - struct mbuf *m; - - if (buflen != 0) - return (0); - m = (struct mbuf *)p; - MINDEX(m, k); - A = mtod(m, u_char *)[k]; - continue; -#else - return (0); -#endif - } - A = p[k]; - continue; - - case USBPF_LDX|USBPF_MSH|USBPF_B: - k = pc->k; - if (k >= buflen) { -#ifdef _KERNEL - register struct mbuf *m; - - if (buflen != 0) - return (0); - m = (struct mbuf *)p; - MINDEX(m, k); - X = (mtod(m, u_char *)[k] & 0xf) << 2; - continue; -#else - return (0); -#endif - } - X = (p[pc->k] & 0xf) << 2; - continue; - - case USBPF_LD|USBPF_IMM: - A = pc->k; - continue; - - case USBPF_LDX|USBPF_IMM: - X = pc->k; - continue; - - case USBPF_LD|USBPF_MEM: - A = mem[pc->k]; - continue; - - case USBPF_LDX|USBPF_MEM: - X = mem[pc->k]; - continue; - - case USBPF_ST: - mem[pc->k] = A; - continue; - - case USBPF_STX: - mem[pc->k] = X; - continue; - - case USBPF_JMP|USBPF_JA: - pc += pc->k; - continue; - - case USBPF_JMP|USBPF_JGT|USBPF_K: - pc += (A > pc->k) ? pc->jt : pc->jf; - continue; - - case USBPF_JMP|USBPF_JGE|USBPF_K: - pc += (A >= pc->k) ? pc->jt : pc->jf; - continue; - - case USBPF_JMP|USBPF_JEQ|USBPF_K: - pc += (A == pc->k) ? pc->jt : pc->jf; - continue; - - case USBPF_JMP|USBPF_JSET|USBPF_K: - pc += (A & pc->k) ? pc->jt : pc->jf; - continue; - - case USBPF_JMP|USBPF_JGT|USBPF_X: - pc += (A > X) ? pc->jt : pc->jf; - continue; - - case USBPF_JMP|USBPF_JGE|USBPF_X: - pc += (A >= X) ? pc->jt : pc->jf; - continue; - - case USBPF_JMP|USBPF_JEQ|USBPF_X: - pc += (A == X) ? pc->jt : pc->jf; - continue; - - case USBPF_JMP|USBPF_JSET|USBPF_X: - pc += (A & X) ? pc->jt : pc->jf; - continue; - - case USBPF_ALU|USBPF_ADD|USBPF_X: - A += X; - continue; - - case USBPF_ALU|USBPF_SUB|USBPF_X: - A -= X; - continue; - - case USBPF_ALU|USBPF_MUL|USBPF_X: - A *= X; - continue; - - case USBPF_ALU|USBPF_DIV|USBPF_X: - if (X == 0) - return (0); - A /= X; - continue; - - case USBPF_ALU|USBPF_AND|USBPF_X: - A &= X; - continue; - - case USBPF_ALU|USBPF_OR|USBPF_X: - A |= X; - continue; - - case USBPF_ALU|USBPF_LSH|USBPF_X: - A <<= X; - continue; - - case USBPF_ALU|USBPF_RSH|USBPF_X: - A >>= X; - continue; - - case USBPF_ALU|USBPF_ADD|USBPF_K: - A += pc->k; - continue; - - case USBPF_ALU|USBPF_SUB|USBPF_K: - A -= pc->k; - continue; - - case USBPF_ALU|USBPF_MUL|USBPF_K: - A *= pc->k; - continue; - - case USBPF_ALU|USBPF_DIV|USBPF_K: - A /= pc->k; - continue; - - case USBPF_ALU|USBPF_AND|USBPF_K: - A &= pc->k; - continue; - - case USBPF_ALU|USBPF_OR|USBPF_K: - A |= pc->k; - continue; - - case USBPF_ALU|USBPF_LSH|USBPF_K: - A <<= pc->k; - continue; - - case USBPF_ALU|USBPF_RSH|USBPF_K: - A >>= pc->k; - continue; - - case USBPF_ALU|USBPF_NEG: - A = -A; - continue; - - case USBPF_MISC|USBPF_TAX: - X = A; - continue; - - case USBPF_MISC|USBPF_TXA: - A = X; - continue; - } - } -} - -static void -usbpf_free(struct usbpf_d *ud) -{ - - switch (ud->ud_bufmode) { - case USBPF_BUFMODE_BUFFER: - return (usbpf_buffer_free(ud)); - default: - panic("usbpf_buf_free"); - } -} - -/* - * Notify the buffer model that a buffer has moved into the hold position. - */ -static void -usbpf_bufheld(struct usbpf_d *ud) -{ - - USBPFD_LOCK_ASSERT(ud); -} - -/* - * Free buffers currently in use by a descriptor. - * Called on close. - */ -static void -usbpf_freed(struct usbpf_d *ud) -{ - - /* - * We don't need to lock out interrupts since this descriptor has - * been detached from its interface and it yet hasn't been marked - * free. - */ - usbpf_free(ud); - if (ud->ud_rfilter != NULL) - free((caddr_t)ud->ud_rfilter, M_USBPF); - if (ud->ud_wfilter != NULL) - free((caddr_t)ud->ud_wfilter, M_USBPF); - mtx_destroy(&ud->ud_mtx); -} - -/* - * Close the descriptor by detaching it from its interface, - * deallocating its buffers, and marking it free. - */ -static void -usbpf_dtor(void *data) -{ - struct usbpf_d *ud = data; - - USBPFD_LOCK(ud); - if (ud->ud_state == USBPF_WAITING) - callout_stop(&ud->ud_callout); - ud->ud_state = USBPF_IDLE; - USBPFD_UNLOCK(ud); - funsetown(&ud->ud_sigio); - mtx_lock(&usbpf_mtx); - if (ud->ud_bif) - usbpf_detachd(ud); - mtx_unlock(&usbpf_mtx); - selwakeuppri(&ud->ud_sel, PRIUSB); - knlist_destroy(&ud->ud_sel.si_note); - callout_drain(&ud->ud_callout); - usbpf_freed(ud); - free(ud, M_USBPF); -} - -/* - * Open device. Returns ENXIO for illegal minor device number, - * EBUSY if file is open by another process. - */ -/* ARGSUSED */ -static int -usbpf_open(struct cdev *dev, int flags, int fmt, struct thread *td) -{ - struct usbpf_d *ud; - int error; - - ud = malloc(sizeof(*ud), M_USBPF, M_WAITOK | M_ZERO); - error = devfs_set_cdevpriv(ud, usbpf_dtor); - if (error != 0) { - free(ud, M_USBPF); - return (error); - } - - usbpf_buffer_init(ud); - ud->ud_bufmode = USBPF_BUFMODE_BUFFER; - ud->ud_sig = SIGIO; - ud->ud_direction = USBPF_D_INOUT; - ud->ud_pid = td->td_proc->p_pid; - mtx_init(&ud->ud_mtx, devtoname(dev), "usbpf cdev lock", MTX_DEF); - callout_init_mtx(&ud->ud_callout, &ud->ud_mtx, 0); - knlist_init_mtx(&ud->ud_sel.si_note, &ud->ud_mtx); - - return (0); -} - -static int -usbpf_uiomove(struct usbpf_d *ud, caddr_t buf, u_int len, struct uio *uio) -{ - - if (ud->ud_bufmode != USBPF_BUFMODE_BUFFER) - return (EOPNOTSUPP); - return (usbpf_buffer_uiomove(ud, buf, len, uio)); -} - -/* - * usbpf_read - read next chunk of packets from buffers - */ -static int -usbpf_read(struct cdev *dev, struct uio *uio, int ioflag) -{ - struct usbpf_d *ud; - int error; - int non_block; - int timed_out; - - error = devfs_get_cdevpriv((void **)&ud); - if (error != 0) - return (error); - - /* - * Restrict application to use a buffer the same size as - * as kernel buffers. - */ - if (uio->uio_resid != ud->ud_bufsize) - return (EINVAL); - - non_block = ((ioflag & O_NONBLOCK) != 0); - - USBPFD_LOCK(ud); - ud->ud_pid = curthread->td_proc->p_pid; - if (ud->ud_bufmode != USBPF_BUFMODE_BUFFER) { - USBPFD_UNLOCK(ud); - return (EOPNOTSUPP); - } - if (ud->ud_state == USBPF_WAITING) - callout_stop(&ud->ud_callout); - timed_out = (ud->ud_state == USBPF_TIMED_OUT); - ud->ud_state = USBPF_IDLE; - /* - * If the hold buffer is empty, then do a timed sleep, which - * ends when the timeout expires or when enough packets - * have arrived to fill the store buffer. - */ - while (ud->ud_hbuf == NULL) { - if (ud->ud_slen != 0) { - /* - * A packet(s) either arrived since the previous - * read or arrived while we were asleep. - */ - if (ud->ud_immediate || non_block || timed_out) { - /* - * Rotate the buffers and return what's here - * if we are in immediate mode, non-blocking - * flag is set, or this descriptor timed out. - */ - USBPF_ROTATE_BUFFERS(ud); - break; - } - } - - /* - * No data is available, check to see if the usbpf device - * is still pointed at a real interface. If not, return - * ENXIO so that the userland process knows to rebind - * it before using it again. - */ - if (ud->ud_bif == NULL) { - USBPFD_UNLOCK(ud); - return (ENXIO); - } - - if (non_block) { - USBPFD_UNLOCK(ud); - return (EWOULDBLOCK); - } - error = msleep(ud, &ud->ud_mtx, PRIUSB|PCATCH, - "uff", ud->ud_rtout); - if (error == EINTR || error == ERESTART) { - USBPFD_UNLOCK(ud); - return (error); - } - if (error == EWOULDBLOCK) { - /* - * On a timeout, return what's in the buffer, - * which may be nothing. If there is something - * in the store buffer, we can rotate the buffers. - */ - if (ud->ud_hbuf) - /* - * We filled up the buffer in between - * getting the timeout and arriving - * here, so we don't need to rotate. - */ - break; - - if (ud->ud_slen == 0) { - USBPFD_UNLOCK(ud); - return (0); - } - USBPF_ROTATE_BUFFERS(ud); - break; - } - } - /* - * At this point, we know we have something in the hold slot. - */ - USBPFD_UNLOCK(ud); - - /* - * Move data from hold buffer into user space. - * We know the entire buffer is transferred since - * we checked above that the read buffer is usbpf_bufsize bytes. - * - * XXXRW: More synchronization needed here: what if a second thread - * issues a read on the same fd at the same time? Don't want this - * getting invalidated. - */ - error = usbpf_uiomove(ud, ud->ud_hbuf, ud->ud_hlen, uio); - - USBPFD_LOCK(ud); - ud->ud_fbuf = ud->ud_hbuf; - ud->ud_hbuf = NULL; - ud->ud_hlen = 0; - usbpf_buf_reclaimed(ud); - USBPFD_UNLOCK(ud); - - return (error); -} - -static int -usbpf_write(struct cdev *dev, struct uio *uio, int ioflag) -{ - - /* NOT IMPLEMENTED */ - return (ENOSYS); -} - -static int -usbpf_ioctl_sblen(struct usbpf_d *ud, u_int *i) -{ - - if (ud->ud_bufmode != USBPF_BUFMODE_BUFFER) - return (EOPNOTSUPP); - return (usbpf_buffer_ioctl_sblen(ud, i)); -} - -/* - * Reset a descriptor by flushing its packet buffer and clearing the receive - * and drop counts. This is doable for kernel-only buffers, but with - * zero-copy buffers, we can't write to (or rotate) buffers that are - * currently owned by userspace. It would be nice if we could encapsulate - * this logic in the buffer code rather than here. - */ -static void -usbpf_reset_d(struct usbpf_d *ud) -{ - - USBPFD_LOCK_ASSERT(ud); - - if ((ud->ud_hbuf != NULL) && - (ud->ud_bufmode != USBPF_BUFMODE_ZBUF || usbpf_canfreebuf(ud))) { - /* Free the hold buffer. */ - ud->ud_fbuf = ud->ud_hbuf; - ud->ud_hbuf = NULL; - ud->ud_hlen = 0; - usbpf_buf_reclaimed(ud); - } - if (usbpf_canwritebuf(ud)) - ud->ud_slen = 0; - ud->ud_rcount = 0; - ud->ud_dcount = 0; - ud->ud_fcount = 0; - ud->ud_wcount = 0; - ud->ud_wfcount = 0; - ud->ud_wdcount = 0; - ud->ud_zcopy = 0; -} - -static int -usbpf_setif(struct usbpf_d *ud, struct usbpf_ifreq *ufr) -{ - struct usbpf_if *uif; - struct usb_bus *theywant; - - theywant = usb_bus_find(ufr->ufr_name); - if (theywant == NULL || theywant->uif == NULL) - return (ENXIO); - - uif = theywant->uif; - - switch (ud->ud_bufmode) { - case USBPF_BUFMODE_BUFFER: - if (ud->ud_sbuf == NULL) - usbpf_buffer_alloc(ud); - KASSERT(ud->ud_sbuf != NULL, ("%s: ud_sbuf == NULL", __func__)); - break; - - default: - panic("usbpf_setif: bufmode %d", ud->ud_bufmode); - } - if (uif != ud->ud_bif) { - if (ud->ud_bif) - /* - * Detach if attached to something else. - */ - usbpf_detachd(ud); - - usbpf_attachd(ud, uif); - } - USBPFD_LOCK(ud); - usbpf_reset_d(ud); - USBPFD_UNLOCK(ud); - return (0); -} - -/* - * Set d's packet filter program to fp. If this file already has a filter, - * free it and replace it. Returns EINVAL for bogus requests. - */ -static int -usbpf_setf(struct usbpf_d *ud, struct usbpf_program *fp, u_long cmd) -{ - struct usbpf_insn *fcode, *old; - u_int wfilter, flen, size; - - if (cmd == UIOCSETWF) { - old = ud->ud_wfilter; - wfilter = 1; - } else { - wfilter = 0; - old = ud->ud_rfilter; - } - if (fp->uf_insns == NULL) { - if (fp->uf_len != 0) - return (EINVAL); - USBPFD_LOCK(ud); - if (wfilter) - ud->ud_wfilter = NULL; - else { - ud->ud_rfilter = NULL; - if (cmd == UIOCSETF) - usbpf_reset_d(ud); - } - USBPFD_UNLOCK(ud); - if (old != NULL) - free((caddr_t)old, M_USBPF); - return (0); - } - flen = fp->uf_len; - if (flen > usbpf_maxinsns) - return (EINVAL); - - size = flen * sizeof(*fp->uf_insns); - fcode = (struct usbpf_insn *)malloc(size, M_USBPF, M_WAITOK); - if (copyin((caddr_t)fp->uf_insns, (caddr_t)fcode, size) == 0 && - usbpf_validate(fcode, (int)flen)) { - USBPFD_LOCK(ud); - if (wfilter) - ud->ud_wfilter = fcode; - else { - ud->ud_rfilter = fcode; - if (cmd == UIOCSETF) - usbpf_reset_d(ud); - } - USBPFD_UNLOCK(ud); - if (old != NULL) - free((caddr_t)old, M_USBPF); - - return (0); - } - free((caddr_t)fcode, M_USBPF); - return (EINVAL); -} - -static int -usbpf_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, - struct thread *td) -{ - struct usbpf_d *ud; - int error; - - error = devfs_get_cdevpriv((void **)&ud); - if (error != 0) - return (error); - - /* - * Refresh PID associated with this descriptor. - */ - USBPFD_LOCK(ud); - ud->ud_pid = td->td_proc->p_pid; - if (ud->ud_state == USBPF_WAITING) - callout_stop(&ud->ud_callout); - ud->ud_state = USBPF_IDLE; - USBPFD_UNLOCK(ud); - - if (ud->ud_locked == 1) { - switch (cmd) { - case UIOCGBLEN: - case UIOCSBLEN: - case UIOCVERSION: - break; - default: - return (EPERM); - } - } - - switch (cmd) { - - default: - error = EINVAL; - break; - - /* - * Get buffer len [for read()]. - */ - case UIOCGBLEN: - *(u_int *)addr = ud->ud_bufsize; - break; - - /* - * Set buffer length. - */ - case UIOCSBLEN: - error = usbpf_ioctl_sblen(ud, (u_int *)addr); - break; - - /* - * Set read filter. - */ - case UIOCSETF: - error = usbpf_setf(ud, (struct usbpf_program *)addr, cmd); - break; - - /* - * Set read timeout. - */ - case UIOCSRTIMEOUT: - { - struct timeval *tv = (struct timeval *)addr; - - /* - * Subtract 1 tick from tvtohz() since this isn't - * a one-shot timer. - */ - if ((error = itimerfix(tv)) == 0) - ud->ud_rtout = tvtohz(tv) - 1; - break; - } - - /* - * Get read timeout. - */ - case UIOCGRTIMEOUT: - { - struct timeval *tv = (struct timeval *)addr; - - tv->tv_sec = ud->ud_rtout / hz; - tv->tv_usec = (ud->ud_rtout % hz) * tick; - break; - } - - /* - * Get packet stats. - */ - case UIOCGSTATS: - { - struct usbpf_stat *us = (struct usbpf_stat *)addr; - - /* XXXCSJP overflow */ - us->us_recv = ud->ud_rcount; - us->us_drop = ud->ud_dcount; - break; - } - - case UIOCVERSION: - { - struct usbpf_version *uv = (struct usbpf_version *)addr; - - uv->uv_major = USBPF_MAJOR_VERSION; - uv->uv_minor = USBPF_MINOR_VERSION; - break; - } - - /* - * Set interface. - */ - case UIOCSETIF: - error = usbpf_setif(ud, (struct usbpf_ifreq *)addr); - break; - - } - return (error); -} - -/* - * Support for select() and poll() system calls - * - * Return true iff the specific operation will not block indefinitely. - * Otherwise, return false but make a note that a selwakeup() must be done. - */ -static int -usbpf_poll(struct cdev *dev, int events, struct thread *td) -{ - - /* NOT IMPLEMENTED */ - return (ENOSYS); -} - -/* - * Support for kevent() system call. Register EVFILT_READ filters and - * reject all others. - */ -int -usbpf_kqfilter(struct cdev *dev, struct knote *kn) -{ - - /* NOT IMPLEMENTED */ - return (ENOSYS); -} - -/* - * Attach file to the usbpf interface, i.e. make d listen on bp. - */ -static void -usbpf_attachd(struct usbpf_d *ud, struct usbpf_if *uif) -{ - - USBPFIF_LOCK(uif); - ud->ud_bif = uif; - LIST_INSERT_HEAD(&uif->uif_dlist, ud, ud_next); - - usbpf_uifd_cnt++; - USBPFIF_UNLOCK(uif); -} - -/* - * Detach a file from its interface. - */ -static void -usbpf_detachd(struct usbpf_d *ud) -{ - struct usbpf_if *uif; - struct usb_bus *ubus; - - uif = ud->ud_bif; - USBPFIF_LOCK(uif); - USBPFD_LOCK(ud); - ubus = ud->ud_bif->uif_ubus; - - /* - * Remove d from the interface's descriptor list. - */ - LIST_REMOVE(ud, ud_next); - - usbpf_uifd_cnt--; - ud->ud_bif = NULL; - USBPFD_UNLOCK(ud); - USBPFIF_UNLOCK(uif); -} - -void -usbpf_attach(struct usb_bus *ubus, struct usbpf_if **driverp) -{ - struct usbpf_if *uif; - - uif = malloc(sizeof(*uif), M_USBPF, M_WAITOK | M_ZERO); - LIST_INIT(&uif->uif_dlist); - uif->uif_ubus = ubus; - mtx_init(&uif->uif_mtx, "usbpf interface lock", NULL, MTX_DEF); - KASSERT(*driverp == NULL, - ("usbpf_attach: driverp already initialized")); - *driverp = uif; - - mtx_lock(&usbpf_mtx); - LIST_INSERT_HEAD(&usbpf_iflist, uif, uif_next); - mtx_unlock(&usbpf_mtx); - if (bootverbose) device_printf(ubus->parent, "usbpf attached\n"); } -/* - * If there are processes sleeping on this descriptor, wake them up. - */ -static __inline void -usbpf_wakeup(struct usbpf_d *ud) -{ - - USBPFD_LOCK_ASSERT(ud); - if (ud->ud_state == USBPF_WAITING) { - callout_stop(&ud->ud_callout); - ud->ud_state = USBPF_IDLE; - } - wakeup(ud); - if (ud->ud_async && ud->ud_sig && ud->ud_sigio) - pgsigio(&ud->ud_sigio, ud->ud_sig, 0); - - selwakeuppri(&ud->ud_sel, PRIUSB); - KNOTE_LOCKED(&ud->ud_sel.si_note, 0); -} - void usbpf_detach(struct usb_bus *ubus) { - struct usbpf_if *uif; - struct usbpf_d *ud; + struct ifnet *ifp = ubus->ifp; - /* Locate USBPF interface information */ - mtx_lock(&usbpf_mtx); - LIST_FOREACH(uif, &usbpf_iflist, uif_next) { - if (ubus == uif->uif_ubus) - break; + if (ifp != NULL) { + bpfdetach(ifp); + if_detach(ifp); + if_free(ifp); } - - /* Interface wasn't attached */ - if ((uif == NULL) || (uif->uif_ubus == NULL)) { - mtx_unlock(&usbpf_mtx); - printf("usbpf_detach: not attached\n"); /* XXX */ - return; - } - - LIST_REMOVE(uif, uif_next); - mtx_unlock(&usbpf_mtx); - - while ((ud = LIST_FIRST(&uif->uif_dlist)) != NULL) { - usbpf_detachd(ud); - USBPFD_LOCK(ud); - usbpf_wakeup(ud); - USBPFD_UNLOCK(ud); - } - - mtx_destroy(&uif->uif_mtx); - free(uif, M_USBPF); + ubus->ifp = NULL; } -/* Time stamping functions */ -#define USBPF_T_MICROTIME 0x0000 -#define USBPF_T_NANOTIME 0x0001 -#define USBPF_T_BINTIME 0x0002 -#define USBPF_T_NONE 0x0003 -#define USBPF_T_FORMAT_MASK 0x0003 -#define USBPF_T_NORMAL 0x0000 -#define USBPF_T_FAST 0x0100 -#define USBPF_T_MONOTONIC 0x0200 -#define USBPF_T_FORMAT(t) ((t) & USBPF_T_FORMAT_MASK) - -#define USBPF_TSTAMP_NONE 0 -#define USBPF_TSTAMP_FAST 1 -#define USBPF_TSTAMP_NORMAL 2 - -static int -usbpf_ts_quality(int tstype) -{ - - if (tstype == USBPF_T_NONE) - return (USBPF_TSTAMP_NONE); - if ((tstype & USBPF_T_FAST) != 0) - return (USBPF_TSTAMP_FAST); - - return (USBPF_TSTAMP_NORMAL); -} - -static int -usbpf_gettime(struct bintime *bt, int tstype) -{ - int quality; - - quality = usbpf_ts_quality(tstype); - if (quality == USBPF_TSTAMP_NONE) - return (quality); - if (quality == USBPF_TSTAMP_NORMAL) - binuptime(bt); - else - getbinuptime(bt); - - return (quality); -} - -/* - * If the buffer mechanism has a way to decide that a held buffer can be made - * free, then it is exposed via the usbpf_canfreebuf() interface. (1) is - * returned if the buffer can be discarded, (0) is returned if it cannot. - */ -static int -usbpf_canfreebuf(struct usbpf_d *ud) -{ - - USBPFD_LOCK_ASSERT(ud); - - return (0); -} - -/* - * Allow the buffer model to indicate that the current store buffer is - * immutable, regardless of the appearance of space. Return (1) if the - * buffer is writable, and (0) if not. - */ -static int -usbpf_canwritebuf(struct usbpf_d *ud) -{ - - USBPFD_LOCK_ASSERT(ud); - return (1); -} - -/* - * Notify buffer model that an attempt to write to the store buffer has - * resulted in a dropped packet, in which case the buffer may be considered - * full. - */ -static void -usbpf_buffull(struct usbpf_d *ud) -{ - - USBPFD_LOCK_ASSERT(ud); -} - -/* - * This function gets called when the free buffer is re-assigned. - */ -static void -usbpf_buf_reclaimed(struct usbpf_d *ud) -{ - - USBPFD_LOCK_ASSERT(ud); - - switch (ud->ud_bufmode) { - case USBPF_BUFMODE_BUFFER: - return; - - default: - panic("usbpf_buf_reclaimed"); - } -} - -#define SIZEOF_USBPF_HDR(type) \ - (offsetof(type, uh_hdrlen) + sizeof(((type *)0)->uh_hdrlen)) - -static int -usbpf_hdrlen(struct usbpf_d *ud) -{ - int hdrlen; - - hdrlen = ud->ud_bif->uif_hdrlen; - hdrlen += SIZEOF_USBPF_HDR(struct usbpf_xhdr); - hdrlen = USBPF_WORDALIGN(hdrlen); - - return (hdrlen - ud->ud_bif->uif_hdrlen); -} - -static void -usbpf_bintime2ts(struct bintime *bt, struct usbpf_ts *ts, int tstype) -{ - struct bintime bt2; - struct timeval tsm; - struct timespec tsn; - - if ((tstype & USBPF_T_MONOTONIC) == 0) { - bt2 = *bt; - bintime_add(&bt2, &boottimebin); - bt = &bt2; - } - switch (USBPF_T_FORMAT(tstype)) { - case USBPF_T_MICROTIME: - bintime2timeval(bt, &tsm); - ts->ut_sec = tsm.tv_sec; - ts->ut_frac = tsm.tv_usec; - break; - case USBPF_T_NANOTIME: - bintime2timespec(bt, &tsn); - ts->ut_sec = tsn.tv_sec; - ts->ut_frac = tsn.tv_nsec; - break; - case USBPF_T_BINTIME: - ts->ut_sec = bt->sec; - ts->ut_frac = bt->frac; - break; - } -} - -/* - * Move the packet data from interface memory (pkt) into the - * store buffer. "cpfn" is the routine called to do the actual data - * transfer. bcopy is passed in to copy contiguous chunks, while - * usbpf_append_mbuf is passed in to copy mbuf chains. In the latter case, - * pkt is really an mbuf. - */ -static void -catchpacket(struct usbpf_d *ud, u_char *pkt, u_int pktlen, u_int snaplen, - void (*cpfn)(struct usbpf_d *, caddr_t, u_int, void *, u_int), - struct bintime *bt) -{ - struct usbpf_xhdr hdr; - int caplen, curlen, hdrlen, totlen; - int do_wakeup = 0; - int do_timestamp; - int tstype; - - USBPFD_LOCK_ASSERT(ud); - - /* - * Detect whether user space has released a buffer back to us, and if - * so, move it from being a hold buffer to a free buffer. This may - * not be the best place to do it (for example, we might only want to - * run this check if we need the space), but for now it's a reliable - * spot to do it. - */ - if (ud->ud_fbuf == NULL && usbpf_canfreebuf(ud)) { - ud->ud_fbuf = ud->ud_hbuf; - ud->ud_hbuf = NULL; - ud->ud_hlen = 0; - usbpf_buf_reclaimed(ud); - } - - /* - * Figure out how many bytes to move. If the packet is - * greater or equal to the snapshot length, transfer that - * much. Otherwise, transfer the whole packet (unless - * we hit the buffer size limit). - */ - hdrlen = usbpf_hdrlen(ud); - totlen = hdrlen + min(snaplen, pktlen); - if (totlen > ud->ud_bufsize) - totlen = ud->ud_bufsize; - - /* - * Round up the end of the previous packet to the next longword. - * - * Drop the packet if there's no room and no hope of room - * If the packet would overflow the storage buffer or the storage - * buffer is considered immutable by the buffer model, try to rotate - * the buffer and wakeup pending processes. - */ - curlen = USBPF_WORDALIGN(ud->ud_slen); - if (curlen + totlen > ud->ud_bufsize || !usbpf_canwritebuf(ud)) { - if (ud->ud_fbuf == NULL) { - /* - * There's no room in the store buffer, and no - * prospect of room, so drop the packet. Notify the - * buffer model. - */ - usbpf_buffull(ud); - ++ud->ud_dcount; - return; - } - USBPF_ROTATE_BUFFERS(ud); - do_wakeup = 1; - curlen = 0; - } else if (ud->ud_immediate || ud->ud_state == USBPF_TIMED_OUT) - /* - * Immediate mode is set, or the read timeout has already - * expired during a select call. A packet arrived, so the - * reader should be woken up. - */ - do_wakeup = 1; - caplen = totlen - hdrlen; - tstype = ud->ud_tstamp; - do_timestamp = tstype != USBPF_T_NONE; - - /* - * Append the usbpf header. Note we append the actual header size, but - * move forward the length of the header plus padding. - */ - bzero(&hdr, sizeof(hdr)); - if (do_timestamp) - usbpf_bintime2ts(bt, &hdr.uh_tstamp, tstype); - hdr.uh_datalen = pktlen; - hdr.uh_hdrlen = hdrlen; - hdr.uh_caplen = caplen; - usbpf_append_bytes(ud, ud->ud_sbuf, curlen, &hdr, sizeof(hdr)); - - /* - * Copy the packet data into the store buffer and update its length. - */ - (*cpfn)(ud, ud->ud_sbuf, curlen + hdrlen, pkt, caplen); - ud->ud_slen = curlen + totlen; - - if (do_wakeup) - usbpf_wakeup(ud); -} - -/* - * Incoming linkage from device drivers. Process the packet pkt, of length - * pktlen, which is stored in a contiguous buffer. The packet is parsed - * by each process' filter, and if accepted, stashed into the corresponding - * buffer. - */ -static void -usbpf_tap(struct usbpf_if *uif, u_char *pkt, u_int pktlen) -{ - struct bintime bt; - struct usbpf_d *ud; - u_int slen; - int gottime; - - gottime = USBPF_TSTAMP_NONE; - USBPFIF_LOCK(uif); - LIST_FOREACH(ud, &uif->uif_dlist, ud_next) { - USBPFD_LOCK(ud); - ++ud->ud_rcount; - slen = usbpf_filter(ud->ud_rfilter, pkt, pktlen, pktlen); - if (slen != 0) { - ud->ud_fcount++; - if (gottime < usbpf_ts_quality(ud->ud_tstamp)) - gottime = usbpf_gettime(&bt, ud->ud_tstamp); - catchpacket(ud, pkt, pktlen, slen, - usbpf_append_bytes, &bt); - } - USBPFD_UNLOCK(ud); - } - USBPFIF_UNLOCK(uif); -} - static uint32_t usbpf_aggregate_xferflags(struct usb_xfer_flags *flags) { uint32_t val = 0; if (flags->force_short_xfer == 1) val |= USBPF_FLAG_FORCE_SHORT_XFER; if (flags->short_xfer_ok == 1) val |= USBPF_FLAG_SHORT_XFER_OK; if (flags->short_frames_ok == 1) val |= USBPF_FLAG_SHORT_FRAMES_OK; if (flags->pipe_bof == 1) val |= USBPF_FLAG_PIPE_BOF; if (flags->proxy_buffer == 1) val |= USBPF_FLAG_PROXY_BUFFER; if (flags->ext_buffer == 1) val |= USBPF_FLAG_EXT_BUFFER; if (flags->manual_status == 1) val |= USBPF_FLAG_MANUAL_STATUS; if (flags->no_pipe_ok == 1) val |= USBPF_FLAG_NO_PIPE_OK; if (flags->stall_pipe == 1) val |= USBPF_FLAG_STALL_PIPE; return (val); } static uint32_t usbpf_aggregate_status(struct usb_xfer_flags_int *flags) { uint32_t val = 0; if (flags->open == 1) val |= USBPF_STATUS_OPEN; if (flags->transferring == 1) val |= USBPF_STATUS_TRANSFERRING; if (flags->did_dma_delay == 1) val |= USBPF_STATUS_DID_DMA_DELAY; if (flags->did_close == 1) val |= USBPF_STATUS_DID_CLOSE; if (flags->draining == 1) val |= USBPF_STATUS_DRAINING; if (flags->started == 1) val |= USBPF_STATUS_STARTED; if (flags->bandwidth_reclaimed == 1) val |= USBPF_STATUS_BW_RECLAIMED; if (flags->control_xfr == 1) val |= USBPF_STATUS_CONTROL_XFR; if (flags->control_hdr == 1) val |= USBPF_STATUS_CONTROL_HDR; if (flags->control_act == 1) val |= USBPF_STATUS_CONTROL_ACT; if (flags->control_stall == 1) val |= USBPF_STATUS_CONTROL_STALL; if (flags->short_frames_ok == 1) val |= USBPF_STATUS_SHORT_FRAMES_OK; if (flags->short_xfer_ok == 1) val |= USBPF_STATUS_SHORT_XFER_OK; #if USB_HAVE_BUSDMA if (flags->bdma_enable == 1) val |= USBPF_STATUS_BDMA_ENABLE; if (flags->bdma_no_post_sync == 1) val |= USBPF_STATUS_BDMA_NO_POST_SYNC; if (flags->bdma_setup == 1) val |= USBPF_STATUS_BDMA_SETUP; #endif if (flags->isochronous_xfr == 1) val |= USBPF_STATUS_ISOCHRONOUS_XFR; if (flags->curr_dma_set == 1) val |= USBPF_STATUS_CURR_DMA_SET; if (flags->can_cancel_immed == 1) val |= USBPF_STATUS_CAN_CANCEL_IMMED; if (flags->doing_callback == 1) val |= USBPF_STATUS_DOING_CALLBACK; return (val); } void usbpf_xfertap(struct usb_xfer *xfer, int type) { struct usb_endpoint *ep = xfer->endpoint; struct usb_page_search res; struct usb_xfer_root *info = xfer->xroot; struct usb_bus *bus = info->bus; struct usbpf_pkthdr *up; usb_frlength_t isoc_offset = 0; int i; char *buf, *ptr, *end; - /* - * NB: usbpf_uifd_cnt isn't protected by USBPFIF_LOCK() because it's - * not harmful. - */ - if (usbpf_uifd_cnt == 0) + if (!bpf_peers_present(bus->ifp->if_bpf)) return; /* * XXX TODO * Allocating the buffer here causes copy operations twice what's * really inefficient. Copying usbpf_pkthdr and data is for USB packet * read filter to pass a virtually linear buffer. */ buf = ptr = malloc(sizeof(struct usbpf_pkthdr) + (USB_PAGE_SIZE * 5), - M_USBPF, M_NOWAIT); + M_TEMP, M_NOWAIT); if (buf == NULL) { printf("usbpf_xfertap: out of memory\n"); /* XXX */ return; } end = buf + sizeof(struct usbpf_pkthdr) + (USB_PAGE_SIZE * 5); bzero(ptr, sizeof(struct usbpf_pkthdr)); up = (struct usbpf_pkthdr *)ptr; up->up_busunit = htole32(device_get_unit(bus->bdev)); up->up_type = type; up->up_xfertype = ep->edesc->bmAttributes & UE_XFERTYPE; up->up_address = xfer->address; up->up_endpoint = xfer->endpointno; up->up_flags = htole32(usbpf_aggregate_xferflags(&xfer->flags)); up->up_status = htole32(usbpf_aggregate_status(&xfer->flags_int)); switch (type) { case USBPF_XFERTAP_SUBMIT: up->up_length = htole32(xfer->sumlen); up->up_frames = htole32(xfer->nframes); break; case USBPF_XFERTAP_DONE: up->up_length = htole32(xfer->actlen); up->up_frames = htole32(xfer->aframes); break; default: panic("wrong usbpf type (%d)", type); } up->up_error = htole32(xfer->error); up->up_interval = htole32(xfer->interval); ptr += sizeof(struct usbpf_pkthdr); for (i = 0; i < up->up_frames; i++) { if (ptr + sizeof(u_int32_t) >= end) goto done; *((u_int32_t *)ptr) = htole32(xfer->frlengths[i]); ptr += sizeof(u_int32_t); if (ptr + xfer->frlengths[i] >= end) goto done; if (xfer->flags_int.isochronous_xfr == 1) { usbd_get_page(&xfer->frbuffers[0], isoc_offset, &res); isoc_offset += xfer->frlengths[i]; } else usbd_get_page(&xfer->frbuffers[i], 0, &res); bcopy(res.buffer, ptr, xfer->frlengths[i]); ptr += xfer->frlengths[i]; } - usbpf_tap(bus->uif, buf, ptr - buf); + bpf_tap(bus->ifp->if_bpf, buf, ptr - buf); done: - free(buf, M_USBPF); + free(buf, M_TEMP); } - -static void -usbpf_append_bytes(struct usbpf_d *ud, caddr_t buf, u_int offset, void *src, - u_int len) -{ - - USBPFD_LOCK_ASSERT(ud); - - switch (ud->ud_bufmode) { - case USBPF_BUFMODE_BUFFER: - return (usbpf_buffer_append_bytes(ud, buf, offset, src, len)); - default: - panic("usbpf_buf_append_bytes"); - } -} - -static void -usbpf_drvinit(void *unused) -{ - struct cdev *dev; - - mtx_init(&usbpf_mtx, "USB packet filter global lock", NULL, - MTX_DEF); - LIST_INIT(&usbpf_iflist); - - dev = make_dev(&usbpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "usbpf"); -} - -SYSINIT(usbpf_dev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, usbpf_drvinit, NULL); Index: projects/binutils-2.17/sys/dev/usb/usb_pf.h =================================================================== --- projects/binutils-2.17/sys/dev/usb/usb_pf.h (revision 215829) +++ projects/binutils-2.17/sys/dev/usb/usb_pf.h (revision 215830) @@ -1,319 +1,98 @@ /*- * Copyright (c) 1990, 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from the Stanford/CMU enet packet filter, * (net/enet.c) distributed as part of 4.3BSD, and code contributed * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence * Berkeley Laboratory. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _DEV_USB_PF_H #define _DEV_USB_PF_H -#ifdef _KERNEL -#include -#include -#include -#include -#endif - -typedef int32_t usbpf_int32; -typedef u_int32_t usbpf_u_int32; -typedef int64_t usbpf_int64; -typedef u_int64_t usbpf_u_int64; - -struct usbpf_if; - -/* - * Alignment macros. USBPF_WORDALIGN rounds up to the next - * even multiple of USBPF_ALIGNMENT. - */ -#define USBPF_ALIGNMENT sizeof(long) -#define USBPF_WORDALIGN(x) (((x)+(USBPF_ALIGNMENT-1))&~(USBPF_ALIGNMENT-1)) - -/* - * The instruction encodings. - */ - -/* instruction classes */ -#define USBPF_CLASS(code) ((code) & 0x07) -#define USBPF_LD 0x00 -#define USBPF_LDX 0x01 -#define USBPF_ST 0x02 -#define USBPF_STX 0x03 -#define USBPF_ALU 0x04 -#define USBPF_JMP 0x05 -#define USBPF_RET 0x06 -#define USBPF_MISC 0x07 - -/* ld/ldx fields */ -#define USBPF_SIZE(code) ((code) & 0x18) -#define USBPF_W 0x00 -#define USBPF_H 0x08 -#define USBPF_B 0x10 -#define USBPF_MODE(code) ((code) & 0xe0) -#define USBPF_IMM 0x00 -#define USBPF_ABS 0x20 -#define USBPF_IND 0x40 -#define USBPF_MEM 0x60 -#define USBPF_LEN 0x80 -#define USBPF_MSH 0xa0 - -/* alu/jmp fields */ -#define USBPF_OP(code) ((code) & 0xf0) -#define USBPF_ADD 0x00 -#define USBPF_SUB 0x10 -#define USBPF_MUL 0x20 -#define USBPF_DIV 0x30 -#define USBPF_OR 0x40 -#define USBPF_AND 0x50 -#define USBPF_LSH 0x60 -#define USBPF_RSH 0x70 -#define USBPF_NEG 0x80 -#define USBPF_JA 0x00 -#define USBPF_JEQ 0x10 -#define USBPF_JGT 0x20 -#define USBPF_JGE 0x30 -#define USBPF_JSET 0x40 -#define USBPF_SRC(code) ((code) & 0x08) -#define USBPF_K 0x00 -#define USBPF_X 0x08 - -/* ret - USBPF_K and USBPF_X also apply */ -#define USBPF_RVAL(code) ((code) & 0x18) -#define USBPF_A 0x10 - -/* misc */ -#define USBPF_MISCOP(code) ((code) & 0xf8) -#define USBPF_TAX 0x00 -#define USBPF_TXA 0x80 - -/* - * The instruction data structure. - */ -struct usbpf_insn { - u_short code; - u_char jt; - u_char jf; - usbpf_u_int32 k; -}; - -#ifdef _KERNEL - -/* - * Descriptor associated with each open uff file. - */ - -struct usbpf_d { - LIST_ENTRY(usbpf_d) ud_next; /* Linked list of descriptors */ - /* - * Buffer slots: two memory buffers store the incoming packets. - * The model has three slots. Sbuf is always occupied. - * sbuf (store) - Receive interrupt puts packets here. - * hbuf (hold) - When sbuf is full, put buffer here and - * wakeup read (replace sbuf with fbuf). - * fbuf (free) - When read is done, put buffer here. - * On receiving, if sbuf is full and fbuf is 0, packet is dropped. - */ - caddr_t ud_sbuf; /* store slot */ - caddr_t ud_hbuf; /* hold slot */ - caddr_t ud_fbuf; /* free slot */ - int ud_slen; /* current length of store buffer */ - int ud_hlen; /* current length of hold buffer */ - - int ud_bufsize; /* absolute length of buffers */ - - struct usbpf_if *ud_bif; /* interface descriptor */ - u_long ud_rtout; /* Read timeout in 'ticks' */ - struct usbpf_insn *ud_rfilter; /* read filter code */ - struct usbpf_insn *ud_wfilter; /* write filter code */ - void *ud_bfilter; /* binary filter code */ - u_int64_t ud_rcount; /* number of packets received */ - u_int64_t ud_dcount; /* number of packets dropped */ - - u_char ud_promisc; /* true if listening promiscuously */ - u_char ud_state; /* idle, waiting, or timed out */ - u_char ud_immediate; /* true to return on packet arrival */ - int ud_hdrcmplt; /* false to fill in src lladdr automatically */ - int ud_direction; /* select packet direction */ - int ud_tstamp; /* select time stamping function */ - int ud_feedback; /* true to feed back sent packets */ - int ud_async; /* non-zero if packet reception should generate signal */ - int ud_sig; /* signal to send upon packet reception */ - struct sigio * ud_sigio; /* information for async I/O */ - struct selinfo ud_sel; /* bsd select info */ - struct mtx ud_mtx; /* mutex for this descriptor */ - struct callout ud_callout; /* for USBPF timeouts with select */ - struct label *ud_label; /* MAC label for descriptor */ - u_int64_t ud_fcount; /* number of packets which matched filter */ - pid_t ud_pid; /* PID which created descriptor */ - int ud_locked; /* true if descriptor is locked */ - u_int ud_bufmode; /* Current buffer mode. */ - u_int64_t ud_wcount; /* number of packets written */ - u_int64_t ud_wfcount; /* number of packets that matched write filter */ - u_int64_t ud_wdcount; /* number of packets dropped during a write */ - u_int64_t ud_zcopy; /* number of zero copy operations */ - u_char ud_compat32; /* 32-bit stream on LP64 system */ -}; - -#define USBPFD_LOCK(ud) mtx_lock(&(ud)->ud_mtx) -#define USBPFD_UNLOCK(ud) mtx_unlock(&(ud)->ud_mtx) -#define USBPFD_LOCK_ASSERT(ud) mtx_assert(&(ud)->ud_mtx, MA_OWNED) - -/* - * Descriptor associated with each attached hardware interface. - */ -struct usbpf_if { - LIST_ENTRY(usbpf_if) uif_next; /* list of all interfaces */ - LIST_HEAD(, usbpf_d) uif_dlist; /* descriptor list */ - u_int uif_hdrlen; /* length of link header */ - struct usb_bus *uif_ubus; /* corresponding interface */ - struct mtx uif_mtx; /* mutex for interface */ -}; - -#define USBPFIF_LOCK(uif) mtx_lock(&(uif)->uif_mtx) -#define USBPFIF_UNLOCK(uif) mtx_unlock(&(uif)->uif_mtx) - -#endif - -/* - * Structure prepended to each packet. - */ -struct usbpf_ts { - usbpf_int64 ut_sec; /* seconds */ - usbpf_u_int64 ut_frac; /* fraction */ -}; -struct usbpf_xhdr { - struct usbpf_ts uh_tstamp; /* time stamp */ - usbpf_u_int32 uh_caplen; /* length of captured portion */ - usbpf_u_int32 uh_datalen; /* original length of packet */ - u_short uh_hdrlen; /* length of uff header (this struct - plus alignment padding) */ -}; - -#define USBPF_BUFMODE_BUFFER 1 /* Kernel buffers with read(). */ -#define USBPF_BUFMODE_ZBUF 2 /* Zero-copy buffers. */ - struct usbpf_pkthdr { int up_busunit; /* Host controller unit number */ u_char up_address; /* USB device address */ u_char up_endpoint; /* USB endpoint */ u_char up_type; /* points SUBMIT / DONE */ u_char up_xfertype; /* Transfer type */ u_int32_t up_flags; /* Transfer flags */ #define USBPF_FLAG_FORCE_SHORT_XFER (1 << 0) #define USBPF_FLAG_SHORT_XFER_OK (1 << 1) #define USBPF_FLAG_SHORT_FRAMES_OK (1 << 2) #define USBPF_FLAG_PIPE_BOF (1 << 3) #define USBPF_FLAG_PROXY_BUFFER (1 << 4) #define USBPF_FLAG_EXT_BUFFER (1 << 5) #define USBPF_FLAG_MANUAL_STATUS (1 << 6) #define USBPF_FLAG_NO_PIPE_OK (1 << 7) #define USBPF_FLAG_STALL_PIPE (1 << 8) u_int32_t up_status; /* Transfer status */ #define USBPF_STATUS_OPEN (1 << 0) #define USBPF_STATUS_TRANSFERRING (1 << 1) #define USBPF_STATUS_DID_DMA_DELAY (1 << 2) #define USBPF_STATUS_DID_CLOSE (1 << 3) #define USBPF_STATUS_DRAINING (1 << 4) #define USBPF_STATUS_STARTED (1 << 5) #define USBPF_STATUS_BW_RECLAIMED (1 << 6) #define USBPF_STATUS_CONTROL_XFR (1 << 7) #define USBPF_STATUS_CONTROL_HDR (1 << 8) #define USBPF_STATUS_CONTROL_ACT (1 << 9) #define USBPF_STATUS_CONTROL_STALL (1 << 10) #define USBPF_STATUS_SHORT_FRAMES_OK (1 << 11) #define USBPF_STATUS_SHORT_XFER_OK (1 << 12) #if USB_HAVE_BUSDMA #define USBPF_STATUS_BDMA_ENABLE (1 << 13) #define USBPF_STATUS_BDMA_NO_POST_SYNC (1 << 14) #define USBPF_STATUS_BDMA_SETUP (1 << 15) #endif #define USBPF_STATUS_ISOCHRONOUS_XFR (1 << 16) #define USBPF_STATUS_CURR_DMA_SET (1 << 17) #define USBPF_STATUS_CAN_CANCEL_IMMED (1 << 18) #define USBPF_STATUS_DOING_CALLBACK (1 << 19) u_int32_t up_length; /* Total data length (submit/actual) */ u_int32_t up_frames; /* USB frame number (submit/actual) */ u_int32_t up_error; /* usb_error_t */ u_int32_t up_interval; /* for interrupt and isoc */ /* sizeof(struct usbpf_pkthdr) == 128 bytes */ u_char up_reserved[96]; }; -struct usbpf_version { - u_short uv_major; - u_short uv_minor; -}; -#define USBPF_MAJOR_VERSION 1 -#define USBPF_MINOR_VERSION 1 +#define USBPF_HDR_LEN 128 -#define USBPF_IFNAMSIZ 32 -struct usbpf_ifreq { - /* bus name, e.g. "usbus0" */ - char ufr_name[USBPF_IFNAMSIZ]; -}; - -/* - * Structure for UIOCSETF. - */ -struct usbpf_program { - u_int uf_len; - struct usbpf_insn *uf_insns; -}; - -/* - * Struct returned by UIOCGSTATS. - */ -struct usbpf_stat { - u_int us_recv; /* number of packets received */ - u_int us_drop; /* number of packets dropped */ -}; - -#define UIOCGBLEN _IOR('U', 102, u_int) -#define UIOCSBLEN _IOWR('U', 102, u_int) -#define UIOCSETF _IOW('U', 103, struct usbpf_program) -#define UIOCSETIF _IOW('U', 108, struct usbpf_ifreq) -#define UIOCSRTIMEOUT _IOW('U', 109, struct timeval) -#define UIOCGRTIMEOUT _IOR('U', 110, struct timeval) -#define UIOCGSTATS _IOR('U', 111, struct usbpf_stat) -#define UIOCVERSION _IOR('U', 113, struct usbpf_version) -#define UIOCSETWF _IOW('U', 123, struct usbpf_program) - #define USBPF_XFERTAP_SUBMIT 0 #define USBPF_XFERTAP_DONE 1 #ifdef _KERNEL -void usbpf_attach(struct usb_bus *, struct usbpf_if **); +void usbpf_attach(struct usb_bus *); void usbpf_detach(struct usb_bus *); void usbpf_xfertap(struct usb_xfer *, int); #endif #endif Index: projects/binutils-2.17/sys/dev/usb/usbdevs =================================================================== --- projects/binutils-2.17/sys/dev/usb/usbdevs (revision 215829) +++ projects/binutils-2.17/sys/dev/usb/usbdevs (revision 215830) @@ -1,3419 +1,3419 @@ $FreeBSD$ /* $NetBSD: usbdevs,v 1.392 2004/12/29 08:38:44 imp Exp $ */ /*- * Copyright (c) 1998-2004 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Lennart Augustsson (lennart@augustsson.net) at * Carlstedt Research & Technology. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * List of known USB vendors * * USB.org publishes a VID list of USB-IF member companies at * http://www.usb.org/developers/tools * Note that it does not show companies that have obtained a Vendor ID * without becoming full members. * * Please note that these IDs do not do anything. Adding an ID here and * regenerating the usbdevs.h and usbdevs_data.h only makes a symbolic name * available to the source code and does not change any functionality, nor * does it make your device available to a specific driver. * It will however make the descriptive string available if a device does not * provide the string itself. * * After adding a vendor ID VNDR and a product ID PRDCT you will have the * following extra defines: * #define USB_VENDOR_VNDR 0x???? * #define USB_PRODUCT_VNDR_PRDCT 0x???? * * You may have to add these defines to the respective probe routines to * make the device recognised by the appropriate device driver. */ vendor UNKNOWN1 0x0053 Unknown vendor vendor UNKNOWN2 0x0105 Unknown vendor vendor EGALAX2 0x0123 eGalax, Inc. vendor CHIPSBANK 0x0204 Chipsbank Microelectronics Co. vendor HUMAX 0x02ad HUMAX vendor LTS 0x0386 LTS vendor BWCT 0x03da Bernd Walter Computer Technology vendor AOX 0x03e8 AOX vendor THESYS 0x03e9 Thesys vendor DATABROADCAST 0x03ea Data Broadcasting vendor ATMEL 0x03eb Atmel vendor IWATSU 0x03ec Iwatsu America vendor MITSUMI 0x03ee Mitsumi vendor HP 0x03f0 Hewlett Packard vendor GENOA 0x03f1 Genoa vendor OAK 0x03f2 Oak vendor ADAPTEC 0x03f3 Adaptec vendor DIEBOLD 0x03f4 Diebold vendor SIEMENSELECTRO 0x03f5 Siemens Electromechanical vendor EPSONIMAGING 0x03f8 Epson Imaging vendor KEYTRONIC 0x03f9 KeyTronic vendor OPTI 0x03fb OPTi vendor ELITEGROUP 0x03fc Elitegroup vendor XILINX 0x03fd Xilinx vendor FARALLON 0x03fe Farallon Communications vendor NATIONAL 0x0400 National Semiconductor vendor NATIONALREG 0x0401 National Registry vendor ACERLABS 0x0402 Acer Labs vendor FTDI 0x0403 Future Technology Devices vendor NCR 0x0404 NCR vendor SYNOPSYS2 0x0405 Synopsys vendor FUJITSUICL 0x0406 Fujitsu-ICL vendor FUJITSU2 0x0407 Fujitsu Personal Systems vendor QUANTA 0x0408 Quanta vendor NEC 0x0409 NEC vendor KODAK 0x040a Eastman Kodak vendor WELTREND 0x040b Weltrend vendor VIA 0x040d VIA vendor MCCI 0x040e MCCI vendor MELCO 0x0411 Melco vendor LEADTEK 0x0413 Leadtek vendor WINBOND 0x0416 Winbond vendor PHOENIX 0x041a Phoenix vendor CREATIVE 0x041e Creative Labs vendor NOKIA 0x0421 Nokia vendor ADI 0x0422 ADI Systems vendor CATC 0x0423 Computer Access Technology vendor SMC2 0x0424 Standard Microsystems vendor MOTOROLA_HK 0x0425 Motorola HK vendor GRAVIS 0x0428 Advanced Gravis Computer vendor CIRRUSLOGIC 0x0429 Cirrus Logic vendor INNOVATIVE 0x042c Innovative Semiconductors vendor MOLEX 0x042f Molex vendor SUN 0x0430 Sun Microsystems vendor UNISYS 0x0432 Unisys vendor TAUGA 0x0436 Taugagreining HF vendor AMD 0x0438 Advanced Micro Devices vendor LEXMARK 0x043d Lexmark International vendor LG 0x043e LG Electronics vendor NANAO 0x0440 NANAO vendor GATEWAY 0x0443 Gateway 2000 vendor NMB 0x0446 NMB vendor ALPS 0x044e Alps Electric vendor THRUST 0x044f Thrustmaster vendor TI 0x0451 Texas Instruments vendor ANALOGDEVICES 0x0456 Analog Devices vendor SIS 0x0457 Silicon Integrated Systems Corp. vendor KYE 0x0458 KYE Systems vendor DIAMOND2 0x045a Diamond (Supra) vendor RENESAS 0x045b Renesas vendor MICROSOFT 0x045e Microsoft vendor PRIMAX 0x0461 Primax Electronics vendor MGE 0x0463 MGE UPS Systems vendor AMP 0x0464 AMP vendor CHERRY 0x046a Cherry Mikroschalter vendor MEGATRENDS 0x046b American Megatrends vendor LOGITECH 0x046d Logitech vendor BTC 0x046e Behavior Tech. Computer vendor PHILIPS 0x0471 Philips vendor SUN2 0x0472 Sun Microsystems (offical) vendor SANYO 0x0474 Sanyo Electric vendor SEAGATE 0x0477 Seagate vendor CONNECTIX 0x0478 Connectix vendor SEMTECH 0x047a Semtech vendor KENSINGTON 0x047d Kensington vendor LUCENT 0x047e Lucent vendor PLANTRONICS 0x047f Plantronics vendor KYOCERA 0x0482 Kyocera Wireless Corp. vendor STMICRO 0x0483 STMicroelectronics vendor FOXCONN 0x0489 Foxconn vendor MEIZU 0x0492 Meizu Electronics vendor YAMAHA 0x0499 YAMAHA vendor COMPAQ 0x049f Compaq vendor HITACHI 0x04a4 Hitachi vendor ACERP 0x04a5 Acer Peripherals vendor DAVICOM 0x04a6 Davicom vendor VISIONEER 0x04a7 Visioneer vendor CANON 0x04a9 Canon vendor NIKON 0x04b0 Nikon vendor PAN 0x04b1 Pan International vendor IBM 0x04b3 IBM vendor CYPRESS 0x04b4 Cypress Semiconductor vendor ROHM 0x04b5 ROHM vendor COMPAL 0x04b7 Compal vendor EPSON 0x04b8 Seiko Epson vendor RAINBOW 0x04b9 Rainbow Technologies vendor IODATA 0x04bb I-O Data vendor TDK 0x04bf TDK vendor 3COMUSR 0x04c1 U.S. Robotics vendor METHODE 0x04c2 Methode Electronics Far East vendor MAXISWITCH 0x04c3 Maxi Switch vendor LOCKHEEDMER 0x04c4 Lockheed Martin Energy Research vendor FUJITSU 0x04c5 Fujitsu vendor TOSHIBAAM 0x04c6 Toshiba America vendor MICROMACRO 0x04c7 Micro Macro Technologies vendor KONICA 0x04c8 Konica vendor LITEON 0x04ca Lite-On Technology vendor FUJIPHOTO 0x04cb Fuji Photo Film vendor PHILIPSSEMI 0x04cc Philips Semiconductors vendor TATUNG 0x04cd Tatung Co. Of America vendor SCANLOGIC 0x04ce ScanLogic vendor MYSON 0x04cf Myson Technology vendor DIGI2 0x04d0 Digi vendor ITTCANON 0x04d1 ITT Canon vendor ALTEC 0x04d2 Altec Lansing vendor LSI 0x04d4 LSI vendor MENTORGRAPHICS 0x04d6 Mentor Graphics vendor ITUNERNET 0x04d8 I-Tuner Networks vendor HOLTEK 0x04d9 Holtek Semiconductor, Inc. vendor PANASONIC 0x04da Panasonic (Matsushita) vendor HUANHSIN 0x04dc Huan Hsin vendor SHARP 0x04dd Sharp vendor IIYAMA 0x04e1 Iiyama vendor SHUTTLE 0x04e6 Shuttle Technology vendor ELO 0x04e7 Elo TouchSystems vendor SAMSUNG 0x04e8 Samsung Electronics vendor NORTHSTAR 0x04eb Northstar vendor TOKYOELECTRON 0x04ec Tokyo Electron vendor ANNABOOKS 0x04ed Annabooks vendor JVC 0x04f1 JVC vendor CHICONY 0x04f2 Chicony Electronics vendor ELAN 0x04f3 Elan vendor NEWNEX 0x04f7 Newnex vendor BROTHER 0x04f9 Brother Industries vendor DALLAS 0x04fa Dallas Semiconductor vendor AIPTEK2 0x04fc AIPTEK International vendor PFU 0x04fe PFU vendor FUJIKURA 0x0501 Fujikura/DDK vendor ACER 0x0502 Acer vendor 3COM 0x0506 3Com vendor HOSIDEN 0x0507 Hosiden Corporation vendor AZTECH 0x0509 Aztech Systems vendor BELKIN 0x050d Belkin Components vendor KAWATSU 0x050f Kawatsu Semiconductor vendor FCI 0x0514 FCI vendor LONGWELL 0x0516 Longwell vendor COMPOSITE 0x0518 Composite vendor STAR 0x0519 Star Micronics vendor APC 0x051d American Power Conversion vendor SCIATLANTA 0x051e Scientific Atlanta vendor TSM 0x0520 TSM vendor CONNECTEK 0x0522 Advanced Connectek USA vendor NETCHIP 0x0525 NetChip Technology vendor ALTRA 0x0527 ALTRA vendor ATI 0x0528 ATI Technologies vendor AKS 0x0529 Aladdin Knowledge Systems vendor TEKOM 0x052b Tekom vendor CANONDEV 0x052c Canon vendor WACOMTECH 0x0531 Wacom vendor INVENTEC 0x0537 Inventec vendor SHYHSHIUN 0x0539 Shyh Shiun Terminals vendor PREHWERKE 0x053a Preh Werke Gmbh & Co. KG vendor SYNOPSYS 0x053f Synopsys vendor UNIACCESS 0x0540 Universal Access vendor VIEWSONIC 0x0543 ViewSonic vendor XIRLINK 0x0545 Xirlink vendor ANCHOR 0x0547 Anchor Chips vendor SONY 0x054c Sony vendor FUJIXEROX 0x0550 Fuji Xerox vendor VISION 0x0553 VLSI Vision vendor ASAHIKASEI 0x0556 Asahi Kasei Microsystems vendor ATEN 0x0557 ATEN International vendor SAMSUNG2 0x055d Samsung Electronics vendor MUSTEK 0x055f Mustek Systems vendor TELEX 0x0562 Telex Communications vendor CHINON 0x0564 Chinon vendor PERACOM 0x0565 Peracom Networks vendor ALCOR2 0x0566 Alcor Micro vendor XYRATEX 0x0567 Xyratex vendor WACOM 0x056a WACOM vendor ETEK 0x056c e-TEK Labs vendor EIZO 0x056d EIZO vendor ELECOM 0x056e Elecom vendor CONEXANT 0x0572 Conexant vendor HAUPPAUGE 0x0573 Hauppauge Computer Works vendor BAFO 0x0576 BAFO/Quality Computer Accessories vendor YEDATA 0x057b Y-E Data vendor AVM 0x057c AVM vendor QUICKSHOT 0x057f Quickshot vendor ROLAND 0x0582 Roland vendor ROCKFIRE 0x0583 Rockfire vendor RATOC 0x0584 RATOC Systems vendor ZYXEL 0x0586 ZyXEL Communication vendor INFINEON 0x058b Infineon vendor MICREL 0x058d Micrel vendor ALCOR 0x058f Alcor Micro vendor OMRON 0x0590 OMRON vendor ZORAN 0x0595 Zoran Microelectronics vendor NIIGATA 0x0598 Niigata vendor IOMEGA 0x059b Iomega vendor ATREND 0x059c A-Trend Technology vendor AID 0x059d Advanced Input Devices vendor LACIE 0x059f LaCie vendor FUJIFILM 0x05a2 Fuji Film vendor ARC 0x05a3 ARC vendor ORTEK 0x05a4 Ortek vendor CISCOLINKSYS3 0x05a6 Cisco-Linksys vendor BOSE 0x05a7 Bose vendor OMNIVISION 0x05a9 OmniVision vendor INSYSTEM 0x05ab In-System Design vendor APPLE 0x05ac Apple Computer vendor YCCABLE 0x05ad Y.C. Cable vendor DIGITALPERSONA 0x05ba DigitalPersona vendor 3G 0x05bc 3G Green Green Globe vendor RAFI 0x05bd RAFI vendor TYCO 0x05be Tyco vendor KAWASAKI 0x05c1 Kawasaki vendor DIGI 0x05c5 Digi International vendor QUALCOMM2 0x05c6 Qualcomm vendor QTRONIX 0x05c7 Qtronix vendor FOXLINK 0x05c8 Foxlink vendor RICOH 0x05ca Ricoh vendor ELSA 0x05cc ELSA vendor SCIWORX 0x05ce sci-worx vendor BRAINBOXES 0x05d1 Brainboxes Limited vendor ULTIMA 0x05d8 Ultima vendor AXIOHM 0x05d9 Axiohm Transaction Solutions vendor MICROTEK 0x05da Microtek vendor SUNTAC 0x05db SUN Corporation vendor LEXAR 0x05dc Lexar Media vendor ADDTRON 0x05dd Addtron vendor SYMBOL 0x05e0 Symbol Technologies vendor SYNTEK 0x05e1 Syntek vendor GENESYS 0x05e3 Genesys Logic vendor FUJI 0x05e5 Fuji Electric vendor KEITHLEY 0x05e6 Keithley Instruments vendor EIZONANAO 0x05e7 EIZO Nanao vendor KLSI 0x05e9 Kawasaki LSI vendor FFC 0x05eb FFC vendor ANKO 0x05ef Anko Electronic vendor PIENGINEERING 0x05f3 P.I. Engineering vendor AOC 0x05f6 AOC International vendor CHIC 0x05fe Chic Technology vendor BARCO 0x0600 Barco Display Systems vendor BRIDGE 0x0607 Bridge Information vendor SOLIDYEAR 0x060b Solid Year vendor BIORAD 0x0614 Bio-Rad Laboratories vendor MACALLY 0x0618 Macally vendor ACTLABS 0x061c Act Labs vendor ALARIS 0x0620 Alaris vendor APEX 0x0624 Apex vendor CREATIVE3 0x062a Creative Labs vendor VIVITAR 0x0636 Vivitar vendor GUNZE 0x0637 Gunze Electronics USA vendor AVISION 0x0638 Avision vendor TEAC 0x0644 TEAC vendor SGI 0x065e Silicon Graphics vendor SANWASUPPLY 0x0663 Sanwa Supply vendor MEGATEC 0x0665 Megatec vendor LINKSYS 0x066b Linksys vendor ACERSA 0x066e Acer Semiconductor America vendor SIGMATEL 0x066f Sigmatel vendor DRAYTEK 0x0675 DrayTek vendor AIWA 0x0677 Aiwa vendor ACARD 0x0678 ACARD Technology vendor PROLIFIC 0x067b Prolific Technology vendor SIEMENS 0x067c Siemens vendor AVANCELOGIC 0x0680 Avance Logic vendor SIEMENS2 0x0681 Siemens vendor MINOLTA 0x0686 Minolta vendor CHPRODUCTS 0x068e CH Products vendor HAGIWARA 0x0693 Hagiwara Sys-Com vendor CTX 0x0698 Chuntex vendor ASKEY 0x069a Askey Computer vendor SAITEK 0x06a3 Saitek vendor ALCATELT 0x06b9 Alcatel Telecom vendor AGFA 0x06bd AGFA-Gevaert vendor ASIAMD 0x06be Asia Microelectronic Development vendor BIZLINK 0x06c4 Bizlink International vendor KEYSPAN 0x06cd Keyspan / InnoSys Inc. vendor AASHIMA 0x06d6 Aashima Technology vendor LIEBERT 0x06da Liebert vendor MULTITECH 0x06e0 MultiTech vendor ADS 0x06e1 ADS Technologies vendor ALCATELM 0x06e4 Alcatel Microelectronics vendor SIRIUS 0x06ea Sirius Technologies vendor GUILLEMOT 0x06f8 Guillemot vendor BOSTON 0x06fd Boston Acoustics vendor SMC 0x0707 Standard Microsystems vendor PUTERCOM 0x0708 Putercom vendor MCT 0x0711 MCT vendor IMATION 0x0718 Imation vendor TECLAST 0x071b Teclast vendor SONYERICSSON 0x0731 Sony Ericsson vendor EICON 0x0734 Eicon Networks vendor SYNTECH 0x0745 Syntech Information vendor DIGITALSTREAM 0x074e Digital Stream vendor AUREAL 0x0755 Aureal Semiconductor vendor MIDIMAN 0x0763 Midiman vendor CYBERPOWER 0x0764 Cyber Power Systems, Inc. vendor SURECOM 0x0769 Surecom Technology vendor HIDGLOBAL 0x076b HID Global vendor LINKSYS2 0x077b Linksys vendor GRIFFIN 0x077d Griffin Technology vendor SANDISK 0x0781 SanDisk vendor JENOPTIK 0x0784 Jenoptik vendor LOGITEC 0x0789 Logitec vendor NOKIA2 0x078b Nokia vendor BRIMAX 0x078e Brimax vendor AXIS 0x0792 Axis Communications vendor ABL 0x0794 ABL Electronics vendor SAGEM 0x079b Sagem vendor SUNCOMM 0x079c Sun Communications, Inc. vendor ALFADATA 0x079d Alfadata Computer vendor NATIONALTECH 0x07a2 National Technical Systems vendor ONNTO 0x07a3 Onnto vendor BE 0x07a4 Be vendor ADMTEK 0x07a6 ADMtek vendor COREGA 0x07aa Corega vendor FREECOM 0x07ab Freecom vendor MICROTECH 0x07af Microtech vendor GENERALINSTMNTS 0x07b2 General Instruments (Motorola) vendor OLYMPUS 0x07b4 Olympus vendor ABOCOM 0x07b8 AboCom Systems vendor KEISOKUGIKEN 0x07c1 Keisokugiken vendor ONSPEC 0x07c4 OnSpec vendor APG 0x07c5 APG Cash Drawer vendor BUG 0x07c8 B.U.G. vendor ALLIEDTELESYN 0x07c9 Allied Telesyn International vendor AVERMEDIA 0x07ca AVerMedia Technologies vendor SIIG 0x07cc SIIG vendor CASIO 0x07cf CASIO vendor DLINK2 0x07d1 D-Link vendor APTIO 0x07d2 Aptio Products vendor ARASAN 0x07da Arasan Chip Systems vendor ALLIEDCABLE 0x07e6 Allied Cable vendor STSN 0x07ef STSN vendor CENTURY 0x07f7 Century Corp vendor NEWLINK 0x07ff NEWlink vendor ZOOM 0x0803 Zoom Telephonics vendor PCS 0x0810 Personal Communication Systems vendor ALPHASMART 0x081e AlphaSmart, Inc. vendor BROADLOGIC 0x0827 BroadLogic vendor HANDSPRING 0x082d Handspring vendor PALM 0x0830 Palm Computing vendor SOURCENEXT 0x0833 SOURCENEXT vendor ACTIONSTAR 0x0835 Action Star Enterprise vendor SAMSUNG_TECHWIN 0x0839 Samsung Techwin vendor ACCTON 0x083a Accton Technology vendor DIAMOND 0x0841 Diamond vendor NETGEAR 0x0846 BayNETGEAR vendor TOPRE 0x0853 Topre Corporation vendor ACTIVEWIRE 0x0854 ActiveWire vendor BBELECTRONICS 0x0856 B&B Electronics vendor PORTGEAR 0x085a PortGear vendor NETGEAR2 0x0864 Netgear vendor SYSTEMTALKS 0x086e System Talks vendor METRICOM 0x0870 Metricom vendor ADESSOKBTEK 0x087c ADESSO/Kbtek America vendor JATON 0x087d Jaton vendor APT 0x0880 APT Technologies vendor BOCARESEARCH 0x0885 Boca Research vendor ANDREA 0x08a8 Andrea Electronics vendor BURRBROWN 0x08bb Burr-Brown Japan vendor 2WIRE 0x08c8 2Wire vendor AIPTEK 0x08ca AIPTEK International vendor SMARTBRIDGES 0x08d1 SmartBridges vendor FUJITSUSIEMENS 0x08d4 Fujitsu-Siemens vendor BILLIONTON 0x08dd Billionton Systems vendor GEMALTO 0x08e6 Gemalto SA vendor EXTENDED 0x08e9 Extended Systems vendor MSYSTEMS 0x08ec M-Systems vendor DIGIANSWER 0x08fd Digianswer vendor AUTHENTEC 0x08ff AuthenTec vendor AUDIOTECHNICA 0x0909 Audio-Technica vendor TRUMPION 0x090a Trumpion Microelectronics vendor FEIYA 0x090c Feiya vendor ALATION 0x0910 Alation Systems vendor GLOBESPAN 0x0915 Globespan vendor CONCORDCAMERA 0x0919 Concord Camera vendor GARMIN 0x091e Garmin International vendor GOHUBS 0x0921 GoHubs vendor XEROX 0x0924 Xerox vendor BIOMETRIC 0x0929 American Biometric Company vendor TOSHIBA 0x0930 Toshiba vendor PLEXTOR 0x093b Plextor vendor INTREPIDCS 0x093c Intrepid vendor YANO 0x094f Yano vendor KINGSTON 0x0951 Kingston Technology vendor BLUEWATER 0x0956 BlueWater Systems vendor AGILENT 0x0957 Agilent Technologies vendor GUDE 0x0959 Gude ADS vendor PORTSMITH 0x095a Portsmith vendor ACERW 0x0967 Acer vendor ADIRONDACK 0x0976 Adirondack Wire & Cable vendor BECKHOFF 0x0978 Beckhoff vendor MINDSATWORK 0x097a Minds At Work vendor POINTCHIPS 0x09a6 PointChips vendor INTERSIL 0x09aa Intersil vendor ALTIUS 0x09b3 Altius Solutions vendor ARRIS 0x09c1 Arris Interactive vendor ACTIVCARD 0x09c3 ACTIVCARD vendor ACTISYS 0x09c4 ACTiSYS vendor NOVATEL2 0x09d7 Novatel Wireless vendor AFOURTECH 0x09da A-FOUR TECH vendor AIMEX 0x09dc AIMEX vendor ADDONICS 0x09df Addonics Technologies vendor AKAI 0x09e8 AKAI professional M.I. vendor ARESCOM 0x09f5 ARESCOM vendor BAY 0x09f9 Bay Associates vendor ALTERA 0x09fb Altera vendor CSR 0x0a12 Cambridge Silicon Radio vendor TREK 0x0a16 Trek Technology vendor ASAHIOPTICAL 0x0a17 Asahi Optical vendor BOCASYSTEMS 0x0a43 Boca Systems vendor SHANTOU 0x0a46 ShanTou vendor MEDIAGEAR 0x0a48 MediaGear vendor BROADCOM 0x0a5c Broadcom vendor GREENHOUSE 0x0a6b GREENHOUSE vendor GEOCAST 0x0a79 Geocast Network Systems vendor IDQUANTIQUE 0x0aba id Quantique vendor ZYDAS 0x0ace Zydas Technology Corporation vendor NEODIO 0x0aec Neodio vendor OPTION 0x0af0 Option N.V. vendor ASUS 0x0b05 ASUSTeK Computer vendor TODOS 0x0b0c Todos Data System vendor SIIG2 0x0b39 SIIG vendor TEKRAM 0x0b3b Tekram Technology vendor HAL 0x0b41 HAL Corporation vendor EMS 0x0b43 EMS Production vendor NEC2 0x0b62 NEC vendor ADLINK 0x0b63 ADLINK Technoligy, Inc. vendor ATI2 0x0b6f ATI vendor ZEEVO 0x0b7a Zeevo, Inc. vendor KURUSUGAWA 0x0b7e Kurusugawa Electronics, Inc. vendor SMART 0x0b8c Smart Technologies vendor ASIX 0x0b95 ASIX Electronics vendor O2MICRO 0x0b97 O2 Micro, Inc. vendor USR 0x0baf U.S. Robotics vendor AMBIT 0x0bb2 Ambit Microsystems vendor HTC 0x0bb4 HTC vendor REALTEK 0x0bda Realtek vendor MEI 0x0bed MEI vendor ADDONICS2 0x0bf6 Addonics Technology vendor FSC 0x0bf8 Fujitsu Siemens Computers vendor AGATE 0x0c08 Agate Technologies vendor DMI 0x0c0b DMI vendor CHICONY2 0x0c45 Chicony vendor REINERSCT 0x0c4b Reiner-SCT vendor SEALEVEL 0x0c52 Sealevel System vendor LUWEN 0x0c76 Luwen vendor KYOCERA2 0x0c88 Kyocera Wireless Corp. vendor ZCOM 0x0cde Z-Com vendor ATHEROS2 0x0cf3 Atheros Communications vendor TANGTOP 0x0d3d Tangtop vendor SMC3 0x0d5c Standard Microsystems vendor ADDON 0x0d7d Add-on Technology vendor ACDC 0x0d7e American Computer & Digital Components vendor CMEDIA 0x0d8c CMEDIA vendor CONCEPTRONIC 0x0d8e Conceptronic vendor SKANHEX 0x0d96 Skanhex Technology, Inc. vendor MSI 0x0db0 Micro Star International vendor ELCON 0x0db7 ELCON Systemtechnik vendor NETAC 0x0dd8 Netac vendor SITECOMEU 0x0df6 Sitecom Europe vendor MOBILEACTION 0x0df7 Mobile Action vendor AMIGO 0x0e0b Amigo Technology vendor SPEEDDRAGON 0x0e55 Speed Dragon Multimedia vendor HAWKING 0x0e66 Hawking vendor FOSSIL 0x0e67 Fossil, Inc vendor GMATE 0x0e7e G.Mate, Inc vendor OTI 0x0ea0 Ours Technology vendor YISO 0x0eab Yiso Wireless Co. vendor PILOTECH 0x0eaf Pilotech vendor NOVATECH 0x0eb0 NovaTech vendor ITEGNO 0x0eba iTegno vendor WINMAXGROUP 0x0ed1 WinMaxGroup vendor TOD 0x0ede TOD vendor EGALAX 0x0eef eGalax, Inc. vendor AIRPRIME 0x0f3d AirPrime, Inc. vendor MICROTUNE 0x0f4d Microtune vendor VTECH 0x0f88 VTech vendor FALCOM 0x0f94 Falcom Wireless Communications GmbH vendor RIM 0x0fca Research In Motion vendor DYNASTREAM 0x0fcf Dynastream Innovations vendor QUALCOMM 0x1004 Qualcomm vendor APACER 0x1005 Apacer vendor MOTOROLA4 0x100d Motorola vendor AIRPLUS 0x1011 Airplus vendor DESKNOTE 0x1019 Desknote vendor GIGABYTE 0x1044 GIGABYTE vendor WESTERN 0x1058 Western Digital vendor MOTOROLA 0x1063 Motorola vendor CCYU 0x1065 CCYU Technology vendor CURITEL 0x106c Curitel Communications Inc vendor SILABS2 0x10a6 SILABS2 vendor USI 0x10ab USI vendor PLX 0x10b5 PLX vendor ASANTE 0x10bd Asante vendor SILABS 0x10c4 Silicon Labs vendor SILABS3 0x10c5 Silicon Labs vendor SILABS4 0x10ce Silicon Labs vendor ACTIONS 0x10d6 Actions vendor ANALOG 0x1110 Analog Devices vendor TENX 0x1130 Ten X Technology, Inc. vendor ISSC 0x1131 Integrated System Solution Corp. vendor JRC 0x1145 Japan Radio Company vendor SPHAIRON 0x114b Sphairon Access Systems GmbH vendor DELORME 0x1163 DeLorme vendor SERVERWORKS 0x1166 ServerWorks vendor DLINK3 0x1186 Dlink vendor ACERCM 0x1189 Acer Communications & Multimedia vendor SIERRA 0x1199 Sierra Wireless vendor SANWA 0x11ad Sanwa Electric Instrument Co., Ltd. vendor TOPFIELD 0x11db Topfield Co., Ltd vendor SIEMENS3 0x11f5 Siemens vendor NETINDEX 0x11f6 NetIndex vendor ALCATEL 0x11f7 Alcatel vendor UNKNOWN3 0x1233 Unknown vendor vendor TSUNAMI 0x1241 Tsunami vendor PHEENET 0x124a Pheenet vendor TARGUS 0x1267 Targus vendor TWINMOS 0x126f TwinMOS vendor TENDA 0x1286 Tenda vendor CREATIVE2 0x1292 Creative Labs vendor BELKIN2 0x1293 Belkin Components vendor CYBERTAN 0x129b CyberTAN Technology vendor HUAWEI 0x12d1 Huawei Technologies vendor ARANEUS 0x12d8 Araneus Information Systems vendor TAPWAVE 0x12ef Tapwave vendor AINCOMM 0x12fd Aincomm vendor MOBILITY 0x1342 Mobility vendor DICKSMITH 0x1371 Dick Smith Electronics vendor NETGEAR3 0x1385 Netgear vendor BALTECH 0x13ad Baltech vendor CISCOLINKSYS 0x13b1 Cisco-Linksys vendor SHARK 0x13d2 Shark vendor AZUREWAVE 0x13d3 AsureWave vendor EMTEC 0x13fe Emtec vendor NOVATEL 0x1410 Novatel Wireless vendor MERLIN 0x1416 Merlin vendor WISTRONNEWEB 0x1435 Wistron NeWeb vendor RADIOSHACK 0x1453 Radio Shack vendor HUAWEI3COM 0x1472 Huawei-3Com vendor ABOCOM2 0x1482 AboCom Systems vendor SILICOM 0x1485 Silicom vendor RALINK 0x148f Ralink Technology vendor IMAGINATION 0x149a Imagination Technologies vendor CONCEPTRONIC2 0x14b2 Conceptronic vendor SUPERTOP 0x14cd Super Top vendor PLANEX3 0x14ea Planex Communications vendor SILICONPORTALS 0x1527 Silicon Portals vendor UBIQUAM 0x1529 UBIQUAM Co., Ltd. vendor JMICRON 0x152d JMicron vendor UBLOX 0x1546 U-blox vendor PNY 0x154b PNY vendor OWEN 0x1555 Owen vendor OQO 0x1557 OQO vendor UMEDIA 0x157e U-MEDIA Communications vendor FIBERLINE 0x1582 Fiberline vendor SPARKLAN 0x15a9 SparkLAN vendor AMIT2 0x15c5 AMIT vendor SOHOWARE 0x15e8 SOHOware vendor UMAX 0x1606 UMAX Data Systems vendor INSIDEOUT 0x1608 Inside Out Networks vendor AMOI 0x1614 Amoi Electronics vendor GOODWAY 0x1631 Good Way Technology vendor ENTREGA 0x1645 Entrega vendor ACTIONTEC 0x1668 Actiontec Electronics vendor CLIPSAL 0x166a Clipsal vendor CISCOLINKSYS2 0x167b Cisco-Linksys vendor ATHEROS 0x168c Atheros Communications vendor GIGASET 0x1690 Gigaset vendor GLOBALSUN 0x16ab Global Sun Technology vendor ANYDATA 0x16d5 AnyDATA Corporation vendor JABLOTRON 0x16d6 Jablotron vendor CMOTECH 0x16d8 C-motech vendor AXESSTEL 0x1726 Axesstel Co., Ltd. vendor LINKSYS4 0x1737 Linksys vendor SENAO 0x1740 Senao vendor ASUS2 0x1761 ASUS vendor SWEEX2 0x177f Sweex vendor METAGEEK 0x1781 MetaGeek vendor WAVESENSE 0x17f4 WaveSense vendor VAISALA 0x1843 Vaisala vendor AMIT 0x18c5 AMIT vendor GOOGLE 0x18d1 Google vendor QCOM 0x18e8 Qcom vendor ELV 0x18ef ELV vendor LINKSYS3 0x1915 Linksys vendor QUALCOMMINC 0x19d2 Qualcomm, Incorporated vendor WCH2 0x1a86 QinHeng Electronics vendor STELERA 0x1a8d Stelera Wireless vendor MATRIXORBITAL 0x1b3d Matrix Orbital vendor OVISLINK 0x1b75 OvisLink vendor TCTMOBILE 0x1bbb TCT Mobile vendor TELIT 0x1bc7 Telit vendor LONGCHEER 0x1c9e Longcheer Holdings, Ltd. vendor MPMAN 0x1cae MpMan vendor DRESDENELEKTRONIK 0x1cf1 dresden elektronik vendor NEOTEL 0x1d09 Neotel vendor PEGATRON 0x1d4d Pegatron vendor QISDA 0x1da5 Qisda vendor METAGEEK2 0x1dd5 MetaGeek vendor ALINK 0x1e0e Alink vendor AIRTIES 0x1eda AirTies vendor DLINK 0x2001 D-Link vendor PLANEX2 0x2019 Planex Communications vendor HAUPPAUGE2 0x2040 Hauppauge Computer Works vendor TLAYTECH 0x20b9 Tlay Tech vendor ENCORE 0x203d Encore vendor PARA 0x20b8 PARA Industrial vendor ERICSSON 0x2282 Ericsson vendor MOTOROLA2 0x22b8 Motorola vendor TRIPPLITE 0x2478 Tripp-Lite vendor HIROSE 0x2631 Hirose Electric vendor NHJ 0x2770 NHJ vendor PLANEX 0x2c02 Planex Communications vendor VIDZMEDIA 0x3275 VidzMedia Pte Ltd vendor AEI 0x3334 AEI vendor HANK 0x3353 Hank Connection vendor PQI 0x3538 PQI vendor DAISY 0x3579 Daisy Technology vendor NI 0x3923 National Instruments vendor MICRONET 0x3980 Micronet Communications vendor IODATA2 0x40bb I-O Data vendor IRIVER 0x4102 iRiver vendor DELL 0x413c Dell vendor WCH 0x4348 QinHeng Electronics vendor ACEECA 0x4766 Aceeca vendor AVERATEC 0x50c2 Averatec vendor SWEEX 0x5173 Sweex vendor PROLIFIC2 0x5372 Prolific Technologies vendor ONSPEC2 0x55aa OnSpec Electronic Inc. vendor ZINWELL 0x5a57 Zinwell vendor SITECOM 0x6189 Sitecom vendor ARKMICRO 0x6547 Arkmicro Technologies Inc. vendor 3COM2 0x6891 3Com vendor EDIMAX 0x7392 Edimax vendor INTEL 0x8086 Intel vendor INTEL2 0x8087 Intel vendor ALLWIN 0x8516 ALLWIN Tech vendor SITECOM2 0x9016 Sitecom vendor MOSCHIP 0x9710 MosChip Semiconductor vendor MARVELL 0x9e88 Marvell Technology Group Ltd. vendor 3COM3 0xa727 3Com vendor DATAAPEX 0xdaae DataApex vendor HP2 0xf003 Hewlett Packard vendor USRP 0xfffe GNU Radio USRP /* * List of known products. Grouped by vendor. */ /* 3Com products */ product 3COM HOMECONN 0x009d HomeConnect Camera product 3COM 3CREB96 0x00a0 Bluetooth USB Adapter product 3COM 3C19250 0x03e8 3C19250 Ethernet Adapter product 3COM 3CRSHEW696 0x0a01 3CRSHEW696 Wireless Adapter product 3COM 3C460 0x11f8 HomeConnect 3C460 product 3COM USR56K 0x3021 U.S.Robotics 56000 Voice FaxModem Pro product 3COM 3C460B 0x4601 HomeConnect 3C460B product 3COM2 3CRUSB10075 0xa727 3CRUSB10075 product 3COM3 AR5523_1 0x6893 AR5523 product 3COM3 AR5523_2 0x6895 AR5523 product 3COM3 AR5523_3 0x6897 AR5523 product 3COMUSR OFFICECONN 0x0082 3Com OfficeConnect Analog Modem product 3COMUSR USRISDN 0x008f 3Com U.S. Robotics Pro ISDN TA product 3COMUSR HOMECONN 0x009d 3Com HomeConnect Camera product 3COMUSR USR56K 0x3021 U.S. Robotics 56000 Voice FaxModem Pro /* AboCom products */ product ABOCOM XX1 0x110c XX1 product ABOCOM XX2 0x200c XX2 product ABOCOM RT2770 0x2770 RT2770 product ABOCOM RT2870 0x2870 RT2870 product ABOCOM RT3070 0x3070 RT3070 product ABOCOM RT3071 0x3071 RT3071 product ABOCOM RT3072 0x3072 RT3072 product ABOCOM2 RT2870_1 0x3c09 RT2870 product ABOCOM URE450 0x4000 URE450 Ethernet Adapter product ABOCOM UFE1000 0x4002 UFE1000 Fast Ethernet Adapter product ABOCOM DSB650TX_PNA 0x4003 1/10/100 Ethernet Adapter product ABOCOM XX4 0x4004 XX4 product ABOCOM XX5 0x4007 XX5 product ABOCOM XX6 0x400b XX6 product ABOCOM XX7 0x400c XX7 product ABOCOM RTL8151 0x401a RTL8151 product ABOCOM XX8 0x4102 XX8 product ABOCOM XX9 0x4104 XX9 product ABOCOM UF200 0x420a UF200 Ethernet product ABOCOM WL54 0x6001 WL54 product ABOCOM XX10 0xabc1 XX10 product ABOCOM BWU613 0xb000 BWU613 product ABOCOM HWU54DM 0xb21b HWU54DM product ABOCOM RT2573_2 0xb21c RT2573 product ABOCOM RT2573_3 0xb21d RT2573 product ABOCOM RT2573_4 0xb21e RT2573 product ABOCOM WUG2700 0xb21f WUG2700 /* Accton products */ product ACCTON USB320_EC 0x1046 USB320-EC Ethernet Adapter product ACCTON 2664W 0x3501 2664W product ACCTON 111 0x3503 T-Sinus 111 Wireless Adapter product ACCTON SMCWUSBG_NF 0x4505 SMCWUSB-G (no firmware) product ACCTON SMCWUSBG 0x4506 SMCWUSB-G product ACCTON SMCWUSBTG2_NF 0x4507 SMCWUSBT-G2 (no firmware) product ACCTON SMCWUSBTG2 0x4508 SMCWUSBT-G2 product ACCTON PRISM_GT 0x4521 PrismGT USB 2.0 WLAN product ACCTON SS1001 0x5046 SpeedStream Ethernet Adapter product ACCTON RT2870_2 0x6618 RT2870 product ACCTON RT3070 0x7511 RT3070 product ACCTON RT2770 0x7512 RT2770 product ACCTON RT2870_3 0x7522 RT2870 product ACCTON RT2870_5 0x8522 RT2870 product ACCTON RT3070_4 0xa512 RT3070 product ACCTON RT2870_4 0xa618 RT2870 product ACCTON RT3070_1 0xa701 RT3070 product ACCTON RT3070_2 0xa702 RT3070 product ACCTON RT2870_1 0xb522 RT2870 product ACCTON RT3070_3 0xc522 RT3070 product ACCTON RT3070_5 0xd522 RT3070 product ACCTON ZD1211B 0xe501 ZD1211B /* Aceeca products */ product ACEECA MEZ1000 0x0001 MEZ1000 RDA /* Acer Communications & Multimedia (oemd by Surecom) */ product ACERCM EP1427X2 0x0893 EP-1427X-2 Ethernet Adapter /* Acer Labs products */ product ACERLABS M5632 0x5632 USB 2.0 Data Link /* Acer Peripherals, Inc. products */ product ACERP ACERSCAN_C310U 0x12a6 Acerscan C310U product ACERP ACERSCAN_320U 0x2022 Acerscan 320U product ACERP ACERSCAN_640U 0x2040 Acerscan 640U product ACERP ACERSCAN_620U 0x2060 Acerscan 620U product ACERP ACERSCAN_4300U 0x20b0 Benq 3300U/4300U product ACERP ACERSCAN_640BT 0x20be Acerscan 640BT product ACERP ACERSCAN_1240U 0x20c0 Acerscan 1240U product ACERP S81 0x4027 BenQ S81 phone product ACERP H10 0x4068 AWL400 Wireless Adapter product ACERP ATAPI 0x6003 ATA/ATAPI Adapter product ACERP AWL300 0x9000 AWL300 Wireless Adapter product ACERP AWL400 0x9001 AWL400 Wireless Adapter /* Acer Warp products */ product ACERW WARPLINK 0x0204 Warplink /* Actions products */ product ACTIONS MP4 0x1101 Actions MP4 Player /* Actiontec, Inc. products */ product ACTIONTEC PRISM_25 0x0408 Prism2.5 Wireless Adapter product ACTIONTEC PRISM_25A 0x0421 Prism2.5 Wireless Adapter A product ACTIONTEC FREELAN 0x6106 ROPEX FreeLan 802.11b product ACTIONTEC UAT1 0x7605 UAT1 Wireless Ethernet Adapter /* ACTiSYS products */ product ACTISYS IR2000U 0x0011 ACT-IR2000U FIR /* ActiveWire, Inc. products */ product ACTIVEWIRE IOBOARD 0x0100 I/O Board product ACTIVEWIRE IOBOARD_FW1 0x0101 I/O Board, rev. 1 firmware /* Adaptec products */ product ADAPTEC AWN8020 0x0020 AWN-8020 WLAN /* Addtron products */ product ADDTRON AWU120 0xff31 AWU-120 /* ADLINK Texhnology products */ product ADLINK ND6530 0x6530 ND-6530 USB-Serial /* ADMtek products */ product ADMTEK PEGASUSII_4 0x07c2 AN986A Ethernet product ADMTEK PEGASUS 0x0986 AN986 Ethernet product ADMTEK PEGASUSII 0x8511 AN8511 Ethernet product ADMTEK PEGASUSII_2 0x8513 AN8513 Ethernet product ADMTEK PEGASUSII_3 0x8515 AN8515 Ethernet /* ADDON products */ /* PNY OEMs these */ product ADDON ATTACHE 0x1300 USB 2.0 Flash Drive product ADDON ATTACHE 0x1300 USB 2.0 Flash Drive product ADDON A256MB 0x1400 Attache 256MB USB 2.0 Flash Drive product ADDON DISKPRO512 0x1420 USB 2.0 Flash Drive (DANE-ELEC zMate 512MB USB flash drive) /* Addonics products */ product ADDONICS2 CABLE_205 0xa001 Cable 205 /* ADS products */ product ADS UBS10BT 0x0008 UBS-10BT Ethernet product ADS UBS10BTX 0x0009 UBS-10BT Ethernet /* AEI products */ product AEI FASTETHERNET 0x1701 Fast Ethernet /* Agate Technologies products */ product AGATE QDRIVE 0x0378 Q-Drive /* AGFA products */ product AGFA SNAPSCAN1212U 0x0001 SnapScan 1212U product AGFA SNAPSCAN1236U 0x0002 SnapScan 1236U product AGFA SNAPSCANTOUCH 0x0100 SnapScan Touch product AGFA SNAPSCAN1212U2 0x2061 SnapScan 1212U product AGFA SNAPSCANE40 0x208d SnapScan e40 product AGFA SNAPSCANE50 0x208f SnapScan e50 product AGFA SNAPSCANE20 0x2091 SnapScan e20 product AGFA SNAPSCANE25 0x2095 SnapScan e25 product AGFA SNAPSCANE26 0x2097 SnapScan e26 product AGFA SNAPSCANE52 0x20fd SnapScan e52 /* Ain Communication Technology products */ product AINCOMM AWU2000B 0x1001 AWU2000B Wireless Adapter /* AIPTEK products */ product AIPTEK POCKETCAM3M 0x2011 PocketCAM 3Mega product AIPTEK2 PENCAM_MEGA_1_3 0x504a PenCam Mega 1.3 product AIPTEK2 SUNPLUS_TECH 0x0c15 Sunplus Technology Inc. /* AirPlis products */ product AIRPLUS MCD650 0x3198 MCD650 modem /* AirPrime products */ product AIRPRIME PC5220 0x0112 CDMA Wireless PC Card /* AirTies products */ product AIRTIES RT3070 0x2310 RT3070 /* AKS products */ product AKS USBHASP 0x0001 USB-HASP 0.06 /* Alcatel products */ product ALCATEL OT535 0x02df One Touch 535/735 /* Alcor Micro, Inc. products */ product ALCOR2 KBD_HUB 0x2802 Kbd Hub product ALCOR SDCR_6335 0x6335 SD/MMC Card Reader product ALCOR SDCR_6362 0x6362 SD/MMC Card Reader product ALCOR TRANSCEND 0x6387 Transcend JetFlash Drive product ALCOR MA_KBD_HUB 0x9213 MacAlly Kbd Hub product ALCOR AU9814 0x9215 AU9814 Hub product ALCOR UMCR_9361 0x9361 USB Multimedia Card Reader product ALCOR SM_KBD 0x9410 MicroConnectors/StrongMan Keyboard product ALCOR NEC_KBD_HUB 0x9472 NEC Kbd Hub product ALCOR AU9720 0x9720 USB2 - RS-232 product ALCOR AU6390 0x6390 AU6390 USB-IDE converter /* Alink products */ product ALINK DWM652U5 0xce16 DWM-652 product ALINK 3G 0x9000 3G modem product ALINK 3GU 0x9200 3G modem /* Altec Lansing products */ product ALTEC ADA70 0x0070 ADA70 Speakers product ALTEC ASC495 0xff05 ASC495 Speakers /* Allied Telesyn International products */ product ALLIEDTELESYN ATUSB100 0xb100 AT-USB100 /* ALLWIN Tech products */ product ALLWIN RT2070 0x2070 RT2070 product ALLWIN RT2770 0x2770 RT2770 product ALLWIN RT2870 0x2870 RT2870 product ALLWIN RT3070 0x3070 RT3070 product ALLWIN RT3071 0x3071 RT3071 product ALLWIN RT3072 0x3072 RT3072 product ALLWIN RT3572 0x3572 RT3572 /* AlphaSmart, Inc. products */ product ALPHASMART DANA_KB 0xdbac AlphaSmart Dana Keyboard product ALPHASMART DANA_SYNC 0xdf00 AlphaSmart Dana HotSync /* Amoi products */ product AMOI H01 0x0800 H01 3G modem product AMOI H01A 0x7002 H01A 3G modem product AMOI H02 0x0802 H02 3G modem /* American Power Conversion products */ product APC UPS 0x0002 Uninterruptible Power Supply /* Ambit Microsystems products */ product AMBIT WLAN 0x0302 WLAN product AMBIT NTL_250 0x6098 NTL 250 cable modem /* Apacer products */ product APACER HT202 0xb113 USB 2.0 Flash Drive /* American Power Conversion products */ product APC UPS 0x0002 Uninterruptible Power Supply /* Amigo Technology products */ product AMIGO RT2870_1 0x9031 RT2870 product AMIGO RT2870_2 0x9041 RT2870 /* AMIT products */ product AMIT CGWLUSB2GO 0x0002 CG-WLUSB2GO product AMIT CGWLUSB2GNR 0x0008 CG-WLUSB2GNR product AMIT RT2870_1 0x0012 RT2870 /* AMIT(2) products */ product AMIT2 RT2870 0x0008 RT2870 /* Anchor products */ product ANCHOR SERIAL 0x2008 Serial product ANCHOR EZUSB 0x2131 EZUSB product ANCHOR EZLINK 0x2720 EZLINK /* AnyData products */ product ANYDATA ADU_620UW 0x6202 CDMA 2000 EV-DO USB Modem product ANYDATA ADU_E100X 0x6501 CDMA 2000 1xRTT/EV-DO USB Modem product ANYDATA ADU_500A 0x6502 CDMA 2000 EV-DO USB Modem /* AOX, Inc. products */ product AOX USB101 0x0008 Ethernet /* American Power Conversion products */ product APC UPS 0x0002 Uninterruptible Power Supply /* Apple Computer products */ product APPLE IMAC_KBD 0x0201 USB iMac Keyboard product APPLE KBD 0x0202 USB Keyboard M2452 product APPLE EXT_KBD 0x020c Apple Extended USB Keyboard product APPLE KBD_TP_ANSI 0x0223 Apple Internal Keyboard/Trackpad (Wellspring/ANSI) product APPLE KBD_TP_ISO 0x0224 Apple Internal Keyboard/Trackpad (Wellspring/ISO) product APPLE KBD_TP_JIS 0x0225 Apple Internal Keyboard/Trackpad (Wellspring/JIS) product APPLE KBD_TP_ANSI2 0x0230 Apple Internal Keyboard/Trackpad (Wellspring2/ANSI) product APPLE KBD_TP_ISO2 0x0231 Apple Internal Keyboard/Trackpad (Wellspring2/ISO) product APPLE KBD_TP_JIS2 0x0232 Apple Internal Keyboard/Trackpad (Wellspring2/JIS) product APPLE MOUSE 0x0301 Mouse M4848 product APPLE OPTMOUSE 0x0302 Optical mouse product APPLE MIGHTYMOUSE 0x0304 Mighty Mouse product APPLE KBD_HUB 0x1001 Hub in Apple USB Keyboard product APPLE EXT_KBD_HUB 0x1003 Hub in Apple Extended USB Keyboard product APPLE SPEAKERS 0x1101 Speakers product APPLE IPOD 0x1201 iPod product APPLE IPOD2G 0x1202 iPod 2G product APPLE IPOD3G 0x1203 iPod 3G product APPLE IPOD_04 0x1204 iPod '04' product APPLE IPODMINI 0x1205 iPod Mini product APPLE IPOD_06 0x1206 iPod '06' product APPLE IPOD_07 0x1207 iPod '07' product APPLE IPOD_08 0x1208 iPod '08' product APPLE IPODVIDEO 0x1209 iPod Video product APPLE IPODNANO 0x120a iPod Nano product APPLE IPHONE 0x1290 iPhone product APPLE IPOD_TOUCH 0x1291 iPod Touch product APPLE IPHONE_3G 0x1292 iPhone 3G product APPLE IPHONE_3GS 0x1294 iPhone 3GS product APPLE IPHONE_4 0x1297 iPhone 4 product APPLE IPAD 0x129a iPad product APPLE ETHERNET 0x1402 Ethernet A1277 /* Arkmicro Technologies */ product ARKMICRO ARK3116 0x0232 ARK3116 Serial /* Asahi Optical products */ product ASAHIOPTICAL OPTIO230 0x0004 Digital camera product ASAHIOPTICAL OPTIO330 0x0006 Digital camera /* Asante products */ product ASANTE EA 0x1427 Ethernet /* ASIX Electronics products */ product ASIX AX88172 0x1720 10/100 Ethernet product ASIX AX88178 0x1780 AX88178 product ASIX AX88772 0x7720 AX88772 product ASIX AX88772A 0x772a AX88772A USB 2.0 10/100 Ethernet /* ASUS products */ product ASUS2 USBN11 0x0b05 USB-N11 product ASUS WL167G 0x1707 WL-167g Wireless Adapter product ASUS WL159G 0x170c WL-159g product ASUS A9T_WIFI 0x171b A9T wireless product ASUS P5B_WIFI 0x171d P5B wireless product ASUS RT2573_1 0x1723 RT2573 product ASUS RT2573_2 0x1724 RT2573 product ASUS LCM 0x1726 LCM display product ASUS RT2870_1 0x1731 RT2870 product ASUS RT2870_2 0x1732 RT2870 product ASUS RT2870_3 0x1742 RT2870 product ASUS RT2870_4 0x1760 RT2870 product ASUS RT2870_5 0x1761 RT2870 product ASUS USBN13 0x1784 USB-N13 product ASUS RT3070_1 0x1790 RT3070 product ASUS A730W 0x4202 ASUS MyPal A730W product ASUS P535 0x420f ASUS P535 PDA product ASUS GMSC 0x422f ASUS Generic Mass Storage product ASUS RT2570 0x1706 RT2500USB Wireless Adapter /* ATen products */ product ATEN UC1284 0x2001 Parallel printer product ATEN UC10T 0x2002 10Mbps Ethernet product ATEN UC110T 0x2007 UC-110T Ethernet product ATEN UC232A 0x2008 Serial product ATEN UC210T 0x2009 UC-210T Ethernet product ATEN DSB650C 0x4000 DSB-650C /* Atheros Communications products */ product ATHEROS AR5523 0x0001 AR5523 product ATHEROS AR5523_NF 0x0002 AR5523 (no firmware) product ATHEROS2 AR5523_1 0x0001 AR5523 product ATHEROS2 AR5523_1_NF 0x0002 AR5523 (no firmware) product ATHEROS2 AR5523_2 0x0003 AR5523 product ATHEROS2 AR5523_2_NF 0x0004 AR5523 (no firmware) product ATHEROS2 AR5523_3 0x0005 AR5523 product ATHEROS2 AR5523_3_NF 0x0006 AR5523 (no firmware) /* Atmel Comp. products */ product ATMEL STK541 0x2109 Zigbee Controller product ATMEL UHB124 0x3301 UHB124 hub product ATMEL DWL120 0x7603 DWL-120 Wireless Adapter product ATMEL BW002 0x7605 BW002 Wireless Adapter product ATMEL WL1130USB 0x7613 WL-1130 USB product ATMEL AT76C505A 0x7614 AT76c505a Wireless Adapter /* AuthenTec products */ product AUTHENTEC AES1610 0x1600 AES1610 Fingerprint Sensor /* Avision products */ product AVISION 1200U 0x0268 1200U scanner /* Axesstel products */ product AXESSTEL DATAMODEM 0x1000 Data Modem /* AsureWave products */ product AZUREWAVE RT2870_1 0x3247 RT2870 product AZUREWAVE RT2870_2 0x3262 RT2870 product AZUREWAVE RT3070_1 0x3273 RT3070 product AZUREWAVE RT3070_2 0x3284 RT3070 product AZUREWAVE RT3070_3 0x3305 RT3070 /* Baltech products */ product BALTECH CARDREADER 0x9999 Card reader /* B&B Electronics products */ product BBELECTRONICS USOTL4 0xAC01 RS-422/485 /* Belkin products */ /*product BELKIN F5U111 0x???? F5U111 Ethernet*/ product BELKIN F5D6050 0x0050 F5D6050 802.11b Wireless Adapter product BELKIN FBT001V 0x0081 FBT001v2 Bluetooth product BELKIN FBT003V 0x0084 FBT003v2 Bluetooth product BELKIN F5U103 0x0103 F5U103 Serial product BELKIN F5U109 0x0109 F5U109 Serial product BELKIN USB2SCSI 0x0115 USB to SCSI product BELKIN F8T012 0x0121 F8T012xx1 Bluetooth USB Adapter product BELKIN USB2LAN 0x0121 USB to LAN product BELKIN F5U208 0x0208 F5U208 VideoBus II product BELKIN F5U237 0x0237 F5U237 USB 2.0 7-Port Hub product BELKIN F5U257 0x0257 F5U257 Serial product BELKIN F5U409 0x0409 F5U409 Serial product BELKIN F6C550AVR 0x0551 F6C550-AVR UPS product BELKIN F5U120 0x1203 F5U120-PC Hub product BELKIN ZD1211B 0x4050 ZD1211B product BELKIN F5D5055 0x5055 F5D5055 product BELKIN F5D7050 0x7050 F5D7050 Wireless Adapter product BELKIN F5D7051 0x7051 F5D7051 54g USB Network Adapter product BELKIN F5D7050A 0x705a F5D7050A Wireless Adapter /* Also sold as 'Ativa 802.11g wireless card' */ product BELKIN F5D7050_V4000 0x705c F5D7050 v4000 Wireless Adapter product BELKIN F5D7050E 0x705e F5D7050E Wireless Adapter product BELKIN RT2870_1 0x8053 RT2870 product BELKIN RT2870_2 0x805c RT2870 product BELKIN F5D8053V3 0x815c F5D8053 v3 product BELKIN F5D8055 0x825a F5D8055 product BELKIN F5D9050V3 0x905b F5D9050 ver 3 Wireless Adapter product BELKIN2 F5U002 0x0002 F5U002 Parallel printer product BELKIN F6D4050V1 0x935a F6D4050 v1 /* Billionton products */ product BILLIONTON USB100 0x0986 USB100N 10/100 FastEthernet product BILLIONTON USBLP100 0x0987 USB100LP product BILLIONTON USBEL100 0x0988 USB100EL product BILLIONTON USBE100 0x8511 USBE100 product BILLIONTON USB2AR 0x90ff USB2AR Ethernet /* Broadcom products */ product BROADCOM BCM2033 0x2033 BCM2033 Bluetooth USB dongle /* Brother Industries products */ product BROTHER HL1050 0x0002 HL-1050 laser printer product BROTHER MFC8600_9650 0x0100 MFC8600/9650 multifunction device /* Behavior Technology Computer products */ product BTC BTC6100 0x5550 6100C Keyboard product BTC BTC7932 0x6782 Keyboard with mouse port /* Canon, Inc. products */ product CANON N656U 0x2206 CanoScan N656U product CANON N1220U 0x2207 CanoScan N1220U product CANON D660U 0x2208 CanoScan D660U product CANON N676U 0x220d CanoScan N676U product CANON N1240U 0x220e CanoScan N1240U product CANON LIDE25 0x2220 CanoScan LIDE 25 product CANON S10 0x3041 PowerShot S10 product CANON S100 0x3045 PowerShot S100 product CANON S200 0x3065 PowerShot S200 product CANON REBELXT 0x30ef Digital Rebel XT /* CATC products */ product CATC NETMATE 0x000a Netmate Ethernet product CATC NETMATE2 0x000c Netmate2 Ethernet product CATC CHIEF 0x000d USB Chief Bus & Protocol Analyzer product CATC ANDROMEDA 0x1237 Andromeda hub /* CASIO products */ product CASIO QV_DIGICAM 0x1001 QV DigiCam product CASIO EXS880 0x1105 Exilim EX-S880 product CASIO BE300 0x2002 BE-300 PDA product CASIO NAMELAND 0x4001 CASIO Nameland EZ-USB /* CCYU products */ product CCYU ED1064 0x2136 EasyDisk ED1064 /* Century products */ product CENTURY EX35QUAT 0x011e Century USB Disk Enclosure product CENTURY EX35SW4_SB4 0x011f Century USB Disk Enclosure /* Cherry products */ product CHERRY MY3000KBD 0x0001 My3000 keyboard product CHERRY MY3000HUB 0x0003 My3000 hub product CHERRY CYBOARD 0x0004 CyBoard Keyboard /* Chic Technology products */ product CHIC MOUSE1 0x0001 mouse product CHIC CYPRESS 0x0003 Cypress USB Mouse /* Chicony products */ product CHICONY KB8933 0x0001 KB-8933 keyboard product CHICONY KU0325 0x0116 KU-0325 keyboard product CHICONY CNF7129 0xb071 Notebook Web Camera product CHICONY2 TWINKLECAM 0x600d TwinkleCam USB camera /* CH Products */ product CHPRODUCTS PROTHROTTLE 0x00f1 Pro Throttle product CHPRODUCTS PROPEDALS 0x00f2 Pro Pedals product CHPRODUCTS FIGHTERSTICK 0x00f3 Fighterstick product CHPRODUCTS FLIGHTYOKE 0x00ff Flight Sim Yoke /* Cisco-Linksys products */ product CISCOLINKSYS WUSB54AG 0x000c WUSB54AG Wireless Adapter product CISCOLINKSYS WUSB54G 0x000d WUSB54G Wireless Adapter product CISCOLINKSYS WUSB54GP 0x0011 WUSB54GP Wireless Adapter product CISCOLINKSYS USB200MV2 0x0018 USB200M v2 product CISCOLINKSYS HU200TS 0x001a HU200TS Wireless Adapter product CISCOLINKSYS WUSB54GC 0x0020 WUSB54GC product CISCOLINKSYS WUSB54GR 0x0023 WUSB54GR product CISCOLINKSYS WUSBF54G 0x0024 WUSBF54G product CISCOLINKSYS2 RT3070 0x4001 RT3070 product CISCOLINKSYS3 RT3070 0x0101 RT3070 /* Clipsal products */ product CLIPSAL 5500PCU 0x0303 5500PCU C-Bus /* CMOTECH products */ product CMOTECH CNU510 0x5141 CDMA Technologies USB modem product CMOTECH CNU550 0x5543 CDMA 2000 1xRTT/1xEVDO USB modem product CMOTECH CGU628 0x6006 CGU-628 product CMOTECH CDMA_MODEM1 0x6280 CDMA Technologies USB modem product CMOTECH DISK 0xf000 disk mode /* Compaq products */ product COMPAQ IPAQPOCKETPC 0x0003 iPAQ PocketPC product COMPAQ PJB100 0x504a Personal Jukebox PJB100 product COMPAQ IPAQLINUX 0x505a iPAQ Linux /* Composite Corp products looks the same as "TANGTOP" */ product COMPOSITE USBPS2 0x0001 USB to PS2 Adaptor /* Conceptronic products */ product CONCEPTRONIC PRISM_GT 0x3762 PrismGT USB 2.0 WLAN product CONCEPTRONIC C11U 0x7100 C11U product CONCEPTRONIC WL210 0x7110 WL-210 product CONCEPTRONIC AR5523_1 0x7801 AR5523 product CONCEPTRONIC AR5523_1_NF 0x7802 AR5523 (no firmware) product CONCEPTRONIC AR5523_2 0x7811 AR5523 product CONCEPTRONIC AR5523_2_NF 0x7812 AR5523 (no firmware) product CONCEPTRONIC2 C54RU 0x3c02 C54RU WLAN product CONCEPTRONIC2 C54RU2 0x3c22 C54RU product CONCEPTRONIC2 RT3070_1 0x3c08 RT3070 product CONCEPTRONIC2 RT3070_2 0x3c11 RT3070 product CONCEPTRONIC2 VIGORN61 0x3c25 VIGORN61 product CONCEPTRONIC2 RT2870_1 0x3c06 RT2870 product CONCEPTRONIC2 RT2870_2 0x3c07 RT2870 product CONCEPTRONIC2 RT2870_7 0x3c09 RT2870 product CONCEPTRONIC2 RT2870_8 0x3c12 RT2870 product CONCEPTRONIC2 RT2870_3 0x3c23 RT2870 product CONCEPTRONIC2 RT2870_4 0x3c25 RT2870 product CONCEPTRONIC2 RT2870_5 0x3c27 RT2870 product CONCEPTRONIC2 RT2870_6 0x3c28 RT2870 /* Connectix products */ product CONNECTIX QUICKCAM 0x0001 QuickCam /* Corega products */ product COREGA ETHER_USB_T 0x0001 Ether USB-T product COREGA FETHER_USB_TX 0x0004 FEther USB-TX product COREGA WLAN_USB_USB_11 0x000c WirelessLAN USB-11 product COREGA FETHER_USB_TXS 0x000d FEther USB-TXS product COREGA WLANUSB 0x0012 Wireless LAN Stick-11 product COREGA FETHER_USB2_TX 0x0017 FEther USB2-TX product COREGA WLUSB_11_KEY 0x001a ULUSB-11 Key product COREGA CGUSBRS232R 0x002a CG-USBRS232R product COREGA CGWLUSB2GL 0x002d CG-WLUSB2GL product COREGA CGWLUSB2GPX 0x002e CG-WLUSB2GPX product COREGA RT2870_1 0x002f RT2870 product COREGA RT2870_2 0x003c RT2870 product COREGA RT2870_3 0x003f RT2870 product COREGA RT3070 0x0041 RT3070 product COREGA CGWLUSB300GNM 0x0042 CG-WLUSB300GNM product COREGA WLUSB_11_STICK 0x7613 WLAN USB Stick 11 product COREGA FETHER_USB_TXC 0x9601 FEther USB-TXC /* Creative products */ product CREATIVE NOMAD_II 0x1002 Nomad II MP3 player product CREATIVE NOMAD_IIMG 0x4004 Nomad II MG product CREATIVE NOMAD 0x4106 Nomad product CREATIVE2 VOIP_BLASTER 0x0258 Voip Blaster product CREATIVE3 OPTICAL_MOUSE 0x0001 Notebook Optical Mouse /* Cambridge Silicon Radio Ltd. products */ product CSR BT_DONGLE 0x0001 Bluetooth USB dongle product CSR CSRDFU 0xffff USB Bluetooth Device in DFU State /* Chipsbank Microelectronics Co., Ltd */ product CHIPSBANK USBMEMSTICK 0x6025 CBM2080 Flash drive controller product CHIPSBANK USBMEMSTICK1 0x6026 CBM1180 Flash drive controller /* CTX products */ product CTX EX1300 0x9999 Ex1300 hub /* Curitel products */ product CURITEL HX550C 0x1101 CDMA 2000 1xRTT USB modem (HX-550C) product CURITEL HX57XB 0x2101 CDMA 2000 1xRTT USB modem (HX-570/575B/PR-600) product CURITEL PC5740 0x3701 Broadband Wireless modem product CURITEL UM175 0x3714 EVDO modem /* CyberPower products */ product CYBERPOWER 1500CAVRLCD 0x0501 1500CAVRLCD /* CyberTAN Technology products */ product CYBERTAN TG54USB 0x1666 TG54USB product CYBERTAN RT2870 0x1828 RT2870 /* Cypress Semiconductor products */ product CYPRESS MOUSE 0x0001 mouse product CYPRESS THERMO 0x0002 thermometer product CYPRESS WISPY1A 0x0bad MetaGeek Wi-Spy product CYPRESS KBDHUB 0x0101 Keyboard/Hub product CYPRESS FMRADIO 0x1002 FM Radio product CYPRESS IKARILASER 0x121f Ikari Laser SteelSeries ApS product CYPRESS USBRS232 0x5500 USB-RS232 Interface product CYPRESS SLIM_HUB 0x6560 Slim Hub product CYPRESS XX6830XX 0x6830 PATA Storage Device product CYPRESS SILVERSHIELD 0xfd13 Gembird Silver Shield PM /* Daisy Technology products */ product DAISY DMC 0x6901 USB MultiMedia Reader /* Dallas Semiconductor products */ product DALLAS J6502 0x4201 J-6502 speakers /* DataApex products */ product DATAAPEX MULTICOM 0xead6 MultiCom /* Dell products */ product DELL PORT 0x0058 Port Replicator product DELL AIO926 0x5115 Photo AIO Printer 926 product DELL BC02 0x8000 BC02 Bluetooth USB Adapter product DELL PRISM_GT_1 0x8102 PrismGT USB 2.0 WLAN product DELL TM350 0x8103 TrueMobile 350 Bluetooth USB Adapter product DELL PRISM_GT_2 0x8104 PrismGT USB 2.0 WLAN product DELL U5700 0x8114 Dell 5700 3G product DELL U5500 0x8115 Dell 5500 3G product DELL U5505 0x8116 Dell 5505 3G product DELL U5700_2 0x8117 Dell 5700 3G product DELL U5510 0x8118 Dell 5510 3G product DELL U5700_3 0x8128 Dell 5700 3G product DELL U5700_4 0x8129 Dell 5700 3G product DELL U5720 0x8133 Dell 5720 3G product DELL U5720_2 0x8134 Dell 5720 3G product DELL U740 0x8135 Dell U740 CDMA product DELL U5520 0x8136 Dell 5520 3G product DELL U5520_2 0x8137 Dell 5520 3G product DELL U5520_3 0x8138 Dell 5520 3G product DELL U5730 0x8180 Dell 5730 3G product DELL U5730_2 0x8181 Dell 5730 3G product DELL U5730_3 0x8182 Dell 5730 3G product DELL DW700 0x9500 Dell DW700 GPS /* Delorme Paublishing products */ product DELORME EARTHMATE 0x0100 Earthmate GPS /* Desknote products */ product DESKNOTE UCR_61S2B 0x0c55 UCR-61S2B /* Diamond products */ product DIAMOND RIO500USB 0x0001 Rio 500 USB /* Dick Smith Electronics (really C-Net) products */ product DICKSMITH RT2573 0x9022 RT2573 product DICKSMITH CWD854F 0x9032 C-Net CWD-854 rev F /* Digi International products */ product DIGI ACCELEPORT2 0x0002 AccelePort USB 2 product DIGI ACCELEPORT4 0x0004 AccelePort USB 4 product DIGI ACCELEPORT8 0x0008 AccelePort USB 8 /* Digianswer A/S products */ product DIGIANSWER ZIGBEE802154 0x000a ZigBee/802.15.4 MAC /* D-Link products */ /*product DLINK DSBS25 0x0100 DSB-S25 serial*/ product DLINK DUBE100 0x1a00 10/100 Ethernet product DLINK DSB650TX4 0x200c 10/100 Ethernet product DLINK DWL120E 0x3200 DWL-120 rev E product DLINK DWL122 0x3700 DWL-122 product DLINK DWLG120 0x3701 DWL-G120 product DLINK DWL120F 0x3702 DWL-120 rev F product DLINK DWLAG132 0x3a00 DWL-AG132 product DLINK DWLAG132_NF 0x3a01 DWL-AG132 (no firmware) product DLINK DWLG132 0x3a02 DWL-G132 product DLINK DWLG132_NF 0x3a03 DWL-G132 (no firmware) product DLINK DWLAG122 0x3a04 DWL-AG122 product DLINK DWLAG122_NF 0x3a05 DWL-AG122 (no firmware) product DLINK DWLG122 0x3c00 DWL-G122 b1 Wireless Adapter product DLINK DUBE100B1 0x3c05 DUB-E100 rev B1 product DLINK RT2870 0x3c09 RT2870 product DLINK RT3072 0x3c0a RT3072 product DLINK DSB650C 0x4000 10Mbps Ethernet product DLINK DSB650TX1 0x4001 10/100 Ethernet product DLINK DSB650TX 0x4002 10/100 Ethernet product DLINK DSB650TX_PNA 0x4003 1/10/100 Ethernet product DLINK DSB650TX3 0x400b 10/100 Ethernet product DLINK DSB650TX2 0x4102 10/100 Ethernet product DLINK DSB650 0xabc1 10/100 Ethernet product DLINK DUBH7 0xf103 DUB-H7 USB 2.0 7-Port Hub product DLINK2 DWA120 0x3a0c DWA-120 product DLINK2 DWA120_NF 0x3a0d DWA-120 (no firmware) product DLINK2 DWLG122C1 0x3c03 DWL-G122 c1 product DLINK2 WUA1340 0x3c04 WUA-1340 product DLINK2 DWA111 0x3c06 DWA-111 product DLINK2 RT2870_1 0x3c09 RT2870 product DLINK2 DWA110 0x3c07 DWA-110 product DLINK2 RT3072 0x3c0a RT3072 product DLINK2 RT3072_1 0x3c0b RT3072 product DLINK2 RT3070_1 0x3c0d RT3070 product DLINK2 RT3070_2 0x3c0e RT3070 product DLINK2 RT3070_3 0x3c0f RT3070 product DLINK2 RT2870_2 0x3c11 RT2870 product DLINK2 DWA130 0x3c13 DWA-130 product DLINK2 RT3070_4 0x3c15 RT3070 product DLINK2 RT3070_5 0x3c16 RT3070 product DLINK3 DWM652 0x3e04 DWM-652 /* DMI products */ product DMI CFSM_RW 0xa109 CF/SM Reader/Writer product DMI DISK 0x2bcf Generic Disk /* DrayTek products */ product DRAYTEK VIGOR550 0x0550 Vigor550 /* dresden elektronik products */ product DRESDENELEKTRONIK SENSORTERMINALBOARD 0x0001 SensorTerminalBoard product DRESDENELEKTRONIK WIRELESSHANDHELDTERMINAL 0x0004 Wireless Handheld Terminal /* Dynastream Innovations */ product DYNASTREAM ANTDEVBOARD 0x1003 ANT dev board product DYNASTREAM ANT2USB 0x1004 ANT2USB product DYNASTREAM ANTDEVBOARD2 0x1006 ANT dev board /* Edimax products */ product EDIMAX EW7318USG 0x7318 USB Wireless dongle product EDIMAX RT2870_1 0x7711 RT2870 product EDIMAX EW7717 0x7717 EW-7717 product EDIMAX EW7718 0x7718 EW-7718 /* eGalax Products */ product EGALAX TPANEL 0x0001 Touch Panel product EGALAX TPANEL2 0x0002 Touch Panel product EGALAX2 TPANEL 0x0001 Touch Panel /* Eicon Networks */ product EICON DIVA852 0x4905 Diva 852 ISDN TA /* EIZO products */ product EIZO HUB 0x0000 hub product EIZO MONITOR 0x0001 monitor /* ELCON Systemtechnik products */ product ELCON PLAN 0x0002 Goldpfeil P-LAN /* Elecom products */ product ELECOM MOUSE29UO 0x0002 mouse 29UO product ELECOM LDUSBTX0 0x200c LD-USB/TX product ELECOM LDUSBTX1 0x4002 LD-USB/TX product ELECOM LDUSBLTX 0x4005 LD-USBL/TX product ELECOM LDUSBTX2 0x400b LD-USB/TX product ELECOM LDUSB20 0x4010 LD-USB20 product ELECOM UCSGT 0x5003 UC-SGT product ELECOM UCSGT0 0x5004 UC-SGT product ELECOM LDUSBTX3 0xabc1 LD-USB/TX /* Elsa products */ product ELSA MODEM1 0x2265 ELSA Modem Board product ELSA USB2ETHERNET 0x3000 Microlink USB2Ethernet /* ELV products */ product ELV USBI2C 0xe00f USB-I2C interface /* EMS products */ product EMS DUAL_SHOOTER 0x0003 PSX gun controller converter /* Encore products */ product ENCORE RT3070_1 0x1480 RT3070 product ENCORE RT3070_2 0x14a1 RT3070 product ENCORE RT3070_3 0x14a9 RT3070 /* Entrega products */ product ENTREGA 1S 0x0001 1S serial product ENTREGA 2S 0x0002 2S serial product ENTREGA 1S25 0x0003 1S25 serial product ENTREGA 4S 0x0004 4S serial product ENTREGA E45 0x0005 E45 Ethernet product ENTREGA CENTRONICS 0x0006 Parallel Port product ENTREGA XX1 0x0008 Ethernet product ENTREGA 1S9 0x0093 1S9 serial product ENTREGA EZUSB 0x8000 EZ-USB /*product ENTREGA SERIAL 0x8001 DB25 Serial*/ product ENTREGA 2U4S 0x8004 2U4S serial/usb hub product ENTREGA XX2 0x8005 Ethernet /*product ENTREGA SERIAL_DB9 0x8093 DB9 Serial*/ /* Epson products */ product EPSON PRINTER1 0x0001 USB Printer product EPSON PRINTER2 0x0002 ISD USB Smart Cable for Mac product EPSON PRINTER3 0x0003 ISD USB Smart Cable product EPSON PRINTER5 0x0005 USB Printer product EPSON 636 0x0101 Perfection 636U / 636Photo scanner product EPSON 610 0x0103 Perfection 610 scanner product EPSON 1200 0x0104 Perfection 1200U / 1200Photo scanner product EPSON 1600 0x0107 Expression 1600 scanner product EPSON 1640 0x010a Perfection 1640SU scanner product EPSON 1240 0x010b Perfection 1240U / 1240Photo scanner product EPSON 640U 0x010c Perfection 640U scanner product EPSON 1250 0x010f Perfection 1250U / 1250Photo scanner product EPSON 1650 0x0110 Perfection 1650 scanner product EPSON GT9700F 0x0112 GT-9700F scanner product EPSON GT9300UF 0x011b GT-9300UF scanner product EPSON 3200 0x011c Perfection 3200 scanner product EPSON 1260 0x011d Perfection 1260 scanner product EPSON 1660 0x011e Perfection 1660 scanner product EPSON 1670 0x011f Perfection 1670 scanner product EPSON 1270 0x0120 Perfection 1270 scanner product EPSON 2480 0x0121 Perfection 2480 scanner product EPSON 3590 0x0122 Perfection 3590 scanner product EPSON 4990 0x012a Perfection 4990 Photo scanner product EPSON CRESSI_EDY 0x0521 Cressi Edy diving computer product EPSON STYLUS_875DC 0x0601 Stylus Photo 875DC Card Reader product EPSON STYLUS_895 0x0602 Stylus Photo 895 Card Reader product EPSON CX5400 0x0808 CX5400 scanner product EPSON 3500 0x080e CX-3500/3600/3650 MFP product EPSON RX425 0x080f Stylus Photo RX425 scanner product EPSON DX3800 0x0818 CX3700/CX3800/DX38x0 MFP scanner product EPSON 4800 0x0819 CX4700/CX4800/DX48x0 MFP scanner product EPSON 4200 0x0820 CX4100/CX4200/DX4200 MFP scanner product EPSON 5000 0x082b CX4900/CX5000/DX50x0 MFP scanner product EPSON 6000 0x082e CX5900/CX6000/DX60x0 MFP scanner product EPSON DX4000 0x082f DX4000 MFP scanner product EPSON DX7400 0x0838 CX7300/CX7400/DX7400 MFP scanner product EPSON DX8400 0x0839 CX8300/CX8400/DX8400 MFP scanner product EPSON SX100 0x0841 SX100/NX100 MFP scanner product EPSON NX300 0x0848 NX300 MFP scanner product EPSON SX200 0x0849 SX200/SX205 MFP scanner product EPSON SX400 0x084a SX400/NX400/TX400 MFP scanner /* e-TEK Labs products */ product ETEK 1COM 0x8007 Serial /* Extended Systems products */ product EXTENDED XTNDACCESS 0x0100 XTNDAccess IrDA /* FEIYA products */ product FEIYA 5IN1 0x1132 5-in-1 Card Reader /* Fiberline */ product FIBERLINE WL430U 0x6003 WL-430U /* Fossil, Inc products */ product FOSSIL WRISTPDA 0x0002 Wrist PDA /* Foxconn products */ product FOXCONN PIRELLI_DP_L10 0xe000 Pirelli DP-L10 /* Freecom products */ product FREECOM DVD 0xfc01 DVD drive product FREECOM HDD 0xfc05 Classic SL Hard Drive /* Fujitsu Siemens Computers products */ product FSC E5400 0x1009 PrismGT USB 2.0 WLAN /* Future Technology Devices products */ product FTDI SERIAL_8U100AX 0x8372 8U100AX Serial product FTDI SERIAL_8U232AM 0x6001 8U232AM Serial product FTDI SERIAL_8U232AM4 0x6004 8U232AM Serial product FTDI SERIAL_2232C 0x6010 FT2232C Dual port Serial product FTDI SERIAL_2232D 0x9e90 FT2232D Dual port Serial product FTDI SERIAL_4232H 0x6011 FT4232H Quad port Serial /* Gude Analog- und Digitalsysteme products also uses FTDI's id: */ product FTDI TACTRIX_OPENPORT_13M 0xcc48 OpenPort 1.3 Mitsubishi product FTDI TACTRIX_OPENPORT_13S 0xcc49 OpenPort 1.3 Subaru product FTDI TACTRIX_OPENPORT_13U 0xcc4a OpenPort 1.3 Universal product FTDI GAMMASCOUT 0xd678 Gamma-Scout product FTDI KBS 0xe6c8 Pyramid KBS USB LCD product FTDI EISCOU 0xe888 Expert ISDN Control USB product FTDI UOPTBR 0xe889 USB-RS232 OptoBridge product FTDI EMCU2D 0xe88a Expert mouseCLOCK USB II product FTDI PCMSFU 0xe88b Precision Clock MSF USB product FTDI EMCU2H 0xe88c Expert mouseCLOCK USB II HBG product FTDI MAXSTREAM 0xee18 Maxstream PKG-U product FTDI USB_UIRT 0xf850 USB-UIRT product FTDI USBSERIAL 0xfa00 Matrix Orbital USB Serial product FTDI MX2_3 0xfa01 Matrix Orbital MX2 or MX3 product FTDI MX4_5 0xfa02 Matrix Orbital MX4 or MX5 product FTDI LK202 0xfa03 Matrix Orbital VK/LK202 Family product FTDI LK204 0xfa04 Matrix Orbital VK/LK204 Family product FTDI CFA_632 0xfc08 Crystalfontz CFA-632 USB LCD product FTDI CFA_634 0xfc09 Crystalfontz CFA-634 USB LCD product FTDI CFA_633 0xfc0b Crystalfontz CFA-633 USB LCD product FTDI CFA_631 0xfc0c Crystalfontz CFA-631 USB LCD product FTDI CFA_635 0xfc0d Crystalfontz CFA-635 USB LCD product FTDI SEMC_DSS20 0xfc82 SEMC DSS-20 SyncStation /* Commerzielle und Technische Informationssysteme GmbH products */ product FTDI CTI_USB_NANO_485 0xf60b CTI USB-Nano 485 product FTDI CTI_USB_MINI_485 0xf608 CTI USB-Mini 485 /* Fuji photo products */ product FUJIPHOTO MASS0100 0x0100 Mass Storage /* Fujitsu protducts */ product FUJITSU AH_F401U 0x105b AH-F401U Air H device /* Fujitsu-Siemens protducts */ product FUJITSUSIEMENS SCR 0x0009 Fujitsu-Siemens SCR USB Reader /* Garmin products */ product GARMIN IQUE_3600 0x0004 iQue 3600 /* Gemalto products */ product GEMALTO PROXPU 0x5501 Prox-PU/CU /* General Instruments (Motorola) products */ product GENERALINSTMNTS SB5100 0x5100 SURFboard SB5100 Cable modem /* Genesys Logic products */ product GENESYS GL620USB 0x0501 GL620USB Host-Host interface product GENESYS GL650 0x0604 GL650 HUB product GENESYS GL606 0x0606 USB 2.0 HUB product GENESYS GL641USB 0x0700 GL641USB CompactFlash Card Reader product GENESYS GL641USB2IDE_2 0x0701 GL641USB USB-IDE Bridge No 2 product GENESYS GL641USB2IDE 0x0702 GL641USB USB-IDE Bridge product GENESYS GL641USB_2 0x0760 GL641USB 6-in-1 Card Reader /* GIGABYTE products */ product GIGABYTE GN54G 0x8001 GN-54G product GIGABYTE GNBR402W 0x8002 GN-BR402W product GIGABYTE GNWLBM101 0x8003 GN-WLBM101 product GIGABYTE GNWBKG 0x8007 GN-WBKG product GIGABYTE GNWB01GS 0x8008 GN-WB01GS product GIGABYTE GNWI05GS 0x800a GN-WI05GS /* Gigaset products */ product GIGASET WLAN 0x0701 WLAN product GIGASET SMCWUSBTG 0x0710 SMCWUSBT-G product GIGASET SMCWUSBTG_NF 0x0711 SMCWUSBT-G (no firmware) product GIGASET AR5523 0x0712 AR5523 product GIGASET AR5523_NF 0x0713 AR5523 (no firmware) product GIGASET RT2573 0x0722 RT2573 product GIGASET RT3070_1 0x0740 RT3070 product GIGASET RT3070_2 0x0744 RT3070 product GIGABYTE RT2870_1 0x800b RT2870 product GIGABYTE GNWB31N 0x800c GN-WB31N product GIGABYTE GNWB32L 0x800d GN-WB32L /* Global Sun Technology product */ product GLOBALSUN AR5523_1 0x7801 AR5523 product GLOBALSUN AR5523_1_NF 0x7802 AR5523 (no firmware) product GLOBALSUN AR5523_2 0x7811 AR5523 product GLOBALSUN AR5523_2_NF 0x7812 AR5523 (no firmware) /* Globespan products */ product GLOBESPAN PRISM_GT_1 0x2000 PrismGT USB 2.0 WLAN product GLOBESPAN PRISM_GT_2 0x2002 PrismGT USB 2.0 WLAN /* G.Mate, Inc products */ product GMATE YP3X00 0x1001 YP3X00 PDA /* GoHubs products */ product GOHUBS GOCOM232 0x1001 GoCOM232 Serial /* Good Way Technology products */ product GOODWAY GWUSB2E 0x6200 GWUSB2E product GOODWAY RT2573 0xc019 RT2573 /* Google products */ product GOOGLE NEXUSONE 0x4e11 Nexus One /* Gravis products */ product GRAVIS GAMEPADPRO 0x4001 GamePad Pro /* GREENHOUSE products */ product GREENHOUSE KANA21 0x0001 CF-writer with MP3 /* Griffin Technology */ product GRIFFIN IMATE 0x0405 iMate, ADB Adapter /* Guillemot Corporation */ product GUILLEMOT DALEADER 0xa300 DA Leader product GUILLEMOT HWGUSB254 0xe000 HWGUSB2-54 WLAN product GUILLEMOT HWGUSB254LB 0xe010 HWGUSB2-54-LB product GUILLEMOT HWGUSB254V2AP 0xe020 HWGUSB2-54V2-AP product GUILLEMOT HWNU300 0xe030 HWNU-300 /* Hagiwara products */ product HAGIWARA FGSM 0x0002 FlashGate SmartMedia Card Reader product HAGIWARA FGCF 0x0003 FlashGate CompactFlash Card Reader product HAGIWARA FG 0x0005 FlashGate /* HAL Corporation products */ product HAL IMR001 0x0011 Crossam2+USB IR commander /* Handspring, Inc. */ product HANDSPRING VISOR 0x0100 Handspring Visor product HANDSPRING TREO 0x0200 Handspring Treo product HANDSPRING TREO600 0x0300 Handspring Treo 600 /* Hauppauge Computer Works */ product HAUPPAUGE WINTV_USB_FM 0x4d12 WinTV USB FM product HAUPPAUGE2 NOVAT500 0x9580 NovaT 500Stick /* Hawking Technologies products */ product HAWKING RT2870_1 0x0001 RT2870 product HAWKING RT2870_2 0x0003 RT2870 product HAWKING HWUN2 0x0009 HWUN2 product HAWKING RT3070 0x000b RT3070 product HAWKING UF100 0x400c 10/100 USB Ethernet /* HID Global GmbH products */ product HIDGLOBAL CM2020 0x0596 Omnikey Cardman 2020 product HIDGLOBAL CM6020 0x1784 Omnikey Cardman 6020 /* Hitachi, Ltd. products */ product HITACHI DVDCAM_DZ_MV100A 0x0004 DVD-CAM DZ-MV100A Camcorder product HITACHI DVDCAM_USB 0x001e DVDCAM USB HS Interface /* HP products */ product HP 895C 0x0004 DeskJet 895C product HP 4100C 0x0101 Scanjet 4100C product HP S20 0x0102 Photosmart S20 product HP 880C 0x0104 DeskJet 880C product HP 4200C 0x0105 ScanJet 4200C product HP CDWRITERPLUS 0x0107 CD-Writer Plus product HP KBDHUB 0x010c Multimedia Keyboard Hub product HP G55XI 0x0111 OfficeJet G55xi product HP HN210W 0x011c HN210W 802.11b WLAN product HP 49GPLUS 0x0121 49g+ graphing calculator product HP 6200C 0x0201 ScanJet 6200C product HP S20b 0x0202 PhotoSmart S20 product HP 815C 0x0204 DeskJet 815C product HP 3300C 0x0205 ScanJet 3300C product HP CDW8200 0x0207 CD-Writer Plus 8200e product HP MMKEYB 0x020c Multimedia keyboard product HP 1220C 0x0212 DeskJet 1220C product HP 810C 0x0304 DeskJet 810C/812C product HP 4300C 0x0305 Scanjet 4300C product HP CDW4E 0x0307 CD-Writer+ CD-4e product HP G85XI 0x0311 OfficeJet G85xi product HP 1200 0x0317 LaserJet 1200 product HP 5200C 0x0401 Scanjet 5200C product HP 830C 0x0404 DeskJet 830C product HP 3400CSE 0x0405 ScanJet 3400cse product HP 6300C 0x0601 Scanjet 6300C product HP 840C 0x0604 DeskJet 840c product HP 2200C 0x0605 ScanJet 2200C product HP 5300C 0x0701 Scanjet 5300C product HP 4400C 0x0705 Scanjet 4400C product HP 4470C 0x0805 Scanjet 4470C product HP 82x0C 0x0b01 Scanjet 82x0C product HP 2300D 0x0b17 Laserjet 2300d product HP 970CSE 0x1004 Deskjet 970Cse product HP 5400C 0x1005 Scanjet 5400C product HP 2215 0x1016 iPAQ 22xx/Jornada 548 product HP 568J 0x1116 Jornada 568 product HP 930C 0x1204 DeskJet 930c product HP P2000U 0x1801 Inkjet P-2000U product HP HS2300 0x1e1d HS2300 HSDPA (aka MC8775) product HP 640C 0x2004 DeskJet 640c product HP 4670V 0x3005 ScanJet 4670v product HP P1100 0x3102 Photosmart P1100 product HP LD220 0x3524 LD220 POS Display product HP OJ4215 0x3d11 OfficeJet 4215 product HP HN210E 0x811c Ethernet HN210E product HP2 C500 0x6002 PhotoSmart C500 product HP EV2200 0x1b1d ev2200 HSDPA (aka MC5720) product HP HS2300 0x1e1d hs2300 HSDPA (aka MC8775) /* HTC products */ product HTC WINMOBILE 0x00ce HTC USB Sync product HTC PPC6700MODEM 0x00cf PPC6700 Modem product HTC SMARTPHONE 0x0a51 SmartPhone USB Sync product HTC WIZARD 0x0bce HTC Wizard USB Sync product HTC LEGENDSYNC 0x0c97 HTC Legend USB Sync product HTC LEGEND 0x0ff9 HTC Legend product HTC LEGENDINTERNET 0x0ffe HTC Legend Internet Sharing /* HUAWEI products */ product HUAWEI MOBILE 0x1001 Huawei Mobile -product HUAWEI E220 0x1003 Huawei HSDPA modem -product HUAWEI E220BIS 0x1004 Huawei HSDPA modem +product HUAWEI E220 0x1003 HSDPA modem +product HUAWEI E220BIS 0x1004 HSDPA modem product HUAWEI E1401 0x1401 3G modem product HUAWEI E1402 0x1402 3G modem product HUAWEI E1403 0x1403 3G modem product HUAWEI E1404 0x1404 3G modem product HUAWEI E1405 0x1405 3G modem product HUAWEI E1406 0x1406 3G modem product HUAWEI E1407 0x1407 3G modem product HUAWEI E1408 0x1408 3G modem product HUAWEI E1409 0x1409 3G modem product HUAWEI E140A 0x140a 3G modem product HUAWEI E140B 0x140b 3G modem -product HUAWEI E180V 0x140c Huawei Mobile E180V +product HUAWEI E180V 0x140c E180V product HUAWEI E140D 0x140d 3G modem product HUAWEI E140E 0x140e 3G modem product HUAWEI E140F 0x140f 3G modem product HUAWEI E1410 0x1410 3G modem product HUAWEI E1411 0x1411 3G modem product HUAWEI E1412 0x1412 3G modem product HUAWEI E1413 0x1413 3G modem product HUAWEI E1414 0x1414 3G modem product HUAWEI E1415 0x1415 3G modem product HUAWEI E1416 0x1416 3G modem product HUAWEI E1417 0x1417 3G modem product HUAWEI E1418 0x1418 3G modem product HUAWEI E1419 0x1419 3G modem product HUAWEI E141A 0x141a 3G modem product HUAWEI E141B 0x141b 3G modem product HUAWEI E141C 0x141c 3G modem product HUAWEI E141D 0x141d 3G modem product HUAWEI E141E 0x141e 3G modem product HUAWEI E141F 0x141f 3G modem product HUAWEI E1420 0x1420 3G modem product HUAWEI E1421 0x1421 3G modem product HUAWEI E1422 0x1422 3G modem product HUAWEI E1423 0x1423 3G modem product HUAWEI E1424 0x1424 3G modem product HUAWEI E1425 0x1425 3G modem product HUAWEI E1426 0x1426 3G modem product HUAWEI E1427 0x1427 3G modem product HUAWEI E1428 0x1428 3G modem product HUAWEI E1429 0x1429 3G modem product HUAWEI E142A 0x142a 3G modem product HUAWEI E142B 0x142b 3G modem product HUAWEI E142C 0x142c 3G modem product HUAWEI E142D 0x142d 3G modem product HUAWEI E142E 0x142e 3G modem product HUAWEI E142F 0x142f 3G modem product HUAWEI E1430 0x1430 3G modem product HUAWEI E1431 0x1431 3G modem product HUAWEI E1432 0x1432 3G modem product HUAWEI E1433 0x1433 3G modem product HUAWEI E1434 0x1434 3G modem product HUAWEI E1435 0x1435 3G modem product HUAWEI E1436 0x1436 3G modem product HUAWEI E1437 0x1437 3G modem product HUAWEI E1438 0x1438 3G modem product HUAWEI E1439 0x1439 3G modem product HUAWEI E143A 0x143a 3G modem product HUAWEI E143B 0x143b 3G modem product HUAWEI E143C 0x143c 3G modem product HUAWEI E143D 0x143d 3G modem product HUAWEI E143E 0x143e 3G modem product HUAWEI E143F 0x143f 3G modem product HUAWEI E1752 0x1446 3G modem product HUAWEI K3765 0x1465 3G modem -product HUAWEI E14AC 0x14ac 3G modem -product HUAWEI K3765_INIT 0x1520 HUAWEI Mobile K3765 Initial +product HUAWEI E1820 0x14ac E1820 HSPA+ USB Slider +product HUAWEI K3765_INIT 0x1520 K3765 Initial /* HUAWEI 3com products */ product HUAWEI3COM WUB320G 0x0009 Aolynk WUB320g /* IBM Corporation */ product IBM USBCDROMDRIVE 0x4427 USB CD-ROM Drive /* Imagination Technologies products */ product IMAGINATION DBX1 0x2107 DBX1 DSP core /* Inside Out Networks products */ product INSIDEOUT EDGEPORT4 0x0001 EdgePort/4 serial ports /* In-System products */ product INSYSTEM F5U002 0x0002 Parallel printer product INSYSTEM ATAPI 0x0031 ATAPI Adapter product INSYSTEM ISD110 0x0200 IDE Adapter ISD110 product INSYSTEM ISD105 0x0202 IDE Adapter ISD105 product INSYSTEM USBCABLE 0x081a USB cable product INSYSTEM STORAGE_V2 0x5701 USB Storage Adapter V2 /* Intel products */ product INTEL EASYPC_CAMERA 0x0110 Easy PC Camera product INTEL TESTBOARD 0x9890 82930 test board product INTEL2 IRMH 0x0020 Integrated Rate Matching Hub /* Intersil products */ product INTERSIL PRISM_GT 0x1000 PrismGT USB 2.0 WLAN product INTERSIL PRISM_2X 0x3642 Prism2.x or Atmel WLAN /* Interpid Control Systems products */ product INTREPIDCS VALUECAN 0x0601 ValueCAN CAN bus interface product INTREPIDCS NEOVI 0x0701 NeoVI Blue vehicle bus interface /* I/O DATA products */ product IODATA IU_CD2 0x0204 DVD Multi-plus unit iU-CD2 product IODATA DVR_UEH8 0x0206 DVD Multi-plus unit DVR-UEH8 product IODATA USBSSMRW 0x0314 USB-SSMRW SD-card product IODATA USBSDRW 0x031e USB-SDRW SD-card product IODATA USBETT 0x0901 USB ETT product IODATA USBETTX 0x0904 USB ETTX product IODATA USBETTXS 0x0913 USB ETTX product IODATA USBWNB11A 0x0919 USB WN-B11 product IODATA USBWNB11 0x0922 USB Airport WN-B11 product IODATA ETGUS2 0x0930 ETG-US2 product IODATA RT3072_1 0x0944 RT3072 product IODATA RT3072_2 0x0945 RT3072 product IODATA RT3072_3 0x0947 RT3072 product IODATA RT3072_4 0x0948 RT3072 product IODATA USBRSAQ 0x0a03 Serial USB-RSAQ1 product IODATA USBRSAQ5 0x0a0e Serial USB-RSAQ5 product IODATA2 USB2SC 0x0a09 USB2.0-SCSI Bridge USB2-SC /* Iomega products */ product IOMEGA ZIP100 0x0001 Zip 100 product IOMEGA ZIP250 0x0030 Zip 250 /* Integrated System Solution Corp. products */ product ISSC ISSCBTA 0x1001 Bluetooth USB Adapter /* iTegno products */ product ITEGNO WM1080A 0x1080 WM1080A GSM/GPRS modem product ITEGNO WM2080A 0x2080 WM2080A CDMA modem /* Ituner networks products */ product ITUNERNET USBLCD2X20 0x0002 USB-LCD 2x20 product ITUNERNET USBLCD4X20 0xc001 USB-LCD 4x20 /* Jablotron products */ product JABLOTRON PC60B 0x0001 PC-60B /* Jaton products */ product JATON EDA 0x5704 Ethernet /* JMicron products */ product JMICRON JM20336 0x2336 USB to SATA Bridge product JMICRON JM20337 0x2338 USB to ATA/ATAPI Bridge /* JVC products */ product JVC GR_DX95 0x000a GR-DX95 product JVC MP_PRX1 0x3008 MP-PRX1 Ethernet /* JRC products */ product JRC AH_J3001V_J3002V 0x0001 AirH PHONE AH-J3001V/J3002V /* Kawatsu products */ product KAWATSU MH4000P 0x0003 MiniHub 4000P /* Keisokugiken Corp. products */ product KEISOKUGIKEN USBDAQ 0x0068 HKS-0200 USBDAQ /* Kensington products */ product KENSINGTON ORBIT 0x1003 Orbit USB/PS2 trackball product KENSINGTON TURBOBALL 0x1005 TurboBall /* Keyspan products */ product KEYSPAN USA28_NF 0x0101 USA-28 serial Adapter (no firmware) product KEYSPAN USA28X_NF 0x0102 USA-28X serial Adapter (no firmware) product KEYSPAN USA19_NF 0x0103 USA-19 serial Adapter (no firmware) product KEYSPAN USA18_NF 0x0104 USA-18 serial Adapter (no firmware) product KEYSPAN USA18X_NF 0x0105 USA-18X serial Adapter (no firmware) product KEYSPAN USA19W_NF 0x0106 USA-19W serial Adapter (no firmware) product KEYSPAN USA19 0x0107 USA-19 serial Adapter product KEYSPAN USA19W 0x0108 USA-19W serial Adapter product KEYSPAN USA49W_NF 0x0109 USA-49W serial Adapter (no firmware) product KEYSPAN USA49W 0x010a USA-49W serial Adapter product KEYSPAN USA19QI_NF 0x010b USA-19QI serial Adapter (no firmware) product KEYSPAN USA19QI 0x010c USA-19QI serial Adapter product KEYSPAN USA19Q_NF 0x010d USA-19Q serial Adapter (no firmware) product KEYSPAN USA19Q 0x010e USA-19Q serial Adapter product KEYSPAN USA28 0x010f USA-28 serial Adapter product KEYSPAN USA28XXB 0x0110 USA-28X/XB serial Adapter product KEYSPAN USA18 0x0111 USA-18 serial Adapter product KEYSPAN USA18X 0x0112 USA-18X serial Adapter product KEYSPAN USA28XB_NF 0x0113 USA-28XB serial Adapter (no firmware) product KEYSPAN USA28XA_NF 0x0114 USA-28XB serial Adapter (no firmware) product KEYSPAN USA28XA 0x0115 USA-28XA serial Adapter product KEYSPAN USA18XA_NF 0x0116 USA-18XA serial Adapter (no firmware) product KEYSPAN USA18XA 0x0117 USA-18XA serial Adapter product KEYSPAN USA19QW_NF 0x0118 USA-19WQ serial Adapter (no firmware) product KEYSPAN USA19QW 0x0119 USA-19WQ serial Adapter product KEYSPAN USA19HA 0x0121 USA-19HS serial Adapter product KEYSPAN UIA10 0x0201 UIA-10 remote control product KEYSPAN UIA11 0x0202 UIA-11 remote control /* Kingston products */ product KINGSTON XX1 0x0008 Ethernet product KINGSTON KNU101TX 0x000a KNU101TX USB Ethernet /* Kawasaki products */ product KLSI DUH3E10BT 0x0008 USB Ethernet product KLSI DUH3E10BTN 0x0009 USB Ethernet /* Kodak products */ product KODAK DC220 0x0100 Digital Science DC220 product KODAK DC260 0x0110 Digital Science DC260 product KODAK DC265 0x0111 Digital Science DC265 product KODAK DC290 0x0112 Digital Science DC290 product KODAK DC240 0x0120 Digital Science DC240 product KODAK DC280 0x0130 Digital Science DC280 /* Konica Corp. Products */ product KONICA CAMERA 0x0720 Digital Color Camera /* KYE products */ product KYE NICHE 0x0001 Niche mouse product KYE NETSCROLL 0x0003 Genius NetScroll mouse product KYE FLIGHT2000 0x1004 Flight 2000 joystick product KYE VIVIDPRO 0x2001 ColorPage Vivid-Pro scanner /* Kyocera products */ product KYOCERA FINECAM_S3X 0x0100 Finecam S3x product KYOCERA FINECAM_S4 0x0101 Finecam S4 product KYOCERA FINECAM_S5 0x0103 Finecam S5 product KYOCERA FINECAM_L3 0x0105 Finecam L3 product KYOCERA AHK3001V 0x0203 AH-K3001V product KYOCERA2 CDMA_MSM_K 0x17da Qualcomm Kyocera CDMA Technologies MSM product KYOCERA2 KPC680 0x180a Qualcomm Kyocera CDMA Technologies MSM /* LaCie products */ product LACIE HD 0xa601 Hard Disk product LACIE CDRW 0xa602 CD R/W /* Leadtek products */ product LEADTEK 9531 0x2101 9531 GPS /* Lexar products */ product LEXAR JUMPSHOT 0x0001 jumpSHOT CompactFlash Reader product LEXAR CF_READER 0xb002 USB CF Reader /* Lexmark products */ product LEXMARK S2450 0x0009 Optra S 2450 /* Liebert products */ product LIEBERT POWERSURE_PXT 0xffff PowerSure Personal XT /* Linksys products */ product LINKSYS MAUSB2 0x0105 Camedia MAUSB-2 product LINKSYS USB10TX1 0x200c USB10TX product LINKSYS USB10T 0x2202 USB10T Ethernet product LINKSYS USB100TX 0x2203 USB100TX Ethernet product LINKSYS USB100H1 0x2204 USB100H1 Ethernet/HPNA product LINKSYS USB10TA 0x2206 USB10TA Ethernet product LINKSYS USB10TX2 0x400b USB10TX product LINKSYS2 WUSB11 0x2219 WUSB11 Wireless Adapter product LINKSYS2 USB200M 0x2226 USB 2.0 10/100 Ethernet product LINKSYS3 WUSB11v28 0x2233 WUSB11 v2.8 Wireless Adapter product LINKSYS4 USB1000 0x0039 USB1000 product LINKSYS4 WUSB100 0x0070 WUSB100 product LINKSYS4 WUSB600N 0x0071 WUSB600N product LINKSYS4 WUSB54GCV2 0x0073 WUSB54GC v2 product LINKSYS4 WUSB54GCV3 0x0077 WUSB54GC v3 product LINKSYS4 RT3070 0x0078 RT3070 product LINKSYS4 WUSB600NV2 0x0079 WUSB600N v2 /* Logitech products */ product LOGITECH M2452 0x0203 M2452 keyboard product LOGITECH M4848 0x0301 M4848 mouse product LOGITECH PAGESCAN 0x040f PageScan product LOGITECH QUICKCAMWEB 0x0801 QuickCam Web product LOGITECH QUICKCAMPRO 0x0810 QuickCam Pro product LOGITECH QUICKCAMEXP 0x0840 QuickCam Express product LOGITECH QUICKCAM 0x0850 QuickCam product LOGITECH QUICKCAMPRO3 0x0990 QuickCam Pro 9000 product LOGITECH N43 0xc000 N43 product LOGITECH N48 0xc001 N48 mouse product LOGITECH MBA47 0xc002 M-BA47 mouse product LOGITECH WMMOUSE 0xc004 WingMan Gaming Mouse product LOGITECH BD58 0xc00c BD58 mouse product LOGITECH UN58A 0xc030 iFeel Mouse product LOGITECH UN53B 0xc032 iFeel MouseMan product LOGITECH WMPAD 0xc208 WingMan GamePad Extreme product LOGITECH WMRPAD 0xc20a WingMan RumblePad product LOGITECH WMJOY 0xc281 WingMan Force joystick product LOGITECH BB13 0xc401 USB-PS/2 Trackball product LOGITECH RK53 0xc501 Cordless mouse product LOGITECH RB6 0xc503 Cordless keyboard product LOGITECH MX700 0xc506 Cordless optical mouse product LOGITECH QUICKCAMPRO2 0xd001 QuickCam Pro /* Logitec Corp. products */ product LOGITEC LDR_H443SU2 0x0033 DVD Multi-plus unit LDR-H443SU2 product LOGITEC LDR_H443U2 0x00b3 DVD Multi-plus unit LDR-H443U2 product LOGITEC LAN_GTJU2A 0x0160 LAN-GTJ/U2A Ethernet product LOGITEC RT2870_1 0x0162 RT2870 product LOGITEC RT2870_2 0x0163 RT2870 product LOGITEC RT2870_3 0x0164 RT2870 /* Longcheer Holdings, Ltd. products */ product LONGCHEER WM66 0x6061 Longcheer WM66 HSDPA product LONGCHEER W14 0x9603 Mobilcom W14 product LONGCHEER DISK 0xf000 Driver disk /* Lucent products */ product LUCENT EVALKIT 0x1001 USS-720 evaluation kit /* Luwen products */ product LUWEN EASYDISK 0x0005 EasyDisc /* Macally products */ product MACALLY MOUSE1 0x0101 mouse /* Marvell Technology Group, Ltd. products */ product MARVELL SHEEVAPLUG 0x9e8f SheevaPlug serial interface /* Matrix Orbital products */ product MATRIXORBITAL MOUA 0x0153 Martrix Orbital MOU-Axxxx LCD displays /* MCT Corp. */ product MCT HUB0100 0x0100 Hub product MCT DU_H3SP_USB232 0x0200 D-Link DU-H3SP USB BAY Hub product MCT USB232 0x0210 USB-232 Interface product MCT SITECOM_USB232 0x0230 Sitecom USB-232 Products /* Meizu Electronics */ product MEIZU M6_SL 0x0140 MiniPlayer M6 (SL) /* Melco, Inc products */ product MELCO LUATX1 0x0001 LUA-TX Ethernet product MELCO LUATX5 0x0005 LUA-TX Ethernet product MELCO LUA2TX5 0x0009 LUA2-TX Ethernet product MELCO LUAKTX 0x0012 LUA-KTX Ethernet product MELCO DUBPXXG 0x001c DUB-PxxG product MELCO LUAU2KTX 0x003d LUA-U2-KTX Ethernet product MELCO KG54YB 0x005e WLI-U2-KG54-YB WLAN product MELCO KG54 0x0066 WLI-U2-KG54 WLAN product MELCO KG54AI 0x0067 WLI-U2-KG54-AI WLAN product MELCO LUA3U2AGT 0x006e LUA3-U2-AGT product MELCO NINWIFI 0x008b Nintendo Wi-Fi product MELCO PCOPRS1 0x00b3 PC-OP-RS1 RemoteStation product MELCO SG54HP 0x00d8 WLI-U2-SG54HP product MELCO G54HP 0x00d9 WLI-U2-G54HP product MELCO KG54L 0x00da WLI-U2-KG54L product MELCO WLIUCG300N 0x00e8 WLI-UC-G300N product MELCO SG54HG 0x00f4 WLI-U2-SG54HG product MELCO WLRUCG 0x0116 WLR-UC-G product MELCO WLRUCGAOSS 0x0119 WLR-UC-G-AOSS product MELCO WLIUCAG300N 0x012e WLI-UC-AG300N product MELCO RT2870_1 0x0148 RT2870 product MELCO RT2870_2 0x0150 RT2870 product MELCO WLIUCGN 0x015d WLI-UC-GN /* Merlin products */ product MERLIN V620 0x1110 Merlin V620 /* MetaGeek products */ product METAGEEK WISPY1B 0x083e MetaGeek Wi-Spy product METAGEEK WISPY24X 0x083f MetaGeek Wi-Spy 2.4x product METAGEEK2 WISPYDBX 0x5000 MetaGeek Wi-Spy DBx /* Metricom products */ product METRICOM RICOCHET_GS 0x0001 Ricochet GS /* MGE UPS Systems */ product MGE UPS1 0x0001 MGE UPS SYSTEMS PROTECTIONCENTER 1 product MGE UPS2 0xffff MGE UPS SYSTEMS PROTECTIONCENTER 2 /* MEI products */ product MEI CASHFLOW_SC 0x1100 Cashflow-SC Cash Acceptor product MEI S2000 0x1101 Seies 2000 Combo Acceptor /* Micro Star International products */ product MSI BT_DONGLE 0x1967 Bluetooth USB dongle product MSI RT3070_1 0x3820 RT3070 product MSI RT3070_2 0x3821 RT3070 product MSI RT3070_8 0x3822 RT3070 product MSI RT3070_3 0x3870 RT3070 product MSI RT3070_9 0x3871 RT3070 product MSI UB11B 0x6823 UB11B product MSI RT2570 0x6861 RT2570 product MSI RT2570_2 0x6865 RT2570 product MSI RT2570_3 0x6869 RT2570 product MSI RT2573_1 0x6874 RT2573 product MSI RT2573_2 0x6877 RT2573 product MSI RT3070_4 0x6899 RT3070 product MSI RT3070_5 0x821a RT3070 product MSI RT3070_10 0x822a RT3070 product MSI RT3070_6 0x870a RT3070 product MSI RT3070_11 0x871a RT3070 product MSI RT3070_7 0x899a RT3070 product MSI RT2573_3 0xa861 RT2573 product MSI RT2573_4 0xa874 RT2573 /* Microsoft products */ product MICROSOFT SIDEPREC 0x0008 SideWinder Precision Pro product MICROSOFT INTELLIMOUSE 0x0009 IntelliMouse product MICROSOFT NATURALKBD 0x000b Natural Keyboard Elite product MICROSOFT DDS80 0x0014 Digital Sound System 80 product MICROSOFT SIDEWINDER 0x001a Sidewinder Precision Racing Wheel product MICROSOFT INETPRO 0x001c Internet Keyboard Pro product MICROSOFT TBEXPLORER 0x0024 Trackball Explorer product MICROSOFT INTELLIEYE 0x0025 IntelliEye mouse product MICROSOFT INETPRO2 0x002b Internet Keyboard Pro product MICROSOFT INTELLIMOUSE5 0x0039 IntelliMouse 1.1 5-Button Mouse product MICROSOFT WHEELMOUSE 0x0040 Wheel Mouse Optical product MICROSOFT MN510 0x006e MN510 Wireless product MICROSOFT 700WX 0x0079 Palm 700WX product MICROSOFT MN110 0x007a 10/100 USB NIC product MICROSOFT WLINTELLIMOUSE 0x008c Wireless Optical IntelliMouse product MICROSOFT WLNOTEBOOK 0x00b9 Wireless Optical Mouse (Model 1023) product MICROSOFT COMFORT3000 0x00d1 Comfort Optical Mouse 3000 (Model 1043) product MICROSOFT WLNOTEBOOK3 0x00d2 Wireless Optical Mouse 3000 (Model 1049) product MICROSOFT NATURAL4000 0x00db Natural Ergonomic Keyboard 4000 product MICROSOFT WLNOTEBOOK2 0x00e1 Wireless Optical Mouse 3000 (Model 1056) product MICROSOFT XBOX360 0x0292 XBOX 360 WLAN /* Microtech products */ product MICROTECH SCSIDB25 0x0004 USB-SCSI-DB25 product MICROTECH SCSIHD50 0x0005 USB-SCSI-HD50 product MICROTECH DPCM 0x0006 USB CameraMate product MICROTECH FREECOM 0xfc01 Freecom USB-IDE /* Microtek products */ product MICROTEK 336CX 0x0094 Phantom 336CX - C3 scanner product MICROTEK X6U 0x0099 ScanMaker X6 - X6U product MICROTEK C6 0x009a Phantom C6 scanner product MICROTEK 336CX2 0x00a0 Phantom 336CX - C3 scanner product MICROTEK V6USL 0x00a3 ScanMaker V6USL product MICROTEK V6USL2 0x80a3 ScanMaker V6USL product MICROTEK V6UL 0x80ac ScanMaker V6UL /* Microtune, Inc. products */ product MICROTUNE BT_DONGLE 0x1000 Bluetooth USB dongle /* Midiman products */ product MIDIMAN MIDISPORT2X2 0x1001 Midisport 2x2 /* MindsAtWork products */ product MINDSATWORK WALLET 0x0001 Digital Wallet /* Minolta Co., Ltd. */ product MINOLTA 2300 0x4001 Dimage 2300 product MINOLTA S304 0x4007 Dimage S304 product MINOLTA X 0x4009 Dimage X product MINOLTA 5400 0x400e Dimage 5400 product MINOLTA F300 0x4011 Dimage F300 product MINOLTA E223 0x4017 Dimage E223 /* Mitsumi products */ product MITSUMI CDRRW 0x0000 CD-R/RW Drive product MITSUMI BT_DONGLE 0x641f Bluetooth USB dongle product MITSUMI FDD 0x6901 USB FDD /* Mobile Action products */ product MOBILEACTION MA620 0x0620 MA-620 Infrared Adapter /* Mobility products */ product MOBILITY EA 0x0204 Ethernet product MOBILITY EASIDOCK 0x0304 EasiDock Ethernet /* MosChip products */ product MOSCHIP MCS7703 0x7703 MCS7703 Serial Port Adapter product MOSCHIP MCS7830 0x7830 MCS7830 Ethernet /* Motorola products */ product MOTOROLA MC141555 0x1555 MC141555 hub controller product MOTOROLA SB4100 0x4100 SB4100 USB Cable Modem product MOTOROLA2 T720C 0x2822 T720c product MOTOROLA2 A41XV32X 0x2a22 A41x/V32x Mobile Phones product MOTOROLA2 E398 0x4810 E398 Mobile Phone product MOTOROLA2 USBLAN 0x600c USBLAN product MOTOROLA2 USBLAN2 0x6027 USBLAN product MOTOROLA4 RT2770 0x9031 RT2770 product MOTOROLA4 RT3070 0x9032 RT3070 /* MultiTech products */ product MULTITECH ATLAS 0xf101 MT5634ZBA-USB modem /* Mustek products */ product MUSTEK 1200CU 0x0001 1200 CU scanner product MUSTEK 600CU 0x0002 600 CU scanner product MUSTEK 1200USB 0x0003 1200 USB scanner product MUSTEK 1200UB 0x0006 1200 UB scanner product MUSTEK 1200USBPLUS 0x0007 1200 USB Plus scanner product MUSTEK 1200CUPLUS 0x0008 1200 CU Plus scanner product MUSTEK BEARPAW1200F 0x0010 BearPaw 1200F scanner product MUSTEK BEARPAW2400TA 0x0218 BearPaw 2400TA scanner product MUSTEK BEARPAW1200TA 0x021e BearPaw 1200TA scanner product MUSTEK 600USB 0x0873 600 USB scanner product MUSTEK MDC800 0xa800 MDC-800 digital camera /* M-Systems products */ product MSYSTEMS DISKONKEY 0x0010 DiskOnKey product MSYSTEMS DISKONKEY2 0x0011 DiskOnKey /* Myson products */ product MYSON HEDEN_8813 0x8813 USB-IDE product MYSON HEDEN 0x8818 USB-IDE product MYSON HUBREADER 0x8819 COMBO Card reader with USB HUB product MYSON STARREADER 0x9920 USB flash card adapter /* National Semiconductor */ product NATIONAL BEARPAW1200 0x1000 BearPaw 1200 product NATIONAL BEARPAW2400 0x1001 BearPaw 2400 /* NEC products */ product NEC HUB_0050 0x0050 USB 2.0 7-Port Hub product NEC HUB_005A 0x005a USB 2.0 4-Port Hub product NEC HUB 0x55aa hub product NEC HUB_B 0x55ab hub /* NEODIO products */ product NEODIO ND3260 0x3260 8-in-1 Multi-format Flash Controller product NEODIO ND5010 0x5010 Multi-format Flash Controller /* Neotel products */ product NEOTEL PRIME 0x4000 Prime USB modem /* Netac products */ product NETAC CF_CARD 0x1060 USB-CF-Card product NETAC ONLYDISK 0x0003 OnlyDisk /* NetChip Technology Products */ product NETCHIP TURBOCONNECT 0x1080 Turbo-Connect product NETCHIP CLIK_40 0xa140 USB Clik! 40 product NETCHIP ETHERNETGADGET 0xa4a2 Linux Ethernet/RNDIS gadget on pxa210/25x/26x /* Netgear products */ product NETGEAR EA101 0x1001 Ethernet product NETGEAR EA101X 0x1002 Ethernet product NETGEAR FA101 0x1020 Ethernet 10/100, USB1.1 product NETGEAR FA120 0x1040 USB 2.0 Ethernet product NETGEAR WG111V2_2 0x4240 PrismGT USB 2.0 WLAN product NETGEAR WG111V3 0x4260 WG111v3 product NETGEAR WG111U 0x4300 WG111U product NETGEAR WG111U_NF 0x4301 WG111U (no firmware) product NETGEAR WG111V2 0x6a00 WG111V2 product NETGEAR2 MA101 0x4100 MA101 product NETGEAR2 MA101B 0x4102 MA101 Rev B product NETGEAR3 WG111T 0x4250 WG111T product NETGEAR3 WG111T_NF 0x4251 WG111T (no firmware) product NETGEAR3 WPN111 0x5f00 WPN111 product NETGEAR3 WPN111_NF 0x5f01 WPN111 (no firmware) product NETGEAR3 WPN111_2 0x5f02 WPN111 /* NetIndex products */ product NETINDEX WS002IN 0x2001 Willcom WS002IN /* NEWlink */ product NEWLINK USB2IDEBRIDGE 0x00ff USB 2.0 Hard Drive Enclosure /* Nikon products */ product NIKON E990 0x0102 Digital Camera E990 product NIKON LS40 0x4000 CoolScan LS40 ED product NIKON D300 0x041a Digital Camera D300 /* NovaTech Products */ product NOVATECH NV902 0x9020 NovaTech NV-902W product NOVATECH RT2573 0x9021 RT2573 /* Nokia products */ product NOKIA N958GB 0x0070 Nokia N95 8GBc product NOKIA2 CA42 0x1234 CA-42 cable /* Novatel Wireless products */ product NOVATEL V640 0x1100 Merlin V620 product NOVATEL CDMA_MODEM 0x1110 Novatel Wireless Merlin CDMA product NOVATEL V620 0x1110 Merlin V620 product NOVATEL V740 0x1120 Merlin V740 product NOVATEL V720 0x1130 Merlin V720 product NOVATEL U740 0x1400 Merlin U740 product NOVATEL U740_2 0x1410 Merlin U740 product NOVATEL U870 0x1420 Merlin U870 product NOVATEL XU870 0x1430 Merlin XU870 product NOVATEL X950D 0x1450 Merlin X950D product NOVATEL ES620 0x2100 Expedite ES620 product NOVATEL E725 0x2120 Expedite E725 product NOVATEL ES620_2 0x2130 Expedite ES620 product NOVATEL ES620 0x2100 ES620 CDMA product NOVATEL U720 0x2110 Merlin U720 product NOVATEL EU730 0x2400 Expedite EU730 product NOVATEL EU740 0x2410 Expedite EU740 product NOVATEL EU870D 0x2420 Expedite EU870D product NOVATEL U727 0x4100 Merlin U727 CDMA product NOVATEL MC950D 0x4400 Novatel MC950D HSUPA product NOVATEL ZEROCD 0x5010 Novatel ZeroCD product NOVATEL ZEROCD2 0x5030 Novatel ZeroCD product NOVATEL U727_2 0x5100 Merlin U727 CDMA product NOVATEL U760 0x6000 Novatel U760 product NOVATEL MC760 0x6002 Novatel MC760 product NOVATEL2 FLEXPACKGPS 0x0100 NovAtel FlexPack GPS receiver /* Merlin products */ product MERLIN V620 0x1110 Merlin V620 /* O2Micro products */ product O2MICRO OZ776_HUB 0x7761 OZ776 hub product O2MICRO OZ776_CCID_SC 0x7772 OZ776 CCID SC Reader /* Olympus products */ product OLYMPUS C1 0x0102 C-1 Digital Camera product OLYMPUS C700 0x0105 C-700 Ultra Zoom /* OmniVision Technologies, Inc. products */ product OMNIVISION OV511 0x0511 OV511 Camera product OMNIVISION OV511PLUS 0xa511 OV511+ Camera /* OnSpec Electronic, Inc. */ product ONSPEC SDS_HOTFIND_D 0x0400 SDS-infrared.com Hotfind-D Infrared Camera product ONSPEC MDCFE_B_CF_READER 0xa000 MDCFE-B USB CF Reader product ONSPEC CFMS_RW 0xa001 SIIG/Datafab Memory Stick+CF Reader/Writer product ONSPEC READER 0xa003 Datafab-based Reader product ONSPEC CFSM_READER 0xa005 PNY/Datafab CF+SM Reader product ONSPEC CFSM_READER2 0xa006 Simple Tech/Datafab CF+SM Reader product ONSPEC MDSM_B_READER 0xa103 MDSM-B reader product ONSPEC CFSM_COMBO 0xa109 USB to CF + SM Combo (LC1) product ONSPEC UCF100 0xa400 FlashLink UCF-100 CompactFlash Reader product ONSPEC2 IMAGEMATE_SDDR55 0xa103 ImageMate SDDR55 /* Option products */ product OPTION VODAFONEMC3G 0x5000 Vodafone Mobile Connect 3G datacard product OPTION GT3G 0x6000 GlobeTrotter 3G datacard product OPTION GT3GQUAD 0x6300 GlobeTrotter 3G QUAD datacard product OPTION GT3GPLUS 0x6600 GlobeTrotter 3G+ datacard product OPTION GTICON322 0xd033 GlobeTrotter Icon322 storage product OPTION GTMAX36 0x6701 GlobeTrotter Max 3.6 Modem product OPTION GTHSDPA 0x6971 GlobeTrotter HSDPA product OPTION GTMAXHSUPA 0x7001 GlobeTrotter HSUPA product OPTION GTMAXHSUPAE 0x6901 GlobeTrotter HSUPA PCIe product OPTION GTMAX380HSUPAE 0x7211 GlobeTrotter 380HSUPA PCIe product OPTION GT3G_1 0x6050 3G modem product OPTION GT3G_2 0x6100 3G modem product OPTION GT3G_3 0x6150 3G modem product OPTION GT3G_4 0x6200 3G modem product OPTION GT3G_5 0x6250 3G modem product OPTION GT3G_6 0x6350 3G modem product OPTION E6500 0x6500 3G modem product OPTION E6501 0x6501 3G modem product OPTION E6601 0x6601 3G modem product OPTION E6721 0x6721 3G modem product OPTION E6741 0x6741 3G modem product OPTION E6761 0x6761 3G modem product OPTION E6800 0x6800 3G modem product OPTION E7021 0x7021 3G modem product OPTION E7041 0x7041 3G modem product OPTION E7061 0x7061 3G modem product OPTION E7100 0x7100 3G modem product OPTION GTM380 0x7201 3G modem product OPTION GE40X 0x7601 Globetrotter HSUPA product OPTION GSICON72 0x6911 GlobeSurfer iCON product OPTION GSICONHSUPA 0x7251 Globetrotter HSUPA product OPTION ICON401 0x7401 GlobeSurfer iCON 401 product OPTION GTHSUPA 0x7011 Globetrotter HSUPA product OPTION GMT382 0x7501 Globetrotter HSUPA product OPTION GE40X_1 0x7301 Globetrotter HSUPA product OPTION GE40X_2 0x7361 Globetrotter HSUPA product OPTION GE40X_3 0x7381 Globetrotter HSUPA product OPTION ICONEDGE 0xc031 GlobeSurfer iCON EDGE product OPTION MODHSXPA 0xd013 Globetrotter HSUPA product OPTION ICON321 0xd031 Globetrotter HSUPA product OPTION ICON505 0xd055 Globetrotter iCON 505 product OPTION ICON452 0x7901 Globetrotter iCON 452 /* OvisLink product */ product OVISLINK RT3072 0x3072 RT3072 /* OQO */ product OQO WIFI01 0x0002 model 01 WiFi interface product OQO BT01 0x0003 model 01 Bluetooth interface product OQO ETHER01PLUS 0x7720 model 01+ Ethernet product OQO ETHER01 0x8150 model 01 Ethernet interface /* Ours Technology Inc. */ product OTI DKU5 0x6858 DKU-5 Serial /* Owen.ru products */ product OWEN AC4 0x0004 AC4 USB-RS485 converter /* Palm Computing, Inc. product */ product PALM SERIAL 0x0080 USB Serial product PALM M500 0x0001 Palm m500 product PALM M505 0x0002 Palm m505 product PALM M515 0x0003 Palm m515 product PALM I705 0x0020 Palm i705 product PALM TUNGSTEN_Z 0x0031 Palm Tungsten Z product PALM M125 0x0040 Palm m125 product PALM M130 0x0050 Palm m130 product PALM TUNGSTEN_T 0x0060 Palm Tungsten T product PALM ZIRE31 0x0061 Palm Zire 31 product PALM ZIRE 0x0070 Palm Zire /* Panasonic products */ product PANASONIC LS120CAM 0x0901 LS-120 Camera product PANASONIC KXL840AN 0x0d01 CD-R Drive KXL-840AN product PANASONIC KXLRW32AN 0x0d09 CD-R Drive KXL-RW32AN product PANASONIC KXLCB20AN 0x0d0a CD-R Drive KXL-CB20AN product PANASONIC KXLCB35AN 0x0d0e DVD-ROM & CD-R/RW product PANASONIC SDCAAE 0x1b00 MultiMediaCard product PANASONIC TYTP50P6S 0x3900 TY-TP50P6-S 50in Touch Panel /* PARA Industrial products */ product PARA RT3070 0x8888 RT3070 /* Pegatron products */ product PEGATRON RT2870 0x0002 RT2870 product PEGATRON RT3070 0x000c RT3070 product PEGATRON RT3070_2 0x000e RT3070 product PEGATRON RT3070_3 0x0010 RT3070 /* Peracom products */ product PERACOM SERIAL1 0x0001 Serial product PERACOM ENET 0x0002 Ethernet product PERACOM ENET3 0x0003 At Home Ethernet product PERACOM ENET2 0x0005 Ethernet /* Philips products */ product PHILIPS DSS350 0x0101 DSS 350 Digital Speaker System product PHILIPS DSS 0x0104 DSS XXX Digital Speaker System product PHILIPS HUB 0x0201 hub product PHILIPS PCA646VC 0x0303 PCA646VC PC Camera product PHILIPS PCVC680K 0x0308 PCVC680K Vesta Pro PC Camera product PHILIPS DSS150 0x0471 DSS 150 Digital Speaker System product PHILIPS ACE1001 0x066a AKTAKOM ACE-1001 cable product PHILIPS SPE3030CC 0x083a USB 2.0 External Disk product PHILIPS SNU5600 0x1236 SNU5600 product PHILIPS UM10016 0x1552 ISP 1581 Hi-Speed USB MPEG2 Encoder Reference Kit product PHILIPS DIVAUSB 0x1801 DIVA USB mp3 player product PHILIPS RT2870 0x200f RT2870 /* Philips Semiconductor products */ product PHILIPSSEMI HUB1122 0x1122 HUB /* Megatec */ product MEGATEC UPS 0x5161 Phoenixtec protocol based UPS /* P.I. Engineering products */ product PIENGINEERING PS2USB 0x020b PS2 to Mac USB Adapter /* Planex Communications products */ product PLANEX GW_US11H 0x14ea GW-US11H WLAN product PLANEX2 GW_US11S 0x3220 GW-US11S WLAN product PLANEX2 GW_US54GXS 0x5303 GW-US54GXS WLAN product PLANEX2 GWUS54HP 0xab01 GW-US54HP product PLANEX2 GWUS300MINIS 0xab24 GW-US300MiniS product PLANEX2 RT3070 0xab25 RT3070 product PLANEX2 GWUS54MINI2 0xab50 GW-US54Mini2 product PLANEX2 GWUS54SG 0xc002 GW-US54SG product PLANEX2 GWUS54GZL 0xc007 GW-US54GZL product PLANEX2 GWUS54GD 0xed01 GW-US54GD product PLANEX2 GWUSMM 0xed02 GW-USMM product PLANEX2 RT2870 0xed06 RT2870 product PLANEX2 GWUSMICRON 0xed14 GW-USMicroN product PLANEX3 GWUS54GZ 0xab10 GW-US54GZ product PLANEX3 GU1000T 0xab11 GU-1000T product PLANEX3 GWUS54MINI 0xab13 GW-US54Mini /* Plextor Corp. */ product PLEXTOR 40_12_40U 0x0011 PlexWriter 40/12/40U /* PLX products */ product PLX TESTBOARD 0x9060 test board product PLX CA42 0xac70 CA-42 /* PNY products */ product PNY ATTACHE2 0x0010 USB 2.0 Flash Drive /* PortGear products */ product PORTGEAR EA8 0x0008 Ethernet product PORTGEAR EA9 0x0009 Ethernet /* Portsmith products */ product PORTSMITH EEA 0x3003 Express Ethernet /* Primax products */ product PRIMAX G2X300 0x0300 G2-200 scanner product PRIMAX G2E300 0x0301 G2E-300 scanner product PRIMAX G2300 0x0302 G2-300 scanner product PRIMAX G2E3002 0x0303 G2E-300 scanner product PRIMAX 9600 0x0340 Colorado USB 9600 scanner product PRIMAX 600U 0x0341 Colorado 600u scanner product PRIMAX 6200 0x0345 Visioneer 6200 scanner product PRIMAX 19200 0x0360 Colorado USB 19200 scanner product PRIMAX 1200U 0x0361 Colorado 1200u scanner product PRIMAX G600 0x0380 G2-600 scanner product PRIMAX 636I 0x0381 ReadyScan 636i product PRIMAX G2600 0x0382 G2-600 scanner product PRIMAX G2E600 0x0383 G2E-600 scanner product PRIMAX COMFORT 0x4d01 Comfort product PRIMAX MOUSEINABOX 0x4d02 Mouse-in-a-Box product PRIMAX PCGAUMS1 0x4d04 Sony PCGA-UMS1 product PRIMAX HP_RH304AA 0x4d17 HP RH304AA mouse /* Prolific products */ product PROLIFIC PL2301 0x0000 PL2301 Host-Host interface product PROLIFIC PL2302 0x0001 PL2302 Host-Host interface product PROLIFIC RSAQ2 0x04bb PL2303 Serial (IODATA USB-RSAQ2) product PROLIFIC ALLTRONIX_GPRS 0x0609 Alltronix ACM003U00 modem product PROLIFIC ALDIGA_AL11U 0x0611 AlDiga AL-11U modem product PROLIFIC MICROMAX_610U 0x0612 Micromax 610U product PROLIFIC DCU11 0x1234 DCU-11 Phone Cable product PROLIFIC PL2303 0x2303 PL2303 Serial (ATEN/IOGEAR UC232A) product PROLIFIC PL2305 0x2305 Parallel printer product PROLIFIC ATAPI4 0x2307 ATAPI-4 Controller product PROLIFIC PL2501 0x2501 PL2501 Host-Host interface product PROLIFIC PL2506 0x2506 PL2506 USB to IDE Bridge product PROLIFIC HCR331 0x331a HCR331 Hybrid Card Reader product PROLIFIC PHAROS 0xaaa0 Prolific Pharos product PROLIFIC RSAQ3 0xaaa2 PL2303 Serial Adapter (IODATA USB-RSAQ3) product PROLIFIC2 PL2303 0x2303 PL2303 Serial Adapter /* Putercom products */ product PUTERCOM UPA100 0x047e USB-1284 BRIDGE /* Qcom products */ product QCOM RT2573 0x6196 RT2573 product QCOM RT2573_2 0x6229 RT2573 product QCOM RT2573_3 0x6238 RT2573 product QCOM RT2870 0x6259 RT2870 /* Qisda products */ product QISDA H21_1 0x4512 3G modem product QISDA H21_2 0x4523 3G modem product QISDA H20_1 0x4515 3G modem product QISDA H20_2 0x4519 3G modem /* Qualcomm products */ product QUALCOMM CDMA_MSM 0x6000 CDMA Technologies MSM phone product QUALCOMM2 MF330 0x6613 MF330 product QUALCOMM2 RWT_FCT 0x3100 RWT FCT-CDMA 2000 1xRTT modem product QUALCOMM2 CDMA_MSM 0x3196 CDMA Technologies MSM modem product QUALCOMM2 AC8700 0x6000 AC8700 product QUALCOMMINC CDMA_MSM 0x0001 CDMA Technologies MSM modem product QUALCOMMINC E0002 0x0002 3G modem product QUALCOMMINC E0003 0x0003 3G modem product QUALCOMMINC E0004 0x0004 3G modem product QUALCOMMINC E0005 0x0005 3G modem product QUALCOMMINC E0006 0x0006 3G modem product QUALCOMMINC E0007 0x0007 3G modem product QUALCOMMINC E0008 0x0008 3G modem product QUALCOMMINC E0009 0x0009 3G modem product QUALCOMMINC E000A 0x000a 3G modem product QUALCOMMINC E000B 0x000b 3G modem product QUALCOMMINC E000C 0x000c 3G modem product QUALCOMMINC E000D 0x000d 3G modem product QUALCOMMINC E000E 0x000e 3G modem product QUALCOMMINC E000F 0x000f 3G modem product QUALCOMMINC E0010 0x0010 3G modem product QUALCOMMINC E0011 0x0011 3G modem product QUALCOMMINC E0012 0x0012 3G modem product QUALCOMMINC E0013 0x0013 3G modem product QUALCOMMINC E0014 0x0014 3G modem product QUALCOMMINC MF628 0x0015 3G modem product QUALCOMMINC MF633R 0x0016 ZTE WCDMA modem product QUALCOMMINC E0017 0x0017 3G modem product QUALCOMMINC E0018 0x0018 3G modem product QUALCOMMINC E0019 0x0019 3G modem product QUALCOMMINC E0020 0x0020 3G modem product QUALCOMMINC E0021 0x0021 3G modem product QUALCOMMINC E0022 0x0022 3G modem product QUALCOMMINC E0023 0x0023 3G modem product QUALCOMMINC E0024 0x0024 3G modem product QUALCOMMINC E0025 0x0025 3G modem product QUALCOMMINC E0026 0x0026 3G modem product QUALCOMMINC E0027 0x0027 3G modem product QUALCOMMINC E0028 0x0028 3G modem product QUALCOMMINC E0029 0x0029 3G modem product QUALCOMMINC E0030 0x0030 3G modem product QUALCOMMINC MF626 0x0031 3G modem product QUALCOMMINC E0032 0x0032 3G modem product QUALCOMMINC E0033 0x0033 3G modem product QUALCOMMINC E0037 0x0037 3G modem product QUALCOMMINC E0039 0x0039 3G modem product QUALCOMMINC E0042 0x0042 3G modem product QUALCOMMINC E0043 0x0043 3G modem product QUALCOMMINC E0048 0x0048 3G modem product QUALCOMMINC E0049 0x0049 3G modem product QUALCOMMINC E0051 0x0051 3G modem product QUALCOMMINC E0052 0x0052 3G modem product QUALCOMMINC ZTE_STOR2 0x0053 USB ZTE Storage product QUALCOMMINC E0054 0x0054 3G modem product QUALCOMMINC E0055 0x0055 3G modem product QUALCOMMINC E0057 0x0057 3G modem product QUALCOMMINC E0058 0x0058 3G modem product QUALCOMMINC E0059 0x0059 3G modem product QUALCOMMINC E0060 0x0060 3G modem product QUALCOMMINC E0061 0x0061 3G modem product QUALCOMMINC E0062 0x0062 3G modem product QUALCOMMINC E0063 0x0063 3G modem product QUALCOMMINC E0064 0x0064 3G modem product QUALCOMMINC E0066 0x0066 3G modem product QUALCOMMINC E0069 0x0069 3G modem product QUALCOMMINC E0070 0x0070 3G modem product QUALCOMMINC E0073 0x0073 3G modem product QUALCOMMINC E0076 0x0076 3G modem product QUALCOMMINC E0078 0x0078 3G modem product QUALCOMMINC E0082 0x0082 3G modem product QUALCOMMINC E0086 0x0086 3G modem product QUALCOMMINC ZTE_STOR 0x2000 USB ZTE Storage product QUALCOMMINC E2002 0x2002 3G modem product QUALCOMMINC E2003 0x2003 3G modem product QUALCOMMINC AC8710 0xfff1 3G modem product QUALCOMMINC AC2726 0xfff5 3G modem product QUALCOMMINC AC8700 0xfffe CDMA 1xEVDO USB modem /* Quanta products */ product QUANTA RW6815_1 0x00ce HP iPAQ rw6815 product QUANTA RT3070 0x0304 RT3070 product QUANTA Q101_STOR 0x1000 USB Q101 Storage product QUANTA Q101 0xea02 HSDPA modem product QUANTA Q111 0xea03 HSDPA modem product QUANTA GLX 0xea04 HSDPA modem product QUANTA GKE 0xea05 HSDPA modem product QUANTA GLE 0xea06 HSDPA modem product QUANTA RW6815R 0xf003 HP iPAQ rw6815 RNDIS /* Qtronix products */ product QTRONIX 980N 0x2011 Scorpion-980N keyboard /* Quickshot products */ product QUICKSHOT STRIKEPAD 0x6238 USB StrikePad /* Radio Shack */ product RADIOSHACK USBCABLE 0x4026 USB to Serial Cable /* Rainbow Technologies products */ product RAINBOW IKEY2000 0x1200 i-Key 2000 /* Ralink Technology products */ product RALINK RT2570 0x1706 RT2500USB Wireless Adapter product RALINK RT2070 0x2070 RT2070 product RALINK RT2570_2 0x2570 RT2500USB Wireless Adapter product RALINK RT2573 0x2573 RT2501USB Wireless Adapter product RALINK RT2671 0x2671 RT2601USB Wireless Adapter product RALINK RT2770 0x2770 RT2770 product RALINK RT2870 0x2870 RT2870 product RALINK RT3070 0x3070 RT3070 product RALINK RT3071 0x3071 RT3071 product RALINK RT3072 0x3072 RT3072 product RALINK RT3370 0x3370 RT3370 product RALINK RT3572 0x3572 RT3572 product RALINK RT8070 0x8070 RT8070 product RALINK RT2570_3 0x9020 RT2500USB Wireless Adapter product RALINK RT2573_2 0x9021 RT2501USB Wireless Adapter /* RATOC Systems products */ product RATOC REXUSB60 0xb000 USB serial adapter REX-USB60 product RATOC REXUSB60F 0xb020 USB serial adapter REX-USB60F /* ReakTek products */ /* Green House and CompUSA OEM this part */ product REALTEK USB20CRW 0x0158 USB20CRW Card Reader product REALTEK USBKR100 0x8150 USBKR100 USB Ethernet product REALTEK RTL8187 0x8187 RTL8187 Wireless Adapter product REALTEK RTL8187B_0 0x8189 RTL8187B Wireless Adapter product REALTEK RTL8187B_1 0x8197 RTL8187B Wireless Adapter product REALTEK RTL8187B_2 0x8198 RTL8187B Wireless Adapter /* Ricoh products */ product RICOH VGPVCC2 0x1830 VGP-VCC2 Camera product RICOH VGPVCC3 0x1832 VGP-VCC3 Camera product RICOH VGPVCC2_2 0x1833 VGP-VCC2 Camera product RICOH VGPVCC2_3 0x1834 VGP-VCC2 Camera product RICOH VGPVCC7 0x183a VGP-VCC7 Camera product RICOH VGPVCC8 0x183b VGP-VCC8 Camera /* Reiner-SCT products */ product REINERSCT CYBERJACK_ECOM 0x0100 e-com cyberJack /* Roland products */ product ROLAND UM1 0x0009 UM-1 MIDI I/F product ROLAND UM880N 0x0014 EDIROL UM-880 MIDI I/F (native) product ROLAND UM880G 0x0015 EDIROL UM-880 MIDI I/F (generic) /* Rockfire products */ product ROCKFIRE GAMEPAD 0x2033 gamepad 203USB /* RATOC Systems products */ product RATOC REXUSB60 0xb000 REX-USB60 product RATOC REXUSB60F 0xb020 REX-USB60F /* Sagem products */ product SAGEM USBSERIAL 0x0027 USB-Serial Controller product SAGEM XG760A 0x004a XG-760A product SAGEM XG76NA 0x0062 XG-76NA /* Samsung products */ product SAMSUNG ML6060 0x3008 ML-6060 laser printer product SAMSUNG YP_U2 0x5050 YP-U2 MP3 Player product SAMSUNG YP_U4 0x5092 YP-U4 MP3 Player product SAMSUNG I500 0x6601 I500 Palm USB Phone product SAMSUNG I330 0x8001 I330 phone cradle product SAMSUNG2 RT2870_1 0x2018 RT2870 /* Samsung Techwin products */ product SAMSUNG_TECHWIN DIGIMAX_410 0x000a Digimax 410 /* SanDisk products */ product SANDISK SDDR05A 0x0001 ImageMate SDDR-05a product SANDISK SDDR31 0x0002 ImageMate SDDR-31 product SANDISK SDDR05 0x0005 ImageMate SDDR-05 product SANDISK SDDR12 0x0100 ImageMate SDDR-12 product SANDISK SDDR09 0x0200 ImageMate SDDR-09 product SANDISK SDDR75 0x0810 ImageMate SDDR-75 product SANDISK SDCZ2_256 0x7104 Cruzer Mini 256MB product SANDISK SDCZ4_128 0x7112 Cruzer Micro 128MB product SANDISK SDCZ4_256 0x7113 Cruzer Micro 256MB /* Sanwa Electric Instrument Co., Ltd. products */ product SANWA KB_USB2 0x0701 KB-USB2 multimeter cable /* Sanyo Electric products */ product SANYO SCP4900 0x0701 Sanyo SCP-4900 USB Phone /* ScanLogic products */ product SCANLOGIC SL11R 0x0002 SL11R IDE Adapter product SCANLOGIC 336CX 0x0300 Phantom 336CX - C3 scanner /* Senao products */ product SENAO RT2870_3 0x0605 RT2870 product SENAO RT2870_4 0x0615 RT2870 product SENAO NUB8301 0x2000 NUB-8301 product SENAO RT2870_1 0x9701 RT2870 product SENAO RT2870_2 0x9702 RT2870 product SENAO RT3070 0x9703 RT3070 product SENAO RT3071 0x9705 RT3071 product SENAO RT3072_1 0x9706 RT3072 product SENAO RT3072_2 0x9707 RT3072 product SENAO RT3072_3 0x9708 RT3072 product SENAO RT3072_4 0x9709 RT3072 product SENAO RT3072_5 0x9801 RT3072 /* ShanTou products */ product SHANTOU ST268 0x0268 ST268 product SHANTOU DM9601 0x9601 DM 9601 /* Shark products */ product SHARK PA 0x0400 Pocket Adapter /* Sharp products */ product SHARP SL5500 0x8004 Zaurus SL-5500 PDA product SHARP SLA300 0x8005 Zaurus SL-A300 PDA product SHARP SL5600 0x8006 Zaurus SL-5600 PDA product SHARP SLC700 0x8007 Zaurus SL-C700 PDA product SHARP SLC750 0x9031 Zaurus SL-C750 PDA product SHARP WZERO3ES 0x9123 W-ZERO3 ES Smartphone product SHARP WZERO3ADES 0x91ac Advanced W-ZERO3 ES Smartphone product SHARP WILLCOM03 0x9242 WILLCOM03 /* Shuttle Technology products */ product SHUTTLE EUSB 0x0001 E-USB Bridge product SHUTTLE EUSCSI 0x0002 eUSCSI Bridge product SHUTTLE SDDR09 0x0003 ImageMate SDDR09 product SHUTTLE EUSBCFSM 0x0005 eUSB SmartMedia / CompactFlash Adapter product SHUTTLE ZIOMMC 0x0006 eUSB MultiMediaCard Adapter product SHUTTLE HIFD 0x0007 Sony Hifd product SHUTTLE EUSBATAPI 0x0009 eUSB ATA/ATAPI Adapter product SHUTTLE CF 0x000a eUSB CompactFlash Adapter product SHUTTLE EUSCSI_B 0x000b eUSCSI Bridge product SHUTTLE EUSCSI_C 0x000c eUSCSI Bridge product SHUTTLE CDRW 0x0101 CD-RW Device product SHUTTLE EUSBORCA 0x0325 eUSB ORCA Quad Reader /* Siemens products */ product SIEMENS SPEEDSTREAM 0x1001 SpeedStream product SIEMENS SPEEDSTREAM22 0x1022 SpeedStream 1022 product SIEMENS2 WLL013 0x001b WLL013 product SIEMENS2 ES75 0x0034 GSM module MC35 product SIEMENS2 WL54G 0x3c06 54g USB Network Adapter product SIEMENS3 SX1 0x0001 SX1 product SIEMENS3 X65 0x0003 X65 product SIEMENS3 X75 0x0004 X75 product SIEMENS3 EF81 0x0005 EF81 /* Sierra Wireless products */ product SIERRA EM5625 0x0017 EM5625 product SIERRA MC5720_2 0x0018 MC5720 product SIERRA MC5725 0x0020 MC5725 product SIERRA AIRCARD580 0x0112 Sierra Wireless AirCard 580 product SIERRA AIRCARD595 0x0019 Sierra Wireless AirCard 595 product SIERRA AC595U 0x0120 Sierra Wireless AirCard 595U product SIERRA AC597E 0x0021 Sierra Wireless AirCard 597E product SIERRA EM5725 0x0022 EM5725 product SIERRA C597 0x0023 Sierra Wireless Compass 597 product SIERRA MC5727 0x0024 MC5727 product SIERRA T598 0x0025 T598 product SIERRA T11 0x0026 T11 product SIERRA AC402 0x0027 AC402 product SIERRA MC5728 0x0028 MC5728 product SIERRA E0029 0x0029 E0029 product SIERRA AIRCARD580 0x0112 Sierra Wireless AirCard 580 product SIERRA AC595U 0x0120 Sierra Wireless AirCard 595U product SIERRA MC5720 0x0218 MC5720 Wireless Modem product SIERRA MINI5725 0x0220 Sierra Wireless miniPCI 5275 product SIERRA MC5727_2 0x0224 MC5727 product SIERRA MC8755_2 0x6802 MC8755 product SIERRA MC8765 0x6803 MC8765 product SIERRA MC8755 0x6804 MC8755 product SIERRA MC8765_2 0x6805 MC8765 product SIERRA MC8755_4 0x6808 MC8755 product SIERRA MC8765_3 0x6809 MC8765 product SIERRA AC875U 0x6812 AC875U HSDPA USB Modem product SIERRA MC8755_3 0x6813 MC8755 HSDPA product SIERRA MC8775_2 0x6815 MC8775 product SIERRA MC8775 0x6816 MC8775 product SIERRA AC875 0x6820 Sierra Wireless AirCard 875 product SIERRA AC875U_2 0x6821 AC875U product SIERRA AC875E 0x6822 AC875E product SIERRA MC8780 0x6832 MC8780 product SIERRA MC8781 0x6833 MC8781 product SIERRA MC8780_2 0x6834 MC8780 product SIERRA MC8781_2 0x6835 MC8781 product SIERRA MC8780_3 0x6838 MC8780 product SIERRA MC8781_3 0x6839 MC8781 product SIERRA MC8785 0x683A MC8785 product SIERRA MC8785_2 0x683B MC8785 product SIERRA MC8790 0x683C MC8790 product SIERRA MC8791 0x683D MC8791 product SIERRA MC8792 0x683E MC8792 product SIERRA AC880 0x6850 Sierra Wireless AirCard 880 product SIERRA AC881 0x6851 Sierra Wireless AirCard 881 product SIERRA AC880E 0x6852 Sierra Wireless AirCard 880E product SIERRA AC881E 0x6853 Sierra Wireless AirCard 881E product SIERRA AC880U 0x6855 Sierra Wireless AirCard 880U product SIERRA AC881U 0x6856 Sierra Wireless AirCard 881U product SIERRA AC885E 0x6859 AC885E product SIERRA AC885E_2 0x685A AC885E product SIERRA AC885U 0x6880 Sierra Wireless AirCard 885U product SIERRA C888 0x6890 C888 product SIERRA C22 0x6891 C22 product SIERRA E6892 0x6892 E6892 product SIERRA E6893 0x6893 E6893 product SIERRA MC8700 0x68A3 MC8700 product SIERRA AIRCARD875 0x6820 Aircard 875 HSDPA product SIERRA TRUINSTALL 0x0fff Aircard Tru Installer /* Sigmatel products */ product SIGMATEL WBT_3052 0x4200 WBT-3052 IrDA/USB Bridge product SIGMATEL I_BEAD100 0x8008 i-Bead 100 MP3 Player /* SIIG products */ /* Also: Omnidirectional Control Technology products */ product SIIG DIGIFILMREADER 0x0004 DigiFilm-Combo Reader product SIIG WINTERREADER 0x0330 WINTERREADER Reader product SIIG2 USBTOETHER 0x0109 USB TO Ethernet product SIIG2 US2308 0x0421 Serial /* Silicom products */ product SILICOM U2E 0x0001 U2E product SILICOM GPE 0x0002 Psion Gold Port Ethernet /* SI Labs */ product SILABS VSTABI 0x0f91 Vstabi product SILABS ARKHAM_DS101_M 0x1101 Arkham DS101 Monitor product SILABS ARKHAM_DS101_A 0x1601 Arkham DS101 Adapter product SILABS BSM7DUSB 0x800a BSM7-D-USB product SILABS POLOLU 0x803b Pololu Serial product SILABS CYGNAL_DEBUG 0x8044 Cygnal Debug Adapter product SILABS SB_PARAMOUNT_ME 0x8043 Software Bisque Paramount ME product SILABS SAEL 0x8053 SA-EL USB product SILABS GSM2228 0x8054 Enfora GSM2228 USB product SILABS ARGUSISP 0x8066 Argussoft ISP product SILABS IMS_USB_RS422 0x806f IMS USB-RS422 product SILABS CRUMB128 0x807a Crumb128 board product SILABS DEGREE 0x80ca Degree Controls Inc product SILABS TRACIENT 0x80dd Tracient RFID product SILABS TRAQMATE 0x80ed Track Systems Traqmate product SILABS SUUNTO 0x80f6 Suunto Sports Instrument product SILABS ARYGON_MIFARE 0x8115 Arygon Mifare RFID reader product SILABS BURNSIDE 0x813d Burnside Telecon Deskmobile product SILABS TAMSMASTER 0x813f Tams Master Easy Control product SILABS WMRBATT 0x814a WMR RIGblaster Plug&Play product SILABS WMRRIGBLASTER 0x814a WMR RIGblaster Plug&Play product SILABS WMRRIGTALK 0x814b WMR RIGtalk RT1 product SILABS HELICOM 0x815e Helicomm IP-Link 1220-DVM product SILABS AVIT_USB_TTL 0x818b AVIT Research USB-TTL product SILABS MJS_TOSLINK 0x819f MJS USB-TOSLINk product SILABS WAVIT 0x81a6 ThinkOptics WavIt product SILABS MSD_DASHHAWK 0x81ac MSD DashHawk product SILABS INSYS_MODEM 0x81ad INSYS Modem product SILABS LIPOWSKY_JTAG 0x81c8 Lipowsky Baby-JTAG product SILABS LIPOWSKY_LIN 0x81e2 Lipowsky Baby-LIN product SILABS AEROCOMM 0x81e7 Aerocomm Radio product SILABS ZEPHYR_BIO 0x81e8 Zephyr Bioharness product SILABS EMS_C1007 0x81f2 EMS C1007 HF RFID controller product SILABS LIPOWSKY_HARP 0x8218 Lipowsky HARP-1 product SILABS C2_EDGE_MODEM 0x822b Commander 2 EDGE(GSM) Modem product SILABS CYGNAL_GPS 0x826b Cygnal Fasttrax GPS product SILABS TELEGESYS_ETRX2 0x8293 Telegesys ETRX2USB product SILABS PROCYON_AVS 0x82f9 Procyon AVS product SILABS MC35PU 0x8341 MC35pu product SILABS CYGNAL 0x8382 Cygnal product SILABS AMBER_AMB2560 0x83a8 Amber Wireless AMB2560 product SILABS KYOCERA_GPS 0x8411 Kyocera GPS product SILABS BEI_VCP 0x846e BEI USB Sensor (VCP) product SILABS CP2102 0xea60 SILABS USB UART product SILABS CP210X_2 0xea61 CP210x Serial product SILABS INFINITY_MIC 0xea71 Infinity GPS-MIC-1 Radio Monophone product SILABS USBSCOPE50 0xf001 USBscope50 product SILABS USBWAVE12 0xf002 USBwave12 product SILABS USBPULSE100 0xf003 USBpulse100 product SILABS USBCOUNT50 0xf004 USBcount50 product SILABS2 DCU11CLONE 0xaa26 DCU-11 clone product SILABS3 GPRS_MODEM 0xea61 GPRS Modem product SILABS4 100EU_MODEM 0xea61 GPRS Modem 100EU /* Silicon Portals Inc. */ product SILICONPORTALS YAPPH_NF 0x0200 YAP Phone (no firmware) product SILICONPORTALS YAPPHONE 0x0201 YAP Phone /* Sirius Technologies products */ product SIRIUS ROADSTER 0x0001 NetComm Roadster II 56 USB /* Sitecom products */ product SITECOM LN029 0x182d USB 2.0 Ethernet product SITECOM SERIAL 0x2068 USB to serial cable (v2) product SITECOM2 WL022 0x182d WL-022 /* Sitecom Europe products */ product SITECOMEU RT2870_1 0x0017 RT2870 product SITECOMEU WL168V1 0x000d WL-168 v1 product SITECOMEU WL168V4 0x0028 WL-168 v4 product SITECOMEU RT2870_2 0x002b RT2870 product SITECOMEU RT2870_3 0x002c RT2870 product SITECOMEU RT2870_4 0x002d RT2870 product SITECOMEU RT2770 0x0039 RT2770 product SITECOMEU RT3070_2 0x003b RT3070 product SITECOMEU RT3070_3 0x003c RT3070 product SITECOMEU RT3070_4 0x003d RT3070 product SITECOMEU RT3070 0x003e RT3070 product SITECOMEU WL608 0x003f WL-608 product SITECOMEU RT3071 0x0040 RT3071 product SITECOMEU RT3072_1 0x0041 RT3072 product SITECOMEU RT3072_2 0x0042 RT3072 product SITECOMEU RT3072_3 0x0047 RT3072 product SITECOMEU RT3072_4 0x0048 RT3072 product SITECOMEU RT3072_5 0x004a RT3072 product SITECOMEU RT3072_6 0x004d RT3072 product SITECOMEU LN028 0x061c LN-028 product SITECOMEU WL113 0x9071 WL-113 product SITECOMEU ZD1211B 0x9075 ZD1211B product SITECOMEU WL172 0x90ac WL-172 product SITECOMEU WL113R2 0x9712 WL-113 rev 2 /* Skanhex Technology products */ product SKANHEX MD_7425 0x410a MD 7425 Camera product SKANHEX SX_520Z 0x5200 SX 520z Camera /* Smart Technologies products */ product SMART PL2303 0x2303 Serial adapter /* SmartBridges products */ product SMARTBRIDGES SMARTLINK 0x0001 SmartLink USB Ethernet product SMARTBRIDGES SMARTNIC 0x0003 smartNIC 2 PnP Ethernet /* SMC products */ product SMC 2102USB 0x0100 10Mbps Ethernet product SMC 2202USB 0x0200 10/100 Ethernet product SMC 2206USB 0x0201 EZ Connect USB Ethernet product SMC 2862WG 0xee13 EZ Connect Wireless Adapter product SMC2 2020HUB 0x2020 USB Hub product SMC2 2514HUB 0x2514 USB Hub product SMC3 2662WUSB 0xa002 2662W-AR Wireless /* SOHOware products */ product SOHOWARE NUB100 0x9100 10/100 USB Ethernet product SOHOWARE NUB110 0x9110 10/100 USB Ethernet /* SOLID YEAR products */ product SOLIDYEAR KEYBOARD 0x2101 Solid Year USB keyboard /* SONY products */ product SONY DSC 0x0010 DSC cameras product SONY MS_NW_MS7 0x0025 Memorystick NW-MS7 product SONY PORTABLE_HDD_V2 0x002b Portable USB Harddrive V2 product SONY MSACUS1 0x002d Memorystick MSAC-US1 product SONY HANDYCAM 0x002e Handycam product SONY MSC 0x0032 MSC memory stick slot product SONY CLIE_35 0x0038 Sony Clie v3.5 product SONY MS_PEG_N760C 0x0058 PEG N760c Memorystick product SONY CLIE_40 0x0066 Sony Clie v4.0 product SONY MS_MSC_U03 0x0069 Memorystick MSC-U03 product SONY CLIE_40_MS 0x006d Sony Clie v4.0 Memory Stick slot product SONY CLIE_S360 0x0095 Sony Clie s360 product SONY CLIE_41_MS 0x0099 Sony Clie v4.1 Memory Stick slot product SONY CLIE_41 0x009a Sony Clie v4.1 product SONY CLIE_NX60 0x00da Sony Clie nx60 product SONY CLIE_TH55 0x0144 Sony Clie th55 product SONY CLIE_TJ37 0x0169 Sony Clie tj37 product SONY RF_RECEIVER 0x01db Sony RF mouse/kbd Receiver VGP-WRC1 product SONY QN3 0x0437 Sony QN3 CMD-Jxx phone cable /* Sony Ericsson products */ product SONYERICSSON DCU10 0x0528 DCU-10 Phone Data Cable product SONYERICSSON DATAPILOT 0x2003 Datapilot Phone Cable /* SOURCENEXT products */ product SOURCENEXT KEIKAI8 0x039f KeikaiDenwa 8 product SOURCENEXT KEIKAI8_CHG 0x012e KeikaiDenwa 8 with charger /* SparkLAN products */ product SPARKLAN RT2573 0x0004 RT2573 product SPARKLAN RT2870_1 0x0006 RT2870 product SPARKLAN RT3070 0x0010 RT3070 /* Speed Dragon Multimedia products */ product SPEEDDRAGON MS3303H 0x110b MS3303H Serial /* Sphairon Access Systems GmbH products */ product SPHAIRON UB801R 0x0110 UB801R /* Stelera Wireless products */ product STELERA ZEROCD 0x1000 Zerocd Installer product STELERA C105 0x1002 Stelera/Bandrish C105 USB product STELERA E1003 0x1003 3G modem product STELERA E1004 0x1004 3G modem product STELERA E1005 0x1005 3G modem product STELERA E1006 0x1006 3G modem product STELERA E1007 0x1007 3G modem product STELERA E1008 0x1008 3G modem product STELERA E1009 0x1009 3G modem product STELERA E100A 0x100a 3G modem product STELERA E100B 0x100b 3G modem product STELERA E100C 0x100c 3G modem product STELERA E100D 0x100d 3G modem product STELERA E100E 0x100e 3G modem product STELERA E100F 0x100f 3G modem product STELERA E1010 0x1010 3G modem product STELERA E1011 0x1011 3G modem product STELERA E1012 0x1012 3G modem /* MpMan products */ product MPMAN MPF400_1 0x36d0 MPF400 Music Player 1Go product MPMAN MPF400_2 0x25a8 MPF400 Music Player 2Go /* STMicroelectronics products */ product STMICRO BIOCPU 0x2016 Biometric Coprocessor product STMICRO COMMUNICATOR 0x7554 USB Communicator /* STSN products */ product STSN STSN0001 0x0001 Internet Access Device /* SUN Corporation products */ product SUNTAC DS96L 0x0003 SUNTAC U-Cable type D2 product SUNTAC PS64P1 0x0005 SUNTAC U-Cable type P1 product SUNTAC VS10U 0x0009 SUNTAC Slipper U product SUNTAC IS96U 0x000a SUNTAC Ir-Trinity product SUNTAC AS64LX 0x000b SUNTAC U-Cable type A3 product SUNTAC AS144L4 0x0011 SUNTAC U-Cable type A4 /* Sun Microsystems products */ product SUN KEYBOARD_TYPE_6 0x0005 Type 6 USB keyboard product SUN KEYBOARD_TYPE_7 0x00a2 Type 7 USB keyboard /* XXX The above is a North American PC style keyboard possibly */ product SUN MOUSE 0x0100 Type 6 USB mouse product SUN KBD_HUB 0x100e Kbd Hub /* Super Top products */ product SUPERTOP IDE 0x6600 USB-IDE /* Syntech products */ product SYNTECH CPT8001C 0x0001 CPT-8001C Barcode scanner product SYNTECH CYPHERLAB100 0x1000 CipherLab USB Barcode Scanner /* Teclast products */ product TECLAST TLC300 0x3203 USB Media Player /* Supra products */ product DIAMOND2 SUPRAEXPRESS56K 0x07da Supra Express 56K modem product DIAMOND2 SUPRA2890 0x0b4a SupraMax 2890 56K Modem product DIAMOND2 RIO600USB 0x5001 Rio 600 USB product DIAMOND2 RIO800USB 0x5002 Rio 800 USB /* Surecom Technology products */ product SURECOM EP9001G2A 0x11f2 EP-9001-G rev 2A product SURECOM RT2570 0x11f3 RT2570 product SURECOM RT2573 0x31f3 RT2573 /* Sweex products */ product SWEEX ZD1211 0x1809 ZD1211 product SWEEX2 LW153 0x0153 LW153 product SWEEX2 LW303 0x0302 LW303 product SWEEX2 LW313 0x0313 LW313 /* System TALKS, Inc. */ product SYSTEMTALKS SGCX2UL 0x1920 SGC-X2UL /* Tapwave products */ product TAPWAVE ZODIAC 0x0100 Zodiac /* Taugagreining products */ product TAUGA CAMERAMATE 0x0005 CameraMate (DPCM_USB) /* TCTMobile products */ product TCTMOBILE X060S 0x0000 X060S 3G modem product TCTMOBILE X080S 0xf000 X080S 3G modem /* TDK products */ product TDK UPA9664 0x0115 USB-PDC Adapter UPA9664 product TDK UCA1464 0x0116 USB-cdmaOne Adapter UCA1464 product TDK UHA6400 0x0117 USB-PHS Adapter UHA6400 product TDK UPA6400 0x0118 USB-PHS Adapter UPA6400 product TDK BT_DONGLE 0x0309 Bluetooth USB dongle /* TEAC products */ product TEAC FD05PUB 0x0000 FD-05PUB floppy /* Tekram Technology products */ product TEKRAM QUICKWLAN 0x1630 QuickWLAN product TEKRAM ZD1211_1 0x5630 ZD1211 product TEKRAM ZD1211_2 0x6630 ZD1211 /* Telex Communications products */ product TELEX MIC1 0x0001 Enhanced USB Microphone /* Telit products */ product TELIT UC864E 0x1003 UC864E 3G modem product TELIT UC864G 0x1004 UC864G 3G modem /* Ten X Technology, Inc. */ product TENX UAUDIO0 0xf211 USB audio headset /* Texas Intel products */ product TI UTUSB41 0x1446 UT-USB41 hub product TI TUSB2046 0x2046 TUSB2046 hub /* Thrustmaster products */ product THRUST FUSION_PAD 0xa0a3 Fusion Digital Gamepad /* TLayTech products */ product TLAYTECH TEU800 0x1682 TEU800 3G modem /* Topre Corporation products */ product TOPRE HHKB 0x0100 HHKB Professional /* Toshiba Corporation products */ product TOSHIBA POCKETPC_E740 0x0706 PocketPC e740 product TOSHIBA RT3070 0x0a07 RT3070 product TOSHIBA G450 0x0d45 G450 modem product TOSHIBA HSDPA 0x1302 G450 modem /* Trek Technology products */ product TREK THUMBDRIVE 0x1111 ThumbDrive product TREK MEMKEY 0x8888 IBM USB Memory Key product TREK THUMBDRIVE_8MB 0x9988 ThumbDrive_8MB /* Tripp-Lite products */ product TRIPPLITE U209 0x2008 Serial /* Trumpion products */ product TRUMPION T33520 0x1001 T33520 USB Flash Card Controller product TRUMPION C3310 0x1100 Comotron C3310 MP3 player product TRUMPION MP3 0x1200 MP3 player /* TwinMOS */ product TWINMOS G240 0xa006 G240 product TWINMOS MDIV 0x1325 Memory Disk IV /* Ubiquam products */ product UBIQUAM UALL 0x3100 CDMA 1xRTT USB Modem (U-100/105/200/300/520) /* Ultima products */ product ULTIMA 1200UBPLUS 0x4002 1200 UB Plus scanner /* UMAX products */ product UMAX ASTRA1236U 0x0002 Astra 1236U Scanner product UMAX ASTRA1220U 0x0010 Astra 1220U Scanner product UMAX ASTRA2000U 0x0030 Astra 2000U Scanner product UMAX ASTRA2100U 0x0130 Astra 2100U Scanner product UMAX ASTRA2200U 0x0230 Astra 2200U Scanner product UMAX ASTRA3400 0x0060 Astra 3400 Scanner /* U-MEDIA Communications products */ product UMEDIA TEW444UBEU 0x3006 TEW-444UB EU product UMEDIA TEW444UBEU_NF 0x3007 TEW-444UB EU (no firmware) product UMEDIA TEW429UB_A 0x300a TEW-429UB_A product UMEDIA TEW429UB 0x300b TEW-429UB product UMEDIA TEW429UBC1 0x300d TEW-429UB C1 product UMEDIA RT2870_1 0x300e RT2870 product UMEDIA ALL0298V2 0x3204 ALL0298 v2 product UMEDIA AR5523_2 0x3205 AR5523 product UMEDIA AR5523_2_NF 0x3206 AR5523 (no firmware) /* Universal Access products */ product UNIACCESS PANACHE 0x0101 Panache Surf USB ISDN Adapter /* USI products */ product USI MC60 0x10c5 MC60 Serial /* U.S. Robotics products */ product USR USR5422 0x0118 USR5422 WLAN product USR USR5423 0x0121 USR5423 WLAN /* VIA Technologies products */ product VIA USB2IDEBRIDGE 0x6204 USB 2.0 IDE Bridge /* Vaisala products */ product VAISALA CABLE 0x0200 USB Interface cable /* VidzMedia products */ product VIDZMEDIA MONSTERTV 0x4fb1 MonsterTV P2H /* Vision products */ product VISION VC6452V002 0x0002 CPiA Camera /* Visioneer products */ product VISIONEER 7600 0x0211 OneTouch 7600 product VISIONEER 5300 0x0221 OneTouch 5300 product VISIONEER 3000 0x0224 Scanport 3000 product VISIONEER 6100 0x0231 OneTouch 6100 product VISIONEER 6200 0x0311 OneTouch 6200 product VISIONEER 8100 0x0321 OneTouch 8100 product VISIONEER 8600 0x0331 OneTouch 8600 /* Vivitar products */ product VIVITAR 35XX 0x0003 Vivicam 35Xx /* VTech products */ product VTECH RT2570 0x3012 RT2570 product VTECH ZD1211B 0x3014 ZD1211B /* Wacom products */ product WACOM CT0405U 0x0000 CT-0405-U Tablet product WACOM GRAPHIRE 0x0010 Graphire product WACOM GRAPHIRE3_4X5 0x0013 Graphire 3 4x5 product WACOM INTUOSA5 0x0021 Intuos A5 product WACOM GD0912U 0x0022 Intuos 9x12 Graphics Tablet /* WaveSense products */ product WAVESENSE JAZZ 0xaaaa Jazz blood glucose meter /* WCH products */ product WCH CH341SER 0x5523 CH341/CH340 USB-Serial Bridge product WCH2 CH341SER 0x7523 CH341/CH340 USB-Serial Bridge /* Western Digital products */ product WESTERN COMBO 0x0200 Firewire USB Combo product WESTERN EXTHDD 0x0400 External HDD product WESTERN HUB 0x0500 USB HUB product WESTERN MYBOOK 0x0901 MyBook External HDD product WESTERN MYPASSWORD 0x0704 MyPassword External HDD /* Windbond Electronics */ product WINBOND UH104 0x5518 4-port USB Hub /* WinMaxGroup products */ product WINMAXGROUP FLASH64MC 0x6660 USB Flash Disk 64M-C /* Wistron NeWeb products */ product WISTRONNEWEB UR045G 0x0427 PrismGT USB 2.0 WLAN product WISTRONNEWEB UR055G 0x0711 UR055G product WISTRONNEWEB AR5523_1 0x0826 AR5523 product WISTRONNEWEB AR5523_1_NF 0x0827 AR5523 (no firmware) product WISTRONNEWEB AR5523_2 0x082a AR5523 product WISTRONNEWEB AR5523_2_NF 0x0829 AR5523 (no firmware) /* Xerox products */ product XEROX WCM15 0xffef WorkCenter M15 /* Xirlink products */ product XIRLINK PCCAM 0x8080 IBM PC Camera /* Xyratex products */ product XYRATEX PRISM_GT_1 0x2000 PrismGT USB 2.0 WLAN product XYRATEX PRISM_GT_2 0x2002 PrismGT USB 2.0 WLAN /* Yamaha products */ product YAMAHA UX256 0x1000 UX256 MIDI I/F product YAMAHA UX96 0x1008 UX96 MIDI I/F product YAMAHA RTA54I 0x4000 NetVolante RTA54i Broadband&ISDN Router product YAMAHA RTA55I 0x4004 NetVolante RTA55i Broadband VoIP Router product YAMAHA RTW65B 0x4001 NetVolante RTW65b Broadband Wireless Router product YAMAHA RTW65I 0x4002 NetVolante RTW65i Broadband&ISDN Wireless Router /* Yano products */ product YANO U640MO 0x0101 U640MO-03 product YANO FW800HD 0x05fc METALWEAR-HDD /* Y.C. Cable products */ product YCCABLE PL2303 0x0fba PL2303 Serial /* Y-E Data products */ product YEDATA FLASHBUSTERU 0x0000 Flashbuster-U /* Yiso Wireless Co. products */ product YISO C893 0xc893 CDMA 2000 1xEVDO PC Card /* Z-Com products */ product ZCOM M4Y750 0x0001 M4Y-750 product ZCOM XI725 0x0002 XI-725/726 product ZCOM XI735 0x0005 XI-735 product ZCOM XG703A 0x0008 PrismGT USB 2.0 WLAN product ZCOM ZD1211 0x0011 ZD1211 product ZCOM AR5523 0x0012 AR5523 product ZCOM AR5523_NF 0x0013 AR5523 driver (no firmware) product ZCOM XM142 0x0015 XM-142 product ZCOM ZD1211B 0x001a ZD1211B product ZCOM RT2870_1 0x0022 RT2870 product ZCOM RT2870_2 0x0025 RT2870 /* Zinwell products */ product ZINWELL RT2570 0x0260 RT2570 product ZINWELL RT2870_1 0x0280 RT2870 product ZINWELL RT2870_2 0x0282 RT2870 product ZINWELL RT3072_1 0x0283 RT3072 product ZINWELL RT3072_2 0x0284 RT3072 product ZINWELL RT3070 0x5257 RT3070 /* Zoom Telephonics, Inc. products */ product ZOOM 2986L 0x9700 2986L Fax modem /* Zoran Microelectronics products */ product ZORAN EX20DSC 0x4343 Digital Camera EX-20 DSC /* Zydas Technology Corporation products */ product ZYDAS ZD1211 0x1211 ZD1211 WLAN abg product ZYDAS ZD1211B 0x1215 ZD1211B /* ZyXEL Communication Co. products */ product ZYXEL OMNI56K 0x1500 Omni 56K Plus product ZYXEL 980N 0x2011 Scorpion-980N keyboard product ZYXEL ZYAIRG220 0x3401 ZyAIR G-220 product ZYXEL G200V2 0x3407 G-200 v2 product ZYXEL AG225H 0x3409 AG-225H product ZYXEL M202 0x340a M-202 product ZYXEL G220V2 0x340f G-220 v2 product ZYXEL G202 0x3410 G-202 product ZYXEL RT2870_1 0x3416 RT2870 product ZYXEL RT2870_2 0x341a RT2870 Index: projects/binutils-2.17/sys/i386/i386/pmap.c =================================================================== --- projects/binutils-2.17/sys/i386/i386/pmap.c (revision 215829) +++ projects/binutils-2.17/sys/i386/i386/pmap.c (revision 215830) @@ -1,5208 +1,5207 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 2005-2010 Alan L. Cox * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 */ /*- * Copyright (c) 2003 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Jake Burkholder, * Safeport Network Services, and Network Associates Laboratories, the * Security Research Division of Network Associates, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * In addition to hardware address maps, this * module is called upon to provide software-use-only * maps which may or may not be stored in the same * form as hardware maps. These pseudo-maps are * used to store intermediate results from copy * operations to and from address spaces. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. */ #include "opt_cpu.h" #include "opt_pmap.h" #include "opt_msgbuf.h" #include "opt_smp.h" #include "opt_xbox.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #ifdef XBOX #include #endif #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) #define CPU_ENABLE_SSE #endif #ifndef PMAP_SHPGPERPROC #define PMAP_SHPGPERPROC 200 #endif #if !defined(DIAGNOSTIC) #ifdef __GNUC_GNU_INLINE__ #define PMAP_INLINE __attribute__((__gnu_inline__)) inline #else #define PMAP_INLINE extern inline #endif #else #define PMAP_INLINE #endif #define PV_STATS #ifdef PV_STATS #define PV_STAT(x) do { x ; } while (0) #else #define PV_STAT(x) do { } while (0) #endif #define pa_index(pa) ((pa) >> PDRSHIFT) #define pa_to_pvh(pa) (&pv_table[pa_index(pa)]) /* * Get PDEs and PTEs for user/kernel address space */ #define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) #define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) #define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) #define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) #define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_set_w(pte, v) ((v) ? atomic_set_int((u_int *)(pte), PG_W) : \ atomic_clear_int((u_int *)(pte), PG_W)) #define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) struct pmap kernel_pmap_store; LIST_HEAD(pmaplist, pmap); static struct pmaplist allpmaps; static struct mtx allpmaps_lock; vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ int pgeflag = 0; /* PG_G or-in */ int pseflag = 0; /* PG_PS or-in */ static int nkpt = NKPT; vm_offset_t kernel_vm_end = KERNBASE + NKPT * NBPDR; extern u_int32_t KERNend; extern u_int32_t KPTphys; #ifdef PAE pt_entry_t pg_nx; static uma_zone_t pdptzone; #endif SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); static int pat_works = 1; -TUNABLE_INT("vm.pmap.pat_works", &pat_works); -SYSCTL_INT(_vm_pmap, OID_AUTO, pat_works, CTLFLAG_RDTUN, &pat_works, 1, +SYSCTL_INT(_vm_pmap, OID_AUTO, pat_works, CTLFLAG_RD, &pat_works, 1, "Is page attribute table fully functional?"); static int pg_ps_enabled = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN, &pg_ps_enabled, 0, "Are large page mappings enabled?"); #define PAT_INDEX_SIZE 8 static int pat_index[PAT_INDEX_SIZE]; /* cache mode to PAT index conversion */ /* * Data for the pv entry allocation mechanism */ static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; static struct md_page *pv_table; static int shpgperproc = PMAP_SHPGPERPROC; struct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */ int pv_maxchunks; /* How many chunks we have KVA for */ vm_offset_t pv_vafree; /* freelist stored in the PTE */ /* * All those kernel PT submaps that BSD is so fond of */ struct sysmaps { struct mtx lock; pt_entry_t *CMAP1; pt_entry_t *CMAP2; caddr_t CADDR1; caddr_t CADDR2; }; static struct sysmaps sysmaps_pcpu[MAXCPU]; pt_entry_t *CMAP1 = 0; static pt_entry_t *CMAP3; static pd_entry_t *KPTD; caddr_t CADDR1 = 0, ptvmmap = 0; static caddr_t CADDR3; struct msgbuf *msgbufp = 0; /* * Crashdump maps. */ static caddr_t crashdumpmap; static pt_entry_t *PMAP1 = 0, *PMAP2; static pt_entry_t *PADDR1 = 0, *PADDR2; #ifdef SMP static int PMAP1cpu; static int PMAP1changedcpu; SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD, &PMAP1changedcpu, 0, "Number of times pmap_pte_quick changed CPU with same PMAP1"); #endif static int PMAP1changed; SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD, &PMAP1changed, 0, "Number of times pmap_pte_quick changed PMAP1"); static int PMAP1unchanged; SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD, &PMAP1unchanged, 0, "Number of times pmap_pte_quick didn't change PMAP1"); static struct mtx PMAP2mutex; static void free_pv_entry(pmap_t pmap, pv_entry_t pv); static pv_entry_t get_pv_entry(pmap_t locked_pmap, int try); static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa); static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa); static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa); static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static int pmap_pvh_wired_mappings(struct md_page *pvh, int count); static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte); static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte); static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte); static boolean_t pmap_is_modified_pvh(struct md_page *pvh); static boolean_t pmap_is_referenced_pvh(struct md_page *pvh); static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode); static void pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde); static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va); static void pmap_pde_attr(pd_entry_t *pde, int cache_bits); static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot); static void pmap_pte_attr(pt_entry_t *pte, int cache_bits); static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, vm_page_t *free); static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva, vm_page_t *free); static void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte); static void pmap_remove_page(struct pmap *pmap, vm_offset_t va, vm_page_t *free); static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va); static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde); static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde); static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags); static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags); static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free); static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va); static void pmap_pte_release(pt_entry_t *pte); static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *); #ifdef PAE static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); #endif static void pmap_set_pg(void); CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t)); CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t)); /* * If you get an error here, then you set KVA_PAGES wrong! See the * description of KVA_PAGES in sys/i386/include/pmap.h. It must be * multiple of 4 for a normal kernel, or a multiple of 8 for a PAE. */ CTASSERT(KERNBASE % (1 << 24) == 0); /* * Bootstrap the system enough to run with virtual memory. * * On the i386 this is called after mapping has already been enabled * and just syncs the pmap module with what has already been done. * [We can't call it easily with mapping off since the kernel is not * mapped with PA == VA, hence we would have to relocate every address * from the linked base (virtual) address "KERNBASE" to the actual * (physical) address starting relative to 0] */ void pmap_bootstrap(vm_paddr_t firstaddr) { vm_offset_t va; pt_entry_t *pte, *unused; struct sysmaps *sysmaps; int i; /* * Initialize the first available kernel virtual address. However, * using "firstaddr" may waste a few pages of the kernel virtual * address space, because locore may not have mapped every physical * page that it allocated. Preferably, locore would provide a first * unused virtual address in addition to "firstaddr". */ virtual_avail = (vm_offset_t) KERNBASE + firstaddr; virtual_end = VM_MAX_KERNEL_ADDRESS; /* * Initialize the kernel pmap (which is statically allocated). */ PMAP_LOCK_INIT(kernel_pmap); kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD); #ifdef PAE kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); #endif kernel_pmap->pm_root = NULL; kernel_pmap->pm_active = -1; /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); LIST_INIT(&allpmaps); /* * Request a spin mutex so that changes to allpmaps cannot be * preempted by smp_rendezvous_cpus(). Otherwise, * pmap_update_pde_kernel() could access allpmaps while it is * being changed. */ mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); /* * Reserve some special page table entries/VA space for temporary * mapping of pages. */ #define SYSMAP(c, p, v, n) \ v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); va = virtual_avail; pte = vtopte(va); /* * CMAP1/CMAP2 are used for zeroing and copying pages. * CMAP3 is used for the idle process page zeroing. */ for (i = 0; i < MAXCPU; i++) { sysmaps = &sysmaps_pcpu[i]; mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF); SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1) SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1) } SYSMAP(caddr_t, CMAP1, CADDR1, 1) SYSMAP(caddr_t, CMAP3, CADDR3, 1) /* * Crashdump maps. */ SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS) /* * ptvmmap is used for reading arbitrary physical pages via /dev/mem. */ SYSMAP(caddr_t, unused, ptvmmap, 1) /* * msgbufp is used to map the system message buffer. */ SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(MSGBUF_SIZE))) /* * KPTmap is used by pmap_kextract(). * * KPTmap is first initialized by locore. However, that initial * KPTmap can only support NKPT page table pages. Here, a larger * KPTmap is created that can support KVA_PAGES page table pages. */ SYSMAP(pt_entry_t *, KPTD, KPTmap, KVA_PAGES) for (i = 0; i < NKPT; i++) KPTD[i] = (KPTphys + (i << PAGE_SHIFT)) | pgeflag | PG_RW | PG_V; /* * Adjust the start of the KPTD and KPTmap so that the implementation * of pmap_kextract() and pmap_growkernel() can be made simpler. */ KPTD -= KPTDI; KPTmap -= i386_btop(KPTDI << PDRSHIFT); /* * ptemap is used for pmap_pte_quick */ SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1) SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1) mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF); virtual_avail = va; /* * Leave in place an identity mapping (virt == phys) for the low 1 MB * physical memory region that is used by the ACPI wakeup code. This * mapping must not have PG_G set. */ #ifdef XBOX /* FIXME: This is gross, but needed for the XBOX. Since we are in such * an early stadium, we cannot yet neatly map video memory ... :-( * Better fixes are very welcome! */ if (!arch_i386_is_xbox) #endif for (i = 1; i < NKPT; i++) PTD[i] = 0; /* Initialize the PAT MSR if present. */ pmap_init_pat(); /* Turn on PG_G on kernel page(s) */ pmap_set_pg(); } /* * Setup the PAT MSR. */ void pmap_init_pat(void) { int pat_table[PAT_INDEX_SIZE]; uint64_t pat_msr; u_long cr0, cr4; int i; /* Set default PAT index table. */ for (i = 0; i < PAT_INDEX_SIZE; i++) pat_table[i] = -1; pat_table[PAT_WRITE_BACK] = 0; pat_table[PAT_WRITE_THROUGH] = 1; pat_table[PAT_UNCACHEABLE] = 3; pat_table[PAT_WRITE_COMBINING] = 3; pat_table[PAT_WRITE_PROTECTED] = 3; pat_table[PAT_UNCACHED] = 3; /* Bail if this CPU doesn't implement PAT. */ if ((cpu_feature & CPUID_PAT) == 0) { for (i = 0; i < PAT_INDEX_SIZE; i++) pat_index[i] = pat_table[i]; pat_works = 0; return; } /* * Due to some Intel errata, we can only safely use the lower 4 * PAT entries. * * Intel Pentium III Processor Specification Update * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B * or Mode C Paging) * * Intel Pentium IV Processor Specification Update * Errata N46 (PAT Index MSB May Be Calculated Incorrectly) */ if (cpu_vendor_id == CPU_VENDOR_INTEL && !(CPUID_TO_FAMILY(cpu_id) == 6 && CPUID_TO_MODEL(cpu_id) >= 0xe)) pat_works = 0; /* Initialize default PAT entries. */ pat_msr = PAT_VALUE(0, PAT_WRITE_BACK) | PAT_VALUE(1, PAT_WRITE_THROUGH) | PAT_VALUE(2, PAT_UNCACHED) | PAT_VALUE(3, PAT_UNCACHEABLE) | PAT_VALUE(4, PAT_WRITE_BACK) | PAT_VALUE(5, PAT_WRITE_THROUGH) | PAT_VALUE(6, PAT_UNCACHED) | PAT_VALUE(7, PAT_UNCACHEABLE); if (pat_works) { /* * Leave the indices 0-3 at the default of WB, WT, UC-, and UC. * Program 5 and 6 as WP and WC. * Leave 4 and 7 as WB and UC. */ pat_msr &= ~(PAT_MASK(5) | PAT_MASK(6)); pat_msr |= PAT_VALUE(5, PAT_WRITE_PROTECTED) | PAT_VALUE(6, PAT_WRITE_COMBINING); pat_table[PAT_UNCACHED] = 2; pat_table[PAT_WRITE_PROTECTED] = 5; pat_table[PAT_WRITE_COMBINING] = 6; } else { /* * Just replace PAT Index 2 with WC instead of UC-. */ pat_msr &= ~PAT_MASK(2); pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING); pat_table[PAT_WRITE_COMBINING] = 2; } /* Disable PGE. */ cr4 = rcr4(); load_cr4(cr4 & ~CR4_PGE); /* Disable caches (CD = 1, NW = 0). */ cr0 = rcr0(); load_cr0((cr0 & ~CR0_NW) | CR0_CD); /* Flushes caches and TLBs. */ wbinvd(); invltlb(); /* Update PAT and index table. */ wrmsr(MSR_PAT, pat_msr); for (i = 0; i < PAT_INDEX_SIZE; i++) pat_index[i] = pat_table[i]; /* Flush caches and TLBs again. */ wbinvd(); invltlb(); /* Restore caches and PGE. */ load_cr0(cr0); load_cr4(cr4); } /* * Set PG_G on kernel pages. Only the BSP calls this when SMP is turned on. */ static void pmap_set_pg(void) { pt_entry_t *pte; vm_offset_t va, endva; if (pgeflag == 0) return; endva = KERNBASE + KERNend; if (pseflag) { va = KERNBASE + KERNLOAD; while (va < endva) { pdir_pde(PTD, va) |= pgeflag; invltlb(); /* Play it safe, invltlb() every time */ va += NBPDR; } } else { va = (vm_offset_t)btext; while (va < endva) { pte = vtopte(va); if (*pte) *pte |= pgeflag; invltlb(); /* Play it safe, invltlb() every time */ va += PAGE_SIZE; } } } /* * Initialize a vm_page's machine-dependent fields. */ void pmap_page_init(vm_page_t m) { TAILQ_INIT(&m->md.pv_list); m->md.pat_mode = PAT_WRITE_BACK; } #ifdef PAE static void * pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) { /* Inform UMA that this allocator uses kernel_map/object. */ *flags = UMA_SLAB_KERNEL; return ((void *)kmem_alloc_contig(kernel_map, bytes, wait, 0x0ULL, 0xffffffffULL, 1, 0, VM_MEMATTR_DEFAULT)); } #endif /* * ABuse the pte nodes for unmapped kva to thread a kva freelist through. * Requirements: * - Must deal with pages in order to ensure that none of the PG_* bits * are ever set, PG_V in particular. * - Assumes we can write to ptes without pte_store() atomic ops, even * on PAE systems. This should be ok. * - Assumes nothing will ever test these addresses for 0 to indicate * no mapping instead of correctly checking PG_V. * - Assumes a vm_offset_t will fit in a pte (true for i386). * Because PG_V is never set, there can be no mappings to invalidate. */ static vm_offset_t pmap_ptelist_alloc(vm_offset_t *head) { pt_entry_t *pte; vm_offset_t va; va = *head; if (va == 0) return (va); /* Out of memory */ pte = vtopte(va); *head = *pte; if (*head & PG_V) panic("pmap_ptelist_alloc: va with PG_V set!"); *pte = 0; return (va); } static void pmap_ptelist_free(vm_offset_t *head, vm_offset_t va) { pt_entry_t *pte; if (va & PG_V) panic("pmap_ptelist_free: freeing va with PG_V set!"); pte = vtopte(va); *pte = *head; /* virtual! PG_V is 0 though */ *head = va; } static void pmap_ptelist_init(vm_offset_t *head, void *base, int npages) { int i; vm_offset_t va; *head = 0; for (i = npages - 1; i >= 0; i--) { va = (vm_offset_t)base + i * PAGE_SIZE; pmap_ptelist_free(head, va); } } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. */ void pmap_init(void) { vm_page_t mpte; vm_size_t s; int i, pv_npg; /* * Initialize the vm page array entries for the kernel pmap's * page table pages. */ for (i = 0; i < NKPT; i++) { mpte = PHYS_TO_VM_PAGE(KPTphys + (i << PAGE_SHIFT)); KASSERT(mpte >= vm_page_array && mpte < &vm_page_array[vm_page_array_size], ("pmap_init: page table page is out of range")); mpte->pindex = i + KPTDI; mpte->phys_addr = KPTphys + (i << PAGE_SHIFT); } /* * Initialize the address space (zone) for the pv entries. Set a * high water mark so that the system can recover from excessive * numbers of pv entries. */ TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); pv_entry_max = shpgperproc * maxproc + cnt.v_page_count; TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); pv_entry_max = roundup(pv_entry_max, _NPCPV); pv_entry_high_water = 9 * (pv_entry_max / 10); /* * If the kernel is running in a virtual machine on an AMD Family 10h * processor, then it must assume that MCA is enabled by the virtual * machine monitor. */ if (vm_guest == VM_GUEST_VM && cpu_vendor_id == CPU_VENDOR_AMD && CPUID_TO_FAMILY(cpu_id) == 0x10) workaround_erratum383 = 1; /* * Are large page mappings supported and enabled? */ TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled); if (pseflag == 0) pg_ps_enabled = 0; else if (pg_ps_enabled) { KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0, ("pmap_init: can't assign to pagesizes[1]")); pagesizes[1] = NBPDR; } /* * Calculate the size of the pv head table for superpages. */ for (i = 0; phys_avail[i + 1]; i += 2); pv_npg = round_4mpage(phys_avail[(i - 2) + 1]) / NBPDR; /* * Allocate memory for the pv head table for superpages. */ s = (vm_size_t)(pv_npg * sizeof(struct md_page)); s = round_page(s); pv_table = (struct md_page *)kmem_alloc(kernel_map, s); for (i = 0; i < pv_npg; i++) TAILQ_INIT(&pv_table[i].pv_list); pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc); pv_chunkbase = (struct pv_chunk *)kmem_alloc_nofault(kernel_map, PAGE_SIZE * pv_maxchunks); if (pv_chunkbase == NULL) panic("pmap_init: not enough kvm for pv chunks"); pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks); #ifdef PAE pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL, NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1, UMA_ZONE_VM | UMA_ZONE_NOFREE); uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf); #endif } SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0, "Max number of PV entries"); SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0, "Page share factor per proc"); SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0, "2/4MB page mapping counters"); static u_long pmap_pde_demotions; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, demotions, CTLFLAG_RD, &pmap_pde_demotions, 0, "2/4MB page demotions"); static u_long pmap_pde_mappings; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD, &pmap_pde_mappings, 0, "2/4MB page mappings"); static u_long pmap_pde_p_failures; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, p_failures, CTLFLAG_RD, &pmap_pde_p_failures, 0, "2/4MB page promotion failures"); static u_long pmap_pde_promotions; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, promotions, CTLFLAG_RD, &pmap_pde_promotions, 0, "2/4MB page promotions"); /*************************************************** * Low level helper routines..... ***************************************************/ /* * Determine the appropriate bits to set in a PTE or PDE for a specified * caching mode. */ int pmap_cache_bits(int mode, boolean_t is_pde) { int cache_bits, pat_flag, pat_idx; if (mode < 0 || mode >= PAT_INDEX_SIZE || pat_index[mode] < 0) panic("Unknown caching mode %d\n", mode); /* The PAT bit is different for PTE's and PDE's. */ pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT; /* Map the caching mode to a PAT index. */ pat_idx = pat_index[mode]; /* Map the 3-bit index value into the PAT, PCD, and PWT bits. */ cache_bits = 0; if (pat_idx & 0x4) cache_bits |= pat_flag; if (pat_idx & 0x2) cache_bits |= PG_NC_PCD; if (pat_idx & 0x1) cache_bits |= PG_NC_PWT; return (cache_bits); } /* * The caller is responsible for maintaining TLB consistency. */ static void pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde) { pd_entry_t *pde; pmap_t pmap; boolean_t PTD_updated; PTD_updated = FALSE; mtx_lock_spin(&allpmaps_lock); LIST_FOREACH(pmap, &allpmaps, pm_list) { if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)) PTD_updated = TRUE; pde = pmap_pde(pmap, va); pde_store(pde, newpde); } mtx_unlock_spin(&allpmaps_lock); KASSERT(PTD_updated, ("pmap_kenter_pde: current page table is not in allpmaps")); } /* * After changing the page size for the specified virtual address in the page * table, flush the corresponding entries from the processor's TLB. Only the * calling processor's TLB is affected. * * The calling thread must be pinned to a processor. */ static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde) { u_long cr4; if ((newpde & PG_PS) == 0) /* Demotion: flush a specific 2MB page mapping. */ invlpg(va); else if ((newpde & PG_G) == 0) /* * Promotion: flush every 4KB page mapping from the TLB * because there are too many to flush individually. */ invltlb(); else { /* * Promotion: flush every 4KB page mapping from the TLB, * including any global (PG_G) mappings. */ cr4 = rcr4(); load_cr4(cr4 & ~CR4_PGE); /* * Although preemption at this point could be detrimental to * performance, it would not lead to an error. PG_G is simply * ignored if CR4.PGE is clear. Moreover, in case this block * is re-entered, the load_cr4() either above or below will * modify CR4.PGE flushing the TLB. */ load_cr4(cr4 | CR4_PGE); } } #ifdef SMP /* * For SMP, these functions have to use the IPI mechanism for coherence. * * N.B.: Before calling any of the following TLB invalidation functions, * the calling processor must ensure that all stores updating a non- * kernel page table are globally performed. Otherwise, another * processor could cache an old, pre-update entry without being * invalidated. This can happen one of two ways: (1) The pmap becomes * active on another processor after its pm_active field is checked by * one of the following functions but before a store updating the page * table is globally performed. (2) The pmap becomes active on another * processor before its pm_active field is checked but due to * speculative loads one of the following functions stills reads the * pmap as inactive on the other processor. * * The kernel page table is exempt because its pm_active field is * immutable. The kernel page table is always active on every * processor. */ void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { cpumask_t cpumask, other_cpus; sched_pin(); if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { invlpg(va); smp_invlpg(va); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); if (pmap->pm_active & cpumask) invlpg(va); if (pmap->pm_active & other_cpus) smp_masked_invlpg(pmap->pm_active & other_cpus, va); } sched_unpin(); } void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { cpumask_t cpumask, other_cpus; vm_offset_t addr; sched_pin(); if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); smp_invlpg_range(sva, eva); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); if (pmap->pm_active & cpumask) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); if (pmap->pm_active & other_cpus) smp_masked_invlpg_range(pmap->pm_active & other_cpus, sva, eva); } sched_unpin(); } void pmap_invalidate_all(pmap_t pmap) { cpumask_t cpumask, other_cpus; sched_pin(); if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { invltlb(); smp_invltlb(); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); if (pmap->pm_active & cpumask) invltlb(); if (pmap->pm_active & other_cpus) smp_masked_invltlb(pmap->pm_active & other_cpus); } sched_unpin(); } void pmap_invalidate_cache(void) { sched_pin(); wbinvd(); smp_cache_flush(); sched_unpin(); } struct pde_action { cpumask_t store; /* processor that updates the PDE */ cpumask_t invalidate; /* processors that invalidate their TLB */ vm_offset_t va; pd_entry_t *pde; pd_entry_t newpde; }; static void pmap_update_pde_kernel(void *arg) { struct pde_action *act = arg; pd_entry_t *pde; pmap_t pmap; if (act->store == PCPU_GET(cpumask)) /* * Elsewhere, this operation requires allpmaps_lock for * synchronization. Here, it does not because it is being * performed in the context of an all_cpus rendezvous. */ LIST_FOREACH(pmap, &allpmaps, pm_list) { pde = pmap_pde(pmap, act->va); pde_store(pde, act->newpde); } } static void pmap_update_pde_user(void *arg) { struct pde_action *act = arg; if (act->store == PCPU_GET(cpumask)) pde_store(act->pde, act->newpde); } static void pmap_update_pde_teardown(void *arg) { struct pde_action *act = arg; if ((act->invalidate & PCPU_GET(cpumask)) != 0) pmap_update_pde_invalidate(act->va, act->newpde); } /* * Change the page size for the specified virtual address in a way that * prevents any possibility of the TLB ever having two entries that map the * same virtual address using different page sizes. This is the recommended * workaround for Erratum 383 on AMD Family 10h processors. It prevents a * machine check exception for a TLB state that is improperly diagnosed as a * hardware error. */ static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { struct pde_action act; cpumask_t active, cpumask; sched_pin(); cpumask = PCPU_GET(cpumask); if (pmap == kernel_pmap) active = all_cpus; else active = pmap->pm_active; if ((active & PCPU_GET(other_cpus)) != 0) { act.store = cpumask; act.invalidate = active; act.va = va; act.pde = pde; act.newpde = newpde; smp_rendezvous_cpus(cpumask | active, smp_no_rendevous_barrier, pmap == kernel_pmap ? pmap_update_pde_kernel : pmap_update_pde_user, pmap_update_pde_teardown, &act); } else { if (pmap == kernel_pmap) pmap_kenter_pde(va, newpde); else pde_store(pde, newpde); if ((active & cpumask) != 0) pmap_update_pde_invalidate(va, newpde); } sched_unpin(); } #else /* !SMP */ /* * Normal, non-SMP, 486+ invalidation functions. * We inline these within pmap.c for speed. */ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { if (pmap == kernel_pmap || pmap->pm_active) invlpg(va); } PMAP_INLINE void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; if (pmap == kernel_pmap || pmap->pm_active) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); } PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { if (pmap == kernel_pmap || pmap->pm_active) invltlb(); } PMAP_INLINE void pmap_invalidate_cache(void) { wbinvd(); } static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { if (pmap == kernel_pmap) pmap_kenter_pde(va, newpde); else pde_store(pde, newpde); if (pmap == kernel_pmap || pmap->pm_active) pmap_update_pde_invalidate(va, newpde); } #endif /* !SMP */ void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva) { KASSERT((sva & PAGE_MASK) == 0, ("pmap_invalidate_cache_range: sva not page-aligned")); KASSERT((eva & PAGE_MASK) == 0, ("pmap_invalidate_cache_range: eva not page-aligned")); if (cpu_feature & CPUID_SS) ; /* If "Self Snoop" is supported, do nothing. */ else if ((cpu_feature & CPUID_CLFSH) != 0 && eva - sva < 2 * 1024 * 1024) { /* * Otherwise, do per-cache line flush. Use the mfence * instruction to insure that previous stores are * included in the write-back. The processor * propagates flush to other processors in the cache * coherence domain. */ mfence(); for (; sva < eva; sva += cpu_clflush_line_size) clflush(sva); mfence(); } else { /* * No targeted cache flush methods are supported by CPU, * or the supplied range is bigger than 2MB. * Globally invalidate cache. */ pmap_invalidate_cache(); } } /* * Are we current address space or kernel? N.B. We return FALSE when * a pmap's page table is in use because a kernel thread is borrowing * it. The borrowed page table can change spontaneously, making any * dependence on its continued use subject to a race condition. */ static __inline int pmap_is_current(pmap_t pmap) { return (pmap == kernel_pmap || (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) && (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME))); } /* * If the given pmap is not the current or kernel pmap, the returned pte must * be released by passing it to pmap_pte_release(). */ pt_entry_t * pmap_pte(pmap_t pmap, vm_offset_t va) { pd_entry_t newpf; pd_entry_t *pde; pde = pmap_pde(pmap, va); if (*pde & PG_PS) return (pde); if (*pde != 0) { /* are we current address space or kernel? */ if (pmap_is_current(pmap)) return (vtopte(va)); mtx_lock(&PMAP2mutex); newpf = *pde & PG_FRAME; if ((*PMAP2 & PG_FRAME) != newpf) { *PMAP2 = newpf | PG_RW | PG_V | PG_A | PG_M; pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2); } return (PADDR2 + (i386_btop(va) & (NPTEPG - 1))); } return (0); } /* * Releases a pte that was obtained from pmap_pte(). Be prepared for the pte * being NULL. */ static __inline void pmap_pte_release(pt_entry_t *pte) { if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2) mtx_unlock(&PMAP2mutex); } static __inline void invlcaddr(void *caddr) { invlpg((u_int)caddr); } /* * Super fast pmap_pte routine best used when scanning * the pv lists. This eliminates many coarse-grained * invltlb calls. Note that many of the pv list * scans are across different pmaps. It is very wasteful * to do an entire invltlb for checking a single mapping. * * If the given pmap is not the current pmap, vm_page_queue_mtx * must be held and curthread pinned to a CPU. */ static pt_entry_t * pmap_pte_quick(pmap_t pmap, vm_offset_t va) { pd_entry_t newpf; pd_entry_t *pde; pde = pmap_pde(pmap, va); if (*pde & PG_PS) return (pde); if (*pde != 0) { /* are we current address space or kernel? */ if (pmap_is_current(pmap)) return (vtopte(va)); mtx_assert(&vm_page_queue_mtx, MA_OWNED); KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); newpf = *pde & PG_FRAME; if ((*PMAP1 & PG_FRAME) != newpf) { *PMAP1 = newpf | PG_RW | PG_V | PG_A | PG_M; #ifdef SMP PMAP1cpu = PCPU_GET(cpuid); #endif invlcaddr(PADDR1); PMAP1changed++; } else #ifdef SMP if (PMAP1cpu != PCPU_GET(cpuid)) { PMAP1cpu = PCPU_GET(cpuid); invlcaddr(PADDR1); PMAP1changedcpu++; } else #endif PMAP1unchanged++; return (PADDR1 + (i386_btop(va) & (NPTEPG - 1))); } return (0); } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_paddr_t pmap_extract(pmap_t pmap, vm_offset_t va) { vm_paddr_t rtval; pt_entry_t *pte; pd_entry_t pde; rtval = 0; PMAP_LOCK(pmap); pde = pmap->pm_pdir[va >> PDRSHIFT]; if (pde != 0) { if ((pde & PG_PS) != 0) rtval = (pde & PG_PS_FRAME) | (va & PDRMASK); else { pte = pmap_pte(pmap, va); rtval = (*pte & PG_FRAME) | (va & PAGE_MASK); pmap_pte_release(pte); } } PMAP_UNLOCK(pmap); return (rtval); } /* * Routine: pmap_extract_and_hold * Function: * Atomically extract and hold the physical page * with the given pmap and virtual address pair * if that mapping permits the given protection. */ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pd_entry_t pde; pt_entry_t pte; vm_page_t m; vm_paddr_t pa; pa = 0; m = NULL; PMAP_LOCK(pmap); retry: pde = *pmap_pde(pmap, va); if (pde != 0) { if (pde & PG_PS) { if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) | (va & PDRMASK), &pa)) goto retry; m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | (va & PDRMASK)); vm_page_hold(m); } } else { sched_pin(); pte = *pmap_pte_quick(pmap, va); if (pte != 0 && ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa)) goto retry; m = PHYS_TO_VM_PAGE(pte & PG_FRAME); vm_page_hold(m); } sched_unpin(); } } PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } /*************************************************** * Low level mapping routines..... ***************************************************/ /* * Add a wired page to the kva. * Note: not SMP coherent. * * This function may be used before pmap_bootstrap() is called. */ PMAP_INLINE void pmap_kenter(vm_offset_t va, vm_paddr_t pa) { pt_entry_t *pte; pte = vtopte(va); pte_store(pte, pa | PG_RW | PG_V | pgeflag); } static __inline void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode) { pt_entry_t *pte; pte = vtopte(va); pte_store(pte, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0)); } /* * Remove a page from the kernel pagetables. * Note: not SMP coherent. * * This function may be used before pmap_bootstrap() is called. */ PMAP_INLINE void pmap_kremove(vm_offset_t va) { pt_entry_t *pte; pte = vtopte(va); pte_clear(pte); } /* * Used to map a range of physical addresses into kernel * virtual address space. * * The value passed in '*virt' is a suggested virtual address for * the mapping. Architectures which can support a direct-mapped * physical to virtual region can return the appropriate address * within that region, leaving '*virt' unchanged. Other * architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped * region. */ vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) { vm_offset_t va, sva; va = sva = *virt; while (start < end) { pmap_kenter(va, start); va += PAGE_SIZE; start += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); *virt = va; return (sva); } /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) { pt_entry_t *endpte, oldpte, pa, *pte; vm_page_t m; oldpte = 0; pte = vtopte(sva); endpte = pte + count; while (pte < endpte) { m = *ma++; pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0); if ((*pte & (PG_FRAME | PG_PTE_CACHE)) != pa) { oldpte |= *pte; pte_store(pte, pa | pgeflag | PG_RW | PG_V); } pte++; } if (__predict_false((oldpte & PG_V) != 0)) pmap_invalidate_range(kernel_pmap, sva, sva + count * PAGE_SIZE); } /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qremove(vm_offset_t sva, int count) { vm_offset_t va; va = sva; while (count-- > 0) { pmap_kremove(va); va += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /*************************************************** * Page table page management routines..... ***************************************************/ static __inline void pmap_free_zero_pages(vm_page_t free) { vm_page_t m; while (free != NULL) { m = free; free = m->right; /* Preserve the page's PG_ZERO setting. */ vm_page_free_toq(m); } } /* * Schedule the specified unused page table page to be freed. Specifically, * add the page to the specified list of pages that will be released to the * physical memory manager after the TLB has been updated. */ static __inline void pmap_add_delayed_free_list(vm_page_t m, vm_page_t *free, boolean_t set_PG_ZERO) { if (set_PG_ZERO) m->flags |= PG_ZERO; else m->flags &= ~PG_ZERO; m->right = *free; *free = m; } /* * Inserts the specified page table page into the specified pmap's collection * of idle page table pages. Each of a pmap's page table pages is responsible * for mapping a distinct range of virtual addresses. The pmap's collection is * ordered by this virtual address range. */ static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte) { vm_page_t root; PMAP_LOCK_ASSERT(pmap, MA_OWNED); root = pmap->pm_root; if (root == NULL) { mpte->left = NULL; mpte->right = NULL; } else { root = vm_page_splay(mpte->pindex, root); if (mpte->pindex < root->pindex) { mpte->left = root->left; mpte->right = root; root->left = NULL; } else if (mpte->pindex == root->pindex) panic("pmap_insert_pt_page: pindex already inserted"); else { mpte->right = root->right; mpte->left = root; root->right = NULL; } } pmap->pm_root = mpte; } /* * Looks for a page table page mapping the specified virtual address in the * specified pmap's collection of idle page table pages. Returns NULL if there * is no page table page corresponding to the specified virtual address. */ static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va) { vm_page_t mpte; vm_pindex_t pindex = va >> PDRSHIFT; PMAP_LOCK_ASSERT(pmap, MA_OWNED); if ((mpte = pmap->pm_root) != NULL && mpte->pindex != pindex) { mpte = vm_page_splay(pindex, mpte); if ((pmap->pm_root = mpte)->pindex != pindex) mpte = NULL; } return (mpte); } /* * Removes the specified page table page from the specified pmap's collection * of idle page table pages. The specified page table page must be a member of * the pmap's collection. */ static void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte) { vm_page_t root; PMAP_LOCK_ASSERT(pmap, MA_OWNED); if (mpte != pmap->pm_root) vm_page_splay(mpte->pindex, pmap->pm_root); if (mpte->left == NULL) root = mpte->right; else { root = vm_page_splay(mpte->pindex, mpte->left); root->right = mpte->right; } pmap->pm_root = root; } /* * This routine unholds page table pages, and if the hold count * drops to zero, then it decrements the wire count. */ static __inline int pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free) { --m->wire_count; if (m->wire_count == 0) return (_pmap_unwire_pte_hold(pmap, m, free)); else return (0); } static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free) { vm_offset_t pteva; /* * unmap the page table page */ pmap->pm_pdir[m->pindex] = 0; --pmap->pm_stats.resident_count; /* * This is a release store so that the ordinary store unmapping * the page table page is globally performed before TLB shoot- * down is begun. */ atomic_subtract_rel_int(&cnt.v_wire_count, 1); /* * Do an invltlb to make the invalidated mapping * take effect immediately. */ pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex); pmap_invalidate_page(pmap, pteva); /* * Put page on a list so that it is released after * *ALL* TLB shootdown is done */ pmap_add_delayed_free_list(m, free, TRUE); return (1); } /* * After removing a page table entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ static int pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t *free) { pd_entry_t ptepde; vm_page_t mpte; if (va >= VM_MAXUSER_ADDRESS) return (0); ptepde = *pmap_pde(pmap, va); mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME); return (pmap_unwire_pte_hold(pmap, mpte, free)); } /* * Initialize the pmap for the swapper process. */ void pmap_pinit0(pmap_t pmap) { PMAP_LOCK_INIT(pmap); /* * Since the page table directory is shared with the kernel pmap, * which is already included in the list "allpmaps", this pmap does * not need to be inserted into that list. */ pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD); #ifdef PAE pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); #endif pmap->pm_root = NULL; pmap->pm_active = 0; PCPU_SET(curpmap, pmap); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ int pmap_pinit(pmap_t pmap) { vm_page_t m, ptdpg[NPGPTD]; vm_paddr_t pa; static int color; int i; PMAP_LOCK_INIT(pmap); /* * No need to allocate page table space yet but we do need a valid * page directory table. */ if (pmap->pm_pdir == NULL) { pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map, NBPTD); if (pmap->pm_pdir == NULL) { PMAP_LOCK_DESTROY(pmap); return (0); } #ifdef PAE pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO); KASSERT(((vm_offset_t)pmap->pm_pdpt & ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0, ("pmap_pinit: pdpt misaligned")); KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30), ("pmap_pinit: pdpt above 4g")); #endif pmap->pm_root = NULL; } KASSERT(pmap->pm_root == NULL, ("pmap_pinit: pmap has reserved page table page(s)")); /* * allocate the page directory page(s) */ for (i = 0; i < NPGPTD;) { m = vm_page_alloc(NULL, color++, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (m == NULL) VM_WAIT; else { ptdpg[i++] = m; } } pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD); for (i = 0; i < NPGPTD; i++) { if ((ptdpg[i]->flags & PG_ZERO) == 0) bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE); } mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); /* Copy the kernel page table directory entries. */ bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t)); mtx_unlock_spin(&allpmaps_lock); /* install self-referential address mapping entry(s) */ for (i = 0; i < NPGPTD; i++) { pa = VM_PAGE_TO_PHYS(ptdpg[i]); pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M; #ifdef PAE pmap->pm_pdpt[i] = pa | PG_V; #endif } pmap->pm_active = 0; TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); return (1); } /* * this routine is called if the page table page is not * mapped correctly. */ static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags) { vm_paddr_t ptepa; vm_page_t m; KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); /* * Allocate a page table page. */ if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { if (flags & M_WAITOK) { PMAP_UNLOCK(pmap); vm_page_unlock_queues(); VM_WAIT; vm_page_lock_queues(); PMAP_LOCK(pmap); } /* * Indicate the need to retry. While waiting, the page table * page may have been allocated. */ return (NULL); } if ((m->flags & PG_ZERO) == 0) pmap_zero_page(m); /* * Map the pagetable page into the process address space, if * it isn't already there. */ pmap->pm_stats.resident_count++; ptepa = VM_PAGE_TO_PHYS(m); pmap->pm_pdir[ptepindex] = (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M); return (m); } static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags) { unsigned ptepindex; pd_entry_t ptepa; vm_page_t m; KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); /* * Calculate pagetable page index */ ptepindex = va >> PDRSHIFT; retry: /* * Get the page directory entry */ ptepa = pmap->pm_pdir[ptepindex]; /* * This supports switching from a 4MB page to a * normal 4K page. */ if (ptepa & PG_PS) { (void)pmap_demote_pde(pmap, &pmap->pm_pdir[ptepindex], va); ptepa = pmap->pm_pdir[ptepindex]; } /* * If the page table page is mapped, we just increment the * hold count, and activate it. */ if (ptepa) { m = PHYS_TO_VM_PAGE(ptepa & PG_FRAME); m->wire_count++; } else { /* * Here if the pte page isn't mapped, or if it has * been deallocated. */ m = _pmap_allocpte(pmap, ptepindex, flags); if (m == NULL && (flags & M_WAITOK)) goto retry; } return (m); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ #ifdef SMP /* * Deal with a SMP shootdown of other users of the pmap that we are * trying to dispose of. This can be a bit hairy. */ static cpumask_t *lazymask; static u_int lazyptd; static volatile u_int lazywait; void pmap_lazyfix_action(void); void pmap_lazyfix_action(void) { cpumask_t mymask = PCPU_GET(cpumask); #ifdef COUNT_IPIS (*ipi_lazypmap_counts[PCPU_GET(cpuid)])++; #endif if (rcr3() == lazyptd) load_cr3(PCPU_GET(curpcb)->pcb_cr3); atomic_clear_int(lazymask, mymask); atomic_store_rel_int(&lazywait, 1); } static void pmap_lazyfix_self(cpumask_t mymask) { if (rcr3() == lazyptd) load_cr3(PCPU_GET(curpcb)->pcb_cr3); atomic_clear_int(lazymask, mymask); } static void pmap_lazyfix(pmap_t pmap) { cpumask_t mymask, mask; u_int spins; while ((mask = pmap->pm_active) != 0) { spins = 50000000; mask = mask & -mask; /* Find least significant set bit */ mtx_lock_spin(&smp_ipi_mtx); #ifdef PAE lazyptd = vtophys(pmap->pm_pdpt); #else lazyptd = vtophys(pmap->pm_pdir); #endif mymask = PCPU_GET(cpumask); if (mask == mymask) { lazymask = &pmap->pm_active; pmap_lazyfix_self(mymask); } else { atomic_store_rel_int((u_int *)&lazymask, (u_int)&pmap->pm_active); atomic_store_rel_int(&lazywait, 0); ipi_selected(mask, IPI_LAZYPMAP); while (lazywait == 0) { ia32_pause(); if (--spins == 0) break; } } mtx_unlock_spin(&smp_ipi_mtx); if (spins == 0) printf("pmap_lazyfix: spun for 50000000\n"); } } #else /* SMP */ /* * Cleaning up on uniprocessor is easy. For various reasons, we're * unlikely to have to even execute this code, including the fact * that the cleanup is deferred until the parent does a wait(2), which * means that another userland process has run. */ static void pmap_lazyfix(pmap_t pmap) { u_int cr3; cr3 = vtophys(pmap->pm_pdir); if (cr3 == rcr3()) { load_cr3(PCPU_GET(curpcb)->pcb_cr3); pmap->pm_active &= ~(PCPU_GET(cpumask)); } } #endif /* SMP */ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. */ void pmap_release(pmap_t pmap) { vm_page_t m, ptdpg[NPGPTD]; int i; KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); KASSERT(pmap->pm_root == NULL, ("pmap_release: pmap has reserved page table page(s)")); pmap_lazyfix(pmap); mtx_lock_spin(&allpmaps_lock); LIST_REMOVE(pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); for (i = 0; i < NPGPTD; i++) ptdpg[i] = PHYS_TO_VM_PAGE(pmap->pm_pdir[PTDPTDI + i] & PG_FRAME); bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) * sizeof(*pmap->pm_pdir)); pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD); for (i = 0; i < NPGPTD; i++) { m = ptdpg[i]; #ifdef PAE KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME), ("pmap_release: got wrong ptd page")); #endif m->wire_count--; atomic_subtract_int(&cnt.v_wire_count, 1); vm_page_free_zero(m); } PMAP_LOCK_DESTROY(pmap); } static int kvm_size(SYSCTL_HANDLER_ARGS) { unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; return (sysctl_handle_long(oidp, &ksize, 0, req)); } SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_size, "IU", "Size of KVM"); static int kvm_free(SYSCTL_HANDLER_ARGS) { unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; return (sysctl_handle_long(oidp, &kfree, 0, req)); } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "IU", "Amount of KVM free"); /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { vm_paddr_t ptppaddr; vm_page_t nkpg; pd_entry_t newpdir; mtx_assert(&kernel_map->system_mtx, MA_OWNED); addr = roundup2(addr, NBPDR); if (addr - 1 >= kernel_map->max_offset) addr = kernel_map->max_offset; while (kernel_vm_end < addr) { if (pdir_pde(PTD, kernel_vm_end)) { kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } continue; } nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDRSHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); nkpt++; if ((nkpg->flags & PG_ZERO) == 0) pmap_zero_page(nkpg); ptppaddr = VM_PAGE_TO_PHYS(nkpg); newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); pdir_pde(KPTD, kernel_vm_end) = pgeflag | newpdir; pmap_kenter_pde(kernel_vm_end, newpdir); kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } } } /*************************************************** * page management routines. ***************************************************/ CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); CTASSERT(_NPCM == 11); static __inline struct pv_chunk * pv_to_chunk(pv_entry_t pv) { return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); } #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) #define PC_FREE0_9 0xfffffffful /* Free values for index 0 through 9 */ #define PC_FREE10 0x0000fffful /* Free values for index 10 */ static uint32_t pc_freemask[11] = { PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE10 }; SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, "Current number of pv entries"); #ifdef PV_STATS static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, "Current number of pv entry chunks"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, "Current number of pv entry chunks allocated"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, "Current number of pv entry chunks frees"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, "Number of times tried to get a chunk page but failed."); static long pv_entry_frees, pv_entry_allocs; static int pv_entry_spare; SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, "Current number of pv entry frees"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, "Current number of pv entry allocs"); SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, "Current number of spare pv entries"); static int pmap_collect_inactive, pmap_collect_active; SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_inactive, CTLFLAG_RD, &pmap_collect_inactive, 0, "Current number times pmap_collect called on inactive queue"); SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_active, CTLFLAG_RD, &pmap_collect_active, 0, "Current number times pmap_collect called on active queue"); #endif /* * We are in a serious low memory condition. Resort to * drastic measures to free some pages so we can allocate * another pv entry chunk. This is normally called to * unmap inactive pages, and if necessary, active pages. */ static void pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq) { pd_entry_t *pde; pmap_t pmap; pt_entry_t *pte, tpte; pv_entry_t next_pv, pv; vm_offset_t va; vm_page_t m, free; sched_pin(); TAILQ_FOREACH(m, &vpq->pl, pageq) { if (m->hold_count || m->busy) continue; TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); /* Avoid deadlock and lock recursion. */ if (pmap > locked_pmap) PMAP_LOCK(pmap); else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) continue; pmap->pm_stats.resident_count--; pde = pmap_pde(pmap, va); KASSERT((*pde & PG_PS) == 0, ("pmap_collect: found" " a 4mpage in page %p's pv list", m)); pte = pmap_pte_quick(pmap, va); tpte = pte_load_clear(pte); KASSERT((tpte & PG_W) == 0, ("pmap_collect: wired pte %#jx", (uintmax_t)tpte)); if (tpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); free = NULL; pmap_unuse_pt(pmap, va, &free); pmap_invalidate_page(pmap, va); pmap_free_zero_pages(free); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); free_pv_entry(pmap, pv); if (pmap != locked_pmap) PMAP_UNLOCK(pmap); } if (TAILQ_EMPTY(&m->md.pv_list) && TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list)) vm_page_flag_clear(m, PG_WRITEABLE); } sched_unpin(); } /* * free the pv_entry back to the free list */ static void free_pv_entry(pmap_t pmap, pv_entry_t pv) { vm_page_t m; struct pv_chunk *pc; int idx, field, bit; mtx_assert(&vm_page_queue_mtx, MA_OWNED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(pv_entry_frees++); PV_STAT(pv_entry_spare++); pv_entry_count--; pc = pv_to_chunk(pv); idx = pv - &pc->pc_pventry[0]; field = idx / 32; bit = idx % 32; pc->pc_map[field] |= 1ul << bit; /* move to head of list */ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); for (idx = 0; idx < _NPCM; idx++) if (pc->pc_map[idx] != pc_freemask[idx]) { TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); return; } PV_STAT(pv_entry_spare -= _NPCPV); PV_STAT(pc_chunk_count--); PV_STAT(pc_chunk_frees++); /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); pmap_qremove((vm_offset_t)pc, 1); vm_page_unwire(m, 0); vm_page_free(m); pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); } /* * get a new pv_entry, allocating a block from the system * when needed. */ static pv_entry_t get_pv_entry(pmap_t pmap, int try) { static const struct timeval printinterval = { 60, 0 }; static struct timeval lastprint; static vm_pindex_t colour; struct vpgqueues *pq; int bit, field; pv_entry_t pv; struct pv_chunk *pc; vm_page_t m; PMAP_LOCK_ASSERT(pmap, MA_OWNED); mtx_assert(&vm_page_queue_mtx, MA_OWNED); PV_STAT(pv_entry_allocs++); pv_entry_count++; if (pv_entry_count > pv_entry_high_water) if (ratecheck(&lastprint, &printinterval)) printf("Approaching the limit on PV entries, consider " "increasing either the vm.pmap.shpgperproc or the " "vm.pmap.pv_entry_max tunable.\n"); pq = NULL; retry: pc = TAILQ_FIRST(&pmap->pm_pvchunk); if (pc != NULL) { for (field = 0; field < _NPCM; field++) { if (pc->pc_map[field]) { bit = bsfl(pc->pc_map[field]); break; } } if (field < _NPCM) { pv = &pc->pc_pventry[field * 32 + bit]; pc->pc_map[field] &= ~(1ul << bit); /* If this was the last item, move it to tail */ for (field = 0; field < _NPCM; field++) if (pc->pc_map[field] != 0) { PV_STAT(pv_entry_spare--); return (pv); /* not full, return */ } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(pv_entry_spare--); return (pv); } } /* * Access to the ptelist "pv_vafree" is synchronized by the page * queues lock. If "pv_vafree" is currently non-empty, it will * remain non-empty until pmap_ptelist_alloc() completes. */ if (pv_vafree == 0 || (m = vm_page_alloc(NULL, colour, (pq == &vm_page_queues[PQ_ACTIVE] ? VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { if (try) { pv_entry_count--; PV_STAT(pc_chunk_tryfail++); return (NULL); } /* * Reclaim pv entries: At first, destroy mappings to * inactive pages. After that, if a pv chunk entry * is still needed, destroy mappings to active pages. */ if (pq == NULL) { PV_STAT(pmap_collect_inactive++); pq = &vm_page_queues[PQ_INACTIVE]; } else if (pq == &vm_page_queues[PQ_INACTIVE]) { PV_STAT(pmap_collect_active++); pq = &vm_page_queues[PQ_ACTIVE]; } else panic("get_pv_entry: increase vm.pmap.shpgperproc"); pmap_collect(pmap, pq); goto retry; } PV_STAT(pc_chunk_count++); PV_STAT(pc_chunk_allocs++); colour++; pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree); pmap_qenter((vm_offset_t)pc, &m, 1); pc->pc_pmap = pmap; pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */ for (field = 1; field < _NPCM; field++) pc->pc_map[field] = pc_freemask[field]; pv = &pc->pc_pventry[0]; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(pv_entry_spare += _NPCPV - 1); return (pv); } static __inline pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; mtx_assert(&vm_page_queue_mtx, MA_OWNED); TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); break; } } return (pv); } static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) { struct md_page *pvh; pv_entry_t pv; vm_offset_t va_last; vm_page_t m; mtx_assert(&vm_page_queue_mtx, MA_OWNED); KASSERT((pa & PDRMASK) == 0, ("pmap_pv_demote_pde: pa is not 4mpage aligned")); /* * Transfer the 4mpage's pv entry for this mapping to the first * page's pv list. */ pvh = pa_to_pvh(pa); va = trunc_4mpage(va); pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found")); m = PHYS_TO_VM_PAGE(pa); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); /* Instantiate the remaining NPTEPG - 1 pv entries. */ va_last = va + NBPDR - PAGE_SIZE; do { m++; KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, ("pmap_pv_demote_pde: page %p is not managed", m)); va += PAGE_SIZE; pmap_insert_entry(pmap, va, m); } while (va < va_last); } static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) { struct md_page *pvh; pv_entry_t pv; vm_offset_t va_last; vm_page_t m; mtx_assert(&vm_page_queue_mtx, MA_OWNED); KASSERT((pa & PDRMASK) == 0, ("pmap_pv_promote_pde: pa is not 4mpage aligned")); /* * Transfer the first page's pv entry for this mapping to the * 4mpage's pv list. Aside from avoiding the cost of a call * to get_pv_entry(), a transfer avoids the possibility that * get_pv_entry() calls pmap_collect() and that pmap_collect() * removes one of the mappings that is being promoted. */ m = PHYS_TO_VM_PAGE(pa); va = trunc_4mpage(va); pv = pmap_pvh_remove(&m->md, pmap, va); KASSERT(pv != NULL, ("pmap_pv_promote_pde: pv not found")); pvh = pa_to_pvh(pa); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); /* Free the remaining NPTEPG - 1 pv entries. */ va_last = va + NBPDR - PAGE_SIZE; do { m++; va += PAGE_SIZE; pmap_pvh_free(&m->md, pmap, va); } while (va < va_last); } static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); free_pv_entry(pmap, pv); } static void pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) { struct md_page *pvh; mtx_assert(&vm_page_queue_mtx, MA_OWNED); pmap_pvh_free(&m->md, pmap, va); if (TAILQ_EMPTY(&m->md.pv_list)) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) vm_page_flag_clear(m, PG_WRITEABLE); } } /* * Create a pv entry for page at pa for * (pmap, va). */ static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) { pv_entry_t pv; PMAP_LOCK_ASSERT(pmap, MA_OWNED); mtx_assert(&vm_page_queue_mtx, MA_OWNED); pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); } /* * Conditionally create a pv entry. */ static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) { pv_entry_t pv; PMAP_LOCK_ASSERT(pmap, MA_OWNED); mtx_assert(&vm_page_queue_mtx, MA_OWNED); if (pv_entry_count < pv_entry_high_water && (pv = get_pv_entry(pmap, TRUE)) != NULL) { pv->pv_va = va; TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); return (TRUE); } else return (FALSE); } /* * Create the pv entries for each of the pages within a superpage. */ static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) { struct md_page *pvh; pv_entry_t pv; mtx_assert(&vm_page_queue_mtx, MA_OWNED); if (pv_entry_count < pv_entry_high_water && (pv = get_pv_entry(pmap, TRUE)) != NULL) { pv->pv_va = va; pvh = pa_to_pvh(pa); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); return (TRUE); } else return (FALSE); } /* * Fills a page table page with mappings to consecutive physical pages. */ static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte) { pt_entry_t *pte; for (pte = firstpte; pte < firstpte + NPTEPG; pte++) { *pte = newpte; newpte += PAGE_SIZE; } } /* * Tries to demote a 2- or 4MB page mapping. If demotion fails, the * 2- or 4MB page mapping is invalidated. */ static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { pd_entry_t newpde, oldpde; pt_entry_t *firstpte, newpte; vm_paddr_t mptepa; vm_page_t free, mpte; PMAP_LOCK_ASSERT(pmap, MA_OWNED); oldpde = *pde; KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V), ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V")); mpte = pmap_lookup_pt_page(pmap, va); if (mpte != NULL) pmap_remove_pt_page(pmap, mpte); else { KASSERT((oldpde & PG_W) == 0, ("pmap_demote_pde: page table page for a wired mapping" " is missing")); /* * Invalidate the 2- or 4MB page mapping and return * "failure" if the mapping was never accessed or the * allocation of the new page table page fails. */ if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL, va >> PDRSHIFT, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED)) == NULL) { free = NULL; pmap_remove_pde(pmap, pde, trunc_4mpage(va), &free); pmap_invalidate_page(pmap, trunc_4mpage(va)); pmap_free_zero_pages(free); CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x" " in pmap %p", va, pmap); return (FALSE); } if (va < VM_MAXUSER_ADDRESS) pmap->pm_stats.resident_count++; } mptepa = VM_PAGE_TO_PHYS(mpte); /* * If the page mapping is in the kernel's address space, then the * KPTmap can provide access to the page table page. Otherwise, * temporarily map the page table page (mpte) into the kernel's * address space at either PADDR1 or PADDR2. */ if (va >= KERNBASE) firstpte = &KPTmap[i386_btop(trunc_4mpage(va))]; else if (curthread->td_pinned > 0 && mtx_owned(&vm_page_queue_mtx)) { if ((*PMAP1 & PG_FRAME) != mptepa) { *PMAP1 = mptepa | PG_RW | PG_V | PG_A | PG_M; #ifdef SMP PMAP1cpu = PCPU_GET(cpuid); #endif invlcaddr(PADDR1); PMAP1changed++; } else #ifdef SMP if (PMAP1cpu != PCPU_GET(cpuid)) { PMAP1cpu = PCPU_GET(cpuid); invlcaddr(PADDR1); PMAP1changedcpu++; } else #endif PMAP1unchanged++; firstpte = PADDR1; } else { mtx_lock(&PMAP2mutex); if ((*PMAP2 & PG_FRAME) != mptepa) { *PMAP2 = mptepa | PG_RW | PG_V | PG_A | PG_M; pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2); } firstpte = PADDR2; } newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V; KASSERT((oldpde & PG_A) != 0, ("pmap_demote_pde: oldpde is missing PG_A")); KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW, ("pmap_demote_pde: oldpde is missing PG_M")); newpte = oldpde & ~PG_PS; if ((newpte & PG_PDE_PAT) != 0) newpte ^= PG_PDE_PAT | PG_PTE_PAT; /* * If the page table page is new, initialize it. */ if (mpte->wire_count == 1) { mpte->wire_count = NPTEPG; pmap_fill_ptp(firstpte, newpte); } KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME), ("pmap_demote_pde: firstpte and newpte map different physical" " addresses")); /* * If the mapping has changed attributes, update the page table * entries. */ if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE)) pmap_fill_ptp(firstpte, newpte); /* * Demote the mapping. This pmap is locked. The old PDE has * PG_A set. If the old PDE has PG_RW set, it also has PG_M * set. Thus, there is no danger of a race with another * processor changing the setting of PG_A and/or PG_M between * the read above and the store below. */ if (workaround_erratum383) pmap_update_pde(pmap, va, pde, newpde); else if (pmap == kernel_pmap) pmap_kenter_pde(va, newpde); else pde_store(pde, newpde); if (firstpte == PADDR2) mtx_unlock(&PMAP2mutex); /* * Invalidate the recursive mapping of the page table page. */ pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); /* * Demote the pv entry. This depends on the earlier demotion * of the mapping. Specifically, the (re)creation of a per- * page pv entry might trigger the execution of pmap_collect(), * which might reclaim a newly (re)created per-page pv entry * and destroy the associated mapping. In order to destroy * the mapping, the PDE must have already changed from mapping * the 2mpage to referencing the page table page. */ if ((oldpde & PG_MANAGED) != 0) pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME); pmap_pde_demotions++; CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#x" " in pmap %p", va, pmap); return (TRUE); } /* * pmap_remove_pde: do the things to unmap a superpage in a process */ static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, vm_page_t *free) { struct md_page *pvh; pd_entry_t oldpde; vm_offset_t eva, va; vm_page_t m, mpte; PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((sva & PDRMASK) == 0, ("pmap_remove_pde: sva is not 4mpage aligned")); oldpde = pte_load_clear(pdq); if (oldpde & PG_W) pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE; /* * Machines that don't support invlpg, also don't support * PG_G. */ if (oldpde & PG_G) pmap_invalidate_page(kernel_pmap, sva); pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; if (oldpde & PG_MANAGED) { pvh = pa_to_pvh(oldpde & PG_PS_FRAME); pmap_pvh_free(pvh, pmap, sva); eva = sva + NBPDR; for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); va < eva; va += PAGE_SIZE, m++) { if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if (oldpde & PG_A) vm_page_flag_set(m, PG_REFERENCED); if (TAILQ_EMPTY(&m->md.pv_list) && TAILQ_EMPTY(&pvh->pv_list)) vm_page_flag_clear(m, PG_WRITEABLE); } } if (pmap == kernel_pmap) { if (!pmap_demote_pde(pmap, pdq, sva)) panic("pmap_remove_pde: failed demotion"); } else { mpte = pmap_lookup_pt_page(pmap, sva); if (mpte != NULL) { pmap_remove_pt_page(pmap, mpte); pmap->pm_stats.resident_count--; KASSERT(mpte->wire_count == NPTEPG, ("pmap_remove_pde: pte page wire count error")); mpte->wire_count = 0; pmap_add_delayed_free_list(mpte, free, FALSE); atomic_subtract_int(&cnt.v_wire_count, 1); } } } /* * pmap_remove_pte: do the things to unmap a page in a process */ static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, vm_page_t *free) { pt_entry_t oldpte; vm_page_t m; mtx_assert(&vm_page_queue_mtx, MA_OWNED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); oldpte = pte_load_clear(ptq); if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; /* * Machines that don't support invlpg, also don't support * PG_G. */ if (oldpte & PG_G) pmap_invalidate_page(kernel_pmap, va); pmap->pm_stats.resident_count -= 1; if (oldpte & PG_MANAGED) { m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME); if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if (oldpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); pmap_remove_entry(pmap, m, va); } return (pmap_unuse_pt(pmap, va, free)); } /* * Remove a single page from a process address space */ static void pmap_remove_page(pmap_t pmap, vm_offset_t va, vm_page_t *free) { pt_entry_t *pte; mtx_assert(&vm_page_queue_mtx, MA_OWNED); KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); PMAP_LOCK_ASSERT(pmap, MA_OWNED); if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0) return; pmap_remove_pte(pmap, pte, va, free); pmap_invalidate_page(pmap, va); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t pdnxt; pd_entry_t ptpaddr; pt_entry_t *pte; vm_page_t free = NULL; int anyvalid; /* * Perform an unsynchronized read. This is, however, safe. */ if (pmap->pm_stats.resident_count == 0) return; anyvalid = 0; vm_page_lock_queues(); sched_pin(); PMAP_LOCK(pmap); /* * special handling of removing one page. a very * common operation and easy to short circuit some * code. */ if ((sva + PAGE_SIZE == eva) && ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { pmap_remove_page(pmap, sva, &free); goto out; } for (; sva < eva; sva = pdnxt) { unsigned pdirindex; /* * Calculate index for next page table. */ pdnxt = (sva + NBPDR) & ~PDRMASK; if (pdnxt < sva) pdnxt = eva; if (pmap->pm_stats.resident_count == 0) break; pdirindex = sva >> PDRSHIFT; ptpaddr = pmap->pm_pdir[pdirindex]; /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. */ if (ptpaddr == 0) continue; /* * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { /* * Are we removing the entire large page? If not, * demote the mapping and fall through. */ if (sva + NBPDR == pdnxt && eva >= pdnxt) { /* * The TLB entry for a PG_G mapping is * invalidated by pmap_remove_pde(). */ if ((ptpaddr & PG_G) == 0) anyvalid = 1; pmap_remove_pde(pmap, &pmap->pm_pdir[pdirindex], sva, &free); continue; } else if (!pmap_demote_pde(pmap, &pmap->pm_pdir[pdirindex], sva)) { /* The large page mapping was destroyed. */ continue; } } /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. */ if (pdnxt > eva) pdnxt = eva; for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, sva += PAGE_SIZE) { if (*pte == 0) continue; /* * The TLB entry for a PG_G mapping is invalidated * by pmap_remove_pte(). */ if ((*pte & PG_G) == 0) anyvalid = 1; if (pmap_remove_pte(pmap, pte, sva, &free)) break; } } out: sched_unpin(); if (anyvalid) pmap_invalidate_all(pmap); vm_page_unlock_queues(); PMAP_UNLOCK(pmap); pmap_free_zero_pages(free); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ void pmap_remove_all(vm_page_t m) { struct md_page *pvh; pv_entry_t pv; pmap_t pmap; pt_entry_t *pte, tpte; pd_entry_t *pde; vm_offset_t va; vm_page_t free; KASSERT((m->flags & PG_FICTITIOUS) == 0, ("pmap_remove_all: page %p is fictitious", m)); free = NULL; vm_page_lock_queues(); sched_pin(); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, va); (void)pmap_demote_pde(pmap, pde, va); PMAP_UNLOCK(pmap); } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pmap->pm_stats.resident_count--; pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_remove_all: found" " a 4mpage in page %p's pv list", m)); pte = pmap_pte_quick(pmap, pv->pv_va); tpte = pte_load_clear(pte); if (tpte & PG_W) pmap->pm_stats.wired_count--; if (tpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); /* * Update the vm_page_t clean and reference bits. */ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); pmap_unuse_pt(pmap, pv->pv_va, &free); pmap_invalidate_page(pmap, pv->pv_va); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } vm_page_flag_clear(m, PG_WRITEABLE); sched_unpin(); vm_page_unlock_queues(); pmap_free_zero_pages(free); } /* * pmap_protect_pde: do the things to protect a 4mpage in a process */ static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot) { pd_entry_t newpde, oldpde; vm_offset_t eva, va; vm_page_t m; boolean_t anychanged; PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((sva & PDRMASK) == 0, ("pmap_protect_pde: sva is not 4mpage aligned")); anychanged = FALSE; retry: oldpde = newpde = *pde; if (oldpde & PG_MANAGED) { eva = sva + NBPDR; for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); va < eva; va += PAGE_SIZE, m++) if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); } if ((prot & VM_PROT_WRITE) == 0) newpde &= ~(PG_RW | PG_M); #ifdef PAE if ((prot & VM_PROT_EXECUTE) == 0) newpde |= pg_nx; #endif if (newpde != oldpde) { if (!pde_cmpset(pde, oldpde, newpde)) goto retry; if (oldpde & PG_G) pmap_invalidate_page(pmap, sva); else anychanged = TRUE; } return (anychanged); } /* * Set the physical protection on the * specified range of this map as requested. */ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { vm_offset_t pdnxt; pd_entry_t ptpaddr; pt_entry_t *pte; int anychanged; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } #ifdef PAE if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) == (VM_PROT_WRITE|VM_PROT_EXECUTE)) return; #else if (prot & VM_PROT_WRITE) return; #endif anychanged = 0; vm_page_lock_queues(); sched_pin(); PMAP_LOCK(pmap); for (; sva < eva; sva = pdnxt) { pt_entry_t obits, pbits; unsigned pdirindex; pdnxt = (sva + NBPDR) & ~PDRMASK; if (pdnxt < sva) pdnxt = eva; pdirindex = sva >> PDRSHIFT; ptpaddr = pmap->pm_pdir[pdirindex]; /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. */ if (ptpaddr == 0) continue; /* * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { /* * Are we protecting the entire large page? If not, * demote the mapping and fall through. */ if (sva + NBPDR == pdnxt && eva >= pdnxt) { /* * The TLB entry for a PG_G mapping is * invalidated by pmap_protect_pde(). */ if (pmap_protect_pde(pmap, &pmap->pm_pdir[pdirindex], sva, prot)) anychanged = 1; continue; } else if (!pmap_demote_pde(pmap, &pmap->pm_pdir[pdirindex], sva)) { /* The large page mapping was destroyed. */ continue; } } if (pdnxt > eva) pdnxt = eva; for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, sva += PAGE_SIZE) { vm_page_t m; retry: /* * Regardless of whether a pte is 32 or 64 bits in * size, PG_RW, PG_A, and PG_M are among the least * significant 32 bits. */ obits = pbits = *pte; if ((pbits & PG_V) == 0) continue; if ((prot & VM_PROT_WRITE) == 0) { if ((pbits & (PG_MANAGED | PG_M | PG_RW)) == (PG_MANAGED | PG_M | PG_RW)) { m = PHYS_TO_VM_PAGE(pbits & PG_FRAME); vm_page_dirty(m); } pbits &= ~(PG_RW | PG_M); } #ifdef PAE if ((prot & VM_PROT_EXECUTE) == 0) pbits |= pg_nx; #endif if (pbits != obits) { #ifdef PAE if (!atomic_cmpset_64(pte, obits, pbits)) goto retry; #else if (!atomic_cmpset_int((u_int *)pte, obits, pbits)) goto retry; #endif if (obits & PG_G) pmap_invalidate_page(pmap, sva); else anychanged = 1; } } } sched_unpin(); if (anychanged) pmap_invalidate_all(pmap); vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } /* * Tries to promote the 512 or 1024, contiguous 4KB page mappings that are * within a single page table page (PTP) to a single 2- or 4MB page mapping. * For promotion to occur, two conditions must be met: (1) the 4KB page * mappings must map aligned, contiguous physical memory and (2) the 4KB page * mappings must have identical characteristics. * * Managed (PG_MANAGED) mappings within the kernel address space are not * promoted. The reason is that kernel PDEs are replicated in each pmap but * pmap_clear_ptes() and pmap_ts_referenced() only read the PDE from the kernel * pmap. */ static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { pd_entry_t newpde; pt_entry_t *firstpte, oldpte, pa, *pte; vm_offset_t oldpteva; vm_page_t mpte; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * Examine the first PTE in the specified PTP. Abort if this PTE is * either invalid, unused, or does not map the first 4KB physical page * within a 2- or 4MB page. */ firstpte = pmap_pte_quick(pmap, trunc_4mpage(va)); setpde: newpde = *firstpte; if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V)) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" " in pmap %p", va, pmap); return; } if ((*firstpte & PG_MANAGED) != 0 && pmap == kernel_pmap) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" " in pmap %p", va, pmap); return; } if ((newpde & (PG_M | PG_RW)) == PG_RW) { /* * When PG_M is already clear, PG_RW can be cleared without * a TLB invalidation. */ if (!atomic_cmpset_int((u_int *)firstpte, newpde, newpde & ~PG_RW)) goto setpde; newpde &= ~PG_RW; } /* * Examine each of the other PTEs in the specified PTP. Abort if this * PTE maps an unexpected 4KB physical page or does not have identical * characteristics to the first PTE. */ pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE; for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) { setpte: oldpte = *pte; if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" " in pmap %p", va, pmap); return; } if ((oldpte & (PG_M | PG_RW)) == PG_RW) { /* * When PG_M is already clear, PG_RW can be cleared * without a TLB invalidation. */ if (!atomic_cmpset_int((u_int *)pte, oldpte, oldpte & ~PG_RW)) goto setpte; oldpte &= ~PG_RW; oldpteva = (oldpte & PG_FRAME & PDRMASK) | (va & ~PDRMASK); CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#x" " in pmap %p", oldpteva, pmap); } if ((oldpte & PG_PTE_PROMOTE) != (newpde & PG_PTE_PROMOTE)) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" " in pmap %p", va, pmap); return; } pa -= PAGE_SIZE; } /* * Save the page table page in its current state until the PDE * mapping the superpage is demoted by pmap_demote_pde() or * destroyed by pmap_remove_pde(). */ mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME); KASSERT(mpte >= vm_page_array && mpte < &vm_page_array[vm_page_array_size], ("pmap_promote_pde: page table page is out of range")); KASSERT(mpte->pindex == va >> PDRSHIFT, ("pmap_promote_pde: page table page's pindex is wrong")); pmap_insert_pt_page(pmap, mpte); /* * Promote the pv entries. */ if ((newpde & PG_MANAGED) != 0) pmap_pv_promote_pde(pmap, va, newpde & PG_PS_FRAME); /* * Propagate the PAT index to its proper position. */ if ((newpde & PG_PTE_PAT) != 0) newpde ^= PG_PDE_PAT | PG_PTE_PAT; /* * Map the superpage. */ if (workaround_erratum383) pmap_update_pde(pmap, va, pde, PG_PS | newpde); else if (pmap == kernel_pmap) pmap_kenter_pde(va, PG_PS | newpde); else pde_store(pde, PG_PS | newpde); pmap_pde_promotions++; CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#x" " in pmap %p", va, pmap); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ void pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, vm_prot_t prot, boolean_t wired) { pd_entry_t *pde; pt_entry_t *pte; pt_entry_t newpte, origpte; pv_entry_t pv; vm_paddr_t opa, pa; vm_page_t mpte, om; boolean_t invlva; va = trunc_page(va); KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS, ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va)); KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 || (m->oflags & VPO_BUSY) != 0, ("pmap_enter: page %p is not busy", m)); mpte = NULL; vm_page_lock_queues(); PMAP_LOCK(pmap); sched_pin(); /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { mpte = pmap_allocpte(pmap, va, M_WAITOK); } pde = pmap_pde(pmap, va); if ((*pde & PG_PS) != 0) panic("pmap_enter: attempted pmap_enter on 4MB page"); pte = pmap_pte_quick(pmap, va); /* * Page Directory table entry not valid, we need a new PT page */ if (pte == NULL) { panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x", (uintmax_t)pmap->pm_pdir[PTDPTDI], va); } pa = VM_PAGE_TO_PHYS(m); om = NULL; origpte = *pte; opa = origpte & PG_FRAME; /* * Mapping has not changed, must be protection or wiring change. */ if (origpte && (opa == pa)) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if (wired && ((origpte & PG_W) == 0)) pmap->pm_stats.wired_count++; else if (!wired && (origpte & PG_W)) pmap->pm_stats.wired_count--; /* * Remove extra pte reference */ if (mpte) mpte->wire_count--; if (origpte & PG_MANAGED) { om = m; pa |= PG_MANAGED; } goto validate; } pv = NULL; /* * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. */ if (opa) { if (origpte & PG_W) pmap->pm_stats.wired_count--; if (origpte & PG_MANAGED) { om = PHYS_TO_VM_PAGE(opa); pv = pmap_pvh_remove(&om->md, pmap, va); } if (mpte != NULL) { mpte->wire_count--; KASSERT(mpte->wire_count > 0, ("pmap_enter: missing reference to page table page," " va: 0x%x", va)); } } else pmap->pm_stats.resident_count++; /* * Enter on the PV list if part of our managed memory. */ if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) { KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, ("pmap_enter: managed mapping within the clean submap")); if (pv == NULL) pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); pa |= PG_MANAGED; } else if (pv != NULL) free_pv_entry(pmap, pv); /* * Increment counters */ if (wired) pmap->pm_stats.wired_count++; validate: /* * Now validate mapping with desired protection/wiring. */ newpte = (pt_entry_t)(pa | pmap_cache_bits(m->md.pat_mode, 0) | PG_V); if ((prot & VM_PROT_WRITE) != 0) { newpte |= PG_RW; if ((newpte & PG_MANAGED) != 0) vm_page_flag_set(m, PG_WRITEABLE); } #ifdef PAE if ((prot & VM_PROT_EXECUTE) == 0) newpte |= pg_nx; #endif if (wired) newpte |= PG_W; if (va < VM_MAXUSER_ADDRESS) newpte |= PG_U; if (pmap == kernel_pmap) newpte |= pgeflag; /* * if the mapping or permission bits are different, we need * to update the pte. */ if ((origpte & ~(PG_M|PG_A)) != newpte) { newpte |= PG_A; if ((access & VM_PROT_WRITE) != 0) newpte |= PG_M; if (origpte & PG_V) { invlva = FALSE; origpte = pte_load_store(pte, newpte); if (origpte & PG_A) { if (origpte & PG_MANAGED) vm_page_flag_set(om, PG_REFERENCED); if (opa != VM_PAGE_TO_PHYS(m)) invlva = TRUE; #ifdef PAE if ((origpte & PG_NX) == 0 && (newpte & PG_NX) != 0) invlva = TRUE; #endif } if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if ((origpte & PG_MANAGED) != 0) vm_page_dirty(om); if ((prot & VM_PROT_WRITE) == 0) invlva = TRUE; } if ((origpte & PG_MANAGED) != 0 && TAILQ_EMPTY(&om->md.pv_list) && TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list)) vm_page_flag_clear(om, PG_WRITEABLE); if (invlva) pmap_invalidate_page(pmap, va); } else pte_store(pte, newpte); } /* * If both the page table page and the reservation are fully * populated, then attempt promotion. */ if ((mpte == NULL || mpte->wire_count == NPTEPG) && pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0) pmap_promote_pde(pmap, pde, va); sched_unpin(); vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } /* * Tries to create a 2- or 4MB page mapping. Returns TRUE if successful and * FALSE otherwise. Fails if (1) a page table page cannot be allocated without * blocking, (2) a mapping already exists at the specified virtual address, or * (3) a pv entry cannot be allocated without reclaiming another pv entry. */ static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { pd_entry_t *pde, newpde; mtx_assert(&vm_page_queue_mtx, MA_OWNED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); pde = pmap_pde(pmap, va); if (*pde != 0) { CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" " in pmap %p", va, pmap); return (FALSE); } newpde = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 1) | PG_PS | PG_V; if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) { newpde |= PG_MANAGED; /* * Abort this mapping if its PV entry could not be created. */ if (!pmap_pv_insert_pde(pmap, va, VM_PAGE_TO_PHYS(m))) { CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" " in pmap %p", va, pmap); return (FALSE); } } #ifdef PAE if ((prot & VM_PROT_EXECUTE) == 0) newpde |= pg_nx; #endif if (va < VM_MAXUSER_ADDRESS) newpde |= PG_U; /* * Increment counters. */ pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; /* * Map the superpage. */ pde_store(pde, newpde); pmap_pde_mappings++; CTR2(KTR_PMAP, "pmap_enter_pde: success for va %#lx" " in pmap %p", va, pmap); return (TRUE); } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ void pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { vm_offset_t va; vm_page_t m, mpte; vm_pindex_t diff, psize; VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED); psize = atop(end - start); mpte = NULL; m = m_start; vm_page_lock_queues(); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { va = start + ptoa(diff); if ((va & PDRMASK) == 0 && va + NBPDR <= end && (VM_PAGE_TO_PHYS(m) & PDRMASK) == 0 && pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0 && pmap_enter_pde(pmap, va, m, prot)) m = &m[NBPDR / PAGE_SIZE - 1]; else mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte); m = TAILQ_NEXT(m, listq); } vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * but is *MUCH* faster than pmap_enter... */ void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { vm_page_lock_queues(); PMAP_LOCK(pmap); (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL); vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte) { pt_entry_t *pte; vm_paddr_t pa; vm_page_t free; KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0, ("pmap_enter_quick_locked: managed mapping within the clean submap")); mtx_assert(&vm_page_queue_mtx, MA_OWNED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { unsigned ptepindex; pd_entry_t ptepa; /* * Calculate pagetable page index */ ptepindex = va >> PDRSHIFT; if (mpte && (mpte->pindex == ptepindex)) { mpte->wire_count++; } else { /* * Get the page directory entry */ ptepa = pmap->pm_pdir[ptepindex]; /* * If the page table page is mapped, we just increment * the hold count, and activate it. */ if (ptepa) { if (ptepa & PG_PS) return (NULL); mpte = PHYS_TO_VM_PAGE(ptepa & PG_FRAME); mpte->wire_count++; } else { mpte = _pmap_allocpte(pmap, ptepindex, M_NOWAIT); if (mpte == NULL) return (mpte); } } } else { mpte = NULL; } /* * This call to vtopte makes the assumption that we are * entering the page into the current pmap. In order to support * quick entry into any pmap, one would likely use pmap_pte_quick. * But that isn't as quick as vtopte. */ pte = vtopte(va); if (*pte) { if (mpte != NULL) { mpte->wire_count--; mpte = NULL; } return (mpte); } /* * Enter on the PV list if part of our managed memory. */ if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 && !pmap_try_insert_pv_entry(pmap, va, m)) { if (mpte != NULL) { free = NULL; if (pmap_unwire_pte_hold(pmap, mpte, &free)) { pmap_invalidate_page(pmap, va); pmap_free_zero_pages(free); } mpte = NULL; } return (mpte); } /* * Increment counters */ pmap->pm_stats.resident_count++; pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0); #ifdef PAE if ((prot & VM_PROT_EXECUTE) == 0) pa |= pg_nx; #endif /* * Now validate mapping with RO protection */ if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) pte_store(pte, pa | PG_V | PG_U); else pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); return (mpte); } /* * Make a temporary mapping for a physical address. This is only intended * to be used for panic dumps. */ void * pmap_kenter_temporary(vm_paddr_t pa, int i) { vm_offset_t va; va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); pmap_kenter(va, pa); invlpg(va); return ((void *)crashdumpmap); } /* * This code maps large physical mmap regions into the * processor address space. Note that some shortcuts * are taken, but the code works. */ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { pd_entry_t *pde; vm_paddr_t pa, ptepa; vm_page_t p; int pat_mode; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, ("pmap_object_init_pt: non-device object")); if (pseflag && (addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) { if (!vm_object_populate(object, pindex, pindex + atop(size))) return; p = vm_page_lookup(object, pindex); KASSERT(p->valid == VM_PAGE_BITS_ALL, ("pmap_object_init_pt: invalid page %p", p)); pat_mode = p->md.pat_mode; /* * Abort the mapping if the first page is not physically * aligned to a 2/4MB page boundary. */ ptepa = VM_PAGE_TO_PHYS(p); if (ptepa & (NBPDR - 1)) return; /* * Skip the first page. Abort the mapping if the rest of * the pages are not physically contiguous or have differing * memory attributes. */ p = TAILQ_NEXT(p, listq); for (pa = ptepa + PAGE_SIZE; pa < ptepa + size; pa += PAGE_SIZE) { KASSERT(p->valid == VM_PAGE_BITS_ALL, ("pmap_object_init_pt: invalid page %p", p)); if (pa != VM_PAGE_TO_PHYS(p) || pat_mode != p->md.pat_mode) return; p = TAILQ_NEXT(p, listq); } /* * Map using 2/4MB pages. Since "ptepa" is 2/4M aligned and * "size" is a multiple of 2/4M, adding the PAT setting to * "pa" will not affect the termination of this loop. */ PMAP_LOCK(pmap); for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa + size; pa += NBPDR) { pde = pmap_pde(pmap, addr); if (*pde == 0) { pde_store(pde, pa | PG_PS | PG_M | PG_A | PG_U | PG_RW | PG_V); pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; pmap_pde_mappings++; } /* Else continue on if the PDE is already valid. */ addr += NBPDR; } PMAP_UNLOCK(pmap); } } /* * Routine: pmap_change_wiring * Function: Change the wiring attribute for a map/virtual-address * pair. * In/out conditions: * The mapping must already exist in the pmap. */ void pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) { pd_entry_t *pde; pt_entry_t *pte; boolean_t are_queues_locked; are_queues_locked = FALSE; retry: PMAP_LOCK(pmap); pde = pmap_pde(pmap, va); if ((*pde & PG_PS) != 0) { if (!wired != ((*pde & PG_W) == 0)) { if (!are_queues_locked) { are_queues_locked = TRUE; if (!mtx_trylock(&vm_page_queue_mtx)) { PMAP_UNLOCK(pmap); vm_page_lock_queues(); goto retry; } } if (!pmap_demote_pde(pmap, pde, va)) panic("pmap_change_wiring: demotion failed"); } else goto out; } pte = pmap_pte(pmap, va); if (wired && !pmap_pte_w(pte)) pmap->pm_stats.wired_count++; else if (!wired && pmap_pte_w(pte)) pmap->pm_stats.wired_count--; /* * Wiring is not a hardware characteristic so there is no need to * invalidate TLB. */ pmap_pte_set_w(pte, wired); pmap_pte_release(pte); out: if (are_queues_locked) vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. */ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { vm_page_t free; vm_offset_t addr; vm_offset_t end_addr = src_addr + len; vm_offset_t pdnxt; if (dst_addr != src_addr) return; if (!pmap_is_current(src_pmap)) return; vm_page_lock_queues(); if (dst_pmap < src_pmap) { PMAP_LOCK(dst_pmap); PMAP_LOCK(src_pmap); } else { PMAP_LOCK(src_pmap); PMAP_LOCK(dst_pmap); } sched_pin(); for (addr = src_addr; addr < end_addr; addr = pdnxt) { pt_entry_t *src_pte, *dst_pte; vm_page_t dstmpte, srcmpte; pd_entry_t srcptepaddr; unsigned ptepindex; KASSERT(addr < UPT_MIN_ADDRESS, ("pmap_copy: invalid to pmap_copy page tables")); pdnxt = (addr + NBPDR) & ~PDRMASK; if (pdnxt < addr) pdnxt = end_addr; ptepindex = addr >> PDRSHIFT; srcptepaddr = src_pmap->pm_pdir[ptepindex]; if (srcptepaddr == 0) continue; if (srcptepaddr & PG_PS) { if (dst_pmap->pm_pdir[ptepindex] == 0 && ((srcptepaddr & PG_MANAGED) == 0 || pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr & PG_PS_FRAME))) { dst_pmap->pm_pdir[ptepindex] = srcptepaddr & ~PG_W; dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; } continue; } srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME); KASSERT(srcmpte->wire_count > 0, ("pmap_copy: source page table page is unused")); if (pdnxt > end_addr) pdnxt = end_addr; src_pte = vtopte(addr); while (addr < pdnxt) { pt_entry_t ptetemp; ptetemp = *src_pte; /* * we only virtual copy managed pages */ if ((ptetemp & PG_MANAGED) != 0) { dstmpte = pmap_allocpte(dst_pmap, addr, M_NOWAIT); if (dstmpte == NULL) goto out; dst_pte = pmap_pte_quick(dst_pmap, addr); if (*dst_pte == 0 && pmap_try_insert_pv_entry(dst_pmap, addr, PHYS_TO_VM_PAGE(ptetemp & PG_FRAME))) { /* * Clear the wired, modified, and * accessed (referenced) bits * during the copy. */ *dst_pte = ptetemp & ~(PG_W | PG_M | PG_A); dst_pmap->pm_stats.resident_count++; } else { free = NULL; if (pmap_unwire_pte_hold(dst_pmap, dstmpte, &free)) { pmap_invalidate_page(dst_pmap, addr); pmap_free_zero_pages(free); } goto out; } if (dstmpte->wire_count >= srcmpte->wire_count) break; } addr += PAGE_SIZE; src_pte++; } } out: sched_unpin(); vm_page_unlock_queues(); PMAP_UNLOCK(src_pmap); PMAP_UNLOCK(dst_pmap); } static __inline void pagezero(void *page) { #if defined(I686_CPU) if (cpu_class == CPUCLASS_686) { #if defined(CPU_ENABLE_SSE) if (cpu_feature & CPUID_SSE2) sse2_pagezero(page); else #endif i686_pagezero(page); } else #endif bzero(page, PAGE_SIZE); } /* * pmap_zero_page zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. */ void pmap_zero_page(vm_page_t m) { struct sysmaps *sysmaps; sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (*sysmaps->CMAP2) panic("pmap_zero_page: CMAP2 busy"); sched_pin(); *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | pmap_cache_bits(m->md.pat_mode, 0); invlcaddr(sysmaps->CADDR2); pagezero(sysmaps->CADDR2); *sysmaps->CMAP2 = 0; sched_unpin(); mtx_unlock(&sysmaps->lock); } /* * pmap_zero_page_area zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. * * off and size may not cover an area beyond a single hardware page. */ void pmap_zero_page_area(vm_page_t m, int off, int size) { struct sysmaps *sysmaps; sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (*sysmaps->CMAP2) panic("pmap_zero_page_area: CMAP2 busy"); sched_pin(); *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | pmap_cache_bits(m->md.pat_mode, 0); invlcaddr(sysmaps->CADDR2); if (off == 0 && size == PAGE_SIZE) pagezero(sysmaps->CADDR2); else bzero((char *)sysmaps->CADDR2 + off, size); *sysmaps->CMAP2 = 0; sched_unpin(); mtx_unlock(&sysmaps->lock); } /* * pmap_zero_page_idle zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. This * is intended to be called from the vm_pagezero process only and * outside of Giant. */ void pmap_zero_page_idle(vm_page_t m) { if (*CMAP3) panic("pmap_zero_page_idle: CMAP3 busy"); sched_pin(); *CMAP3 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | pmap_cache_bits(m->md.pat_mode, 0); invlcaddr(CADDR3); pagezero(CADDR3); *CMAP3 = 0; sched_unpin(); } /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. */ void pmap_copy_page(vm_page_t src, vm_page_t dst) { struct sysmaps *sysmaps; sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (*sysmaps->CMAP1) panic("pmap_copy_page: CMAP1 busy"); if (*sysmaps->CMAP2) panic("pmap_copy_page: CMAP2 busy"); sched_pin(); invlpg((u_int)sysmaps->CADDR1); invlpg((u_int)sysmaps->CADDR2); *sysmaps->CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A | pmap_cache_bits(src->md.pat_mode, 0); *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M | pmap_cache_bits(dst->md.pat_mode, 0); bcopy(sysmaps->CADDR1, sysmaps->CADDR2, PAGE_SIZE); *sysmaps->CMAP1 = 0; *sysmaps->CMAP2 = 0; sched_unpin(); mtx_unlock(&sysmaps->lock); } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. */ boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { struct md_page *pvh; pv_entry_t pv; int loops = 0; boolean_t rv; KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; vm_page_lock_queues(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } if (!rv && loops < 16) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } } vm_page_unlock_queues(); return (rv); } /* * pmap_page_wired_mappings: * * Return the number of managed mappings to the given physical page * that are wired. */ int pmap_page_wired_mappings(vm_page_t m) { int count; count = 0; if ((m->flags & PG_FICTITIOUS) != 0) return (count); vm_page_lock_queues(); count = pmap_pvh_wired_mappings(&m->md, count); count = pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)), count); vm_page_unlock_queues(); return (count); } /* * pmap_pvh_wired_mappings: * * Return the updated number "count" of managed mappings that are wired. */ static int pmap_pvh_wired_mappings(struct md_page *pvh, int count) { pmap_t pmap; pt_entry_t *pte; pv_entry_t pv; mtx_assert(&vm_page_queue_mtx, MA_OWNED); sched_pin(); TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); if ((*pte & PG_W) != 0) count++; PMAP_UNLOCK(pmap); } sched_unpin(); return (count); } /* * Returns TRUE if the given page is mapped individually or as part of * a 4mpage. Otherwise, returns FALSE. */ boolean_t pmap_page_is_mapped(vm_page_t m) { boolean_t rv; if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) return (FALSE); vm_page_lock_queues(); rv = !TAILQ_EMPTY(&m->md.pv_list) || !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list); vm_page_unlock_queues(); return (rv); } /* * Remove all pages from specified address space * this aids process exit speeds. Also, this code * is special cased for current process only, but * can have the more generic (and slightly slower) * mode enabled. This is much faster than pmap_remove * in the case of running down an entire address space. */ void pmap_remove_pages(pmap_t pmap) { pt_entry_t *pte, tpte; vm_page_t free = NULL; vm_page_t m, mpte, mt; pv_entry_t pv; struct md_page *pvh; struct pv_chunk *pc, *npc; int field, idx; int32_t bit; uint32_t inuse, bitmask; int allfree; if (pmap != PCPU_GET(curpmap)) { printf("warning: pmap_remove_pages called with non-current pmap\n"); return; } vm_page_lock_queues(); PMAP_LOCK(pmap); sched_pin(); TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { allfree = 1; for (field = 0; field < _NPCM; field++) { inuse = (~(pc->pc_map[field])) & pc_freemask[field]; while (inuse != 0) { bit = bsfl(inuse); bitmask = 1UL << bit; idx = field * 32 + bit; pv = &pc->pc_pventry[idx]; inuse &= ~bitmask; pte = pmap_pde(pmap, pv->pv_va); tpte = *pte; if ((tpte & PG_PS) == 0) { pte = vtopte(pv->pv_va); tpte = *pte & ~PG_PTE_PAT; } if (tpte == 0) { printf( "TPTE at %p IS ZERO @ VA %08x\n", pte, pv->pv_va); panic("bad pte"); } /* * We cannot remove wired pages from a process' mapping at this time */ if (tpte & PG_W) { allfree = 0; continue; } m = PHYS_TO_VM_PAGE(tpte & PG_FRAME); KASSERT(m->phys_addr == (tpte & PG_FRAME), ("vm_page_t %p phys_addr mismatch %016jx %016jx", m, (uintmax_t)m->phys_addr, (uintmax_t)tpte)); KASSERT(m < &vm_page_array[vm_page_array_size], ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte)); pte_clear(pte); /* * Update the vm_page_t clean/reference bits. */ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if ((tpte & PG_PS) != 0) { for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) vm_page_dirty(mt); } else vm_page_dirty(m); } /* Mark free */ PV_STAT(pv_entry_frees++); PV_STAT(pv_entry_spare++); pv_entry_count--; pc->pc_map[field] |= bitmask; if ((tpte & PG_PS) != 0) { pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; pvh = pa_to_pvh(tpte & PG_PS_FRAME); TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); if (TAILQ_EMPTY(&pvh->pv_list)) { for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) if (TAILQ_EMPTY(&mt->md.pv_list)) vm_page_flag_clear(mt, PG_WRITEABLE); } mpte = pmap_lookup_pt_page(pmap, pv->pv_va); if (mpte != NULL) { pmap_remove_pt_page(pmap, mpte); pmap->pm_stats.resident_count--; KASSERT(mpte->wire_count == NPTEPG, ("pmap_remove_pages: pte page wire count error")); mpte->wire_count = 0; pmap_add_delayed_free_list(mpte, &free, FALSE); atomic_subtract_int(&cnt.v_wire_count, 1); } } else { pmap->pm_stats.resident_count--; TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); if (TAILQ_EMPTY(&m->md.pv_list)) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) vm_page_flag_clear(m, PG_WRITEABLE); } pmap_unuse_pt(pmap, pv->pv_va, &free); } } } if (allfree) { PV_STAT(pv_entry_spare -= _NPCPV); PV_STAT(pc_chunk_count--); PV_STAT(pc_chunk_frees++); TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); pmap_qremove((vm_offset_t)pc, 1); vm_page_unwire(m, 0); vm_page_free(m); pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); } } sched_unpin(); pmap_invalidate_all(pmap); vm_page_unlock_queues(); PMAP_UNLOCK(pmap); pmap_free_zero_pages(free); } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. */ boolean_t pmap_is_modified(vm_page_t m) { boolean_t rv; KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, ("pmap_is_modified: page %p is not managed", m)); /* * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PG_WRITEABLE * is clear, no PTEs can have PG_M set. */ VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); if ((m->oflags & VPO_BUSY) == 0 && (m->flags & PG_WRITEABLE) == 0) return (FALSE); vm_page_lock_queues(); rv = pmap_is_modified_pvh(&m->md) || pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m))); vm_page_unlock_queues(); return (rv); } /* * Returns TRUE if any of the given mappings were used to modify * physical memory. Otherwise, returns FALSE. Both page and 2mpage * mappings are supported. */ static boolean_t pmap_is_modified_pvh(struct md_page *pvh) { pv_entry_t pv; pt_entry_t *pte; pmap_t pmap; boolean_t rv; mtx_assert(&vm_page_queue_mtx, MA_OWNED); rv = FALSE; sched_pin(); TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); rv = (*pte & (PG_M | PG_RW)) == (PG_M | PG_RW); PMAP_UNLOCK(pmap); if (rv) break; } sched_unpin(); return (rv); } /* * pmap_is_prefaultable: * * Return whether or not the specified virtual address is elgible * for prefault. */ boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) { pd_entry_t *pde; pt_entry_t *pte; boolean_t rv; rv = FALSE; PMAP_LOCK(pmap); pde = pmap_pde(pmap, addr); if (*pde != 0 && (*pde & PG_PS) == 0) { pte = vtopte(addr); rv = *pte == 0; } PMAP_UNLOCK(pmap); return (rv); } /* * pmap_is_referenced: * * Return whether or not the specified physical page was referenced * in any physical maps. */ boolean_t pmap_is_referenced(vm_page_t m) { boolean_t rv; KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, ("pmap_is_referenced: page %p is not managed", m)); vm_page_lock_queues(); rv = pmap_is_referenced_pvh(&m->md) || pmap_is_referenced_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m))); vm_page_unlock_queues(); return (rv); } /* * Returns TRUE if any of the given mappings were referenced and FALSE * otherwise. Both page and 4mpage mappings are supported. */ static boolean_t pmap_is_referenced_pvh(struct md_page *pvh) { pv_entry_t pv; pt_entry_t *pte; pmap_t pmap; boolean_t rv; mtx_assert(&vm_page_queue_mtx, MA_OWNED); rv = FALSE; sched_pin(); TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); rv = (*pte & (PG_A | PG_V)) == (PG_A | PG_V); PMAP_UNLOCK(pmap); if (rv) break; } sched_unpin(); return (rv); } /* * Clear the write and modified bits in each of the given page's mappings. */ void pmap_remove_write(vm_page_t m) { struct md_page *pvh; pv_entry_t next_pv, pv; pmap_t pmap; pd_entry_t *pde; pt_entry_t oldpte, *pte; vm_offset_t va; KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, ("pmap_remove_write: page %p is not managed", m)); /* * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by * another thread while the object is locked. Thus, if PG_WRITEABLE * is clear, no page table entries need updating. */ VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); if ((m->oflags & VPO_BUSY) == 0 && (m->flags & PG_WRITEABLE) == 0) return; vm_page_lock_queues(); sched_pin(); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, va); if ((*pde & PG_RW) != 0) (void)pmap_demote_pde(pmap, pde, va); PMAP_UNLOCK(pmap); } TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_clear_write: found" " a 4mpage in page %p's pv list", m)); pte = pmap_pte_quick(pmap, pv->pv_va); retry: oldpte = *pte; if ((oldpte & PG_RW) != 0) { /* * Regardless of whether a pte is 32 or 64 bits * in size, PG_RW and PG_M are among the least * significant 32 bits. */ if (!atomic_cmpset_int((u_int *)pte, oldpte, oldpte & ~(PG_RW | PG_M))) goto retry; if ((oldpte & PG_M) != 0) vm_page_dirty(m); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } vm_page_flag_clear(m, PG_WRITEABLE); sched_unpin(); vm_page_unlock_queues(); } /* * pmap_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * XXX: The exact number of bits to check and clear is a matter that * should be tested and standardized at some point in the future for * optimal aging of shared pages. */ int pmap_ts_referenced(vm_page_t m) { struct md_page *pvh; pv_entry_t pv, pvf, pvn; pmap_t pmap; pd_entry_t oldpde, *pde; pt_entry_t *pte; vm_offset_t va; int rtval = 0; KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, ("pmap_ts_referenced: page %p is not managed", m)); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); vm_page_lock_queues(); sched_pin(); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, pvn) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, va); oldpde = *pde; if ((oldpde & PG_A) != 0) { if (pmap_demote_pde(pmap, pde, va)) { if ((oldpde & PG_W) == 0) { /* * Remove the mapping to a single page * so that a subsequent access may * repromote. Since the underlying * page table page is fully populated, * this removal never frees a page * table page. */ va += VM_PAGE_TO_PHYS(m) - (oldpde & PG_PS_FRAME); pmap_remove_page(pmap, va, NULL); rtval++; if (rtval > 4) { PMAP_UNLOCK(pmap); goto out; } } } } PMAP_UNLOCK(pmap); } if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pvf = pv; do { pvn = TAILQ_NEXT(pv, pv_list); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_ts_referenced:" " found a 4mpage in page %p's pv list", m)); pte = pmap_pte_quick(pmap, pv->pv_va); if ((*pte & PG_A) != 0) { atomic_clear_int((u_int *)pte, PG_A); pmap_invalidate_page(pmap, pv->pv_va); rtval++; if (rtval > 4) pvn = NULL; } PMAP_UNLOCK(pmap); } while ((pv = pvn) != NULL && pv != pvf); } out: sched_unpin(); vm_page_unlock_queues(); return (rtval); } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { struct md_page *pvh; pv_entry_t next_pv, pv; pmap_t pmap; pd_entry_t oldpde, *pde; pt_entry_t oldpte, *pte; vm_offset_t va; KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); KASSERT((m->oflags & VPO_BUSY) == 0, ("pmap_clear_modify: page %p is busy", m)); /* * If the page is not PG_WRITEABLE, then no PTEs can have PG_M set. * If the object containing the page is locked and the page is not * VPO_BUSY, then PG_WRITEABLE cannot be concurrently set. */ if ((m->flags & PG_WRITEABLE) == 0) return; vm_page_lock_queues(); sched_pin(); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, va); oldpde = *pde; if ((oldpde & PG_RW) != 0) { if (pmap_demote_pde(pmap, pde, va)) { if ((oldpde & PG_W) == 0) { /* * Write protect the mapping to a * single page so that a subsequent * write access may repromote. */ va += VM_PAGE_TO_PHYS(m) - (oldpde & PG_PS_FRAME); pte = pmap_pte_quick(pmap, va); oldpte = *pte; if ((oldpte & PG_V) != 0) { /* * Regardless of whether a pte is 32 or 64 bits * in size, PG_RW and PG_M are among the least * significant 32 bits. */ while (!atomic_cmpset_int((u_int *)pte, oldpte, oldpte & ~(PG_M | PG_RW))) oldpte = *pte; vm_page_dirty(m); pmap_invalidate_page(pmap, va); } } } } PMAP_UNLOCK(pmap); } TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_clear_modify: found" " a 4mpage in page %p's pv list", m)); pte = pmap_pte_quick(pmap, pv->pv_va); if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { /* * Regardless of whether a pte is 32 or 64 bits * in size, PG_M is among the least significant * 32 bits. */ atomic_clear_int((u_int *)pte, PG_M); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } sched_unpin(); vm_page_unlock_queues(); } /* * pmap_clear_reference: * * Clear the reference bit on the specified physical page. */ void pmap_clear_reference(vm_page_t m) { struct md_page *pvh; pv_entry_t next_pv, pv; pmap_t pmap; pd_entry_t oldpde, *pde; pt_entry_t *pte; vm_offset_t va; KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, ("pmap_clear_reference: page %p is not managed", m)); vm_page_lock_queues(); sched_pin(); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, va); oldpde = *pde; if ((oldpde & PG_A) != 0) { if (pmap_demote_pde(pmap, pde, va)) { /* * Remove the mapping to a single page so * that a subsequent access may repromote. * Since the underlying page table page is * fully populated, this removal never frees * a page table page. */ va += VM_PAGE_TO_PHYS(m) - (oldpde & PG_PS_FRAME); pmap_remove_page(pmap, va, NULL); } } PMAP_UNLOCK(pmap); } TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_clear_reference: found" " a 4mpage in page %p's pv list", m)); pte = pmap_pte_quick(pmap, pv->pv_va); if ((*pte & PG_A) != 0) { /* * Regardless of whether a pte is 32 or 64 bits * in size, PG_A is among the least significant * 32 bits. */ atomic_clear_int((u_int *)pte, PG_A); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } sched_unpin(); vm_page_unlock_queues(); } /* * Miscellaneous support routines follow */ /* Adjust the cache mode for a 4KB page mapped via a PTE. */ static __inline void pmap_pte_attr(pt_entry_t *pte, int cache_bits) { u_int opte, npte; /* * The cache mode bits are all in the low 32-bits of the * PTE, so we can just spin on updating the low 32-bits. */ do { opte = *(u_int *)pte; npte = opte & ~PG_PTE_CACHE; npte |= cache_bits; } while (npte != opte && !atomic_cmpset_int((u_int *)pte, opte, npte)); } /* Adjust the cache mode for a 2/4MB page mapped via a PDE. */ static __inline void pmap_pde_attr(pd_entry_t *pde, int cache_bits) { u_int opde, npde; /* * The cache mode bits are all in the low 32-bits of the * PDE, so we can just spin on updating the low 32-bits. */ do { opde = *(u_int *)pde; npde = opde & ~PG_PDE_CACHE; npde |= cache_bits; } while (npde != opde && !atomic_cmpset_int((u_int *)pde, opde, npde)); } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. */ void * pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode) { vm_offset_t va, offset; vm_size_t tmpsize; offset = pa & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); pa = pa & PG_FRAME; if (pa < KERNLOAD && pa + size <= KERNLOAD) va = KERNBASE + pa; else va = kmem_alloc_nofault(kernel_map, size); if (!va) panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE) pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode); pmap_invalidate_range(kernel_pmap, va, va + tmpsize); pmap_invalidate_cache_range(va, va + size); return ((void *)(va + offset)); } void * pmap_mapdev(vm_paddr_t pa, vm_size_t size) { return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE)); } void * pmap_mapbios(vm_paddr_t pa, vm_size_t size) { return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK)); } void pmap_unmapdev(vm_offset_t va, vm_size_t size) { vm_offset_t base, offset, tmpva; if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD) return; base = trunc_page(va); offset = va & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) pmap_kremove(tmpva); pmap_invalidate_range(kernel_pmap, va, tmpva); kmem_free(kernel_map, base, size); } /* * Sets the memory attribute for the specified page. */ void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { struct sysmaps *sysmaps; vm_offset_t sva, eva; m->md.pat_mode = ma; if ((m->flags & PG_FICTITIOUS) != 0) return; /* * If "m" is a normal page, flush it from the cache. * See pmap_invalidate_cache_range(). * * First, try to find an existing mapping of the page by sf * buffer. sf_buf_invalidate_cache() modifies mapping and * flushes the cache. */ if (sf_buf_invalidate_cache(m)) return; /* * If page is not mapped by sf buffer, but CPU does not * support self snoop, map the page transient and do * invalidation. In the worst case, whole cache is flushed by * pmap_invalidate_cache_range(). */ if ((cpu_feature & (CPUID_SS|CPUID_CLFSH)) == CPUID_CLFSH) { sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (*sysmaps->CMAP2) panic("pmap_page_set_memattr: CMAP2 busy"); sched_pin(); *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | pmap_cache_bits(m->md.pat_mode, 0); invlcaddr(sysmaps->CADDR2); sva = (vm_offset_t)sysmaps->CADDR2; eva = sva + PAGE_SIZE; } else sva = eva = 0; /* gcc */ pmap_invalidate_cache_range(sva, eva); if (sva != 0) { *sysmaps->CMAP2 = 0; sched_unpin(); mtx_unlock(&sysmaps->lock); } } /* * Changes the specified virtual address range's memory type to that given by * the parameter "mode". The specified virtual address range must be * completely contained within either the kernel map. * * Returns zero if the change completed successfully, and either EINVAL or * ENOMEM if the change failed. Specifically, EINVAL is returned if some part * of the virtual address range was not mapped, and ENOMEM is returned if * there was insufficient memory available to complete the change. */ int pmap_change_attr(vm_offset_t va, vm_size_t size, int mode) { vm_offset_t base, offset, tmpva; pd_entry_t *pde; pt_entry_t *pte; int cache_bits_pte, cache_bits_pde; boolean_t changed; base = trunc_page(va); offset = va & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); /* * Only supported on kernel virtual addresses above the recursive map. */ if (base < VM_MIN_KERNEL_ADDRESS) return (EINVAL); cache_bits_pde = pmap_cache_bits(mode, 1); cache_bits_pte = pmap_cache_bits(mode, 0); changed = FALSE; /* * Pages that aren't mapped aren't supported. Also break down * 2/4MB pages into 4KB pages if required. */ PMAP_LOCK(kernel_pmap); for (tmpva = base; tmpva < base + size; ) { pde = pmap_pde(kernel_pmap, tmpva); if (*pde == 0) { PMAP_UNLOCK(kernel_pmap); return (EINVAL); } if (*pde & PG_PS) { /* * If the current 2/4MB page already has * the required memory type, then we need not * demote this page. Just increment tmpva to * the next 2/4MB page frame. */ if ((*pde & PG_PDE_CACHE) == cache_bits_pde) { tmpva = trunc_4mpage(tmpva) + NBPDR; continue; } /* * If the current offset aligns with a 2/4MB * page frame and there is at least 2/4MB left * within the range, then we need not break * down this page into 4KB pages. */ if ((tmpva & PDRMASK) == 0 && tmpva + PDRMASK < base + size) { tmpva += NBPDR; continue; } if (!pmap_demote_pde(kernel_pmap, pde, tmpva)) { PMAP_UNLOCK(kernel_pmap); return (ENOMEM); } } pte = vtopte(tmpva); if (*pte == 0) { PMAP_UNLOCK(kernel_pmap); return (EINVAL); } tmpva += PAGE_SIZE; } PMAP_UNLOCK(kernel_pmap); /* * Ok, all the pages exist, so run through them updating their * cache mode if required. */ for (tmpva = base; tmpva < base + size; ) { pde = pmap_pde(kernel_pmap, tmpva); if (*pde & PG_PS) { if ((*pde & PG_PDE_CACHE) != cache_bits_pde) { pmap_pde_attr(pde, cache_bits_pde); changed = TRUE; } tmpva = trunc_4mpage(tmpva) + NBPDR; } else { pte = vtopte(tmpva); if ((*pte & PG_PTE_CACHE) != cache_bits_pte) { pmap_pte_attr(pte, cache_bits_pte); changed = TRUE; } tmpva += PAGE_SIZE; } } /* * Flush CPU caches to make sure any data isn't cached that * shouldn't be, etc. */ if (changed) { pmap_invalidate_range(kernel_pmap, base, tmpva); pmap_invalidate_cache_range(base, tmpva); } return (0); } /* * perform the pmap work for mincore */ int pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { pd_entry_t *pdep; pt_entry_t *ptep, pte; vm_paddr_t pa; int val; PMAP_LOCK(pmap); retry: pdep = pmap_pde(pmap, addr); if (*pdep != 0) { if (*pdep & PG_PS) { pte = *pdep; /* Compute the physical address of the 4KB page. */ pa = ((*pdep & PG_PS_FRAME) | (addr & PDRMASK)) & PG_FRAME; val = MINCORE_SUPER; } else { ptep = pmap_pte(pmap, addr); pte = *ptep; pmap_pte_release(ptep); pa = pte & PG_FRAME; val = 0; } } else { pte = 0; pa = 0; val = 0; } if ((pte & PG_V) != 0) { val |= MINCORE_INCORE; if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if ((pte & PG_A) != 0) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; } if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && (pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) { /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) goto retry; } else PA_UNLOCK_COND(*locked_pa); PMAP_UNLOCK(pmap); return (val); } void pmap_activate(struct thread *td) { pmap_t pmap, oldpmap; u_int32_t cr3; critical_enter(); pmap = vmspace_pmap(td->td_proc->p_vmspace); oldpmap = PCPU_GET(curpmap); #if defined(SMP) atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask)); atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); #else oldpmap->pm_active &= ~1; pmap->pm_active |= 1; #endif #ifdef PAE cr3 = vtophys(pmap->pm_pdpt); #else cr3 = vtophys(pmap->pm_pdir); #endif /* * pmap_activate is for the current thread on the current cpu */ td->td_pcb->pcb_cr3 = cr3; load_cr3(cr3); PCPU_SET(curpmap, pmap); critical_exit(); } void pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) { } /* * Increase the starting virtual address of the given mapping if a * different alignment might result in more superpage mappings. */ void pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { vm_offset_t superpage_offset; if (size < NBPDR) return; if (object != NULL && (object->flags & OBJ_COLORED) != 0) offset += ptoa(object->pg_color); superpage_offset = offset & PDRMASK; if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR || (*addr & PDRMASK) == superpage_offset) return; if ((*addr & PDRMASK) < superpage_offset) *addr = (*addr & ~PDRMASK) + superpage_offset; else *addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset; } #if defined(PMAP_DEBUG) pmap_pid_dump(int pid) { pmap_t pmap; struct proc *p; int npte = 0; int index; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { if (p->p_pid != pid) continue; if (p->p_vmspace) { int i,j; index = 0; pmap = vmspace_pmap(p->p_vmspace); for (i = 0; i < NPDEPTD; i++) { pd_entry_t *pde; pt_entry_t *pte; vm_offset_t base = i << PDRSHIFT; pde = &pmap->pm_pdir[i]; if (pde && pmap_pde_v(pde)) { for (j = 0; j < NPTEPG; j++) { vm_offset_t va = base + (j << PAGE_SHIFT); if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { if (index) { index = 0; printf("\n"); } sx_sunlock(&allproc_lock); return (npte); } pte = pmap_pte(pmap, va); if (pte && pmap_pte_v(pte)) { pt_entry_t pa; vm_page_t m; pa = *pte; m = PHYS_TO_VM_PAGE(pa & PG_FRAME); printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", va, pa, m->hold_count, m->wire_count, m->flags); npte++; index++; if (index >= 2) { index = 0; printf("\n"); } else { printf(" "); } } } } } } } sx_sunlock(&allproc_lock); return (npte); } #endif #if defined(DEBUG) static void pads(pmap_t pm); void pmap_pvdump(vm_offset_t pa); /* print address space of pmap*/ static void pads(pmap_t pm) { int i, j; vm_paddr_t va; pt_entry_t *ptep; if (pm == kernel_pmap) return; for (i = 0; i < NPDEPTD; i++) if (pm->pm_pdir[i]) for (j = 0; j < NPTEPG; j++) { va = (i << PDRSHIFT) + (j << PAGE_SHIFT); if (pm == kernel_pmap && va < KERNBASE) continue; if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) continue; ptep = pmap_pte(pm, va); if (pmap_pte_v(ptep)) printf("%x:%x ", va, *ptep); }; } void pmap_pvdump(vm_paddr_t pa) { pv_entry_t pv; pmap_t pmap; vm_page_t m; printf("pa %x", pa); m = PHYS_TO_VM_PAGE(pa); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { pmap = PV_PMAP(pv); printf(" -> pmap %p, va %x", (void *)pmap, pv->pv_va); pads(pmap); } printf(" "); } #endif Index: projects/binutils-2.17/sys/i386/include/specialreg.h =================================================================== --- projects/binutils-2.17/sys/i386/include/specialreg.h (revision 215829) +++ projects/binutils-2.17/sys/i386/include/specialreg.h (revision 215830) @@ -1,600 +1,609 @@ /*- * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)specialreg.h 7.1 (Berkeley) 5/9/91 * $FreeBSD$ */ #ifndef _MACHINE_SPECIALREG_H_ #define _MACHINE_SPECIALREG_H_ /* * Bits in 386 special registers: */ #define CR0_PE 0x00000001 /* Protected mode Enable */ #define CR0_MP 0x00000002 /* "Math" (fpu) Present */ #define CR0_EM 0x00000004 /* EMulate FPU instructions. (trap ESC only) */ #define CR0_TS 0x00000008 /* Task Switched (if MP, trap ESC and WAIT) */ #define CR0_PG 0x80000000 /* PaGing enable */ /* * Bits in 486 special registers: */ #define CR0_NE 0x00000020 /* Numeric Error enable (EX16 vs IRQ13) */ #define CR0_WP 0x00010000 /* Write Protect (honor page protect in all modes) */ #define CR0_AM 0x00040000 /* Alignment Mask (set to enable AC flag) */ #define CR0_NW 0x20000000 /* Not Write-through */ #define CR0_CD 0x40000000 /* Cache Disable */ /* * Bits in PPro special registers */ #define CR4_VME 0x00000001 /* Virtual 8086 mode extensions */ #define CR4_PVI 0x00000002 /* Protected-mode virtual interrupts */ #define CR4_TSD 0x00000004 /* Time stamp disable */ #define CR4_DE 0x00000008 /* Debugging extensions */ #define CR4_PSE 0x00000010 /* Page size extensions */ #define CR4_PAE 0x00000020 /* Physical address extension */ #define CR4_MCE 0x00000040 /* Machine check enable */ #define CR4_PGE 0x00000080 /* Page global enable */ #define CR4_PCE 0x00000100 /* Performance monitoring counter enable */ #define CR4_FXSR 0x00000200 /* Fast FPU save/restore used by OS */ #define CR4_XMM 0x00000400 /* enable SIMD/MMX2 to use except 16 */ /* * Bits in AMD64 special registers. EFER is 64 bits wide. */ #define EFER_NXE 0x000000800 /* PTE No-Execute bit enable (R/W) */ /* * CPUID instruction features register */ #define CPUID_FPU 0x00000001 #define CPUID_VME 0x00000002 #define CPUID_DE 0x00000004 #define CPUID_PSE 0x00000008 #define CPUID_TSC 0x00000010 #define CPUID_MSR 0x00000020 #define CPUID_PAE 0x00000040 #define CPUID_MCE 0x00000080 #define CPUID_CX8 0x00000100 #define CPUID_APIC 0x00000200 #define CPUID_B10 0x00000400 #define CPUID_SEP 0x00000800 #define CPUID_MTRR 0x00001000 #define CPUID_PGE 0x00002000 #define CPUID_MCA 0x00004000 #define CPUID_CMOV 0x00008000 #define CPUID_PAT 0x00010000 #define CPUID_PSE36 0x00020000 #define CPUID_PSN 0x00040000 #define CPUID_CLFSH 0x00080000 #define CPUID_B20 0x00100000 #define CPUID_DS 0x00200000 #define CPUID_ACPI 0x00400000 #define CPUID_MMX 0x00800000 #define CPUID_FXSR 0x01000000 #define CPUID_SSE 0x02000000 #define CPUID_XMM 0x02000000 #define CPUID_SSE2 0x04000000 #define CPUID_SS 0x08000000 #define CPUID_HTT 0x10000000 #define CPUID_TM 0x20000000 #define CPUID_IA64 0x40000000 #define CPUID_PBE 0x80000000 #define CPUID2_SSE3 0x00000001 #define CPUID2_PCLMULQDQ 0x00000002 #define CPUID2_DTES64 0x00000004 #define CPUID2_MON 0x00000008 #define CPUID2_DS_CPL 0x00000010 #define CPUID2_VMX 0x00000020 #define CPUID2_SMX 0x00000040 #define CPUID2_EST 0x00000080 #define CPUID2_TM2 0x00000100 #define CPUID2_SSSE3 0x00000200 #define CPUID2_CNXTID 0x00000400 #define CPUID2_CX16 0x00002000 #define CPUID2_XTPR 0x00004000 #define CPUID2_PDCM 0x00008000 #define CPUID2_PCID 0x00020000 #define CPUID2_DCA 0x00040000 #define CPUID2_SSE41 0x00080000 #define CPUID2_SSE42 0x00100000 #define CPUID2_X2APIC 0x00200000 #define CPUID2_MOVBE 0x00400000 #define CPUID2_POPCNT 0x00800000 #define CPUID2_AESNI 0x02000000 /* + * Important bits in the Thermal and Power Management flags + * CPUID.6 EAX and ECX. + */ +#define CPUTPM1_SENSOR 0x00000001 +#define CPUTPM1_TURBO 0x00000002 +#define CPUTPM1_ARAT 0x00000004 +#define CPUTPM2_EFFREQ 0x00000001 + +/* * Important bits in the AMD extended cpuid flags */ #define AMDID_SYSCALL 0x00000800 #define AMDID_MP 0x00080000 #define AMDID_NX 0x00100000 #define AMDID_EXT_MMX 0x00400000 #define AMDID_FFXSR 0x01000000 #define AMDID_PAGE1GB 0x04000000 #define AMDID_RDTSCP 0x08000000 #define AMDID_LM 0x20000000 #define AMDID_EXT_3DNOW 0x40000000 #define AMDID_3DNOW 0x80000000 #define AMDID2_LAHF 0x00000001 #define AMDID2_CMP 0x00000002 #define AMDID2_SVM 0x00000004 #define AMDID2_EXT_APIC 0x00000008 #define AMDID2_CR8 0x00000010 #define AMDID2_ABM 0x00000020 #define AMDID2_SSE4A 0x00000040 #define AMDID2_MAS 0x00000080 #define AMDID2_PREFETCH 0x00000100 #define AMDID2_OSVW 0x00000200 #define AMDID2_IBS 0x00000400 #define AMDID2_SSE5 0x00000800 #define AMDID2_SKINIT 0x00001000 #define AMDID2_WDT 0x00002000 /* * CPUID instruction 1 eax info */ #define CPUID_STEPPING 0x0000000f #define CPUID_MODEL 0x000000f0 #define CPUID_FAMILY 0x00000f00 #define CPUID_EXT_MODEL 0x000f0000 #define CPUID_EXT_FAMILY 0x0ff00000 #define CPUID_TO_MODEL(id) \ ((((id) & CPUID_MODEL) >> 4) | \ ((((id) & CPUID_FAMILY) >= 0x600) ? \ (((id) & CPUID_EXT_MODEL) >> 12) : 0)) #define CPUID_TO_FAMILY(id) \ ((((id) & CPUID_FAMILY) >> 8) + \ ((((id) & CPUID_FAMILY) == 0xf00) ? \ (((id) & CPUID_EXT_FAMILY) >> 20) : 0)) /* * CPUID instruction 1 ebx info */ #define CPUID_BRAND_INDEX 0x000000ff #define CPUID_CLFUSH_SIZE 0x0000ff00 #define CPUID_HTT_CORES 0x00ff0000 #define CPUID_LOCAL_APIC_ID 0xff000000 /* * CPUID instruction 0xb ebx info. */ #define CPUID_TYPE_INVAL 0 #define CPUID_TYPE_SMT 1 #define CPUID_TYPE_CORE 2 /* * AMD extended function 8000_0007h edx info */ #define AMDPM_TS 0x00000001 #define AMDPM_FID 0x00000002 #define AMDPM_VID 0x00000004 #define AMDPM_TTP 0x00000008 #define AMDPM_TM 0x00000010 #define AMDPM_STC 0x00000020 #define AMDPM_100MHZ_STEPS 0x00000040 #define AMDPM_HW_PSTATE 0x00000080 #define AMDPM_TSC_INVARIANT 0x00000100 #define AMDPM_CPB 0x00000200 /* * AMD extended function 8000_0008h ecx info */ #define AMDID_CMP_CORES 0x000000ff /* * CPUID manufacturers identifiers */ #define AMD_VENDOR_ID "AuthenticAMD" #define CENTAUR_VENDOR_ID "CentaurHauls" #define CYRIX_VENDOR_ID "CyrixInstead" #define INTEL_VENDOR_ID "GenuineIntel" #define NEXGEN_VENDOR_ID "NexGenDriven" #define NSC_VENDOR_ID "Geode by NSC" #define RISE_VENDOR_ID "RiseRiseRise" #define SIS_VENDOR_ID "SiS SiS SiS " #define TRANSMETA_VENDOR_ID "GenuineTMx86" #define UMC_VENDOR_ID "UMC UMC UMC " /* * Model-specific registers for the i386 family */ #define MSR_P5_MC_ADDR 0x000 #define MSR_P5_MC_TYPE 0x001 #define MSR_TSC 0x010 #define MSR_P5_CESR 0x011 #define MSR_P5_CTR0 0x012 #define MSR_P5_CTR1 0x013 #define MSR_IA32_PLATFORM_ID 0x017 #define MSR_APICBASE 0x01b #define MSR_EBL_CR_POWERON 0x02a #define MSR_TEST_CTL 0x033 #define MSR_BIOS_UPDT_TRIG 0x079 #define MSR_BBL_CR_D0 0x088 #define MSR_BBL_CR_D1 0x089 #define MSR_BBL_CR_D2 0x08a #define MSR_BIOS_SIGN 0x08b #define MSR_PERFCTR0 0x0c1 #define MSR_PERFCTR1 0x0c2 #define MSR_MPERF 0x0e7 #define MSR_APERF 0x0e8 #define MSR_IA32_EXT_CONFIG 0x0ee /* Undocumented. Core Solo/Duo only */ #define MSR_MTRRcap 0x0fe #define MSR_BBL_CR_ADDR 0x116 #define MSR_BBL_CR_DECC 0x118 #define MSR_BBL_CR_CTL 0x119 #define MSR_BBL_CR_TRIG 0x11a #define MSR_BBL_CR_BUSY 0x11b #define MSR_BBL_CR_CTL3 0x11e #define MSR_SYSENTER_CS_MSR 0x174 #define MSR_SYSENTER_ESP_MSR 0x175 #define MSR_SYSENTER_EIP_MSR 0x176 #define MSR_MCG_CAP 0x179 #define MSR_MCG_STATUS 0x17a #define MSR_MCG_CTL 0x17b #define MSR_EVNTSEL0 0x186 #define MSR_EVNTSEL1 0x187 #define MSR_THERM_CONTROL 0x19a #define MSR_THERM_INTERRUPT 0x19b #define MSR_THERM_STATUS 0x19c #define MSR_IA32_MISC_ENABLE 0x1a0 #define MSR_IA32_TEMPERATURE_TARGET 0x1a2 #define MSR_DEBUGCTLMSR 0x1d9 #define MSR_LASTBRANCHFROMIP 0x1db #define MSR_LASTBRANCHTOIP 0x1dc #define MSR_LASTINTFROMIP 0x1dd #define MSR_LASTINTTOIP 0x1de #define MSR_ROB_CR_BKUPTMPDR6 0x1e0 #define MSR_MTRRVarBase 0x200 #define MSR_MTRR64kBase 0x250 #define MSR_MTRR16kBase 0x258 #define MSR_MTRR4kBase 0x268 #define MSR_PAT 0x277 #define MSR_MC0_CTL2 0x280 #define MSR_MTRRdefType 0x2ff #define MSR_MC0_CTL 0x400 #define MSR_MC0_STATUS 0x401 #define MSR_MC0_ADDR 0x402 #define MSR_MC0_MISC 0x403 #define MSR_MC1_CTL 0x404 #define MSR_MC1_STATUS 0x405 #define MSR_MC1_ADDR 0x406 #define MSR_MC1_MISC 0x407 #define MSR_MC2_CTL 0x408 #define MSR_MC2_STATUS 0x409 #define MSR_MC2_ADDR 0x40a #define MSR_MC2_MISC 0x40b #define MSR_MC3_CTL 0x40c #define MSR_MC3_STATUS 0x40d #define MSR_MC3_ADDR 0x40e #define MSR_MC3_MISC 0x40f #define MSR_MC4_CTL 0x410 #define MSR_MC4_STATUS 0x411 #define MSR_MC4_ADDR 0x412 #define MSR_MC4_MISC 0x413 /* * Constants related to MSR's. */ #define APICBASE_RESERVED 0x000006ff #define APICBASE_BSP 0x00000100 #define APICBASE_ENABLED 0x00000800 #define APICBASE_ADDRESS 0xfffff000 /* * PAT modes. */ #define PAT_UNCACHEABLE 0x00 #define PAT_WRITE_COMBINING 0x01 #define PAT_WRITE_THROUGH 0x04 #define PAT_WRITE_PROTECTED 0x05 #define PAT_WRITE_BACK 0x06 #define PAT_UNCACHED 0x07 #define PAT_VALUE(i, m) ((long long)(m) << (8 * (i))) #define PAT_MASK(i) PAT_VALUE(i, 0xff) /* * Constants related to MTRRs */ #define MTRR_UNCACHEABLE 0x00 #define MTRR_WRITE_COMBINING 0x01 #define MTRR_WRITE_THROUGH 0x04 #define MTRR_WRITE_PROTECTED 0x05 #define MTRR_WRITE_BACK 0x06 #define MTRR_N64K 8 /* numbers of fixed-size entries */ #define MTRR_N16K 16 #define MTRR_N4K 64 #define MTRR_CAP_WC 0x0000000000000400 #define MTRR_CAP_FIXED 0x0000000000000100 #define MTRR_CAP_VCNT 0x00000000000000ff #define MTRR_DEF_ENABLE 0x0000000000000800 #define MTRR_DEF_FIXED_ENABLE 0x0000000000000400 #define MTRR_DEF_TYPE 0x00000000000000ff #define MTRR_PHYSBASE_PHYSBASE 0x000ffffffffff000 #define MTRR_PHYSBASE_TYPE 0x00000000000000ff #define MTRR_PHYSMASK_PHYSMASK 0x000ffffffffff000 #define MTRR_PHYSMASK_VALID 0x0000000000000800 /* * Cyrix configuration registers, accessible as IO ports. */ #define CCR0 0xc0 /* Configuration control register 0 */ #define CCR0_NC0 0x01 /* First 64K of each 1M memory region is non-cacheable */ #define CCR0_NC1 0x02 /* 640K-1M region is non-cacheable */ #define CCR0_A20M 0x04 /* Enables A20M# input pin */ #define CCR0_KEN 0x08 /* Enables KEN# input pin */ #define CCR0_FLUSH 0x10 /* Enables FLUSH# input pin */ #define CCR0_BARB 0x20 /* Flushes internal cache when entering hold state */ #define CCR0_CO 0x40 /* Cache org: 1=direct mapped, 0=2x set assoc */ #define CCR0_SUSPEND 0x80 /* Enables SUSP# and SUSPA# pins */ #define CCR1 0xc1 /* Configuration control register 1 */ #define CCR1_RPL 0x01 /* Enables RPLSET and RPLVAL# pins */ #define CCR1_SMI 0x02 /* Enables SMM pins */ #define CCR1_SMAC 0x04 /* System management memory access */ #define CCR1_MMAC 0x08 /* Main memory access */ #define CCR1_NO_LOCK 0x10 /* Negate LOCK# */ #define CCR1_SM3 0x80 /* SMM address space address region 3 */ #define CCR2 0xc2 #define CCR2_WB 0x02 /* Enables WB cache interface pins */ #define CCR2_SADS 0x02 /* Slow ADS */ #define CCR2_LOCK_NW 0x04 /* LOCK NW Bit */ #define CCR2_SUSP_HLT 0x08 /* Suspend on HALT */ #define CCR2_WT1 0x10 /* WT region 1 */ #define CCR2_WPR1 0x10 /* Write-protect region 1 */ #define CCR2_BARB 0x20 /* Flushes write-back cache when entering hold state. */ #define CCR2_BWRT 0x40 /* Enables burst write cycles */ #define CCR2_USE_SUSP 0x80 /* Enables suspend pins */ #define CCR3 0xc3 #define CCR3_SMILOCK 0x01 /* SMM register lock */ #define CCR3_NMI 0x02 /* Enables NMI during SMM */ #define CCR3_LINBRST 0x04 /* Linear address burst cycles */ #define CCR3_SMMMODE 0x08 /* SMM Mode */ #define CCR3_MAPEN0 0x10 /* Enables Map0 */ #define CCR3_MAPEN1 0x20 /* Enables Map1 */ #define CCR3_MAPEN2 0x40 /* Enables Map2 */ #define CCR3_MAPEN3 0x80 /* Enables Map3 */ #define CCR4 0xe8 #define CCR4_IOMASK 0x07 #define CCR4_MEM 0x08 /* Enables momory bypassing */ #define CCR4_DTE 0x10 /* Enables directory table entry cache */ #define CCR4_FASTFPE 0x20 /* Fast FPU exception */ #define CCR4_CPUID 0x80 /* Enables CPUID instruction */ #define CCR5 0xe9 #define CCR5_WT_ALLOC 0x01 /* Write-through allocate */ #define CCR5_SLOP 0x02 /* LOOP instruction slowed down */ #define CCR5_LBR1 0x10 /* Local bus region 1 */ #define CCR5_ARREN 0x20 /* Enables ARR region */ #define CCR6 0xea #define CCR7 0xeb /* Performance Control Register (5x86 only). */ #define PCR0 0x20 #define PCR0_RSTK 0x01 /* Enables return stack */ #define PCR0_BTB 0x02 /* Enables branch target buffer */ #define PCR0_LOOP 0x04 /* Enables loop */ #define PCR0_AIS 0x08 /* Enables all instrcutions stalled to serialize pipe. */ #define PCR0_MLR 0x10 /* Enables reordering of misaligned loads */ #define PCR0_BTBRT 0x40 /* Enables BTB test register. */ #define PCR0_LSSER 0x80 /* Disable reorder */ /* Device Identification Registers */ #define DIR0 0xfe #define DIR1 0xff /* * Machine Check register constants. */ #define MCG_CAP_COUNT 0x000000ff #define MCG_CAP_CTL_P 0x00000100 #define MCG_CAP_EXT_P 0x00000200 #define MCG_CAP_CMCI_P 0x00000400 #define MCG_CAP_TES_P 0x00000800 #define MCG_CAP_EXT_CNT 0x00ff0000 #define MCG_CAP_SER_P 0x01000000 #define MCG_STATUS_RIPV 0x00000001 #define MCG_STATUS_EIPV 0x00000002 #define MCG_STATUS_MCIP 0x00000004 #define MCG_CTL_ENABLE 0xffffffffffffffff #define MCG_CTL_DISABLE 0x0000000000000000 #define MSR_MC_CTL(x) (MSR_MC0_CTL + (x) * 4) #define MSR_MC_STATUS(x) (MSR_MC0_STATUS + (x) * 4) #define MSR_MC_ADDR(x) (MSR_MC0_ADDR + (x) * 4) #define MSR_MC_MISC(x) (MSR_MC0_MISC + (x) * 4) #define MSR_MC_CTL2(x) (MSR_MC0_CTL2 + (x)) /* If MCG_CAP_CMCI_P */ #define MC_STATUS_MCA_ERROR 0x000000000000ffff #define MC_STATUS_MODEL_ERROR 0x00000000ffff0000 #define MC_STATUS_OTHER_INFO 0x01ffffff00000000 #define MC_STATUS_COR_COUNT 0x001fffc000000000 /* If MCG_CAP_CMCI_P */ #define MC_STATUS_TES_STATUS 0x0060000000000000 /* If MCG_CAP_TES_P */ #define MC_STATUS_AR 0x0080000000000000 /* If MCG_CAP_TES_P */ #define MC_STATUS_S 0x0100000000000000 /* If MCG_CAP_TES_P */ #define MC_STATUS_PCC 0x0200000000000000 #define MC_STATUS_ADDRV 0x0400000000000000 #define MC_STATUS_MISCV 0x0800000000000000 #define MC_STATUS_EN 0x1000000000000000 #define MC_STATUS_UC 0x2000000000000000 #define MC_STATUS_OVER 0x4000000000000000 #define MC_STATUS_VAL 0x8000000000000000 #define MC_MISC_RA_LSB 0x000000000000003f /* If MCG_CAP_SER_P */ #define MC_MISC_ADDRESS_MODE 0x00000000000001c0 /* If MCG_CAP_SER_P */ #define MC_CTL2_THRESHOLD 0x0000000000007fff #define MC_CTL2_CMCI_EN 0x0000000040000000 /* * The following four 3-byte registers control the non-cacheable regions. * These registers must be written as three separate bytes. * * NCRx+0: A31-A24 of starting address * NCRx+1: A23-A16 of starting address * NCRx+2: A15-A12 of starting address | NCR_SIZE_xx. * * The non-cacheable region's starting address must be aligned to the * size indicated by the NCR_SIZE_xx field. */ #define NCR1 0xc4 #define NCR2 0xc7 #define NCR3 0xca #define NCR4 0xcd #define NCR_SIZE_0K 0 #define NCR_SIZE_4K 1 #define NCR_SIZE_8K 2 #define NCR_SIZE_16K 3 #define NCR_SIZE_32K 4 #define NCR_SIZE_64K 5 #define NCR_SIZE_128K 6 #define NCR_SIZE_256K 7 #define NCR_SIZE_512K 8 #define NCR_SIZE_1M 9 #define NCR_SIZE_2M 10 #define NCR_SIZE_4M 11 #define NCR_SIZE_8M 12 #define NCR_SIZE_16M 13 #define NCR_SIZE_32M 14 #define NCR_SIZE_4G 15 /* * The address region registers are used to specify the location and * size for the eight address regions. * * ARRx + 0: A31-A24 of start address * ARRx + 1: A23-A16 of start address * ARRx + 2: A15-A12 of start address | ARR_SIZE_xx */ #define ARR0 0xc4 #define ARR1 0xc7 #define ARR2 0xca #define ARR3 0xcd #define ARR4 0xd0 #define ARR5 0xd3 #define ARR6 0xd6 #define ARR7 0xd9 #define ARR_SIZE_0K 0 #define ARR_SIZE_4K 1 #define ARR_SIZE_8K 2 #define ARR_SIZE_16K 3 #define ARR_SIZE_32K 4 #define ARR_SIZE_64K 5 #define ARR_SIZE_128K 6 #define ARR_SIZE_256K 7 #define ARR_SIZE_512K 8 #define ARR_SIZE_1M 9 #define ARR_SIZE_2M 10 #define ARR_SIZE_4M 11 #define ARR_SIZE_8M 12 #define ARR_SIZE_16M 13 #define ARR_SIZE_32M 14 #define ARR_SIZE_4G 15 /* * The region control registers specify the attributes associated with * the ARRx addres regions. */ #define RCR0 0xdc #define RCR1 0xdd #define RCR2 0xde #define RCR3 0xdf #define RCR4 0xe0 #define RCR5 0xe1 #define RCR6 0xe2 #define RCR7 0xe3 #define RCR_RCD 0x01 /* Disables caching for ARRx (x = 0-6). */ #define RCR_RCE 0x01 /* Enables caching for ARR7. */ #define RCR_WWO 0x02 /* Weak write ordering. */ #define RCR_WL 0x04 /* Weak locking. */ #define RCR_WG 0x08 /* Write gathering. */ #define RCR_WT 0x10 /* Write-through. */ #define RCR_NLB 0x20 /* LBA# pin is not asserted. */ /* AMD Write Allocate Top-Of-Memory and Control Register */ #define AMD_WT_ALLOC_TME 0x40000 /* top-of-memory enable */ #define AMD_WT_ALLOC_PRE 0x20000 /* programmable range enable */ #define AMD_WT_ALLOC_FRE 0x10000 /* fixed (A0000-FFFFF) range enable */ /* AMD64 MSR's */ #define MSR_EFER 0xc0000080 /* extended features */ #define MSR_HWCR 0xc0010015 #define MSR_K8_UCODE_UPDATE 0xc0010020 /* update microcode */ #define MSR_MC0_CTL_MASK 0xc0010044 /* VIA ACE crypto featureset: for via_feature_rng */ #define VIA_HAS_RNG 1 /* cpu has RNG */ /* VIA ACE crypto featureset: for via_feature_xcrypt */ #define VIA_HAS_AES 1 /* cpu has AES */ #define VIA_HAS_SHA 2 /* cpu has SHA1 & SHA256 */ #define VIA_HAS_MM 4 /* cpu has RSA instructions */ #define VIA_HAS_AESCTR 8 /* cpu has AES-CTR instructions */ /* Centaur Extended Feature flags */ #define VIA_CPUID_HAS_RNG 0x000004 #define VIA_CPUID_DO_RNG 0x000008 #define VIA_CPUID_HAS_ACE 0x000040 #define VIA_CPUID_DO_ACE 0x000080 #define VIA_CPUID_HAS_ACE2 0x000100 #define VIA_CPUID_DO_ACE2 0x000200 #define VIA_CPUID_HAS_PHE 0x000400 #define VIA_CPUID_DO_PHE 0x000800 #define VIA_CPUID_HAS_PMM 0x001000 #define VIA_CPUID_DO_PMM 0x002000 /* VIA ACE xcrypt-* instruction context control options */ #define VIA_CRYPT_CWLO_ROUND_M 0x0000000f #define VIA_CRYPT_CWLO_ALG_M 0x00000070 #define VIA_CRYPT_CWLO_ALG_AES 0x00000000 #define VIA_CRYPT_CWLO_KEYGEN_M 0x00000080 #define VIA_CRYPT_CWLO_KEYGEN_HW 0x00000000 #define VIA_CRYPT_CWLO_KEYGEN_SW 0x00000080 #define VIA_CRYPT_CWLO_NORMAL 0x00000000 #define VIA_CRYPT_CWLO_INTERMEDIATE 0x00000100 #define VIA_CRYPT_CWLO_ENCRYPT 0x00000000 #define VIA_CRYPT_CWLO_DECRYPT 0x00000200 #define VIA_CRYPT_CWLO_KEY128 0x0000000a /* 128bit, 10 rds */ #define VIA_CRYPT_CWLO_KEY192 0x0000040c /* 192bit, 12 rds */ #define VIA_CRYPT_CWLO_KEY256 0x0000080e /* 256bit, 15 rds */ #endif /* !_MACHINE_SPECIALREG_H_ */ Index: projects/binutils-2.17/sys/i386/include/xen/hypercall.h =================================================================== --- projects/binutils-2.17/sys/i386/include/xen/hypercall.h (revision 215829) +++ projects/binutils-2.17/sys/i386/include/xen/hypercall.h (revision 215830) @@ -1,405 +1,406 @@ /****************************************************************************** * hypercall.h * * Linux-specific hypervisor handling. * * Copyright (c) 2002-2004, K A Fraser * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #ifndef __HYPERCALL_H__ #define __HYPERCALL_H__ #include #include #include #define __STR(x) #x #define STR(x) __STR(x) #define ENOXENSYS 38 #define CONFIG_XEN_COMPAT 0x030002 #if defined(XEN) #define HYPERCALL_STR(name) \ "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)" #else #define HYPERCALL_STR(name) \ "mov hypercall_stubs,%%eax; " \ "add $("STR(__HYPERVISOR_##name)" * 32),%%eax; " \ "call *%%eax" #endif #define _hypercall0(type, name) \ ({ \ long __res; \ __asm__ volatile ( \ HYPERCALL_STR(name) \ : "=a" (__res) \ : \ : "memory" ); \ (type)__res; \ }) #define _hypercall1(type, name, a1) \ ({ \ long __res, __ign1; \ __asm__ volatile ( \ HYPERCALL_STR(name) \ : "=a" (__res), "=b" (__ign1) \ : "1" ((long)(a1)) \ : "memory" ); \ (type)__res; \ }) #define _hypercall2(type, name, a1, a2) \ ({ \ long __res, __ign1, __ign2; \ __asm__ volatile ( \ HYPERCALL_STR(name) \ : "=a" (__res), "=b" (__ign1), "=c" (__ign2) \ : "1" ((long)(a1)), "2" ((long)(a2)) \ : "memory" ); \ (type)__res; \ }) #define _hypercall3(type, name, a1, a2, a3) \ ({ \ long __res, __ign1, __ign2, __ign3; \ __asm__ volatile ( \ HYPERCALL_STR(name) \ : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ "=d" (__ign3) \ : "1" ((long)(a1)), "2" ((long)(a2)), \ "3" ((long)(a3)) \ : "memory" ); \ (type)__res; \ }) #define _hypercall4(type, name, a1, a2, a3, a4) \ ({ \ long __res, __ign1, __ign2, __ign3, __ign4; \ __asm__ volatile ( \ HYPERCALL_STR(name) \ : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ "=d" (__ign3), "=S" (__ign4) \ : "1" ((long)(a1)), "2" ((long)(a2)), \ "3" ((long)(a3)), "4" ((long)(a4)) \ : "memory" ); \ (type)__res; \ }) #define _hypercall5(type, name, a1, a2, a3, a4, a5) \ ({ \ long __res, __ign1, __ign2, __ign3, __ign4, __ign5; \ __asm__ volatile ( \ HYPERCALL_STR(name) \ : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ "=d" (__ign3), "=S" (__ign4), "=D" (__ign5) \ : "1" ((long)(a1)), "2" ((long)(a2)), \ "3" ((long)(a3)), "4" ((long)(a4)), \ "5" ((long)(a5)) \ : "memory" ); \ (type)__res; \ }) static inline int HYPERVISOR_set_trap_table( trap_info_t *table) { return _hypercall1(int, set_trap_table, table); } static inline int HYPERVISOR_mmu_update( mmu_update_t *req, int count, int *success_count, domid_t domid) { return _hypercall4(int, mmu_update, req, count, success_count, domid); } static inline int HYPERVISOR_mmuext_op( mmuext_op_t *op, int count, int *success_count, domid_t domid) { return _hypercall4(int, mmuext_op, op, count, success_count, domid); } static inline int HYPERVISOR_set_gdt( unsigned long *frame_list, int entries) { return _hypercall2(int, set_gdt, frame_list, entries); } static inline int HYPERVISOR_stack_switch( unsigned long ss, unsigned long esp) { return _hypercall2(int, stack_switch, ss, esp); } static inline int HYPERVISOR_set_callbacks( unsigned long event_selector, unsigned long event_address, unsigned long failsafe_selector, unsigned long failsafe_address) { return _hypercall4(int, set_callbacks, event_selector, event_address, failsafe_selector, failsafe_address); } static inline int HYPERVISOR_fpu_taskswitch( int set) { return _hypercall1(int, fpu_taskswitch, set); } static inline int HYPERVISOR_sched_op_compat( int cmd, unsigned long arg) { return _hypercall2(int, sched_op_compat, cmd, arg); } static inline int HYPERVISOR_sched_op( int cmd, void *arg) { return _hypercall2(int, sched_op, cmd, arg); } static inline long HYPERVISOR_set_timer_op( uint64_t timeout) { unsigned long timeout_hi = (unsigned long)(timeout>>32); unsigned long timeout_lo = (unsigned long)timeout; return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); } #if 0 static inline int HYPERVISOR_platform_op( struct xen_platform_op *platform_op) { platform_op->interface_version = XENPF_INTERFACE_VERSION; return _hypercall1(int, platform_op, platform_op); } #endif static inline int HYPERVISOR_set_debugreg( int reg, unsigned long value) { return _hypercall2(int, set_debugreg, reg, value); } static inline unsigned long HYPERVISOR_get_debugreg( int reg) { return _hypercall1(unsigned long, get_debugreg, reg); } static inline int HYPERVISOR_update_descriptor( uint64_t ma, uint64_t desc) { return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); } static inline int HYPERVISOR_memory_op( unsigned int cmd, void *arg) { return _hypercall2(int, memory_op, cmd, arg); } +int HYPERVISOR_multicall(multicall_entry_t *, int); static inline int -HYPERVISOR_multicall( +_HYPERVISOR_multicall( void *call_list, int nr_calls) { return _hypercall2(int, multicall, call_list, nr_calls); } static inline int HYPERVISOR_update_va_mapping( unsigned long va, uint64_t new_val, unsigned long flags) { uint32_t hi, lo; lo = (uint32_t)(new_val & 0xffffffff); hi = (uint32_t)(new_val >> 32); return _hypercall4(int, update_va_mapping, va, lo, hi, flags); } static inline int HYPERVISOR_event_channel_op( int cmd, void *arg) { int rc = _hypercall2(int, event_channel_op, cmd, arg); #if CONFIG_XEN_COMPAT <= 0x030002 if (__predict_false(rc == -ENOXENSYS)) { struct evtchn_op op; op.cmd = cmd; memcpy(&op.u, arg, sizeof(op.u)); rc = _hypercall1(int, event_channel_op_compat, &op); memcpy(arg, &op.u, sizeof(op.u)); } #endif return (rc); } static inline int HYPERVISOR_xen_version( int cmd, void *arg) { return _hypercall2(int, xen_version, cmd, arg); } static inline int HYPERVISOR_console_io( int cmd, int count, char *str) { return _hypercall3(int, console_io, cmd, count, str); } static inline int HYPERVISOR_physdev_op( int cmd, void *arg) { int rc = _hypercall2(int, physdev_op, cmd, arg); #if CONFIG_XEN_COMPAT <= 0x030002 if (__predict_false(rc == -ENOXENSYS)) { struct physdev_op op; op.cmd = cmd; memcpy(&op.u, arg, sizeof(op.u)); rc = _hypercall1(int, physdev_op_compat, &op); memcpy(arg, &op.u, sizeof(op.u)); } #endif return (rc); } static inline int HYPERVISOR_grant_table_op( unsigned int cmd, void *uop, unsigned int count) { return _hypercall3(int, grant_table_op, cmd, uop, count); } static inline int HYPERVISOR_update_va_mapping_otherdomain( unsigned long va, uint64_t new_val, unsigned long flags, domid_t domid) { uint32_t hi, lo; lo = (uint32_t)(new_val & 0xffffffff); hi = (uint32_t)(new_val >> 32); return _hypercall5(int, update_va_mapping_otherdomain, va, lo, hi, flags, domid); } static inline int HYPERVISOR_vm_assist( unsigned int cmd, unsigned int type) { return _hypercall2(int, vm_assist, cmd, type); } static inline int HYPERVISOR_vcpu_op( int cmd, int vcpuid, void *extra_args) { return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); } static inline int HYPERVISOR_suspend( unsigned long srec) { struct sched_shutdown sched_shutdown = { .reason = SHUTDOWN_suspend }; int rc = _hypercall3(int, sched_op, SCHEDOP_shutdown, &sched_shutdown, srec); #if CONFIG_XEN_COMPAT <= 0x030002 if (rc == -ENOXENSYS) rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown, SHUTDOWN_suspend, srec); #endif return (rc); } #if CONFIG_XEN_COMPAT <= 0x030002 static inline int HYPERVISOR_nmi_op( unsigned long op, void *arg) { return _hypercall2(int, nmi_op, op, arg); } #endif static inline int HYPERVISOR_callback_op( int cmd, void *arg) { return _hypercall2(int, callback_op, cmd, arg); } #ifndef CONFIG_XEN static inline unsigned long HYPERVISOR_hvm_op( int op, void *arg) { return _hypercall2(unsigned long, hvm_op, op, arg); } #endif static inline int HYPERVISOR_xenoprof_op( int op, void *arg) { return _hypercall2(int, xenoprof_op, op, arg); } static inline int HYPERVISOR_kexec_op( unsigned long op, void *args) { return _hypercall2(int, kexec_op, op, args); } #endif /* __HYPERCALL_H__ */ /* * Local variables: * c-file-style: "linux" * indent-tabs-mode: t * c-indent-level: 8 * c-basic-offset: 8 * tab-width: 8 * End: */ Index: projects/binutils-2.17/sys/i386/isa/npx.c =================================================================== --- projects/binutils-2.17/sys/i386/isa/npx.c (revision 215829) +++ projects/binutils-2.17/sys/i386/isa/npx.c (revision 215830) @@ -1,1104 +1,1104 @@ /*- * Copyright (c) 1990 William Jolitz. * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include "opt_isa.h" #include "opt_npx.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef NPX_DEBUG #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #ifdef XEN #include #include #endif #ifdef DEV_ISA #include #endif #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) #define CPU_ENABLE_SSE #endif /* * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. */ #if defined(__GNUCLIKE_ASM) && !defined(lint) #define fldcw(cw) __asm __volatile("fldcw %0" : : "m" (cw)) #define fnclex() __asm __volatile("fnclex") #define fninit() __asm __volatile("fninit") #define fnsave(addr) __asm __volatile("fnsave %0" : "=m" (*(addr))) #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=am" (*(addr))) #define fp_divide_by_0() __asm __volatile( \ "fldz; fld1; fdiv %st,%st(1); fnop") #define frstor(addr) __asm __volatile("frstor %0" : : "m" (*(addr))) #ifdef CPU_ENABLE_SSE #define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr))) #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) #endif #ifdef XEN #define start_emulating() (HYPERVISOR_fpu_taskswitch(1)) #define stop_emulating() (HYPERVISOR_fpu_taskswitch(0)) #else #define start_emulating() __asm __volatile( \ "smsw %%ax; orb %0,%%al; lmsw %%ax" \ : : "n" (CR0_TS) : "ax") #define stop_emulating() __asm __volatile("clts") #endif #else /* !(__GNUCLIKE_ASM && !lint) */ void fldcw(u_short cw); void fnclex(void); void fninit(void); void fnsave(caddr_t addr); void fnstcw(caddr_t addr); void fnstsw(caddr_t addr); void fp_divide_by_0(void); void frstor(caddr_t addr); #ifdef CPU_ENABLE_SSE void fxsave(caddr_t addr); void fxrstor(caddr_t addr); #endif void start_emulating(void); void stop_emulating(void); #endif /* __GNUCLIKE_ASM && !lint */ #ifdef CPU_ENABLE_SSE #define GET_FPU_CW(thread) \ (cpu_fxsr ? \ (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_cw : \ (thread)->td_pcb->pcb_save->sv_87.sv_env.en_cw) #define GET_FPU_SW(thread) \ (cpu_fxsr ? \ (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_sw : \ (thread)->td_pcb->pcb_save->sv_87.sv_env.en_sw) #define SET_FPU_CW(savefpu, value) do { \ if (cpu_fxsr) \ (savefpu)->sv_xmm.sv_env.en_cw = (value); \ else \ (savefpu)->sv_87.sv_env.en_cw = (value); \ } while (0) #else /* CPU_ENABLE_SSE */ #define GET_FPU_CW(thread) \ (thread->td_pcb->pcb_save->sv_87.sv_env.en_cw) #define GET_FPU_SW(thread) \ (thread->td_pcb->pcb_save->sv_87.sv_env.en_sw) #define SET_FPU_CW(savefpu, value) \ (savefpu)->sv_87.sv_env.en_cw = (value) #endif /* CPU_ENABLE_SSE */ typedef u_char bool_t; #ifdef CPU_ENABLE_SSE static void fpu_clean_state(void); #endif static void fpusave(union savefpu *); static void fpurstor(union savefpu *); static int npx_attach(device_t dev); static void npx_identify(driver_t *driver, device_t parent); static int npx_probe(device_t dev); int hw_float; SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, &hw_float, 0, "Floating point instructions executed in hardware"); static volatile u_int npx_traps_while_probing; static union savefpu npx_initialstate; alias_for_inthand_t probetrap; __asm(" \n\ .text \n\ .p2align 2,0x90 \n\ .type " __XSTRING(CNAME(probetrap)) ",@function \n\ " __XSTRING(CNAME(probetrap)) ": \n\ ss \n\ incl " __XSTRING(CNAME(npx_traps_while_probing)) " \n\ fnclex \n\ iret \n\ "); /* * Identify routine. Create a connection point on our parent for probing. */ static void npx_identify(driver, parent) driver_t *driver; device_t parent; { device_t child; child = BUS_ADD_CHILD(parent, 0, "npx", 0); if (child == NULL) panic("npx_identify"); } /* * Probe routine. Set flags to tell npxattach() what to do. Set up an * interrupt handler if npx needs to use interrupts. */ static int npx_probe(device_t dev) { struct gate_descriptor save_idt_npxtrap; u_short control, status; device_set_desc(dev, "math processor"); /* * Modern CPUs all have an FPU that uses the INT16 interface * and provide a simple way to verify that, so handle the * common case right away. */ if (cpu_feature & CPUID_FPU) { hw_float = 1; device_quiet(dev); return (0); } save_idt_npxtrap = idt[IDT_MF]; setidt(IDT_MF, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* * Don't trap while we're probing. */ stop_emulating(); /* * Finish resetting the coprocessor, if any. If there is an error * pending, then we may get a bogus IRQ13, but npx_intr() will handle * it OK. Bogus halts have never been observed, but we enabled * IRQ13 and cleared the BUSY# latch early to handle them anyway. */ fninit(); /* * Don't use fwait here because it might hang. * Don't use fnop here because it usually hangs if there is no FPU. */ DELAY(1000); /* wait for any IRQ13 */ #ifdef DIAGNOSTIC if (npx_traps_while_probing != 0) printf("fninit caused %u bogus npx trap(s)\n", npx_traps_while_probing); #endif /* * Check for a status of mostly zero. */ status = 0x5a5a; fnstsw(&status); if ((status & 0xb8ff) == 0) { /* * Good, now check for a proper control word. */ control = 0x5a5a; fnstcw(&control); if ((control & 0x1f3f) == 0x033f) { /* * We have an npx, now divide by 0 to see if exception * 16 works. */ control &= ~(1 << 2); /* enable divide by 0 trap */ fldcw(control); #ifdef FPU_ERROR_BROKEN /* * FPU error signal doesn't work on some CPU * accelerator board. */ hw_float = 1; return (0); #endif npx_traps_while_probing = 0; fp_divide_by_0(); if (npx_traps_while_probing != 0) { /* * Good, exception 16 works. */ hw_float = 1; goto cleanup; } device_printf(dev, "FPU does not use exception 16 for error reporting\n"); goto cleanup; } } /* * Probe failed. Floating point simply won't work. * Notify user and disable FPU/MMX/SSE instruction execution. */ device_printf(dev, "WARNING: no FPU!\n"); __asm __volatile("smsw %%ax; orb %0,%%al; lmsw %%ax" : : "n" (CR0_EM | CR0_MP) : "ax"); cleanup: idt[IDT_MF] = save_idt_npxtrap; return (hw_float ? 0 : ENXIO); } /* * Attach routine - announce which it is, and wire into system */ static int npx_attach(device_t dev) { npxinit(); critical_enter(); stop_emulating(); fpusave(&npx_initialstate); start_emulating(); #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { if (npx_initialstate.sv_xmm.sv_env.en_mxcsr_mask) cpu_mxcsr_mask = npx_initialstate.sv_xmm.sv_env.en_mxcsr_mask; else cpu_mxcsr_mask = 0xFFBF; bzero(npx_initialstate.sv_xmm.sv_fp, sizeof(npx_initialstate.sv_xmm.sv_fp)); bzero(npx_initialstate.sv_xmm.sv_xmm, sizeof(npx_initialstate.sv_xmm.sv_xmm)); /* XXX might need even more zeroing. */ } else #endif bzero(npx_initialstate.sv_87.sv_ac, sizeof(npx_initialstate.sv_87.sv_ac)); critical_exit(); return (0); } /* * Initialize floating point unit. */ void npxinit(void) { static union savefpu dummy; register_t saveintr; u_short control; if (!hw_float) return; /* * fninit has the same h/w bugs as fnsave. Use the detoxified * fnsave to throw away any junk in the fpu. npxsave() initializes * the fpu and sets fpcurthread = NULL as important side effects. * * It is too early for critical_enter() to work on AP. */ saveintr = intr_disable(); npxsave(&dummy); stop_emulating(); #ifdef CPU_ENABLE_SSE /* XXX npxsave() doesn't actually initialize the fpu in the SSE case. */ if (cpu_fxsr) fninit(); #endif control = __INITIAL_NPXCW__; fldcw(control); start_emulating(); intr_restore(saveintr); } /* * Free coprocessor (if we have it). */ void npxexit(td) struct thread *td; { critical_enter(); if (curthread == PCPU_GET(fpcurthread)) npxsave(PCPU_GET(curpcb)->pcb_save); critical_exit(); #ifdef NPX_DEBUG if (hw_float) { u_int masked_exceptions; masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f; /* * Log exceptions that would have trapped with the old * control word (overflow, divide by 0, and invalid operand). */ if (masked_exceptions & 0x0d) log(LOG_ERR, "pid %d (%s) exited with masked floating point exceptions 0x%02x\n", td->td_proc->p_pid, td->td_proc->p_comm, masked_exceptions); } #endif } int npxformat() { if (!hw_float) return (_MC_FPFMT_NODEV); #ifdef CPU_ENABLE_SSE if (cpu_fxsr) return (_MC_FPFMT_XMM); #endif return (_MC_FPFMT_387); } /* * The following mechanism is used to ensure that the FPE_... value * that is passed as a trapcode to the signal handler of the user * process does not have more than one bit set. * * Multiple bits may be set if the user process modifies the control * word while a status word bit is already set. While this is a sign * of bad coding, we have no choise than to narrow them down to one * bit, since we must not send a trapcode that is not exactly one of * the FPE_ macros. * * The mechanism has a static table with 127 entries. Each combination * of the 7 FPU status word exception bits directly translates to a * position in this table, where a single FPE_... value is stored. * This FPE_... value stored there is considered the "most important" * of the exception bits and will be sent as the signal code. The * precedence of the bits is based upon Intel Document "Numerical * Applications", Chapter "Special Computational Situations". * * The macro to choose one of these values does these steps: 1) Throw * away status word bits that cannot be masked. 2) Throw away the bits * currently masked in the control word, assuming the user isn't * interested in them anymore. 3) Reinsert status word bit 7 (stack * fault) if it is set, which cannot be masked but must be presered. * 4) Use the remaining bits to point into the trapcode table. * * The 6 maskable bits in order of their preference, as stated in the * above referenced Intel manual: * 1 Invalid operation (FP_X_INV) * 1a Stack underflow * 1b Stack overflow * 1c Operand of unsupported format * 1d SNaN operand. * 2 QNaN operand (not an exception, irrelavant here) * 3 Any other invalid-operation not mentioned above or zero divide * (FP_X_INV, FP_X_DZ) * 4 Denormal operand (FP_X_DNML) * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) * 6 Inexact result (FP_X_IMP) */ static char fpetable[128] = { 0, FPE_FLTINV, /* 1 - INV */ FPE_FLTUND, /* 2 - DNML */ FPE_FLTINV, /* 3 - INV | DNML */ FPE_FLTDIV, /* 4 - DZ */ FPE_FLTINV, /* 5 - INV | DZ */ FPE_FLTDIV, /* 6 - DNML | DZ */ FPE_FLTINV, /* 7 - INV | DNML | DZ */ FPE_FLTOVF, /* 8 - OFL */ FPE_FLTINV, /* 9 - INV | OFL */ FPE_FLTUND, /* A - DNML | OFL */ FPE_FLTINV, /* B - INV | DNML | OFL */ FPE_FLTDIV, /* C - DZ | OFL */ FPE_FLTINV, /* D - INV | DZ | OFL */ FPE_FLTDIV, /* E - DNML | DZ | OFL */ FPE_FLTINV, /* F - INV | DNML | DZ | OFL */ FPE_FLTUND, /* 10 - UFL */ FPE_FLTINV, /* 11 - INV | UFL */ FPE_FLTUND, /* 12 - DNML | UFL */ FPE_FLTINV, /* 13 - INV | DNML | UFL */ FPE_FLTDIV, /* 14 - DZ | UFL */ FPE_FLTINV, /* 15 - INV | DZ | UFL */ FPE_FLTDIV, /* 16 - DNML | DZ | UFL */ FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */ FPE_FLTOVF, /* 18 - OFL | UFL */ FPE_FLTINV, /* 19 - INV | OFL | UFL */ FPE_FLTUND, /* 1A - DNML | OFL | UFL */ FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */ FPE_FLTDIV, /* 1C - DZ | OFL | UFL */ FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */ FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */ FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */ FPE_FLTRES, /* 20 - IMP */ FPE_FLTINV, /* 21 - INV | IMP */ FPE_FLTUND, /* 22 - DNML | IMP */ FPE_FLTINV, /* 23 - INV | DNML | IMP */ FPE_FLTDIV, /* 24 - DZ | IMP */ FPE_FLTINV, /* 25 - INV | DZ | IMP */ FPE_FLTDIV, /* 26 - DNML | DZ | IMP */ FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */ FPE_FLTOVF, /* 28 - OFL | IMP */ FPE_FLTINV, /* 29 - INV | OFL | IMP */ FPE_FLTUND, /* 2A - DNML | OFL | IMP */ FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */ FPE_FLTDIV, /* 2C - DZ | OFL | IMP */ FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */ FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */ FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */ FPE_FLTUND, /* 30 - UFL | IMP */ FPE_FLTINV, /* 31 - INV | UFL | IMP */ FPE_FLTUND, /* 32 - DNML | UFL | IMP */ FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */ FPE_FLTDIV, /* 34 - DZ | UFL | IMP */ FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */ FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */ FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */ FPE_FLTOVF, /* 38 - OFL | UFL | IMP */ FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */ FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */ FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */ FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */ FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */ FPE_FLTSUB, /* 40 - STK */ FPE_FLTSUB, /* 41 - INV | STK */ FPE_FLTUND, /* 42 - DNML | STK */ FPE_FLTSUB, /* 43 - INV | DNML | STK */ FPE_FLTDIV, /* 44 - DZ | STK */ FPE_FLTSUB, /* 45 - INV | DZ | STK */ FPE_FLTDIV, /* 46 - DNML | DZ | STK */ FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */ FPE_FLTOVF, /* 48 - OFL | STK */ FPE_FLTSUB, /* 49 - INV | OFL | STK */ FPE_FLTUND, /* 4A - DNML | OFL | STK */ FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */ FPE_FLTDIV, /* 4C - DZ | OFL | STK */ FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */ FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */ FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */ FPE_FLTUND, /* 50 - UFL | STK */ FPE_FLTSUB, /* 51 - INV | UFL | STK */ FPE_FLTUND, /* 52 - DNML | UFL | STK */ FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */ FPE_FLTDIV, /* 54 - DZ | UFL | STK */ FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */ FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */ FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */ FPE_FLTOVF, /* 58 - OFL | UFL | STK */ FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */ FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */ FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */ FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */ FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */ FPE_FLTRES, /* 60 - IMP | STK */ FPE_FLTSUB, /* 61 - INV | IMP | STK */ FPE_FLTUND, /* 62 - DNML | IMP | STK */ FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */ FPE_FLTDIV, /* 64 - DZ | IMP | STK */ FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */ FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */ FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */ FPE_FLTOVF, /* 68 - OFL | IMP | STK */ FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */ FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */ FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */ FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */ FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */ FPE_FLTUND, /* 70 - UFL | IMP | STK */ FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */ FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */ FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */ FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */ FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */ FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */ FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */ }; /* * Preserve the FP status word, clear FP exceptions, then generate a SIGFPE. * * Clearing exceptions is necessary mainly to avoid IRQ13 bugs. We now * depend on longjmp() restoring a usable state. Restoring the state * or examining it might fail if we didn't clear exceptions. * * The error code chosen will be one of the FPE_... macros. It will be * sent as the second argument to old BSD-style signal handlers and as * "siginfo_t->si_code" (second argument) to SA_SIGINFO signal handlers. * * XXX the FP state is not preserved across signal handlers. So signal * handlers cannot afford to do FP unless they preserve the state or * longjmp() out. Both preserving the state and longjmp()ing may be * destroyed by IRQ13 bugs. Clearing FP exceptions is not an acceptable * solution for signals other than SIGFPE. */ int npxtrap() { u_short control, status; if (!hw_float) { printf("npxtrap: fpcurthread = %p, curthread = %p, hw_float = %d\n", PCPU_GET(fpcurthread), curthread, hw_float); panic("npxtrap from nowhere"); } critical_enter(); /* * Interrupt handling (for another interrupt) may have pushed the * state to memory. Fetch the relevant parts of the state from * wherever they are. */ if (PCPU_GET(fpcurthread) != curthread) { control = GET_FPU_CW(curthread); status = GET_FPU_SW(curthread); } else { fnstcw(&control); fnstsw(&status); } if (PCPU_GET(fpcurthread) == curthread) fnclex(); critical_exit(); return (fpetable[status & ((~control & 0x3f) | 0x40)]); } /* * Implement device not available (DNA) exception * * It would be better to switch FP context here (if curthread != fpcurthread) * and not necessarily for every context switch, but it is too hard to * access foreign pcb's. */ static int err_count = 0; int npxdna(void) { struct pcb *pcb; if (!hw_float) return (0); critical_enter(); if (PCPU_GET(fpcurthread) == curthread) { printf("npxdna: fpcurthread == curthread %d times\n", ++err_count); stop_emulating(); critical_exit(); return (1); } if (PCPU_GET(fpcurthread) != NULL) { printf("npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n", PCPU_GET(fpcurthread), PCPU_GET(fpcurthread)->td_proc->p_pid, curthread, curthread->td_proc->p_pid); panic("npxdna"); } stop_emulating(); /* * Record new context early in case frstor causes an IRQ13. */ PCPU_SET(fpcurthread, curthread); pcb = PCPU_GET(curpcb); #ifdef CPU_ENABLE_SSE if (cpu_fxsr) fpu_clean_state(); #endif if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) { /* * This is the first time this thread has used the FPU or * the PCB doesn't contain a clean FPU state. Explicitly * load an initial state. */ fpurstor(&npx_initialstate); if (pcb->pcb_initial_npxcw != __INITIAL_NPXCW__) fldcw(pcb->pcb_initial_npxcw); pcb->pcb_flags |= PCB_NPXINITDONE; if (PCB_USER_FPU(pcb)) pcb->pcb_flags |= PCB_NPXUSERINITDONE; } else { /* * The following fpurstor() may cause an IRQ13 when the * state being restored has a pending error. The error will * appear to have been triggered by the current (npx) user * instruction even when that instruction is a no-wait * instruction that should not trigger an error (e.g., * fnclex). On at least one 486 system all of the no-wait * instructions are broken the same as frstor, so our * treatment does not amplify the breakage. On at least * one 386/Cyrix 387 system, fnclex works correctly while * frstor and fnsave are broken, so our treatment breaks * fnclex if it is the first FPU instruction after a context * switch. */ fpurstor(pcb->pcb_save); } critical_exit(); return (1); } /* * Wrapper for fnsave instruction, partly to handle hardware bugs. When npx * exceptions are reported via IRQ13, spurious IRQ13's may be triggered by * no-wait npx instructions. See the Intel application note AP-578 for * details. This doesn't cause any additional complications here. IRQ13's * are inherently asynchronous unless the CPU is frozen to deliver them -- * one that started in userland may be delivered many instructions later, * after the process has entered the kernel. It may even be delivered after * the fnsave here completes. A spurious IRQ13 for the fnsave is handled in * the same way as a very-late-arriving non-spurious IRQ13 from user mode: * it is normally ignored at first because we set fpcurthread to NULL; it is * normally retriggered in npxdna() after return to user mode. * * npxsave() must be called with interrupts disabled, so that it clears * fpcurthread atomically with saving the state. We require callers to do the * disabling, since most callers need to disable interrupts anyway to call * npxsave() atomically with checking fpcurthread. * * A previous version of npxsave() went to great lengths to excecute fnsave * with interrupts enabled in case executing it froze the CPU. This case * can't happen, at least for Intel CPU/NPX's. Spurious IRQ13's don't imply * spurious freezes. */ void npxsave(addr) union savefpu *addr; { stop_emulating(); fpusave(addr); start_emulating(); PCPU_SET(fpcurthread, NULL); } void npxdrop() { struct thread *td; /* * Discard pending exceptions in the !cpu_fxsr case so that unmasked * ones don't cause a panic on the next frstor. */ #ifdef CPU_ENABLE_SSE if (!cpu_fxsr) #endif fnclex(); td = PCPU_GET(fpcurthread); KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread")); CRITICAL_ASSERT(td); PCPU_SET(fpcurthread, NULL); td->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; start_emulating(); } /* * Get the state of the FPU without dropping ownership (if possible). * It returns the FPU ownership status. */ int npxgetregs(struct thread *td, union savefpu *addr) { struct pcb *pcb; if (!hw_float) return (_MC_FPOWNED_NONE); pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) { bcopy(&npx_initialstate, addr, sizeof(npx_initialstate)); SET_FPU_CW(addr, pcb->pcb_initial_npxcw); return (_MC_FPOWNED_NONE); } critical_enter(); if (td == PCPU_GET(fpcurthread)) { fpusave(addr); #ifdef CPU_ENABLE_SSE if (!cpu_fxsr) #endif /* * fnsave initializes the FPU and destroys whatever * context it contains. Make sure the FPU owner * starts with a clean state next time. */ npxdrop(); critical_exit(); return (_MC_FPOWNED_FPU); } else { critical_exit(); bcopy(pcb->pcb_save, addr, sizeof(*addr)); return (_MC_FPOWNED_PCB); } } int npxgetuserregs(struct thread *td, union savefpu *addr) { struct pcb *pcb; if (!hw_float) return (_MC_FPOWNED_NONE); pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_NPXUSERINITDONE) == 0) { bcopy(&npx_initialstate, addr, sizeof(npx_initialstate)); SET_FPU_CW(addr, pcb->pcb_initial_npxcw); return (_MC_FPOWNED_NONE); } critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { fpusave(addr); #ifdef CPU_ENABLE_SSE if (!cpu_fxsr) #endif /* * fnsave initializes the FPU and destroys whatever * context it contains. Make sure the FPU owner * starts with a clean state next time. */ npxdrop(); critical_exit(); return (_MC_FPOWNED_FPU); } else { critical_exit(); bcopy(&pcb->pcb_user_save, addr, sizeof(*addr)); return (_MC_FPOWNED_PCB); } } /* * Set the state of the FPU. */ void npxsetregs(struct thread *td, union savefpu *addr) { struct pcb *pcb; if (!hw_float) return; pcb = td->td_pcb; critical_enter(); if (td == PCPU_GET(fpcurthread)) { #ifdef CPU_ENABLE_SSE if (!cpu_fxsr) #endif fnclex(); /* As in npxdrop(). */ fpurstor(addr); critical_exit(); } else { critical_exit(); bcopy(addr, pcb->pcb_save, sizeof(*addr)); } if (PCB_USER_FPU(pcb)) pcb->pcb_flags |= PCB_NPXUSERINITDONE; pcb->pcb_flags |= PCB_NPXINITDONE; } void npxsetuserregs(struct thread *td, union savefpu *addr) { struct pcb *pcb; if (!hw_float) return; pcb = td->td_pcb; critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { #ifdef CPU_ENABLE_SSE if (!cpu_fxsr) #endif fnclex(); /* As in npxdrop(). */ fpurstor(addr); critical_exit(); pcb->pcb_flags |= PCB_NPXUSERINITDONE | PCB_NPXINITDONE; } else { critical_exit(); bcopy(addr, &pcb->pcb_user_save, sizeof(*addr)); if (PCB_USER_FPU(pcb)) pcb->pcb_flags |= PCB_NPXINITDONE; pcb->pcb_flags |= PCB_NPXUSERINITDONE; } } static void fpusave(addr) union savefpu *addr; { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) fxsave(addr); else #endif fnsave(addr); } #ifdef CPU_ENABLE_SSE /* * On AuthenticAMD processors, the fxrstor instruction does not restore * the x87's stored last instruction pointer, last data pointer, and last * opcode values, except in the rare case in which the exception summary * (ES) bit in the x87 status word is set to 1. * * In order to avoid leaking this information across processes, we clean * these values by performing a dummy load before executing fxrstor(). */ static void fpu_clean_state(void) { static float dummy_variable = 0.0; u_short status; /* * Clear the ES bit in the x87 status word if it is currently * set, in order to avoid causing a fault in the upcoming load. */ fnstsw(&status); if (status & 0x80) fnclex(); /* * Load the dummy variable into the x87 stack. This mangles * the x87 stack, but we don't care since we're about to call * fxrstor() anyway. */ - __asm __volatile("ffree %%st(7); fld %0" : : "m" (dummy_variable)); + __asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable)); } #endif /* CPU_ENABLE_SSE */ static void fpurstor(addr) union savefpu *addr; { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) fxrstor(addr); else #endif frstor(addr); } static device_method_t npx_methods[] = { /* Device interface */ DEVMETHOD(device_identify, npx_identify), DEVMETHOD(device_probe, npx_probe), DEVMETHOD(device_attach, npx_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; static driver_t npx_driver = { "npx", npx_methods, 1, /* no softc */ }; static devclass_t npx_devclass; /* * We prefer to attach to the root nexus so that the usual case (exception 16) * doesn't describe the processor as being `on isa'. */ DRIVER_MODULE(npx, nexus, npx_driver, npx_devclass, 0, 0); #ifdef DEV_ISA /* * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI. */ static struct isa_pnp_id npxisa_ids[] = { { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */ { 0 } }; static int npxisa_probe(device_t dev) { int result; if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, npxisa_ids)) <= 0) { device_quiet(dev); } return(result); } static int npxisa_attach(device_t dev) { return (0); } static device_method_t npxisa_methods[] = { /* Device interface */ DEVMETHOD(device_probe, npxisa_probe), DEVMETHOD(device_attach, npxisa_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; static driver_t npxisa_driver = { "npxisa", npxisa_methods, 1, /* no softc */ }; static devclass_t npxisa_devclass; DRIVER_MODULE(npxisa, isa, npxisa_driver, npxisa_devclass, 0, 0); #ifndef PC98 DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0); #endif #endif /* DEV_ISA */ int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) { struct pcb *pcb; pcb = td->td_pcb; KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == &pcb->pcb_user_save, ("mangled pcb_save")); ctx->flags = 0; if ((pcb->pcb_flags & PCB_NPXINITDONE) != 0) ctx->flags |= FPU_KERN_CTX_NPXINITDONE; npxexit(td); ctx->prev = pcb->pcb_save; pcb->pcb_save = &ctx->hwstate; pcb->pcb_flags |= PCB_KERNNPX; pcb->pcb_flags &= ~PCB_NPXINITDONE; return (0); } int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) { struct pcb *pcb; pcb = td->td_pcb; critical_enter(); if (curthread == PCPU_GET(fpcurthread)) npxdrop(); critical_exit(); pcb->pcb_save = ctx->prev; if (pcb->pcb_save == &pcb->pcb_user_save) { if ((pcb->pcb_flags & PCB_NPXUSERINITDONE) != 0) pcb->pcb_flags |= PCB_NPXINITDONE; else pcb->pcb_flags &= ~PCB_NPXINITDONE; pcb->pcb_flags &= ~PCB_KERNNPX; } else { if ((ctx->flags & FPU_KERN_CTX_NPXINITDONE) != 0) pcb->pcb_flags |= PCB_NPXINITDONE; else pcb->pcb_flags &= ~PCB_NPXINITDONE; KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave")); } return (0); } int fpu_kern_thread(u_int flags) { struct pcb *pcb; pcb = PCPU_GET(curpcb); KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, ("Only kthread may use fpu_kern_thread")); KASSERT(pcb->pcb_save == &pcb->pcb_user_save, ("mangled pcb_save")); KASSERT(PCB_USER_FPU(pcb), ("recursive call")); pcb->pcb_flags |= PCB_KERNNPX; return (0); } int is_fpu_kern_thread(u_int flags) { if ((curthread->td_pflags & TDP_KTHREAD) == 0) return (0); return ((PCPU_GET(curpcb)->pcb_flags & PCB_KERNNPX) != 0); } Index: projects/binutils-2.17/sys/i386/pci/pci_bus.c =================================================================== --- projects/binutils-2.17/sys/i386/pci/pci_bus.c (revision 215829) +++ projects/binutils-2.17/sys/i386/pci/pci_bus.c (revision 215830) @@ -1,683 +1,688 @@ /*- * Copyright (c) 1997, Stefan Esser * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include #include #include #include #include #include #include #include #include #include #include #ifdef CPU_ELAN #include #endif #include #include #include #include "pcib_if.h" static int pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin); int legacy_pcib_maxslots(device_t dev) { return 31; } /* read configuration space register */ u_int32_t legacy_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, int bytes) { return(pci_cfgregread(bus, slot, func, reg, bytes)); } /* write configuration space register */ void legacy_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, u_int32_t data, int bytes) { pci_cfgregwrite(bus, slot, func, reg, data, bytes); } /* Pass MSI requests up to the nexus. */ static int legacy_pcib_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs) { device_t bus; bus = device_get_parent(pcib); return (PCIB_ALLOC_MSI(device_get_parent(bus), dev, count, maxcount, irqs)); } static int legacy_pcib_alloc_msix(device_t pcib, device_t dev, int *irq) { device_t bus; bus = device_get_parent(pcib); return (PCIB_ALLOC_MSIX(device_get_parent(bus), dev, irq)); } static int legacy_pcib_map_msi(device_t pcib, device_t dev, int irq, uint64_t *addr, uint32_t *data) { device_t bus; bus = device_get_parent(pcib); return (PCIB_MAP_MSI(device_get_parent(bus), dev, irq, addr, data)); } static const char * legacy_pcib_is_host_bridge(int bus, int slot, int func, uint32_t id, uint8_t class, uint8_t subclass, uint8_t *busnum) { const char *s = NULL; static uint8_t pxb[4]; /* hack for 450nx */ *busnum = 0; switch (id) { case 0x12258086: s = "Intel 824?? host to PCI bridge"; /* XXX This is a guess */ /* *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x41, 1); */ *busnum = bus; break; case 0x71208086: s = "Intel 82810 (i810 GMCH) Host To Hub bridge"; break; case 0x71228086: s = "Intel 82810-DC100 (i810-DC100 GMCH) Host To Hub bridge"; break; case 0x71248086: s = "Intel 82810E (i810E GMCH) Host To Hub bridge"; break; case 0x11308086: s = "Intel 82815 (i815 GMCH) Host To Hub bridge"; break; case 0x71808086: s = "Intel 82443LX (440 LX) host to PCI bridge"; break; case 0x71908086: s = "Intel 82443BX (440 BX) host to PCI bridge"; break; case 0x71928086: s = "Intel 82443BX host to PCI bridge (AGP disabled)"; break; case 0x71948086: s = "Intel 82443MX host to PCI bridge"; break; case 0x71a08086: s = "Intel 82443GX host to PCI bridge"; break; case 0x71a18086: s = "Intel 82443GX host to AGP bridge"; break; case 0x71a28086: s = "Intel 82443GX host to PCI bridge (AGP disabled)"; break; case 0x84c48086: s = "Intel 82454KX/GX (Orion) host to PCI bridge"; *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x4a, 1); break; case 0x84ca8086: /* * For the 450nx chipset, there is a whole bundle of * things pretending to be host bridges. The MIOC will * be seen first and isn't really a pci bridge (the * actual busses are attached to the PXB's). We need to * read the registers of the MIOC to figure out the * bus numbers for the PXB channels. * * Since the MIOC doesn't have a pci bus attached, we * pretend it wasn't there. */ pxb[0] = legacy_pcib_read_config(0, bus, slot, func, 0xd0, 1); /* BUSNO[0] */ pxb[1] = legacy_pcib_read_config(0, bus, slot, func, 0xd1, 1) + 1; /* SUBA[0]+1 */ pxb[2] = legacy_pcib_read_config(0, bus, slot, func, 0xd3, 1); /* BUSNO[1] */ pxb[3] = legacy_pcib_read_config(0, bus, slot, func, 0xd4, 1) + 1; /* SUBA[1]+1 */ return NULL; case 0x84cb8086: switch (slot) { case 0x12: s = "Intel 82454NX PXB#0, Bus#A"; *busnum = pxb[0]; break; case 0x13: s = "Intel 82454NX PXB#0, Bus#B"; *busnum = pxb[1]; break; case 0x14: s = "Intel 82454NX PXB#1, Bus#A"; *busnum = pxb[2]; break; case 0x15: s = "Intel 82454NX PXB#1, Bus#B"; *busnum = pxb[3]; break; } break; + case 0x1A308086: + s = "Intel 82845 Host to PCI bridge"; + break; /* AMD -- vendor 0x1022 */ case 0x30001022: s = "AMD Elan SC520 host to PCI bridge"; #ifdef CPU_ELAN init_AMD_Elan_sc520(); #else printf( "*** WARNING: missing CPU_ELAN -- timekeeping may be wrong\n"); #endif break; case 0x70061022: s = "AMD-751 host to PCI bridge"; break; case 0x700e1022: s = "AMD-761 host to PCI bridge"; break; /* SiS -- vendor 0x1039 */ case 0x04961039: s = "SiS 85c496"; break; case 0x04061039: s = "SiS 85c501"; break; case 0x06011039: s = "SiS 85c601"; break; case 0x55911039: s = "SiS 5591 host to PCI bridge"; break; case 0x00011039: s = "SiS 5591 host to AGP bridge"; break; /* VLSI -- vendor 0x1004 */ case 0x00051004: s = "VLSI 82C592 Host to PCI bridge"; break; /* XXX Here is MVP3, I got the datasheet but NO M/B to test it */ /* totally. Please let me know if anything wrong. -F */ /* XXX need info on the MVP3 -- any takers? */ case 0x05981106: s = "VIA 82C598MVP (Apollo MVP3) host bridge"; break; /* AcerLabs -- vendor 0x10b9 */ /* Funny : The datasheet told me vendor id is "10b8",sub-vendor */ /* id is '10b9" but the register always shows "10b9". -Foxfair */ case 0x154110b9: s = "AcerLabs M1541 (Aladdin-V) PCI host bridge"; break; /* OPTi -- vendor 0x1045 */ case 0xc7011045: s = "OPTi 82C700 host to PCI bridge"; break; case 0xc8221045: s = "OPTi 82C822 host to PCI Bridge"; break; /* ServerWorks -- vendor 0x1166 */ case 0x00051166: s = "ServerWorks NB6536 2.0HE host to PCI bridge"; *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1); break; case 0x00061166: /* FALLTHROUGH */ case 0x00081166: /* FALLTHROUGH */ case 0x02011166: /* FALLTHROUGH */ case 0x010f1014: /* IBM re-badged ServerWorks chipset */ s = "ServerWorks host to PCI bridge"; *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1); break; case 0x00091166: s = "ServerWorks NB6635 3.0LE host to PCI bridge"; *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1); break; case 0x00101166: s = "ServerWorks CIOB30 host to PCI bridge"; *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1); break; case 0x00111166: /* FALLTHROUGH */ case 0x03021014: /* IBM re-badged ServerWorks chipset */ s = "ServerWorks CMIC-HE host to PCI-X bridge"; *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1); break; /* XXX unknown chipset, but working */ case 0x00171166: /* FALLTHROUGH */ case 0x01011166: + case 0x01101166: + case 0x02251166: s = "ServerWorks host to PCI bridge(unknown chipset)"; *busnum = legacy_pcib_read_config(0, bus, slot, func, 0x44, 1); break; /* Compaq/HP -- vendor 0x0e11 */ case 0x60100e11: s = "Compaq/HP Model 6010 HotPlug PCI Bridge"; *busnum = legacy_pcib_read_config(0, bus, slot, func, 0xc8, 1); break; /* Integrated Micro Solutions -- vendor 0x10e0 */ case 0x884910e0: s = "Integrated Micro Solutions VL Bridge"; break; default: if (class == PCIC_BRIDGE && subclass == PCIS_BRIDGE_HOST) s = "Host to PCI bridge"; break; } return s; } /* * Scan the first pci bus for host-pci bridges and add pcib instances * to the nexus for each bridge. */ static void legacy_pcib_identify(driver_t *driver, device_t parent) { int bus, slot, func; u_int8_t hdrtype; int found = 0; int pcifunchigh; int found824xx = 0; int found_orion = 0; device_t child; devclass_t pci_devclass; if (pci_cfgregopen() == 0) return; /* * Check to see if we haven't already had a PCI bus added * via some other means. If we have, bail since otherwise * we're going to end up duplicating it. */ if ((pci_devclass = devclass_find("pci")) && devclass_get_device(pci_devclass, 0)) return; bus = 0; retry: for (slot = 0; slot <= PCI_SLOTMAX; slot++) { func = 0; hdrtype = legacy_pcib_read_config(0, bus, slot, func, PCIR_HDRTYPE, 1); /* * When enumerating bus devices, the standard says that * one should check the header type and ignore the slots whose * header types that the software doesn't know about. We use * this to filter out devices. */ if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE) continue; if ((hdrtype & PCIM_MFDEV) && (!found_orion || hdrtype != 0xff)) pcifunchigh = PCI_FUNCMAX; else pcifunchigh = 0; for (func = 0; func <= pcifunchigh; func++) { /* * Read the IDs and class from the device. */ u_int32_t id; u_int8_t class, subclass, busnum; const char *s; device_t *devs; int ndevs, i; id = legacy_pcib_read_config(0, bus, slot, func, PCIR_DEVVENDOR, 4); if (id == -1) continue; class = legacy_pcib_read_config(0, bus, slot, func, PCIR_CLASS, 1); subclass = legacy_pcib_read_config(0, bus, slot, func, PCIR_SUBCLASS, 1); s = legacy_pcib_is_host_bridge(bus, slot, func, id, class, subclass, &busnum); if (s == NULL) continue; /* * Check to see if the physical bus has already * been seen. Eg: hybrid 32 and 64 bit host * bridges to the same logical bus. */ if (device_get_children(parent, &devs, &ndevs) == 0) { for (i = 0; s != NULL && i < ndevs; i++) { if (strcmp(device_get_name(devs[i]), "pcib") != 0) continue; if (legacy_get_pcibus(devs[i]) == busnum) s = NULL; } free(devs, M_TEMP); } if (s == NULL) continue; /* * Add at priority 100 to make sure we * go after any motherboard resources */ child = BUS_ADD_CHILD(parent, 100, "pcib", busnum); device_set_desc(child, s); legacy_set_pcibus(child, busnum); found = 1; if (id == 0x12258086) found824xx = 1; if (id == 0x84c48086) found_orion = 1; } } if (found824xx && bus == 0) { bus++; goto retry; } /* * Make sure we add at least one bridge since some old * hardware doesn't actually have a host-pci bridge device. * Note that pci_cfgregopen() thinks we have PCI devices.. */ if (!found) { if (bootverbose) printf( "legacy_pcib_identify: no bridge found, adding pcib0 anyway\n"); child = BUS_ADD_CHILD(parent, 100, "pcib", 0); legacy_set_pcibus(child, 0); } } static int legacy_pcib_probe(device_t dev) { if (pci_cfgregopen() == 0) return ENXIO; return -100; } static int legacy_pcib_attach(device_t dev) { device_t pir; int bus; /* * Look for a PCI BIOS interrupt routing table as that will be * our method of routing interrupts if we have one. */ bus = pcib_get_bus(dev); if (pci_pir_probe(bus, 0)) { pir = BUS_ADD_CHILD(device_get_parent(dev), 0, "pir", 0); if (pir != NULL) device_probe_and_attach(pir); } device_add_child(dev, "pci", bus); return bus_generic_attach(dev); } int legacy_pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) { switch (which) { case PCIB_IVAR_DOMAIN: *result = 0; return 0; case PCIB_IVAR_BUS: *result = legacy_get_pcibus(dev); return 0; } return ENOENT; } int legacy_pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t value) { switch (which) { case PCIB_IVAR_DOMAIN: return EINVAL; case PCIB_IVAR_BUS: legacy_set_pcibus(dev, value); return 0; } return ENOENT; } SYSCTL_DECL(_hw_pci); static unsigned long legacy_host_mem_start = 0x80000000; TUNABLE_ULONG("hw.pci.host_mem_start", &legacy_host_mem_start); SYSCTL_ULONG(_hw_pci, OID_AUTO, host_mem_start, CTLFLAG_RDTUN, &legacy_host_mem_start, 0x80000000, "Limit the host bridge memory to being above this address. Must be\n\ set at boot via a tunable."); struct resource * legacy_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid, u_long start, u_long end, u_long count, u_int flags) { /* * If no memory preference is given, use upper 32MB slot most * bioses use for their memory window. Typically other bridges * before us get in the way to assert their preferences on memory. * Hardcoding like this sucks, so a more MD/MI way needs to be * found to do it. This is typically only used on older laptops * that don't have pci busses behind pci bridge, so assuming > 32MB * is liekly OK. * * However, this can cause problems for other chipsets, so we make * this tunable by hw.pci.host_mem_start. */ if (type == SYS_RES_MEMORY && start == 0UL && end == ~0UL) start = legacy_host_mem_start; if (type == SYS_RES_IOPORT && start == 0UL && end == ~0UL) start = 0x1000; return (bus_generic_alloc_resource(dev, child, type, rid, start, end, count, flags)); } static device_method_t legacy_pcib_methods[] = { /* Device interface */ DEVMETHOD(device_identify, legacy_pcib_identify), DEVMETHOD(device_probe, legacy_pcib_probe), DEVMETHOD(device_attach, legacy_pcib_attach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /* Bus interface */ DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_read_ivar, legacy_pcib_read_ivar), DEVMETHOD(bus_write_ivar, legacy_pcib_write_ivar), DEVMETHOD(bus_alloc_resource, legacy_pcib_alloc_resource), DEVMETHOD(bus_release_resource, bus_generic_release_resource), DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), /* pcib interface */ DEVMETHOD(pcib_maxslots, legacy_pcib_maxslots), DEVMETHOD(pcib_read_config, legacy_pcib_read_config), DEVMETHOD(pcib_write_config, legacy_pcib_write_config), DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt), DEVMETHOD(pcib_alloc_msi, legacy_pcib_alloc_msi), DEVMETHOD(pcib_release_msi, pcib_release_msi), DEVMETHOD(pcib_alloc_msix, legacy_pcib_alloc_msix), DEVMETHOD(pcib_release_msix, pcib_release_msix), DEVMETHOD(pcib_map_msi, legacy_pcib_map_msi), { 0, 0 } }; static devclass_t hostb_devclass; DEFINE_CLASS_0(pcib, legacy_pcib_driver, legacy_pcib_methods, 1); DRIVER_MODULE(pcib, legacy, legacy_pcib_driver, hostb_devclass, 0, 0); /* * Install placeholder to claim the resources owned by the * PCI bus interface. This could be used to extract the * config space registers in the extreme case where the PnP * ID is available and the PCI BIOS isn't, but for now we just * eat the PnP ID and do nothing else. * * XXX we should silence this probe, as it will generally confuse * people. */ static struct isa_pnp_id pcibus_pnp_ids[] = { { 0x030ad041 /* PNP0A03 */, "PCI Bus" }, { 0x080ad041 /* PNP0A08 */, "PCIe Bus" }, { 0 } }; static int pcibus_pnp_probe(device_t dev) { int result; if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, pcibus_pnp_ids)) <= 0) device_quiet(dev); return(result); } static int pcibus_pnp_attach(device_t dev) { return(0); } static device_method_t pcibus_pnp_methods[] = { /* Device interface */ DEVMETHOD(device_probe, pcibus_pnp_probe), DEVMETHOD(device_attach, pcibus_pnp_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; static devclass_t pcibus_pnp_devclass; DEFINE_CLASS_0(pcibus_pnp, pcibus_pnp_driver, pcibus_pnp_methods, 1); DRIVER_MODULE(pcibus_pnp, isa, pcibus_pnp_driver, pcibus_pnp_devclass, 0, 0); /* * Provide a PCI-PCI bridge driver for PCI busses behind PCI-PCI bridges * that appear in the PCIBIOS Interrupt Routing Table to use the routing * table for interrupt routing when possible. */ static int pcibios_pcib_probe(device_t bus); static device_method_t pcibios_pcib_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, pcibios_pcib_probe), /* pcib interface */ DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt), {0, 0} }; static devclass_t pcib_devclass; DEFINE_CLASS_1(pcib, pcibios_pcib_driver, pcibios_pcib_pci_methods, sizeof(struct pcib_softc), pcib_driver); DRIVER_MODULE(pcibios_pcib, pci, pcibios_pcib_driver, pcib_devclass, 0, 0); static int pcibios_pcib_probe(device_t dev) { int bus; if ((pci_get_class(dev) != PCIC_BRIDGE) || (pci_get_subclass(dev) != PCIS_BRIDGE_PCI)) return (ENXIO); bus = pci_read_config(dev, PCIR_SECBUS_1, 1); if (bus == 0) return (ENXIO); if (!pci_pir_probe(bus, 1)) return (ENXIO); device_set_desc(dev, "PCIBIOS PCI-PCI bridge"); return (-2000); } static int pcibios_pcib_route_interrupt(device_t pcib, device_t dev, int pin) { return (pci_pir_route_interrupt(pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev), pin)); } Index: projects/binutils-2.17/sys/i386/xen/pmap.c =================================================================== --- projects/binutils-2.17/sys/i386/xen/pmap.c (revision 215829) +++ projects/binutils-2.17/sys/i386/xen/pmap.c (revision 215830) @@ -1,4317 +1,4329 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 2005 Alan L. Cox * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 */ /*- * Copyright (c) 2003 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Jake Burkholder, * Safeport Network Services, and Network Associates Laboratories, the * Security Research Division of Network Associates, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * In addition to hardware address maps, this * module is called upon to provide software-use-only * maps which may or may not be stored in the same * form as hardware maps. These pseudo-maps are * used to store intermediate results from copy * operations to and from address spaces. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. */ #include "opt_cpu.h" #include "opt_pmap.h" #include "opt_msgbuf.h" #include "opt_smp.h" #include "opt_xbox.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #ifdef XBOX #include #endif #include #include #include #include #include #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) #define CPU_ENABLE_SSE #endif #ifndef PMAP_SHPGPERPROC #define PMAP_SHPGPERPROC 200 #endif #define DIAGNOSTIC #if !defined(DIAGNOSTIC) #ifdef __GNUC_GNU_INLINE__ #define PMAP_INLINE __attribute__((__gnu_inline__)) inline #else #define PMAP_INLINE extern inline #endif #else #define PMAP_INLINE #endif #define PV_STATS #ifdef PV_STATS #define PV_STAT(x) do { x ; } while (0) #else #define PV_STAT(x) do { } while (0) #endif #define pa_index(pa) ((pa) >> PDRSHIFT) #define pa_to_pvh(pa) (&pv_table[pa_index(pa)]) /* * Get PDEs and PTEs for user/kernel address space */ #define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) #define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) #define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) #define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) #define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) struct pmap kernel_pmap_store; LIST_HEAD(pmaplist, pmap); static struct pmaplist allpmaps; static struct mtx allpmaps_lock; vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ int pgeflag = 0; /* PG_G or-in */ int pseflag = 0; /* PG_PS or-in */ int nkpt; vm_offset_t kernel_vm_end; extern u_int32_t KERNend; #ifdef PAE pt_entry_t pg_nx; #endif static int pat_works; /* Is page attribute table sane? */ /* * Data for the pv entry allocation mechanism */ static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; static struct md_page *pv_table; static int shpgperproc = PMAP_SHPGPERPROC; struct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */ int pv_maxchunks; /* How many chunks we have KVA for */ vm_offset_t pv_vafree; /* freelist stored in the PTE */ /* * All those kernel PT submaps that BSD is so fond of */ struct sysmaps { struct mtx lock; pt_entry_t *CMAP1; pt_entry_t *CMAP2; caddr_t CADDR1; caddr_t CADDR2; }; static struct sysmaps sysmaps_pcpu[MAXCPU]; static pt_entry_t *CMAP3; caddr_t ptvmmap = 0; static caddr_t CADDR3; struct msgbuf *msgbufp = 0; /* * Crashdump maps. */ static caddr_t crashdumpmap; static pt_entry_t *PMAP1 = 0, *PMAP2; static pt_entry_t *PADDR1 = 0, *PADDR2; #ifdef SMP static int PMAP1cpu; static int PMAP1changedcpu; SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD, &PMAP1changedcpu, 0, "Number of times pmap_pte_quick changed CPU with same PMAP1"); #endif static int PMAP1changed; SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD, &PMAP1changed, 0, "Number of times pmap_pte_quick changed PMAP1"); static int PMAP1unchanged; SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD, &PMAP1unchanged, 0, "Number of times pmap_pte_quick didn't change PMAP1"); static struct mtx PMAP2mutex; SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); static int pg_ps_enabled; SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN, &pg_ps_enabled, 0, "Are large page mappings enabled?"); SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0, "Max number of PV entries"); SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0, "Page share factor per proc"); SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0, "2/4MB page mapping counters"); static u_long pmap_pde_mappings; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD, &pmap_pde_mappings, 0, "2/4MB page mappings"); static void free_pv_entry(pmap_t pmap, pv_entry_t pv); static pv_entry_t get_pv_entry(pmap_t locked_pmap, int try); static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static vm_page_t pmap_enter_quick_locked(multicall_entry_t **mcl, int *count, pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte); static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva, vm_page_t *free); static void pmap_remove_page(struct pmap *pmap, vm_offset_t va, vm_page_t *free); static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va); static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags); static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags); static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free); static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va); static void pmap_pte_release(pt_entry_t *pte); static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *); static vm_offset_t pmap_kmem_choose(vm_offset_t addr); static boolean_t pmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr); static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode); static __inline void pagezero(void *page); CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t)); CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t)); /* * If you get an error here, then you set KVA_PAGES wrong! See the * description of KVA_PAGES in sys/i386/include/pmap.h. It must be * multiple of 4 for a normal kernel, or a multiple of 8 for a PAE. */ CTASSERT(KERNBASE % (1 << 24) == 0); void pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type) { vm_paddr_t pdir_ma = vtomach(&pmap->pm_pdir[ptepindex]); switch (type) { case SH_PD_SET_VA: #if 0 xen_queue_pt_update(shadow_pdir_ma, xpmap_ptom(val & ~(PG_RW))); #endif xen_queue_pt_update(pdir_ma, xpmap_ptom(val)); break; case SH_PD_SET_VA_MA: #if 0 xen_queue_pt_update(shadow_pdir_ma, val & ~(PG_RW)); #endif xen_queue_pt_update(pdir_ma, val); break; case SH_PD_SET_VA_CLEAR: #if 0 xen_queue_pt_update(shadow_pdir_ma, 0); #endif xen_queue_pt_update(pdir_ma, 0); break; } } /* * Move the kernel virtual free pointer to the next * 4MB. This is used to help improve performance * by using a large (4MB) page for much of the kernel * (.text, .data, .bss) */ static vm_offset_t pmap_kmem_choose(vm_offset_t addr) { vm_offset_t newaddr = addr; #ifndef DISABLE_PSE if (cpu_feature & CPUID_PSE) newaddr = (addr + PDRMASK) & ~PDRMASK; #endif return newaddr; } /* * Bootstrap the system enough to run with virtual memory. * * On the i386 this is called after mapping has already been enabled * and just syncs the pmap module with what has already been done. * [We can't call it easily with mapping off since the kernel is not * mapped with PA == VA, hence we would have to relocate every address * from the linked base (virtual) address "KERNBASE" to the actual * (physical) address starting relative to 0] */ void pmap_bootstrap(vm_paddr_t firstaddr) { vm_offset_t va; pt_entry_t *pte, *unused; struct sysmaps *sysmaps; int i; /* * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too * large. It should instead be correctly calculated in locore.s and * not based on 'first' (which is a physical address, not a virtual * address, for the start of unused physical memory). The kernel * page tables are NOT double mapped and thus should not be included * in this calculation. */ virtual_avail = (vm_offset_t) KERNBASE + firstaddr; virtual_avail = pmap_kmem_choose(virtual_avail); virtual_end = VM_MAX_KERNEL_ADDRESS; /* * Initialize the kernel pmap (which is statically allocated). */ PMAP_LOCK_INIT(kernel_pmap); kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD); #ifdef PAE kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); #endif kernel_pmap->pm_active = -1; /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); LIST_INIT(&allpmaps); mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); if (nkpt == 0) nkpt = NKPT; /* * Reserve some special page table entries/VA space for temporary * mapping of pages. */ #define SYSMAP(c, p, v, n) \ v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); va = virtual_avail; pte = vtopte(va); /* * CMAP1/CMAP2 are used for zeroing and copying pages. * CMAP3 is used for the idle process page zeroing. */ for (i = 0; i < MAXCPU; i++) { sysmaps = &sysmaps_pcpu[i]; mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF); SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1) SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1) PT_SET_MA(sysmaps->CADDR1, 0); PT_SET_MA(sysmaps->CADDR2, 0); } SYSMAP(caddr_t, CMAP3, CADDR3, 1) PT_SET_MA(CADDR3, 0); /* * Crashdump maps. */ SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS) /* * ptvmmap is used for reading arbitrary physical pages via /dev/mem. */ SYSMAP(caddr_t, unused, ptvmmap, 1) /* * msgbufp is used to map the system message buffer. */ SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(MSGBUF_SIZE))) /* * ptemap is used for pmap_pte_quick */ SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1); SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1); mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF); virtual_avail = va; /* * Leave in place an identity mapping (virt == phys) for the low 1 MB * physical memory region that is used by the ACPI wakeup code. This * mapping must not have PG_G set. */ #ifndef XEN /* * leave here deliberately to show that this is not supported */ #ifdef XBOX /* FIXME: This is gross, but needed for the XBOX. Since we are in such * an early stadium, we cannot yet neatly map video memory ... :-( * Better fixes are very welcome! */ if (!arch_i386_is_xbox) #endif for (i = 1; i < NKPT; i++) PTD[i] = 0; /* Initialize the PAT MSR if present. */ pmap_init_pat(); /* Turn on PG_G on kernel page(s) */ pmap_set_pg(); #endif } /* * Setup the PAT MSR. */ void pmap_init_pat(void) { uint64_t pat_msr; /* Bail if this CPU doesn't implement PAT. */ if (!(cpu_feature & CPUID_PAT)) return; if (cpu_vendor_id != CPU_VENDOR_INTEL || (CPUID_TO_FAMILY(cpu_id) == 6 && CPUID_TO_MODEL(cpu_id) >= 0xe)) { /* * Leave the indices 0-3 at the default of WB, WT, UC, and UC-. * Program 4 and 5 as WP and WC. * Leave 6 and 7 as UC and UC-. */ pat_msr = rdmsr(MSR_PAT); pat_msr &= ~(PAT_MASK(4) | PAT_MASK(5)); pat_msr |= PAT_VALUE(4, PAT_WRITE_PROTECTED) | PAT_VALUE(5, PAT_WRITE_COMBINING); pat_works = 1; } else { /* * Due to some Intel errata, we can only safely use the lower 4 * PAT entries. Thus, just replace PAT Index 2 with WC instead * of UC-. * * Intel Pentium III Processor Specification Update * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B * or Mode C Paging) * * Intel Pentium IV Processor Specification Update * Errata N46 (PAT Index MSB May Be Calculated Incorrectly) */ pat_msr = rdmsr(MSR_PAT); pat_msr &= ~PAT_MASK(2); pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING); pat_works = 0; } wrmsr(MSR_PAT, pat_msr); } /* * Initialize a vm_page's machine-dependent fields. */ void pmap_page_init(vm_page_t m) { TAILQ_INIT(&m->md.pv_list); m->md.pat_mode = PAT_WRITE_BACK; } /* * ABuse the pte nodes for unmapped kva to thread a kva freelist through. * Requirements: * - Must deal with pages in order to ensure that none of the PG_* bits * are ever set, PG_V in particular. * - Assumes we can write to ptes without pte_store() atomic ops, even * on PAE systems. This should be ok. * - Assumes nothing will ever test these addresses for 0 to indicate * no mapping instead of correctly checking PG_V. * - Assumes a vm_offset_t will fit in a pte (true for i386). * Because PG_V is never set, there can be no mappings to invalidate. */ static int ptelist_count = 0; static vm_offset_t pmap_ptelist_alloc(vm_offset_t *head) { vm_offset_t va; vm_offset_t *phead = (vm_offset_t *)*head; if (ptelist_count == 0) { printf("out of memory!!!!!!\n"); return (0); /* Out of memory */ } ptelist_count--; va = phead[ptelist_count]; return (va); } static void pmap_ptelist_free(vm_offset_t *head, vm_offset_t va) { vm_offset_t *phead = (vm_offset_t *)*head; phead[ptelist_count++] = va; } static void pmap_ptelist_init(vm_offset_t *head, void *base, int npages) { int i, nstackpages; vm_offset_t va; vm_page_t m; nstackpages = (npages + PAGE_SIZE/sizeof(vm_offset_t) - 1)/ (PAGE_SIZE/sizeof(vm_offset_t)); for (i = 0; i < nstackpages; i++) { va = (vm_offset_t)base + i * PAGE_SIZE; m = vm_page_alloc(NULL, i, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); pmap_qenter(va, &m, 1); } *head = (vm_offset_t)base; for (i = npages - 1; i >= nstackpages; i--) { va = (vm_offset_t)base + i * PAGE_SIZE; pmap_ptelist_free(head, va); } } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. */ void pmap_init(void) { vm_page_t mpte; vm_size_t s; int i, pv_npg; /* * Initialize the vm page array entries for the kernel pmap's * page table pages. */ for (i = 0; i < nkpt; i++) { mpte = PHYS_TO_VM_PAGE(xpmap_mtop(PTD[i + KPTDI] & PG_FRAME)); KASSERT(mpte >= vm_page_array && mpte < &vm_page_array[vm_page_array_size], ("pmap_init: page table page is out of range")); mpte->pindex = i + KPTDI; mpte->phys_addr = xpmap_mtop(PTD[i + KPTDI] & PG_FRAME); } /* * Initialize the address space (zone) for the pv entries. Set a * high water mark so that the system can recover from excessive * numbers of pv entries. */ TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); pv_entry_max = shpgperproc * maxproc + cnt.v_page_count; TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); pv_entry_max = roundup(pv_entry_max, _NPCPV); pv_entry_high_water = 9 * (pv_entry_max / 10); /* * Are large page mappings enabled? */ TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled); /* * Calculate the size of the pv head table for superpages. */ for (i = 0; phys_avail[i + 1]; i += 2); pv_npg = round_4mpage(phys_avail[(i - 2) + 1]) / NBPDR; /* * Allocate memory for the pv head table for superpages. */ s = (vm_size_t)(pv_npg * sizeof(struct md_page)); s = round_page(s); pv_table = (struct md_page *)kmem_alloc(kernel_map, s); for (i = 0; i < pv_npg; i++) TAILQ_INIT(&pv_table[i].pv_list); pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc); pv_chunkbase = (struct pv_chunk *)kmem_alloc_nofault(kernel_map, PAGE_SIZE * pv_maxchunks); if (pv_chunkbase == NULL) panic("pmap_init: not enough kvm for pv chunks"); pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks); } /*************************************************** * Low level helper routines..... ***************************************************/ /* * Determine the appropriate bits to set in a PTE or PDE for a specified * caching mode. */ int pmap_cache_bits(int mode, boolean_t is_pde) { int pat_flag, pat_index, cache_bits; /* The PAT bit is different for PTE's and PDE's. */ pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT; /* If we don't support PAT, map extended modes to older ones. */ if (!(cpu_feature & CPUID_PAT)) { switch (mode) { case PAT_UNCACHEABLE: case PAT_WRITE_THROUGH: case PAT_WRITE_BACK: break; case PAT_UNCACHED: case PAT_WRITE_COMBINING: case PAT_WRITE_PROTECTED: mode = PAT_UNCACHEABLE; break; } } /* Map the caching mode to a PAT index. */ if (pat_works) { switch (mode) { case PAT_UNCACHEABLE: pat_index = 3; break; case PAT_WRITE_THROUGH: pat_index = 1; break; case PAT_WRITE_BACK: pat_index = 0; break; case PAT_UNCACHED: pat_index = 2; break; case PAT_WRITE_COMBINING: pat_index = 5; break; case PAT_WRITE_PROTECTED: pat_index = 4; break; default: panic("Unknown caching mode %d\n", mode); } } else { switch (mode) { case PAT_UNCACHED: case PAT_UNCACHEABLE: case PAT_WRITE_PROTECTED: pat_index = 3; break; case PAT_WRITE_THROUGH: pat_index = 1; break; case PAT_WRITE_BACK: pat_index = 0; break; case PAT_WRITE_COMBINING: pat_index = 2; break; default: panic("Unknown caching mode %d\n", mode); } } /* Map the 3-bit index value into the PAT, PCD, and PWT bits. */ cache_bits = 0; if (pat_index & 0x4) cache_bits |= pat_flag; if (pat_index & 0x2) cache_bits |= PG_NC_PCD; if (pat_index & 0x1) cache_bits |= PG_NC_PWT; return (cache_bits); } #ifdef SMP /* * For SMP, these functions have to use the IPI mechanism for coherence. * * N.B.: Before calling any of the following TLB invalidation functions, * the calling processor must ensure that all stores updating a non- * kernel page table are globally performed. Otherwise, another * processor could cache an old, pre-update entry without being * invalidated. This can happen one of two ways: (1) The pmap becomes * active on another processor after its pm_active field is checked by * one of the following functions but before a store updating the page * table is globally performed. (2) The pmap becomes active on another * processor before its pm_active field is checked but due to * speculative loads one of the following functions stills reads the * pmap as inactive on the other processor. * * The kernel page table is exempt because its pm_active field is * immutable. The kernel page table is always active on every * processor. */ void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { cpumask_t cpumask, other_cpus; CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x", pmap, va); sched_pin(); if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { invlpg(va); smp_invlpg(va); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); if (pmap->pm_active & cpumask) invlpg(va); if (pmap->pm_active & other_cpus) smp_masked_invlpg(pmap->pm_active & other_cpus, va); } sched_unpin(); PT_UPDATES_FLUSH(); } void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { cpumask_t cpumask, other_cpus; vm_offset_t addr; CTR3(KTR_PMAP, "pmap_invalidate_page: pmap=%p eva=0x%x sva=0x%x", pmap, sva, eva); sched_pin(); if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); smp_invlpg_range(sva, eva); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); if (pmap->pm_active & cpumask) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); if (pmap->pm_active & other_cpus) smp_masked_invlpg_range(pmap->pm_active & other_cpus, sva, eva); } sched_unpin(); PT_UPDATES_FLUSH(); } void pmap_invalidate_all(pmap_t pmap) { cpumask_t cpumask, other_cpus; CTR1(KTR_PMAP, "pmap_invalidate_page: pmap=%p", pmap); sched_pin(); if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { invltlb(); smp_invltlb(); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); if (pmap->pm_active & cpumask) invltlb(); if (pmap->pm_active & other_cpus) smp_masked_invltlb(pmap->pm_active & other_cpus); } sched_unpin(); } void pmap_invalidate_cache(void) { sched_pin(); wbinvd(); smp_cache_flush(); sched_unpin(); } #else /* !SMP */ /* * Normal, non-SMP, 486+ invalidation functions. * We inline these within pmap.c for speed. */ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x", pmap, va); if (pmap == kernel_pmap || pmap->pm_active) invlpg(va); PT_UPDATES_FLUSH(); } PMAP_INLINE void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; if (eva - sva > PAGE_SIZE) CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x", pmap, sva, eva); if (pmap == kernel_pmap || pmap->pm_active) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); PT_UPDATES_FLUSH(); } PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap); if (pmap == kernel_pmap || pmap->pm_active) invltlb(); } PMAP_INLINE void pmap_invalidate_cache(void) { wbinvd(); } #endif /* !SMP */ void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva) { KASSERT((sva & PAGE_MASK) == 0, ("pmap_invalidate_cache_range: sva not page-aligned")); KASSERT((eva & PAGE_MASK) == 0, ("pmap_invalidate_cache_range: eva not page-aligned")); if (cpu_feature & CPUID_SS) ; /* If "Self Snoop" is supported, do nothing. */ else if (cpu_feature & CPUID_CLFSH) { /* * Otherwise, do per-cache line flush. Use the mfence * instruction to insure that previous stores are * included in the write-back. The processor * propagates flush to other processors in the cache * coherence domain. */ mfence(); for (; sva < eva; sva += cpu_clflush_line_size) clflush(sva); mfence(); } else { /* * No targeted cache flush methods are supported by CPU, * globally invalidate cache as a last resort. */ pmap_invalidate_cache(); } } /* * Are we current address space or kernel? N.B. We return FALSE when * a pmap's page table is in use because a kernel thread is borrowing * it. The borrowed page table can change spontaneously, making any * dependence on its continued use subject to a race condition. */ static __inline int pmap_is_current(pmap_t pmap) { return (pmap == kernel_pmap || (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) && (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME))); } /* * If the given pmap is not the current or kernel pmap, the returned pte must * be released by passing it to pmap_pte_release(). */ pt_entry_t * pmap_pte(pmap_t pmap, vm_offset_t va) { pd_entry_t newpf; pd_entry_t *pde; pde = pmap_pde(pmap, va); if (*pde & PG_PS) return (pde); if (*pde != 0) { /* are we current address space or kernel? */ if (pmap_is_current(pmap)) return (vtopte(va)); mtx_lock(&PMAP2mutex); newpf = *pde & PG_FRAME; if ((*PMAP2 & PG_FRAME) != newpf) { vm_page_lock_queues(); PT_SET_MA(PADDR2, newpf | PG_V | PG_A | PG_M); vm_page_unlock_queues(); CTR3(KTR_PMAP, "pmap_pte: pmap=%p va=0x%x newpte=0x%08x", pmap, va, (*PMAP2 & 0xffffffff)); } return (PADDR2 + (i386_btop(va) & (NPTEPG - 1))); } return (0); } /* * Releases a pte that was obtained from pmap_pte(). Be prepared for the pte * being NULL. */ static __inline void pmap_pte_release(pt_entry_t *pte) { if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2) { CTR1(KTR_PMAP, "pmap_pte_release: pte=0x%jx", *PMAP2); PT_SET_VA(PMAP2, 0, TRUE); mtx_unlock(&PMAP2mutex); } } static __inline void invlcaddr(void *caddr) { invlpg((u_int)caddr); PT_UPDATES_FLUSH(); } /* * Super fast pmap_pte routine best used when scanning * the pv lists. This eliminates many coarse-grained * invltlb calls. Note that many of the pv list * scans are across different pmaps. It is very wasteful * to do an entire invltlb for checking a single mapping. * * If the given pmap is not the current pmap, vm_page_queue_mtx * must be held and curthread pinned to a CPU. */ static pt_entry_t * pmap_pte_quick(pmap_t pmap, vm_offset_t va) { pd_entry_t newpf; pd_entry_t *pde; pde = pmap_pde(pmap, va); if (*pde & PG_PS) return (pde); if (*pde != 0) { /* are we current address space or kernel? */ if (pmap_is_current(pmap)) return (vtopte(va)); mtx_assert(&vm_page_queue_mtx, MA_OWNED); KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); newpf = *pde & PG_FRAME; if ((*PMAP1 & PG_FRAME) != newpf) { PT_SET_MA(PADDR1, newpf | PG_V | PG_A | PG_M); CTR3(KTR_PMAP, "pmap_pte_quick: pmap=%p va=0x%x newpte=0x%08x", pmap, va, (u_long)*PMAP1); #ifdef SMP PMAP1cpu = PCPU_GET(cpuid); #endif PMAP1changed++; } else #ifdef SMP if (PMAP1cpu != PCPU_GET(cpuid)) { PMAP1cpu = PCPU_GET(cpuid); invlcaddr(PADDR1); PMAP1changedcpu++; } else #endif PMAP1unchanged++; return (PADDR1 + (i386_btop(va) & (NPTEPG - 1))); } return (0); } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_paddr_t pmap_extract(pmap_t pmap, vm_offset_t va) { vm_paddr_t rtval; pt_entry_t *pte; pd_entry_t pde; pt_entry_t pteval; rtval = 0; PMAP_LOCK(pmap); pde = pmap->pm_pdir[va >> PDRSHIFT]; if (pde != 0) { if ((pde & PG_PS) != 0) { rtval = xpmap_mtop(pde & PG_PS_FRAME) | (va & PDRMASK); PMAP_UNLOCK(pmap); return rtval; } pte = pmap_pte(pmap, va); pteval = *pte ? xpmap_mtop(*pte) : 0; rtval = (pteval & PG_FRAME) | (va & PAGE_MASK); pmap_pte_release(pte); } PMAP_UNLOCK(pmap); return (rtval); } /* * Routine: pmap_extract_ma * Function: * Like pmap_extract, but returns machine address */ vm_paddr_t pmap_extract_ma(pmap_t pmap, vm_offset_t va) { vm_paddr_t rtval; pt_entry_t *pte; pd_entry_t pde; rtval = 0; PMAP_LOCK(pmap); pde = pmap->pm_pdir[va >> PDRSHIFT]; if (pde != 0) { if ((pde & PG_PS) != 0) { rtval = (pde & ~PDRMASK) | (va & PDRMASK); PMAP_UNLOCK(pmap); return rtval; } pte = pmap_pte(pmap, va); rtval = (*pte & PG_FRAME) | (va & PAGE_MASK); pmap_pte_release(pte); } PMAP_UNLOCK(pmap); return (rtval); } /* * Routine: pmap_extract_and_hold * Function: * Atomically extract and hold the physical page * with the given pmap and virtual address pair * if that mapping permits the given protection. */ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pd_entry_t pde; pt_entry_t pte; vm_page_t m; vm_paddr_t pa; pa = 0; m = NULL; PMAP_LOCK(pmap); retry: pde = PT_GET(pmap_pde(pmap, va)); if (pde != 0) { if (pde & PG_PS) { if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) | (va & PDRMASK), &pa)) goto retry; m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | (va & PDRMASK)); vm_page_hold(m); } } else { sched_pin(); pte = PT_GET(pmap_pte_quick(pmap, va)); if (*PMAP1) PT_SET_MA(PADDR1, 0); if ((pte & PG_V) && ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa)) goto retry; m = PHYS_TO_VM_PAGE(pte & PG_FRAME); vm_page_hold(m); } sched_unpin(); } } PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } /*************************************************** * Low level mapping routines..... ***************************************************/ /* * Add a wired page to the kva. * Note: not SMP coherent. */ void pmap_kenter(vm_offset_t va, vm_paddr_t pa) { PT_SET_MA(va, xpmap_ptom(pa)| PG_RW | PG_V | pgeflag); } void pmap_kenter_ma(vm_offset_t va, vm_paddr_t ma) { pt_entry_t *pte; pte = vtopte(va); pte_store_ma(pte, ma | PG_RW | PG_V | pgeflag); } static __inline void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode) { PT_SET_MA(va, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0)); } /* * Remove a page from the kernel pagetables. * Note: not SMP coherent. */ PMAP_INLINE void pmap_kremove(vm_offset_t va) { pt_entry_t *pte; pte = vtopte(va); PT_CLEAR_VA(pte, FALSE); } /* * Used to map a range of physical addresses into kernel * virtual address space. * * The value passed in '*virt' is a suggested virtual address for * the mapping. Architectures which can support a direct-mapped * physical to virtual region can return the appropriate address * within that region, leaving '*virt' unchanged. Other * architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped * region. */ vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) { vm_offset_t va, sva; va = sva = *virt; CTR4(KTR_PMAP, "pmap_map: va=0x%x start=0x%jx end=0x%jx prot=0x%x", va, start, end, prot); while (start < end) { pmap_kenter(va, start); va += PAGE_SIZE; start += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); *virt = va; return (sva); } /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) { pt_entry_t *endpte, *pte; vm_paddr_t pa; vm_offset_t va = sva; int mclcount = 0; multicall_entry_t mcl[16]; multicall_entry_t *mclp = mcl; int error; CTR2(KTR_PMAP, "pmap_qenter:sva=0x%x count=%d", va, count); pte = vtopte(sva); endpte = pte + count; while (pte < endpte) { pa = VM_PAGE_TO_MACH(*ma) | pgeflag | PG_RW | PG_V | PG_M | PG_A; mclp->op = __HYPERVISOR_update_va_mapping; mclp->args[0] = va; mclp->args[1] = (uint32_t)(pa & 0xffffffff); mclp->args[2] = (uint32_t)(pa >> 32); +#if 0 mclp->args[3] = (*pte & PG_V) ? UVMF_INVLPG|UVMF_ALL : 0; +#else + /* + * Somehow we seem to be ending up with pages which are in + * the TLB in spite of not having PG_V set, resulting in + * pages newly loaded into the bufcache not showing up + * immediately (i.e., accessing them provides the old data). + * As a workaround, always perform a TLB flush, even if the + * old page didn't have PG_V. + */ + mclp->args[3] = UVMF_INVLPG|UVMF_ALL; +#endif va += PAGE_SIZE; pte++; ma++; mclp++; mclcount++; if (mclcount == 16) { error = HYPERVISOR_multicall(mcl, mclcount); mclp = mcl; mclcount = 0; KASSERT(error == 0, ("bad multicall %d", error)); } } if (mclcount) { error = HYPERVISOR_multicall(mcl, mclcount); KASSERT(error == 0, ("bad multicall %d", error)); } #ifdef INVARIANTS for (pte = vtopte(sva), mclcount = 0; mclcount < count; mclcount++, pte++) KASSERT(*pte, ("pte not set for va=0x%x", sva + mclcount*PAGE_SIZE)); #endif } /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qremove(vm_offset_t sva, int count) { vm_offset_t va; CTR2(KTR_PMAP, "pmap_qremove: sva=0x%x count=%d", sva, count); va = sva; vm_page_lock_queues(); critical_enter(); while (count-- > 0) { pmap_kremove(va); va += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); critical_exit(); vm_page_unlock_queues(); } /*************************************************** * Page table page management routines..... ***************************************************/ static __inline void pmap_free_zero_pages(vm_page_t free) { vm_page_t m; while (free != NULL) { m = free; free = m->right; vm_page_free_zero(m); } } /* * This routine unholds page table pages, and if the hold count * drops to zero, then it decrements the wire count. */ static __inline int pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free) { --m->wire_count; if (m->wire_count == 0) return _pmap_unwire_pte_hold(pmap, m, free); else return 0; } static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free) { vm_offset_t pteva; PT_UPDATES_FLUSH(); /* * unmap the page table page */ xen_pt_unpin(pmap->pm_pdir[m->pindex]); /* * page *might* contain residual mapping :-/ */ PD_CLEAR_VA(pmap, m->pindex, TRUE); pmap_zero_page(m); --pmap->pm_stats.resident_count; /* * This is a release store so that the ordinary store unmapping * the page table page is globally performed before TLB shoot- * down is begun. */ atomic_subtract_rel_int(&cnt.v_wire_count, 1); /* * Do an invltlb to make the invalidated mapping * take effect immediately. */ pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex); pmap_invalidate_page(pmap, pteva); /* * Put page on a list so that it is released after * *ALL* TLB shootdown is done */ m->right = *free; *free = m; return 1; } /* * After removing a page table entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ static int pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t *free) { pd_entry_t ptepde; vm_page_t mpte; if (va >= VM_MAXUSER_ADDRESS) return 0; ptepde = PT_GET(pmap_pde(pmap, va)); mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME); return pmap_unwire_pte_hold(pmap, mpte, free); } void pmap_pinit0(pmap_t pmap) { PMAP_LOCK_INIT(pmap); pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD); #ifdef PAE pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); #endif pmap->pm_active = 0; PCPU_SET(curpmap, pmap); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ int pmap_pinit(pmap_t pmap) { vm_page_t m, ptdpg[NPGPTD + 1]; int npgptd = NPGPTD + 1; static int color; int i; PMAP_LOCK_INIT(pmap); /* * No need to allocate page table space yet but we do need a valid * page directory table. */ if (pmap->pm_pdir == NULL) { pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map, NBPTD); if (pmap->pm_pdir == NULL) { PMAP_LOCK_DESTROY(pmap); return (0); } #ifdef PAE pmap->pm_pdpt = (pd_entry_t *)kmem_alloc_nofault(kernel_map, 1); #endif } /* * allocate the page directory page(s) */ for (i = 0; i < npgptd;) { m = vm_page_alloc(NULL, color++, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (m == NULL) VM_WAIT; else { ptdpg[i++] = m; } } pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD); for (i = 0; i < NPGPTD; i++) { if ((ptdpg[i]->flags & PG_ZERO) == 0) pagezero(&pmap->pm_pdir[i*NPTEPG]); } mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); /* Wire in kernel global address entries. */ bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t)); #ifdef PAE pmap_qenter((vm_offset_t)pmap->pm_pdpt, &ptdpg[NPGPTD], 1); if ((ptdpg[NPGPTD]->flags & PG_ZERO) == 0) bzero(pmap->pm_pdpt, PAGE_SIZE); for (i = 0; i < NPGPTD; i++) { vm_paddr_t ma; ma = VM_PAGE_TO_MACH(ptdpg[i]); pmap->pm_pdpt[i] = ma | PG_V; } #endif for (i = 0; i < NPGPTD; i++) { pt_entry_t *pd; vm_paddr_t ma; ma = VM_PAGE_TO_MACH(ptdpg[i]); pd = pmap->pm_pdir + (i * NPDEPG); PT_SET_MA(pd, *vtopte((vm_offset_t)pd) & ~(PG_M|PG_A|PG_U|PG_RW)); #if 0 xen_pgd_pin(ma); #endif } #ifdef PAE PT_SET_MA(pmap->pm_pdpt, *vtopte((vm_offset_t)pmap->pm_pdpt) & ~PG_RW); #endif vm_page_lock_queues(); xen_flush_queue(); xen_pgdpt_pin(VM_PAGE_TO_MACH(ptdpg[NPGPTD])); for (i = 0; i < NPGPTD; i++) { vm_paddr_t ma = VM_PAGE_TO_MACH(ptdpg[i]); PT_SET_VA_MA(&pmap->pm_pdir[PTDPTDI + i], ma | PG_V | PG_A, FALSE); } xen_flush_queue(); vm_page_unlock_queues(); pmap->pm_active = 0; TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); return (1); } /* * this routine is called if the page table page is not * mapped correctly. */ static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned int ptepindex, int flags) { vm_paddr_t ptema; vm_page_t m; KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); /* * Allocate a page table page. */ if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { if (flags & M_WAITOK) { PMAP_UNLOCK(pmap); vm_page_unlock_queues(); VM_WAIT; vm_page_lock_queues(); PMAP_LOCK(pmap); } /* * Indicate the need to retry. While waiting, the page table * page may have been allocated. */ return (NULL); } if ((m->flags & PG_ZERO) == 0) pmap_zero_page(m); /* * Map the pagetable page into the process address space, if * it isn't already there. */ pmap->pm_stats.resident_count++; ptema = VM_PAGE_TO_MACH(m); xen_pt_pin(ptema); PT_SET_VA_MA(&pmap->pm_pdir[ptepindex], (ptema | PG_U | PG_RW | PG_V | PG_A | PG_M), TRUE); KASSERT(pmap->pm_pdir[ptepindex], ("_pmap_allocpte: ptepindex=%d did not get mapped", ptepindex)); return (m); } static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags) { unsigned ptepindex; pd_entry_t ptema; vm_page_t m; KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); /* * Calculate pagetable page index */ ptepindex = va >> PDRSHIFT; retry: /* * Get the page directory entry */ ptema = pmap->pm_pdir[ptepindex]; /* * This supports switching from a 4MB page to a * normal 4K page. */ if (ptema & PG_PS) { /* * XXX */ pmap->pm_pdir[ptepindex] = 0; ptema = 0; pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; pmap_invalidate_all(kernel_pmap); } /* * If the page table page is mapped, we just increment the * hold count, and activate it. */ if (ptema & PG_V) { m = PHYS_TO_VM_PAGE(xpmap_mtop(ptema) & PG_FRAME); m->wire_count++; } else { /* * Here if the pte page isn't mapped, or if it has * been deallocated. */ CTR3(KTR_PMAP, "pmap_allocpte: pmap=%p va=0x%08x flags=0x%x", pmap, va, flags); m = _pmap_allocpte(pmap, ptepindex, flags); if (m == NULL && (flags & M_WAITOK)) goto retry; KASSERT(pmap->pm_pdir[ptepindex], ("ptepindex=%d did not get mapped", ptepindex)); } return (m); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ #ifdef SMP /* * Deal with a SMP shootdown of other users of the pmap that we are * trying to dispose of. This can be a bit hairy. */ static cpumask_t *lazymask; static u_int lazyptd; static volatile u_int lazywait; void pmap_lazyfix_action(void); void pmap_lazyfix_action(void) { cpumask_t mymask = PCPU_GET(cpumask); #ifdef COUNT_IPIS (*ipi_lazypmap_counts[PCPU_GET(cpuid)])++; #endif if (rcr3() == lazyptd) load_cr3(PCPU_GET(curpcb)->pcb_cr3); atomic_clear_int(lazymask, mymask); atomic_store_rel_int(&lazywait, 1); } static void pmap_lazyfix_self(cpumask_t mymask) { if (rcr3() == lazyptd) load_cr3(PCPU_GET(curpcb)->pcb_cr3); atomic_clear_int(lazymask, mymask); } static void pmap_lazyfix(pmap_t pmap) { cpumask_t mymask, mask; u_int spins; while ((mask = pmap->pm_active) != 0) { spins = 50000000; mask = mask & -mask; /* Find least significant set bit */ mtx_lock_spin(&smp_ipi_mtx); #ifdef PAE lazyptd = vtophys(pmap->pm_pdpt); #else lazyptd = vtophys(pmap->pm_pdir); #endif mymask = PCPU_GET(cpumask); if (mask == mymask) { lazymask = &pmap->pm_active; pmap_lazyfix_self(mymask); } else { atomic_store_rel_int((u_int *)&lazymask, (u_int)&pmap->pm_active); atomic_store_rel_int(&lazywait, 0); ipi_selected(mask, IPI_LAZYPMAP); while (lazywait == 0) { ia32_pause(); if (--spins == 0) break; } } mtx_unlock_spin(&smp_ipi_mtx); if (spins == 0) printf("pmap_lazyfix: spun for 50000000\n"); } } #else /* SMP */ /* * Cleaning up on uniprocessor is easy. For various reasons, we're * unlikely to have to even execute this code, including the fact * that the cleanup is deferred until the parent does a wait(2), which * means that another userland process has run. */ static void pmap_lazyfix(pmap_t pmap) { u_int cr3; cr3 = vtophys(pmap->pm_pdir); if (cr3 == rcr3()) { load_cr3(PCPU_GET(curpcb)->pcb_cr3); pmap->pm_active &= ~(PCPU_GET(cpumask)); } } #endif /* SMP */ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. */ void pmap_release(pmap_t pmap) { vm_page_t m, ptdpg[2*NPGPTD+1]; vm_paddr_t ma; int i; #ifdef PAE int npgptd = NPGPTD + 1; #else int npgptd = NPGPTD; #endif KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); PT_UPDATES_FLUSH(); pmap_lazyfix(pmap); mtx_lock_spin(&allpmaps_lock); LIST_REMOVE(pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); for (i = 0; i < NPGPTD; i++) ptdpg[i] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdir + (i*NPDEPG)) & PG_FRAME); pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD); #ifdef PAE ptdpg[NPGPTD] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdpt)); #endif for (i = 0; i < npgptd; i++) { m = ptdpg[i]; ma = VM_PAGE_TO_MACH(m); /* unpinning L1 and L2 treated the same */ #if 0 xen_pgd_unpin(ma); #else if (i == NPGPTD) xen_pgd_unpin(ma); #endif #ifdef PAE if (i < NPGPTD) KASSERT(VM_PAGE_TO_MACH(m) == (pmap->pm_pdpt[i] & PG_FRAME), ("pmap_release: got wrong ptd page")); #endif m->wire_count--; atomic_subtract_int(&cnt.v_wire_count, 1); vm_page_free(m); } #ifdef PAE pmap_qremove((vm_offset_t)pmap->pm_pdpt, 1); #endif PMAP_LOCK_DESTROY(pmap); } static int kvm_size(SYSCTL_HANDLER_ARGS) { unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; return sysctl_handle_long(oidp, &ksize, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_size, "IU", "Size of KVM"); static int kvm_free(SYSCTL_HANDLER_ARGS) { unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; return sysctl_handle_long(oidp, &kfree, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "IU", "Amount of KVM free"); /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { struct pmap *pmap; vm_paddr_t ptppaddr; vm_page_t nkpg; pd_entry_t newpdir; mtx_assert(&kernel_map->system_mtx, MA_OWNED); if (kernel_vm_end == 0) { kernel_vm_end = KERNBASE; nkpt = 0; while (pdir_pde(PTD, kernel_vm_end)) { kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); nkpt++; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } } } addr = roundup2(addr, PAGE_SIZE * NPTEPG); if (addr - 1 >= kernel_map->max_offset) addr = kernel_map->max_offset; while (kernel_vm_end < addr) { if (pdir_pde(PTD, kernel_vm_end)) { kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } continue; } /* * This index is bogus, but out of the way */ nkpg = vm_page_alloc(NULL, nkpt, VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED); if (!nkpg) panic("pmap_growkernel: no memory to grow kernel"); nkpt++; pmap_zero_page(nkpg); ptppaddr = VM_PAGE_TO_PHYS(nkpg); newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); vm_page_lock_queues(); PD_SET_VA(kernel_pmap, (kernel_vm_end >> PDRSHIFT), newpdir, TRUE); mtx_lock_spin(&allpmaps_lock); LIST_FOREACH(pmap, &allpmaps, pm_list) PD_SET_VA(pmap, (kernel_vm_end >> PDRSHIFT), newpdir, TRUE); mtx_unlock_spin(&allpmaps_lock); vm_page_unlock_queues(); kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } } } /*************************************************** * page management routines. ***************************************************/ CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); CTASSERT(_NPCM == 11); static __inline struct pv_chunk * pv_to_chunk(pv_entry_t pv) { return (struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK); } #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) #define PC_FREE0_9 0xfffffffful /* Free values for index 0 through 9 */ #define PC_FREE10 0x0000fffful /* Free values for index 10 */ static uint32_t pc_freemask[11] = { PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE10 }; SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, "Current number of pv entries"); #ifdef PV_STATS static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, "Current number of pv entry chunks"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, "Current number of pv entry chunks allocated"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, "Current number of pv entry chunks frees"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, "Number of times tried to get a chunk page but failed."); static long pv_entry_frees, pv_entry_allocs; static int pv_entry_spare; SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, "Current number of pv entry frees"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, "Current number of pv entry allocs"); SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, "Current number of spare pv entries"); static int pmap_collect_inactive, pmap_collect_active; SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_inactive, CTLFLAG_RD, &pmap_collect_inactive, 0, "Current number times pmap_collect called on inactive queue"); SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_active, CTLFLAG_RD, &pmap_collect_active, 0, "Current number times pmap_collect called on active queue"); #endif /* * We are in a serious low memory condition. Resort to * drastic measures to free some pages so we can allocate * another pv entry chunk. This is normally called to * unmap inactive pages, and if necessary, active pages. */ static void pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq) { pmap_t pmap; pt_entry_t *pte, tpte; pv_entry_t next_pv, pv; vm_offset_t va; vm_page_t m, free; sched_pin(); TAILQ_FOREACH(m, &vpq->pl, pageq) { if (m->hold_count || m->busy) continue; TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); /* Avoid deadlock and lock recursion. */ if (pmap > locked_pmap) PMAP_LOCK(pmap); else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) continue; pmap->pm_stats.resident_count--; pte = pmap_pte_quick(pmap, va); tpte = pte_load_clear(pte); KASSERT((tpte & PG_W) == 0, ("pmap_collect: wired pte %#jx", (uintmax_t)tpte)); if (tpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); free = NULL; pmap_unuse_pt(pmap, va, &free); pmap_invalidate_page(pmap, va); pmap_free_zero_pages(free); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); free_pv_entry(pmap, pv); if (pmap != locked_pmap) PMAP_UNLOCK(pmap); } if (TAILQ_EMPTY(&m->md.pv_list)) vm_page_flag_clear(m, PG_WRITEABLE); } sched_unpin(); } /* * free the pv_entry back to the free list */ static void free_pv_entry(pmap_t pmap, pv_entry_t pv) { vm_page_t m; struct pv_chunk *pc; int idx, field, bit; mtx_assert(&vm_page_queue_mtx, MA_OWNED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(pv_entry_frees++); PV_STAT(pv_entry_spare++); pv_entry_count--; pc = pv_to_chunk(pv); idx = pv - &pc->pc_pventry[0]; field = idx / 32; bit = idx % 32; pc->pc_map[field] |= 1ul << bit; /* move to head of list */ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); for (idx = 0; idx < _NPCM; idx++) if (pc->pc_map[idx] != pc_freemask[idx]) return; PV_STAT(pv_entry_spare -= _NPCPV); PV_STAT(pc_chunk_count--); PV_STAT(pc_chunk_frees++); /* entire chunk is free, return it */ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); pmap_qremove((vm_offset_t)pc, 1); vm_page_unwire(m, 0); vm_page_free(m); pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); } /* * get a new pv_entry, allocating a block from the system * when needed. */ static pv_entry_t get_pv_entry(pmap_t pmap, int try) { static const struct timeval printinterval = { 60, 0 }; static struct timeval lastprint; static vm_pindex_t colour; struct vpgqueues *pq; int bit, field; pv_entry_t pv; struct pv_chunk *pc; vm_page_t m; PMAP_LOCK_ASSERT(pmap, MA_OWNED); mtx_assert(&vm_page_queue_mtx, MA_OWNED); PV_STAT(pv_entry_allocs++); pv_entry_count++; if (pv_entry_count > pv_entry_high_water) if (ratecheck(&lastprint, &printinterval)) printf("Approaching the limit on PV entries, consider " "increasing either the vm.pmap.shpgperproc or the " "vm.pmap.pv_entry_max tunable.\n"); pq = NULL; retry: pc = TAILQ_FIRST(&pmap->pm_pvchunk); if (pc != NULL) { for (field = 0; field < _NPCM; field++) { if (pc->pc_map[field]) { bit = bsfl(pc->pc_map[field]); break; } } if (field < _NPCM) { pv = &pc->pc_pventry[field * 32 + bit]; pc->pc_map[field] &= ~(1ul << bit); /* If this was the last item, move it to tail */ for (field = 0; field < _NPCM; field++) if (pc->pc_map[field] != 0) { PV_STAT(pv_entry_spare--); return (pv); /* not full, return */ } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(pv_entry_spare--); return (pv); } } /* * Access to the ptelist "pv_vafree" is synchronized by the page * queues lock. If "pv_vafree" is currently non-empty, it will * remain non-empty until pmap_ptelist_alloc() completes. */ if (pv_vafree == 0 || (m = vm_page_alloc(NULL, colour, (pq == &vm_page_queues[PQ_ACTIVE] ? VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { if (try) { pv_entry_count--; PV_STAT(pc_chunk_tryfail++); return (NULL); } /* * Reclaim pv entries: At first, destroy mappings to * inactive pages. After that, if a pv chunk entry * is still needed, destroy mappings to active pages. */ if (pq == NULL) { PV_STAT(pmap_collect_inactive++); pq = &vm_page_queues[PQ_INACTIVE]; } else if (pq == &vm_page_queues[PQ_INACTIVE]) { PV_STAT(pmap_collect_active++); pq = &vm_page_queues[PQ_ACTIVE]; } else panic("get_pv_entry: increase vm.pmap.shpgperproc"); pmap_collect(pmap, pq); goto retry; } PV_STAT(pc_chunk_count++); PV_STAT(pc_chunk_allocs++); colour++; pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree); pmap_qenter((vm_offset_t)pc, &m, 1); if ((m->flags & PG_ZERO) == 0) pagezero(pc); pc->pc_pmap = pmap; pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */ for (field = 1; field < _NPCM; field++) pc->pc_map[field] = pc_freemask[field]; pv = &pc->pc_pventry[0]; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(pv_entry_spare += _NPCPV - 1); return (pv); } static __inline pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; mtx_assert(&vm_page_queue_mtx, MA_OWNED); TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); break; } } return (pv); } static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); free_pv_entry(pmap, pv); } static void pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) { mtx_assert(&vm_page_queue_mtx, MA_OWNED); pmap_pvh_free(&m->md, pmap, va); if (TAILQ_EMPTY(&m->md.pv_list)) vm_page_flag_clear(m, PG_WRITEABLE); } /* * Conditionally create a pv entry. */ static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) { pv_entry_t pv; PMAP_LOCK_ASSERT(pmap, MA_OWNED); mtx_assert(&vm_page_queue_mtx, MA_OWNED); if (pv_entry_count < pv_entry_high_water && (pv = get_pv_entry(pmap, TRUE)) != NULL) { pv->pv_va = va; TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); return (TRUE); } else return (FALSE); } /* * pmap_remove_pte: do the things to unmap a page in a process */ static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, vm_page_t *free) { pt_entry_t oldpte; vm_page_t m; CTR3(KTR_PMAP, "pmap_remove_pte: pmap=%p *ptq=0x%x va=0x%x", pmap, (u_long)*ptq, va); mtx_assert(&vm_page_queue_mtx, MA_OWNED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); oldpte = *ptq; PT_SET_VA_MA(ptq, 0, TRUE); if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; /* * Machines that don't support invlpg, also don't support * PG_G. */ if (oldpte & PG_G) pmap_invalidate_page(kernel_pmap, va); pmap->pm_stats.resident_count -= 1; /* * XXX This is not strictly correctly, but somewhere along the line * we are losing the managed bit on some pages. It is unclear to me * why, but I think the most likely explanation is that xen's writable * page table implementation doesn't respect the unused bits. */ if ((oldpte & PG_MANAGED) || ((oldpte & PG_V) && (va < VM_MAXUSER_ADDRESS)) ) { m = PHYS_TO_VM_PAGE(xpmap_mtop(oldpte) & PG_FRAME); if (!(oldpte & PG_MANAGED)) printf("va=0x%x is unmanaged :-( pte=0x%llx\n", va, oldpte); if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if (oldpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); pmap_remove_entry(pmap, m, va); } else if ((va < VM_MAXUSER_ADDRESS) && (oldpte & PG_V)) printf("va=0x%x is unmanaged :-( pte=0x%llx\n", va, oldpte); return (pmap_unuse_pt(pmap, va, free)); } /* * Remove a single page from a process address space */ static void pmap_remove_page(pmap_t pmap, vm_offset_t va, vm_page_t *free) { pt_entry_t *pte; CTR2(KTR_PMAP, "pmap_remove_page: pmap=%p va=0x%x", pmap, va); mtx_assert(&vm_page_queue_mtx, MA_OWNED); KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); PMAP_LOCK_ASSERT(pmap, MA_OWNED); if ((pte = pmap_pte_quick(pmap, va)) == NULL || (*pte & PG_V) == 0) return; pmap_remove_pte(pmap, pte, va, free); pmap_invalidate_page(pmap, va); if (*PMAP1) PT_SET_MA(PADDR1, 0); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t pdnxt; pd_entry_t ptpaddr; pt_entry_t *pte; vm_page_t free = NULL; int anyvalid; CTR3(KTR_PMAP, "pmap_remove: pmap=%p sva=0x%x eva=0x%x", pmap, sva, eva); /* * Perform an unsynchronized read. This is, however, safe. */ if (pmap->pm_stats.resident_count == 0) return; anyvalid = 0; vm_page_lock_queues(); sched_pin(); PMAP_LOCK(pmap); /* * special handling of removing one page. a very * common operation and easy to short circuit some * code. */ if ((sva + PAGE_SIZE == eva) && ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { pmap_remove_page(pmap, sva, &free); goto out; } for (; sva < eva; sva = pdnxt) { unsigned pdirindex; /* * Calculate index for next page table. */ pdnxt = (sva + NBPDR) & ~PDRMASK; if (pmap->pm_stats.resident_count == 0) break; pdirindex = sva >> PDRSHIFT; ptpaddr = pmap->pm_pdir[pdirindex]; /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. */ if (ptpaddr == 0) continue; /* * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { PD_CLEAR_VA(pmap, pdirindex, TRUE); pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; anyvalid = 1; continue; } /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. */ if (pdnxt > eva) pdnxt = eva; for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, sva += PAGE_SIZE) { if ((*pte & PG_V) == 0) continue; /* * The TLB entry for a PG_G mapping is invalidated * by pmap_remove_pte(). */ if ((*pte & PG_G) == 0) anyvalid = 1; if (pmap_remove_pte(pmap, pte, sva, &free)) break; } } PT_UPDATES_FLUSH(); if (*PMAP1) PT_SET_VA_MA(PMAP1, 0, TRUE); out: if (anyvalid) pmap_invalidate_all(pmap); sched_unpin(); vm_page_unlock_queues(); PMAP_UNLOCK(pmap); pmap_free_zero_pages(free); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ void pmap_remove_all(vm_page_t m) { pv_entry_t pv; pmap_t pmap; pt_entry_t *pte, tpte; vm_page_t free; KASSERT((m->flags & PG_FICTITIOUS) == 0, ("pmap_remove_all: page %p is fictitious", m)); free = NULL; vm_page_lock_queues(); sched_pin(); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pmap->pm_stats.resident_count--; pte = pmap_pte_quick(pmap, pv->pv_va); tpte = *pte; PT_SET_VA_MA(pte, 0, TRUE); if (tpte & PG_W) pmap->pm_stats.wired_count--; if (tpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); /* * Update the vm_page_t clean and reference bits. */ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); pmap_unuse_pt(pmap, pv->pv_va, &free); pmap_invalidate_page(pmap, pv->pv_va); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } vm_page_flag_clear(m, PG_WRITEABLE); PT_UPDATES_FLUSH(); if (*PMAP1) PT_SET_MA(PADDR1, 0); sched_unpin(); vm_page_unlock_queues(); pmap_free_zero_pages(free); } /* * Set the physical protection on the * specified range of this map as requested. */ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { vm_offset_t pdnxt; pd_entry_t ptpaddr; pt_entry_t *pte; int anychanged; CTR4(KTR_PMAP, "pmap_protect: pmap=%p sva=0x%x eva=0x%x prot=0x%x", pmap, sva, eva, prot); if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } #ifdef PAE if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) == (VM_PROT_WRITE|VM_PROT_EXECUTE)) return; #else if (prot & VM_PROT_WRITE) return; #endif anychanged = 0; vm_page_lock_queues(); sched_pin(); PMAP_LOCK(pmap); for (; sva < eva; sva = pdnxt) { pt_entry_t obits, pbits; unsigned pdirindex; pdnxt = (sva + NBPDR) & ~PDRMASK; pdirindex = sva >> PDRSHIFT; ptpaddr = pmap->pm_pdir[pdirindex]; /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. */ if (ptpaddr == 0) continue; /* * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { if ((prot & VM_PROT_WRITE) == 0) pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW); #ifdef PAE if ((prot & VM_PROT_EXECUTE) == 0) pmap->pm_pdir[pdirindex] |= pg_nx; #endif anychanged = 1; continue; } if (pdnxt > eva) pdnxt = eva; for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, sva += PAGE_SIZE) { vm_page_t m; retry: /* * Regardless of whether a pte is 32 or 64 bits in * size, PG_RW, PG_A, and PG_M are among the least * significant 32 bits. */ obits = pbits = *pte; if ((pbits & PG_V) == 0) continue; if ((prot & VM_PROT_WRITE) == 0) { if ((pbits & (PG_MANAGED | PG_M | PG_RW)) == (PG_MANAGED | PG_M | PG_RW)) { m = PHYS_TO_VM_PAGE(xpmap_mtop(pbits) & PG_FRAME); vm_page_dirty(m); } pbits &= ~(PG_RW | PG_M); } #ifdef PAE if ((prot & VM_PROT_EXECUTE) == 0) pbits |= pg_nx; #endif if (pbits != obits) { obits = *pte; PT_SET_VA_MA(pte, pbits, TRUE); if (*pte != pbits) goto retry; if (obits & PG_G) pmap_invalidate_page(pmap, sva); else anychanged = 1; } } } PT_UPDATES_FLUSH(); if (*PMAP1) PT_SET_VA_MA(PMAP1, 0, TRUE); if (anychanged) pmap_invalidate_all(pmap); sched_unpin(); vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ void pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, vm_prot_t prot, boolean_t wired) { pd_entry_t *pde; pt_entry_t *pte; pt_entry_t newpte, origpte; pv_entry_t pv; vm_paddr_t opa, pa; vm_page_t mpte, om; boolean_t invlva; CTR6(KTR_PMAP, "pmap_enter: pmap=%08p va=0x%08x access=0x%x ma=0x%08x prot=0x%x wired=%d", pmap, va, access, VM_PAGE_TO_MACH(m), prot, wired); va = trunc_page(va); KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS, ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va)); KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 || (m->oflags & VPO_BUSY) != 0, ("pmap_enter: page %p is not busy", m)); mpte = NULL; vm_page_lock_queues(); PMAP_LOCK(pmap); sched_pin(); /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { mpte = pmap_allocpte(pmap, va, M_WAITOK); } pde = pmap_pde(pmap, va); if ((*pde & PG_PS) != 0) panic("pmap_enter: attempted pmap_enter on 4MB page"); pte = pmap_pte_quick(pmap, va); /* * Page Directory table entry not valid, we need a new PT page */ if (pte == NULL) { panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x", (uintmax_t)pmap->pm_pdir[va >> PDRSHIFT], va); } pa = VM_PAGE_TO_PHYS(m); om = NULL; opa = origpte = 0; #if 0 KASSERT((*pte & PG_V) || (*pte == 0), ("address set but not valid pte=%p *pte=0x%016jx", pte, *pte)); #endif origpte = *pte; if (origpte) origpte = xpmap_mtop(origpte); opa = origpte & PG_FRAME; /* * Mapping has not changed, must be protection or wiring change. */ if (origpte && (opa == pa)) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if (wired && ((origpte & PG_W) == 0)) pmap->pm_stats.wired_count++; else if (!wired && (origpte & PG_W)) pmap->pm_stats.wired_count--; /* * Remove extra pte reference */ if (mpte) mpte->wire_count--; if (origpte & PG_MANAGED) { om = m; pa |= PG_MANAGED; } goto validate; } pv = NULL; /* * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. */ if (opa) { if (origpte & PG_W) pmap->pm_stats.wired_count--; if (origpte & PG_MANAGED) { om = PHYS_TO_VM_PAGE(opa); pv = pmap_pvh_remove(&om->md, pmap, va); } else if (va < VM_MAXUSER_ADDRESS) printf("va=0x%x is unmanaged :-( \n", va); if (mpte != NULL) { mpte->wire_count--; KASSERT(mpte->wire_count > 0, ("pmap_enter: missing reference to page table page," " va: 0x%x", va)); } } else pmap->pm_stats.resident_count++; /* * Enter on the PV list if part of our managed memory. */ if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) { KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, ("pmap_enter: managed mapping within the clean submap")); if (pv == NULL) pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); pa |= PG_MANAGED; } else if (pv != NULL) free_pv_entry(pmap, pv); /* * Increment counters */ if (wired) pmap->pm_stats.wired_count++; validate: /* * Now validate mapping with desired protection/wiring. */ newpte = (pt_entry_t)(pa | PG_V); if ((prot & VM_PROT_WRITE) != 0) { newpte |= PG_RW; if ((newpte & PG_MANAGED) != 0) vm_page_flag_set(m, PG_WRITEABLE); } #ifdef PAE if ((prot & VM_PROT_EXECUTE) == 0) newpte |= pg_nx; #endif if (wired) newpte |= PG_W; if (va < VM_MAXUSER_ADDRESS) newpte |= PG_U; if (pmap == kernel_pmap) newpte |= pgeflag; critical_enter(); /* * if the mapping or permission bits are different, we need * to update the pte. */ if ((origpte & ~(PG_M|PG_A)) != newpte) { if (origpte) { invlva = FALSE; origpte = *pte; PT_SET_VA(pte, newpte | PG_A, FALSE); if (origpte & PG_A) { if (origpte & PG_MANAGED) vm_page_flag_set(om, PG_REFERENCED); if (opa != VM_PAGE_TO_PHYS(m)) invlva = TRUE; #ifdef PAE if ((origpte & PG_NX) == 0 && (newpte & PG_NX) != 0) invlva = TRUE; #endif } if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if ((origpte & PG_MANAGED) != 0) vm_page_dirty(om); if ((prot & VM_PROT_WRITE) == 0) invlva = TRUE; } if ((origpte & PG_MANAGED) != 0 && TAILQ_EMPTY(&om->md.pv_list)) vm_page_flag_clear(om, PG_WRITEABLE); if (invlva) pmap_invalidate_page(pmap, va); } else{ PT_SET_VA(pte, newpte | PG_A, FALSE); } } PT_UPDATES_FLUSH(); critical_exit(); if (*PMAP1) PT_SET_VA_MA(PMAP1, 0, TRUE); sched_unpin(); vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ void pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { vm_page_t m, mpte; vm_pindex_t diff, psize; multicall_entry_t mcl[16]; multicall_entry_t *mclp = mcl; int error, count = 0; VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED); psize = atop(end - start); mpte = NULL; m = m_start; vm_page_lock_queues(); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { mpte = pmap_enter_quick_locked(&mclp, &count, pmap, start + ptoa(diff), m, prot, mpte); m = TAILQ_NEXT(m, listq); if (count == 16) { error = HYPERVISOR_multicall(mcl, count); KASSERT(error == 0, ("bad multicall %d", error)); mclp = mcl; count = 0; } } if (count) { error = HYPERVISOR_multicall(mcl, count); KASSERT(error == 0, ("bad multicall %d", error)); } vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * but is *MUCH* faster than pmap_enter... */ void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { multicall_entry_t mcl, *mclp; int count = 0; mclp = &mcl; CTR4(KTR_PMAP, "pmap_enter_quick: pmap=%p va=0x%x m=%p prot=0x%x", pmap, va, m, prot); vm_page_lock_queues(); PMAP_LOCK(pmap); (void)pmap_enter_quick_locked(&mclp, &count, pmap, va, m, prot, NULL); if (count) HYPERVISOR_multicall(&mcl, count); vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } #ifdef notyet void pmap_enter_quick_range(pmap_t pmap, vm_offset_t *addrs, vm_page_t *pages, vm_prot_t *prots, int count) { int i, error, index = 0; multicall_entry_t mcl[16]; multicall_entry_t *mclp = mcl; PMAP_LOCK(pmap); for (i = 0; i < count; i++, addrs++, pages++, prots++) { if (!pmap_is_prefaultable_locked(pmap, *addrs)) continue; (void) pmap_enter_quick_locked(&mclp, &index, pmap, *addrs, *pages, *prots, NULL); if (index == 16) { error = HYPERVISOR_multicall(mcl, index); mclp = mcl; index = 0; KASSERT(error == 0, ("bad multicall %d", error)); } } if (index) { error = HYPERVISOR_multicall(mcl, index); KASSERT(error == 0, ("bad multicall %d", error)); } PMAP_UNLOCK(pmap); } #endif static vm_page_t pmap_enter_quick_locked(multicall_entry_t **mclpp, int *count, pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte) { pt_entry_t *pte; vm_paddr_t pa; vm_page_t free; multicall_entry_t *mcl = *mclpp; KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0, ("pmap_enter_quick_locked: managed mapping within the clean submap")); mtx_assert(&vm_page_queue_mtx, MA_OWNED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { unsigned ptepindex; pd_entry_t ptema; /* * Calculate pagetable page index */ ptepindex = va >> PDRSHIFT; if (mpte && (mpte->pindex == ptepindex)) { mpte->wire_count++; } else { /* * Get the page directory entry */ ptema = pmap->pm_pdir[ptepindex]; /* * If the page table page is mapped, we just increment * the hold count, and activate it. */ if (ptema & PG_V) { if (ptema & PG_PS) panic("pmap_enter_quick: unexpected mapping into 4MB page"); mpte = PHYS_TO_VM_PAGE(xpmap_mtop(ptema) & PG_FRAME); mpte->wire_count++; } else { mpte = _pmap_allocpte(pmap, ptepindex, M_NOWAIT); if (mpte == NULL) return (mpte); } } } else { mpte = NULL; } /* * This call to vtopte makes the assumption that we are * entering the page into the current pmap. In order to support * quick entry into any pmap, one would likely use pmap_pte_quick. * But that isn't as quick as vtopte. */ KASSERT(pmap_is_current(pmap), ("entering pages in non-current pmap")); pte = vtopte(va); if (*pte & PG_V) { if (mpte != NULL) { mpte->wire_count--; mpte = NULL; } return (mpte); } /* * Enter on the PV list if part of our managed memory. */ if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 && !pmap_try_insert_pv_entry(pmap, va, m)) { if (mpte != NULL) { free = NULL; if (pmap_unwire_pte_hold(pmap, mpte, &free)) { pmap_invalidate_page(pmap, va); pmap_free_zero_pages(free); } mpte = NULL; } return (mpte); } /* * Increment counters */ pmap->pm_stats.resident_count++; pa = VM_PAGE_TO_PHYS(m); #ifdef PAE if ((prot & VM_PROT_EXECUTE) == 0) pa |= pg_nx; #endif #if 0 /* * Now validate mapping with RO protection */ if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) pte_store(pte, pa | PG_V | PG_U); else pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); #else /* * Now validate mapping with RO protection */ if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) pa = xpmap_ptom(pa | PG_V | PG_U); else pa = xpmap_ptom(pa | PG_V | PG_U | PG_MANAGED); mcl->op = __HYPERVISOR_update_va_mapping; mcl->args[0] = va; mcl->args[1] = (uint32_t)(pa & 0xffffffff); mcl->args[2] = (uint32_t)(pa >> 32); mcl->args[3] = 0; *mclpp = mcl + 1; *count = *count + 1; #endif return mpte; } /* * Make a temporary mapping for a physical address. This is only intended * to be used for panic dumps. */ void * pmap_kenter_temporary(vm_paddr_t pa, int i) { vm_offset_t va; vm_paddr_t ma = xpmap_ptom(pa); va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); PT_SET_MA(va, (ma & ~PAGE_MASK) | PG_V | pgeflag); invlpg(va); return ((void *)crashdumpmap); } /* * This code maps large physical mmap regions into the * processor address space. Note that some shortcuts * are taken, but the code works. */ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { pd_entry_t *pde; vm_paddr_t pa, ptepa; vm_page_t p; int pat_mode; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, ("pmap_object_init_pt: non-device object")); if (pseflag && (addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) { if (!vm_object_populate(object, pindex, pindex + atop(size))) return; p = vm_page_lookup(object, pindex); KASSERT(p->valid == VM_PAGE_BITS_ALL, ("pmap_object_init_pt: invalid page %p", p)); pat_mode = p->md.pat_mode; /* * Abort the mapping if the first page is not physically * aligned to a 2/4MB page boundary. */ ptepa = VM_PAGE_TO_PHYS(p); if (ptepa & (NBPDR - 1)) return; /* * Skip the first page. Abort the mapping if the rest of * the pages are not physically contiguous or have differing * memory attributes. */ p = TAILQ_NEXT(p, listq); for (pa = ptepa + PAGE_SIZE; pa < ptepa + size; pa += PAGE_SIZE) { KASSERT(p->valid == VM_PAGE_BITS_ALL, ("pmap_object_init_pt: invalid page %p", p)); if (pa != VM_PAGE_TO_PHYS(p) || pat_mode != p->md.pat_mode) return; p = TAILQ_NEXT(p, listq); } /* Map using 2/4MB pages. */ PMAP_LOCK(pmap); for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa + size; pa += NBPDR) { pde = pmap_pde(pmap, addr); if (*pde == 0) { pde_store(pde, pa | PG_PS | PG_M | PG_A | PG_U | PG_RW | PG_V); pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; pmap_pde_mappings++; } /* Else continue on if the PDE is already valid. */ addr += NBPDR; } PMAP_UNLOCK(pmap); } } /* * Routine: pmap_change_wiring * Function: Change the wiring attribute for a map/virtual-address * pair. * In/out conditions: * The mapping must already exist in the pmap. */ void pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) { pt_entry_t *pte; vm_page_lock_queues(); PMAP_LOCK(pmap); pte = pmap_pte(pmap, va); if (wired && !pmap_pte_w(pte)) { PT_SET_VA_MA((pte), *(pte) | PG_W, TRUE); pmap->pm_stats.wired_count++; } else if (!wired && pmap_pte_w(pte)) { PT_SET_VA_MA((pte), *(pte) & ~PG_W, TRUE); pmap->pm_stats.wired_count--; } /* * Wiring is not a hardware characteristic so there is no need to * invalidate TLB. */ pmap_pte_release(pte); PMAP_UNLOCK(pmap); vm_page_unlock_queues(); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. */ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { vm_page_t free; vm_offset_t addr; vm_offset_t end_addr = src_addr + len; vm_offset_t pdnxt; if (dst_addr != src_addr) return; if (!pmap_is_current(src_pmap)) { CTR2(KTR_PMAP, "pmap_copy, skipping: pdir[PTDPTDI]=0x%jx PTDpde[0]=0x%jx", (src_pmap->pm_pdir[PTDPTDI] & PG_FRAME), (PTDpde[0] & PG_FRAME)); return; } CTR5(KTR_PMAP, "pmap_copy: dst_pmap=%p src_pmap=%p dst_addr=0x%x len=%d src_addr=0x%x", dst_pmap, src_pmap, dst_addr, len, src_addr); vm_page_lock_queues(); if (dst_pmap < src_pmap) { PMAP_LOCK(dst_pmap); PMAP_LOCK(src_pmap); } else { PMAP_LOCK(src_pmap); PMAP_LOCK(dst_pmap); } sched_pin(); for (addr = src_addr; addr < end_addr; addr = pdnxt) { pt_entry_t *src_pte, *dst_pte; vm_page_t dstmpte, srcmpte; pd_entry_t srcptepaddr; unsigned ptepindex; KASSERT(addr < UPT_MIN_ADDRESS, ("pmap_copy: invalid to pmap_copy page tables")); pdnxt = (addr + NBPDR) & ~PDRMASK; ptepindex = addr >> PDRSHIFT; srcptepaddr = PT_GET(&src_pmap->pm_pdir[ptepindex]); if (srcptepaddr == 0) continue; if (srcptepaddr & PG_PS) { if (dst_pmap->pm_pdir[ptepindex] == 0) { PD_SET_VA(dst_pmap, ptepindex, srcptepaddr & ~PG_W, TRUE); dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; } continue; } srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME); KASSERT(srcmpte->wire_count > 0, ("pmap_copy: source page table page is unused")); if (pdnxt > end_addr) pdnxt = end_addr; src_pte = vtopte(addr); while (addr < pdnxt) { pt_entry_t ptetemp; ptetemp = *src_pte; /* * we only virtual copy managed pages */ if ((ptetemp & PG_MANAGED) != 0) { dstmpte = pmap_allocpte(dst_pmap, addr, M_NOWAIT); if (dstmpte == NULL) break; dst_pte = pmap_pte_quick(dst_pmap, addr); if (*dst_pte == 0 && pmap_try_insert_pv_entry(dst_pmap, addr, PHYS_TO_VM_PAGE(xpmap_mtop(ptetemp) & PG_FRAME))) { /* * Clear the wired, modified, and * accessed (referenced) bits * during the copy. */ KASSERT(ptetemp != 0, ("src_pte not set")); PT_SET_VA_MA(dst_pte, ptetemp & ~(PG_W | PG_M | PG_A), TRUE /* XXX debug */); KASSERT(*dst_pte == (ptetemp & ~(PG_W | PG_M | PG_A)), ("no pmap copy expected: 0x%jx saw: 0x%jx", ptetemp & ~(PG_W | PG_M | PG_A), *dst_pte)); dst_pmap->pm_stats.resident_count++; } else { free = NULL; if (pmap_unwire_pte_hold(dst_pmap, dstmpte, &free)) { pmap_invalidate_page(dst_pmap, addr); pmap_free_zero_pages(free); } } if (dstmpte->wire_count >= srcmpte->wire_count) break; } addr += PAGE_SIZE; src_pte++; } } PT_UPDATES_FLUSH(); sched_unpin(); vm_page_unlock_queues(); PMAP_UNLOCK(src_pmap); PMAP_UNLOCK(dst_pmap); } static __inline void pagezero(void *page) { #if defined(I686_CPU) if (cpu_class == CPUCLASS_686) { #if defined(CPU_ENABLE_SSE) if (cpu_feature & CPUID_SSE2) sse2_pagezero(page); else #endif i686_pagezero(page); } else #endif bzero(page, PAGE_SIZE); } /* * pmap_zero_page zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. */ void pmap_zero_page(vm_page_t m) { struct sysmaps *sysmaps; sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (*sysmaps->CMAP2) panic("pmap_zero_page: CMAP2 busy"); sched_pin(); PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M); pagezero(sysmaps->CADDR2); PT_SET_MA(sysmaps->CADDR2, 0); sched_unpin(); mtx_unlock(&sysmaps->lock); } /* * pmap_zero_page_area zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. * * off and size may not cover an area beyond a single hardware page. */ void pmap_zero_page_area(vm_page_t m, int off, int size) { struct sysmaps *sysmaps; sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (*sysmaps->CMAP2) panic("pmap_zero_page: CMAP2 busy"); sched_pin(); PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M); if (off == 0 && size == PAGE_SIZE) pagezero(sysmaps->CADDR2); else bzero((char *)sysmaps->CADDR2 + off, size); PT_SET_MA(sysmaps->CADDR2, 0); sched_unpin(); mtx_unlock(&sysmaps->lock); } /* * pmap_zero_page_idle zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. This * is intended to be called from the vm_pagezero process only and * outside of Giant. */ void pmap_zero_page_idle(vm_page_t m) { if (*CMAP3) panic("pmap_zero_page: CMAP3 busy"); sched_pin(); PT_SET_MA(CADDR3, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M); pagezero(CADDR3); PT_SET_MA(CADDR3, 0); sched_unpin(); } /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. */ void pmap_copy_page(vm_page_t src, vm_page_t dst) { struct sysmaps *sysmaps; sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (*sysmaps->CMAP1) panic("pmap_copy_page: CMAP1 busy"); if (*sysmaps->CMAP2) panic("pmap_copy_page: CMAP2 busy"); sched_pin(); PT_SET_MA(sysmaps->CADDR1, PG_V | VM_PAGE_TO_MACH(src) | PG_A); PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(dst) | PG_A | PG_M); bcopy(sysmaps->CADDR1, sysmaps->CADDR2, PAGE_SIZE); PT_SET_MA(sysmaps->CADDR1, 0); PT_SET_MA(sysmaps->CADDR2, 0); sched_unpin(); mtx_unlock(&sysmaps->lock); } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. */ boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { pv_entry_t pv; int loops = 0; boolean_t rv; KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; vm_page_lock_queues(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } vm_page_unlock_queues(); return (rv); } /* * pmap_page_wired_mappings: * * Return the number of managed mappings to the given physical page * that are wired. */ int pmap_page_wired_mappings(vm_page_t m) { pv_entry_t pv; pt_entry_t *pte; pmap_t pmap; int count; count = 0; if ((m->flags & PG_FICTITIOUS) != 0) return (count); vm_page_lock_queues(); sched_pin(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); if ((*pte & PG_W) != 0) count++; PMAP_UNLOCK(pmap); } sched_unpin(); vm_page_unlock_queues(); return (count); } /* * Returns TRUE if the given page is mapped individually or as part of * a 4mpage. Otherwise, returns FALSE. */ boolean_t pmap_page_is_mapped(vm_page_t m) { boolean_t rv; if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) return (FALSE); vm_page_lock_queues(); rv = !TAILQ_EMPTY(&m->md.pv_list) || !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list); vm_page_unlock_queues(); return (rv); } /* * Remove all pages from specified address space * this aids process exit speeds. Also, this code * is special cased for current process only, but * can have the more generic (and slightly slower) * mode enabled. This is much faster than pmap_remove * in the case of running down an entire address space. */ void pmap_remove_pages(pmap_t pmap) { pt_entry_t *pte, tpte; vm_page_t m, free = NULL; pv_entry_t pv; struct pv_chunk *pc, *npc; int field, idx; int32_t bit; uint32_t inuse, bitmask; int allfree; CTR1(KTR_PMAP, "pmap_remove_pages: pmap=%p", pmap); if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) { printf("warning: pmap_remove_pages called with non-current pmap\n"); return; } vm_page_lock_queues(); KASSERT(pmap_is_current(pmap), ("removing pages from non-current pmap")); PMAP_LOCK(pmap); sched_pin(); TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { allfree = 1; for (field = 0; field < _NPCM; field++) { inuse = (~(pc->pc_map[field])) & pc_freemask[field]; while (inuse != 0) { bit = bsfl(inuse); bitmask = 1UL << bit; idx = field * 32 + bit; pv = &pc->pc_pventry[idx]; inuse &= ~bitmask; pte = vtopte(pv->pv_va); tpte = *pte ? xpmap_mtop(*pte) : 0; if (tpte == 0) { printf( "TPTE at %p IS ZERO @ VA %08x\n", pte, pv->pv_va); panic("bad pte"); } /* * We cannot remove wired pages from a process' mapping at this time */ if (tpte & PG_W) { allfree = 0; continue; } m = PHYS_TO_VM_PAGE(tpte & PG_FRAME); KASSERT(m->phys_addr == (tpte & PG_FRAME), ("vm_page_t %p phys_addr mismatch %016jx %016jx", m, (uintmax_t)m->phys_addr, (uintmax_t)tpte)); KASSERT(m < &vm_page_array[vm_page_array_size], ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte)); PT_CLEAR_VA(pte, FALSE); /* * Update the vm_page_t clean/reference bits. */ if (tpte & PG_M) vm_page_dirty(m); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); if (TAILQ_EMPTY(&m->md.pv_list)) vm_page_flag_clear(m, PG_WRITEABLE); pmap_unuse_pt(pmap, pv->pv_va, &free); /* Mark free */ PV_STAT(pv_entry_frees++); PV_STAT(pv_entry_spare++); pv_entry_count--; pc->pc_map[field] |= bitmask; pmap->pm_stats.resident_count--; } } PT_UPDATES_FLUSH(); if (allfree) { PV_STAT(pv_entry_spare -= _NPCPV); PV_STAT(pc_chunk_count--); PV_STAT(pc_chunk_frees++); TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); pmap_qremove((vm_offset_t)pc, 1); vm_page_unwire(m, 0); vm_page_free(m); pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); } } PT_UPDATES_FLUSH(); if (*PMAP1) PT_SET_MA(PADDR1, 0); sched_unpin(); pmap_invalidate_all(pmap); vm_page_unlock_queues(); PMAP_UNLOCK(pmap); pmap_free_zero_pages(free); } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. */ boolean_t pmap_is_modified(vm_page_t m) { pv_entry_t pv; pt_entry_t *pte; pmap_t pmap; boolean_t rv; KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, ("pmap_is_modified: page %p is not managed", m)); rv = FALSE; /* * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PG_WRITEABLE * is clear, no PTEs can have PG_M set. */ VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); if ((m->oflags & VPO_BUSY) == 0 && (m->flags & PG_WRITEABLE) == 0) return (rv); vm_page_lock_queues(); sched_pin(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); rv = (*pte & PG_M) != 0; PMAP_UNLOCK(pmap); if (rv) break; } if (*PMAP1) PT_SET_MA(PADDR1, 0); sched_unpin(); vm_page_unlock_queues(); return (rv); } /* * pmap_is_prefaultable: * * Return whether or not the specified virtual address is elgible * for prefault. */ static boolean_t pmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr) { pt_entry_t *pte; boolean_t rv = FALSE; return (rv); if (pmap_is_current(pmap) && *pmap_pde(pmap, addr)) { pte = vtopte(addr); rv = (*pte == 0); } return (rv); } boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) { boolean_t rv; PMAP_LOCK(pmap); rv = pmap_is_prefaultable_locked(pmap, addr); PMAP_UNLOCK(pmap); return (rv); } boolean_t pmap_is_referenced(vm_page_t m) { pv_entry_t pv; pt_entry_t *pte; pmap_t pmap; boolean_t rv; KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, ("pmap_is_referenced: page %p is not managed", m)); rv = FALSE; vm_page_lock_queues(); sched_pin(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); rv = (*pte & (PG_A | PG_V)) == (PG_A | PG_V); PMAP_UNLOCK(pmap); if (rv) break; } if (*PMAP1) PT_SET_MA(PADDR1, 0); sched_unpin(); vm_page_unlock_queues(); return (rv); } void pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len) { int i, npages = round_page(len) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { pt_entry_t *pte; pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE)); pte_store(pte, xpmap_mtop(*pte & ~(PG_RW|PG_M))); PMAP_MARK_PRIV(xpmap_mtop(*pte)); pmap_pte_release(pte); } } void pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len) { int i, npages = round_page(len) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { pt_entry_t *pte; pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE)); PMAP_MARK_UNPRIV(xpmap_mtop(*pte)); pte_store(pte, xpmap_mtop(*pte) | (PG_RW|PG_M)); pmap_pte_release(pte); } } /* * Clear the write and modified bits in each of the given page's mappings. */ void pmap_remove_write(vm_page_t m) { pv_entry_t pv; pmap_t pmap; pt_entry_t oldpte, *pte; KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, ("pmap_remove_write: page %p is not managed", m)); /* * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by * another thread while the object is locked. Thus, if PG_WRITEABLE * is clear, no page table entries need updating. */ VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); if ((m->oflags & VPO_BUSY) == 0 && (m->flags & PG_WRITEABLE) == 0) return; vm_page_lock_queues(); sched_pin(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); retry: oldpte = *pte; if ((oldpte & PG_RW) != 0) { vm_paddr_t newpte = oldpte & ~(PG_RW | PG_M); /* * Regardless of whether a pte is 32 or 64 bits * in size, PG_RW and PG_M are among the least * significant 32 bits. */ PT_SET_VA_MA(pte, newpte, TRUE); if (*pte != newpte) goto retry; if ((oldpte & PG_M) != 0) vm_page_dirty(m); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } vm_page_flag_clear(m, PG_WRITEABLE); PT_UPDATES_FLUSH(); if (*PMAP1) PT_SET_MA(PADDR1, 0); sched_unpin(); vm_page_unlock_queues(); } /* * pmap_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * XXX: The exact number of bits to check and clear is a matter that * should be tested and standardized at some point in the future for * optimal aging of shared pages. */ int pmap_ts_referenced(vm_page_t m) { pv_entry_t pv, pvf, pvn; pmap_t pmap; pt_entry_t *pte; int rtval = 0; KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, ("pmap_ts_referenced: page %p is not managed", m)); vm_page_lock_queues(); sched_pin(); if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pvf = pv; do { pvn = TAILQ_NEXT(pv, pv_list); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); if ((*pte & PG_A) != 0) { PT_SET_VA_MA(pte, *pte & ~PG_A, FALSE); pmap_invalidate_page(pmap, pv->pv_va); rtval++; if (rtval > 4) pvn = NULL; } PMAP_UNLOCK(pmap); } while ((pv = pvn) != NULL && pv != pvf); } PT_UPDATES_FLUSH(); if (*PMAP1) PT_SET_MA(PADDR1, 0); sched_unpin(); vm_page_unlock_queues(); return (rtval); } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { pv_entry_t pv; pmap_t pmap; pt_entry_t *pte; KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); KASSERT((m->oflags & VPO_BUSY) == 0, ("pmap_clear_modify: page %p is busy", m)); /* * If the page is not PG_WRITEABLE, then no PTEs can have PG_M set. * If the object containing the page is locked and the page is not * VPO_BUSY, then PG_WRITEABLE cannot be concurrently set. */ if ((m->flags & PG_WRITEABLE) == 0) return; vm_page_lock_queues(); sched_pin(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); if ((*pte & PG_M) != 0) { /* * Regardless of whether a pte is 32 or 64 bits * in size, PG_M is among the least significant * 32 bits. */ PT_SET_VA_MA(pte, *pte & ~PG_M, FALSE); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } sched_unpin(); vm_page_unlock_queues(); } /* * pmap_clear_reference: * * Clear the reference bit on the specified physical page. */ void pmap_clear_reference(vm_page_t m) { pv_entry_t pv; pmap_t pmap; pt_entry_t *pte; KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, ("pmap_clear_reference: page %p is not managed", m)); vm_page_lock_queues(); sched_pin(); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); if ((*pte & PG_A) != 0) { /* * Regardless of whether a pte is 32 or 64 bits * in size, PG_A is among the least significant * 32 bits. */ PT_SET_VA_MA(pte, *pte & ~PG_A, FALSE); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } sched_unpin(); vm_page_unlock_queues(); } /* * Miscellaneous support routines follow */ /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. */ void * pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode) { vm_offset_t va, offset; vm_size_t tmpsize; offset = pa & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); pa = pa & PG_FRAME; if (pa < KERNLOAD && pa + size <= KERNLOAD) va = KERNBASE + pa; else va = kmem_alloc_nofault(kernel_map, size); if (!va) panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE) pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode); pmap_invalidate_range(kernel_pmap, va, va + tmpsize); pmap_invalidate_cache_range(va, va + size); return ((void *)(va + offset)); } void * pmap_mapdev(vm_paddr_t pa, vm_size_t size) { return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE)); } void * pmap_mapbios(vm_paddr_t pa, vm_size_t size) { return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK)); } void pmap_unmapdev(vm_offset_t va, vm_size_t size) { vm_offset_t base, offset, tmpva; if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD) return; base = trunc_page(va); offset = va & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); critical_enter(); for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) pmap_kremove(tmpva); pmap_invalidate_range(kernel_pmap, va, tmpva); critical_exit(); kmem_free(kernel_map, base, size); } /* * Sets the memory attribute for the specified page. */ void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { struct sysmaps *sysmaps; vm_offset_t sva, eva; m->md.pat_mode = ma; if ((m->flags & PG_FICTITIOUS) != 0) return; /* * If "m" is a normal page, flush it from the cache. * See pmap_invalidate_cache_range(). * * First, try to find an existing mapping of the page by sf * buffer. sf_buf_invalidate_cache() modifies mapping and * flushes the cache. */ if (sf_buf_invalidate_cache(m)) return; /* * If page is not mapped by sf buffer, but CPU does not * support self snoop, map the page transient and do * invalidation. In the worst case, whole cache is flushed by * pmap_invalidate_cache_range(). */ if ((cpu_feature & (CPUID_SS|CPUID_CLFSH)) == CPUID_CLFSH) { sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (*sysmaps->CMAP2) panic("pmap_page_set_memattr: CMAP2 busy"); sched_pin(); PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M | pmap_cache_bits(m->md.pat_mode, 0)); invlcaddr(sysmaps->CADDR2); sva = (vm_offset_t)sysmaps->CADDR2; eva = sva + PAGE_SIZE; } else sva = eva = 0; /* gcc */ pmap_invalidate_cache_range(sva, eva); if (sva != 0) { PT_SET_MA(sysmaps->CADDR2, 0); sched_unpin(); mtx_unlock(&sysmaps->lock); } } int pmap_change_attr(va, size, mode) vm_offset_t va; vm_size_t size; int mode; { vm_offset_t base, offset, tmpva; pt_entry_t *pte; u_int opte, npte; pd_entry_t *pde; boolean_t changed; base = trunc_page(va); offset = va & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); /* Only supported on kernel virtual addresses. */ if (base <= VM_MAXUSER_ADDRESS) return (EINVAL); /* 4MB pages and pages that aren't mapped aren't supported. */ for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) { pde = pmap_pde(kernel_pmap, tmpva); if (*pde & PG_PS) return (EINVAL); if ((*pde & PG_V) == 0) return (EINVAL); pte = vtopte(va); if ((*pte & PG_V) == 0) return (EINVAL); } changed = FALSE; /* * Ok, all the pages exist and are 4k, so run through them updating * their cache mode. */ for (tmpva = base; size > 0; ) { pte = vtopte(tmpva); /* * The cache mode bits are all in the low 32-bits of the * PTE, so we can just spin on updating the low 32-bits. */ do { opte = *(u_int *)pte; npte = opte & ~(PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT); npte |= pmap_cache_bits(mode, 0); PT_SET_VA_MA(pte, npte, TRUE); } while (npte != opte && (*pte != npte)); if (npte != opte) changed = TRUE; tmpva += PAGE_SIZE; size -= PAGE_SIZE; } /* * Flush CPU caches to make sure any data isn't cached that shouldn't * be, etc. */ if (changed) { pmap_invalidate_range(kernel_pmap, base, tmpva); pmap_invalidate_cache_range(base, tmpva); } return (0); } /* * perform the pmap work for mincore */ int pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { pt_entry_t *ptep, pte; vm_paddr_t pa; int val; PMAP_LOCK(pmap); retry: ptep = pmap_pte(pmap, addr); pte = (ptep != NULL) ? PT_GET(ptep) : 0; pmap_pte_release(ptep); val = 0; if ((pte & PG_V) != 0) { val |= MINCORE_INCORE; if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if ((pte & PG_A) != 0) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; } if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && (pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) { pa = pte & PG_FRAME; /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) goto retry; } else PA_UNLOCK_COND(*locked_pa); PMAP_UNLOCK(pmap); return (val); } void pmap_activate(struct thread *td) { pmap_t pmap, oldpmap; u_int32_t cr3; critical_enter(); pmap = vmspace_pmap(td->td_proc->p_vmspace); oldpmap = PCPU_GET(curpmap); #if defined(SMP) atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask)); atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); #else oldpmap->pm_active &= ~1; pmap->pm_active |= 1; #endif #ifdef PAE cr3 = vtophys(pmap->pm_pdpt); #else cr3 = vtophys(pmap->pm_pdir); #endif /* * pmap_activate is for the current thread on the current cpu */ td->td_pcb->pcb_cr3 = cr3; PT_UPDATES_FLUSH(); load_cr3(cr3); PCPU_SET(curpmap, pmap); critical_exit(); } void pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) { } /* * Increase the starting virtual address of the given mapping if a * different alignment might result in more superpage mappings. */ void pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { vm_offset_t superpage_offset; if (size < NBPDR) return; if (object != NULL && (object->flags & OBJ_COLORED) != 0) offset += ptoa(object->pg_color); superpage_offset = offset & PDRMASK; if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR || (*addr & PDRMASK) == superpage_offset) return; if ((*addr & PDRMASK) < superpage_offset) *addr = (*addr & ~PDRMASK) + superpage_offset; else *addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset; } void pmap_suspend() { pmap_t pmap; int i, pdir, offset; vm_paddr_t pdirma; mmu_update_t mu[4]; /* * We need to remove the recursive mapping structure from all * our pmaps so that Xen doesn't get confused when it restores * the page tables. The recursive map lives at page directory * index PTDPTDI. We assume that the suspend code has stopped * the other vcpus (if any). */ LIST_FOREACH(pmap, &allpmaps, pm_list) { for (i = 0; i < 4; i++) { /* * Figure out which page directory (L2) page * contains this bit of the recursive map and * the offset within that page of the map * entry */ pdir = (PTDPTDI + i) / NPDEPG; offset = (PTDPTDI + i) % NPDEPG; pdirma = pmap->pm_pdpt[pdir] & PG_FRAME; mu[i].ptr = pdirma + offset * sizeof(pd_entry_t); mu[i].val = 0; } HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF); } } void pmap_resume() { pmap_t pmap; int i, pdir, offset; vm_paddr_t pdirma; mmu_update_t mu[4]; /* * Restore the recursive map that we removed on suspend. */ LIST_FOREACH(pmap, &allpmaps, pm_list) { for (i = 0; i < 4; i++) { /* * Figure out which page directory (L2) page * contains this bit of the recursive map and * the offset within that page of the map * entry */ pdir = (PTDPTDI + i) / NPDEPG; offset = (PTDPTDI + i) % NPDEPG; pdirma = pmap->pm_pdpt[pdir] & PG_FRAME; mu[i].ptr = pdirma + offset * sizeof(pd_entry_t); mu[i].val = (pmap->pm_pdpt[i] & PG_FRAME) | PG_V; } HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF); } } #if defined(PMAP_DEBUG) pmap_pid_dump(int pid) { pmap_t pmap; struct proc *p; int npte = 0; int index; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { if (p->p_pid != pid) continue; if (p->p_vmspace) { int i,j; index = 0; pmap = vmspace_pmap(p->p_vmspace); for (i = 0; i < NPDEPTD; i++) { pd_entry_t *pde; pt_entry_t *pte; vm_offset_t base = i << PDRSHIFT; pde = &pmap->pm_pdir[i]; if (pde && pmap_pde_v(pde)) { for (j = 0; j < NPTEPG; j++) { vm_offset_t va = base + (j << PAGE_SHIFT); if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { if (index) { index = 0; printf("\n"); } sx_sunlock(&allproc_lock); return npte; } pte = pmap_pte(pmap, va); if (pte && pmap_pte_v(pte)) { pt_entry_t pa; vm_page_t m; pa = PT_GET(pte); m = PHYS_TO_VM_PAGE(pa & PG_FRAME); printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", va, pa, m->hold_count, m->wire_count, m->flags); npte++; index++; if (index >= 2) { index = 0; printf("\n"); } else { printf(" "); } } } } } } } sx_sunlock(&allproc_lock); return npte; } #endif #if defined(DEBUG) static void pads(pmap_t pm); void pmap_pvdump(vm_paddr_t pa); /* print address space of pmap*/ static void pads(pmap_t pm) { int i, j; vm_paddr_t va; pt_entry_t *ptep; if (pm == kernel_pmap) return; for (i = 0; i < NPDEPTD; i++) if (pm->pm_pdir[i]) for (j = 0; j < NPTEPG; j++) { va = (i << PDRSHIFT) + (j << PAGE_SHIFT); if (pm == kernel_pmap && va < KERNBASE) continue; if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) continue; ptep = pmap_pte(pm, va); if (pmap_pte_v(ptep)) printf("%x:%x ", va, *ptep); }; } void pmap_pvdump(vm_paddr_t pa) { pv_entry_t pv; pmap_t pmap; vm_page_t m; printf("pa %x", pa); m = PHYS_TO_VM_PAGE(pa); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { pmap = PV_PMAP(pv); printf(" -> pmap %p, va %x", (void *)pmap, pv->pv_va); pads(pmap); } printf(" "); } #endif Index: projects/binutils-2.17/sys/i386/xen/xen_machdep.c =================================================================== --- projects/binutils-2.17/sys/i386/xen/xen_machdep.c (revision 215829) +++ projects/binutils-2.17/sys/i386/xen/xen_machdep.c (revision 215830) @@ -1,1229 +1,1249 @@ /* * * Copyright (c) 2004 Christian Limpach. * Copyright (c) 2004-2006,2008 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Christian Limpach. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #define IDTVEC(name) __CONCAT(X,name) extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); int xendebug_flags; start_info_t *xen_start_info; shared_info_t *HYPERVISOR_shared_info; xen_pfn_t *xen_machine_phys = machine_to_phys_mapping; xen_pfn_t *xen_phys_machine; xen_pfn_t *xen_pfn_to_mfn_frame_list[16]; xen_pfn_t *xen_pfn_to_mfn_frame_list_list; int preemptable, init_first; extern unsigned int avail_space; void ni_cli(void); void ni_sti(void); void ni_cli(void) { CTR0(KTR_SPARE2, "ni_cli disabling interrupts"); __asm__("pushl %edx;" "pushl %eax;" ); __cli(); __asm__("popl %eax;" "popl %edx;" ); } void ni_sti(void) { __asm__("pushl %edx;" "pushl %esi;" "pushl %eax;" ); __sti(); __asm__("popl %eax;" "popl %esi;" "popl %edx;" ); } /* * Modify the cmd_line by converting ',' to NULLs so that it is in a format * suitable for the static env vars. */ char * xen_setbootenv(char *cmd_line) { char *cmd_line_next; /* Skip leading spaces */ for (; *cmd_line == ' '; cmd_line++); printk("xen_setbootenv(): cmd_line='%s'\n", cmd_line); for (cmd_line_next = cmd_line; strsep(&cmd_line_next, ",") != NULL;); return cmd_line; } static struct { const char *ev; int mask; } howto_names[] = { {"boot_askname", RB_ASKNAME}, {"boot_single", RB_SINGLE}, {"boot_nosync", RB_NOSYNC}, {"boot_halt", RB_ASKNAME}, {"boot_serial", RB_SERIAL}, {"boot_cdrom", RB_CDROM}, {"boot_gdb", RB_GDB}, {"boot_gdb_pause", RB_RESERVED1}, {"boot_verbose", RB_VERBOSE}, {"boot_multicons", RB_MULTIPLE}, {NULL, 0} }; int xen_boothowto(char *envp) { int i, howto = 0; /* get equivalents from the environment */ for (i = 0; howto_names[i].ev != NULL; i++) if (getenv(howto_names[i].ev) != NULL) howto |= howto_names[i].mask; return howto; } #define PRINTK_BUFSIZE 1024 void printk(const char *fmt, ...) { __va_list ap; int retval; static char buf[PRINTK_BUFSIZE]; va_start(ap, fmt); retval = vsnprintf(buf, PRINTK_BUFSIZE - 1, fmt, ap); va_end(ap); buf[retval] = 0; (void)HYPERVISOR_console_write(buf, retval); } #define XPQUEUE_SIZE 128 struct mmu_log { char *file; int line; }; #ifdef SMP /* per-cpu queues and indices */ #ifdef INVARIANTS static struct mmu_log xpq_queue_log[MAX_VIRT_CPUS][XPQUEUE_SIZE]; #endif static int xpq_idx[MAX_VIRT_CPUS]; static mmu_update_t xpq_queue[MAX_VIRT_CPUS][XPQUEUE_SIZE]; #define XPQ_QUEUE_LOG xpq_queue_log[vcpu] #define XPQ_QUEUE xpq_queue[vcpu] #define XPQ_IDX xpq_idx[vcpu] #define SET_VCPU() int vcpu = smp_processor_id() #else static mmu_update_t xpq_queue[XPQUEUE_SIZE]; static struct mmu_log xpq_queue_log[XPQUEUE_SIZE]; static int xpq_idx = 0; #define XPQ_QUEUE_LOG xpq_queue_log #define XPQ_QUEUE xpq_queue #define XPQ_IDX xpq_idx #define SET_VCPU() #endif /* !SMP */ #define XPQ_IDX_INC atomic_add_int(&XPQ_IDX, 1); #if 0 static void xen_dump_queue(void) { int _xpq_idx = XPQ_IDX; int i; if (_xpq_idx <= 1) return; printk("xen_dump_queue(): %u entries\n", _xpq_idx); for (i = 0; i < _xpq_idx; i++) { printk(" val: %llx ptr: %llx\n", XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr); } } #endif static __inline void _xen_flush_queue(void) { SET_VCPU(); int _xpq_idx = XPQ_IDX; int error, i; /* window of vulnerability here? */ if (__predict_true(gdtset)) critical_enter(); XPQ_IDX = 0; /* Make sure index is cleared first to avoid double updates. */ error = HYPERVISOR_mmu_update((mmu_update_t *)&XPQ_QUEUE, _xpq_idx, NULL, DOMID_SELF); #if 0 if (__predict_true(gdtset)) for (i = _xpq_idx; i > 0;) { if (i >= 3) { CTR6(KTR_PMAP, "mmu:val: %lx ptr: %lx val: %lx " "ptr: %lx val: %lx ptr: %lx", (XPQ_QUEUE[i-1].val & 0xffffffff), (XPQ_QUEUE[i-1].ptr & 0xffffffff), (XPQ_QUEUE[i-2].val & 0xffffffff), (XPQ_QUEUE[i-2].ptr & 0xffffffff), (XPQ_QUEUE[i-3].val & 0xffffffff), (XPQ_QUEUE[i-3].ptr & 0xffffffff)); i -= 3; } else if (i == 2) { CTR4(KTR_PMAP, "mmu: val: %lx ptr: %lx val: %lx ptr: %lx", (XPQ_QUEUE[i-1].val & 0xffffffff), (XPQ_QUEUE[i-1].ptr & 0xffffffff), (XPQ_QUEUE[i-2].val & 0xffffffff), (XPQ_QUEUE[i-2].ptr & 0xffffffff)); i = 0; } else { CTR2(KTR_PMAP, "mmu: val: %lx ptr: %lx", (XPQ_QUEUE[i-1].val & 0xffffffff), (XPQ_QUEUE[i-1].ptr & 0xffffffff)); i = 0; } } #endif if (__predict_true(gdtset)) critical_exit(); if (__predict_false(error < 0)) { for (i = 0; i < _xpq_idx; i++) printf("val: %llx ptr: %llx\n", XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr); panic("Failed to execute MMU updates: %d", error); } } void xen_flush_queue(void) { SET_VCPU(); if (XPQ_IDX != 0) _xen_flush_queue(); } static __inline void xen_increment_idx(void) { SET_VCPU(); XPQ_IDX++; if (__predict_false(XPQ_IDX == XPQUEUE_SIZE)) xen_flush_queue(); } void xen_check_queue(void) { #ifdef INVARIANTS SET_VCPU(); KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX)); #endif } void xen_invlpg(vm_offset_t va) { struct mmuext_op op; op.cmd = MMUEXT_INVLPG_ALL; op.arg1.linear_addr = va & ~PAGE_MASK; PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_load_cr3(u_int val) { struct mmuext_op op; #ifdef INVARIANTS SET_VCPU(); KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX)); #endif op.cmd = MMUEXT_NEW_BASEPTR; op.arg1.mfn = xpmap_ptom(val) >> PAGE_SHIFT; PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } #ifdef KTR static __inline u_int rebp(void) { u_int data; __asm __volatile("movl 4(%%ebp),%0" : "=r" (data)); return (data); } #endif u_int read_eflags(void) { vcpu_info_t *_vcpu; u_int eflags; eflags = _read_eflags(); _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; if (_vcpu->evtchn_upcall_mask) eflags &= ~PSL_I; return (eflags); } void write_eflags(u_int eflags) { u_int intr; CTR2(KTR_SPARE2, "%x xen_restore_flags eflags %x", rebp(), eflags); intr = ((eflags & PSL_I) == 0); __restore_flags(intr); _write_eflags(eflags); } void xen_cli(void) { CTR1(KTR_SPARE2, "%x xen_cli disabling interrupts", rebp()); __cli(); } void xen_sti(void) { CTR1(KTR_SPARE2, "%x xen_sti enabling interrupts", rebp()); __sti(); } u_int xen_rcr2(void) { return (HYPERVISOR_shared_info->vcpu_info[curcpu].arch.cr2); } void _xen_machphys_update(vm_paddr_t mfn, vm_paddr_t pfn, char *file, int line) { SET_VCPU(); if (__predict_true(gdtset)) critical_enter(); XPQ_QUEUE[XPQ_IDX].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; XPQ_QUEUE[XPQ_IDX].val = pfn; #ifdef INVARIANTS XPQ_QUEUE_LOG[XPQ_IDX].file = file; XPQ_QUEUE_LOG[XPQ_IDX].line = line; #endif xen_increment_idx(); if (__predict_true(gdtset)) critical_exit(); } void _xen_queue_pt_update(vm_paddr_t ptr, vm_paddr_t val, char *file, int line) { SET_VCPU(); if (__predict_true(gdtset)) mtx_assert(&vm_page_queue_mtx, MA_OWNED); KASSERT((ptr & 7) == 0, ("misaligned update")); if (__predict_true(gdtset)) critical_enter(); XPQ_QUEUE[XPQ_IDX].ptr = ((uint64_t)ptr) | MMU_NORMAL_PT_UPDATE; XPQ_QUEUE[XPQ_IDX].val = (uint64_t)val; #ifdef INVARIANTS XPQ_QUEUE_LOG[XPQ_IDX].file = file; XPQ_QUEUE_LOG[XPQ_IDX].line = line; #endif xen_increment_idx(); if (__predict_true(gdtset)) critical_exit(); } void xen_pgdpt_pin(vm_paddr_t ma) { struct mmuext_op op; op.cmd = MMUEXT_PIN_L3_TABLE; op.arg1.mfn = ma >> PAGE_SHIFT; xen_flush_queue(); PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_pgd_pin(vm_paddr_t ma) { struct mmuext_op op; op.cmd = MMUEXT_PIN_L2_TABLE; op.arg1.mfn = ma >> PAGE_SHIFT; xen_flush_queue(); PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_pgd_unpin(vm_paddr_t ma) { struct mmuext_op op; op.cmd = MMUEXT_UNPIN_TABLE; op.arg1.mfn = ma >> PAGE_SHIFT; xen_flush_queue(); PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_pt_pin(vm_paddr_t ma) { struct mmuext_op op; op.cmd = MMUEXT_PIN_L1_TABLE; op.arg1.mfn = ma >> PAGE_SHIFT; - printk("xen_pt_pin(): mfn=%x\n", op.arg1.mfn); xen_flush_queue(); PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_pt_unpin(vm_paddr_t ma) { struct mmuext_op op; op.cmd = MMUEXT_UNPIN_TABLE; op.arg1.mfn = ma >> PAGE_SHIFT; xen_flush_queue(); PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_set_ldt(vm_paddr_t ptr, unsigned long len) { struct mmuext_op op; op.cmd = MMUEXT_SET_LDT; op.arg1.linear_addr = ptr; op.arg2.nr_ents = len; xen_flush_queue(); PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_tlb_flush(void) { struct mmuext_op op; op.cmd = MMUEXT_TLB_FLUSH_LOCAL; xen_flush_queue(); PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_update_descriptor(union descriptor *table, union descriptor *entry) { vm_paddr_t pa; pt_entry_t *ptp; ptp = vtopte((vm_offset_t)table); pa = (*ptp & PG_FRAME) | ((vm_offset_t)table & PAGE_MASK); if (HYPERVISOR_update_descriptor(pa, *(uint64_t *)entry)) panic("HYPERVISOR_update_descriptor failed\n"); } #if 0 /* * Bitmap is indexed by page number. If bit is set, the page is part of a * xen_create_contiguous_region() area of memory. */ unsigned long *contiguous_bitmap; static void contiguous_bitmap_set(unsigned long first_page, unsigned long nr_pages) { unsigned long start_off, end_off, curr_idx, end_idx; curr_idx = first_page / BITS_PER_LONG; start_off = first_page & (BITS_PER_LONG-1); end_idx = (first_page + nr_pages) / BITS_PER_LONG; end_off = (first_page + nr_pages) & (BITS_PER_LONG-1); if (curr_idx == end_idx) { contiguous_bitmap[curr_idx] |= ((1UL<> PAGE_SHIFT; mfn = PFNTOMFN(pfn); PFNTOMFN(pfn) = INVALID_P2M_ENTRY; PANIC_IF(HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != 1); } /* 2. Get a new contiguous memory extent. */ reservation.extent_order = order; /* xenlinux hardcodes this because of aacraid - maybe set to 0 if we're not * running with a broxen driver XXXEN */ reservation.address_bits = 31; if (HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1) goto fail; /* 3. Map the new extent in place of old pages. */ for (i = 0; i < (1 << order); i++) { int pfn; pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT; xen_machphys_update(mfn+i, pfn); PFNTOMFN(pfn) = mfn+i; } xen_tlb_flush(); #if 0 contiguous_bitmap_set(VM_PAGE_TO_PHYS(&pages[0]) >> PAGE_SHIFT, 1UL << order); #endif balloon_unlock(flags); return 0; fail: reservation.extent_order = 0; reservation.address_bits = 0; for (i = 0; i < (1 << order); i++) { int pfn; pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT; PANIC_IF(HYPERVISOR_memory_op( XENMEM_increase_reservation, &reservation) != 1); xen_machphys_update(mfn, pfn); PFNTOMFN(pfn) = mfn; } xen_tlb_flush(); balloon_unlock(flags); return ENOMEM; } void xen_destroy_contiguous_region(void *addr, int npages) { unsigned long mfn, i, flags, order, pfn0; struct xen_memory_reservation reservation = { .nr_extents = 1, .extent_order = 0, .domid = DOMID_SELF }; set_xen_guest_handle(reservation.extent_start, &mfn); pfn0 = vtophys(addr) >> PAGE_SHIFT; #if 0 scrub_pages(vstart, 1 << order); #endif /* can currently only handle power of two allocation */ PANIC_IF(ffs(npages) != fls(npages)); /* 0. determine order */ order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages); balloon_lock(flags); #if 0 contiguous_bitmap_clear(vtophys(addr) >> PAGE_SHIFT, 1UL << order); #endif /* 1. Zap current PTEs, giving away the underlying pages. */ for (i = 0; i < (1 << order); i++) { int pfn; uint64_t new_val = 0; pfn = vtomach((char *)addr + i*PAGE_SIZE) >> PAGE_SHIFT; PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)((char *)addr + (i * PAGE_SIZE)), new_val, 0)); PFNTOMFN(pfn) = INVALID_P2M_ENTRY; PANIC_IF(HYPERVISOR_memory_op( XENMEM_decrease_reservation, &reservation) != 1); } /* 2. Map new pages in place of old pages. */ for (i = 0; i < (1 << order); i++) { int pfn; uint64_t new_val; pfn = pfn0 + i; PANIC_IF(HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1); new_val = mfn << PAGE_SHIFT; PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)addr + (i * PAGE_SIZE), new_val, PG_KERNEL)); xen_machphys_update(mfn, pfn); PFNTOMFN(pfn) = mfn; } xen_tlb_flush(); balloon_unlock(flags); } extern vm_offset_t proc0kstack; extern int vm86paddr, vm86phystk; char *bootmem_start, *bootmem_current, *bootmem_end; pteinfo_t *pteinfo_list; void initvalues(start_info_t *startinfo); struct xenstore_domain_interface; extern struct xenstore_domain_interface *xen_store; char *console_page; void * bootmem_alloc(unsigned int size) { char *retptr; retptr = bootmem_current; PANIC_IF(retptr + size > bootmem_end); bootmem_current += size; return retptr; } void bootmem_free(void *ptr, unsigned int size) { char *tptr; tptr = ptr; PANIC_IF(tptr != bootmem_current - size || bootmem_current - size < bootmem_start); bootmem_current -= size; } #if 0 static vm_paddr_t xpmap_mtop2(vm_paddr_t mpa) { return ((machine_to_phys_mapping[mpa >> PAGE_SHIFT] << PAGE_SHIFT) ) | (mpa & ~PG_FRAME); } static pd_entry_t xpmap_get_bootpde(vm_paddr_t va) { return ((pd_entry_t *)xen_start_info->pt_base)[va >> 22]; } static pd_entry_t xpmap_get_vbootpde(vm_paddr_t va) { pd_entry_t pde; pde = xpmap_get_bootpde(va); if ((pde & PG_V) == 0) return (pde & ~PG_FRAME); return (pde & ~PG_FRAME) | (xpmap_mtop2(pde & PG_FRAME) + KERNBASE); } static pt_entry_t 8* xpmap_get_bootptep(vm_paddr_t va) { pd_entry_t pde; pde = xpmap_get_vbootpde(va); if ((pde & PG_V) == 0) return (void *)-1; #define PT_MASK 0x003ff000 /* page table address bits */ return &(((pt_entry_t *)(pde & PG_FRAME))[(va & PT_MASK) >> PAGE_SHIFT]); } static pt_entry_t xpmap_get_bootpte(vm_paddr_t va) { return xpmap_get_bootptep(va)[0]; } #endif #ifdef ADD_ISA_HOLE static void shift_phys_machine(unsigned long *phys_machine, int nr_pages) { unsigned long *tmp_page, *current_page, *next_page; int i; tmp_page = bootmem_alloc(PAGE_SIZE); current_page = phys_machine + nr_pages - (PAGE_SIZE/sizeof(unsigned long)); next_page = current_page - (PAGE_SIZE/sizeof(unsigned long)); bcopy(phys_machine, tmp_page, PAGE_SIZE); while (current_page > phys_machine) { /* save next page */ bcopy(next_page, tmp_page, PAGE_SIZE); /* shift down page */ bcopy(current_page, next_page, PAGE_SIZE); /* finish swap */ bcopy(tmp_page, current_page, PAGE_SIZE); current_page -= (PAGE_SIZE/sizeof(unsigned long)); next_page -= (PAGE_SIZE/sizeof(unsigned long)); } bootmem_free(tmp_page, PAGE_SIZE); for (i = 0; i < nr_pages; i++) { xen_machphys_update(phys_machine[i], i); } memset(phys_machine, INVALID_P2M_ENTRY, PAGE_SIZE); } #endif /* ADD_ISA_HOLE */ /* * Build a directory of the pages that make up our Physical to Machine * mapping table. The Xen suspend/restore code uses this to find our * mapping table. */ static void init_frame_list_list(void *arg) { unsigned long nr_pages = xen_start_info->nr_pages; #define FPP (PAGE_SIZE/sizeof(xen_pfn_t)) int i, j, k; xen_pfn_to_mfn_frame_list_list = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK); for (i = 0, j = 0, k = -1; i < nr_pages; i += FPP, j++) { if ((j & (FPP - 1)) == 0) { k++; xen_pfn_to_mfn_frame_list[k] = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK); xen_pfn_to_mfn_frame_list_list[k] = VTOMFN(xen_pfn_to_mfn_frame_list[k]); j = 0; } xen_pfn_to_mfn_frame_list[k][j] = VTOMFN(&xen_phys_machine[i]); } HYPERVISOR_shared_info->arch.max_pfn = nr_pages; HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = VTOMFN(xen_pfn_to_mfn_frame_list_list); } SYSINIT(init_fll, SI_SUB_DEVFS, SI_ORDER_ANY, init_frame_list_list, NULL); extern unsigned long physfree; int pdir, curoffset; extern int nkpt; extern uint32_t kernbase; void initvalues(start_info_t *startinfo) { vm_offset_t cur_space, cur_space_pt; struct physdev_set_iopl set_iopl; int l3_pages, l2_pages, l1_pages, offset; vm_paddr_t console_page_ma, xen_store_ma; vm_offset_t tmpva; vm_paddr_t shinfo; #ifdef PAE vm_paddr_t IdlePDPTma, IdlePDPTnewma; vm_paddr_t IdlePTDnewma[4]; pd_entry_t *IdlePDPTnew, *IdlePTDnew; vm_paddr_t IdlePTDma[4]; #else vm_paddr_t IdlePTDma[1]; #endif unsigned long i; int ncpus = MAXCPU; nkpt = min( min( max((startinfo->nr_pages >> NPGPTD_SHIFT), nkpt), NPGPTD*NPDEPG - KPTDI), (HYPERVISOR_VIRT_START - KERNBASE) >> PDRSHIFT); HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); #ifdef notyet /* * need to install handler */ HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments_notify); #endif xen_start_info = startinfo; xen_phys_machine = (xen_pfn_t *)startinfo->mfn_list; IdlePTD = (pd_entry_t *)((uint8_t *)startinfo->pt_base + PAGE_SIZE); l1_pages = 0; #ifdef PAE l3_pages = 1; l2_pages = 0; IdlePDPT = (pd_entry_t *)startinfo->pt_base; IdlePDPTma = VTOM(startinfo->pt_base); for (i = (KERNBASE >> 30); (i < 4) && (IdlePDPT[i] != 0); i++) l2_pages++; /* * Note that only one page directory has been allocated at this point. * Thus, if KERNBASE */ for (i = 0; i < l2_pages; i++) IdlePTDma[i] = VTOM(IdlePTD + i*PAGE_SIZE); l2_pages = (l2_pages == 0) ? 1 : l2_pages; #else l3_pages = 0; l2_pages = 1; #endif for (i = (((KERNBASE>>18) & PAGE_MASK)>>PAGE_SHIFT); (i>PDRSHIFT)); i++) { if (IdlePTD[i] == 0) break; l1_pages++; } /* number of pages allocated after the pts + 1*/; cur_space = xen_start_info->pt_base + (l3_pages + l2_pages + l1_pages + 1)*PAGE_SIZE; printk("initvalues(): wooh - availmem=%x,%x\n", avail_space, cur_space); printk("KERNBASE=%x,pt_base=%x, VTOPFN(base)=%x, nr_pt_frames=%x\n", KERNBASE,xen_start_info->pt_base, VTOPFN(xen_start_info->pt_base), xen_start_info->nr_pt_frames); xendebug_flags = 0; /* 0xffffffff; */ #ifdef ADD_ISA_HOLE shift_phys_machine(xen_phys_machine, xen_start_info->nr_pages); #endif XENPRINTF("IdlePTD %p\n", IdlePTD); XENPRINTF("nr_pages: %ld shared_info: 0x%lx flags: 0x%lx pt_base: 0x%lx " "mod_start: 0x%lx mod_len: 0x%lx\n", xen_start_info->nr_pages, xen_start_info->shared_info, xen_start_info->flags, xen_start_info->pt_base, xen_start_info->mod_start, xen_start_info->mod_len); #ifdef PAE IdlePDPTnew = (pd_entry_t *)cur_space; cur_space += PAGE_SIZE; bzero(IdlePDPTnew, PAGE_SIZE); IdlePDPTnewma = VTOM(IdlePDPTnew); IdlePTDnew = (pd_entry_t *)cur_space; cur_space += 4*PAGE_SIZE; bzero(IdlePTDnew, 4*PAGE_SIZE); for (i = 0; i < 4; i++) IdlePTDnewma[i] = VTOM((uint8_t *)IdlePTDnew + i*PAGE_SIZE); /* * L3 * * Copy the 4 machine addresses of the new PTDs in to the PDPT * */ for (i = 0; i < 4; i++) IdlePDPTnew[i] = IdlePTDnewma[i] | PG_V; __asm__("nop;"); /* * * re-map the new PDPT read-only */ PT_SET_MA(IdlePDPTnew, IdlePDPTnewma | PG_V); /* * * Unpin the current PDPT */ xen_pt_unpin(IdlePDPTma); #endif /* PAE */ /* Map proc0's KSTACK */ proc0kstack = cur_space; cur_space += (KSTACK_PAGES * PAGE_SIZE); printk("proc0kstack=%u\n", proc0kstack); /* vm86/bios stack */ cur_space += PAGE_SIZE; /* Map space for the vm86 region */ vm86paddr = (vm_offset_t)cur_space; cur_space += (PAGE_SIZE * 3); /* allocate 4 pages for bootmem allocator */ bootmem_start = bootmem_current = (char *)cur_space; cur_space += (4 * PAGE_SIZE); bootmem_end = (char *)cur_space; /* allocate pages for gdt */ gdt = (union descriptor *)cur_space; cur_space += PAGE_SIZE*ncpus; /* allocate page for ldt */ ldt = (union descriptor *)cur_space; cur_space += PAGE_SIZE; cur_space += PAGE_SIZE; /* unmap remaining pages from initial chunk * */ for (tmpva = cur_space; tmpva < (((uint32_t)&kernbase) + (l1_pages<> 18)), ((uint8_t *)IdlePTD) + ((KERNBASE >> 18) & PAGE_MASK), l1_pages*sizeof(pt_entry_t)); for (i = 0; i < 4; i++) { PT_SET_MA((uint8_t *)IdlePTDnew + i*PAGE_SIZE, IdlePTDnewma[i] | PG_V); } xen_load_cr3(VTOP(IdlePDPTnew)); xen_pgdpt_pin(VTOM(IdlePDPTnew)); /* allocate remainder of nkpt pages */ cur_space_pt = cur_space; for (offset = (KERNBASE >> PDRSHIFT), i = l1_pages; i < nkpt; i++, cur_space += PAGE_SIZE) { pdir = (offset + i) / NPDEPG; curoffset = ((offset + i) % NPDEPG); if (((offset + i) << PDRSHIFT) == VM_MAX_KERNEL_ADDRESS) break; /* * make sure that all the initial page table pages * have been zeroed */ PT_SET_MA(cur_space, VTOM(cur_space) | PG_V | PG_RW); bzero((char *)cur_space, PAGE_SIZE); PT_SET_MA(cur_space, (vm_paddr_t)0); xen_pt_pin(VTOM(cur_space)); xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] + curoffset*sizeof(vm_paddr_t)), VTOM(cur_space) | PG_KERNEL); PT_UPDATES_FLUSH(); } for (i = 0; i < 4; i++) { pdir = (PTDPTDI + i) / NPDEPG; curoffset = (PTDPTDI + i) % NPDEPG; xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] + curoffset*sizeof(vm_paddr_t)), IdlePTDnewma[i] | PG_V); } PT_UPDATES_FLUSH(); IdlePTD = IdlePTDnew; IdlePDPT = IdlePDPTnew; IdlePDPTma = IdlePDPTnewma; HYPERVISOR_shared_info = (shared_info_t *)cur_space; cur_space += PAGE_SIZE; xen_store = (struct xenstore_domain_interface *)cur_space; cur_space += PAGE_SIZE; console_page = (char *)cur_space; cur_space += PAGE_SIZE; /* * shared_info is an unsigned long so this will randomly break if * it is allocated above 4GB - I guess people are used to that * sort of thing with Xen ... sigh */ shinfo = xen_start_info->shared_info; PT_SET_MA(HYPERVISOR_shared_info, shinfo | PG_KERNEL); printk("#4\n"); xen_store_ma = (((vm_paddr_t)xen_start_info->store_mfn) << PAGE_SHIFT); PT_SET_MA(xen_store, xen_store_ma | PG_KERNEL); console_page_ma = (((vm_paddr_t)xen_start_info->console.domU.mfn) << PAGE_SHIFT); PT_SET_MA(console_page, console_page_ma | PG_KERNEL); printk("#5\n"); set_iopl.iopl = 1; PANIC_IF(HYPERVISOR_physdev_op(PHYSDEVOP_SET_IOPL, &set_iopl)); printk("#6\n"); #if 0 /* add page table for KERNBASE */ xen_queue_pt_update(IdlePTDma + KPTDI*sizeof(vm_paddr_t), VTOM(cur_space) | PG_KERNEL); xen_flush_queue(); #ifdef PAE xen_queue_pt_update(pdir_shadow_ma[3] + KPTDI*sizeof(vm_paddr_t), VTOM(cur_space) | PG_V | PG_A); #else xen_queue_pt_update(pdir_shadow_ma + KPTDI*sizeof(vm_paddr_t), VTOM(cur_space) | PG_V | PG_A); #endif xen_flush_queue(); cur_space += PAGE_SIZE; printk("#6\n"); #endif /* 0 */ #ifdef notyet if (xen_start_info->flags & SIF_INITDOMAIN) { /* Map first megabyte */ for (i = 0; i < (256 << PAGE_SHIFT); i += PAGE_SIZE) PT_SET_MA(KERNBASE + i, i | PG_KERNEL | PG_NC_PCD); xen_flush_queue(); } #endif /* * re-map kernel text read-only * */ for (i = (((vm_offset_t)&btext) & ~PAGE_MASK); i < (((vm_offset_t)&etext) & ~PAGE_MASK); i += PAGE_SIZE) PT_SET_MA(i, VTOM(i) | PG_V | PG_A); printk("#7\n"); physfree = VTOP(cur_space); init_first = physfree >> PAGE_SHIFT; IdlePTD = (pd_entry_t *)VTOP(IdlePTD); IdlePDPT = (pd_entry_t *)VTOP(IdlePDPT); setup_xen_features(); printk("#8, proc0kstack=%u\n", proc0kstack); } trap_info_t trap_table[] = { { 0, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)}, { 1, 0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)}, { 3, 3|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)}, { 4, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)}, /* This is UPL on Linux and KPL on BSD */ { 5, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)}, { 6, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)}, { 7, 0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)}, /* * { 8, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)}, * no handler for double fault */ { 9, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)}, {10, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)}, {11, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)}, {12, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)}, {13, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)}, {14, 0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)}, {15, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)}, {16, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)}, {17, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)}, {18, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)}, {19, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)}, {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)}, { 0, 0, 0, 0 } }; +/* Perform a multicall and check that individual calls succeeded. */ +int +HYPERVISOR_multicall(struct multicall_entry * call_list, int nr_calls) +{ + int ret = 0; + int i; + + /* Perform the multicall. */ + PANIC_IF(_HYPERVISOR_multicall(call_list, nr_calls)); + + /* Check the results of individual hypercalls. */ + for (i = 0; i < nr_calls; i++) + if (unlikely(call_list[i].result < 0)) + ret++; + if (unlikely(ret > 0)) + panic("%d multicall(s) failed: cpu %d\n", + ret, smp_processor_id()); + + /* If we didn't panic already, everything succeeded. */ + return (0); +} /********** CODE WORTH KEEPING ABOVE HERE *****************/ void xen_failsafe_handler(void); void xen_failsafe_handler(void) { panic("xen_failsafe_handler called!\n"); } void xen_handle_thread_switch(struct pcb *pcb); /* This is called by cpu_switch() when switching threads. */ /* The pcb arg refers to the process control block of the */ /* next thread which is to run */ void xen_handle_thread_switch(struct pcb *pcb) { uint32_t *a = (uint32_t *)&PCPU_GET(fsgs_gdt)[0]; uint32_t *b = (uint32_t *)&pcb->pcb_fsd; multicall_entry_t mcl[3]; int i = 0; /* Notify Xen of task switch */ mcl[i].op = __HYPERVISOR_stack_switch; mcl[i].args[0] = GSEL(GDATA_SEL, SEL_KPL); mcl[i++].args[1] = (unsigned long)pcb; /* Check for update of fsd */ if (*a != *b || *(a+1) != *(b+1)) { mcl[i].op = __HYPERVISOR_update_descriptor; *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a); *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b; } a += 2; b += 2; /* Check for update of gsd */ if (*a != *b || *(a+1) != *(b+1)) { mcl[i].op = __HYPERVISOR_update_descriptor; *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a); *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b; } (void)HYPERVISOR_multicall(mcl, i); } Index: projects/binutils-2.17/sys/kern/kern_tc.c =================================================================== --- projects/binutils-2.17/sys/kern/kern_tc.c (revision 215829) +++ projects/binutils-2.17/sys/kern/kern_tc.c (revision 215830) @@ -1,956 +1,956 @@ /*- * ---------------------------------------------------------------------------- * "THE BEER-WARE LICENSE" (Revision 42): * wrote this file. As long as you retain this notice you * can do whatever you want with this stuff. If we meet some day, and you think * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp * ---------------------------------------------------------------------------- */ #include __FBSDID("$FreeBSD$"); #include "opt_ntp.h" #include #include #include #include #include #include #include #include /* * A large step happens on boot. This constant detects such steps. * It is relatively small so that ntp_update_second gets called enough * in the typical 'missed a couple of seconds' case, but doesn't loop * forever when the time step is large. */ #define LARGE_STEP 200 /* * Implement a dummy timecounter which we can use until we get a real one * in the air. This allows the console and other early stuff to use * time services. */ static u_int dummy_get_timecount(struct timecounter *tc) { static u_int now; return (++now); } static struct timecounter dummy_timecounter = { dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000 }; struct timehands { /* These fields must be initialized by the driver. */ struct timecounter *th_counter; int64_t th_adjustment; uint64_t th_scale; u_int th_offset_count; struct bintime th_offset; struct timeval th_microtime; struct timespec th_nanotime; /* Fields not to be copied in tc_windup start with th_generation. */ volatile u_int th_generation; struct timehands *th_next; }; static struct timehands th0; static struct timehands th9 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th0}; static struct timehands th8 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th9}; static struct timehands th7 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th8}; static struct timehands th6 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th7}; static struct timehands th5 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th6}; static struct timehands th4 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th5}; static struct timehands th3 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th4}; static struct timehands th2 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th3}; static struct timehands th1 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th2}; static struct timehands th0 = { &dummy_timecounter, 0, (uint64_t)-1 / 1000000, 0, {1, 0}, {0, 0}, {0, 0}, 1, &th1 }; static struct timehands *volatile timehands = &th0; struct timecounter *timecounter = &dummy_timecounter; static struct timecounter *timecounters = &dummy_timecounter; int tc_min_ticktock_freq = 1; time_t time_second = 1; time_t time_uptime = 1; struct bintime boottimebin; struct timeval boottime; static int sysctl_kern_boottime(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_kern, KERN_BOOTTIME, boottime, CTLTYPE_STRUCT|CTLFLAG_RD, NULL, 0, sysctl_kern_boottime, "S,timeval", "System boottime"); SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, ""); SYSCTL_NODE(_kern_timecounter, OID_AUTO, tc, CTLFLAG_RW, 0, ""); static int timestepwarnings; SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RW, ×tepwarnings, 0, "Log time steps"); static void tc_windup(void); static void cpu_tick_calibrate(int); static int sysctl_kern_boottime(SYSCTL_HANDLER_ARGS) { #ifdef SCTL_MASK32 int tv[2]; if (req->flags & SCTL_MASK32) { tv[0] = boottime.tv_sec; tv[1] = boottime.tv_usec; return SYSCTL_OUT(req, tv, sizeof(tv)); } else #endif return SYSCTL_OUT(req, &boottime, sizeof(boottime)); } static int sysctl_kern_timecounter_get(SYSCTL_HANDLER_ARGS) { u_int ncount; struct timecounter *tc = arg1; ncount = tc->tc_get_timecount(tc); return sysctl_handle_int(oidp, &ncount, 0, req); } static int sysctl_kern_timecounter_freq(SYSCTL_HANDLER_ARGS) { uint64_t freq; struct timecounter *tc = arg1; freq = tc->tc_frequency; return sysctl_handle_quad(oidp, &freq, 0, req); } /* * Return the difference between the timehands' counter value now and what * was when we copied it to the timehands' offset_count. */ static __inline u_int tc_delta(struct timehands *th) { struct timecounter *tc; tc = th->th_counter; return ((tc->tc_get_timecount(tc) - th->th_offset_count) & tc->tc_counter_mask); } /* * Functions for reading the time. We have to loop until we are sure that * the timehands that we operated on was not updated under our feet. See * the comment in for a description of these 12 functions. */ void binuptime(struct bintime *bt) { struct timehands *th; u_int gen; do { th = timehands; gen = th->th_generation; *bt = th->th_offset; bintime_addx(bt, th->th_scale * tc_delta(th)); } while (gen == 0 || gen != th->th_generation); } void nanouptime(struct timespec *tsp) { struct bintime bt; binuptime(&bt); bintime2timespec(&bt, tsp); } void microuptime(struct timeval *tvp) { struct bintime bt; binuptime(&bt); bintime2timeval(&bt, tvp); } void bintime(struct bintime *bt) { binuptime(bt); bintime_add(bt, &boottimebin); } void nanotime(struct timespec *tsp) { struct bintime bt; bintime(&bt); bintime2timespec(&bt, tsp); } void microtime(struct timeval *tvp) { struct bintime bt; bintime(&bt); bintime2timeval(&bt, tvp); } void getbinuptime(struct bintime *bt) { struct timehands *th; u_int gen; do { th = timehands; gen = th->th_generation; *bt = th->th_offset; } while (gen == 0 || gen != th->th_generation); } void getnanouptime(struct timespec *tsp) { struct timehands *th; u_int gen; do { th = timehands; gen = th->th_generation; bintime2timespec(&th->th_offset, tsp); } while (gen == 0 || gen != th->th_generation); } void getmicrouptime(struct timeval *tvp) { struct timehands *th; u_int gen; do { th = timehands; gen = th->th_generation; bintime2timeval(&th->th_offset, tvp); } while (gen == 0 || gen != th->th_generation); } void getbintime(struct bintime *bt) { struct timehands *th; u_int gen; do { th = timehands; gen = th->th_generation; *bt = th->th_offset; } while (gen == 0 || gen != th->th_generation); bintime_add(bt, &boottimebin); } void getnanotime(struct timespec *tsp) { struct timehands *th; u_int gen; do { th = timehands; gen = th->th_generation; *tsp = th->th_nanotime; } while (gen == 0 || gen != th->th_generation); } void getmicrotime(struct timeval *tvp) { struct timehands *th; u_int gen; do { th = timehands; gen = th->th_generation; *tvp = th->th_microtime; } while (gen == 0 || gen != th->th_generation); } /* * Initialize a new timecounter and possibly use it. */ void tc_init(struct timecounter *tc) { u_int u; struct sysctl_oid *tc_root; u = tc->tc_frequency / tc->tc_counter_mask; /* XXX: We need some margin here, 10% is a guess */ u *= 11; u /= 10; if (u > hz && tc->tc_quality >= 0) { tc->tc_quality = -2000; if (bootverbose) { printf("Timecounter \"%s\" frequency %ju Hz", tc->tc_name, (uintmax_t)tc->tc_frequency); printf(" -- Insufficient hz, needs at least %u\n", u); } } else if (tc->tc_quality >= 0 || bootverbose) { printf("Timecounter \"%s\" frequency %ju Hz quality %d\n", tc->tc_name, (uintmax_t)tc->tc_frequency, tc->tc_quality); } tc->tc_next = timecounters; timecounters = tc; /* * Set up sysctl tree for this counter. */ tc_root = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_kern_timecounter_tc), OID_AUTO, tc->tc_name, CTLFLAG_RW, 0, "timecounter description"); SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO, "mask", CTLFLAG_RD, &(tc->tc_counter_mask), 0, "mask for implemented bits"); SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO, "counter", CTLTYPE_UINT | CTLFLAG_RD, tc, sizeof(*tc), sysctl_kern_timecounter_get, "IU", "current timecounter value"); SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO, "frequency", CTLTYPE_QUAD | CTLFLAG_RD, tc, sizeof(*tc), sysctl_kern_timecounter_freq, "QU", "timecounter frequency"); SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO, "quality", CTLFLAG_RD, &(tc->tc_quality), 0, "goodness of time counter"); /* * Never automatically use a timecounter with negative quality. * Even though we run on the dummy counter, switching here may be * worse since this timecounter may not be monotonous. */ if (tc->tc_quality < 0) return; if (tc->tc_quality < timecounter->tc_quality) return; if (tc->tc_quality == timecounter->tc_quality && tc->tc_frequency < timecounter->tc_frequency) return; (void)tc->tc_get_timecount(tc); (void)tc->tc_get_timecount(tc); timecounter = tc; } /* Report the frequency of the current timecounter. */ uint64_t tc_getfrequency(void) { return (timehands->th_counter->tc_frequency); } /* * Step our concept of UTC. This is done by modifying our estimate of * when we booted. * XXX: not locked. */ void tc_setclock(struct timespec *ts) { struct timespec tbef, taft; struct bintime bt, bt2; cpu_tick_calibrate(1); nanotime(&tbef); timespec2bintime(ts, &bt); binuptime(&bt2); bintime_sub(&bt, &bt2); bintime_add(&bt2, &boottimebin); boottimebin = bt; bintime2timeval(&bt, &boottime); /* XXX fiddle all the little crinkly bits around the fiords... */ tc_windup(); nanotime(&taft); if (timestepwarnings) { log(LOG_INFO, "Time stepped from %jd.%09ld to %jd.%09ld (%jd.%09ld)\n", (intmax_t)tbef.tv_sec, tbef.tv_nsec, (intmax_t)taft.tv_sec, taft.tv_nsec, (intmax_t)ts->tv_sec, ts->tv_nsec); } cpu_tick_calibrate(1); } /* * Initialize the next struct timehands in the ring and make * it the active timehands. Along the way we might switch to a different * timecounter and/or do seconds processing in NTP. Slightly magic. */ static void tc_windup(void) { struct bintime bt; struct timehands *th, *tho; uint64_t scale; u_int delta, ncount, ogen; int i; time_t t; /* * Make the next timehands a copy of the current one, but do not * overwrite the generation or next pointer. While we update * the contents, the generation must be zero. */ tho = timehands; th = tho->th_next; ogen = th->th_generation; th->th_generation = 0; bcopy(tho, th, offsetof(struct timehands, th_generation)); /* * Capture a timecounter delta on the current timecounter and if * changing timecounters, a counter value from the new timecounter. * Update the offset fields accordingly. */ delta = tc_delta(th); if (th->th_counter != timecounter) ncount = timecounter->tc_get_timecount(timecounter); else ncount = 0; th->th_offset_count += delta; th->th_offset_count &= th->th_counter->tc_counter_mask; while (delta > th->th_counter->tc_frequency) { /* Eat complete unadjusted seconds. */ delta -= th->th_counter->tc_frequency; th->th_offset.sec++; } if ((delta > th->th_counter->tc_frequency / 2) && - (th->th_scale * delta < (uint64_t)1 << 63)) { + (th->th_scale * delta < ((uint64_t)1 << 63))) { /* The product th_scale * delta just barely overflows. */ th->th_offset.sec++; } bintime_addx(&th->th_offset, th->th_scale * delta); /* * Hardware latching timecounters may not generate interrupts on * PPS events, so instead we poll them. There is a finite risk that * the hardware might capture a count which is later than the one we * got above, and therefore possibly in the next NTP second which might * have a different rate than the current NTP second. It doesn't * matter in practice. */ if (tho->th_counter->tc_poll_pps) tho->th_counter->tc_poll_pps(tho->th_counter); /* * Deal with NTP second processing. The for loop normally * iterates at most once, but in extreme situations it might * keep NTP sane if timeouts are not run for several seconds. * At boot, the time step can be large when the TOD hardware * has been read, so on really large steps, we call * ntp_update_second only twice. We need to call it twice in * case we missed a leap second. */ bt = th->th_offset; bintime_add(&bt, &boottimebin); i = bt.sec - tho->th_microtime.tv_sec; if (i > LARGE_STEP) i = 2; for (; i > 0; i--) { t = bt.sec; ntp_update_second(&th->th_adjustment, &bt.sec); if (bt.sec != t) boottimebin.sec += bt.sec - t; } /* Update the UTC timestamps used by the get*() functions. */ /* XXX shouldn't do this here. Should force non-`get' versions. */ bintime2timeval(&bt, &th->th_microtime); bintime2timespec(&bt, &th->th_nanotime); /* Now is a good time to change timecounters. */ if (th->th_counter != timecounter) { th->th_counter = timecounter; th->th_offset_count = ncount; tc_min_ticktock_freq = max(1, timecounter->tc_frequency / (((uint64_t)timecounter->tc_counter_mask + 1) / 3)); } /*- * Recalculate the scaling factor. We want the number of 1/2^64 * fractions of a second per period of the hardware counter, taking * into account the th_adjustment factor which the NTP PLL/adjtime(2) * processing provides us with. * * The th_adjustment is nanoseconds per second with 32 bit binary * fraction and we want 64 bit binary fraction of second: * * x = a * 2^32 / 10^9 = a * 4.294967296 * * The range of th_adjustment is +/- 5000PPM so inside a 64bit int * we can only multiply by about 850 without overflowing, that * leaves no suitably precise fractions for multiply before divide. * * Divide before multiply with a fraction of 2199/512 results in a * systematic undercompensation of 10PPM of th_adjustment. On a * 5000PPM adjustment this is a 0.05PPM error. This is acceptable. * * We happily sacrifice the lowest of the 64 bits of our result * to the goddess of code clarity. * */ scale = (uint64_t)1 << 63; scale += (th->th_adjustment / 1024) * 2199; scale /= th->th_counter->tc_frequency; th->th_scale = scale * 2; /* * Now that the struct timehands is again consistent, set the new * generation number, making sure to not make it zero. */ if (++ogen == 0) ogen = 1; th->th_generation = ogen; /* Go live with the new struct timehands. */ time_second = th->th_microtime.tv_sec; time_uptime = th->th_offset.sec; timehands = th; } /* Report or change the active timecounter hardware. */ static int sysctl_kern_timecounter_hardware(SYSCTL_HANDLER_ARGS) { char newname[32]; struct timecounter *newtc, *tc; int error; tc = timecounter; strlcpy(newname, tc->tc_name, sizeof(newname)); error = sysctl_handle_string(oidp, &newname[0], sizeof(newname), req); if (error != 0 || req->newptr == NULL || strcmp(newname, tc->tc_name) == 0) return (error); for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) { if (strcmp(newname, newtc->tc_name) != 0) continue; /* Warm up new timecounter. */ (void)newtc->tc_get_timecount(newtc); (void)newtc->tc_get_timecount(newtc); timecounter = newtc; return (0); } return (EINVAL); } SYSCTL_PROC(_kern_timecounter, OID_AUTO, hardware, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, sysctl_kern_timecounter_hardware, "A", "Timecounter hardware selected"); /* Report or change the active timecounter hardware. */ static int sysctl_kern_timecounter_choice(SYSCTL_HANDLER_ARGS) { char buf[32], *spc; struct timecounter *tc; int error; spc = ""; error = 0; for (tc = timecounters; error == 0 && tc != NULL; tc = tc->tc_next) { sprintf(buf, "%s%s(%d)", spc, tc->tc_name, tc->tc_quality); error = SYSCTL_OUT(req, buf, strlen(buf)); spc = " "; } return (error); } SYSCTL_PROC(_kern_timecounter, OID_AUTO, choice, CTLTYPE_STRING | CTLFLAG_RD, 0, 0, sysctl_kern_timecounter_choice, "A", "Timecounter hardware detected"); /* * RFC 2783 PPS-API implementation. */ int pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps) { pps_params_t *app; struct pps_fetch_args *fapi; #ifdef PPS_SYNC struct pps_kcbind_args *kapi; #endif KASSERT(pps != NULL, ("NULL pps pointer in pps_ioctl")); switch (cmd) { case PPS_IOC_CREATE: return (0); case PPS_IOC_DESTROY: return (0); case PPS_IOC_SETPARAMS: app = (pps_params_t *)data; if (app->mode & ~pps->ppscap) return (EINVAL); pps->ppsparam = *app; return (0); case PPS_IOC_GETPARAMS: app = (pps_params_t *)data; *app = pps->ppsparam; app->api_version = PPS_API_VERS_1; return (0); case PPS_IOC_GETCAP: *(int*)data = pps->ppscap; return (0); case PPS_IOC_FETCH: fapi = (struct pps_fetch_args *)data; if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC) return (EINVAL); if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec) return (EOPNOTSUPP); pps->ppsinfo.current_mode = pps->ppsparam.mode; fapi->pps_info_buf = pps->ppsinfo; return (0); case PPS_IOC_KCBIND: #ifdef PPS_SYNC kapi = (struct pps_kcbind_args *)data; /* XXX Only root should be able to do this */ if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC) return (EINVAL); if (kapi->kernel_consumer != PPS_KC_HARDPPS) return (EINVAL); if (kapi->edge & ~pps->ppscap) return (EINVAL); pps->kcmode = kapi->edge; return (0); #else return (EOPNOTSUPP); #endif default: return (ENOIOCTL); } } void pps_init(struct pps_state *pps) { pps->ppscap |= PPS_TSFMT_TSPEC; if (pps->ppscap & PPS_CAPTUREASSERT) pps->ppscap |= PPS_OFFSETASSERT; if (pps->ppscap & PPS_CAPTURECLEAR) pps->ppscap |= PPS_OFFSETCLEAR; } void pps_capture(struct pps_state *pps) { struct timehands *th; KASSERT(pps != NULL, ("NULL pps pointer in pps_capture")); th = timehands; pps->capgen = th->th_generation; pps->capth = th; pps->capcount = th->th_counter->tc_get_timecount(th->th_counter); if (pps->capgen != th->th_generation) pps->capgen = 0; } void pps_event(struct pps_state *pps, int event) { struct bintime bt; struct timespec ts, *tsp, *osp; u_int tcount, *pcount; int foff, fhard; pps_seq_t *pseq; KASSERT(pps != NULL, ("NULL pps pointer in pps_event")); /* If the timecounter was wound up underneath us, bail out. */ if (pps->capgen == 0 || pps->capgen != pps->capth->th_generation) return; /* Things would be easier with arrays. */ if (event == PPS_CAPTUREASSERT) { tsp = &pps->ppsinfo.assert_timestamp; osp = &pps->ppsparam.assert_offset; foff = pps->ppsparam.mode & PPS_OFFSETASSERT; fhard = pps->kcmode & PPS_CAPTUREASSERT; pcount = &pps->ppscount[0]; pseq = &pps->ppsinfo.assert_sequence; } else { tsp = &pps->ppsinfo.clear_timestamp; osp = &pps->ppsparam.clear_offset; foff = pps->ppsparam.mode & PPS_OFFSETCLEAR; fhard = pps->kcmode & PPS_CAPTURECLEAR; pcount = &pps->ppscount[1]; pseq = &pps->ppsinfo.clear_sequence; } /* * If the timecounter changed, we cannot compare the count values, so * we have to drop the rest of the PPS-stuff until the next event. */ if (pps->ppstc != pps->capth->th_counter) { pps->ppstc = pps->capth->th_counter; *pcount = pps->capcount; pps->ppscount[2] = pps->capcount; return; } /* Convert the count to a timespec. */ tcount = pps->capcount - pps->capth->th_offset_count; tcount &= pps->capth->th_counter->tc_counter_mask; bt = pps->capth->th_offset; bintime_addx(&bt, pps->capth->th_scale * tcount); bintime_add(&bt, &boottimebin); bintime2timespec(&bt, &ts); /* If the timecounter was wound up underneath us, bail out. */ if (pps->capgen != pps->capth->th_generation) return; *pcount = pps->capcount; (*pseq)++; *tsp = ts; if (foff) { timespecadd(tsp, osp); if (tsp->tv_nsec < 0) { tsp->tv_nsec += 1000000000; tsp->tv_sec -= 1; } } #ifdef PPS_SYNC if (fhard) { uint64_t scale; /* * Feed the NTP PLL/FLL. * The FLL wants to know how many (hardware) nanoseconds * elapsed since the previous event. */ tcount = pps->capcount - pps->ppscount[2]; pps->ppscount[2] = pps->capcount; tcount &= pps->capth->th_counter->tc_counter_mask; scale = (uint64_t)1 << 63; scale /= pps->capth->th_counter->tc_frequency; scale *= 2; bt.sec = 0; bt.frac = 0; bintime_addx(&bt, scale * tcount); bintime2timespec(&bt, &ts); hardpps(tsp, ts.tv_nsec + 1000000000 * ts.tv_sec); } #endif } /* * Timecounters need to be updated every so often to prevent the hardware * counter from overflowing. Updating also recalculates the cached values * used by the get*() family of functions, so their precision depends on * the update frequency. */ static int tc_tick; SYSCTL_INT(_kern_timecounter, OID_AUTO, tick, CTLFLAG_RD, &tc_tick, 0, "Approximate number of hardclock ticks in a millisecond"); void tc_ticktock(int cnt) { static int count; count += cnt; if (count < tc_tick) return; count = 0; tc_windup(); } static void inittimecounter(void *dummy) { u_int p; /* * Set the initial timeout to * max(1, ). * People should probably not use the sysctl to set the timeout * to smaller than its inital value, since that value is the * smallest reasonable one. If they want better timestamps they * should use the non-"get"* functions. */ if (hz > 1000) tc_tick = (hz + 500) / 1000; else tc_tick = 1; p = (tc_tick * 1000000) / hz; printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000); /* warm up new timecounter (again) and get rolling. */ (void)timecounter->tc_get_timecount(timecounter); (void)timecounter->tc_get_timecount(timecounter); tc_windup(); } SYSINIT(timecounter, SI_SUB_CLOCKS, SI_ORDER_SECOND, inittimecounter, NULL); /* Cpu tick handling -------------------------------------------------*/ static int cpu_tick_variable; static uint64_t cpu_tick_frequency; static uint64_t tc_cpu_ticks(void) { static uint64_t base; static unsigned last; unsigned u; struct timecounter *tc; tc = timehands->th_counter; u = tc->tc_get_timecount(tc) & tc->tc_counter_mask; if (u < last) base += (uint64_t)tc->tc_counter_mask + 1; last = u; return (u + base); } void cpu_tick_calibration(void) { static time_t last_calib; if (time_uptime != last_calib && !(time_uptime & 0xf)) { cpu_tick_calibrate(0); last_calib = time_uptime; } } /* * This function gets called every 16 seconds on only one designated * CPU in the system from hardclock() via cpu_tick_calibration()(). * * Whenever the real time clock is stepped we get called with reset=1 * to make sure we handle suspend/resume and similar events correctly. */ static void cpu_tick_calibrate(int reset) { static uint64_t c_last; uint64_t c_this, c_delta; static struct bintime t_last; struct bintime t_this, t_delta; uint32_t divi; if (reset) { /* The clock was stepped, abort & reset */ t_last.sec = 0; return; } /* we don't calibrate fixed rate cputicks */ if (!cpu_tick_variable) return; getbinuptime(&t_this); c_this = cpu_ticks(); if (t_last.sec != 0) { c_delta = c_this - c_last; t_delta = t_this; bintime_sub(&t_delta, &t_last); /* * Headroom: * 2^(64-20) / 16[s] = * 2^(44) / 16[s] = * 17.592.186.044.416 / 16 = * 1.099.511.627.776 [Hz] */ divi = t_delta.sec << 20; divi |= t_delta.frac >> (64 - 20); c_delta <<= 20; c_delta /= divi; if (c_delta > cpu_tick_frequency) { if (0 && bootverbose) printf("cpu_tick increased to %ju Hz\n", c_delta); cpu_tick_frequency = c_delta; } } c_last = c_this; t_last = t_this; } void set_cputicker(cpu_tick_f *func, uint64_t freq, unsigned var) { if (func == NULL) { cpu_ticks = tc_cpu_ticks; } else { cpu_tick_frequency = freq; cpu_tick_variable = var; cpu_ticks = func; } } uint64_t cpu_tickrate(void) { if (cpu_ticks == tc_cpu_ticks) return (tc_getfrequency()); return (cpu_tick_frequency); } /* * We need to be slightly careful converting cputicks to microseconds. * There is plenty of margin in 64 bits of microseconds (half a million * years) and in 64 bits at 4 GHz (146 years), but if we do a multiply * before divide conversion (to retain precision) we find that the * margin shrinks to 1.5 hours (one millionth of 146y). * With a three prong approach we never lose significant bits, no * matter what the cputick rate and length of timeinterval is. */ uint64_t cputick2usec(uint64_t tick) { if (tick > 18446744073709551LL) /* floor(2^64 / 1000) */ return (tick / (cpu_tickrate() / 1000000LL)); else if (tick > 18446744073709LL) /* floor(2^64 / 1000000) */ return ((tick * 1000LL) / (cpu_tickrate() / 1000LL)); else return ((tick * 1000000LL) / cpu_tickrate()); } cpu_tick_f *cpu_ticks = tc_cpu_ticks; Index: projects/binutils-2.17/sys/kern/subr_taskqueue.c =================================================================== --- projects/binutils-2.17/sys/kern/subr_taskqueue.c (revision 215829) +++ projects/binutils-2.17/sys/kern/subr_taskqueue.c (revision 215830) @@ -1,487 +1,485 @@ /*- * Copyright (c) 2000 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_TASKQUEUE, "taskqueue", "Task Queues"); static void *taskqueue_giant_ih; static void *taskqueue_ih; struct taskqueue_busy { struct task *tb_running; TAILQ_ENTRY(taskqueue_busy) tb_link; }; struct taskqueue { STAILQ_HEAD(, task) tq_queue; - const char *tq_name; taskqueue_enqueue_fn tq_enqueue; void *tq_context; TAILQ_HEAD(, taskqueue_busy) tq_active; struct mtx tq_mutex; struct thread **tq_threads; int tq_tcount; int tq_spin; int tq_flags; }; #define TQ_FLAGS_ACTIVE (1 << 0) #define TQ_FLAGS_BLOCKED (1 << 1) #define TQ_FLAGS_PENDING (1 << 2) #define TQ_LOCK(tq) \ do { \ if ((tq)->tq_spin) \ mtx_lock_spin(&(tq)->tq_mutex); \ else \ mtx_lock(&(tq)->tq_mutex); \ } while (0) #define TQ_UNLOCK(tq) \ do { \ if ((tq)->tq_spin) \ mtx_unlock_spin(&(tq)->tq_mutex); \ else \ mtx_unlock(&(tq)->tq_mutex); \ } while (0) static __inline int TQ_SLEEP(struct taskqueue *tq, void *p, struct mtx *m, int pri, const char *wm, int t) { if (tq->tq_spin) return (msleep_spin(p, m, wm, t)); return (msleep(p, m, pri, wm, t)); } static struct taskqueue * -_taskqueue_create(const char *name, int mflags, +_taskqueue_create(const char *name __unused, int mflags, taskqueue_enqueue_fn enqueue, void *context, int mtxflags, const char *mtxname) { struct taskqueue *queue; queue = malloc(sizeof(struct taskqueue), M_TASKQUEUE, mflags | M_ZERO); if (!queue) return NULL; STAILQ_INIT(&queue->tq_queue); TAILQ_INIT(&queue->tq_active); - queue->tq_name = name; queue->tq_enqueue = enqueue; queue->tq_context = context; queue->tq_spin = (mtxflags & MTX_SPIN) != 0; queue->tq_flags |= TQ_FLAGS_ACTIVE; mtx_init(&queue->tq_mutex, mtxname, NULL, mtxflags); return queue; } struct taskqueue * taskqueue_create(const char *name, int mflags, taskqueue_enqueue_fn enqueue, void *context) { return _taskqueue_create(name, mflags, enqueue, context, MTX_DEF, "taskqueue"); } /* * Signal a taskqueue thread to terminate. */ static void taskqueue_terminate(struct thread **pp, struct taskqueue *tq) { while (tq->tq_tcount > 0) { wakeup(tq); TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0); } } void taskqueue_free(struct taskqueue *queue) { TQ_LOCK(queue); queue->tq_flags &= ~TQ_FLAGS_ACTIVE; taskqueue_terminate(queue->tq_threads, queue); KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?")); mtx_destroy(&queue->tq_mutex); free(queue->tq_threads, M_TASKQUEUE); free(queue, M_TASKQUEUE); } int taskqueue_enqueue(struct taskqueue *queue, struct task *task) { struct task *ins; struct task *prev; TQ_LOCK(queue); /* * Count multiple enqueues. */ if (task->ta_pending) { task->ta_pending++; TQ_UNLOCK(queue); return 0; } /* * Optimise the case when all tasks have the same priority. */ prev = STAILQ_LAST(&queue->tq_queue, task, ta_link); if (!prev || prev->ta_priority >= task->ta_priority) { STAILQ_INSERT_TAIL(&queue->tq_queue, task, ta_link); } else { prev = NULL; for (ins = STAILQ_FIRST(&queue->tq_queue); ins; prev = ins, ins = STAILQ_NEXT(ins, ta_link)) if (ins->ta_priority < task->ta_priority) break; if (prev) STAILQ_INSERT_AFTER(&queue->tq_queue, prev, task, ta_link); else STAILQ_INSERT_HEAD(&queue->tq_queue, task, ta_link); } task->ta_pending = 1; if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0) queue->tq_enqueue(queue->tq_context); else queue->tq_flags |= TQ_FLAGS_PENDING; TQ_UNLOCK(queue); return 0; } void taskqueue_block(struct taskqueue *queue) { TQ_LOCK(queue); queue->tq_flags |= TQ_FLAGS_BLOCKED; TQ_UNLOCK(queue); } void taskqueue_unblock(struct taskqueue *queue) { TQ_LOCK(queue); queue->tq_flags &= ~TQ_FLAGS_BLOCKED; if (queue->tq_flags & TQ_FLAGS_PENDING) { queue->tq_flags &= ~TQ_FLAGS_PENDING; queue->tq_enqueue(queue->tq_context); } TQ_UNLOCK(queue); } static void taskqueue_run_locked(struct taskqueue *queue) { struct taskqueue_busy tb; struct task *task; int pending; mtx_assert(&queue->tq_mutex, MA_OWNED); tb.tb_running = NULL; TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link); while (STAILQ_FIRST(&queue->tq_queue)) { /* * Carefully remove the first task from the queue and * zero its pending count. */ task = STAILQ_FIRST(&queue->tq_queue); STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link); pending = task->ta_pending; task->ta_pending = 0; tb.tb_running = task; TQ_UNLOCK(queue); task->ta_func(task->ta_context, pending); TQ_LOCK(queue); tb.tb_running = NULL; wakeup(task); } TAILQ_REMOVE(&queue->tq_active, &tb, tb_link); } void taskqueue_run(struct taskqueue *queue) { TQ_LOCK(queue); taskqueue_run_locked(queue); TQ_UNLOCK(queue); } static int task_is_running(struct taskqueue *queue, struct task *task) { struct taskqueue_busy *tb; mtx_assert(&queue->tq_mutex, MA_OWNED); TAILQ_FOREACH(tb, &queue->tq_active, tb_link) { if (tb->tb_running == task) return (1); } return (0); } int taskqueue_cancel(struct taskqueue *queue, struct task *task, u_int *pendp) { u_int pending; int error; TQ_LOCK(queue); if ((pending = task->ta_pending) > 0) STAILQ_REMOVE(&queue->tq_queue, task, task, ta_link); task->ta_pending = 0; error = task_is_running(queue, task) ? EBUSY : 0; TQ_UNLOCK(queue); if (pendp != NULL) *pendp = pending; return (error); } void taskqueue_drain(struct taskqueue *queue, struct task *task) { if (!queue->tq_spin) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__); TQ_LOCK(queue); while (task->ta_pending != 0 || task_is_running(queue, task)) TQ_SLEEP(queue, task, &queue->tq_mutex, PWAIT, "-", 0); TQ_UNLOCK(queue); } static void taskqueue_swi_enqueue(void *context) { swi_sched(taskqueue_ih, 0); } static void taskqueue_swi_run(void *dummy) { taskqueue_run(taskqueue_swi); } static void taskqueue_swi_giant_enqueue(void *context) { swi_sched(taskqueue_giant_ih, 0); } static void taskqueue_swi_giant_run(void *dummy) { taskqueue_run(taskqueue_swi_giant); } int taskqueue_start_threads(struct taskqueue **tqp, int count, int pri, const char *name, ...) { va_list ap; struct thread *td; struct taskqueue *tq; int i, error; char ktname[MAXCOMLEN + 1]; if (count <= 0) return (EINVAL); tq = *tqp; va_start(ap, name); vsnprintf(ktname, sizeof(ktname), name, ap); va_end(ap); tq->tq_threads = malloc(sizeof(struct thread *) * count, M_TASKQUEUE, M_NOWAIT | M_ZERO); if (tq->tq_threads == NULL) { printf("%s: no memory for %s threads\n", __func__, ktname); return (ENOMEM); } for (i = 0; i < count; i++) { if (count == 1) error = kthread_add(taskqueue_thread_loop, tqp, NULL, &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname); else error = kthread_add(taskqueue_thread_loop, tqp, NULL, &tq->tq_threads[i], RFSTOPPED, 0, "%s_%d", ktname, i); if (error) { /* should be ok to continue, taskqueue_free will dtrt */ printf("%s: kthread_add(%s): error %d", __func__, ktname, error); tq->tq_threads[i] = NULL; /* paranoid */ } else tq->tq_tcount++; } for (i = 0; i < count; i++) { if (tq->tq_threads[i] == NULL) continue; td = tq->tq_threads[i]; thread_lock(td); sched_prio(td, pri); sched_add(td, SRQ_BORING); thread_unlock(td); } return (0); } void taskqueue_thread_loop(void *arg) { struct taskqueue **tqp, *tq; tqp = arg; tq = *tqp; TQ_LOCK(tq); while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) { taskqueue_run_locked(tq); /* * Because taskqueue_run() can drop tq_mutex, we need to * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the * meantime, which means we missed a wakeup. */ if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0) break; TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0); } taskqueue_run_locked(tq); /* rendezvous with thread that asked us to terminate */ tq->tq_tcount--; wakeup_one(tq->tq_threads); TQ_UNLOCK(tq); kthread_exit(); } void taskqueue_thread_enqueue(void *context) { struct taskqueue **tqp, *tq; tqp = context; tq = *tqp; mtx_assert(&tq->tq_mutex, MA_OWNED); wakeup_one(tq); } TASKQUEUE_DEFINE(swi, taskqueue_swi_enqueue, NULL, swi_add(NULL, "task queue", taskqueue_swi_run, NULL, SWI_TQ, INTR_MPSAFE, &taskqueue_ih)); TASKQUEUE_DEFINE(swi_giant, taskqueue_swi_giant_enqueue, NULL, swi_add(NULL, "Giant taskq", taskqueue_swi_giant_run, NULL, SWI_TQ_GIANT, 0, &taskqueue_giant_ih)); TASKQUEUE_DEFINE_THREAD(thread); struct taskqueue * taskqueue_create_fast(const char *name, int mflags, taskqueue_enqueue_fn enqueue, void *context) { return _taskqueue_create(name, mflags, enqueue, context, MTX_SPIN, "fast_taskqueue"); } /* NB: for backwards compatibility */ int taskqueue_enqueue_fast(struct taskqueue *queue, struct task *task) { return taskqueue_enqueue(queue, task); } static void *taskqueue_fast_ih; static void taskqueue_fast_enqueue(void *context) { swi_sched(taskqueue_fast_ih, 0); } static void taskqueue_fast_run(void *dummy) { taskqueue_run(taskqueue_fast); } TASKQUEUE_FAST_DEFINE(fast, taskqueue_fast_enqueue, NULL, swi_add(NULL, "Fast task queue", taskqueue_fast_run, NULL, SWI_TQ_FAST, INTR_MPSAFE, &taskqueue_fast_ih)); int taskqueue_member(struct taskqueue *queue, struct thread *td) { int i, j, ret = 0; TQ_LOCK(queue); for (i = 0, j = 0; ; i++) { if (queue->tq_threads[i] == NULL) continue; if (queue->tq_threads[i] == td) { ret = 1; break; } if (++j >= queue->tq_tcount) break; } TQ_UNLOCK(queue); return (ret); } Index: projects/binutils-2.17/sys/kern/vfs_mount.c =================================================================== --- projects/binutils-2.17/sys/kern/vfs_mount.c (revision 215829) +++ projects/binutils-2.17/sys/kern/vfs_mount.c (revision 215830) @@ -1,1972 +1,1974 @@ /*- * Copyright (c) 1999-2004 Poul-Henning Kamp * Copyright (c) 1999 Michael Smith * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define VFS_MOUNTARG_SIZE_MAX (1024 * 64) static int vfs_domount(struct thread *td, const char *fstype, char *fspath, int fsflags, void *fsdata); static void free_mntarg(struct mntarg *ma); static int usermount = 0; SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "Unprivileged users may mount and unmount file systems"); MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure"); MALLOC_DEFINE(M_VNODE_MARKER, "vnodemarker", "vnode marker"); static uma_zone_t mount_zone; /* List of mounted filesystems. */ struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist); /* For any iteration/modification of mountlist */ struct mtx mountlist_mtx; MTX_SYSINIT(mountlist, &mountlist_mtx, "mountlist", MTX_DEF); /* * Global opts, taken by all filesystems */ static const char *global_opts[] = { "errmsg", "fstype", "fspath", "ro", "rw", "nosuid", "noexec", NULL }; static int mount_init(void *mem, int size, int flags) { struct mount *mp; mp = (struct mount *)mem; mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF); lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0); return (0); } static void mount_fini(void *mem, int size) { struct mount *mp; mp = (struct mount *)mem; lockdestroy(&mp->mnt_explock); mtx_destroy(&mp->mnt_mtx); } static void vfs_mount_init(void *dummy __unused) { mount_zone = uma_zcreate("Mountpoints", sizeof(struct mount), NULL, NULL, mount_init, mount_fini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); } SYSINIT(vfs_mount, SI_SUB_VFS, SI_ORDER_ANY, vfs_mount_init, NULL); /* * --------------------------------------------------------------------- * Functions for building and sanitizing the mount options */ /* Remove one mount option. */ static void vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt) { TAILQ_REMOVE(opts, opt, link); free(opt->name, M_MOUNT); if (opt->value != NULL) free(opt->value, M_MOUNT); free(opt, M_MOUNT); } /* Release all resources related to the mount options. */ void vfs_freeopts(struct vfsoptlist *opts) { struct vfsopt *opt; while (!TAILQ_EMPTY(opts)) { opt = TAILQ_FIRST(opts); vfs_freeopt(opts, opt); } free(opts, M_MOUNT); } void vfs_deleteopt(struct vfsoptlist *opts, const char *name) { struct vfsopt *opt, *temp; if (opts == NULL) return; TAILQ_FOREACH_SAFE(opt, opts, link, temp) { if (strcmp(opt->name, name) == 0) vfs_freeopt(opts, opt); } } /* * Check if options are equal (with or without the "no" prefix). */ static int vfs_equalopts(const char *opt1, const char *opt2) { char *p; /* "opt" vs. "opt" or "noopt" vs. "noopt" */ if (strcmp(opt1, opt2) == 0) return (1); /* "noopt" vs. "opt" */ if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0) return (1); /* "opt" vs. "noopt" */ if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0) return (1); while ((p = strchr(opt1, '.')) != NULL && !strncmp(opt1, opt2, ++p - opt1)) { opt2 += p - opt1; opt1 = p; /* "foo.noopt" vs. "foo.opt" */ if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0) return (1); /* "foo.opt" vs. "foo.noopt" */ if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0) return (1); } return (0); } /* * If a mount option is specified several times, * (with or without the "no" prefix) only keep * the last occurence of it. */ static void vfs_sanitizeopts(struct vfsoptlist *opts) { struct vfsopt *opt, *opt2, *tmp; TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) { opt2 = TAILQ_PREV(opt, vfsoptlist, link); while (opt2 != NULL) { if (vfs_equalopts(opt->name, opt2->name)) { tmp = TAILQ_PREV(opt2, vfsoptlist, link); vfs_freeopt(opts, opt2); opt2 = tmp; } else { opt2 = TAILQ_PREV(opt2, vfsoptlist, link); } } } } /* * Build a linked list of mount options from a struct uio. */ int vfs_buildopts(struct uio *auio, struct vfsoptlist **options) { struct vfsoptlist *opts; struct vfsopt *opt; size_t memused, namelen, optlen; unsigned int i, iovcnt; int error; opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK); TAILQ_INIT(opts); memused = 0; iovcnt = auio->uio_iovcnt; for (i = 0; i < iovcnt; i += 2) { namelen = auio->uio_iov[i].iov_len; optlen = auio->uio_iov[i + 1].iov_len; memused += sizeof(struct vfsopt) + optlen + namelen; /* * Avoid consuming too much memory, and attempts to overflow * memused. */ if (memused > VFS_MOUNTARG_SIZE_MAX || optlen > VFS_MOUNTARG_SIZE_MAX || namelen > VFS_MOUNTARG_SIZE_MAX) { error = EINVAL; goto bad; } opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK); opt->name = malloc(namelen, M_MOUNT, M_WAITOK); opt->value = NULL; opt->len = 0; opt->pos = i / 2; opt->seen = 0; /* * Do this early, so jumps to "bad" will free the current * option. */ TAILQ_INSERT_TAIL(opts, opt, link); if (auio->uio_segflg == UIO_SYSSPACE) { bcopy(auio->uio_iov[i].iov_base, opt->name, namelen); } else { error = copyin(auio->uio_iov[i].iov_base, opt->name, namelen); if (error) goto bad; } /* Ensure names are null-terminated strings. */ if (namelen == 0 || opt->name[namelen - 1] != '\0') { error = EINVAL; goto bad; } if (optlen != 0) { opt->len = optlen; opt->value = malloc(optlen, M_MOUNT, M_WAITOK); if (auio->uio_segflg == UIO_SYSSPACE) { bcopy(auio->uio_iov[i + 1].iov_base, opt->value, optlen); } else { error = copyin(auio->uio_iov[i + 1].iov_base, opt->value, optlen); if (error) goto bad; } } } vfs_sanitizeopts(opts); *options = opts; return (0); bad: vfs_freeopts(opts); return (error); } /* * Merge the old mount options with the new ones passed * in the MNT_UPDATE case. * * XXX This function will keep a "nofoo" option in the * new options if there is no matching "foo" option * to be cancelled in the old options. This is a bug * if the option's canonical name is "foo". E.g., "noro" * shouldn't end up in the mount point's active options, * but it can. */ static void vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts) { struct vfsopt *opt, *opt2, *new; TAILQ_FOREACH(opt, opts, link) { /* * Check that this option hasn't been redefined * nor cancelled with a "no" mount option. */ opt2 = TAILQ_FIRST(toopts); while (opt2 != NULL) { if (strcmp(opt2->name, opt->name) == 0) goto next; if (strncmp(opt2->name, "no", 2) == 0 && strcmp(opt2->name + 2, opt->name) == 0) { vfs_freeopt(toopts, opt2); goto next; } opt2 = TAILQ_NEXT(opt2, link); } /* We want this option, duplicate it. */ new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK); new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK); strcpy(new->name, opt->name); if (opt->len != 0) { new->value = malloc(opt->len, M_MOUNT, M_WAITOK); bcopy(opt->value, new->value, opt->len); } else { new->value = NULL; } new->len = opt->len; new->seen = opt->seen; TAILQ_INSERT_TAIL(toopts, new, link); next: continue; } } /* * Mount a filesystem. */ int nmount(td, uap) struct thread *td; struct nmount_args /* { struct iovec *iovp; unsigned int iovcnt; int flags; } */ *uap; { struct uio *auio; int error; u_int iovcnt; AUDIT_ARG_FFLAGS(uap->flags); CTR4(KTR_VFS, "%s: iovp %p with iovcnt %d and flags %d", __func__, uap->iovp, uap->iovcnt, uap->flags); /* * Filter out MNT_ROOTFS. We do not want clients of nmount() in * userspace to set this flag, but we must filter it out if we want * MNT_UPDATE on the root file system to work. - * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try(). + * MNT_ROOTFS should only be set by the kernel when mounting its + * root file system. */ uap->flags &= ~MNT_ROOTFS; iovcnt = uap->iovcnt; /* * Check that we have an even number of iovec's * and that we have at least two options. */ if ((iovcnt & 1) || (iovcnt < 4)) { CTR2(KTR_VFS, "%s: failed for invalid iovcnt %d", __func__, uap->iovcnt); return (EINVAL); } error = copyinuio(uap->iovp, iovcnt, &auio); if (error) { CTR2(KTR_VFS, "%s: failed for invalid uio op with %d errno", __func__, error); return (error); } error = vfs_donmount(td, uap->flags, auio); free(auio, M_IOV); return (error); } /* * --------------------------------------------------------------------- * Various utility functions */ void vfs_ref(struct mount *mp) { CTR2(KTR_VFS, "%s: mp %p", __func__, mp); MNT_ILOCK(mp); MNT_REF(mp); MNT_IUNLOCK(mp); } void vfs_rel(struct mount *mp) { CTR2(KTR_VFS, "%s: mp %p", __func__, mp); MNT_ILOCK(mp); MNT_REL(mp); MNT_IUNLOCK(mp); } /* * Allocate and initialize the mount point struct. */ struct mount * vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp, const char *fspath, struct ucred *cred) { struct mount *mp; mp = uma_zalloc(mount_zone, M_WAITOK); bzero(&mp->mnt_startzero, __rangeof(struct mount, mnt_startzero, mnt_endzero)); TAILQ_INIT(&mp->mnt_nvnodelist); mp->mnt_nvnodelistsize = 0; mp->mnt_ref = 0; (void) vfs_busy(mp, MBF_NOWAIT); mp->mnt_op = vfsp->vfc_vfsops; mp->mnt_vfc = vfsp; vfsp->vfc_refcount++; /* XXX Unlocked */ mp->mnt_stat.f_type = vfsp->vfc_typenum; mp->mnt_gen++; strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); mp->mnt_vnodecovered = vp; mp->mnt_cred = crdup(cred); mp->mnt_stat.f_owner = cred->cr_uid; strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN); mp->mnt_iosize_max = DFLTPHYS; #ifdef MAC mac_mount_init(mp); mac_mount_create(cred, mp); #endif arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0); return (mp); } /* * Destroy the mount struct previously allocated by vfs_mount_alloc(). */ void vfs_mount_destroy(struct mount *mp) { MNT_ILOCK(mp); mp->mnt_kern_flag |= MNTK_REFEXPIRE; if (mp->mnt_kern_flag & MNTK_MWAIT) { mp->mnt_kern_flag &= ~MNTK_MWAIT; wakeup(mp); } while (mp->mnt_ref) msleep(mp, MNT_MTX(mp), PVFS, "mntref", 0); KASSERT(mp->mnt_ref == 0, ("%s: invalid refcount in the drain path @ %s:%d", __func__, __FILE__, __LINE__)); if (mp->mnt_writeopcount != 0) panic("vfs_mount_destroy: nonzero writeopcount"); if (mp->mnt_secondary_writes != 0) panic("vfs_mount_destroy: nonzero secondary_writes"); mp->mnt_vfc->vfc_refcount--; if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) { struct vnode *vp; TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) vprint("", vp); panic("unmount: dangling vnode"); } if (mp->mnt_nvnodelistsize != 0) panic("vfs_mount_destroy: nonzero nvnodelistsize"); if (mp->mnt_lockref != 0) panic("vfs_mount_destroy: nonzero lock refcount"); MNT_IUNLOCK(mp); #ifdef MAC mac_mount_destroy(mp); #endif if (mp->mnt_opt != NULL) vfs_freeopts(mp->mnt_opt); crfree(mp->mnt_cred); uma_zfree(mount_zone, mp); } int vfs_donmount(struct thread *td, int fsflags, struct uio *fsoptions) { struct vfsoptlist *optlist; struct vfsopt *opt, *noro_opt, *tmp_opt; char *fstype, *fspath, *errmsg; int error, fstypelen, fspathlen, errmsg_len, errmsg_pos; int has_rw, has_noro; errmsg = fspath = NULL; errmsg_len = has_noro = has_rw = fspathlen = 0; errmsg_pos = -1; error = vfs_buildopts(fsoptions, &optlist); if (error) return (error); if (vfs_getopt(optlist, "errmsg", (void **)&errmsg, &errmsg_len) == 0) errmsg_pos = vfs_getopt_pos(optlist, "errmsg"); /* * We need these two options before the others, * and they are mandatory for any filesystem. * Ensure they are NUL terminated as well. */ fstypelen = 0; error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen); if (error || fstype[fstypelen - 1] != '\0') { error = EINVAL; if (errmsg != NULL) strncpy(errmsg, "Invalid fstype", errmsg_len); goto bail; } fspathlen = 0; error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen); if (error || fspath[fspathlen - 1] != '\0') { error = EINVAL; if (errmsg != NULL) strncpy(errmsg, "Invalid fspath", errmsg_len); goto bail; } /* * We need to see if we have the "update" option * before we call vfs_domount(), since vfs_domount() has special * logic based on MNT_UPDATE. This is very important * when we want to update the root filesystem. */ TAILQ_FOREACH_SAFE(opt, optlist, link, tmp_opt) { if (strcmp(opt->name, "update") == 0) { fsflags |= MNT_UPDATE; vfs_freeopt(optlist, opt); } else if (strcmp(opt->name, "async") == 0) fsflags |= MNT_ASYNC; else if (strcmp(opt->name, "force") == 0) { fsflags |= MNT_FORCE; vfs_freeopt(optlist, opt); } else if (strcmp(opt->name, "reload") == 0) { fsflags |= MNT_RELOAD; vfs_freeopt(optlist, opt); } else if (strcmp(opt->name, "multilabel") == 0) fsflags |= MNT_MULTILABEL; else if (strcmp(opt->name, "noasync") == 0) fsflags &= ~MNT_ASYNC; else if (strcmp(opt->name, "noatime") == 0) fsflags |= MNT_NOATIME; else if (strcmp(opt->name, "atime") == 0) { free(opt->name, M_MOUNT); opt->name = strdup("nonoatime", M_MOUNT); } else if (strcmp(opt->name, "noclusterr") == 0) fsflags |= MNT_NOCLUSTERR; else if (strcmp(opt->name, "clusterr") == 0) { free(opt->name, M_MOUNT); opt->name = strdup("nonoclusterr", M_MOUNT); } else if (strcmp(opt->name, "noclusterw") == 0) fsflags |= MNT_NOCLUSTERW; else if (strcmp(opt->name, "clusterw") == 0) { free(opt->name, M_MOUNT); opt->name = strdup("nonoclusterw", M_MOUNT); } else if (strcmp(opt->name, "noexec") == 0) fsflags |= MNT_NOEXEC; else if (strcmp(opt->name, "exec") == 0) { free(opt->name, M_MOUNT); opt->name = strdup("nonoexec", M_MOUNT); } else if (strcmp(opt->name, "nosuid") == 0) fsflags |= MNT_NOSUID; else if (strcmp(opt->name, "suid") == 0) { free(opt->name, M_MOUNT); opt->name = strdup("nonosuid", M_MOUNT); } else if (strcmp(opt->name, "nosymfollow") == 0) fsflags |= MNT_NOSYMFOLLOW; else if (strcmp(opt->name, "symfollow") == 0) { free(opt->name, M_MOUNT); opt->name = strdup("nonosymfollow", M_MOUNT); } else if (strcmp(opt->name, "noro") == 0) { fsflags &= ~MNT_RDONLY; has_noro = 1; } else if (strcmp(opt->name, "rw") == 0) { fsflags &= ~MNT_RDONLY; has_rw = 1; } else if (strcmp(opt->name, "ro") == 0) fsflags |= MNT_RDONLY; else if (strcmp(opt->name, "rdonly") == 0) { free(opt->name, M_MOUNT); opt->name = strdup("ro", M_MOUNT); fsflags |= MNT_RDONLY; } else if (strcmp(opt->name, "suiddir") == 0) fsflags |= MNT_SUIDDIR; else if (strcmp(opt->name, "sync") == 0) fsflags |= MNT_SYNCHRONOUS; else if (strcmp(opt->name, "union") == 0) fsflags |= MNT_UNION; } /* * If "rw" was specified as a mount option, and we * are trying to update a mount-point from "ro" to "rw", * we need a mount option "noro", since in vfs_mergeopts(), * "noro" will cancel "ro", but "rw" will not do anything. */ if (has_rw && !has_noro) { noro_opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK); noro_opt->name = strdup("noro", M_MOUNT); noro_opt->value = NULL; noro_opt->len = 0; noro_opt->pos = -1; noro_opt->seen = 1; TAILQ_INSERT_TAIL(optlist, noro_opt, link); } /* * Be ultra-paranoid about making sure the type and fspath * variables will fit in our mp buffers, including the * terminating NUL. */ if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) { error = ENAMETOOLONG; goto bail; } error = vfs_domount(td, fstype, fspath, fsflags, optlist); bail: /* copyout the errmsg */ if (errmsg_pos != -1 && ((2 * errmsg_pos + 1) < fsoptions->uio_iovcnt) && errmsg_len > 0 && errmsg != NULL) { if (fsoptions->uio_segflg == UIO_SYSSPACE) { bcopy(errmsg, fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base, fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len); } else { copyout(errmsg, fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base, fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len); } } if (error != 0) vfs_freeopts(optlist); return (error); } /* * Old mount API. */ #ifndef _SYS_SYSPROTO_H_ struct mount_args { char *type; char *path; int flags; caddr_t data; }; #endif /* ARGSUSED */ int mount(td, uap) struct thread *td; struct mount_args /* { char *type; char *path; int flags; caddr_t data; } */ *uap; { char *fstype; struct vfsconf *vfsp = NULL; struct mntarg *ma = NULL; int error; AUDIT_ARG_FFLAGS(uap->flags); /* * Filter out MNT_ROOTFS. We do not want clients of mount() in * userspace to set this flag, but we must filter it out if we want * MNT_UPDATE on the root file system to work. - * MNT_ROOTFS should only be set in the kernel in vfs_mountroot_try(). + * MNT_ROOTFS should only be set by the kernel when mounting its + * root file system. */ uap->flags &= ~MNT_ROOTFS; fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK); error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL); if (error) { free(fstype, M_TEMP); return (error); } AUDIT_ARG_TEXT(fstype); mtx_lock(&Giant); vfsp = vfs_byname_kld(fstype, td, &error); free(fstype, M_TEMP); if (vfsp == NULL) { mtx_unlock(&Giant); return (ENOENT); } if (vfsp->vfc_vfsops->vfs_cmount == NULL) { mtx_unlock(&Giant); return (EOPNOTSUPP); } ma = mount_argsu(ma, "fstype", uap->type, MNAMELEN); ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN); ma = mount_argb(ma, uap->flags & MNT_RDONLY, "noro"); ma = mount_argb(ma, !(uap->flags & MNT_NOSUID), "nosuid"); ma = mount_argb(ma, !(uap->flags & MNT_NOEXEC), "noexec"); error = vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, uap->flags); mtx_unlock(&Giant); return (error); } /* * vfs_domount_first(): first file system mount (not update) */ static int vfs_domount_first( struct thread *td, /* Calling thread. */ struct vfsconf *vfsp, /* File system type. */ char *fspath, /* Mount path. */ struct vnode *vp, /* Vnode to be covered. */ int fsflags, /* Flags common to all filesystems. */ void *fsdata /* Options local to the filesystem. */ ) { struct vattr va; struct mount *mp; struct vnode *newdp; int error; mtx_assert(&Giant, MA_OWNED); ASSERT_VOP_ELOCKED(vp, __func__); KASSERT((fsflags & MNT_UPDATE) == 0, ("MNT_UPDATE shouldn't be here")); /* * If the user is not root, ensure that they own the directory * onto which we are attempting to mount. */ error = VOP_GETATTR(vp, &va, td->td_ucred); if (error == 0 && va.va_uid != td->td_ucred->cr_uid) error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN, 0); if (error == 0) error = vinvalbuf(vp, V_SAVE, 0, 0); if (error == 0 && vp->v_type != VDIR) error = ENOTDIR; if (error == 0) { VI_LOCK(vp); if ((vp->v_iflag & VI_MOUNT) == 0 && vp->v_mountedhere == NULL) vp->v_iflag |= VI_MOUNT; else error = EBUSY; VI_UNLOCK(vp); } if (error != 0) { vput(vp); return (error); } VOP_UNLOCK(vp, 0); /* Allocate and initialize the filesystem. */ mp = vfs_mount_alloc(vp, vfsp, fspath, td->td_ucred); /* XXXMAC: pass to vfs_mount_alloc? */ mp->mnt_optnew = fsdata; /* Set the mount level flags. */ mp->mnt_flag = (fsflags & (MNT_UPDATEMASK | MNT_ROOTFS | MNT_RDONLY)); /* * Mount the filesystem. * XXX The final recipients of VFS_MOUNT just overwrite the ndp they * get. No freeing of cn_pnbuf. */ error = VFS_MOUNT(mp); if (error != 0) { vfs_unbusy(mp); vfs_mount_destroy(mp); VI_LOCK(vp); vp->v_iflag &= ~VI_MOUNT; VI_UNLOCK(vp); vrele(vp); return (error); } if (mp->mnt_opt != NULL) vfs_freeopts(mp->mnt_opt); mp->mnt_opt = mp->mnt_optnew; (void)VFS_STATFS(mp, &mp->mnt_stat); /* * Prevent external consumers of mount options from reading mnt_optnew. */ mp->mnt_optnew = NULL; MNT_ILOCK(mp); if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0) mp->mnt_kern_flag |= MNTK_ASYNC; else mp->mnt_kern_flag &= ~MNTK_ASYNC; MNT_IUNLOCK(mp); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); cache_purge(vp); VI_LOCK(vp); vp->v_iflag &= ~VI_MOUNT; VI_UNLOCK(vp); vp->v_mountedhere = mp; /* Place the new filesystem at the end of the mount list. */ mtx_lock(&mountlist_mtx); TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); mtx_unlock(&mountlist_mtx); vfs_event_signal(NULL, VQ_MOUNT, 0); if (VFS_ROOT(mp, LK_EXCLUSIVE, &newdp)) panic("mount: lost mount"); VOP_UNLOCK(newdp, 0); VOP_UNLOCK(vp, 0); mountcheckdirs(vp, newdp); vrele(newdp); if ((mp->mnt_flag & MNT_RDONLY) == 0) vfs_allocate_syncvnode(mp); vfs_unbusy(mp); return (0); } /* * vfs_domount_update(): update of mounted file system */ static int vfs_domount_update( struct thread *td, /* Calling thread. */ struct vnode *vp, /* Mount point vnode. */ int fsflags, /* Flags common to all filesystems. */ void *fsdata /* Options local to the filesystem. */ ) { struct oexport_args oexport; struct export_args export; struct mount *mp; int error, flag; mtx_assert(&Giant, MA_OWNED); ASSERT_VOP_ELOCKED(vp, __func__); KASSERT((fsflags & MNT_UPDATE) != 0, ("MNT_UPDATE should be here")); if ((vp->v_vflag & VV_ROOT) == 0) { vput(vp); return (EINVAL); } mp = vp->v_mount; /* * We only allow the filesystem to be reloaded if it * is currently mounted read-only. */ flag = mp->mnt_flag; if ((fsflags & MNT_RELOAD) != 0 && (flag & MNT_RDONLY) == 0) { vput(vp); return (EOPNOTSUPP); /* Needs translation */ } /* * Only privileged root, or (if MNT_USER is set) the user that * did the original mount is permitted to update it. */ error = vfs_suser(mp, td); if (error != 0) { vput(vp); return (error); } if (vfs_busy(mp, MBF_NOWAIT)) { vput(vp); return (EBUSY); } VI_LOCK(vp); if ((vp->v_iflag & VI_MOUNT) != 0 || vp->v_mountedhere != NULL) { VI_UNLOCK(vp); vfs_unbusy(mp); vput(vp); return (EBUSY); } vp->v_iflag |= VI_MOUNT; VI_UNLOCK(vp); VOP_UNLOCK(vp, 0); MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_UPDATEMASK; mp->mnt_flag |= fsflags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT | MNT_ROOTFS | MNT_UPDATEMASK | MNT_RDONLY); if ((mp->mnt_flag & MNT_ASYNC) == 0) mp->mnt_kern_flag &= ~MNTK_ASYNC; MNT_IUNLOCK(mp); mp->mnt_optnew = fsdata; vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt); /* * Mount the filesystem. * XXX The final recipients of VFS_MOUNT just overwrite the ndp they * get. No freeing of cn_pnbuf. */ error = VFS_MOUNT(mp); if (error == 0) { /* Process the export option. */ if (vfs_copyopt(mp->mnt_optnew, "export", &export, sizeof(export)) == 0) { error = vfs_export(mp, &export); } else if (vfs_copyopt(mp->mnt_optnew, "export", &oexport, sizeof(oexport)) == 0) { export.ex_flags = oexport.ex_flags; export.ex_root = oexport.ex_root; export.ex_anon = oexport.ex_anon; export.ex_addr = oexport.ex_addr; export.ex_addrlen = oexport.ex_addrlen; export.ex_mask = oexport.ex_mask; export.ex_masklen = oexport.ex_masklen; export.ex_indexfile = oexport.ex_indexfile; export.ex_numsecflavors = 0; error = vfs_export(mp, &export); } } MNT_ILOCK(mp); if (error == 0) { mp->mnt_flag &= ~(MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT); } else { /* * If we fail, restore old mount flags. MNT_QUOTA is special, * because it is not part of MNT_UPDATEMASK, but it could have * changed in the meantime if quotactl(2) was called. * All in all we want current value of MNT_QUOTA, not the old * one. */ mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) | (flag & ~MNT_QUOTA); } if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0) mp->mnt_kern_flag |= MNTK_ASYNC; else mp->mnt_kern_flag &= ~MNTK_ASYNC; MNT_IUNLOCK(mp); if (error != 0) goto end; if (mp->mnt_opt != NULL) vfs_freeopts(mp->mnt_opt); mp->mnt_opt = mp->mnt_optnew; (void)VFS_STATFS(mp, &mp->mnt_stat); /* * Prevent external consumers of mount options from reading * mnt_optnew. */ mp->mnt_optnew = NULL; if ((mp->mnt_flag & MNT_RDONLY) == 0) vfs_allocate_syncvnode(mp); else vfs_deallocate_syncvnode(mp); end: vfs_unbusy(mp); VI_LOCK(vp); vp->v_iflag &= ~VI_MOUNT; VI_UNLOCK(vp); vrele(vp); return (error); } /* * vfs_domount(): actually attempt a filesystem mount. */ static int vfs_domount( struct thread *td, /* Calling thread. */ const char *fstype, /* Filesystem type. */ char *fspath, /* Mount path. */ int fsflags, /* Flags common to all filesystems. */ void *fsdata /* Options local to the filesystem. */ ) { struct vfsconf *vfsp; struct nameidata nd; struct vnode *vp; int error; /* * Be ultra-paranoid about making sure the type and fspath * variables will fit in our mp buffers, including the * terminating NUL. */ if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN) return (ENAMETOOLONG); if (jailed(td->td_ucred) || usermount == 0) { if ((error = priv_check(td, PRIV_VFS_MOUNT)) != 0) return (error); } /* * Do not allow NFS export or MNT_SUIDDIR by unprivileged users. */ if (fsflags & MNT_EXPORTED) { error = priv_check(td, PRIV_VFS_MOUNT_EXPORTED); if (error) return (error); } if (fsflags & MNT_SUIDDIR) { error = priv_check(td, PRIV_VFS_MOUNT_SUIDDIR); if (error) return (error); } /* * Silently enforce MNT_NOSUID and MNT_USER for unprivileged users. */ if ((fsflags & (MNT_NOSUID | MNT_USER)) != (MNT_NOSUID | MNT_USER)) { if (priv_check(td, PRIV_VFS_MOUNT_NONUSER) != 0) fsflags |= MNT_NOSUID | MNT_USER; } /* Load KLDs before we lock the covered vnode to avoid reversals. */ vfsp = NULL; if ((fsflags & MNT_UPDATE) == 0) { /* Don't try to load KLDs if we're mounting the root. */ if (fsflags & MNT_ROOTFS) vfsp = vfs_byname(fstype); else vfsp = vfs_byname_kld(fstype, td, &error); if (vfsp == NULL) return (ENODEV); if (jailed(td->td_ucred) && !(vfsp->vfc_flags & VFCF_JAIL)) return (EPERM); } /* * Get vnode to be covered or mount point's vnode in case of MNT_UPDATE. */ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1, UIO_SYSSPACE, fspath, td); error = namei(&nd); if (error != 0) return (error); if (!NDHASGIANT(&nd)) mtx_lock(&Giant); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; if ((fsflags & MNT_UPDATE) == 0) { error = vfs_domount_first(td, vfsp, fspath, vp, fsflags, fsdata); } else { error = vfs_domount_update(td, vp, fsflags, fsdata); } mtx_unlock(&Giant); ASSERT_VI_UNLOCKED(vp, __func__); ASSERT_VOP_UNLOCKED(vp, __func__); return (error); } /* * Unmount a filesystem. * * Note: unmount takes a path to the vnode mounted on as argument, not * special file (as before). */ #ifndef _SYS_SYSPROTO_H_ struct unmount_args { char *path; int flags; }; #endif /* ARGSUSED */ int unmount(td, uap) struct thread *td; register struct unmount_args /* { char *path; int flags; } */ *uap; { struct mount *mp; char *pathbuf; int error, id0, id1; AUDIT_ARG_VALUE(uap->flags); if (jailed(td->td_ucred) || usermount == 0) { error = priv_check(td, PRIV_VFS_UNMOUNT); if (error) return (error); } pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK); error = copyinstr(uap->path, pathbuf, MNAMELEN, NULL); if (error) { free(pathbuf, M_TEMP); return (error); } mtx_lock(&Giant); if (uap->flags & MNT_BYFSID) { AUDIT_ARG_TEXT(pathbuf); /* Decode the filesystem ID. */ if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) { mtx_unlock(&Giant); free(pathbuf, M_TEMP); return (EINVAL); } mtx_lock(&mountlist_mtx); TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) { if (mp->mnt_stat.f_fsid.val[0] == id0 && mp->mnt_stat.f_fsid.val[1] == id1) break; } mtx_unlock(&mountlist_mtx); } else { AUDIT_ARG_UPATH1(td, pathbuf); mtx_lock(&mountlist_mtx); TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) { if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0) break; } mtx_unlock(&mountlist_mtx); } free(pathbuf, M_TEMP); if (mp == NULL) { /* * Previously we returned ENOENT for a nonexistent path and * EINVAL for a non-mountpoint. We cannot tell these apart * now, so in the !MNT_BYFSID case return the more likely * EINVAL for compatibility. */ mtx_unlock(&Giant); return ((uap->flags & MNT_BYFSID) ? ENOENT : EINVAL); } /* * Don't allow unmounting the root filesystem. */ if (mp->mnt_flag & MNT_ROOTFS) { mtx_unlock(&Giant); return (EINVAL); } error = dounmount(mp, uap->flags, td); mtx_unlock(&Giant); return (error); } /* * Do the actual filesystem unmount. */ int dounmount(mp, flags, td) struct mount *mp; int flags; struct thread *td; { struct vnode *coveredvp, *fsrootvp; int error; int async_flag; int mnt_gen_r; mtx_assert(&Giant, MA_OWNED); if ((coveredvp = mp->mnt_vnodecovered) != NULL) { mnt_gen_r = mp->mnt_gen; VI_LOCK(coveredvp); vholdl(coveredvp); vn_lock(coveredvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY); vdrop(coveredvp); /* * Check for mp being unmounted while waiting for the * covered vnode lock. */ if (coveredvp->v_mountedhere != mp || coveredvp->v_mountedhere->mnt_gen != mnt_gen_r) { VOP_UNLOCK(coveredvp, 0); return (EBUSY); } } /* * Only privileged root, or (if MNT_USER is set) the user that did the * original mount is permitted to unmount this filesystem. */ error = vfs_suser(mp, td); if (error) { if (coveredvp) VOP_UNLOCK(coveredvp, 0); return (error); } MNT_ILOCK(mp); if (mp->mnt_kern_flag & MNTK_UNMOUNT) { MNT_IUNLOCK(mp); if (coveredvp) VOP_UNLOCK(coveredvp, 0); return (EBUSY); } mp->mnt_kern_flag |= MNTK_UNMOUNT | MNTK_NOINSMNTQ; /* Allow filesystems to detect that a forced unmount is in progress. */ if (flags & MNT_FORCE) mp->mnt_kern_flag |= MNTK_UNMOUNTF; error = 0; if (mp->mnt_lockref) { if ((flags & MNT_FORCE) == 0) { mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_NOINSMNTQ | MNTK_UNMOUNTF); if (mp->mnt_kern_flag & MNTK_MWAIT) { mp->mnt_kern_flag &= ~MNTK_MWAIT; wakeup(mp); } MNT_IUNLOCK(mp); if (coveredvp) VOP_UNLOCK(coveredvp, 0); return (EBUSY); } mp->mnt_kern_flag |= MNTK_DRAINING; error = msleep(&mp->mnt_lockref, MNT_MTX(mp), PVFS, "mount drain", 0); } MNT_IUNLOCK(mp); KASSERT(mp->mnt_lockref == 0, ("%s: invalid lock refcount in the drain path @ %s:%d", __func__, __FILE__, __LINE__)); KASSERT(error == 0, ("%s: invalid return value for msleep in the drain path @ %s:%d", __func__, __FILE__, __LINE__)); vn_start_write(NULL, &mp, V_WAIT); if (mp->mnt_flag & MNT_EXPUBLIC) vfs_setpublicfs(NULL, NULL, NULL); vfs_msync(mp, MNT_WAIT); MNT_ILOCK(mp); async_flag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; mp->mnt_kern_flag &= ~MNTK_ASYNC; MNT_IUNLOCK(mp); cache_purgevfs(mp); /* remove cache entries for this file sys */ vfs_deallocate_syncvnode(mp); /* * For forced unmounts, move process cdir/rdir refs on the fs root * vnode to the covered vnode. For non-forced unmounts we want * such references to cause an EBUSY error. */ if ((flags & MNT_FORCE) && VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp) == 0) { if (mp->mnt_vnodecovered != NULL) mountcheckdirs(fsrootvp, mp->mnt_vnodecovered); if (fsrootvp == rootvnode) { vrele(rootvnode); rootvnode = NULL; } vput(fsrootvp); } if (((mp->mnt_flag & MNT_RDONLY) || (error = VFS_SYNC(mp, MNT_WAIT)) == 0) || (flags & MNT_FORCE) != 0) error = VFS_UNMOUNT(mp, flags); vn_finished_write(mp); /* * If we failed to flush the dirty blocks for this mount point, * undo all the cdir/rdir and rootvnode changes we made above. * Unless we failed to do so because the device is reporting that * it doesn't exist anymore. */ if (error && error != ENXIO) { if ((flags & MNT_FORCE) && VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp) == 0) { if (mp->mnt_vnodecovered != NULL) mountcheckdirs(mp->mnt_vnodecovered, fsrootvp); if (rootvnode == NULL) { rootvnode = fsrootvp; vref(rootvnode); } vput(fsrootvp); } MNT_ILOCK(mp); mp->mnt_kern_flag &= ~MNTK_NOINSMNTQ; if ((mp->mnt_flag & MNT_RDONLY) == 0) { MNT_IUNLOCK(mp); vfs_allocate_syncvnode(mp); MNT_ILOCK(mp); } mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); mp->mnt_flag |= async_flag; if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0) mp->mnt_kern_flag |= MNTK_ASYNC; if (mp->mnt_kern_flag & MNTK_MWAIT) { mp->mnt_kern_flag &= ~MNTK_MWAIT; wakeup(mp); } MNT_IUNLOCK(mp); if (coveredvp) VOP_UNLOCK(coveredvp, 0); return (error); } mtx_lock(&mountlist_mtx); TAILQ_REMOVE(&mountlist, mp, mnt_list); mtx_unlock(&mountlist_mtx); if (coveredvp != NULL) { coveredvp->v_mountedhere = NULL; vput(coveredvp); } vfs_event_signal(NULL, VQ_UNMOUNT, 0); vfs_mount_destroy(mp); return (0); } /* * Report errors during filesystem mounting. */ void vfs_mount_error(struct mount *mp, const char *fmt, ...) { struct vfsoptlist *moptlist = mp->mnt_optnew; va_list ap; int error, len; char *errmsg; error = vfs_getopt(moptlist, "errmsg", (void **)&errmsg, &len); if (error || errmsg == NULL || len <= 0) return; va_start(ap, fmt); vsnprintf(errmsg, (size_t)len, fmt, ap); va_end(ap); } void vfs_opterror(struct vfsoptlist *opts, const char *fmt, ...) { va_list ap; int error, len; char *errmsg; error = vfs_getopt(opts, "errmsg", (void **)&errmsg, &len); if (error || errmsg == NULL || len <= 0) return; va_start(ap, fmt); vsnprintf(errmsg, (size_t)len, fmt, ap); va_end(ap); } /* * --------------------------------------------------------------------- * Functions for querying mount options/arguments from filesystems. */ /* * Check that no unknown options are given */ int vfs_filteropt(struct vfsoptlist *opts, const char **legal) { struct vfsopt *opt; char errmsg[255]; const char **t, *p, *q; int ret = 0; TAILQ_FOREACH(opt, opts, link) { p = opt->name; q = NULL; if (p[0] == 'n' && p[1] == 'o') q = p + 2; for(t = global_opts; *t != NULL; t++) { if (strcmp(*t, p) == 0) break; if (q != NULL) { if (strcmp(*t, q) == 0) break; } } if (*t != NULL) continue; for(t = legal; *t != NULL; t++) { if (strcmp(*t, p) == 0) break; if (q != NULL) { if (strcmp(*t, q) == 0) break; } } if (*t != NULL) continue; snprintf(errmsg, sizeof(errmsg), "mount option <%s> is unknown", p); ret = EINVAL; } if (ret != 0) { TAILQ_FOREACH(opt, opts, link) { if (strcmp(opt->name, "errmsg") == 0) { strncpy((char *)opt->value, errmsg, opt->len); break; } } if (opt == NULL) printf("%s\n", errmsg); } return (ret); } /* * Get a mount option by its name. * * Return 0 if the option was found, ENOENT otherwise. * If len is non-NULL it will be filled with the length * of the option. If buf is non-NULL, it will be filled * with the address of the option. */ int vfs_getopt(opts, name, buf, len) struct vfsoptlist *opts; const char *name; void **buf; int *len; { struct vfsopt *opt; KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL")); TAILQ_FOREACH(opt, opts, link) { if (strcmp(name, opt->name) == 0) { opt->seen = 1; if (len != NULL) *len = opt->len; if (buf != NULL) *buf = opt->value; return (0); } } return (ENOENT); } int vfs_getopt_pos(struct vfsoptlist *opts, const char *name) { struct vfsopt *opt; if (opts == NULL) return (-1); TAILQ_FOREACH(opt, opts, link) { if (strcmp(name, opt->name) == 0) { opt->seen = 1; return (opt->pos); } } return (-1); } char * vfs_getopts(struct vfsoptlist *opts, const char *name, int *error) { struct vfsopt *opt; *error = 0; TAILQ_FOREACH(opt, opts, link) { if (strcmp(name, opt->name) != 0) continue; opt->seen = 1; if (opt->len == 0 || ((char *)opt->value)[opt->len - 1] != '\0') { *error = EINVAL; return (NULL); } return (opt->value); } *error = ENOENT; return (NULL); } int vfs_flagopt(struct vfsoptlist *opts, const char *name, u_int *w, u_int val) { struct vfsopt *opt; TAILQ_FOREACH(opt, opts, link) { if (strcmp(name, opt->name) == 0) { opt->seen = 1; if (w != NULL) *w |= val; return (1); } } if (w != NULL) *w &= ~val; return (0); } int vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...) { va_list ap; struct vfsopt *opt; int ret; KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL")); TAILQ_FOREACH(opt, opts, link) { if (strcmp(name, opt->name) != 0) continue; opt->seen = 1; if (opt->len == 0 || opt->value == NULL) return (0); if (((char *)opt->value)[opt->len - 1] != '\0') return (0); va_start(ap, fmt); ret = vsscanf(opt->value, fmt, ap); va_end(ap); return (ret); } return (0); } int vfs_setopt(struct vfsoptlist *opts, const char *name, void *value, int len) { struct vfsopt *opt; TAILQ_FOREACH(opt, opts, link) { if (strcmp(name, opt->name) != 0) continue; opt->seen = 1; if (opt->value == NULL) opt->len = len; else { if (opt->len != len) return (EINVAL); bcopy(value, opt->value, len); } return (0); } return (ENOENT); } int vfs_setopt_part(struct vfsoptlist *opts, const char *name, void *value, int len) { struct vfsopt *opt; TAILQ_FOREACH(opt, opts, link) { if (strcmp(name, opt->name) != 0) continue; opt->seen = 1; if (opt->value == NULL) opt->len = len; else { if (opt->len < len) return (EINVAL); opt->len = len; bcopy(value, opt->value, len); } return (0); } return (ENOENT); } int vfs_setopts(struct vfsoptlist *opts, const char *name, const char *value) { struct vfsopt *opt; TAILQ_FOREACH(opt, opts, link) { if (strcmp(name, opt->name) != 0) continue; opt->seen = 1; if (opt->value == NULL) opt->len = strlen(value) + 1; else if (strlcpy(opt->value, value, opt->len) >= opt->len) return (EINVAL); return (0); } return (ENOENT); } /* * Find and copy a mount option. * * The size of the buffer has to be specified * in len, if it is not the same length as the * mount option, EINVAL is returned. * Returns ENOENT if the option is not found. */ int vfs_copyopt(opts, name, dest, len) struct vfsoptlist *opts; const char *name; void *dest; int len; { struct vfsopt *opt; KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL")); TAILQ_FOREACH(opt, opts, link) { if (strcmp(name, opt->name) == 0) { opt->seen = 1; if (len != opt->len) return (EINVAL); bcopy(opt->value, dest, opt->len); return (0); } } return (ENOENT); } /* * This is a helper function for filesystems to traverse their * vnodes. See MNT_VNODE_FOREACH() in sys/mount.h */ struct vnode * __mnt_vnode_next(struct vnode **mvp, struct mount *mp) { struct vnode *vp; mtx_assert(MNT_MTX(mp), MA_OWNED); KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); if ((*mvp)->v_yield++ == 500) { MNT_IUNLOCK(mp); (*mvp)->v_yield = 0; uio_yield(); MNT_ILOCK(mp); } vp = TAILQ_NEXT(*mvp, v_nmntvnodes); while (vp != NULL && vp->v_type == VMARKER) vp = TAILQ_NEXT(vp, v_nmntvnodes); /* Check if we are done */ if (vp == NULL) { __mnt_vnode_markerfree(mvp, mp); return (NULL); } TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes); TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes); return (vp); } struct vnode * __mnt_vnode_first(struct vnode **mvp, struct mount *mp) { struct vnode *vp; mtx_assert(MNT_MTX(mp), MA_OWNED); vp = TAILQ_FIRST(&mp->mnt_nvnodelist); while (vp != NULL && vp->v_type == VMARKER) vp = TAILQ_NEXT(vp, v_nmntvnodes); /* Check if we are done */ if (vp == NULL) { *mvp = NULL; return (NULL); } MNT_REF(mp); MNT_IUNLOCK(mp); *mvp = (struct vnode *) malloc(sizeof(struct vnode), M_VNODE_MARKER, M_WAITOK | M_ZERO); MNT_ILOCK(mp); (*mvp)->v_type = VMARKER; vp = TAILQ_FIRST(&mp->mnt_nvnodelist); while (vp != NULL && vp->v_type == VMARKER) vp = TAILQ_NEXT(vp, v_nmntvnodes); /* Check if we are done */ if (vp == NULL) { MNT_IUNLOCK(mp); free(*mvp, M_VNODE_MARKER); MNT_ILOCK(mp); *mvp = NULL; MNT_REL(mp); return (NULL); } (*mvp)->v_mount = mp; TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes); return (vp); } void __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp) { if (*mvp == NULL) return; mtx_assert(MNT_MTX(mp), MA_OWNED); KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes); MNT_IUNLOCK(mp); free(*mvp, M_VNODE_MARKER); MNT_ILOCK(mp); *mvp = NULL; MNT_REL(mp); } int __vfs_statfs(struct mount *mp, struct statfs *sbp) { int error; error = mp->mnt_op->vfs_statfs(mp, &mp->mnt_stat); if (sbp != &mp->mnt_stat) *sbp = mp->mnt_stat; return (error); } void vfs_mountedfrom(struct mount *mp, const char *from) { bzero(mp->mnt_stat.f_mntfromname, sizeof mp->mnt_stat.f_mntfromname); strlcpy(mp->mnt_stat.f_mntfromname, from, sizeof mp->mnt_stat.f_mntfromname); } /* * --------------------------------------------------------------------- * This is the api for building mount args and mounting filesystems from * inside the kernel. * * The API works by accumulation of individual args. First error is * latched. * * XXX: should be documented in new manpage kernel_mount(9) */ /* A memory allocation which must be freed when we are done */ struct mntaarg { SLIST_ENTRY(mntaarg) next; }; /* The header for the mount arguments */ struct mntarg { struct iovec *v; int len; int error; SLIST_HEAD(, mntaarg) list; }; /* * Add a boolean argument. * * flag is the boolean value. * name must start with "no". */ struct mntarg * mount_argb(struct mntarg *ma, int flag, const char *name) { KASSERT(name[0] == 'n' && name[1] == 'o', ("mount_argb(...,%s): name must start with 'no'", name)); return (mount_arg(ma, name + (flag ? 2 : 0), NULL, 0)); } /* * Add an argument printf style */ struct mntarg * mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...) { va_list ap; struct mntaarg *maa; struct sbuf *sb; int len; if (ma == NULL) { ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO); SLIST_INIT(&ma->list); } if (ma->error) return (ma); ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2), M_MOUNT, M_WAITOK); ma->v[ma->len].iov_base = (void *)(uintptr_t)name; ma->v[ma->len].iov_len = strlen(name) + 1; ma->len++; sb = sbuf_new_auto(); va_start(ap, fmt); sbuf_vprintf(sb, fmt, ap); va_end(ap); sbuf_finish(sb); len = sbuf_len(sb) + 1; maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO); SLIST_INSERT_HEAD(&ma->list, maa, next); bcopy(sbuf_data(sb), maa + 1, len); sbuf_delete(sb); ma->v[ma->len].iov_base = maa + 1; ma->v[ma->len].iov_len = len; ma->len++; return (ma); } /* * Add an argument which is a userland string. */ struct mntarg * mount_argsu(struct mntarg *ma, const char *name, const void *val, int len) { struct mntaarg *maa; char *tbuf; if (val == NULL) return (ma); if (ma == NULL) { ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO); SLIST_INIT(&ma->list); } if (ma->error) return (ma); maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO); SLIST_INSERT_HEAD(&ma->list, maa, next); tbuf = (void *)(maa + 1); ma->error = copyinstr(val, tbuf, len, NULL); return (mount_arg(ma, name, tbuf, -1)); } /* * Plain argument. * * If length is -1, treat value as a C string. */ struct mntarg * mount_arg(struct mntarg *ma, const char *name, const void *val, int len) { if (ma == NULL) { ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO); SLIST_INIT(&ma->list); } if (ma->error) return (ma); ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2), M_MOUNT, M_WAITOK); ma->v[ma->len].iov_base = (void *)(uintptr_t)name; ma->v[ma->len].iov_len = strlen(name) + 1; ma->len++; ma->v[ma->len].iov_base = (void *)(uintptr_t)val; if (len < 0) ma->v[ma->len].iov_len = strlen(val) + 1; else ma->v[ma->len].iov_len = len; ma->len++; return (ma); } /* * Free a mntarg structure */ static void free_mntarg(struct mntarg *ma) { struct mntaarg *maa; while (!SLIST_EMPTY(&ma->list)) { maa = SLIST_FIRST(&ma->list); SLIST_REMOVE_HEAD(&ma->list, next); free(maa, M_MOUNT); } free(ma->v, M_MOUNT); free(ma, M_MOUNT); } /* * Mount a filesystem */ int kernel_mount(struct mntarg *ma, int flags) { struct uio auio; int error; KASSERT(ma != NULL, ("kernel_mount NULL ma")); KASSERT(ma->v != NULL, ("kernel_mount NULL ma->v")); KASSERT(!(ma->len & 1), ("kernel_mount odd ma->len (%d)", ma->len)); auio.uio_iov = ma->v; auio.uio_iovcnt = ma->len; auio.uio_segflg = UIO_SYSSPACE; error = ma->error; if (!error) error = vfs_donmount(curthread, flags, &auio); free_mntarg(ma); return (error); } /* * A printflike function to mount a filesystem. */ int kernel_vmount(int flags, ...) { struct mntarg *ma = NULL; va_list ap; const char *cp; const void *vp; int error; va_start(ap, flags); for (;;) { cp = va_arg(ap, const char *); if (cp == NULL) break; vp = va_arg(ap, const void *); ma = mount_arg(ma, cp, vp, (vp != NULL ? -1 : 0)); } va_end(ap); error = kernel_mount(ma, flags); return (error); } void vfs_oexport_conv(const struct oexport_args *oexp, struct export_args *exp) { bcopy(oexp, exp, sizeof(*oexp)); exp->ex_numsecflavors = 0; } Index: projects/binutils-2.17/sys/kern/vfs_subr.c =================================================================== --- projects/binutils-2.17/sys/kern/vfs_subr.c (revision 215829) +++ projects/binutils-2.17/sys/kern/vfs_subr.c (revision 215830) @@ -1,4421 +1,4431 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 */ /* * External virtual filesystem routines */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #define WI_MPSAFEQ 0 #define WI_GIANTQ 1 static MALLOC_DEFINE(M_NETADDR, "subr_export_host", "Export host address structure"); static void delmntque(struct vnode *vp); static int flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo, int slpflag, int slptimeo); static void syncer_shutdown(void *arg, int howto); static int vtryrecycle(struct vnode *vp); static void vbusy(struct vnode *vp); static void vinactive(struct vnode *, struct thread *); static void v_incr_usecount(struct vnode *); static void v_decr_usecount(struct vnode *); static void v_decr_useonly(struct vnode *); static void v_upgrade_usecount(struct vnode *); static void vfree(struct vnode *); static void vnlru_free(int); static void vgonel(struct vnode *); static void vfs_knllock(void *arg); static void vfs_knlunlock(void *arg); static void vfs_knl_assert_locked(void *arg); static void vfs_knl_assert_unlocked(void *arg); static void destroy_vpollinfo(struct vpollinfo *vi); /* * Number of vnodes in existence. Increased whenever getnewvnode() * allocates a new vnode, decreased on vdestroy() called on VI_DOOMed * vnode. */ static unsigned long numvnodes; SYSCTL_LONG(_vfs, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "Number of vnodes in existence"); /* * Conversion tables for conversion from vnode types to inode formats * and back. */ enum vtype iftovt_tab[16] = { VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, }; int vttoif_tab[10] = { 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFSOCK, S_IFIFO, S_IFMT, S_IFMT }; /* * List of vnodes that are ready for recycling. */ static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* * Free vnode target. Free vnodes may simply be files which have been stat'd * but not read. This is somewhat common, and a small cache of such files * should be kept to avoid recreation costs. */ static u_long wantfreevnodes; SYSCTL_LONG(_vfs, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, ""); /* Number of vnodes in the free list. */ static u_long freevnodes; SYSCTL_LONG(_vfs, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "Number of vnodes in the free list"); static int vlru_allow_cache_src; SYSCTL_INT(_vfs, OID_AUTO, vlru_allow_cache_src, CTLFLAG_RW, &vlru_allow_cache_src, 0, "Allow vlru to reclaim source vnode"); /* * Various variables used for debugging the new implementation of * reassignbuf(). * XXX these are probably of (very) limited utility now. */ static int reassignbufcalls; SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW, &reassignbufcalls, 0, "Number of calls to reassignbuf"); /* * Cache for the mount type id assigned to NFS. This is used for * special checks in nfs/nfs_nqlease.c and vm/vnode_pager.c. */ int nfs_mount_type = -1; /* To keep more than one thread at a time from running vfs_getnewfsid */ static struct mtx mntid_mtx; /* * Lock for any access to the following: * vnode_free_list * numvnodes * freevnodes */ static struct mtx vnode_free_list_mtx; /* Publicly exported FS */ struct nfs_public nfs_pub; /* Zone for allocation of new vnodes - used exclusively by getnewvnode() */ static uma_zone_t vnode_zone; static uma_zone_t vnodepoll_zone; /* * The workitem queue. * * It is useful to delay writes of file data and filesystem metadata * for tens of seconds so that quickly created and deleted files need * not waste disk bandwidth being created and removed. To realize this, * we append vnodes to a "workitem" queue. When running with a soft * updates implementation, most pending metadata dependencies should * not wait for more than a few seconds. Thus, mounted on block devices * are delayed only about a half the time that file data is delayed. * Similarly, directory updates are more critical, so are only delayed * about a third the time that file data is delayed. Thus, there are * SYNCER_MAXDELAY queues that are processed round-robin at a rate of * one each second (driven off the filesystem syncer process). The * syncer_delayno variable indicates the next queue that is to be processed. * Items that need to be processed soon are placed in this queue: * * syncer_workitem_pending[syncer_delayno] * * A delay of fifteen seconds is done by placing the request fifteen * entries later in the queue: * * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] * */ static int syncer_delayno; static long syncer_mask; LIST_HEAD(synclist, bufobj); static struct synclist *syncer_workitem_pending[2]; /* * The sync_mtx protects: * bo->bo_synclist * sync_vnode_count * syncer_delayno * syncer_state * syncer_workitem_pending * syncer_worklist_len * rushjob */ static struct mtx sync_mtx; static struct cv sync_wakeup; #define SYNCER_MAXDELAY 32 static int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ static int syncdelay = 30; /* max time to delay syncing data */ static int filedelay = 30; /* time to delay syncing files */ SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, "Time to delay syncing files (in seconds)"); static int dirdelay = 29; /* time to delay syncing directories */ SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, "Time to delay syncing directories (in seconds)"); static int metadelay = 28; /* time to delay syncing metadata */ SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, "Time to delay syncing metadata (in seconds)"); static int rushjob; /* number of slots to run ASAP */ static int stat_rush_requests; /* number of times I/O speeded up */ SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, "Number of times I/O speeded up (rush requests)"); /* * When shutting down the syncer, run it at four times normal speed. */ #define SYNCER_SHUTDOWN_SPEEDUP 4 static int sync_vnode_count; static int syncer_worklist_len; static enum { SYNCER_RUNNING, SYNCER_SHUTTING_DOWN, SYNCER_FINAL_DELAY } syncer_state; /* * Number of vnodes we want to exist at any one time. This is mostly used * to size hash tables in vnode-related code. It is normally not used in * getnewvnode(), as wantfreevnodes is normally nonzero.) * * XXX desiredvnodes is historical cruft and should not exist. */ int desiredvnodes; SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "Maximum number of vnodes"); SYSCTL_INT(_kern, OID_AUTO, minvnodes, CTLFLAG_RW, &wantfreevnodes, 0, "Minimum number of vnodes (legacy)"); static int vnlru_nowhere; SYSCTL_INT(_debug, OID_AUTO, vnlru_nowhere, CTLFLAG_RW, &vnlru_nowhere, 0, "Number of times the vnlru process ran without success"); /* * Macros to control when a vnode is freed and recycled. All require * the vnode interlock. */ #define VCANRECYCLE(vp) (((vp)->v_iflag & VI_FREE) && !(vp)->v_holdcnt) #define VSHOULDFREE(vp) (!((vp)->v_iflag & VI_FREE) && !(vp)->v_holdcnt) #define VSHOULDBUSY(vp) (((vp)->v_iflag & VI_FREE) && (vp)->v_holdcnt) /* * Initialize the vnode management data structures. * * Reevaluate the following cap on the number of vnodes after the physical * memory size exceeds 512GB. In the limit, as the physical memory size * grows, the ratio of physical pages to vnodes approaches sixteen to one. */ #ifndef MAXVNODES_MAX #define MAXVNODES_MAX (512 * (1024 * 1024 * 1024 / (int)PAGE_SIZE / 16)) #endif static void vntblinit(void *dummy __unused) { int physvnodes, virtvnodes; /* * Desiredvnodes is a function of the physical memory size and the * kernel's heap size. Generally speaking, it scales with the * physical memory size. The ratio of desiredvnodes to physical pages * is one to four until desiredvnodes exceeds 98,304. Thereafter, the * marginal ratio of desiredvnodes to physical pages is one to * sixteen. However, desiredvnodes is limited by the kernel's heap * size. The memory required by desiredvnodes vnodes and vm objects * may not exceed one seventh of the kernel's heap size. */ physvnodes = maxproc + cnt.v_page_count / 16 + 3 * min(98304 * 4, cnt.v_page_count) / 16; virtvnodes = vm_kmem_size / (7 * (sizeof(struct vm_object) + sizeof(struct vnode))); desiredvnodes = min(physvnodes, virtvnodes); if (desiredvnodes > MAXVNODES_MAX) { if (bootverbose) printf("Reducing kern.maxvnodes %d -> %d\n", desiredvnodes, MAXVNODES_MAX); desiredvnodes = MAXVNODES_MAX; } wantfreevnodes = desiredvnodes / 4; mtx_init(&mntid_mtx, "mntid", NULL, MTX_DEF); TAILQ_INIT(&vnode_free_list); mtx_init(&vnode_free_list_mtx, "vnode_free_list", NULL, MTX_DEF); vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); /* * Initialize the filesystem syncer. */ syncer_workitem_pending[WI_MPSAFEQ] = hashinit(syncer_maxdelay, M_VNODE, &syncer_mask); syncer_workitem_pending[WI_GIANTQ] = hashinit(syncer_maxdelay, M_VNODE, &syncer_mask); syncer_maxdelay = syncer_mask + 1; mtx_init(&sync_mtx, "Syncer mtx", NULL, MTX_DEF); cv_init(&sync_wakeup, "syncer"); } SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vntblinit, NULL); /* * Mark a mount point as busy. Used to synchronize access and to delay * unmounting. Eventually, mountlist_mtx is not released on failure. */ int vfs_busy(struct mount *mp, int flags) { MPASS((flags & ~MBF_MASK) == 0); CTR3(KTR_VFS, "%s: mp %p with flags %d", __func__, mp, flags); MNT_ILOCK(mp); MNT_REF(mp); /* * If mount point is currenly being unmounted, sleep until the * mount point fate is decided. If thread doing the unmounting fails, * it will clear MNTK_UNMOUNT flag before waking us up, indicating * that this mount point has survived the unmount attempt and vfs_busy * should retry. Otherwise the unmounter thread will set MNTK_REFEXPIRE * flag in addition to MNTK_UNMOUNT, indicating that mount point is * about to be really destroyed. vfs_busy needs to release its * reference on the mount point in this case and return with ENOENT, * telling the caller that mount mount it tried to busy is no longer * valid. */ while (mp->mnt_kern_flag & MNTK_UNMOUNT) { if (flags & MBF_NOWAIT || mp->mnt_kern_flag & MNTK_REFEXPIRE) { MNT_REL(mp); MNT_IUNLOCK(mp); CTR1(KTR_VFS, "%s: failed busying before sleeping", __func__); return (ENOENT); } if (flags & MBF_MNTLSTLOCK) mtx_unlock(&mountlist_mtx); mp->mnt_kern_flag |= MNTK_MWAIT; msleep(mp, MNT_MTX(mp), PVFS, "vfs_busy", 0); if (flags & MBF_MNTLSTLOCK) mtx_lock(&mountlist_mtx); } if (flags & MBF_MNTLSTLOCK) mtx_unlock(&mountlist_mtx); mp->mnt_lockref++; MNT_IUNLOCK(mp); return (0); } /* * Free a busy filesystem. */ void vfs_unbusy(struct mount *mp) { CTR2(KTR_VFS, "%s: mp %p", __func__, mp); MNT_ILOCK(mp); MNT_REL(mp); KASSERT(mp->mnt_lockref > 0, ("negative mnt_lockref")); mp->mnt_lockref--; if (mp->mnt_lockref == 0 && (mp->mnt_kern_flag & MNTK_DRAINING) != 0) { MPASS(mp->mnt_kern_flag & MNTK_UNMOUNT); CTR1(KTR_VFS, "%s: waking up waiters", __func__); mp->mnt_kern_flag &= ~MNTK_DRAINING; wakeup(&mp->mnt_lockref); } MNT_IUNLOCK(mp); } /* * Lookup a mount point by filesystem identifier. */ struct mount * vfs_getvfs(fsid_t *fsid) { struct mount *mp; CTR2(KTR_VFS, "%s: fsid %p", __func__, fsid); mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { vfs_ref(mp); mtx_unlock(&mountlist_mtx); return (mp); } } mtx_unlock(&mountlist_mtx); CTR2(KTR_VFS, "%s: lookup failed for %p id", __func__, fsid); return ((struct mount *) 0); } /* * Lookup a mount point by filesystem identifier, busying it before * returning. */ struct mount * vfs_busyfs(fsid_t *fsid) { struct mount *mp; int error; CTR2(KTR_VFS, "%s: fsid %p", __func__, fsid); mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { error = vfs_busy(mp, MBF_MNTLSTLOCK); if (error) { mtx_unlock(&mountlist_mtx); return (NULL); } return (mp); } } CTR2(KTR_VFS, "%s: lookup failed for %p id", __func__, fsid); mtx_unlock(&mountlist_mtx); return ((struct mount *) 0); } /* * Check if a user can access privileged mount options. */ int vfs_suser(struct mount *mp, struct thread *td) { int error; /* * If the thread is jailed, but this is not a jail-friendly file * system, deny immediately. */ if (!(mp->mnt_vfc->vfc_flags & VFCF_JAIL) && jailed(td->td_ucred)) return (EPERM); /* * If the file system was mounted outside the jail of the calling * thread, deny immediately. */ if (prison_check(td->td_ucred, mp->mnt_cred) != 0) return (EPERM); /* * If file system supports delegated administration, we don't check * for the PRIV_VFS_MOUNT_OWNER privilege - it will be better verified * by the file system itself. * If this is not the user that did original mount, we check for * the PRIV_VFS_MOUNT_OWNER privilege. */ if (!(mp->mnt_vfc->vfc_flags & VFCF_DELEGADMIN) && mp->mnt_cred->cr_uid != td->td_ucred->cr_uid) { if ((error = priv_check(td, PRIV_VFS_MOUNT_OWNER)) != 0) return (error); } return (0); } /* * Get a new unique fsid. Try to make its val[0] unique, since this value * will be used to create fake device numbers for stat(). Also try (but * not so hard) make its val[0] unique mod 2^16, since some emulators only * support 16-bit device numbers. We end up with unique val[0]'s for the * first 2^16 calls and unique val[0]'s mod 2^16 for the first 2^8 calls. * * Keep in mind that several mounts may be running in parallel. Starting * the search one past where the previous search terminated is both a * micro-optimization and a defense against returning the same fsid to * different mounts. */ void vfs_getnewfsid(struct mount *mp) { static uint16_t mntid_base; struct mount *nmp; fsid_t tfsid; int mtype; CTR2(KTR_VFS, "%s: mp %p", __func__, mp); mtx_lock(&mntid_mtx); mtype = mp->mnt_vfc->vfc_typenum; tfsid.val[1] = mtype; mtype = (mtype & 0xFF) << 24; for (;;) { tfsid.val[0] = makedev(255, mtype | ((mntid_base & 0xFF00) << 8) | (mntid_base & 0xFF)); mntid_base++; if ((nmp = vfs_getvfs(&tfsid)) == NULL) break; vfs_rel(nmp); } mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; mp->mnt_stat.f_fsid.val[1] = tfsid.val[1]; mtx_unlock(&mntid_mtx); } /* * Knob to control the precision of file timestamps: * * 0 = seconds only; nanoseconds zeroed. * 1 = seconds and nanoseconds, accurate within 1/HZ. * 2 = seconds and nanoseconds, truncated to microseconds. * >=3 = seconds and nanoseconds, maximum precision. */ enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC }; static int timestamp_precision = TSP_SEC; SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW, ×tamp_precision, 0, "File timestamp precision (0: seconds, " "1: sec + ns accurate to 1/HZ, 2: sec + ns truncated to ms, " "3+: sec + ns (max. precision))"); /* * Get a current timestamp. */ void vfs_timestamp(struct timespec *tsp) { struct timeval tv; switch (timestamp_precision) { case TSP_SEC: tsp->tv_sec = time_second; tsp->tv_nsec = 0; break; case TSP_HZ: getnanotime(tsp); break; case TSP_USEC: microtime(&tv); TIMEVAL_TO_TIMESPEC(&tv, tsp); break; case TSP_NSEC: default: nanotime(tsp); break; } } /* * Set vnode attributes to VNOVAL */ void vattr_null(struct vattr *vap) { vap->va_type = VNON; vap->va_size = VNOVAL; vap->va_bytes = VNOVAL; vap->va_mode = VNOVAL; vap->va_nlink = VNOVAL; vap->va_uid = VNOVAL; vap->va_gid = VNOVAL; vap->va_fsid = VNOVAL; vap->va_fileid = VNOVAL; vap->va_blocksize = VNOVAL; vap->va_rdev = VNOVAL; vap->va_atime.tv_sec = VNOVAL; vap->va_atime.tv_nsec = VNOVAL; vap->va_mtime.tv_sec = VNOVAL; vap->va_mtime.tv_nsec = VNOVAL; vap->va_ctime.tv_sec = VNOVAL; vap->va_ctime.tv_nsec = VNOVAL; vap->va_birthtime.tv_sec = VNOVAL; vap->va_birthtime.tv_nsec = VNOVAL; vap->va_flags = VNOVAL; vap->va_gen = VNOVAL; vap->va_vaflags = 0; } /* * This routine is called when we have too many vnodes. It attempts * to free vnodes and will potentially free vnodes that still * have VM backing store (VM backing store is typically the cause * of a vnode blowout so we want to do this). Therefore, this operation * is not considered cheap. * * A number of conditions may prevent a vnode from being reclaimed. * the buffer cache may have references on the vnode, a directory * vnode may still have references due to the namei cache representing * underlying files, or the vnode may be in active use. It is not * desireable to reuse such vnodes. These conditions may cause the * number of vnodes to reach some minimum value regardless of what * you set kern.maxvnodes to. Do not set kern.maxvnodes too low. */ static int vlrureclaim(struct mount *mp) { struct vnode *vp; int done; int trigger; int usevnodes; int count; /* * Calculate the trigger point, don't allow user * screwups to blow us up. This prevents us from * recycling vnodes with lots of resident pages. We * aren't trying to free memory, we are trying to * free vnodes. */ usevnodes = desiredvnodes; if (usevnodes <= 0) usevnodes = 1; trigger = cnt.v_page_count * 2 / usevnodes; done = 0; vn_start_write(NULL, &mp, V_WAIT); MNT_ILOCK(mp); count = mp->mnt_nvnodelistsize / 10 + 1; while (count != 0) { vp = TAILQ_FIRST(&mp->mnt_nvnodelist); while (vp != NULL && vp->v_type == VMARKER) vp = TAILQ_NEXT(vp, v_nmntvnodes); if (vp == NULL) break; TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes); TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes); --count; if (!VI_TRYLOCK(vp)) goto next_iter; /* * If it's been deconstructed already, it's still * referenced, or it exceeds the trigger, skip it. */ if (vp->v_usecount || (!vlru_allow_cache_src && !LIST_EMPTY(&(vp)->v_cache_src)) || (vp->v_iflag & VI_DOOMED) != 0 || (vp->v_object != NULL && vp->v_object->resident_page_count > trigger)) { VI_UNLOCK(vp); goto next_iter; } MNT_IUNLOCK(mp); vholdl(vp); if (VOP_LOCK(vp, LK_INTERLOCK|LK_EXCLUSIVE|LK_NOWAIT)) { vdrop(vp); goto next_iter_mntunlocked; } VI_LOCK(vp); /* * v_usecount may have been bumped after VOP_LOCK() dropped * the vnode interlock and before it was locked again. * * It is not necessary to recheck VI_DOOMED because it can * only be set by another thread that holds both the vnode * lock and vnode interlock. If another thread has the * vnode lock before we get to VOP_LOCK() and obtains the * vnode interlock after VOP_LOCK() drops the vnode * interlock, the other thread will be unable to drop the * vnode lock before our VOP_LOCK() call fails. */ if (vp->v_usecount || (!vlru_allow_cache_src && !LIST_EMPTY(&(vp)->v_cache_src)) || (vp->v_object != NULL && vp->v_object->resident_page_count > trigger)) { VOP_UNLOCK(vp, LK_INTERLOCK); goto next_iter_mntunlocked; } KASSERT((vp->v_iflag & VI_DOOMED) == 0, ("VI_DOOMED unexpectedly detected in vlrureclaim()")); vgonel(vp); VOP_UNLOCK(vp, 0); vdropl(vp); done++; next_iter_mntunlocked: if ((count % 256) != 0) goto relock_mnt; goto yield; next_iter: if ((count % 256) != 0) continue; MNT_IUNLOCK(mp); yield: uio_yield(); relock_mnt: MNT_ILOCK(mp); } MNT_IUNLOCK(mp); vn_finished_write(mp); return done; } /* * Attempt to keep the free list at wantfreevnodes length. */ static void vnlru_free(int count) { struct vnode *vp; int vfslocked; mtx_assert(&vnode_free_list_mtx, MA_OWNED); for (; count > 0; count--) { vp = TAILQ_FIRST(&vnode_free_list); /* * The list can be modified while the free_list_mtx * has been dropped and vp could be NULL here. */ if (!vp) break; VNASSERT(vp->v_op != NULL, vp, ("vnlru_free: vnode already reclaimed.")); TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); /* * Don't recycle if we can't get the interlock. */ if (!VI_TRYLOCK(vp)) { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); continue; } VNASSERT(VCANRECYCLE(vp), vp, ("vp inconsistent on freelist")); freevnodes--; vp->v_iflag &= ~VI_FREE; vholdl(vp); mtx_unlock(&vnode_free_list_mtx); VI_UNLOCK(vp); vfslocked = VFS_LOCK_GIANT(vp->v_mount); vtryrecycle(vp); VFS_UNLOCK_GIANT(vfslocked); /* * If the recycled succeeded this vdrop will actually free * the vnode. If not it will simply place it back on * the free list. */ vdrop(vp); mtx_lock(&vnode_free_list_mtx); } } /* * Attempt to recycle vnodes in a context that is always safe to block. * Calling vlrurecycle() from the bowels of filesystem code has some * interesting deadlock problems. */ static struct proc *vnlruproc; static int vnlruproc_sig; static void vnlru_proc(void) { struct mount *mp, *nmp; int done, vfslocked; struct proc *p = vnlruproc; EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, p, SHUTDOWN_PRI_FIRST); for (;;) { kproc_suspend_check(p); mtx_lock(&vnode_free_list_mtx); if (freevnodes > wantfreevnodes) vnlru_free(freevnodes - wantfreevnodes); if (numvnodes <= desiredvnodes * 9 / 10) { vnlruproc_sig = 0; wakeup(&vnlruproc_sig); msleep(vnlruproc, &vnode_free_list_mtx, PVFS|PDROP, "vlruwt", hz); continue; } mtx_unlock(&vnode_free_list_mtx); done = 0; mtx_lock(&mountlist_mtx); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } vfslocked = VFS_LOCK_GIANT(mp); done += vlrureclaim(mp); VFS_UNLOCK_GIANT(vfslocked); mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp); } mtx_unlock(&mountlist_mtx); if (done == 0) { #if 0 /* These messages are temporary debugging aids */ if (vnlru_nowhere < 5) printf("vnlru process getting nowhere..\n"); else if (vnlru_nowhere == 5) printf("vnlru process messages stopped.\n"); #endif vnlru_nowhere++; tsleep(vnlruproc, PPAUSE, "vlrup", hz * 3); } else uio_yield(); } } static struct kproc_desc vnlru_kp = { "vnlru", vnlru_proc, &vnlruproc }; SYSINIT(vnlru, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &vnlru_kp); /* * Routines having to do with the management of the vnode table. */ void vdestroy(struct vnode *vp) { struct bufobj *bo; CTR2(KTR_VFS, "%s: vp %p", __func__, vp); mtx_lock(&vnode_free_list_mtx); numvnodes--; mtx_unlock(&vnode_free_list_mtx); bo = &vp->v_bufobj; VNASSERT((vp->v_iflag & VI_FREE) == 0, vp, ("cleaned vnode still on the free list.")); VNASSERT(vp->v_data == NULL, vp, ("cleaned vnode isn't")); VNASSERT(vp->v_holdcnt == 0, vp, ("Non-zero hold count")); VNASSERT(vp->v_usecount == 0, vp, ("Non-zero use count")); VNASSERT(vp->v_writecount == 0, vp, ("Non-zero write count")); VNASSERT(bo->bo_numoutput == 0, vp, ("Clean vnode has pending I/O's")); VNASSERT(bo->bo_clean.bv_cnt == 0, vp, ("cleanbufcnt not 0")); VNASSERT(bo->bo_clean.bv_root == NULL, vp, ("cleanblkroot not NULL")); VNASSERT(bo->bo_dirty.bv_cnt == 0, vp, ("dirtybufcnt not 0")); VNASSERT(bo->bo_dirty.bv_root == NULL, vp, ("dirtyblkroot not NULL")); VNASSERT(TAILQ_EMPTY(&vp->v_cache_dst), vp, ("vp has namecache dst")); VNASSERT(LIST_EMPTY(&vp->v_cache_src), vp, ("vp has namecache src")); VNASSERT(vp->v_cache_dd == NULL, vp, ("vp has namecache for ..")); VI_UNLOCK(vp); #ifdef MAC mac_vnode_destroy(vp); #endif if (vp->v_pollinfo != NULL) destroy_vpollinfo(vp->v_pollinfo); #ifdef INVARIANTS /* XXX Elsewhere we can detect an already freed vnode via NULL v_op. */ vp->v_op = NULL; #endif lockdestroy(vp->v_vnlock); mtx_destroy(&vp->v_interlock); mtx_destroy(BO_MTX(bo)); uma_zfree(vnode_zone, vp); } /* * Try to recycle a freed vnode. We abort if anyone picks up a reference * before we actually vgone(). This function must be called with the vnode * held to prevent the vnode from being returned to the free list midway * through vgone(). */ static int vtryrecycle(struct vnode *vp) { struct mount *vnmp; CTR2(KTR_VFS, "%s: vp %p", __func__, vp); VNASSERT(vp->v_holdcnt, vp, ("vtryrecycle: Recycling vp %p without a reference.", vp)); /* * This vnode may found and locked via some other list, if so we * can't recycle it yet. */ if (VOP_LOCK(vp, LK_EXCLUSIVE | LK_NOWAIT) != 0) { CTR2(KTR_VFS, "%s: impossible to recycle, vp %p lock is already held", __func__, vp); return (EWOULDBLOCK); } /* * Don't recycle if its filesystem is being suspended. */ if (vn_start_write(vp, &vnmp, V_NOWAIT) != 0) { VOP_UNLOCK(vp, 0); CTR2(KTR_VFS, "%s: impossible to recycle, cannot start the write for %p", __func__, vp); return (EBUSY); } /* * If we got this far, we need to acquire the interlock and see if * anyone picked up this vnode from another list. If not, we will * mark it with DOOMED via vgonel() so that anyone who does find it * will skip over it. */ VI_LOCK(vp); if (vp->v_usecount) { VOP_UNLOCK(vp, LK_INTERLOCK); vn_finished_write(vnmp); CTR2(KTR_VFS, "%s: impossible to recycle, %p is already referenced", __func__, vp); return (EBUSY); } if ((vp->v_iflag & VI_DOOMED) == 0) vgonel(vp); VOP_UNLOCK(vp, LK_INTERLOCK); vn_finished_write(vnmp); return (0); } /* * Return the next vnode from the free list. */ int getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops, struct vnode **vpp) { struct vnode *vp = NULL; struct bufobj *bo; CTR3(KTR_VFS, "%s: mp %p with tag %s", __func__, mp, tag); mtx_lock(&vnode_free_list_mtx); /* * Lend our context to reclaim vnodes if they've exceeded the max. */ if (freevnodes > wantfreevnodes) vnlru_free(1); /* * Wait for available vnodes. */ if (numvnodes > desiredvnodes) { if (mp != NULL && (mp->mnt_kern_flag & MNTK_SUSPEND)) { /* * File system is beeing suspended, we cannot risk a * deadlock here, so allocate new vnode anyway. */ if (freevnodes > wantfreevnodes) vnlru_free(freevnodes - wantfreevnodes); goto alloc; } if (vnlruproc_sig == 0) { vnlruproc_sig = 1; /* avoid unnecessary wakeups */ wakeup(vnlruproc); } msleep(&vnlruproc_sig, &vnode_free_list_mtx, PVFS, "vlruwk", hz); #if 0 /* XXX Not all VFS_VGET/ffs_vget callers check returns. */ if (numvnodes > desiredvnodes) { mtx_unlock(&vnode_free_list_mtx); return (ENFILE); } #endif } alloc: numvnodes++; mtx_unlock(&vnode_free_list_mtx); vp = (struct vnode *) uma_zalloc(vnode_zone, M_WAITOK|M_ZERO); /* * Setup locks. */ vp->v_vnlock = &vp->v_lock; mtx_init(&vp->v_interlock, "vnode interlock", NULL, MTX_DEF); /* * By default, don't allow shared locks unless filesystems * opt-in. */ lockinit(vp->v_vnlock, PVFS, tag, VLKTIMEOUT, LK_NOSHARE); /* * Initialize bufobj. */ bo = &vp->v_bufobj; bo->__bo_vnode = vp; mtx_init(BO_MTX(bo), "bufobj interlock", NULL, MTX_DEF); bo->bo_ops = &buf_ops_bio; bo->bo_private = vp; TAILQ_INIT(&bo->bo_clean.bv_hd); TAILQ_INIT(&bo->bo_dirty.bv_hd); /* * Initialize namecache. */ LIST_INIT(&vp->v_cache_src); TAILQ_INIT(&vp->v_cache_dst); /* * Finalize various vnode identity bits. */ vp->v_type = VNON; vp->v_tag = tag; vp->v_op = vops; v_incr_usecount(vp); vp->v_data = 0; #ifdef MAC mac_vnode_init(vp); if (mp != NULL && (mp->mnt_flag & MNT_MULTILABEL) == 0) mac_vnode_associate_singlelabel(mp, vp); else if (mp == NULL && vops != &dead_vnodeops) printf("NULL mp in getnewvnode()\n"); #endif if (mp != NULL) { bo->bo_bsize = mp->mnt_stat.f_iosize; if ((mp->mnt_kern_flag & MNTK_NOKNOTE) != 0) vp->v_vflag |= VV_NOKNOTE; } *vpp = vp; return (0); } /* * Delete from old mount point vnode list, if on one. */ static void delmntque(struct vnode *vp) { struct mount *mp; mp = vp->v_mount; if (mp == NULL) return; MNT_ILOCK(mp); vp->v_mount = NULL; VNASSERT(mp->mnt_nvnodelistsize > 0, vp, ("bad mount point vnode list size")); TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes); mp->mnt_nvnodelistsize--; MNT_REL(mp); MNT_IUNLOCK(mp); } static void insmntque_stddtr(struct vnode *vp, void *dtr_arg) { vp->v_data = NULL; vp->v_op = &dead_vnodeops; /* XXX non mp-safe fs may still call insmntque with vnode unlocked */ if (!VOP_ISLOCKED(vp)) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); vgone(vp); vput(vp); } /* * Insert into list of vnodes for the new mount point, if available. */ int insmntque1(struct vnode *vp, struct mount *mp, void (*dtr)(struct vnode *, void *), void *dtr_arg) { int locked; KASSERT(vp->v_mount == NULL, ("insmntque: vnode already on per mount vnode list")); VNASSERT(mp != NULL, vp, ("Don't call insmntque(foo, NULL)")); #ifdef DEBUG_VFS_LOCKS if (!VFS_NEEDSGIANT(mp)) ASSERT_VOP_ELOCKED(vp, "insmntque: mp-safe fs and non-locked vp"); #endif MNT_ILOCK(mp); if ((mp->mnt_kern_flag & MNTK_NOINSMNTQ) != 0 && ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0 || mp->mnt_nvnodelistsize == 0)) { locked = VOP_ISLOCKED(vp); if (!locked || (locked == LK_EXCLUSIVE && (vp->v_vflag & VV_FORCEINSMQ) == 0)) { MNT_IUNLOCK(mp); if (dtr != NULL) dtr(vp, dtr_arg); return (EBUSY); } } vp->v_mount = mp; MNT_REF(mp); TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes); VNASSERT(mp->mnt_nvnodelistsize >= 0, vp, ("neg mount point vnode list size")); mp->mnt_nvnodelistsize++; MNT_IUNLOCK(mp); return (0); } int insmntque(struct vnode *vp, struct mount *mp) { return (insmntque1(vp, mp, insmntque_stddtr, NULL)); } /* * Flush out and invalidate all buffers associated with a bufobj * Called with the underlying object locked. */ int bufobj_invalbuf(struct bufobj *bo, int flags, int slpflag, int slptimeo) { int error; BO_LOCK(bo); if (flags & V_SAVE) { error = bufobj_wwait(bo, slpflag, slptimeo); if (error) { BO_UNLOCK(bo); return (error); } if (bo->bo_dirty.bv_cnt > 0) { BO_UNLOCK(bo); if ((error = BO_SYNC(bo, MNT_WAIT)) != 0) return (error); /* * XXX We could save a lock/unlock if this was only * enabled under INVARIANTS */ BO_LOCK(bo); if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) panic("vinvalbuf: dirty bufs"); } } /* * If you alter this loop please notice that interlock is dropped and * reacquired in flushbuflist. Special care is needed to ensure that * no race conditions occur from this. */ do { error = flushbuflist(&bo->bo_clean, flags, bo, slpflag, slptimeo); if (error == 0) error = flushbuflist(&bo->bo_dirty, flags, bo, slpflag, slptimeo); if (error != 0 && error != EAGAIN) { BO_UNLOCK(bo); return (error); } } while (error != 0); /* * Wait for I/O to complete. XXX needs cleaning up. The vnode can * have write I/O in-progress but if there is a VM object then the * VM object can also have read-I/O in-progress. */ do { bufobj_wwait(bo, 0, 0); BO_UNLOCK(bo); if (bo->bo_object != NULL) { VM_OBJECT_LOCK(bo->bo_object); vm_object_pip_wait(bo->bo_object, "bovlbx"); VM_OBJECT_UNLOCK(bo->bo_object); } BO_LOCK(bo); } while (bo->bo_numoutput > 0); BO_UNLOCK(bo); /* * Destroy the copy in the VM cache, too. */ if (bo->bo_object != NULL && (flags & (V_ALT | V_NORMAL)) == 0) { VM_OBJECT_LOCK(bo->bo_object); vm_object_page_remove(bo->bo_object, 0, 0, (flags & V_SAVE) ? TRUE : FALSE); VM_OBJECT_UNLOCK(bo->bo_object); } #ifdef INVARIANTS BO_LOCK(bo); if ((flags & (V_ALT | V_NORMAL)) == 0 && (bo->bo_dirty.bv_cnt > 0 || bo->bo_clean.bv_cnt > 0)) panic("vinvalbuf: flush failed"); BO_UNLOCK(bo); #endif return (0); } /* * Flush out and invalidate all buffers associated with a vnode. * Called with the underlying object locked. */ int vinvalbuf(struct vnode *vp, int flags, int slpflag, int slptimeo) { CTR3(KTR_VFS, "%s: vp %p with flags %d", __func__, vp, flags); ASSERT_VOP_LOCKED(vp, "vinvalbuf"); return (bufobj_invalbuf(&vp->v_bufobj, flags, slpflag, slptimeo)); } /* * Flush out buffers on the specified list. * */ static int flushbuflist( struct bufv *bufv, int flags, struct bufobj *bo, int slpflag, int slptimeo) { struct buf *bp, *nbp; int retval, error; daddr_t lblkno; b_xflags_t xflags; ASSERT_BO_LOCKED(bo); retval = 0; TAILQ_FOREACH_SAFE(bp, &bufv->bv_hd, b_bobufs, nbp) { if (((flags & V_NORMAL) && (bp->b_xflags & BX_ALTDATA)) || ((flags & V_ALT) && (bp->b_xflags & BX_ALTDATA) == 0)) { continue; } lblkno = 0; xflags = 0; if (nbp != NULL) { lblkno = nbp->b_lblkno; xflags = nbp->b_xflags & (BX_BKGRDMARKER | BX_VNDIRTY | BX_VNCLEAN); } retval = EAGAIN; error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_MTX(bo), "flushbuf", slpflag, slptimeo); if (error) { BO_LOCK(bo); return (error != ENOLCK ? error : EAGAIN); } KASSERT(bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); if (bp->b_bufobj != bo) { /* XXX: necessary ? */ BUF_UNLOCK(bp); BO_LOCK(bo); return (EAGAIN); } /* * XXX Since there are no node locks for NFS, I * believe there is a slight chance that a delayed * write will occur while sleeping just above, so * check for it. */ if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) && (flags & V_SAVE)) { BO_LOCK(bo); bremfree(bp); BO_UNLOCK(bo); bp->b_flags |= B_ASYNC; bwrite(bp); BO_LOCK(bo); return (EAGAIN); /* XXX: why not loop ? */ } BO_LOCK(bo); bremfree(bp); BO_UNLOCK(bo); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); BO_LOCK(bo); if (nbp != NULL && (nbp->b_bufobj != bo || nbp->b_lblkno != lblkno || (nbp->b_xflags & (BX_BKGRDMARKER | BX_VNDIRTY | BX_VNCLEAN)) != xflags)) break; /* nbp invalid */ } return (retval); } /* * Truncate a file's buffer and pages to a specified length. This * is in lieu of the old vinvalbuf mechanism, which performed unneeded * sync activity. */ int vtruncbuf(struct vnode *vp, struct ucred *cred, struct thread *td, off_t length, int blksize) { struct buf *bp, *nbp; int anyfreed; int trunclbn; struct bufobj *bo; CTR5(KTR_VFS, "%s: vp %p with cred %p and block %d:%ju", __func__, vp, cred, blksize, (uintmax_t)length); /* * Round up to the *next* lbn. */ trunclbn = (length + blksize - 1) / blksize; ASSERT_VOP_LOCKED(vp, "vtruncbuf"); restart: bo = &vp->v_bufobj; BO_LOCK(bo); anyfreed = 1; for (;anyfreed;) { anyfreed = 0; TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno < trunclbn) continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_MTX(bo)) == ENOLCK) goto restart; BO_LOCK(bo); bremfree(bp); BO_UNLOCK(bo); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); anyfreed = 1; if (nbp != NULL && (((nbp->b_xflags & BX_VNCLEAN) == 0) || (nbp->b_vp != vp) || (nbp->b_flags & B_DELWRI))) { goto restart; } BO_LOCK(bo); } TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno < trunclbn) continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_MTX(bo)) == ENOLCK) goto restart; BO_LOCK(bo); bremfree(bp); BO_UNLOCK(bo); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); anyfreed = 1; if (nbp != NULL && (((nbp->b_xflags & BX_VNDIRTY) == 0) || (nbp->b_vp != vp) || (nbp->b_flags & B_DELWRI) == 0)) { goto restart; } BO_LOCK(bo); } } if (length > 0) { restartsync: TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno > 0) continue; /* * Since we hold the vnode lock this should only * fail if we're racing with the buf daemon. */ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_MTX(bo)) == ENOLCK) { goto restart; } VNASSERT((bp->b_flags & B_DELWRI), vp, ("buf(%p) on dirty queue without DELWRI", bp)); BO_LOCK(bo); bremfree(bp); BO_UNLOCK(bo); bawrite(bp); BO_LOCK(bo); goto restartsync; } } bufobj_wwait(bo, 0, 0); BO_UNLOCK(bo); vnode_pager_setsize(vp, length); return (0); } /* * buf_splay() - splay tree core for the clean/dirty list of buffers in * a vnode. * * NOTE: We have to deal with the special case of a background bitmap * buffer, a situation where two buffers will have the same logical * block offset. We want (1) only the foreground buffer to be accessed * in a lookup and (2) must differentiate between the foreground and * background buffer in the splay tree algorithm because the splay * tree cannot normally handle multiple entities with the same 'index'. * We accomplish this by adding differentiating flags to the splay tree's * numerical domain. */ static struct buf * buf_splay(daddr_t lblkno, b_xflags_t xflags, struct buf *root) { struct buf dummy; struct buf *lefttreemax, *righttreemin, *y; if (root == NULL) return (NULL); lefttreemax = righttreemin = &dummy; for (;;) { if (lblkno < root->b_lblkno || (lblkno == root->b_lblkno && (xflags & BX_BKGRDMARKER) < (root->b_xflags & BX_BKGRDMARKER))) { if ((y = root->b_left) == NULL) break; if (lblkno < y->b_lblkno) { /* Rotate right. */ root->b_left = y->b_right; y->b_right = root; root = y; if ((y = root->b_left) == NULL) break; } /* Link into the new root's right tree. */ righttreemin->b_left = root; righttreemin = root; } else if (lblkno > root->b_lblkno || (lblkno == root->b_lblkno && (xflags & BX_BKGRDMARKER) > (root->b_xflags & BX_BKGRDMARKER))) { if ((y = root->b_right) == NULL) break; if (lblkno > y->b_lblkno) { /* Rotate left. */ root->b_right = y->b_left; y->b_left = root; root = y; if ((y = root->b_right) == NULL) break; } /* Link into the new root's left tree. */ lefttreemax->b_right = root; lefttreemax = root; } else { break; } root = y; } /* Assemble the new root. */ lefttreemax->b_right = root->b_left; righttreemin->b_left = root->b_right; root->b_left = dummy.b_right; root->b_right = dummy.b_left; return (root); } static void buf_vlist_remove(struct buf *bp) { struct buf *root; struct bufv *bv; KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp)); ASSERT_BO_LOCKED(bp->b_bufobj); KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) != (BX_VNDIRTY|BX_VNCLEAN), ("buf_vlist_remove: Buf %p is on two lists", bp)); if (bp->b_xflags & BX_VNDIRTY) bv = &bp->b_bufobj->bo_dirty; else bv = &bp->b_bufobj->bo_clean; if (bp != bv->bv_root) { root = buf_splay(bp->b_lblkno, bp->b_xflags, bv->bv_root); KASSERT(root == bp, ("splay lookup failed in remove")); } if (bp->b_left == NULL) { root = bp->b_right; } else { root = buf_splay(bp->b_lblkno, bp->b_xflags, bp->b_left); root->b_right = bp->b_right; } bv->bv_root = root; TAILQ_REMOVE(&bv->bv_hd, bp, b_bobufs); bv->bv_cnt--; bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN); } /* * Add the buffer to the sorted clean or dirty block list using a * splay tree algorithm. * * NOTE: xflags is passed as a constant, optimizing this inline function! */ static void buf_vlist_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags) { struct buf *root; struct bufv *bv; ASSERT_BO_LOCKED(bo); KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0, ("buf_vlist_add: Buf %p has existing xflags %d", bp, bp->b_xflags)); bp->b_xflags |= xflags; if (xflags & BX_VNDIRTY) bv = &bo->bo_dirty; else bv = &bo->bo_clean; root = buf_splay(bp->b_lblkno, bp->b_xflags, bv->bv_root); if (root == NULL) { bp->b_left = NULL; bp->b_right = NULL; TAILQ_INSERT_TAIL(&bv->bv_hd, bp, b_bobufs); } else if (bp->b_lblkno < root->b_lblkno || (bp->b_lblkno == root->b_lblkno && (bp->b_xflags & BX_BKGRDMARKER) < (root->b_xflags & BX_BKGRDMARKER))) { bp->b_left = root->b_left; bp->b_right = root; root->b_left = NULL; TAILQ_INSERT_BEFORE(root, bp, b_bobufs); } else { bp->b_right = root->b_right; bp->b_left = root; root->b_right = NULL; TAILQ_INSERT_AFTER(&bv->bv_hd, root, bp, b_bobufs); } bv->bv_cnt++; bv->bv_root = bp; } /* * Lookup a buffer using the splay tree. Note that we specifically avoid * shadow buffers used in background bitmap writes. * * This code isn't quite efficient as it could be because we are maintaining * two sorted lists and do not know which list the block resides in. * * During a "make buildworld" the desired buffer is found at one of * the roots more than 60% of the time. Thus, checking both roots * before performing either splay eliminates unnecessary splays on the * first tree splayed. */ struct buf * gbincore(struct bufobj *bo, daddr_t lblkno) { struct buf *bp; ASSERT_BO_LOCKED(bo); if ((bp = bo->bo_clean.bv_root) != NULL && bp->b_lblkno == lblkno && !(bp->b_xflags & BX_BKGRDMARKER)) return (bp); if ((bp = bo->bo_dirty.bv_root) != NULL && bp->b_lblkno == lblkno && !(bp->b_xflags & BX_BKGRDMARKER)) return (bp); if ((bp = bo->bo_clean.bv_root) != NULL) { bo->bo_clean.bv_root = bp = buf_splay(lblkno, 0, bp); if (bp->b_lblkno == lblkno && !(bp->b_xflags & BX_BKGRDMARKER)) return (bp); } if ((bp = bo->bo_dirty.bv_root) != NULL) { bo->bo_dirty.bv_root = bp = buf_splay(lblkno, 0, bp); if (bp->b_lblkno == lblkno && !(bp->b_xflags & BX_BKGRDMARKER)) return (bp); } return (NULL); } /* * Associate a buffer with a vnode. */ void bgetvp(struct vnode *vp, struct buf *bp) { struct bufobj *bo; bo = &vp->v_bufobj; ASSERT_BO_LOCKED(bo); VNASSERT(bp->b_vp == NULL, bp->b_vp, ("bgetvp: not free")); CTR3(KTR_BUF, "bgetvp(%p) vp %p flags %X", bp, vp, bp->b_flags); VNASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0, vp, ("bgetvp: bp already attached! %p", bp)); vhold(vp); if (VFS_NEEDSGIANT(vp->v_mount) || bo->bo_flag & BO_NEEDSGIANT) bp->b_flags |= B_NEEDSGIANT; bp->b_vp = vp; bp->b_bufobj = bo; /* * Insert onto list for new vnode. */ buf_vlist_add(bp, bo, BX_VNCLEAN); } /* * Disassociate a buffer from a vnode. */ void brelvp(struct buf *bp) { struct bufobj *bo; struct vnode *vp; CTR3(KTR_BUF, "brelvp(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); KASSERT(bp->b_vp != NULL, ("brelvp: NULL")); /* * Delete from old vnode list, if on one. */ vp = bp->b_vp; /* XXX */ bo = bp->b_bufobj; BO_LOCK(bo); if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) buf_vlist_remove(bp); else panic("brelvp: Buffer %p not on queue.", bp); if ((bo->bo_flag & BO_ONWORKLST) && bo->bo_dirty.bv_cnt == 0) { bo->bo_flag &= ~BO_ONWORKLST; mtx_lock(&sync_mtx); LIST_REMOVE(bo, bo_synclist); syncer_worklist_len--; mtx_unlock(&sync_mtx); } bp->b_flags &= ~B_NEEDSGIANT; bp->b_vp = NULL; bp->b_bufobj = NULL; BO_UNLOCK(bo); vdrop(vp); } /* * Add an item to the syncer work queue. */ static void vn_syncer_add_to_worklist(struct bufobj *bo, int delay) { int queue, slot; ASSERT_BO_LOCKED(bo); mtx_lock(&sync_mtx); if (bo->bo_flag & BO_ONWORKLST) LIST_REMOVE(bo, bo_synclist); else { bo->bo_flag |= BO_ONWORKLST; syncer_worklist_len++; } if (delay > syncer_maxdelay - 2) delay = syncer_maxdelay - 2; slot = (syncer_delayno + delay) & syncer_mask; queue = VFS_NEEDSGIANT(bo->__bo_vnode->v_mount) ? WI_GIANTQ : WI_MPSAFEQ; LIST_INSERT_HEAD(&syncer_workitem_pending[queue][slot], bo, bo_synclist); mtx_unlock(&sync_mtx); } static int sysctl_vfs_worklist_len(SYSCTL_HANDLER_ARGS) { int error, len; mtx_lock(&sync_mtx); len = syncer_worklist_len - sync_vnode_count; mtx_unlock(&sync_mtx); error = SYSCTL_OUT(req, &len, sizeof(len)); return (error); } SYSCTL_PROC(_vfs, OID_AUTO, worklist_len, CTLTYPE_INT | CTLFLAG_RD, NULL, 0, sysctl_vfs_worklist_len, "I", "Syncer thread worklist length"); static struct proc *updateproc; static void sched_sync(void); static struct kproc_desc up_kp = { "syncer", sched_sync, &updateproc }; SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp); static int sync_vnode(struct synclist *slp, struct bufobj **bo, struct thread *td) { struct vnode *vp; struct mount *mp; *bo = LIST_FIRST(slp); if (*bo == NULL) return (0); vp = (*bo)->__bo_vnode; /* XXX */ if (VOP_ISLOCKED(vp) != 0 || VI_TRYLOCK(vp) == 0) return (1); /* * We use vhold in case the vnode does not * successfully sync. vhold prevents the vnode from * going away when we unlock the sync_mtx so that * we can acquire the vnode interlock. */ vholdl(vp); mtx_unlock(&sync_mtx); VI_UNLOCK(vp); if (vn_start_write(vp, &mp, V_NOWAIT) != 0) { vdrop(vp); mtx_lock(&sync_mtx); return (*bo == LIST_FIRST(slp)); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); (void) VOP_FSYNC(vp, MNT_LAZY, td); VOP_UNLOCK(vp, 0); vn_finished_write(mp); BO_LOCK(*bo); if (((*bo)->bo_flag & BO_ONWORKLST) != 0) { /* * Put us back on the worklist. The worklist * routine will remove us from our current * position and then add us back in at a later * position. */ vn_syncer_add_to_worklist(*bo, syncdelay); } BO_UNLOCK(*bo); vdrop(vp); mtx_lock(&sync_mtx); return (0); } /* * System filesystem synchronizer daemon. */ static void sched_sync(void) { struct synclist *gnext, *next; struct synclist *gslp, *slp; struct bufobj *bo; long starttime; struct thread *td = curthread; int last_work_seen; int net_worklist_len; int syncer_final_iter; int first_printf; int error; last_work_seen = 0; syncer_final_iter = 0; first_printf = 1; syncer_state = SYNCER_RUNNING; starttime = time_uptime; td->td_pflags |= TDP_NORUNNINGBUF; EVENTHANDLER_REGISTER(shutdown_pre_sync, syncer_shutdown, td->td_proc, SHUTDOWN_PRI_LAST); mtx_lock(&sync_mtx); for (;;) { if (syncer_state == SYNCER_FINAL_DELAY && syncer_final_iter == 0) { mtx_unlock(&sync_mtx); kproc_suspend_check(td->td_proc); mtx_lock(&sync_mtx); } net_worklist_len = syncer_worklist_len - sync_vnode_count; if (syncer_state != SYNCER_RUNNING && starttime != time_uptime) { if (first_printf) { printf("\nSyncing disks, vnodes remaining..."); first_printf = 0; } printf("%d ", net_worklist_len); } starttime = time_uptime; /* * Push files whose dirty time has expired. Be careful * of interrupt race on slp queue. * * Skip over empty worklist slots when shutting down. */ do { slp = &syncer_workitem_pending[WI_MPSAFEQ][syncer_delayno]; gslp = &syncer_workitem_pending[WI_GIANTQ][syncer_delayno]; syncer_delayno += 1; if (syncer_delayno == syncer_maxdelay) syncer_delayno = 0; next = &syncer_workitem_pending[WI_MPSAFEQ][syncer_delayno]; gnext = &syncer_workitem_pending[WI_GIANTQ][syncer_delayno]; /* * If the worklist has wrapped since the * it was emptied of all but syncer vnodes, * switch to the FINAL_DELAY state and run * for one more second. */ if (syncer_state == SYNCER_SHUTTING_DOWN && net_worklist_len == 0 && last_work_seen == syncer_delayno) { syncer_state = SYNCER_FINAL_DELAY; syncer_final_iter = SYNCER_SHUTDOWN_SPEEDUP; } } while (syncer_state != SYNCER_RUNNING && LIST_EMPTY(slp) && LIST_EMPTY(gslp) && syncer_worklist_len > 0); /* * Keep track of the last time there was anything * on the worklist other than syncer vnodes. * Return to the SHUTTING_DOWN state if any * new work appears. */ if (net_worklist_len > 0 || syncer_state == SYNCER_RUNNING) last_work_seen = syncer_delayno; if (net_worklist_len > 0 && syncer_state == SYNCER_FINAL_DELAY) syncer_state = SYNCER_SHUTTING_DOWN; while (!LIST_EMPTY(slp)) { error = sync_vnode(slp, &bo, td); if (error == 1) { LIST_REMOVE(bo, bo_synclist); LIST_INSERT_HEAD(next, bo, bo_synclist); continue; } } if (!LIST_EMPTY(gslp)) { mtx_unlock(&sync_mtx); mtx_lock(&Giant); mtx_lock(&sync_mtx); while (!LIST_EMPTY(gslp)) { error = sync_vnode(gslp, &bo, td); if (error == 1) { LIST_REMOVE(bo, bo_synclist); LIST_INSERT_HEAD(gnext, bo, bo_synclist); continue; } } mtx_unlock(&Giant); } if (syncer_state == SYNCER_FINAL_DELAY && syncer_final_iter > 0) syncer_final_iter--; /* * The variable rushjob allows the kernel to speed up the * processing of the filesystem syncer process. A rushjob * value of N tells the filesystem syncer to process the next * N seconds worth of work on its queue ASAP. Currently rushjob * is used by the soft update code to speed up the filesystem * syncer process when the incore state is getting so far * ahead of the disk that the kernel memory pool is being * threatened with exhaustion. */ if (rushjob > 0) { rushjob -= 1; continue; } /* * Just sleep for a short period of time between * iterations when shutting down to allow some I/O * to happen. * * If it has taken us less than a second to process the * current work, then wait. Otherwise start right over * again. We can still lose time if any single round * takes more than two seconds, but it does not really * matter as we are just trying to generally pace the * filesystem activity. */ if (syncer_state != SYNCER_RUNNING) cv_timedwait(&sync_wakeup, &sync_mtx, hz / SYNCER_SHUTDOWN_SPEEDUP); else if (time_uptime == starttime) cv_timedwait(&sync_wakeup, &sync_mtx, hz); } } /* * Request the syncer daemon to speed up its work. * We never push it to speed up more than half of its * normal turn time, otherwise it could take over the cpu. */ int speedup_syncer(void) { int ret = 0; mtx_lock(&sync_mtx); if (rushjob < syncdelay / 2) { rushjob += 1; stat_rush_requests += 1; ret = 1; } mtx_unlock(&sync_mtx); cv_broadcast(&sync_wakeup); return (ret); } /* * Tell the syncer to speed up its work and run though its work * list several times, then tell it to shut down. */ static void syncer_shutdown(void *arg, int howto) { if (howto & RB_NOSYNC) return; mtx_lock(&sync_mtx); syncer_state = SYNCER_SHUTTING_DOWN; rushjob = 0; mtx_unlock(&sync_mtx); cv_broadcast(&sync_wakeup); kproc_shutdown(arg, howto); } /* * Reassign a buffer from one vnode to another. * Used to assign file specific control information * (indirect blocks) to the vnode to which they belong. */ void reassignbuf(struct buf *bp) { struct vnode *vp; struct bufobj *bo; int delay; #ifdef INVARIANTS struct bufv *bv; #endif vp = bp->b_vp; bo = bp->b_bufobj; ++reassignbufcalls; CTR3(KTR_BUF, "reassignbuf(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); /* * B_PAGING flagged buffers cannot be reassigned because their vp * is not fully linked in. */ if (bp->b_flags & B_PAGING) panic("cannot reassign paging buffer"); /* * Delete from old vnode list, if on one. */ BO_LOCK(bo); if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) buf_vlist_remove(bp); else panic("reassignbuf: Buffer %p not on queue.", bp); /* * If dirty, put on list of dirty buffers; otherwise insert onto list * of clean buffers. */ if (bp->b_flags & B_DELWRI) { if ((bo->bo_flag & BO_ONWORKLST) == 0) { switch (vp->v_type) { case VDIR: delay = dirdelay; break; case VCHR: delay = metadelay; break; default: delay = filedelay; } vn_syncer_add_to_worklist(bo, delay); } buf_vlist_add(bp, bo, BX_VNDIRTY); } else { buf_vlist_add(bp, bo, BX_VNCLEAN); if ((bo->bo_flag & BO_ONWORKLST) && bo->bo_dirty.bv_cnt == 0) { mtx_lock(&sync_mtx); LIST_REMOVE(bo, bo_synclist); syncer_worklist_len--; mtx_unlock(&sync_mtx); bo->bo_flag &= ~BO_ONWORKLST; } } #ifdef INVARIANTS bv = &bo->bo_clean; bp = TAILQ_FIRST(&bv->bv_hd); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); bp = TAILQ_LAST(&bv->bv_hd, buflists); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); bv = &bo->bo_dirty; bp = TAILQ_FIRST(&bv->bv_hd); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); bp = TAILQ_LAST(&bv->bv_hd, buflists); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); #endif BO_UNLOCK(bo); } /* * Increment the use and hold counts on the vnode, taking care to reference * the driver's usecount if this is a chardev. The vholdl() will remove * the vnode from the free list if it is presently free. Requires the * vnode interlock and returns with it held. */ static void v_incr_usecount(struct vnode *vp) { CTR2(KTR_VFS, "%s: vp %p", __func__, vp); vp->v_usecount++; if (vp->v_type == VCHR && vp->v_rdev != NULL) { dev_lock(); vp->v_rdev->si_usecount++; dev_unlock(); } vholdl(vp); } /* * Turn a holdcnt into a use+holdcnt such that only one call to * v_decr_usecount is needed. */ static void v_upgrade_usecount(struct vnode *vp) { CTR2(KTR_VFS, "%s: vp %p", __func__, vp); vp->v_usecount++; if (vp->v_type == VCHR && vp->v_rdev != NULL) { dev_lock(); vp->v_rdev->si_usecount++; dev_unlock(); } } /* * Decrement the vnode use and hold count along with the driver's usecount * if this is a chardev. The vdropl() below releases the vnode interlock * as it may free the vnode. */ static void v_decr_usecount(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __FUNCTION__); VNASSERT(vp->v_usecount > 0, vp, ("v_decr_usecount: negative usecount")); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); vp->v_usecount--; if (vp->v_type == VCHR && vp->v_rdev != NULL) { dev_lock(); vp->v_rdev->si_usecount--; dev_unlock(); } vdropl(vp); } /* * Decrement only the use count and driver use count. This is intended to * be paired with a follow on vdropl() to release the remaining hold count. * In this way we may vgone() a vnode with a 0 usecount without risk of * having it end up on a free list because the hold count is kept above 0. */ static void v_decr_useonly(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __FUNCTION__); VNASSERT(vp->v_usecount > 0, vp, ("v_decr_useonly: negative usecount")); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); vp->v_usecount--; if (vp->v_type == VCHR && vp->v_rdev != NULL) { dev_lock(); vp->v_rdev->si_usecount--; dev_unlock(); } } /* * Grab a particular vnode from the free list, increment its * reference count and lock it. VI_DOOMED is set if the vnode * is being destroyed. Only callers who specify LK_RETRY will * see doomed vnodes. If inactive processing was delayed in * vput try to do it here. */ int vget(struct vnode *vp, int flags, struct thread *td) { int error; error = 0; VFS_ASSERT_GIANT(vp->v_mount); VNASSERT((flags & LK_TYPE_MASK) != 0, vp, ("vget: invalid lock operation")); CTR3(KTR_VFS, "%s: vp %p with flags %d", __func__, vp, flags); if ((flags & LK_INTERLOCK) == 0) VI_LOCK(vp); vholdl(vp); if ((error = vn_lock(vp, flags | LK_INTERLOCK)) != 0) { vdrop(vp); CTR2(KTR_VFS, "%s: impossible to lock vnode %p", __func__, vp); return (error); } if (vp->v_iflag & VI_DOOMED && (flags & LK_RETRY) == 0) panic("vget: vn_lock failed to return ENOENT\n"); VI_LOCK(vp); /* Upgrade our holdcnt to a usecount. */ v_upgrade_usecount(vp); /* * We don't guarantee that any particular close will * trigger inactive processing so just make a best effort * here at preventing a reference to a removed file. If * we don't succeed no harm is done. */ if (vp->v_iflag & VI_OWEINACT) { if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE && (flags & LK_NOWAIT) == 0) vinactive(vp, td); vp->v_iflag &= ~VI_OWEINACT; } VI_UNLOCK(vp); return (0); } /* * Increase the reference count of a vnode. */ void vref(struct vnode *vp) { CTR2(KTR_VFS, "%s: vp %p", __func__, vp); VI_LOCK(vp); v_incr_usecount(vp); VI_UNLOCK(vp); } /* * Return reference count of a vnode. * * The results of this call are only guaranteed when some mechanism other * than the VI lock is used to stop other processes from gaining references * to the vnode. This may be the case if the caller holds the only reference. * This is also useful when stale data is acceptable as race conditions may * be accounted for by some other means. */ int vrefcnt(struct vnode *vp) { int usecnt; VI_LOCK(vp); usecnt = vp->v_usecount; VI_UNLOCK(vp); return (usecnt); } #define VPUTX_VRELE 1 #define VPUTX_VPUT 2 #define VPUTX_VUNREF 3 static void vputx(struct vnode *vp, int func) { int error; KASSERT(vp != NULL, ("vputx: null vp")); if (func == VPUTX_VUNREF) - ASSERT_VOP_ELOCKED(vp, "vunref"); + ASSERT_VOP_LOCKED(vp, "vunref"); else if (func == VPUTX_VPUT) ASSERT_VOP_LOCKED(vp, "vput"); else KASSERT(func == VPUTX_VRELE, ("vputx: wrong func")); VFS_ASSERT_GIANT(vp->v_mount); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); VI_LOCK(vp); /* Skip this v_writecount check if we're going to panic below. */ VNASSERT(vp->v_writecount < vp->v_usecount || vp->v_usecount < 1, vp, ("vputx: missed vn_close")); error = 0; if (vp->v_usecount > 1 || ((vp->v_iflag & VI_DOINGINACT) && vp->v_usecount == 1)) { if (func == VPUTX_VPUT) VOP_UNLOCK(vp, 0); v_decr_usecount(vp); return; } if (vp->v_usecount != 1) { vprint("vputx: negative ref count", vp); panic("vputx: negative ref cnt"); } CTR2(KTR_VFS, "%s: return vnode %p to the freelist", __func__, vp); /* * We want to hold the vnode until the inactive finishes to * prevent vgone() races. We drop the use count here and the * hold count below when we're done. */ v_decr_useonly(vp); /* * We must call VOP_INACTIVE with the node locked. Mark * as VI_DOINGINACT to avoid recursion. */ vp->v_iflag |= VI_OWEINACT; - if (func == VPUTX_VRELE) { + switch (func) { + case VPUTX_VRELE: error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK); VI_LOCK(vp); - } else if (func == VPUTX_VPUT && VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { - error = VOP_LOCK(vp, LK_UPGRADE | LK_INTERLOCK | LK_NOWAIT); - VI_LOCK(vp); + break; + case VPUTX_VPUT: + if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { + error = VOP_LOCK(vp, LK_UPGRADE | LK_INTERLOCK | + LK_NOWAIT); + VI_LOCK(vp); + } + break; + case VPUTX_VUNREF: + if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) + error = EBUSY; + break; } if (vp->v_usecount > 0) vp->v_iflag &= ~VI_OWEINACT; if (error == 0) { if (vp->v_iflag & VI_OWEINACT) vinactive(vp, curthread); if (func != VPUTX_VUNREF) VOP_UNLOCK(vp, 0); } vdropl(vp); } /* * Vnode put/release. * If count drops to zero, call inactive routine and return to freelist. */ void vrele(struct vnode *vp) { vputx(vp, VPUTX_VRELE); } /* * Release an already locked vnode. This give the same effects as * unlock+vrele(), but takes less time and avoids releasing and * re-aquiring the lock (as vrele() acquires the lock internally.) */ void vput(struct vnode *vp) { vputx(vp, VPUTX_VPUT); } /* * Release an exclusively locked vnode. Do not unlock the vnode lock. */ void vunref(struct vnode *vp) { vputx(vp, VPUTX_VUNREF); } /* * Somebody doesn't want the vnode recycled. */ void vhold(struct vnode *vp) { VI_LOCK(vp); vholdl(vp); VI_UNLOCK(vp); } void vholdl(struct vnode *vp) { CTR2(KTR_VFS, "%s: vp %p", __func__, vp); vp->v_holdcnt++; if (VSHOULDBUSY(vp)) vbusy(vp); } /* * Note that there is one less who cares about this vnode. vdrop() is the * opposite of vhold(). */ void vdrop(struct vnode *vp) { VI_LOCK(vp); vdropl(vp); } /* * Drop the hold count of the vnode. If this is the last reference to * the vnode we will free it if it has been vgone'd otherwise it is * placed on the free list. */ void vdropl(struct vnode *vp) { ASSERT_VI_LOCKED(vp, "vdropl"); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if (vp->v_holdcnt <= 0) panic("vdrop: holdcnt %d", vp->v_holdcnt); vp->v_holdcnt--; if (vp->v_holdcnt == 0) { if (vp->v_iflag & VI_DOOMED) { CTR2(KTR_VFS, "%s: destroying the vnode %p", __func__, vp); vdestroy(vp); return; } else vfree(vp); } VI_UNLOCK(vp); } /* * Call VOP_INACTIVE on the vnode and manage the DOINGINACT and OWEINACT * flags. DOINGINACT prevents us from recursing in calls to vinactive. * OWEINACT tracks whether a vnode missed a call to inactive due to a * failed lock upgrade. */ static void vinactive(struct vnode *vp, struct thread *td) { ASSERT_VOP_ELOCKED(vp, "vinactive"); ASSERT_VI_LOCKED(vp, "vinactive"); VNASSERT((vp->v_iflag & VI_DOINGINACT) == 0, vp, ("vinactive: recursed on VI_DOINGINACT")); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); vp->v_iflag |= VI_DOINGINACT; vp->v_iflag &= ~VI_OWEINACT; VI_UNLOCK(vp); VOP_INACTIVE(vp, td); VI_LOCK(vp); VNASSERT(vp->v_iflag & VI_DOINGINACT, vp, ("vinactive: lost VI_DOINGINACT")); vp->v_iflag &= ~VI_DOINGINACT; } /* * Remove any vnodes in the vnode table belonging to mount point mp. * * If FORCECLOSE is not specified, there should not be any active ones, * return error if any are found (nb: this is a user error, not a * system error). If FORCECLOSE is specified, detach any active vnodes * that are found. * * If WRITECLOSE is set, only flush out regular file vnodes open for * writing. * * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped. * * `rootrefs' specifies the base reference count for the root vnode * of this filesystem. The root vnode is considered busy if its * v_usecount exceeds this value. On a successful return, vflush(, td) * will call vrele() on the root vnode exactly rootrefs times. * If the SKIPSYSTEM or WRITECLOSE flags are specified, rootrefs must * be zero. */ #ifdef DIAGNOSTIC static int busyprt = 0; /* print out busy vnodes */ SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "Print out busy vnodes"); #endif int vflush(struct mount *mp, int rootrefs, int flags, struct thread *td) { struct vnode *vp, *mvp, *rootvp = NULL; struct vattr vattr; int busy = 0, error; CTR4(KTR_VFS, "%s: mp %p with rootrefs %d and flags %d", __func__, mp, rootrefs, flags); if (rootrefs > 0) { KASSERT((flags & (SKIPSYSTEM | WRITECLOSE)) == 0, ("vflush: bad args")); /* * Get the filesystem root vnode. We can vput() it * immediately, since with rootrefs > 0, it won't go away. */ if ((error = VFS_ROOT(mp, LK_EXCLUSIVE, &rootvp)) != 0) { CTR2(KTR_VFS, "%s: vfs_root lookup failed with %d", __func__, error); return (error); } vput(rootvp); } MNT_ILOCK(mp); loop: MNT_VNODE_FOREACH(vp, mp, mvp) { VI_LOCK(vp); vholdl(vp); MNT_IUNLOCK(mp); error = vn_lock(vp, LK_INTERLOCK | LK_EXCLUSIVE); if (error) { vdrop(vp); MNT_ILOCK(mp); MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp); goto loop; } /* * Skip over a vnodes marked VV_SYSTEM. */ if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) { VOP_UNLOCK(vp, 0); vdrop(vp); MNT_ILOCK(mp); continue; } /* * If WRITECLOSE is set, flush out unlinked but still open * files (even if open only for reading) and regular file * vnodes open for writing. */ if (flags & WRITECLOSE) { error = VOP_GETATTR(vp, &vattr, td->td_ucred); VI_LOCK(vp); if ((vp->v_type == VNON || (error == 0 && vattr.va_nlink > 0)) && (vp->v_writecount == 0 || vp->v_type != VREG)) { VOP_UNLOCK(vp, 0); vdropl(vp); MNT_ILOCK(mp); continue; } } else VI_LOCK(vp); /* * With v_usecount == 0, all we need to do is clear out the * vnode data structures and we are done. * * If FORCECLOSE is set, forcibly close the vnode. */ if (vp->v_usecount == 0 || (flags & FORCECLOSE)) { VNASSERT(vp->v_usecount == 0 || (vp->v_type != VCHR && vp->v_type != VBLK), vp, ("device VNODE %p is FORCECLOSED", vp)); vgonel(vp); } else { busy++; #ifdef DIAGNOSTIC if (busyprt) vprint("vflush: busy vnode", vp); #endif } VOP_UNLOCK(vp, 0); vdropl(vp); MNT_ILOCK(mp); } MNT_IUNLOCK(mp); if (rootrefs > 0 && (flags & FORCECLOSE) == 0) { /* * If just the root vnode is busy, and if its refcount * is equal to `rootrefs', then go ahead and kill it. */ VI_LOCK(rootvp); KASSERT(busy > 0, ("vflush: not busy")); VNASSERT(rootvp->v_usecount >= rootrefs, rootvp, ("vflush: usecount %d < rootrefs %d", rootvp->v_usecount, rootrefs)); if (busy == 1 && rootvp->v_usecount == rootrefs) { VOP_LOCK(rootvp, LK_EXCLUSIVE|LK_INTERLOCK); vgone(rootvp); VOP_UNLOCK(rootvp, 0); busy = 0; } else VI_UNLOCK(rootvp); } if (busy) { CTR2(KTR_VFS, "%s: failing as %d vnodes are busy", __func__, busy); return (EBUSY); } for (; rootrefs > 0; rootrefs--) vrele(rootvp); return (0); } /* * Recycle an unused vnode to the front of the free list. */ int vrecycle(struct vnode *vp, struct thread *td) { int recycled; ASSERT_VOP_ELOCKED(vp, "vrecycle"); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); recycled = 0; VI_LOCK(vp); if (vp->v_usecount == 0) { recycled = 1; vgonel(vp); } VI_UNLOCK(vp); return (recycled); } /* * Eliminate all activity associated with a vnode * in preparation for reuse. */ void vgone(struct vnode *vp) { VI_LOCK(vp); vgonel(vp); VI_UNLOCK(vp); } /* * vgone, with the vp interlock held. */ void vgonel(struct vnode *vp) { struct thread *td; int oweinact; int active; struct mount *mp; ASSERT_VOP_ELOCKED(vp, "vgonel"); ASSERT_VI_LOCKED(vp, "vgonel"); VNASSERT(vp->v_holdcnt, vp, ("vgonel: vp %p has no reference.", vp)); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); td = curthread; /* * Don't vgonel if we're already doomed. */ if (vp->v_iflag & VI_DOOMED) return; vp->v_iflag |= VI_DOOMED; /* * Check to see if the vnode is in use. If so, we have to call * VOP_CLOSE() and VOP_INACTIVE(). */ active = vp->v_usecount; oweinact = (vp->v_iflag & VI_OWEINACT); VI_UNLOCK(vp); /* * Clean out any buffers associated with the vnode. * If the flush fails, just toss the buffers. */ mp = NULL; if (!TAILQ_EMPTY(&vp->v_bufobj.bo_dirty.bv_hd)) (void) vn_start_secondary_write(vp, &mp, V_WAIT); if (vinvalbuf(vp, V_SAVE, 0, 0) != 0) vinvalbuf(vp, 0, 0, 0); /* * If purging an active vnode, it must be closed and * deactivated before being reclaimed. */ if (active) VOP_CLOSE(vp, FNONBLOCK, NOCRED, td); if (oweinact || active) { VI_LOCK(vp); if ((vp->v_iflag & VI_DOINGINACT) == 0) vinactive(vp, td); VI_UNLOCK(vp); } /* * Reclaim the vnode. */ if (VOP_RECLAIM(vp, td)) panic("vgone: cannot reclaim"); if (mp != NULL) vn_finished_secondary_write(mp); VNASSERT(vp->v_object == NULL, vp, ("vop_reclaim left v_object vp=%p, tag=%s", vp, vp->v_tag)); /* * Clear the advisory locks and wake up waiting threads. */ (void)VOP_ADVLOCKPURGE(vp); /* * Delete from old mount point vnode list. */ delmntque(vp); cache_purge(vp); /* * Done with purge, reset to the standard lock and invalidate * the vnode. */ VI_LOCK(vp); vp->v_vnlock = &vp->v_lock; vp->v_op = &dead_vnodeops; vp->v_tag = "none"; vp->v_type = VBAD; } /* * Calculate the total number of references to a special device. */ int vcount(struct vnode *vp) { int count; dev_lock(); count = vp->v_rdev->si_usecount; dev_unlock(); return (count); } /* * Same as above, but using the struct cdev *as argument */ int count_dev(struct cdev *dev) { int count; dev_lock(); count = dev->si_usecount; dev_unlock(); return(count); } /* * Print out a description of a vnode. */ static char *typename[] = {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD", "VMARKER"}; void vn_printf(struct vnode *vp, const char *fmt, ...) { va_list ap; char buf[256], buf2[16]; u_long flags; va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); printf("%p: ", (void *)vp); printf("tag %s, type %s\n", vp->v_tag, typename[vp->v_type]); printf(" usecount %d, writecount %d, refcount %d mountedhere %p\n", vp->v_usecount, vp->v_writecount, vp->v_holdcnt, vp->v_mountedhere); buf[0] = '\0'; buf[1] = '\0'; if (vp->v_vflag & VV_ROOT) strlcat(buf, "|VV_ROOT", sizeof(buf)); if (vp->v_vflag & VV_ISTTY) strlcat(buf, "|VV_ISTTY", sizeof(buf)); if (vp->v_vflag & VV_NOSYNC) strlcat(buf, "|VV_NOSYNC", sizeof(buf)); if (vp->v_vflag & VV_CACHEDLABEL) strlcat(buf, "|VV_CACHEDLABEL", sizeof(buf)); if (vp->v_vflag & VV_TEXT) strlcat(buf, "|VV_TEXT", sizeof(buf)); if (vp->v_vflag & VV_COPYONWRITE) strlcat(buf, "|VV_COPYONWRITE", sizeof(buf)); if (vp->v_vflag & VV_SYSTEM) strlcat(buf, "|VV_SYSTEM", sizeof(buf)); if (vp->v_vflag & VV_PROCDEP) strlcat(buf, "|VV_PROCDEP", sizeof(buf)); if (vp->v_vflag & VV_NOKNOTE) strlcat(buf, "|VV_NOKNOTE", sizeof(buf)); if (vp->v_vflag & VV_DELETED) strlcat(buf, "|VV_DELETED", sizeof(buf)); if (vp->v_vflag & VV_MD) strlcat(buf, "|VV_MD", sizeof(buf)); flags = vp->v_vflag & ~(VV_ROOT | VV_ISTTY | VV_NOSYNC | VV_CACHEDLABEL | VV_TEXT | VV_COPYONWRITE | VV_SYSTEM | VV_PROCDEP | VV_NOKNOTE | VV_DELETED | VV_MD); if (flags != 0) { snprintf(buf2, sizeof(buf2), "|VV(0x%lx)", flags); strlcat(buf, buf2, sizeof(buf)); } if (vp->v_iflag & VI_MOUNT) strlcat(buf, "|VI_MOUNT", sizeof(buf)); if (vp->v_iflag & VI_AGE) strlcat(buf, "|VI_AGE", sizeof(buf)); if (vp->v_iflag & VI_DOOMED) strlcat(buf, "|VI_DOOMED", sizeof(buf)); if (vp->v_iflag & VI_FREE) strlcat(buf, "|VI_FREE", sizeof(buf)); if (vp->v_iflag & VI_DOINGINACT) strlcat(buf, "|VI_DOINGINACT", sizeof(buf)); if (vp->v_iflag & VI_OWEINACT) strlcat(buf, "|VI_OWEINACT", sizeof(buf)); flags = vp->v_iflag & ~(VI_MOUNT | VI_AGE | VI_DOOMED | VI_FREE | VI_DOINGINACT | VI_OWEINACT); if (flags != 0) { snprintf(buf2, sizeof(buf2), "|VI(0x%lx)", flags); strlcat(buf, buf2, sizeof(buf)); } printf(" flags (%s)\n", buf + 1); if (mtx_owned(VI_MTX(vp))) printf(" VI_LOCKed"); if (vp->v_object != NULL) printf(" v_object %p ref %d pages %d\n", vp->v_object, vp->v_object->ref_count, vp->v_object->resident_page_count); printf(" "); lockmgr_printinfo(vp->v_vnlock); if (vp->v_data != NULL) VOP_PRINT(vp); } #ifdef DDB /* * List all of the locked vnodes in the system. * Called when debugging the kernel. */ DB_SHOW_COMMAND(lockedvnods, lockedvnodes) { struct mount *mp, *nmp; struct vnode *vp; /* * Note: because this is DDB, we can't obey the locking semantics * for these structures, which means we could catch an inconsistent * state and dereference a nasty pointer. Not much to be done * about that. */ db_printf("Locked vnodes\n"); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { nmp = TAILQ_NEXT(mp, mnt_list); TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (vp->v_type != VMARKER && VOP_ISLOCKED(vp)) vprint("", vp); } nmp = TAILQ_NEXT(mp, mnt_list); } } /* * Show details about the given vnode. */ DB_SHOW_COMMAND(vnode, db_show_vnode) { struct vnode *vp; if (!have_addr) return; vp = (struct vnode *)addr; vn_printf(vp, "vnode "); } /* * Show details about the given mount point. */ DB_SHOW_COMMAND(mount, db_show_mount) { struct mount *mp; struct vfsopt *opt; struct statfs *sp; struct vnode *vp; char buf[512]; u_int flags; if (!have_addr) { /* No address given, print short info about all mount points. */ TAILQ_FOREACH(mp, &mountlist, mnt_list) { db_printf("%p %s on %s (%s)\n", mp, mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname, mp->mnt_stat.f_fstypename); if (db_pager_quit) break; } db_printf("\nMore info: show mount \n"); return; } mp = (struct mount *)addr; db_printf("%p %s on %s (%s)\n", mp, mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname, mp->mnt_stat.f_fstypename); buf[0] = '\0'; flags = mp->mnt_flag; #define MNT_FLAG(flag) do { \ if (flags & (flag)) { \ if (buf[0] != '\0') \ strlcat(buf, ", ", sizeof(buf)); \ strlcat(buf, (#flag) + 4, sizeof(buf)); \ flags &= ~(flag); \ } \ } while (0) MNT_FLAG(MNT_RDONLY); MNT_FLAG(MNT_SYNCHRONOUS); MNT_FLAG(MNT_NOEXEC); MNT_FLAG(MNT_NOSUID); MNT_FLAG(MNT_UNION); MNT_FLAG(MNT_ASYNC); MNT_FLAG(MNT_SUIDDIR); MNT_FLAG(MNT_SOFTDEP); MNT_FLAG(MNT_NOSYMFOLLOW); MNT_FLAG(MNT_GJOURNAL); MNT_FLAG(MNT_MULTILABEL); MNT_FLAG(MNT_ACLS); MNT_FLAG(MNT_NOATIME); MNT_FLAG(MNT_NOCLUSTERR); MNT_FLAG(MNT_NOCLUSTERW); MNT_FLAG(MNT_NFS4ACLS); MNT_FLAG(MNT_EXRDONLY); MNT_FLAG(MNT_EXPORTED); MNT_FLAG(MNT_DEFEXPORTED); MNT_FLAG(MNT_EXPORTANON); MNT_FLAG(MNT_EXKERB); MNT_FLAG(MNT_EXPUBLIC); MNT_FLAG(MNT_LOCAL); MNT_FLAG(MNT_QUOTA); MNT_FLAG(MNT_ROOTFS); MNT_FLAG(MNT_USER); MNT_FLAG(MNT_IGNORE); MNT_FLAG(MNT_UPDATE); MNT_FLAG(MNT_DELEXPORT); MNT_FLAG(MNT_RELOAD); MNT_FLAG(MNT_FORCE); MNT_FLAG(MNT_SNAPSHOT); MNT_FLAG(MNT_BYFSID); MNT_FLAG(MNT_SOFTDEP); #undef MNT_FLAG if (flags != 0) { if (buf[0] != '\0') strlcat(buf, ", ", sizeof(buf)); snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "0x%08x", flags); } db_printf(" mnt_flag = %s\n", buf); buf[0] = '\0'; flags = mp->mnt_kern_flag; #define MNT_KERN_FLAG(flag) do { \ if (flags & (flag)) { \ if (buf[0] != '\0') \ strlcat(buf, ", ", sizeof(buf)); \ strlcat(buf, (#flag) + 5, sizeof(buf)); \ flags &= ~(flag); \ } \ } while (0) MNT_KERN_FLAG(MNTK_UNMOUNTF); MNT_KERN_FLAG(MNTK_ASYNC); MNT_KERN_FLAG(MNTK_SOFTDEP); MNT_KERN_FLAG(MNTK_NOINSMNTQ); MNT_KERN_FLAG(MNTK_DRAINING); MNT_KERN_FLAG(MNTK_REFEXPIRE); MNT_KERN_FLAG(MNTK_EXTENDED_SHARED); MNT_KERN_FLAG(MNTK_SHARED_WRITES); MNT_KERN_FLAG(MNTK_UNMOUNT); MNT_KERN_FLAG(MNTK_MWAIT); MNT_KERN_FLAG(MNTK_SUSPEND); MNT_KERN_FLAG(MNTK_SUSPEND2); MNT_KERN_FLAG(MNTK_SUSPENDED); MNT_KERN_FLAG(MNTK_MPSAFE); MNT_KERN_FLAG(MNTK_LOOKUP_SHARED); MNT_KERN_FLAG(MNTK_NOKNOTE); #undef MNT_KERN_FLAG if (flags != 0) { if (buf[0] != '\0') strlcat(buf, ", ", sizeof(buf)); snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "0x%08x", flags); } db_printf(" mnt_kern_flag = %s\n", buf); db_printf(" mnt_opt = "); opt = TAILQ_FIRST(mp->mnt_opt); if (opt != NULL) { db_printf("%s", opt->name); opt = TAILQ_NEXT(opt, link); while (opt != NULL) { db_printf(", %s", opt->name); opt = TAILQ_NEXT(opt, link); } } db_printf("\n"); sp = &mp->mnt_stat; db_printf(" mnt_stat = { version=%u type=%u flags=0x%016jx " "bsize=%ju iosize=%ju blocks=%ju bfree=%ju bavail=%jd files=%ju " "ffree=%jd syncwrites=%ju asyncwrites=%ju syncreads=%ju " "asyncreads=%ju namemax=%u owner=%u fsid=[%d, %d] }\n", (u_int)sp->f_version, (u_int)sp->f_type, (uintmax_t)sp->f_flags, (uintmax_t)sp->f_bsize, (uintmax_t)sp->f_iosize, (uintmax_t)sp->f_blocks, (uintmax_t)sp->f_bfree, (intmax_t)sp->f_bavail, (uintmax_t)sp->f_files, (intmax_t)sp->f_ffree, (uintmax_t)sp->f_syncwrites, (uintmax_t)sp->f_asyncwrites, (uintmax_t)sp->f_syncreads, (uintmax_t)sp->f_asyncreads, (u_int)sp->f_namemax, (u_int)sp->f_owner, (int)sp->f_fsid.val[0], (int)sp->f_fsid.val[1]); db_printf(" mnt_cred = { uid=%u ruid=%u", (u_int)mp->mnt_cred->cr_uid, (u_int)mp->mnt_cred->cr_ruid); if (jailed(mp->mnt_cred)) db_printf(", jail=%d", mp->mnt_cred->cr_prison->pr_id); db_printf(" }\n"); db_printf(" mnt_ref = %d\n", mp->mnt_ref); db_printf(" mnt_gen = %d\n", mp->mnt_gen); db_printf(" mnt_nvnodelistsize = %d\n", mp->mnt_nvnodelistsize); db_printf(" mnt_writeopcount = %d\n", mp->mnt_writeopcount); db_printf(" mnt_noasync = %u\n", mp->mnt_noasync); db_printf(" mnt_maxsymlinklen = %d\n", mp->mnt_maxsymlinklen); db_printf(" mnt_iosize_max = %d\n", mp->mnt_iosize_max); db_printf(" mnt_hashseed = %u\n", mp->mnt_hashseed); db_printf(" mnt_secondary_writes = %d\n", mp->mnt_secondary_writes); db_printf(" mnt_secondary_accwrites = %d\n", mp->mnt_secondary_accwrites); db_printf(" mnt_gjprovider = %s\n", mp->mnt_gjprovider != NULL ? mp->mnt_gjprovider : "NULL"); db_printf("\n"); TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (vp->v_type != VMARKER) { vn_printf(vp, "vnode "); if (db_pager_quit) break; } } } #endif /* DDB */ /* * Fill in a struct xvfsconf based on a struct vfsconf. */ static void vfsconf2x(struct vfsconf *vfsp, struct xvfsconf *xvfsp) { strcpy(xvfsp->vfc_name, vfsp->vfc_name); xvfsp->vfc_typenum = vfsp->vfc_typenum; xvfsp->vfc_refcount = vfsp->vfc_refcount; xvfsp->vfc_flags = vfsp->vfc_flags; /* * These are unused in userland, we keep them * to not break binary compatibility. */ xvfsp->vfc_vfsops = NULL; xvfsp->vfc_next = NULL; } /* * Top level filesystem related information gathering. */ static int sysctl_vfs_conflist(SYSCTL_HANDLER_ARGS) { struct vfsconf *vfsp; struct xvfsconf xvfsp; int error; error = 0; TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { bzero(&xvfsp, sizeof(xvfsp)); vfsconf2x(vfsp, &xvfsp); error = SYSCTL_OUT(req, &xvfsp, sizeof xvfsp); if (error) break; } return (error); } SYSCTL_PROC(_vfs, OID_AUTO, conflist, CTLFLAG_RD, NULL, 0, sysctl_vfs_conflist, "S,xvfsconf", "List of all configured filesystems"); #ifndef BURN_BRIDGES static int sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS); static int vfs_sysctl(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1 - 1; /* XXX */ u_int namelen = arg2 + 1; /* XXX */ struct vfsconf *vfsp; struct xvfsconf xvfsp; printf("WARNING: userland calling deprecated sysctl, " "please rebuild world\n"); #if 1 || defined(COMPAT_PRELITE2) /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */ if (namelen == 1) return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); #endif switch (name[1]) { case VFS_MAXTYPENUM: if (namelen != 2) return (ENOTDIR); return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); case VFS_CONF: if (namelen != 3) return (ENOTDIR); /* overloaded */ TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) if (vfsp->vfc_typenum == name[2]) break; if (vfsp == NULL) return (EOPNOTSUPP); bzero(&xvfsp, sizeof(xvfsp)); vfsconf2x(vfsp, &xvfsp); return (SYSCTL_OUT(req, &xvfsp, sizeof(xvfsp))); } return (EOPNOTSUPP); } static SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD | CTLFLAG_SKIP, vfs_sysctl, "Generic filesystem"); #if 1 || defined(COMPAT_PRELITE2) static int sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS) { int error; struct vfsconf *vfsp; struct ovfsconf ovfs; TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { bzero(&ovfs, sizeof(ovfs)); ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ strcpy(ovfs.vfc_name, vfsp->vfc_name); ovfs.vfc_index = vfsp->vfc_typenum; ovfs.vfc_refcount = vfsp->vfc_refcount; ovfs.vfc_flags = vfsp->vfc_flags; error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); if (error) return error; } return 0; } #endif /* 1 || COMPAT_PRELITE2 */ #endif /* !BURN_BRIDGES */ #define KINFO_VNODESLOP 10 #ifdef notyet /* * Dump vnode list (via sysctl). */ /* ARGSUSED */ static int sysctl_vnode(SYSCTL_HANDLER_ARGS) { struct xvnode *xvn; struct mount *mp; struct vnode *vp; int error, len, n; /* * Stale numvnodes access is not fatal here. */ req->lock = 0; len = (numvnodes + KINFO_VNODESLOP) * sizeof *xvn; if (!req->oldptr) /* Make an estimate */ return (SYSCTL_OUT(req, 0, len)); error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); xvn = malloc(len, M_TEMP, M_ZERO | M_WAITOK); n = 0; mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) continue; MNT_ILOCK(mp); TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (n == len) break; vref(vp); xvn[n].xv_size = sizeof *xvn; xvn[n].xv_vnode = vp; xvn[n].xv_id = 0; /* XXX compat */ #define XV_COPY(field) xvn[n].xv_##field = vp->v_##field XV_COPY(usecount); XV_COPY(writecount); XV_COPY(holdcnt); XV_COPY(mount); XV_COPY(numoutput); XV_COPY(type); #undef XV_COPY xvn[n].xv_flag = vp->v_vflag; switch (vp->v_type) { case VREG: case VDIR: case VLNK: break; case VBLK: case VCHR: if (vp->v_rdev == NULL) { vrele(vp); continue; } xvn[n].xv_dev = dev2udev(vp->v_rdev); break; case VSOCK: xvn[n].xv_socket = vp->v_socket; break; case VFIFO: xvn[n].xv_fifo = vp->v_fifoinfo; break; case VNON: case VBAD: default: /* shouldn't happen? */ vrele(vp); continue; } vrele(vp); ++n; } MNT_IUNLOCK(mp); mtx_lock(&mountlist_mtx); vfs_unbusy(mp); if (n == len) break; } mtx_unlock(&mountlist_mtx); error = SYSCTL_OUT(req, xvn, n * sizeof *xvn); free(xvn, M_TEMP); return (error); } SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD, 0, 0, sysctl_vnode, "S,xvnode", ""); #endif /* * Unmount all filesystems. The list is traversed in reverse order * of mounting to avoid dependencies. */ void vfs_unmountall(void) { struct mount *mp; struct thread *td; int error; KASSERT(curthread != NULL, ("vfs_unmountall: NULL curthread")); CTR1(KTR_VFS, "%s: unmounting all filesystems", __func__); td = curthread; /* * Since this only runs when rebooting, it is not interlocked. */ while(!TAILQ_EMPTY(&mountlist)) { mp = TAILQ_LAST(&mountlist, mntlist); error = dounmount(mp, MNT_FORCE, td); if (error) { TAILQ_REMOVE(&mountlist, mp, mnt_list); /* * XXX: Due to the way in which we mount the root * file system off of devfs, devfs will generate a * "busy" warning when we try to unmount it before * the root. Don't print a warning as a result in * order to avoid false positive errors that may * cause needless upset. */ if (strcmp(mp->mnt_vfc->vfc_name, "devfs") != 0) { printf("unmount of %s failed (", mp->mnt_stat.f_mntonname); if (error == EBUSY) printf("BUSY)\n"); else printf("%d)\n", error); } } else { /* The unmount has removed mp from the mountlist */ } } } /* * perform msync on all vnodes under a mount point * the mount point must be locked. */ void vfs_msync(struct mount *mp, int flags) { struct vnode *vp, *mvp; struct vm_object *obj; CTR2(KTR_VFS, "%s: mp %p", __func__, mp); MNT_ILOCK(mp); MNT_VNODE_FOREACH(vp, mp, mvp) { VI_LOCK(vp); obj = vp->v_object; if (obj != NULL && (obj->flags & OBJ_MIGHTBEDIRTY) != 0 && (flags == MNT_WAIT || VOP_ISLOCKED(vp) == 0)) { MNT_IUNLOCK(mp); if (!vget(vp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, curthread)) { if (vp->v_vflag & VV_NOSYNC) { /* unlinked */ vput(vp); MNT_ILOCK(mp); continue; } obj = vp->v_object; if (obj != NULL) { VM_OBJECT_LOCK(obj); vm_object_page_clean(obj, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : OBJPC_NOSYNC); VM_OBJECT_UNLOCK(obj); } vput(vp); } MNT_ILOCK(mp); } else VI_UNLOCK(vp); } MNT_IUNLOCK(mp); } /* * Mark a vnode as free, putting it up for recycling. */ static void vfree(struct vnode *vp) { ASSERT_VI_LOCKED(vp, "vfree"); mtx_lock(&vnode_free_list_mtx); VNASSERT(vp->v_op != NULL, vp, ("vfree: vnode already reclaimed.")); VNASSERT((vp->v_iflag & VI_FREE) == 0, vp, ("vnode already free")); VNASSERT(VSHOULDFREE(vp), vp, ("vfree: freeing when we shouldn't")); VNASSERT((vp->v_iflag & VI_DOOMED) == 0, vp, ("vfree: Freeing doomed vnode")); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if (vp->v_iflag & VI_AGE) { TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); } else { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); } freevnodes++; vp->v_iflag &= ~VI_AGE; vp->v_iflag |= VI_FREE; mtx_unlock(&vnode_free_list_mtx); } /* * Opposite of vfree() - mark a vnode as in use. */ static void vbusy(struct vnode *vp) { ASSERT_VI_LOCKED(vp, "vbusy"); VNASSERT((vp->v_iflag & VI_FREE) != 0, vp, ("vnode not free")); VNASSERT(vp->v_op != NULL, vp, ("vbusy: vnode already reclaimed.")); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); mtx_lock(&vnode_free_list_mtx); TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); freevnodes--; vp->v_iflag &= ~(VI_FREE|VI_AGE); mtx_unlock(&vnode_free_list_mtx); } static void destroy_vpollinfo(struct vpollinfo *vi) { knlist_destroy(&vi->vpi_selinfo.si_note); mtx_destroy(&vi->vpi_lock); uma_zfree(vnodepoll_zone, vi); } /* * Initalize per-vnode helper structure to hold poll-related state. */ void v_addpollinfo(struct vnode *vp) { struct vpollinfo *vi; if (vp->v_pollinfo != NULL) return; vi = uma_zalloc(vnodepoll_zone, M_WAITOK); mtx_init(&vi->vpi_lock, "vnode pollinfo", NULL, MTX_DEF); knlist_init(&vi->vpi_selinfo.si_note, vp, vfs_knllock, vfs_knlunlock, vfs_knl_assert_locked, vfs_knl_assert_unlocked); VI_LOCK(vp); if (vp->v_pollinfo != NULL) { VI_UNLOCK(vp); destroy_vpollinfo(vi); return; } vp->v_pollinfo = vi; VI_UNLOCK(vp); } /* * Record a process's interest in events which might happen to * a vnode. Because poll uses the historic select-style interface * internally, this routine serves as both the ``check for any * pending events'' and the ``record my interest in future events'' * functions. (These are done together, while the lock is held, * to avoid race conditions.) */ int vn_pollrecord(struct vnode *vp, struct thread *td, int events) { v_addpollinfo(vp); mtx_lock(&vp->v_pollinfo->vpi_lock); if (vp->v_pollinfo->vpi_revents & events) { /* * This leaves events we are not interested * in available for the other process which * which presumably had requested them * (otherwise they would never have been * recorded). */ events &= vp->v_pollinfo->vpi_revents; vp->v_pollinfo->vpi_revents &= ~events; mtx_unlock(&vp->v_pollinfo->vpi_lock); return (events); } vp->v_pollinfo->vpi_events |= events; selrecord(td, &vp->v_pollinfo->vpi_selinfo); mtx_unlock(&vp->v_pollinfo->vpi_lock); return (0); } /* * Routine to create and manage a filesystem syncer vnode. */ #define sync_close ((int (*)(struct vop_close_args *))nullop) static int sync_fsync(struct vop_fsync_args *); static int sync_inactive(struct vop_inactive_args *); static int sync_reclaim(struct vop_reclaim_args *); static struct vop_vector sync_vnodeops = { .vop_bypass = VOP_EOPNOTSUPP, .vop_close = sync_close, /* close */ .vop_fsync = sync_fsync, /* fsync */ .vop_inactive = sync_inactive, /* inactive */ .vop_reclaim = sync_reclaim, /* reclaim */ .vop_lock1 = vop_stdlock, /* lock */ .vop_unlock = vop_stdunlock, /* unlock */ .vop_islocked = vop_stdislocked, /* islocked */ }; /* * Create a new filesystem syncer vnode for the specified mount point. */ void vfs_allocate_syncvnode(struct mount *mp) { struct vnode *vp; struct bufobj *bo; static long start, incr, next; int error; /* Allocate a new vnode */ error = getnewvnode("syncer", mp, &sync_vnodeops, &vp); if (error != 0) panic("vfs_allocate_syncvnode: getnewvnode() failed"); vp->v_type = VNON; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); vp->v_vflag |= VV_FORCEINSMQ; error = insmntque(vp, mp); if (error != 0) panic("vfs_allocate_syncvnode: insmntque() failed"); vp->v_vflag &= ~VV_FORCEINSMQ; VOP_UNLOCK(vp, 0); /* * Place the vnode onto the syncer worklist. We attempt to * scatter them about on the list so that they will go off * at evenly distributed times even if all the filesystems * are mounted at once. */ next += incr; if (next == 0 || next > syncer_maxdelay) { start /= 2; incr /= 2; if (start == 0) { start = syncer_maxdelay / 2; incr = syncer_maxdelay; } next = start; } bo = &vp->v_bufobj; BO_LOCK(bo); vn_syncer_add_to_worklist(bo, syncdelay > 0 ? next % syncdelay : 0); /* XXX - vn_syncer_add_to_worklist() also grabs and drops sync_mtx. */ mtx_lock(&sync_mtx); sync_vnode_count++; if (mp->mnt_syncer == NULL) { mp->mnt_syncer = vp; vp = NULL; } mtx_unlock(&sync_mtx); BO_UNLOCK(bo); if (vp != NULL) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); vgone(vp); vput(vp); } } void vfs_deallocate_syncvnode(struct mount *mp) { struct vnode *vp; mtx_lock(&sync_mtx); vp = mp->mnt_syncer; if (vp != NULL) mp->mnt_syncer = NULL; mtx_unlock(&sync_mtx); if (vp != NULL) vrele(vp); } /* * Do a lazy sync of the filesystem. */ static int sync_fsync(struct vop_fsync_args *ap) { struct vnode *syncvp = ap->a_vp; struct mount *mp = syncvp->v_mount; int error; struct bufobj *bo; /* * We only need to do something if this is a lazy evaluation. */ if (ap->a_waitfor != MNT_LAZY) return (0); /* * Move ourselves to the back of the sync list. */ bo = &syncvp->v_bufobj; BO_LOCK(bo); vn_syncer_add_to_worklist(bo, syncdelay); BO_UNLOCK(bo); /* * Walk the list of vnodes pushing all that are dirty and * not already on the sync list. */ mtx_lock(&mountlist_mtx); if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { mtx_unlock(&mountlist_mtx); return (0); } if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) { vfs_unbusy(mp); return (0); } MNT_ILOCK(mp); mp->mnt_noasync++; mp->mnt_kern_flag &= ~MNTK_ASYNC; MNT_IUNLOCK(mp); vfs_msync(mp, MNT_NOWAIT); error = VFS_SYNC(mp, MNT_LAZY); MNT_ILOCK(mp); mp->mnt_noasync--; if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0) mp->mnt_kern_flag |= MNTK_ASYNC; MNT_IUNLOCK(mp); vn_finished_write(mp); vfs_unbusy(mp); return (error); } /* * The syncer vnode is no referenced. */ static int sync_inactive(struct vop_inactive_args *ap) { vgone(ap->a_vp); return (0); } /* * The syncer vnode is no longer needed and is being decommissioned. * * Modifications to the worklist must be protected by sync_mtx. */ static int sync_reclaim(struct vop_reclaim_args *ap) { struct vnode *vp = ap->a_vp; struct bufobj *bo; bo = &vp->v_bufobj; BO_LOCK(bo); mtx_lock(&sync_mtx); if (vp->v_mount->mnt_syncer == vp) vp->v_mount->mnt_syncer = NULL; if (bo->bo_flag & BO_ONWORKLST) { LIST_REMOVE(bo, bo_synclist); syncer_worklist_len--; sync_vnode_count--; bo->bo_flag &= ~BO_ONWORKLST; } mtx_unlock(&sync_mtx); BO_UNLOCK(bo); return (0); } /* * Check if vnode represents a disk device */ int vn_isdisk(struct vnode *vp, int *errp) { int error; error = 0; dev_lock(); if (vp->v_type != VCHR) error = ENOTBLK; else if (vp->v_rdev == NULL) error = ENXIO; else if (vp->v_rdev->si_devsw == NULL) error = ENXIO; else if (!(vp->v_rdev->si_devsw->d_flags & D_DISK)) error = ENOTBLK; dev_unlock(); if (errp != NULL) *errp = error; return (error == 0); } /* * Common filesystem object access control check routine. Accepts a * vnode's type, "mode", uid and gid, requested access mode, credentials, * and optional call-by-reference privused argument allowing vaccess() * to indicate to the caller whether privilege was used to satisfy the * request (obsoleted). Returns 0 on success, or an errno on failure. * * The ifdef'd CAPABILITIES version is here for reference, but is not * actually used. */ int vaccess(enum vtype type, mode_t file_mode, uid_t file_uid, gid_t file_gid, accmode_t accmode, struct ucred *cred, int *privused) { accmode_t dac_granted; accmode_t priv_granted; KASSERT((accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND)) == 0, ("invalid bit in accmode")); KASSERT((accmode & VAPPEND) == 0 || (accmode & VWRITE), ("VAPPEND without VWRITE")); /* * Look for a normal, non-privileged way to access the file/directory * as requested. If it exists, go with that. */ if (privused != NULL) *privused = 0; dac_granted = 0; /* Check the owner. */ if (cred->cr_uid == file_uid) { dac_granted |= VADMIN; if (file_mode & S_IXUSR) dac_granted |= VEXEC; if (file_mode & S_IRUSR) dac_granted |= VREAD; if (file_mode & S_IWUSR) dac_granted |= (VWRITE | VAPPEND); if ((accmode & dac_granted) == accmode) return (0); goto privcheck; } /* Otherwise, check the groups (first match) */ if (groupmember(file_gid, cred)) { if (file_mode & S_IXGRP) dac_granted |= VEXEC; if (file_mode & S_IRGRP) dac_granted |= VREAD; if (file_mode & S_IWGRP) dac_granted |= (VWRITE | VAPPEND); if ((accmode & dac_granted) == accmode) return (0); goto privcheck; } /* Otherwise, check everyone else. */ if (file_mode & S_IXOTH) dac_granted |= VEXEC; if (file_mode & S_IROTH) dac_granted |= VREAD; if (file_mode & S_IWOTH) dac_granted |= (VWRITE | VAPPEND); if ((accmode & dac_granted) == accmode) return (0); privcheck: /* * Build a privilege mask to determine if the set of privileges * satisfies the requirements when combined with the granted mask * from above. For each privilege, if the privilege is required, * bitwise or the request type onto the priv_granted mask. */ priv_granted = 0; if (type == VDIR) { /* * For directories, use PRIV_VFS_LOOKUP to satisfy VEXEC * requests, instead of PRIV_VFS_EXEC. */ if ((accmode & VEXEC) && ((dac_granted & VEXEC) == 0) && !priv_check_cred(cred, PRIV_VFS_LOOKUP, 0)) priv_granted |= VEXEC; } else { /* * Ensure that at least one execute bit is on. Otherwise, * a privileged user will always succeed, and we don't want * this to happen unless the file really is executable. */ if ((accmode & VEXEC) && ((dac_granted & VEXEC) == 0) && (file_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) != 0 && !priv_check_cred(cred, PRIV_VFS_EXEC, 0)) priv_granted |= VEXEC; } if ((accmode & VREAD) && ((dac_granted & VREAD) == 0) && !priv_check_cred(cred, PRIV_VFS_READ, 0)) priv_granted |= VREAD; if ((accmode & VWRITE) && ((dac_granted & VWRITE) == 0) && !priv_check_cred(cred, PRIV_VFS_WRITE, 0)) priv_granted |= (VWRITE | VAPPEND); if ((accmode & VADMIN) && ((dac_granted & VADMIN) == 0) && !priv_check_cred(cred, PRIV_VFS_ADMIN, 0)) priv_granted |= VADMIN; if ((accmode & (priv_granted | dac_granted)) == accmode) { /* XXX audit: privilege used */ if (privused != NULL) *privused = 1; return (0); } return ((accmode & VADMIN) ? EPERM : EACCES); } /* * Credential check based on process requesting service, and per-attribute * permissions. */ int extattr_check_cred(struct vnode *vp, int attrnamespace, struct ucred *cred, struct thread *td, accmode_t accmode) { /* * Kernel-invoked always succeeds. */ if (cred == NOCRED) return (0); /* * Do not allow privileged processes in jail to directly manipulate * system attributes. */ switch (attrnamespace) { case EXTATTR_NAMESPACE_SYSTEM: /* Potentially should be: return (EPERM); */ return (priv_check_cred(cred, PRIV_VFS_EXTATTR_SYSTEM, 0)); case EXTATTR_NAMESPACE_USER: return (VOP_ACCESS(vp, accmode, cred, td)); default: return (EPERM); } } #ifdef DEBUG_VFS_LOCKS /* * This only exists to supress warnings from unlocked specfs accesses. It is * no longer ok to have an unlocked VFS. */ #define IGNORE_LOCK(vp) (panicstr != NULL || (vp) == NULL || \ (vp)->v_type == VCHR || (vp)->v_type == VBAD) int vfs_badlock_ddb = 1; /* Drop into debugger on violation. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_ddb, CTLFLAG_RW, &vfs_badlock_ddb, 0, "Drop into debugger on lock violation"); int vfs_badlock_mutex = 1; /* Check for interlock across VOPs. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_mutex, CTLFLAG_RW, &vfs_badlock_mutex, 0, "Check for interlock across VOPs"); int vfs_badlock_print = 1; /* Print lock violations. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_print, CTLFLAG_RW, &vfs_badlock_print, 0, "Print lock violations"); #ifdef KDB int vfs_badlock_backtrace = 1; /* Print backtrace at lock violations. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_backtrace, CTLFLAG_RW, &vfs_badlock_backtrace, 0, "Print backtrace at lock violations"); #endif static void vfs_badlock(const char *msg, const char *str, struct vnode *vp) { #ifdef KDB if (vfs_badlock_backtrace) kdb_backtrace(); #endif if (vfs_badlock_print) printf("%s: %p %s\n", str, (void *)vp, msg); if (vfs_badlock_ddb) kdb_enter(KDB_WHY_VFSLOCK, "lock violation"); } void assert_vi_locked(struct vnode *vp, const char *str) { if (vfs_badlock_mutex && !mtx_owned(VI_MTX(vp))) vfs_badlock("interlock is not locked but should be", str, vp); } void assert_vi_unlocked(struct vnode *vp, const char *str) { if (vfs_badlock_mutex && mtx_owned(VI_MTX(vp))) vfs_badlock("interlock is locked but should not be", str, vp); } void assert_vop_locked(struct vnode *vp, const char *str) { if (!IGNORE_LOCK(vp) && VOP_ISLOCKED(vp) == 0) vfs_badlock("is not locked but should be", str, vp); } void assert_vop_unlocked(struct vnode *vp, const char *str) { if (!IGNORE_LOCK(vp) && VOP_ISLOCKED(vp) == LK_EXCLUSIVE) vfs_badlock("is locked but should not be", str, vp); } void assert_vop_elocked(struct vnode *vp, const char *str) { if (!IGNORE_LOCK(vp) && VOP_ISLOCKED(vp) != LK_EXCLUSIVE) vfs_badlock("is not exclusive locked but should be", str, vp); } #if 0 void assert_vop_elocked_other(struct vnode *vp, const char *str) { if (!IGNORE_LOCK(vp) && VOP_ISLOCKED(vp) != LK_EXCLOTHER) vfs_badlock("is not exclusive locked by another thread", str, vp); } void assert_vop_slocked(struct vnode *vp, const char *str) { if (!IGNORE_LOCK(vp) && VOP_ISLOCKED(vp) != LK_SHARED) vfs_badlock("is not locked shared but should be", str, vp); } #endif /* 0 */ #endif /* DEBUG_VFS_LOCKS */ void vop_rename_fail(struct vop_rename_args *ap) { if (ap->a_tvp != NULL) vput(ap->a_tvp); if (ap->a_tdvp == ap->a_tvp) vrele(ap->a_tdvp); else vput(ap->a_tdvp); vrele(ap->a_fdvp); vrele(ap->a_fvp); } void vop_rename_pre(void *ap) { struct vop_rename_args *a = ap; #ifdef DEBUG_VFS_LOCKS if (a->a_tvp) ASSERT_VI_UNLOCKED(a->a_tvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_tdvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_fvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_fdvp, "VOP_RENAME"); /* Check the source (from). */ if (a->a_tdvp->v_vnlock != a->a_fdvp->v_vnlock && (a->a_tvp == NULL || a->a_tvp->v_vnlock != a->a_fdvp->v_vnlock)) ASSERT_VOP_UNLOCKED(a->a_fdvp, "vop_rename: fdvp locked"); if (a->a_tvp == NULL || a->a_tvp->v_vnlock != a->a_fvp->v_vnlock) ASSERT_VOP_UNLOCKED(a->a_fvp, "vop_rename: fvp locked"); /* Check the target. */ if (a->a_tvp) ASSERT_VOP_LOCKED(a->a_tvp, "vop_rename: tvp not locked"); ASSERT_VOP_LOCKED(a->a_tdvp, "vop_rename: tdvp not locked"); #endif if (a->a_tdvp != a->a_fdvp) vhold(a->a_fdvp); if (a->a_tvp != a->a_fvp) vhold(a->a_fvp); vhold(a->a_tdvp); if (a->a_tvp) vhold(a->a_tvp); } void vop_strategy_pre(void *ap) { #ifdef DEBUG_VFS_LOCKS struct vop_strategy_args *a; struct buf *bp; a = ap; bp = a->a_bp; /* * Cluster ops lock their component buffers but not the IO container. */ if ((bp->b_flags & B_CLUSTER) != 0) return; if (!BUF_ISLOCKED(bp)) { if (vfs_badlock_print) printf( "VOP_STRATEGY: bp is not locked but should be\n"); if (vfs_badlock_ddb) kdb_enter(KDB_WHY_VFSLOCK, "lock violation"); } #endif } void vop_lookup_pre(void *ap) { #ifdef DEBUG_VFS_LOCKS struct vop_lookup_args *a; struct vnode *dvp; a = ap; dvp = a->a_dvp; ASSERT_VI_UNLOCKED(dvp, "VOP_LOOKUP"); ASSERT_VOP_LOCKED(dvp, "VOP_LOOKUP"); #endif } void vop_lookup_post(void *ap, int rc) { #ifdef DEBUG_VFS_LOCKS struct vop_lookup_args *a; struct vnode *dvp; struct vnode *vp; a = ap; dvp = a->a_dvp; vp = *(a->a_vpp); ASSERT_VI_UNLOCKED(dvp, "VOP_LOOKUP"); ASSERT_VOP_LOCKED(dvp, "VOP_LOOKUP"); if (!rc) ASSERT_VOP_LOCKED(vp, "VOP_LOOKUP (child)"); #endif } void vop_lock_pre(void *ap) { #ifdef DEBUG_VFS_LOCKS struct vop_lock1_args *a = ap; if ((a->a_flags & LK_INTERLOCK) == 0) ASSERT_VI_UNLOCKED(a->a_vp, "VOP_LOCK"); else ASSERT_VI_LOCKED(a->a_vp, "VOP_LOCK"); #endif } void vop_lock_post(void *ap, int rc) { #ifdef DEBUG_VFS_LOCKS struct vop_lock1_args *a = ap; ASSERT_VI_UNLOCKED(a->a_vp, "VOP_LOCK"); if (rc == 0) ASSERT_VOP_LOCKED(a->a_vp, "VOP_LOCK"); #endif } void vop_unlock_pre(void *ap) { #ifdef DEBUG_VFS_LOCKS struct vop_unlock_args *a = ap; if (a->a_flags & LK_INTERLOCK) ASSERT_VI_LOCKED(a->a_vp, "VOP_UNLOCK"); ASSERT_VOP_LOCKED(a->a_vp, "VOP_UNLOCK"); #endif } void vop_unlock_post(void *ap, int rc) { #ifdef DEBUG_VFS_LOCKS struct vop_unlock_args *a = ap; if (a->a_flags & LK_INTERLOCK) ASSERT_VI_UNLOCKED(a->a_vp, "VOP_UNLOCK"); #endif } void vop_create_post(void *ap, int rc) { struct vop_create_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } void vop_link_post(void *ap, int rc) { struct vop_link_args *a = ap; if (!rc) { VFS_KNOTE_LOCKED(a->a_vp, NOTE_LINK); VFS_KNOTE_LOCKED(a->a_tdvp, NOTE_WRITE); } } void vop_mkdir_post(void *ap, int rc) { struct vop_mkdir_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE | NOTE_LINK); } void vop_mknod_post(void *ap, int rc) { struct vop_mknod_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } void vop_remove_post(void *ap, int rc) { struct vop_remove_args *a = ap; if (!rc) { VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); VFS_KNOTE_LOCKED(a->a_vp, NOTE_DELETE); } } void vop_rename_post(void *ap, int rc) { struct vop_rename_args *a = ap; if (!rc) { VFS_KNOTE_UNLOCKED(a->a_fdvp, NOTE_WRITE); VFS_KNOTE_UNLOCKED(a->a_tdvp, NOTE_WRITE); VFS_KNOTE_UNLOCKED(a->a_fvp, NOTE_RENAME); if (a->a_tvp) VFS_KNOTE_UNLOCKED(a->a_tvp, NOTE_DELETE); } if (a->a_tdvp != a->a_fdvp) vdrop(a->a_fdvp); if (a->a_tvp != a->a_fvp) vdrop(a->a_fvp); vdrop(a->a_tdvp); if (a->a_tvp) vdrop(a->a_tvp); } void vop_rmdir_post(void *ap, int rc) { struct vop_rmdir_args *a = ap; if (!rc) { VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE | NOTE_LINK); VFS_KNOTE_LOCKED(a->a_vp, NOTE_DELETE); } } void vop_setattr_post(void *ap, int rc) { struct vop_setattr_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); } void vop_symlink_post(void *ap, int rc) { struct vop_symlink_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } static struct knlist fs_knlist; static void vfs_event_init(void *arg) { knlist_init_mtx(&fs_knlist, NULL); } /* XXX - correct order? */ SYSINIT(vfs_knlist, SI_SUB_VFS, SI_ORDER_ANY, vfs_event_init, NULL); void vfs_event_signal(fsid_t *fsid, uint32_t event, intptr_t data __unused) { KNOTE_UNLOCKED(&fs_knlist, event); } static int filt_fsattach(struct knote *kn); static void filt_fsdetach(struct knote *kn); static int filt_fsevent(struct knote *kn, long hint); struct filterops fs_filtops = { .f_isfd = 0, .f_attach = filt_fsattach, .f_detach = filt_fsdetach, .f_event = filt_fsevent }; static int filt_fsattach(struct knote *kn) { kn->kn_flags |= EV_CLEAR; knlist_add(&fs_knlist, kn, 0); return (0); } static void filt_fsdetach(struct knote *kn) { knlist_remove(&fs_knlist, kn, 0); } static int filt_fsevent(struct knote *kn, long hint) { kn->kn_fflags |= hint; return (kn->kn_fflags != 0); } static int sysctl_vfs_ctl(SYSCTL_HANDLER_ARGS) { struct vfsidctl vc; int error; struct mount *mp; error = SYSCTL_IN(req, &vc, sizeof(vc)); if (error) return (error); if (vc.vc_vers != VFS_CTL_VERS1) return (EINVAL); mp = vfs_getvfs(&vc.vc_fsid); if (mp == NULL) return (ENOENT); /* ensure that a specific sysctl goes to the right filesystem. */ if (strcmp(vc.vc_fstypename, "*") != 0 && strcmp(vc.vc_fstypename, mp->mnt_vfc->vfc_name) != 0) { vfs_rel(mp); return (EINVAL); } VCTLTOREQ(&vc, req); error = VFS_SYSCTL(mp, vc.vc_op, req); vfs_rel(mp); return (error); } SYSCTL_PROC(_vfs, OID_AUTO, ctl, CTLFLAG_WR, NULL, 0, sysctl_vfs_ctl, "", "Sysctl by fsid"); /* * Function to initialize a va_filerev field sensibly. * XXX: Wouldn't a random number make a lot more sense ?? */ u_quad_t init_va_filerev(void) { struct bintime bt; getbinuptime(&bt); return (((u_quad_t)bt.sec << 32LL) | (bt.frac >> 32LL)); } static int filt_vfsread(struct knote *kn, long hint); static int filt_vfswrite(struct knote *kn, long hint); static int filt_vfsvnode(struct knote *kn, long hint); static void filt_vfsdetach(struct knote *kn); static struct filterops vfsread_filtops = { .f_isfd = 1, .f_detach = filt_vfsdetach, .f_event = filt_vfsread }; static struct filterops vfswrite_filtops = { .f_isfd = 1, .f_detach = filt_vfsdetach, .f_event = filt_vfswrite }; static struct filterops vfsvnode_filtops = { .f_isfd = 1, .f_detach = filt_vfsdetach, .f_event = filt_vfsvnode }; static void vfs_knllock(void *arg) { struct vnode *vp = arg; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); } static void vfs_knlunlock(void *arg) { struct vnode *vp = arg; VOP_UNLOCK(vp, 0); } static void vfs_knl_assert_locked(void *arg) { #ifdef DEBUG_VFS_LOCKS struct vnode *vp = arg; ASSERT_VOP_LOCKED(vp, "vfs_knl_assert_locked"); #endif } static void vfs_knl_assert_unlocked(void *arg) { #ifdef DEBUG_VFS_LOCKS struct vnode *vp = arg; ASSERT_VOP_UNLOCKED(vp, "vfs_knl_assert_unlocked"); #endif } int vfs_kqfilter(struct vop_kqfilter_args *ap) { struct vnode *vp = ap->a_vp; struct knote *kn = ap->a_kn; struct knlist *knl; switch (kn->kn_filter) { case EVFILT_READ: kn->kn_fop = &vfsread_filtops; break; case EVFILT_WRITE: kn->kn_fop = &vfswrite_filtops; break; case EVFILT_VNODE: kn->kn_fop = &vfsvnode_filtops; break; default: return (EINVAL); } kn->kn_hook = (caddr_t)vp; v_addpollinfo(vp); if (vp->v_pollinfo == NULL) return (ENOMEM); knl = &vp->v_pollinfo->vpi_selinfo.si_note; knlist_add(knl, kn, 0); return (0); } /* * Detach knote from vnode */ static void filt_vfsdetach(struct knote *kn) { struct vnode *vp = (struct vnode *)kn->kn_hook; KASSERT(vp->v_pollinfo != NULL, ("Missing v_pollinfo")); knlist_remove(&vp->v_pollinfo->vpi_selinfo.si_note, kn, 0); } /*ARGSUSED*/ static int filt_vfsread(struct knote *kn, long hint) { struct vnode *vp = (struct vnode *)kn->kn_hook; struct vattr va; int res; /* * filesystem is gone, so set the EOF flag and schedule * the knote for deletion. */ if (hint == NOTE_REVOKE) { VI_LOCK(vp); kn->kn_flags |= (EV_EOF | EV_ONESHOT); VI_UNLOCK(vp); return (1); } if (VOP_GETATTR(vp, &va, curthread->td_ucred)) return (0); VI_LOCK(vp); kn->kn_data = va.va_size - kn->kn_fp->f_offset; res = (kn->kn_data != 0); VI_UNLOCK(vp); return (res); } /*ARGSUSED*/ static int filt_vfswrite(struct knote *kn, long hint) { struct vnode *vp = (struct vnode *)kn->kn_hook; VI_LOCK(vp); /* * filesystem is gone, so set the EOF flag and schedule * the knote for deletion. */ if (hint == NOTE_REVOKE) kn->kn_flags |= (EV_EOF | EV_ONESHOT); kn->kn_data = 0; VI_UNLOCK(vp); return (1); } static int filt_vfsvnode(struct knote *kn, long hint) { struct vnode *vp = (struct vnode *)kn->kn_hook; int res; VI_LOCK(vp); if (kn->kn_sfflags & hint) kn->kn_fflags |= hint; if (hint == NOTE_REVOKE) { kn->kn_flags |= EV_EOF; VI_UNLOCK(vp); return (1); } res = (kn->kn_fflags != 0); VI_UNLOCK(vp); return (res); } int vfs_read_dirent(struct vop_readdir_args *ap, struct dirent *dp, off_t off) { int error; if (dp->d_reclen > ap->a_uio->uio_resid) return (ENAMETOOLONG); error = uiomove(dp, dp->d_reclen, ap->a_uio); if (error) { if (ap->a_ncookies != NULL) { if (ap->a_cookies != NULL) free(ap->a_cookies, M_TEMP); ap->a_cookies = NULL; *ap->a_ncookies = 0; } return (error); } if (ap->a_ncookies == NULL) return (0); KASSERT(ap->a_cookies, ("NULL ap->a_cookies value with non-NULL ap->a_ncookies!")); *ap->a_cookies = realloc(*ap->a_cookies, (*ap->a_ncookies + 1) * sizeof(u_long), M_TEMP, M_WAITOK | M_ZERO); (*ap->a_cookies)[*ap->a_ncookies] = off; return (0); } /* * Mark for update the access time of the file if the filesystem * supports VOP_MARKATIME. This functionality is used by execve and * mmap, so we want to avoid the I/O implied by directly setting * va_atime for the sake of efficiency. */ void vfs_mark_atime(struct vnode *vp, struct ucred *cred) { struct mount *mp; mp = vp->v_mount; VFS_ASSERT_GIANT(mp); ASSERT_VOP_LOCKED(vp, "vfs_mark_atime"); if (mp != NULL && (mp->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0) (void)VOP_MARKATIME(vp); } /* * The purpose of this routine is to remove granularity from accmode_t, * reducing it into standard unix access bits - VEXEC, VREAD, VWRITE, * VADMIN and VAPPEND. * * If it returns 0, the caller is supposed to continue with the usual * access checks using 'accmode' as modified by this routine. If it * returns nonzero value, the caller is supposed to return that value * as errno. * * Note that after this routine runs, accmode may be zero. */ int vfs_unixify_accmode(accmode_t *accmode) { /* * There is no way to specify explicit "deny" rule using * file mode or POSIX.1e ACLs. */ if (*accmode & VEXPLICIT_DENY) { *accmode = 0; return (0); } /* * None of these can be translated into usual access bits. * Also, the common case for NFSv4 ACLs is to not contain * either of these bits. Caller should check for VWRITE * on the containing directory instead. */ if (*accmode & (VDELETE_CHILD | VDELETE)) return (EPERM); if (*accmode & VADMIN_PERMS) { *accmode &= ~VADMIN_PERMS; *accmode |= VADMIN; } /* * There is no way to deny VREAD_ATTRIBUTES, VREAD_ACL * or VSYNCHRONIZE using file mode or POSIX.1e ACL. */ *accmode &= ~(VSTAT_PERMS | VSYNCHRONIZE); return (0); } Index: projects/binutils-2.17/sys/net/bpf_buffer.c =================================================================== --- projects/binutils-2.17/sys/net/bpf_buffer.c (revision 215829) +++ projects/binutils-2.17/sys/net/bpf_buffer.c (revision 215830) @@ -1,210 +1,210 @@ /*- * Copyright (c) 2007 Seccuris Inc. * All rights reserved. * * This sofware was developed by Robert N. M. Watson under contract to * Seccuris Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Copyright (c) 1990, 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from the Stanford/CMU enet packet filter, * (net/enet.c) distributed as part of 4.3BSD, and code contributed * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence * Berkeley Laboratory. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)bpf.c 8.4 (Berkeley) 1/9/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_bpf.h" #include #include #include #include #include #include #include #include #include #include #include /* * Implement historical kernel memory buffering model for BPF: two malloc(9) * kernel buffers are hung off of the descriptor. The size is fixed prior to * attaching to an ifnet, ad cannot be changed after that. read(2) simply * copies the data to user space using uiomove(9). */ static int bpf_bufsize = 4096; SYSCTL_INT(_net_bpf, OID_AUTO, bufsize, CTLFLAG_RW, - &bpf_bufsize, 0, "Maximum capture buffer size in bytes"); + &bpf_bufsize, 0, "Default capture buffer size in bytes"); static int bpf_maxbufsize = BPF_MAXBUFSIZE; SYSCTL_INT(_net_bpf, OID_AUTO, maxbufsize, CTLFLAG_RW, - &bpf_maxbufsize, 0, "Default capture buffer in bytes"); + &bpf_maxbufsize, 0, "Maximum capture buffer in bytes"); void bpf_buffer_alloc(struct bpf_d *d) { KASSERT(d->bd_fbuf == NULL, ("bpf_buffer_alloc: bd_fbuf != NULL")); KASSERT(d->bd_sbuf == NULL, ("bpf_buffer_alloc: bd_sbuf != NULL")); KASSERT(d->bd_hbuf == NULL, ("bpf_buffer_alloc: bd_hbuf != NULL")); d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK); d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK); d->bd_hbuf = NULL; d->bd_slen = 0; d->bd_hlen = 0; } /* * Simple data copy to the current kernel buffer. */ void bpf_buffer_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src, u_int len) { u_char *src_bytes; src_bytes = (u_char *)src; bcopy(src_bytes, buf + offset, len); } /* * Scatter-gather data copy from an mbuf chain to the current kernel buffer. */ void bpf_buffer_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src, u_int len) { const struct mbuf *m; u_char *dst; u_int count; m = (struct mbuf *)src; dst = (u_char *)buf + offset; while (len > 0) { if (m == NULL) panic("bpf_mcopy"); count = min(m->m_len, len); bcopy(mtod(m, void *), dst, count); m = m->m_next; dst += count; len -= count; } } /* * Free BPF kernel buffers on device close. */ void bpf_buffer_free(struct bpf_d *d) { if (d->bd_sbuf != NULL) free(d->bd_sbuf, M_BPF); if (d->bd_hbuf != NULL) free(d->bd_hbuf, M_BPF); if (d->bd_fbuf != NULL) free(d->bd_fbuf, M_BPF); #ifdef INVARIANTS d->bd_sbuf = d->bd_hbuf = d->bd_fbuf = (caddr_t)~0; #endif } /* * This is a historical initialization that occurs when the BPF descriptor is * first opened. It does not imply selection of a buffer mode, so we don't * allocate buffers here. */ void bpf_buffer_init(struct bpf_d *d) { d->bd_bufsize = bpf_bufsize; } /* * Allocate or resize buffers. */ int bpf_buffer_ioctl_sblen(struct bpf_d *d, u_int *i) { u_int size; BPFD_LOCK(d); if (d->bd_bif != NULL) { BPFD_UNLOCK(d); return (EINVAL); } size = *i; if (size > bpf_maxbufsize) *i = size = bpf_maxbufsize; else if (size < BPF_MINBUFSIZE) *i = size = BPF_MINBUFSIZE; d->bd_bufsize = size; BPFD_UNLOCK(d); return (0); } /* * Copy buffer storage to user space in read(). */ int bpf_buffer_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio) { return (uiomove(buf, len, uio)); } Index: projects/binutils-2.17/sys/net/if_vlan.c =================================================================== --- projects/binutils-2.17/sys/net/if_vlan.c (revision 215829) +++ projects/binutils-2.17/sys/net/if_vlan.c (revision 215830) @@ -1,1499 +1,1541 @@ /*- * Copyright 1998 Massachusetts Institute of Technology * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby * granted, provided that both the above copyright notice and this * permission notice appear in all copies, that both the above * copyright notice and this permission notice appear in all * supporting documentation, and that the name of M.I.T. not be used * in advertising or publicity pertaining to distribution of the * software without specific, written prior permission. M.I.T. makes * no representations about the suitability of this software for any * purpose. It is provided "as is" without express or implied * warranty. * * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * if_vlan.c - pseudo-device driver for IEEE 802.1Q virtual LANs. * Might be extended some day to also handle IEEE 802.1p priority * tagging. This is sort of sneaky in the implementation, since * we need to pretend to be enough of an Ethernet implementation * to make arp work. The way we do this is by telling everyone * that we are an Ethernet, and then catch the packets that * ether_output() left on our output queue when it calls * if_start(), rewrite them for use by the real outgoing interface, * and ask it to send them. */ #include __FBSDID("$FreeBSD$"); #include "opt_vlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define VLANNAME "vlan" #define VLAN_DEF_HWIDTH 4 #define VLAN_IFFLAGS (IFF_BROADCAST | IFF_MULTICAST) #define UP_AND_RUNNING(ifp) \ ((ifp)->if_flags & IFF_UP && (ifp)->if_drv_flags & IFF_DRV_RUNNING) LIST_HEAD(ifvlanhead, ifvlan); struct ifvlantrunk { struct ifnet *parent; /* parent interface of this trunk */ struct rwlock rw; #ifdef VLAN_ARRAY #define VLAN_ARRAY_SIZE (EVL_VLID_MASK + 1) struct ifvlan *vlans[VLAN_ARRAY_SIZE]; /* static table */ #else struct ifvlanhead *hash; /* dynamic hash-list table */ uint16_t hmask; uint16_t hwidth; #endif int refcnt; }; struct vlan_mc_entry { struct ether_addr mc_addr; SLIST_ENTRY(vlan_mc_entry) mc_entries; }; struct ifvlan { struct ifvlantrunk *ifv_trunk; struct ifnet *ifv_ifp; #define TRUNK(ifv) ((ifv)->ifv_trunk) #define PARENT(ifv) ((ifv)->ifv_trunk->parent) int ifv_pflags; /* special flags we have set on parent */ struct ifv_linkmib { int ifvm_encaplen; /* encapsulation length */ int ifvm_mtufudge; /* MTU fudged by this much */ int ifvm_mintu; /* min transmission unit */ uint16_t ifvm_proto; /* encapsulation ethertype */ uint16_t ifvm_tag; /* tag to apply on packets leaving if */ } ifv_mib; SLIST_HEAD(, vlan_mc_entry) vlan_mc_listhead; #ifndef VLAN_ARRAY LIST_ENTRY(ifvlan) ifv_list; #endif }; #define ifv_proto ifv_mib.ifvm_proto #define ifv_tag ifv_mib.ifvm_tag #define ifv_encaplen ifv_mib.ifvm_encaplen #define ifv_mtufudge ifv_mib.ifvm_mtufudge #define ifv_mintu ifv_mib.ifvm_mintu /* Special flags we should propagate to parent. */ static struct { int flag; int (*func)(struct ifnet *, int); } vlan_pflags[] = { {IFF_PROMISC, ifpromisc}, {IFF_ALLMULTI, if_allmulti}, {0, NULL} }; SYSCTL_DECL(_net_link); SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0, "IEEE 802.1Q VLAN"); SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0, "for consistency"); static int soft_pad = 0; SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW, &soft_pad, 0, "pad short frames before tagging"); static MALLOC_DEFINE(M_VLAN, VLANNAME, "802.1Q Virtual LAN Interface"); static eventhandler_tag ifdetach_tag; static eventhandler_tag iflladdr_tag; /* * We have a global mutex, that is used to serialize configuration * changes and isn't used in normal packet delivery. * * We also have a per-trunk rwlock, that is locked shared on packet * processing and exclusive when configuration is changed. * * The VLAN_ARRAY substitutes the dynamic hash with a static array * with 4096 entries. In theory this can give a boost in processing, * however on practice it does not. Probably this is because array * is too big to fit into CPU cache. */ static struct mtx ifv_mtx; #define VLAN_LOCK_INIT() mtx_init(&ifv_mtx, "vlan_global", NULL, MTX_DEF) #define VLAN_LOCK_DESTROY() mtx_destroy(&ifv_mtx) #define VLAN_LOCK_ASSERT() mtx_assert(&ifv_mtx, MA_OWNED) #define VLAN_LOCK() mtx_lock(&ifv_mtx) #define VLAN_UNLOCK() mtx_unlock(&ifv_mtx) #define TRUNK_LOCK_INIT(trunk) rw_init(&(trunk)->rw, VLANNAME) #define TRUNK_LOCK_DESTROY(trunk) rw_destroy(&(trunk)->rw) #define TRUNK_LOCK(trunk) rw_wlock(&(trunk)->rw) #define TRUNK_UNLOCK(trunk) rw_wunlock(&(trunk)->rw) #define TRUNK_LOCK_ASSERT(trunk) rw_assert(&(trunk)->rw, RA_WLOCKED) #define TRUNK_RLOCK(trunk) rw_rlock(&(trunk)->rw) #define TRUNK_RUNLOCK(trunk) rw_runlock(&(trunk)->rw) #define TRUNK_LOCK_RASSERT(trunk) rw_assert(&(trunk)->rw, RA_RLOCKED) #ifndef VLAN_ARRAY static void vlan_inithash(struct ifvlantrunk *trunk); static void vlan_freehash(struct ifvlantrunk *trunk); static int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv); static int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv); static void vlan_growhash(struct ifvlantrunk *trunk, int howmuch); static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk, uint16_t tag); #endif static void trunk_destroy(struct ifvlantrunk *trunk); static void vlan_start(struct ifnet *ifp); static void vlan_init(void *foo); static void vlan_input(struct ifnet *ifp, struct mbuf *m); static int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr); static int vlan_setflag(struct ifnet *ifp, int flag, int status, int (*func)(struct ifnet *, int)); static int vlan_setflags(struct ifnet *ifp, int status); static int vlan_setmulti(struct ifnet *ifp); static void vlan_unconfig(struct ifnet *ifp); static void vlan_unconfig_locked(struct ifnet *ifp); static int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag); static void vlan_link_state(struct ifnet *ifp); static void vlan_capabilities(struct ifvlan *ifv); static void vlan_trunk_capabilities(struct ifnet *ifp); static struct ifnet *vlan_clone_match_ethertag(struct if_clone *, const char *, int *); static int vlan_clone_match(struct if_clone *, const char *); static int vlan_clone_create(struct if_clone *, char *, size_t, caddr_t); static int vlan_clone_destroy(struct if_clone *, struct ifnet *); static void vlan_ifdetach(void *arg, struct ifnet *ifp); static void vlan_iflladdr(void *arg, struct ifnet *ifp); static struct if_clone vlan_cloner = IFC_CLONE_INITIALIZER(VLANNAME, NULL, IF_MAXUNIT, NULL, vlan_clone_match, vlan_clone_create, vlan_clone_destroy); +#ifdef VIMAGE +static VNET_DEFINE(struct if_clone, vlan_cloner); +#define V_vlan_cloner VNET(vlan_cloner) +#endif + #ifndef VLAN_ARRAY #define HASH(n, m) ((((n) >> 8) ^ ((n) >> 4) ^ (n)) & (m)) static void vlan_inithash(struct ifvlantrunk *trunk) { int i, n; /* * The trunk must not be locked here since we call malloc(M_WAITOK). * It is OK in case this function is called before the trunk struct * gets hooked up and becomes visible from other threads. */ KASSERT(trunk->hwidth == 0 && trunk->hash == NULL, ("%s: hash already initialized", __func__)); trunk->hwidth = VLAN_DEF_HWIDTH; n = 1 << trunk->hwidth; trunk->hmask = n - 1; trunk->hash = malloc(sizeof(struct ifvlanhead) * n, M_VLAN, M_WAITOK); for (i = 0; i < n; i++) LIST_INIT(&trunk->hash[i]); } static void vlan_freehash(struct ifvlantrunk *trunk) { #ifdef INVARIANTS int i; KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); for (i = 0; i < (1 << trunk->hwidth); i++) KASSERT(LIST_EMPTY(&trunk->hash[i]), ("%s: hash table not empty", __func__)); #endif free(trunk->hash, M_VLAN); trunk->hash = NULL; trunk->hwidth = trunk->hmask = 0; } static int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv) { int i, b; struct ifvlan *ifv2; TRUNK_LOCK_ASSERT(trunk); KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); b = 1 << trunk->hwidth; i = HASH(ifv->ifv_tag, trunk->hmask); LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list) if (ifv->ifv_tag == ifv2->ifv_tag) return (EEXIST); /* * Grow the hash when the number of vlans exceeds half of the number of * hash buckets squared. This will make the average linked-list length * buckets/2. */ if (trunk->refcnt > (b * b) / 2) { vlan_growhash(trunk, 1); i = HASH(ifv->ifv_tag, trunk->hmask); } LIST_INSERT_HEAD(&trunk->hash[i], ifv, ifv_list); trunk->refcnt++; return (0); } static int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv) { int i, b; struct ifvlan *ifv2; TRUNK_LOCK_ASSERT(trunk); KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); b = 1 << trunk->hwidth; i = HASH(ifv->ifv_tag, trunk->hmask); LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list) if (ifv2 == ifv) { trunk->refcnt--; LIST_REMOVE(ifv2, ifv_list); if (trunk->refcnt < (b * b) / 2) vlan_growhash(trunk, -1); return (0); } panic("%s: vlan not found\n", __func__); return (ENOENT); /*NOTREACHED*/ } /* * Grow the hash larger or smaller if memory permits. */ static void vlan_growhash(struct ifvlantrunk *trunk, int howmuch) { struct ifvlan *ifv; struct ifvlanhead *hash2; int hwidth2, i, j, n, n2; TRUNK_LOCK_ASSERT(trunk); KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); if (howmuch == 0) { /* Harmless yet obvious coding error */ printf("%s: howmuch is 0\n", __func__); return; } hwidth2 = trunk->hwidth + howmuch; n = 1 << trunk->hwidth; n2 = 1 << hwidth2; /* Do not shrink the table below the default */ if (hwidth2 < VLAN_DEF_HWIDTH) return; /* M_NOWAIT because we're called with trunk mutex held */ hash2 = malloc(sizeof(struct ifvlanhead) * n2, M_VLAN, M_NOWAIT); if (hash2 == NULL) { printf("%s: out of memory -- hash size not changed\n", __func__); return; /* We can live with the old hash table */ } for (j = 0; j < n2; j++) LIST_INIT(&hash2[j]); for (i = 0; i < n; i++) while ((ifv = LIST_FIRST(&trunk->hash[i])) != NULL) { LIST_REMOVE(ifv, ifv_list); j = HASH(ifv->ifv_tag, n2 - 1); LIST_INSERT_HEAD(&hash2[j], ifv, ifv_list); } free(trunk->hash, M_VLAN); trunk->hash = hash2; trunk->hwidth = hwidth2; trunk->hmask = n2 - 1; if (bootverbose) if_printf(trunk->parent, "VLAN hash table resized from %d to %d buckets\n", n, n2); } static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk, uint16_t tag) { struct ifvlan *ifv; TRUNK_LOCK_RASSERT(trunk); LIST_FOREACH(ifv, &trunk->hash[HASH(tag, trunk->hmask)], ifv_list) if (ifv->ifv_tag == tag) return (ifv); return (NULL); } #if 0 /* Debugging code to view the hashtables. */ static void vlan_dumphash(struct ifvlantrunk *trunk) { int i; struct ifvlan *ifv; for (i = 0; i < (1 << trunk->hwidth); i++) { printf("%d: ", i); LIST_FOREACH(ifv, &trunk->hash[i], ifv_list) printf("%s ", ifv->ifv_ifp->if_xname); printf("\n"); } } #endif /* 0 */ #endif /* !VLAN_ARRAY */ static void trunk_destroy(struct ifvlantrunk *trunk) { VLAN_LOCK_ASSERT(); TRUNK_LOCK(trunk); #ifndef VLAN_ARRAY vlan_freehash(trunk); #endif trunk->parent->if_vlantrunk = NULL; TRUNK_UNLOCK(trunk); TRUNK_LOCK_DESTROY(trunk); free(trunk, M_VLAN); } /* * Program our multicast filter. What we're actually doing is * programming the multicast filter of the parent. This has the * side effect of causing the parent interface to receive multicast * traffic that it doesn't really want, which ends up being discarded * later by the upper protocol layers. Unfortunately, there's no way * to avoid this: there really is only one physical interface. * * XXX: There is a possible race here if more than one thread is * modifying the multicast state of the vlan interface at the same time. */ static int vlan_setmulti(struct ifnet *ifp) { struct ifnet *ifp_p; struct ifmultiaddr *ifma, *rifma = NULL; struct ifvlan *sc; struct vlan_mc_entry *mc; struct sockaddr_dl sdl; int error; /*VLAN_LOCK_ASSERT();*/ /* Find the parent. */ sc = ifp->if_softc; ifp_p = PARENT(sc); CURVNET_SET_QUIET(ifp_p->if_vnet); bzero((char *)&sdl, sizeof(sdl)); sdl.sdl_len = sizeof(sdl); sdl.sdl_family = AF_LINK; sdl.sdl_index = ifp_p->if_index; sdl.sdl_type = IFT_ETHER; sdl.sdl_alen = ETHER_ADDR_LEN; /* First, remove any existing filter entries. */ while ((mc = SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) { bcopy((char *)&mc->mc_addr, LLADDR(&sdl), ETHER_ADDR_LEN); error = if_delmulti(ifp_p, (struct sockaddr *)&sdl); if (error) return (error); SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries); free(mc, M_VLAN); } /* Now program new ones. */ TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; mc = malloc(sizeof(struct vlan_mc_entry), M_VLAN, M_NOWAIT); if (mc == NULL) return (ENOMEM); bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), (char *)&mc->mc_addr, ETHER_ADDR_LEN); SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries); bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), LLADDR(&sdl), ETHER_ADDR_LEN); error = if_addmulti(ifp_p, (struct sockaddr *)&sdl, &rifma); if (error) return (error); } CURVNET_RESTORE(); return (0); } /* * A handler for parent interface link layer address changes. * If the parent interface link layer address is changed we * should also change it on all children vlans. */ static void vlan_iflladdr(void *arg __unused, struct ifnet *ifp) { struct ifvlan *ifv; #ifndef VLAN_ARRAY struct ifvlan *next; #endif int i; /* * Check if it's a trunk interface first of all * to avoid needless locking. */ if (ifp->if_vlantrunk == NULL) return; VLAN_LOCK(); /* * OK, it's a trunk. Loop over and change all vlan's lladdrs on it. */ #ifdef VLAN_ARRAY for (i = 0; i < VLAN_ARRAY_SIZE; i++) if ((ifv = ifp->if_vlantrunk->vlans[i])) { #else /* VLAN_ARRAY */ for (i = 0; i < (1 << ifp->if_vlantrunk->hwidth); i++) LIST_FOREACH_SAFE(ifv, &ifp->if_vlantrunk->hash[i], ifv_list, next) { #endif /* VLAN_ARRAY */ VLAN_UNLOCK(); if_setlladdr(ifv->ifv_ifp, IF_LLADDR(ifp), ETHER_ADDR_LEN); VLAN_LOCK(); } VLAN_UNLOCK(); } /* * A handler for network interface departure events. * Track departure of trunks here so that we don't access invalid * pointers or whatever if a trunk is ripped from under us, e.g., * by ejecting its hot-plug card. However, if an ifnet is simply * being renamed, then there's no need to tear down the state. */ static void vlan_ifdetach(void *arg __unused, struct ifnet *ifp) { struct ifvlan *ifv; int i; /* * Check if it's a trunk interface first of all * to avoid needless locking. */ if (ifp->if_vlantrunk == NULL) return; /* If the ifnet is just being renamed, don't do anything. */ if (ifp->if_flags & IFF_RENAMING) return; VLAN_LOCK(); /* * OK, it's a trunk. Loop over and detach all vlan's on it. * Check trunk pointer after each vlan_unconfig() as it will * free it and set to NULL after the last vlan was detached. */ #ifdef VLAN_ARRAY for (i = 0; i < VLAN_ARRAY_SIZE; i++) if ((ifv = ifp->if_vlantrunk->vlans[i])) { vlan_unconfig_locked(ifv->ifv_ifp); if (ifp->if_vlantrunk == NULL) break; } #else /* VLAN_ARRAY */ restart: for (i = 0; i < (1 << ifp->if_vlantrunk->hwidth); i++) if ((ifv = LIST_FIRST(&ifp->if_vlantrunk->hash[i]))) { vlan_unconfig_locked(ifv->ifv_ifp); if (ifp->if_vlantrunk) goto restart; /* trunk->hwidth can change */ else break; } #endif /* VLAN_ARRAY */ /* Trunk should have been destroyed in vlan_unconfig(). */ KASSERT(ifp->if_vlantrunk == NULL, ("%s: purge failed", __func__)); VLAN_UNLOCK(); } /* * VLAN support can be loaded as a module. The only place in the * system that's intimately aware of this is ether_input. We hook * into this code through vlan_input_p which is defined there and * set here. Noone else in the system should be aware of this so * we use an explicit reference here. */ extern void (*vlan_input_p)(struct ifnet *, struct mbuf *); /* For if_link_state_change() eyes only... */ extern void (*vlan_link_state_p)(struct ifnet *); static int vlan_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: ifdetach_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, vlan_ifdetach, NULL, EVENTHANDLER_PRI_ANY); if (ifdetach_tag == NULL) return (ENOMEM); iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event, vlan_iflladdr, NULL, EVENTHANDLER_PRI_ANY); if (iflladdr_tag == NULL) return (ENOMEM); VLAN_LOCK_INIT(); vlan_input_p = vlan_input; vlan_link_state_p = vlan_link_state; vlan_trunk_cap_p = vlan_trunk_capabilities; +#ifndef VIMAGE if_clone_attach(&vlan_cloner); +#endif if (bootverbose) printf("vlan: initialized, using " #ifdef VLAN_ARRAY "full-size arrays" #else "hash tables with chaining" #endif "\n"); break; case MOD_UNLOAD: +#ifndef VIMAGE if_clone_detach(&vlan_cloner); +#endif EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_tag); EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_tag); vlan_input_p = NULL; vlan_link_state_p = NULL; vlan_trunk_cap_p = NULL; VLAN_LOCK_DESTROY(); if (bootverbose) printf("vlan: unloaded\n"); break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t vlan_mod = { "if_vlan", vlan_modevent, 0 }; DECLARE_MODULE(if_vlan, vlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_vlan, 3); +#ifdef VIMAGE +static void +vnet_vlan_init(const void *unused __unused) +{ + + V_vlan_cloner = vlan_cloner; + if_clone_attach(&V_vlan_cloner); +} +VNET_SYSINIT(vnet_vlan_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_vlan_init, NULL); + +static void +vnet_vlan_uninit(const void *unused __unused) +{ + + if_clone_detach(&V_vlan_cloner); +} +VNET_SYSUNINIT(vnet_vlan_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, + vnet_vlan_uninit, NULL); +#endif + static struct ifnet * vlan_clone_match_ethertag(struct if_clone *ifc, const char *name, int *tag) { const char *cp; struct ifnet *ifp; int t; /* Check for . style interface names. */ IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (ifp->if_type != IFT_ETHER) continue; if (strncmp(ifp->if_xname, name, strlen(ifp->if_xname)) != 0) continue; cp = name + strlen(ifp->if_xname); if (*cp++ != '.') continue; if (*cp == '\0') continue; t = 0; for(; *cp >= '0' && *cp <= '9'; cp++) t = (t * 10) + (*cp - '0'); if (*cp != '\0') continue; if (tag != NULL) *tag = t; break; } IFNET_RUNLOCK_NOSLEEP(); return (ifp); } static int vlan_clone_match(struct if_clone *ifc, const char *name) { const char *cp; if (vlan_clone_match_ethertag(ifc, name, NULL) != NULL) return (1); if (strncmp(VLANNAME, name, strlen(VLANNAME)) != 0) return (0); for (cp = name + 4; *cp != '\0'; cp++) { if (*cp < '0' || *cp > '9') return (0); } return (1); } static int vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) { char *dp; int wildcard; int unit; int error; int tag; int ethertag; struct ifvlan *ifv; struct ifnet *ifp; struct ifnet *p; struct ifaddr *ifa; struct sockaddr_dl *sdl; struct vlanreq vlr; static const u_char eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ /* * There are 3 (ugh) ways to specify the cloned device: * o pass a parameter block with the clone request. * o specify parameters in the text of the clone device name * o specify no parameters and get an unattached device that * must be configured separately. * The first technique is preferred; the latter two are * supported for backwards compatibilty. */ if (params) { error = copyin(params, &vlr, sizeof(vlr)); if (error) return error; p = ifunit(vlr.vlr_parent); if (p == NULL) return ENXIO; /* * Don't let the caller set up a VLAN tag with * anything except VLID bits. */ if (vlr.vlr_tag & ~EVL_VLID_MASK) return (EINVAL); error = ifc_name2unit(name, &unit); if (error != 0) return (error); ethertag = 1; tag = vlr.vlr_tag; wildcard = (unit < 0); } else if ((p = vlan_clone_match_ethertag(ifc, name, &tag)) != NULL) { ethertag = 1; unit = -1; wildcard = 0; /* * Don't let the caller set up a VLAN tag with * anything except VLID bits. */ if (tag & ~EVL_VLID_MASK) return (EINVAL); } else { ethertag = 0; error = ifc_name2unit(name, &unit); if (error != 0) return (error); wildcard = (unit < 0); } error = ifc_alloc_unit(ifc, &unit); if (error != 0) return (error); /* In the wildcard case, we need to update the name. */ if (wildcard) { for (dp = name; *dp != '\0'; dp++); if (snprintf(dp, len - (dp-name), "%d", unit) > len - (dp-name) - 1) { panic("%s: interface name too long", __func__); } } ifv = malloc(sizeof(struct ifvlan), M_VLAN, M_WAITOK | M_ZERO); ifp = ifv->ifv_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { ifc_free_unit(ifc, unit); free(ifv, M_VLAN); return (ENOSPC); } SLIST_INIT(&ifv->vlan_mc_listhead); ifp->if_softc = ifv; /* * Set the name manually rather than using if_initname because * we don't conform to the default naming convention for interfaces. */ strlcpy(ifp->if_xname, name, IFNAMSIZ); ifp->if_dname = ifc->ifc_name; ifp->if_dunit = unit; /* NB: flags are not set here */ ifp->if_linkmib = &ifv->ifv_mib; ifp->if_linkmiblen = sizeof(ifv->ifv_mib); /* NB: mtu is not set here */ ifp->if_init = vlan_init; ifp->if_start = vlan_start; ifp->if_ioctl = vlan_ioctl; ifp->if_snd.ifq_maxlen = ifqmaxlen; ifp->if_flags = VLAN_IFFLAGS; ether_ifattach(ifp, eaddr); /* Now undo some of the damage... */ ifp->if_baudrate = 0; ifp->if_type = IFT_L2VLAN; ifp->if_hdrlen = ETHER_VLAN_ENCAP_LEN; ifa = ifp->if_addr; sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_type = IFT_L2VLAN; if (ethertag) { error = vlan_config(ifv, p, tag); if (error != 0) { /* * Since we've partialy failed, we need to back * out all the way, otherwise userland could get * confused. Thus, we destroy the interface. */ ether_ifdetach(ifp); vlan_unconfig(ifp); if_free_type(ifp, IFT_ETHER); ifc_free_unit(ifc, unit); free(ifv, M_VLAN); return (error); } /* Update flags on the parent, if necessary. */ vlan_setflags(ifp, 1); } return (0); } static int vlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) { struct ifvlan *ifv = ifp->if_softc; int unit = ifp->if_dunit; ether_ifdetach(ifp); /* first, remove it from system-wide lists */ vlan_unconfig(ifp); /* now it can be unconfigured and freed */ if_free_type(ifp, IFT_ETHER); free(ifv, M_VLAN); ifc_free_unit(ifc, unit); return (0); } /* * The ifp->if_init entry point for vlan(4) is a no-op. */ static void vlan_init(void *foo __unused) { } /* * The if_start method for vlan(4) interface. It doesn't * raises the IFF_DRV_OACTIVE flag, since it is called * only from IFQ_HANDOFF() macro in ether_output_frame(). * If the interface queue is full, and vlan_start() is * not called, the queue would never get emptied and * interface would stall forever. */ static void vlan_start(struct ifnet *ifp) { struct ifvlan *ifv; struct ifnet *p; struct mbuf *m; int error; ifv = ifp->if_softc; p = PARENT(ifv); for (;;) { IF_DEQUEUE(&ifp->if_snd, m); if (m == NULL) break; BPF_MTAP(ifp, m); /* * Do not run parent's if_start() if the parent is not up, * or parent's driver will cause a system crash. */ if (!UP_AND_RUNNING(p)) { m_freem(m); ifp->if_collisions++; continue; } /* * Pad the frame to the minimum size allowed if told to. * This option is in accord with IEEE Std 802.1Q, 2003 Ed., * paragraph C.4.4.3.b. It can help to work around buggy * bridges that violate paragraph C.4.4.3.a from the same * document, i.e., fail to pad short frames after untagging. * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but * untagging it will produce a 62-byte frame, which is a runt * and requires padding. There are VLAN-enabled network * devices that just discard such runts instead or mishandle * them somehow. */ if (soft_pad) { static char pad[8]; /* just zeros */ int n; for (n = ETHERMIN + ETHER_HDR_LEN - m->m_pkthdr.len; n > 0; n -= sizeof(pad)) if (!m_append(m, min(n, sizeof(pad)), pad)) break; if (n > 0) { if_printf(ifp, "cannot pad short frame\n"); ifp->if_oerrors++; m_freem(m); continue; } } /* * If underlying interface can do VLAN tag insertion itself, * just pass the packet along. However, we need some way to * tell the interface where the packet came from so that it * knows how to find the VLAN tag to use, so we attach a * packet tag that holds it. */ if (p->if_capenable & IFCAP_VLAN_HWTAGGING) { m->m_pkthdr.ether_vtag = ifv->ifv_tag; m->m_flags |= M_VLANTAG; } else { m = ether_vlanencap(m, ifv->ifv_tag); if (m == NULL) { if_printf(ifp, "unable to prepend VLAN header\n"); ifp->if_oerrors++; continue; } } /* * Send it, precisely as ether_output() would have. * We are already running at splimp. */ error = (p->if_transmit)(p, m); if (!error) ifp->if_opackets++; else ifp->if_oerrors++; } } static void vlan_input(struct ifnet *ifp, struct mbuf *m) { struct ifvlantrunk *trunk = ifp->if_vlantrunk; struct ifvlan *ifv; uint16_t tag; KASSERT(trunk != NULL, ("%s: no trunk", __func__)); if (m->m_flags & M_VLANTAG) { /* * Packet is tagged, but m contains a normal * Ethernet frame; the tag is stored out-of-band. */ tag = EVL_VLANOFTAG(m->m_pkthdr.ether_vtag); m->m_flags &= ~M_VLANTAG; } else { struct ether_vlan_header *evl; /* * Packet is tagged in-band as specified by 802.1q. */ switch (ifp->if_type) { case IFT_ETHER: if (m->m_len < sizeof(*evl) && (m = m_pullup(m, sizeof(*evl))) == NULL) { if_printf(ifp, "cannot pullup VLAN header\n"); return; } evl = mtod(m, struct ether_vlan_header *); tag = EVL_VLANOFTAG(ntohs(evl->evl_tag)); /* * Remove the 802.1q header by copying the Ethernet * addresses over it and adjusting the beginning of * the data in the mbuf. The encapsulated Ethernet * type field is already in place. */ bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, ETHER_HDR_LEN - ETHER_TYPE_LEN); m_adj(m, ETHER_VLAN_ENCAP_LEN); break; default: #ifdef INVARIANTS panic("%s: %s has unsupported if_type %u", __func__, ifp->if_xname, ifp->if_type); #endif m_freem(m); ifp->if_noproto++; return; } } TRUNK_RLOCK(trunk); #ifdef VLAN_ARRAY ifv = trunk->vlans[tag]; #else ifv = vlan_gethash(trunk, tag); #endif if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) { TRUNK_RUNLOCK(trunk); m_freem(m); ifp->if_noproto++; return; } TRUNK_RUNLOCK(trunk); m->m_pkthdr.rcvif = ifv->ifv_ifp; ifv->ifv_ifp->if_ipackets++; /* Pass it back through the parent's input routine. */ (*ifp->if_input)(ifv->ifv_ifp, m); } static int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag) { struct ifvlantrunk *trunk; struct ifnet *ifp; int error = 0; /* VID numbers 0x0 and 0xFFF are reserved */ if (tag == 0 || tag == 0xFFF) return (EINVAL); if (p->if_type != IFT_ETHER) return (EPROTONOSUPPORT); if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS) return (EPROTONOSUPPORT); if (ifv->ifv_trunk) return (EBUSY); if (p->if_vlantrunk == NULL) { trunk = malloc(sizeof(struct ifvlantrunk), M_VLAN, M_WAITOK | M_ZERO); #ifndef VLAN_ARRAY vlan_inithash(trunk); #endif VLAN_LOCK(); if (p->if_vlantrunk != NULL) { /* A race that that is very unlikely to be hit. */ #ifndef VLAN_ARRAY vlan_freehash(trunk); #endif free(trunk, M_VLAN); goto exists; } TRUNK_LOCK_INIT(trunk); TRUNK_LOCK(trunk); p->if_vlantrunk = trunk; trunk->parent = p; } else { VLAN_LOCK(); exists: trunk = p->if_vlantrunk; TRUNK_LOCK(trunk); } ifv->ifv_tag = tag; /* must set this before vlan_inshash() */ #ifdef VLAN_ARRAY if (trunk->vlans[tag] != NULL) { error = EEXIST; goto done; } trunk->vlans[tag] = ifv; trunk->refcnt++; #else error = vlan_inshash(trunk, ifv); if (error) goto done; #endif ifv->ifv_proto = ETHERTYPE_VLAN; ifv->ifv_encaplen = ETHER_VLAN_ENCAP_LEN; ifv->ifv_mintu = ETHERMIN; ifv->ifv_pflags = 0; /* * If the parent supports the VLAN_MTU capability, * i.e. can Tx/Rx larger than ETHER_MAX_LEN frames, * use it. */ if (p->if_capenable & IFCAP_VLAN_MTU) { /* * No need to fudge the MTU since the parent can * handle extended frames. */ ifv->ifv_mtufudge = 0; } else { /* * Fudge the MTU by the encapsulation size. This * makes us incompatible with strictly compliant * 802.1Q implementations, but allows us to use * the feature with other NetBSD implementations, * which might still be useful. */ ifv->ifv_mtufudge = ifv->ifv_encaplen; } ifv->ifv_trunk = trunk; ifp = ifv->ifv_ifp; ifp->if_mtu = p->if_mtu - ifv->ifv_mtufudge; ifp->if_baudrate = p->if_baudrate; /* * Copy only a selected subset of flags from the parent. * Other flags are none of our business. */ #define VLAN_COPY_FLAGS (IFF_SIMPLEX) ifp->if_flags &= ~VLAN_COPY_FLAGS; ifp->if_flags |= p->if_flags & VLAN_COPY_FLAGS; #undef VLAN_COPY_FLAGS ifp->if_link_state = p->if_link_state; vlan_capabilities(ifv); /* * Set up our ``Ethernet address'' to reflect the underlying * physical interface's. */ bcopy(IF_LLADDR(p), IF_LLADDR(ifp), ETHER_ADDR_LEN); /* * Configure multicast addresses that may already be * joined on the vlan device. */ (void)vlan_setmulti(ifp); /* XXX: VLAN lock held */ /* We are ready for operation now. */ ifp->if_drv_flags |= IFF_DRV_RUNNING; done: TRUNK_UNLOCK(trunk); if (error == 0) EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_tag); VLAN_UNLOCK(); return (error); } static void vlan_unconfig(struct ifnet *ifp) { VLAN_LOCK(); vlan_unconfig_locked(ifp); VLAN_UNLOCK(); } static void vlan_unconfig_locked(struct ifnet *ifp) { struct ifvlantrunk *trunk; struct vlan_mc_entry *mc; struct ifvlan *ifv; struct ifnet *parent; VLAN_LOCK_ASSERT(); ifv = ifp->if_softc; trunk = ifv->ifv_trunk; parent = NULL; if (trunk != NULL) { struct sockaddr_dl sdl; TRUNK_LOCK(trunk); parent = trunk->parent; /* * Since the interface is being unconfigured, we need to * empty the list of multicast groups that we may have joined * while we were alive from the parent's list. */ bzero((char *)&sdl, sizeof(sdl)); sdl.sdl_len = sizeof(sdl); sdl.sdl_family = AF_LINK; sdl.sdl_index = parent->if_index; sdl.sdl_type = IFT_ETHER; sdl.sdl_alen = ETHER_ADDR_LEN; while ((mc = SLIST_FIRST(&ifv->vlan_mc_listhead)) != NULL) { bcopy((char *)&mc->mc_addr, LLADDR(&sdl), ETHER_ADDR_LEN); /* * This may fail if the parent interface is * being detached. Regardless, we should do a * best effort to free this interface as much * as possible as all callers expect vlan * destruction to succeed. */ (void)if_delmulti(parent, (struct sockaddr *)&sdl); SLIST_REMOVE_HEAD(&ifv->vlan_mc_listhead, mc_entries); free(mc, M_VLAN); } vlan_setflags(ifp, 0); /* clear special flags on parent */ #ifdef VLAN_ARRAY trunk->vlans[ifv->ifv_tag] = NULL; trunk->refcnt--; #else vlan_remhash(trunk, ifv); #endif ifv->ifv_trunk = NULL; /* * Check if we were the last. */ if (trunk->refcnt == 0) { trunk->parent->if_vlantrunk = NULL; /* * XXXGL: If some ithread has already entered * vlan_input() and is now blocked on the trunk * lock, then it should preempt us right after * unlock and finish its work. Then we will acquire * lock again in trunk_destroy(). */ TRUNK_UNLOCK(trunk); trunk_destroy(trunk); } else TRUNK_UNLOCK(trunk); } /* Disconnect from parent. */ if (ifv->ifv_pflags) if_printf(ifp, "%s: ifv_pflags unclean\n", __func__); ifp->if_mtu = ETHERMTU; ifp->if_link_state = LINK_STATE_UNKNOWN; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; /* * Only dispatch an event if vlan was * attached, otherwise there is nothing * to cleanup anyway. */ if (parent != NULL) EVENTHANDLER_INVOKE(vlan_unconfig, parent, ifv->ifv_tag); } /* Handle a reference counted flag that should be set on the parent as well */ static int vlan_setflag(struct ifnet *ifp, int flag, int status, int (*func)(struct ifnet *, int)) { struct ifvlan *ifv; int error; /* XXX VLAN_LOCK_ASSERT(); */ ifv = ifp->if_softc; status = status ? (ifp->if_flags & flag) : 0; /* Now "status" contains the flag value or 0 */ /* * See if recorded parent's status is different from what * we want it to be. If it is, flip it. We record parent's * status in ifv_pflags so that we won't clear parent's flag * we haven't set. In fact, we don't clear or set parent's * flags directly, but get or release references to them. * That's why we can be sure that recorded flags still are * in accord with actual parent's flags. */ if (status != (ifv->ifv_pflags & flag)) { error = (*func)(PARENT(ifv), status); if (error) return (error); ifv->ifv_pflags &= ~flag; ifv->ifv_pflags |= status; } return (0); } /* * Handle IFF_* flags that require certain changes on the parent: * if "status" is true, update parent's flags respective to our if_flags; * if "status" is false, forcedly clear the flags set on parent. */ static int vlan_setflags(struct ifnet *ifp, int status) { int error, i; for (i = 0; vlan_pflags[i].flag; i++) { error = vlan_setflag(ifp, vlan_pflags[i].flag, status, vlan_pflags[i].func); if (error) return (error); } return (0); } /* Inform all vlans that their parent has changed link state */ static void vlan_link_state(struct ifnet *ifp) { struct ifvlantrunk *trunk = ifp->if_vlantrunk; struct ifvlan *ifv; int i; TRUNK_LOCK(trunk); #ifdef VLAN_ARRAY for (i = 0; i < VLAN_ARRAY_SIZE; i++) if (trunk->vlans[i] != NULL) { ifv = trunk->vlans[i]; #else for (i = 0; i < (1 << trunk->hwidth); i++) LIST_FOREACH(ifv, &trunk->hash[i], ifv_list) { #endif ifv->ifv_ifp->if_baudrate = trunk->parent->if_baudrate; if_link_state_change(ifv->ifv_ifp, trunk->parent->if_link_state); } TRUNK_UNLOCK(trunk); } static void vlan_capabilities(struct ifvlan *ifv) { struct ifnet *p = PARENT(ifv); struct ifnet *ifp = ifv->ifv_ifp; TRUNK_LOCK_ASSERT(TRUNK(ifv)); /* * If the parent interface can do checksum offloading * on VLANs, then propagate its hardware-assisted * checksumming flags. Also assert that checksum * offloading requires hardware VLAN tagging. */ if (p->if_capabilities & IFCAP_VLAN_HWCSUM) ifp->if_capabilities = p->if_capabilities & IFCAP_HWCSUM; if (p->if_capenable & IFCAP_VLAN_HWCSUM && p->if_capenable & IFCAP_VLAN_HWTAGGING) { ifp->if_capenable = p->if_capenable & IFCAP_HWCSUM; ifp->if_hwassist = p->if_hwassist & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP | CSUM_IP_FRAGS | CSUM_FRAGMENT); } else { ifp->if_capenable = 0; ifp->if_hwassist = 0; } /* * If the parent interface can do TSO on VLANs then * propagate the hardware-assisted flag. TSO on VLANs * does not necessarily require hardware VLAN tagging. */ if (p->if_capabilities & IFCAP_VLAN_HWTSO) ifp->if_capabilities |= p->if_capabilities & IFCAP_TSO; if (p->if_capenable & IFCAP_VLAN_HWTSO) { ifp->if_capenable |= p->if_capenable & IFCAP_TSO; ifp->if_hwassist |= p->if_hwassist & CSUM_TSO; } else { ifp->if_capenable &= ~(p->if_capenable & IFCAP_TSO); ifp->if_hwassist &= ~(p->if_hwassist & CSUM_TSO); } } static void vlan_trunk_capabilities(struct ifnet *ifp) { struct ifvlantrunk *trunk = ifp->if_vlantrunk; struct ifvlan *ifv; int i; TRUNK_LOCK(trunk); #ifdef VLAN_ARRAY for (i = 0; i < VLAN_ARRAY_SIZE; i++) if (trunk->vlans[i] != NULL) { ifv = trunk->vlans[i]; #else for (i = 0; i < (1 << trunk->hwidth); i++) { LIST_FOREACH(ifv, &trunk->hash[i], ifv_list) #endif vlan_capabilities(ifv); } TRUNK_UNLOCK(trunk); } static int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifnet *p; struct ifreq *ifr; struct ifvlan *ifv; struct vlanreq vlr; int error = 0; ifr = (struct ifreq *)data; ifv = ifp->if_softc; switch (cmd) { case SIOCGIFMEDIA: VLAN_LOCK(); if (TRUNK(ifv) != NULL) { p = PARENT(ifv); VLAN_UNLOCK(); error = (*p->if_ioctl)(p, SIOCGIFMEDIA, data); /* Limit the result to the parent's current config. */ if (error == 0) { struct ifmediareq *ifmr; ifmr = (struct ifmediareq *)data; if (ifmr->ifm_count >= 1 && ifmr->ifm_ulist) { ifmr->ifm_count = 1; error = copyout(&ifmr->ifm_current, ifmr->ifm_ulist, sizeof(int)); } } } else { VLAN_UNLOCK(); error = EINVAL; } break; case SIOCSIFMEDIA: error = EINVAL; break; case SIOCSIFMTU: /* * Set the interface MTU. */ VLAN_LOCK(); if (TRUNK(ifv) != NULL) { if (ifr->ifr_mtu > (PARENT(ifv)->if_mtu - ifv->ifv_mtufudge) || ifr->ifr_mtu < (ifv->ifv_mintu - ifv->ifv_mtufudge)) error = EINVAL; else ifp->if_mtu = ifr->ifr_mtu; } else error = EINVAL; VLAN_UNLOCK(); break; case SIOCSETVLAN: +#ifdef VIMAGE + if (ifp->if_vnet != ifp->if_home_vnet) { + error = EPERM; + break; + } +#endif error = copyin(ifr->ifr_data, &vlr, sizeof(vlr)); if (error) break; if (vlr.vlr_parent[0] == '\0') { vlan_unconfig(ifp); break; } p = ifunit(vlr.vlr_parent); if (p == NULL) { error = ENOENT; break; } /* * Don't let the caller set up a VLAN tag with * anything except VLID bits. */ if (vlr.vlr_tag & ~EVL_VLID_MASK) { error = EINVAL; break; } error = vlan_config(ifv, p, vlr.vlr_tag); if (error) break; /* Update flags on the parent, if necessary. */ vlan_setflags(ifp, 1); break; case SIOCGETVLAN: +#ifdef VIMAGE + if (ifp->if_vnet != ifp->if_home_vnet) { + error = EPERM; + break; + } +#endif bzero(&vlr, sizeof(vlr)); VLAN_LOCK(); if (TRUNK(ifv) != NULL) { strlcpy(vlr.vlr_parent, PARENT(ifv)->if_xname, sizeof(vlr.vlr_parent)); vlr.vlr_tag = ifv->ifv_tag; } VLAN_UNLOCK(); error = copyout(&vlr, ifr->ifr_data, sizeof(vlr)); break; case SIOCSIFFLAGS: /* * We should propagate selected flags to the parent, * e.g., promiscuous mode. */ if (TRUNK(ifv) != NULL) error = vlan_setflags(ifp, 1); break; case SIOCADDMULTI: case SIOCDELMULTI: /* * If we don't have a parent, just remember the membership for * when we do. */ if (TRUNK(ifv) != NULL) error = vlan_setmulti(ifp); break; default: error = ether_ioctl(ifp, cmd, data); } return (error); } Index: projects/binutils-2.17/sys/netgraph/ng_pipe.c =================================================================== --- projects/binutils-2.17/sys/netgraph/ng_pipe.c (revision 215829) +++ projects/binutils-2.17/sys/netgraph/ng_pipe.c (revision 215830) @@ -1,1058 +1,996 @@ /*- - * Copyright (c) 2004-2008 University of Zagreb + * Copyright (c) 2004-2010 University of Zagreb * Copyright (c) 2007-2008 FreeBSD Foundation * * This software was developed by the University of Zagreb and the * FreeBSD Foundation under sponsorship by the Stichting NLnet and the * FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * This node permits simple traffic shaping by emulating bandwidth * and delay, as well as random packet losses. * The node has two hooks, upper and lower. Traffic flowing from upper to * lower hook is referenced as downstream, and vice versa. Parameters for * both directions can be set separately, except for delay. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_NG_PIPE, "ng_pipe", "ng_pipe"); -struct mtx ng_pipe_giant; - /* Packet header struct */ struct ngp_hdr { TAILQ_ENTRY(ngp_hdr) ngp_link; /* next pkt in queue */ struct timeval when; /* this packet's due time */ struct mbuf *m; /* ptr to the packet data */ }; TAILQ_HEAD(p_head, ngp_hdr); /* FIFO queue struct */ struct ngp_fifo { TAILQ_ENTRY(ngp_fifo) fifo_le; /* list of active queues only */ struct p_head packet_head; /* FIFO queue head */ u_int32_t hash; /* flow signature */ struct timeval vtime; /* virtual time, for WFQ */ u_int32_t rr_deficit; /* for DRR */ u_int32_t packets; /* # of packets in this queue */ }; /* Per hook info */ struct hookinfo { hook_p hook; int noqueue; /* bypass any processing */ TAILQ_HEAD(, ngp_fifo) fifo_head; /* FIFO queues */ TAILQ_HEAD(, ngp_hdr) qout_head; /* delay queue head */ - LIST_ENTRY(hookinfo) active_le; /* active hooks */ struct timeval qin_utime; struct ng_pipe_hookcfg cfg; struct ng_pipe_hookrun run; struct ng_pipe_hookstat stats; uint64_t *ber_p; /* loss_p(BER,psize) map */ }; /* Per node info */ struct node_priv { u_int64_t delay; u_int32_t overhead; u_int32_t header_offset; struct hookinfo lower; struct hookinfo upper; + struct callout timer; + int timer_scheduled; }; typedef struct node_priv *priv_p; /* Macro for calculating the virtual time for packet dequeueing in WFQ */ #define FIFO_VTIME_SORT(plen) \ if (hinfo->cfg.wfq && hinfo->cfg.bandwidth) { \ ngp_f->vtime.tv_usec = now->tv_usec + ((uint64_t) (plen) \ + priv->overhead ) * hinfo->run.fifo_queues * \ 8000000 / hinfo->cfg.bandwidth; \ ngp_f->vtime.tv_sec = now->tv_sec + \ ngp_f->vtime.tv_usec / 1000000; \ ngp_f->vtime.tv_usec = ngp_f->vtime.tv_usec % 1000000; \ TAILQ_FOREACH(ngp_f1, &hinfo->fifo_head, fifo_le) \ if (ngp_f1->vtime.tv_sec > ngp_f->vtime.tv_sec || \ (ngp_f1->vtime.tv_sec == ngp_f->vtime.tv_sec && \ ngp_f1->vtime.tv_usec > ngp_f->vtime.tv_usec)) \ break; \ if (ngp_f1 == NULL) \ TAILQ_INSERT_TAIL(&hinfo->fifo_head, ngp_f, fifo_le); \ else \ TAILQ_INSERT_BEFORE(ngp_f1, ngp_f, fifo_le); \ } else \ TAILQ_INSERT_TAIL(&hinfo->fifo_head, ngp_f, fifo_le); \ static void parse_cfg(struct ng_pipe_hookcfg *, struct ng_pipe_hookcfg *, struct hookinfo *, priv_p); static void pipe_dequeue(struct hookinfo *, struct timeval *); -static void pipe_scheduler(void *); -static void pipe_poll(void); +static void ngp_callout(node_p, hook_p, void *, int); static int ngp_modevent(module_t, int, void *); -/* linked list of active "pipe" hooks */ -static LIST_HEAD(, hookinfo) active_head; -static int active_gen_id = 0; - -/* timeout handle for pipe_scheduler */ -static struct callout polling_timer; - /* zone for storing ngp_hdr-s */ static uma_zone_t ngp_zone; /* Netgraph methods */ static ng_constructor_t ngp_constructor; static ng_rcvmsg_t ngp_rcvmsg; static ng_shutdown_t ngp_shutdown; static ng_newhook_t ngp_newhook; static ng_rcvdata_t ngp_rcvdata; static ng_disconnect_t ngp_disconnect; /* Parse type for struct ng_pipe_hookstat */ static const struct ng_parse_struct_field ng_pipe_hookstat_type_fields[] = NG_PIPE_HOOKSTAT_INFO; static const struct ng_parse_type ng_pipe_hookstat_type = { &ng_parse_struct_type, &ng_pipe_hookstat_type_fields }; /* Parse type for struct ng_pipe_stats */ static const struct ng_parse_struct_field ng_pipe_stats_type_fields[] = NG_PIPE_STATS_INFO(&ng_pipe_hookstat_type); static const struct ng_parse_type ng_pipe_stats_type = { &ng_parse_struct_type, &ng_pipe_stats_type_fields }; /* Parse type for struct ng_pipe_hookrun */ static const struct ng_parse_struct_field ng_pipe_hookrun_type_fields[] = NG_PIPE_HOOKRUN_INFO; static const struct ng_parse_type ng_pipe_hookrun_type = { &ng_parse_struct_type, &ng_pipe_hookrun_type_fields }; /* Parse type for struct ng_pipe_run */ static const struct ng_parse_struct_field ng_pipe_run_type_fields[] = NG_PIPE_RUN_INFO(&ng_pipe_hookrun_type); static const struct ng_parse_type ng_pipe_run_type = { &ng_parse_struct_type, &ng_pipe_run_type_fields }; /* Parse type for struct ng_pipe_hookcfg */ static const struct ng_parse_struct_field ng_pipe_hookcfg_type_fields[] = NG_PIPE_HOOKCFG_INFO; static const struct ng_parse_type ng_pipe_hookcfg_type = { &ng_parse_struct_type, &ng_pipe_hookcfg_type_fields }; /* Parse type for struct ng_pipe_cfg */ static const struct ng_parse_struct_field ng_pipe_cfg_type_fields[] = NG_PIPE_CFG_INFO(&ng_pipe_hookcfg_type); static const struct ng_parse_type ng_pipe_cfg_type = { &ng_parse_struct_type, &ng_pipe_cfg_type_fields }; /* List of commands and how to convert arguments to/from ASCII */ static const struct ng_cmdlist ngp_cmds[] = { { .cookie = NGM_PIPE_COOKIE, .cmd = NGM_PIPE_GET_STATS, .name = "getstats", .respType = &ng_pipe_stats_type }, { .cookie = NGM_PIPE_COOKIE, .cmd = NGM_PIPE_CLR_STATS, .name = "clrstats" }, { .cookie = NGM_PIPE_COOKIE, .cmd = NGM_PIPE_GETCLR_STATS, .name = "getclrstats", .respType = &ng_pipe_stats_type }, { .cookie = NGM_PIPE_COOKIE, .cmd = NGM_PIPE_GET_RUN, .name = "getrun", .respType = &ng_pipe_run_type }, { .cookie = NGM_PIPE_COOKIE, .cmd = NGM_PIPE_GET_CFG, .name = "getcfg", .respType = &ng_pipe_cfg_type }, { .cookie = NGM_PIPE_COOKIE, .cmd = NGM_PIPE_SET_CFG, .name = "setcfg", .mesgType = &ng_pipe_cfg_type, }, { 0 } }; /* Netgraph type descriptor */ static struct ng_type ng_pipe_typestruct = { .version = NG_ABI_VERSION, .name = NG_PIPE_NODE_TYPE, .mod_event = ngp_modevent, .constructor = ngp_constructor, .shutdown = ngp_shutdown, .rcvmsg = ngp_rcvmsg, .newhook = ngp_newhook, .rcvdata = ngp_rcvdata, .disconnect = ngp_disconnect, .cmdlist = ngp_cmds }; NETGRAPH_INIT(pipe, &ng_pipe_typestruct); /* Node constructor */ static int ngp_constructor(node_p node) { priv_p priv; priv = malloc(sizeof(*priv), M_NG_PIPE, M_ZERO | M_NOWAIT); if (priv == NULL) return (ENOMEM); NG_NODE_SET_PRIVATE(node, priv); + /* Mark node as single-threaded */ + NG_NODE_FORCE_WRITER(node); + + ng_callout_init(&priv->timer); + return (0); } /* Add a hook */ static int ngp_newhook(node_p node, hook_p hook, const char *name) { const priv_p priv = NG_NODE_PRIVATE(node); struct hookinfo *hinfo; if (strcmp(name, NG_PIPE_HOOK_UPPER) == 0) { bzero(&priv->upper, sizeof(priv->upper)); priv->upper.hook = hook; NG_HOOK_SET_PRIVATE(hook, &priv->upper); } else if (strcmp(name, NG_PIPE_HOOK_LOWER) == 0) { bzero(&priv->lower, sizeof(priv->lower)); priv->lower.hook = hook; NG_HOOK_SET_PRIVATE(hook, &priv->lower); } else return (EINVAL); /* Load non-zero initial cfg values */ hinfo = NG_HOOK_PRIVATE(hook); hinfo->cfg.qin_size_limit = 50; hinfo->cfg.fifo = 1; hinfo->cfg.droptail = 1; TAILQ_INIT(&hinfo->fifo_head); TAILQ_INIT(&hinfo->qout_head); return (0); } /* Receive a control message */ static int ngp_rcvmsg(node_p node, item_p item, hook_p lasthook) { const priv_p priv = NG_NODE_PRIVATE(node); struct ng_mesg *resp = NULL; struct ng_mesg *msg; struct ng_pipe_stats *stats; struct ng_pipe_run *run; struct ng_pipe_cfg *cfg; int error = 0; - mtx_lock(&ng_pipe_giant); - NGI_GET_MSG(item, msg); switch (msg->header.typecookie) { case NGM_PIPE_COOKIE: switch (msg->header.cmd) { case NGM_PIPE_GET_STATS: case NGM_PIPE_CLR_STATS: case NGM_PIPE_GETCLR_STATS: if (msg->header.cmd != NGM_PIPE_CLR_STATS) { NG_MKRESPONSE(resp, msg, sizeof(*stats), M_NOWAIT); if (resp == NULL) { error = ENOMEM; break; } - stats = (struct ng_pipe_stats *)resp->data; + stats = (struct ng_pipe_stats *) resp->data; bcopy(&priv->upper.stats, &stats->downstream, sizeof(stats->downstream)); bcopy(&priv->lower.stats, &stats->upstream, sizeof(stats->upstream)); } if (msg->header.cmd != NGM_PIPE_GET_STATS) { bzero(&priv->upper.stats, sizeof(priv->upper.stats)); bzero(&priv->lower.stats, sizeof(priv->lower.stats)); } break; case NGM_PIPE_GET_RUN: NG_MKRESPONSE(resp, msg, sizeof(*run), M_NOWAIT); if (resp == NULL) { error = ENOMEM; break; } - run = (struct ng_pipe_run *)resp->data; + run = (struct ng_pipe_run *) resp->data; bcopy(&priv->upper.run, &run->downstream, sizeof(run->downstream)); bcopy(&priv->lower.run, &run->upstream, sizeof(run->upstream)); break; case NGM_PIPE_GET_CFG: NG_MKRESPONSE(resp, msg, sizeof(*cfg), M_NOWAIT); if (resp == NULL) { error = ENOMEM; break; } - cfg = (struct ng_pipe_cfg *)resp->data; + cfg = (struct ng_pipe_cfg *) resp->data; bcopy(&priv->upper.cfg, &cfg->downstream, sizeof(cfg->downstream)); bcopy(&priv->lower.cfg, &cfg->upstream, sizeof(cfg->upstream)); cfg->delay = priv->delay; cfg->overhead = priv->overhead; cfg->header_offset = priv->header_offset; if (cfg->upstream.bandwidth == cfg->downstream.bandwidth) { cfg->bandwidth = cfg->upstream.bandwidth; cfg->upstream.bandwidth = 0; cfg->downstream.bandwidth = 0; } else cfg->bandwidth = 0; break; case NGM_PIPE_SET_CFG: - cfg = (struct ng_pipe_cfg *)msg->data; + cfg = (struct ng_pipe_cfg *) msg->data; if (msg->header.arglen != sizeof(*cfg)) { error = EINVAL; break; } if (cfg->delay == -1) priv->delay = 0; else if (cfg->delay > 0 && cfg->delay < 10000000) priv->delay = cfg->delay; if (cfg->bandwidth == -1) { priv->upper.cfg.bandwidth = 0; priv->lower.cfg.bandwidth = 0; priv->overhead = 0; } else if (cfg->bandwidth >= 100 && cfg->bandwidth <= 1000000000) { priv->upper.cfg.bandwidth = cfg->bandwidth; priv->lower.cfg.bandwidth = cfg->bandwidth; if (cfg->bandwidth >= 10000000) priv->overhead = 8+4+12; /* Ethernet */ else priv->overhead = 10; /* HDLC */ } if (cfg->overhead == -1) priv->overhead = 0; - else if (cfg->overhead > 0 && cfg->overhead < 256) + else if (cfg->overhead > 0 && + cfg->overhead < MAX_OHSIZE) priv->overhead = cfg->overhead; if (cfg->header_offset == -1) priv->header_offset = 0; else if (cfg->header_offset > 0 && cfg->header_offset < 64) priv->header_offset = cfg->header_offset; parse_cfg(&priv->upper.cfg, &cfg->downstream, - &priv->upper, priv); + &priv->upper, priv); parse_cfg(&priv->lower.cfg, &cfg->upstream, - &priv->lower, priv); + &priv->lower, priv); break; default: error = EINVAL; break; } break; default: error = EINVAL; break; } NG_RESPOND_MSG(error, node, item, resp); NG_FREE_MSG(msg); - mtx_unlock(&ng_pipe_giant); - return (error); } static void parse_cfg(struct ng_pipe_hookcfg *current, struct ng_pipe_hookcfg *new, struct hookinfo *hinfo, priv_p priv) { if (new->ber == -1) { current->ber = 0; if (hinfo->ber_p) { free(hinfo->ber_p, M_NG_PIPE); hinfo->ber_p = NULL; } } else if (new->ber >= 1 && new->ber <= 1000000000000) { static const uint64_t one = 0x1000000000000; /* = 2^48 */ uint64_t p0, p; uint32_t fsize, i; if (hinfo->ber_p == NULL) - hinfo->ber_p = malloc(\ - (MAX_FSIZE + MAX_OHSIZE)*sizeof(uint64_t), \ - M_NG_PIPE, M_NOWAIT); + hinfo->ber_p = + malloc((MAX_FSIZE + MAX_OHSIZE) * sizeof(uint64_t), + M_NG_PIPE, M_NOWAIT); current->ber = new->ber; /* * For given BER and each frame size N (in bytes) calculate * the probability P_OK that the frame is clean: * * P_OK(BER,N) = (1 - 1/BER)^(N*8) * * We use a 64-bit fixed-point format with decimal point * positioned between bits 47 and 48. */ p0 = one - one / new->ber; p = one; for (fsize = 0; fsize < MAX_FSIZE + MAX_OHSIZE; fsize++) { hinfo->ber_p[fsize] = p; - for (i=0; i<8; i++) - p = (p*(p0&0xffff)>>48) + \ - (p*((p0>>16)&0xffff)>>32) + \ - (p*(p0>>32)>>16); + for (i = 0; i < 8; i++) + p = (p * (p0 & 0xffff) >> 48) + + (p * ((p0 >> 16) & 0xffff) >> 32) + + (p * (p0 >> 32) >> 16); } } if (new->qin_size_limit == -1) current->qin_size_limit = 0; else if (new->qin_size_limit >= 5) current->qin_size_limit = new->qin_size_limit; if (new->qout_size_limit == -1) current->qout_size_limit = 0; else if (new->qout_size_limit >= 5) current->qout_size_limit = new->qout_size_limit; if (new->duplicate == -1) current->duplicate = 0; else if (new->duplicate > 0 && new->duplicate <= 50) current->duplicate = new->duplicate; if (new->fifo) { current->fifo = 1; current->wfq = 0; current->drr = 0; } if (new->wfq) { current->fifo = 0; current->wfq = 1; current->drr = 0; } if (new->drr) { current->fifo = 0; current->wfq = 0; /* DRR quantum */ if (new->drr >= 32) current->drr = new->drr; else current->drr = 2048; /* default quantum */ } if (new->droptail) { current->droptail = 1; current->drophead = 0; } if (new->drophead) { current->droptail = 0; current->drophead = 1; } if (new->bandwidth == -1) { current->bandwidth = 0; current->fifo = 1; current->wfq = 0; current->drr = 0; } else if (new->bandwidth >= 100 && new->bandwidth <= 1000000000) current->bandwidth = new->bandwidth; if (current->bandwidth | priv->delay | current->duplicate | current->ber) hinfo->noqueue = 0; else hinfo->noqueue = 1; } /* * Compute a hash signature for a packet. This function suffers from the * NIH sindrome, so probably it would be wise to look around what other * folks have found out to be a good and efficient IP hash function... */ static int ip_hash(struct mbuf *m, int offset) { u_int64_t i; struct ip *ip = (struct ip *)(mtod(m, u_char *) + offset); if (m->m_len < sizeof(struct ip) + offset || ip->ip_v != 4 || ip->ip_hl << 2 != sizeof(struct ip)) return 0; i = ((u_int64_t) ip->ip_src.s_addr ^ ((u_int64_t) ip->ip_src.s_addr << 13) ^ ((u_int64_t) ip->ip_dst.s_addr << 7) ^ ((u_int64_t) ip->ip_dst.s_addr << 19)); return (i ^ (i >> 32)); } /* * Receive data on a hook - both in upstream and downstream direction. * We put the frame on the inbound queue, and try to initiate dequeuing * sequence immediately. If inbound queue is full, discard one frame * depending on dropping policy (from the head or from the tail of the * queue). */ static int ngp_rcvdata(hook_p hook, item_p item) { struct hookinfo *const hinfo = NG_HOOK_PRIVATE(hook); const priv_p priv = NG_NODE_PRIVATE(NG_HOOK_NODE(hook)); struct timeval uuptime; struct timeval *now = &uuptime; struct ngp_fifo *ngp_f = NULL, *ngp_f1; struct ngp_hdr *ngp_h = NULL; struct mbuf *m; - int hash; + int hash, plen; int error = 0; - if (hinfo->noqueue) { + /* + * Shortcut from inbound to outbound hook when neither of + * bandwidth, delay, BER or duplication probability is + * configured, nor we have queued frames to drain. + */ + if (hinfo->run.qin_frames == 0 && hinfo->run.qout_frames == 0 && + hinfo->noqueue) { struct hookinfo *dest; if (hinfo == &priv->lower) dest = &priv->upper; else dest = &priv->lower; + + /* Send the frame. */ + plen = NGI_M(item)->m_pkthdr.len; NG_FWD_ITEM_HOOK(error, item, dest->hook); - return error; + + /* Update stats. */ + if (error) { + hinfo->stats.out_disc_frames++; + hinfo->stats.out_disc_octets += plen; + } else { + hinfo->stats.fwd_frames++; + hinfo->stats.fwd_octets += plen; + } + + return (error); } - mtx_lock(&ng_pipe_giant); microuptime(now); /* - * Attach us to the list of active ng_pipes if this was an empty - * one before, and also update the queue service deadline time. + * If this was an empty queue, update service deadline time. */ if (hinfo->run.qin_frames == 0) { struct timeval *when = &hinfo->qin_utime; if (when->tv_sec < now->tv_sec || (when->tv_sec == now->tv_sec && when->tv_usec < now->tv_usec)) { when->tv_sec = now->tv_sec; when->tv_usec = now->tv_usec; } - if (hinfo->run.qout_frames == 0) - LIST_INSERT_HEAD(&active_head, hinfo, active_le); } /* Populate the packet header */ ngp_h = uma_zalloc(ngp_zone, M_NOWAIT); KASSERT((ngp_h != NULL), ("ngp_h zalloc failed (1)")); NGI_GET_M(item, m); KASSERT(m != NULL, ("NGI_GET_M failed")); ngp_h->m = m; NG_FREE_ITEM(item); if (hinfo->cfg.fifo) hash = 0; /* all packets go into a single FIFO queue */ else hash = ip_hash(m, priv->header_offset); /* Find the appropriate FIFO queue for the packet and enqueue it*/ TAILQ_FOREACH(ngp_f, &hinfo->fifo_head, fifo_le) if (hash == ngp_f->hash) break; if (ngp_f == NULL) { ngp_f = uma_zalloc(ngp_zone, M_NOWAIT); KASSERT(ngp_h != NULL, ("ngp_h zalloc failed (2)")); TAILQ_INIT(&ngp_f->packet_head); ngp_f->hash = hash; ngp_f->packets = 1; ngp_f->rr_deficit = hinfo->cfg.drr; /* DRR quantum */ hinfo->run.fifo_queues++; TAILQ_INSERT_TAIL(&ngp_f->packet_head, ngp_h, ngp_link); FIFO_VTIME_SORT(m->m_pkthdr.len); } else { TAILQ_INSERT_TAIL(&ngp_f->packet_head, ngp_h, ngp_link); ngp_f->packets++; } hinfo->run.qin_frames++; hinfo->run.qin_octets += m->m_pkthdr.len; /* Discard a frame if inbound queue limit has been reached */ if (hinfo->run.qin_frames > hinfo->cfg.qin_size_limit) { struct mbuf *m1; int longest = 0; /* Find the longest queue */ TAILQ_FOREACH(ngp_f1, &hinfo->fifo_head, fifo_le) if (ngp_f1->packets > longest) { longest = ngp_f1->packets; ngp_f = ngp_f1; } /* Drop a frame from the queue head/tail, depending on cfg */ if (hinfo->cfg.drophead) ngp_h = TAILQ_FIRST(&ngp_f->packet_head); else ngp_h = TAILQ_LAST(&ngp_f->packet_head, p_head); TAILQ_REMOVE(&ngp_f->packet_head, ngp_h, ngp_link); m1 = ngp_h->m; uma_zfree(ngp_zone, ngp_h); hinfo->run.qin_octets -= m1->m_pkthdr.len; hinfo->stats.in_disc_octets += m1->m_pkthdr.len; m_freem(m1); if (--(ngp_f->packets) == 0) { TAILQ_REMOVE(&hinfo->fifo_head, ngp_f, fifo_le); uma_zfree(ngp_zone, ngp_f); hinfo->run.fifo_queues--; } hinfo->run.qin_frames--; hinfo->stats.in_disc_frames++; } else if (hinfo->run.qin_frames > hinfo->cfg.qin_size_limit) { struct mbuf *m1; int longest = 0; /* Find the longest queue */ TAILQ_FOREACH(ngp_f1, &hinfo->fifo_head, fifo_le) if (ngp_f1->packets > longest) { longest = ngp_f1->packets; ngp_f = ngp_f1; } /* Drop a frame from the queue head/tail, depending on cfg */ if (hinfo->cfg.drophead) ngp_h = TAILQ_FIRST(&ngp_f->packet_head); else ngp_h = TAILQ_LAST(&ngp_f->packet_head, p_head); TAILQ_REMOVE(&ngp_f->packet_head, ngp_h, ngp_link); m1 = ngp_h->m; uma_zfree(ngp_zone, ngp_h); hinfo->run.qin_octets -= m1->m_pkthdr.len; hinfo->stats.in_disc_octets += m1->m_pkthdr.len; m_freem(m1); if (--(ngp_f->packets) == 0) { TAILQ_REMOVE(&hinfo->fifo_head, ngp_f, fifo_le); uma_zfree(ngp_zone, ngp_f); hinfo->run.fifo_queues--; } hinfo->run.qin_frames--; hinfo->stats.in_disc_frames++; } /* - * Try to start the dequeuing process immediately. We must - * hold the ng_pipe_giant lock here and pipe_dequeue() will - * release it + * Try to start the dequeuing process immediately. */ pipe_dequeue(hinfo, now); return (0); } /* * Dequeueing sequence - we basically do the following: * 1) Try to extract the frame from the inbound (bandwidth) queue; * 2) In accordance to BER specified, discard the frame randomly; * 3) If the frame survives BER, prepend it with delay info and move it * to outbound (delay) queue; * 4) Loop to 2) until bandwidth quota for this timeslice is reached, or * inbound queue is flushed completely; - * 5) Extract the first frame from the outbound queue, if it's time has - * come. Queue the frame for transmission on the outbound hook; - * 6) Loop to 5) until outbound queue is flushed completely, or the next - * frame in the queue is not scheduled to be dequeued yet; - * 7) Transimit all frames queued in 5) - * - * Note: the caller must hold the ng_pipe_giant lock; this function - * returns with the lock released. + * 5) Dequeue frames from the outbound queue and send them downstream until + * outbound queue is flushed completely, or the next frame in the queue + * is not due to be dequeued yet */ static void pipe_dequeue(struct hookinfo *hinfo, struct timeval *now) { static uint64_t rand, oldrand; - const priv_p priv = NG_NODE_PRIVATE(NG_HOOK_NODE(hinfo->hook)); + const node_p node = NG_HOOK_NODE(hinfo->hook); + const priv_p priv = NG_NODE_PRIVATE(node); struct hookinfo *dest; struct ngp_fifo *ngp_f, *ngp_f1; struct ngp_hdr *ngp_h; struct timeval *when; - struct mbuf *q_head = NULL; - struct mbuf *q_tail = NULL; struct mbuf *m; - int error = 0; + int plen, error = 0; /* Which one is the destination hook? */ if (hinfo == &priv->lower) dest = &priv->upper; else dest = &priv->lower; /* Bandwidth queue processing */ while ((ngp_f = TAILQ_FIRST(&hinfo->fifo_head))) { when = &hinfo->qin_utime; if (when->tv_sec > now->tv_sec || (when->tv_sec == now->tv_sec && when->tv_usec > now->tv_usec)) break; ngp_h = TAILQ_FIRST(&ngp_f->packet_head); m = ngp_h->m; /* Deficit Round Robin (DRR) processing */ if (hinfo->cfg.drr) { if (ngp_f->rr_deficit >= m->m_pkthdr.len) { ngp_f->rr_deficit -= m->m_pkthdr.len; } else { ngp_f->rr_deficit += hinfo->cfg.drr; TAILQ_REMOVE(&hinfo->fifo_head, ngp_f, fifo_le); TAILQ_INSERT_TAIL(&hinfo->fifo_head, ngp_f, fifo_le); continue; } } /* * Either create a duplicate and pass it on, or dequeue * the original packet... */ if (hinfo->cfg.duplicate && random() % 100 <= hinfo->cfg.duplicate) { ngp_h = uma_zalloc(ngp_zone, M_NOWAIT); KASSERT(ngp_h != NULL, ("ngp_h zalloc failed (3)")); m = m_dup(m, M_NOWAIT); KASSERT(m != NULL, ("m_dup failed")); ngp_h->m = m; } else { TAILQ_REMOVE(&ngp_f->packet_head, ngp_h, ngp_link); hinfo->run.qin_frames--; hinfo->run.qin_octets -= m->m_pkthdr.len; ngp_f->packets--; } /* Calculate the serialization delay */ if (hinfo->cfg.bandwidth) { - hinfo->qin_utime.tv_usec += ((uint64_t) m->m_pkthdr.len - + priv->overhead ) * - 8000000 / hinfo->cfg.bandwidth; + hinfo->qin_utime.tv_usec += + ((uint64_t) m->m_pkthdr.len + priv->overhead ) * + 8000000 / hinfo->cfg.bandwidth; hinfo->qin_utime.tv_sec += - hinfo->qin_utime.tv_usec / 1000000; + hinfo->qin_utime.tv_usec / 1000000; hinfo->qin_utime.tv_usec = - hinfo->qin_utime.tv_usec % 1000000; + hinfo->qin_utime.tv_usec % 1000000; } when = &ngp_h->when; when->tv_sec = hinfo->qin_utime.tv_sec; when->tv_usec = hinfo->qin_utime.tv_usec; /* Sort / rearrange inbound queues */ if (ngp_f->packets) { if (hinfo->cfg.wfq) { TAILQ_REMOVE(&hinfo->fifo_head, ngp_f, fifo_le); FIFO_VTIME_SORT(TAILQ_FIRST( &ngp_f->packet_head)->m->m_pkthdr.len) } } else { TAILQ_REMOVE(&hinfo->fifo_head, ngp_f, fifo_le); uma_zfree(ngp_zone, ngp_f); hinfo->run.fifo_queues--; } /* Randomly discard the frame, according to BER setting */ if (hinfo->cfg.ber) { oldrand = rand; rand = random(); if (((oldrand ^ rand) << 17) >= hinfo->ber_p[priv->overhead + m->m_pkthdr.len]) { hinfo->stats.out_disc_frames++; hinfo->stats.out_disc_octets += m->m_pkthdr.len; uma_zfree(ngp_zone, ngp_h); m_freem(m); continue; } } /* Discard frame if outbound queue size limit exceeded */ if (hinfo->cfg.qout_size_limit && hinfo->run.qout_frames>=hinfo->cfg.qout_size_limit) { hinfo->stats.out_disc_frames++; hinfo->stats.out_disc_octets += m->m_pkthdr.len; uma_zfree(ngp_zone, ngp_h); m_freem(m); continue; } /* Calculate the propagation delay */ when->tv_usec += priv->delay; when->tv_sec += when->tv_usec / 1000000; when->tv_usec = when->tv_usec % 1000000; /* Put the frame into the delay queue */ TAILQ_INSERT_TAIL(&hinfo->qout_head, ngp_h, ngp_link); hinfo->run.qout_frames++; hinfo->run.qout_octets += m->m_pkthdr.len; } /* Delay queue processing */ while ((ngp_h = TAILQ_FIRST(&hinfo->qout_head))) { - struct mbuf *m = ngp_h->m; - when = &ngp_h->when; + m = ngp_h->m; if (when->tv_sec > now->tv_sec || (when->tv_sec == now->tv_sec && when->tv_usec > now->tv_usec)) break; /* Update outbound queue stats */ - hinfo->stats.fwd_frames++; - hinfo->stats.fwd_octets += m->m_pkthdr.len; + plen = m->m_pkthdr.len; hinfo->run.qout_frames--; - hinfo->run.qout_octets -= m->m_pkthdr.len; + hinfo->run.qout_octets -= plen; /* Dequeue the packet from qout */ TAILQ_REMOVE(&hinfo->qout_head, ngp_h, ngp_link); uma_zfree(ngp_zone, ngp_h); - /* Enqueue locally for sending downstream */ - if (q_head == NULL) - q_head = m; - if (q_tail) - q_tail->m_nextpkt = m; - q_tail = m; - m->m_nextpkt = NULL; + NG_SEND_DATA(error, dest->hook, m, meta); + if (error) { + hinfo->stats.out_disc_frames++; + hinfo->stats.out_disc_octets += plen; + } else { + hinfo->stats.fwd_frames++; + hinfo->stats.fwd_octets += plen; + } } - /* If both queues are empty detach us from the list of active queues */ - if (hinfo->run.qin_frames + hinfo->run.qout_frames == 0) { - LIST_REMOVE(hinfo, active_le); - active_gen_id++; + if ((hinfo->run.qin_frames != 0 || hinfo->run.qout_frames != 0) && + !priv->timer_scheduled) { + ng_callout(&priv->timer, node, NULL, 1, ngp_callout, NULL, 0); + priv->timer_scheduled = 1; } - - mtx_unlock(&ng_pipe_giant); - - while ((m = q_head) != NULL) { - q_head = m->m_nextpkt; - m->m_nextpkt = NULL; - NG_SEND_DATA(error, dest->hook, m, meta); - } } - /* - * This routine is called on every clock tick. We poll all nodes/hooks + * This routine is called on every clock tick. We poll connected hooks * for queued frames by calling pipe_dequeue(). */ static void -pipe_scheduler(void *arg) +ngp_callout(node_p node, hook_p hook, void *arg1, int arg2) { - pipe_poll(); - - /* Reschedule */ - callout_reset(&polling_timer, 1, &pipe_scheduler, NULL); -} - - -/* - * Traverse the list of all active hooks and attempt to dequeue - * some packets. Hooks with empty queues are not traversed since - * they are not linked into this list. - */ -static void -pipe_poll(void) -{ - struct hookinfo *hinfo; + const priv_p priv = NG_NODE_PRIVATE(node); struct timeval now; - int old_gen_id = active_gen_id; - - mtx_lock(&ng_pipe_giant); + + priv->timer_scheduled = 0; microuptime(&now); - LIST_FOREACH(hinfo, &active_head, active_le) { - CURVNET_SET(NG_HOOK_NODE(hinfo->hook)->nd_vnet); - pipe_dequeue(hinfo, &now); - CURVNET_RESTORE(); - mtx_lock(&ng_pipe_giant); - if (old_gen_id != active_gen_id) { - /* the list was updated; restart traversing */ - hinfo = LIST_FIRST(&active_head); - if (hinfo == NULL) - break; - old_gen_id = active_gen_id; - continue; - } - } - mtx_unlock(&ng_pipe_giant); + if (priv->upper.hook != NULL) + pipe_dequeue(&priv->upper, &now); + if (priv->lower.hook != NULL) + pipe_dequeue(&priv->lower, &now); } - /* * Shutdown processing * * This is tricky. If we have both a lower and upper hook, then we * probably want to extricate ourselves and leave the two peers * still linked to each other. Otherwise we should just shut down as * a normal node would. */ static int ngp_shutdown(node_p node) { const priv_p priv = NG_NODE_PRIVATE(node); + if (priv->timer_scheduled) + ng_uncallout(&priv->timer, node); if (priv->lower.hook && priv->upper.hook) ng_bypass(priv->lower.hook, priv->upper.hook); else { if (priv->upper.hook != NULL) ng_rmhook_self(priv->upper.hook); if (priv->lower.hook != NULL) ng_rmhook_self(priv->lower.hook); } NG_NODE_UNREF(node); free(priv, M_NG_PIPE); return (0); } /* * Hook disconnection */ static int ngp_disconnect(hook_p hook) { struct hookinfo *const hinfo = NG_HOOK_PRIVATE(hook); struct ngp_fifo *ngp_f; struct ngp_hdr *ngp_h; - int removed = 0; - mtx_lock(&ng_pipe_giant); - KASSERT(hinfo != NULL, ("%s: null info", __FUNCTION__)); hinfo->hook = NULL; /* Flush all fifo queues associated with the hook */ while ((ngp_f = TAILQ_FIRST(&hinfo->fifo_head))) { while ((ngp_h = TAILQ_FIRST(&ngp_f->packet_head))) { TAILQ_REMOVE(&ngp_f->packet_head, ngp_h, ngp_link); m_freem(ngp_h->m); uma_zfree(ngp_zone, ngp_h); - removed++; } TAILQ_REMOVE(&hinfo->fifo_head, ngp_f, fifo_le); uma_zfree(ngp_zone, ngp_f); } /* Flush the delay queue */ while ((ngp_h = TAILQ_FIRST(&hinfo->qout_head))) { TAILQ_REMOVE(&hinfo->qout_head, ngp_h, ngp_link); m_freem(ngp_h->m); uma_zfree(ngp_zone, ngp_h); - removed++; } - /* - * Both queues should be empty by now, so detach us from - * the list of active queues - */ - if (removed) { - LIST_REMOVE(hinfo, active_le); - active_gen_id++; - } - if (hinfo->run.qin_frames + hinfo->run.qout_frames != removed) - printf("Mismatch: queued=%d but removed=%d !?!", - hinfo->run.qin_frames + hinfo->run.qout_frames, removed); - /* Release the packet loss probability table (BER) */ if (hinfo->ber_p) free(hinfo->ber_p, M_NG_PIPE); - mtx_unlock(&ng_pipe_giant); - return (0); } static int ngp_modevent(module_t mod, int type, void *unused) { int error = 0; switch (type) { case MOD_LOAD: ngp_zone = uma_zcreate("ng_pipe", max(sizeof(struct ngp_hdr), sizeof (struct ngp_fifo)), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); if (ngp_zone == NULL) panic("ng_pipe: couldn't allocate descriptor zone"); - - mtx_init(&ng_pipe_giant, "ng_pipe_giant", NULL, MTX_DEF); - LIST_INIT(&active_head); - callout_init(&polling_timer, CALLOUT_MPSAFE); - callout_reset(&polling_timer, 1, &pipe_scheduler, NULL); break; case MOD_UNLOAD: - callout_drain(&polling_timer); uma_zdestroy(ngp_zone); - mtx_destroy(&ng_pipe_giant); break; default: error = EOPNOTSUPP; break; } return (error); } Index: projects/binutils-2.17/sys/netinet/ip_carp.c =================================================================== --- projects/binutils-2.17/sys/netinet/ip_carp.c (revision 215829) +++ projects/binutils-2.17/sys/netinet/ip_carp.c (revision 215830) @@ -1,2423 +1,2426 @@ /* * Copyright (c) 2002 Michael Shalayeff. All rights reserved. * Copyright (c) 2003 Ryan McBride. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_bpf.h" #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #include #include #include #include #include #endif #ifdef INET6 #include #include #include #include #include #include #endif #include #include #define CARP_IFNAME "carp" static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces"); SYSCTL_DECL(_net_inet_carp); struct carp_softc { struct ifnet *sc_ifp; /* Interface clue */ struct ifnet *sc_carpdev; /* Pointer to parent interface */ struct in_ifaddr *sc_ia; /* primary iface address */ struct ip_moptions sc_imo; #ifdef INET6 struct in6_ifaddr *sc_ia6; /* primary iface address v6 */ struct ip6_moptions sc_im6o; #endif /* INET6 */ TAILQ_ENTRY(carp_softc) sc_list; enum { INIT = 0, BACKUP, MASTER } sc_state; int sc_flags_backup; int sc_suppress; int sc_sendad_errors; #define CARP_SENDAD_MAX_ERRORS 3 int sc_sendad_success; #define CARP_SENDAD_MIN_SUCCESS 3 int sc_vhid; int sc_advskew; int sc_naddrs; int sc_naddrs6; int sc_advbase; /* seconds */ int sc_init_counter; u_int64_t sc_counter; /* authentication */ #define CARP_HMAC_PAD 64 unsigned char sc_key[CARP_KEY_LEN]; unsigned char sc_pad[CARP_HMAC_PAD]; SHA1_CTX sc_sha1; struct callout sc_ad_tmo; /* advertisement timeout */ struct callout sc_md_tmo; /* master down timeout */ struct callout sc_md6_tmo; /* master down timeout */ LIST_ENTRY(carp_softc) sc_next; /* Interface clue */ }; #define SC2IFP(sc) ((sc)->sc_ifp) int carp_suppress_preempt = 0; int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */ SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP"); SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW, &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets"); SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW, &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode"); SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW, &carp_opts[CARPCTL_LOG], 0, "log bad carp packets"); SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW, &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses"); SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD, &carp_suppress_preempt, 0, "Preemption is suppressed"); struct carpstats carpstats; SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW, &carpstats, carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)"); struct carp_if { TAILQ_HEAD(, carp_softc) vhif_vrs; int vhif_nvrs; struct ifnet *vhif_ifp; struct mtx vhif_mtx; }; #define CARP_INET 0 #define CARP_INET6 1 static int proto_reg[] = {-1, -1}; /* Get carp_if from softc. Valid after carp_set_addr{,6}. */ #define SC2CIF(sc) ((struct carp_if *)(sc)->sc_carpdev->if_carp) /* lock per carp_if queue */ #define CARP_LOCK_INIT(cif) mtx_init(&(cif)->vhif_mtx, "carp_if", \ NULL, MTX_DEF) #define CARP_LOCK_DESTROY(cif) mtx_destroy(&(cif)->vhif_mtx) #define CARP_LOCK_ASSERT(cif) mtx_assert(&(cif)->vhif_mtx, MA_OWNED) #define CARP_LOCK(cif) mtx_lock(&(cif)->vhif_mtx) #define CARP_UNLOCK(cif) mtx_unlock(&(cif)->vhif_mtx) #define CARP_SCLOCK(sc) mtx_lock(&SC2CIF(sc)->vhif_mtx) #define CARP_SCUNLOCK(sc) mtx_unlock(&SC2CIF(sc)->vhif_mtx) #define CARP_SCLOCK_ASSERT(sc) mtx_assert(&SC2CIF(sc)->vhif_mtx, MA_OWNED) #define CARP_LOG(...) do { \ if (carp_opts[CARPCTL_LOG] > 0) \ log(LOG_INFO, __VA_ARGS__); \ } while (0) #define CARP_DEBUG(...) do { \ if (carp_opts[CARPCTL_LOG] > 1) \ log(LOG_DEBUG, __VA_ARGS__); \ } while (0) static void carp_hmac_prepare(struct carp_softc *); static void carp_hmac_generate(struct carp_softc *, u_int32_t *, unsigned char *); static int carp_hmac_verify(struct carp_softc *, u_int32_t *, unsigned char *); static void carp_setroute(struct carp_softc *, int); static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t); static int carp_clone_create(struct if_clone *, int, caddr_t); static void carp_clone_destroy(struct ifnet *); static void carpdetach(struct carp_softc *, int); static int carp_prepare_ad(struct mbuf *, struct carp_softc *, struct carp_header *); static void carp_send_ad_all(void); static void carp_send_ad(void *); static void carp_send_ad_locked(struct carp_softc *); static void carp_send_arp(struct carp_softc *); static void carp_master_down(void *); static void carp_master_down_locked(struct carp_softc *); static int carp_ioctl(struct ifnet *, u_long, caddr_t); static int carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *, struct route *); static void carp_start(struct ifnet *); static void carp_setrun(struct carp_softc *, sa_family_t); static void carp_set_state(struct carp_softc *, int); static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int); enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING }; -static void carp_multicast_cleanup(struct carp_softc *); +static void carp_multicast_cleanup(struct carp_softc *, int dofree); static int carp_set_addr(struct carp_softc *, struct sockaddr_in *); static int carp_del_addr(struct carp_softc *, struct sockaddr_in *); static void carp_carpdev_state_locked(struct carp_if *); static void carp_sc_state_locked(struct carp_softc *); #ifdef INET6 static void carp_send_na(struct carp_softc *); static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *); -static void carp_multicast6_cleanup(struct carp_softc *); +static void carp_multicast6_cleanup(struct carp_softc *, int dofree); #endif static LIST_HEAD(, carp_softc) carpif_list; static struct mtx carp_mtx; IFC_SIMPLE_DECLARE(carp, 0); static eventhandler_tag if_detach_event_tag; static __inline u_int16_t carp_cksum(struct mbuf *m, int len) { return (in_cksum(m, len)); } static void carp_hmac_prepare(struct carp_softc *sc) { u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; u_int8_t vhid = sc->sc_vhid & 0xff; struct ifaddr *ifa; int i, found; #ifdef INET struct in_addr last, cur, in; #endif #ifdef INET6 struct in6_addr last6, cur6, in6; #endif if (sc->sc_carpdev) CARP_SCLOCK(sc); /* XXX: possible race here */ /* compute ipad from key */ bzero(sc->sc_pad, sizeof(sc->sc_pad)); bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); for (i = 0; i < sizeof(sc->sc_pad); i++) sc->sc_pad[i] ^= 0x36; /* precompute first part of inner hash */ SHA1Init(&sc->sc_sha1); SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); #ifdef INET cur.s_addr = 0; do { found = 0; last = cur; cur.s_addr = 0xffffffff; IF_ADDR_LOCK(SC2IFP(sc)); TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; if (ifa->ifa_addr->sa_family == AF_INET && ntohl(in.s_addr) > ntohl(last.s_addr) && ntohl(in.s_addr) < ntohl(cur.s_addr)) { cur.s_addr = in.s_addr; found++; } } IF_ADDR_UNLOCK(SC2IFP(sc)); if (found) SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); } while (found); #endif /* INET */ #ifdef INET6 memset(&cur6, 0, sizeof(cur6)); do { found = 0; last6 = cur6; memset(&cur6, 0xff, sizeof(cur6)); IF_ADDR_LOCK(SC2IFP(sc)); TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { in6 = ifatoia6(ifa)->ia_addr.sin6_addr; if (IN6_IS_SCOPE_EMBED(&in6)) in6.s6_addr16[1] = 0; if (ifa->ifa_addr->sa_family == AF_INET6 && memcmp(&in6, &last6, sizeof(in6)) > 0 && memcmp(&in6, &cur6, sizeof(in6)) < 0) { cur6 = in6; found++; } } IF_ADDR_UNLOCK(SC2IFP(sc)); if (found) SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); } while (found); #endif /* INET6 */ /* convert ipad to opad */ for (i = 0; i < sizeof(sc->sc_pad); i++) sc->sc_pad[i] ^= 0x36 ^ 0x5c; if (sc->sc_carpdev) CARP_SCUNLOCK(sc); } static void carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2], unsigned char md[20]) { SHA1_CTX sha1ctx; /* fetch first half of inner hash */ bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); SHA1Final(md, &sha1ctx); /* outer hash */ SHA1Init(&sha1ctx); SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); SHA1Update(&sha1ctx, md, 20); SHA1Final(md, &sha1ctx); } static int carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2], unsigned char md[20]) { unsigned char md2[20]; CARP_SCLOCK_ASSERT(sc); carp_hmac_generate(sc, counter, md2); return (bcmp(md, md2, sizeof(md2))); } static void carp_setroute(struct carp_softc *sc, int cmd) { struct ifaddr *ifa; int s; if (sc->sc_carpdev) CARP_SCLOCK_ASSERT(sc); s = splnet(); TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family == AF_INET && sc->sc_carpdev != NULL) { int count = carp_addrcount( (struct carp_if *)sc->sc_carpdev->if_carp, ifatoia(ifa), CARP_COUNT_MASTER); if ((cmd == RTM_ADD && count == 1) || (cmd == RTM_DELETE && count == 0)) rtinit(ifa, cmd, RTF_UP | RTF_HOST); } } splx(s); } static int carp_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct carp_softc *sc; struct ifnet *ifp; sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); ifp = SC2IFP(sc) = if_alloc(IFT_ETHER); if (ifp == NULL) { free(sc, M_CARP); return (ENOSPC); } sc->sc_flags_backup = 0; sc->sc_suppress = 0; sc->sc_advbase = CARP_DFLTINTV; sc->sc_vhid = -1; /* required setting */ sc->sc_advskew = 0; sc->sc_init_counter = 1; sc->sc_naddrs = sc->sc_naddrs6 = 0; /* M_ZERO? */ sc->sc_imo.imo_membership = (struct in_multi **)malloc( (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP, M_WAITOK); sc->sc_imo.imo_mfilters = NULL; sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; sc->sc_imo.imo_multicast_vif = -1; #ifdef INET6 sc->sc_im6o.im6o_membership = (struct in6_multi **)malloc( (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP, M_WAITOK); sc->sc_im6o.im6o_mfilters = NULL; sc->sc_im6o.im6o_max_memberships = IPV6_MIN_MEMBERSHIPS; sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; #endif callout_init(&sc->sc_ad_tmo, CALLOUT_MPSAFE); callout_init(&sc->sc_md_tmo, CALLOUT_MPSAFE); callout_init(&sc->sc_md6_tmo, CALLOUT_MPSAFE); ifp->if_softc = sc; if_initname(ifp, CARP_IFNAME, unit); ifp->if_mtu = ETHERMTU; ifp->if_flags = IFF_LOOPBACK; ifp->if_ioctl = carp_ioctl; ifp->if_output = carp_looutput; ifp->if_start = carp_start; ifp->if_type = IFT_CARP; ifp->if_snd.ifq_maxlen = ifqmaxlen; ifp->if_hdrlen = 0; if_attach(ifp); bpfattach(SC2IFP(sc), DLT_NULL, sizeof(u_int32_t)); mtx_lock(&carp_mtx); LIST_INSERT_HEAD(&carpif_list, sc, sc_next); mtx_unlock(&carp_mtx); return (0); } static void carp_clone_destroy(struct ifnet *ifp) { struct carp_softc *sc = ifp->if_softc; if (sc->sc_carpdev) CARP_SCLOCK(sc); carpdetach(sc, 1); /* Returns unlocked. */ mtx_lock(&carp_mtx); LIST_REMOVE(sc, sc_next); mtx_unlock(&carp_mtx); bpfdetach(ifp); if_detach(ifp); if_free_type(ifp, IFT_ETHER); free(sc->sc_imo.imo_membership, M_CARP); #ifdef INET6 free(sc->sc_im6o.im6o_membership, M_CARP); #endif free(sc, M_CARP); } /* * This function can be called on CARP interface destroy path, * and in case of the removal of the underlying interface as - * well. We differentiate these two cases. In the latter case - * we do not cleanup our multicast memberships, since they - * are already freed. Also, in the latter case we do not + * well. We differentiate these two cases: in case of destruction + * of the underlying interface, we do not cleanup our multicast + * memberships, since they are already freed. But we purge pointers + * to multicast structures, since they are no longer valid, to + * avoid panic in future calls to carpdetach(). Also, we do not * release the lock on return, because the function will be * called once more, for another CARP instance on the same * interface. */ static void carpdetach(struct carp_softc *sc, int unlock) { struct carp_if *cif; callout_stop(&sc->sc_ad_tmo); callout_stop(&sc->sc_md_tmo); callout_stop(&sc->sc_md6_tmo); if (sc->sc_suppress) carp_suppress_preempt--; sc->sc_suppress = 0; if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) carp_suppress_preempt--; sc->sc_sendad_errors = 0; carp_set_state(sc, INIT); SC2IFP(sc)->if_flags &= ~IFF_UP; carp_setrun(sc, 0); - if (unlock) - carp_multicast_cleanup(sc); + carp_multicast_cleanup(sc, unlock); #ifdef INET6 - carp_multicast6_cleanup(sc); + carp_multicast6_cleanup(sc, unlock); #endif if (sc->sc_carpdev != NULL) { cif = (struct carp_if *)sc->sc_carpdev->if_carp; CARP_LOCK_ASSERT(cif); TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); if (!--cif->vhif_nvrs) { ifpromisc(sc->sc_carpdev, 0); sc->sc_carpdev->if_carp = NULL; CARP_LOCK_DESTROY(cif); free(cif, M_CARP); } else if (unlock) CARP_UNLOCK(cif); sc->sc_carpdev = NULL; } } /* Detach an interface from the carp. */ static void carp_ifdetach(void *arg __unused, struct ifnet *ifp) { struct carp_if *cif = (struct carp_if *)ifp->if_carp; struct carp_softc *sc, *nextsc; if (cif == NULL) return; /* * XXX: At the end of for() cycle the lock will be destroyed. */ CARP_LOCK(cif); for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) { nextsc = TAILQ_NEXT(sc, sc_list); carpdetach(sc, 0); } } /* * process input packet. * we have rearranged checks order compared to the rfc, * but it seems more efficient this way or not possible otherwise. */ void carp_input(struct mbuf *m, int hlen) { struct ip *ip = mtod(m, struct ip *); struct carp_header *ch; int iplen, len; CARPSTATS_INC(carps_ipackets); if (!carp_opts[CARPCTL_ALLOW]) { m_freem(m); return; } /* check if received on a valid carp interface */ if (m->m_pkthdr.rcvif->if_carp == NULL) { CARPSTATS_INC(carps_badif); CARP_DEBUG("carp_input: packet received on non-carp " "interface: %s\n", m->m_pkthdr.rcvif->if_xname); m_freem(m); return; } /* verify that the IP TTL is 255. */ if (ip->ip_ttl != CARP_DFLTTL) { CARPSTATS_INC(carps_badttl); CARP_DEBUG("carp_input: received ttl %d != 255 on %s\n", ip->ip_ttl, m->m_pkthdr.rcvif->if_xname); m_freem(m); return; } iplen = ip->ip_hl << 2; if (m->m_pkthdr.len < iplen + sizeof(*ch)) { CARPSTATS_INC(carps_badlen); CARP_DEBUG("carp_input: received len %zd < " "sizeof(struct carp_header) on %s\n", m->m_len - sizeof(struct ip), m->m_pkthdr.rcvif->if_xname); m_freem(m); return; } if (iplen + sizeof(*ch) < m->m_len) { if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { CARPSTATS_INC(carps_hdrops); CARP_DEBUG("carp_input: pullup failed\n"); return; } ip = mtod(m, struct ip *); } ch = (struct carp_header *)((char *)ip + iplen); /* * verify that the received packet length is * equal to the CARP header */ len = iplen + sizeof(*ch); if (len > m->m_pkthdr.len) { CARPSTATS_INC(carps_badlen); CARP_DEBUG("carp_input: packet too short %d on %s\n", m->m_pkthdr.len, m->m_pkthdr.rcvif->if_xname); m_freem(m); return; } if ((m = m_pullup(m, len)) == NULL) { CARPSTATS_INC(carps_hdrops); return; } ip = mtod(m, struct ip *); ch = (struct carp_header *)((char *)ip + iplen); /* verify the CARP checksum */ m->m_data += iplen; if (carp_cksum(m, len - iplen)) { CARPSTATS_INC(carps_badsum); CARP_DEBUG("carp_input: checksum failed on %s\n", m->m_pkthdr.rcvif->if_xname); m_freem(m); return; } m->m_data -= iplen; carp_input_c(m, ch, AF_INET); } #ifdef INET6 int carp6_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct carp_header *ch; u_int len; CARPSTATS_INC(carps_ipackets6); if (!carp_opts[CARPCTL_ALLOW]) { m_freem(m); return (IPPROTO_DONE); } /* check if received on a valid carp interface */ if (m->m_pkthdr.rcvif->if_carp == NULL) { CARPSTATS_INC(carps_badif); CARP_DEBUG("carp6_input: packet received on non-carp " "interface: %s\n", m->m_pkthdr.rcvif->if_xname); m_freem(m); return (IPPROTO_DONE); } /* verify that the IP TTL is 255 */ if (ip6->ip6_hlim != CARP_DFLTTL) { CARPSTATS_INC(carps_badttl); CARP_DEBUG("carp6_input: received ttl %d != 255 on %s\n", ip6->ip6_hlim, m->m_pkthdr.rcvif->if_xname); m_freem(m); return (IPPROTO_DONE); } /* verify that we have a complete carp packet */ len = m->m_len; IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); if (ch == NULL) { CARPSTATS_INC(carps_badlen); CARP_DEBUG("carp6_input: packet size %u too small\n", len); return (IPPROTO_DONE); } /* verify the CARP checksum */ m->m_data += *offp; if (carp_cksum(m, sizeof(*ch))) { CARPSTATS_INC(carps_badsum); CARP_DEBUG("carp6_input: checksum failed, on %s\n", m->m_pkthdr.rcvif->if_xname); m_freem(m); return (IPPROTO_DONE); } m->m_data -= *offp; carp_input_c(m, ch, AF_INET6); return (IPPROTO_DONE); } #endif /* INET6 */ static void carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) { struct ifnet *ifp = m->m_pkthdr.rcvif; struct carp_softc *sc; u_int64_t tmp_counter; struct timeval sc_tv, ch_tv; /* verify that the VHID is valid on the receiving interface */ CARP_LOCK(ifp->if_carp); TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list) if (sc->sc_vhid == ch->carp_vhid) break; if (!sc || !((SC2IFP(sc)->if_flags & IFF_UP) && (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) { CARPSTATS_INC(carps_badvhid); CARP_UNLOCK(ifp->if_carp); m_freem(m); return; } getmicrotime(&SC2IFP(sc)->if_lastchange); SC2IFP(sc)->if_ipackets++; SC2IFP(sc)->if_ibytes += m->m_pkthdr.len; if (bpf_peers_present(SC2IFP(sc)->if_bpf)) { struct ip *ip = mtod(m, struct ip *); uint32_t af1 = af; /* BPF wants net byte order */ ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2)); ip->ip_off = htons(ip->ip_off); bpf_mtap2(SC2IFP(sc)->if_bpf, &af1, sizeof(af1), m); } /* verify the CARP version. */ if (ch->carp_version != CARP_VERSION) { CARPSTATS_INC(carps_badver); SC2IFP(sc)->if_ierrors++; CARP_UNLOCK(ifp->if_carp); CARP_DEBUG("%s; invalid version %d\n", SC2IFP(sc)->if_xname, ch->carp_version); m_freem(m); return; } /* verify the hash */ if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { CARPSTATS_INC(carps_badauth); SC2IFP(sc)->if_ierrors++; CARP_UNLOCK(ifp->if_carp); CARP_DEBUG("%s: incorrect hash\n", SC2IFP(sc)->if_xname); m_freem(m); return; } tmp_counter = ntohl(ch->carp_counter[0]); tmp_counter = tmp_counter<<32; tmp_counter += ntohl(ch->carp_counter[1]); /* XXX Replay protection goes here */ sc->sc_init_counter = 0; sc->sc_counter = tmp_counter; sc_tv.tv_sec = sc->sc_advbase; if (carp_suppress_preempt && sc->sc_advskew < 240) sc_tv.tv_usec = 240 * 1000000 / 256; else sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256; ch_tv.tv_sec = ch->carp_advbase; ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; switch (sc->sc_state) { case INIT: break; case MASTER: /* * If we receive an advertisement from a master who's going to * be more frequent than us, go into BACKUP state. */ if (timevalcmp(&sc_tv, &ch_tv, >) || timevalcmp(&sc_tv, &ch_tv, ==)) { callout_stop(&sc->sc_ad_tmo); CARP_LOG("%s: MASTER -> BACKUP " "(more frequent advertisement received)\n", SC2IFP(sc)->if_xname); carp_set_state(sc, BACKUP); carp_setrun(sc, 0); carp_setroute(sc, RTM_DELETE); } break; case BACKUP: /* * If we're pre-empting masters who advertise slower than us, * and this one claims to be slower, treat him as down. */ if (carp_opts[CARPCTL_PREEMPT] && timevalcmp(&sc_tv, &ch_tv, <)) { CARP_LOG("%s: BACKUP -> MASTER " "(preempting a slower master)\n", SC2IFP(sc)->if_xname); carp_master_down_locked(sc); break; } /* * If the master is going to advertise at such a low frequency * that he's guaranteed to time out, we'd might as well just * treat him as timed out now. */ sc_tv.tv_sec = sc->sc_advbase * 3; if (timevalcmp(&sc_tv, &ch_tv, <)) { CARP_LOG("%s: BACKUP -> MASTER " "(master timed out)\n", SC2IFP(sc)->if_xname); carp_master_down_locked(sc); break; } /* * Otherwise, we reset the counter and wait for the next * advertisement. */ carp_setrun(sc, af); break; } CARP_UNLOCK(ifp->if_carp); m_freem(m); return; } static int carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) { struct m_tag *mtag; struct ifnet *ifp = SC2IFP(sc); if (sc->sc_init_counter) { /* this could also be seconds since unix epoch */ sc->sc_counter = arc4random(); sc->sc_counter = sc->sc_counter << 32; sc->sc_counter += arc4random(); } else sc->sc_counter++; ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); /* Tag packet for carp_output */ mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT); if (mtag == NULL) { m_freem(m); SC2IFP(sc)->if_oerrors++; return (ENOMEM); } bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *)); m_tag_prepend(m, mtag); return (0); } static void carp_send_ad_all(void) { struct carp_softc *sc; mtx_lock(&carp_mtx); LIST_FOREACH(sc, &carpif_list, sc_next) { if (sc->sc_carpdev == NULL) continue; CARP_SCLOCK(sc); if ((SC2IFP(sc)->if_flags & IFF_UP) && (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING) && sc->sc_state == MASTER) carp_send_ad_locked(sc); CARP_SCUNLOCK(sc); } mtx_unlock(&carp_mtx); } static void carp_send_ad(void *v) { struct carp_softc *sc = v; CARP_SCLOCK(sc); carp_send_ad_locked(sc); CARP_SCUNLOCK(sc); } static void carp_send_ad_locked(struct carp_softc *sc) { struct carp_header ch; struct timeval tv; struct carp_header *ch_ptr; struct mbuf *m; int len, advbase, advskew; CARP_SCLOCK_ASSERT(sc); /* bow out if we've lost our UPness or RUNNINGuiness */ if (!((SC2IFP(sc)->if_flags & IFF_UP) && (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) { advbase = 255; advskew = 255; } else { advbase = sc->sc_advbase; if (!carp_suppress_preempt || sc->sc_advskew > 240) advskew = sc->sc_advskew; else advskew = 240; tv.tv_sec = advbase; tv.tv_usec = advskew * 1000000 / 256; } ch.carp_version = CARP_VERSION; ch.carp_type = CARP_ADVERTISEMENT; ch.carp_vhid = sc->sc_vhid; ch.carp_advbase = advbase; ch.carp_advskew = advskew; ch.carp_authlen = 7; /* XXX DEFINE */ ch.carp_pad1 = 0; /* must be zero */ ch.carp_cksum = 0; #ifdef INET if (sc->sc_ia) { struct ip *ip; MGETHDR(m, M_DONTWAIT, MT_HEADER); if (m == NULL) { SC2IFP(sc)->if_oerrors++; CARPSTATS_INC(carps_onomem); /* XXX maybe less ? */ if (advbase != 255 || advskew != 255) callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc); return; } len = sizeof(*ip) + sizeof(ch); m->m_pkthdr.len = len; m->m_pkthdr.rcvif = NULL; m->m_len = len; MH_ALIGN(m, m->m_len); m->m_flags |= M_MCAST; ip = mtod(m, struct ip *); ip->ip_v = IPVERSION; ip->ip_hl = sizeof(*ip) >> 2; ip->ip_tos = IPTOS_LOWDELAY; ip->ip_len = len; ip->ip_id = ip_newid(); ip->ip_off = IP_DF; ip->ip_ttl = CARP_DFLTTL; ip->ip_p = IPPROTO_CARP; ip->ip_sum = 0; ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr; ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); ch_ptr = (struct carp_header *)(&ip[1]); bcopy(&ch, ch_ptr, sizeof(ch)); if (carp_prepare_ad(m, sc, ch_ptr)) return; m->m_data += sizeof(*ip); ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip)); m->m_data -= sizeof(*ip); getmicrotime(&SC2IFP(sc)->if_lastchange); SC2IFP(sc)->if_opackets++; SC2IFP(sc)->if_obytes += len; CARPSTATS_INC(carps_opackets); if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) { SC2IFP(sc)->if_oerrors++; if (sc->sc_sendad_errors < INT_MAX) sc->sc_sendad_errors++; if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { carp_suppress_preempt++; if (carp_suppress_preempt == 1) { CARP_SCUNLOCK(sc); carp_send_ad_all(); CARP_SCLOCK(sc); } } sc->sc_sendad_success = 0; } else { if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { if (++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) { carp_suppress_preempt--; sc->sc_sendad_errors = 0; } } else sc->sc_sendad_errors = 0; } } #endif /* INET */ #ifdef INET6 if (sc->sc_ia6) { struct ip6_hdr *ip6; MGETHDR(m, M_DONTWAIT, MT_HEADER); if (m == NULL) { SC2IFP(sc)->if_oerrors++; CARPSTATS_INC(carps_onomem); /* XXX maybe less ? */ if (advbase != 255 || advskew != 255) callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc); return; } len = sizeof(*ip6) + sizeof(ch); m->m_pkthdr.len = len; m->m_pkthdr.rcvif = NULL; m->m_len = len; MH_ALIGN(m, m->m_len); m->m_flags |= M_MCAST; ip6 = mtod(m, struct ip6_hdr *); bzero(ip6, sizeof(*ip6)); ip6->ip6_vfc |= IPV6_VERSION; ip6->ip6_hlim = CARP_DFLTTL; ip6->ip6_nxt = IPPROTO_CARP; bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src, sizeof(struct in6_addr)); /* set the multicast destination */ ip6->ip6_dst.s6_addr16[0] = htons(0xff02); ip6->ip6_dst.s6_addr8[15] = 0x12; if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { SC2IFP(sc)->if_oerrors++; m_freem(m); CARP_DEBUG("%s: in6_setscope failed\n", __func__); return; } ch_ptr = (struct carp_header *)(&ip6[1]); bcopy(&ch, ch_ptr, sizeof(ch)); if (carp_prepare_ad(m, sc, ch_ptr)) return; m->m_data += sizeof(*ip6); ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6)); m->m_data -= sizeof(*ip6); getmicrotime(&SC2IFP(sc)->if_lastchange); SC2IFP(sc)->if_opackets++; SC2IFP(sc)->if_obytes += len; CARPSTATS_INC(carps_opackets6); if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) { SC2IFP(sc)->if_oerrors++; if (sc->sc_sendad_errors < INT_MAX) sc->sc_sendad_errors++; if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { carp_suppress_preempt++; if (carp_suppress_preempt == 1) { CARP_SCUNLOCK(sc); carp_send_ad_all(); CARP_SCLOCK(sc); } } sc->sc_sendad_success = 0; } else { if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { if (++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) { carp_suppress_preempt--; sc->sc_sendad_errors = 0; } } else sc->sc_sendad_errors = 0; } } #endif /* INET6 */ if (advbase != 255 || advskew != 255) callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc); } /* * Broadcast a gratuitous ARP request containing * the virtual router MAC address for each IP address * associated with the virtual router. */ static void carp_send_arp(struct carp_softc *sc) { struct ifaddr *ifa; TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family != AF_INET) continue; /* arprequest(sc->sc_carpdev, &in, &in, IF_LLADDR(sc->sc_ifp)); */ arp_ifinit2(sc->sc_carpdev, ifa, IF_LLADDR(sc->sc_ifp)); DELAY(1000); /* XXX */ } } #ifdef INET6 static void carp_send_na(struct carp_softc *sc) { struct ifaddr *ifa; struct in6_addr *in6; static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; nd6_na_output(sc->sc_carpdev, &mcast, in6, ND_NA_FLAG_OVERRIDE, 1, NULL); DELAY(1000); /* XXX */ } } #endif /* INET6 */ static int carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type) { struct carp_softc *vh; struct ifaddr *ifa; int count = 0; CARP_LOCK_ASSERT(cif); TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { if ((type == CARP_COUNT_RUNNING && (SC2IFP(vh)->if_flags & IFF_UP) && (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) || (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) { IF_ADDR_LOCK(SC2IFP(vh)); TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family == AF_INET && ia->ia_addr.sin_addr.s_addr == ifatoia(ifa)->ia_addr.sin_addr.s_addr) count++; } IF_ADDR_UNLOCK(SC2IFP(vh)); } } return (count); } int carp_iamatch(struct ifnet *ifp, struct in_ifaddr *ia, struct in_addr *isaddr, u_int8_t **enaddr) { struct carp_if *cif; struct carp_softc *vh; int index, count = 0; struct ifaddr *ifa; cif = ifp->if_carp; CARP_LOCK(cif); if (carp_opts[CARPCTL_ARPBALANCE]) { /* * XXX proof of concept implementation. * We use the source ip to decide which virtual host should * handle the request. If we're master of that virtual host, * then we respond, otherwise, just drop the arp packet on * the floor. */ count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING); if (count == 0) { /* should never reach this */ CARP_UNLOCK(cif); return (0); } /* this should be a hash, like pf_hash() */ index = ntohl(isaddr->s_addr) % count; count = 0; TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { if ((SC2IFP(vh)->if_flags & IFF_UP) && (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) { IF_ADDR_LOCK(SC2IFP(vh)); TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family == AF_INET && ia->ia_addr.sin_addr.s_addr == ifatoia(ifa)->ia_addr.sin_addr.s_addr) { if (count == index) { if (vh->sc_state == MASTER) { *enaddr = IF_LLADDR(vh->sc_ifp); IF_ADDR_UNLOCK(SC2IFP(vh)); CARP_UNLOCK(cif); return (1); } else { IF_ADDR_UNLOCK(SC2IFP(vh)); CARP_UNLOCK(cif); return (0); } } count++; } } IF_ADDR_UNLOCK(SC2IFP(vh)); } } } else { TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { if ((SC2IFP(vh)->if_flags & IFF_UP) && (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) && ia->ia_ifp == SC2IFP(vh) && vh->sc_state == MASTER) { *enaddr = IF_LLADDR(vh->sc_ifp); CARP_UNLOCK(cif); return (1); } } } CARP_UNLOCK(cif); return (0); } #ifdef INET6 struct ifaddr * carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr) { struct carp_if *cif; struct carp_softc *vh; struct ifaddr *ifa; cif = ifp->if_carp; CARP_LOCK(cif); TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { IF_ADDR_LOCK(SC2IFP(vh)); TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, ifa_list) { if (IN6_ARE_ADDR_EQUAL(taddr, &ifatoia6(ifa)->ia_addr.sin6_addr) && (SC2IFP(vh)->if_flags & IFF_UP) && (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) && vh->sc_state == MASTER) { ifa_ref(ifa); IF_ADDR_UNLOCK(SC2IFP(vh)); CARP_UNLOCK(cif); return (ifa); } } IF_ADDR_UNLOCK(SC2IFP(vh)); } CARP_UNLOCK(cif); return (NULL); } caddr_t carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) { struct m_tag *mtag; struct carp_if *cif; struct carp_softc *sc; struct ifaddr *ifa; cif = ifp->if_carp; CARP_LOCK(cif); TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { IF_ADDR_LOCK(SC2IFP(sc)); TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) { if (IN6_ARE_ADDR_EQUAL(taddr, &ifatoia6(ifa)->ia_addr.sin6_addr) && (SC2IFP(sc)->if_flags & IFF_UP) && (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING)) { struct ifnet *ifp = SC2IFP(sc); mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT); if (mtag == NULL) { /* better a bit than nothing */ IF_ADDR_UNLOCK(SC2IFP(sc)); CARP_UNLOCK(cif); return (IF_LLADDR(sc->sc_ifp)); } bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *)); m_tag_prepend(m, mtag); IF_ADDR_UNLOCK(SC2IFP(sc)); CARP_UNLOCK(cif); return (IF_LLADDR(sc->sc_ifp)); } } IF_ADDR_UNLOCK(SC2IFP(sc)); } CARP_UNLOCK(cif); return (NULL); } #endif struct ifnet * carp_forus(struct ifnet *ifp, u_char *dhost) { struct carp_if *cif; struct carp_softc *vh; u_int8_t *ena = dhost; if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) return (NULL); cif = ifp->if_carp; CARP_LOCK(cif); TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) if ((SC2IFP(vh)->if_flags & IFF_UP) && (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) && vh->sc_state == MASTER && !bcmp(dhost, IF_LLADDR(vh->sc_ifp), ETHER_ADDR_LEN)) { CARP_UNLOCK(cif); return (SC2IFP(vh)); } CARP_UNLOCK(cif); return (NULL); } static void carp_master_down(void *v) { struct carp_softc *sc = v; CARP_SCLOCK(sc); carp_master_down_locked(sc); CARP_SCUNLOCK(sc); } static void carp_master_down_locked(struct carp_softc *sc) { if (sc->sc_carpdev) CARP_SCLOCK_ASSERT(sc); switch (sc->sc_state) { case INIT: printf("%s: master_down event in INIT state\n", SC2IFP(sc)->if_xname); break; case MASTER: break; case BACKUP: carp_set_state(sc, MASTER); carp_send_ad_locked(sc); carp_send_arp(sc); #ifdef INET6 carp_send_na(sc); #endif /* INET6 */ carp_setrun(sc, 0); carp_setroute(sc, RTM_ADD); break; } } /* * When in backup state, af indicates whether to reset the master down timer * for v4 or v6. If it's set to zero, reset the ones which are already pending. */ static void carp_setrun(struct carp_softc *sc, sa_family_t af) { struct timeval tv; if (sc->sc_carpdev == NULL) { SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; carp_set_state(sc, INIT); return; } else CARP_SCLOCK_ASSERT(sc); if (SC2IFP(sc)->if_flags & IFF_UP && sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6) && sc->sc_carpdev->if_link_state == LINK_STATE_UP) SC2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING; else { SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; carp_setroute(sc, RTM_DELETE); return; } switch (sc->sc_state) { case INIT: if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) { carp_send_ad_locked(sc); carp_send_arp(sc); #ifdef INET6 carp_send_na(sc); #endif /* INET6 */ CARP_LOG("%s: INIT -> MASTER (preempting)\n", SC2IFP(sc)->if_xname); carp_set_state(sc, MASTER); carp_setroute(sc, RTM_ADD); } else { CARP_LOG("%s: INIT -> BACKUP\n", SC2IFP(sc)->if_xname); carp_set_state(sc, BACKUP); carp_setroute(sc, RTM_DELETE); carp_setrun(sc, 0); } break; case BACKUP: callout_stop(&sc->sc_ad_tmo); tv.tv_sec = 3 * sc->sc_advbase; tv.tv_usec = sc->sc_advskew * 1000000 / 256; switch (af) { #ifdef INET case AF_INET: callout_reset(&sc->sc_md_tmo, tvtohz(&tv), carp_master_down, sc); break; #endif /* INET */ #ifdef INET6 case AF_INET6: callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), carp_master_down, sc); break; #endif /* INET6 */ default: if (sc->sc_naddrs) callout_reset(&sc->sc_md_tmo, tvtohz(&tv), carp_master_down, sc); if (sc->sc_naddrs6) callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), carp_master_down, sc); break; } break; case MASTER: tv.tv_sec = sc->sc_advbase; tv.tv_usec = sc->sc_advskew * 1000000 / 256; callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc); break; } } static void -carp_multicast_cleanup(struct carp_softc *sc) +carp_multicast_cleanup(struct carp_softc *sc, int dofree) { struct ip_moptions *imo = &sc->sc_imo; u_int16_t n = imo->imo_num_memberships; /* Clean up our own multicast memberships */ while (n-- > 0) { if (imo->imo_membership[n] != NULL) { - in_delmulti(imo->imo_membership[n]); + if (dofree) + in_delmulti(imo->imo_membership[n]); imo->imo_membership[n] = NULL; } } KASSERT(imo->imo_mfilters == NULL, ("%s: imo_mfilters != NULL", __func__)); imo->imo_num_memberships = 0; imo->imo_multicast_ifp = NULL; } #ifdef INET6 static void -carp_multicast6_cleanup(struct carp_softc *sc) +carp_multicast6_cleanup(struct carp_softc *sc, int dofree) { struct ip6_moptions *im6o = &sc->sc_im6o; u_int16_t n = im6o->im6o_num_memberships; while (n-- > 0) { if (im6o->im6o_membership[n] != NULL) { - in6_mc_leave(im6o->im6o_membership[n], NULL); + if (dofree) + in6_mc_leave(im6o->im6o_membership[n], NULL); im6o->im6o_membership[n] = NULL; } } KASSERT(im6o->im6o_mfilters == NULL, ("%s: im6o_mfilters != NULL", __func__)); im6o->im6o_num_memberships = 0; im6o->im6o_multicast_ifp = NULL; } #endif static int carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) { struct ifnet *ifp; struct carp_if *cif; struct in_ifaddr *ia, *ia_if; struct ip_moptions *imo = &sc->sc_imo; struct in_addr addr; u_long iaddr = htonl(sin->sin_addr.s_addr); int own, error; if (sin->sin_addr.s_addr == 0) { if (!(SC2IFP(sc)->if_flags & IFF_UP)) carp_set_state(sc, INIT); if (sc->sc_naddrs) SC2IFP(sc)->if_flags |= IFF_UP; if (sc->sc_carpdev) CARP_SCLOCK(sc); carp_setrun(sc, 0); if (sc->sc_carpdev) CARP_SCUNLOCK(sc); return (0); } /* we have to do it by hands to check we won't match on us */ ia_if = NULL; own = 0; IN_IFADDR_RLOCK(); TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { /* and, yeah, we need a multicast-capable iface too */ if (ia->ia_ifp != SC2IFP(sc) && (ia->ia_ifp->if_flags & IFF_MULTICAST) && (iaddr & ia->ia_subnetmask) == ia->ia_subnet) { if (!ia_if) ia_if = ia; if (sin->sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr) own++; } } if (!ia_if) { IN_IFADDR_RUNLOCK(); return (EADDRNOTAVAIL); } ia = ia_if; ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(); ifp = ia->ia_ifp; if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp)) { ifa_free(&ia->ia_ifa); return (EADDRNOTAVAIL); } if (imo->imo_num_memberships == 0) { addr.s_addr = htonl(INADDR_CARP_GROUP); if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == NULL) { ifa_free(&ia->ia_ifa); return (ENOBUFS); } imo->imo_num_memberships++; imo->imo_multicast_ifp = ifp; imo->imo_multicast_ttl = CARP_DFLTTL; imo->imo_multicast_loop = 0; } if (!ifp->if_carp) { cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO); if (!cif) { error = ENOBUFS; goto cleanup; } if ((error = ifpromisc(ifp, 1))) { free(cif, M_CARP); goto cleanup; } CARP_LOCK_INIT(cif); CARP_LOCK(cif); cif->vhif_ifp = ifp; TAILQ_INIT(&cif->vhif_vrs); ifp->if_carp = cif; } else { struct carp_softc *vr; cif = (struct carp_if *)ifp->if_carp; CARP_LOCK(cif); TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) if (vr != sc && vr->sc_vhid == sc->sc_vhid) { CARP_UNLOCK(cif); error = EEXIST; goto cleanup; } } sc->sc_ia = ia; sc->sc_carpdev = ifp; { /* XXX prevent endless loop if already in queue */ struct carp_softc *vr, *after = NULL; int myself = 0; cif = (struct carp_if *)ifp->if_carp; /* XXX: cif should not change, right? So we still hold the lock */ CARP_LOCK_ASSERT(cif); TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { if (vr == sc) myself = 1; if (vr->sc_vhid < sc->sc_vhid) after = vr; } if (!myself) { /* We're trying to keep things in order */ if (after == NULL) { TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); } else { TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); } cif->vhif_nvrs++; } } sc->sc_naddrs++; SC2IFP(sc)->if_flags |= IFF_UP; if (own) sc->sc_advskew = 0; carp_sc_state_locked(sc); carp_setrun(sc, 0); CARP_UNLOCK(cif); ifa_free(&ia->ia_ifa); /* XXXRW: should hold reference for softc. */ return (0); cleanup: in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); ifa_free(&ia->ia_ifa); return (error); } static int carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin) { int error = 0; if (!--sc->sc_naddrs) { struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp; struct ip_moptions *imo = &sc->sc_imo; CARP_LOCK(cif); callout_stop(&sc->sc_ad_tmo); SC2IFP(sc)->if_flags &= ~IFF_UP; SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; sc->sc_vhid = -1; in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); imo->imo_multicast_ifp = NULL; TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); if (!--cif->vhif_nvrs) { sc->sc_carpdev->if_carp = NULL; CARP_LOCK_DESTROY(cif); free(cif, M_CARP); } else { CARP_UNLOCK(cif); } } return (error); } #ifdef INET6 static int carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) { struct ifnet *ifp; struct carp_if *cif; struct in6_ifaddr *ia, *ia_if; struct ip6_moptions *im6o = &sc->sc_im6o; struct in6_addr in6; int own, error; error = 0; if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { if (!(SC2IFP(sc)->if_flags & IFF_UP)) carp_set_state(sc, INIT); if (sc->sc_naddrs6) SC2IFP(sc)->if_flags |= IFF_UP; if (sc->sc_carpdev) CARP_SCLOCK(sc); carp_setrun(sc, 0); if (sc->sc_carpdev) CARP_SCUNLOCK(sc); return (0); } /* we have to do it by hands to check we won't match on us */ ia_if = NULL; own = 0; IN6_IFADDR_RLOCK(); TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { int i; for (i = 0; i < 4; i++) { if ((sin6->sin6_addr.s6_addr32[i] & ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != (ia->ia_addr.sin6_addr.s6_addr32[i] & ia->ia_prefixmask.sin6_addr.s6_addr32[i])) break; } /* and, yeah, we need a multicast-capable iface too */ if (ia->ia_ifp != SC2IFP(sc) && (ia->ia_ifp->if_flags & IFF_MULTICAST) && (i == 4)) { if (!ia_if) ia_if = ia; if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, &ia->ia_addr.sin6_addr)) own++; } } if (!ia_if) { IN6_IFADDR_RUNLOCK(); return (EADDRNOTAVAIL); } ia = ia_if; ifa_ref(&ia->ia_ifa); IN6_IFADDR_RUNLOCK(); ifp = ia->ia_ifp; if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) { ifa_free(&ia->ia_ifa); return (EADDRNOTAVAIL); } if (!sc->sc_naddrs6) { struct in6_multi *in6m; im6o->im6o_multicast_ifp = ifp; /* join CARP multicast address */ bzero(&in6, sizeof(in6)); in6.s6_addr16[0] = htons(0xff02); in6.s6_addr8[15] = 0x12; if (in6_setscope(&in6, ifp, NULL) != 0) goto cleanup; in6m = NULL; error = in6_mc_join(ifp, &in6, NULL, &in6m, 0); if (error) goto cleanup; im6o->im6o_membership[0] = in6m; im6o->im6o_num_memberships++; /* join solicited multicast address */ bzero(&in6, sizeof(in6)); in6.s6_addr16[0] = htons(0xff02); in6.s6_addr32[1] = 0; in6.s6_addr32[2] = htonl(1); in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3]; in6.s6_addr8[12] = 0xff; if (in6_setscope(&in6, ifp, NULL) != 0) goto cleanup; in6m = NULL; error = in6_mc_join(ifp, &in6, NULL, &in6m, 0); if (error) goto cleanup; im6o->im6o_membership[1] = in6m; im6o->im6o_num_memberships++; } if (!ifp->if_carp) { cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO); if (!cif) { error = ENOBUFS; goto cleanup; } if ((error = ifpromisc(ifp, 1))) { free(cif, M_CARP); goto cleanup; } CARP_LOCK_INIT(cif); CARP_LOCK(cif); cif->vhif_ifp = ifp; TAILQ_INIT(&cif->vhif_vrs); ifp->if_carp = cif; } else { struct carp_softc *vr; cif = (struct carp_if *)ifp->if_carp; CARP_LOCK(cif); TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) if (vr != sc && vr->sc_vhid == sc->sc_vhid) { CARP_UNLOCK(cif); error = EINVAL; goto cleanup; } } sc->sc_ia6 = ia; sc->sc_carpdev = ifp; { /* XXX prevent endless loop if already in queue */ struct carp_softc *vr, *after = NULL; int myself = 0; cif = (struct carp_if *)ifp->if_carp; CARP_LOCK_ASSERT(cif); TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { if (vr == sc) myself = 1; if (vr->sc_vhid < sc->sc_vhid) after = vr; } if (!myself) { /* We're trying to keep things in order */ if (after == NULL) { TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); } else { TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); } cif->vhif_nvrs++; } } sc->sc_naddrs6++; SC2IFP(sc)->if_flags |= IFF_UP; if (own) sc->sc_advskew = 0; carp_sc_state_locked(sc); carp_setrun(sc, 0); CARP_UNLOCK(cif); ifa_free(&ia->ia_ifa); /* XXXRW: should hold reference for softc. */ return (0); cleanup: if (!sc->sc_naddrs6) - carp_multicast6_cleanup(sc); + carp_multicast6_cleanup(sc, 1); ifa_free(&ia->ia_ifa); return (error); } static int carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) { int error = 0; if (!--sc->sc_naddrs6) { struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp; CARP_LOCK(cif); callout_stop(&sc->sc_ad_tmo); SC2IFP(sc)->if_flags &= ~IFF_UP; SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; sc->sc_vhid = -1; - carp_multicast6_cleanup(sc); + carp_multicast6_cleanup(sc, 1); TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); if (!--cif->vhif_nvrs) { CARP_LOCK_DESTROY(cif); sc->sc_carpdev->if_carp = NULL; free(cif, M_CARP); } else CARP_UNLOCK(cif); } return (error); } #endif /* INET6 */ static int carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) { struct carp_softc *sc = ifp->if_softc, *vr; struct carpreq carpr; struct ifaddr *ifa; struct ifreq *ifr; struct ifaliasreq *ifra; int locked = 0, error = 0; ifa = (struct ifaddr *)addr; ifra = (struct ifaliasreq *)addr; ifr = (struct ifreq *)addr; switch (cmd) { case SIOCSIFADDR: switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: SC2IFP(sc)->if_flags |= IFF_UP; bcopy(ifa->ifa_addr, ifa->ifa_dstaddr, sizeof(struct sockaddr)); error = carp_set_addr(sc, satosin(ifa->ifa_addr)); break; #endif /* INET */ #ifdef INET6 case AF_INET6: SC2IFP(sc)->if_flags |= IFF_UP; error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); break; #endif /* INET6 */ default: error = EAFNOSUPPORT; break; } break; case SIOCAIFADDR: switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: SC2IFP(sc)->if_flags |= IFF_UP; bcopy(ifa->ifa_addr, ifa->ifa_dstaddr, sizeof(struct sockaddr)); error = carp_set_addr(sc, satosin(&ifra->ifra_addr)); break; #endif /* INET */ #ifdef INET6 case AF_INET6: SC2IFP(sc)->if_flags |= IFF_UP; error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr)); break; #endif /* INET6 */ default: error = EAFNOSUPPORT; break; } break; case SIOCDIFADDR: switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: error = carp_del_addr(sc, satosin(&ifra->ifra_addr)); break; #endif /* INET */ #ifdef INET6 case AF_INET6: error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr)); break; #endif /* INET6 */ default: error = EAFNOSUPPORT; break; } break; case SIOCSIFFLAGS: if (sc->sc_carpdev) { locked = 1; CARP_SCLOCK(sc); } if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) { callout_stop(&sc->sc_ad_tmo); callout_stop(&sc->sc_md_tmo); callout_stop(&sc->sc_md6_tmo); if (sc->sc_state == MASTER) carp_send_ad_locked(sc); carp_set_state(sc, INIT); carp_setrun(sc, 0); } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) { SC2IFP(sc)->if_flags |= IFF_UP; carp_setrun(sc, 0); } break; case SIOCSVH: error = priv_check(curthread, PRIV_NETINET_CARP); if (error) break; if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) break; error = 1; if (sc->sc_carpdev) { locked = 1; CARP_SCLOCK(sc); } if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { switch (carpr.carpr_state) { case BACKUP: callout_stop(&sc->sc_ad_tmo); carp_set_state(sc, BACKUP); carp_setrun(sc, 0); carp_setroute(sc, RTM_DELETE); break; case MASTER: carp_master_down_locked(sc); break; default: break; } } if (carpr.carpr_vhid > 0) { if (carpr.carpr_vhid > 255) { error = EINVAL; break; } if (sc->sc_carpdev) { struct carp_if *cif; cif = (struct carp_if *)sc->sc_carpdev->if_carp; TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) if (vr != sc && vr->sc_vhid == carpr.carpr_vhid) { error = EEXIST; break; } if (error == EEXIST) break; } sc->sc_vhid = carpr.carpr_vhid; IF_LLADDR(sc->sc_ifp)[0] = 0; IF_LLADDR(sc->sc_ifp)[1] = 0; IF_LLADDR(sc->sc_ifp)[2] = 0x5e; IF_LLADDR(sc->sc_ifp)[3] = 0; IF_LLADDR(sc->sc_ifp)[4] = 1; IF_LLADDR(sc->sc_ifp)[5] = sc->sc_vhid; error--; } if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) { if (carpr.carpr_advskew >= 255) { error = EINVAL; break; } if (carpr.carpr_advbase > 255) { error = EINVAL; break; } sc->sc_advbase = carpr.carpr_advbase; sc->sc_advskew = carpr.carpr_advskew; error--; } bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); if (error > 0) error = EINVAL; else { error = 0; carp_setrun(sc, 0); } break; case SIOCGVH: /* XXX: lockless read */ bzero(&carpr, sizeof(carpr)); carpr.carpr_state = sc->sc_state; carpr.carpr_vhid = sc->sc_vhid; carpr.carpr_advbase = sc->sc_advbase; carpr.carpr_advskew = sc->sc_advskew; error = priv_check(curthread, PRIV_NETINET_CARP); if (error == 0) bcopy(sc->sc_key, carpr.carpr_key, sizeof(carpr.carpr_key)); error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); break; default: error = EINVAL; } if (locked) CARP_SCUNLOCK(sc); carp_hmac_prepare(sc); return (error); } /* * XXX: this is looutput. We should eventually use it from there. */ static int carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, struct route *ro) { u_int32_t af; struct rtentry *rt = NULL; M_ASSERTPKTHDR(m); /* check if we have the packet header */ if (ro != NULL) rt = ro->ro_rt; if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { m_freem(m); return (rt->rt_flags & RTF_BLACKHOLE ? 0 : rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); } ifp->if_opackets++; ifp->if_obytes += m->m_pkthdr.len; /* BPF writes need to be handled specially. */ if (dst->sa_family == AF_UNSPEC) { bcopy(dst->sa_data, &af, sizeof(af)); dst->sa_family = af; } #if 1 /* XXX */ switch (dst->sa_family) { case AF_INET: case AF_INET6: case AF_IPX: case AF_APPLETALK: break; default: printf("carp_looutput: af=%d unexpected\n", dst->sa_family); m_freem(m); return (EAFNOSUPPORT); } #endif return(if_simloop(ifp, m, dst->sa_family, 0)); } /* * Start output on carp interface. This function should never be called. */ static void carp_start(struct ifnet *ifp) { #ifdef DEBUG printf("%s: start called\n", ifp->if_xname); #endif } int carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, struct rtentry *rt) { struct m_tag *mtag; struct carp_softc *sc; struct ifnet *carp_ifp; if (!sa) return (0); switch (sa->sa_family) { #ifdef INET case AF_INET: break; #endif /* INET */ #ifdef INET6 case AF_INET6: break; #endif /* INET6 */ default: return (0); } mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); if (mtag == NULL) return (0); bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *)); sc = carp_ifp->if_softc; /* Set the source MAC address to Virtual Router MAC Address */ switch (ifp->if_type) { case IFT_ETHER: case IFT_L2VLAN: { struct ether_header *eh; eh = mtod(m, struct ether_header *); eh->ether_shost[0] = 0; eh->ether_shost[1] = 0; eh->ether_shost[2] = 0x5e; eh->ether_shost[3] = 0; eh->ether_shost[4] = 1; eh->ether_shost[5] = sc->sc_vhid; } break; case IFT_FDDI: { struct fddi_header *fh; fh = mtod(m, struct fddi_header *); fh->fddi_shost[0] = 0; fh->fddi_shost[1] = 0; fh->fddi_shost[2] = 0x5e; fh->fddi_shost[3] = 0; fh->fddi_shost[4] = 1; fh->fddi_shost[5] = sc->sc_vhid; } break; case IFT_ISO88025: { struct iso88025_header *th; th = mtod(m, struct iso88025_header *); th->iso88025_shost[0] = 3; th->iso88025_shost[1] = 0; th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1); th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1); th->iso88025_shost[4] = 0; th->iso88025_shost[5] = 0; } break; default: printf("%s: carp is not supported for this interface type\n", ifp->if_xname); return (EOPNOTSUPP); } return (0); } static void carp_set_state(struct carp_softc *sc, int state) { int link_state; if (sc->sc_carpdev) CARP_SCLOCK_ASSERT(sc); if (sc->sc_state == state) return; sc->sc_state = state; switch (state) { case BACKUP: link_state = LINK_STATE_DOWN; break; case MASTER: link_state = LINK_STATE_UP; break; default: link_state = LINK_STATE_UNKNOWN; break; } if_link_state_change(SC2IFP(sc), link_state); } void carp_carpdev_state(struct ifnet *ifp) { struct carp_if *cif; cif = ifp->if_carp; CARP_LOCK(cif); carp_carpdev_state_locked(cif); CARP_UNLOCK(cif); } static void carp_carpdev_state_locked(struct carp_if *cif) { struct carp_softc *sc; TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) carp_sc_state_locked(sc); } static void carp_sc_state_locked(struct carp_softc *sc) { CARP_SCLOCK_ASSERT(sc); if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || !(sc->sc_carpdev->if_flags & IFF_UP)) { sc->sc_flags_backup = SC2IFP(sc)->if_flags; SC2IFP(sc)->if_flags &= ~IFF_UP; SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; callout_stop(&sc->sc_ad_tmo); callout_stop(&sc->sc_md_tmo); callout_stop(&sc->sc_md6_tmo); carp_set_state(sc, INIT); carp_setrun(sc, 0); if (!sc->sc_suppress) { carp_suppress_preempt++; if (carp_suppress_preempt == 1) { CARP_SCUNLOCK(sc); carp_send_ad_all(); CARP_SCLOCK(sc); } } sc->sc_suppress = 1; } else { SC2IFP(sc)->if_flags |= sc->sc_flags_backup; carp_set_state(sc, INIT); carp_setrun(sc, 0); if (sc->sc_suppress) carp_suppress_preempt--; sc->sc_suppress = 0; } return; } #ifdef INET extern struct domain inetdomain; static struct protosw in_carp_protosw = { .pr_type = SOCK_RAW, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_CARP, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = carp_input, .pr_output = (pr_output_t *)rip_output, .pr_ctloutput = rip_ctloutput, .pr_usrreqs = &rip_usrreqs }; #endif #ifdef INET6 extern struct domain inet6domain; static struct ip6protosw in6_carp_protosw = { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_CARP, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = carp6_input, .pr_output = rip6_output, .pr_ctloutput = rip6_ctloutput, .pr_usrreqs = &rip6_usrreqs }; #endif static void carp_mod_cleanup(void) { if (if_detach_event_tag == NULL) return; EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag); if_clone_detach(&carp_cloner); #ifdef INET if (proto_reg[CARP_INET] == 0) { (void)ipproto_unregister(IPPROTO_CARP); pf_proto_unregister(PF_INET, IPPROTO_CARP, SOCK_RAW); proto_reg[CARP_INET] = -1; } carp_iamatch_p = NULL; #endif #ifdef INET6 if (proto_reg[CARP_INET6] == 0) { (void)ip6proto_unregister(IPPROTO_CARP); pf_proto_unregister(PF_INET6, IPPROTO_CARP, SOCK_RAW); proto_reg[CARP_INET6] = -1; } carp_iamatch6_p = NULL; carp_macmatch6_p = NULL; #endif carp_linkstate_p = NULL; carp_forus_p = NULL; carp_output_p = NULL; mtx_destroy(&carp_mtx); } static int carp_mod_load(void) { int err; if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, carp_ifdetach, NULL, EVENTHANDLER_PRI_ANY); if (if_detach_event_tag == NULL) return (ENOMEM); mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF); LIST_INIT(&carpif_list); if_clone_attach(&carp_cloner); carp_linkstate_p = carp_carpdev_state; carp_forus_p = carp_forus; carp_output_p = carp_output; #ifdef INET6 carp_iamatch6_p = carp_iamatch6; carp_macmatch6_p = carp_macmatch6; proto_reg[CARP_INET6] = pf_proto_register(PF_INET6, (struct protosw *)&in6_carp_protosw); if (proto_reg[CARP_INET6] != 0) { printf("carp: error %d attaching to PF_INET6\n", proto_reg[CARP_INET6]); carp_mod_cleanup(); return (proto_reg[CARP_INET6]); } err = ip6proto_register(IPPROTO_CARP); if (err) { printf("carp: error %d registering with INET6\n", err); carp_mod_cleanup(); return (err); } #endif #ifdef INET carp_iamatch_p = carp_iamatch; proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw); if (proto_reg[CARP_INET] != 0) { printf("carp: error %d attaching to PF_INET\n", proto_reg[CARP_INET]); carp_mod_cleanup(); return (proto_reg[CARP_INET]); } err = ipproto_register(IPPROTO_CARP); if (err) { printf("carp: error %d registering with INET\n", err); carp_mod_cleanup(); return (err); } #endif return 0; } static int carp_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: return carp_mod_load(); /* NOTREACHED */ case MOD_UNLOAD: /* * XXX: For now, disallow module unloading by default due to * a race condition where a thread may dereference one of the * function pointer hooks after the module has been * unloaded, during processing of a packet, causing a panic. */ #ifdef CARPMOD_CAN_UNLOAD carp_mod_cleanup(); #else return (EBUSY); #endif break; default: return (EINVAL); } return (0); } static moduledata_t carp_mod = { "carp", carp_modevent, 0 }; DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); Index: projects/binutils-2.17/sys/netinet/sctp_cc_functions.c =================================================================== --- projects/binutils-2.17/sys/netinet/sctp_cc_functions.c (revision 215829) +++ projects/binutils-2.17/sys/netinet/sctp_cc_functions.c (revision 215830) @@ -1,1563 +1,1617 @@ /*- * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include __FBSDID("$FreeBSD$"); void sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net) { struct sctp_association *assoc; uint32_t cwnd_in_mtu; assoc = &stcb->asoc; /* * We take the minimum of the burst limit and the initial congestion * window. The initial congestion window is at least two times the * MTU. */ cwnd_in_mtu = SCTP_BASE_SYSCTL(sctp_initial_cwnd); if ((assoc->max_burst > 0) && (cwnd_in_mtu > assoc->max_burst)) cwnd_in_mtu = assoc->max_burst; net->cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu; net->ssthresh = assoc->peers_rwnd; + SDT_PROBE(sctp, cwnd, net, init, + stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net, + 0, net->cwnd); if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) { sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION); } } void sctp_cwnd_update_after_fr(struct sctp_tcb *stcb, struct sctp_association *asoc) { struct sctp_nets *net; /*- * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off == 1) && * (net->fast_retran_loss_recovery == 0))) */ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { if ((asoc->fast_retran_loss_recovery == 0) || (asoc->sctp_cmt_on_off == 1)) { /* out of a RFC2582 Fast recovery window? */ if (net->net_ack > 0) { /* * per section 7.2.3, are there any * destinations that had a fast retransmit * to them. If so what we need to do is * adjust ssthresh and cwnd. */ struct sctp_tmit_chunk *lchk; int old_cwnd = net->cwnd; net->ssthresh = net->cwnd / 2; if (net->ssthresh < (net->mtu * 2)) { net->ssthresh = 2 * net->mtu; } net->cwnd = net->ssthresh; + SDT_PROBE(sctp, cwnd, net, fr, + stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net, + old_cwnd, net->cwnd); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_FR); } lchk = TAILQ_FIRST(&asoc->send_queue); net->partial_bytes_acked = 0; /* Turn on fast recovery window */ asoc->fast_retran_loss_recovery = 1; if (lchk == NULL) { /* Mark end of the window */ asoc->fast_recovery_tsn = asoc->sending_seq - 1; } else { asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; } /* * CMT fast recovery -- per destination * recovery variable. */ net->fast_retran_loss_recovery = 1; if (lchk == NULL) { /* Mark end of the window */ net->fast_recovery_tsn = asoc->sending_seq - 1; } else { net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; } /* * Disable Nonce Sum Checking and store the * resync tsn */ asoc->nonce_sum_check = 0; asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1; sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32); sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net); } } else if (net->net_ack > 0) { /* * Mark a peg that we WOULD have done a cwnd * reduction but RFC2582 prevented this action. */ SCTP_STAT_INCR(sctps_fastretransinrtt); } } } void sctp_cwnd_update_after_sack(struct sctp_tcb *stcb, struct sctp_association *asoc, int accum_moved, int reneged_all, int will_exit) { struct sctp_nets *net; + int old_cwnd; /******************************/ /* update cwnd and Early FR */ /******************************/ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { #ifdef JANA_CMT_FAST_RECOVERY /* * CMT fast recovery code. Need to debug. */ if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) { if (compare_with_wrap(asoc->last_acked_seq, net->fast_recovery_tsn, MAX_TSN) || (asoc->last_acked_seq == net->fast_recovery_tsn) || compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) || (net->pseudo_cumack == net->fast_recovery_tsn)) { net->will_exit_fast_recovery = 1; } } #endif if (SCTP_BASE_SYSCTL(sctp_early_fr)) { /* * So, first of all do we need to have a Early FR * timer running? */ if ((!TAILQ_EMPTY(&asoc->sent_queue) && (net->ref_count > 1) && (net->flight_size < net->cwnd)) || (reneged_all)) { /* * yes, so in this case stop it if its * running, and then restart it. Reneging * all is a special case where we want to * run the Early FR timer and then force the * last few unacked to be sent, causing us * to illicit a sack with gaps to force out * the others. */ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { SCTP_STAT_INCR(sctps_earlyfrstpidsck2); sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_20); } SCTP_STAT_INCR(sctps_earlyfrstrid); sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net); } else { /* No, stop it if its running */ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { SCTP_STAT_INCR(sctps_earlyfrstpidsck3); sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_21); } } } /* if nothing was acked on this destination skip it */ if (net->net_ack == 0) { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK); } continue; } if (net->net_ack2 > 0) { /* * Karn's rule applies to clearing error count, this * is optional. */ net->error_count = 0; if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) == SCTP_ADDR_NOT_REACHABLE) { /* addr came good */ net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE; net->dest_state |= SCTP_ADDR_REACHABLE; sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED); /* now was it the primary? if so restore */ if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) { (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net); } } /* * JRS 5/14/07 - If CMT PF is on and the destination * is in PF state, set the destination to active * state and set the cwnd to one or two MTU's based * on whether PF1 or PF2 is being used. * * Should we stop any running T3 timer here? */ if ((asoc->sctp_cmt_on_off == 1) && (asoc->sctp_cmt_pf > 0) && ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) { net->dest_state &= ~SCTP_ADDR_PF; + old_cwnd = net->cwnd; net->cwnd = net->mtu * asoc->sctp_cmt_pf; + SDT_PROBE(sctp, cwnd, net, ack, + stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net, + old_cwnd, net->cwnd); SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n", net, net->cwnd); /* * Since the cwnd value is explicitly set, * skip the code that updates the cwnd * value. */ goto skip_cwnd_update; } } #ifdef JANA_CMT_FAST_RECOVERY /* * CMT fast recovery code */ /* * if (sctp_cmt_on_off == 1 && * net->fast_retran_loss_recovery && * net->will_exit_fast_recovery == 0) { @@@ Do something } * else if (sctp_cmt_on_off == 0 && * asoc->fast_retran_loss_recovery && will_exit == 0) { */ #endif if (asoc->fast_retran_loss_recovery && (will_exit == 0) && (asoc->sctp_cmt_on_off == 0)) { /* * If we are in loss recovery we skip any cwnd * update */ goto skip_cwnd_update; } /* * CMT: CUC algorithm. Update cwnd if pseudo-cumack has * moved. */ if (accum_moved || ((asoc->sctp_cmt_on_off == 1) && net->new_pseudo_cumack)) { /* If the cumulative ack moved we can proceed */ if (net->cwnd <= net->ssthresh) { /* We are in slow start */ if (net->flight_size + net->net_ack >= net->cwnd) { if (net->net_ack > (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable))) { + old_cwnd = net->cwnd; net->cwnd += (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable)); + SDT_PROBE(sctp, cwnd, net, ack, + stcb->asoc.my_vtag, + ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), + net, + old_cwnd, net->cwnd); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_FROM_SS); } } else { + old_cwnd = net->cwnd; net->cwnd += net->net_ack; + SDT_PROBE(sctp, cwnd, net, ack, + stcb->asoc.my_vtag, + ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), + net, + old_cwnd, net->cwnd); + if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, net->net_ack, SCTP_CWND_LOG_FROM_SS); } } } else { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, net, net->net_ack, SCTP_CWND_LOG_NOADV_SS); } } } else { /* We are in congestion avoidance */ /* * Add to pba */ net->partial_bytes_acked += net->net_ack; if ((net->flight_size + net->net_ack >= net->cwnd) && (net->partial_bytes_acked >= net->cwnd)) { net->partial_bytes_acked -= net->cwnd; + old_cwnd = net->cwnd; net->cwnd += net->mtu; + SDT_PROBE(sctp, cwnd, net, ack, + stcb->asoc.my_vtag, + ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), + net, + old_cwnd, net->cwnd); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_FROM_CA); } } else { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, net, net->net_ack, SCTP_CWND_LOG_NOADV_CA); } } } } else { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_NO_CUMACK); } } skip_cwnd_update: /* * NOW, according to Karn's rule do we need to restore the * RTO timer back? Check our net_ack2. If not set then we * have a ambiguity.. i.e. all data ack'd was sent to more * than one place. */ if (net->net_ack2) { /* restore any doubled timers */ net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1; if (net->RTO < stcb->asoc.minrto) { net->RTO = stcb->asoc.minrto; } if (net->RTO > stcb->asoc.maxrto) { net->RTO = stcb->asoc.maxrto; } } } } void sctp_cwnd_update_after_timeout(struct sctp_tcb *stcb, struct sctp_nets *net) { int old_cwnd = net->cwnd; net->ssthresh = max(net->cwnd / 2, 4 * net->mtu); net->cwnd = net->mtu; net->partial_bytes_acked = 0; - + SDT_PROBE(sctp, cwnd, net, to, + stcb->asoc.my_vtag, + ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), + net, + old_cwnd, net->cwnd); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX); } } void sctp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net) { int old_cwnd = net->cwnd; SCTP_STAT_INCR(sctps_ecnereducedcwnd); net->ssthresh = net->cwnd / 2; if (net->ssthresh < net->mtu) { net->ssthresh = net->mtu; /* here back off the timer as well, to slow us down */ net->RTO <<= 1; } net->cwnd = net->ssthresh; + SDT_PROBE(sctp, cwnd, net, ecn, + stcb->asoc.my_vtag, + ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), + net, + old_cwnd, net->cwnd); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); } } void sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb, struct sctp_nets *net, struct sctp_pktdrop_chunk *cp, uint32_t * bottle_bw, uint32_t * on_queue) { uint32_t bw_avail; int rtt, incr; int old_cwnd = net->cwnd; /* need real RTT for this calc */ rtt = ((net->lastsa >> 2) + net->lastsv) >> 1; /* get bottle neck bw */ *bottle_bw = ntohl(cp->bottle_bw); /* and whats on queue */ *on_queue = ntohl(cp->current_onq); /* * adjust the on-queue if our flight is more it could be that the * router has not yet gotten data "in-flight" to it */ if (*on_queue < net->flight_size) *on_queue = net->flight_size; /* calculate the available space */ bw_avail = (*bottle_bw * rtt) / 1000; if (bw_avail > *bottle_bw) { /* * Cap the growth to no more than the bottle neck. This can * happen as RTT slides up due to queues. It also means if * you have more than a 1 second RTT with a empty queue you * will be limited to the bottle_bw per second no matter if * other points have 1/2 the RTT and you could get more * out... */ bw_avail = *bottle_bw; } if (*on_queue > bw_avail) { /* * No room for anything else don't allow anything else to be * "added to the fire". */ int seg_inflight, seg_onqueue, my_portion; net->partial_bytes_acked = 0; /* how much are we over queue size? */ incr = *on_queue - bw_avail; if (stcb->asoc.seen_a_sack_this_pkt) { /* * undo any cwnd adjustment that the sack might have * made */ net->cwnd = net->prev_cwnd; } /* Now how much of that is mine? */ seg_inflight = net->flight_size / net->mtu; seg_onqueue = *on_queue / net->mtu; my_portion = (incr * seg_inflight) / seg_onqueue; /* Have I made an adjustment already */ if (net->cwnd > net->flight_size) { /* * for this flight I made an adjustment we need to * decrease the portion by a share our previous * adjustment. */ int diff_adj; diff_adj = net->cwnd - net->flight_size; if (diff_adj > my_portion) my_portion = 0; else my_portion -= diff_adj; } /* * back down to the previous cwnd (assume we have had a sack * before this packet). minus what ever portion of the * overage is my fault. */ net->cwnd -= my_portion; /* we will NOT back down more than 1 MTU */ if (net->cwnd <= net->mtu) { net->cwnd = net->mtu; } /* force into CA */ net->ssthresh = net->cwnd - 1; } else { /* * Take 1/4 of the space left or max burst up .. whichever * is less. */ incr = min((bw_avail - *on_queue) >> 2, stcb->asoc.max_burst * net->mtu); net->cwnd += incr; } if (net->cwnd > bw_avail) { /* We can't exceed the pipe size */ net->cwnd = bw_avail; } if (net->cwnd < net->mtu) { /* We always have 1 MTU */ net->cwnd = net->mtu; } if (net->cwnd - old_cwnd != 0) { /* log only changes */ + SDT_PROBE(sctp, cwnd, net, pd, + stcb->asoc.my_vtag, + ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), + net, + old_cwnd, net->cwnd); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); } } } void sctp_cwnd_update_after_output(struct sctp_tcb *stcb, struct sctp_nets *net, int burst_limit) { int old_cwnd = net->cwnd; if (net->ssthresh < net->cwnd) net->ssthresh = net->cwnd; net->cwnd = (net->flight_size + (burst_limit * net->mtu)); - + SDT_PROBE(sctp, cwnd, net, bl, + stcb->asoc.my_vtag, + ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), + net, + old_cwnd, net->cwnd); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_BRST); } } void sctp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets *net) { int old_cwnd = net->cwnd; sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED); /* * make a small adjustment to cwnd and force to CA. */ if (net->cwnd > net->mtu) /* drop down one MTU after sending */ net->cwnd -= net->mtu; if (net->cwnd < net->ssthresh) /* still in SS move to CA */ net->ssthresh = net->cwnd - 1; + SDT_PROBE(sctp, cwnd, net, fr, + stcb->asoc.my_vtag, + ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), + net, + old_cwnd, net->cwnd); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR); } } struct sctp_hs_raise_drop { int32_t cwnd; int32_t increase; int32_t drop_percent; }; #define SCTP_HS_TABLE_SIZE 73 struct sctp_hs_raise_drop sctp_cwnd_adjust[SCTP_HS_TABLE_SIZE] = { {38, 1, 50}, /* 0 */ {118, 2, 44}, /* 1 */ {221, 3, 41}, /* 2 */ {347, 4, 38}, /* 3 */ {495, 5, 37}, /* 4 */ {663, 6, 35}, /* 5 */ {851, 7, 34}, /* 6 */ {1058, 8, 33}, /* 7 */ {1284, 9, 32}, /* 8 */ {1529, 10, 31}, /* 9 */ {1793, 11, 30}, /* 10 */ {2076, 12, 29}, /* 11 */ {2378, 13, 28}, /* 12 */ {2699, 14, 28}, /* 13 */ {3039, 15, 27}, /* 14 */ {3399, 16, 27}, /* 15 */ {3778, 17, 26}, /* 16 */ {4177, 18, 26}, /* 17 */ {4596, 19, 25}, /* 18 */ {5036, 20, 25}, /* 19 */ {5497, 21, 24}, /* 20 */ {5979, 22, 24}, /* 21 */ {6483, 23, 23}, /* 22 */ {7009, 24, 23}, /* 23 */ {7558, 25, 22}, /* 24 */ {8130, 26, 22}, /* 25 */ {8726, 27, 22}, /* 26 */ {9346, 28, 21}, /* 27 */ {9991, 29, 21}, /* 28 */ {10661, 30, 21}, /* 29 */ {11358, 31, 20}, /* 30 */ {12082, 32, 20}, /* 31 */ {12834, 33, 20}, /* 32 */ {13614, 34, 19}, /* 33 */ {14424, 35, 19}, /* 34 */ {15265, 36, 19}, /* 35 */ {16137, 37, 19}, /* 36 */ {17042, 38, 18}, /* 37 */ {17981, 39, 18}, /* 38 */ {18955, 40, 18}, /* 39 */ {19965, 41, 17}, /* 40 */ {21013, 42, 17}, /* 41 */ {22101, 43, 17}, /* 42 */ {23230, 44, 17}, /* 43 */ {24402, 45, 16}, /* 44 */ {25618, 46, 16}, /* 45 */ {26881, 47, 16}, /* 46 */ {28193, 48, 16}, /* 47 */ {29557, 49, 15}, /* 48 */ {30975, 50, 15}, /* 49 */ {32450, 51, 15}, /* 50 */ {33986, 52, 15}, /* 51 */ {35586, 53, 14}, /* 52 */ {37253, 54, 14}, /* 53 */ {38992, 55, 14}, /* 54 */ {40808, 56, 14}, /* 55 */ {42707, 57, 13}, /* 56 */ {44694, 58, 13}, /* 57 */ {46776, 59, 13}, /* 58 */ {48961, 60, 13}, /* 59 */ {51258, 61, 13}, /* 60 */ {53677, 62, 12}, /* 61 */ {56230, 63, 12}, /* 62 */ {58932, 64, 12}, /* 63 */ {61799, 65, 12}, /* 64 */ {64851, 66, 11}, /* 65 */ {68113, 67, 11}, /* 66 */ {71617, 68, 11}, /* 67 */ {75401, 69, 10}, /* 68 */ {79517, 70, 10}, /* 69 */ {84035, 71, 10}, /* 70 */ {89053, 72, 10}, /* 71 */ {94717, 73, 9} /* 72 */ }; static void sctp_hs_cwnd_increase(struct sctp_tcb *stcb, struct sctp_nets *net) { int cur_val, i, indx, incr; cur_val = net->cwnd >> 10; indx = SCTP_HS_TABLE_SIZE - 1; #ifdef SCTP_DEBUG printf("HS CC CAlled.\n"); #endif if (cur_val < sctp_cwnd_adjust[0].cwnd) { /* normal mode */ if (net->net_ack > net->mtu) { net->cwnd += net->mtu; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_FROM_SS); } } else { net->cwnd += net->net_ack; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, net->net_ack, SCTP_CWND_LOG_FROM_SS); } } } else { for (i = net->last_hs_used; i < SCTP_HS_TABLE_SIZE; i++) { if (cur_val < sctp_cwnd_adjust[i].cwnd) { indx = i; break; } } net->last_hs_used = indx; incr = ((sctp_cwnd_adjust[indx].increase) << 10); net->cwnd += incr; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, incr, SCTP_CWND_LOG_FROM_SS); } } } static void sctp_hs_cwnd_decrease(struct sctp_tcb *stcb, struct sctp_nets *net) { int cur_val, i, indx; int old_cwnd = net->cwnd; cur_val = net->cwnd >> 10; if (cur_val < sctp_cwnd_adjust[0].cwnd) { /* normal mode */ net->ssthresh = net->cwnd / 2; if (net->ssthresh < (net->mtu * 2)) { net->ssthresh = 2 * net->mtu; } net->cwnd = net->ssthresh; } else { /* drop by the proper amount */ net->ssthresh = net->cwnd - (int)((net->cwnd / 100) * sctp_cwnd_adjust[net->last_hs_used].drop_percent); net->cwnd = net->ssthresh; /* now where are we */ indx = net->last_hs_used; cur_val = net->cwnd >> 10; /* reset where we are in the table */ if (cur_val < sctp_cwnd_adjust[0].cwnd) { /* feel out of hs */ net->last_hs_used = 0; } else { for (i = indx; i >= 1; i--) { if (cur_val > sctp_cwnd_adjust[i - 1].cwnd) { break; } } net->last_hs_used = indx; } } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_FR); } } void sctp_hs_cwnd_update_after_fr(struct sctp_tcb *stcb, struct sctp_association *asoc) { struct sctp_nets *net; /* * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off == 1) && * (net->fast_retran_loss_recovery == 0))) */ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { if ((asoc->fast_retran_loss_recovery == 0) || (asoc->sctp_cmt_on_off == 1)) { /* out of a RFC2582 Fast recovery window? */ if (net->net_ack > 0) { /* * per section 7.2.3, are there any * destinations that had a fast retransmit * to them. If so what we need to do is * adjust ssthresh and cwnd. */ struct sctp_tmit_chunk *lchk; sctp_hs_cwnd_decrease(stcb, net); lchk = TAILQ_FIRST(&asoc->send_queue); net->partial_bytes_acked = 0; /* Turn on fast recovery window */ asoc->fast_retran_loss_recovery = 1; if (lchk == NULL) { /* Mark end of the window */ asoc->fast_recovery_tsn = asoc->sending_seq - 1; } else { asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; } /* * CMT fast recovery -- per destination * recovery variable. */ net->fast_retran_loss_recovery = 1; if (lchk == NULL) { /* Mark end of the window */ net->fast_recovery_tsn = asoc->sending_seq - 1; } else { net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; } /* * Disable Nonce Sum Checking and store the * resync tsn */ asoc->nonce_sum_check = 0; asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1; sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32); sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net); } } else if (net->net_ack > 0) { /* * Mark a peg that we WOULD have done a cwnd * reduction but RFC2582 prevented this action. */ SCTP_STAT_INCR(sctps_fastretransinrtt); } } } void sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb, struct sctp_association *asoc, int accum_moved, int reneged_all, int will_exit) { struct sctp_nets *net; /******************************/ /* update cwnd and Early FR */ /******************************/ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { #ifdef JANA_CMT_FAST_RECOVERY /* * CMT fast recovery code. Need to debug. */ if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) { if (compare_with_wrap(asoc->last_acked_seq, net->fast_recovery_tsn, MAX_TSN) || (asoc->last_acked_seq == net->fast_recovery_tsn) || compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) || (net->pseudo_cumack == net->fast_recovery_tsn)) { net->will_exit_fast_recovery = 1; } } #endif if (SCTP_BASE_SYSCTL(sctp_early_fr)) { /* * So, first of all do we need to have a Early FR * timer running? */ if ((!TAILQ_EMPTY(&asoc->sent_queue) && (net->ref_count > 1) && (net->flight_size < net->cwnd)) || (reneged_all)) { /* * yes, so in this case stop it if its * running, and then restart it. Reneging * all is a special case where we want to * run the Early FR timer and then force the * last few unacked to be sent, causing us * to illicit a sack with gaps to force out * the others. */ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { SCTP_STAT_INCR(sctps_earlyfrstpidsck2); sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_20); } SCTP_STAT_INCR(sctps_earlyfrstrid); sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net); } else { /* No, stop it if its running */ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { SCTP_STAT_INCR(sctps_earlyfrstpidsck3); sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_21); } } } /* if nothing was acked on this destination skip it */ if (net->net_ack == 0) { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK); } continue; } if (net->net_ack2 > 0) { /* * Karn's rule applies to clearing error count, this * is optional. */ net->error_count = 0; if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) == SCTP_ADDR_NOT_REACHABLE) { /* addr came good */ net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE; net->dest_state |= SCTP_ADDR_REACHABLE; sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED); /* now was it the primary? if so restore */ if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) { (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net); } } /* * JRS 5/14/07 - If CMT PF is on and the destination * is in PF state, set the destination to active * state and set the cwnd to one or two MTU's based * on whether PF1 or PF2 is being used. * * Should we stop any running T3 timer here? */ if ((asoc->sctp_cmt_on_off == 1) && (asoc->sctp_cmt_pf > 0) && ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) { net->dest_state &= ~SCTP_ADDR_PF; net->cwnd = net->mtu * asoc->sctp_cmt_pf; SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n", net, net->cwnd); /* * Since the cwnd value is explicitly set, * skip the code that updates the cwnd * value. */ goto skip_cwnd_update; } } #ifdef JANA_CMT_FAST_RECOVERY /* * CMT fast recovery code */ /* * if (sctp_cmt_on_off == 1 && * net->fast_retran_loss_recovery && * net->will_exit_fast_recovery == 0) { @@@ Do something } * else if (sctp_cmt_on_off == 0 && * asoc->fast_retran_loss_recovery && will_exit == 0) { */ #endif if (asoc->fast_retran_loss_recovery && (will_exit == 0) && (asoc->sctp_cmt_on_off == 0)) { /* * If we are in loss recovery we skip any cwnd * update */ goto skip_cwnd_update; } /* * CMT: CUC algorithm. Update cwnd if pseudo-cumack has * moved. */ if (accum_moved || ((asoc->sctp_cmt_on_off == 1) && net->new_pseudo_cumack)) { /* If the cumulative ack moved we can proceed */ if (net->cwnd <= net->ssthresh) { /* We are in slow start */ if (net->flight_size + net->net_ack >= net->cwnd) { sctp_hs_cwnd_increase(stcb, net); } else { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, net, net->net_ack, SCTP_CWND_LOG_NOADV_SS); } } } else { /* We are in congestion avoidance */ net->partial_bytes_acked += net->net_ack; if ((net->flight_size + net->net_ack >= net->cwnd) && (net->partial_bytes_acked >= net->cwnd)) { net->partial_bytes_acked -= net->cwnd; net->cwnd += net->mtu; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_FROM_CA); } } else { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, net, net->net_ack, SCTP_CWND_LOG_NOADV_CA); } } } } else { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_NO_CUMACK); } } skip_cwnd_update: /* * NOW, according to Karn's rule do we need to restore the * RTO timer back? Check our net_ack2. If not set then we * have a ambiguity.. i.e. all data ack'd was sent to more * than one place. */ if (net->net_ack2) { /* restore any doubled timers */ net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1; if (net->RTO < stcb->asoc.minrto) { net->RTO = stcb->asoc.minrto; } if (net->RTO > stcb->asoc.maxrto) { net->RTO = stcb->asoc.maxrto; } } } } /* * H-TCP congestion control. The algorithm is detailed in: * R.N.Shorten, D.J.Leith: * "H-TCP: TCP for high-speed and long-distance networks" * Proc. PFLDnet, Argonne, 2004. * http://www.hamilton.ie/net/htcp3.pdf */ static int use_rtt_scaling = 1; static int use_bandwidth_switch = 1; static inline int between(uint32_t seq1, uint32_t seq2, uint32_t seq3) { return seq3 - seq2 >= seq1 - seq2; } static inline uint32_t htcp_cong_time(struct htcp *ca) { return sctp_get_tick_count() - ca->last_cong; } static inline uint32_t htcp_ccount(struct htcp *ca) { return htcp_cong_time(ca) / ca->minRTT; } static inline void htcp_reset(struct htcp *ca) { ca->undo_last_cong = ca->last_cong; ca->undo_maxRTT = ca->maxRTT; ca->undo_old_maxB = ca->old_maxB; ca->last_cong = sctp_get_tick_count(); } #ifdef SCTP_NOT_USED static uint32_t htcp_cwnd_undo(struct sctp_tcb *stcb, struct sctp_nets *net) { net->htcp_ca.last_cong = net->htcp_ca.undo_last_cong; net->htcp_ca.maxRTT = net->htcp_ca.undo_maxRTT; net->htcp_ca.old_maxB = net->htcp_ca.undo_old_maxB; return max(net->cwnd, ((net->ssthresh / net->mtu << 7) / net->htcp_ca.beta) * net->mtu); } #endif static inline void measure_rtt(struct sctp_tcb *stcb, struct sctp_nets *net) { uint32_t srtt = net->lastsa >> 3; /* keep track of minimum RTT seen so far, minRTT is zero at first */ if (net->htcp_ca.minRTT > srtt || !net->htcp_ca.minRTT) net->htcp_ca.minRTT = srtt; /* max RTT */ if (net->fast_retran_ip == 0 && net->ssthresh < 0xFFFF && htcp_ccount(&net->htcp_ca) > 3) { if (net->htcp_ca.maxRTT < net->htcp_ca.minRTT) net->htcp_ca.maxRTT = net->htcp_ca.minRTT; if (net->htcp_ca.maxRTT < srtt && srtt <= net->htcp_ca.maxRTT + MSEC_TO_TICKS(20)) net->htcp_ca.maxRTT = srtt; } } static void measure_achieved_throughput(struct sctp_tcb *stcb, struct sctp_nets *net) { uint32_t now = sctp_get_tick_count(); if (net->fast_retran_ip == 0) net->htcp_ca.bytes_acked = net->net_ack; if (!use_bandwidth_switch) return; /* achieved throughput calculations */ /* JRS - not 100% sure of this statement */ if (net->fast_retran_ip == 1) { net->htcp_ca.bytecount = 0; net->htcp_ca.lasttime = now; return; } net->htcp_ca.bytecount += net->net_ack; if (net->htcp_ca.bytecount >= net->cwnd - ((net->htcp_ca.alpha >> 7 ? : 1) * net->mtu) && now - net->htcp_ca.lasttime >= net->htcp_ca.minRTT && net->htcp_ca.minRTT > 0) { uint32_t cur_Bi = net->htcp_ca.bytecount / net->mtu * hz / (now - net->htcp_ca.lasttime); if (htcp_ccount(&net->htcp_ca) <= 3) { /* just after backoff */ net->htcp_ca.minB = net->htcp_ca.maxB = net->htcp_ca.Bi = cur_Bi; } else { net->htcp_ca.Bi = (3 * net->htcp_ca.Bi + cur_Bi) / 4; if (net->htcp_ca.Bi > net->htcp_ca.maxB) net->htcp_ca.maxB = net->htcp_ca.Bi; if (net->htcp_ca.minB > net->htcp_ca.maxB) net->htcp_ca.minB = net->htcp_ca.maxB; } net->htcp_ca.bytecount = 0; net->htcp_ca.lasttime = now; } } static inline void htcp_beta_update(struct htcp *ca, uint32_t minRTT, uint32_t maxRTT) { if (use_bandwidth_switch) { uint32_t maxB = ca->maxB; uint32_t old_maxB = ca->old_maxB; ca->old_maxB = ca->maxB; if (!between(5 * maxB, 4 * old_maxB, 6 * old_maxB)) { ca->beta = BETA_MIN; ca->modeswitch = 0; return; } } if (ca->modeswitch && minRTT > (uint32_t) MSEC_TO_TICKS(10) && maxRTT) { ca->beta = (minRTT << 7) / maxRTT; if (ca->beta < BETA_MIN) ca->beta = BETA_MIN; else if (ca->beta > BETA_MAX) ca->beta = BETA_MAX; } else { ca->beta = BETA_MIN; ca->modeswitch = 1; } } static inline void htcp_alpha_update(struct htcp *ca) { uint32_t minRTT = ca->minRTT; uint32_t factor = 1; uint32_t diff = htcp_cong_time(ca); if (diff > (uint32_t) hz) { diff -= hz; factor = 1 + (10 * diff + ((diff / 2) * (diff / 2) / hz)) / hz; } if (use_rtt_scaling && minRTT) { uint32_t scale = (hz << 3) / (10 * minRTT); scale = min(max(scale, 1U << 2), 10U << 3); /* clamping ratio to * interval [0.5,10]<<3 */ factor = (factor << 3) / scale; if (!factor) factor = 1; } ca->alpha = 2 * factor * ((1 << 7) - ca->beta); if (!ca->alpha) ca->alpha = ALPHA_BASE; } /* After we have the rtt data to calculate beta, we'd still prefer to wait one * rtt before we adjust our beta to ensure we are working from a consistent * data. * * This function should be called when we hit a congestion event since only at * that point do we really have a real sense of maxRTT (the queues en route * were getting just too full now). */ static void htcp_param_update(struct sctp_tcb *stcb, struct sctp_nets *net) { uint32_t minRTT = net->htcp_ca.minRTT; uint32_t maxRTT = net->htcp_ca.maxRTT; htcp_beta_update(&net->htcp_ca, minRTT, maxRTT); htcp_alpha_update(&net->htcp_ca); /* * add slowly fading memory for maxRTT to accommodate routing * changes etc */ if (minRTT > 0 && maxRTT > minRTT) net->htcp_ca.maxRTT = minRTT + ((maxRTT - minRTT) * 95) / 100; } static uint32_t htcp_recalc_ssthresh(struct sctp_tcb *stcb, struct sctp_nets *net) { htcp_param_update(stcb, net); return max(((net->cwnd / net->mtu * net->htcp_ca.beta) >> 7) * net->mtu, 2U * net->mtu); } static void htcp_cong_avoid(struct sctp_tcb *stcb, struct sctp_nets *net) { /*- * How to handle these functions? * if (!tcp_is_cwnd_limited(sk, in_flight)) RRS - good question. * return; */ if (net->cwnd <= net->ssthresh) { /* We are in slow start */ if (net->flight_size + net->net_ack >= net->cwnd) { if (net->net_ack > (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable))) { net->cwnd += (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable)); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_FROM_SS); } } else { net->cwnd += net->net_ack; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, net->net_ack, SCTP_CWND_LOG_FROM_SS); } } } else { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, net, net->net_ack, SCTP_CWND_LOG_NOADV_SS); } } } else { measure_rtt(stcb, net); /* * In dangerous area, increase slowly. In theory this is * net->cwnd += alpha / net->cwnd */ /* What is snd_cwnd_cnt?? */ if (((net->partial_bytes_acked / net->mtu * net->htcp_ca.alpha) >> 7) * net->mtu >= net->cwnd) { /*- * Does SCTP have a cwnd clamp? * if (net->snd_cwnd < net->snd_cwnd_clamp) - Nope (RRS). */ net->cwnd += net->mtu; net->partial_bytes_acked = 0; htcp_alpha_update(&net->htcp_ca); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_FROM_CA); } } else { net->partial_bytes_acked += net->net_ack; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, net, net->net_ack, SCTP_CWND_LOG_NOADV_CA); } } net->htcp_ca.bytes_acked = net->mtu; } } #ifdef SCTP_NOT_USED /* Lower bound on congestion window. */ static uint32_t htcp_min_cwnd(struct sctp_tcb *stcb, struct sctp_nets *net) { return net->ssthresh; } #endif static void htcp_init(struct sctp_tcb *stcb, struct sctp_nets *net) { memset(&net->htcp_ca, 0, sizeof(struct htcp)); net->htcp_ca.alpha = ALPHA_BASE; net->htcp_ca.beta = BETA_MIN; net->htcp_ca.bytes_acked = net->mtu; net->htcp_ca.last_cong = sctp_get_tick_count(); } void sctp_htcp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net) { /* * We take the max of the burst limit times a MTU or the * INITIAL_CWND. We then limit this to 4 MTU's of sending. */ net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND)); net->ssthresh = stcb->asoc.peers_rwnd; htcp_init(stcb, net); if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) { sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION); } } void sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb, struct sctp_association *asoc, int accum_moved, int reneged_all, int will_exit) { struct sctp_nets *net; /******************************/ /* update cwnd and Early FR */ /******************************/ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { #ifdef JANA_CMT_FAST_RECOVERY /* * CMT fast recovery code. Need to debug. */ if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) { if (compare_with_wrap(asoc->last_acked_seq, net->fast_recovery_tsn, MAX_TSN) || (asoc->last_acked_seq == net->fast_recovery_tsn) || compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) || (net->pseudo_cumack == net->fast_recovery_tsn)) { net->will_exit_fast_recovery = 1; } } #endif if (SCTP_BASE_SYSCTL(sctp_early_fr)) { /* * So, first of all do we need to have a Early FR * timer running? */ if ((!TAILQ_EMPTY(&asoc->sent_queue) && (net->ref_count > 1) && (net->flight_size < net->cwnd)) || (reneged_all)) { /* * yes, so in this case stop it if its * running, and then restart it. Reneging * all is a special case where we want to * run the Early FR timer and then force the * last few unacked to be sent, causing us * to illicit a sack with gaps to force out * the others. */ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { SCTP_STAT_INCR(sctps_earlyfrstpidsck2); sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_20); } SCTP_STAT_INCR(sctps_earlyfrstrid); sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net); } else { /* No, stop it if its running */ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) { SCTP_STAT_INCR(sctps_earlyfrstpidsck3); sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_21); } } } /* if nothing was acked on this destination skip it */ if (net->net_ack == 0) { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK); } continue; } if (net->net_ack2 > 0) { /* * Karn's rule applies to clearing error count, this * is optional. */ net->error_count = 0; if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) == SCTP_ADDR_NOT_REACHABLE) { /* addr came good */ net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE; net->dest_state |= SCTP_ADDR_REACHABLE; sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED); /* now was it the primary? if so restore */ if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) { (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net); } } /* * JRS 5/14/07 - If CMT PF is on and the destination * is in PF state, set the destination to active * state and set the cwnd to one or two MTU's based * on whether PF1 or PF2 is being used. * * Should we stop any running T3 timer here? */ if ((asoc->sctp_cmt_on_off == 1) && (asoc->sctp_cmt_pf > 0) && ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) { net->dest_state &= ~SCTP_ADDR_PF; net->cwnd = net->mtu * asoc->sctp_cmt_pf; SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n", net, net->cwnd); /* * Since the cwnd value is explicitly set, * skip the code that updates the cwnd * value. */ goto skip_cwnd_update; } } #ifdef JANA_CMT_FAST_RECOVERY /* * CMT fast recovery code */ /* * if (sctp_cmt_on_off == 1 && * net->fast_retran_loss_recovery && * net->will_exit_fast_recovery == 0) { @@@ Do something } * else if (sctp_cmt_on_off == 0 && * asoc->fast_retran_loss_recovery && will_exit == 0) { */ #endif if (asoc->fast_retran_loss_recovery && will_exit == 0 && (asoc->sctp_cmt_on_off == 0)) { /* * If we are in loss recovery we skip any cwnd * update */ goto skip_cwnd_update; } /* * CMT: CUC algorithm. Update cwnd if pseudo-cumack has * moved. */ if (accum_moved || ((asoc->sctp_cmt_on_off == 1) && net->new_pseudo_cumack)) { htcp_cong_avoid(stcb, net); measure_achieved_throughput(stcb, net); } else { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_NO_CUMACK); } } skip_cwnd_update: /* * NOW, according to Karn's rule do we need to restore the * RTO timer back? Check our net_ack2. If not set then we * have a ambiguity.. i.e. all data ack'd was sent to more * than one place. */ if (net->net_ack2) { /* restore any doubled timers */ net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1; if (net->RTO < stcb->asoc.minrto) { net->RTO = stcb->asoc.minrto; } if (net->RTO > stcb->asoc.maxrto) { net->RTO = stcb->asoc.maxrto; } } } } void sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb, struct sctp_association *asoc) { struct sctp_nets *net; /* * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off == 1) && * (net->fast_retran_loss_recovery == 0))) */ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { if ((asoc->fast_retran_loss_recovery == 0) || (asoc->sctp_cmt_on_off == 1)) { /* out of a RFC2582 Fast recovery window? */ if (net->net_ack > 0) { /* * per section 7.2.3, are there any * destinations that had a fast retransmit * to them. If so what we need to do is * adjust ssthresh and cwnd. */ struct sctp_tmit_chunk *lchk; int old_cwnd = net->cwnd; /* JRS - reset as if state were changed */ htcp_reset(&net->htcp_ca); net->ssthresh = htcp_recalc_ssthresh(stcb, net); net->cwnd = net->ssthresh; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_FR); } lchk = TAILQ_FIRST(&asoc->send_queue); net->partial_bytes_acked = 0; /* Turn on fast recovery window */ asoc->fast_retran_loss_recovery = 1; if (lchk == NULL) { /* Mark end of the window */ asoc->fast_recovery_tsn = asoc->sending_seq - 1; } else { asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; } /* * CMT fast recovery -- per destination * recovery variable. */ net->fast_retran_loss_recovery = 1; if (lchk == NULL) { /* Mark end of the window */ net->fast_recovery_tsn = asoc->sending_seq - 1; } else { net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1; } /* * Disable Nonce Sum Checking and store the * resync tsn */ asoc->nonce_sum_check = 0; asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1; sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32); sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net); } } else if (net->net_ack > 0) { /* * Mark a peg that we WOULD have done a cwnd * reduction but RFC2582 prevented this action. */ SCTP_STAT_INCR(sctps_fastretransinrtt); } } } void sctp_htcp_cwnd_update_after_timeout(struct sctp_tcb *stcb, struct sctp_nets *net) { int old_cwnd = net->cwnd; /* JRS - reset as if the state were being changed to timeout */ htcp_reset(&net->htcp_ca); net->ssthresh = htcp_recalc_ssthresh(stcb, net); net->cwnd = net->mtu; net->partial_bytes_acked = 0; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX); } } void sctp_htcp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets *net) { int old_cwnd; old_cwnd = net->cwnd; sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED); net->htcp_ca.last_cong = sctp_get_tick_count(); /* * make a small adjustment to cwnd and force to CA. */ if (net->cwnd > net->mtu) /* drop down one MTU after sending */ net->cwnd -= net->mtu; if (net->cwnd < net->ssthresh) /* still in SS move to CA */ net->ssthresh = net->cwnd - 1; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR); } } void sctp_htcp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net) { int old_cwnd; old_cwnd = net->cwnd; /* JRS - reset hctp as if state changed */ htcp_reset(&net->htcp_ca); SCTP_STAT_INCR(sctps_ecnereducedcwnd); net->ssthresh = htcp_recalc_ssthresh(stcb, net); if (net->ssthresh < net->mtu) { net->ssthresh = net->mtu; /* here back off the timer as well, to slow us down */ net->RTO <<= 1; } net->cwnd = net->ssthresh; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) { sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT); } } Index: projects/binutils-2.17/sys/netinet/sctp_dtrace_declare.h =================================================================== --- projects/binutils-2.17/sys/netinet/sctp_dtrace_declare.h (nonexistent) +++ projects/binutils-2.17/sys/netinet/sctp_dtrace_declare.h (revision 215830) @@ -0,0 +1,73 @@ +/*- + * Copyright (c) 2010, by Randall Stewart & Michael Tuexen, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * a) Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * b) Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * c) Neither the name of Cisco Systems, Inc. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +__FBSDID("$FreeBSD$"); +#ifndef __sctp_dtrace_declare_h__ +#include "opt_kdtrace.h" +#include +#include + +/* Declare the SCTP provider */ +SDT_PROVIDER_DECLARE(sctp); + +/* The probes we have so far: */ + +/* One to track a net's cwnd */ +/* initial */ +SDT_PROBE_DECLARE(sctp, cwnd, net, init); +/* update at a ack -- increase */ +SDT_PROBE_DECLARE(sctp, cwnd, net, ack); +/* update at a fast retransmit -- decrease */ +SDT_PROBE_DECLARE(sctp, cwnd, net, fr); +/* update at a time-out -- decrease */ +SDT_PROBE_DECLARE(sctp, cwnd, net, to); +/* update at a burst-limit -- decrease */ +SDT_PROBE_DECLARE(sctp, cwnd, net, bl); +/* update at a ECN -- decrease */ +SDT_PROBE_DECLARE(sctp, cwnd, net, ecn); +/* update at a Packet-Drop -- decrease */ +SDT_PROBE_DECLARE(sctp, cwnd, net, pd); + +/* One to track an associations rwnd */ +SDT_PROBE_DECLARE(sctp, rwnd, assoc, val); + +/* One to track a net's flight size */ +SDT_PROBE_DECLARE(sctp, flightsize, net, val); + +/* One to track an associations flight size */ +SDT_PROBE_DECLARE(sctp, flightsize, assoc, val); + + + + + + +#endif Property changes on: projects/binutils-2.17/sys/netinet/sctp_dtrace_declare.h ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Index: projects/binutils-2.17/sys/netinet/sctp_dtrace_define.h =================================================================== --- projects/binutils-2.17/sys/netinet/sctp_dtrace_define.h (nonexistent) +++ projects/binutils-2.17/sys/netinet/sctp_dtrace_define.h (revision 215830) @@ -0,0 +1,201 @@ +/*- + * Copyright (c) 2010, by Randall Stewart & Michael Tuexen, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * a) Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * b) Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * c) Neither the name of Cisco Systems, Inc. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +__FBSDID("$FreeBSD$"); +#ifndef __sctp_dtrace_define_h__ +#include "opt_kdtrace.h" +#include +#include + +SDT_PROVIDER_DEFINE(sctp); + +/********************************************************/ +/* Cwnd probe - tracks changes in the congestion window on a netp */ +/********************************************************/ +/* Initial */ +SDT_PROBE_DEFINE(sctp, cwnd, net, init, init); +/* The Vtag for this end */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, init, 0, "uint32_t"); +/* The port number of the local side << 16 | port number of remote + * in network byte order. + */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, init, 1, "uint32_t"); +/* The pointer to the struct sctp_nets * changing */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, init, 2, "uintptr_t"); +/* The old value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, init, 3, "int"); +/* The new value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, init, 4, "int"); + + +/* ACK-INCREASE */ +SDT_PROBE_DEFINE(sctp, cwnd, net, ack, ack); +/* The Vtag for this end */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, ack, 0, "uint32_t"); +/* The port number of the local side << 16 | port number of remote + * in network byte order. + */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, ack, 1, "uint32_t"); +/* The pointer to the struct sctp_nets * changing */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, ack, 2, "uintptr_t"); +/* The old value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, ack, 3, "int"); +/* The new value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, ack, 4, "int"); + +/* FastRetransmit-DECREASE */ +SDT_PROBE_DEFINE(sctp, cwnd, net, fr, fr); +/* The Vtag for this end */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, fr, 0, "uint32_t"); +/* The port number of the local side << 16 | port number of remote + * in network byte order. + */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, fr, 1, "uint32_t"); +/* The pointer to the struct sctp_nets * changing */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, fr, 2, "uintptr_t"); +/* The old value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, fr, 3, "int"); +/* The new value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, fr, 4, "int"); + + +/* TimeOut-DECREASE */ +SDT_PROBE_DEFINE(sctp, cwnd, net, to, to); +/* The Vtag for this end */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, to, 0, "uint32_t"); +/* The port number of the local side << 16 | port number of remote + * in network byte order. + */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, to, 1, "uint32_t"); +/* The pointer to the struct sctp_nets * changing */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, to, 2, "uintptr_t"); +/* The old value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, to, 3, "int"); +/* The new value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, to, 4, "int"); + + +/* BurstLimit-DECREASE */ +SDT_PROBE_DEFINE(sctp, cwnd, net, bl, bl); +/* The Vtag for this end */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, bl, 0, "uint32_t"); +/* The port number of the local side << 16 | port number of remote + * in network byte order. + */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, bl, 1, "uint32_t"); +/* The pointer to the struct sctp_nets * changing */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, bl, 2, "uintptr_t"); +/* The old value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, bl, 3, "int"); +/* The new value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, bl, 4, "int"); + + +/* ECN-DECREASE */ +SDT_PROBE_DEFINE(sctp, cwnd, net, ecn, ecn); +/* The Vtag for this end */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, ecn, 0, "uint32_t"); +/* The port number of the local side << 16 | port number of remote + * in network byte order. + */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, ecn, 1, "uint32_t"); +/* The pointer to the struct sctp_nets * changing */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, ecn, 2, "uintptr_t"); +/* The old value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, ecn, 3, "int"); +/* The new value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, ecn, 4, "int"); + + +/* PacketDrop-DECREASE */ +SDT_PROBE_DEFINE(sctp, cwnd, net, pd, pd); +/* The Vtag for this end */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, pd, 0, "uint32_t"); +/* The port number of the local side << 16 | port number of remote + * in network byte order. + */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, pd, 1, "uint32_t"); +/* The pointer to the struct sctp_nets * changing */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, pd, 2, "uintptr_t"); +/* The old value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, pd, 3, "int"); +/* The new value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, cwnd, net, pd, 4, "int"); + + + +/********************************************************/ +/* Rwnd probe - tracks changes in the receiver window for an assoc */ +/********************************************************/ +SDT_PROBE_DEFINE(sctp, rwnd, assoc, val, val); +/* The Vtag for this end */ +SDT_PROBE_ARGTYPE(sctp, rwnd, assoc, val, 0, "uint32_t"); +/* The port number of the local side << 16 | port number of remote + * in network byte order. + */ +SDT_PROBE_ARGTYPE(sctp, rwnd, assoc, val, 1, "uint32_t"); +/* The up/down amount */ +SDT_PROBE_ARGTYPE(sctp, rwnd, assoc, val, 2, "int"); +/* The new value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, rwnd, assoc, val, 3, "int"); + +/********************************************************/ +/* flight probe - tracks changes in the flight size on a net or assoc */ +/********************************************************/ +SDT_PROBE_DEFINE(sctp, flightsize, net, val, val); +/* The Vtag for this end */ +SDT_PROBE_ARGTYPE(sctp, flightsize, net, val, 0, "uint32_t"); +/* The port number of the local side << 16 | port number of remote + * in network byte order. + */ +SDT_PROBE_ARGTYPE(sctp, flightsize, net, val, 1, "uint32_t"); +/* The pointer to the struct sctp_nets * changing */ +SDT_PROBE_ARGTYPE(sctp, flightsize, net, val, 2, "uintptr_t"); +/* The up/down amount */ +SDT_PROBE_ARGTYPE(sctp, flightsize, net, val, 3, "int"); +/* The new value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, flightsize, net, val, 4, "int"); +/********************************************************/ +/* The total flight version */ +/********************************************************/ +SDT_PROBE_DEFINE(sctp, flightsize, assoc, val, val); +/* The Vtag for this end */ +SDT_PROBE_ARGTYPE(sctp, flightsize, assoc, val, 0, "uint32_t"); +/* The port number of the local side << 16 | port number of remote + * in network byte order. + */ +SDT_PROBE_ARGTYPE(sctp, flightsize, assoc, val, 1, "uint32_t"); +/* The up/down amount */ +SDT_PROBE_ARGTYPE(sctp, flightsize, assoc, val, 2, "int"); +/* The new value of the cwnd */ +SDT_PROBE_ARGTYPE(sctp, flightsize, assoc, val, 3, "int"); + +#endif Property changes on: projects/binutils-2.17/sys/netinet/sctp_dtrace_define.h ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Index: projects/binutils-2.17/sys/netinet/sctp_pcb.c =================================================================== --- projects/binutils-2.17/sys/netinet/sctp_pcb.c (revision 215829) +++ projects/binutils-2.17/sys/netinet/sctp_pcb.c (revision 215830) @@ -1,6806 +1,6807 @@ /*- * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /* $KAME: sctp_pcb.c,v 1.38 2005/03/06 16:04:18 itojun Exp $ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include +#include #include VNET_DEFINE(struct sctp_base_info, system_base_info); /* FIX: we don't handle multiple link local scopes */ /* "scopeless" replacement IN6_ARE_ADDR_EQUAL */ #ifdef INET6 int SCTP6_ARE_ADDR_EQUAL(struct sockaddr_in6 *a, struct sockaddr_in6 *b) { struct sockaddr_in6 tmp_a, tmp_b; memcpy(&tmp_a, a, sizeof(struct sockaddr_in6)); if (sa6_embedscope(&tmp_a, MODULE_GLOBAL(ip6_use_defzone)) != 0) { return 0; } memcpy(&tmp_b, b, sizeof(struct sockaddr_in6)); if (sa6_embedscope(&tmp_b, MODULE_GLOBAL(ip6_use_defzone)) != 0) { return 0; } return (IN6_ARE_ADDR_EQUAL(&tmp_a.sin6_addr, &tmp_b.sin6_addr)); } #endif void sctp_fill_pcbinfo(struct sctp_pcbinfo *spcb) { /* * We really don't need to lock this, but I will just because it * does not hurt. */ SCTP_INP_INFO_RLOCK(); spcb->ep_count = SCTP_BASE_INFO(ipi_count_ep); spcb->asoc_count = SCTP_BASE_INFO(ipi_count_asoc); spcb->laddr_count = SCTP_BASE_INFO(ipi_count_laddr); spcb->raddr_count = SCTP_BASE_INFO(ipi_count_raddr); spcb->chk_count = SCTP_BASE_INFO(ipi_count_chunk); spcb->readq_count = SCTP_BASE_INFO(ipi_count_readq); spcb->stream_oque = SCTP_BASE_INFO(ipi_count_strmoq); spcb->free_chunks = SCTP_BASE_INFO(ipi_free_chunks); SCTP_INP_INFO_RUNLOCK(); } /* * Addresses are added to VRF's (Virtual Router's). For BSD we * have only the default VRF 0. We maintain a hash list of * VRF's. Each VRF has its own list of sctp_ifn's. Each of * these has a list of addresses. When we add a new address * to a VRF we lookup the ifn/ifn_index, if the ifn does * not exist we create it and add it to the list of IFN's * within the VRF. Once we have the sctp_ifn, we add the * address to the list. So we look something like: * * hash-vrf-table * vrf-> ifn-> ifn -> ifn * vrf | * ... +--ifa-> ifa -> ifa * vrf * * We keep these separate lists since the SCTP subsystem will * point to these from its source address selection nets structure. * When an address is deleted it does not happen right away on * the SCTP side, it gets scheduled. What we do when a * delete happens is immediately remove the address from * the master list and decrement the refcount. As our * addip iterator works through and frees the src address * selection pointing to the sctp_ifa, eventually the refcount * will reach 0 and we will delete it. Note that it is assumed * that any locking on system level ifn/ifa is done at the * caller of these functions and these routines will only * lock the SCTP structures as they add or delete things. * * Other notes on VRF concepts. * - An endpoint can be in multiple VRF's * - An association lives within a VRF and only one VRF. * - Any incoming packet we can deduce the VRF for by * looking at the mbuf/pak inbound (for BSD its VRF=0 :D) * - Any downward send call or connect call must supply the * VRF via ancillary data or via some sort of set default * VRF socket option call (again for BSD no brainer since * the VRF is always 0). * - An endpoint may add multiple VRF's to it. * - Listening sockets can accept associations in any * of the VRF's they are in but the assoc will end up * in only one VRF (gotten from the packet or connect/send). * */ struct sctp_vrf * sctp_allocate_vrf(int vrf_id) { struct sctp_vrf *vrf = NULL; struct sctp_vrflist *bucket; /* First allocate the VRF structure */ vrf = sctp_find_vrf(vrf_id); if (vrf) { /* Already allocated */ return (vrf); } SCTP_MALLOC(vrf, struct sctp_vrf *, sizeof(struct sctp_vrf), SCTP_M_VRF); if (vrf == NULL) { /* No memory */ #ifdef INVARIANTS panic("No memory for VRF:%d", vrf_id); #endif return (NULL); } /* setup the VRF */ memset(vrf, 0, sizeof(struct sctp_vrf)); vrf->vrf_id = vrf_id; LIST_INIT(&vrf->ifnlist); vrf->total_ifa_count = 0; vrf->refcount = 0; /* now also setup table ids */ SCTP_INIT_VRF_TABLEID(vrf); /* Init the HASH of addresses */ vrf->vrf_addr_hash = SCTP_HASH_INIT(SCTP_VRF_ADDR_HASH_SIZE, &vrf->vrf_addr_hashmark); if (vrf->vrf_addr_hash == NULL) { /* No memory */ #ifdef INVARIANTS panic("No memory for VRF:%d", vrf_id); #endif SCTP_FREE(vrf, SCTP_M_VRF); return (NULL); } /* Add it to the hash table */ bucket = &SCTP_BASE_INFO(sctp_vrfhash)[(vrf_id & SCTP_BASE_INFO(hashvrfmark))]; LIST_INSERT_HEAD(bucket, vrf, next_vrf); atomic_add_int(&SCTP_BASE_INFO(ipi_count_vrfs), 1); return (vrf); } struct sctp_ifn * sctp_find_ifn(void *ifn, uint32_t ifn_index) { struct sctp_ifn *sctp_ifnp; struct sctp_ifnlist *hash_ifn_head; /* * We assume the lock is held for the addresses if that's wrong * problems could occur :-) */ hash_ifn_head = &SCTP_BASE_INFO(vrf_ifn_hash)[(ifn_index & SCTP_BASE_INFO(vrf_ifn_hashmark))]; LIST_FOREACH(sctp_ifnp, hash_ifn_head, next_bucket) { if (sctp_ifnp->ifn_index == ifn_index) { return (sctp_ifnp); } if (sctp_ifnp->ifn_p && ifn && (sctp_ifnp->ifn_p == ifn)) { return (sctp_ifnp); } } return (NULL); } struct sctp_vrf * sctp_find_vrf(uint32_t vrf_id) { struct sctp_vrflist *bucket; struct sctp_vrf *liste; bucket = &SCTP_BASE_INFO(sctp_vrfhash)[(vrf_id & SCTP_BASE_INFO(hashvrfmark))]; LIST_FOREACH(liste, bucket, next_vrf) { if (vrf_id == liste->vrf_id) { return (liste); } } return (NULL); } void sctp_free_vrf(struct sctp_vrf *vrf) { if (SCTP_DECREMENT_AND_CHECK_REFCOUNT(&vrf->refcount)) { if (vrf->vrf_addr_hash) { SCTP_HASH_FREE(vrf->vrf_addr_hash, vrf->vrf_addr_hashmark); vrf->vrf_addr_hash = NULL; } /* We zero'd the count */ LIST_REMOVE(vrf, next_vrf); SCTP_FREE(vrf, SCTP_M_VRF); atomic_subtract_int(&SCTP_BASE_INFO(ipi_count_vrfs), 1); } } void sctp_free_ifn(struct sctp_ifn *sctp_ifnp) { if (SCTP_DECREMENT_AND_CHECK_REFCOUNT(&sctp_ifnp->refcount)) { /* We zero'd the count */ if (sctp_ifnp->vrf) { sctp_free_vrf(sctp_ifnp->vrf); } SCTP_FREE(sctp_ifnp, SCTP_M_IFN); atomic_subtract_int(&SCTP_BASE_INFO(ipi_count_ifns), 1); } } void sctp_update_ifn_mtu(uint32_t ifn_index, uint32_t mtu) { struct sctp_ifn *sctp_ifnp; sctp_ifnp = sctp_find_ifn((void *)NULL, ifn_index); if (sctp_ifnp != NULL) { sctp_ifnp->ifn_mtu = mtu; } } void sctp_free_ifa(struct sctp_ifa *sctp_ifap) { if (SCTP_DECREMENT_AND_CHECK_REFCOUNT(&sctp_ifap->refcount)) { /* We zero'd the count */ if (sctp_ifap->ifn_p) { sctp_free_ifn(sctp_ifap->ifn_p); } SCTP_FREE(sctp_ifap, SCTP_M_IFA); atomic_subtract_int(&SCTP_BASE_INFO(ipi_count_ifas), 1); } } static void sctp_delete_ifn(struct sctp_ifn *sctp_ifnp, int hold_addr_lock) { struct sctp_ifn *found; found = sctp_find_ifn(sctp_ifnp->ifn_p, sctp_ifnp->ifn_index); if (found == NULL) { /* Not in the list.. sorry */ return; } if (hold_addr_lock == 0) SCTP_IPI_ADDR_WLOCK(); LIST_REMOVE(sctp_ifnp, next_bucket); LIST_REMOVE(sctp_ifnp, next_ifn); SCTP_DEREGISTER_INTERFACE(sctp_ifnp->ifn_index, sctp_ifnp->registered_af); if (hold_addr_lock == 0) SCTP_IPI_ADDR_WUNLOCK(); /* Take away the reference, and possibly free it */ sctp_free_ifn(sctp_ifnp); } void sctp_mark_ifa_addr_down(uint32_t vrf_id, struct sockaddr *addr, const char *if_name, uint32_t ifn_index) { struct sctp_vrf *vrf; struct sctp_ifa *sctp_ifap = NULL; SCTP_IPI_ADDR_RLOCK(); vrf = sctp_find_vrf(vrf_id); if (vrf == NULL) { SCTPDBG(SCTP_DEBUG_PCB4, "Can't find vrf_id 0x%x\n", vrf_id); goto out; } sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED); if (sctp_ifap == NULL) { SCTPDBG(SCTP_DEBUG_PCB4, "Can't find sctp_ifap for address\n"); goto out; } if (sctp_ifap->ifn_p == NULL) { SCTPDBG(SCTP_DEBUG_PCB4, "IFA has no IFN - can't mark unuseable\n"); goto out; } if (if_name) { int len1, len2; len1 = strlen(if_name); len2 = strlen(sctp_ifap->ifn_p->ifn_name); if (len1 != len2) { SCTPDBG(SCTP_DEBUG_PCB4, "IFN of ifa names different length %d vs %d - ignored\n", len1, len2); goto out; } if (strncmp(if_name, sctp_ifap->ifn_p->ifn_name, len1) != 0) { SCTPDBG(SCTP_DEBUG_PCB4, "IFN %s of IFA not the same as %s\n", sctp_ifap->ifn_p->ifn_name, if_name); goto out; } } else { if (sctp_ifap->ifn_p->ifn_index != ifn_index) { SCTPDBG(SCTP_DEBUG_PCB4, "IFA owned by ifn_index:%d down command for ifn_index:%d - ignored\n", sctp_ifap->ifn_p->ifn_index, ifn_index); goto out; } } sctp_ifap->localifa_flags &= (~SCTP_ADDR_VALID); sctp_ifap->localifa_flags |= SCTP_ADDR_IFA_UNUSEABLE; out: SCTP_IPI_ADDR_RUNLOCK(); } void sctp_mark_ifa_addr_up(uint32_t vrf_id, struct sockaddr *addr, const char *if_name, uint32_t ifn_index) { struct sctp_vrf *vrf; struct sctp_ifa *sctp_ifap = NULL; SCTP_IPI_ADDR_RLOCK(); vrf = sctp_find_vrf(vrf_id); if (vrf == NULL) { SCTPDBG(SCTP_DEBUG_PCB4, "Can't find vrf_id 0x%x\n", vrf_id); goto out; } sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED); if (sctp_ifap == NULL) { SCTPDBG(SCTP_DEBUG_PCB4, "Can't find sctp_ifap for address\n"); goto out; } if (sctp_ifap->ifn_p == NULL) { SCTPDBG(SCTP_DEBUG_PCB4, "IFA has no IFN - can't mark unuseable\n"); goto out; } if (if_name) { int len1, len2; len1 = strlen(if_name); len2 = strlen(sctp_ifap->ifn_p->ifn_name); if (len1 != len2) { SCTPDBG(SCTP_DEBUG_PCB4, "IFN of ifa names different length %d vs %d - ignored\n", len1, len2); goto out; } if (strncmp(if_name, sctp_ifap->ifn_p->ifn_name, len1) != 0) { SCTPDBG(SCTP_DEBUG_PCB4, "IFN %s of IFA not the same as %s\n", sctp_ifap->ifn_p->ifn_name, if_name); goto out; } } else { if (sctp_ifap->ifn_p->ifn_index != ifn_index) { SCTPDBG(SCTP_DEBUG_PCB4, "IFA owned by ifn_index:%d down command for ifn_index:%d - ignored\n", sctp_ifap->ifn_p->ifn_index, ifn_index); goto out; } } sctp_ifap->localifa_flags &= (~SCTP_ADDR_IFA_UNUSEABLE); sctp_ifap->localifa_flags |= SCTP_ADDR_VALID; out: SCTP_IPI_ADDR_RUNLOCK(); } /*- * Add an ifa to an ifn. * Register the interface as necessary. * NOTE: ADDR write lock MUST be held. */ static void sctp_add_ifa_to_ifn(struct sctp_ifn *sctp_ifnp, struct sctp_ifa *sctp_ifap) { int ifa_af; LIST_INSERT_HEAD(&sctp_ifnp->ifalist, sctp_ifap, next_ifa); sctp_ifap->ifn_p = sctp_ifnp; atomic_add_int(&sctp_ifap->ifn_p->refcount, 1); /* update address counts */ sctp_ifnp->ifa_count++; ifa_af = sctp_ifap->address.sa.sa_family; if (ifa_af == AF_INET) sctp_ifnp->num_v4++; else sctp_ifnp->num_v6++; if (sctp_ifnp->ifa_count == 1) { /* register the new interface */ SCTP_REGISTER_INTERFACE(sctp_ifnp->ifn_index, ifa_af); sctp_ifnp->registered_af = ifa_af; } } /*- * Remove an ifa from its ifn. * If no more addresses exist, remove the ifn too. Otherwise, re-register * the interface based on the remaining address families left. * NOTE: ADDR write lock MUST be held. */ static void sctp_remove_ifa_from_ifn(struct sctp_ifa *sctp_ifap) { uint32_t ifn_index; LIST_REMOVE(sctp_ifap, next_ifa); if (sctp_ifap->ifn_p) { /* update address counts */ sctp_ifap->ifn_p->ifa_count--; if (sctp_ifap->address.sa.sa_family == AF_INET6) sctp_ifap->ifn_p->num_v6--; else if (sctp_ifap->address.sa.sa_family == AF_INET) sctp_ifap->ifn_p->num_v4--; ifn_index = sctp_ifap->ifn_p->ifn_index; if (LIST_EMPTY(&sctp_ifap->ifn_p->ifalist)) { /* remove the ifn, possibly freeing it */ sctp_delete_ifn(sctp_ifap->ifn_p, SCTP_ADDR_LOCKED); } else { /* re-register address family type, if needed */ if ((sctp_ifap->ifn_p->num_v6 == 0) && (sctp_ifap->ifn_p->registered_af == AF_INET6)) { SCTP_DEREGISTER_INTERFACE(ifn_index, AF_INET6); SCTP_REGISTER_INTERFACE(ifn_index, AF_INET); sctp_ifap->ifn_p->registered_af = AF_INET; } else if ((sctp_ifap->ifn_p->num_v4 == 0) && (sctp_ifap->ifn_p->registered_af == AF_INET)) { SCTP_DEREGISTER_INTERFACE(ifn_index, AF_INET); SCTP_REGISTER_INTERFACE(ifn_index, AF_INET6); sctp_ifap->ifn_p->registered_af = AF_INET6; } /* free the ifn refcount */ sctp_free_ifn(sctp_ifap->ifn_p); } sctp_ifap->ifn_p = NULL; } } struct sctp_ifa * sctp_add_addr_to_vrf(uint32_t vrf_id, void *ifn, uint32_t ifn_index, uint32_t ifn_type, const char *if_name, void *ifa, struct sockaddr *addr, uint32_t ifa_flags, int dynamic_add) { struct sctp_vrf *vrf; struct sctp_ifn *sctp_ifnp = NULL; struct sctp_ifa *sctp_ifap = NULL; struct sctp_ifalist *hash_addr_head; struct sctp_ifnlist *hash_ifn_head; uint32_t hash_of_addr; int new_ifn_af = 0; #ifdef SCTP_DEBUG SCTPDBG(SCTP_DEBUG_PCB4, "vrf_id 0x%x: adding address: ", vrf_id); SCTPDBG_ADDR(SCTP_DEBUG_PCB4, addr); #endif SCTP_IPI_ADDR_WLOCK(); sctp_ifnp = sctp_find_ifn(ifn, ifn_index); if (sctp_ifnp) { vrf = sctp_ifnp->vrf; } else { vrf = sctp_find_vrf(vrf_id); if (vrf == NULL) { vrf = sctp_allocate_vrf(vrf_id); if (vrf == NULL) { SCTP_IPI_ADDR_WUNLOCK(); return (NULL); } } } if (sctp_ifnp == NULL) { /* * build one and add it, can't hold lock until after malloc * done though. */ SCTP_IPI_ADDR_WUNLOCK(); SCTP_MALLOC(sctp_ifnp, struct sctp_ifn *, sizeof(struct sctp_ifn), SCTP_M_IFN); if (sctp_ifnp == NULL) { #ifdef INVARIANTS panic("No memory for IFN"); #endif return (NULL); } memset(sctp_ifnp, 0, sizeof(struct sctp_ifn)); sctp_ifnp->ifn_index = ifn_index; sctp_ifnp->ifn_p = ifn; sctp_ifnp->ifn_type = ifn_type; sctp_ifnp->refcount = 0; sctp_ifnp->vrf = vrf; atomic_add_int(&vrf->refcount, 1); sctp_ifnp->ifn_mtu = SCTP_GATHER_MTU_FROM_IFN_INFO(ifn, ifn_index, addr->sa_family); if (if_name != NULL) { memcpy(sctp_ifnp->ifn_name, if_name, SCTP_IFNAMSIZ); } else { memcpy(sctp_ifnp->ifn_name, "unknown", min(7, SCTP_IFNAMSIZ)); } hash_ifn_head = &SCTP_BASE_INFO(vrf_ifn_hash)[(ifn_index & SCTP_BASE_INFO(vrf_ifn_hashmark))]; LIST_INIT(&sctp_ifnp->ifalist); SCTP_IPI_ADDR_WLOCK(); LIST_INSERT_HEAD(hash_ifn_head, sctp_ifnp, next_bucket); LIST_INSERT_HEAD(&vrf->ifnlist, sctp_ifnp, next_ifn); atomic_add_int(&SCTP_BASE_INFO(ipi_count_ifns), 1); new_ifn_af = 1; } sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED); if (sctp_ifap) { /* Hmm, it already exists? */ if ((sctp_ifap->ifn_p) && (sctp_ifap->ifn_p->ifn_index == ifn_index)) { SCTPDBG(SCTP_DEBUG_PCB4, "Using existing ifn %s (0x%x) for ifa %p\n", sctp_ifap->ifn_p->ifn_name, ifn_index, sctp_ifap); if (new_ifn_af) { /* Remove the created one that we don't want */ sctp_delete_ifn(sctp_ifnp, SCTP_ADDR_LOCKED); } if (sctp_ifap->localifa_flags & SCTP_BEING_DELETED) { /* easy to solve, just switch back to active */ SCTPDBG(SCTP_DEBUG_PCB4, "Clearing deleted ifa flag\n"); sctp_ifap->localifa_flags = SCTP_ADDR_VALID; sctp_ifap->ifn_p = sctp_ifnp; atomic_add_int(&sctp_ifap->ifn_p->refcount, 1); } exit_stage_left: SCTP_IPI_ADDR_WUNLOCK(); return (sctp_ifap); } else { if (sctp_ifap->ifn_p) { /* * The last IFN gets the address, remove the * old one */ SCTPDBG(SCTP_DEBUG_PCB4, "Moving ifa %p from %s (0x%x) to %s (0x%x)\n", sctp_ifap, sctp_ifap->ifn_p->ifn_name, sctp_ifap->ifn_p->ifn_index, if_name, ifn_index); /* remove the address from the old ifn */ sctp_remove_ifa_from_ifn(sctp_ifap); /* move the address over to the new ifn */ sctp_add_ifa_to_ifn(sctp_ifnp, sctp_ifap); goto exit_stage_left; } else { /* repair ifnp which was NULL ? */ sctp_ifap->localifa_flags = SCTP_ADDR_VALID; SCTPDBG(SCTP_DEBUG_PCB4, "Repairing ifn %p for ifa %p\n", sctp_ifnp, sctp_ifap); sctp_add_ifa_to_ifn(sctp_ifnp, sctp_ifap); } goto exit_stage_left; } } SCTP_IPI_ADDR_WUNLOCK(); SCTP_MALLOC(sctp_ifap, struct sctp_ifa *, sizeof(struct sctp_ifa), SCTP_M_IFA); if (sctp_ifap == NULL) { #ifdef INVARIANTS panic("No memory for IFA"); #endif return (NULL); } memset(sctp_ifap, 0, sizeof(struct sctp_ifa)); sctp_ifap->ifn_p = sctp_ifnp; atomic_add_int(&sctp_ifnp->refcount, 1); sctp_ifap->vrf_id = vrf_id; sctp_ifap->ifa = ifa; memcpy(&sctp_ifap->address, addr, addr->sa_len); sctp_ifap->localifa_flags = SCTP_ADDR_VALID | SCTP_ADDR_DEFER_USE; sctp_ifap->flags = ifa_flags; /* Set scope */ switch (sctp_ifap->address.sa.sa_family) { case AF_INET: { struct sockaddr_in *sin; sin = (struct sockaddr_in *)&sctp_ifap->address.sin; if (SCTP_IFN_IS_IFT_LOOP(sctp_ifap->ifn_p) || (IN4_ISLOOPBACK_ADDRESS(&sin->sin_addr))) { sctp_ifap->src_is_loop = 1; } if ((IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) { sctp_ifap->src_is_priv = 1; } sctp_ifnp->num_v4++; if (new_ifn_af) new_ifn_af = AF_INET; break; } #ifdef INET6 case AF_INET6: { /* ok to use deprecated addresses? */ struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)&sctp_ifap->address.sin6; if (SCTP_IFN_IS_IFT_LOOP(sctp_ifap->ifn_p) || (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr))) { sctp_ifap->src_is_loop = 1; } if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { sctp_ifap->src_is_priv = 1; } sctp_ifnp->num_v6++; if (new_ifn_af) new_ifn_af = AF_INET6; break; } #endif default: new_ifn_af = 0; break; } hash_of_addr = sctp_get_ifa_hash_val(&sctp_ifap->address.sa); if ((sctp_ifap->src_is_priv == 0) && (sctp_ifap->src_is_loop == 0)) { sctp_ifap->src_is_glob = 1; } SCTP_IPI_ADDR_WLOCK(); hash_addr_head = &vrf->vrf_addr_hash[(hash_of_addr & vrf->vrf_addr_hashmark)]; LIST_INSERT_HEAD(hash_addr_head, sctp_ifap, next_bucket); sctp_ifap->refcount = 1; LIST_INSERT_HEAD(&sctp_ifnp->ifalist, sctp_ifap, next_ifa); sctp_ifnp->ifa_count++; vrf->total_ifa_count++; atomic_add_int(&SCTP_BASE_INFO(ipi_count_ifas), 1); if (new_ifn_af) { SCTP_REGISTER_INTERFACE(ifn_index, new_ifn_af); sctp_ifnp->registered_af = new_ifn_af; } SCTP_IPI_ADDR_WUNLOCK(); if (dynamic_add) { /* * Bump up the refcount so that when the timer completes it * will drop back down. */ struct sctp_laddr *wi; atomic_add_int(&sctp_ifap->refcount, 1); wi = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_laddr), struct sctp_laddr); if (wi == NULL) { /* * Gak, what can we do? We have lost an address * change can you say HOSED? */ SCTPDBG(SCTP_DEBUG_PCB4, "Lost an address change?\n"); /* Opps, must decrement the count */ sctp_del_addr_from_vrf(vrf_id, addr, ifn_index, if_name); return (NULL); } SCTP_INCR_LADDR_COUNT(); bzero(wi, sizeof(*wi)); (void)SCTP_GETTIME_TIMEVAL(&wi->start_time); wi->ifa = sctp_ifap; wi->action = SCTP_ADD_IP_ADDRESS; SCTP_WQ_ADDR_LOCK(); LIST_INSERT_HEAD(&SCTP_BASE_INFO(addr_wq), wi, sctp_nxt_addr); SCTP_WQ_ADDR_UNLOCK(); sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ, (struct sctp_inpcb *)NULL, (struct sctp_tcb *)NULL, (struct sctp_nets *)NULL); } else { /* it's ready for use */ sctp_ifap->localifa_flags &= ~SCTP_ADDR_DEFER_USE; } return (sctp_ifap); } void sctp_del_addr_from_vrf(uint32_t vrf_id, struct sockaddr *addr, uint32_t ifn_index, const char *if_name) { struct sctp_vrf *vrf; struct sctp_ifa *sctp_ifap = NULL; SCTP_IPI_ADDR_WLOCK(); vrf = sctp_find_vrf(vrf_id); if (vrf == NULL) { SCTPDBG(SCTP_DEBUG_PCB4, "Can't find vrf_id 0x%x\n", vrf_id); goto out_now; } #ifdef SCTP_DEBUG SCTPDBG(SCTP_DEBUG_PCB4, "vrf_id 0x%x: deleting address:", vrf_id); SCTPDBG_ADDR(SCTP_DEBUG_PCB4, addr); #endif sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED); if (sctp_ifap) { /* Validate the delete */ if (sctp_ifap->ifn_p) { int valid = 0; /*- * The name has priority over the ifn_index * if its given. We do this especially for * panda who might recycle indexes fast. */ if (if_name) { int len1, len2; len1 = min(SCTP_IFNAMSIZ, strlen(if_name)); len2 = min(SCTP_IFNAMSIZ, strlen(sctp_ifap->ifn_p->ifn_name)); if (len1 && len2 && (len1 == len2)) { /* we can compare them */ if (strncmp(if_name, sctp_ifap->ifn_p->ifn_name, len1) == 0) { /* * They match its a correct * delete */ valid = 1; } } } if (!valid) { /* last ditch check ifn_index */ if (ifn_index == sctp_ifap->ifn_p->ifn_index) { valid = 1; } } if (!valid) { SCTPDBG(SCTP_DEBUG_PCB4, "ifn:%d ifname:%s does not match addresses\n", ifn_index, ((if_name == NULL) ? "NULL" : if_name)); SCTPDBG(SCTP_DEBUG_PCB4, "ifn:%d ifname:%s - ignoring delete\n", sctp_ifap->ifn_p->ifn_index, sctp_ifap->ifn_p->ifn_name); SCTP_IPI_ADDR_WUNLOCK(); return; } } SCTPDBG(SCTP_DEBUG_PCB4, "Deleting ifa %p\n", sctp_ifap); sctp_ifap->localifa_flags &= SCTP_ADDR_VALID; sctp_ifap->localifa_flags |= SCTP_BEING_DELETED; vrf->total_ifa_count--; LIST_REMOVE(sctp_ifap, next_bucket); sctp_remove_ifa_from_ifn(sctp_ifap); } #ifdef SCTP_DEBUG else { SCTPDBG(SCTP_DEBUG_PCB4, "Del Addr-ifn:%d Could not find address:", ifn_index); SCTPDBG_ADDR(SCTP_DEBUG_PCB1, addr); } #endif out_now: SCTP_IPI_ADDR_WUNLOCK(); if (sctp_ifap) { struct sctp_laddr *wi; wi = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_laddr), struct sctp_laddr); if (wi == NULL) { /* * Gak, what can we do? We have lost an address * change can you say HOSED? */ SCTPDBG(SCTP_DEBUG_PCB4, "Lost an address change?\n"); /* Oops, must decrement the count */ sctp_free_ifa(sctp_ifap); return; } SCTP_INCR_LADDR_COUNT(); bzero(wi, sizeof(*wi)); (void)SCTP_GETTIME_TIMEVAL(&wi->start_time); wi->ifa = sctp_ifap; wi->action = SCTP_DEL_IP_ADDRESS; SCTP_WQ_ADDR_LOCK(); /* * Should this really be a tailq? As it is we will process * the newest first :-0 */ LIST_INSERT_HEAD(&SCTP_BASE_INFO(addr_wq), wi, sctp_nxt_addr); SCTP_WQ_ADDR_UNLOCK(); sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ, (struct sctp_inpcb *)NULL, (struct sctp_tcb *)NULL, (struct sctp_nets *)NULL); } return; } static struct sctp_tcb * sctp_tcb_special_locate(struct sctp_inpcb **inp_p, struct sockaddr *from, struct sockaddr *to, struct sctp_nets **netp, uint32_t vrf_id) { /**** ASSUMES THE CALLER holds the INP_INFO_RLOCK */ /* * If we support the TCP model, then we must now dig through to see * if we can find our endpoint in the list of tcp ep's. */ uint16_t lport, rport; struct sctppcbhead *ephead; struct sctp_inpcb *inp; struct sctp_laddr *laddr; struct sctp_tcb *stcb; struct sctp_nets *net; if ((to == NULL) || (from == NULL)) { return (NULL); } if (to->sa_family == AF_INET && from->sa_family == AF_INET) { lport = ((struct sockaddr_in *)to)->sin_port; rport = ((struct sockaddr_in *)from)->sin_port; } else if (to->sa_family == AF_INET6 && from->sa_family == AF_INET6) { lport = ((struct sockaddr_in6 *)to)->sin6_port; rport = ((struct sockaddr_in6 *)from)->sin6_port; } else { return NULL; } ephead = &SCTP_BASE_INFO(sctp_tcpephash)[SCTP_PCBHASH_ALLADDR((lport | rport), SCTP_BASE_INFO(hashtcpmark))]; /* * Ok now for each of the guys in this bucket we must look and see: * - Does the remote port match. - Does there single association's * addresses match this address (to). If so we update p_ep to point * to this ep and return the tcb from it. */ LIST_FOREACH(inp, ephead, sctp_hash) { SCTP_INP_RLOCK(inp); if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) { SCTP_INP_RUNLOCK(inp); continue; } if (lport != inp->sctp_lport) { SCTP_INP_RUNLOCK(inp); continue; } if (inp->def_vrf_id != vrf_id) { SCTP_INP_RUNLOCK(inp); continue; } /* check to see if the ep has one of the addresses */ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) { /* We are NOT bound all, so look further */ int match = 0; LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { if (laddr->ifa == NULL) { SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n", __FUNCTION__); continue; } if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) { SCTPDBG(SCTP_DEBUG_PCB1, "ifa being deleted\n"); continue; } if (laddr->ifa->address.sa.sa_family == to->sa_family) { /* see if it matches */ struct sockaddr_in *intf_addr, *sin; intf_addr = &laddr->ifa->address.sin; sin = (struct sockaddr_in *)to; if (from->sa_family == AF_INET) { if (sin->sin_addr.s_addr == intf_addr->sin_addr.s_addr) { match = 1; break; } } #ifdef INET6 if (from->sa_family == AF_INET6) { struct sockaddr_in6 *intf_addr6; struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *) to; intf_addr6 = &laddr->ifa->address.sin6; if (SCTP6_ARE_ADDR_EQUAL(sin6, intf_addr6)) { match = 1; break; } } #endif } } if (match == 0) { /* This endpoint does not have this address */ SCTP_INP_RUNLOCK(inp); continue; } } /* * Ok if we hit here the ep has the address, does it hold * the tcb? */ stcb = LIST_FIRST(&inp->sctp_asoc_list); if (stcb == NULL) { SCTP_INP_RUNLOCK(inp); continue; } SCTP_TCB_LOCK(stcb); if (stcb->rport != rport) { /* remote port does not match. */ SCTP_TCB_UNLOCK(stcb); SCTP_INP_RUNLOCK(inp); continue; } if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { SCTP_TCB_UNLOCK(stcb); SCTP_INP_RUNLOCK(inp); continue; } /* Does this TCB have a matching address? */ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { if (net->ro._l_addr.sa.sa_family != from->sa_family) { /* not the same family, can't be a match */ continue; } switch (from->sa_family) { case AF_INET: { struct sockaddr_in *sin, *rsin; sin = (struct sockaddr_in *)&net->ro._l_addr; rsin = (struct sockaddr_in *)from; if (sin->sin_addr.s_addr == rsin->sin_addr.s_addr) { /* found it */ if (netp != NULL) { *netp = net; } /* * Update the endpoint * pointer */ *inp_p = inp; SCTP_INP_RUNLOCK(inp); return (stcb); } break; } #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *sin6, *rsin6; sin6 = (struct sockaddr_in6 *)&net->ro._l_addr; rsin6 = (struct sockaddr_in6 *)from; if (SCTP6_ARE_ADDR_EQUAL(sin6, rsin6)) { /* found it */ if (netp != NULL) { *netp = net; } /* * Update the endpoint * pointer */ *inp_p = inp; SCTP_INP_RUNLOCK(inp); return (stcb); } break; } #endif default: /* TSNH */ break; } } SCTP_TCB_UNLOCK(stcb); SCTP_INP_RUNLOCK(inp); } return (NULL); } static int sctp_does_stcb_own_this_addr(struct sctp_tcb *stcb, struct sockaddr *to) { int loopback_scope, ipv4_local_scope, local_scope, site_scope; int ipv4_addr_legal, ipv6_addr_legal; struct sctp_vrf *vrf; struct sctp_ifn *sctp_ifn; struct sctp_ifa *sctp_ifa; loopback_scope = stcb->asoc.loopback_scope; ipv4_local_scope = stcb->asoc.ipv4_local_scope; local_scope = stcb->asoc.local_scope; site_scope = stcb->asoc.site_scope; ipv4_addr_legal = ipv6_addr_legal = 0; if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { ipv6_addr_legal = 1; if (SCTP_IPV6_V6ONLY(stcb->sctp_ep) == 0) { ipv4_addr_legal = 1; } } else { ipv4_addr_legal = 1; } SCTP_IPI_ADDR_RLOCK(); vrf = sctp_find_vrf(stcb->asoc.vrf_id); if (vrf == NULL) { /* no vrf, no addresses */ SCTP_IPI_ADDR_RUNLOCK(); return (0); } if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) { if ((loopback_scope == 0) && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) { continue; } LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) { if (sctp_is_addr_restricted(stcb, sctp_ifa)) continue; switch (sctp_ifa->address.sa.sa_family) { #ifdef INET case AF_INET: if (ipv4_addr_legal) { struct sockaddr_in *sin, *rsin; sin = &sctp_ifa->address.sin; rsin = (struct sockaddr_in *)to; if ((ipv4_local_scope == 0) && IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) { continue; } if (sin->sin_addr.s_addr == rsin->sin_addr.s_addr) { SCTP_IPI_ADDR_RUNLOCK(); return (1); } } break; #endif #ifdef INET6 case AF_INET6: if (ipv6_addr_legal) { struct sockaddr_in6 *sin6, *rsin6; sin6 = &sctp_ifa->address.sin6; rsin6 = (struct sockaddr_in6 *)to; if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { if (local_scope == 0) continue; if (sin6->sin6_scope_id == 0) { if (sa6_recoverscope(sin6) != 0) continue; } } if ((site_scope == 0) && (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) { continue; } if (SCTP6_ARE_ADDR_EQUAL(sin6, rsin6)) { SCTP_IPI_ADDR_RUNLOCK(); return (1); } } break; #endif default: /* TSNH */ break; } } } } else { struct sctp_laddr *laddr; LIST_FOREACH(laddr, &stcb->sctp_ep->sctp_addr_list, sctp_nxt_addr) { if (sctp_is_addr_restricted(stcb, laddr->ifa)) { continue; } if (laddr->ifa->address.sa.sa_family != to->sa_family) { continue; } switch (to->sa_family) { #ifdef INET case AF_INET: { struct sockaddr_in *sin, *rsin; sin = (struct sockaddr_in *)&laddr->ifa->address.sin; rsin = (struct sockaddr_in *)to; if (sin->sin_addr.s_addr == rsin->sin_addr.s_addr) { SCTP_IPI_ADDR_RUNLOCK(); return (1); } break; } #endif #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *sin6, *rsin6; sin6 = (struct sockaddr_in6 *)&laddr->ifa->address.sin6; rsin6 = (struct sockaddr_in6 *)to; if (SCTP6_ARE_ADDR_EQUAL(sin6, rsin6)) { SCTP_IPI_ADDR_RUNLOCK(); return (1); } break; } #endif default: /* TSNH */ break; } } } SCTP_IPI_ADDR_RUNLOCK(); return (0); } /* * rules for use * * 1) If I return a NULL you must decrement any INP ref cnt. 2) If I find an * stcb, both will be locked (locked_tcb and stcb) but decrement will be done * (if locked == NULL). 3) Decrement happens on return ONLY if locked == * NULL. */ struct sctp_tcb * sctp_findassociation_ep_addr(struct sctp_inpcb **inp_p, struct sockaddr *remote, struct sctp_nets **netp, struct sockaddr *local, struct sctp_tcb *locked_tcb) { struct sctpasochead *head; struct sctp_inpcb *inp; struct sctp_tcb *stcb = NULL; struct sctp_nets *net; uint16_t rport; inp = *inp_p; if (remote->sa_family == AF_INET) { rport = (((struct sockaddr_in *)remote)->sin_port); } else if (remote->sa_family == AF_INET6) { rport = (((struct sockaddr_in6 *)remote)->sin6_port); } else { return (NULL); } if (locked_tcb) { /* * UN-lock so we can do proper locking here this occurs when * called from load_addresses_from_init. */ atomic_add_int(&locked_tcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(locked_tcb); } SCTP_INP_INFO_RLOCK(); if (inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) { /*- * Now either this guy is our listener or it's the * connector. If it is the one that issued the connect, then * it's only chance is to be the first TCB in the list. If * it is the acceptor, then do the special_lookup to hash * and find the real inp. */ if ((inp->sctp_socket) && (inp->sctp_socket->so_qlimit)) { /* to is peer addr, from is my addr */ stcb = sctp_tcb_special_locate(inp_p, remote, local, netp, inp->def_vrf_id); if ((stcb != NULL) && (locked_tcb == NULL)) { /* we have a locked tcb, lower refcount */ SCTP_INP_DECR_REF(inp); } if ((locked_tcb != NULL) && (locked_tcb != stcb)) { SCTP_INP_RLOCK(locked_tcb->sctp_ep); SCTP_TCB_LOCK(locked_tcb); atomic_subtract_int(&locked_tcb->asoc.refcnt, 1); SCTP_INP_RUNLOCK(locked_tcb->sctp_ep); } SCTP_INP_INFO_RUNLOCK(); return (stcb); } else { SCTP_INP_WLOCK(inp); if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) { goto null_return; } stcb = LIST_FIRST(&inp->sctp_asoc_list); if (stcb == NULL) { goto null_return; } SCTP_TCB_LOCK(stcb); if (stcb->rport != rport) { /* remote port does not match. */ SCTP_TCB_UNLOCK(stcb); goto null_return; } if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { SCTP_TCB_UNLOCK(stcb); goto null_return; } if (local && !sctp_does_stcb_own_this_addr(stcb, local)) { SCTP_TCB_UNLOCK(stcb); goto null_return; } /* now look at the list of remote addresses */ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { #ifdef INVARIANTS if (net == (TAILQ_NEXT(net, sctp_next))) { panic("Corrupt net list"); } #endif if (net->ro._l_addr.sa.sa_family != remote->sa_family) { /* not the same family */ continue; } switch (remote->sa_family) { case AF_INET: { struct sockaddr_in *sin, *rsin; sin = (struct sockaddr_in *) &net->ro._l_addr; rsin = (struct sockaddr_in *)remote; if (sin->sin_addr.s_addr == rsin->sin_addr.s_addr) { /* found it */ if (netp != NULL) { *netp = net; } if (locked_tcb == NULL) { SCTP_INP_DECR_REF(inp); } else if (locked_tcb != stcb) { SCTP_TCB_LOCK(locked_tcb); } if (locked_tcb) { atomic_subtract_int(&locked_tcb->asoc.refcnt, 1); } SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_RUNLOCK(); return (stcb); } break; } #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *sin6, *rsin6; sin6 = (struct sockaddr_in6 *)&net->ro._l_addr; rsin6 = (struct sockaddr_in6 *)remote; if (SCTP6_ARE_ADDR_EQUAL(sin6, rsin6)) { /* found it */ if (netp != NULL) { *netp = net; } if (locked_tcb == NULL) { SCTP_INP_DECR_REF(inp); } else if (locked_tcb != stcb) { SCTP_TCB_LOCK(locked_tcb); } if (locked_tcb) { atomic_subtract_int(&locked_tcb->asoc.refcnt, 1); } SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_RUNLOCK(); return (stcb); } break; } #endif default: /* TSNH */ break; } } SCTP_TCB_UNLOCK(stcb); } } else { SCTP_INP_WLOCK(inp); if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) { goto null_return; } head = &inp->sctp_tcbhash[SCTP_PCBHASH_ALLADDR(rport, inp->sctp_hashmark)]; if (head == NULL) { goto null_return; } LIST_FOREACH(stcb, head, sctp_tcbhash) { if (stcb->rport != rport) { /* remote port does not match */ continue; } SCTP_TCB_LOCK(stcb); if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { SCTP_TCB_UNLOCK(stcb); continue; } if (local && !sctp_does_stcb_own_this_addr(stcb, local)) { SCTP_TCB_UNLOCK(stcb); continue; } /* now look at the list of remote addresses */ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { #ifdef INVARIANTS if (net == (TAILQ_NEXT(net, sctp_next))) { panic("Corrupt net list"); } #endif if (net->ro._l_addr.sa.sa_family != remote->sa_family) { /* not the same family */ continue; } switch (remote->sa_family) { case AF_INET: { struct sockaddr_in *sin, *rsin; sin = (struct sockaddr_in *) &net->ro._l_addr; rsin = (struct sockaddr_in *)remote; if (sin->sin_addr.s_addr == rsin->sin_addr.s_addr) { /* found it */ if (netp != NULL) { *netp = net; } if (locked_tcb == NULL) { SCTP_INP_DECR_REF(inp); } else if (locked_tcb != stcb) { SCTP_TCB_LOCK(locked_tcb); } if (locked_tcb) { atomic_subtract_int(&locked_tcb->asoc.refcnt, 1); } SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_RUNLOCK(); return (stcb); } break; } #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *sin6, *rsin6; sin6 = (struct sockaddr_in6 *) &net->ro._l_addr; rsin6 = (struct sockaddr_in6 *)remote; if (SCTP6_ARE_ADDR_EQUAL(sin6, rsin6)) { /* found it */ if (netp != NULL) { *netp = net; } if (locked_tcb == NULL) { SCTP_INP_DECR_REF(inp); } else if (locked_tcb != stcb) { SCTP_TCB_LOCK(locked_tcb); } if (locked_tcb) { atomic_subtract_int(&locked_tcb->asoc.refcnt, 1); } SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_RUNLOCK(); return (stcb); } break; } #endif default: /* TSNH */ break; } } SCTP_TCB_UNLOCK(stcb); } } null_return: /* clean up for returning null */ if (locked_tcb) { SCTP_TCB_LOCK(locked_tcb); atomic_subtract_int(&locked_tcb->asoc.refcnt, 1); } SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_RUNLOCK(); /* not found */ return (NULL); } /* * Find an association for a specific endpoint using the association id given * out in the COMM_UP notification */ struct sctp_tcb * sctp_findasoc_ep_asocid_locked(struct sctp_inpcb *inp, sctp_assoc_t asoc_id, int want_lock) { /* * Use my the assoc_id to find a endpoint */ struct sctpasochead *head; struct sctp_tcb *stcb; uint32_t id; if (inp == NULL) { SCTP_PRINTF("TSNH ep_associd\n"); return (NULL); } if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) { SCTP_PRINTF("TSNH ep_associd0\n"); return (NULL); } id = (uint32_t) asoc_id; head = &inp->sctp_asocidhash[SCTP_PCBHASH_ASOC(id, inp->hashasocidmark)]; if (head == NULL) { /* invalid id TSNH */ SCTP_PRINTF("TSNH ep_associd1\n"); return (NULL); } LIST_FOREACH(stcb, head, sctp_tcbasocidhash) { if (stcb->asoc.assoc_id == id) { if (inp != stcb->sctp_ep) { /* * some other guy has the same id active (id * collision ??). */ SCTP_PRINTF("TSNH ep_associd2\n"); continue; } if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { continue; } if (want_lock) { SCTP_TCB_LOCK(stcb); } return (stcb); } } return (NULL); } struct sctp_tcb * sctp_findassociation_ep_asocid(struct sctp_inpcb *inp, sctp_assoc_t asoc_id, int want_lock) { struct sctp_tcb *stcb; SCTP_INP_RLOCK(inp); stcb = sctp_findasoc_ep_asocid_locked(inp, asoc_id, want_lock); SCTP_INP_RUNLOCK(inp); return (stcb); } static struct sctp_inpcb * sctp_endpoint_probe(struct sockaddr *nam, struct sctppcbhead *head, uint16_t lport, uint32_t vrf_id) { struct sctp_inpcb *inp; struct sockaddr_in *sin; #ifdef INET6 struct sockaddr_in6 *sin6; #endif struct sctp_laddr *laddr; #ifdef INET6 struct sockaddr_in6 *intf_addr6; #endif int fnd; /* * Endpoint probe expects that the INP_INFO is locked. */ sin = NULL; #ifdef INET6 sin6 = NULL; #endif switch (nam->sa_family) { case AF_INET: sin = (struct sockaddr_in *)nam; break; #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)nam; break; #endif default: /* unsupported family */ return (NULL); } if (head == NULL) return (NULL); LIST_FOREACH(inp, head, sctp_hash) { SCTP_INP_RLOCK(inp); if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) { SCTP_INP_RUNLOCK(inp); continue; } if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) && (inp->sctp_lport == lport)) { /* got it */ if ((nam->sa_family == AF_INET) && (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) && SCTP_IPV6_V6ONLY(inp)) { /* IPv4 on a IPv6 socket with ONLY IPv6 set */ SCTP_INP_RUNLOCK(inp); continue; } /* A V6 address and the endpoint is NOT bound V6 */ if (nam->sa_family == AF_INET6 && (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) { SCTP_INP_RUNLOCK(inp); continue; } /* does a VRF id match? */ fnd = 0; if (inp->def_vrf_id == vrf_id) fnd = 1; SCTP_INP_RUNLOCK(inp); if (!fnd) continue; return (inp); } SCTP_INP_RUNLOCK(inp); } if ((nam->sa_family == AF_INET) && (sin->sin_addr.s_addr == INADDR_ANY)) { /* Can't hunt for one that has no address specified */ return (NULL); } #ifdef INET6 if ((nam->sa_family == AF_INET6) && (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))) { /* Can't hunt for one that has no address specified */ return (NULL); } #endif /* * ok, not bound to all so see if we can find a EP bound to this * address. */ LIST_FOREACH(inp, head, sctp_hash) { SCTP_INP_RLOCK(inp); if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) { SCTP_INP_RUNLOCK(inp); continue; } if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL)) { SCTP_INP_RUNLOCK(inp); continue; } /* * Ok this could be a likely candidate, look at all of its * addresses */ if (inp->sctp_lport != lport) { SCTP_INP_RUNLOCK(inp); continue; } /* does a VRF id match? */ fnd = 0; if (inp->def_vrf_id == vrf_id) fnd = 1; if (!fnd) { SCTP_INP_RUNLOCK(inp); continue; } LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { if (laddr->ifa == NULL) { SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n", __FUNCTION__); continue; } SCTPDBG(SCTP_DEBUG_PCB1, "Ok laddr->ifa:%p is possible, ", laddr->ifa); if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) { SCTPDBG(SCTP_DEBUG_PCB1, "Huh IFA being deleted\n"); continue; } if (laddr->ifa->address.sa.sa_family == nam->sa_family) { /* possible, see if it matches */ struct sockaddr_in *intf_addr; intf_addr = &laddr->ifa->address.sin; switch (nam->sa_family) { case AF_INET: if (sin->sin_addr.s_addr == intf_addr->sin_addr.s_addr) { SCTP_INP_RUNLOCK(inp); return (inp); } break; #ifdef INET6 case AF_INET6: intf_addr6 = &laddr->ifa->address.sin6; if (SCTP6_ARE_ADDR_EQUAL(sin6, intf_addr6)) { SCTP_INP_RUNLOCK(inp); return (inp); } break; #endif } } } SCTP_INP_RUNLOCK(inp); } return (NULL); } static struct sctp_inpcb * sctp_isport_inuse(struct sctp_inpcb *inp, uint16_t lport, uint32_t vrf_id) { struct sctppcbhead *head; struct sctp_inpcb *t_inp; int fnd; head = &SCTP_BASE_INFO(sctp_ephash)[SCTP_PCBHASH_ALLADDR(lport, SCTP_BASE_INFO(hashmark))]; LIST_FOREACH(t_inp, head, sctp_hash) { if (t_inp->sctp_lport != lport) { continue; } /* is it in the VRF in question */ fnd = 0; if (t_inp->def_vrf_id == vrf_id) fnd = 1; if (!fnd) continue; /* This one is in use. */ /* check the v6/v4 binding issue */ if ((t_inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) && SCTP_IPV6_V6ONLY(t_inp)) { if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { /* collision in V6 space */ return (t_inp); } else { /* inp is BOUND_V4 no conflict */ continue; } } else if (t_inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { /* t_inp is bound v4 and v6, conflict always */ return (t_inp); } else { /* t_inp is bound only V4 */ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) && SCTP_IPV6_V6ONLY(inp)) { /* no conflict */ continue; } /* else fall through to conflict */ } return (t_inp); } return (NULL); } int sctp_swap_inpcb_for_listen(struct sctp_inpcb *inp) { /* For 1-2-1 with port reuse */ struct sctppcbhead *head; struct sctp_inpcb *tinp; if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_PORTREUSE)) { /* only works with port reuse on */ return (-1); } if ((inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) == 0) { return (0); } SCTP_INP_RUNLOCK(inp); head = &SCTP_BASE_INFO(sctp_ephash)[SCTP_PCBHASH_ALLADDR(inp->sctp_lport, SCTP_BASE_INFO(hashmark))]; /* Kick out all non-listeners to the TCP hash */ LIST_FOREACH(tinp, head, sctp_hash) { if (tinp->sctp_lport != inp->sctp_lport) { continue; } if (tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) { continue; } if (tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) { continue; } if (tinp->sctp_socket->so_qlimit) { continue; } SCTP_INP_WLOCK(tinp); LIST_REMOVE(tinp, sctp_hash); head = &SCTP_BASE_INFO(sctp_tcpephash)[SCTP_PCBHASH_ALLADDR(tinp->sctp_lport, SCTP_BASE_INFO(hashtcpmark))]; tinp->sctp_flags |= SCTP_PCB_FLAGS_IN_TCPPOOL; LIST_INSERT_HEAD(head, tinp, sctp_hash); SCTP_INP_WUNLOCK(tinp); } SCTP_INP_WLOCK(inp); /* Pull from where he was */ LIST_REMOVE(inp, sctp_hash); inp->sctp_flags &= ~SCTP_PCB_FLAGS_IN_TCPPOOL; head = &SCTP_BASE_INFO(sctp_ephash)[SCTP_PCBHASH_ALLADDR(inp->sctp_lport, SCTP_BASE_INFO(hashmark))]; LIST_INSERT_HEAD(head, inp, sctp_hash); SCTP_INP_WUNLOCK(inp); SCTP_INP_RLOCK(inp); return (0); } struct sctp_inpcb * sctp_pcb_findep(struct sockaddr *nam, int find_tcp_pool, int have_lock, uint32_t vrf_id) { /* * First we check the hash table to see if someone has this port * bound with just the port. */ struct sctp_inpcb *inp; struct sctppcbhead *head; struct sockaddr_in *sin; struct sockaddr_in6 *sin6; int lport; unsigned int i; if (nam->sa_family == AF_INET) { sin = (struct sockaddr_in *)nam; lport = ((struct sockaddr_in *)nam)->sin_port; } else if (nam->sa_family == AF_INET6) { sin6 = (struct sockaddr_in6 *)nam; lport = ((struct sockaddr_in6 *)nam)->sin6_port; } else { /* unsupported family */ return (NULL); } /* * I could cheat here and just cast to one of the types but we will * do it right. It also provides the check against an Unsupported * type too. */ /* Find the head of the ALLADDR chain */ if (have_lock == 0) { SCTP_INP_INFO_RLOCK(); } head = &SCTP_BASE_INFO(sctp_ephash)[SCTP_PCBHASH_ALLADDR(lport, SCTP_BASE_INFO(hashmark))]; inp = sctp_endpoint_probe(nam, head, lport, vrf_id); /* * If the TCP model exists it could be that the main listening * endpoint is gone but there still exists a connected socket for * this guy. If so we can return the first one that we find. This * may NOT be the correct one so the caller should be wary on the * returned INP. Currently the only caller that sets find_tcp_pool * is in bindx where we are verifying that a user CAN bind the * address. He either has bound it already, or someone else has, or * its open to bind, so this is good enough. */ if (inp == NULL && find_tcp_pool) { for (i = 0; i < SCTP_BASE_INFO(hashtcpmark) + 1; i++) { head = &SCTP_BASE_INFO(sctp_tcpephash)[i]; inp = sctp_endpoint_probe(nam, head, lport, vrf_id); if (inp) { break; } } } if (inp) { SCTP_INP_INCR_REF(inp); } if (have_lock == 0) { SCTP_INP_INFO_RUNLOCK(); } return (inp); } /* * Find an association for an endpoint with the pointer to whom you want to * send to and the endpoint pointer. The address can be IPv4 or IPv6. We may * need to change the *to to some other struct like a mbuf... */ struct sctp_tcb * sctp_findassociation_addr_sa(struct sockaddr *to, struct sockaddr *from, struct sctp_inpcb **inp_p, struct sctp_nets **netp, int find_tcp_pool, uint32_t vrf_id) { struct sctp_inpcb *inp = NULL; struct sctp_tcb *retval; SCTP_INP_INFO_RLOCK(); if (find_tcp_pool) { if (inp_p != NULL) { retval = sctp_tcb_special_locate(inp_p, from, to, netp, vrf_id); } else { retval = sctp_tcb_special_locate(&inp, from, to, netp, vrf_id); } if (retval != NULL) { SCTP_INP_INFO_RUNLOCK(); return (retval); } } inp = sctp_pcb_findep(to, 0, 1, vrf_id); if (inp_p != NULL) { *inp_p = inp; } SCTP_INP_INFO_RUNLOCK(); if (inp == NULL) { return (NULL); } /* * ok, we have an endpoint, now lets find the assoc for it (if any) * we now place the source address or from in the to of the find * endpoint call. Since in reality this chain is used from the * inbound packet side. */ if (inp_p != NULL) { retval = sctp_findassociation_ep_addr(inp_p, from, netp, to, NULL); } else { retval = sctp_findassociation_ep_addr(&inp, from, netp, to, NULL); } return retval; } /* * This routine will grub through the mbuf that is a INIT or INIT-ACK and * find all addresses that the sender has specified in any address list. Each * address will be used to lookup the TCB and see if one exits. */ static struct sctp_tcb * sctp_findassociation_special_addr(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh, struct sctp_inpcb **inp_p, struct sctp_nets **netp, struct sockaddr *dest) { struct sockaddr_in sin4; struct sockaddr_in6 sin6; struct sctp_paramhdr *phdr, parm_buf; struct sctp_tcb *retval; uint32_t ptype, plen; memset(&sin4, 0, sizeof(sin4)); memset(&sin6, 0, sizeof(sin6)); sin4.sin_len = sizeof(sin4); sin4.sin_family = AF_INET; sin4.sin_port = sh->src_port; sin6.sin6_len = sizeof(sin6); sin6.sin6_family = AF_INET6; sin6.sin6_port = sh->src_port; retval = NULL; offset += sizeof(struct sctp_init_chunk); phdr = sctp_get_next_param(m, offset, &parm_buf, sizeof(parm_buf)); while (phdr != NULL) { /* now we must see if we want the parameter */ ptype = ntohs(phdr->param_type); plen = ntohs(phdr->param_length); if (plen == 0) { break; } if (ptype == SCTP_IPV4_ADDRESS && plen == sizeof(struct sctp_ipv4addr_param)) { /* Get the rest of the address */ struct sctp_ipv4addr_param ip4_parm, *p4; phdr = sctp_get_next_param(m, offset, (struct sctp_paramhdr *)&ip4_parm, min(plen, sizeof(ip4_parm))); if (phdr == NULL) { return (NULL); } p4 = (struct sctp_ipv4addr_param *)phdr; memcpy(&sin4.sin_addr, &p4->addr, sizeof(p4->addr)); /* look it up */ retval = sctp_findassociation_ep_addr(inp_p, (struct sockaddr *)&sin4, netp, dest, NULL); if (retval != NULL) { return (retval); } } else if (ptype == SCTP_IPV6_ADDRESS && plen == sizeof(struct sctp_ipv6addr_param)) { /* Get the rest of the address */ struct sctp_ipv6addr_param ip6_parm, *p6; phdr = sctp_get_next_param(m, offset, (struct sctp_paramhdr *)&ip6_parm, min(plen, sizeof(ip6_parm))); if (phdr == NULL) { return (NULL); } p6 = (struct sctp_ipv6addr_param *)phdr; memcpy(&sin6.sin6_addr, &p6->addr, sizeof(p6->addr)); /* look it up */ retval = sctp_findassociation_ep_addr(inp_p, (struct sockaddr *)&sin6, netp, dest, NULL); if (retval != NULL) { return (retval); } } offset += SCTP_SIZE32(plen); phdr = sctp_get_next_param(m, offset, &parm_buf, sizeof(parm_buf)); } return (NULL); } static struct sctp_tcb * sctp_findassoc_by_vtag(struct sockaddr *from, struct sockaddr *to, uint32_t vtag, struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint16_t rport, uint16_t lport, int skip_src_check, uint32_t vrf_id, uint32_t remote_tag) { /* * Use my vtag to hash. If we find it we then verify the source addr * is in the assoc. If all goes well we save a bit on rec of a * packet. */ struct sctpasochead *head; struct sctp_nets *net; struct sctp_tcb *stcb; *netp = NULL; *inp_p = NULL; SCTP_INP_INFO_RLOCK(); head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(vtag, SCTP_BASE_INFO(hashasocmark))]; if (head == NULL) { /* invalid vtag */ SCTP_INP_INFO_RUNLOCK(); return (NULL); } LIST_FOREACH(stcb, head, sctp_asocs) { SCTP_INP_RLOCK(stcb->sctp_ep); if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) { SCTP_INP_RUNLOCK(stcb->sctp_ep); continue; } SCTP_TCB_LOCK(stcb); SCTP_INP_RUNLOCK(stcb->sctp_ep); if (stcb->asoc.my_vtag == vtag) { /* candidate */ if (stcb->rport != rport) { SCTP_TCB_UNLOCK(stcb); continue; } if (stcb->sctp_ep->sctp_lport != lport) { SCTP_TCB_UNLOCK(stcb); continue; } if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { SCTP_TCB_UNLOCK(stcb); continue; } /* RRS:Need toaddr check here */ if (sctp_does_stcb_own_this_addr(stcb, to) == 0) { /* Endpoint does not own this address */ SCTP_TCB_UNLOCK(stcb); continue; } if (remote_tag) { /* * If we have both vtags that's all we match * on */ if (stcb->asoc.peer_vtag == remote_tag) { /* * If both tags match we consider it * conclusive and check NO * source/destination addresses */ goto conclusive; } } if (skip_src_check) { conclusive: if (from) { net = sctp_findnet(stcb, from); } else { *netp = NULL; /* unknown */ } if (inp_p) *inp_p = stcb->sctp_ep; SCTP_INP_INFO_RUNLOCK(); return (stcb); } net = sctp_findnet(stcb, from); if (net) { /* yep its him. */ *netp = net; SCTP_STAT_INCR(sctps_vtagexpress); *inp_p = stcb->sctp_ep; SCTP_INP_INFO_RUNLOCK(); return (stcb); } else { /* * not him, this should only happen in rare * cases so I peg it. */ SCTP_STAT_INCR(sctps_vtagbogus); } } SCTP_TCB_UNLOCK(stcb); } SCTP_INP_INFO_RUNLOCK(); return (NULL); } /* * Find an association with the pointer to the inbound IP packet. This can be * a IPv4 or IPv6 packet. */ struct sctp_tcb * sctp_findassociation_addr(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh, struct sctp_chunkhdr *ch, struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint32_t vrf_id) { int find_tcp_pool; struct ip *iph; struct sctp_tcb *retval; struct sockaddr_storage to_store, from_store; struct sockaddr *to = (struct sockaddr *)&to_store; struct sockaddr *from = (struct sockaddr *)&from_store; struct sctp_inpcb *inp; iph = mtod(m, struct ip *); switch (iph->ip_v) { case IPVERSION: { /* its IPv4 */ struct sockaddr_in *from4; from4 = (struct sockaddr_in *)&from_store; bzero(from4, sizeof(*from4)); from4->sin_family = AF_INET; from4->sin_len = sizeof(struct sockaddr_in); from4->sin_addr.s_addr = iph->ip_src.s_addr; from4->sin_port = sh->src_port; break; } #ifdef INET6 case IPV6_VERSION >> 4: { /* its IPv6 */ struct ip6_hdr *ip6; struct sockaddr_in6 *from6; ip6 = mtod(m, struct ip6_hdr *); from6 = (struct sockaddr_in6 *)&from_store; bzero(from6, sizeof(*from6)); from6->sin6_family = AF_INET6; from6->sin6_len = sizeof(struct sockaddr_in6); from6->sin6_addr = ip6->ip6_src; from6->sin6_port = sh->src_port; /* Get the scopes in properly to the sin6 addr's */ /* we probably don't need these operations */ (void)sa6_recoverscope(from6); sa6_embedscope(from6, MODULE_GLOBAL(ip6_use_defzone)); break; } #endif default: /* Currently not supported. */ return (NULL); } switch (iph->ip_v) { case IPVERSION: { /* its IPv4 */ struct sockaddr_in *to4; to4 = (struct sockaddr_in *)&to_store; bzero(to4, sizeof(*to4)); to4->sin_family = AF_INET; to4->sin_len = sizeof(struct sockaddr_in); to4->sin_addr.s_addr = iph->ip_dst.s_addr; to4->sin_port = sh->dest_port; break; } #ifdef INET6 case IPV6_VERSION >> 4: { /* its IPv6 */ struct ip6_hdr *ip6; struct sockaddr_in6 *to6; ip6 = mtod(m, struct ip6_hdr *); to6 = (struct sockaddr_in6 *)&to_store; bzero(to6, sizeof(*to6)); to6->sin6_family = AF_INET6; to6->sin6_len = sizeof(struct sockaddr_in6); to6->sin6_addr = ip6->ip6_dst; to6->sin6_port = sh->dest_port; /* Get the scopes in properly to the sin6 addr's */ /* we probably don't need these operations */ (void)sa6_recoverscope(to6); sa6_embedscope(to6, MODULE_GLOBAL(ip6_use_defzone)); break; } #endif default: /* TSNH */ break; } if (sh->v_tag) { /* we only go down this path if vtag is non-zero */ retval = sctp_findassoc_by_vtag(from, to, ntohl(sh->v_tag), inp_p, netp, sh->src_port, sh->dest_port, 0, vrf_id, 0); if (retval) { return (retval); } } find_tcp_pool = 0; if ((ch->chunk_type != SCTP_INITIATION) && (ch->chunk_type != SCTP_INITIATION_ACK) && (ch->chunk_type != SCTP_COOKIE_ACK) && (ch->chunk_type != SCTP_COOKIE_ECHO)) { /* Other chunk types go to the tcp pool. */ find_tcp_pool = 1; } if (inp_p) { retval = sctp_findassociation_addr_sa(to, from, inp_p, netp, find_tcp_pool, vrf_id); inp = *inp_p; } else { retval = sctp_findassociation_addr_sa(to, from, &inp, netp, find_tcp_pool, vrf_id); } SCTPDBG(SCTP_DEBUG_PCB1, "retval:%p inp:%p\n", retval, inp); if (retval == NULL && inp) { /* Found a EP but not this address */ if ((ch->chunk_type == SCTP_INITIATION) || (ch->chunk_type == SCTP_INITIATION_ACK)) { /*- * special hook, we do NOT return linp or an * association that is linked to an existing * association that is under the TCP pool (i.e. no * listener exists). The endpoint finding routine * will always find a listener before examining the * TCP pool. */ if (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) { if (inp_p) { *inp_p = NULL; } return (NULL); } retval = sctp_findassociation_special_addr(m, iphlen, offset, sh, &inp, netp, to); if (inp_p != NULL) { *inp_p = inp; } } } SCTPDBG(SCTP_DEBUG_PCB1, "retval is %p\n", retval); return (retval); } /* * lookup an association by an ASCONF lookup address. * if the lookup address is 0.0.0.0 or ::0, use the vtag to do the lookup */ struct sctp_tcb * sctp_findassociation_ep_asconf(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh, struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint32_t vrf_id) { struct sctp_tcb *stcb; struct sockaddr_in *sin; #ifdef INET6 struct sockaddr_in6 *sin6; #endif struct sockaddr_storage local_store, remote_store; struct sockaddr *to; struct ip *iph; #ifdef INET6 struct ip6_hdr *ip6; #endif struct sctp_paramhdr parm_buf, *phdr; int ptype; int zero_address = 0; memset(&local_store, 0, sizeof(local_store)); memset(&remote_store, 0, sizeof(remote_store)); to = (struct sockaddr *)&local_store; /* First get the destination address setup too. */ iph = mtod(m, struct ip *); switch (iph->ip_v) { case IPVERSION: /* its IPv4 */ sin = (struct sockaddr_in *)&local_store; sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); sin->sin_port = sh->dest_port; sin->sin_addr.s_addr = iph->ip_dst.s_addr; break; #ifdef INET6 case IPV6_VERSION >> 4: /* its IPv6 */ ip6 = mtod(m, struct ip6_hdr *); sin6 = (struct sockaddr_in6 *)&local_store; sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(*sin6); sin6->sin6_port = sh->dest_port; sin6->sin6_addr = ip6->ip6_dst; break; #endif default: return NULL; } phdr = sctp_get_next_param(m, offset + sizeof(struct sctp_asconf_chunk), &parm_buf, sizeof(struct sctp_paramhdr)); if (phdr == NULL) { SCTPDBG(SCTP_DEBUG_INPUT3, "%s: failed to get asconf lookup addr\n", __FUNCTION__); return NULL; } ptype = (int)((uint32_t) ntohs(phdr->param_type)); /* get the correlation address */ switch (ptype) { #ifdef INET6 case SCTP_IPV6_ADDRESS: { /* ipv6 address param */ struct sctp_ipv6addr_param *p6, p6_buf; if (ntohs(phdr->param_length) != sizeof(struct sctp_ipv6addr_param)) { return NULL; } p6 = (struct sctp_ipv6addr_param *)sctp_get_next_param(m, offset + sizeof(struct sctp_asconf_chunk), &p6_buf.ph, sizeof(*p6)); if (p6 == NULL) { SCTPDBG(SCTP_DEBUG_INPUT3, "%s: failed to get asconf v6 lookup addr\n", __FUNCTION__); return (NULL); } sin6 = (struct sockaddr_in6 *)&remote_store; sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(*sin6); sin6->sin6_port = sh->src_port; memcpy(&sin6->sin6_addr, &p6->addr, sizeof(struct in6_addr)); if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) zero_address = 1; break; } #endif case SCTP_IPV4_ADDRESS: { /* ipv4 address param */ struct sctp_ipv4addr_param *p4, p4_buf; if (ntohs(phdr->param_length) != sizeof(struct sctp_ipv4addr_param)) { return NULL; } p4 = (struct sctp_ipv4addr_param *)sctp_get_next_param(m, offset + sizeof(struct sctp_asconf_chunk), &p4_buf.ph, sizeof(*p4)); if (p4 == NULL) { SCTPDBG(SCTP_DEBUG_INPUT3, "%s: failed to get asconf v4 lookup addr\n", __FUNCTION__); return (NULL); } sin = (struct sockaddr_in *)&remote_store; sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); sin->sin_port = sh->src_port; memcpy(&sin->sin_addr, &p4->addr, sizeof(struct in_addr)); if (sin->sin_addr.s_addr == INADDR_ANY) zero_address = 1; break; } default: /* invalid address param type */ return NULL; } if (zero_address) { stcb = sctp_findassoc_by_vtag(NULL, to, ntohl(sh->v_tag), inp_p, netp, sh->src_port, sh->dest_port, 1, vrf_id, 0); /* * printf("findassociation_ep_asconf: zero lookup address * finds stcb 0x%x\n", (uint32_t)stcb); */ } else { stcb = sctp_findassociation_ep_addr(inp_p, (struct sockaddr *)&remote_store, netp, to, NULL); } return (stcb); } /* * allocate a sctp_inpcb and setup a temporary binding to a port/all * addresses. This way if we don't get a bind we by default pick a ephemeral * port with all addresses bound. */ int sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id) { /* * we get called when a new endpoint starts up. We need to allocate * the sctp_inpcb structure from the zone and init it. Mark it as * unbound and find a port that we can use as an ephemeral with * INADDR_ANY. If the user binds later no problem we can then add in * the specific addresses. And setup the default parameters for the * EP. */ int i, error; struct sctp_inpcb *inp; struct sctp_pcb *m; struct timeval time; sctp_sharedkey_t *null_key; error = 0; SCTP_INP_INFO_WLOCK(); inp = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_ep), struct sctp_inpcb); if (inp == NULL) { SCTP_PRINTF("Out of SCTP-INPCB structures - no resources\n"); SCTP_INP_INFO_WUNLOCK(); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS); return (ENOBUFS); } /* zap it */ bzero(inp, sizeof(*inp)); /* bump generations */ /* setup socket pointers */ inp->sctp_socket = so; inp->ip_inp.inp.inp_socket = so; inp->sctp_associd_counter = 1; inp->partial_delivery_point = SCTP_SB_LIMIT_RCV(so) >> SCTP_PARTIAL_DELIVERY_SHIFT; inp->sctp_frag_point = SCTP_DEFAULT_MAXSEGMENT; inp->sctp_cmt_on_off = SCTP_BASE_SYSCTL(sctp_cmt_on_off); /* init the small hash table we use to track asocid <-> tcb */ inp->sctp_asocidhash = SCTP_HASH_INIT(SCTP_STACK_VTAG_HASH_SIZE, &inp->hashasocidmark); if (inp->sctp_asocidhash == NULL) { SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp); SCTP_INP_INFO_WUNLOCK(); return (ENOBUFS); } #ifdef IPSEC { struct inpcbpolicy *pcb_sp = NULL; error = ipsec_init_policy(so, &pcb_sp); /* Arrange to share the policy */ inp->ip_inp.inp.inp_sp = pcb_sp; ((struct in6pcb *)(&inp->ip_inp.inp))->in6p_sp = pcb_sp; } if (error != 0) { SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp); SCTP_INP_INFO_WUNLOCK(); return error; } #endif /* IPSEC */ SCTP_INCR_EP_COUNT(); inp->ip_inp.inp.inp_ip_ttl = MODULE_GLOBAL(ip_defttl); SCTP_INP_INFO_WUNLOCK(); so->so_pcb = (caddr_t)inp; if ((SCTP_SO_TYPE(so) == SOCK_DGRAM) || (SCTP_SO_TYPE(so) == SOCK_SEQPACKET)) { /* UDP style socket */ inp->sctp_flags = (SCTP_PCB_FLAGS_UDPTYPE | SCTP_PCB_FLAGS_UNBOUND); /* Be sure it is NON-BLOCKING IO for UDP */ /* SCTP_SET_SO_NBIO(so); */ } else if (SCTP_SO_TYPE(so) == SOCK_STREAM) { /* TCP style socket */ inp->sctp_flags = (SCTP_PCB_FLAGS_TCPTYPE | SCTP_PCB_FLAGS_UNBOUND); /* Be sure we have blocking IO by default */ SCTP_CLEAR_SO_NBIO(so); } else { /* * unsupported socket type (RAW, etc)- in case we missed it * in protosw */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EOPNOTSUPP); so->so_pcb = NULL; SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp); return (EOPNOTSUPP); } if (SCTP_BASE_SYSCTL(sctp_default_frag_interleave) == SCTP_FRAG_LEVEL_1) { sctp_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE); sctp_feature_off(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS); } else if (SCTP_BASE_SYSCTL(sctp_default_frag_interleave) == SCTP_FRAG_LEVEL_2) { sctp_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE); sctp_feature_on(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS); } else if (SCTP_BASE_SYSCTL(sctp_default_frag_interleave) == SCTP_FRAG_LEVEL_0) { sctp_feature_off(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE); sctp_feature_off(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS); } inp->sctp_tcbhash = SCTP_HASH_INIT(SCTP_BASE_SYSCTL(sctp_pcbtblsize), &inp->sctp_hashmark); if (inp->sctp_tcbhash == NULL) { SCTP_PRINTF("Out of SCTP-INPCB->hashinit - no resources\n"); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS); so->so_pcb = NULL; SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp); return (ENOBUFS); } inp->def_vrf_id = vrf_id; SCTP_INP_INFO_WLOCK(); SCTP_INP_LOCK_INIT(inp); INP_LOCK_INIT(&inp->ip_inp.inp, "inp", "sctpinp"); SCTP_INP_READ_INIT(inp); SCTP_ASOC_CREATE_LOCK_INIT(inp); /* lock the new ep */ SCTP_INP_WLOCK(inp); /* add it to the info area */ LIST_INSERT_HEAD(&SCTP_BASE_INFO(listhead), inp, sctp_list); SCTP_INP_INFO_WUNLOCK(); TAILQ_INIT(&inp->read_queue); LIST_INIT(&inp->sctp_addr_list); LIST_INIT(&inp->sctp_asoc_list); #ifdef SCTP_TRACK_FREED_ASOCS /* TEMP CODE */ LIST_INIT(&inp->sctp_asoc_free_list); #endif /* Init the timer structure for signature change */ SCTP_OS_TIMER_INIT(&inp->sctp_ep.signature_change.timer); inp->sctp_ep.signature_change.type = SCTP_TIMER_TYPE_NEWCOOKIE; /* now init the actual endpoint default data */ m = &inp->sctp_ep; /* setup the base timeout information */ m->sctp_timeoutticks[SCTP_TIMER_SEND] = SEC_TO_TICKS(SCTP_SEND_SEC); /* needed ? */ m->sctp_timeoutticks[SCTP_TIMER_INIT] = SEC_TO_TICKS(SCTP_INIT_SEC); /* needed ? */ m->sctp_timeoutticks[SCTP_TIMER_RECV] = MSEC_TO_TICKS(SCTP_BASE_SYSCTL(sctp_delayed_sack_time_default)); m->sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = MSEC_TO_TICKS(SCTP_BASE_SYSCTL(sctp_heartbeat_interval_default)); m->sctp_timeoutticks[SCTP_TIMER_PMTU] = SEC_TO_TICKS(SCTP_BASE_SYSCTL(sctp_pmtu_raise_time_default)); m->sctp_timeoutticks[SCTP_TIMER_MAXSHUTDOWN] = SEC_TO_TICKS(SCTP_BASE_SYSCTL(sctp_shutdown_guard_time_default)); m->sctp_timeoutticks[SCTP_TIMER_SIGNATURE] = SEC_TO_TICKS(SCTP_BASE_SYSCTL(sctp_secret_lifetime_default)); /* all max/min max are in ms */ m->sctp_maxrto = SCTP_BASE_SYSCTL(sctp_rto_max_default); m->sctp_minrto = SCTP_BASE_SYSCTL(sctp_rto_min_default); m->initial_rto = SCTP_BASE_SYSCTL(sctp_rto_initial_default); m->initial_init_rto_max = SCTP_BASE_SYSCTL(sctp_init_rto_max_default); m->sctp_sack_freq = SCTP_BASE_SYSCTL(sctp_sack_freq_default); m->max_open_streams_intome = MAX_SCTP_STREAMS; m->max_init_times = SCTP_BASE_SYSCTL(sctp_init_rtx_max_default); m->max_send_times = SCTP_BASE_SYSCTL(sctp_assoc_rtx_max_default); m->def_net_failure = SCTP_BASE_SYSCTL(sctp_path_rtx_max_default); m->sctp_sws_sender = SCTP_SWS_SENDER_DEF; m->sctp_sws_receiver = SCTP_SWS_RECEIVER_DEF; m->max_burst = SCTP_BASE_SYSCTL(sctp_max_burst_default); if ((SCTP_BASE_SYSCTL(sctp_default_cc_module) >= SCTP_CC_RFC2581) && (SCTP_BASE_SYSCTL(sctp_default_cc_module) <= SCTP_CC_HTCP)) { m->sctp_default_cc_module = SCTP_BASE_SYSCTL(sctp_default_cc_module); } else { /* sysctl done with invalid value, set to 2581 */ m->sctp_default_cc_module = SCTP_CC_RFC2581; } /* number of streams to pre-open on a association */ m->pre_open_stream_count = SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default); /* Add adaptation cookie */ m->adaptation_layer_indicator = 0x504C5253; /* seed random number generator */ m->random_counter = 1; m->store_at = SCTP_SIGNATURE_SIZE; SCTP_READ_RANDOM(m->random_numbers, sizeof(m->random_numbers)); sctp_fill_random_store(m); /* Minimum cookie size */ m->size_of_a_cookie = (sizeof(struct sctp_init_msg) * 2) + sizeof(struct sctp_state_cookie); m->size_of_a_cookie += SCTP_SIGNATURE_SIZE; /* Setup the initial secret */ (void)SCTP_GETTIME_TIMEVAL(&time); m->time_of_secret_change = time.tv_sec; for (i = 0; i < SCTP_NUMBER_OF_SECRETS; i++) { m->secret_key[0][i] = sctp_select_initial_TSN(m); } sctp_timer_start(SCTP_TIMER_TYPE_NEWCOOKIE, inp, NULL, NULL); /* How long is a cookie good for ? */ m->def_cookie_life = MSEC_TO_TICKS(SCTP_BASE_SYSCTL(sctp_valid_cookie_life_default)); /* * Initialize authentication parameters */ m->local_hmacs = sctp_default_supported_hmaclist(); m->local_auth_chunks = sctp_alloc_chunklist(); sctp_auth_set_default_chunks(m->local_auth_chunks); LIST_INIT(&m->shared_keys); /* add default NULL key as key id 0 */ null_key = sctp_alloc_sharedkey(); sctp_insert_sharedkey(&m->shared_keys, null_key); SCTP_INP_WUNLOCK(inp); #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, NULL, 12); #endif return (error); } void sctp_move_pcb_and_assoc(struct sctp_inpcb *old_inp, struct sctp_inpcb *new_inp, struct sctp_tcb *stcb) { struct sctp_nets *net; uint16_t lport, rport; struct sctppcbhead *head; struct sctp_laddr *laddr, *oladdr; atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_INP_INFO_WLOCK(); SCTP_INP_WLOCK(old_inp); SCTP_INP_WLOCK(new_inp); SCTP_TCB_LOCK(stcb); atomic_subtract_int(&stcb->asoc.refcnt, 1); new_inp->sctp_ep.time_of_secret_change = old_inp->sctp_ep.time_of_secret_change; memcpy(new_inp->sctp_ep.secret_key, old_inp->sctp_ep.secret_key, sizeof(old_inp->sctp_ep.secret_key)); new_inp->sctp_ep.current_secret_number = old_inp->sctp_ep.current_secret_number; new_inp->sctp_ep.last_secret_number = old_inp->sctp_ep.last_secret_number; new_inp->sctp_ep.size_of_a_cookie = old_inp->sctp_ep.size_of_a_cookie; /* make it so new data pours into the new socket */ stcb->sctp_socket = new_inp->sctp_socket; stcb->sctp_ep = new_inp; /* Copy the port across */ lport = new_inp->sctp_lport = old_inp->sctp_lport; rport = stcb->rport; /* Pull the tcb from the old association */ LIST_REMOVE(stcb, sctp_tcbhash); LIST_REMOVE(stcb, sctp_tcblist); if (stcb->asoc.in_asocid_hash) { LIST_REMOVE(stcb, sctp_tcbasocidhash); } /* Now insert the new_inp into the TCP connected hash */ head = &SCTP_BASE_INFO(sctp_tcpephash)[SCTP_PCBHASH_ALLADDR((lport | rport), SCTP_BASE_INFO(hashtcpmark))]; LIST_INSERT_HEAD(head, new_inp, sctp_hash); /* Its safe to access */ new_inp->sctp_flags &= ~SCTP_PCB_FLAGS_UNBOUND; /* Now move the tcb into the endpoint list */ LIST_INSERT_HEAD(&new_inp->sctp_asoc_list, stcb, sctp_tcblist); /* * Question, do we even need to worry about the ep-hash since we * only have one connection? Probably not :> so lets get rid of it * and not suck up any kernel memory in that. */ if (stcb->asoc.in_asocid_hash) { struct sctpasochead *lhd; lhd = &new_inp->sctp_asocidhash[SCTP_PCBHASH_ASOC(stcb->asoc.assoc_id, new_inp->hashasocidmark)]; LIST_INSERT_HEAD(lhd, stcb, sctp_tcbasocidhash); } /* Ok. Let's restart timer. */ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, new_inp, stcb, net); } SCTP_INP_INFO_WUNLOCK(); if (new_inp->sctp_tcbhash != NULL) { SCTP_HASH_FREE(new_inp->sctp_tcbhash, new_inp->sctp_hashmark); new_inp->sctp_tcbhash = NULL; } if ((new_inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) { /* Subset bound, so copy in the laddr list from the old_inp */ LIST_FOREACH(oladdr, &old_inp->sctp_addr_list, sctp_nxt_addr) { laddr = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_laddr), struct sctp_laddr); if (laddr == NULL) { /* * Gak, what can we do? This assoc is really * HOSED. We probably should send an abort * here. */ SCTPDBG(SCTP_DEBUG_PCB1, "Association hosed in TCP model, out of laddr memory\n"); continue; } SCTP_INCR_LADDR_COUNT(); bzero(laddr, sizeof(*laddr)); (void)SCTP_GETTIME_TIMEVAL(&laddr->start_time); laddr->ifa = oladdr->ifa; atomic_add_int(&laddr->ifa->refcount, 1); LIST_INSERT_HEAD(&new_inp->sctp_addr_list, laddr, sctp_nxt_addr); new_inp->laddr_count++; } } /* * Now any running timers need to be adjusted since we really don't * care if they are running or not just blast in the new_inp into * all of them. */ stcb->asoc.hb_timer.ep = (void *)new_inp; stcb->asoc.dack_timer.ep = (void *)new_inp; stcb->asoc.asconf_timer.ep = (void *)new_inp; stcb->asoc.strreset_timer.ep = (void *)new_inp; stcb->asoc.shut_guard_timer.ep = (void *)new_inp; stcb->asoc.autoclose_timer.ep = (void *)new_inp; stcb->asoc.delayed_event_timer.ep = (void *)new_inp; stcb->asoc.delete_prim_timer.ep = (void *)new_inp; /* now what about the nets? */ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { net->pmtu_timer.ep = (void *)new_inp; net->rxt_timer.ep = (void *)new_inp; net->fr_timer.ep = (void *)new_inp; } SCTP_INP_WUNLOCK(new_inp); SCTP_INP_WUNLOCK(old_inp); } /* sctp_ifap is used to bypass normal local address validation checks */ int sctp_inpcb_bind(struct socket *so, struct sockaddr *addr, struct sctp_ifa *sctp_ifap, struct thread *p) { /* bind a ep to a socket address */ struct sctppcbhead *head; struct sctp_inpcb *inp, *inp_tmp; struct inpcb *ip_inp; int port_reuse_active = 0; int bindall; uint16_t lport; int error; uint32_t vrf_id; lport = 0; error = 0; bindall = 1; inp = (struct sctp_inpcb *)so->so_pcb; ip_inp = (struct inpcb *)so->so_pcb; #ifdef SCTP_DEBUG if (addr) { SCTPDBG(SCTP_DEBUG_PCB1, "Bind called port:%d\n", ntohs(((struct sockaddr_in *)addr)->sin_port)); SCTPDBG(SCTP_DEBUG_PCB1, "Addr :"); SCTPDBG_ADDR(SCTP_DEBUG_PCB1, addr); } #endif if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) == 0) { /* already did a bind, subsequent binds NOT allowed ! */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); return (EINVAL); } #ifdef INVARIANTS if (p == NULL) panic("null proc/thread"); #endif if (addr != NULL) { switch (addr->sa_family) { case AF_INET: { struct sockaddr_in *sin; /* IPV6_V6ONLY socket? */ if (SCTP_IPV6_V6ONLY(ip_inp)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); return (EINVAL); } if (addr->sa_len != sizeof(*sin)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); return (EINVAL); } sin = (struct sockaddr_in *)addr; lport = sin->sin_port; /* * For LOOPBACK the prison_local_ip4() call * will transmute the ip address to the * proper value. */ if (p && (error = prison_local_ip4(p->td_ucred, &sin->sin_addr)) != 0) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, error); return (error); } if (sin->sin_addr.s_addr != INADDR_ANY) { bindall = 0; } break; } #ifdef INET6 case AF_INET6: { /* * Only for pure IPv6 Address. (No IPv4 * Mapped!) */ struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)addr; if (addr->sa_len != sizeof(*sin6)) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); return (EINVAL); } lport = sin6->sin6_port; /* * For LOOPBACK the prison_local_ip6() call * will transmute the ipv6 address to the * proper value. */ if (p && (error = prison_local_ip6(p->td_ucred, &sin6->sin6_addr, (SCTP_IPV6_V6ONLY(inp) != 0))) != 0) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, error); return (error); } if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { bindall = 0; /* KAME hack: embed scopeid */ if (sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)) != 0) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); return (EINVAL); } } /* this must be cleared for ifa_ifwithaddr() */ sin6->sin6_scope_id = 0; break; } #endif default: SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EAFNOSUPPORT); return (EAFNOSUPPORT); } } SCTP_INP_INFO_WLOCK(); SCTP_INP_WLOCK(inp); /* Setup a vrf_id to be the default for the non-bind-all case. */ vrf_id = inp->def_vrf_id; /* increase our count due to the unlock we do */ SCTP_INP_INCR_REF(inp); if (lport) { /* * Did the caller specify a port? if so we must see if a ep * already has this one bound. */ /* got to be root to get at low ports */ if (ntohs(lport) < IPPORT_RESERVED) { if (p && (error = priv_check(p, PRIV_NETINET_RESERVEDPORT) )) { SCTP_INP_DECR_REF(inp); SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); return (error); } } if (p == NULL) { SCTP_INP_DECR_REF(inp); SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, error); return (error); } SCTP_INP_WUNLOCK(inp); if (bindall) { vrf_id = inp->def_vrf_id; inp_tmp = sctp_pcb_findep(addr, 0, 1, vrf_id); if (inp_tmp != NULL) { /* * lock guy returned and lower count note * that we are not bound so inp_tmp should * NEVER be inp. And it is this inp * (inp_tmp) that gets the reference bump, * so we must lower it. */ SCTP_INP_DECR_REF(inp_tmp); /* unlock info */ if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE)) && (sctp_is_feature_on(inp_tmp, SCTP_PCB_FLAGS_PORTREUSE))) { /* * Ok, must be one-2-one and * allowing port re-use */ port_reuse_active = 1; goto continue_anyway; } SCTP_INP_DECR_REF(inp); SCTP_INP_INFO_WUNLOCK(); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRINUSE); return (EADDRINUSE); } } else { inp_tmp = sctp_pcb_findep(addr, 0, 1, vrf_id); if (inp_tmp != NULL) { /* * lock guy returned and lower count note * that we are not bound so inp_tmp should * NEVER be inp. And it is this inp * (inp_tmp) that gets the reference bump, * so we must lower it. */ SCTP_INP_DECR_REF(inp_tmp); /* unlock info */ if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE)) && (sctp_is_feature_on(inp_tmp, SCTP_PCB_FLAGS_PORTREUSE))) { /* * Ok, must be one-2-one and * allowing port re-use */ port_reuse_active = 1; goto continue_anyway; } SCTP_INP_DECR_REF(inp); SCTP_INP_INFO_WUNLOCK(); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRINUSE); return (EADDRINUSE); } } continue_anyway: SCTP_INP_WLOCK(inp); if (bindall) { /* verify that no lport is not used by a singleton */ if ((port_reuse_active == 0) && (inp_tmp = sctp_isport_inuse(inp, lport, vrf_id)) ) { /* Sorry someone already has this one bound */ if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE)) && (sctp_is_feature_on(inp_tmp, SCTP_PCB_FLAGS_PORTREUSE))) { port_reuse_active = 1; } else { SCTP_INP_DECR_REF(inp); SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRINUSE); return (EADDRINUSE); } } } } else { uint16_t first, last, candidate; uint16_t count; int done; if (ip_inp->inp_flags & INP_HIGHPORT) { first = MODULE_GLOBAL(ipport_hifirstauto); last = MODULE_GLOBAL(ipport_hilastauto); } else if (ip_inp->inp_flags & INP_LOWPORT) { if (p && (error = priv_check(p, PRIV_NETINET_RESERVEDPORT) )) { SCTP_INP_DECR_REF(inp); SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, error); return (error); } first = MODULE_GLOBAL(ipport_lowfirstauto); last = MODULE_GLOBAL(ipport_lowlastauto); } else { first = MODULE_GLOBAL(ipport_firstauto); last = MODULE_GLOBAL(ipport_lastauto); } if (first > last) { uint16_t temp; temp = first; first = last; last = temp; } count = last - first + 1; /* number of candidates */ candidate = first + sctp_select_initial_TSN(&inp->sctp_ep) % (count); done = 0; while (!done) { if (sctp_isport_inuse(inp, htons(candidate), inp->def_vrf_id) == NULL) { done = 1; } if (!done) { if (--count == 0) { SCTP_INP_DECR_REF(inp); SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRINUSE); return (EADDRINUSE); } if (candidate == last) candidate = first; else candidate = candidate + 1; } } lport = htons(candidate); } SCTP_INP_DECR_REF(inp); if (inp->sctp_flags & (SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_SOCKET_ALLGONE)) { /* * this really should not happen. The guy did a non-blocking * bind and then did a close at the same time. */ SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); return (EINVAL); } /* ok we look clear to give out this port, so lets setup the binding */ if (bindall) { /* binding to all addresses, so just set in the proper flags */ inp->sctp_flags |= SCTP_PCB_FLAGS_BOUNDALL; /* set the automatic addr changes from kernel flag */ if (SCTP_BASE_SYSCTL(sctp_auto_asconf) == 0) { sctp_feature_off(inp, SCTP_PCB_FLAGS_DO_ASCONF); sctp_feature_off(inp, SCTP_PCB_FLAGS_AUTO_ASCONF); } else { sctp_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF); sctp_feature_on(inp, SCTP_PCB_FLAGS_AUTO_ASCONF); } if (SCTP_BASE_SYSCTL(sctp_multiple_asconfs) == 0) { sctp_feature_off(inp, SCTP_PCB_FLAGS_MULTIPLE_ASCONFS); } else { sctp_feature_on(inp, SCTP_PCB_FLAGS_MULTIPLE_ASCONFS); } /* * set the automatic mobility_base from kernel flag (by * micchie) */ if (SCTP_BASE_SYSCTL(sctp_mobility_base) == 0) { sctp_mobility_feature_off(inp, SCTP_MOBILITY_BASE); sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED); } else { sctp_mobility_feature_on(inp, SCTP_MOBILITY_BASE); sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED); } /* * set the automatic mobility_fasthandoff from kernel flag * (by micchie) */ if (SCTP_BASE_SYSCTL(sctp_mobility_fasthandoff) == 0) { sctp_mobility_feature_off(inp, SCTP_MOBILITY_FASTHANDOFF); sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED); } else { sctp_mobility_feature_on(inp, SCTP_MOBILITY_FASTHANDOFF); sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED); } } else { /* * bind specific, make sure flags is off and add a new * address structure to the sctp_addr_list inside the ep * structure. * * We will need to allocate one and insert it at the head. The * socketopt call can just insert new addresses in there as * well. It will also have to do the embed scope kame hack * too (before adding). */ struct sctp_ifa *ifa; struct sockaddr_storage store_sa; memset(&store_sa, 0, sizeof(store_sa)); if (addr->sa_family == AF_INET) { struct sockaddr_in *sin; sin = (struct sockaddr_in *)&store_sa; memcpy(sin, addr, sizeof(struct sockaddr_in)); sin->sin_port = 0; } else if (addr->sa_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)&store_sa; memcpy(sin6, addr, sizeof(struct sockaddr_in6)); sin6->sin6_port = 0; } /* * first find the interface with the bound address need to * zero out the port to find the address! yuck! can't do * this earlier since need port for sctp_pcb_findep() */ if (sctp_ifap != NULL) ifa = sctp_ifap; else { /* * Note for BSD we hit here always other O/S's will * pass things in via the sctp_ifap argument * (Panda). */ ifa = sctp_find_ifa_by_addr((struct sockaddr *)&store_sa, vrf_id, SCTP_ADDR_NOT_LOCKED); } if (ifa == NULL) { /* Can't find an interface with that address */ SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRNOTAVAIL); return (EADDRNOTAVAIL); } if (addr->sa_family == AF_INET6) { /* GAK, more FIXME IFA lock? */ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) { /* Can't bind a non-existent addr. */ SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); return (EINVAL); } } /* we're not bound all */ inp->sctp_flags &= ~SCTP_PCB_FLAGS_BOUNDALL; /* allow bindx() to send ASCONF's for binding changes */ sctp_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF); /* clear automatic addr changes from kernel flag */ sctp_feature_off(inp, SCTP_PCB_FLAGS_AUTO_ASCONF); /* add this address to the endpoint list */ error = sctp_insert_laddr(&inp->sctp_addr_list, ifa, 0); if (error != 0) { SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); return (error); } inp->laddr_count++; } /* find the bucket */ if (port_reuse_active) { /* Put it into tcp 1-2-1 hash */ head = &SCTP_BASE_INFO(sctp_tcpephash)[SCTP_PCBHASH_ALLADDR(lport, SCTP_BASE_INFO(hashtcpmark))]; inp->sctp_flags |= SCTP_PCB_FLAGS_IN_TCPPOOL; } else { head = &SCTP_BASE_INFO(sctp_ephash)[SCTP_PCBHASH_ALLADDR(lport, SCTP_BASE_INFO(hashmark))]; } /* put it in the bucket */ LIST_INSERT_HEAD(head, inp, sctp_hash); SCTPDBG(SCTP_DEBUG_PCB1, "Main hash to bind at head:%p, bound port:%d - in tcp_pool=%d\n", head, ntohs(lport), port_reuse_active); /* set in the port */ inp->sctp_lport = lport; /* turn off just the unbound flag */ inp->sctp_flags &= ~SCTP_PCB_FLAGS_UNBOUND; SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); return (0); } static void sctp_iterator_inp_being_freed(struct sctp_inpcb *inp) { struct sctp_iterator *it, *nit; /* * We enter with the only the ITERATOR_LOCK in place and a write * lock on the inp_info stuff. */ it = sctp_it_ctl.cur_it; if (it && (it->vn != curvnet)) { /* Its not looking at our VNET */ return; } if (it && (it->inp == inp)) { /* * This is tricky and we hold the iterator lock, but when it * returns and gets the lock (when we release it) the * iterator will try to operate on inp. We need to stop that * from happening. But of course the iterator has a * reference on the stcb and inp. We can mark it and it will * stop. * * If its a single iterator situation, we set the end iterator * flag. Otherwise we set the iterator to go to the next * inp. * */ if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) { sctp_it_ctl.iterator_flags |= SCTP_ITERATOR_STOP_CUR_IT; } else { sctp_it_ctl.iterator_flags |= SCTP_ITERATOR_STOP_CUR_INP; } } /* * Now go through and remove any single reference to our inp that * may be still pending on the list */ SCTP_IPI_ITERATOR_WQ_LOCK(); it = TAILQ_FIRST(&sctp_it_ctl.iteratorhead); while (it) { nit = TAILQ_NEXT(it, sctp_nxt_itr); if (it->vn != curvnet) { it = nit; continue; } if (it->inp == inp) { /* This one points to me is it inp specific? */ if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) { /* Remove and free this one */ TAILQ_REMOVE(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr); if (it->function_atend != NULL) { (*it->function_atend) (it->pointer, it->val); } SCTP_FREE(it, SCTP_M_ITER); } else { it->inp = LIST_NEXT(it->inp, sctp_list); if (it->inp) { SCTP_INP_INCR_REF(it->inp); } } /* * When its put in the refcnt is incremented so decr * it */ SCTP_INP_DECR_REF(inp); } it = nit; } SCTP_IPI_ITERATOR_WQ_UNLOCK(); } /* release sctp_inpcb unbind the port */ void sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from) { /* * Here we free a endpoint. We must find it (if it is in the Hash * table) and remove it from there. Then we must also find it in the * overall list and remove it from there. After all removals are * complete then any timer has to be stopped. Then start the actual * freeing. a) Any local lists. b) Any associations. c) The hash of * all associations. d) finally the ep itself. */ struct sctp_pcb *m; struct sctp_tcb *asoc, *nasoc; struct sctp_laddr *laddr, *nladdr; struct inpcb *ip_pcb; struct socket *so; int being_refed = 0; struct sctp_queued_to_read *sq; int cnt; sctp_sharedkey_t *shared_key; #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, NULL, 0); #endif SCTP_ITERATOR_LOCK(); /* mark any iterators on the list or being processed */ sctp_iterator_inp_being_freed(inp); SCTP_ITERATOR_UNLOCK(); so = inp->sctp_socket; if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) { /* been here before.. eeks.. get out of here */ SCTP_PRINTF("This conflict in free SHOULD not be happening! from %d, imm %d\n", from, immediate); #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, NULL, 1); #endif return; } SCTP_ASOC_CREATE_LOCK(inp); SCTP_INP_INFO_WLOCK(); SCTP_INP_WLOCK(inp); if (from == SCTP_CALLED_AFTER_CMPSET_OFCLOSE) { inp->sctp_flags &= ~SCTP_PCB_FLAGS_CLOSE_IP; /* socket is gone, so no more wakeups allowed */ inp->sctp_flags |= SCTP_PCB_FLAGS_DONT_WAKE; inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEINPUT; inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEOUTPUT; } /* First time through we have the socket lock, after that no more. */ sctp_timer_stop(SCTP_TIMER_TYPE_NEWCOOKIE, inp, NULL, NULL, SCTP_FROM_SCTP_PCB + SCTP_LOC_1); if (inp->control) { sctp_m_freem(inp->control); inp->control = NULL; } if (inp->pkt) { sctp_m_freem(inp->pkt); inp->pkt = NULL; } m = &inp->sctp_ep; ip_pcb = &inp->ip_inp.inp; /* we could just cast the main pointer * here but I will be nice :> (i.e. * ip_pcb = ep;) */ if (immediate == SCTP_FREE_SHOULD_USE_GRACEFUL_CLOSE) { int cnt_in_sd; cnt_in_sd = 0; for ((asoc = LIST_FIRST(&inp->sctp_asoc_list)); asoc != NULL; asoc = nasoc) { SCTP_TCB_LOCK(asoc); nasoc = LIST_NEXT(asoc, sctp_tcblist); if (asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { /* Skip guys being freed */ cnt_in_sd++; if (asoc->asoc.state & SCTP_STATE_IN_ACCEPT_QUEUE) { /* * Special case - we did not start a * kill timer on the asoc due to it * was not closed. So go ahead and * start it now. */ asoc->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE; sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, asoc, NULL); } SCTP_TCB_UNLOCK(asoc); continue; } if (((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_COOKIE_WAIT) || (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_COOKIE_ECHOED)) && (asoc->asoc.total_output_queue_size == 0)) { /* * If we have data in queue, we don't want * to just free since the app may have done, * send()/close or connect/send/close. And * it wants the data to get across first. */ /* Just abandon things in the front states */ if (sctp_free_assoc(inp, asoc, SCTP_PCBFREE_NOFORCE, SCTP_FROM_SCTP_PCB + SCTP_LOC_2) == 0) { cnt_in_sd++; } continue; } /* Disconnect the socket please */ asoc->sctp_socket = NULL; asoc->asoc.state |= SCTP_STATE_CLOSED_SOCKET; if ((asoc->asoc.size_on_reasm_queue > 0) || (asoc->asoc.control_pdapi) || (asoc->asoc.size_on_all_streams > 0) || (so && (so->so_rcv.sb_cc > 0)) ) { /* Left with Data unread */ struct mbuf *op_err; op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)), 0, M_DONTWAIT, 1, MT_DATA); if (op_err) { /* Fill in the user initiated abort */ struct sctp_paramhdr *ph; uint32_t *ippp; SCTP_BUF_LEN(op_err) = sizeof(struct sctp_paramhdr) + sizeof(uint32_t); ph = mtod(op_err, struct sctp_paramhdr *); ph->param_type = htons( SCTP_CAUSE_USER_INITIATED_ABT); ph->param_length = htons(SCTP_BUF_LEN(op_err)); ippp = (uint32_t *) (ph + 1); *ippp = htonl(SCTP_FROM_SCTP_PCB + SCTP_LOC_3); } asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_3; #if defined(SCTP_PANIC_ON_ABORT) panic("inpcb_free does an abort"); #endif sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED); SCTP_STAT_INCR_COUNTER32(sctps_aborted); if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) || (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) { SCTP_STAT_DECR_GAUGE32(sctps_currestab); } if (sctp_free_assoc(inp, asoc, SCTP_PCBFREE_NOFORCE, SCTP_FROM_SCTP_PCB + SCTP_LOC_4) == 0) { cnt_in_sd++; } continue; } else if (TAILQ_EMPTY(&asoc->asoc.send_queue) && TAILQ_EMPTY(&asoc->asoc.sent_queue) && (asoc->asoc.stream_queue_cnt == 0) ) { if (asoc->asoc.locked_on_sending) { goto abort_anyway; } if ((SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_SHUTDOWN_SENT) && (SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) { /* * there is nothing queued to send, * so I send shutdown */ sctp_send_shutdown(asoc, asoc->asoc.primary_destination); if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) || (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) { SCTP_STAT_DECR_GAUGE32(sctps_currestab); } SCTP_SET_STATE(&asoc->asoc, SCTP_STATE_SHUTDOWN_SENT); SCTP_CLEAR_SUBSTATE(&asoc->asoc, SCTP_STATE_SHUTDOWN_PENDING); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, asoc->sctp_ep, asoc, asoc->asoc.primary_destination); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, asoc->sctp_ep, asoc, asoc->asoc.primary_destination); sctp_chunk_output(inp, asoc, SCTP_OUTPUT_FROM_SHUT_TMR, SCTP_SO_LOCKED); } } else { /* mark into shutdown pending */ struct sctp_stream_queue_pending *sp; asoc->asoc.state |= SCTP_STATE_SHUTDOWN_PENDING; sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, asoc->sctp_ep, asoc, asoc->asoc.primary_destination); if (asoc->asoc.locked_on_sending) { sp = TAILQ_LAST(&((asoc->asoc.locked_on_sending)->outqueue), sctp_streamhead); if (sp == NULL) { SCTP_PRINTF("Error, sp is NULL, locked on sending is %p strm:%d\n", asoc->asoc.locked_on_sending, asoc->asoc.locked_on_sending->stream_no); } else { if ((sp->length == 0) && (sp->msg_is_complete == 0)) asoc->asoc.state |= SCTP_STATE_PARTIAL_MSG_LEFT; } } if (TAILQ_EMPTY(&asoc->asoc.send_queue) && TAILQ_EMPTY(&asoc->asoc.sent_queue) && (asoc->asoc.state & SCTP_STATE_PARTIAL_MSG_LEFT)) { struct mbuf *op_err; abort_anyway: op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)), 0, M_DONTWAIT, 1, MT_DATA); if (op_err) { /* * Fill in the user * initiated abort */ struct sctp_paramhdr *ph; uint32_t *ippp; SCTP_BUF_LEN(op_err) = (sizeof(struct sctp_paramhdr) + sizeof(uint32_t)); ph = mtod(op_err, struct sctp_paramhdr *); ph->param_type = htons( SCTP_CAUSE_USER_INITIATED_ABT); ph->param_length = htons(SCTP_BUF_LEN(op_err)); ippp = (uint32_t *) (ph + 1); *ippp = htonl(SCTP_FROM_SCTP_PCB + SCTP_LOC_5); } asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_5; #if defined(SCTP_PANIC_ON_ABORT) panic("inpcb_free does an abort"); #endif sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED); SCTP_STAT_INCR_COUNTER32(sctps_aborted); if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) || (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) { SCTP_STAT_DECR_GAUGE32(sctps_currestab); } if (sctp_free_assoc(inp, asoc, SCTP_PCBFREE_NOFORCE, SCTP_FROM_SCTP_PCB + SCTP_LOC_6) == 0) { cnt_in_sd++; } continue; } else { sctp_chunk_output(inp, asoc, SCTP_OUTPUT_FROM_CLOSING, SCTP_SO_LOCKED); } } cnt_in_sd++; SCTP_TCB_UNLOCK(asoc); } /* now is there some left in our SHUTDOWN state? */ if (cnt_in_sd) { #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, NULL, 2); #endif inp->sctp_socket = NULL; SCTP_INP_WUNLOCK(inp); SCTP_ASOC_CREATE_UNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); return; } } inp->sctp_socket = NULL; if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) != SCTP_PCB_FLAGS_UNBOUND) { /* * ok, this guy has been bound. It's port is somewhere in * the SCTP_BASE_INFO(hash table). Remove it! */ LIST_REMOVE(inp, sctp_hash); inp->sctp_flags |= SCTP_PCB_FLAGS_UNBOUND; } /* * If there is a timer running to kill us, forget it, since it may * have a contest on the INP lock.. which would cause us to die ... */ cnt = 0; for ((asoc = LIST_FIRST(&inp->sctp_asoc_list)); asoc != NULL; asoc = nasoc) { SCTP_TCB_LOCK(asoc); nasoc = LIST_NEXT(asoc, sctp_tcblist); if (asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { if (asoc->asoc.state & SCTP_STATE_IN_ACCEPT_QUEUE) { asoc->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE; sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, asoc, NULL); } cnt++; SCTP_TCB_UNLOCK(asoc); continue; } /* Free associations that are NOT killing us */ if ((SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_COOKIE_WAIT) && ((asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0)) { struct mbuf *op_err; uint32_t *ippp; op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)), 0, M_DONTWAIT, 1, MT_DATA); if (op_err) { /* Fill in the user initiated abort */ struct sctp_paramhdr *ph; SCTP_BUF_LEN(op_err) = (sizeof(struct sctp_paramhdr) + sizeof(uint32_t)); ph = mtod(op_err, struct sctp_paramhdr *); ph->param_type = htons( SCTP_CAUSE_USER_INITIATED_ABT); ph->param_length = htons(SCTP_BUF_LEN(op_err)); ippp = (uint32_t *) (ph + 1); *ippp = htonl(SCTP_FROM_SCTP_PCB + SCTP_LOC_7); } asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_7; #if defined(SCTP_PANIC_ON_ABORT) panic("inpcb_free does an abort"); #endif sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED); SCTP_STAT_INCR_COUNTER32(sctps_aborted); } else if (asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { cnt++; SCTP_TCB_UNLOCK(asoc); continue; } if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) || (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) { SCTP_STAT_DECR_GAUGE32(sctps_currestab); } if (sctp_free_assoc(inp, asoc, SCTP_PCBFREE_FORCE, SCTP_FROM_SCTP_PCB + SCTP_LOC_8) == 0) { cnt++; } } if (cnt) { /* Ok we have someone out there that will kill us */ (void)SCTP_OS_TIMER_STOP(&inp->sctp_ep.signature_change.timer); #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, NULL, 3); #endif SCTP_INP_WUNLOCK(inp); SCTP_ASOC_CREATE_UNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); return; } if (SCTP_INP_LOCK_CONTENDED(inp)) being_refed++; if (SCTP_INP_READ_CONTENDED(inp)) being_refed++; if (SCTP_ASOC_CREATE_LOCK_CONTENDED(inp)) being_refed++; if ((inp->refcount) || (being_refed) || (inp->sctp_flags & SCTP_PCB_FLAGS_CLOSE_IP)) { (void)SCTP_OS_TIMER_STOP(&inp->sctp_ep.signature_change.timer); #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, NULL, 4); #endif sctp_timer_start(SCTP_TIMER_TYPE_INPKILL, inp, NULL, NULL); SCTP_INP_WUNLOCK(inp); SCTP_ASOC_CREATE_UNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); return; } inp->sctp_ep.signature_change.type = 0; inp->sctp_flags |= SCTP_PCB_FLAGS_SOCKET_ALLGONE; /* * Remove it from the list .. last thing we need a lock for. */ LIST_REMOVE(inp, sctp_list); SCTP_INP_WUNLOCK(inp); SCTP_ASOC_CREATE_UNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); /* * Now we release all locks. Since this INP cannot be found anymore * except possibly by the kill timer that might be running. We call * the drain function here. It should hit the case were it sees the * ACTIVE flag cleared and exit out freeing us to proceed and * destroy everything. */ if (from != SCTP_CALLED_FROM_INPKILL_TIMER) { (void)SCTP_OS_TIMER_STOP_DRAIN(&inp->sctp_ep.signature_change.timer); } else { /* Probably un-needed */ (void)SCTP_OS_TIMER_STOP(&inp->sctp_ep.signature_change.timer); } #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, NULL, 5); #endif if ((inp->sctp_asocidhash) != NULL) { SCTP_HASH_FREE(inp->sctp_asocidhash, inp->hashasocidmark); inp->sctp_asocidhash = NULL; } /* sa_ignore FREED_MEMORY */ while ((sq = TAILQ_FIRST(&inp->read_queue)) != NULL) { /* Its only abandoned if it had data left */ if (sq->length) SCTP_STAT_INCR(sctps_left_abandon); TAILQ_REMOVE(&inp->read_queue, sq, next); sctp_free_remote_addr(sq->whoFrom); if (so) so->so_rcv.sb_cc -= sq->length; if (sq->data) { sctp_m_freem(sq->data); sq->data = NULL; } /* * no need to free the net count, since at this point all * assoc's are gone. */ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), sq); SCTP_DECR_READQ_COUNT(); } /* Now the sctp_pcb things */ /* * free each asoc if it is not already closed/free. we can't use the * macro here since le_next will get freed as part of the * sctp_free_assoc() call. */ cnt = 0; if (so) { #ifdef IPSEC ipsec_delete_pcbpolicy(ip_pcb); #endif /* IPSEC */ /* Unlocks not needed since the socket is gone now */ } if (ip_pcb->inp_options) { (void)sctp_m_free(ip_pcb->inp_options); ip_pcb->inp_options = 0; } if (ip_pcb->inp_moptions) { inp_freemoptions(ip_pcb->inp_moptions); ip_pcb->inp_moptions = 0; } #ifdef INET6 if (ip_pcb->inp_vflag & INP_IPV6) { struct in6pcb *in6p; in6p = (struct in6pcb *)inp; ip6_freepcbopts(in6p->in6p_outputopts); } #endif /* INET6 */ ip_pcb->inp_vflag = 0; /* free up authentication fields */ if (inp->sctp_ep.local_auth_chunks != NULL) sctp_free_chunklist(inp->sctp_ep.local_auth_chunks); if (inp->sctp_ep.local_hmacs != NULL) sctp_free_hmaclist(inp->sctp_ep.local_hmacs); shared_key = LIST_FIRST(&inp->sctp_ep.shared_keys); while (shared_key) { LIST_REMOVE(shared_key, next); sctp_free_sharedkey(shared_key); /* sa_ignore FREED_MEMORY */ shared_key = LIST_FIRST(&inp->sctp_ep.shared_keys); } /* * if we have an address list the following will free the list of * ifaddr's that are set into this ep. Again macro limitations here, * since the LIST_FOREACH could be a bad idea. */ for ((laddr = LIST_FIRST(&inp->sctp_addr_list)); laddr != NULL; laddr = nladdr) { nladdr = LIST_NEXT(laddr, sctp_nxt_addr); sctp_remove_laddr(laddr); } #ifdef SCTP_TRACK_FREED_ASOCS /* TEMP CODE */ for ((asoc = LIST_FIRST(&inp->sctp_asoc_free_list)); asoc != NULL; asoc = nasoc) { nasoc = LIST_NEXT(asoc, sctp_tcblist); LIST_REMOVE(asoc, sctp_tcblist); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), asoc); SCTP_DECR_ASOC_COUNT(); } /* *** END TEMP CODE *** */ #endif /* Now lets see about freeing the EP hash table. */ if (inp->sctp_tcbhash != NULL) { SCTP_HASH_FREE(inp->sctp_tcbhash, inp->sctp_hashmark); inp->sctp_tcbhash = NULL; } /* Now we must put the ep memory back into the zone pool */ INP_LOCK_DESTROY(&inp->ip_inp.inp); SCTP_INP_LOCK_DESTROY(inp); SCTP_INP_READ_DESTROY(inp); SCTP_ASOC_CREATE_LOCK_DESTROY(inp); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp); SCTP_DECR_EP_COUNT(); } struct sctp_nets * sctp_findnet(struct sctp_tcb *stcb, struct sockaddr *addr) { struct sctp_nets *net; /* locate the address */ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { if (sctp_cmpaddr(addr, (struct sockaddr *)&net->ro._l_addr)) return (net); } return (NULL); } int sctp_is_address_on_local_host(struct sockaddr *addr, uint32_t vrf_id) { struct sctp_ifa *sctp_ifa; sctp_ifa = sctp_find_ifa_by_addr(addr, vrf_id, SCTP_ADDR_NOT_LOCKED); if (sctp_ifa) { return (1); } else { return (0); } } /* * add's a remote endpoint address, done with the INIT/INIT-ACK as well as * when a ASCONF arrives that adds it. It will also initialize all the cwnd * stats of stuff. */ int sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, int set_scope, int from) { /* * The following is redundant to the same lines in the * sctp_aloc_assoc() but is needed since others call the add address * function */ struct sctp_nets *net, *netfirst; int addr_inscope; SCTPDBG(SCTP_DEBUG_PCB1, "Adding an address (from:%d) to the peer: ", from); SCTPDBG_ADDR(SCTP_DEBUG_PCB1, newaddr); netfirst = sctp_findnet(stcb, newaddr); if (netfirst) { /* * Lie and return ok, we don't want to make the association * go away for this behavior. It will happen in the TCP * model in a connected socket. It does not reach the hash * table until after the association is built so it can't be * found. Mark as reachable, since the initial creation will * have been cleared and the NOT_IN_ASSOC flag will have * been added... and we don't want to end up removing it * back out. */ if (netfirst->dest_state & SCTP_ADDR_UNCONFIRMED) { netfirst->dest_state = (SCTP_ADDR_REACHABLE | SCTP_ADDR_UNCONFIRMED); } else { netfirst->dest_state = SCTP_ADDR_REACHABLE; } return (0); } addr_inscope = 1; if (newaddr->sa_family == AF_INET) { struct sockaddr_in *sin; sin = (struct sockaddr_in *)newaddr; if (sin->sin_addr.s_addr == 0) { /* Invalid address */ return (-1); } /* zero out the bzero area */ memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); /* assure len is set */ sin->sin_len = sizeof(struct sockaddr_in); if (set_scope) { #ifdef SCTP_DONT_DO_PRIVADDR_SCOPE stcb->ipv4_local_scope = 1; #else if (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) { stcb->asoc.ipv4_local_scope = 1; } #endif /* SCTP_DONT_DO_PRIVADDR_SCOPE */ } else { /* Validate the address is in scope */ if ((IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) && (stcb->asoc.ipv4_local_scope == 0)) { addr_inscope = 0; } } #ifdef INET6 } else if (newaddr->sa_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)newaddr; if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { /* Invalid address */ return (-1); } /* assure len is set */ sin6->sin6_len = sizeof(struct sockaddr_in6); if (set_scope) { if (sctp_is_address_on_local_host(newaddr, stcb->asoc.vrf_id)) { stcb->asoc.loopback_scope = 1; stcb->asoc.local_scope = 0; stcb->asoc.ipv4_local_scope = 1; stcb->asoc.site_scope = 1; } else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { /* * If the new destination is a LINK_LOCAL we * must have common site scope. Don't set * the local scope since we may not share * all links, only loopback can do this. * Links on the local network would also be * on our private network for v4 too. */ stcb->asoc.ipv4_local_scope = 1; stcb->asoc.site_scope = 1; } else if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) { /* * If the new destination is SITE_LOCAL then * we must have site scope in common. */ stcb->asoc.site_scope = 1; } } else { /* Validate the address is in scope */ if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr) && (stcb->asoc.loopback_scope == 0)) { addr_inscope = 0; } else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) && (stcb->asoc.local_scope == 0)) { addr_inscope = 0; } else if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) && (stcb->asoc.site_scope == 0)) { addr_inscope = 0; } } #endif } else { /* not supported family type */ return (-1); } net = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_net), struct sctp_nets); if (net == NULL) { return (-1); } SCTP_INCR_RADDR_COUNT(); bzero(net, sizeof(*net)); (void)SCTP_GETTIME_TIMEVAL(&net->start_time); memcpy(&net->ro._l_addr, newaddr, newaddr->sa_len); if (newaddr->sa_family == AF_INET) { ((struct sockaddr_in *)&net->ro._l_addr)->sin_port = stcb->rport; } else if (newaddr->sa_family == AF_INET6) { ((struct sockaddr_in6 *)&net->ro._l_addr)->sin6_port = stcb->rport; } net->addr_is_local = sctp_is_address_on_local_host(newaddr, stcb->asoc.vrf_id); if (net->addr_is_local && ((set_scope || (from == SCTP_ADDR_IS_CONFIRMED)))) { stcb->asoc.loopback_scope = 1; stcb->asoc.ipv4_local_scope = 1; stcb->asoc.local_scope = 0; stcb->asoc.site_scope = 1; addr_inscope = 1; } net->failure_threshold = stcb->asoc.def_net_failure; if (addr_inscope == 0) { net->dest_state = (SCTP_ADDR_REACHABLE | SCTP_ADDR_OUT_OF_SCOPE); } else { if (from == SCTP_ADDR_IS_CONFIRMED) /* SCTP_ADDR_IS_CONFIRMED is passed by connect_x */ net->dest_state = SCTP_ADDR_REACHABLE; else net->dest_state = SCTP_ADDR_REACHABLE | SCTP_ADDR_UNCONFIRMED; } /* * We set this to 0, the timer code knows that this means its an * initial value */ net->RTO = 0; net->RTO_measured = 0; stcb->asoc.numnets++; *(&net->ref_count) = 1; net->tos_flowlabel = 0; if (SCTP_BASE_SYSCTL(sctp_udp_tunneling_for_client_enable)) { net->port = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)); } else { net->port = 0; } #ifdef INET if (newaddr->sa_family == AF_INET) net->tos_flowlabel = stcb->asoc.default_tos; #endif #ifdef INET6 if (newaddr->sa_family == AF_INET6) net->tos_flowlabel = stcb->asoc.default_flowlabel; #endif /* Init the timer structure */ SCTP_OS_TIMER_INIT(&net->rxt_timer.timer); SCTP_OS_TIMER_INIT(&net->fr_timer.timer); SCTP_OS_TIMER_INIT(&net->pmtu_timer.timer); /* Now generate a route for this guy */ #ifdef INET6 /* KAME hack: embed scopeid */ if (newaddr->sa_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)&net->ro._l_addr; (void)sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)); sin6->sin6_scope_id = 0; } #endif SCTP_RTALLOC((sctp_route_t *) & net->ro, stcb->asoc.vrf_id); if (SCTP_ROUTE_HAS_VALID_IFN(&net->ro)) { /* Get source address */ net->ro._s_addr = sctp_source_address_selection(stcb->sctp_ep, stcb, (sctp_route_t *) & net->ro, net, 0, stcb->asoc.vrf_id); /* Now get the interface MTU */ if (net->ro._s_addr && net->ro._s_addr->ifn_p) { net->mtu = SCTP_GATHER_MTU_FROM_INTFC(net->ro._s_addr->ifn_p); } else { net->mtu = 0; } if (net->mtu == 0) { /* Huh ?? */ net->mtu = SCTP_DEFAULT_MTU; } else { uint32_t rmtu; rmtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, net->ro.ro_rt); if (rmtu == 0) { /* * Start things off to match mtu of * interface please. */ SCTP_SET_MTU_OF_ROUTE(&net->ro._l_addr.sa, net->ro.ro_rt, net->mtu); } else { /* * we take the route mtu over the interface, * since the route may be leading out the * loopback, or a different interface. */ net->mtu = rmtu; } } if (from == SCTP_ALLOC_ASOC) { stcb->asoc.smallest_mtu = net->mtu; } } else { net->mtu = stcb->asoc.smallest_mtu; } #ifdef INET6 if (newaddr->sa_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)&net->ro._l_addr; (void)sa6_recoverscope(sin6); } #endif if (net->port) { net->mtu -= sizeof(struct udphdr); } if (stcb->asoc.smallest_mtu > net->mtu) { stcb->asoc.smallest_mtu = net->mtu; } /* JRS - Use the congestion control given in the CC module */ stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net); /* * CMT: CUC algo - set find_pseudo_cumack to TRUE (1) at beginning * of assoc (2005/06/27, iyengar@cis.udel.edu) */ net->find_pseudo_cumack = 1; net->find_rtx_pseudo_cumack = 1; net->src_addr_selected = 0; netfirst = TAILQ_FIRST(&stcb->asoc.nets); if (net->ro.ro_rt == NULL) { /* Since we have no route put it at the back */ TAILQ_INSERT_TAIL(&stcb->asoc.nets, net, sctp_next); } else if (netfirst == NULL) { /* We are the first one in the pool. */ TAILQ_INSERT_HEAD(&stcb->asoc.nets, net, sctp_next); } else if (netfirst->ro.ro_rt == NULL) { /* * First one has NO route. Place this one ahead of the first * one. */ TAILQ_INSERT_HEAD(&stcb->asoc.nets, net, sctp_next); } else if (net->ro.ro_rt->rt_ifp != netfirst->ro.ro_rt->rt_ifp) { /* * This one has a different interface than the one at the * top of the list. Place it ahead. */ TAILQ_INSERT_HEAD(&stcb->asoc.nets, net, sctp_next); } else { /* * Ok we have the same interface as the first one. Move * forward until we find either a) one with a NULL route... * insert ahead of that b) one with a different ifp.. insert * after that. c) end of the list.. insert at the tail. */ struct sctp_nets *netlook; do { netlook = TAILQ_NEXT(netfirst, sctp_next); if (netlook == NULL) { /* End of the list */ TAILQ_INSERT_TAIL(&stcb->asoc.nets, net, sctp_next); break; } else if (netlook->ro.ro_rt == NULL) { /* next one has NO route */ TAILQ_INSERT_BEFORE(netfirst, net, sctp_next); break; } else if (netlook->ro.ro_rt->rt_ifp != net->ro.ro_rt->rt_ifp) { TAILQ_INSERT_AFTER(&stcb->asoc.nets, netlook, net, sctp_next); break; } /* Shift forward */ netfirst = netlook; } while (netlook != NULL); } /* got to have a primary set */ if (stcb->asoc.primary_destination == 0) { stcb->asoc.primary_destination = net; } else if ((stcb->asoc.primary_destination->ro.ro_rt == NULL) && (net->ro.ro_rt) && ((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0)) { /* No route to current primary adopt new primary */ stcb->asoc.primary_destination = net; } sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, stcb->sctp_ep, stcb, net); /* Validate primary is first */ net = TAILQ_FIRST(&stcb->asoc.nets); if ((net != stcb->asoc.primary_destination) && (stcb->asoc.primary_destination)) { /* * first one on the list is NOT the primary sctp_cmpaddr() * is much more efficient if the primary is the first on the * list, make it so. */ TAILQ_REMOVE(&stcb->asoc.nets, stcb->asoc.primary_destination, sctp_next); TAILQ_INSERT_HEAD(&stcb->asoc.nets, stcb->asoc.primary_destination, sctp_next); } return (0); } static uint32_t sctp_aloc_a_assoc_id(struct sctp_inpcb *inp, struct sctp_tcb *stcb) { uint32_t id; struct sctpasochead *head; struct sctp_tcb *lstcb; SCTP_INP_WLOCK(inp); try_again: if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) { /* TSNH */ SCTP_INP_WUNLOCK(inp); return (0); } /* * We don't allow assoc id to be 0, this is needed otherwise if the * id were to wrap we would have issues with some socket options. */ if (inp->sctp_associd_counter == 0) { inp->sctp_associd_counter++; } id = inp->sctp_associd_counter; inp->sctp_associd_counter++; lstcb = sctp_findasoc_ep_asocid_locked(inp, (sctp_assoc_t) id, 0); if (lstcb) { goto try_again; } head = &inp->sctp_asocidhash[SCTP_PCBHASH_ASOC(id, inp->hashasocidmark)]; LIST_INSERT_HEAD(head, stcb, sctp_tcbasocidhash); stcb->asoc.in_asocid_hash = 1; SCTP_INP_WUNLOCK(inp); return id; } /* * allocate an association and add it to the endpoint. The caller must be * careful to add all additional addresses once they are know right away or * else the assoc will be may experience a blackout scenario. */ struct sctp_tcb * sctp_aloc_assoc(struct sctp_inpcb *inp, struct sockaddr *firstaddr, int *error, uint32_t override_tag, uint32_t vrf_id, struct thread *p ) { /* note the p argument is only valid in unbound sockets */ struct sctp_tcb *stcb; struct sctp_association *asoc; struct sctpasochead *head; uint16_t rport; int err; /* * Assumption made here: Caller has done a * sctp_findassociation_ep_addr(ep, addr's); to make sure the * address does not exist already. */ if (SCTP_BASE_INFO(ipi_count_asoc) >= SCTP_MAX_NUM_OF_ASOC) { /* Hit max assoc, sorry no more */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS); *error = ENOBUFS; return (NULL); } if (firstaddr == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); *error = EINVAL; return (NULL); } SCTP_INP_RLOCK(inp); if ((inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) && ((sctp_is_feature_off(inp, SCTP_PCB_FLAGS_PORTREUSE)) || (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED))) { /* * If its in the TCP pool, its NOT allowed to create an * association. The parent listener needs to call * sctp_aloc_assoc.. or the one-2-many socket. If a peeled * off, or connected one does this.. its an error. */ SCTP_INP_RUNLOCK(inp); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); *error = EINVAL; return (NULL); } SCTPDBG(SCTP_DEBUG_PCB3, "Allocate an association for peer:"); #ifdef SCTP_DEBUG if (firstaddr) { SCTPDBG_ADDR(SCTP_DEBUG_PCB3, firstaddr); SCTPDBG(SCTP_DEBUG_PCB3, "Port:%d\n", ntohs(((struct sockaddr_in *)firstaddr)->sin_port)); } else { SCTPDBG(SCTP_DEBUG_PCB3, "None\n"); } #endif /* SCTP_DEBUG */ if (firstaddr->sa_family == AF_INET) { struct sockaddr_in *sin; sin = (struct sockaddr_in *)firstaddr; if ((sin->sin_port == 0) || (sin->sin_addr.s_addr == 0)) { /* Invalid address */ SCTP_INP_RUNLOCK(inp); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); *error = EINVAL; return (NULL); } rport = sin->sin_port; } else if (firstaddr->sa_family == AF_INET6) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)firstaddr; if ((sin6->sin6_port == 0) || (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))) { /* Invalid address */ SCTP_INP_RUNLOCK(inp); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); *error = EINVAL; return (NULL); } rport = sin6->sin6_port; } else { /* not supported family type */ SCTP_INP_RUNLOCK(inp); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); *error = EINVAL; return (NULL); } SCTP_INP_RUNLOCK(inp); if (inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) { /* * If you have not performed a bind, then we need to do the * ephemeral bind for you. */ if ((err = sctp_inpcb_bind(inp->sctp_socket, (struct sockaddr *)NULL, (struct sctp_ifa *)NULL, p ))) { /* bind error, probably perm */ *error = err; return (NULL); } } stcb = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_asoc), struct sctp_tcb); if (stcb == NULL) { /* out of memory? */ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOMEM); *error = ENOMEM; return (NULL); } SCTP_INCR_ASOC_COUNT(); bzero(stcb, sizeof(*stcb)); asoc = &stcb->asoc; asoc->assoc_id = sctp_aloc_a_assoc_id(inp, stcb); SCTP_TCB_LOCK_INIT(stcb); SCTP_TCB_SEND_LOCK_INIT(stcb); stcb->rport = rport; /* setup back pointer's */ stcb->sctp_ep = inp; stcb->sctp_socket = inp->sctp_socket; if ((err = sctp_init_asoc(inp, stcb, override_tag, vrf_id))) { /* failed */ SCTP_TCB_LOCK_DESTROY(stcb); SCTP_TCB_SEND_LOCK_DESTROY(stcb); LIST_REMOVE(stcb, sctp_tcbasocidhash); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), stcb); SCTP_DECR_ASOC_COUNT(); *error = err; return (NULL); } /* and the port */ SCTP_INP_INFO_WLOCK(); SCTP_INP_WLOCK(inp); if (inp->sctp_flags & (SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_SOCKET_ALLGONE)) { /* inpcb freed while alloc going on */ SCTP_TCB_LOCK_DESTROY(stcb); SCTP_TCB_SEND_LOCK_DESTROY(stcb); LIST_REMOVE(stcb, sctp_tcbasocidhash); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), stcb); SCTP_INP_WUNLOCK(inp); SCTP_INP_INFO_WUNLOCK(); SCTP_DECR_ASOC_COUNT(); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); *error = EINVAL; return (NULL); } SCTP_TCB_LOCK(stcb); /* now that my_vtag is set, add it to the hash */ head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(stcb->asoc.my_vtag, SCTP_BASE_INFO(hashasocmark))]; /* put it in the bucket in the vtag hash of assoc's for the system */ LIST_INSERT_HEAD(head, stcb, sctp_asocs); SCTP_INP_INFO_WUNLOCK(); if ((err = sctp_add_remote_addr(stcb, firstaddr, SCTP_DO_SETSCOPE, SCTP_ALLOC_ASOC))) { /* failure.. memory error? */ if (asoc->strmout) { SCTP_FREE(asoc->strmout, SCTP_M_STRMO); asoc->strmout = NULL; } if (asoc->mapping_array) { SCTP_FREE(asoc->mapping_array, SCTP_M_MAP); asoc->mapping_array = NULL; } if (asoc->nr_mapping_array) { SCTP_FREE(asoc->nr_mapping_array, SCTP_M_MAP); asoc->nr_mapping_array = NULL; } SCTP_DECR_ASOC_COUNT(); SCTP_TCB_LOCK_DESTROY(stcb); SCTP_TCB_SEND_LOCK_DESTROY(stcb); LIST_REMOVE(stcb, sctp_tcbasocidhash); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), stcb); SCTP_INP_WUNLOCK(inp); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS); *error = ENOBUFS; return (NULL); } /* Init all the timers */ SCTP_OS_TIMER_INIT(&asoc->hb_timer.timer); SCTP_OS_TIMER_INIT(&asoc->dack_timer.timer); SCTP_OS_TIMER_INIT(&asoc->strreset_timer.timer); SCTP_OS_TIMER_INIT(&asoc->asconf_timer.timer); SCTP_OS_TIMER_INIT(&asoc->shut_guard_timer.timer); SCTP_OS_TIMER_INIT(&asoc->autoclose_timer.timer); SCTP_OS_TIMER_INIT(&asoc->delayed_event_timer.timer); SCTP_OS_TIMER_INIT(&asoc->delete_prim_timer.timer); LIST_INSERT_HEAD(&inp->sctp_asoc_list, stcb, sctp_tcblist); /* now file the port under the hash as well */ if (inp->sctp_tcbhash != NULL) { head = &inp->sctp_tcbhash[SCTP_PCBHASH_ALLADDR(stcb->rport, inp->sctp_hashmark)]; LIST_INSERT_HEAD(head, stcb, sctp_tcbhash); } SCTP_INP_WUNLOCK(inp); SCTPDBG(SCTP_DEBUG_PCB1, "Association %p now allocated\n", stcb); return (stcb); } void sctp_remove_net(struct sctp_tcb *stcb, struct sctp_nets *net) { struct sctp_association *asoc; asoc = &stcb->asoc; asoc->numnets--; TAILQ_REMOVE(&asoc->nets, net, sctp_next); if (net == asoc->primary_destination) { /* Reset primary */ struct sctp_nets *lnet; lnet = TAILQ_FIRST(&asoc->nets); /* * Mobility adaptation Ideally, if deleted destination is * the primary, it becomes a fast retransmission trigger by * the subsequent SET PRIMARY. (by micchie) */ if (sctp_is_mobility_feature_on(stcb->sctp_ep, SCTP_MOBILITY_BASE) || sctp_is_mobility_feature_on(stcb->sctp_ep, SCTP_MOBILITY_FASTHANDOFF)) { SCTPDBG(SCTP_DEBUG_ASCONF1, "remove_net: primary dst is deleting\n"); if (asoc->deleted_primary != NULL) { SCTPDBG(SCTP_DEBUG_ASCONF1, "remove_net: deleted primary may be already stored\n"); goto out; } asoc->deleted_primary = net; atomic_add_int(&net->ref_count, 1); memset(&net->lastsa, 0, sizeof(net->lastsa)); memset(&net->lastsv, 0, sizeof(net->lastsv)); sctp_mobility_feature_on(stcb->sctp_ep, SCTP_MOBILITY_PRIM_DELETED); sctp_timer_start(SCTP_TIMER_TYPE_PRIM_DELETED, stcb->sctp_ep, stcb, NULL); } out: /* Try to find a confirmed primary */ asoc->primary_destination = sctp_find_alternate_net(stcb, lnet, 0); } if (net == asoc->last_data_chunk_from) { /* Reset primary */ asoc->last_data_chunk_from = TAILQ_FIRST(&asoc->nets); } if (net == asoc->last_control_chunk_from) { /* Clear net */ asoc->last_control_chunk_from = NULL; } sctp_free_remote_addr(net); } /* * remove a remote endpoint address from an association, it will fail if the * address does not exist. */ int sctp_del_remote_addr(struct sctp_tcb *stcb, struct sockaddr *remaddr) { /* * Here we need to remove a remote address. This is quite simple, we * first find it in the list of address for the association * (tasoc->asoc.nets) and then if it is there, we do a LIST_REMOVE * on that item. Note we do not allow it to be removed if there are * no other addresses. */ struct sctp_association *asoc; struct sctp_nets *net, *net_tmp; asoc = &stcb->asoc; /* locate the address */ for (net = TAILQ_FIRST(&asoc->nets); net != NULL; net = net_tmp) { net_tmp = TAILQ_NEXT(net, sctp_next); if (net->ro._l_addr.sa.sa_family != remaddr->sa_family) { continue; } if (sctp_cmpaddr((struct sockaddr *)&net->ro._l_addr, remaddr)) { /* we found the guy */ if (asoc->numnets < 2) { /* Must have at LEAST two remote addresses */ return (-1); } else { sctp_remove_net(stcb, net); return (0); } } } /* not found. */ return (-2); } void sctp_delete_from_timewait(uint32_t tag, uint16_t lport, uint16_t rport) { struct sctpvtaghead *chain; struct sctp_tagblock *twait_block; int found = 0; int i; chain = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)]; if (!LIST_EMPTY(chain)) { LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) { for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) { if ((twait_block->vtag_block[i].v_tag == tag) && (twait_block->vtag_block[i].lport == lport) && (twait_block->vtag_block[i].rport == rport)) { twait_block->vtag_block[i].tv_sec_at_expire = 0; twait_block->vtag_block[i].v_tag = 0; twait_block->vtag_block[i].lport = 0; twait_block->vtag_block[i].rport = 0; found = 1; break; } } if (found) break; } } } int sctp_is_in_timewait(uint32_t tag, uint16_t lport, uint16_t rport) { struct sctpvtaghead *chain; struct sctp_tagblock *twait_block; int found = 0; int i; SCTP_INP_INFO_WLOCK(); chain = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)]; if (!LIST_EMPTY(chain)) { LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) { for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) { if ((twait_block->vtag_block[i].v_tag == tag) && (twait_block->vtag_block[i].lport == lport) && (twait_block->vtag_block[i].rport == rport)) { found = 1; break; } } if (found) break; } } SCTP_INP_INFO_WUNLOCK(); return (found); } void sctp_add_vtag_to_timewait(uint32_t tag, uint32_t time, uint16_t lport, uint16_t rport) { struct sctpvtaghead *chain; struct sctp_tagblock *twait_block; struct timeval now; int set, i; if (time == 0) { /* Its disabled */ return; } (void)SCTP_GETTIME_TIMEVAL(&now); chain = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)]; set = 0; if (!LIST_EMPTY(chain)) { /* Block(s) present, lets find space, and expire on the fly */ LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) { for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) { if ((twait_block->vtag_block[i].v_tag == 0) && !set) { twait_block->vtag_block[i].tv_sec_at_expire = now.tv_sec + time; twait_block->vtag_block[i].v_tag = tag; twait_block->vtag_block[i].lport = lport; twait_block->vtag_block[i].rport = rport; set = 1; } else if ((twait_block->vtag_block[i].v_tag) && ((long)twait_block->vtag_block[i].tv_sec_at_expire < now.tv_sec)) { /* Audit expires this guy */ twait_block->vtag_block[i].tv_sec_at_expire = 0; twait_block->vtag_block[i].v_tag = 0; twait_block->vtag_block[i].lport = 0; twait_block->vtag_block[i].rport = 0; if (set == 0) { /* Reuse it for my new tag */ twait_block->vtag_block[i].tv_sec_at_expire = now.tv_sec + time; twait_block->vtag_block[i].v_tag = tag; twait_block->vtag_block[i].lport = lport; twait_block->vtag_block[i].rport = rport; set = 1; } } } if (set) { /* * We only do up to the block where we can * place our tag for audits */ break; } } } /* Need to add a new block to chain */ if (!set) { SCTP_MALLOC(twait_block, struct sctp_tagblock *, sizeof(struct sctp_tagblock), SCTP_M_TIMW); if (twait_block == NULL) { #ifdef INVARIANTS panic("Can not alloc tagblock"); #endif return; } memset(twait_block, 0, sizeof(struct sctp_tagblock)); LIST_INSERT_HEAD(chain, twait_block, sctp_nxt_tagblock); twait_block->vtag_block[0].tv_sec_at_expire = now.tv_sec + time; twait_block->vtag_block[0].v_tag = tag; twait_block->vtag_block[0].lport = lport; twait_block->vtag_block[0].rport = rport; } } /*- * Free the association after un-hashing the remote port. This * function ALWAYS returns holding NO LOCK on the stcb. It DOES * expect that the input to this function IS a locked TCB. * It will return 0, if it did NOT destroy the association (instead * it unlocks it. It will return NON-zero if it either destroyed the * association OR the association is already destroyed. */ int sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfree, int from_location) { int i; struct sctp_association *asoc; struct sctp_nets *net, *prev; struct sctp_laddr *laddr; struct sctp_tmit_chunk *chk; struct sctp_asconf_addr *aparam; struct sctp_asconf_ack *aack; struct sctp_stream_reset_list *liste; struct sctp_queued_to_read *sq; struct sctp_stream_queue_pending *sp; sctp_sharedkey_t *shared_key; struct socket *so; int ccnt = 0; int cnt = 0; /* first, lets purge the entry from the hash table. */ #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, stcb, 6); #endif if (stcb->asoc.state == 0) { #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, NULL, 7); #endif /* there is no asoc, really TSNH :-0 */ return (1); } /* TEMP CODE */ if (stcb->freed_from_where == 0) { /* Only record the first place free happened from */ stcb->freed_from_where = from_location; } /* TEMP CODE */ asoc = &stcb->asoc; if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) || (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) /* nothing around */ so = NULL; else so = inp->sctp_socket; /* * We used timer based freeing if a reader or writer is in the way. * So we first check if we are actually being called from a timer, * if so we abort early if a reader or writer is still in the way. */ if ((stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) && (from_inpcbfree == SCTP_NORMAL_PROC)) { /* * is it the timer driving us? if so are the reader/writers * gone? */ if (stcb->asoc.refcnt) { /* nope, reader or writer in the way */ sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL); /* no asoc destroyed */ SCTP_TCB_UNLOCK(stcb); #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, stcb, 8); #endif return (0); } } /* now clean up any other timers */ (void)SCTP_OS_TIMER_STOP(&asoc->hb_timer.timer); asoc->hb_timer.self = NULL; (void)SCTP_OS_TIMER_STOP(&asoc->dack_timer.timer); asoc->dack_timer.self = NULL; (void)SCTP_OS_TIMER_STOP(&asoc->strreset_timer.timer); /*- * For stream reset we don't blast this unless * it is a str-reset timer, it might be the * free-asoc timer which we DON'T want to * disturb. */ if (asoc->strreset_timer.type == SCTP_TIMER_TYPE_STRRESET) asoc->strreset_timer.self = NULL; (void)SCTP_OS_TIMER_STOP(&asoc->asconf_timer.timer); asoc->asconf_timer.self = NULL; (void)SCTP_OS_TIMER_STOP(&asoc->autoclose_timer.timer); asoc->autoclose_timer.self = NULL; (void)SCTP_OS_TIMER_STOP(&asoc->shut_guard_timer.timer); asoc->shut_guard_timer.self = NULL; (void)SCTP_OS_TIMER_STOP(&asoc->delayed_event_timer.timer); asoc->delayed_event_timer.self = NULL; /* Mobility adaptation */ (void)SCTP_OS_TIMER_STOP(&asoc->delete_prim_timer.timer); asoc->delete_prim_timer.self = NULL; TAILQ_FOREACH(net, &asoc->nets, sctp_next) { (void)SCTP_OS_TIMER_STOP(&net->fr_timer.timer); net->fr_timer.self = NULL; (void)SCTP_OS_TIMER_STOP(&net->rxt_timer.timer); net->rxt_timer.self = NULL; (void)SCTP_OS_TIMER_STOP(&net->pmtu_timer.timer); net->pmtu_timer.self = NULL; } /* Now the read queue needs to be cleaned up (only once) */ cnt = 0; if ((stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0) { stcb->asoc.state |= SCTP_STATE_ABOUT_TO_BE_FREED; SCTP_INP_READ_LOCK(inp); TAILQ_FOREACH(sq, &inp->read_queue, next) { if (sq->stcb == stcb) { sq->do_not_ref_stcb = 1; sq->sinfo_cumtsn = stcb->asoc.cumulative_tsn; /* * If there is no end, there never will be * now. */ if (sq->end_added == 0) { /* Held for PD-API clear that. */ sq->pdapi_aborted = 1; sq->held_length = 0; if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PDAPIEVNT) && (so != NULL)) { /* * Need to add a PD-API * aborted indication. * Setting the control_pdapi * assures that it will be * added right after this * msg. */ uint32_t strseq; stcb->asoc.control_pdapi = sq; strseq = (sq->sinfo_stream << 16) | sq->sinfo_ssn; sctp_ulp_notify(SCTP_NOTIFY_PARTIAL_DELVIERY_INDICATION, stcb, SCTP_PARTIAL_DELIVERY_ABORTED, (void *)&strseq, SCTP_SO_LOCKED); stcb->asoc.control_pdapi = NULL; } } /* Add an end to wake them */ sq->end_added = 1; cnt++; } } SCTP_INP_READ_UNLOCK(inp); if (stcb->block_entry) { cnt++; SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_PCB, ECONNRESET); stcb->block_entry->error = ECONNRESET; stcb->block_entry = NULL; } } if ((stcb->asoc.refcnt) || (stcb->asoc.state & SCTP_STATE_IN_ACCEPT_QUEUE)) { /* * Someone holds a reference OR the socket is unaccepted * yet. */ if ((stcb->asoc.refcnt) || (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) || (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) { stcb->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE; sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL); } SCTP_TCB_UNLOCK(stcb); if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) || (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) /* nothing around */ so = NULL; if (so) { /* Wake any reader/writers */ sctp_sorwakeup(inp, so); sctp_sowwakeup(inp, so); } #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, stcb, 9); #endif /* no asoc destroyed */ return (0); } #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, stcb, 10); #endif /* * When I reach here, no others want to kill the assoc yet.. and I * own the lock. Now its possible an abort comes in when I do the * lock exchange below to grab all the locks to do the final take * out. to prevent this we increment the count, which will start a * timer and blow out above thus assuring us that we hold exclusive * killing of the asoc. Note that after getting back the TCB lock we * will go ahead and increment the counter back up and stop any * timer a passing stranger may have started :-S */ if (from_inpcbfree == SCTP_NORMAL_PROC) { atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_INP_INFO_WLOCK(); SCTP_INP_WLOCK(inp); SCTP_TCB_LOCK(stcb); } /* Double check the GONE flag */ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) || (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) /* nothing around */ so = NULL; if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { /* * For TCP type we need special handling when we are * connected. We also include the peel'ed off ones to. */ if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) { inp->sctp_flags &= ~SCTP_PCB_FLAGS_CONNECTED; inp->sctp_flags |= SCTP_PCB_FLAGS_WAS_CONNECTED; if (so) { SOCK_LOCK(so); if (so->so_rcv.sb_cc == 0) { so->so_state &= ~(SS_ISCONNECTING | SS_ISDISCONNECTING | SS_ISCONFIRMING | SS_ISCONNECTED); } socantrcvmore_locked(so); sctp_sowwakeup(inp, so); sctp_sorwakeup(inp, so); SCTP_SOWAKEUP(so); } } } /* * Make it invalid too, that way if its about to run it will abort * and return. */ /* re-increment the lock */ if (from_inpcbfree == SCTP_NORMAL_PROC) { atomic_add_int(&stcb->asoc.refcnt, -1); } if (stcb->asoc.refcnt) { stcb->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE; sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL); if (from_inpcbfree == SCTP_NORMAL_PROC) { SCTP_INP_INFO_WUNLOCK(); SCTP_INP_WUNLOCK(inp); } SCTP_TCB_UNLOCK(stcb); return (0); } asoc->state = 0; if (inp->sctp_tcbhash) { LIST_REMOVE(stcb, sctp_tcbhash); } if (stcb->asoc.in_asocid_hash) { LIST_REMOVE(stcb, sctp_tcbasocidhash); } /* Now lets remove it from the list of ALL associations in the EP */ LIST_REMOVE(stcb, sctp_tcblist); if (from_inpcbfree == SCTP_NORMAL_PROC) { SCTP_INP_INCR_REF(inp); SCTP_INP_WUNLOCK(inp); } /* pull from vtag hash */ LIST_REMOVE(stcb, sctp_asocs); sctp_add_vtag_to_timewait(asoc->my_vtag, SCTP_BASE_SYSCTL(sctp_vtag_time_wait), inp->sctp_lport, stcb->rport); /* * Now restop the timers to be sure this is paranoia at is finest! */ (void)SCTP_OS_TIMER_STOP(&asoc->strreset_timer.timer); (void)SCTP_OS_TIMER_STOP(&asoc->hb_timer.timer); (void)SCTP_OS_TIMER_STOP(&asoc->dack_timer.timer); (void)SCTP_OS_TIMER_STOP(&asoc->strreset_timer.timer); (void)SCTP_OS_TIMER_STOP(&asoc->asconf_timer.timer); (void)SCTP_OS_TIMER_STOP(&asoc->shut_guard_timer.timer); (void)SCTP_OS_TIMER_STOP(&asoc->autoclose_timer.timer); (void)SCTP_OS_TIMER_STOP(&asoc->delayed_event_timer.timer); TAILQ_FOREACH(net, &asoc->nets, sctp_next) { (void)SCTP_OS_TIMER_STOP(&net->fr_timer.timer); (void)SCTP_OS_TIMER_STOP(&net->rxt_timer.timer); (void)SCTP_OS_TIMER_STOP(&net->pmtu_timer.timer); } asoc->strreset_timer.type = SCTP_TIMER_TYPE_NONE; prev = NULL; /* * The chunk lists and such SHOULD be empty but we check them just * in case. */ /* anything on the wheel needs to be removed */ for (i = 0; i < asoc->streamoutcnt; i++) { struct sctp_stream_out *outs; outs = &asoc->strmout[i]; /* now clean up any chunks here */ sp = TAILQ_FIRST(&outs->outqueue); while (sp) { TAILQ_REMOVE(&outs->outqueue, sp, next); if (sp->data) { if (so) { /* Still an open socket - report */ sctp_ulp_notify(SCTP_NOTIFY_SPECIAL_SP_FAIL, stcb, SCTP_NOTIFY_DATAGRAM_UNSENT, (void *)sp, SCTP_SO_LOCKED); } if (sp->data) { sctp_m_freem(sp->data); sp->data = NULL; sp->tail_mbuf = NULL; } } if (sp->net) { sctp_free_remote_addr(sp->net); sp->net = NULL; } sctp_free_spbufspace(stcb, asoc, sp); if (sp->holds_key_ref) sctp_auth_key_release(stcb, sp->auth_keyid); /* Free the zone stuff */ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_strmoq), sp); SCTP_DECR_STRMOQ_COUNT(); /* sa_ignore FREED_MEMORY */ sp = TAILQ_FIRST(&outs->outqueue); } } /* sa_ignore FREED_MEMORY */ while ((liste = TAILQ_FIRST(&asoc->resetHead)) != NULL) { TAILQ_REMOVE(&asoc->resetHead, liste, next_resp); SCTP_FREE(liste, SCTP_M_STRESET); } sq = TAILQ_FIRST(&asoc->pending_reply_queue); while (sq) { TAILQ_REMOVE(&asoc->pending_reply_queue, sq, next); if (sq->data) { sctp_m_freem(sq->data); sq->data = NULL; } sctp_free_remote_addr(sq->whoFrom); sq->whoFrom = NULL; sq->stcb = NULL; /* Free the ctl entry */ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), sq); SCTP_DECR_READQ_COUNT(); /* sa_ignore FREED_MEMORY */ sq = TAILQ_FIRST(&asoc->pending_reply_queue); } chk = TAILQ_FIRST(&asoc->free_chunks); while (chk) { TAILQ_REMOVE(&asoc->free_chunks, chk, sctp_next); if (chk->data) { sctp_m_freem(chk->data); chk->data = NULL; } if (chk->holds_key_ref) sctp_auth_key_release(stcb, chk->auth_keyid); ccnt++; SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk); SCTP_DECR_CHK_COUNT(); atomic_subtract_int(&SCTP_BASE_INFO(ipi_free_chunks), 1); asoc->free_chunk_cnt--; /* sa_ignore FREED_MEMORY */ chk = TAILQ_FIRST(&asoc->free_chunks); } /* pending send queue SHOULD be empty */ if (!TAILQ_EMPTY(&asoc->send_queue)) { chk = TAILQ_FIRST(&asoc->send_queue); while (chk) { TAILQ_REMOVE(&asoc->send_queue, chk, sctp_next); if (chk->data) { if (so) { /* Still a socket? */ sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb, SCTP_NOTIFY_DATAGRAM_UNSENT, chk, SCTP_SO_LOCKED); } if (chk->data) { sctp_m_freem(chk->data); chk->data = NULL; } } if (chk->holds_key_ref) sctp_auth_key_release(stcb, chk->auth_keyid); ccnt++; if (chk->whoTo) { sctp_free_remote_addr(chk->whoTo); chk->whoTo = NULL; } SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk); SCTP_DECR_CHK_COUNT(); /* sa_ignore FREED_MEMORY */ chk = TAILQ_FIRST(&asoc->send_queue); } } /* if (ccnt) { printf("Freed %d from send_queue\n", ccnt); ccnt = 0; } */ /* sent queue SHOULD be empty */ if (!TAILQ_EMPTY(&asoc->sent_queue)) { chk = TAILQ_FIRST(&asoc->sent_queue); while (chk) { TAILQ_REMOVE(&asoc->sent_queue, chk, sctp_next); if (chk->data) { if (so) { /* Still a socket? */ sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb, SCTP_NOTIFY_DATAGRAM_SENT, chk, SCTP_SO_LOCKED); } if (chk->data) { sctp_m_freem(chk->data); chk->data = NULL; } } if (chk->holds_key_ref) sctp_auth_key_release(stcb, chk->auth_keyid); ccnt++; sctp_free_remote_addr(chk->whoTo); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk); SCTP_DECR_CHK_COUNT(); /* sa_ignore FREED_MEMORY */ chk = TAILQ_FIRST(&asoc->sent_queue); } } /* if (ccnt) { printf("Freed %d from sent_queue\n", ccnt); ccnt = 0; } */ /* control queue MAY not be empty */ if (!TAILQ_EMPTY(&asoc->control_send_queue)) { chk = TAILQ_FIRST(&asoc->control_send_queue); while (chk) { TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next); if (chk->data) { sctp_m_freem(chk->data); chk->data = NULL; } if (chk->holds_key_ref) sctp_auth_key_release(stcb, chk->auth_keyid); ccnt++; sctp_free_remote_addr(chk->whoTo); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk); SCTP_DECR_CHK_COUNT(); /* sa_ignore FREED_MEMORY */ chk = TAILQ_FIRST(&asoc->control_send_queue); } } /* if (ccnt) { printf("Freed %d from ctrl_queue\n", ccnt); ccnt = 0; } */ /* ASCONF queue MAY not be empty */ if (!TAILQ_EMPTY(&asoc->asconf_send_queue)) { chk = TAILQ_FIRST(&asoc->asconf_send_queue); while (chk) { TAILQ_REMOVE(&asoc->asconf_send_queue, chk, sctp_next); if (chk->data) { sctp_m_freem(chk->data); chk->data = NULL; } if (chk->holds_key_ref) sctp_auth_key_release(stcb, chk->auth_keyid); ccnt++; sctp_free_remote_addr(chk->whoTo); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk); SCTP_DECR_CHK_COUNT(); /* sa_ignore FREED_MEMORY */ chk = TAILQ_FIRST(&asoc->asconf_send_queue); } } /* if (ccnt) { printf("Freed %d from asconf_queue\n", ccnt); ccnt = 0; } */ if (!TAILQ_EMPTY(&asoc->reasmqueue)) { chk = TAILQ_FIRST(&asoc->reasmqueue); while (chk) { TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next); if (chk->data) { sctp_m_freem(chk->data); chk->data = NULL; } if (chk->holds_key_ref) sctp_auth_key_release(stcb, chk->auth_keyid); sctp_free_remote_addr(chk->whoTo); ccnt++; SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_chunk), chk); SCTP_DECR_CHK_COUNT(); /* sa_ignore FREED_MEMORY */ chk = TAILQ_FIRST(&asoc->reasmqueue); } } /* if (ccnt) { printf("Freed %d from reasm_queue\n", ccnt); ccnt = 0; } */ if (asoc->mapping_array) { SCTP_FREE(asoc->mapping_array, SCTP_M_MAP); asoc->mapping_array = NULL; } if (asoc->nr_mapping_array) { SCTP_FREE(asoc->nr_mapping_array, SCTP_M_MAP); asoc->nr_mapping_array = NULL; } /* the stream outs */ if (asoc->strmout) { SCTP_FREE(asoc->strmout, SCTP_M_STRMO); asoc->strmout = NULL; } asoc->strm_realoutsize = asoc->streamoutcnt = 0; if (asoc->strmin) { struct sctp_queued_to_read *ctl; for (i = 0; i < asoc->streamincnt; i++) { if (!TAILQ_EMPTY(&asoc->strmin[i].inqueue)) { /* We have somethings on the streamin queue */ ctl = TAILQ_FIRST(&asoc->strmin[i].inqueue); while (ctl) { TAILQ_REMOVE(&asoc->strmin[i].inqueue, ctl, next); sctp_free_remote_addr(ctl->whoFrom); if (ctl->data) { sctp_m_freem(ctl->data); ctl->data = NULL; } /* * We don't free the address here * since all the net's were freed * above. */ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), ctl); SCTP_DECR_READQ_COUNT(); ctl = TAILQ_FIRST(&asoc->strmin[i].inqueue); } } } SCTP_FREE(asoc->strmin, SCTP_M_STRMI); asoc->strmin = NULL; } asoc->streamincnt = 0; while (!TAILQ_EMPTY(&asoc->nets)) { /* sa_ignore FREED_MEMORY */ net = TAILQ_FIRST(&asoc->nets); /* pull from list */ if ((SCTP_BASE_INFO(ipi_count_raddr) == 0) || (prev == net)) { #ifdef INVARIANTS panic("no net's left alloc'ed, or list points to itself"); #endif break; } prev = net; TAILQ_REMOVE(&asoc->nets, net, sctp_next); sctp_free_remote_addr(net); } while (!LIST_EMPTY(&asoc->sctp_restricted_addrs)) { /* sa_ignore FREED_MEMORY */ laddr = LIST_FIRST(&asoc->sctp_restricted_addrs); sctp_remove_laddr(laddr); } /* pending asconf (address) parameters */ while (!TAILQ_EMPTY(&asoc->asconf_queue)) { /* sa_ignore FREED_MEMORY */ aparam = TAILQ_FIRST(&asoc->asconf_queue); TAILQ_REMOVE(&asoc->asconf_queue, aparam, next); SCTP_FREE(aparam, SCTP_M_ASC_ADDR); } while (!TAILQ_EMPTY(&asoc->asconf_ack_sent)) { /* sa_ignore FREED_MEMORY */ aack = TAILQ_FIRST(&asoc->asconf_ack_sent); TAILQ_REMOVE(&asoc->asconf_ack_sent, aack, next); if (aack->data != NULL) { sctp_m_freem(aack->data); } SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asconf_ack), aack); } /* clean up auth stuff */ if (asoc->local_hmacs) sctp_free_hmaclist(asoc->local_hmacs); if (asoc->peer_hmacs) sctp_free_hmaclist(asoc->peer_hmacs); if (asoc->local_auth_chunks) sctp_free_chunklist(asoc->local_auth_chunks); if (asoc->peer_auth_chunks) sctp_free_chunklist(asoc->peer_auth_chunks); sctp_free_authinfo(&asoc->authinfo); shared_key = LIST_FIRST(&asoc->shared_keys); while (shared_key) { LIST_REMOVE(shared_key, next); sctp_free_sharedkey(shared_key); /* sa_ignore FREED_MEMORY */ shared_key = LIST_FIRST(&asoc->shared_keys); } /* Insert new items here :> */ /* Get rid of LOCK */ SCTP_TCB_LOCK_DESTROY(stcb); SCTP_TCB_SEND_LOCK_DESTROY(stcb); if (from_inpcbfree == SCTP_NORMAL_PROC) { SCTP_INP_INFO_WUNLOCK(); SCTP_INP_RLOCK(inp); } #ifdef SCTP_TRACK_FREED_ASOCS if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) { /* now clean up the tasoc itself */ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), stcb); SCTP_DECR_ASOC_COUNT(); } else { LIST_INSERT_HEAD(&inp->sctp_asoc_free_list, stcb, sctp_tcblist); } #else SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_asoc), stcb); SCTP_DECR_ASOC_COUNT(); #endif if (from_inpcbfree == SCTP_NORMAL_PROC) { if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) { /* * If its NOT the inp_free calling us AND sctp_close * as been called, we call back... */ SCTP_INP_RUNLOCK(inp); /* * This will start the kill timer (if we are the * last one) since we hold an increment yet. But * this is the only safe way to do this since * otherwise if the socket closes at the same time * we are here we might collide in the cleanup. */ sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_GRACEFUL_CLOSE, SCTP_CALLED_DIRECTLY_NOCMPSET); SCTP_INP_DECR_REF(inp); goto out_of; } else { /* The socket is still open. */ SCTP_INP_DECR_REF(inp); } } if (from_inpcbfree == SCTP_NORMAL_PROC) { SCTP_INP_RUNLOCK(inp); } out_of: /* destroyed the asoc */ #ifdef SCTP_LOG_CLOSING sctp_log_closing(inp, NULL, 11); #endif return (1); } /* * determine if a destination is "reachable" based upon the addresses bound * to the current endpoint (e.g. only v4 or v6 currently bound) */ /* * FIX: if we allow assoc-level bindx(), then this needs to be fixed to use * assoc level v4/v6 flags, as the assoc *may* not have the same address * types bound as its endpoint */ int sctp_destination_is_reachable(struct sctp_tcb *stcb, struct sockaddr *destaddr) { struct sctp_inpcb *inp; int answer; /* * No locks here, the TCB, in all cases is already locked and an * assoc is up. There is either a INP lock by the caller applied (in * asconf case when deleting an address) or NOT in the HB case, * however if HB then the INP increment is up and the INP will not * be removed (on top of the fact that we have a TCB lock). So we * only want to read the sctp_flags, which is either bound-all or * not.. no protection needed since once an assoc is up you can't be * changing your binding. */ inp = stcb->sctp_ep; if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { /* if bound all, destination is not restricted */ /* * RRS: Question during lock work: Is this correct? If you * are bound-all you still might need to obey the V4--V6 * flags??? IMO this bound-all stuff needs to be removed! */ return (1); } /* NOTE: all "scope" checks are done when local addresses are added */ if (destaddr->sa_family == AF_INET6) { answer = inp->ip_inp.inp.inp_vflag & INP_IPV6; } else if (destaddr->sa_family == AF_INET) { answer = inp->ip_inp.inp.inp_vflag & INP_IPV4; } else { /* invalid family, so it's unreachable */ answer = 0; } return (answer); } /* * update the inp_vflags on an endpoint */ static void sctp_update_ep_vflag(struct sctp_inpcb *inp) { struct sctp_laddr *laddr; /* first clear the flag */ inp->ip_inp.inp.inp_vflag = 0; /* set the flag based on addresses on the ep list */ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { if (laddr->ifa == NULL) { SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n", __FUNCTION__); continue; } if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) { continue; } if (laddr->ifa->address.sa.sa_family == AF_INET6) { inp->ip_inp.inp.inp_vflag |= INP_IPV6; } else if (laddr->ifa->address.sa.sa_family == AF_INET) { inp->ip_inp.inp.inp_vflag |= INP_IPV4; } } } /* * Add the address to the endpoint local address list There is nothing to be * done if we are bound to all addresses */ void sctp_add_local_addr_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa, uint32_t action) { struct sctp_laddr *laddr; int fnd, error = 0; fnd = 0; if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { /* You are already bound to all. You have it already */ return; } if (ifa->address.sa.sa_family == AF_INET6) { if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) { /* Can't bind a non-useable addr. */ return; } } /* first, is it already present? */ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { if (laddr->ifa == ifa) { fnd = 1; break; } } if (fnd == 0) { /* Not in the ep list */ error = sctp_insert_laddr(&inp->sctp_addr_list, ifa, action); if (error != 0) return; inp->laddr_count++; /* update inp_vflag flags */ if (ifa->address.sa.sa_family == AF_INET6) { inp->ip_inp.inp.inp_vflag |= INP_IPV6; } else if (ifa->address.sa.sa_family == AF_INET) { inp->ip_inp.inp.inp_vflag |= INP_IPV4; } } return; } /* * select a new (hopefully reachable) destination net (should only be used * when we deleted an ep addr that is the only usable source address to reach * the destination net) */ static void sctp_select_primary_destination(struct sctp_tcb *stcb) { struct sctp_nets *net; TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { /* for now, we'll just pick the first reachable one we find */ if (net->dest_state & SCTP_ADDR_UNCONFIRMED) continue; if (sctp_destination_is_reachable(stcb, (struct sockaddr *)&net->ro._l_addr)) { /* found a reachable destination */ stcb->asoc.primary_destination = net; } } /* I can't there from here! ...we're gonna die shortly... */ } /* * Delete the address from the endpoint local address list There is nothing * to be done if we are bound to all addresses */ void sctp_del_local_addr_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa) { struct sctp_laddr *laddr; int fnd; fnd = 0; if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { /* You are already bound to all. You have it already */ return; } LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { if (laddr->ifa == ifa) { fnd = 1; break; } } if (fnd && (inp->laddr_count < 2)) { /* can't delete unless there are at LEAST 2 addresses */ return; } if (fnd) { /* * clean up any use of this address go through our * associations and clear any last_used_address that match * this one for each assoc, see if a new primary_destination * is needed */ struct sctp_tcb *stcb; /* clean up "next_addr_touse" */ if (inp->next_addr_touse == laddr) /* delete this address */ inp->next_addr_touse = NULL; /* clean up "last_used_address" */ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { struct sctp_nets *net; SCTP_TCB_LOCK(stcb); if (stcb->asoc.last_used_address == laddr) /* delete this address */ stcb->asoc.last_used_address = NULL; /* * Now spin through all the nets and purge any ref * to laddr */ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { if (net->ro._s_addr && (net->ro._s_addr->ifa == laddr->ifa)) { /* Yep, purge src address selected */ sctp_rtentry_t *rt; /* delete this address if cached */ rt = net->ro.ro_rt; if (rt != NULL) { RTFREE(rt); net->ro.ro_rt = NULL; } sctp_free_ifa(net->ro._s_addr); net->ro._s_addr = NULL; net->src_addr_selected = 0; } } SCTP_TCB_UNLOCK(stcb); } /* for each tcb */ /* remove it from the ep list */ sctp_remove_laddr(laddr); inp->laddr_count--; /* update inp_vflag flags */ sctp_update_ep_vflag(inp); } return; } /* * Add the address to the TCB local address restricted list. * This is a "pending" address list (eg. addresses waiting for an * ASCONF-ACK response) and cannot be used as a valid source address. */ void sctp_add_local_addr_restricted(struct sctp_tcb *stcb, struct sctp_ifa *ifa) { struct sctp_inpcb *inp; struct sctp_laddr *laddr; struct sctpladdr *list; /* * Assumes TCB is locked.. and possibly the INP. May need to * confirm/fix that if we need it and is not the case. */ list = &stcb->asoc.sctp_restricted_addrs; inp = stcb->sctp_ep; if (ifa->address.sa.sa_family == AF_INET6) { if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) { /* Can't bind a non-existent addr. */ return; } } /* does the address already exist? */ LIST_FOREACH(laddr, list, sctp_nxt_addr) { if (laddr->ifa == ifa) { return; } } /* add to the list */ (void)sctp_insert_laddr(list, ifa, 0); return; } /* * insert an laddr entry with the given ifa for the desired list */ int sctp_insert_laddr(struct sctpladdr *list, struct sctp_ifa *ifa, uint32_t act) { struct sctp_laddr *laddr; laddr = SCTP_ZONE_GET(SCTP_BASE_INFO(ipi_zone_laddr), struct sctp_laddr); if (laddr == NULL) { /* out of memory? */ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL); return (EINVAL); } SCTP_INCR_LADDR_COUNT(); bzero(laddr, sizeof(*laddr)); (void)SCTP_GETTIME_TIMEVAL(&laddr->start_time); laddr->ifa = ifa; laddr->action = act; atomic_add_int(&ifa->refcount, 1); /* insert it */ LIST_INSERT_HEAD(list, laddr, sctp_nxt_addr); return (0); } /* * Remove an laddr entry from the local address list (on an assoc) */ void sctp_remove_laddr(struct sctp_laddr *laddr) { /* remove from the list */ LIST_REMOVE(laddr, sctp_nxt_addr); sctp_free_ifa(laddr->ifa); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_laddr), laddr); SCTP_DECR_LADDR_COUNT(); } /* * Remove a local address from the TCB local address restricted list */ void sctp_del_local_addr_restricted(struct sctp_tcb *stcb, struct sctp_ifa *ifa) { struct sctp_inpcb *inp; struct sctp_laddr *laddr; /* * This is called by asconf work. It is assumed that a) The TCB is * locked and b) The INP is locked. This is true in as much as I can * trace through the entry asconf code where I did these locks. * Again, the ASCONF code is a bit different in that it does lock * the INP during its work often times. This must be since we don't * want other proc's looking up things while what they are looking * up is changing :-D */ inp = stcb->sctp_ep; /* if subset bound and don't allow ASCONF's, can't delete last */ if (((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) && sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DO_ASCONF)) { if (stcb->sctp_ep->laddr_count < 2) { /* can't delete last address */ return; } } LIST_FOREACH(laddr, &stcb->asoc.sctp_restricted_addrs, sctp_nxt_addr) { /* remove the address if it exists */ if (laddr->ifa == NULL) continue; if (laddr->ifa == ifa) { sctp_remove_laddr(laddr); return; } } /* address not found! */ return; } /* * Temporarily remove for __APPLE__ until we use the Tiger equivalents */ /* sysctl */ static int sctp_max_number_of_assoc = SCTP_MAX_NUM_OF_ASOC; static int sctp_scale_up_for_address = SCTP_SCALE_FOR_ADDR; void sctp_pcb_init() { /* * SCTP initialization for the PCB structures should be called by * the sctp_init() funciton. */ int i; struct timeval tv; if (SCTP_BASE_VAR(sctp_pcb_initialized) != 0) { /* error I was called twice */ return; } SCTP_BASE_VAR(sctp_pcb_initialized) = 1; #if defined(SCTP_LOCAL_TRACE_BUF) bzero(&SCTP_BASE_SYSCTL(sctp_log), sizeof(struct sctp_log)); #endif (void)SCTP_GETTIME_TIMEVAL(&tv); #if defined(__FreeBSD__) && defined(SMP) && defined(SCTP_USE_PERCPU_STAT) SCTP_BASE_STATS[PCPU_GET(cpuid)].sctps_discontinuitytime.tv_sec = (uint32_t) tv.tv_sec; SCTP_BASE_STATS[PCPU_GET(cpuid)].sctps_discontinuitytime.tv_usec = (uint32_t) tv.tv_usec; #else SCTP_BASE_STAT(sctps_discontinuitytime).tv_sec = (uint32_t) tv.tv_sec; SCTP_BASE_STAT(sctps_discontinuitytime).tv_usec = (uint32_t) tv.tv_usec; #endif /* init the empty list of (All) Endpoints */ LIST_INIT(&SCTP_BASE_INFO(listhead)); /* init the hash table of endpoints */ TUNABLE_INT_FETCH("net.inet.sctp.tcbhashsize", &SCTP_BASE_SYSCTL(sctp_hashtblsize)); TUNABLE_INT_FETCH("net.inet.sctp.pcbhashsize", &SCTP_BASE_SYSCTL(sctp_pcbtblsize)); TUNABLE_INT_FETCH("net.inet.sctp.chunkscale", &SCTP_BASE_SYSCTL(sctp_chunkscale)); SCTP_BASE_INFO(sctp_asochash) = SCTP_HASH_INIT((SCTP_BASE_SYSCTL(sctp_hashtblsize) * 31), &SCTP_BASE_INFO(hashasocmark)); SCTP_BASE_INFO(sctp_ephash) = SCTP_HASH_INIT(SCTP_BASE_SYSCTL(sctp_hashtblsize), &SCTP_BASE_INFO(hashmark)); SCTP_BASE_INFO(sctp_tcpephash) = SCTP_HASH_INIT(SCTP_BASE_SYSCTL(sctp_hashtblsize), &SCTP_BASE_INFO(hashtcpmark)); SCTP_BASE_INFO(hashtblsize) = SCTP_BASE_SYSCTL(sctp_hashtblsize); SCTP_BASE_INFO(sctp_vrfhash) = SCTP_HASH_INIT(SCTP_SIZE_OF_VRF_HASH, &SCTP_BASE_INFO(hashvrfmark)); SCTP_BASE_INFO(vrf_ifn_hash) = SCTP_HASH_INIT(SCTP_VRF_IFN_HASH_SIZE, &SCTP_BASE_INFO(vrf_ifn_hashmark)); /* init the zones */ /* * FIX ME: Should check for NULL returns, but if it does fail we are * doomed to panic anyways... add later maybe. */ SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_ep), "sctp_ep", sizeof(struct sctp_inpcb), maxsockets); SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_asoc), "sctp_asoc", sizeof(struct sctp_tcb), sctp_max_number_of_assoc); SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_laddr), "sctp_laddr", sizeof(struct sctp_laddr), (sctp_max_number_of_assoc * sctp_scale_up_for_address)); SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_net), "sctp_raddr", sizeof(struct sctp_nets), (sctp_max_number_of_assoc * sctp_scale_up_for_address)); SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_chunk), "sctp_chunk", sizeof(struct sctp_tmit_chunk), (sctp_max_number_of_assoc * SCTP_BASE_SYSCTL(sctp_chunkscale))); SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_readq), "sctp_readq", sizeof(struct sctp_queued_to_read), (sctp_max_number_of_assoc * SCTP_BASE_SYSCTL(sctp_chunkscale))); SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_strmoq), "sctp_stream_msg_out", sizeof(struct sctp_stream_queue_pending), (sctp_max_number_of_assoc * SCTP_BASE_SYSCTL(sctp_chunkscale))); SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_asconf), "sctp_asconf", sizeof(struct sctp_asconf), (sctp_max_number_of_assoc * SCTP_BASE_SYSCTL(sctp_chunkscale))); SCTP_ZONE_INIT(SCTP_BASE_INFO(ipi_zone_asconf_ack), "sctp_asconf_ack", sizeof(struct sctp_asconf_ack), (sctp_max_number_of_assoc * SCTP_BASE_SYSCTL(sctp_chunkscale))); /* Master Lock INIT for info structure */ SCTP_INP_INFO_LOCK_INIT(); SCTP_STATLOG_INIT_LOCK(); SCTP_IPI_COUNT_INIT(); SCTP_IPI_ADDR_INIT(); #ifdef SCTP_PACKET_LOGGING SCTP_IP_PKTLOG_INIT(); #endif LIST_INIT(&SCTP_BASE_INFO(addr_wq)); SCTP_WQ_ADDR_INIT(); /* not sure if we need all the counts */ SCTP_BASE_INFO(ipi_count_ep) = 0; /* assoc/tcb zone info */ SCTP_BASE_INFO(ipi_count_asoc) = 0; /* local addrlist zone info */ SCTP_BASE_INFO(ipi_count_laddr) = 0; /* remote addrlist zone info */ SCTP_BASE_INFO(ipi_count_raddr) = 0; /* chunk info */ SCTP_BASE_INFO(ipi_count_chunk) = 0; /* socket queue zone info */ SCTP_BASE_INFO(ipi_count_readq) = 0; /* stream out queue cont */ SCTP_BASE_INFO(ipi_count_strmoq) = 0; SCTP_BASE_INFO(ipi_free_strmoq) = 0; SCTP_BASE_INFO(ipi_free_chunks) = 0; SCTP_OS_TIMER_INIT(&SCTP_BASE_INFO(addr_wq_timer.timer)); /* Init the TIMEWAIT list */ for (i = 0; i < SCTP_STACK_VTAG_HASH_SIZE; i++) { LIST_INIT(&SCTP_BASE_INFO(vtag_timewait)[i]); } sctp_startup_iterator(); /* * INIT the default VRF which for BSD is the only one, other O/S's * may have more. But initially they must start with one and then * add the VRF's as addresses are added. */ sctp_init_vrf_list(SCTP_DEFAULT_VRF); } /* * Assumes that the SCTP_BASE_INFO() lock is NOT held. */ void sctp_pcb_finish(void) { struct sctp_vrflist *vrf_bucket; struct sctp_vrf *vrf; struct sctp_ifn *ifn; struct sctp_ifa *ifa; struct sctpvtaghead *chain; struct sctp_tagblock *twait_block, *prev_twait_block; struct sctp_laddr *wi; int i; /* * Free BSD the it thread never exits but we do clean up. The only * way freebsd reaches here if we have VRF's but we still add the * ifdef to make it compile on old versions. */ { struct sctp_iterator *it, *nit; SCTP_IPI_ITERATOR_WQ_LOCK(); it = TAILQ_FIRST(&sctp_it_ctl.iteratorhead); while (it) { nit = TAILQ_NEXT(it, sctp_nxt_itr); if (it->vn != curvnet) { it = nit; continue; } TAILQ_REMOVE(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr); if (it->function_atend != NULL) { (*it->function_atend) (it->pointer, it->val); } SCTP_FREE(it, SCTP_M_ITER); it = nit; } SCTP_IPI_ITERATOR_WQ_UNLOCK(); SCTP_ITERATOR_LOCK(); if ((sctp_it_ctl.cur_it) && (sctp_it_ctl.cur_it->vn == curvnet)) { sctp_it_ctl.iterator_flags |= SCTP_ITERATOR_STOP_CUR_IT; } SCTP_ITERATOR_UNLOCK(); } SCTP_OS_TIMER_STOP(&SCTP_BASE_INFO(addr_wq_timer.timer)); SCTP_WQ_ADDR_LOCK(); while ((wi = LIST_FIRST(&SCTP_BASE_INFO(addr_wq))) != NULL) { LIST_REMOVE(wi, sctp_nxt_addr); SCTP_DECR_LADDR_COUNT(); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_laddr), wi); } SCTP_WQ_ADDR_UNLOCK(); /* * free the vrf/ifn/ifa lists and hashes (be sure address monitor is * destroyed first). */ vrf_bucket = &SCTP_BASE_INFO(sctp_vrfhash)[(SCTP_DEFAULT_VRFID & SCTP_BASE_INFO(hashvrfmark))]; while ((vrf = LIST_FIRST(vrf_bucket)) != NULL) { while ((ifn = LIST_FIRST(&vrf->ifnlist)) != NULL) { while ((ifa = LIST_FIRST(&ifn->ifalist)) != NULL) { /* free the ifa */ LIST_REMOVE(ifa, next_bucket); LIST_REMOVE(ifa, next_ifa); SCTP_FREE(ifa, SCTP_M_IFA); } /* free the ifn */ LIST_REMOVE(ifn, next_bucket); LIST_REMOVE(ifn, next_ifn); SCTP_FREE(ifn, SCTP_M_IFN); } SCTP_HASH_FREE(vrf->vrf_addr_hash, vrf->vrf_addr_hashmark); /* free the vrf */ LIST_REMOVE(vrf, next_vrf); SCTP_FREE(vrf, SCTP_M_VRF); } /* free the vrf hashes */ SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_vrfhash), SCTP_BASE_INFO(hashvrfmark)); SCTP_HASH_FREE(SCTP_BASE_INFO(vrf_ifn_hash), SCTP_BASE_INFO(vrf_ifn_hashmark)); /* * free the TIMEWAIT list elements malloc'd in the function * sctp_add_vtag_to_timewait()... */ for (i = 0; i < SCTP_STACK_VTAG_HASH_SIZE; i++) { chain = &SCTP_BASE_INFO(vtag_timewait)[i]; if (!LIST_EMPTY(chain)) { prev_twait_block = NULL; LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) { if (prev_twait_block) { SCTP_FREE(prev_twait_block, SCTP_M_TIMW); } prev_twait_block = twait_block; } SCTP_FREE(prev_twait_block, SCTP_M_TIMW); } } /* free the locks and mutexes */ #ifdef SCTP_PACKET_LOGGING SCTP_IP_PKTLOG_DESTROY(); #endif SCTP_IPI_ADDR_DESTROY(); SCTP_STATLOG_DESTROY(); SCTP_INP_INFO_LOCK_DESTROY(); SCTP_WQ_ADDR_DESTROY(); SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_ep)); SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_asoc)); SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_laddr)); SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_net)); SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_chunk)); SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_readq)); SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_strmoq)); SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_asconf)); SCTP_ZONE_DESTROY(SCTP_BASE_INFO(ipi_zone_asconf_ack)); /* Get rid of other stuff to */ if (SCTP_BASE_INFO(sctp_asochash) != NULL) SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_asochash), SCTP_BASE_INFO(hashasocmark)); if (SCTP_BASE_INFO(sctp_ephash) != NULL) SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_ephash), SCTP_BASE_INFO(hashmark)); if (SCTP_BASE_INFO(sctp_tcpephash) != NULL) SCTP_HASH_FREE(SCTP_BASE_INFO(sctp_tcpephash), SCTP_BASE_INFO(hashtcpmark)); } int sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m, int iphlen, int offset, int limit, struct sctphdr *sh, struct sockaddr *altsa) { /* * grub through the INIT pulling addresses and loading them to the * nets structure in the asoc. The from address in the mbuf should * also be loaded (if it is not already). This routine can be called * with either INIT or INIT-ACK's as long as the m points to the IP * packet and the offset points to the beginning of the parameters. */ struct sctp_inpcb *inp, *l_inp; struct sctp_nets *net, *net_tmp; struct ip *iph; struct sctp_paramhdr *phdr, parm_buf; struct sctp_tcb *stcb_tmp; uint16_t ptype, plen; struct sockaddr *sa; struct sockaddr_storage dest_store; struct sockaddr *local_sa = (struct sockaddr *)&dest_store; struct sockaddr_in sin; struct sockaddr_in6 sin6; uint8_t random_store[SCTP_PARAM_BUFFER_SIZE]; struct sctp_auth_random *p_random = NULL; uint16_t random_len = 0; uint8_t hmacs_store[SCTP_PARAM_BUFFER_SIZE]; struct sctp_auth_hmac_algo *hmacs = NULL; uint16_t hmacs_len = 0; uint8_t saw_asconf = 0; uint8_t saw_asconf_ack = 0; uint8_t chunks_store[SCTP_PARAM_BUFFER_SIZE]; struct sctp_auth_chunk_list *chunks = NULL; uint16_t num_chunks = 0; sctp_key_t *new_key; uint32_t keylen; int got_random = 0, got_hmacs = 0, got_chklist = 0; /* First get the destination address setup too. */ memset(&sin, 0, sizeof(sin)); memset(&sin6, 0, sizeof(sin6)); sin.sin_family = AF_INET; sin.sin_len = sizeof(sin); sin.sin_port = stcb->rport; sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_port = stcb->rport; if (altsa == NULL) { iph = mtod(m, struct ip *); switch (iph->ip_v) { case IPVERSION: { /* its IPv4 */ struct sockaddr_in *sin_2; sin_2 = (struct sockaddr_in *)(local_sa); memset(sin_2, 0, sizeof(sin)); sin_2->sin_family = AF_INET; sin_2->sin_len = sizeof(sin); sin_2->sin_port = sh->dest_port; sin_2->sin_addr.s_addr = iph->ip_dst.s_addr; sin.sin_addr = iph->ip_src; sa = (struct sockaddr *)&sin; break; } #ifdef INET6 case IPV6_VERSION >> 4: { /* its IPv6 */ struct ip6_hdr *ip6; struct sockaddr_in6 *sin6_2; ip6 = mtod(m, struct ip6_hdr *); sin6_2 = (struct sockaddr_in6 *)(local_sa); memset(sin6_2, 0, sizeof(sin6)); sin6_2->sin6_family = AF_INET6; sin6_2->sin6_len = sizeof(struct sockaddr_in6); sin6_2->sin6_port = sh->dest_port; sin6.sin6_addr = ip6->ip6_src; sa = (struct sockaddr *)&sin6; break; } #endif default: return (-1); break; } } else { /* * For cookies we use the src address NOT from the packet * but from the original INIT */ sa = altsa; } /* Turn off ECN until we get through all params */ stcb->asoc.ecn_allowed = 0; TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { /* mark all addresses that we have currently on the list */ net->dest_state |= SCTP_ADDR_NOT_IN_ASSOC; } /* does the source address already exist? if so skip it */ l_inp = inp = stcb->sctp_ep; atomic_add_int(&stcb->asoc.refcnt, 1); stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net_tmp, local_sa, stcb); atomic_add_int(&stcb->asoc.refcnt, -1); if ((stcb_tmp == NULL && inp == stcb->sctp_ep) || inp == NULL) { /* we must add the source address */ /* no scope set here since we have a tcb already. */ if ((sa->sa_family == AF_INET) && (stcb->asoc.ipv4_addr_legal)) { if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_2)) { return (-1); } } else if ((sa->sa_family == AF_INET6) && (stcb->asoc.ipv6_addr_legal)) { if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_3)) { return (-2); } } } else { if (net_tmp != NULL && stcb_tmp == stcb) { net_tmp->dest_state &= ~SCTP_ADDR_NOT_IN_ASSOC; } else if (stcb_tmp != stcb) { /* It belongs to another association? */ if (stcb_tmp) SCTP_TCB_UNLOCK(stcb_tmp); return (-3); } } if (stcb->asoc.state == 0) { /* the assoc was freed? */ return (-4); } /* * peer must explicitly turn this on. This may have been initialized * to be "on" in order to allow local addr changes while INIT's are * in flight. */ stcb->asoc.peer_supports_asconf = 0; /* now we must go through each of the params. */ phdr = sctp_get_next_param(m, offset, &parm_buf, sizeof(parm_buf)); while (phdr) { ptype = ntohs(phdr->param_type); plen = ntohs(phdr->param_length); /* * printf("ptype => %0x, plen => %d\n", (uint32_t)ptype, * (int)plen); */ if (offset + plen > limit) { break; } if (plen == 0) { break; } if (ptype == SCTP_IPV4_ADDRESS) { if (stcb->asoc.ipv4_addr_legal) { struct sctp_ipv4addr_param *p4, p4_buf; /* ok get the v4 address and check/add */ phdr = sctp_get_next_param(m, offset, (struct sctp_paramhdr *)&p4_buf, sizeof(p4_buf)); if (plen != sizeof(struct sctp_ipv4addr_param) || phdr == NULL) { return (-5); } p4 = (struct sctp_ipv4addr_param *)phdr; sin.sin_addr.s_addr = p4->addr; if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) { /* Skip multi-cast addresses */ goto next_param; } if ((sin.sin_addr.s_addr == INADDR_BROADCAST) || (sin.sin_addr.s_addr == INADDR_ANY)) { goto next_param; } sa = (struct sockaddr *)&sin; inp = stcb->sctp_ep; atomic_add_int(&stcb->asoc.refcnt, 1); stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net, local_sa, stcb); atomic_add_int(&stcb->asoc.refcnt, -1); if ((stcb_tmp == NULL && inp == stcb->sctp_ep) || inp == NULL) { /* we must add the source address */ /* * no scope set since we have a tcb * already */ /* * we must validate the state again * here */ add_it_now: if (stcb->asoc.state == 0) { /* the assoc was freed? */ return (-7); } if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_4)) { return (-8); } } else if (stcb_tmp == stcb) { if (stcb->asoc.state == 0) { /* the assoc was freed? */ return (-10); } if (net != NULL) { /* clear flag */ net->dest_state &= ~SCTP_ADDR_NOT_IN_ASSOC; } } else { /* * strange, address is in another * assoc? straighten out locks. */ if (stcb_tmp) { if (SCTP_GET_STATE(&stcb_tmp->asoc) & SCTP_STATE_COOKIE_WAIT) { /* * in setup state we * abort this guy */ sctp_abort_an_association(stcb_tmp->sctp_ep, stcb_tmp, 1, NULL, 0); goto add_it_now; } SCTP_TCB_UNLOCK(stcb_tmp); } if (stcb->asoc.state == 0) { /* the assoc was freed? */ return (-12); } return (-13); } } } else if (ptype == SCTP_IPV6_ADDRESS) { if (stcb->asoc.ipv6_addr_legal) { /* ok get the v6 address and check/add */ struct sctp_ipv6addr_param *p6, p6_buf; phdr = sctp_get_next_param(m, offset, (struct sctp_paramhdr *)&p6_buf, sizeof(p6_buf)); if (plen != sizeof(struct sctp_ipv6addr_param) || phdr == NULL) { return (-14); } p6 = (struct sctp_ipv6addr_param *)phdr; memcpy((caddr_t)&sin6.sin6_addr, p6->addr, sizeof(p6->addr)); if (IN6_IS_ADDR_MULTICAST(&sin6.sin6_addr)) { /* Skip multi-cast addresses */ goto next_param; } if (IN6_IS_ADDR_LINKLOCAL(&sin6.sin6_addr)) { /* * Link local make no sense without * scope */ goto next_param; } sa = (struct sockaddr *)&sin6; inp = stcb->sctp_ep; atomic_add_int(&stcb->asoc.refcnt, 1); stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net, local_sa, stcb); atomic_add_int(&stcb->asoc.refcnt, -1); if (stcb_tmp == NULL && (inp == stcb->sctp_ep || inp == NULL)) { /* * we must validate the state again * here */ add_it_now6: if (stcb->asoc.state == 0) { /* the assoc was freed? */ return (-16); } /* * we must add the address, no scope * set */ if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_5)) { return (-17); } } else if (stcb_tmp == stcb) { /* * we must validate the state again * here */ if (stcb->asoc.state == 0) { /* the assoc was freed? */ return (-19); } if (net != NULL) { /* clear flag */ net->dest_state &= ~SCTP_ADDR_NOT_IN_ASSOC; } } else { /* * strange, address is in another * assoc? straighten out locks. */ if (stcb_tmp) if (SCTP_GET_STATE(&stcb_tmp->asoc) & SCTP_STATE_COOKIE_WAIT) { /* * in setup state we * abort this guy */ sctp_abort_an_association(stcb_tmp->sctp_ep, stcb_tmp, 1, NULL, 0); goto add_it_now6; } SCTP_TCB_UNLOCK(stcb_tmp); if (stcb->asoc.state == 0) { /* the assoc was freed? */ return (-21); } return (-22); } } } else if (ptype == SCTP_ECN_CAPABLE) { stcb->asoc.ecn_allowed = 1; } else if (ptype == SCTP_ULP_ADAPTATION) { if (stcb->asoc.state != SCTP_STATE_OPEN) { struct sctp_adaptation_layer_indication ai, *aip; phdr = sctp_get_next_param(m, offset, (struct sctp_paramhdr *)&ai, sizeof(ai)); aip = (struct sctp_adaptation_layer_indication *)phdr; if (aip) { stcb->asoc.peers_adaptation = ntohl(aip->indication); stcb->asoc.adaptation_needed = 1; } } } else if (ptype == SCTP_SET_PRIM_ADDR) { struct sctp_asconf_addr_param lstore, *fee; struct sctp_asconf_addrv4_param *fii; int lptype; struct sockaddr *lsa = NULL; stcb->asoc.peer_supports_asconf = 1; if (plen > sizeof(lstore)) { return (-23); } phdr = sctp_get_next_param(m, offset, (struct sctp_paramhdr *)&lstore, min(plen, sizeof(lstore))); if (phdr == NULL) { return (-24); } fee = (struct sctp_asconf_addr_param *)phdr; lptype = ntohs(fee->addrp.ph.param_type); if (lptype == SCTP_IPV4_ADDRESS) { if (plen != sizeof(struct sctp_asconf_addrv4_param)) { SCTP_PRINTF("Sizeof setprim in init/init ack not %d but %d - ignored\n", (int)sizeof(struct sctp_asconf_addrv4_param), plen); } else { fii = (struct sctp_asconf_addrv4_param *)fee; sin.sin_addr.s_addr = fii->addrp.addr; lsa = (struct sockaddr *)&sin; } } else if (lptype == SCTP_IPV6_ADDRESS) { if (plen != sizeof(struct sctp_asconf_addr_param)) { SCTP_PRINTF("Sizeof setprim (v6) in init/init ack not %d but %d - ignored\n", (int)sizeof(struct sctp_asconf_addr_param), plen); } else { memcpy(sin6.sin6_addr.s6_addr, fee->addrp.addr, sizeof(fee->addrp.addr)); lsa = (struct sockaddr *)&sin6; } } if (lsa) { (void)sctp_set_primary_addr(stcb, sa, NULL); } } else if (ptype == SCTP_HAS_NAT_SUPPORT) { stcb->asoc.peer_supports_nat = 1; } else if (ptype == SCTP_PRSCTP_SUPPORTED) { /* Peer supports pr-sctp */ stcb->asoc.peer_supports_prsctp = 1; } else if (ptype == SCTP_SUPPORTED_CHUNK_EXT) { /* A supported extension chunk */ struct sctp_supported_chunk_types_param *pr_supported; uint8_t local_store[SCTP_PARAM_BUFFER_SIZE]; int num_ent, i; phdr = sctp_get_next_param(m, offset, (struct sctp_paramhdr *)&local_store, min(sizeof(local_store), plen)); if (phdr == NULL) { return (-25); } stcb->asoc.peer_supports_asconf = 0; stcb->asoc.peer_supports_prsctp = 0; stcb->asoc.peer_supports_pktdrop = 0; stcb->asoc.peer_supports_strreset = 0; stcb->asoc.peer_supports_nr_sack = 0; stcb->asoc.peer_supports_auth = 0; pr_supported = (struct sctp_supported_chunk_types_param *)phdr; num_ent = plen - sizeof(struct sctp_paramhdr); for (i = 0; i < num_ent; i++) { switch (pr_supported->chunk_types[i]) { case SCTP_ASCONF: case SCTP_ASCONF_ACK: stcb->asoc.peer_supports_asconf = 1; break; case SCTP_FORWARD_CUM_TSN: stcb->asoc.peer_supports_prsctp = 1; break; case SCTP_PACKET_DROPPED: stcb->asoc.peer_supports_pktdrop = 1; break; case SCTP_NR_SELECTIVE_ACK: stcb->asoc.peer_supports_nr_sack = 1; break; case SCTP_STREAM_RESET: stcb->asoc.peer_supports_strreset = 1; break; case SCTP_AUTHENTICATION: stcb->asoc.peer_supports_auth = 1; break; default: /* one I have not learned yet */ break; } } } else if (ptype == SCTP_ECN_NONCE_SUPPORTED) { /* Peer supports ECN-nonce */ stcb->asoc.peer_supports_ecn_nonce = 1; stcb->asoc.ecn_nonce_allowed = 1; } else if (ptype == SCTP_RANDOM) { if (plen > sizeof(random_store)) break; if (got_random) { /* already processed a RANDOM */ goto next_param; } phdr = sctp_get_next_param(m, offset, (struct sctp_paramhdr *)random_store, min(sizeof(random_store), plen)); if (phdr == NULL) return (-26); p_random = (struct sctp_auth_random *)phdr; random_len = plen - sizeof(*p_random); /* enforce the random length */ if (random_len != SCTP_AUTH_RANDOM_SIZE_REQUIRED) { SCTPDBG(SCTP_DEBUG_AUTH1, "SCTP: invalid RANDOM len\n"); return (-27); } got_random = 1; } else if (ptype == SCTP_HMAC_LIST) { int num_hmacs; int i; if (plen > sizeof(hmacs_store)) break; if (got_hmacs) { /* already processed a HMAC list */ goto next_param; } phdr = sctp_get_next_param(m, offset, (struct sctp_paramhdr *)hmacs_store, min(plen, sizeof(hmacs_store))); if (phdr == NULL) return (-28); hmacs = (struct sctp_auth_hmac_algo *)phdr; hmacs_len = plen - sizeof(*hmacs); num_hmacs = hmacs_len / sizeof(hmacs->hmac_ids[0]); /* validate the hmac list */ if (sctp_verify_hmac_param(hmacs, num_hmacs)) { return (-29); } if (stcb->asoc.peer_hmacs != NULL) sctp_free_hmaclist(stcb->asoc.peer_hmacs); stcb->asoc.peer_hmacs = sctp_alloc_hmaclist(num_hmacs); if (stcb->asoc.peer_hmacs != NULL) { for (i = 0; i < num_hmacs; i++) { (void)sctp_auth_add_hmacid(stcb->asoc.peer_hmacs, ntohs(hmacs->hmac_ids[i])); } } got_hmacs = 1; } else if (ptype == SCTP_CHUNK_LIST) { int i; if (plen > sizeof(chunks_store)) break; if (got_chklist) { /* already processed a Chunks list */ goto next_param; } phdr = sctp_get_next_param(m, offset, (struct sctp_paramhdr *)chunks_store, min(plen, sizeof(chunks_store))); if (phdr == NULL) return (-30); chunks = (struct sctp_auth_chunk_list *)phdr; num_chunks = plen - sizeof(*chunks); if (stcb->asoc.peer_auth_chunks != NULL) sctp_clear_chunklist(stcb->asoc.peer_auth_chunks); else stcb->asoc.peer_auth_chunks = sctp_alloc_chunklist(); for (i = 0; i < num_chunks; i++) { (void)sctp_auth_add_chunk(chunks->chunk_types[i], stcb->asoc.peer_auth_chunks); /* record asconf/asconf-ack if listed */ if (chunks->chunk_types[i] == SCTP_ASCONF) saw_asconf = 1; if (chunks->chunk_types[i] == SCTP_ASCONF_ACK) saw_asconf_ack = 1; } got_chklist = 1; } else if ((ptype == SCTP_HEARTBEAT_INFO) || (ptype == SCTP_STATE_COOKIE) || (ptype == SCTP_UNRECOG_PARAM) || (ptype == SCTP_COOKIE_PRESERVE) || (ptype == SCTP_SUPPORTED_ADDRTYPE) || (ptype == SCTP_ADD_IP_ADDRESS) || (ptype == SCTP_DEL_IP_ADDRESS) || (ptype == SCTP_ERROR_CAUSE_IND) || (ptype == SCTP_SUCCESS_REPORT)) { /* don't care */ ; } else { if ((ptype & 0x8000) == 0x0000) { /* * must stop processing the rest of the * param's. Any report bits were handled * with the call to * sctp_arethere_unrecognized_parameters() * when the INIT or INIT-ACK was first seen. */ break; } } next_param: offset += SCTP_SIZE32(plen); if (offset >= limit) { break; } phdr = sctp_get_next_param(m, offset, &parm_buf, sizeof(parm_buf)); } /* Now check to see if we need to purge any addresses */ for (net = TAILQ_FIRST(&stcb->asoc.nets); net != NULL; net = net_tmp) { net_tmp = TAILQ_NEXT(net, sctp_next); if ((net->dest_state & SCTP_ADDR_NOT_IN_ASSOC) == SCTP_ADDR_NOT_IN_ASSOC) { /* This address has been removed from the asoc */ /* remove and free it */ stcb->asoc.numnets--; TAILQ_REMOVE(&stcb->asoc.nets, net, sctp_next); sctp_free_remote_addr(net); if (net == stcb->asoc.primary_destination) { stcb->asoc.primary_destination = NULL; sctp_select_primary_destination(stcb); } } } /* validate authentication required parameters */ if (got_random && got_hmacs) { stcb->asoc.peer_supports_auth = 1; } else { stcb->asoc.peer_supports_auth = 0; } if (!stcb->asoc.peer_supports_auth && got_chklist) { /* peer does not support auth but sent a chunks list? */ return (-31); } if (!SCTP_BASE_SYSCTL(sctp_asconf_auth_nochk) && stcb->asoc.peer_supports_asconf && !stcb->asoc.peer_supports_auth) { /* peer supports asconf but not auth? */ return (-32); } else if ((stcb->asoc.peer_supports_asconf) && (stcb->asoc.peer_supports_auth) && ((saw_asconf == 0) || (saw_asconf_ack == 0))) { return (-33); } /* concatenate the full random key */ keylen = sizeof(*p_random) + random_len + sizeof(*hmacs) + hmacs_len; if (chunks != NULL) { keylen += sizeof(*chunks) + num_chunks; } new_key = sctp_alloc_key(keylen); if (new_key != NULL) { /* copy in the RANDOM */ if (p_random != NULL) { keylen = sizeof(*p_random) + random_len; bcopy(p_random, new_key->key, keylen); } /* append in the AUTH chunks */ if (chunks != NULL) { bcopy(chunks, new_key->key + keylen, sizeof(*chunks) + num_chunks); keylen += sizeof(*chunks) + num_chunks; } /* append in the HMACs */ if (hmacs != NULL) { bcopy(hmacs, new_key->key + keylen, sizeof(*hmacs) + hmacs_len); } } else { /* failed to get memory for the key */ return (-34); } if (stcb->asoc.authinfo.peer_random != NULL) sctp_free_key(stcb->asoc.authinfo.peer_random); stcb->asoc.authinfo.peer_random = new_key; sctp_clear_cachedkeys(stcb, stcb->asoc.authinfo.assoc_keyid); sctp_clear_cachedkeys(stcb, stcb->asoc.authinfo.recv_keyid); return (0); } int sctp_set_primary_addr(struct sctp_tcb *stcb, struct sockaddr *sa, struct sctp_nets *net) { /* make sure the requested primary address exists in the assoc */ if (net == NULL && sa) net = sctp_findnet(stcb, sa); if (net == NULL) { /* didn't find the requested primary address! */ return (-1); } else { /* set the primary address */ if (net->dest_state & SCTP_ADDR_UNCONFIRMED) { /* Must be confirmed, so queue to set */ net->dest_state |= SCTP_ADDR_REQ_PRIMARY; return (0); } stcb->asoc.primary_destination = net; net->dest_state &= ~SCTP_ADDR_WAS_PRIMARY; net = TAILQ_FIRST(&stcb->asoc.nets); if (net != stcb->asoc.primary_destination) { /* * first one on the list is NOT the primary * sctp_cmpaddr() is much more efficient if the * primary is the first on the list, make it so. */ TAILQ_REMOVE(&stcb->asoc.nets, stcb->asoc.primary_destination, sctp_next); TAILQ_INSERT_HEAD(&stcb->asoc.nets, stcb->asoc.primary_destination, sctp_next); } return (0); } } int sctp_is_vtag_good(struct sctp_inpcb *inp, uint32_t tag, uint16_t lport, uint16_t rport, struct timeval *now, int save_in_twait) { /* * This function serves two purposes. It will see if a TAG can be * re-used and return 1 for yes it is ok and 0 for don't use that * tag. A secondary function it will do is purge out old tags that * can be removed. */ struct sctpvtaghead *chain; struct sctp_tagblock *twait_block; struct sctpasochead *head; struct sctp_tcb *stcb; int i; SCTP_INP_INFO_RLOCK(); head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(tag, SCTP_BASE_INFO(hashasocmark))]; if (head == NULL) { /* invalid vtag */ goto skip_vtag_check; } LIST_FOREACH(stcb, head, sctp_asocs) { /* * We choose not to lock anything here. TCB's can't be * removed since we have the read lock, so they can't be * freed on us, same thing for the INP. I may be wrong with * this assumption, but we will go with it for now :-) */ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) { continue; } if (stcb->asoc.my_vtag == tag) { /* candidate */ if (stcb->rport != rport) { continue; } if (stcb->sctp_ep->sctp_lport != lport) { continue; } /* Its a used tag set */ SCTP_INP_INFO_RUNLOCK(); return (0); } } skip_vtag_check: chain = &SCTP_BASE_INFO(vtag_timewait)[(tag % SCTP_STACK_VTAG_HASH_SIZE)]; /* Now what about timed wait ? */ if (!LIST_EMPTY(chain)) { /* * Block(s) are present, lets see if we have this tag in the * list */ LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) { for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) { if (twait_block->vtag_block[i].v_tag == 0) { /* not used */ continue; } else if ((long)twait_block->vtag_block[i].tv_sec_at_expire < now->tv_sec) { /* Audit expires this guy */ twait_block->vtag_block[i].tv_sec_at_expire = 0; twait_block->vtag_block[i].v_tag = 0; twait_block->vtag_block[i].lport = 0; twait_block->vtag_block[i].rport = 0; } else if ((twait_block->vtag_block[i].v_tag == tag) && (twait_block->vtag_block[i].lport == lport) && (twait_block->vtag_block[i].rport == rport)) { /* Bad tag, sorry :< */ SCTP_INP_INFO_RUNLOCK(); return (0); } } } } SCTP_INP_INFO_RUNLOCK(); return (1); } static sctp_assoc_t reneged_asoc_ids[256]; static uint8_t reneged_at = 0; static void sctp_drain_mbufs(struct sctp_inpcb *inp, struct sctp_tcb *stcb) { /* * We must hunt this association for MBUF's past the cumack (i.e. * out of order data that we can renege on). */ struct sctp_association *asoc; struct sctp_tmit_chunk *chk, *nchk; uint32_t cumulative_tsn_p1; struct sctp_queued_to_read *ctl, *nctl; int cnt, strmat; uint32_t gap, i; int fnd = 0; /* We look for anything larger than the cum-ack + 1 */ asoc = &stcb->asoc; if (asoc->cumulative_tsn == asoc->highest_tsn_inside_map) { /* none we can reneg on. */ return; } SCTP_STAT_INCR(sctps_protocol_drains_done); cumulative_tsn_p1 = asoc->cumulative_tsn + 1; cnt = 0; /* First look in the re-assembly queue */ chk = TAILQ_FIRST(&asoc->reasmqueue); while (chk) { /* Get the next one */ nchk = TAILQ_NEXT(chk, sctp_next); if (compare_with_wrap(chk->rec.data.TSN_seq, cumulative_tsn_p1, MAX_TSN)) { /* Yep it is above cum-ack */ cnt++; SCTP_CALC_TSN_TO_GAP(gap, chk->rec.data.TSN_seq, asoc->mapping_array_base_tsn); asoc->size_on_reasm_queue = sctp_sbspace_sub(asoc->size_on_reasm_queue, chk->send_size); sctp_ucount_decr(asoc->cnt_on_reasm_queue); SCTP_UNSET_TSN_PRESENT(asoc->mapping_array, gap); TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next); if (chk->data) { sctp_m_freem(chk->data); chk->data = NULL; } sctp_free_a_chunk(stcb, chk); } chk = nchk; } /* Ok that was fun, now we will drain all the inbound streams? */ for (strmat = 0; strmat < asoc->streamincnt; strmat++) { ctl = TAILQ_FIRST(&asoc->strmin[strmat].inqueue); while (ctl) { nctl = TAILQ_NEXT(ctl, next); if (compare_with_wrap(ctl->sinfo_tsn, cumulative_tsn_p1, MAX_TSN)) { /* Yep it is above cum-ack */ cnt++; SCTP_CALC_TSN_TO_GAP(gap, ctl->sinfo_tsn, asoc->mapping_array_base_tsn); asoc->size_on_all_streams = sctp_sbspace_sub(asoc->size_on_all_streams, ctl->length); sctp_ucount_decr(asoc->cnt_on_all_streams); SCTP_UNSET_TSN_PRESENT(asoc->mapping_array, gap); TAILQ_REMOVE(&asoc->strmin[strmat].inqueue, ctl, next); if (ctl->data) { sctp_m_freem(ctl->data); ctl->data = NULL; } sctp_free_remote_addr(ctl->whoFrom); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), ctl); SCTP_DECR_READQ_COUNT(); } ctl = nctl; } } if (cnt) { /* We must back down to see what the new highest is */ for (i = asoc->highest_tsn_inside_map; (compare_with_wrap(i, asoc->mapping_array_base_tsn, MAX_TSN) || (i == asoc->mapping_array_base_tsn)); i--) { SCTP_CALC_TSN_TO_GAP(gap, i, asoc->mapping_array_base_tsn); if (SCTP_IS_TSN_PRESENT(asoc->mapping_array, gap)) { asoc->highest_tsn_inside_map = i; fnd = 1; break; } } if (!fnd) { asoc->highest_tsn_inside_map = asoc->mapping_array_base_tsn - 1; } /* * Question, should we go through the delivery queue? The * only reason things are on here is the app not reading OR * a p-d-api up. An attacker COULD send enough in to * initiate the PD-API and then send a bunch of stuff to * other streams... these would wind up on the delivery * queue.. and then we would not get to them. But in order * to do this I then have to back-track and un-deliver * sequence numbers in streams.. el-yucko. I think for now * we will NOT look at the delivery queue and leave it to be * something to consider later. An alternative would be to * abort the P-D-API with a notification and then deliver * the data.... Or another method might be to keep track of * how many times the situation occurs and if we see a * possible attack underway just abort the association. */ #ifdef SCTP_DEBUG SCTPDBG(SCTP_DEBUG_PCB1, "Freed %d chunks from reneg harvest\n", cnt); #endif /* * Now do we need to find a new * asoc->highest_tsn_inside_map? */ asoc->last_revoke_count = cnt; (void)SCTP_OS_TIMER_STOP(&stcb->asoc.dack_timer.timer); /* sa_ignore NO_NULL_CHK */ sctp_send_sack(stcb); sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_DRAIN, SCTP_SO_NOT_LOCKED); reneged_asoc_ids[reneged_at] = sctp_get_associd(stcb); reneged_at++; } /* * Another issue, in un-setting the TSN's in the mapping array we * DID NOT adjust the highest_tsn marker. This will cause one of * two things to occur. It may cause us to do extra work in checking * for our mapping array movement. More importantly it may cause us * to SACK every datagram. This may not be a bad thing though since * we will recover once we get our cum-ack above and all this stuff * we dumped recovered. */ } void sctp_drain() { /* * We must walk the PCB lists for ALL associations here. The system * is LOW on MBUF's and needs help. This is where reneging will * occur. We really hope this does NOT happen! */ VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); struct sctp_inpcb *inp; struct sctp_tcb *stcb; SCTP_STAT_INCR(sctps_protocol_drain_calls); if (SCTP_BASE_SYSCTL(sctp_do_drain) == 0) { #ifdef VIMAGE continue; #else return; #endif } SCTP_INP_INFO_RLOCK(); LIST_FOREACH(inp, &SCTP_BASE_INFO(listhead), sctp_list) { /* For each endpoint */ SCTP_INP_RLOCK(inp); LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { /* For each association */ SCTP_TCB_LOCK(stcb); sctp_drain_mbufs(inp, stcb); SCTP_TCB_UNLOCK(stcb); } SCTP_INP_RUNLOCK(inp); } SCTP_INP_INFO_RUNLOCK(); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); } /* * start a new iterator * iterates through all endpoints and associations based on the pcb_state * flags and asoc_state. "af" (mandatory) is executed for all matching * assocs and "ef" (optional) is executed when the iterator completes. * "inpf" (optional) is executed for each new endpoint as it is being * iterated through. inpe (optional) is called when the inp completes * its way through all the stcbs. */ int sctp_initiate_iterator(inp_func inpf, asoc_func af, inp_func inpe, uint32_t pcb_state, uint32_t pcb_features, uint32_t asoc_state, void *argp, uint32_t argi, end_func ef, struct sctp_inpcb *s_inp, uint8_t chunk_output_off) { struct sctp_iterator *it = NULL; if (af == NULL) { return (-1); } SCTP_MALLOC(it, struct sctp_iterator *, sizeof(struct sctp_iterator), SCTP_M_ITER); if (it == NULL) { SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOMEM); return (ENOMEM); } memset(it, 0, sizeof(*it)); it->function_assoc = af; it->function_inp = inpf; if (inpf) it->done_current_ep = 0; else it->done_current_ep = 1; it->function_atend = ef; it->pointer = argp; it->val = argi; it->pcb_flags = pcb_state; it->pcb_features = pcb_features; it->asoc_state = asoc_state; it->function_inp_end = inpe; it->no_chunk_output = chunk_output_off; it->vn = curvnet; if (s_inp) { /* Assume lock is held here */ it->inp = s_inp; SCTP_INP_INCR_REF(it->inp); it->iterator_flags = SCTP_ITERATOR_DO_SINGLE_INP; } else { SCTP_INP_INFO_RLOCK(); it->inp = LIST_FIRST(&SCTP_BASE_INFO(listhead)); if (it->inp) { SCTP_INP_INCR_REF(it->inp); } SCTP_INP_INFO_RUNLOCK(); it->iterator_flags = SCTP_ITERATOR_DO_ALL_INP; } SCTP_IPI_ITERATOR_WQ_LOCK(); TAILQ_INSERT_TAIL(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr); if (sctp_it_ctl.iterator_running == 0) { sctp_wakeup_iterator(); } SCTP_IPI_ITERATOR_WQ_UNLOCK(); /* sa_ignore MEMLEAK {memory is put on the tailq for the iterator} */ return (0); } Index: projects/binutils-2.17/sys/netinet/sctp_uio.h =================================================================== --- projects/binutils-2.17/sys/netinet/sctp_uio.h (revision 215829) +++ projects/binutils-2.17/sys/netinet/sctp_uio.h (revision 215830) @@ -1,1166 +1,1166 @@ /*- * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /* $KAME: sctp_uio.h,v 1.11 2005/03/06 16:04:18 itojun Exp $ */ #include __FBSDID("$FreeBSD$"); #ifndef __sctp_uio_h__ #define __sctp_uio_h__ #if ! defined(_KERNEL) #include #endif #include #include #include typedef uint32_t sctp_assoc_t; /* Compatibility to previous define's */ #define sctp_stream_reset_events sctp_stream_reset_event /* On/Off setup for subscription to events */ struct sctp_event_subscribe { uint8_t sctp_data_io_event; uint8_t sctp_association_event; uint8_t sctp_address_event; uint8_t sctp_send_failure_event; uint8_t sctp_peer_error_event; uint8_t sctp_shutdown_event; uint8_t sctp_partial_delivery_event; uint8_t sctp_adaptation_layer_event; uint8_t sctp_authentication_event; uint8_t sctp_sender_dry_event; uint8_t sctp_stream_reset_event; }; /* ancillary data types */ #define SCTP_INIT 0x0001 #define SCTP_SNDRCV 0x0002 #define SCTP_EXTRCV 0x0003 /* * ancillary data structures */ struct sctp_initmsg { uint16_t sinit_num_ostreams; uint16_t sinit_max_instreams; uint16_t sinit_max_attempts; uint16_t sinit_max_init_timeo; }; /* We add 96 bytes to the size of sctp_sndrcvinfo. * This makes the current structure 128 bytes long * which is nicely 64 bit aligned but also has room * for us to add more and keep ABI compatibility. * For example, already we have the sctp_extrcvinfo * when enabled which is 48 bytes. */ /* * The assoc up needs a verfid * all sendrcvinfo's need a verfid for SENDING only. */ #define SCTP_ALIGN_RESV_PAD 96 #define SCTP_ALIGN_RESV_PAD_SHORT 80 struct sctp_sndrcvinfo { uint16_t sinfo_stream; uint16_t sinfo_ssn; uint16_t sinfo_flags; uint32_t sinfo_ppid; uint32_t sinfo_context; uint32_t sinfo_timetolive; uint32_t sinfo_tsn; uint32_t sinfo_cumtsn; sctp_assoc_t sinfo_assoc_id; uint8_t __reserve_pad[SCTP_ALIGN_RESV_PAD]; }; struct sctp_extrcvinfo { uint16_t sinfo_stream; uint16_t sinfo_ssn; uint16_t sinfo_flags; uint16_t sinfo_pr_policy; uint32_t sinfo_ppid; uint32_t sinfo_context; uint32_t sinfo_timetolive; uint32_t sinfo_tsn; uint32_t sinfo_cumtsn; sctp_assoc_t sinfo_assoc_id; uint16_t sreinfo_next_flags; uint16_t sreinfo_next_stream; uint32_t sreinfo_next_aid; uint32_t sreinfo_next_length; uint32_t sreinfo_next_ppid; uint8_t __reserve_pad[SCTP_ALIGN_RESV_PAD_SHORT]; }; #define SCTP_NO_NEXT_MSG 0x0000 #define SCTP_NEXT_MSG_AVAIL 0x0001 #define SCTP_NEXT_MSG_ISCOMPLETE 0x0002 #define SCTP_NEXT_MSG_IS_UNORDERED 0x0004 #define SCTP_NEXT_MSG_IS_NOTIFICATION 0x0008 struct sctp_snd_all_completes { uint16_t sall_stream; uint16_t sall_flags; uint32_t sall_ppid; uint32_t sall_context; uint32_t sall_num_sent; uint32_t sall_num_failed; }; /* Flags that go into the sinfo->sinfo_flags field */ #define SCTP_EOF 0x0100 /* Start shutdown procedures */ #define SCTP_ABORT 0x0200 /* Send an ABORT to peer */ #define SCTP_UNORDERED 0x0400 /* Message is un-ordered */ #define SCTP_ADDR_OVER 0x0800 /* Override the primary-address */ #define SCTP_SENDALL 0x1000 /* Send this on all associations */ #define SCTP_EOR 0x2000 /* end of message signal */ #define SCTP_SACK_IMMEDIATELY 0x4000 /* Set I-Bit */ #define INVALID_SINFO_FLAG(x) (((x) & 0xffffff00 \ & ~(SCTP_EOF | SCTP_ABORT | SCTP_UNORDERED |\ SCTP_ADDR_OVER | SCTP_SENDALL | SCTP_EOR |\ SCTP_SACK_IMMEDIATELY)) != 0) /* for the endpoint */ /* The lower byte is an enumeration of PR-SCTP policies */ #define SCTP_PR_SCTP_TTL 0x0001/* Time based PR-SCTP */ #define SCTP_PR_SCTP_BUF 0x0002/* Buffer based PR-SCTP */ #define SCTP_PR_SCTP_RTX 0x0003/* Number of retransmissions based PR-SCTP */ #define PR_SCTP_POLICY(x) ((x) & 0xff) #define PR_SCTP_ENABLED(x) (PR_SCTP_POLICY(x) != 0) #define PR_SCTP_TTL_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_TTL) #define PR_SCTP_BUF_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_BUF) #define PR_SCTP_RTX_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_RTX) #define PR_SCTP_INVALID_POLICY(x) (PR_SCTP_POLICY(x) > SCTP_PR_SCTP_RTX) /* Stat's */ struct sctp_pcbinfo { uint32_t ep_count; uint32_t asoc_count; uint32_t laddr_count; uint32_t raddr_count; uint32_t chk_count; uint32_t readq_count; uint32_t free_chunks; uint32_t stream_oque; }; struct sctp_sockstat { sctp_assoc_t ss_assoc_id; uint32_t ss_total_sndbuf; uint32_t ss_total_recv_buf; }; /* * notification event structures */ /* * association change event */ struct sctp_assoc_change { uint16_t sac_type; uint16_t sac_flags; uint32_t sac_length; uint16_t sac_state; uint16_t sac_error; uint16_t sac_outbound_streams; uint16_t sac_inbound_streams; sctp_assoc_t sac_assoc_id; }; /* sac_state values */ #define SCTP_COMM_UP 0x0001 #define SCTP_COMM_LOST 0x0002 #define SCTP_RESTART 0x0003 #define SCTP_SHUTDOWN_COMP 0x0004 #define SCTP_CANT_STR_ASSOC 0x0005 /* * Address event */ struct sctp_paddr_change { uint16_t spc_type; uint16_t spc_flags; uint32_t spc_length; struct sockaddr_storage spc_aaddr; uint32_t spc_state; uint32_t spc_error; sctp_assoc_t spc_assoc_id; uint8_t spc_padding[4]; }; /* paddr state values */ #define SCTP_ADDR_AVAILABLE 0x0001 #define SCTP_ADDR_UNREACHABLE 0x0002 #define SCTP_ADDR_REMOVED 0x0003 #define SCTP_ADDR_ADDED 0x0004 #define SCTP_ADDR_MADE_PRIM 0x0005 #define SCTP_ADDR_CONFIRMED 0x0006 /* * CAUTION: these are user exposed SCTP addr reachability states must be * compatible with SCTP_ADDR states in sctp_constants.h */ #ifdef SCTP_ACTIVE #undef SCTP_ACTIVE #endif #define SCTP_ACTIVE 0x0001 /* SCTP_ADDR_REACHABLE */ #ifdef SCTP_INACTIVE #undef SCTP_INACTIVE #endif #define SCTP_INACTIVE 0x0002 /* SCTP_ADDR_NOT_REACHABLE */ #ifdef SCTP_UNCONFIRMED #undef SCTP_UNCONFIRMED #endif #define SCTP_UNCONFIRMED 0x0200 /* SCTP_ADDR_UNCONFIRMED */ #ifdef SCTP_NOHEARTBEAT #undef SCTP_NOHEARTBEAT #endif #define SCTP_NOHEARTBEAT 0x0040 /* SCTP_ADDR_NOHB */ /* remote error events */ struct sctp_remote_error { uint16_t sre_type; uint16_t sre_flags; uint32_t sre_length; uint16_t sre_error; sctp_assoc_t sre_assoc_id; uint8_t sre_data[4]; }; /* data send failure event */ struct sctp_send_failed { uint16_t ssf_type; uint16_t ssf_flags; uint32_t ssf_length; uint32_t ssf_error; struct sctp_sndrcvinfo ssf_info; sctp_assoc_t ssf_assoc_id; uint8_t ssf_data[]; }; /* flag that indicates state of data */ #define SCTP_DATA_UNSENT 0x0001 /* inqueue never on wire */ #define SCTP_DATA_SENT 0x0002 /* on wire at failure */ /* shutdown event */ struct sctp_shutdown_event { uint16_t sse_type; uint16_t sse_flags; uint32_t sse_length; sctp_assoc_t sse_assoc_id; }; /* Adaptation layer indication stuff */ struct sctp_adaptation_event { uint16_t sai_type; uint16_t sai_flags; uint32_t sai_length; uint32_t sai_adaptation_ind; sctp_assoc_t sai_assoc_id; }; struct sctp_setadaptation { uint32_t ssb_adaptation_ind; }; /* compatible old spelling */ struct sctp_adaption_event { uint16_t sai_type; uint16_t sai_flags; uint32_t sai_length; uint32_t sai_adaption_ind; sctp_assoc_t sai_assoc_id; }; struct sctp_setadaption { uint32_t ssb_adaption_ind; }; /* * Partial Delivery API event */ struct sctp_pdapi_event { uint16_t pdapi_type; uint16_t pdapi_flags; uint32_t pdapi_length; uint32_t pdapi_indication; uint16_t pdapi_stream; uint16_t pdapi_seq; sctp_assoc_t pdapi_assoc_id; }; /* indication values */ #define SCTP_PARTIAL_DELIVERY_ABORTED 0x0001 /* * authentication key event */ struct sctp_authkey_event { uint16_t auth_type; uint16_t auth_flags; uint32_t auth_length; uint16_t auth_keynumber; uint16_t auth_altkeynumber; uint32_t auth_indication; sctp_assoc_t auth_assoc_id; }; /* indication values */ #define SCTP_AUTH_NEWKEY 0x0001 #define SCTP_AUTH_NO_AUTH 0x0002 #define SCTP_AUTH_FREE_KEY 0x0003 struct sctp_sender_dry_event { uint16_t sender_dry_type; uint16_t sender_dry_flags; uint32_t sender_dry_length; sctp_assoc_t sender_dry_assoc_id; }; /* * stream reset event */ struct sctp_stream_reset_event { uint16_t strreset_type; uint16_t strreset_flags; uint32_t strreset_length; sctp_assoc_t strreset_assoc_id; uint16_t strreset_list[]; }; /* flags in strreset_flags field */ #define SCTP_STRRESET_INBOUND_STR 0x0001 #define SCTP_STRRESET_OUTBOUND_STR 0x0002 #define SCTP_STRRESET_ALL_STREAMS 0x0004 #define SCTP_STRRESET_STREAM_LIST 0x0008 #define SCTP_STRRESET_FAILED 0x0010 #define SCTP_STRRESET_ADD_STREAM 0x0020 /* SCTP notification event */ struct sctp_tlv { uint16_t sn_type; uint16_t sn_flags; uint32_t sn_length; }; union sctp_notification { struct sctp_tlv sn_header; struct sctp_assoc_change sn_assoc_change; struct sctp_paddr_change sn_paddr_change; struct sctp_remote_error sn_remote_error; struct sctp_send_failed sn_send_failed; struct sctp_shutdown_event sn_shutdown_event; struct sctp_adaptation_event sn_adaptation_event; /* compatibility same as above */ struct sctp_adaption_event sn_adaption_event; struct sctp_pdapi_event sn_pdapi_event; struct sctp_authkey_event sn_auth_event; struct sctp_sender_dry_event sn_sender_dry_event; struct sctp_stream_reset_event sn_strreset_event; }; /* notification types */ #define SCTP_ASSOC_CHANGE 0x0001 #define SCTP_PEER_ADDR_CHANGE 0x0002 #define SCTP_REMOTE_ERROR 0x0003 #define SCTP_SEND_FAILED 0x0004 #define SCTP_SHUTDOWN_EVENT 0x0005 #define SCTP_ADAPTATION_INDICATION 0x0006 /* same as above */ #define SCTP_ADAPTION_INDICATION 0x0006 #define SCTP_PARTIAL_DELIVERY_EVENT 0x0007 #define SCTP_AUTHENTICATION_EVENT 0x0008 #define SCTP_STREAM_RESET_EVENT 0x0009 #define SCTP_SENDER_DRY_EVENT 0x000a #define SCTP__NOTIFICATIONS_STOPPED_EVENT 0x000b /* we don't send this */ /* * socket option structs */ struct sctp_paddrparams { struct sockaddr_storage spp_address; sctp_assoc_t spp_assoc_id; uint32_t spp_hbinterval; uint32_t spp_pathmtu; uint32_t spp_flags; uint32_t spp_ipv6_flowlabel; uint16_t spp_pathmaxrxt; uint8_t spp_ipv4_tos; }; #define SPP_HB_ENABLE 0x00000001 #define SPP_HB_DISABLE 0x00000002 #define SPP_HB_DEMAND 0x00000004 #define SPP_PMTUD_ENABLE 0x00000008 #define SPP_PMTUD_DISABLE 0x00000010 #define SPP_HB_TIME_IS_ZERO 0x00000080 #define SPP_IPV6_FLOWLABEL 0x00000100 #define SPP_IPV4_TOS 0x00000200 struct sctp_paddrinfo { struct sockaddr_storage spinfo_address; sctp_assoc_t spinfo_assoc_id; int32_t spinfo_state; uint32_t spinfo_cwnd; uint32_t spinfo_srtt; uint32_t spinfo_rto; uint32_t spinfo_mtu; }; struct sctp_rtoinfo { sctp_assoc_t srto_assoc_id; uint32_t srto_initial; uint32_t srto_max; uint32_t srto_min; }; struct sctp_assocparams { sctp_assoc_t sasoc_assoc_id; uint32_t sasoc_peer_rwnd; uint32_t sasoc_local_rwnd; uint32_t sasoc_cookie_life; uint16_t sasoc_asocmaxrxt; uint16_t sasoc_number_peer_destinations; }; struct sctp_setprim { struct sockaddr_storage ssp_addr; sctp_assoc_t ssp_assoc_id; uint8_t ssp_padding[4]; }; struct sctp_setpeerprim { struct sockaddr_storage sspp_addr; sctp_assoc_t sspp_assoc_id; uint8_t sspp_padding[4]; }; struct sctp_getaddresses { sctp_assoc_t sget_assoc_id; /* addr is filled in for N * sockaddr_storage */ struct sockaddr addr[1]; }; struct sctp_setstrm_timeout { sctp_assoc_t ssto_assoc_id; uint32_t ssto_timeout; uint32_t ssto_streamid_start; uint32_t ssto_streamid_end; }; struct sctp_status { sctp_assoc_t sstat_assoc_id; int32_t sstat_state; uint32_t sstat_rwnd; uint16_t sstat_unackdata; uint16_t sstat_penddata; uint16_t sstat_instrms; uint16_t sstat_outstrms; uint32_t sstat_fragmentation_point; struct sctp_paddrinfo sstat_primary; }; /* * AUTHENTICATION support */ /* SCTP_AUTH_CHUNK */ struct sctp_authchunk { uint8_t sauth_chunk; }; /* SCTP_AUTH_KEY */ struct sctp_authkey { sctp_assoc_t sca_assoc_id; uint16_t sca_keynumber; uint8_t sca_key[]; }; /* SCTP_HMAC_IDENT */ struct sctp_hmacalgo { uint32_t shmac_number_of_idents; uint16_t shmac_idents[]; }; /* AUTH hmac_id */ #define SCTP_AUTH_HMAC_ID_RSVD 0x0000 #define SCTP_AUTH_HMAC_ID_SHA1 0x0001 /* default, mandatory */ #define SCTP_AUTH_HMAC_ID_SHA256 0x0003 #define SCTP_AUTH_HMAC_ID_SHA224 0x0004 #define SCTP_AUTH_HMAC_ID_SHA384 0x0005 #define SCTP_AUTH_HMAC_ID_SHA512 0x0006 /* SCTP_AUTH_ACTIVE_KEY / SCTP_AUTH_DELETE_KEY */ struct sctp_authkeyid { sctp_assoc_t scact_assoc_id; uint16_t scact_keynumber; }; /* SCTP_PEER_AUTH_CHUNKS / SCTP_LOCAL_AUTH_CHUNKS */ struct sctp_authchunks { sctp_assoc_t gauth_assoc_id; uint8_t gauth_chunks[]; }; struct sctp_assoc_value { sctp_assoc_t assoc_id; uint32_t assoc_value; }; struct sctp_assoc_ids { uint32_t gaids_number_of_ids; sctp_assoc_t gaids_assoc_id[]; }; struct sctp_sack_info { sctp_assoc_t sack_assoc_id; uint32_t sack_delay; uint32_t sack_freq; }; struct sctp_timeouts { sctp_assoc_t stimo_assoc_id; uint32_t stimo_init; uint32_t stimo_data; uint32_t stimo_sack; uint32_t stimo_shutdown; uint32_t stimo_heartbeat; uint32_t stimo_cookie; uint32_t stimo_shutdownack; }; struct sctp_cwnd_args { struct sctp_nets *net; /* network to *//* FIXME: LP64 issue */ uint32_t cwnd_new_value;/* cwnd in k */ uint32_t pseudo_cumack; uint16_t inflight; /* flightsize in k */ uint16_t cwnd_augment; /* increment to it */ uint8_t meets_pseudo_cumack; uint8_t need_new_pseudo_cumack; uint8_t cnt_in_send; uint8_t cnt_in_str; }; struct sctp_blk_args { uint32_t onsb; /* in 1k bytes */ uint32_t sndlen; /* len of send being attempted */ uint32_t peer_rwnd; /* rwnd of peer */ uint16_t send_sent_qcnt;/* chnk cnt */ uint16_t stream_qcnt; /* chnk cnt */ uint16_t chunks_on_oque;/* chunks out */ uint16_t flight_size; /* flight size in k */ }; /* * Max we can reset in one setting, note this is dictated not by the define * but the size of a mbuf cluster so don't change this define and think you * can specify more. You must do multiple resets if you want to reset more * than SCTP_MAX_EXPLICIT_STR_RESET. */ #define SCTP_MAX_EXPLICT_STR_RESET 1000 #define SCTP_RESET_LOCAL_RECV 0x0001 #define SCTP_RESET_LOCAL_SEND 0x0002 #define SCTP_RESET_BOTH 0x0003 #define SCTP_RESET_TSN 0x0004 #define SCTP_RESET_ADD_STREAMS 0x0005 struct sctp_stream_reset { sctp_assoc_t strrst_assoc_id; uint16_t strrst_flags; uint16_t strrst_num_streams; /* 0 == ALL */ uint16_t strrst_list[]; /* list if strrst_num_streams is not 0 */ }; struct sctp_get_nonce_values { sctp_assoc_t gn_assoc_id; uint32_t gn_peers_tag; uint32_t gn_local_tag; }; /* Debugging logs */ struct sctp_str_log { void *stcb; /* FIXME: LP64 issue */ uint32_t n_tsn; uint32_t e_tsn; uint16_t n_sseq; uint16_t e_sseq; uint16_t strm; }; struct sctp_sb_log { void *stcb; /* FIXME: LP64 issue */ uint32_t so_sbcc; uint32_t stcb_sbcc; uint32_t incr; }; struct sctp_fr_log { uint32_t largest_tsn; uint32_t largest_new_tsn; uint32_t tsn; }; struct sctp_fr_map { uint32_t base; uint32_t cum; uint32_t high; }; struct sctp_rwnd_log { uint32_t rwnd; uint32_t send_size; uint32_t overhead; uint32_t new_rwnd; }; struct sctp_mbcnt_log { uint32_t total_queue_size; uint32_t size_change; uint32_t total_queue_mb_size; uint32_t mbcnt_change; }; struct sctp_sack_log { uint32_t cumack; uint32_t oldcumack; uint32_t tsn; uint16_t numGaps; uint16_t numDups; }; struct sctp_lock_log { void *sock; /* FIXME: LP64 issue */ void *inp; /* FIXME: LP64 issue */ uint8_t tcb_lock; uint8_t inp_lock; uint8_t info_lock; uint8_t sock_lock; uint8_t sockrcvbuf_lock; uint8_t socksndbuf_lock; uint8_t create_lock; uint8_t resv; }; struct sctp_rto_log { void *net; /* FIXME: LP64 issue */ uint32_t rtt; }; struct sctp_nagle_log { void *stcb; /* FIXME: LP64 issue */ uint32_t total_flight; uint32_t total_in_queue; uint16_t count_in_queue; uint16_t count_in_flight; }; struct sctp_sbwake_log { void *stcb; /* FIXME: LP64 issue */ uint16_t send_q; uint16_t sent_q; uint16_t flight; uint16_t wake_cnt; uint8_t stream_qcnt; /* chnk cnt */ uint8_t chunks_on_oque; /* chunks out */ uint8_t sbflags; uint8_t sctpflags; }; struct sctp_misc_info { uint32_t log1; uint32_t log2; uint32_t log3; uint32_t log4; }; struct sctp_log_closing { void *inp; /* FIXME: LP64 issue */ void *stcb; /* FIXME: LP64 issue */ uint32_t sctp_flags; uint16_t state; int16_t loc; }; struct sctp_mbuf_log { struct mbuf *mp; /* FIXME: LP64 issue */ caddr_t ext; caddr_t data; uint16_t size; uint8_t refcnt; uint8_t mbuf_flags; }; struct sctp_cwnd_log { uint64_t time_event; uint8_t from; uint8_t event_type; uint8_t resv[2]; union { struct sctp_log_closing close; struct sctp_blk_args blk; struct sctp_cwnd_args cwnd; struct sctp_str_log strlog; struct sctp_fr_log fr; struct sctp_fr_map map; struct sctp_rwnd_log rwnd; struct sctp_mbcnt_log mbcnt; struct sctp_sack_log sack; struct sctp_lock_log lock; struct sctp_rto_log rto; struct sctp_sb_log sb; struct sctp_nagle_log nagle; struct sctp_sbwake_log wake; struct sctp_mbuf_log mb; struct sctp_misc_info misc; } x; }; struct sctp_cwnd_log_req { int32_t num_in_log; /* Number in log */ int32_t num_ret; /* Number returned */ int32_t start_at; /* start at this one */ int32_t end_at; /* end at this one */ struct sctp_cwnd_log log[]; }; struct sctp_timeval { uint32_t tv_sec; uint32_t tv_usec; }; struct sctpstat { struct sctp_timeval sctps_discontinuitytime; /* sctpStats 18 * (TimeStamp) */ /* MIB according to RFC 3873 */ uint32_t sctps_currestab; /* sctpStats 1 (Gauge32) */ uint32_t sctps_activeestab; /* sctpStats 2 (Counter32) */ uint32_t sctps_restartestab; uint32_t sctps_collisionestab; uint32_t sctps_passiveestab; /* sctpStats 3 (Counter32) */ uint32_t sctps_aborted; /* sctpStats 4 (Counter32) */ uint32_t sctps_shutdown;/* sctpStats 5 (Counter32) */ uint32_t sctps_outoftheblue; /* sctpStats 6 (Counter32) */ uint32_t sctps_checksumerrors; /* sctpStats 7 (Counter32) */ uint32_t sctps_outcontrolchunks; /* sctpStats 8 (Counter64) */ uint32_t sctps_outorderchunks; /* sctpStats 9 (Counter64) */ uint32_t sctps_outunorderchunks; /* sctpStats 10 (Counter64) */ uint32_t sctps_incontrolchunks; /* sctpStats 11 (Counter64) */ uint32_t sctps_inorderchunks; /* sctpStats 12 (Counter64) */ uint32_t sctps_inunorderchunks; /* sctpStats 13 (Counter64) */ uint32_t sctps_fragusrmsgs; /* sctpStats 14 (Counter64) */ uint32_t sctps_reasmusrmsgs; /* sctpStats 15 (Counter64) */ uint32_t sctps_outpackets; /* sctpStats 16 (Counter64) */ uint32_t sctps_inpackets; /* sctpStats 17 (Counter64) */ /* input statistics: */ uint32_t sctps_recvpackets; /* total input packets */ uint32_t sctps_recvdatagrams; /* total input datagrams */ uint32_t sctps_recvpktwithdata; /* total packets that had data */ uint32_t sctps_recvsacks; /* total input SACK chunks */ uint32_t sctps_recvdata;/* total input DATA chunks */ uint32_t sctps_recvdupdata; /* total input duplicate DATA chunks */ uint32_t sctps_recvheartbeat; /* total input HB chunks */ uint32_t sctps_recvheartbeatack; /* total input HB-ACK chunks */ uint32_t sctps_recvecne;/* total input ECNE chunks */ uint32_t sctps_recvauth;/* total input AUTH chunks */ uint32_t sctps_recvauthmissing; /* total input chunks missing AUTH */ uint32_t sctps_recvivalhmacid; /* total number of invalid HMAC ids * received */ uint32_t sctps_recvivalkeyid; /* total number of invalid secret ids * received */ uint32_t sctps_recvauthfailed; /* total number of auth failed */ uint32_t sctps_recvexpress; /* total fast path receives all one * chunk */ uint32_t sctps_recvexpressm; /* total fast path multi-part data */ uint32_t sctps_recvnocrc; uint32_t sctps_recvswcrc; uint32_t sctps_recvhwcrc; /* output statistics: */ uint32_t sctps_sendpackets; /* total output packets */ uint32_t sctps_sendsacks; /* total output SACKs */ uint32_t sctps_senddata;/* total output DATA chunks */ uint32_t sctps_sendretransdata; /* total output retransmitted DATA * chunks */ uint32_t sctps_sendfastretrans; /* total output fast retransmitted * DATA chunks */ uint32_t sctps_sendmultfastretrans; /* total FR's that happened * more than once to same * chunk (u-del multi-fr * algo). */ uint32_t sctps_sendheartbeat; /* total output HB chunks */ uint32_t sctps_sendecne;/* total output ECNE chunks */ uint32_t sctps_sendauth;/* total output AUTH chunks FIXME */ uint32_t sctps_senderrors; /* ip_output error counter */ uint32_t sctps_sendnocrc; uint32_t sctps_sendswcrc; uint32_t sctps_sendhwcrc; /* PCKDROPREP statistics: */ uint32_t sctps_pdrpfmbox; /* Packet drop from middle box */ uint32_t sctps_pdrpfehos; /* P-drop from end host */ uint32_t sctps_pdrpmbda;/* P-drops with data */ uint32_t sctps_pdrpmbct;/* P-drops, non-data, non-endhost */ uint32_t sctps_pdrpbwrpt; /* P-drop, non-endhost, bandwidth rep * only */ uint32_t sctps_pdrpcrupt; /* P-drop, not enough for chunk header */ uint32_t sctps_pdrpnedat; /* P-drop, not enough data to confirm */ uint32_t sctps_pdrppdbrk; /* P-drop, where process_chunk_drop * said break */ uint32_t sctps_pdrptsnnf; /* P-drop, could not find TSN */ uint32_t sctps_pdrpdnfnd; /* P-drop, attempt reverse TSN lookup */ uint32_t sctps_pdrpdiwnp; /* P-drop, e-host confirms zero-rwnd */ uint32_t sctps_pdrpdizrw; /* P-drop, midbox confirms no space */ uint32_t sctps_pdrpbadd;/* P-drop, data did not match TSN */ uint32_t sctps_pdrpmark;/* P-drop, TSN's marked for Fast Retran */ /* timeouts */ uint32_t sctps_timoiterator; /* Number of iterator timers that * fired */ uint32_t sctps_timodata;/* Number of T3 data time outs */ uint32_t sctps_timowindowprobe; /* Number of window probe (T3) timers * that fired */ uint32_t sctps_timoinit;/* Number of INIT timers that fired */ uint32_t sctps_timosack;/* Number of sack timers that fired */ uint32_t sctps_timoshutdown; /* Number of shutdown timers that * fired */ uint32_t sctps_timoheartbeat; /* Number of heartbeat timers that * fired */ uint32_t sctps_timocookie; /* Number of times a cookie timeout * fired */ uint32_t sctps_timosecret; /* Number of times an endpoint changed * its cookie secret */ uint32_t sctps_timopathmtu; /* Number of PMTU timers that fired */ uint32_t sctps_timoshutdownack; /* Number of shutdown ack timers that * fired */ uint32_t sctps_timoshutdownguard; /* Number of shutdown guard * timers that fired */ uint32_t sctps_timostrmrst; /* Number of stream reset timers that * fired */ uint32_t sctps_timoearlyfr; /* Number of early FR timers that * fired */ uint32_t sctps_timoasconf; /* Number of times an asconf timer * fired */ uint32_t sctps_timodelprim; /* Number of times a prim_deleted * timer fired */ uint32_t sctps_timoautoclose; /* Number of times auto close timer * fired */ uint32_t sctps_timoassockill; /* Number of asoc free timers expired */ uint32_t sctps_timoinpkill; /* Number of inp free timers expired */ /* Early fast retransmission counters */ uint32_t sctps_earlyfrstart; uint32_t sctps_earlyfrstop; uint32_t sctps_earlyfrmrkretrans; uint32_t sctps_earlyfrstpout; uint32_t sctps_earlyfrstpidsck1; uint32_t sctps_earlyfrstpidsck2; uint32_t sctps_earlyfrstpidsck3; uint32_t sctps_earlyfrstpidsck4; uint32_t sctps_earlyfrstrid; uint32_t sctps_earlyfrstrout; uint32_t sctps_earlyfrstrtmr; /* others */ uint32_t sctps_hdrops; /* packet shorter than header */ uint32_t sctps_badsum; /* checksum error */ uint32_t sctps_noport; /* no endpoint for port */ uint32_t sctps_badvtag; /* bad v-tag */ uint32_t sctps_badsid; /* bad SID */ uint32_t sctps_nomem; /* no memory */ uint32_t sctps_fastretransinrtt; /* number of multiple FR in a * RTT window */ uint32_t sctps_markedretrans; uint32_t sctps_naglesent; /* nagle allowed sending */ uint32_t sctps_naglequeued; /* nagle doesn't allow sending */ uint32_t sctps_maxburstqueued; /* max burst doesn't allow sending */ uint32_t sctps_ifnomemqueued; /* look ahead tells us no memory in * interface ring buffer OR we had a * send error and are queuing one * send. */ uint32_t sctps_windowprobed; /* total number of window probes sent */ uint32_t sctps_lowlevelerr; /* total times an output error causes * us to clamp down on next user send. */ uint32_t sctps_lowlevelerrusr; /* total times sctp_senderrors were * caused from a user send from a user * invoked send not a sack response */ uint32_t sctps_datadropchklmt; /* Number of in data drops due to * chunk limit reached */ uint32_t sctps_datadroprwnd; /* Number of in data drops due to rwnd * limit reached */ uint32_t sctps_ecnereducedcwnd; /* Number of times a ECN reduced the * cwnd */ uint32_t sctps_vtagexpress; /* Used express lookup via vtag */ uint32_t sctps_vtagbogus; /* Collision in express lookup. */ uint32_t sctps_primary_randry; /* Number of times the sender ran dry * of user data on primary */ uint32_t sctps_cmt_randry; /* Same for above */ uint32_t sctps_slowpath_sack; /* Sacks the slow way */ uint32_t sctps_wu_sacks_sent; /* Window Update only sacks sent */ uint32_t sctps_sends_with_flags; /* number of sends with * sinfo_flags !=0 */ - uint32_t sctps_sends_with_unord; /* number of unordered sends */ + uint32_t sctps_sends_with_unord; /* number of unordered sends */ uint32_t sctps_sends_with_eof; /* number of sends with EOF flag set */ uint32_t sctps_sends_with_abort; /* number of sends with ABORT * flag set */ uint32_t sctps_protocol_drain_calls; /* number of times protocol * drain called */ uint32_t sctps_protocol_drains_done; /* number of times we did a * protocol drain */ uint32_t sctps_read_peeks; /* Number of times recv was called * with peek */ uint32_t sctps_cached_chk; /* Number of cached chunks used */ uint32_t sctps_cached_strmoq; /* Number of cached stream oq's used */ uint32_t sctps_left_abandon; /* Number of unread messages abandoned * by close */ uint32_t sctps_send_burst_avoid; /* Unused */ uint32_t sctps_send_cwnd_avoid; /* Send cwnd full avoidance, already * max burst inflight to net */ uint32_t sctps_fwdtsn_map_over; /* number of map array over-runs via * fwd-tsn's */ uint32_t sctps_reserved[32]; /* Future ABI compat - remove int's * from here when adding new */ }; #define SCTP_STAT_INCR(_x) SCTP_STAT_INCR_BY(_x,1) #define SCTP_STAT_DECR(_x) SCTP_STAT_DECR_BY(_x,1) #if defined(__FreeBSD__) && defined(SMP) && defined(SCTP_USE_PERCPU_STAT) #define SCTP_STAT_INCR_BY(_x,_d) (SCTP_BASE_STATS[PCPU_GET(cpuid)]._x += _d) #define SCTP_STAT_DECR_BY(_x,_d) (SCTP_BASE_STATS[PCPU_GET(cpuid)]._x -= _d) #else #define SCTP_STAT_INCR_BY(_x,_d) atomic_add_int(&SCTP_BASE_STAT(_x), _d) #define SCTP_STAT_DECR_BY(_x,_d) atomic_subtract_int(&SCTP_BASE_STAT(_x), _d) #endif /* The following macros are for handling MIB values, */ #define SCTP_STAT_INCR_COUNTER32(_x) SCTP_STAT_INCR(_x) #define SCTP_STAT_INCR_COUNTER64(_x) SCTP_STAT_INCR(_x) #define SCTP_STAT_INCR_GAUGE32(_x) SCTP_STAT_INCR(_x) #define SCTP_STAT_DECR_COUNTER32(_x) SCTP_STAT_DECR(_x) #define SCTP_STAT_DECR_COUNTER64(_x) SCTP_STAT_DECR(_x) #define SCTP_STAT_DECR_GAUGE32(_x) SCTP_STAT_DECR(_x) union sctp_sockstore { #if defined(INET) || !defined(_KERNEL) struct sockaddr_in sin; #endif #if defined(INET6) || !defined(_KERNEL) struct sockaddr_in6 sin6; #endif struct sockaddr sa; }; /***********************************/ /* And something for us old timers */ /***********************************/ #ifndef ntohll #include #define ntohll(x) be64toh(x) #endif #ifndef htonll #include #define htonll(x) htobe64(x) #endif /***********************************/ struct xsctp_inpcb { uint32_t last; uint32_t flags; uint32_t features; uint32_t total_sends; uint32_t total_recvs; uint32_t total_nospaces; uint32_t fragmentation_point; uint16_t local_port; uint16_t qlen; uint16_t maxqlen; uint32_t extra_padding[32]; /* future */ }; struct xsctp_tcb { union sctp_sockstore primary_addr; /* sctpAssocEntry 5/6 */ uint32_t last; uint32_t heartbeat_interval; /* sctpAssocEntry 7 */ uint32_t state; /* sctpAssocEntry 8 */ uint32_t in_streams; /* sctpAssocEntry 9 */ uint32_t out_streams; /* sctpAssocEntry 10 */ uint32_t max_nr_retrans;/* sctpAssocEntry 11 */ uint32_t primary_process; /* sctpAssocEntry 12 */ uint32_t T1_expireries; /* sctpAssocEntry 13 */ uint32_t T2_expireries; /* sctpAssocEntry 14 */ uint32_t retransmitted_tsns; /* sctpAssocEntry 15 */ uint32_t total_sends; uint32_t total_recvs; uint32_t local_tag; uint32_t remote_tag; uint32_t initial_tsn; uint32_t highest_tsn; uint32_t cumulative_tsn; uint32_t cumulative_tsn_ack; uint32_t mtu; uint32_t refcnt; uint16_t local_port; /* sctpAssocEntry 3 */ uint16_t remote_port; /* sctpAssocEntry 4 */ struct sctp_timeval start_time; /* sctpAssocEntry 16 */ struct sctp_timeval discontinuity_time; /* sctpAssocEntry 17 */ uint32_t peers_rwnd; sctp_assoc_t assoc_id; /* sctpAssocEntry 1 */ uint32_t extra_padding[32]; /* future */ }; struct xsctp_laddr { union sctp_sockstore address; /* sctpAssocLocalAddrEntry 1/2 */ uint32_t last; struct sctp_timeval start_time; /* sctpAssocLocalAddrEntry 3 */ uint32_t extra_padding[32]; /* future */ }; struct xsctp_raddr { union sctp_sockstore address; /* sctpAssocLocalRemEntry 1/2 */ uint32_t last; uint32_t rto; /* sctpAssocLocalRemEntry 5 */ uint32_t max_path_rtx; /* sctpAssocLocalRemEntry 6 */ uint32_t rtx; /* sctpAssocLocalRemEntry 7 */ uint32_t error_counter; /* */ uint32_t cwnd; /* */ uint32_t flight_size; /* */ uint32_t mtu; /* */ uint8_t active; /* sctpAssocLocalRemEntry 3 */ uint8_t confirmed; /* */ uint8_t heartbeat_enabled; /* sctpAssocLocalRemEntry 4 */ struct sctp_timeval start_time; /* sctpAssocLocalRemEntry 8 */ uint32_t rtt; uint32_t extra_padding[32]; /* future */ }; #define SCTP_MAX_LOGGING_SIZE 30000 #define SCTP_TRACE_PARAMS 6 /* This number MUST be even */ struct sctp_log_entry { uint64_t timestamp; uint32_t subsys; uint32_t padding; uint32_t params[SCTP_TRACE_PARAMS]; }; struct sctp_log { struct sctp_log_entry entry[SCTP_MAX_LOGGING_SIZE]; uint32_t index; uint32_t padding; }; /* * Kernel defined for sctp_send */ #if defined(_KERNEL) || defined(__Userspace__) int sctp_lower_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *i_pak, struct mbuf *control, int flags, struct sctp_sndrcvinfo *srcv ,struct thread *p ); int sctp_sorecvmsg(struct socket *so, struct uio *uio, struct mbuf **mp, struct sockaddr *from, int fromlen, int *msg_flags, struct sctp_sndrcvinfo *sinfo, int filling_sinfo); #endif /* * API system calls */ #if !(defined(_KERNEL)) && !(defined(__Userspace__)) __BEGIN_DECLS int sctp_peeloff __P((int, sctp_assoc_t)); int sctp_bindx __P((int, struct sockaddr *, int, int)); int sctp_connectx __P((int, const struct sockaddr *, int, sctp_assoc_t *)); int sctp_getaddrlen __P((sa_family_t)); int sctp_getpaddrs __P((int, sctp_assoc_t, struct sockaddr **)); void sctp_freepaddrs __P((struct sockaddr *)); int sctp_getladdrs __P((int, sctp_assoc_t, struct sockaddr **)); void sctp_freeladdrs __P((struct sockaddr *)); int sctp_opt_info __P((int, sctp_assoc_t, int, void *, socklen_t *)); ssize_t sctp_sendmsg __P((int, const void *, size_t, const struct sockaddr *, socklen_t, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t)); ssize_t sctp_send __P((int sd, const void *msg, size_t len, const struct sctp_sndrcvinfo *sinfo, int flags)); ssize_t sctp_sendx __P((int sd, const void *msg, size_t len, struct sockaddr *addrs, int addrcnt, struct sctp_sndrcvinfo *sinfo, int flags)); ssize_t sctp_sendmsgx __P((int sd, const void *, size_t, struct sockaddr *, int, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t)); sctp_assoc_t sctp_getassocid __P((int sd, struct sockaddr *sa)); ssize_t sctp_recvmsg __P((int, void *, size_t, struct sockaddr *, socklen_t *, struct sctp_sndrcvinfo *, int *)); __END_DECLS #endif /* !_KERNEL */ #endif /* !__sctp_uio_h__ */ Index: projects/binutils-2.17/sys/sys/elf_common.h =================================================================== --- projects/binutils-2.17/sys/sys/elf_common.h (revision 215829) +++ projects/binutils-2.17/sys/sys/elf_common.h (revision 215830) @@ -1,963 +1,964 @@ /*- * Copyright (c) 1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_ELF_COMMON_H_ #define _SYS_ELF_COMMON_H_ 1 /* * ELF definitions that are independent of architecture or word size. */ /* * Note header. The ".note" section contains an array of notes. Each * begins with this header, aligned to a word boundary. Immediately * following the note header is n_namesz bytes of name, padded to the * next word boundary. Then comes n_descsz bytes of descriptor, again * padded to a word boundary. The values of n_namesz and n_descsz do * not include the padding. */ typedef struct { u_int32_t n_namesz; /* Length of name. */ u_int32_t n_descsz; /* Length of descriptor. */ u_int32_t n_type; /* Type of this note. */ } Elf_Note; /* * The header for GNU-style hash sections. */ typedef struct { u_int32_t gh_nbuckets; /* Number of hash buckets. */ u_int32_t gh_symndx; /* First visible symbol in .dynsym. */ u_int32_t gh_maskwords; /* #maskwords used in bloom filter. */ u_int32_t gh_shift2; /* Bloom filter shift count. */ } Elf_GNU_Hash_Header; /* Indexes into the e_ident array. Keep synced with http://www.sco.com/developers/gabi/latest/ch4.eheader.html */ #define EI_MAG0 0 /* Magic number, byte 0. */ #define EI_MAG1 1 /* Magic number, byte 1. */ #define EI_MAG2 2 /* Magic number, byte 2. */ #define EI_MAG3 3 /* Magic number, byte 3. */ #define EI_CLASS 4 /* Class of machine. */ #define EI_DATA 5 /* Data format. */ #define EI_VERSION 6 /* ELF format version. */ #define EI_OSABI 7 /* Operating system / ABI identification */ #define EI_ABIVERSION 8 /* ABI version */ #define OLD_EI_BRAND 8 /* Start of architecture identification. */ #define EI_PAD 9 /* Start of padding (per SVR4 ABI). */ #define EI_NIDENT 16 /* Size of e_ident array. */ /* Values for the magic number bytes. */ #define ELFMAG0 0x7f #define ELFMAG1 'E' #define ELFMAG2 'L' #define ELFMAG3 'F' #define ELFMAG "\177ELF" /* magic string */ #define SELFMAG 4 /* magic string size */ /* Values for e_ident[EI_VERSION] and e_version. */ #define EV_NONE 0 #define EV_CURRENT 1 /* Values for e_ident[EI_CLASS]. */ #define ELFCLASSNONE 0 /* Unknown class. */ #define ELFCLASS32 1 /* 32-bit architecture. */ #define ELFCLASS64 2 /* 64-bit architecture. */ /* Values for e_ident[EI_DATA]. */ #define ELFDATANONE 0 /* Unknown data format. */ #define ELFDATA2LSB 1 /* 2's complement little-endian. */ #define ELFDATA2MSB 2 /* 2's complement big-endian. */ /* Values for e_ident[EI_OSABI]. */ #define ELFOSABI_NONE 0 /* UNIX System V ABI */ #define ELFOSABI_HPUX 1 /* HP-UX operating system */ #define ELFOSABI_NETBSD 2 /* NetBSD */ #define ELFOSABI_LINUX 3 /* GNU/Linux */ #define ELFOSABI_HURD 4 /* GNU/Hurd */ #define ELFOSABI_86OPEN 5 /* 86Open common IA32 ABI */ #define ELFOSABI_SOLARIS 6 /* Solaris */ #define ELFOSABI_AIX 7 /* AIX */ #define ELFOSABI_IRIX 8 /* IRIX */ #define ELFOSABI_FREEBSD 9 /* FreeBSD */ #define ELFOSABI_TRU64 10 /* TRU64 UNIX */ #define ELFOSABI_MODESTO 11 /* Novell Modesto */ #define ELFOSABI_OPENBSD 12 /* OpenBSD */ #define ELFOSABI_OPENVMS 13 /* Open VMS */ #define ELFOSABI_NSK 14 /* HP Non-Stop Kernel */ #define ELFOSABI_AROS 15 /* Amiga Research OS */ #define ELFOSABI_ARM 97 /* ARM */ #define ELFOSABI_STANDALONE 255 /* Standalone (embedded) application */ #define ELFOSABI_SYSV ELFOSABI_NONE /* symbol used in old spec */ #define ELFOSABI_MONTEREY ELFOSABI_AIX /* Monterey */ /* e_ident */ #define IS_ELF(ehdr) ((ehdr).e_ident[EI_MAG0] == ELFMAG0 && \ (ehdr).e_ident[EI_MAG1] == ELFMAG1 && \ (ehdr).e_ident[EI_MAG2] == ELFMAG2 && \ (ehdr).e_ident[EI_MAG3] == ELFMAG3) /* Values for e_type. */ #define ET_NONE 0 /* Unknown type. */ #define ET_REL 1 /* Relocatable. */ #define ET_EXEC 2 /* Executable. */ #define ET_DYN 3 /* Shared object. */ #define ET_CORE 4 /* Core file. */ #define ET_LOOS 0xfe00 /* First operating system specific. */ #define ET_HIOS 0xfeff /* Last operating system-specific. */ #define ET_LOPROC 0xff00 /* First processor-specific. */ #define ET_HIPROC 0xffff /* Last processor-specific. */ /* Values for e_machine. */ #define EM_NONE 0 /* Unknown machine. */ #define EM_M32 1 /* AT&T WE32100. */ #define EM_SPARC 2 /* Sun SPARC. */ #define EM_386 3 /* Intel i386. */ #define EM_68K 4 /* Motorola 68000. */ #define EM_88K 5 /* Motorola 88000. */ #define EM_860 7 /* Intel i860. */ #define EM_MIPS 8 /* MIPS R3000 Big-Endian only. */ #define EM_S370 9 /* IBM System/370. */ #define EM_MIPS_RS3_LE 10 /* MIPS R3000 Little-Endian. */ #define EM_PARISC 15 /* HP PA-RISC. */ #define EM_VPP500 17 /* Fujitsu VPP500. */ #define EM_SPARC32PLUS 18 /* SPARC v8plus. */ #define EM_960 19 /* Intel 80960. */ #define EM_PPC 20 /* PowerPC 32-bit. */ #define EM_PPC64 21 /* PowerPC 64-bit. */ #define EM_S390 22 /* IBM System/390. */ #define EM_V800 36 /* NEC V800. */ #define EM_FR20 37 /* Fujitsu FR20. */ #define EM_RH32 38 /* TRW RH-32. */ #define EM_RCE 39 /* Motorola RCE. */ #define EM_ARM 40 /* ARM. */ #define EM_SH 42 /* Hitachi SH. */ #define EM_SPARCV9 43 /* SPARC v9 64-bit. */ #define EM_TRICORE 44 /* Siemens TriCore embedded processor. */ #define EM_ARC 45 /* Argonaut RISC Core. */ #define EM_H8_300 46 /* Hitachi H8/300. */ #define EM_H8_300H 47 /* Hitachi H8/300H. */ #define EM_H8S 48 /* Hitachi H8S. */ #define EM_H8_500 49 /* Hitachi H8/500. */ #define EM_IA_64 50 /* Intel IA-64 Processor. */ #define EM_MIPS_X 51 /* Stanford MIPS-X. */ #define EM_COLDFIRE 52 /* Motorola ColdFire. */ #define EM_68HC12 53 /* Motorola M68HC12. */ #define EM_MMA 54 /* Fujitsu MMA. */ #define EM_PCP 55 /* Siemens PCP. */ #define EM_NCPU 56 /* Sony nCPU. */ #define EM_NDR1 57 /* Denso NDR1 microprocessor. */ #define EM_STARCORE 58 /* Motorola Star*Core processor. */ #define EM_ME16 59 /* Toyota ME16 processor. */ #define EM_ST100 60 /* STMicroelectronics ST100 processor. */ #define EM_TINYJ 61 /* Advanced Logic Corp. TinyJ processor. */ #define EM_X86_64 62 /* Advanced Micro Devices x86-64 */ #define EM_AMD64 EM_X86_64 /* Advanced Micro Devices x86-64 (compat) */ #define EM_PDSP 63 /* Sony DSP Processor. */ #define EM_FX66 66 /* Siemens FX66 microcontroller. */ #define EM_ST9PLUS 67 /* STMicroelectronics ST9+ 8/16 microcontroller. */ #define EM_ST7 68 /* STmicroelectronics ST7 8-bit microcontroller. */ #define EM_68HC16 69 /* Motorola MC68HC16 microcontroller. */ #define EM_68HC11 70 /* Motorola MC68HC11 microcontroller. */ #define EM_68HC08 71 /* Motorola MC68HC08 microcontroller. */ #define EM_68HC05 72 /* Motorola MC68HC05 microcontroller. */ #define EM_SVX 73 /* Silicon Graphics SVx. */ #define EM_ST19 74 /* STMicroelectronics ST19 8-bit mc. */ #define EM_VAX 75 /* Digital VAX. */ #define EM_CRIS 76 /* Axis Communications 32-bit embedded processor. */ #define EM_JAVELIN 77 /* Infineon Technologies 32-bit embedded processor. */ #define EM_FIREPATH 78 /* Element 14 64-bit DSP Processor. */ #define EM_ZSP 79 /* LSI Logic 16-bit DSP Processor. */ #define EM_MMIX 80 /* Donald Knuth's educational 64-bit proc. */ #define EM_HUANY 81 /* Harvard University machine-independent object files. */ #define EM_PRISM 82 /* SiTera Prism. */ #define EM_AVR 83 /* Atmel AVR 8-bit microcontroller. */ #define EM_FR30 84 /* Fujitsu FR30. */ #define EM_D10V 85 /* Mitsubishi D10V. */ #define EM_D30V 86 /* Mitsubishi D30V. */ #define EM_V850 87 /* NEC v850. */ #define EM_M32R 88 /* Mitsubishi M32R. */ #define EM_MN10300 89 /* Matsushita MN10300. */ #define EM_MN10200 90 /* Matsushita MN10200. */ #define EM_PJ 91 /* picoJava. */ #define EM_OPENRISC 92 /* OpenRISC 32-bit embedded processor. */ #define EM_ARC_A5 93 /* ARC Cores Tangent-A5. */ #define EM_XTENSA 94 /* Tensilica Xtensa Architecture. */ #define EM_VIDEOCORE 95 /* Alphamosaic VideoCore processor. */ #define EM_TMM_GPP 96 /* Thompson Multimedia General Purpose Processor. */ #define EM_NS32K 97 /* National Semiconductor 32000 series. */ #define EM_TPC 98 /* Tenor Network TPC processor. */ #define EM_SNP1K 99 /* Trebia SNP 1000 processor. */ #define EM_ST200 100 /* STMicroelectronics ST200 microcontroller. */ #define EM_IP2K 101 /* Ubicom IP2xxx microcontroller family. */ #define EM_MAX 102 /* MAX Processor. */ #define EM_CR 103 /* National Semiconductor CompactRISC microprocessor. */ #define EM_F2MC16 104 /* Fujitsu F2MC16. */ #define EM_MSP430 105 /* Texas Instruments embedded microcontroller msp430. */ #define EM_BLACKFIN 106 /* Analog Devices Blackfin (DSP) processor. */ #define EM_SE_C33 107 /* S1C33 Family of Seiko Epson processors. */ #define EM_SEP 108 /* Sharp embedded microprocessor. */ #define EM_ARCA 109 /* Arca RISC Microprocessor. */ #define EM_UNICORE 110 /* Microprocessor series from PKU-Unity Ltd. and MPRC of Peking University */ /* Non-standard or deprecated. */ #define EM_486 6 /* Intel i486. */ #define EM_MIPS_RS4_BE 10 /* MIPS R4000 Big-Endian */ #define EM_ALPHA_STD 41 /* Digital Alpha (standard value). */ #define EM_ALPHA 0x9026 /* Alpha (written in the absence of an ABI) */ /* Special section indexes. */ #define SHN_UNDEF 0 /* Undefined, missing, irrelevant. */ #define SHN_LORESERVE 0xff00 /* First of reserved range. */ #define SHN_LOPROC 0xff00 /* First processor-specific. */ #define SHN_HIPROC 0xff1f /* Last processor-specific. */ #define SHN_LOOS 0xff20 /* First operating system-specific. */ #define SHN_HIOS 0xff3f /* Last operating system-specific. */ #define SHN_ABS 0xfff1 /* Absolute values. */ #define SHN_COMMON 0xfff2 /* Common data. */ #define SHN_XINDEX 0xffff /* Escape -- index stored elsewhere. */ #define SHN_HIRESERVE 0xffff /* Last of reserved range. */ /* sh_type */ #define SHT_NULL 0 /* inactive */ #define SHT_PROGBITS 1 /* program defined information */ #define SHT_SYMTAB 2 /* symbol table section */ #define SHT_STRTAB 3 /* string table section */ #define SHT_RELA 4 /* relocation section with addends */ #define SHT_HASH 5 /* symbol hash table section */ #define SHT_DYNAMIC 6 /* dynamic section */ #define SHT_NOTE 7 /* note section */ #define SHT_NOBITS 8 /* no space section */ #define SHT_REL 9 /* relocation section - no addends */ #define SHT_SHLIB 10 /* reserved - purpose unknown */ #define SHT_DYNSYM 11 /* dynamic symbol table section */ #define SHT_INIT_ARRAY 14 /* Initialization function pointers. */ #define SHT_FINI_ARRAY 15 /* Termination function pointers. */ #define SHT_PREINIT_ARRAY 16 /* Pre-initialization function ptrs. */ #define SHT_GROUP 17 /* Section group. */ #define SHT_SYMTAB_SHNDX 18 /* Section indexes (see SHN_XINDEX). */ #define SHT_LOOS 0x60000000 /* First of OS specific semantics */ #define SHT_LOSUNW 0x6ffffff4 #define SHT_SUNW_dof 0x6ffffff4 #define SHT_SUNW_cap 0x6ffffff5 #define SHT_SUNW_SIGNATURE 0x6ffffff6 #define SHT_GNU_HASH 0x6ffffff6 #define SHT_SUNW_ANNOTATE 0x6ffffff7 #define SHT_SUNW_DEBUGSTR 0x6ffffff8 #define SHT_SUNW_DEBUG 0x6ffffff9 #define SHT_SUNW_move 0x6ffffffa #define SHT_SUNW_COMDAT 0x6ffffffb #define SHT_SUNW_syminfo 0x6ffffffc #define SHT_SUNW_verdef 0x6ffffffd #define SHT_GNU_verdef 0x6ffffffd /* Symbol versions provided */ #define SHT_SUNW_verneed 0x6ffffffe #define SHT_GNU_verneed 0x6ffffffe /* Symbol versions required */ #define SHT_SUNW_versym 0x6fffffff #define SHT_GNU_versym 0x6fffffff /* Symbol version table */ #define SHT_HISUNW 0x6fffffff #define SHT_HIOS 0x6fffffff /* Last of OS specific semantics */ #define SHT_LOPROC 0x70000000 /* reserved range for processor */ #define SHT_AMD64_UNWIND 0x70000001 /* unwind information */ #define SHT_HIPROC 0x7fffffff /* specific section header types */ #define SHT_LOUSER 0x80000000 /* reserved range for application */ #define SHT_HIUSER 0xffffffff /* specific indexes */ /* Flags for sh_flags. */ #define SHF_WRITE 0x1 /* Section contains writable data. */ #define SHF_ALLOC 0x2 /* Section occupies memory. */ #define SHF_EXECINSTR 0x4 /* Section contains instructions. */ #define SHF_MERGE 0x10 /* Section may be merged. */ #define SHF_STRINGS 0x20 /* Section contains strings. */ #define SHF_INFO_LINK 0x40 /* sh_info holds section index. */ #define SHF_LINK_ORDER 0x80 /* Special ordering requirements. */ #define SHF_OS_NONCONFORMING 0x100 /* OS-specific processing required. */ #define SHF_GROUP 0x200 /* Member of section group. */ #define SHF_TLS 0x400 /* Section contains TLS data. */ #define SHF_MASKOS 0x0ff00000 /* OS-specific semantics. */ #define SHF_MASKPROC 0xf0000000 /* Processor-specific semantics. */ /* Values for p_type. */ #define PT_NULL 0 /* Unused entry. */ #define PT_LOAD 1 /* Loadable segment. */ #define PT_DYNAMIC 2 /* Dynamic linking information segment. */ #define PT_INTERP 3 /* Pathname of interpreter. */ #define PT_NOTE 4 /* Auxiliary information. */ #define PT_SHLIB 5 /* Reserved (not used). */ #define PT_PHDR 6 /* Location of program header itself. */ #define PT_TLS 7 /* Thread local storage segment */ #define PT_LOOS 0x60000000 /* First OS-specific. */ #define PT_SUNW_UNWIND 0x6464e550 /* amd64 UNWIND program header */ #define PT_GNU_EH_FRAME 0x6474e550 +#define PT_GNU_STACK 0x6474e551 #define PT_LOSUNW 0x6ffffffa #define PT_SUNWBSS 0x6ffffffa /* Sun Specific segment */ #define PT_SUNWSTACK 0x6ffffffb /* describes the stack segment */ #define PT_SUNWDTRACE 0x6ffffffc /* private */ #define PT_SUNWCAP 0x6ffffffd /* hard/soft capabilities segment */ #define PT_HISUNW 0x6fffffff #define PT_HIOS 0x6fffffff /* Last OS-specific. */ #define PT_LOPROC 0x70000000 /* First processor-specific type. */ #define PT_HIPROC 0x7fffffff /* Last processor-specific type. */ /* Values for p_flags. */ #define PF_X 0x1 /* Executable. */ #define PF_W 0x2 /* Writable. */ #define PF_R 0x4 /* Readable. */ #define PF_MASKOS 0x0ff00000 /* Operating system-specific. */ #define PF_MASKPROC 0xf0000000 /* Processor-specific. */ /* Extended program header index. */ #define PN_XNUM 0xffff /* Values for d_tag. */ #define DT_NULL 0 /* Terminating entry. */ #define DT_NEEDED 1 /* String table offset of a needed shared library. */ #define DT_PLTRELSZ 2 /* Total size in bytes of PLT relocations. */ #define DT_PLTGOT 3 /* Processor-dependent address. */ #define DT_HASH 4 /* Address of symbol hash table. */ #define DT_STRTAB 5 /* Address of string table. */ #define DT_SYMTAB 6 /* Address of symbol table. */ #define DT_RELA 7 /* Address of ElfNN_Rela relocations. */ #define DT_RELASZ 8 /* Total size of ElfNN_Rela relocations. */ #define DT_RELAENT 9 /* Size of each ElfNN_Rela relocation entry. */ #define DT_STRSZ 10 /* Size of string table. */ #define DT_SYMENT 11 /* Size of each symbol table entry. */ #define DT_INIT 12 /* Address of initialization function. */ #define DT_FINI 13 /* Address of finalization function. */ #define DT_SONAME 14 /* String table offset of shared object name. */ #define DT_RPATH 15 /* String table offset of library path. [sup] */ #define DT_SYMBOLIC 16 /* Indicates "symbolic" linking. [sup] */ #define DT_REL 17 /* Address of ElfNN_Rel relocations. */ #define DT_RELSZ 18 /* Total size of ElfNN_Rel relocations. */ #define DT_RELENT 19 /* Size of each ElfNN_Rel relocation. */ #define DT_PLTREL 20 /* Type of relocation used for PLT. */ #define DT_DEBUG 21 /* Reserved (not used). */ #define DT_TEXTREL 22 /* Indicates there may be relocations in non-writable segments. [sup] */ #define DT_JMPREL 23 /* Address of PLT relocations. */ #define DT_BIND_NOW 24 /* [sup] */ #define DT_INIT_ARRAY 25 /* Address of the array of pointers to initialization functions */ #define DT_FINI_ARRAY 26 /* Address of the array of pointers to termination functions */ #define DT_INIT_ARRAYSZ 27 /* Size in bytes of the array of initialization functions. */ #define DT_FINI_ARRAYSZ 28 /* Size in bytes of the array of terminationfunctions. */ #define DT_RUNPATH 29 /* String table offset of a null-terminated library search path string. */ #define DT_FLAGS 30 /* Object specific flag values. */ #define DT_ENCODING 32 /* Values greater than or equal to DT_ENCODING and less than DT_LOOS follow the rules for the interpretation of the d_un union as follows: even == 'd_ptr', odd == 'd_val' or none */ #define DT_PREINIT_ARRAY 32 /* Address of the array of pointers to pre-initialization functions. */ #define DT_PREINIT_ARRAYSZ 33 /* Size in bytes of the array of pre-initialization functions. */ #define DT_MAXPOSTAGS 34 /* number of positive tags */ #define DT_LOOS 0x6000000d /* First OS-specific */ #define DT_SUNW_AUXILIARY 0x6000000d /* symbol auxiliary name */ #define DT_SUNW_RTLDINF 0x6000000e /* ld.so.1 info (private) */ #define DT_SUNW_FILTER 0x6000000f /* symbol filter name */ #define DT_SUNW_CAP 0x60000010 /* hardware/software */ #define DT_HIOS 0x6ffff000 /* Last OS-specific */ /* * DT_* entries which fall between DT_VALRNGHI & DT_VALRNGLO use the * Dyn.d_un.d_val field of the Elf*_Dyn structure. */ #define DT_VALRNGLO 0x6ffffd00 #define DT_CHECKSUM 0x6ffffdf8 /* elf checksum */ #define DT_PLTPADSZ 0x6ffffdf9 /* pltpadding size */ #define DT_MOVEENT 0x6ffffdfa /* move table entry size */ #define DT_MOVESZ 0x6ffffdfb /* move table size */ #define DT_FEATURE_1 0x6ffffdfc /* feature holder */ #define DT_POSFLAG_1 0x6ffffdfd /* flags for DT_* entries, effecting */ /* the following DT_* entry. */ /* See DF_P1_* definitions */ #define DT_SYMINSZ 0x6ffffdfe /* syminfo table size (in bytes) */ #define DT_SYMINENT 0x6ffffdff /* syminfo entry size (in bytes) */ #define DT_VALRNGHI 0x6ffffdff /* * DT_* entries which fall between DT_ADDRRNGHI & DT_ADDRRNGLO use the * Dyn.d_un.d_ptr field of the Elf*_Dyn structure. * * If any adjustment is made to the ELF object after it has been * built, these entries will need to be adjusted. */ #define DT_ADDRRNGLO 0x6ffffe00 #define DT_GNU_HASH 0x6ffffef5 /* GNU-style hash table */ #define DT_CONFIG 0x6ffffefa /* configuration information */ #define DT_DEPAUDIT 0x6ffffefb /* dependency auditing */ #define DT_AUDIT 0x6ffffefc /* object auditing */ #define DT_PLTPAD 0x6ffffefd /* pltpadding (sparcv9) */ #define DT_MOVETAB 0x6ffffefe /* move table */ #define DT_SYMINFO 0x6ffffeff /* syminfo table */ #define DT_ADDRRNGHI 0x6ffffeff #define DT_VERSYM 0x6ffffff0 /* Address of versym section. */ #define DT_RELACOUNT 0x6ffffff9 /* number of RELATIVE relocations */ #define DT_RELCOUNT 0x6ffffffa /* number of RELATIVE relocations */ #define DT_FLAGS_1 0x6ffffffb /* state flags - see DF_1_* defs */ #define DT_VERDEF 0x6ffffffc /* Address of verdef section. */ #define DT_VERDEFNUM 0x6ffffffd /* Number of elems in verdef section */ #define DT_VERNEED 0x6ffffffe /* Address of verneed section. */ #define DT_VERNEEDNUM 0x6fffffff /* Number of elems in verneed section */ #define DT_LOPROC 0x70000000 /* First processor-specific type. */ #define DT_DEPRECATED_SPARC_REGISTER 0x7000001 #define DT_AUXILIARY 0x7ffffffd /* shared library auxiliary name */ #define DT_USED 0x7ffffffe /* ignored - same as needed */ #define DT_FILTER 0x7fffffff /* shared library filter name */ #define DT_HIPROC 0x7fffffff /* Last processor-specific type. */ /* Values for DT_FLAGS */ #define DF_ORIGIN 0x0001 /* Indicates that the object being loaded may make reference to the $ORIGIN substitution string */ #define DF_SYMBOLIC 0x0002 /* Indicates "symbolic" linking. */ #define DF_TEXTREL 0x0004 /* Indicates there may be relocations in non-writable segments. */ #define DF_BIND_NOW 0x0008 /* Indicates that the dynamic linker should process all relocations for the object containing this entry before transferring control to the program. */ #define DF_STATIC_TLS 0x0010 /* Indicates that the shared object or executable contains code using a static thread-local storage scheme. */ /* Values for DT_FLAGS_1 */ #define DF_1_BIND_NOW 0x00000001 /* Same as DF_BIND_NOW */ #define DF_1_GLOBAL 0x00000002 /* Set the RTLD_GLOBAL for object */ #define DF_1_NODELETE 0x00000008 /* Set the RTLD_NODELETE for object */ #define DF_1_LOADFLTR 0x00000010 /* Immediate loading of filtees */ #define DF_1_NOOPEN 0x00000040 /* Do not allow loading on dlopen() */ #define DF_1_ORIGIN 0x00000080 /* Process $ORIGIN */ /* Values for n_type. Used in core files. */ #define NT_PRSTATUS 1 /* Process status. */ #define NT_FPREGSET 2 /* Floating point registers. */ #define NT_PRPSINFO 3 /* Process state info. */ #define NT_THRMISC 7 /* Thread miscellaneous info. */ /* Symbol Binding - ELFNN_ST_BIND - st_info */ #define STB_LOCAL 0 /* Local symbol */ #define STB_GLOBAL 1 /* Global symbol */ #define STB_WEAK 2 /* like global - lower precedence */ #define STB_LOOS 10 /* Reserved range for operating system */ #define STB_HIOS 12 /* specific semantics. */ #define STB_LOPROC 13 /* reserved range for processor */ #define STB_HIPROC 15 /* specific semantics. */ /* Symbol type - ELFNN_ST_TYPE - st_info */ #define STT_NOTYPE 0 /* Unspecified type. */ #define STT_OBJECT 1 /* Data object. */ #define STT_FUNC 2 /* Function. */ #define STT_SECTION 3 /* Section. */ #define STT_FILE 4 /* Source file. */ #define STT_COMMON 5 /* Uninitialized common block. */ #define STT_TLS 6 /* TLS object. */ #define STT_NUM 7 #define STT_LOOS 10 /* Reserved range for operating system */ #define STT_HIOS 12 /* specific semantics. */ #define STT_LOPROC 13 /* reserved range for processor */ #define STT_HIPROC 15 /* specific semantics. */ /* Symbol visibility - ELFNN_ST_VISIBILITY - st_other */ #define STV_DEFAULT 0x0 /* Default visibility (see binding). */ #define STV_INTERNAL 0x1 /* Special meaning in relocatable objects. */ #define STV_HIDDEN 0x2 /* Not visible. */ #define STV_PROTECTED 0x3 /* Visible but not preemptible. */ #define STV_EXPORTED 0x4 #define STV_SINGLETON 0x5 #define STV_ELIMINATE 0x6 /* Special symbol table indexes. */ #define STN_UNDEF 0 /* Undefined symbol index. */ /* Symbol versioning flags. */ #define VER_DEF_CURRENT 1 #define VER_DEF_IDX(x) VER_NDX(x) #define VER_FLG_BASE 0x01 #define VER_FLG_WEAK 0x02 #define VER_NEED_CURRENT 1 #define VER_NEED_WEAK (1u << 15) #define VER_NEED_HIDDEN VER_NDX_HIDDEN #define VER_NEED_IDX(x) VER_NDX(x) #define VER_NDX_LOCAL 0 #define VER_NDX_GLOBAL 1 #define VER_NDX_GIVEN 2 #define VER_NDX_HIDDEN (1u << 15) #define VER_NDX(x) ((x) & ~(1u << 15)) #define CA_SUNW_NULL 0 #define CA_SUNW_HW_1 1 /* first hardware capabilities entry */ #define CA_SUNW_SF_1 2 /* first software capabilities entry */ /* * Syminfo flag values */ #define SYMINFO_FLG_DIRECT 0x0001 /* symbol ref has direct association */ /* to object containing defn. */ #define SYMINFO_FLG_PASSTHRU 0x0002 /* ignored - see SYMINFO_FLG_FILTER */ #define SYMINFO_FLG_COPY 0x0004 /* symbol is a copy-reloc */ #define SYMINFO_FLG_LAZYLOAD 0x0008 /* object containing defn should be */ /* lazily-loaded */ #define SYMINFO_FLG_DIRECTBIND 0x0010 /* ref should be bound directly to */ /* object containing defn. */ #define SYMINFO_FLG_NOEXTDIRECT 0x0020 /* don't let an external reference */ /* directly bind to this symbol */ #define SYMINFO_FLG_FILTER 0x0002 /* symbol ref is associated to a */ #define SYMINFO_FLG_AUXILIARY 0x0040 /* standard or auxiliary filter */ /* * Syminfo.si_boundto values. */ #define SYMINFO_BT_SELF 0xffff /* symbol bound to self */ #define SYMINFO_BT_PARENT 0xfffe /* symbol bound to parent */ #define SYMINFO_BT_NONE 0xfffd /* no special symbol binding */ #define SYMINFO_BT_EXTERN 0xfffc /* symbol defined as external */ #define SYMINFO_BT_LOWRESERVE 0xff00 /* beginning of reserved entries */ /* * Syminfo version values. */ #define SYMINFO_NONE 0 /* Syminfo version */ #define SYMINFO_CURRENT 1 #define SYMINFO_NUM 2 /* * Relocation types. * * All machine architectures are defined here to allow tools on one to * handle others. */ #define R_386_NONE 0 /* No relocation. */ #define R_386_32 1 /* Add symbol value. */ #define R_386_PC32 2 /* Add PC-relative symbol value. */ #define R_386_GOT32 3 /* Add PC-relative GOT offset. */ #define R_386_PLT32 4 /* Add PC-relative PLT offset. */ #define R_386_COPY 5 /* Copy data from shared object. */ #define R_386_GLOB_DAT 6 /* Set GOT entry to data address. */ #define R_386_JMP_SLOT 7 /* Set GOT entry to code address. */ #define R_386_RELATIVE 8 /* Add load address of shared object. */ #define R_386_GOTOFF 9 /* Add GOT-relative symbol address. */ #define R_386_GOTPC 10 /* Add PC-relative GOT table address. */ #define R_386_TLS_TPOFF 14 /* Negative offset in static TLS block */ #define R_386_TLS_IE 15 /* Absolute address of GOT for -ve static TLS */ #define R_386_TLS_GOTIE 16 /* GOT entry for negative static TLS block */ #define R_386_TLS_LE 17 /* Negative offset relative to static TLS */ #define R_386_TLS_GD 18 /* 32 bit offset to GOT (index,off) pair */ #define R_386_TLS_LDM 19 /* 32 bit offset to GOT (index,zero) pair */ #define R_386_TLS_GD_32 24 /* 32 bit offset to GOT (index,off) pair */ #define R_386_TLS_GD_PUSH 25 /* pushl instruction for Sun ABI GD sequence */ #define R_386_TLS_GD_CALL 26 /* call instruction for Sun ABI GD sequence */ #define R_386_TLS_GD_POP 27 /* popl instruction for Sun ABI GD sequence */ #define R_386_TLS_LDM_32 28 /* 32 bit offset to GOT (index,zero) pair */ #define R_386_TLS_LDM_PUSH 29 /* pushl instruction for Sun ABI LD sequence */ #define R_386_TLS_LDM_CALL 30 /* call instruction for Sun ABI LD sequence */ #define R_386_TLS_LDM_POP 31 /* popl instruction for Sun ABI LD sequence */ #define R_386_TLS_LDO_32 32 /* 32 bit offset from start of TLS block */ #define R_386_TLS_IE_32 33 /* 32 bit offset to GOT static TLS offset entry */ #define R_386_TLS_LE_32 34 /* 32 bit offset within static TLS block */ #define R_386_TLS_DTPMOD32 35 /* GOT entry containing TLS index */ #define R_386_TLS_DTPOFF32 36 /* GOT entry containing TLS offset */ #define R_386_TLS_TPOFF32 37 /* GOT entry of -ve static TLS offset */ #define R_ARM_NONE 0 /* No relocation. */ #define R_ARM_PC24 1 #define R_ARM_ABS32 2 #define R_ARM_REL32 3 #define R_ARM_PC13 4 #define R_ARM_ABS16 5 #define R_ARM_ABS12 6 #define R_ARM_THM_ABS5 7 #define R_ARM_ABS8 8 #define R_ARM_SBREL32 9 #define R_ARM_THM_PC22 10 #define R_ARM_THM_PC8 11 #define R_ARM_AMP_VCALL9 12 #define R_ARM_SWI24 13 #define R_ARM_THM_SWI8 14 #define R_ARM_XPC25 15 #define R_ARM_THM_XPC22 16 #define R_ARM_COPY 20 /* Copy data from shared object. */ #define R_ARM_GLOB_DAT 21 /* Set GOT entry to data address. */ #define R_ARM_JUMP_SLOT 22 /* Set GOT entry to code address. */ #define R_ARM_RELATIVE 23 /* Add load address of shared object. */ #define R_ARM_GOTOFF 24 /* Add GOT-relative symbol address. */ #define R_ARM_GOTPC 25 /* Add PC-relative GOT table address. */ #define R_ARM_GOT32 26 /* Add PC-relative GOT offset. */ #define R_ARM_PLT32 27 /* Add PC-relative PLT offset. */ #define R_ARM_GNU_VTENTRY 100 #define R_ARM_GNU_VTINHERIT 101 #define R_ARM_RSBREL32 250 #define R_ARM_THM_RPC22 251 #define R_ARM_RREL32 252 #define R_ARM_RABS32 253 #define R_ARM_RPC24 254 #define R_ARM_RBASE 255 /* Name Value Field Calculation */ #define R_IA_64_NONE 0 /* None */ #define R_IA_64_IMM14 0x21 /* immediate14 S + A */ #define R_IA_64_IMM22 0x22 /* immediate22 S + A */ #define R_IA_64_IMM64 0x23 /* immediate64 S + A */ #define R_IA_64_DIR32MSB 0x24 /* word32 MSB S + A */ #define R_IA_64_DIR32LSB 0x25 /* word32 LSB S + A */ #define R_IA_64_DIR64MSB 0x26 /* word64 MSB S + A */ #define R_IA_64_DIR64LSB 0x27 /* word64 LSB S + A */ #define R_IA_64_GPREL22 0x2a /* immediate22 @gprel(S + A) */ #define R_IA_64_GPREL64I 0x2b /* immediate64 @gprel(S + A) */ #define R_IA_64_GPREL32MSB 0x2c /* word32 MSB @gprel(S + A) */ #define R_IA_64_GPREL32LSB 0x2d /* word32 LSB @gprel(S + A) */ #define R_IA_64_GPREL64MSB 0x2e /* word64 MSB @gprel(S + A) */ #define R_IA_64_GPREL64LSB 0x2f /* word64 LSB @gprel(S + A) */ #define R_IA_64_LTOFF22 0x32 /* immediate22 @ltoff(S + A) */ #define R_IA_64_LTOFF64I 0x33 /* immediate64 @ltoff(S + A) */ #define R_IA_64_PLTOFF22 0x3a /* immediate22 @pltoff(S + A) */ #define R_IA_64_PLTOFF64I 0x3b /* immediate64 @pltoff(S + A) */ #define R_IA_64_PLTOFF64MSB 0x3e /* word64 MSB @pltoff(S + A) */ #define R_IA_64_PLTOFF64LSB 0x3f /* word64 LSB @pltoff(S + A) */ #define R_IA_64_FPTR64I 0x43 /* immediate64 @fptr(S + A) */ #define R_IA_64_FPTR32MSB 0x44 /* word32 MSB @fptr(S + A) */ #define R_IA_64_FPTR32LSB 0x45 /* word32 LSB @fptr(S + A) */ #define R_IA_64_FPTR64MSB 0x46 /* word64 MSB @fptr(S + A) */ #define R_IA_64_FPTR64LSB 0x47 /* word64 LSB @fptr(S + A) */ #define R_IA_64_PCREL60B 0x48 /* immediate60 form1 S + A - P */ #define R_IA_64_PCREL21B 0x49 /* immediate21 form1 S + A - P */ #define R_IA_64_PCREL21M 0x4a /* immediate21 form2 S + A - P */ #define R_IA_64_PCREL21F 0x4b /* immediate21 form3 S + A - P */ #define R_IA_64_PCREL32MSB 0x4c /* word32 MSB S + A - P */ #define R_IA_64_PCREL32LSB 0x4d /* word32 LSB S + A - P */ #define R_IA_64_PCREL64MSB 0x4e /* word64 MSB S + A - P */ #define R_IA_64_PCREL64LSB 0x4f /* word64 LSB S + A - P */ #define R_IA_64_LTOFF_FPTR22 0x52 /* immediate22 @ltoff(@fptr(S + A)) */ #define R_IA_64_LTOFF_FPTR64I 0x53 /* immediate64 @ltoff(@fptr(S + A)) */ #define R_IA_64_LTOFF_FPTR32MSB 0x54 /* word32 MSB @ltoff(@fptr(S + A)) */ #define R_IA_64_LTOFF_FPTR32LSB 0x55 /* word32 LSB @ltoff(@fptr(S + A)) */ #define R_IA_64_LTOFF_FPTR64MSB 0x56 /* word64 MSB @ltoff(@fptr(S + A)) */ #define R_IA_64_LTOFF_FPTR64LSB 0x57 /* word64 LSB @ltoff(@fptr(S + A)) */ #define R_IA_64_SEGREL32MSB 0x5c /* word32 MSB @segrel(S + A) */ #define R_IA_64_SEGREL32LSB 0x5d /* word32 LSB @segrel(S + A) */ #define R_IA_64_SEGREL64MSB 0x5e /* word64 MSB @segrel(S + A) */ #define R_IA_64_SEGREL64LSB 0x5f /* word64 LSB @segrel(S + A) */ #define R_IA_64_SECREL32MSB 0x64 /* word32 MSB @secrel(S + A) */ #define R_IA_64_SECREL32LSB 0x65 /* word32 LSB @secrel(S + A) */ #define R_IA_64_SECREL64MSB 0x66 /* word64 MSB @secrel(S + A) */ #define R_IA_64_SECREL64LSB 0x67 /* word64 LSB @secrel(S + A) */ #define R_IA_64_REL32MSB 0x6c /* word32 MSB BD + A */ #define R_IA_64_REL32LSB 0x6d /* word32 LSB BD + A */ #define R_IA_64_REL64MSB 0x6e /* word64 MSB BD + A */ #define R_IA_64_REL64LSB 0x6f /* word64 LSB BD + A */ #define R_IA_64_LTV32MSB 0x74 /* word32 MSB S + A */ #define R_IA_64_LTV32LSB 0x75 /* word32 LSB S + A */ #define R_IA_64_LTV64MSB 0x76 /* word64 MSB S + A */ #define R_IA_64_LTV64LSB 0x77 /* word64 LSB S + A */ #define R_IA_64_PCREL21BI 0x79 /* immediate21 form1 S + A - P */ #define R_IA_64_PCREL22 0x7a /* immediate22 S + A - P */ #define R_IA_64_PCREL64I 0x7b /* immediate64 S + A - P */ #define R_IA_64_IPLTMSB 0x80 /* function descriptor MSB special */ #define R_IA_64_IPLTLSB 0x81 /* function descriptor LSB speciaal */ #define R_IA_64_SUB 0x85 /* immediate64 A - S */ #define R_IA_64_LTOFF22X 0x86 /* immediate22 special */ #define R_IA_64_LDXMOV 0x87 /* immediate22 special */ #define R_IA_64_TPREL14 0x91 /* imm14 @tprel(S + A) */ #define R_IA_64_TPREL22 0x92 /* imm22 @tprel(S + A) */ #define R_IA_64_TPREL64I 0x93 /* imm64 @tprel(S + A) */ #define R_IA_64_TPREL64MSB 0x96 /* word64 MSB @tprel(S + A) */ #define R_IA_64_TPREL64LSB 0x97 /* word64 LSB @tprel(S + A) */ #define R_IA_64_LTOFF_TPREL22 0x9a /* imm22 @ltoff(@tprel(S+A)) */ #define R_IA_64_DTPMOD64MSB 0xa6 /* word64 MSB @dtpmod(S + A) */ #define R_IA_64_DTPMOD64LSB 0xa7 /* word64 LSB @dtpmod(S + A) */ #define R_IA_64_LTOFF_DTPMOD22 0xaa /* imm22 @ltoff(@dtpmod(S+A)) */ #define R_IA_64_DTPREL14 0xb1 /* imm14 @dtprel(S + A) */ #define R_IA_64_DTPREL22 0xb2 /* imm22 @dtprel(S + A) */ #define R_IA_64_DTPREL64I 0xb3 /* imm64 @dtprel(S + A) */ #define R_IA_64_DTPREL32MSB 0xb4 /* word32 MSB @dtprel(S + A) */ #define R_IA_64_DTPREL32LSB 0xb5 /* word32 LSB @dtprel(S + A) */ #define R_IA_64_DTPREL64MSB 0xb6 /* word64 MSB @dtprel(S + A) */ #define R_IA_64_DTPREL64LSB 0xb7 /* word64 LSB @dtprel(S + A) */ #define R_IA_64_LTOFF_DTPREL22 0xba /* imm22 @ltoff(@dtprel(S+A)) */ #define R_MIPS_NONE 0 /* No reloc */ #define R_MIPS_16 1 /* Direct 16 bit */ #define R_MIPS_32 2 /* Direct 32 bit */ #define R_MIPS_REL32 3 /* PC relative 32 bit */ #define R_MIPS_26 4 /* Direct 26 bit shifted */ #define R_MIPS_HI16 5 /* High 16 bit */ #define R_MIPS_LO16 6 /* Low 16 bit */ #define R_MIPS_GPREL16 7 /* GP relative 16 bit */ #define R_MIPS_LITERAL 8 /* 16 bit literal entry */ #define R_MIPS_GOT16 9 /* 16 bit GOT entry */ #define R_MIPS_PC16 10 /* PC relative 16 bit */ #define R_MIPS_CALL16 11 /* 16 bit GOT entry for function */ #define R_MIPS_GPREL32 12 /* GP relative 32 bit */ #define R_MIPS_GOTHI16 21 /* GOT HI 16 bit */ #define R_MIPS_GOTLO16 22 /* GOT LO 16 bit */ #define R_MIPS_CALLHI16 30 /* upper 16 bit GOT entry for function */ #define R_MIPS_CALLLO16 31 /* lower 16 bit GOT entry for function */ #define R_PPC_NONE 0 /* No relocation. */ #define R_PPC_ADDR32 1 #define R_PPC_ADDR24 2 #define R_PPC_ADDR16 3 #define R_PPC_ADDR16_LO 4 #define R_PPC_ADDR16_HI 5 #define R_PPC_ADDR16_HA 6 #define R_PPC_ADDR14 7 #define R_PPC_ADDR14_BRTAKEN 8 #define R_PPC_ADDR14_BRNTAKEN 9 #define R_PPC_REL24 10 #define R_PPC_REL14 11 #define R_PPC_REL14_BRTAKEN 12 #define R_PPC_REL14_BRNTAKEN 13 #define R_PPC_GOT16 14 #define R_PPC_GOT16_LO 15 #define R_PPC_GOT16_HI 16 #define R_PPC_GOT16_HA 17 #define R_PPC_PLTREL24 18 #define R_PPC_COPY 19 #define R_PPC_GLOB_DAT 20 #define R_PPC_JMP_SLOT 21 #define R_PPC_RELATIVE 22 #define R_PPC_LOCAL24PC 23 #define R_PPC_UADDR32 24 #define R_PPC_UADDR16 25 #define R_PPC_REL32 26 #define R_PPC_PLT32 27 #define R_PPC_PLTREL32 28 #define R_PPC_PLT16_LO 29 #define R_PPC_PLT16_HI 30 #define R_PPC_PLT16_HA 31 #define R_PPC_SDAREL16 32 #define R_PPC_SECTOFF 33 #define R_PPC_SECTOFF_LO 34 #define R_PPC_SECTOFF_HI 35 #define R_PPC_SECTOFF_HA 36 /* * 64-bit relocations */ #define R_PPC64_ADDR64 38 #define R_PPC64_ADDR16_HIGHER 39 #define R_PPC64_ADDR16_HIGHERA 40 #define R_PPC64_ADDR16_HIGHEST 41 #define R_PPC64_ADDR16_HIGHESTA 42 #define R_PPC64_UADDR64 43 #define R_PPC64_REL64 44 #define R_PPC64_PLT64 45 #define R_PPC64_PLTREL64 46 #define R_PPC64_TOC16 47 #define R_PPC64_TOC16_LO 48 #define R_PPC64_TOC16_HI 49 #define R_PPC64_TOC16_HA 50 #define R_PPC64_TOC 51 #define R_PPC64_DTPMOD64 68 #define R_PPC64_TPREL64 73 #define R_PPC64_DTPREL64 78 /* * TLS relocations */ #define R_PPC_TLS 67 #define R_PPC_DTPMOD32 68 #define R_PPC_TPREL16 69 #define R_PPC_TPREL16_LO 70 #define R_PPC_TPREL16_HI 71 #define R_PPC_TPREL16_HA 72 #define R_PPC_TPREL32 73 #define R_PPC_DTPREL16 74 #define R_PPC_DTPREL16_LO 75 #define R_PPC_DTPREL16_HI 76 #define R_PPC_DTPREL16_HA 77 #define R_PPC_DTPREL32 78 #define R_PPC_GOT_TLSGD16 79 #define R_PPC_GOT_TLSGD16_LO 80 #define R_PPC_GOT_TLSGD16_HI 81 #define R_PPC_GOT_TLSGD16_HA 82 #define R_PPC_GOT_TLSLD16 83 #define R_PPC_GOT_TLSLD16_LO 84 #define R_PPC_GOT_TLSLD16_HI 85 #define R_PPC_GOT_TLSLD16_HA 86 #define R_PPC_GOT_TPREL16 87 #define R_PPC_GOT_TPREL16_LO 88 #define R_PPC_GOT_TPREL16_HI 89 #define R_PPC_GOT_TPREL16_HA 90 /* * The remaining relocs are from the Embedded ELF ABI, and are not in the * SVR4 ELF ABI. */ #define R_PPC_EMB_NADDR32 101 #define R_PPC_EMB_NADDR16 102 #define R_PPC_EMB_NADDR16_LO 103 #define R_PPC_EMB_NADDR16_HI 104 #define R_PPC_EMB_NADDR16_HA 105 #define R_PPC_EMB_SDAI16 106 #define R_PPC_EMB_SDA2I16 107 #define R_PPC_EMB_SDA2REL 108 #define R_PPC_EMB_SDA21 109 #define R_PPC_EMB_MRKREF 110 #define R_PPC_EMB_RELSEC16 111 #define R_PPC_EMB_RELST_LO 112 #define R_PPC_EMB_RELST_HI 113 #define R_PPC_EMB_RELST_HA 114 #define R_PPC_EMB_BIT_FLD 115 #define R_PPC_EMB_RELSDA 116 #define R_SPARC_NONE 0 #define R_SPARC_8 1 #define R_SPARC_16 2 #define R_SPARC_32 3 #define R_SPARC_DISP8 4 #define R_SPARC_DISP16 5 #define R_SPARC_DISP32 6 #define R_SPARC_WDISP30 7 #define R_SPARC_WDISP22 8 #define R_SPARC_HI22 9 #define R_SPARC_22 10 #define R_SPARC_13 11 #define R_SPARC_LO10 12 #define R_SPARC_GOT10 13 #define R_SPARC_GOT13 14 #define R_SPARC_GOT22 15 #define R_SPARC_PC10 16 #define R_SPARC_PC22 17 #define R_SPARC_WPLT30 18 #define R_SPARC_COPY 19 #define R_SPARC_GLOB_DAT 20 #define R_SPARC_JMP_SLOT 21 #define R_SPARC_RELATIVE 22 #define R_SPARC_UA32 23 #define R_SPARC_PLT32 24 #define R_SPARC_HIPLT22 25 #define R_SPARC_LOPLT10 26 #define R_SPARC_PCPLT32 27 #define R_SPARC_PCPLT22 28 #define R_SPARC_PCPLT10 29 #define R_SPARC_10 30 #define R_SPARC_11 31 #define R_SPARC_64 32 #define R_SPARC_OLO10 33 #define R_SPARC_HH22 34 #define R_SPARC_HM10 35 #define R_SPARC_LM22 36 #define R_SPARC_PC_HH22 37 #define R_SPARC_PC_HM10 38 #define R_SPARC_PC_LM22 39 #define R_SPARC_WDISP16 40 #define R_SPARC_WDISP19 41 #define R_SPARC_GLOB_JMP 42 #define R_SPARC_7 43 #define R_SPARC_5 44 #define R_SPARC_6 45 #define R_SPARC_DISP64 46 #define R_SPARC_PLT64 47 #define R_SPARC_HIX22 48 #define R_SPARC_LOX10 49 #define R_SPARC_H44 50 #define R_SPARC_M44 51 #define R_SPARC_L44 52 #define R_SPARC_REGISTER 53 #define R_SPARC_UA64 54 #define R_SPARC_UA16 55 #define R_SPARC_TLS_GD_HI22 56 #define R_SPARC_TLS_GD_LO10 57 #define R_SPARC_TLS_GD_ADD 58 #define R_SPARC_TLS_GD_CALL 59 #define R_SPARC_TLS_LDM_HI22 60 #define R_SPARC_TLS_LDM_LO10 61 #define R_SPARC_TLS_LDM_ADD 62 #define R_SPARC_TLS_LDM_CALL 63 #define R_SPARC_TLS_LDO_HIX22 64 #define R_SPARC_TLS_LDO_LOX10 65 #define R_SPARC_TLS_LDO_ADD 66 #define R_SPARC_TLS_IE_HI22 67 #define R_SPARC_TLS_IE_LO10 68 #define R_SPARC_TLS_IE_LD 69 #define R_SPARC_TLS_IE_LDX 70 #define R_SPARC_TLS_IE_ADD 71 #define R_SPARC_TLS_LE_HIX22 72 #define R_SPARC_TLS_LE_LOX10 73 #define R_SPARC_TLS_DTPMOD32 74 #define R_SPARC_TLS_DTPMOD64 75 #define R_SPARC_TLS_DTPOFF32 76 #define R_SPARC_TLS_DTPOFF64 77 #define R_SPARC_TLS_TPOFF32 78 #define R_SPARC_TLS_TPOFF64 79 #define R_X86_64_NONE 0 /* No relocation. */ #define R_X86_64_64 1 /* Add 64 bit symbol value. */ #define R_X86_64_PC32 2 /* PC-relative 32 bit signed sym value. */ #define R_X86_64_GOT32 3 /* PC-relative 32 bit GOT offset. */ #define R_X86_64_PLT32 4 /* PC-relative 32 bit PLT offset. */ #define R_X86_64_COPY 5 /* Copy data from shared object. */ #define R_X86_64_GLOB_DAT 6 /* Set GOT entry to data address. */ #define R_X86_64_JMP_SLOT 7 /* Set GOT entry to code address. */ #define R_X86_64_RELATIVE 8 /* Add load address of shared object. */ #define R_X86_64_GOTPCREL 9 /* Add 32 bit signed pcrel offset to GOT. */ #define R_X86_64_32 10 /* Add 32 bit zero extended symbol value */ #define R_X86_64_32S 11 /* Add 32 bit sign extended symbol value */ #define R_X86_64_16 12 /* Add 16 bit zero extended symbol value */ #define R_X86_64_PC16 13 /* Add 16 bit signed extended pc relative symbol value */ #define R_X86_64_8 14 /* Add 8 bit zero extended symbol value */ #define R_X86_64_PC8 15 /* Add 8 bit signed extended pc relative symbol value */ #define R_X86_64_DTPMOD64 16 /* ID of module containing symbol */ #define R_X86_64_DTPOFF64 17 /* Offset in TLS block */ #define R_X86_64_TPOFF64 18 /* Offset in static TLS block */ #define R_X86_64_TLSGD 19 /* PC relative offset to GD GOT entry */ #define R_X86_64_TLSLD 20 /* PC relative offset to LD GOT entry */ #define R_X86_64_DTPOFF32 21 /* Offset in TLS block */ #define R_X86_64_GOTTPOFF 22 /* PC relative offset to IE GOT entry */ #define R_X86_64_TPOFF32 23 /* Offset in static TLS block */ #endif /* !_SYS_ELF_COMMON_H_ */ Index: projects/binutils-2.17/sys/sys/sysctl.h =================================================================== --- projects/binutils-2.17/sys/sys/sysctl.h (revision 215829) +++ projects/binutils-2.17/sys/sys/sysctl.h (revision 215830) @@ -1,726 +1,727 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Mike Karels at Berkeley Software Design, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)sysctl.h 8.1 (Berkeley) 6/2/93 * $FreeBSD$ */ #ifndef _SYS_SYSCTL_H_ #define _SYS_SYSCTL_H_ #include struct thread; /* * Definitions for sysctl call. The sysctl call uses a hierarchical name * for objects that can be examined or modified. The name is expressed as * a sequence of integers. Like a file path name, the meaning of each * component depends on its place in the hierarchy. The top-level and kern * identifiers are defined here, and other identifiers are defined in the * respective subsystem header files. */ #define CTL_MAXNAME 24 /* largest number of components supported */ /* * Each subsystem defined by sysctl defines a list of variables * for that subsystem. Each name is either a node with further * levels defined below it, or it is a leaf of some particular * type given below. Each sysctl level defines a set of name/type * pairs to be used by sysctl(8) in manipulating the subsystem. */ struct ctlname { char *ctl_name; /* subsystem name */ int ctl_type; /* type of name */ }; #define CTLTYPE 0xf /* Mask for the type */ #define CTLTYPE_NODE 1 /* name is a node */ #define CTLTYPE_INT 2 /* name describes an integer */ #define CTLTYPE_STRING 3 /* name describes a string */ #define CTLTYPE_QUAD 4 /* name describes a 64-bit number */ #define CTLTYPE_OPAQUE 5 /* name describes a structure */ #define CTLTYPE_STRUCT CTLTYPE_OPAQUE /* name describes a structure */ #define CTLTYPE_UINT 6 /* name describes an unsigned integer */ #define CTLTYPE_LONG 7 /* name describes a long */ #define CTLTYPE_ULONG 8 /* name describes an unsigned long */ #define CTLFLAG_RD 0x80000000 /* Allow reads of variable */ #define CTLFLAG_WR 0x40000000 /* Allow writes to the variable */ #define CTLFLAG_RW (CTLFLAG_RD|CTLFLAG_WR) #define CTLFLAG_NOLOCK 0x20000000 /* XXX Don't Lock */ #define CTLFLAG_ANYBODY 0x10000000 /* All users can set this var */ #define CTLFLAG_SECURE 0x08000000 /* Permit set only if securelevel<=0 */ #define CTLFLAG_PRISON 0x04000000 /* Prisoned roots can fiddle */ #define CTLFLAG_DYN 0x02000000 /* Dynamic oid - can be freed */ #define CTLFLAG_SKIP 0x01000000 /* Skip this sysctl when listing */ #define CTLMASK_SECURE 0x00F00000 /* Secure level */ #define CTLFLAG_TUN 0x00080000 /* Tunable variable */ #define CTLFLAG_MPSAFE 0x00040000 /* Handler is MP safe */ #define CTLFLAG_VNET 0x00020000 /* Prisons with vnet can fiddle */ #define CTLFLAG_RDTUN (CTLFLAG_RD|CTLFLAG_TUN) /* * Secure level. Note that CTLFLAG_SECURE == CTLFLAG_SECURE1. * * Secure when the securelevel is raised to at least N. */ #define CTLSHIFT_SECURE 20 #define CTLFLAG_SECURE1 (CTLFLAG_SECURE | (0 << CTLSHIFT_SECURE)) #define CTLFLAG_SECURE2 (CTLFLAG_SECURE | (1 << CTLSHIFT_SECURE)) #define CTLFLAG_SECURE3 (CTLFLAG_SECURE | (2 << CTLSHIFT_SECURE)) /* * USE THIS instead of a hardwired number from the categories below * to get dynamically assigned sysctl entries using the linker-set * technology. This is the way nearly all new sysctl variables should * be implemented. * e.g. SYSCTL_INT(_parent, OID_AUTO, name, CTLFLAG_RW, &variable, 0, ""); */ #define OID_AUTO (-1) /* * The starting number for dynamically-assigned entries. WARNING! * ALL static sysctl entries should have numbers LESS than this! */ #define CTL_AUTO_START 0x100 #ifdef _KERNEL #define SYSCTL_HANDLER_ARGS struct sysctl_oid *oidp, void *arg1, int arg2, \ struct sysctl_req *req /* definitions for sysctl_req 'lock' member */ #define REQ_UNLOCKED 0 /* not locked and not wired */ #define REQ_LOCKED 1 /* locked and not wired */ #define REQ_WIRED 2 /* locked and wired */ /* definitions for sysctl_req 'flags' member */ #if defined(__amd64__) || defined(__ia64__) || defined(__powerpc64__) #define SCTL_MASK32 1 /* 32 bit emulation */ #endif /* * This describes the access space for a sysctl request. This is needed * so that we can use the interface from the kernel or from user-space. */ struct sysctl_req { struct thread *td; /* used for access checking */ int lock; /* locking/wiring state */ void *oldptr; size_t oldlen; size_t oldidx; int (*oldfunc)(struct sysctl_req *, const void *, size_t); void *newptr; size_t newlen; size_t newidx; int (*newfunc)(struct sysctl_req *, void *, size_t); size_t validlen; int flags; }; SLIST_HEAD(sysctl_oid_list, sysctl_oid); /* * This describes one "oid" in the MIB tree. Potentially more nodes can * be hidden behind it, expanded by the handler. */ struct sysctl_oid { struct sysctl_oid_list *oid_parent; SLIST_ENTRY(sysctl_oid) oid_link; int oid_number; u_int oid_kind; void *oid_arg1; int oid_arg2; const char *oid_name; int (*oid_handler)(SYSCTL_HANDLER_ARGS); const char *oid_fmt; int oid_refcnt; const char *oid_descr; }; #define SYSCTL_IN(r, p, l) (r->newfunc)(r, p, l) #define SYSCTL_OUT(r, p, l) (r->oldfunc)(r, p, l) int sysctl_handle_int(SYSCTL_HANDLER_ARGS); int sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS); int sysctl_handle_long(SYSCTL_HANDLER_ARGS); int sysctl_handle_quad(SYSCTL_HANDLER_ARGS); int sysctl_handle_intptr(SYSCTL_HANDLER_ARGS); int sysctl_handle_string(SYSCTL_HANDLER_ARGS); int sysctl_handle_opaque(SYSCTL_HANDLER_ARGS); int sysctl_dpcpu_int(SYSCTL_HANDLER_ARGS); int sysctl_dpcpu_long(SYSCTL_HANDLER_ARGS); int sysctl_dpcpu_quad(SYSCTL_HANDLER_ARGS); /* * These functions are used to add/remove an oid from the mib. */ void sysctl_register_oid(struct sysctl_oid *oidp); void sysctl_unregister_oid(struct sysctl_oid *oidp); /* Declare a static oid to allow child oids to be added to it. */ #define SYSCTL_DECL(name) \ extern struct sysctl_oid_list sysctl_##name##_children /* Hide these in macros */ #define SYSCTL_CHILDREN(oid_ptr) (struct sysctl_oid_list *) \ (oid_ptr)->oid_arg1 #define SYSCTL_CHILDREN_SET(oid_ptr, val) \ (oid_ptr)->oid_arg1 = (val); #define SYSCTL_STATIC_CHILDREN(oid_name) \ (&sysctl_##oid_name##_children) /* === Structs and macros related to context handling === */ /* All dynamically created sysctls can be tracked in a context list. */ struct sysctl_ctx_entry { struct sysctl_oid *entry; TAILQ_ENTRY(sysctl_ctx_entry) link; }; TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry); #define SYSCTL_NODE_CHILDREN(parent, name) \ sysctl_##parent##_##name##_children #ifndef NO_SYSCTL_DESCR #define __DESCR(d) d #else #define __DESCR(d) "" #endif /* This constructs a "raw" MIB oid. */ #define SYSCTL_OID(parent, nbr, name, kind, a1, a2, handler, fmt, descr) \ static struct sysctl_oid sysctl__##parent##_##name = { \ &sysctl_##parent##_children, { NULL }, nbr, kind, \ a1, a2, #name, handler, fmt, 0, __DESCR(descr) }; \ DATA_SET(sysctl_set, sysctl__##parent##_##name) #define SYSCTL_ADD_OID(ctx, parent, nbr, name, kind, a1, a2, handler, fmt, descr) \ sysctl_add_oid(ctx, parent, nbr, name, kind, a1, a2, handler, fmt, __DESCR(descr)) /* This constructs a node from which other oids can hang. */ #define SYSCTL_NODE(parent, nbr, name, access, handler, descr) \ struct sysctl_oid_list SYSCTL_NODE_CHILDREN(parent, name); \ SYSCTL_OID(parent, nbr, name, CTLTYPE_NODE|(access), \ (void*)&SYSCTL_NODE_CHILDREN(parent, name), 0, handler, "N", descr) #define SYSCTL_ADD_NODE(ctx, parent, nbr, name, access, handler, descr) \ sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_NODE|(access), \ NULL, 0, handler, "N", __DESCR(descr)) /* Oid for a string. len can be 0 to indicate '\0' termination. */ #define SYSCTL_STRING(parent, nbr, name, access, arg, len, descr) \ SYSCTL_OID(parent, nbr, name, CTLTYPE_STRING|(access), \ arg, len, sysctl_handle_string, "A", descr) #define SYSCTL_ADD_STRING(ctx, parent, nbr, name, access, arg, len, descr) \ sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_STRING|(access), \ arg, len, sysctl_handle_string, "A", __DESCR(descr)) /* Oid for an int. If ptr is NULL, val is returned. */ #define SYSCTL_INT(parent, nbr, name, access, ptr, val, descr) \ SYSCTL_OID(parent, nbr, name, CTLTYPE_INT|CTLFLAG_MPSAFE|(access), \ ptr, val, sysctl_handle_int, "I", descr) #define SYSCTL_ADD_INT(ctx, parent, nbr, name, access, ptr, val, descr) \ sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_INT|CTLFLAG_MPSAFE|(access), \ ptr, val, sysctl_handle_int, "I", __DESCR(descr)) /* Oid for an unsigned int. If ptr is NULL, val is returned. */ #define SYSCTL_UINT(parent, nbr, name, access, ptr, val, descr) \ SYSCTL_OID(parent, nbr, name, CTLTYPE_UINT|CTLFLAG_MPSAFE|(access), \ ptr, val, sysctl_handle_int, "IU", descr) #define SYSCTL_ADD_UINT(ctx, parent, nbr, name, access, ptr, val, descr) \ sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_UINT|CTLFLAG_MPSAFE|(access), \ ptr, val, sysctl_handle_int, "IU", __DESCR(descr)) #define SYSCTL_XINT(parent, nbr, name, access, ptr, val, descr) \ SYSCTL_OID(parent, nbr, name, CTLTYPE_UINT|CTLFLAG_MPSAFE|(access), \ ptr, val, sysctl_handle_int, "IX", descr) #define SYSCTL_ADD_XINT(ctx, parent, nbr, name, access, ptr, val, descr) \ sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_UINT|CTLFLAG_MPSAFE|(access), \ ptr, val, sysctl_handle_int, "IX", __DESCR(descr)) /* Oid for a long. The pointer must be non NULL. */ #define SYSCTL_LONG(parent, nbr, name, access, ptr, val, descr) \ SYSCTL_OID(parent, nbr, name, CTLTYPE_LONG|CTLFLAG_MPSAFE|(access), \ ptr, val, sysctl_handle_long, "L", descr) #define SYSCTL_ADD_LONG(ctx, parent, nbr, name, access, ptr, descr) \ sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_LONG|CTLFLAG_MPSAFE|(access), \ ptr, 0, sysctl_handle_long, "L", __DESCR(descr)) /* Oid for an unsigned long. The pointer must be non NULL. */ #define SYSCTL_ULONG(parent, nbr, name, access, ptr, val, descr) \ SYSCTL_OID(parent, nbr, name, CTLTYPE_ULONG|CTLFLAG_MPSAFE|(access), \ ptr, val, sysctl_handle_long, "LU", __DESCR(descr)) #define SYSCTL_ADD_ULONG(ctx, parent, nbr, name, access, ptr, descr) \ sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_ULONG|CTLFLAG_MPSAFE|(access), \ ptr, 0, sysctl_handle_long, "LU", __DESCR(descr)) #define SYSCTL_XLONG(parent, nbr, name, access, ptr, val, descr) \ SYSCTL_OID(parent, nbr, name, CTLTYPE_ULONG|CTLFLAG_MPSAFE|(access), \ ptr, val, sysctl_handle_long, "LX", __DESCR(descr)) #define SYSCTL_ADD_XLONG(ctx, parent, nbr, name, access, ptr, descr) \ sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_ULONG|CTLFLAG_MPSAFE|(access), \ ptr, 0, sysctl_handle_long, "LX", __DESCR(descr)) /* Oid for a quad. The pointer must be non NULL. */ #define SYSCTL_QUAD(parent, nbr, name, access, ptr, val, descr) \ SYSCTL_OID(parent, nbr, name, CTLTYPE_QUAD|CTLFLAG_MPSAFE|(access), \ ptr, val, sysctl_handle_quad, "Q", __DESCR(descr)) #define SYSCTL_ADD_QUAD(ctx, parent, nbr, name, access, ptr, descr) \ sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_QUAD|CTLFLAG_MPSAFE|(access), \ ptr, 0, sysctl_handle_quad, "Q", __DESCR(descr)) /* Oid for an opaque object. Specified by a pointer and a length. */ #define SYSCTL_OPAQUE(parent, nbr, name, access, ptr, len, fmt, descr) \ SYSCTL_OID(parent, nbr, name, CTLTYPE_OPAQUE|(access), \ ptr, len, sysctl_handle_opaque, fmt, descr) #define SYSCTL_ADD_OPAQUE(ctx, parent, nbr, name, access, ptr, len, fmt, descr)\ sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_OPAQUE|(access), \ ptr, len, sysctl_handle_opaque, fmt, __DESCR(descr)) /* Oid for a struct. Specified by a pointer and a type. */ #define SYSCTL_STRUCT(parent, nbr, name, access, ptr, type, descr) \ SYSCTL_OID(parent, nbr, name, CTLTYPE_OPAQUE|(access), \ ptr, sizeof(struct type), sysctl_handle_opaque, \ "S," #type, descr) #define SYSCTL_ADD_STRUCT(ctx, parent, nbr, name, access, ptr, type, descr) \ sysctl_add_oid(ctx, parent, nbr, name, CTLTYPE_OPAQUE|(access), \ ptr, sizeof(struct type), sysctl_handle_opaque, "S," #type, __DESCR(descr)) /* Oid for a procedure. Specified by a pointer and an arg. */ #define SYSCTL_PROC(parent, nbr, name, access, ptr, arg, handler, fmt, descr) \ SYSCTL_OID(parent, nbr, name, (access), \ ptr, arg, handler, fmt, descr) #define SYSCTL_ADD_PROC(ctx, parent, nbr, name, access, ptr, arg, handler, fmt, descr) \ sysctl_add_oid(ctx, parent, nbr, name, (access), \ ptr, arg, handler, fmt, __DESCR(descr)) /* * A macro to generate a read-only sysctl to indicate the presense of optional * kernel features. */ #define FEATURE(name, desc) \ SYSCTL_INT(_kern_features, OID_AUTO, name, CTLFLAG_RD, 0, 1, desc) #endif /* _KERNEL */ /* * Top-level identifiers */ #define CTL_UNSPEC 0 /* unused */ #define CTL_KERN 1 /* "high kernel": proc, limits */ #define CTL_VM 2 /* virtual memory */ #define CTL_VFS 3 /* filesystem, mount type is next */ #define CTL_NET 4 /* network, see socket.h */ #define CTL_DEBUG 5 /* debugging parameters */ #define CTL_HW 6 /* generic cpu/io */ #define CTL_MACHDEP 7 /* machine dependent */ #define CTL_USER 8 /* user-level */ #define CTL_P1003_1B 9 /* POSIX 1003.1B */ #define CTL_MAXID 10 /* number of valid top-level ids */ #define CTL_NAMES { \ { 0, 0 }, \ { "kern", CTLTYPE_NODE }, \ { "vm", CTLTYPE_NODE }, \ { "vfs", CTLTYPE_NODE }, \ { "net", CTLTYPE_NODE }, \ { "debug", CTLTYPE_NODE }, \ { "hw", CTLTYPE_NODE }, \ { "machdep", CTLTYPE_NODE }, \ { "user", CTLTYPE_NODE }, \ { "p1003_1b", CTLTYPE_NODE }, \ } /* * CTL_KERN identifiers */ #define KERN_OSTYPE 1 /* string: system version */ #define KERN_OSRELEASE 2 /* string: system release */ #define KERN_OSREV 3 /* int: system revision */ #define KERN_VERSION 4 /* string: compile time info */ #define KERN_MAXVNODES 5 /* int: max vnodes */ #define KERN_MAXPROC 6 /* int: max processes */ #define KERN_MAXFILES 7 /* int: max open files */ #define KERN_ARGMAX 8 /* int: max arguments to exec */ #define KERN_SECURELVL 9 /* int: system security level */ #define KERN_HOSTNAME 10 /* string: hostname */ #define KERN_HOSTID 11 /* int: host identifier */ #define KERN_CLOCKRATE 12 /* struct: struct clockrate */ #define KERN_VNODE 13 /* struct: vnode structures */ #define KERN_PROC 14 /* struct: process entries */ #define KERN_FILE 15 /* struct: file entries */ #define KERN_PROF 16 /* node: kernel profiling info */ #define KERN_POSIX1 17 /* int: POSIX.1 version */ #define KERN_NGROUPS 18 /* int: # of supplemental group ids */ #define KERN_JOB_CONTROL 19 /* int: is job control available */ #define KERN_SAVED_IDS 20 /* int: saved set-user/group-ID */ #define KERN_BOOTTIME 21 /* struct: time kernel was booted */ #define KERN_NISDOMAINNAME 22 /* string: YP domain name */ #define KERN_UPDATEINTERVAL 23 /* int: update process sleep time */ #define KERN_OSRELDATE 24 /* int: kernel release date */ #define KERN_NTP_PLL 25 /* node: NTP PLL control */ #define KERN_BOOTFILE 26 /* string: name of booted kernel */ #define KERN_MAXFILESPERPROC 27 /* int: max open files per proc */ #define KERN_MAXPROCPERUID 28 /* int: max processes per uid */ #define KERN_DUMPDEV 29 /* struct cdev *: device to dump on */ #define KERN_IPC 30 /* node: anything related to IPC */ #define KERN_DUMMY 31 /* unused */ #define KERN_PS_STRINGS 32 /* int: address of PS_STRINGS */ #define KERN_USRSTACK 33 /* int: address of USRSTACK */ #define KERN_LOGSIGEXIT 34 /* int: do we log sigexit procs? */ #define KERN_IOV_MAX 35 /* int: value of UIO_MAXIOV */ #define KERN_HOSTUUID 36 /* string: host UUID identifier */ #define KERN_ARND 37 /* int: from arc4rand() */ #define KERN_MAXID 38 /* number of valid kern ids */ #define CTL_KERN_NAMES { \ { 0, 0 }, \ { "ostype", CTLTYPE_STRING }, \ { "osrelease", CTLTYPE_STRING }, \ { "osrevision", CTLTYPE_INT }, \ { "version", CTLTYPE_STRING }, \ { "maxvnodes", CTLTYPE_INT }, \ { "maxproc", CTLTYPE_INT }, \ { "maxfiles", CTLTYPE_INT }, \ { "argmax", CTLTYPE_INT }, \ { "securelevel", CTLTYPE_INT }, \ { "hostname", CTLTYPE_STRING }, \ { "hostid", CTLTYPE_UINT }, \ { "clockrate", CTLTYPE_STRUCT }, \ { "vnode", CTLTYPE_STRUCT }, \ { "proc", CTLTYPE_STRUCT }, \ { "file", CTLTYPE_STRUCT }, \ { "profiling", CTLTYPE_NODE }, \ { "posix1version", CTLTYPE_INT }, \ { "ngroups", CTLTYPE_INT }, \ { "job_control", CTLTYPE_INT }, \ { "saved_ids", CTLTYPE_INT }, \ { "boottime", CTLTYPE_STRUCT }, \ { "nisdomainname", CTLTYPE_STRING }, \ { "update", CTLTYPE_INT }, \ { "osreldate", CTLTYPE_INT }, \ { "ntp_pll", CTLTYPE_NODE }, \ { "bootfile", CTLTYPE_STRING }, \ { "maxfilesperproc", CTLTYPE_INT }, \ { "maxprocperuid", CTLTYPE_INT }, \ { "ipc", CTLTYPE_NODE }, \ { "dummy", CTLTYPE_INT }, \ { "ps_strings", CTLTYPE_INT }, \ { "usrstack", CTLTYPE_INT }, \ { "logsigexit", CTLTYPE_INT }, \ { "iov_max", CTLTYPE_INT }, \ { "hostuuid", CTLTYPE_STRING }, \ + { "arc4rand", CTLTYPE_OPAQUE }, \ } /* * CTL_VFS identifiers */ #define CTL_VFS_NAMES { \ { "vfsconf", CTLTYPE_STRUCT }, \ } /* * KERN_PROC subtypes */ #define KERN_PROC_ALL 0 /* everything */ #define KERN_PROC_PID 1 /* by process id */ #define KERN_PROC_PGRP 2 /* by process group id */ #define KERN_PROC_SESSION 3 /* by session of pid */ #define KERN_PROC_TTY 4 /* by controlling tty */ #define KERN_PROC_UID 5 /* by effective uid */ #define KERN_PROC_RUID 6 /* by real uid */ #define KERN_PROC_ARGS 7 /* get/set arguments/proctitle */ #define KERN_PROC_PROC 8 /* only return procs */ #define KERN_PROC_SV_NAME 9 /* get syscall vector name */ #define KERN_PROC_RGID 10 /* by real group id */ #define KERN_PROC_GID 11 /* by effective group id */ #define KERN_PROC_PATHNAME 12 /* path to executable */ #define KERN_PROC_OVMMAP 13 /* Old VM map entries for process */ #define KERN_PROC_OFILEDESC 14 /* Old file descriptors for process */ #define KERN_PROC_KSTACK 15 /* Kernel stacks for process */ #define KERN_PROC_INC_THREAD 0x10 /* * modifier for pid, pgrp, tty, * uid, ruid, gid, rgid and proc * This effectively uses 16-31 */ #define KERN_PROC_VMMAP 32 /* VM map entries for process */ #define KERN_PROC_FILEDESC 33 /* File descriptors for process */ #define KERN_PROC_GROUPS 34 /* process groups */ /* * KERN_IPC identifiers */ #define KIPC_MAXSOCKBUF 1 /* int: max size of a socket buffer */ #define KIPC_SOCKBUF_WASTE 2 /* int: wastage factor in sockbuf */ #define KIPC_SOMAXCONN 3 /* int: max length of connection q */ #define KIPC_MAX_LINKHDR 4 /* int: max length of link header */ #define KIPC_MAX_PROTOHDR 5 /* int: max length of network header */ #define KIPC_MAX_HDR 6 /* int: max total length of headers */ #define KIPC_MAX_DATALEN 7 /* int: max length of data? */ /* * CTL_HW identifiers */ #define HW_MACHINE 1 /* string: machine class */ #define HW_MODEL 2 /* string: specific machine model */ #define HW_NCPU 3 /* int: number of cpus */ #define HW_BYTEORDER 4 /* int: machine byte order */ #define HW_PHYSMEM 5 /* int: total memory */ #define HW_USERMEM 6 /* int: non-kernel memory */ #define HW_PAGESIZE 7 /* int: software page size */ #define HW_DISKNAMES 8 /* strings: disk drive names */ #define HW_DISKSTATS 9 /* struct: diskstats[] */ #define HW_FLOATINGPT 10 /* int: has HW floating point? */ #define HW_MACHINE_ARCH 11 /* string: machine architecture */ #define HW_REALMEM 12 /* int: 'real' memory */ #define HW_MAXID 13 /* number of valid hw ids */ #define CTL_HW_NAMES { \ { 0, 0 }, \ { "machine", CTLTYPE_STRING }, \ { "model", CTLTYPE_STRING }, \ { "ncpu", CTLTYPE_INT }, \ { "byteorder", CTLTYPE_INT }, \ { "physmem", CTLTYPE_ULONG }, \ { "usermem", CTLTYPE_ULONG }, \ { "pagesize", CTLTYPE_INT }, \ { "disknames", CTLTYPE_STRUCT }, \ { "diskstats", CTLTYPE_STRUCT }, \ { "floatingpoint", CTLTYPE_INT }, \ { "machine_arch", CTLTYPE_STRING }, \ { "realmem", CTLTYPE_ULONG }, \ } /* * CTL_USER definitions */ #define USER_CS_PATH 1 /* string: _CS_PATH */ #define USER_BC_BASE_MAX 2 /* int: BC_BASE_MAX */ #define USER_BC_DIM_MAX 3 /* int: BC_DIM_MAX */ #define USER_BC_SCALE_MAX 4 /* int: BC_SCALE_MAX */ #define USER_BC_STRING_MAX 5 /* int: BC_STRING_MAX */ #define USER_COLL_WEIGHTS_MAX 6 /* int: COLL_WEIGHTS_MAX */ #define USER_EXPR_NEST_MAX 7 /* int: EXPR_NEST_MAX */ #define USER_LINE_MAX 8 /* int: LINE_MAX */ #define USER_RE_DUP_MAX 9 /* int: RE_DUP_MAX */ #define USER_POSIX2_VERSION 10 /* int: POSIX2_VERSION */ #define USER_POSIX2_C_BIND 11 /* int: POSIX2_C_BIND */ #define USER_POSIX2_C_DEV 12 /* int: POSIX2_C_DEV */ #define USER_POSIX2_CHAR_TERM 13 /* int: POSIX2_CHAR_TERM */ #define USER_POSIX2_FORT_DEV 14 /* int: POSIX2_FORT_DEV */ #define USER_POSIX2_FORT_RUN 15 /* int: POSIX2_FORT_RUN */ #define USER_POSIX2_LOCALEDEF 16 /* int: POSIX2_LOCALEDEF */ #define USER_POSIX2_SW_DEV 17 /* int: POSIX2_SW_DEV */ #define USER_POSIX2_UPE 18 /* int: POSIX2_UPE */ #define USER_STREAM_MAX 19 /* int: POSIX2_STREAM_MAX */ #define USER_TZNAME_MAX 20 /* int: POSIX2_TZNAME_MAX */ #define USER_MAXID 21 /* number of valid user ids */ #define CTL_USER_NAMES { \ { 0, 0 }, \ { "cs_path", CTLTYPE_STRING }, \ { "bc_base_max", CTLTYPE_INT }, \ { "bc_dim_max", CTLTYPE_INT }, \ { "bc_scale_max", CTLTYPE_INT }, \ { "bc_string_max", CTLTYPE_INT }, \ { "coll_weights_max", CTLTYPE_INT }, \ { "expr_nest_max", CTLTYPE_INT }, \ { "line_max", CTLTYPE_INT }, \ { "re_dup_max", CTLTYPE_INT }, \ { "posix2_version", CTLTYPE_INT }, \ { "posix2_c_bind", CTLTYPE_INT }, \ { "posix2_c_dev", CTLTYPE_INT }, \ { "posix2_char_term", CTLTYPE_INT }, \ { "posix2_fort_dev", CTLTYPE_INT }, \ { "posix2_fort_run", CTLTYPE_INT }, \ { "posix2_localedef", CTLTYPE_INT }, \ { "posix2_sw_dev", CTLTYPE_INT }, \ { "posix2_upe", CTLTYPE_INT }, \ { "stream_max", CTLTYPE_INT }, \ { "tzname_max", CTLTYPE_INT }, \ } #define CTL_P1003_1B_ASYNCHRONOUS_IO 1 /* boolean */ #define CTL_P1003_1B_MAPPED_FILES 2 /* boolean */ #define CTL_P1003_1B_MEMLOCK 3 /* boolean */ #define CTL_P1003_1B_MEMLOCK_RANGE 4 /* boolean */ #define CTL_P1003_1B_MEMORY_PROTECTION 5 /* boolean */ #define CTL_P1003_1B_MESSAGE_PASSING 6 /* boolean */ #define CTL_P1003_1B_PRIORITIZED_IO 7 /* boolean */ #define CTL_P1003_1B_PRIORITY_SCHEDULING 8 /* boolean */ #define CTL_P1003_1B_REALTIME_SIGNALS 9 /* boolean */ #define CTL_P1003_1B_SEMAPHORES 10 /* boolean */ #define CTL_P1003_1B_FSYNC 11 /* boolean */ #define CTL_P1003_1B_SHARED_MEMORY_OBJECTS 12 /* boolean */ #define CTL_P1003_1B_SYNCHRONIZED_IO 13 /* boolean */ #define CTL_P1003_1B_TIMERS 14 /* boolean */ #define CTL_P1003_1B_AIO_LISTIO_MAX 15 /* int */ #define CTL_P1003_1B_AIO_MAX 16 /* int */ #define CTL_P1003_1B_AIO_PRIO_DELTA_MAX 17 /* int */ #define CTL_P1003_1B_DELAYTIMER_MAX 18 /* int */ #define CTL_P1003_1B_MQ_OPEN_MAX 19 /* int */ #define CTL_P1003_1B_PAGESIZE 20 /* int */ #define CTL_P1003_1B_RTSIG_MAX 21 /* int */ #define CTL_P1003_1B_SEM_NSEMS_MAX 22 /* int */ #define CTL_P1003_1B_SEM_VALUE_MAX 23 /* int */ #define CTL_P1003_1B_SIGQUEUE_MAX 24 /* int */ #define CTL_P1003_1B_TIMER_MAX 25 /* int */ #define CTL_P1003_1B_MAXID 26 #define CTL_P1003_1B_NAMES { \ { 0, 0 }, \ { "asynchronous_io", CTLTYPE_INT }, \ { "mapped_files", CTLTYPE_INT }, \ { "memlock", CTLTYPE_INT }, \ { "memlock_range", CTLTYPE_INT }, \ { "memory_protection", CTLTYPE_INT }, \ { "message_passing", CTLTYPE_INT }, \ { "prioritized_io", CTLTYPE_INT }, \ { "priority_scheduling", CTLTYPE_INT }, \ { "realtime_signals", CTLTYPE_INT }, \ { "semaphores", CTLTYPE_INT }, \ { "fsync", CTLTYPE_INT }, \ { "shared_memory_objects", CTLTYPE_INT }, \ { "synchronized_io", CTLTYPE_INT }, \ { "timers", CTLTYPE_INT }, \ { "aio_listio_max", CTLTYPE_INT }, \ { "aio_max", CTLTYPE_INT }, \ { "aio_prio_delta_max", CTLTYPE_INT }, \ { "delaytimer_max", CTLTYPE_INT }, \ { "mq_open_max", CTLTYPE_INT }, \ { "pagesize", CTLTYPE_INT }, \ { "rtsig_max", CTLTYPE_INT }, \ { "nsems_max", CTLTYPE_INT }, \ { "sem_value_max", CTLTYPE_INT }, \ { "sigqueue_max", CTLTYPE_INT }, \ { "timer_max", CTLTYPE_INT }, \ } #ifdef _KERNEL /* * Declare some common oids. */ extern struct sysctl_oid_list sysctl__children; SYSCTL_DECL(_kern); SYSCTL_DECL(_kern_features); SYSCTL_DECL(_kern_ipc); SYSCTL_DECL(_kern_proc); SYSCTL_DECL(_kern_sched); SYSCTL_DECL(_kern_sched_stats); SYSCTL_DECL(_sysctl); SYSCTL_DECL(_vm); SYSCTL_DECL(_vm_stats); SYSCTL_DECL(_vm_stats_misc); SYSCTL_DECL(_vfs); SYSCTL_DECL(_net); SYSCTL_DECL(_debug); SYSCTL_DECL(_debug_sizeof); SYSCTL_DECL(_dev); SYSCTL_DECL(_hw); SYSCTL_DECL(_hw_bus); SYSCTL_DECL(_hw_bus_devices); SYSCTL_DECL(_hw_bus_info); SYSCTL_DECL(_machdep); SYSCTL_DECL(_user); SYSCTL_DECL(_compat); SYSCTL_DECL(_regression); SYSCTL_DECL(_security); SYSCTL_DECL(_security_bsd); extern char machine[]; extern char osrelease[]; extern char ostype[]; extern char kern_ident[]; /* Dynamic oid handling */ struct sysctl_oid *sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, int nbr, const char *name, int kind, void *arg1, int arg2, int (*handler) (SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr); void sysctl_rename_oid(struct sysctl_oid *oidp, const char *name); int sysctl_move_oid(struct sysctl_oid *oidp, struct sysctl_oid_list *parent); int sysctl_remove_oid(struct sysctl_oid *oidp, int del, int recurse); int sysctl_ctx_init(struct sysctl_ctx_list *clist); int sysctl_ctx_free(struct sysctl_ctx_list *clist); struct sysctl_ctx_entry *sysctl_ctx_entry_add(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp); struct sysctl_ctx_entry *sysctl_ctx_entry_find(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp); int sysctl_ctx_entry_del(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp); int kernel_sysctl(struct thread *td, int *name, u_int namelen, void *old, size_t *oldlenp, void *new, size_t newlen, size_t *retval, int flags); int kernel_sysctlbyname(struct thread *td, char *name, void *old, size_t *oldlenp, void *new, size_t newlen, size_t *retval, int flags); int userland_sysctl(struct thread *td, int *name, u_int namelen, void *old, size_t *oldlenp, int inkernel, void *new, size_t newlen, size_t *retval, int flags); int sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid, int *nindx, struct sysctl_req *req); void sysctl_lock(void); void sysctl_unlock(void); int sysctl_wire_old_buffer(struct sysctl_req *req, size_t len); struct sbuf; struct sbuf *sbuf_new_for_sysctl(struct sbuf *, char *, int, struct sysctl_req *); #else /* !_KERNEL */ #include __BEGIN_DECLS int sysctl(const int *, u_int, void *, size_t *, const void *, size_t); int sysctlbyname(const char *, void *, size_t *, const void *, size_t); int sysctlnametomib(const char *, int *, size_t *); __END_DECLS #endif /* _KERNEL */ #endif /* !_SYS_SYSCTL_H_ */ Index: projects/binutils-2.17/sys/vm/vm_object.c =================================================================== --- projects/binutils-2.17/sys/vm/vm_object.c (revision 215829) +++ projects/binutils-2.17/sys/vm/vm_object.c (revision 215830) @@ -1,2231 +1,2233 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_object.c 8.5 (Berkeley) 3/22/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * Virtual memory object module. */ #include __FBSDID("$FreeBSD$"); #include "opt_vm.h" #include #include #include #include #include #include #include #include #include /* for curproc, pageproc */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int old_msync; SYSCTL_INT(_vm, OID_AUTO, old_msync, CTLFLAG_RW, &old_msync, 0, "Use old (insecure) msync behavior"); static int vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags); static void vm_object_qcollapse(vm_object_t object); static void vm_object_vndeallocate(vm_object_t object); /* * Virtual memory objects maintain the actual data * associated with allocated virtual memory. A given * page of memory exists within exactly one object. * * An object is only deallocated when all "references" * are given up. Only one "reference" to a given * region of an object should be writeable. * * Associated with each object is a list of all resident * memory pages belonging to that object; this list is * maintained by the "vm_page" module, and locked by the object's * lock. * * Each object also records a "pager" routine which is * used to retrieve (and store) pages to the proper backing * storage. In addition, objects may be backed by other * objects from which they were virtual-copied. * * The only items within the object structure which are * modified after time of creation are: * reference count locked by object's lock * pager routine locked by object's lock * */ struct object_q vm_object_list; struct mtx vm_object_list_mtx; /* lock for object list and count */ struct vm_object kernel_object_store; struct vm_object kmem_object_store; SYSCTL_NODE(_vm_stats, OID_AUTO, object, CTLFLAG_RD, 0, "VM object stats"); static long object_collapses; SYSCTL_LONG(_vm_stats_object, OID_AUTO, collapses, CTLFLAG_RD, &object_collapses, 0, "VM object collapses"); static long object_bypasses; SYSCTL_LONG(_vm_stats_object, OID_AUTO, bypasses, CTLFLAG_RD, &object_bypasses, 0, "VM object bypasses"); static uma_zone_t obj_zone; static int vm_object_zinit(void *mem, int size, int flags); #ifdef INVARIANTS static void vm_object_zdtor(void *mem, int size, void *arg); static void vm_object_zdtor(void *mem, int size, void *arg) { vm_object_t object; object = (vm_object_t)mem; KASSERT(TAILQ_EMPTY(&object->memq), ("object %p has resident pages", object)); #if VM_NRESERVLEVEL > 0 KASSERT(LIST_EMPTY(&object->rvq), ("object %p has reservations", object)); #endif KASSERT(object->cache == NULL, ("object %p has cached pages", object)); KASSERT(object->paging_in_progress == 0, ("object %p paging_in_progress = %d", object, object->paging_in_progress)); KASSERT(object->resident_page_count == 0, ("object %p resident_page_count = %d", object, object->resident_page_count)); KASSERT(object->shadow_count == 0, ("object %p shadow_count = %d", object, object->shadow_count)); } #endif static int vm_object_zinit(void *mem, int size, int flags) { vm_object_t object; object = (vm_object_t)mem; bzero(&object->mtx, sizeof(object->mtx)); VM_OBJECT_LOCK_INIT(object, "standard object"); /* These are true for any object that has been freed */ object->paging_in_progress = 0; object->resident_page_count = 0; object->shadow_count = 0; return (0); } void _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object) { TAILQ_INIT(&object->memq); LIST_INIT(&object->shadow_head); object->root = NULL; object->type = type; object->size = size; object->generation = 1; object->ref_count = 1; object->memattr = VM_MEMATTR_DEFAULT; object->flags = 0; object->uip = NULL; object->charge = 0; if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP)) object->flags = OBJ_ONEMAPPING; object->pg_color = 0; object->handle = NULL; object->backing_object = NULL; object->backing_object_offset = (vm_ooffset_t) 0; #if VM_NRESERVLEVEL > 0 LIST_INIT(&object->rvq); #endif object->cache = NULL; mtx_lock(&vm_object_list_mtx); TAILQ_INSERT_TAIL(&vm_object_list, object, object_list); mtx_unlock(&vm_object_list_mtx); } /* * vm_object_init: * * Initialize the VM objects module. */ void vm_object_init(void) { TAILQ_INIT(&vm_object_list); mtx_init(&vm_object_list_mtx, "vm object_list", NULL, MTX_DEF); VM_OBJECT_LOCK_INIT(&kernel_object_store, "kernel object"); _vm_object_allocate(OBJT_PHYS, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS), kernel_object); #if VM_NRESERVLEVEL > 0 kernel_object->flags |= OBJ_COLORED; kernel_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS); #endif VM_OBJECT_LOCK_INIT(&kmem_object_store, "kmem object"); _vm_object_allocate(OBJT_PHYS, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS), kmem_object); #if VM_NRESERVLEVEL > 0 kmem_object->flags |= OBJ_COLORED; kmem_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS); #endif /* * The lock portion of struct vm_object must be type stable due * to vm_pageout_fallback_object_lock locking a vm object * without holding any references to it. */ obj_zone = uma_zcreate("VM OBJECT", sizeof (struct vm_object), NULL, #ifdef INVARIANTS vm_object_zdtor, #else NULL, #endif vm_object_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE); } void vm_object_clear_flag(vm_object_t object, u_short bits) { VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); object->flags &= ~bits; } /* * Sets the default memory attribute for the specified object. Pages * that are allocated to this object are by default assigned this memory * attribute. * * Presently, this function must be called before any pages are allocated * to the object. In the future, this requirement may be relaxed for * "default" and "swap" objects. */ int vm_object_set_memattr(vm_object_t object, vm_memattr_t memattr) { VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); switch (object->type) { case OBJT_DEFAULT: case OBJT_DEVICE: case OBJT_PHYS: case OBJT_SG: case OBJT_SWAP: case OBJT_VNODE: if (!TAILQ_EMPTY(&object->memq)) return (KERN_FAILURE); break; case OBJT_DEAD: return (KERN_INVALID_ARGUMENT); } object->memattr = memattr; return (KERN_SUCCESS); } void vm_object_pip_add(vm_object_t object, short i) { VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); object->paging_in_progress += i; } void vm_object_pip_subtract(vm_object_t object, short i) { VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); object->paging_in_progress -= i; } void vm_object_pip_wakeup(vm_object_t object) { VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); object->paging_in_progress--; if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) { vm_object_clear_flag(object, OBJ_PIPWNT); wakeup(object); } } void vm_object_pip_wakeupn(vm_object_t object, short i) { VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); if (i) object->paging_in_progress -= i; if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) { vm_object_clear_flag(object, OBJ_PIPWNT); wakeup(object); } } void vm_object_pip_wait(vm_object_t object, char *waitid) { VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); while (object->paging_in_progress) { object->flags |= OBJ_PIPWNT; msleep(object, VM_OBJECT_MTX(object), PVM, waitid, 0); } } /* * vm_object_allocate: * * Returns a new object with the given size. */ vm_object_t vm_object_allocate(objtype_t type, vm_pindex_t size) { vm_object_t object; object = (vm_object_t)uma_zalloc(obj_zone, M_WAITOK); _vm_object_allocate(type, size, object); return (object); } /* * vm_object_reference: * * Gets another reference to the given object. Note: OBJ_DEAD * objects can be referenced during final cleaning. */ void vm_object_reference(vm_object_t object) { if (object == NULL) return; VM_OBJECT_LOCK(object); vm_object_reference_locked(object); VM_OBJECT_UNLOCK(object); } /* * vm_object_reference_locked: * * Gets another reference to the given object. * * The object must be locked. */ void vm_object_reference_locked(vm_object_t object) { struct vnode *vp; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); object->ref_count++; if (object->type == OBJT_VNODE) { vp = object->handle; vref(vp); } } /* * Handle deallocating an object of type OBJT_VNODE. */ static void vm_object_vndeallocate(vm_object_t object) { struct vnode *vp = (struct vnode *) object->handle; VFS_ASSERT_GIANT(vp->v_mount); VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); KASSERT(object->type == OBJT_VNODE, ("vm_object_vndeallocate: not a vnode object")); KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp")); #ifdef INVARIANTS if (object->ref_count == 0) { vprint("vm_object_vndeallocate", vp); panic("vm_object_vndeallocate: bad object reference count"); } #endif object->ref_count--; if (object->ref_count == 0) { mp_fixme("Unlocked vflag access."); vp->v_vflag &= ~VV_TEXT; } VM_OBJECT_UNLOCK(object); /* * vrele may need a vop lock */ vrele(vp); } /* * vm_object_deallocate: * * Release a reference to the specified object, * gained either through a vm_object_allocate * or a vm_object_reference call. When all references * are gone, storage associated with this object * may be relinquished. * * No object may be locked. */ void vm_object_deallocate(vm_object_t object) { vm_object_t temp; while (object != NULL) { int vfslocked; vfslocked = 0; restart: VM_OBJECT_LOCK(object); if (object->type == OBJT_VNODE) { struct vnode *vp = (struct vnode *) object->handle; /* * Conditionally acquire Giant for a vnode-backed * object. We have to be careful since the type of * a vnode object can change while the object is * unlocked. */ if (VFS_NEEDSGIANT(vp->v_mount) && !vfslocked) { vfslocked = 1; if (!mtx_trylock(&Giant)) { VM_OBJECT_UNLOCK(object); mtx_lock(&Giant); goto restart; } } vm_object_vndeallocate(object); VFS_UNLOCK_GIANT(vfslocked); return; } else /* * This is to handle the case that the object * changed type while we dropped its lock to * obtain Giant. */ VFS_UNLOCK_GIANT(vfslocked); KASSERT(object->ref_count != 0, ("vm_object_deallocate: object deallocated too many times: %d", object->type)); /* * If the reference count goes to 0 we start calling * vm_object_terminate() on the object chain. * A ref count of 1 may be a special case depending on the * shadow count being 0 or 1. */ object->ref_count--; if (object->ref_count > 1) { VM_OBJECT_UNLOCK(object); return; } else if (object->ref_count == 1) { if (object->shadow_count == 0 && object->handle == NULL && (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) { vm_object_set_flag(object, OBJ_ONEMAPPING); } else if ((object->shadow_count == 1) && (object->handle == NULL) && (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) { vm_object_t robject; robject = LIST_FIRST(&object->shadow_head); KASSERT(robject != NULL, ("vm_object_deallocate: ref_count: %d, shadow_count: %d", object->ref_count, object->shadow_count)); if (!VM_OBJECT_TRYLOCK(robject)) { /* * Avoid a potential deadlock. */ object->ref_count++; VM_OBJECT_UNLOCK(object); /* * More likely than not the thread * holding robject's lock has lower * priority than the current thread. * Let the lower priority thread run. */ pause("vmo_de", 1); continue; } /* * Collapse object into its shadow unless its * shadow is dead. In that case, object will * be deallocated by the thread that is * deallocating its shadow. */ if ((robject->flags & OBJ_DEAD) == 0 && (robject->handle == NULL) && (robject->type == OBJT_DEFAULT || robject->type == OBJT_SWAP)) { robject->ref_count++; retry: if (robject->paging_in_progress) { VM_OBJECT_UNLOCK(object); vm_object_pip_wait(robject, "objde1"); temp = robject->backing_object; if (object == temp) { VM_OBJECT_LOCK(object); goto retry; } } else if (object->paging_in_progress) { VM_OBJECT_UNLOCK(robject); object->flags |= OBJ_PIPWNT; msleep(object, VM_OBJECT_MTX(object), PDROP | PVM, "objde2", 0); VM_OBJECT_LOCK(robject); temp = robject->backing_object; if (object == temp) { VM_OBJECT_LOCK(object); goto retry; } } else VM_OBJECT_UNLOCK(object); if (robject->ref_count == 1) { robject->ref_count--; object = robject; goto doterm; } object = robject; vm_object_collapse(object); VM_OBJECT_UNLOCK(object); continue; } VM_OBJECT_UNLOCK(robject); } VM_OBJECT_UNLOCK(object); return; } doterm: temp = object->backing_object; if (temp != NULL) { VM_OBJECT_LOCK(temp); LIST_REMOVE(object, shadow_list); temp->shadow_count--; VM_OBJECT_UNLOCK(temp); object->backing_object = NULL; } /* * Don't double-terminate, we could be in a termination * recursion due to the terminate having to sync data * to disk. */ if ((object->flags & OBJ_DEAD) == 0) vm_object_terminate(object); else VM_OBJECT_UNLOCK(object); object = temp; } } /* * vm_object_destroy removes the object from the global object list * and frees the space for the object. */ void vm_object_destroy(vm_object_t object) { /* * Remove the object from the global object list. */ mtx_lock(&vm_object_list_mtx); TAILQ_REMOVE(&vm_object_list, object, object_list); mtx_unlock(&vm_object_list_mtx); /* * Release the allocation charge. */ if (object->uip != NULL) { KASSERT(object->type == OBJT_DEFAULT || object->type == OBJT_SWAP, ("vm_object_terminate: non-swap obj %p has uip", object)); swap_release_by_uid(object->charge, object->uip); object->charge = 0; uifree(object->uip); object->uip = NULL; } /* * Free the space for the object. */ uma_zfree(obj_zone, object); } /* * vm_object_terminate actually destroys the specified object, freeing * up all previously used resources. * * The object must be locked. * This routine may block. */ void vm_object_terminate(vm_object_t object) { vm_page_t p, p_next; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); /* * Make sure no one uses us. */ vm_object_set_flag(object, OBJ_DEAD); /* * wait for the pageout daemon to be done with the object */ vm_object_pip_wait(object, "objtrm"); KASSERT(!object->paging_in_progress, ("vm_object_terminate: pageout in progress")); /* * Clean and free the pages, as appropriate. All references to the * object are gone, so we don't need to lock it. */ if (object->type == OBJT_VNODE) { struct vnode *vp = (struct vnode *)object->handle; /* * Clean pages and flush buffers. */ vm_object_page_clean(object, 0, 0, OBJPC_SYNC); VM_OBJECT_UNLOCK(object); vinvalbuf(vp, V_SAVE, 0, 0); VM_OBJECT_LOCK(object); } KASSERT(object->ref_count == 0, ("vm_object_terminate: object with references, ref_count=%d", object->ref_count)); /* * Free any remaining pageable pages. This also removes them from the * paging queues. However, don't free wired pages, just remove them * from the object. Rather than incrementally removing each page from * the object, the page and object are reset to any empty state. */ TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) { KASSERT(!p->busy && (p->oflags & VPO_BUSY) == 0, ("vm_object_terminate: freeing busy page %p", p)); vm_page_lock(p); /* * Optimize the page's removal from the object by resetting * its "object" field. Specifically, if the page is not * wired, then the effect of this assignment is that * vm_page_free()'s call to vm_page_remove() will return * immediately without modifying the page or the object. */ p->object = NULL; if (p->wire_count == 0) { vm_page_free(p); PCPU_INC(cnt.v_pfree); } vm_page_unlock(p); } /* * If the object contained any pages, then reset it to an empty state. * None of the object's fields, including "resident_page_count", were * modified by the preceding loop. */ if (object->resident_page_count != 0) { object->root = NULL; TAILQ_INIT(&object->memq); object->resident_page_count = 0; if (object->type == OBJT_VNODE) vdrop(object->handle); } #if VM_NRESERVLEVEL > 0 if (__predict_false(!LIST_EMPTY(&object->rvq))) vm_reserv_break_all(object); #endif if (__predict_false(object->cache != NULL)) vm_page_cache_free(object, 0, 0); /* * Let the pager know object is dead. */ vm_pager_deallocate(object); VM_OBJECT_UNLOCK(object); vm_object_destroy(object); } /* * vm_object_page_clean * * Clean all dirty pages in the specified range of object. Leaves page * on whatever queue it is currently on. If NOSYNC is set then do not * write out pages with VPO_NOSYNC set (originally comes from MAP_NOSYNC), * leaving the object dirty. * * When stuffing pages asynchronously, allow clustering. XXX we need a * synchronous clustering mode implementation. * * Odd semantics: if start == end, we clean everything. * * The object must be locked. */ void vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int flags) { vm_page_t np, p; vm_pindex_t pi, tend; int clearobjflags, curgeneration, n, pagerflags; mtx_assert(&vm_page_queue_mtx, MA_NOTOWNED); VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); KASSERT(object->type == OBJT_VNODE, ("Not a vnode object")); if ((object->flags & OBJ_MIGHTBEDIRTY) == 0 || object->resident_page_count == 0) return; pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) != 0 ? VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK; pagerflags |= (flags & OBJPC_INVAL) != 0 ? VM_PAGER_PUT_INVAL : 0; tend = (end == 0) ? object->size : end; vm_object_set_flag(object, OBJ_CLEANING); /* * Make the page read-only so we can then clear the object flags. * * However, if this is a nosync mmap then the object is likely to * stay dirty so do not mess with the page and do not clear the * object flags. */ clearobjflags = 1; for (p = vm_page_find_least(object, start); p != NULL && p->pindex < tend; p = TAILQ_NEXT(p, listq)) { if ((flags & OBJPC_NOSYNC) != 0 && (p->oflags & VPO_NOSYNC) != 0) clearobjflags = 0; else pmap_remove_write(p); } if (clearobjflags && (start == 0) && (tend == object->size)) vm_object_clear_flag(object, OBJ_MIGHTBEDIRTY); rescan: curgeneration = object->generation; for (p = vm_page_find_least(object, start); p != NULL; p = np) { pi = p->pindex; if (pi >= tend) break; np = TAILQ_NEXT(p, listq); if (p->valid == 0) continue; - while (vm_page_sleep_if_busy(p, TRUE, "vpcwai")) { + if (vm_page_sleep_if_busy(p, TRUE, "vpcwai")) { if (object->generation != curgeneration) goto rescan; + np = vm_page_find_least(object, pi); + continue; } vm_page_test_dirty(p); if (p->dirty == 0) continue; /* * If we have been asked to skip nosync pages and this is a * nosync page, skip it. Note that the object flags were * not cleared in this case so we do not have to set them. */ if ((flags & OBJPC_NOSYNC) != 0 && (p->oflags & VPO_NOSYNC) != 0) continue; n = vm_object_page_collect_flush(object, p, pagerflags); if (object->generation != curgeneration) goto rescan; np = vm_page_find_least(object, pi + n); } #if 0 VOP_FSYNC(vp, (pagerflags & VM_PAGER_PUT_SYNC) ? MNT_WAIT : 0); #endif vm_object_clear_flag(object, OBJ_CLEANING); } static int vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags) { vm_page_t ma[vm_pageout_page_count], p_first, tp; int count, i, mreq, runlen; mtx_assert(&vm_page_queue_mtx, MA_NOTOWNED); vm_page_lock_assert(p, MA_NOTOWNED); VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); count = 1; mreq = 0; for (tp = p; count < vm_pageout_page_count; count++) { tp = vm_page_next(tp); if (tp == NULL || tp->busy != 0 || (tp->oflags & VPO_BUSY) != 0) break; vm_page_test_dirty(tp); if (tp->dirty == 0) break; } for (p_first = p; count < vm_pageout_page_count; count++) { tp = vm_page_prev(p_first); if (tp == NULL || tp->busy != 0 || (tp->oflags & VPO_BUSY) != 0) break; vm_page_test_dirty(tp); if (tp->dirty == 0) break; p_first = tp; mreq++; } for (tp = p_first, i = 0; i < count; tp = TAILQ_NEXT(tp, listq), i++) ma[i] = tp; vm_pageout_flush(ma, count, pagerflags, mreq, &runlen); return (runlen); } /* * Note that there is absolutely no sense in writing out * anonymous objects, so we track down the vnode object * to write out. * We invalidate (remove) all pages from the address space * for semantic correctness. * * Note: certain anonymous maps, such as MAP_NOSYNC maps, * may start out with a NULL object. */ void vm_object_sync(vm_object_t object, vm_ooffset_t offset, vm_size_t size, boolean_t syncio, boolean_t invalidate) { vm_object_t backing_object; struct vnode *vp; struct mount *mp; int flags; if (object == NULL) return; VM_OBJECT_LOCK(object); while ((backing_object = object->backing_object) != NULL) { VM_OBJECT_LOCK(backing_object); offset += object->backing_object_offset; VM_OBJECT_UNLOCK(object); object = backing_object; if (object->size < OFF_TO_IDX(offset + size)) size = IDX_TO_OFF(object->size) - offset; } /* * Flush pages if writing is allowed, invalidate them * if invalidation requested. Pages undergoing I/O * will be ignored by vm_object_page_remove(). * * We cannot lock the vnode and then wait for paging * to complete without deadlocking against vm_fault. * Instead we simply call vm_object_page_remove() and * allow it to block internally on a page-by-page * basis when it encounters pages undergoing async * I/O. */ if (object->type == OBJT_VNODE && (object->flags & OBJ_MIGHTBEDIRTY) != 0) { int vfslocked; vp = object->handle; VM_OBJECT_UNLOCK(object); (void) vn_start_write(vp, &mp, V_WAIT); vfslocked = VFS_LOCK_GIANT(vp->v_mount); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); flags = (syncio || invalidate) ? OBJPC_SYNC : 0; flags |= invalidate ? OBJPC_INVAL : 0; VM_OBJECT_LOCK(object); vm_object_page_clean(object, OFF_TO_IDX(offset), OFF_TO_IDX(offset + size + PAGE_MASK), flags); VM_OBJECT_UNLOCK(object); VOP_UNLOCK(vp, 0); VFS_UNLOCK_GIANT(vfslocked); vn_finished_write(mp); VM_OBJECT_LOCK(object); } if ((object->type == OBJT_VNODE || object->type == OBJT_DEVICE) && invalidate) { boolean_t purge; purge = old_msync || (object->type == OBJT_DEVICE); vm_object_page_remove(object, OFF_TO_IDX(offset), OFF_TO_IDX(offset + size + PAGE_MASK), purge ? FALSE : TRUE); } VM_OBJECT_UNLOCK(object); } /* * vm_object_madvise: * * Implements the madvise function at the object/page level. * * MADV_WILLNEED (any object) * * Activate the specified pages if they are resident. * * MADV_DONTNEED (any object) * * Deactivate the specified pages if they are resident. * * MADV_FREE (OBJT_DEFAULT/OBJT_SWAP objects, * OBJ_ONEMAPPING only) * * Deactivate and clean the specified pages if they are * resident. This permits the process to reuse the pages * without faulting or the kernel to reclaim the pages * without I/O. */ void vm_object_madvise(vm_object_t object, vm_pindex_t pindex, int count, int advise) { vm_pindex_t end, tpindex; vm_object_t backing_object, tobject; vm_page_t m; if (object == NULL) return; VM_OBJECT_LOCK(object); end = pindex + count; /* * Locate and adjust resident pages */ for (; pindex < end; pindex += 1) { relookup: tobject = object; tpindex = pindex; shadowlookup: /* * MADV_FREE only operates on OBJT_DEFAULT or OBJT_SWAP pages * and those pages must be OBJ_ONEMAPPING. */ if (advise == MADV_FREE) { if ((tobject->type != OBJT_DEFAULT && tobject->type != OBJT_SWAP) || (tobject->flags & OBJ_ONEMAPPING) == 0) { goto unlock_tobject; } } else if (tobject->type == OBJT_PHYS) goto unlock_tobject; m = vm_page_lookup(tobject, tpindex); if (m == NULL && advise == MADV_WILLNEED) { /* * If the page is cached, reactivate it. */ m = vm_page_alloc(tobject, tpindex, VM_ALLOC_IFCACHED | VM_ALLOC_NOBUSY); } if (m == NULL) { /* * There may be swap even if there is no backing page */ if (advise == MADV_FREE && tobject->type == OBJT_SWAP) swap_pager_freespace(tobject, tpindex, 1); /* * next object */ backing_object = tobject->backing_object; if (backing_object == NULL) goto unlock_tobject; VM_OBJECT_LOCK(backing_object); tpindex += OFF_TO_IDX(tobject->backing_object_offset); if (tobject != object) VM_OBJECT_UNLOCK(tobject); tobject = backing_object; goto shadowlookup; } else if (m->valid != VM_PAGE_BITS_ALL) goto unlock_tobject; /* * If the page is not in a normal state, skip it. */ vm_page_lock(m); if (m->hold_count != 0 || m->wire_count != 0) { vm_page_unlock(m); goto unlock_tobject; } KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, ("vm_object_madvise: page %p is not managed", m)); if ((m->oflags & VPO_BUSY) || m->busy) { if (advise == MADV_WILLNEED) { /* * Reference the page before unlocking and * sleeping so that the page daemon is less * likely to reclaim it. */ vm_page_lock_queues(); vm_page_flag_set(m, PG_REFERENCED); vm_page_unlock_queues(); } vm_page_unlock(m); if (object != tobject) VM_OBJECT_UNLOCK(object); m->oflags |= VPO_WANTED; msleep(m, VM_OBJECT_MTX(tobject), PDROP | PVM, "madvpo", 0); VM_OBJECT_LOCK(object); goto relookup; } if (advise == MADV_WILLNEED) { vm_page_activate(m); } else if (advise == MADV_DONTNEED) { vm_page_dontneed(m); } else if (advise == MADV_FREE) { /* * Mark the page clean. This will allow the page * to be freed up by the system. However, such pages * are often reused quickly by malloc()/free() * so we do not do anything that would cause * a page fault if we can help it. * * Specifically, we do not try to actually free * the page now nor do we try to put it in the * cache (which would cause a page fault on reuse). * * But we do make the page is freeable as we * can without actually taking the step of unmapping * it. */ pmap_clear_modify(m); m->dirty = 0; m->act_count = 0; vm_page_dontneed(m); } vm_page_unlock(m); if (advise == MADV_FREE && tobject->type == OBJT_SWAP) swap_pager_freespace(tobject, tpindex, 1); unlock_tobject: if (tobject != object) VM_OBJECT_UNLOCK(tobject); } VM_OBJECT_UNLOCK(object); } /* * vm_object_shadow: * * Create a new object which is backed by the * specified existing object range. The source * object reference is deallocated. * * The new object and offset into that object * are returned in the source parameters. */ void vm_object_shadow( vm_object_t *object, /* IN/OUT */ vm_ooffset_t *offset, /* IN/OUT */ vm_size_t length) { vm_object_t source; vm_object_t result; source = *object; /* * Don't create the new object if the old object isn't shared. */ if (source != NULL) { VM_OBJECT_LOCK(source); if (source->ref_count == 1 && source->handle == NULL && (source->type == OBJT_DEFAULT || source->type == OBJT_SWAP)) { VM_OBJECT_UNLOCK(source); return; } VM_OBJECT_UNLOCK(source); } /* * Allocate a new object with the given length. */ result = vm_object_allocate(OBJT_DEFAULT, length); /* * The new object shadows the source object, adding a reference to it. * Our caller changes his reference to point to the new object, * removing a reference to the source object. Net result: no change * of reference count. * * Try to optimize the result object's page color when shadowing * in order to maintain page coloring consistency in the combined * shadowed object. */ result->backing_object = source; /* * Store the offset into the source object, and fix up the offset into * the new object. */ result->backing_object_offset = *offset; if (source != NULL) { VM_OBJECT_LOCK(source); LIST_INSERT_HEAD(&source->shadow_head, result, shadow_list); source->shadow_count++; #if VM_NRESERVLEVEL > 0 result->flags |= source->flags & OBJ_COLORED; result->pg_color = (source->pg_color + OFF_TO_IDX(*offset)) & ((1 << (VM_NFREEORDER - 1)) - 1); #endif VM_OBJECT_UNLOCK(source); } /* * Return the new things */ *offset = 0; *object = result; } /* * vm_object_split: * * Split the pages in a map entry into a new object. This affords * easier removal of unused pages, and keeps object inheritance from * being a negative impact on memory usage. */ void vm_object_split(vm_map_entry_t entry) { vm_page_t m, m_next; vm_object_t orig_object, new_object, source; vm_pindex_t idx, offidxstart; vm_size_t size; orig_object = entry->object.vm_object; if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP) return; if (orig_object->ref_count <= 1) return; VM_OBJECT_UNLOCK(orig_object); offidxstart = OFF_TO_IDX(entry->offset); size = atop(entry->end - entry->start); /* * If swap_pager_copy() is later called, it will convert new_object * into a swap object. */ new_object = vm_object_allocate(OBJT_DEFAULT, size); /* * At this point, the new object is still private, so the order in * which the original and new objects are locked does not matter. */ VM_OBJECT_LOCK(new_object); VM_OBJECT_LOCK(orig_object); source = orig_object->backing_object; if (source != NULL) { VM_OBJECT_LOCK(source); if ((source->flags & OBJ_DEAD) != 0) { VM_OBJECT_UNLOCK(source); VM_OBJECT_UNLOCK(orig_object); VM_OBJECT_UNLOCK(new_object); vm_object_deallocate(new_object); VM_OBJECT_LOCK(orig_object); return; } LIST_INSERT_HEAD(&source->shadow_head, new_object, shadow_list); source->shadow_count++; vm_object_reference_locked(source); /* for new_object */ vm_object_clear_flag(source, OBJ_ONEMAPPING); VM_OBJECT_UNLOCK(source); new_object->backing_object_offset = orig_object->backing_object_offset + entry->offset; new_object->backing_object = source; } if (orig_object->uip != NULL) { new_object->uip = orig_object->uip; uihold(orig_object->uip); new_object->charge = ptoa(size); KASSERT(orig_object->charge >= ptoa(size), ("orig_object->charge < 0")); orig_object->charge -= ptoa(size); } retry: m = vm_page_find_least(orig_object, offidxstart); for (; m != NULL && (idx = m->pindex - offidxstart) < size; m = m_next) { m_next = TAILQ_NEXT(m, listq); /* * We must wait for pending I/O to complete before we can * rename the page. * * We do not have to VM_PROT_NONE the page as mappings should * not be changed by this operation. */ if ((m->oflags & VPO_BUSY) || m->busy) { VM_OBJECT_UNLOCK(new_object); m->oflags |= VPO_WANTED; msleep(m, VM_OBJECT_MTX(orig_object), PVM, "spltwt", 0); VM_OBJECT_LOCK(new_object); goto retry; } vm_page_lock(m); vm_page_rename(m, new_object, idx); vm_page_unlock(m); /* page automatically made dirty by rename and cache handled */ vm_page_busy(m); } if (orig_object->type == OBJT_SWAP) { /* * swap_pager_copy() can sleep, in which case the orig_object's * and new_object's locks are released and reacquired. */ swap_pager_copy(orig_object, new_object, offidxstart, 0); /* * Transfer any cached pages from orig_object to new_object. */ if (__predict_false(orig_object->cache != NULL)) vm_page_cache_transfer(orig_object, offidxstart, new_object); } VM_OBJECT_UNLOCK(orig_object); TAILQ_FOREACH(m, &new_object->memq, listq) vm_page_wakeup(m); VM_OBJECT_UNLOCK(new_object); entry->object.vm_object = new_object; entry->offset = 0LL; vm_object_deallocate(orig_object); VM_OBJECT_LOCK(new_object); } #define OBSC_TEST_ALL_SHADOWED 0x0001 #define OBSC_COLLAPSE_NOWAIT 0x0002 #define OBSC_COLLAPSE_WAIT 0x0004 static int vm_object_backing_scan(vm_object_t object, int op) { int r = 1; vm_page_t p; vm_object_t backing_object; vm_pindex_t backing_offset_index; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); VM_OBJECT_LOCK_ASSERT(object->backing_object, MA_OWNED); backing_object = object->backing_object; backing_offset_index = OFF_TO_IDX(object->backing_object_offset); /* * Initial conditions */ if (op & OBSC_TEST_ALL_SHADOWED) { /* * We do not want to have to test for the existence of cache * or swap pages in the backing object. XXX but with the * new swapper this would be pretty easy to do. * * XXX what about anonymous MAP_SHARED memory that hasn't * been ZFOD faulted yet? If we do not test for this, the * shadow test may succeed! XXX */ if (backing_object->type != OBJT_DEFAULT) { return (0); } } if (op & OBSC_COLLAPSE_WAIT) { vm_object_set_flag(backing_object, OBJ_DEAD); } /* * Our scan */ p = TAILQ_FIRST(&backing_object->memq); while (p) { vm_page_t next = TAILQ_NEXT(p, listq); vm_pindex_t new_pindex = p->pindex - backing_offset_index; if (op & OBSC_TEST_ALL_SHADOWED) { vm_page_t pp; /* * Ignore pages outside the parent object's range * and outside the parent object's mapping of the * backing object. * * note that we do not busy the backing object's * page. */ if ( p->pindex < backing_offset_index || new_pindex >= object->size ) { p = next; continue; } /* * See if the parent has the page or if the parent's * object pager has the page. If the parent has the * page but the page is not valid, the parent's * object pager must have the page. * * If this fails, the parent does not completely shadow * the object and we might as well give up now. */ pp = vm_page_lookup(object, new_pindex); if ( (pp == NULL || pp->valid == 0) && !vm_pager_has_page(object, new_pindex, NULL, NULL) ) { r = 0; break; } } /* * Check for busy page */ if (op & (OBSC_COLLAPSE_WAIT | OBSC_COLLAPSE_NOWAIT)) { vm_page_t pp; if (op & OBSC_COLLAPSE_NOWAIT) { if ((p->oflags & VPO_BUSY) || !p->valid || p->busy) { p = next; continue; } } else if (op & OBSC_COLLAPSE_WAIT) { if ((p->oflags & VPO_BUSY) || p->busy) { VM_OBJECT_UNLOCK(object); p->oflags |= VPO_WANTED; msleep(p, VM_OBJECT_MTX(backing_object), PDROP | PVM, "vmocol", 0); VM_OBJECT_LOCK(object); VM_OBJECT_LOCK(backing_object); /* * If we slept, anything could have * happened. Since the object is * marked dead, the backing offset * should not have changed so we * just restart our scan. */ p = TAILQ_FIRST(&backing_object->memq); continue; } } KASSERT( p->object == backing_object, ("vm_object_backing_scan: object mismatch") ); /* * Destroy any associated swap */ if (backing_object->type == OBJT_SWAP) { swap_pager_freespace( backing_object, p->pindex, 1 ); } if ( p->pindex < backing_offset_index || new_pindex >= object->size ) { /* * Page is out of the parent object's range, we * can simply destroy it. */ vm_page_lock(p); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); if (p->wire_count == 0) vm_page_free(p); else vm_page_remove(p); vm_page_unlock(p); p = next; continue; } pp = vm_page_lookup(object, new_pindex); if ( pp != NULL || vm_pager_has_page(object, new_pindex, NULL, NULL) ) { /* * page already exists in parent OR swap exists * for this location in the parent. Destroy * the original page from the backing object. * * Leave the parent's page alone */ vm_page_lock(p); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); if (p->wire_count == 0) vm_page_free(p); else vm_page_remove(p); vm_page_unlock(p); p = next; continue; } #if VM_NRESERVLEVEL > 0 /* * Rename the reservation. */ vm_reserv_rename(p, object, backing_object, backing_offset_index); #endif /* * Page does not exist in parent, rename the * page from the backing object to the main object. * * If the page was mapped to a process, it can remain * mapped through the rename. */ vm_page_lock(p); vm_page_rename(p, object, new_pindex); vm_page_unlock(p); /* page automatically made dirty by rename */ } p = next; } return (r); } /* * this version of collapse allows the operation to occur earlier and * when paging_in_progress is true for an object... This is not a complete * operation, but should plug 99.9% of the rest of the leaks. */ static void vm_object_qcollapse(vm_object_t object) { vm_object_t backing_object = object->backing_object; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); VM_OBJECT_LOCK_ASSERT(backing_object, MA_OWNED); if (backing_object->ref_count != 1) return; vm_object_backing_scan(object, OBSC_COLLAPSE_NOWAIT); } /* * vm_object_collapse: * * Collapse an object with the object backing it. * Pages in the backing object are moved into the * parent, and the backing object is deallocated. */ void vm_object_collapse(vm_object_t object) { VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); while (TRUE) { vm_object_t backing_object; /* * Verify that the conditions are right for collapse: * * The object exists and the backing object exists. */ if ((backing_object = object->backing_object) == NULL) break; /* * we check the backing object first, because it is most likely * not collapsable. */ VM_OBJECT_LOCK(backing_object); if (backing_object->handle != NULL || (backing_object->type != OBJT_DEFAULT && backing_object->type != OBJT_SWAP) || (backing_object->flags & OBJ_DEAD) || object->handle != NULL || (object->type != OBJT_DEFAULT && object->type != OBJT_SWAP) || (object->flags & OBJ_DEAD)) { VM_OBJECT_UNLOCK(backing_object); break; } if ( object->paging_in_progress != 0 || backing_object->paging_in_progress != 0 ) { vm_object_qcollapse(object); VM_OBJECT_UNLOCK(backing_object); break; } /* * We know that we can either collapse the backing object (if * the parent is the only reference to it) or (perhaps) have * the parent bypass the object if the parent happens to shadow * all the resident pages in the entire backing object. * * This is ignoring pager-backed pages such as swap pages. * vm_object_backing_scan fails the shadowing test in this * case. */ if (backing_object->ref_count == 1) { /* * If there is exactly one reference to the backing * object, we can collapse it into the parent. */ vm_object_backing_scan(object, OBSC_COLLAPSE_WAIT); #if VM_NRESERVLEVEL > 0 /* * Break any reservations from backing_object. */ if (__predict_false(!LIST_EMPTY(&backing_object->rvq))) vm_reserv_break_all(backing_object); #endif /* * Move the pager from backing_object to object. */ if (backing_object->type == OBJT_SWAP) { /* * swap_pager_copy() can sleep, in which case * the backing_object's and object's locks are * released and reacquired. */ swap_pager_copy( backing_object, object, OFF_TO_IDX(object->backing_object_offset), TRUE); /* * Free any cached pages from backing_object. */ if (__predict_false(backing_object->cache != NULL)) vm_page_cache_free(backing_object, 0, 0); } /* * Object now shadows whatever backing_object did. * Note that the reference to * backing_object->backing_object moves from within * backing_object to within object. */ LIST_REMOVE(object, shadow_list); backing_object->shadow_count--; if (backing_object->backing_object) { VM_OBJECT_LOCK(backing_object->backing_object); LIST_REMOVE(backing_object, shadow_list); LIST_INSERT_HEAD( &backing_object->backing_object->shadow_head, object, shadow_list); /* * The shadow_count has not changed. */ VM_OBJECT_UNLOCK(backing_object->backing_object); } object->backing_object = backing_object->backing_object; object->backing_object_offset += backing_object->backing_object_offset; /* * Discard backing_object. * * Since the backing object has no pages, no pager left, * and no object references within it, all that is * necessary is to dispose of it. */ KASSERT(backing_object->ref_count == 1, ( "backing_object %p was somehow re-referenced during collapse!", backing_object)); VM_OBJECT_UNLOCK(backing_object); vm_object_destroy(backing_object); object_collapses++; } else { vm_object_t new_backing_object; /* * If we do not entirely shadow the backing object, * there is nothing we can do so we give up. */ if (object->resident_page_count != object->size && vm_object_backing_scan(object, OBSC_TEST_ALL_SHADOWED) == 0) { VM_OBJECT_UNLOCK(backing_object); break; } /* * Make the parent shadow the next object in the * chain. Deallocating backing_object will not remove * it, since its reference count is at least 2. */ LIST_REMOVE(object, shadow_list); backing_object->shadow_count--; new_backing_object = backing_object->backing_object; if ((object->backing_object = new_backing_object) != NULL) { VM_OBJECT_LOCK(new_backing_object); LIST_INSERT_HEAD( &new_backing_object->shadow_head, object, shadow_list ); new_backing_object->shadow_count++; vm_object_reference_locked(new_backing_object); VM_OBJECT_UNLOCK(new_backing_object); object->backing_object_offset += backing_object->backing_object_offset; } /* * Drop the reference count on backing_object. Since * its ref_count was at least 2, it will not vanish. */ backing_object->ref_count--; VM_OBJECT_UNLOCK(backing_object); object_bypasses++; } /* * Try again with this object's new backing object. */ } } /* * vm_object_page_remove: * * For the given object, either frees or invalidates each of the * specified pages. In general, a page is freed. However, if a * page is wired for any reason other than the existence of a * managed, wired mapping, then it may be invalidated but not * removed from the object. Pages are specified by the given * range ["start", "end") and Boolean "clean_only". As a * special case, if "end" is zero, then the range extends from * "start" to the end of the object. If "clean_only" is TRUE, * then only the non-dirty pages within the specified range are * affected. * * In general, this operation should only be performed on objects * that contain managed pages. There are two exceptions. First, * it may be performed on the kernel and kmem objects. Second, * it may be used by msync(..., MS_INVALIDATE) to invalidate * device-backed pages. In both of these cases, "clean_only" * must be FALSE. * * The object must be locked. */ void vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end, boolean_t clean_only) { vm_page_t p, next; int wirings; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); if (object->resident_page_count == 0) goto skipmemq; /* * Since physically-backed objects do not use managed pages, we can't * remove pages from the object (we must instead remove the page * references, and then destroy the object). */ KASSERT(object->type != OBJT_PHYS || object == kernel_object || object == kmem_object, ("attempt to remove pages from a physical object")); vm_object_pip_add(object, 1); again: p = vm_page_find_least(object, start); /* * Assert: the variable p is either (1) the page with the * least pindex greater than or equal to the parameter pindex * or (2) NULL. */ for (; p != NULL && (p->pindex < end || end == 0); p = next) { next = TAILQ_NEXT(p, listq); /* * If the page is wired for any reason besides the * existence of managed, wired mappings, then it cannot * be freed. For example, fictitious pages, which * represent device memory, are inherently wired and * cannot be freed. They can, however, be invalidated * if "clean_only" is FALSE. */ vm_page_lock(p); if ((wirings = p->wire_count) != 0 && (wirings = pmap_page_wired_mappings(p)) != p->wire_count) { /* Fictitious pages do not have managed mappings. */ if ((p->flags & PG_FICTITIOUS) == 0) pmap_remove_all(p); /* Account for removal of managed, wired mappings. */ p->wire_count -= wirings; if (!clean_only) { p->valid = 0; vm_page_undirty(p); } vm_page_unlock(p); continue; } if (vm_page_sleep_if_busy(p, TRUE, "vmopar")) goto again; KASSERT((p->flags & PG_FICTITIOUS) == 0, ("vm_object_page_remove: page %p is fictitious", p)); if (clean_only && p->valid) { pmap_remove_write(p); if (p->dirty) { vm_page_unlock(p); continue; } } pmap_remove_all(p); /* Account for removal of managed, wired mappings. */ if (wirings != 0) p->wire_count -= wirings; vm_page_free(p); vm_page_unlock(p); } vm_object_pip_wakeup(object); skipmemq: if (__predict_false(object->cache != NULL)) vm_page_cache_free(object, start, end); } /* * Populate the specified range of the object with valid pages. Returns * TRUE if the range is successfully populated and FALSE otherwise. * * Note: This function should be optimized to pass a larger array of * pages to vm_pager_get_pages() before it is applied to a non- * OBJT_DEVICE object. * * The object must be locked. */ boolean_t vm_object_populate(vm_object_t object, vm_pindex_t start, vm_pindex_t end) { vm_page_t m, ma[1]; vm_pindex_t pindex; int rv; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); for (pindex = start; pindex < end; pindex++) { m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); if (m->valid != VM_PAGE_BITS_ALL) { ma[0] = m; rv = vm_pager_get_pages(object, ma, 1, 0); m = vm_page_lookup(object, pindex); if (m == NULL) break; if (rv != VM_PAGER_OK) { vm_page_lock(m); vm_page_free(m); vm_page_unlock(m); break; } } /* * Keep "m" busy because a subsequent iteration may unlock * the object. */ } if (pindex > start) { m = vm_page_lookup(object, start); while (m != NULL && m->pindex < pindex) { vm_page_wakeup(m); m = TAILQ_NEXT(m, listq); } } return (pindex == end); } /* * Routine: vm_object_coalesce * Function: Coalesces two objects backing up adjoining * regions of memory into a single object. * * returns TRUE if objects were combined. * * NOTE: Only works at the moment if the second object is NULL - * if it's not, which object do we lock first? * * Parameters: * prev_object First object to coalesce * prev_offset Offset into prev_object * prev_size Size of reference to prev_object * next_size Size of reference to the second object * reserved Indicator that extension region has * swap accounted for * * Conditions: * The object must *not* be locked. */ boolean_t vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset, vm_size_t prev_size, vm_size_t next_size, boolean_t reserved) { vm_pindex_t next_pindex; if (prev_object == NULL) return (TRUE); VM_OBJECT_LOCK(prev_object); if (prev_object->type != OBJT_DEFAULT && prev_object->type != OBJT_SWAP) { VM_OBJECT_UNLOCK(prev_object); return (FALSE); } /* * Try to collapse the object first */ vm_object_collapse(prev_object); /* * Can't coalesce if: . more than one reference . paged out . shadows * another object . has a copy elsewhere (any of which mean that the * pages not mapped to prev_entry may be in use anyway) */ if (prev_object->backing_object != NULL) { VM_OBJECT_UNLOCK(prev_object); return (FALSE); } prev_size >>= PAGE_SHIFT; next_size >>= PAGE_SHIFT; next_pindex = OFF_TO_IDX(prev_offset) + prev_size; if ((prev_object->ref_count > 1) && (prev_object->size != next_pindex)) { VM_OBJECT_UNLOCK(prev_object); return (FALSE); } /* * Account for the charge. */ if (prev_object->uip != NULL) { /* * If prev_object was charged, then this mapping, * althought not charged now, may become writable * later. Non-NULL uip in the object would prevent * swap reservation during enabling of the write * access, so reserve swap now. Failed reservation * cause allocation of the separate object for the map * entry, and swap reservation for this entry is * managed in appropriate time. */ if (!reserved && !swap_reserve_by_uid(ptoa(next_size), prev_object->uip)) { return (FALSE); } prev_object->charge += ptoa(next_size); } /* * Remove any pages that may still be in the object from a previous * deallocation. */ if (next_pindex < prev_object->size) { vm_object_page_remove(prev_object, next_pindex, next_pindex + next_size, FALSE); if (prev_object->type == OBJT_SWAP) swap_pager_freespace(prev_object, next_pindex, next_size); #if 0 if (prev_object->uip != NULL) { KASSERT(prev_object->charge >= ptoa(prev_object->size - next_pindex), ("object %p overcharged 1 %jx %jx", prev_object, (uintmax_t)next_pindex, (uintmax_t)next_size)); prev_object->charge -= ptoa(prev_object->size - next_pindex); } #endif } /* * Extend the object if necessary. */ if (next_pindex + next_size > prev_object->size) prev_object->size = next_pindex + next_size; VM_OBJECT_UNLOCK(prev_object); return (TRUE); } void vm_object_set_writeable_dirty(vm_object_t object) { VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); if (object->type != OBJT_VNODE || (object->flags & OBJ_MIGHTBEDIRTY) != 0) return; vm_object_set_flag(object, OBJ_MIGHTBEDIRTY); } #include "opt_ddb.h" #ifdef DDB #include #include #include static int _vm_object_in_map(vm_map_t map, vm_object_t object, vm_map_entry_t entry) { vm_map_t tmpm; vm_map_entry_t tmpe; vm_object_t obj; int entcount; if (map == 0) return 0; if (entry == 0) { tmpe = map->header.next; entcount = map->nentries; while (entcount-- && (tmpe != &map->header)) { if (_vm_object_in_map(map, object, tmpe)) { return 1; } tmpe = tmpe->next; } } else if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) { tmpm = entry->object.sub_map; tmpe = tmpm->header.next; entcount = tmpm->nentries; while (entcount-- && tmpe != &tmpm->header) { if (_vm_object_in_map(tmpm, object, tmpe)) { return 1; } tmpe = tmpe->next; } } else if ((obj = entry->object.vm_object) != NULL) { for (; obj; obj = obj->backing_object) if (obj == object) { return 1; } } return 0; } static int vm_object_in_map(vm_object_t object) { struct proc *p; /* sx_slock(&allproc_lock); */ FOREACH_PROC_IN_SYSTEM(p) { if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */) continue; if (_vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) { /* sx_sunlock(&allproc_lock); */ return 1; } } /* sx_sunlock(&allproc_lock); */ if (_vm_object_in_map(kernel_map, object, 0)) return 1; if (_vm_object_in_map(kmem_map, object, 0)) return 1; if (_vm_object_in_map(pager_map, object, 0)) return 1; if (_vm_object_in_map(buffer_map, object, 0)) return 1; return 0; } DB_SHOW_COMMAND(vmochk, vm_object_check) { vm_object_t object; /* * make sure that internal objs are in a map somewhere * and none have zero ref counts. */ TAILQ_FOREACH(object, &vm_object_list, object_list) { if (object->handle == NULL && (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) { if (object->ref_count == 0) { db_printf("vmochk: internal obj has zero ref count: %ld\n", (long)object->size); } if (!vm_object_in_map(object)) { db_printf( "vmochk: internal obj is not in a map: " "ref: %d, size: %lu: 0x%lx, backing_object: %p\n", object->ref_count, (u_long)object->size, (u_long)object->size, (void *)object->backing_object); } } } } /* * vm_object_print: [ debug ] */ DB_SHOW_COMMAND(object, vm_object_print_static) { /* XXX convert args. */ vm_object_t object = (vm_object_t)addr; boolean_t full = have_addr; vm_page_t p; /* XXX count is an (unused) arg. Avoid shadowing it. */ #define count was_count int count; if (object == NULL) return; db_iprintf( "Object %p: type=%d, size=0x%jx, res=%d, ref=%d, flags=0x%x uip %d charge %jx\n", object, (int)object->type, (uintmax_t)object->size, object->resident_page_count, object->ref_count, object->flags, object->uip ? object->uip->ui_uid : -1, (uintmax_t)object->charge); db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%jx\n", object->shadow_count, object->backing_object ? object->backing_object->ref_count : 0, object->backing_object, (uintmax_t)object->backing_object_offset); if (!full) return; db_indent += 2; count = 0; TAILQ_FOREACH(p, &object->memq, listq) { if (count == 0) db_iprintf("memory:="); else if (count == 6) { db_printf("\n"); db_iprintf(" ..."); count = 0; } else db_printf(","); count++; db_printf("(off=0x%jx,page=0x%jx)", (uintmax_t)p->pindex, (uintmax_t)VM_PAGE_TO_PHYS(p)); } if (count != 0) db_printf("\n"); db_indent -= 2; } /* XXX. */ #undef count /* XXX need this non-static entry for calling from vm_map_print. */ void vm_object_print( /* db_expr_t */ long addr, boolean_t have_addr, /* db_expr_t */ long count, char *modif) { vm_object_print_static(addr, have_addr, count, modif); } DB_SHOW_COMMAND(vmopag, vm_object_print_pages) { vm_object_t object; vm_pindex_t fidx; vm_paddr_t pa; vm_page_t m, prev_m; int rcount, nl, c; nl = 0; TAILQ_FOREACH(object, &vm_object_list, object_list) { db_printf("new object: %p\n", (void *)object); if (nl > 18) { c = cngetc(); if (c != ' ') return; nl = 0; } nl++; rcount = 0; fidx = 0; pa = -1; TAILQ_FOREACH(m, &object->memq, listq) { if (m->pindex > 128) break; if ((prev_m = TAILQ_PREV(m, pglist, listq)) != NULL && prev_m->pindex + 1 != m->pindex) { if (rcount) { db_printf(" index(%ld)run(%d)pa(0x%lx)\n", (long)fidx, rcount, (long)pa); if (nl > 18) { c = cngetc(); if (c != ' ') return; nl = 0; } nl++; rcount = 0; } } if (rcount && (VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) { ++rcount; continue; } if (rcount) { db_printf(" index(%ld)run(%d)pa(0x%lx)\n", (long)fidx, rcount, (long)pa); if (nl > 18) { c = cngetc(); if (c != ' ') return; nl = 0; } nl++; } fidx = m->pindex; pa = VM_PAGE_TO_PHYS(m); rcount = 1; } if (rcount) { db_printf(" index(%ld)run(%d)pa(0x%lx)\n", (long)fidx, rcount, (long)pa); if (nl > 18) { c = cngetc(); if (c != ' ') return; nl = 0; } nl++; } } } #endif /* DDB */ Index: projects/binutils-2.17/sys/x86/x86/local_apic.c =================================================================== --- projects/binutils-2.17/sys/x86/x86/local_apic.c (revision 215829) +++ projects/binutils-2.17/sys/x86/x86/local_apic.c (revision 215830) @@ -1,1522 +1,1522 @@ /*- * Copyright (c) 2003 John Baldwin * Copyright (c) 1996, by Steve Passe * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. * 3. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Local APIC support on Pentium and later processors. */ #include __FBSDID("$FreeBSD$"); #include "opt_hwpmc_hooks.h" #include "opt_kdtrace.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #include #endif #ifdef __amd64__ #define SDT_APIC SDT_SYSIGT #define SDT_APICT SDT_SYSIGT #define GSEL_APIC 0 #else #define SDT_APIC SDT_SYS386IGT #define SDT_APICT SDT_SYS386TGT #define GSEL_APIC GSEL(GCODE_SEL, SEL_KPL) #endif /* Sanity checks on IDT vectors. */ CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT); CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS); CTASSERT(APIC_LOCAL_INTS == 240); CTASSERT(IPI_STOP < APIC_SPURIOUS_INT); /* Magic IRQ values for the timer and syscalls. */ #define IRQ_TIMER (NUM_IO_INTS + 1) #define IRQ_SYSCALL (NUM_IO_INTS + 2) #define IRQ_DTRACE_RET (NUM_IO_INTS + 3) /* * Support for local APICs. Local APICs manage interrupts on each * individual processor as opposed to I/O APICs which receive interrupts * from I/O devices and then forward them on to the local APICs. * * Local APICs can also send interrupts to each other thus providing the * mechanism for IPIs. */ struct lvt { u_int lvt_edgetrigger:1; u_int lvt_activehi:1; u_int lvt_masked:1; u_int lvt_active:1; u_int lvt_mode:16; u_int lvt_vector:8; }; struct lapic { struct lvt la_lvts[LVT_MAX + 1]; u_int la_id:8; u_int la_cluster:4; u_int la_cluster_id:2; u_int la_present:1; u_long *la_timer_count; u_long la_timer_period; u_int la_timer_mode; /* Include IDT_SYSCALL to make indexing easier. */ int la_ioint_irqs[APIC_NUM_IOINTS + 1]; } static lapics[MAX_APIC_ID + 1]; /* Global defaults for local APIC LVT entries. */ static struct lvt lvts[LVT_MAX + 1] = { { 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 }, /* LINT0: masked ExtINT */ { 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 }, /* LINT1: NMI */ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_TIMER_INT }, /* Timer */ { 1, 1, 0, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT }, /* Error */ { 1, 1, 1, 1, APIC_LVT_DM_NMI, 0 }, /* PMC */ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT }, /* Thermal */ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_CMC_INT }, /* CMCI */ }; static inthand_t *ioint_handlers[] = { NULL, /* 0 - 31 */ IDTVEC(apic_isr1), /* 32 - 63 */ IDTVEC(apic_isr2), /* 64 - 95 */ IDTVEC(apic_isr3), /* 96 - 127 */ IDTVEC(apic_isr4), /* 128 - 159 */ IDTVEC(apic_isr5), /* 160 - 191 */ IDTVEC(apic_isr6), /* 192 - 223 */ IDTVEC(apic_isr7), /* 224 - 255 */ }; static u_int32_t lapic_timer_divisors[] = { APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16, APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128 }; extern inthand_t IDTVEC(rsvd); volatile lapic_t *lapic; vm_paddr_t lapic_paddr; static u_long lapic_timer_divisor; static struct eventtimer lapic_et; static void lapic_enable(void); static void lapic_resume(struct pic *pic); static void lapic_timer_enable_intr(void); static void lapic_timer_oneshot(u_int count); static void lapic_timer_periodic(u_int count); static void lapic_timer_stop(void); static void lapic_timer_set_divisor(u_int divisor); static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value); static int lapic_et_start(struct eventtimer *et, struct bintime *first, struct bintime *period); static int lapic_et_stop(struct eventtimer *et); struct pic lapic_pic = { .pic_resume = lapic_resume }; static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value) { struct lvt *lvt; KASSERT(pin <= LVT_MAX, ("%s: pin %u out of range", __func__, pin)); if (la->la_lvts[pin].lvt_active) lvt = &la->la_lvts[pin]; else lvt = &lvts[pin]; value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM | APIC_LVT_VECTOR); if (lvt->lvt_edgetrigger == 0) value |= APIC_LVT_TM; if (lvt->lvt_activehi == 0) value |= APIC_LVT_IIPP_INTALO; if (lvt->lvt_masked) value |= APIC_LVT_M; value |= lvt->lvt_mode; switch (lvt->lvt_mode) { case APIC_LVT_DM_NMI: case APIC_LVT_DM_SMI: case APIC_LVT_DM_INIT: case APIC_LVT_DM_EXTINT: if (!lvt->lvt_edgetrigger) { printf("lapic%u: Forcing LINT%u to edge trigger\n", la->la_id, pin); value |= APIC_LVT_TM; } /* Use a vector of 0. */ break; case APIC_LVT_DM_FIXED: value |= lvt->lvt_vector; break; default: panic("bad APIC LVT delivery mode: %#x\n", value); } return (value); } /* * Map the local APIC and setup necessary interrupt vectors. */ void lapic_init(vm_paddr_t addr) { u_int regs[4]; int i, arat; /* Map the local APIC and setup the spurious interrupt handler. */ KASSERT(trunc_page(addr) == addr, ("local APIC not aligned on a page boundary")); lapic = pmap_mapdev(addr, sizeof(lapic_t)); lapic_paddr = addr; setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL, GSEL_APIC); /* Perform basic initialization of the BSP's local APIC. */ lapic_enable(); /* Set BSP's per-CPU local APIC ID. */ PCPU_SET(apic_id, lapic_id()); /* Local APIC timer interrupt. */ setidt(APIC_TIMER_INT, IDTVEC(timerint), SDT_APIC, SEL_KPL, GSEL_APIC); /* Local APIC error interrupt. */ setidt(APIC_ERROR_INT, IDTVEC(errorint), SDT_APIC, SEL_KPL, GSEL_APIC); /* XXX: Thermal interrupt */ /* Local APIC CMCI. */ setidt(APIC_CMC_INT, IDTVEC(cmcint), SDT_APICT, SEL_KPL, GSEL_APIC); if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) { arat = 0; /* Intel CPUID 0x06 EAX[2] set if APIC timer runs in C3. */ if (cpu_vendor_id == CPU_VENDOR_INTEL && cpu_high >= 6) { do_cpuid(0x06, regs); - if (regs[0] & 0x4) + if ((regs[0] & CPUTPM1_ARAT) != 0) arat = 1; } bzero(&lapic_et, sizeof(lapic_et)); lapic_et.et_name = "LAPIC"; lapic_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU; lapic_et.et_quality = 600; if (!arat) { lapic_et.et_flags |= ET_FLAGS_C3STOP; lapic_et.et_quality -= 200; } lapic_et.et_frequency = 0; /* We don't know frequency yet, so trying to guess. */ lapic_et.et_min_period.sec = 0; lapic_et.et_min_period.frac = 0x00001000LL << 32; lapic_et.et_max_period.sec = 1; lapic_et.et_max_period.frac = 0; lapic_et.et_start = lapic_et_start; lapic_et.et_stop = lapic_et_stop; lapic_et.et_priv = NULL; et_register(&lapic_et); } } /* * Create a local APIC instance. */ void lapic_create(u_int apic_id, int boot_cpu) { int i; if (apic_id > MAX_APIC_ID) { printf("APIC: Ignoring local APIC with ID %d\n", apic_id); if (boot_cpu) panic("Can't ignore BSP"); return; } KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u", apic_id)); /* * Assume no local LVT overrides and a cluster of 0 and * intra-cluster ID of 0. */ lapics[apic_id].la_present = 1; lapics[apic_id].la_id = apic_id; for (i = 0; i <= LVT_MAX; i++) { lapics[apic_id].la_lvts[i] = lvts[i]; lapics[apic_id].la_lvts[i].lvt_active = 0; } for (i = 0; i <= APIC_NUM_IOINTS; i++) lapics[apic_id].la_ioint_irqs[i] = -1; lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL; lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] = IRQ_TIMER; #ifdef KDTRACE_HOOKS lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] = IRQ_DTRACE_RET; #endif #ifdef SMP cpu_add(apic_id, boot_cpu); #endif } /* * Dump contents of local APIC registers */ void lapic_dump(const char* str) { uint32_t maxlvt; maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; printf("cpu%d %s:\n", PCPU_GET(cpuid), str); printf(" ID: 0x%08x VER: 0x%08x LDR: 0x%08x DFR: 0x%08x\n", lapic->id, lapic->version, lapic->ldr, lapic->dfr); printf(" lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n", lapic->lvt_lint0, lapic->lvt_lint1, lapic->tpr, lapic->svr); printf(" timer: 0x%08x therm: 0x%08x err: 0x%08x", lapic->lvt_timer, lapic->lvt_thermal, lapic->lvt_error); if (maxlvt >= LVT_PMC) printf(" pmc: 0x%08x", lapic->lvt_pcint); printf("\n"); if (maxlvt >= LVT_CMCI) printf(" cmci: 0x%08x\n", lapic->lvt_cmci); } void lapic_setup(int boot) { struct lapic *la; u_int32_t maxlvt; register_t saveintr; char buf[MAXCOMLEN + 1]; la = &lapics[lapic_id()]; KASSERT(la->la_present, ("missing APIC structure")); saveintr = intr_disable(); maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; /* Initialize the TPR to allow all interrupts. */ lapic_set_tpr(0); /* Setup spurious vector and enable the local APIC. */ lapic_enable(); /* Program LINT[01] LVT entries. */ lapic->lvt_lint0 = lvt_mode(la, LVT_LINT0, lapic->lvt_lint0); lapic->lvt_lint1 = lvt_mode(la, LVT_LINT1, lapic->lvt_lint1); /* Program the PMC LVT entry if present. */ if (maxlvt >= LVT_PMC) lapic->lvt_pcint = lvt_mode(la, LVT_PMC, lapic->lvt_pcint); /* Program timer LVT and setup handler. */ lapic->lvt_timer = lvt_mode(la, LVT_TIMER, lapic->lvt_timer); if (boot) { snprintf(buf, sizeof(buf), "cpu%d:timer", PCPU_GET(cpuid)); intrcnt_add(buf, &la->la_timer_count); } /* Setup the timer if configured. */ if (la->la_timer_mode != 0) { KASSERT(la->la_timer_period != 0, ("lapic%u: zero divisor", lapic_id())); lapic_timer_stop(); lapic_timer_set_divisor(lapic_timer_divisor); lapic_timer_enable_intr(); if (la->la_timer_mode == 1) lapic_timer_periodic(la->la_timer_period); else lapic_timer_oneshot(la->la_timer_period); } /* Program error LVT and clear any existing errors. */ lapic->lvt_error = lvt_mode(la, LVT_ERROR, lapic->lvt_error); lapic->esr = 0; /* XXX: Thermal LVT */ /* Program the CMCI LVT entry if present. */ if (maxlvt >= LVT_CMCI) lapic->lvt_cmci = lvt_mode(la, LVT_CMCI, lapic->lvt_cmci); intr_restore(saveintr); } void lapic_reenable_pmc(void) { #ifdef HWPMC_HOOKS uint32_t value; value = lapic->lvt_pcint; value &= ~APIC_LVT_M; lapic->lvt_pcint = value; #endif } #ifdef HWPMC_HOOKS static void lapic_update_pmc(void *dummy) { struct lapic *la; la = &lapics[lapic_id()]; lapic->lvt_pcint = lvt_mode(la, LVT_PMC, lapic->lvt_pcint); } #endif int lapic_enable_pmc(void) { #ifdef HWPMC_HOOKS u_int32_t maxlvt; /* Fail if the local APIC is not present. */ if (lapic == NULL) return (0); /* Fail if the PMC LVT is not present. */ maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; if (maxlvt < LVT_PMC) return (0); lvts[LVT_PMC].lvt_masked = 0; #ifdef SMP /* * If hwpmc was loaded at boot time then the APs may not be * started yet. In that case, don't forward the request to * them as they will program the lvt when they start. */ if (smp_started) smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); else #endif lapic_update_pmc(NULL); return (1); #else return (0); #endif } void lapic_disable_pmc(void) { #ifdef HWPMC_HOOKS u_int32_t maxlvt; /* Fail if the local APIC is not present. */ if (lapic == NULL) return; /* Fail if the PMC LVT is not present. */ maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; if (maxlvt < LVT_PMC) return; lvts[LVT_PMC].lvt_masked = 1; #ifdef SMP /* The APs should always be started when hwpmc is unloaded. */ KASSERT(mp_ncpus == 1 || smp_started, ("hwpmc unloaded too early")); #endif smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); #endif } static int lapic_et_start(struct eventtimer *et, struct bintime *first, struct bintime *period) { struct lapic *la; u_long value; if (et->et_frequency == 0) { /* Start off with a divisor of 2 (power on reset default). */ lapic_timer_divisor = 2; /* Try to calibrate the local APIC timer. */ do { lapic_timer_set_divisor(lapic_timer_divisor); lapic_timer_oneshot(APIC_TIMER_MAX_COUNT); DELAY(1000000); value = APIC_TIMER_MAX_COUNT - lapic->ccr_timer; if (value != APIC_TIMER_MAX_COUNT) break; lapic_timer_divisor <<= 1; } while (lapic_timer_divisor <= 128); if (lapic_timer_divisor > 128) panic("lapic: Divisor too big"); if (bootverbose) printf("lapic: Divisor %lu, Frequency %lu Hz\n", lapic_timer_divisor, value); et->et_frequency = value; et->et_min_period.sec = 0; et->et_min_period.frac = ((0x00000002LLU << 32) / et->et_frequency) << 32; et->et_max_period.sec = 0xfffffffeLLU / et->et_frequency; et->et_max_period.frac = ((0xfffffffeLLU << 32) / et->et_frequency) << 32; } lapic_timer_stop(); lapic_timer_set_divisor(lapic_timer_divisor); lapic_timer_enable_intr(); la = &lapics[lapic_id()]; if (period != NULL) { la->la_timer_mode = 1; la->la_timer_period = (et->et_frequency * (period->frac >> 32)) >> 32; if (period->sec != 0) la->la_timer_period += et->et_frequency * period->sec; lapic_timer_periodic(la->la_timer_period); } else { la->la_timer_mode = 2; la->la_timer_period = (et->et_frequency * (first->frac >> 32)) >> 32; if (first->sec != 0) la->la_timer_period += et->et_frequency * first->sec; lapic_timer_oneshot(la->la_timer_period); } return (0); } static int lapic_et_stop(struct eventtimer *et) { struct lapic *la = &lapics[lapic_id()]; la->la_timer_mode = 0; lapic_timer_stop(); return (0); } void lapic_disable(void) { uint32_t value; /* Software disable the local APIC. */ value = lapic->svr; value &= ~APIC_SVR_SWEN; lapic->svr = value; } static void lapic_enable(void) { u_int32_t value; /* Program the spurious vector to enable the local APIC. */ value = lapic->svr; value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS); value |= (APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT); lapic->svr = value; } /* Reset the local APIC on the BSP during resume. */ static void lapic_resume(struct pic *pic) { lapic_setup(0); } int lapic_id(void) { KASSERT(lapic != NULL, ("local APIC is not mapped")); return (lapic->id >> APIC_ID_SHIFT); } int lapic_intr_pending(u_int vector) { volatile u_int32_t *irr; /* * The IRR registers are an array of 128-bit registers each of * which only describes 32 interrupts in the low 32 bits.. Thus, * we divide the vector by 32 to get the 128-bit index. We then * multiply that index by 4 to get the equivalent index from * treating the IRR as an array of 32-bit registers. Finally, we * modulus the vector by 32 to determine the individual bit to * test. */ irr = &lapic->irr0; return (irr[(vector / 32) * 4] & 1 << (vector % 32)); } void lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id) { struct lapic *la; KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist", __func__, apic_id)); KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big", __func__, cluster)); KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID, ("%s: intra cluster id %u too big", __func__, cluster_id)); la = &lapics[apic_id]; la->la_cluster = cluster; la->la_cluster_id = cluster_id; } int lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked) { if (pin > LVT_MAX) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvts[pin].lvt_masked = masked; if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[pin].lvt_masked = masked; lapics[apic_id].la_lvts[pin].lvt_active = 1; if (bootverbose) printf("lapic%u:", apic_id); } if (bootverbose) printf(" LINT%u %s\n", pin, masked ? "masked" : "unmasked"); return (0); } int lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode) { struct lvt *lvt; if (pin > LVT_MAX) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvt = &lvts[pin]; if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lvt = &lapics[apic_id].la_lvts[pin]; lvt->lvt_active = 1; if (bootverbose) printf("lapic%u:", apic_id); } lvt->lvt_mode = mode; switch (mode) { case APIC_LVT_DM_NMI: case APIC_LVT_DM_SMI: case APIC_LVT_DM_INIT: case APIC_LVT_DM_EXTINT: lvt->lvt_edgetrigger = 1; lvt->lvt_activehi = 1; if (mode == APIC_LVT_DM_EXTINT) lvt->lvt_masked = 1; else lvt->lvt_masked = 0; break; default: panic("Unsupported delivery mode: 0x%x\n", mode); } if (bootverbose) { printf(" Routing "); switch (mode) { case APIC_LVT_DM_NMI: printf("NMI"); break; case APIC_LVT_DM_SMI: printf("SMI"); break; case APIC_LVT_DM_INIT: printf("INIT"); break; case APIC_LVT_DM_EXTINT: printf("ExtINT"); break; } printf(" -> LINT%u\n", pin); } return (0); } int lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol) { if (pin > LVT_MAX || pol == INTR_POLARITY_CONFORM) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH); if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[pin].lvt_active = 1; lapics[apic_id].la_lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH); if (bootverbose) printf("lapic%u:", apic_id); } if (bootverbose) printf(" LINT%u polarity: %s\n", pin, pol == INTR_POLARITY_HIGH ? "high" : "low"); return (0); } int lapic_set_lvt_triggermode(u_int apic_id, u_int pin, enum intr_trigger trigger) { if (pin > LVT_MAX || trigger == INTR_TRIGGER_CONFORM) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE); if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE); lapics[apic_id].la_lvts[pin].lvt_active = 1; if (bootverbose) printf("lapic%u:", apic_id); } if (bootverbose) printf(" LINT%u trigger: %s\n", pin, trigger == INTR_TRIGGER_EDGE ? "edge" : "level"); return (0); } /* * Adjust the TPR of the current CPU so that it blocks all interrupts below * the passed in vector. */ void lapic_set_tpr(u_int vector) { #ifdef CHEAP_TPR lapic->tpr = vector; #else u_int32_t tpr; tpr = lapic->tpr & ~APIC_TPR_PRIO; tpr |= vector; lapic->tpr = tpr; #endif } void lapic_eoi(void) { lapic->eoi = 0; } void lapic_handle_intr(int vector, struct trapframe *frame) { struct intsrc *isrc; if (vector == -1) panic("Couldn't get vector from ISR!"); isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id), vector)); intr_execute_handlers(isrc, frame); } void lapic_handle_timer(struct trapframe *frame) { struct lapic *la; struct trapframe *oldframe; struct thread *td; /* Send EOI first thing. */ lapic_eoi(); #if defined(SMP) && !defined(SCHED_ULE) /* * Don't do any accounting for the disabled HTT cores, since it * will provide misleading numbers for the userland. * * No locking is necessary here, since even if we loose the race * when hlt_cpus_mask changes it is not a big deal, really. * * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask * and unlike other schedulers it actually schedules threads to * those CPUs. */ if ((hlt_cpus_mask & (1 << PCPU_GET(cpuid))) != 0) return; #endif /* Look up our local APIC structure for the tick counters. */ la = &lapics[PCPU_GET(apic_id)]; (*la->la_timer_count)++; critical_enter(); if (lapic_et.et_active) { td = curthread; td->td_intr_nesting_level++; oldframe = td->td_intr_frame; td->td_intr_frame = frame; lapic_et.et_event_cb(&lapic_et, lapic_et.et_arg); td->td_intr_frame = oldframe; td->td_intr_nesting_level--; } critical_exit(); } static void lapic_timer_set_divisor(u_int divisor) { KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor)); KASSERT(ffs(divisor) <= sizeof(lapic_timer_divisors) / sizeof(u_int32_t), ("lapic: invalid divisor %u", divisor)); lapic->dcr_timer = lapic_timer_divisors[ffs(divisor) - 1]; } static void lapic_timer_oneshot(u_int count) { u_int32_t value; value = lapic->lvt_timer; value &= ~APIC_LVTT_TM; value |= APIC_LVTT_TM_ONE_SHOT; lapic->lvt_timer = value; lapic->icr_timer = count; } static void lapic_timer_periodic(u_int count) { u_int32_t value; value = lapic->lvt_timer; value &= ~APIC_LVTT_TM; value |= APIC_LVTT_TM_PERIODIC; lapic->lvt_timer = value; lapic->icr_timer = count; } static void lapic_timer_stop(void) { u_int32_t value; value = lapic->lvt_timer; value &= ~APIC_LVTT_TM; value |= APIC_LVT_M; lapic->lvt_timer = value; lapic->icr_timer = 0; } static void lapic_timer_enable_intr(void) { u_int32_t value; value = lapic->lvt_timer; value &= ~APIC_LVT_M; lapic->lvt_timer = value; } void lapic_handle_cmc(void) { lapic_eoi(); cmc_intr(); } /* * Called from the mca_init() to activate the CMC interrupt if this CPU is * responsible for monitoring any MC banks for CMC events. Since mca_init() * is called prior to lapic_setup() during boot, this just needs to unmask * this CPU's LVT_CMCI entry. */ void lapic_enable_cmc(void) { u_int apic_id; apic_id = PCPU_GET(apic_id); KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[LVT_CMCI].lvt_masked = 0; lapics[apic_id].la_lvts[LVT_CMCI].lvt_active = 1; if (bootverbose) printf("lapic%u: CMCI unmasked\n", apic_id); } void lapic_handle_error(void) { u_int32_t esr; /* * Read the contents of the error status register. Write to * the register first before reading from it to force the APIC * to update its value to indicate any errors that have * occurred since the previous write to the register. */ lapic->esr = 0; esr = lapic->esr; printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr); lapic_eoi(); } u_int apic_cpuid(u_int apic_id) { #ifdef SMP return apic_cpuids[apic_id]; #else return 0; #endif } /* Request a free IDT vector to be used by the specified IRQ. */ u_int apic_alloc_vector(u_int apic_id, u_int irq) { u_int vector; KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq)); /* * Search for a free vector. Currently we just use a very simple * algorithm to find the first free vector. */ mtx_lock_spin(&icu_lock); for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { if (lapics[apic_id].la_ioint_irqs[vector] != -1) continue; lapics[apic_id].la_ioint_irqs[vector] = irq; mtx_unlock_spin(&icu_lock); return (vector + APIC_IO_INTS); } mtx_unlock_spin(&icu_lock); return (0); } /* * Request 'count' free contiguous IDT vectors to be used by 'count' * IRQs. 'count' must be a power of two and the vectors will be * aligned on a boundary of 'align'. If the request cannot be * satisfied, 0 is returned. */ u_int apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align) { u_int first, run, vector; KASSERT(powerof2(count), ("bad count")); KASSERT(powerof2(align), ("bad align")); KASSERT(align >= count, ("align < count")); #ifdef INVARIANTS for (run = 0; run < count; run++) KASSERT(irqs[run] < NUM_IO_INTS, ("Invalid IRQ %u at index %u", irqs[run], run)); #endif /* * Search for 'count' free vectors. As with apic_alloc_vector(), * this just uses a simple first fit algorithm. */ run = 0; first = 0; mtx_lock_spin(&icu_lock); for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { /* Vector is in use, end run. */ if (lapics[apic_id].la_ioint_irqs[vector] != -1) { run = 0; first = 0; continue; } /* Start a new run if run == 0 and vector is aligned. */ if (run == 0) { if ((vector & (align - 1)) != 0) continue; first = vector; } run++; /* Keep looping if the run isn't long enough yet. */ if (run < count) continue; /* Found a run, assign IRQs and return the first vector. */ for (vector = 0; vector < count; vector++) lapics[apic_id].la_ioint_irqs[first + vector] = irqs[vector]; mtx_unlock_spin(&icu_lock); return (first + APIC_IO_INTS); } mtx_unlock_spin(&icu_lock); printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count); return (0); } /* * Enable a vector for a particular apic_id. Since all lapics share idt * entries and ioint_handlers this enables the vector on all lapics. lapics * which do not have the vector configured would report spurious interrupts * should it fire. */ void apic_enable_vector(u_int apic_id, u_int vector) { KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); KASSERT(ioint_handlers[vector / 32] != NULL, ("No ISR handler for vector %u", vector)); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif setidt(vector, ioint_handlers[vector / 32], SDT_APIC, SEL_KPL, GSEL_APIC); } void apic_disable_vector(u_int apic_id, u_int vector) { KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif KASSERT(ioint_handlers[vector / 32] != NULL, ("No ISR handler for vector %u", vector)); #ifdef notyet /* * We can not currently clear the idt entry because other cpus * may have a valid vector at this offset. */ setidt(vector, &IDTVEC(rsvd), SDT_APICT, SEL_KPL, GSEL_APIC); #endif } /* Release an APIC vector when it's no longer in use. */ void apic_free_vector(u_int apic_id, u_int vector, u_int irq) { struct thread *td; KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && vector <= APIC_IO_INTS + APIC_NUM_IOINTS, ("Vector %u does not map to an IRQ line", vector)); KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq)); KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] == irq, ("IRQ mismatch")); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif /* * Bind us to the cpu that owned the vector before freeing it so * we don't lose an interrupt delivery race. */ td = curthread; if (!rebooting) { thread_lock(td); if (sched_is_bound(td)) panic("apic_free_vector: Thread already bound.\n"); sched_bind(td, apic_cpuid(apic_id)); thread_unlock(td); } mtx_lock_spin(&icu_lock); lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = -1; mtx_unlock_spin(&icu_lock); if (!rebooting) { thread_lock(td); sched_unbind(td); thread_unlock(td); } } /* Map an IDT vector (APIC) to an IRQ (interrupt source). */ u_int apic_idt_to_irq(u_int apic_id, u_int vector) { int irq; KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && vector <= APIC_IO_INTS + APIC_NUM_IOINTS, ("Vector %u does not map to an IRQ line", vector)); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS]; if (irq < 0) irq = 0; return (irq); } #ifdef DDB /* * Dump data about APIC IDT vector mappings. */ DB_SHOW_COMMAND(apic, db_show_apic) { struct intsrc *isrc; int i, verbose; u_int apic_id; u_int irq; if (strcmp(modif, "vv") == 0) verbose = 2; else if (strcmp(modif, "v") == 0) verbose = 1; else verbose = 0; for (apic_id = 0; apic_id <= MAX_APIC_ID; apic_id++) { if (lapics[apic_id].la_present == 0) continue; db_printf("Interrupts bound to lapic %u\n", apic_id); for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) { irq = lapics[apic_id].la_ioint_irqs[i]; if (irq == -1 || irq == IRQ_SYSCALL) continue; #ifdef KDTRACE_HOOKS if (irq == IRQ_DTRACE_RET) continue; #endif db_printf("vec 0x%2x -> ", i + APIC_IO_INTS); if (irq == IRQ_TIMER) db_printf("lapic timer\n"); else if (irq < NUM_IO_INTS) { isrc = intr_lookup_source(irq); if (isrc == NULL || verbose == 0) db_printf("IRQ %u\n", irq); else db_dump_intr_event(isrc->is_event, verbose == 2); } else db_printf("IRQ %u ???\n", irq); } } } static void dump_mask(const char *prefix, uint32_t v, int base) { int i, first; first = 1; for (i = 0; i < 32; i++) if (v & (1 << i)) { if (first) { db_printf("%s:", prefix); first = 0; } db_printf(" %02x", base + i); } if (!first) db_printf("\n"); } /* Show info from the lapic regs for this CPU. */ DB_SHOW_COMMAND(lapic, db_show_lapic) { uint32_t v; db_printf("lapic ID = %d\n", lapic_id()); v = lapic->version; db_printf("version = %d.%d\n", (v & APIC_VER_VERSION) >> 4, v & 0xf); db_printf("max LVT = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT); v = lapic->svr; db_printf("SVR = %02x (%s)\n", v & APIC_SVR_VECTOR, v & APIC_SVR_ENABLE ? "enabled" : "disabled"); db_printf("TPR = %02x\n", lapic->tpr); #define dump_field(prefix, index) \ dump_mask(__XSTRING(prefix ## index), lapic->prefix ## index, \ index * 32) db_printf("In-service Interrupts:\n"); dump_field(isr, 0); dump_field(isr, 1); dump_field(isr, 2); dump_field(isr, 3); dump_field(isr, 4); dump_field(isr, 5); dump_field(isr, 6); dump_field(isr, 7); db_printf("TMR Interrupts:\n"); dump_field(tmr, 0); dump_field(tmr, 1); dump_field(tmr, 2); dump_field(tmr, 3); dump_field(tmr, 4); dump_field(tmr, 5); dump_field(tmr, 6); dump_field(tmr, 7); db_printf("IRR Interrupts:\n"); dump_field(irr, 0); dump_field(irr, 1); dump_field(irr, 2); dump_field(irr, 3); dump_field(irr, 4); dump_field(irr, 5); dump_field(irr, 6); dump_field(irr, 7); #undef dump_field } #endif /* * APIC probing support code. This includes code to manage enumerators. */ static SLIST_HEAD(, apic_enumerator) enumerators = SLIST_HEAD_INITIALIZER(enumerators); static struct apic_enumerator *best_enum; void apic_register_enumerator(struct apic_enumerator *enumerator) { #ifdef INVARIANTS struct apic_enumerator *apic_enum; SLIST_FOREACH(apic_enum, &enumerators, apic_next) { if (apic_enum == enumerator) panic("%s: Duplicate register of %s", __func__, enumerator->apic_name); } #endif SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next); } /* * We have to look for CPU's very, very early because certain subsystems * want to know how many CPU's we have extremely early on in the boot * process. */ static void apic_init(void *dummy __unused) { struct apic_enumerator *enumerator; #ifndef __amd64__ uint64_t apic_base; #endif int retval, best; /* We only support built in local APICs. */ if (!(cpu_feature & CPUID_APIC)) return; /* Don't probe if APIC mode is disabled. */ if (resource_disabled("apic", 0)) return; /* Probe all the enumerators to find the best match. */ best_enum = NULL; best = 0; SLIST_FOREACH(enumerator, &enumerators, apic_next) { retval = enumerator->apic_probe(); if (retval > 0) continue; if (best_enum == NULL || best < retval) { best_enum = enumerator; best = retval; } } if (best_enum == NULL) { if (bootverbose) printf("APIC: Could not find any APICs.\n"); return; } if (bootverbose) printf("APIC: Using the %s enumerator.\n", best_enum->apic_name); #ifndef __amd64__ /* * To work around an errata, we disable the local APIC on some * CPUs during early startup. We need to turn the local APIC back * on on such CPUs now. */ if (cpu == CPU_686 && cpu_vendor_id == CPU_VENDOR_INTEL && (cpu_id & 0xff0) == 0x610) { apic_base = rdmsr(MSR_APICBASE); apic_base |= APICBASE_ENABLED; wrmsr(MSR_APICBASE, apic_base); } #endif /* Probe the CPU's in the system. */ retval = best_enum->apic_probe_cpus(); if (retval != 0) printf("%s: Failed to probe CPUs: returned %d\n", best_enum->apic_name, retval); } SYSINIT(apic_init, SI_SUB_TUNABLES - 1, SI_ORDER_SECOND, apic_init, NULL); /* * Setup the local APIC. We have to do this prior to starting up the APs * in the SMP case. */ static void apic_setup_local(void *dummy __unused) { int retval; if (best_enum == NULL) return; /* Initialize the local APIC. */ retval = best_enum->apic_setup_local(); if (retval != 0) printf("%s: Failed to setup the local APIC: returned %d\n", best_enum->apic_name, retval); } SYSINIT(apic_setup_local, SI_SUB_CPU, SI_ORDER_SECOND, apic_setup_local, NULL); /* * Setup the I/O APICs. */ static void apic_setup_io(void *dummy __unused) { int retval; if (best_enum == NULL) return; retval = best_enum->apic_setup_io(); if (retval != 0) printf("%s: Failed to setup I/O APICs: returned %d\n", best_enum->apic_name, retval); #ifdef XEN return; #endif /* * Finish setting up the local APIC on the BSP once we know how to * properly program the LINT pins. */ lapic_setup(1); intr_register_pic(&lapic_pic); if (bootverbose) lapic_dump("BSP"); /* Enable the MSI "pic". */ msi_init(); } SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_SECOND, apic_setup_io, NULL); #ifdef SMP /* * Inter Processor Interrupt functions. The lapic_ipi_*() functions are * private to the MD code. The public interface for the rest of the * kernel is defined in mp_machdep.c. */ int lapic_ipi_wait(int delay) { int x, incr; /* * Wait delay loops for IPI to be sent. This is highly bogus * since this is sensitive to CPU clock speed. If delay is * -1, we wait forever. */ if (delay == -1) { incr = 0; delay = 1; } else incr = 1; for (x = 0; x < delay; x += incr) { if ((lapic->icr_lo & APIC_DELSTAT_MASK) == APIC_DELSTAT_IDLE) return (1); ia32_pause(); } return (0); } void lapic_ipi_raw(register_t icrlo, u_int dest) { register_t value, saveintr; /* XXX: Need more sanity checking of icrlo? */ KASSERT(lapic != NULL, ("%s called too early", __func__)); KASSERT((dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, ("%s: invalid dest field", __func__)); KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0, ("%s: reserved bits set in ICR LO register", __func__)); /* Set destination in ICR HI register if it is being used. */ saveintr = intr_disable(); if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) { value = lapic->icr_hi; value &= ~APIC_ID_MASK; value |= dest << APIC_ID_SHIFT; lapic->icr_hi = value; } /* Program the contents of the IPI and dispatch it. */ value = lapic->icr_lo; value &= APIC_ICRLO_RESV_MASK; value |= icrlo; lapic->icr_lo = value; intr_restore(saveintr); } #define BEFORE_SPIN 1000000 #ifdef DETECT_DEADLOCK #define AFTER_SPIN 1000 #endif void lapic_ipi_vectored(u_int vector, int dest) { register_t icrlo, destfield; KASSERT((vector & ~APIC_VECTOR_MASK) == 0, ("%s: invalid vector %d", __func__, vector)); icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE; /* * IPI_STOP_HARD is just a "fake" vector used to send a NMI. * Use special rules regard NMI if passed, otherwise specify * the vector. */ if (vector == IPI_STOP_HARD) icrlo |= APIC_DELMODE_NMI | APIC_LEVEL_ASSERT; else icrlo |= vector | APIC_DELMODE_FIXED | APIC_LEVEL_DEASSERT; destfield = 0; switch (dest) { case APIC_IPI_DEST_SELF: icrlo |= APIC_DEST_SELF; break; case APIC_IPI_DEST_ALL: icrlo |= APIC_DEST_ALLISELF; break; case APIC_IPI_DEST_OTHERS: icrlo |= APIC_DEST_ALLESELF; break; default: KASSERT((dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, ("%s: invalid destination 0x%x", __func__, dest)); destfield = dest; } /* Wait for an earlier IPI to finish. */ if (!lapic_ipi_wait(BEFORE_SPIN)) { if (panicstr != NULL) return; else panic("APIC: Previous IPI is stuck"); } lapic_ipi_raw(icrlo, destfield); #ifdef DETECT_DEADLOCK /* Wait for IPI to be delivered. */ if (!lapic_ipi_wait(AFTER_SPIN)) { #ifdef needsattention /* * XXX FIXME: * * The above function waits for the message to actually be * delivered. It breaks out after an arbitrary timeout * since the message should eventually be delivered (at * least in theory) and that if it wasn't we would catch * the failure with the check above when the next IPI is * sent. * * We could skip this wait entirely, EXCEPT it probably * protects us from other routines that assume that the * message was delivered and acted upon when this function * returns. */ printf("APIC: IPI might be stuck\n"); #else /* !needsattention */ /* Wait until mesage is sent without a timeout. */ while (lapic->icr_lo & APIC_DELSTAT_PEND) ia32_pause(); #endif /* needsattention */ } #endif /* DETECT_DEADLOCK */ } #endif /* SMP */ Index: projects/binutils-2.17/sys =================================================================== --- projects/binutils-2.17/sys (revision 215829) +++ projects/binutils-2.17/sys (revision 215830) Property changes on: projects/binutils-2.17/sys ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys:r215709-215824 Index: projects/binutils-2.17/tools/tools/sysbuild/sysbuild.sh =================================================================== --- projects/binutils-2.17/tools/tools/sysbuild/sysbuild.sh (revision 215829) +++ projects/binutils-2.17/tools/tools/sysbuild/sysbuild.sh (revision 215830) @@ -1,550 +1,577 @@ #!/bin/sh # # Copyright (c) 1994-2009 Poul-Henning Kamp. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ # set -e exec < /dev/null if [ `uname -m` = "i386" ] ; then TARGET_PART=`df / | sed ' 1d s/[ ].*// s,/dev/,, s,s1a,s3a, s,s2a,s1a, s,s3a,s2a, '` # Where our build-bits are to be found FREEBSD_PART=`echo $TARGET_PART | sed 's/s[12]a/s3/'` elif [ `uname -m` = "amd64" ] ; then TARGET_PART=`df / | sed ' 1d s/[ ].*// s,/dev/,, s,s1a,s3a, s,s2a,s1a, s,s3a,s2a, '` # Where our build-bits are to be found FREEBSD_PART=`echo $TARGET_PART | sed 's/s[12]a/s3/'` else TARGET_PART=unknown FREEBSD_PART=unknown fi # Relative to /freebsd PORTS_PATH=ports SRC_PATH=src # OBJ_PATH=obj # Name of kernel KERNCONF=GENERIC # srcconf #SRCCONF="SRCCONF=/usr/src/src.conf" # -j arg to make(1) ncpu=`sysctl -n kern.smp.cpus` if [ $ncpu -gt 1 ] ; then JARG="-j $ncpu" fi # serial console ? SERCONS=false # Remotely mounted distfiles # REMOTEDISTFILES=fs:/rdonly/distfiles # Proxy #FTP_PROXY=http://127.0.0.1:3128/ #HTTP_PROXY=http://127.0.0.1:3128/ #export FTP_PROXY HTTP_PROXY PORTS_WE_WANT=' ' PORTS_OPTS="BATCH=YES MAKE_IDEA=YES A4=yes" CONFIGFILES=' ' cleanup() ( ) before_ports() ( ) before_ports_chroot() ( ) final_root() ( ) final_chroot() ( ) ####################################################################### ####################################################################### usage () { ( echo "Usage: $0 [-b/-k/-w] [-c config_file]" echo " -b suppress builds (both kernel and world)" echo " -k suppress buildkernel" echo " -w suppress buildworld" echo " -p used cached packages" echo " -c specify config file" ) 1>&2 exit 2 } ####################################################################### ####################################################################### if [ ! -f $0 ] ; then echo "Must be able to access self ($0)" 1>&2 exit 1 fi if grep -q 'Magic String: 0`0nQT40W%l,CX&' $0 ; then true else echo "self ($0) does not contain magic string" 1>&2 exit 1 fi ####################################################################### set -e log_it() ( set +x a="$*" set `cat /tmp/_sb_log` TX=`date +%s` echo "$1 $TX" > /tmp/_sb_log DT=`expr $TX - $1 || true` DL=`expr $TX - $2 || true` echo -n "### `date +%H:%M:%S`" printf " ### %5d ### %5d ### %s\n" $DT $DL "$a" ) ####################################################################### ports_recurse() ( set +x + t=$1 + shift + if [ "x$t" = "x." ] ; then + true > /tmp/_.plist + true > /tmp/_.plist.tdone + echo 'digraph {' > /tmp/_.plist.dot + fi + if grep -q "^$t\$" /tmp/_.plist.tdone ; then + return + fi + echo "$t" >> /tmp/_.plist.tdone for d do if [ ! -d $d ] ; then echo "Missing port $d" 1>&2 exit 2 fi + if [ "x$t" != "x." ] ; then + echo "\"$t\" -> \"$d\"" >> /tmp/_.plist.dot + fi if grep -q "^$d\$" /tmp/_.plist ; then true + elif grep -q "^$d\$" /tmp/_.plist.tdone ; then + true else ( cd $d - ports_recurse `make -V _DEPEND_DIRS ${PORTS_OPTS}` + ports_recurse $d `make -V _DEPEND_DIRS ${PORTS_OPTS}` ) - echo $d >> /tmp/_.plist + echo "$d" >> /tmp/_.plist fi done + if [ "x$t" = "x." ] ; then + echo '}' >> /tmp/_.plist.dot + fi ) ports_build() ( set +x - true > /tmp/_.plist - ports_recurse $PORTS_WE_WANT + ports_recurse . $PORTS_WE_WANT # Now build & install them for p in `cat /tmp/_.plist` do t=`echo $p | sed 's,/usr/ports/,,'` pn=`cd $p && make package-name` if [ "x${PKG_DIR}" != "x" -a -f ${PKG_DIR}/$pn.tbz ] ; then if [ "x$use_pkg" = "x-p" ] ; then log_it "install $p from ${PKG_DIR}/$pn.tbz" pkg_add ${PKG_DIR}/$pn.tbz fi fi i=`pkg_info -qO $t` if [ -z "$i" ] ; then log_it "build $p" b=`echo $p | tr / _` ( set -x cd /usr/ports cd $p set +e make clean ${PORTS_OPTS} if make install ${PORTS_OPTS} ; then if [ "x${PKG_DIR}" != "x" ] ; then make package ${PORTS_OPTS} mv *.tbz ${PKG_DIR} fi else log_it FAIL build $p fi make clean ) > _.$b 2>&1 < /dev/null date fi done ) ports_prefetch() ( ( set +x - true > /tmp/_.plist - ports_recurse $PORTS_WE_WANT - true > /mnt/_.prefetch + echo "Building /tmp/_.plist" >> /mnt/_.prefetch + + ports_recurse . $PORTS_WE_WANT + + echo "Completed /tmp/_.plist" >> /mnt/_.prefetch # Now checksump/fetch them for p in `cat /tmp/_.plist` do - echo "Prefetching $p" >> /mnt/_.prefetch b=`echo $p | tr / _` ( cd $p if make checksum $PORTS_OPTS ; then - true + rm -f /mnt/_.prefetch.$b + echo "OK $p" >> /mnt/_.prefetch + exit 0 + fi + make distclean + make checksum $PORTS_OPTS || true + + if make checksum $PORTS_OPTS > /dev/null 2>&1 ; then + rm -f /mnt/_.prefetch.$b + echo "OK $p" >> /mnt/_.prefetch else - make distclean - make checksum $PORTS_OPTS || true + echo "BAD $p" >> /mnt/_.prefetch fi ) > /mnt/_.prefetch.$b 2>&1 done ) ) ####################################################################### do_world=true do_kernel=true use_pkg="" c_arg="" set +e args=`getopt bc:hkpw $*` if [ $? -ne 0 ] ; then usage fi set -e set -- $args for i do case "$i" in -b) shift; do_world=false do_kernel=false ;; -c) c_arg=$2 if [ ! -f "$c_arg" ] ; then echo "Cannot read $c_arg" 1>&2 usage fi . "$2" shift shift ;; -h) usage ;; -k) shift; do_kernel=false ;; -p) shift; use_pkg="-p" ;; -w) shift; do_world=false ;; --) shift break; ;; esac done ####################################################################### if [ "x$1" = "xchroot_script" ] ; then set +x set -e shift before_ports_chroot ports_build exit 0 fi if [ "x$1" = "xfinal_chroot" ] ; then final_chroot exit 0 fi if [ $# -gt 0 ] ; then echo "$0: Extraneous arguments supplied" usage fi ####################################################################### T0=`date +%s` echo $T0 $T0 > /tmp/_sb_log log_it Unmount everything ( ( cleanup ) umount /freebsd/distfiles || true umount /mnt/freebsd/distfiles || true umount /dev/${FREEBSD_PART} || true umount /mnt/freebsd || true umount /mnt/dev || true umount /mnt || true umount /dev/${TARGET_PART} || true ) # > /dev/null 2>&1 log_it Prepare running image mkdir -p /freebsd mount /dev/${FREEBSD_PART} /freebsd ####################################################################### if [ ! -d /freebsd/${PORTS_PATH} ] ; then echo PORTS_PATH does not exist 1>&2 exit 1 fi if [ ! -d /freebsd/${SRC_PATH} ] ; then echo SRC_PATH does not exist 1>&2 exit 1 fi log_it TARGET_PART $TARGET_PART sleep 5 rm -rf /usr/ports ln -s /freebsd/${PORTS_PATH} /usr/ports rm -rf /usr/src ln -s /freebsd/${SRC_PATH} /usr/src if $do_world ; then if [ "x${OBJ_PATH}" != "x" ] ; then rm -rf /usr/obj mkdir -p /freebsd/${OBJ_PATH} ln -s /freebsd/${OBJ_PATH} /usr/obj else rm -rf /usr/obj mkdir -p /usr/obj fi fi ####################################################################### for i in ${PORTS_WE_WANT} do if [ ! -d $i ] ; then echo "Port $i not found" 1>&2 exit 2 fi done export PORTS_WE_WANT export PORTS_OPTS ####################################################################### log_it Prepare destination partition newfs -O2 -U /dev/${TARGET_PART} > /dev/null mount /dev/${TARGET_PART} /mnt mkdir -p /mnt/dev mount -t devfs devfs /mnt/dev if [ "x${REMOTEDISTFILES}" != "x" ] ; then rm -rf /freebsd/${PORTS_PATH}/distfiles ln -s /freebsd/distfiles /freebsd/${PORTS_PATH}/distfiles mkdir -p /freebsd/distfiles mount ${REMOTEDISTFILES} /freebsd/distfiles fi log_it copy ports config files -(cd / ; find var/db/ports -print | cpio -dumpv /mnt ) +(cd / ; find var/db/ports -print | cpio -dumpv /mnt > /dev/null 2>&1) log_it "Start prefetch of ports distfiles" ports_prefetch & if $do_world ; then ( cd /usr/src log_it "Buildworld" make ${JARG} -s buildworld ${SRCCONF} > /mnt/_.bw 2>&1 ) fi if $do_kernel ; then ( cd /usr/src log_it "Buildkernel" make ${JARG} -s buildkernel KERNCONF=$KERNCONF > /mnt/_.bk 2>&1 ) fi log_it Installworld (cd /usr/src && make ${JARG} installworld DESTDIR=/mnt ${SRCCONF} ) \ > /mnt/_.iw 2>&1 log_it distribution (cd /usr/src/etc && make -m /usr/src/share/mk distribution DESTDIR=/mnt ${SRCCONF} ) \ > /mnt/_.dist 2>&1 log_it Installkernel (cd /usr/src && make ${JARG} installkernel DESTDIR=/mnt KERNCONF=$KERNCONF ) \ > /mnt/_.ik 2>&1 if [ "x${OBJ_PATH}" != "x" ] ; then rmdir /mnt/usr/obj ln -s /freebsd/${OBJ_PATH} /mnt/usr/obj fi log_it Wait for ports prefetch log_it "(Tail /mnt/_.prefetch for progress)" wait log_it Move filesystems if [ "x${REMOTEDISTFILES}" != "x" ] ; then umount /freebsd/distfiles fi umount /dev/${FREEBSD_PART} || true mkdir -p /mnt/freebsd mount /dev/${FREEBSD_PART} /mnt/freebsd if [ "x${REMOTEDISTFILES}" != "x" ] ; then mount ${REMOTEDISTFILES} /mnt/freebsd/distfiles fi rm -rf /mnt/usr/ports || true ln -s /freebsd/${PORTS_PATH} /mnt/usr/ports rm -rf /mnt/usr/src || true ln -s /freebsd/${SRC_PATH} /mnt/usr/src log_it Build and install ports # Make sure fetching will work in the chroot if [ -f /etc/resolv.conf ] ; then log_it copy resolv.conf cp /etc/resolv.conf /mnt/etc chflags schg /mnt/etc/resolv.conf fi if [ -f /etc/localtime ] ; then log_it copy localtime cp /etc/localtime /mnt/etc fi log_it ldconfig in chroot chroot /mnt sh /etc/rc.d/ldconfig start log_it before_ports ( before_ports ) log_it build ports pwd cp $0 /mnt/root cp /tmp/_sb_log /mnt/tmp b=`basename $0` if [ "x$c_arg" != "x" ] ; then cp $c_arg /mnt/root chroot /mnt sh /root/$0 -c /root/`basename $c_arg` $use_pkg chroot_script else chroot /mnt sh /root/$0 $use_pkg chroot_script fi cp /mnt/tmp/_sb_log /tmp log_it fixing fstab sed "/[ ]\/[ ]/s;^[^ ]*[ ];/dev/${TARGET_PART} ;" \ /etc/fstab > /mnt/etc/fstab log_it create all mountpoints grep -v '^[ ]*#' /mnt/etc/fstab | while read a b c do mkdir -p /mnt/$b done if [ "x$SERCONS" != "xfalse" ] ; then log_it serial console echo " -h" > /mnt/boot.config sed -i "" -e /ttyd0/s/off/on/ /mnt/etc/ttys sed -i "" -e /ttyu0/s/off/on/ /mnt/etc/ttys sed -i "" -e '/^ttyv[0-8]/s/ on/ off/' /mnt/etc/ttys fi log_it move config files ( cd /mnt mkdir root/configfiles_dist find ${CONFIGFILES} -print | cpio -dumpv root/configfiles_dist ) (cd / && find ${CONFIGFILES} -print | cpio -dumpv /mnt) log_it final_root ( final_root ) log_it final_chroot cp /tmp/_sb_log /mnt/tmp if [ "x$c_arg" != "x" ] ; then chroot /mnt sh /root/$0 -c /root/`basename $c_arg` final_chroot else chroot /mnt sh /root/$0 final_chroot fi cp /mnt/tmp/_sb_log /tmp log_it "Check these messages (if any):" grep '^Stop' /mnt/_* || true log_it DONE Index: projects/binutils-2.17/usr.bin/calendar =================================================================== --- projects/binutils-2.17/usr.bin/calendar (revision 215829) +++ projects/binutils-2.17/usr.bin/calendar (revision 215830) Property changes on: projects/binutils-2.17/usr.bin/calendar ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/usr.bin/calendar:r215709-215824 Index: projects/binutils-2.17/usr.bin/csup =================================================================== --- projects/binutils-2.17/usr.bin/csup (revision 215829) +++ projects/binutils-2.17/usr.bin/csup (revision 215830) Property changes on: projects/binutils-2.17/usr.bin/csup ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/usr.bin/csup:r215709-215824 Index: projects/binutils-2.17/usr.bin/locate/locate/locate.h =================================================================== --- projects/binutils-2.17/usr.bin/locate/locate/locate.h (revision 215829) +++ projects/binutils-2.17/usr.bin/locate/locate/locate.h (revision 215830) @@ -1,72 +1,72 @@ /* * Copyright (c) 1995 Wolfram Schneider . Berlin. * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)locate.h 8.1 (Berkeley) 6/6/93 * $FreeBSD$ */ /* Symbolic constants shared by locate.c and code.c */ #define NBG 128 /* number of bigrams considered */ #define OFFSET 14 /* abs value of max likely diff */ #define PARITY 0200 /* parity bit */ #define SWITCH 30 /* switch code */ #define UMLAUT 31 /* an 8 bit char followed */ /* 0-28 likeliest differential counts + offset to make nonnegative */ #define LDC_MIN 0 #define LDC_MAX 28 /* 128-255 bigram codes (128 most common, as determined by 'updatedb') */ -#define BIGRAM_MIN (UCHAR_MAX - CHAR_MAX) +#define BIGRAM_MIN (UCHAR_MAX - SCHAR_MAX) #define BIGRAM_MAX UCHAR_MAX /* 32-127 single character (printable) ascii residue (ie, literal) */ #define ASCII_MIN 32 -#define ASCII_MAX CHAR_MAX +#define ASCII_MAX SCHAR_MAX -/* #define TO7BIT(x) (x = ( ((u_char)x) & CHAR_MAX )) */ -#define TO7BIT(x) (x = x & CHAR_MAX ) +/* #define TO7BIT(x) (x = ( ((u_char)x) & SCHAR_MAX )) */ +#define TO7BIT(x) (x = x & SCHAR_MAX ) #if UCHAR_MAX >= 4096 define TOLOWER(ch) tolower(ch) #else u_char myctype[UCHAR_MAX + 1]; #define TOLOWER(ch) (myctype[ch]) #endif #define INTSIZE (sizeof(int)) #define LOCATE_REG "*?[]\\" /* fnmatch(3) meta characters */ Index: projects/binutils-2.17/usr.bin/netstat/inet.c =================================================================== --- projects/binutils-2.17/usr.bin/netstat/inet.c (revision 215829) +++ projects/binutils-2.17/usr.bin/netstat/inet.c (revision 215830) @@ -1,1309 +1,1310 @@ /*- * Copyright (c) 1983, 1988, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)inet.c 8.5 (Berkeley) 5/24/95"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif /* INET6 */ #include #include #include #include #include #include #include #include #include #define TCPSTATES #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "netstat.h" char *inetname(struct in_addr *); void inetprint(struct in_addr *, int, const char *, int); #ifdef INET6 static int udp_done, tcp_done; #endif /* INET6 */ static int pcblist_sysctl(int proto, char **bufp, int istcp) { const char *mibvar; char *buf; size_t len; switch (proto) { case IPPROTO_TCP: mibvar = "net.inet.tcp.pcblist"; break; case IPPROTO_UDP: mibvar = "net.inet.udp.pcblist"; break; case IPPROTO_DIVERT: mibvar = "net.inet.divert.pcblist"; break; default: mibvar = "net.inet.raw.pcblist"; break; } len = 0; if (sysctlbyname(mibvar, 0, &len, 0, 0) < 0) { if (errno != ENOENT) warn("sysctl: %s", mibvar); return (0); } if ((buf = malloc(len)) == 0) { warnx("malloc %lu bytes", (u_long)len); return (0); } if (sysctlbyname(mibvar, buf, &len, 0, 0) < 0) { warn("sysctl: %s", mibvar); free(buf); return (0); } *bufp = buf; return (1); } /* * Copied directly from uipc_socket2.c. We leave out some fields that are in * nested structures that aren't used to avoid extra work. */ static void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb) { xsb->sb_cc = sb->sb_cc; xsb->sb_hiwat = sb->sb_hiwat; xsb->sb_mbcnt = sb->sb_mbcnt; xsb->sb_mcnt = sb->sb_mcnt; xsb->sb_ccnt = sb->sb_ccnt; xsb->sb_mbmax = sb->sb_mbmax; xsb->sb_lowat = sb->sb_lowat; xsb->sb_flags = sb->sb_flags; xsb->sb_timeo = sb->sb_timeo; } int sotoxsocket(struct socket *so, struct xsocket *xso) { struct protosw proto; struct domain domain; bzero(xso, sizeof *xso); xso->xso_len = sizeof *xso; xso->xso_so = so; xso->so_type = so->so_type; xso->so_options = so->so_options; xso->so_linger = so->so_linger; xso->so_state = so->so_state; xso->so_pcb = so->so_pcb; if (kread((uintptr_t)so->so_proto, &proto, sizeof(proto)) != 0) return (-1); xso->xso_protocol = proto.pr_protocol; if (kread((uintptr_t)proto.pr_domain, &domain, sizeof(domain)) != 0) return (-1); xso->xso_family = domain.dom_family; xso->so_qlen = so->so_qlen; xso->so_incqlen = so->so_incqlen; xso->so_qlimit = so->so_qlimit; xso->so_timeo = so->so_timeo; xso->so_error = so->so_error; xso->so_oobmark = so->so_oobmark; sbtoxsockbuf(&so->so_snd, &xso->so_snd); sbtoxsockbuf(&so->so_rcv, &xso->so_rcv); return (0); } static int pcblist_kvm(u_long off, char **bufp, int istcp) { struct inpcbinfo pcbinfo; struct inpcbhead listhead; struct inpcb *inp; struct xinpcb xi; struct xinpgen xig; struct xtcpcb xt; struct socket so; struct xsocket *xso; char *buf, *p; size_t len; if (off == 0) return (0); kread(off, &pcbinfo, sizeof(pcbinfo)); if (istcp) len = 2 * sizeof(xig) + (pcbinfo.ipi_count + pcbinfo.ipi_count / 8) * sizeof(struct xtcpcb); else len = 2 * sizeof(xig) + (pcbinfo.ipi_count + pcbinfo.ipi_count / 8) * sizeof(struct xinpcb); if ((buf = malloc(len)) == 0) { warnx("malloc %lu bytes", (u_long)len); return (0); } p = buf; #define COPYOUT(obj, size) do { \ if (len < (size)) { \ warnx("buffer size exceeded"); \ goto fail; \ } \ bcopy((obj), p, (size)); \ len -= (size); \ p += (size); \ } while (0) #define KREAD(off, buf, len) do { \ if (kread((uintptr_t)(off), (buf), (len)) != 0) \ goto fail; \ } while (0) /* Write out header. */ xig.xig_len = sizeof xig; xig.xig_count = pcbinfo.ipi_count; xig.xig_gen = pcbinfo.ipi_gencnt; xig.xig_sogen = 0; COPYOUT(&xig, sizeof xig); /* Walk the PCB list. */ xt.xt_len = sizeof xt; xi.xi_len = sizeof xi; if (istcp) xso = &xt.xt_socket; else xso = &xi.xi_socket; KREAD(pcbinfo.ipi_listhead, &listhead, sizeof(listhead)); LIST_FOREACH(inp, &listhead, inp_list) { if (istcp) { KREAD(inp, &xt.xt_inp, sizeof(*inp)); inp = &xt.xt_inp; } else { KREAD(inp, &xi.xi_inp, sizeof(*inp)); inp = &xi.xi_inp; } if (inp->inp_gencnt > pcbinfo.ipi_gencnt) continue; if (istcp) { if (inp->inp_ppcb == NULL) bzero(&xt.xt_tp, sizeof xt.xt_tp); else if (inp->inp_flags & INP_TIMEWAIT) { bzero(&xt.xt_tp, sizeof xt.xt_tp); xt.xt_tp.t_state = TCPS_TIME_WAIT; } else KREAD(inp->inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp); } if (inp->inp_socket) { KREAD(inp->inp_socket, &so, sizeof(so)); if (sotoxsocket(&so, xso) != 0) goto fail; } else { bzero(xso, sizeof(*xso)); if (istcp) xso->xso_protocol = IPPROTO_TCP; } if (istcp) COPYOUT(&xt, sizeof xt); else COPYOUT(&xi, sizeof xi); } /* Reread the pcbinfo and write out the footer. */ kread(off, &pcbinfo, sizeof(pcbinfo)); xig.xig_count = pcbinfo.ipi_count; xig.xig_gen = pcbinfo.ipi_gencnt; COPYOUT(&xig, sizeof xig); *bufp = buf; return (1); fail: free(buf); return (0); #undef COPYOUT #undef KREAD } /* * Print a summary of connections related to an Internet * protocol. For TCP, also give state of connection. * Listening processes (aflag) are suppressed unless the * -a (all) flag is specified. */ void protopr(u_long off, const char *name, int af1, int proto) { int istcp; static int first = 1; char *buf; const char *vchar; struct tcpcb *tp = NULL; struct inpcb *inp; struct xinpgen *xig, *oxig; struct xsocket *so; struct xtcp_timer *timer; istcp = 0; switch (proto) { case IPPROTO_TCP: #ifdef INET6 if (tcp_done != 0) return; else tcp_done = 1; #endif istcp = 1; break; case IPPROTO_UDP: #ifdef INET6 if (udp_done != 0) return; else udp_done = 1; #endif break; } if (live) { if (!pcblist_sysctl(proto, &buf, istcp)) return; } else { if (!pcblist_kvm(off, &buf, istcp)) return; } oxig = xig = (struct xinpgen *)buf; for (xig = (struct xinpgen *)((char *)xig + xig->xig_len); xig->xig_len > sizeof(struct xinpgen); xig = (struct xinpgen *)((char *)xig + xig->xig_len)) { if (istcp) { timer = &((struct xtcpcb *)xig)->xt_timer; tp = &((struct xtcpcb *)xig)->xt_tp; inp = &((struct xtcpcb *)xig)->xt_inp; so = &((struct xtcpcb *)xig)->xt_socket; } else { inp = &((struct xinpcb *)xig)->xi_inp; so = &((struct xinpcb *)xig)->xi_socket; timer = NULL; } /* Ignore sockets for protocols other than the desired one. */ if (so->xso_protocol != proto) continue; /* Ignore PCBs which were freed during copyout. */ if (inp->inp_gencnt > oxig->xig_gen) continue; if ((af1 == AF_INET && (inp->inp_vflag & INP_IPV4) == 0) #ifdef INET6 || (af1 == AF_INET6 && (inp->inp_vflag & INP_IPV6) == 0) #endif /* INET6 */ || (af1 == AF_UNSPEC && ((inp->inp_vflag & INP_IPV4) == 0 #ifdef INET6 && (inp->inp_vflag & INP_IPV6) == 0 #endif /* INET6 */ )) ) continue; if (!aflag && ( (istcp && tp->t_state == TCPS_LISTEN) || (af1 == AF_INET && inet_lnaof(inp->inp_laddr) == INADDR_ANY) #ifdef INET6 || (af1 == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) #endif /* INET6 */ || (af1 == AF_UNSPEC && (((inp->inp_vflag & INP_IPV4) != 0 && inet_lnaof(inp->inp_laddr) == INADDR_ANY) #ifdef INET6 || ((inp->inp_vflag & INP_IPV6) != 0 && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) #endif )) )) continue; if (first) { if (!Lflag) { printf("Active Internet connections"); if (aflag) printf(" (including servers)"); } else printf( "Current listen queue sizes (qlen/incqlen/maxqlen)"); putchar('\n'); if (Aflag) printf("%-8.8s ", "Tcpcb"); if (Lflag) printf("%-5.5s %-14.14s %-22.22s\n", "Proto", "Listen", "Local Address"); if (Tflag) printf((Aflag && !Wflag) ? "%-5.5s %-6.6s %-6.6s %-6.6s %-18.18s %s\n" : "%-5.5s %-6.6s %-6.6s %-6.6s %-22.22s %s\n", "Proto", "Rexmit", "OOORcv", "0-win", "Local Address", "Foreign Address"); if (xflag) { printf("%-6.6s %-6.6s %-6.6s %-6.6s %-6.6s %-6.6s %-6.6s %-6.6s %-6.6s %-6.6s %-6.6s %-6.6s ", "R-MBUF", "S-MBUF", "R-CLUS", "S-CLUS", "R-HIWA", "S-HIWA", "R-LOWA", "S-LOWA", "R-BCNT", "S-BCNT", "R-BMAX", "S-BMAX"); printf("%7.7s %7.7s %7.7s %7.7s %7.7s %7.7s %s\n", "rexmt", "persist", "keep", "2msl", "delack", "rcvtime", "(state)"); } - if (!xflag && !Tflag) + if (!xflag && !Tflag) { printf((Aflag && !Wflag) ? "%-5.5s %-6.6s %-6.6s %-18.18s %-18.18s" : "%-5.5s %-6.6s %-6.6s %-22.22s %-22.22s", "Proto", "Recv-Q", "Send-Q", "Local Address", "Foreign Address"); - + printf("(state)\n"); + } first = 0; } if (Lflag && so->so_qlimit == 0) continue; if (Aflag) { if (istcp) printf("%8lx ", (u_long)inp->inp_ppcb); else printf("%8lx ", (u_long)so->so_pcb); } #ifdef INET6 if ((inp->inp_vflag & INP_IPV6) != 0) vchar = ((inp->inp_vflag & INP_IPV4) != 0) ? "46" : "6 "; else #endif vchar = ((inp->inp_vflag & INP_IPV4) != 0) ? "4 " : " "; printf("%-3.3s%-2.2s ", name, vchar); if (Lflag) { char buf1[15]; snprintf(buf1, 15, "%d/%d/%d", so->so_qlen, so->so_incqlen, so->so_qlimit); printf("%-14.14s ", buf1); } else if (Tflag) { if (istcp) printf("%6u %6u %6u ", tp->t_sndrexmitpack, tp->t_rcvoopack, tp->t_sndzerowin); } else { printf("%6u %6u ", so->so_rcv.sb_cc, so->so_snd.sb_cc); } if (numeric_port) { if (inp->inp_vflag & INP_IPV4) { inetprint(&inp->inp_laddr, (int)inp->inp_lport, name, 1); if (!Lflag) inetprint(&inp->inp_faddr, (int)inp->inp_fport, name, 1); } #ifdef INET6 else if (inp->inp_vflag & INP_IPV6) { inet6print(&inp->in6p_laddr, (int)inp->inp_lport, name, 1); if (!Lflag) inet6print(&inp->in6p_faddr, (int)inp->inp_fport, name, 1); } /* else nothing printed now */ #endif /* INET6 */ } else if (inp->inp_flags & INP_ANONPORT) { if (inp->inp_vflag & INP_IPV4) { inetprint(&inp->inp_laddr, (int)inp->inp_lport, name, 1); if (!Lflag) inetprint(&inp->inp_faddr, (int)inp->inp_fport, name, 0); } #ifdef INET6 else if (inp->inp_vflag & INP_IPV6) { inet6print(&inp->in6p_laddr, (int)inp->inp_lport, name, 1); if (!Lflag) inet6print(&inp->in6p_faddr, (int)inp->inp_fport, name, 0); } /* else nothing printed now */ #endif /* INET6 */ } else { if (inp->inp_vflag & INP_IPV4) { inetprint(&inp->inp_laddr, (int)inp->inp_lport, name, 0); if (!Lflag) inetprint(&inp->inp_faddr, (int)inp->inp_fport, name, inp->inp_lport != inp->inp_fport); } #ifdef INET6 else if (inp->inp_vflag & INP_IPV6) { inet6print(&inp->in6p_laddr, (int)inp->inp_lport, name, 0); if (!Lflag) inet6print(&inp->in6p_faddr, (int)inp->inp_fport, name, inp->inp_lport != inp->inp_fport); } /* else nothing printed now */ #endif /* INET6 */ } if (xflag) { if (Lflag) printf("%21s %6u %6u %6u %6u %6u %6u %6u %6u %6u %6u %6u %6u ", " ", so->so_rcv.sb_mcnt, so->so_snd.sb_mcnt, so->so_rcv.sb_ccnt, so->so_snd.sb_ccnt, so->so_rcv.sb_hiwat, so->so_snd.sb_hiwat, so->so_rcv.sb_lowat, so->so_snd.sb_lowat, so->so_rcv.sb_mbcnt, so->so_snd.sb_mbcnt, so->so_rcv.sb_mbmax, so->so_snd.sb_mbmax); else { printf("%6u %6u %6u %6u %6u %6u %6u %6u %6u %6u %6u %6u ", so->so_rcv.sb_mcnt, so->so_snd.sb_mcnt, so->so_rcv.sb_ccnt, so->so_snd.sb_ccnt, so->so_rcv.sb_hiwat, so->so_snd.sb_hiwat, so->so_rcv.sb_lowat, so->so_snd.sb_lowat, so->so_rcv.sb_mbcnt, so->so_snd.sb_mbcnt, so->so_rcv.sb_mbmax, so->so_snd.sb_mbmax); if (timer != NULL) printf("%4d.%02d %4d.%02d %4d.%02d %4d.%02d %4d.%02d %4d.%02d ", timer->tt_rexmt / 1000, (timer->tt_rexmt % 1000) / 10, timer->tt_persist / 1000, (timer->tt_persist % 1000) / 10, timer->tt_keep / 1000, (timer->tt_keep % 1000) / 10, timer->tt_2msl / 1000, (timer->tt_2msl % 1000) / 10, timer->tt_delack / 1000, (timer->tt_delack % 1000) / 10, timer->t_rcvtime / 1000, (timer->t_rcvtime % 1000) / 10); } } if (istcp && !Lflag && !xflag && !Tflag) { if (tp->t_state < 0 || tp->t_state >= TCP_NSTATES) printf("%d", tp->t_state); else { printf("%s", tcpstates[tp->t_state]); #if defined(TF_NEEDSYN) && defined(TF_NEEDFIN) /* Show T/TCP `hidden state' */ if (tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) putchar('*'); #endif /* defined(TF_NEEDSYN) && defined(TF_NEEDFIN) */ } } putchar('\n'); } if (xig != oxig && xig->xig_gen != oxig->xig_gen) { if (oxig->xig_count > xig->xig_count) { printf("Some %s sockets may have been deleted.\n", name); } else if (oxig->xig_count < xig->xig_count) { printf("Some %s sockets may have been created.\n", name); } else { printf( "Some %s sockets may have been created or deleted.\n", name); } } free(buf); } /* * Dump TCP statistics structure. */ void tcp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct tcpstat tcpstat, zerostat; size_t len = sizeof tcpstat; #ifdef INET6 if (tcp_done != 0) return; else tcp_done = 1; #endif if (live) { if (zflag) memset(&zerostat, 0, len); if (sysctlbyname("net.inet.tcp.stats", &tcpstat, &len, zflag ? &zerostat : NULL, zflag ? len : 0) < 0) { warn("sysctl: net.inet.tcp.stats"); return; } } else kread(off, &tcpstat, len); printf ("%s:\n", name); #define p(f, m) if (tcpstat.f || sflag <= 1) \ printf(m, tcpstat.f, plural(tcpstat.f)) #define p1a(f, m) if (tcpstat.f || sflag <= 1) \ printf(m, tcpstat.f) #define p2(f1, f2, m) if (tcpstat.f1 || tcpstat.f2 || sflag <= 1) \ printf(m, tcpstat.f1, plural(tcpstat.f1), tcpstat.f2, plural(tcpstat.f2)) #define p2a(f1, f2, m) if (tcpstat.f1 || tcpstat.f2 || sflag <= 1) \ printf(m, tcpstat.f1, plural(tcpstat.f1), tcpstat.f2) #define p3(f, m) if (tcpstat.f || sflag <= 1) \ printf(m, tcpstat.f, pluralies(tcpstat.f)) p(tcps_sndtotal, "\t%lu packet%s sent\n"); p2(tcps_sndpack,tcps_sndbyte, "\t\t%lu data packet%s (%lu byte%s)\n"); p2(tcps_sndrexmitpack, tcps_sndrexmitbyte, "\t\t%lu data packet%s (%lu byte%s) retransmitted\n"); p(tcps_sndrexmitbad, "\t\t%lu data packet%s unnecessarily retransmitted\n"); p(tcps_mturesent, "\t\t%lu resend%s initiated by MTU discovery\n"); p2a(tcps_sndacks, tcps_delack, "\t\t%lu ack-only packet%s (%lu delayed)\n"); p(tcps_sndurg, "\t\t%lu URG only packet%s\n"); p(tcps_sndprobe, "\t\t%lu window probe packet%s\n"); p(tcps_sndwinup, "\t\t%lu window update packet%s\n"); p(tcps_sndctrl, "\t\t%lu control packet%s\n"); p(tcps_rcvtotal, "\t%lu packet%s received\n"); p2(tcps_rcvackpack, tcps_rcvackbyte, "\t\t%lu ack%s (for %lu byte%s)\n"); p(tcps_rcvdupack, "\t\t%lu duplicate ack%s\n"); p(tcps_rcvacktoomuch, "\t\t%lu ack%s for unsent data\n"); p2(tcps_rcvpack, tcps_rcvbyte, "\t\t%lu packet%s (%lu byte%s) received in-sequence\n"); p2(tcps_rcvduppack, tcps_rcvdupbyte, "\t\t%lu completely duplicate packet%s (%lu byte%s)\n"); p(tcps_pawsdrop, "\t\t%lu old duplicate packet%s\n"); p2(tcps_rcvpartduppack, tcps_rcvpartdupbyte, "\t\t%lu packet%s with some dup. data (%lu byte%s duped)\n"); p2(tcps_rcvoopack, tcps_rcvoobyte, "\t\t%lu out-of-order packet%s (%lu byte%s)\n"); p2(tcps_rcvpackafterwin, tcps_rcvbyteafterwin, "\t\t%lu packet%s (%lu byte%s) of data after window\n"); p(tcps_rcvwinprobe, "\t\t%lu window probe%s\n"); p(tcps_rcvwinupd, "\t\t%lu window update packet%s\n"); p(tcps_rcvafterclose, "\t\t%lu packet%s received after close\n"); p(tcps_rcvbadsum, "\t\t%lu discarded for bad checksum%s\n"); p(tcps_rcvbadoff, "\t\t%lu discarded for bad header offset field%s\n"); p1a(tcps_rcvshort, "\t\t%lu discarded because packet too short\n"); p1a(tcps_rcvmemdrop, "\t\t%lu discarded due to memory problems\n"); p(tcps_connattempt, "\t%lu connection request%s\n"); p(tcps_accepts, "\t%lu connection accept%s\n"); p(tcps_badsyn, "\t%lu bad connection attempt%s\n"); p(tcps_listendrop, "\t%lu listen queue overflow%s\n"); p(tcps_badrst, "\t%lu ignored RSTs in the window%s\n"); p(tcps_connects, "\t%lu connection%s established (including accepts)\n"); p2(tcps_closed, tcps_drops, "\t%lu connection%s closed (including %lu drop%s)\n"); p(tcps_cachedrtt, "\t\t%lu connection%s updated cached RTT on close\n"); p(tcps_cachedrttvar, "\t\t%lu connection%s updated cached RTT variance on close\n"); p(tcps_cachedssthresh, "\t\t%lu connection%s updated cached ssthresh on close\n"); p(tcps_conndrops, "\t%lu embryonic connection%s dropped\n"); p2(tcps_rttupdated, tcps_segstimed, "\t%lu segment%s updated rtt (of %lu attempt%s)\n"); p(tcps_rexmttimeo, "\t%lu retransmit timeout%s\n"); p(tcps_timeoutdrop, "\t\t%lu connection%s dropped by rexmit timeout\n"); p(tcps_persisttimeo, "\t%lu persist timeout%s\n"); p(tcps_persistdrop, "\t\t%lu connection%s dropped by persist timeout\n"); p(tcps_finwait2_drops, "\t%lu Connection%s (fin_wait_2) dropped because of timeout\n"); p(tcps_keeptimeo, "\t%lu keepalive timeout%s\n"); p(tcps_keepprobe, "\t\t%lu keepalive probe%s sent\n"); p(tcps_keepdrops, "\t\t%lu connection%s dropped by keepalive\n"); p(tcps_predack, "\t%lu correct ACK header prediction%s\n"); p(tcps_preddat, "\t%lu correct data packet header prediction%s\n"); p3(tcps_sc_added, "\t%lu syncache entr%s added\n"); p1a(tcps_sc_retransmitted, "\t\t%lu retransmitted\n"); p1a(tcps_sc_dupsyn, "\t\t%lu dupsyn\n"); p1a(tcps_sc_dropped, "\t\t%lu dropped\n"); p1a(tcps_sc_completed, "\t\t%lu completed\n"); p1a(tcps_sc_bucketoverflow, "\t\t%lu bucket overflow\n"); p1a(tcps_sc_cacheoverflow, "\t\t%lu cache overflow\n"); p1a(tcps_sc_reset, "\t\t%lu reset\n"); p1a(tcps_sc_stale, "\t\t%lu stale\n"); p1a(tcps_sc_aborted, "\t\t%lu aborted\n"); p1a(tcps_sc_badack, "\t\t%lu badack\n"); p1a(tcps_sc_unreach, "\t\t%lu unreach\n"); p(tcps_sc_zonefail, "\t\t%lu zone failure%s\n"); p(tcps_sc_sendcookie, "\t%lu cookie%s sent\n"); p(tcps_sc_recvcookie, "\t%lu cookie%s received\n"); p(tcps_hc_added, "\t%lu hostcache entrie%s added\n"); p1a(tcps_hc_bucketoverflow, "\t\t%lu bucket overflow\n"); p(tcps_sack_recovery_episode, "\t%lu SACK recovery episode%s\n"); p(tcps_sack_rexmits, "\t%lu segment rexmit%s in SACK recovery episodes\n"); p(tcps_sack_rexmit_bytes, "\t%lu byte rexmit%s in SACK recovery episodes\n"); p(tcps_sack_rcv_blocks, "\t%lu SACK option%s (SACK blocks) received\n"); p(tcps_sack_send_blocks, "\t%lu SACK option%s (SACK blocks) sent\n"); p1a(tcps_sack_sboverflow, "\t%lu SACK scoreboard overflow\n"); p(tcps_ecn_ce, "\t%lu packet%s with ECN CE bit set\n"); p(tcps_ecn_ect0, "\t%lu packet%s with ECN ECT(0) bit set\n"); p(tcps_ecn_ect1, "\t%lu packet%s with ECN ECT(1) bit set\n"); p(tcps_ecn_shs, "\t%lu successful ECN handshake%s\n"); p(tcps_ecn_rcwnd, "\t%lu time%s ECN reduced the congestion window\n"); #undef p #undef p1a #undef p2 #undef p2a #undef p3 } /* * Dump UDP statistics structure. */ void udp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct udpstat udpstat, zerostat; size_t len = sizeof udpstat; u_long delivered; #ifdef INET6 if (udp_done != 0) return; else udp_done = 1; #endif if (live) { if (zflag) memset(&zerostat, 0, len); if (sysctlbyname("net.inet.udp.stats", &udpstat, &len, zflag ? &zerostat : NULL, zflag ? len : 0) < 0) { warn("sysctl: net.inet.udp.stats"); return; } } else kread(off, &udpstat, len); printf("%s:\n", name); #define p(f, m) if (udpstat.f || sflag <= 1) \ printf(m, udpstat.f, plural(udpstat.f)) #define p1a(f, m) if (udpstat.f || sflag <= 1) \ printf(m, udpstat.f) p(udps_ipackets, "\t%lu datagram%s received\n"); p1a(udps_hdrops, "\t%lu with incomplete header\n"); p1a(udps_badlen, "\t%lu with bad data length field\n"); p1a(udps_badsum, "\t%lu with bad checksum\n"); p1a(udps_nosum, "\t%lu with no checksum\n"); p1a(udps_noport, "\t%lu dropped due to no socket\n"); p(udps_noportbcast, "\t%lu broadcast/multicast datagram%s undelivered\n"); p1a(udps_fullsock, "\t%lu dropped due to full socket buffers\n"); p1a(udpps_pcbhashmiss, "\t%lu not for hashed pcb\n"); delivered = udpstat.udps_ipackets - udpstat.udps_hdrops - udpstat.udps_badlen - udpstat.udps_badsum - udpstat.udps_noport - udpstat.udps_noportbcast - udpstat.udps_fullsock; if (delivered || sflag <= 1) printf("\t%lu delivered\n", delivered); p(udps_opackets, "\t%lu datagram%s output\n"); /* the next statistic is cumulative in udps_noportbcast */ p(udps_filtermcast, "\t%lu time%s multicast source filter matched\n"); #undef p #undef p1a } /* * Dump CARP statistics structure. */ void carp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct carpstats carpstat, zerostat; size_t len = sizeof(struct carpstats); if (live) { if (zflag) memset(&zerostat, 0, len); if (sysctlbyname("net.inet.carp.stats", &carpstat, &len, zflag ? &zerostat : NULL, zflag ? len : 0) < 0) { if (errno != ENOENT) warn("sysctl: net.inet.carp.stats"); return; } } else { if (off == 0) return; kread(off, &carpstat, len); } printf("%s:\n", name); #define p(f, m) if (carpstat.f || sflag <= 1) \ printf(m, (uintmax_t)carpstat.f, plural(carpstat.f)) #define p2(f, m) if (carpstat.f || sflag <= 1) \ printf(m, (uintmax_t)carpstat.f) p(carps_ipackets, "\t%ju packet%s received (IPv4)\n"); p(carps_ipackets6, "\t%ju packet%s received (IPv6)\n"); p(carps_badttl, "\t\t%ju packet%s discarded for wrong TTL\n"); p(carps_hdrops, "\t\t%ju packet%s shorter than header\n"); p(carps_badsum, "\t\t%ju discarded for bad checksum%s\n"); p(carps_badver, "\t\t%ju discarded packet%s with a bad version\n"); p2(carps_badlen, "\t\t%ju discarded because packet too short\n"); p2(carps_badauth, "\t\t%ju discarded for bad authentication\n"); p2(carps_badvhid, "\t\t%ju discarded for bad vhid\n"); p2(carps_badaddrs, "\t\t%ju discarded because of a bad address list\n"); p(carps_opackets, "\t%ju packet%s sent (IPv4)\n"); p(carps_opackets6, "\t%ju packet%s sent (IPv6)\n"); p2(carps_onomem, "\t\t%ju send failed due to mbuf memory error\n"); #if notyet p(carps_ostates, "\t\t%s state update%s sent\n"); #endif #undef p #undef p2 } /* * Dump IP statistics structure. */ void ip_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct ipstat ipstat, zerostat; size_t len = sizeof ipstat; if (live) { if (zflag) memset(&zerostat, 0, len); if (sysctlbyname("net.inet.ip.stats", &ipstat, &len, zflag ? &zerostat : NULL, zflag ? len : 0) < 0) { warn("sysctl: net.inet.ip.stats"); return; } } else kread(off, &ipstat, len); printf("%s:\n", name); #define p(f, m) if (ipstat.f || sflag <= 1) \ printf(m, ipstat.f, plural(ipstat.f)) #define p1a(f, m) if (ipstat.f || sflag <= 1) \ printf(m, ipstat.f) p(ips_total, "\t%lu total packet%s received\n"); p(ips_badsum, "\t%lu bad header checksum%s\n"); p1a(ips_toosmall, "\t%lu with size smaller than minimum\n"); p1a(ips_tooshort, "\t%lu with data size < data length\n"); p1a(ips_toolong, "\t%lu with ip length > max ip packet size\n"); p1a(ips_badhlen, "\t%lu with header length < data size\n"); p1a(ips_badlen, "\t%lu with data length < header length\n"); p1a(ips_badoptions, "\t%lu with bad options\n"); p1a(ips_badvers, "\t%lu with incorrect version number\n"); p(ips_fragments, "\t%lu fragment%s received\n"); p(ips_fragdropped, "\t%lu fragment%s dropped (dup or out of space)\n"); p(ips_fragtimeout, "\t%lu fragment%s dropped after timeout\n"); p(ips_reassembled, "\t%lu packet%s reassembled ok\n"); p(ips_delivered, "\t%lu packet%s for this host\n"); p(ips_noproto, "\t%lu packet%s for unknown/unsupported protocol\n"); p(ips_forward, "\t%lu packet%s forwarded"); p(ips_fastforward, " (%lu packet%s fast forwarded)"); if (ipstat.ips_forward || sflag <= 1) putchar('\n'); p(ips_cantforward, "\t%lu packet%s not forwardable\n"); p(ips_notmember, "\t%lu packet%s received for unknown multicast group\n"); p(ips_redirectsent, "\t%lu redirect%s sent\n"); p(ips_localout, "\t%lu packet%s sent from this host\n"); p(ips_rawout, "\t%lu packet%s sent with fabricated ip header\n"); p(ips_odropped, "\t%lu output packet%s dropped due to no bufs, etc.\n"); p(ips_noroute, "\t%lu output packet%s discarded due to no route\n"); p(ips_fragmented, "\t%lu output datagram%s fragmented\n"); p(ips_ofragments, "\t%lu fragment%s created\n"); p(ips_cantfrag, "\t%lu datagram%s that can't be fragmented\n"); p(ips_nogif, "\t%lu tunneling packet%s that can't find gif\n"); p(ips_badaddr, "\t%lu datagram%s with bad address in header\n"); #undef p #undef p1a } /* * Dump ARP statistics structure. */ void arp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct arpstat arpstat, zerostat; size_t len = sizeof(arpstat); if (live) { if (zflag) memset(&zerostat, 0, len); if (sysctlbyname("net.link.ether.arp.stats", &arpstat, &len, zflag ? &zerostat : NULL, zflag ? len : 0) < 0) { warn("sysctl: net.link.ether.arp.stats"); return; } } else kread(off, &arpstat, len); printf("%s:\n", name); #define p(f, m) if (arpstat.f || sflag <= 1) \ printf(m, arpstat.f, plural(arpstat.f)) #define p2(f, m) if (arpstat.f || sflag <= 1) \ printf(m, arpstat.f, pluralies(arpstat.f)) p(txrequests, "\t%lu ARP request%s sent\n"); p2(txreplies, "\t%lu ARP repl%s sent\n"); p(rxrequests, "\t%lu ARP request%s received\n"); p2(rxreplies, "\t%lu ARP repl%s received\n"); p(received, "\t%lu ARP packet%s received\n"); p(dropped, "\t%lu total packet%s dropped due to no ARP entry\n"); p(timeouts, "\t%lu ARP entry%s timed out\n"); p(dupips, "\t%lu Duplicate IP%s seen\n"); #undef p #undef p2 } static const char *icmpnames[ICMP_MAXTYPE + 1] = { "echo reply", /* RFC 792 */ "#1", "#2", "destination unreachable", /* RFC 792 */ "source quench", /* RFC 792 */ "routing redirect", /* RFC 792 */ "#6", "#7", "echo", /* RFC 792 */ "router advertisement", /* RFC 1256 */ "router solicitation", /* RFC 1256 */ "time exceeded", /* RFC 792 */ "parameter problem", /* RFC 792 */ "time stamp", /* RFC 792 */ "time stamp reply", /* RFC 792 */ "information request", /* RFC 792 */ "information request reply", /* RFC 792 */ "address mask request", /* RFC 950 */ "address mask reply", /* RFC 950 */ "#19", "#20", "#21", "#22", "#23", "#24", "#25", "#26", "#27", "#28", "#29", "icmp traceroute", /* RFC 1393 */ "datagram conversion error", /* RFC 1475 */ "mobile host redirect", "IPv6 where-are-you", "IPv6 i-am-here", "mobile registration req", "mobile registration reply", "domain name request", /* RFC 1788 */ "domain name reply", /* RFC 1788 */ "icmp SKIP", "icmp photuris", /* RFC 2521 */ }; /* * Dump ICMP statistics. */ void icmp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct icmpstat icmpstat, zerostat; int i, first; size_t len; len = sizeof icmpstat; if (live) { if (zflag) memset(&zerostat, 0, len); if (sysctlbyname("net.inet.icmp.stats", &icmpstat, &len, zflag ? &zerostat : NULL, zflag ? len : 0) < 0) { warn("sysctl: net.inet.icmp.stats"); return; } } else kread(off, &icmpstat, len); printf("%s:\n", name); #define p(f, m) if (icmpstat.f || sflag <= 1) \ printf(m, icmpstat.f, plural(icmpstat.f)) #define p1a(f, m) if (icmpstat.f || sflag <= 1) \ printf(m, icmpstat.f) #define p2(f, m) if (icmpstat.f || sflag <= 1) \ printf(m, icmpstat.f, plurales(icmpstat.f)) p(icps_error, "\t%lu call%s to icmp_error\n"); p(icps_oldicmp, "\t%lu error%s not generated in response to an icmp message\n"); for (first = 1, i = 0; i < ICMP_MAXTYPE + 1; i++) if (icmpstat.icps_outhist[i] != 0) { if (first) { printf("\tOutput histogram:\n"); first = 0; } if (icmpnames[i] != NULL) printf("\t\t%s: %lu\n", icmpnames[i], icmpstat.icps_outhist[i]); else printf("\t\tunknown ICMP #%d: %lu\n", i, icmpstat.icps_outhist[i]); } p(icps_badcode, "\t%lu message%s with bad code fields\n"); p(icps_tooshort, "\t%lu message%s less than the minimum length\n"); p(icps_checksum, "\t%lu message%s with bad checksum\n"); p(icps_badlen, "\t%lu message%s with bad length\n"); p1a(icps_bmcastecho, "\t%lu multicast echo requests ignored\n"); p1a(icps_bmcasttstamp, "\t%lu multicast timestamp requests ignored\n"); for (first = 1, i = 0; i < ICMP_MAXTYPE + 1; i++) if (icmpstat.icps_inhist[i] != 0) { if (first) { printf("\tInput histogram:\n"); first = 0; } if (icmpnames[i] != NULL) printf("\t\t%s: %lu\n", icmpnames[i], icmpstat.icps_inhist[i]); else printf("\t\tunknown ICMP #%d: %lu\n", i, icmpstat.icps_inhist[i]); } p(icps_reflect, "\t%lu message response%s generated\n"); p2(icps_badaddr, "\t%lu invalid return address%s\n"); p(icps_noroute, "\t%lu no return route%s\n"); #undef p #undef p1a #undef p2 if (live) { len = sizeof i; if (sysctlbyname("net.inet.icmp.maskrepl", &i, &len, NULL, 0) < 0) return; printf("\tICMP address mask responses are %sabled\n", i ? "en" : "dis"); } } #ifndef BURN_BRIDGES /* * Dump IGMP statistics structure (pre 8.x kernel). */ static void igmp_stats_live_old(u_long off, const char *name) { struct oigmpstat oigmpstat, zerostat; size_t len = sizeof(oigmpstat); if (zflag) memset(&zerostat, 0, len); if (sysctlbyname("net.inet.igmp.stats", &oigmpstat, &len, zflag ? &zerostat : NULL, zflag ? len : 0) < 0) { warn("sysctl: net.inet.igmp.stats"); return; } printf("%s:\n", name); #define p(f, m) if (oigmpstat.f || sflag <= 1) \ printf(m, oigmpstat.f, plural(oigmpstat.f)) #define py(f, m) if (oigmpstat.f || sflag <= 1) \ printf(m, oigmpstat.f, oigmpstat.f != 1 ? "ies" : "y") p(igps_rcv_total, "\t%u message%s received\n"); p(igps_rcv_tooshort, "\t%u message%s received with too few bytes\n"); p(igps_rcv_badsum, "\t%u message%s received with bad checksum\n"); py(igps_rcv_queries, "\t%u membership quer%s received\n"); py(igps_rcv_badqueries, "\t%u membership quer%s received with invalid field(s)\n"); p(igps_rcv_reports, "\t%u membership report%s received\n"); p(igps_rcv_badreports, "\t%u membership report%s received with invalid field(s)\n"); p(igps_rcv_ourreports, "\t%u membership report%s received for groups to which we belong\n"); p(igps_snd_reports, "\t%u membership report%s sent\n"); #undef p #undef py } #endif /* !BURN_BRIDGES */ /* * Dump IGMP statistics structure. */ void igmp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct igmpstat igmpstat, zerostat; size_t len; #ifndef BURN_BRIDGES if (live) { /* * Detect if we are being run against a pre-IGMPv3 kernel. * We cannot do this for a core file as the legacy * struct igmpstat has no size field, nor does it * export it in any readily-available symbols. */ len = 0; if (sysctlbyname("net.inet.igmp.stats", NULL, &len, NULL, 0) < 0) { warn("sysctl: net.inet.igmp.stats"); return; } if (len < sizeof(igmpstat)) { igmp_stats_live_old(off, name); return; } } #endif /* !BURN_BRIDGES */ len = sizeof(igmpstat); if (live) { if (zflag) memset(&zerostat, 0, len); if (sysctlbyname("net.inet.igmp.stats", &igmpstat, &len, zflag ? &zerostat : NULL, zflag ? len : 0) < 0) { warn("sysctl: net.inet.igmp.stats"); return; } } else { len = sizeof(igmpstat); kread(off, &igmpstat, len); } if (igmpstat.igps_version != IGPS_VERSION_3) { warnx("%s: version mismatch (%d != %d)", __func__, igmpstat.igps_version, IGPS_VERSION_3); } if (igmpstat.igps_len != IGPS_VERSION3_LEN) { warnx("%s: size mismatch (%d != %d)", __func__, igmpstat.igps_len, IGPS_VERSION3_LEN); } printf("%s:\n", name); #define p64(f, m) if (igmpstat.f || sflag <= 1) \ printf(m, (uintmax_t) igmpstat.f, plural(igmpstat.f)) #define py64(f, m) if (igmpstat.f || sflag <= 1) \ printf(m, (uintmax_t) igmpstat.f, pluralies(igmpstat.f)) p64(igps_rcv_total, "\t%ju message%s received\n"); p64(igps_rcv_tooshort, "\t%ju message%s received with too few bytes\n"); p64(igps_rcv_badttl, "\t%ju message%s received with wrong TTL\n"); p64(igps_rcv_badsum, "\t%ju message%s received with bad checksum\n"); py64(igps_rcv_v1v2_queries, "\t%ju V1/V2 membership quer%s received\n"); py64(igps_rcv_v3_queries, "\t%ju V3 membership quer%s received\n"); py64(igps_rcv_badqueries, "\t%ju membership quer%s received with invalid field(s)\n"); py64(igps_rcv_gen_queries, "\t%ju general quer%s received\n"); py64(igps_rcv_group_queries, "\t%ju group quer%s received\n"); py64(igps_rcv_gsr_queries, "\t%ju group-source quer%s received\n"); py64(igps_drop_gsr_queries, "\t%ju group-source quer%s dropped\n"); p64(igps_rcv_reports, "\t%ju membership report%s received\n"); p64(igps_rcv_badreports, "\t%ju membership report%s received with invalid field(s)\n"); p64(igps_rcv_ourreports, "\t%ju membership report%s received for groups to which we belong\n"); p64(igps_rcv_nora, "\t%ju V3 report%s received without Router Alert\n"); p64(igps_snd_reports, "\t%ju membership report%s sent\n"); #undef p64 #undef py64 } /* * Dump PIM statistics structure. */ void pim_stats(u_long off __unused, const char *name, int af1 __unused, int proto __unused) { struct pimstat pimstat, zerostat; size_t len = sizeof pimstat; if (live) { if (zflag) memset(&zerostat, 0, len); if (sysctlbyname("net.inet.pim.stats", &pimstat, &len, zflag ? &zerostat : NULL, zflag ? len : 0) < 0) { if (errno != ENOENT) warn("sysctl: net.inet.pim.stats"); return; } } else { if (off == 0) return; kread(off, &pimstat, len); } printf("%s:\n", name); #define p(f, m) if (pimstat.f || sflag <= 1) \ printf(m, (uintmax_t)pimstat.f, plural(pimstat.f)) #define py(f, m) if (pimstat.f || sflag <= 1) \ printf(m, (uintmax_t)pimstat.f, pimstat.f != 1 ? "ies" : "y") p(pims_rcv_total_msgs, "\t%ju message%s received\n"); p(pims_rcv_total_bytes, "\t%ju byte%s received\n"); p(pims_rcv_tooshort, "\t%ju message%s received with too few bytes\n"); p(pims_rcv_badsum, "\t%ju message%s received with bad checksum\n"); p(pims_rcv_badversion, "\t%ju message%s received with bad version\n"); p(pims_rcv_registers_msgs, "\t%ju data register message%s received\n"); p(pims_rcv_registers_bytes, "\t%ju data register byte%s received\n"); p(pims_rcv_registers_wrongiif, "\t%ju data register message%s received on wrong iif\n"); p(pims_rcv_badregisters, "\t%ju bad register%s received\n"); p(pims_snd_registers_msgs, "\t%ju data register message%s sent\n"); p(pims_snd_registers_bytes, "\t%ju data register byte%s sent\n"); #undef p #undef py } /* * Pretty print an Internet address (net address + port). */ void inetprint(struct in_addr *in, int port, const char *proto, int num_port) { struct servent *sp = 0; char line[80], *cp; int width; if (Wflag) sprintf(line, "%s.", inetname(in)); else sprintf(line, "%.*s.", (Aflag && !num_port) ? 12 : 16, inetname(in)); cp = index(line, '\0'); if (!num_port && port) sp = getservbyport((int)port, proto); if (sp || port == 0) sprintf(cp, "%.15s ", sp ? sp->s_name : "*"); else sprintf(cp, "%d ", ntohs((u_short)port)); width = (Aflag && !Wflag) ? 18 : 22; if (Wflag) printf("%-*s ", width, line); else printf("%-*.*s ", width, width, line); } /* * Construct an Internet address representation. * If numeric_addr has been supplied, give * numeric value, otherwise try for symbolic name. */ char * inetname(struct in_addr *inp) { char *cp; static char line[MAXHOSTNAMELEN]; struct hostent *hp; struct netent *np; cp = 0; if (!numeric_addr && inp->s_addr != INADDR_ANY) { int net = inet_netof(*inp); int lna = inet_lnaof(*inp); if (lna == INADDR_ANY) { np = getnetbyaddr(net, AF_INET); if (np) cp = np->n_name; } if (cp == 0) { hp = gethostbyaddr((char *)inp, sizeof (*inp), AF_INET); if (hp) { cp = hp->h_name; trimdomain(cp, strlen(cp)); } } } if (inp->s_addr == INADDR_ANY) strcpy(line, "*"); else if (cp) { strlcpy(line, cp, sizeof(line)); } else { inp->s_addr = ntohl(inp->s_addr); #define C(x) ((u_int)((x) & 0xff)) sprintf(line, "%u.%u.%u.%u", C(inp->s_addr >> 24), C(inp->s_addr >> 16), C(inp->s_addr >> 8), C(inp->s_addr)); } return (line); } Index: projects/binutils-2.17/usr.bin/procstat =================================================================== --- projects/binutils-2.17/usr.bin/procstat (revision 215829) +++ projects/binutils-2.17/usr.bin/procstat (revision 215830) Property changes on: projects/binutils-2.17/usr.bin/procstat ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/usr.bin/procstat:r215709-215824 Index: projects/binutils-2.17/usr.sbin/kernbb/kernbb.8 =================================================================== --- projects/binutils-2.17/usr.sbin/kernbb/kernbb.8 (revision 215829) +++ projects/binutils-2.17/usr.sbin/kernbb/kernbb.8 (nonexistent) @@ -1,82 +0,0 @@ -.\" Copyright (c) 1983, 1991, 1993 -.\" The Regents of the University of California. All rights reserved. -.\" -.\" Redistribution and use in source and binary forms, with or without -.\" modification, are permitted provided that the following conditions -.\" are met: -.\" 1. Redistributions of source code must retain the above copyright -.\" notice, this list of conditions and the following disclaimer. -.\" 2. Redistributions in binary form must reproduce the above copyright -.\" notice, this list of conditions and the following disclaimer in the -.\" documentation and/or other materials provided with the distribution. -.\" 4. Neither the name of the University nor the names of its contributors -.\" may be used to endorse or promote products derived from this software -.\" without specific prior written permission. -.\" -.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -.\" SUCH DAMAGE. -.\" -.\" $FreeBSD$ -.\" -.Dd May 22, 1995 -.Dt KERNBB 8 -.Os -.Sh NAME -.Nm kernbb -.Nd generate a dump of the kernels basic-block profile buffers -.Sh SYNOPSIS -.Nm -.Sh DESCRIPTION -The -.Nm -utility is used to extract the basic-block profiling buffers of the running -kernel into the files needed for the -.Xr gcov 1 -tool. -.Pp -At least one source file in the running kernel must have been compiled -with the -.Fl Fl test-coverage -and -.Fl Fl profile-arcs -options. -.Pp -The output is stored in the filenames compiled into the kernel by -.Xr gcc 1 . -If the absolute pathname cannot be written to, the directory part -of the filename is discarded and the file stored in the current -directory under its basename. -.Pp -The output files are named -.Pa *.da , -and the -.Xr gcov 1 -program will extract the counts and merge them with the source -file to show actual execution counts. -.Sh FILES -.Bl -tag -width /boot/kernel/kernel -compact -.It Pa /boot/kernel/kernel -the default system -.It Pa /dev/kmem -the default memory -.El -.Sh SEE ALSO -.Xr cc 1 , -.Xr gcov 1 -.Sh AUTHORS -The -.Nm -utility was written by -.An Poul-Henning Kamp , -along with the kernel-support. -.Sh BUGS -There are far too much magic and internal knowledge from GCC in this. Property changes on: projects/binutils-2.17/usr.sbin/kernbb/kernbb.8 ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/binutils-2.17/usr.sbin/kernbb/kernbb.c =================================================================== --- projects/binutils-2.17/usr.sbin/kernbb/kernbb.c (revision 215829) +++ projects/binutils-2.17/usr.sbin/kernbb/kernbb.c (nonexistent) @@ -1,145 +0,0 @@ -/* - * ---------------------------------------------------------------------------- - * "THE BEER-WARE LICENSE" (Revision 42): - * wrote this file. As long as you retain this notice you - * can do whatever you want with this stuff. If we meet some day, and you think - * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp - * ---------------------------------------------------------------------------- - * - */ - -#include -__FBSDID("$FreeBSD$"); - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -typedef long long gcov_type; - -#define PARAMS(foo) foo -#define ATTRIBUTE_UNUSED __unused -#include "gcov-io.h" - -struct bbf { - long checksum; - int arc_count; - u_long name; -}; - -struct bb { - u_long zero_one; - u_long filename; - u_long counts; - u_long ncounts; - u_long next; - u_long sizeof_bb; - u_long funcs; -}; - -struct nlist namelist[] = { - { "bbhead", 0, 0, 0, 0 }, - { NULL, 0, 0, 0, 0 } -}; - -kvm_t *kv; - -int -main(int argc __unused, char **argv __unused) -{ - int i, funcs; - u_long l1,l2,l4; - struct bb bb; - struct bbf bbf; - char buf[BUFSIZ], *p; - gcov_type *q, *qr; - - FILE *f; - - kv = kvm_open(NULL,NULL,NULL,O_RDWR,"dnc"); - if (!kv) - err(1,"kvm_open"); - i = kvm_nlist(kv,namelist); - if (i) - err(1,"kvm_nlist"); - - l1 = namelist[0].n_value; - kvm_read(kv,l1,&l2,sizeof l2); - while(l2) { - l1 += sizeof l1; - kvm_read(kv,l2,&bb,sizeof bb); -#if 0 -printf("%lx\n%lx\n%lx\n%lx\n%lx\n%lx\n%lx\n", - bb.zero_one, bb.filename, bb.counts, bb.ncounts, bb.next, - bb.sizeof_bb, bb.funcs); -#endif - - funcs = 0; - for (l4 = bb.funcs; ; l4 += sizeof (bbf)) { - kvm_read(kv, l4, &bbf, sizeof(bbf)); - if (bbf.arc_count == -1) - break; - funcs++; - } - - l2 = bb.next; - - kvm_read(kv, bb.filename, buf, sizeof(buf)); - p = buf; - f = fopen(p, "w"); - if (f != NULL) { - printf("Writing \"%s\"\n", p); - } else { - p = strrchr(buf, '/'); - if (p == NULL) - p = buf; - else - p++; - printf("Writing \"%s\" (spec \"%s\")\n", p, buf); - f = fopen(p, "w"); - } - if (f == NULL) - err(1,"%s", p); - __write_long(-123, f, 4); - - __write_long(funcs, f, 4); - - __write_long(4 + 8 + 8 + 4 + 8 + 8, f, 4); - - __write_long(bb.ncounts, f, 4); - __write_long(0, f, 8); - __write_long(0, f, 8); - - __write_long(bb.ncounts, f, 4); - __write_long(0, f, 8); - __write_long(0, f, 8); - - qr = malloc(bb.ncounts * 8); - kvm_read(kv, bb.counts, qr, bb.ncounts * 8); - q = qr; - for (l4 = bb.funcs; ; l4 += sizeof (bbf)) { - kvm_read(kv, l4, &bbf, sizeof(bbf)); - if (bbf.arc_count == -1) - break; - kvm_read(kv, bbf.name, buf, sizeof(buf)); - - __write_gcov_string(buf, strlen(buf), f, -1); - - __write_long(bbf.checksum, f, 4); - __write_long(bbf.arc_count, f, 4); - for (i = 0; i < bbf.arc_count; i++) { - __write_gcov_type(*q, f, 8); - q++; - } - } - fclose(f); - free(qr); - } - return 0; -} Property changes on: projects/binutils-2.17/usr.sbin/kernbb/kernbb.c ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/binutils-2.17/usr.sbin/kernbb/Makefile =================================================================== --- projects/binutils-2.17/usr.sbin/kernbb/Makefile (revision 215829) +++ projects/binutils-2.17/usr.sbin/kernbb/Makefile (nonexistent) @@ -1,12 +0,0 @@ -# $FreeBSD$ - -PROG= kernbb -MAN= kernbb.8 - -DPADD= ${LIBKVM} -LDADD= -lkvm - -CFLAGS+= -I${.CURDIR}/../../contrib/gcc - -.include - Property changes on: projects/binutils-2.17/usr.sbin/kernbb/Makefile ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/binutils-2.17/usr.sbin/iostat/iostat.8 =================================================================== --- projects/binutils-2.17/usr.sbin/iostat/iostat.8 (revision 215829) +++ projects/binutils-2.17/usr.sbin/iostat/iostat.8 (revision 215830) @@ -1,479 +1,493 @@ .\" .\" Copyright (c) 1997 Kenneth D. Merry. .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. The name of the author may not be used to endorse or promote products .\" derived from this software without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .\" Copyright (c) 1985, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. All advertising materials mentioning features or use of this software .\" must display the following acknowledgement: .\" This product includes software developed by the University of .\" California, Berkeley and its contributors. .\" 4. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)iostat.8 8.1 (Berkeley) 6/6/93 .\" -.Dd April 17, 2006 +.Dd November 24, 2010 .Dt IOSTAT 8 .Os .Sh NAME .Nm iostat .Nd report .Tn I/O statistics .Sh SYNOPSIS .Nm .Op Fl CdhIKoTxz?\& .Op Fl c Ar count .Op Fl M Ar core .Op Fl n Ar devs .Op Fl N Ar system .Oo .Fl t .Sm off .Ar type , if , pass .Sm on .Oc .Op Fl w Ar wait .Op Ar drives .Sh DESCRIPTION The .Nm utility displays kernel .Tn I/O statistics on terminal, device and cpu operations. The first statistics that are printed are averaged over the system uptime. To get information about the current activity, a suitable wait time should be specified, so that the subsequent sets of printed statistics will be averaged over that time. .Pp The options are as follows: .Bl -tag -width flag .It Fl c Repeat the display .Ar count times. If no repeat .Ar count -is specified, the default is infinity. +is specified, the default depends on whether +.Fl w +is specified. +With +.Fl w +the default repeat count is infinity, otherwise it is 1. .It Fl C Display CPU statistics. This is on by default, unless .Fl d +or +.Fl x is specified. .It Fl d Display only device statistics. If this flag is turned on, only device statistics will be displayed, unless .Fl C or .Fl T is also specified to enable the display of CPU or TTY statistics. .It Fl h Put .Nm in .Sq top mode. In this mode, .Nm will show devices in order from highest to lowest bytes per measurement cycle. .It Fl I Display total statistics for a given time period, rather than average statistics for each second during that time period. .It Fl K In the blocks transferred display (-o), display block count in kilobytes rather then the device native block size. .It Fl M Extract values associated with the name list from the specified core instead of the default .Dq Pa /dev/kmem . .It Fl n Display up to .Ar devs number of devices. The .Nm utility will display fewer devices if there are not .Ar devs devices present. .It Fl N Extract the name list from the specified system instead of the default .Dq Pa /boot/kernel/kernel . .It Fl o Display old-style .Nm device statistics. Sectors per second, transfers per second, and milliseconds per seek are displayed. If .Fl I is specified, total blocks/sectors, total transfers, and milliseconds per seek are displayed. .It Fl t Specify which types of devices to display. There are three different categories of devices: .Pp .Bl -tag -width indent -compact .It device type: .Bl -tag -width 9n -compact .It da Direct Access devices .It sa Sequential Access devices .It printer Printers .It proc Processor devices .It worm Write Once Read Multiple devices .It cd CD devices .It scanner Scanner devices .It optical Optical Memory devices .It changer Medium Changer devices .It comm Communication devices .It array Storage Array devices .It enclosure Enclosure Services devices .It floppy Floppy devices .El .Pp .It interface: .Bl -tag -width 9n -compact .It IDE Integrated Drive Electronics devices .It SCSI Small Computer System Interface devices .It other Any other device interface .El .Pp .It passthrough: .Bl -tag -width 9n -compact .It pass Passthrough devices .El .El .Pp The user must specify at least one device type, and may specify at most one device type from each category. Multiple device types in a single device type statement must be separated by commas. .Pp Any number of .Fl t arguments may be specified on the command line. All .Fl t arguments are ORed together to form a matching expression against which all devices in the system are compared. Any device that fully matches any .Fl t argument will be included in the .Nm output, up to the number of devices that can be displayed in 80 columns, or the maximum number of devices specified by the user. .It Fl T Display TTY statistics. This is on by default, unless .Fl d +or +.Fl x is specified. .It Fl w Pause .Ar wait seconds between each display. If no .Ar wait interval is specified, the default is 1 second. .Pp The .Nm command will accept and honor a non-integer number of seconds. Note that the interval only has millisecond granularity. Finer values will be truncated. E.g., .Dq Li -w1.0001 is the same as .Dq Li -w1.000 . The interval will also suffer from modifications to .Va kern.hz so your mileage may vary. .It Fl x Show extended disk statistics. Each disk is displayed on a line of its own with all available statistics. +If this flag is turned on, only disk statistics will be displayed, unless +.Fl C +or +.Fl T +is also specified to enable the display of CPU or TTY statistics. .It Fl z If .Fl x is specified, omit lines for devices with no activity. .It Fl ?\& Display a usage statement and exit. .El .Pp The .Nm utility displays its information in the following format: .Bl -tag -width flag .It tty .Bl -tag -width indent -compact .It tin characters read from terminals .It tout characters written to terminals .El .It devices Device operations. The header of the field is the device name and unit number. The .Nm utility will display as many devices as will fit in a standard 80 column screen, or the maximum number of devices in the system, whichever is smaller. If .Fl n is specified on the command line, .Nm will display the smaller of the requested number of devices, and the maximum number of devices in the system. To force .Nm to display specific drives, their names may be supplied on the command line. The .Nm utility will not display more devices than will fit in an 80 column screen, unless the .Fl n argument is given on the command line to specify a maximum number of devices to display. If fewer devices are specified on the command line than will fit in an 80 column screen, .Nm will show only the specified devices. .Pp The standard .Nm device display shows the following statistics: .Pp .Bl -tag -width indent -compact .It KB/t kilobytes per transfer .It tps transfers per second .It MB/s megabytes per second .El .Pp The standard .Nm device display, with the .Fl I flag specified, shows the following statistics: .Pp .Bl -tag -width indent -compact .It KB/t kilobytes per transfer .It xfrs total number of transfers .It MB total number of megabytes transferred .El .Pp The extended .Nm device display, with the .Fl x flag specified, shows the following statistics: .Pp .Bl -tag -width indent -compact .It r/s read operations per second .It w/s write operations per second .It kr/s kilobytes read per second .It kw/s kilobytes write per second .It qlen transactions queue length .It svc_t average duration of transactions, in milliseconds .It %b % of time the device had one or more outstanding transactions .El .Pp The old-style .Nm display (using .Fl o ) shows the following statistics: .Pp .Bl -tag -width indent -compact .It sps sectors transferred per second .It tps transfers per second .It msps average milliseconds per transaction .El .Pp The old-style .Nm display, with the .Fl I flag specified, shows the following statistics: .Pp .Bl -tag -width indent -compact .It blk total blocks/sectors transferred .It xfr total transfers .It msps average milliseconds per transaction .El .It cpu .Bl -tag -width indent -compact .It \&us % of cpu time in user mode .It \&ni % of cpu time in user mode running niced processes .It \&sy % of cpu time in system mode .It \&in % of cpu time in interrupt mode .It \&id % of cpu time in idle mode .El .El .Sh FILES .Bl -tag -width /boot/kernel/kernel -compact .It Pa /boot/kernel/kernel Default kernel namelist. .It Pa /dev/kmem Default memory file. .El .Sh EXAMPLES .Dl iostat -w 1 da0 da1 cd0 .Pp Display statistics for the first two Direct Access devices and the first CDROM device every second ad infinitum. .Pp .Dl iostat -c 2 .Pp Display the statistics for the first four devices in the system twice, with a one second display interval. .Pp .Dl iostat -t da -t cd -w 1 .Pp Display statistics for all CDROM and Direct Access devices every second ad infinitum. .Pp .Dl iostat -t da,scsi,pass -t cd,scsi,pass .Pp Display statistics once for all SCSI passthrough devices that provide access to either Direct Access or CDROM devices. .Pp .Dl iostat -h -n 8 -w 1 .Pp Display up to 8 devices with the most I/O every second ad infinitum. .Pp .Dl iostat -dh -t da -w 1 .Pp Omit the TTY and CPU displays, show devices in order of performance and show only Direct Access devices every second ad infinitum. .Pp .Dl iostat -Iw 3 .Pp Display total statistics every three seconds ad infinitum. .Pp .Dl iostat -odICTw 2 -c 9 .Pp Display total statistics using the old-style output format 9 times, with a two second interval between each measurement/display. The .Fl d flag generally disables the TTY and CPU displays, but since the .Fl T and .Fl C flags are given, the TTY and CPU displays will be displayed. .Sh SEE ALSO .Xr fstat 1 , .Xr netstat 1 , .Xr nfsstat 1 , .Xr ps 1 , .Xr systat 1 , .Xr devstat 3 , .Xr gstat 8 , .Xr pstat 8 , .Xr vmstat 8 .Pp The sections starting with ``Interpreting system activity'' in .%T "Installing and Operating 4.3BSD" . .Sh HISTORY This version of .Nm first appeared in .Fx 3.0 . .Sh AUTHORS .An Kenneth Merry Aq ken@FreeBSD.org .Sh BUGS The use of .Nm as a debugging tool for crash dumps is probably limited because there is currently no way to get statistics that only cover the time immediately before the crash. Index: projects/binutils-2.17/usr.sbin/usbdump/usbdump.c =================================================================== --- projects/binutils-2.17/usr.sbin/usbdump/usbdump.c (revision 215829) +++ projects/binutils-2.17/usr.sbin/usbdump/usbdump.c (revision 215830) @@ -1,542 +1,544 @@ /*- * Copyright (c) 2010 Weongyo Jeong * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. * * $FreeBSD$ */ #include #include #include +#include #include #include +#include +#include #include #include #include #include #include #include #include #include #include #include #include #include struct usbcap { int fd; /* fd for /dev/usbpf */ u_int bufsize; char *buffer; /* for -w option */ int wfd; /* for -r option */ int rfd; }; struct usbcap_filehdr { u_int magic; #define USBCAP_FILEHDR_MAGIC 0x9a90000e u_char major; u_char minor; u_char reserved[26]; } __packed; static int doexit = 0; static int pkt_captured = 0; static int verbose = 0; static const char *i_arg = "usbus0";; static const char *r_arg = NULL; static const char *w_arg = NULL; static const char *errstr_table[USB_ERR_MAX] = { [USB_ERR_NORMAL_COMPLETION] = "NORMAL_COMPLETION", [USB_ERR_PENDING_REQUESTS] = "PENDING_REQUESTS", [USB_ERR_NOT_STARTED] = "NOT_STARTED", [USB_ERR_INVAL] = "INVAL", [USB_ERR_NOMEM] = "NOMEM", [USB_ERR_CANCELLED] = "CANCELLED", [USB_ERR_BAD_ADDRESS] = "BAD_ADDRESS", [USB_ERR_BAD_BUFSIZE] = "BAD_BUFSIZE", [USB_ERR_BAD_FLAG] = "BAD_FLAG", [USB_ERR_NO_CALLBACK] = "NO_CALLBACK", [USB_ERR_IN_USE] = "IN_USE", [USB_ERR_NO_ADDR] = "NO_ADDR", [USB_ERR_NO_PIPE] = "NO_PIPE", [USB_ERR_ZERO_NFRAMES] = "ZERO_NFRAMES", [USB_ERR_ZERO_MAXP] = "ZERO_MAXP", [USB_ERR_SET_ADDR_FAILED] = "SET_ADDR_FAILED", [USB_ERR_NO_POWER] = "NO_POWER", [USB_ERR_TOO_DEEP] = "TOO_DEEP", [USB_ERR_IOERROR] = "IOERROR", [USB_ERR_NOT_CONFIGURED] = "NOT_CONFIGURED", [USB_ERR_TIMEOUT] = "TIMEOUT", [USB_ERR_SHORT_XFER] = "SHORT_XFER", [USB_ERR_STALLED] = "STALLED", [USB_ERR_INTERRUPTED] = "INTERRUPTED", [USB_ERR_DMA_LOAD_FAILED] = "DMA_LOAD_FAILED", [USB_ERR_BAD_CONTEXT] = "BAD_CONTEXT", [USB_ERR_NO_ROOT_HUB] = "NO_ROOT_HUB", [USB_ERR_NO_INTR_THREAD] = "NO_INTR_THREAD", [USB_ERR_NOT_LOCKED] = "NOT_LOCKED", }; static const char *xfertype_table[] = { [UE_CONTROL] = "CTRL", [UE_ISOCHRONOUS] = "ISOC", [UE_BULK] = "BULK", [UE_INTERRUPT] = "INTR" }; static void handle_sigint(int sig) { (void)sig; doexit = 1; } static void print_flags(u_int32_t flags) { #define PRINTFLAGS(name) \ if ((flags & USBPF_FLAG_##name) != 0) \ printf("%s ", #name); printf(" flags %#x", flags); printf(" < "); PRINTFLAGS(FORCE_SHORT_XFER); PRINTFLAGS(SHORT_XFER_OK); PRINTFLAGS(SHORT_FRAMES_OK); PRINTFLAGS(PIPE_BOF); PRINTFLAGS(PROXY_BUFFER); PRINTFLAGS(EXT_BUFFER); PRINTFLAGS(MANUAL_STATUS); PRINTFLAGS(NO_PIPE_OK); PRINTFLAGS(STALL_PIPE); printf(">\n"); #undef PRINTFLAGS } static void print_status(u_int32_t status) { #define PRINTSTATUS(name) \ if ((status & USBPF_STATUS_##name) != 0) \ printf("%s ", #name); printf(" status %#x", status); printf(" < "); PRINTSTATUS(OPEN); PRINTSTATUS(TRANSFERRING); PRINTSTATUS(DID_DMA_DELAY); PRINTSTATUS(DID_CLOSE); PRINTSTATUS(DRAINING); PRINTSTATUS(STARTED); PRINTSTATUS(BW_RECLAIMED); PRINTSTATUS(CONTROL_XFR); PRINTSTATUS(CONTROL_HDR); PRINTSTATUS(CONTROL_ACT); PRINTSTATUS(CONTROL_STALL); PRINTSTATUS(SHORT_FRAMES_OK); PRINTSTATUS(SHORT_XFER_OK); #if USB_HAVE_BUSDMA PRINTSTATUS(BDMA_ENABLE); PRINTSTATUS(BDMA_NO_POST_SYNC); PRINTSTATUS(BDMA_SETUP); #endif PRINTSTATUS(ISOCHRONOUS_XFR); PRINTSTATUS(CURR_DMA_SET); PRINTSTATUS(CAN_CANCEL_IMMED); PRINTSTATUS(DOING_CALLBACK); printf(">\n"); #undef PRINTSTATUS } /* * Display a region in traditional hexdump format. */ static void hexdump(const char *region, size_t len) { const char *line; int x, c; char lbuf[80]; #define EMIT(fmt, args...) do { \ sprintf(lbuf, fmt , ## args); \ printf("%s", lbuf); \ } while (0) for (line = region; line < (region + len); line += 16) { EMIT(" %04lx ", (long) (line - region)); for (x = 0; x < 16; x++) { if ((line + x) < (region + len)) EMIT("%02x ", *(const u_int8_t *)(line + x)); else EMIT("-- "); if (x == 7) EMIT(" "); } EMIT(" |"); for (x = 0; x < 16; x++) { if ((line + x) < (region + len)) { c = *(const u_int8_t *)(line + x); /* !isprint(c) */ if ((c < ' ') || (c > '~')) c = '.'; EMIT("%c", c); } else EMIT(" "); } EMIT("|\n"); } #undef EMIT } static void -print_apacket(const struct usbpf_xhdr *hdr, struct usbpf_pkthdr *up, +print_apacket(const struct bpf_xhdr *hdr, struct usbpf_pkthdr *up, const char *payload) { struct tm *tm; struct timeval tv; size_t len; u_int32_t framelen, x; const char *ptr = payload; char buf[64]; /* A packet from the kernel is based on little endian byte order. */ up->up_busunit = le32toh(up->up_busunit); up->up_flags = le32toh(up->up_flags); up->up_status = le32toh(up->up_status); up->up_length = le32toh(up->up_length); up->up_frames = le32toh(up->up_frames); up->up_error = le32toh(up->up_error); up->up_interval = le32toh(up->up_interval); - tv.tv_sec = hdr->uh_tstamp.ut_sec; - tv.tv_usec = hdr->uh_tstamp.ut_frac; + tv.tv_sec = hdr->bh_tstamp.bt_sec; + tv.tv_usec = hdr->bh_tstamp.bt_frac; tm = localtime(&tv.tv_sec); len = strftime(buf, sizeof(buf), "%H:%M:%S", tm); printf("%.*s.%06ju", (int)len, buf, tv.tv_usec); printf(" usbus%d.%d 0x%02x %s %s", up->up_busunit, up->up_address, up->up_endpoint, xfertype_table[up->up_xfertype], up->up_type == USBPF_XFERTAP_SUBMIT ? ">" : "<"); printf(" (%d/%d)", up->up_frames, up->up_length); if (up->up_type == USBPF_XFERTAP_DONE) printf(" %s", errstr_table[up->up_error]); if (up->up_xfertype == UE_BULK || up->up_xfertype == UE_ISOCHRONOUS) printf(" %d", up->up_interval); printf("\n"); if (verbose >= 1) { for (x = 0; x < up->up_frames; x++) { framelen = le32toh(*((const u_int32_t *)ptr)); ptr += sizeof(u_int32_t); printf(" frame[%u] len %d\n", x, framelen); assert(framelen < (1024 * 4)); hexdump(ptr, framelen); ptr += framelen; } } if (verbose >= 2) { print_flags(up->up_flags); print_status(up->up_status); } } - static void print_packets(char *data, const int datalen) { struct usbpf_pkthdr *up; - const struct usbpf_xhdr *hdr; + const struct bpf_xhdr *hdr; u_int32_t framelen, x; char *ptr, *next; for (ptr = data; ptr < (data + datalen); ptr = next) { - hdr = (const struct usbpf_xhdr *)ptr; - up = (struct usbpf_pkthdr *)(ptr + hdr->uh_hdrlen); - next = ptr + USBPF_WORDALIGN(hdr->uh_hdrlen + hdr->uh_caplen); + hdr = (const struct bpf_xhdr *)ptr; + up = (struct usbpf_pkthdr *)(ptr + hdr->bh_hdrlen); + next = ptr + BPF_WORDALIGN(hdr->bh_hdrlen + hdr->bh_caplen); ptr = ((char *)up) + sizeof(struct usbpf_pkthdr); if (w_arg == NULL) print_apacket(hdr, up, ptr); pkt_captured++; for (x = 0; x < up->up_frames; x++) { framelen = le32toh(*((const u_int32_t *)ptr)); ptr += sizeof(u_int32_t) + framelen; } } } static void write_packets(struct usbcap *p, const char *data, const int datalen) { int len = htole32(datalen), ret; ret = write(p->wfd, &len, sizeof(int)); assert(ret == sizeof(int)); ret = write(p->wfd, data, datalen); assert(ret == datalen); } static void read_file(struct usbcap *p) { int datalen, ret; char *data; while ((ret = read(p->rfd, &datalen, sizeof(int))) == sizeof(int)) { datalen = le32toh(datalen); data = malloc(datalen); assert(data != NULL); ret = read(p->rfd, data, datalen); assert(ret == datalen); print_packets(data, datalen); free(data); } if (ret == -1) fprintf(stderr, "read: %s\n", strerror(errno)); } static void do_loop(struct usbcap *p) { int cc; while (doexit == 0) { cc = read(p->fd, (char *)p->buffer, p->bufsize); if (cc < 0) { switch (errno) { case EINTR: break; default: fprintf(stderr, "read: %s\n", strerror(errno)); return; } continue; } if (cc == 0) continue; if (w_arg != NULL) write_packets(p, p->buffer, cc); print_packets(p->buffer, cc); } } static void init_rfile(struct usbcap *p) { struct usbcap_filehdr uf; int ret; p->rfd = open(r_arg, O_RDONLY); if (p->rfd < 0) { fprintf(stderr, "open: %s (%s)\n", r_arg, strerror(errno)); exit(EXIT_FAILURE); } ret = read(p->rfd, &uf, sizeof(uf)); assert(ret == sizeof(uf)); assert(le32toh(uf.magic) == USBCAP_FILEHDR_MAGIC); assert(uf.major == 0); assert(uf.minor == 1); } static void init_wfile(struct usbcap *p) { struct usbcap_filehdr uf; int ret; p->wfd = open(w_arg, O_CREAT | O_TRUNC | O_WRONLY, S_IRUSR | S_IWUSR); if (p->wfd < 0) { fprintf(stderr, "open: %s (%s)\n", w_arg, strerror(errno)); exit(EXIT_FAILURE); } bzero(&uf, sizeof(uf)); uf.magic = htole32(USBCAP_FILEHDR_MAGIC); uf.major = 0; uf.minor = 1; ret = write(p->wfd, (const void *)&uf, sizeof(uf)); assert(ret == sizeof(uf)); } static void usage(void) { #define FMT " %-14s %s\n" fprintf(stderr, "usage: usbdump [options]\n"); fprintf(stderr, FMT, "-i ifname", "Listen on USB bus interface"); fprintf(stderr, FMT, "-r file", "Read the raw packets from file"); fprintf(stderr, FMT, "-s snaplen", "Snapshot bytes from each packet"); fprintf(stderr, FMT, "-v", "Increases the verbose level"); fprintf(stderr, FMT, "-w file", "Write the raw packets to file"); #undef FMT exit(1); } int main(int argc, char *argv[]) { struct timeval tv; - struct usbpf_insn total_insn; - struct usbpf_program total_prog; - struct usbpf_stat us; - struct usbpf_version uv; + struct bpf_insn total_insn; + struct bpf_program total_prog; + struct bpf_stat us; + struct bpf_version bv; struct usbcap uc, *p = &uc; - struct usbpf_ifreq ufr; + struct ifreq ifr; long snapshot = 192; u_int v; int fd, o; const char *optstring; bzero(&uc, sizeof(struct usbcap)); optstring = "i:r:s:vw:"; while ((o = getopt(argc, argv, optstring)) != -1) { switch (o) { case 'i': i_arg = optarg; break; case 'r': r_arg = optarg; init_rfile(p); break; case 's': snapshot = strtol(optarg, NULL, 10); errno = 0; if (snapshot == 0 && errno == EINVAL) usage(); /* snapeshot == 0 is special */ if (snapshot == 0) snapshot = -1; break; case 'v': verbose++; break; case 'w': w_arg = optarg; init_wfile(p); break; default: usage(); /* NOTREACHED */ } } if (r_arg != NULL) { read_file(p); exit(EXIT_SUCCESS); } - p->fd = fd = open("/dev/usbpf", O_RDONLY); + p->fd = fd = open("/dev/bpf", O_RDONLY); if (p->fd < 0) { fprintf(stderr, "(no devices found)\n"); return (EXIT_FAILURE); } - if (ioctl(fd, UIOCVERSION, (caddr_t)&uv) < 0) { - fprintf(stderr, "UIOCVERSION: %s\n", strerror(errno)); + if (ioctl(fd, BIOCVERSION, (caddr_t)&bv) < 0) { + fprintf(stderr, "BIOCVERSION: %s\n", strerror(errno)); return (EXIT_FAILURE); } - if (uv.uv_major != USBPF_MAJOR_VERSION || - uv.uv_minor < USBPF_MINOR_VERSION) { + if (bv.bv_major != BPF_MAJOR_VERSION || + bv.bv_minor < BPF_MINOR_VERSION) { fprintf(stderr, "kernel bpf filter out of date"); return (EXIT_FAILURE); } - if ((ioctl(fd, UIOCGBLEN, (caddr_t)&v) < 0) || v < 65536) - v = 65536; + if ((ioctl(fd, BIOCGBLEN, (caddr_t)&v) < 0) || v < 4096) + v = 4096; for ( ; v != 0; v >>= 1) { - (void)ioctl(fd, UIOCSBLEN, (caddr_t)&v); - (void)strncpy(ufr.ufr_name, i_arg, sizeof(ufr.ufr_name)); - if (ioctl(fd, UIOCSETIF, (caddr_t)&ufr) >= 0) + (void)ioctl(fd, BIOCSBLEN, (caddr_t)&v); + (void)strncpy(ifr.ifr_name, i_arg, sizeof(ifr.ifr_name)); + if (ioctl(fd, BIOCSETIF, (caddr_t)&ifr) >= 0) break; } if (v == 0) { - fprintf(stderr, "UIOCSBLEN: %s: No buffer size worked", i_arg); + fprintf(stderr, "BIOCSBLEN: %s: No buffer size worked", i_arg); return (EXIT_FAILURE); } - if (ioctl(fd, UIOCGBLEN, (caddr_t)&v) < 0) { - fprintf(stderr, "UIOCGBLEN: %s", strerror(errno)); + if (ioctl(fd, BIOCGBLEN, (caddr_t)&v) < 0) { + fprintf(stderr, "BIOCGBLEN: %s", strerror(errno)); return (EXIT_FAILURE); } p->bufsize = v; p->buffer = (u_char *)malloc(p->bufsize); if (p->buffer == NULL) { fprintf(stderr, "malloc: %s", strerror(errno)); return (EXIT_FAILURE); } /* XXX no read filter rules yet so at this moment accept everything */ - total_insn.code = (u_short)(USBPF_RET | USBPF_K); + total_insn.code = (u_short)(BPF_RET | BPF_K); total_insn.jt = 0; total_insn.jf = 0; total_insn.k = snapshot; - total_prog.uf_len = 1; - total_prog.uf_insns = &total_insn; - if (ioctl(p->fd, UIOCSETF, (caddr_t)&total_prog) < 0) { - fprintf(stderr, "UIOCSETF: %s", strerror(errno)); + total_prog.bf_len = 1; + total_prog.bf_insns = &total_insn; + if (ioctl(p->fd, BIOCSETF, (caddr_t)&total_prog) < 0) { + fprintf(stderr, "BIOCSETF: %s", strerror(errno)); return (EXIT_FAILURE); } /* 1 second read timeout */ tv.tv_sec = 1; tv.tv_usec = 0; - if (ioctl(p->fd, UIOCSRTIMEOUT, (caddr_t)&tv) < 0) { - fprintf(stderr, "UIOCSRTIMEOUT: %s", strerror(errno)); + if (ioctl(p->fd, BIOCSRTIMEOUT, (caddr_t)&tv) < 0) { + fprintf(stderr, "BIOCSRTIMEOUT: %s", strerror(errno)); return (EXIT_FAILURE); } (void)signal(SIGINT, handle_sigint); do_loop(p); - if (ioctl(fd, UIOCGSTATS, (caddr_t)&us) < 0) { - fprintf(stderr, "UIOCGSTATS: %s", strerror(errno)); + if (ioctl(fd, BIOCGSTATS, (caddr_t)&us) < 0) { + fprintf(stderr, "BIOCGSTATS: %s", strerror(errno)); return (EXIT_FAILURE); } /* XXX what's difference between pkt_captured and us.us_recv? */ printf("\n"); printf("%d packets captured\n", pkt_captured); - printf("%d packets received by filter\n", us.us_recv); - printf("%d packets dropped by kernel\n", us.us_drop); + printf("%d packets received by filter\n", us.bs_recv); + printf("%d packets dropped by kernel\n", us.bs_drop); if (p->fd > 0) close(p->fd); if (p->rfd > 0) close(p->rfd); if (p->wfd > 0) close(p->wfd); return (EXIT_SUCCESS); } Index: projects/binutils-2.17/usr.sbin/zic =================================================================== --- projects/binutils-2.17/usr.sbin/zic (revision 215829) +++ projects/binutils-2.17/usr.sbin/zic (revision 215830) Property changes on: projects/binutils-2.17/usr.sbin/zic ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/usr.sbin/zic:r215709-215824 Index: projects/binutils-2.17 =================================================================== --- projects/binutils-2.17 (revision 215829) +++ projects/binutils-2.17 (revision 215830) Property changes on: projects/binutils-2.17 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r215709-215824