Index: projects/runtime-coverage/contrib/one-true-awk/lib.c
===================================================================
--- projects/runtime-coverage/contrib/one-true-awk/lib.c	(revision 323974)
+++ projects/runtime-coverage/contrib/one-true-awk/lib.c	(revision 323975)
@@ -1,706 +1,707 @@
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name Lucent Technologies or any of
its entities not be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.

LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/

#define DEBUG
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <stdlib.h>
#include <stdarg.h>
#include "awk.h"
#include "ytab.h"

FILE	*infile	= NULL;
char	*file	= "";
char	*record;
int	recsize	= RECSIZE;
char	*fields;
int	fieldssize = RECSIZE;

Cell	**fldtab;	/* pointers to Cells */
char	inputFS[100] = " ";

#define	MAXFLD	2
int	nfields	= MAXFLD;	/* last allocated slot for $i */

int	donefld;	/* 1 = implies rec broken into fields */
int	donerec;	/* 1 = record is valid (no flds have changed) */

int	lastfld	= 0;	/* last used field */
int	argno	= 1;	/* current input argument number */
extern	Awkfloat *ARGC;

static Cell dollar0 = { OCELL, CFLD, NULL, "", 0.0, REC|STR|DONTFREE };
static Cell dollar1 = { OCELL, CFLD, NULL, "", 0.0, FLD|STR|DONTFREE };

void recinit(unsigned int n)
{
	if ( (record = (char *) malloc(n)) == NULL
	  || (fields = (char *) malloc(n+1)) == NULL
	  || (fldtab = (Cell **) malloc((nfields+1) * sizeof(Cell *))) == NULL
	  || (fldtab[0] = (Cell *) malloc(sizeof(Cell))) == NULL )
		FATAL("out of space for $0 and fields");
+	*record = '\0';
	*fldtab[0] = dollar0;
	fldtab[0]->sval = record;
	fldtab[0]->nval = tostring("0");
	makefields(1, nfields);
}

void makefields(int n1, int n2)		/* create $n1..$n2 inclusive */
{
	char temp[50];
	int i;

	for (i = n1; i <= n2; i++) {
		fldtab[i] = (Cell *) malloc(sizeof (struct Cell));
		if (fldtab[i] == NULL)
			FATAL("out of space in makefields %d", i);
		*fldtab[i] = dollar1;
		sprintf(temp, "%d", i);
		fldtab[i]->nval = tostring(temp);
	}
}

void initgetrec(void)
{
	int i;
	char *p;

	for (i = 1; i < *ARGC; i++) {
		p = getargv(i);	/* find 1st real filename */
		if (p == NULL || *p == '\0') {	/* deleted or zapped */
			argno++;
			continue;
		}
		if (!isclvar(p)) {
			setsval(lookup("FILENAME", symtab), p);
			return;
		}
		setclvar(p);	/* a commandline assignment before filename */
		argno++;
	}
	infile = stdin;		/* no filenames, so use stdin */
}

static int firsttime = 1;

int getrec(char **pbuf, int *pbufsize, int isrecord)	/* get next input record */
{			/* note: cares whether buf == record */
	int c;
	char *buf = *pbuf;
	uschar saveb0;
	int bufsize = *pbufsize, savebufsize = bufsize;

	if (firsttime) {
firsttime = 0; initgetrec(); } dprintf( ("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n", *RS, *FS, *ARGC, *FILENAME) ); if (isrecord) { donefld = 0; donerec = 1; } saveb0 = buf[0]; buf[0] = 0; while (argno < *ARGC || infile == stdin) { dprintf( ("argno=%d, file=|%s|\n", argno, file) ); if (infile == NULL) { /* have to open a new file */ file = getargv(argno); if (file == NULL || *file == '\0') { /* deleted or zapped */ argno++; continue; } if (isclvar(file)) { /* a var=value arg */ setclvar(file); argno++; continue; } *FILENAME = file; dprintf( ("opening file %s\n", file) ); if (*file == '-' && *(file+1) == '\0') infile = stdin; else if ((infile = fopen(file, "r")) == NULL) FATAL("can't open file %s", file); setfval(fnrloc, 0.0); } c = readrec(&buf, &bufsize, infile); if (c != 0 || buf[0] != '\0') { /* normal record */ if (isrecord) { if (freeable(fldtab[0])) xfree(fldtab[0]->sval); fldtab[0]->sval = buf; /* buf == record */ fldtab[0]->tval = REC | STR | DONTFREE; if (is_number(fldtab[0]->sval)) { fldtab[0]->fval = atof(fldtab[0]->sval); fldtab[0]->tval |= NUM; } } setfval(nrloc, nrloc->fval+1); setfval(fnrloc, fnrloc->fval+1); *pbuf = buf; *pbufsize = bufsize; return 1; } /* EOF arrived on this file; set up next */ if (infile != stdin) fclose(infile); infile = NULL; argno++; } buf[0] = saveb0; *pbuf = buf; *pbufsize = savebufsize; return 0; /* true end of file */ } void nextfile(void) { if (infile != NULL && infile != stdin) fclose(infile); infile = NULL; argno++; } int readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf */ { int sep, c; char *rr, *buf = *pbuf; int bufsize = *pbufsize; if (strlen(*FS) >= sizeof(inputFS)) FATAL("field separator %.10s... is too long", *FS); /*fflush(stdout); avoids some buffering problem but makes it 25% slower*/ strcpy(inputFS, *FS); /* for subsequent field splitting */ if ((sep = **RS) == 0) { sep = '\n'; while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */ ; if (c != EOF) ungetc(c, inf); } for (rr = buf; ; ) { for (; (c=getc(inf)) != sep && c != EOF; ) { if (rr-buf+1 > bufsize) if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 1")) FATAL("input record `%.30s...' too long", buf); *rr++ = c; } if (**RS == sep || c == EOF) break; if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */ break; if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr, "readrec 2")) FATAL("input record `%.30s...' too long", buf); *rr++ = '\n'; *rr++ = c; } if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3")) FATAL("input record `%.30s...' too long", buf); *rr = 0; dprintf( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) ); *pbuf = buf; *pbufsize = bufsize; return c == EOF && rr == buf ? 
0 : 1; } char *getargv(int n) /* get ARGV[n] */ { Cell *x; char *s, temp[50]; extern Array *ARGVtab; sprintf(temp, "%d", n); if (lookup(temp, ARGVtab) == NULL) return NULL; x = setsymtab(temp, "", 0.0, STR, ARGVtab); s = getsval(x); dprintf( ("getargv(%d) returns |%s|\n", n, s) ); return s; } void setclvar(char *s) /* set var=value from s */ { char *p; Cell *q; for (p=s; *p != '='; p++) ; *p++ = 0; p = qstring(p, '\0'); q = setsymtab(s, p, 0.0, STR, symtab); setsval(q, p); if (is_number(q->sval)) { q->fval = atof(q->sval); q->tval |= NUM; } dprintf( ("command line set %s to |%s|\n", s, p) ); } void fldbld(void) /* create fields from current record */ { /* this relies on having fields[] the same length as $0 */ /* the fields are all stored in this one array with \0's */ /* possibly with a final trailing \0 not associated with any field */ char *r, *fr, sep; Cell *p; int i, j, n; if (donefld) return; if (!isstr(fldtab[0])) getsval(fldtab[0]); r = fldtab[0]->sval; n = strlen(r); if (n > fieldssize) { xfree(fields); if ((fields = (char *) malloc(n+2)) == NULL) /* possibly 2 final \0s */ FATAL("out of space for fields in fldbld %d", n); fieldssize = n; } fr = fields; i = 0; /* number of fields accumulated here */ strcpy(inputFS, *FS); if (strlen(inputFS) > 1) { /* it's a regular expression */ i = refldbld(r, inputFS); } else if ((sep = *inputFS) == ' ') { /* default whitespace */ for (i = 0; ; ) { while (*r == ' ' || *r == '\t' || *r == '\n') r++; if (*r == 0) break; i++; if (i > nfields) growfldtab(i); if (freeable(fldtab[i])) xfree(fldtab[i]->sval); fldtab[i]->sval = fr; fldtab[i]->tval = FLD | STR | DONTFREE; do *fr++ = *r++; while (*r != ' ' && *r != '\t' && *r != '\n' && *r != '\0'); *fr++ = 0; } *fr = 0; } else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 char/field */ for (i = 0; *r != 0; r++) { char buf[2]; i++; if (i > nfields) growfldtab(i); if (freeable(fldtab[i])) xfree(fldtab[i]->sval); buf[0] = *r; buf[1] = 0; fldtab[i]->sval = tostring(buf); fldtab[i]->tval = FLD | STR; } *fr = 0; } else if (*r != 0) { /* if 0, it's a null field */ /* subtlecase : if length(FS) == 1 && length(RS > 0) * \n is NOT a field separator (cf awk book 61,84). * this variable is tested in the inner while loop. */ int rtest = '\n'; /* normal case */ if (strlen(*RS) > 0) rtest = '\0'; for (;;) { i++; if (i > nfields) growfldtab(i); if (freeable(fldtab[i])) xfree(fldtab[i]->sval); fldtab[i]->sval = fr; fldtab[i]->tval = FLD | STR | DONTFREE; while (*r != sep && *r != rtest && *r != '\0') /* \n is always a separator */ *fr++ = *r++; *fr++ = 0; if (*r++ == 0) break; } *fr = 0; } if (i > nfields) FATAL("record `%.30s...' has too many fields; can't happen", r); cleanfld(i+1, lastfld); /* clean out junk from previous record */ lastfld = i; donefld = 1; for (j = 1; j <= lastfld; j++) { p = fldtab[j]; if(is_number(p->sval)) { p->fval = atof(p->sval); p->tval |= NUM; } } setfval(nfloc, (Awkfloat) lastfld); if (dbg) { for (j = 0; j <= lastfld; j++) { p = fldtab[j]; printf("field %d (%s): |%s|\n", j, p->nval, p->sval); } } } void cleanfld(int n1, int n2) /* clean out fields n1 .. 
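/*
 * Illustrative aside (not part of this change): fldbld() above packs every
 * field into the single fields[] buffer, NUL-terminated back to back.  The
 * default FS=" " rule it implements -- skip leading blanks, then break on
 * blank/tab/newline -- can be sketched standalone; split_ws and MAXF are
 * hypothetical names used only for this example.
 */
#include <stdio.h>

#define MAXF 16

static int split_ws(char *s, char *f[], int maxf)
{
	int n = 0;

	while (n < maxf) {
		while (*s == ' ' || *s == '\t' || *s == '\n')
			s++;				/* skip leading blanks */
		if (*s == '\0')
			break;
		f[n++] = s;				/* field starts here */
		while (*s && *s != ' ' && *s != '\t' && *s != '\n')
			s++;
		if (*s)
			*s++ = '\0';			/* terminate the field */
	}
	return n;
}

int main(void)
{
	char rec[] = "  alpha\tbeta  gamma\n";
	char *f[MAXF];
	int i, nf = split_ws(rec, f, MAXF);

	for (i = 0; i < nf; i++)
		printf("$%d = |%s|\n", i + 1, f[i]);	/* alpha, beta, gamma */
	return 0;
}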
n2 inclusive */ { /* nvals remain intact */ Cell *p; int i; for (i = n1; i <= n2; i++) { p = fldtab[i]; if (freeable(p)) xfree(p->sval); p->sval = ""; p->tval = FLD | STR | DONTFREE; } } void newfld(int n) /* add field n after end of existing lastfld */ { if (n > nfields) growfldtab(n); cleanfld(lastfld+1, n); lastfld = n; setfval(nfloc, (Awkfloat) n); } Cell *fieldadr(int n) /* get nth field */ { if (n < 0) FATAL("trying to access out of range field %d", n); if (n > nfields) /* fields after NF are empty */ growfldtab(n); /* but does not increase NF */ return(fldtab[n]); } void growfldtab(int n) /* make new fields up to at least $n */ { int nf = 2 * nfields; size_t s; if (n > nf) nf = n; s = (nf+1) * (sizeof (struct Cell *)); /* freebsd: how much do we need? */ if (s / sizeof(struct Cell *) - 1 == nf) /* didn't overflow */ fldtab = (Cell **) realloc(fldtab, s); else /* overflow sizeof int */ xfree(fldtab); /* make it null */ if (fldtab == NULL) FATAL("out of space creating %d fields", nf); makefields(nfields+1, nf); nfields = nf; } int refldbld(const char *rec, const char *fs) /* build fields from reg expr in FS */ { /* this relies on having fields[] the same length as $0 */ /* the fields are all stored in this one array with \0's */ char *fr; int i, tempstat, n; fa *pfa; n = strlen(rec); if (n > fieldssize) { xfree(fields); if ((fields = (char *) malloc(n+1)) == NULL) FATAL("out of space for fields in refldbld %d", n); fieldssize = n; } fr = fields; *fr = '\0'; if (*rec == '\0') return 0; pfa = makedfa(fs, 1); dprintf( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) ); tempstat = pfa->initstat; for (i = 1; ; i++) { if (i > nfields) growfldtab(i); if (freeable(fldtab[i])) xfree(fldtab[i]->sval); fldtab[i]->tval = FLD | STR | DONTFREE; fldtab[i]->sval = fr; dprintf( ("refldbld: i=%d\n", i) ); if (nematch(pfa, rec)) { pfa->initstat = 2; /* horrible coupling to b.c */ dprintf( ("match %s (%d chars)\n", patbeg, patlen) ); strncpy(fr, rec, patbeg-rec); fr += patbeg - rec + 1; *(fr-1) = '\0'; rec = patbeg + patlen; } else { dprintf( ("no match %s\n", rec) ); strcpy(fr, rec); pfa->initstat = tempstat; break; } } return i; } void recbld(void) /* create $0 from $1..$NF if necessary */ { int i; char *r, *p; if (donerec == 1) return; r = record; for (i = 1; i <= *NF; i++) { p = getsval(fldtab[i]); if (!adjbuf(&record, &recsize, 1+strlen(p)+r-record, recsize, &r, "recbld 1")) FATAL("created $0 `%.30s...' too long", record); while ((*r = *p++) != 0) r++; if (i < *NF) { if (!adjbuf(&record, &recsize, 2+strlen(*OFS)+r-record, recsize, &r, "recbld 2")) FATAL("created $0 `%.30s...' too long", record); for (p = *OFS; (*r = *p++) != 0; ) r++; } } if (!adjbuf(&record, &recsize, 2+r-record, recsize, &r, "recbld 3")) FATAL("built giant record `%.30s...'", record); *r = '\0'; dprintf( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]) ); if (freeable(fldtab[0])) xfree(fldtab[0]->sval); fldtab[0]->tval = REC | STR | DONTFREE; fldtab[0]->sval = record; dprintf( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]) ); dprintf( ("recbld = |%s|\n", record) ); donerec = 1; } int errorflag = 0; void yyerror(const char *s) { SYNTAX("%s", s); } void SYNTAX(const char *fmt, ...) 
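/*
 * Illustrative aside (not part of this change): the "didn't overflow" test
 * in growfldtab() above multiplies, then divides back and compares, so a
 * wrapped size_t is caught before realloc().  Standalone sketch; grow_ptrs
 * is a hypothetical name.
 */
#include <stdio.h>
#include <stdlib.h>

static void **grow_ptrs(void **tab, size_t nelem)
{
	size_t s = (nelem + 1) * sizeof(void *);

	if (s / sizeof(void *) - 1 != nelem)	/* multiplication wrapped */
		return NULL;
	return realloc(tab, s);
}

int main(void)
{
	void **t = grow_ptrs(NULL, 100);

	printf("alloc for 100 slots: %s\n", t != NULL ? "ok" : "overflow/ENOMEM");
	free(t);
	return 0;
}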
{ extern char *cmdname, *curfname; static int been_here = 0; va_list varg; if (been_here++ > 2) return; fprintf(stderr, "%s: ", cmdname); va_start(varg, fmt); vfprintf(stderr, fmt, varg); va_end(varg); fprintf(stderr, " at source line %d", lineno); if (curfname != NULL) fprintf(stderr, " in function %s", curfname); if (compile_time == 1 && cursource() != NULL) fprintf(stderr, " source file %s", cursource()); fprintf(stderr, "\n"); errorflag = 2; eprint(); } void fpecatch(int n) { FATAL("floating point exception %d", n); } extern int bracecnt, brackcnt, parencnt; void bracecheck(void) { int c; static int beenhere = 0; if (beenhere++) return; while ((c = input()) != EOF && c != '\0') bclass(c); bcheck2(bracecnt, '{', '}'); bcheck2(brackcnt, '[', ']'); bcheck2(parencnt, '(', ')'); } void bcheck2(int n, int c1, int c2) { if (n == 1) fprintf(stderr, "\tmissing %c\n", c2); else if (n > 1) fprintf(stderr, "\t%d missing %c's\n", n, c2); else if (n == -1) fprintf(stderr, "\textra %c\n", c2); else if (n < -1) fprintf(stderr, "\t%d extra %c's\n", -n, c2); } void FATAL(const char *fmt, ...) { extern char *cmdname; va_list varg; fflush(stdout); fprintf(stderr, "%s: ", cmdname); va_start(varg, fmt); vfprintf(stderr, fmt, varg); va_end(varg); error(); if (dbg > 1) /* core dump if serious debugging on */ abort(); exit(2); } void WARNING(const char *fmt, ...) { extern char *cmdname; va_list varg; fflush(stdout); fprintf(stderr, "%s: ", cmdname); va_start(varg, fmt); vfprintf(stderr, fmt, varg); va_end(varg); error(); } void error() { extern Node *curnode; fprintf(stderr, "\n"); if (compile_time != 2 && NR && *NR > 0) { fprintf(stderr, " input record number %d", (int) (*FNR)); if (strcmp(*FILENAME, "-") != 0) fprintf(stderr, ", file %s", *FILENAME); fprintf(stderr, "\n"); } if (compile_time != 2 && curnode) fprintf(stderr, " source line number %d", curnode->lineno); else if (compile_time != 2 && lineno) fprintf(stderr, " source line number %d", lineno); if (compile_time == 1 && cursource() != NULL) fprintf(stderr, " source file %s", cursource()); fprintf(stderr, "\n"); eprint(); } void eprint(void) /* try to print context around error */ { char *p, *q; int c; static int been_here = 0; extern char ebuf[], *ep; - if (compile_time == 2 || compile_time == 0 || been_here++ > 0) + if (compile_time == 2 || compile_time == 0 || been_here++ > 0 || ebuf == ep) return; p = ep - 1; if (p > ebuf && *p == '\n') p--; for ( ; p > ebuf && *p != '\n' && *p != '\0'; p--) ; while (*p == '\n') p++; fprintf(stderr, " context is\n\t"); for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--) ; for ( ; p < q; p++) if (*p) putc(*p, stderr); fprintf(stderr, " >>> "); for ( ; p < ep; p++) if (*p) putc(*p, stderr); fprintf(stderr, " <<< "); if (*ep) while ((c = input()) != '\n' && c != '\0' && c != EOF) { putc(c, stderr); bclass(c); } putc('\n', stderr); ep = ebuf; } void bclass(int c) { switch (c) { case '{': bracecnt++; break; case '}': bracecnt--; break; case '[': brackcnt++; break; case ']': brackcnt--; break; case '(': parencnt++; break; case ')': parencnt--; break; } } double errcheck(double x, const char *s) { if (errno == EDOM) { errno = 0; WARNING("%s argument out of domain", s); x = 1; } else if (errno == ERANGE) { errno = 0; WARNING("%s result out of range", s); x = 1; } return x; } int isclvar(const char *s) /* is s of form var=something ? 
 */
{
	const char *os = s;

	if (!isalpha((uschar) *s) && *s != '_')
		return 0;
	for ( ; *s; s++)
		if (!(isalnum((uschar) *s) || *s == '_'))
			break;
	return *s == '=' && s > os && *(s+1) != '=';
}

/* strtod is supposed to be a proper test of what's a valid number */
/* appears to be broken in gcc on linux: thinks 0x123 is a valid FP number */
/* wrong: violates 4.10.1.4 of ansi C standard */

#include <math.h>
int is_number(const char *s)
{
	double r;
	char *ep;
	errno = 0;
	r = strtod(s, &ep);
	if (ep == s || r == HUGE_VAL || errno == ERANGE)
		return 0;
	while (*ep == ' ' || *ep == '\t' || *ep == '\n')
		ep++;
	if (*ep == '\0')
		return 1;
	else
		return 0;
}
Index: projects/runtime-coverage/contrib/one-true-awk/run.c
===================================================================
--- projects/runtime-coverage/contrib/one-true-awk/run.c	(revision 323974)
+++ projects/runtime-coverage/contrib/one-true-awk/run.c	(revision 323975)
@@ -1,2033 +1,2033 @@
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name Lucent Technologies or any of
its entities not be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.

LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#define DEBUG
#include <stdio.h>
#include <ctype.h>
#include <setjmp.h>
#include <limits.h>
#include <math.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include "awk.h"
#include "ytab.h"

#define tempfree(x)	if (istemp(x)) tfree(x); else

/*
#undef tempfree

void tempfree(Cell *p) {
	if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) {
		WARNING("bad csub %d in Cell %d %s",
			p->csub, p->ctype, p->sval);
	}
	if (istemp(p))
		tfree(p);
}
*/

/* do we really need these?
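/*
 * Illustrative aside (not part of this change): is_number() above accepts
 * whatever strtod() parses in full, plus trailing blanks only, so " 3.0e2 "
 * is numeric while "12abc" and "" are not.  Quick standalone check; is_num
 * is a hypothetical copy of the same logic.
 */
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <math.h>

static int is_num(const char *s)
{
	char *ep;
	double r;

	errno = 0;
	r = strtod(s, &ep);
	if (ep == s || r == HUGE_VAL || errno == ERANGE)
		return 0;
	while (*ep == ' ' || *ep == '\t' || *ep == '\n')
		ep++;			/* only trailing blanks allowed */
	return *ep == '\0';
}

int main(void)
{
	printf("%d %d %d\n", is_num(" 3.0e2 "), is_num("12abc"), is_num(""));
	return 0;			/* prints: 1 0 0 */
}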
*/ /* #ifdef _NFILE */ /* #ifndef FOPEN_MAX */ /* #define FOPEN_MAX _NFILE */ /* #endif */ /* #endif */ /* */ /* #ifndef FOPEN_MAX */ /* #define FOPEN_MAX 40 */ /* max number of open files */ /* #endif */ /* */ /* #ifndef RAND_MAX */ /* #define RAND_MAX 32767 */ /* all that ansi guarantees */ /* #endif */ jmp_buf env; extern int pairstack[]; extern Awkfloat srand_seed; Node *winner = NULL; /* root of parse tree */ Cell *tmps; /* free temporary cells for execution */ static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM }; Cell *True = &truecell; static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM }; Cell *False = &falsecell; static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM }; Cell *jbreak = &breakcell; static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM }; Cell *jcont = &contcell; static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM }; Cell *jnext = &nextcell; static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM }; Cell *jnextfile = &nextfilecell; static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM }; Cell *jexit = &exitcell; static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM }; Cell *jret = &retcell; static Cell tempcell ={ OCELL, CTEMP, 0, "", 0.0, NUM|STR|DONTFREE }; Node *curnode = NULL; /* the node being executed, for debugging */ /* buffer memory management */ int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, const char *whatrtn) /* pbuf: address of pointer to buffer being managed * psiz: address of buffer size variable * minlen: minimum length of buffer needed * quantum: buffer size quantum * pbptr: address of movable pointer into buffer, or 0 if none * whatrtn: name of the calling routine if failure should cause fatal error * * return 0 for realloc failure, !=0 for success */ { if (minlen > *psiz) { char *tbuf; int rminlen = quantum ? minlen % quantum : 0; int boff = pbptr ? 
*pbptr - *pbuf : 0; /* round up to next multiple of quantum */ if (rminlen) minlen += quantum - rminlen; tbuf = (char *) realloc(*pbuf, minlen); dprintf( ("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, *pbuf, tbuf) ); if (tbuf == NULL) { if (whatrtn) FATAL("out of memory in %s", whatrtn); return 0; } *pbuf = tbuf; *psiz = minlen; if (pbptr) *pbptr = tbuf + boff; } return 1; } void run(Node *a) /* execution of parse tree starts here */ { extern void stdinit(void); stdinit(); execute(a); closeall(); } Cell *execute(Node *u) /* execute a node of the parse tree */ { Cell *(*proc)(Node **, int); Cell *x; Node *a; if (u == NULL) return(True); for (a = u; ; a = a->nnext) { curnode = a; if (isvalue(a)) { x = (Cell *) (a->narg[0]); if (isfld(x) && !donefld) fldbld(); else if (isrec(x) && !donerec) recbld(); return(x); } if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */ FATAL("illegal statement"); proc = proctab[a->nobj-FIRSTTOKEN]; x = (*proc)(a->narg, a->nobj); if (isfld(x) && !donefld) fldbld(); else if (isrec(x) && !donerec) recbld(); if (isexpr(a)) return(x); if (isjump(x)) return(x); if (a->nnext == NULL) return(x); tempfree(x); } } Cell *program(Node **a, int n) /* execute an awk program */ { /* a[0] = BEGIN, a[1] = body, a[2] = END */ Cell *x; if (setjmp(env) != 0) goto ex; if (a[0]) { /* BEGIN */ x = execute(a[0]); if (isexit(x)) return(True); if (isjump(x)) FATAL("illegal break, continue, next or nextfile from BEGIN"); tempfree(x); } if (a[1] || a[2]) while (getrec(&record, &recsize, 1) > 0) { x = execute(a[1]); if (isexit(x)) break; tempfree(x); } ex: if (setjmp(env) != 0) /* handles exit within END */ goto ex1; if (a[2]) { /* END */ x = execute(a[2]); if (isbreak(x) || isnext(x) || iscont(x)) FATAL("illegal break, continue, next or nextfile from END"); tempfree(x); } ex1: return(True); } struct Frame { /* stack frame for awk function calls */ int nargs; /* number of arguments in this call */ Cell *fcncell; /* pointer to Cell for function */ Cell **args; /* pointer to array of arguments after execute */ Cell *retval; /* return value */ }; #define NARGS 50 /* max args in a call */ struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ int nframe = 0; /* number of frames allocated */ struct Frame *fp = NULL; /* frame pointer. bottom level unused */ Cell *call(Node **a, int n) /* function call. 
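/*
 * Illustrative aside (not part of this change): adjbuf() above takes pbptr
 * because a roving pointer into the buffer goes stale when realloc() moves
 * the block; the offset is saved and re-applied, and requests are rounded
 * up to the next multiple of quantum.  Standalone mirror of that pattern;
 * grow() is a hypothetical name.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int grow(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr)
{
	if (minlen > *psiz) {
		int r = quantum ? minlen % quantum : 0;
		int off = pbptr ? (int)(*pbptr - *pbuf) : 0;
		char *t;

		if (r)
			minlen += quantum - r;	/* round up to quantum */
		if ((t = realloc(*pbuf, minlen)) == NULL)
			return 0;
		*pbuf = t;
		*psiz = minlen;
		if (pbptr)
			*pbptr = t + off;	/* re-base roving pointer */
	}
	return 1;
}

int main(void)
{
	int siz = 8;
	char *buf = malloc(siz);
	char *p;

	strcpy(buf, "0123");
	p = buf + 4;				/* roving write position */
	if (grow(&buf, &siz, 100, 64, &p)) {
		strcpy(p, "4567");		/* p is still valid after a move */
		printf("%s (size %d)\n", buf, siz);	/* 01234567 (size 128) */
	}
	free(buf);
	return 0;
}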
very kludgy and fragile */ { static Cell newcopycell = { OCELL, CCOPY, 0, "", 0.0, NUM|STR|DONTFREE }; int i, ncall, ndef; int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ Node *x; Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */ Cell *y, *z, *fcn; char *s; fcn = execute(a[0]); /* the function itself */ s = fcn->nval; if (!isfcn(fcn)) FATAL("calling undefined function %s", s); if (frame == NULL) { fp = frame = (struct Frame *) calloc(nframe += 100, sizeof(struct Frame)); if (frame == NULL) FATAL("out of space for stack frames calling %s", s); } for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ ncall++; ndef = (int) fcn->fval; /* args in defn */ dprintf( ("calling %s, %d args (%d in defn), fp=%d\n", s, ncall, ndef, (int) (fp-frame)) ); if (ncall > ndef) WARNING("function %s called with %d args, uses only %d", s, ncall, ndef); if (ncall + ndef > NARGS) FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS); for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */ dprintf( ("evaluate args[%d], fp=%d:\n", i, (int) (fp-frame)) ); y = execute(x); oargs[i] = y; dprintf( ("args[%d]: %s %f <%s>, t=%o\n", i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval) ); if (isfcn(y)) FATAL("can't use function %s as argument in %s", y->nval, s); if (isarr(y)) args[i] = y; /* arrays by ref */ else args[i] = copycell(y); tempfree(y); } for ( ; i < ndef; i++) { /* add null args for ones not provided */ args[i] = gettemp(); *args[i] = newcopycell; } fp++; /* now ok to up frame */ if (fp >= frame + nframe) { int dfp = fp - frame; /* old index */ frame = (struct Frame *) realloc((char *) frame, (nframe += 100) * sizeof(struct Frame)); if (frame == NULL) FATAL("out of space for stack frames in %s", s); fp = frame + dfp; } fp->fcncell = fcn; fp->args = args; fp->nargs = ndef; /* number defined with (excess are locals) */ fp->retval = gettemp(); dprintf( ("start exec of %s, fp=%d\n", s, (int) (fp-frame)) ); y = execute((Node *)(fcn->sval)); /* execute body */ dprintf( ("finished exec of %s, fp=%d\n", s, (int) (fp-frame)) ); for (i = 0; i < ndef; i++) { Cell *t = fp->args[i]; if (isarr(t)) { if (t->csub == CCOPY) { if (i >= ncall) { freesymtab(t); t->csub = CTEMP; tempfree(t); } else { oargs[i]->tval = t->tval; oargs[i]->tval &= ~(STR|NUM|DONTFREE); oargs[i]->sval = t->sval; tempfree(t); } } } else if (t != y) { /* kludge to prevent freeing twice */ t->csub = CTEMP; tempfree(t); } else if (t == y && t->csub == CCOPY) { t->csub = CTEMP; tempfree(t); freed = 1; } } tempfree(fcn); if (isexit(y) || isnext(y)) return y; if (freed == 0) { tempfree(y); /* don't free twice! */ } z = fp->retval; /* return value */ dprintf( ("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval) ); fp--; return(z); } Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ { Cell *y; y = gettemp(); y->csub = CCOPY; /* prevents freeing until call is over */ y->nval = x->nval; /* BUG? */ if (isstr(x)) y->sval = tostring(x->sval); y->fval = x->fval; y->tval = x->tval & ~(CON|FLD|REC|DONTFREE); /* copy is not constant or field */ /* is DONTFREE right? 
 */
	return y;
}

Cell *arg(Node **a, int n)	/* nth argument of a function */
{
	n = ptoi(a[0]);	/* argument number, counting from 0 */
	dprintf( ("arg(%d), fp->nargs=%d\n", n, fp->nargs) );
	if (n+1 > fp->nargs)
		FATAL("argument #%d of function %s was not supplied",
			n+1, fp->fcncell->nval);
	return fp->args[n];
}

Cell *jump(Node **a, int n)	/* break, continue, next, nextfile, return */
{
	Cell *y;

	switch (n) {
	case EXIT:
		if (a[0] != NULL) {
			y = execute(a[0]);
			errorflag = (int) getfval(y);
			tempfree(y);
		}
		longjmp(env, 1);
	case RETURN:
		if (a[0] != NULL) {
			y = execute(a[0]);
			if ((y->tval & (STR|NUM)) == (STR|NUM)) {
				setsval(fp->retval, getsval(y));
				fp->retval->fval = getfval(y);
				fp->retval->tval |= NUM;
			}
			else if (y->tval & STR)
				setsval(fp->retval, getsval(y));
			else if (y->tval & NUM)
				setfval(fp->retval, getfval(y));
			else		/* can't happen */
				FATAL("bad type variable %d", y->tval);
			tempfree(y);
		}
		return(jret);
	case NEXT:
		return(jnext);
	case NEXTFILE:
		nextfile();
		return(jnextfile);
	case BREAK:
		return(jbreak);
	case CONTINUE:
		return(jcont);
	default:	/* can't happen */
		FATAL("illegal jump type %d", n);
	}
	return 0;	/* not reached */
}

Cell *awkgetline(Node **a, int n)	/* get next line from specific input */
{	/* a[0] is variable, a[1] is operator, a[2] is filename */
	Cell *r, *x;
	extern Cell **fldtab;
	FILE *fp;
	char *buf;
	int bufsize = recsize;
	int mode;

	if ((buf = (char *) malloc(bufsize)) == NULL)
		FATAL("out of memory in getline");

	fflush(stdout);	/* in case someone is waiting for a prompt */
	r = gettemp();
	if (a[1] != NULL) {	/* getline < file */
		x = execute(a[2]);		/* filename */
		mode = ptoi(a[1]);
		if (mode == '|')		/* input pipe */
			mode = LE;	/* arbitrary flag */
		fp = openfile(mode, getsval(x));
		tempfree(x);
		if (fp == NULL)
			n = -1;
		else
			n = readrec(&buf, &bufsize, fp);
		if (n <= 0) {
			;
		} else if (a[0] != NULL) {	/* getline var <file */
			x = execute(a[0]);
			setsval(x, buf);
			tempfree(x);
		} else {			/* getline <file */
			setsval(fldtab[0], buf);
			if (is_number(fldtab[0]->sval)) {
				fldtab[0]->fval = atof(fldtab[0]->sval);
				fldtab[0]->tval |= NUM;
			}
		}
	} else {	/* bare getline; use current input */
		if (a[0] == NULL)	/* getline */
			n = getrec(&record, &recsize, 1);
		else {			/* getline var */
			n = getrec(&buf, &bufsize, 0);
			x = execute(a[0]);
			setsval(x, buf);
			tempfree(x);
		}
	}
	setfval(r, (Awkfloat) n);
	free(buf);
	return r;
}

Cell *getnf(Node **a, int n)	/* get NF */
{
	if (donefld == 0)
		fldbld();
	return (Cell *) a[0];
}

Cell *array(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
{
	Cell *x, *y, *z;
	char *s;
	Node *np;
	char *buf;
	int bufsz = recsize;
	int nsub = strlen(*SUBSEP);

	if ((buf = (char *) malloc(bufsz)) == NULL)
		FATAL("out of memory in array");

	x = execute(a[0]);	/* Cell* for symbol table */
	buf[0] = 0;
	for (np = a[1]; np; np = np->nnext) {
		y = execute(np);	/* subscript */
		s = getsval(y);
		if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "array"))
			FATAL("out of memory for %s[%s...]", x->nval, buf);
		strcat(buf, s);
		if (np->nnext)
			strcat(buf, *SUBSEP);
		tempfree(y);
	}
	if (!isarr(x)) {
		dprintf( ("making %s into an array\n", NN(x->nval)) );
		if (freeable(x))
			xfree(x->sval);
		x->tval &= ~(STR|NUM|DONTFREE);
		x->tval |= ARR;
		x->sval = (char *) makesymtab(NSYMTAB);
	}
	z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
	z->ctype = OCELL;
	z->csub = CVAR;
	tempfree(x);
	free(buf);
	return(z);
}

Cell *awkdelete(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
{
	Cell *x, *y;
	Node *np;
	char *s;
	int nsub = strlen(*SUBSEP);

	x = execute(a[0]);	/* Cell* for symbol table */
	if (!isarr(x))
		return True;
	if (a[1] == NULL) {	/* delete the elements, not the table */
		freesymtab(x);
		x->tval &= ~STR;
		x->tval |= ARR;
		x->sval = (char
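/*
 * Illustrative aside (not part of this change): array() above flattens a
 * multi-subscript reference like a[i, j] into one symbol-table key, joining
 * the subscripts with SUBSEP (by default "\034"), which is why (i, j) in a
 * works.  Standalone sketch; make_key is a hypothetical name, and strlcat()
 * is assumed available as on FreeBSD.
 */
#include <stdio.h>
#include <string.h>

#define SUBSEP "\034"

static void make_key(char *buf, size_t n, const char *subs[], int nsub)
{
	int i;

	buf[0] = '\0';
	for (i = 0; i < nsub; i++) {
		strlcat(buf, subs[i], n);
		if (i < nsub - 1)
			strlcat(buf, SUBSEP, n);	/* join like a[i, j] */
	}
}

int main(void)
{
	const char *subs[] = { "1", "foo" };
	char key[64];

	make_key(key, sizeof(key), subs, 2);
	printf("key is %zu bytes; subscripts joined by \\034\n", strlen(key));
	return 0;
}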
*) makesymtab(NSYMTAB); } else { int bufsz = recsize; char *buf; if ((buf = (char *) malloc(bufsz)) == NULL) FATAL("out of memory in adelete"); buf[0] = 0; for (np = a[1]; np; np = np->nnext) { y = execute(np); /* subscript */ s = getsval(y); if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "awkdelete")) FATAL("out of memory deleting %s[%s...]", x->nval, buf); strcat(buf, s); if (np->nnext) strcat(buf, *SUBSEP); tempfree(y); } freeelem(x, buf); free(buf); } tempfree(x); return True; } Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ { Cell *x, *ap, *k; Node *p; char *buf; char *s; int bufsz = recsize; int nsub = strlen(*SUBSEP); ap = execute(a[1]); /* array name */ if (!isarr(ap)) { dprintf( ("making %s into an array\n", ap->nval) ); if (freeable(ap)) xfree(ap->sval); ap->tval &= ~(STR|NUM|DONTFREE); ap->tval |= ARR; ap->sval = (char *) makesymtab(NSYMTAB); } if ((buf = (char *) malloc(bufsz)) == NULL) { FATAL("out of memory in intest"); } buf[0] = 0; for (p = a[0]; p; p = p->nnext) { x = execute(p); /* expr */ s = getsval(x); if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "intest")) FATAL("out of memory deleting %s[%s...]", x->nval, buf); strcat(buf, s); tempfree(x); if (p->nnext) strcat(buf, *SUBSEP); } k = lookup(buf, (Array *) ap->sval); tempfree(ap); free(buf); if (k == NULL) return(False); else return(True); } Cell *matchop(Node **a, int n) /* ~ and match() */ { Cell *x, *y; char *s, *t; int i; fa *pfa; int (*mf)(fa *, const char *) = match, mode = 0; if (n == MATCHFCN) { mf = pmatch; mode = 1; } x = execute(a[1]); /* a[1] = target text */ s = getsval(x); if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */ i = (*mf)((fa *) a[2], s); else { y = execute(a[2]); /* a[2] = regular expr */ t = getsval(y); pfa = makedfa(t, mode); i = (*mf)(pfa, s); tempfree(y); } tempfree(x); if (n == MATCHFCN) { int start = patbeg - s + 1; if (patlen < 0) start = 0; setfval(rstartloc, (Awkfloat) start); setfval(rlengthloc, (Awkfloat) patlen); x = gettemp(); x->tval = NUM; x->fval = start; return x; } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0)) return(True); else return(False); } Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */ { Cell *x, *y; int i; x = execute(a[0]); i = istrue(x); tempfree(x); switch (n) { case BOR: if (i) return(True); y = execute(a[1]); i = istrue(y); tempfree(y); if (i) return(True); else return(False); case AND: if ( !i ) return(False); y = execute(a[1]); i = istrue(y); tempfree(y); if (i) return(True); else return(False); case NOT: if (i) return(False); else return(True); default: /* can't happen */ FATAL("unknown boolean operator %d", n); } return 0; /*NOTREACHED*/ } Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */ { int i; Cell *x, *y; Awkfloat j; x = execute(a[0]); y = execute(a[1]); if (x->tval&NUM && y->tval&NUM) { j = x->fval - y->fval; i = j<0? -1: (j>0? 
1: 0); } else { i = strcoll(getsval(x), getsval(y)); } tempfree(x); tempfree(y); switch (n) { case LT: if (i<0) return(True); else return(False); case LE: if (i<=0) return(True); else return(False); case NE: if (i!=0) return(True); else return(False); case EQ: if (i == 0) return(True); else return(False); case GE: if (i>=0) return(True); else return(False); case GT: if (i>0) return(True); else return(False); default: /* can't happen */ FATAL("unknown relational operator %d", n); } return 0; /*NOTREACHED*/ } void tfree(Cell *a) /* free a tempcell */ { if (freeable(a)) { dprintf( ("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval) ); xfree(a->sval); } if (a == tmps) FATAL("tempcell list is curdled"); a->cnext = tmps; tmps = a; } Cell *gettemp(void) /* get a tempcell */ { int i; Cell *x; if (!tmps) { tmps = (Cell *) calloc(100, sizeof(Cell)); if (!tmps) FATAL("out of space for temporaries"); for(i = 1; i < 100; i++) tmps[i-1].cnext = &tmps[i]; tmps[i-1].cnext = NULL; } x = tmps; tmps = x->cnext; *x = tempcell; return(x); } Cell *indirect(Node **a, int n) /* $( a[0] ) */ { Awkfloat val; Cell *x; int m; char *s; x = execute(a[0]); val = getfval(x); /* freebsd: defend against super large field numbers */ if ((Awkfloat)INT_MAX < val) FATAL("trying to access out of range field %s", x->nval); m = (int) val; if (m == 0 && !is_number(s = getsval(x))) /* suspicion! */ FATAL("illegal field $(%s), name \"%s\"", s, x->nval); /* BUG: can x->nval ever be null??? */ tempfree(x); x = fieldadr(m); x->ctype = OCELL; /* BUG? why are these needed? */ x->csub = CFLD; return(x); } Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ { int k, m, n; char *s; int temp; Cell *x, *y, *z = NULL; x = execute(a[0]); y = execute(a[1]); if (a[2] != NULL) z = execute(a[2]); s = getsval(x); k = strlen(s) + 1; if (k <= 1) { tempfree(x); tempfree(y); if (a[2] != NULL) { tempfree(z); } x = gettemp(); setsval(x, ""); return(x); } m = (int) getfval(y); if (m <= 0) m = 1; else if (m > k) m = k; tempfree(y); if (a[2] != NULL) { n = (int) getfval(z); tempfree(z); } else n = k - 1; if (n < 0) n = 0; else if (n > k - m) n = k - m; dprintf( ("substr: m=%d, n=%d, s=%s\n", m, n, s) ); y = gettemp(); temp = s[n+m-1]; /* with thanks to John Linderman */ s[n+m-1] = '\0'; setsval(y, s + m - 1); s[n+m-1] = temp; tempfree(x); return(y); } Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ { Cell *x, *y, *z; char *s1, *s2, *p1, *p2, *q; Awkfloat v = 0.0; x = execute(a[0]); s1 = getsval(x); y = execute(a[1]); s2 = getsval(y); z = gettemp(); for (p1 = s1; *p1 != '\0'; p1++) { for (q=p1, p2=s2; *p2 != '\0' && *q == *p2; q++, p2++) ; if (*p2 == '\0') { v = (Awkfloat) (p1 - s1 + 1); /* origin 1 */ break; } } tempfree(x); tempfree(y); setfval(z, v); return(z); } #define MAXNUMSIZE 50 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */ { char *fmt; char *p, *t; const char *os; Cell *x; int flag = 0, n; int fmtwd; /* format width */ int fmtsz = recsize; char *buf = *pbuf; int bufsize = *pbufsize; os = s; p = buf; if ((fmt = (char *) malloc(fmtsz)) == NULL) FATAL("out of memory in format()"); while (*s) { adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1"); if (*s != '%') { *p++ = *s++; continue; } if (*(s+1) == '%') { *p++ = '%'; s += 2; continue; } /* have to be real careful in case this is a huge number, eg, %100000d */ fmtwd = atoi(s+1); if (fmtwd < 0) fmtwd = -fmtwd; adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2"); for (t = fmt; (*t++ = *s) != '\0'; s++) { if 
(!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3")) FATAL("format item %.30s... ran format() out of memory", os); if (isalpha((uschar)*s) && *s != 'l' && *s != 'h' && *s != 'L') break; /* the ansi panoply */ if (*s == '*') { x = execute(a); a = a->nnext; sprintf(t-1, "%d", fmtwd=(int) getfval(x)); if (fmtwd < 0) fmtwd = -fmtwd; adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format"); t = fmt + strlen(fmt); tempfree(x); } } *t = '\0'; if (fmtwd < 0) fmtwd = -fmtwd; adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4"); switch (*s) { case 'f': case 'e': case 'g': case 'E': case 'G': flag = 'f'; break; case 'd': case 'i': flag = 'd'; if(*(s-1) == 'l') break; *(t-1) = 'l'; *t = 'd'; *++t = '\0'; break; case 'o': case 'x': case 'X': case 'u': flag = *(s-1) == 'l' ? 'd' : 'u'; break; case 's': flag = 's'; break; case 'c': flag = 'c'; break; default: WARNING("weird printf conversion %s", fmt); flag = '?'; break; } if (a == NULL) FATAL("not enough args in printf(%s)", os); x = execute(a); a = a->nnext; n = MAXNUMSIZE; if (fmtwd > n) n = fmtwd; adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5"); switch (flag) { case '?': sprintf(p, "%s", fmt); /* unknown, so dump it too */ t = getsval(x); n = strlen(t); if (fmtwd > n) n = fmtwd; adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6"); p += strlen(p); sprintf(p, "%s", t); break; case 'f': sprintf(p, fmt, getfval(x)); break; case 'd': sprintf(p, fmt, (long) getfval(x)); break; case 'u': sprintf(p, fmt, (int) getfval(x)); break; case 's': t = getsval(x); n = strlen(t); if (fmtwd > n) n = fmtwd; if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7")) FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t); sprintf(p, fmt, t); break; case 'c': if (isnum(x)) { - if (getfval(x)) + if ((int)getfval(x)) sprintf(p, fmt, (int) getfval(x)); else { *p++ = '\0'; /* explicit null byte */ *p = '\0'; /* next output will start here */ } } else sprintf(p, fmt, getsval(x)[0]); break; default: FATAL("can't happen: bad conversion %c in format()", flag); } tempfree(x); p += strlen(p); s++; } *p = '\0'; free(fmt); for ( ; a; a = a->nnext) /* evaluate any remaining args */ execute(a); *pbuf = buf; *pbufsize = bufsize; return p - buf; } Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */ { Cell *x; Node *y; char *buf; int bufsz=3*recsize; if ((buf = (char *) malloc(bufsz)) == NULL) FATAL("out of memory in awksprintf"); y = a[0]->nnext; x = execute(a[0]); if (format(&buf, &bufsz, getsval(x), y) == -1) FATAL("sprintf string %.30s... too long. can't happen.", buf); tempfree(x); x = gettemp(); x->sval = buf; x->tval = STR; return(x); } Cell *awkprintf(Node **a, int n) /* printf */ { /* a[0] is list of args, starting with format string */ /* a[1] is redirection operator, a[2] is redirection file */ FILE *fp; Cell *x; Node *y; char *buf; int len; int bufsz=3*recsize; if ((buf = (char *) malloc(bufsz)) == NULL) FATAL("out of memory in awkprintf"); y = a[0]->nnext; x = execute(a[0]); if ((len = format(&buf, &bufsz, getsval(x), y)) == -1) FATAL("printf string %.30s... too long. can't happen.", buf); tempfree(x); if (a[1] == NULL) { /* fputs(buf, stdout); */ fwrite(buf, len, 1, stdout); if (ferror(stdout)) FATAL("write error on stdout"); } else { fp = redirect(ptoi(a[1]), a[2]); /* fputs(buf, fp); */ fwrite(buf, len, 1, fp); fflush(fp); if (ferror(fp)) FATAL("write error on %s", filename(fp)); } free(buf); return(True); } Cell *arith(Node **a, int n) /* a[0] + a[1], etc. 
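/*
 * Illustrative aside on the (int)getfval(x) change above: a fractional value
 * such as 0.5 is truthy as a double but prints as character code 0, and a
 * NUL written through sprintf("%c", ...) is invisible to the strlen() that
 * advances p, so the byte would be dropped.  Truncating first routes all
 * zero characters to the explicit *p++ = '\0' branch.  Standalone demo:
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char p[8];
	double v = 0.5;			/* truthy as a double... */

	sprintf(p, "%c", (int)v);	/* ...but emits character code 0 */
	printf("strlen after %%c of (int)0.5: %zu\n", strlen(p));	/* 0 */
	return 0;
}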
also -a[0] */ { Awkfloat i, j = 0; double v; Cell *x, *y, *z; x = execute(a[0]); i = getfval(x); tempfree(x); if (n != UMINUS) { y = execute(a[1]); j = getfval(y); tempfree(y); } z = gettemp(); switch (n) { case ADD: i += j; break; case MINUS: i -= j; break; case MULT: i *= j; break; case DIVIDE: if (j == 0) FATAL("division by zero"); i /= j; break; case MOD: if (j == 0) FATAL("division by zero in mod"); modf(i/j, &v); i = i - j * v; break; case UMINUS: i = -i; break; case POWER: if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ i = ipow(i, (int) j); else i = errcheck(pow(i, j), "pow"); break; default: /* can't happen */ FATAL("illegal arithmetic operator %d", n); } setfval(z, i); return(z); } double ipow(double x, int n) /* x**n. ought to be done by pow, but isn't always */ { double v; if (n <= 0) return 1; v = ipow(x, n/2); if (n % 2 == 0) return v * v; else return x * v * v; } Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */ { Cell *x, *z; int k; Awkfloat xf; x = execute(a[0]); xf = getfval(x); k = (n == PREINCR || n == POSTINCR) ? 1 : -1; if (n == PREINCR || n == PREDECR) { setfval(x, xf + k); return(x); } z = gettemp(); setfval(z, xf); setfval(x, xf + k); tempfree(x); return(z); } Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ { /* this is subtle; don't muck with it. */ Cell *x, *y; Awkfloat xf, yf; double v; y = execute(a[1]); x = execute(a[0]); if (n == ASSIGN) { /* ordinary assignment */ if (x == y && !(x->tval & (FLD|REC))) /* self-assignment: */ ; /* leave alone unless it's a field */ else if ((y->tval & (STR|NUM)) == (STR|NUM)) { setsval(x, getsval(y)); x->fval = getfval(y); x->tval |= NUM; } else if (isstr(y)) setsval(x, getsval(y)); else if (isnum(y)) setfval(x, getfval(y)); else funnyvar(y, "read value of"); tempfree(y); return(x); } xf = getfval(x); yf = getfval(y); switch (n) { case ADDEQ: xf += yf; break; case SUBEQ: xf -= yf; break; case MULTEQ: xf *= yf; break; case DIVEQ: if (yf == 0) FATAL("division by zero in /="); xf /= yf; break; case MODEQ: if (yf == 0) FATAL("division by zero in %%="); modf(xf/yf, &v); xf = xf - yf * v; break; case POWEQ: if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */ xf = ipow(xf, (int) yf); else xf = errcheck(pow(xf, yf), "pow"); break; default: FATAL("illegal assignment operator %d", n); break; } tempfree(y); setfval(x, xf); return(x); } Cell *cat(Node **a, int q) /* a[0] cat a[1] */ { Cell *x, *y, *z; int n1, n2; char *s; x = execute(a[0]); y = execute(a[1]); getsval(x); getsval(y); n1 = strlen(x->sval); n2 = strlen(y->sval); s = (char *) malloc(n1 + n2 + 1); if (s == NULL) FATAL("out of space concatenating %.15s... 
and %.15s...", x->sval, y->sval); strcpy(s, x->sval); strcpy(s+n1, y->sval); tempfree(x); tempfree(y); z = gettemp(); z->sval = s; z->tval = STR; return(z); } Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ { Cell *x; if (a[0] == NULL) x = execute(a[1]); else { x = execute(a[0]); if (istrue(x)) { tempfree(x); x = execute(a[1]); } } return x; } Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ { Cell *x; int pair; pair = ptoi(a[3]); if (pairstack[pair] == 0) { x = execute(a[0]); if (istrue(x)) pairstack[pair] = 1; tempfree(x); } if (pairstack[pair] == 1) { x = execute(a[1]); if (istrue(x)) pairstack[pair] = 0; tempfree(x); x = execute(a[2]); return(x); } return(False); } Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ { Cell *x = NULL, *y, *ap; char *s, *origs; int sep; char *t, temp, num[50], *fs = NULL; int n, tempstat, arg3type; y = execute(a[0]); /* source string */ origs = s = strdup(getsval(y)); arg3type = ptoi(a[3]); if (a[2] == NULL) /* fs string */ fs = *FS; else if (arg3type == STRING) { /* split(str,arr,"string") */ x = execute(a[2]); fs = getsval(x); } else if (arg3type == REGEXPR) fs = "(regexpr)"; /* split(str,arr,/regexpr/) */ else FATAL("illegal type of split"); sep = *fs; ap = execute(a[1]); /* array name */ freesymtab(ap); dprintf( ("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs) ); ap->tval &= ~STR; ap->tval |= ARR; ap->sval = (char *) makesymtab(NSYMTAB); n = 0; if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) { /* split(s, a, //); have to arrange that it looks like empty sep */ arg3type = 0; fs = ""; sep = 0; } if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */ fa *pfa; if (arg3type == REGEXPR) { /* it's ready already */ pfa = (fa *) a[2]; } else { pfa = makedfa(fs, 1); } if (nematch(pfa,s)) { tempstat = pfa->initstat; pfa->initstat = 2; do { n++; sprintf(num, "%d", n); temp = *patbeg; *patbeg = '\0'; if (is_number(s)) setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval); else setsymtab(num, s, 0.0, STR, (Array *) ap->sval); *patbeg = temp; s = patbeg + patlen; if (*(patbeg+patlen-1) == 0 || *s == 0) { n++; sprintf(num, "%d", n); setsymtab(num, "", 0.0, STR, (Array *) ap->sval); pfa->initstat = tempstat; goto spdone; } } while (nematch(pfa,s)); pfa->initstat = tempstat; /* bwk: has to be here to reset */ /* cf gsub and refldbld */ } n++; sprintf(num, "%d", n); if (is_number(s)) setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval); else setsymtab(num, s, 0.0, STR, (Array *) ap->sval); spdone: pfa = NULL; } else if (sep == ' ') { for (n = 0; ; ) { while (*s == ' ' || *s == '\t' || *s == '\n') s++; if (*s == 0) break; n++; t = s; do s++; while (*s!=' ' && *s!='\t' && *s!='\n' && *s!='\0'); temp = *s; *s = '\0'; sprintf(num, "%d", n); if (is_number(t)) setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval); else setsymtab(num, t, 0.0, STR, (Array *) ap->sval); *s = temp; if (*s != 0) s++; } } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ for (n = 0; *s != 0; s++) { char buf[2]; n++; sprintf(num, "%d", n); buf[0] = *s; buf[1] = 0; if (isdigit((uschar)buf[0])) setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval); else setsymtab(num, buf, 0.0, STR, (Array *) ap->sval); } } else if (*s != 0) { for (;;) { n++; t = s; while (*s != sep && *s != '\n' && *s != '\0') s++; temp = *s; *s = '\0'; sprintf(num, "%d", n); if (is_number(t)) setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval); else setsymtab(num, t, 0.0, STR, (Array *) ap->sval); *s = temp; if (*s++ == 0) 
break; } } tempfree(ap); tempfree(y); free(origs); if (a[2] != NULL && arg3type == STRING) { tempfree(x); } x = gettemp(); x->tval = NUM; x->fval = n; return(x); } Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */ { Cell *x; x = execute(a[0]); if (istrue(x)) { tempfree(x); x = execute(a[1]); } else { tempfree(x); x = execute(a[2]); } return(x); } Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ { Cell *x; x = execute(a[0]); if (istrue(x)) { tempfree(x); x = execute(a[1]); } else if (a[2] != NULL) { tempfree(x); x = execute(a[2]); } return(x); } Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */ { Cell *x; for (;;) { x = execute(a[0]); if (!istrue(x)) return(x); tempfree(x); x = execute(a[1]); if (isbreak(x)) { x = True; return(x); } if (isnext(x) || isexit(x) || isret(x)) return(x); tempfree(x); } } Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */ { Cell *x; for (;;) { x = execute(a[0]); if (isbreak(x)) return True; if (isnext(x) || isexit(x) || isret(x)) return(x); tempfree(x); x = execute(a[1]); if (!istrue(x)) return(x); tempfree(x); } } Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ { Cell *x; x = execute(a[0]); tempfree(x); for (;;) { if (a[1]!=NULL) { x = execute(a[1]); if (!istrue(x)) return(x); else tempfree(x); } x = execute(a[3]); if (isbreak(x)) /* turn off break */ return True; if (isnext(x) || isexit(x) || isret(x)) return(x); tempfree(x); x = execute(a[2]); tempfree(x); } } Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ { Cell *x, *vp, *arrayp, *cp, *ncp; Array *tp; int i; vp = execute(a[0]); arrayp = execute(a[1]); if (!isarr(arrayp)) { return True; } tp = (Array *) arrayp->sval; tempfree(arrayp); for (i = 0; i < tp->size; i++) { /* this routine knows too much */ for (cp = tp->tab[i]; cp != NULL; cp = ncp) { setsval(vp, cp->nval); ncp = cp->cnext; x = execute(a[2]); if (isbreak(x)) { tempfree(vp); return True; } if (isnext(x) || isexit(x) || isret(x)) { tempfree(vp); return(x); } tempfree(x); } } return True; } Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */ { Cell *x, *y; Awkfloat u; int t, i; Awkfloat tmp; char *p, *buf; Node *nextarg; FILE *fp; void flush_all(void); t = ptoi(a[0]); x = execute(a[1]); nextarg = a[1]->nnext; switch (t) { case FLENGTH: if (isarr(x)) u = ((Array *) x->sval)->nelem; /* GROT. 
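/*
 * Illustrative aside (not part of this change): the FAND/FFOR/FXOR cases in
 * bltin() below fold across the whole argument list, truncating each double
 * operand to int before combining.  Standalone sketch of the same fold;
 * fold_and is a hypothetical name.
 */
#include <stdio.h>

static int fold_and(const double *args, int n)
{
	int i, acc = (int)args[0];

	for (i = 1; i < n; i++)
		acc &= (int)args[i];	/* truncate, then AND, left to right */
	return acc;
}

int main(void)
{
	double a[] = { 7.9, 5.2, 13.0 };	/* truncate to 7, 5, 13 */

	printf("and(7.9, 5.2, 13.0) = %d\n", fold_and(a, 3));	/* 5 */
	return 0;
}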
should be function*/ else u = strlen(getsval(x)); break; case FLOG: u = errcheck(log(getfval(x)), "log"); break; case FINT: modf(getfval(x), &u); break; case FEXP: u = errcheck(exp(getfval(x)), "exp"); break; case FSQRT: u = errcheck(sqrt(getfval(x)), "sqrt"); break; case FSIN: u = sin(getfval(x)); break; case FCOS: u = cos(getfval(x)); break; case FATAN: if (nextarg == NULL) { WARNING("atan2 requires two arguments; returning 1.0"); u = 1.0; } else { y = execute(a[1]->nnext); u = atan2(getfval(x), getfval(y)); tempfree(y); nextarg = nextarg->nnext; } break; case FCOMPL: u = ~((int)getfval(x)); break; case FAND: if (nextarg == NULL) { WARNING("and requires two arguments; returning 0"); u = 0; break; } i = ((int)getfval(x)); while (nextarg != NULL) { y = execute(nextarg); i &= (int)getfval(y); tempfree(y); nextarg = nextarg->nnext; } u = i; break; case FFOR: if (nextarg == NULL) { WARNING("or requires two arguments; returning 0"); u = 0; break; } i = ((int)getfval(x)); while (nextarg != NULL) { y = execute(nextarg); i |= (int)getfval(y); tempfree(y); nextarg = nextarg->nnext; } u = i; break; case FXOR: if (nextarg == NULL) { WARNING("xor requires two arguments; returning 0"); u = 0; break; } i = ((int)getfval(x)); while (nextarg != NULL) { y = execute(nextarg); i ^= (int)getfval(y); tempfree(y); nextarg = nextarg->nnext; } u = i; break; case FLSHIFT: if (nextarg == NULL) { WARNING("lshift requires two arguments; returning 0"); u = 0; break; } y = execute(a[1]->nnext); u = ((int)getfval(x)) << ((int)getfval(y)); tempfree(y); nextarg = nextarg->nnext; break; case FRSHIFT: if (nextarg == NULL) { WARNING("rshift requires two arguments; returning 0"); u = 0; break; } y = execute(a[1]->nnext); u = ((int)getfval(x)) >> ((int)getfval(y)); tempfree(y); nextarg = nextarg->nnext; break; case FSYSTEM: fflush(stdout); /* in case something is buffered already */ u = (Awkfloat) system(getsval(x)) / 256; /* 256 is unix-dep */ break; case FRAND: /* random() returns numbers in [0..2^31-1] * in order to get a number in [0, 1), divide it by 2^31 */ u = (Awkfloat) random() / (0x7fffffffL + 0x1UL); break; case FSRAND: if (isrec(x)) /* no argument provided */ u = time((time_t *)0); else u = getfval(x); tmp = u; srandom((unsigned long) u); u = srand_seed; srand_seed = tmp; break; case FTOUPPER: case FTOLOWER: buf = tostring(getsval(x)); if (t == FTOUPPER) { for (p = buf; *p; p++) if (islower((uschar) *p)) *p = toupper((uschar)*p); } else { for (p = buf; *p; p++) if (isupper((uschar) *p)) *p = tolower((uschar)*p); } tempfree(x); x = gettemp(); setsval(x, buf); free(buf); return x; case FFLUSH: if (isrec(x) || strlen(getsval(x)) == 0) { flush_all(); /* fflush() or fflush("") -> all */ u = 0; } else if ((fp = openfile(FFLUSH, getsval(x))) == NULL) u = EOF; else u = fflush(fp); break; default: /* can't happen */ FATAL("illegal function type %d", t); break; } tempfree(x); x = gettemp(); setfval(x, u); if (nextarg != NULL) { WARNING("warning: function has too many arguments"); for ( ; nextarg; nextarg = nextarg->nnext) execute(nextarg); } return(x); } Cell *printstat(Node **a, int n) /* print a[0] */ { Node *x; Cell *y; FILE *fp; if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */ fp = stdout; else fp = redirect(ptoi(a[1]), a[2]); for (x = a[0]; x != NULL; x = x->nnext) { y = execute(x); fputs(getpssval(y), fp); tempfree(y); if (x->nnext == NULL) fputs(*ORS, fp); else fputs(*OFS, fp); } if (a[1] != NULL) fflush(fp); if (ferror(fp)) FATAL("write error on %s", filename(fp)); return(True); } Cell 
*nullproc(Node **a, int n)
{
	n = n;
	a = a;
	return 0;
}

FILE *redirect(int a, Node *b)	/* set up all i/o redirections */
{
	FILE *fp;
	Cell *x;
	char *fname;

	x = execute(b);
	fname = getsval(x);
	fp = openfile(a, fname);
	if (fp == NULL)
		FATAL("can't open file %s", fname);
	tempfree(x);
	return fp;
}

struct files {
	FILE	*fp;
	const char	*fname;
	int	mode;	/* '|', 'a', 'w' => LE/LT, GT */
} *files;

int nfiles;

void stdinit(void)	/* in case stdin, etc., are not constants */
{
	nfiles = FOPEN_MAX;
	files = calloc(nfiles, sizeof(*files));
	if (files == NULL)
		FATAL("can't allocate file memory for %u files", nfiles);
	files[0].fp = stdin;
	files[0].fname = "/dev/stdin";
	files[0].mode = LT;
	files[1].fp = stdout;
	files[1].fname = "/dev/stdout";
	files[1].mode = GT;
	files[2].fp = stderr;
	files[2].fname = "/dev/stderr";
	files[2].mode = GT;
}

FILE *openfile(int a, const char *us)
{
	const char *s = us;
	int i, m;
	FILE *fp = NULL;

	if (*s == '\0')
		FATAL("null file name in print or getline");
	for (i=0; i < nfiles; i++)
		if (files[i].fname && strcmp(s, files[i].fname) == 0) {
			if (a == files[i].mode || (a==APPEND && files[i].mode==GT))
				return files[i].fp;
			if (a == FFLUSH)
				return files[i].fp;
		}
	if (a == FFLUSH)	/* didn't find it, so don't create it! */
		return NULL;

	for (i=0; i < nfiles; i++)
		if (files[i].fp == NULL)
			break;
	if (i >= nfiles) {
		struct files *nf;
		int nnf = nfiles + FOPEN_MAX;
		nf = realloc(files, nnf * sizeof(*nf));
		if (nf == NULL)
			FATAL("cannot grow files for %s and %d files", s, nnf);
		memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf));
		nfiles = nnf;
		files = nf;
	}
	fflush(stdout);	/* force a semblance of order */
	m = a;
	if (a == GT) {
		fp = fopen(s, "w");
	} else if (a == APPEND) {
		fp = fopen(s, "a");
		m = GT;	/* so can mix > and >> */
	} else if (a == '|') {	/* output pipe */
		fp = popen(s, "w");
	} else if (a == LE) {	/* input pipe */
		fp = popen(s, "r");
	} else if (a == LT) {	/* getline <file */
		fp = *s == '-' ? stdin : fopen(s, "r");	/* "-" is stdin */
	} else	/* can't happen */
		FATAL("illegal redirection %d", a);
	if (fp != NULL) {
		files[i].fname = tostring(s);
		files[i].fp = fp;
		files[i].mode = m;
	}
	return fp;
}

const char *filename(FILE *fp)
{
	int i;

	for (i = 0; i < nfiles; i++)
		if (fp == files[i].fp)
			return files[i].fname;
	return "???";
}

Cell *closefile(Node **a, int n)
{
	Cell *x;
	int i, stat;

	x = execute(a[0]);
	getsval(x);
	stat = -1;
	for (i = 0; i < nfiles; i++) {
		if (files[i].fname && strcmp(x->sval, files[i].fname) == 0) {
			if (ferror(files[i].fp))
				WARNING( "i/o error occurred on %s", files[i].fname );
			if (files[i].mode == '|' || files[i].mode == LE)
				stat = pclose(files[i].fp);
			else
				stat = fclose(files[i].fp);
			if (stat == EOF)
				WARNING( "i/o error occurred closing %s", files[i].fname );
			if (i > 2)	/* don't do /dev/std...
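/*
 * Illustrative aside (not part of this change): openfile() above first
 * looks for an existing entry with the same name and a compatible mode
 * (">" and ">>" are interchangeable once open), then falls back to the
 * first free slot, growing files[] by FOPEN_MAX when none is left.
 * Minimal lookup sketch; ftab and find_slot are hypothetical names.
 */
#include <stdio.h>
#include <string.h>

struct ftab { const char *fname; int mode; };

static int find_slot(struct ftab *t, int n, const char *name, int mode)
{
	int i;

	for (i = 0; i < n; i++)		/* already open with this mode? */
		if (t[i].fname && strcmp(t[i].fname, name) == 0 &&
		    t[i].mode == mode)
			return i;
	for (i = 0; i < n; i++)		/* otherwise first free slot */
		if (t[i].fname == NULL)
			return i;
	return -1;			/* caller must grow the table */
}

int main(void)
{
	struct ftab t[4] = { { "/dev/stdin", 'r' } };

	printf("reuse=%d new=%d\n", find_slot(t, 4, "/dev/stdin", 'r'),
	    find_slot(t, 4, "out.txt", 'w'));	/* reuse=0 new=1 */
	return 0;
}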
*/ xfree(files[i].fname); files[i].fname = NULL; /* watch out for ref thru this */ files[i].fp = NULL; } } tempfree(x); x = gettemp(); setfval(x, (Awkfloat) stat); return(x); } void closeall(void) { int i, stat; for (i = 0; i < FOPEN_MAX; i++) { if (files[i].fp) { if (ferror(files[i].fp)) WARNING( "i/o error occurred on %s", files[i].fname ); if (files[i].mode == '|' || files[i].mode == LE) stat = pclose(files[i].fp); else stat = fclose(files[i].fp); if (stat == EOF) WARNING( "i/o error occurred while closing %s", files[i].fname ); } } } void flush_all(void) { int i; for (i = 0; i < nfiles; i++) if (files[i].fp) fflush(files[i].fp); } void backsub(char **pb_ptr, char **sptr_ptr); Cell *sub(Node **a, int nnn) /* substitute command */ { char *sptr, *pb, *q; Cell *x, *y, *result; char *t, *buf; fa *pfa; int bufsz = recsize; if ((buf = (char *) malloc(bufsz)) == NULL) FATAL("out of memory in sub"); x = execute(a[3]); /* target string */ t = getsval(x); if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ pfa = (fa *) a[1]; /* regular expression */ else { y = execute(a[1]); pfa = makedfa(getsval(y), 1); tempfree(y); } y = execute(a[2]); /* replacement string */ result = False; if (pmatch(pfa, t)) { sptr = t; adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub"); pb = buf; while (sptr < patbeg) *pb++ = *sptr++; sptr = getsval(y); while (*sptr != 0) { adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub"); if (*sptr == '\\') { backsub(&pb, &sptr); } else if (*sptr == '&') { sptr++; adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub"); for (q = patbeg; q < patbeg+patlen; ) *pb++ = *q++; } else *pb++ = *sptr++; } *pb = '\0'; if (pb > buf + bufsz) FATAL("sub result1 %.30s too big; can't happen", buf); sptr = patbeg + patlen; if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) { adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub"); while ((*pb++ = *sptr++) != 0) ; } if (pb > buf + bufsz) FATAL("sub result2 %.30s too big; can't happen", buf); setsval(x, buf); /* BUG: should be able to avoid copy */ result = True; } tempfree(x); tempfree(y); free(buf); return result; } Cell *gsub(Node **a, int nnn) /* global substitute */ { Cell *x, *y; char *rptr, *sptr, *t, *pb, *q; char *buf; fa *pfa; int mflag, tempstat, num; int bufsz = recsize; if ((buf = (char *) malloc(bufsz)) == NULL) FATAL("out of memory in gsub"); mflag = 0; /* if mflag == 0, can replace empty string */ num = 0; x = execute(a[3]); /* target string */ t = getsval(x); if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ pfa = (fa *) a[1]; /* regular expression */ else { y = execute(a[1]); pfa = makedfa(getsval(y), 1); tempfree(y); } y = execute(a[2]); /* replacement string */ if (pmatch(pfa, t)) { tempstat = pfa->initstat; pfa->initstat = 2; pb = buf; rptr = getsval(y); do { if (patlen == 0 && *patbeg != 0) { /* matched empty string */ if (mflag == 0) { /* can replace empty */ num++; sptr = rptr; while (*sptr != 0) { adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); if (*sptr == '\\') { backsub(&pb, &sptr); } else if (*sptr == '&') { sptr++; adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); for (q = patbeg; q < patbeg+patlen; ) *pb++ = *q++; } else *pb++ = *sptr++; } } if (*t == 0) /* at end */ goto done; adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub"); *pb++ = *t++; if (pb > buf + bufsz) /* BUG: not sure of this test */ FATAL("gsub result0 %.30s too big; can't happen", buf); mflag = 0; } else { /* matched nonempty string */ num++; sptr = t; adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, 
recsize, &pb, "gsub"); while (sptr < patbeg) *pb++ = *sptr++; sptr = rptr; while (*sptr != 0) { adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); if (*sptr == '\\') { backsub(&pb, &sptr); } else if (*sptr == '&') { sptr++; adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); for (q = patbeg; q < patbeg+patlen; ) *pb++ = *q++; } else *pb++ = *sptr++; } t = patbeg + patlen; if (patlen == 0 || *t == 0 || *(t-1) == 0) goto done; if (pb > buf + bufsz) FATAL("gsub result1 %.30s too big; can't happen", buf); mflag = 1; } } while (pmatch(pfa,t)); sptr = t; adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub"); while ((*pb++ = *sptr++) != 0) ; done: if (pb < buf + bufsz) *pb = '\0'; else if (*(pb-1) != '\0') FATAL("gsub result2 %.30s truncated; can't happen", buf); setsval(x, buf); /* BUG: should be able to avoid copy + free */ pfa->initstat = tempstat; } tempfree(x); tempfree(y); x = gettemp(); x->tval = NUM; x->fval = num; free(buf); return(x); } void backsub(char **pb_ptr, char **sptr_ptr) /* handle \\& variations */ { /* sptr[0] == '\\' */ char *pb = *pb_ptr, *sptr = *sptr_ptr; if (sptr[1] == '\\') { if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */ *pb++ = '\\'; *pb++ = '&'; sptr += 4; } else if (sptr[2] == '&') { /* \\& -> \ + matched */ *pb++ = '\\'; sptr += 2; } else { /* \\x -> \\x */ *pb++ = *sptr++; *pb++ = *sptr++; } } else if (sptr[1] == '&') { /* literal & */ sptr++; *pb++ = *sptr++; } else /* literal \ */ *pb++ = *sptr++; *pb_ptr = pb; *sptr_ptr = sptr; } Index: projects/runtime-coverage/contrib/one-true-awk =================================================================== --- projects/runtime-coverage/contrib/one-true-awk (revision 323974) +++ projects/runtime-coverage/contrib/one-true-awk (revision 323975) Property changes on: projects/runtime-coverage/contrib/one-true-awk ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/one-true-awk:r323636-323974 Index: projects/runtime-coverage/share/man/man4/mpr.4 =================================================================== --- projects/runtime-coverage/share/man/man4/mpr.4 (revision 323974) +++ projects/runtime-coverage/share/man/man4/mpr.4 (revision 323975) @@ -1,394 +1,404 @@ .\" .\" Copyright (c) 2010 Spectra Logic Corporation .\" Copyright (c) 2014 LSI Corp .\" Copyright (c) 2015-2017 Avago Technologies .\" Copyright (c) 2015-2017 Broadcom Ltd. .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions, and the following disclaimer, .\" without modification. .\" 2. Redistributions in binary form must reproduce at minimum a disclaimer .\" substantially similar to the "NO WARRANTY" disclaimer below .\" ("Disclaimer") and any redistribution must be conditioned upon .\" including a substantially similar Disclaimer requirement for further .\" binary redistribution. .\" .\" NO WARRANTY .\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS .\" "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT .\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR .\" A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT .\" HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, .\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING .\" IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGES. .\" .\" mpr driver man page. .\" .\" Author: Ken Merry .\" Author: Stephen McConnell .\" .\" $Id$ .\" $FreeBSD$ .\" .Dd May 25, 2017 .Dt MPR 4 .Os .Sh NAME .Nm mpr .Nd "LSI Fusion-MPT 3/3.5 IT/IR 12Gb/s Serial Attached SCSI/SATA/PCIe driver" .Sh SYNOPSIS To compile this driver into the kernel, place these lines in the kernel configuration file: .Bd -ragged -offset indent .Cd "device pci" .Cd "device scbus" .Cd "device mpr" .Ed .Pp The driver can be loaded as a module at boot time by placing this line in .Xr loader.conf 5 : .Bd -literal -offset indent mpr_load="YES" .Ed .Sh DESCRIPTION The .Nm driver provides support for Broadcom Ltd./Avago Tech (LSI) Fusion-MPT 3/3.5 IT/IR .Tn SAS/PCIe controllers. .Sh HARDWARE These controllers are supported by the .Nm driver: .Pp .Bl -bullet -compact .It Broadcom Ltd./Avago Tech (LSI) SAS 3004 (4 Port SAS) .It Broadcom Ltd./Avago Tech (LSI) SAS 3008 (8 Port SAS) .It Broadcom Ltd./Avago Tech (LSI) SAS 3108 (8 Port SAS) .It Broadcom Ltd./Avago Tech (LSI) SAS 3216 (16 Port SAS) .It Broadcom Ltd./Avago Tech (LSI) SAS 3224 (24 Port SAS) .It Broadcom Ltd./Avago Tech (LSI) SAS 3316 (16 Port SAS) .It Broadcom Ltd./Avago Tech (LSI) SAS 3324 (24 Port SAS) .It Broadcom Ltd./Avago Tech (LSI) SAS 3408 (8 Port SAS/PCIe) .It Broadcom Ltd./Avago Tech (LSI) SAS 3416 (16 Port SAS/PCIe) .It Broadcom Ltd./Avago Tech (LSI) SAS 3508 (8 Port SAS/PCIe) .It Broadcom Ltd./Avago Tech (LSI) SAS 3516 (16 Port SAS/PCIe) .It Broadcom Ltd./Avago Tech (LSI) SAS 3616 (16 Port SAS/PCIe) .It Broadcom Ltd./Avago Tech (LSI) SAS 3708 (8 Port SAS/PCIe) .It Broadcom Ltd./Avago Tech (LSI) SAS 3716 (16 Port SAS/PCIe) .El .Sh CONFIGURATION In all tunable descriptions below, X represents the adapter number. .Pp To disable MSI interrupts for all .Nm driver instances, set this tunable value in .Xr loader.conf 5 : .Bd -literal -offset indent hw.mpr.disable_msi=1 .Ed .Pp To disable MSI interrupts for a specific .Nm driver instance, set this tunable value in .Xr loader.conf 5 : .Bd -literal -offset indent dev.mpr.X.disable_msi=1 .Ed .Pp To disable MSI-X interrupts for all .Nm driver instances, set this tunable value in .Xr loader.conf 5 : .Bd -literal -offset indent hw.mpr.disable_msix=1 .Ed .Pp To disable MSI-X interrupts for a specific .Nm driver instance, set this tunable value in .Xr loader.conf 5 : .Bd -literal -offset indent dev.mpr.X.disable_msix=1 .Ed .Pp To set the maximum number of DMA chains allocated for all adapters, set this tunable in .Xr loader.conf 5 : .Bd -literal -offset indent hw.mpr.max_chains=NNNN .Ed .Pp To set the maximum number of DMA chains allocated for a specific adapter, set this tunable in .Xr loader.conf 5 : .Bd -literal -offset indent dev.mpr.X.max_chains=NNNN .Ed .Pp The default max_chains value is 2048. .Pp The current number of free chain frames is stored in the dev.mpr.X.chain_free .Xr sysctl 8 variable. .Pp The lowest number of free chain frames seen since boot is stored in the dev.mpr.X.chain_free_lowwater .Xr sysctl 8 variable. 
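Both chain-frame counters are ordinary read-only sysctl nodes, so they can be polled from a monitoring program as well as with sysctl(8). The following minimal sketch is an illustration only (it is not part of this change); it reads the two counters with sysctlbyname(3) and assumes adapter 0 and that the driver exports the nodes as plain ints:

#include <sys/types.h>
#include <sys/sysctl.h>

#include <err.h>
#include <stdio.h>

int
main(void)
{
	int free_now, low_water;
	size_t len;

	/* Chain frames currently free (assumed to be an int node). */
	len = sizeof(free_now);
	if (sysctlbyname("dev.mpr.0.chain_free", &free_now, &len,
	    NULL, 0) == -1)
		err(1, "dev.mpr.0.chain_free");

	/* Lowest free count observed since boot. */
	len = sizeof(low_water);
	if (sysctlbyname("dev.mpr.0.chain_free_lowwater", &low_water, &len,
	    NULL, 0) == -1)
		err(1, "dev.mpr.0.chain_free_lowwater");

	printf("chain frames free: %d (low water: %d)\n", free_now, low_water);
	return (0);
}

A low-water mark approaching zero, together with the allocation-failure counter described next, suggests raising the max_chains tunable.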
.Pp The number of times that chain frame allocations have failed since boot is stored in the dev.mpr.X.chain_alloc_fail .Xr sysctl 8 variable. This can be used to determine whether the max_chains tunable should be increased to help performance. .Pp The current number of active I/O commands is shown in the dev.mpr.X.io_cmds_active .Xr sysctl 8 variable. .Pp The current number of free PRP pages is stored in the dev.mpr.X.prp_pages_free .Xr sysctl 8 variable. PRP pages are used by NVMe devices for I/O transfers, much like Scatter/Gather lists. .Pp The lowest number of free PRP pages seen since boot is stored in the dev.mpr.X.prp_pages_free_lowwater .Xr sysctl 8 variable. .Pp The number of times that PRP page allocations have failed since boot is stored in the dev.mpr.X.prp_page_alloc_fail .Xr sysctl 8 variable. .Pp To set the maximum number of pages that will be used per I/O for all adapters, set this tunable in .Xr loader.conf 5 : .Bd -literal -offset indent hw.mpr.max_io_pages=NNNN .Ed .Pp To set the maximum number of pages that will be used per I/O for a specific adapter, set this tunable in .Xr loader.conf 5 : .Bd -literal -offset indent dev.mpr.X.max_io_pages=NNNN .Ed .Pp The default max_io_pages value is -1, meaning that the maximum I/O size that will be used per I/O will be calculated using the IOCFacts values stored in the controller. The lowest value that the driver will use for max_io_pages is 1, otherwise IOCFacts will be used to calculate the maximum I/O size. The smaller I/O size calculated from either max_io_pages or IOCFacts will be the maximum I/O size used by the driver. .Pp The highest number of active I/O commands seen since boot is stored in the dev.mpr.X.io_cmds_highwater .Xr sysctl 8 variable. .Pp Devices can be excluded from .Nm control for all adapters by setting this tunable in .Xr loader.conf 5 : .Bd -literal -offset indent hw.mpr.exclude_ids=Y .Ed .Pp Y represents the target ID of the device. If more than one device is to be excluded, target IDs are separated by commas. .Pp Devices can be excluded from .Nm control for a specific adapter by setting this tunable in .Xr loader.conf 5 : .Bd -literal -offset indent dev.mpr.X.exclude_ids=Y .Ed .Pp Y represents the target ID of the device. If more than one device is to be excluded, target IDs are separated by commas. .Pp The adapter can issue the .Sy StartStopUnit SCSI command to SATA direct-access devices during shutdown. This allows the device to quiesce powering down. To control this feature for all adapters, set the .Bd -literal -offset indent hw.mpr.enable_ssu .Ed .Pp tunable in .Xr loader.conf 5 to one of these values: .Bl -tag -width 6n -offset indent .It 0 Do not send SSU to either HDDs or SSDs. .It 1 Send SSU to SSDs, but not to HDDs. This is the default value. .It 2 Send SSU to HDDs, but not to SSDs. .It 3 Send SSU to both HDDs and SSDs. .El .Pp To control this feature for a specific adapter, set this tunable value in .Xr loader.conf 5 : .Bd -literal -offset indent dev.mpr.X.enable_ssu .Ed .Pp The same set of values are valid as when setting this tunable for all adapters. .Pp SATA disks that take several seconds to spin up and fail the SATA Identify command might not be discovered by the driver. This problem can sometimes be overcome by increasing the value of the spinup wait time in .Xr loader.conf 5 with the .Bd -literal -offset indent hw.mpr.spinup_wait_time=NNNN .Ed .Pp tunable. NNNN represents the number of seconds to wait for SATA devices to spin up when the device fails the initial SATA Identify command. 
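Unlike the counters above, hw.mpr.spinup_wait_time is a loader tunable rather than a sysctl node, so its configured value (if any) lives in the kernel environment. A minimal sketch of checking it with kenv(2) follows; it is illustrative only, and treating ENOENT as "tunable unset, driver default in use" is an assumption made for the example:

#include <kenv.h>

#include <errno.h>
#include <stdio.h>

int
main(void)
{
	char val[128];

	/* KENV_GET copies the variable's value into val if it is set. */
	if (kenv(KENV_GET, "hw.mpr.spinup_wait_time", val, sizeof(val)) == -1) {
		if (errno == ENOENT) {
			/* Not assigned in loader.conf(5); driver default applies. */
			printf("hw.mpr.spinup_wait_time: not set\n");
			return (0);
		}
		perror("kenv");
		return (1);
	}
	printf("hw.mpr.spinup_wait_time=%s\n", val);
	return (0);
}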
.Pp Spinup wait times can be set for specific adapters in .Xr loader.conf 5 : with the .Bd -literal -offset indent dev.mpr.X.spinup_wait_time=NNNN .Ed .Pp tunable. NNNN is the number of seconds to wait for SATA devices to spin up when they fail the initial SATA Identify command. .Pp The driver can map devices discovered by the adapter so that target IDs corresponding to a specific device persist across resets and reboots. In some cases it is possible for devices to lose their mapped IDs due to unexpected behavior from certain hardware, such as some types of enclosures. To overcome this problem, a tunable is provided that will force the driver to map devices using the Phy number associated with the device. This feature is not recommended if the topology includes multiple enclosures/expanders. If multiple enclosures/expanders are present in the topology, Phy numbers are repeated, causing all devices at these Phy numbers except the first device to fail enumeration. To control this feature for all adapters, set the .Bd -literal -offset indent hw.mpr.use_phy_num .Ed .Pp tunable in .Xr loader.conf 5 to one of these values: .Bl -tag -width 6n -offset indent .It -1 Only use Phy numbers to map devices and bypass the driver's mapping logic. .It 0 Never use Phy numbers to map devices. .It 1 Use Phy numbers to map devices, but only if the driver's mapping logic fails to map the device that is being enumerated. This is the default value. .El .Pp To control this feature for a specific adapter, set this tunable value in .Xr loader.conf 5 : .Bd -literal -offset indent dev.mpr.X.use_phy_num .Ed .Pp The same set of values are valid as when setting this tunable for all adapters. .Pp .Sh DEBUGGING -To enable debugging prints from the -.Nm -driver, set the -.Bd -literal -offset indent -hw.mpr.X.debug_level -.Ed -.Pp -tunable, either in +Driver diagnostic printing is controlled in .Xr loader.conf 5 -or by using -.Xr sysctl 8 . -These bits have the described effects: -.Bd -literal -offset indent -0x0001 Enable informational prints (set by default). -0x0002 Enable prints for driver faults (set by default). -0x0004 Enable prints for controller events. -0x0008 Enable prints for controller logging. -0x0010 Enable prints for tracing recovery operations. -0x0020 Enable prints for parameter errors and programming bugs. -0x0040 Enable prints for system initialization operations. -0x0080 Enable prints for more detailed information. -0x0100 Enable prints for user-generated commands (IOCTL). -0x0200 Enable prints for device mapping. -0x0400 Enable prints for tracing through driver functions. -.Ed +by using the global +.Va hw.mpr.debug_level +and per-device +.Va dev.mpr.X.debug_level +tunables. +One can alter the debug level for any adapter at run-time using the +.Xr sysctl 8 +variable +.Va dev.mpr.X.debug_level . +.Pp +All +.Va debug_level +variables can be named by either an integer value or a text string. +Multiple values can be specified together by either ORing the +integer values or by providing a comma-separated list of names. +The current +.Va debug_level +status is reported in both formats for convenience. 
+The following levels are available: +.Bl -column "FlagXX" "NameXXXX" "Description" -offset indent +.It Em Flag Ta Em Name Ta Em Description +.It 0x0001 Ta info Ta Basic information (enabled by default) +.It 0x0002 Ta fault Ta Driver faults (enabled by default) +.It 0x0004 Ta event Ta Controller events +.It 0x0008 Ta log Ta Logging data from controller +.It 0x0010 Ta recovery Ta Tracing of recovery operations +.It 0x0020 Ta error Ta Parameter errors and programming bugs +.It 0x0040 Ta init Ta System initialization operations +.It 0x0080 Ta xinfo Ta More detailed information +.It 0x0100 Ta user Ta Tracing of user-generated commands (IOCTL) +.It 0x0200 Ta mapping Ta Tracing of device mapping +.It 0x0400 Ta trace Ta Tracing through driver functions +.El .Sh SEE ALSO .Xr cam 4 , .Xr cd 4 , .Xr ch 4 , .Xr da 4 , .Xr mps 4 , .Xr mpt 4 , .Xr pci 4 , .Xr sa 4 , .Xr scsi 4 , .Xr targ 4 , .Xr loader.conf 5 , .Xr sysctl 8 .Sh HISTORY The .Nm driver first appeared in FreeBSD 9.3. .Sh AUTHORS The .Nm driver was originally written by .An -nosplit .An Scott Long Aq Mt scottl@FreeBSD.org . It has been improved and tested by LSI Corporation, Avago Technologies (formerly LSI), and Broadcom Ltd. (formerly Avago). .Pp This man page was written by .An Ken Merry Aq Mt ken@FreeBSD.org with additional input from .An Stephen McConnell Aq Mt slm@FreeBSD.org . Index: projects/runtime-coverage/share/man/man4/mps.4 =================================================================== --- projects/runtime-coverage/share/man/man4/mps.4 (revision 323974) +++ projects/runtime-coverage/share/man/man4/mps.4 (revision 323975) @@ -1,370 +1,380 @@ .\" .\" Copyright (c) 2010 Spectra Logic Corporation .\" Copyright (c) 2014 LSI Corp .\" Copyright (c) 2015-2017 Avago Technologies .\" Copyright (c) 2015-2017 Broadcom Ltd. .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions, and the following disclaimer, .\" without modification. .\" 2. Redistributions in binary form must reproduce at minimum a disclaimer .\" substantially similar to the "NO WARRANTY" disclaimer below .\" ("Disclaimer") and any redistribution must be conditioned upon .\" including a substantially similar Disclaimer requirement for further .\" binary redistribution. .\" .\" NO WARRANTY .\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS .\" "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT .\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR .\" A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT .\" HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, .\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING .\" IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGES. .\" .\" mps driver man page.
.\" .\" Author: Ken Merry .\" Author: Stephen McConnell .\" .\" $Id: //depot/SpectraBSD/head/share/man/man4/mps.4#6 $ .\" $FreeBSD$ .\" .Dd May 25, 2017 .Dt MPS 4 .Os .Sh NAME .Nm mps .Nd "LSI Fusion-MPT 2 IT/IR 6Gb/s Serial Attached SCSI/SATA driver" .Sh SYNOPSIS To compile this driver into the kernel, place these lines in the kernel configuration file: .Bd -ragged -offset indent .Cd "device pci" .Cd "device scbus" .Cd "device mps" .Ed .Pp The driver can be loaded as a module at boot time by placing this line in .Xr loader.conf 5 : .Bd -literal -offset indent mps_load="YES" .Ed .Sh DESCRIPTION The .Nm driver provides support for Broadcom Ltd./Avago Tech (LSI) Fusion-MPT 2 IT/IR .Tn SAS controllers and WarpDrive solid state storage cards. .Sh HARDWARE These controllers are supported by the .Nm driver: .Pp .Bl -bullet -compact .It Broadcom Ltd./Avago Tech (LSI) SAS 2004 (4 Port SAS) .It Broadcom Ltd./Avago Tech (LSI) SAS 2008 (8 Port SAS) .It Broadcom Ltd./Avago Tech (LSI) SAS 2108 (8 Port SAS) .It Broadcom Ltd./Avago Tech (LSI) SAS 2116 (16 Port SAS) .It Broadcom Ltd./Avago Tech (LSI) SAS 2208 (8 Port SAS) .It Broadcom Ltd./Avago Tech (LSI) SAS 2308 (8 Port SAS) .It Broadcom Ltd./Avago Tech (LSI) SSS6200 Solid State Storage .It Intel Integrated RAID Module RMS25JB040 .It Intel Integrated RAID Module RMS25JB080 .It Intel Integrated RAID Module RMS25KB040 .It Intel Integrated RAID Module RMS25KB080 .El .Sh CONFIGURATION In all tunable descriptions below, X represents the adapter number. .Pp To disable MSI interrupts for all .Nm driver instances, set this tunable value in .Xr loader.conf 5 : .Bd -literal -offset indent hw.mps.disable_msi=1 .Ed .Pp To disable MSI interrupts for a specific .Nm driver instance, set this tunable value in .Xr loader.conf 5 : .Bd -literal -offset indent dev.mps.X.disable_msi=1 .Ed .Pp To disable MSI-X interrupts for all .Nm driver instances, set this tunable value in .Xr loader.conf 5 : .Bd -literal -offset indent hw.mps.disable_msix=1 .Ed .Pp To disable MSI-X interrupts for a specific .Nm driver instance, set this tunable value in .Xr loader.conf 5 : .Bd -literal -offset indent dev.mps.X.disable_msix=1 .Ed .Pp To set the maximum number of DMA chains allocated for all adapters, set this tunable in .Xr loader.conf 5 : .Bd -literal -offset indent hw.mps.max_chains=NNNN .Ed .Pp To set the maximum number of DMA chains allocated for a specific adapter, set this tunable in .Xr loader.conf 5 : .Bd -literal -offset indent dev.mps.X.max_chains=NNNN .Ed .Pp The default max_chains value is 2048. .Pp The current number of free chain frames is stored in the dev.mps.X.chain_free .Xr sysctl 8 variable. .Pp The lowest number of free chain frames seen since boot is stored in the dev.mps.X.chain_free_lowwater .Xr sysctl 8 variable. .Pp The number of times that chain frame allocations have failed since boot is stored in the dev.mps.X.chain_alloc_fail .Xr sysctl 8 variable. This can be used to determine whether the max_chains tunable should be increased to help performance. .Pp The current number of active I/O commands is shown in the dev.mps.X.io_cmds_active .Xr sysctl 8 variable. 
.Pp To set the maximum number of pages that will be used per I/O for all adapters, set this tunable in .Xr loader.conf 5 : .Bd -literal -offset indent hw.mps.max_io_pages=NNNN .Ed .Pp To set the maximum number of pages that will be used per I/O for a specific adapter, set this tunable in .Xr loader.conf 5 : .Bd -literal -offset indent dev.mps.X.max_io_pages=NNNN .Ed .Pp The default max_io_pages value is -1, meaning that the maximum I/O size that will be used per I/O will be calculated using the IOCFacts values stored in the controller. The lowest value that the driver will use for max_io_pages is 1, otherwise IOCFacts will be used to calculate the maximum I/O size. The smaller I/O size calculated from either max_io_pages or IOCFacts will be the maximum I/O size used by the driver. .Pp The highest number of active I/O commands seen since boot is stored in the dev.mps.X.io_cmds_highwater .Xr sysctl 8 variable. .Pp Devices can be excluded from .Nm control for all adapters by setting this tunable in .Xr loader.conf 5 : .Bd -literal -offset indent hw.mps.exclude_ids=Y .Ed .Pp Y represents the target ID of the device. If more than one device is to be excluded, target IDs are separated by commas. .Pp Devices can be excluded from .Nm control for a specific adapter by setting this tunable in .Xr loader.conf 5 : .Bd -literal -offset indent dev.mps.X.exclude_ids=Y .Ed .Pp Y represents the target ID of the device. If more than one device is to be excluded, target IDs are separated by commas. .Pp The adapter can issue the .Sy StartStopUnit SCSI command to SATA direct-access devices during shutdown. This allows the device to quiesce powering down. To control this feature for all adapters, set the .Bd -literal -offset indent hw.mps.enable_ssu .Ed .Pp tunable in .Xr loader.conf 5 to one of these values: .Bl -tag -width 6n -offset indent .It 0 Do not send SSU to either HDDs or SSDs. .It 1 Send SSU to SSDs, but not to HDDs. This is the default value. .It 2 Send SSU to HDDs, but not to SSDs. .It 3 Send SSU to both HDDs and SSDs. .El .Pp To control this feature for a specific adapter, set this tunable value in .Xr loader.conf 5 : .Bd -literal -offset indent dev.mps.X.enable_ssu .Ed .Pp The same set of values are valid as when setting this tunable for all adapters. .Pp SATA disks that take several seconds to spin up and fail the SATA Identify command might not be discovered by the driver. This problem can sometimes be overcome by increasing the value of the spinup wait time in .Xr loader.conf 5 with the .Bd -literal -offset indent hw.mps.spinup_wait_time=NNNN .Ed .Pp tunable. NNNN represents the number of seconds to wait for SATA devices to spin up when the device fails the initial SATA Identify command. .Pp Spinup wait times can be set for specific adapters in .Xr loader.conf 5 : with the .Bd -literal -offset indent dev.mps.X.spinup_wait_time=NNNN .Ed .Pp tunable. NNNN is the number of seconds to wait for SATA devices to spin up when they fail the initial SATA Identify command. .Pp The driver can map devices discovered by the adapter so that target IDs corresponding to a specific device persist across resets and reboots. In some cases it is possible for devices to lose their mapped IDs due to unexpected behavior from certain hardware, such as some types of enclosures. To overcome this problem, a tunable is provided that will force the driver to map devices using the Phy number associated with the device. This feature is not recommended if the topology includes multiple enclosures/expanders. 
If multiple enclosures/expanders are present in the topology, Phy numbers are repeated, causing all devices at these Phy numbers except the first device to fail enumeration. To control this feature for all adapters, set the .Bd -literal -offset indent hw.mps.use_phy_num .Ed .Pp tunable in .Xr loader.conf 5 to one of these values: .Bl -tag -width 6n -offset indent .It -1 Only use Phy numbers to map devices and bypass the driver's mapping logic. .It 0 Never use Phy numbers to map devices. .It 1 Use Phy numbers to map devices, but only if the driver's mapping logic fails to map the device that is being enumerated. This is the default value. .El .Pp To control this feature for a specific adapter, set this tunable value in .Xr loader.conf 5 : .Bd -literal -offset indent dev.mps.X.use_phy_num .Ed .Pp The same set of values are valid as when setting this tunable for all adapters. .Pp .Sh DEBUGGING -To enable debugging prints from the -.Nm -driver, set the -.Bd -literal -offset indent -hw.mps.X.debug_level -.Ed -.Pp -tunable, either in .Xr loader.conf 5 -or by using -.Xr sysctl 8 . -These bits have the described effects: -.Bd -literal -offset indent -0x0001 Enable informational prints (set by default). -0x0002 Enable prints for driver faults (set by default). -0x0004 Enable prints for controller events. -0x0008 Enable prints for controller logging. -0x0010 Enable prints for tracing recovery operations. -0x0020 Enable prints for parameter errors and programming bugs. -0x0040 Enable prints for system initialization operations. -0x0080 Enable prints for more detailed information. -0x0100 Enable prints for user-generated commands (IOCTL). -0x0200 Enable prints for device mapping. -0x0400 Enable prints for tracing through driver functions. -.Ed +by using the global +.Va hw.mps.debug_level +and per-device +.Va dev.mps.X.debug_level +tunables. +One can alter the debug level for any adapter at run-time using the +.Xr sysctl 8 +variable +.Va dev.mps.X.debug_level . +.Pp +All +.Va debug_level +variables can be named by either an integer value or a text string. +Multiple values can be specified together by either ORing the +integer values or by providing a comma-separated list of names. +The current +.Va debug_level +status is reported in both formats for convenience. +The following levels are available: +.Bl -column "FlagXX" "NameXXXX" "Description" -offset indent +.It Em Flag Ta Em Name Ta Em Description +.It 0x0001 Ta info Ta Basic information (enabled by default) +.It 0x0002 Ta fault Ta Driver faults (enabled by default) +.It 0x0004 Ta event Ta Controller events +.It 0x0008 Ta log Ta Logging data from controller +.It 0x0010 Ta recovery Ta Tracing of recovery operations +.It 0x0020 Ta error Ta Parameter errors and programming bugs +.It 0x0040 Ta init Ta System initialization operations +.It 0x0080 Ta xinfo Ta More detailed information +.It 0x0100 Ta user Ta Tracing of user-generated commands (IOCTL) +.It 0x0200 Ta mapping Ta Tracing of device mapping +.It 0x0400 Ta trace Ta Tracing through driver functions +.El .Sh SEE ALSO .Xr cam 4 , .Xr cd 4 , .Xr ch 4 , .Xr da 4 , .Xr mpr 4 , .Xr mpt 4 , .Xr pci 4 , .Xr sa 4 , .Xr scsi 4 , .Xr targ 4 , .Xr loader.conf 5 , .Xr sysctl 8 .Sh HISTORY The .Nm driver first appeared in FreeBSD 9.3. .Sh AUTHORS The .Nm driver was originally written by .An -nosplit .An Scott Long Aq Mt scottl@FreeBSD.org . It has been improved and tested by LSI Corporation, Avago Technologies (formerly LSI), and Broadcom Ltd. (formerly Avago).
.Pp This man page was written by .An Ken Merry Aq Mt ken@FreeBSD.org with additional input from .An Stephen McConnell Aq Mt slm@FreeBSD.org . Index: projects/runtime-coverage/share/misc/committers-src.dot =================================================================== --- projects/runtime-coverage/share/misc/committers-src.dot (revision 323974) +++ projects/runtime-coverage/share/misc/committers-src.dot (revision 323975) @@ -1,820 +1,823 @@ # $FreeBSD$ # This file is meant to list all FreeBSD src committers and describe the # mentor-mentee relationships between them. # The graphical output can be generated from this file with the following # command: # $ dot -T png -o file.png committers-src.dot # # The dot binary is part of the graphics/graphviz port. digraph src { # Node definitions follow this example: # # foo [label="Foo Bar\nfoo@FreeBSD.org\n????/??/??"] # # ????/??/?? is the date when the commit bit was obtained, usually the one you # can find looking at svn logs for the svnadmin/conf/access file. # Use YYYY/MM/DD format. # # For returned commit bits, the node definition will follow this example: # # foo [label="Foo Bar\nfoo@FreeBSD.org\n????/??/??\n????/??/??"] # # The first date is the same as for an active committer, the second date is # the date when the commit bit has been returned. Again, check svn logs. node [color=grey62, style=filled, bgcolor=black]; # Alumni go here.. Try to keep things sorted. alm [label="Andrew Moore\nalm@FreeBSD.org\n1993/06/12\n????/??/??"] anholt [label="Eric Anholt\nanholt@FreeBSD.org\n2002/04/22\n2008/08/07"] archie [label="Archie Cobbs\narchie@FreeBSD.org\n1998/11/06\n2006/06/09"] arr [label="Andrew R. Reiter\narr@FreeBSD.org\n2001/11/02\n2005/05/25"] arun [label="Arun Sharma\narun@FreeBSD.org\n2003/03/06\n2006/12/16"] asmodai [label="Jeroen Ruigrok\nasmodai@FreeBSD.org\n1999/12/16\n2001/11/16"] benjsc [label="Benjamin Close\nbenjsc@FreeBSD.org\n2007/02/09\n2010/09/15"] billf [label="Bill Fumerola\nbillf@FreeBSD.org\n1998/11/11\n2008/11/10"] bmah [label="Bruce A. Mah\nbmah@FreeBSD.org\n2002/01/29\n2009/09/13"] bmilekic [label="Bosko Milekic\nbmilekic@FreeBSD.org\n2000/09/21\n2008/11/10"] bushman [label="Michael Bushkov\nbushman@FreeBSD.org\n2007/03/10\n2010/04/29"] carl [label="Carl Delsey\ncarl@FreeBSD.org\n2013/01/14\n2014/03/06"] ceri [label="Ceri Davies\nceri@FreeBSD.org\n2006/11/07\n2012/03/07"] cjc [label="Crist J. Clark\ncjc@FreeBSD.org\n2001/06/01\n2006/12/29"] davidxu [label="David Xu\ndavidxu@FreeBSD.org\n2002/09/02\n2014/04/14"] dds [label="Diomidis Spinellis\ndds@FreeBSD.org\n2003/06/20\n2010/09/22"] dhartmei [label="Daniel Hartmeier\ndhartmei@FreeBSD.org\n2004/04/06\n2008/12/08"] dmlb [label="Duncan Barclay\ndmlb@FreeBSD.org\n2001/12/14\n2008/11/10"] dougb [label="Doug Barton\ndougb@FreeBSD.org\n2000/10/26\n2012/10/08"] eik [label="Oliver Eikemeier\neik@FreeBSD.org\n2004/05/20\n2008/11/10"] furuta [label="Atsushi Furuta\nfuruta@FreeBSD.org\n2000/06/21\n2003/03/08"] gj [label="Gary L. Jennejohn\ngj@FreeBSD.org\n1994/??/??\n2006/04/28"] groudier [label="Gerard Roudier\ngroudier@FreeBSD.org\n1999/12/30\n2006/04/06"] jake [label="Jake Burkholder\njake@FreeBSD.org\n2000/05/16\n2008/11/10"] jayanth [label="Jayanth Vijayaraghavan\njayanth@FreeBSD.org\n2000/05/08\n2008/11/10"] jb [label="John Birrell\njb@FreeBSD.org\n1997/03/27\n2009/12/15"] jdp [label="John Polstra\njdp@FreeBSD.org\n1995/12/07\n2008/02/26"] jedgar [label="Chris D. Faulhaber\njedgar@FreeBSD.org\n1999/12/15\n2006/04/07"] jkh [label="Jordan K. 
Hubbard\njkh@FreeBSD.org\n1993/06/12\n2008/06/13"] jlemon [label="Jonathan Lemon\njlemon@FreeBSD.org\n1997/08/14\n2008/11/10"] joe [label="Josef Karthauser\njoe@FreeBSD.org\n1999/10/22\n2008/08/10"] jtc [label="J.T. Conklin\njtc@FreeBSD.org\n1993/06/12\n????/??/??"] kargl [label="Steven G. Kargl\nkargl@FreeBSD.org\n2011/01/17\n2015/06/28"] kbyanc [label="Kelly Yancey\nkbyanc@FreeBSD.org\n2000/07/11\n2006/07/25"] keichii [label="Michael Wu\nkeichii@FreeBSD.org\n2001/03/07\n2006/04/28"] linimon [label="Mark Linimon\nlinimon@FreeBSD.org\n2006/09/30\n2008/05/04"] lulf [label="Ulf Lilleengen\nlulf@FreeBSD.org\n2007/10/24\n2012/01/19"] mb [label="Maxim Bolotin\nmb@FreeBSD.org\n2000/04/06\n2003/03/08"] marks [label="Mark Santcroos\nmarks@FreeBSD.org\n2004/03/18\n2008/09/29"] mike [label="Mike Barcroft\nmike@FreeBSD.org\n2001/07/17\n2006/04/28"] msmith [label="Mike Smith\nmsmith@FreeBSD.org\n1996/10/22\n2003/12/15"] murray [label="Murray Stokely\nmurray@FreeBSD.org\n2000/04/05\n2010/07/25"] mux [label="Maxime Henrion\nmux@FreeBSD.org\n2002/03/03\n2011/06/22"] nate [label="Nate Willams\nnate@FreeBSD.org\n1993/06/12\n2003/12/15"] njl [label="Nate Lawson\nnjl@FreeBSD.org\n2002/08/07\n2008/02/16"] non [label="Noriaki Mitsnaga\nnon@FreeBSD.org\n2000/06/19\n2007/03/06"] onoe [label="Atsushi Onoe\nonoe@FreeBSD.org\n2000/07/21\n2008/11/10"] rafan [label="Rong-En Fan\nrafan@FreeBSD.org\n2007/01/31\n2012/07/23"] randi [label="Randi Harper\nrandi@FreeBSD.org\n2010/04/20\n2012/05/10"] rink [label="Rink Springer\nrink@FreeBSD.org\n2006/01/16\n2010/11/04"] robert [label="Robert Drehmel\nrobert@FreeBSD.org\n2001/08/23\n2006/05/13"] sah [label="Sam Hopkins\nsah@FreeBSD.org\n2004/12/15\n2008/11/10"] shafeeq [label="Shafeeq Sinnamohideen\nshafeeq@FreeBSD.org\n2000/06/19\n2006/04/06"] sheldonh [label="Sheldon Hearn\nsheldonh@FreeBSD.org\n1999/06/14\n2006/05/13"] shiba [label="Takeshi Shibagaki\nshiba@FreeBSD.org\n2000/06/19\n2008/11/10"] shin [label="Yoshinobu Inoue\nshin@FreeBSD.org\n1999/07/29\n2003/03/08"] snb [label="Nick Barkas\nsnb@FreeBSD.org\n2009/05/05\n2010/11/04"] tmm [label="Thomas Moestl\ntmm@FreeBSD.org\n2001/03/07\n2006/07/12"] toshi [label="Toshihiko Arai\ntoshi@FreeBSD.org\n2000/07/06\n2003/03/08"] tshiozak [label="Takuya SHIOZAKI\ntshiozak@FreeBSD.org\n2001/04/25\n2003/03/08"] uch [label="UCHIYAMA Yasushi\nuch@FreeBSD.org\n2000/06/21\n2002/04/24"] wilko [label="Wilko Bulte\nwilko@FreeBSD.org\n2000/01/13\n2013/01/17"] yar [label="Yar Tikhiy\nyar@FreeBSD.org\n2001/03/25\n2012/05/23"] zack [label="Zack Kirsch\nzack@FreeBSD.org\n2010/11/05\n2012/09/08"] node [color=lightblue2, style=filled, bgcolor=black]; # Current src committers go here. Try to keep things sorted. ache [label="Andrey Chernov\nache@FreeBSD.org\n1993/10/31"] achim [label="Achim Leubner\nachim@FreeBSD.org\n2013/01/23"] adrian [label="Adrian Chadd\nadrian@FreeBSD.org\n2000/07/03"] ae [label="Andrey V. 
Elsukov\nae@FreeBSD.org\n2010/06/03"] akiyama [label="Shunsuke Akiyama\nakiyama@FreeBSD.org\n2000/06/19"] alc [label="Alan Cox\nalc@FreeBSD.org\n1999/02/23"] allanjude [label="Allan Jude\nallanjude@FreeBSD.org\n2015/07/30"] ambrisko [label="Doug Ambrisko\nambrisko@FreeBSD.org\n2001/12/19"] anchie [label="Ana Kukec\nanchie@FreeBSD.org\n2010/04/14"] andre [label="Andre Oppermann\nandre@FreeBSD.org\n2003/11/12"] andreast [label="Andreas Tobler\nandreast@FreeBSD.org\n2010/09/05"] andrew [label="Andrew Turner\nandrew@FreeBSD.org\n2010/07/19"] antoine [label="Antoine Brodin\nantoine@FreeBSD.org\n2008/02/03"] araujo [label="Marcelo Araujo\naraujo@FreeBSD.org\n2015/08/04"] ariff [label="Ariff Abdullah\nariff@FreeBSD.org\n2005/11/14"] art [label="Artem Belevich\nart@FreeBSD.org\n2011/03/29"] arybchik [label="Andrew Rybchenko\narybchik@FreeBSD.org\n2014/10/12"] asomers [label="Alan Somers\nasomers@FreeBSD.org\n2013/04/24"] avg [label="Andriy Gapon\navg@FreeBSD.org\n2009/02/18"] avos [label="Andriy Voskoboinyk\navos@FreeBSD.org\n2015/09/24"] badger [label="Eric Badger\nbadger@FreeBSD.org\n2016/07/01"] bapt [label="Baptiste Daroussin\nbapt@FreeBSD.org\n2011/12/23"] bdrewery [label="Bryan Drewery\nbdrewery@FreeBSD.org\n2013/12/14"] benl [label="Ben Laurie\nbenl@FreeBSD.org\n2011/05/18"] benno [label="Benno Rice\nbenno@FreeBSD.org\n2000/11/02"] bms [label="Bruce M Simpson\nbms@FreeBSD.org\n2003/08/06"] br [label="Ruslan Bukin\nbr@FreeBSD.org\n2013/09/02"] brian [label="Brian Somers\nbrian@FreeBSD.org\n1996/12/16"] brooks [label="Brooks Davis\nbrooks@FreeBSD.org\n2001/06/21"] brucec [label="Bruce Cran\nbrucec@FreeBSD.org\n2010/01/29"] brueffer [label="Christian Brueffer\nbrueffer@FreeBSD.org\n2006/02/28"] bruno [label="Bruno Ducrot\nbruno@FreeBSD.org\n2005/07/18"] bryanv [label="Bryan Venteicher\nbryanv@FreeBSD.org\n2012/11/03"] bschmidt [label="Bernhard Schmidt\nbschmidt@FreeBSD.org\n2010/02/06"] bz [label="Bjoern A. Zeeb\nbz@FreeBSD.org\n2004/07/27"] cem [label="Conrad Meyer\ncem@FreeBSD.org\n2015/07/05"] cognet [label="Olivier Houchard\ncognet@FreeBSD.org\n2002/10/09"] cokane [label="Coleman Kane\ncokane@FreeBSD.org\n2000/06/19"] cperciva [label="Colin Percival\ncperciva@FreeBSD.org\n2004/01/20"] csjp [label="Christian S.J. 
Peron\ncsjp@FreeBSD.org\n2004/05/04"] dab [label="David Bright\ndab@FreeBSD.org\n2016/10/24"] das [label="David Schultz\ndas@FreeBSD.org\n2003/02/21"] davide [label="Davide Italiano\ndavide@FreeBSD.org\n2012/01/27"] dchagin [label="Dmitry Chagin\ndchagin@FreeBSD.org\n2009/02/28"] def [label="Konrad Witaszczyk\ndef@FreeBSD.org\n2016/11/02"] delphij [label="Xin Li\ndelphij@FreeBSD.org\n2004/09/14"] des [label="Dag-Erling Smorgrav\ndes@FreeBSD.org\n1998/04/03"] dexuan [label="Dexuan Cui\ndexuan@FreeBSD.org\n2016/10/24"] dfr [label="Doug Rabson\ndfr@FreeBSD.org\n????/??/??"] dg [label="David Greenman\ndg@FreeBSD.org\n1993/06/14"] dim [label="Dimitry Andric\ndim@FreeBSD.org\n2010/08/30"] dteske [label="Devin Teske\ndteske@FreeBSD.org\n2012/04/10"] dumbbell [label="Jean-Sebastien Pedron\ndumbbell@FreeBSD.org\n2004/11/29"] dwmalone [label="David Malone\ndwmalone@FreeBSD.org\n2000/07/11"] eadler [label="Eitan Adler\neadler@FreeBSD.org\n2012/01/18"] ed [label="Ed Schouten\ned@FreeBSD.org\n2008/05/22"] edavis [label="Eric Davis\nedavis@FreeBSD.org\n2013/10/09"] edwin [label="Edwin Groothuis\nedwin@FreeBSD.org\n2007/06/25"] eivind [label="Eivind Eklund\neivind@FreeBSD.org\n1997/02/02"] emaste [label="Ed Maste\nemaste@FreeBSD.org\n2005/10/04"] emax [label="Maksim Yevmenkin\nemax@FreeBSD.org\n2003/10/12"] eri [label="Ermal Luci\neri@FreeBSD.org\n2008/06/11"] erj [label="Eric Joyner\nerj@FreeBSD.org\n2014/12/14"] eugen [label="Eugene Grosbein\neugen@FreeBSD.org\n2017/09/19"] fabient [label="Fabien Thomas\nfabient@FreeBSD.org\n2009/03/16"] fanf [label="Tony Finch\nfanf@FreeBSD.org\n2002/05/05"] fjoe [label="Max Khon\nfjoe@FreeBSD.org\n2001/08/06"] flz [label="Florent Thoumie\nflz@FreeBSD.org\n2006/03/30"] +fsu [label="Fedor Uporov\nfsu@FreeBSD.org\n2017/08/28"] gabor [label="Gabor Kovesdan\ngabor@FreeBSD.org\n2010/02/02"] gad [label="Garance A. Drosehn\ngad@FreeBSD.org\n2000/10/27"] gallatin [label="Andrew Gallatin\ngallatin@FreeBSD.org\n1999/01/15"] ganbold [label="Ganbold Tsagaankhuu\nganbold@FreeBSD.org\n2013/12/18"] gavin [label="Gavin Atkinson\ngavin@FreeBSD.org\n2009/12/07"] gibbs [label="Justin T. Gibbs\ngibbs@FreeBSD.org\n????/??/??"] gjb [label="Glen Barber\ngjb@FreeBSD.org\n2013/06/04"] gleb [label="Gleb Kurtsou\ngleb@FreeBSD.org\n2011/09/19"] glebius [label="Gleb Smirnoff\nglebius@FreeBSD.org\n2004/07/14"] gnn [label="George V. Neville-Neil\ngnn@FreeBSD.org\n2004/10/11"] gordon [label="Gordon Tetlow\ngordon@FreeBSD.org\n2002/05/17"] grehan [label="Peter Grehan\ngrehan@FreeBSD.org\n2002/08/08"] grog [label="Greg Lehey\ngrog@FreeBSD.org\n1998/08/30"] gshapiro [label="Gregory Shapiro\ngshapiro@FreeBSD.org\n2000/07/12"] harti [label="Hartmut Brandt\nharti@FreeBSD.org\n2003/01/29"] hiren [label="Hiren Panchasara\nhiren@FreeBSD.org\n2013/04/12"] hmp [label="Hiten Pandya\nhmp@FreeBSD.org\n2004/03/23"] ian [label="Ian Lepore\nian@FreeBSD.org\n2013/01/07"] iedowse [label="Ian Dowse\niedowse@FreeBSD.org\n2000/12/01"] imp [label="Warner Losh\nimp@FreeBSD.org\n1996/09/20"] ivoras [label="Ivan Voras\nivoras@FreeBSD.org\n2008/06/10"] jah [label="Jason A. 
Harmening\njah@FreeBSD.org\n2015/03/08"] jamie [label="Jamie Gritton\njamie@FreeBSD.org\n2009/01/28"] jasone [label="Jason Evans\njasone@FreeBSD.org\n1999/03/03"] jceel [label="Jakub Klama\njceel@FreeBSD.org\n2011/09/25"] jch [label="Julien Charbon\njch@FreeBSD.org\n2014/09/24"] jchandra [label="Jayachandran C.\njchandra@FreeBSD.org\n2010/05/19"] jeff [label="Jeff Roberson\njeff@FreeBSD.org\n2002/02/21"] jh [label="Jaakko Heinonen\njh@FreeBSD.org\n2009/10/02"] jhb [label="John Baldwin\njhb@FreeBSD.org\n1999/08/23"] jhibbits [label="Justin Hibbits\njhibbits@FreeBSD.org\n2011/11/30"] jilles [label="Jilles Tjoelker\njilles@FreeBSD.org\n2009/05/22"] jimharris [label="Jim Harris\njimharris@FreeBSD.org\n2011/12/09"] jinmei [label="JINMEI Tatuya\njinmei@FreeBSD.org\n2007/03/17"] jkim [label="Jung-uk Kim\njkim@FreeBSD.org\n2005/07/06"] jkoshy [label="A. Joseph Koshy\njkoshy@FreeBSD.org\n1998/05/13"] jlh [label="Jeremie Le Hen\njlh@FreeBSD.org\n2012/04/22"] jls [label="Jordan Sissel\njls@FreeBSD.org\n2006/12/06"] jmcneill [label="Jared McNeill\njmcneill@FreeBSD.org\n2016/02/24"] jmg [label="John-Mark Gurney\njmg@FreeBSD.org\n1997/02/13"] jmmv [label="Julio Merino\njmmv@FreeBSD.org\n2013/11/02"] joerg [label="Joerg Wunsch\njoerg@FreeBSD.org\n1993/11/14"] jon [label="Jonathan Chen\njon@FreeBSD.org\n2000/10/17"] jonathan [label="Jonathan Anderson\njonathan@FreeBSD.org\n2010/10/07"] jpaetzel [label="Josh Paetzel\njpaetzel@FreeBSD.org\n2011/01/21"] jtl [label="Jonathan T. Looney\njtl@FreeBSD.org\n2015/10/26"] julian [label="Julian Elischer\njulian@FreeBSD.org\n1993/04/19"] jwd [label="John De Boskey\njwd@FreeBSD.org\n2000/05/19"] kaiw [label="Kai Wang\nkaiw@FreeBSD.org\n2007/09/26"] kan [label="Alexander Kabaev\nkan@FreeBSD.org\n2002/07/21"] karels [label="Mike Karels\nkarels@FreeBSD.org\n2016/06/09"] ken [label="Ken Merry\nken@FreeBSD.org\n1998/09/08"] kensmith [label="Ken Smith\nkensmith@FreeBSD.org\n2004/01/23"] kevans [label="Kyle Evans\nkevans@FreeBSD.org\n2017/06/20"] kevlo [label="Kevin Lo\nkevlo@FreeBSD.org\n2006/07/23"] kib [label="Konstantin Belousov\nkib@FreeBSD.org\n2006/06/03"] kibab [label="Ilya Bakulin\nkibab@FreeBSD.org\n2017/09/02"] kmacy [label="Kip Macy\nkmacy@FreeBSD.org\n2005/06/01"] kp [label="Kristof Provost\nkp@FreeBSD.org\n2015/03/22"] landonf [label="Landon Fuller\nlandonf@FreeBSD.org\n2016/05/31"] le [label="Lukas Ertl\nle@FreeBSD.org\n2004/02/02"] lidl [label="Kurt Lidl\nlidl@FreeBSD.org\n2015/10/21"] loos [label="Luiz Otavio O Souza\nloos@FreeBSD.org\n2013/07/03"] lstewart [label="Lawrence Stewart\nlstewart@FreeBSD.org\n2008/10/06"] manu [label="Emmanuel Vadot\nmanu@FreeBSD.org\n2016/04/24"] marcel [label="Marcel Moolenaar\nmarcel@FreeBSD.org\n1999/07/03"] marius [label="Marius Strobl\nmarius@FreeBSD.org\n2004/04/17"] markj [label="Mark Johnston\nmarkj@FreeBSD.org\n2012/12/18"] markm [label="Mark Murray\nmarkm@FreeBSD.org\n1995/04/24"] markus [label="Markus Brueffer\nmarkus@FreeBSD.org\n2006/06/01"] matteo [label="Matteo Riondato\nmatteo@FreeBSD.org\n2006/01/18"] mav [label="Alexander Motin\nmav@FreeBSD.org\n2007/04/12"] maxim [label="Maxim Konovalov\nmaxim@FreeBSD.org\n2002/02/07"] mdf [label="Matthew Fleming\nmdf@FreeBSD.org\n2010/06/04"] mdodd [label="Matthew N. Dodd\nmdodd@FreeBSD.org\n1999/07/27"] melifaro [label="Alexander V. 
Chernikov\nmelifaro@FreeBSD.org\n2011/10/04"] mizhka [label="Michael Zhilin\nmizhka@FreeBSD.org\n2016/07/19"] mjacob [label="Matt Jacob\nmjacob@FreeBSD.org\n1997/08/13"] mjg [label="Mateusz Guzik\nmjg@FreeBSD.org\n2012/06/04"] mjoras [label="Matt Joras\nmjoras@FreeBSD.org\n2017/07/12"] mlaier [label="Max Laier\nmlaier@FreeBSD.org\n2004/02/10"] mmel [label="Michal Meloun\nmmel@FreeBSD.org\n2015/11/01"] monthadar [label="Monthadar Al Jaberi\nmonthadar@FreeBSD.org\n2012/04/02"] mp [label="Mark Peek\nmp@FreeBSD.org\n2001/07/27"] mr [label="Michael Reifenberger\nmr@FreeBSD.org\n2001/09/30"] mw [label="Marcin Wojtas\nmw@FreeBSD.org\n2017/07/18"] neel [label="Neel Natu\nneel@FreeBSD.org\n2009/09/20"] netchild [label="Alexander Leidinger\nnetchild@FreeBSD.org\n2005/03/31"] ngie [label="Ngie Cooper\nngie@FreeBSD.org\n2014/07/27"] nork [label="Norikatsu Shigemura\nnork@FreeBSD.org\n2009/06/09"] np [label="Navdeep Parhar\nnp@FreeBSD.org\n2009/06/05"] nwhitehorn [label="Nathan Whitehorn\nnwhitehorn@FreeBSD.org\n2008/07/03"] n_hibma [label="Nick Hibma\nn_hibma@FreeBSD.org\n1998/11/26"] obrien [label="David E. O'Brien\nobrien@FreeBSD.org\n1996/10/29"] olli [label="Oliver Fromme\nolli@FreeBSD.org\n2008/02/14"] oshogbo [label="Mariusz Zaborski\noshogbo@FreeBSD.org\n2015/04/15"] peadar [label="Peter Edwards\npeadar@FreeBSD.org\n2004/03/08"] peter [label="Peter Wemm\npeter@FreeBSD.org\n1995/07/04"] peterj [label="Peter Jeremy\npeterj@FreeBSD.org\n2012/09/14"] pfg [label="Pedro Giffuni\npfg@FreeBSD.org\n2011/12/01"] phil [label="Phil Shafer\nphil@FreeBSD.org\n2015/12/30"] philip [label="Philip Paeps\nphilip@FreeBSD.org\n2004/01/21"] phk [label="Poul-Henning Kamp\nphk@FreeBSD.org\n1994/02/21"] pho [label="Peter Holm\npho@FreeBSD.org\n2008/11/16"] pjd [label="Pawel Jakub Dawidek\npjd@FreeBSD.org\n2004/02/02"] pkelsey [label="Patrick Kelsey\npkelsey@FreeBSD.org\n2014/05/29"] pluknet [label="Sergey Kandaurov\npluknet@FreeBSD.org\n2010/10/05"] ps [label="Paul Saab\nps@FreeBSD.org\n2000/02/23"] qingli [label="Qing Li\nqingli@FreeBSD.org\n2005/04/13"] ray [label="Aleksandr Rybalko\nray@FreeBSD.org\n2011/05/25"] rdivacky [label="Roman Divacky\nrdivacky@FreeBSD.org\n2008/03/13"] remko [label="Remko Lodder\nremko@FreeBSD.org\n2007/02/23"] rgrimes [label="Rodney W. Grimes\nrgrimes@FreeBSD.org\n1993/06/12\n2017/03/03"] rik [label="Roman Kurakin\nrik@FreeBSD.org\n2003/12/18"] rlibby [label="Ryan Libby\nrlibby@FreeBSD.org\n2017/06/07"] rmacklem [label="Rick Macklem\nrmacklem@FreeBSD.org\n2009/03/27"] rmh [label="Robert Millan\nrmh@FreeBSD.org\n2011/09/18"] rnoland [label="Robert Noland\nrnoland@FreeBSD.org\n2008/09/15"] roberto [label="Ollivier Robert\nroberto@FreeBSD.org\n1995/02/22"] rodrigc [label="Craig Rodrigues\nrodrigc@FreeBSD.org\n2005/05/14"] royger [label="Roger Pau Monne\nroyger@FreeBSD.org\n2013/11/26"] rpaulo [label="Rui Paulo\nrpaulo@FreeBSD.org\n2007/09/25"] rpokala [label="Ravi Pokala\nrpokala@FreeBSD.org\n2015/11/19"] rrs [label="Randall R Stewart\nrrs@FreeBSD.org\n2007/02/08"] rse [label="Ralf S. Engelschall\nrse@FreeBSD.org\n1997/07/31"] rstone [label="Ryan Stone\nrstone@FreeBSD.org\n2010/04/19"] ru [label="Ruslan Ermilov\nru@FreeBSD.org\n1999/05/27"] rwatson [label="Robert N. M. Watson\nrwatson@FreeBSD.org\n1999/12/16"] sam [label="Sam Leffler\nsam@FreeBSD.org\n2002/07/02"] sanpei [label="MIHIRA Sanpei Yoshiro\nsanpei@FreeBSD.org\n2000/06/19"] sbruno [label="Sean Bruno\nsbruno@FreeBSD.org\n2008/08/02"] scf [label="Sean C.
Farley\nscf@FreeBSD.org\n2007/06/24"] schweikh [label="Jens Schweikhardt\nschweikh@FreeBSD.org\n2001/04/06"] scottl [label="Scott Long\nscottl@FreeBSD.org\n2000/09/28"] se [label="Stefan Esser\nse@FreeBSD.org\n1994/08/26"] sephe [label="Sepherosa Ziehau\nsephe@FreeBSD.org\n2007/03/28"] sepotvin [label="Stephane E. Potvin\nsepotvin@FreeBSD.org\n2007/02/15"] sgalabov [label="Stanislav Galabov\nsgalabov@FreeBSD.org\n2016/02/24"] shurd [label="Stephen Hurd\nshurd@FreeBSD.org\n2017/09/02"] simon [label="Simon L. Nielsen\nsimon@FreeBSD.org\n2006/03/07"] sjg [label="Simon J. Gerraty\nsjg@FreeBSD.org\n2012/10/23"] skra [label="Svatopluk Kraus\nskra@FreeBSD.org\n2015/10/28"] slm [label="Stephen McConnell\nslm@FreeBSD.org\n2014/05/07"] smh [label="Steven Hartland\nsmh@FreeBSD.org\n2012/11/12"] sobomax [label="Maxim Sobolev\nsobomax@FreeBSD.org\n2001/07/25"] sos [label="Soren Schmidt\nsos@FreeBSD.org\n????/??/??"] sson [label="Stacey Son\nsson@FreeBSD.org\n2008/07/08"] stas [label="Stanislav Sedov\nstas@FreeBSD.org\n2008/08/22"] stevek [label="Stephen J. Kiernan\nstevek@FreeBSD.org\n2016/07/18"] suz [label="SUZUKI Shinsuke\nsuz@FreeBSD.org\n2002/03/26"] syrinx [label="Shteryana Shopova\nsyrinx@FreeBSD.org\n2006/10/07"] takawata [label="Takanori Watanabe\ntakawata@FreeBSD.org\n2000/07/06"] theraven [label="David Chisnall\ntheraven@FreeBSD.org\n2011/11/11"] thompsa [label="Andrew Thompson\nthompsa@FreeBSD.org\n2005/05/25"] ticso [label="Bernd Walter\nticso@FreeBSD.org\n2002/01/31"] tijl [label="Tijl Coosemans\ntijl@FreeBSD.org\n2010/07/16"] tsoome [label="Toomas Soome\ntsoome@FreeBSD.org\n2016/08/10"] trasz [label="Edward Tomasz Napierala\ntrasz@FreeBSD.org\n2008/08/22"] trhodes [label="Tom Rhodes\ntrhodes@FreeBSD.org\n2002/05/28"] trociny [label="Mikolaj Golub\ntrociny@FreeBSD.org\n2011/03/10"] tuexen [label="Michael Tuexen\ntuexen@FreeBSD.org\n2009/06/06"] tychon [label="Tycho Nightingale\ntychon@FreeBSD.org\n2014/01/21"] ume [label="Hajimu UMEMOTO\nume@FreeBSD.org\n2000/02/26"] uqs [label="Ulrich Spoerlein\nuqs@FreeBSD.org\n2010/01/28"] vangyzen [label="Eric van Gyzen\nvangyzen@FreeBSD.org\n2015/03/08"] vanhu [label="Yvan Vanhullebus\nvanhu@FreeBSD.org\n2008/07/21"] versus [label="Konrad Jankowski\nversus@FreeBSD.org\n2008/10/27"] weongyo [label="Weongyo Jeong\nweongyo@FreeBSD.org\n2007/12/21"] wes [label="Wes Peters\nwes@FreeBSD.org\n1998/11/25"] whu [label="Wei Hu\nwhu@FreeBSD.org\n2015/02/11"] wkoszek [label="Wojciech A. Koszek\nwkoszek@FreeBSD.org\n2006/02/21"] wma [label="Wojciech Macek\nwma@FreeBSD.org\n2016/01/18"] wollman [label="Garrett Wollman\nwollman@FreeBSD.org\n????/??/??"] wsalamon [label="Wayne Salamon\nwsalamon@FreeBSD.org\n2005/06/25"] wulf [label="Vladimir Kondratyev\nwulf@FreeBSD.org\n2017/04/27"] yongari [label="Pyun YongHyeon\nyongari@FreeBSD.org\n2004/08/01"] zbb [label="Zbigniew Bodek\nzbb@FreeBSD.org\n2013/09/02"] zec [label="Marko Zec\nzec@FreeBSD.org\n2008/06/22"] zml [label="Zachary Loafman\nzml@FreeBSD.org\n2009/05/27"] zont [label="Andrey Zonov\nzont@FreeBSD.org\n2012/08/21"] # Pseudo target representing rev 1.1 of commit.allow day1 [label="Birth of FreeBSD"] # Here are the mentor/mentee relationships. # Group together all the mentees for a particular mentor. # Keep the list sorted by mentor login. 
day1 -> jtc day1 -> jkh day1 -> nate day1 -> rgrimes day1 -> alm day1 -> dg adrian -> avos adrian -> jmcneill adrian -> landonf adrian -> lidl adrian -> loos adrian -> mizhka adrian -> monthadar adrian -> ray adrian -> rmh adrian -> sephe adrian -> sgalabov ae -> melifaro allanjude -> tsoome alc -> davide andre -> qingli andrew -> manu anholt -> jkim avg -> art avg -> eugen avg -> pluknet avg -> smh bapt -> allanjude bapt -> araujo bapt -> bdrewery bapt -> wulf benno -> grehan billf -> dougb billf -> gad billf -> jedgar billf -> jhb billf -> shafeeq bmilekic -> csjp bms -> dhartmei bms -> mlaier bms -> thompsa brian -> joe brooks -> bushman brooks -> jamie brooks -> theraven bz -> anchie bz -> jamie bz -> syrinx cognet -> br cognet -> jceel cognet -> kevlo cognet -> ian cognet -> manu cognet -> mw cognet -> wkoszek cognet -> wma cognet -> zbb cperciva -> eadler cperciva -> flz cperciva -> randi cperciva -> simon csjp -> bushman das -> kargl das -> rodrigc delphij -> gabor delphij -> rafan delphij -> sephe des -> anholt des -> hmp des -> mike des -> olli des -> ru des -> bapt dds -> versus dfr -> gallatin dfr -> zml dg -> peter dim -> theraven dwmalone -> fanf dwmalone -> peadar dwmalone -> snb ed -> dim ed -> gavin ed -> jilles ed -> rdivacky ed -> uqs eivind -> des eivind -> rwatson emaste -> achim emaste -> dteske emaste -> kevans emaste -> markj emaste -> rstone emax -> markus fjoe -> versus gallatin -> ticso gavin -> versus gibbs -> mjacob gibbs -> njl gibbs -> royger gibbs -> whu glebius -> mav gnn -> jinmei gnn -> rrs gnn -> ivoras gnn -> vanhu gnn -> lstewart gnn -> np gnn -> davide gnn -> arybchik gnn -> erj gnn -> kp gnn -> jtl gnn -> karels gonzo -> jmcneill gonzo -> wulf grehan -> bryanv grehan -> rgrimes grog -> edwin grog -> le grog -> peterj imp -> akiyama imp -> ambrisko imp -> andrew imp -> bmah imp -> bruno imp -> dmlb imp -> emax imp -> furuta imp -> joe imp -> jon imp -> keichii imp -> kibab imp -> mb imp -> mr imp -> neel imp -> non imp -> nork imp -> onoe imp -> remko imp -> rik imp -> rink imp -> sanpei imp -> shiba imp -> takawata imp -> toshi imp -> tsoome imp -> uch jake -> bms jake -> gordon jake -> harti jake -> jeff jake -> kmacy jake -> robert jake -> yongari jb -> sson jdp -> fjoe jfv -> erj jhb -> arr jhb -> avg jhb -> jch jhb -> jeff jhb -> kbyanc jhb -> peterj jhb -> pfg jhb -> rnoland jhb -> rpokala jimharris -> carl jkh -> dfr jkh -> gj jkh -> grog jkh -> imp jkh -> jlemon jkh -> joerg jkh -> jwd jkh -> msmith jkh -> murray jkh -> phk jkh -> wes jkh -> yar jkoshy -> kaiw jkoshy -> fabient jkoshy -> rstone jlemon -> bmilekic jlemon -> brooks jmallett -> pkelsey jmmv -> ngie joerg -> brian joerg -> eik joerg -> jmg joerg -> le joerg -> netchild joerg -> schweikh julian -> glebius julian -> davidxu julian -> archie julian -> adrian julian -> zec julian -> mp kan -> kib ken -> asomers ken -> slm kib -> ae kib -> badger kib -> dchagin kib -> gjb kib -> jah kib -> jlh kib -> jpaetzel kib -> lulf kib -> melifaro kib -> mmel kib -> pho kib -> pluknet kib -> rdivacky kib -> rmacklem kib -> rmh kib -> skra kib -> stas kib -> tijl kib -> trociny kib -> vangyzen kib -> zont kmacy -> lstewart marcel -> allanjude marcel -> art marcel -> arun marcel -> marius marcel -> nwhitehorn marcel -> sjg markj -> cem markj -> rlibby markm -> jasone markm -> sheldonh mav -> ae mav -> eugen mdf -> gleb mdodd -> jake mike -> das mlaier -> benjsc mlaier -> dhartmei mlaier -> thompsa mlaier -> eri msmith -> cokane msmith -> jasone msmith -> scottl murray -> delphij mux -> cognet mux -> 
dumbbell netchild -> ariff njl -> marks njl -> philip njl -> rpaulo njl -> sepotvin nwhitehorn -> andreast nwhitehorn -> jhibbits obrien -> benno obrien -> groudier obrien -> gshapiro obrien -> kan obrien -> sam + +pfg -> fsu peter -> asmodai peter -> jayanth peter -> ps philip -> benl philip -> ed philip -> jls philip -> matteo philip -> uqs philip -> kp phk -> jkoshy phk -> mux pjd -> def pjd -> kib pjd -> lulf pjd -> oshogbo pjd -> smh pjd -> trociny rgrimes -> markm rmacklem -> jwd royger -> whu rpaulo -> avg rpaulo -> bschmidt rpaulo -> dim rpaulo -> jmmv rpaulo -> lidl rpaulo -> ngie rrs -> brucec rrs -> jchandra rrs -> tuexen rstone -> markj rstone -> mjoras ru -> ceri ru -> cjc ru -> eik ru -> maxim ru -> sobomax rwatson -> adrian rwatson -> antoine rwatson -> bmah rwatson -> brueffer rwatson -> bz rwatson -> cperciva rwatson -> emaste rwatson -> gnn rwatson -> jh rwatson -> jonathan rwatson -> kensmith rwatson -> kmacy rwatson -> linimon rwatson -> rmacklem rwatson -> shafeeq rwatson -> tmm rwatson -> trasz rwatson -> trhodes rwatson -> wsalamon rodrigc -> araujo sam -> andre sam -> benjsc sam -> sephe sbruno -> hiren sbruno -> jimharris sbruno -> shurd schweikh -> dds scottl -> achim scottl -> jimharris scottl -> pjd scottl -> sah scottl -> sbruno scottl -> slm scottl -> yongari sephe -> dexuan sheldonh -> dwmalone sheldonh -> iedowse shin -> ume simon -> benl sjg -> phil sjg -> stevek sos -> marcel stas -> ganbold theraven -> phil thompsa -> weongyo thompsa -> eri trasz -> jh trasz -> mjg ume -> jinmei ume -> suz ume -> tshiozak vangyzen -> badger vangyzen -> dab wes -> scf wkoszek -> jceel wollman -> gad zml -> mdf zml -> zack } Index: projects/runtime-coverage/share/mk/bsd.lib.mk =================================================================== --- projects/runtime-coverage/share/mk/bsd.lib.mk (revision 323974) +++ projects/runtime-coverage/share/mk/bsd.lib.mk (revision 323975) @@ -1,505 +1,509 @@ # from: @(#)bsd.lib.mk 5.26 (Berkeley) 5/2/91 # $FreeBSD$ # .include <bsd.init.mk> .if defined(LIB_CXX) || defined(SHLIB_CXX) _LD= ${CXX} .else _LD= ${CC} .endif .if defined(LIB_CXX) LIB= ${LIB_CXX} .endif .if defined(SHLIB_CXX) SHLIB= ${SHLIB_CXX} .endif LIB_PRIVATE= ${PRIVATELIB:Dprivate} # Set up the variables controlling shared libraries. After this section, # SHLIB_NAME will be defined only if we are to create a shared library. # SHLIB_LINK will be defined only if we are to create a link to it. # INSTALL_PIC_ARCHIVE will be defined only if we are to create a PIC archive. .if defined(NO_PIC) .undef SHLIB_NAME .undef INSTALL_PIC_ARCHIVE .else .if !defined(SHLIB) && defined(LIB) SHLIB= ${LIB} .endif .if !defined(SHLIB_NAME) && defined(SHLIB) && defined(SHLIB_MAJOR) SHLIB_NAME= lib${LIB_PRIVATE}${SHLIB}.so.${SHLIB_MAJOR} .endif .if defined(SHLIB_NAME) && !empty(SHLIB_NAME:M*.so.*) SHLIB_LINK?= ${SHLIB_NAME:R} .endif SONAME?= ${SHLIB_NAME} .endif .if defined(CRUNCH_CFLAGS) CFLAGS+= ${CRUNCH_CFLAGS} .endif .if ${MK_ASSERT_DEBUG} == "no" CFLAGS+= -DNDEBUG NO_WERROR= .endif .if defined(DEBUG_FLAGS) CFLAGS+= ${DEBUG_FLAGS} .if ${MK_CTF} != "no" && ${DEBUG_FLAGS:M-g} != "" CTFFLAGS+= -g .endif _WANTS_DEBUG= .else STRIP?= -s .endif .if ${SHLIBDIR:M*lib32*} TAGS+= lib32 .endif .if defined(NO_ROOT) .if !defined(TAGS) || !
${TAGS:Mpackage=*} TAGS+= package=${PACKAGE:Uruntime} .endif TAG_ARGS= -T ${TAGS:[*]:S/ /,/g} .endif .if ${MK_DEBUG_FILES} != "no" && empty(DEBUG_FLAGS:M-g) && \ empty(DEBUG_FLAGS:M-gdwarf*) CFLAGS+= ${DEBUG_FILES_CFLAGS} CXXFLAGS+= ${DEBUG_FILES_CFLAGS} CTFFLAGS+= -g _WANTS_DEBUG= .endif .if ${MK_COVERAGE} != "no" && defined(_WANTS_DEBUG) _COV_FLAG= --coverage SHARED_CFLAGS+= ${_COV_FLAG} SHARED_CXXFLAGS+= ${_COV_FLAG} .endif .include <bsd.libnames.mk> # prefer .s to a .c, add .po, remove stuff not used in the BSD libraries # .pico used for PIC object files .SUFFIXES: .out .o .bc .ll .po .pico .ppico .S .asm .s .c .cc .cpp .cxx .C .f .y .l .ln .if !defined(PICFLAG) .if ${MACHINE_CPUARCH} == "sparc64" PICFLAG=-fPIC .else PICFLAG=-fpic .endif .endif PO_FLAG=-pg .c.po: ${CC} ${PO_FLAG} ${STATIC_CFLAGS} ${PO_CFLAGS} -c ${.IMPSRC} -o ${.TARGET} ${CTFCONVERT_CMD} .c.pico: ${CC} ${PICFLAG} -DPIC ${SHARED_CFLAGS} ${CFLAGS} -c ${.IMPSRC} -o ${.TARGET} ${CTFCONVERT_CMD} .c.ppico: ${CC} ${PICFLAG} -DPIC ${SHARED_CFLAGS:N${_COV_FLAG}} ${CFLAGS} -c ${.IMPSRC} -o ${.TARGET} ${CTFCONVERT_CMD} .cc.po .C.po .cpp.po .cxx.po: ${CXX} ${PO_FLAG} ${STATIC_CXXFLAGS} ${PO_CXXFLAGS} -c ${.IMPSRC} -o ${.TARGET} .cc.pico .C.pico .cpp.pico .cxx.pico: ${CXX} ${PICFLAG} -DPIC ${SHARED_CXXFLAGS} ${CXXFLAGS} -c ${.IMPSRC} -o ${.TARGET} .cc.ppico .C.ppico .cpp.ppico .cxx.ppico: ${CXX} ${PICFLAG} -DPIC ${SHARED_CXXFLAGS:N${_COV_FLAG}} ${CXXFLAGS} -c ${.IMPSRC} -o ${.TARGET} .f.po: ${FC} -pg ${FFLAGS} -o ${.TARGET} -c ${.IMPSRC} ${CTFCONVERT_CMD} .f.pico .f.ppico: ${FC} ${PICFLAG} -DPIC ${FFLAGS} -o ${.TARGET} -c ${.IMPSRC} ${CTFCONVERT_CMD} .s.po .s.pico .s.ppico: ${AS} ${AFLAGS} -o ${.TARGET} ${.IMPSRC} ${CTFCONVERT_CMD} .asm.po: ${CC:N${CCACHE_BIN}} -x assembler-with-cpp -DPROF ${PO_CFLAGS} \ ${ACFLAGS} -c ${.IMPSRC} -o ${.TARGET} ${CTFCONVERT_CMD} .asm.pico: ${CC:N${CCACHE_BIN}} -x assembler-with-cpp ${PICFLAG} -DPIC \ ${CFLAGS} ${ACFLAGS} -c ${.IMPSRC} -o ${.TARGET} ${CTFCONVERT_CMD} .asm.ppico: ${CC:N${CCACHE_BIN}} -x assembler-with-cpp ${PICFLAG} -DPIC \ ${CFLAGS:N${_COV_FLAG}} ${ACFLAGS} -c ${.IMPSRC} -o ${.TARGET} ${CTFCONVERT_CMD} .S.po: ${CC:N${CCACHE_BIN}} -DPROF ${PO_CFLAGS} ${ACFLAGS} -c ${.IMPSRC} \ -o ${.TARGET} ${CTFCONVERT_CMD} .S.pico .S.ppico: ${CC:N${CCACHE_BIN}} ${PICFLAG} -DPIC ${CFLAGS} ${ACFLAGS} \ -c ${.IMPSRC} -o ${.TARGET} ${CTFCONVERT_CMD} _LIBDIR:=${LIBDIR} _SHLIBDIR:=${SHLIBDIR} .if defined(SHLIB_NAME) .if ${MK_DEBUG_FILES} != "no" SHLIB_NAME_FULL=${SHLIB_NAME}.full .if ${MK_COVERAGE} != "no" COVERAGEFILEDIR=${COVERAGEDIR}${_SHLIBDIR} .if !exists(${DESTDIR}${COVERAGEFILEDIR}) COVERAGEMKDIR= .endif .endif # Use ${DEBUGDIR} for base system debug files, else .debug subdirectory .if ${_SHLIBDIR} == "/boot" ||\ ${SHLIBDIR:C%/lib(/.*)?$%/lib%} == "/lib" ||\ ${SHLIBDIR:C%/usr/(tests/)?lib(32|exec)?(/.*)?%/usr/lib%} == "/usr/lib" DEBUGFILEDIR=${DEBUGDIR}${_SHLIBDIR} .else DEBUGFILEDIR=${_SHLIBDIR}/.debug .endif .if !exists(${DESTDIR}${DEBUGFILEDIR}) DEBUGMKDIR= .endif .else SHLIB_NAME_FULL=${SHLIB_NAME} .endif .endif .include <bsd.symver.mk> # Allow libraries to specify their own version map or have it # automatically generated (see bsd.symver.mk above).
.if ${MK_SYMVER} == "yes" && !empty(VERSION_MAP) ${SHLIB_NAME_FULL}: ${VERSION_MAP} LDFLAGS+= -Wl,--version-script=${VERSION_MAP} .endif .if defined(LIB) && !empty(LIB) || defined(SHLIB_NAME) OBJS+= ${SRCS:N*.h:${OBJS_SRCS_FILTER:ts:}:S/$/.o/} CLEANFILES+= ${OBJS} ${STATICOBJS} .endif .if defined(LIB) && !empty(LIB) _LIBS= lib${LIB_PRIVATE}${LIB}.a lib${LIB_PRIVATE}${LIB}.a: ${OBJS} ${STATICOBJS} @${ECHO} building static ${LIB} library @rm -f ${.TARGET} ${AR} ${ARFLAGS} ${.TARGET} `NM='${NM}' NMFLAGS='${NMFLAGS}' \ ${LORDER} ${OBJS} ${STATICOBJS} | ${TSORT} ${TSORTFLAGS}` ${ARADD} ${RANLIB} ${RANLIBFLAGS} ${.TARGET} .endif .if !defined(INTERNALLIB) .if ${MK_PROFILE} != "no" && defined(LIB) && !empty(LIB) _LIBS+= lib${LIB_PRIVATE}${LIB}_p.a POBJS+= ${OBJS:.o=.po} ${STATICOBJS:.o=.po} DEPENDOBJS+= ${POBJS} CLEANFILES+= ${POBJS} lib${LIB_PRIVATE}${LIB}_p.a: ${POBJS} @${ECHO} building profiled ${LIB} library @rm -f ${.TARGET} ${AR} ${ARFLAGS} ${.TARGET} `NM='${NM}' NMFLAGS='${NMFLAGS}' \ ${LORDER} ${POBJS} | ${TSORT} ${TSORTFLAGS}` ${ARADD} ${RANLIB} ${RANLIBFLAGS} ${.TARGET} .endif .if defined(LLVM_LINK) BCOBJS= ${OBJS:.o=.bco} ${STATICOBJS:.o=.bco} LLOBJS= ${OBJS:.o=.llo} ${STATICOBJS:.o=.llo} CLEANFILES+= ${BCOBJS} ${LLOBJS} lib${LIB_PRIVATE}${LIB}.bc: ${BCOBJS} ${LLVM_LINK} -o ${.TARGET} ${BCOBJS} lib${LIB_PRIVATE}${LIB}.ll: ${LLOBJS} ${LLVM_LINK} -S -o ${.TARGET} ${LLOBJS} .endif .if defined(SHLIB_NAME) || \ defined(INSTALL_PIC_ARCHIVE) && defined(LIB) && !empty(LIB) SOBJS+= ${OBJS:.o=.pico} DEPENDOBJS+= ${SOBJS} CLEANFILES+= ${SOBJS} .if defined(INSTALL_PIC_ARCHIVE) && ${MK_COVERAGE} != "no" SPOBJS:= ${SOBJS:.pico=.ppico} DEPENDOBJS+= ${SPOBJS} CLEANFILES+= ${SPOBJS} .endif .endif .if defined(SHLIB_NAME) _LIBS+= ${SHLIB_NAME} .if defined(_COV_FLAG) SOLINKOPTS+= ${_COV_FLAG} .endif SOLINKOPTS+= -shared -Wl,-x .if defined(LD_FATAL_WARNINGS) && ${LD_FATAL_WARNINGS} == "no" SOLINKOPTS+= -Wl,--no-fatal-warnings .else SOLINKOPTS+= -Wl,--fatal-warnings .endif SOLINKOPTS+= -Wl,--warn-shared-textrel .if target(beforelinking) beforelinking: ${SOBJS} ${SPOBJS} ${SHLIB_NAME_FULL}: beforelinking .endif .if defined(SHLIB_LINK) .if defined(SHLIB_LDSCRIPT) && !empty(SHLIB_LDSCRIPT) && exists(${.CURDIR}/${SHLIB_LDSCRIPT}) ${SHLIB_LINK:R}.ld: ${.CURDIR}/${SHLIB_LDSCRIPT} sed -e 's,@@SHLIB@@,${_SHLIBDIR}/${SHLIB_NAME},g' \ -e 's,@@LIBDIR@@,${_LIBDIR},g' \ ${.ALLSRC} > ${.TARGET} ${SHLIB_NAME_FULL}: ${SHLIB_LINK:R}.ld CLEANFILES+= ${SHLIB_LINK:R}.ld .endif CLEANFILES+= ${SHLIB_LINK} .endif ${SHLIB_NAME_FULL}: ${SOBJS} @${ECHO} building shared library ${SHLIB_NAME} @rm -f ${SHLIB_NAME} ${SHLIB_LINK} .if defined(SHLIB_LINK) && !commands(${SHLIB_LINK:R}.ld) && ${MK_DEBUG_FILES} == "no" @${INSTALL_SYMLINK} ${TAG_ARGS:D${TAG_ARGS},development} ${SHLIB_NAME} ${SHLIB_LINK} .endif ${_LD:N${CCACHE_BIN}} ${LDFLAGS} ${SSP_CFLAGS} ${SOLINKOPTS} \ -o ${.TARGET} -Wl,-soname,${SONAME} \ `NM='${NM}' NMFLAGS='${NMFLAGS}' ${LORDER} ${SOBJS} | \ ${TSORT} ${TSORTFLAGS}` ${LDADD} .if ${MK_CTF} != "no" ${CTFMERGE} ${CTFFLAGS} -o ${.TARGET} ${SOBJS} .endif .if ${MK_DEBUG_FILES} != "no" CLEANFILES+= ${SHLIB_NAME_FULL} ${SHLIB_NAME}.debug ${SHLIB_NAME}: ${SHLIB_NAME_FULL} ${SHLIB_NAME}.debug ${OBJCOPY} --strip-debug --add-gnu-debuglink=${SHLIB_NAME}.debug \ ${SHLIB_NAME_FULL} ${.TARGET} .if defined(SHLIB_LINK) && !commands(${SHLIB_LINK:R}.ld) @${INSTALL_SYMLINK} ${TAG_ARGS:D${TAG_ARGS},development} ${SHLIB_NAME} ${SHLIB_LINK} .endif ${SHLIB_NAME}.debug: ${SHLIB_NAME_FULL} ${OBJCOPY} --only-keep-debug ${SHLIB_NAME_FULL} ${.TARGET} 
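# For a coverage build (MK_COVERAGE != "no" on a debug-enabled build), the
# shared object is compiled and linked with ${_COV_FLAG} (--coverage), and
# the unstripped, instrumented ${SHLIB_NAME}.full produced here is what the
# MK_COVERAGE install rules below place under ${COVERAGEDIR}; the .ppico
# suffix rules above filter ${_COV_FLAG} back out so the special pic archive
# stays uninstrumented.  A minimal sketch of the debug split performed by the
# rules above, for a hypothetical libfoo:
#	objcopy --only-keep-debug libfoo.so.1.full libfoo.so.1.debug
#	objcopy --strip-debug --add-gnu-debuglink=libfoo.so.1.debug \
#	    libfoo.so.1.full libfoo.so.1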
.endif
.endif #defined(SHLIB_NAME)

.if defined(INSTALL_PIC_ARCHIVE) && defined(LIB) && !empty(LIB) && ${MK_TOOLCHAIN} != "no"
_LIBS+=		lib${LIB_PRIVATE}${LIB}_pic.a

+.if ${MK_COVERAGE} != "no"
lib${LIB_PRIVATE}${LIB}_pic.a: ${SPOBJS}
+.else
+lib${LIB_PRIVATE}${LIB}_pic.a: ${SOBJS}
+.endif
	@${ECHO} building special pic ${LIB} library
	@rm -f ${.TARGET}
	${AR} ${ARFLAGS} ${.TARGET} ${.ALLSRC} ${ARADD}
	${RANLIB} ${RANLIBFLAGS} ${.TARGET}
.endif

.if defined(WANT_LINT) && !defined(NO_LINT) && defined(LIB) && !empty(LIB)
LINTLIB=	llib-l${LIB}.ln
_LIBS+=		${LINTLIB}
LINTOBJS+=	${SRCS:M*.c:.c=.ln}
CLEANFILES+=	${LINTOBJS}

${LINTLIB}: ${LINTOBJS}
	@${ECHO} building lint library ${.TARGET}
	@rm -f ${.TARGET}
	${LINT} ${LINTLIBFLAGS} ${CFLAGS:M-[DIU]*} ${.ALLSRC}
.endif

.endif # !defined(INTERNALLIB)

.if defined(_SKIP_BUILD)
all:
.else
.if defined(_LIBS) && !empty(_LIBS)
all: ${_LIBS}
.endif

.if ${MK_MAN} != "no" && !defined(LIBRARIES_ONLY)
all: all-man
.endif
.endif

CLEANFILES+=	${_LIBS}

_EXTRADEPEND:
.if !defined(NO_EXTRADEPEND) && defined(SHLIB_NAME)
.if defined(DPADD) && !empty(DPADD)
	echo ${SHLIB_NAME_FULL}: ${DPADD} >> ${DEPENDFILE}
.endif
.endif

.if !target(install)

.if defined(PRECIOUSLIB)
.if !defined(NO_FSCHG)
SHLINSTALLFLAGS+= -fschg
.endif
.endif

# Install libraries with -S to avoid risk of modifying in-use libraries when
# installing to a running system.  It is safe to avoid this for NO_ROOT builds
# that are only creating an image.
.if !defined(NO_SAFE_LIBINSTALL) && !defined(NO_ROOT)
SHLINSTALLFLAGS+= -S
.endif

_INSTALLFLAGS:=	${INSTALLFLAGS}
.for ie in ${INSTALLFLAGS_EDIT}
_INSTALLFLAGS:=	${_INSTALLFLAGS${ie}}
.endfor
_SHLINSTALLFLAGS:= ${SHLINSTALLFLAGS}
.for ie in ${INSTALLFLAGS_EDIT}
_SHLINSTALLFLAGS:= ${_SHLINSTALLFLAGS${ie}}
.endfor

.if !defined(INTERNALLIB)
realinstall: _libinstall
.ORDER: beforeinstall _libinstall
_libinstall:
.if defined(LIB) && !empty(LIB) && ${MK_INSTALLLIB} != "no"
	${INSTALL} ${TAG_ARGS:D${TAG_ARGS},development} -C -o ${LIBOWN} -g ${LIBGRP} -m ${LIBMODE} \
	    ${_INSTALLFLAGS} lib${LIB_PRIVATE}${LIB}.a ${DESTDIR}${_LIBDIR}/
.endif
.if ${MK_PROFILE} != "no" && defined(LIB) && !empty(LIB)
	${INSTALL} ${TAG_ARGS:D${TAG_ARGS},profile} -C -o ${LIBOWN} -g ${LIBGRP} -m ${LIBMODE} \
	    ${_INSTALLFLAGS} lib${LIB_PRIVATE}${LIB}_p.a ${DESTDIR}${_LIBDIR}/
.endif
.if defined(SHLIB_NAME)
	${INSTALL} ${TAG_ARGS} ${STRIP} -o ${LIBOWN} -g ${LIBGRP} -m ${LIBMODE} \
	    ${_INSTALLFLAGS} ${_SHLINSTALLFLAGS} \
	    ${SHLIB_NAME} ${DESTDIR}${_SHLIBDIR}/
.if ${MK_DEBUG_FILES} != "no"
.if ${MK_COVERAGE} != "no"
.if defined(COVERAGEMKDIR)
	${INSTALL} ${TAG_ARGS:D${TAG_ARGS},coverage} -d ${DESTDIR}${COVERAGEFILEDIR}/
.endif
	${INSTALL} ${TAG_ARGS:D${TAG_ARGS},coverage} -o ${LIBOWN} -g ${LIBGRP} -m ${LIBMODE} \
	    ${_INSTALLFLAGS} \
	    ${SHLIB_NAME}.full ${DESTDIR}${COVERAGEFILEDIR}/${SHLIB_NAME}
.endif
.if defined(DEBUGMKDIR)
	${INSTALL} ${TAG_ARGS:D${TAG_ARGS},debug} -d ${DESTDIR}${DEBUGFILEDIR}/
.endif
	${INSTALL} ${TAG_ARGS:D${TAG_ARGS},debug} -o ${LIBOWN} -g ${LIBGRP} -m ${DEBUGMODE} \
	    ${_INSTALLFLAGS} \
	    ${SHLIB_NAME}.debug ${DESTDIR}${DEBUGFILEDIR}/
.endif
.if defined(SHLIB_LINK)
.if commands(${SHLIB_LINK:R}.ld)
	${INSTALL} ${TAG_ARGS:D${TAG_ARGS},development} -S -C -o ${LIBOWN} -g ${LIBGRP} -m ${LIBMODE} \
	    ${_INSTALLFLAGS} ${SHLIB_LINK:R}.ld \
	    ${DESTDIR}${_LIBDIR}/${SHLIB_LINK}
.for _SHLIB_LINK_LINK in ${SHLIB_LDSCRIPT_LINKS}
	${INSTALL_SYMLINK} ${SHLIB_LINK} ${DESTDIR}${_LIBDIR}/${_SHLIB_LINK_LINK}
.endfor
.else
.if ${_SHLIBDIR} == ${_LIBDIR}
.if ${SHLIB_LINK:Mlib*}
	${INSTALL_RSYMLINK}
${TAG_ARGS:D${TAG_ARGS},development} ${SHLIB_NAME} ${DESTDIR}${_LIBDIR}/${SHLIB_LINK} .else ${INSTALL_RSYMLINK} ${TAG_ARGS} ${DESTDIR}${_SHLIBDIR}/${SHLIB_NAME} \ ${DESTDIR}${_LIBDIR}/${SHLIB_LINK} .endif .else .if ${SHLIB_LINK:Mlib*} ${INSTALL_RSYMLINK} ${TAG_ARGS:D${TAG_ARGS},development} ${DESTDIR}${_SHLIBDIR}/${SHLIB_NAME} \ ${DESTDIR}${_LIBDIR}/${SHLIB_LINK} .else ${INSTALL_RSYMLINK} ${TAG_ARGS} ${DESTDIR}${_SHLIBDIR}/${SHLIB_NAME} \ ${DESTDIR}${_LIBDIR}/${SHLIB_LINK} .endif .if exists(${DESTDIR}${_LIBDIR}/${SHLIB_NAME}) -chflags noschg ${DESTDIR}${_LIBDIR}/${SHLIB_NAME} rm -f ${DESTDIR}${_LIBDIR}/${SHLIB_NAME} .endif .endif .endif # SHLIB_LDSCRIPT .endif # SHLIB_LINK .endif # SHIB_NAME .if defined(INSTALL_PIC_ARCHIVE) && defined(LIB) && !empty(LIB) && ${MK_TOOLCHAIN} != "no" ${INSTALL} ${TAG_ARGS:D${TAG_ARGS},development} -o ${LIBOWN} -g ${LIBGRP} -m ${LIBMODE} \ ${_INSTALLFLAGS} lib${LIB}_pic.a ${DESTDIR}${_LIBDIR}/ .endif .if defined(WANT_LINT) && !defined(NO_LINT) && defined(LIB) && !empty(LIB) ${INSTALL} ${TAG_ARGS:D${TAG_ARGS},development} -o ${LIBOWN} -g ${LIBGRP} -m ${LIBMODE} \ ${_INSTALLFLAGS} ${LINTLIB} ${DESTDIR}${LINTLIBDIR}/ .endif .endif # !defined(INTERNALLIB) .if !defined(LIBRARIES_ONLY) .include .if defined(_COV_FLAG) GCDAS= ${SRCS:M*.[c|cc|cpp|cxx|C]:R:S/$/.gcda/g} .include .endif .include .include .include .endif .include .if ${MK_MAN} != "no" && !defined(LIBRARIES_ONLY) realinstall: maninstall .ORDER: beforeinstall maninstall .endif .endif .if !target(lint) lint: ${SRCS:M*.c} ${LINT} ${LINTFLAGS} ${CFLAGS:M-[DIU]*} ${.ALLSRC} .endif .if ${MK_MAN} != "no" && !defined(LIBRARIES_ONLY) .include .endif .if defined(LIB) && !empty(LIB) OBJS_DEPEND_GUESS+= ${SRCS:M*.h} .for _S in ${SRCS:N*.[hly]} OBJS_DEPEND_GUESS.${_S:${OBJS_SRCS_FILTER:ts:}}.po+= ${_S} .endfor .endif .if defined(SHLIB_NAME) || \ defined(INSTALL_PIC_ARCHIVE) && defined(LIB) && !empty(LIB) .for _S in ${SRCS:N*.[hly]} OBJS_DEPEND_GUESS.${_S:${OBJS_SRCS_FILTER:ts:}}.pico+= ${_S} OBJS_DEPEND_GUESS.${_S:R}.ppico+= ${_S} .endfor .endif .if defined(HAS_TESTS) MAKE+= MK_MAKE_CHECK_USE_SANDBOX=yes SUBDIR_TARGETS+= check TESTS_LD_LIBRARY_PATH+= ${.OBJDIR} .endif .include .include .include .include Index: projects/runtime-coverage/sys/arm/broadcom/bcm2835/bcm2835_sdhci.c =================================================================== --- projects/runtime-coverage/sys/arm/broadcom/bcm2835/bcm2835_sdhci.c (revision 323974) +++ projects/runtime-coverage/sys/arm/broadcom/bcm2835/bcm2835_sdhci.c (revision 323975) @@ -1,690 +1,686 @@ /*- * Copyright (c) 2012 Oleksandr Tymoshenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mmcbr_if.h" #include "sdhci_if.h" #include "opt_mmccam.h" #include "bcm2835_dma.h" #include #include "bcm2835_vcbus.h" #define BCM2835_DEFAULT_SDHCI_FREQ 50 #define BCM_SDHCI_BUFFER_SIZE 512 #define NUM_DMA_SEGS 2 #ifdef DEBUG #define dprintf(fmt, args...) do { printf("%s(): ", __func__); \ printf(fmt,##args); } while (0) #else #define dprintf(fmt, args...) #endif static int bcm2835_sdhci_hs = 1; static int bcm2835_sdhci_pio_mode = 0; static struct ofw_compat_data compat_data[] = { {"broadcom,bcm2835-sdhci", 1}, {"brcm,bcm2835-mmc", 1}, {NULL, 0} }; TUNABLE_INT("hw.bcm2835.sdhci.hs", &bcm2835_sdhci_hs); TUNABLE_INT("hw.bcm2835.sdhci.pio_mode", &bcm2835_sdhci_pio_mode); struct bcm_sdhci_softc { device_t sc_dev; struct resource * sc_mem_res; struct resource * sc_irq_res; bus_space_tag_t sc_bst; bus_space_handle_t sc_bsh; void * sc_intrhand; struct mmc_request * sc_req; struct sdhci_slot sc_slot; int sc_dma_ch; bus_dma_tag_t sc_dma_tag; bus_dmamap_t sc_dma_map; vm_paddr_t sc_sdhci_buffer_phys; uint32_t cmd_and_mode; bus_addr_t dmamap_seg_addrs[NUM_DMA_SEGS]; bus_size_t dmamap_seg_sizes[NUM_DMA_SEGS]; int dmamap_seg_count; int dmamap_seg_index; int dmamap_status; }; static int bcm_sdhci_probe(device_t); static int bcm_sdhci_attach(device_t); static int bcm_sdhci_detach(device_t); static void bcm_sdhci_intr(void *); static int bcm_sdhci_get_ro(device_t, device_t); static void bcm_sdhci_dma_intr(int ch, void *arg); static void bcm_sdhci_dmacb(void *arg, bus_dma_segment_t *segs, int nseg, int err) { struct bcm_sdhci_softc *sc = arg; int i; sc->dmamap_status = err; sc->dmamap_seg_count = nseg; /* Note nseg is guaranteed to be zero if err is non-zero. 
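	 * The segment copy loop below therefore copies nothing on error;
	 * callers check dmamap_status after bus_dmamap_load() before they
	 * use the saved segment addresses and sizes.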
*/ for (i = 0; i < nseg; i++) { sc->dmamap_seg_addrs[i] = segs[i].ds_addr; sc->dmamap_seg_sizes[i] = segs[i].ds_len; } } static int bcm_sdhci_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (ofw_bus_search_compatible(dev, compat_data)->ocd_data == 0) return (ENXIO); device_set_desc(dev, "Broadcom 2708 SDHCI controller"); return (BUS_PROBE_DEFAULT); } static int bcm_sdhci_attach(device_t dev) { struct bcm_sdhci_softc *sc = device_get_softc(dev); int rid, err; phandle_t node; pcell_t cell; u_int default_freq; sc->sc_dev = dev; sc->sc_req = NULL; err = bcm2835_mbox_set_power_state(BCM2835_MBOX_POWER_ID_EMMC, TRUE); if (err != 0) { if (bootverbose) device_printf(dev, "Unable to enable the power\n"); return (err); } default_freq = 0; err = bcm2835_mbox_get_clock_rate(BCM2835_MBOX_CLOCK_ID_EMMC, &default_freq); if (err == 0) { /* Convert to MHz */ default_freq /= 1000000; } if (default_freq == 0) { node = ofw_bus_get_node(sc->sc_dev); if ((OF_getencprop(node, "clock-frequency", &cell, sizeof(cell))) > 0) default_freq = cell / 1000000; } if (default_freq == 0) default_freq = BCM2835_DEFAULT_SDHCI_FREQ; if (bootverbose) device_printf(dev, "SDHCI frequency: %dMHz\n", default_freq); rid = 0; sc->sc_mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!sc->sc_mem_res) { device_printf(dev, "cannot allocate memory window\n"); err = ENXIO; goto fail; } sc->sc_bst = rman_get_bustag(sc->sc_mem_res); sc->sc_bsh = rman_get_bushandle(sc->sc_mem_res); rid = 0; sc->sc_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (!sc->sc_irq_res) { device_printf(dev, "cannot allocate interrupt\n"); err = ENXIO; goto fail; } if (bus_setup_intr(dev, sc->sc_irq_res, INTR_TYPE_BIO | INTR_MPSAFE, NULL, bcm_sdhci_intr, sc, &sc->sc_intrhand)) { device_printf(dev, "cannot setup interrupt handler\n"); err = ENXIO; goto fail; } if (!bcm2835_sdhci_pio_mode) sc->sc_slot.opt = SDHCI_PLATFORM_TRANSFER; sc->sc_slot.caps = SDHCI_CAN_VDD_330 | SDHCI_CAN_VDD_180; if (bcm2835_sdhci_hs) sc->sc_slot.caps |= SDHCI_CAN_DO_HISPD; sc->sc_slot.caps |= (default_freq << SDHCI_CLOCK_BASE_SHIFT); sc->sc_slot.quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK | SDHCI_QUIRK_BROKEN_TIMEOUT_VAL | SDHCI_QUIRK_DONT_SET_HISPD_BIT | SDHCI_QUIRK_MISSING_CAPS; sdhci_init_slot(dev, &sc->sc_slot, 0); sc->sc_dma_ch = bcm_dma_allocate(BCM_DMA_CH_ANY); if (sc->sc_dma_ch == BCM_DMA_CH_INVALID) goto fail; bcm_dma_setup_intr(sc->sc_dma_ch, bcm_sdhci_dma_intr, sc); /* Allocate bus_dma resources. 
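	 * The tag created below constrains each transfer to
	 * BCM_SDHCI_BUFFER_SIZE bytes in at most NUM_DMA_SEGS segments below
	 * the 32-bit address limit, matching the controller's 512-byte data
	 * buffer.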
*/ err = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, BCM_SDHCI_BUFFER_SIZE, NUM_DMA_SEGS, BCM_SDHCI_BUFFER_SIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->sc_dma_tag); if (err) { device_printf(dev, "failed allocate DMA tag"); goto fail; } err = bus_dmamap_create(sc->sc_dma_tag, 0, &sc->sc_dma_map); if (err) { device_printf(dev, "bus_dmamap_create failed\n"); goto fail; } /* FIXME: Fix along with other BUS_SPACE_PHYSADDR instances */ sc->sc_sdhci_buffer_phys = rman_get_start(sc->sc_mem_res) + SDHCI_BUFFER; bus_generic_probe(dev); bus_generic_attach(dev); -#ifdef MMCCAM - sdhci_cam_start_slot(&sc->sc_slot); -#else sdhci_start_slot(&sc->sc_slot); -#endif return (0); fail: if (sc->sc_intrhand) bus_teardown_intr(dev, sc->sc_irq_res, sc->sc_intrhand); if (sc->sc_irq_res) bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq_res); if (sc->sc_mem_res) bus_release_resource(dev, SYS_RES_MEMORY, 0, sc->sc_mem_res); return (err); } static int bcm_sdhci_detach(device_t dev) { return (EBUSY); } static void bcm_sdhci_intr(void *arg) { struct bcm_sdhci_softc *sc = arg; sdhci_generic_intr(&sc->sc_slot); } static int bcm_sdhci_get_ro(device_t bus, device_t child) { return (0); } static inline uint32_t RD4(struct bcm_sdhci_softc *sc, bus_size_t off) { uint32_t val = bus_space_read_4(sc->sc_bst, sc->sc_bsh, off); return val; } static inline void WR4(struct bcm_sdhci_softc *sc, bus_size_t off, uint32_t val) { bus_space_write_4(sc->sc_bst, sc->sc_bsh, off, val); /* * The Arasan HC has a bug where it may lose the content of * consecutive writes to registers that are within two SD-card * clock cycles of each other (a clock domain crossing problem). */ if (sc->sc_slot.clock > 0) DELAY(((2 * 1000000) / sc->sc_slot.clock) + 1); } static uint8_t bcm_sdhci_read_1(device_t dev, struct sdhci_slot *slot, bus_size_t off) { struct bcm_sdhci_softc *sc = device_get_softc(dev); uint32_t val = RD4(sc, off & ~3); return ((val >> (off & 3)*8) & 0xff); } static uint16_t bcm_sdhci_read_2(device_t dev, struct sdhci_slot *slot, bus_size_t off) { struct bcm_sdhci_softc *sc = device_get_softc(dev); uint32_t val = RD4(sc, off & ~3); /* * Standard 32-bit handling of command and transfer mode. 
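	 * The hardware exposes the 16-bit command and transfer-mode registers
	 * as one 32-bit word, and writing the command half triggers command
	 * execution, so bcm_sdhci_write_2() latches transfer-mode writes in
	 * sc->cmd_and_mode and commits both halves together on the command
	 * write; the reads here are served from that shadow copy.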
	 */
	if (off == SDHCI_TRANSFER_MODE) {
		return (sc->cmd_and_mode >> 16);
	} else if (off == SDHCI_COMMAND_FLAGS) {
		return (sc->cmd_and_mode & 0x0000ffff);
	}

	return ((val >> (off & 3)*8) & 0xffff);
}

static uint32_t
bcm_sdhci_read_4(device_t dev, struct sdhci_slot *slot, bus_size_t off)
{
	struct bcm_sdhci_softc *sc = device_get_softc(dev);

	return RD4(sc, off);
}

static void
bcm_sdhci_read_multi_4(device_t dev, struct sdhci_slot *slot, bus_size_t off,
    uint32_t *data, bus_size_t count)
{
	struct bcm_sdhci_softc *sc = device_get_softc(dev);

	bus_space_read_multi_4(sc->sc_bst, sc->sc_bsh, off, data, count);
}

static void
bcm_sdhci_write_1(device_t dev, struct sdhci_slot *slot, bus_size_t off,
    uint8_t val)
{
	struct bcm_sdhci_softc *sc = device_get_softc(dev);
	uint32_t val32 = RD4(sc, off & ~3);

	val32 &= ~(0xff << (off & 3)*8);
	val32 |= (val << (off & 3)*8);
	WR4(sc, off & ~3, val32);
}

static void
bcm_sdhci_write_2(device_t dev, struct sdhci_slot *slot, bus_size_t off,
    uint16_t val)
{
	struct bcm_sdhci_softc *sc = device_get_softc(dev);
	uint32_t val32;

	if (off == SDHCI_COMMAND_FLAGS)
		val32 = sc->cmd_and_mode;
	else
		val32 = RD4(sc, off & ~3);
	val32 &= ~(0xffff << (off & 3)*8);
	val32 |= (val << (off & 3)*8);
	if (off == SDHCI_TRANSFER_MODE)
		sc->cmd_and_mode = val32;
	else {
		WR4(sc, off & ~3, val32);
		if (off == SDHCI_COMMAND_FLAGS)
			sc->cmd_and_mode = val32;
	}
}

static void
bcm_sdhci_write_4(device_t dev, struct sdhci_slot *slot, bus_size_t off,
    uint32_t val)
{
	struct bcm_sdhci_softc *sc = device_get_softc(dev);

	WR4(sc, off, val);
}

static void
bcm_sdhci_write_multi_4(device_t dev, struct sdhci_slot *slot, bus_size_t off,
    uint32_t *data, bus_size_t count)
{
	struct bcm_sdhci_softc *sc = device_get_softc(dev);

	bus_space_write_multi_4(sc->sc_bst, sc->sc_bsh, off, data, count);
}

static void
bcm_sdhci_start_dma_seg(struct bcm_sdhci_softc *sc)
{
	struct sdhci_slot *slot;
	vm_paddr_t pdst, psrc;
	int err, idx, len, sync_op;

	slot = &sc->sc_slot;
	idx = sc->dmamap_seg_index++;
	len = sc->dmamap_seg_sizes[idx];
	slot->offset += len;

	if (slot->curcmd->data->flags & MMC_DATA_READ) {
		bcm_dma_setup_src(sc->sc_dma_ch, BCM_DMA_DREQ_EMMC,
		    BCM_DMA_SAME_ADDR, BCM_DMA_32BIT);
		bcm_dma_setup_dst(sc->sc_dma_ch, BCM_DMA_DREQ_NONE,
		    BCM_DMA_INC_ADDR,
		    (len & 0xf) ? BCM_DMA_32BIT : BCM_DMA_128BIT);
		psrc = sc->sc_sdhci_buffer_phys;
		pdst = sc->dmamap_seg_addrs[idx];
		sync_op = BUS_DMASYNC_PREREAD;
	} else {
		bcm_dma_setup_src(sc->sc_dma_ch, BCM_DMA_DREQ_NONE,
		    BCM_DMA_INC_ADDR,
		    (len & 0xf) ? BCM_DMA_32BIT : BCM_DMA_128BIT);
		bcm_dma_setup_dst(sc->sc_dma_ch, BCM_DMA_DREQ_EMMC,
		    BCM_DMA_SAME_ADDR, BCM_DMA_32BIT);
		psrc = sc->dmamap_seg_addrs[idx];
		pdst = sc->sc_sdhci_buffer_phys;
		sync_op = BUS_DMASYNC_PREWRITE;
	}

	/*
	 * When starting a new DMA operation do the busdma sync operation, and
	 * disable SDHCI data interrupts because we'll be driven by DMA
	 * interrupts (or SDHCI error interrupts) until the IO is done.
	 */
	if (idx == 0) {
		bus_dmamap_sync(sc->sc_dma_tag, sc->sc_dma_map, sync_op);
		slot->intmask &= ~(SDHCI_INT_DATA_AVAIL |
		    SDHCI_INT_SPACE_AVAIL | SDHCI_INT_DATA_END);
		bcm_sdhci_write_4(sc->sc_dev, &sc->sc_slot,
		    SDHCI_SIGNAL_ENABLE, slot->intmask);
	}

	/*
	 * Start the DMA transfer.  Only programming errors (like failing to
	 * allocate a channel) cause a non-zero return from bcm_dma_start().
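	 * That is why the return value is only checked with a KASSERT below
	 * rather than handled as a runtime error.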
*/ err = bcm_dma_start(sc->sc_dma_ch, psrc, pdst, len); KASSERT((err == 0), ("bcm2835_sdhci: failed DMA start")); } static void bcm_sdhci_dma_intr(int ch, void *arg) { struct bcm_sdhci_softc *sc = (struct bcm_sdhci_softc *)arg; struct sdhci_slot *slot = &sc->sc_slot; uint32_t reg, mask; int left, sync_op; mtx_lock(&slot->mtx); /* * If there are more segments for the current dma, start the next one. * Otherwise unload the dma map and decide what to do next based on the * status of the sdhci controller and whether there's more data left. */ if (sc->dmamap_seg_index < sc->dmamap_seg_count) { bcm_sdhci_start_dma_seg(sc); mtx_unlock(&slot->mtx); return; } if (slot->curcmd->data->flags & MMC_DATA_READ) { sync_op = BUS_DMASYNC_POSTREAD; mask = SDHCI_INT_DATA_AVAIL; } else { sync_op = BUS_DMASYNC_POSTWRITE; mask = SDHCI_INT_SPACE_AVAIL; } bus_dmamap_sync(sc->sc_dma_tag, sc->sc_dma_map, sync_op); bus_dmamap_unload(sc->sc_dma_tag, sc->sc_dma_map); sc->dmamap_seg_count = 0; sc->dmamap_seg_index = 0; left = min(BCM_SDHCI_BUFFER_SIZE, slot->curcmd->data->len - slot->offset); /* DATA END? */ reg = bcm_sdhci_read_4(slot->bus, slot, SDHCI_INT_STATUS); if (reg & SDHCI_INT_DATA_END) { /* ACK for all outstanding interrupts */ bcm_sdhci_write_4(slot->bus, slot, SDHCI_INT_STATUS, reg); /* enable INT */ slot->intmask |= SDHCI_INT_DATA_AVAIL | SDHCI_INT_SPACE_AVAIL | SDHCI_INT_DATA_END; bcm_sdhci_write_4(slot->bus, slot, SDHCI_SIGNAL_ENABLE, slot->intmask); /* finish this data */ sdhci_finish_data(slot); } else { /* already available? */ if (reg & mask) { /* ACK for DATA_AVAIL or SPACE_AVAIL */ bcm_sdhci_write_4(slot->bus, slot, SDHCI_INT_STATUS, mask); /* continue next DMA transfer */ if (bus_dmamap_load(sc->sc_dma_tag, sc->sc_dma_map, (uint8_t *)slot->curcmd->data->data + slot->offset, left, bcm_sdhci_dmacb, sc, BUS_DMA_NOWAIT) != 0 || sc->dmamap_status != 0) { slot->curcmd->error = MMC_ERR_NO_MEMORY; sdhci_finish_data(slot); } else { bcm_sdhci_start_dma_seg(sc); } } else { /* wait for next data by INT */ /* enable INT */ slot->intmask |= SDHCI_INT_DATA_AVAIL | SDHCI_INT_SPACE_AVAIL | SDHCI_INT_DATA_END; bcm_sdhci_write_4(slot->bus, slot, SDHCI_SIGNAL_ENABLE, slot->intmask); } } mtx_unlock(&slot->mtx); } static void bcm_sdhci_read_dma(device_t dev, struct sdhci_slot *slot) { struct bcm_sdhci_softc *sc = device_get_softc(slot->bus); size_t left; if (sc->dmamap_seg_count != 0) { device_printf(sc->sc_dev, "DMA in use\n"); return; } left = min(BCM_SDHCI_BUFFER_SIZE, slot->curcmd->data->len - slot->offset); KASSERT((left & 3) == 0, ("%s: len = %zu, not word-aligned", __func__, left)); if (bus_dmamap_load(sc->sc_dma_tag, sc->sc_dma_map, (uint8_t *)slot->curcmd->data->data + slot->offset, left, bcm_sdhci_dmacb, sc, BUS_DMA_NOWAIT) != 0 || sc->dmamap_status != 0) { slot->curcmd->error = MMC_ERR_NO_MEMORY; return; } /* DMA start */ bcm_sdhci_start_dma_seg(sc); } static void bcm_sdhci_write_dma(device_t dev, struct sdhci_slot *slot) { struct bcm_sdhci_softc *sc = device_get_softc(slot->bus); size_t left; if (sc->dmamap_seg_count != 0) { device_printf(sc->sc_dev, "DMA in use\n"); return; } left = min(BCM_SDHCI_BUFFER_SIZE, slot->curcmd->data->len - slot->offset); KASSERT((left & 3) == 0, ("%s: len = %zu, not word-aligned", __func__, left)); if (bus_dmamap_load(sc->sc_dma_tag, sc->sc_dma_map, (uint8_t *)slot->curcmd->data->data + slot->offset, left, bcm_sdhci_dmacb, sc, BUS_DMA_NOWAIT) != 0 || sc->dmamap_status != 0) { slot->curcmd->error = MMC_ERR_NO_MEMORY; return; } /* DMA start */ bcm_sdhci_start_dma_seg(sc); } static 
int bcm_sdhci_will_handle_transfer(device_t dev, struct sdhci_slot *slot) { size_t left; /* * Do not use DMA for transfers less than block size or with a length * that is not a multiple of four. */ left = min(BCM_DMA_BLOCK_SIZE, slot->curcmd->data->len - slot->offset); if (left < BCM_DMA_BLOCK_SIZE) return (0); if (left & 0x03) return (0); return (1); } static void bcm_sdhci_start_transfer(device_t dev, struct sdhci_slot *slot, uint32_t *intmask) { /* DMA transfer FIFO 1KB */ if (slot->curcmd->data->flags & MMC_DATA_READ) bcm_sdhci_read_dma(dev, slot); else bcm_sdhci_write_dma(dev, slot); } static void bcm_sdhci_finish_transfer(device_t dev, struct sdhci_slot *slot) { sdhci_finish_data(slot); } static device_method_t bcm_sdhci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, bcm_sdhci_probe), DEVMETHOD(device_attach, bcm_sdhci_attach), DEVMETHOD(device_detach, bcm_sdhci_detach), /* Bus interface */ DEVMETHOD(bus_read_ivar, sdhci_generic_read_ivar), DEVMETHOD(bus_write_ivar, sdhci_generic_write_ivar), /* MMC bridge interface */ DEVMETHOD(mmcbr_update_ios, sdhci_generic_update_ios), DEVMETHOD(mmcbr_request, sdhci_generic_request), DEVMETHOD(mmcbr_get_ro, bcm_sdhci_get_ro), DEVMETHOD(mmcbr_acquire_host, sdhci_generic_acquire_host), DEVMETHOD(mmcbr_release_host, sdhci_generic_release_host), /* Platform transfer methods */ DEVMETHOD(sdhci_platform_will_handle, bcm_sdhci_will_handle_transfer), DEVMETHOD(sdhci_platform_start_transfer, bcm_sdhci_start_transfer), DEVMETHOD(sdhci_platform_finish_transfer, bcm_sdhci_finish_transfer), /* SDHCI registers accessors */ DEVMETHOD(sdhci_read_1, bcm_sdhci_read_1), DEVMETHOD(sdhci_read_2, bcm_sdhci_read_2), DEVMETHOD(sdhci_read_4, bcm_sdhci_read_4), DEVMETHOD(sdhci_read_multi_4, bcm_sdhci_read_multi_4), DEVMETHOD(sdhci_write_1, bcm_sdhci_write_1), DEVMETHOD(sdhci_write_2, bcm_sdhci_write_2), DEVMETHOD(sdhci_write_4, bcm_sdhci_write_4), DEVMETHOD(sdhci_write_multi_4, bcm_sdhci_write_multi_4), DEVMETHOD_END }; static devclass_t bcm_sdhci_devclass; static driver_t bcm_sdhci_driver = { "sdhci_bcm", bcm_sdhci_methods, sizeof(struct bcm_sdhci_softc), }; DRIVER_MODULE(sdhci_bcm, simplebus, bcm_sdhci_driver, bcm_sdhci_devclass, NULL, NULL); MODULE_DEPEND(sdhci_bcm, sdhci, 1, 1, 1); #ifndef MMCCAM MMC_DECLARE_BRIDGE(sdhci_bcm); #endif Index: projects/runtime-coverage/sys/arm/conf/VIRT =================================================================== --- projects/runtime-coverage/sys/arm/conf/VIRT (revision 323974) +++ projects/runtime-coverage/sys/arm/conf/VIRT (nonexistent) @@ -1,59 +0,0 @@ -# -# VIRT -- Custom configuration for the qemu virt platform -# -# For more information on this file, please read the config(5) manual page, -# and/or the handbook section on Kernel Configuration Files: -# -# http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html -# -# The handbook is also available locally in /usr/share/doc/handbook -# if you've installed the doc distribution, otherwise always see the -# FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the -# latest information. -# -# An exhaustive list of options and more detailed explanations of the -# device lines is also present in the ../../conf/NOTES and NOTES files. -# If you are in doubt as to the purpose or necessity of a line, check first -# in NOTES. 
-# -# $FreeBSD$ - -ident VIRT - -include "std.armv6" -include "../qemu/std.virt" - -options SCHED_ULE # ULE scheduler -options PLATFORM -options SMP # Enable multiple cores - -# Interrupt controller -device gic -options INTRNG - -# ARM Generic Timer -device generic_timer - -device bpf -device loop -device ether -device uart -device pty -device snp -device pl011 -device psci - -device virtio -device virtio_mmio -device virtio_blk -device vtnet - -device md -device random # Entropy device - -# Flattened Device Tree -options FDT # Configure using FDT/DTB data - -# Extensible Firmware Interface -options EFI - Property changes on: projects/runtime-coverage/sys/arm/conf/VIRT ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/runtime-coverage/sys/arm/conf/GENERIC =================================================================== --- projects/runtime-coverage/sys/arm/conf/GENERIC (revision 323974) +++ projects/runtime-coverage/sys/arm/conf/GENERIC (revision 323975) @@ -1,240 +1,249 @@ # # GENERICV6 -- Generic(ish) kernel config. # # For more information on this file, please read the config(5) manual page, # and/or the handbook section on Kernel Configuration Files: # # http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html # # The handbook is also available locally in /usr/share/doc/handbook # if you've installed the doc distribution, otherwise always see the # FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the # latest information. # # An exhaustive list of options and more detailed explanations of the # device lines is also present in the ../../conf/NOTES and NOTES files. # If you are in doubt as to the purpose or necessity of a line, check first # in NOTES. # # $FreeBSD$ ident GENERIC cpu CPU_CORTEXA options SMP_ON_UP machine arm armv6 makeoptions CONF_CFLAGS="-march=armv7a" makeoptions KERNVIRTADDR=0xc0000000 options KERNVIRTADDR=0xc0000000 include "std.armv6" files "../allwinner/files.allwinner" files "../allwinner/files.allwinner_up" files "../allwinner/a10/files.a10" files "../allwinner/a13/files.a13" files "../allwinner/a20/files.a20" files "../allwinner/a31/files.a31" files "../allwinner/a33/files.a33" files "../allwinner/a83t/files.a83t" files "../allwinner/h3/files.h3" files "../broadcom/bcm2835/files.bcm2836" files "../broadcom/bcm2835/files.bcm283x" +files "../freescale/imx/files.imx6" files "../nvidia/tegra124/files.tegra124" files "../qemu/files.qemu" files "../ti/files.ti" files "../ti/am335x/files.am335x" files "../ti/omap4/files.omap4" +files "../xilinx/files.zynq7" options SOC_ALLWINNER_A10 options SOC_ALLWINNER_A13 options SOC_ALLWINNER_A20 options SOC_ALLWINNER_A31 options SOC_ALLWINNER_A31S options SOC_ALLWINNER_A33 options SOC_ALLWINNER_A83T options SOC_ALLWINNER_H2PLUS options SOC_ALLWINNER_H3 options SOC_BCM2836 options SOC_TI_AM335X options SOC_OMAP4 options SCHED_ULE # ULE scheduler options SMP # Enable multiple cores options PLATFORM options LINUX_BOOT_ABI # EXT_RESOURCES pseudo devices options EXT_RESOURCES device clk device phy device hwreset device regulator # CPU frequency control device cpufreq # Interrupt controller options INTRNG device gic # PMU support (for CCNT). 
device		pmu

# ARM Generic Timer
device		generic_timer
device		mpcore_timer

# MMC/SD/SDIO Card slot support
device		sdhci			# SD controller
device		mmc			# mmc/sd bus
device		mmcsd			# mmc/sd flash cards

# ATA controllers
device		ahci			# AHCI-compatible SATA controllers
#device		ata			# Legacy ATA/SATA controllers

# PCI
options 	NEW_PCIB
device		pci

# PCI NICs
device		re			# RealTek 8139C+/8169/8169S/8110S

# VirtIO
device		virtio
device		virtio_mmio
device		virtio_blk
device		vtnet

# Console and misc
device		uart
device		uart_ns8250
device		uart_snps
device		pl011
device		pty
device		snp
device		md			# Memory "disks"
device		random			# Entropy device
+device		firmware		# firmware assist module
device		pl310			# PL310 L2 cache controller
device		psci

# I2C support
device		iicbus
device		iic
device		twsi
device		rsb			# Allwinner Reduced Serial Bus
device		p2wi			# Allwinner Push-Pull Two Wire
device		axp209			# AXP209 Power Management Unit
device		axp81x			# AXP813/818 Power Management Unit
device		bcm2835_bsc
+device		fsliic			# Freescale i2c/iic
device		icee			# AT24Cxxx and compatible EEPROMs
device		sy8106a			# SY8106A Buck Regulator
device		ti_i2c
device		am335x_pmic		# AM335x Power Management IC (TPC65217)
device		am335x_rtc		# RTC support (power management only)
device		twl			# TI TWLX0X0/TPS659x0 Power Management
device		twl_vreg		# twl voltage regulation
device		twl_clks		# twl external clocks

# i2c RTCs
device		ds1307			# Dallas DS1307 RTC and compatible
device		ds13rtc			# All Dallas/Maxim DS13xx RTCs
device		ds1672			# Dallas DS1672 RTC
device		ds3231			# Dallas DS3231 RTC + temperature
device		nxprtc			# NXP RTCs: PCA/PFC212x PCA/PCF85xx
device		s35390a			# Seiko s3539x RTCs

# GPIO
device		gpio
device		gpiobacklight
device		gpioled
device		gpioregulator

# EVDEV support
device		evdev			# input event device support
options 	EVDEV_SUPPORT		# evdev support in legacy drivers
device		uinput			# install /dev/uinput cdev
device		aw_cir

# SPI
device		spibus
device		spigen
device		bcm2835_spi
device		ti_spi

# ADC support
device		ti_adc

# Watchdog support
# If we don't enable the watchdog driver, the BeagleBone could potentially
# reboot automatically because the boot loader might have enabled the
# watchdog.
device		ti_wdt
+device		imxwdt			# Watchdog. WARNING: can't be disabled!!!

device		scbus			# SCSI bus (required for ATA/SCSI)
device		da			# Direct Access (disks)
device		cd			# CD
device		pass			# Passthrough device (direct ATA/SCSI access)

# USB support
options 	USB_HOST_ALIGN=64	# Align usb buffers to cache line size.
device		usb
#device		uhci
device		ohci
device		ehci
device		dwcotg			# DWC OTG controller
device		musb
+device		axe			# USB-Ethernet
device		umass			# Disks/Mass storage - Requires scbus and da
device		uhid			# "Human Interface Devices"
device		ukbd			# Allow keyboard like HIDs to control console

# Device mode support
device		usb_template		# Control of the gadget

# Ethernet
device		loop
device		ether
device		vlan			# 802.1Q VLAN support
device		bpf

# Ethernet NICs that use the common MII bus controller code.
# NOTE: Be sure to keep the 'device miibus' line in order to use these NICs!
device miibus device awg # 10/100/1000 integrated EMAC controller device cpsw # TI Common Platform Ethernet Switch (CPSW) +device cgem # Zynq-7000 gig ethernet device device dwc # 10/100/1000 integrated GMAC controller device emac # 10/100 integrated EMAC controller +device ffec # Freescale Fast Ethernet Controller device smsc # SMSC LAN91C111 # Sound support device sound # Framebuffer support device vt device kbdmux device ums device videomode device hdmi device vchiq # Pinmux device fdt_pinctrl # TI Programmable Realtime Unit support device ti_pruss # Mailbox support device ti_mbox # DMA controller +device fslsdma device ti_sdma # Extensible Firmware Interface options EFI # Flattened Device Tree options FDT # Configure using FDT/DTB data -makeoptions MODULES_EXTRA="dtb/allwinner dtb/am335x dtb/nvidia dtb/rpi dtb/omap4" +makeoptions MODULES_EXTRA="dtb/allwinner dtb/am335x dtb/imx6 dtb/nvidia dtb/rpi dtb/zynq dtb/omap4" Index: projects/runtime-coverage/sys/arm/freescale/imx/files.imx6 =================================================================== --- projects/runtime-coverage/sys/arm/freescale/imx/files.imx6 (revision 323974) +++ projects/runtime-coverage/sys/arm/freescale/imx/files.imx6 (revision 323975) @@ -1,74 +1,74 @@ # $FreeBSD$ # # Standard ARM support. # kern/kern_clocksource.c standard # # Standard imx6 devices and support. # arm/freescale/fsl_ocotp.c standard arm/freescale/imx/imx6_anatop.c standard arm/freescale/imx/imx6_ccm.c standard arm/freescale/imx/imx6_machdep.c standard arm/freescale/imx/imx6_mp.c optional smp arm/freescale/imx/imx6_pl310.c standard arm/freescale/imx/imx6_snvs.c optional imx6_snvs arm/freescale/imx/imx6_src.c standard arm/freescale/imx/imx_epit.c standard arm/freescale/imx/imx_iomux.c standard arm/freescale/imx/imx_machdep.c standard arm/freescale/imx/imx_gpt.c optional imx_gpt arm/freescale/imx/imx_gpio.c optional gpio arm/freescale/imx/imx_i2c.c optional fsliic -arm/freescale/imx/imx6_sdma.c optional sdma +arm/freescale/imx/imx6_sdma.c optional fslsdma arm/freescale/imx/imx6_audmux.c optional sound arm/freescale/imx/imx6_ssi.c optional sound arm/freescale/imx/imx6_ahci.c optional ahci dev/hdmi/hdmi_if.m optional hdmi dev/hdmi/dwc_hdmi.c optional hdmi arm/freescale/imx/imx6_hdmi.c optional hdmi arm/freescale/imx/imx6_ipu.c optional vt # # Optional devices. # dev/sdhci/fsl_sdhci.c optional sdhci arm/freescale/imx/imx_wdog.c optional imxwdt dev/ffec/if_ffec.c optional ffec dev/uart/uart_dev_imx.c optional uart dev/usb/controller/ehci_imx.c optional ehci arm/freescale/imx/imx6_usbphy.c optional ehci # # Low-level serial console for debugging early kernel startup. # #arm/freescale/imx/imx_console.c standard # # Not ready yet... 
# #arm/freescale/imx/imx51_ipuv3.c optional sc # SDMA firmware sdma_fw.c optional sdma_fw \ compile-with "${AWK} -f $S/tools/fw_stub.awk sdma-imx6q-to1.bin:sdma_fw -msdma -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "sdma_fw.c" sdma-imx6q-to1.fwo optional sdma_fw \ dependency "sdma-imx6q-to1.bin" \ compile-with "${LD} -b binary -d -warn-common -r -d -o ${.TARGET} sdma-imx6q-to1.bin" \ no-implicit-rule \ clean "sdma-imx6q-to1.fwo" sdma-imx6q-to1.bin optional sdma_fw \ dependency "$S/contrib/dev/imx/sdma-imx6q-to1.bin.uu" \ compile-with "uudecode < $S/contrib/dev/imx/sdma-imx6q-to1.bin.uu" \ no-obj no-implicit-rule \ clean "sdma-imx6q-to1.bin" Index: projects/runtime-coverage/sys/arm/ti/ti_sdhci.c =================================================================== --- projects/runtime-coverage/sys/arm/ti/ti_sdhci.c (revision 323974) +++ projects/runtime-coverage/sys/arm/ti/ti_sdhci.c (revision 323975) @@ -1,764 +1,760 @@ /*- * Copyright (c) 2013 Ian Lepore * Copyright (c) 2011 Ben Gray . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "sdhci_if.h" #include #include #include #include "gpio_if.h" #include "opt_mmccam.h" struct ti_sdhci_softc { device_t dev; struct sdhci_fdt_gpio * gpio; struct resource * mem_res; struct resource * irq_res; void * intr_cookie; struct sdhci_slot slot; clk_ident_t mmchs_clk_id; uint32_t mmchs_reg_off; uint32_t sdhci_reg_off; uint32_t baseclk_hz; uint32_t cmd_and_mode; uint32_t sdhci_clkdiv; boolean_t disable_highspeed; boolean_t force_card_present; boolean_t disable_readonly; }; /* * Table of supported FDT compat strings. * * Note that "ti,mmchs" is our own invention, and should be phased out in favor * of the documented names. * * Note that vendor Beaglebone dtsi files use "ti,omap3-hsmmc" for the am335x. */ static struct ofw_compat_data compat_data[] = { {"ti,omap3-hsmmc", 1}, {"ti,omap4-hsmmc", 1}, {"ti,mmchs", 1}, {NULL, 0}, }; /* * The MMCHS hardware has a few control and status registers at the beginning of * the device's memory map, followed by the standard sdhci register block. 
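 * (This is why every register accessor below adds either mmchs_reg_off or
 * sdhci_reg_off to the offset it is given.)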
 * Different SoCs have the register blocks at different offsets from the
 * beginning of the device.  Define some constants to map out the registers we
 * access, and the various per-SoC offsets.  The SDHCI_REG_OFFSET is how far
 * beyond the MMCHS block the SDHCI block is found; it's the same on all SoCs.
 */
#define	OMAP3_MMCHS_REG_OFFSET		0x000
#define	OMAP4_MMCHS_REG_OFFSET		0x100
#define	AM335X_MMCHS_REG_OFFSET		0x100
#define	SDHCI_REG_OFFSET		0x100

#define	MMCHS_SYSCONFIG			0x010
#define	MMCHS_SYSCONFIG_RESET		(1 << 1)
#define	MMCHS_SYSSTATUS			0x014
#define	MMCHS_SYSSTATUS_RESETDONE	(1 << 0)
#define	MMCHS_CON			0x02C
#define	MMCHS_CON_DW8			(1 << 5)
#define	MMCHS_CON_DVAL_8_4MS		(3 << 9)
#define	MMCHS_CON_OD			(1 << 0)
#define	MMCHS_SYSCTL			0x12C
#define	MMCHS_SYSCTL_CLKD_MASK		0x3FF
#define	MMCHS_SYSCTL_CLKD_SHIFT		6
#define	MMCHS_SD_CAPA			0x140
#define	MMCHS_SD_CAPA_VS18		(1 << 26)
#define	MMCHS_SD_CAPA_VS30		(1 << 25)
#define	MMCHS_SD_CAPA_VS33		(1 << 24)

/* Forward declarations, CAM-related */
// static void ti_sdhci_cam_poll(struct cam_sim *);
// static void ti_sdhci_cam_action(struct cam_sim *, union ccb *);
// static int ti_sdhci_cam_settran_settings(struct ti_sdhci_softc *sc, union ccb *);

static inline uint32_t
ti_mmchs_read_4(struct ti_sdhci_softc *sc, bus_size_t off)
{

	return (bus_read_4(sc->mem_res, off + sc->mmchs_reg_off));
}

static inline void
ti_mmchs_write_4(struct ti_sdhci_softc *sc, bus_size_t off, uint32_t val)
{

	bus_write_4(sc->mem_res, off + sc->mmchs_reg_off, val);
}

static inline uint32_t
RD4(struct ti_sdhci_softc *sc, bus_size_t off)
{

	return (bus_read_4(sc->mem_res, off + sc->sdhci_reg_off));
}

static inline void
WR4(struct ti_sdhci_softc *sc, bus_size_t off, uint32_t val)
{

	bus_write_4(sc->mem_res, off + sc->sdhci_reg_off, val);
}

static uint8_t
ti_sdhci_read_1(device_t dev, struct sdhci_slot *slot, bus_size_t off)
{
	struct ti_sdhci_softc *sc = device_get_softc(dev);

	return ((RD4(sc, off & ~3) >> (off & 3) * 8) & 0xff);
}

static uint16_t
ti_sdhci_read_2(device_t dev, struct sdhci_slot *slot, bus_size_t off)
{
	struct ti_sdhci_softc *sc = device_get_softc(dev);
	uint32_t clkdiv, val32;

	/*
	 * The MMCHS hardware has a non-standard interpretation of the sdclock
	 * divisor bits.  It uses the same bit positions as SDHCI 3.0 (15..6)
	 * but doesn't split them into low:high fields.  Instead they're a
	 * single number in the range 0..1023 and the number is exactly the
	 * clock divisor (with 0 and 1 both meaning divide by 1).  The SDHCI
	 * driver code expects a v2.0 or v3.0 divisor.  The shifting and
	 * masking here extracts the MMCHS representation from the hardware
	 * word, cleans those bits out, applies the 2N adjustment, and plugs
	 * the result into the bit positions for the 2.0 or 3.0 divisor in the
	 * returned register value.  For example, an MMCHS divisor of 8 reads
	 * back as a divisor value of 4 in the v3.0 bit positions, which the
	 * sdhci driver decodes as base / (2 * 4) = base / 8.  The
	 * ti_sdhci_write_2() routine performs the opposite transformation
	 * when the SDHCI driver writes to the register.
	 */
	if (off == SDHCI_CLOCK_CONTROL) {
		val32 = RD4(sc, SDHCI_CLOCK_CONTROL);
		clkdiv = ((val32 >> MMCHS_SYSCTL_CLKD_SHIFT) &
		    MMCHS_SYSCTL_CLKD_MASK) / 2;
		val32 &= ~(MMCHS_SYSCTL_CLKD_MASK << MMCHS_SYSCTL_CLKD_SHIFT);
		val32 |= (clkdiv & SDHCI_DIVIDER_MASK) << SDHCI_DIVIDER_SHIFT;
		if (slot->version >= SDHCI_SPEC_300)
			val32 |= ((clkdiv >> SDHCI_DIVIDER_MASK_LEN) &
			    SDHCI_DIVIDER_HI_MASK) << SDHCI_DIVIDER_HI_SHIFT;
		return (val32 & 0xffff);
	}

	/*
	 * Standard 32-bit handling of command and transfer mode.
*/ if (off == SDHCI_TRANSFER_MODE) { return (sc->cmd_and_mode >> 16); } else if (off == SDHCI_COMMAND_FLAGS) { return (sc->cmd_and_mode & 0x0000ffff); } return ((RD4(sc, off & ~3) >> (off & 3) * 8) & 0xffff); } static uint32_t ti_sdhci_read_4(device_t dev, struct sdhci_slot *slot, bus_size_t off) { struct ti_sdhci_softc *sc = device_get_softc(dev); uint32_t val32; val32 = RD4(sc, off); /* * If we need to disallow highspeed mode due to the OMAP4 erratum, strip * that flag from the returned capabilities. */ if (off == SDHCI_CAPABILITIES && sc->disable_highspeed) val32 &= ~SDHCI_CAN_DO_HISPD; /* * Force the card-present state if necessary. */ if (off == SDHCI_PRESENT_STATE && sc->force_card_present) val32 |= SDHCI_CARD_PRESENT; return (val32); } static void ti_sdhci_read_multi_4(device_t dev, struct sdhci_slot *slot, bus_size_t off, uint32_t *data, bus_size_t count) { struct ti_sdhci_softc *sc = device_get_softc(dev); bus_read_multi_4(sc->mem_res, off + sc->sdhci_reg_off, data, count); } static void ti_sdhci_write_1(device_t dev, struct sdhci_slot *slot, bus_size_t off, uint8_t val) { struct ti_sdhci_softc *sc = device_get_softc(dev); uint32_t val32; #ifdef MMCCAM uint32_t newval32; if (off == SDHCI_HOST_CONTROL) { val32 = ti_mmchs_read_4(sc, MMCHS_CON); newval32 = val32; if (val & SDHCI_CTRL_8BITBUS) { device_printf(dev, "Custom-enabling 8-bit bus\n"); newval32 |= MMCHS_CON_DW8; } else { device_printf(dev, "Custom-disabling 8-bit bus\n"); newval32 &= ~MMCHS_CON_DW8; } if (newval32 != val32) ti_mmchs_write_4(sc, MMCHS_CON, newval32); } #endif val32 = RD4(sc, off & ~3); val32 &= ~(0xff << (off & 3) * 8); val32 |= (val << (off & 3) * 8); WR4(sc, off & ~3, val32); } static void ti_sdhci_write_2(device_t dev, struct sdhci_slot *slot, bus_size_t off, uint16_t val) { struct ti_sdhci_softc *sc = device_get_softc(dev); uint32_t clkdiv, val32; /* * Translate between the hardware and SDHCI 2.0 or 3.0 representations * of the clock divisor. See the comments in ti_sdhci_read_2() for * details. */ if (off == SDHCI_CLOCK_CONTROL) { clkdiv = (val >> SDHCI_DIVIDER_SHIFT) & SDHCI_DIVIDER_MASK; if (slot->version >= SDHCI_SPEC_300) clkdiv |= ((val >> SDHCI_DIVIDER_HI_SHIFT) & SDHCI_DIVIDER_HI_MASK) << SDHCI_DIVIDER_MASK_LEN; clkdiv *= 2; if (clkdiv > MMCHS_SYSCTL_CLKD_MASK) clkdiv = MMCHS_SYSCTL_CLKD_MASK; val32 = RD4(sc, SDHCI_CLOCK_CONTROL); val32 &= 0xffff0000; val32 |= val & ~(MMCHS_SYSCTL_CLKD_MASK << MMCHS_SYSCTL_CLKD_SHIFT); val32 |= clkdiv << MMCHS_SYSCTL_CLKD_SHIFT; WR4(sc, SDHCI_CLOCK_CONTROL, val32); return; } /* * Standard 32-bit handling of command and transfer mode. 
*/ if (off == SDHCI_TRANSFER_MODE) { sc->cmd_and_mode = (sc->cmd_and_mode & 0xffff0000) | ((uint32_t)val & 0x0000ffff); return; } else if (off == SDHCI_COMMAND_FLAGS) { sc->cmd_and_mode = (sc->cmd_and_mode & 0x0000ffff) | ((uint32_t)val << 16); WR4(sc, SDHCI_TRANSFER_MODE, sc->cmd_and_mode); return; } val32 = RD4(sc, off & ~3); val32 &= ~(0xffff << (off & 3) * 8); val32 |= ((val & 0xffff) << (off & 3) * 8); WR4(sc, off & ~3, val32); } static void ti_sdhci_write_4(device_t dev, struct sdhci_slot *slot, bus_size_t off, uint32_t val) { struct ti_sdhci_softc *sc = device_get_softc(dev); WR4(sc, off, val); } static void ti_sdhci_write_multi_4(device_t dev, struct sdhci_slot *slot, bus_size_t off, uint32_t *data, bus_size_t count) { struct ti_sdhci_softc *sc = device_get_softc(dev); bus_write_multi_4(sc->mem_res, off + sc->sdhci_reg_off, data, count); } static void ti_sdhci_intr(void *arg) { struct ti_sdhci_softc *sc = arg; sdhci_generic_intr(&sc->slot); } static int ti_sdhci_update_ios(device_t brdev, device_t reqdev) { struct ti_sdhci_softc *sc = device_get_softc(brdev); struct sdhci_slot *slot; struct mmc_ios *ios; uint32_t val32, newval32; slot = device_get_ivars(reqdev); ios = &slot->host.ios; /* * There is an 8-bit-bus bit in the MMCHS control register which, when * set, overrides the 1 vs 4 bit setting in the standard SDHCI * registers. Set that bit first according to whether an 8-bit bus is * requested, then let the standard driver handle everything else. */ val32 = ti_mmchs_read_4(sc, MMCHS_CON); newval32 = val32; if (ios->bus_width == bus_width_8) newval32 |= MMCHS_CON_DW8; else newval32 &= ~MMCHS_CON_DW8; if (ios->bus_mode == opendrain) newval32 |= MMCHS_CON_OD; else /* if (ios->bus_mode == pushpull) */ newval32 &= ~MMCHS_CON_OD; if (newval32 != val32) ti_mmchs_write_4(sc, MMCHS_CON, newval32); return (sdhci_generic_update_ios(brdev, reqdev)); } static int ti_sdhci_get_ro(device_t brdev, device_t reqdev) { struct ti_sdhci_softc *sc = device_get_softc(brdev); if (sc->disable_readonly) return (0); return (sdhci_fdt_gpio_get_readonly(sc->gpio)); } static bool ti_sdhci_get_card_present(device_t dev, struct sdhci_slot *slot) { struct ti_sdhci_softc *sc = device_get_softc(dev); return (sdhci_fdt_gpio_get_present(sc->gpio)); } static int ti_sdhci_detach(device_t dev) { /* sdhci_fdt_gpio_teardown(sc->gpio); */ return (EBUSY); } static void ti_sdhci_hw_init(device_t dev) { struct ti_sdhci_softc *sc = device_get_softc(dev); uint32_t regval; unsigned long timeout; /* Enable the controller and interface/functional clocks */ if (ti_prcm_clk_enable(sc->mmchs_clk_id) != 0) { device_printf(dev, "Error: failed to enable MMC clock\n"); return; } /* Get the frequency of the source clock */ if (ti_prcm_clk_get_source_freq(sc->mmchs_clk_id, &sc->baseclk_hz) != 0) { device_printf(dev, "Error: failed to get source clock freq\n"); return; } /* Issue a softreset to the controller */ ti_mmchs_write_4(sc, MMCHS_SYSCONFIG, MMCHS_SYSCONFIG_RESET); timeout = 1000; while (!(ti_mmchs_read_4(sc, MMCHS_SYSSTATUS) & MMCHS_SYSSTATUS_RESETDONE)) { if (--timeout == 0) { device_printf(dev, "Error: Controller reset operation timed out\n"); break; } DELAY(100); } /* * Reset the command and data state machines and also other aspects of * the controller such as bus clock and power. * * If we read the software reset register too fast after writing it we * can get back a zero that means the reset hasn't started yet rather * than that the reset is complete. 
Per TI recommendations, work around * it by reading until we see the reset bit asserted, then read until * it's clear. We also set the SDHCI_QUIRK_WAITFOR_RESET_ASSERTED quirk * so that the main sdhci driver uses this same logic in its resets. */ ti_sdhci_write_1(dev, NULL, SDHCI_SOFTWARE_RESET, SDHCI_RESET_ALL); timeout = 10000; while ((ti_sdhci_read_1(dev, NULL, SDHCI_SOFTWARE_RESET) & SDHCI_RESET_ALL) != SDHCI_RESET_ALL) { if (--timeout == 0) { break; } DELAY(1); } timeout = 10000; while ((ti_sdhci_read_1(dev, NULL, SDHCI_SOFTWARE_RESET) & SDHCI_RESET_ALL)) { if (--timeout == 0) { device_printf(dev, "Error: Software reset operation timed out\n"); break; } DELAY(100); } /* * The attach() routine has examined fdt data and set flags in * slot.host.caps to reflect what voltages we can handle. Set those * values in the CAPA register. The manual says that these values can * only be set once, "before initialization" whatever that means, and * that they survive a reset. So maybe doing this will be a no-op if * u-boot has already initialized the hardware. */ regval = ti_mmchs_read_4(sc, MMCHS_SD_CAPA); if (sc->slot.host.caps & MMC_OCR_LOW_VOLTAGE) regval |= MMCHS_SD_CAPA_VS18; if (sc->slot.host.caps & (MMC_OCR_290_300 | MMC_OCR_300_310)) regval |= MMCHS_SD_CAPA_VS30; ti_mmchs_write_4(sc, MMCHS_SD_CAPA, regval); /* Set initial host configuration (1-bit, std speed, pwr off). */ ti_sdhci_write_1(dev, NULL, SDHCI_HOST_CONTROL, 0); ti_sdhci_write_1(dev, NULL, SDHCI_POWER_CONTROL, 0); /* Set the initial controller configuration. */ ti_mmchs_write_4(sc, MMCHS_CON, MMCHS_CON_DVAL_8_4MS); } static int ti_sdhci_attach(device_t dev) { struct ti_sdhci_softc *sc = device_get_softc(dev); int rid, err; pcell_t prop; phandle_t node; sc->dev = dev; /* * Get the MMCHS device id from FDT. If it's not there use the newbus * unit number (which will work as long as the devices are in order and * none are skipped in the fdt). Note that this is a property we made * up and added in freebsd, it doesn't exist in the published bindings. */ node = ofw_bus_get_node(dev); sc->mmchs_clk_id = ti_hwmods_get_clock(dev); if (sc->mmchs_clk_id == INVALID_CLK_IDENT) { device_printf(dev, "failed to get clock based on hwmods property\n"); } /* * The hardware can inherently do dual-voltage (1p8v, 3p0v) on the first * device, and only 1p8v on other devices unless an external transceiver * is used. The only way we could know about a transceiver is fdt data. * Note that we have to do this before calling ti_sdhci_hw_init() so * that it can set the right values in the CAPA register, which can only * be done once and never reset. */ sc->slot.host.caps |= MMC_OCR_LOW_VOLTAGE; if (sc->mmchs_clk_id == MMC1_CLK || OF_hasprop(node, "ti,dual-volt")) { sc->slot.host.caps |= MMC_OCR_290_300 | MMC_OCR_300_310; } /* * Set the offset from the device's memory start to the MMCHS registers. * Also for OMAP4 disable high speed mode due to erratum ID i626. */ switch (ti_chip()) { #ifdef SOC_OMAP4 case CHIP_OMAP_4: sc->mmchs_reg_off = OMAP4_MMCHS_REG_OFFSET; sc->disable_highspeed = true; break; #endif #ifdef SOC_TI_AM335X case CHIP_AM335X: sc->mmchs_reg_off = AM335X_MMCHS_REG_OFFSET; break; #endif default: panic("Unknown OMAP device\n"); } /* * The standard SDHCI registers are at a fixed offset (the same on all * SoCs) beyond the MMCHS registers. */ sc->sdhci_reg_off = sc->mmchs_reg_off + SDHCI_REG_OFFSET; /* Resource setup. 
*/ rid = 0; sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!sc->mem_res) { device_printf(dev, "cannot allocate memory window\n"); err = ENXIO; goto fail; } rid = 0; sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (!sc->irq_res) { device_printf(dev, "cannot allocate interrupt\n"); err = ENXIO; goto fail; } if (bus_setup_intr(dev, sc->irq_res, INTR_TYPE_BIO | INTR_MPSAFE, NULL, ti_sdhci_intr, sc, &sc->intr_cookie)) { device_printf(dev, "cannot setup interrupt handler\n"); err = ENXIO; goto fail; } /* * Set up handling of card-detect and write-protect gpio lines. * * If there is no write protect info in the fdt data, fall back to the * historical practice of assuming that the card is writable. This * works around bad fdt data from the upstream source. The alternative * would be to trust the sdhci controller's PRESENT_STATE register WP * bit, but it may say write protect is in effect when it's not if the * pinmux setup doesn't route the WP signal into the sdchi block. */ sc->gpio = sdhci_fdt_gpio_setup(sc->dev, &sc->slot); if (!OF_hasprop(node, "wp-gpios") && !OF_hasprop(node, "wp-disable")) sc->disable_readonly = true; /* Initialise the MMCHS hardware. */ ti_sdhci_hw_init(dev); /* * The capabilities register can only express base clock frequencies in * the range of 0-63MHz for a v2.0 controller. Since our clock runs * faster than that, the hardware sets the frequency to zero in the * register. When the register contains zero, the sdhci driver expects * slot.max_clk to already have the right value in it. */ sc->slot.max_clk = sc->baseclk_hz; /* * The MMCHS timeout counter is based on the output sdclock. Tell the * sdhci driver to recalculate the timeout clock whenever the output * sdclock frequency changes. */ sc->slot.quirks |= SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK; /* * The MMCHS hardware shifts the 136-bit response data (in violation of * the spec), so tell the sdhci driver not to do the same in software. */ sc->slot.quirks |= SDHCI_QUIRK_DONT_SHIFT_RESPONSE; /* * Reset bits are broken, have to wait to see the bits asserted * before waiting to see them de-asserted. */ sc->slot.quirks |= SDHCI_QUIRK_WAITFOR_RESET_ASSERTED; /* * The controller waits for busy responses. */ sc->slot.quirks |= SDHCI_QUIRK_WAIT_WHILE_BUSY; /* * DMA is not really broken, I just haven't implemented it yet. */ sc->slot.quirks |= SDHCI_QUIRK_BROKEN_DMA; /* * Set up the hardware and go. Note that this sets many of the * slot.host.* fields, so we have to do this before overriding any of * those values based on fdt data, below. */ sdhci_init_slot(dev, &sc->slot, 0); /* * The SDHCI controller doesn't realize it, but we can support 8-bit * even though we're not a v3.0 controller. If there's an fdt bus-width * property, honor it. */ if (OF_getencprop(node, "bus-width", &prop, sizeof(prop)) > 0) { sc->slot.host.caps &= ~(MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA); switch (prop) { case 8: sc->slot.host.caps |= MMC_CAP_8_BIT_DATA; /* FALLTHROUGH */ case 4: sc->slot.host.caps |= MMC_CAP_4_BIT_DATA; break; case 1: break; default: device_printf(dev, "Bad bus-width value %u\n", prop); break; } } /* * If the slot is flagged with the non-removable property, set our flag * to always force the SDHCI_CARD_PRESENT bit on. 
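 * ("non-removable" in the FDT data marks slots without a usable card-detect
 * signal, such as soldered-down eMMC, so card presence can never be sensed
 * at runtime.)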
*/ node = ofw_bus_get_node(dev); if (OF_hasprop(node, "non-removable")) sc->force_card_present = true; bus_generic_probe(dev); bus_generic_attach(dev); -#ifdef MMCCAM - sdhci_cam_start_slot(&sc->slot); -#else sdhci_start_slot(&sc->slot); -#endif return (0); fail: if (sc->intr_cookie) bus_teardown_intr(dev, sc->irq_res, sc->intr_cookie); if (sc->irq_res) bus_release_resource(dev, SYS_RES_IRQ, 0, sc->irq_res); if (sc->mem_res) bus_release_resource(dev, SYS_RES_MEMORY, 0, sc->mem_res); return (err); } static int ti_sdhci_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (ofw_bus_search_compatible(dev, compat_data)->ocd_data != 0) { device_set_desc(dev, "TI MMCHS (SDHCI 2.0)"); return (BUS_PROBE_DEFAULT); } return (ENXIO); } static device_method_t ti_sdhci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ti_sdhci_probe), DEVMETHOD(device_attach, ti_sdhci_attach), DEVMETHOD(device_detach, ti_sdhci_detach), /* Bus interface */ DEVMETHOD(bus_read_ivar, sdhci_generic_read_ivar), DEVMETHOD(bus_write_ivar, sdhci_generic_write_ivar), /* MMC bridge interface */ DEVMETHOD(mmcbr_update_ios, ti_sdhci_update_ios), DEVMETHOD(mmcbr_request, sdhci_generic_request), DEVMETHOD(mmcbr_get_ro, ti_sdhci_get_ro), DEVMETHOD(mmcbr_acquire_host, sdhci_generic_acquire_host), DEVMETHOD(mmcbr_release_host, sdhci_generic_release_host), /* SDHCI registers accessors */ DEVMETHOD(sdhci_read_1, ti_sdhci_read_1), DEVMETHOD(sdhci_read_2, ti_sdhci_read_2), DEVMETHOD(sdhci_read_4, ti_sdhci_read_4), DEVMETHOD(sdhci_read_multi_4, ti_sdhci_read_multi_4), DEVMETHOD(sdhci_write_1, ti_sdhci_write_1), DEVMETHOD(sdhci_write_2, ti_sdhci_write_2), DEVMETHOD(sdhci_write_4, ti_sdhci_write_4), DEVMETHOD(sdhci_write_multi_4, ti_sdhci_write_multi_4), DEVMETHOD(sdhci_get_card_present, ti_sdhci_get_card_present), DEVMETHOD_END }; static devclass_t ti_sdhci_devclass; static driver_t ti_sdhci_driver = { "sdhci_ti", ti_sdhci_methods, sizeof(struct ti_sdhci_softc), }; DRIVER_MODULE(sdhci_ti, simplebus, ti_sdhci_driver, ti_sdhci_devclass, NULL, NULL); MODULE_DEPEND(sdhci_ti, sdhci, 1, 1, 1); #ifndef MMCCAM MMC_DECLARE_BRIDGE(sdhci_ti); #endif Index: projects/runtime-coverage/sys/contrib/ipfilter/netinet/ip_nat.c =================================================================== --- projects/runtime-coverage/sys/contrib/ipfilter/netinet/ip_nat.c (revision 323974) +++ projects/runtime-coverage/sys/contrib/ipfilter/netinet/ip_nat.c (revision 323975) @@ -1,8580 +1,8580 @@ /* $FreeBSD$ */ /* * Copyright (C) 2012 by Darren Reed. * * See the IPFILTER.LICENCE file for details on licencing. 
*/ #if defined(KERNEL) || defined(_KERNEL) # undef KERNEL # undef _KERNEL # define KERNEL 1 # define _KERNEL 1 #endif #include #include #include #include #include #if defined(_KERNEL) && \ (defined(__NetBSD_Version) && (__NetBSD_Version >= 399002000)) # include #endif #if !defined(_KERNEL) # include # include # include # define KERNEL # ifdef _OpenBSD__ struct file; # endif # include # undef KERNEL #endif #if defined(_KERNEL) && \ defined(__FreeBSD_version) && (__FreeBSD_version >= 220000) # include # include #else # include #endif #if !defined(AIX) # include #endif #if !defined(linux) # include #endif #include #if defined(_KERNEL) # include # if !defined(__SVR4) && !defined(__svr4__) # include # endif #endif #if defined(__SVR4) || defined(__svr4__) # include # include # ifdef KERNEL # include # endif # include # include #endif #if __FreeBSD_version >= 300000 # include #endif #include #if __FreeBSD_version >= 300000 # include #endif #ifdef sun # include #endif #include #include #include #ifdef RFC1825 # include # include extern struct ifnet vpnif; #endif #if !defined(linux) # include #endif #include #include #include #include "netinet/ip_compat.h" #include #include "netinet/ipl.h" #include "netinet/ip_fil.h" #include "netinet/ip_nat.h" #include "netinet/ip_frag.h" #include "netinet/ip_state.h" #include "netinet/ip_proxy.h" #include "netinet/ip_lookup.h" #include "netinet/ip_dstlist.h" #include "netinet/ip_sync.h" #if FREEBSD_GE_REV(300000) # include #endif #ifdef HAS_SYS_MD5_H # include #else # include "md5.h" #endif /* END OF INCLUDES */ #undef SOCKADDR_IN #define SOCKADDR_IN struct sockaddr_in #if !defined(lint) static const char sccsid[] = "@(#)ip_nat.c 1.11 6/5/96 (C) 1995 Darren Reed"; static const char rcsid[] = "@(#)$FreeBSD$"; /* static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.102 2007/10/16 10:08:10 darrenr Exp $"; */ #endif #define NATFSUM(n,v,f) ((v) == 4 ? 
(n)->f.in4.s_addr : (n)->f.i6[0] + \ (n)->f.i6[1] + (n)->f.i6[2] + (n)->f.i6[3]) #define NBUMP(x) softn->x++ #define NBUMPD(x, y) do { \ softn->x.y++; \ DT(y); \ } while (0) #define NBUMPSIDE(y,x) softn->ipf_nat_stats.ns_side[y].x++ #define NBUMPSIDED(y,x) do { softn->ipf_nat_stats.ns_side[y].x++; \ DT(x); } while (0) #define NBUMPSIDEX(y,x,z) \ do { softn->ipf_nat_stats.ns_side[y].x++; \ DT(z); } while (0) #define NBUMPSIDEDF(y,x)do { softn->ipf_nat_stats.ns_side[y].x++; \ DT1(x, fr_info_t *, fin); } while (0) static ipftuneable_t ipf_nat_tuneables[] = { /* nat */ { { (void *)offsetof(ipf_nat_softc_t, ipf_nat_lock) }, "nat_lock", 0, 1, stsizeof(ipf_nat_softc_t, ipf_nat_lock), IPFT_RDONLY, NULL, NULL }, { { (void *)offsetof(ipf_nat_softc_t, ipf_nat_table_sz) }, "nat_table_size", 1, 0x7fffffff, stsizeof(ipf_nat_softc_t, ipf_nat_table_sz), 0, NULL, ipf_nat_rehash }, { { (void *)offsetof(ipf_nat_softc_t, ipf_nat_table_max) }, "nat_table_max", 1, 0x7fffffff, stsizeof(ipf_nat_softc_t, ipf_nat_table_max), 0, NULL, NULL }, { { (void *)offsetof(ipf_nat_softc_t, ipf_nat_maprules_sz) }, "nat_rules_size", 1, 0x7fffffff, stsizeof(ipf_nat_softc_t, ipf_nat_maprules_sz), 0, NULL, ipf_nat_rehash_rules }, { { (void *)offsetof(ipf_nat_softc_t, ipf_nat_rdrrules_sz) }, "rdr_rules_size", 1, 0x7fffffff, stsizeof(ipf_nat_softc_t, ipf_nat_rdrrules_sz), 0, NULL, ipf_nat_rehash_rules }, { { (void *)offsetof(ipf_nat_softc_t, ipf_nat_hostmap_sz) }, "hostmap_size", 1, 0x7fffffff, stsizeof(ipf_nat_softc_t, ipf_nat_hostmap_sz), 0, NULL, ipf_nat_hostmap_rehash }, { { (void *)offsetof(ipf_nat_softc_t, ipf_nat_maxbucket) }, "nat_maxbucket",1, 0x7fffffff, stsizeof(ipf_nat_softc_t, ipf_nat_maxbucket), 0, NULL, NULL }, { { (void *)offsetof(ipf_nat_softc_t, ipf_nat_logging) }, "nat_logging", 0, 1, stsizeof(ipf_nat_softc_t, ipf_nat_logging), 0, NULL, NULL }, { { (void *)offsetof(ipf_nat_softc_t, ipf_nat_doflush) }, "nat_doflush", 0, 1, stsizeof(ipf_nat_softc_t, ipf_nat_doflush), 0, NULL, NULL }, { { (void *)offsetof(ipf_nat_softc_t, ipf_nat_table_wm_low) }, "nat_table_wm_low", 1, 99, stsizeof(ipf_nat_softc_t, ipf_nat_table_wm_low), 0, NULL, NULL }, { { (void *)offsetof(ipf_nat_softc_t, ipf_nat_table_wm_high) }, "nat_table_wm_high", 2, 100, stsizeof(ipf_nat_softc_t, ipf_nat_table_wm_high), 0, NULL, NULL }, { { 0 }, NULL, 0, 0, 0, 0, NULL, NULL } };
/* ======================================================================== */
/* How the NAT is organised and works.                                      */
/*                                                                          */
/* Inside (interface y)   NAT          Outside (interface x)                */
/* --------------------   -+-   -------------------------------------       */
/* Packet going            |    out, processed by ipf_nat_checkout() for x  */
/* ------------>           |    ------------>                               */
/* src=10.1.1.1            |    src=192.1.1.1                               */
/*                         |                                                */
/*                         |    in, processed by ipf_nat_checkin() for x    */
/* <------------           |    <------------                               */
/* dst=10.1.1.1            |    dst=192.1.1.1                               */
/* --------------------   -+-   -------------------------------------       */
/* ipf_nat_checkout() - changes ip_src and if required, sport               */
/*             - creates a new mapping, if required.                        */
/* ipf_nat_checkin()  - changes ip_dst and if required, dport               */
/*                                                                          */
/* In the NAT table, internal source is recorded as "in" and externally     */
/* seen as "out".
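   A toy model of that symmetry, with hypothetical types (host byte order
   assumed throughout; not from this source): */
#include <stdint.h>

struct toy_pkt { uint32_t src, dst; };
struct toy_nat { uint32_t inside, outside; };

/* Outbound (checkout): rewrite the internal source to the external addr. */
static void
toy_checkout(struct toy_pkt *p, const struct toy_nat *n)
{
	if (p->src == n->inside)
		p->src = n->outside;	/* e.g. 10.1.1.1 -> 192.1.1.1 */
}

/* Inbound (checkin): rewrite the external destination back again. */
static void
toy_checkin(struct toy_pkt *p, const struct toy_nat *n)
{
	if (p->dst == n->outside)
		p->dst = n->inside;	/* e.g. 192.1.1.1 -> 10.1.1.1 */
}
/*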
*/ /* ======================================================================== */ #if SOLARIS && !defined(INSTANCES) extern int pfil_delayed_copy; #endif static int ipf_nat_flush_entry __P((ipf_main_softc_t *, void *)); static int ipf_nat_getent __P((ipf_main_softc_t *, caddr_t, int)); static int ipf_nat_getsz __P((ipf_main_softc_t *, caddr_t, int)); static int ipf_nat_putent __P((ipf_main_softc_t *, caddr_t, int)); static void ipf_nat_addmap __P((ipf_nat_softc_t *, ipnat_t *)); static void ipf_nat_addrdr __P((ipf_nat_softc_t *, ipnat_t *)); static int ipf_nat_builddivertmp __P((ipf_nat_softc_t *, ipnat_t *)); static int ipf_nat_clearlist __P((ipf_main_softc_t *, ipf_nat_softc_t *)); static int ipf_nat_cmp_rules __P((ipnat_t *, ipnat_t *)); static int ipf_nat_decap __P((fr_info_t *, nat_t *)); static void ipf_nat_delrule __P((ipf_main_softc_t *, ipf_nat_softc_t *, ipnat_t *, int)); static int ipf_nat_extraflush __P((ipf_main_softc_t *, ipf_nat_softc_t *, int)); static int ipf_nat_finalise __P((fr_info_t *, nat_t *)); static int ipf_nat_flushtable __P((ipf_main_softc_t *, ipf_nat_softc_t *)); static int ipf_nat_getnext __P((ipf_main_softc_t *, ipftoken_t *, ipfgeniter_t *, ipfobj_t *)); static int ipf_nat_gettable __P((ipf_main_softc_t *, ipf_nat_softc_t *, char *)); static hostmap_t *ipf_nat_hostmap __P((ipf_nat_softc_t *, ipnat_t *, struct in_addr, struct in_addr, struct in_addr, u_32_t)); static int ipf_nat_icmpquerytype __P((int)); static int ipf_nat_iterator __P((ipf_main_softc_t *, ipftoken_t *, ipfgeniter_t *, ipfobj_t *)); static int ipf_nat_match __P((fr_info_t *, ipnat_t *)); static int ipf_nat_matcharray __P((nat_t *, int *, u_long)); static int ipf_nat_matchflush __P((ipf_main_softc_t *, ipf_nat_softc_t *, caddr_t)); static void ipf_nat_mssclamp __P((tcphdr_t *, u_32_t, fr_info_t *, u_short *)); static int ipf_nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *)); static int ipf_nat_newdivert __P((fr_info_t *, nat_t *, natinfo_t *)); static int ipf_nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *)); static int ipf_nat_newrewrite __P((fr_info_t *, nat_t *, natinfo_t *)); static int ipf_nat_nextaddr __P((fr_info_t *, nat_addr_t *, u_32_t *, u_32_t *)); static int ipf_nat_nextaddrinit __P((ipf_main_softc_t *, char *, nat_addr_t *, int, void *)); static int ipf_nat_resolverule __P((ipf_main_softc_t *, ipnat_t *)); static int ipf_nat_ruleaddrinit __P((ipf_main_softc_t *, ipf_nat_softc_t *, ipnat_t *)); static void ipf_nat_rule_fini __P((ipf_main_softc_t *, ipnat_t *)); static int ipf_nat_rule_init __P((ipf_main_softc_t *, ipf_nat_softc_t *, ipnat_t *)); static int ipf_nat_siocaddnat __P((ipf_main_softc_t *, ipf_nat_softc_t *, ipnat_t *, int)); static void ipf_nat_siocdelnat __P((ipf_main_softc_t *, ipf_nat_softc_t *, ipnat_t *, int)); static void ipf_nat_tabmove __P((ipf_nat_softc_t *, nat_t *)); /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_main_load */ /* Returns: int - 0 == success, -1 == failure */ /* Parameters: Nil */ /* */ /* The only global NAT structure that needs to be initialised is the filter */ /* rule that is used with blocking packets. 
*/ /* ------------------------------------------------------------------------ */ int ipf_nat_main_load() { return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_main_unload */ /* Returns: int - 0 == success, -1 == failure */ /* Parameters: Nil */ /* */ /* A null-op function that exists as a placeholder so that the flow in */ /* other functions is obvious. */ /* ------------------------------------------------------------------------ */ int ipf_nat_main_unload() { return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_soft_create */ /* Returns: void * - NULL = failure, else pointer to NAT context */ /* Parameters: softc(I) - pointer to soft context main structure */ /* */ /* Allocate the initial soft context structure for NAT and populate it with */ /* some default values. Creating the tables is left until we call _init so */ /* that sizes can be changed before we get under way. */ /* ------------------------------------------------------------------------ */ void * ipf_nat_soft_create(softc) ipf_main_softc_t *softc; { ipf_nat_softc_t *softn; KMALLOC(softn, ipf_nat_softc_t *); if (softn == NULL) return NULL; bzero((char *)softn, sizeof(*softn)); softn->ipf_nat_tune = ipf_tune_array_copy(softn, sizeof(ipf_nat_tuneables), ipf_nat_tuneables); if (softn->ipf_nat_tune == NULL) { ipf_nat_soft_destroy(softc, softn); return NULL; } if (ipf_tune_array_link(softc, softn->ipf_nat_tune) == -1) { ipf_nat_soft_destroy(softc, softn); return NULL; } softn->ipf_nat_list_tail = &softn->ipf_nat_list; softn->ipf_nat_table_max = NAT_TABLE_MAX; softn->ipf_nat_table_sz = NAT_TABLE_SZ; softn->ipf_nat_maprules_sz = NAT_SIZE; softn->ipf_nat_rdrrules_sz = RDR_SIZE; softn->ipf_nat_hostmap_sz = HOSTMAP_SIZE; softn->ipf_nat_doflush = 0; #ifdef IPFILTER_LOG softn->ipf_nat_logging = 1; #else softn->ipf_nat_logging = 0; #endif softn->ipf_nat_defage = DEF_NAT_AGE; softn->ipf_nat_defipage = IPF_TTLVAL(60); softn->ipf_nat_deficmpage = IPF_TTLVAL(3); softn->ipf_nat_table_wm_high = 99; softn->ipf_nat_table_wm_low = 90; return softn; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_soft_destroy */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to soft context main structure */ /* */ /* ------------------------------------------------------------------------ */ void ipf_nat_soft_destroy(softc, arg) ipf_main_softc_t *softc; void *arg; { ipf_nat_softc_t *softn = arg; if (softn->ipf_nat_tune != NULL) { ipf_tune_array_unlink(softc, softn->ipf_nat_tune); KFREES(softn->ipf_nat_tune, sizeof(ipf_nat_tuneables)); softn->ipf_nat_tune = NULL; } KFREE(softn); } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_init */ /* Returns: int - 0 == success, -1 == failure */ /* Parameters: softc(I) - pointer to soft context main structure */ /* */ /* Initialise all of the NAT locks, tables and other structures. 
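   (Aside, not from this source: the bucket-depth heuristic near the end of
   this function amounts to maxbucket = 2 * (floor(log2(table_sz)) + 1).
   A standalone sketch of it, with a hypothetical name:) */
static unsigned int
nat_maxbucket_guess(unsigned int table_sz)
{
	unsigned int maxbucket = 0, i;

	/* One iteration per significant bit: floor(log2(table_sz)) + 1. */
	for (i = table_sz; i > 0; i >>= 1)
		maxbucket++;
	return (maxbucket * 2);		/* e.g. table_sz 2048 -> 12 * 2 = 24 */
}
/*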
*/ /* ------------------------------------------------------------------------ */ int ipf_nat_soft_init(softc, arg) ipf_main_softc_t *softc; void *arg; { ipf_nat_softc_t *softn = arg; ipftq_t *tq; int i; KMALLOCS(softn->ipf_nat_table[0], nat_t **, \ sizeof(nat_t *) * softn->ipf_nat_table_sz); if (softn->ipf_nat_table[0] != NULL) { bzero((char *)softn->ipf_nat_table[0], softn->ipf_nat_table_sz * sizeof(nat_t *)); } else { return -1; } KMALLOCS(softn->ipf_nat_table[1], nat_t **, \ sizeof(nat_t *) * softn->ipf_nat_table_sz); if (softn->ipf_nat_table[1] != NULL) { bzero((char *)softn->ipf_nat_table[1], softn->ipf_nat_table_sz * sizeof(nat_t *)); } else { return -2; } KMALLOCS(softn->ipf_nat_map_rules, ipnat_t **, \ sizeof(ipnat_t *) * softn->ipf_nat_maprules_sz); if (softn->ipf_nat_map_rules != NULL) { bzero((char *)softn->ipf_nat_map_rules, softn->ipf_nat_maprules_sz * sizeof(ipnat_t *)); } else { return -3; } KMALLOCS(softn->ipf_nat_rdr_rules, ipnat_t **, \ sizeof(ipnat_t *) * softn->ipf_nat_rdrrules_sz); if (softn->ipf_nat_rdr_rules != NULL) { bzero((char *)softn->ipf_nat_rdr_rules, softn->ipf_nat_rdrrules_sz * sizeof(ipnat_t *)); } else { return -4; } KMALLOCS(softn->ipf_hm_maptable, hostmap_t **, \ sizeof(hostmap_t *) * softn->ipf_nat_hostmap_sz); if (softn->ipf_hm_maptable != NULL) { bzero((char *)softn->ipf_hm_maptable, sizeof(hostmap_t *) * softn->ipf_nat_hostmap_sz); } else { return -5; } softn->ipf_hm_maplist = NULL; KMALLOCS(softn->ipf_nat_stats.ns_side[0].ns_bucketlen, u_int *, softn->ipf_nat_table_sz * sizeof(u_int)); if (softn->ipf_nat_stats.ns_side[0].ns_bucketlen == NULL) { return -6; } bzero((char *)softn->ipf_nat_stats.ns_side[0].ns_bucketlen, softn->ipf_nat_table_sz * sizeof(u_int)); KMALLOCS(softn->ipf_nat_stats.ns_side[1].ns_bucketlen, u_int *, softn->ipf_nat_table_sz * sizeof(u_int)); if (softn->ipf_nat_stats.ns_side[1].ns_bucketlen == NULL) { return -7; } bzero((char *)softn->ipf_nat_stats.ns_side[1].ns_bucketlen, softn->ipf_nat_table_sz * sizeof(u_int)); if (softn->ipf_nat_maxbucket == 0) { for (i = softn->ipf_nat_table_sz; i > 0; i >>= 1) softn->ipf_nat_maxbucket++; softn->ipf_nat_maxbucket *= 2; } ipf_sttab_init(softc, softn->ipf_nat_tcptq); /* * Increase this because we may have "keep state" following this too * and packet storms can occur if this is removed too quickly. 
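 * (Concretely: the assignment below sets the TTL of the CLOSED-state queue
 * to ipf_tcplastack, so a NAT entry is not reaped the instant a connection
 * closes while its final packets may still be in flight.)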
*/ softn->ipf_nat_tcptq[IPF_TCPS_CLOSED].ifq_ttl = softc->ipf_tcplastack; softn->ipf_nat_tcptq[IPF_TCP_NSTATES - 1].ifq_next = &softn->ipf_nat_udptq; IPFTQ_INIT(&softn->ipf_nat_udptq, softn->ipf_nat_defage, "nat ipftq udp tab"); softn->ipf_nat_udptq.ifq_next = &softn->ipf_nat_udpacktq; IPFTQ_INIT(&softn->ipf_nat_udpacktq, softn->ipf_nat_defage, "nat ipftq udpack tab"); softn->ipf_nat_udpacktq.ifq_next = &softn->ipf_nat_icmptq; IPFTQ_INIT(&softn->ipf_nat_icmptq, softn->ipf_nat_deficmpage, "nat icmp ipftq tab"); softn->ipf_nat_icmptq.ifq_next = &softn->ipf_nat_icmpacktq; IPFTQ_INIT(&softn->ipf_nat_icmpacktq, softn->ipf_nat_defage, "nat icmpack ipftq tab"); softn->ipf_nat_icmpacktq.ifq_next = &softn->ipf_nat_iptq; IPFTQ_INIT(&softn->ipf_nat_iptq, softn->ipf_nat_defipage, "nat ip ipftq tab"); softn->ipf_nat_iptq.ifq_next = &softn->ipf_nat_pending; IPFTQ_INIT(&softn->ipf_nat_pending, 1, "nat pending ipftq tab"); softn->ipf_nat_pending.ifq_next = NULL; for (i = 0, tq = softn->ipf_nat_tcptq; i < IPF_TCP_NSTATES; i++, tq++) { if (tq->ifq_ttl < softn->ipf_nat_deficmpage) tq->ifq_ttl = softn->ipf_nat_deficmpage; #ifdef LARGE_NAT else if (tq->ifq_ttl > softn->ipf_nat_defage) tq->ifq_ttl = softn->ipf_nat_defage; #endif } /* * Increase this because we may have "keep state" following * this too and packet storms can occur if this is removed * too quickly. */ softn->ipf_nat_tcptq[IPF_TCPS_CLOSED].ifq_ttl = softc->ipf_tcplastack; MUTEX_INIT(&softn->ipf_nat_new, "ipf nat new mutex"); MUTEX_INIT(&softn->ipf_nat_io, "ipf nat io mutex"); softn->ipf_nat_inited = 1; return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_soft_fini */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to soft context main structure */ /* */ /* Free all memory used by NAT structures allocated at runtime. */ /* ------------------------------------------------------------------------ */ int ipf_nat_soft_fini(softc, arg) ipf_main_softc_t *softc; void *arg; { ipf_nat_softc_t *softn = arg; ipftq_t *ifq, *ifqnext; (void) ipf_nat_clearlist(softc, softn); (void) ipf_nat_flushtable(softc, softn); /* * Proxy timeout queues are not cleaned here because although they * exist on the NAT list, ipf_proxy_unload is called after unload * and the proxies actually are responsible for them being created. * Should the proxy timeouts have their own list? There's no real * justification as this is the only complication. 
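 * (Whatever the answer, the loop below simply drains ipf_nat_utqe: each
 * user-defined timeout queue is detached and, once nothing references it
 * any longer, freed.)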
*/ for (ifq = softn->ipf_nat_utqe; ifq != NULL; ifq = ifqnext) { ifqnext = ifq->ifq_next; if (ipf_deletetimeoutqueue(ifq) == 0) ipf_freetimeoutqueue(softc, ifq); } if (softn->ipf_nat_table[0] != NULL) { KFREES(softn->ipf_nat_table[0], sizeof(nat_t *) * softn->ipf_nat_table_sz); softn->ipf_nat_table[0] = NULL; } if (softn->ipf_nat_table[1] != NULL) { KFREES(softn->ipf_nat_table[1], sizeof(nat_t *) * softn->ipf_nat_table_sz); softn->ipf_nat_table[1] = NULL; } if (softn->ipf_nat_map_rules != NULL) { KFREES(softn->ipf_nat_map_rules, sizeof(ipnat_t *) * softn->ipf_nat_maprules_sz); softn->ipf_nat_map_rules = NULL; } if (softn->ipf_nat_rdr_rules != NULL) { KFREES(softn->ipf_nat_rdr_rules, sizeof(ipnat_t *) * softn->ipf_nat_rdrrules_sz); softn->ipf_nat_rdr_rules = NULL; } if (softn->ipf_hm_maptable != NULL) { KFREES(softn->ipf_hm_maptable, sizeof(hostmap_t *) * softn->ipf_nat_hostmap_sz); softn->ipf_hm_maptable = NULL; } if (softn->ipf_nat_stats.ns_side[0].ns_bucketlen != NULL) { KFREES(softn->ipf_nat_stats.ns_side[0].ns_bucketlen, sizeof(u_int) * softn->ipf_nat_table_sz); softn->ipf_nat_stats.ns_side[0].ns_bucketlen = NULL; } if (softn->ipf_nat_stats.ns_side[1].ns_bucketlen != NULL) { KFREES(softn->ipf_nat_stats.ns_side[1].ns_bucketlen, sizeof(u_int) * softn->ipf_nat_table_sz); softn->ipf_nat_stats.ns_side[1].ns_bucketlen = NULL; } if (softn->ipf_nat_inited == 1) { softn->ipf_nat_inited = 0; ipf_sttab_destroy(softn->ipf_nat_tcptq); MUTEX_DESTROY(&softn->ipf_nat_new); MUTEX_DESTROY(&softn->ipf_nat_io); MUTEX_DESTROY(&softn->ipf_nat_udptq.ifq_lock); MUTEX_DESTROY(&softn->ipf_nat_udpacktq.ifq_lock); MUTEX_DESTROY(&softn->ipf_nat_icmptq.ifq_lock); MUTEX_DESTROY(&softn->ipf_nat_icmpacktq.ifq_lock); MUTEX_DESTROY(&softn->ipf_nat_iptq.ifq_lock); MUTEX_DESTROY(&softn->ipf_nat_pending.ifq_lock); } return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_setlock */ /* Returns: Nil */ /* Parameters: arg(I) - pointer to soft state information */ /* tmp(I) - new lock value */ /* */ /* Set the "lock status" of NAT to the value in tmp. */ /* ------------------------------------------------------------------------ */ void ipf_nat_setlock(arg, tmp) void *arg; int tmp; { ipf_nat_softc_t *softn = arg; softn->ipf_nat_lock = tmp; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_addrdr */ /* Returns: Nil */ /* Parameters: n(I) - pointer to NAT rule to add */ /* */ /* Adds a redirect rule to the hash table of redirect rules and the list of */ /* loaded NAT rules. Updates the bitmask indicating which netmasks are in */ /* use by redirect rules. 
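   (Aside, not from this source: the insertion below is the pointer-to-
   pointer list idiom used throughout this file, e.g. in_rnext/in_prnext.
   A self-contained sketch of both halves of it:) */
struct node { struct node *next, **pnext; };

/* Append n at the tail, remembering which forward pointer points at it. */
static void
node_append(struct node **head, struct node *n)
{
	struct node **np = head;

	while (*np != NULL)
		np = &(*np)->next;
	n->next = NULL;
	n->pnext = np;
	*np = n;
}

/* Unlink n in O(1) via the stored back-pointer (cf. ipf_nat_delrdr below). */
static void
node_unlink(struct node *n)
{
	if (n->next != NULL)
		n->next->pnext = n->pnext;
	*n->pnext = n->next;
}
/*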
*/ /* ------------------------------------------------------------------------ */ static void ipf_nat_addrdr(softn, n) ipf_nat_softc_t *softn; ipnat_t *n; { ipnat_t **np; u_32_t j; u_int hv; u_int rhv; int k; if (n->in_odstatype == FRI_NORMAL) { k = count4bits(n->in_odstmsk); ipf_inet_mask_add(k, &softn->ipf_nat_rdr_mask); j = (n->in_odstaddr & n->in_odstmsk); rhv = NAT_HASH_FN(j, 0, 0xffffffff); } else { ipf_inet_mask_add(0, &softn->ipf_nat_rdr_mask); j = 0; rhv = 0; } hv = rhv % softn->ipf_nat_rdrrules_sz; np = softn->ipf_nat_rdr_rules + hv; while (*np != NULL) np = &(*np)->in_rnext; n->in_rnext = NULL; n->in_prnext = np; n->in_hv[0] = hv; n->in_use++; *np = n; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_addmap */ /* Returns: Nil */ /* Parameters: n(I) - pointer to NAT rule to add */ /* */ /* Adds a NAT map rule to the hash table of rules and the list of loaded */ /* NAT rules. Updates the bitmask indicating which netmasks are in use by */ /* map rules. */ /* ------------------------------------------------------------------------ */ static void ipf_nat_addmap(softn, n) ipf_nat_softc_t *softn; ipnat_t *n; { ipnat_t **np; u_32_t j; u_int hv; u_int rhv; int k; if (n->in_osrcatype == FRI_NORMAL) { k = count4bits(n->in_osrcmsk); ipf_inet_mask_add(k, &softn->ipf_nat_map_mask); j = (n->in_osrcaddr & n->in_osrcmsk); rhv = NAT_HASH_FN(j, 0, 0xffffffff); } else { ipf_inet_mask_add(0, &softn->ipf_nat_map_mask); j = 0; rhv = 0; } hv = rhv % softn->ipf_nat_maprules_sz; np = softn->ipf_nat_map_rules + hv; while (*np != NULL) np = &(*np)->in_mnext; n->in_mnext = NULL; n->in_pmnext = np; n->in_hv[1] = rhv; n->in_use++; *np = n; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_delrdr */ /* Returns: Nil */ /* Parameters: n(I) - pointer to NAT rule to delete */ /* */ /* Removes a redirect rule from the hash table of redirect rules. */ /* ------------------------------------------------------------------------ */ void ipf_nat_delrdr(softn, n) ipf_nat_softc_t *softn; ipnat_t *n; { if (n->in_odstatype == FRI_NORMAL) { int k = count4bits(n->in_odstmsk); ipf_inet_mask_del(k, &softn->ipf_nat_rdr_mask); } else { ipf_inet_mask_del(0, &softn->ipf_nat_rdr_mask); } if (n->in_rnext) n->in_rnext->in_prnext = n->in_prnext; *n->in_prnext = n->in_rnext; n->in_use--; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_delmap */ /* Returns: Nil */ /* Parameters: n(I) - pointer to NAT rule to delete */ /* */ /* Removes a NAT map rule from the hash table of NAT map rules.
*/ /* ------------------------------------------------------------------------ */ void ipf_nat_delmap(softn, n) ipf_nat_softc_t *softn; ipnat_t *n; { if (n->in_osrcatype == FRI_NORMAL) { int k = count4bits(n->in_osrcmsk); ipf_inet_mask_del(k, &softn->ipf_nat_map_mask); } else { ipf_inet_mask_del(0, &softn->ipf_nat_map_mask); } if (n->in_mnext != NULL) n->in_mnext->in_pmnext = n->in_pmnext; *n->in_pmnext = n->in_mnext; n->in_use--; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_hostmap */ /* Returns: struct hostmap* - NULL if no hostmap could be created, */ /* else a pointer to the hostmapping to use */ /* Parameters: np(I) - pointer to NAT rule */ /* real(I) - real IP address */ /* map(I) - mapped IP address */ /* port(I) - destination port number */ /* Write Locks: ipf_nat */ /* */ /* Check if an ip address has already been allocated for a given mapping */ /* that is not doing port based translation. If it is not yet allocated, */ /* then create a new entry if a non-NULL NAT rule pointer has been supplied. */ /* ------------------------------------------------------------------------ */ static struct hostmap * ipf_nat_hostmap(softn, np, src, dst, map, port) ipf_nat_softc_t *softn; ipnat_t *np; struct in_addr src; struct in_addr dst; struct in_addr map; u_32_t port; { hostmap_t *hm; u_int hv, rhv; hv = (src.s_addr ^ dst.s_addr); hv += src.s_addr; hv += dst.s_addr; rhv = hv; hv %= softn->ipf_nat_hostmap_sz; for (hm = softn->ipf_hm_maptable[hv]; hm; hm = hm->hm_hnext) if ((hm->hm_osrcip.s_addr == src.s_addr) && (hm->hm_odstip.s_addr == dst.s_addr) && ((np == NULL) || (np == hm->hm_ipnat)) && ((port == 0) || (port == hm->hm_port))) { softn->ipf_nat_stats.ns_hm_addref++; hm->hm_ref++; return hm; } if (np == NULL) { softn->ipf_nat_stats.ns_hm_nullnp++; return NULL; } KMALLOC(hm, hostmap_t *); if (hm) { hm->hm_next = softn->ipf_hm_maplist; hm->hm_pnext = &softn->ipf_hm_maplist; if (softn->ipf_hm_maplist != NULL) softn->ipf_hm_maplist->hm_pnext = &hm->hm_next; softn->ipf_hm_maplist = hm; hm->hm_hnext = softn->ipf_hm_maptable[hv]; hm->hm_phnext = softn->ipf_hm_maptable + hv; if (softn->ipf_hm_maptable[hv] != NULL) softn->ipf_hm_maptable[hv]->hm_phnext = &hm->hm_hnext; softn->ipf_hm_maptable[hv] = hm; hm->hm_ipnat = np; np->in_use++; hm->hm_osrcip = src; hm->hm_odstip = dst; hm->hm_nsrcip = map; hm->hm_ndstip.s_addr = 0; hm->hm_ref = 1; hm->hm_port = port; hm->hm_hv = rhv; hm->hm_v = 4; softn->ipf_nat_stats.ns_hm_new++; } else { softn->ipf_nat_stats.ns_hm_newfail++; } return hm; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_hostmapdel */ /* Returns: Nil */ /* Parameters: hmp(I) - pointer to hostmap structure pointer */ /* Write Locks: ipf_nat */ /* */ /* Decrement the references to this hostmap structure by one. If this */ /* reaches zero then remove it and free it.
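   (Each NAT session sharing the mapping holds one reference; when the last
   one is dropped, the entry is unlinked from both its hash chain and the
   global hostmap list before being freed, as the code below shows.)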
*/ /* ------------------------------------------------------------------------ */ void ipf_nat_hostmapdel(softc, hmp) ipf_main_softc_t *softc; struct hostmap **hmp; { struct hostmap *hm; hm = *hmp; *hmp = NULL; hm->hm_ref--; if (hm->hm_ref == 0) { ipf_nat_rule_deref(softc, &hm->hm_ipnat); if (hm->hm_hnext) hm->hm_hnext->hm_phnext = hm->hm_phnext; *hm->hm_phnext = hm->hm_hnext; if (hm->hm_next) hm->hm_next->hm_pnext = hm->hm_pnext; *hm->hm_pnext = hm->hm_next; KFREE(hm); } } /* ------------------------------------------------------------------------ */ /* Function: ipf_fix_outcksum */ /* Returns: Nil */ /* Parameters: fin(I) - pointer to packet information */ /* sp(I) - location of 16bit checksum to update */ /* n(I) - amount to adjust checksum by */ /* */ /* Adjusts the 16bit checksum by "n" for packets going out. */ /* ------------------------------------------------------------------------ */ void ipf_fix_outcksum(cksum, sp, n, partial) int cksum; u_short *sp; u_32_t n, partial; { u_short sumshort; u_32_t sum1; if (n == 0) return; if (cksum == 4) { *sp = 0; return; } if (cksum == 2) { sum1 = partial; sum1 = (sum1 & 0xffff) + (sum1 >> 16); *sp = htons(sum1); return; } sum1 = (~ntohs(*sp)) & 0xffff; sum1 += (n); sum1 = (sum1 >> 16) + (sum1 & 0xffff); /* Again */ sum1 = (sum1 >> 16) + (sum1 & 0xffff); sumshort = ~(u_short)sum1; *(sp) = htons(sumshort); } /* ------------------------------------------------------------------------ */ /* Function: ipf_fix_incksum */ /* Returns: Nil */ /* Parameters: fin(I) - pointer to packet information */ /* sp(I) - location of 16bit checksum to update */ /* n(I) - amount to adjust checksum by */ /* */ /* Adjusts the 16bit checksum by "n" for packets going in. */ /* ------------------------------------------------------------------------ */ void ipf_fix_incksum(cksum, sp, n, partial) int cksum; u_short *sp; u_32_t n, partial; { u_short sumshort; u_32_t sum1; if (n == 0) return; if (cksum == 4) { *sp = 0; return; } if (cksum == 2) { sum1 = partial; sum1 = (sum1 & 0xffff) + (sum1 >> 16); *sp = htons(sum1); return; } sum1 = (~ntohs(*sp)) & 0xffff; sum1 += ~(n) & 0xffff; sum1 = (sum1 >> 16) + (sum1 & 0xffff); /* Again */ sum1 = (sum1 >> 16) + (sum1 & 0xffff); sumshort = ~(u_short)sum1; *(sp) = htons(sumshort); } /* ------------------------------------------------------------------------ */ /* Function: ipf_fix_datacksum */ /* Returns: Nil */ /* Parameters: sp(I) - location of 16bit checksum to update */ /* n(I) - amount to adjust checksum by */ /* */ /* Fix_datacksum is used *only* for the adjustments of checksums in the */ /* data section of an IP packet. */ /* */ /* The only situation in which you need to do this is when NAT'ing an */ /* ICMP error message. Such a message contains in its body the IP header */ /* of the original IP packet that caused the error. */ /* */ /* You can't use fix_incksum or fix_outcksum in that case, because for the */ /* kernel the data section of the ICMP error is just data, and no special */ /* processing like hardware cksum or ntohs processing has been done by the */ /* kernel on the data section.
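   (Aside, not from this source: all three fixup routines here share the
   same one's-complement update, in the spirit of RFC 1624. A standalone
   userspace sketch of that core, hypothetical name:) */
#include <stdint.h>
#include <arpa/inet.h>

static void
cksum_adjust(uint16_t *sp, uint32_t delta)
{
	uint32_t sum = (~ntohs(*sp)) & 0xffff;

	sum += delta;				/* apply the adjustment */
	sum = (sum >> 16) + (sum & 0xffff);	/* fold the carry */
	sum = (sum >> 16) + (sum & 0xffff);	/* ...and any new carry */
	*sp = htons((uint16_t)~sum);
}
/*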
*/ /* ------------------------------------------------------------------------ */ void ipf_fix_datacksum(sp, n) u_short *sp; u_32_t n; { u_short sumshort; u_32_t sum1; if (n == 0) return; sum1 = (~ntohs(*sp)) & 0xffff; sum1 += (n); sum1 = (sum1 >> 16) + (sum1 & 0xffff); /* Again */ sum1 = (sum1 >> 16) + (sum1 & 0xffff); sumshort = ~(u_short)sum1; *(sp) = htons(sumshort); } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_ioctl */ /* Returns: int - 0 == success, != 0 == failure */ /* Parameters: softc(I) - pointer to soft context main structure */ /* data(I) - pointer to ioctl data */ /* cmd(I) - ioctl command integer */ /* mode(I) - file mode bits used with open */ /* uid(I) - uid of calling process */ /* ctx(I) - pointer used as key for finding context */ /* */ /* Processes an ioctl call made to operate on the IP Filter NAT device. */ /* ------------------------------------------------------------------------ */ int ipf_nat_ioctl(softc, data, cmd, mode, uid, ctx) ipf_main_softc_t *softc; ioctlcmd_t cmd; caddr_t data; int mode, uid; void *ctx; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; int error = 0, ret, arg, getlock; ipnat_t *nat, *nt, *n; ipnat_t natd; SPL_INT(s); #if BSD_GE_YEAR(199306) && defined(_KERNEL) # if NETBSD_GE_REV(399002000) if ((mode & FWRITE) && kauth_authorize_network(curlwp->l_cred, KAUTH_NETWORK_FIREWALL, KAUTH_REQ_NETWORK_FIREWALL_FW, NULL, NULL, NULL)) # else # if defined(__FreeBSD_version) && (__FreeBSD_version >= 500034) if (securelevel_ge(curthread->td_ucred, 3) && (mode & FWRITE)) # else if ((securelevel >= 3) && (mode & FWRITE)) # endif # endif { IPFERROR(60001); return EPERM; } #endif #if defined(__osf__) && defined(_KERNEL) getlock = 0; #else getlock = (mode & NAT_LOCKHELD) ? 
0 : 1; #endif n = NULL; nt = NULL; nat = NULL; if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT) || (cmd == (ioctlcmd_t)SIOCPURGENAT)) { if (mode & NAT_SYSSPACE) { bcopy(data, (char *)&natd, sizeof(natd)); nat = &natd; error = 0; } else { bzero(&natd, sizeof(natd)); error = ipf_inobj(softc, data, NULL, &natd, IPFOBJ_IPNAT); if (error != 0) goto done; if (natd.in_size < sizeof(ipnat_t)) { error = EINVAL; goto done; } KMALLOCS(nt, ipnat_t *, natd.in_size); if (nt == NULL) { IPFERROR(60070); error = ENOMEM; goto done; } bzero(nt, natd.in_size); error = ipf_inobjsz(softc, data, nt, IPFOBJ_IPNAT, natd.in_size); if (error) goto done; nat = nt; } /* * For add/delete, look to see if the NAT entry is * already present */ nat->in_flags &= IPN_USERFLAGS; if ((nat->in_redir & NAT_MAPBLK) == 0) { if (nat->in_osrcatype == FRI_NORMAL || nat->in_osrcatype == FRI_NONE) nat->in_osrcaddr &= nat->in_osrcmsk; if (nat->in_odstatype == FRI_NORMAL || nat->in_odstatype == FRI_NONE) nat->in_odstaddr &= nat->in_odstmsk; if ((nat->in_flags & (IPN_SPLIT|IPN_SIPRANGE)) == 0) { if (nat->in_nsrcatype == FRI_NORMAL) nat->in_nsrcaddr &= nat->in_nsrcmsk; if (nat->in_ndstatype == FRI_NORMAL) nat->in_ndstaddr &= nat->in_ndstmsk; } } error = ipf_nat_rule_init(softc, softn, nat); if (error != 0) goto done; MUTEX_ENTER(&softn->ipf_nat_io); for (n = softn->ipf_nat_list; n != NULL; n = n->in_next) if (ipf_nat_cmp_rules(nat, n) == 0) break; } switch (cmd) { #ifdef IPFILTER_LOG case SIOCIPFFB : { int tmp; if (!(mode & FWRITE)) { IPFERROR(60002); error = EPERM; } else { tmp = ipf_log_clear(softc, IPL_LOGNAT); error = BCOPYOUT(&tmp, data, sizeof(tmp)); if (error != 0) { IPFERROR(60057); error = EFAULT; } } break; } case SIOCSETLG : if (!(mode & FWRITE)) { IPFERROR(60003); error = EPERM; } else { error = BCOPYIN(data, &softn->ipf_nat_logging, sizeof(softn->ipf_nat_logging)); if (error != 0) error = EFAULT; } break; case SIOCGETLG : error = BCOPYOUT(&softn->ipf_nat_logging, data, sizeof(softn->ipf_nat_logging)); if (error != 0) { IPFERROR(60004); error = EFAULT; } break; case FIONREAD : arg = ipf_log_bytesused(softc, IPL_LOGNAT); error = BCOPYOUT(&arg, data, sizeof(arg)); if (error != 0) { IPFERROR(60005); error = EFAULT; } break; #endif case SIOCADNAT : if (!(mode & FWRITE)) { IPFERROR(60006); error = EPERM; } else if (n != NULL) { natd.in_flineno = n->in_flineno; (void) ipf_outobj(softc, data, &natd, IPFOBJ_IPNAT); IPFERROR(60007); error = EEXIST; } else if (nt == NULL) { IPFERROR(60008); error = ENOMEM; } if (error != 0) { MUTEX_EXIT(&softn->ipf_nat_io); break; } if (nat != nt) bcopy((char *)nat, (char *)nt, sizeof(*n)); error = ipf_nat_siocaddnat(softc, softn, nt, getlock); MUTEX_EXIT(&softn->ipf_nat_io); if (error == 0) { nat = NULL; nt = NULL; } break; case SIOCRMNAT : case SIOCPURGENAT : if (!(mode & FWRITE)) { IPFERROR(60009); error = EPERM; n = NULL; } else if (n == NULL) { IPFERROR(60010); error = ESRCH; } if (error != 0) { MUTEX_EXIT(&softn->ipf_nat_io); break; } if (cmd == (ioctlcmd_t)SIOCPURGENAT) { error = ipf_outobjsz(softc, data, n, IPFOBJ_IPNAT, n->in_size); if (error) { MUTEX_EXIT(&softn->ipf_nat_io); goto done; } n->in_flags |= IPN_PURGE; } ipf_nat_siocdelnat(softc, softn, n, getlock); MUTEX_EXIT(&softn->ipf_nat_io); n = NULL; break; case SIOCGNATS : { natstat_t *nsp = &softn->ipf_nat_stats; nsp->ns_side[0].ns_table = softn->ipf_nat_table[0]; nsp->ns_side[1].ns_table = softn->ipf_nat_table[1]; nsp->ns_list = softn->ipf_nat_list; nsp->ns_maptable = softn->ipf_hm_maptable; nsp->ns_maplist = 
softn->ipf_hm_maplist; nsp->ns_nattab_sz = softn->ipf_nat_table_sz; nsp->ns_nattab_max = softn->ipf_nat_table_max; nsp->ns_rultab_sz = softn->ipf_nat_maprules_sz; nsp->ns_rdrtab_sz = softn->ipf_nat_rdrrules_sz; nsp->ns_hostmap_sz = softn->ipf_nat_hostmap_sz; nsp->ns_instances = softn->ipf_nat_instances; nsp->ns_ticks = softc->ipf_ticks; #ifdef IPFILTER_LOGGING nsp->ns_log_ok = ipf_log_logok(softc, IPF_LOGNAT); nsp->ns_log_fail = ipf_log_failures(softc, IPF_LOGNAT); #else nsp->ns_log_ok = 0; nsp->ns_log_fail = 0; #endif error = ipf_outobj(softc, data, nsp, IPFOBJ_NATSTAT); break; } case SIOCGNATL : { natlookup_t nl; error = ipf_inobj(softc, data, NULL, &nl, IPFOBJ_NATLOOKUP); if (error == 0) { void *ptr; if (getlock) { READ_ENTER(&softc->ipf_nat); } switch (nl.nl_v) { case 4 : ptr = ipf_nat_lookupredir(&nl); break; #ifdef USE_INET6 case 6 : ptr = ipf_nat6_lookupredir(&nl); break; #endif default: ptr = NULL; break; } if (getlock) { RWLOCK_EXIT(&softc->ipf_nat); } if (ptr != NULL) { error = ipf_outobj(softc, data, &nl, IPFOBJ_NATLOOKUP); } else { IPFERROR(60011); error = ESRCH; } } break; } case SIOCIPFFL : /* old SIOCFLNAT & SIOCCNATL */ if (!(mode & FWRITE)) { IPFERROR(60012); error = EPERM; break; } if (getlock) { WRITE_ENTER(&softc->ipf_nat); } error = BCOPYIN(data, &arg, sizeof(arg)); if (error != 0) { IPFERROR(60013); error = EFAULT; } else { if (arg == 0) ret = ipf_nat_flushtable(softc, softn); else if (arg == 1) ret = ipf_nat_clearlist(softc, softn); else ret = ipf_nat_extraflush(softc, softn, arg); ipf_proxy_flush(softc->ipf_proxy_soft, arg); } if (getlock) { RWLOCK_EXIT(&softc->ipf_nat); } if (error == 0) { error = BCOPYOUT(&ret, data, sizeof(ret)); } break; case SIOCMATCHFLUSH : if (!(mode & FWRITE)) { IPFERROR(60014); error = EPERM; break; } if (getlock) { WRITE_ENTER(&softc->ipf_nat); } error = ipf_nat_matchflush(softc, softn, data); if (getlock) { RWLOCK_EXIT(&softc->ipf_nat); } break; case SIOCPROXY : error = ipf_proxy_ioctl(softc, data, cmd, mode, ctx); break; case SIOCSTLCK : if (!(mode & FWRITE)) { IPFERROR(60015); error = EPERM; } else { error = ipf_lock(data, &softn->ipf_nat_lock); } break; case SIOCSTPUT : if ((mode & FWRITE) != 0) { error = ipf_nat_putent(softc, data, getlock); } else { IPFERROR(60016); error = EACCES; } break; case SIOCSTGSZ : if (softn->ipf_nat_lock) { error = ipf_nat_getsz(softc, data, getlock); } else { IPFERROR(60017); error = EACCES; } break; case SIOCSTGET : if (softn->ipf_nat_lock) { error = ipf_nat_getent(softc, data, getlock); } else { IPFERROR(60018); error = EACCES; } break; case SIOCGENITER : { ipfgeniter_t iter; ipftoken_t *token; ipfobj_t obj; error = ipf_inobj(softc, data, &obj, &iter, IPFOBJ_GENITER); if (error != 0) break; SPL_SCHED(s); token = ipf_token_find(softc, iter.igi_type, uid, ctx); if (token != NULL) { error = ipf_nat_iterator(softc, token, &iter, &obj); WRITE_ENTER(&softc->ipf_tokens); ipf_token_deref(softc, token); RWLOCK_EXIT(&softc->ipf_tokens); } SPL_X(s); break; } case SIOCIPFDELTOK : error = BCOPYIN(data, &arg, sizeof(arg)); if (error == 0) { SPL_SCHED(s); error = ipf_token_del(softc, arg, uid, ctx); SPL_X(s); } else { IPFERROR(60019); error = EFAULT; } break; case SIOCGTQTAB : error = ipf_outobj(softc, data, softn->ipf_nat_tcptq, IPFOBJ_STATETQTAB); break; case SIOCGTABL : error = ipf_nat_gettable(softc, softn, data); break; default : IPFERROR(60020); error = EINVAL; break; } done: if (nat != NULL) ipf_nat_rule_fini(softc, nat); if (nt != NULL) KFREES(nt, nt->in_size); return error; } /* 
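   (Aside, not from this source: the SIOCADNAT branch above pulls in a
   variable-sized rule in two steps. A userspace sketch of the pattern,
   hypothetical names; the kernel path uses ipf_inobj/ipf_inobjsz and
   KMALLOCS rather than memcpy and calloc:)
*/
#include <stdlib.h>
#include <string.h>

struct var_rule { size_t vr_size; /* payload follows */ };

static struct var_rule *
fetch_rule(const void *udata)
{
	struct var_rule hdr, *vr;

	memcpy(&hdr, udata, sizeof(hdr));	/* 1: fixed-size header only */
	if (hdr.vr_size < sizeof(hdr))		/* 2: validate claimed size */
		return (NULL);
	if ((vr = calloc(1, hdr.vr_size)) == NULL)
		return (NULL);
	memcpy(vr, udata, hdr.vr_size);		/* 3: full, size-checked copy */
	return (vr);
}
/*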
------------------------------------------------------------------------ */ /* Function: ipf_nat_siocaddnat */ /* Returns: int - 0 == success, != 0 == failure */ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* n(I) - pointer to new NAT rule */ /* np(I) - pointer to where to insert new NAT rule */ /* getlock(I) - flag indicating if lock on is held */ /* Mutex Locks: ipf_nat_io */ /* */ /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */ /* from information passed to the kernel, then add it to the appropriate */ /* NAT rule table(s). */ /* ------------------------------------------------------------------------ */ static int ipf_nat_siocaddnat(softc, softn, n, getlock) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; ipnat_t *n; int getlock; { int error = 0; if (ipf_nat_resolverule(softc, n) != 0) { IPFERROR(60022); return ENOENT; } if ((n->in_age[0] == 0) && (n->in_age[1] != 0)) { IPFERROR(60023); return EINVAL; } if (n->in_redir == (NAT_DIVERTUDP|NAT_MAP)) { /* * Prerecord whether or not the destination of the divert * is local or not to the interface the packet is going * to be sent out. */ n->in_dlocal = ipf_deliverlocal(softc, n->in_v[1], n->in_ifps[1], &n->in_ndstip6); } if (getlock) { WRITE_ENTER(&softc->ipf_nat); } n->in_next = NULL; n->in_pnext = softn->ipf_nat_list_tail; *n->in_pnext = n; softn->ipf_nat_list_tail = &n->in_next; n->in_use++; if (n->in_redir & NAT_REDIRECT) { n->in_flags &= ~IPN_NOTDST; switch (n->in_v[0]) { case 4 : ipf_nat_addrdr(softn, n); break; #ifdef USE_INET6 case 6 : ipf_nat6_addrdr(softn, n); break; #endif default : break; } ATOMIC_INC32(softn->ipf_nat_stats.ns_rules_rdr); } if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) { n->in_flags &= ~IPN_NOTSRC; switch (n->in_v[0]) { case 4 : ipf_nat_addmap(softn, n); break; #ifdef USE_INET6 case 6 : ipf_nat6_addmap(softn, n); break; #endif default : break; } ATOMIC_INC32(softn->ipf_nat_stats.ns_rules_map); } if (n->in_age[0] != 0) n->in_tqehead[0] = ipf_addtimeoutqueue(softc, &softn->ipf_nat_utqe, n->in_age[0]); if (n->in_age[1] != 0) n->in_tqehead[1] = ipf_addtimeoutqueue(softc, &softn->ipf_nat_utqe, n->in_age[1]); MUTEX_INIT(&n->in_lock, "ipnat rule lock"); n = NULL; ATOMIC_INC32(softn->ipf_nat_stats.ns_rules); #if SOLARIS && !defined(INSTANCES) pfil_delayed_copy = 0; #endif if (getlock) { RWLOCK_EXIT(&softc->ipf_nat); /* WRITE */ } return error; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_ruleaddrinit */ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* n(I) - pointer to NAT rule */ /* */ /* Initialise all of the NAT address structures in a NAT rule. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_ruleaddrinit(softc, softn, n) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; ipnat_t *n; { int idx, error; if ((n->in_ndst.na_atype == FRI_LOOKUP) && (n->in_ndst.na_type != IPLT_DSTLIST)) { IPFERROR(60071); return EINVAL; } if ((n->in_nsrc.na_atype == FRI_LOOKUP) && (n->in_nsrc.na_type != IPLT_DSTLIST)) { IPFERROR(60069); return EINVAL; } if (n->in_redir == NAT_BIMAP) { n->in_ndstaddr = n->in_osrcaddr; n->in_ndstmsk = n->in_osrcmsk; n->in_odstaddr = n->in_nsrcaddr; n->in_odstmsk = n->in_nsrcmsk; } if (n->in_redir & NAT_REDIRECT) idx = 1; else idx = 0; /* * Initialise all of the address fields. 
*/ error = ipf_nat_nextaddrinit(softc, n->in_names, &n->in_osrc, 1, n->in_ifps[idx]); if (error != 0) return error; error = ipf_nat_nextaddrinit(softc, n->in_names, &n->in_odst, 1, n->in_ifps[idx]); if (error != 0) return error; error = ipf_nat_nextaddrinit(softc, n->in_names, &n->in_nsrc, 1, n->in_ifps[idx]); if (error != 0) return error; error = ipf_nat_nextaddrinit(softc, n->in_names, &n->in_ndst, 1, n->in_ifps[idx]); if (error != 0) return error; if (n->in_redir & NAT_DIVERTUDP) ipf_nat_builddivertmp(softn, n); return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_resolverule */ /* Returns: int - 0 == success, -1 == failure */ /* Parameters: softc(I) - pointer to soft context main structure */ /* n(I) - pointer to NAT rule */ /* */ /* Resolve the interface names and any proxy label stored in the NAT rule */ /* into usable pointers before the rule is added to the tables. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_resolverule(softc, n) ipf_main_softc_t *softc; ipnat_t *n; { char *base; base = n->in_names; n->in_ifps[0] = ipf_resolvenic(softc, base + n->in_ifnames[0], n->in_v[0]); if (n->in_ifnames[1] == -1) { n->in_ifnames[1] = n->in_ifnames[0]; n->in_ifps[1] = n->in_ifps[0]; } else { n->in_ifps[1] = ipf_resolvenic(softc, base + n->in_ifnames[1], n->in_v[1]); } if (n->in_plabel != -1) { if (n->in_redir & NAT_REDIRECT) n->in_apr = ipf_proxy_lookup(softc->ipf_proxy_soft, n->in_pr[0], base + n->in_plabel); else n->in_apr = ipf_proxy_lookup(softc->ipf_proxy_soft, n->in_pr[1], base + n->in_plabel); if (n->in_apr == NULL) return -1; } return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_siocdelnat */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* n(I) - pointer to NAT rule to delete */ /* getlock(I) - flag indicating if lock on is held */ /* Mutex Locks: ipf_nat_io */ /* */ /* Handle SIOCRMNAT. Remove the matching NAT rule from the lists and */ /* tables of active NAT rules. */ /* ------------------------------------------------------------------------ */ static void ipf_nat_siocdelnat(softc, softn, n, getlock) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; ipnat_t *n; int getlock; { if (getlock) { WRITE_ENTER(&softc->ipf_nat); } ipf_nat_delrule(softc, softn, n, 1); if (getlock) { RWLOCK_EXIT(&softc->ipf_nat); /* READ/WRITE */ } } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_getsz */ /* Returns: int - 0 == success, != 0 is the error value. */ /* Parameters: softc(I) - pointer to soft context main structure */ /* data(I) - pointer to natget structure with kernel */ /* pointer to get the size of. */ /* getlock(I) - flag indicating whether or not the caller */ /* holds a lock on ipf_nat */ /* */ /* Handle SIOCSTGSZ. */ /* Return the size of the nat list entry to be copied back to user space. */ /* The size of the entry is stored in the ng_sz field and the entire natget */ /* structure is copied back to the user.
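   (Aside, not from this source: both SIOCSTGSZ here and SIOCSTGET below
   refuse to dereference a user-supplied kernel pointer until it has been
   found on the live session list. A sketch of that guard, hypothetical
   names:) */
struct sess { struct sess *next; };

static struct sess *
sess_validate(struct sess *list, struct sess *candidate)
{
	struct sess *s;

	for (s = list; s != NULL; s = s->next)
		if (s == candidate)
			return (s);	/* known live entry */
	return (NULL);			/* reject an arbitrary pointer */
}
/*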
*/ /* ------------------------------------------------------------------------ */ static int ipf_nat_getsz(softc, data, getlock) ipf_main_softc_t *softc; caddr_t data; int getlock; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; ap_session_t *aps; nat_t *nat, *n; natget_t ng; int error; error = BCOPYIN(data, &ng, sizeof(ng)); if (error != 0) { IPFERROR(60024); return EFAULT; } if (getlock) { READ_ENTER(&softc->ipf_nat); } nat = ng.ng_ptr; if (!nat) { nat = softn->ipf_nat_instances; ng.ng_sz = 0; /* * Empty list so the size returned is 0. Simple. */ if (nat == NULL) { if (getlock) { RWLOCK_EXIT(&softc->ipf_nat); } error = BCOPYOUT(&ng, data, sizeof(ng)); if (error != 0) { IPFERROR(60025); return EFAULT; } return 0; } } else { /* * Make sure the pointer we're copying from exists in the * current list of entries. Security precaution to prevent * copying of random kernel data. */ for (n = softn->ipf_nat_instances; n; n = n->nat_next) if (n == nat) break; if (n == NULL) { if (getlock) { RWLOCK_EXIT(&softc->ipf_nat); } IPFERROR(60026); return ESRCH; } } /* * Include any space required for proxy data structures. */ ng.ng_sz = sizeof(nat_save_t); aps = nat->nat_aps; if (aps != NULL) { ng.ng_sz += sizeof(ap_session_t) - 4; if (aps->aps_data != 0) ng.ng_sz += aps->aps_psiz; } if (getlock) { RWLOCK_EXIT(&softc->ipf_nat); } error = BCOPYOUT(&ng, data, sizeof(ng)); if (error != 0) { IPFERROR(60027); return EFAULT; } return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_getent */ /* Returns: int - 0 == success, != 0 is the error value. */ /* Parameters: softc(I) - pointer to soft context main structure */ /* data(I) - pointer to natget structure with kernel pointer*/ /* to NAT structure to copy out. */ /* getlock(I) - flag indicating whether or not the caller */ /* holds a lock on ipf_nat */ /* */ /* Handle SIOCSTGET. */ /* Copies out NAT entry to user space. Any additional data held for a */ /* proxy is also copied, as too is the NAT rule which was responsible for it */ /* ------------------------------------------------------------------------ */ static int ipf_nat_getent(softc, data, getlock) ipf_main_softc_t *softc; caddr_t data; int getlock; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; int error, outsize; ap_session_t *aps; nat_save_t *ipn, ipns; nat_t *n, *nat; error = ipf_inobj(softc, data, NULL, &ipns, IPFOBJ_NATSAVE); if (error != 0) return error; if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920)) { IPFERROR(60028); return EINVAL; } KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize); if (ipn == NULL) { IPFERROR(60029); return ENOMEM; } if (getlock) { READ_ENTER(&softc->ipf_nat); } ipn->ipn_dsize = ipns.ipn_dsize; nat = ipns.ipn_next; if (nat == NULL) { nat = softn->ipf_nat_instances; if (nat == NULL) { if (softn->ipf_nat_instances == NULL) { IPFERROR(60030); error = ENOENT; } goto finished; } } else { /* * Make sure the pointer we're copying from exists in the * current list of entries. Security precaution to prevent * copying of random kernel data. */ for (n = softn->ipf_nat_instances; n; n = n->nat_next) if (n == nat) break; if (n == NULL) { IPFERROR(60031); error = ESRCH; goto finished; } } ipn->ipn_next = nat->nat_next; /* * Copy the NAT structure. */ bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat)); /* * If we have a pointer to the NAT rule it belongs to, save that too.
*/ if (nat->nat_ptr != NULL) bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat, ipn->ipn_ipnat.in_size); /* * If we also know the NAT entry has an associated filter rule, * save that too. */ if (nat->nat_fr != NULL) bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr, sizeof(ipn->ipn_fr)); /* * Last but not least, if there is an application proxy session set * up for this NAT entry, then copy that out too, including any * private data saved along side it by the proxy. */ aps = nat->nat_aps; outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data); if (aps != NULL) { char *s; if (outsize < sizeof(*aps)) { IPFERROR(60032); error = ENOBUFS; goto finished; } s = ipn->ipn_data; bcopy((char *)aps, s, sizeof(*aps)); s += sizeof(*aps); outsize -= sizeof(*aps); if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz)) bcopy(aps->aps_data, s, aps->aps_psiz); else { IPFERROR(60033); error = ENOBUFS; } } if (error == 0) { if (getlock) { RWLOCK_EXIT(&softc->ipf_nat); getlock = 0; } error = ipf_outobjsz(softc, data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize); } finished: if (getlock) { RWLOCK_EXIT(&softc->ipf_nat); } if (ipn != NULL) { KFREES(ipn, ipns.ipn_dsize); } return error; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_putent */ /* Returns: int - 0 == success, != 0 is the error value. */ /* Parameters: softc(I) - pointer to soft context main structure */ /* data(I) - pointer to natget structure with NAT */ /* structure information to load into the kernel */ /* getlock(I) - flag indicating whether or not a write lock */ /* on is already held. */ /* */ /* Handle SIOCSTPUT. */ /* Loads a NAT table entry from user space, including a NAT rule, proxy and */ /* firewall rule data structures, if pointers to them indicate so. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_putent(softc, data, getlock) ipf_main_softc_t *softc; caddr_t data; int getlock; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; nat_save_t ipn, *ipnn; ap_session_t *aps; nat_t *n, *nat; frentry_t *fr; fr_info_t fin; ipnat_t *in; int error; error = ipf_inobj(softc, data, NULL, &ipn, IPFOBJ_NATSAVE); if (error != 0) return error; /* * Initialise early because of code at junkput label. */ n = NULL; in = NULL; aps = NULL; nat = NULL; ipnn = NULL; fr = NULL; /* * New entry, copy in the rest of the NAT entry if its size is more * than just the nat_t structure. */ if (ipn.ipn_dsize > sizeof(ipn)) { if (ipn.ipn_dsize > 81920) { IPFERROR(60034); error = ENOMEM; goto junkput; } KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize); if (ipnn == NULL) { IPFERROR(60035); return ENOMEM; } bzero(ipnn, ipn.ipn_dsize); error = ipf_inobjsz(softc, data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize); if (error != 0) { goto junkput; } } else ipnn = &ipn; KMALLOC(nat, nat_t *); if (nat == NULL) { IPFERROR(60037); error = ENOMEM; goto junkput; } bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat)); switch (nat->nat_v[0]) { case 4: #ifdef USE_INET6 case 6 : #endif break; default : IPFERROR(60061); error = EPROTONOSUPPORT; goto junkput; /*NOTREACHED*/ } /* * Initialize all these so that ipf_nat_delete() doesn't cause a crash.
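 * (The bzero below clears every field that precedes the embedded timeout
 * queue entry, i.e. all of the list and hash linkage pointers, so that a
 * partially restored session can be handed to ipf_nat_delete() safely; the
 * tqe pointers themselves are then reset by hand.)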
*/ bzero((char *)nat, offsetof(struct nat, nat_tqe)); nat->nat_tqe.tqe_pnext = NULL; nat->nat_tqe.tqe_next = NULL; nat->nat_tqe.tqe_ifq = NULL; nat->nat_tqe.tqe_parent = nat; /* * Restore the rule associated with this nat session */ in = ipnn->ipn_nat.nat_ptr; if (in != NULL) { KMALLOCS(in, ipnat_t *, ipnn->ipn_ipnat.in_size); nat->nat_ptr = in; if (in == NULL) { IPFERROR(60038); error = ENOMEM; goto junkput; } bcopy((char *)&ipnn->ipn_ipnat, (char *)in, ipnn->ipn_ipnat.in_size); in->in_use = 1; in->in_flags |= IPN_DELETE; ATOMIC_INC32(softn->ipf_nat_stats.ns_rules); if (ipf_nat_resolverule(softc, in) != 0) { IPFERROR(60039); error = ESRCH; goto junkput; } } /* * Check that the NAT entry doesn't already exist in the kernel. * * For NAT_OUTBOUND, we're looking for a duplicate MAP entry. To do * this, we check to see if the inbound combination of addresses and * ports is already known. Similar logic is applied for NAT_INBOUND. * */ bzero((char *)&fin, sizeof(fin)); fin.fin_v = nat->nat_v[0]; fin.fin_p = nat->nat_pr[0]; fin.fin_rev = nat->nat_rev; fin.fin_ifp = nat->nat_ifps[0]; fin.fin_data[0] = ntohs(nat->nat_ndport); fin.fin_data[1] = ntohs(nat->nat_nsport); switch (nat->nat_dir) { case NAT_OUTBOUND : case NAT_DIVERTOUT : if (getlock) { READ_ENTER(&softc->ipf_nat); } fin.fin_v = nat->nat_v[1]; if (nat->nat_v[1] == 4) { n = ipf_nat_inlookup(&fin, nat->nat_flags, fin.fin_p, nat->nat_ndstip, nat->nat_nsrcip); #ifdef USE_INET6 } else if (nat->nat_v[1] == 6) { n = ipf_nat6_inlookup(&fin, nat->nat_flags, fin.fin_p, &nat->nat_ndst6.in6, &nat->nat_nsrc6.in6); #endif } if (getlock) { RWLOCK_EXIT(&softc->ipf_nat); } if (n != NULL) { IPFERROR(60040); error = EEXIST; goto junkput; } break; case NAT_INBOUND : case NAT_DIVERTIN : if (getlock) { READ_ENTER(&softc->ipf_nat); } if (fin.fin_v == 4) { n = ipf_nat_outlookup(&fin, nat->nat_flags, fin.fin_p, nat->nat_ndstip, nat->nat_nsrcip); #ifdef USE_INET6 } else if (fin.fin_v == 6) { n = ipf_nat6_outlookup(&fin, nat->nat_flags, fin.fin_p, &nat->nat_ndst6.in6, &nat->nat_nsrc6.in6); #endif } if (getlock) { RWLOCK_EXIT(&softc->ipf_nat); } if (n != NULL) { IPFERROR(60041); error = EEXIST; goto junkput; } break; default : IPFERROR(60042); error = EINVAL; goto junkput; } /* * Restore ap_session_t structure. Include the private data allocated * if it was there. */ aps = nat->nat_aps; if (aps != NULL) { KMALLOC(aps, ap_session_t *); nat->nat_aps = aps; if (aps == NULL) { IPFERROR(60043); error = ENOMEM; goto junkput; } bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps)); if (in != NULL) aps->aps_apr = in->in_apr; else aps->aps_apr = NULL; if (aps->aps_psiz != 0) { if (aps->aps_psiz > 81920) { IPFERROR(60044); error = ENOMEM; goto junkput; } KMALLOCS(aps->aps_data, void *, aps->aps_psiz); if (aps->aps_data == NULL) { IPFERROR(60045); error = ENOMEM; goto junkput; } bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data, aps->aps_psiz); } else { aps->aps_psiz = 0; aps->aps_data = NULL; } } /* * If there was a filtering rule associated with this entry then * build up a new one.
*/ fr = nat->nat_fr; if (fr != NULL) { if ((nat->nat_flags & SI_NEWFR) != 0) { KMALLOC(fr, frentry_t *); nat->nat_fr = fr; if (fr == NULL) { IPFERROR(60046); error = ENOMEM; goto junkput; } ipnn->ipn_nat.nat_fr = fr; fr->fr_ref = 1; (void) ipf_outobj(softc, data, ipnn, IPFOBJ_NATSAVE); bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr)); fr->fr_ref = 1; fr->fr_dsize = 0; fr->fr_data = NULL; fr->fr_type = FR_T_NONE; MUTEX_NUKE(&fr->fr_lock); MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock"); } else { if (getlock) { READ_ENTER(&softc->ipf_nat); } for (n = softn->ipf_nat_instances; n; n = n->nat_next) if (n->nat_fr == fr) break; if (n != NULL) { MUTEX_ENTER(&fr->fr_lock); fr->fr_ref++; MUTEX_EXIT(&fr->fr_lock); } if (getlock) { RWLOCK_EXIT(&softc->ipf_nat); } if (n == NULL) { IPFERROR(60047); error = ESRCH; goto junkput; } } } if (ipnn != &ipn) { KFREES(ipnn, ipn.ipn_dsize); ipnn = NULL; } if (getlock) { WRITE_ENTER(&softc->ipf_nat); } if (fin.fin_v == 4) error = ipf_nat_finalise(&fin, nat); #ifdef USE_INET6 else error = ipf_nat6_finalise(&fin, nat); #endif if (getlock) { RWLOCK_EXIT(&softc->ipf_nat); } if (error == 0) return 0; IPFERROR(60048); error = ENOMEM; junkput: if (fr != NULL) { (void) ipf_derefrule(softc, &fr); } if ((ipnn != NULL) && (ipnn != &ipn)) { KFREES(ipnn, ipn.ipn_dsize); } if (nat != NULL) { if (aps != NULL) { if (aps->aps_data != NULL) { KFREES(aps->aps_data, aps->aps_psiz); } KFREE(aps); } if (in != NULL) { if (in->in_apr) ipf_proxy_deref(in->in_apr); KFREES(in, in->in_size); } KFREE(nat); } return error; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_delete */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to soft context main structure */ /* nat(I) - pointer to NAT structure to delete */ /* logtype(I) - type of LOG record to create before deleting */ /* Write Lock: ipf_nat */ /* */ /* Delete a nat entry from the various lists and table. If NAT logging is */ /* enabled then generate a NAT log record for this event. */ /* ------------------------------------------------------------------------ */ void ipf_nat_delete(softc, nat, logtype) ipf_main_softc_t *softc; struct nat *nat; int logtype; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; int madeorphan = 0, bkt, removed = 0; nat_stat_side_t *nss; struct ipnat *ipn; if (logtype != 0 && softn->ipf_nat_logging != 0) ipf_nat_log(softc, softn, nat, logtype); /* * Take it as a general indication that all the pointers are set if * nat_pnext is set. 
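 * (When it is set, the entry is unlinked from the global instance list and
 * from both hash chains, and the per-bucket occupancy counters kept in the
 * per-side statistics are decremented for both the inbound and outbound
 * tables.)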
*/ if (nat->nat_pnext != NULL) { removed = 1; bkt = nat->nat_hv[0] % softn->ipf_nat_table_sz; nss = &softn->ipf_nat_stats.ns_side[0]; nss->ns_bucketlen[bkt]--; if (nss->ns_bucketlen[bkt] == 0) { nss->ns_inuse--; } bkt = nat->nat_hv[1] % softn->ipf_nat_table_sz; nss = &softn->ipf_nat_stats.ns_side[1]; nss->ns_bucketlen[bkt]--; if (nss->ns_bucketlen[bkt] == 0) { nss->ns_inuse--; } *nat->nat_pnext = nat->nat_next; if (nat->nat_next != NULL) { nat->nat_next->nat_pnext = nat->nat_pnext; nat->nat_next = NULL; } nat->nat_pnext = NULL; *nat->nat_phnext[0] = nat->nat_hnext[0]; if (nat->nat_hnext[0] != NULL) { nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0]; nat->nat_hnext[0] = NULL; } nat->nat_phnext[0] = NULL; *nat->nat_phnext[1] = nat->nat_hnext[1]; if (nat->nat_hnext[1] != NULL) { nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1]; nat->nat_hnext[1] = NULL; } nat->nat_phnext[1] = NULL; if ((nat->nat_flags & SI_WILDP) != 0) { ATOMIC_DEC32(softn->ipf_nat_stats.ns_wilds); } madeorphan = 1; } if (nat->nat_me != NULL) { *nat->nat_me = NULL; nat->nat_me = NULL; nat->nat_ref--; ASSERT(nat->nat_ref >= 0); } if (nat->nat_tqe.tqe_ifq != NULL) { /* * No call to ipf_freetimeoutqueue() is made here, they are * garbage collected in ipf_nat_expire(). */ (void) ipf_deletequeueentry(&nat->nat_tqe); } if (nat->nat_sync) { ipf_sync_del_nat(softc->ipf_sync_soft, nat->nat_sync); nat->nat_sync = NULL; } if (logtype == NL_EXPIRE) softn->ipf_nat_stats.ns_expire++; MUTEX_ENTER(&nat->nat_lock); /* * NL_DESTROY should only be passed in when we've got nat_ref >= 2. * This happens when a nat'd packet is blocked and we want to throw * away the NAT session. */ if (logtype == NL_DESTROY) { if (nat->nat_ref > 2) { nat->nat_ref -= 2; MUTEX_EXIT(&nat->nat_lock); if (removed) softn->ipf_nat_stats.ns_orphans++; return; } } else if (nat->nat_ref > 1) { nat->nat_ref--; MUTEX_EXIT(&nat->nat_lock); if (madeorphan == 1) softn->ipf_nat_stats.ns_orphans++; return; } ASSERT(nat->nat_ref >= 0); MUTEX_EXIT(&nat->nat_lock); nat->nat_ref = 0; if (madeorphan == 0) softn->ipf_nat_stats.ns_orphans--; /* * At this point, nat_ref can be either 0 or -1 */ softn->ipf_nat_stats.ns_proto[nat->nat_pr[0]]--; if (nat->nat_fr != NULL) { (void) ipf_derefrule(softc, &nat->nat_fr); } if (nat->nat_hm != NULL) { ipf_nat_hostmapdel(softc, &nat->nat_hm); } /* * If there is an active reference from the nat entry to its parent * rule, decrement the rule's reference count and free it too if no * longer being used. */ ipn = nat->nat_ptr; nat->nat_ptr = NULL; if (ipn != NULL) { ipn->in_space++; ipf_nat_rule_deref(softc, &ipn); } if (nat->nat_aps != NULL) { ipf_proxy_free(softc, nat->nat_aps); nat->nat_aps = NULL; } MUTEX_DESTROY(&nat->nat_lock); softn->ipf_nat_stats.ns_active--; /* * If there's a fragment table entry too for this nat entry, then * dereference that as well. This is after nat_lock is released * because of Tru64. */ ipf_frag_natforget(softc, (void *)nat); KFREE(nat); } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_flushtable */ /* Returns: int - number of NAT rules deleted */ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* Write Lock: ipf_nat */ /* */ /* Deletes all currently active NAT sessions. In deleting each NAT entry a */ /* log record should be emitted in ipf_nat_delete() if NAT logging is */ /* enabled. 
*/ /* ------------------------------------------------------------------------ */ /* * nat_flushtable - clear the NAT table of all mapping entries. */ static int ipf_nat_flushtable(softc, softn) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; { nat_t *nat; int j = 0; /* * ALL NAT mappings deleted, so let's just make the deletions * quicker. */ if (softn->ipf_nat_table[0] != NULL) bzero((char *)softn->ipf_nat_table[0], sizeof(softn->ipf_nat_table[0]) * softn->ipf_nat_table_sz); if (softn->ipf_nat_table[1] != NULL) bzero((char *)softn->ipf_nat_table[1], sizeof(softn->ipf_nat_table[1]) * softn->ipf_nat_table_sz); while ((nat = softn->ipf_nat_instances) != NULL) { ipf_nat_delete(softc, nat, NL_FLUSH); j++; } return j; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_clearlist */ /* Returns: int - number of NAT/RDR rules deleted */ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* */ /* Delete all rules in the current list of rules. There is nothing elegant */ /* about this cleanup: simply free all entries on the list of rules and */ /* clear out the tables used for hashed NAT rule lookups. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_clearlist(softc, softn) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; { ipnat_t *n; int i = 0; if (softn->ipf_nat_map_rules != NULL) { bzero((char *)softn->ipf_nat_map_rules, sizeof(*softn->ipf_nat_map_rules) * softn->ipf_nat_maprules_sz); } if (softn->ipf_nat_rdr_rules != NULL) { bzero((char *)softn->ipf_nat_rdr_rules, sizeof(*softn->ipf_nat_rdr_rules) * softn->ipf_nat_rdrrules_sz); } while ((n = softn->ipf_nat_list) != NULL) { ipf_nat_delrule(softc, softn, n, 0); i++; } #if SOLARIS && !defined(INSTANCES) pfil_delayed_copy = 1; #endif return i; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_delrule */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* np(I) - pointer to NAT rule to delete */ /* purge(I) - 1 == allow purge, 0 == prevent purge */ /* Locks: WRITE(ipf_nat) */ /* */ /* Preventing "purge" from occurring is allowed because when all of the NAT */ /* rules are being removed, allowing the "purge" to walk through the list */ /* of NAT sessions, possibly multiple times, would be a large performance */ /* hit, on the order of O(N^2).
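 *
 * To put a number on it (an illustration, not from the original
 * comment): with R rules and N active sessions, a purge per rule
 * walks the session list once, so removing every rule that way
 * costs O(R*N); passing purge == 0 from ipf_nat_clearlist() avoids
 * that and leaves session teardown to a single O(N) flush.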
*/ /* ------------------------------------------------------------------------ */ static void ipf_nat_delrule(softc, softn, np, purge) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; ipnat_t *np; int purge; { if (np->in_pnext != NULL) { *np->in_pnext = np->in_next; if (np->in_next != NULL) np->in_next->in_pnext = np->in_pnext; if (softn->ipf_nat_list_tail == &np->in_next) softn->ipf_nat_list_tail = np->in_pnext; } if ((purge == 1) && ((np->in_flags & IPN_PURGE) != 0)) { nat_t *next; nat_t *nat; for (next = softn->ipf_nat_instances; (nat = next) != NULL;) { next = nat->nat_next; if (nat->nat_ptr == np) ipf_nat_delete(softc, nat, NL_PURGE); } } if ((np->in_flags & IPN_DELETE) == 0) { if (np->in_redir & NAT_REDIRECT) { switch (np->in_v[0]) { case 4 : ipf_nat_delrdr(softn, np); break; #ifdef USE_INET6 case 6 : ipf_nat6_delrdr(softn, np); break; #endif } } if (np->in_redir & (NAT_MAPBLK|NAT_MAP)) { switch (np->in_v[0]) { case 4 : ipf_nat_delmap(softn, np); break; #ifdef USE_INET6 case 6 : ipf_nat6_delmap(softn, np); break; #endif } } } np->in_flags |= IPN_DELETE; ipf_nat_rule_deref(softc, &np); } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_newmap */ /* Returns: int - -1 == error, 0 == success */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to NAT entry */ /* ni(I) - pointer to structure with misc. information needed */ /* to create new NAT entry. */ /* */ /* Given an empty NAT structure, populate it with new information about a */ /* new NAT session, as defined by the matching NAT rule. */ /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/ /* to the new IP address for the translation. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_newmap(fin, nat, ni) fr_info_t *fin; nat_t *nat; natinfo_t *ni; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; u_short st_port, dport, sport, port, sp, dp; struct in_addr in, inb; hostmap_t *hm; u_32_t flags; u_32_t st_ip; ipnat_t *np; nat_t *natl; int l; /* * If it's an outbound packet which doesn't match any existing * record, then create a new port */ l = 0; hm = NULL; np = ni->nai_np; st_ip = np->in_snip; st_port = np->in_spnext; flags = nat->nat_flags; if (flags & IPN_ICMPQUERY) { sport = fin->fin_data[1]; dport = 0; } else { sport = htons(fin->fin_data[0]); dport = htons(fin->fin_data[1]); } /* * Do a loop until we either run out of entries to try or we find * a NAT mapping that isn't currently being used. This is done * because the change to the source is not (usually) being fixed. */ do { port = 0; in.s_addr = htonl(np->in_snip); if (l == 0) { /* * Check to see if there is an existing NAT * setup for this IP address pair. 
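 * A hit here means an earlier session from the same address pair
 * already chose an external address, recorded in the hostmap;
 * reusing hm_nsrcip below keeps the internal host's outside
 * identity stable across sessions.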
*/ hm = ipf_nat_hostmap(softn, np, fin->fin_src, fin->fin_dst, in, 0); if (hm != NULL) in.s_addr = hm->hm_nsrcip.s_addr; } else if ((l == 1) && (hm != NULL)) { ipf_nat_hostmapdel(softc, &hm); } in.s_addr = ntohl(in.s_addr); nat->nat_hm = hm; if ((np->in_nsrcmsk == 0xffffffff) && (np->in_spnext == 0)) { if (l > 0) { NBUMPSIDEX(1, ns_exhausted, ns_exhausted_1); return -1; } } if (np->in_redir == NAT_BIMAP && np->in_osrcmsk == np->in_nsrcmsk) { /* * map the address block in a 1:1 fashion */ in.s_addr = np->in_nsrcaddr; in.s_addr |= fin->fin_saddr & ~np->in_osrcmsk; in.s_addr = ntohl(in.s_addr); } else if (np->in_redir & NAT_MAPBLK) { if ((l >= np->in_ppip) || ((l > 0) && !(flags & IPN_TCPUDP))) { NBUMPSIDEX(1, ns_exhausted, ns_exhausted_2); return -1; } /* * map-block - Calculate destination address. */ in.s_addr = ntohl(fin->fin_saddr); in.s_addr &= ntohl(~np->in_osrcmsk); inb.s_addr = in.s_addr; in.s_addr /= np->in_ippip; in.s_addr &= ntohl(~np->in_nsrcmsk); in.s_addr += ntohl(np->in_nsrcaddr); /* * Calculate destination port. */ if ((flags & IPN_TCPUDP) && (np->in_ppip != 0)) { port = ntohs(sport) + l; port %= np->in_ppip; port += np->in_ppip * (inb.s_addr % np->in_ippip); port += MAPBLK_MINPORT; port = htons(port); } } else if ((np->in_nsrcaddr == 0) && (np->in_nsrcmsk == 0xffffffff)) { i6addr_t in6; /* * 0/32 - use the interface's IP address. */ if ((l > 0) || ipf_ifpaddr(softc, 4, FRI_NORMAL, fin->fin_ifp, &in6, NULL) == -1) { NBUMPSIDEX(1, ns_new_ifpaddr, ns_new_ifpaddr_1); return -1; } in.s_addr = ntohl(in6.in4.s_addr); } else if ((np->in_nsrcaddr == 0) && (np->in_nsrcmsk == 0)) { /* * 0/0 - use the original source address/port. */ if (l > 0) { NBUMPSIDEX(1, ns_exhausted, ns_exhausted_3); return -1; } in.s_addr = ntohl(fin->fin_saddr); } else if ((np->in_nsrcmsk != 0xffffffff) && (np->in_spnext == 0) && ((l > 0) || (hm == NULL))) np->in_snip++; natl = NULL; if ((flags & IPN_TCPUDP) && ((np->in_redir & NAT_MAPBLK) == 0) && (np->in_flags & IPN_AUTOPORTMAP)) { /* * "ports auto" (without map-block) */ if ((l > 0) && (l % np->in_ppip == 0)) { if ((l > np->in_ppip) && np->in_nsrcmsk != 0xffffffff) np->in_snip++; } if (np->in_ppip != 0) { port = ntohs(sport); port += (l % np->in_ppip); port %= np->in_ppip; port += np->in_ppip * (ntohl(fin->fin_saddr) % np->in_ippip); port += MAPBLK_MINPORT; port = htons(port); } } else if (((np->in_redir & NAT_MAPBLK) == 0) && (flags & IPN_TCPUDPICMP) && (np->in_spnext != 0)) { /* * Standard port translation. Select next port. */ if (np->in_flags & IPN_SEQUENTIAL) { port = np->in_spnext; } else { port = ipf_random() % (np->in_spmax - np->in_spmin + 1); port += np->in_spmin; } port = htons(port); np->in_spnext++; if (np->in_spnext > np->in_spmax) { np->in_spnext = np->in_spmin; if (np->in_nsrcmsk != 0xffffffff) np->in_snip++; } } if (np->in_flags & IPN_SIPRANGE) { if (np->in_snip > ntohl(np->in_nsrcmsk)) np->in_snip = ntohl(np->in_nsrcaddr); } else { if ((np->in_nsrcmsk != 0xffffffff) && ((np->in_snip + 1) & ntohl(np->in_nsrcmsk)) > ntohl(np->in_nsrcaddr)) np->in_snip = ntohl(np->in_nsrcaddr) + 1; } if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY))) port = sport; /* * Here we do a lookup of the connection as seen from * the outside. If an IP# pair already exists, try * again. So if you have A->B becomes C->B, you can * also have D->E become C->E but not D->B causing * another C->B. Also take protocol and ports into * account when determining whether a pre-existing * NAT setup will cause an external conflict where * this is appropriate. 
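 *
 * A note on mechanics (describing the code just below): the port
 * pair in fin_data[] is temporarily swapped, with the candidate
 * translated port substituted, so ipf_nat_inlookup() sees the
 * tuple the way a reply arriving from outside would; the original
 * values are restored straight after the lookup.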
*/ inb.s_addr = htonl(in.s_addr); sp = fin->fin_data[0]; dp = fin->fin_data[1]; fin->fin_data[0] = fin->fin_data[1]; fin->fin_data[1] = ntohs(port); natl = ipf_nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH), (u_int)fin->fin_p, fin->fin_dst, inb); fin->fin_data[0] = sp; fin->fin_data[1] = dp; /* * Has the search wrapped around and come back to the * start ? */ if ((natl != NULL) && (np->in_spnext != 0) && (st_port == np->in_spnext) && (np->in_snip != 0) && (st_ip == np->in_snip)) { NBUMPSIDED(1, ns_wrap); return -1; } l++; } while (natl != NULL); /* Setup the NAT table */ nat->nat_osrcip = fin->fin_src; nat->nat_nsrcaddr = htonl(in.s_addr); nat->nat_odstip = fin->fin_dst; nat->nat_ndstip = fin->fin_dst; if (nat->nat_hm == NULL) nat->nat_hm = ipf_nat_hostmap(softn, np, fin->fin_src, fin->fin_dst, nat->nat_nsrcip, 0); if (flags & IPN_TCPUDP) { nat->nat_osport = sport; nat->nat_nsport = port; /* sport */ nat->nat_odport = dport; nat->nat_ndport = dport; ((tcphdr_t *)fin->fin_dp)->th_sport = port; } else if (flags & IPN_ICMPQUERY) { nat->nat_oicmpid = fin->fin_data[1]; ((icmphdr_t *)fin->fin_dp)->icmp_id = port; nat->nat_nicmpid = port; } return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_newrdr */ /* Returns: int - -1 == error, 0 == success (no move), 1 == success and */ /* allow rule to be moved if IPN_ROUNDR is set. */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to NAT entry */ /* ni(I) - pointer to structure with misc. information needed */ /* to create new NAT entry. */ /* */ /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/ /* to the new IP address for the translation. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_newrdr(fin, nat, ni) fr_info_t *fin; nat_t *nat; natinfo_t *ni; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; u_short nport, dport, sport; struct in_addr in, inb; u_short sp, dp; hostmap_t *hm; u_32_t flags; ipnat_t *np; nat_t *natl; int move; move = 1; hm = NULL; in.s_addr = 0; np = ni->nai_np; flags = nat->nat_flags; if (flags & IPN_ICMPQUERY) { dport = fin->fin_data[1]; sport = 0; } else { sport = htons(fin->fin_data[0]); dport = htons(fin->fin_data[1]); } /* TRACE sport, dport */ /* * If the matching rule has IPN_STICKY set, then we want to have the * same rule kick in as before. Why would this happen? If you have * a collection of rdr rules with "round-robin sticky", the current * packet might match a different one to the previous connection but * we want the same destination to be used. */ if (((np->in_flags & (IPN_ROUNDR|IPN_SPLIT)) != 0) && ((np->in_flags & IPN_STICKY) != 0)) { hm = ipf_nat_hostmap(softn, NULL, fin->fin_src, fin->fin_dst, in, (u_32_t)dport); if (hm != NULL) { in.s_addr = ntohl(hm->hm_ndstip.s_addr); np = hm->hm_ipnat; ni->nai_np = np; move = 0; ipf_nat_hostmapdel(softc, &hm); } } /* * Otherwise, it's an inbound packet. Most likely, we don't * want to rewrite source ports and source addresses. Instead, * we want to rewrite to a fixed internal address and fixed * internal port. 
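 *
 * For orientation, the kind of configuration this path implements
 * (an illustrative ipnat rule, not taken from this file):
 *
 *	rdr em0 0.0.0.0/0 port 80 -> 192.168.1.5 port 8080 tcp
 *
 * i.e. anything arriving on em0 to port 80 is redirected to the
 * fixed internal address 192.168.1.5, port 8080.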
*/ if (np->in_flags & IPN_SPLIT) { in.s_addr = np->in_dnip; inb.s_addr = htonl(in.s_addr); if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) { hm = ipf_nat_hostmap(softn, NULL, fin->fin_src, fin->fin_dst, inb, (u_32_t)dport); if (hm != NULL) { in.s_addr = hm->hm_ndstip.s_addr; move = 0; } } if (hm == NULL || hm->hm_ref == 1) { if (np->in_ndstaddr == htonl(in.s_addr)) { np->in_dnip = ntohl(np->in_ndstmsk); move = 0; } else { np->in_dnip = ntohl(np->in_ndstaddr); } } if (hm != NULL) ipf_nat_hostmapdel(softc, &hm); } else if ((np->in_ndstaddr == 0) && (np->in_ndstmsk == 0xffffffff)) { i6addr_t in6; /* * 0/32 - use the interface's IP address. */ if (ipf_ifpaddr(softc, 4, FRI_NORMAL, fin->fin_ifp, &in6, NULL) == -1) { NBUMPSIDEX(0, ns_new_ifpaddr, ns_new_ifpaddr_2); return -1; } in.s_addr = ntohl(in6.in4.s_addr); } else if ((np->in_ndstaddr == 0) && (np->in_ndstmsk== 0)) { /* * 0/0 - use the original destination address/port. */ in.s_addr = ntohl(fin->fin_daddr); } else if (np->in_redir == NAT_BIMAP && np->in_ndstmsk == np->in_odstmsk) { /* * map the address block in a 1:1 fashion */ in.s_addr = np->in_ndstaddr; in.s_addr |= fin->fin_daddr & ~np->in_ndstmsk; in.s_addr = ntohl(in.s_addr); } else { in.s_addr = ntohl(np->in_ndstaddr); } if ((np->in_dpnext == 0) || ((flags & NAT_NOTRULEPORT) != 0)) nport = dport; else { /* * Whilst not optimized for the case where * pmin == pmax, the gain is not significant. */ if (((np->in_flags & IPN_FIXEDDPORT) == 0) && (np->in_odport != np->in_dtop)) { nport = ntohs(dport) - np->in_odport + np->in_dpmax; nport = htons(nport); } else { nport = htons(np->in_dpnext); np->in_dpnext++; if (np->in_dpnext > np->in_dpmax) np->in_dpnext = np->in_dpmin; } } /* * When the redirect-to address is set to 0.0.0.0, just * assume a blank `forwarding' of the packet. We don't * setup any translation for this either. */ if (in.s_addr == 0) { if (nport == dport) { NBUMPSIDED(0, ns_xlate_null); return -1; } in.s_addr = ntohl(fin->fin_daddr); } /* * Check to see if this redirect mapping already exists and if * it does, return "failure" (allowing it to be created will just * cause one or both of these "connections" to stop working.) 
inb.s_addr = htonl(in.s_addr); sp = fin->fin_data[0]; dp = fin->fin_data[1]; fin->fin_data[1] = fin->fin_data[0]; fin->fin_data[0] = ntohs(nport); natl = ipf_nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH), (u_int)fin->fin_p, inb, fin->fin_src); fin->fin_data[0] = sp; fin->fin_data[1] = dp; if (natl != NULL) { DT2(ns_new_xlate_exists, fr_info_t *, fin, nat_t *, natl); NBUMPSIDE(0, ns_xlate_exists); return -1; } inb.s_addr = htonl(in.s_addr); nat->nat_ndstaddr = htonl(in.s_addr); nat->nat_odstip = fin->fin_dst; nat->nat_nsrcip = fin->fin_src; nat->nat_osrcip = fin->fin_src; if ((nat->nat_hm == NULL) && ((np->in_flags & IPN_STICKY) != 0)) nat->nat_hm = ipf_nat_hostmap(softn, np, fin->fin_src, fin->fin_dst, inb, (u_32_t)dport); if (flags & IPN_TCPUDP) { nat->nat_odport = dport; nat->nat_ndport = nport; nat->nat_osport = sport; nat->nat_nsport = sport; ((tcphdr_t *)fin->fin_dp)->th_dport = nport; } else if (flags & IPN_ICMPQUERY) { nat->nat_oicmpid = fin->fin_data[1]; ((icmphdr_t *)fin->fin_dp)->icmp_id = nport; nat->nat_nicmpid = nport; } return move; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_add */ /* Returns: nat_t* - NULL == failure to create new NAT structure, */ /* else pointer to new NAT structure */ /* Parameters: fin(I) - pointer to packet information */ /* np(I) - pointer to NAT rule */ /* natsave(I) - pointer to where to store NAT struct pointer */ /* flags(I) - flags describing the current packet */ /* direction(I) - direction of packet (in/out) */ /* Write Lock: ipf_nat */ /* */ /* Attempts to create a new NAT entry. Does not actually change the packet */ /* in any way. */ /* */ /* This function is in three main parts: (1) deal with creating a new NAT */ /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with */ /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */ /* and (3) building that structure and putting it into the NAT table(s). */ /* */ /* NOTE: natsave should NOT be used to point back to an ipstate_t struct */ /* as it can result in memory being corrupted. */ /* ------------------------------------------------------------------------ */ nat_t * ipf_nat_add(fin, np, natsave, flags, direction) fr_info_t *fin; ipnat_t *np; nat_t **natsave; u_int flags; int direction; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; hostmap_t *hm = NULL; nat_t *nat, *natl; natstat_t *nsp; u_int nflags; natinfo_t ni; int move; nsp = &softn->ipf_nat_stats; if ((nsp->ns_active * 100 / softn->ipf_nat_table_max) > softn->ipf_nat_table_wm_high) { softn->ipf_nat_doflush = 1; } if (nsp->ns_active >= softn->ipf_nat_table_max) { NBUMPSIDED(fin->fin_out, ns_table_max); return NULL; } move = 1; nflags = np->in_flags & flags; nflags &= NAT_FROMRULE; ni.nai_np = np; ni.nai_dport = 0; ni.nai_sport = 0; /* Give me a new nat */ KMALLOC(nat, nat_t *); if (nat == NULL) { NBUMPSIDED(fin->fin_out, ns_memfail); /* * Try to automatically tune the max # of entries in the * table allowed to be less than what will cause kmem_alloc() * to fail and try to eliminate panics due to out of memory * conditions arising. */ if ((softn->ipf_nat_table_max > softn->ipf_nat_table_sz) && (nsp->ns_active > 100)) { softn->ipf_nat_table_max = nsp->ns_active - 100; printf("table_max reduced to %d\n", softn->ipf_nat_table_max); } return NULL; } if (flags & IPN_ICMPQUERY) { /* * In the ICMP query NAT code, we translate the ICMP id fields * to make them unique.
This is independent of the ICMP type * (e.g. in the unlikely event that a host sends an echo and * a tstamp request with the same id, both packets will have * their ip address/id field changed in the same way). */ /* The icmp_id field is used by the sender to identify the * process making the icmp request. (the receiver just * copies it back in its response). So, it closely matches * the concept of source port. We overlay sport, so we can * maximally reuse the existing code. */ ni.nai_sport = fin->fin_data[1]; ni.nai_dport = 0; } bzero((char *)nat, sizeof(*nat)); nat->nat_flags = flags; nat->nat_redir = np->in_redir; nat->nat_dir = direction; nat->nat_pr[0] = fin->fin_p; nat->nat_pr[1] = fin->fin_p; /* * Search the current table for a match and create a new mapping * if there is none found. */ if (np->in_redir & NAT_DIVERTUDP) { move = ipf_nat_newdivert(fin, nat, &ni); } else if (np->in_redir & NAT_REWRITE) { move = ipf_nat_newrewrite(fin, nat, &ni); } else if (direction == NAT_OUTBOUND) { /* * We can now arrange to call this for the same connection * because ipf_nat_new doesn't protect the code path into * this function. */ natl = ipf_nat_outlookup(fin, nflags, (u_int)fin->fin_p, fin->fin_src, fin->fin_dst); if (natl != NULL) { KFREE(nat); nat = natl; goto done; } move = ipf_nat_newmap(fin, nat, &ni); } else { /* * NAT_INBOUND is used for redirect rules */ natl = ipf_nat_inlookup(fin, nflags, (u_int)fin->fin_p, fin->fin_src, fin->fin_dst); if (natl != NULL) { KFREE(nat); nat = natl; goto done; } move = ipf_nat_newrdr(fin, nat, &ni); } if (move == -1) goto badnat; np = ni.nai_np; nat->nat_mssclamp = np->in_mssclamp; nat->nat_me = natsave; nat->nat_fr = fin->fin_fr; nat->nat_rev = fin->fin_rev; nat->nat_ptr = np; nat->nat_dlocal = np->in_dlocal; if ((np->in_apr != NULL) && ((nat->nat_flags & NAT_SLAVE) == 0)) { if (ipf_proxy_new(fin, nat) == -1) { NBUMPSIDED(fin->fin_out, ns_appr_fail); goto badnat; } } nat->nat_ifps[0] = np->in_ifps[0]; if (np->in_ifps[0] != NULL) { COPYIFNAME(np->in_v[0], np->in_ifps[0], nat->nat_ifnames[0]); } nat->nat_ifps[1] = np->in_ifps[1]; if (np->in_ifps[1] != NULL) { COPYIFNAME(np->in_v[1], np->in_ifps[1], nat->nat_ifnames[1]); } if (ipf_nat_finalise(fin, nat) == -1) { goto badnat; } np->in_use++; if ((move == 1) && (np->in_flags & IPN_ROUNDR)) { if ((np->in_redir & (NAT_REDIRECT|NAT_MAP)) == NAT_REDIRECT) { ipf_nat_delrdr(softn, np); ipf_nat_addrdr(softn, np); } else if ((np->in_redir & (NAT_REDIRECT|NAT_MAP)) == NAT_MAP) { ipf_nat_delmap(softn, np); ipf_nat_addmap(softn, np); } } if (flags & SI_WILDP) nsp->ns_wilds++; nsp->ns_proto[nat->nat_pr[0]]++; goto done; badnat: DT2(ns_badnatnew, fr_info_t *, fin, nat_t *, nat); NBUMPSIDE(fin->fin_out, ns_badnatnew); if ((hm = nat->nat_hm) != NULL) ipf_nat_hostmapdel(softc, &hm); KFREE(nat); nat = NULL; done: if (nat != NULL && np != NULL) np->in_hits++; if (natsave != NULL) *natsave = nat; return nat; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_finalise */ /* Returns: int - 0 == success, -1 == failure */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to NAT entry */ /* Write Lock: ipf_nat */ /* */ /* This is the tail end of constructing a new NAT entry and is the same */ /* for both IPv4 and IPv6.
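 *
 * The sums cached below (nat_sumd[], nat_ipsumd) rely on the usual
 * incremental checksum update idea (cf. RFC 1624): precompute the
 * folded difference between old and new values once, then apply it
 * to each packet's checksum instead of re-summing the header.  A
 * minimal sketch of one such update, with hypothetical names
 * oval/nval for the old and new 16-bit fields:
 *
 *	u_32_t sum = (~cksum & 0xffff) + (~oval & 0xffff) + nval;
 *	sum = (sum & 0xffff) + (sum >> 16);	// fold the carry
 *	cksum = ~(sum + (sum >> 16)) & 0xffff;	// final 1's complement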
*/ /* ------------------------------------------------------------------------ */ /*ARGSUSED*/ static int ipf_nat_finalise(fin, nat) fr_info_t *fin; nat_t *nat; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; u_32_t sum1, sum2, sumd; frentry_t *fr; u_32_t flags; #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC) qpktinfo_t *qpi = fin->fin_qpi; #endif flags = nat->nat_flags; switch (nat->nat_pr[0]) { case IPPROTO_ICMP : sum1 = LONG_SUM(ntohs(nat->nat_oicmpid)); sum2 = LONG_SUM(ntohs(nat->nat_nicmpid)); CALC_SUMD(sum1, sum2, sumd); nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); break; default : sum1 = LONG_SUM(ntohl(nat->nat_osrcaddr) + \ ntohs(nat->nat_osport)); sum2 = LONG_SUM(ntohl(nat->nat_nsrcaddr) + \ ntohs(nat->nat_nsport)); CALC_SUMD(sum1, sum2, sumd); nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); sum1 = LONG_SUM(ntohl(nat->nat_odstaddr) + \ ntohs(nat->nat_odport)); sum2 = LONG_SUM(ntohl(nat->nat_ndstaddr) + \ ntohs(nat->nat_ndport)); CALC_SUMD(sum1, sum2, sumd); nat->nat_sumd[0] += (sumd & 0xffff) + (sumd >> 16); break; } /* * Compute the partial checksum, just in case. * This is only ever placed into outbound packets so care needs * to be taken over which pair of addresses are used. */ if (nat->nat_dir == NAT_OUTBOUND) { sum1 = LONG_SUM(ntohl(nat->nat_nsrcaddr)); sum1 += LONG_SUM(ntohl(nat->nat_ndstaddr)); } else { sum1 = LONG_SUM(ntohl(nat->nat_osrcaddr)); sum1 += LONG_SUM(ntohl(nat->nat_odstaddr)); } sum1 += nat->nat_pr[1]; nat->nat_sumd[1] = (sum1 & 0xffff) + (sum1 >> 16); sum1 = LONG_SUM(ntohl(nat->nat_osrcaddr)); sum2 = LONG_SUM(ntohl(nat->nat_nsrcaddr)); CALC_SUMD(sum1, sum2, sumd); nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16); sum1 = LONG_SUM(ntohl(nat->nat_odstaddr)); sum2 = LONG_SUM(ntohl(nat->nat_ndstaddr)); CALC_SUMD(sum1, sum2, sumd); nat->nat_ipsumd += (sumd & 0xffff) + (sumd >> 16); nat->nat_v[0] = 4; nat->nat_v[1] = 4; if ((nat->nat_ifps[0] != NULL) && (nat->nat_ifps[0] != (void *)-1)) { nat->nat_mtu[0] = GETIFMTU_4(nat->nat_ifps[0]); } if ((nat->nat_ifps[1] != NULL) && (nat->nat_ifps[1] != (void *)-1)) { nat->nat_mtu[1] = GETIFMTU_4(nat->nat_ifps[1]); } if ((nat->nat_flags & SI_CLONE) == 0) nat->nat_sync = ipf_sync_new(softc, SMC_NAT, fin, nat); if (ipf_nat_insert(softc, softn, nat) == 0) { if (softn->ipf_nat_logging) ipf_nat_log(softc, softn, nat, NL_NEW); fr = nat->nat_fr; if (fr != NULL) { MUTEX_ENTER(&fr->fr_lock); fr->fr_ref++; MUTEX_EXIT(&fr->fr_lock); } return 0; } NBUMPSIDED(fin->fin_out, ns_unfinalised); /* * nat_insert failed, so cleanup time... */ if (nat->nat_sync != NULL) ipf_sync_del_nat(softc->ipf_sync_soft, nat->nat_sync); return -1; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_insert */ /* Returns: int - 0 == success, -1 == failure */ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* nat(I) - pointer to NAT structure */ /* Write Lock: ipf_nat */ /* */ /* Insert a NAT entry into the hash tables for searching and add it to the */ /* list of active NAT entries. Adjust global counters when complete. */ /* ------------------------------------------------------------------------ */ int ipf_nat_insert(softc, softn, nat) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; nat_t *nat; { u_int hv0, hv1; u_int sp, dp; ipnat_t *in; /* * Try and return an error as early as possible, so calculate the hash * entry numbers first and then proceed.
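 *
 * In outline (restating the code below, nothing new): every entry is
 * keyed twice so it can be found from either direction, once by the
 * original tuple and once by the translated one:
 *
 *	hv0 <- NAT_HASH_FN over (osrc, osport, odst, odport)
 *	hv1 <- NAT_HASH_FN over (nsrc, nsport, ndst, ndport)
 *
 * each later reduced modulo ipf_nat_table_sz to pick a bucket in the
 * inbound/outbound hash tables.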
*/ if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) { if ((nat->nat_flags & IPN_TCPUDP) != 0) { sp = nat->nat_osport; dp = nat->nat_odport; } else if ((nat->nat_flags & IPN_ICMPQUERY) != 0) { sp = 0; dp = nat->nat_oicmpid; } else { sp = 0; dp = 0; } hv0 = NAT_HASH_FN(nat->nat_osrcaddr, sp, 0xffffffff); hv0 = NAT_HASH_FN(nat->nat_odstaddr, hv0 + dp, 0xffffffff); /* * TRACE nat_osrcaddr, nat_osport, nat_odstaddr, * nat_odport, hv0 */ if ((nat->nat_flags & IPN_TCPUDP) != 0) { sp = nat->nat_nsport; dp = nat->nat_ndport; } else if ((nat->nat_flags & IPN_ICMPQUERY) != 0) { sp = 0; dp = nat->nat_nicmpid; } else { sp = 0; dp = 0; } hv1 = NAT_HASH_FN(nat->nat_nsrcaddr, sp, 0xffffffff); hv1 = NAT_HASH_FN(nat->nat_ndstaddr, hv1 + dp, 0xffffffff); /* * TRACE nat_nsrcaddr, nat_nsport, nat_ndstaddr, * nat_ndport, hv1 */ } else { hv0 = NAT_HASH_FN(nat->nat_osrcaddr, 0, 0xffffffff); hv0 = NAT_HASH_FN(nat->nat_odstaddr, hv0, 0xffffffff); /* TRACE nat_osrcaddr, nat_odstaddr, hv0 */ hv1 = NAT_HASH_FN(nat->nat_nsrcaddr, 0, 0xffffffff); hv1 = NAT_HASH_FN(nat->nat_ndstaddr, hv1, 0xffffffff); /* TRACE nat_nsrcaddr, nat_ndstaddr, hv1 */ } nat->nat_hv[0] = hv0; nat->nat_hv[1] = hv1; MUTEX_INIT(&nat->nat_lock, "nat entry lock"); in = nat->nat_ptr; nat->nat_ref = nat->nat_me ? 2 : 1; nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0'; nat->nat_ifps[0] = ipf_resolvenic(softc, nat->nat_ifnames[0], 4); if (nat->nat_ifnames[1][0] != '\0') { nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0'; nat->nat_ifps[1] = ipf_resolvenic(softc, nat->nat_ifnames[1], 4); } else if (in->in_ifnames[1] != -1) { char *name; name = in->in_names + in->in_ifnames[1]; if (name[1] != '\0' && name[0] != '-' && name[0] != '*') { (void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0], LIFNAMSIZ); nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0'; nat->nat_ifps[1] = nat->nat_ifps[0]; } } if ((nat->nat_ifps[0] != NULL) && (nat->nat_ifps[0] != (void *)-1)) { nat->nat_mtu[0] = GETIFMTU_4(nat->nat_ifps[0]); } if ((nat->nat_ifps[1] != NULL) && (nat->nat_ifps[1] != (void *)-1)) { nat->nat_mtu[1] = GETIFMTU_4(nat->nat_ifps[1]); } return ipf_nat_hashtab_add(softc, softn, nat); } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_hashtab_add */ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* nat(I) - pointer to NAT structure */ /* */ /* Handle the insertion of a NAT entry into the table/list. */ /* ------------------------------------------------------------------------ */ int ipf_nat_hashtab_add(softc, softn, nat) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; nat_t *nat; { nat_t **natp; u_int hv0; u_int hv1; hv0 = nat->nat_hv[0] % softn->ipf_nat_table_sz; hv1 = nat->nat_hv[1] % softn->ipf_nat_table_sz; if (nat->nat_dir == NAT_INBOUND || nat->nat_dir == NAT_DIVERTIN) { u_int swap; swap = hv0; hv0 = hv1; hv1 = swap; } if (softn->ipf_nat_stats.ns_side[0].ns_bucketlen[hv0] >= softn->ipf_nat_maxbucket) { DT1(ns_bucket_max_0, int, softn->ipf_nat_stats.ns_side[0].ns_bucketlen[hv0]); NBUMPSIDE(0, ns_bucket_max); return -1; } if (softn->ipf_nat_stats.ns_side[1].ns_bucketlen[hv1] >= softn->ipf_nat_maxbucket) { DT1(ns_bucket_max_1, int, softn->ipf_nat_stats.ns_side[1].ns_bucketlen[hv1]); NBUMPSIDE(1, ns_bucket_max); return -1; } /* * The ordering of operations in the list and hash table insertion * is very important. 
The last operation for each task should be * to update the top of the list, after all the "nexts" have been * done so that walking the list while it is being done does not * find strange pointers. * * Global list of NAT instances */ nat->nat_next = softn->ipf_nat_instances; nat->nat_pnext = &softn->ipf_nat_instances; if (softn->ipf_nat_instances) softn->ipf_nat_instances->nat_pnext = &nat->nat_next; softn->ipf_nat_instances = nat; /* * Inbound hash table. */ natp = &softn->ipf_nat_table[0][hv0]; nat->nat_phnext[0] = natp; nat->nat_hnext[0] = *natp; if (*natp) { (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; } else { NBUMPSIDE(0, ns_inuse); } *natp = nat; NBUMPSIDE(0, ns_bucketlen[hv0]); /* * Outbound hash table. */ natp = &softn->ipf_nat_table[1][hv1]; nat->nat_phnext[1] = natp; nat->nat_hnext[1] = *natp; if (*natp) (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; else { NBUMPSIDE(1, ns_inuse); } *natp = nat; NBUMPSIDE(1, ns_bucketlen[hv1]); ipf_nat_setqueue(softc, softn, nat); if (nat->nat_dir & NAT_OUTBOUND) { NBUMPSIDE(1, ns_added); } else { NBUMPSIDE(0, ns_added); } softn->ipf_nat_stats.ns_active++; return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_icmperrorlookup */ /* Returns: nat_t* - pointer to matching NAT structure */ /* Parameters: fin(I) - pointer to packet information */ /* dir(I) - direction of packet (in/out) */ /* */ /* Check if the ICMP error message is related to an existing TCP, UDP or */ /* ICMP query nat entry. It is assumed that the packet is already of the */ /* required length. */ /* ------------------------------------------------------------------------ */ nat_t * ipf_nat_icmperrorlookup(fin, dir) fr_info_t *fin; int dir; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; int flags = 0, type, minlen; icmphdr_t *icmp, *orgicmp; nat_stat_side_t *nside; tcphdr_t *tcp = NULL; u_short data[2]; nat_t *nat; ip_t *oip; u_int p; icmp = fin->fin_dp; type = icmp->icmp_type; nside = &softn->ipf_nat_stats.ns_side[fin->fin_out]; /* * Does it at least have the return (basic) IP header ? * Only a basic IP header (no options) should be with an ICMP error * header. Also, if it's not an error type, then return. */ if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR)) { ATOMIC_INCL(nside->ns_icmp_basic); return NULL; } /* * Check packet size */ oip = (ip_t *)((char *)fin->fin_dp + 8); minlen = IP_HL(oip) << 2; if ((minlen < sizeof(ip_t)) || (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)) { ATOMIC_INCL(nside->ns_icmp_size); return NULL; } /* * Is the buffer big enough for all of it ? It's the size of the IP * header claimed in the encapsulated part which is of concern. It * may be too big to be in this buffer but not so big that it's * outside the ICMP packet, leading to TCP deref's causing problems. * This is possible because we don't know how big oip_hl is when we * do the pullup early in ipf_check() and thus can't guarantee it is * all here now.
*/ #ifdef _KERNEL { mb_t *m; m = fin->fin_m; # if defined(MENTAT) if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr) { ATOMIC_INCL(nside->ns_icmp_mbuf); return NULL; } # else if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)fin->fin_ip + M_LEN(m)) { ATOMIC_INCL(nside->ns_icmp_mbuf); return NULL; } # endif } #endif if (fin->fin_daddr != oip->ip_src.s_addr) { ATOMIC_INCL(nside->ns_icmp_address); return NULL; } p = oip->ip_p; if (p == IPPROTO_TCP) flags = IPN_TCP; else if (p == IPPROTO_UDP) flags = IPN_UDP; else if (p == IPPROTO_ICMP) { orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2)); /* see if this is related to an ICMP query */ if (ipf_nat_icmpquerytype(orgicmp->icmp_type)) { data[0] = fin->fin_data[0]; data[1] = fin->fin_data[1]; fin->fin_data[0] = 0; fin->fin_data[1] = orgicmp->icmp_id; flags = IPN_ICMPERR|IPN_ICMPQUERY; /* * NOTE : dir refers to the direction of the original * ip packet. By definition the icmp error * message flows in the opposite direction. */ if (dir == NAT_INBOUND) nat = ipf_nat_inlookup(fin, flags, p, oip->ip_dst, oip->ip_src); else nat = ipf_nat_outlookup(fin, flags, p, oip->ip_dst, oip->ip_src); fin->fin_data[0] = data[0]; fin->fin_data[1] = data[1]; return nat; } } if (flags & IPN_TCPUDP) { minlen += 8; /* + 64bits of data to get ports */ /* TRACE (fin,minlen) */ if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen) { ATOMIC_INCL(nside->ns_icmp_short); return NULL; } data[0] = fin->fin_data[0]; data[1] = fin->fin_data[1]; tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2)); fin->fin_data[0] = ntohs(tcp->th_dport); fin->fin_data[1] = ntohs(tcp->th_sport); if (dir == NAT_INBOUND) { nat = ipf_nat_inlookup(fin, flags, p, oip->ip_dst, oip->ip_src); } else { nat = ipf_nat_outlookup(fin, flags, p, oip->ip_dst, oip->ip_src); } fin->fin_data[0] = data[0]; fin->fin_data[1] = data[1]; return nat; } if (dir == NAT_INBOUND) nat = ipf_nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src); else nat = ipf_nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src); return nat; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_icmperror */ /* Returns: nat_t* - pointer to matching NAT structure */ /* Parameters: fin(I) - pointer to packet information */ /* nflags(I) - NAT flags for this packet */ /* dir(I) - direction of packet (in/out) */ /* */ /* Fix up an ICMP packet which is an error message for an existing NAT */ /* session. This will correct both packet header data and checksums. */ /* */ /* This should *ONLY* be used for incoming ICMP error packets to make sure */ /* a NAT'd ICMP packet gets correctly recognised. */ /* ------------------------------------------------------------------------ */ nat_t * ipf_nat_icmperror(fin, nflags, dir) fr_info_t *fin; u_int *nflags; int dir; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; u_32_t sum1, sum2, sumd, sumd2; struct in_addr a1, a2, a3, a4; int flags, dlen, odst; icmphdr_t *icmp; u_short *csump; tcphdr_t *tcp; nat_t *nat; ip_t *oip; void *dp; if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) { NBUMPSIDED(fin->fin_out, ns_icmp_short); return NULL; } /* * ipf_nat_icmperrorlookup() will return NULL for `defective' packets.
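 * ("Defective" covers the cases that function rejects and counts in
 * the ns_icmp_* statistics: not a basic-header ICMP error, an inner
 * IP header that is too short or extends past the buffer, or an
 * inner source address that does not match the outer destination.)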
*/ if ((fin->fin_v != 4) || !(nat = ipf_nat_icmperrorlookup(fin, dir))) { NBUMPSIDED(fin->fin_out, ns_icmp_notfound); return NULL; } tcp = NULL; csump = NULL; flags = 0; sumd2 = 0; *nflags = IPN_ICMPERR; icmp = fin->fin_dp; oip = (ip_t *)&icmp->icmp_ip; dp = (((char *)oip) + (IP_HL(oip) << 2)); if (oip->ip_p == IPPROTO_TCP) { tcp = (tcphdr_t *)dp; csump = (u_short *)&tcp->th_sum; flags = IPN_TCP; } else if (oip->ip_p == IPPROTO_UDP) { udphdr_t *udp; udp = (udphdr_t *)dp; tcp = (tcphdr_t *)dp; csump = (u_short *)&udp->uh_sum; flags = IPN_UDP; } else if (oip->ip_p == IPPROTO_ICMP) flags = IPN_ICMPQUERY; dlen = fin->fin_plen - ((char *)dp - (char *)fin->fin_ip); /* * Need to adjust ICMP header to include the real IP#'s and * port #'s. Only apply a checksum change relative to the * IP address change as it will be modified again in ipf_nat_checkout * for both address and port. Two checksum changes are * necessary for the two header address changes. Be careful * to only modify the checksum once for the port # and twice * for the IP#. */ /* * Step 1 * Fix the IP addresses in the offending IP packet. You also need * to adjust the IP header checksum of that offending IP packet. * * Normally, you would expect that the ICMP checksum of the * ICMP error message needs to be adjusted as well for the * IP address change in oip. * However, this is a NOP, because the ICMP checksum is * calculated over the complete ICMP packet, which includes the * changed oip IP addresses and oip->ip_sum. However, these * two changes cancel each other out (if the delta for * the IP address is x, then the delta for ip_sum is minus x), * so no change in the icmp_cksum is necessary. * * Inbound ICMP * ------------ * MAP rule, SRC=a,DST=b -> SRC=c,DST=b * - response to outgoing packet (a,b)=>(c,b) (OIP_SRC=c,OIP_DST=b) * - OIP_SRC(c)=nat_newsrcip, OIP_DST(b)=nat_newdstip *=> OIP_SRC(c)=nat_oldsrcip, OIP_DST(b)=nat_olddstip * * RDR rule, SRC=a,DST=b -> SRC=a,DST=c * - response to outgoing packet (c,a)=>(b,a) (OIP_SRC=b,OIP_DST=a) * - OIP_SRC(b)=nat_olddstip, OIP_DST(a)=nat_oldsrcip *=> OIP_SRC(b)=nat_newdstip, OIP_DST(a)=nat_newsrcip * * REWRITE out rule, SRC=a,DST=b -> SRC=c,DST=d * - response to outgoing packet (a,b)=>(c,d) (OIP_SRC=c,OIP_DST=d) * - OIP_SRC(c)=nat_newsrcip, OIP_DST(d)=nat_newdstip *=> OIP_SRC(c)=nat_oldsrcip, OIP_DST(d)=nat_olddstip * * REWRITE in rule, SRC=a,DST=b -> SRC=c,DST=d * - response to outgoing packet (d,c)=>(b,a) (OIP_SRC=b,OIP_DST=a) * - OIP_SRC(b)=nat_olddstip, OIP_DST(a)=nat_oldsrcip *=> OIP_SRC(b)=nat_newdstip, OIP_DST(a)=nat_newsrcip * * Outbound ICMP * ------------- * MAP rule, SRC=a,DST=b -> SRC=c,DST=b * - response to incoming packet (b,c)=>(b,a) (OIP_SRC=b,OIP_DST=a) * - OIP_SRC(b)=nat_olddstip, OIP_DST(a)=nat_oldsrcip *=> OIP_SRC(b)=nat_newdstip, OIP_DST(a)=nat_newsrcip * * RDR rule, SRC=a,DST=b -> SRC=a,DST=c * - response to incoming packet (a,b)=>(a,c) (OIP_SRC=a,OIP_DST=c) * - OIP_SRC(a)=nat_newsrcip, OIP_DST(c)=nat_newdstip *=> OIP_SRC(a)=nat_oldsrcip, OIP_DST(c)=nat_olddstip * * REWRITE out rule, SRC=a,DST=b -> SRC=c,DST=d * - response to incoming packet (d,c)=>(b,a) (OIP_SRC=c,OIP_DST=d) * - OIP_SRC(c)=nat_olddstip, OIP_DST(d)=nat_oldsrcip *=> OIP_SRC(b)=nat_newdstip, OIP_DST(a)=nat_newsrcip * * REWRITE in rule, SRC=a,DST=b -> SRC=c,DST=d * - response to incoming packet (a,b)=>(c,d) (OIP_SRC=b,OIP_DST=a) * - OIP_SRC(b)=nat_newsrcip, OIP_DST(a)=nat_newdstip *=> OIP_SRC(a)=nat_oldsrcip, OIP_DST(c)=nat_olddstip */ if (((fin->fin_out == 0) && ((nat->nat_redir & NAT_MAP) != 0)) || 
((fin->fin_out == 1) && ((nat->nat_redir & NAT_REDIRECT) != 0))) { a1.s_addr = ntohl(nat->nat_osrcaddr); a4.s_addr = ntohl(oip->ip_src.s_addr); a3.s_addr = ntohl(nat->nat_odstaddr); a2.s_addr = ntohl(oip->ip_dst.s_addr); oip->ip_src.s_addr = htonl(a1.s_addr); oip->ip_dst.s_addr = htonl(a3.s_addr); odst = 1; } else { a1.s_addr = ntohl(nat->nat_ndstaddr); a2.s_addr = ntohl(oip->ip_dst.s_addr); a3.s_addr = ntohl(nat->nat_nsrcaddr); a4.s_addr = ntohl(oip->ip_src.s_addr); oip->ip_dst.s_addr = htonl(a3.s_addr); oip->ip_src.s_addr = htonl(a1.s_addr); odst = 0; } sum1 = 0; sum2 = 0; sumd = 0; CALC_SUMD(a2.s_addr, a3.s_addr, sum1); CALC_SUMD(a4.s_addr, a1.s_addr, sum2); sumd = sum2 + sum1; if (sumd != 0) ipf_fix_datacksum(&oip->ip_sum, sumd); sumd2 = sumd; sum1 = 0; sum2 = 0; /* * Fix UDP pseudo header checksum to compensate for the * IP address change. */ if (((flags & IPN_TCPUDP) != 0) && (dlen >= 4)) { u_32_t sum3, sum4, sumt; /* * Step 2 : * For offending TCP/UDP IP packets, translate the ports as * well, based on the NAT specification. Of course such * a change may be reflected in the ICMP checksum as well. * * Since the port fields are part of the TCP/UDP checksum * of the offending IP packet, you need to adjust that checksum * as well... except that the change in the port numbers should * be offset by the checksum change. However, the TCP/UDP * checksum will also need to change if there has been an * IP address change. */ if (odst == 1) { sum1 = ntohs(nat->nat_osport); sum4 = ntohs(tcp->th_sport); sum3 = ntohs(nat->nat_odport); sum2 = ntohs(tcp->th_dport); tcp->th_sport = htons(sum1); tcp->th_dport = htons(sum3); } else { sum1 = ntohs(nat->nat_ndport); sum2 = ntohs(tcp->th_dport); sum3 = ntohs(nat->nat_nsport); sum4 = ntohs(tcp->th_sport); tcp->th_dport = htons(sum3); tcp->th_sport = htons(sum1); } CALC_SUMD(sum4, sum1, sumt); sumd += sumt; CALC_SUMD(sum2, sum3, sumt); sumd += sumt; if (sumd != 0 || sumd2 != 0) { /* * At this point, sumd is the delta to apply to the * TCP/UDP header, given the changes in both the IP * address and the ports and sumd2 is the delta to * apply to the ICMP header, given the IP address * change delta that may need to be applied to the * TCP/UDP checksum instead. * * If we fix both the IP and TCP/UDP checksums * then the ICMP checksum changes by the address * delta applied to the TCP/UDP checksum. If we * do not change the TCP/UDP checksum then we * apply the delta in ports to the ICMP checksum. */ if (oip->ip_p == IPPROTO_UDP) { if ((dlen >= 8) && (*csump != 0)) { ipf_fix_datacksum(csump, sumd); } else { CALC_SUMD(sum1, sum4, sumd2); CALC_SUMD(sum3, sum2, sumt); sumd2 += sumt; } } else if (oip->ip_p == IPPROTO_TCP) { if (dlen >= 18) { ipf_fix_datacksum(csump, sumd); } else { CALC_SUMD(sum1, sum4, sumd2); CALC_SUMD(sum3, sum2, sumt); sumd2 += sumt; } } if (sumd2 != 0) { sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16); sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16); sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16); ipf_fix_incksum(0, &icmp->icmp_cksum, sumd2, 0); } } } else if (((flags & IPN_ICMPQUERY) != 0) && (dlen >= 8)) { icmphdr_t *orgicmp; /* * XXX - what if this is bogus hl and we go off the end ? * In this case, ipf_nat_icmperrorlookup() will have * returned NULL. */ orgicmp = (icmphdr_t *)dp; if (odst == 1) { if (orgicmp->icmp_id != nat->nat_osport) { /* * Fix ICMP checksum (of the offending ICMP * query packet) to compensate for the change * in the ICMP id of the offending ICMP * packet.
* * Since you modify orgicmp->icmp_id with * a delta (say x) and you compensate that * in orgicmp->icmp_cksum with a delta * minus x, you don't have to adjust the * overall icmp->icmp_cksum */ sum1 = ntohs(orgicmp->icmp_id); sum2 = ntohs(nat->nat_oicmpid); CALC_SUMD(sum1, sum2, sumd); orgicmp->icmp_id = nat->nat_oicmpid; ipf_fix_datacksum(&orgicmp->icmp_cksum, sumd); } } /* nat_dir == NAT_INBOUND is impossible for icmp queries */ } return nat; } /* * MAP-IN MAP-OUT RDR-IN RDR-OUT * osrc X == src == src X * odst X == dst == dst X * nsrc == dst X X == dst * ndst == src X X == src * MAP = NAT_OUTBOUND, RDR = NAT_INBOUND */ /* * NB: these lookups don't lock access to the list, it is assumed that it * has already been done! */ /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_inlookup */ /* Returns: nat_t* - NULL == no match, */ /* else pointer to matching NAT entry */ /* Parameters: fin(I) - pointer to packet information */ /* flags(I) - NAT flags for this packet */ /* p(I) - protocol for this packet */ /* src(I) - source IP address */ /* mapdst(I) - destination IP address */ /* */ /* Lookup a nat entry based on the mapped destination ip address/port and */ /* real source address/port. We use this lookup when receiving a packet, */ /* we're looking for a table entry, based on the destination address. */ /* */ /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */ /* */ /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */ /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */ /* */ /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */ /* the packet is of said protocol */ /* ------------------------------------------------------------------------ */ nat_t * ipf_nat_inlookup(fin, flags, p, src, mapdst) fr_info_t *fin; u_int flags, p; struct in_addr src , mapdst; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; u_short sport, dport; grehdr_t *gre; ipnat_t *ipn; u_int sflags; nat_t *nat; int nflags; u_32_t dst; void *ifp; u_int hv, rhv; ifp = fin->fin_ifp; gre = NULL; dst = mapdst.s_addr; sflags = flags & NAT_TCPUDPICMP; switch (p) { case IPPROTO_TCP : case IPPROTO_UDP : sport = htons(fin->fin_data[0]); dport = htons(fin->fin_data[1]); break; case IPPROTO_ICMP : sport = 0; dport = fin->fin_data[1]; break; default : sport = 0; dport = 0; break; } if ((flags & SI_WILDP) != 0) goto find_in_wild_ports; rhv = NAT_HASH_FN(dst, dport, 0xffffffff); rhv = NAT_HASH_FN(src.s_addr, rhv + sport, 0xffffffff); hv = rhv % softn->ipf_nat_table_sz; nat = softn->ipf_nat_table[1][hv]; /* TRACE dst, dport, src, sport, hv, nat */ for (; nat; nat = nat->nat_hnext[1]) { if (nat->nat_ifps[0] != NULL) { if ((ifp != NULL) && (ifp != nat->nat_ifps[0])) continue; } if (nat->nat_pr[0] != p) continue; switch (nat->nat_dir) { case NAT_INBOUND : case NAT_DIVERTIN : if (nat->nat_v[0] != 4) continue; if (nat->nat_osrcaddr != src.s_addr || nat->nat_odstaddr != dst) continue; if ((nat->nat_flags & IPN_TCPUDP) != 0) { if (nat->nat_osport != sport) continue; if (nat->nat_odport != dport) continue; } else if (p == IPPROTO_ICMP) { if (nat->nat_osport != dport) { continue; } } break; case NAT_DIVERTOUT : if (nat->nat_dlocal) continue; case NAT_OUTBOUND : if (nat->nat_v[1] != 4) continue; if (nat->nat_dlocal) continue; if (nat->nat_ndstaddr != src.s_addr || nat->nat_nsrcaddr != dst) continue; if ((nat->nat_flags & IPN_TCPUDP) != 0) { if (nat->nat_ndport != sport)
continue; if (nat->nat_nsport != dport) continue; } else if (p == IPPROTO_ICMP) { if (nat->nat_osport != dport) { continue; } } break; } if ((nat->nat_flags & IPN_TCPUDP) != 0) { ipn = nat->nat_ptr; if ((ipn != NULL) && (nat->nat_aps != NULL)) if (ipf_proxy_match(fin, nat) != 0) continue; } if ((nat->nat_ifps[0] == NULL) && (ifp != NULL)) { nat->nat_ifps[0] = ifp; nat->nat_mtu[0] = GETIFMTU_4(ifp); } return nat; } /* * So if we didn't find it but there are wildcard members in the hash * table, go back and look for them. We do this search and update here * because it is modifying the NAT table and we want to do this only * for the first packet that matches. The exception, of course, is * for "dummy" (FI_IGNORE) lookups. */ find_in_wild_ports: if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) { NBUMPSIDEX(0, ns_lookup_miss, ns_lookup_miss_0); return NULL; } if (softn->ipf_nat_stats.ns_wilds == 0 || (fin->fin_flx & FI_NOWILD)) { NBUMPSIDEX(0, ns_lookup_nowild, ns_lookup_nowild_0); return NULL; } RWLOCK_EXIT(&softc->ipf_nat); hv = NAT_HASH_FN(dst, 0, 0xffffffff); hv = NAT_HASH_FN(src.s_addr, hv, softn->ipf_nat_table_sz); WRITE_ENTER(&softc->ipf_nat); nat = softn->ipf_nat_table[1][hv]; /* TRACE dst, src, hv, nat */ for (; nat; nat = nat->nat_hnext[1]) { if (nat->nat_ifps[0] != NULL) { if ((ifp != NULL) && (ifp != nat->nat_ifps[0])) continue; } if (nat->nat_pr[0] != fin->fin_p) continue; switch (nat->nat_dir & (NAT_INBOUND|NAT_OUTBOUND)) { case NAT_INBOUND : if (nat->nat_v[0] != 4) continue; if (nat->nat_osrcaddr != src.s_addr || nat->nat_odstaddr != dst) continue; break; case NAT_OUTBOUND : if (nat->nat_v[1] != 4) continue; if (nat->nat_ndstaddr != src.s_addr || nat->nat_nsrcaddr != dst) continue; break; } nflags = nat->nat_flags; if (!(nflags & (NAT_TCPUDP|SI_WILDP))) continue; if (ipf_nat_wildok(nat, (int)sport, (int)dport, nflags, NAT_INBOUND) == 1) { if ((fin->fin_flx & FI_IGNORE) != 0) break; if ((nflags & SI_CLONE) != 0) { nat = ipf_nat_clone(fin, nat); if (nat == NULL) break; } else { MUTEX_ENTER(&softn->ipf_nat_new); softn->ipf_nat_stats.ns_wilds--; MUTEX_EXIT(&softn->ipf_nat_new); } if (nat->nat_dir == NAT_INBOUND) { if (nat->nat_osport == 0) { nat->nat_osport = sport; nat->nat_nsport = sport; } if (nat->nat_odport == 0) { nat->nat_odport = dport; nat->nat_ndport = dport; } } else if (nat->nat_dir == NAT_OUTBOUND) { if (nat->nat_osport == 0) { nat->nat_osport = dport; nat->nat_nsport = dport; } if (nat->nat_odport == 0) { nat->nat_odport = sport; nat->nat_ndport = sport; } } if ((nat->nat_ifps[0] == NULL) && (ifp != NULL)) { nat->nat_ifps[0] = ifp; nat->nat_mtu[0] = GETIFMTU_4(ifp); } nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT); ipf_nat_tabmove(softn, nat); break; } } MUTEX_DOWNGRADE(&softc->ipf_nat); if (nat == NULL) { NBUMPSIDE(0, ns_lookup_miss); } return nat; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_tabmove */ /* Returns: Nil */ /* Parameters: softn(I) - pointer to NAT context structure */ /* nat(I) - pointer to NAT structure */ /* Write Lock: ipf_nat */ /* */ /* This function is only called for TCP/UDP NAT table entries where the */ /* original was placed in the table without hashing on the ports and we now */ /* want to include hashing on port numbers. 
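 *
 * An illustrative walk-through: such an entry was created with
 * SI_W_SPORT/SI_W_DPORT set and hashed on addresses alone; when the
 * first matching packet supplies concrete port numbers, the lookup
 * code fills them in, clears the wildcard flags and calls this
 * function to re-insert the entry under the full address+port hash.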
*/ /* ------------------------------------------------------------------------ */ static void ipf_nat_tabmove(softn, nat) ipf_nat_softc_t *softn; nat_t *nat; { u_int hv0, hv1, rhv0, rhv1; natstat_t *nsp; nat_t **natp; if (nat->nat_flags & SI_CLONE) return; nsp = &softn->ipf_nat_stats; /* * Remove the NAT entry from the old location */ if (nat->nat_hnext[0]) nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0]; *nat->nat_phnext[0] = nat->nat_hnext[0]; nsp->ns_side[0].ns_bucketlen[nat->nat_hv[0] % softn->ipf_nat_table_sz]--; if (nat->nat_hnext[1]) nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1]; *nat->nat_phnext[1] = nat->nat_hnext[1]; nsp->ns_side[1].ns_bucketlen[nat->nat_hv[1] % softn->ipf_nat_table_sz]--; /* * Add into the NAT table in the new position */ rhv0 = NAT_HASH_FN(nat->nat_osrcaddr, nat->nat_osport, 0xffffffff); rhv0 = NAT_HASH_FN(nat->nat_odstaddr, rhv0 + nat->nat_odport, 0xffffffff); rhv1 = NAT_HASH_FN(nat->nat_nsrcaddr, nat->nat_nsport, 0xffffffff); rhv1 = NAT_HASH_FN(nat->nat_ndstaddr, rhv1 + nat->nat_ndport, 0xffffffff); hv0 = rhv0 % softn->ipf_nat_table_sz; hv1 = rhv1 % softn->ipf_nat_table_sz; if (nat->nat_dir == NAT_INBOUND || nat->nat_dir == NAT_DIVERTIN) { u_int swap; swap = hv0; hv0 = hv1; hv1 = swap; } /* TRACE nat_osrcaddr, nat_osport, nat_odstaddr, nat_odport, hv0 */ /* TRACE nat_nsrcaddr, nat_nsport, nat_ndstaddr, nat_ndport, hv1 */ nat->nat_hv[0] = rhv0; natp = &softn->ipf_nat_table[0][hv0]; if (*natp) (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; nat->nat_phnext[0] = natp; nat->nat_hnext[0] = *natp; *natp = nat; nsp->ns_side[0].ns_bucketlen[hv0]++; nat->nat_hv[1] = rhv1; natp = &softn->ipf_nat_table[1][hv1]; if (*natp) (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; nat->nat_phnext[1] = natp; nat->nat_hnext[1] = *natp; *natp = nat; nsp->ns_side[1].ns_bucketlen[hv1]++; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_outlookup */ /* Returns: nat_t* - NULL == no match, */ /* else pointer to matching NAT entry */ /* Parameters: fin(I) - pointer to packet information */ /* flags(I) - NAT flags for this packet */ /* p(I) - protocol for this packet */ /* src(I) - source IP address */ /* dst(I) - destination IP address */ /* */ /* Lookup a nat entry based on the source 'real' ip address/port and */ /* destination address/port. We use this lookup when sending a packet out, */ /* we're looking for a table entry, based on the source address. */ /* */ /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */ /* */ /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */ /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags.
*/ /* */ /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */ /* the packet is of said protocol */ /* ------------------------------------------------------------------------ */ nat_t * ipf_nat_outlookup(fin, flags, p, src, dst) fr_info_t *fin; u_int flags, p; struct in_addr src , dst; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; u_short sport, dport; u_int sflags; ipnat_t *ipn; nat_t *nat; void *ifp; u_int hv; ifp = fin->fin_ifp; sflags = flags & IPN_TCPUDPICMP; switch (p) { case IPPROTO_TCP : case IPPROTO_UDP : sport = htons(fin->fin_data[0]); dport = htons(fin->fin_data[1]); break; case IPPROTO_ICMP : sport = 0; dport = fin->fin_data[1]; break; default : sport = 0; dport = 0; break; } if ((flags & SI_WILDP) != 0) goto find_out_wild_ports; hv = NAT_HASH_FN(src.s_addr, sport, 0xffffffff); hv = NAT_HASH_FN(dst.s_addr, hv + dport, softn->ipf_nat_table_sz); nat = softn->ipf_nat_table[0][hv]; /* TRACE src, sport, dst, dport, hv, nat */ for (; nat; nat = nat->nat_hnext[0]) { if (nat->nat_ifps[1] != NULL) { if ((ifp != NULL) && (ifp != nat->nat_ifps[1])) continue; } if (nat->nat_pr[1] != p) continue; switch (nat->nat_dir) { case NAT_INBOUND : case NAT_DIVERTIN : if (nat->nat_v[1] != 4) continue; if (nat->nat_ndstaddr != src.s_addr || nat->nat_nsrcaddr != dst.s_addr) continue; if ((nat->nat_flags & IPN_TCPUDP) != 0) { if (nat->nat_ndport != sport) continue; if (nat->nat_nsport != dport) continue; } else if (p == IPPROTO_ICMP) { if (nat->nat_osport != dport) { continue; } } break; case NAT_OUTBOUND : case NAT_DIVERTOUT : if (nat->nat_v[0] != 4) continue; if (nat->nat_osrcaddr != src.s_addr || nat->nat_odstaddr != dst.s_addr) continue; if ((nat->nat_flags & IPN_TCPUDP) != 0) { if (nat->nat_odport != dport) continue; if (nat->nat_osport != sport) continue; } else if (p == IPPROTO_ICMP) { if (nat->nat_osport != dport) { continue; } } break; } ipn = nat->nat_ptr; if ((ipn != NULL) && (nat->nat_aps != NULL)) if (ipf_proxy_match(fin, nat) != 0) continue; if ((nat->nat_ifps[1] == NULL) && (ifp != NULL)) { nat->nat_ifps[1] = ifp; nat->nat_mtu[1] = GETIFMTU_4(ifp); } return nat; } /* * So if we didn't find it but there are wildcard members in the hash * table, go back and look for them. We do this search and update here * because it is modifying the NAT table and we want to do this only * for the first packet that matches. The exception, of course, is * for "dummy" (FI_IGNORE) lookups. 
*/ find_out_wild_ports: if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) { NBUMPSIDEX(1, ns_lookup_miss, ns_lookup_miss_1); return NULL; } if (softn->ipf_nat_stats.ns_wilds == 0 || (fin->fin_flx & FI_NOWILD)) { NBUMPSIDEX(1, ns_lookup_nowild, ns_lookup_nowild_1); return NULL; } RWLOCK_EXIT(&softc->ipf_nat); hv = NAT_HASH_FN(src.s_addr, 0, 0xffffffff); hv = NAT_HASH_FN(dst.s_addr, hv, softn->ipf_nat_table_sz); WRITE_ENTER(&softc->ipf_nat); nat = softn->ipf_nat_table[0][hv]; for (; nat; nat = nat->nat_hnext[0]) { if (nat->nat_ifps[1] != NULL) { if ((ifp != NULL) && (ifp != nat->nat_ifps[1])) continue; } if (nat->nat_pr[1] != fin->fin_p) continue; switch (nat->nat_dir & (NAT_INBOUND|NAT_OUTBOUND)) { case NAT_INBOUND : if (nat->nat_v[1] != 4) continue; if (nat->nat_ndstaddr != src.s_addr || nat->nat_nsrcaddr != dst.s_addr) continue; break; case NAT_OUTBOUND : if (nat->nat_v[0] != 4) continue; if (nat->nat_osrcaddr != src.s_addr || nat->nat_odstaddr != dst.s_addr) continue; break; } if (!(nat->nat_flags & (NAT_TCPUDP|SI_WILDP))) continue; if (ipf_nat_wildok(nat, (int)sport, (int)dport, nat->nat_flags, NAT_OUTBOUND) == 1) { if ((fin->fin_flx & FI_IGNORE) != 0) break; if ((nat->nat_flags & SI_CLONE) != 0) { nat = ipf_nat_clone(fin, nat); if (nat == NULL) break; } else { MUTEX_ENTER(&softn->ipf_nat_new); softn->ipf_nat_stats.ns_wilds--; MUTEX_EXIT(&softn->ipf_nat_new); } if (nat->nat_dir == NAT_OUTBOUND) { if (nat->nat_osport == 0) { nat->nat_osport = sport; nat->nat_nsport = sport; } if (nat->nat_odport == 0) { nat->nat_odport = dport; nat->nat_ndport = dport; } } else if (nat->nat_dir == NAT_INBOUND) { if (nat->nat_osport == 0) { nat->nat_osport = dport; nat->nat_nsport = dport; } if (nat->nat_odport == 0) { nat->nat_odport = sport; nat->nat_ndport = sport; } } if ((nat->nat_ifps[1] == NULL) && (ifp != NULL)) { nat->nat_ifps[1] = ifp; nat->nat_mtu[1] = GETIFMTU_4(ifp); } nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT); ipf_nat_tabmove(softn, nat); break; } } MUTEX_DOWNGRADE(&softc->ipf_nat); if (nat == NULL) { NBUMPSIDE(1, ns_lookup_miss); } return nat; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_lookupredir */ /* Returns: nat_t* - NULL == no match, */ /* else pointer to matching NAT entry */ /* Parameters: np(I) - pointer to description of packet to find NAT table */ /* entry for. */ /* */ /* Lookup the NAT tables to search for a matching redirect */ /* The contents of natlookup_t should imitate those found in a packet that */ /* would be translated - ie a packet coming in for RDR or going out for MAP.*/ /* We can do the lookup in one of two ways, imitating an inbound or */ /* outbound packet. By default we assume outbound, unless IPN_IN is set. 
*/ /* For IN, the fields are set as follows: */ /* nl_real* = source information */ /* nl_out* = destination information (translated) */ /* For an out packet, the fields are set like this: */ /* nl_in* = source information (untranslated) */ /* nl_out* = destination information (translated) */ /* ------------------------------------------------------------------------ */ nat_t * ipf_nat_lookupredir(np) natlookup_t *np; { fr_info_t fi; nat_t *nat; bzero((char *)&fi, sizeof(fi)); if (np->nl_flags & IPN_IN) { fi.fin_data[0] = ntohs(np->nl_realport); fi.fin_data[1] = ntohs(np->nl_outport); } else { fi.fin_data[0] = ntohs(np->nl_inport); fi.fin_data[1] = ntohs(np->nl_outport); } if (np->nl_flags & IPN_TCP) fi.fin_p = IPPROTO_TCP; else if (np->nl_flags & IPN_UDP) fi.fin_p = IPPROTO_UDP; else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY)) fi.fin_p = IPPROTO_ICMP; /* * We can do two sorts of lookups: * - IPN_IN: we have the `real' and `out' address, look for `in'. * - default: we have the `in' and `out' address, look for `real'. */ if (np->nl_flags & IPN_IN) { if ((nat = ipf_nat_inlookup(&fi, np->nl_flags, fi.fin_p, np->nl_realip, np->nl_outip))) { np->nl_inip = nat->nat_odstip; np->nl_inport = nat->nat_odport; } } else { /* * If nl_inip is non null, this is a lookup based on the real * ip address. Else, we use the fake. */ if ((nat = ipf_nat_outlookup(&fi, np->nl_flags, fi.fin_p, np->nl_inip, np->nl_outip))) { if ((np->nl_flags & IPN_FINDFORWARD) != 0) { fr_info_t fin; bzero((char *)&fin, sizeof(fin)); fin.fin_p = nat->nat_pr[0]; fin.fin_data[0] = ntohs(nat->nat_ndport); fin.fin_data[1] = ntohs(nat->nat_nsport); if (ipf_nat_inlookup(&fin, np->nl_flags, fin.fin_p, nat->nat_ndstip, nat->nat_nsrcip) != NULL) { np->nl_flags &= ~IPN_FINDFORWARD; } } np->nl_realip = nat->nat_odstip; np->nl_realport = nat->nat_odport; } } return nat; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_match */ /* Returns: int - 0 == no match, 1 == match */ /* Parameters: fin(I) - pointer to packet information */ /* np(I) - pointer to NAT rule */ /* */ /* Pull the matching of a packet against a NAT rule out of that complex */ /* loop inside ipf_nat_checkin() and lay it out properly in its own function. 
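 *
 * A small aside on the tests below: each address check computes
 * "does not match" and is then XORed with the rule's negation flag,
 * so one expression serves both plain and negated rules:
 *
 *	match = ((fin->fin_saddr & mask) != addr);	// 1 == mismatch
 *	match ^= ((np->in_flags & IPN_NOTSRC) != 0);	// flip if negated
 *	if (match)
 *		return 0;				// rule not applicable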
*/ /* ------------------------------------------------------------------------ */ static int ipf_nat_match(fin, np) fr_info_t *fin; ipnat_t *np; { ipf_main_softc_t *softc = fin->fin_main_soft; frtuc_t *ft; int match; match = 0; switch (np->in_osrcatype) { case FRI_NORMAL : match = ((fin->fin_saddr & np->in_osrcmsk) != np->in_osrcaddr); break; case FRI_LOOKUP : match = (*np->in_osrcfunc)(softc, np->in_osrcptr, 4, &fin->fin_saddr, fin->fin_plen); break; } match ^= ((np->in_flags & IPN_NOTSRC) != 0); if (match) return 0; match = 0; switch (np->in_odstatype) { case FRI_NORMAL : match = ((fin->fin_daddr & np->in_odstmsk) != np->in_odstaddr); break; case FRI_LOOKUP : match = (*np->in_odstfunc)(softc, np->in_odstptr, 4, &fin->fin_daddr, fin->fin_plen); break; } match ^= ((np->in_flags & IPN_NOTDST) != 0); if (match) return 0; ft = &np->in_tuc; if (!(fin->fin_flx & FI_TCPUDP) || (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) { if (ft->ftu_scmp || ft->ftu_dcmp) return 0; return 1; } return ipf_tcpudpchk(&fin->fin_fi, ft); } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_update */ /* Returns: Nil */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to NAT structure */ /* */ /* Updates the lifetime of a NAT table entry for non-TCP packets. This */ /* *MUST* be called after ipf_nat_proto() as it expects fin_rev to already */ /* be set. */ /* ------------------------------------------------------------------------ */ void ipf_nat_update(fin, nat) fr_info_t *fin; nat_t *nat; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; ipftq_t *ifq, *ifq2; ipftqent_t *tqe; ipnat_t *np = nat->nat_ptr; tqe = &nat->nat_tqe; ifq = tqe->tqe_ifq; /* * We allow overriding of NAT timeouts from NAT rules, even for * TCP. However, if it is TCP and there is no rule timeout set, * then do not update the timeout here. */ if (np != NULL) { np->in_bytes[fin->fin_rev] += fin->fin_plen; ifq2 = np->in_tqehead[fin->fin_rev]; } else { ifq2 = NULL; } if (nat->nat_pr[0] == IPPROTO_TCP && ifq2 == NULL) { (void) ipf_tcp_age(&nat->nat_tqe, fin, softn->ipf_nat_tcptq, 0, 2); } else { if (ifq2 == NULL) { if (nat->nat_pr[0] == IPPROTO_UDP) ifq2 = fin->fin_rev ? &softn->ipf_nat_udpacktq : &softn->ipf_nat_udptq; else if (nat->nat_pr[0] == IPPROTO_ICMP || nat->nat_pr[0] == IPPROTO_ICMPV6) ifq2 = fin->fin_rev ? &softn->ipf_nat_icmpacktq : &softn->ipf_nat_icmptq; else ifq2 = &softn->ipf_nat_iptq; } ipf_movequeue(softc->ipf_ticks, tqe, ifq, ifq2); } } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_checkout */ /* Returns: int - -1 == packet failed NAT checks so block it, */ /* 0 == no packet translation occurred, */ /* 1 == packet was successfully translated. */ /* Parameters: fin(I) - pointer to packet information */ /* passp(I) - pointer to filtering result flags */ /* */ /* Check to see if an outgoing packet should be changed. ICMP packets are */ /* first checked to see if they match an existing entry (if an error), */ /* otherwise a search of the current NAT table is made. If neither results */ /* in a match then a search for a matching NAT rule is made. Create a new */ /* NAT entry if we matched a NAT rule. Lastly, actually change the */ /* packet header(s) as required.
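 */
/* Example: a worked view of the "match ^= ..." negation used in */
/* ipf_nat_match() above (editorial sketch; names as in that function). */
#if 0
	int addr_matches = ((fin->fin_saddr & np->in_osrcmsk) == np->in_osrcaddr);
	int match = !addr_matches;		/* 1 == address mismatch */
	match ^= ((np->in_flags & IPN_NOTSRC) != 0);
	/*
	 * IPN_NOTSRC clear: match == 0 when the address matched.
	 * IPN_NOTSRC set:   match == 0 only when the address did NOT
	 * match, i.e. the flag inverts the sense of the comparison.
	 */
#endif
/*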
*/ /* ------------------------------------------------------------------------ */ int ipf_nat_checkout(fin, passp) fr_info_t *fin; u_32_t *passp; { ipnat_t *np = NULL, *npnext; struct ifnet *ifp, *sifp; ipf_main_softc_t *softc; ipf_nat_softc_t *softn; icmphdr_t *icmp = NULL; tcphdr_t *tcp = NULL; int rval, natfailed; u_int nflags = 0; u_32_t ipa, iph; int natadd = 1; frentry_t *fr; nat_t *nat; if (fin->fin_v == 6) { #ifdef USE_INET6 return ipf_nat6_checkout(fin, passp); #else return 0; #endif } softc = fin->fin_main_soft; softn = softc->ipf_nat_soft; if (softn->ipf_nat_lock != 0) return 0; if (softn->ipf_nat_stats.ns_rules == 0 && softn->ipf_nat_instances == NULL) return 0; natfailed = 0; fr = fin->fin_fr; sifp = fin->fin_ifp; if (fr != NULL) { ifp = fr->fr_tifs[fin->fin_rev].fd_ptr; if ((ifp != NULL) && (ifp != (void *)-1)) fin->fin_ifp = ifp; } ifp = fin->fin_ifp; if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { switch (fin->fin_p) { case IPPROTO_TCP : nflags = IPN_TCP; break; case IPPROTO_UDP : nflags = IPN_UDP; break; case IPPROTO_ICMP : icmp = fin->fin_dp; /* * This is an outgoing packet; for ICMP queries the * icmp_id is used in place of a port number when * matching NAT entries. */ if ((fin->fin_flx & FI_ICMPQUERY) != 0) nflags = IPN_ICMPQUERY; break; default : break; } if ((nflags & IPN_TCPUDP)) tcp = fin->fin_dp; } ipa = fin->fin_saddr; READ_ENTER(&softc->ipf_nat); if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) && (nat = ipf_nat_icmperror(fin, &nflags, NAT_OUTBOUND))) /*EMPTY*/; else if ((fin->fin_flx & FI_FRAG) && (nat = ipf_frag_natknown(fin))) natadd = 0; else if ((nat = ipf_nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p, fin->fin_src, fin->fin_dst))) { nflags = nat->nat_flags; } else if (fin->fin_off == 0) { u_32_t hv, msk, nmsk = 0; /* * If there is no current entry in the nat table for this IP#, * create one for it (if there is a matching rule). */ maskloop: msk = softn->ipf_nat_map_active_masks[nmsk]; iph = ipa & msk; hv = NAT_HASH_FN(iph, 0, softn->ipf_nat_maprules_sz); retry_roundrobin: for (np = softn->ipf_nat_map_rules[hv]; np; np = npnext) { npnext = np->in_mnext; if ((np->in_ifps[1] && (np->in_ifps[1] != ifp))) continue; if (np->in_v[0] != 4) continue; if (np->in_pr[1] && (np->in_pr[1] != fin->fin_p)) continue; if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags)) continue; if (np->in_flags & IPN_FILTER) { switch (ipf_nat_match(fin, np)) { case 0 : continue; case -1 : rval = -1; goto outmatchfail; case 1 : default : break; } } else if ((ipa & np->in_osrcmsk) != np->in_osrcaddr) continue; if ((fr != NULL) && !ipf_matchtag(&np->in_tag, &fr->fr_nattag)) continue; if (np->in_plabel != -1) { if (((np->in_flags & IPN_FILTER) == 0) && (np->in_odport != fin->fin_data[1])) continue; if (ipf_proxy_ok(fin, tcp, np) == 0) continue; } if (np->in_flags & IPN_NO) { np->in_hits++; break; } MUTEX_ENTER(&softn->ipf_nat_new); /* * If we've matched a round-robin rule but it has * moved in the list since we got it, start over as * this is now no longer correct.
*/ if (npnext != np->in_mnext) { if ((np->in_flags & IPN_ROUNDR) != 0) { MUTEX_EXIT(&softn->ipf_nat_new); goto retry_roundrobin; } npnext = np->in_mnext; } nat = ipf_nat_add(fin, np, NULL, nflags, NAT_OUTBOUND); MUTEX_EXIT(&softn->ipf_nat_new); if (nat != NULL) { natfailed = 0; break; } natfailed = -1; } if ((np == NULL) && (nmsk < softn->ipf_nat_map_max)) { nmsk++; goto maskloop; } } if (nat != NULL) { rval = ipf_nat_out(fin, nat, natadd, nflags); if (rval == 1) { MUTEX_ENTER(&nat->nat_lock); ipf_nat_update(fin, nat); nat->nat_bytes[1] += fin->fin_plen; nat->nat_pkts[1]++; fin->fin_pktnum = nat->nat_pkts[1]; MUTEX_EXIT(&nat->nat_lock); } } else rval = natfailed; outmatchfail: RWLOCK_EXIT(&softc->ipf_nat); switch (rval) { case -1 : if (passp != NULL) { DT1(frb_natv4out, fr_info_t *, fin); NBUMPSIDED(1, ns_drop); *passp = FR_BLOCK; fin->fin_reason = FRB_NATV4; } fin->fin_flx |= FI_BADNAT; NBUMPSIDED(1, ns_badnat); break; case 0 : NBUMPSIDE(1, ns_ignored); break; case 1 : NBUMPSIDE(1, ns_translated); break; } fin->fin_ifp = sifp; return rval; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_out */ /* Returns: int - -1 == packet failed NAT checks so block it, */ /* 1 == packet was successfully translated. */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to NAT structure */ /* natadd(I) - flag indicating if it is safe to add frag cache */ /* nflags(I) - NAT flags set for this packet */ /* */ /* Translate a packet coming "out" on an interface. */ /* ------------------------------------------------------------------------ */ int ipf_nat_out(fin, nat, natadd, nflags) fr_info_t *fin; nat_t *nat; int natadd; u_32_t nflags; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; icmphdr_t *icmp; tcphdr_t *tcp; ipnat_t *np; int skip; int i; tcp = NULL; icmp = NULL; np = nat->nat_ptr; if ((natadd != 0) && (fin->fin_flx & FI_FRAG) && (np != NULL)) (void) ipf_frag_natnew(softc, fin, 0, nat); /* * Fix up checksums, not by recalculating them, but * simply computing adjustments. * This is only done for STREAMS based IP implementations where the * checksum has already been calculated by IP. In all other cases, * IPFilter is called before the checksum needs calculating so there * is no call to modify whatever is in the header now. */ if (nflags == IPN_ICMPERR) { u_32_t s1, s2, sumd, msumd; s1 = LONG_SUM(ntohl(fin->fin_saddr)); if (nat->nat_dir == NAT_OUTBOUND) { s2 = LONG_SUM(ntohl(nat->nat_nsrcaddr)); } else { s2 = LONG_SUM(ntohl(nat->nat_odstaddr)); } CALC_SUMD(s1, s2, sumd); msumd = sumd; s1 = LONG_SUM(ntohl(fin->fin_daddr)); if (nat->nat_dir == NAT_OUTBOUND) { s2 = LONG_SUM(ntohl(nat->nat_ndstaddr)); } else { s2 = LONG_SUM(ntohl(nat->nat_osrcaddr)); } CALC_SUMD(s1, s2, sumd); msumd += sumd; ipf_fix_outcksum(0, &fin->fin_ip->ip_sum, msumd, 0); } #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ defined(linux) || defined(BRIDGE_IPF) || defined(__FreeBSD__) else { /* * Strictly speaking, this isn't necessary on BSD * kernels because they do checksum calculation after * this code has run BUT if ipfilter is being used * to do NAT as a bridge, that code doesn't exist. 
*/ switch (nat->nat_dir) { case NAT_OUTBOUND : ipf_fix_outcksum(fin->fin_cksum & FI_CK_L4PART, &fin->fin_ip->ip_sum, nat->nat_ipsumd, 0); break; case NAT_INBOUND : ipf_fix_incksum(fin->fin_cksum & FI_CK_L4PART, &fin->fin_ip->ip_sum, nat->nat_ipsumd, 0); break; default : break; } } #endif /* * Address assignment is after the checksum modification because * we are using the address in the packet for determining the * correct checksum offset (the ICMP error could be coming from * anyone...) */ switch (nat->nat_dir) { case NAT_OUTBOUND : fin->fin_ip->ip_src = nat->nat_nsrcip; fin->fin_saddr = nat->nat_nsrcaddr; fin->fin_ip->ip_dst = nat->nat_ndstip; fin->fin_daddr = nat->nat_ndstaddr; break; case NAT_INBOUND : fin->fin_ip->ip_src = nat->nat_odstip; fin->fin_saddr = nat->nat_ndstaddr; fin->fin_ip->ip_dst = nat->nat_osrcip; fin->fin_daddr = nat->nat_nsrcaddr; break; case NAT_DIVERTIN : { mb_t *m; skip = ipf_nat_decap(fin, nat); if (skip <= 0) { NBUMPSIDED(1, ns_decap_fail); return -1; } m = fin->fin_m; #if defined(MENTAT) && defined(_KERNEL) m->b_rptr += skip; #else m->m_data += skip; m->m_len -= skip; # ifdef M_PKTHDR if (m->m_flags & M_PKTHDR) m->m_pkthdr.len -= skip; # endif #endif MUTEX_ENTER(&nat->nat_lock); ipf_nat_update(fin, nat); MUTEX_EXIT(&nat->nat_lock); fin->fin_flx |= FI_NATED; if (np != NULL && np->in_tag.ipt_num[0] != 0) fin->fin_nattag = &np->in_tag; return 1; /* NOTREACHED */ } case NAT_DIVERTOUT : { u_32_t s1, s2, sumd; udphdr_t *uh; ip_t *ip; mb_t *m; m = M_DUP(np->in_divmp); if (m == NULL) { NBUMPSIDED(1, ns_divert_dup); return -1; } ip = MTOD(m, ip_t *); ip_fillid(ip); s2 = ntohs(ip->ip_id); s1 = ip->ip_len; ip->ip_len = ntohs(ip->ip_len); ip->ip_len += fin->fin_plen; ip->ip_len = htons(ip->ip_len); s2 += ntohs(ip->ip_len); CALC_SUMD(s1, s2, sumd); uh = (udphdr_t *)(ip + 1); uh->uh_ulen += fin->fin_plen; uh->uh_ulen = htons(uh->uh_ulen); #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ defined(linux) || defined(BRIDGE_IPF) || defined(__FreeBSD__) ipf_fix_outcksum(0, &ip->ip_sum, sumd, 0); #endif PREP_MB_T(fin, m); fin->fin_src = ip->ip_src; fin->fin_dst = ip->ip_dst; fin->fin_ip = ip; fin->fin_plen += sizeof(ip_t) + 8; /* UDP + IPv4 hdr */ fin->fin_dlen += sizeof(ip_t) + 8; /* UDP + IPv4 hdr */ nflags &= ~IPN_TCPUDPICMP; break; } default : break; } if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { u_short *csump; if ((nat->nat_nsport != 0) && (nflags & IPN_TCPUDP)) { tcp = fin->fin_dp; switch (nat->nat_dir) { case NAT_OUTBOUND : tcp->th_sport = nat->nat_nsport; fin->fin_data[0] = ntohs(nat->nat_nsport); tcp->th_dport = nat->nat_ndport; fin->fin_data[1] = ntohs(nat->nat_ndport); break; case NAT_INBOUND : tcp->th_sport = nat->nat_odport; fin->fin_data[0] = ntohs(nat->nat_odport); tcp->th_dport = nat->nat_osport; fin->fin_data[1] = ntohs(nat->nat_osport); break; } } if ((nat->nat_nsport != 0) && (nflags & IPN_ICMPQUERY)) { icmp = fin->fin_dp; icmp->icmp_id = nat->nat_nicmpid; } csump = ipf_nat_proto(fin, nat, nflags); /* * The above comments do not hold for layer 4 (or higher) * checksums... 
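 */
/* Example: the arithmetic behind the adjustment approach (editorial */
/* sketch; the addresses are illustrative). LONG_SUM() adds the two */
/* 16-bit halves of an address and CALC_SUMD() forms the difference that */
/* ipf_fix_outcksum()/ipf_fix_incksum() fold into the existing sum, so */
/* no checksum is recomputed over the whole packet: */
#if 0
	u_32_t os, ns, adj;

	os = LONG_SUM(0xc0000201);	/* old source 192.0.2.1 */
	ns = LONG_SUM(0xc6336407);	/* new source 198.51.100.7 */
	CALC_SUMD(os, ns, adj);
	/* adj is the value folded into ip_sum (or th_sum/uh_sum). */
#endif
/*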
*/ if (csump != NULL) { if (nat->nat_dir == NAT_OUTBOUND) ipf_fix_outcksum(fin->fin_cksum, csump, nat->nat_sumd[0], nat->nat_sumd[1] + fin->fin_dlen); else ipf_fix_incksum(fin->fin_cksum, csump, nat->nat_sumd[0], nat->nat_sumd[1] + fin->fin_dlen); } } ipf_sync_update(softc, SMC_NAT, fin, nat->nat_sync); /* ------------------------------------------------------------- */ /* A few quick notes: */ /* Following are test conditions prior to calling the */ /* ipf_proxy_check routine. */ /* */ /* A NULL tcp indicates a non TCP/UDP packet. When dealing */ /* with a redirect rule, we attempt to match the packet's */ /* source port against in_dport, otherwise we'd compare the */ /* packet's destination. */ /* ------------------------------------------------------------- */ if ((np != NULL) && (np->in_apr != NULL)) { i = ipf_proxy_check(fin, nat); if (i == 0) { i = 1; } else if (i == -1) { NBUMPSIDED(1, ns_ipf_proxy_fail); } } else { i = 1; } fin->fin_flx |= FI_NATED; return i; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_checkin */ /* Returns: int - -1 == packet failed NAT checks so block it, */ /* 0 == no packet translation occurred, */ /* 1 == packet was successfully translated. */ /* Parameters: fin(I) - pointer to packet information */ /* passp(I) - pointer to filtering result flags */ /* */ /* Check to see if an incoming packet should be changed. ICMP packets are */ /* first checked to see if they match an existing entry (if an error), */ /* otherwise a search of the current NAT table is made. If neither results */ /* in a match then a search for a matching NAT rule is made. Create a new */ /* NAT entry if we matched a NAT rule. Lastly, actually change the */ /* packet header(s) as required. */ /* ------------------------------------------------------------------------ */ int ipf_nat_checkin(fin, passp) fr_info_t *fin; u_32_t *passp; { ipf_main_softc_t *softc; ipf_nat_softc_t *softn; u_int nflags, natadd; ipnat_t *np, *npnext; int rval, natfailed; struct ifnet *ifp; struct in_addr in; icmphdr_t *icmp; tcphdr_t *tcp; u_short dport; nat_t *nat; u_32_t iph; softc = fin->fin_main_soft; softn = softc->ipf_nat_soft; if (softn->ipf_nat_lock != 0) return 0; if (softn->ipf_nat_stats.ns_rules == 0 && softn->ipf_nat_instances == NULL) return 0; tcp = NULL; icmp = NULL; dport = 0; natadd = 1; nflags = 0; natfailed = 0; ifp = fin->fin_ifp; if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { switch (fin->fin_p) { case IPPROTO_TCP : nflags = IPN_TCP; break; case IPPROTO_UDP : nflags = IPN_UDP; break; case IPPROTO_ICMP : icmp = fin->fin_dp; /* * This is an incoming packet, so the destination is * the icmp_id and the source port equals 0 */ if ((fin->fin_flx & FI_ICMPQUERY) != 0) { nflags = IPN_ICMPQUERY; dport = icmp->icmp_id; } break; default : break; } if ((nflags & IPN_TCPUDP)) { tcp = fin->fin_dp; dport = fin->fin_data[1]; } } in = fin->fin_dst; READ_ENTER(&softc->ipf_nat); if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) && (nat = ipf_nat_icmperror(fin, &nflags, NAT_INBOUND))) /*EMPTY*/; else if ((fin->fin_flx & FI_FRAG) && (nat = ipf_frag_natknown(fin))) natadd = 0; else if ((nat = ipf_nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p, fin->fin_src, in))) { nflags = nat->nat_flags; } else if (fin->fin_off == 0) { u_32_t hv, msk, rmsk = 0; /* * If there is no current entry in the nat table for this IP#, * create one for it (if there is a matching rule).
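 */
/* Example: the goto loop below, rewritten as a plain loop (editorial */
/* sketch, same fields): one hash probe per distinct netmask in use by */
/* rdr rules, rather than one probe per rule. */
#if 0
	for (rmsk = 0; rmsk <= softn->ipf_nat_rdr_max; rmsk++) {
		msk = softn->ipf_nat_rdr_active_masks[rmsk];
		hv = NAT_HASH_FN(in.s_addr & msk, 0,
				 softn->ipf_nat_rdrrules_sz);
		/* scan the softn->ipf_nat_rdr_rules[hv] chain here */
	}
#endif
/*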
*/ maskloop: msk = softn->ipf_nat_rdr_active_masks[rmsk]; iph = in.s_addr & msk; hv = NAT_HASH_FN(iph, 0, softn->ipf_nat_rdrrules_sz); retry_roundrobin: /* TRACE (iph,msk,rmsk,hv,softn->ipf_nat_rdrrules_sz) */ for (np = softn->ipf_nat_rdr_rules[hv]; np; np = npnext) { npnext = np->in_rnext; if (np->in_ifps[0] && (np->in_ifps[0] != ifp)) continue; if (np->in_v[0] != 4) continue; if (np->in_pr[0] && (np->in_pr[0] != fin->fin_p)) continue; if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags)) continue; if (np->in_flags & IPN_FILTER) { switch (ipf_nat_match(fin, np)) { case 0 : continue; case -1 : rval = -1; goto inmatchfail; case 1 : default : break; } } else { if ((in.s_addr & np->in_odstmsk) != np->in_odstaddr) continue; if (np->in_odport && ((np->in_dtop < dport) || (dport < np->in_odport))) continue; } if (np->in_plabel != -1) { if (!ipf_proxy_ok(fin, tcp, np)) { continue; } } if (np->in_flags & IPN_NO) { np->in_hits++; break; } MUTEX_ENTER(&softn->ipf_nat_new); /* * If we've matched a round-robin rule but it has * moved in the list since we got it, start over as * this is now no longer correct. */ if (npnext != np->in_rnext) { if ((np->in_flags & IPN_ROUNDR) != 0) { MUTEX_EXIT(&softn->ipf_nat_new); goto retry_roundrobin; } npnext = np->in_rnext; } nat = ipf_nat_add(fin, np, NULL, nflags, NAT_INBOUND); MUTEX_EXIT(&softn->ipf_nat_new); if (nat != NULL) { natfailed = 0; break; } natfailed = -1; } if ((np == NULL) && (rmsk < softn->ipf_nat_rdr_max)) { rmsk++; goto maskloop; } } if (nat != NULL) { rval = ipf_nat_in(fin, nat, natadd, nflags); if (rval == 1) { MUTEX_ENTER(&nat->nat_lock); ipf_nat_update(fin, nat); nat->nat_bytes[0] += fin->fin_plen; nat->nat_pkts[0]++; fin->fin_pktnum = nat->nat_pkts[0]; MUTEX_EXIT(&nat->nat_lock); } } else rval = natfailed; inmatchfail: RWLOCK_EXIT(&softc->ipf_nat); switch (rval) { case -1 : if (passp != NULL) { DT1(frb_natv4in, fr_info_t *, fin); NBUMPSIDED(0, ns_drop); *passp = FR_BLOCK; fin->fin_reason = FRB_NATV4; } fin->fin_flx |= FI_BADNAT; NBUMPSIDED(0, ns_badnat); break; case 0 : NBUMPSIDE(0, ns_ignored); break; case 1 : NBUMPSIDE(0, ns_translated); break; } return rval; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_in */ /* Returns: int - -1 == packet failed NAT checks so block it, */ /* 1 == packet was successfully translated. */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to NAT structure */ /* natadd(I) - flag indicating if it is safe to add frag cache */ /* nflags(I) - NAT flags set for this packet */ /* Locks Held: ipf_nat(READ) */ /* */ /* Translate a packet coming "in" on an interface. */ /* ------------------------------------------------------------------------ */ int ipf_nat_in(fin, nat, natadd, nflags) fr_info_t *fin; nat_t *nat; int natadd; u_32_t nflags; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; u_32_t sumd, ipsumd, sum1, sum2; icmphdr_t *icmp; tcphdr_t *tcp; ipnat_t *np; int skip; int i; tcp = NULL; np = nat->nat_ptr; fin->fin_fr = nat->nat_fr; if (np != NULL) { if ((natadd != 0) && (fin->fin_flx & FI_FRAG)) (void) ipf_frag_natnew(softc, fin, 0, nat); /* ------------------------------------------------------------- */ /* A few quick notes: */ /* Following are test conditions prior to calling the */ /* ipf_proxy_check routine. */ /* */ /* A NULL tcp indicates a non TCP/UDP packet. 
When dealing */ /* with a map rule, we attempt to match the packet's */ /* source port against in_dport, otherwise we'd compare the */ /* packet's destination. */ /* ------------------------------------------------------------- */ if (np->in_apr != NULL) { i = ipf_proxy_check(fin, nat); if (i == -1) { NBUMPSIDED(0, ns_ipf_proxy_fail); return -1; } } } ipf_sync_update(softc, SMC_NAT, fin, nat->nat_sync); ipsumd = nat->nat_ipsumd; /* * Fix up checksums, not by recalculating them, but * simply computing adjustments. * Why only do this for some platforms on inbound packets ? * Because for those that it is done, IP processing is yet to happen * and so the IPv4 header checksum has not yet been evaluated. * Perhaps it should always be done for the benefit of things like * fast forwarding (so that it doesn't need to be recomputed) but with * header checksum offloading, perhaps it is a moot point. */ switch (nat->nat_dir) { case NAT_INBOUND : if ((fin->fin_flx & FI_ICMPERR) == 0) { fin->fin_ip->ip_src = nat->nat_nsrcip; fin->fin_saddr = nat->nat_nsrcaddr; } else { sum1 = nat->nat_osrcaddr; sum2 = nat->nat_nsrcaddr; CALC_SUMD(sum1, sum2, sumd); ipsumd -= sumd; } fin->fin_ip->ip_dst = nat->nat_ndstip; fin->fin_daddr = nat->nat_ndstaddr; #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ defined(__osf__) || defined(linux) ipf_fix_outcksum(0, &fin->fin_ip->ip_sum, ipsumd, 0); #endif break; case NAT_OUTBOUND : if ((fin->fin_flx & FI_ICMPERR) == 0) { fin->fin_ip->ip_src = nat->nat_odstip; fin->fin_saddr = nat->nat_odstaddr; } else { sum1 = nat->nat_odstaddr; sum2 = nat->nat_ndstaddr; CALC_SUMD(sum1, sum2, sumd); ipsumd -= sumd; } fin->fin_ip->ip_dst = nat->nat_osrcip; fin->fin_daddr = nat->nat_osrcaddr; #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ defined(__osf__) || defined(linux) ipf_fix_incksum(0, &fin->fin_ip->ip_sum, ipsumd, 0); #endif break; case NAT_DIVERTIN : { udphdr_t *uh; ip_t *ip; mb_t *m; m = M_DUP(np->in_divmp); if (m == NULL) { NBUMPSIDED(0, ns_divert_dup); return -1; } ip = MTOD(m, ip_t *); ip_fillid(ip); sum1 = ntohs(ip->ip_len); ip->ip_len = ntohs(ip->ip_len); ip->ip_len += fin->fin_plen; ip->ip_len = htons(ip->ip_len); uh = (udphdr_t *)(ip + 1); uh->uh_ulen += fin->fin_plen; uh->uh_ulen = htons(uh->uh_ulen); sum2 = ntohs(ip->ip_id) + ntohs(ip->ip_len); sum2 += ntohs(ip->ip_off) & IP_DF; CALC_SUMD(sum1, sum2, sumd); #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ defined(__osf__) || defined(linux) ipf_fix_outcksum(0, &ip->ip_sum, sumd, 0); #endif PREP_MB_T(fin, m); fin->fin_ip = ip; fin->fin_plen += sizeof(ip_t) + 8; /* UDP + new IPv4 hdr */ fin->fin_dlen += sizeof(ip_t) + 8; /* UDP + old IPv4 hdr */ nflags &= ~IPN_TCPUDPICMP; break; } case NAT_DIVERTOUT : { mb_t *m; skip = ipf_nat_decap(fin, nat); if (skip <= 0) { NBUMPSIDED(0, ns_decap_fail); return -1; } m = fin->fin_m; #if defined(MENTAT) && defined(_KERNEL) m->b_rptr += skip; #else m->m_data += skip; m->m_len -= skip; # ifdef M_PKTHDR if (m->m_flags & M_PKTHDR) m->m_pkthdr.len -= skip; # endif #endif ipf_nat_update(fin, nat); nflags &= ~IPN_TCPUDPICMP; fin->fin_flx |= FI_NATED; if (np != NULL && np->in_tag.ipt_num[0] != 0) fin->fin_nattag = &np->in_tag; return 1; /* NOTREACHED */ } } if (nflags & IPN_TCPUDP) tcp = fin->fin_dp; if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { u_short *csump; if ((nat->nat_odport != 0) && (nflags & IPN_TCPUDP)) { switch (nat->nat_dir) { case NAT_INBOUND : tcp->th_sport = nat->nat_nsport; fin->fin_data[0] = ntohs(nat->nat_nsport); tcp->th_dport = 
nat->nat_ndport; fin->fin_data[1] = ntohs(nat->nat_ndport); break; case NAT_OUTBOUND : tcp->th_sport = nat->nat_odport; fin->fin_data[0] = ntohs(nat->nat_odport); tcp->th_dport = nat->nat_osport; fin->fin_data[1] = ntohs(nat->nat_osport); break; } } if ((nat->nat_odport != 0) && (nflags & IPN_ICMPQUERY)) { icmp = fin->fin_dp; icmp->icmp_id = nat->nat_nicmpid; } csump = ipf_nat_proto(fin, nat, nflags); /* * The above comments do not hold for layer 4 (or higher) * checksums... */ if (csump != NULL) { if (nat->nat_dir == NAT_OUTBOUND) ipf_fix_incksum(0, csump, nat->nat_sumd[0], 0); else ipf_fix_outcksum(0, csump, nat->nat_sumd[0], 0); } } fin->fin_flx |= FI_NATED; if (np != NULL && np->in_tag.ipt_num[0] != 0) fin->fin_nattag = &np->in_tag; return 1; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_proto */ /* Returns: u_short* - pointer to transport header checksum to update, */ /* NULL if the transport protocol is not recognised */ /* as needing a checksum update. */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to NAT structure */ /* nflags(I) - NAT flags set for this packet */ /* */ /* Return the pointer to the checksum field for each protocol so understood.*/ /* If support for making other changes to a protocol header is required, */ /* that is not strictly 'address' translation, such as clamping the MSS in */ /* TCP down to a specific value, then do it from here. */ /* ------------------------------------------------------------------------ */ u_short * ipf_nat_proto(fin, nat, nflags) fr_info_t *fin; nat_t *nat; u_int nflags; { icmphdr_t *icmp; u_short *csump; tcphdr_t *tcp; udphdr_t *udp; csump = NULL; if (fin->fin_out == 0) { fin->fin_rev = (nat->nat_dir & NAT_OUTBOUND); } else { fin->fin_rev = ((nat->nat_dir & NAT_OUTBOUND) == 0); } switch (fin->fin_p) { case IPPROTO_TCP : tcp = fin->fin_dp; if ((nflags & IPN_TCP) != 0) csump = &tcp->th_sum; /* * Do MSS clamping on a SYN packet; * only IPv4 is handled for now. */ if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0) ipf_nat_mssclamp(tcp, nat->nat_mssclamp, fin, csump); break; case IPPROTO_UDP : udp = fin->fin_dp; if ((nflags & IPN_UDP) != 0) { if (udp->uh_sum != 0) csump = &udp->uh_sum; } break; case IPPROTO_ICMP : icmp = fin->fin_dp; if ((nflags & IPN_ICMPQUERY) != 0) { if (icmp->icmp_cksum != 0) csump = &icmp->icmp_cksum; } break; #ifdef USE_INET6 case IPPROTO_ICMPV6 : { struct icmp6_hdr *icmp6 = (struct icmp6_hdr *)fin->fin_dp; if ((nflags & IPN_ICMPQUERY) != 0) { if (icmp6->icmp6_cksum != 0) csump = &icmp6->icmp6_cksum; } break; } #endif } return csump; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_expire */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to soft context main structure */ /* */ /* Check all of the timeout queues for entries at the top which need to be */ /* expired.
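 */
/* Note with example: timeout queues are kept in expiry order, which is */
/* what lets the scans below stop at the first entry that has not yet */
/* died (editorial sketch of the pattern used in this function). */
#if 0
	for (tqn = ifq->ifq_head; (tqe = tqn) != NULL; ) {
		if (tqe->tqe_die > softc->ipf_ticks)
			break;		/* all later entries die later */
		tqn = tqe->tqe_next;
		ipf_nat_delete(softc, tqe->tqe_parent, NL_EXPIRE);
	}
#endif
/*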
*/ /* ------------------------------------------------------------------------ */ void ipf_nat_expire(softc) ipf_main_softc_t *softc; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; ipftq_t *ifq, *ifqnext; ipftqent_t *tqe, *tqn; int i; SPL_INT(s); SPL_NET(s); WRITE_ENTER(&softc->ipf_nat); for (ifq = softn->ipf_nat_tcptq, i = 0; ifq != NULL; ifq = ifq->ifq_next) { for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) { if (tqe->tqe_die > softc->ipf_ticks) break; tqn = tqe->tqe_next; ipf_nat_delete(softc, tqe->tqe_parent, NL_EXPIRE); } } for (ifq = softn->ipf_nat_utqe; ifq != NULL; ifq = ifq->ifq_next) { for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) { if (tqe->tqe_die > softc->ipf_ticks) break; tqn = tqe->tqe_next; ipf_nat_delete(softc, tqe->tqe_parent, NL_EXPIRE); } } for (ifq = softn->ipf_nat_utqe; ifq != NULL; ifq = ifqnext) { ifqnext = ifq->ifq_next; if (((ifq->ifq_flags & IFQF_DELETE) != 0) && (ifq->ifq_ref == 0)) { ipf_freetimeoutqueue(softc, ifq); } } if (softn->ipf_nat_doflush != 0) { ipf_nat_extraflush(softc, softn, 2); softn->ipf_nat_doflush = 0; } RWLOCK_EXIT(&softc->ipf_nat); SPL_X(s); } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_sync */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to soft context main structure */ /* ifp(I) - pointer to network interface */ /* */ /* Walk through all of the currently active NAT sessions, looking for those */ /* which need to have their translated address updated. */ /* ------------------------------------------------------------------------ */ void ipf_nat_sync(softc, ifp) ipf_main_softc_t *softc; void *ifp; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; u_32_t sum1, sum2, sumd; i6addr_t in; ipnat_t *n; nat_t *nat; void *ifp2; int idx; SPL_INT(s); if (softc->ipf_running <= 0) return; /* * Change IP addresses for NAT sessions for any protocol except TCP * since it will break the TCP connection anyway. The only rules * which will get changed are those which are "map ... -> 0/32", * where the rule specifies the address is taken from the interface. */ SPL_NET(s); WRITE_ENTER(&softc->ipf_nat); if (softc->ipf_running <= 0) { RWLOCK_EXIT(&softc->ipf_nat); return; } for (nat = softn->ipf_nat_instances; nat; nat = nat->nat_next) { if ((nat->nat_flags & IPN_TCP) != 0) continue; n = nat->nat_ptr; if (n != NULL) { if (n->in_v[1] == 4) { if (n->in_redir & NAT_MAP) { if ((n->in_nsrcaddr != 0) || (n->in_nsrcmsk != 0xffffffff)) continue; } else if (n->in_redir & NAT_REDIRECT) { if ((n->in_ndstaddr != 0) || (n->in_ndstmsk != 0xffffffff)) continue; } } #ifdef USE_INET6 if (n->in_v[1] == 6) { if (n->in_redir & NAT_MAP) { if (!IP6_ISZERO(&n->in_nsrcaddr) || !IP6_ISONES(&n->in_nsrcmsk)) continue; } else if (n->in_redir & NAT_REDIRECT) { if (!IP6_ISZERO(&n->in_ndstaddr) || !IP6_ISONES(&n->in_ndstmsk)) continue; } } #endif } if (((ifp == NULL) || (ifp == nat->nat_ifps[0]) || (ifp == nat->nat_ifps[1]))) { nat->nat_ifps[0] = GETIFP(nat->nat_ifnames[0], nat->nat_v[0]); if ((nat->nat_ifps[0] != NULL) && (nat->nat_ifps[0] != (void *)-1)) { nat->nat_mtu[0] = GETIFMTU_4(nat->nat_ifps[0]); } if (nat->nat_ifnames[1][0] != '\0') { nat->nat_ifps[1] = GETIFP(nat->nat_ifnames[1], nat->nat_v[1]); } else { nat->nat_ifps[1] = nat->nat_ifps[0]; } if ((nat->nat_ifps[1] != NULL) && (nat->nat_ifps[1] != (void *)-1)) { nat->nat_mtu[1] = GETIFMTU_4(nat->nat_ifps[1]); } ifp2 = nat->nat_ifps[0]; if (ifp2 == NULL) continue; /* * Change the map-to address to be the same as the * new one.
*/ sum1 = NATFSUM(nat, nat->nat_v[1], nat_nsrc6); if (ipf_ifpaddr(softc, nat->nat_v[0], FRI_NORMAL, ifp2, &in, NULL) != -1) { if (nat->nat_v[0] == 4) nat->nat_nsrcip = in.in4; } sum2 = NATFSUM(nat, nat->nat_v[1], nat_nsrc6); if (sum1 == sum2) continue; /* * Readjust the checksum adjustment to take into * account the new IP#. */ CALC_SUMD(sum1, sum2, sumd); /* XXX - don't change for TCP when solaris does * hardware checksumming. */ sumd += nat->nat_sumd[0]; nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); nat->nat_sumd[1] = nat->nat_sumd[0]; } } for (n = softn->ipf_nat_list; (n != NULL); n = n->in_next) { char *base = n->in_names; if ((ifp == NULL) || (n->in_ifps[0] == ifp)) n->in_ifps[0] = ipf_resolvenic(softc, base + n->in_ifnames[0], n->in_v[0]); if ((ifp == NULL) || (n->in_ifps[1] == ifp)) n->in_ifps[1] = ipf_resolvenic(softc, base + n->in_ifnames[1], n->in_v[1]); if (n->in_redir & NAT_REDIRECT) idx = 1; else idx = 0; if (((ifp == NULL) || (n->in_ifps[idx] == ifp)) && (n->in_ifps[idx] != NULL && n->in_ifps[idx] != (void *)-1)) { ipf_nat_nextaddrinit(softc, n->in_names, &n->in_osrc, 0, n->in_ifps[idx]); ipf_nat_nextaddrinit(softc, n->in_names, &n->in_odst, 0, n->in_ifps[idx]); ipf_nat_nextaddrinit(softc, n->in_names, &n->in_nsrc, 0, n->in_ifps[idx]); ipf_nat_nextaddrinit(softc, n->in_names, &n->in_ndst, 0, n->in_ifps[idx]); } } RWLOCK_EXIT(&softc->ipf_nat); SPL_X(s); } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_icmpquerytype */ /* Returns: int - 1 == success, 0 == failure */ /* Parameters: icmptype(I) - ICMP type number */ /* */ /* Tests to see if the ICMP type number passed is a query/response type or */ /* not. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_icmpquerytype(icmptype) int icmptype; { /* * For the ICMP query NAT code, it is essential that both the query * and the reply match on the NAT rule. Because the NAT structure * does not keep track of the icmptype, and a single NAT structure * is used for all icmp types with the same src, dest and id, we * simply define the replies as queries as well. The funny thing is, * although it seems silly to call a reply a query, this is exactly * as it is defined in the IPv4 specification. */ switch (icmptype) { case ICMP_ECHOREPLY: case ICMP_ECHO: - /* route advertisement/soliciation is currently unsupported: */ + /* route advertisement/solicitation is currently unsupported: */ /* it would require rewriting the ICMP data section */ case ICMP_TSTAMP: case ICMP_TSTAMPREPLY: case ICMP_IREQ: case ICMP_IREQREPLY: case ICMP_MASKREQ: case ICMP_MASKREPLY: return 1; default: return 0; } } /* ------------------------------------------------------------------------ */ /* Function: nat_log */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* nat(I) - pointer to NAT structure */ /* action(I) - action related to NAT structure being performed */ /* */ /* Creates a NAT log entry.
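 */
/* Example: why replies are classed as queries above (editorial note). */
/* An outbound ICMP_ECHO with id 0x1234 creates the NAT entry; the */
/* inbound ICMP_ECHOREPLY carries the same id and must pass the same */
/* test to be translated back: */
#if 0
	if (!ipf_nat_icmpquerytype(icmp->icmp_type))
		return 0;	/* not a translatable query/reply type */
#endif
/*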
*/ /* ------------------------------------------------------------------------ */ void ipf_nat_log(softc, softn, nat, action) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; struct nat *nat; u_int action; { #ifdef IPFILTER_LOG # ifndef LARGE_NAT struct ipnat *np; int rulen; # endif struct natlog natl; void *items[1]; size_t sizes[1]; int types[1]; bcopy((char *)&nat->nat_osrc6, (char *)&natl.nl_osrcip, sizeof(natl.nl_osrcip)); bcopy((char *)&nat->nat_nsrc6, (char *)&natl.nl_nsrcip, sizeof(natl.nl_nsrcip)); bcopy((char *)&nat->nat_odst6, (char *)&natl.nl_odstip, sizeof(natl.nl_odstip)); bcopy((char *)&nat->nat_ndst6, (char *)&natl.nl_ndstip, sizeof(natl.nl_ndstip)); natl.nl_bytes[0] = nat->nat_bytes[0]; natl.nl_bytes[1] = nat->nat_bytes[1]; natl.nl_pkts[0] = nat->nat_pkts[0]; natl.nl_pkts[1] = nat->nat_pkts[1]; natl.nl_odstport = nat->nat_odport; natl.nl_osrcport = nat->nat_osport; natl.nl_nsrcport = nat->nat_nsport; natl.nl_ndstport = nat->nat_ndport; natl.nl_p[0] = nat->nat_pr[0]; natl.nl_p[1] = nat->nat_pr[1]; natl.nl_v[0] = nat->nat_v[0]; natl.nl_v[1] = nat->nat_v[1]; natl.nl_type = nat->nat_redir; natl.nl_action = action; natl.nl_rule = -1; bcopy(nat->nat_ifnames[0], natl.nl_ifnames[0], sizeof(nat->nat_ifnames[0])); bcopy(nat->nat_ifnames[1], natl.nl_ifnames[1], sizeof(nat->nat_ifnames[1])); # ifndef LARGE_NAT if (nat->nat_ptr != NULL) { for (rulen = 0, np = softn->ipf_nat_list; np != NULL; np = np->in_next, rulen++) if (np == nat->nat_ptr) { natl.nl_rule = rulen; break; } } # endif items[0] = &natl; sizes[0] = sizeof(natl); types[0] = 0; (void) ipf_log_items(softc, IPL_LOGNAT, NULL, items, sizes, types, 1); #endif } #if defined(__OpenBSD__) /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_ifdetach */ /* Returns: Nil */ /* Parameters: ifp(I) - pointer to network interface */ /* */ /* Compatibility interface for OpenBSD to trigger the correct updating of */ /* interface references within IPFilter. */ /* ------------------------------------------------------------------------ */ void ipf_nat_ifdetach(ifp) void *ifp; { ipf_main_softc_t *softc; softc = ipf_get_softc(0); ipf_sync(ifp); return; } #endif /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_rule_deref */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to soft context main structure */ /* inp(I) - pointer to pointer to NAT rule */ /* Write Locks: ipf_nat */ /* */ /* Dropping the reference count for a rule means that whatever held the */ /* pointer to this rule (*inp) is no longer interested in it and when the */ /* reference count drops to zero, any resources allocated for the rule can */ /* be released and the rule itself free'd.
*/ /* ------------------------------------------------------------------------ */ void ipf_nat_rule_deref(softc, inp) ipf_main_softc_t *softc; ipnat_t **inp; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; ipnat_t *n; n = *inp; *inp = NULL; n->in_use--; if (n->in_use > 0) return; if (n->in_apr != NULL) ipf_proxy_deref(n->in_apr); ipf_nat_rule_fini(softc, n); if (n->in_redir & NAT_REDIRECT) { if ((n->in_flags & IPN_PROXYRULE) == 0) { ATOMIC_DEC32(softn->ipf_nat_stats.ns_rules_rdr); } } if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) { if ((n->in_flags & IPN_PROXYRULE) == 0) { ATOMIC_DEC32(softn->ipf_nat_stats.ns_rules_map); } } if (n->in_tqehead[0] != NULL) { if (ipf_deletetimeoutqueue(n->in_tqehead[0]) == 0) { ipf_freetimeoutqueue(softc, n->in_tqehead[0]); } } if (n->in_tqehead[1] != NULL) { if (ipf_deletetimeoutqueue(n->in_tqehead[1]) == 0) { ipf_freetimeoutqueue(softc, n->in_tqehead[1]); } } if ((n->in_flags & IPN_PROXYRULE) == 0) { ATOMIC_DEC32(softn->ipf_nat_stats.ns_rules); } MUTEX_DESTROY(&n->in_lock); KFREES(n, n->in_size); #if SOLARIS && !defined(INSTANCES) if (softn->ipf_nat_stats.ns_rules == 0) pfil_delayed_copy = 1; #endif } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_deref */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to soft context main structure */ /* natp(I) - pointer to pointer to NAT table entry */ /* */ /* Decrement the reference counter for this NAT table entry and free it if */ /* there are no more things using it. */ /* */ /* IF nat_ref == 1 when this function is called, then we have an orphan nat */ /* structure *because* it only gets called on paths _after_ nat_ref has been*/ /* incremented. If nat_ref == 1 then we shouldn't decrement it here */ /* because nat_delete() will do that and send nat_ref to -1. */ /* */ /* Holding the lock on nat_lock is required to serialise nat_delete() being */ /* called from a NAT flush ioctl with a deref happening because of a packet.*/ /* ------------------------------------------------------------------------ */ void ipf_nat_deref(softc, natp) ipf_main_softc_t *softc; nat_t **natp; { nat_t *nat; nat = *natp; *natp = NULL; MUTEX_ENTER(&nat->nat_lock); if (nat->nat_ref > 1) { nat->nat_ref--; ASSERT(nat->nat_ref >= 0); MUTEX_EXIT(&nat->nat_lock); return; } MUTEX_EXIT(&nat->nat_lock); WRITE_ENTER(&softc->ipf_nat); ipf_nat_delete(softc, nat, NL_EXPIRE); RWLOCK_EXIT(&softc->ipf_nat); } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_clone */ /* Returns: nat_t* - NULL == cloning failed, */ /* else pointer to new NAT structure */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to master NAT structure */ /* Write Lock: ipf_nat */ /* */ /* Create a "duplicate" NAT table entry from the master. */ /* ------------------------------------------------------------------------ */ nat_t * ipf_nat_clone(fin, nat) fr_info_t *fin; nat_t *nat; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; frentry_t *fr; nat_t *clone; ipnat_t *np; KMALLOC(clone, nat_t *); if (clone == NULL) { NBUMPSIDED(fin->fin_out, ns_clone_nomem); return NULL; } bcopy((char *)nat, (char *)clone, sizeof(*clone)); MUTEX_NUKE(&clone->nat_lock); clone->nat_rev = fin->fin_rev; clone->nat_aps = NULL; /* * Initialize all these so that ipf_nat_delete() doesn't cause a crash.
*/ clone->nat_tqe.tqe_pnext = NULL; clone->nat_tqe.tqe_next = NULL; clone->nat_tqe.tqe_ifq = NULL; clone->nat_tqe.tqe_parent = clone; clone->nat_flags &= ~SI_CLONE; clone->nat_flags |= SI_CLONED; if (clone->nat_hm) clone->nat_hm->hm_ref++; if (ipf_nat_insert(softc, softn, clone) == -1) { KFREE(clone); NBUMPSIDED(fin->fin_out, ns_insert_fail); return NULL; } np = clone->nat_ptr; if (np != NULL) { if (softn->ipf_nat_logging) ipf_nat_log(softc, softn, clone, NL_CLONE); np->in_use++; } fr = clone->nat_fr; if (fr != NULL) { MUTEX_ENTER(&fr->fr_lock); fr->fr_ref++; MUTEX_EXIT(&fr->fr_lock); } /* * Because the clone is created outside the normal loop of things and * TCP has special needs in terms of state, initialise the timeout * state of the new NAT from here. */ if (clone->nat_pr[0] == IPPROTO_TCP) { (void) ipf_tcp_age(&clone->nat_tqe, fin, softn->ipf_nat_tcptq, clone->nat_flags, 2); } clone->nat_sync = ipf_sync_new(softc, SMC_NAT, fin, clone); if (softn->ipf_nat_logging) ipf_nat_log(softc, softn, clone, NL_CLONE); return clone; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_wildok */ /* Returns: int - 1 == packet's ports match wildcards */ /* 0 == packet's ports don't match wildcards */ /* Parameters: nat(I) - NAT entry */ /* sport(I) - source port */ /* dport(I) - destination port */ /* flags(I) - wildcard flags */ /* dir(I) - packet direction */ /* */ /* Use NAT entry and packet direction to determine which combination of */ /* wildcard flags should be used. */ /* ------------------------------------------------------------------------ */ int ipf_nat_wildok(nat, sport, dport, flags, dir) nat_t *nat; int sport, dport, flags, dir; { /* * When called by dir is set to * nat_inlookup NAT_INBOUND (0) * nat_outlookup NAT_OUTBOUND (1) * * We simply combine the packet's direction in dir with the original * "intended" direction of that NAT entry in nat->nat_dir to decide * which combination of wildcard flags to allow. */ switch ((dir << 1) | (nat->nat_dir & (NAT_INBOUND|NAT_OUTBOUND))) { case 3: /* outbound packet / outbound entry */ if (((nat->nat_osport == sport) || (flags & SI_W_SPORT)) && ((nat->nat_odport == dport) || (flags & SI_W_DPORT))) return 1; break; case 2: /* outbound packet / inbound entry */ if (((nat->nat_osport == dport) || (flags & SI_W_SPORT)) && ((nat->nat_odport == sport) || (flags & SI_W_DPORT))) return 1; break; case 1: /* inbound packet / outbound entry */ if (((nat->nat_osport == dport) || (flags & SI_W_SPORT)) && ((nat->nat_odport == sport) || (flags & SI_W_DPORT))) return 1; break; case 0: /* inbound packet / inbound entry */ if (((nat->nat_osport == sport) || (flags & SI_W_SPORT)) && ((nat->nat_odport == dport) || (flags & SI_W_DPORT))) return 1; break; default: break; } return(0); } /* ------------------------------------------------------------------------ */ /* Function: nat_mssclamp */ /* Returns: Nil */ /* Parameters: tcp(I) - pointer to TCP header */ /* maxmss(I) - value to clamp the TCP MSS to */ /* fin(I) - pointer to packet information */ /* csump(I) - pointer to TCP checksum */ /* */ /* Check for MSS option and clamp it if necessary. If found and changed, */ /* then the TCP header checksum will be updated to reflect the change in */ /* the MSS. 
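 */
/* Example: a worked clamp for the function below (editorial sketch). */
/* A SYN advertising MSS 1460 (option bytes 0x05 0xb4) clamped to 1400 */
/* (0x05 0x78) has its checksum patched with the 16-bit difference */
/* instead of being recomputed: */
#if 0
	mss = 1460;			/* from cp[2] * 256 + cp[3] */
	cp[2] = 1400 / 256;		/* 0x05 */
	cp[3] = 1400 & 0xff;		/* 0x78 */
	CALC_SUMD(mss, 1400, sumd);
	ipf_fix_outcksum(0, csump, sumd, 0);
#endif
/*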
*/ /* ------------------------------------------------------------------------ */ static void ipf_nat_mssclamp(tcp, maxmss, fin, csump) tcphdr_t *tcp; u_32_t maxmss; fr_info_t *fin; u_short *csump; { u_char *cp, *ep, opt; int hlen, advance; u_32_t mss, sumd; hlen = TCP_OFF(tcp) << 2; if (hlen > sizeof(*tcp)) { cp = (u_char *)tcp + sizeof(*tcp); ep = (u_char *)tcp + hlen; while (cp < ep) { opt = cp[0]; if (opt == TCPOPT_EOL) break; else if (opt == TCPOPT_NOP) { cp++; continue; } if (cp + 1 >= ep) break; advance = cp[1]; if ((cp + advance > ep) || (advance <= 0)) break; switch (opt) { case TCPOPT_MAXSEG: if (advance != 4) break; mss = cp[2] * 256 + cp[3]; if (mss > maxmss) { cp[2] = maxmss / 256; cp[3] = maxmss & 0xff; CALC_SUMD(mss, maxmss, sumd); ipf_fix_outcksum(0, csump, sumd, 0); } break; default: /* ignore unknown options */ break; } cp += advance; } } } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_setqueue */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* nat(I) - pointer to NAT structure */ /* Locks: ipf_nat (read or write) */ /* */ /* Put the NAT entry on its default timeout queue, using rev as a helper in */ /* determining which queue it should be placed on. */ /* ------------------------------------------------------------------------ */ void ipf_nat_setqueue(softc, softn, nat) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; nat_t *nat; { ipftq_t *oifq, *nifq; int rev = nat->nat_rev; if (nat->nat_ptr != NULL) nifq = nat->nat_ptr->in_tqehead[rev]; else nifq = NULL; if (nifq == NULL) { switch (nat->nat_pr[0]) { case IPPROTO_UDP : nifq = &softn->ipf_nat_udptq; break; case IPPROTO_ICMP : nifq = &softn->ipf_nat_icmptq; break; case IPPROTO_TCP : nifq = softn->ipf_nat_tcptq + nat->nat_tqe.tqe_state[rev]; break; default : nifq = &softn->ipf_nat_iptq; break; } } oifq = nat->nat_tqe.tqe_ifq; /* * If it's currently on a timeout queue, move it from one queue to * another, else put it on the end of the newly determined queue. */ if (oifq != NULL) ipf_movequeue(softc->ipf_ticks, &nat->nat_tqe, oifq, nifq); else ipf_queueappend(softc->ipf_ticks, &nat->nat_tqe, nifq, nat); return; } /* ------------------------------------------------------------------------ */ /* Function: nat_getnext */ /* Returns: int - 0 == ok, else error */ /* Parameters: softc(I) - pointer to soft context main structure */ /* t(I) - pointer to ipftoken structure */ /* itp(I) - pointer to ipfgeniter_t structure */ /* */ /* Fetch the next nat/ipnat structure pointer from the linked list and */ /* copy it out to the storage space pointed to by itp->igi_data. The next */ /* item in the list to look at is put back in the ipftoken structure.
*/ /* ------------------------------------------------------------------------ */ static int ipf_nat_getnext(softc, t, itp, objp) ipf_main_softc_t *softc; ipftoken_t *t; ipfgeniter_t *itp; ipfobj_t *objp; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; hostmap_t *hm, *nexthm = NULL, zerohm; ipnat_t *ipn, *nextipnat = NULL, zeroipn; nat_t *nat, *nextnat = NULL, zeronat; int error = 0; void *nnext; if (itp->igi_nitems != 1) { IPFERROR(60075); return ENOSPC; } READ_ENTER(&softc->ipf_nat); switch (itp->igi_type) { case IPFGENITER_HOSTMAP : hm = t->ipt_data; if (hm == NULL) { nexthm = softn->ipf_hm_maplist; } else { nexthm = hm->hm_next; } if (nexthm != NULL) { ATOMIC_INC32(nexthm->hm_ref); t->ipt_data = nexthm; } else { bzero(&zerohm, sizeof(zerohm)); nexthm = &zerohm; t->ipt_data = NULL; } nnext = nexthm->hm_next; break; case IPFGENITER_IPNAT : ipn = t->ipt_data; if (ipn == NULL) { nextipnat = softn->ipf_nat_list; } else { nextipnat = ipn->in_next; } if (nextipnat != NULL) { ATOMIC_INC32(nextipnat->in_use); t->ipt_data = nextipnat; } else { bzero(&zeroipn, sizeof(zeroipn)); nextipnat = &zeroipn; t->ipt_data = NULL; } nnext = nextipnat->in_next; break; case IPFGENITER_NAT : nat = t->ipt_data; if (nat == NULL) { nextnat = softn->ipf_nat_instances; } else { nextnat = nat->nat_next; } if (nextnat != NULL) { MUTEX_ENTER(&nextnat->nat_lock); nextnat->nat_ref++; MUTEX_EXIT(&nextnat->nat_lock); t->ipt_data = nextnat; } else { bzero(&zeronat, sizeof(zeronat)); nextnat = &zeronat; t->ipt_data = NULL; } nnext = nextnat->nat_next; break; default : RWLOCK_EXIT(&softc->ipf_nat); IPFERROR(60055); return EINVAL; } RWLOCK_EXIT(&softc->ipf_nat); objp->ipfo_ptr = itp->igi_data; switch (itp->igi_type) { case IPFGENITER_HOSTMAP : error = COPYOUT(nexthm, objp->ipfo_ptr, sizeof(*nexthm)); if (error != 0) { IPFERROR(60049); error = EFAULT; } if (hm != NULL) { WRITE_ENTER(&softc->ipf_nat); ipf_nat_hostmapdel(softc, &hm); RWLOCK_EXIT(&softc->ipf_nat); } break; case IPFGENITER_IPNAT : objp->ipfo_size = nextipnat->in_size; objp->ipfo_type = IPFOBJ_IPNAT; error = ipf_outobjk(softc, objp, nextipnat); if (ipn != NULL) { WRITE_ENTER(&softc->ipf_nat); ipf_nat_rule_deref(softc, &ipn); RWLOCK_EXIT(&softc->ipf_nat); } break; case IPFGENITER_NAT : objp->ipfo_size = sizeof(nat_t); objp->ipfo_type = IPFOBJ_NAT; error = ipf_outobjk(softc, objp, nextnat); if (nat != NULL) ipf_nat_deref(softc, &nat); break; } if (nnext == NULL) ipf_token_mark_complete(t); return error; } /* ------------------------------------------------------------------------ */ /* Function: nat_extraflush */ /* Returns: int - 0 == success, -1 == failure */ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* which(I) - how to flush the active NAT table */ /* Write Locks: ipf_nat */ /* */ /* Flush nat tables. Three actions currently defined: */ /* which == 0 : flush all nat table entries */ /* which == 1 : flush TCP connections which have started to close but are */ /* stuck for some reason. */ /* which == 2 : flush TCP connections which have been idle for a long time, */ /* starting at > 4 days idle and working back in successive half-*/ /* days to at most 12 hours old. If this fails to free enough */ /* slots then work backwards in half hour slots to 30 minutes. */ /* If that too fails, then work backwards in 30 second intervals */ /* for the last 30 minutes to at worst 30 seconds idle. 
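 */
/* Example: how the flush styles above map to calls (editorial sketch; */
/* the "which" values are taken from the code below): */
#if 0
	ipf_nat_extraflush(softc, softn, 0);	/* flush everything */
	ipf_nat_extraflush(softc, softn, 1);	/* TCP stuck closing */
	ipf_nat_extraflush(softc, softn, 2);	/* idle, oldest first */
	ipf_nat_extraflush(softc, softn, IPF_TCPS_TIME_WAIT); /* one state */
	ipf_nat_extraflush(softc, softn, 120);	/* >= 30: an idle time in */
						/* seconds (IPF_TTLVAL) */
#endif
/*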
*/ /* ------------------------------------------------------------------------ */ static int ipf_nat_extraflush(softc, softn, which) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; int which; { nat_t *nat, **natp; ipftqent_t *tqn; ipftq_t *ifq; int removed; SPL_INT(s); removed = 0; SPL_NET(s); switch (which) { case 0 : softn->ipf_nat_stats.ns_flush_all++; /* * Style 0 flush removes everything... */ for (natp = &softn->ipf_nat_instances; ((nat = *natp) != NULL); ) { ipf_nat_delete(softc, nat, NL_FLUSH); removed++; } break; case 1 : softn->ipf_nat_stats.ns_flush_closing++; /* * Since we're only interested in things that are closing, * we can start with the appropriate timeout queue. */ for (ifq = softn->ipf_nat_tcptq + IPF_TCPS_CLOSE_WAIT; ifq != NULL; ifq = ifq->ifq_next) { for (tqn = ifq->ifq_head; tqn != NULL; ) { nat = tqn->tqe_parent; tqn = tqn->tqe_next; if (nat->nat_pr[0] != IPPROTO_TCP || nat->nat_pr[1] != IPPROTO_TCP) break; ipf_nat_delete(softc, nat, NL_EXPIRE); removed++; } } /* * Also need to look through the user defined queues. */ for (ifq = softn->ipf_nat_utqe; ifq != NULL; ifq = ifq->ifq_next) { for (tqn = ifq->ifq_head; tqn != NULL; ) { nat = tqn->tqe_parent; tqn = tqn->tqe_next; if (nat->nat_pr[0] != IPPROTO_TCP || nat->nat_pr[1] != IPPROTO_TCP) continue; if ((nat->nat_tcpstate[0] > IPF_TCPS_ESTABLISHED) && (nat->nat_tcpstate[1] > IPF_TCPS_ESTABLISHED)) { ipf_nat_delete(softc, nat, NL_EXPIRE); removed++; } } } break; /* * Args 5-11 correspond to flushing those particular states * for TCP connections. */ case IPF_TCPS_CLOSE_WAIT : case IPF_TCPS_FIN_WAIT_1 : case IPF_TCPS_CLOSING : case IPF_TCPS_LAST_ACK : case IPF_TCPS_FIN_WAIT_2 : case IPF_TCPS_TIME_WAIT : case IPF_TCPS_CLOSED : softn->ipf_nat_stats.ns_flush_state++; tqn = softn->ipf_nat_tcptq[which].ifq_head; while (tqn != NULL) { nat = tqn->tqe_parent; tqn = tqn->tqe_next; ipf_nat_delete(softc, nat, NL_FLUSH); removed++; } break; default : if (which < 30) break; softn->ipf_nat_stats.ns_flush_timeout++; /* * Take a large arbitrary number to mean the number of seconds * which we consider to be the maximum idle time we'll allow * before an entry is expired. */ which = IPF_TTLVAL(which); for (natp = &softn->ipf_nat_instances; ((nat = *natp) != NULL); ) { if (softc->ipf_ticks - nat->nat_touched > which) { ipf_nat_delete(softc, nat, NL_FLUSH); removed++; } else natp = &nat->nat_next; } break; } if (which != 2) { SPL_X(s); return removed; } softn->ipf_nat_stats.ns_flush_queue++; /* * Asked to remove inactive entries because the table is full; try * again, up to 3 times, with a different criterion each time if the * first attempt fails. The criteria must be tried in order of * decreasing age. * Another alternative is to implement random drop and drop N entries * at random until N have been freed up. */ if (softc->ipf_ticks - softn->ipf_nat_last_force_flush > IPF_TTLVAL(5)) { softn->ipf_nat_last_force_flush = softc->ipf_ticks; removed = ipf_queueflush(softc, ipf_nat_flush_entry, softn->ipf_nat_tcptq, softn->ipf_nat_utqe, &softn->ipf_nat_stats.ns_active, softn->ipf_nat_table_sz, softn->ipf_nat_table_wm_low); } SPL_X(s); return removed; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_flush_entry */ /* Returns: 0 - always succeeds */ /* Parameters: softc(I) - pointer to soft context main structure */ /* entry(I) - pointer to NAT entry */ /* Write Locks: ipf_nat */ /* */ /* This function is a stepping stone between ipf_queueflush() and */ /* nat_delete().
It is used so we can provide a uniform interface via the */ /* ipf_queueflush() function. Since the nat_delete() function returns void */ /* we translate that to mean it always succeeds in deleting something. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_flush_entry(softc, entry) ipf_main_softc_t *softc; void *entry; { ipf_nat_delete(softc, entry, NL_FLUSH); return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_iterator */ /* Returns: int - 0 == ok, else error */ /* Parameters: softc(I) - pointer to soft context main structure */ /* token(I) - pointer to ipftoken structure */ /* itp(I) - pointer to ipfgeniter_t structure */ /* obj(I) - pointer to data description structure */ /* */ /* This function acts as a handler for the SIOCGENITER ioctls that use a */ /* generic structure to iterate through a list. There are three different */ /* linked lists of NAT related information to go through: NAT rules, active */ /* NAT mappings and the NAT fragment cache. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_iterator(softc, token, itp, obj) ipf_main_softc_t *softc; ipftoken_t *token; ipfgeniter_t *itp; ipfobj_t *obj; { int error; if (itp->igi_data == NULL) { IPFERROR(60052); return EFAULT; } switch (itp->igi_type) { case IPFGENITER_HOSTMAP : case IPFGENITER_IPNAT : case IPFGENITER_NAT : error = ipf_nat_getnext(softc, token, itp, obj); break; case IPFGENITER_NATFRAG : error = ipf_frag_nat_next(softc, token, itp); break; default : IPFERROR(60053); error = EINVAL; break; } return error; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_setpending */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to soft context main structure */ /* nat(I) - pointer to NAT structure */ /* Locks: ipf_nat (read or write) */ /* */ /* Put the NAT entry on to the pending queue - this queue has a very short */ /* lifetime where items are put that can't be deleted straight away because */ /* of locking issues but we want to delete them ASAP, anyway. In calling */ /* this function, it is assumed that the owner (if there is one, as shown */ /* by nat_me) is no longer interested in it. */ /* ------------------------------------------------------------------------ */ void ipf_nat_setpending(softc, nat) ipf_main_softc_t *softc; nat_t *nat; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; ipftq_t *oifq; oifq = nat->nat_tqe.tqe_ifq; if (oifq != NULL) ipf_movequeue(softc->ipf_ticks, &nat->nat_tqe, oifq, &softn->ipf_nat_pending); else ipf_queueappend(softc->ipf_ticks, &nat->nat_tqe, &softn->ipf_nat_pending, nat); if (nat->nat_me != NULL) { *nat->nat_me = NULL; nat->nat_me = NULL; nat->nat_ref--; ASSERT(nat->nat_ref >= 0); } } /* ------------------------------------------------------------------------ */ /* Function: nat_newrewrite */ /* Returns: int - -1 == error, 0 == success (no move), 1 == success and */ /* allow rule to be moved if IPN_ROUNDR is set. */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to NAT entry */ /* ni(I) - pointer to structure with misc. information needed */ /* to create new NAT entry. */ /* Write Lock: ipf_nat */ /* */ /* This function is responsible for setting up an active NAT session where */ /* we are changing both the source and destination parameters at the same */ /* time. 
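 */
/* Editorial note, derived from the code below: in_stepnext rotates */
/* through four values, one per parameter that can be advanced: */
/* 0 == source address, 1 == source port, */
/* 2 == destination address, 3 == destination port, */
/* and then wraps back to 0 (in_stepnext &= 0x3). */
/*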
The loop in here works differently to elsewhere - each iteration */ /* is responsible for changing a single parameter that can be incremented. */ /* So one pass may increase the source IP#, next source port, next dest. IP#*/ /* and the last destination port for a total of 4 iterations to try each. */ /* This is done to try and exhaustively use the translation space available.*/ /* ------------------------------------------------------------------------ */ static int ipf_nat_newrewrite(fin, nat, nai) fr_info_t *fin; nat_t *nat; natinfo_t *nai; { int src_search = 1; int dst_search = 1; fr_info_t frnat; u_32_t flags; u_short swap; ipnat_t *np; nat_t *natl; int l = 0; int changed; natl = NULL; changed = -1; np = nai->nai_np; flags = nat->nat_flags; bcopy((char *)fin, (char *)&frnat, sizeof(*fin)); nat->nat_hm = NULL; do { changed = -1; /* TRACE (l, src_search, dst_search, np) */ if ((src_search == 0) && (np->in_spnext == 0) && (dst_search == 0) && (np->in_dpnext == 0)) { if (l > 0) return -1; } /* * Find a new source address */ if (ipf_nat_nextaddr(fin, &np->in_nsrc, &frnat.fin_saddr, &frnat.fin_saddr) == -1) { return -1; } if ((np->in_nsrcaddr == 0) && (np->in_nsrcmsk == 0xffffffff)) { src_search = 0; if (np->in_stepnext == 0) np->in_stepnext = 1; } else if ((np->in_nsrcaddr == 0) && (np->in_nsrcmsk == 0)) { src_search = 0; if (np->in_stepnext == 0) np->in_stepnext = 1; } else if (np->in_nsrcmsk == 0xffffffff) { src_search = 0; if (np->in_stepnext == 0) np->in_stepnext = 1; } else if (np->in_nsrcmsk != 0xffffffff) { if (np->in_stepnext == 0 && changed == -1) { np->in_snip++; np->in_stepnext++; changed = 0; } } if ((flags & IPN_TCPUDPICMP) != 0) { if (np->in_spnext != 0) frnat.fin_data[0] = np->in_spnext; /* * Standard port translation. Select next port. */ if ((flags & IPN_FIXEDSPORT) != 0) { np->in_stepnext = 2; } else if ((np->in_stepnext == 1) && (changed == -1) && (natl != NULL)) { np->in_spnext++; np->in_stepnext++; changed = 1; if (np->in_spnext > np->in_spmax) np->in_spnext = np->in_spmin; } } else { np->in_stepnext = 2; } np->in_stepnext &= 0x3; /* * Find a new destination address */ /* TRACE (fin, np, l, frnat) */ if (ipf_nat_nextaddr(fin, &np->in_ndst, &frnat.fin_daddr, &frnat.fin_daddr) == -1) return -1; if ((np->in_ndstaddr == 0) && (np->in_ndstmsk == 0xffffffff)) { dst_search = 0; if (np->in_stepnext == 2) np->in_stepnext = 3; } else if ((np->in_ndstaddr == 0) && (np->in_ndstmsk == 0)) { dst_search = 0; if (np->in_stepnext == 2) np->in_stepnext = 3; } else if (np->in_ndstmsk == 0xffffffff) { dst_search = 0; if (np->in_stepnext == 2) np->in_stepnext = 3; } else if (np->in_ndstmsk != 0xffffffff) { if ((np->in_stepnext == 2) && (changed == -1) && (natl != NULL)) { changed = 2; np->in_stepnext++; np->in_dnip++; } } if ((flags & IPN_TCPUDPICMP) != 0) { if (np->in_dpnext != 0) frnat.fin_data[1] = np->in_dpnext; /* * Standard port translation. Select next port. */ if ((flags & IPN_FIXEDDPORT) != 0) { np->in_stepnext = 0; } else if (np->in_stepnext == 3 && changed == -1) { np->in_dpnext++; np->in_stepnext++; changed = 3; if (np->in_dpnext > np->in_dpmax) np->in_dpnext = np->in_dpmin; } } else { if (np->in_stepnext == 3) np->in_stepnext = 0; } /* TRACE (frnat) */ /* * Here we do a lookup of the connection as seen from * the outside. If an IP# pair already exists, try * again. So if you have A->B becomes C->B, you can * also have D->E become C->E but not D->B causing * another C->B. 
Also take protocol and ports into * account when determining whether a pre-existing * NAT setup will cause an external conflict where * this is appropriate. * * fin_data[] is swapped around because we are doing a * lookup of the packet as if it were moving in the opposite * direction of the one we are working with now. */ if (flags & IPN_TCPUDP) { swap = frnat.fin_data[0]; frnat.fin_data[0] = frnat.fin_data[1]; frnat.fin_data[1] = swap; } if (fin->fin_out == 1) { natl = ipf_nat_inlookup(&frnat, flags & ~(SI_WILDP|NAT_SEARCH), (u_int)frnat.fin_p, frnat.fin_dst, frnat.fin_src); } else { natl = ipf_nat_outlookup(&frnat, flags & ~(SI_WILDP|NAT_SEARCH), (u_int)frnat.fin_p, frnat.fin_dst, frnat.fin_src); } if (flags & IPN_TCPUDP) { swap = frnat.fin_data[0]; frnat.fin_data[0] = frnat.fin_data[1]; frnat.fin_data[1] = swap; } /* TRACE natl, in_stepnext, l */ if ((natl != NULL) && (l > 8)) /* XXX 8 is arbitrary */ return -1; np->in_stepnext &= 0x3; l++; changed = -1; } while (natl != NULL); nat->nat_osrcip = fin->fin_src; nat->nat_odstip = fin->fin_dst; nat->nat_nsrcip = frnat.fin_src; nat->nat_ndstip = frnat.fin_dst; if ((flags & IPN_TCPUDP) != 0) { nat->nat_osport = htons(fin->fin_data[0]); nat->nat_odport = htons(fin->fin_data[1]); nat->nat_nsport = htons(frnat.fin_data[0]); nat->nat_ndport = htons(frnat.fin_data[1]); } else if ((flags & IPN_ICMPQUERY) != 0) { nat->nat_oicmpid = fin->fin_data[1]; nat->nat_nicmpid = frnat.fin_data[1]; } return 0; } /* ------------------------------------------------------------------------ */ /* Function: nat_newdivert */ /* Returns: int - -1 == error, 0 == success */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to NAT entry */ /* ni(I) - pointer to structure with misc. information needed */ /* to create new NAT entry. */ /* Write Lock: ipf_nat */ /* */ /* Create a new NAT divert session as defined by the NAT rule. This is */ /* somewhat different to other NAT session creation routines because we */ /* do not iterate through either port numbers or IP addresses, searching */ /* for a unique mapping; however, a complementary duplicate check is made.
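A sketch of that duplicate check in isolation (everything below is
illustrative only: the EXAMPLE_ONLY guard and the helper name are
hypothetical, while the lookup calls mirror those in the function):
*/
#ifdef EXAMPLE_ONLY
static int
example_divert_is_duplicate(fr_info_t *fin, fr_info_t *frnat, int p)
{
	nat_t *natl;

	/* Probe the session table from the reply direction. */
	if (fin->fin_out == 1)
		natl = ipf_nat_inlookup(frnat, 0, (u_int)p,
					frnat->fin_dst, frnat->fin_src);
	else
		natl = ipf_nat_outlookup(frnat, 0, (u_int)p,
					frnat->fin_dst, frnat->fin_src);
	return (natl != NULL);
}
#endif
/*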
*/ /* ------------------------------------------------------------------------ */ static int ipf_nat_newdivert(fin, nat, nai) fr_info_t *fin; nat_t *nat; natinfo_t *nai; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; fr_info_t frnat; ipnat_t *np; nat_t *natl; int p; np = nai->nai_np; bcopy((char *)fin, (char *)&frnat, sizeof(*fin)); nat->nat_pr[0] = 0; nat->nat_osrcaddr = fin->fin_saddr; nat->nat_odstaddr = fin->fin_daddr; frnat.fin_saddr = htonl(np->in_snip); frnat.fin_daddr = htonl(np->in_dnip); if ((nat->nat_flags & IPN_TCPUDP) != 0) { nat->nat_osport = htons(fin->fin_data[0]); nat->nat_odport = htons(fin->fin_data[1]); } else if ((nat->nat_flags & IPN_ICMPQUERY) != 0) { nat->nat_oicmpid = fin->fin_data[1]; } if (np->in_redir & NAT_DIVERTUDP) { frnat.fin_data[0] = np->in_spnext; frnat.fin_data[1] = np->in_dpnext; frnat.fin_flx |= FI_TCPUDP; p = IPPROTO_UDP; } else { frnat.fin_flx &= ~FI_TCPUDP; p = IPPROTO_IPIP; } if (fin->fin_out == 1) { natl = ipf_nat_inlookup(&frnat, 0, p, frnat.fin_dst, frnat.fin_src); } else { natl = ipf_nat_outlookup(&frnat, 0, p, frnat.fin_dst, frnat.fin_src); } if (natl != NULL) { NBUMPSIDED(fin->fin_out, ns_divert_exist); return -1; } nat->nat_nsrcaddr = frnat.fin_saddr; nat->nat_ndstaddr = frnat.fin_daddr; if ((nat->nat_flags & IPN_TCPUDP) != 0) { nat->nat_nsport = htons(frnat.fin_data[0]); nat->nat_ndport = htons(frnat.fin_data[1]); } else if ((nat->nat_flags & IPN_ICMPQUERY) != 0) { nat->nat_nicmpid = frnat.fin_data[1]; } nat->nat_pr[fin->fin_out] = fin->fin_p; nat->nat_pr[1 - fin->fin_out] = p; if (np->in_redir & NAT_REDIRECT) nat->nat_dir = NAT_DIVERTIN; else nat->nat_dir = NAT_DIVERTOUT; return 0; } /* ------------------------------------------------------------------------ */ /* Function: nat_builddivertmp */ /* Returns: int - -1 == error, 0 == success */ /* Parameters: softn(I) - pointer to NAT context structure */ /* np(I) - pointer to a NAT rule */ /* */ /* For divert rules, a skeleton packet representing what will be prepended */ /* to the real packet is created. Even though we don't have the full */ /* packet here, a checksum is calculated that we update later when we */ /* fill in the final details. At present a 0 checksum for UDP is being set */ /* here because it is expected that divert will be used for localhost. 
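(A zero UDP checksum is the protocol's defined "no checksum computed"
value for UDP over IPv4, so leaving it at 0 is legitimate for
loopback delivery.)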
*/ /* ------------------------------------------------------------------------ */ static int ipf_nat_builddivertmp(softn, np) ipf_nat_softc_t *softn; ipnat_t *np; { udphdr_t *uh; size_t len; ip_t *ip; if ((np->in_redir & NAT_DIVERTUDP) != 0) len = sizeof(ip_t) + sizeof(udphdr_t); else len = sizeof(ip_t); ALLOC_MB_T(np->in_divmp, len); if (np->in_divmp == NULL) { NBUMPD(ipf_nat_stats, ns_divert_build); return -1; } /* * First, the header to get the packet diverted to the new destination */ ip = MTOD(np->in_divmp, ip_t *); IP_V_A(ip, 4); IP_HL_A(ip, 5); ip->ip_tos = 0; if ((np->in_redir & NAT_DIVERTUDP) != 0) ip->ip_p = IPPROTO_UDP; else ip->ip_p = IPPROTO_IPIP; ip->ip_ttl = 255; ip->ip_off = 0; ip->ip_sum = 0; ip->ip_len = htons(len); ip->ip_id = 0; ip->ip_src.s_addr = htonl(np->in_snip); ip->ip_dst.s_addr = htonl(np->in_dnip); ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip)); if (np->in_redir & NAT_DIVERTUDP) { uh = (udphdr_t *)(ip + 1); uh->uh_sum = 0; uh->uh_ulen = 8; uh->uh_sport = htons(np->in_spnext); uh->uh_dport = htons(np->in_dpnext); } return 0; } #define MINDECAP (sizeof(ip_t) + sizeof(udphdr_t) + sizeof(ip_t)) /* ------------------------------------------------------------------------ */ /* Function: nat_decap */ /* Returns: int - -1 == error, 0 == success */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to current NAT session */ /* */ /* This function is responsible for undoing a packet's encapsulation in the */ /* reverse of an encap/divert rule. After removing the outer encapsulation */ /* it is necessary to call ipf_makefrip() again so that the contents of 'fin'*/ /* match the "new" packet as it may still be used by IPFilter elsewhere. */ /* We use "dir" here as the basis for some of the expectations about the */ /* outer header. If we return an error, the goal is to leave the original */ /* packet information undisturbed - this falls short at the end where we'd */ /* need to make a backup copy of "fin" - expensive. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_decap(fin, nat) fr_info_t *fin; nat_t *nat; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; char *hdr; int hlen; int skip; mb_t *m; if ((fin->fin_flx & FI_ICMPERR) != 0) { /* * ICMP packets don't get decapsulated, instead what we need * to do is change the ICMP reply from including (in the data * portion for errors) the encapsulated packet that we sent * out to something that resembles the original packet prior * to encapsulation. This isn't done here - all we're doing * here is changing the outer address to ensure that it gets * targeted back to the correct system.
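* The fixup below folds only the difference between the old and new
* address into the outer checksum (incremental update in the style of
* RFC 1624). A standalone illustration of that arithmetic, separate
* from the CALC_SUMD/ipf_fix_outcksum helpers (sketch only, not
* driver code):
*/
#ifdef EXAMPLE_ONLY
static u_short
example_cksum_adjust(u_short sum, u_32_t oldval, u_32_t newval)
{
	u_32_t acc;

	/* HC' = ~(~HC + ~m + m'), folded back to 16 bits */
	acc = (u_32_t)(~sum & 0xffff);
	acc += (~oldval >> 16) & 0xffff;
	acc += ~oldval & 0xffff;
	acc += (newval >> 16) & 0xffff;
	acc += newval & 0xffff;
	while (acc >> 16)
		acc = (acc & 0xffff) + (acc >> 16);
	return (u_short)(~acc & 0xffff);
}
#endif
/*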
*/ if (nat->nat_dir & NAT_OUTBOUND) { u_32_t sum1, sum2, sumd; sum1 = ntohl(fin->fin_daddr); sum2 = ntohl(nat->nat_osrcaddr); CALC_SUMD(sum1, sum2, sumd); fin->fin_ip->ip_dst = nat->nat_osrcip; fin->fin_daddr = nat->nat_osrcaddr; #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ defined(__osf__) || defined(linux) ipf_fix_outcksum(0, &fin->fin_ip->ip_sum, sumd, 0); #endif } return 0; } m = fin->fin_m; skip = fin->fin_hlen; switch (nat->nat_dir) { case NAT_DIVERTIN : case NAT_DIVERTOUT : if (fin->fin_plen < MINDECAP) return -1; skip += sizeof(udphdr_t); break; case NAT_ENCAPIN : case NAT_ENCAPOUT : if (fin->fin_plen < (skip + sizeof(ip_t))) return -1; break; default : return -1; /* NOTREACHED */ } /* * The aim here is to keep the original packet details in "fin" for * as long as possible so that returning with an error is for the * original packet and there is little undoing work to do. */ if (M_LEN(m) < skip + sizeof(ip_t)) { if (ipf_pr_pullup(fin, skip + sizeof(ip_t)) == -1) return -1; } hdr = MTOD(fin->fin_m, char *); fin->fin_ip = (ip_t *)(hdr + skip); hlen = IP_HL(fin->fin_ip) << 2; if (ipf_pr_pullup(fin, skip + hlen) == -1) { NBUMPSIDED(fin->fin_out, ns_decap_pullup); return -1; } fin->fin_hlen = hlen; fin->fin_dlen -= skip; fin->fin_plen -= skip; fin->fin_ipoff += skip; if (ipf_makefrip(hlen, (ip_t *)hdr, fin) == -1) { NBUMPSIDED(fin->fin_out, ns_decap_bad); return -1; } return skip; } /* ------------------------------------------------------------------------ */ /* Function: nat_nextaddr */ /* Returns: int - -1 == bad input (no new address), */ /* 0 == success and dst has new address */ /* Parameters: fin(I) - pointer to packet information */ /* na(I) - how to generate new address */ /* old(I) - original address being replaced */ /* dst(O) - where to put the new address */ /* Write Lock: ipf_nat */ /* */ /* This function uses the contents of the "na" structure, in combination */ /* with "old" to produce a new address to store in "dst". Not all of the */ /* possible uses of "na" will result in a new address. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_nextaddr(fin, na, old, dst) fr_info_t *fin; nat_addr_t *na; u_32_t *old, *dst; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; u_32_t amin, amax, new; i6addr_t newip; int error; new = 0; amin = na->na_addr[0].in4.s_addr; switch (na->na_atype) { case FRI_RANGE : amax = na->na_addr[1].in4.s_addr; break; case FRI_NETMASKED : case FRI_DYNAMIC : case FRI_NORMAL : /* * Compute the maximum address by adding the inverse of the * netmask to the minimum address. */ amax = ~na->na_addr[1].in4.s_addr; amax |= amin; break; case FRI_LOOKUP : break; case FRI_BROADCAST : case FRI_PEERADDR : case FRI_NETWORK : default : return -1; } error = -1; if (na->na_atype == FRI_LOOKUP) { if (na->na_type == IPLT_DSTLIST) { error = ipf_dstlist_select_node(fin, na->na_ptr, dst, NULL); } else { NBUMPSIDE(fin->fin_out, ns_badnextaddr); } } else if (na->na_atype == IPLT_NONE) { /* * 0/0 as the new address means leave it alone. 
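* In summary: 0/0 keeps the original address, 0/0xffffffff substitutes
* the interface's address, and any other combination hands out the
* precomputed na_nextip value.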
*/ if (na->na_addr[0].in4.s_addr == 0 && na->na_addr[1].in4.s_addr == 0) { new = *old; /* * 0/32 means get the interface's address */ } else if (na->na_addr[0].in4.s_addr == 0 && na->na_addr[1].in4.s_addr == 0xffffffff) { if (ipf_ifpaddr(softc, 4, na->na_atype, fin->fin_ifp, &newip, NULL) == -1) { NBUMPSIDED(fin->fin_out, ns_ifpaddrfail); return -1; } new = newip.in4.s_addr; } else { new = htonl(na->na_nextip); } *dst = new; error = 0; } else { NBUMPSIDE(fin->fin_out, ns_badnextaddr); } return error; } /* ------------------------------------------------------------------------ */ /* Function: nat_nextaddrinit */ /* Returns: int - 0 == success, else error number */ /* Parameters: softc(I) - pointer to soft context main structure */ /* na(I) - NAT address information for generating new addr*/ /* initial(I) - flag indicating if it is the first call for */ /* this "na" structure. */ /* ifp(I) - network interface to derive address */ /* information from. */ /* */ /* This function is expected to be called in two scenarios: when a new NAT */ /* rule is loaded into the kernel and when the list of NAT rules is sync'd */ /* up with the valid network interfaces (possibly due to them changing.) */ /* To distinguish between these, the "initial" parameter is used. If it is */ /* 1 then this indicates the rule has just been reloaded and 0 for when we */ /* are updating information. This difference is important because in */ /* instances where we are not updating address information associated with */ /* a network interface, we don't want to disturb what the "next" address to */ /* come out of ipf_nat_nextaddr() will be. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_nextaddrinit(softc, base, na, initial, ifp) ipf_main_softc_t *softc; char *base; nat_addr_t *na; int initial; void *ifp; { switch (na->na_atype) { case FRI_LOOKUP : if (na->na_subtype == 0) { na->na_ptr = ipf_lookup_res_num(softc, IPL_LOGNAT, na->na_type, na->na_num, &na->na_func); } else if (na->na_subtype == 1) { na->na_ptr = ipf_lookup_res_name(softc, IPL_LOGNAT, na->na_type, base + na->na_num, &na->na_func); } if (na->na_func == NULL) { IPFERROR(60060); return ESRCH; } if (na->na_ptr == NULL) { IPFERROR(60056); return ESRCH; } break; case FRI_DYNAMIC : case FRI_BROADCAST : case FRI_NETWORK : case FRI_NETMASKED : case FRI_PEERADDR : if (ifp != NULL) (void )ipf_ifpaddr(softc, 4, na->na_atype, ifp, &na->na_addr[0], &na->na_addr[1]); break; case FRI_SPLIT : case FRI_RANGE : if (initial) na->na_nextip = ntohl(na->na_addr[0].in4.s_addr); break; case FRI_NONE : na->na_addr[0].in4.s_addr &= na->na_addr[1].in4.s_addr; return 0; case FRI_NORMAL : na->na_addr[0].in4.s_addr &= na->na_addr[1].in4.s_addr; break; default : IPFERROR(60054); return EINVAL; } if (initial && (na->na_atype == FRI_NORMAL)) { if (na->na_addr[0].in4.s_addr == 0) { if ((na->na_addr[1].in4.s_addr == 0xffffffff) || (na->na_addr[1].in4.s_addr == 0)) { return 0; } } if (na->na_addr[1].in4.s_addr == 0xffffffff) { na->na_nextip = ntohl(na->na_addr[0].in4.s_addr); } else { na->na_nextip = ntohl(na->na_addr[0].in4.s_addr) + 1; } } return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_matchflush */ /* Returns: int - -1 == error, 0 == success */ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* nat(I) - pointer to current NAT session */ /* */ /*
------------------------------------------------------------------------ */ static int ipf_nat_matchflush(softc, softn, data) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; caddr_t data; { int *array, flushed, error; nat_t *nat, *natnext; ipfobj_t obj; error = ipf_matcharray_load(softc, data, &obj, &array); if (error != 0) return error; flushed = 0; for (nat = softn->ipf_nat_instances; nat != NULL; nat = natnext) { natnext = nat->nat_next; if (ipf_nat_matcharray(nat, array, softc->ipf_ticks) == 0) { ipf_nat_delete(softc, nat, NL_FLUSH); flushed++; } } obj.ipfo_retval = flushed; error = BCOPYOUT(&obj, data, sizeof(obj)); KFREES(array, array[0] * sizeof(*array)); return error; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_matcharray */ /* Returns: int - -1 == error, 0 == success */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to current NAT session */ /* */ /* ------------------------------------------------------------------------ */ static int ipf_nat_matcharray(nat, array, ticks) nat_t *nat; int *array; u_long ticks; { int i, n, *x, e, p; e = 0; n = array[0]; x = array + 1; for (; n > 0; x += 3 + x[2]) { if (x[0] == IPF_EXP_END) break; e = 0; n -= x[2] + 3; if (n < 0) break; p = x[0] >> 16; if (p != 0 && p != nat->nat_pr[1]) break; switch (x[0]) { case IPF_EXP_IP_PR : for (i = 0; !e && i < x[2]; i++) { e |= (nat->nat_pr[1] == x[i + 3]); } break; case IPF_EXP_IP_SRCADDR : if (nat->nat_v[0] == 4) { for (i = 0; !e && i < x[2]; i++) { e |= ((nat->nat_osrcaddr & x[i + 4]) == x[i + 3]); } } if (nat->nat_v[1] == 4) { for (i = 0; !e && i < x[2]; i++) { e |= ((nat->nat_nsrcaddr & x[i + 4]) == x[i + 3]); } } break; case IPF_EXP_IP_DSTADDR : if (nat->nat_v[0] == 4) { for (i = 0; !e && i < x[2]; i++) { e |= ((nat->nat_odstaddr & x[i + 4]) == x[i + 3]); } } if (nat->nat_v[1] == 4) { for (i = 0; !e && i < x[2]; i++) { e |= ((nat->nat_ndstaddr & x[i + 4]) == x[i + 3]); } } break; case IPF_EXP_IP_ADDR : for (i = 0; !e && i < x[2]; i++) { if (nat->nat_v[0] == 4) { e |= ((nat->nat_osrcaddr & x[i + 4]) == x[i + 3]); } if (nat->nat_v[1] == 4) { e |= ((nat->nat_nsrcaddr & x[i + 4]) == x[i + 3]); } if (nat->nat_v[0] == 4) { e |= ((nat->nat_odstaddr & x[i + 4]) == x[i + 3]); } if (nat->nat_v[1] == 4) { e |= ((nat->nat_ndstaddr & x[i + 4]) == x[i + 3]); } } break; #ifdef USE_INET6 case IPF_EXP_IP6_SRCADDR : if (nat->nat_v[0] == 6) { for (i = 0; !e && i < x[3]; i++) { e |= IP6_MASKEQ(&nat->nat_osrc6, x + i + 7, x + i + 3); } } if (nat->nat_v[1] == 6) { for (i = 0; !e && i < x[3]; i++) { e |= IP6_MASKEQ(&nat->nat_nsrc6, x + i + 7, x + i + 3); } } break; case IPF_EXP_IP6_DSTADDR : if (nat->nat_v[0] == 6) { for (i = 0; !e && i < x[3]; i++) { e |= IP6_MASKEQ(&nat->nat_odst6, x + i + 7, x + i + 3); } } if (nat->nat_v[1] == 6) { for (i = 0; !e && i < x[3]; i++) { e |= IP6_MASKEQ(&nat->nat_ndst6, x + i + 7, x + i + 3); } } break; case IPF_EXP_IP6_ADDR : for (i = 0; !e && i < x[3]; i++) { if (nat->nat_v[0] == 6) { e |= IP6_MASKEQ(&nat->nat_osrc6, x + i + 7, x + i + 3); } if (nat->nat_v[0] == 6) { e |= IP6_MASKEQ(&nat->nat_odst6, x + i + 7, x + i + 3); } if (nat->nat_v[1] == 6) { e |= IP6_MASKEQ(&nat->nat_nsrc6, x + i + 7, x + i + 3); } if (nat->nat_v[1] == 6) { e |= IP6_MASKEQ(&nat->nat_ndst6, x + i + 7, x + i + 3); } } break; #endif case IPF_EXP_UDP_PORT : case IPF_EXP_TCP_PORT : for (i = 0; !e && i < x[2]; i++) { e |= (nat->nat_nsport == x[i + 3]) || (nat->nat_ndport == x[i + 3]); } break; case IPF_EXP_UDP_SPORT : case 
IPF_EXP_TCP_SPORT : for (i = 0; !e && i < x[2]; i++) { e |= (nat->nat_nsport == x[i + 3]); } break; case IPF_EXP_UDP_DPORT : case IPF_EXP_TCP_DPORT : for (i = 0; !e && i < x[2]; i++) { e |= (nat->nat_ndport == x[i + 3]); } break; case IPF_EXP_TCP_STATE : for (i = 0; !e && i < x[2]; i++) { e |= (nat->nat_tcpstate[0] == x[i + 3]) || (nat->nat_tcpstate[1] == x[i + 3]); } break; case IPF_EXP_IDLE_GT : e |= (ticks - nat->nat_touched > x[3]); break; } e ^= x[1]; if (!e) break; } return e; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_gettable */ /* Returns: int - 0 = success, else error */ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* data(I) - pointer to ioctl data */ /* */ /* This function handles ioctl requests for tables of nat information. */ /* At present the only table it deals with is the hash bucket statistics. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_gettable(softc, softn, data) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; char *data; { ipftable_t table; int error; error = ipf_inobj(softc, data, NULL, &table, IPFOBJ_GTABLE); if (error != 0) return error; switch (table.ita_type) { case IPFTABLE_BUCKETS_NATIN : error = COPYOUT(softn->ipf_nat_stats.ns_side[0].ns_bucketlen, table.ita_table, softn->ipf_nat_table_sz * sizeof(u_int)); break; case IPFTABLE_BUCKETS_NATOUT : error = COPYOUT(softn->ipf_nat_stats.ns_side[1].ns_bucketlen, table.ita_table, softn->ipf_nat_table_sz * sizeof(u_int)); break; default : IPFERROR(60058); return EINVAL; } if (error != 0) { IPFERROR(60059); error = EFAULT; } return error; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_settimeout */ /* Returns: int - 0 = success, else failure */ /* Parameters: softc(I) - pointer to soft context main structure */ /* t(I) - pointer to tunable */ /* p(I) - pointer to new tuning data */ /* */ /* Apply the timeout change to the NAT timeout queues. 
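The dispatch below is a chain of string compares; viewed as a table
(purely illustrative - the EXAMPLE_ONLY guard, structure and function
names are hypothetical) the name-to-queue mapping is:
*/
#ifdef EXAMPLE_ONLY
struct example_tq_map {
	const char *etm_name;
	size_t etm_off;			/* queue offset inside the softc */
};
static const struct example_tq_map example_tq_tab[] = {
	{ "udp_timeout",	offsetof(ipf_nat_softc_t, ipf_nat_udptq) },
	{ "udp_ack_timeout",	offsetof(ipf_nat_softc_t, ipf_nat_udpacktq) },
	{ "icmp_timeout",	offsetof(ipf_nat_softc_t, ipf_nat_icmptq) },
	{ "icmp_ack_timeout",	offsetof(ipf_nat_softc_t, ipf_nat_icmpacktq) },
	{ "ip_timeout",		offsetof(ipf_nat_softc_t, ipf_nat_iptq) }
};
static ipftq_t *
example_find_tq(ipf_nat_softc_t *softn, const char *name)
{
	size_t i;

	for (i = 0; i < sizeof(example_tq_tab)/sizeof(example_tq_tab[0]); i++)
		if (!strcmp(name, example_tq_tab[i].etm_name))
			return (ipftq_t *)((char *)softn +
			    example_tq_tab[i].etm_off);
	return NULL;
}
#endif
/* (ipf_apply_timeout(example_find_tq(softn, name), ttl) would then
   stand in for the if/else chain below.)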
*/ /* ------------------------------------------------------------------------ */ int ipf_nat_settimeout(softc, t, p) struct ipf_main_softc_s *softc; ipftuneable_t *t; ipftuneval_t *p; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; if (!strncmp(t->ipft_name, "tcp_", 4)) return ipf_settimeout_tcp(t, p, softn->ipf_nat_tcptq); if (!strcmp(t->ipft_name, "udp_timeout")) { ipf_apply_timeout(&softn->ipf_nat_udptq, p->ipftu_int); } else if (!strcmp(t->ipft_name, "udp_ack_timeout")) { ipf_apply_timeout(&softn->ipf_nat_udpacktq, p->ipftu_int); } else if (!strcmp(t->ipft_name, "icmp_timeout")) { ipf_apply_timeout(&softn->ipf_nat_icmptq, p->ipftu_int); } else if (!strcmp(t->ipft_name, "icmp_ack_timeout")) { ipf_apply_timeout(&softn->ipf_nat_icmpacktq, p->ipftu_int); } else if (!strcmp(t->ipft_name, "ip_timeout")) { ipf_apply_timeout(&softn->ipf_nat_iptq, p->ipftu_int); } else { IPFERROR(60062); return ESRCH; } return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_rehash */ /* Returns: int - 0 = success, else failure */ /* Parameters: softc(I) - pointer to soft context main structure */ /* t(I) - pointer to tunable */ /* p(I) - pointer to new tuning data */ /* */ /* To change the size of the basic NAT table, we need to first allocate the */ /* new tables (lest it fails and we've got nowhere to store all of the NAT */ /* sessions currently active) and then walk through the entire list and */ /* insert them into the table. There are two tables here: an inbound one */ /* and an outbound one. Each NAT entry goes into each table once. */ /* ------------------------------------------------------------------------ */ int ipf_nat_rehash(softc, t, p) ipf_main_softc_t *softc; ipftuneable_t *t; ipftuneval_t *p; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; nat_t **newtab[2], *nat, **natp; u_int *bucketlens[2]; u_int maxbucket; u_int newsize; int error; u_int hv; int i; newsize = p->ipftu_int; /* * In case there is nothing to do... */ if (newsize == softn->ipf_nat_table_sz) return 0; newtab[0] = NULL; newtab[1] = NULL; bucketlens[0] = NULL; bucketlens[1] = NULL; /* * 4 tables depend on the NAT table size: the inbound looking table, * the outbound lookup table and the hash chain length for each. */ KMALLOCS(newtab[0], nat_t **, newsize * sizeof(nat_t *)); if (newtab[0] == NULL) { error = 60063; goto badrehash; } KMALLOCS(newtab[1], nat_t **, newsize * sizeof(nat_t *)); if (newtab[1] == NULL) { error = 60064; goto badrehash; } KMALLOCS(bucketlens[0], u_int *, newsize * sizeof(u_int)); if (bucketlens[0] == NULL) { error = 60065; goto badrehash; } KMALLOCS(bucketlens[1], u_int *, newsize * sizeof(u_int)); if (bucketlens[1] == NULL) { error = 60066; goto badrehash; } /* * Recalculate the maximum length based on the new size. 
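* (Worked example: a new size of 1024 makes the shift loop run 11
* times, so maxbucket becomes 22 - roughly twice log2 of the table
* size.)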
*/ for (maxbucket = 0, i = newsize; i > 0; i >>= 1) maxbucket++; maxbucket *= 2; bzero((char *)newtab[0], newsize * sizeof(nat_t *)); bzero((char *)newtab[1], newsize * sizeof(nat_t *)); bzero((char *)bucketlens[0], newsize * sizeof(u_int)); bzero((char *)bucketlens[1], newsize * sizeof(u_int)); WRITE_ENTER(&softc->ipf_nat); if (softn->ipf_nat_table[0] != NULL) { KFREES(softn->ipf_nat_table[0], softn->ipf_nat_table_sz * sizeof(*softn->ipf_nat_table[0])); } softn->ipf_nat_table[0] = newtab[0]; if (softn->ipf_nat_table[1] != NULL) { KFREES(softn->ipf_nat_table[1], softn->ipf_nat_table_sz * sizeof(*softn->ipf_nat_table[1])); } softn->ipf_nat_table[1] = newtab[1]; if (softn->ipf_nat_stats.ns_side[0].ns_bucketlen != NULL) { KFREES(softn->ipf_nat_stats.ns_side[0].ns_bucketlen, softn->ipf_nat_table_sz * sizeof(u_int)); } softn->ipf_nat_stats.ns_side[0].ns_bucketlen = bucketlens[0]; if (softn->ipf_nat_stats.ns_side[1].ns_bucketlen != NULL) { KFREES(softn->ipf_nat_stats.ns_side[1].ns_bucketlen, softn->ipf_nat_table_sz * sizeof(u_int)); } softn->ipf_nat_stats.ns_side[1].ns_bucketlen = bucketlens[1]; #ifdef USE_INET6 if (softn->ipf_nat_stats.ns_side6[0].ns_bucketlen != NULL) { KFREES(softn->ipf_nat_stats.ns_side6[0].ns_bucketlen, softn->ipf_nat_table_sz * sizeof(u_int)); } softn->ipf_nat_stats.ns_side6[0].ns_bucketlen = bucketlens[0]; if (softn->ipf_nat_stats.ns_side6[1].ns_bucketlen != NULL) { KFREES(softn->ipf_nat_stats.ns_side6[1].ns_bucketlen, softn->ipf_nat_table_sz * sizeof(u_int)); } softn->ipf_nat_stats.ns_side6[1].ns_bucketlen = bucketlens[1]; #endif softn->ipf_nat_maxbucket = maxbucket; softn->ipf_nat_table_sz = newsize; /* * Walk through the entire list of NAT table entries and put them * in the new NAT table, somewhere. Because we have a new table, * we need to restart the counter of how many chains are in use. 
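* Each entry is pushed at the head of its new bucket, and nat_phnext
* is pointed at whatever pointer now references the entry; that is
* what keeps later unlinking O(1). The same relink pattern in
* miniature (hypothetical node type, sketch only):
*/
#ifdef EXAMPLE_ONLY
struct example_node {
	struct example_node *next;
	struct example_node **pnext;	/* address of the pointer to us */
};
static void
example_head_insert(struct example_node **bucket, struct example_node *n)
{
	n->next = *bucket;
	if (*bucket != NULL)
		(*bucket)->pnext = &n->next;
	n->pnext = bucket;
	*bucket = n;
}
#endif
/*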
*/ softn->ipf_nat_stats.ns_side[0].ns_inuse = 0; softn->ipf_nat_stats.ns_side[1].ns_inuse = 0; #ifdef USE_INET6 softn->ipf_nat_stats.ns_side6[0].ns_inuse = 0; softn->ipf_nat_stats.ns_side6[1].ns_inuse = 0; #endif for (nat = softn->ipf_nat_instances; nat != NULL; nat = nat->nat_next) { nat->nat_hnext[0] = NULL; nat->nat_phnext[0] = NULL; hv = nat->nat_hv[0] % softn->ipf_nat_table_sz; natp = &softn->ipf_nat_table[0][hv]; if (*natp) { (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; } else { NBUMPSIDE(0, ns_inuse); } nat->nat_phnext[0] = natp; nat->nat_hnext[0] = *natp; *natp = nat; NBUMPSIDE(0, ns_bucketlen[hv]); nat->nat_hnext[1] = NULL; nat->nat_phnext[1] = NULL; hv = nat->nat_hv[1] % softn->ipf_nat_table_sz; natp = &softn->ipf_nat_table[1][hv]; if (*natp) { (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; } else { NBUMPSIDE(1, ns_inuse); } nat->nat_phnext[1] = natp; nat->nat_hnext[1] = *natp; *natp = nat; NBUMPSIDE(1, ns_bucketlen[hv]); } RWLOCK_EXIT(&softc->ipf_nat); return 0; badrehash: if (bucketlens[1] != NULL) { KFREES(bucketlens[1], newsize * sizeof(u_int)); } if (bucketlens[0] != NULL) { KFREES(bucketlens[0], newsize * sizeof(u_int)); } if (newtab[0] != NULL) { KFREES(newtab[0], newsize * sizeof(nat_t *)); } if (newtab[1] != NULL) { KFREES(newtab[1], newsize * sizeof(nat_t *)); } IPFERROR(error); return ENOMEM; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_rehash_rules */ /* Returns: int - 0 = success, else failure */ /* Parameters: softc(I) - pointer to soft context main structure */ /* t(I) - pointer to tunable */ /* p(I) - pointer to new tuning data */ /* */ /* All of the NAT rules hang off of a hash table that is searched with a */ /* hash on address after the netmask is applied. There is a different table*/ /* for both inbound rules (rdr) and outbound (map.) The resizing will only */ /* affect one of these two tables. */ /* ------------------------------------------------------------------------ */ int ipf_nat_rehash_rules(softc, t, p) ipf_main_softc_t *softc; ipftuneable_t *t; ipftuneval_t *p; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; ipnat_t **newtab, *np, ***old, **npp; u_int newsize; u_int mask; u_int hv; newsize = p->ipftu_int; /* * In case there is nothing to do... */ if (newsize == *t->ipft_pint) return 0; /* * All inbound rules have the NAT_REDIRECT bit set in in_redir and * all outbound rules have either NAT_MAP or NAT_MAPBLK set. * This if statement allows for some more generic code to be below, * rather than two huge gobs of code that almost do the same thing.
*/ if (t->ipft_pint == &softn->ipf_nat_rdrrules_sz) { old = &softn->ipf_nat_rdr_rules; mask = NAT_REDIRECT; } else { old = &softn->ipf_nat_map_rules; mask = NAT_MAP|NAT_MAPBLK; } KMALLOCS(newtab, ipnat_t **, newsize * sizeof(ipnat_t *)); if (newtab == NULL) { IPFERROR(60067); return ENOMEM; } bzero((char *)newtab, newsize * sizeof(ipnat_t *)); WRITE_ENTER(&softc->ipf_nat); if (*old != NULL) { KFREES(*old, *t->ipft_pint * sizeof(ipnat_t **)); } *old = newtab; *t->ipft_pint = newsize; for (np = softn->ipf_nat_list; np != NULL; np = np->in_next) { if ((np->in_redir & mask) == 0) continue; if (np->in_redir & NAT_REDIRECT) { np->in_rnext = NULL; hv = np->in_hv[0] % newsize; for (npp = newtab + hv; *npp != NULL; ) npp = &(*npp)->in_rnext; np->in_prnext = npp; *npp = np; } if (np->in_redir & NAT_MAP) { np->in_mnext = NULL; hv = np->in_hv[1] % newsize; for (npp = newtab + hv; *npp != NULL; ) npp = &(*npp)->in_mnext; np->in_pmnext = npp; *npp = np; } } RWLOCK_EXIT(&softc->ipf_nat); return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_hostmap_rehash */ /* Returns: int - 0 = success, else failure */ /* Parameters: softc(I) - pointer to soft context main structure */ /* t(I) - pointer to tunable */ /* p(I) - pointer to new tuning data */ /* */ /* Allocate and populate a new hash table that will contain a reference to */ /* all of the active IP# translations currently in place. */ /* ------------------------------------------------------------------------ */ int ipf_nat_hostmap_rehash(softc, t, p) ipf_main_softc_t *softc; ipftuneable_t *t; ipftuneval_t *p; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; hostmap_t *hm, **newtab; u_int newsize; u_int hv; newsize = p->ipftu_int; /* * In case there is nothing to do... */ if (newsize == *t->ipft_pint) return 0; KMALLOCS(newtab, hostmap_t **, newsize * sizeof(hostmap_t *)); if (newtab == NULL) { IPFERROR(60068); return ENOMEM; } bzero((char *)newtab, newsize * sizeof(hostmap_t *)); WRITE_ENTER(&softc->ipf_nat); if (softn->ipf_hm_maptable != NULL) { KFREES(softn->ipf_hm_maptable, softn->ipf_nat_hostmap_sz * sizeof(hostmap_t *)); } softn->ipf_hm_maptable = newtab; softn->ipf_nat_hostmap_sz = newsize; for (hm = softn->ipf_hm_maplist; hm != NULL; hm = hm->hm_next) { hv = hm->hm_hv % softn->ipf_nat_hostmap_sz; hm->hm_hnext = softn->ipf_hm_maptable[hv]; hm->hm_phnext = softn->ipf_hm_maptable + hv; if (softn->ipf_hm_maptable[hv] != NULL) softn->ipf_hm_maptable[hv]->hm_phnext = &hm->hm_hnext; softn->ipf_hm_maptable[hv] = hm; } RWLOCK_EXIT(&softc->ipf_nat); return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_add_tq */ /* Parameters: softc(I) - pointer to soft context main structure */ /* */ /* ------------------------------------------------------------------------ */ ipftq_t * ipf_nat_add_tq(softc, ttl) ipf_main_softc_t *softc; int ttl; { ipf_nat_softc_t *softs = softc->ipf_nat_soft; return ipf_addtimeoutqueue(softc, &softs->ipf_nat_utqe, ttl); } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_uncreate */ /* Returns: Nil */ /* Parameters: fin(I) - pointer to packet information */ /* */ /* This function is used to remove a NAT entry from the NAT table when we */ /* decide that the create was actually in error. It is thus assumed that */ /* fin_flx will have both FI_NATED and FI_NATNEW set. Because we're dealing */ /* with the translated packet (not the original), we have to reverse the */ /* lookup. 
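(Concretely: an inbound packet is probed with ipf_nat_outlookup and an
outbound one with ipf_nat_inlookup, because "fin" now carries the
translated rather than the original addresses.)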
Although doing the lookup is expensive (relatively speaking), it */ /* is not anticipated that this will be a frequent occurrence for normal */ /* traffic patterns. */ /* ------------------------------------------------------------------------ */ void ipf_nat_uncreate(fin) fr_info_t *fin; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; int nflags; nat_t *nat; switch (fin->fin_p) { case IPPROTO_TCP : nflags = IPN_TCP; break; case IPPROTO_UDP : nflags = IPN_UDP; break; default : nflags = 0; break; } WRITE_ENTER(&softc->ipf_nat); if (fin->fin_out == 0) { nat = ipf_nat_outlookup(fin, nflags, (u_int)fin->fin_p, fin->fin_dst, fin->fin_src); } else { nat = ipf_nat_inlookup(fin, nflags, (u_int)fin->fin_p, fin->fin_src, fin->fin_dst); } if (nat != NULL) { NBUMPSIDE(fin->fin_out, ns_uncreate[0]); ipf_nat_delete(softc, nat, NL_DESTROY); } else { NBUMPSIDE(fin->fin_out, ns_uncreate[1]); } RWLOCK_EXIT(&softc->ipf_nat); } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_cmp_rules */ /* Returns: int - 0 == success, else rules do not match. */ /* Parameters: n1(I) - first rule to compare */ /* n2(I) - second rule to compare */ /* */ /* Compare two rules using pointers to each rule. A straight bcmp will not */ /* work as some fields (such as in_dst, in_pkts) actually do change once */ /* the rule has been loaded into the kernel. Whilst this function returns */ /* various non-zero values, they're strictly to aid in debugging. Use of */ /* this function should simply care if the result is zero or not. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_cmp_rules(n1, n2) ipnat_t *n1, *n2; { if (n1->in_size != n2->in_size) return 1; if (bcmp((char *)&n1->in_v, (char *)&n2->in_v, offsetof(ipnat_t, in_ndst) - offsetof(ipnat_t, in_v)) != 0) return 2; if (bcmp((char *)&n1->in_tuc, (char *)&n2->in_tuc, n1->in_size - offsetof(ipnat_t, in_tuc)) != 0) return 3; if (n1->in_ndst.na_atype != n2->in_ndst.na_atype) return 5; if (n1->in_ndst.na_function != n2->in_ndst.na_function) return 6; if (bcmp((char *)&n1->in_ndst.na_addr, (char *)&n2->in_ndst.na_addr, sizeof(n1->in_ndst.na_addr))) return 7; if (n1->in_nsrc.na_atype != n2->in_nsrc.na_atype) return 8; if (n1->in_nsrc.na_function != n2->in_nsrc.na_function) return 9; if (bcmp((char *)&n1->in_nsrc.na_addr, (char *)&n2->in_nsrc.na_addr, sizeof(n1->in_nsrc.na_addr))) return 10; if (n1->in_odst.na_atype != n2->in_odst.na_atype) return 11; if (n1->in_odst.na_function != n2->in_odst.na_function) return 12; if (bcmp((char *)&n1->in_odst.na_addr, (char *)&n2->in_odst.na_addr, sizeof(n1->in_odst.na_addr))) return 13; if (n1->in_osrc.na_atype != n2->in_osrc.na_atype) return 14; if (n1->in_osrc.na_function != n2->in_osrc.na_function) return 15; if (bcmp((char *)&n1->in_osrc.na_addr, (char *)&n2->in_osrc.na_addr, sizeof(n1->in_osrc.na_addr))) return 16; return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_rule_init */ /* Returns: int - 0 == success, else rules do not match.
*/ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* n(I) - first rule to compare */ /* */ /* ------------------------------------------------------------------------ */ static int ipf_nat_rule_init(softc, softn, n) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; ipnat_t *n; { int error = 0; if ((n->in_flags & IPN_SIPRANGE) != 0) n->in_nsrcatype = FRI_RANGE; if ((n->in_flags & IPN_DIPRANGE) != 0) n->in_ndstatype = FRI_RANGE; if ((n->in_flags & IPN_SPLIT) != 0) n->in_ndstatype = FRI_SPLIT; if ((n->in_redir & (NAT_MAP|NAT_REWRITE|NAT_DIVERTUDP)) != 0) n->in_spnext = n->in_spmin; if ((n->in_redir & (NAT_REWRITE|NAT_DIVERTUDP)) != 0) { n->in_dpnext = n->in_dpmin; } else if (n->in_redir == NAT_REDIRECT) { n->in_dpnext = n->in_dpmin; } n->in_stepnext = 0; switch (n->in_v[0]) { case 4 : error = ipf_nat_ruleaddrinit(softc, softn, n); if (error != 0) return error; break; #ifdef USE_INET6 case 6 : error = ipf_nat6_ruleaddrinit(softc, softn, n); if (error != 0) return error; break; #endif default : break; } if (n->in_redir == (NAT_DIVERTUDP|NAT_MAP)) { /* * Prerecord whether or not the destination of the divert * is local or not to the interface the packet is going * to be sent out. */ n->in_dlocal = ipf_deliverlocal(softc, n->in_v[1], n->in_ifps[1], &n->in_ndstip6); } return error; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_rule_fini */ /* Returns: int - 0 == success, else rules do not match. */ /* Parameters: softc(I) - pointer to soft context main structure */ /* n(I) - rule to work on */ /* */ /* This function is used to release any objects that were referenced during */ /* the rule initialisation. This is useful both when free'ing the rule and */ /* when handling ioctls that need to initialise these fields but not */ /* actually use them after the ioctl processing has finished. */ /* ------------------------------------------------------------------------ */ static void ipf_nat_rule_fini(softc, n) ipf_main_softc_t *softc; ipnat_t *n; { if (n->in_odst.na_atype == FRI_LOOKUP && n->in_odst.na_ptr != NULL) ipf_lookup_deref(softc, n->in_odst.na_type, n->in_odst.na_ptr); if (n->in_osrc.na_atype == FRI_LOOKUP && n->in_osrc.na_ptr != NULL) ipf_lookup_deref(softc, n->in_osrc.na_type, n->in_osrc.na_ptr); if (n->in_ndst.na_atype == FRI_LOOKUP && n->in_ndst.na_ptr != NULL) ipf_lookup_deref(softc, n->in_ndst.na_type, n->in_ndst.na_ptr); if (n->in_nsrc.na_atype == FRI_LOOKUP && n->in_nsrc.na_ptr != NULL) ipf_lookup_deref(softc, n->in_nsrc.na_type, n->in_nsrc.na_ptr); if (n->in_divmp != NULL) FREE_MB_T(n->in_divmp); } Index: projects/runtime-coverage/sys/contrib/ipfilter =================================================================== --- projects/runtime-coverage/sys/contrib/ipfilter (revision 323974) +++ projects/runtime-coverage/sys/contrib/ipfilter (revision 323975) Property changes on: projects/runtime-coverage/sys/contrib/ipfilter ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/contrib/ipfilter:r323957-323974 Index: projects/runtime-coverage/sys/dev/mpr/mpr.c =================================================================== --- projects/runtime-coverage/sys/dev/mpr/mpr.c (revision 323974) +++ projects/runtime-coverage/sys/dev/mpr/mpr.c (revision 323975) @@ -1,3693 +1,3799 @@ /*- * Copyright (c) 2009 Yahoo! Inc. * Copyright (c) 2011-2015 LSI Corp. 
* Copyright (c) 2013-2016 Avago Technologies * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Avago Technologies (LSI) MPT-Fusion Host Adapter FreeBSD * */ #include __FBSDID("$FreeBSD$"); /* Communications core for Avago Technologies (LSI) MPT3 */ /* TODO Move headers to mprvar */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int mpr_diag_reset(struct mpr_softc *sc, int sleep_flag); static int mpr_init_queues(struct mpr_softc *sc); static void mpr_resize_queues(struct mpr_softc *sc); static int mpr_message_unit_reset(struct mpr_softc *sc, int sleep_flag); static int mpr_transition_operational(struct mpr_softc *sc); static int mpr_iocfacts_allocate(struct mpr_softc *sc, uint8_t attaching); static void mpr_iocfacts_free(struct mpr_softc *sc); static void mpr_startup(void *arg); static int mpr_send_iocinit(struct mpr_softc *sc); static int mpr_alloc_queues(struct mpr_softc *sc); static int mpr_alloc_hw_queues(struct mpr_softc *sc); static int mpr_alloc_replies(struct mpr_softc *sc); static int mpr_alloc_requests(struct mpr_softc *sc); static int mpr_alloc_nvme_prp_pages(struct mpr_softc *sc); static int mpr_attach_log(struct mpr_softc *sc); static __inline void mpr_complete_command(struct mpr_softc *sc, struct mpr_command *cm); static void mpr_dispatch_event(struct mpr_softc *sc, uintptr_t data, MPI2_EVENT_NOTIFICATION_REPLY *reply); static void mpr_config_complete(struct mpr_softc *sc, struct mpr_command *cm); static void mpr_periodic(void *); static int mpr_reregister_events(struct mpr_softc *sc); static void mpr_enqueue_request(struct mpr_softc *sc, struct mpr_command *cm); static int mpr_get_iocfacts(struct mpr_softc *sc, MPI2_IOC_FACTS_REPLY *facts); static int mpr_wait_db_ack(struct mpr_softc *sc, int timeout, int sleep_flag); +static int mpr_debug_sysctl(SYSCTL_HANDLER_ARGS); +static void mpr_parse_debug(struct mpr_softc *sc, char *list); + SYSCTL_NODE(_hw, OID_AUTO, mpr, CTLFLAG_RD, 0, "MPR Driver Parameters"); MALLOC_DEFINE(M_MPR, "mpr", "mpr driver 
memory"); /* * Do a "Diagnostic Reset" aka a hard reset. This should get the chip out of * any state and back to its initialization state machine. */ static char mpt2_reset_magic[] = { 0x00, 0x0f, 0x04, 0x0b, 0x02, 0x07, 0x0d }; /* * Added this union to smoothly convert le64toh cm->cm_desc.Words. * Compiler only supports uint64_t to be passed as an argument. * Otherwise it will throw this error: * "aggregate value used where an integer was expected" */ typedef union _reply_descriptor { u64 word; struct { u32 low; u32 high; } u; } reply_descriptor, request_descriptor; /* Rate limit chain-fail messages to 1 per minute */ static struct timeval mpr_chainfail_interval = { 60, 0 }; /* * sleep_flag can be either CAN_SLEEP or NO_SLEEP. * If this function is called from process context, it can sleep * and there is no harm to sleep, in case if this fuction is called * from Interrupt handler, we can not sleep and need NO_SLEEP flag set. * based on sleep flags driver will call either msleep, pause or DELAY. * msleep and pause are of same variant, but pause is used when mpr_mtx * is not hold by driver. */ static int mpr_diag_reset(struct mpr_softc *sc,int sleep_flag) { uint32_t reg; int i, error, tries = 0; uint8_t first_wait_done = FALSE; mpr_dprint(sc, MPR_INIT, "%s entered\n", __func__); /* Clear any pending interrupts */ mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); /* * Force NO_SLEEP for threads prohibited to sleep * e.a Thread from interrupt handler are prohibited to sleep. */ #if __FreeBSD_version >= 1000029 if (curthread->td_no_sleeping) #else //__FreeBSD_version < 1000029 if (curthread->td_pflags & TDP_NOSLEEPING) #endif //__FreeBSD_version >= 1000029 sleep_flag = NO_SLEEP; mpr_dprint(sc, MPR_INIT, "sequence start, sleep_flag=%d\n", sleep_flag); /* Push the magic sequence */ error = ETIMEDOUT; while (tries++ < 20) { for (i = 0; i < sizeof(mpt2_reset_magic); i++) mpr_regwrite(sc, MPI2_WRITE_SEQUENCE_OFFSET, mpt2_reset_magic[i]); /* wait 100 msec */ if (mtx_owned(&sc->mpr_mtx) && sleep_flag == CAN_SLEEP) msleep(&sc->msleep_fake_chan, &sc->mpr_mtx, 0, "mprdiag", hz/10); else if (sleep_flag == CAN_SLEEP) pause("mprdiag", hz/10); else DELAY(100 * 1000); reg = mpr_regread(sc, MPI2_HOST_DIAGNOSTIC_OFFSET); if (reg & MPI2_DIAG_DIAG_WRITE_ENABLE) { error = 0; break; } } if (error) { mpr_dprint(sc, MPR_INIT, "sequence failed, error=%d, exit\n", error); return (error); } /* Send the actual reset. XXX need to refresh the reg? */ reg |= MPI2_DIAG_RESET_ADAPTER; mpr_dprint(sc, MPR_INIT, "sequence success, sending reset, reg= 0x%x\n", reg); mpr_regwrite(sc, MPI2_HOST_DIAGNOSTIC_OFFSET, reg); /* Wait up to 300 seconds in 50ms intervals */ error = ETIMEDOUT; for (i = 0; i < 6000; i++) { /* * Wait 50 msec. If this is the first time through, wait 256 * msec to satisfy Diag Reset timing requirements. */ if (first_wait_done) { if (mtx_owned(&sc->mpr_mtx) && sleep_flag == CAN_SLEEP) msleep(&sc->msleep_fake_chan, &sc->mpr_mtx, 0, "mprdiag", hz/20); else if (sleep_flag == CAN_SLEEP) pause("mprdiag", hz/20); else DELAY(50 * 1000); } else { DELAY(256 * 1000); first_wait_done = TRUE; } /* * Check for the RESET_ADAPTER bit to be cleared first, then * wait for the RESET state to be cleared, which takes a little * longer. 
*/ reg = mpr_regread(sc, MPI2_HOST_DIAGNOSTIC_OFFSET); if (reg & MPI2_DIAG_RESET_ADAPTER) { continue; } reg = mpr_regread(sc, MPI2_DOORBELL_OFFSET); if ((reg & MPI2_IOC_STATE_MASK) != MPI2_IOC_STATE_RESET) { error = 0; break; } } if (error) { mpr_dprint(sc, MPR_INIT, "reset failed, error= %d, exit\n", error); return (error); } mpr_regwrite(sc, MPI2_WRITE_SEQUENCE_OFFSET, 0x0); mpr_dprint(sc, MPR_INIT, "diag reset success, exit\n"); return (0); } static int mpr_message_unit_reset(struct mpr_softc *sc, int sleep_flag) { int error; MPR_FUNCTRACE(sc); mpr_dprint(sc, MPR_INIT, "%s entered\n", __func__); error = 0; mpr_regwrite(sc, MPI2_DOORBELL_OFFSET, MPI2_FUNCTION_IOC_MESSAGE_UNIT_RESET << MPI2_DOORBELL_FUNCTION_SHIFT); if (mpr_wait_db_ack(sc, 5, sleep_flag) != 0) { mpr_dprint(sc, MPR_INIT|MPR_FAULT, "Doorbell handshake failed\n"); error = ETIMEDOUT; } mpr_dprint(sc, MPR_INIT, "%s exit\n", __func__); return (error); } static int mpr_transition_ready(struct mpr_softc *sc) { uint32_t reg, state; int error, tries = 0; int sleep_flags; MPR_FUNCTRACE(sc); /* If we are in attach call, do not sleep */ sleep_flags = (sc->mpr_flags & MPR_FLAGS_ATTACH_DONE) ? CAN_SLEEP : NO_SLEEP; error = 0; mpr_dprint(sc, MPR_INIT, "%s entered, sleep_flags= %d\n", __func__, sleep_flags); while (tries++ < 1200) { reg = mpr_regread(sc, MPI2_DOORBELL_OFFSET); mpr_dprint(sc, MPR_INIT, " Doorbell= 0x%x\n", reg); /* * Ensure the IOC is ready to talk. If it's not, try * resetting it. */ if (reg & MPI2_DOORBELL_USED) { mpr_dprint(sc, MPR_INIT, " Not ready, sending diag " "reset\n"); mpr_diag_reset(sc, sleep_flags); DELAY(50000); continue; } /* Is the adapter owned by another peer? */ if ((reg & MPI2_DOORBELL_WHO_INIT_MASK) == (MPI2_WHOINIT_PCI_PEER << MPI2_DOORBELL_WHO_INIT_SHIFT)) { mpr_dprint(sc, MPR_INIT|MPR_FAULT, "IOC is under the " "control of another peer host, aborting " "initialization.\n"); error = ENXIO; break; } state = reg & MPI2_IOC_STATE_MASK; if (state == MPI2_IOC_STATE_READY) { /* Ready to go! */ error = 0; break; } else if (state == MPI2_IOC_STATE_FAULT) { mpr_dprint(sc, MPR_INIT|MPR_FAULT, "IOC in fault " "state 0x%x, resetting\n", state & MPI2_DOORBELL_FAULT_CODE_MASK); mpr_diag_reset(sc, sleep_flags); } else if (state == MPI2_IOC_STATE_OPERATIONAL) { /* Need to take ownership */ mpr_message_unit_reset(sc, sleep_flags); } else if (state == MPI2_IOC_STATE_RESET) { /* Wait a bit, IOC might be in transition */ mpr_dprint(sc, MPR_INIT|MPR_FAULT, "IOC in unexpected reset state\n"); } else { mpr_dprint(sc, MPR_INIT|MPR_FAULT, "IOC in unknown state 0x%x\n", state); error = EINVAL; break; } /* Wait 50ms for things to settle down. */ DELAY(50000); } if (error) mpr_dprint(sc, MPR_INIT|MPR_FAULT, "Cannot transition IOC to ready\n"); mpr_dprint(sc, MPR_INIT, "%s exit\n", __func__); return (error); } static int mpr_transition_operational(struct mpr_softc *sc) { uint32_t reg, state; int error; MPR_FUNCTRACE(sc); error = 0; reg = mpr_regread(sc, MPI2_DOORBELL_OFFSET); mpr_dprint(sc, MPR_INIT, "%s entered, Doorbell= 0x%x\n", __func__, reg); state = reg & MPI2_IOC_STATE_MASK; if (state != MPI2_IOC_STATE_READY) { mpr_dprint(sc, MPR_INIT, "IOC not ready\n"); if ((error = mpr_transition_ready(sc)) != 0) { mpr_dprint(sc, MPR_INIT|MPR_FAULT, "failed to transition ready, exit\n"); return (error); } } error = mpr_send_iocinit(sc); mpr_dprint(sc, MPR_INIT, "%s exit\n", __func__); return (error); } static void mpr_resize_queues(struct mpr_softc *sc) { int reqcr, prireqcr; /* * Size the queues. 
Since the reply queues always need one free * entry, we'll deduct one reply message here. The LSI documents * suggest instead to add a count to the request queue, but I think * that it's better to deduct from reply queue. */ prireqcr = MAX(1, sc->max_prireqframes); prireqcr = MIN(prireqcr, sc->facts->HighPriorityCredit); reqcr = MAX(2, sc->max_reqframes); reqcr = MIN(reqcr, sc->facts->RequestCredit); sc->num_reqs = prireqcr + reqcr; sc->num_replies = MIN(sc->max_replyframes + sc->max_evtframes, sc->facts->MaxReplyDescriptorPostQueueDepth) - 1; /* * Figure out the number of MSIx-based queues. If the firmware or * user has done something crazy and not allowed enough credit for * the queues to be useful then don't enable multi-queue. */ if (sc->facts->MaxMSIxVectors < 2) sc->msi_msgs = 1; if (sc->msi_msgs > 1) { sc->msi_msgs = MIN(sc->msi_msgs, mp_ncpus); sc->msi_msgs = MIN(sc->msi_msgs, sc->facts->MaxMSIxVectors); if (sc->num_reqs / sc->msi_msgs < 2) sc->msi_msgs = 1; } mpr_dprint(sc, MPR_INIT, "Sized queues to q=%d reqs=%d replies=%d\n", sc->msi_msgs, sc->num_reqs, sc->num_replies); } /* * This is called during attach and when re-initializing due to a Diag Reset. * IOC Facts is used to allocate many of the structures needed by the driver. * If called from attach, de-allocation is not required because the driver has * not allocated any structures yet, but if called from a Diag Reset, previously * allocated structures based on IOC Facts will need to be freed and re- * allocated bases on the latest IOC Facts. */ static int mpr_iocfacts_allocate(struct mpr_softc *sc, uint8_t attaching) { int error; Mpi2IOCFactsReply_t saved_facts; uint8_t saved_mode, reallocating; mpr_dprint(sc, MPR_INIT|MPR_TRACE, "%s entered\n", __func__); /* Save old IOC Facts and then only reallocate if Facts have changed */ if (!attaching) { bcopy(sc->facts, &saved_facts, sizeof(MPI2_IOC_FACTS_REPLY)); } /* * Get IOC Facts. In all cases throughout this function, panic if doing * a re-initialization and only return the error if attaching so the OS * can handle it. */ if ((error = mpr_get_iocfacts(sc, sc->facts)) != 0) { if (attaching) { mpr_dprint(sc, MPR_INIT|MPR_FAULT, "Failed to get " "IOC Facts with error %d, exit\n", error); return (error); } else { panic("%s failed to get IOC Facts with error %d\n", __func__, error); } } MPR_DPRINT_PAGE(sc, MPR_XINFO, iocfacts, sc->facts); snprintf(sc->fw_version, sizeof(sc->fw_version), "%02d.%02d.%02d.%02d", sc->facts->FWVersion.Struct.Major, sc->facts->FWVersion.Struct.Minor, sc->facts->FWVersion.Struct.Unit, sc->facts->FWVersion.Struct.Dev); mpr_dprint(sc, MPR_INFO, "Firmware: %s, Driver: %s\n", sc->fw_version, MPR_DRIVER_VERSION); mpr_dprint(sc, MPR_INFO, "IOCCapabilities: %b\n", sc->facts->IOCCapabilities, "\20" "\3ScsiTaskFull" "\4DiagTrace" "\5SnapBuf" "\6ExtBuf" "\7EEDP" "\10BiDirTarg" "\11Multicast" "\14TransRetry" "\15IR" "\16EventReplay" "\17RaidAccel" "\20MSIXIndex" "\21HostDisc" "\22FastPath" "\23RDPQArray" "\24AtomicReqDesc" "\25PCIeSRIOV"); /* * If the chip doesn't support event replay then a hard reset will be * required to trigger a full discovery. Do the reset here then * retransition to Ready. A hard reset might have already been done, * but it doesn't hurt to do it again. Only do this if attaching, not * for a Diag Reset. 
*/ if (attaching && ((sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_EVENT_REPLAY) == 0)) { mpr_dprint(sc, MPR_INIT, "No event replay, resetting\n"); mpr_diag_reset(sc, NO_SLEEP); if ((error = mpr_transition_ready(sc)) != 0) { mpr_dprint(sc, MPR_INIT|MPR_FAULT, "Failed to " "transition to ready with error %d, exit\n", error); return (error); } } /* * Set flag if IR Firmware is loaded. If the RAID Capability has * changed from the previous IOC Facts, log a warning, but only if * checking this after a Diag Reset and not during attach. */ saved_mode = sc->ir_firmware; if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_INTEGRATED_RAID) sc->ir_firmware = 1; if (!attaching) { if (sc->ir_firmware != saved_mode) { mpr_dprint(sc, MPR_INIT|MPR_FAULT, "new IR/IT mode " "in IOC Facts does not match previous mode\n"); } } /* Only deallocate and reallocate if relevant IOC Facts have changed */ reallocating = FALSE; sc->mpr_flags &= ~MPR_FLAGS_REALLOCATED; if ((!attaching) && ((saved_facts.MsgVersion != sc->facts->MsgVersion) || (saved_facts.HeaderVersion != sc->facts->HeaderVersion) || (saved_facts.MaxChainDepth != sc->facts->MaxChainDepth) || (saved_facts.RequestCredit != sc->facts->RequestCredit) || (saved_facts.ProductID != sc->facts->ProductID) || (saved_facts.IOCCapabilities != sc->facts->IOCCapabilities) || (saved_facts.IOCRequestFrameSize != sc->facts->IOCRequestFrameSize) || (saved_facts.IOCMaxChainSegmentSize != sc->facts->IOCMaxChainSegmentSize) || (saved_facts.MaxTargets != sc->facts->MaxTargets) || (saved_facts.MaxSasExpanders != sc->facts->MaxSasExpanders) || (saved_facts.MaxEnclosures != sc->facts->MaxEnclosures) || (saved_facts.HighPriorityCredit != sc->facts->HighPriorityCredit) || (saved_facts.MaxReplyDescriptorPostQueueDepth != sc->facts->MaxReplyDescriptorPostQueueDepth) || (saved_facts.ReplyFrameSize != sc->facts->ReplyFrameSize) || (saved_facts.MaxVolumes != sc->facts->MaxVolumes) || (saved_facts.MaxPersistentEntries != sc->facts->MaxPersistentEntries))) { reallocating = TRUE; /* Record that we reallocated everything */ sc->mpr_flags |= MPR_FLAGS_REALLOCATED; } /* * Some things should be done if attaching or re-allocating after a Diag * Reset, but are not needed after a Diag Reset if the FW has not * changed. */ if (attaching || reallocating) { /* * Check if controller supports FW diag buffers and set flag to * enable each type. */ if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_DIAG_TRACE_BUFFER) sc->fw_diag_buffer_list[MPI2_DIAG_BUF_TYPE_TRACE]. enabled = TRUE; if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_SNAPSHOT_BUFFER) sc->fw_diag_buffer_list[MPI2_DIAG_BUF_TYPE_SNAPSHOT]. enabled = TRUE; if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_EXTENDED_BUFFER) sc->fw_diag_buffer_list[MPI2_DIAG_BUF_TYPE_EXTENDED]. enabled = TRUE; /* * Set flags for some supported items. 
*/ if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_EEDP) sc->eedp_enabled = TRUE; if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_TLR) sc->control_TLR = TRUE; if (sc->facts->IOCCapabilities & MPI26_IOCFACTS_CAPABILITY_ATOMIC_REQ) sc->atomic_desc_capable = TRUE; mpr_resize_queues(sc); /* * Initialize all Tail Queues */ TAILQ_INIT(&sc->req_list); TAILQ_INIT(&sc->high_priority_req_list); TAILQ_INIT(&sc->chain_list); TAILQ_INIT(&sc->prp_page_list); TAILQ_INIT(&sc->tm_list); } /* * If doing a Diag Reset and the FW is significantly different * (reallocating will be set above in IOC Facts comparison), then all * buffers based on the IOC Facts will need to be freed before they are * reallocated. */ if (reallocating) { mpr_iocfacts_free(sc); mprsas_realloc_targets(sc, saved_facts.MaxTargets + saved_facts.MaxVolumes); } /* * Any deallocation has been completed. Now start reallocating * if needed. Will only need to reallocate if attaching or if the new * IOC Facts are different from the previous IOC Facts after a Diag * Reset. Targets have already been allocated above if needed. */ error = 0; while (attaching || reallocating) { if ((error = mpr_alloc_hw_queues(sc)) != 0) break; if ((error = mpr_alloc_replies(sc)) != 0) break; if ((error = mpr_alloc_requests(sc)) != 0) break; if ((error = mpr_alloc_queues(sc)) != 0) break; break; } if (error) { mpr_dprint(sc, MPR_INIT|MPR_ERROR, "Failed to alloc queues with error %d\n", error); mpr_free(sc); return (error); } /* Always initialize the queues */ bzero(sc->free_queue, sc->fqdepth * 4); mpr_init_queues(sc); /* * Always get the chip out of the reset state, but only panic if not * attaching. If attaching and there is an error, that is handled by * the OS. */ error = mpr_transition_operational(sc); if (error != 0) { mpr_dprint(sc, MPR_INIT|MPR_FAULT, "Failed to " "transition to operational with error %d\n", error); mpr_free(sc); return (error); } /* * Finish the queue initialization. * These are set here instead of in mpr_init_queues() because the * IOC resets these values during the state transition in * mpr_transition_operational(). The free index is set to 1 * because the corresponding index in the IOC is set to 0, and the * IOC treats the queues as full if both are set to the same value. * Hence the reason that the queue can't hold all of the possible * replies. */ sc->replypostindex = 0; mpr_regwrite(sc, MPI2_REPLY_FREE_HOST_INDEX_OFFSET, sc->replyfreeindex); mpr_regwrite(sc, MPI2_REPLY_POST_HOST_INDEX_OFFSET, 0); /* * Attach the subsystems so they can prepare their event masks. * XXX Should be dynamic so that IM/IR and user modules can attach */ error = 0; while (attaching) { mpr_dprint(sc, MPR_INIT, "Attaching subsystems\n"); if ((error = mpr_attach_log(sc)) != 0) break; if ((error = mpr_attach_sas(sc)) != 0) break; if ((error = mpr_attach_user(sc)) != 0) break; break; } if (error) { mpr_dprint(sc, MPR_INIT|MPR_ERROR, "Failed to attach all subsystems: error %d\n", error); mpr_free(sc); return (error); } if ((error = mpr_pci_setup_interrupts(sc)) != 0) { mpr_dprint(sc, MPR_INIT|MPR_ERROR, "Failed to setup interrupts\n"); mpr_free(sc); return (error); } return (error); } /* * This is called if memory is being free (during detach for example) and when * buffers need to be reallocated due to a Diag Reset. 
*/ static void mpr_iocfacts_free(struct mpr_softc *sc) { struct mpr_command *cm; int i; mpr_dprint(sc, MPR_TRACE, "%s\n", __func__); if (sc->free_busaddr != 0) bus_dmamap_unload(sc->queues_dmat, sc->queues_map); if (sc->free_queue != NULL) bus_dmamem_free(sc->queues_dmat, sc->free_queue, sc->queues_map); if (sc->queues_dmat != NULL) bus_dma_tag_destroy(sc->queues_dmat); if (sc->chain_busaddr != 0) bus_dmamap_unload(sc->chain_dmat, sc->chain_map); if (sc->chain_frames != NULL) bus_dmamem_free(sc->chain_dmat, sc->chain_frames, sc->chain_map); if (sc->chain_dmat != NULL) bus_dma_tag_destroy(sc->chain_dmat); if (sc->sense_busaddr != 0) bus_dmamap_unload(sc->sense_dmat, sc->sense_map); if (sc->sense_frames != NULL) bus_dmamem_free(sc->sense_dmat, sc->sense_frames, sc->sense_map); if (sc->sense_dmat != NULL) bus_dma_tag_destroy(sc->sense_dmat); if (sc->prp_page_busaddr != 0) bus_dmamap_unload(sc->prp_page_dmat, sc->prp_page_map); if (sc->prp_pages != NULL) bus_dmamem_free(sc->prp_page_dmat, sc->prp_pages, sc->prp_page_map); if (sc->prp_page_dmat != NULL) bus_dma_tag_destroy(sc->prp_page_dmat); if (sc->reply_busaddr != 0) bus_dmamap_unload(sc->reply_dmat, sc->reply_map); if (sc->reply_frames != NULL) bus_dmamem_free(sc->reply_dmat, sc->reply_frames, sc->reply_map); if (sc->reply_dmat != NULL) bus_dma_tag_destroy(sc->reply_dmat); if (sc->req_busaddr != 0) bus_dmamap_unload(sc->req_dmat, sc->req_map); if (sc->req_frames != NULL) bus_dmamem_free(sc->req_dmat, sc->req_frames, sc->req_map); if (sc->req_dmat != NULL) bus_dma_tag_destroy(sc->req_dmat); if (sc->chains != NULL) free(sc->chains, M_MPR); if (sc->prps != NULL) free(sc->prps, M_MPR); if (sc->commands != NULL) { for (i = 1; i < sc->num_reqs; i++) { cm = &sc->commands[i]; bus_dmamap_destroy(sc->buffer_dmat, cm->cm_dmamap); } free(sc->commands, M_MPR); } if (sc->buffer_dmat != NULL) bus_dma_tag_destroy(sc->buffer_dmat); mpr_pci_free_interrupts(sc); free(sc->queues, M_MPR); sc->queues = NULL; } /* * The terms diag reset and hard reset are used interchangeably in the MPI * docs to mean resetting the controller chip. In this code diag reset * cleans everything up, and the hard reset function just sends the reset * sequence to the chip. This should probably be refactored so that every * subsystem gets a reset notification of some sort, and can clean up * appropriately. */ int mpr_reinit(struct mpr_softc *sc) { int error; struct mprsas_softc *sassc; sassc = sc->sassc; MPR_FUNCTRACE(sc); mtx_assert(&sc->mpr_mtx, MA_OWNED); mpr_dprint(sc, MPR_INIT|MPR_INFO, "Reinitializing controller\n"); if (sc->mpr_flags & MPR_FLAGS_DIAGRESET) { mpr_dprint(sc, MPR_INIT, "Reset already in progress\n"); return 0; } /* * Make sure the completion callbacks can recognize they're getting * a NULL cm_reply due to a reset. */ sc->mpr_flags |= MPR_FLAGS_DIAGRESET; /* * Mask interrupts here. */ mpr_dprint(sc, MPR_INIT, "Masking interrupts and resetting\n"); mpr_mask_intr(sc); error = mpr_diag_reset(sc, CAN_SLEEP); if (error != 0) { panic("%s hard reset failed with error %d\n", __func__, error); } /* Restore the PCI state, including the MSI-X registers */ mpr_pci_restore(sc); /* Give the I/O subsystem special priority to get itself prepared */ mprsas_handle_reinit(sc); /* * Get IOC Facts and allocate all structures based on this information. * The attach function will also call mpr_iocfacts_allocate at startup. * If relevant values have changed in IOC Facts, this function will free * all of the memory based on IOC Facts and reallocate that memory. 
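* (The overall sequence, above and below: mask interrupts, diag
* reset, restore PCI state, quiesce the SAS layer, re-fetch IOC Facts
* and reallocate, re-read the static config pages, rebuild the
* mapping tables, re-register events, then release the simq once
* discovery completes.)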
*/ if ((error = mpr_iocfacts_allocate(sc, FALSE)) != 0) { panic("%s IOC Facts based allocation failed with error %d\n", __func__, error); } /* * Mapping structures will be re-allocated after getting IOC Page8, so * free these structures here. */ mpr_mapping_exit(sc); /* * The static page function currently read is IOC Page8. Others can be * added in the future. It's possible that the values in IOC Page8 have * changed after a Diag Reset due to user modification, so always read * these. Interrupts are masked, so unmask them before getting config * pages. */ mpr_unmask_intr(sc); sc->mpr_flags &= ~MPR_FLAGS_DIAGRESET; mpr_base_static_config_pages(sc); /* * Some mapping info is based on IOC Page8 data, so re-initialize the * mapping tables. */ mpr_mapping_initialize(sc); /* * Restart will reload the event masks clobbered by the reset, and * then enable the port. */ mpr_reregister_events(sc); /* the end of discovery will release the simq, so we're done. */ mpr_dprint(sc, MPR_INIT|MPR_XINFO, "Finished sc %p post %u free %u\n", sc, sc->replypostindex, sc->replyfreeindex); mprsas_release_simq_reinit(sassc); mpr_dprint(sc, MPR_INIT, "%s exit error= %d\n", __func__, error); return 0; } /* Wait for the chip to ACK a word that we've put into its FIFO. * Wait for <timeout> seconds. In a single loop iteration, busy wait * for 500 microseconds. * Total is [ 0.5 * (2000 * <timeout>) ] in milliseconds. * */ static int mpr_wait_db_ack(struct mpr_softc *sc, int timeout, int sleep_flag) { u32 cntdn, count; u32 int_status; u32 doorbell; count = 0; cntdn = (sleep_flag == CAN_SLEEP) ? 1000*timeout : 2000*timeout; do { int_status = mpr_regread(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET); if (!(int_status & MPI2_HIS_SYS2IOC_DB_STATUS)) { mpr_dprint(sc, MPR_TRACE, "%s: successful count(%d), " "timeout(%d)\n", __func__, count, timeout); return 0; } else if (int_status & MPI2_HIS_IOC2SYS_DB_STATUS) { doorbell = mpr_regread(sc, MPI2_DOORBELL_OFFSET); if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) { mpr_dprint(sc, MPR_FAULT, "fault_state(0x%04x)!\n", doorbell); return (EFAULT); } } else if (int_status == 0xFFFFFFFF) goto out; /* * If it can sleep, sleep for 1 millisecond, else busy loop for * 0.5 millisecond */ if (mtx_owned(&sc->mpr_mtx) && sleep_flag == CAN_SLEEP) msleep(&sc->msleep_fake_chan, &sc->mpr_mtx, 0, "mprdba", hz/1000); else if (sleep_flag == CAN_SLEEP) pause("mprdba", hz/1000); else DELAY(500); count++; } while (--cntdn); out: mpr_dprint(sc, MPR_FAULT, "%s: failed due to timeout count(%d), " "int_status(%x)!\n", __func__, count, int_status); return (ETIMEDOUT); } /* Wait for the chip to signal that the next word in its FIFO can be fetched */ static int mpr_wait_db_int(struct mpr_softc *sc) { int retry; for (retry = 0; retry < MPR_DB_MAX_WAIT; retry++) { if ((mpr_regread(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET) & MPI2_HIS_IOC2SYS_DB_STATUS) != 0) return (0); DELAY(2000); } return (ETIMEDOUT); } /* Step through the synchronous command state machine, i.e.
"Doorbell mode" */ static int mpr_request_sync(struct mpr_softc *sc, void *req, MPI2_DEFAULT_REPLY *reply, int req_sz, int reply_sz, int timeout) { uint32_t *data32; uint16_t *data16; int i, count, ioc_sz, residual; int sleep_flags = CAN_SLEEP; #if __FreeBSD_version >= 1000029 if (curthread->td_no_sleeping) #else //__FreeBSD_version < 1000029 if (curthread->td_pflags & TDP_NOSLEEPING) #endif //__FreeBSD_version >= 1000029 sleep_flags = NO_SLEEP; /* Step 1 */ mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); /* Step 2 */ if (mpr_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_USED) return (EBUSY); /* Step 3 * Announce that a message is coming through the doorbell. Messages * are pushed at 32bit words, so round up if needed. */ count = (req_sz + 3) / 4; mpr_regwrite(sc, MPI2_DOORBELL_OFFSET, (MPI2_FUNCTION_HANDSHAKE << MPI2_DOORBELL_FUNCTION_SHIFT) | (count << MPI2_DOORBELL_ADD_DWORDS_SHIFT)); /* Step 4 */ if (mpr_wait_db_int(sc) || (mpr_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_USED) == 0) { mpr_dprint(sc, MPR_FAULT, "Doorbell failed to activate\n"); return (ENXIO); } mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); if (mpr_wait_db_ack(sc, 5, sleep_flags) != 0) { mpr_dprint(sc, MPR_FAULT, "Doorbell handshake failed\n"); return (ENXIO); } /* Step 5 */ /* Clock out the message data synchronously in 32-bit dwords*/ data32 = (uint32_t *)req; for (i = 0; i < count; i++) { mpr_regwrite(sc, MPI2_DOORBELL_OFFSET, htole32(data32[i])); if (mpr_wait_db_ack(sc, 5, sleep_flags) != 0) { mpr_dprint(sc, MPR_FAULT, "Timeout while writing doorbell\n"); return (ENXIO); } } /* Step 6 */ /* Clock in the reply in 16-bit words. The total length of the * message is always in the 4th byte, so clock out the first 2 words * manually, then loop the rest. */ data16 = (uint16_t *)reply; if (mpr_wait_db_int(sc) != 0) { mpr_dprint(sc, MPR_FAULT, "Timeout reading doorbell 0\n"); return (ENXIO); } data16[0] = mpr_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_DATA_MASK; mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); if (mpr_wait_db_int(sc) != 0) { mpr_dprint(sc, MPR_FAULT, "Timeout reading doorbell 1\n"); return (ENXIO); } data16[1] = mpr_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_DATA_MASK; mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); /* Number of 32bit words in the message */ ioc_sz = reply->MsgLength; /* * Figure out how many 16bit words to clock in without overrunning. * The precision loss with dividing reply_sz can safely be * ignored because the messages can only be multiples of 32bits. */ residual = 0; count = MIN((reply_sz / 4), ioc_sz) * 2; if (count < ioc_sz * 2) { residual = ioc_sz * 2 - count; mpr_dprint(sc, MPR_ERROR, "Driver error, throwing away %d " "residual message words\n", residual); } for (i = 2; i < count; i++) { if (mpr_wait_db_int(sc) != 0) { mpr_dprint(sc, MPR_FAULT, "Timeout reading doorbell %d\n", i); return (ENXIO); } data16[i] = mpr_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_DATA_MASK; mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); } /* * Pull out residual words that won't fit into the provided buffer. * This keeps the chip from hanging due to a driver programming * error. 
*/ while (residual--) { if (mpr_wait_db_int(sc) != 0) { mpr_dprint(sc, MPR_FAULT, "Timeout reading doorbell\n"); return (ENXIO); } (void)mpr_regread(sc, MPI2_DOORBELL_OFFSET); mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); } /* Step 7 */ if (mpr_wait_db_int(sc) != 0) { mpr_dprint(sc, MPR_FAULT, "Timeout waiting to exit doorbell\n"); return (ENXIO); } if (mpr_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_USED) mpr_dprint(sc, MPR_FAULT, "Warning, doorbell still active\n"); mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); return (0); } static void mpr_enqueue_request(struct mpr_softc *sc, struct mpr_command *cm) { request_descriptor rd; MPR_FUNCTRACE(sc); mpr_dprint(sc, MPR_TRACE, "SMID %u cm %p ccb %p\n", cm->cm_desc.Default.SMID, cm, cm->cm_ccb); if (sc->mpr_flags & MPR_FLAGS_ATTACH_DONE && !(sc->mpr_flags & MPR_FLAGS_SHUTDOWN)) mtx_assert(&sc->mpr_mtx, MA_OWNED); if (++sc->io_cmds_active > sc->io_cmds_highwater) sc->io_cmds_highwater++; if (sc->atomic_desc_capable) { rd.u.low = cm->cm_desc.Words.Low; mpr_regwrite(sc, MPI26_ATOMIC_REQUEST_DESCRIPTOR_POST_OFFSET, rd.u.low); } else { rd.u.low = cm->cm_desc.Words.Low; rd.u.high = cm->cm_desc.Words.High; rd.word = htole64(rd.word); mpr_regwrite(sc, MPI2_REQUEST_DESCRIPTOR_POST_LOW_OFFSET, rd.u.low); mpr_regwrite(sc, MPI2_REQUEST_DESCRIPTOR_POST_HIGH_OFFSET, rd.u.high); } } /* * Just the FACTS, ma'am. */ static int mpr_get_iocfacts(struct mpr_softc *sc, MPI2_IOC_FACTS_REPLY *facts) { MPI2_DEFAULT_REPLY *reply; MPI2_IOC_FACTS_REQUEST request; int error, req_sz, reply_sz; MPR_FUNCTRACE(sc); mpr_dprint(sc, MPR_INIT, "%s entered\n", __func__); req_sz = sizeof(MPI2_IOC_FACTS_REQUEST); reply_sz = sizeof(MPI2_IOC_FACTS_REPLY); reply = (MPI2_DEFAULT_REPLY *)facts; bzero(&request, req_sz); request.Function = MPI2_FUNCTION_IOC_FACTS; error = mpr_request_sync(sc, &request, reply, req_sz, reply_sz, 5); mpr_dprint(sc, MPR_INIT, "%s exit, error= %d\n", __func__, error); return (error); } static int mpr_send_iocinit(struct mpr_softc *sc) { MPI2_IOC_INIT_REQUEST init; MPI2_DEFAULT_REPLY reply; int req_sz, reply_sz, error; struct timeval now; uint64_t time_in_msec; MPR_FUNCTRACE(sc); mpr_dprint(sc, MPR_INIT, "%s entered\n", __func__); req_sz = sizeof(MPI2_IOC_INIT_REQUEST); reply_sz = sizeof(MPI2_IOC_INIT_REPLY); bzero(&init, req_sz); bzero(&reply, reply_sz); /* * Fill in the init block. Note that most addresses are * deliberately in the lower 32bits of memory. This is a micro- * optimization for PCI/PCIX, though it's not clear if it helps PCIe.
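 *
 * The TimeStamp fields below show the usual 64-bit split into two
 * little-endian halves; a sketch of the same computation (the explicit
 * 64-bit cast is an editorial precaution against the multiply
 * overflowing on a 32-bit time_t, not something the code below does):
 *
 *	struct timeval now;
 *	uint64_t ms;
 *
 *	getmicrotime(&now);
 *	ms = (uint64_t)now.tv_sec * 1000 + now.tv_usec / 1000;
 *	init.TimeStamp.High = htole32((ms >> 32) & 0xFFFFFFFF);
 *	init.TimeStamp.Low  = htole32(ms & 0xFFFFFFFF);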
*/ init.Function = MPI2_FUNCTION_IOC_INIT; init.WhoInit = MPI2_WHOINIT_HOST_DRIVER; init.MsgVersion = htole16(MPI2_VERSION); init.HeaderVersion = htole16(MPI2_HEADER_VERSION); init.SystemRequestFrameSize = htole16(sc->facts->IOCRequestFrameSize); init.ReplyDescriptorPostQueueDepth = htole16(sc->pqdepth); init.ReplyFreeQueueDepth = htole16(sc->fqdepth); init.SenseBufferAddressHigh = 0; init.SystemReplyAddressHigh = 0; init.SystemRequestFrameBaseAddress.High = 0; init.SystemRequestFrameBaseAddress.Low = htole32((uint32_t)sc->req_busaddr); init.ReplyDescriptorPostQueueAddress.High = 0; init.ReplyDescriptorPostQueueAddress.Low = htole32((uint32_t)sc->post_busaddr); init.ReplyFreeQueueAddress.High = 0; init.ReplyFreeQueueAddress.Low = htole32((uint32_t)sc->free_busaddr); getmicrotime(&now); time_in_msec = (now.tv_sec * 1000 + now.tv_usec/1000); init.TimeStamp.High = htole32((time_in_msec >> 32) & 0xFFFFFFFF); init.TimeStamp.Low = htole32(time_in_msec & 0xFFFFFFFF); init.HostPageSize = HOST_PAGE_SIZE_4K; error = mpr_request_sync(sc, &init, &reply, req_sz, reply_sz, 5); if ((reply.IOCStatus & MPI2_IOCSTATUS_MASK) != MPI2_IOCSTATUS_SUCCESS) error = ENXIO; mpr_dprint(sc, MPR_INIT, "IOCInit status= 0x%x\n", reply.IOCStatus); mpr_dprint(sc, MPR_INIT, "%s exit\n", __func__); return (error); } void mpr_memaddr_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { bus_addr_t *addr; addr = arg; *addr = segs[0].ds_addr; } static int mpr_alloc_queues(struct mpr_softc *sc) { struct mpr_queue *q; int nq, i; nq = sc->msi_msgs; mpr_dprint(sc, MPR_INIT|MPR_XINFO, "Allocating %d I/O queues\n", nq); sc->queues = malloc(sizeof(struct mpr_queue) * nq, M_MPR, M_NOWAIT|M_ZERO); if (sc->queues == NULL) return (ENOMEM); for (i = 0; i < nq; i++) { q = &sc->queues[i]; mpr_dprint(sc, MPR_INIT, "Configuring queue %d %p\n", i, q); q->sc = sc; q->qnum = i; } return (0); } static int mpr_alloc_hw_queues(struct mpr_softc *sc) { bus_addr_t queues_busaddr; uint8_t *queues; int qsize, fqsize, pqsize; /* * The reply free queue contains 4 byte entries in multiples of 16 and * aligned on a 16 byte boundary. There must always be an unused entry. * This queue supplies fresh reply frames for the firmware to use. * * The reply descriptor post queue contains 8 byte entries in * multiples of 16 and aligned on a 16 byte boundary. This queue * contains filled-in reply frames sent from the firmware to the host. * * These two queues are allocated together for simplicity. 
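 *
 * A worked example of the sizing below, assuming num_replies = 1000:
 *	fqdepth = roundup2(1000 + 1, 16) = 1008 -> fqsize = 1008 * 4 = 4032
 *	pqdepth = roundup2(1000 + 1, 16) = 1008 -> pqsize = 1008 * 8 = 8064
 *	qsize = 4032 + 8064 = 12096; the post queue begins at queues + 4032.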
*/ sc->fqdepth = roundup2(sc->num_replies + 1, 16); sc->pqdepth = roundup2(sc->num_replies + 1, 16); fqsize = sc->fqdepth * 4; pqsize = sc->pqdepth * 8; qsize = fqsize + pqsize; if (bus_dma_tag_create( sc->mpr_parent_dmat, /* parent */ 16, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ qsize, /* maxsize */ 1, /* nsegments */ qsize, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->queues_dmat)) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate queues DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->queues_dmat, (void **)&queues, BUS_DMA_NOWAIT, &sc->queues_map)) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate queues memory\n"); return (ENOMEM); } bzero(queues, qsize); bus_dmamap_load(sc->queues_dmat, sc->queues_map, queues, qsize, mpr_memaddr_cb, &queues_busaddr, 0); sc->free_queue = (uint32_t *)queues; sc->free_busaddr = queues_busaddr; sc->post_queue = (MPI2_REPLY_DESCRIPTORS_UNION *)(queues + fqsize); sc->post_busaddr = queues_busaddr + fqsize; return (0); } static int mpr_alloc_replies(struct mpr_softc *sc) { int rsize, num_replies; /* * sc->num_replies should be one less than sc->fqdepth. We need to * allocate space for sc->fqdepth replies, but only sc->num_replies * replies can be used at once. */ num_replies = max(sc->fqdepth, sc->num_replies); rsize = sc->facts->ReplyFrameSize * num_replies * 4; if (bus_dma_tag_create( sc->mpr_parent_dmat, /* parent */ 4, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ rsize, /* maxsize */ 1, /* nsegments */ rsize, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->reply_dmat)) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate replies DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->reply_dmat, (void **)&sc->reply_frames, BUS_DMA_NOWAIT, &sc->reply_map)) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate replies memory\n"); return (ENOMEM); } bzero(sc->reply_frames, rsize); bus_dmamap_load(sc->reply_dmat, sc->reply_map, sc->reply_frames, rsize, mpr_memaddr_cb, &sc->reply_busaddr, 0); return (0); } static int mpr_alloc_requests(struct mpr_softc *sc) { struct mpr_command *cm; struct mpr_chain *chain; int i, rsize, nsegs; rsize = sc->facts->IOCRequestFrameSize * sc->num_reqs * 4; if (bus_dma_tag_create( sc->mpr_parent_dmat, /* parent */ 16, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ rsize, /* maxsize */ 1, /* nsegments */ rsize, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->req_dmat)) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate request DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->req_dmat, (void **)&sc->req_frames, BUS_DMA_NOWAIT, &sc->req_map)) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate request memory\n"); return (ENOMEM); } bzero(sc->req_frames, rsize); bus_dmamap_load(sc->req_dmat, sc->req_map, sc->req_frames, rsize, mpr_memaddr_cb, &sc->req_busaddr, 0); /* * Gen3 and beyond use the IOCMaxChainSegmentSize from IOC Facts to * get the size of a Chain Frame. Previous versions use the size as a * Request Frame for the Chain Frame size. If IOCMaxChainSegmentSize * is 0, use the default value. The IOCMaxChainSegmentSize is the * number of 16-byte elements that can fit in a Chain Frame, which is * the size of an IEEE Simple SGE.
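 *
 * e.g. an IOC reporting IOCMaxChainSegmentSize = 8 gets a chain frame of
 * 8 * 16 = 128 bytes, while an older IOC with IOCRequestFrameSize = 32
 * dwords gets 32 * 4 = 128 bytes as well (example values only).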
*/ if (sc->facts->MsgVersion >= MPI2_VERSION_02_05) { sc->chain_seg_size = htole16(sc->facts->IOCMaxChainSegmentSize); if (sc->chain_seg_size == 0) { sc->chain_frame_size = MPR_DEFAULT_CHAIN_SEG_SIZE * MPR_MAX_CHAIN_ELEMENT_SIZE; } else { sc->chain_frame_size = sc->chain_seg_size * MPR_MAX_CHAIN_ELEMENT_SIZE; } } else { sc->chain_frame_size = sc->facts->IOCRequestFrameSize * 4; } rsize = sc->chain_frame_size * sc->max_chains; if (bus_dma_tag_create( sc->mpr_parent_dmat, /* parent */ 16, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ rsize, /* maxsize */ 1, /* nsegments */ rsize, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->chain_dmat)) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate chain DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->chain_dmat, (void **)&sc->chain_frames, BUS_DMA_NOWAIT, &sc->chain_map)) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate chain memory\n"); return (ENOMEM); } bzero(sc->chain_frames, rsize); bus_dmamap_load(sc->chain_dmat, sc->chain_map, sc->chain_frames, rsize, mpr_memaddr_cb, &sc->chain_busaddr, 0); rsize = MPR_SENSE_LEN * sc->num_reqs; if (bus_dma_tag_create( sc->mpr_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ rsize, /* maxsize */ 1, /* nsegments */ rsize, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->sense_dmat)) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate sense DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->sense_dmat, (void **)&sc->sense_frames, BUS_DMA_NOWAIT, &sc->sense_map)) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate sense memory\n"); return (ENOMEM); } bzero(sc->sense_frames, rsize); bus_dmamap_load(sc->sense_dmat, sc->sense_map, sc->sense_frames, rsize, mpr_memaddr_cb, &sc->sense_busaddr, 0); sc->chains = malloc(sizeof(struct mpr_chain) * sc->max_chains, M_MPR, M_WAITOK | M_ZERO); if (!sc->chains) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate chain memory\n"); return (ENOMEM); } for (i = 0; i < sc->max_chains; i++) { chain = &sc->chains[i]; chain->chain = (MPI2_SGE_IO_UNION *)(sc->chain_frames + i * sc->chain_frame_size); chain->chain_busaddr = sc->chain_busaddr + i * sc->chain_frame_size; mpr_free_chain(sc, chain); sc->chain_free_lowwater++; } /* * Allocate NVMe PRP Pages for NVMe SGL support only if the FW supports * these devices. */ if ((sc->facts->MsgVersion >= MPI2_VERSION_02_06) && (sc->facts->ProtocolFlags & MPI2_IOCFACTS_PROTOCOL_NVME_DEVICES)) { if (mpr_alloc_nvme_prp_pages(sc) == ENOMEM) return (ENOMEM); } /* XXX Need to pick a more precise value */ nsegs = (MAXPHYS / PAGE_SIZE) + 1; if (bus_dma_tag_create( sc->mpr_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ nsegs, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ busdma_lock_mutex, /* lockfunc */ &sc->mpr_mtx, /* lockarg */ &sc->buffer_dmat)) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate buffer DMA tag\n"); return (ENOMEM); } /* * SMID 0 cannot be used as a free command per the firmware spec. * Just drop that command instead of risking accounting bugs. 
*/ sc->commands = malloc(sizeof(struct mpr_command) * sc->num_reqs, M_MPR, M_WAITOK | M_ZERO); if (!sc->commands) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate command memory\n"); return (ENOMEM); } for (i = 1; i < sc->num_reqs; i++) { cm = &sc->commands[i]; cm->cm_req = sc->req_frames + i * sc->facts->IOCRequestFrameSize * 4; cm->cm_req_busaddr = sc->req_busaddr + i * sc->facts->IOCRequestFrameSize * 4; cm->cm_sense = &sc->sense_frames[i]; cm->cm_sense_busaddr = sc->sense_busaddr + i * MPR_SENSE_LEN; cm->cm_desc.Default.SMID = i; cm->cm_sc = sc; TAILQ_INIT(&cm->cm_chain_list); TAILQ_INIT(&cm->cm_prp_page_list); callout_init_mtx(&cm->cm_callout, &sc->mpr_mtx, 0); /* XXX Is a failure here a critical problem? */ if (bus_dmamap_create(sc->buffer_dmat, 0, &cm->cm_dmamap) == 0) { if (i <= sc->facts->HighPriorityCredit) mpr_free_high_priority_command(sc, cm); else mpr_free_command(sc, cm); } else { panic("failed to allocate command %d\n", i); sc->num_reqs = i; break; } } return (0); } /* * Allocate contiguous buffers for PCIe NVMe devices for building native PRPs, * which are scatter/gather lists for NVMe devices. * * This buffer must be contiguous due to the nature of how NVMe PRPs are built * and translated by FW. * * returns ENOMEM if memory could not be allocated, otherwise returns 0. */ static int mpr_alloc_nvme_prp_pages(struct mpr_softc *sc) { int PRPs_per_page, PRPs_required, pages_required; int rsize, i; struct mpr_prp_page *prp_page; /* * Assuming a MAX_IO_SIZE of 1MB and a PAGE_SIZE of 4k, the max number * of PRPs (NVMe's Scatter/Gather Element) needed per I/O is: * MAX_IO_SIZE / PAGE_SIZE = 256 * * 1 PRP entry in main frame for PRP list pointer still leaves 255 PRPs * required for the remainder of the 1MB I/O. 512 PRPs can fit into one * page (4096 / 8 = 512), so only one page is required for each I/O. * * Each of these buffers will need to be contiguous. For simplicity, * only one buffer is allocated here, which has all of the space * required for the NVMe Queue Depth. If there are problems allocating * this one buffer, this function will need to change to allocate * individual, contiguous NVME_QDEPTH buffers. * * The real calculation will use the real max io size. Above is just an * example. 
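 *
 * Worked with example values (maxio = 1 MB, PAGE_SIZE = 4096,
 * PRP_ENTRY_SIZE = 8):
 *	PRPs_required   = 1048576 / 4096 = 256
 *	PRPs_per_page   = (4096 / 8) - 1 = 511 (one slot is reserved for
 *	                  the next-list pointer)
 *	pages_required  = (256 / 511) + 1 = 1
 *	prp_buffer_size = 4096 * 1 = 4096 bytes per queue-depth slot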
* */ PRPs_required = sc->maxio / PAGE_SIZE; PRPs_per_page = (PAGE_SIZE / PRP_ENTRY_SIZE) - 1; pages_required = (PRPs_required / PRPs_per_page) + 1; sc->prp_buffer_size = PAGE_SIZE * pages_required; rsize = sc->prp_buffer_size * NVME_QDEPTH; if (bus_dma_tag_create( sc->mpr_parent_dmat, /* parent */ 4, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ rsize, /* maxsize */ 1, /* nsegments */ rsize, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->prp_page_dmat)) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate NVMe PRP DMA " "tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->prp_page_dmat, (void **)&sc->prp_pages, BUS_DMA_NOWAIT, &sc->prp_page_map)) { mpr_dprint(sc, MPR_ERROR, "Cannot allocate NVMe PRP memory\n"); return (ENOMEM); } bzero(sc->prp_pages, rsize); bus_dmamap_load(sc->prp_page_dmat, sc->prp_page_map, sc->prp_pages, rsize, mpr_memaddr_cb, &sc->prp_page_busaddr, 0); sc->prps = malloc(sizeof(struct mpr_prp_page) * NVME_QDEPTH, M_MPR, M_WAITOK | M_ZERO); for (i = 0; i < NVME_QDEPTH; i++) { prp_page = &sc->prps[i]; prp_page->prp_page = (uint64_t *)(sc->prp_pages + i * sc->prp_buffer_size); prp_page->prp_page_busaddr = (uint64_t)(sc->prp_page_busaddr + i * sc->prp_buffer_size); mpr_free_prp_page(sc, prp_page); sc->prp_pages_free_lowwater++; } return (0); } static int mpr_init_queues(struct mpr_softc *sc) { int i; memset((uint8_t *)sc->post_queue, 0xff, sc->pqdepth * 8); /* * According to the spec, we need to use one less reply than we * have space for on the queue. So sc->num_replies (the number we * use) should be less than sc->fqdepth (allocated size). */ if (sc->num_replies >= sc->fqdepth) return (EINVAL); /* * Initialize all of the free queue entries. */ for (i = 0; i < sc->fqdepth; i++) { sc->free_queue[i] = sc->reply_busaddr + (i * sc->facts->ReplyFrameSize * 4); } sc->replyfreeindex = sc->num_replies; return (0); } /* Get the driver parameter tunables. Lowest priority are the driver defaults. * Next are the global settings, if they exist. Highest are the per-unit * settings, if they exist. */ void mpr_get_tunables(struct mpr_softc *sc) { - char tmpstr[80]; + char tmpstr[80], mpr_debug[80]; /* XXX default to some debugging for now */ sc->mpr_debug = MPR_INFO | MPR_FAULT; sc->disable_msix = 0; sc->disable_msi = 0; sc->max_msix = MPR_MSIX_MAX; sc->max_chains = MPR_CHAIN_FRAMES; sc->max_io_pages = MPR_MAXIO_PAGES; sc->enable_ssu = MPR_SSU_ENABLE_SSD_DISABLE_HDD; sc->spinup_wait_time = DEFAULT_SPINUP_WAIT; sc->use_phynum = 1; sc->max_reqframes = MPR_REQ_FRAMES; sc->max_prireqframes = MPR_PRI_REQ_FRAMES; sc->max_replyframes = MPR_REPLY_FRAMES; sc->max_evtframes = MPR_EVT_REPLY_FRAMES; /* * Grab the global variables. 
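 *
 * These are boot-time tunables; hypothetical /boot/loader.conf entries
 * (values are only examples):
 *	hw.mpr.debug_level="info,fault"
 *	hw.mpr.max_chains="2048"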
*/ - TUNABLE_INT_FETCH("hw.mpr.debug_level", &sc->mpr_debug); + bzero(mpr_debug, 80); + if (TUNABLE_STR_FETCH("hw.mpr.debug_level", mpr_debug, 80) != 0) + mpr_parse_debug(sc, mpr_debug); TUNABLE_INT_FETCH("hw.mpr.disable_msix", &sc->disable_msix); TUNABLE_INT_FETCH("hw.mpr.disable_msi", &sc->disable_msi); TUNABLE_INT_FETCH("hw.mpr.max_msix", &sc->max_msix); TUNABLE_INT_FETCH("hw.mpr.max_chains", &sc->max_chains); TUNABLE_INT_FETCH("hw.mpr.max_io_pages", &sc->max_io_pages); TUNABLE_INT_FETCH("hw.mpr.enable_ssu", &sc->enable_ssu); TUNABLE_INT_FETCH("hw.mpr.spinup_wait_time", &sc->spinup_wait_time); TUNABLE_INT_FETCH("hw.mpr.use_phy_num", &sc->use_phynum); TUNABLE_INT_FETCH("hw.mpr.max_reqframes", &sc->max_reqframes); TUNABLE_INT_FETCH("hw.mpr.max_prireqframes", &sc->max_prireqframes); TUNABLE_INT_FETCH("hw.mpr.max_replyframes", &sc->max_replyframes); TUNABLE_INT_FETCH("hw.mpr.max_evtframes", &sc->max_evtframes); /* Grab the unit-instance variables */ snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.debug_level", device_get_unit(sc->mpr_dev)); - TUNABLE_INT_FETCH(tmpstr, &sc->mpr_debug); + bzero(mpr_debug, 80); + if (TUNABLE_STR_FETCH(tmpstr, mpr_debug, 80) != 0) + mpr_parse_debug(sc, mpr_debug); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.disable_msix", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->disable_msix); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.disable_msi", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->disable_msi); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.max_msix", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_msix); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.max_chains", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_chains); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.max_io_pages", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_io_pages); bzero(sc->exclude_ids, sizeof(sc->exclude_ids)); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.exclude_ids", device_get_unit(sc->mpr_dev)); TUNABLE_STR_FETCH(tmpstr, sc->exclude_ids, sizeof(sc->exclude_ids)); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.enable_ssu", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->enable_ssu); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.spinup_wait_time", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->spinup_wait_time); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.use_phy_num", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->use_phynum); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.max_reqframes", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_reqframes); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.max_prireqframes", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_prireqframes); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.max_replyframes", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_replyframes); snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.max_evtframes", device_get_unit(sc->mpr_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_evtframes); } static void mpr_setup_sysctl(struct mpr_softc *sc) { struct sysctl_ctx_list *sysctl_ctx = NULL; struct sysctl_oid *sysctl_tree = NULL; char tmpstr[80], tmpstr2[80]; /* * Setup the sysctl variable so the user can change the debug level * on the fly. 
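 *
 * Hypothetical usage once the string handler below is installed (unit 0
 * shown; flag names come from mpr_debug_strings, and the numeric echo
 * assumes MPR_INFO, MPR_FAULT and MPR_EVENT are 0x1, 0x2 and 0x4):
 *	# sysctl dev.mpr.0.debug_level=info,fault,event
 *	# sysctl dev.mpr.0.debug_level
 *	dev.mpr.0.debug_level: 0x7,info,fault,event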
*/ snprintf(tmpstr, sizeof(tmpstr), "MPR controller %d", device_get_unit(sc->mpr_dev)); snprintf(tmpstr2, sizeof(tmpstr2), "%d", device_get_unit(sc->mpr_dev)); sysctl_ctx = device_get_sysctl_ctx(sc->mpr_dev); if (sysctl_ctx != NULL) sysctl_tree = device_get_sysctl_tree(sc->mpr_dev); if (sysctl_tree == NULL) { sysctl_ctx_init(&sc->sysctl_ctx); sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_hw_mpr), OID_AUTO, tmpstr2, CTLFLAG_RD, 0, tmpstr); if (sc->sysctl_tree == NULL) return; sysctl_ctx = &sc->sysctl_ctx; sysctl_tree = sc->sysctl_tree; } - SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), - OID_AUTO, "debug_level", CTLFLAG_RW, &sc->mpr_debug, 0, - "mpr debug level"); + SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), + OID_AUTO, "debug_level", CTLTYPE_STRING | CTLFLAG_RW, sc, 0, + mpr_debug_sysctl, "A", "mpr debug level"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "disable_msix", CTLFLAG_RD, &sc->disable_msix, 0, "Disable the use of MSI-X interrupts"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_msix", CTLFLAG_RD, &sc->max_msix, 0, "User-defined maximum number of MSIX queues"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "msix_msgs", CTLFLAG_RD, &sc->msi_msgs, 0, "Negotiated number of MSIX queues"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_reqframes", CTLFLAG_RD, &sc->max_reqframes, 0, "Total number of allocated request frames"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_prireqframes", CTLFLAG_RD, &sc->max_prireqframes, 0, "Total number of allocated high priority request frames"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_replyframes", CTLFLAG_RD, &sc->max_replyframes, 0, "Total number of allocated reply frames"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_evtframes", CTLFLAG_RD, &sc->max_evtframes, 0, "Total number of event frames allocated"); SYSCTL_ADD_STRING(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "firmware_version", CTLFLAG_RW, sc->fw_version, strlen(sc->fw_version), "firmware version"); SYSCTL_ADD_STRING(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "driver_version", CTLFLAG_RW, MPR_DRIVER_VERSION, strlen(MPR_DRIVER_VERSION), "driver version"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "io_cmds_active", CTLFLAG_RD, &sc->io_cmds_active, 0, "number of currently active commands"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "io_cmds_highwater", CTLFLAG_RD, &sc->io_cmds_highwater, 0, "maximum active commands seen"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "chain_free", CTLFLAG_RD, &sc->chain_free, 0, "number of free chain elements"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "chain_free_lowwater", CTLFLAG_RD, &sc->chain_free_lowwater, 0,"lowest number of free chain elements"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_chains", CTLFLAG_RD, &sc->max_chains, 0,"maximum chain frames that will be allocated"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_io_pages", CTLFLAG_RD, &sc->max_io_pages, 0,"maximum pages to allow per I/O (if <1 use " "IOCFacts)"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "enable_ssu", CTLFLAG_RW, &sc->enable_ssu, 0, "enable SSU to SATA SSD/HDD at shutdown"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, 
"chain_alloc_fail", CTLFLAG_RD, &sc->chain_alloc_fail, "chain allocation failures"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "spinup_wait_time", CTLFLAG_RD, &sc->spinup_wait_time, DEFAULT_SPINUP_WAIT, "seconds to wait for " "spinup after SATA ID error"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "use_phy_num", CTLFLAG_RD, &sc->use_phynum, 0, "Use the phy number for enumeration"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "prp_pages_free", CTLFLAG_RD, &sc->prp_pages_free, 0, "number of free PRP pages"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "prp_pages_free_lowwater", CTLFLAG_RD, &sc->prp_pages_free_lowwater, 0,"lowest number of free PRP pages"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "prp_page_alloc_fail", CTLFLAG_RD, &sc->prp_page_alloc_fail, "PRP page allocation failures"); +} + +static struct mpr_debug_string { + char *name; + int flag; +} mpr_debug_strings[] = { + {"info", MPR_INFO}, + {"fault", MPR_FAULT}, + {"event", MPR_EVENT}, + {"log", MPR_LOG}, + {"recovery", MPR_RECOVERY}, + {"error", MPR_ERROR}, + {"init", MPR_INIT}, + {"xinfo", MPR_XINFO}, + {"user", MPR_USER}, + {"mapping", MPR_MAPPING}, + {"trace", MPR_TRACE} +}; + +static int +mpr_debug_sysctl(SYSCTL_HANDLER_ARGS) +{ + struct mpr_softc *sc; + struct mpr_debug_string *string; + struct sbuf sbuf; + char *buffer; + size_t sz; + int i, len, debug, error; + + sc = (struct mpr_softc *)arg1; + + error = sysctl_wire_old_buffer(req, 0); + if (error != 0) + return (error); + + sbuf_new_for_sysctl(&sbuf, NULL, 128, req); + debug = sc->mpr_debug; + + sbuf_printf(&sbuf, "%#x", debug); + + sz = sizeof(mpr_debug_strings) / sizeof(mpr_debug_strings[0]); + for (i = 0; i < sz; i++) { + string = &mpr_debug_strings[i]; + if (debug & string->flag) + sbuf_printf(&sbuf, ",%s", string->name); + } + + error = sbuf_finish(&sbuf); + sbuf_delete(&sbuf); + + if (error || req->newptr == NULL) + return (error); + + len = req->newlen - req->newidx; + if (len == 0) + return (0); + + buffer = malloc(len, M_MPR, M_ZERO|M_WAITOK); + error = SYSCTL_IN(req, buffer, len); + + mpr_parse_debug(sc, buffer); + + free(buffer, M_MPR); + return (error); +} + +static void +mpr_parse_debug(struct mpr_softc *sc, char *list) +{ + struct mpr_debug_string *string; + char *token, *endtoken; + size_t sz; + int flags, i; + + if (list == NULL || *list == '\0') + return; + + flags = 0; + sz = sizeof(mpr_debug_strings) / sizeof(mpr_debug_strings[0]); + while ((token = strsep(&list, ":,")) != NULL) { + + /* Handle integer flags */ + flags |= strtol(token, &endtoken, 0); + if (token != endtoken) + continue; + + /* Handle text flags */ + for (i = 0; i < sz; i++) { + string = &mpr_debug_strings[i]; + if (strcasecmp(token, string->name) == 0) { + flags |= string->flag; + break; + } + } + } + + sc->mpr_debug = flags; + return; } int mpr_attach(struct mpr_softc *sc) { int error; MPR_FUNCTRACE(sc); mpr_dprint(sc, MPR_INIT, "%s entered\n", __func__); mtx_init(&sc->mpr_mtx, "MPR lock", NULL, MTX_DEF); callout_init_mtx(&sc->periodic, &sc->mpr_mtx, 0); callout_init_mtx(&sc->device_check_callout, &sc->mpr_mtx, 0); TAILQ_INIT(&sc->event_list); timevalclear(&sc->lastfail); if ((error = mpr_transition_ready(sc)) != 0) { mpr_dprint(sc, MPR_INIT|MPR_FAULT, "Failed to transition ready\n"); return (error); } sc->facts = malloc(sizeof(MPI2_IOC_FACTS_REPLY), M_MPR, M_ZERO|M_NOWAIT); if (!sc->facts) { mpr_dprint(sc, MPR_INIT|MPR_FAULT, "Cannot allocate memory, exit\n"); 
return (ENOMEM); } /* * Get IOC Facts and allocate all structures based on this information. * A Diag Reset will also call mpr_iocfacts_allocate and re-read the IOC * Facts. If relevant values have changed in IOC Facts, this function * will free all of the memory based on IOC Facts and reallocate that * memory. If this fails, any allocated memory should already be freed. */ if ((error = mpr_iocfacts_allocate(sc, TRUE)) != 0) { mpr_dprint(sc, MPR_INIT|MPR_FAULT, "IOC Facts allocation " "failed with error %d\n", error); return (error); } /* Start the periodic watchdog check on the IOC Doorbell */ mpr_periodic(sc); /* * The portenable will kick off discovery events that will drive the * rest of the initialization process. The CAM/SAS module will * hold up the boot sequence until discovery is complete. */ sc->mpr_ich.ich_func = mpr_startup; sc->mpr_ich.ich_arg = sc; if (config_intrhook_establish(&sc->mpr_ich) != 0) { mpr_dprint(sc, MPR_INIT|MPR_ERROR, "Cannot establish MPR config hook\n"); error = EINVAL; } /* * Allow IR to shutdown gracefully when shutdown occurs. */ sc->shutdown_eh = EVENTHANDLER_REGISTER(shutdown_final, mprsas_ir_shutdown, sc, SHUTDOWN_PRI_DEFAULT); if (sc->shutdown_eh == NULL) mpr_dprint(sc, MPR_INIT|MPR_ERROR, "shutdown event registration failed\n"); mpr_setup_sysctl(sc); sc->mpr_flags |= MPR_FLAGS_ATTACH_DONE; mpr_dprint(sc, MPR_INIT, "%s exit error= %d\n", __func__, error); return (error); } /* Run through any late-start handlers. */ static void mpr_startup(void *arg) { struct mpr_softc *sc; sc = (struct mpr_softc *)arg; mpr_dprint(sc, MPR_INIT, "%s entered\n", __func__); mpr_lock(sc); mpr_unmask_intr(sc); /* initialize device mapping tables */ mpr_base_static_config_pages(sc); mpr_mapping_initialize(sc); mprsas_startup(sc); mpr_unlock(sc); mpr_dprint(sc, MPR_INIT, "disestablish config intrhook\n"); config_intrhook_disestablish(&sc->mpr_ich); sc->mpr_ich.ich_arg = NULL; mpr_dprint(sc, MPR_INIT, "%s exit\n", __func__); } /* Periodic watchdog. Is called with the driver lock already held. 
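 *
 * It follows the usual self-rearming callout pattern (a sketch only, not
 * the driver's exact code):
 *
 *	static void
 *	tick(void *arg)
 *	{
 *		struct mpr_softc *sc = arg;
 *
 *		if (sc->mpr_flags & MPR_FLAGS_SHUTDOWN)
 *			return;		(do not re-arm on teardown)
 *		... inspect MPI2_DOORBELL_OFFSET for a fault state ...
 *		callout_reset(&sc->periodic, MPR_PERIODIC_DELAY * hz,
 *		    tick, sc);
 *	}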
*/ static void mpr_periodic(void *arg) { struct mpr_softc *sc; uint32_t db; sc = (struct mpr_softc *)arg; if (sc->mpr_flags & MPR_FLAGS_SHUTDOWN) return; db = mpr_regread(sc, MPI2_DOORBELL_OFFSET); if ((db & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) { if ((db & MPI2_DOORBELL_FAULT_CODE_MASK) == IFAULT_IOP_OVER_TEMP_THRESHOLD_EXCEEDED) { panic("TEMPERATURE FAULT: STOPPING."); } mpr_dprint(sc, MPR_FAULT, "IOC Fault 0x%08x, Resetting\n", db); mpr_reinit(sc); } callout_reset(&sc->periodic, MPR_PERIODIC_DELAY * hz, mpr_periodic, sc); } static void mpr_log_evt_handler(struct mpr_softc *sc, uintptr_t data, MPI2_EVENT_NOTIFICATION_REPLY *event) { MPI2_EVENT_DATA_LOG_ENTRY_ADDED *entry; MPR_DPRINT_EVENT(sc, generic, event); switch (event->Event) { case MPI2_EVENT_LOG_DATA: mpr_dprint(sc, MPR_EVENT, "MPI2_EVENT_LOG_DATA:\n"); if (sc->mpr_debug & MPR_EVENT) hexdump(event->EventData, event->EventDataLength, NULL, 0); break; case MPI2_EVENT_LOG_ENTRY_ADDED: entry = (MPI2_EVENT_DATA_LOG_ENTRY_ADDED *)event->EventData; mpr_dprint(sc, MPR_EVENT, "MPI2_EVENT_LOG_ENTRY_ADDED event " "0x%x Sequence %d:\n", entry->LogEntryQualifier, entry->LogSequence); break; default: break; } return; } static int mpr_attach_log(struct mpr_softc *sc) { uint8_t events[16]; bzero(events, 16); setbit(events, MPI2_EVENT_LOG_DATA); setbit(events, MPI2_EVENT_LOG_ENTRY_ADDED); mpr_register_events(sc, events, mpr_log_evt_handler, NULL, &sc->mpr_log_eh); return (0); } static int mpr_detach_log(struct mpr_softc *sc) { if (sc->mpr_log_eh != NULL) mpr_deregister_events(sc, sc->mpr_log_eh); return (0); } /* * Free all of the driver resources and detach submodules. Should be called * without the lock held. */ int mpr_free(struct mpr_softc *sc) { int error; mpr_dprint(sc, MPR_INIT, "%s entered\n", __func__); /* Turn off the watchdog */ mpr_lock(sc); sc->mpr_flags |= MPR_FLAGS_SHUTDOWN; mpr_unlock(sc); /* Lock must not be held for this */ callout_drain(&sc->periodic); callout_drain(&sc->device_check_callout); if (((error = mpr_detach_log(sc)) != 0) || ((error = mpr_detach_sas(sc)) != 0)) { mpr_dprint(sc, MPR_INIT|MPR_FAULT, "failed to detach " "subsystems, error= %d, exit\n", error); return (error); } mpr_detach_user(sc); /* Put the IOC back in the READY state. */ mpr_lock(sc); if ((error = mpr_transition_ready(sc)) != 0) { mpr_unlock(sc); return (error); } mpr_unlock(sc); if (sc->facts != NULL) free(sc->facts, M_MPR); /* * Free all buffers that are based on IOC Facts. A Diag Reset may need * to free these buffers too. 
*/ mpr_iocfacts_free(sc); if (sc->sysctl_tree != NULL) sysctl_ctx_free(&sc->sysctl_ctx); /* Deregister the shutdown function */ if (sc->shutdown_eh != NULL) EVENTHANDLER_DEREGISTER(shutdown_final, sc->shutdown_eh); mtx_destroy(&sc->mpr_mtx); mpr_dprint(sc, MPR_INIT, "%s exit\n", __func__); return (0); } static __inline void mpr_complete_command(struct mpr_softc *sc, struct mpr_command *cm) { MPR_FUNCTRACE(sc); if (cm == NULL) { mpr_dprint(sc, MPR_ERROR, "Completing NULL command\n"); return; } if (cm->cm_flags & MPR_CM_FLAGS_POLLED) cm->cm_flags |= MPR_CM_FLAGS_COMPLETE; if (cm->cm_complete != NULL) { mpr_dprint(sc, MPR_TRACE, "%s cm %p calling cm_complete %p data %p reply %p\n", __func__, cm, cm->cm_complete, cm->cm_complete_data, cm->cm_reply); cm->cm_complete(sc, cm); } if (cm->cm_flags & MPR_CM_FLAGS_WAKEUP) { mpr_dprint(sc, MPR_TRACE, "waking up %p\n", cm); wakeup(cm); } if (sc->io_cmds_active != 0) { sc->io_cmds_active--; } else { mpr_dprint(sc, MPR_ERROR, "Warning: io_cmds_active is " "out of sync - resynching to 0\n"); } } static void mpr_sas_log_info(struct mpr_softc *sc, u32 log_info) { union loginfo_type { u32 loginfo; struct { u32 subcode:16; u32 code:8; u32 originator:4; u32 bus_type:4; } dw; }; union loginfo_type sas_loginfo; char *originator_str = NULL; sas_loginfo.loginfo = log_info; if (sas_loginfo.dw.bus_type != 3 /*SAS*/) return; /* each nexus loss loginfo */ if (log_info == 0x31170000) return; /* eat the loginfos associated with task aborts */ if ((log_info == 0x30050000) || (log_info == 0x31140000) || (log_info == 0x31130000)) return; switch (sas_loginfo.dw.originator) { case 0: originator_str = "IOP"; break; case 1: originator_str = "PL"; break; case 2: originator_str = "IR"; break; } mpr_dprint(sc, MPR_LOG, "log_info(0x%08x): originator(%s), " "code(0x%02x), sub_code(0x%04x)\n", log_info, originator_str, sas_loginfo.dw.code, sas_loginfo.dw.subcode); } static void mpr_display_reply_info(struct mpr_softc *sc, uint8_t *reply) { MPI2DefaultReply_t *mpi_reply; u16 sc_status; mpi_reply = (MPI2DefaultReply_t*)reply; sc_status = le16toh(mpi_reply->IOCStatus); if (sc_status & MPI2_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE) mpr_sas_log_info(sc, le32toh(mpi_reply->IOCLogInfo)); } void mpr_intr(void *data) { struct mpr_softc *sc; uint32_t status; sc = (struct mpr_softc *)data; mpr_dprint(sc, MPR_TRACE, "%s\n", __func__); /* * Check interrupt status register to flush the bus. This is * needed for both INTx interrupts and driver-driven polling */ status = mpr_regread(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET); if ((status & MPI2_HIS_REPLY_DESCRIPTOR_INTERRUPT) == 0) return; mpr_lock(sc); mpr_intr_locked(data); mpr_unlock(sc); return; } /* * In theory, MSI/MSIX interrupts shouldn't need to read any registers on the * chip. Hopefully this theory is correct. */ void mpr_intr_msi(void *data) { struct mpr_softc *sc; sc = (struct mpr_softc *)data; mpr_dprint(sc, MPR_TRACE, "%s\n", __func__); mpr_lock(sc); mpr_intr_locked(data); mpr_unlock(sc); return; } /* * The locking is overly broad and simplistic, but easy to deal with for now.
*/ void mpr_intr_locked(void *data) { MPI2_REPLY_DESCRIPTORS_UNION *desc; struct mpr_softc *sc; struct mpr_command *cm = NULL; uint8_t flags; u_int pq; MPI2_DIAG_RELEASE_REPLY *rel_rep; mpr_fw_diagnostic_buffer_t *pBuffer; sc = (struct mpr_softc *)data; pq = sc->replypostindex; mpr_dprint(sc, MPR_TRACE, "%s sc %p starting with replypostindex %u\n", __func__, sc, sc->replypostindex); for ( ;; ) { cm = NULL; desc = &sc->post_queue[sc->replypostindex]; flags = desc->Default.ReplyFlags & MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK; if ((flags == MPI2_RPY_DESCRIPT_FLAGS_UNUSED) || (le32toh(desc->Words.High) == 0xffffffff)) break; /* increment the replypostindex now, so that event handlers * and cm completion handlers which decide to do a diag * reset can zero it without it getting incremented again * afterwards, and we break out of this loop on the next * iteration since the reply post queue has been cleared to * 0xFF and all descriptors look unused (which they are). */ if (++sc->replypostindex >= sc->pqdepth) sc->replypostindex = 0; switch (flags) { case MPI2_RPY_DESCRIPT_FLAGS_SCSI_IO_SUCCESS: case MPI25_RPY_DESCRIPT_FLAGS_FAST_PATH_SCSI_IO_SUCCESS: case MPI26_RPY_DESCRIPT_FLAGS_PCIE_ENCAPSULATED_SUCCESS: cm = &sc->commands[le16toh(desc->SCSIIOSuccess.SMID)]; cm->cm_reply = NULL; break; case MPI2_RPY_DESCRIPT_FLAGS_ADDRESS_REPLY: { uint32_t baddr; uint8_t *reply; /* * Re-compose the reply address from the address * sent back from the chip. The ReplyFrameAddress * is the lower 32 bits of the physical address of * the particular reply frame. Convert that address to * host format, and then use that to provide the * offset against the virtual address base * (sc->reply_frames). */ baddr = le32toh(desc->AddressReply.ReplyFrameAddress); reply = sc->reply_frames + (baddr - ((uint32_t)sc->reply_busaddr)); /* * Make sure the reply we got back is in a valid * range. If not, go ahead and panic here, since * we'll probably panic as soon as we dereference the * reply pointer anyway. */ if ((reply < sc->reply_frames) || (reply > (sc->reply_frames + (sc->fqdepth * sc->facts->ReplyFrameSize * 4)))) { printf("%s: WARNING: reply %p out of range!\n", __func__, reply); printf("%s: reply_frames %p, fqdepth %d, " "frame size %d\n", __func__, sc->reply_frames, sc->fqdepth, sc->facts->ReplyFrameSize * 4); printf("%s: baddr %#x,\n", __func__, baddr); /* LSI-TODO. See Linux Code for Graceful exit */ panic("Reply address out of range"); } if (le16toh(desc->AddressReply.SMID) == 0) { if (((MPI2_DEFAULT_REPLY *)reply)->Function == MPI2_FUNCTION_DIAG_BUFFER_POST) { /* * If SMID is 0 for Diag Buffer Post, * this implies that the reply is due to * a release function with a status that * the buffer has been released. Set * the buffer flags accordingly. */ rel_rep = (MPI2_DIAG_RELEASE_REPLY *)reply; if ((le16toh(rel_rep->IOCStatus) & MPI2_IOCSTATUS_MASK) == MPI2_IOCSTATUS_DIAGNOSTIC_RELEASED) { pBuffer = &sc->fw_diag_buffer_list[ rel_rep->BufferType]; pBuffer->valid_data = TRUE; pBuffer->owned_by_firmware = FALSE; pBuffer->immediate = FALSE; } } else mpr_dispatch_event(sc, baddr, (MPI2_EVENT_NOTIFICATION_REPLY *) reply); } else { cm = &sc->commands[ le16toh(desc->AddressReply.SMID)]; cm->cm_reply = reply; cm->cm_reply_data = le32toh(desc->AddressReply.
ReplyFrameAddress); } break; } case MPI2_RPY_DESCRIPT_FLAGS_TARGETASSIST_SUCCESS: case MPI2_RPY_DESCRIPT_FLAGS_TARGET_COMMAND_BUFFER: case MPI2_RPY_DESCRIPT_FLAGS_RAID_ACCELERATOR_SUCCESS: default: /* Unhandled */ mpr_dprint(sc, MPR_ERROR, "Unhandled reply 0x%x\n", desc->Default.ReplyFlags); cm = NULL; break; } if (cm != NULL) { // Print Error reply frame if (cm->cm_reply) mpr_display_reply_info(sc,cm->cm_reply); mpr_complete_command(sc, cm); } desc->Words.Low = 0xffffffff; desc->Words.High = 0xffffffff; } if (pq != sc->replypostindex) { mpr_dprint(sc, MPR_TRACE, "%s sc %p writing postindex %d\n", __func__, sc, sc->replypostindex); mpr_regwrite(sc, MPI2_REPLY_POST_HOST_INDEX_OFFSET, sc->replypostindex); } return; } static void mpr_dispatch_event(struct mpr_softc *sc, uintptr_t data, MPI2_EVENT_NOTIFICATION_REPLY *reply) { struct mpr_event_handle *eh; int event, handled = 0; event = le16toh(reply->Event); TAILQ_FOREACH(eh, &sc->event_list, eh_list) { if (isset(eh->mask, event)) { eh->callback(sc, data, reply); handled++; } } if (handled == 0) mpr_dprint(sc, MPR_EVENT, "Unhandled event 0x%x\n", le16toh(event)); /* * This is the only place that the event/reply should be freed. * Anything wanting to hold onto the event data should have * already copied it into their own storage. */ mpr_free_reply(sc, data); } static void mpr_reregister_events_complete(struct mpr_softc *sc, struct mpr_command *cm) { mpr_dprint(sc, MPR_TRACE, "%s\n", __func__); if (cm->cm_reply) MPR_DPRINT_EVENT(sc, generic, (MPI2_EVENT_NOTIFICATION_REPLY *)cm->cm_reply); mpr_free_command(sc, cm); /* next, send a port enable */ mprsas_startup(sc); } /* * For both register_events and update_events, the caller supplies a bitmap * of events that it _wants_. These functions then turn that into a bitmask * suitable for the controller. 
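 *
 * The controller mask is inverted: a set bit disables the event. Starting
 * from all ones and clearing each handler's wanted bits means that, for
 * example, a lone handler asking only for event 0x14 (bit 4 of mask byte
 * 2, per setbit()) yields event_mask[2] = 0xef with every other byte
 * left at 0xff, so only event 0x14 is unmasked.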
*/ int mpr_register_events(struct mpr_softc *sc, uint8_t *mask, mpr_evt_callback_t *cb, void *data, struct mpr_event_handle **handle) { struct mpr_event_handle *eh; int error = 0; eh = malloc(sizeof(struct mpr_event_handle), M_MPR, M_WAITOK|M_ZERO); if (!eh) { mpr_dprint(sc, MPR_EVENT|MPR_ERROR, "Cannot allocate event memory\n"); return (ENOMEM); } eh->callback = cb; eh->data = data; TAILQ_INSERT_TAIL(&sc->event_list, eh, eh_list); if (mask != NULL) error = mpr_update_events(sc, eh, mask); *handle = eh; return (error); } int mpr_update_events(struct mpr_softc *sc, struct mpr_event_handle *handle, uint8_t *mask) { MPI2_EVENT_NOTIFICATION_REQUEST *evtreq; MPI2_EVENT_NOTIFICATION_REPLY *reply = NULL; struct mpr_command *cm = NULL; struct mpr_event_handle *eh; int error, i; mpr_dprint(sc, MPR_TRACE, "%s\n", __func__); if ((mask != NULL) && (handle != NULL)) bcopy(mask, &handle->mask[0], 16); memset(sc->event_mask, 0xff, 16); TAILQ_FOREACH(eh, &sc->event_list, eh_list) { for (i = 0; i < 16; i++) sc->event_mask[i] &= ~eh->mask[i]; } if ((cm = mpr_alloc_command(sc)) == NULL) return (EBUSY); evtreq = (MPI2_EVENT_NOTIFICATION_REQUEST *)cm->cm_req; evtreq->Function = MPI2_FUNCTION_EVENT_NOTIFICATION; evtreq->MsgFlags = 0; evtreq->SASBroadcastPrimitiveMasks = 0; #ifdef MPR_DEBUG_ALL_EVENTS { u_char fullmask[16]; memset(fullmask, 0x00, 16); bcopy(fullmask, (uint8_t *)&evtreq->EventMasks, 16); } #else bcopy(sc->event_mask, (uint8_t *)&evtreq->EventMasks, 16); #endif cm->cm_desc.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE; cm->cm_data = NULL; error = mpr_request_polled(sc, &cm); if (cm != NULL) reply = (MPI2_EVENT_NOTIFICATION_REPLY *)cm->cm_reply; if ((reply == NULL) || (reply->IOCStatus & MPI2_IOCSTATUS_MASK) != MPI2_IOCSTATUS_SUCCESS) error = ENXIO; if (reply) MPR_DPRINT_EVENT(sc, generic, reply); mpr_dprint(sc, MPR_TRACE, "%s finished error %d\n", __func__, error); if (cm != NULL) mpr_free_command(sc, cm); return (error); } static int mpr_reregister_events(struct mpr_softc *sc) { MPI2_EVENT_NOTIFICATION_REQUEST *evtreq; struct mpr_command *cm; struct mpr_event_handle *eh; int error, i; mpr_dprint(sc, MPR_TRACE, "%s\n", __func__); /* first, reregister events */ memset(sc->event_mask, 0xff, 16); TAILQ_FOREACH(eh, &sc->event_list, eh_list) { for (i = 0; i < 16; i++) sc->event_mask[i] &= ~eh->mask[i]; } if ((cm = mpr_alloc_command(sc)) == NULL) return (EBUSY); evtreq = (MPI2_EVENT_NOTIFICATION_REQUEST *)cm->cm_req; evtreq->Function = MPI2_FUNCTION_EVENT_NOTIFICATION; evtreq->MsgFlags = 0; evtreq->SASBroadcastPrimitiveMasks = 0; #ifdef MPR_DEBUG_ALL_EVENTS { u_char fullmask[16]; memset(fullmask, 0x00, 16); bcopy(fullmask, (uint8_t *)&evtreq->EventMasks, 16); } #else bcopy(sc->event_mask, (uint8_t *)&evtreq->EventMasks, 16); #endif cm->cm_desc.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE; cm->cm_data = NULL; cm->cm_complete = mpr_reregister_events_complete; error = mpr_map_command(sc, cm); mpr_dprint(sc, MPR_TRACE, "%s finished with error %d\n", __func__, error); return (error); } int mpr_deregister_events(struct mpr_softc *sc, struct mpr_event_handle *handle) { TAILQ_REMOVE(&sc->event_list, handle, eh_list); free(handle, M_MPR); return (mpr_update_events(sc, NULL, NULL)); } /** * mpr_build_nvme_prp - This function is called for NVMe end devices to build a * native SGL (NVMe PRP). The native SGL is built starting in the first PRP entry * of the NVMe message (PRP1). If the data buffer is small enough to be described * entirely using PRP1, then PRP2 is not used. 
If needed, PRP2 is used to * describe a larger data buffer. If the data buffer is too large to describe * using the two PRP entries inside the NVMe message, then PRP1 describes the * first data memory segment, and PRP2 contains a pointer to a PRP list located * elsewhere in memory to describe the remaining data memory segments. The PRP * list will be contiguous. * The native SGL for NVMe devices is a Physical Region Page (PRP). A PRP * consists of a list of PRP entries to describe a number of noncontiguous * physical memory segments as a single memory buffer, just as a SGL does. Note * however, that this function is only used by the IOCTL call, so the memory * given will be guaranteed to be contiguous. There is no need to translate * non-contiguous SGL into a PRP in this case. All PRPs will describe contiguous * space that is one page size each. * * Each NVMe message contains two PRP entries. The first (PRP1) either contains * a PRP list pointer or a PRP element, depending upon the command. PRP2 contains * the second PRP element if the memory being described fits within 2 PRP * entries, or a PRP list pointer if the PRP spans more than two entries. * * A PRP list pointer contains the address of a PRP list, structured as a linear * array of PRP entries. Each PRP entry in this list describes a segment of * physical memory. * * Each 64-bit PRP entry comprises an address and an offset field. The address * always points to the beginning of a PAGE_SIZE physical memory page, and the * offset describes where within that page the memory segment begins. Only the * first element in a PRP list may contain a non-zero offset, implying that all * memory segments following the first begin at the start of a PAGE_SIZE page. * * Each PRP element normally describes a chunk of PAGE_SIZE physical memory, * with exceptions for the first and last elements in the list. If the memory * being described by the list begins at a non-zero offset within the first page, * then the first PRP element will contain a non-zero offset indicating where the * region begins within the page. The last memory segment may end before the end * of the PAGE_SIZE segment, depending upon the overall size of the memory being * described by the PRP list. * * Since PRP entries lack any indication of size, the overall data buffer length * is used to determine where the end of the data memory buffer is located, and * how many PRP entries are required to describe it. * * Returns nothing. */ void mpr_build_nvme_prp(struct mpr_softc *sc, struct mpr_command *cm, Mpi26NVMeEncapsulatedRequest_t *nvme_encap_request, void *data, uint32_t data_in_sz, uint32_t data_out_sz) { int prp_size = PRP_ENTRY_SIZE; uint64_t *prp_entry, *prp1_entry, *prp2_entry; uint64_t *prp_entry_phys, *prp_page, *prp_page_phys; uint32_t offset, entry_len, page_mask_result, page_mask; bus_addr_t paddr; size_t length; struct mpr_prp_page *prp_page_info = NULL; /* * Not all commands require a data transfer. If no data, just return * without constructing any PRP. */ if (!data_in_sz && !data_out_sz) return; /* * Set pointers to PRP1 and PRP2, which are in the NVMe command. PRP1 is * located at a 24 byte offset from the start of the NVMe command. Then * set the current PRP entry pointer to PRP1. */ prp1_entry = (uint64_t *)(nvme_encap_request->NVMe_Command + NVME_CMD_PRP1_OFFSET); prp2_entry = (uint64_t *)(nvme_encap_request->NVMe_Command + NVME_CMD_PRP2_OFFSET); prp_entry = prp1_entry; /* * For the PRP entries, use the specially allocated buffer of * contiguous memory.
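 *
 * For example, with 4 KB pages, a 12 KB buffer starting 0x200 bytes into
 * a page needs four PRP entries: 3584 bytes (the only entry allowed a
 * non-zero offset), then 4096, 4096, and a final 512.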
PRP Page allocation failures should not happen * because there should be enough PRP page buffers to account for the * possible NVMe QDepth. */ prp_page_info = mpr_alloc_prp_page(sc); KASSERT(prp_page_info != NULL, ("%s: There are no PRP Pages left to be " "used for building a native NVMe SGL.\n", __func__)); prp_page = (uint64_t *)prp_page_info->prp_page; prp_page_phys = (uint64_t *)(uintptr_t)prp_page_info->prp_page_busaddr; /* * Insert the allocated PRP page into the command's PRP page list. This * will be freed when the command is freed. */ TAILQ_INSERT_TAIL(&cm->cm_prp_page_list, prp_page_info, prp_page_link); /* * Check if we are within 1 entry of a page boundary, because we don't * want our first entry to be a PRP List entry. */ page_mask = PAGE_SIZE - 1; page_mask_result = (uintptr_t)((uint8_t *)prp_page + prp_size) & page_mask; if (!page_mask_result) { /* Bump up to next page boundary. */ prp_page = (uint64_t *)((uint8_t *)prp_page + prp_size); prp_page_phys = (uint64_t *)((uint8_t *)prp_page_phys + prp_size); } /* * Set PRP physical pointer, which initially points to the current PRP * DMA memory page. */ prp_entry_phys = prp_page_phys; /* Get physical address and length of the data buffer. */ paddr = (bus_addr_t)data; if (data_in_sz) length = data_in_sz; else length = data_out_sz; /* Loop while the length is not zero. */ while (length) { /* * Check if we need to put a list pointer here if we are at page * boundary - prp_size (8 bytes). */ page_mask_result = (uintptr_t)((uint8_t *)prp_entry_phys + prp_size) & page_mask; if (!page_mask_result) { /* * This is the last entry in a PRP List, so we need to * put a PRP list pointer here. What this does is: * - bump the current memory pointer to the next * address, which will be the next full page. * - set the PRP Entry to point to that page. This is * now the PRP List pointer. * - bump the PRP Entry pointer to the start of the next * page. Since all of this PRP memory is contiguous, * no need to get a new page - it's just the next * address. */ prp_entry_phys++; *prp_entry = htole64((uint64_t)(uintptr_t)prp_entry_phys); prp_entry++; } /* Need to handle if entry will be part of a page. */ offset = (uint32_t)paddr & page_mask; entry_len = PAGE_SIZE - offset; if (prp_entry == prp1_entry) { /* * Must fill in the first PRP pointer (PRP1) before * moving on. */ *prp1_entry = htole64((uint64_t)paddr); /* * Now point to the second PRP entry within the * command (PRP2). */ prp_entry = prp2_entry; } else if (prp_entry == prp2_entry) { /* * Should the PRP2 entry be a PRP List pointer or just a * regular PRP pointer? If there is more than one more * page of data, must use a PRP List pointer. */ if (length > PAGE_SIZE) { /* * PRP2 will contain a PRP List pointer because * more PRPs are needed with this command. The * list will start at the beginning of the * contiguous buffer. */ *prp2_entry = htole64( (uint64_t)(uintptr_t)prp_entry_phys); /* * The next PRP Entry will be the start of the * first PRP List. */ prp_entry = prp_page; } else { /* * After this, the PRP Entries are complete. * This command uses 2 PRPs and no PRP list. */ *prp2_entry = htole64((uint64_t)paddr); } } else { /* * Put entry in list and bump the addresses. * * After PRP1 and PRP2 are filled in, this will fill in * all remaining PRP entries in a PRP List, one per each * time through the loop. */ *prp_entry = htole64((uint64_t)paddr); prp_entry++; prp_entry_phys++; } /* * Bump the phys address of the command's data buffer by the * entry_len.
*/ paddr += entry_len; /* Decrement length accounting for last partial page. */ if (entry_len > length) length = 0; else length -= entry_len; } } /* * mpr_check_pcie_native_sgl - This function is called for PCIe end devices to * determine if the driver needs to build a native SGL. If so, that native SGL * is built in the contiguous buffers allocated especially for PCIe SGL * creation. If the driver will not build a native SGL, return TRUE and a * normal IEEE SGL will be built. Currently this routine supports NVMe devices * only. * * Returns FALSE (0) if native SGL was built, TRUE (1) if no SGL was built. */ static int mpr_check_pcie_native_sgl(struct mpr_softc *sc, struct mpr_command *cm, bus_dma_segment_t *segs, int segs_left) { uint32_t i, sge_dwords, length, offset, entry_len; uint32_t num_entries, buff_len = 0, sges_in_segment; uint32_t page_mask, page_mask_result, *curr_buff; uint32_t *ptr_sgl, *ptr_first_sgl, first_page_offset; uint32_t first_page_data_size, end_residual; uint64_t *msg_phys; bus_addr_t paddr; int build_native_sgl = 0, first_prp_entry; int prp_size = PRP_ENTRY_SIZE; Mpi25IeeeSgeChain64_t *main_chain_element = NULL; struct mpr_prp_page *prp_page_info = NULL; mpr_dprint(sc, MPR_TRACE, "%s\n", __func__); /* * Add up the sizes of each segment length to get the total transfer * size, which will be checked against the Maximum Data Transfer Size. * If the data transfer length exceeds the MDTS for this device, just * return 1 so a normal IEEE SGL will be built. F/W will break the I/O * up into multiple I/O's. [nvme_mdts = 0 means unlimited] */ for (i = 0; i < segs_left; i++) buff_len += htole32(segs[i].ds_len); if ((cm->cm_targ->MDTS > 0) && (buff_len > cm->cm_targ->MDTS)) return 1; /* Create page_mask (to get offset within page) */ page_mask = PAGE_SIZE - 1; /* * Check if the number of elements exceeds the max number that can be * put in the main message frame (H/W can only translate an SGL that * is contained entirely in the main message frame). */ sges_in_segment = (sc->facts->IOCRequestFrameSize - offsetof(Mpi25SCSIIORequest_t, SGL)) / sizeof(MPI25_SGE_IO_UNION); if (segs_left > sges_in_segment) build_native_sgl = 1; else { /* * NVMe uses one PRP for each physical page (or part of physical * page). * if 4 pages or less then IEEE is OK * if > 5 pages then we need to build a native SGL * if > 4 and <= 5 pages, then check the physical address of * the first SG entry, then if this first size in the page * is >= the residual beyond 4 pages then use IEEE, * otherwise use native SGL */ if (buff_len > (PAGE_SIZE * 5)) build_native_sgl = 1; else if ((buff_len > (PAGE_SIZE * 4)) && (buff_len <= (PAGE_SIZE * 5)) ) { msg_phys = (uint64_t *)segs[0].ds_addr; first_page_offset = ((uint32_t)(uint64_t)(uintptr_t)msg_phys & page_mask); first_page_data_size = PAGE_SIZE - first_page_offset; end_residual = buff_len % PAGE_SIZE; /* * If offset into first page pushes the end of the data * beyond end of the 5th page, we need the extra PRP * list. */ if (first_page_data_size < end_residual) build_native_sgl = 1; /* * Check if first SG entry size is < residual beyond 4 * pages. */ if (htole32(segs[0].ds_len) < (buff_len - (PAGE_SIZE * 4))) build_native_sgl = 1; } } /* check if native SGL is needed */ if (!build_native_sgl) return 1; /* * Native SGL is needed. * Put a chain element in main message frame that points to the first * chain buffer. * * NOTE: The ChainOffset field must be 0 when using a chain pointer to * a native SGL. 
*/ /* Set main message chain element pointer */ main_chain_element = (pMpi25IeeeSgeChain64_t)cm->cm_sge; /* * For NVMe the chain element needs to be the 2nd SGL entry in the main * message. */ main_chain_element = (Mpi25IeeeSgeChain64_t *) ((uint8_t *)main_chain_element + sizeof(MPI25_IEEE_SGE_CHAIN64)); /* * For the PRP entries, use the specially allocated buffer of * contiguous memory. PRP Page allocation failures should not happen * because there should be enough PRP page buffers to account for the * possible NVMe QDepth. */ prp_page_info = mpr_alloc_prp_page(sc); KASSERT(prp_page_info != NULL, ("%s: There are no PRP Pages left to be " "used for building a native NVMe SGL.\n", __func__)); curr_buff = (uint32_t *)prp_page_info->prp_page; msg_phys = (uint64_t *)(uintptr_t)prp_page_info->prp_page_busaddr; /* * Insert the allocated PRP page into the command's PRP page list. This * will be freed when the command is freed. */ TAILQ_INSERT_TAIL(&cm->cm_prp_page_list, prp_page_info, prp_page_link); /* * Check if we are within 1 entry of a page boundary. We don't want our * first entry to be a PRP List entry. */ page_mask_result = (uintptr_t)((uint8_t *)curr_buff + prp_size) & page_mask; if (!page_mask_result) { /* Bump up to next page boundary. */ curr_buff = (uint32_t *)((uint8_t *)curr_buff + prp_size); msg_phys = (uint64_t *)((uint8_t *)msg_phys + prp_size); } /* Fill in the chain element and make it an NVMe segment type. */ main_chain_element->Address.High = htole32((uint32_t)((uint64_t)(uintptr_t)msg_phys >> 32)); main_chain_element->Address.Low = htole32((uint32_t)(uintptr_t)msg_phys); main_chain_element->NextChainOffset = 0; main_chain_element->Flags = MPI2_IEEE_SGE_FLAGS_CHAIN_ELEMENT | MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR | MPI26_IEEE_SGE_FLAGS_NSF_NVME_PRP; /* Set SGL pointer to start of contiguous PCIe buffer. */ ptr_sgl = curr_buff; sge_dwords = 2; num_entries = 0; /* * NVMe has a very convoluted PRP format. One PRP is required for each * page or partial page. We need to split up OS SG entries if they are * longer than one page or cross a page boundary. We also have to insert * a PRP list pointer entry as the last entry in each physical page of * the PRP list. * * NOTE: The first PRP "entry" is actually placed in the first SGL entry * in the main message in IEEE 64 format. The 2nd entry in the main * message is the chain element, and the rest of the PRP entries are * built in the contiguous PCIe buffer. */ first_prp_entry = 1; ptr_first_sgl = (uint32_t *)cm->cm_sge; for (i = 0; i < segs_left; i++) { /* Get physical address and length of this SG entry. */ paddr = segs[i].ds_addr; length = segs[i].ds_len; /* * Check whether a given SGE buffer starts on a non-page * boundary when it is not the first segment. This is not * expected, so return 1 so that a normal IEEE SGL is built * instead. */ if ((i != 0) && (((uint32_t)paddr & page_mask) != 0)) { mpr_dprint(sc, MPR_ERROR, "Unaligned SGE while " "building NVMe PRPs, low address is 0x%x\n", (uint32_t)paddr); return 1; } /* If any SGE other than the last does not end on a page * boundary, the buffer contains a hole. A hole would lead to * data corruption, so fall back to IEEE SGEs. */ if (i != (segs_left - 1)) { if (((uint32_t)paddr + length) & page_mask) { mpr_dprint(sc, MPR_ERROR, "Unaligned SGE " "boundary while building NVMe PRPs, low " "address: 0x%x and length: %u\n", (uint32_t)paddr, length); return 1; } } /* Loop while the length is not zero. */ while (length) { /* * Check if we need to put a list pointer here if we are * at page boundary - prp_size.
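* With 8-byte PRP entries and 4 KB pages (illustrative sizes), each * page of the list holds 512 slots: slots 0-510 carry data pointers * and the last slot is reserved for the pointer to the next list * page, which is what the check below detects.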
*/ page_mask_result = (uintptr_t)((uint8_t *)ptr_sgl + prp_size) & page_mask; if (!page_mask_result) { /* * Need to put a PRP list pointer here. */ msg_phys = (uint64_t *)((uint8_t *)msg_phys + prp_size); *ptr_sgl = htole32((uintptr_t)msg_phys); *(ptr_sgl+1) = htole32((uint64_t)(uintptr_t) msg_phys >> 32); ptr_sgl += sge_dwords; num_entries++; } /* Need to handle if entry will be part of a page. */ offset = (uint32_t)paddr & page_mask; entry_len = PAGE_SIZE - offset; if (first_prp_entry) { /* * Put IEEE entry in first SGE in main message. * (Simple element, System addr, not end of * list.) */ *ptr_first_sgl = htole32((uint32_t)paddr); *(ptr_first_sgl + 1) = htole32((uint32_t)((uint64_t)paddr >> 32)); *(ptr_first_sgl + 2) = htole32(entry_len); *(ptr_first_sgl + 3) = 0; /* No longer the first PRP entry. */ first_prp_entry = 0; } else { /* Put entry in list. */ *ptr_sgl = htole32((uint32_t)paddr); *(ptr_sgl + 1) = htole32((uint32_t)((uint64_t)paddr >> 32)); /* Bump ptr_sgl, msg_phys, and num_entries. */ ptr_sgl += sge_dwords; msg_phys = (uint64_t *)((uint8_t *)msg_phys + prp_size); num_entries++; } /* Bump the phys address by the entry_len. */ paddr += entry_len; /* Decrement length accounting for last partial page. */ if (entry_len > length) length = 0; else length -= entry_len; } } /* Set chain element Length. */ main_chain_element->Length = htole32(num_entries * prp_size); /* Return 0, indicating we built a native SGL. */ return 0; } /* * Add a chain element as the next SGE for the specified command. * Reset cm_sge and cm_sglsize to indicate all the available space. Chains are * only required for IEEE commands. Therefore there is no code for commands * that have the MPR_CM_FLAGS_SGE_SIMPLE flag set (and those commands * shouldn't be requesting chains). */ static int mpr_add_chain(struct mpr_command *cm, int segsleft) { struct mpr_softc *sc = cm->cm_sc; MPI2_REQUEST_HEADER *req; MPI25_IEEE_SGE_CHAIN64 *ieee_sgc; struct mpr_chain *chain; int sgc_size, current_segs, rem_segs, segs_per_frame; uint8_t next_chain_offset = 0; /* * Fail if a command is requesting a chain for SIMPLE SGE's. For SAS3 * only IEEE commands should be requesting chains. Return some error * code other than 0. */ if (cm->cm_flags & MPR_CM_FLAGS_SGE_SIMPLE) { mpr_dprint(sc, MPR_ERROR, "A chain element cannot be added to " "an MPI SGL.\n"); return(ENOBUFS); } sgc_size = sizeof(MPI25_IEEE_SGE_CHAIN64); if (cm->cm_sglsize < sgc_size) panic("MPR: Need SGE Error Code\n"); chain = mpr_alloc_chain(cm->cm_sc); if (chain == NULL) return (ENOBUFS); /* * Note: a doubly-linked list is used to make it easier to walk for * debugging. */ TAILQ_INSERT_TAIL(&cm->cm_chain_list, chain, chain_link); /* * Need to know if the number of frames left is more than 1 or not. If * more than 1 frame is required, NextChainOffset will need to be set, * which will just be the last segment of the frame. */ rem_segs = 0; if (cm->cm_sglsize < (sgc_size * segsleft)) { /* * rem_segs is the number of segments remaining after the * segments that will go into the current frame. Since it is * known that at least one more frame is required, account for * the chain element. To know if more than one more frame is * required, just check if there will be a remainder after using * the current frame (with this chain) and the next frame. If * so the NextChainOffset must be the last element of the next * frame.
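* For example (hypothetical sizes): with 16-byte IEEE SGEs and 64 * bytes of SGL space left, current_segs = 3; if segsleft = 40, then * rem_segs = 37. With a 128-byte chain frame, segs_per_frame = 8, and * since 37 > 8 another chain will follow, so next_chain_offset is set * to 7, the last element of the next frame.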
*/ current_segs = (cm->cm_sglsize / sgc_size) - 1; rem_segs = segsleft - current_segs; segs_per_frame = sc->chain_frame_size / sgc_size; if (rem_segs > segs_per_frame) { next_chain_offset = segs_per_frame - 1; } } ieee_sgc = &((MPI25_SGE_IO_UNION *)cm->cm_sge)->IeeeChain; ieee_sgc->Length = next_chain_offset ? htole32((uint32_t)sc->chain_frame_size) : htole32((uint32_t)rem_segs * (uint32_t)sgc_size); ieee_sgc->NextChainOffset = next_chain_offset; ieee_sgc->Flags = (MPI2_IEEE_SGE_FLAGS_CHAIN_ELEMENT | MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR); ieee_sgc->Address.Low = htole32(chain->chain_busaddr); ieee_sgc->Address.High = htole32(chain->chain_busaddr >> 32); cm->cm_sge = &((MPI25_SGE_IO_UNION *)chain->chain)->IeeeSimple; req = (MPI2_REQUEST_HEADER *)cm->cm_req; req->ChainOffset = (sc->chain_frame_size - sgc_size) >> 4; cm->cm_sglsize = sc->chain_frame_size; return (0); } /* * Add one scatter-gather element to the scatter-gather list for a command. * Maintain cm_sglsize and cm_sge as the remaining size and pointer to the * next SGE to fill in, respectively. In Gen3, the MPI SGL does not have a * chain, so don't consider any chain additions. */ int mpr_push_sge(struct mpr_command *cm, MPI2_SGE_SIMPLE64 *sge, size_t len, int segsleft) { uint32_t saved_buf_len, saved_address_low, saved_address_high; u32 sge_flags; /* * case 1: >=1 more segment, no room for anything (error) * case 2: 1 more segment and enough room for it */ if (cm->cm_sglsize < (segsleft * sizeof(MPI2_SGE_SIMPLE64))) { mpr_dprint(cm->cm_sc, MPR_ERROR, "%s: warning: Not enough room for MPI SGL in frame.\n", __func__); return(ENOBUFS); } KASSERT(segsleft == 1, ("segsleft cannot be more than 1 for an MPI SGL; segsleft = %d\n", segsleft)); /* * There is one more segment left to add for the MPI SGL and there is * enough room in the frame to add it. This is the normal case because * MPI SGL's don't have chains, otherwise something is wrong. * * If this is a bi-directional request, need to account for that * here. Save the pre-filled sge values. These will be used * either for the 2nd SGL or for a single direction SGL. If * cm_out_len is non-zero, this is a bi-directional request, so * fill in the OUT SGL first, then the IN SGL, otherwise just * fill in the IN SGL. Note that at this time, when filling in * 2 SGL's for a bi-directional request, they both use the same * DMA buffer (same cm command). 
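* The length is saved with the 0x00FFFFFF mask below because a simple * MPI2 SGE packs the byte count into the low 24 bits of FlagsLength * and the flags into the top byte (shifted by MPI2_SGE_FLAGS_SHIFT).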
*/ saved_buf_len = sge->FlagsLength & 0x00FFFFFF; saved_address_low = sge->Address.Low; saved_address_high = sge->Address.High; if (cm->cm_out_len) { sge->FlagsLength = cm->cm_out_len | ((uint32_t)(MPI2_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_HOST_TO_IOC | MPI2_SGE_FLAGS_64_BIT_ADDRESSING) << MPI2_SGE_FLAGS_SHIFT); cm->cm_sglsize -= len; /* Endian Safe code */ sge_flags = sge->FlagsLength; sge->FlagsLength = htole32(sge_flags); sge->Address.High = htole32(sge->Address.High); sge->Address.Low = htole32(sge->Address.Low); bcopy(sge, cm->cm_sge, len); cm->cm_sge = (MPI2_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + len); } sge->FlagsLength = saved_buf_len | ((uint32_t)(MPI2_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_LAST_ELEMENT | MPI2_SGE_FLAGS_END_OF_LIST | MPI2_SGE_FLAGS_64_BIT_ADDRESSING) << MPI2_SGE_FLAGS_SHIFT); if (cm->cm_flags & MPR_CM_FLAGS_DATAIN) { sge->FlagsLength |= ((uint32_t)(MPI2_SGE_FLAGS_IOC_TO_HOST) << MPI2_SGE_FLAGS_SHIFT); } else { sge->FlagsLength |= ((uint32_t)(MPI2_SGE_FLAGS_HOST_TO_IOC) << MPI2_SGE_FLAGS_SHIFT); } sge->Address.Low = saved_address_low; sge->Address.High = saved_address_high; cm->cm_sglsize -= len; /* Endian Safe code */ sge_flags = sge->FlagsLength; sge->FlagsLength = htole32(sge_flags); sge->Address.High = htole32(sge->Address.High); sge->Address.Low = htole32(sge->Address.Low); bcopy(sge, cm->cm_sge, len); cm->cm_sge = (MPI2_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + len); return (0); } /* * Add one IEEE scatter-gather element (chain or simple) to the IEEE scatter- * gather list for a command. Maintain cm_sglsize and cm_sge as the * remaining size and pointer to the next SGE to fill in, respectively. */ int mpr_push_ieee_sge(struct mpr_command *cm, void *sgep, int segsleft) { MPI2_IEEE_SGE_SIMPLE64 *sge = sgep; int error, ieee_sge_size = sizeof(MPI25_SGE_IO_UNION); uint32_t saved_buf_len, saved_address_low, saved_address_high; uint32_t sge_length; /* * case 1: No room for chain or segment (error). * case 2: Two or more segments left but only room for chain. * case 3: Last segment and room for it, so set flags. */ /* * There should be room for at least one element, or there is a big * problem. */ if (cm->cm_sglsize < ieee_sge_size) panic("MPR: Need SGE Error Code\n"); if ((segsleft >= 2) && (cm->cm_sglsize < (ieee_sge_size * 2))) { if ((error = mpr_add_chain(cm, segsleft)) != 0) return (error); } if (segsleft == 1) { /* * If this is a bi-directional request, need to account for that * here. Save the pre-filled sge values. These will be used * either for the 2nd SGL or for a single direction SGL. If * cm_out_len is non-zero, this is a bi-directional request, so * fill in the OUT SGL first, then the IN SGL, otherwise just * fill in the IN SGL. Note that at this time, when filling in * 2 SGL's for a bi-directional request, they both use the same * DMA buffer (same cm command). 
*/ saved_buf_len = sge->Length; saved_address_low = sge->Address.Low; saved_address_high = sge->Address.High; if (cm->cm_out_len) { sge->Length = cm->cm_out_len; sge->Flags = (MPI2_IEEE_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR); cm->cm_sglsize -= ieee_sge_size; /* Endian Safe code */ sge_length = sge->Length; sge->Length = htole32(sge_length); sge->Address.High = htole32(sge->Address.High); sge->Address.Low = htole32(sge->Address.Low); bcopy(sgep, cm->cm_sge, ieee_sge_size); cm->cm_sge = (MPI25_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + ieee_sge_size); } sge->Length = saved_buf_len; sge->Flags = (MPI2_IEEE_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR | MPI25_IEEE_SGE_FLAGS_END_OF_LIST); sge->Address.Low = saved_address_low; sge->Address.High = saved_address_high; } cm->cm_sglsize -= ieee_sge_size; /* Endian Safe code */ sge_length = sge->Length; sge->Length = htole32(sge_length); sge->Address.High = htole32(sge->Address.High); sge->Address.Low = htole32(sge->Address.Low); bcopy(sgep, cm->cm_sge, ieee_sge_size); cm->cm_sge = (MPI25_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + ieee_sge_size); return (0); } /* * Add one dma segment to the scatter-gather list for a command. */ int mpr_add_dmaseg(struct mpr_command *cm, vm_paddr_t pa, size_t len, u_int flags, int segsleft) { MPI2_SGE_SIMPLE64 sge; MPI2_IEEE_SGE_SIMPLE64 ieee_sge; if (!(cm->cm_flags & MPR_CM_FLAGS_SGE_SIMPLE)) { ieee_sge.Flags = (MPI2_IEEE_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR); ieee_sge.Length = len; mpr_from_u64(pa, &ieee_sge.Address); return (mpr_push_ieee_sge(cm, &ieee_sge, segsleft)); } else { /* * This driver always uses 64-bit address elements for * simplicity. */ flags |= MPI2_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_SGE_FLAGS_64_BIT_ADDRESSING; /* Set Endian safe macro in mpr_push_sge */ sge.FlagsLength = len | (flags << MPI2_SGE_FLAGS_SHIFT); mpr_from_u64(pa, &sge.Address); return (mpr_push_sge(cm, &sge, sizeof sge, segsleft)); } } static void mpr_data_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct mpr_softc *sc; struct mpr_command *cm; u_int i, dir, sflags; cm = (struct mpr_command *)arg; sc = cm->cm_sc; /* * In this case, just print out a warning and let the chip tell the * user they did the wrong thing. */ if ((cm->cm_max_segs != 0) && (nsegs > cm->cm_max_segs)) { mpr_dprint(sc, MPR_ERROR, "%s: warning: busdma returned %d " "segments, more than the %d allowed\n", __func__, nsegs, cm->cm_max_segs); } /* * Set up DMA direction flags. Bi-directional requests are also handled * here. In that case, both direction flags will be set. */ sflags = 0; if (cm->cm_flags & MPR_CM_FLAGS_SMP_PASS) { /* * We have to add a special case for SMP passthrough, there * is no easy way to generically handle it. The first * S/G element is used for the command (therefore the * direction bit needs to be set). The second one is used * for the reply. We'll leave it to the caller to make * sure we only have two buffers. */ /* * Even though the busdma man page says it doesn't make * sense to have both direction flags, it does in this case. * We have one s/g element being accessed in each direction. */ dir = BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD; /* * Set the direction flag on the first buffer in the SMP * passthrough request. We'll clear it for the second one. 
sflags |= MPI2_SGE_FLAGS_DIRECTION | MPI2_SGE_FLAGS_END_OF_BUFFER; } else if (cm->cm_flags & MPR_CM_FLAGS_DATAOUT) { sflags |= MPI2_SGE_FLAGS_HOST_TO_IOC; dir = BUS_DMASYNC_PREWRITE; } else dir = BUS_DMASYNC_PREREAD; /* Check if a native SG list is needed for an NVMe PCIe device. */ if (cm->cm_targ && cm->cm_targ->is_nvme && mpr_check_pcie_native_sgl(sc, cm, segs, nsegs) == 0) { /* A native SG list was built, skip to end. */ goto out; } for (i = 0; i < nsegs; i++) { if ((cm->cm_flags & MPR_CM_FLAGS_SMP_PASS) && (i != 0)) { sflags &= ~MPI2_SGE_FLAGS_DIRECTION; } error = mpr_add_dmaseg(cm, segs[i].ds_addr, segs[i].ds_len, sflags, nsegs - i); if (error != 0) { /* Resource shortage, roll back! */ if (ratecheck(&sc->lastfail, &mpr_chainfail_interval)) mpr_dprint(sc, MPR_INFO, "Out of chain frames, " "consider increasing hw.mpr.max_chains.\n"); cm->cm_flags |= MPR_CM_FLAGS_CHAIN_FAILED; mpr_complete_command(sc, cm); return; } } out: bus_dmamap_sync(sc->buffer_dmat, cm->cm_dmamap, dir); mpr_enqueue_request(sc, cm); return; } static void mpr_data_cb2(void *arg, bus_dma_segment_t *segs, int nsegs, bus_size_t mapsize, int error) { mpr_data_cb(arg, segs, nsegs, error); } /* * This is the routine to enqueue commands asynchronously. * Note that the only error path here is from bus_dmamap_load(), which can * return EINPROGRESS if it is waiting for resources. Other than this, it's * assumed that if you have a command in-hand, then you have enough credits * to use it. */ int mpr_map_command(struct mpr_softc *sc, struct mpr_command *cm) { int error = 0; if (cm->cm_flags & MPR_CM_FLAGS_USE_UIO) { error = bus_dmamap_load_uio(sc->buffer_dmat, cm->cm_dmamap, &cm->cm_uio, mpr_data_cb2, cm, 0); } else if (cm->cm_flags & MPR_CM_FLAGS_USE_CCB) { error = bus_dmamap_load_ccb(sc->buffer_dmat, cm->cm_dmamap, cm->cm_data, mpr_data_cb, cm, 0); } else if ((cm->cm_data != NULL) && (cm->cm_length != 0)) { error = bus_dmamap_load(sc->buffer_dmat, cm->cm_dmamap, cm->cm_data, cm->cm_length, mpr_data_cb, cm, 0); } else { /* Add a zero-length element as needed */ if (cm->cm_sge != NULL) mpr_add_dmaseg(cm, 0, 0, 0, 1); mpr_enqueue_request(sc, cm); } return (error); } /* * This is the routine to enqueue commands synchronously. An error of * EINPROGRESS from mpr_map_command() is ignored since the command will * be executed and enqueued automatically. Other errors come from msleep(). */ int mpr_wait_command(struct mpr_softc *sc, struct mpr_command **cmp, int timeout, int sleep_flag) { int error, rc; struct timeval cur_time, start_time; struct mpr_command *cm = *cmp; if (sc->mpr_flags & MPR_FLAGS_DIAGRESET) return EBUSY; cm->cm_complete = NULL; cm->cm_flags |= (MPR_CM_FLAGS_WAKEUP + MPR_CM_FLAGS_POLLED); error = mpr_map_command(sc, cm); if ((error != 0) && (error != EINPROGRESS)) return (error); // Check for context and wait for 50 mSec at a time until time has // expired or the command has finished. If msleep can't be used, need // to poll. #if __FreeBSD_version >= 1000029 if (curthread->td_no_sleeping) #else //__FreeBSD_version < 1000029 if (curthread->td_pflags & TDP_NOSLEEPING) #endif //__FreeBSD_version >= 1000029 sleep_flag = NO_SLEEP; getmicrouptime(&start_time); if (mtx_owned(&sc->mpr_mtx) && sleep_flag == CAN_SLEEP) { error = msleep(cm, &sc->mpr_mtx, 0, "mprwait", timeout*hz); if (error == EWOULDBLOCK) { /* * Record the actual elapsed time in the case of a * timeout for the message below.
*/ getmicrouptime(&cur_time); timevalsub(&cur_time, &start_time); } } else { while ((cm->cm_flags & MPR_CM_FLAGS_COMPLETE) == 0) { mpr_intr_locked(sc); if (sleep_flag == CAN_SLEEP) pause("mprwait", hz/20); else DELAY(50000); getmicrouptime(&cur_time); timevalsub(&cur_time, &start_time); if (cur_time.tv_sec > timeout) { error = EWOULDBLOCK; break; } } } if (error == EWOULDBLOCK) { mpr_dprint(sc, MPR_FAULT, "Calling Reinit from %s, timeout=%d," " elapsed=%jd\n", __func__, timeout, (intmax_t)cur_time.tv_sec); rc = mpr_reinit(sc); mpr_dprint(sc, MPR_FAULT, "Reinit %s\n", (rc == 0) ? "success" : "failed"); if (sc->mpr_flags & MPR_FLAGS_REALLOCATED) { /* * Tell the caller that we freed the command in a * reinit. */ *cmp = NULL; } error = ETIMEDOUT; } return (error); } /* * This is the routine to enqueue a command synchronously and poll for * completion. Its use should be rare. */ int mpr_request_polled(struct mpr_softc *sc, struct mpr_command **cmp) { int error, rc; struct timeval cur_time, start_time; struct mpr_command *cm = *cmp; error = 0; cm->cm_flags |= MPR_CM_FLAGS_POLLED; cm->cm_complete = NULL; mpr_map_command(sc, cm); getmicrouptime(&start_time); while ((cm->cm_flags & MPR_CM_FLAGS_COMPLETE) == 0) { mpr_intr_locked(sc); if (mtx_owned(&sc->mpr_mtx)) msleep(&sc->msleep_fake_chan, &sc->mpr_mtx, 0, "mprpoll", hz/20); else pause("mprpoll", hz/20); /* * Check for real-time timeout and fail if more than 60 seconds. */ getmicrouptime(&cur_time); timevalsub(&cur_time, &start_time); if (cur_time.tv_sec > 60) { mpr_dprint(sc, MPR_FAULT, "polling failed\n"); error = ETIMEDOUT; break; } } if (error) { mpr_dprint(sc, MPR_FAULT, "Calling Reinit from %s\n", __func__); rc = mpr_reinit(sc); mpr_dprint(sc, MPR_FAULT, "Reinit %s\n", (rc == 0) ? "success" : "failed"); if (sc->mpr_flags & MPR_FLAGS_REALLOCATED) { /* * Tell the caller that we freed the command in a * reinit. */ *cmp = NULL; } } return (error); } /* * The MPT driver had a verbose interface for config pages. In this driver, * reduce it to much simpler terms, similar to the Linux driver.
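* * A minimal synchronous read might look like this (a sketch only; * "header", "buf" and "len" are hypothetical caller-side values, with * the header typically obtained from an earlier * MPI2_CONFIG_ACTION_PAGE_HEADER request): * * bzero(&params, sizeof(params)); * params.action = MPI2_CONFIG_ACTION_PAGE_READ_CURRENT; * params.page_address = 0; * params.hdr.Struct = header; * params.buffer = buf; * params.length = len; * params.callback = NULL; * error = mpr_read_config_page(sc, &params); * * A NULL callback makes the call wait for completion via * mpr_wait_command(); with a non-NULL callback the call returns after * queueing and the callback runs from mpr_config_complete().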
*/ int mpr_read_config_page(struct mpr_softc *sc, struct mpr_config_params *params) { MPI2_CONFIG_REQUEST *req; struct mpr_command *cm; int error; if (sc->mpr_flags & MPR_FLAGS_BUSY) { return (EBUSY); } cm = mpr_alloc_command(sc); if (cm == NULL) { return (EBUSY); } req = (MPI2_CONFIG_REQUEST *)cm->cm_req; req->Function = MPI2_FUNCTION_CONFIG; req->Action = params->action; req->SGLFlags = 0; req->ChainOffset = 0; req->PageAddress = params->page_address; if (params->hdr.Struct.PageType == MPI2_CONFIG_PAGETYPE_EXTENDED) { MPI2_CONFIG_EXTENDED_PAGE_HEADER *hdr; hdr = &params->hdr.Ext; req->ExtPageType = hdr->ExtPageType; req->ExtPageLength = hdr->ExtPageLength; req->Header.PageType = MPI2_CONFIG_PAGETYPE_EXTENDED; req->Header.PageLength = 0; /* Must be set to zero */ req->Header.PageNumber = hdr->PageNumber; req->Header.PageVersion = hdr->PageVersion; } else { MPI2_CONFIG_PAGE_HEADER *hdr; hdr = &params->hdr.Struct; req->Header.PageType = hdr->PageType; req->Header.PageNumber = hdr->PageNumber; req->Header.PageLength = hdr->PageLength; req->Header.PageVersion = hdr->PageVersion; } cm->cm_data = params->buffer; cm->cm_length = params->length; if (cm->cm_data != NULL) { cm->cm_sge = &req->PageBufferSGE; cm->cm_sglsize = sizeof(MPI2_SGE_IO_UNION); cm->cm_flags = MPR_CM_FLAGS_SGE_SIMPLE | MPR_CM_FLAGS_DATAIN; } else cm->cm_sge = NULL; cm->cm_desc.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE; cm->cm_complete_data = params; if (params->callback != NULL) { cm->cm_complete = mpr_config_complete; return (mpr_map_command(sc, cm)); } else { error = mpr_wait_command(sc, &cm, 0, CAN_SLEEP); if (error) { mpr_dprint(sc, MPR_FAULT, "Error %d reading config page\n", error); if (cm != NULL) mpr_free_command(sc, cm); return (error); } mpr_config_complete(sc, cm); } return (0); } int mpr_write_config_page(struct mpr_softc *sc, struct mpr_config_params *params) { return (EINVAL); } static void mpr_config_complete(struct mpr_softc *sc, struct mpr_command *cm) { MPI2_CONFIG_REPLY *reply; struct mpr_config_params *params; MPR_FUNCTRACE(sc); params = cm->cm_complete_data; if (cm->cm_data != NULL) { bus_dmamap_sync(sc->buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->buffer_dmat, cm->cm_dmamap); } /* * XXX KDM need to do more error recovery? This results in the * device in question not getting probed.
*/ if ((cm->cm_flags & MPR_CM_FLAGS_ERROR_MASK) != 0) { params->status = MPI2_IOCSTATUS_BUSY; goto done; } reply = (MPI2_CONFIG_REPLY *)cm->cm_reply; if (reply == NULL) { params->status = MPI2_IOCSTATUS_BUSY; goto done; } params->status = reply->IOCStatus; if (params->hdr.Struct.PageType == MPI2_CONFIG_PAGETYPE_EXTENDED) { params->hdr.Ext.ExtPageType = reply->ExtPageType; params->hdr.Ext.ExtPageLength = reply->ExtPageLength; params->hdr.Ext.PageType = reply->Header.PageType; params->hdr.Ext.PageNumber = reply->Header.PageNumber; params->hdr.Ext.PageVersion = reply->Header.PageVersion; } else { params->hdr.Struct.PageType = reply->Header.PageType; params->hdr.Struct.PageNumber = reply->Header.PageNumber; params->hdr.Struct.PageLength = reply->Header.PageLength; params->hdr.Struct.PageVersion = reply->Header.PageVersion; } done: mpr_free_command(sc, cm); if (params->callback != NULL) params->callback(sc, params); return; } Index: projects/runtime-coverage/sys/dev/mps/mps.c =================================================================== --- projects/runtime-coverage/sys/dev/mps/mps.c (revision 323974) +++ projects/runtime-coverage/sys/dev/mps/mps.c (revision 323975) @@ -1,2914 +1,3020 @@ /*- * Copyright (c) 2009 Yahoo! Inc. * Copyright (c) 2011-2015 LSI Corp. * Copyright (c) 2013-2015 Avago Technologies * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * Avago Technologies (LSI) MPT-Fusion Host Adapter FreeBSD * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); /* Communications core for Avago Technologies (LSI) MPT2 */ /* TODO Move headers to mpsvar */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int mps_diag_reset(struct mps_softc *sc, int sleep_flag); static int mps_init_queues(struct mps_softc *sc); static void mps_resize_queues(struct mps_softc *sc); static int mps_message_unit_reset(struct mps_softc *sc, int sleep_flag); static int mps_transition_operational(struct mps_softc *sc); static int mps_iocfacts_allocate(struct mps_softc *sc, uint8_t attaching); static void mps_iocfacts_free(struct mps_softc *sc); static void mps_startup(void *arg); static int mps_send_iocinit(struct mps_softc *sc); static int mps_alloc_queues(struct mps_softc *sc); static int mps_alloc_hw_queues(struct mps_softc *sc); static int mps_alloc_replies(struct mps_softc *sc); static int mps_alloc_requests(struct mps_softc *sc); static int mps_attach_log(struct mps_softc *sc); static __inline void mps_complete_command(struct mps_softc *sc, struct mps_command *cm); static void mps_dispatch_event(struct mps_softc *sc, uintptr_t data, MPI2_EVENT_NOTIFICATION_REPLY *reply); static void mps_config_complete(struct mps_softc *sc, struct mps_command *cm); static void mps_periodic(void *); static int mps_reregister_events(struct mps_softc *sc); static void mps_enqueue_request(struct mps_softc *sc, struct mps_command *cm); static int mps_get_iocfacts(struct mps_softc *sc, MPI2_IOC_FACTS_REPLY *facts); static int mps_wait_db_ack(struct mps_softc *sc, int timeout, int sleep_flag); +static int mps_debug_sysctl(SYSCTL_HANDLER_ARGS); +static void mps_parse_debug(struct mps_softc *sc, char *list); + SYSCTL_NODE(_hw, OID_AUTO, mps, CTLFLAG_RD, 0, "MPS Driver Parameters"); MALLOC_DEFINE(M_MPT2, "mps", "mpt2 driver memory"); /* * Do a "Diagnostic Reset" aka a hard reset. This should get the chip out of * any state and back to its initialization state machine. */ static char mpt2_reset_magic[] = { 0x00, 0x0f, 0x04, 0x0b, 0x02, 0x07, 0x0d }; /* Added this union to smoothly convert le64toh cm->cm_desc.Words. * The compiler only supports uint64_t to be passed as an argument. * Otherwise it will throw the error below: * "aggregate value used where an integer was expected" */ typedef union _reply_descriptor { u64 word; struct { u32 low; u32 high; } u; } reply_descriptor, address_descriptor; /* Rate limit chain-fail messages to 1 per minute */ static struct timeval mps_chainfail_interval = { 60, 0 }; /* * sleep_flag can be either CAN_SLEEP or NO_SLEEP. * If this function is called from process context, it can sleep and * there is no harm in sleeping. If it is called from an interrupt * handler, we cannot sleep and need the NO_SLEEP flag set. * Based on the sleep flags, the driver will call msleep, pause or * DELAY. msleep and pause are similar, but pause is used when mps_mtx * is not held by the driver.
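* In short, as mps_diag_reset() below shows: mps_mtx held with * CAN_SLEEP -> msleep; CAN_SLEEP without the mutex -> pause; * NO_SLEEP -> DELAY busy-wait.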
* */ static int mps_diag_reset(struct mps_softc *sc,int sleep_flag) { uint32_t reg; int i, error, tries = 0; uint8_t first_wait_done = FALSE; mps_dprint(sc, MPS_INIT, "%s entered\n", __func__); /* Clear any pending interrupts */ mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); /* * Force NO_SLEEP for threads that are prohibited from sleeping, * e.g. threads running from an interrupt handler. */ if (curthread->td_no_sleeping != 0) sleep_flag = NO_SLEEP; mps_dprint(sc, MPS_INIT, "sequence start, sleep_flag= %d\n", sleep_flag); /* Push the magic sequence */ error = ETIMEDOUT; while (tries++ < 20) { for (i = 0; i < sizeof(mpt2_reset_magic); i++) mps_regwrite(sc, MPI2_WRITE_SEQUENCE_OFFSET, mpt2_reset_magic[i]); /* wait 100 msec */ if (mtx_owned(&sc->mps_mtx) && sleep_flag == CAN_SLEEP) msleep(&sc->msleep_fake_chan, &sc->mps_mtx, 0, "mpsdiag", hz/10); else if (sleep_flag == CAN_SLEEP) pause("mpsdiag", hz/10); else DELAY(100 * 1000); reg = mps_regread(sc, MPI2_HOST_DIAGNOSTIC_OFFSET); if (reg & MPI2_DIAG_DIAG_WRITE_ENABLE) { error = 0; break; } } if (error) { mps_dprint(sc, MPS_INIT, "sequence failed, error=%d, exit\n", error); return (error); } /* Send the actual reset. XXX need to refresh the reg? */ reg |= MPI2_DIAG_RESET_ADAPTER; mps_dprint(sc, MPS_INIT, "sequence success, sending reset, reg= 0x%x\n", reg); mps_regwrite(sc, MPI2_HOST_DIAGNOSTIC_OFFSET, reg); /* Wait up to 300 seconds in 50ms intervals */ error = ETIMEDOUT; for (i = 0; i < 6000; i++) { /* * Wait 50 msec. If this is the first time through, wait 256 * msec to satisfy Diag Reset timing requirements. */ if (first_wait_done) { if (mtx_owned(&sc->mps_mtx) && sleep_flag == CAN_SLEEP) msleep(&sc->msleep_fake_chan, &sc->mps_mtx, 0, "mpsdiag", hz/20); else if (sleep_flag == CAN_SLEEP) pause("mpsdiag", hz/20); else DELAY(50 * 1000); } else { DELAY(256 * 1000); first_wait_done = TRUE; } /* * Check for the RESET_ADAPTER bit to be cleared first, then * wait for the RESET state to be cleared, which takes a little * longer. */ reg = mps_regread(sc, MPI2_HOST_DIAGNOSTIC_OFFSET); if (reg & MPI2_DIAG_RESET_ADAPTER) { continue; } reg = mps_regread(sc, MPI2_DOORBELL_OFFSET); if ((reg & MPI2_IOC_STATE_MASK) != MPI2_IOC_STATE_RESET) { error = 0; break; } } if (error) { mps_dprint(sc, MPS_INIT, "reset failed, error= %d, exit\n", error); return (error); } mps_regwrite(sc, MPI2_WRITE_SEQUENCE_OFFSET, 0x0); mps_dprint(sc, MPS_INIT, "diag reset success, exit\n"); return (0); } static int mps_message_unit_reset(struct mps_softc *sc, int sleep_flag) { int error; MPS_FUNCTRACE(sc); mps_dprint(sc, MPS_INIT, "%s entered\n", __func__); error = 0; mps_regwrite(sc, MPI2_DOORBELL_OFFSET, MPI2_FUNCTION_IOC_MESSAGE_UNIT_RESET << MPI2_DOORBELL_FUNCTION_SHIFT); if (mps_wait_db_ack(sc, 5, sleep_flag) != 0) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "Doorbell handshake failed\n"); error = ETIMEDOUT; } mps_dprint(sc, MPS_INIT, "%s exit\n", __func__); return (error); } static int mps_transition_ready(struct mps_softc *sc) { uint32_t reg, state; int error, tries = 0; int sleep_flags; MPS_FUNCTRACE(sc); /* If we are in attach call, do not sleep */ sleep_flags = (sc->mps_flags & MPS_FLAGS_ATTACH_DONE) ? CAN_SLEEP:NO_SLEEP; error = 0; mps_dprint(sc, MPS_INIT, "%s entered, sleep_flags= %d\n", __func__, sleep_flags); while (tries++ < 1200) { reg = mps_regread(sc, MPI2_DOORBELL_OFFSET); mps_dprint(sc, MPS_INIT, " Doorbell= 0x%x\n", reg); /* * Ensure the IOC is ready to talk. If it's not, try * resetting it.
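* The state machine below: DOORBELL_USED -> send a diag reset; owned * by a PCI peer -> give up; READY -> done; FAULT -> diag reset; * OPERATIONAL -> message unit reset to take ownership; RESET -> wait * for the transition to finish.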
*/ if (reg & MPI2_DOORBELL_USED) { mps_dprint(sc, MPS_INIT, " Not ready, sending diag " "reset\n"); mps_diag_reset(sc, sleep_flags); DELAY(50000); continue; } /* Is the adapter owned by another peer? */ if ((reg & MPI2_DOORBELL_WHO_INIT_MASK) == (MPI2_WHOINIT_PCI_PEER << MPI2_DOORBELL_WHO_INIT_SHIFT)) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "IOC is under the " "control of another peer host, aborting " "initialization.\n"); error = ENXIO; break; } state = reg & MPI2_IOC_STATE_MASK; if (state == MPI2_IOC_STATE_READY) { /* Ready to go! */ error = 0; break; } else if (state == MPI2_IOC_STATE_FAULT) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "IOC in fault " "state 0x%x, resetting\n", state & MPI2_DOORBELL_FAULT_CODE_MASK); mps_diag_reset(sc, sleep_flags); } else if (state == MPI2_IOC_STATE_OPERATIONAL) { /* Need to take ownership */ mps_message_unit_reset(sc, sleep_flags); } else if (state == MPI2_IOC_STATE_RESET) { /* Wait a bit, IOC might be in transition */ mps_dprint(sc, MPS_INIT|MPS_FAULT, "IOC in unexpected reset state\n"); } else { mps_dprint(sc, MPS_INIT|MPS_FAULT, "IOC in unknown state 0x%x\n", state); error = EINVAL; break; } /* Wait 50ms for things to settle down. */ DELAY(50000); } if (error) mps_dprint(sc, MPS_INIT|MPS_FAULT, "Cannot transition IOC to ready\n"); mps_dprint(sc, MPS_INIT, "%s exit\n", __func__); return (error); } static int mps_transition_operational(struct mps_softc *sc) { uint32_t reg, state; int error; MPS_FUNCTRACE(sc); error = 0; reg = mps_regread(sc, MPI2_DOORBELL_OFFSET); mps_dprint(sc, MPS_INIT, "%s entered, Doorbell= 0x%x\n", __func__, reg); state = reg & MPI2_IOC_STATE_MASK; if (state != MPI2_IOC_STATE_READY) { mps_dprint(sc, MPS_INIT, "IOC not ready\n"); if ((error = mps_transition_ready(sc)) != 0) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "failed to transition ready, exit\n"); return (error); } } error = mps_send_iocinit(sc); mps_dprint(sc, MPS_INIT, "%s exit\n", __func__); return (error); } static void mps_resize_queues(struct mps_softc *sc) { int reqcr, prireqcr; /* * Size the queues. Since the reply queues always need one free * entry, we'll deduct one reply message here. The LSI documents * suggest instead to add a count to the request queue, but I think * that it's better to deduct from reply queue. */ prireqcr = MAX(1, sc->max_prireqframes); prireqcr = MIN(prireqcr, sc->facts->HighPriorityCredit); reqcr = MAX(2, sc->max_reqframes); reqcr = MIN(reqcr, sc->facts->RequestCredit); sc->num_reqs = prireqcr + reqcr; sc->num_replies = MIN(sc->max_replyframes + sc->max_evtframes, sc->facts->MaxReplyDescriptorPostQueueDepth) - 1; /* * Figure out the number of MSIx-based queues. If the firmware or * user has done something crazy and not allowed enough credit for * the queues to be useful then don't enable multi-queue. */ if (sc->facts->MaxMSIxVectors < 2) sc->msi_msgs = 1; if (sc->msi_msgs > 1) { sc->msi_msgs = MIN(sc->msi_msgs, mp_ncpus); sc->msi_msgs = MIN(sc->msi_msgs, sc->facts->MaxMSIxVectors); if (sc->num_reqs / sc->msi_msgs < 2) sc->msi_msgs = 1; } mps_dprint(sc, MPS_INIT, "Sized queues to q=%d reqs=%d replies=%d\n", sc->msi_msgs, sc->num_reqs, sc->num_replies); } /* * This is called during attach and when re-initializing due to a Diag Reset. * IOC Facts is used to allocate many of the structures needed by the driver. 
* If called from attach, de-allocation is not required because the driver has * not allocated any structures yet, but if called from a Diag Reset, previously * allocated structures based on IOC Facts will need to be freed and re- * allocated based on the latest IOC Facts. */ static int mps_iocfacts_allocate(struct mps_softc *sc, uint8_t attaching) { int error; Mpi2IOCFactsReply_t saved_facts; uint8_t saved_mode, reallocating; mps_dprint(sc, MPS_INIT|MPS_TRACE, "%s entered\n", __func__); /* Save old IOC Facts and then only reallocate if Facts have changed */ if (!attaching) { bcopy(sc->facts, &saved_facts, sizeof(MPI2_IOC_FACTS_REPLY)); } /* * Get IOC Facts. In all cases throughout this function, panic if doing * a re-initialization and only return the error if attaching so the OS * can handle it. */ if ((error = mps_get_iocfacts(sc, sc->facts)) != 0) { if (attaching) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "Failed to get " "IOC Facts with error %d, exit\n", error); return (error); } else { panic("%s failed to get IOC Facts with error %d\n", __func__, error); } } MPS_DPRINT_PAGE(sc, MPS_XINFO, iocfacts, sc->facts); snprintf(sc->fw_version, sizeof(sc->fw_version), "%02d.%02d.%02d.%02d", sc->facts->FWVersion.Struct.Major, sc->facts->FWVersion.Struct.Minor, sc->facts->FWVersion.Struct.Unit, sc->facts->FWVersion.Struct.Dev); mps_dprint(sc, MPS_INFO, "Firmware: %s, Driver: %s\n", sc->fw_version, MPS_DRIVER_VERSION); mps_dprint(sc, MPS_INFO, "IOCCapabilities: %b\n", sc->facts->IOCCapabilities, "\20" "\3ScsiTaskFull" "\4DiagTrace" "\5SnapBuf" "\6ExtBuf" "\7EEDP" "\10BiDirTarg" "\11Multicast" "\14TransRetry" "\15IR" "\16EventReplay" "\17RaidAccel" "\20MSIXIndex" "\21HostDisc"); /* * If the chip doesn't support event replay then a hard reset will be * required to trigger a full discovery. Do the reset here then * retransition to Ready. A hard reset might have already been done, * but it doesn't hurt to do it again. Only do this if attaching, not * for a Diag Reset. */ if (attaching && ((sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_EVENT_REPLAY) == 0)) { mps_dprint(sc, MPS_INIT, "No event replay, resetting\n"); mps_diag_reset(sc, NO_SLEEP); if ((error = mps_transition_ready(sc)) != 0) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "Failed to " "transition to ready with error %d, exit\n", error); return (error); } } /* * Set flag if IR Firmware is loaded. If the RAID Capability has * changed from the previous IOC Facts, log a warning, but only if * checking this after a Diag Reset and not during attach.
*/ saved_mode = sc->ir_firmware; if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_INTEGRATED_RAID) sc->ir_firmware = 1; if (!attaching) { if (sc->ir_firmware != saved_mode) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "new IR/IT mode " "in IOC Facts does not match previous mode\n"); } } /* Only deallocate and reallocate if relevant IOC Facts have changed */ reallocating = FALSE; sc->mps_flags &= ~MPS_FLAGS_REALLOCATED; if ((!attaching) && ((saved_facts.MsgVersion != sc->facts->MsgVersion) || (saved_facts.HeaderVersion != sc->facts->HeaderVersion) || (saved_facts.MaxChainDepth != sc->facts->MaxChainDepth) || (saved_facts.RequestCredit != sc->facts->RequestCredit) || (saved_facts.ProductID != sc->facts->ProductID) || (saved_facts.IOCCapabilities != sc->facts->IOCCapabilities) || (saved_facts.IOCRequestFrameSize != sc->facts->IOCRequestFrameSize) || (saved_facts.MaxTargets != sc->facts->MaxTargets) || (saved_facts.MaxSasExpanders != sc->facts->MaxSasExpanders) || (saved_facts.MaxEnclosures != sc->facts->MaxEnclosures) || (saved_facts.HighPriorityCredit != sc->facts->HighPriorityCredit) || (saved_facts.MaxReplyDescriptorPostQueueDepth != sc->facts->MaxReplyDescriptorPostQueueDepth) || (saved_facts.ReplyFrameSize != sc->facts->ReplyFrameSize) || (saved_facts.MaxVolumes != sc->facts->MaxVolumes) || (saved_facts.MaxPersistentEntries != sc->facts->MaxPersistentEntries))) { reallocating = TRUE; /* Record that we reallocated everything */ sc->mps_flags |= MPS_FLAGS_REALLOCATED; } /* * Some things should be done if attaching or re-allocating after a Diag * Reset, but are not needed after a Diag Reset if the FW has not * changed. */ if (attaching || reallocating) { /* * Check if controller supports FW diag buffers and set flag to * enable each type. */ if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_DIAG_TRACE_BUFFER) sc->fw_diag_buffer_list[MPI2_DIAG_BUF_TYPE_TRACE]. enabled = TRUE; if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_SNAPSHOT_BUFFER) sc->fw_diag_buffer_list[MPI2_DIAG_BUF_TYPE_SNAPSHOT]. enabled = TRUE; if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_EXTENDED_BUFFER) sc->fw_diag_buffer_list[MPI2_DIAG_BUF_TYPE_EXTENDED]. enabled = TRUE; /* * Set flag if EEDP is supported and if TLR is supported. */ if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_EEDP) sc->eedp_enabled = TRUE; if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_TLR) sc->control_TLR = TRUE; mps_resize_queues(sc); /* * Initialize all Tail Queues */ TAILQ_INIT(&sc->req_list); TAILQ_INIT(&sc->high_priority_req_list); TAILQ_INIT(&sc->chain_list); TAILQ_INIT(&sc->tm_list); } /* * If doing a Diag Reset and the FW is significantly different * (reallocating will be set above in IOC Facts comparison), then all * buffers based on the IOC Facts will need to be freed before they are * reallocated. */ if (reallocating) { mps_iocfacts_free(sc); mpssas_realloc_targets(sc, saved_facts.MaxTargets + saved_facts.MaxVolumes); } /* * Any deallocation has been completed. Now start reallocating * if needed. Will only need to reallocate if attaching or if the new * IOC Facts are different from the previous IOC Facts after a Diag * Reset. Targets have already been allocated above if needed. 
*/ error = 0; while (attaching || reallocating) { if ((error = mps_alloc_hw_queues(sc)) != 0) break; if ((error = mps_alloc_replies(sc)) != 0) break; if ((error = mps_alloc_requests(sc)) != 0) break; if ((error = mps_alloc_queues(sc)) != 0) break; break; } if (error) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "Failed to alloc queues with error %d\n", error); mps_free(sc); return (error); } /* Always initialize the queues */ bzero(sc->free_queue, sc->fqdepth * 4); mps_init_queues(sc); /* * Always get the chip out of the reset state, but only panic if not * attaching. If attaching and there is an error, that is handled by * the OS. */ error = mps_transition_operational(sc); if (error != 0) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "Failed to " "transition to operational with error %d\n", error); mps_free(sc); return (error); } /* * Finish the queue initialization. * These are set here instead of in mps_init_queues() because the * IOC resets these values during the state transition in * mps_transition_operational(). The free index is set to 1 * because the corresponding index in the IOC is set to 0, and the * IOC treats the queues as full if both are set to the same value. * Hence the reason that the queue can't hold all of the possible * replies. */ sc->replypostindex = 0; mps_regwrite(sc, MPI2_REPLY_FREE_HOST_INDEX_OFFSET, sc->replyfreeindex); mps_regwrite(sc, MPI2_REPLY_POST_HOST_INDEX_OFFSET, 0); /* * Attach the subsystems so they can prepare their event masks. * XXX Should be dynamic so that IM/IR and user modules can attach */ error = 0; while (attaching) { mps_dprint(sc, MPS_INIT, "Attaching subsystems\n"); if ((error = mps_attach_log(sc)) != 0) break; if ((error = mps_attach_sas(sc)) != 0) break; if ((error = mps_attach_user(sc)) != 0) break; break; } if (error) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "Failed to attach all " "subsystems: error %d\n", error); mps_free(sc); return (error); } if ((error = mps_pci_setup_interrupts(sc)) != 0) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "Failed to setup " "interrupts\n"); mps_free(sc); return (error); } /* * Set flag if this is a WD controller. This shouldn't ever change, but * reset it after a Diag Reset, just in case. */ sc->WD_available = FALSE; if (pci_get_device(sc->mps_dev) == MPI2_MFGPAGE_DEVID_SSS6200) sc->WD_available = TRUE; return (error); } /* * This is called if memory is being free (during detach for example) and when * buffers need to be reallocated due to a Diag Reset. 
*/ static void mps_iocfacts_free(struct mps_softc *sc) { struct mps_command *cm; int i; mps_dprint(sc, MPS_TRACE, "%s\n", __func__); if (sc->free_busaddr != 0) bus_dmamap_unload(sc->queues_dmat, sc->queues_map); if (sc->free_queue != NULL) bus_dmamem_free(sc->queues_dmat, sc->free_queue, sc->queues_map); if (sc->queues_dmat != NULL) bus_dma_tag_destroy(sc->queues_dmat); if (sc->chain_busaddr != 0) bus_dmamap_unload(sc->chain_dmat, sc->chain_map); if (sc->chain_frames != NULL) bus_dmamem_free(sc->chain_dmat, sc->chain_frames, sc->chain_map); if (sc->chain_dmat != NULL) bus_dma_tag_destroy(sc->chain_dmat); if (sc->sense_busaddr != 0) bus_dmamap_unload(sc->sense_dmat, sc->sense_map); if (sc->sense_frames != NULL) bus_dmamem_free(sc->sense_dmat, sc->sense_frames, sc->sense_map); if (sc->sense_dmat != NULL) bus_dma_tag_destroy(sc->sense_dmat); if (sc->reply_busaddr != 0) bus_dmamap_unload(sc->reply_dmat, sc->reply_map); if (sc->reply_frames != NULL) bus_dmamem_free(sc->reply_dmat, sc->reply_frames, sc->reply_map); if (sc->reply_dmat != NULL) bus_dma_tag_destroy(sc->reply_dmat); if (sc->req_busaddr != 0) bus_dmamap_unload(sc->req_dmat, sc->req_map); if (sc->req_frames != NULL) bus_dmamem_free(sc->req_dmat, sc->req_frames, sc->req_map); if (sc->req_dmat != NULL) bus_dma_tag_destroy(sc->req_dmat); if (sc->chains != NULL) free(sc->chains, M_MPT2); if (sc->commands != NULL) { for (i = 1; i < sc->num_reqs; i++) { cm = &sc->commands[i]; bus_dmamap_destroy(sc->buffer_dmat, cm->cm_dmamap); } free(sc->commands, M_MPT2); } if (sc->buffer_dmat != NULL) bus_dma_tag_destroy(sc->buffer_dmat); mps_pci_free_interrupts(sc); free(sc->queues, M_MPT2); sc->queues = NULL; } /* * The terms diag reset and hard reset are used interchangeably in the MPI * docs to mean resetting the controller chip. In this code diag reset * cleans everything up, and the hard reset function just sends the reset * sequence to the chip. This should probably be refactored so that every * subsystem gets a reset notification of some sort, and can clean up * appropriately. */ int mps_reinit(struct mps_softc *sc) { int error; struct mpssas_softc *sassc; sassc = sc->sassc; MPS_FUNCTRACE(sc); mtx_assert(&sc->mps_mtx, MA_OWNED); mps_dprint(sc, MPS_INIT|MPS_INFO, "Reinitializing controller\n"); if (sc->mps_flags & MPS_FLAGS_DIAGRESET) { mps_dprint(sc, MPS_INIT, "Reset already in progress\n"); return 0; } /* make sure the completion callbacks can recognize they're getting * a NULL cm_reply due to a reset. */ sc->mps_flags |= MPS_FLAGS_DIAGRESET; /* * Mask interrupts here. */ mps_dprint(sc, MPS_INIT, "masking interrupts and resetting\n"); mps_mask_intr(sc); error = mps_diag_reset(sc, CAN_SLEEP); if (error != 0) { /* XXXSL No need to panic here */ panic("%s hard reset failed with error %d\n", __func__, error); } /* Restore the PCI state, including the MSI-X registers */ mps_pci_restore(sc); /* Give the I/O subsystem special priority to get itself prepared */ mpssas_handle_reinit(sc); /* * Get IOC Facts and allocate all structures based on this information. * The attach function will also call mps_iocfacts_allocate at startup. * If relevant values have changed in IOC Facts, this function will free * all of the memory based on IOC Facts and reallocate that memory. */ if ((error = mps_iocfacts_allocate(sc, FALSE)) != 0) { panic("%s IOC Facts based allocation failed with error %d\n", __func__, error); } /* * Mapping structures will be re-allocated after getting IOC Page8, so * free these structures here. 
*/ mps_mapping_exit(sc); /* * The static page function currently read is IOC Page8. Others can be * added in future. It's possible that the values in IOC Page8 have * changed after a Diag Reset due to user modification, so always read * these. Interrupts are masked, so unmask them before getting config * pages. */ mps_unmask_intr(sc); sc->mps_flags &= ~MPS_FLAGS_DIAGRESET; mps_base_static_config_pages(sc); /* * Some mapping info is based in IOC Page8 data, so re-initialize the * mapping tables. */ mps_mapping_initialize(sc); /* * Restart will reload the event masks clobbered by the reset, and * then enable the port. */ mps_reregister_events(sc); /* the end of discovery will release the simq, so we're done. */ mps_dprint(sc, MPS_INIT|MPS_XINFO, "Finished sc %p post %u free %u\n", sc, sc->replypostindex, sc->replyfreeindex); mpssas_release_simq_reinit(sassc); mps_dprint(sc, MPS_INIT, "%s exit\n", __func__); return 0; } /* Wait for the chip to ACK a word that we've put into its FIFO. * Wait for 'timeout' seconds. Each pass of the busy loop waits for * 500 microseconds, so the total is [ 0.5 * (2000 * timeout) ] in * milliseconds, i.e. 'timeout' seconds. */ static int mps_wait_db_ack(struct mps_softc *sc, int timeout, int sleep_flag) { u32 cntdn, count; u32 int_status; u32 doorbell; count = 0; cntdn = (sleep_flag == CAN_SLEEP) ? 1000*timeout : 2000*timeout; do { int_status = mps_regread(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET); if (!(int_status & MPI2_HIS_SYS2IOC_DB_STATUS)) { mps_dprint(sc, MPS_TRACE, "%s: successful count(%d), timeout(%d)\n", __func__, count, timeout); return 0; } else if (int_status & MPI2_HIS_IOC2SYS_DB_STATUS) { doorbell = mps_regread(sc, MPI2_DOORBELL_OFFSET); if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) { mps_dprint(sc, MPS_FAULT, "fault_state(0x%04x)!\n", doorbell); return (EFAULT); } } else if (int_status == 0xFFFFFFFF) goto out; /* If it can sleep, sleep for 1 millisecond, else busy loop for * 0.5 millisecond */ if (mtx_owned(&sc->mps_mtx) && sleep_flag == CAN_SLEEP) msleep(&sc->msleep_fake_chan, &sc->mps_mtx, 0, "mpsdba", hz/1000); else if (sleep_flag == CAN_SLEEP) pause("mpsdba", hz/1000); else DELAY(500); count++; } while (--cntdn); out: mps_dprint(sc, MPS_FAULT, "%s: failed due to timeout count(%d), " "int_status(%x)!\n", __func__, count, int_status); return (ETIMEDOUT); } /* Wait for the chip to signal that the next word in its FIFO can be fetched */ static int mps_wait_db_int(struct mps_softc *sc) { int retry; for (retry = 0; retry < MPS_DB_MAX_WAIT; retry++) { if ((mps_regread(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET) & MPI2_HIS_IOC2SYS_DB_STATUS) != 0) return (0); DELAY(2000); } return (ETIMEDOUT); } /* Step through the synchronous command state machine, i.e. "Doorbell mode" */ static int mps_request_sync(struct mps_softc *sc, void *req, MPI2_DEFAULT_REPLY *reply, int req_sz, int reply_sz, int timeout) { uint32_t *data32; uint16_t *data16; int i, count, ioc_sz, residual; int sleep_flags = CAN_SLEEP; if (curthread->td_no_sleeping != 0) sleep_flags = NO_SLEEP; /* Step 1 */ mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); /* Step 2 */ if (mps_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_USED) return (EBUSY); /* Step 3 * Announce that a message is coming through the doorbell. Messages * are pushed as 32bit words, so round up if needed.
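* For example, a 13-byte request rounds up to count = (13 + 3) / 4 = * 4 dwords, while a 12-byte request needs exactly 3.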
*/ count = (req_sz + 3) / 4; mps_regwrite(sc, MPI2_DOORBELL_OFFSET, (MPI2_FUNCTION_HANDSHAKE << MPI2_DOORBELL_FUNCTION_SHIFT) | (count << MPI2_DOORBELL_ADD_DWORDS_SHIFT)); /* Step 4 */ if (mps_wait_db_int(sc) || (mps_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_USED) == 0) { mps_dprint(sc, MPS_FAULT, "Doorbell failed to activate\n"); return (ENXIO); } mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); if (mps_wait_db_ack(sc, 5, sleep_flags) != 0) { mps_dprint(sc, MPS_FAULT, "Doorbell handshake failed\n"); return (ENXIO); } /* Step 5 */ /* Clock out the message data synchronously in 32-bit dwords*/ data32 = (uint32_t *)req; for (i = 0; i < count; i++) { mps_regwrite(sc, MPI2_DOORBELL_OFFSET, htole32(data32[i])); if (mps_wait_db_ack(sc, 5, sleep_flags) != 0) { mps_dprint(sc, MPS_FAULT, "Timeout while writing doorbell\n"); return (ENXIO); } } /* Step 6 */ /* Clock in the reply in 16-bit words. The total length of the * message is always in the 4th byte, so clock out the first 2 words * manually, then loop the rest. */ data16 = (uint16_t *)reply; if (mps_wait_db_int(sc) != 0) { mps_dprint(sc, MPS_FAULT, "Timeout reading doorbell 0\n"); return (ENXIO); } data16[0] = mps_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_DATA_MASK; mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); if (mps_wait_db_int(sc) != 0) { mps_dprint(sc, MPS_FAULT, "Timeout reading doorbell 1\n"); return (ENXIO); } data16[1] = mps_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_DATA_MASK; mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); /* Number of 32bit words in the message */ ioc_sz = reply->MsgLength; /* * Figure out how many 16bit words to clock in without overrunning. * The precision loss with dividing reply_sz can safely be * ignored because the messages can only be multiples of 32bits. */ residual = 0; count = MIN((reply_sz / 4), ioc_sz) * 2; if (count < ioc_sz * 2) { residual = ioc_sz * 2 - count; mps_dprint(sc, MPS_ERROR, "Driver error, throwing away %d " "residual message words\n", residual); } for (i = 2; i < count; i++) { if (mps_wait_db_int(sc) != 0) { mps_dprint(sc, MPS_FAULT, "Timeout reading doorbell %d\n", i); return (ENXIO); } data16[i] = mps_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_DATA_MASK; mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); } /* * Pull out residual words that won't fit into the provided buffer. * This keeps the chip from hanging due to a driver programming * error. 
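* For example, if the IOC reports an 8-dword reply (ioc_sz = 8) but * the caller passed a 24-byte buffer (reply_sz = 24), then count = * MIN(24 / 4, 8) * 2 = 12 16-bit words are stored and residual = * 8 * 2 - 12 = 4 words are drained and discarded here.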
*/ while (residual--) { if (mps_wait_db_int(sc) != 0) { mps_dprint(sc, MPS_FAULT, "Timeout reading doorbell\n"); return (ENXIO); } (void)mps_regread(sc, MPI2_DOORBELL_OFFSET); mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); } /* Step 7 */ if (mps_wait_db_int(sc) != 0) { mps_dprint(sc, MPS_FAULT, "Timeout waiting to exit doorbell\n"); return (ENXIO); } if (mps_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_USED) mps_dprint(sc, MPS_FAULT, "Warning, doorbell still active\n"); mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0); return (0); } static void mps_enqueue_request(struct mps_softc *sc, struct mps_command *cm) { reply_descriptor rd; MPS_FUNCTRACE(sc); mps_dprint(sc, MPS_TRACE, "SMID %u cm %p ccb %p\n", cm->cm_desc.Default.SMID, cm, cm->cm_ccb); if (sc->mps_flags & MPS_FLAGS_ATTACH_DONE && !(sc->mps_flags & MPS_FLAGS_SHUTDOWN)) mtx_assert(&sc->mps_mtx, MA_OWNED); if (++sc->io_cmds_active > sc->io_cmds_highwater) sc->io_cmds_highwater++; rd.u.low = cm->cm_desc.Words.Low; rd.u.high = cm->cm_desc.Words.High; rd.word = htole64(rd.word); /* TODO-We may need to make below regwrite atomic */ mps_regwrite(sc, MPI2_REQUEST_DESCRIPTOR_POST_LOW_OFFSET, rd.u.low); mps_regwrite(sc, MPI2_REQUEST_DESCRIPTOR_POST_HIGH_OFFSET, rd.u.high); } /* * Just the FACTS, ma'am. */ static int mps_get_iocfacts(struct mps_softc *sc, MPI2_IOC_FACTS_REPLY *facts) { MPI2_DEFAULT_REPLY *reply; MPI2_IOC_FACTS_REQUEST request; int error, req_sz, reply_sz; MPS_FUNCTRACE(sc); mps_dprint(sc, MPS_INIT, "%s entered\n", __func__); req_sz = sizeof(MPI2_IOC_FACTS_REQUEST); reply_sz = sizeof(MPI2_IOC_FACTS_REPLY); reply = (MPI2_DEFAULT_REPLY *)facts; bzero(&request, req_sz); request.Function = MPI2_FUNCTION_IOC_FACTS; error = mps_request_sync(sc, &request, reply, req_sz, reply_sz, 5); mps_dprint(sc, MPS_INIT, "%s exit error= %d\n", __func__, error); return (error); } static int mps_send_iocinit(struct mps_softc *sc) { MPI2_IOC_INIT_REQUEST init; MPI2_DEFAULT_REPLY reply; int req_sz, reply_sz, error; struct timeval now; uint64_t time_in_msec; MPS_FUNCTRACE(sc); mps_dprint(sc, MPS_INIT, "%s entered\n", __func__); req_sz = sizeof(MPI2_IOC_INIT_REQUEST); reply_sz = sizeof(MPI2_IOC_INIT_REPLY); bzero(&init, req_sz); bzero(&reply, reply_sz); /* * Fill in the init block. Note that most addresses are * deliberately in the lower 32bits of memory. This is a micro- * optimization for PCI/PCIX, though it's not clear if it helps PCIe.
*/ init.Function = MPI2_FUNCTION_IOC_INIT; init.WhoInit = MPI2_WHOINIT_HOST_DRIVER; init.MsgVersion = htole16(MPI2_VERSION); init.HeaderVersion = htole16(MPI2_HEADER_VERSION); init.SystemRequestFrameSize = htole16(sc->facts->IOCRequestFrameSize); init.ReplyDescriptorPostQueueDepth = htole16(sc->pqdepth); init.ReplyFreeQueueDepth = htole16(sc->fqdepth); init.SenseBufferAddressHigh = 0; init.SystemReplyAddressHigh = 0; init.SystemRequestFrameBaseAddress.High = 0; init.SystemRequestFrameBaseAddress.Low = htole32((uint32_t)sc->req_busaddr); init.ReplyDescriptorPostQueueAddress.High = 0; init.ReplyDescriptorPostQueueAddress.Low = htole32((uint32_t)sc->post_busaddr); init.ReplyFreeQueueAddress.High = 0; init.ReplyFreeQueueAddress.Low = htole32((uint32_t)sc->free_busaddr); getmicrotime(&now); time_in_msec = (now.tv_sec * 1000 + now.tv_usec/1000); init.TimeStamp.High = htole32((time_in_msec >> 32) & 0xFFFFFFFF); init.TimeStamp.Low = htole32(time_in_msec & 0xFFFFFFFF); error = mps_request_sync(sc, &init, &reply, req_sz, reply_sz, 5); if ((reply.IOCStatus & MPI2_IOCSTATUS_MASK) != MPI2_IOCSTATUS_SUCCESS) error = ENXIO; mps_dprint(sc, MPS_INIT, "IOCInit status= 0x%x\n", reply.IOCStatus); mps_dprint(sc, MPS_INIT, "%s exit\n", __func__); return (error); } void mps_memaddr_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { bus_addr_t *addr; addr = arg; *addr = segs[0].ds_addr; } static int mps_alloc_queues(struct mps_softc *sc) { struct mps_queue *q; int nq, i; nq = sc->msi_msgs; mps_dprint(sc, MPS_INIT|MPS_XINFO, "Allocating %d I/O queues\n", nq); sc->queues = malloc(sizeof(struct mps_queue) * nq, M_MPT2, M_NOWAIT|M_ZERO); if (sc->queues == NULL) return (ENOMEM); for (i = 0; i < nq; i++) { q = &sc->queues[i]; mps_dprint(sc, MPS_INIT, "Configuring queue %d %p\n", i, q); q->sc = sc; q->qnum = i; } return (0); } static int mps_alloc_hw_queues(struct mps_softc *sc) { bus_addr_t queues_busaddr; uint8_t *queues; int qsize, fqsize, pqsize; /* * The reply free queue contains 4 byte entries in multiples of 16 and * aligned on a 16 byte boundary. There must always be an unused entry. * This queue supplies fresh reply frames for the firmware to use. * * The reply descriptor post queue contains 8 byte entries in * multiples of 16 and aligned on a 16 byte boundary. This queue * contains filled-in reply frames sent from the firmware to the host. * * These two queues are allocated together for simplicity. 
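* For example, if num_replies is 1015, then fqdepth = pqdepth = * roundup2(1016, 16) = 1024, giving fqsize = 4096 bytes, pqsize = * 8192 bytes and a single 12 KB allocation covering both queues.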
*/ sc->fqdepth = roundup2(sc->num_replies + 1, 16); sc->pqdepth = roundup2(sc->num_replies + 1, 16); fqsize= sc->fqdepth * 4; pqsize = sc->pqdepth * 8; qsize = fqsize + pqsize; if (bus_dma_tag_create( sc->mps_parent_dmat, /* parent */ 16, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ qsize, /* maxsize */ 1, /* nsegments */ qsize, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->queues_dmat)) { mps_dprint(sc, MPS_ERROR, "Cannot allocate queues DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->queues_dmat, (void **)&queues, BUS_DMA_NOWAIT, &sc->queues_map)) { mps_dprint(sc, MPS_ERROR, "Cannot allocate queues memory\n"); return (ENOMEM); } bzero(queues, qsize); bus_dmamap_load(sc->queues_dmat, sc->queues_map, queues, qsize, mps_memaddr_cb, &queues_busaddr, 0); sc->free_queue = (uint32_t *)queues; sc->free_busaddr = queues_busaddr; sc->post_queue = (MPI2_REPLY_DESCRIPTORS_UNION *)(queues + fqsize); sc->post_busaddr = queues_busaddr + fqsize; return (0); } static int mps_alloc_replies(struct mps_softc *sc) { int rsize, num_replies; /* * sc->num_replies should be one less than sc->fqdepth. We need to * allocate space for sc->fqdepth replies, but only sc->num_replies * replies can be used at once. */ num_replies = max(sc->fqdepth, sc->num_replies); rsize = sc->facts->ReplyFrameSize * num_replies * 4; if (bus_dma_tag_create( sc->mps_parent_dmat, /* parent */ 4, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ rsize, /* maxsize */ 1, /* nsegments */ rsize, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->reply_dmat)) { mps_dprint(sc, MPS_ERROR, "Cannot allocate replies DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->reply_dmat, (void **)&sc->reply_frames, BUS_DMA_NOWAIT, &sc->reply_map)) { mps_dprint(sc, MPS_ERROR, "Cannot allocate replies memory\n"); return (ENOMEM); } bzero(sc->reply_frames, rsize); bus_dmamap_load(sc->reply_dmat, sc->reply_map, sc->reply_frames, rsize, mps_memaddr_cb, &sc->reply_busaddr, 0); return (0); } static int mps_alloc_requests(struct mps_softc *sc) { struct mps_command *cm; struct mps_chain *chain; int i, rsize, nsegs; rsize = sc->facts->IOCRequestFrameSize * sc->num_reqs * 4; if (bus_dma_tag_create( sc->mps_parent_dmat, /* parent */ 16, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ rsize, /* maxsize */ 1, /* nsegments */ rsize, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->req_dmat)) { mps_dprint(sc, MPS_ERROR, "Cannot allocate request DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->req_dmat, (void **)&sc->req_frames, BUS_DMA_NOWAIT, &sc->req_map)) { mps_dprint(sc, MPS_ERROR, "Cannot allocate request memory\n"); return (ENOMEM); } bzero(sc->req_frames, rsize); bus_dmamap_load(sc->req_dmat, sc->req_map, sc->req_frames, rsize, mps_memaddr_cb, &sc->req_busaddr, 0); rsize = sc->facts->IOCRequestFrameSize * sc->max_chains * 4; if (bus_dma_tag_create( sc->mps_parent_dmat, /* parent */ 16, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ rsize, /* maxsize */ 1, /* nsegments */ rsize, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->chain_dmat)) { mps_dprint(sc, MPS_ERROR, "Cannot allocate chain DMA tag\n"); 
return (ENOMEM); } if (bus_dmamem_alloc(sc->chain_dmat, (void **)&sc->chain_frames, BUS_DMA_NOWAIT, &sc->chain_map)) { mps_dprint(sc, MPS_ERROR, "Cannot allocate chain memory\n"); return (ENOMEM); } bzero(sc->chain_frames, rsize); bus_dmamap_load(sc->chain_dmat, sc->chain_map, sc->chain_frames, rsize, mps_memaddr_cb, &sc->chain_busaddr, 0); rsize = MPS_SENSE_LEN * sc->num_reqs; if (bus_dma_tag_create( sc->mps_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ rsize, /* maxsize */ 1, /* nsegments */ rsize, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->sense_dmat)) { mps_dprint(sc, MPS_ERROR, "Cannot allocate sense DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->sense_dmat, (void **)&sc->sense_frames, BUS_DMA_NOWAIT, &sc->sense_map)) { mps_dprint(sc, MPS_ERROR, "Cannot allocate sense memory\n"); return (ENOMEM); } bzero(sc->sense_frames, rsize); bus_dmamap_load(sc->sense_dmat, sc->sense_map, sc->sense_frames, rsize, mps_memaddr_cb, &sc->sense_busaddr, 0); sc->chains = malloc(sizeof(struct mps_chain) * sc->max_chains, M_MPT2, M_WAITOK | M_ZERO); if(!sc->chains) { mps_dprint(sc, MPS_ERROR, "Cannot allocate chains memory\n"); return (ENOMEM); } for (i = 0; i < sc->max_chains; i++) { chain = &sc->chains[i]; chain->chain = (MPI2_SGE_IO_UNION *)(sc->chain_frames + i * sc->facts->IOCRequestFrameSize * 4); chain->chain_busaddr = sc->chain_busaddr + i * sc->facts->IOCRequestFrameSize * 4; mps_free_chain(sc, chain); sc->chain_free_lowwater++; } /* XXX Need to pick a more precise value */ nsegs = (MAXPHYS / PAGE_SIZE) + 1; if (bus_dma_tag_create( sc->mps_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ nsegs, /* nsegments */ BUS_SPACE_MAXSIZE_24BIT,/* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ busdma_lock_mutex, /* lockfunc */ &sc->mps_mtx, /* lockarg */ &sc->buffer_dmat)) { mps_dprint(sc, MPS_ERROR, "Cannot allocate buffer DMA tag\n"); return (ENOMEM); } /* * SMID 0 cannot be used as a free command per the firmware spec. * Just drop that command instead of risking accounting bugs. */ sc->commands = malloc(sizeof(struct mps_command) * sc->num_reqs, M_MPT2, M_WAITOK | M_ZERO); if(!sc->commands) { mps_dprint(sc, MPS_ERROR, "Cannot allocate command memory\n"); return (ENOMEM); } for (i = 1; i < sc->num_reqs; i++) { cm = &sc->commands[i]; cm->cm_req = sc->req_frames + i * sc->facts->IOCRequestFrameSize * 4; cm->cm_req_busaddr = sc->req_busaddr + i * sc->facts->IOCRequestFrameSize * 4; cm->cm_sense = &sc->sense_frames[i]; cm->cm_sense_busaddr = sc->sense_busaddr + i * MPS_SENSE_LEN; cm->cm_desc.Default.SMID = i; cm->cm_sc = sc; TAILQ_INIT(&cm->cm_chain_list); callout_init_mtx(&cm->cm_callout, &sc->mps_mtx, 0); /* XXX Is a failure here a critical problem? */ if (bus_dmamap_create(sc->buffer_dmat, 0, &cm->cm_dmamap) == 0) if (i <= sc->facts->HighPriorityCredit) mps_free_high_priority_command(sc, cm); else mps_free_command(sc, cm); else { panic("failed to allocate command %d\n", i); sc->num_reqs = i; break; } } return (0); } static int mps_init_queues(struct mps_softc *sc) { int i; memset((uint8_t *)sc->post_queue, 0xff, sc->pqdepth * 8); /* * According to the spec, we need to use one less reply than we * have space for on the queue. 
So sc->num_replies (the number we * use) should be less than sc->fqdepth (allocated size). */ if (sc->num_replies >= sc->fqdepth) return (EINVAL); /* * Initialize all of the free queue entries. */ for (i = 0; i < sc->fqdepth; i++) sc->free_queue[i] = sc->reply_busaddr + (i * sc->facts->ReplyFrameSize * 4); sc->replyfreeindex = sc->num_replies; return (0); } /* Get the driver parameter tunables. Lowest priority are the driver defaults. * Next are the global settings, if they exist. Highest are the per-unit * settings, if they exist. */ void mps_get_tunables(struct mps_softc *sc) { - char tmpstr[80]; + char tmpstr[80], mps_debug[80]; /* XXX default to some debugging for now */ sc->mps_debug = MPS_INFO|MPS_FAULT; sc->disable_msix = 0; sc->disable_msi = 0; sc->max_msix = MPS_MSIX_MAX; sc->max_chains = MPS_CHAIN_FRAMES; sc->max_io_pages = MPS_MAXIO_PAGES; sc->enable_ssu = MPS_SSU_ENABLE_SSD_DISABLE_HDD; sc->spinup_wait_time = DEFAULT_SPINUP_WAIT; sc->use_phynum = 1; sc->max_reqframes = MPS_REQ_FRAMES; sc->max_prireqframes = MPS_PRI_REQ_FRAMES; sc->max_replyframes = MPS_REPLY_FRAMES; sc->max_evtframes = MPS_EVT_REPLY_FRAMES; /* * Grab the global variables. */ - TUNABLE_INT_FETCH("hw.mps.debug_level", &sc->mps_debug); + bzero(mps_debug, 80); + if (TUNABLE_STR_FETCH("hw.mps.debug_level", mps_debug, 80) != 0) + mps_parse_debug(sc, mps_debug); TUNABLE_INT_FETCH("hw.mps.disable_msix", &sc->disable_msix); TUNABLE_INT_FETCH("hw.mps.disable_msi", &sc->disable_msi); TUNABLE_INT_FETCH("hw.mps.max_msix", &sc->max_msix); TUNABLE_INT_FETCH("hw.mps.max_chains", &sc->max_chains); TUNABLE_INT_FETCH("hw.mps.max_io_pages", &sc->max_io_pages); TUNABLE_INT_FETCH("hw.mps.enable_ssu", &sc->enable_ssu); TUNABLE_INT_FETCH("hw.mps.spinup_wait_time", &sc->spinup_wait_time); TUNABLE_INT_FETCH("hw.mps.use_phy_num", &sc->use_phynum); TUNABLE_INT_FETCH("hw.mps.max_reqframes", &sc->max_reqframes); TUNABLE_INT_FETCH("hw.mps.max_prireqframes", &sc->max_prireqframes); TUNABLE_INT_FETCH("hw.mps.max_replyframes", &sc->max_replyframes); TUNABLE_INT_FETCH("hw.mps.max_evtframes", &sc->max_evtframes); /* Grab the unit-instance variables */ snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.debug_level", device_get_unit(sc->mps_dev)); - TUNABLE_INT_FETCH(tmpstr, &sc->mps_debug); + bzero(mps_debug, 80); + if (TUNABLE_STR_FETCH(tmpstr, mps_debug, 80) != 0) + mps_parse_debug(sc, mps_debug); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.disable_msix", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->disable_msix); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.disable_msi", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->disable_msi); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.max_msix", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_msix); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.max_chains", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_chains); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.max_io_pages", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_io_pages); bzero(sc->exclude_ids, sizeof(sc->exclude_ids)); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.exclude_ids", device_get_unit(sc->mps_dev)); TUNABLE_STR_FETCH(tmpstr, sc->exclude_ids, sizeof(sc->exclude_ids)); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.enable_ssu", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->enable_ssu); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.spinup_wait_time", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->spinup_wait_time); snprintf(tmpstr, 
sizeof(tmpstr), "dev.mps.%d.use_phy_num", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->use_phynum); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.max_reqframes", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_reqframes); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.max_prireqframes", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_prireqframes); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.max_replyframes", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_replyframes); snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.max_evtframes", device_get_unit(sc->mps_dev)); TUNABLE_INT_FETCH(tmpstr, &sc->max_evtframes); } static void mps_setup_sysctl(struct mps_softc *sc) { struct sysctl_ctx_list *sysctl_ctx = NULL; struct sysctl_oid *sysctl_tree = NULL; char tmpstr[80], tmpstr2[80]; /* * Setup the sysctl variable so the user can change the debug level * on the fly. */ snprintf(tmpstr, sizeof(tmpstr), "MPS controller %d", device_get_unit(sc->mps_dev)); snprintf(tmpstr2, sizeof(tmpstr2), "%d", device_get_unit(sc->mps_dev)); sysctl_ctx = device_get_sysctl_ctx(sc->mps_dev); if (sysctl_ctx != NULL) sysctl_tree = device_get_sysctl_tree(sc->mps_dev); if (sysctl_tree == NULL) { sysctl_ctx_init(&sc->sysctl_ctx); sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_hw_mps), OID_AUTO, tmpstr2, CTLFLAG_RD, 0, tmpstr); if (sc->sysctl_tree == NULL) return; sysctl_ctx = &sc->sysctl_ctx; sysctl_tree = sc->sysctl_tree; } - SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), - OID_AUTO, "debug_level", CTLFLAG_RW, &sc->mps_debug, 0, - "mps debug level"); + SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), + OID_AUTO, "debug_level", CTLTYPE_STRING | CTLFLAG_RW, sc, 0, + mps_debug_sysctl, "A", "mps debug level"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "disable_msix", CTLFLAG_RD, &sc->disable_msix, 0, "Disable the use of MSI-X interrupts"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "disable_msi", CTLFLAG_RD, &sc->disable_msi, 0, "Disable the use of MSI interrupts"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_msix", CTLFLAG_RD, &sc->max_msix, 0, "User-defined maximum number of MSIX queues"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "msix_msgs", CTLFLAG_RD, &sc->msi_msgs, 0, "Negotiated number of MSIX queues"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_reqframes", CTLFLAG_RD, &sc->max_reqframes, 0, "Total number of allocated request frames"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_prireqframes", CTLFLAG_RD, &sc->max_prireqframes, 0, "Total number of allocated high priority request frames"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_replyframes", CTLFLAG_RD, &sc->max_replyframes, 0, "Total number of allocated reply frames"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_evtframes", CTLFLAG_RD, &sc->max_evtframes, 0, "Total number of event frames allocated"); SYSCTL_ADD_STRING(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "firmware_version", CTLFLAG_RW, sc->fw_version, strlen(sc->fw_version), "firmware version"); SYSCTL_ADD_STRING(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "driver_version", CTLFLAG_RW, MPS_DRIVER_VERSION, strlen(MPS_DRIVER_VERSION), "driver version"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "io_cmds_active", CTLFLAG_RD, &sc->io_cmds_active, 0, 
"number of currently active commands"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "io_cmds_highwater", CTLFLAG_RD, &sc->io_cmds_highwater, 0, "maximum active commands seen"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "chain_free", CTLFLAG_RD, &sc->chain_free, 0, "number of free chain elements"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "chain_free_lowwater", CTLFLAG_RD, &sc->chain_free_lowwater, 0,"lowest number of free chain elements"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_chains", CTLFLAG_RD, &sc->max_chains, 0,"maximum chain frames that will be allocated"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_io_pages", CTLFLAG_RD, &sc->max_io_pages, 0,"maximum pages to allow per I/O (if <1 use " "IOCFacts)"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "enable_ssu", CTLFLAG_RW, &sc->enable_ssu, 0, "enable SSU to SATA SSD/HDD at shutdown"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "chain_alloc_fail", CTLFLAG_RD, &sc->chain_alloc_fail, "chain allocation failures"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "spinup_wait_time", CTLFLAG_RD, &sc->spinup_wait_time, DEFAULT_SPINUP_WAIT, "seconds to wait for " "spinup after SATA ID error"); SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "mapping_table_dump", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, mps_mapping_dump, "A", "Mapping Table Dump"); SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "encl_table_dump", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, mps_mapping_encl_dump, "A", "Enclosure Table Dump"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "use_phy_num", CTLFLAG_RD, &sc->use_phynum, 0, "Use the phy number for enumeration"); +} + +struct mps_debug_string { + char *name; + int flag; +} mps_debug_strings[] = { + {"info", MPS_INFO}, + {"fault", MPS_FAULT}, + {"event", MPS_EVENT}, + {"log", MPS_LOG}, + {"recovery", MPS_RECOVERY}, + {"error", MPS_ERROR}, + {"init", MPS_INIT}, + {"xinfo", MPS_XINFO}, + {"user", MPS_USER}, + {"mapping", MPS_MAPPING}, + {"trace", MPS_TRACE} +}; + +static int +mps_debug_sysctl(SYSCTL_HANDLER_ARGS) +{ + struct mps_softc *sc; + struct mps_debug_string *string; + struct sbuf sbuf; + char *buffer; + size_t sz; + int i, len, debug, error; + + sc = (struct mps_softc *)arg1; + + error = sysctl_wire_old_buffer(req, 0); + if (error != 0) + return (error); + + sbuf_new_for_sysctl(&sbuf, NULL, 128, req); + debug = sc->mps_debug; + + sbuf_printf(&sbuf, "%#x", debug); + + sz = sizeof(mps_debug_strings) / sizeof(mps_debug_strings[0]); + for (i = 0; i < sz; i++) { + string = &mps_debug_strings[i]; + if (debug & string->flag) + sbuf_printf(&sbuf, ",%s", string->name); + } + + error = sbuf_finish(&sbuf); + sbuf_delete(&sbuf); + + if (error || req->newptr == NULL) + return (error); + + len = req->newlen - req->newidx; + if (len == 0) + return (0); + + buffer = malloc(len, M_MPT2, M_ZERO|M_WAITOK); + error = SYSCTL_IN(req, buffer, len); + + mps_parse_debug(sc, buffer); + + free(buffer, M_MPT2); + return (error); +} + +static void +mps_parse_debug(struct mps_softc *sc, char *list) +{ + struct mps_debug_string *string; + char *token, *endtoken; + size_t sz; + int flags, i; + + if (list == NULL || *list == '\0') + return; + + flags = 0; + sz = sizeof(mps_debug_strings) / sizeof(mps_debug_strings[0]); + while ((token = strsep(&list, ":,")) != NULL) { + + /* Handle integer flags */ + 
flags |= strtol(token, &endtoken, 0); + if (token != endtoken) + continue; + + /* Handle text flags */ + for (i = 0; i < sz; i++) { + string = &mps_debug_strings[i]; + if (strcasecmp(token, string->name) == 0) { + flags |= string->flag; + break; + } + } + } + + sc->mps_debug = flags; + return; } int mps_attach(struct mps_softc *sc) { int error; MPS_FUNCTRACE(sc); mps_dprint(sc, MPS_INIT, "%s entered\n", __func__); mtx_init(&sc->mps_mtx, "MPT2SAS lock", NULL, MTX_DEF); callout_init_mtx(&sc->periodic, &sc->mps_mtx, 0); callout_init_mtx(&sc->device_check_callout, &sc->mps_mtx, 0); TAILQ_INIT(&sc->event_list); timevalclear(&sc->lastfail); if ((error = mps_transition_ready(sc)) != 0) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "failed to transition " "ready\n"); return (error); } sc->facts = malloc(sizeof(MPI2_IOC_FACTS_REPLY), M_MPT2, M_ZERO|M_NOWAIT); if(!sc->facts) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "Cannot allocate memory, " "exit\n"); return (ENOMEM); } /* * Get IOC Facts and allocate all structures based on this information. * A Diag Reset will also call mps_iocfacts_allocate and re-read the IOC * Facts. If relevant values have changed in IOC Facts, this function * will free all of the memory based on IOC Facts and reallocate that * memory. If this fails, any allocated memory should already be freed. */ if ((error = mps_iocfacts_allocate(sc, TRUE)) != 0) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "IOC Facts based allocation " "failed with error %d, exit\n", error); return (error); } /* Start the periodic watchdog check on the IOC Doorbell */ mps_periodic(sc); /* * The portenable will kick off discovery events that will drive the * rest of the initialization process. The CAM/SAS module will * hold up the boot sequence until discovery is complete. */ sc->mps_ich.ich_func = mps_startup; sc->mps_ich.ich_arg = sc; if (config_intrhook_establish(&sc->mps_ich) != 0) { mps_dprint(sc, MPS_INIT|MPS_ERROR, "Cannot establish MPS config hook\n"); error = EINVAL; } /* * Allow IR to shutdown gracefully when shutdown occurs. */ sc->shutdown_eh = EVENTHANDLER_REGISTER(shutdown_final, mpssas_ir_shutdown, sc, SHUTDOWN_PRI_DEFAULT); if (sc->shutdown_eh == NULL) mps_dprint(sc, MPS_INIT|MPS_ERROR, "shutdown event registration failed\n"); mps_setup_sysctl(sc); sc->mps_flags |= MPS_FLAGS_ATTACH_DONE; mps_dprint(sc, MPS_INIT, "%s exit error= %d\n", __func__, error); return (error); } /* Run through any late-start handlers. */ static void mps_startup(void *arg) { struct mps_softc *sc; sc = (struct mps_softc *)arg; mps_dprint(sc, MPS_INIT, "%s entered\n", __func__); mps_lock(sc); mps_unmask_intr(sc); /* initialize device mapping tables */ mps_base_static_config_pages(sc); mps_mapping_initialize(sc); mpssas_startup(sc); mps_unlock(sc); mps_dprint(sc, MPS_INIT, "disestablish config intrhook\n"); config_intrhook_disestablish(&sc->mps_ich); sc->mps_ich.ich_arg = NULL; mps_dprint(sc, MPS_INIT, "%s exit\n", __func__); } /* Periodic watchdog. Is called with the driver lock already held. 
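 * Each tick reads the doorbell register; a reported IOC FAULT state
 * triggers mps_reinit(), and the callout then re-arms itself every
 * MPS_PERIODIC_DELAY seconds until MPS_FLAGS_SHUTDOWN is set.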
*/ static void mps_periodic(void *arg) { struct mps_softc *sc; uint32_t db; sc = (struct mps_softc *)arg; if (sc->mps_flags & MPS_FLAGS_SHUTDOWN) return; db = mps_regread(sc, MPI2_DOORBELL_OFFSET); if ((db & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) { mps_dprint(sc, MPS_FAULT, "IOC Fault 0x%08x, Resetting\n", db); mps_reinit(sc); } callout_reset(&sc->periodic, MPS_PERIODIC_DELAY * hz, mps_periodic, sc); } static void mps_log_evt_handler(struct mps_softc *sc, uintptr_t data, MPI2_EVENT_NOTIFICATION_REPLY *event) { MPI2_EVENT_DATA_LOG_ENTRY_ADDED *entry; MPS_DPRINT_EVENT(sc, generic, event); switch (event->Event) { case MPI2_EVENT_LOG_DATA: mps_dprint(sc, MPS_EVENT, "MPI2_EVENT_LOG_DATA:\n"); if (sc->mps_debug & MPS_EVENT) hexdump(event->EventData, event->EventDataLength, NULL, 0); break; case MPI2_EVENT_LOG_ENTRY_ADDED: entry = (MPI2_EVENT_DATA_LOG_ENTRY_ADDED *)event->EventData; mps_dprint(sc, MPS_EVENT, "MPI2_EVENT_LOG_ENTRY_ADDED event " "0x%x Sequence %d:\n", entry->LogEntryQualifier, entry->LogSequence); break; default: break; } return; } static int mps_attach_log(struct mps_softc *sc) { u32 events[MPI2_EVENT_NOTIFY_EVENTMASK_WORDS]; bzero(events, 16); setbit(events, MPI2_EVENT_LOG_DATA); setbit(events, MPI2_EVENT_LOG_ENTRY_ADDED); mps_register_events(sc, events, mps_log_evt_handler, NULL, &sc->mps_log_eh); return (0); } static int mps_detach_log(struct mps_softc *sc) { if (sc->mps_log_eh != NULL) mps_deregister_events(sc, sc->mps_log_eh); return (0); } /* * Free all of the driver resources and detach submodules. Should be called * without the lock held. */ int mps_free(struct mps_softc *sc) { int error; mps_dprint(sc, MPS_INIT, "%s entered\n", __func__); /* Turn off the watchdog */ mps_lock(sc); sc->mps_flags |= MPS_FLAGS_SHUTDOWN; mps_unlock(sc); /* Lock must not be held for this */ callout_drain(&sc->periodic); callout_drain(&sc->device_check_callout); if (((error = mps_detach_log(sc)) != 0) || ((error = mps_detach_sas(sc)) != 0)) { mps_dprint(sc, MPS_INIT|MPS_FAULT, "failed to detach " "subsystems, exit\n"); return (error); } mps_detach_user(sc); /* Put the IOC back in the READY state. */ mps_lock(sc); if ((error = mps_transition_ready(sc)) != 0) { mps_unlock(sc); return (error); } mps_unlock(sc); if (sc->facts != NULL) free(sc->facts, M_MPT2); /* * Free all buffers that are based on IOC Facts. A Diag Reset may need * to free these buffers too. 
*/ mps_iocfacts_free(sc); if (sc->sysctl_tree != NULL) sysctl_ctx_free(&sc->sysctl_ctx); /* Deregister the shutdown function */ if (sc->shutdown_eh != NULL) EVENTHANDLER_DEREGISTER(shutdown_final, sc->shutdown_eh); mtx_destroy(&sc->mps_mtx); mps_dprint(sc, MPS_INIT, "%s exit\n", __func__); return (0); } static __inline void mps_complete_command(struct mps_softc *sc, struct mps_command *cm) { MPS_FUNCTRACE(sc); if (cm == NULL) { mps_dprint(sc, MPS_ERROR, "Completing NULL command\n"); return; } if (cm->cm_flags & MPS_CM_FLAGS_POLLED) cm->cm_flags |= MPS_CM_FLAGS_COMPLETE; if (cm->cm_complete != NULL) { mps_dprint(sc, MPS_TRACE, "%s cm %p calling cm_complete %p data %p reply %p\n", __func__, cm, cm->cm_complete, cm->cm_complete_data, cm->cm_reply); cm->cm_complete(sc, cm); } if (cm->cm_flags & MPS_CM_FLAGS_WAKEUP) { mps_dprint(sc, MPS_TRACE, "waking up %p\n", cm); wakeup(cm); } if (cm->cm_sc->io_cmds_active != 0) { cm->cm_sc->io_cmds_active--; } else { mps_dprint(sc, MPS_ERROR, "Warning: io_cmds_active is " "out of sync - resynching to 0\n"); } } static void mps_sas_log_info(struct mps_softc *sc, u32 log_info) { union loginfo_type { u32 loginfo; struct { u32 subcode:16; u32 code:8; u32 originator:4; u32 bus_type:4; } dw; }; union loginfo_type sas_loginfo; char *originator_str = NULL; sas_loginfo.loginfo = log_info; if (sas_loginfo.dw.bus_type != 3 /*SAS*/) return; /* each nexus loss loginfo */ if (log_info == 0x31170000) return; /* eat the loginfos associated with task aborts */ if ((log_info == 0x30050000 || log_info == 0x31140000 || log_info == 0x31130000)) return; switch (sas_loginfo.dw.originator) { case 0: originator_str = "IOP"; break; case 1: originator_str = "PL"; break; case 2: originator_str = "IR"; break; } mps_dprint(sc, MPS_LOG, "log_info(0x%08x): originator(%s), " "code(0x%02x), sub_code(0x%04x)\n", log_info, originator_str, sas_loginfo.dw.code, sas_loginfo.dw.subcode); } static void mps_display_reply_info(struct mps_softc *sc, uint8_t *reply) { MPI2DefaultReply_t *mpi_reply; u16 sc_status; mpi_reply = (MPI2DefaultReply_t*)reply; sc_status = le16toh(mpi_reply->IOCStatus); if (sc_status & MPI2_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE) mps_sas_log_info(sc, le32toh(mpi_reply->IOCLogInfo)); } void mps_intr(void *data) { struct mps_softc *sc; uint32_t status; sc = (struct mps_softc *)data; mps_dprint(sc, MPS_TRACE, "%s\n", __func__); /* * Check interrupt status register to flush the bus. This is * needed for both INTx interrupts and driver-driven polling. */ status = mps_regread(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET); if ((status & MPI2_HIS_REPLY_DESCRIPTOR_INTERRUPT) == 0) return; mps_lock(sc); mps_intr_locked(data); mps_unlock(sc); return; } /* * In theory, MSI/MSIX interrupts shouldn't need to read any registers on the * chip. Hopefully this theory is correct. */ void mps_intr_msi(void *data) { struct mps_softc *sc; sc = (struct mps_softc *)data; mps_dprint(sc, MPS_TRACE, "%s\n", __func__); mps_lock(sc); mps_intr_locked(data); mps_unlock(sc); return; } /* * The locking is overly broad and simplistic, but easy to deal with for now.
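 * Both interrupt paths above simply bracket this function with the
 * per-controller mutex:
 *
 *	mps_lock(sc);
 *	mps_intr_locked(data);
 *	mps_unlock(sc);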
*/ void mps_intr_locked(void *data) { MPI2_REPLY_DESCRIPTORS_UNION *desc; struct mps_softc *sc; struct mps_command *cm = NULL; uint8_t flags; u_int pq; MPI2_DIAG_RELEASE_REPLY *rel_rep; mps_fw_diagnostic_buffer_t *pBuffer; sc = (struct mps_softc *)data; pq = sc->replypostindex; mps_dprint(sc, MPS_TRACE, "%s sc %p starting with replypostindex %u\n", __func__, sc, sc->replypostindex); for ( ;; ) { cm = NULL; desc = &sc->post_queue[sc->replypostindex]; flags = desc->Default.ReplyFlags & MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK; if ((flags == MPI2_RPY_DESCRIPT_FLAGS_UNUSED) || (le32toh(desc->Words.High) == 0xffffffff)) break; /* increment the replypostindex now, so that event handlers * and cm completion handlers which decide to do a diag * reset can zero it without it getting incremented again * afterwards, and we break out of this loop on the next * iteration since the reply post queue has been cleared to * 0xFF and all descriptors look unused (which they are). */ if (++sc->replypostindex >= sc->pqdepth) sc->replypostindex = 0; switch (flags) { case MPI2_RPY_DESCRIPT_FLAGS_SCSI_IO_SUCCESS: cm = &sc->commands[le16toh(desc->SCSIIOSuccess.SMID)]; cm->cm_reply = NULL; break; case MPI2_RPY_DESCRIPT_FLAGS_ADDRESS_REPLY: { uint32_t baddr; uint8_t *reply; /* * Re-compose the reply address from the address * sent back from the chip. The ReplyFrameAddress * is the lower 32 bits of the physical address of a * particular reply frame. Convert that address to * host format, and then use that to provide the * offset against the virtual address base * (sc->reply_frames). */ baddr = le32toh(desc->AddressReply.ReplyFrameAddress); reply = sc->reply_frames + (baddr - ((uint32_t)sc->reply_busaddr)); /* * Make sure the reply we got back is in a valid * range. If not, go ahead and panic here, since * we'll probably panic as soon as we dereference the * reply pointer anyway. */ if ((reply < sc->reply_frames) || (reply > (sc->reply_frames + (sc->fqdepth * sc->facts->ReplyFrameSize * 4)))) { printf("%s: WARNING: reply %p out of range!\n", __func__, reply); printf("%s: reply_frames %p, fqdepth %d, " "frame size %d\n", __func__, sc->reply_frames, sc->fqdepth, sc->facts->ReplyFrameSize * 4); printf("%s: baddr %#x,\n", __func__, baddr); /* LSI TODO: see the Linux code; needs a graceful exit. */ panic("Reply address out of range"); } if (le16toh(desc->AddressReply.SMID) == 0) { if (((MPI2_DEFAULT_REPLY *)reply)->Function == MPI2_FUNCTION_DIAG_BUFFER_POST) { /* * If SMID is 0 for Diag Buffer Post, * this implies that the reply is due to * a release function with a status that * the buffer has been released. Set * the buffer flags accordingly.
*/ rel_rep = (MPI2_DIAG_RELEASE_REPLY *)reply; if ((le16toh(rel_rep->IOCStatus) & MPI2_IOCSTATUS_MASK) == MPI2_IOCSTATUS_DIAGNOSTIC_RELEASED) { pBuffer = &sc->fw_diag_buffer_list[ rel_rep->BufferType]; pBuffer->valid_data = TRUE; pBuffer->owned_by_firmware = FALSE; pBuffer->immediate = FALSE; } } else mps_dispatch_event(sc, baddr, (MPI2_EVENT_NOTIFICATION_REPLY *) reply); } else { cm = &sc->commands[le16toh(desc->AddressReply.SMID)]; cm->cm_reply = reply; cm->cm_reply_data = le32toh(desc->AddressReply.ReplyFrameAddress); } break; } case MPI2_RPY_DESCRIPT_FLAGS_TARGETASSIST_SUCCESS: case MPI2_RPY_DESCRIPT_FLAGS_TARGET_COMMAND_BUFFER: case MPI2_RPY_DESCRIPT_FLAGS_RAID_ACCELERATOR_SUCCESS: default: /* Unhandled */ mps_dprint(sc, MPS_ERROR, "Unhandled reply 0x%x\n", desc->Default.ReplyFlags); cm = NULL; break; } if (cm != NULL) { /* Print error reply frame */ if (cm->cm_reply) mps_display_reply_info(sc, cm->cm_reply); mps_complete_command(sc, cm); } desc->Words.Low = 0xffffffff; desc->Words.High = 0xffffffff; } if (pq != sc->replypostindex) { mps_dprint(sc, MPS_TRACE, "%s sc %p writing postindex %d\n", __func__, sc, sc->replypostindex); mps_regwrite(sc, MPI2_REPLY_POST_HOST_INDEX_OFFSET, sc->replypostindex); } return; } static void mps_dispatch_event(struct mps_softc *sc, uintptr_t data, MPI2_EVENT_NOTIFICATION_REPLY *reply) { struct mps_event_handle *eh; int event, handled = 0; event = le16toh(reply->Event); TAILQ_FOREACH(eh, &sc->event_list, eh_list) { if (isset(eh->mask, event)) { eh->callback(sc, data, reply); handled++; } } if (handled == 0) mps_dprint(sc, MPS_EVENT, "Unhandled event 0x%x\n", event); /* * This is the only place that the event/reply should be freed. * Anything wanting to hold onto the event data should have * already copied it into their own storage. */ mps_free_reply(sc, data); } static void mps_reregister_events_complete(struct mps_softc *sc, struct mps_command *cm) { mps_dprint(sc, MPS_TRACE, "%s\n", __func__); if (cm->cm_reply) MPS_DPRINT_EVENT(sc, generic, (MPI2_EVENT_NOTIFICATION_REPLY *)cm->cm_reply); mps_free_command(sc, cm); /* next, send a port enable */ mpssas_startup(sc); } /* * For both register_events and update_events, the caller supplies a bitmap * of events that it _wants_. These functions then turn that into a bitmask * suitable for the controller.
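 *
 * A sketch of the inversion: the controller masks the events whose
 * bits are SET, so for a hypothetical handler that wants only the
 * two log events,
 *
 *	u32 want[MPI2_EVENT_NOTIFY_EVENTMASK_WORDS] = { 0 };
 *	setbit(want, MPI2_EVENT_LOG_DATA);
 *	setbit(want, MPI2_EVENT_LOG_ENTRY_ADDED);
 *
 * each sc->event_mask[i] starts out as -1 (everything masked) and the
 * wanted bits are then cleared from it with &= ~want[i].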
*/ int mps_register_events(struct mps_softc *sc, u32 *mask, mps_evt_callback_t *cb, void *data, struct mps_event_handle **handle) { struct mps_event_handle *eh; int error = 0; eh = malloc(sizeof(struct mps_event_handle), M_MPT2, M_WAITOK|M_ZERO); if(!eh) { mps_dprint(sc, MPS_ERROR, "Cannot allocate event memory\n"); return (ENOMEM); } eh->callback = cb; eh->data = data; TAILQ_INSERT_TAIL(&sc->event_list, eh, eh_list); if (mask != NULL) error = mps_update_events(sc, eh, mask); *handle = eh; return (error); } int mps_update_events(struct mps_softc *sc, struct mps_event_handle *handle, u32 *mask) { MPI2_EVENT_NOTIFICATION_REQUEST *evtreq; MPI2_EVENT_NOTIFICATION_REPLY *reply = NULL; struct mps_command *cm; int error, i; mps_dprint(sc, MPS_TRACE, "%s\n", __func__); if ((mask != NULL) && (handle != NULL)) bcopy(mask, &handle->mask[0], sizeof(u32) * MPI2_EVENT_NOTIFY_EVENTMASK_WORDS); for (i = 0; i < MPI2_EVENT_NOTIFY_EVENTMASK_WORDS; i++) sc->event_mask[i] = -1; for (i = 0; i < MPI2_EVENT_NOTIFY_EVENTMASK_WORDS; i++) sc->event_mask[i] &= ~handle->mask[i]; if ((cm = mps_alloc_command(sc)) == NULL) return (EBUSY); evtreq = (MPI2_EVENT_NOTIFICATION_REQUEST *)cm->cm_req; evtreq->Function = MPI2_FUNCTION_EVENT_NOTIFICATION; evtreq->MsgFlags = 0; evtreq->SASBroadcastPrimitiveMasks = 0; #ifdef MPS_DEBUG_ALL_EVENTS { u_char fullmask[16]; memset(fullmask, 0x00, 16); bcopy(fullmask, &evtreq->EventMasks[0], sizeof(u32) * MPI2_EVENT_NOTIFY_EVENTMASK_WORDS); } #else for (i = 0; i < MPI2_EVENT_NOTIFY_EVENTMASK_WORDS; i++) evtreq->EventMasks[i] = htole32(sc->event_mask[i]); #endif cm->cm_desc.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE; cm->cm_data = NULL; error = mps_wait_command(sc, &cm, 60, 0); if (cm != NULL) reply = (MPI2_EVENT_NOTIFICATION_REPLY *)cm->cm_reply; if ((reply == NULL) || (reply->IOCStatus & MPI2_IOCSTATUS_MASK) != MPI2_IOCSTATUS_SUCCESS) error = ENXIO; if (reply) MPS_DPRINT_EVENT(sc, generic, reply); mps_dprint(sc, MPS_TRACE, "%s finished error %d\n", __func__, error); if (cm != NULL) mps_free_command(sc, cm); return (error); } static int mps_reregister_events(struct mps_softc *sc) { MPI2_EVENT_NOTIFICATION_REQUEST *evtreq; struct mps_command *cm; struct mps_event_handle *eh; int error, i; mps_dprint(sc, MPS_TRACE, "%s\n", __func__); /* first, reregister events */ for (i = 0; i < MPI2_EVENT_NOTIFY_EVENTMASK_WORDS; i++) sc->event_mask[i] = -1; TAILQ_FOREACH(eh, &sc->event_list, eh_list) { for (i = 0; i < MPI2_EVENT_NOTIFY_EVENTMASK_WORDS; i++) sc->event_mask[i] &= ~eh->mask[i]; } if ((cm = mps_alloc_command(sc)) == NULL) return (EBUSY); evtreq = (MPI2_EVENT_NOTIFICATION_REQUEST *)cm->cm_req; evtreq->Function = MPI2_FUNCTION_EVENT_NOTIFICATION; evtreq->MsgFlags = 0; evtreq->SASBroadcastPrimitiveMasks = 0; #ifdef MPS_DEBUG_ALL_EVENTS { u_char fullmask[16]; memset(fullmask, 0x00, 16); bcopy(fullmask, &evtreq->EventMasks[0], sizeof(u32) * MPI2_EVENT_NOTIFY_EVENTMASK_WORDS); } #else for (i = 0; i < MPI2_EVENT_NOTIFY_EVENTMASK_WORDS; i++) evtreq->EventMasks[i] = htole32(sc->event_mask[i]); #endif cm->cm_desc.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE; cm->cm_data = NULL; cm->cm_complete = mps_reregister_events_complete; error = mps_map_command(sc, cm); mps_dprint(sc, MPS_TRACE, "%s finished with error %d\n", __func__, error); return (error); } void mps_deregister_events(struct mps_softc *sc, struct mps_event_handle *handle) { TAILQ_REMOVE(&sc->event_list, handle, eh_list); free(handle, M_MPT2); } /* * Add a chain element as the next SGE for the specified command. 
* Reset cm_sge and cm_sglsize to indicate all the available space. */ static int mps_add_chain(struct mps_command *cm) { MPI2_SGE_CHAIN32 *sgc; struct mps_chain *chain; int space; if (cm->cm_sglsize < MPS_SGC_SIZE) panic("MPS: Need SGE Error Code\n"); chain = mps_alloc_chain(cm->cm_sc); if (chain == NULL) return (ENOBUFS); space = (int)cm->cm_sc->facts->IOCRequestFrameSize * 4; /* * Note: a double-linked list is used to make it easier to * walk for debugging. */ TAILQ_INSERT_TAIL(&cm->cm_chain_list, chain, chain_link); sgc = (MPI2_SGE_CHAIN32 *)&cm->cm_sge->MpiChain; sgc->Length = htole16(space); sgc->NextChainOffset = 0; /* TODO: it looks like a bug that sgc->Flags is not set to * (MPI2_SGE_FLAGS_CHAIN_ELEMENT | MPI2_SGE_FLAGS_64_BIT_ADDRESSING | * MPI2_SGE_FLAGS_SYSTEM_ADDRESS) << MPI2_SGE_FLAGS_SHIFT. * This is fine, because we are not using a simple element; a * MPI2_SGE_CHAIN32 has separate Length and Flags fields. */ sgc->Flags = MPI2_SGE_FLAGS_CHAIN_ELEMENT; sgc->Address = htole32(chain->chain_busaddr); cm->cm_sge = (MPI2_SGE_IO_UNION *)&chain->chain->MpiSimple; cm->cm_sglsize = space; return (0); } /* * Add one scatter-gather element (chain, simple, transaction context) * to the scatter-gather list for a command. Maintain cm_sglsize and * cm_sge as the remaining size and pointer to the next SGE to fill * in, respectively. */ int mps_push_sge(struct mps_command *cm, void *sgep, size_t len, int segsleft) { MPI2_SGE_TRANSACTION_UNION *tc = sgep; MPI2_SGE_SIMPLE64 *sge = sgep; int error, type; uint32_t saved_buf_len, saved_address_low, saved_address_high; type = (tc->Flags & MPI2_SGE_FLAGS_ELEMENT_MASK); #ifdef INVARIANTS switch (type) { case MPI2_SGE_FLAGS_TRANSACTION_ELEMENT: { if (len != tc->DetailsLength + 4) panic("TC %p length %u or %zu?", tc, tc->DetailsLength + 4, len); } break; case MPI2_SGE_FLAGS_CHAIN_ELEMENT: /* Driver only uses 32-bit chain elements */ if (len != MPS_SGC_SIZE) panic("CHAIN %p length %u or %zu?", sgep, MPS_SGC_SIZE, len); break; case MPI2_SGE_FLAGS_SIMPLE_ELEMENT: /* Driver only uses 64-bit SGE simple elements */ if (len != MPS_SGE64_SIZE) panic("SGE simple %p length %u or %zu?", sge, MPS_SGE64_SIZE, len); if (((le32toh(sge->FlagsLength) >> MPI2_SGE_FLAGS_SHIFT) & MPI2_SGE_FLAGS_ADDRESS_SIZE) == 0) panic("SGE simple %p not marked 64-bit?", sge); break; default: panic("Unexpected SGE %p, flags %02x", tc, tc->Flags); } #endif /* * case 1: 1 more segment, enough room for it * case 2: 2 more segments, enough room for both * case 3: >=2 more segments, only enough room for 1 and a chain * case 4: >=1 more segment, enough room for only a chain * case 5: >=1 more segment, no room for anything (error) */ /* * There should be room for at least a chain element, or this * code is buggy. Case (5). */ if (cm->cm_sglsize < MPS_SGC_SIZE) panic("MPS: Need SGE Error Code\n"); if (segsleft >= 2 && cm->cm_sglsize < len + MPS_SGC_SIZE + MPS_SGE64_SIZE) { /* * There are 2 or more segments left to add, and only * enough room for 1 and a chain. Case (3). * * Mark as last element in this chain if necessary. */ if (type == MPI2_SGE_FLAGS_SIMPLE_ELEMENT) { sge->FlagsLength |= htole32( MPI2_SGE_FLAGS_LAST_ELEMENT << MPI2_SGE_FLAGS_SHIFT); } /* * Add the item then a chain. Do the chain now, * rather than on the next iteration, to simplify * understanding the code.
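 *
 * A worked example of case (3), assuming the usual element sizes of
 * MPS_SGE64_SIZE = 12 and MPS_SGC_SIZE = 8: with cm_sglsize = 28,
 * len = 12, and segsleft = 2, there is no room for 12 + 8 + 12 = 32
 * bytes, so the simple element is flagged LAST_ELEMENT, copied in,
 * and a chain element is appended immediately.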
*/ cm->cm_sglsize -= len; bcopy(sgep, cm->cm_sge, len); cm->cm_sge = (MPI2_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + len); return (mps_add_chain(cm)); } if (segsleft >= 1 && cm->cm_sglsize < len + MPS_SGC_SIZE) { /* * 1 or more segment, enough room for only a chain. * Hope the previous element wasn't a Simple entry * that needed to be marked with * MPI2_SGE_FLAGS_LAST_ELEMENT. Case (4). */ if ((error = mps_add_chain(cm)) != 0) return (error); } #ifdef INVARIANTS /* Case 1: 1 more segment, enough room for it. */ if (segsleft == 1 && cm->cm_sglsize < len) panic("1 seg left and no room? %u versus %zu", cm->cm_sglsize, len); /* Case 2: 2 more segments, enough room for both */ if (segsleft == 2 && cm->cm_sglsize < len + MPS_SGE64_SIZE) panic("2 segs left and no room? %u versus %zu", cm->cm_sglsize, len); #endif if (segsleft == 1 && type == MPI2_SGE_FLAGS_SIMPLE_ELEMENT) { /* * If this is a bi-directional request, need to account for that * here. Save the pre-filled sge values. These will be used * either for the 2nd SGL or for a single direction SGL. If * cm_out_len is non-zero, this is a bi-directional request, so * fill in the OUT SGL first, then the IN SGL, otherwise just * fill in the IN SGL. Note that at this time, when filling in * 2 SGL's for a bi-directional request, they both use the same * DMA buffer (same cm command). */ saved_buf_len = le32toh(sge->FlagsLength) & 0x00FFFFFF; saved_address_low = sge->Address.Low; saved_address_high = sge->Address.High; if (cm->cm_out_len) { sge->FlagsLength = htole32(cm->cm_out_len | ((uint32_t)(MPI2_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_HOST_TO_IOC | MPI2_SGE_FLAGS_64_BIT_ADDRESSING) << MPI2_SGE_FLAGS_SHIFT)); cm->cm_sglsize -= len; bcopy(sgep, cm->cm_sge, len); cm->cm_sge = (MPI2_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + len); } saved_buf_len |= ((uint32_t)(MPI2_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_LAST_ELEMENT | MPI2_SGE_FLAGS_END_OF_LIST | MPI2_SGE_FLAGS_64_BIT_ADDRESSING) << MPI2_SGE_FLAGS_SHIFT); if (cm->cm_flags & MPS_CM_FLAGS_DATAIN) { saved_buf_len |= ((uint32_t)(MPI2_SGE_FLAGS_IOC_TO_HOST) << MPI2_SGE_FLAGS_SHIFT); } else { saved_buf_len |= ((uint32_t)(MPI2_SGE_FLAGS_HOST_TO_IOC) << MPI2_SGE_FLAGS_SHIFT); } sge->FlagsLength = htole32(saved_buf_len); sge->Address.Low = saved_address_low; sge->Address.High = saved_address_high; } cm->cm_sglsize -= len; bcopy(sgep, cm->cm_sge, len); cm->cm_sge = (MPI2_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + len); return (0); } /* * Add one dma segment to the scatter-gather list for a command. */ int mps_add_dmaseg(struct mps_command *cm, vm_paddr_t pa, size_t len, u_int flags, int segsleft) { MPI2_SGE_SIMPLE64 sge; /* * This driver always uses 64-bit address elements for simplicity. */ bzero(&sge, sizeof(sge)); flags |= MPI2_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_SGE_FLAGS_64_BIT_ADDRESSING; sge.FlagsLength = htole32(len | (flags << MPI2_SGE_FLAGS_SHIFT)); mps_from_u64(pa, &sge.Address); return (mps_push_sge(cm, &sge, sizeof sge, segsleft)); } static void mps_data_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct mps_softc *sc; struct mps_command *cm; u_int i, dir, sflags; cm = (struct mps_command *)arg; sc = cm->cm_sc; /* * In this case, just print out a warning and let the chip tell the * user they did the wrong thing. 
*/ if ((cm->cm_max_segs != 0) && (nsegs > cm->cm_max_segs)) { mps_dprint(sc, MPS_ERROR, "%s: warning: busdma returned %d segments, " "more than the %d allowed\n", __func__, nsegs, cm->cm_max_segs); } /* * Set up DMA direction flags. Bi-directional requests are also handled * here. In that case, both direction flags will be set. */ sflags = 0; if (cm->cm_flags & MPS_CM_FLAGS_SMP_PASS) { /* * We have to add a special case for SMP passthrough; there * is no easy way to generically handle it. The first * S/G element is used for the command (therefore the * direction bit needs to be set). The second one is used * for the reply. We'll leave it to the caller to make * sure we only have two buffers. */ /* * Even though the busdma man page says it doesn't make * sense to have both direction flags, it does in this case. * We have one s/g element being accessed in each direction. */ dir = BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD; /* * Set the direction flag on the first buffer in the SMP * passthrough request. We'll clear it for the second one. */ sflags |= MPI2_SGE_FLAGS_DIRECTION | MPI2_SGE_FLAGS_END_OF_BUFFER; } else if (cm->cm_flags & MPS_CM_FLAGS_DATAOUT) { sflags |= MPI2_SGE_FLAGS_HOST_TO_IOC; dir = BUS_DMASYNC_PREWRITE; } else dir = BUS_DMASYNC_PREREAD; for (i = 0; i < nsegs; i++) { if ((cm->cm_flags & MPS_CM_FLAGS_SMP_PASS) && (i != 0)) { sflags &= ~MPI2_SGE_FLAGS_DIRECTION; } error = mps_add_dmaseg(cm, segs[i].ds_addr, segs[i].ds_len, sflags, nsegs - i); if (error != 0) { /* Resource shortage, roll back! */ if (ratecheck(&sc->lastfail, &mps_chainfail_interval)) mps_dprint(sc, MPS_INFO, "Out of chain frames, " "consider increasing hw.mps.max_chains.\n"); cm->cm_flags |= MPS_CM_FLAGS_CHAIN_FAILED; mps_complete_command(sc, cm); return; } } bus_dmamap_sync(sc->buffer_dmat, cm->cm_dmamap, dir); mps_enqueue_request(sc, cm); return; } static void mps_data_cb2(void *arg, bus_dma_segment_t *segs, int nsegs, bus_size_t mapsize, int error) { mps_data_cb(arg, segs, nsegs, error); } /* * This is the routine to enqueue commands asynchronously. * Note that the only error path here is from bus_dmamap_load(), which can * return EINPROGRESS if it is waiting for resources. Other than this, it's * assumed that if you have a command in-hand, then you have enough credits * to use it. */ int mps_map_command(struct mps_softc *sc, struct mps_command *cm) { int error = 0; if (cm->cm_flags & MPS_CM_FLAGS_USE_UIO) { error = bus_dmamap_load_uio(sc->buffer_dmat, cm->cm_dmamap, &cm->cm_uio, mps_data_cb2, cm, 0); } else if (cm->cm_flags & MPS_CM_FLAGS_USE_CCB) { error = bus_dmamap_load_ccb(sc->buffer_dmat, cm->cm_dmamap, cm->cm_data, mps_data_cb, cm, 0); } else if ((cm->cm_data != NULL) && (cm->cm_length != 0)) { error = bus_dmamap_load(sc->buffer_dmat, cm->cm_dmamap, cm->cm_data, cm->cm_length, mps_data_cb, cm, 0); } else { /* Add a zero-length element as needed */ if (cm->cm_sge != NULL) mps_add_dmaseg(cm, 0, 0, 0, 1); mps_enqueue_request(sc, cm); } return (error); } /* * This is the routine to enqueue commands synchronously. An error of * EINPROGRESS from mps_map_command() is ignored since the command will * be executed and enqueued automatically. Other errors come from msleep().
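 *
 * A typical synchronous caller (compare mps_update_events() above)
 * looks roughly like:
 *
 *	if ((cm = mps_alloc_command(sc)) == NULL)
 *		return (EBUSY);
 *	(fill in cm->cm_req and cm->cm_desc)
 *	error = mps_wait_command(sc, &cm, 60, CAN_SLEEP);
 *	if (cm != NULL)
 *		mps_free_command(sc, cm);
 *
 * cm is NULLed out for the caller when a timeout forces a reinit that
 * frees the command, hence the NULL check before the free.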
*/ int mps_wait_command(struct mps_softc *sc, struct mps_command **cmp, int timeout, int sleep_flag) { int error, rc; struct timeval cur_time, start_time; struct mps_command *cm = *cmp; if (sc->mps_flags & MPS_FLAGS_DIAGRESET) return EBUSY; cm->cm_complete = NULL; cm->cm_flags |= MPS_CM_FLAGS_POLLED; error = mps_map_command(sc, cm); if ((error != 0) && (error != EINPROGRESS)) return (error); /* * Check for context and wait for 50 mSec at a time until time has * expired or the command has finished. If msleep can't be used, need * to poll. */ if (curthread->td_no_sleeping != 0) sleep_flag = NO_SLEEP; getmicrouptime(&start_time); if (mtx_owned(&sc->mps_mtx) && sleep_flag == CAN_SLEEP) { cm->cm_flags |= MPS_CM_FLAGS_WAKEUP; error = msleep(cm, &sc->mps_mtx, 0, "mpswait", timeout*hz); if (error == EWOULDBLOCK) { /* * Record the actual elapsed time in the case of a * timeout for the message below. */ getmicrouptime(&cur_time); timevalsub(&cur_time, &start_time); } } else { while ((cm->cm_flags & MPS_CM_FLAGS_COMPLETE) == 0) { mps_intr_locked(sc); if (sleep_flag == CAN_SLEEP) pause("mpswait", hz/20); else DELAY(50000); getmicrouptime(&cur_time); timevalsub(&cur_time, &start_time); if (cur_time.tv_sec > timeout) { error = EWOULDBLOCK; break; } } } if (error == EWOULDBLOCK) { mps_dprint(sc, MPS_FAULT, "Calling Reinit from %s, timeout=%d," " elapsed=%jd\n", __func__, timeout, (intmax_t)cur_time.tv_sec); rc = mps_reinit(sc); mps_dprint(sc, MPS_FAULT, "Reinit %s\n", (rc == 0) ? "success" : "failed"); if (sc->mps_flags & MPS_FLAGS_REALLOCATED) { /* * Tell the caller that we freed the command in a * reinit. */ *cmp = NULL; } error = ETIMEDOUT; } return (error); } /* * The MPT driver had a verbose interface for config pages. In this driver, * reduce it to much simpler terms, similar to the Linux driver. 
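 *
 * A minimal synchronous read, sketched on the assumption that the
 * caller already fetched a page header (the page_address value is
 * page-type specific):
 *
 *	struct mps_config_params params;
 *
 *	bzero(&params, sizeof(params));
 *	params.action = MPI2_CONFIG_ACTION_PAGE_READ_CURRENT;
 *	params.hdr.Struct = header;
 *	params.page_address = 0;
 *	params.buffer = buf;
 *	params.length = len;
 *	params.callback = NULL;
 *	error = mps_read_config_page(sc, &params);
 *
 * A NULL callback selects the blocking mps_wait_command() path below.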
*/ int mps_read_config_page(struct mps_softc *sc, struct mps_config_params *params) { MPI2_CONFIG_REQUEST *req; struct mps_command *cm; int error; if (sc->mps_flags & MPS_FLAGS_BUSY) { return (EBUSY); } cm = mps_alloc_command(sc); if (cm == NULL) { return (EBUSY); } req = (MPI2_CONFIG_REQUEST *)cm->cm_req; req->Function = MPI2_FUNCTION_CONFIG; req->Action = params->action; req->SGLFlags = 0; req->ChainOffset = 0; req->PageAddress = params->page_address; if (params->hdr.Struct.PageType == MPI2_CONFIG_PAGETYPE_EXTENDED) { MPI2_CONFIG_EXTENDED_PAGE_HEADER *hdr; hdr = &params->hdr.Ext; req->ExtPageType = hdr->ExtPageType; req->ExtPageLength = hdr->ExtPageLength; req->Header.PageType = MPI2_CONFIG_PAGETYPE_EXTENDED; req->Header.PageLength = 0; /* Must be set to zero */ req->Header.PageNumber = hdr->PageNumber; req->Header.PageVersion = hdr->PageVersion; } else { MPI2_CONFIG_PAGE_HEADER *hdr; hdr = &params->hdr.Struct; req->Header.PageType = hdr->PageType; req->Header.PageNumber = hdr->PageNumber; req->Header.PageLength = hdr->PageLength; req->Header.PageVersion = hdr->PageVersion; } cm->cm_data = params->buffer; cm->cm_length = params->length; if (cm->cm_data != NULL) { cm->cm_sge = &req->PageBufferSGE; cm->cm_sglsize = sizeof(MPI2_SGE_IO_UNION); cm->cm_flags = MPS_CM_FLAGS_SGE_SIMPLE | MPS_CM_FLAGS_DATAIN; } else cm->cm_sge = NULL; cm->cm_desc.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE; cm->cm_complete_data = params; if (params->callback != NULL) { cm->cm_complete = mps_config_complete; return (mps_map_command(sc, cm)); } else { error = mps_wait_command(sc, &cm, 0, CAN_SLEEP); if (error) { mps_dprint(sc, MPS_FAULT, "Error %d reading config page\n", error); if (cm != NULL) mps_free_command(sc, cm); return (error); } mps_config_complete(sc, cm); } return (0); } int mps_write_config_page(struct mps_softc *sc, struct mps_config_params *params) { return (EINVAL); } static void mps_config_complete(struct mps_softc *sc, struct mps_command *cm) { MPI2_CONFIG_REPLY *reply; struct mps_config_params *params; MPS_FUNCTRACE(sc); params = cm->cm_complete_data; if (cm->cm_data != NULL) { bus_dmamap_sync(sc->buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->buffer_dmat, cm->cm_dmamap); } /* * XXX KDM need to do more error recovery? This results in the * device in question not getting probed.
*/ if ((cm->cm_flags & MPS_CM_FLAGS_ERROR_MASK) != 0) { params->status = MPI2_IOCSTATUS_BUSY; goto done; } reply = (MPI2_CONFIG_REPLY *)cm->cm_reply; if (reply == NULL) { params->status = MPI2_IOCSTATUS_BUSY; goto done; } params->status = reply->IOCStatus; if (params->hdr.Struct.PageType == MPI2_CONFIG_PAGETYPE_EXTENDED) { params->hdr.Ext.ExtPageType = reply->ExtPageType; params->hdr.Ext.ExtPageLength = reply->ExtPageLength; params->hdr.Ext.PageType = reply->Header.PageType; params->hdr.Ext.PageNumber = reply->Header.PageNumber; params->hdr.Ext.PageVersion = reply->Header.PageVersion; } else { params->hdr.Struct.PageType = reply->Header.PageType; params->hdr.Struct.PageNumber = reply->Header.PageNumber; params->hdr.Struct.PageLength = reply->Header.PageLength; params->hdr.Struct.PageVersion = reply->Header.PageVersion; } done: mps_free_command(sc, cm); if (params->callback != NULL) params->callback(sc, params); return; } Index: projects/runtime-coverage/sys/dev/sdhci/fsl_sdhci.c =================================================================== --- projects/runtime-coverage/sys/dev/sdhci/fsl_sdhci.c (revision 323974) +++ projects/runtime-coverage/sys/dev/sdhci/fsl_sdhci.c (revision 323975) @@ -1,1000 +1,996 @@ /*- * Copyright (c) 2013 Ian Lepore * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * SDHCI driver glue for Freescale i.MX SoC and QorIQ families. * * This supports both eSDHC (earlier SoCs) and uSDHC (more recent SoCs). */ #include "opt_mmccam.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __arm__ #include #include #endif #ifdef __powerpc__ #include #endif #include #include #include #include #include #include #include "mmcbr_if.h" #include "sdhci_if.h" struct fsl_sdhci_softc { device_t dev; struct resource * mem_res; struct resource * irq_res; void * intr_cookie; struct sdhci_slot slot; struct callout r1bfix_callout; sbintime_t r1bfix_timeout_at; struct sdhci_fdt_gpio * gpio; uint32_t baseclk_hz; uint32_t cmd_and_mode; uint32_t r1bfix_intmask; uint16_t sdclockreg_freq_bits; uint8_t r1bfix_type; uint8_t hwtype; }; #define R1BFIX_NONE 0 /* No fix needed at next interrupt. 
*/ #define R1BFIX_NODATA 1 /* Synthesize DATA_END for R1B w/o data. */ #define R1BFIX_AC12 2 /* Wait for busy after auto command 12. */ #define HWTYPE_NONE 0 /* Hardware not recognized/supported. */ #define HWTYPE_ESDHC 1 /* fsl5x and earlier. */ #define HWTYPE_USDHC 2 /* fsl6. */ /* * Freescale-specific registers, or in some cases the layout of bits within the * sdhci-defined register is different on Freescale. These names all begin with * SDHC_ (not SDHCI_). */ #define SDHC_WTMK_LVL 0x44 /* Watermark Level register. */ #define USDHC_MIX_CONTROL 0x48 /* Mix(ed) Control register. */ #define SDHC_VEND_SPEC 0xC0 /* Vendor-specific register. */ #define SDHC_VEND_FRC_SDCLK_ON (1 << 8) #define SDHC_VEND_IPGEN (1 << 11) #define SDHC_VEND_HCKEN (1 << 12) #define SDHC_VEND_PEREN (1 << 13) #define SDHC_PRES_STATE 0x24 #define SDHC_PRES_CIHB (1 << 0) #define SDHC_PRES_CDIHB (1 << 1) #define SDHC_PRES_DLA (1 << 2) #define SDHC_PRES_SDSTB (1 << 3) #define SDHC_PRES_IPGOFF (1 << 4) #define SDHC_PRES_HCKOFF (1 << 5) #define SDHC_PRES_PEROFF (1 << 6) #define SDHC_PRES_SDOFF (1 << 7) #define SDHC_PRES_WTA (1 << 8) #define SDHC_PRES_RTA (1 << 9) #define SDHC_PRES_BWEN (1 << 10) #define SDHC_PRES_BREN (1 << 11) #define SDHC_PRES_RTR (1 << 12) #define SDHC_PRES_CINST (1 << 16) #define SDHC_PRES_CDPL (1 << 18) #define SDHC_PRES_WPSPL (1 << 19) #define SDHC_PRES_CLSL (1 << 23) #define SDHC_PRES_DLSL_SHIFT 24 #define SDHC_PRES_DLSL_MASK (0xffU << SDHC_PRES_DLSL_SHIFT) #define SDHC_PROT_CTRL 0x28 #define SDHC_PROT_LED (1 << 0) #define SDHC_PROT_WIDTH_1BIT (0 << 1) #define SDHC_PROT_WIDTH_4BIT (1 << 1) #define SDHC_PROT_WIDTH_8BIT (2 << 1) #define SDHC_PROT_WIDTH_MASK (3 << 1) #define SDHC_PROT_D3CD (1 << 3) #define SDHC_PROT_EMODE_BIG (0 << 4) #define SDHC_PROT_EMODE_HALF (1 << 4) #define SDHC_PROT_EMODE_LITTLE (2 << 4) #define SDHC_PROT_EMODE_MASK (3 << 4) #define SDHC_PROT_SDMA (0 << 8) #define SDHC_PROT_ADMA1 (1 << 8) #define SDHC_PROT_ADMA2 (2 << 8) #define SDHC_PROT_ADMA264 (3 << 8) #define SDHC_PROT_DMA_MASK (3 << 8) #define SDHC_PROT_CDTL (1 << 6) #define SDHC_PROT_CDSS (1 << 7) #define SDHC_SYS_CTRL 0x2c /* * The clock enable bits exist in different registers for ESDHC vs USDHC, but * they are the same bits in both cases. The divisor values go into the * standard sdhci clock register, but in different bit positions and meanings than the sdhci spec values. 
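 *
 * Decoding the fields is plain mask-and-shift; a hypothetical helper
 * reading back the current setting would use the DIVISOR and PRESCALE
 * masks below:
 *
 *	div = (val & SDHC_CLK_DIVISOR_MASK) >> SDHC_CLK_DIVISOR_SHIFT;
 *	pre = (val & SDHC_CLK_PRESCALE_MASK) >> SDHC_CLK_PRESCALE_SHIFT;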
*/ #define SDHC_CLK_IPGEN (1 << 0) #define SDHC_CLK_HCKEN (1 << 1) #define SDHC_CLK_PEREN (1 << 2) #define SDHC_CLK_SDCLKEN (1 << 3) #define SDHC_CLK_ENABLE_MASK 0x0000000f #define SDHC_CLK_DIVISOR_MASK 0x000000f0 #define SDHC_CLK_DIVISOR_SHIFT 4 #define SDHC_CLK_PRESCALE_MASK 0x0000ff00 #define SDHC_CLK_PRESCALE_SHIFT 8 static struct ofw_compat_data compat_data[] = { {"fsl,imx6q-usdhc", HWTYPE_USDHC}, {"fsl,imx6sl-usdhc", HWTYPE_USDHC}, {"fsl,imx53-esdhc", HWTYPE_ESDHC}, {"fsl,imx51-esdhc", HWTYPE_ESDHC}, {"fsl,esdhc", HWTYPE_ESDHC}, {NULL, HWTYPE_NONE}, }; static uint16_t fsl_sdhc_get_clock(struct fsl_sdhci_softc *sc); static void fsl_sdhc_set_clock(struct fsl_sdhci_softc *sc, uint16_t val); static void fsl_sdhci_r1bfix_func(void *arg); static inline uint32_t RD4(struct fsl_sdhci_softc *sc, bus_size_t off) { return (bus_read_4(sc->mem_res, off)); } static inline void WR4(struct fsl_sdhci_softc *sc, bus_size_t off, uint32_t val) { bus_write_4(sc->mem_res, off, val); } static uint8_t fsl_sdhci_read_1(device_t dev, struct sdhci_slot *slot, bus_size_t off) { struct fsl_sdhci_softc *sc = device_get_softc(dev); uint32_t val32, wrk32; /* * Most of the things in the standard host control register are in the * hardware's wider protocol control register, but some of the bits are * moved around. */ if (off == SDHCI_HOST_CONTROL) { wrk32 = RD4(sc, SDHC_PROT_CTRL); val32 = wrk32 & (SDHCI_CTRL_LED | SDHCI_CTRL_CARD_DET | SDHCI_CTRL_FORCE_CARD); switch (wrk32 & SDHC_PROT_WIDTH_MASK) { case SDHC_PROT_WIDTH_1BIT: /* Value is already 0. */ break; case SDHC_PROT_WIDTH_4BIT: val32 |= SDHCI_CTRL_4BITBUS; break; case SDHC_PROT_WIDTH_8BIT: val32 |= SDHCI_CTRL_8BITBUS; break; } switch (wrk32 & SDHC_PROT_DMA_MASK) { case SDHC_PROT_SDMA: /* Value is already 0. */ break; case SDHC_PROT_ADMA1: /* This value is deprecated, should never appear. */ break; case SDHC_PROT_ADMA2: val32 |= SDHCI_CTRL_ADMA2; break; case SDHC_PROT_ADMA264: val32 |= SDHCI_CTRL_ADMA264; break; } return val32; } /* * XXX can't find the bus power on/off knob. For now we have to say the * power is always on and always set to the same voltage. */ if (off == SDHCI_POWER_CONTROL) { return (SDHCI_POWER_ON | SDHCI_POWER_300); } return ((RD4(sc, off & ~3) >> (off & 3) * 8) & 0xff); } static uint16_t fsl_sdhci_read_2(device_t dev, struct sdhci_slot *slot, bus_size_t off) { struct fsl_sdhci_softc *sc = device_get_softc(dev); uint32_t val32; if (sc->hwtype == HWTYPE_USDHC) { /* * The USDHC hardware has nothing in the version register, but * it's v3 compatible with all our translation code. */ if (off == SDHCI_HOST_VERSION) { return (SDHCI_SPEC_300 << SDHCI_SPEC_VER_SHIFT); } /* * The USDHC hardware moved the transfer mode bits to the mixed * control register, fetch them from there. */ if (off == SDHCI_TRANSFER_MODE) return (RD4(sc, USDHC_MIX_CONTROL) & 0x37); } else if (sc->hwtype == HWTYPE_ESDHC) { /* * The ESDHC hardware has the typical 32-bit combined "command * and mode" register that we have to cache so that command * isn't written until after mode. On a read, just retrieve the * cached values last written. */ if (off == SDHCI_TRANSFER_MODE) { return (sc->cmd_and_mode & 0x0000ffff); } else if (off == SDHCI_COMMAND_FLAGS) { return (sc->cmd_and_mode >> 16); } } /* * This hardware only manages one slot. Synthesize a slot interrupt * status register... if there are any enabled interrupts active they * must be coming from our one and only slot. 
*/ if (off == SDHCI_SLOT_INT_STATUS) { val32 = RD4(sc, SDHCI_INT_STATUS); val32 &= RD4(sc, SDHCI_SIGNAL_ENABLE); return (val32 ? 1 : 0); } /* * Clock bits are scattered into various registers which differ by * hardware type, complex enough to have their own function. */ if (off == SDHCI_CLOCK_CONTROL) { return (fsl_sdhc_get_clock(sc)); } return ((RD4(sc, off & ~3) >> (off & 3) * 8) & 0xffff); } static uint32_t fsl_sdhci_read_4(device_t dev, struct sdhci_slot *slot, bus_size_t off) { struct fsl_sdhci_softc *sc = device_get_softc(dev); uint32_t val32, wrk32; val32 = RD4(sc, off); /* * The hardware leaves the base clock frequency out of the capabilities * register, but we filled it in by setting slot->max_clk at attach time * rather than here, because we can't represent frequencies above 63MHz * in an sdhci 2.0 capabilities register. The timeout clock is the same * as the active output sdclock; we indicate that with a quirk setting * so don't populate the timeout frequency bits. * * XXX Turn off (for now) features the hardware can do but this driver * doesn't yet handle (1.8v, suspend/resume, etc). */ if (off == SDHCI_CAPABILITIES) { val32 &= ~SDHCI_CAN_VDD_180; val32 &= ~SDHCI_CAN_DO_SUSPEND; val32 |= SDHCI_CAN_DO_8BITBUS; return (val32); } /* * The hardware moves bits around in the present state register to make * room for all 8 data line state bits. To translate, mask out all the * bits which are not in the same position in both registers (this also * masks out some Freescale-specific bits in locations defined as * reserved by sdhci), then shift the data line and retune request bits * down to their standard locations. */ if (off == SDHCI_PRESENT_STATE) { wrk32 = val32; val32 &= 0x000F0F07; val32 |= (wrk32 >> 4) & SDHCI_STATE_DAT_MASK; val32 |= (wrk32 >> 9) & SDHCI_RETUNE_REQUEST; return (val32); } /* * fsl_sdhci_intr() can synthesize a DATA_END interrupt following a * command with an R1B response, mix it into the hardware status. */ if (off == SDHCI_INT_STATUS) { return (val32 | sc->r1bfix_intmask); } return val32; } static void fsl_sdhci_read_multi_4(device_t dev, struct sdhci_slot *slot, bus_size_t off, uint32_t *data, bus_size_t count) { struct fsl_sdhci_softc *sc = device_get_softc(dev); bus_read_multi_4(sc->mem_res, off, data, count); } static void fsl_sdhci_write_1(device_t dev, struct sdhci_slot *slot, bus_size_t off, uint8_t val) { struct fsl_sdhci_softc *sc = device_get_softc(dev); uint32_t val32; /* * Most of the things in the standard host control register are in the * hardware's wider protocol control register, but some of the bits are * moved around. */ if (off == SDHCI_HOST_CONTROL) { val32 = RD4(sc, SDHC_PROT_CTRL); val32 &= ~(SDHC_PROT_LED | SDHC_PROT_DMA_MASK | SDHC_PROT_WIDTH_MASK | SDHC_PROT_CDTL | SDHC_PROT_CDSS); val32 |= (val & SDHCI_CTRL_LED); if (val & SDHCI_CTRL_8BITBUS) val32 |= SDHC_PROT_WIDTH_8BIT; else val32 |= (val & SDHCI_CTRL_4BITBUS); val32 |= (val & (SDHCI_CTRL_SDMA | SDHCI_CTRL_ADMA2)) << 4; val32 |= (val & (SDHCI_CTRL_CARD_DET | SDHCI_CTRL_FORCE_CARD)); WR4(sc, SDHC_PROT_CTRL, val32); return; } /* XXX I can't find the bus power on/off knob; do nothing. */ if (off == SDHCI_POWER_CONTROL) { return; } #ifdef __powerpc__ /* XXX Reset doesn't seem to work as expected. Do nothing for now.
*/ if (off == SDHCI_SOFTWARE_RESET) return; #endif val32 = RD4(sc, off & ~3); val32 &= ~(0xff << (off & 3) * 8); val32 |= (val << (off & 3) * 8); WR4(sc, off & ~3, val32); } static void fsl_sdhci_write_2(device_t dev, struct sdhci_slot *slot, bus_size_t off, uint16_t val) { struct fsl_sdhci_softc *sc = device_get_softc(dev); uint32_t val32; /* * The clock control stuff is complex enough to have its own function * that can handle the ESDHC versus USDHC differences. */ if (off == SDHCI_CLOCK_CONTROL) { fsl_sdhc_set_clock(sc, val); return; } /* * Figure out whether we need to check the DAT0 line for busy status at * interrupt time. The controller should be doing this, but for some * reason it doesn't. There are two cases: * - R1B response with no data transfer should generate a DATA_END (aka * TRANSFER_COMPLETE) interrupt after waiting for busy, but if * there's no data transfer there's no DATA_END interrupt. This is * documented; they seem to think it's a feature. * - R1B response after Auto-CMD12 appears to not work, even though * there's a control bit for it (bit 3) in the vendor register. * When we're starting a command that needs a manual DAT0 line check at * interrupt time, we leave ourselves a note in r1bfix_type so that we * can do the extra work in fsl_sdhci_intr(). */ if (off == SDHCI_COMMAND_FLAGS) { if (val & SDHCI_CMD_DATA) { const uint32_t MBAUTOCMD = SDHCI_TRNS_ACMD12 | SDHCI_TRNS_MULTI; val32 = RD4(sc, USDHC_MIX_CONTROL); if ((val32 & MBAUTOCMD) == MBAUTOCMD) sc->r1bfix_type = R1BFIX_AC12; } else { if ((val & SDHCI_CMD_RESP_MASK) == SDHCI_CMD_RESP_SHORT_BUSY) { WR4(sc, SDHCI_INT_ENABLE, slot->intmask | SDHCI_INT_RESPONSE); WR4(sc, SDHCI_SIGNAL_ENABLE, slot->intmask | SDHCI_INT_RESPONSE); sc->r1bfix_type = R1BFIX_NODATA; } } } /* * The USDHC hardware moved the transfer mode bits to mixed control; we * just write them there and we're done. The ESDHC hardware has the * typical combined cmd-and-mode register that allows only 32-bit * access, so when writing the mode bits just save them, then later when * writing the command bits, add in the saved mode bits. */ if (sc->hwtype == HWTYPE_USDHC) { if (off == SDHCI_TRANSFER_MODE) { val32 = RD4(sc, USDHC_MIX_CONTROL); val32 &= ~0x3f; val32 |= val & 0x37; // XXX acmd23 not supported here (or by sdhci driver) WR4(sc, USDHC_MIX_CONTROL, val32); return; } } else if (sc->hwtype == HWTYPE_ESDHC) { if (off == SDHCI_TRANSFER_MODE) { sc->cmd_and_mode = (sc->cmd_and_mode & 0xffff0000) | val; return; } else if (off == SDHCI_COMMAND_FLAGS) { sc->cmd_and_mode = (sc->cmd_and_mode & 0xffff) | (val << 16); WR4(sc, SDHCI_TRANSFER_MODE, sc->cmd_and_mode); return; } } val32 = RD4(sc, off & ~3); val32 &= ~(0xffff << (off & 3) * 8); val32 |= ((val & 0xffff) << (off & 3) * 8); WR4(sc, off & ~3, val32); } static void fsl_sdhci_write_4(device_t dev, struct sdhci_slot *slot, bus_size_t off, uint32_t val) { struct fsl_sdhci_softc *sc = device_get_softc(dev); /* Clear synthesized interrupts, then pass the value to the hardware. 
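 *
 * Editorial sketch of the whole synthesized-status lifecycle, pieced
 * together from this file (illustrative only):
 */
#if 0	/* Illustrative only, not compiled. */
	/* 1. write_2(SDHCI_COMMAND_FLAGS) notes that a busy-wait fix is needed: */
	sc->r1bfix_type = R1BFIX_NODATA;		/* or R1BFIX_AC12 */
	/* 2. fsl_sdhci_r1bfix_is_wait_done() synthesizes the missing bits: */
	sc->r1bfix_intmask |= SDHCI_INT_DATA_END;
	/* 3. read_4(SDHCI_INT_STATUS) ORs them into the hardware status;  */
	/* 4. the write below clears whichever bits the sdhci driver acks. */
	sc->r1bfix_intmask &= ~val;
#endif
/*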
*/ if (off == SDHCI_INT_STATUS) { sc->r1bfix_intmask &= ~val; } WR4(sc, off, val); } static void fsl_sdhci_write_multi_4(device_t dev, struct sdhci_slot *slot, bus_size_t off, uint32_t *data, bus_size_t count) { struct fsl_sdhci_softc *sc = device_get_softc(dev); bus_write_multi_4(sc->mem_res, off, data, count); } static uint16_t fsl_sdhc_get_clock(struct fsl_sdhci_softc *sc) { uint16_t val; /* * Whenever the sdhci driver writes the clock register we save a * snapshot of just the frequency bits, so that we can play them back * here on a register read without recalculating the frequency from the * prescaler and divisor bits in the real register. We'll start with * those bits, and mix in the clock status and enable bits that come * from different places depending on which hardware we've got. */ val = sc->sdclockreg_freq_bits; /* * The internal clock is always enabled (actually, the hardware manages * it). Whether the internal clock is stable yet after a frequency * change comes from the present-state register on both hardware types. */ val |= SDHCI_CLOCK_INT_EN; if (RD4(sc, SDHC_PRES_STATE) & SDHC_PRES_SDSTB) val |= SDHCI_CLOCK_INT_STABLE; /* * On i.MX ESDHC hardware the card bus clock enable is in the usual * sdhci register but it's a different bit, so transcribe it (note the * difference between standard SDHCI_ and Freescale SDHC_ prefixes * here). On USDHC and QorIQ ESDHC hardware there is a force-on bit, but * no force-off for the card bus clock (the hardware runs the clock when * transfers are active no matter what), so we always say the clock is * on. * XXX Maybe we should say it's in whatever state the sdhci driver last * set it to. */ if (sc->hwtype == HWTYPE_ESDHC) { #ifdef __arm__ if (RD4(sc, SDHC_SYS_CTRL) & SDHC_CLK_SDCLKEN) #endif val |= SDHCI_CLOCK_CARD_EN; } else { val |= SDHCI_CLOCK_CARD_EN; } return (val); } static void fsl_sdhc_set_clock(struct fsl_sdhci_softc *sc, uint16_t val) { uint32_t divisor, freq, prescale, val32; val32 = RD4(sc, SDHCI_CLOCK_CONTROL); /* * Save the frequency-setting bits in SDHCI format so that we can play * them back in get_clock without complex decoding of hardware regs, * then deal with the frequency part of the value based on hardware type. */ sc->sdclockreg_freq_bits = val & SDHCI_DIVIDERS_MASK; if (sc->hwtype == HWTYPE_ESDHC) { /* * The i.MX5 ESDHC hardware requires the driver to manually * start and stop the sd bus clock. If the enable bit is not * set, turn off the clock in hardware and we're done, otherwise * decode the requested frequency. ESDHC hardware is sdhci 2.0; * the sdhci driver will use the original 8-bit divisor field * and the "base / 2^N" divisor scheme. */ if ((val & SDHCI_CLOCK_CARD_EN) == 0) { #ifdef __arm__ /* On QorIQ, this is a reserved bit. */ WR4(sc, SDHCI_CLOCK_CONTROL, val32 & ~SDHC_CLK_SDCLKEN); #endif return; } divisor = (val >> SDHCI_DIVIDER_SHIFT) & SDHCI_DIVIDER_MASK; freq = sc->baseclk_hz >> ffs(divisor); } else { /* * The USDHC hardware provides only "force always on" control * over the sd bus clock, but no way to turn it off. (If a cmd * or data transfer is in progress the clock is on, otherwise it * is off.) If the clock is being disabled, we can just return * now, otherwise we decode the requested frequency. USDHC * hardware is sdhci 3.0; the sdhci driver will use a 10-bit * divisor using the "base / 2*N" divisor scheme.
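 *
 * A worked example (editorial, illustrative only): with a 200 MHz base
 * clock and 50 MHz requested, the sdhci driver programs N = 2 under the
 * "base / 2*N" scheme, split across the low 8 and high 2 divider bits:
 */
#if 0	/* Illustrative only, not compiled. */
	divisor = ((val >> SDHCI_DIVIDER_SHIFT) & SDHCI_DIVIDER_MASK) |
	    ((val >> SDHCI_DIVIDER_HI_SHIFT) & SDHCI_DIVIDER_HI_MASK) <<
	    SDHCI_DIVIDER_MASK_LEN;			/* N = 2 */
	freq = 200000000 / (2 * divisor);		/* 50000000 Hz */
#endif
/*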
*/ if ((val & SDHCI_CLOCK_CARD_EN) == 0) return; divisor = ((val >> SDHCI_DIVIDER_SHIFT) & SDHCI_DIVIDER_MASK) | ((val >> SDHCI_DIVIDER_HI_SHIFT) & SDHCI_DIVIDER_HI_MASK) << SDHCI_DIVIDER_MASK_LEN; if (divisor == 0) freq = sc->baseclk_hz; else freq = sc->baseclk_hz / (2 * divisor); } /* * Get a prescaler and final divisor to achieve the desired frequency. */ for (prescale = 2; freq < sc->baseclk_hz / (prescale * 16);) prescale <<= 1; for (divisor = 1; freq < sc->baseclk_hz / (prescale * divisor);) ++divisor; #ifdef DEBUG device_printf(sc->dev, "desired SD freq: %d, actual: %d; base %d prescale %d divisor %d\n", freq, sc->baseclk_hz / (prescale * divisor), sc->baseclk_hz, prescale, divisor); #endif /* * Adjust to zero-based values, and store them to the hardware. */ prescale >>= 1; divisor -= 1; val32 &= ~(SDHC_CLK_DIVISOR_MASK | SDHC_CLK_PRESCALE_MASK); val32 |= divisor << SDHC_CLK_DIVISOR_SHIFT; val32 |= prescale << SDHC_CLK_PRESCALE_SHIFT; val32 |= SDHC_CLK_IPGEN; WR4(sc, SDHCI_CLOCK_CONTROL, val32); } static boolean_t fsl_sdhci_r1bfix_is_wait_done(struct fsl_sdhci_softc *sc) { uint32_t inhibit; mtx_assert(&sc->slot.mtx, MA_OWNED); /* * Check the DAT0 line status using both the DLA (data line active) and * CDIHB (data inhibit) bits in the present state register. In theory * just DLA should do the trick, but in practice it takes both. If the * DAT0 line is still being held and we're not yet beyond the timeout * point, just schedule another callout to check again later. */ inhibit = RD4(sc, SDHC_PRES_STATE) & (SDHC_PRES_DLA | SDHC_PRES_CDIHB); if (inhibit && getsbinuptime() < sc->r1bfix_timeout_at) { callout_reset_sbt(&sc->r1bfix_callout, SBT_1MS, 0, fsl_sdhci_r1bfix_func, sc, 0); return (false); } /* * If we reach this point with the inhibit bits still set, we've got a * timeout; synthesize a DATA_TIMEOUT interrupt. Otherwise the DAT0 * line has been released, and we synthesize a DATA_END, and if the type * of fix needed was on a command-without-data we also now add in the * original INT_RESPONSE that we suppressed earlier. */ if (inhibit) sc->r1bfix_intmask |= SDHCI_INT_DATA_TIMEOUT; else { sc->r1bfix_intmask |= SDHCI_INT_DATA_END; if (sc->r1bfix_type == R1BFIX_NODATA) sc->r1bfix_intmask |= SDHCI_INT_RESPONSE; } sc->r1bfix_type = R1BFIX_NONE; return (true); } static void fsl_sdhci_r1bfix_func(void * arg) { struct fsl_sdhci_softc *sc = arg; boolean_t r1bwait_done; mtx_lock(&sc->slot.mtx); r1bwait_done = fsl_sdhci_r1bfix_is_wait_done(sc); mtx_unlock(&sc->slot.mtx); if (r1bwait_done) sdhci_generic_intr(&sc->slot); } static void fsl_sdhci_intr(void *arg) { struct fsl_sdhci_softc *sc = arg; uint32_t intmask; mtx_lock(&sc->slot.mtx); /* * Manually check the DAT0 line for R1B response types that the * controller fails to handle properly. The controller asserts the done * interrupt while the card is still asserting busy with the DAT0 line. * * We check DAT0 immediately because most of the time, especially on a * read, the card will actually be done by the time we get here. If it's * not, then the wait_done routine will schedule a callout to re-check * periodically until it is done. In that case we clear the interrupt * out of the hardware now so that we can present it later when the DAT0 * line is released. * * If we need to wait for the DAT0 line to be released, we set up a * timeout point 250ms in the future. This number comes from the SD * spec, which allows a command to take that long.
In the real world, * cards tend to take 10-20ms for a long-running command such as a write * or erase that spans two pages. */ switch (sc->r1bfix_type) { case R1BFIX_NODATA: intmask = RD4(sc, SDHCI_INT_STATUS) & SDHCI_INT_RESPONSE; break; case R1BFIX_AC12: intmask = RD4(sc, SDHCI_INT_STATUS) & SDHCI_INT_DATA_END; break; default: intmask = 0; break; } if (intmask) { sc->r1bfix_timeout_at = getsbinuptime() + 250 * SBT_1MS; if (!fsl_sdhci_r1bfix_is_wait_done(sc)) { WR4(sc, SDHCI_INT_STATUS, intmask); bus_barrier(sc->mem_res, SDHCI_INT_STATUS, 4, BUS_SPACE_BARRIER_WRITE); } } mtx_unlock(&sc->slot.mtx); sdhci_generic_intr(&sc->slot); } static int fsl_sdhci_get_ro(device_t bus, device_t child) { struct fsl_sdhci_softc *sc = device_get_softc(bus); return (sdhci_fdt_gpio_get_readonly(sc->gpio)); } static bool fsl_sdhci_get_card_present(device_t dev, struct sdhci_slot *slot) { struct fsl_sdhci_softc *sc = device_get_softc(dev); return (sdhci_fdt_gpio_get_present(sc->gpio)); } #ifdef __powerpc__ static uint32_t fsl_sdhci_get_platform_clock(device_t dev) { phandle_t node; uint32_t clock; node = ofw_bus_get_node(dev); /* Get sdhci node properties */ if ((OF_getprop(node, "clock-frequency", (void *)&clock, sizeof(clock)) <= 0) || (clock == 0)) { clock = mpc85xx_get_system_clock(); if (clock == 0) { device_printf(dev, "Cannot acquire correct sdhci " "frequency from DTS.\n"); return (0); } } if (bootverbose) device_printf(dev, "Acquired clock: %d from DTS\n", clock); return (clock); } #endif static int fsl_sdhci_detach(device_t dev) { /* sdhci_fdt_gpio_teardown(sc->gpio); */ return (EBUSY); } static int fsl_sdhci_attach(device_t dev) { struct fsl_sdhci_softc *sc = device_get_softc(dev); int rid, err; #ifdef __powerpc__ phandle_t node; uint32_t protctl; #endif sc->dev = dev; sc->hwtype = ofw_bus_search_compatible(dev, compat_data)->ocd_data; if (sc->hwtype == HWTYPE_NONE) panic("Impossible: not compatible in fsl_sdhci_attach()"); rid = 0; sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!sc->mem_res) { device_printf(dev, "cannot allocate memory window\n"); err = ENXIO; goto fail; } rid = 0; sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (!sc->irq_res) { device_printf(dev, "cannot allocate interrupt\n"); err = ENXIO; goto fail; } if (bus_setup_intr(dev, sc->irq_res, INTR_TYPE_BIO | INTR_MPSAFE, NULL, fsl_sdhci_intr, sc, &sc->intr_cookie)) { device_printf(dev, "cannot setup interrupt handler\n"); err = ENXIO; goto fail; } sc->slot.quirks |= SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK; /* * DMA is not really broken, I just haven't implemented it yet. */ sc->slot.quirks |= SDHCI_QUIRK_BROKEN_DMA; /* * Set the buffer watermark level to 128 words (512 bytes) for both read * and write. The hardware has a restriction that when the read or * write ready status is asserted, that means you can read exactly the * number of words set in the watermark register before you have to * re-check the status and potentially wait for more data. The main * sdhci driver provides no hook for doing status checking on less than * a full block boundary, so we set the watermark level to be a full * block. Reads and writes where the block size is less than the * watermark size will work correctly too, no need to change the * watermark for different size blocks. However, 128 is the maximum * allowed for the watermark, so PIO is limited to 512 byte blocks * (which works fine for SD cards, may be a problem for SDIO some day). * * XXX need named constants for this stuff.
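 *
 * An editorial decomposition of the magic numbers below, using
 * hypothetical macro names (per the XXX above; the field layout follows
 * the i.MX reference manual, so treat it as an assumption):
 */
#if 0	/* Illustrative only, not compiled. */
#define	WTMK_RD_WML(n)	((n) << 0)	/* read watermark, in words */
#define	WTMK_RD_BRST(n)	((n) << 8)	/* read burst length */
#define	WTMK_WR_WML(n)	((n) << 16)	/* write watermark, in words */
#define	WTMK_WR_BRST(n)	((n) << 24)	/* write burst length */
	/* 0x08800880 == burst 8, 128-word watermark, both directions: */
	WR4(sc, SDHC_WTMK_LVL, WTMK_WR_BRST(8) | WTMK_WR_WML(128) |
	    WTMK_RD_BRST(8) | WTMK_RD_WML(128));
#endif
/*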
*/ /* P1022 has the '*_BRST_LEN' fields as reserved, always reading 0x10 */ if (ofw_bus_is_compatible(dev, "fsl,p1022-esdhc")) WR4(sc, SDHC_WTMK_LVL, 0x10801080); else WR4(sc, SDHC_WTMK_LVL, 0x08800880); /* * We read in native byte order in the main driver, but the register * defaults to little endian. */ #ifdef __powerpc__ sc->baseclk_hz = fsl_sdhci_get_platform_clock(dev); #else sc->baseclk_hz = imx_ccm_sdhci_hz(); #endif sc->slot.max_clk = sc->baseclk_hz; /* * Set up any gpio pin handling described in the FDT data. This cannot * fail; see comments in sdhci_fdt_gpio.h for details. */ sc->gpio = sdhci_fdt_gpio_setup(dev, &sc->slot); #ifdef __powerpc__ node = ofw_bus_get_node(dev); /* Default to big-endian on powerpc */ protctl = RD4(sc, SDHC_PROT_CTRL); protctl &= ~SDHC_PROT_EMODE_MASK; if (OF_hasprop(node, "little-endian")) protctl |= SDHC_PROT_EMODE_LITTLE; else protctl |= SDHC_PROT_EMODE_BIG; WR4(sc, SDHC_PROT_CTRL, protctl); #endif callout_init(&sc->r1bfix_callout, 1); sdhci_init_slot(dev, &sc->slot, 0); bus_generic_probe(dev); bus_generic_attach(dev); -#ifdef MMCCAM - sdhci_cam_start_slot(&sc->slot); -#else sdhci_start_slot(&sc->slot); -#endif return (0); fail: if (sc->intr_cookie) bus_teardown_intr(dev, sc->irq_res, sc->intr_cookie); if (sc->irq_res) bus_release_resource(dev, SYS_RES_IRQ, 0, sc->irq_res); if (sc->mem_res) bus_release_resource(dev, SYS_RES_MEMORY, 0, sc->mem_res); return (err); } static int fsl_sdhci_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); switch (ofw_bus_search_compatible(dev, compat_data)->ocd_data) { case HWTYPE_ESDHC: device_set_desc(dev, "Freescale eSDHC controller"); return (BUS_PROBE_DEFAULT); case HWTYPE_USDHC: device_set_desc(dev, "Freescale uSDHC controller"); return (BUS_PROBE_DEFAULT); default: break; } return (ENXIO); } static device_method_t fsl_sdhci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, fsl_sdhci_probe), DEVMETHOD(device_attach, fsl_sdhci_attach), DEVMETHOD(device_detach, fsl_sdhci_detach), /* Bus interface */ DEVMETHOD(bus_read_ivar, sdhci_generic_read_ivar), DEVMETHOD(bus_write_ivar, sdhci_generic_write_ivar), /* MMC bridge interface */ DEVMETHOD(mmcbr_update_ios, sdhci_generic_update_ios), DEVMETHOD(mmcbr_request, sdhci_generic_request), DEVMETHOD(mmcbr_get_ro, fsl_sdhci_get_ro), DEVMETHOD(mmcbr_acquire_host, sdhci_generic_acquire_host), DEVMETHOD(mmcbr_release_host, sdhci_generic_release_host), /* SDHCI accessors */ DEVMETHOD(sdhci_read_1, fsl_sdhci_read_1), DEVMETHOD(sdhci_read_2, fsl_sdhci_read_2), DEVMETHOD(sdhci_read_4, fsl_sdhci_read_4), DEVMETHOD(sdhci_read_multi_4, fsl_sdhci_read_multi_4), DEVMETHOD(sdhci_write_1, fsl_sdhci_write_1), DEVMETHOD(sdhci_write_2, fsl_sdhci_write_2), DEVMETHOD(sdhci_write_4, fsl_sdhci_write_4), DEVMETHOD(sdhci_write_multi_4, fsl_sdhci_write_multi_4), DEVMETHOD(sdhci_get_card_present,fsl_sdhci_get_card_present), DEVMETHOD_END }; static devclass_t fsl_sdhci_devclass; static driver_t fsl_sdhci_driver = { "sdhci_fsl", fsl_sdhci_methods, sizeof(struct fsl_sdhci_softc), }; DRIVER_MODULE(sdhci_fsl, simplebus, fsl_sdhci_driver, fsl_sdhci_devclass, NULL, NULL); MODULE_DEPEND(sdhci_fsl, sdhci, 1, 1, 1); #ifndef MMCCAM MMC_DECLARE_BRIDGE(sdhci_fsl); #endif Index: projects/runtime-coverage/sys/dev/sdhci/sdhci.c =================================================================== --- projects/runtime-coverage/sys/dev/sdhci/sdhci.c (revision 323974) +++ projects/runtime-coverage/sys/dev/sdhci/sdhci.c (revision 323975) @@ -1,2698 +1,2700 @@ /*- * Copyright (c) 2008 Alexander 
Motin * Copyright (c) 2017 Marius Strobl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mmcbr_if.h" #include "sdhci_if.h" #include "opt_mmccam.h" SYSCTL_NODE(_hw, OID_AUTO, sdhci, CTLFLAG_RD, 0, "sdhci driver"); static int sdhci_debug = 0; SYSCTL_INT(_hw_sdhci, OID_AUTO, debug, CTLFLAG_RWTUN, &sdhci_debug, 0, "Debug level"); u_int sdhci_quirk_clear = 0; SYSCTL_INT(_hw_sdhci, OID_AUTO, quirk_clear, CTLFLAG_RWTUN, &sdhci_quirk_clear, 0, "Mask of quirks to clear"); u_int sdhci_quirk_set = 0; SYSCTL_INT(_hw_sdhci, OID_AUTO, quirk_set, CTLFLAG_RWTUN, &sdhci_quirk_set, 0, "Mask of quirks to set"); #define RD1(slot, off) SDHCI_READ_1((slot)->bus, (slot), (off)) #define RD2(slot, off) SDHCI_READ_2((slot)->bus, (slot), (off)) #define RD4(slot, off) SDHCI_READ_4((slot)->bus, (slot), (off)) #define RD_MULTI_4(slot, off, ptr, count) \ SDHCI_READ_MULTI_4((slot)->bus, (slot), (off), (ptr), (count)) #define WR1(slot, off, val) SDHCI_WRITE_1((slot)->bus, (slot), (off), (val)) #define WR2(slot, off, val) SDHCI_WRITE_2((slot)->bus, (slot), (off), (val)) #define WR4(slot, off, val) SDHCI_WRITE_4((slot)->bus, (slot), (off), (val)) #define WR_MULTI_4(slot, off, ptr, count) \ SDHCI_WRITE_MULTI_4((slot)->bus, (slot), (off), (ptr), (count)) static void sdhci_card_poll(void *arg); static void sdhci_card_task(void *arg, int pending); static int sdhci_exec_tuning(struct sdhci_slot *slot, bool reset); static void sdhci_req_wakeup(struct mmc_request *req); static void sdhci_retune(void *arg); static void sdhci_set_clock(struct sdhci_slot *slot, uint32_t clock); static void sdhci_start(struct sdhci_slot *slot); static void sdhci_start_data(struct sdhci_slot *slot, struct mmc_data *data); #ifdef MMCCAM /* CAM-related */ int sdhci_cam_get_possible_host_clock(struct sdhci_slot *slot, int proposed_clock); static int sdhci_cam_update_ios(struct sdhci_slot *slot); static int sdhci_cam_request(struct sdhci_slot *slot, union ccb *ccb); static void sdhci_cam_action(struct cam_sim *sim, union ccb *ccb); static void sdhci_cam_poll(struct cam_sim *sim); static int 
sdhci_cam_settran_settings(struct sdhci_slot *slot, union ccb *ccb); #endif /* helper routines */ static void sdhci_dumpregs(struct sdhci_slot *slot); static int slot_printf(struct sdhci_slot *slot, const char * fmt, ...) __printflike(2, 3); static uint32_t sdhci_tuning_intmask(struct sdhci_slot *slot); #define SDHCI_LOCK(_slot) mtx_lock(&(_slot)->mtx) #define SDHCI_UNLOCK(_slot) mtx_unlock(&(_slot)->mtx) #define SDHCI_LOCK_INIT(_slot) \ mtx_init(&_slot->mtx, "SD slot mtx", "sdhci", MTX_DEF) #define SDHCI_LOCK_DESTROY(_slot) mtx_destroy(&_slot->mtx); #define SDHCI_ASSERT_LOCKED(_slot) mtx_assert(&_slot->mtx, MA_OWNED); #define SDHCI_ASSERT_UNLOCKED(_slot) mtx_assert(&_slot->mtx, MA_NOTOWNED); #define SDHCI_DEFAULT_MAX_FREQ 50 #define SDHCI_200_MAX_DIVIDER 256 #define SDHCI_300_MAX_DIVIDER 2046 #define SDHCI_CARD_PRESENT_TICKS (hz / 5) #define SDHCI_INSERT_DELAY_TICKS (hz / 2) /* * Broadcom BCM577xx Controller Constants */ /* Maximum divider supported by the default clock source. */ #define BCM577XX_DEFAULT_MAX_DIVIDER 256 /* Alternative clock's base frequency. */ #define BCM577XX_ALT_CLOCK_BASE 63000000 #define BCM577XX_HOST_CONTROL 0x198 #define BCM577XX_CTRL_CLKSEL_MASK 0xFFFFCFFF #define BCM577XX_CTRL_CLKSEL_SHIFT 12 #define BCM577XX_CTRL_CLKSEL_DEFAULT 0x0 #define BCM577XX_CTRL_CLKSEL_64MHZ 0x3 static void sdhci_getaddr(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { if (error != 0) { printf("getaddr: error %d\n", error); return; } *(bus_addr_t *)arg = segs[0].ds_addr; } static int slot_printf(struct sdhci_slot *slot, const char * fmt, ...) { va_list ap; int retval; retval = printf("%s-slot%d: ", device_get_nameunit(slot->bus), slot->num); va_start(ap, fmt); retval += vprintf(fmt, ap); va_end(ap); return (retval); } static void sdhci_dumpregs(struct sdhci_slot *slot) { slot_printf(slot, "============== REGISTER DUMP ==============\n"); slot_printf(slot, "Sys addr: 0x%08x | Version: 0x%08x\n", RD4(slot, SDHCI_DMA_ADDRESS), RD2(slot, SDHCI_HOST_VERSION)); slot_printf(slot, "Blk size: 0x%08x | Blk cnt: 0x%08x\n", RD2(slot, SDHCI_BLOCK_SIZE), RD2(slot, SDHCI_BLOCK_COUNT)); slot_printf(slot, "Argument: 0x%08x | Trn mode: 0x%08x\n", RD4(slot, SDHCI_ARGUMENT), RD2(slot, SDHCI_TRANSFER_MODE)); slot_printf(slot, "Present: 0x%08x | Host ctl: 0x%08x\n", RD4(slot, SDHCI_PRESENT_STATE), RD1(slot, SDHCI_HOST_CONTROL)); slot_printf(slot, "Power: 0x%08x | Blk gap: 0x%08x\n", RD1(slot, SDHCI_POWER_CONTROL), RD1(slot, SDHCI_BLOCK_GAP_CONTROL)); slot_printf(slot, "Wake-up: 0x%08x | Clock: 0x%08x\n", RD1(slot, SDHCI_WAKE_UP_CONTROL), RD2(slot, SDHCI_CLOCK_CONTROL)); slot_printf(slot, "Timeout: 0x%08x | Int stat: 0x%08x\n", RD1(slot, SDHCI_TIMEOUT_CONTROL), RD4(slot, SDHCI_INT_STATUS)); slot_printf(slot, "Int enab: 0x%08x | Sig enab: 0x%08x\n", RD4(slot, SDHCI_INT_ENABLE), RD4(slot, SDHCI_SIGNAL_ENABLE)); slot_printf(slot, "AC12 err: 0x%08x | Host ctl2:0x%08x\n", RD2(slot, SDHCI_ACMD12_ERR), RD2(slot, SDHCI_HOST_CONTROL2)); slot_printf(slot, "Caps: 0x%08x | Caps2: 0x%08x\n", RD4(slot, SDHCI_CAPABILITIES), RD4(slot, SDHCI_CAPABILITIES2)); slot_printf(slot, "Max curr: 0x%08x | ADMA err: 0x%08x\n", RD4(slot, SDHCI_MAX_CURRENT), RD1(slot, SDHCI_ADMA_ERR)); slot_printf(slot, "ADMA addr:0x%08x | Slot int: 0x%08x\n", RD4(slot, SDHCI_ADMA_ADDRESS_LO), RD2(slot, SDHCI_SLOT_INT_STATUS)); slot_printf(slot, "===========================================\n"); } static void sdhci_reset(struct sdhci_slot *slot, uint8_t mask) { int timeout; uint32_t clock; if (slot->quirks & SDHCI_QUIRK_NO_CARD_NO_RESET) { if 
(!SDHCI_GET_CARD_PRESENT(slot->bus, slot)) return; } /* Some controllers need this kick or reset won't work. */ if ((mask & SDHCI_RESET_ALL) == 0 && (slot->quirks & SDHCI_QUIRK_CLOCK_BEFORE_RESET)) { /* This is to force an update */ clock = slot->clock; slot->clock = 0; sdhci_set_clock(slot, clock); } if (mask & SDHCI_RESET_ALL) { slot->clock = 0; slot->power = 0; } WR1(slot, SDHCI_SOFTWARE_RESET, mask); if (slot->quirks & SDHCI_QUIRK_WAITFOR_RESET_ASSERTED) { /* * Resets on TI OMAPs and AM335x are incompatible with SDHCI * specification. The reset bit has internal propagation delay, * so a fast read after write returns 0 even if reset process is * in progress. The workaround is to poll for 1 before polling * for 0. In the worst case, if we miss seeing it asserted the * time we spent waiting is enough to ensure the reset finishes. */ timeout = 10000; while ((RD1(slot, SDHCI_SOFTWARE_RESET) & mask) != mask) { if (timeout <= 0) break; timeout--; DELAY(1); } } /* Wait max 100 ms */ timeout = 10000; /* Controller clears the bits when it's done */ while (RD1(slot, SDHCI_SOFTWARE_RESET) & mask) { if (timeout <= 0) { slot_printf(slot, "Reset 0x%x never completed.\n", mask); sdhci_dumpregs(slot); return; } timeout--; DELAY(10); } } static uint32_t sdhci_tuning_intmask(struct sdhci_slot *slot) { uint32_t intmask; intmask = 0; if (slot->opt & SDHCI_TUNING_SUPPORTED) { intmask |= SDHCI_INT_TUNEERR; if (slot->retune_mode == SDHCI_RETUNE_MODE_2 || slot->retune_mode == SDHCI_RETUNE_MODE_3) intmask |= SDHCI_INT_RETUNE; } return (intmask); } static void sdhci_init(struct sdhci_slot *slot) { sdhci_reset(slot, SDHCI_RESET_ALL); /* Enable interrupts. */ slot->intmask = SDHCI_INT_BUS_POWER | SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC | SDHCI_INT_DATA_TIMEOUT | SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC | SDHCI_INT_TIMEOUT | SDHCI_INT_DATA_AVAIL | SDHCI_INT_SPACE_AVAIL | SDHCI_INT_DMA_END | SDHCI_INT_DATA_END | SDHCI_INT_RESPONSE | SDHCI_INT_ACMD12ERR; if (!(slot->quirks & SDHCI_QUIRK_POLL_CARD_PRESENT) && !(slot->opt & SDHCI_NON_REMOVABLE)) { slot->intmask |= SDHCI_INT_CARD_REMOVE | SDHCI_INT_CARD_INSERT; } WR4(slot, SDHCI_INT_ENABLE, slot->intmask | sdhci_tuning_intmask(slot)); WR4(slot, SDHCI_SIGNAL_ENABLE, slot->intmask); } static void sdhci_set_clock(struct sdhci_slot *slot, uint32_t clock) { uint32_t clk_base; uint32_t clk_sel; uint32_t res; uint16_t clk; uint16_t div; int timeout; if (clock == slot->clock) return; slot->clock = clock; /* Turn off the clock. */ clk = RD2(slot, SDHCI_CLOCK_CONTROL); WR2(slot, SDHCI_CLOCK_CONTROL, clk & ~SDHCI_CLOCK_CARD_EN); /* If no clock requested - leave it so. */ if (clock == 0) return; /* Determine the clock base frequency */ clk_base = slot->max_clk; if (slot->quirks & SDHCI_QUIRK_BCM577XX_400KHZ_CLKSRC) { clk_sel = RD2(slot, BCM577XX_HOST_CONTROL) & BCM577XX_CTRL_CLKSEL_MASK; /* * Select clock source appropriate for the requested frequency. */ if ((clk_base / BCM577XX_DEFAULT_MAX_DIVIDER) > clock) { clk_base = BCM577XX_ALT_CLOCK_BASE; clk_sel |= (BCM577XX_CTRL_CLKSEL_64MHZ << BCM577XX_CTRL_CLKSEL_SHIFT); } else { clk_sel |= (BCM577XX_CTRL_CLKSEL_DEFAULT << BCM577XX_CTRL_CLKSEL_SHIFT); } WR2(slot, BCM577XX_HOST_CONTROL, clk_sel); } /* Recalculate timeout clock frequency based on the new sd clock. */ if (slot->quirks & SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK) slot->timeout_clk = slot->clock / 1000; if (slot->version < SDHCI_SPEC_300) { /* Looking for highest freq <= clock. 
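 *
 * A worked example (editorial, illustrative only): with a 48 MHz base
 * and 400 kHz requested, res halves through 48, 24, 12, 6, 3, 1.5,
 * 0.75, then 0.375 MHz while div doubles to 128; 375 kHz is the first
 * frequency <= 400 kHz, so div = 128, encoded as 128 >> 1 = 0x40.
 */
#if 0	/* Illustrative only, not compiled. */
	clk = ((128 >> 1) & SDHCI_DIVIDER_MASK) << SDHCI_DIVIDER_SHIFT;
#endif
/*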
*/ res = clk_base; for (div = 1; div < SDHCI_200_MAX_DIVIDER; div <<= 1) { if (res <= clock) break; res >>= 1; } /* Divider 1:1 is 0x00, 2:1 is 0x01, 256:1 is 0x80 ... */ div >>= 1; } else { /* Version 3.0 divisors are multiples of two up to 1023 * 2 */ if (clock >= clk_base) div = 0; else { for (div = 2; div < SDHCI_300_MAX_DIVIDER; div += 2) { if ((clk_base / div) <= clock) break; } } div >>= 1; } if (bootverbose || sdhci_debug) slot_printf(slot, "Divider %d for freq %d (base %d)\n", div, clock, clk_base); /* Now that we have the divider, set it. */ clk = (div & SDHCI_DIVIDER_MASK) << SDHCI_DIVIDER_SHIFT; clk |= ((div >> SDHCI_DIVIDER_MASK_LEN) & SDHCI_DIVIDER_HI_MASK) << SDHCI_DIVIDER_HI_SHIFT; WR2(slot, SDHCI_CLOCK_CONTROL, clk); /* Enable clock. */ clk |= SDHCI_CLOCK_INT_EN; WR2(slot, SDHCI_CLOCK_CONTROL, clk); /* Wait up to 10 ms until it stabilizes. */ timeout = 10; while (!((clk = RD2(slot, SDHCI_CLOCK_CONTROL)) & SDHCI_CLOCK_INT_STABLE)) { if (timeout == 0) { slot_printf(slot, "Internal clock never stabilised.\n"); sdhci_dumpregs(slot); return; } timeout--; DELAY(1000); } /* Pass clock signal to the bus. */ clk |= SDHCI_CLOCK_CARD_EN; WR2(slot, SDHCI_CLOCK_CONTROL, clk); } static void sdhci_set_power(struct sdhci_slot *slot, u_char power) { int i; uint8_t pwr; if (slot->power == power) return; slot->power = power; /* Turn off the power. */ pwr = 0; WR1(slot, SDHCI_POWER_CONTROL, pwr); /* If power down requested - leave it so. */ if (power == 0) return; /* Set voltage. */ switch (1 << power) { case MMC_OCR_LOW_VOLTAGE: pwr |= SDHCI_POWER_180; break; case MMC_OCR_290_300: case MMC_OCR_300_310: pwr |= SDHCI_POWER_300; break; case MMC_OCR_320_330: case MMC_OCR_330_340: pwr |= SDHCI_POWER_330; break; } WR1(slot, SDHCI_POWER_CONTROL, pwr); /* * Turn on VDD1 power. Note that at least some Intel controllers can * fail to enable bus power on the first try after transiting from D3 * to D0, so we give them up to 2 ms. */ pwr |= SDHCI_POWER_ON; for (i = 0; i < 20; i++) { WR1(slot, SDHCI_POWER_CONTROL, pwr); if (RD1(slot, SDHCI_POWER_CONTROL) & SDHCI_POWER_ON) break; DELAY(100); } if (!(RD1(slot, SDHCI_POWER_CONTROL) & SDHCI_POWER_ON)) slot_printf(slot, "Bus power failed to enable"); if (slot->quirks & SDHCI_QUIRK_INTEL_POWER_UP_RESET) { WR1(slot, SDHCI_POWER_CONTROL, pwr | 0x10); DELAY(10); WR1(slot, SDHCI_POWER_CONTROL, pwr); DELAY(300); } } static void sdhci_read_block_pio(struct sdhci_slot *slot) { uint32_t data; char *buffer; size_t left; buffer = slot->curcmd->data->data; buffer += slot->offset; /* Transfer one block at a time. */ left = min(512, slot->curcmd->data->len - slot->offset); slot->offset += left; /* If we are too fast, broken controllers return zeroes. */ if (slot->quirks & SDHCI_QUIRK_BROKEN_TIMINGS) DELAY(10); /* Handle unaligned and aligned buffer cases. */ if ((intptr_t)buffer & 3) { while (left > 3) { data = RD4(slot, SDHCI_BUFFER); buffer[0] = data; buffer[1] = (data >> 8); buffer[2] = (data >> 16); buffer[3] = (data >> 24); buffer += 4; left -= 4; } } else { RD_MULTI_4(slot, SDHCI_BUFFER, (uint32_t *)buffer, left >> 2); left &= 3; } /* Handle uneven size case. */ if (left > 0) { data = RD4(slot, SDHCI_BUFFER); while (left > 0) { *(buffer++) = data; data >>= 8; left--; } } } static void sdhci_write_block_pio(struct sdhci_slot *slot) { uint32_t data = 0; char *buffer; size_t left; buffer = slot->curcmd->data->data; buffer += slot->offset; /* Transfer one block at a time.
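 *
 * (Editorial note: the data port is little-endian as the packing below
 * assumes, i.e. buffer[0] travels in bits 7:0 regardless of host CPU.
 * An illustrative restatement with explicit casts, since a signed char
 * would otherwise sign-extend:)
 */
#if 0	/* Illustrative only, not compiled. */
	data = (uint8_t)buffer[0] | ((uint8_t)buffer[1] << 8) |
	    ((uint8_t)buffer[2] << 16) | ((uint32_t)(uint8_t)buffer[3] << 24);
	WR4(slot, SDHCI_BUFFER, data);
#endif
/*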
*/ left = min(512, slot->curcmd->data->len - slot->offset); slot->offset += left; /* Handle unaligned and aligned buffer cases. */ if ((intptr_t)buffer & 3) { while (left > 3) { data = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24); left -= 4; buffer += 4; WR4(slot, SDHCI_BUFFER, data); } } else { WR_MULTI_4(slot, SDHCI_BUFFER, (uint32_t *)buffer, left >> 2); left &= 3; } /* Handle uneven size case. */ if (left > 0) { while (left > 0) { data <<= 8; data += *(buffer++); left--; } WR4(slot, SDHCI_BUFFER, data); } } static void sdhci_transfer_pio(struct sdhci_slot *slot) { /* Read as many blocks as possible. */ if (slot->curcmd->data->flags & MMC_DATA_READ) { while (RD4(slot, SDHCI_PRESENT_STATE) & SDHCI_DATA_AVAILABLE) { sdhci_read_block_pio(slot); if (slot->offset >= slot->curcmd->data->len) break; } } else { while (RD4(slot, SDHCI_PRESENT_STATE) & SDHCI_SPACE_AVAILABLE) { sdhci_write_block_pio(slot); if (slot->offset >= slot->curcmd->data->len) break; } } } static void sdhci_card_task(void *arg, int pending __unused) { struct sdhci_slot *slot = arg; device_t d; SDHCI_LOCK(slot); if (SDHCI_GET_CARD_PRESENT(slot->bus, slot)) { #ifdef MMCCAM if (slot->card_present == 0) { #else if (slot->dev == NULL) { #endif /* If card is present - attach mmc bus. */ if (bootverbose || sdhci_debug) slot_printf(slot, "Card inserted\n"); #ifdef MMCCAM slot->card_present = 1; union ccb *ccb; uint32_t pathid; pathid = cam_sim_path(slot->sim); ccb = xpt_alloc_ccb_nowait(); if (ccb == NULL) { slot_printf(slot, "Unable to alloc CCB for rescan\n"); SDHCI_UNLOCK(slot); return; } /* * We create a rescan request for BUS:0:0, since the card * will be at lun 0. */ if (xpt_create_path(&ccb->ccb_h.path, NULL, pathid, /* target */ 0, /* lun */ 0) != CAM_REQ_CMP) { slot_printf(slot, "Unable to create path for rescan\n"); SDHCI_UNLOCK(slot); xpt_free_ccb(ccb); return; } SDHCI_UNLOCK(slot); xpt_rescan(ccb); #else d = slot->dev = device_add_child(slot->bus, "mmc", -1); SDHCI_UNLOCK(slot); if (d) { device_set_ivars(d, slot); (void)device_probe_and_attach(d); } #endif } else SDHCI_UNLOCK(slot); } else { #ifdef MMCCAM if (slot->card_present == 1) { #else if (slot->dev != NULL) { #endif /* If no card present - detach mmc bus. */ if (bootverbose || sdhci_debug) slot_printf(slot, "Card removed\n"); d = slot->dev; slot->dev = NULL; #ifdef MMCCAM slot->card_present = 0; union ccb *ccb; uint32_t pathid; pathid = cam_sim_path(slot->sim); ccb = xpt_alloc_ccb_nowait(); if (ccb == NULL) { slot_printf(slot, "Unable to alloc CCB for rescan\n"); SDHCI_UNLOCK(slot); return; } /* * We create a rescan request for BUS:0:0, since the card * will be at lun 0. */ if (xpt_create_path(&ccb->ccb_h.path, NULL, pathid, /* target */ 0, /* lun */ 0) != CAM_REQ_CMP) { slot_printf(slot, "Unable to create path for rescan\n"); SDHCI_UNLOCK(slot); xpt_free_ccb(ccb); return; } SDHCI_UNLOCK(slot); xpt_rescan(ccb); #else slot->intmask &= ~sdhci_tuning_intmask(slot); WR4(slot, SDHCI_SIGNAL_ENABLE, slot->intmask); slot->opt &= ~SDHCI_TUNING_ENABLED; SDHCI_UNLOCK(slot); callout_drain(&slot->retune_callout); device_delete_child(slot->bus, d); #endif } else SDHCI_UNLOCK(slot); } } static void sdhci_handle_card_present_locked(struct sdhci_slot *slot, bool is_present) { bool was_present; /* * If there was no card and now there is one, schedule the task to * create the child device after a short delay. The delay is to * debounce the card insert (sometimes the card detect pin stabilizes * before the other pins have made good contact). 
* * If there was a card present and now it's gone, immediately schedule * the task to delete the child device. No debouncing -- gone is gone, * because once power is removed, a full card re-init is needed, and * that happens by deleting and recreating the child device. */ #ifdef MMCCAM was_present = slot->card_present; #else was_present = slot->dev != NULL; #endif if (!was_present && is_present) { taskqueue_enqueue_timeout(taskqueue_swi_giant, &slot->card_delayed_task, -SDHCI_INSERT_DELAY_TICKS); } else if (was_present && !is_present) { taskqueue_enqueue(taskqueue_swi_giant, &slot->card_task); } } void sdhci_handle_card_present(struct sdhci_slot *slot, bool is_present) { SDHCI_LOCK(slot); sdhci_handle_card_present_locked(slot, is_present); SDHCI_UNLOCK(slot); } static void sdhci_card_poll(void *arg) { struct sdhci_slot *slot = arg; sdhci_handle_card_present(slot, SDHCI_GET_CARD_PRESENT(slot->bus, slot)); callout_reset(&slot->card_poll_callout, SDHCI_CARD_PRESENT_TICKS, sdhci_card_poll, slot); } int sdhci_init_slot(device_t dev, struct sdhci_slot *slot, int num) { kobjop_desc_t kobj_desc; kobj_method_t *kobj_method; uint32_t caps, caps2, freq, host_caps; int err; SDHCI_LOCK_INIT(slot); slot->num = num; slot->bus = dev; /* Allocate DMA tag. */ err = bus_dma_tag_create(bus_get_dma_tag(dev), DMA_BLOCK_SIZE, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, DMA_BLOCK_SIZE, 1, DMA_BLOCK_SIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &slot->dmatag); if (err != 0) { device_printf(dev, "Can't create DMA tag\n"); SDHCI_LOCK_DESTROY(slot); return (err); } /* Allocate DMA memory. */ err = bus_dmamem_alloc(slot->dmatag, (void **)&slot->dmamem, BUS_DMA_NOWAIT, &slot->dmamap); if (err != 0) { device_printf(dev, "Can't alloc DMA memory\n"); bus_dma_tag_destroy(slot->dmatag); SDHCI_LOCK_DESTROY(slot); return (err); } /* Map the memory. */ err = bus_dmamap_load(slot->dmatag, slot->dmamap, (void *)slot->dmamem, DMA_BLOCK_SIZE, sdhci_getaddr, &slot->paddr, 0); if (err != 0 || slot->paddr == 0) { device_printf(dev, "Can't load DMA memory\n"); bus_dmamem_free(slot->dmatag, slot->dmamem, slot->dmamap); bus_dma_tag_destroy(slot->dmatag); SDHCI_LOCK_DESTROY(slot); if (err) return (err); else return (EFAULT); } slot->version = (RD2(slot, SDHCI_HOST_VERSION) >> SDHCI_SPEC_VER_SHIFT) & SDHCI_SPEC_VER_MASK; if (slot->quirks & SDHCI_QUIRK_MISSING_CAPS) { caps = slot->caps; caps2 = slot->caps2; } else { caps = RD4(slot, SDHCI_CAPABILITIES); if (slot->version >= SDHCI_SPEC_300) caps2 = RD4(slot, SDHCI_CAPABILITIES2); else caps2 = 0; } if (slot->version >= SDHCI_SPEC_300) { if ((caps & SDHCI_SLOTTYPE_MASK) != SDHCI_SLOTTYPE_REMOVABLE && (caps & SDHCI_SLOTTYPE_MASK) != SDHCI_SLOTTYPE_EMBEDDED) { device_printf(dev, "Driver doesn't support shared bus slots\n"); bus_dmamap_unload(slot->dmatag, slot->dmamap); bus_dmamem_free(slot->dmatag, slot->dmamem, slot->dmamap); bus_dma_tag_destroy(slot->dmatag); SDHCI_LOCK_DESTROY(slot); return (ENXIO); } else if ((caps & SDHCI_SLOTTYPE_MASK) == SDHCI_SLOTTYPE_EMBEDDED) { slot->opt |= SDHCI_SLOT_EMBEDDED | SDHCI_NON_REMOVABLE; } } /* Calculate base clock frequency. */ if (slot->version >= SDHCI_SPEC_300) freq = (caps & SDHCI_CLOCK_V3_BASE_MASK) >> SDHCI_CLOCK_BASE_SHIFT; else freq = (caps & SDHCI_CLOCK_BASE_MASK) >> SDHCI_CLOCK_BASE_SHIFT; if (freq != 0) slot->max_clk = freq * 1000000; /* * If the frequency wasn't in the capabilities and the hardware driver * hasn't already set max_clk we're probably not going to work right * with an assumption, so complain about it. 
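 *
 * (Editorial note on units for the block below: the capabilities field
 * yields the timeout clock in kHz, or in MHz when the UNIT bit is set,
 * and slot->timeout_clk is kept in kHz throughout; an illustrative
 * restatement:)
 */
#if 0	/* Illustrative only, not compiled. */
	slot->timeout_clk = (caps & SDHCI_TIMEOUT_CLK_MASK) >>
	    SDHCI_TIMEOUT_CLK_SHIFT;		/* kHz... */
	if (caps & SDHCI_TIMEOUT_CLK_UNIT)
		slot->timeout_clk *= 1000;	/* ...or MHz, scaled to kHz */
#endif
/*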
*/ if (slot->max_clk == 0) { slot->max_clk = SDHCI_DEFAULT_MAX_FREQ * 1000000; device_printf(dev, "Hardware doesn't specify base clock " "frequency, using %dMHz as default.\n", SDHCI_DEFAULT_MAX_FREQ); } /* Calculate/set timeout clock frequency. */ if (slot->quirks & SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK) { slot->timeout_clk = slot->max_clk / 1000; } else if (slot->quirks & SDHCI_QUIRK_DATA_TIMEOUT_1MHZ) { slot->timeout_clk = 1000; } else { slot->timeout_clk = (caps & SDHCI_TIMEOUT_CLK_MASK) >> SDHCI_TIMEOUT_CLK_SHIFT; if (caps & SDHCI_TIMEOUT_CLK_UNIT) slot->timeout_clk *= 1000; } /* * If the frequency wasn't in the capabilities and the hardware driver * hasn't already set timeout_clk we'll probably work okay using the * max timeout, but still mention it. */ if (slot->timeout_clk == 0) { device_printf(dev, "Hardware doesn't specify timeout clock " "frequency, setting BROKEN_TIMEOUT quirk.\n"); slot->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL; } slot->host.f_min = SDHCI_MIN_FREQ(slot->bus, slot); slot->host.f_max = slot->max_clk; slot->host.host_ocr = 0; if (caps & SDHCI_CAN_VDD_330) slot->host.host_ocr |= MMC_OCR_320_330 | MMC_OCR_330_340; if (caps & SDHCI_CAN_VDD_300) slot->host.host_ocr |= MMC_OCR_290_300 | MMC_OCR_300_310; /* 1.8V VDD is not supposed to be used for removable cards. */ if ((caps & SDHCI_CAN_VDD_180) && (slot->opt & SDHCI_SLOT_EMBEDDED)) slot->host.host_ocr |= MMC_OCR_LOW_VOLTAGE; if (slot->host.host_ocr == 0) { device_printf(dev, "Hardware doesn't report any " "supported voltages.\n"); } host_caps = MMC_CAP_4_BIT_DATA; if (caps & SDHCI_CAN_DO_8BITBUS) host_caps |= MMC_CAP_8_BIT_DATA; if (caps & SDHCI_CAN_DO_HISPD) host_caps |= MMC_CAP_HSPEED; if (slot->quirks & SDHCI_QUIRK_BOOT_NOACC) host_caps |= MMC_CAP_BOOT_NOACC; if (slot->quirks & SDHCI_QUIRK_WAIT_WHILE_BUSY) host_caps |= MMC_CAP_WAIT_WHILE_BUSY; /* Determine supported UHS-I and eMMC modes. */ if (caps2 & (SDHCI_CAN_SDR50 | SDHCI_CAN_SDR104 | SDHCI_CAN_DDR50)) host_caps |= MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25; if (caps2 & SDHCI_CAN_SDR104) { host_caps |= MMC_CAP_UHS_SDR104 | MMC_CAP_UHS_SDR50; if (!(slot->quirks & SDHCI_QUIRK_BROKEN_MMC_HS200)) host_caps |= MMC_CAP_MMC_HS200; } else if (caps2 & SDHCI_CAN_SDR50) host_caps |= MMC_CAP_UHS_SDR50; if (caps2 & SDHCI_CAN_DDR50 && !(slot->quirks & SDHCI_QUIRK_BROKEN_UHS_DDR50)) host_caps |= MMC_CAP_UHS_DDR50; if (slot->quirks & SDHCI_QUIRK_MMC_DDR52) host_caps |= MMC_CAP_MMC_DDR52; if (slot->quirks & SDHCI_QUIRK_CAPS_BIT63_FOR_MMC_HS400 && caps2 & SDHCI_CAN_MMC_HS400) host_caps |= MMC_CAP_MMC_HS400; /* * Disable UHS-I and eMMC modes if the set_uhs_timing method is the * default NULL implementation. */ kobj_desc = &sdhci_set_uhs_timing_desc; kobj_method = kobj_lookup_method(((kobj_t)dev)->ops->cls, NULL, kobj_desc); if (kobj_method == &kobj_desc->deflt) host_caps &= ~(MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 | MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_DDR50 | MMC_CAP_UHS_SDR104 | MMC_CAP_MMC_DDR52 | MMC_CAP_MMC_HS200 | MMC_CAP_MMC_HS400); #define SDHCI_CAP_MODES_TUNING(caps2) \ (((caps2) & SDHCI_TUNE_SDR50 ? MMC_CAP_UHS_SDR50 : 0) | \ MMC_CAP_UHS_DDR50 | MMC_CAP_UHS_SDR104 | MMC_CAP_MMC_HS200 | \ MMC_CAP_MMC_HS400) /* * Disable UHS-I and eMMC modes that require (re-)tuning if either * the tune or re-tune method is the default NULL implementation.
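 *
 * The kobj lookup idiom used below resolves the method this device
 * class would run; getting back the descriptor's built-in default means
 * no driver supplied an implementation. An editorial sketch for a
 * hypothetical method "foo":
 */
#if 0	/* Illustrative only, not compiled. */
	kobj_desc = &mmcbr_foo_desc;		/* hypothetical descriptor */
	kobj_method = kobj_lookup_method(((kobj_t)dev)->ops->cls, NULL,
	    kobj_desc);
	if (kobj_method == &kobj_desc->deflt)
		host_caps &= ~MMC_CAP_FOO;	/* hypothetical capability */
#endif
/*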
*/ kobj_desc = &mmcbr_tune_desc; kobj_method = kobj_lookup_method(((kobj_t)dev)->ops->cls, NULL, kobj_desc); if (kobj_method == &kobj_desc->deflt) goto no_tuning; kobj_desc = &mmcbr_retune_desc; kobj_method = kobj_lookup_method(((kobj_t)dev)->ops->cls, NULL, kobj_desc); if (kobj_method == &kobj_desc->deflt) { no_tuning: host_caps &= ~(SDHCI_CAP_MODES_TUNING(caps2)); } /* Allocate tuning structures and determine tuning parameters. */ if (host_caps & SDHCI_CAP_MODES_TUNING(caps2)) { slot->opt |= SDHCI_TUNING_SUPPORTED; slot->tune_req = malloc(sizeof(*slot->tune_req), M_DEVBUF, M_WAITOK); slot->tune_cmd = malloc(sizeof(*slot->tune_cmd), M_DEVBUF, M_WAITOK); slot->tune_data = malloc(sizeof(*slot->tune_data), M_DEVBUF, M_WAITOK); if (caps2 & SDHCI_TUNE_SDR50) slot->opt |= SDHCI_SDR50_NEEDS_TUNING; slot->retune_mode = (caps2 & SDHCI_RETUNE_MODES_MASK) >> SDHCI_RETUNE_MODES_SHIFT; if (slot->retune_mode == SDHCI_RETUNE_MODE_1) { slot->retune_count = (caps2 & SDHCI_RETUNE_CNT_MASK) >> SDHCI_RETUNE_CNT_SHIFT; if (slot->retune_count > 0xb) { device_printf(dev, "Unknown re-tuning count " "%x, using 1 sec\n", slot->retune_count); slot->retune_count = 1; } else if (slot->retune_count != 0) slot->retune_count = 1 << (slot->retune_count - 1); } } #undef SDHCI_CAP_MODES_TUNING /* Determine supported VCCQ signaling levels. */ host_caps |= MMC_CAP_SIGNALING_330; if (host_caps & (MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 | MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_DDR50 | MMC_CAP_UHS_SDR104 | MMC_CAP_MMC_DDR52_180 | MMC_CAP_MMC_HS200_180 | MMC_CAP_MMC_HS400_180)) host_caps |= MMC_CAP_SIGNALING_120 | MMC_CAP_SIGNALING_180; /* * Disable 1.2 V and 1.8 V signaling if the switch_vccq method is the * default NULL implementation. Disable 1.2 V support if it's the * generic SDHCI implementation. */ kobj_desc = &mmcbr_switch_vccq_desc; kobj_method = kobj_lookup_method(((kobj_t)dev)->ops->cls, NULL, kobj_desc); if (kobj_method == &kobj_desc->deflt) host_caps &= ~(MMC_CAP_SIGNALING_120 | MMC_CAP_SIGNALING_180); else if (kobj_method->func == (kobjop_t)sdhci_generic_switch_vccq) host_caps &= ~MMC_CAP_SIGNALING_120; /* Determine supported driver types (type B is always mandatory). */ if (caps2 & SDHCI_CAN_DRIVE_TYPE_A) host_caps |= MMC_CAP_DRIVER_TYPE_A; if (caps2 & SDHCI_CAN_DRIVE_TYPE_C) host_caps |= MMC_CAP_DRIVER_TYPE_C; if (caps2 & SDHCI_CAN_DRIVE_TYPE_D) host_caps |= MMC_CAP_DRIVER_TYPE_D; slot->host.caps = host_caps; /* Decide if we have usable DMA. */ if (caps & SDHCI_CAN_DO_DMA) slot->opt |= SDHCI_HAVE_DMA; if (slot->quirks & SDHCI_QUIRK_BROKEN_DMA) slot->opt &= ~SDHCI_HAVE_DMA; if (slot->quirks & SDHCI_QUIRK_FORCE_DMA) slot->opt |= SDHCI_HAVE_DMA; if (slot->quirks & SDHCI_QUIRK_ALL_SLOTS_NON_REMOVABLE) slot->opt |= SDHCI_NON_REMOVABLE; /* * Use platform-provided transfer backend * with PIO as a fallback mechanism */ if (slot->opt & SDHCI_PLATFORM_TRANSFER) slot->opt &= ~SDHCI_HAVE_DMA; if (bootverbose || sdhci_debug) { slot_printf(slot, "%uMHz%s %s VDD:%s%s%s VCCQ: 3.3V%s%s DRV: B%s%s%s %s %s\n", slot->max_clk / 1000000, (caps & SDHCI_CAN_DO_HISPD) ? " HS" : "", (host_caps & MMC_CAP_8_BIT_DATA) ? "8bits" : ((host_caps & MMC_CAP_4_BIT_DATA) ? "4bits" : "1bit"), (caps & SDHCI_CAN_VDD_330) ? " 3.3V" : "", (caps & SDHCI_CAN_VDD_300) ? " 3.0V" : "", ((caps & SDHCI_CAN_VDD_180) && (slot->opt & SDHCI_SLOT_EMBEDDED)) ? " 1.8V" : "", (host_caps & MMC_CAP_SIGNALING_180) ? " 1.8V" : "", (host_caps & MMC_CAP_SIGNALING_120) ? " 1.2V" : "", (host_caps & MMC_CAP_DRIVER_TYPE_A) ? "A" : "", (host_caps & MMC_CAP_DRIVER_TYPE_C) ? 
"C" : "", (host_caps & MMC_CAP_DRIVER_TYPE_D) ? "D" : "", (slot->opt & SDHCI_HAVE_DMA) ? "DMA" : "PIO", (slot->opt & SDHCI_SLOT_EMBEDDED) ? "embedded" : (slot->opt & SDHCI_NON_REMOVABLE) ? "non-removable" : "removable"); if (host_caps & (MMC_CAP_MMC_DDR52 | MMC_CAP_MMC_HS200 | MMC_CAP_MMC_HS400 | MMC_CAP_MMC_ENH_STROBE)) slot_printf(slot, "eMMC:%s%s%s%s\n", (host_caps & MMC_CAP_MMC_DDR52) ? " DDR52" : "", (host_caps & MMC_CAP_MMC_HS200) ? " HS200" : "", (host_caps & MMC_CAP_MMC_HS400) ? " HS400" : "", ((host_caps & (MMC_CAP_MMC_HS400 | MMC_CAP_MMC_ENH_STROBE)) == (MMC_CAP_MMC_HS400 | MMC_CAP_MMC_ENH_STROBE)) ? " HS400ES" : ""); if (host_caps & (MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 | MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104)) slot_printf(slot, "UHS-I:%s%s%s%s%s\n", (host_caps & MMC_CAP_UHS_SDR12) ? " SDR12" : "", (host_caps & MMC_CAP_UHS_SDR25) ? " SDR25" : "", (host_caps & MMC_CAP_UHS_SDR50) ? " SDR50" : "", (host_caps & MMC_CAP_UHS_SDR104) ? " SDR104" : "", (host_caps & MMC_CAP_UHS_DDR50) ? " DDR50" : ""); if (slot->opt & SDHCI_TUNING_SUPPORTED) slot_printf(slot, "Re-tuning count %d secs, mode %d\n", slot->retune_count, slot->retune_mode + 1); sdhci_dumpregs(slot); } slot->timeout = 10; SYSCTL_ADD_INT(device_get_sysctl_ctx(slot->bus), SYSCTL_CHILDREN(device_get_sysctl_tree(slot->bus)), OID_AUTO, "timeout", CTLFLAG_RW, &slot->timeout, 0, "Maximum timeout for SDHCI transfers (in secs)"); TASK_INIT(&slot->card_task, 0, sdhci_card_task, slot); TIMEOUT_TASK_INIT(taskqueue_swi_giant, &slot->card_delayed_task, 0, sdhci_card_task, slot); callout_init(&slot->card_poll_callout, 1); callout_init_mtx(&slot->timeout_callout, &slot->mtx, 0); callout_init_mtx(&slot->retune_callout, &slot->mtx, 0); if ((slot->quirks & SDHCI_QUIRK_POLL_CARD_PRESENT) && !(slot->opt & SDHCI_NON_REMOVABLE)) { callout_reset(&slot->card_poll_callout, SDHCI_CARD_PRESENT_TICKS, sdhci_card_poll, slot); } sdhci_init(slot); return (0); } +#ifndef MMCCAM void sdhci_start_slot(struct sdhci_slot *slot) { sdhci_card_task(slot, 0); } +#endif int sdhci_cleanup_slot(struct sdhci_slot *slot) { device_t d; callout_drain(&slot->timeout_callout); callout_drain(&slot->card_poll_callout); callout_drain(&slot->retune_callout); taskqueue_drain(taskqueue_swi_giant, &slot->card_task); taskqueue_drain_timeout(taskqueue_swi_giant, &slot->card_delayed_task); SDHCI_LOCK(slot); d = slot->dev; slot->dev = NULL; SDHCI_UNLOCK(slot); if (d != NULL) device_delete_child(slot->bus, d); SDHCI_LOCK(slot); sdhci_reset(slot, SDHCI_RESET_ALL); SDHCI_UNLOCK(slot); bus_dmamap_unload(slot->dmatag, slot->dmamap); bus_dmamem_free(slot->dmatag, slot->dmamem, slot->dmamap); bus_dma_tag_destroy(slot->dmatag); if (slot->opt & SDHCI_TUNING_SUPPORTED) { free(slot->tune_req, M_DEVBUF); free(slot->tune_cmd, M_DEVBUF); free(slot->tune_data, M_DEVBUF); } SDHCI_LOCK_DESTROY(slot); return (0); } int sdhci_generic_suspend(struct sdhci_slot *slot) { /* * We expect the MMC layer to issue initial tuning after resume. * Otherwise, we'd need to indicate re-tuning including circuit reset * being required at least for re-tuning modes 1 and 2 ourselves. 
*/ callout_drain(&slot->retune_callout); SDHCI_LOCK(slot); slot->opt &= ~SDHCI_TUNING_ENABLED; sdhci_reset(slot, SDHCI_RESET_ALL); SDHCI_UNLOCK(slot); return (0); } int sdhci_generic_resume(struct sdhci_slot *slot) { SDHCI_LOCK(slot); sdhci_init(slot); SDHCI_UNLOCK(slot); return (0); } uint32_t sdhci_generic_min_freq(device_t brdev __unused, struct sdhci_slot *slot) { if (slot->version >= SDHCI_SPEC_300) return (slot->max_clk / SDHCI_300_MAX_DIVIDER); else return (slot->max_clk / SDHCI_200_MAX_DIVIDER); } bool sdhci_generic_get_card_present(device_t brdev __unused, struct sdhci_slot *slot) { if (slot->opt & SDHCI_NON_REMOVABLE) return true; return (RD4(slot, SDHCI_PRESENT_STATE) & SDHCI_CARD_PRESENT); } void sdhci_generic_set_uhs_timing(device_t brdev __unused, struct sdhci_slot *slot) { struct mmc_ios *ios; uint16_t hostctrl2; if (slot->version < SDHCI_SPEC_300) return; SDHCI_ASSERT_LOCKED(slot); ios = &slot->host.ios; sdhci_set_clock(slot, 0); hostctrl2 = RD2(slot, SDHCI_HOST_CONTROL2); hostctrl2 &= ~SDHCI_CTRL2_UHS_MASK; if (ios->clock > SD_SDR50_MAX) { if (ios->timing == bus_timing_mmc_hs400 || ios->timing == bus_timing_mmc_hs400es) hostctrl2 |= SDHCI_CTRL2_MMC_HS400; else hostctrl2 |= SDHCI_CTRL2_UHS_SDR104; } else if (ios->clock > SD_SDR25_MAX) hostctrl2 |= SDHCI_CTRL2_UHS_SDR50; else if (ios->clock > SD_SDR12_MAX) { if (ios->timing == bus_timing_uhs_ddr50 || ios->timing == bus_timing_mmc_ddr52) hostctrl2 |= SDHCI_CTRL2_UHS_DDR50; else hostctrl2 |= SDHCI_CTRL2_UHS_SDR25; } else if (ios->clock > SD_MMC_CARD_ID_FREQUENCY) hostctrl2 |= SDHCI_CTRL2_UHS_SDR12; WR2(slot, SDHCI_HOST_CONTROL2, hostctrl2); sdhci_set_clock(slot, ios->clock); } int sdhci_generic_update_ios(device_t brdev, device_t reqdev) { struct sdhci_slot *slot = device_get_ivars(reqdev); struct mmc_ios *ios = &slot->host.ios; SDHCI_LOCK(slot); /* Do full reset on bus power down to clear from any state. */ if (ios->power_mode == power_off) { WR4(slot, SDHCI_SIGNAL_ENABLE, 0); sdhci_init(slot); } /* Configure the bus. */ sdhci_set_clock(slot, ios->clock); sdhci_set_power(slot, (ios->power_mode == power_off) ? 0 : ios->vdd); if (ios->bus_width == bus_width_8) { slot->hostctrl |= SDHCI_CTRL_8BITBUS; slot->hostctrl &= ~SDHCI_CTRL_4BITBUS; } else if (ios->bus_width == bus_width_4) { slot->hostctrl &= ~SDHCI_CTRL_8BITBUS; slot->hostctrl |= SDHCI_CTRL_4BITBUS; } else if (ios->bus_width == bus_width_1) { slot->hostctrl &= ~SDHCI_CTRL_8BITBUS; slot->hostctrl &= ~SDHCI_CTRL_4BITBUS; } else { panic("Invalid bus width: %d", ios->bus_width); } if (ios->clock > SD_SDR12_MAX && !(slot->quirks & SDHCI_QUIRK_DONT_SET_HISPD_BIT)) slot->hostctrl |= SDHCI_CTRL_HISPD; else slot->hostctrl &= ~SDHCI_CTRL_HISPD; WR1(slot, SDHCI_HOST_CONTROL, slot->hostctrl); SDHCI_SET_UHS_TIMING(brdev, slot); /* Some controllers like reset after bus changes. 
*/ if (slot->quirks & SDHCI_QUIRK_RESET_ON_IOS) sdhci_reset(slot, SDHCI_RESET_CMD | SDHCI_RESET_DATA); SDHCI_UNLOCK(slot); return (0); } int sdhci_generic_switch_vccq(device_t brdev __unused, device_t reqdev) { struct sdhci_slot *slot = device_get_ivars(reqdev); enum mmc_vccq vccq; int err; uint16_t hostctrl2; if (slot->version < SDHCI_SPEC_300) return (0); err = 0; vccq = slot->host.ios.vccq; SDHCI_LOCK(slot); sdhci_set_clock(slot, 0); hostctrl2 = RD2(slot, SDHCI_HOST_CONTROL2); switch (vccq) { case vccq_330: if (!(hostctrl2 & SDHCI_CTRL2_S18_ENABLE)) goto done; hostctrl2 &= ~SDHCI_CTRL2_S18_ENABLE; WR2(slot, SDHCI_HOST_CONTROL2, hostctrl2); DELAY(5000); hostctrl2 = RD2(slot, SDHCI_HOST_CONTROL2); if (!(hostctrl2 & SDHCI_CTRL2_S18_ENABLE)) goto done; err = EAGAIN; break; case vccq_180: if (!(slot->host.caps & MMC_CAP_SIGNALING_180)) { err = EINVAL; goto done; } if (hostctrl2 & SDHCI_CTRL2_S18_ENABLE) goto done; hostctrl2 |= SDHCI_CTRL2_S18_ENABLE; WR2(slot, SDHCI_HOST_CONTROL2, hostctrl2); DELAY(5000); hostctrl2 = RD2(slot, SDHCI_HOST_CONTROL2); if (hostctrl2 & SDHCI_CTRL2_S18_ENABLE) goto done; err = EAGAIN; break; default: slot_printf(slot, "Attempt to set unsupported signaling voltage\n"); err = EINVAL; break; } done: sdhci_set_clock(slot, slot->host.ios.clock); SDHCI_UNLOCK(slot); return (err); } int sdhci_generic_tune(device_t brdev __unused, device_t reqdev, bool hs400) { struct sdhci_slot *slot = device_get_ivars(reqdev); struct mmc_ios *ios = &slot->host.ios; struct mmc_command *tune_cmd; struct mmc_data *tune_data; uint32_t opcode; int err; if (!(slot->opt & SDHCI_TUNING_SUPPORTED)) return (0); slot->retune_ticks = slot->retune_count * hz; opcode = MMC_SEND_TUNING_BLOCK; SDHCI_LOCK(slot); switch (ios->timing) { case bus_timing_mmc_hs400: slot_printf(slot, "HS400 must be tuned in HS200 mode\n"); SDHCI_UNLOCK(slot); return (EINVAL); case bus_timing_mmc_hs200: /* * In HS400 mode, controllers use the data strobe line to * latch data from the devices so periodic re-tuning isn't * expected to be required. */ if (hs400) slot->retune_ticks = 0; opcode = MMC_SEND_TUNING_BLOCK_HS200; break; case bus_timing_uhs_ddr50: case bus_timing_uhs_sdr104: break; case bus_timing_uhs_sdr50: if (slot->opt & SDHCI_SDR50_NEEDS_TUNING) break; /* FALLTHROUGH */ default: SDHCI_UNLOCK(slot); return (0); } tune_cmd = slot->tune_cmd; memset(tune_cmd, 0, sizeof(*tune_cmd)); tune_cmd->opcode = opcode; tune_cmd->flags = MMC_RSP_R1 | MMC_CMD_ADTC; tune_data = tune_cmd->data = slot->tune_data; memset(tune_data, 0, sizeof(*tune_data)); tune_data->len = (opcode == MMC_SEND_TUNING_BLOCK_HS200 && ios->bus_width == bus_width_8) ? MMC_TUNING_LEN_HS200 : MMC_TUNING_LEN; tune_data->flags = MMC_DATA_READ; tune_data->mrq = tune_cmd->mrq = slot->tune_req; slot->opt &= ~SDHCI_TUNING_ENABLED; err = sdhci_exec_tuning(slot, true); if (err == 0) { slot->opt |= SDHCI_TUNING_ENABLED; slot->intmask |= sdhci_tuning_intmask(slot); WR4(slot, SDHCI_SIGNAL_ENABLE, slot->intmask); if (slot->retune_ticks) { callout_reset(&slot->retune_callout, slot->retune_ticks, sdhci_retune, slot); } } SDHCI_UNLOCK(slot); return (err); } int sdhci_generic_retune(device_t brdev __unused, device_t reqdev, bool reset) { struct sdhci_slot *slot = device_get_ivars(reqdev); int err; if (!(slot->opt & SDHCI_TUNING_ENABLED)) return (0); /* HS400 must be tuned in HS200 mode. 
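 *
 * (In HS400 the data strobe latches data, so the sampling point cannot
 * be tuned in that mode; the expected caller sequence, sketched
 * editorially with the MMCBR_TUNE interface, is:)
 */
#if 0	/* Illustrative only, not compiled. */
	/* 1. Tune in HS200 timing, telling the bridge HS400 is the goal. */
	error = MMCBR_TUNE(brdev, reqdev, true);
	/* 2. Only then switch card and controller to HS400; once there,  */
	/*    re-tuning is refused below with EINVAL.                      */
#endif
/*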
*/ if (slot->host.ios.timing == bus_timing_mmc_hs400) return (EINVAL); SDHCI_LOCK(slot); err = sdhci_exec_tuning(slot, reset); /* * There are two ways sdhci_exec_tuning() can fail: * EBUSY should not actually happen when requests are only issued * with the host properly acquired, and * EIO re-tuning failed (but it did work initially). * * In both cases, we should retry at later point if periodic re-tuning * is enabled. Note that due to slot->retune_req not being cleared in * these failure cases, the MMC layer should trigger another attempt at * re-tuning with the next request anyway, though. */ if (slot->retune_ticks) { callout_reset(&slot->retune_callout, slot->retune_ticks, sdhci_retune, slot); } SDHCI_UNLOCK(slot); return (err); } static int sdhci_exec_tuning(struct sdhci_slot *slot, bool reset) { struct mmc_request *tune_req; struct mmc_command *tune_cmd; int i; uint32_t intmask; uint16_t hostctrl2; u_char opt; SDHCI_ASSERT_LOCKED(slot); if (slot->req != NULL) return (EBUSY); /* Tuning doesn't work with DMA enabled. */ opt = slot->opt; slot->opt = opt & ~SDHCI_HAVE_DMA; /* * Ensure that as documented, SDHCI_INT_DATA_AVAIL is the only * kind of interrupt we receive in response to a tuning request. */ intmask = slot->intmask; slot->intmask = SDHCI_INT_DATA_AVAIL; WR4(slot, SDHCI_SIGNAL_ENABLE, SDHCI_INT_DATA_AVAIL); hostctrl2 = RD2(slot, SDHCI_HOST_CONTROL2); if (reset) hostctrl2 &= ~SDHCI_CTRL2_SAMPLING_CLOCK; else hostctrl2 |= SDHCI_CTRL2_SAMPLING_CLOCK; WR2(slot, SDHCI_HOST_CONTROL2, hostctrl2 | SDHCI_CTRL2_EXEC_TUNING); tune_req = slot->tune_req; tune_cmd = slot->tune_cmd; for (i = 0; i < MMC_TUNING_MAX; i++) { memset(tune_req, 0, sizeof(*tune_req)); tune_req->cmd = tune_cmd; tune_req->done = sdhci_req_wakeup; tune_req->done_data = slot; slot->req = tune_req; slot->flags = 0; sdhci_start(slot); while (!(tune_req->flags & MMC_REQ_DONE)) msleep(tune_req, &slot->mtx, 0, "sdhciet", 0); if (!(tune_req->flags & MMC_TUNE_DONE)) break; hostctrl2 = RD2(slot, SDHCI_HOST_CONTROL2); if (!(hostctrl2 & SDHCI_CTRL2_EXEC_TUNING)) break; if (tune_cmd->opcode == MMC_SEND_TUNING_BLOCK) DELAY(1000); } slot->opt = opt; slot->intmask = intmask; WR4(slot, SDHCI_SIGNAL_ENABLE, intmask); if ((hostctrl2 & (SDHCI_CTRL2_EXEC_TUNING | SDHCI_CTRL2_SAMPLING_CLOCK)) == SDHCI_CTRL2_SAMPLING_CLOCK) { slot->retune_req = 0; return (0); } slot_printf(slot, "Tuning failed, using fixed sampling clock\n"); WR2(slot, SDHCI_HOST_CONTROL2, hostctrl2 & ~(SDHCI_CTRL2_EXEC_TUNING | SDHCI_CTRL2_SAMPLING_CLOCK)); sdhci_reset(slot, SDHCI_RESET_CMD | SDHCI_RESET_DATA); return (EIO); } static void sdhci_retune(void *arg) { struct sdhci_slot *slot = arg; slot->retune_req |= SDHCI_RETUNE_REQ_NEEDED; } #ifdef MMCCAM static void sdhci_req_done(struct sdhci_slot *slot) { union ccb *ccb; if (__predict_false(sdhci_debug > 1)) slot_printf(slot, "%s\n", __func__); if (slot->ccb != NULL && slot->curcmd != NULL) { callout_stop(&slot->timeout_callout); ccb = slot->ccb; slot->ccb = NULL; slot->curcmd = NULL; /* Tell CAM the request is finished */ struct ccb_mmcio *mmcio; mmcio = &ccb->mmcio; ccb->ccb_h.status = (mmcio->cmd.error == 0 ? 
CAM_REQ_CMP : CAM_REQ_CMP_ERR); xpt_done(ccb); } } #else static void sdhci_req_done(struct sdhci_slot *slot) { struct mmc_request *req; if (slot->req != NULL && slot->curcmd != NULL) { callout_stop(&slot->timeout_callout); req = slot->req; slot->req = NULL; slot->curcmd = NULL; req->done(req); } } #endif static void sdhci_req_wakeup(struct mmc_request *req) { struct sdhci_slot *slot; slot = req->done_data; req->flags |= MMC_REQ_DONE; wakeup(req); } static void sdhci_timeout(void *arg) { struct sdhci_slot *slot = arg; if (slot->curcmd != NULL) { slot_printf(slot, "Controller timeout\n"); sdhci_dumpregs(slot); sdhci_reset(slot, SDHCI_RESET_CMD | SDHCI_RESET_DATA); slot->curcmd->error = MMC_ERR_TIMEOUT; sdhci_req_done(slot); } else { slot_printf(slot, "Spurious timeout - no active command\n"); } } static void sdhci_set_transfer_mode(struct sdhci_slot *slot, struct mmc_data *data) { uint16_t mode; if (data == NULL) return; mode = SDHCI_TRNS_BLK_CNT_EN; if (data->len > 512) mode |= SDHCI_TRNS_MULTI; if (data->flags & MMC_DATA_READ) mode |= SDHCI_TRNS_READ; #ifdef MMCCAM struct ccb_mmcio *mmcio; mmcio = &slot->ccb->mmcio; if (mmcio->stop.opcode == MMC_STOP_TRANSMISSION && !(slot->quirks & SDHCI_QUIRK_BROKEN_AUTO_STOP)) mode |= SDHCI_TRNS_ACMD12; #else if (slot->req->stop && !(slot->quirks & SDHCI_QUIRK_BROKEN_AUTO_STOP)) mode |= SDHCI_TRNS_ACMD12; #endif if (slot->flags & SDHCI_USE_DMA) mode |= SDHCI_TRNS_DMA; WR2(slot, SDHCI_TRANSFER_MODE, mode); } static void sdhci_start_command(struct sdhci_slot *slot, struct mmc_command *cmd) { int flags, timeout; uint32_t mask; slot->curcmd = cmd; slot->cmd_done = 0; cmd->error = MMC_ERR_NONE; /* This flag combination is not supported by the controller. */ if ((cmd->flags & MMC_RSP_136) && (cmd->flags & MMC_RSP_BUSY)) { slot_printf(slot, "Unsupported response type!\n"); cmd->error = MMC_ERR_FAILED; sdhci_req_done(slot); return; } /* * Do not issue command if there is no card, clock or power. * Controller will not detect timeout without clock active. */ if (!SDHCI_GET_CARD_PRESENT(slot->bus, slot) || slot->power == 0 || slot->clock == 0) { slot_printf(slot, "Cannot issue a command (power=%d clock=%d)", slot->power, slot->clock); cmd->error = MMC_ERR_FAILED; sdhci_req_done(slot); return; } /* Always wait for free CMD bus. */ mask = SDHCI_CMD_INHIBIT; /* Wait for free DAT if we have data or busy signal. */ if (cmd->data != NULL || (cmd->flags & MMC_RSP_BUSY)) mask |= SDHCI_DAT_INHIBIT; /* * We shouldn't wait for DAT for stop commands or CMD19/CMD21. Note * that these latter are also special in that SDHCI_CMD_DATA should * be set below but no actual data is ever read from the controller. */ #ifdef MMCCAM if (cmd == &slot->ccb->mmcio.stop || #else if (cmd == slot->req->stop || #endif __predict_false(cmd->opcode == MMC_SEND_TUNING_BLOCK || cmd->opcode == MMC_SEND_TUNING_BLOCK_HS200)) mask &= ~SDHCI_DAT_INHIBIT; /* * Wait for bus no more than 250 ms. Typically there will be no wait * here at all, but when writing a crash dump we may be bypassing the * host platform's interrupt handler, and in some cases that handler * may be working around hardware quirks such as not respecting r1b * busy indications. In those cases, this wait-loop serves the purpose * of waiting for the prior command and data transfers to be done, and * SD cards are allowed to take up to 250ms for write and erase ops. * (It's usually more like 20-30ms in the real world.
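* The loop below polls SDHCI_PRESENT_STATE in 1 ms DELAY() steps, bounding the total busy-wait at that 250 ms budget.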
*/ timeout = 250; while (mask & RD4(slot, SDHCI_PRESENT_STATE)) { if (timeout == 0) { slot_printf(slot, "Controller never released " "inhibit bit(s).\n"); sdhci_dumpregs(slot); cmd->error = MMC_ERR_FAILED; sdhci_req_done(slot); return; } timeout--; DELAY(1000); } /* Prepare command flags. */ if (!(cmd->flags & MMC_RSP_PRESENT)) flags = SDHCI_CMD_RESP_NONE; else if (cmd->flags & MMC_RSP_136) flags = SDHCI_CMD_RESP_LONG; else if (cmd->flags & MMC_RSP_BUSY) flags = SDHCI_CMD_RESP_SHORT_BUSY; else flags = SDHCI_CMD_RESP_SHORT; if (cmd->flags & MMC_RSP_CRC) flags |= SDHCI_CMD_CRC; if (cmd->flags & MMC_RSP_OPCODE) flags |= SDHCI_CMD_INDEX; if (cmd->data != NULL) flags |= SDHCI_CMD_DATA; if (cmd->opcode == MMC_STOP_TRANSMISSION) flags |= SDHCI_CMD_TYPE_ABORT; /* Prepare data. */ sdhci_start_data(slot, cmd->data); /* * Interrupt aggregation: To reduce total number of interrupts * group response interrupt with data interrupt when possible. * If there is going to be a data interrupt, mask the response one. */ if (slot->data_done == 0) { WR4(slot, SDHCI_SIGNAL_ENABLE, slot->intmask &= ~SDHCI_INT_RESPONSE); } /* Set command argument. */ WR4(slot, SDHCI_ARGUMENT, cmd->arg); /* Set data transfer mode. */ sdhci_set_transfer_mode(slot, cmd->data); if (__predict_false(sdhci_debug > 1)) slot_printf(slot, "Starting command!\n"); /* Start command. */ WR2(slot, SDHCI_COMMAND_FLAGS, (cmd->opcode << 8) | (flags & 0xff)); /* Start timeout callout. */ callout_reset(&slot->timeout_callout, slot->timeout * hz, sdhci_timeout, slot); } static void sdhci_finish_command(struct sdhci_slot *slot) { int i; uint32_t val; uint8_t extra; if (__predict_false(sdhci_debug > 1)) slot_printf(slot, "%s: called, err %d flags %d\n", __func__, slot->curcmd->error, slot->curcmd->flags); slot->cmd_done = 1; /* * Interrupt aggregation: Restore command interrupt. * Main restore point for the case when command interrupt * happened first. */ if (__predict_true(slot->curcmd->opcode != MMC_SEND_TUNING_BLOCK && slot->curcmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)) WR4(slot, SDHCI_SIGNAL_ENABLE, slot->intmask |= SDHCI_INT_RESPONSE); /* In case of error - reset host and return. */ if (slot->curcmd->error) { if (slot->curcmd->error == MMC_ERR_BADCRC) slot->retune_req |= SDHCI_RETUNE_REQ_RESET; sdhci_reset(slot, SDHCI_RESET_CMD); sdhci_reset(slot, SDHCI_RESET_DATA); sdhci_start(slot); return; } /* If command has response - fetch it. */ if (slot->curcmd->flags & MMC_RSP_PRESENT) { if (slot->curcmd->flags & MMC_RSP_136) { /* CRC is stripped so we need one byte shift. */ extra = 0; for (i = 0; i < 4; i++) { val = RD4(slot, SDHCI_RESPONSE + i * 4); if (slot->quirks & SDHCI_QUIRK_DONT_SHIFT_RESPONSE) slot->curcmd->resp[3 - i] = val; else { slot->curcmd->resp[3 - i] = (val << 8) | extra; extra = val >> 24; } } } else slot->curcmd->resp[0] = RD4(slot, SDHCI_RESPONSE); } if (__predict_false(sdhci_debug > 1)) printf("Resp: %02x %02x %02x %02x\n", slot->curcmd->resp[0], slot->curcmd->resp[1], slot->curcmd->resp[2], slot->curcmd->resp[3]); /* If data ready - finish. */ if (slot->data_done) sdhci_start(slot); } static void sdhci_start_data(struct sdhci_slot *slot, struct mmc_data *data) { uint32_t target_timeout, current_timeout; uint8_t div; if (data == NULL && (slot->curcmd->flags & MMC_RSP_BUSY) == 0) { slot->data_done = 1; return; } slot->data_done = 0; /* Calculate and set data timeout. */ /* XXX: We should have this from mmc layer, now assume 1 sec.
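* Per the SDHCI spec the data timeout is 2^(13 + div) cycles of the timeout clock; div is raised below until that covers the 1 s target, capped at the maximum encoding of 0xE.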
*/ if (slot->quirks & SDHCI_QUIRK_BROKEN_TIMEOUT_VAL) { div = 0xE; } else { target_timeout = 1000000; div = 0; current_timeout = (1 << 13) * 1000 / slot->timeout_clk; while (current_timeout < target_timeout && div < 0xE) { ++div; current_timeout <<= 1; } /* Compensate for an off-by-one error in the CaFe chip. */ if (div < 0xE && (slot->quirks & SDHCI_QUIRK_INCR_TIMEOUT_CONTROL)) { ++div; } } WR1(slot, SDHCI_TIMEOUT_CONTROL, div); if (data == NULL) return; /* Use DMA if possible. */ if ((slot->opt & SDHCI_HAVE_DMA)) slot->flags |= SDHCI_USE_DMA; /* If data is small, broken DMA may return zeroes instead of data. */ if ((slot->quirks & SDHCI_QUIRK_BROKEN_TIMINGS) && (data->len <= 512)) slot->flags &= ~SDHCI_USE_DMA; /* Some controllers require even block sizes. */ if ((slot->quirks & SDHCI_QUIRK_32BIT_DMA_SIZE) && ((data->len) & 0x3)) slot->flags &= ~SDHCI_USE_DMA; /* Load DMA buffer. */ if (slot->flags & SDHCI_USE_DMA) { if (data->flags & MMC_DATA_READ) bus_dmamap_sync(slot->dmatag, slot->dmamap, BUS_DMASYNC_PREREAD); else { memcpy(slot->dmamem, data->data, (data->len < DMA_BLOCK_SIZE) ? data->len : DMA_BLOCK_SIZE); bus_dmamap_sync(slot->dmatag, slot->dmamap, BUS_DMASYNC_PREWRITE); } WR4(slot, SDHCI_DMA_ADDRESS, slot->paddr); /* Interrupt aggregation: Mask border interrupt * for the last page and unmask it otherwise. */ if (data->len == DMA_BLOCK_SIZE) slot->intmask &= ~SDHCI_INT_DMA_END; else slot->intmask |= SDHCI_INT_DMA_END; WR4(slot, SDHCI_SIGNAL_ENABLE, slot->intmask); } /* Current data offset for both PIO and DMA. */ slot->offset = 0; /* Set block size and request IRQ on 4K border. */ WR2(slot, SDHCI_BLOCK_SIZE, SDHCI_MAKE_BLKSZ(DMA_BOUNDARY, (data->len < 512) ? data->len : 512)); /* Set block count. */ WR2(slot, SDHCI_BLOCK_COUNT, (data->len + 511) / 512); if (__predict_false(sdhci_debug > 1)) slot_printf(slot, "Block size: %02x, count %lu\n", (unsigned int)SDHCI_MAKE_BLKSZ(DMA_BOUNDARY, (data->len < 512) ? data->len : 512), (unsigned long)(data->len + 511) / 512); } void sdhci_finish_data(struct sdhci_slot *slot) { struct mmc_data *data = slot->curcmd->data; size_t left; /* Interrupt aggregation: Restore command interrupt. * Auxiliary restore point for the case when data interrupt * happened first. */ if (!slot->cmd_done) { WR4(slot, SDHCI_SIGNAL_ENABLE, slot->intmask |= SDHCI_INT_RESPONSE); } /* Unload rest of data from DMA buffer. */ if (!slot->data_done && (slot->flags & SDHCI_USE_DMA) && slot->curcmd->data != NULL) { if (data->flags & MMC_DATA_READ) { left = data->len - slot->offset; bus_dmamap_sync(slot->dmatag, slot->dmamap, BUS_DMASYNC_POSTREAD); memcpy((u_char*)data->data + slot->offset, slot->dmamem, (left < DMA_BLOCK_SIZE) ? left : DMA_BLOCK_SIZE); } else bus_dmamap_sync(slot->dmatag, slot->dmamap, BUS_DMASYNC_POSTWRITE); } slot->data_done = 1; /* If there was error - reset the host. */ if (slot->curcmd->error) { if (slot->curcmd->error == MMC_ERR_BADCRC) slot->retune_req |= SDHCI_RETUNE_REQ_RESET; sdhci_reset(slot, SDHCI_RESET_CMD); sdhci_reset(slot, SDHCI_RESET_DATA); sdhci_start(slot); return; } /* If we already have command response - finish. */ if (slot->cmd_done) sdhci_start(slot); } #ifdef MMCCAM static void sdhci_start(struct sdhci_slot *slot) { union ccb *ccb; ccb = slot->ccb; if (ccb == NULL) return; struct ccb_mmcio *mmcio; mmcio = &ccb->mmcio; if (!(slot->flags & CMD_STARTED)) { slot->flags |= CMD_STARTED; sdhci_start_command(slot, &mmcio->cmd); return; } /* * Old stack doesn't use this!
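* (The controller's Auto CMD12, set up via SDHCI_TRNS_ACMD12 in sdhci_set_transfer_mode(), issues the stop command instead.)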
* Enabling this code causes significant performance degradation * and IRQ storms on BBB, Wandboard behaves fine. * Not using this code does no harm... if (!(slot->flags & STOP_STARTED) && mmcio->stop.opcode != 0) { slot->flags |= STOP_STARTED; sdhci_start_command(slot, &mmcio->stop); return; } */ if (__predict_false(sdhci_debug > 1)) slot_printf(slot, "result: %d\n", mmcio->cmd.error); if (mmcio->cmd.error == 0 && (slot->quirks & SDHCI_QUIRK_RESET_AFTER_REQUEST)) { sdhci_reset(slot, SDHCI_RESET_CMD); sdhci_reset(slot, SDHCI_RESET_DATA); } sdhci_req_done(slot); } #else static void sdhci_start(struct sdhci_slot *slot) { struct mmc_request *req; req = slot->req; if (req == NULL) return; if (!(slot->flags & CMD_STARTED)) { slot->flags |= CMD_STARTED; sdhci_start_command(slot, req->cmd); return; } if ((slot->quirks & SDHCI_QUIRK_BROKEN_AUTO_STOP) && !(slot->flags & STOP_STARTED) && req->stop) { slot->flags |= STOP_STARTED; sdhci_start_command(slot, req->stop); return; } if (__predict_false(sdhci_debug > 1)) slot_printf(slot, "result: %d\n", req->cmd->error); if (!req->cmd->error && ((slot->curcmd == req->stop && (slot->quirks & SDHCI_QUIRK_BROKEN_AUTO_STOP)) || (slot->quirks & SDHCI_QUIRK_RESET_AFTER_REQUEST))) { sdhci_reset(slot, SDHCI_RESET_CMD); sdhci_reset(slot, SDHCI_RESET_DATA); } sdhci_req_done(slot); } #endif int sdhci_generic_request(device_t brdev __unused, device_t reqdev, struct mmc_request *req) { struct sdhci_slot *slot = device_get_ivars(reqdev); SDHCI_LOCK(slot); if (slot->req != NULL) { SDHCI_UNLOCK(slot); return (EBUSY); } if (__predict_false(sdhci_debug > 1)) { slot_printf(slot, "CMD%u arg %#x flags %#x dlen %u dflags %#x\n", req->cmd->opcode, req->cmd->arg, req->cmd->flags, (req->cmd->data)?(u_int)req->cmd->data->len:0, (req->cmd->data)?req->cmd->data->flags:0); } slot->req = req; slot->flags = 0; sdhci_start(slot); SDHCI_UNLOCK(slot); if (dumping) { while (slot->req != NULL) { sdhci_generic_intr(slot); DELAY(10); } } return (0); } int sdhci_generic_get_ro(device_t brdev __unused, device_t reqdev) { struct sdhci_slot *slot = device_get_ivars(reqdev); uint32_t val; SDHCI_LOCK(slot); val = RD4(slot, SDHCI_PRESENT_STATE); SDHCI_UNLOCK(slot); return (!(val & SDHCI_WRITE_PROTECT)); } int sdhci_generic_acquire_host(device_t brdev __unused, device_t reqdev) { struct sdhci_slot *slot = device_get_ivars(reqdev); int err = 0; SDHCI_LOCK(slot); while (slot->bus_busy) msleep(slot, &slot->mtx, 0, "sdhciah", 0); slot->bus_busy++; /* Activate led. */ WR1(slot, SDHCI_HOST_CONTROL, slot->hostctrl |= SDHCI_CTRL_LED); SDHCI_UNLOCK(slot); return (err); } int sdhci_generic_release_host(device_t brdev __unused, device_t reqdev) { struct sdhci_slot *slot = device_get_ivars(reqdev); SDHCI_LOCK(slot); /* Deactivate led. 
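* Also wake up any thread sleeping in sdhci_generic_acquire_host(); the bus is marked free below.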
*/ WR1(slot, SDHCI_HOST_CONTROL, slot->hostctrl &= ~SDHCI_CTRL_LED); slot->bus_busy--; SDHCI_UNLOCK(slot); wakeup(slot); return (0); } static void sdhci_cmd_irq(struct sdhci_slot *slot, uint32_t intmask) { if (!slot->curcmd) { slot_printf(slot, "Got command interrupt 0x%08x, but " "there is no active command.\n", intmask); sdhci_dumpregs(slot); return; } if (intmask & SDHCI_INT_TIMEOUT) slot->curcmd->error = MMC_ERR_TIMEOUT; else if (intmask & SDHCI_INT_CRC) slot->curcmd->error = MMC_ERR_BADCRC; else if (intmask & (SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) slot->curcmd->error = MMC_ERR_FIFO; sdhci_finish_command(slot); } static void sdhci_data_irq(struct sdhci_slot *slot, uint32_t intmask) { struct mmc_data *data; size_t left; if (!slot->curcmd) { slot_printf(slot, "Got data interrupt 0x%08x, but " "there is no active command.\n", intmask); sdhci_dumpregs(slot); return; } if (slot->curcmd->data == NULL && (slot->curcmd->flags & MMC_RSP_BUSY) == 0) { slot_printf(slot, "Got data interrupt 0x%08x, but " "there is no active data operation.\n", intmask); sdhci_dumpregs(slot); return; } if (intmask & SDHCI_INT_DATA_TIMEOUT) slot->curcmd->error = MMC_ERR_TIMEOUT; else if (intmask & (SDHCI_INT_DATA_CRC | SDHCI_INT_DATA_END_BIT)) slot->curcmd->error = MMC_ERR_BADCRC; if (slot->curcmd->data == NULL && (intmask & (SDHCI_INT_DATA_AVAIL | SDHCI_INT_SPACE_AVAIL | SDHCI_INT_DMA_END))) { slot_printf(slot, "Got data interrupt 0x%08x, but " "there is busy-only command.\n", intmask); sdhci_dumpregs(slot); slot->curcmd->error = MMC_ERR_INVALID; } if (slot->curcmd->error) { /* No need to continue after any error. */ goto done; } /* Handle tuning completion interrupt. */ if (__predict_false((intmask & SDHCI_INT_DATA_AVAIL) && (slot->curcmd->opcode == MMC_SEND_TUNING_BLOCK || slot->curcmd->opcode == MMC_SEND_TUNING_BLOCK_HS200))) { slot->req->flags |= MMC_TUNE_DONE; sdhci_finish_command(slot); sdhci_finish_data(slot); return; } /* Handle PIO interrupt. */ if (intmask & (SDHCI_INT_DATA_AVAIL | SDHCI_INT_SPACE_AVAIL)) { if ((slot->opt & SDHCI_PLATFORM_TRANSFER) && SDHCI_PLATFORM_WILL_HANDLE(slot->bus, slot)) { SDHCI_PLATFORM_START_TRANSFER(slot->bus, slot, &intmask); slot->flags |= PLATFORM_DATA_STARTED; } else sdhci_transfer_pio(slot); } /* Handle DMA border. */ if (intmask & SDHCI_INT_DMA_END) { data = slot->curcmd->data; /* Unload DMA buffer ... */ left = data->len - slot->offset; if (data->flags & MMC_DATA_READ) { bus_dmamap_sync(slot->dmatag, slot->dmamap, BUS_DMASYNC_POSTREAD); memcpy((u_char*)data->data + slot->offset, slot->dmamem, (left < DMA_BLOCK_SIZE) ? left : DMA_BLOCK_SIZE); } else { bus_dmamap_sync(slot->dmatag, slot->dmamap, BUS_DMASYNC_POSTWRITE); } /* ... and reload it again. */ slot->offset += DMA_BLOCK_SIZE; left = data->len - slot->offset; if (data->flags & MMC_DATA_READ) { bus_dmamap_sync(slot->dmatag, slot->dmamap, BUS_DMASYNC_PREREAD); } else { memcpy(slot->dmamem, (u_char*)data->data + slot->offset, (left < DMA_BLOCK_SIZE)? left : DMA_BLOCK_SIZE); bus_dmamap_sync(slot->dmatag, slot->dmamap, BUS_DMASYNC_PREWRITE); } /* Interrupt aggregation: Mask border interrupt * for the last page. */ if (left == DMA_BLOCK_SIZE) { slot->intmask &= ~SDHCI_INT_DMA_END; WR4(slot, SDHCI_SIGNAL_ENABLE, slot->intmask); } /* Restart DMA. */ WR4(slot, SDHCI_DMA_ADDRESS, slot->paddr); } /* We have got all data. 
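* Hand completion to the platform transfer hook when one is active, otherwise finish the data phase in sdhci_finish_data(); the error path below mirrors this.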
*/ if (intmask & SDHCI_INT_DATA_END) { if (slot->flags & PLATFORM_DATA_STARTED) { slot->flags &= ~PLATFORM_DATA_STARTED; SDHCI_PLATFORM_FINISH_TRANSFER(slot->bus, slot); } else sdhci_finish_data(slot); } done: if (slot->curcmd != NULL && slot->curcmd->error != 0) { if (slot->flags & PLATFORM_DATA_STARTED) { slot->flags &= ~PLATFORM_DATA_STARTED; SDHCI_PLATFORM_FINISH_TRANSFER(slot->bus, slot); } else sdhci_finish_data(slot); } } static void sdhci_acmd_irq(struct sdhci_slot *slot) { uint16_t err; err = RD4(slot, SDHCI_ACMD12_ERR); if (!slot->curcmd) { slot_printf(slot, "Got AutoCMD12 error 0x%04x, but " "there is no active command.\n", err); sdhci_dumpregs(slot); return; } slot_printf(slot, "Got AutoCMD12 error 0x%04x\n", err); sdhci_reset(slot, SDHCI_RESET_CMD); } void sdhci_generic_intr(struct sdhci_slot *slot) { uint32_t intmask, present; SDHCI_LOCK(slot); /* Read slot interrupt status. */ intmask = RD4(slot, SDHCI_INT_STATUS); if (intmask == 0 || intmask == 0xffffffff) { SDHCI_UNLOCK(slot); return; } if (__predict_false(sdhci_debug > 2)) slot_printf(slot, "Interrupt %#x\n", intmask); /* Handle tuning error interrupt. */ if (__predict_false(intmask & SDHCI_INT_TUNEERR)) { slot_printf(slot, "Tuning error indicated\n"); slot->retune_req |= SDHCI_RETUNE_REQ_RESET; if (slot->curcmd) { slot->curcmd->error = MMC_ERR_BADCRC; sdhci_finish_command(slot); } } /* Handle re-tuning interrupt. */ if (__predict_false(intmask & SDHCI_INT_RETUNE)) slot->retune_req |= SDHCI_RETUNE_REQ_NEEDED; /* Handle card presence interrupts. */ if (intmask & (SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE)) { present = (intmask & SDHCI_INT_CARD_INSERT) != 0; slot->intmask &= ~(SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE); slot->intmask |= present ? SDHCI_INT_CARD_REMOVE : SDHCI_INT_CARD_INSERT; WR4(slot, SDHCI_INT_ENABLE, slot->intmask); WR4(slot, SDHCI_SIGNAL_ENABLE, slot->intmask); WR4(slot, SDHCI_INT_STATUS, intmask & (SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE)); sdhci_handle_card_present_locked(slot, present); } /* Handle command interrupts. */ if (intmask & SDHCI_INT_CMD_MASK) { WR4(slot, SDHCI_INT_STATUS, intmask & SDHCI_INT_CMD_MASK); sdhci_cmd_irq(slot, intmask & SDHCI_INT_CMD_MASK); } /* Handle data interrupts. */ if (intmask & SDHCI_INT_DATA_MASK) { WR4(slot, SDHCI_INT_STATUS, intmask & SDHCI_INT_DATA_MASK); /* Don't call data_irq in case of errored command. */ if ((intmask & SDHCI_INT_CMD_ERROR_MASK) == 0) sdhci_data_irq(slot, intmask & SDHCI_INT_DATA_MASK); } /* Handle AutoCMD12 error interrupt. */ if (intmask & SDHCI_INT_ACMD12ERR) { WR4(slot, SDHCI_INT_STATUS, SDHCI_INT_ACMD12ERR); sdhci_acmd_irq(slot); } /* Handle bus power interrupt. */ if (intmask & SDHCI_INT_BUS_POWER) { WR4(slot, SDHCI_INT_STATUS, SDHCI_INT_BUS_POWER); slot_printf(slot, "Card is consuming too much power!\n"); } intmask &= ~(SDHCI_INT_ERROR | SDHCI_INT_TUNEERR | SDHCI_INT_RETUNE | SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE | SDHCI_INT_CMD_MASK | SDHCI_INT_DATA_MASK | SDHCI_INT_ACMD12ERR | SDHCI_INT_BUS_POWER); /* The rest is unknown. 
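* Acknowledge any leftover bits so they do not stay latched, log them, and dump the registers to aid debugging.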
*/ if (intmask) { WR4(slot, SDHCI_INT_STATUS, intmask); slot_printf(slot, "Unexpected interrupt 0x%08x.\n", intmask); sdhci_dumpregs(slot); } SDHCI_UNLOCK(slot); } int sdhci_generic_read_ivar(device_t bus, device_t child, int which, uintptr_t *result) { struct sdhci_slot *slot = device_get_ivars(child); switch (which) { default: return (EINVAL); case MMCBR_IVAR_BUS_MODE: *result = slot->host.ios.bus_mode; break; case MMCBR_IVAR_BUS_WIDTH: *result = slot->host.ios.bus_width; break; case MMCBR_IVAR_CHIP_SELECT: *result = slot->host.ios.chip_select; break; case MMCBR_IVAR_CLOCK: *result = slot->host.ios.clock; break; case MMCBR_IVAR_F_MIN: *result = slot->host.f_min; break; case MMCBR_IVAR_F_MAX: *result = slot->host.f_max; break; case MMCBR_IVAR_HOST_OCR: *result = slot->host.host_ocr; break; case MMCBR_IVAR_MODE: *result = slot->host.mode; break; case MMCBR_IVAR_OCR: *result = slot->host.ocr; break; case MMCBR_IVAR_POWER_MODE: *result = slot->host.ios.power_mode; break; case MMCBR_IVAR_VDD: *result = slot->host.ios.vdd; break; case MMCBR_IVAR_RETUNE_REQ: if (slot->opt & SDHCI_TUNING_ENABLED) { if (slot->retune_req & SDHCI_RETUNE_REQ_RESET) { *result = retune_req_reset; break; } if (slot->retune_req & SDHCI_RETUNE_REQ_NEEDED) { *result = retune_req_normal; break; } } *result = retune_req_none; break; case MMCBR_IVAR_VCCQ: *result = slot->host.ios.vccq; break; case MMCBR_IVAR_CAPS: *result = slot->host.caps; break; case MMCBR_IVAR_TIMING: *result = slot->host.ios.timing; break; case MMCBR_IVAR_MAX_DATA: /* * Re-tuning modes 1 and 2 restrict the maximum data length * per read/write command to 4 MiB. */ if (slot->opt & SDHCI_TUNING_ENABLED && (slot->retune_mode == SDHCI_RETUNE_MODE_1 || slot->retune_mode == SDHCI_RETUNE_MODE_2)) { *result = 4 * 1024 * 1024 / MMC_SECTOR_SIZE; break; } *result = 65535; break; case MMCBR_IVAR_MAX_BUSY_TIMEOUT: /* * Currently, sdhci_start_data() hardcodes 1 s for all CMDs. 
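* so report exactly that limit here, expressed in microseconds.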
*/ *result = 1000000; break; } return (0); } int sdhci_generic_write_ivar(device_t bus, device_t child, int which, uintptr_t value) { struct sdhci_slot *slot = device_get_ivars(child); uint32_t clock, max_clock; int i; if (sdhci_debug > 1) slot_printf(slot, "%s: var=%d\n", __func__, which); switch (which) { default: return (EINVAL); case MMCBR_IVAR_BUS_MODE: slot->host.ios.bus_mode = value; break; case MMCBR_IVAR_BUS_WIDTH: slot->host.ios.bus_width = value; break; case MMCBR_IVAR_CHIP_SELECT: slot->host.ios.chip_select = value; break; case MMCBR_IVAR_CLOCK: if (value > 0) { max_clock = slot->max_clk; clock = max_clock; if (slot->version < SDHCI_SPEC_300) { for (i = 0; i < SDHCI_200_MAX_DIVIDER; i <<= 1) { if (clock <= value) break; clock >>= 1; } } else { for (i = 0; i < SDHCI_300_MAX_DIVIDER; i += 2) { if (clock <= value) break; clock = max_clock / (i + 2); } } slot->host.ios.clock = clock; } else slot->host.ios.clock = 0; break; case MMCBR_IVAR_MODE: slot->host.mode = value; break; case MMCBR_IVAR_OCR: slot->host.ocr = value; break; case MMCBR_IVAR_POWER_MODE: slot->host.ios.power_mode = value; break; case MMCBR_IVAR_VDD: slot->host.ios.vdd = value; break; case MMCBR_IVAR_VCCQ: slot->host.ios.vccq = value; break; case MMCBR_IVAR_TIMING: slot->host.ios.timing = value; break; case MMCBR_IVAR_CAPS: case MMCBR_IVAR_HOST_OCR: case MMCBR_IVAR_F_MIN: case MMCBR_IVAR_F_MAX: case MMCBR_IVAR_MAX_DATA: case MMCBR_IVAR_RETUNE_REQ: return (EINVAL); } return (0); } #ifdef MMCCAM void -sdhci_cam_start_slot(struct sdhci_slot *slot) +sdhci_start_slot(struct sdhci_slot *slot) { if ((slot->devq = cam_simq_alloc(1)) == NULL) { goto fail; } mtx_init(&slot->sim_mtx, "sdhcisim", NULL, MTX_DEF); slot->sim = cam_sim_alloc(sdhci_cam_action, sdhci_cam_poll, "sdhci_slot", slot, device_get_unit(slot->bus), &slot->sim_mtx, 1, 1, slot->devq); if (slot->sim == NULL) { cam_simq_free(slot->devq); slot_printf(slot, "cannot allocate CAM SIM\n"); goto fail; } mtx_lock(&slot->sim_mtx); if (xpt_bus_register(slot->sim, slot->bus, 0) != 0) { slot_printf(slot, "cannot register SCSI pass-through bus\n"); cam_sim_free(slot->sim, FALSE); cam_simq_free(slot->devq); mtx_unlock(&slot->sim_mtx); goto fail; } mtx_unlock(&slot->sim_mtx); /* End CAM-specific init */ slot->card_present = 0; sdhci_card_task(slot, 0); return; fail: if (slot->sim != NULL) { mtx_lock(&slot->sim_mtx); xpt_bus_deregister(cam_sim_path(slot->sim)); cam_sim_free(slot->sim, FALSE); mtx_unlock(&slot->sim_mtx); } if (slot->devq != NULL) cam_simq_free(slot->devq); } static void sdhci_cam_handle_mmcio(struct cam_sim *sim, union ccb *ccb) { struct sdhci_slot *slot; slot = cam_sim_softc(sim); sdhci_cam_request(slot, ccb); } void sdhci_cam_action(struct cam_sim *sim, union ccb *ccb) { struct sdhci_slot *slot; slot = cam_sim_softc(sim); if (slot == NULL) { ccb->ccb_h.status = CAM_SEL_TIMEOUT; xpt_done(ccb); return; } mtx_assert(&slot->sim_mtx, MA_OWNED); switch (ccb->ccb_h.func_code) { case XPT_PATH_INQ: { struct ccb_pathinq *cpi; cpi = &ccb->cpi; cpi->version_num = 1; cpi->hba_inquiry = 0; cpi->target_sprt = 0; cpi->hba_misc = PIM_NOBUSRESET | PIM_SEQSCAN; cpi->hba_eng_cnt = 0; cpi->max_target = 0; cpi->max_lun = 0; cpi->initiator_id = 1; cpi->maxio = MAXPHYS; strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strncpy(cpi->hba_vid, "Deglitch Networks", HBA_IDLEN); strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); cpi->bus_id = cam_sim_bus(sim); cpi->base_transfer_speed = 100; /* XXX WTF? 
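* (presumably a placeholder; CAM treats base_transfer_speed as advisory, nominally in KB/s.)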
*/ cpi->protocol = PROTO_MMCSD; cpi->protocol_version = SCSI_REV_0; cpi->transport = XPORT_MMCSD; cpi->transport_version = 0; cpi->ccb_h.status = CAM_REQ_CMP; break; } case XPT_GET_TRAN_SETTINGS: { struct ccb_trans_settings *cts = &ccb->cts; if (sdhci_debug > 1) slot_printf(slot, "Got XPT_GET_TRAN_SETTINGS\n"); cts->protocol = PROTO_MMCSD; cts->protocol_version = 1; cts->transport = XPORT_MMCSD; cts->transport_version = 1; cts->xport_specific.valid = 0; cts->proto_specific.mmc.host_ocr = slot->host.host_ocr; cts->proto_specific.mmc.host_f_min = slot->host.f_min; cts->proto_specific.mmc.host_f_max = slot->host.f_max; cts->proto_specific.mmc.host_caps = slot->host.caps; memcpy(&cts->proto_specific.mmc.ios, &slot->host.ios, sizeof(struct mmc_ios)); ccb->ccb_h.status = CAM_REQ_CMP; break; } case XPT_SET_TRAN_SETTINGS: { if (sdhci_debug > 1) slot_printf(slot, "Got XPT_SET_TRAN_SETTINGS\n"); sdhci_cam_settran_settings(slot, ccb); ccb->ccb_h.status = CAM_REQ_CMP; break; } case XPT_RESET_BUS: if (sdhci_debug > 1) slot_printf(slot, "Got XPT_RESET_BUS, ACK it...\n"); ccb->ccb_h.status = CAM_REQ_CMP; break; case XPT_MMC_IO: /* * Here is the HW-dependent part of * sending the command to the underlying h/w * At some point in the future an interrupt comes. * Then the request will be marked as completed. */ if (__predict_false(sdhci_debug > 1)) slot_printf(slot, "Got XPT_MMC_IO\n"); ccb->ccb_h.status = CAM_REQ_INPROG; sdhci_cam_handle_mmcio(sim, ccb); return; /* NOTREACHED */ break; default: ccb->ccb_h.status = CAM_REQ_INVALID; break; } xpt_done(ccb); return; } void sdhci_cam_poll(struct cam_sim *sim) { return; } int sdhci_cam_get_possible_host_clock(struct sdhci_slot *slot, int proposed_clock) { int max_clock, clock, i; if (proposed_clock == 0) return 0; max_clock = slot->max_clk; clock = max_clock; if (slot->version < SDHCI_SPEC_300) { for (i = 0; i < SDHCI_200_MAX_DIVIDER; i <<= 1) { if (clock <= proposed_clock) break; clock >>= 1; } } else { for (i = 0; i < SDHCI_300_MAX_DIVIDER; i += 2) { if (clock <= proposed_clock) break; clock = max_clock / (i + 2); } } return clock; } int sdhci_cam_settran_settings(struct sdhci_slot *slot, union ccb *ccb) { struct mmc_ios *ios; struct mmc_ios *new_ios; struct ccb_trans_settings_mmc *cts; ios = &slot->host.ios; cts = &ccb->cts.proto_specific.mmc; new_ios = &cts->ios; /* Update only requested fields */ if (cts->ios_valid & MMC_CLK) { ios->clock = sdhci_cam_get_possible_host_clock(slot, new_ios->clock); slot_printf(slot, "Clock => %d\n", ios->clock); } if (cts->ios_valid & MMC_VDD) { ios->vdd = new_ios->vdd; slot_printf(slot, "VDD => %d\n", ios->vdd); } if (cts->ios_valid & MMC_CS) { ios->chip_select = new_ios->chip_select; slot_printf(slot, "CS => %d\n", ios->chip_select); } if (cts->ios_valid & MMC_BW) { ios->bus_width = new_ios->bus_width; slot_printf(slot, "Bus width => %d\n", ios->bus_width); } if (cts->ios_valid & MMC_PM) { ios->power_mode = new_ios->power_mode; slot_printf(slot, "Power mode => %d\n", ios->power_mode); } if (cts->ios_valid & MMC_BT) { ios->timing = new_ios->timing; slot_printf(slot, "Timing => %d\n", ios->timing); } if (cts->ios_valid & MMC_BM) { ios->bus_mode = new_ios->bus_mode; slot_printf(slot, "Bus mode => %d\n", ios->bus_mode); } /* XXX Provide a way to call a chip-specific IOS update, required for TI */ return (sdhci_cam_update_ios(slot)); } int sdhci_cam_update_ios(struct sdhci_slot *slot) { struct mmc_ios *ios = &slot->host.ios; slot_printf(slot, "%s: power_mode=%d, clk=%d, bus_width=%d, timing=%d\n", __func__, ios->power_mode, 
ios->clock, ios->bus_width, ios->timing); SDHCI_LOCK(slot); /* Do full reset on bus power down to clear from any state. */ if (ios->power_mode == power_off) { WR4(slot, SDHCI_SIGNAL_ENABLE, 0); sdhci_init(slot); } /* Configure the bus. */ sdhci_set_clock(slot, ios->clock); sdhci_set_power(slot, (ios->power_mode == power_off) ? 0 : ios->vdd); if (ios->bus_width == bus_width_8) { slot->hostctrl |= SDHCI_CTRL_8BITBUS; slot->hostctrl &= ~SDHCI_CTRL_4BITBUS; } else if (ios->bus_width == bus_width_4) { slot->hostctrl &= ~SDHCI_CTRL_8BITBUS; slot->hostctrl |= SDHCI_CTRL_4BITBUS; } else if (ios->bus_width == bus_width_1) { slot->hostctrl &= ~SDHCI_CTRL_8BITBUS; slot->hostctrl &= ~SDHCI_CTRL_4BITBUS; } else { panic("Invalid bus width: %d", ios->bus_width); } if (ios->timing == bus_timing_hs && !(slot->quirks & SDHCI_QUIRK_DONT_SET_HISPD_BIT)) slot->hostctrl |= SDHCI_CTRL_HISPD; else slot->hostctrl &= ~SDHCI_CTRL_HISPD; WR1(slot, SDHCI_HOST_CONTROL, slot->hostctrl); /* Some controllers like reset after bus changes. */ if(slot->quirks & SDHCI_QUIRK_RESET_ON_IOS) sdhci_reset(slot, SDHCI_RESET_CMD | SDHCI_RESET_DATA); SDHCI_UNLOCK(slot); return (0); } int sdhci_cam_request(struct sdhci_slot *slot, union ccb *ccb) { struct ccb_mmcio *mmcio; mmcio = &ccb->mmcio; SDHCI_LOCK(slot); /* if (slot->req != NULL) { SDHCI_UNLOCK(slot); return (EBUSY); } */ if (__predict_false(sdhci_debug > 1)) { slot_printf(slot, "CMD%u arg %#x flags %#x dlen %u dflags %#x\n", mmcio->cmd.opcode, mmcio->cmd.arg, mmcio->cmd.flags, mmcio->cmd.data != NULL ? (unsigned int) mmcio->cmd.data->len : 0, mmcio->cmd.data != NULL ? mmcio->cmd.data->flags: 0); } if (mmcio->cmd.data != NULL) { if (mmcio->cmd.data->len == 0 || mmcio->cmd.data->flags == 0) panic("data->len = %d, data->flags = %d -- something is b0rked", (int)mmcio->cmd.data->len, mmcio->cmd.data->flags); } slot->ccb = ccb; slot->flags = 0; sdhci_start(slot); SDHCI_UNLOCK(slot); if (dumping) { while (slot->ccb != NULL) { sdhci_generic_intr(slot); DELAY(10); } } return (0); } #endif /* MMCCAM */ MODULE_VERSION(sdhci, 1); Index: projects/runtime-coverage/sys/dev/sdhci/sdhci.h =================================================================== --- projects/runtime-coverage/sys/dev/sdhci/sdhci.h (revision 323974) +++ projects/runtime-coverage/sys/dev/sdhci/sdhci.h (revision 323975) @@ -1,438 +1,433 @@ /*- * Copyright (c) 2008 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __SDHCI_H__ #define __SDHCI_H__ #include "opt_mmccam.h" #define DMA_BLOCK_SIZE 4096 #define DMA_BOUNDARY 0 /* DMA reload every 4K */ /* Controller doesn't honor resets unless we touch the clock register */ #define SDHCI_QUIRK_CLOCK_BEFORE_RESET (1 << 0) /* Controller really supports DMA */ #define SDHCI_QUIRK_FORCE_DMA (1 << 1) /* Controller has unusable DMA engine */ #define SDHCI_QUIRK_BROKEN_DMA (1 << 2) /* Controller doesn't like to be reset when there is no card inserted. */ #define SDHCI_QUIRK_NO_CARD_NO_RESET (1 << 3) /* Controller has flaky internal state so reset it on each ios change */ #define SDHCI_QUIRK_RESET_ON_IOS (1 << 4) /* Controller can only DMA chunk sizes that are a multiple of 32 bits */ #define SDHCI_QUIRK_32BIT_DMA_SIZE (1 << 5) /* Controller needs to be reset after each request to stay stable */ #define SDHCI_QUIRK_RESET_AFTER_REQUEST (1 << 6) /* Controller has an off-by-one issue with timeout value */ #define SDHCI_QUIRK_INCR_TIMEOUT_CONTROL (1 << 7) /* Controller has broken read timings */ #define SDHCI_QUIRK_BROKEN_TIMINGS (1 << 8) /* Controller needs lowered frequency */ #define SDHCI_QUIRK_LOWER_FREQUENCY (1 << 9) /* Data timeout is invalid, should use SD clock */ #define SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK (1 << 10) /* Timeout value is invalid, should be overriden */ #define SDHCI_QUIRK_BROKEN_TIMEOUT_VAL (1 << 11) /* SDHCI_CAPABILITIES is invalid */ #define SDHCI_QUIRK_MISSING_CAPS (1 << 12) /* Hardware shifts the 136-bit response, don't do it in software. */ #define SDHCI_QUIRK_DONT_SHIFT_RESPONSE (1 << 13) /* Wait to see reset bit asserted before waiting for de-asserted */ #define SDHCI_QUIRK_WAITFOR_RESET_ASSERTED (1 << 14) /* Leave controller in standard mode when putting card in HS mode. */ #define SDHCI_QUIRK_DONT_SET_HISPD_BIT (1 << 15) /* Alternate clock source is required when supplying a 400 KHz clock. */ #define SDHCI_QUIRK_BCM577XX_400KHZ_CLKSRC (1 << 16) /* Card insert/remove interrupts don't work, polling required. */ #define SDHCI_QUIRK_POLL_CARD_PRESENT (1 << 17) /* All controller slots are non-removable. */ #define SDHCI_QUIRK_ALL_SLOTS_NON_REMOVABLE (1 << 18) /* Issue custom Intel controller reset sequence after power-up. */ #define SDHCI_QUIRK_INTEL_POWER_UP_RESET (1 << 19) /* Data timeout is invalid, use 1 MHz clock instead. */ #define SDHCI_QUIRK_DATA_TIMEOUT_1MHZ (1 << 20) /* Controller doesn't allow access boot partitions. */ #define SDHCI_QUIRK_BOOT_NOACC (1 << 21) /* Controller waits for busy responses. */ #define SDHCI_QUIRK_WAIT_WHILE_BUSY (1 << 22) /* Controller supports eMMC DDR52 mode. */ #define SDHCI_QUIRK_MMC_DDR52 (1 << 23) /* Controller support for UHS DDR50 mode is broken. */ #define SDHCI_QUIRK_BROKEN_UHS_DDR50 (1 << 24) /* Controller support for eMMC HS200 mode is broken. */ #define SDHCI_QUIRK_BROKEN_MMC_HS200 (1 << 25) /* Controller reports support for eMMC HS400 mode as SDHCI_CAN_MMC_HS400. 
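* i.e. in bit 63 of the combined capability registers, as the quirk name SDHCI_QUIRK_CAPS_BIT63_FOR_MMC_HS400 below reflects; the flag itself is non-standard.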
*/ #define SDHCI_QUIRK_CAPS_BIT63_FOR_MMC_HS400 (1 << 26) /* Controller support for SDHCI_CTRL2_PRESET_VALUE is broken. */ #define SDHCI_QUIRK_PRESET_VALUE_BROKEN (1 << 27) /* Controller does not support or the support for ACMD12 is broken. */ #define SDHCI_QUIRK_BROKEN_AUTO_STOP (1 << 28) /* * Controller registers */ #define SDHCI_DMA_ADDRESS 0x00 #define SDHCI_BLOCK_SIZE 0x04 #define SDHCI_MAKE_BLKSZ(dma, blksz) (((dma & 0x7) << 12) | (blksz & 0xFFF)) #define SDHCI_BLOCK_COUNT 0x06 #define SDHCI_ARGUMENT 0x08 #define SDHCI_TRANSFER_MODE 0x0C #define SDHCI_TRNS_DMA 0x01 #define SDHCI_TRNS_BLK_CNT_EN 0x02 #define SDHCI_TRNS_ACMD12 0x04 #define SDHCI_TRNS_READ 0x10 #define SDHCI_TRNS_MULTI 0x20 #define SDHCI_COMMAND_FLAGS 0x0E #define SDHCI_CMD_RESP_NONE 0x00 #define SDHCI_CMD_RESP_LONG 0x01 #define SDHCI_CMD_RESP_SHORT 0x02 #define SDHCI_CMD_RESP_SHORT_BUSY 0x03 #define SDHCI_CMD_RESP_MASK 0x03 #define SDHCI_CMD_CRC 0x08 #define SDHCI_CMD_INDEX 0x10 #define SDHCI_CMD_DATA 0x20 #define SDHCI_CMD_TYPE_NORMAL 0x00 #define SDHCI_CMD_TYPE_SUSPEND 0x40 #define SDHCI_CMD_TYPE_RESUME 0x80 #define SDHCI_CMD_TYPE_ABORT 0xc0 #define SDHCI_CMD_TYPE_MASK 0xc0 #define SDHCI_COMMAND 0x0F #define SDHCI_RESPONSE 0x10 #define SDHCI_BUFFER 0x20 #define SDHCI_PRESENT_STATE 0x24 #define SDHCI_CMD_INHIBIT 0x00000001 #define SDHCI_DAT_INHIBIT 0x00000002 #define SDHCI_DAT_ACTIVE 0x00000004 #define SDHCI_RETUNE_REQUEST 0x00000008 #define SDHCI_DOING_WRITE 0x00000100 #define SDHCI_DOING_READ 0x00000200 #define SDHCI_SPACE_AVAILABLE 0x00000400 #define SDHCI_DATA_AVAILABLE 0x00000800 #define SDHCI_CARD_PRESENT 0x00010000 #define SDHCI_CARD_STABLE 0x00020000 #define SDHCI_CARD_PIN 0x00040000 #define SDHCI_WRITE_PROTECT 0x00080000 #define SDHCI_STATE_DAT_MASK 0x00f00000 #define SDHCI_STATE_CMD 0x01000000 #define SDHCI_HOST_CONTROL 0x28 #define SDHCI_CTRL_LED 0x01 #define SDHCI_CTRL_4BITBUS 0x02 #define SDHCI_CTRL_HISPD 0x04 #define SDHCI_CTRL_SDMA 0x08 #define SDHCI_CTRL_ADMA2 0x10 #define SDHCI_CTRL_ADMA264 0x18 #define SDHCI_CTRL_DMA_MASK 0x18 #define SDHCI_CTRL_8BITBUS 0x20 #define SDHCI_CTRL_CARD_DET 0x40 #define SDHCI_CTRL_FORCE_CARD 0x80 #define SDHCI_POWER_CONTROL 0x29 #define SDHCI_POWER_ON 0x01 #define SDHCI_POWER_180 0x0A #define SDHCI_POWER_300 0x0C #define SDHCI_POWER_330 0x0E #define SDHCI_BLOCK_GAP_CONTROL 0x2A #define SDHCI_WAKE_UP_CONTROL 0x2B #define SDHCI_CLOCK_CONTROL 0x2C #define SDHCI_DIVIDER_MASK 0xff #define SDHCI_DIVIDER_MASK_LEN 8 #define SDHCI_DIVIDER_SHIFT 8 #define SDHCI_DIVIDER_HI_MASK 3 #define SDHCI_DIVIDER_HI_SHIFT 6 #define SDHCI_CLOCK_CARD_EN 0x0004 #define SDHCI_CLOCK_INT_STABLE 0x0002 #define SDHCI_CLOCK_INT_EN 0x0001 #define SDHCI_DIVIDERS_MASK \ ((SDHCI_DIVIDER_MASK << SDHCI_DIVIDER_SHIFT) | \ (SDHCI_DIVIDER_HI_MASK << SDHCI_DIVIDER_HI_SHIFT)) #define SDHCI_TIMEOUT_CONTROL 0x2E #define SDHCI_SOFTWARE_RESET 0x2F #define SDHCI_RESET_ALL 0x01 #define SDHCI_RESET_CMD 0x02 #define SDHCI_RESET_DATA 0x04 #define SDHCI_INT_STATUS 0x30 #define SDHCI_INT_ENABLE 0x34 #define SDHCI_SIGNAL_ENABLE 0x38 #define SDHCI_INT_RESPONSE 0x00000001 #define SDHCI_INT_DATA_END 0x00000002 #define SDHCI_INT_BLOCK_GAP 0x00000004 #define SDHCI_INT_DMA_END 0x00000008 #define SDHCI_INT_SPACE_AVAIL 0x00000010 #define SDHCI_INT_DATA_AVAIL 0x00000020 #define SDHCI_INT_CARD_INSERT 0x00000040 #define SDHCI_INT_CARD_REMOVE 0x00000080 #define SDHCI_INT_CARD_INT 0x00000100 #define SDHCI_INT_INT_A 0x00000200 #define SDHCI_INT_INT_B 0x00000400 #define SDHCI_INT_INT_C 0x00000800 #define SDHCI_INT_RETUNE 0x00001000 #define 
SDHCI_INT_ERROR 0x00008000 #define SDHCI_INT_TIMEOUT 0x00010000 #define SDHCI_INT_CRC 0x00020000 #define SDHCI_INT_END_BIT 0x00040000 #define SDHCI_INT_INDEX 0x00080000 #define SDHCI_INT_DATA_TIMEOUT 0x00100000 #define SDHCI_INT_DATA_CRC 0x00200000 #define SDHCI_INT_DATA_END_BIT 0x00400000 #define SDHCI_INT_BUS_POWER 0x00800000 #define SDHCI_INT_ACMD12ERR 0x01000000 #define SDHCI_INT_ADMAERR 0x02000000 #define SDHCI_INT_TUNEERR 0x04000000 #define SDHCI_INT_NORMAL_MASK 0x00007FFF #define SDHCI_INT_ERROR_MASK 0xFFFF8000 #define SDHCI_INT_CMD_ERROR_MASK (SDHCI_INT_TIMEOUT | \ SDHCI_INT_CRC | SDHCI_INT_END_BIT | SDHCI_INT_INDEX) #define SDHCI_INT_CMD_MASK (SDHCI_INT_RESPONSE | SDHCI_INT_CMD_ERROR_MASK) #define SDHCI_INT_DATA_MASK (SDHCI_INT_DATA_END | SDHCI_INT_DMA_END | \ SDHCI_INT_DATA_AVAIL | SDHCI_INT_SPACE_AVAIL | \ SDHCI_INT_DATA_TIMEOUT | SDHCI_INT_DATA_CRC | \ SDHCI_INT_DATA_END_BIT) #define SDHCI_ACMD12_ERR 0x3C #define SDHCI_HOST_CONTROL2 0x3E #define SDHCI_CTRL2_PRESET_VALUE 0x8000 #define SDHCI_CTRL2_ASYNC_INTR 0x4000 #define SDHCI_CTRL2_64BIT_ENABLE 0x2000 #define SDHCI_CTRL2_HOST_V4_ENABLE 0x1000 #define SDHCI_CTRL2_CMD23_ENABLE 0x0800 #define SDHCI_CTRL2_ADMA2_LENGTH_MODE 0x0400 #define SDHCI_CTRL2_UHS2_IFACE_ENABLE 0x0100 #define SDHCI_CTRL2_SAMPLING_CLOCK 0x0080 #define SDHCI_CTRL2_EXEC_TUNING 0x0040 #define SDHCI_CTRL2_DRIVER_TYPE_MASK 0x0030 #define SDHCI_CTRL2_DRIVER_TYPE_B 0x0000 #define SDHCI_CTRL2_DRIVER_TYPE_A 0x0010 #define SDHCI_CTRL2_DRIVER_TYPE_C 0x0020 #define SDHCI_CTRL2_DRIVER_TYPE_D 0x0030 #define SDHCI_CTRL2_S18_ENABLE 0x0008 #define SDHCI_CTRL2_UHS_MASK 0x0007 #define SDHCI_CTRL2_UHS_SDR12 0x0000 #define SDHCI_CTRL2_UHS_SDR25 0x0001 #define SDHCI_CTRL2_UHS_SDR50 0x0002 #define SDHCI_CTRL2_UHS_SDR104 0x0003 #define SDHCI_CTRL2_UHS_DDR50 0x0004 #define SDHCI_CTRL2_MMC_HS400 0x0005 /* non-standard */ #define SDHCI_CAPABILITIES 0x40 #define SDHCI_TIMEOUT_CLK_MASK 0x0000003F #define SDHCI_TIMEOUT_CLK_SHIFT 0 #define SDHCI_TIMEOUT_CLK_UNIT 0x00000080 #define SDHCI_CLOCK_BASE_MASK 0x00003F00 #define SDHCI_CLOCK_V3_BASE_MASK 0x0000FF00 #define SDHCI_CLOCK_BASE_SHIFT 8 #define SDHCI_MAX_BLOCK_MASK 0x00030000 #define SDHCI_MAX_BLOCK_SHIFT 16 #define SDHCI_CAN_DO_8BITBUS 0x00040000 #define SDHCI_CAN_DO_ADMA2 0x00080000 #define SDHCI_CAN_DO_HISPD 0x00200000 #define SDHCI_CAN_DO_DMA 0x00400000 #define SDHCI_CAN_DO_SUSPEND 0x00800000 #define SDHCI_CAN_VDD_330 0x01000000 #define SDHCI_CAN_VDD_300 0x02000000 #define SDHCI_CAN_VDD_180 0x04000000 #define SDHCI_CAN_DO_64BIT 0x10000000 #define SDHCI_CAN_ASYNC_INTR 0x20000000 #define SDHCI_SLOTTYPE_MASK 0xC0000000 #define SDHCI_SLOTTYPE_REMOVABLE 0x00000000 #define SDHCI_SLOTTYPE_EMBEDDED 0x40000000 #define SDHCI_SLOTTYPE_SHARED 0x80000000 #define SDHCI_CAPABILITIES2 0x44 #define SDHCI_CAN_SDR50 0x00000001 #define SDHCI_CAN_SDR104 0x00000002 #define SDHCI_CAN_DDR50 0x00000004 #define SDHCI_CAN_DRIVE_TYPE_A 0x00000010 #define SDHCI_CAN_DRIVE_TYPE_C 0x00000020 #define SDHCI_CAN_DRIVE_TYPE_D 0x00000040 #define SDHCI_RETUNE_CNT_MASK 0x00000F00 #define SDHCI_RETUNE_CNT_SHIFT 8 #define SDHCI_TUNE_SDR50 0x00002000 #define SDHCI_RETUNE_MODES_MASK 0x0000C000 #define SDHCI_RETUNE_MODES_SHIFT 14 #define SDHCI_CLOCK_MULT_MASK 0x00FF0000 #define SDHCI_CLOCK_MULT_SHIFT 16 #define SDHCI_CAN_MMC_HS400 0x80000000 /* non-standard */ #define SDHCI_MAX_CURRENT 0x48 #define SDHCI_FORCE_AUTO_EVENT 0x50 #define SDHCI_FORCE_INTR_EVENT 0x52 #define SDHCI_ADMA_ERR 0x54 #define SDHCI_ADMA_ERR_LENGTH 0x04 #define SDHCI_ADMA_ERR_STATE_MASK 0x03 #define 
SDHCI_ADMA_ERR_STATE_STOP 0x00 #define SDHCI_ADMA_ERR_STATE_FDS 0x01 #define SDHCI_ADMA_ERR_STATE_TFR 0x03 #define SDHCI_ADMA_ADDRESS_LO 0x58 #define SDHCI_ADMA_ADDRESS_HI 0x5C #define SDHCI_PRESET_VALUE 0x60 #define SDHCI_SHARED_BUS_CTRL 0xE0 #define SDHCI_SLOT_INT_STATUS 0xFC #define SDHCI_HOST_VERSION 0xFE #define SDHCI_VENDOR_VER_MASK 0xFF00 #define SDHCI_VENDOR_VER_SHIFT 8 #define SDHCI_SPEC_VER_MASK 0x00FF #define SDHCI_SPEC_VER_SHIFT 0 #define SDHCI_SPEC_100 0 #define SDHCI_SPEC_200 1 #define SDHCI_SPEC_300 2 #define SDHCI_SPEC_400 3 #define SDHCI_SPEC_410 4 #define SDHCI_SPEC_420 5 SYSCTL_DECL(_hw_sdhci); extern u_int sdhci_quirk_clear; extern u_int sdhci_quirk_set; struct sdhci_slot { struct mtx mtx; /* Slot mutex */ u_int quirks; /* Chip specific quirks */ u_int caps; /* Override SDHCI_CAPABILITIES */ u_int caps2; /* Override SDHCI_CAPABILITIES2 */ device_t bus; /* Bus device */ device_t dev; /* Slot device */ u_char num; /* Slot number */ u_char opt; /* Slot options */ #define SDHCI_HAVE_DMA 0x01 #define SDHCI_PLATFORM_TRANSFER 0x02 #define SDHCI_NON_REMOVABLE 0x04 #define SDHCI_TUNING_SUPPORTED 0x08 #define SDHCI_TUNING_ENABLED 0x10 #define SDHCI_SDR50_NEEDS_TUNING 0x20 #define SDHCI_SLOT_EMBEDDED 0x40 u_char version; int timeout; /* Transfer timeout */ uint32_t max_clk; /* Max possible freq */ uint32_t timeout_clk; /* Timeout freq */ bus_dma_tag_t dmatag; bus_dmamap_t dmamap; u_char *dmamem; bus_addr_t paddr; /* DMA buffer address */ struct task card_task; /* Card presence check task */ struct timeout_task card_delayed_task;/* Card insert delayed task */ struct callout card_poll_callout;/* Card present polling callout */ struct callout timeout_callout;/* Card command/data response timeout */ struct callout retune_callout; /* Re-tuning mode 1 callout */ struct mmc_host host; /* Host parameters */ struct mmc_request *req; /* Current request */ struct mmc_command *curcmd; /* Current command of current request */ struct mmc_request *tune_req; /* Tuning request */ struct mmc_command *tune_cmd; /* Tuning command of tuning request */ struct mmc_data *tune_data; /* Tuning data of tuning command */ uint32_t retune_ticks; /* Re-tuning callout ticks [hz] */ uint32_t intmask; /* Current interrupt mask */ uint32_t clock; /* Current clock freq. */ size_t offset; /* Data buffer offset */ uint8_t hostctrl; /* Current host control register */ uint8_t retune_count; /* Controller re-tuning count [s] */ uint8_t retune_mode; /* Controller re-tuning mode */ #define SDHCI_RETUNE_MODE_1 0x00 #define SDHCI_RETUNE_MODE_2 0x01 #define SDHCI_RETUNE_MODE_3 0x02 uint8_t retune_req; /* Re-tuning request status */ #define SDHCI_RETUNE_REQ_NEEDED 0x01 /* Re-tuning w/o circuit reset needed */ #define SDHCI_RETUNE_REQ_RESET 0x02 /* Re-tuning w/ circuit reset needed */ u_char power; /* Current power */ u_char bus_busy; /* Bus busy status */ u_char cmd_done; /* CMD command part done flag */ u_char data_done; /* DAT command part done flag */ u_char flags; /* Request execution flags */ #define CMD_STARTED 1 #define STOP_STARTED 2 #define SDHCI_USE_DMA 4 /* Use DMA for this req. */ #define PLATFORM_DATA_STARTED 8 /* Data xfer is handled by platform */ #ifdef MMCCAM /* CAM stuff */ union ccb *ccb; struct cam_devq *devq; struct cam_sim *sim; struct mtx sim_mtx; u_char card_present; /* XXX Maybe derive this from elsewhere? 
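* For now it tracks the last observed state; sdhci_start_slot() primes it to 0 so the first card task run reports the initial presence.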
*/ #endif }; int sdhci_generic_read_ivar(device_t bus, device_t child, int which, uintptr_t *result); int sdhci_generic_write_ivar(device_t bus, device_t child, int which, uintptr_t value); int sdhci_init_slot(device_t dev, struct sdhci_slot *slot, int num); void sdhci_start_slot(struct sdhci_slot *slot); /* performs generic clean-up for platform transfers */ void sdhci_finish_data(struct sdhci_slot *slot); int sdhci_cleanup_slot(struct sdhci_slot *slot); int sdhci_generic_suspend(struct sdhci_slot *slot); int sdhci_generic_resume(struct sdhci_slot *slot); int sdhci_generic_update_ios(device_t brdev, device_t reqdev); int sdhci_generic_tune(device_t brdev, device_t reqdev, bool hs400); int sdhci_generic_switch_vccq(device_t brdev, device_t reqdev); int sdhci_generic_retune(device_t brdev, device_t reqdev, bool reset); int sdhci_generic_request(device_t brdev, device_t reqdev, struct mmc_request *req); int sdhci_generic_get_ro(device_t brdev, device_t reqdev); int sdhci_generic_acquire_host(device_t brdev, device_t reqdev); int sdhci_generic_release_host(device_t brdev, device_t reqdev); void sdhci_generic_intr(struct sdhci_slot *slot); uint32_t sdhci_generic_min_freq(device_t brdev, struct sdhci_slot *slot); bool sdhci_generic_get_card_present(device_t brdev, struct sdhci_slot *slot); void sdhci_generic_set_uhs_timing(device_t brdev, struct sdhci_slot *slot); void sdhci_handle_card_present(struct sdhci_slot *slot, bool is_present); -#ifdef MMCCAM -/* CAM-related */ -void sdhci_cam_start_slot(struct sdhci_slot *slot); -#endif - #endif /* __SDHCI_H__ */ Index: projects/runtime-coverage/sys/dev/sdhci/sdhci_pci.c =================================================================== --- projects/runtime-coverage/sys/dev/sdhci/sdhci_pci.c (revision 323974) +++ projects/runtime-coverage/sys/dev/sdhci/sdhci_pci.c (revision 323975) @@ -1,529 +1,525 @@ /*- * Copyright (c) 2008 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_mmccam.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mmcbr_if.h" #include "sdhci_if.h" /* * PCI registers */ #define PCI_SDHCI_IFPIO 0x00 #define PCI_SDHCI_IFDMA 0x01 #define PCI_SDHCI_IFVENDOR 0x02 #define PCI_SLOT_INFO 0x40 /* 8 bits */ #define PCI_SLOT_INFO_SLOTS(x) (((x >> 4) & 7) + 1) #define PCI_SLOT_INFO_FIRST_BAR(x) ((x) & 7) /* * RICOH specific PCI registers */ #define SDHC_PCI_MODE_KEY 0xf9 #define SDHC_PCI_MODE 0x150 #define SDHC_PCI_MODE_SD20 0x10 #define SDHC_PCI_BASE_FREQ_KEY 0xfc #define SDHC_PCI_BASE_FREQ 0xe1 static const struct sdhci_device { uint32_t model; uint16_t subvendor; const char *desc; u_int quirks; } sdhci_devices[] = { { 0x08221180, 0xffff, "RICOH R5C822 SD", SDHCI_QUIRK_FORCE_DMA }, { 0xe8221180, 0xffff, "RICOH R5CE822 SD", SDHCI_QUIRK_FORCE_DMA | SDHCI_QUIRK_LOWER_FREQUENCY }, { 0xe8231180, 0xffff, "RICOH R5CE823 SD", SDHCI_QUIRK_LOWER_FREQUENCY }, { 0x8034104c, 0xffff, "TI XX21/XX11 SD", SDHCI_QUIRK_FORCE_DMA }, { 0x05501524, 0xffff, "ENE CB712 SD", SDHCI_QUIRK_BROKEN_TIMINGS }, { 0x05511524, 0xffff, "ENE CB712 SD 2", SDHCI_QUIRK_BROKEN_TIMINGS }, { 0x07501524, 0xffff, "ENE CB714 SD", SDHCI_QUIRK_RESET_ON_IOS | SDHCI_QUIRK_BROKEN_TIMINGS }, { 0x07511524, 0xffff, "ENE CB714 SD 2", SDHCI_QUIRK_RESET_ON_IOS | SDHCI_QUIRK_BROKEN_TIMINGS }, { 0x410111ab, 0xffff, "Marvell CaFe SD", SDHCI_QUIRK_INCR_TIMEOUT_CONTROL }, { 0x2381197B, 0xffff, "JMicron JMB38X SD", SDHCI_QUIRK_32BIT_DMA_SIZE | SDHCI_QUIRK_RESET_AFTER_REQUEST }, { 0x16bc14e4, 0xffff, "Broadcom BCM577xx SDXC/MMC Card Reader", SDHCI_QUIRK_BCM577XX_400KHZ_CLKSRC }, { 0x0f148086, 0xffff, "Intel Bay Trail eMMC 4.5 Controller", SDHCI_QUIRK_INTEL_POWER_UP_RESET | SDHCI_QUIRK_WAIT_WHILE_BUSY | SDHCI_QUIRK_MMC_DDR52 | SDHCI_QUIRK_CAPS_BIT63_FOR_MMC_HS400 | SDHCI_QUIRK_PRESET_VALUE_BROKEN}, { 0x0f158086, 0xffff, "Intel Bay Trail SDXC Controller", SDHCI_QUIRK_WAIT_WHILE_BUSY | SDHCI_QUIRK_PRESET_VALUE_BROKEN }, { 0x0f508086, 0xffff, "Intel Bay Trail eMMC 4.5 Controller", SDHCI_QUIRK_INTEL_POWER_UP_RESET | SDHCI_QUIRK_WAIT_WHILE_BUSY | SDHCI_QUIRK_MMC_DDR52 | SDHCI_QUIRK_CAPS_BIT63_FOR_MMC_HS400 | SDHCI_QUIRK_PRESET_VALUE_BROKEN }, { 0x22948086, 0xffff, "Intel Braswell eMMC 4.5.1 Controller", SDHCI_QUIRK_DATA_TIMEOUT_1MHZ | SDHCI_QUIRK_INTEL_POWER_UP_RESET | SDHCI_QUIRK_WAIT_WHILE_BUSY | SDHCI_QUIRK_MMC_DDR52 | SDHCI_QUIRK_CAPS_BIT63_FOR_MMC_HS400 | SDHCI_QUIRK_PRESET_VALUE_BROKEN }, { 0x22968086, 0xffff, "Intel Braswell SDXC Controller", SDHCI_QUIRK_WAIT_WHILE_BUSY | SDHCI_QUIRK_PRESET_VALUE_BROKEN }, { 0x5aca8086, 0xffff, "Intel Apollo Lake SDXC Controller", SDHCI_QUIRK_BROKEN_DMA | /* APL18 erratum */ SDHCI_QUIRK_WAIT_WHILE_BUSY | SDHCI_QUIRK_PRESET_VALUE_BROKEN }, { 0x5acc8086, 0xffff, "Intel Apollo Lake eMMC 5.0 Controller", SDHCI_QUIRK_BROKEN_DMA | /* APL18 erratum */ SDHCI_QUIRK_INTEL_POWER_UP_RESET | SDHCI_QUIRK_WAIT_WHILE_BUSY | SDHCI_QUIRK_MMC_DDR52 | SDHCI_QUIRK_CAPS_BIT63_FOR_MMC_HS400 | SDHCI_QUIRK_PRESET_VALUE_BROKEN }, { 0, 0xffff, NULL, 0 } }; struct sdhci_pci_softc { u_int quirks; /* Chip specific quirks */ struct resource *irq_res; /* IRQ resource */ void *intrhand; /* Interrupt handle */ int num_slots; /* Number of slots on this controller */ struct sdhci_slot slots[6]; struct resource *mem_res[6]; /* Memory resource */ uint8_t cfg_freq; /* Saved frequency */ uint8_t cfg_mode; /* Saved mode */ }; static int sdhci_enable_msi = 1; 
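/* Tunable: prefer MSI; sdhci_pci_attach() falls back to a shared INTx interrupt when MSI allocation fails or this is set to 0. */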
SYSCTL_INT(_hw_sdhci, OID_AUTO, enable_msi, CTLFLAG_RDTUN, &sdhci_enable_msi, 0, "Enable MSI interrupts"); static uint8_t sdhci_pci_read_1(device_t dev, struct sdhci_slot *slot __unused, bus_size_t off) { struct sdhci_pci_softc *sc = device_get_softc(dev); bus_barrier(sc->mem_res[slot->num], 0, 0xFF, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return bus_read_1(sc->mem_res[slot->num], off); } static void sdhci_pci_write_1(device_t dev, struct sdhci_slot *slot __unused, bus_size_t off, uint8_t val) { struct sdhci_pci_softc *sc = device_get_softc(dev); bus_barrier(sc->mem_res[slot->num], 0, 0xFF, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); bus_write_1(sc->mem_res[slot->num], off, val); } static uint16_t sdhci_pci_read_2(device_t dev, struct sdhci_slot *slot __unused, bus_size_t off) { struct sdhci_pci_softc *sc = device_get_softc(dev); bus_barrier(sc->mem_res[slot->num], 0, 0xFF, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return bus_read_2(sc->mem_res[slot->num], off); } static void sdhci_pci_write_2(device_t dev, struct sdhci_slot *slot __unused, bus_size_t off, uint16_t val) { struct sdhci_pci_softc *sc = device_get_softc(dev); bus_barrier(sc->mem_res[slot->num], 0, 0xFF, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); bus_write_2(sc->mem_res[slot->num], off, val); } static uint32_t sdhci_pci_read_4(device_t dev, struct sdhci_slot *slot __unused, bus_size_t off) { struct sdhci_pci_softc *sc = device_get_softc(dev); bus_barrier(sc->mem_res[slot->num], 0, 0xFF, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return bus_read_4(sc->mem_res[slot->num], off); } static void sdhci_pci_write_4(device_t dev, struct sdhci_slot *slot __unused, bus_size_t off, uint32_t val) { struct sdhci_pci_softc *sc = device_get_softc(dev); bus_barrier(sc->mem_res[slot->num], 0, 0xFF, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); bus_write_4(sc->mem_res[slot->num], off, val); } static void sdhci_pci_read_multi_4(device_t dev, struct sdhci_slot *slot __unused, bus_size_t off, uint32_t *data, bus_size_t count) { struct sdhci_pci_softc *sc = device_get_softc(dev); bus_read_multi_stream_4(sc->mem_res[slot->num], off, data, count); } static void sdhci_pci_write_multi_4(device_t dev, struct sdhci_slot *slot __unused, bus_size_t off, uint32_t *data, bus_size_t count) { struct sdhci_pci_softc *sc = device_get_softc(dev); bus_write_multi_stream_4(sc->mem_res[slot->num], off, data, count); } static void sdhci_pci_intr(void *arg); static void sdhci_lower_frequency(device_t dev) { struct sdhci_pci_softc *sc = device_get_softc(dev); /* * Enable SD2.0 mode. * NB: for RICOH R5CE823, this changes the PCI device ID to 0xe822. */ pci_write_config(dev, SDHC_PCI_MODE_KEY, 0xfc, 1); sc->cfg_mode = pci_read_config(dev, SDHC_PCI_MODE, 1); pci_write_config(dev, SDHC_PCI_MODE, SDHC_PCI_MODE_SD20, 1); pci_write_config(dev, SDHC_PCI_MODE_KEY, 0x00, 1); /* * Some SD/MMC cards don't work with the default base * clock frequency of 200 MHz. Lower it to 50 MHz. */ pci_write_config(dev, SDHC_PCI_BASE_FREQ_KEY, 0x01, 1); sc->cfg_freq = pci_read_config(dev, SDHC_PCI_BASE_FREQ, 1); pci_write_config(dev, SDHC_PCI_BASE_FREQ, 50, 1); pci_write_config(dev, SDHC_PCI_BASE_FREQ_KEY, 0x00, 1); } static void sdhci_restore_frequency(device_t dev) { struct sdhci_pci_softc *sc = device_get_softc(dev); /* Restore mode. */ pci_write_config(dev, SDHC_PCI_MODE_KEY, 0xfc, 1); pci_write_config(dev, SDHC_PCI_MODE, sc->cfg_mode, 1); pci_write_config(dev, SDHC_PCI_MODE_KEY, 0x00, 1); /* Restore frequency. 
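* Mirror image of sdhci_lower_frequency(): unlock the config register with the key, write the saved value back, then relock.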
*/ pci_write_config(dev, SDHC_PCI_BASE_FREQ_KEY, 0x01, 1); pci_write_config(dev, SDHC_PCI_BASE_FREQ, sc->cfg_freq, 1); pci_write_config(dev, SDHC_PCI_BASE_FREQ_KEY, 0x00, 1); } static int sdhci_pci_probe(device_t dev) { uint32_t model; uint16_t subvendor; uint8_t class, subclass; int i, result; model = (uint32_t)pci_get_device(dev) << 16; model |= (uint32_t)pci_get_vendor(dev) & 0x0000ffff; subvendor = pci_get_subvendor(dev); class = pci_get_class(dev); subclass = pci_get_subclass(dev); result = ENXIO; for (i = 0; sdhci_devices[i].model != 0; i++) { if (sdhci_devices[i].model == model && (sdhci_devices[i].subvendor == 0xffff || sdhci_devices[i].subvendor == subvendor)) { device_set_desc(dev, sdhci_devices[i].desc); result = BUS_PROBE_DEFAULT; break; } } if (result == ENXIO && class == PCIC_BASEPERIPH && subclass == PCIS_BASEPERIPH_SDHC) { device_set_desc(dev, "Generic SD HCI"); result = BUS_PROBE_GENERIC; } return (result); } static int sdhci_pci_attach(device_t dev) { struct sdhci_pci_softc *sc = device_get_softc(dev); struct sdhci_slot *slot; uint32_t model; uint16_t subvendor; int bar, err, rid, slots, i; model = (uint32_t)pci_get_device(dev) << 16; model |= (uint32_t)pci_get_vendor(dev) & 0x0000ffff; subvendor = pci_get_subvendor(dev); /* Apply chip specific quirks. */ for (i = 0; sdhci_devices[i].model != 0; i++) { if (sdhci_devices[i].model == model && (sdhci_devices[i].subvendor == 0xffff || sdhci_devices[i].subvendor == subvendor)) { sc->quirks = sdhci_devices[i].quirks; break; } } sc->quirks &= ~sdhci_quirk_clear; sc->quirks |= sdhci_quirk_set; /* Some controllers need to be bumped into the right mode. */ if (sc->quirks & SDHCI_QUIRK_LOWER_FREQUENCY) sdhci_lower_frequency(dev); /* Read slots info from PCI registers. */ slots = pci_read_config(dev, PCI_SLOT_INFO, 1); bar = PCI_SLOT_INFO_FIRST_BAR(slots); slots = PCI_SLOT_INFO_SLOTS(slots); if (slots > 6 || bar > 5) { device_printf(dev, "Incorrect slots information (%d, %d).\n", slots, bar); return (EINVAL); } /* Allocate IRQ. */ i = 1; rid = 0; if (sdhci_enable_msi != 0 && pci_alloc_msi(dev, &i) == 0) rid = 1; sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE | (rid != 0 ? 0 : RF_SHAREABLE)); if (sc->irq_res == NULL) { device_printf(dev, "Can't allocate IRQ\n"); pci_release_msi(dev); return (ENOMEM); } /* Scan all slots. */ for (i = 0; i < slots; i++) { slot = &sc->slots[sc->num_slots]; /* Allocate memory. */ rid = PCIR_BAR(bar + i); sc->mem_res[i] = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->mem_res[i] == NULL) { device_printf(dev, "Can't allocate memory for slot %d\n", i); continue; } slot->quirks = sc->quirks; if (sdhci_init_slot(dev, slot, i) != 0) continue; sc->num_slots++; } device_printf(dev, "%d slot(s) allocated\n", sc->num_slots); /* Activate the interrupt */ err = bus_setup_intr(dev, sc->irq_res, INTR_TYPE_MISC | INTR_MPSAFE, NULL, sdhci_pci_intr, sc, &sc->intrhand); if (err) device_printf(dev, "Can't setup IRQ\n"); pci_enable_busmaster(dev); /* Process cards detection. 
*/ for (i = 0; i < sc->num_slots; i++) { -#ifdef MMCCAM - sdhci_cam_start_slot(&sc->slots[i]); -#else sdhci_start_slot(&sc->slots[i]); -#endif } return (0); } static int sdhci_pci_detach(device_t dev) { struct sdhci_pci_softc *sc = device_get_softc(dev); int i; bus_teardown_intr(dev, sc->irq_res, sc->intrhand); bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(sc->irq_res), sc->irq_res); pci_release_msi(dev); for (i = 0; i < sc->num_slots; i++) { sdhci_cleanup_slot(&sc->slots[i]); bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->mem_res[i]), sc->mem_res[i]); } if (sc->quirks & SDHCI_QUIRK_LOWER_FREQUENCY) sdhci_restore_frequency(dev); return (0); } static int sdhci_pci_shutdown(device_t dev) { struct sdhci_pci_softc *sc = device_get_softc(dev); if (sc->quirks & SDHCI_QUIRK_LOWER_FREQUENCY) sdhci_restore_frequency(dev); return (0); } static int sdhci_pci_suspend(device_t dev) { struct sdhci_pci_softc *sc = device_get_softc(dev); int i, err; err = bus_generic_suspend(dev); if (err) return (err); for (i = 0; i < sc->num_slots; i++) sdhci_generic_suspend(&sc->slots[i]); return (0); } static int sdhci_pci_resume(device_t dev) { struct sdhci_pci_softc *sc = device_get_softc(dev); int i, err; for (i = 0; i < sc->num_slots; i++) sdhci_generic_resume(&sc->slots[i]); err = bus_generic_resume(dev); if (err) return (err); if (sc->quirks & SDHCI_QUIRK_LOWER_FREQUENCY) sdhci_lower_frequency(dev); return (0); } static void sdhci_pci_intr(void *arg) { struct sdhci_pci_softc *sc = (struct sdhci_pci_softc *)arg; int i; for (i = 0; i < sc->num_slots; i++) sdhci_generic_intr(&sc->slots[i]); } static device_method_t sdhci_methods[] = { /* device_if */ DEVMETHOD(device_probe, sdhci_pci_probe), DEVMETHOD(device_attach, sdhci_pci_attach), DEVMETHOD(device_detach, sdhci_pci_detach), DEVMETHOD(device_shutdown, sdhci_pci_shutdown), DEVMETHOD(device_suspend, sdhci_pci_suspend), DEVMETHOD(device_resume, sdhci_pci_resume), /* Bus interface */ DEVMETHOD(bus_read_ivar, sdhci_generic_read_ivar), DEVMETHOD(bus_write_ivar, sdhci_generic_write_ivar), /* mmcbr_if */ DEVMETHOD(mmcbr_update_ios, sdhci_generic_update_ios), DEVMETHOD(mmcbr_switch_vccq, sdhci_generic_switch_vccq), DEVMETHOD(mmcbr_tune, sdhci_generic_tune), DEVMETHOD(mmcbr_retune, sdhci_generic_retune), DEVMETHOD(mmcbr_request, sdhci_generic_request), DEVMETHOD(mmcbr_get_ro, sdhci_generic_get_ro), DEVMETHOD(mmcbr_acquire_host, sdhci_generic_acquire_host), DEVMETHOD(mmcbr_release_host, sdhci_generic_release_host), /* SDHCI accessors */ DEVMETHOD(sdhci_read_1, sdhci_pci_read_1), DEVMETHOD(sdhci_read_2, sdhci_pci_read_2), DEVMETHOD(sdhci_read_4, sdhci_pci_read_4), DEVMETHOD(sdhci_read_multi_4, sdhci_pci_read_multi_4), DEVMETHOD(sdhci_write_1, sdhci_pci_write_1), DEVMETHOD(sdhci_write_2, sdhci_pci_write_2), DEVMETHOD(sdhci_write_4, sdhci_pci_write_4), DEVMETHOD(sdhci_write_multi_4, sdhci_pci_write_multi_4), DEVMETHOD(sdhci_set_uhs_timing, sdhci_generic_set_uhs_timing), DEVMETHOD_END }; static driver_t sdhci_pci_driver = { "sdhci_pci", sdhci_methods, sizeof(struct sdhci_pci_softc), }; static devclass_t sdhci_pci_devclass; DRIVER_MODULE(sdhci_pci, pci, sdhci_pci_driver, sdhci_pci_devclass, NULL, NULL); MODULE_DEPEND(sdhci_pci, sdhci, 1, 1, 1); #ifndef MMCCAM MMC_DECLARE_BRIDGE(sdhci_pci); #endif Index: projects/runtime-coverage/sys/dev/ti/if_ti.c =================================================================== --- projects/runtime-coverage/sys/dev/ti/if_ti.c (revision 323974) +++ projects/runtime-coverage/sys/dev/ti/if_ti.c (revision 323975) @@ -1,4057 
+1,4057 @@ /*- * Copyright (c) 1997, 1998, 1999 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /* * Alteon Networks Tigon PCI gigabit ethernet driver for FreeBSD. * Manuals, sample driver and firmware source kits are available * from http://www.alteon.com/support/openkits. * * Written by Bill Paul * Electrical Engineering Department * Columbia University, New York City */ /* * The Alteon Networks Tigon chip contains an embedded R4000 CPU, * gigabit MAC, dual DMA channels and a PCI interface unit. NICs * using the Tigon may have anywhere from 512K to 2MB of SRAM. The * Tigon supports hardware IP, TCP and UDP checksumming, multicast * filtering and jumbo (9014 byte) frames. The hardware is largely * controlled by firmware, which must be loaded into the NIC during * initialization. * * The Tigon 2 contains 2 R4000 CPUs and requires a newer firmware * revision, which supports new features such as extended commands, * extended jumbo receive ring descriptors and a mini receive ring. * * Alteon Networks is to be commended for releasing such a vast amount * of development material for the Tigon NIC without requiring an NDA * (although they really should have done it a long time ago). With * any luck, the other vendors will finally wise up and follow Alteon's * stellar example. * * The firmware for the Tigon 1 and 2 NICs is compiled directly into * this driver by #including it as a C header file. This bloats the * driver somewhat, but it's the easiest method considering that the * driver code and firmware code need to be kept in sync. The source * for the firmware is not provided with the FreeBSD distribution since * compiling it requires a GNU toolchain targeted for mips-sgi-irix5.3.
* * The following people deserve special thanks: * - Terry Murphy of 3Com, for providing a 3c985 Tigon 1 board * for testing * - Raymond Lee of Netgear, for providing a pair of Netgear * GA620 Tigon 2 boards for testing * - Ulf Zimmermann, for bringing the GA260 to my attention and * convincing me to write this driver. * - Andrew Gallatin for providing FreeBSD/Alpha support. */ #include __FBSDID("$FreeBSD$"); #include "opt_ti.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef TI_SF_BUF_JUMBO #include #include #endif #include #include #include #include #include #include #include #define TI_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP) /* * We can only turn on header splitting if we're using extended receive * BDs. */ #if defined(TI_JUMBO_HDRSPLIT) && !defined(TI_SF_BUF_JUMBO) #error "options TI_JUMBO_HDRSPLIT requires TI_SF_BUF_JUMBO" #endif /* TI_JUMBO_HDRSPLIT && !TI_SF_BUF_JUMBO */ typedef enum { TI_SWAP_HTON, TI_SWAP_NTOH } ti_swap_type; /* * Various supported device vendors/types and their names. */ static const struct ti_type ti_devs[] = { { ALT_VENDORID, ALT_DEVICEID_ACENIC, "Alteon AceNIC 1000baseSX Gigabit Ethernet" }, { ALT_VENDORID, ALT_DEVICEID_ACENIC_COPPER, "Alteon AceNIC 1000baseT Gigabit Ethernet" }, { TC_VENDORID, TC_DEVICEID_3C985, "3Com 3c985-SX Gigabit Ethernet" }, { NG_VENDORID, NG_DEVICEID_GA620, "Netgear GA620 1000baseSX Gigabit Ethernet" }, { NG_VENDORID, NG_DEVICEID_GA620T, "Netgear GA620 1000baseT Gigabit Ethernet" }, { SGI_VENDORID, SGI_DEVICEID_TIGON, "Silicon Graphics Gigabit Ethernet" }, { DEC_VENDORID, DEC_DEVICEID_FARALLON_PN9000SX, "Farallon PN9000SX Gigabit Ethernet" }, { 0, 0, NULL } }; static d_open_t ti_open; static d_close_t ti_close; static d_ioctl_t ti_ioctl2; static struct cdevsw ti_cdevsw = { .d_version = D_VERSION, .d_flags = 0, .d_open = ti_open, .d_close = ti_close, .d_ioctl = ti_ioctl2, .d_name = "ti", }; static int ti_probe(device_t); static int ti_attach(device_t); static int ti_detach(device_t); static void ti_txeof(struct ti_softc *); static void ti_rxeof(struct ti_softc *); static int ti_encap(struct ti_softc *, struct mbuf **); static void ti_intr(void *); static void ti_start(struct ifnet *); static void ti_start_locked(struct ifnet *); static int ti_ioctl(struct ifnet *, u_long, caddr_t); static uint64_t ti_get_counter(struct ifnet *, ift_counter); static void ti_init(void *); static void ti_init_locked(void *); static void ti_init2(struct ti_softc *); static void ti_stop(struct ti_softc *); static void ti_watchdog(void *); static int ti_shutdown(device_t); static int ti_ifmedia_upd(struct ifnet *); static int ti_ifmedia_upd_locked(struct ti_softc *); static void ti_ifmedia_sts(struct ifnet *, struct ifmediareq *); static uint32_t ti_eeprom_putbyte(struct ti_softc *, int); static uint8_t ti_eeprom_getbyte(struct ti_softc *, int, uint8_t *); static int ti_read_eeprom(struct ti_softc *, caddr_t, int, int); static void ti_add_mcast(struct ti_softc *, struct ether_addr *); static void ti_del_mcast(struct ti_softc *, struct ether_addr *); static void ti_setmulti(struct ti_softc *); static void ti_mem_read(struct ti_softc *, uint32_t, uint32_t, void *); static void ti_mem_write(struct ti_softc *, uint32_t, uint32_t, void *); static void ti_mem_zero(struct ti_softc *, uint32_t, uint32_t); static int ti_copy_mem(struct ti_softc *, uint32_t, 
uint32_t, caddr_t, int, int); static int ti_copy_scratch(struct ti_softc *, uint32_t, uint32_t, caddr_t, int, int, int); static int ti_bcopy_swap(const void *, void *, size_t, ti_swap_type); static void ti_loadfw(struct ti_softc *); static void ti_cmd(struct ti_softc *, struct ti_cmd_desc *); static void ti_cmd_ext(struct ti_softc *, struct ti_cmd_desc *, caddr_t, int); static void ti_handle_events(struct ti_softc *); static void ti_dma_map_addr(void *, bus_dma_segment_t *, int, int); static int ti_dma_alloc(struct ti_softc *); static void ti_dma_free(struct ti_softc *); static int ti_dma_ring_alloc(struct ti_softc *, bus_size_t, bus_size_t, bus_dma_tag_t *, uint8_t **, bus_dmamap_t *, bus_addr_t *, const char *); static void ti_dma_ring_free(struct ti_softc *, bus_dma_tag_t *, uint8_t **, bus_dmamap_t, bus_addr_t *); static int ti_newbuf_std(struct ti_softc *, int); static int ti_newbuf_mini(struct ti_softc *, int); static int ti_newbuf_jumbo(struct ti_softc *, int, struct mbuf *); static int ti_init_rx_ring_std(struct ti_softc *); static void ti_free_rx_ring_std(struct ti_softc *); static int ti_init_rx_ring_jumbo(struct ti_softc *); static void ti_free_rx_ring_jumbo(struct ti_softc *); static int ti_init_rx_ring_mini(struct ti_softc *); static void ti_free_rx_ring_mini(struct ti_softc *); static void ti_free_tx_ring(struct ti_softc *); static int ti_init_tx_ring(struct ti_softc *); static void ti_discard_std(struct ti_softc *, int); #ifndef TI_SF_BUF_JUMBO static void ti_discard_jumbo(struct ti_softc *, int); #endif static void ti_discard_mini(struct ti_softc *, int); static int ti_64bitslot_war(struct ti_softc *); static int ti_chipinit(struct ti_softc *); static int ti_gibinit(struct ti_softc *); #ifdef TI_JUMBO_HDRSPLIT static __inline void ti_hdr_split(struct mbuf *top, int hdr_len, int pkt_len, int idx); #endif /* TI_JUMBO_HDRSPLIT */ static void ti_sysctl_node(struct ti_softc *); static device_method_t ti_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ti_probe), DEVMETHOD(device_attach, ti_attach), DEVMETHOD(device_detach, ti_detach), DEVMETHOD(device_shutdown, ti_shutdown), { 0, 0 } }; static driver_t ti_driver = { "ti", ti_methods, sizeof(struct ti_softc) }; static devclass_t ti_devclass; DRIVER_MODULE(ti, pci, ti_driver, ti_devclass, 0, 0); MODULE_DEPEND(ti, pci, 1, 1, 1); MODULE_DEPEND(ti, ether, 1, 1, 1); /* * Send an instruction or address to the EEPROM, check for ACK. */ static uint32_t ti_eeprom_putbyte(struct ti_softc *sc, int byte) { int i, ack = 0; /* * Make sure we're in TX mode. */ TI_SETBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_TXEN); /* * Feed in each bit and strobe the clock. */ for (i = 0x80; i; i >>= 1) { if (byte & i) { TI_SETBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_DOUT); } else { TI_CLRBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_DOUT); } DELAY(1); TI_SETBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_CLK); DELAY(1); TI_CLRBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_CLK); } /* * Turn off TX mode. */ TI_CLRBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_TXEN); /* * Check for ack. */ TI_SETBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_CLK); ack = CSR_READ_4(sc, TI_MISC_LOCAL_CTL) & TI_MLC_EE_DIN; TI_CLRBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_CLK); return (ack); } /* * Read a byte of data stored in the EEPROM at address 'addr.' * We have to send two address bytes since the EEPROM can hold * more than 256 bytes of data. */ static uint8_t ti_eeprom_getbyte(struct ti_softc *sc, int addr, uint8_t *dest) { int i; uint8_t byte = 0; EEPROM_START; /* * Send write control code to EEPROM.
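 * (Editorial aside: ti_eeprom_putbyte() above clocks its argument out
 * MSB first -- the loop "for (i = 0x80; i; i >>= 1)" walks a one-bit
 * mask from bit 7 down to bit 0, raising EE_DOUT for one bits and
 * strobing EE_CLK once per bit.  So for a hypothetical read of address
 * 0x1A2, the steps below send the control code, then
 * (0x1A2 >> 8) & 0xFF = 0x01, then 0x1A2 & 0xFF = 0xA2.)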
*/ if (ti_eeprom_putbyte(sc, EEPROM_CTL_WRITE)) { device_printf(sc->ti_dev, "failed to send write command, status: %x\n", CSR_READ_4(sc, TI_MISC_LOCAL_CTL)); return (1); } /* * Send the first byte of the address we want to read. */ if (ti_eeprom_putbyte(sc, (addr >> 8) & 0xFF)) { device_printf(sc->ti_dev, "failed to send address, status: %x\n", CSR_READ_4(sc, TI_MISC_LOCAL_CTL)); return (1); } /* * Send the second byte of the address we want to read. */ if (ti_eeprom_putbyte(sc, addr & 0xFF)) { device_printf(sc->ti_dev, "failed to send address, status: %x\n", CSR_READ_4(sc, TI_MISC_LOCAL_CTL)); return (1); } EEPROM_STOP; EEPROM_START; /* * Send read control code to EEPROM. */ if (ti_eeprom_putbyte(sc, EEPROM_CTL_READ)) { device_printf(sc->ti_dev, "failed to send read command, status: %x\n", CSR_READ_4(sc, TI_MISC_LOCAL_CTL)); return (1); } /* * Start reading bits from EEPROM. */ TI_CLRBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_TXEN); for (i = 0x80; i; i >>= 1) { TI_SETBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_CLK); DELAY(1); if (CSR_READ_4(sc, TI_MISC_LOCAL_CTL) & TI_MLC_EE_DIN) byte |= i; TI_CLRBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_CLK); DELAY(1); } EEPROM_STOP; /* * No ACK generated for read, so just return byte. */ *dest = byte; return (0); } /* * Read a sequence of bytes from the EEPROM. */ static int ti_read_eeprom(struct ti_softc *sc, caddr_t dest, int off, int cnt) { int err = 0, i; uint8_t byte = 0; for (i = 0; i < cnt; i++) { err = ti_eeprom_getbyte(sc, off + i, &byte); if (err) break; *(dest + i) = byte; } return (err ? 1 : 0); } /* * NIC memory read function. * Can be used to copy data from NIC local memory. */ static void ti_mem_read(struct ti_softc *sc, uint32_t addr, uint32_t len, void *buf) { int segptr, segsize, cnt; char *ptr; segptr = addr; cnt = len; ptr = buf; while (cnt) { if (cnt < TI_WINLEN) segsize = cnt; else segsize = TI_WINLEN - (segptr % TI_WINLEN); CSR_WRITE_4(sc, TI_WINBASE, rounddown2(segptr, TI_WINLEN)); bus_space_read_region_4(sc->ti_btag, sc->ti_bhandle, TI_WINDOW + (segptr & (TI_WINLEN - 1)), (uint32_t *)ptr, segsize / 4); ptr += segsize; segptr += segsize; cnt -= segsize; } } /* * NIC memory write function. * Can be used to copy data into NIC local memory. */ static void ti_mem_write(struct ti_softc *sc, uint32_t addr, uint32_t len, void *buf) { int segptr, segsize, cnt; char *ptr; segptr = addr; cnt = len; ptr = buf; while (cnt) { if (cnt < TI_WINLEN) segsize = cnt; else segsize = TI_WINLEN - (segptr % TI_WINLEN); CSR_WRITE_4(sc, TI_WINBASE, rounddown2(segptr, TI_WINLEN)); bus_space_write_region_4(sc->ti_btag, sc->ti_bhandle, TI_WINDOW + (segptr & (TI_WINLEN - 1)), (uint32_t *)ptr, segsize / 4); ptr += segsize; segptr += segsize; cnt -= segsize; } } /* * NIC memory zero function. * Can be used to clear a section of NIC local memory. */ static void ti_mem_zero(struct ti_softc *sc, uint32_t addr, uint32_t len) { int segptr, segsize, cnt; segptr = addr; cnt = len; while (cnt) { if (cnt < TI_WINLEN) segsize = cnt; else segsize = TI_WINLEN - (segptr % TI_WINLEN); CSR_WRITE_4(sc, TI_WINBASE, rounddown2(segptr, TI_WINLEN)); bus_space_set_region_4(sc->ti_btag, sc->ti_bhandle, TI_WINDOW + (segptr & (TI_WINLEN - 1)), 0, segsize / 4); segptr += segsize; cnt -= segsize; } } static int ti_copy_mem(struct ti_softc *sc, uint32_t tigon_addr, uint32_t len, caddr_t buf, int useraddr, int readdata) { int segptr, segsize, cnt; caddr_t ptr; uint32_t origwin; int resid, segresid; int first_pass; TI_LOCK_ASSERT(sc); /* * At the moment, we don't handle non-aligned cases, we just bail.
* If this proves to be a problem, it will be fixed. */ if (readdata == 0 && (tigon_addr & 0x3) != 0) { device_printf(sc->ti_dev, "%s: tigon address %#x isn't " "word-aligned\n", __func__, tigon_addr); device_printf(sc->ti_dev, "%s: unaligned writes aren't " "yet supported\n", __func__); return (EINVAL); } segptr = tigon_addr & ~0x3; segresid = tigon_addr - segptr; /* * This is the non-aligned amount left over that we'll need to * copy. */ resid = len & 0x3; /* Add in the left over amount at the front of the buffer */ resid += segresid; cnt = len & ~0x3; /* * If resid + segresid is >= 4, add multiples of 4 to the count and * decrease the residual by that much. */ cnt += resid & ~0x3; resid -= resid & ~0x3; ptr = buf; first_pass = 1; /* * Save the old window base value. */ origwin = CSR_READ_4(sc, TI_WINBASE); while (cnt) { bus_size_t ti_offset; if (cnt < TI_WINLEN) segsize = cnt; else segsize = TI_WINLEN - (segptr % TI_WINLEN); CSR_WRITE_4(sc, TI_WINBASE, rounddown2(segptr, TI_WINLEN)); ti_offset = TI_WINDOW + (segptr & (TI_WINLEN -1)); if (readdata) { bus_space_read_region_4(sc->ti_btag, sc->ti_bhandle, ti_offset, (uint32_t *)sc->ti_membuf, segsize >> 2); if (useraddr) { /* * Yeah, this is a little on the kludgy * side, but at least this code is only * used for debugging. */ ti_bcopy_swap(sc->ti_membuf, sc->ti_membuf2, segsize, TI_SWAP_NTOH); TI_UNLOCK(sc); if (first_pass) { copyout(&sc->ti_membuf2[segresid], ptr, segsize - segresid); first_pass = 0; } else copyout(sc->ti_membuf2, ptr, segsize); TI_LOCK(sc); } else { if (first_pass) { ti_bcopy_swap(sc->ti_membuf, sc->ti_membuf2, segsize, TI_SWAP_NTOH); TI_UNLOCK(sc); bcopy(&sc->ti_membuf2[segresid], ptr, segsize - segresid); TI_LOCK(sc); first_pass = 0; } else ti_bcopy_swap(sc->ti_membuf, ptr, segsize, TI_SWAP_NTOH); } } else { if (useraddr) { TI_UNLOCK(sc); copyin(ptr, sc->ti_membuf2, segsize); TI_LOCK(sc); ti_bcopy_swap(sc->ti_membuf2, sc->ti_membuf, segsize, TI_SWAP_HTON); } else ti_bcopy_swap(ptr, sc->ti_membuf, segsize, TI_SWAP_HTON); bus_space_write_region_4(sc->ti_btag, sc->ti_bhandle, ti_offset, (uint32_t *)sc->ti_membuf, segsize >> 2); } segptr += segsize; ptr += segsize; cnt -= segsize; } /* * Handle leftover, non-word-aligned bytes. */ if (resid != 0) { uint32_t tmpval, tmpval2; bus_size_t ti_offset; /* * Set the segment pointer. */ CSR_WRITE_4(sc, TI_WINBASE, rounddown2(segptr, TI_WINLEN)); ti_offset = TI_WINDOW + (segptr & (TI_WINLEN - 1)); /* * First, grab whatever is in our source/destination. * We'll obviously need this for reads, but also for * writes, since we'll be doing read/modify/write. */ bus_space_read_region_4(sc->ti_btag, sc->ti_bhandle, ti_offset, &tmpval, 1); /* * Next, translate this from little-endian to big-endian * (at least on i386 boxes). */ tmpval2 = ntohl(tmpval); if (readdata) { /* * If we're reading, just copy the leftover number * of bytes from the host byte order buffer to * the user's buffer. */ if (useraddr) { TI_UNLOCK(sc); copyout(&tmpval2, ptr, resid); TI_LOCK(sc); } else bcopy(&tmpval2, ptr, resid); } else { /* * If we're writing, first copy the bytes to be * written into the network byte order buffer, * leaving the rest of the buffer with whatever was * originally in there. Then, swap the bytes * around into host order and write them out. * * XXX KDM the read side of this has been verified * to work, but the write side of it has not been * verified. So user beware. 
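 *
 * Editorial aside: a worked example of the alignment bookkeeping at
 * the top of this function.  For a hypothetical read with
 * tigon_addr = 0x1003 and len = 10:
 *
 *	segptr   = 0x1003 & ~0x3            = 0x1000
 *	segresid = 0x1003 - 0x1000          = 3
 *	resid    = (10 & 0x3) + segresid    = 2 + 3 = 5
 *	cnt      = (10 & ~0x3) + (5 & ~0x3) = 8 + 4 = 12
 *	resid    = 5 - 4                    = 1
 *
 * so the aligned loop above moves 12 bytes starting at 0x1000 (the
 * first pass skips the 3-byte segresid), and the single leftover byte
 * is handled by this read/modify/write tail: 9 + 1 = 10 bytes in all.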
*/ if (useraddr) { TI_UNLOCK(sc); copyin(ptr, &tmpval2, resid); TI_LOCK(sc); } else bcopy(ptr, &tmpval2, resid); tmpval = htonl(tmpval2); bus_space_write_region_4(sc->ti_btag, sc->ti_bhandle, ti_offset, &tmpval, 1); } } CSR_WRITE_4(sc, TI_WINBASE, origwin); return (0); } static int ti_copy_scratch(struct ti_softc *sc, uint32_t tigon_addr, uint32_t len, caddr_t buf, int useraddr, int readdata, int cpu) { uint32_t segptr; int cnt; uint32_t tmpval, tmpval2; caddr_t ptr; TI_LOCK_ASSERT(sc); /* * At the moment, we don't handle non-aligned cases, we just bail. * If this proves to be a problem, it will be fixed. */ if (tigon_addr & 0x3) { device_printf(sc->ti_dev, "%s: tigon address %#x " "isn't word-aligned\n", __func__, tigon_addr); return (EINVAL); } if (len & 0x3) { device_printf(sc->ti_dev, "%s: transfer length %d " "isn't word-aligned\n", __func__, len); return (EINVAL); } segptr = tigon_addr; cnt = len; ptr = buf; while (cnt) { CSR_WRITE_4(sc, CPU_REG(TI_SRAM_ADDR, cpu), segptr); if (readdata) { tmpval2 = CSR_READ_4(sc, CPU_REG(TI_SRAM_DATA, cpu)); tmpval = ntohl(tmpval2); /* * Note: I've used this debugging interface * extensively with Alteon's 12.3.15 firmware, * compiled with GCC 2.7.2.1 and binutils 2.9.1. * * When you compile the firmware without * optimization, which is necessary sometimes in * order to properly step through it, you sometimes * read out a bogus value of 0xc0017c instead of * whatever was supposed to be in that scratchpad * location. That value is on the stack somewhere, * but I've never been able to figure out what was * causing the problem. * * The address seems to pop up in random places, * often not in the same place on two subsequent * reads. * * In any case, the underlying data doesn't seem * to be affected, just the value read out. * * KDM, 3/7/2000 */ if (tmpval2 == 0xc0017c) device_printf(sc->ti_dev, "found 0xc0017c at " "%#x (tmpval2)\n", segptr); if (tmpval == 0xc0017c) device_printf(sc->ti_dev, "found 0xc0017c at " "%#x (tmpval)\n", segptr); if (useraddr) copyout(&tmpval, ptr, 4); else bcopy(&tmpval, ptr, 4); } else { if (useraddr) copyin(ptr, &tmpval2, 4); else bcopy(ptr, &tmpval2, 4); tmpval = htonl(tmpval2); CSR_WRITE_4(sc, CPU_REG(TI_SRAM_DATA, cpu), tmpval); } cnt -= 4; segptr += 4; ptr += 4; } return (0); } static int ti_bcopy_swap(const void *src, void *dst, size_t len, ti_swap_type swap_type) { const uint8_t *tmpsrc; uint8_t *tmpdst; size_t tmplen; if (len & 0x3) { printf("ti_bcopy_swap: length %zd isn't 32-bit aligned\n", len); return (-1); } tmpsrc = src; tmpdst = dst; tmplen = len; while (tmplen) { if (swap_type == TI_SWAP_NTOH) *(uint32_t *)tmpdst = ntohl(*(const uint32_t *)tmpsrc); else *(uint32_t *)tmpdst = htonl(*(const uint32_t *)tmpsrc); tmpsrc += 4; tmpdst += 4; tmplen -= 4; } return (0); } /* * Load firmware image into the NIC. Check that the firmware revision * is acceptable and see if we want the firmware for the Tigon 1 or * Tigon 2. 
*/ static void ti_loadfw(struct ti_softc *sc) { TI_LOCK_ASSERT(sc); switch (sc->ti_hwrev) { case TI_HWREV_TIGON: if (tigonFwReleaseMajor != TI_FIRMWARE_MAJOR || tigonFwReleaseMinor != TI_FIRMWARE_MINOR || tigonFwReleaseFix != TI_FIRMWARE_FIX) { device_printf(sc->ti_dev, "firmware revision mismatch; " "want %d.%d.%d, got %d.%d.%d\n", TI_FIRMWARE_MAJOR, TI_FIRMWARE_MINOR, TI_FIRMWARE_FIX, tigonFwReleaseMajor, tigonFwReleaseMinor, tigonFwReleaseFix); return; } ti_mem_write(sc, tigonFwTextAddr, tigonFwTextLen, tigonFwText); ti_mem_write(sc, tigonFwDataAddr, tigonFwDataLen, tigonFwData); ti_mem_write(sc, tigonFwRodataAddr, tigonFwRodataLen, tigonFwRodata); ti_mem_zero(sc, tigonFwBssAddr, tigonFwBssLen); ti_mem_zero(sc, tigonFwSbssAddr, tigonFwSbssLen); CSR_WRITE_4(sc, TI_CPU_PROGRAM_COUNTER, tigonFwStartAddr); break; case TI_HWREV_TIGON_II: if (tigon2FwReleaseMajor != TI_FIRMWARE_MAJOR || tigon2FwReleaseMinor != TI_FIRMWARE_MINOR || tigon2FwReleaseFix != TI_FIRMWARE_FIX) { device_printf(sc->ti_dev, "firmware revision mismatch; " "want %d.%d.%d, got %d.%d.%d\n", TI_FIRMWARE_MAJOR, TI_FIRMWARE_MINOR, TI_FIRMWARE_FIX, tigon2FwReleaseMajor, tigon2FwReleaseMinor, tigon2FwReleaseFix); return; } ti_mem_write(sc, tigon2FwTextAddr, tigon2FwTextLen, tigon2FwText); ti_mem_write(sc, tigon2FwDataAddr, tigon2FwDataLen, tigon2FwData); ti_mem_write(sc, tigon2FwRodataAddr, tigon2FwRodataLen, tigon2FwRodata); ti_mem_zero(sc, tigon2FwBssAddr, tigon2FwBssLen); ti_mem_zero(sc, tigon2FwSbssAddr, tigon2FwSbssLen); CSR_WRITE_4(sc, TI_CPU_PROGRAM_COUNTER, tigon2FwStartAddr); break; default: device_printf(sc->ti_dev, "can't load firmware: unknown hardware rev\n"); break; } } /* * Send the NIC a command via the command ring. */ static void ti_cmd(struct ti_softc *sc, struct ti_cmd_desc *cmd) { int index; index = sc->ti_cmd_saved_prodidx; CSR_WRITE_4(sc, TI_GCR_CMDRING + (index * 4), *(uint32_t *)(cmd)); TI_INC(index, TI_CMD_RING_CNT); CSR_WRITE_4(sc, TI_MB_CMDPROD_IDX, index); sc->ti_cmd_saved_prodidx = index; } /* * Send the NIC an extended command. The 'len' parameter specifies the * number of command slots to include after the initial command. */ static void ti_cmd_ext(struct ti_softc *sc, struct ti_cmd_desc *cmd, caddr_t arg, int len) { int index; int i; index = sc->ti_cmd_saved_prodidx; CSR_WRITE_4(sc, TI_GCR_CMDRING + (index * 4), *(uint32_t *)(cmd)); TI_INC(index, TI_CMD_RING_CNT); for (i = 0; i < len; i++) { CSR_WRITE_4(sc, TI_GCR_CMDRING + (index * 4), *(uint32_t *)(&arg[i * 4])); TI_INC(index, TI_CMD_RING_CNT); } CSR_WRITE_4(sc, TI_MB_CMDPROD_IDX, index); sc->ti_cmd_saved_prodidx = index; } /* * Handle events that have triggered interrupts. 
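 * (Editorial aside: the ring protocol here is a conventional
 * producer/consumer index pair.  The NIC advances ti_ev_prodidx in the
 * status block as it posts events; the loop below consumes entries until
 * the saved consumer index catches up, stepping with TI_INC() -- which,
 * from its uses here, appears to wrap the index modulo the ring count --
 * and then echoes the new consumer index back through
 * TI_GCR_EVENTCONS_IDX so the firmware can reuse those slots.)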
*/ static void ti_handle_events(struct ti_softc *sc) { struct ti_event_desc *e; if (sc->ti_rdata.ti_event_ring == NULL) return; bus_dmamap_sync(sc->ti_cdata.ti_event_ring_tag, sc->ti_cdata.ti_event_ring_map, BUS_DMASYNC_POSTREAD); while (sc->ti_ev_saved_considx != sc->ti_ev_prodidx.ti_idx) { e = &sc->ti_rdata.ti_event_ring[sc->ti_ev_saved_considx]; switch (TI_EVENT_EVENT(e)) { case TI_EV_LINKSTAT_CHANGED: sc->ti_linkstat = TI_EVENT_CODE(e); if (sc->ti_linkstat == TI_EV_CODE_LINK_UP) { if_link_state_change(sc->ti_ifp, LINK_STATE_UP); sc->ti_ifp->if_baudrate = IF_Mbps(100); if (bootverbose) device_printf(sc->ti_dev, "10/100 link up\n"); } else if (sc->ti_linkstat == TI_EV_CODE_GIG_LINK_UP) { if_link_state_change(sc->ti_ifp, LINK_STATE_UP); sc->ti_ifp->if_baudrate = IF_Gbps(1UL); if (bootverbose) device_printf(sc->ti_dev, "gigabit link up\n"); } else if (sc->ti_linkstat == TI_EV_CODE_LINK_DOWN) { if_link_state_change(sc->ti_ifp, LINK_STATE_DOWN); sc->ti_ifp->if_baudrate = 0; if (bootverbose) device_printf(sc->ti_dev, "link down\n"); } break; case TI_EV_ERROR: if (TI_EVENT_CODE(e) == TI_EV_CODE_ERR_INVAL_CMD) device_printf(sc->ti_dev, "invalid command\n"); else if (TI_EVENT_CODE(e) == TI_EV_CODE_ERR_UNIMP_CMD) device_printf(sc->ti_dev, "unknown command\n"); else if (TI_EVENT_CODE(e) == TI_EV_CODE_ERR_BADCFG) device_printf(sc->ti_dev, "bad config data\n"); break; case TI_EV_FIRMWARE_UP: ti_init2(sc); break; case TI_EV_STATS_UPDATED: case TI_EV_RESET_JUMBO_RING: case TI_EV_MCAST_UPDATED: /* Who cares. */ break; default: device_printf(sc->ti_dev, "unknown event: %d\n", TI_EVENT_EVENT(e)); break; } /* Advance the consumer index. */ TI_INC(sc->ti_ev_saved_considx, TI_EVENT_RING_CNT); CSR_WRITE_4(sc, TI_GCR_EVENTCONS_IDX, sc->ti_ev_saved_considx); } bus_dmamap_sync(sc->ti_cdata.ti_event_ring_tag, sc->ti_cdata.ti_event_ring_map, BUS_DMASYNC_PREREAD); } struct ti_dmamap_arg { bus_addr_t ti_busaddr; }; static void ti_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct ti_dmamap_arg *ctx; if (error) return; KASSERT(nseg == 1, ("%s: %d segments returned!", __func__, nseg)); ctx = arg; ctx->ti_busaddr = segs->ds_addr; } static int ti_dma_ring_alloc(struct ti_softc *sc, bus_size_t alignment, bus_size_t maxsize, bus_dma_tag_t *tag, uint8_t **ring, bus_dmamap_t *map, bus_addr_t *paddr, const char *msg) { struct ti_dmamap_arg ctx; int error; error = bus_dma_tag_create(sc->ti_cdata.ti_parent_tag, alignment, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, maxsize, 1, maxsize, 0, NULL, NULL, tag); if (error != 0) { device_printf(sc->ti_dev, "could not create %s dma tag\n", msg); return (error); } /* Allocate DMA'able memory for ring. */ error = bus_dmamem_alloc(*tag, (void **)ring, BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT, map); if (error != 0) { device_printf(sc->ti_dev, "could not allocate DMA'able memory for %s\n", msg); return (error); } /* Load the address of the ring. 
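 * (Editorial aside: bus_dmamap_load() reports the resulting segments
 * through a callback rather than a return value, so ti_dma_map_addr()
 * above simply copies segs[0].ds_addr into the caller's struct
 * ti_dmamap_arg.  The single-segment KASSERT there holds because every
 * ring tag is created with nsegments = 1, and with BUS_DMA_NOWAIT the
 * callback has already run, or an error been returned, by the time
 * bus_dmamap_load() comes back.)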
*/ ctx.ti_busaddr = 0; error = bus_dmamap_load(*tag, *map, *ring, maxsize, ti_dma_map_addr, &ctx, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->ti_dev, "could not load DMA'able memory for %s\n", msg); return (error); } *paddr = ctx.ti_busaddr; return (0); } static void ti_dma_ring_free(struct ti_softc *sc, bus_dma_tag_t *tag, uint8_t **ring, bus_dmamap_t map, bus_addr_t *paddr) { if (*paddr != 0) { bus_dmamap_unload(*tag, map); *paddr = 0; } if (*ring != NULL) { bus_dmamem_free(*tag, *ring, map); *ring = NULL; } if (*tag) { bus_dma_tag_destroy(*tag); *tag = NULL; } } static int ti_dma_alloc(struct ti_softc *sc) { bus_addr_t lowaddr; int i, error; lowaddr = BUS_SPACE_MAXADDR; if (sc->ti_dac == 0) lowaddr = BUS_SPACE_MAXADDR_32BIT; error = bus_dma_tag_create(bus_get_dma_tag(sc->ti_dev), 1, 0, lowaddr, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0, NULL, NULL, &sc->ti_cdata.ti_parent_tag); if (error != 0) { device_printf(sc->ti_dev, "could not allocate parent dma tag\n"); return (ENOMEM); } error = ti_dma_ring_alloc(sc, TI_RING_ALIGN, sizeof(struct ti_gib), &sc->ti_cdata.ti_gib_tag, (uint8_t **)&sc->ti_rdata.ti_info, &sc->ti_cdata.ti_gib_map, &sc->ti_rdata.ti_info_paddr, "GIB"); if (error) return (error); /* Producer/consumer status */ error = ti_dma_ring_alloc(sc, TI_RING_ALIGN, sizeof(struct ti_status), &sc->ti_cdata.ti_status_tag, (uint8_t **)&sc->ti_rdata.ti_status, &sc->ti_cdata.ti_status_map, &sc->ti_rdata.ti_status_paddr, "event ring"); if (error) return (error); /* Event ring */ error = ti_dma_ring_alloc(sc, TI_RING_ALIGN, TI_EVENT_RING_SZ, &sc->ti_cdata.ti_event_ring_tag, (uint8_t **)&sc->ti_rdata.ti_event_ring, &sc->ti_cdata.ti_event_ring_map, &sc->ti_rdata.ti_event_ring_paddr, "event ring"); if (error) return (error); /* Command ring lives in shared memory so no need to create DMA area. */ /* Standard RX ring */ error = ti_dma_ring_alloc(sc, TI_RING_ALIGN, TI_STD_RX_RING_SZ, &sc->ti_cdata.ti_rx_std_ring_tag, (uint8_t **)&sc->ti_rdata.ti_rx_std_ring, &sc->ti_cdata.ti_rx_std_ring_map, &sc->ti_rdata.ti_rx_std_ring_paddr, "RX ring"); if (error) return (error); /* Jumbo RX ring */ error = ti_dma_ring_alloc(sc, TI_JUMBO_RING_ALIGN, TI_JUMBO_RX_RING_SZ, &sc->ti_cdata.ti_rx_jumbo_ring_tag, (uint8_t **)&sc->ti_rdata.ti_rx_jumbo_ring, &sc->ti_cdata.ti_rx_jumbo_ring_map, &sc->ti_rdata.ti_rx_jumbo_ring_paddr, "jumbo RX ring"); if (error) return (error); /* RX return ring */ error = ti_dma_ring_alloc(sc, TI_RING_ALIGN, TI_RX_RETURN_RING_SZ, &sc->ti_cdata.ti_rx_return_ring_tag, (uint8_t **)&sc->ti_rdata.ti_rx_return_ring, &sc->ti_cdata.ti_rx_return_ring_map, &sc->ti_rdata.ti_rx_return_ring_paddr, "RX return ring"); if (error) return (error); /* Create DMA tag for standard RX mbufs. */ error = bus_dma_tag_create(sc->ti_cdata.ti_parent_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, MCLBYTES, 0, NULL, NULL, &sc->ti_cdata.ti_rx_std_tag); if (error) { device_printf(sc->ti_dev, "could not allocate RX dma tag\n"); return (error); } /* Create DMA tag for jumbo RX mbufs. */ #ifdef TI_SF_BUF_JUMBO /* * The VM system will take care of providing aligned pages. Alignment * is set to 1 here so that busdma resources won't be wasted. 
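 * (Editorial aside: in the bus_dma_tag_create() call below, the three
 * arguments after the NULL filter pair are maxsize, nsegments and
 * maxsegsz -- so a loaded jumbo buffer may span at most PAGE_SIZE * 4
 * bytes in up to 4 segments of at most PAGE_SIZE each, matching the
 * header mbuf plus the sf_buf pages that ti_newbuf_jumbo() strings
 * together in this configuration.)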
*/ error = bus_dma_tag_create(sc->ti_cdata.ti_parent_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, PAGE_SIZE * 4, 4, PAGE_SIZE, 0, NULL, NULL, &sc->ti_cdata.ti_rx_jumbo_tag); #else error = bus_dma_tag_create(sc->ti_cdata.ti_parent_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MJUM9BYTES, 1, MJUM9BYTES, 0, NULL, NULL, &sc->ti_cdata.ti_rx_jumbo_tag); #endif if (error) { device_printf(sc->ti_dev, "could not allocate jumbo RX dma tag\n"); return (error); } /* Create DMA tag for TX mbufs. */ error = bus_dma_tag_create(sc->ti_cdata.ti_parent_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES * TI_MAXTXSEGS, TI_MAXTXSEGS, MCLBYTES, 0, NULL, NULL, &sc->ti_cdata.ti_tx_tag); if (error) { device_printf(sc->ti_dev, "could not allocate TX dma tag\n"); return (ENOMEM); } /* Create DMA maps for RX buffers. */ for (i = 0; i < TI_STD_RX_RING_CNT; i++) { error = bus_dmamap_create(sc->ti_cdata.ti_rx_std_tag, 0, &sc->ti_cdata.ti_rx_std_maps[i]); if (error) { device_printf(sc->ti_dev, "could not create DMA map for RX\n"); return (error); } } error = bus_dmamap_create(sc->ti_cdata.ti_rx_std_tag, 0, &sc->ti_cdata.ti_rx_std_sparemap); if (error) { device_printf(sc->ti_dev, "could not create spare DMA map for RX\n"); return (error); } /* Create DMA maps for jumbo RX buffers. */ for (i = 0; i < TI_JUMBO_RX_RING_CNT; i++) { error = bus_dmamap_create(sc->ti_cdata.ti_rx_jumbo_tag, 0, &sc->ti_cdata.ti_rx_jumbo_maps[i]); if (error) { device_printf(sc->ti_dev, "could not create DMA map for jumbo RX\n"); return (error); } } error = bus_dmamap_create(sc->ti_cdata.ti_rx_jumbo_tag, 0, &sc->ti_cdata.ti_rx_jumbo_sparemap); if (error) { device_printf(sc->ti_dev, "could not create spare DMA map for jumbo RX\n"); return (error); } /* Create DMA maps for TX buffers. */ for (i = 0; i < TI_TX_RING_CNT; i++) { error = bus_dmamap_create(sc->ti_cdata.ti_tx_tag, 0, &sc->ti_cdata.ti_txdesc[i].tx_dmamap); if (error) { device_printf(sc->ti_dev, "could not create DMA map for TX\n"); return (ENOMEM); } } /* Mini ring and TX ring is not available on Tigon 1. */ if (sc->ti_hwrev == TI_HWREV_TIGON) return (0); /* TX ring */ error = ti_dma_ring_alloc(sc, TI_RING_ALIGN, TI_TX_RING_SZ, &sc->ti_cdata.ti_tx_ring_tag, (uint8_t **)&sc->ti_rdata.ti_tx_ring, &sc->ti_cdata.ti_tx_ring_map, &sc->ti_rdata.ti_tx_ring_paddr, "TX ring"); if (error) return (error); /* Mini RX ring */ error = ti_dma_ring_alloc(sc, TI_RING_ALIGN, TI_MINI_RX_RING_SZ, &sc->ti_cdata.ti_rx_mini_ring_tag, (uint8_t **)&sc->ti_rdata.ti_rx_mini_ring, &sc->ti_cdata.ti_rx_mini_ring_map, &sc->ti_rdata.ti_rx_mini_ring_paddr, "mini RX ring"); if (error) return (error); /* Create DMA tag for mini RX mbufs. */ error = bus_dma_tag_create(sc->ti_cdata.ti_parent_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MHLEN, 1, MHLEN, 0, NULL, NULL, &sc->ti_cdata.ti_rx_mini_tag); if (error) { device_printf(sc->ti_dev, "could not allocate mini RX dma tag\n"); return (error); } /* Create DMA maps for mini RX buffers. 
*/ for (i = 0; i < TI_MINI_RX_RING_CNT; i++) { error = bus_dmamap_create(sc->ti_cdata.ti_rx_mini_tag, 0, &sc->ti_cdata.ti_rx_mini_maps[i]); if (error) { device_printf(sc->ti_dev, "could not create DMA map for mini RX\n"); return (error); } } error = bus_dmamap_create(sc->ti_cdata.ti_rx_mini_tag, 0, &sc->ti_cdata.ti_rx_mini_sparemap); if (error) { device_printf(sc->ti_dev, "could not create spare DMA map for mini RX\n"); return (error); } return (0); } static void ti_dma_free(struct ti_softc *sc) { int i; /* Destroy DMA maps for RX buffers. */ for (i = 0; i < TI_STD_RX_RING_CNT; i++) { if (sc->ti_cdata.ti_rx_std_maps[i]) { bus_dmamap_destroy(sc->ti_cdata.ti_rx_std_tag, sc->ti_cdata.ti_rx_std_maps[i]); sc->ti_cdata.ti_rx_std_maps[i] = NULL; } } if (sc->ti_cdata.ti_rx_std_sparemap) { bus_dmamap_destroy(sc->ti_cdata.ti_rx_std_tag, sc->ti_cdata.ti_rx_std_sparemap); sc->ti_cdata.ti_rx_std_sparemap = NULL; } if (sc->ti_cdata.ti_rx_std_tag) { bus_dma_tag_destroy(sc->ti_cdata.ti_rx_std_tag); sc->ti_cdata.ti_rx_std_tag = NULL; } /* Destroy DMA maps for jumbo RX buffers. */ for (i = 0; i < TI_JUMBO_RX_RING_CNT; i++) { if (sc->ti_cdata.ti_rx_jumbo_maps[i]) { bus_dmamap_destroy(sc->ti_cdata.ti_rx_jumbo_tag, sc->ti_cdata.ti_rx_jumbo_maps[i]); sc->ti_cdata.ti_rx_jumbo_maps[i] = NULL; } } if (sc->ti_cdata.ti_rx_jumbo_sparemap) { bus_dmamap_destroy(sc->ti_cdata.ti_rx_jumbo_tag, sc->ti_cdata.ti_rx_jumbo_sparemap); sc->ti_cdata.ti_rx_jumbo_sparemap = NULL; } if (sc->ti_cdata.ti_rx_jumbo_tag) { bus_dma_tag_destroy(sc->ti_cdata.ti_rx_jumbo_tag); sc->ti_cdata.ti_rx_jumbo_tag = NULL; } /* Destroy DMA maps for mini RX buffers. */ for (i = 0; i < TI_MINI_RX_RING_CNT; i++) { if (sc->ti_cdata.ti_rx_mini_maps[i]) { bus_dmamap_destroy(sc->ti_cdata.ti_rx_mini_tag, sc->ti_cdata.ti_rx_mini_maps[i]); sc->ti_cdata.ti_rx_mini_maps[i] = NULL; } } if (sc->ti_cdata.ti_rx_mini_sparemap) { bus_dmamap_destroy(sc->ti_cdata.ti_rx_mini_tag, sc->ti_cdata.ti_rx_mini_sparemap); sc->ti_cdata.ti_rx_mini_sparemap = NULL; } if (sc->ti_cdata.ti_rx_mini_tag) { bus_dma_tag_destroy(sc->ti_cdata.ti_rx_mini_tag); sc->ti_cdata.ti_rx_mini_tag = NULL; } /* Destroy DMA maps for TX buffers. */ for (i = 0; i < TI_TX_RING_CNT; i++) { if (sc->ti_cdata.ti_txdesc[i].tx_dmamap) { bus_dmamap_destroy(sc->ti_cdata.ti_tx_tag, sc->ti_cdata.ti_txdesc[i].tx_dmamap); sc->ti_cdata.ti_txdesc[i].tx_dmamap = NULL; } } if (sc->ti_cdata.ti_tx_tag) { bus_dma_tag_destroy(sc->ti_cdata.ti_tx_tag); sc->ti_cdata.ti_tx_tag = NULL; } /* Destroy standard RX ring. */ ti_dma_ring_free(sc, &sc->ti_cdata.ti_rx_std_ring_tag, (void *)&sc->ti_rdata.ti_rx_std_ring, sc->ti_cdata.ti_rx_std_ring_map, &sc->ti_rdata.ti_rx_std_ring_paddr); /* Destroy jumbo RX ring. */ ti_dma_ring_free(sc, &sc->ti_cdata.ti_rx_jumbo_ring_tag, (void *)&sc->ti_rdata.ti_rx_jumbo_ring, sc->ti_cdata.ti_rx_jumbo_ring_map, &sc->ti_rdata.ti_rx_jumbo_ring_paddr); /* Destroy mini RX ring. */ ti_dma_ring_free(sc, &sc->ti_cdata.ti_rx_mini_ring_tag, (void *)&sc->ti_rdata.ti_rx_mini_ring, sc->ti_cdata.ti_rx_mini_ring_map, &sc->ti_rdata.ti_rx_mini_ring_paddr); /* Destroy RX return ring. */ ti_dma_ring_free(sc, &sc->ti_cdata.ti_rx_return_ring_tag, (void *)&sc->ti_rdata.ti_rx_return_ring, sc->ti_cdata.ti_rx_return_ring_map, &sc->ti_rdata.ti_rx_return_ring_paddr); /* Destroy TX ring. */ ti_dma_ring_free(sc, &sc->ti_cdata.ti_tx_ring_tag, (void *)&sc->ti_rdata.ti_tx_ring, sc->ti_cdata.ti_tx_ring_map, &sc->ti_rdata.ti_tx_ring_paddr); /* Destroy status block. 
*/ ti_dma_ring_free(sc, &sc->ti_cdata.ti_status_tag, (void *)&sc->ti_rdata.ti_status, sc->ti_cdata.ti_status_map, &sc->ti_rdata.ti_status_paddr); /* Destroy event ring. */ ti_dma_ring_free(sc, &sc->ti_cdata.ti_event_ring_tag, (void *)&sc->ti_rdata.ti_event_ring, sc->ti_cdata.ti_event_ring_map, &sc->ti_rdata.ti_event_ring_paddr); /* Destroy GIB */ ti_dma_ring_free(sc, &sc->ti_cdata.ti_gib_tag, (void *)&sc->ti_rdata.ti_info, sc->ti_cdata.ti_gib_map, &sc->ti_rdata.ti_info_paddr); /* Destroy the parent tag. */ if (sc->ti_cdata.ti_parent_tag) { bus_dma_tag_destroy(sc->ti_cdata.ti_parent_tag); sc->ti_cdata.ti_parent_tag = NULL; } } /* * Initialize a standard receive ring descriptor. */ static int ti_newbuf_std(struct ti_softc *sc, int i) { bus_dmamap_t map; bus_dma_segment_t segs[1]; struct mbuf *m; struct ti_rx_desc *r; int error, nsegs; m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MCLBYTES; m_adj(m, ETHER_ALIGN); error = bus_dmamap_load_mbuf_sg(sc->ti_cdata.ti_rx_std_tag, sc->ti_cdata.ti_rx_std_sparemap, m, segs, &nsegs, 0); if (error != 0) { m_freem(m); return (error); } KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); if (sc->ti_cdata.ti_rx_std_chain[i] != NULL) { bus_dmamap_sync(sc->ti_cdata.ti_rx_std_tag, sc->ti_cdata.ti_rx_std_maps[i], BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->ti_cdata.ti_rx_std_tag, sc->ti_cdata.ti_rx_std_maps[i]); } map = sc->ti_cdata.ti_rx_std_maps[i]; sc->ti_cdata.ti_rx_std_maps[i] = sc->ti_cdata.ti_rx_std_sparemap; sc->ti_cdata.ti_rx_std_sparemap = map; sc->ti_cdata.ti_rx_std_chain[i] = m; r = &sc->ti_rdata.ti_rx_std_ring[i]; ti_hostaddr64(&r->ti_addr, segs[0].ds_addr); r->ti_len = segs[0].ds_len; r->ti_type = TI_BDTYPE_RECV_BD; r->ti_flags = 0; r->ti_vlan_tag = 0; r->ti_tcp_udp_cksum = 0; if (sc->ti_ifp->if_capenable & IFCAP_RXCSUM) r->ti_flags |= TI_BDFLAG_TCP_UDP_CKSUM | TI_BDFLAG_IP_CKSUM; r->ti_idx = i; bus_dmamap_sync(sc->ti_cdata.ti_rx_std_tag, sc->ti_cdata.ti_rx_std_maps[i], BUS_DMASYNC_PREREAD); return (0); } /* * Initialize a mini receive ring descriptor. This only applies to
*/ static int ti_newbuf_mini(struct ti_softc *sc, int i) { bus_dmamap_t map; bus_dma_segment_t segs[1]; struct mbuf *m; struct ti_rx_desc *r; int error, nsegs; MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MHLEN; m_adj(m, ETHER_ALIGN); error = bus_dmamap_load_mbuf_sg(sc->ti_cdata.ti_rx_mini_tag, sc->ti_cdata.ti_rx_mini_sparemap, m, segs, &nsegs, 0); if (error != 0) { m_freem(m); return (error); } KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); if (sc->ti_cdata.ti_rx_mini_chain[i] != NULL) { bus_dmamap_sync(sc->ti_cdata.ti_rx_mini_tag, sc->ti_cdata.ti_rx_mini_maps[i], BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->ti_cdata.ti_rx_mini_tag, sc->ti_cdata.ti_rx_mini_maps[i]); } map = sc->ti_cdata.ti_rx_mini_maps[i]; sc->ti_cdata.ti_rx_mini_maps[i] = sc->ti_cdata.ti_rx_mini_sparemap; sc->ti_cdata.ti_rx_mini_sparemap = map; sc->ti_cdata.ti_rx_mini_chain[i] = m; r = &sc->ti_rdata.ti_rx_mini_ring[i]; ti_hostaddr64(&r->ti_addr, segs[0].ds_addr); r->ti_len = segs[0].ds_len; r->ti_type = TI_BDTYPE_RECV_BD; r->ti_flags = TI_BDFLAG_MINI_RING; r->ti_vlan_tag = 0; r->ti_tcp_udp_cksum = 0; if (sc->ti_ifp->if_capenable & IFCAP_RXCSUM) r->ti_flags |= TI_BDFLAG_TCP_UDP_CKSUM | TI_BDFLAG_IP_CKSUM; r->ti_idx = i; bus_dmamap_sync(sc->ti_cdata.ti_rx_mini_tag, sc->ti_cdata.ti_rx_mini_maps[i], BUS_DMASYNC_PREREAD); return (0); } #ifndef TI_SF_BUF_JUMBO /* * Initialize a jumbo receive ring descriptor. This allocates * a jumbo buffer from the pool managed internally by the driver. */ static int ti_newbuf_jumbo(struct ti_softc *sc, int i, struct mbuf *dummy) { bus_dmamap_t map; bus_dma_segment_t segs[1]; struct mbuf *m; struct ti_rx_desc *r; int error, nsegs; (void)dummy; m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES); if (m == NULL) return (ENOBUFS); m->m_len = m->m_pkthdr.len = MJUM9BYTES; m_adj(m, ETHER_ALIGN); error = bus_dmamap_load_mbuf_sg(sc->ti_cdata.ti_rx_jumbo_tag, sc->ti_cdata.ti_rx_jumbo_sparemap, m, segs, &nsegs, 0); if (error != 0) { m_freem(m); return (error); } KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); if (sc->ti_cdata.ti_rx_jumbo_chain[i] != NULL) { bus_dmamap_sync(sc->ti_cdata.ti_rx_jumbo_tag, sc->ti_cdata.ti_rx_jumbo_maps[i], BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->ti_cdata.ti_rx_jumbo_tag, sc->ti_cdata.ti_rx_jumbo_maps[i]); } map = sc->ti_cdata.ti_rx_jumbo_maps[i]; sc->ti_cdata.ti_rx_jumbo_maps[i] = sc->ti_cdata.ti_rx_jumbo_sparemap; sc->ti_cdata.ti_rx_jumbo_sparemap = map; sc->ti_cdata.ti_rx_jumbo_chain[i] = m; r = &sc->ti_rdata.ti_rx_jumbo_ring[i]; ti_hostaddr64(&r->ti_addr, segs[0].ds_addr); r->ti_len = segs[0].ds_len; r->ti_type = TI_BDTYPE_RECV_JUMBO_BD; r->ti_flags = TI_BDFLAG_JUMBO_RING; r->ti_vlan_tag = 0; r->ti_tcp_udp_cksum = 0; if (sc->ti_ifp->if_capenable & IFCAP_RXCSUM) r->ti_flags |= TI_BDFLAG_TCP_UDP_CKSUM | TI_BDFLAG_IP_CKSUM; r->ti_idx = i; bus_dmamap_sync(sc->ti_cdata.ti_rx_jumbo_tag, sc->ti_cdata.ti_rx_jumbo_maps[i], BUS_DMASYNC_PREREAD); return (0); } #else #if (PAGE_SIZE == 4096) #define NPAYLOAD 2 #else #define NPAYLOAD 1 #endif #define TCP_HDR_LEN (52 + sizeof(struct ether_header)) #define UDP_HDR_LEN (28 + sizeof(struct ether_header)) #define NFS_HDR_LEN (UDP_HDR_LEN) static int HDR_LEN = TCP_HDR_LEN; /* * Initialize a jumbo receive ring descriptor. This allocates * a jumbo buffer from the pool managed internally by the driver. 
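 * (Editorial aside: unlike the single-buffer path above, this variant
 * fills an extended receive BD, struct ti_rx_desc_ext, carrying four
 * address/length pairs ti_addr0..ti_addr3.  Slot 0 holds the small
 * header mbuf -- MHLEN - ETHER_ALIGN bytes with header splitting on,
 * HDR_LEN otherwise -- the middle slots hold page-sized sf_buf
 * payloads, and on 4K-page systems the last slot is a trailing mbuf
 * cluster, as the descriptor setup below spells out.)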
*/ static int ti_newbuf_jumbo(struct ti_softc *sc, int idx, struct mbuf *m_old) { bus_dmamap_t map; struct mbuf *cur, *m_new = NULL; struct mbuf *m[3] = {NULL, NULL, NULL}; struct ti_rx_desc_ext *r; vm_page_t frame; /* 1 extra buf to make nobufs easy*/ struct sf_buf *sf[3] = {NULL, NULL, NULL}; int i; bus_dma_segment_t segs[4]; int nsegs; if (m_old != NULL) { m_new = m_old; cur = m_old->m_next; for (i = 0; i <= NPAYLOAD; i++){ m[i] = cur; cur = cur->m_next; } } else { /* Allocate the mbufs. */ MGETHDR(m_new, M_NOWAIT, MT_DATA); if (m_new == NULL) { device_printf(sc->ti_dev, "mbuf allocation failed " "-- packet dropped!\n"); goto nobufs; } MGET(m[NPAYLOAD], M_NOWAIT, MT_DATA); if (m[NPAYLOAD] == NULL) { device_printf(sc->ti_dev, "cluster mbuf allocation " "failed -- packet dropped!\n"); goto nobufs; } if (!(MCLGET(m[NPAYLOAD], M_NOWAIT))) { device_printf(sc->ti_dev, "mbuf allocation failed " "-- packet dropped!\n"); goto nobufs; } m[NPAYLOAD]->m_len = MCLBYTES; for (i = 0; i < NPAYLOAD; i++){ MGET(m[i], M_NOWAIT, MT_DATA); if (m[i] == NULL) { device_printf(sc->ti_dev, "mbuf allocation " "failed -- packet dropped!\n"); goto nobufs; } frame = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); if (frame == NULL) { device_printf(sc->ti_dev, "buffer allocation " "failed -- packet dropped!\n"); printf(" index %d page %d\n", idx, i); goto nobufs; } sf[i] = sf_buf_alloc(frame, SFB_NOWAIT); if (sf[i] == NULL) { - vm_page_unwire(frame, PQ_INACTIVE); + vm_page_unwire(frame, PQ_NONE); vm_page_free(frame); device_printf(sc->ti_dev, "buffer allocation " "failed -- packet dropped!\n"); printf(" index %d page %d\n", idx, i); goto nobufs; } } for (i = 0; i < NPAYLOAD; i++){ /* Attach the buffer to the mbuf. */ m[i]->m_data = (void *)sf_buf_kva(sf[i]); m[i]->m_len = PAGE_SIZE; MEXTADD(m[i], sf_buf_kva(sf[i]), PAGE_SIZE, sf_mext_free, (void*)sf_buf_kva(sf[i]), sf[i], 0, EXT_DISPOSABLE); m[i]->m_next = m[i+1]; } /* link the buffers to the header */ m_new->m_next = m[0]; m_new->m_data += ETHER_ALIGN; if (sc->ti_hdrsplit) m_new->m_len = MHLEN - ETHER_ALIGN; else m_new->m_len = HDR_LEN; m_new->m_pkthdr.len = NPAYLOAD * PAGE_SIZE + m_new->m_len; } /* Set up the descriptor. */ r = &sc->ti_rdata.ti_rx_jumbo_ring[idx]; sc->ti_cdata.ti_rx_jumbo_chain[idx] = m_new; map = sc->ti_cdata.ti_rx_jumbo_maps[i]; if (bus_dmamap_load_mbuf_sg(sc->ti_cdata.ti_rx_jumbo_tag, map, m_new, segs, &nsegs, 0)) return (ENOBUFS); if ((nsegs < 1) || (nsegs > 4)) return (ENOBUFS); ti_hostaddr64(&r->ti_addr0, segs[0].ds_addr); r->ti_len0 = m_new->m_len; ti_hostaddr64(&r->ti_addr1, segs[1].ds_addr); r->ti_len1 = PAGE_SIZE; ti_hostaddr64(&r->ti_addr2, segs[2].ds_addr); r->ti_len2 = m[1]->m_ext.ext_size; /* could be PAGE_SIZE or MCLBYTES */ if (PAGE_SIZE == 4096) { ti_hostaddr64(&r->ti_addr3, segs[3].ds_addr); r->ti_len3 = MCLBYTES; } else { r->ti_len3 = 0; } r->ti_type = TI_BDTYPE_RECV_JUMBO_BD; r->ti_flags = TI_BDFLAG_JUMBO_RING|TI_RCB_FLAG_USE_EXT_RX_BD; if (sc->ti_ifp->if_capenable & IFCAP_RXCSUM) r->ti_flags |= TI_BDFLAG_TCP_UDP_CKSUM|TI_BDFLAG_IP_CKSUM; r->ti_idx = idx; bus_dmamap_sync(sc->ti_cdata.ti_rx_jumbo_tag, map, BUS_DMASYNC_PREREAD); return (0); nobufs: /* * Warning! : * This can only be called before the mbufs are strung together. * If the mbufs are strung together, m_freem() will free the chain, * so that the later mbufs will be freed multiple times. 
*/ if (m_new) m_freem(m_new); for (i = 0; i < 3; i++) { if (m[i]) m_freem(m[i]); if (sf[i]) sf_mext_free((void *)sf_buf_kva(sf[i]), sf[i]); } return (ENOBUFS); } #endif /* * The standard receive ring has 512 entries in it. At 2K per mbuf cluster, * that's 1MB of memory, which is a lot. The loop below fills the entire * ring, and we hope that our CPU is fast enough to keep up with the NIC. */ static int ti_init_rx_ring_std(struct ti_softc *sc) { int i; struct ti_cmd_desc cmd; for (i = 0; i < TI_STD_RX_RING_CNT; i++) { if (ti_newbuf_std(sc, i) != 0) return (ENOBUFS); } sc->ti_std = TI_STD_RX_RING_CNT - 1; TI_UPDATE_STDPROD(sc, TI_STD_RX_RING_CNT - 1); return (0); } static void ti_free_rx_ring_std(struct ti_softc *sc) { bus_dmamap_t map; int i; for (i = 0; i < TI_STD_RX_RING_CNT; i++) { if (sc->ti_cdata.ti_rx_std_chain[i] != NULL) { map = sc->ti_cdata.ti_rx_std_maps[i]; bus_dmamap_sync(sc->ti_cdata.ti_rx_std_tag, map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->ti_cdata.ti_rx_std_tag, map); m_freem(sc->ti_cdata.ti_rx_std_chain[i]); sc->ti_cdata.ti_rx_std_chain[i] = NULL; } } bzero(sc->ti_rdata.ti_rx_std_ring, TI_STD_RX_RING_SZ); bus_dmamap_sync(sc->ti_cdata.ti_rx_std_ring_tag, sc->ti_cdata.ti_rx_std_ring_map, BUS_DMASYNC_PREWRITE); } static int ti_init_rx_ring_jumbo(struct ti_softc *sc) { struct ti_cmd_desc cmd; int i; for (i = 0; i < TI_JUMBO_RX_RING_CNT; i++) { if (ti_newbuf_jumbo(sc, i, NULL) != 0) return (ENOBUFS); } sc->ti_jumbo = TI_JUMBO_RX_RING_CNT - 1; TI_UPDATE_JUMBOPROD(sc, TI_JUMBO_RX_RING_CNT - 1); return (0); } static void ti_free_rx_ring_jumbo(struct ti_softc *sc) { bus_dmamap_t map; int i; for (i = 0; i < TI_JUMBO_RX_RING_CNT; i++) { if (sc->ti_cdata.ti_rx_jumbo_chain[i] != NULL) { map = sc->ti_cdata.ti_rx_jumbo_maps[i]; bus_dmamap_sync(sc->ti_cdata.ti_rx_jumbo_tag, map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->ti_cdata.ti_rx_jumbo_tag, map); m_freem(sc->ti_cdata.ti_rx_jumbo_chain[i]); sc->ti_cdata.ti_rx_jumbo_chain[i] = NULL; } } bzero(sc->ti_rdata.ti_rx_jumbo_ring, TI_JUMBO_RX_RING_SZ); bus_dmamap_sync(sc->ti_cdata.ti_rx_jumbo_ring_tag, sc->ti_cdata.ti_rx_jumbo_ring_map, BUS_DMASYNC_PREWRITE); } static int ti_init_rx_ring_mini(struct ti_softc *sc) { int i; for (i = 0; i < TI_MINI_RX_RING_CNT; i++) { if (ti_newbuf_mini(sc, i) != 0) return (ENOBUFS); } sc->ti_mini = TI_MINI_RX_RING_CNT - 1; TI_UPDATE_MINIPROD(sc, TI_MINI_RX_RING_CNT - 1); return (0); } static void ti_free_rx_ring_mini(struct ti_softc *sc) { bus_dmamap_t map; int i; if (sc->ti_rdata.ti_rx_mini_ring == NULL) return; for (i = 0; i < TI_MINI_RX_RING_CNT; i++) { if (sc->ti_cdata.ti_rx_mini_chain[i] != NULL) { map = sc->ti_cdata.ti_rx_mini_maps[i]; bus_dmamap_sync(sc->ti_cdata.ti_rx_mini_tag, map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->ti_cdata.ti_rx_mini_tag, map); m_freem(sc->ti_cdata.ti_rx_mini_chain[i]); sc->ti_cdata.ti_rx_mini_chain[i] = NULL; } } bzero(sc->ti_rdata.ti_rx_mini_ring, TI_MINI_RX_RING_SZ); bus_dmamap_sync(sc->ti_cdata.ti_rx_mini_ring_tag, sc->ti_cdata.ti_rx_mini_ring_map, BUS_DMASYNC_PREWRITE); } static void ti_free_tx_ring(struct ti_softc *sc) { struct ti_txdesc *txd; int i; if (sc->ti_rdata.ti_tx_ring == NULL) return; for (i = 0; i < TI_TX_RING_CNT; i++) { txd = &sc->ti_cdata.ti_txdesc[i]; if (txd->tx_m != NULL) { bus_dmamap_sync(sc->ti_cdata.ti_tx_tag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->ti_cdata.ti_tx_tag, txd->tx_dmamap); m_freem(txd->tx_m); txd->tx_m = NULL; } } bzero(sc->ti_rdata.ti_tx_ring, TI_TX_RING_SZ);
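/* Write the cleared descriptors back so the NIC sees an empty ring. */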
bus_dmamap_sync(sc->ti_cdata.ti_tx_ring_tag, sc->ti_cdata.ti_tx_ring_map, BUS_DMASYNC_PREWRITE); } static int ti_init_tx_ring(struct ti_softc *sc) { struct ti_txdesc *txd; int i; STAILQ_INIT(&sc->ti_cdata.ti_txfreeq); STAILQ_INIT(&sc->ti_cdata.ti_txbusyq); for (i = 0; i < TI_TX_RING_CNT; i++) { txd = &sc->ti_cdata.ti_txdesc[i]; STAILQ_INSERT_TAIL(&sc->ti_cdata.ti_txfreeq, txd, tx_q); } sc->ti_txcnt = 0; sc->ti_tx_saved_considx = 0; sc->ti_tx_saved_prodidx = 0; CSR_WRITE_4(sc, TI_MB_SENDPROD_IDX, 0); return (0); } /* * The Tigon 2 firmware has a new way to add/delete multicast addresses, * but we have to support the old way too so that Tigon 1 cards will * work. */ static void ti_add_mcast(struct ti_softc *sc, struct ether_addr *addr) { struct ti_cmd_desc cmd; uint16_t *m; uint32_t ext[2] = {0, 0}; m = (uint16_t *)&addr->octet[0]; switch (sc->ti_hwrev) { case TI_HWREV_TIGON: CSR_WRITE_4(sc, TI_GCR_MAR0, htons(m[0])); CSR_WRITE_4(sc, TI_GCR_MAR1, (htons(m[1]) << 16) | htons(m[2])); TI_DO_CMD(TI_CMD_ADD_MCAST_ADDR, 0, 0); break; case TI_HWREV_TIGON_II: ext[0] = htons(m[0]); ext[1] = (htons(m[1]) << 16) | htons(m[2]); TI_DO_CMD_EXT(TI_CMD_EXT_ADD_MCAST, 0, 0, (caddr_t)&ext, 2); break; default: device_printf(sc->ti_dev, "unknown hwrev\n"); break; } } static void ti_del_mcast(struct ti_softc *sc, struct ether_addr *addr) { struct ti_cmd_desc cmd; uint16_t *m; uint32_t ext[2] = {0, 0}; m = (uint16_t *)&addr->octet[0]; switch (sc->ti_hwrev) { case TI_HWREV_TIGON: CSR_WRITE_4(sc, TI_GCR_MAR0, htons(m[0])); CSR_WRITE_4(sc, TI_GCR_MAR1, (htons(m[1]) << 16) | htons(m[2])); TI_DO_CMD(TI_CMD_DEL_MCAST_ADDR, 0, 0); break; case TI_HWREV_TIGON_II: ext[0] = htons(m[0]); ext[1] = (htons(m[1]) << 16) | htons(m[2]); TI_DO_CMD_EXT(TI_CMD_EXT_DEL_MCAST, 0, 0, (caddr_t)&ext, 2); break; default: device_printf(sc->ti_dev, "unknown hwrev\n"); break; } } /* * Configure the Tigon's multicast address filter. * * The actual multicast table management is a bit of a pain, thanks to * slight brain damage on the part of both Alteon and us. With our * multicast code, we are only alerted when the multicast address table * changes and at that point we only have the current list of addresses: * we only know the current state, not the previous state, so we don't * actually know what addresses were removed or added. The firmware has * state, but we can't get our grubby mits on it, and there is no 'delete * all multicast addresses' command. Hence, we have to maintain our own * state so we know what addresses have been programmed into the NIC at * any given time. */ static void ti_setmulti(struct ti_softc *sc) { struct ifnet *ifp; struct ifmultiaddr *ifma; struct ti_cmd_desc cmd; struct ti_mc_entry *mc; uint32_t intrs; TI_LOCK_ASSERT(sc); ifp = sc->ti_ifp; if (ifp->if_flags & IFF_ALLMULTI) { TI_DO_CMD(TI_CMD_SET_ALLMULTI, TI_CMD_CODE_ALLMULTI_ENB, 0); return; } else { TI_DO_CMD(TI_CMD_SET_ALLMULTI, TI_CMD_CODE_ALLMULTI_DIS, 0); } /* Disable interrupts. */ intrs = CSR_READ_4(sc, TI_MB_HOSTINTR); CSR_WRITE_4(sc, TI_MB_HOSTINTR, 1); /* First, zot all the existing filters. */ while (SLIST_FIRST(&sc->ti_mc_listhead) != NULL) { mc = SLIST_FIRST(&sc->ti_mc_listhead); ti_del_mcast(sc, &mc->mc_addr); SLIST_REMOVE_HEAD(&sc->ti_mc_listhead, mc_entries); free(mc, M_DEVBUF); } /* Now program new ones. 
*/ if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; mc = malloc(sizeof(struct ti_mc_entry), M_DEVBUF, M_NOWAIT); if (mc == NULL) { device_printf(sc->ti_dev, "no memory for mcast filter entry\n"); continue; } bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), (char *)&mc->mc_addr, ETHER_ADDR_LEN); SLIST_INSERT_HEAD(&sc->ti_mc_listhead, mc, mc_entries); ti_add_mcast(sc, &mc->mc_addr); } if_maddr_runlock(ifp); /* Re-enable interrupts. */ CSR_WRITE_4(sc, TI_MB_HOSTINTR, intrs); } /* * Check to see if the BIOS has configured us for a 64 bit slot when * we aren't actually in one. If we detect this condition, we can work * around it on the Tigon 2 by setting a bit in the PCI state register, * but for the Tigon 1 we must give up and abort the interface attach. */ static int ti_64bitslot_war(struct ti_softc *sc) { if (!(CSR_READ_4(sc, TI_PCI_STATE) & TI_PCISTATE_32BIT_BUS)) { CSR_WRITE_4(sc, 0x600, 0); CSR_WRITE_4(sc, 0x604, 0); CSR_WRITE_4(sc, 0x600, 0x5555AAAA); if (CSR_READ_4(sc, 0x604) == 0x5555AAAA) { if (sc->ti_hwrev == TI_HWREV_TIGON) return (EINVAL); else { TI_SETBIT(sc, TI_PCI_STATE, TI_PCISTATE_32BIT_BUS); return (0); } } } return (0); } /* * Do endian, PCI and DMA initialization. Also check the on-board ROM * self-test results. */ static int ti_chipinit(struct ti_softc *sc) { uint32_t cacheline; uint32_t pci_writemax = 0; uint32_t hdrsplit; /* Initialize link to down state. */ sc->ti_linkstat = TI_EV_CODE_LINK_DOWN; /* Set endianness before we access any non-PCI registers. */ #if 0 && BYTE_ORDER == BIG_ENDIAN CSR_WRITE_4(sc, TI_MISC_HOST_CTL, TI_MHC_BIGENDIAN_INIT | (TI_MHC_BIGENDIAN_INIT << 24)); #else CSR_WRITE_4(sc, TI_MISC_HOST_CTL, TI_MHC_LITTLEENDIAN_INIT | (TI_MHC_LITTLEENDIAN_INIT << 24)); #endif /* Check the ROM failed bit to see if self-tests passed. */ if (CSR_READ_4(sc, TI_CPU_STATE) & TI_CPUSTATE_ROMFAIL) { device_printf(sc->ti_dev, "board self-diagnostics failed!\n"); return (ENODEV); } /* Halt the CPU. */ TI_SETBIT(sc, TI_CPU_STATE, TI_CPUSTATE_HALT); /* Figure out the hardware revision. */ switch (CSR_READ_4(sc, TI_MISC_HOST_CTL) & TI_MHC_CHIP_REV_MASK) { case TI_REV_TIGON_I: sc->ti_hwrev = TI_HWREV_TIGON; break; case TI_REV_TIGON_II: sc->ti_hwrev = TI_HWREV_TIGON_II; break; default: device_printf(sc->ti_dev, "unsupported chip revision\n"); return (ENODEV); } /* Do special setup for Tigon 2. */ if (sc->ti_hwrev == TI_HWREV_TIGON_II) { TI_SETBIT(sc, TI_CPU_CTL_B, TI_CPUSTATE_HALT); TI_SETBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_SRAM_BANK_512K); TI_SETBIT(sc, TI_MISC_CONF, TI_MCR_SRAM_SYNCHRONOUS); } /* * We don't have firmware source for the Tigon 1, so Tigon 1 boards * can't do header splitting. */ #ifdef TI_JUMBO_HDRSPLIT if (sc->ti_hwrev != TI_HWREV_TIGON) sc->ti_hdrsplit = 1; else device_printf(sc->ti_dev, "can't do header splitting on a Tigon I board\n"); #endif /* TI_JUMBO_HDRSPLIT */ /* Set up the PCI state register. */ CSR_WRITE_4(sc, TI_PCI_STATE, TI_PCI_READ_CMD|TI_PCI_WRITE_CMD); if (sc->ti_hwrev == TI_HWREV_TIGON_II) { TI_SETBIT(sc, TI_PCI_STATE, TI_PCISTATE_USE_MEM_RD_MULT); } /* Clear the read/write max DMA parameters. */ TI_CLRBIT(sc, TI_PCI_STATE, (TI_PCISTATE_WRITE_MAXDMA| TI_PCISTATE_READ_MAXDMA)); /* Get cache line size. */ cacheline = CSR_READ_4(sc, TI_PCI_BIST) & 0xFF; /* * If the system has enabled the PCI memory write * and invalidate command in the command register, set * the write max parameter accordingly. This is necessary * to use MWI with the Tigon 2.
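* (The cache line size read above is presumably in 32-bit words, like * the PCI cache line size register; any value outside the handful the * switch below accepts causes MWI to be disabled.)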
*/ if (CSR_READ_4(sc, TI_PCI_CMDSTAT) & PCIM_CMD_MWIEN) { switch (cacheline) { case 1: case 4: case 8: case 16: case 32: case 64: break; default: /* Disable PCI memory write and invalidate. */ if (bootverbose) device_printf(sc->ti_dev, "cache line size %d" " not supported; disabling PCI MWI\n", cacheline); CSR_WRITE_4(sc, TI_PCI_CMDSTAT, CSR_READ_4(sc, TI_PCI_CMDSTAT) & ~PCIM_CMD_MWIEN); break; } } TI_SETBIT(sc, TI_PCI_STATE, pci_writemax); /* This sets the min dma param all the way up (0xff). */ TI_SETBIT(sc, TI_PCI_STATE, TI_PCISTATE_MINDMA); if (sc->ti_hdrsplit) hdrsplit = TI_OPMODE_JUMBO_HDRSPLIT; else hdrsplit = 0; /* Configure DMA variables. */ #if BYTE_ORDER == BIG_ENDIAN CSR_WRITE_4(sc, TI_GCR_OPMODE, TI_OPMODE_BYTESWAP_BD | TI_OPMODE_BYTESWAP_DATA | TI_OPMODE_WORDSWAP_BD | TI_OPMODE_WARN_ENB | TI_OPMODE_FATAL_ENB | TI_OPMODE_DONT_FRAG_JUMBO | hdrsplit); #else /* BYTE_ORDER */ CSR_WRITE_4(sc, TI_GCR_OPMODE, TI_OPMODE_BYTESWAP_DATA| TI_OPMODE_WORDSWAP_BD|TI_OPMODE_DONT_FRAG_JUMBO| TI_OPMODE_WARN_ENB|TI_OPMODE_FATAL_ENB | hdrsplit); #endif /* BYTE_ORDER */ /* * Only allow 1 DMA channel to be active at a time. * I don't think this is a good idea, but without it * the firmware racks up lots of nicDmaReadRingFull * errors. This is not compatible with hardware checksums. */ if ((sc->ti_ifp->if_capenable & (IFCAP_TXCSUM | IFCAP_RXCSUM)) == 0) TI_SETBIT(sc, TI_GCR_OPMODE, TI_OPMODE_1_DMA_ACTIVE); /* Recommended settings from Tigon manual. */ CSR_WRITE_4(sc, TI_GCR_DMA_WRITECFG, TI_DMA_STATE_THRESH_8W); CSR_WRITE_4(sc, TI_GCR_DMA_READCFG, TI_DMA_STATE_THRESH_8W); if (ti_64bitslot_war(sc)) { device_printf(sc->ti_dev, "bios thinks we're in a 64 bit slot, " "but we aren't"); return (EINVAL); } return (0); } /* * Initialize the general information block and firmware, and * start the CPU(s) running. */ static int ti_gibinit(struct ti_softc *sc) { struct ifnet *ifp; struct ti_rcb *rcb; int i; TI_LOCK_ASSERT(sc); ifp = sc->ti_ifp; /* Disable interrupts for now. */ CSR_WRITE_4(sc, TI_MB_HOSTINTR, 1); /* Tell the chip where to find the general information block. */ CSR_WRITE_4(sc, TI_GCR_GENINFO_HI, (uint64_t)sc->ti_rdata.ti_info_paddr >> 32); CSR_WRITE_4(sc, TI_GCR_GENINFO_LO, sc->ti_rdata.ti_info_paddr & 0xFFFFFFFF); /* Load the firmware into SRAM. */ ti_loadfw(sc); /* Set up the contents of the general info and ring control blocks. */ /* Set up the event ring and producer pointer. */ bzero(sc->ti_rdata.ti_event_ring, TI_EVENT_RING_SZ); rcb = &sc->ti_rdata.ti_info->ti_ev_rcb; ti_hostaddr64(&rcb->ti_hostaddr, sc->ti_rdata.ti_event_ring_paddr); rcb->ti_flags = 0; ti_hostaddr64(&sc->ti_rdata.ti_info->ti_ev_prodidx_ptr, sc->ti_rdata.ti_status_paddr + offsetof(struct ti_status, ti_ev_prodidx_r)); sc->ti_ev_prodidx.ti_idx = 0; CSR_WRITE_4(sc, TI_GCR_EVENTCONS_IDX, 0); sc->ti_ev_saved_considx = 0; /* Set up the command ring and producer mailbox. */ rcb = &sc->ti_rdata.ti_info->ti_cmd_rcb; ti_hostaddr64(&rcb->ti_hostaddr, TI_GCR_NIC_ADDR(TI_GCR_CMDRING)); rcb->ti_flags = 0; rcb->ti_max_len = 0; for (i = 0; i < TI_CMD_RING_CNT; i++) { CSR_WRITE_4(sc, TI_GCR_CMDRING + (i * 4), 0); } CSR_WRITE_4(sc, TI_GCR_CMDCONS_IDX, 0); CSR_WRITE_4(sc, TI_MB_CMDPROD_IDX, 0); sc->ti_cmd_saved_prodidx = 0; /* * Assign the address of the stats refresh buffer. * We re-use the current stats buffer for this to * conserve memory. 
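* That is, the firmware DMAs each statistics refresh over the ti_stats * area embedded in the general info block rather than into a second, * dedicated buffer.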
*/ bzero(&sc->ti_rdata.ti_info->ti_stats, sizeof(struct ti_stats)); ti_hostaddr64(&sc->ti_rdata.ti_info->ti_refresh_stats_ptr, sc->ti_rdata.ti_info_paddr + offsetof(struct ti_gib, ti_stats)); /* Set up the standard receive ring. */ rcb = &sc->ti_rdata.ti_info->ti_std_rx_rcb; ti_hostaddr64(&rcb->ti_hostaddr, sc->ti_rdata.ti_rx_std_ring_paddr); rcb->ti_max_len = TI_FRAMELEN; rcb->ti_flags = 0; if (sc->ti_ifp->if_capenable & IFCAP_RXCSUM) rcb->ti_flags |= TI_RCB_FLAG_TCP_UDP_CKSUM | TI_RCB_FLAG_IP_CKSUM | TI_RCB_FLAG_NO_PHDR_CKSUM; if (sc->ti_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) rcb->ti_flags |= TI_RCB_FLAG_VLAN_ASSIST; /* Set up the jumbo receive ring. */ rcb = &sc->ti_rdata.ti_info->ti_jumbo_rx_rcb; ti_hostaddr64(&rcb->ti_hostaddr, sc->ti_rdata.ti_rx_jumbo_ring_paddr); #ifndef TI_SF_BUF_JUMBO rcb->ti_max_len = MJUM9BYTES - ETHER_ALIGN; rcb->ti_flags = 0; #else rcb->ti_max_len = PAGE_SIZE; rcb->ti_flags = TI_RCB_FLAG_USE_EXT_RX_BD; #endif if (sc->ti_ifp->if_capenable & IFCAP_RXCSUM) rcb->ti_flags |= TI_RCB_FLAG_TCP_UDP_CKSUM | TI_RCB_FLAG_IP_CKSUM | TI_RCB_FLAG_NO_PHDR_CKSUM; if (sc->ti_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) rcb->ti_flags |= TI_RCB_FLAG_VLAN_ASSIST; /* * Set up the mini ring. Only activated on the * Tigon 2 but the slot in the config block is * still there on the Tigon 1. */ rcb = &sc->ti_rdata.ti_info->ti_mini_rx_rcb; ti_hostaddr64(&rcb->ti_hostaddr, sc->ti_rdata.ti_rx_mini_ring_paddr); rcb->ti_max_len = MHLEN - ETHER_ALIGN; if (sc->ti_hwrev == TI_HWREV_TIGON) rcb->ti_flags = TI_RCB_FLAG_RING_DISABLED; else rcb->ti_flags = 0; if (sc->ti_ifp->if_capenable & IFCAP_RXCSUM) rcb->ti_flags |= TI_RCB_FLAG_TCP_UDP_CKSUM | TI_RCB_FLAG_IP_CKSUM | TI_RCB_FLAG_NO_PHDR_CKSUM; if (sc->ti_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) rcb->ti_flags |= TI_RCB_FLAG_VLAN_ASSIST; /* * Set up the receive return ring. */ rcb = &sc->ti_rdata.ti_info->ti_return_rcb; ti_hostaddr64(&rcb->ti_hostaddr, sc->ti_rdata.ti_rx_return_ring_paddr); rcb->ti_flags = 0; rcb->ti_max_len = TI_RETURN_RING_CNT; ti_hostaddr64(&sc->ti_rdata.ti_info->ti_return_prodidx_ptr, sc->ti_rdata.ti_status_paddr + offsetof(struct ti_status, ti_return_prodidx_r)); /* * Set up the tx ring. Note: for the Tigon 2, we have the option * of putting the transmit ring in the host's address space and * letting the chip DMA it instead of leaving the ring in the NIC's * memory and accessing it through the shared memory region. We * do this for the Tigon 2, but it doesn't work on the Tigon 1, * so we have to revert to the shared memory scheme if we detect * a Tigon 1 chip. 
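* That choice shows up twice below: on a Tigon 2 the RCB is flagged * TI_RCB_FLAG_HOST_RING and given the ring's host physical address, * while on a Tigon 1 it simply points at TI_TX_RING_BASE in NIC memory.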
*/ CSR_WRITE_4(sc, TI_WINBASE, TI_TX_RING_BASE); if (sc->ti_rdata.ti_tx_ring != NULL) bzero(sc->ti_rdata.ti_tx_ring, TI_TX_RING_SZ); rcb = &sc->ti_rdata.ti_info->ti_tx_rcb; if (sc->ti_hwrev == TI_HWREV_TIGON) rcb->ti_flags = 0; else rcb->ti_flags = TI_RCB_FLAG_HOST_RING; if (sc->ti_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) rcb->ti_flags |= TI_RCB_FLAG_VLAN_ASSIST; if (sc->ti_ifp->if_capenable & IFCAP_TXCSUM) rcb->ti_flags |= TI_RCB_FLAG_TCP_UDP_CKSUM | TI_RCB_FLAG_IP_CKSUM | TI_RCB_FLAG_NO_PHDR_CKSUM; rcb->ti_max_len = TI_TX_RING_CNT; if (sc->ti_hwrev == TI_HWREV_TIGON) ti_hostaddr64(&rcb->ti_hostaddr, TI_TX_RING_BASE); else ti_hostaddr64(&rcb->ti_hostaddr, sc->ti_rdata.ti_tx_ring_paddr); ti_hostaddr64(&sc->ti_rdata.ti_info->ti_tx_considx_ptr, sc->ti_rdata.ti_status_paddr + offsetof(struct ti_status, ti_tx_considx_r)); bus_dmamap_sync(sc->ti_cdata.ti_gib_tag, sc->ti_cdata.ti_gib_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); bus_dmamap_sync(sc->ti_cdata.ti_status_tag, sc->ti_cdata.ti_status_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); bus_dmamap_sync(sc->ti_cdata.ti_event_ring_tag, sc->ti_cdata.ti_event_ring_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); if (sc->ti_rdata.ti_tx_ring != NULL) bus_dmamap_sync(sc->ti_cdata.ti_tx_ring_tag, sc->ti_cdata.ti_tx_ring_map, BUS_DMASYNC_PREWRITE); /* Set up tunables */ #if 0 if (ifp->if_mtu > ETHERMTU + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN) CSR_WRITE_4(sc, TI_GCR_RX_COAL_TICKS, (sc->ti_rx_coal_ticks / 10)); else #endif CSR_WRITE_4(sc, TI_GCR_RX_COAL_TICKS, sc->ti_rx_coal_ticks); CSR_WRITE_4(sc, TI_GCR_TX_COAL_TICKS, sc->ti_tx_coal_ticks); CSR_WRITE_4(sc, TI_GCR_STAT_TICKS, sc->ti_stat_ticks); CSR_WRITE_4(sc, TI_GCR_RX_MAX_COAL_BD, sc->ti_rx_max_coal_bds); CSR_WRITE_4(sc, TI_GCR_TX_MAX_COAL_BD, sc->ti_tx_max_coal_bds); CSR_WRITE_4(sc, TI_GCR_TX_BUFFER_RATIO, sc->ti_tx_buf_ratio); /* Turn interrupts on. */ CSR_WRITE_4(sc, TI_GCR_MASK_INTRS, 0); CSR_WRITE_4(sc, TI_MB_HOSTINTR, 0); /* Start CPU. */ TI_CLRBIT(sc, TI_CPU_STATE, (TI_CPUSTATE_HALT|TI_CPUSTATE_STEP)); return (0); } /* * Probe for a Tigon chip. Check the PCI vendor and device IDs * against our list and return its name if we find a match. */ static int ti_probe(device_t dev) { const struct ti_type *t; t = ti_devs; while (t->ti_name != NULL) { if ((pci_get_vendor(dev) == t->ti_vid) && (pci_get_device(dev) == t->ti_did)) { device_set_desc(dev, t->ti_name); return (BUS_PROBE_DEFAULT); } t++; } return (ENXIO); } static int ti_attach(device_t dev) { struct ifnet *ifp; struct ti_softc *sc; int error = 0, rid; u_char eaddr[6]; sc = device_get_softc(dev); sc->ti_dev = dev; mtx_init(&sc->ti_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->ti_watchdog, &sc->ti_mtx, 0); ifmedia_init(&sc->ifmedia, IFM_IMASK, ti_ifmedia_upd, ti_ifmedia_sts); ifp = sc->ti_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not if_alloc()\n"); error = ENOSPC; goto fail; } sc->ti_ifp->if_hwassist = TI_CSUM_FEATURES; sc->ti_ifp->if_capabilities = IFCAP_TXCSUM | IFCAP_RXCSUM; sc->ti_ifp->if_capenable = sc->ti_ifp->if_capabilities; /* * Map control/status registers. 
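* All of the Tigon's registers sit behind a single memory BAR, and bus * mastering must be enabled before the firmware can DMA the general * info block and rings that are set up later in attach.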
*/ pci_enable_busmaster(dev); rid = PCIR_BAR(0); sc->ti_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->ti_res == NULL) { device_printf(dev, "couldn't map memory\n"); error = ENXIO; goto fail; } sc->ti_btag = rman_get_bustag(sc->ti_res); sc->ti_bhandle = rman_get_bushandle(sc->ti_res); /* Allocate interrupt */ rid = 0; sc->ti_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (sc->ti_irq == NULL) { device_printf(dev, "couldn't map interrupt\n"); error = ENXIO; goto fail; } if (ti_chipinit(sc)) { device_printf(dev, "chip initialization failed\n"); error = ENXIO; goto fail; } /* Zero out the NIC's on-board SRAM. */ ti_mem_zero(sc, 0x2000, 0x100000 - 0x2000); /* Init again -- zeroing memory may have clobbered some registers. */ if (ti_chipinit(sc)) { device_printf(dev, "chip initialization failed\n"); error = ENXIO; goto fail; } /* * Get station address from the EEPROM. Note: the manual states * that the MAC address is at offset 0x8c, however the data is * stored as two longwords (since that's how it's loaded into * the NIC). This means the MAC address is actually preceded * by two zero bytes. We need to skip over those. */ if (ti_read_eeprom(sc, eaddr, TI_EE_MAC_OFFSET + 2, ETHER_ADDR_LEN)) { device_printf(dev, "failed to read station address\n"); error = ENXIO; goto fail; } /* Allocate working area for memory dump. */ sc->ti_membuf = malloc(sizeof(uint8_t) * TI_WINLEN, M_DEVBUF, M_NOWAIT); sc->ti_membuf2 = malloc(sizeof(uint8_t) * TI_WINLEN, M_DEVBUF, M_NOWAIT); if (sc->ti_membuf == NULL || sc->ti_membuf2 == NULL) { device_printf(dev, "cannot allocate memory buffer\n"); error = ENOMEM; goto fail; } if ((error = ti_dma_alloc(sc)) != 0) goto fail; /* * We really need a better way to tell a 1000baseTX card * from a 1000baseSX one, since in theory there could be * OEMed 1000baseTX cards from lame vendors who aren't * clever enough to change the PCI ID. For the moment * though, the AceNIC is the only copper card available. */ if (pci_get_vendor(dev) == ALT_VENDORID && pci_get_device(dev) == ALT_DEVICEID_ACENIC_COPPER) sc->ti_copper = 1; /* Ok, it's not the only copper card available. */ if (pci_get_vendor(dev) == NG_VENDORID && pci_get_device(dev) == NG_DEVICEID_GA620T) sc->ti_copper = 1; /* Set default tunable values. */ ti_sysctl_node(sc); /* Set up ifnet structure */ ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = ti_ioctl; ifp->if_start = ti_start; ifp->if_init = ti_init; ifp->if_get_counter = ti_get_counter; ifp->if_baudrate = IF_Gbps(1UL); ifp->if_snd.ifq_drv_maxlen = TI_TX_RING_CNT - 1; IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen); IFQ_SET_READY(&ifp->if_snd); /* Set up ifmedia support. */ if (sc->ti_copper) { /* * Copper cards allow manual 10/100 mode selection, * but not manual 1000baseTX mode selection. Why? * Because currently there's no way to specify the * master/slave setting through the firmware interface, * so Alteon decided to just bag it and handle it * via autonegotiation. */ ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_T, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_T|IFM_FDX, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_100_TX, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_100_TX|IFM_FDX, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_1000_T, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_1000_T|IFM_FDX, 0, NULL); } else { /* Fiber cards don't support 10/100 modes. 
*/ ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_1000_SX, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_1000_SX|IFM_FDX, 0, NULL); } ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_AUTO, 0, NULL); ifmedia_set(&sc->ifmedia, IFM_ETHER|IFM_AUTO); /* * We're assuming here that card initialization is a sequential * thing. If it isn't, multiple cards probing at the same time * could stomp on the list of softcs here. */ /* Register the device */ sc->dev = make_dev(&ti_cdevsw, device_get_unit(dev), UID_ROOT, GID_OPERATOR, 0600, "ti%d", device_get_unit(dev)); sc->dev->si_drv1 = sc; /* * Call MI attach routine. */ ether_ifattach(ifp, eaddr); /* VLAN capability setup. */ ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTAGGING; ifp->if_capenable = ifp->if_capabilities; /* Tell the upper layer we support VLAN over-sized frames. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); /* Driver supports link state tracking. */ ifp->if_capabilities |= IFCAP_LINKSTATE; ifp->if_capenable |= IFCAP_LINKSTATE; /* Hook interrupt last to avoid having to lock softc */ error = bus_setup_intr(dev, sc->ti_irq, INTR_TYPE_NET|INTR_MPSAFE, NULL, ti_intr, sc, &sc->ti_intrhand); if (error) { device_printf(dev, "couldn't set up irq\n"); goto fail; } fail: if (error) ti_detach(dev); return (error); } /* * Shut down hardware and free up resources. This can be called any * time after the mutex has been initialized. It is called in both * the error case in attach and the normal detach case so it needs * to be careful about only freeing resources that have actually been * allocated. */ static int ti_detach(device_t dev) { struct ti_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); if (sc->dev) destroy_dev(sc->dev); KASSERT(mtx_initialized(&sc->ti_mtx), ("ti mutex not initialized")); ifp = sc->ti_ifp; if (device_is_attached(dev)) { ether_ifdetach(ifp); TI_LOCK(sc); ti_stop(sc); TI_UNLOCK(sc); } /* These should only be active if attach succeeded */ callout_drain(&sc->ti_watchdog); bus_generic_detach(dev); ti_dma_free(sc); ifmedia_removeall(&sc->ifmedia); if (sc->ti_intrhand) bus_teardown_intr(dev, sc->ti_irq, sc->ti_intrhand); if (sc->ti_irq) bus_release_resource(dev, SYS_RES_IRQ, 0, sc->ti_irq); if (sc->ti_res) { bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), sc->ti_res); } if (ifp) if_free(ifp); if (sc->ti_membuf) free(sc->ti_membuf, M_DEVBUF); if (sc->ti_membuf2) free(sc->ti_membuf2, M_DEVBUF); mtx_destroy(&sc->ti_mtx); return (0); } #ifdef TI_JUMBO_HDRSPLIT /* * If hdr_len is 0, that means that header splitting wasn't done on * this packet for some reason. The two most likely reasons are that * the protocol isn't a supported protocol for splitting, or this * packet had a fragment offset that wasn't 0. * * The header length, if it is non-zero, will always be the length of * the headers on the packet, but that length could be longer than the * first mbuf. So we take the minimum of the two as the actual * length.
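* (Hypothetical numbers: for a 1514-byte frame split with hdr_len = 66, * the first mbuf keeps the 66 header bytes and the rest of the chain is * trimmed to the remaining 1448 bytes of payload; pkt_len must come out * to exactly zero or the panic in ti_hdr_split() fires.)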
*/ static __inline void ti_hdr_split(struct mbuf *top, int hdr_len, int pkt_len, int idx) { int i = 0; int lengths[4] = {0, 0, 0, 0}; struct mbuf *m, *mp; if (hdr_len != 0) top->m_len = min(hdr_len, top->m_len); pkt_len -= top->m_len; lengths[i++] = top->m_len; mp = top; for (m = top->m_next; m && pkt_len; m = m->m_next) { m->m_len = m->m_ext.ext_size = min(m->m_len, pkt_len); pkt_len -= m->m_len; lengths[i++] = m->m_len; mp = m; } #if 0 if (hdr_len != 0) printf("got split packet: "); else printf("got non-split packet: "); printf("%d,%d,%d,%d = %d\n", lengths[0], lengths[1], lengths[2], lengths[3], lengths[0] + lengths[1] + lengths[2] + lengths[3]); #endif if (pkt_len) panic("header splitting didn't"); if (m) { m_freem(m); mp->m_next = NULL; } if (mp->m_next != NULL) panic("ti_hdr_split: last mbuf in chain should be null"); } #endif /* TI_JUMBO_HDRSPLIT */ static void ti_discard_std(struct ti_softc *sc, int i) { struct ti_rx_desc *r; r = &sc->ti_rdata.ti_rx_std_ring[i]; r->ti_len = MCLBYTES - ETHER_ALIGN; r->ti_type = TI_BDTYPE_RECV_BD; r->ti_flags = 0; r->ti_vlan_tag = 0; r->ti_tcp_udp_cksum = 0; if (sc->ti_ifp->if_capenable & IFCAP_RXCSUM) r->ti_flags |= TI_BDFLAG_TCP_UDP_CKSUM | TI_BDFLAG_IP_CKSUM; r->ti_idx = i; } static void ti_discard_mini(struct ti_softc *sc, int i) { struct ti_rx_desc *r; r = &sc->ti_rdata.ti_rx_mini_ring[i]; r->ti_len = MHLEN - ETHER_ALIGN; r->ti_type = TI_BDTYPE_RECV_BD; r->ti_flags = TI_BDFLAG_MINI_RING; r->ti_vlan_tag = 0; r->ti_tcp_udp_cksum = 0; if (sc->ti_ifp->if_capenable & IFCAP_RXCSUM) r->ti_flags |= TI_BDFLAG_TCP_UDP_CKSUM | TI_BDFLAG_IP_CKSUM; r->ti_idx = i; } #ifndef TI_SF_BUF_JUMBO static void ti_discard_jumbo(struct ti_softc *sc, int i) { struct ti_rx_desc *r; r = &sc->ti_rdata.ti_rx_jumbo_ring[i]; r->ti_len = MJUM9BYTES - ETHER_ALIGN; r->ti_type = TI_BDTYPE_RECV_JUMBO_BD; r->ti_flags = TI_BDFLAG_JUMBO_RING; r->ti_vlan_tag = 0; r->ti_tcp_udp_cksum = 0; if (sc->ti_ifp->if_capenable & IFCAP_RXCSUM) r->ti_flags |= TI_BDFLAG_TCP_UDP_CKSUM | TI_BDFLAG_IP_CKSUM; r->ti_idx = i; } #endif /* * Frame reception handling. This is called if there's a frame * on the receive return list. 
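* The return ring carries only completion descriptors; the mbufs * themselves were posted earlier on one of the receive rings, which is * why each arm of the loop below both recovers the mbuf and refills or * recycles the ring slot.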
* * Note: we have to be able to handle three possibilities here: * 1) the frame is from the mini receive ring (can only happen on Tigon 2 boards) * 2) the frame is from the jumbo receive ring * 3) the frame is from the standard receive ring */ static void ti_rxeof(struct ti_softc *sc) { struct ifnet *ifp; #ifdef TI_SF_BUF_JUMBO bus_dmamap_t map; #endif struct ti_cmd_desc cmd; int jumbocnt, minicnt, stdcnt, ti_len; TI_LOCK_ASSERT(sc); ifp = sc->ti_ifp; bus_dmamap_sync(sc->ti_cdata.ti_rx_std_ring_tag, sc->ti_cdata.ti_rx_std_ring_map, BUS_DMASYNC_POSTWRITE); if (ifp->if_mtu > ETHERMTU + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN) bus_dmamap_sync(sc->ti_cdata.ti_rx_jumbo_ring_tag, sc->ti_cdata.ti_rx_jumbo_ring_map, BUS_DMASYNC_POSTWRITE); if (sc->ti_rdata.ti_rx_mini_ring != NULL) bus_dmamap_sync(sc->ti_cdata.ti_rx_mini_ring_tag, sc->ti_cdata.ti_rx_mini_ring_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_sync(sc->ti_cdata.ti_rx_return_ring_tag, sc->ti_cdata.ti_rx_return_ring_map, BUS_DMASYNC_POSTREAD); jumbocnt = minicnt = stdcnt = 0; while (sc->ti_rx_saved_considx != sc->ti_return_prodidx.ti_idx) { struct ti_rx_desc *cur_rx; uint32_t rxidx; struct mbuf *m = NULL; uint16_t vlan_tag = 0; int have_tag = 0; cur_rx = &sc->ti_rdata.ti_rx_return_ring[sc->ti_rx_saved_considx]; rxidx = cur_rx->ti_idx; ti_len = cur_rx->ti_len; TI_INC(sc->ti_rx_saved_considx, TI_RETURN_RING_CNT); if (cur_rx->ti_flags & TI_BDFLAG_VLAN_TAG) { have_tag = 1; vlan_tag = cur_rx->ti_vlan_tag; } if (cur_rx->ti_flags & TI_BDFLAG_JUMBO_RING) { jumbocnt++; TI_INC(sc->ti_jumbo, TI_JUMBO_RX_RING_CNT); m = sc->ti_cdata.ti_rx_jumbo_chain[rxidx]; #ifndef TI_SF_BUF_JUMBO if (cur_rx->ti_flags & TI_BDFLAG_ERROR) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); ti_discard_jumbo(sc, rxidx); continue; } if (ti_newbuf_jumbo(sc, rxidx, NULL) != 0) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); ti_discard_jumbo(sc, rxidx); continue; } m->m_len = ti_len; #else /* !TI_SF_BUF_JUMBO */ sc->ti_cdata.ti_rx_jumbo_chain[rxidx] = NULL; map = sc->ti_cdata.ti_rx_jumbo_maps[rxidx]; bus_dmamap_sync(sc->ti_cdata.ti_rx_jumbo_tag, map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->ti_cdata.ti_rx_jumbo_tag, map); if (cur_rx->ti_flags & TI_BDFLAG_ERROR) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); ti_newbuf_jumbo(sc, sc->ti_jumbo, m); continue; } if (ti_newbuf_jumbo(sc, sc->ti_jumbo, NULL) == ENOBUFS) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); ti_newbuf_jumbo(sc, sc->ti_jumbo, m); continue; } #ifdef TI_JUMBO_HDRSPLIT if (sc->ti_hdrsplit) ti_hdr_split(m, TI_HOSTADDR(cur_rx->ti_addr), ti_len, rxidx); else #endif /* TI_JUMBO_HDRSPLIT */ m_adj(m, ti_len - m->m_pkthdr.len); #endif /* TI_SF_BUF_JUMBO */ } else if (cur_rx->ti_flags & TI_BDFLAG_MINI_RING) { minicnt++; TI_INC(sc->ti_mini, TI_MINI_RX_RING_CNT); m = sc->ti_cdata.ti_rx_mini_chain[rxidx]; if (cur_rx->ti_flags & TI_BDFLAG_ERROR) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); ti_discard_mini(sc, rxidx); continue; } if (ti_newbuf_mini(sc, rxidx) != 0) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); ti_discard_mini(sc, rxidx); continue; } m->m_len = ti_len; } else { stdcnt++; TI_INC(sc->ti_std, TI_STD_RX_RING_CNT); m = sc->ti_cdata.ti_rx_std_chain[rxidx]; if (cur_rx->ti_flags & TI_BDFLAG_ERROR) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); ti_discard_std(sc, rxidx); continue; } if (ti_newbuf_std(sc, rxidx) != 0) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); ti_discard_std(sc, rxidx); continue; } m->m_len = ti_len; } m->m_pkthdr.len = ti_len; if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); m->m_pkthdr.rcvif = ifp; if (ifp->if_capenable &
IFCAP_RXCSUM) { if (cur_rx->ti_flags & TI_BDFLAG_IP_CKSUM) { m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; if ((cur_rx->ti_ip_cksum ^ 0xffff) == 0) m->m_pkthdr.csum_flags |= CSUM_IP_VALID; } if (cur_rx->ti_flags & TI_BDFLAG_TCP_UDP_CKSUM) { m->m_pkthdr.csum_data = cur_rx->ti_tcp_udp_cksum; m->m_pkthdr.csum_flags |= CSUM_DATA_VALID; } } /* * If we received a packet with a vlan tag, * tag it before passing the packet upward. */ if (have_tag) { m->m_pkthdr.ether_vtag = vlan_tag; m->m_flags |= M_VLANTAG; } TI_UNLOCK(sc); (*ifp->if_input)(ifp, m); TI_LOCK(sc); } bus_dmamap_sync(sc->ti_cdata.ti_rx_return_ring_tag, sc->ti_cdata.ti_rx_return_ring_map, BUS_DMASYNC_PREREAD); /* Only necessary on the Tigon 1. */ if (sc->ti_hwrev == TI_HWREV_TIGON) CSR_WRITE_4(sc, TI_GCR_RXRETURNCONS_IDX, sc->ti_rx_saved_considx); if (stdcnt > 0) { bus_dmamap_sync(sc->ti_cdata.ti_rx_std_ring_tag, sc->ti_cdata.ti_rx_std_ring_map, BUS_DMASYNC_PREWRITE); TI_UPDATE_STDPROD(sc, sc->ti_std); } if (minicnt > 0) { bus_dmamap_sync(sc->ti_cdata.ti_rx_mini_ring_tag, sc->ti_cdata.ti_rx_mini_ring_map, BUS_DMASYNC_PREWRITE); TI_UPDATE_MINIPROD(sc, sc->ti_mini); } if (jumbocnt > 0) { bus_dmamap_sync(sc->ti_cdata.ti_rx_jumbo_ring_tag, sc->ti_cdata.ti_rx_jumbo_ring_map, BUS_DMASYNC_PREWRITE); TI_UPDATE_JUMBOPROD(sc, sc->ti_jumbo); } } static void ti_txeof(struct ti_softc *sc) { struct ti_txdesc *txd; struct ti_tx_desc txdesc; struct ti_tx_desc *cur_tx = NULL; struct ifnet *ifp; int idx; ifp = sc->ti_ifp; txd = STAILQ_FIRST(&sc->ti_cdata.ti_txbusyq); if (txd == NULL) return; if (sc->ti_rdata.ti_tx_ring != NULL) bus_dmamap_sync(sc->ti_cdata.ti_tx_ring_tag, sc->ti_cdata.ti_tx_ring_map, BUS_DMASYNC_POSTWRITE); /* * Go through our tx ring and free mbufs for those * frames that have been sent. */ for (idx = sc->ti_tx_saved_considx; idx != sc->ti_tx_considx.ti_idx; TI_INC(idx, TI_TX_RING_CNT)) { if (sc->ti_hwrev == TI_HWREV_TIGON) { ti_mem_read(sc, TI_TX_RING_BASE + idx * sizeof(txdesc), sizeof(txdesc), &txdesc); cur_tx = &txdesc; } else cur_tx = &sc->ti_rdata.ti_tx_ring[idx]; sc->ti_txcnt--; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if ((cur_tx->ti_flags & TI_BDFLAG_END) == 0) continue; bus_dmamap_sync(sc->ti_cdata.ti_tx_tag, txd->tx_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->ti_cdata.ti_tx_tag, txd->tx_dmamap); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); m_freem(txd->tx_m); txd->tx_m = NULL; STAILQ_REMOVE_HEAD(&sc->ti_cdata.ti_txbusyq, tx_q); STAILQ_INSERT_TAIL(&sc->ti_cdata.ti_txfreeq, txd, tx_q); txd = STAILQ_FIRST(&sc->ti_cdata.ti_txbusyq); } sc->ti_tx_saved_considx = idx; if (sc->ti_txcnt == 0) sc->ti_timer = 0; } static void ti_intr(void *xsc) { struct ti_softc *sc; struct ifnet *ifp; sc = xsc; TI_LOCK(sc); ifp = sc->ti_ifp; /* Make sure this is really our interrupt. */ if (!(CSR_READ_4(sc, TI_MISC_HOST_CTL) & TI_MHC_INTSTATE)) { TI_UNLOCK(sc); return; } /* Ack interrupt and stop others from occurring. */ CSR_WRITE_4(sc, TI_MB_HOSTINTR, 1); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { bus_dmamap_sync(sc->ti_cdata.ti_status_tag, sc->ti_cdata.ti_status_map, BUS_DMASYNC_POSTREAD); /* Check RX return ring producer/consumer */ ti_rxeof(sc); /* Check TX ring producer/consumer */ ti_txeof(sc); bus_dmamap_sync(sc->ti_cdata.ti_status_tag, sc->ti_cdata.ti_status_map, BUS_DMASYNC_PREREAD); } ti_handle_events(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { /* Re-enable interrupts. 
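* Writing 0 to the host interrupt mailbox unmasks the NIC again; the * transmit path is then kicked in case frames were queued while the * mailbox held interrupts off.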
*/ CSR_WRITE_4(sc, TI_MB_HOSTINTR, 0); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) ti_start_locked(ifp); } TI_UNLOCK(sc); } static uint64_t ti_get_counter(struct ifnet *ifp, ift_counter cnt) { switch (cnt) { case IFCOUNTER_COLLISIONS: { struct ti_softc *sc; struct ti_stats *s; uint64_t rv; sc = if_getsoftc(ifp); s = &sc->ti_rdata.ti_info->ti_stats; TI_LOCK(sc); bus_dmamap_sync(sc->ti_cdata.ti_gib_tag, sc->ti_cdata.ti_gib_map, BUS_DMASYNC_POSTREAD); rv = s->dot3StatsSingleCollisionFrames + s->dot3StatsMultipleCollisionFrames + s->dot3StatsExcessiveCollisions + s->dot3StatsLateCollisions; bus_dmamap_sync(sc->ti_cdata.ti_gib_tag, sc->ti_cdata.ti_gib_map, BUS_DMASYNC_PREREAD); TI_UNLOCK(sc); return (rv); } default: return (if_get_counter_default(ifp, cnt)); } } /* * Encapsulate an mbuf chain in the tx ring by coupling the mbuf data * pointers to descriptors. */ static int ti_encap(struct ti_softc *sc, struct mbuf **m_head) { struct ti_txdesc *txd; struct ti_tx_desc *f; struct ti_tx_desc txdesc; struct mbuf *m; bus_dma_segment_t txsegs[TI_MAXTXSEGS]; uint16_t csum_flags; int error, frag, i, nseg; if ((txd = STAILQ_FIRST(&sc->ti_cdata.ti_txfreeq)) == NULL) return (ENOBUFS); error = bus_dmamap_load_mbuf_sg(sc->ti_cdata.ti_tx_tag, txd->tx_dmamap, *m_head, txsegs, &nseg, 0); if (error == EFBIG) { m = m_defrag(*m_head, M_NOWAIT); if (m == NULL) { m_freem(*m_head); *m_head = NULL; return (ENOMEM); } *m_head = m; error = bus_dmamap_load_mbuf_sg(sc->ti_cdata.ti_tx_tag, txd->tx_dmamap, *m_head, txsegs, &nseg, 0); if (error) { m_freem(*m_head); *m_head = NULL; return (error); } } else if (error != 0) return (error); if (nseg == 0) { m_freem(*m_head); *m_head = NULL; return (EIO); } if (sc->ti_txcnt + nseg >= TI_TX_RING_CNT) { bus_dmamap_unload(sc->ti_cdata.ti_tx_tag, txd->tx_dmamap); return (ENOBUFS); } bus_dmamap_sync(sc->ti_cdata.ti_tx_tag, txd->tx_dmamap, BUS_DMASYNC_PREWRITE); m = *m_head; csum_flags = 0; if (m->m_pkthdr.csum_flags & CSUM_IP) csum_flags |= TI_BDFLAG_IP_CKSUM; if (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)) csum_flags |= TI_BDFLAG_TCP_UDP_CKSUM; frag = sc->ti_tx_saved_prodidx; for (i = 0; i < nseg; i++) { if (sc->ti_hwrev == TI_HWREV_TIGON) { bzero(&txdesc, sizeof(txdesc)); f = &txdesc; } else f = &sc->ti_rdata.ti_tx_ring[frag]; ti_hostaddr64(&f->ti_addr, txsegs[i].ds_addr); f->ti_len = txsegs[i].ds_len; f->ti_flags = csum_flags; if (m->m_flags & M_VLANTAG) { f->ti_flags |= TI_BDFLAG_VLAN_TAG; f->ti_vlan_tag = m->m_pkthdr.ether_vtag; } else { f->ti_vlan_tag = 0; } if (sc->ti_hwrev == TI_HWREV_TIGON) ti_mem_write(sc, TI_TX_RING_BASE + frag * sizeof(txdesc), sizeof(txdesc), &txdesc); TI_INC(frag, TI_TX_RING_CNT); } sc->ti_tx_saved_prodidx = frag; /* set TI_BDFLAG_END on the last descriptor */ frag = (frag + TI_TX_RING_CNT - 1) % TI_TX_RING_CNT; if (sc->ti_hwrev == TI_HWREV_TIGON) { txdesc.ti_flags |= TI_BDFLAG_END; ti_mem_write(sc, TI_TX_RING_BASE + frag * sizeof(txdesc), sizeof(txdesc), &txdesc); } else sc->ti_rdata.ti_tx_ring[frag].ti_flags |= TI_BDFLAG_END; STAILQ_REMOVE_HEAD(&sc->ti_cdata.ti_txfreeq, tx_q); STAILQ_INSERT_TAIL(&sc->ti_cdata.ti_txbusyq, txd, tx_q); txd->tx_m = m; sc->ti_txcnt += nseg; return (0); } static void ti_start(struct ifnet *ifp) { struct ti_softc *sc; sc = ifp->if_softc; TI_LOCK(sc); ti_start_locked(ifp); TI_UNLOCK(sc); } /* * Main transmit routine. To avoid having to do mbuf copies, we put pointers * to the mbuf data regions directly in the transmit descriptors. 
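* The mbufs therefore must stay allocated until ti_txeof() sees * TI_BDFLAG_END come back for the frame, which is why ti_encap() parks * each chain on ti_txbusyq.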
*/ static void ti_start_locked(struct ifnet *ifp) { struct ti_softc *sc; struct mbuf *m_head = NULL; int enq = 0; sc = ifp->if_softc; for (; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) && sc->ti_txcnt < (TI_TX_RING_CNT - 16);) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; /* * Pack the data into the transmit ring. If we * don't have room, set the OACTIVE flag and wait * for the NIC to drain the ring. */ if (ti_encap(sc, &m_head)) { if (m_head == NULL) break; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } enq++; /* * If there's a BPF listener, bounce a copy of this frame * to him. */ ETHER_BPF_MTAP(ifp, m_head); } if (enq > 0) { if (sc->ti_rdata.ti_tx_ring != NULL) bus_dmamap_sync(sc->ti_cdata.ti_tx_ring_tag, sc->ti_cdata.ti_tx_ring_map, BUS_DMASYNC_PREWRITE); /* Transmit */ CSR_WRITE_4(sc, TI_MB_SENDPROD_IDX, sc->ti_tx_saved_prodidx); /* * Set a timeout in case the chip goes out to lunch. */ sc->ti_timer = 5; } } static void ti_init(void *xsc) { struct ti_softc *sc; sc = xsc; TI_LOCK(sc); ti_init_locked(sc); TI_UNLOCK(sc); } static void ti_init_locked(void *xsc) { struct ti_softc *sc = xsc; if (sc->ti_ifp->if_drv_flags & IFF_DRV_RUNNING) return; /* Cancel pending I/O and flush buffers. */ ti_stop(sc); /* Init the gen info block, ring control blocks and firmware. */ if (ti_gibinit(sc)) { device_printf(sc->ti_dev, "initialization failure\n"); return; } } static void ti_init2(struct ti_softc *sc) { struct ti_cmd_desc cmd; struct ifnet *ifp; uint8_t *ea; struct ifmedia *ifm; int tmp; TI_LOCK_ASSERT(sc); ifp = sc->ti_ifp; /* Specify MTU and interface index. */ CSR_WRITE_4(sc, TI_GCR_IFINDEX, device_get_unit(sc->ti_dev)); CSR_WRITE_4(sc, TI_GCR_IFMTU, ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN); TI_DO_CMD(TI_CMD_UPDATE_GENCOM, 0, 0); /* Load our MAC address. */ ea = IF_LLADDR(sc->ti_ifp); CSR_WRITE_4(sc, TI_GCR_PAR0, (ea[0] << 8) | ea[1]); CSR_WRITE_4(sc, TI_GCR_PAR1, (ea[2] << 24) | (ea[3] << 16) | (ea[4] << 8) | ea[5]); TI_DO_CMD(TI_CMD_SET_MAC_ADDR, 0, 0); /* Enable or disable promiscuous mode as needed. */ if (ifp->if_flags & IFF_PROMISC) { TI_DO_CMD(TI_CMD_SET_PROMISC_MODE, TI_CMD_CODE_PROMISC_ENB, 0); } else { TI_DO_CMD(TI_CMD_SET_PROMISC_MODE, TI_CMD_CODE_PROMISC_DIS, 0); } /* Program multicast filter. */ ti_setmulti(sc); /* * If this is a Tigon 1, we should tell the * firmware to use software packet filtering. */ if (sc->ti_hwrev == TI_HWREV_TIGON) { TI_DO_CMD(TI_CMD_FDR_FILTERING, TI_CMD_CODE_FILT_ENB, 0); } /* Init RX ring. */ if (ti_init_rx_ring_std(sc) != 0) { /* XXX */ device_printf(sc->ti_dev, "no memory for std Rx buffers.\n"); return; } /* Init jumbo RX ring. */ if (ifp->if_mtu > ETHERMTU + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN) { if (ti_init_rx_ring_jumbo(sc) != 0) { /* XXX */ device_printf(sc->ti_dev, "no memory for jumbo Rx buffers.\n"); return; } } /* * If this is a Tigon 2, we can also configure the * mini ring. */ if (sc->ti_hwrev == TI_HWREV_TIGON_II) { if (ti_init_rx_ring_mini(sc) != 0) { /* XXX */ device_printf(sc->ti_dev, "no memory for mini Rx buffers.\n"); return; } } CSR_WRITE_4(sc, TI_GCR_RXRETURNCONS_IDX, 0); sc->ti_rx_saved_considx = 0; /* Init TX ring. */ ti_init_tx_ring(sc); /* Tell firmware we're alive. */ TI_DO_CMD(TI_CMD_HOST_STATE, TI_CMD_CODE_STACK_UP, 0); /* Enable host interrupts. */ CSR_WRITE_4(sc, TI_MB_HOSTINTR, 0); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; callout_reset(&sc->ti_watchdog, hz, ti_watchdog, sc); /* * Make sure to set media properly. 
We have to do this * here since we have to issue commands in order to set * the link negotiation and we can't issue commands until * the firmware is running. */ ifm = &sc->ifmedia; tmp = ifm->ifm_media; ifm->ifm_media = ifm->ifm_cur->ifm_media; ti_ifmedia_upd_locked(sc); ifm->ifm_media = tmp; } /* * Set media options. */ static int ti_ifmedia_upd(struct ifnet *ifp) { struct ti_softc *sc; int error; sc = ifp->if_softc; TI_LOCK(sc); error = ti_ifmedia_upd_locked(sc); TI_UNLOCK(sc); return (error); } static int ti_ifmedia_upd_locked(struct ti_softc *sc) { struct ifmedia *ifm; struct ti_cmd_desc cmd; uint32_t flowctl; ifm = &sc->ifmedia; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); flowctl = 0; switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: /* * Transmit flow control doesn't work on the Tigon 1. */ flowctl = TI_GLNK_RX_FLOWCTL_Y; /* * Transmit flow control can also cause problems on the * Tigon 2, apparently with both the copper and fiber * boards. The symptom is that the interface will just * hang. This was reproduced with Alteon 180 switches. */ #if 0 if (sc->ti_hwrev != TI_HWREV_TIGON) flowctl |= TI_GLNK_TX_FLOWCTL_Y; #endif CSR_WRITE_4(sc, TI_GCR_GLINK, TI_GLNK_PREF|TI_GLNK_1000MB| TI_GLNK_FULL_DUPLEX| flowctl | TI_GLNK_AUTONEGENB|TI_GLNK_ENB); flowctl = TI_LNK_RX_FLOWCTL_Y; #if 0 if (sc->ti_hwrev != TI_HWREV_TIGON) flowctl |= TI_LNK_TX_FLOWCTL_Y; #endif CSR_WRITE_4(sc, TI_GCR_LINK, TI_LNK_100MB|TI_LNK_10MB| TI_LNK_FULL_DUPLEX|TI_LNK_HALF_DUPLEX| flowctl | TI_LNK_AUTONEGENB|TI_LNK_ENB); TI_DO_CMD(TI_CMD_LINK_NEGOTIATION, TI_CMD_CODE_NEGOTIATE_BOTH, 0); break; case IFM_1000_SX: case IFM_1000_T: flowctl = TI_GLNK_RX_FLOWCTL_Y; #if 0 if (sc->ti_hwrev != TI_HWREV_TIGON) flowctl |= TI_GLNK_TX_FLOWCTL_Y; #endif CSR_WRITE_4(sc, TI_GCR_GLINK, TI_GLNK_PREF|TI_GLNK_1000MB| flowctl |TI_GLNK_ENB); CSR_WRITE_4(sc, TI_GCR_LINK, 0); if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) { TI_SETBIT(sc, TI_GCR_GLINK, TI_GLNK_FULL_DUPLEX); } TI_DO_CMD(TI_CMD_LINK_NEGOTIATION, TI_CMD_CODE_NEGOTIATE_GIGABIT, 0); break; case IFM_100_FX: case IFM_10_FL: case IFM_100_TX: case IFM_10_T: flowctl = TI_LNK_RX_FLOWCTL_Y; #if 0 if (sc->ti_hwrev != TI_HWREV_TIGON) flowctl |= TI_LNK_TX_FLOWCTL_Y; #endif CSR_WRITE_4(sc, TI_GCR_GLINK, 0); CSR_WRITE_4(sc, TI_GCR_LINK, TI_LNK_ENB|TI_LNK_PREF|flowctl); if (IFM_SUBTYPE(ifm->ifm_media) == IFM_100_FX || IFM_SUBTYPE(ifm->ifm_media) == IFM_100_TX) { TI_SETBIT(sc, TI_GCR_LINK, TI_LNK_100MB); } else { TI_SETBIT(sc, TI_GCR_LINK, TI_LNK_10MB); } if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) { TI_SETBIT(sc, TI_GCR_LINK, TI_LNK_FULL_DUPLEX); } else { TI_SETBIT(sc, TI_GCR_LINK, TI_LNK_HALF_DUPLEX); } TI_DO_CMD(TI_CMD_LINK_NEGOTIATION, TI_CMD_CODE_NEGOTIATE_10_100, 0); break; } return (0); } /* * Report current media status. 
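* The up/down state comes from the last firmware link event * (sc->ti_linkstat); speed and duplex are read back from the GLINK/LINK * status registers.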
*/ static void ti_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct ti_softc *sc; uint32_t media = 0; sc = ifp->if_softc; TI_LOCK(sc); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (sc->ti_linkstat == TI_EV_CODE_LINK_DOWN) { TI_UNLOCK(sc); return; } ifmr->ifm_status |= IFM_ACTIVE; if (sc->ti_linkstat == TI_EV_CODE_GIG_LINK_UP) { media = CSR_READ_4(sc, TI_GCR_GLINK_STAT); if (sc->ti_copper) ifmr->ifm_active |= IFM_1000_T; else ifmr->ifm_active |= IFM_1000_SX; if (media & TI_GLNK_FULL_DUPLEX) ifmr->ifm_active |= IFM_FDX; else ifmr->ifm_active |= IFM_HDX; } else if (sc->ti_linkstat == TI_EV_CODE_LINK_UP) { media = CSR_READ_4(sc, TI_GCR_LINK_STAT); if (sc->ti_copper) { if (media & TI_LNK_100MB) ifmr->ifm_active |= IFM_100_TX; if (media & TI_LNK_10MB) ifmr->ifm_active |= IFM_10_T; } else { if (media & TI_LNK_100MB) ifmr->ifm_active |= IFM_100_FX; if (media & TI_LNK_10MB) ifmr->ifm_active |= IFM_10_FL; } if (media & TI_LNK_FULL_DUPLEX) ifmr->ifm_active |= IFM_FDX; if (media & TI_LNK_HALF_DUPLEX) ifmr->ifm_active |= IFM_HDX; } TI_UNLOCK(sc); } static int ti_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ti_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; struct ti_cmd_desc cmd; int mask, error = 0; switch (command) { case SIOCSIFMTU: TI_LOCK(sc); if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > TI_JUMBO_MTU) error = EINVAL; else { ifp->if_mtu = ifr->ifr_mtu; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; ti_init_locked(sc); } } TI_UNLOCK(sc); break; case SIOCSIFFLAGS: TI_LOCK(sc); if (ifp->if_flags & IFF_UP) { /* * If only the state of the PROMISC flag changed, * then just use the 'set promisc mode' command * instead of reinitializing the entire NIC. Doing * a full re-init means reloading the firmware and * waiting for it to start up, which may take a * second or two. 
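* The previous flag state is kept in sc->ti_if_flags (updated below), * which is what lets us tell a pure PROMISC toggle apart from other * flag changes.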
*/ if (ifp->if_drv_flags & IFF_DRV_RUNNING && ifp->if_flags & IFF_PROMISC && !(sc->ti_if_flags & IFF_PROMISC)) { TI_DO_CMD(TI_CMD_SET_PROMISC_MODE, TI_CMD_CODE_PROMISC_ENB, 0); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING && !(ifp->if_flags & IFF_PROMISC) && sc->ti_if_flags & IFF_PROMISC) { TI_DO_CMD(TI_CMD_SET_PROMISC_MODE, TI_CMD_CODE_PROMISC_DIS, 0); } else ti_init_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ti_stop(sc); } } sc->ti_if_flags = ifp->if_flags; TI_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: TI_LOCK(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) ti_setmulti(sc); TI_UNLOCK(sc); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->ifmedia, command); break; case SIOCSIFCAP: TI_LOCK(sc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if ((mask & IFCAP_TXCSUM) != 0 && (ifp->if_capabilities & IFCAP_TXCSUM) != 0) { ifp->if_capenable ^= IFCAP_TXCSUM; if ((ifp->if_capenable & IFCAP_TXCSUM) != 0) ifp->if_hwassist |= TI_CSUM_FEATURES; else ifp->if_hwassist &= ~TI_CSUM_FEATURES; } if ((mask & IFCAP_RXCSUM) != 0 && (ifp->if_capabilities & IFCAP_RXCSUM) != 0) ifp->if_capenable ^= IFCAP_RXCSUM; if ((mask & IFCAP_VLAN_HWTAGGING) != 0 && (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING) != 0) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if ((mask & IFCAP_VLAN_HWCSUM) != 0 && (ifp->if_capabilities & IFCAP_VLAN_HWCSUM) != 0) ifp->if_capenable ^= IFCAP_VLAN_HWCSUM; if ((mask & (IFCAP_TXCSUM | IFCAP_RXCSUM | IFCAP_VLAN_HWTAGGING)) != 0) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; ti_init_locked(sc); } } TI_UNLOCK(sc); VLAN_CAPABILITIES(ifp); break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } static int ti_open(struct cdev *dev, int flags, int fmt, struct thread *td) { struct ti_softc *sc; sc = dev->si_drv1; if (sc == NULL) return (ENODEV); TI_LOCK(sc); sc->ti_flags |= TI_FLAG_DEBUGING; TI_UNLOCK(sc); return (0); } static int ti_close(struct cdev *dev, int flag, int fmt, struct thread *td) { struct ti_softc *sc; sc = dev->si_drv1; if (sc == NULL) return (ENODEV); TI_LOCK(sc); sc->ti_flags &= ~TI_FLAG_DEBUGING; TI_UNLOCK(sc); return (0); } /* * This ioctl routine goes along with the Tigon character device. 
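* It exposes the NIC statistics, the coalescing parameters, the * firmware trace buffer and, for debugging, raw register and memory * access.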
*/ static int ti_ioctl2(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td) { struct ti_softc *sc; int error; sc = dev->si_drv1; if (sc == NULL) return (ENODEV); error = 0; switch (cmd) { case TIIOCGETSTATS: { struct ti_stats *outstats; outstats = (struct ti_stats *)addr; TI_LOCK(sc); bus_dmamap_sync(sc->ti_cdata.ti_gib_tag, sc->ti_cdata.ti_gib_map, BUS_DMASYNC_POSTREAD); bcopy(&sc->ti_rdata.ti_info->ti_stats, outstats, sizeof(struct ti_stats)); bus_dmamap_sync(sc->ti_cdata.ti_gib_tag, sc->ti_cdata.ti_gib_map, BUS_DMASYNC_PREREAD); TI_UNLOCK(sc); break; } case TIIOCGETPARAMS: { struct ti_params *params; params = (struct ti_params *)addr; TI_LOCK(sc); params->ti_stat_ticks = sc->ti_stat_ticks; params->ti_rx_coal_ticks = sc->ti_rx_coal_ticks; params->ti_tx_coal_ticks = sc->ti_tx_coal_ticks; params->ti_rx_max_coal_bds = sc->ti_rx_max_coal_bds; params->ti_tx_max_coal_bds = sc->ti_tx_max_coal_bds; params->ti_tx_buf_ratio = sc->ti_tx_buf_ratio; params->param_mask = TI_PARAM_ALL; TI_UNLOCK(sc); break; } case TIIOCSETPARAMS: { struct ti_params *params; params = (struct ti_params *)addr; TI_LOCK(sc); if (params->param_mask & TI_PARAM_STAT_TICKS) { sc->ti_stat_ticks = params->ti_stat_ticks; CSR_WRITE_4(sc, TI_GCR_STAT_TICKS, sc->ti_stat_ticks); } if (params->param_mask & TI_PARAM_RX_COAL_TICKS) { sc->ti_rx_coal_ticks = params->ti_rx_coal_ticks; CSR_WRITE_4(sc, TI_GCR_RX_COAL_TICKS, sc->ti_rx_coal_ticks); } if (params->param_mask & TI_PARAM_TX_COAL_TICKS) { sc->ti_tx_coal_ticks = params->ti_tx_coal_ticks; CSR_WRITE_4(sc, TI_GCR_TX_COAL_TICKS, sc->ti_tx_coal_ticks); } if (params->param_mask & TI_PARAM_RX_COAL_BDS) { sc->ti_rx_max_coal_bds = params->ti_rx_max_coal_bds; CSR_WRITE_4(sc, TI_GCR_RX_MAX_COAL_BD, sc->ti_rx_max_coal_bds); } if (params->param_mask & TI_PARAM_TX_COAL_BDS) { sc->ti_tx_max_coal_bds = params->ti_tx_max_coal_bds; CSR_WRITE_4(sc, TI_GCR_TX_MAX_COAL_BD, sc->ti_tx_max_coal_bds); } if (params->param_mask & TI_PARAM_TX_BUF_RATIO) { sc->ti_tx_buf_ratio = params->ti_tx_buf_ratio; CSR_WRITE_4(sc, TI_GCR_TX_BUFFER_RATIO, sc->ti_tx_buf_ratio); } TI_UNLOCK(sc); break; } case TIIOCSETTRACE: { ti_trace_type trace_type; trace_type = *(ti_trace_type *)addr; /* * Set tracing to whatever the user asked for. Setting * this register to 0 should have the effect of disabling * tracing. 
*/ TI_LOCK(sc); CSR_WRITE_4(sc, TI_GCR_NIC_TRACING, trace_type); TI_UNLOCK(sc); break; } case TIIOCGETTRACE: { struct ti_trace_buf *trace_buf; uint32_t trace_start, cur_trace_ptr, trace_len; trace_buf = (struct ti_trace_buf *)addr; TI_LOCK(sc); trace_start = CSR_READ_4(sc, TI_GCR_NICTRACE_START); cur_trace_ptr = CSR_READ_4(sc, TI_GCR_NICTRACE_PTR); trace_len = CSR_READ_4(sc, TI_GCR_NICTRACE_LEN); #if 0 if_printf(sc->ti_ifp, "trace_start = %#x, cur_trace_ptr = %#x, " "trace_len = %d\n", trace_start, cur_trace_ptr, trace_len); if_printf(sc->ti_ifp, "trace_buf->buf_len = %d\n", trace_buf->buf_len); #endif error = ti_copy_mem(sc, trace_start, min(trace_len, trace_buf->buf_len), (caddr_t)trace_buf->buf, 1, 1); if (error == 0) { trace_buf->fill_len = min(trace_len, trace_buf->buf_len); if (cur_trace_ptr < trace_start) trace_buf->cur_trace_ptr = trace_start - cur_trace_ptr; else trace_buf->cur_trace_ptr = cur_trace_ptr - trace_start; } else trace_buf->fill_len = 0; TI_UNLOCK(sc); break; } /* * For debugging, five ioctls are needed: * ALT_ATTACH * ALT_READ_TG_REG * ALT_WRITE_TG_REG * ALT_READ_TG_MEM * ALT_WRITE_TG_MEM */ case ALT_ATTACH: /* * From what I can tell, Alteon's Solaris Tigon driver * only has one character device, so you have to attach * to the Tigon board you're interested in. This seems * like a not-so-good way to do things, since unless you * subsequently specify the unit number of the device * you're interested in every ioctl, you'll only be * able to debug one board at a time. */ break; case ALT_READ_TG_MEM: case ALT_WRITE_TG_MEM: { struct tg_mem *mem_param; uint32_t sram_end, scratch_end; mem_param = (struct tg_mem *)addr; if (sc->ti_hwrev == TI_HWREV_TIGON) { sram_end = TI_END_SRAM_I; scratch_end = TI_END_SCRATCH_I; } else { sram_end = TI_END_SRAM_II; scratch_end = TI_END_SCRATCH_II; } /* * For now, we'll only handle accessing regular SRAM, * nothing else. */ TI_LOCK(sc); if (mem_param->tgAddr >= TI_BEG_SRAM && mem_param->tgAddr + mem_param->len <= sram_end) { /* * In this instance, we always copy to/from user * space, so the user space argument is set to 1. */ error = ti_copy_mem(sc, mem_param->tgAddr, mem_param->len, mem_param->userAddr, 1, cmd == ALT_READ_TG_MEM ? 1 : 0); } else if (mem_param->tgAddr >= TI_BEG_SCRATCH && mem_param->tgAddr <= scratch_end) { error = ti_copy_scratch(sc, mem_param->tgAddr, mem_param->len, mem_param->userAddr, 1, cmd == ALT_READ_TG_MEM ? 1 : 0, TI_PROCESSOR_A); } else if (mem_param->tgAddr >= TI_BEG_SCRATCH_B_DEBUG && mem_param->tgAddr <= TI_BEG_SCRATCH_B_DEBUG) { if (sc->ti_hwrev == TI_HWREV_TIGON) { if_printf(sc->ti_ifp, "invalid memory range for Tigon I\n"); error = EINVAL; break; } error = ti_copy_scratch(sc, mem_param->tgAddr - TI_SCRATCH_DEBUG_OFF, mem_param->len, mem_param->userAddr, 1, cmd == ALT_READ_TG_MEM ? 1 : 0, TI_PROCESSOR_B); } else { if_printf(sc->ti_ifp, "memory address %#x len %d is " "out of supported range\n", mem_param->tgAddr, mem_param->len); error = EINVAL; } TI_UNLOCK(sc); break; } case ALT_READ_TG_REG: case ALT_WRITE_TG_REG: { struct tg_reg *regs; uint32_t tmpval; regs = (struct tg_reg *)addr; /* * Make sure the address in question isn't out of range. 
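* Register contents cross this interface in network byte order, hence * the ntohl()/htonl() conversions around the bus_space accesses below.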
*/ if (regs->addr > TI_REG_MAX) { error = EINVAL; break; } TI_LOCK(sc); if (cmd == ALT_READ_TG_REG) { bus_space_read_region_4(sc->ti_btag, sc->ti_bhandle, regs->addr, &tmpval, 1); regs->data = ntohl(tmpval); #if 0 if ((regs->addr == TI_CPU_STATE) || (regs->addr == TI_CPU_CTL_B)) { if_printf(sc->ti_ifp, "register %#x = %#x\n", regs->addr, tmpval); } #endif } else { tmpval = htonl(regs->data); bus_space_write_region_4(sc->ti_btag, sc->ti_bhandle, regs->addr, &tmpval, 1); } TI_UNLOCK(sc); break; } default: error = ENOTTY; break; } return (error); } static void ti_watchdog(void *arg) { struct ti_softc *sc; struct ifnet *ifp; sc = arg; TI_LOCK_ASSERT(sc); callout_reset(&sc->ti_watchdog, hz, ti_watchdog, sc); if (sc->ti_timer == 0 || --sc->ti_timer > 0) return; /* * When we're debugging, the chip is often stopped for long periods * of time, and that would normally cause the watchdog timer to fire. * Since that impedes debugging, we don't want to do that. */ if (sc->ti_flags & TI_FLAG_DEBUGING) return; ifp = sc->ti_ifp; if_printf(ifp, "watchdog timeout -- resetting\n"); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; ti_init_locked(sc); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } /* * Stop the adapter and free any mbufs allocated to the * RX and TX lists. */ static void ti_stop(struct ti_softc *sc) { struct ifnet *ifp; struct ti_cmd_desc cmd; TI_LOCK_ASSERT(sc); ifp = sc->ti_ifp; /* Disable host interrupts. */ CSR_WRITE_4(sc, TI_MB_HOSTINTR, 1); /* * Tell firmware we're shutting down. */ TI_DO_CMD(TI_CMD_HOST_STATE, TI_CMD_CODE_STACK_DOWN, 0); /* Halt and reinitialize. */ if (ti_chipinit(sc) == 0) { ti_mem_zero(sc, 0x2000, 0x100000 - 0x2000); /* XXX ignore init errors. */ ti_chipinit(sc); } /* Free the RX lists. */ ti_free_rx_ring_std(sc); /* Free jumbo RX list. */ ti_free_rx_ring_jumbo(sc); /* Free mini RX list. */ ti_free_rx_ring_mini(sc); /* Free TX buffers. */ ti_free_tx_ring(sc); sc->ti_ev_prodidx.ti_idx = 0; sc->ti_return_prodidx.ti_idx = 0; sc->ti_tx_considx.ti_idx = 0; sc->ti_tx_saved_considx = TI_TXCONS_UNSET; ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); callout_stop(&sc->ti_watchdog); } /* * Stop all chip I/O so that the kernel's probe routines don't * get confused by errant DMAs when rebooting. 
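* Halting the firmware CPUs via ti_chipinit() is all that is done * here; no driver state needs to survive the reboot.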
*/ static int ti_shutdown(device_t dev) { struct ti_softc *sc; sc = device_get_softc(dev); TI_LOCK(sc); ti_chipinit(sc); TI_UNLOCK(sc); return (0); } static void ti_sysctl_node(struct ti_softc *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *child; char tname[32]; ctx = device_get_sysctl_ctx(sc->ti_dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->ti_dev)); /* Use DAC */ sc->ti_dac = 1; snprintf(tname, sizeof(tname), "dev.ti.%d.dac", device_get_unit(sc->ti_dev)); TUNABLE_INT_FETCH(tname, &sc->ti_dac); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_coal_ticks", CTLFLAG_RW, &sc->ti_rx_coal_ticks, 0, "Receive coalesced ticks"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_max_coal_bds", CTLFLAG_RW, &sc->ti_rx_max_coal_bds, 0, "Receive max coalesced BDs"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tx_coal_ticks", CTLFLAG_RW, &sc->ti_tx_coal_ticks, 0, "Send coalesced ticks"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tx_max_coal_bds", CTLFLAG_RW, &sc->ti_tx_max_coal_bds, 0, "Send max coalesced BDs"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tx_buf_ratio", CTLFLAG_RW, &sc->ti_tx_buf_ratio, 0, "Ratio of NIC memory devoted to TX buffer"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "stat_ticks", CTLFLAG_RW, &sc->ti_stat_ticks, 0, "Number of clock ticks for statistics update interval"); /* Pull in device tunables. */ sc->ti_rx_coal_ticks = 170; resource_int_value(device_get_name(sc->ti_dev), device_get_unit(sc->ti_dev), "rx_coal_ticks", &sc->ti_rx_coal_ticks); sc->ti_rx_max_coal_bds = 64; resource_int_value(device_get_name(sc->ti_dev), device_get_unit(sc->ti_dev), "rx_max_coal_bds", &sc->ti_rx_max_coal_bds); sc->ti_tx_coal_ticks = TI_TICKS_PER_SEC / 500; resource_int_value(device_get_name(sc->ti_dev), device_get_unit(sc->ti_dev), "tx_coal_ticks", &sc->ti_tx_coal_ticks); sc->ti_tx_max_coal_bds = 32; resource_int_value(device_get_name(sc->ti_dev), device_get_unit(sc->ti_dev), "tx_max_coal_bds", &sc->ti_tx_max_coal_bds); sc->ti_tx_buf_ratio = 21; resource_int_value(device_get_name(sc->ti_dev), device_get_unit(sc->ti_dev), "tx_buf_ratio", &sc->ti_tx_buf_ratio); sc->ti_stat_ticks = 2 * TI_TICKS_PER_SEC; resource_int_value(device_get_name(sc->ti_dev), device_get_unit(sc->ti_dev), "stat_ticks", &sc->ti_stat_ticks); } Index: projects/runtime-coverage/sys/fs/nfsclient/nfs_clcomsubs.c =================================================================== --- projects/runtime-coverage/sys/fs/nfsclient/nfs_clcomsubs.c (revision 323974) +++ projects/runtime-coverage/sys/fs/nfsclient/nfs_clcomsubs.c (revision 323975) @@ -1,680 +1,668 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission.
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * These functions support the macros and help fiddle mbuf chains for * the nfs op functions. They do things like create the rpc header and * copy data between mbuf chains and uio lists. */ #ifndef APPLEKEXT #include extern struct nfsstatsv1 nfsstatsv1; extern struct nfsv4_opflag nfsv4_opflag[NFSV41_NOPS]; extern int ncl_mbuf_mlen; extern enum vtype newnv2tov_type[8]; extern enum vtype nv34tov_type[8]; extern int nfs_bigreply[NFSV41_NPROCS]; NFSCLSTATEMUTEX; #endif /* !APPLEKEXT */ static nfsuint64 nfs_nullcookie = {{ 0, 0 }}; static struct { int op; int opcnt; const u_char *tag; int taglen; } nfsv4_opmap[NFSV41_NPROCS] = { { 0, 1, "Null", 4 }, { NFSV4OP_GETATTR, 1, "Getattr", 7, }, { NFSV4OP_SETATTR, 2, "Setattr", 7, }, { NFSV4OP_LOOKUP, 3, "Lookup", 6, }, { NFSV4OP_ACCESS, 2, "Access", 6, }, { NFSV4OP_READLINK, 2, "Readlink", 8, }, { NFSV4OP_READ, 1, "Read", 4, }, { NFSV4OP_WRITE, 2, "Write", 5, }, { NFSV4OP_OPEN, 5, "Open", 4, }, { NFSV4OP_CREATE, 5, "Create", 6, }, { NFSV4OP_CREATE, 1, "Create", 6, }, { NFSV4OP_CREATE, 3, "Create", 6, }, { NFSV4OP_REMOVE, 1, "Remove", 6, }, { NFSV4OP_REMOVE, 1, "Remove", 6, }, { NFSV4OP_SAVEFH, 5, "Rename", 6, }, { NFSV4OP_SAVEFH, 4, "Link", 4, }, { NFSV4OP_READDIR, 2, "Readdir", 7, }, { NFSV4OP_READDIR, 2, "Readdir", 7, }, { NFSV4OP_GETATTR, 1, "Getattr", 7, }, { NFSV4OP_GETATTR, 1, "Getattr", 7, }, { NFSV4OP_GETATTR, 1, "Getattr", 7, }, { NFSV4OP_COMMIT, 2, "Commit", 6, }, { NFSV4OP_LOOKUPP, 3, "Lookupp", 7, }, { NFSV4OP_SETCLIENTID, 1, "SetClientID", 11, }, { NFSV4OP_SETCLIENTIDCFRM, 1, "SetClientIDConfirm", 18, }, { NFSV4OP_LOCK, 1, "Lock", 4, }, { NFSV4OP_LOCKU, 1, "LockU", 5, }, { NFSV4OP_OPEN, 2, "Open", 4, }, { NFSV4OP_CLOSE, 1, "Close", 5, }, { NFSV4OP_OPENCONFIRM, 1, "Openconfirm", 11, }, { NFSV4OP_LOCKT, 1, "LockT", 5, }, { NFSV4OP_OPENDOWNGRADE, 1, "Opendowngrade", 13, }, { NFSV4OP_RENEW, 1, "Renew", 5, }, { NFSV4OP_PUTROOTFH, 1, "Dirpath", 7, }, { NFSV4OP_RELEASELCKOWN, 1, "Rellckown", 9, }, { NFSV4OP_DELEGRETURN, 1, "Delegret", 8, }, { NFSV4OP_DELEGRETURN, 3, "DelegRemove", 11, }, { NFSV4OP_DELEGRETURN, 7, "DelegRename1", 12, }, { NFSV4OP_DELEGRETURN, 9, "DelegRename2", 12, }, { NFSV4OP_GETATTR, 1, "Getacl", 6, }, { NFSV4OP_SETATTR, 1, "Setacl", 6, }, { NFSV4OP_EXCHANGEID, 1, "ExchangeID", 10, }, { NFSV4OP_CREATESESSION, 1, "CreateSession", 13, }, { NFSV4OP_DESTROYSESSION, 1, "DestroySession", 14, }, { NFSV4OP_DESTROYCLIENTID, 1, "DestroyClient", 13, }, { NFSV4OP_FREESTATEID, 1, "FreeStateID", 11, }, { NFSV4OP_LAYOUTGET, 1, "LayoutGet", 9, }, { NFSV4OP_GETDEVINFO, 1, "GetDeviceInfo", 13, }, { NFSV4OP_LAYOUTCOMMIT, 1, "LayoutCommit", 12, }, { NFSV4OP_LAYOUTRETURN, 1, "LayoutReturn", 12, }, { NFSV4OP_RECLAIMCOMPL, 1, "ReclaimComplete", 15, }, { 
NFSV4OP_WRITE, 1, "WriteDS", 7, }, { NFSV4OP_READ, 1, "ReadDS", 6, }, { NFSV4OP_COMMIT, 1, "CommitDS", 8, }, { NFSV4OP_OPEN, 3, "OpenLayoutGet", 13, }, { NFSV4OP_OPEN, 8, "CreateLayGet", 12, }, }; /* * NFS RPCs that have large request message size. */ static int nfs_bigrequest[NFSV41_NPROCS] = { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 }; /* * Start building a request. Mostly just put the first file handle in * place. */ APPLESTATIC void nfscl_reqstart(struct nfsrv_descript *nd, int procnum, struct nfsmount *nmp, u_int8_t *nfhp, int fhlen, u_int32_t **opcntpp, struct nfsclsession *sep) { struct mbuf *mb; u_int32_t *tl; int opcnt; nfsattrbit_t attrbits; /* * First, fill in some of the fields of nd. */ nd->nd_slotseq = NULL; if (NFSHASNFSV4(nmp)) { nd->nd_flag = ND_NFSV4 | ND_NFSCL; if (NFSHASNFSV4N(nmp)) nd->nd_flag |= ND_NFSV41; } else if (NFSHASNFSV3(nmp)) nd->nd_flag = ND_NFSV3 | ND_NFSCL; else nd->nd_flag = ND_NFSV2 | ND_NFSCL; nd->nd_procnum = procnum; nd->nd_repstat = 0; /* * Get the first mbuf for the request. */ if (nfs_bigrequest[procnum]) NFSMCLGET(mb, M_WAITOK); else NFSMGET(mb); mbuf_setlen(mb, 0); nd->nd_mreq = nd->nd_mb = mb; nd->nd_bpos = NFSMTOD(mb, caddr_t); /* * And fill the first file handle into the request. */ if (nd->nd_flag & ND_NFSV4) { opcnt = nfsv4_opmap[procnum].opcnt + nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh; if ((nd->nd_flag & ND_NFSV41) != 0) { opcnt += nfsv4_opflag[nfsv4_opmap[procnum].op].needsseq; if (procnum == NFSPROC_RENEW) /* * For the special case of Renew, just do a * Sequence Op. */ opcnt = 1; else if (procnum == NFSPROC_WRITEDS || procnum == NFSPROC_COMMITDS) /* * For the special case of a Write or Commit to * a DS, the opcnt == 3, for Sequence, PutFH, * Write/Commit. */ opcnt = 3; } /* * What should the tag really be? */ (void) nfsm_strtom(nd, nfsv4_opmap[procnum].tag, nfsv4_opmap[procnum].taglen); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if ((nd->nd_flag & ND_NFSV41) != 0) *tl++ = txdr_unsigned(NFSV41_MINORVERSION); else *tl++ = txdr_unsigned(NFSV4_MINORVERSION); if (opcntpp != NULL) *opcntpp = tl; *tl = txdr_unsigned(opcnt); if ((nd->nd_flag & ND_NFSV41) != 0 && nfsv4_opflag[nfsv4_opmap[procnum].op].needsseq > 0) { if (nfsv4_opflag[nfsv4_opmap[procnum].op].loopbadsess > 0) nd->nd_flag |= ND_LOOPBADSESS; NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_SEQUENCE); if (sep == NULL) { sep = nfsmnt_mdssession(nmp); nfsv4_setsequence(nmp, nd, sep, nfs_bigreply[procnum]); } else nfsv4_setsequence(nmp, nd, sep, nfs_bigreply[procnum]); } if (nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh > 0) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); (void) nfsm_fhtom(nd, nfhp, fhlen, 0); if (nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh == 2 && procnum != NFSPROC_WRITEDS && procnum != NFSPROC_COMMITDS) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); /* * For Lookup Ops, we want all the directory * attributes, so we can load the name cache.
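 * (An illustrative sketch of the op-count logic follows; the original
 * code resumes after it.)
 */
#if 0
/*
 * Compiled-out sketch, not part of this change: the NFSv4.1 compound
 * op-count selection from above, restated as a stand-alone helper.  The
 * name compound_opcnt() is hypothetical; nfsv4_opmap[], nfsv4_opflag[]
 * and the NFSPROC_* constants are the ones used in this file.
 */
static int
compound_opcnt(int procnum)
{
	int opcnt;

	opcnt = nfsv4_opmap[procnum].opcnt +
	    nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh +
	    nfsv4_opflag[nfsv4_opmap[procnum].op].needsseq;
	if (procnum == NFSPROC_RENEW)
		opcnt = 1;	/* Just a Sequence op. */
	else if (procnum == NFSPROC_WRITEDS || procnum == NFSPROC_COMMITDS)
		opcnt = 3;	/* Sequence, PutFH, Write/Commit. */
	return (opcnt);
}
#endif
/*
 * (End of sketch.)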
*/ if (procnum == NFSPROC_LOOKUP || procnum == NFSPROC_LOOKUPP) NFSGETATTR_ATTRBIT(&attrbits); else { NFSWCCATTR_ATTRBIT(&attrbits); nd->nd_flag |= ND_V4WCCATTR; } (void) nfsrv_putattrbit(nd, &attrbits); } } if (procnum != NFSPROC_RENEW || (nd->nd_flag & ND_NFSV41) == 0) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(nfsv4_opmap[procnum].op); } } else { (void) nfsm_fhtom(nd, nfhp, fhlen, 0); } if (procnum < NFSV41_NPROCS) NFSINCRGLOBAL(nfsstatsv1.rpccnt[procnum]); } /* * copies a uio scatter/gather list to an mbuf chain. * NOTE: can only handle iovcnt == 1 */ APPLESTATIC void nfsm_uiombuf(struct nfsrv_descript *nd, struct uio *uiop, int siz) { char *uiocp; struct mbuf *mp, *mp2; int xfer, left, mlen; int uiosiz, clflg, rem; char *cp, *tcp; KASSERT(uiop->uio_iovcnt == 1, ("nfsm_uiotombuf: iovcnt != 1")); if (siz > ncl_mbuf_mlen) /* or should it >= MCLBYTES ?? */ clflg = 1; else clflg = 0; rem = NFSM_RNDUP(siz) - siz; mp = mp2 = nd->nd_mb; while (siz > 0) { left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { mlen = M_TRAILINGSPACE(mp); if (mlen == 0) { if (clflg) NFSMCLGET(mp, M_WAITOK); else NFSMGET(mp); mbuf_setlen(mp, 0); mbuf_setnext(mp2, mp); mp2 = mp; mlen = M_TRAILINGSPACE(mp); } xfer = (left > mlen) ? mlen : left; #ifdef notdef /* Not Yet.. */ if (uiop->uio_iov->iov_op != NULL) (*(uiop->uio_iov->iov_op)) (uiocp, NFSMTOD(mp, caddr_t) + mbuf_len(mp), xfer); else #endif if (uiop->uio_segflg == UIO_SYSSPACE) NFSBCOPY(uiocp, NFSMTOD(mp, caddr_t) + mbuf_len(mp), xfer); else copyin(CAST_USER_ADDR_T(uiocp), NFSMTOD(mp, caddr_t) + mbuf_len(mp), xfer); mbuf_setlen(mp, mbuf_len(mp) + xfer); left -= xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } tcp = (char *)uiop->uio_iov->iov_base; tcp += uiosiz; uiop->uio_iov->iov_base = (void *)tcp; uiop->uio_iov->iov_len -= uiosiz; siz -= uiosiz; } if (rem > 0) { if (rem > M_TRAILINGSPACE(mp)) { NFSMGET(mp); mbuf_setlen(mp, 0); mbuf_setnext(mp2, mp); } cp = NFSMTOD(mp, caddr_t) + mbuf_len(mp); for (left = 0; left < rem; left++) *cp++ = '\0'; mbuf_setlen(mp, mbuf_len(mp) + rem); nd->nd_bpos = cp; } else nd->nd_bpos = NFSMTOD(mp, caddr_t) + mbuf_len(mp); nd->nd_mb = mp; } /* * copies a uio scatter/gather list to an mbuf chain. * This version returns the mbuf list and does not use "nd". * NOTE: can only handle iovcnt == 1 */ struct mbuf * nfsm_uiombuflist(struct uio *uiop, int siz, struct mbuf **mbp, char **cpp) { char *uiocp; struct mbuf *mp, *mp2, *firstmp; int xfer, left, mlen; int uiosiz, clflg, rem; - char *cp, *tcp; + char *tcp; KASSERT(uiop->uio_iovcnt == 1, ("nfsm_uiotombuf: iovcnt != 1")); if (siz > ncl_mbuf_mlen) /* or should it >= MCLBYTES ?? */ clflg = 1; else clflg = 0; rem = NFSM_RNDUP(siz) - siz; if (clflg != 0) NFSMCLGET(mp, M_WAITOK); else NFSMGET(mp); mbuf_setlen(mp, 0); firstmp = mp2 = mp; while (siz > 0) { left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { mlen = M_TRAILINGSPACE(mp); if (mlen == 0) { if (clflg) NFSMCLGET(mp, M_WAITOK); else NFSMGET(mp); mbuf_setlen(mp, 0); mbuf_setnext(mp2, mp); mp2 = mp; mlen = M_TRAILINGSPACE(mp); } xfer = (left > mlen) ?
mlen : left; if (uiop->uio_segflg == UIO_SYSSPACE) NFSBCOPY(uiocp, NFSMTOD(mp, caddr_t) + mbuf_len(mp), xfer); else copyin(uiocp, NFSMTOD(mp, caddr_t) + mbuf_len(mp), xfer); mbuf_setlen(mp, mbuf_len(mp) + xfer); left -= xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } tcp = (char *)uiop->uio_iov->iov_base; tcp += uiosiz; uiop->uio_iov->iov_base = (void *)tcp; uiop->uio_iov->iov_len -= uiosiz; siz -= uiosiz; } - if (rem > 0) { - if (rem > M_TRAILINGSPACE(mp)) { - NFSMGET(mp); - mbuf_setlen(mp, 0); - mbuf_setnext(mp2, mp); - } - cp = NFSMTOD(mp, caddr_t) + mbuf_len(mp); - for (left = 0; left < rem; left++) - *cp++ = '\0'; - mbuf_setlen(mp, mbuf_len(mp) + rem); - if (cpp != NULL) - *cpp = cp; - } else if (cpp != NULL) + if (cpp != NULL) *cpp = NFSMTOD(mp, caddr_t) + mbuf_len(mp); if (mbp != NULL) *mbp = mp; return (firstmp); } /* * Load vnode attributes from the xdr file attributes. * Returns EBADRPC if they can't be parsed, 0 otherwise. */ APPLESTATIC int nfsm_loadattr(struct nfsrv_descript *nd, struct nfsvattr *nap) { struct nfs_fattr *fp; int error = 0; if (nd->nd_flag & ND_NFSV4) { error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(fp, struct nfs_fattr *, NFSX_V3FATTR); nap->na_type = nfsv34tov_type(fp->fa_type); nap->na_mode = fxdr_unsigned(u_short, fp->fa_mode); nap->na_rdev = makedev(fxdr_unsigned(u_char, fp->fa3_rdev.specdata1), fxdr_unsigned(u_char, fp->fa3_rdev.specdata2)); nap->na_nlink = fxdr_unsigned(u_short, fp->fa_nlink); nap->na_uid = fxdr_unsigned(uid_t, fp->fa_uid); nap->na_gid = fxdr_unsigned(gid_t, fp->fa_gid); nap->na_size = fxdr_hyper(&fp->fa3_size); nap->na_blocksize = NFS_FABLKSIZE; nap->na_bytes = fxdr_hyper(&fp->fa3_used); nap->na_fileid = fxdr_hyper(&fp->fa3_fileid); fxdr_nfsv3time(&fp->fa3_atime, &nap->na_atime); fxdr_nfsv3time(&fp->fa3_ctime, &nap->na_ctime); fxdr_nfsv3time(&fp->fa3_mtime, &nap->na_mtime); nap->na_flags = 0; nap->na_filerev = 0; } else { NFSM_DISSECT(fp, struct nfs_fattr *, NFSX_V2FATTR); nap->na_type = nfsv2tov_type(fp->fa_type); nap->na_mode = fxdr_unsigned(u_short, fp->fa_mode); if (nap->na_type == VNON || nap->na_type == VREG) nap->na_type = IFTOVT(nap->na_mode); nap->na_rdev = fxdr_unsigned(dev_t, fp->fa2_rdev); /* * Really ugly NFSv2 kludge. */ if (nap->na_type == VCHR && nap->na_rdev == ((dev_t)-1)) nap->na_type = VFIFO; nap->na_nlink = fxdr_unsigned(u_short, fp->fa_nlink); nap->na_uid = fxdr_unsigned(uid_t, fp->fa_uid); nap->na_gid = fxdr_unsigned(gid_t, fp->fa_gid); nap->na_size = fxdr_unsigned(u_int32_t, fp->fa2_size); nap->na_blocksize = fxdr_unsigned(int32_t, fp->fa2_blocksize); nap->na_bytes = (u_quad_t)fxdr_unsigned(int32_t, fp->fa2_blocks) * NFS_FABLKSIZE; nap->na_fileid = fxdr_unsigned(uint64_t, fp->fa2_fileid); fxdr_nfsv2time(&fp->fa2_atime, &nap->na_atime); fxdr_nfsv2time(&fp->fa2_mtime, &nap->na_mtime); nap->na_flags = 0; nap->na_ctime.tv_sec = fxdr_unsigned(u_int32_t, fp->fa2_ctime.nfsv2_sec); nap->na_ctime.tv_nsec = 0; nap->na_gen = fxdr_unsigned(u_int32_t,fp->fa2_ctime.nfsv2_usec); nap->na_filerev = 0; } nfsmout: return (error); } /* * This function finds the directory cookie that corresponds to the * logical byte offset given. 
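 * (A sketch of the offset arithmetic follows; the original function
 * comes after it.)
 */
#if 0
/*
 * Compiled-out sketch, not part of this change: how a logical directory
 * offset maps to a cookie slot in the nfsdmap chain below.  Offset 0
 * needs no cookie; otherwise cookie (off / NFS_DIRBLKSIZ - 1) is wanted,
 * and cookies are stored NFSNUMCOOKIES per list element.  The helper
 * name cookie_slot() is hypothetical.
 */
static void
cookie_slot(off_t off, int *listelem, int *slot)
{
	int pos;

	pos = off / NFS_DIRBLKSIZ - 1;	/* caller handles off == 0 itself */
	*listelem = pos / NFSNUMCOOKIES;
	*slot = pos % NFSNUMCOOKIES;
}
#endif
/*
 * (End of sketch.)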
*/ APPLESTATIC nfsuint64 * nfscl_getcookie(struct nfsnode *np, off_t off, int add) { struct nfsdmap *dp, *dp2; int pos; pos = off / NFS_DIRBLKSIZ; if (pos == 0) { KASSERT(!add, ("nfs getcookie add at 0")); return (&nfs_nullcookie); } pos--; dp = LIST_FIRST(&np->n_cookies); if (!dp) { if (add) { MALLOC(dp, struct nfsdmap *, sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp->ndm_eocookie = 0; LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list); } else return (NULL); } while (pos >= NFSNUMCOOKIES) { pos -= NFSNUMCOOKIES; if (LIST_NEXT(dp, ndm_list) != NULL) { if (!add && dp->ndm_eocookie < NFSNUMCOOKIES && pos >= dp->ndm_eocookie) return (NULL); dp = LIST_NEXT(dp, ndm_list); } else if (add) { MALLOC(dp2, struct nfsdmap *, sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp2->ndm_eocookie = 0; LIST_INSERT_AFTER(dp, dp2, ndm_list); dp = dp2; } else return (NULL); } if (pos >= dp->ndm_eocookie) { if (add) dp->ndm_eocookie = pos + 1; else return (NULL); } return (&dp->ndm_cookies[pos]); } /* * Gets a file handle out of an nfs reply sent to the client and returns * the file handle and the file's attributes. * For V4, it assumes that Getfh and Getattr Op's results are here. */ APPLESTATIC int nfscl_mtofh(struct nfsrv_descript *nd, struct nfsfh **nfhpp, struct nfsvattr *nap, int *attrflagp) { u_int32_t *tl; int error = 0, flag = 1; *nfhpp = NULL; *attrflagp = 0; /* * First get the file handle and vnode. */ if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); flag = fxdr_unsigned(int, *tl); } else if (nd->nd_flag & ND_NFSV4) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); /* If the GetFH failed, clear flag. */ if (*++tl != 0) { nd->nd_flag |= ND_NOMOREDATA; flag = 0; error = ENXIO; /* Return ENXIO so *nfhpp isn't used. */ } } if (flag) { error = nfsm_getfh(nd, nfhpp); if (error) return (error); } /* * Now, get the attributes. */ if (flag != 0 && (nd->nd_flag & ND_NFSV4) != 0) { NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (*++tl != 0) { nd->nd_flag |= ND_NOMOREDATA; flag = 0; } } else if (nd->nd_flag & ND_NFSV3) { NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (flag) { flag = fxdr_unsigned(int, *tl); } else if (fxdr_unsigned(int, *tl)) { error = nfsm_advance(nd, NFSX_V3FATTR, -1); if (error) return (error); } } if (flag) { error = nfsm_loadattr(nd, nap); if (!error) *attrflagp = 1; } nfsmout: return (error); } /* * Put a state Id in the mbuf list. */ APPLESTATIC void nfsm_stateidtom(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, int flag) { nfsv4stateid_t *st; NFSM_BUILD(st, nfsv4stateid_t *, NFSX_STATEID); if (flag == NFSSTATEID_PUTALLZERO) { st->seqid = 0; st->other[0] = 0; st->other[1] = 0; st->other[2] = 0; } else if (flag == NFSSTATEID_PUTALLONE) { st->seqid = 0xffffffff; st->other[0] = 0xffffffff; st->other[1] = 0xffffffff; st->other[2] = 0xffffffff; } else if (flag == NFSSTATEID_PUTSEQIDZERO) { st->seqid = 0; st->other[0] = stateidp->other[0]; st->other[1] = stateidp->other[1]; st->other[2] = stateidp->other[2]; } else { st->seqid = stateidp->seqid; st->other[0] = stateidp->other[0]; st->other[1] = stateidp->other[1]; st->other[2] = stateidp->other[2]; } } /* * Initialize the owner/delegation sleep lock. */ APPLESTATIC void nfscl_lockinit(struct nfsv4lock *lckp) { lckp->nfslock_usecnt = 0; lckp->nfslock_lock = 0; } /* * Get an exclusive lock. (Not needed for OpenBSD4, since there is only one * thread for each posix process in the kernel.) 
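 * (A usage sketch for the lock routines defined below follows.)
 */
#if 0
/*
 * Compiled-out sketch, not part of this change: the intended calling
 * pattern for nfscl_lockexcl()/nfscl_lockunlock().  The helper name and
 * its arguments are hypothetical; "mtx" is assumed to be the mutex that
 * protects the nfsv4lock word and to be held by the caller, as the real
 * callers in the client state code do.
 */
static void
example_state_update(struct nfsv4lock *lckp, void *mtx)
{

	nfscl_lockexcl(lckp, mtx);	/* loops until the lock is granted */
	/* ... modify the state protected by "lckp" ... */
	nfscl_lockunlock(lckp);
}
#endif
/*
 * (End of sketch.)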
*/ APPLESTATIC void nfscl_lockexcl(struct nfsv4lock *lckp, void *mutex) { int igotlock; do { igotlock = nfsv4_lock(lckp, 1, NULL, mutex, NULL); } while (!igotlock); } /* * Release an exclusive lock. */ APPLESTATIC void nfscl_lockunlock(struct nfsv4lock *lckp) { nfsv4_unlock(lckp, 0); } /* * Called to dereference a lock on a stateid (delegation or open owner). */ APPLESTATIC void nfscl_lockderef(struct nfsv4lock *lckp) { NFSLOCKCLSTATE(); lckp->nfslock_usecnt--; if (lckp->nfslock_usecnt == 0 && (lckp->nfslock_lock & NFSV4LOCK_WANTED)) { lckp->nfslock_lock &= ~NFSV4LOCK_WANTED; wakeup((caddr_t)lckp); } NFSUNLOCKCLSTATE(); } Index: projects/runtime-coverage/sys/vm/vm_page.c =================================================================== --- projects/runtime-coverage/sys/vm/vm_page.c (revision 323974) +++ projects/runtime-coverage/sys/vm/vm_page.c (revision 323975) @@ -1,3830 +1,3832 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1998 Matthew Dillon. All Rights Reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91 */ /*- * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
* * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * GENERAL RULES ON VM_PAGE MANIPULATION * * - A page queue lock is required when adding or removing a page from a * page queue regardless of other locks or the busy state of a page. * * * In general, no thread besides the page daemon can acquire or * hold more than one page queue lock at a time. * * * The page daemon can acquire and hold any pair of page queue * locks in any order. * * - The object lock is required when inserting or removing * pages from an object (vm_page_insert() or vm_page_remove()). * */ /* * Resident memory management module. */ #include __FBSDID("$FreeBSD$"); #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Associated with page of user-allocatable memory is a * page structure. */ struct vm_domain vm_dom[MAXMEMDOM]; struct mtx_padalign __exclusive_cache_line vm_page_queue_free_mtx; struct mtx_padalign __exclusive_cache_line pa_lock[PA_LOCK_COUNT]; /* * bogus page -- for I/O to/from partially complete buffers, * or for paging into sparsely invalid regions. */ vm_page_t bogus_page; vm_page_t vm_page_array; long vm_page_array_size; long first_page; static int boot_pages = UMA_BOOT_PAGES; SYSCTL_INT(_vm, OID_AUTO, boot_pages, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &boot_pages, 0, "number of pages allocated for bootstrapping the VM system"); static int pa_tryrelock_restart; SYSCTL_INT(_vm, OID_AUTO, tryrelock_restart, CTLFLAG_RD, &pa_tryrelock_restart, 0, "Number of tryrelock restarts"); static TAILQ_HEAD(, vm_page) blacklist_head; static int sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_vm, OID_AUTO, page_blacklist, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_page_blacklist, "A", "Blacklist pages"); /* Is the page daemon waiting for free pages? */ static int vm_pageout_pages_needed; static uma_zone_t fakepg_zone; static void vm_page_alloc_check(vm_page_t m); static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits); static void vm_page_enqueue(uint8_t queue, vm_page_t m); static void vm_page_free_phys(vm_page_t m); static void vm_page_free_wakeup(void); static void vm_page_init(void *dummy); static int vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, vm_page_t mpred); static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred); static int vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run, vm_paddr_t high); SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init, NULL); static void vm_page_init(void *dummy) { fakepg_zone = uma_zcreate("fakepg", sizeof(struct vm_page), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE | UMA_ZONE_VM); bogus_page = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED); } /* Make sure that u_long is at least 64 bits when PAGE_SIZE is 32K. */ #if PAGE_SIZE == 32768 #ifdef CTASSERT CTASSERT(sizeof(u_long) >= 8); #endif #endif /* * Try to acquire a physical address lock while a pmap is locked. 
If we * fail to trylock we unlock and lock the pmap directly and cache the * locked pa in *locked. The caller should then restart their loop in case * the virtual to physical mapping has changed. */ int vm_page_pa_tryrelock(pmap_t pmap, vm_paddr_t pa, vm_paddr_t *locked) { vm_paddr_t lockpa; lockpa = *locked; *locked = pa; if (lockpa) { PA_LOCK_ASSERT(lockpa, MA_OWNED); if (PA_LOCKPTR(pa) == PA_LOCKPTR(lockpa)) return (0); PA_UNLOCK(lockpa); } if (PA_TRYLOCK(pa)) return (0); PMAP_UNLOCK(pmap); atomic_add_int(&pa_tryrelock_restart, 1); PA_LOCK(pa); PMAP_LOCK(pmap); return (EAGAIN); } /* * vm_set_page_size: * * Sets the page size, perhaps based upon the memory * size. Must be called before any use of page-size * dependent functions. */ void vm_set_page_size(void) { if (vm_cnt.v_page_size == 0) vm_cnt.v_page_size = PAGE_SIZE; if (((vm_cnt.v_page_size - 1) & vm_cnt.v_page_size) != 0) panic("vm_set_page_size: page size not a power of two"); } /* * vm_page_blacklist_next: * * Find the next entry in the provided string of blacklist * addresses. Entries are separated by space, comma, or newline. * If an invalid integer is encountered then the rest of the * string is skipped. Updates the list pointer to the next * character, or NULL if the string is exhausted or invalid. */ static vm_paddr_t vm_page_blacklist_next(char **list, char *end) { vm_paddr_t bad; char *cp, *pos; if (list == NULL || *list == NULL) return (0); if (**list =='\0') { *list = NULL; return (0); } /* * If there's no end pointer then the buffer is coming from * the kenv and we know it's null-terminated. */ if (end == NULL) end = *list + strlen(*list); /* Ensure that strtoq() won't walk off the end */ if (*end != '\0') { if (*end == '\n' || *end == ' ' || *end == ',') *end = '\0'; else { printf("Blacklist not terminated, skipping\n"); *list = NULL; return (0); } } for (pos = *list; *pos != '\0'; pos = cp) { bad = strtoq(pos, &cp, 0); if (*cp == '\0' || *cp == ' ' || *cp == ',' || *cp == '\n') { if (bad == 0) { if (++cp < end) continue; else break; } } else break; if (*cp == '\0' || ++cp >= end) *list = NULL; else *list = cp; return (trunc_page(bad)); } printf("Garbage in RAM blacklist, skipping\n"); *list = NULL; return (0); } /* * vm_page_blacklist_check: * * Iterate through the provided string of blacklist addresses, pulling * each entry out of the physical allocator free list and putting it * onto a list for reporting via the vm.page_blacklist sysctl. */ static void vm_page_blacklist_check(char *list, char *end) { vm_paddr_t pa; vm_page_t m; char *next; int ret; next = list; while (next != NULL) { if ((pa = vm_page_blacklist_next(&next, end)) == 0) continue; m = vm_phys_paddr_to_vm_page(pa); if (m == NULL) continue; mtx_lock(&vm_page_queue_free_mtx); ret = vm_phys_unfree_page(m); mtx_unlock(&vm_page_queue_free_mtx); if (ret == TRUE) { TAILQ_INSERT_TAIL(&blacklist_head, m, listq); if (bootverbose) printf("Skipping page with pa 0x%jx\n", (uintmax_t)pa); } } } /* * vm_page_blacklist_load: * * Search for a special module named "ram_blacklist". It'll be a * plain text file provided by the user via the loader directive * of the same name. 
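 * (A sketch of the accepted input format follows.)
 */
#if 0
/*
 * Compiled-out sketch, not part of this change: feeding the parser above
 * a null-terminated buffer, as vm_page_blacklist_check() does for the
 * "vm.blacklist" kenv string.  Entries may be separated by space, comma,
 * or newline; the addresses here are made up.
 */
	char buf[] = "0x12345000,0x12bc4000 0x2d4a7000";
	char *next;
	vm_paddr_t pa;

	next = buf;
	while (next != NULL) {
		if ((pa = vm_page_blacklist_next(&next, NULL)) == 0)
			continue;
		printf("would blacklist page at %#jx\n", (uintmax_t)pa);
	}
#endif
/*
 * (End of sketch.)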
*/ static void vm_page_blacklist_load(char **list, char **end) { void *mod; u_char *ptr; u_int len; mod = NULL; ptr = NULL; mod = preload_search_by_type("ram_blacklist"); if (mod != NULL) { ptr = preload_fetch_addr(mod); len = preload_fetch_size(mod); } *list = ptr; if (ptr != NULL) *end = ptr + len; else *end = NULL; return; } static int sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS) { vm_page_t m; struct sbuf sbuf; int error, first; first = 1; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sbuf_new_for_sysctl(&sbuf, NULL, 128, req); TAILQ_FOREACH(m, &blacklist_head, listq) { sbuf_printf(&sbuf, "%s%#jx", first ? "" : ",", (uintmax_t)m->phys_addr); first = 0; } error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); return (error); } static void vm_page_domain_init(struct vm_domain *vmd) { struct vm_pagequeue *pq; int i; *__DECONST(char **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_name) = "vm inactive pagequeue"; *__DECONST(u_int **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_vcnt) = &vm_cnt.v_inactive_count; *__DECONST(char **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_name) = "vm active pagequeue"; *__DECONST(u_int **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_vcnt) = &vm_cnt.v_active_count; *__DECONST(char **, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_name) = "vm laundry pagequeue"; *__DECONST(int **, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_vcnt) = &vm_cnt.v_laundry_count; *__DECONST(char **, &vmd->vmd_pagequeues[PQ_UNSWAPPABLE].pq_name) = "vm unswappable pagequeue"; /* Unswappable dirty pages are counted as being in the laundry. */ *__DECONST(int **, &vmd->vmd_pagequeues[PQ_UNSWAPPABLE].pq_vcnt) = &vm_cnt.v_laundry_count; vmd->vmd_page_count = 0; vmd->vmd_free_count = 0; vmd->vmd_segs = 0; vmd->vmd_oom = FALSE; for (i = 0; i < PQ_COUNT; i++) { pq = &vmd->vmd_pagequeues[i]; TAILQ_INIT(&pq->pq_pl); mtx_init(&pq->pq_mutex, pq->pq_name, "vm pagequeue", MTX_DEF | MTX_DUPOK); } } /* * vm_page_startup: * * Initializes the resident memory module. Allocates physical memory for * bootstrapping UMA and some data structures that are used to manage * physical pages. Initializes these structures, and populates the free * page queues. */ vm_offset_t vm_page_startup(vm_offset_t vaddr) { struct vm_domain *vmd; struct vm_phys_seg *seg; vm_page_t m; char *list, *listend; vm_offset_t mapped; vm_paddr_t end, high_avail, low_avail, new_end, page_range, size; vm_paddr_t biggestsize, last_pa, pa; u_long pagecount; int biggestone, i, pages_per_zone, segind; biggestsize = 0; biggestone = 0; vaddr = round_page(vaddr); for (i = 0; phys_avail[i + 1]; i += 2) { phys_avail[i] = round_page(phys_avail[i]); phys_avail[i + 1] = trunc_page(phys_avail[i + 1]); } for (i = 0; phys_avail[i + 1]; i += 2) { size = phys_avail[i + 1] - phys_avail[i]; if (size > biggestsize) { biggestone = i; biggestsize = size; } } end = phys_avail[biggestone+1]; /* * Initialize the page and queue locks. */ mtx_init(&vm_page_queue_free_mtx, "vm page free queue", NULL, MTX_DEF); for (i = 0; i < PA_LOCK_COUNT; i++) mtx_init(&pa_lock[i], "vm page", NULL, MTX_DEF); for (i = 0; i < vm_ndomains; i++) vm_page_domain_init(&vm_dom[i]); /* * Almost all of the pages needed for bootstrapping UMA are used * for zone structures, so if the number of CPUs results in those * structures taking more than one page each, we set aside more pages * in proportion to the zone structure size. 
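 * (A worked example of that sizing follows.)
 */
#if 0
/*
 * Compiled-out sketch, not part of this change, using made-up sizes: if
 * a zone structure took 4 KB, each of 64 CPUs added a 448-byte per-CPU
 * cache, and UMA_SLAB_SIZE were one 4 KB page, a single zone would span
 * howmany(4096 + 64 * 448, 4096) == 8 boot pages, so boot_pages is
 * scaled up by that factor below.
 */
	u_int pages_per_zone_demo;

	pages_per_zone_demo = howmany(4096 + 64 * 448, 4096);	/* == 8 */
#endif
/*
 * (End of sketch.)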
*/ pages_per_zone = howmany(sizeof(struct uma_zone) + sizeof(struct uma_cache) * (mp_maxid + 1) + roundup2(sizeof(struct uma_slab), sizeof(void *)), UMA_SLAB_SIZE); if (pages_per_zone > 1) { /* Reserve more pages so that we don't run out. */ boot_pages = UMA_BOOT_PAGES_ZONES * pages_per_zone; } /* * Allocate memory for use when bootstrapping the kernel memory * allocator. * * CTLFLAG_RDTUN doesn't work during the early boot process, so we must * manually fetch the value. */ TUNABLE_INT_FETCH("vm.boot_pages", &boot_pages); new_end = end - (boot_pages * UMA_SLAB_SIZE); new_end = trunc_page(new_end); mapped = pmap_map(&vaddr, new_end, end, VM_PROT_READ | VM_PROT_WRITE); bzero((void *)mapped, end - new_end); uma_startup((void *)mapped, boot_pages); #if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \ defined(__i386__) || defined(__mips__) /* * Allocate a bitmap to indicate that a random physical page * needs to be included in a minidump. * * The amd64 port needs this to indicate which direct map pages * need to be dumped, via calls to dump_add_page()/dump_drop_page(). * * However, i386 still needs this workspace internally within the * minidump code. In theory, they are not needed on i386, but are * included should the sf_buf code decide to use them. */ last_pa = 0; for (i = 0; dump_avail[i + 1] != 0; i += 2) if (dump_avail[i + 1] > last_pa) last_pa = dump_avail[i + 1]; page_range = last_pa / PAGE_SIZE; vm_page_dump_size = round_page(roundup2(page_range, NBBY) / NBBY); new_end -= vm_page_dump_size; vm_page_dump = (void *)(uintptr_t)pmap_map(&vaddr, new_end, new_end + vm_page_dump_size, VM_PROT_READ | VM_PROT_WRITE); bzero((void *)vm_page_dump, vm_page_dump_size); #else (void)last_pa; #endif #if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) /* * Include the UMA bootstrap pages and vm_page_dump in a crash dump. * When pmap_map() uses the direct map, they are not automatically * included. */ for (pa = new_end; pa < end; pa += PAGE_SIZE) dump_add_page(pa); #endif phys_avail[biggestone + 1] = new_end; #ifdef __amd64__ /* * Request that the physical pages underlying the message buffer be * included in a crash dump. Since the message buffer is accessed * through the direct map, they are not automatically included. */ pa = DMAP_TO_PHYS((vm_offset_t)msgbufp->msg_ptr); last_pa = pa + round_page(msgbufsize); while (pa < last_pa) { dump_add_page(pa); pa += PAGE_SIZE; } #endif /* * Compute the number of pages of memory that will be available for * use, taking into account the overhead of a page structure per page. * In other words, solve * "available physical memory" - round_page(page_range * * sizeof(struct vm_page)) = page_range * PAGE_SIZE * for page_range. */ low_avail = phys_avail[0]; high_avail = phys_avail[1]; for (i = 0; i < vm_phys_nsegs; i++) { if (vm_phys_segs[i].start < low_avail) low_avail = vm_phys_segs[i].start; if (vm_phys_segs[i].end > high_avail) high_avail = vm_phys_segs[i].end; } /* Skip the first chunk. It is already accounted for.
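 * (A worked example of the page_range algebra from the comment above
 * follows.)
 */
#if 0
/*
 * Compiled-out sketch, not part of this change: solving the equation in
 * the comment above with made-up numbers.  With a 4096-byte page and a
 * hypothetical 104-byte struct vm_page, 1 GB of managed memory gives
 * page_range = 1073741824 / (4096 + 104) ~= 255652 usable pages out of
 * 262144 raw page frames; the difference holds the page structures.
 */
	vm_paddr_t demo_size;
	u_long demo_range;

	demo_size = 1024UL * 1024 * 1024;
	demo_range = demo_size / (4096 + 104);	/* ~255652 */
#endif
/*
 * (End of sketch.)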
*/ for (i = 2; phys_avail[i + 1] != 0; i += 2) { if (phys_avail[i] < low_avail) low_avail = phys_avail[i]; if (phys_avail[i + 1] > high_avail) high_avail = phys_avail[i + 1]; } first_page = low_avail / PAGE_SIZE; #ifdef VM_PHYSSEG_SPARSE size = 0; for (i = 0; i < vm_phys_nsegs; i++) size += vm_phys_segs[i].end - vm_phys_segs[i].start; for (i = 0; phys_avail[i + 1] != 0; i += 2) size += phys_avail[i + 1] - phys_avail[i]; #elif defined(VM_PHYSSEG_DENSE) size = high_avail - low_avail; #else #error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined." #endif #ifdef VM_PHYSSEG_DENSE /* * In the VM_PHYSSEG_DENSE case, the number of pages can account for * the overhead of a page structure per page only if vm_page_array is * allocated from the last physical memory chunk. Otherwise, we must * allocate page structures representing the physical memory * underlying vm_page_array, even though they will not be used. */ if (new_end != high_avail) page_range = size / PAGE_SIZE; else #endif { page_range = size / (PAGE_SIZE + sizeof(struct vm_page)); /* * If the partial bytes remaining are large enough for * a page (PAGE_SIZE) without a corresponding * 'struct vm_page', then new_end will contain an * extra page after subtracting the length of the VM * page array. Compensate by subtracting an extra * page from new_end. */ if (size % (PAGE_SIZE + sizeof(struct vm_page)) >= PAGE_SIZE) { if (new_end == high_avail) high_avail -= PAGE_SIZE; new_end -= PAGE_SIZE; } } end = new_end; /* * Reserve an unmapped guard page to trap access to vm_page_array[-1]. * However, because this page is allocated from KVM, out-of-bounds * accesses using the direct map will not be trapped. */ vaddr += PAGE_SIZE; /* * Allocate physical memory for the page structures, and map it. */ new_end = trunc_page(end - page_range * sizeof(struct vm_page)); mapped = pmap_map(&vaddr, new_end, end, VM_PROT_READ | VM_PROT_WRITE); vm_page_array = (vm_page_t)mapped; vm_page_array_size = page_range; #if VM_NRESERVLEVEL > 0 /* * Allocate physical memory for the reservation management system's * data structures, and map it. */ if (high_avail == end) high_avail = new_end; new_end = vm_reserv_startup(&vaddr, new_end, high_avail); #endif #if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) /* * Include vm_page_array and vm_reserv_array in a crash dump. */ for (pa = new_end; pa < end; pa += PAGE_SIZE) dump_add_page(pa); #endif phys_avail[biggestone + 1] = new_end; /* * Add physical memory segments corresponding to the available * physical pages. */ for (i = 0; phys_avail[i + 1] != 0; i += 2) vm_phys_add_seg(phys_avail[i], phys_avail[i + 1]); /* * Initialize the physical memory allocator. */ vm_phys_init(); /* * Initialize the page structures and add every available page to the * physical memory allocator's free lists. */ vm_cnt.v_page_count = 0; vm_cnt.v_free_count = 0; for (segind = 0; segind < vm_phys_nsegs; segind++) { seg = &vm_phys_segs[segind]; for (pa = seg->start; pa < seg->end; pa += PAGE_SIZE) vm_phys_init_page(pa); /* * Add the segment to the free lists only if it is covered by * one of the ranges in phys_avail. Because we've added the * ranges to the vm_phys_segs array, we can assume that each * segment is either entirely contained in one of the ranges, * or doesn't overlap any of them. 
*/ for (i = 0; phys_avail[i + 1] != 0; i += 2) { if (seg->start < phys_avail[i] || seg->end > phys_avail[i + 1]) continue; m = seg->first_page; pagecount = (u_long)atop(seg->end - seg->start); mtx_lock(&vm_page_queue_free_mtx); vm_phys_free_contig(m, pagecount); vm_phys_freecnt_adj(m, (int)pagecount); mtx_unlock(&vm_page_queue_free_mtx); vm_cnt.v_page_count += (u_int)pagecount; vmd = &vm_dom[seg->domain]; vmd->vmd_page_count += (u_int)pagecount; vmd->vmd_segs |= 1UL << m->segind; break; } } /* * Remove blacklisted pages from the physical memory allocator. */ TAILQ_INIT(&blacklist_head); vm_page_blacklist_load(&list, &listend); vm_page_blacklist_check(list, listend); list = kern_getenv("vm.blacklist"); vm_page_blacklist_check(list, NULL); freeenv(list); #if VM_NRESERVLEVEL > 0 /* * Initialize the reservation management system. */ vm_reserv_init(); #endif return (vaddr); } void vm_page_reference(vm_page_t m) { vm_page_aflag_set(m, PGA_REFERENCED); } /* * vm_page_busy_downgrade: * * Downgrade an exclusive busy page into a single shared busy page. */ void vm_page_busy_downgrade(vm_page_t m) { u_int x; bool locked; vm_page_assert_xbusied(m); locked = mtx_owned(vm_page_lockptr(m)); for (;;) { x = m->busy_lock; x &= VPB_BIT_WAITERS; if (x != 0 && !locked) vm_page_lock(m); if (atomic_cmpset_rel_int(&m->busy_lock, VPB_SINGLE_EXCLUSIVER | x, VPB_SHARERS_WORD(1))) break; if (x != 0 && !locked) vm_page_unlock(m); } if (x != 0) { wakeup(m); if (!locked) vm_page_unlock(m); } } /* * vm_page_sbusied: * * Return a positive value if the page is shared busied, 0 otherwise. */ int vm_page_sbusied(vm_page_t m) { u_int x; x = m->busy_lock; return ((x & VPB_BIT_SHARED) != 0 && x != VPB_UNBUSIED); } /* * vm_page_sunbusy: * * Shared unbusy a page. */ void vm_page_sunbusy(vm_page_t m) { u_int x; vm_page_lock_assert(m, MA_NOTOWNED); vm_page_assert_sbusied(m); for (;;) { x = m->busy_lock; if (VPB_SHARERS(x) > 1) { if (atomic_cmpset_int(&m->busy_lock, x, x - VPB_ONE_SHARER)) break; continue; } if ((x & VPB_BIT_WAITERS) == 0) { KASSERT(x == VPB_SHARERS_WORD(1), ("vm_page_sunbusy: invalid lock state")); if (atomic_cmpset_int(&m->busy_lock, VPB_SHARERS_WORD(1), VPB_UNBUSIED)) break; continue; } KASSERT(x == (VPB_SHARERS_WORD(1) | VPB_BIT_WAITERS), ("vm_page_sunbusy: invalid lock state for waiters")); vm_page_lock(m); if (!atomic_cmpset_int(&m->busy_lock, x, VPB_UNBUSIED)) { vm_page_unlock(m); continue; } wakeup(m); vm_page_unlock(m); break; } } /* * vm_page_busy_sleep: * * Sleep and release the page lock, using the page pointer as wchan. * This is used to implement the hard-path of busying mechanism. * * The given page must be locked. * * If nonshared is true, sleep only if the page is xbusy. */ void vm_page_busy_sleep(vm_page_t m, const char *wmesg, bool nonshared) { u_int x; vm_page_assert_locked(m); x = m->busy_lock; if (x == VPB_UNBUSIED || (nonshared && (x & VPB_BIT_SHARED) != 0) || ((x & VPB_BIT_WAITERS) == 0 && !atomic_cmpset_int(&m->busy_lock, x, x | VPB_BIT_WAITERS))) { vm_page_unlock(m); return; } msleep(m, vm_page_lockptr(m), PVM | PDROP, wmesg, 0); } /* * vm_page_trysbusy: * * Try to shared busy a page. * If the operation succeeds 1 is returned otherwise 0. * The operation never sleeps. 
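 * (A usage sketch follows; the function itself comes after it.)
 */
#if 0
/*
 * Compiled-out sketch, not part of this change: typical opportunistic
 * use of the try-lock below.  "m" stands for a page the caller already
 * holds a reference to.
 */
	if (vm_page_trysbusy(m)) {
		/* Read the page; shared busy keeps it from being xbusied. */
		vm_page_sunbusy(m);
	} else {
		/* Exclusively busied by someone else; back off or sleep. */
	}
#endif
/*
 * (End of sketch.)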
*/ int vm_page_trysbusy(vm_page_t m) { u_int x; for (;;) { x = m->busy_lock; if ((x & VPB_BIT_SHARED) == 0) return (0); if (atomic_cmpset_acq_int(&m->busy_lock, x, x + VPB_ONE_SHARER)) return (1); } } static void vm_page_xunbusy_locked(vm_page_t m) { vm_page_assert_xbusied(m); vm_page_assert_locked(m); atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED); /* There is a waiter, do wakeup() instead of vm_page_flash(). */ wakeup(m); } void vm_page_xunbusy_maybelocked(vm_page_t m) { bool lockacq; vm_page_assert_xbusied(m); /* * Fast path for unbusy. If it succeeds, we know that there * are no waiters, so we do not need a wakeup. */ if (atomic_cmpset_rel_int(&m->busy_lock, VPB_SINGLE_EXCLUSIVER, VPB_UNBUSIED)) return; lockacq = !mtx_owned(vm_page_lockptr(m)); if (lockacq) vm_page_lock(m); vm_page_xunbusy_locked(m); if (lockacq) vm_page_unlock(m); } /* * vm_page_xunbusy_hard: * * Called after the first try the exclusive unbusy of a page failed. * It is assumed that the waiters bit is on. */ void vm_page_xunbusy_hard(vm_page_t m) { vm_page_assert_xbusied(m); vm_page_lock(m); vm_page_xunbusy_locked(m); vm_page_unlock(m); } /* * vm_page_flash: * * Wakeup anyone waiting for the page. * The ownership bits do not change. * * The given page must be locked. */ void vm_page_flash(vm_page_t m) { u_int x; vm_page_lock_assert(m, MA_OWNED); for (;;) { x = m->busy_lock; if ((x & VPB_BIT_WAITERS) == 0) return; if (atomic_cmpset_int(&m->busy_lock, x, x & (~VPB_BIT_WAITERS))) break; } wakeup(m); } /* * Avoid releasing and reacquiring the same page lock. */ void vm_page_change_lock(vm_page_t m, struct mtx **mtx) { struct mtx *mtx1; mtx1 = vm_page_lockptr(m); if (*mtx == mtx1) return; if (*mtx != NULL) mtx_unlock(*mtx); *mtx = mtx1; mtx_lock(mtx1); } /* * Keep the page from being freed by the page daemon; this has much * the same effect as wiring, except with much lower overhead, and it * should be used only for *very* temporary holding ("wiring"). */ void vm_page_hold(vm_page_t mem) { vm_page_lock_assert(mem, MA_OWNED); mem->hold_count++; } void vm_page_unhold(vm_page_t mem) { vm_page_lock_assert(mem, MA_OWNED); KASSERT(mem->hold_count >= 1, ("vm_page_unhold: hold count < 0!!!")); --mem->hold_count; if (mem->hold_count == 0 && (mem->flags & PG_UNHOLDFREE) != 0) vm_page_free_toq(mem); } /* * vm_page_unhold_pages: * * Unhold each of the pages that is referenced by the given array. */ void vm_page_unhold_pages(vm_page_t *ma, int count) { struct mtx *mtx; mtx = NULL; for (; count != 0; count--) { vm_page_change_lock(*ma, &mtx); vm_page_unhold(*ma); ma++; } if (mtx != NULL) mtx_unlock(mtx); } vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa) { vm_page_t m; #ifdef VM_PHYSSEG_SPARSE m = vm_phys_paddr_to_vm_page(pa); if (m == NULL) m = vm_phys_fictitious_to_vm_page(pa); return (m); #elif defined(VM_PHYSSEG_DENSE) long pi; pi = atop(pa); if (pi >= first_page && (pi - first_page) < vm_page_array_size) { m = &vm_page_array[pi - first_page]; return (m); } return (vm_phys_fictitious_to_vm_page(pa)); #else #error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined." #endif } /* * vm_page_getfake: * * Create a fictitious page with the specified physical address and * memory attribute. The memory attribute is the only machine- * dependent aspect of a fictitious page that must be initialized.
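 * (A usage sketch follows; the allocator itself comes after it.)
 */
#if 0
/*
 * Compiled-out sketch, not part of this change: a driver-style use of a
 * fictitious page to cover a device memory window.  "bus_paddr" is a
 * hypothetical physical address, and the memory attribute shown is just
 * one plausible choice.
 */
	vm_page_t fict;

	fict = vm_page_getfake(bus_paddr, VM_MEMATTR_UNCACHEABLE);
	/* ... hand the page to the fault handler / device object ... */
	vm_page_putfake(fict);
#endif
/*
 * (End of sketch.)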
*/ vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr) { vm_page_t m; m = uma_zalloc(fakepg_zone, M_WAITOK | M_ZERO); vm_page_initfake(m, paddr, memattr); return (m); } void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr) { if ((m->flags & PG_FICTITIOUS) != 0) { /* * The page's memattr might have changed since the * previous initialization. Update the pmap to the * new memattr. */ goto memattr; } m->phys_addr = paddr; m->queue = PQ_NONE; /* Fictitious pages don't use "segind". */ m->flags = PG_FICTITIOUS; /* Fictitious pages don't use "order" or "pool". */ m->oflags = VPO_UNMANAGED; m->busy_lock = VPB_SINGLE_EXCLUSIVER; m->wire_count = 1; pmap_page_init(m); memattr: pmap_page_set_memattr(m, memattr); } /* * vm_page_putfake: * * Release a fictitious page. */ void vm_page_putfake(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) != 0, ("managed %p", m)); KASSERT((m->flags & PG_FICTITIOUS) != 0, ("vm_page_putfake: bad page %p", m)); uma_zfree(fakepg_zone, m); } /* * vm_page_updatefake: * * Update the given fictitious page to the specified physical address and * memory attribute. */ void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr) { KASSERT((m->flags & PG_FICTITIOUS) != 0, ("vm_page_updatefake: bad page %p", m)); m->phys_addr = paddr; pmap_page_set_memattr(m, memattr); } /* * vm_page_free: * * Free a page. */ void vm_page_free(vm_page_t m) { m->flags &= ~PG_ZERO; vm_page_free_toq(m); } /* * vm_page_free_zero: * * Free a page to the zeroed-pages queue. */ void vm_page_free_zero(vm_page_t m) { m->flags |= PG_ZERO; vm_page_free_toq(m); } /* * Unbusy and handle the page queueing for a page from a getpages request that * was optionally read ahead or behind. */ void vm_page_readahead_finish(vm_page_t m) { /* We shouldn't put invalid pages on queues. */ KASSERT(m->valid != 0, ("%s: %p is invalid", __func__, m)); /* * Since the page is not the actually needed one, whether it should * be activated or deactivated is not obvious. Empirical results * have shown that deactivating the page is usually the best choice, * unless the page is wanted by another thread. */ vm_page_lock(m); if ((m->busy_lock & VPB_BIT_WAITERS) != 0) vm_page_activate(m); else vm_page_deactivate(m); vm_page_unlock(m); vm_page_xunbusy(m); } /* * vm_page_sleep_if_busy: * * Sleep and release the page queues lock if the page is busied. * Returns TRUE if the thread slept. * * The given page must be unlocked and the object containing it must * be locked. */ int vm_page_sleep_if_busy(vm_page_t m, const char *msg) { vm_object_t obj; vm_page_lock_assert(m, MA_NOTOWNED); VM_OBJECT_ASSERT_WLOCKED(m->object); if (vm_page_busied(m)) { /* * The page-specific object must be cached because page * identity can change during the sleep, causing the * re-lock of a different object. * It is assumed that a reference to the object is already * held by the callers. */ obj = m->object; vm_page_lock(m); VM_OBJECT_WUNLOCK(obj); vm_page_busy_sleep(m, msg, false); VM_OBJECT_WLOCK(obj); return (TRUE); } return (FALSE); } /* * vm_page_dirty_KBI: [ internal use only ] * * Set all bits in the page's dirty field. * * The object containing the specified page must be locked if the * call is made from the machine-independent layer. * * See vm_page_clear_dirty_mask(). * * This function should only be called by vm_page_dirty(). */ void vm_page_dirty_KBI(vm_page_t m) { /* Refer to this operation by its public name.
*/ KASSERT(m->valid == VM_PAGE_BITS_ALL, ("vm_page_dirty: page is invalid!")); m->dirty = VM_PAGE_BITS_ALL; } /* * vm_page_insert: [ internal use only ] * * Inserts the given mem entry into the object and object list. * * The object must be locked. */ int vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex) { vm_page_t mpred; VM_OBJECT_ASSERT_WLOCKED(object); mpred = vm_radix_lookup_le(&object->rtree, pindex); return (vm_page_insert_after(m, object, pindex, mpred)); } /* * vm_page_insert_after: * * Inserts the page "m" into the specified object at offset "pindex". * * The page "mpred" must immediately precede the offset "pindex" within * the specified object. * * The object must be locked. */ static int vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, vm_page_t mpred) { vm_page_t msucc; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(m->object == NULL, ("vm_page_insert_after: page already inserted")); if (mpred != NULL) { KASSERT(mpred->object == object, ("vm_page_insert_after: object doesn't contain mpred")); KASSERT(mpred->pindex < pindex, ("vm_page_insert_after: mpred doesn't precede pindex")); msucc = TAILQ_NEXT(mpred, listq); } else msucc = TAILQ_FIRST(&object->memq); if (msucc != NULL) KASSERT(msucc->pindex > pindex, ("vm_page_insert_after: msucc doesn't succeed pindex")); /* * Record the object/offset pair in this page */ m->object = object; m->pindex = pindex; /* * Now link into the object's ordered list of backed pages. */ if (vm_radix_insert(&object->rtree, m)) { m->object = NULL; m->pindex = 0; return (1); } vm_page_insert_radixdone(m, object, mpred); return (0); } /* * vm_page_insert_radixdone: * * Complete page "m" insertion into the specified object after the * radix trie hooking. * * The page "mpred" must precede the offset "m->pindex" within the * specified object. * * The object must be locked. */ static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object != NULL && m->object == object, ("vm_page_insert_radixdone: page %p has inconsistent object", m)); if (mpred != NULL) { KASSERT(mpred->object == object, ("vm_page_insert_after: object doesn't contain mpred")); KASSERT(mpred->pindex < m->pindex, ("vm_page_insert_after: mpred doesn't precede pindex")); } if (mpred != NULL) TAILQ_INSERT_AFTER(&object->memq, mpred, m, listq); else TAILQ_INSERT_HEAD(&object->memq, m, listq); /* * Show that the object has one more resident page. */ object->resident_page_count++; /* * Hold the vnode until the last page is released. */ if (object->resident_page_count == 1 && object->type == OBJT_VNODE) vhold(object->handle); /* * Since we are inserting a new and possibly dirty page, * update the object's OBJ_MIGHTBEDIRTY flag. */ if (pmap_page_is_write_mapped(m)) vm_object_set_writeable_dirty(object); } /* * vm_page_remove: * * Removes the specified page from its containing object, but does not * invalidate any backing storage. * * The object must be locked. The page must be locked if it is managed. */ void vm_page_remove(vm_page_t m) { vm_object_t object; vm_page_t mrem; if ((m->oflags & VPO_UNMANAGED) == 0) vm_page_assert_locked(m); if ((object = m->object) == NULL) return; VM_OBJECT_ASSERT_WLOCKED(object); if (vm_page_xbusied(m)) vm_page_xunbusy_maybelocked(m); mrem = vm_radix_remove(&object->rtree, m->pindex); KASSERT(mrem == m, ("removed page %p, expected page %p", mrem, m)); /* * Now remove from the object's list of backed pages. 
*/ TAILQ_REMOVE(&object->memq, m, listq); /* * And show that the object has one fewer resident page. */ object->resident_page_count--; /* * The vnode may now be recycled. */ if (object->resident_page_count == 0 && object->type == OBJT_VNODE) vdrop(object->handle); m->object = NULL; } /* * vm_page_lookup: * * Returns the page associated with the object/offset * pair specified; if none is found, NULL is returned. * * The object must be locked. */ vm_page_t vm_page_lookup(vm_object_t object, vm_pindex_t pindex) { VM_OBJECT_ASSERT_LOCKED(object); return (vm_radix_lookup(&object->rtree, pindex)); } /* * vm_page_find_least: * * Returns the page associated with the object with least pindex * greater than or equal to the parameter pindex, or NULL. * * The object must be locked. */ vm_page_t vm_page_find_least(vm_object_t object, vm_pindex_t pindex) { vm_page_t m; VM_OBJECT_ASSERT_LOCKED(object); if ((m = TAILQ_FIRST(&object->memq)) != NULL && m->pindex < pindex) m = vm_radix_lookup_ge(&object->rtree, pindex); return (m); } /* * Returns the given page's successor (by pindex) within the object if it is * resident; if none is found, NULL is returned. * * The object must be locked. */ vm_page_t vm_page_next(vm_page_t m) { vm_page_t next; VM_OBJECT_ASSERT_LOCKED(m->object); if ((next = TAILQ_NEXT(m, listq)) != NULL) { MPASS(next->object == m->object); if (next->pindex != m->pindex + 1) next = NULL; } return (next); } /* * Returns the given page's predecessor (by pindex) within the object if it is * resident; if none is found, NULL is returned. * * The object must be locked. */ vm_page_t vm_page_prev(vm_page_t m) { vm_page_t prev; VM_OBJECT_ASSERT_LOCKED(m->object); if ((prev = TAILQ_PREV(m, pglist, listq)) != NULL) { MPASS(prev->object == m->object); if (prev->pindex != m->pindex - 1) prev = NULL; } return (prev); } /* * Uses the page mnew as a replacement for an existing page at index * pindex which must already be present in the object. * * The existing page must not be on a paging queue. */ vm_page_t vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex) { vm_page_t mold; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(mnew->object == NULL, ("vm_page_replace: page already in object")); /* * This function mostly follows vm_page_insert() and * vm_page_remove() without the radix, object count and vnode * dance. Double check such functions for more comments. */ mnew->object = object; mnew->pindex = pindex; mold = vm_radix_replace(&object->rtree, mnew); KASSERT(mold->queue == PQ_NONE, ("vm_page_replace: mold is on a paging queue")); /* Keep the resident page list in sorted order. */ TAILQ_INSERT_AFTER(&object->memq, mold, mnew, listq); TAILQ_REMOVE(&object->memq, mold, listq); mold->object = NULL; vm_page_xunbusy_maybelocked(mold); /* * The object's resident_page_count does not change because we have * swapped one page for another, but OBJ_MIGHTBEDIRTY may need to be * set. */ if (pmap_page_is_write_mapped(mnew)) vm_object_set_writeable_dirty(object); return (mold); } /* * vm_page_rename: * * Move the given memory entry from its * current object to the specified target object/offset. * * Note: swap associated with the page must be invalidated by the move. We * have to do this for several reasons: (1) we aren't freeing the * page, (2) we are dirtying the page, (3) the VM system is probably * moving the page from object A to B, and will then later move * the backing store from A to B and we can't have a conflict. * * Note: we *always* dirty the page.
It is necessary both for the * fact that we moved it, and because we may be invalidating * swap. * * The objects must be locked. */ int vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex) { vm_page_t mpred; vm_pindex_t opidx; VM_OBJECT_ASSERT_WLOCKED(new_object); mpred = vm_radix_lookup_le(&new_object->rtree, new_pindex); KASSERT(mpred == NULL || mpred->pindex != new_pindex, ("vm_page_rename: pindex already renamed")); /* * Create a custom version of vm_page_insert() which does not depend * on m_prev and can cheat on the implementation aspects of the * function. */ opidx = m->pindex; m->pindex = new_pindex; if (vm_radix_insert(&new_object->rtree, m)) { m->pindex = opidx; return (1); } /* * The operation cannot fail anymore. The removal must happen before * the listq iterator is tainted. */ m->pindex = opidx; vm_page_lock(m); vm_page_remove(m); /* Return back to the new pindex to complete vm_page_insert(). */ m->pindex = new_pindex; m->object = new_object; vm_page_unlock(m); vm_page_insert_radixdone(m, new_object, mpred); vm_page_dirty(m); return (0); } /* * vm_page_alloc: * * Allocate and return a page that is associated with the specified * object and offset pair. By default, this page is exclusive busied. * * The caller must always specify an allocation class. * * allocation classes: * VM_ALLOC_NORMAL normal process request * VM_ALLOC_SYSTEM system *really* needs a page * VM_ALLOC_INTERRUPT interrupt time request * * optional allocation flags: * VM_ALLOC_COUNT(number) the number of additional pages that the caller * intends to allocate * VM_ALLOC_NOBUSY do not exclusive busy the page * VM_ALLOC_NODUMP do not include the page in a kernel core dump * VM_ALLOC_NOOBJ page is not associated with an object and * should not be exclusive busy * VM_ALLOC_SBUSY shared busy the allocated page * VM_ALLOC_WIRED wire the allocated page * VM_ALLOC_ZERO prefer a zeroed page * * This routine may not sleep. */ vm_page_t vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req) { return (vm_page_alloc_after(object, pindex, req, object != NULL ? vm_radix_lookup_le(&object->rtree, pindex) : NULL)); } /* * Allocate a page in the specified object with the given page index. To * optimize insertion of the page into the object, the caller must also specify * the resident page in the object with the largest index smaller than the given * page index, or NULL if no such page exists. */ vm_page_t vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex, int req, vm_page_t mpred) { vm_page_t m; int flags, req_class; KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) && (object != NULL || (req & VM_ALLOC_SBUSY) == 0) && ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) != (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)), ("inconsistent object(%p)/req(%x)", object, req)); KASSERT(mpred == NULL || mpred->pindex < pindex, ("mpred %p doesn't precede pindex 0x%jx", mpred, (uintmax_t)pindex)); if (object != NULL) VM_OBJECT_ASSERT_WLOCKED(object); req_class = req & VM_ALLOC_CLASS_MASK; /* * The page daemon is allowed to dig deeper into the free page list. */ if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) req_class = VM_ALLOC_SYSTEM; /* * Allocate a page if the number of free pages exceeds the minimum * for the request class.
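 * (A restatement of those thresholds follows; the original code
 * resumes after it.)
 */
#if 0
/*
 * Compiled-out sketch, not part of this change: the free-page thresholds
 * applied below, restated as a predicate.  The helper name is
 * hypothetical; the vm_cnt fields are the real counters.
 */
static bool
can_alloc_page(int req_class)
{

	if (vm_cnt.v_free_count > vm_cnt.v_free_reserved)
		return (true);		/* plenty left for anyone */
	if (req_class == VM_ALLOC_SYSTEM &&
	    vm_cnt.v_free_count > vm_cnt.v_interrupt_free_min)
		return (true);		/* system requests dig deeper */
	if (req_class == VM_ALLOC_INTERRUPT && vm_cnt.v_free_count > 0)
		return (true);		/* interrupts may take the last page */
	return (false);
}
#endif
/*
 * (End of sketch.)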
*/ mtx_lock(&vm_page_queue_free_mtx); if (vm_cnt.v_free_count > vm_cnt.v_free_reserved || (req_class == VM_ALLOC_SYSTEM && vm_cnt.v_free_count > vm_cnt.v_interrupt_free_min) || (req_class == VM_ALLOC_INTERRUPT && vm_cnt.v_free_count > 0)) { /* * Can we allocate the page from a reservation? */ #if VM_NRESERVLEVEL > 0 if (object == NULL || (object->flags & (OBJ_COLORED | OBJ_FICTITIOUS)) != OBJ_COLORED || (m = vm_reserv_alloc_page(object, pindex, mpred)) == NULL) #endif { /* * If not, allocate it from the free page queues. */ m = vm_phys_alloc_pages(object != NULL ? VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0); #if VM_NRESERVLEVEL > 0 if (m == NULL && vm_reserv_reclaim_inactive()) { m = vm_phys_alloc_pages(object != NULL ? VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0); } #endif } } else { /* * Not allocatable, give up. */ mtx_unlock(&vm_page_queue_free_mtx); atomic_add_int(&vm_pageout_deficit, max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1)); pagedaemon_wakeup(); return (NULL); } /* * At this point we had better have found a good page. */ KASSERT(m != NULL, ("missing page")); vm_phys_freecnt_adj(m, -1); mtx_unlock(&vm_page_queue_free_mtx); vm_page_alloc_check(m); /* * Initialize the page. Only the PG_ZERO flag is inherited. */ flags = 0; if ((req & VM_ALLOC_ZERO) != 0) flags = PG_ZERO; flags &= m->flags; if ((req & VM_ALLOC_NODUMP) != 0) flags |= PG_NODUMP; m->flags = flags; m->aflags = 0; m->oflags = object == NULL || (object->flags & OBJ_UNMANAGED) != 0 ? VPO_UNMANAGED : 0; m->busy_lock = VPB_UNBUSIED; if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ | VM_ALLOC_SBUSY)) == 0) m->busy_lock = VPB_SINGLE_EXCLUSIVER; if ((req & VM_ALLOC_SBUSY) != 0) m->busy_lock = VPB_SHARERS_WORD(1); if (req & VM_ALLOC_WIRED) { /* * The page lock is not required for wiring a page until that * page is inserted into the object. */ atomic_add_int(&vm_cnt.v_wire_count, 1); m->wire_count = 1; } m->act_count = 0; if (object != NULL) { if (vm_page_insert_after(m, object, pindex, mpred)) { pagedaemon_wakeup(); if (req & VM_ALLOC_WIRED) { atomic_subtract_int(&vm_cnt.v_wire_count, 1); m->wire_count = 0; } KASSERT(m->object == NULL, ("page %p has object", m)); m->oflags = VPO_UNMANAGED; m->busy_lock = VPB_UNBUSIED; /* Don't change PG_ZERO. */ vm_page_free_toq(m); return (NULL); } /* Ignore device objects; the pager sets "memattr" for them. */ if (object->memattr != VM_MEMATTR_DEFAULT && (object->flags & OBJ_FICTITIOUS) == 0) pmap_page_set_memattr(m, object->memattr); } else m->pindex = pindex; /* * Don't wakeup too often - wakeup the pageout daemon when * we would be nearly out of memory. */ if (vm_paging_needed()) pagedaemon_wakeup(); return (m); } /* * vm_page_alloc_contig: * * Allocate a contiguous set of physical pages of the given size "npages" * from the free lists. All of the physical pages must be at or above * the given physical address "low" and below the given physical address * "high". The given value "alignment" determines the alignment of the * first physical page in the set. If the given value "boundary" is * non-zero, then the set of physical pages cannot cross any physical * address boundary that is a multiple of that value. Both "alignment" * and "boundary" must be a power of two. * * If the specified memory attribute, "memattr", is VM_MEMATTR_DEFAULT, * then the memory attribute setting for the physical pages is configured * to the object's memory attribute setting. Otherwise, the memory * attribute setting for the physical pages is configured to "memattr", * overriding the object's memory attribute setting. 
However, if the * object's memory attribute setting is not VM_MEMATTR_DEFAULT, then the * memory attribute setting for the physical pages cannot be configured * to VM_MEMATTR_DEFAULT. * * The specified object may not contain fictitious pages. * * The caller must always specify an allocation class. * * allocation classes: * VM_ALLOC_NORMAL normal process request * VM_ALLOC_SYSTEM system *really* needs a page * VM_ALLOC_INTERRUPT interrupt time request * * optional allocation flags: * VM_ALLOC_NOBUSY do not exclusive busy the page * VM_ALLOC_NODUMP do not include the page in a kernel core dump * VM_ALLOC_NOOBJ page is not associated with an object and * should not be exclusive busy * VM_ALLOC_SBUSY shared busy the allocated page * VM_ALLOC_WIRED wire the allocated page * VM_ALLOC_ZERO prefer a zeroed page * * This routine may not sleep. */ vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr) { vm_page_t m, m_ret, mpred; u_int busy_lock, flags, oflags; int req_class; mpred = NULL; /* XXX: pacify gcc */ KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) && (object != NULL || (req & VM_ALLOC_SBUSY) == 0) && ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) != (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)), ("vm_page_alloc_contig: inconsistent object(%p)/req(%x)", object, req)); if (object != NULL) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((object->flags & OBJ_FICTITIOUS) == 0, ("vm_page_alloc_contig: object %p has fictitious pages", object)); } KASSERT(npages > 0, ("vm_page_alloc_contig: npages is zero")); req_class = req & VM_ALLOC_CLASS_MASK; /* * The page daemon is allowed to dig deeper into the free page list. */ if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) req_class = VM_ALLOC_SYSTEM; if (object != NULL) { mpred = vm_radix_lookup_le(&object->rtree, pindex); KASSERT(mpred == NULL || mpred->pindex != pindex, ("vm_page_alloc_contig: pindex already allocated")); } /* * Can we allocate the pages without the number of free pages falling * below the lower bound for the allocation class? */ mtx_lock(&vm_page_queue_free_mtx); if (vm_cnt.v_free_count >= npages + vm_cnt.v_free_reserved || (req_class == VM_ALLOC_SYSTEM && vm_cnt.v_free_count >= npages + vm_cnt.v_interrupt_free_min) || (req_class == VM_ALLOC_INTERRUPT && vm_cnt.v_free_count >= npages)) { /* * Can we allocate the pages from a reservation? */ #if VM_NRESERVLEVEL > 0 retry: if (object == NULL || (object->flags & OBJ_COLORED) == 0 || (m_ret = vm_reserv_alloc_contig(object, pindex, npages, low, high, alignment, boundary, mpred)) == NULL) #endif /* * If not, allocate them from the free page queues. */ m_ret = vm_phys_alloc_contig(npages, low, high, alignment, boundary); } else { mtx_unlock(&vm_page_queue_free_mtx); atomic_add_int(&vm_pageout_deficit, npages); pagedaemon_wakeup(); return (NULL); } if (m_ret != NULL) vm_phys_freecnt_adj(m_ret, -npages); else { #if VM_NRESERVLEVEL > 0 if (vm_reserv_reclaim_contig(npages, low, high, alignment, boundary)) goto retry; #endif } mtx_unlock(&vm_page_queue_free_mtx); if (m_ret == NULL) return (NULL); for (m = m_ret; m < &m_ret[npages]; m++) vm_page_alloc_check(m); /* * Initialize the pages. Only the PG_ZERO flag is inherited. */ flags = 0; if ((req & VM_ALLOC_ZERO) != 0) flags = PG_ZERO; if ((req & VM_ALLOC_NODUMP) != 0) flags |= PG_NODUMP; oflags = object == NULL || (object->flags & OBJ_UNMANAGED) != 0 ? 
VPO_UNMANAGED : 0; busy_lock = VPB_UNBUSIED; if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ | VM_ALLOC_SBUSY)) == 0) busy_lock = VPB_SINGLE_EXCLUSIVER; if ((req & VM_ALLOC_SBUSY) != 0) busy_lock = VPB_SHARERS_WORD(1); if ((req & VM_ALLOC_WIRED) != 0) atomic_add_int(&vm_cnt.v_wire_count, npages); if (object != NULL) { if (object->memattr != VM_MEMATTR_DEFAULT && memattr == VM_MEMATTR_DEFAULT) memattr = object->memattr; } for (m = m_ret; m < &m_ret[npages]; m++) { m->aflags = 0; m->flags = (m->flags | PG_NODUMP) & flags; m->busy_lock = busy_lock; if ((req & VM_ALLOC_WIRED) != 0) m->wire_count = 1; m->act_count = 0; m->oflags = oflags; if (object != NULL) { if (vm_page_insert_after(m, object, pindex, mpred)) { pagedaemon_wakeup(); if ((req & VM_ALLOC_WIRED) != 0) atomic_subtract_int( &vm_cnt.v_wire_count, npages); KASSERT(m->object == NULL, ("page %p has object", m)); mpred = m; for (m = m_ret; m < &m_ret[npages]; m++) { if (m <= mpred && (req & VM_ALLOC_WIRED) != 0) m->wire_count = 0; m->oflags = VPO_UNMANAGED; m->busy_lock = VPB_UNBUSIED; /* Don't change PG_ZERO. */ vm_page_free_toq(m); } return (NULL); } mpred = m; } else m->pindex = pindex; if (memattr != VM_MEMATTR_DEFAULT) pmap_page_set_memattr(m, memattr); pindex++; } if (vm_paging_needed()) pagedaemon_wakeup(); return (m_ret); } /* * Check a page that has been freshly dequeued from a freelist. */ static void vm_page_alloc_check(vm_page_t m) { KASSERT(m->object == NULL, ("page %p has object", m)); KASSERT(m->queue == PQ_NONE, ("page %p has unexpected queue %d", m, m->queue)); KASSERT(m->wire_count == 0, ("page %p is wired", m)); KASSERT(m->hold_count == 0, ("page %p is held", m)); KASSERT(!vm_page_busied(m), ("page %p is busy", m)); KASSERT(m->dirty == 0, ("page %p is dirty", m)); KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, ("page %p has unexpected memattr %d", m, pmap_page_get_memattr(m))); KASSERT(m->valid == 0, ("free page %p is valid", m)); } /* * vm_page_alloc_freelist: * * Allocate a physical page from the specified free page list. * * The caller must always specify an allocation class. * * allocation classes: * VM_ALLOC_NORMAL normal process request * VM_ALLOC_SYSTEM system *really* needs a page * VM_ALLOC_INTERRUPT interrupt time request * * optional allocation flags: * VM_ALLOC_COUNT(number) the number of additional pages that the caller * intends to allocate * VM_ALLOC_WIRED wire the allocated page * VM_ALLOC_ZERO prefer a zeroed page * * This routine may not sleep. */ vm_page_t vm_page_alloc_freelist(int flind, int req) { vm_page_t m; u_int flags; int req_class; req_class = req & VM_ALLOC_CLASS_MASK; /* * The page daemon is allowed to dig deeper into the free page list. */ if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) req_class = VM_ALLOC_SYSTEM; /* * Do not allocate reserved pages unless the req has asked for it. */ mtx_lock(&vm_page_queue_free_mtx); if (vm_cnt.v_free_count > vm_cnt.v_free_reserved || (req_class == VM_ALLOC_SYSTEM && vm_cnt.v_free_count > vm_cnt.v_interrupt_free_min) || (req_class == VM_ALLOC_INTERRUPT && vm_cnt.v_free_count > 0)) m = vm_phys_alloc_freelist_pages(flind, VM_FREEPOOL_DIRECT, 0); else { mtx_unlock(&vm_page_queue_free_mtx); atomic_add_int(&vm_pageout_deficit, max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1)); pagedaemon_wakeup(); return (NULL); } if (m == NULL) { mtx_unlock(&vm_page_queue_free_mtx); return (NULL); } vm_phys_freecnt_adj(m, -1); mtx_unlock(&vm_page_queue_free_mtx); vm_page_alloc_check(m); /* * Initialize the page. Only the PG_ZERO flag is inherited. 
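 *
 * (Editorial illustration, not part of the source: the
 * "m->flags &= flags" below keeps PG_ZERO only when the caller
 * passed VM_ALLOC_ZERO and the free page already carried PG_ZERO,
 * so a caller can test the flag to decide whether zeroing is still
 * needed:
 *
 *	m = vm_page_alloc_freelist(flind, VM_ALLOC_NORMAL | VM_ALLOC_ZERO);
 *	if (m != NULL && (m->flags & PG_ZERO) == 0)
 *		pmap_zero_page(m);
 * )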
*/ m->aflags = 0; flags = 0; if ((req & VM_ALLOC_ZERO) != 0) flags = PG_ZERO; m->flags &= flags; if ((req & VM_ALLOC_WIRED) != 0) { /* * The page lock is not required for wiring a page that does * not belong to an object. */ atomic_add_int(&vm_cnt.v_wire_count, 1); m->wire_count = 1; } /* Unmanaged pages don't use "act_count". */ m->oflags = VPO_UNMANAGED; if (vm_paging_needed()) pagedaemon_wakeup(); return (m); } #define VPSC_ANY 0 /* No restrictions. */ #define VPSC_NORESERV 1 /* Skip reservations; implies VPSC_NOSUPER. */ #define VPSC_NOSUPER 2 /* Skip superpages. */ /* * vm_page_scan_contig: * * Scan vm_page_array[] between the specified entries "m_start" and * "m_end" for a run of contiguous physical pages that satisfy the * specified conditions, and return the lowest page in the run. The * specified "alignment" determines the alignment of the lowest physical * page in the run. If the specified "boundary" is non-zero, then the * run of physical pages cannot span a physical address that is a * multiple of "boundary". * * "m_end" is never dereferenced, so it need not point to a vm_page * structure within vm_page_array[]. * * "npages" must be greater than zero. "m_start" and "m_end" must not * span a hole (or discontiguity) in the physical address space. Both * "alignment" and "boundary" must be a power of two. */ vm_page_t vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end, u_long alignment, vm_paddr_t boundary, int options) { struct mtx *m_mtx; vm_object_t object; vm_paddr_t pa; vm_page_t m, m_run; #if VM_NRESERVLEVEL > 0 int level; #endif int m_inc, order, run_ext, run_len; KASSERT(npages > 0, ("npages is 0")); KASSERT(powerof2(alignment), ("alignment is not a power of 2")); KASSERT(powerof2(boundary), ("boundary is not a power of 2")); m_run = NULL; run_len = 0; m_mtx = NULL; for (m = m_start; m < m_end && run_len < npages; m += m_inc) { KASSERT((m->flags & (PG_FICTITIOUS | PG_MARKER)) == 0, ("page %p is PG_FICTITIOUS or PG_MARKER", m)); /* * If the current page would be the start of a run, check its * physical address against the end, alignment, and boundary * conditions. If it doesn't satisfy these conditions, either * terminate the scan or advance to the next page that * satisfies the failed condition. */ if (run_len == 0) { KASSERT(m_run == NULL, ("m_run != NULL")); if (m + npages > m_end) break; pa = VM_PAGE_TO_PHYS(m); if ((pa & (alignment - 1)) != 0) { m_inc = atop(roundup2(pa, alignment) - pa); continue; } if (rounddown2(pa ^ (pa + ptoa(npages) - 1), boundary) != 0) { m_inc = atop(roundup2(pa, boundary) - pa); continue; } } else KASSERT(m_run != NULL, ("m_run == NULL")); vm_page_change_lock(m, &m_mtx); m_inc = 1; retry: if (m->wire_count != 0 || m->hold_count != 0) run_ext = 0; #if VM_NRESERVLEVEL > 0 else if ((level = vm_reserv_level(m)) >= 0 && (options & VPSC_NORESERV) != 0) { run_ext = 0; /* Advance to the end of the reservation. */ pa = VM_PAGE_TO_PHYS(m); m_inc = atop(roundup2(pa + 1, vm_reserv_size(level)) - pa); } #endif else if ((object = m->object) != NULL) { /* * The page is considered eligible for relocation if * and only if it could be laundered or reclaimed by * the page daemon. */ if (!VM_OBJECT_TRYRLOCK(object)) { mtx_unlock(m_mtx); VM_OBJECT_RLOCK(object); mtx_lock(m_mtx); if (m->object != object) { /* * The page may have been freed. 
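 * (Editorial note: it can change identity because the page lock
 * was dropped while the object lock was acquired above, hence the
 * unlock and retry below.)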
*/ VM_OBJECT_RUNLOCK(object); goto retry; } else if (m->wire_count != 0 || m->hold_count != 0) { run_ext = 0; goto unlock; } } KASSERT((m->flags & PG_UNHOLDFREE) == 0, ("page %p is PG_UNHOLDFREE", m)); /* Don't care: PG_NODUMP, PG_ZERO. */ if (object->type != OBJT_DEFAULT && object->type != OBJT_SWAP && object->type != OBJT_VNODE) { run_ext = 0; #if VM_NRESERVLEVEL > 0 } else if ((options & VPSC_NOSUPER) != 0 && (level = vm_reserv_level_iffullpop(m)) >= 0) { run_ext = 0; /* Advance to the end of the superpage. */ pa = VM_PAGE_TO_PHYS(m); m_inc = atop(roundup2(pa + 1, vm_reserv_size(level)) - pa); #endif } else if (object->memattr == VM_MEMATTR_DEFAULT && m->queue != PQ_NONE && !vm_page_busied(m)) { /* * The page is allocated but eligible for * relocation. Extend the current run by one * page. */ KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, ("page %p has an unexpected memattr", m)); KASSERT((m->oflags & (VPO_SWAPINPROG | VPO_SWAPSLEEP | VPO_UNMANAGED)) == 0, ("page %p has unexpected oflags", m)); /* Don't care: VPO_NOSYNC. */ run_ext = 1; } else run_ext = 0; unlock: VM_OBJECT_RUNLOCK(object); #if VM_NRESERVLEVEL > 0 } else if (level >= 0) { /* * The page is reserved but not yet allocated. In * other words, it is still free. Extend the current * run by one page. */ run_ext = 1; #endif } else if ((order = m->order) < VM_NFREEORDER) { /* * The page is enqueued in the physical memory * allocator's free page queues. Moreover, it is the * first page in a power-of-two-sized run of * contiguous free pages. Add these pages to the end * of the current run, and jump ahead. */ run_ext = 1 << order; m_inc = 1 << order; } else { /* * Skip the page for one of the following reasons: (1) * It is enqueued in the physical memory allocator's * free page queues. However, it is not the first * page in a run of contiguous free pages. (This case * rarely occurs because the scan is performed in * ascending order.) (2) It is not reserved, and it is * transitioning from free to allocated. (Conversely, * the transition from allocated to free for managed * pages is blocked by the page lock.) (3) It is * allocated but not contained by an object and not * wired, e.g., allocated by Xen's balloon driver. */ run_ext = 0; } /* * Extend or reset the current run of pages. */ if (run_ext > 0) { if (run_len == 0) m_run = m; run_len += run_ext; } else { if (run_len > 0) { m_run = NULL; run_len = 0; } } } if (m_mtx != NULL) mtx_unlock(m_mtx); if (run_len >= npages) return (m_run); return (NULL); } /* * vm_page_reclaim_run: * * Try to relocate each of the allocated virtual pages within the * specified run of physical pages to a new physical address. Free the * physical pages underlying the relocated virtual pages. A virtual page * is relocatable if and only if it could be laundered or reclaimed by * the page daemon. Whenever possible, a virtual page is relocated to a * physical address above "high". * * Returns 0 if every physical page within the run was already free or * just freed by a successful relocation. Otherwise, returns a non-zero * value indicating why the last attempt to relocate a virtual page was * unsuccessful. * * "req_class" must be an allocation class. 
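 *
 * (Illustrative sketch, not part of the source: the caller below,
 * vm_page_reclaim_contig(), invokes this routine as
 *
 *	error = vm_page_reclaim_run(req_class, npages, m_run, high);
 *
 * where "m_run" was found by a prior contiguity scan.)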
*/ static int vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run, vm_paddr_t high) { struct mtx *m_mtx; struct spglist free; vm_object_t object; vm_paddr_t pa; vm_page_t m, m_end, m_new; int error, order, req; KASSERT((req_class & VM_ALLOC_CLASS_MASK) == req_class, ("req_class is not an allocation class")); SLIST_INIT(&free); error = 0; m = m_run; m_end = m_run + npages; m_mtx = NULL; for (; error == 0 && m < m_end; m++) { KASSERT((m->flags & (PG_FICTITIOUS | PG_MARKER)) == 0, ("page %p is PG_FICTITIOUS or PG_MARKER", m)); /* * Avoid releasing and reacquiring the same page lock. */ vm_page_change_lock(m, &m_mtx); retry: if (m->wire_count != 0 || m->hold_count != 0) error = EBUSY; else if ((object = m->object) != NULL) { /* * The page is relocated if and only if it could be * laundered or reclaimed by the page daemon. */ if (!VM_OBJECT_TRYWLOCK(object)) { mtx_unlock(m_mtx); VM_OBJECT_WLOCK(object); mtx_lock(m_mtx); if (m->object != object) { /* * The page may have been freed. */ VM_OBJECT_WUNLOCK(object); goto retry; } else if (m->wire_count != 0 || m->hold_count != 0) { error = EBUSY; goto unlock; } } KASSERT((m->flags & PG_UNHOLDFREE) == 0, ("page %p is PG_UNHOLDFREE", m)); /* Don't care: PG_NODUMP, PG_ZERO. */ if (object->type != OBJT_DEFAULT && object->type != OBJT_SWAP && object->type != OBJT_VNODE) error = EINVAL; else if (object->memattr != VM_MEMATTR_DEFAULT) error = EINVAL; else if (m->queue != PQ_NONE && !vm_page_busied(m)) { KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, ("page %p has an unexpected memattr", m)); KASSERT((m->oflags & (VPO_SWAPINPROG | VPO_SWAPSLEEP | VPO_UNMANAGED)) == 0, ("page %p has unexpected oflags", m)); /* Don't care: VPO_NOSYNC. */ if (m->valid != 0) { /* * First, try to allocate a new page * that is above "high". Failing * that, try to allocate a new page * that is below "m_run". Allocate * the new page between the end of * "m_run" and "high" only as a last * resort. */ req = req_class | VM_ALLOC_NOOBJ; if ((m->flags & PG_NODUMP) != 0) req |= VM_ALLOC_NODUMP; if (trunc_page(high) != ~(vm_paddr_t)PAGE_MASK) { m_new = vm_page_alloc_contig( NULL, 0, req, 1, round_page(high), ~(vm_paddr_t)0, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); } else m_new = NULL; if (m_new == NULL) { pa = VM_PAGE_TO_PHYS(m_run); m_new = vm_page_alloc_contig( NULL, 0, req, 1, 0, pa - 1, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); } if (m_new == NULL) { pa += ptoa(npages); m_new = vm_page_alloc_contig( NULL, 0, req, 1, pa, high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); } if (m_new == NULL) { error = ENOMEM; goto unlock; } KASSERT(m_new->wire_count == 0, ("page %p is wired", m)); /* * Replace "m" with the new page. For * vm_page_replace(), "m" must be busy * and dequeued. Finally, change "m" * as if vm_page_free() was called. */ if (object->ref_count != 0) pmap_remove_all(m); m_new->aflags = m->aflags; KASSERT(m_new->oflags == VPO_UNMANAGED, ("page %p is managed", m)); m_new->oflags = m->oflags & VPO_NOSYNC; pmap_copy_page(m, m_new); m_new->valid = m->valid; m_new->dirty = m->dirty; m->flags &= ~PG_ZERO; vm_page_xbusy(m); vm_page_remque(m); vm_page_replace_checked(m_new, object, m->pindex, m); m->valid = 0; vm_page_undirty(m); /* * The new page must be deactivated * before the object is unlocked. 
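 * (Editorial note: vm_page_deactivate() asserts that the page is
 * locked, so vm_page_change_lock() is used first to switch the held
 * page lock from "m" to "m_new".)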
*/ vm_page_change_lock(m_new, &m_mtx); vm_page_deactivate(m_new); } else { m->flags &= ~PG_ZERO; vm_page_remque(m); vm_page_remove(m); KASSERT(m->dirty == 0, ("page %p is dirty", m)); } SLIST_INSERT_HEAD(&free, m, plinks.s.ss); } else error = EBUSY; unlock: VM_OBJECT_WUNLOCK(object); } else { mtx_lock(&vm_page_queue_free_mtx); order = m->order; if (order < VM_NFREEORDER) { /* * The page is enqueued in the physical memory * allocator's free page queues. Moreover, it * is the first page in a power-of-two-sized * run of contiguous free pages. Jump ahead * to the last page within that run, and * continue from there. */ m += (1 << order) - 1; } #if VM_NRESERVLEVEL > 0 else if (vm_reserv_is_page_free(m)) order = 0; #endif mtx_unlock(&vm_page_queue_free_mtx); if (order == VM_NFREEORDER) error = EINVAL; } } if (m_mtx != NULL) mtx_unlock(m_mtx); if ((m = SLIST_FIRST(&free)) != NULL) { mtx_lock(&vm_page_queue_free_mtx); do { SLIST_REMOVE_HEAD(&free, plinks.s.ss); vm_page_free_phys(m); } while ((m = SLIST_FIRST(&free)) != NULL); vm_page_free_wakeup(); mtx_unlock(&vm_page_queue_free_mtx); } return (error); } #define NRUNS 16 CTASSERT(powerof2(NRUNS)); #define RUN_INDEX(count) ((count) & (NRUNS - 1)) #define MIN_RECLAIM 8 /* * vm_page_reclaim_contig: * * Reclaim allocated, contiguous physical memory satisfying the specified * conditions by relocating the virtual pages using that physical memory. * Returns true if reclamation is successful and false otherwise. Since * relocation requires the allocation of physical pages, reclamation may * fail due to a shortage of free pages. When reclamation fails, callers * are expected to perform VM_WAIT before retrying a failed allocation * operation, e.g., vm_page_alloc_contig(). * * The caller must always specify an allocation class through "req". * * allocation classes: * VM_ALLOC_NORMAL normal process request * VM_ALLOC_SYSTEM system *really* needs a page * VM_ALLOC_INTERRUPT interrupt time request * * The optional allocation flags are ignored. * * "npages" must be greater than zero. Both "alignment" and "boundary" * must be a power of two. */ bool vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary) { vm_paddr_t curr_low; vm_page_t m_run, m_runs[NRUNS]; u_long count, reclaimed; int error, i, options, req_class; KASSERT(npages > 0, ("npages is 0")); KASSERT(powerof2(alignment), ("alignment is not a power of 2")); KASSERT(powerof2(boundary), ("boundary is not a power of 2")); req_class = req & VM_ALLOC_CLASS_MASK; /* * The page daemon is allowed to dig deeper into the free page list. */ if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) req_class = VM_ALLOC_SYSTEM; /* * Return if the number of free pages cannot satisfy the requested * allocation. */ count = vm_cnt.v_free_count; if (count < npages + vm_cnt.v_free_reserved || (count < npages + vm_cnt.v_interrupt_free_min && req_class == VM_ALLOC_SYSTEM) || (count < npages && req_class == VM_ALLOC_INTERRUPT)) return (false); /* * Scan up to three times, relaxing the restrictions ("options") on * the reclamation of reservations and superpages each time. */ for (options = VPSC_NORESERV;;) { /* * Find the highest runs that satisfy the given constraints * and restrictions, and record them in "m_runs". 
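 *
 * (Editorial note: "m_runs" is a ring buffer of NRUNS entries
 * indexed by RUN_INDEX(count) == (count & (NRUNS - 1)).  Since the
 * scan below returns runs in ascending physical order, once more
 * than NRUNS runs have been found the lower ones are overwritten
 * and only the NRUNS highest survive.)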
*/ curr_low = low; count = 0; for (;;) { m_run = vm_phys_scan_contig(npages, curr_low, high, alignment, boundary, options); if (m_run == NULL) break; curr_low = VM_PAGE_TO_PHYS(m_run) + ptoa(npages); m_runs[RUN_INDEX(count)] = m_run; count++; } /* * Reclaim the highest runs in LIFO (descending) order until * the number of reclaimed pages, "reclaimed", is at least * MIN_RECLAIM. Reset "reclaimed" each time because each * reclamation is idempotent, and runs will (likely) recur * from one scan to the next as restrictions are relaxed. */ reclaimed = 0; for (i = 0; count > 0 && i < NRUNS; i++) { count--; m_run = m_runs[RUN_INDEX(count)]; error = vm_page_reclaim_run(req_class, npages, m_run, high); if (error == 0) { reclaimed += npages; if (reclaimed >= MIN_RECLAIM) return (true); } } /* * Either relax the restrictions on the next scan or return if * the last scan had no restrictions. */ if (options == VPSC_NORESERV) options = VPSC_NOSUPER; else if (options == VPSC_NOSUPER) options = VPSC_ANY; else if (options == VPSC_ANY) return (reclaimed != 0); } } /* * vm_wait: (also see VM_WAIT macro) * * Sleep until free pages are available for allocation. * - Called in various places before memory allocations. */ void vm_wait(void) { mtx_lock(&vm_page_queue_free_mtx); if (curproc == pageproc) { vm_pageout_pages_needed = 1; msleep(&vm_pageout_pages_needed, &vm_page_queue_free_mtx, PDROP | PSWP, "VMWait", 0); } else { if (__predict_false(pageproc == NULL)) panic("vm_wait in early boot"); if (!vm_pageout_wanted) { vm_pageout_wanted = true; wakeup(&vm_pageout_wanted); } vm_pages_needed = true; msleep(&vm_cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | PVM, "vmwait", 0); } } /* * vm_waitpfault: (also see VM_WAITPFAULT macro) * * Sleep until free pages are available for allocation. * - Called only in vm_fault so that processes page faulting * can be easily tracked. * - Sleeps at a lower priority than vm_wait() so that vm_wait()ing * processes will be able to grab memory first. Do not change * this balance without careful testing first. */ void vm_waitpfault(void) { mtx_lock(&vm_page_queue_free_mtx); if (!vm_pageout_wanted) { vm_pageout_wanted = true; wakeup(&vm_pageout_wanted); } vm_pages_needed = true; msleep(&vm_cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | PUSER, "pfault", 0); } struct vm_pagequeue * vm_page_pagequeue(vm_page_t m) { if (vm_page_in_laundry(m)) return (&vm_dom[0].vmd_pagequeues[m->queue]); else return (&vm_phys_domain(m)->vmd_pagequeues[m->queue]); } /* * vm_page_dequeue: * * Remove the given page from its current page queue. * * The page must be locked. */ void vm_page_dequeue(vm_page_t m) { struct vm_pagequeue *pq; vm_page_assert_locked(m); KASSERT(m->queue < PQ_COUNT, ("vm_page_dequeue: page %p is not queued", m)); pq = vm_page_pagequeue(m); vm_pagequeue_lock(pq); m->queue = PQ_NONE; TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); vm_pagequeue_cnt_dec(pq); vm_pagequeue_unlock(pq); } /* * vm_page_dequeue_locked: * * Remove the given page from its current page queue. * * The page and page queue must be locked. */ void vm_page_dequeue_locked(vm_page_t m) { struct vm_pagequeue *pq; vm_page_lock_assert(m, MA_OWNED); pq = vm_page_pagequeue(m); vm_pagequeue_assert_locked(pq); m->queue = PQ_NONE; TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); vm_pagequeue_cnt_dec(pq); } /* * vm_page_enqueue: * * Add the given page to the specified page queue. * * The page must be locked. 
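 *
 * (Editorial example: vm_page_activate() below moves a page onto
 * the active queue with vm_page_enqueue(PQ_ACTIVE, m) after any
 * necessary dequeue.)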
*/ static void vm_page_enqueue(uint8_t queue, vm_page_t m) { struct vm_pagequeue *pq; vm_page_lock_assert(m, MA_OWNED); KASSERT(queue < PQ_COUNT, ("vm_page_enqueue: invalid queue %u request for page %p", queue, m)); if (queue == PQ_LAUNDRY || queue == PQ_UNSWAPPABLE) pq = &vm_dom[0].vmd_pagequeues[queue]; else pq = &vm_phys_domain(m)->vmd_pagequeues[queue]; vm_pagequeue_lock(pq); m->queue = queue; TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); vm_pagequeue_cnt_inc(pq); vm_pagequeue_unlock(pq); } /* * vm_page_requeue: * * Move the given page to the tail of its current page queue. * * The page must be locked. */ void vm_page_requeue(vm_page_t m) { struct vm_pagequeue *pq; vm_page_lock_assert(m, MA_OWNED); KASSERT(m->queue != PQ_NONE, ("vm_page_requeue: page %p is not queued", m)); pq = vm_page_pagequeue(m); vm_pagequeue_lock(pq); TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); vm_pagequeue_unlock(pq); } /* * vm_page_requeue_locked: * * Move the given page to the tail of its current page queue. * * The page queue must be locked. */ void vm_page_requeue_locked(vm_page_t m) { struct vm_pagequeue *pq; KASSERT(m->queue != PQ_NONE, ("vm_page_requeue_locked: page %p is not queued", m)); pq = vm_page_pagequeue(m); vm_pagequeue_assert_locked(pq); TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); } /* * vm_page_activate: * * Put the specified page on the active list (if appropriate). * Ensure that act_count is at least ACT_INIT but do not otherwise * mess with it. * * The page must be locked. */ void vm_page_activate(vm_page_t m) { int queue; vm_page_lock_assert(m, MA_OWNED); if ((queue = m->queue) != PQ_ACTIVE) { if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) { if (m->act_count < ACT_INIT) m->act_count = ACT_INIT; if (queue != PQ_NONE) vm_page_dequeue(m); vm_page_enqueue(PQ_ACTIVE, m); } else KASSERT(queue == PQ_NONE, ("vm_page_activate: wired page %p is queued", m)); } else { if (m->act_count < ACT_INIT) m->act_count = ACT_INIT; } } /* * vm_page_free_wakeup: * * Helper routine for vm_page_free_toq(). This routine is called * when a page is added to the free queues. * * The page queues must be locked. */ static void vm_page_free_wakeup(void) { mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); /* * if pageout daemon needs pages, then tell it that there are * some free. */ if (vm_pageout_pages_needed && vm_cnt.v_free_count >= vm_cnt.v_pageout_free_min) { wakeup(&vm_pageout_pages_needed); vm_pageout_pages_needed = 0; } /* * wakeup processes that are waiting on memory if we hit a * high water mark. And wakeup scheduler process if we have * lots of memory. this process will swapin processes. */ if (vm_pages_needed && !vm_page_count_min()) { vm_pages_needed = false; wakeup(&vm_cnt.v_free_count); } } /* * vm_page_free_prep: * * Prepares the given page to be put on the free list, * disassociating it from any VM object. The caller may return * the page to the free list only if this function returns true. * * The object must be locked. The page must be locked if it is * managed. For a queued managed page, the pagequeue_locked * argument specifies whether the page queue is already locked. 
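 *
 * (Illustrative sketch, not part of the source: the expected
 * calling pattern mirrors vm_page_free_toq() below:
 *
 *	if (vm_page_free_prep(m, false)) {
 *		mtx_lock(&vm_page_queue_free_mtx);
 *		vm_page_free_phys(m);
 *		vm_page_free_wakeup();
 *		mtx_unlock(&vm_page_queue_free_mtx);
 *	}
 * )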
*/ bool vm_page_free_prep(vm_page_t m, bool pagequeue_locked) { if ((m->oflags & VPO_UNMANAGED) == 0) { vm_page_lock_assert(m, MA_OWNED); KASSERT(!pmap_page_is_mapped(m), ("vm_page_free_toq: freeing mapped page %p", m)); } else KASSERT(m->queue == PQ_NONE, ("vm_page_free_toq: unmanaged page %p is queued", m)); VM_CNT_INC(v_tfree); if (vm_page_sbusied(m)) panic("vm_page_free: freeing busy page %p", m); /* * Unqueue, then remove page. Note that we cannot destroy * the page here because we do not want to call the pager's * callback routine until after we've put the page on the * appropriate free queue. */ if (m->queue != PQ_NONE) { if (pagequeue_locked) vm_page_dequeue_locked(m); else vm_page_dequeue(m); } vm_page_remove(m); /* * If fictitious remove object association and * return, otherwise delay object association removal. */ if ((m->flags & PG_FICTITIOUS) != 0) return (false); m->valid = 0; vm_page_undirty(m); if (m->wire_count != 0) panic("vm_page_free: freeing wired page %p", m); if (m->hold_count != 0) { m->flags &= ~PG_ZERO; KASSERT((m->flags & PG_UNHOLDFREE) == 0, ("vm_page_free: freeing PG_UNHOLDFREE page %p", m)); m->flags |= PG_UNHOLDFREE; return (false); } /* * Restore the default memory attribute to the page. */ if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT) pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT); return (true); } /* * Insert the page into the physical memory allocator's free page * queues. This is the last step to free a page. */ static void vm_page_free_phys(vm_page_t m) { mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); vm_phys_freecnt_adj(m, 1); #if VM_NRESERVLEVEL > 0 if (!vm_reserv_free_page(m)) #endif vm_phys_free_pages(m, 0); } void vm_page_free_phys_pglist(struct pglist *tq) { vm_page_t m; mtx_lock(&vm_page_queue_free_mtx); TAILQ_FOREACH(m, tq, listq) vm_page_free_phys(m); vm_page_free_wakeup(); mtx_unlock(&vm_page_queue_free_mtx); } /* * vm_page_free_toq: * * Returns the given page to the free list, disassociating it * from any VM object. * * The object must be locked. The page must be locked if it is * managed. */ void vm_page_free_toq(vm_page_t m) { if (!vm_page_free_prep(m, false)) return; mtx_lock(&vm_page_queue_free_mtx); vm_page_free_phys(m); vm_page_free_wakeup(); mtx_unlock(&vm_page_queue_free_mtx); } /* * vm_page_wire: * * Mark this page as wired down by yet * another map, removing it from paging queues * as necessary. * * If the page is fictitious, then its wire count must remain one. * * The page must be locked. */ void vm_page_wire(vm_page_t m) { /* * Only bump the wire statistics if the page is not already wired, * and only unqueue the page if it is on some queue (if it is unmanaged * it is already off the queues). */ vm_page_lock_assert(m, MA_OWNED); if ((m->flags & PG_FICTITIOUS) != 0) { KASSERT(m->wire_count == 1, ("vm_page_wire: fictitious page %p's wire count isn't one", m)); return; } if (m->wire_count == 0) { KASSERT((m->oflags & VPO_UNMANAGED) == 0 || m->queue == PQ_NONE, ("vm_page_wire: unmanaged page %p is queued", m)); vm_page_remque(m); atomic_add_int(&vm_cnt.v_wire_count, 1); } m->wire_count++; KASSERT(m->wire_count != 0, ("vm_page_wire: wire_count overflow m=%p", m)); } /* * vm_page_unwire: * * Release one wiring of the specified page, potentially allowing it to be * paged out. Returns TRUE if the number of wirings transitions to zero and * FALSE otherwise. * * Only managed pages belonging to an object can be paged out. 
If the number * of wirings transitions to zero and the page is eligible for page out, then * the page is added to the specified paging queue (unless PQ_NONE is * specified). * * If a page is fictitious, then its wire count must always be one. * * A managed page must be locked. */ boolean_t vm_page_unwire(vm_page_t m, uint8_t queue) { KASSERT(queue < PQ_COUNT || queue == PQ_NONE, ("vm_page_unwire: invalid queue %u request for page %p", queue, m)); if ((m->oflags & VPO_UNMANAGED) == 0) vm_page_assert_locked(m); if ((m->flags & PG_FICTITIOUS) != 0) { KASSERT(m->wire_count == 1, ("vm_page_unwire: fictitious page %p's wire count isn't one", m)); return (FALSE); } if (m->wire_count > 0) { m->wire_count--; if (m->wire_count == 0) { atomic_subtract_int(&vm_cnt.v_wire_count, 1); if ((m->oflags & VPO_UNMANAGED) == 0 && m->object != NULL && queue != PQ_NONE) vm_page_enqueue(queue, m); return (TRUE); } else return (FALSE); } else panic("vm_page_unwire: page %p's wire count is zero", m); } /* * Move the specified page to the inactive queue. * * Normally, "noreuse" is FALSE, resulting in LRU ordering of the inactive * queue. However, setting "noreuse" to TRUE will accelerate the specified * page's reclamation, but it will not unmap the page from any address space. * This is implemented by inserting the page near the head of the inactive * queue, using a marker page to guide FIFO insertion ordering. * * The page must be locked. */ static inline void _vm_page_deactivate(vm_page_t m, boolean_t noreuse) { struct vm_pagequeue *pq; int queue; vm_page_assert_locked(m); /* * Ignore if the page is already inactive, unless it is unlikely to be * reactivated. */ if ((queue = m->queue) == PQ_INACTIVE && !noreuse) return; if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) { pq = &vm_phys_domain(m)->vmd_pagequeues[PQ_INACTIVE]; /* Avoid multiple acquisitions of the inactive queue lock. */ if (queue == PQ_INACTIVE) { vm_pagequeue_lock(pq); vm_page_dequeue_locked(m); } else { if (queue != PQ_NONE) vm_page_dequeue(m); vm_pagequeue_lock(pq); } m->queue = PQ_INACTIVE; if (noreuse) TAILQ_INSERT_BEFORE(&vm_phys_domain(m)->vmd_inacthead, m, plinks.q); else TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); vm_pagequeue_cnt_inc(pq); vm_pagequeue_unlock(pq); } } /* * Move the specified page to the inactive queue. * * The page must be locked. */ void vm_page_deactivate(vm_page_t m) { _vm_page_deactivate(m, FALSE); } /* * Move the specified page to the inactive queue with the expectation * that it is unlikely to be reused. * * The page must be locked. */ void vm_page_deactivate_noreuse(vm_page_t m) { _vm_page_deactivate(m, TRUE); } /* * vm_page_launder * * Put a page in the laundry. */ void vm_page_launder(vm_page_t m) { int queue; vm_page_assert_locked(m); if ((queue = m->queue) != PQ_LAUNDRY) { if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) { if (queue != PQ_NONE) vm_page_dequeue(m); vm_page_enqueue(PQ_LAUNDRY, m); } else KASSERT(queue == PQ_NONE, ("wired page %p is queued", m)); } } /* * vm_page_unswappable * * Put a page in the PQ_UNSWAPPABLE holding queue. */ void vm_page_unswappable(vm_page_t m) { vm_page_assert_locked(m); KASSERT(m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0, ("page %p already unswappable", m)); if (m->queue != PQ_NONE) vm_page_dequeue(m); vm_page_enqueue(PQ_UNSWAPPABLE, m); } /* * vm_page_try_to_free() * * Attempt to free the page. If we cannot free it, we do nothing. - * 1 is returned on success, 0 on failure. + * true is returned on success, false on failure. 
*/ -int +bool vm_page_try_to_free(vm_page_t m) { - vm_page_lock_assert(m, MA_OWNED); + vm_page_assert_locked(m); if (m->object != NULL) VM_OBJECT_ASSERT_WLOCKED(m->object); - if (m->dirty || m->hold_count || m->wire_count || + if (m->dirty != 0 || m->hold_count != 0 || m->wire_count != 0 || (m->oflags & VPO_UNMANAGED) != 0 || vm_page_busied(m)) - return (0); - pmap_remove_all(m); - if (m->dirty) - return (0); + return (false); + if (m->object != NULL && m->object->ref_count != 0) { + pmap_remove_all(m); + if (m->dirty != 0) + return (false); + } vm_page_free(m); - return (1); + return (true); } /* * vm_page_advise * * Apply the specified advice to the given page. * * The object and page must be locked. */ void vm_page_advise(vm_page_t m, int advice) { vm_page_assert_locked(m); VM_OBJECT_ASSERT_WLOCKED(m->object); if (advice == MADV_FREE) /* * Mark the page clean. This will allow the page to be freed * without first paging it out. MADV_FREE pages are often * quickly reused by malloc(3), so we do not do anything that * would result in a page fault on a later access. */ vm_page_undirty(m); else if (advice != MADV_DONTNEED) { if (advice == MADV_WILLNEED) vm_page_activate(m); return; } /* * Clear any references to the page. Otherwise, the page daemon will * immediately reactivate the page. */ vm_page_aflag_clear(m, PGA_REFERENCED); if (advice != MADV_FREE && m->dirty == 0 && pmap_is_modified(m)) vm_page_dirty(m); /* * Place clean pages near the head of the inactive queue rather than * the tail, thus defeating the queue's LRU operation and ensuring that * the page will be reused quickly. Dirty pages not already in the * laundry are moved there. */ if (m->dirty == 0) vm_page_deactivate_noreuse(m); else vm_page_launder(m); } /* * Grab a page, waiting until we are woken up due to the page * changing state. We keep on waiting if the page continues * to be in the object. If the page doesn't exist, first allocate it * and then conditionally zero it. * * This routine may sleep. * * The object must be locked on entry. The lock will, however, be released * and reacquired if the routine sleeps. */ vm_page_t vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags) { vm_page_t m; int sleep; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 || (allocflags & VM_ALLOC_IGN_SBUSY) != 0, ("vm_page_grab: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY mismatch")); retrylookup: if ((m = vm_page_lookup(object, pindex)) != NULL) { sleep = (allocflags & VM_ALLOC_IGN_SBUSY) != 0 ? vm_page_xbusied(m) : vm_page_busied(m); if (sleep) { if ((allocflags & VM_ALLOC_NOWAIT) != 0) return (NULL); /* * Reference the page before unlocking and * sleeping so that the page daemon is less * likely to reclaim it.
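 * (Editorial note: PGA_REFERENCED is only a hint to the page
 * daemon; unlike wiring, it does not pin the page, so the lookup
 * is retried from scratch after the sleep.)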
*/ vm_page_aflag_set(m, PGA_REFERENCED); vm_page_lock(m); VM_OBJECT_WUNLOCK(object); vm_page_busy_sleep(m, "pgrbwt", (allocflags & VM_ALLOC_IGN_SBUSY) != 0); VM_OBJECT_WLOCK(object); goto retrylookup; } else { if ((allocflags & VM_ALLOC_WIRED) != 0) { vm_page_lock(m); vm_page_wire(m); vm_page_unlock(m); } if ((allocflags & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0) vm_page_xbusy(m); if ((allocflags & VM_ALLOC_SBUSY) != 0) vm_page_sbusy(m); return (m); } } m = vm_page_alloc(object, pindex, allocflags); if (m == NULL) { if ((allocflags & VM_ALLOC_NOWAIT) != 0) return (NULL); VM_OBJECT_WUNLOCK(object); VM_WAIT; VM_OBJECT_WLOCK(object); goto retrylookup; } if (allocflags & VM_ALLOC_ZERO && (m->flags & PG_ZERO) == 0) pmap_zero_page(m); return (m); } /* * Return the specified range of pages from the given object. For each * page offset within the range, if a page already exists within the object * at that offset and it is busy, then wait for it to change state. If, * instead, the page doesn't exist, then allocate it. * * The caller must always specify an allocation class. * * allocation classes: * VM_ALLOC_NORMAL normal process request * VM_ALLOC_SYSTEM system *really* needs the pages * * The caller must always specify that the pages are to be busied and/or * wired. * * optional allocation flags: * VM_ALLOC_IGN_SBUSY do not sleep on soft busy pages * VM_ALLOC_NOBUSY do not exclusive busy the page * VM_ALLOC_NOWAIT do not sleep * VM_ALLOC_SBUSY set page to sbusy state * VM_ALLOC_WIRED wire the pages * VM_ALLOC_ZERO zero and validate any invalid pages * * If VM_ALLOC_NOWAIT is not specified, this routine may sleep. Otherwise, it * may return a partial prefix of the requested range. */ int vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags, vm_page_t *ma, int count) { vm_page_t m, mpred; int i; bool sleep; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(((u_int)allocflags >> VM_ALLOC_COUNT_SHIFT) == 0, ("vm_page_grab_pages: VM_ALLOC_COUNT() is not allowed")); KASSERT((allocflags & VM_ALLOC_NOBUSY) == 0 || (allocflags & VM_ALLOC_WIRED) != 0, ("vm_page_grab_pages: the pages must be busied or wired")); KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 || (allocflags & VM_ALLOC_IGN_SBUSY) != 0, ("vm_page_grab_pages: VM_ALLOC_SBUSY/IGN_SBUSY mismatch")); if (count == 0) return (0); i = 0; retrylookup: m = vm_radix_lookup_le(&object->rtree, pindex + i); if (m == NULL || m->pindex != pindex + i) { mpred = m; m = NULL; } else mpred = TAILQ_PREV(m, pglist, listq); for (; i < count; i++) { if (m != NULL) { sleep = (allocflags & VM_ALLOC_IGN_SBUSY) != 0 ? vm_page_xbusied(m) : vm_page_busied(m); if (sleep) { if ((allocflags & VM_ALLOC_NOWAIT) != 0) break; /* * Reference the page before unlocking and * sleeping so that the page daemon is less * likely to reclaim it.
*/ vm_page_aflag_set(m, PGA_REFERENCED); vm_page_lock(m); VM_OBJECT_WUNLOCK(object); vm_page_busy_sleep(m, "grbmaw", (allocflags & VM_ALLOC_IGN_SBUSY) != 0); VM_OBJECT_WLOCK(object); goto retrylookup; } if ((allocflags & VM_ALLOC_WIRED) != 0) { vm_page_lock(m); vm_page_wire(m); vm_page_unlock(m); } if ((allocflags & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0) vm_page_xbusy(m); if ((allocflags & VM_ALLOC_SBUSY) != 0) vm_page_sbusy(m); } else { m = vm_page_alloc_after(object, pindex + i, (allocflags & ~VM_ALLOC_IGN_SBUSY) | VM_ALLOC_COUNT(count - i), mpred); if (m == NULL) { if ((allocflags & VM_ALLOC_NOWAIT) != 0) break; VM_OBJECT_WUNLOCK(object); VM_WAIT; VM_OBJECT_WLOCK(object); goto retrylookup; } } if (m->valid == 0 && (allocflags & VM_ALLOC_ZERO) != 0) { if ((m->flags & PG_ZERO) == 0) pmap_zero_page(m); m->valid = VM_PAGE_BITS_ALL; } ma[i] = mpred = m; m = vm_page_next(m); } return (i); } /* * Mapping function for valid or dirty bits in a page. * * Inputs are required to range within a page. */ vm_page_bits_t vm_page_bits(int base, int size) { int first_bit; int last_bit; KASSERT( base + size <= PAGE_SIZE, ("vm_page_bits: illegal base/size %d/%d", base, size) ); if (size == 0) /* handle degenerate case */ return (0); first_bit = base >> DEV_BSHIFT; last_bit = (base + size - 1) >> DEV_BSHIFT; return (((vm_page_bits_t)2 << last_bit) - ((vm_page_bits_t)1 << first_bit)); } /* * vm_page_set_valid_range: * * Sets portions of a page valid. The arguments are expected * to be DEV_BSIZE aligned but if they aren't the bitmap is inclusive * of any partial chunks touched by the range. The invalid portion of * such chunks will be zeroed. * * (base + size) must be less than or equal to PAGE_SIZE. */ void vm_page_set_valid_range(vm_page_t m, int base, int size) { int endoff, frag; VM_OBJECT_ASSERT_WLOCKED(m->object); if (size == 0) /* handle degenerate case */ return; /* * If the base is not DEV_BSIZE aligned and the valid * bit is clear, we have to zero out a portion of the * first block. */ if ((frag = rounddown2(base, DEV_BSIZE)) != base && (m->valid & (1 << (base >> DEV_BSHIFT))) == 0) pmap_zero_page_area(m, frag, base - frag); /* * If the ending offset is not DEV_BSIZE aligned and the * valid bit is clear, we have to zero out a portion of * the last block. */ endoff = base + size; if ((frag = rounddown2(endoff, DEV_BSIZE)) != endoff && (m->valid & (1 << (endoff >> DEV_BSHIFT))) == 0) pmap_zero_page_area(m, endoff, DEV_BSIZE - (endoff & (DEV_BSIZE - 1))); /* * Assert that no previously invalid block that is now being validated * is already dirty. */ KASSERT((~m->valid & vm_page_bits(base, size) & m->dirty) == 0, ("vm_page_set_valid_range: page %p is dirty", m)); /* * Set valid bits inclusive of any overlap. */ m->valid |= vm_page_bits(base, size); } /* * Clear the given bits from the specified page's dirty field. */ static __inline void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits) { uintptr_t addr; #if PAGE_SIZE < 16384 int shift; #endif /* * If the object is locked and the page is neither exclusive busy nor * write mapped, then the page's dirty field cannot possibly be * set by a concurrent pmap operation. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && !pmap_page_is_write_mapped(m)) m->dirty &= ~pagebits; else { /* * The pmap layer can call vm_page_dirty() without * holding a distinguished lock. The combination of * the object's lock and an atomic operation suffice * to guarantee consistency of the page dirty field.
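 *
 * (Editorial example: on a little-endian machine with
 * PAGE_SIZE == 4096, "dirty" is a single byte; if it sits at byte
 * offset 2 within its aligned 32-bit word, the code below computes
 * shift = 16 and issues atomic_clear_32(word, pagebits << 16).)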
* * For the PAGE_SIZE == 32768 case, the compiler already * properly aligns the dirty field, so no forcible * alignment is needed. Only the existence of * atomic_clear_64 is required when the page size is 32768. */ addr = (uintptr_t)&m->dirty; #if PAGE_SIZE == 32768 atomic_clear_64((uint64_t *)addr, pagebits); #elif PAGE_SIZE == 16384 atomic_clear_32((uint32_t *)addr, pagebits); #else /* PAGE_SIZE <= 8192 */ /* * Use a trick to perform a 32-bit atomic on the * containing aligned word, to not depend on the existence * of atomic_clear_{8, 16}. */ shift = addr & (sizeof(uint32_t) - 1); #if BYTE_ORDER == BIG_ENDIAN shift = (sizeof(uint32_t) - sizeof(m->dirty) - shift) * NBBY; #else shift *= NBBY; #endif addr &= ~(sizeof(uint32_t) - 1); atomic_clear_32((uint32_t *)addr, pagebits << shift); #endif /* PAGE_SIZE */ } } /* * vm_page_set_validclean: * * Sets portions of a page valid and clean. The arguments are expected * to be DEV_BSIZE aligned but if they aren't the bitmap is inclusive * of any partial chunks touched by the range. The invalid portion of * such chunks will be zeroed. * * (base + size) must be less than or equal to PAGE_SIZE. */ void vm_page_set_validclean(vm_page_t m, int base, int size) { vm_page_bits_t oldvalid, pagebits; int endoff, frag; VM_OBJECT_ASSERT_WLOCKED(m->object); if (size == 0) /* handle degenerate case */ return; /* * If the base is not DEV_BSIZE aligned and the valid * bit is clear, we have to zero out a portion of the * first block. */ if ((frag = rounddown2(base, DEV_BSIZE)) != base && (m->valid & ((vm_page_bits_t)1 << (base >> DEV_BSHIFT))) == 0) pmap_zero_page_area(m, frag, base - frag); /* * If the ending offset is not DEV_BSIZE aligned and the * valid bit is clear, we have to zero out a portion of * the last block. */ endoff = base + size; if ((frag = rounddown2(endoff, DEV_BSIZE)) != endoff && (m->valid & ((vm_page_bits_t)1 << (endoff >> DEV_BSHIFT))) == 0) pmap_zero_page_area(m, endoff, DEV_BSIZE - (endoff & (DEV_BSIZE - 1))); /* * Set valid, clear dirty bits. If validating the entire * page we can safely clear the pmap modify bit. We also * use this opportunity to clear the VPO_NOSYNC flag. If a process * takes a write fault on a MAP_NOSYNC memory area the flag will * be set again. * * We set valid bits inclusive of any overlap, but we can only * clear dirty bits for DEV_BSIZE chunks that are fully within * the range. */ oldvalid = m->valid; pagebits = vm_page_bits(base, size); m->valid |= pagebits; #if 0 /* NOT YET */ if ((frag = base & (DEV_BSIZE - 1)) != 0) { frag = DEV_BSIZE - frag; base += frag; size -= frag; if (size < 0) size = 0; } pagebits = vm_page_bits(base, size & (DEV_BSIZE - 1)); #endif if (base == 0 && size == PAGE_SIZE) { /* * The page can only be modified within the pmap if it is * mapped, and it can only be mapped if it was previously * fully valid. */ if (oldvalid == VM_PAGE_BITS_ALL) /* * Perform the pmap_clear_modify() first. Otherwise, * a concurrent pmap operation, such as * pmap_protect(), could clear a modification in the * pmap and set the dirty field on the page before * pmap_clear_modify() had begun and after the dirty * field was cleared here. */ pmap_clear_modify(m); m->dirty = 0; m->oflags &= ~VPO_NOSYNC; } else if (oldvalid != VM_PAGE_BITS_ALL) m->dirty &= ~pagebits; else vm_page_clear_dirty_mask(m, pagebits); } void vm_page_clear_dirty(vm_page_t m, int base, int size) { vm_page_clear_dirty_mask(m, vm_page_bits(base, size)); } /* * vm_page_set_invalid: * * Invalidates DEV_BSIZE'd chunks within a page.
Both the * valid and dirty bits for the affected areas are cleared. */ void vm_page_set_invalid(vm_page_t m, int base, int size) { vm_page_bits_t bits; vm_object_t object; object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); if (object->type == OBJT_VNODE && base == 0 && IDX_TO_OFF(m->pindex) + size >= object->un_pager.vnp.vnp_size) bits = VM_PAGE_BITS_ALL; else bits = vm_page_bits(base, size); if (object->ref_count != 0 && m->valid == VM_PAGE_BITS_ALL && bits != 0) pmap_remove_all(m); KASSERT((bits == 0 && m->valid == VM_PAGE_BITS_ALL) || !pmap_page_is_mapped(m), ("vm_page_set_invalid: page %p is mapped", m)); m->valid &= ~bits; m->dirty &= ~bits; } /* * vm_page_zero_invalid() * * The kernel assumes that the invalid portions of a page contain * garbage, but such pages can be mapped into memory by user code. * When this occurs, we must zero out the non-valid portions of the * page so user code sees what it expects. * * Pages are most often semi-valid when the end of a file is mapped * into memory and the file's size is not page aligned. */ void vm_page_zero_invalid(vm_page_t m, boolean_t setvalid) { int b; int i; VM_OBJECT_ASSERT_WLOCKED(m->object); /* * Scan the valid bits looking for invalid sections that * must be zeroed. Invalid sub-DEV_BSIZE'd areas ( where the * valid bit may be set ) have already been zeroed by * vm_page_set_validclean(). */ for (b = i = 0; i <= PAGE_SIZE / DEV_BSIZE; ++i) { if (i == (PAGE_SIZE / DEV_BSIZE) || (m->valid & ((vm_page_bits_t)1 << i))) { if (i > b) { pmap_zero_page_area(m, b << DEV_BSHIFT, (i - b) << DEV_BSHIFT); } b = i + 1; } } /* * setvalid is TRUE when we can safely set the zeroed areas * as being valid. We can do this if there are no cache consistency * issues, e.g., it is ok to do with UFS, but not ok to do with NFS. */ if (setvalid) m->valid = VM_PAGE_BITS_ALL; } /* * vm_page_is_valid: * * Is (partial) page valid? Note that the case where size == 0 * will return FALSE in the degenerate case where the page is * entirely invalid, and TRUE otherwise. */ int vm_page_is_valid(vm_page_t m, int base, int size) { vm_page_bits_t bits; VM_OBJECT_ASSERT_LOCKED(m->object); bits = vm_page_bits(base, size); return (m->valid != 0 && (m->valid & bits) == bits); } /* * Returns true if all of the specified predicates are true for the entire * (super)page and false otherwise. */ bool vm_page_ps_test(vm_page_t m, int flags, vm_page_t skip_m) { vm_object_t object; int i, npages; object = m->object; VM_OBJECT_ASSERT_LOCKED(object); npages = atop(pagesizes[m->psind]); /* * The physically contiguous pages that make up a superpage, i.e., a * page with a page size index ("psind") greater than zero, will * occupy adjacent entries in vm_page_array[]. */ for (i = 0; i < npages; i++) { /* Always test object consistency, including "skip_m". */ if (m[i].object != object) return (false); if (&m[i] == skip_m) continue; if ((flags & PS_NONE_BUSY) != 0 && vm_page_busied(&m[i])) return (false); if ((flags & PS_ALL_DIRTY) != 0) { /* * Calling vm_page_test_dirty() or pmap_is_modified() * might stop this case from spuriously returning * "false". However, that would require a write lock * on the object containing "m[i]". */ if (m[i].dirty != VM_PAGE_BITS_ALL) return (false); } if ((flags & PS_ALL_VALID) != 0 && m[i].valid != VM_PAGE_BITS_ALL) return (false); } return (true); } /* * Set the page's dirty bits if the page is modified.
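 *
 * (Editorial note: this propagates a modification recorded only in
 * the pmap into the machine-independent "dirty" field.  A caller
 * deciding whether a page needs laundering might, hypothetically,
 * do:
 *
 *	vm_page_test_dirty(m);
 *	if (m->dirty != 0)
 *		vm_page_launder(m);
 * )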
*/ void vm_page_test_dirty(vm_page_t m) { VM_OBJECT_ASSERT_WLOCKED(m->object); if (m->dirty != VM_PAGE_BITS_ALL && pmap_is_modified(m)) vm_page_dirty(m); } void vm_page_lock_KBI(vm_page_t m, const char *file, int line) { mtx_lock_flags_(vm_page_lockptr(m), 0, file, line); } void vm_page_unlock_KBI(vm_page_t m, const char *file, int line) { mtx_unlock_flags_(vm_page_lockptr(m), 0, file, line); } int vm_page_trylock_KBI(vm_page_t m, const char *file, int line) { return (mtx_trylock_flags_(vm_page_lockptr(m), 0, file, line)); } #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) void vm_page_assert_locked_KBI(vm_page_t m, const char *file, int line) { vm_page_lock_assert_KBI(m, MA_OWNED, file, line); } void vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line) { mtx_assert_(vm_page_lockptr(m), a, file, line); } #endif #ifdef INVARIANTS void vm_page_object_lock_assert(vm_page_t m) { /* * Certain of the page's fields may only be modified by the * holder of the containing object's lock or the exclusive busy holder. * Unfortunately, the holder of the write busy is * not recorded, and thus cannot be checked here. */ if (m->object != NULL && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_WLOCKED(m->object); } void vm_page_assert_pga_writeable(vm_page_t m, uint8_t bits) { if ((bits & PGA_WRITEABLE) == 0) return; /* * The PGA_WRITEABLE flag can only be set if the page is * managed, is exclusively busied or the object is locked. * Currently, this flag is only set by pmap_enter(). */ KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("PGA_WRITEABLE on unmanaged page")); if (!vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); } #endif #include "opt_ddb.h" #ifdef DDB #include #include DB_SHOW_COMMAND(page, vm_page_print_page_info) { db_printf("vm_cnt.v_free_count: %d\n", vm_cnt.v_free_count); db_printf("vm_cnt.v_inactive_count: %d\n", vm_cnt.v_inactive_count); db_printf("vm_cnt.v_active_count: %d\n", vm_cnt.v_active_count); db_printf("vm_cnt.v_laundry_count: %d\n", vm_cnt.v_laundry_count); db_printf("vm_cnt.v_wire_count: %d\n", vm_cnt.v_wire_count); db_printf("vm_cnt.v_free_reserved: %d\n", vm_cnt.v_free_reserved); db_printf("vm_cnt.v_free_min: %d\n", vm_cnt.v_free_min); db_printf("vm_cnt.v_free_target: %d\n", vm_cnt.v_free_target); db_printf("vm_cnt.v_inactive_target: %d\n", vm_cnt.v_inactive_target); } DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info) { int dom; db_printf("pq_free %d\n", vm_cnt.v_free_count); for (dom = 0; dom < vm_ndomains; dom++) { db_printf( "dom %d page_cnt %d free %d pq_act %d pq_inact %d pq_laund %d pq_unsw %d\n", dom, vm_dom[dom].vmd_page_count, vm_dom[dom].vmd_free_count, vm_dom[dom].vmd_pagequeues[PQ_ACTIVE].pq_cnt, vm_dom[dom].vmd_pagequeues[PQ_INACTIVE].pq_cnt, vm_dom[dom].vmd_pagequeues[PQ_LAUNDRY].pq_cnt, vm_dom[dom].vmd_pagequeues[PQ_UNSWAPPABLE].pq_cnt); } } DB_SHOW_COMMAND(pginfo, vm_page_print_pginfo) { vm_page_t m; boolean_t phys; if (!have_addr) { db_printf("show pginfo addr\n"); return; } phys = strchr(modif, 'p') != NULL; if (phys) m = PHYS_TO_VM_PAGE(addr); else m = (vm_page_t)addr; db_printf( "page %p obj %p pidx 0x%jx phys 0x%jx q %d hold %d wire %d\n" " af 0x%x of 0x%x f 0x%x act %d busy %x valid 0x%x dirty 0x%x\n", m, m->object, (uintmax_t)m->pindex, (uintmax_t)m->phys_addr, m->queue, m->hold_count, m->wire_count, m->aflags, m->oflags, m->flags, m->act_count, m->busy_lock, m->valid, m->dirty); } #endif /* DDB */ Index: projects/runtime-coverage/sys/vm/vm_page.h =================================================================== ---
projects/runtime-coverage/sys/vm/vm_page.h (revision 323974) +++ projects/runtime-coverage/sys/vm/vm_page.h (revision 323975) @@ -1,747 +1,747 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_page.h 8.2 (Berkeley) 12/13/93 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * * $FreeBSD$ */ /* * Resident memory system definitions. */ #ifndef _VM_PAGE_ #define _VM_PAGE_ #include /* * Management of resident (logical) pages. * * A small structure is kept for each resident * page, indexed by page number. Each structure * is an element of several collections: * * A radix tree used to quickly * perform object/offset lookups * * A list of all pages for a given object, * so they can be quickly deactivated at * time of deallocation. * * An ordered list of pages due for pageout. * * In addition, the structure contains the object * and offset to which this page belongs (for pageout), * and sundry status bits. 
* * In general, operations on this structure's mutable fields are * synchronized using either one of or a combination of the lock on the * object that the page belongs to (O), the pool lock for the page (P), * or the lock for either the free or paging queue (Q). If a field is * annotated below with two of these locks, then holding either lock is * sufficient for read access, but both locks are required for write * access. * * In contrast, the synchronization of accesses to the page's * dirty field is machine dependent (M). In the * machine-independent layer, the lock on the object that the * page belongs to must be held in order to operate on the field. * However, the pmap layer is permitted to set all bits within * the field without holding that lock. If the underlying * architecture does not support atomic read-modify-write * operations on the field's type, then the machine-independent * layer uses a 32-bit atomic on the aligned 32-bit word that * contains the dirty field. In the machine-independent layer, * the implementation of read-modify-write operations on the * field is encapsulated in vm_page_clear_dirty_mask(). */ #if PAGE_SIZE == 4096 #define VM_PAGE_BITS_ALL 0xffu typedef uint8_t vm_page_bits_t; #elif PAGE_SIZE == 8192 #define VM_PAGE_BITS_ALL 0xffffu typedef uint16_t vm_page_bits_t; #elif PAGE_SIZE == 16384 #define VM_PAGE_BITS_ALL 0xffffffffu typedef uint32_t vm_page_bits_t; #elif PAGE_SIZE == 32768 #define VM_PAGE_BITS_ALL 0xfffffffffffffffflu typedef uint64_t vm_page_bits_t; #endif struct vm_page { union { TAILQ_ENTRY(vm_page) q; /* page queue or free list (Q) */ struct { SLIST_ENTRY(vm_page) ss; /* private slists */ void *pv; } s; struct { u_long p; u_long v; } memguard; } plinks; TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */ vm_object_t object; /* which object am I in (O,P) */ vm_pindex_t pindex; /* offset into object (O,P) */ vm_paddr_t phys_addr; /* physical address of page */ struct md_page md; /* machine dependent stuff */ u_int wire_count; /* wired down maps refs (P) */ volatile u_int busy_lock; /* busy owners lock */ uint16_t hold_count; /* page hold count (P) */ uint16_t flags; /* page PG_* flags (P) */ uint8_t aflags; /* access is atomic */ uint8_t oflags; /* page VPO_* flags (O) */ uint8_t queue; /* page queue index (P,Q) */ int8_t psind; /* pagesizes[] index (O) */ int8_t segind; uint8_t order; /* index of the buddy queue */ uint8_t pool; u_char act_count; /* page usage count (P) */ /* NOTE that these must support one bit per DEV_BSIZE in a page */ /* so, on normal X86 kernels, they must be at least 8 bits wide */ vm_page_bits_t valid; /* map of valid DEV_BSIZE chunks (O) */ vm_page_bits_t dirty; /* map of dirty DEV_BSIZE chunks (M) */ }; /* * Page flags stored in oflags: * * Access to these page flags is synchronized by the lock on the object * containing the page (O). * * Note: VPO_UNMANAGED (used by OBJT_DEVICE, OBJT_PHYS and OBJT_SG) * indicates that the page is not under PV management but * otherwise should be treated as a normal page. Pages not * under PV management cannot be paged out via the * object/vm_page_t because there is no knowledge of their pte * mappings, and such pages are also not on any PQ queue. * */ #define VPO_UNUSED01 0x01 /* --available-- */ #define VPO_SWAPSLEEP 0x02 /* waiting for swap to finish */ #define VPO_UNMANAGED 0x04 /* no PV management for page */ #define VPO_SWAPINPROG 0x08 /* swap I/O in progress on page */ #define VPO_NOSYNC 0x10 /* do not collect for syncer */ /* * Busy page implementation details. 
* The algorithm is taken mostly from the rwlock(9) and sx(9) lock * implementations, although the support for owner identity is removed * because of size constraints. Checks on lock recursion are therefore not * possible, and the effectiveness of the lock assertions is somewhat * reduced. */ #define VPB_BIT_SHARED 0x01 #define VPB_BIT_EXCLUSIVE 0x02 #define VPB_BIT_WAITERS 0x04 #define VPB_BIT_FLAGMASK \ (VPB_BIT_SHARED | VPB_BIT_EXCLUSIVE | VPB_BIT_WAITERS) #define VPB_SHARERS_SHIFT 3 #define VPB_SHARERS(x) \ (((x) & ~VPB_BIT_FLAGMASK) >> VPB_SHARERS_SHIFT) #define VPB_SHARERS_WORD(x) ((x) << VPB_SHARERS_SHIFT | VPB_BIT_SHARED) #define VPB_ONE_SHARER (1 << VPB_SHARERS_SHIFT) #define VPB_SINGLE_EXCLUSIVER VPB_BIT_EXCLUSIVE #define VPB_UNBUSIED VPB_SHARERS_WORD(0) #define PQ_NONE 255 #define PQ_INACTIVE 0 #define PQ_ACTIVE 1 #define PQ_LAUNDRY 2 #define PQ_UNSWAPPABLE 3 #define PQ_COUNT 4 #ifndef VM_PAGE_HAVE_PGLIST TAILQ_HEAD(pglist, vm_page); #define VM_PAGE_HAVE_PGLIST #endif SLIST_HEAD(spglist, vm_page); struct vm_pagequeue { struct mtx pq_mutex; struct pglist pq_pl; int pq_cnt; u_int * const pq_vcnt; const char * const pq_name; } __aligned(CACHE_LINE_SIZE); struct vm_domain { struct vm_pagequeue vmd_pagequeues[PQ_COUNT]; u_int vmd_page_count; u_int vmd_free_count; long vmd_segs; /* bitmask of the segments */ boolean_t vmd_oom; int vmd_oom_seq; int vmd_last_active_scan; struct vm_page vmd_laundry_marker; struct vm_page vmd_marker; /* marker for pagedaemon private use */ struct vm_page vmd_inacthead; /* marker for LRU-defeating insertions */ }; extern struct vm_domain vm_dom[MAXMEMDOM]; #define vm_pagequeue_assert_locked(pq) mtx_assert(&(pq)->pq_mutex, MA_OWNED) #define vm_pagequeue_lock(pq) mtx_lock(&(pq)->pq_mutex) #define vm_pagequeue_lockptr(pq) (&(pq)->pq_mutex) #define vm_pagequeue_unlock(pq) mtx_unlock(&(pq)->pq_mutex) #ifdef _KERNEL extern vm_page_t bogus_page; static __inline void vm_pagequeue_cnt_add(struct vm_pagequeue *pq, int addend) { #ifdef notyet vm_pagequeue_assert_locked(pq); #endif pq->pq_cnt += addend; atomic_add_int(pq->pq_vcnt, addend); } #define vm_pagequeue_cnt_inc(pq) vm_pagequeue_cnt_add((pq), 1) #define vm_pagequeue_cnt_dec(pq) vm_pagequeue_cnt_add((pq), -1) #endif /* _KERNEL */ extern struct mtx_padalign vm_page_queue_free_mtx; extern struct mtx_padalign pa_lock[]; #if defined(__arm__) #define PDRSHIFT PDR_SHIFT #elif !defined(PDRSHIFT) #define PDRSHIFT 21 #endif #define pa_index(pa) ((pa) >> PDRSHIFT) #define PA_LOCKPTR(pa) ((struct mtx *)(&pa_lock[pa_index(pa) % PA_LOCK_COUNT])) #define PA_LOCKOBJPTR(pa) ((struct lock_object *)PA_LOCKPTR((pa))) #define PA_LOCK(pa) mtx_lock(PA_LOCKPTR(pa)) #define PA_TRYLOCK(pa) mtx_trylock(PA_LOCKPTR(pa)) #define PA_UNLOCK(pa) mtx_unlock(PA_LOCKPTR(pa)) #define PA_UNLOCK_COND(pa) \ do { \ if ((pa) != 0) { \ PA_UNLOCK((pa)); \ (pa) = 0; \ } \ } while (0) #define PA_LOCK_ASSERT(pa, a) mtx_assert(PA_LOCKPTR(pa), (a)) #ifdef KLD_MODULE #define vm_page_lock(m) vm_page_lock_KBI((m), LOCK_FILE, LOCK_LINE) #define vm_page_unlock(m) vm_page_unlock_KBI((m), LOCK_FILE, LOCK_LINE) #define vm_page_trylock(m) vm_page_trylock_KBI((m), LOCK_FILE, LOCK_LINE) #else /* !KLD_MODULE */ #define vm_page_lockptr(m) (PA_LOCKPTR(VM_PAGE_TO_PHYS((m)))) #define vm_page_lock(m) mtx_lock(vm_page_lockptr((m))) #define vm_page_unlock(m) mtx_unlock(vm_page_lockptr((m))) #define vm_page_trylock(m) mtx_trylock(vm_page_lockptr((m))) #endif #if defined(INVARIANTS) #define vm_page_assert_locked(m) \ vm_page_assert_locked_KBI((m), __FILE__, __LINE__) #define vm_page_lock_assert(m,
a) \ vm_page_lock_assert_KBI((m), (a), __FILE__, __LINE__) #else #define vm_page_assert_locked(m) #define vm_page_lock_assert(m, a) #endif /* * The vm_page's aflags are updated using atomic operations. To set or clear * these flags, the functions vm_page_aflag_set() and vm_page_aflag_clear() * must be used. Neither these flags nor these functions are part of the KBI. * * PGA_REFERENCED may be cleared only if the page is locked. It is set by * both the MI and MD VM layers. However, kernel loadable modules should not * directly set this flag. They should call vm_page_reference() instead. * * PGA_WRITEABLE is set exclusively on managed pages by pmap_enter(). * When it does so, the object must be locked, or the page must be * exclusive busied. The MI VM layer must never access this flag * directly. Instead, it should call pmap_page_is_write_mapped(). * * PGA_EXECUTABLE may be set by pmap routines, and indicates that a page has * at least one executable mapping. It is not consumed by the MI VM layer. */ #define PGA_WRITEABLE 0x01 /* page may be mapped writeable */ #define PGA_REFERENCED 0x02 /* page has been referenced */ #define PGA_EXECUTABLE 0x04 /* page may be mapped executable */ /* * Page flags. If changed at any other time than page allocation or * freeing, the modification must be protected by the vm_page lock. */ #define PG_FICTITIOUS 0x0004 /* physical page doesn't exist */ #define PG_ZERO 0x0008 /* page is zeroed */ #define PG_MARKER 0x0010 /* special queue marker page */ #define PG_NODUMP 0x0080 /* don't include this page in a dump */ #define PG_UNHOLDFREE 0x0100 /* delayed free of a held page */ /* * Misc constants. */ #define ACT_DECLINE 1 #define ACT_ADVANCE 3 #define ACT_INIT 5 #define ACT_MAX 64 #ifdef _KERNEL #include <sys/systm.h> #include <machine/atomic.h> /* * Each pageable resident page falls into one of five lists: * * free * Available for allocation now. * * inactive * Low activity, candidates for reclamation. * This list is approximately LRU ordered. * * laundry * This is the list of pages that should be * paged out next. * * unswappable * Dirty anonymous pages that cannot be paged * out because no swap device is configured. * * active * Pages that are "active", i.e., they have been * recently referenced. * */ extern int vm_page_zero_count; extern vm_page_t vm_page_array; /* First resident page in table */ extern long vm_page_array_size; /* number of vm_page_t's */ extern long first_page; /* first physical page number */ #define VM_PAGE_TO_PHYS(entry) ((entry)->phys_addr) /* * PHYS_TO_VM_PAGE() returns the vm_page_t object that represents a memory * page to which the given physical address belongs. The correct vm_page_t * object is returned for addresses that are not page-aligned. */ vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa); /* * Page allocation parameters for vm_page for the functions * vm_page_alloc(), vm_page_grab(), vm_page_alloc_contig() and * vm_page_alloc_freelist(). Some functions support only a subset * of the flags, and ignore others; see the flags legend. * * The meaning of VM_ALLOC_ZERO differs slightly between the vm_page_alloc*() * and the vm_page_grab*() functions. See these functions for details. * * Bits 0 - 1 define class. * Bits 2 - 15 dedicated for flags. * Legend: * (a) - vm_page_alloc() supports the flag. * (c) - vm_page_alloc_contig() supports the flag. * (f) - vm_page_alloc_freelist() supports the flag. * (g) - vm_page_grab() supports the flag. * (p) - vm_page_grab_pages() supports the flag.
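* * For illustration, a request such as vm_page_alloc(obj, pindex, * VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_ZERO) asks for a wired, * prezeroed page of the normal allocation class ("obj" and "pindex" are * placeholders here). *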
* Bits above 15 define the count of additional pages that the caller * intends to allocate. */ #define VM_ALLOC_NORMAL 0 #define VM_ALLOC_INTERRUPT 1 #define VM_ALLOC_SYSTEM 2 #define VM_ALLOC_CLASS_MASK 3 #define VM_ALLOC_WIRED 0x0020 /* (acfgp) Allocate a wired page */ #define VM_ALLOC_ZERO 0x0040 /* (acfgp) Allocate a prezeroed page */ #define VM_ALLOC_NOOBJ 0x0100 /* (acg) No associated object */ #define VM_ALLOC_NOBUSY 0x0200 /* (acgp) Do not excl busy the page */ #define VM_ALLOC_IGN_SBUSY 0x1000 /* (gp) Ignore shared busy flag */ #define VM_ALLOC_NODUMP 0x2000 /* (ag) don't include in dump */ #define VM_ALLOC_SBUSY 0x4000 /* (acgp) Shared busy the page */ #define VM_ALLOC_NOWAIT 0x8000 /* (gp) Do not sleep */ #define VM_ALLOC_COUNT_SHIFT 16 #define VM_ALLOC_COUNT(count) ((count) << VM_ALLOC_COUNT_SHIFT) #ifdef M_NOWAIT static inline int malloc2vm_flags(int malloc_flags) { int pflags; KASSERT((malloc_flags & M_USE_RESERVE) == 0 || (malloc_flags & M_NOWAIT) != 0, ("M_USE_RESERVE requires M_NOWAIT")); pflags = (malloc_flags & M_USE_RESERVE) != 0 ? VM_ALLOC_INTERRUPT : VM_ALLOC_SYSTEM; if ((malloc_flags & M_ZERO) != 0) pflags |= VM_ALLOC_ZERO; if ((malloc_flags & M_NODUMP) != 0) pflags |= VM_ALLOC_NODUMP; return (pflags); } #endif /* * Predicates supported by vm_page_ps_test(): * * PS_ALL_DIRTY is true only if the entire (super)page is dirty. * However, it can be spuriously false when the (super)page has become * dirty in the pmap but that information has not been propagated to the * machine-independent layer. */ #define PS_ALL_DIRTY 0x1 #define PS_ALL_VALID 0x2 #define PS_NONE_BUSY 0x4 void vm_page_busy_downgrade(vm_page_t m); void vm_page_busy_sleep(vm_page_t m, const char *msg, bool nonshared); void vm_page_flash(vm_page_t m); void vm_page_hold(vm_page_t mem); void vm_page_unhold(vm_page_t mem); void vm_page_free(vm_page_t m); void vm_page_free_zero(vm_page_t m); void vm_page_activate (vm_page_t); void vm_page_advise(vm_page_t m, int advice); vm_page_t vm_page_alloc(vm_object_t, vm_pindex_t, int); vm_page_t vm_page_alloc_after(vm_object_t, vm_pindex_t, int, vm_page_t); vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr); vm_page_t vm_page_alloc_freelist(int, int); void vm_page_change_lock(vm_page_t m, struct mtx **mtx); vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int); int vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags, vm_page_t *ma, int count); -int vm_page_try_to_free (vm_page_t); void vm_page_deactivate (vm_page_t); void vm_page_deactivate_noreuse(vm_page_t); void vm_page_dequeue(vm_page_t m); void vm_page_dequeue_locked(vm_page_t m); vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t); void vm_page_free_phys_pglist(struct pglist *tq); bool vm_page_free_prep(vm_page_t m, bool pagequeue_locked); vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr); void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr); int vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t); void vm_page_launder(vm_page_t m); vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t); vm_page_t vm_page_next(vm_page_t m); int vm_page_pa_tryrelock(pmap_t, vm_paddr_t, vm_paddr_t *); struct vm_pagequeue *vm_page_pagequeue(vm_page_t m); vm_page_t vm_page_prev(vm_page_t m); bool vm_page_ps_test(vm_page_t m, int flags, vm_page_t skip_m); void vm_page_putfake(vm_page_t m); void vm_page_readahead_finish(vm_page_t m); bool 
vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary); void vm_page_reference(vm_page_t m); void vm_page_remove (vm_page_t); int vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t); vm_page_t vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex); void vm_page_requeue(vm_page_t m); void vm_page_requeue_locked(vm_page_t m); int vm_page_sbusied(vm_page_t m); vm_page_t vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end, u_long alignment, vm_paddr_t boundary, int options); void vm_page_set_valid_range(vm_page_t m, int base, int size); int vm_page_sleep_if_busy(vm_page_t m, const char *msg); vm_offset_t vm_page_startup(vm_offset_t vaddr); void vm_page_sunbusy(vm_page_t m); +bool vm_page_try_to_free(vm_page_t m); int vm_page_trysbusy(vm_page_t m); void vm_page_unhold_pages(vm_page_t *ma, int count); void vm_page_unswappable(vm_page_t m); boolean_t vm_page_unwire(vm_page_t m, uint8_t queue); void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr); void vm_page_wire (vm_page_t); void vm_page_xunbusy_hard(vm_page_t m); void vm_page_xunbusy_maybelocked(vm_page_t m); void vm_page_set_validclean (vm_page_t, int, int); void vm_page_clear_dirty (vm_page_t, int, int); void vm_page_set_invalid (vm_page_t, int, int); int vm_page_is_valid (vm_page_t, int, int); void vm_page_test_dirty (vm_page_t); vm_page_bits_t vm_page_bits(int base, int size); void vm_page_zero_invalid(vm_page_t m, boolean_t setvalid); void vm_page_free_toq(vm_page_t m); void vm_page_dirty_KBI(vm_page_t m); void vm_page_lock_KBI(vm_page_t m, const char *file, int line); void vm_page_unlock_KBI(vm_page_t m, const char *file, int line); int vm_page_trylock_KBI(vm_page_t m, const char *file, int line); #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) void vm_page_assert_locked_KBI(vm_page_t m, const char *file, int line); void vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line); #endif #define vm_page_assert_sbusied(m) \ KASSERT(vm_page_sbusied(m), \ ("vm_page_assert_sbusied: page %p not shared busy @ %s:%d", \ (m), __FILE__, __LINE__)) #define vm_page_assert_unbusied(m) \ KASSERT(!vm_page_busied(m), \ ("vm_page_assert_unbusied: page %p busy @ %s:%d", \ (m), __FILE__, __LINE__)) #define vm_page_assert_xbusied(m) \ KASSERT(vm_page_xbusied(m), \ ("vm_page_assert_xbusied: page %p not exclusive busy @ %s:%d", \ (m), __FILE__, __LINE__)) #define vm_page_busied(m) \ ((m)->busy_lock != VPB_UNBUSIED) #define vm_page_sbusy(m) do { \ if (!vm_page_trysbusy(m)) \ panic("%s: page %p failed shared busying", __func__, \ (m)); \ } while (0) #define vm_page_tryxbusy(m) \ (atomic_cmpset_acq_int(&(m)->busy_lock, VPB_UNBUSIED, \ VPB_SINGLE_EXCLUSIVER)) #define vm_page_xbusied(m) \ (((m)->busy_lock & VPB_SINGLE_EXCLUSIVER) != 0) #define vm_page_xbusy(m) do { \ if (!vm_page_tryxbusy(m)) \ panic("%s: page %p failed exclusive busying", __func__, \ (m)); \ } while (0) /* Note: page m's lock must not be owned by the caller. 
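* (The slow path, vm_page_xunbusy_hard(), may take the page lock itself in * order to wake up waiters, so entering it with the page lock held would * recurse on a non-recursive mutex.)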
*/ #define vm_page_xunbusy(m) do { \ if (!atomic_cmpset_rel_int(&(m)->busy_lock, \ VPB_SINGLE_EXCLUSIVER, VPB_UNBUSIED)) \ vm_page_xunbusy_hard(m); \ } while (0) #ifdef INVARIANTS void vm_page_object_lock_assert(vm_page_t m); #define VM_PAGE_OBJECT_LOCK_ASSERT(m) vm_page_object_lock_assert(m) void vm_page_assert_pga_writeable(vm_page_t m, uint8_t bits); #define VM_PAGE_ASSERT_PGA_WRITEABLE(m, bits) \ vm_page_assert_pga_writeable(m, bits) #else #define VM_PAGE_OBJECT_LOCK_ASSERT(m) (void)0 #define VM_PAGE_ASSERT_PGA_WRITEABLE(m, bits) (void)0 #endif /* * We want to use atomic updates for the aflags field, which is 8 bits wide. * However, not all architectures support atomic operations on 8-bit * destinations. In order that we can easily use a 32-bit operation, we * require that the aflags field be 32-bit aligned. */ CTASSERT(offsetof(struct vm_page, aflags) % sizeof(uint32_t) == 0); /* * Clear the given bits in the specified page. */ static inline void vm_page_aflag_clear(vm_page_t m, uint8_t bits) { uint32_t *addr, val; /* * The PGA_REFERENCED flag can only be cleared if the page is locked. */ if ((bits & PGA_REFERENCED) != 0) vm_page_assert_locked(m); /* * Access the whole 32-bit word containing the aflags field with an * atomic update. Parallel non-atomic updates to the other fields * within this word are handled properly by the atomic update. */ addr = (void *)&m->aflags; KASSERT(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0, ("vm_page_aflag_clear: aflags is misaligned")); val = bits; #if BYTE_ORDER == BIG_ENDIAN val <<= 24; #endif atomic_clear_32(addr, val); } /* * Set the given bits in the specified page. */ static inline void vm_page_aflag_set(vm_page_t m, uint8_t bits) { uint32_t *addr, val; VM_PAGE_ASSERT_PGA_WRITEABLE(m, bits); /* * Access the whole 32-bit word containing the aflags field with an * atomic update. Parallel non-atomic updates to the other fields * within this word are handled properly by the atomic update. */ addr = (void *)&m->aflags; KASSERT(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0, ("vm_page_aflag_set: aflags is misaligned")); val = bits; #if BYTE_ORDER == BIG_ENDIAN val <<= 24; #endif atomic_set_32(addr, val); } /* * vm_page_dirty: * * Set all bits in the page's dirty field. * * The object containing the specified page must be locked if the * call is made from the machine-independent layer. * * See vm_page_clear_dirty_mask(). */ static __inline void vm_page_dirty(vm_page_t m) { /* Use vm_page_dirty_KBI() under INVARIANTS to save memory. */ #if defined(KLD_MODULE) || defined(INVARIANTS) vm_page_dirty_KBI(m); #else m->dirty = VM_PAGE_BITS_ALL; #endif } /* * vm_page_remque: * * If the given page is in a page queue, then remove it from that page * queue. * * The page must be locked. */ static inline void vm_page_remque(vm_page_t m) { if (m->queue != PQ_NONE) vm_page_dequeue(m); } /* * vm_page_undirty: * * Set page to not be dirty. Note: does not clear pmap modify bits */ static __inline void vm_page_undirty(vm_page_t m) { VM_PAGE_OBJECT_LOCK_ASSERT(m); m->dirty = 0; } static inline void vm_page_replace_checked(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex, vm_page_t mold) { vm_page_t mret; mret = vm_page_replace(mnew, object, pindex); KASSERT(mret == mold, ("invalid page replacement, mold=%p, mret=%p", mold, mret)); /* Unused if !INVARIANTS. 
*/ (void)mold; (void)mret; } static inline bool vm_page_active(vm_page_t m) { return (m->queue == PQ_ACTIVE); } static inline bool vm_page_inactive(vm_page_t m) { return (m->queue == PQ_INACTIVE); } static inline bool vm_page_in_laundry(vm_page_t m) { return (m->queue == PQ_LAUNDRY || m->queue == PQ_UNSWAPPABLE); } #endif /* _KERNEL */ #endif /* !_VM_PAGE_ */ Index: projects/runtime-coverage/tools/tools/nanobsd/defaults.sh =================================================================== --- projects/runtime-coverage/tools/tools/nanobsd/defaults.sh (revision 323974) +++ projects/runtime-coverage/tools/tools/nanobsd/defaults.sh (revision 323975) @@ -1,937 +1,937 @@ #!/bin/sh # # Copyright (c) 2005 Poul-Henning Kamp. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ # set -e ####################################################################### # # Setup default values for all controlling variables. # These values can be overridden from the config file(s) # ####################################################################### # Name of this NanoBSD build. (Used to construct workdir names) NANO_NAME=full # Source tree directory NANO_SRC=/usr/src # Where nanobsd additional files live under the source tree NANO_TOOLS=tools/tools/nanobsd # Where cust_pkgng() finds packages to install NANO_PACKAGE_DIR=${NANO_SRC}/${NANO_TOOLS}/Pkg NANO_PACKAGE_LIST="*" # where package metadata gets placed NANO_PKG_META_BASE=/var/db # Path to mtree file to apply to anything copied by cust_install_files(). # If you specify this, the mtree file *must* have an entry for every file and # directory located in Files. #NANO_CUST_FILES_MTREE="" # Object tree directory # default is subdir of /usr/obj #NANO_OBJ="" # The directory to put the final images # default is ${NANO_OBJ} #NANO_DISKIMGDIR="" # Make & parallel Make NANO_MAKE="make" NANO_PMAKE="make -j 3" # The default name for any image we create. NANO_IMGNAME="_.disk.full" NANO_IMG1NAME="_.disk.image" # Options to put in make.conf during buildworld only CONF_BUILD=' ' # Options to put in make.conf during installworld only CONF_INSTALL=' ' # Options to put in make.conf during both build- & installworld. CONF_WORLD=' ' # Kernel config file to use NANO_KERNEL=GENERIC # Kernel modules to install. If empty, no modules are installed. 
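# For example, NANO_MODULES="msdosfs nullfs" installs just those two # modules (the module names here are only illustrative).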
# Use "default" to install all built modules. NANO_MODULES= # Early customize commands. NANO_EARLY_CUSTOMIZE="" # Customize commands. NANO_CUSTOMIZE="" # Late customize commands. NANO_LATE_CUSTOMIZE="" # Newfs parameters to use NANO_NEWFS="-b 4096 -f 512 -i 8192 -U" # The drive name of the media at runtime NANO_DRIVE=ada0 # Target media size in 512 bytes sectors NANO_MEDIASIZE=2000000 # Number of code images on media (1 or 2) NANO_IMAGES=2 # 0 -> Leave second image all zeroes so it compresses better. # 1 -> Initialize second image with a copy of the first NANO_INIT_IMG2=1 # Size of code file system in 512 bytes sectors # If zero, size will be as large as possible. NANO_CODESIZE=0 # Size of configuration file system in 512 bytes sectors # Cannot be zero. NANO_CONFSIZE=2048 # Size of data file system in 512 bytes sectors # If zero: no partition configured. # If negative: max size possible NANO_DATASIZE=0 # Size of the /etc ramdisk in 512 bytes sectors NANO_RAM_ETCSIZE=10240 # Size of the /tmp+/var ramdisk in 512 bytes sectors NANO_RAM_TMPVARSIZE=10240 # boot0 flags/options and configuration NANO_BOOT0CFG="-o packet -s 1 -m 3" NANO_BOOTLOADER="boot/boot0sio" # boot2 flags/options # default force serial console NANO_BOOT2CFG="-h -S115200" # Backing type of md(4) device # Can be "file" or "swap" NANO_MD_BACKING="file" # for swap type md(4) backing, write out the mbr only NANO_IMAGE_MBRONLY=true # Progress Print level PPLEVEL=3 # Set NANO_LABEL to non-blank to form the basis for using /dev/ufs/label # in preference to /dev/${NANO_DRIVE} # Root partition will be ${NANO_LABEL}s{1,2} # /cfg partition will be ${NANO_LABEL}s3 # /data partition will be ${NANO_LABEL}s4 NANO_LABEL="" NANO_SLICE_ROOT=s1 NANO_SLICE_ALTROOT=s2 NANO_SLICE_CFG=s3 NANO_SLICE_DATA=s4 NANO_ROOT=s1a NANO_ALTROOT=s2a # Default ownwership for nopriv build NANO_DEF_UNAME=root NANO_DEF_GNAME=wheel ####################################################################### # Architecture to build. Corresponds to TARGET_ARCH in a buildworld. # Unfortunately, there's no way to set TARGET at this time, and it # conflates the two, so architectures where TARGET != TARGET_ARCH and # TARGET can't be guessed from TARGET_ARCH do not work. This defaults # to the arch of the current machine. NANO_ARCH=`uname -p` # CPUTYPE defaults to "" which is the default when CPUTYPE isn't # defined. NANO_CPUTYPE="" # Directory to populate /cfg from NANO_CFGDIR="" # Directory to populate /data from NANO_DATADIR="" # We don't need SRCCONF or SRC_ENV_CONF. NanoBSD puts everything we # need for the build in files included with __MAKE_CONF. Override in your # config file if you really must. We set them unconditionally here, though # in case they are stray in the build environment SRCCONF=/dev/null SRC_ENV_CONF=/dev/null ####################################################################### # # The functions which do the real work. # Can be overridden from the config file(s) # ####################################################################### # Export values into the shell. Must use { } instead of ( ) like # other functions to avoid a subshell. # We set __MAKE_CONF as a global since it is easier to get quoting # right for paths with spaces in them. 
make_export ( ) { # Similar to export_var, except it prints the data to stdout var=$1 eval val=\$$var echo "Setting variable: $var=\"$val\"" export $1 } nano_make_build_env ( ) { __MAKE_CONF="${NANO_MAKE_CONF_BUILD}" make_export __MAKE_CONF } nano_make_install_env ( ) { __MAKE_CONF="${NANO_MAKE_CONF_INSTALL}" make_export __MAKE_CONF } # Extra environment variables for kernel builds nano_make_kernel_env ( ) { if [ -f "${NANO_KERNEL}" ] ; then KERNCONFDIR="$(realpath $(dirname ${NANO_KERNEL}))" KERNCONF="$(basename ${NANO_KERNEL})" make_export KERNCONFDIR make_export KERNCONF else export KERNCONF="${NANO_KERNEL}" make_export KERNCONF fi } nano_global_make_env ( ) ( # global settings for the make.conf file, if set [ -z "${NANO_ARCH}" ] || echo TARGET_ARCH="${NANO_ARCH}" [ -z "${NANO_CPUTYPE}" ] || echo TARGET_CPUTYPE="${NANO_CPUTYPE}" ) # rm doesn't know -x prior to FreeBSD 10, so cope with a variety of build # hosts for now. This will go away when support in the base goes away. rm ( ) { echo "NANO RM $*" case $(uname -r) in 7*|8*|9*) command rm $* ;; *) command rm -x $* ;; esac } # # Create empty files in the target tree, and record the fact. All paths # are relative to NANO_WORLDDIR. # tgt_touch ( ) ( cd "${NANO_WORLDDIR}" for i; do touch $i echo "./${i} type=file" >> ${NANO_METALOG} done ) # # Convert a directory into a symlink. Takes two arguments, the # current directory and what it should become a symlink to. The # directory is removed and a symlink is created. If we're doing # a nopriv build, then append this fact to the metalog. # tgt_dir2symlink ( ) ( dir=$1 symlink=$2 cd "${NANO_WORLDDIR}" rm -rf "$dir" ln -s "$symlink" "$dir" if [ -n "$NANO_METALOG" ]; then echo "./${dir} type=link mode=0777 link=${symlink}" >> ${NANO_METALOG} fi ) # run in the world chroot, errors fatal CR ( ) { chroot "${NANO_WORLDDIR}" /bin/sh -exc "$*" } # run in the world chroot, errors not fatal CR0 ( ) { chroot "${NANO_WORLDDIR}" /bin/sh -c "$*" || true } clean_build ( ) ( pprint 2 "Clean and create object directory (${MAKEOBJDIRPREFIX})" if ! rm -rf ${MAKEOBJDIRPREFIX}/ > /dev/null 2>&1 ; then chflags -R noschg ${MAKEOBJDIRPREFIX}/ rm -r ${MAKEOBJDIRPREFIX}/ fi ) make_conf_build ( ) ( pprint 2 "Construct build make.conf ($NANO_MAKE_CONF_BUILD)" mkdir -p ${MAKEOBJDIRPREFIX} printenv > ${MAKEOBJDIRPREFIX}/_.env # Make sure we get all the global settings that NanoBSD wants # in addition to the user's global settings ( nano_global_make_env echo "${CONF_WORLD}" echo "${CONF_BUILD}" ) > ${NANO_MAKE_CONF_BUILD} ) build_world ( ) ( pprint 2 "run buildworld" pprint 3 "log: ${MAKEOBJDIRPREFIX}/_.bw" ( nano_make_build_env set -o xtrace cd "${NANO_SRC}" ${NANO_PMAKE} buildworld ) > ${MAKEOBJDIRPREFIX}/_.bw 2>&1 ) build_kernel ( ) ( pprint 2 "build kernel ($NANO_KERNEL)" pprint 3 "log: ${MAKEOBJDIRPREFIX}/_.bk" ( nano_make_build_env nano_make_kernel_env # Note: We intentionally build all modules, not only the ones in # NANO_MODULES, so the built world can be reused by multiple images. # MODULES_OVERRIDE can still be defined in the kernel config file to # override this behavior; in that case just set NANO_MODULES=default. set -o xtrace cd "${NANO_SRC}" ${NANO_PMAKE} buildkernel ) > ${MAKEOBJDIRPREFIX}/_.bk 2>&1 ) clean_world ( ) ( if [ "${NANO_OBJ}" != "${MAKEOBJDIRPREFIX}" ]; then pprint 2 "Clean and create object directory (${NANO_OBJ})" if !
rm -rf ${NANO_OBJ}/ > /dev/null 2>&1 ; then chflags -R noschg ${NANO_OBJ} rm -r ${NANO_OBJ}/ fi mkdir -p "${NANO_OBJ}" "${NANO_WORLDDIR}" printenv > ${NANO_LOG}/_.env else pprint 2 "Clean and create world directory (${NANO_WORLDDIR})" if ! rm -rf "${NANO_WORLDDIR}/" > /dev/null 2>&1 ; then chflags -R noschg "${NANO_WORLDDIR}" rm -rf "${NANO_WORLDDIR}/" fi mkdir -p "${NANO_WORLDDIR}" fi ) make_conf_install ( ) ( pprint 2 "Construct install make.conf ($NANO_MAKE_CONF_INSTALL)" # Make sure we get all the global settings that NanoBSD wants # in addition to the user's global settings ( nano_global_make_env echo "${CONF_WORLD}" echo "${CONF_INSTALL}" if [ -n "${NANO_NOPRIV_BUILD}" ]; then echo NO_ROOT=t echo METALOG=${NANO_METALOG} fi ) > ${NANO_MAKE_CONF_INSTALL} ) install_world ( ) ( pprint 2 "installworld" pprint 3 "log: ${NANO_LOG}/_.iw" ( nano_make_install_env set -o xtrace cd "${NANO_SRC}" ${NANO_MAKE} installworld DESTDIR="${NANO_WORLDDIR}" chflags -R noschg "${NANO_WORLDDIR}" ) > ${NANO_LOG}/_.iw 2>&1 ) install_etc ( ) ( pprint 2 "install /etc" pprint 3 "log: ${NANO_LOG}/_.etc" ( nano_make_install_env set -o xtrace cd "${NANO_SRC}" ${NANO_MAKE} distribution DESTDIR="${NANO_WORLDDIR}" # make.conf doesn't get created by default, but some ports need it # so they can spam it. cp /dev/null "${NANO_WORLDDIR}"/etc/make.conf ) > ${NANO_LOG}/_.etc 2>&1 ) install_kernel ( ) ( pprint 2 "install kernel ($NANO_KERNEL)" pprint 3 "log: ${NANO_LOG}/_.ik" ( nano_make_install_env nano_make_kernel_env if [ "${NANO_MODULES}" != "default" ]; then MODULES_OVERRIDE="${NANO_MODULES}" make_export MODULES_OVERRIDE fi set -o xtrace cd "${NANO_SRC}" ${NANO_MAKE} installkernel DESTDIR="${NANO_WORLDDIR}" ) > ${NANO_LOG}/_.ik 2>&1 ) native_xtools ( ) ( pprint 2 "Installing the optimized native build tools for cross env" pprint 3 "log: ${NANO_LOG}/_.native_xtools" ( nano_make_install_env set -o xtrace cd "${NANO_SRC}" ${NANO_MAKE} native-xtools DESTDIR="${NANO_WORLDDIR}" ) > ${NANO_LOG}/_.native_xtools 2>&1 ) # # Run the requested set of early customization scripts, run before # buildworld. # run_early_customize ( ) { pprint 2 "run early customize scripts" for c in $NANO_EARLY_CUSTOMIZE do pprint 2 "early customize \"$c\"" pprint 3 "log: ${NANO_LOG}/_.early_cust.$c" pprint 4 "`type $c`" { set -x ; $c ; set +x ; } >${NANO_LOG}/_.early_cust.$c 2>&1 done } # # Run the requested set of customization scripts, run after we've # done an installworld, installed the etc files, installed the kernel # and tweaked them in the standard way. # run_customize ( ) ( pprint 2 "run customize scripts" for c in $NANO_CUSTOMIZE do pprint 2 "customize \"$c\"" pprint 3 "log: ${NANO_LOG}/_.cust.$c" pprint 4 "`type $c`" ( set -x ; $c ) > ${NANO_LOG}/_.cust.$c 2>&1 done ) # # Run any last-minute customization commands after we've had a chance to # set up nanobsd, prune empty dirs from /usr, etc # run_late_customize ( ) ( pprint 2 "run late customize scripts" for c in $NANO_LATE_CUSTOMIZE do pprint 2 "late customize \"$c\"" pprint 3 "log: ${NANO_LOG}/_.late_cust.$c" pprint 4 "`type $c`" ( set -x ; $c ) > ${NANO_LOG}/_.late_cust.$c 2>&1 done ) # # Hook called after we run all the late customize commands, but # before we invoke the disk imager. The nopriv build uses it to # read in the meta log, applying the changes that other parts of # nanobsd have recorded. It's not anticipated that # a user's cfg file would override this.
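# (A metalog is an mtree(8) manifest; a typical journal entry looks roughly # like "./etc/rc.conf type=file uname=root gname=wheel mode=0644"; the exact # keywords vary, and this sample line is only illustrative.)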
# fixup_before_diskimage ( ) ( # Run the deduplication script that takes the metalog journal and # combines multiple entries for the same file (see source for # details). We take the extra step of removing the size keywords. This # script, and many of the user scripts, copies, appends to, and otherwise # modifies files in the build, changing their sizes. These actions are # impossible to trap, so go ahead and remove the size= keyword. For this # narrow use, it doesn't buy us any protection and just gets in the way. # The dedup tool's output must be sorted due to limitations in awk. if [ -n "${NANO_METALOG}" ]; then pprint 2 "Fixing metalog" cp ${NANO_METALOG} ${NANO_METALOG}.pre echo "/set uname=${NANO_DEF_UNAME} gname=${NANO_DEF_GNAME}" > ${NANO_METALOG} cat ${NANO_METALOG}.pre | ${NANO_TOOLS}/mtree-dedup.awk | \ sed -e 's/ size=[0-9][0-9]*//' | sort >> ${NANO_METALOG} fi ) setup_nanobsd ( ) ( pprint 2 "configure nanobsd setup" pprint 3 "log: ${NANO_LOG}/_.dl" ( cd "${NANO_WORLDDIR}" # Move /usr/local/etc to /etc/local so that the /cfg stuff # can stomp on it. Otherwise packages like ipsec-tools which # have hardcoded paths under ${prefix}/etc are not tweakable. if [ -d usr/local/etc ] ; then ( cd usr/local/etc find . -print | cpio -dumpl ../../../etc/local cd .. rm -rf etc ) fi # Always set up the usr/local/etc -> etc/local symlink. # usr/local/etc gets created by packages; if no packages are # installed by this point but some are added later in the process, # the symlink not being here causes problems. It never hurts to # have the symlink there even when it goes unused, though. ln -s ../../etc/local usr/local/etc for d in var etc do # link /$d under /conf # we use hard links so we have them both places. # the files in /$d will be hidden by the mount. mkdir -p conf/base/$d conf/default/$d find $d -print | cpio -dumpl conf/base/ done echo "$NANO_RAM_ETCSIZE" > conf/base/etc/md_size echo "$NANO_RAM_TMPVARSIZE" > conf/base/var/md_size # pick up config files from the special partition echo "mount -o ro /dev/${NANO_DRIVE}${NANO_SLICE_CFG}" > conf/default/etc/remount # Put /tmp on the /var ramdisk (could be symlink already) tgt_dir2symlink tmp var/tmp ) > ${NANO_LOG}/_.dl 2>&1 ) setup_nanobsd_etc ( ) ( pprint 2 "configure nanobsd /etc" ( cd "${NANO_WORLDDIR}" # create diskless marker file touch etc/diskless [ -n "${NANO_NOPRIV_BUILD}" ] && chmod 666 etc/defaults/rc.conf # Make root filesystem R/O by default echo "root_rw_mount=NO" >> etc/defaults/rc.conf # Disable the entropy file; since / is read-only, /var/db/entropy # should be enough? echo "entropy_file=NO" >> etc/defaults/rc.conf [ -n "${NANO_NOPRIV_BUILD}" ] && chmod 444 etc/defaults/rc.conf # save config file for scripts echo "NANO_DRIVE=${NANO_DRIVE}" > etc/nanobsd.conf echo "/dev/${NANO_DRIVE}${NANO_ROOT} / ufs ro 1 1" > etc/fstab echo "/dev/${NANO_DRIVE}${NANO_SLICE_CFG} /cfg ufs rw,noauto 2 2" >> etc/fstab mkdir -p cfg # Create directory for eventual /usr/local/etc contents mkdir -p etc/local ) ) prune_usr ( ) ( # Remove all empty directories in /usr find "${NANO_WORLDDIR}"/usr -type d -depth -print | while read d do rmdir $d > /dev/null 2>&1 || true done ) newfs_part ( ) ( local dev mnt lbl dev=$1 mnt=$2 lbl=$3 echo newfs ${NANO_NEWFS} ${NANO_LABEL:+-L${NANO_LABEL}${lbl}} ${dev} newfs ${NANO_NEWFS} ${NANO_LABEL:+-L${NANO_LABEL}${lbl}} ${dev} mount -o async ${dev} ${mnt} ) # Convenient spot to work around any umount issues that your build environment # hits by overriding this method.
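# For example, a config file could redefine it to force unmounts (sketch # only): # nano_umount ( ) ( umount -f ${1} )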
nano_umount ( ) ( umount ${1} ) populate_slice ( ) ( local dev dir mnt lbl dev=$1 dir=$2 mnt=$3 lbl=$4 echo "Creating ${dev} (mounting on ${mnt})" newfs_part ${dev} ${mnt} ${lbl} if [ -n "${dir}" -a -d "${dir}" ]; then echo "Populating ${lbl} from ${dir}" cd "${dir}" find . -print | grep -Ev '/(CVS|\.svn|\.hg|\.git)/' | cpio -dumpv ${mnt} fi df -i ${mnt} nano_umount ${mnt} ) populate_cfg_slice ( ) ( populate_slice "$1" "$2" "$3" "$4" ) populate_data_slice ( ) ( populate_slice "$1" "$2" "$3" "$4" ) last_orders ( ) ( # Redefine this function with any last orders you may have # after the build completed, for instance to copy the finished # image to a more convenient place: # cp ${NANO_DISKIMGDIR}/${NANO_IMG1NAME} /home/ftp/pub/nanobsd.disk true ) ####################################################################### # # Optional convenience functions. # ####################################################################### ####################################################################### # Common Flash device geometries # FlashDevice ( ) { if [ -d ${NANO_TOOLS} ] ; then . ${NANO_TOOLS}/FlashDevice.sub else . ${NANO_SRC}/${NANO_TOOLS}/FlashDevice.sub fi sub_FlashDevice $1 $2 } ####################################################################### # USB device geometries # # Usage: # UsbDevice Generic 1000 # a generic flash key sold as having 1GB # # This function will set NANO_MEDIASIZE, NANO_HEADS and NANO_SECTS for you. # # Note that the capacity of a flash key is usually advertised in MB or # GB, *not* MiB/GiB. As such, the precise number of cylinders available # for C/H/S geometry may vary depending on the actual flash geometry. # # The following generic device layouts are understood: # generic An alias for generic-hdd. # generic-hdd 255H 63S/T xxxxC with no MBR restrictions. # generic-fdd 64H 32S/T xxxxC with no MBR restrictions. # # The generic-hdd device is preferred for flash devices larger than 1GB. # UsbDevice ( ) { a1=`echo $1 | tr '[:upper:]' '[:lower:]'` case $a1 in generic-fdd) NANO_HEADS=64 NANO_SECTS=32 NANO_MEDIASIZE=$(( $2 * 1000 * 1000 / 512 )) ;; generic|generic-hdd) NANO_HEADS=255 NANO_SECTS=63 NANO_MEDIASIZE=$(( $2 * 1000 * 1000 / 512 )) ;; *) echo "Unknown USB flash device" exit 2 ;; esac } ####################################################################### # Setup serial console cust_comconsole ( ) ( # Enable getty on console sed -i "" -e /tty[du]0/s/off/on/ ${NANO_WORLDDIR}/etc/ttys # Disable getty on syscons devices sed -i "" -e '/^ttyv[0-8]/s/ on/ off/' ${NANO_WORLDDIR}/etc/ttys # Tell loader to use serial console early. echo "${NANO_BOOT2CFG}" > ${NANO_WORLDDIR}/boot.config ) ####################################################################### # Allow root login via ssh cust_allow_ssh_root ( ) ( sed -i "" -e '/PermitRootLogin/s/.*/PermitRootLogin yes/' \ ${NANO_WORLDDIR}/etc/ssh/sshd_config ) ####################################################################### # Install the stuff under ./Files cust_install_files ( ) ( cd "${NANO_TOOLS}/Files" find . 
-print | grep -Ev '/(CVS|\.svn|\.hg|\.git)/' | cpio -Ldumpv ${NANO_WORLDDIR} if [ -n "${NANO_CUST_FILES_MTREE}" -a -f ${NANO_CUST_FILES_MTREE} ]; then CR "mtree -eiU -p /" <${NANO_CUST_FILES_MTREE} fi ) ####################################################################### # Install packages from ${NANO_PACKAGE_DIR} cust_pkgng ( ) ( mkdir -p ${NANO_WORLDDIR}/usr/local/etc local PKG_CONF="${NANO_WORLDDIR}/usr/local/etc/pkg.conf" - local PKGCMD="env ASSUME_ALWAYS_YES=YES PKG_DBDIR=${NANO_PKG_META_BASE}/pkg SIGNATURE_TYPE=none /usr/sbin/pkg" + local PKGCMD="env BATCH=YES ASSUME_ALWAYS_YES=YES PKG_DBDIR=${NANO_PKG_META_BASE}/pkg SIGNATURE_TYPE=none /usr/sbin/pkg" # Ensure pkg.conf points pkg to where the package meta data lives. touch ${PKG_CONF} if grep -Eiq '^PKG_DBDIR:.*' ${PKG_CONF}; then sed -i -e "\|^PKG_DBDIR:.*|Is||PKG_DBDIR: "\"${NANO_PKG_META_BASE}/pkg\""|" ${PKG_CONF} else echo "PKG_DBDIR: \"${NANO_PKG_META_BASE}/pkg\"" >> ${PKG_CONF} fi # If the package directory doesn't exist, we're done. if [ ! -d ${NANO_PACKAGE_DIR} ]; then echo "DONE 0 packages" return 0 fi # Find a pkg-* package for x in `find -s ${NANO_PACKAGE_DIR} -iname 'pkg-*'`; do _NANO_PKG_PACKAGE=`basename "$x"` done if [ -z "${_NANO_PKG_PACKAGE}" -o ! -f "${NANO_PACKAGE_DIR}/${_NANO_PKG_PACKAGE}" ]; then echo "FAILED: need a pkg/ package for bootstrapping" exit 2 fi # Mount packages into chroot mkdir -p ${NANO_WORLDDIR}/_.p mount -t nullfs -o noatime -o ro ${NANO_PACKAGE_DIR} ${NANO_WORLDDIR}/_.p trap "umount ${NANO_WORLDDIR}/_.p ; rm -rf ${NANO_WORLDDIR}/_.p" 1 2 15 EXIT # Install pkg-* package CR "${PKGCMD} add /_.p/${_NANO_PKG_PACKAGE}" ( # Expand any glob characters in package list cd "${NANO_PACKAGE_DIR}" _PKGS=`find ${NANO_PACKAGE_LIST} -not -name "${_NANO_PKG_PACKAGE}" -print | sort | uniq` # Show todo todo=`echo "$_PKGS" | wc -l` echo "=== TODO: $todo" echo "$_PKGS" echo "===" # Install packages for _PKG in $_PKGS; do CR "${PKGCMD} add /_.p/${_PKG}" done ) CR0 "${PKGCMD} info" trap - 1 2 15 EXIT umount ${NANO_WORLDDIR}/_.p rm -rf ${NANO_WORLDDIR}/_.p ) ####################################################################### # Convenience function: # Register all args as early customize functions to run just before # the build commences. early_customize_cmd ( ) { NANO_EARLY_CUSTOMIZE="$NANO_EARLY_CUSTOMIZE $*" } ####################################################################### # Convenience function: # Register all args as customize functions. customize_cmd ( ) { NANO_CUSTOMIZE="$NANO_CUSTOMIZE $*" } ####################################################################### # Convenience function: # Register all args as late customize functions to run just before # image creation. late_customize_cmd ( ) { NANO_LATE_CUSTOMIZE="$NANO_LATE_CUSTOMIZE $*" } ####################################################################### # # All set up to go... # ####################################################################### # Progress Print # Print $2 at level $1.
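# For example, "pprint 2 msg" prints only when PPLEVEL (default 3) is at # least 2, and the level also sets how many #'s prefix the message.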
pprint ( ) ( if [ "$1" -le $PPLEVEL ]; then runtime=$(( `date +%s` - $NANO_STARTTIME )) printf "%s %.${1}s %s\n" "`date -u -r $runtime +%H:%M:%S`" "#####" "$2" 1>&3 fi ) usage ( ) { ( echo "Usage: $0 [-bfhiKknqvwX] [-c config_file]" echo " -b suppress builds (both kernel and world)" echo " -c specify config file" echo " -f suppress code slice extraction" echo " -h print this help summary page" echo " -i suppress disk image build" echo " -K suppress installkernel" echo " -k suppress buildkernel" echo " -n add -DNO_CLEAN to buildworld, buildkernel, etc" echo " -q make output more quiet" echo " -v make output more verbose" echo " -w suppress buildworld" echo " -X make native-xtools" ) 1>&2 exit 2 } ####################################################################### # Setup and Export Internal variables # export_var ( ) { # Don't want a subshell var=$1 # Lookup value of the variable. eval val=\$$var pprint 3 "Setting variable: $var=\"$val\"" export $1 } # Call this function to set defaults _after_ parsing options. # Don't want a subshell, otherwise the variable settings are thrown away. set_defaults_and_export ( ) { : ${NANO_OBJ:=/usr/obj/nanobsd.${NANO_NAME}} : ${MAKEOBJDIRPREFIX:=${NANO_OBJ}} : ${NANO_DISKIMGDIR:=${NANO_OBJ}} : ${NANO_WORLDDIR:=${NANO_OBJ}/_.w} : ${NANO_LOG:=${NANO_OBJ}} NANO_MAKE_CONF_BUILD=${MAKEOBJDIRPREFIX}/make.conf.build NANO_MAKE_CONF_INSTALL=${NANO_OBJ}/make.conf.install # Override user's NANO_DRIVE if they specified a NANO_LABEL [ -n "${NANO_LABEL}" ] && NANO_DRIVE="ufs/${NANO_LABEL}" || true # Set a default NANO_TOOLS to NANO_SRC/NANO_TOOLS if it exists. [ ! -d "${NANO_TOOLS}" ] && [ -d "${NANO_SRC}/${NANO_TOOLS}" ] && \ NANO_TOOLS="${NANO_SRC}/${NANO_TOOLS}" || true [ -n "${NANO_NOPRIV_BUILD}" ] && [ -z "${NANO_METALOG}" ] && \ NANO_METALOG=${NANO_OBJ}/_.metalog || true NANO_STARTTIME=`date +%s` pprint 3 "Exporting NanoBSD variables" export_var MAKEOBJDIRPREFIX export_var NANO_ARCH export_var NANO_CODESIZE export_var NANO_CONFSIZE export_var NANO_CUSTOMIZE export_var NANO_DATASIZE export_var NANO_DRIVE export_var NANO_HEADS export_var NANO_IMAGES export_var NANO_IMGNAME export_var NANO_IMG1NAME export_var NANO_MAKE export_var NANO_MAKE_CONF_BUILD export_var NANO_MAKE_CONF_INSTALL export_var NANO_MEDIASIZE export_var NANO_NAME export_var NANO_NEWFS export_var NANO_OBJ export_var NANO_PMAKE export_var NANO_SECTS export_var NANO_SRC export_var NANO_TOOLS export_var NANO_WORLDDIR export_var NANO_BOOT0CFG export_var NANO_BOOTLOADER export_var NANO_LABEL export_var NANO_MODULES export_var NANO_NOPRIV_BUILD export_var NANO_METALOG export_var NANO_LOG export_var SRCCONF export_var SRC_ENV_CONF } Index: projects/runtime-coverage/usr.bin/calendar/calendars/calendar.freebsd =================================================================== --- projects/runtime-coverage/usr.bin/calendar/calendars/calendar.freebsd (revision 323974) +++ projects/runtime-coverage/usr.bin/calendar/calendars/calendar.freebsd (revision 323975) @@ -1,456 +1,457 @@ /* * FreeBSD * * $FreeBSD$ */ #ifndef _calendar_freebsd_ #define _calendar_freebsd_ 01/01 Dimitry Andric born in Utrecht, the Netherlands, 1969 01/01 Lev Serebryakov born in Leningrad, USSR, 1979 01/01 Alexander Langer born in Duesseldorf, Nordrhein-Westfalen, Germany, 1981 01/02 Ion-Mihai "IOnut" Tetcu born in Bucharest, Romania, 1980 01/02 Patrick Li born in Beijing, People's Republic of China, 1985 01/03 Tetsurou Okazaki born in Mobara, Chiba, Japan, 1972 01/04 Hiroyuki Hanai born in Kagawa pre., Japan, 1969 01/06 Philippe Audeoud born in
Bretigny-Sur-Orge, France, 1980 01/08 Michael L. Hostbaek born in Copenhagen, Denmark, 1977 01/10 Jean-Yves Lefort born in Charleroi, Belgium, 1980 01/12 Yen-Ming Lee born in Taipei, Taiwan, Republic of China, 1977 01/12 Ying-Chieh Liao born in Taipei, Taiwan, Republic of China, 1979 01/12 Kristof Provost born in Aalst, Belgium, 1983 01/13 Ruslan Bukin born in Dudinka, Russian Federation, 1985 01/14 Yi-Jheng Lin born in Taichung, Taiwan, Republic of China, 1985 01/15 Anne Dickison born in Madison, Indiana, United States, 1976 01/16 Ariff Abdullah born in Kuala Lumpur, Malaysia, 1978 01/16 Dmitry Sivachenko born in Moscow, USSR, 1978 01/16 Vanilla I. Shu born in Taipei, Taiwan, Republic of China, 1978 01/17 Raphael Kubo da Costa born in Sao Paulo, Sao Paulo, Brazil, 1989 01/18 Dejan Lesjak born in Ljubljana, Slovenia, Yugoslavia, 1977 01/19 Marshall Kirk McKusick born in Wilmington, Delaware, United States, 1954 01/19 Ruslan Ermilov born in Simferopol, USSR, 1974 01/19 Marcelo S. Araujo born in Joinville, Santa Catarina, Brazil, 1981 01/20 Poul-Henning Kamp born in Korsoer, Denmark, 1966 01/21 Mahdi Mokhtari born in Tehran, Iran, 1995 01/22 Johann Visagie born in Cape Town, South Africa, 1970 01/23 Hideyuki KURASHINA born in Niigata, Japan, 1982 01/24 Fabien Thomas born in Avignon, France, 1971 01/24 Matteo Riondato born in Padova, Italy, 1986 01/25 Nick Hibma born in Groningen, the Netherlands, 1972 01/25 Bernd Walter born in Moers, Nordrhein-Westfalen, Germany, 1974 01/26 Andrew Gallatin born in Buffalo, New York, United States, 1970 01/27 Nick Sayer born in San Diego, California, United States, 1968 01/27 Jacques Anthony Vidrine born in Baton Rouge, Louisiana, United States, 1971 01/27 Ngie Cooper born in Seattle, Washington, United States, 1984 01/31 Hidetoshi Shimokawa born in Yokohama, Kanagawa, Japan, 1970 02/01 Doug Rabson born in London, England, 1966 02/01 Nicola Vitale born in Busto Arsizio, Varese, Italy, 1976 02/01 Paul Saab born in Champaign-Urbana, Illinois, United States, 1978 02/01 Martin Wilke born in Ludwigsfelde, Brandenburg, Germany, 1980 02/01 Christian Brueffer born in Gronau, Nordrhein-Westfalen, Germany, 1982 02/01 Steven Kreuzer born in Oceanside, New York, United States, 1982 02/01 Juli Mallett born in Washington, Pennsylvania, United States, 1985 02/02 Diomidis D. Spinellis born in Athens, Greece, 1967 02/02 Michael W Lucas born in Detroit, Michigan, United States, 1967 02/02 Dmitry Chagin born in Stalingrad, USSR, 1976 02/02 Yoichi Nakayama born in Tsu, Mie, Japan, 1976 02/02 Yoshihiro Takahashi born in Yokohama, Kanagawa, Japan, 1976 02/03 Jason Helfman born in Royal Oak, Michigan, United States, 1972 02/04 Eitan Adler born in West Hempstead, New York, United States, 1991 02/05 Frank Laszlo born in Howell, Michigan, United States, 1983 02/06 Julien Charbon born in Saint Etienne, Loire, France, 1978 02/07 Bjoern Heidotting born in Uelsen, Germany, 1980 02/10 David Greenman born in Portland, Oregon, United States, 1968 02/10 Paul Richards born in Ammanford, Carmarthenshire, United Kingdom, 1968 02/10 Simon Barner born in Rosenheim, Bayern, Germany, 1980 02/10 Jason E. Hale born in Pittsburgh, Pennsylvania, United States, 1982 02/13 Jesper Skriver born in Aarhus, Denmark, 1975 02/13 Steve Wills born in Lynchburg, Virginia, United States, 1975 02/13 Andrey Slusar born in Odessa, USSR, 1979 02/13 David W. Chapman Jr. 
born in Bethel, Connecticut, United States, 1981 02/14 Manolis Kiagias born in Chania, Greece, 1970 02/14 Erwin Lansing born in 's-Hertogenbosch, the Netherlands, 1975 02/14 Martin Blapp born in Olten, Switzerland, 1976 02/15 Hiren Panchasara born in Ahmedabad, Gujarat, India, 1984 02/16 Justin Hibbits born in Toledo, Ohio, United States, 1983 02/16 Tobias Christian Berner born in Bern, Switzerland, 1985 02/18 Christoph Moench-Tegeder born in Hannover, Niedersachsen, Germany, 1980 02/19 Murray Stokely born in Jacksonville, Florida, United States, 1979 02/20 Anders Nordby born in Oslo, Norway, 1976 02/21 Alexey Zelkin born in Simferopol, Ukraine, 1978 02/22 Brooks Davis born in Longview, Washington, United States, 1976 02/22 Jake Burkholder born in Maynooth, Ontario, Canada, 1979 02/23 Peter Wemm born in Perth, Western Australia, Australia, 1971 02/23 Mathieu Arnold born in Champigny sur Marne, Val de Marne, France, 1978 02/24 Johan Karlsson born in Mariannelund, Sweden, 1974 02/24 Colin Percival born in Burnaby, Canada, 1981 02/26 Pietro Cerutti born in Faido, Switzerland, 1984 02/28 Daichi GOTO born in Shimizu Suntou, Shizuoka, Japan, 1980 02/28 Ruslan Makhmatkhanov born in Rostov-on-Don, USSR, 1984 03/01 Hye-Shik Chang born in Daejeon, Republic of Korea, 1980 03/02 Cy Schubert born in Edmonton, Alberta, Canada, 1956 03/03 Sergey Matveychuk born in Moscow, Russian Federation, 1973 03/03 Doug White born in Eugene, Oregon, United States, 1977 03/03 Gordon Tetlow born in Reno, Nevada, United States, 1978 03/04 Oleksandr Tymoshenko born in Chernihiv, Ukraine, 1980 03/05 Baptiste Daroussin born in Beauvais, France, 1980 03/05 Philip Paeps born in Leuven, Belgium, 1983 03/05 Ulf Lilleengen born in Hamar, Norway, 1985 03/06 Christopher Piazza born in Kamloops, British Columbia, Canada, 1981 03/07 Michael P. Pritchard born in Los Angeles, California, United States, 1964 03/07 Giorgos Keramidas born in Athens, Greece, 1976 03/10 Andreas Klemm born in Duesseldorf, Nordrhein-Westfalen, Germany, 1963 03/10 Luiz Otavio O Souza born in Bauru, Sao Paulo, Brazil, 1978 03/10 Nikolai Lifanov born in Moscow, Russian Federation, 1989 03/11 Soeren Straarup born in Andst, Denmark, 1978 03/12 Greg Lewis born in Adelaide, South Australia, Australia, 1969 03/13 Alexander Leidinger born in Neunkirchen, Saarland, Germany, 1976 03/13 Will Andrews born in Pontiac, Michigan, United States, 1982 03/14 Bernhard Froehlich born in Graz, Styria, Austria, 1985 03/15 Paolo Pisati born in Lodi, Italy, 1977 03/15 Brian Fundakowski Feldman born in Alexandria, Virginia, United States, 1983 03/17 Michael Smith born in Bankstown, New South Wales, Australia, 1971 03/17 Alexander Motin born in Simferopol, Ukraine, 1979 03/18 Koop Mast born in Dokkum, the Netherlands, 1981 03/19 Mikhail Teterin born in Kyiv, Ukraine, 1972 03/20 Joseph S. Atkinson born in Batesville, Arkansas, United States, 1977 03/20 Henrik Brix Andersen born in Aarhus, Denmark, 1978 03/20 MANTANI Nobutaka born in Hiroshima, Japan, 1978 03/20 Cameron Grant died in Hemel Hempstead, United Kingdom, 2005 03/22 Brad Davis born in Farmington, New Mexico, United States, 1983 03/23 Daniel C. Sobral born in Brasilia, Distrito Federal, Brazil, 1971 03/23 Benno Rice born in Adelaide, South Australia, Australia, 1977 03/24 Marcel Moolenaar born in Hilversum, the Netherlands, 1968 03/24 Emanuel Haupt born in Zurich, Switzerland, 1979 03/25 Andrew R. 
Reiter born in Springfield, Massachusetts, United States, 1980 03/26 Jonathan Anderson born in Ottawa, Ontario, Canada, 1983 03/27 Josef El-Rayes born in Linz, Austria, 1982 03/28 Sean C. Farley born in Indianapolis, Indiana, United States, 1970 03/29 Thierry Thomas born in Luxeuil les Bains, France, 1961 03/30 Po-Chuan Hsieh born in Taipei, Taiwan, Republic of China, 1978 03/31 First quarter status reports are due on 04/15 04/01 Matthew Jacob born in San Francisco, California, United States, 1958 04/01 Alexander V. Chernikov born in Moscow, Russian Federation, 1984 04/01 Bill Fenner born in Bellefonte, Pennsylvania, United States, 1971 04/01 Peter Edwards born in Dublin, Ireland, 1973 04/03 Hellmuth Michaelis born in Kiel, Schleswig-Holstein, Germany, 1958 04/03 Tong Liu born in Beijing, People's Republic of China, 1981 04/03 Gabor Pali born in Kunhegyes, Hungary, 1982 04/04 Jason Unovitch born in Scranton, Pennsylvania, United States, 1986 04/05 Stacey Son born in Burley, Idaho, United States, 1967 04/06 Peter Jeremy born in Sydney, New South Wales, Australia, 1961 04/07 Edward Tomasz Napierala born in Wolsztyn, Poland, 1981 04/08 Jordan K. Hubbard born in Honolulu, Hawaii, United States, 1963 04/09 Ceri Davies born in Haverfordwest, Pembrokeshire, United Kingdom, 1976 04/11 Bruce A. Mah born in Fresno, California, United States, 1969 04/12 Patrick Gardella born in Columbus, Ohio, United States, 1967 04/12 Ed Schouten born in Oss, the Netherlands, 1986 04/12 Ruey-Cherng Yu born in Keelung, Taiwan, 1978 04/13 Oliver Braun born in Nuremberg, Bavaria, Germany, 1972 04/14 Crist J. Clark born in Milwaukee, Wisconsin, United States, 1970 04/14 Glen J. Barber born in Wilkes-Barre, Pennsylvania, United States, 1981 04/15 David Malone born in Dublin, Ireland, 1973 04/17 Alexey Degtyarev born in Ahtubinsk, Russian Federation, 1984 04/17 Dryice Liu born in Jinan, Shandong, China, 1975 04/22 Joerg Wunsch born in Dresden, Sachsen, Germany, 1962 04/22 Jun Kuriyama born in Matsue, Shimane, Japan, 1973 04/22 Jakub Klama born in Blachownia, Silesia, Poland, 1989 04/25 Richard Gallamore born in Kissimmee, Florida, United States, 1987 04/26 Rene Ladan born in Geldrop, the Netherlands, 1980 04/28 Oleg Bulyzhin born in Kharkov, USSR, 1976 04/28 Andriy Voskoboinyk born in Bila Tserkva, Ukraine, 1992 04/29 Adam Weinberger born in Berkeley, California, United States, 1980 04/29 Eric Anholt born in Portland, Oregon, United States, 1983 05/01 Randall Stewart born in Spokane, Washington, United States, 1959 05/02 Danilo G. Baio born in Maringa, Parana, Brazil, 1986 05/02 Wojciech A. 
Koszek born in Czestochowa, Poland, 1987 05/03 Brian Dean born in Elkins, West Virginia, United States, 1966 05/03 Patrick Kelsey born in Freehold, New Jersey, United States, 1976 05/03 Robert Nicholas Maxwell Watson born in Harrow, Middlesex, United Kingdom, 1977 05/04 Denis Peplin born in Nizhniy Novgorod, Russian Federation, 1977 05/08 Kirill Ponomarew born in Volgograd, Russian Federation, 1977 05/08 Sean Kelly born in Walnut Creek, California, United States, 1982 05/09 Daniel Eischen born in Syracuse, New York, United States, 1963 05/09 Aaron Dalton born in Boise, Idaho, United States, 1973 05/09 Jase Thew born in Abergavenny, Gwent, United Kingdom, 1974 05/10 Markus Brueffer born in Gronau, Nordrhein-Westfalen, Germany, 1977 05/11 Kurt Lidl born in Rockville, Maryland, United States, 1968 05/11 Jesus Rodriguez born in Barcelona, Spain, 1972 05/11 Marcin Wojtas born in Krakow, Poland, 1986 05/11 Roman Kurakin born in Moscow, USSR, 1979 05/11 Ulrich Spoerlein born in Schesslitz, Bayern, Germany, 1981 05/13 Pete Fritchman born in Lansdale, Pennsylvania, United States, 1983 05/14 Tatsumi Hosokawa born in Tokyo, Japan, 1968 05/14 Shigeyuku Fukushima born in Osaka, Japan, 1974 05/14 Bruce Cran born in Cambridge, United Kingdom, 1981 05/15 Hans Petter Selasky born in Flekkefjord, Norway, 1982 05/16 Johann Kois born in Wolfsberg, Austria, 1975 05/16 Marcus Alves Grando born in Florianopolis, Santa Catarina, Brazil, 1979 05/17 Thomas Abthorpe born in Port Arthur, Ontario, Canada, 1968 05/19 Philippe Charnier born in Fontainebleau, France, 1966 05/19 Ian Dowse born in Dublin, Ireland, 1975 05/19 Sofian Brabez born in Toulouse, France, 1984 05/20 Dan Moschuk died in Burlington, Ontario, Canada, 2010 05/21 Kris Kennaway born in Winnipeg, Manitoba, Canada, 1978 05/22 James Gritton born in San Francisco, California, United States, 1967 05/22 Clive Tong-I Lin born in Changhua, Taiwan, Republic of China, 1978 05/22 Michael Bushkov born in Rostov-on-Don, Russian Federation, 1985 05/22 Rui Paulo born in Evora, Portugal, 1986 05/22 David Naylor born in Johannesburg, South Africa, 1988 05/23 Munechika Sumikawa born in Osaka, Osaka, Japan, 1972 05/24 Duncan McLennan Barclay born in London, Middlesex, United Kingdom, 1970 05/24 Oliver Lehmann born in Karlsburg, Germany, 1981 05/25 Pawel Pekala born in Swidnica, Poland, 1980 05/25 Tom Rhodes born in Ellwood City, Pennsylvania, United States, 1981 05/25 Roman Divacky born in Brno, Czech Republic, 1983 05/26 Jim Pirzyk born in Chicago, Illinois, United States, 1968 05/26 Florian Smeets born in Schwerte, Nordrhein-Westfalen, Germany, 1982 05/27 Ollivier Robert born in Paris, France, 1967 05/29 Wilko Bulte born in Arnhem, the Netherlands, 1965 05/29 Seigo Tanimura born in Kitakyushu, Fukuoka, Japan, 1976 05/30 Wen Heping born in Xiangxiang, Hunan, China, 1970 05/31 Ville Skytta born in Helsinki, Finland, 1974 06/02 Jean-Marc Zucconi born in Pontarlier, France, 1954 06/02 Alexander Botero-Lowry born in Austin, Texas, United States, 1986 06/03 CHOI Junho born in Seoul, Korea, 1974 06/03 Wesley Shields born in Binghamton, New York, United States, 1981 06/04 Julian Elischer born in Perth, Australia, 1959 06/04 Justin Gibbs born in San Pedro, California, United States, 1973 06/04 Jason Evans born in Greeley, Colorado, United States, 1973 06/04 Thomas Moestl born in Braunschweig, Niedersachsen, Germany, 1980 06/04 Devin Teske born in Arcadia, California, United States, 1982 06/04 Zack Kirsch born in Memphis, Tennessee, United States, 1982 06/04 Johannes Jost Meixner 
born in Wiesbaden, Germany, 1987 06/06 Sergei Kolobov born in Karpinsk, Russian Federation, 1972 06/06 Ryan Libby born in Kirkland, Washington, United States, 1985 06/06 Alan Eldridge died in Denver, Colorado, United States, 2003 06/07 Jimmy Olgeni born in Milano, Italy, 1976 06/07 Benjamin Close born in Adelaide, Australia, 1978 06/07 Roger Pau Monne born in Reus, Catalunya, Spain, 1986 06/08 Ravi Pokala born in Royal Oak, Michigan, United States, 1980 06/09 Stanislav Galabov born in Sofia, Bulgaria 1978 06/11 Alonso Cardenas Marquez born in Arequipa, Peru, 1979 06/14 Josh Paetzel born in Minneapolis, Minnesota, United States, 1973 06/17 Tilman Linneweh born in Weinheim, Baden-Wuerttemberg, Germany, 1978 06/18 Li-Wen Hsu born in Taipei, Taiwan, Republic of China, 1984 06/18 Roman Bogorodskiy born in Saratov, Russian Federation, 1986 06/19 Charlie Root born in Portland, Oregon, United States, 1993 06/21 Ganbold Tsagaankhuu born in Ulaanbaatar, Mongolia, 1971 06/21 Niels Heinen born in Markelo, the Netherlands, 1978 06/22 Andreas Tobler born in Heiden, Switzerland, 1968 06/24 Chris Faulhaber born in Springfield, Illinois, United States, 1971 06/26 Brian Somers born in Dundrum, Dublin, Ireland, 1967 06/28 Mark Santcroos born in Rotterdam, the Netherlands, 1979 06/28 Xin Li born in Beijing, People's Republic of China, 1982 06/28 Bradley T. Hughes born in Amarillo, Texas, United States, 1977 06/29 Wilfredo Sanchez Vega born in Majaguez, Puerto Rico, United States, 1972 06/29 Daniel Harris born in Lubbock, Texas, United States, 1985 06/29 Andrew Pantyukhin born in Moscow, Russian Federation, 1985 06/30 Guido van Rooij born in Best, Noord-Brabant, the Netherlands, 1965 06/30 Second quarter status reports are due on 07/15 07/01 Matthew Dillon born in San Francisco, California, United States, 1966 07/01 Mateusz Guzik born in Nowy Targ, Poland, 1986 07/02 Mark Christopher Ovens born in Preston, Lancashire, United Kingdom, 1958 07/02 Vasil Venelinov Dimov born in Shumen, Bulgaria, 1982 07/04 Motoyuki Konno born in Musashino, Tokyo, Japan, 1969 07/04 Florent Thoumie born in Montmorency, Val d'Oise, France, 1982 07/05 Olivier Cochard-Labbe born in Brest, France, 1977 07/05 Sergey Kandaurov born in Gubkin, Russian Federation, 1985 07/07 Andrew Thompson born in Lower Hutt, Wellington, New Zealand, 1979 07/07 Maxime Henrion born in Metz, France, 1981 07/07 George Reid born in Frimley, Hampshire, United Kingdom, 1983 07/10 Jung-uk Kim born in Seoul, Korea, 1971 07/10 Justin Seger born in Harvard, Massachusetts, United States, 1981 07/10 David Schultz born in Oakland, California, United States, 1982 07/10 Ben Woods born in Perth, Western Australia, Australia, 1984 07/11 Jesus R. Camou born in Hermosillo, Sonora, Mexico, 1983 07/15 Gary Jennejohn born in Rochester, New York, United States, 1950 07/16 Suleiman Souhlal born in Roma, Italy, 1983 07/16 Davide Italiano born in Milazzo, Italy, 1989 07/17 Michael Chin-Yuan Wu born in Taipei, Taiwan, Republic of China, 1980 07/19 Masafumi NAKANE born in Okazaki, Aichi, Japan, 1972 07/19 Simon L. Nielsen born in Copenhagen, Denmark, 1980 07/19 Gleb Smirnoff born in Kharkov, USSR, 1981 07/20 Dru Lavigne born in Kingston, Ontario, Canada, 1965 07/20 Andrey V. Elsukov born in Kotelnich, Russian Federation, 1981 07/22 James Housley born in Chicago, Illinois, United States, 1965 07/22 Jens Schweikhardt born in Waiblingen, Baden-Wuerttemberg, Germany, 1967 07/22 Lukas Ertl born in Weissenbach/Enns, Steiermark, Austria, 1976 07/23 Sergey A. 
Osokin born in Krasnogorsky, Stepnogorsk, Akmolinskaya region, Kazakhstan, 1972 07/23 Andrey Zonov born in Kirov, Russian Federation, 1985 07/24 Alexander Nedotsukov born in Ulyanovsk, Russian Federation, 1974 07/24 Alberto Villa born in Vercelli, Italy, 1987 07/27 Andriy Gapon born in Kyrykivka, Sumy region, Ukraine, 1976 07/28 Jim Mock born in Bethlehem, Pennsylvania, United States, 1974 07/28 Tom Hukins born in Manchester, United Kingdom, 1976 07/29 Dirk Meyer born in Kassel, Hessen, Germany, 1965 07/29 Felippe M. Motta born in Maceio, Alagoas, Brazil, 1988 08/02 Gabor Kovesdan born in Budapest, Hungary, 1987 08/03 Peter Holm born in Copenhagen, Denmark, 1955 08/05 Alfred Perlstein born in Brooklyn, New York, United States, 1978 08/06 Anton Berezin born in Dnepropetrovsk, Ukraine, 1970 08/06 John-Mark Gurney born in Detroit, Michigan, United States, 1978 08/06 Damjan Marion born in Rijeka, Croatia, 1978 08/07 Jonathan Mini born in San Mateo, California, United States, 1979 08/08 Mikolaj Golub born in Kharkov, USSR, 1977 08/08 Juergen Lock died in Bremen, Germany, 2015 08/09 Stefan Farfeleder died in Wien, Austria, 2015 08/10 Julio Merino born in Barcelona, Spain, 1984 08/10 Peter Pentchev born in Sofia, Bulgaria, 1977 08/12 Joe Marcus Clarke born in Lakeland, Florida, United States, 1976 08/12 Max Brazhnikov born in Leningradskaya, Russian Federation, 1979 08/14 Stefan Esser born in Cologne, Nordrhein-Westfalen, Germany, 1961 08/16 Andrey Chernov died in Moscow, Russian Federation, 2017 08/17 Olivier Houchard born in Nancy, France, 1980 08/19 Chin-San Huang born in Yi-Lan, Taiwan, Republic of China, 1979 08/19 Pav Lucistnik born in Kutna Hora, Czech Republic, 1980 08/20 Michael Heffner born in Cleona, Pennsylvania, United States, 1981 08/21 Jason A. 
08/22 Ilya Bakulin born in Tbilisi, USSR, 1986
08/24 Mark Linimon born in Houston, Texas, United States, 1955
08/24 Alexander Botero-Lowry died in San Francisco, California, United States, 2012
08/25 Beech Rintoul born in Oakland, California, United States, 1952
08/25 Jean Milanez Melo born in Divinopolis, Minas Gerais, Brazil, 1982
08/26 Scott Long born in Chicago, Illinois, United States, 1974
08/26 Dima Ruban born in Nalchik, USSR, 1970
08/26 Marc Fonvieille born in Avignon, France, 1972
08/26 Herve Quiroz born in Aix-en-Provence, France, 1977
08/27 Andrey Chernov born in Moscow, USSR, 1966
08/27 Tony Finch born in London, United Kingdom, 1974
08/27 Michael Johnson born in Morganton, North Carolina, United States, 1980
08/28 Norikatsu Shigemura born in Fujisawa, Kanagawa, Japan, 1974
08/29 Thomas Gellekum born in Moenchengladbach, Nordrhein-Westfalen, Germany, 1967
08/29 Max Laier born in Karlsruhe, Germany, 1981
09/01 Pyun YongHyeon born in Kimcheon, Korea, 1968
09/01 William Grzybowski born in Parana, Brazil, 1988
09/03 Max Khon born in Novosibirsk, USSR, 1976
09/03 Allan Jude born in Hamilton, Ontario, Canada, 1984
09/03 Cheng-Lung Sung born in Taipei, Taiwan, Republic of China, 1977
09/05 Mark Robert Vaughan Murray born in Harare, Mashonaland, Zimbabwe, 1961
09/05 Adrian Harold Chadd born in Perth, Western Australia, Australia, 1979
09/05 Rodrigo Osorio born in Montevideo, Uruguay, 1975
09/06 Eric Joyner born in Fairfax, Virginia, United States, 1991
09/07 Tim Bishop born in Cornwall, United Kingdom, 1978
09/07 Chris Rees born in Kettering, United Kingdom, 1987
09/08 Boris Samorodov born in Krasnodar, Russian Federation, 1963
09/09 Yoshio Mita born in Hiroshima, Japan, 1972
09/09 Steven Hartland born in Wordsley, United Kingdom, 1973
09/10 Wesley R. Peters born in Hartford, Alabama, United States, 1961
09/12 Weongyo Jeong born in Haman, Korea, 1980
09/12 Benedict Christopher Reuschling born in Darmstadt, Germany, 1981
09/12 William C. Fumerola II born in Detroit, Michigan, United States, 1981
09/14 Matthew Seaman born in Bristol, United Kingdom, 1965
09/15 Aleksandr Rybalko born in Odessa, Ukraine, 1977
09/15 Dima Panov born in Khabarovsk, Russian Federation, 1978
09/16 Maksim Yevmenkin born in Taganrog, USSR, 1974
09/17 Maxim Bolotin born in Rostov-on-Don, Russian Federation, 1976
09/18 Matthew Fleming born in Cleveland, Ohio, United States, 1975
09/20 Kevin Lo born in Taipei, Taiwan, Republic of China, 1972
09/21 Alex Kozlov born in Bila Tserkva, Ukraine, 1970
09/21 Gleb Kurtsou born in Minsk, Belarus, 1984
09/22 Alan Somers born in San Antonio, Texas, United States, 1982
09/22 Bryan Drewery born in San Diego, California, United States, 1984
09/23 Martin Matuska born in Bratislava, Slovakia, 1979
09/24 Larry Rosenman born in Queens, New York, United States, 1957
09/27 Kyle Evans born in Oklahoma City, Oklahoma, United States, 1991
09/27 Neil Blakey-Milner born in Port Elizabeth, South Africa, 1978
09/27 Renato Botelho born in Araras, Sao Paulo, Brazil, 1979
09/28 Greg Lehey born in Melbourne, Victoria, Australia, 1948
09/28 Alex Dupre born in Milano, Italy, 1980
09/29 Matthew Hunt born in Johnstown, Pennsylvania, United States, 1976
09/30 Mark Felder born in Prairie du Chien, Wisconsin, United States, 1985
09/30 Hiten M. Pandya born in Dar-es-Salaam, Tanzania, East Africa, 1986
09/30 Third quarter status reports are due on 10/15
10/02 Beat Gaetzi born in Zurich, Switzerland, 1980
10/02 Grzegorz Blach born in Starachowice, Poland, 1985
10/05 Hiroki Sato born in Yamagata, Japan, 1977
10/05 Chris Costello born in Houston, Texas, United States, 1985
10/09 Stefan Walter born in Werne, Nordrhein-Westfalen, Germany, 1978
10/11 Rick Macklem born in Ontario, Canada, 1955
10/12 Pawel Jakub Dawidek born in Radzyn Podlaski, Poland, 1980
10/15 Maxim Konovalov born in Khabarovsk, USSR, 1973
10/15 Eugene Grosbein born in Novokuznetsk, Russian Republic, USSR, 1976
10/16 Remko Lodder born in Rotterdam, the Netherlands, 1983
10/17 Maho NAKATA born in Osaka, Japan, 1974
10/18 Sheldon Hearn born in Cape Town, Western Cape, South Africa, 1974
10/18 Vladimir Kondratyev born in Ryazan, USSR, 1975
10/19 Nicholas Souchu born in Suresnes, Hauts-de-Seine, France, 1972
10/19 Nick Barkas born in Longview, Washington, United States, 1981
10/19 Pedro Giffuni born in Bogotá, Colombia, 1968
10/20 Joel Dahl born in Bitterna, Skaraborg, Sweden, 1983
10/20 Dmitry Marakasov born in Moscow, Russian Federation, 1984
10/21 Ben Smithurst born in Sheffield, South Yorkshire, United Kingdom, 1981
10/22 Jean-Sebastien Pedron born in Redon, Ille-et-Vilaine, France, 1980
10/23 Mario Sergio Fujikawa Ferreira born in Brasilia, Distrito Federal, Brazil, 1976
10/23 Romain Tartière born in Clermont-Ferrand, France, 1984
10/25 Eric Melville born in Los Gatos, California, United States, 1980
10/25 Julien Laffaye born in Toulouse, France, 1988
10/25 Ashish SHUKLA born in Kanpur, India, 1985
10/25 Toomas Soome born in Estonia, 1971
10/26 Matthew Ahrens born in United States, 1979
10/26 Philip M. Gollucci born in Silver Spring, Maryland, United States, 1979
10/27 Takanori Watanabe born in Numazu, Shizuoka, Japan, 1972
10/30 Olli Hauer born in Sindelfingen, Germany, 1968
10/31 Taras Korenko born in Cherkasy region, Ukraine, 1980
11/03 Ryan Stone born in Ottawa, Ontario, Canada, 1985
11/05 M. Warner Losh born in Kansas City, Kansas, United States, 1966
11/06 Michael Zhilin born in Stary Oskol, USSR, 1985
11/08 Joseph R. Mingrone born in Charlottetown, Prince Edward Island, Canada, 1976
11/09 Coleman Kane born in Cincinnati, Ohio, United States, 1980
11/09 Antoine Brodin born in Bagnolet, France, 1981
11/10 Gregory Neil Shapiro born in Providence, Rhode Island, United States, 1970
11/11 Danilo E. Gondolfo born in Lobato, Parana, Brazil, 1987
11/13 John Baldwin born in Stuart, Virginia, United States, 1977
11/14 Jeremie Le Hen born in Nancy, France, 1980
11/15 Lars Engels born in Hilden, Nordrhein-Westfalen, Germany, 1980
11/15 Tijl Coosemans born in Duffel, Belgium, 1983
11/16 Jose Maria Alcaide Salinas born in Madrid, Spain, 1962
11/16 Matt Joras born in Evanston, Illinois, United States, 1992
11/17 Ralf S. Engelschall born in Dachau, Bavaria, Germany, 1972
11/18 Thomas Quinot born in Paris, France, 1977
11/19 Konstantin Belousov born in Kiev, USSR, 1972
11/20 Dmitry Morozovsky born in Moscow, USSR, 1968
11/20 Gavin Atkinson born in Middlesbrough, United Kingdom, 1979
11/21 Mark Johnston born in Toronto, Ontario, Canada, 1989
11/22 Frederic Culot born in Saint-Germain-En-Laye, France, 1976
11/23 Josef Lawrence Karthauser born in Pembury, Kent, United Kingdom, 1972
11/23 Sepherosa Ziehau born in Shanghai, China, 1980
11/23 Luca Pizzamiglio born in Casalpusterlengo, Italy, 1978
11/24 Andrey Zakhvatov born in Chelyabinsk, Russian Federation, 1974
11/24 Daniel Gerzo born in Bratislava, Slovakia, 1986
+11/25 Fedor Uporov born in Yalta, Crimea, USSR, 1988
11/28 Nik Clayton born in Peterborough, United Kingdom, 1973
11/28 Stanislav Sedov born in Chelyabinsk, USSR, 1985
12/01 Hajimu Umemoto born in Nara, Japan, 1961
12/01 Alexey Dokuchaev born in Magadan, USSR, 1980
12/02 Ermal Luçi born in Tirane, Albania, 1980
12/03 Diane Bruce born in Ottawa, Ontario, Canada, 1952
12/04 Mariusz Zaborski born in Skierniewice, Poland, 1990
12/05 Ivan Voras born in Slavonski Brod, Croatia, 1981
12/06 Stefan Farfeleder born in Wien, Austria, 1980
12/08 Michael Tuexen born in Oldenburg, Germany, 1966
12/11 Ganael Laplanche born in Reims, France, 1980
12/15 James FitzGibbon born in Amersham, Buckinghamshire, United Kingdom, 1974
12/15 Timur I. Bakeyev born in Kazan, Republic of Tatarstan, USSR, 1974
12/18 Chris Timmons born in Ellensburg, Washington, United States, 1964
12/18 Dag-Erling Smorgrav born in Brussels, Belgium, 1977
12/18 Muhammad Moinur Rahman born in Dhaka, Bangladesh, 1983
12/18 Semen Ustimenko born in Novosibirsk, Russian Federation, 1979
12/19 Stephen Hurd born in Estevan, Saskatchewan, Canada, 1975
12/19 Emmanuel Vadot born in Decines-Charpieu, France, 1983
12/20 Sean Bruno born in Monterey, California, United States, 1974
12/21 Rong-En Fan born in Taipei, Taiwan, Republic of China, 1982
12/22 Alan L. Cox born in Warren, Ohio, United States, 1964
12/22 Maxim Sobolev born in Dnepropetrovsk, Ukraine, 1976
12/23 Sean Chittenden born in Seattle, Washington, United States, 1979
12/23 Alejandro Pulver born in Buenos Aires, Argentina, 1989
12/24 Jochen Neumeister born in Heidenheim, Germany, 1975
12/24 Guido Falsi born in Firenze, Italy, 1978
12/25 Niclas Zeising born in Stockholm, Sweden, 1986
12/28 Soren Schmidt born in Maribo, Denmark, 1960
12/28 Ade Lovett born in London, England, 1969
12/28 Marius Strobl born in Cham, Bavaria, Germany, 1978
12/31 Edwin Groothuis born in Geldrop, the Netherlands, 1970
12/31 Fourth quarter status reports are due on 01/15

#endif /* !_calendar_freebsd_ */

Index: projects/runtime-coverage
===================================================================
--- projects/runtime-coverage	(revision 323974)
+++ projects/runtime-coverage	(revision 323975)

Property changes on: projects/runtime-coverage
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head:r323957-323974