Index: projects/sendfile/bin/sh/expand.c =================================================================== --- projects/sendfile/bin/sh/expand.c (revision 293405) +++ projects/sendfile/bin/sh/expand.c (revision 293406) @@ -1,1539 +1,1539 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * Copyright (c) 1997-2005 * Herbert Xu . All rights reserved. * Copyright (c) 2010-2015 * Jilles Tjoelker . All rights reserved. * * This code is derived from software contributed to Berkeley by * Kenneth Almquist. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint #if 0 static char sccsid[] = "@(#)expand.c 8.5 (Berkeley) 5/15/95"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Routines to expand arguments to commands. We have to deal with * backquotes, shell variables, and file metacharacters. */ #include "shell.h" #include "main.h" #include "nodes.h" #include "eval.h" #include "expand.h" #include "syntax.h" #include "parser.h" #include "jobs.h" #include "options.h" #include "var.h" #include "input.h" #include "output.h" #include "memalloc.h" #include "error.h" #include "mystring.h" #include "arith.h" #include "show.h" #include "builtins.h" enum wordstate { WORD_IDLE, WORD_WS_DELIMITED, WORD_QUOTEMARK }; struct worddest { struct arglist *list; enum wordstate state; }; static char *expdest; /* output of current string */ static struct nodelist *argbackq; /* list of back quote expressions */ static char *argstr(char *, int, struct worddest *); static char *exptilde(char *, int); static char *expari(char *, int, struct worddest *); static void expbackq(union node *, int, int, struct worddest *); static void subevalvar_trim(char *, int, int, int); static int subevalvar_misc(char *, const char *, int, int, int); static char *evalvar(char *, int, struct worddest *); static int varisset(const char *, int); static void strtodest(const char *, int, int, int, struct worddest *); static void reprocess(int, int, int, int, struct worddest *); static void varvalue(const char *, int, int, int, struct worddest *); static void expandmeta(char *, struct arglist *); static void expmeta(char *, char *, struct arglist *); static int expsortcmp(const void *, const void *); static int patmatch(const char *, const char *); static void cvtnum(int, char *); static int collate_range_cmp(wchar_t, wchar_t); void emptyarglist(struct arglist *list) { list->args = list->smallarg; list->count = 0; list->capacity = sizeof(list->smallarg) / sizeof(list->smallarg[0]); } void appendarglist(struct arglist *list, char *str) { char **newargs; int newcapacity; if (list->count >= list->capacity) { newcapacity = list->capacity * 2; if (newcapacity < 16) newcapacity = 16; if (newcapacity > INT_MAX / (int)sizeof(newargs[0])) error("Too many entries in arglist"); newargs = stalloc(newcapacity * sizeof(newargs[0])); memcpy(newargs, list->args, list->count * sizeof(newargs[0])); list->args = newargs; list->capacity = newcapacity; } list->args[list->count++] = str; } static int collate_range_cmp(wchar_t c1, wchar_t c2) { static wchar_t s1[2], s2[2]; s1[0] = c1; s2[0] = c2; return (wcscoll(s1, s2)); } static char * stputs_quotes(const char *data, const char *syntax, char *p) { while (*data) { CHECKSTRSPACE(2, p); if (syntax[(int)*data] == CCTL) USTPUTC(CTLESC, p); USTPUTC(*data++, p); } return (p); } #define STPUTS_QUOTES(data, syntax, p) p = stputs_quotes((data), syntax, p) static char * nextword(char c, int flag, char *p, struct worddest *dst) { int is_ws; is_ws = c == '\t' || c == '\n' || c == ' '; if (p != stackblock() || (is_ws ? dst->state == WORD_QUOTEMARK : dst->state != WORD_WS_DELIMITED) || c == '\0') { STPUTC('\0', p); if (flag & EXP_GLOB) expandmeta(grabstackstr(p), dst->list); else appendarglist(dst->list, grabstackstr(p)); dst->state = is_ws ? WORD_WS_DELIMITED : WORD_IDLE; } else if (!is_ws && dst->state == WORD_WS_DELIMITED) dst->state = WORD_IDLE; /* Reserve space while the stack string is empty. */ appendarglist(dst->list, NULL); dst->list->count--; STARTSTACKSTR(p); return p; } #define NEXTWORD(c, flag, p, dstlist) p = nextword(c, flag, p, dstlist) static char * stputs_split(const char *data, const char *syntax, int flag, char *p, struct worddest *dst) { const char *ifs; char c; ifs = ifsset() ? ifsval() : " \t\n"; while (*data) { CHECKSTRSPACE(2, p); c = *data++; if (strchr(ifs, c) != NULL) { NEXTWORD(c, flag, p, dst); continue; } if (flag & EXP_GLOB && syntax[(int)c] == CCTL) USTPUTC(CTLESC, p); USTPUTC(c, p); } return (p); } #define STPUTS_SPLIT(data, syntax, flag, p, dst) p = stputs_split((data), syntax, flag, p, dst) /* * Perform expansions on an argument, placing the resulting list of arguments * in arglist. Parameter expansion, command substitution and arithmetic * expansion are always performed; additional expansions can be requested * via flag (EXP_*). * The result is left in the stack string. * When arglist is NULL, perform here document expansion. * * Caution: this function uses global state and is not reentrant. * However, a new invocation after an interrupted invocation is safe * and will reset the global state for the new call. */ void expandarg(union node *arg, struct arglist *arglist, int flag) { struct worddest exparg; if (fflag) flag &= ~EXP_GLOB; argbackq = arg->narg.backquote; exparg.list = arglist; exparg.state = WORD_IDLE; STARTSTACKSTR(expdest); argstr(arg->narg.text, flag, &exparg); if (arglist == NULL) { STACKSTRNUL(expdest); return; /* here document expanded */ } if ((flag & EXP_SPLIT) == 0 || expdest != stackblock() || exparg.state == WORD_QUOTEMARK) { STPUTC('\0', expdest); if (flag & EXP_SPLIT) { if (flag & EXP_GLOB) expandmeta(grabstackstr(expdest), exparg.list); else appendarglist(exparg.list, grabstackstr(expdest)); } } if ((flag & EXP_SPLIT) == 0) appendarglist(arglist, grabstackstr(expdest)); } /* * Perform parameter expansion, command substitution and arithmetic * expansion, and tilde expansion if requested via EXP_TILDE/EXP_VARTILDE. * Processing ends at a CTLENDVAR or CTLENDARI character as well as '\0'. * This is used to expand word in ${var+word} etc. * If EXP_GLOB or EXP_CASE are set, keep and/or generate CTLESC * characters to allow for further processing. * * If EXP_SPLIT is set, dst receives any complete words produced. */ static char * argstr(char *p, int flag, struct worddest *dst) { char c; int quotes = flag & (EXP_GLOB | EXP_CASE); /* do CTLESC */ int firsteq = 1; int split_lit; int lit_quoted; split_lit = flag & EXP_SPLIT_LIT; lit_quoted = flag & EXP_LIT_QUOTED; flag &= ~(EXP_SPLIT_LIT | EXP_LIT_QUOTED); if (*p == '~' && (flag & (EXP_TILDE | EXP_VARTILDE))) p = exptilde(p, flag); for (;;) { CHECKSTRSPACE(2, expdest); switch (c = *p++) { case '\0': return (p - 1); case CTLENDVAR: case CTLENDARI: return (p); case CTLQUOTEMARK: lit_quoted = 1; /* "$@" syntax adherence hack */ if (p[0] == CTLVAR && (p[1] & VSQUOTE) != 0 && p[2] == '@' && p[3] == '=') break; if ((flag & EXP_SPLIT) != 0 && expdest == stackblock()) dst->state = WORD_QUOTEMARK; break; case CTLQUOTEEND: lit_quoted = 0; break; case CTLESC: c = *p++; if (split_lit && !lit_quoted && strchr(ifsset() ? ifsval() : " \t\n", c) != NULL) { NEXTWORD(c, flag, expdest, dst); break; } if (quotes) USTPUTC(CTLESC, expdest); USTPUTC(c, expdest); break; case CTLVAR: p = evalvar(p, flag, dst); break; case CTLBACKQ: case CTLBACKQ|CTLQUOTE: expbackq(argbackq->n, c & CTLQUOTE, flag, dst); argbackq = argbackq->next; break; case CTLARI: p = expari(p, flag, dst); break; case ':': case '=': /* * sort of a hack - expand tildes in variable * assignments (after the first '=' and after ':'s). */ if (split_lit && !lit_quoted && strchr(ifsset() ? ifsval() : " \t\n", c) != NULL) { NEXTWORD(c, flag, expdest, dst); break; } USTPUTC(c, expdest); if (flag & EXP_VARTILDE && *p == '~' && (c != '=' || firsteq)) { if (c == '=') firsteq = 0; p = exptilde(p, flag); } break; default: if (split_lit && !lit_quoted && strchr(ifsset() ? ifsval() : " \t\n", c) != NULL) { NEXTWORD(c, flag, expdest, dst); break; } USTPUTC(c, expdest); } } } /* * Perform tilde expansion, placing the result in the stack string and * returning the next position in the input string to process. */ static char * exptilde(char *p, int flag) { char c, *startp = p; struct passwd *pw; char *home; for (;;) { c = *p; switch(c) { case CTLESC: /* This means CTL* are always considered quoted. */ case CTLVAR: case CTLBACKQ: case CTLBACKQ | CTLQUOTE: case CTLARI: case CTLENDARI: case CTLQUOTEMARK: return (startp); case ':': if ((flag & EXP_VARTILDE) == 0) break; /* FALLTHROUGH */ case '\0': case '/': case CTLENDVAR: *p = '\0'; if (*(startp+1) == '\0') { home = lookupvar("HOME"); } else { pw = getpwnam(startp+1); home = pw != NULL ? pw->pw_dir : NULL; } *p = c; if (home == NULL || *home == '\0') return (startp); strtodest(home, flag, VSNORMAL, 1, NULL); return (p); } p++; } } /* * Expand arithmetic expression. */ static char * expari(char *p, int flag, struct worddest *dst) { char *q, *start; arith_t result; int begoff; int quoted; int adj; quoted = *p++ == '"'; begoff = expdest - stackblock(); p = argstr(p, 0, NULL); STPUTC('\0', expdest); start = stackblock() + begoff; q = grabstackstr(expdest); result = arith(start); ungrabstackstr(q, expdest); start = stackblock() + begoff; adj = start - expdest; STADJUST(adj, expdest); CHECKSTRSPACE((int)(DIGITS(result) + 1), expdest); fmtstr(expdest, DIGITS(result), ARITH_FORMAT_STR, result); adj = strlen(expdest); STADJUST(adj, expdest); if (!quoted) reprocess(expdest - adj - stackblock(), flag, VSNORMAL, 0, dst); return p; } /* * Perform command substitution. */ static void expbackq(union node *cmd, int quoted, int flag, struct worddest *dst) { struct backcmd in; int i; char buf[128]; char *p; char *dest = expdest; struct nodelist *saveargbackq; char lastc; char const *syntax = quoted? DQSYNTAX : BASESYNTAX; int quotes = flag & (EXP_GLOB | EXP_CASE); size_t nnl; const char *ifs; INTOFF; saveargbackq = argbackq; p = grabstackstr(dest); evalbackcmd(cmd, &in); ungrabstackstr(p, dest); argbackq = saveargbackq; p = in.buf; lastc = '\0'; nnl = 0; if (!quoted && flag & EXP_SPLIT) ifs = ifsset() ? ifsval() : " \t\n"; else ifs = ""; /* Don't copy trailing newlines */ for (;;) { if (--in.nleft < 0) { if (in.fd < 0) break; while ((i = read(in.fd, buf, sizeof buf)) < 0 && errno == EINTR); TRACE(("expbackq: read returns %d\n", i)); if (i <= 0) break; p = buf; in.nleft = i - 1; } lastc = *p++; if (lastc == '\0') continue; if (lastc == '\n') { nnl++; } else { if (nnl > 0) { if (strchr(ifs, '\n') != NULL) { NEXTWORD('\n', flag, dest, dst); nnl = 0; } else { CHECKSTRSPACE(nnl + 2, dest); while (nnl > 0) { nnl--; USTPUTC('\n', dest); } } } if (strchr(ifs, lastc) != NULL) NEXTWORD(lastc, flag, dest, dst); else { CHECKSTRSPACE(2, dest); if (quotes && syntax[(int)lastc] == CCTL) USTPUTC(CTLESC, dest); USTPUTC(lastc, dest); } } } if (in.fd >= 0) close(in.fd); if (in.buf) ckfree(in.buf); if (in.jp) exitstatus = waitforjob(in.jp, (int *)NULL); TRACE(("expbackq: size=%td: \"%.*s\"\n", ((dest - stackblock()) - startloc), (int)((dest - stackblock()) - startloc), stackblock() + startloc)); expdest = dest; INTON; } static void recordleft(const char *str, const char *loc, char *startp) { int amount; amount = ((str - 1) - (loc - startp)) - expdest; STADJUST(amount, expdest); while (loc != str - 1) *startp++ = *loc++; } static void subevalvar_trim(char *p, int strloc, int subtype, int startloc) { char *startp; char *loc = NULL; char *str; int c = 0; struct nodelist *saveargbackq = argbackq; int amount; argstr(p, EXP_CASE | EXP_TILDE, NULL); STACKSTRNUL(expdest); argbackq = saveargbackq; startp = stackblock() + startloc; str = stackblock() + strloc; switch (subtype) { case VSTRIMLEFT: for (loc = startp; loc < str; loc++) { c = *loc; *loc = '\0'; if (patmatch(str, startp)) { *loc = c; recordleft(str, loc, startp); return; } *loc = c; } break; case VSTRIMLEFTMAX: for (loc = str - 1; loc >= startp;) { c = *loc; *loc = '\0'; if (patmatch(str, startp)) { *loc = c; recordleft(str, loc, startp); return; } *loc = c; loc--; } break; case VSTRIMRIGHT: for (loc = str - 1; loc >= startp;) { if (patmatch(str, loc)) { amount = loc - expdest; STADJUST(amount, expdest); return; } loc--; } break; case VSTRIMRIGHTMAX: for (loc = startp; loc < str - 1; loc++) { if (patmatch(str, loc)) { amount = loc - expdest; STADJUST(amount, expdest); return; } } break; default: abort(); } amount = (expdest - stackblock() - strloc) + 1; STADJUST(-amount, expdest); } static int subevalvar_misc(char *p, const char *var, int subtype, int startloc, int varflags) { char *startp; struct nodelist *saveargbackq = argbackq; int amount; argstr(p, EXP_TILDE, NULL); STACKSTRNUL(expdest); argbackq = saveargbackq; startp = stackblock() + startloc; switch (subtype) { case VSASSIGN: setvar(var, startp, 0); amount = startp - expdest; STADJUST(amount, expdest); return 1; case VSQUESTION: if (*p != CTLENDVAR) { outfmt(out2, "%s\n", startp); error((char *)NULL); } error("%.*s: parameter %snot set", (int)(p - var - 1), var, (varflags & VSNUL) ? "null or " : ""); return 0; default: abort(); } } /* * Expand a variable, and return a pointer to the next character in the * input string. */ static char * evalvar(char *p, int flag, struct worddest *dst) { int subtype; int varflags; char *var; const char *val; int patloc; int c; int set; int special; int startloc; int varlen; int varlenb; char buf[21]; varflags = (unsigned char)*p++; subtype = varflags & VSTYPE; var = p; special = 0; if (! is_name(*p)) special = 1; p = strchr(p, '=') + 1; again: /* jump here after setting a variable with ${var=text} */ if (varflags & VSLINENO) { set = 1; special = 1; val = NULL; } else if (special) { set = varisset(var, varflags & VSNUL); val = NULL; } else { val = bltinlookup(var, 1); if (val == NULL || ((varflags & VSNUL) && val[0] == '\0')) { val = NULL; set = 0; } else set = 1; } varlen = 0; startloc = expdest - stackblock(); if (!set && uflag && *var != '@' && *var != '*') { switch (subtype) { case VSNORMAL: case VSTRIMLEFT: case VSTRIMLEFTMAX: case VSTRIMRIGHT: case VSTRIMRIGHTMAX: case VSLENGTH: error("%.*s: parameter not set", (int)(p - var - 1), var); } } if (set && subtype != VSPLUS) { /* insert the value of the variable */ if (special) { if (varflags & VSLINENO) { if (p - var > (ptrdiff_t)sizeof(buf)) abort(); memcpy(buf, var, p - var - 1); buf[p - var - 1] = '\0'; strtodest(buf, flag, subtype, varflags & VSQUOTE, dst); } else varvalue(var, varflags & VSQUOTE, subtype, flag, dst); if (subtype == VSLENGTH) { varlenb = expdest - stackblock() - startloc; varlen = varlenb; if (localeisutf8) { val = stackblock() + startloc; for (;val != expdest; val++) if ((*val & 0xC0) == 0x80) varlen--; } STADJUST(-varlenb, expdest); } } else { if (subtype == VSLENGTH) { for (;*val; val++) if (!localeisutf8 || (*val & 0xC0) != 0x80) varlen++; } else strtodest(val, flag, subtype, varflags & VSQUOTE, dst); } } if (subtype == VSPLUS) set = ! set; switch (subtype) { case VSLENGTH: cvtnum(varlen, buf); strtodest(buf, flag, VSNORMAL, varflags & VSQUOTE, dst); break; case VSNORMAL: break; case VSPLUS: case VSMINUS: if (!set) { argstr(p, flag | (flag & EXP_SPLIT ? EXP_SPLIT_LIT : 0) | (varflags & VSQUOTE ? EXP_LIT_QUOTED : 0), dst); break; } break; case VSTRIMLEFT: case VSTRIMLEFTMAX: case VSTRIMRIGHT: case VSTRIMRIGHTMAX: if (!set) break; /* * Terminate the string and start recording the pattern * right after it */ STPUTC('\0', expdest); patloc = expdest - stackblock(); subevalvar_trim(p, patloc, subtype, startloc); reprocess(startloc, flag, VSNORMAL, varflags & VSQUOTE, dst); if (flag & EXP_SPLIT && *var == '@' && varflags & VSQUOTE) dst->state = WORD_QUOTEMARK; break; case VSASSIGN: case VSQUESTION: if (!set) { if (subevalvar_misc(p, var, subtype, startloc, varflags)) { varflags &= ~VSNUL; goto again; } break; } break; case VSERROR: c = p - var - 1; error("${%.*s%s}: Bad substitution", c, var, (c > 0 && *p != CTLENDVAR) ? "..." : ""); default: abort(); } if (subtype != VSNORMAL) { /* skip to end of alternative */ int nesting = 1; for (;;) { if ((c = *p++) == CTLESC) p++; else if (c == CTLBACKQ || c == (CTLBACKQ|CTLQUOTE)) { if (set) argbackq = argbackq->next; } else if (c == CTLVAR) { if ((*p++ & VSTYPE) != VSNORMAL) nesting++; } else if (c == CTLENDVAR) { if (--nesting == 0) break; } } } return p; } /* * Test whether a specialized variable is set. */ static int varisset(const char *name, int nulok) { if (*name == '!') return backgndpidset(); else if (*name == '@' || *name == '*') { if (*shellparam.p == NULL) return 0; if (nulok) { char **av; for (av = shellparam.p; *av; av++) if (**av != '\0') return 1; return 0; } } else if (is_digit(*name)) { char *ap; long num; errno = 0; num = strtol(name, NULL, 10); if (errno != 0 || num > shellparam.nparam) return 0; if (num == 0) ap = arg0; else ap = shellparam.p[num - 1]; if (nulok && (ap == NULL || *ap == '\0')) return 0; } return 1; } static void strtodest(const char *p, int flag, int subtype, int quoted, struct worddest *dst) { if (subtype == VSLENGTH || subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX || subtype == VSTRIMRIGHT || subtype == VSTRIMRIGHTMAX) STPUTS(p, expdest); else if (flag & EXP_SPLIT && !quoted && dst != NULL) STPUTS_SPLIT(p, BASESYNTAX, flag, expdest, dst); else if (flag & (EXP_GLOB | EXP_CASE)) STPUTS_QUOTES(p, quoted ? DQSYNTAX : BASESYNTAX, expdest); else STPUTS(p, expdest); } static void reprocess(int startloc, int flag, int subtype, int quoted, struct worddest *dst) { static char *buf = NULL; static size_t buflen = 0; char *startp; size_t len, zpos, zlen; startp = stackblock() + startloc; len = expdest - startp; if (len >= SIZE_MAX / 2) abort(); INTOFF; if (len >= buflen) { ckfree(buf); buf = NULL; } if (buflen < 128) buflen = 128; while (len >= buflen) buflen <<= 1; if (buf == NULL) buf = ckmalloc(buflen); INTON; memcpy(buf, startp, len); buf[len] = '\0'; STADJUST(-len, expdest); for (zpos = 0;;) { zlen = strlen(buf + zpos); strtodest(buf + zpos, flag, subtype, quoted, dst); zpos += zlen + 1; if (zpos == len + 1) break; if (flag & EXP_SPLIT && (quoted || (zlen > 0 && zpos < len))) NEXTWORD('\0', flag, expdest, dst); } } /* * Add the value of a specialized variable to the stack string. */ static void varvalue(const char *name, int quoted, int subtype, int flag, struct worddest *dst) { int num; char *p; int i; int splitlater; char sep[2]; char **ap; char buf[(NSHORTOPTS > 10 ? NSHORTOPTS : 10) + 1]; if (subtype == VSLENGTH) flag &= ~EXP_FULL; splitlater = subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX || subtype == VSTRIMRIGHT || subtype == VSTRIMRIGHTMAX; switch (*name) { case '$': num = rootpid; break; case '?': num = oexitstatus; break; case '#': num = shellparam.nparam; break; case '!': num = backgndpidval(); break; case '-': p = buf; for (i = 0 ; i < NSHORTOPTS ; i++) { - if (optlist[i].val) - *p++ = optlist[i].letter; + if (optval[i]) + *p++ = optletter[i]; } *p = '\0'; strtodest(buf, flag, subtype, quoted, dst); return; case '@': if (flag & EXP_SPLIT && quoted) { for (ap = shellparam.p ; (p = *ap++) != NULL ; ) { strtodest(p, flag, subtype, quoted, dst); if (*ap) { if (splitlater) STPUTC('\0', expdest); else NEXTWORD('\0', flag, expdest, dst); } } if (shellparam.nparam > 0) dst->state = WORD_QUOTEMARK; return; } /* FALLTHROUGH */ case '*': if (ifsset()) sep[0] = ifsval()[0]; else sep[0] = ' '; sep[1] = '\0'; for (ap = shellparam.p ; (p = *ap++) != NULL ; ) { strtodest(p, flag, subtype, quoted, dst); if (!*ap) break; if (sep[0]) strtodest(sep, flag, subtype, quoted, dst); else if (flag & EXP_SPLIT && !quoted && **ap != '\0') { if (splitlater) STPUTC('\0', expdest); else NEXTWORD('\0', flag, expdest, dst); } } return; default: if (is_digit(*name)) { num = atoi(name); if (num == 0) p = arg0; else if (num > 0 && num <= shellparam.nparam) p = shellparam.p[num - 1]; else return; strtodest(p, flag, subtype, quoted, dst); } return; } cvtnum(num, buf); strtodest(buf, flag, subtype, quoted, dst); } static char expdir[PATH_MAX]; #define expdir_end (expdir + sizeof(expdir)) /* * Perform pathname generation and remove control characters. * At this point, the only control characters should be CTLESC. * The results are stored in the list dstlist. */ static void expandmeta(char *pattern, struct arglist *dstlist) { char *p; int firstmatch; char c; firstmatch = dstlist->count; p = pattern; for (; (c = *p) != '\0'; p++) { /* fast check for meta chars */ if (c == '*' || c == '?' || c == '[') { INTOFF; expmeta(expdir, pattern, dstlist); INTON; break; } } if (dstlist->count == firstmatch) { /* * no matches */ rmescapes(pattern); appendarglist(dstlist, pattern); } else { qsort(&dstlist->args[firstmatch], dstlist->count - firstmatch, sizeof(dstlist->args[0]), expsortcmp); } } /* * Do metacharacter (i.e. *, ?, [...]) expansion. */ static void expmeta(char *enddir, char *name, struct arglist *arglist) { const char *p; const char *q; const char *start; char *endname; int metaflag; struct stat statb; DIR *dirp; struct dirent *dp; int atend; int matchdot; int esc; int namlen; metaflag = 0; start = name; for (p = name; esc = 0, *p; p += esc + 1) { if (*p == '*' || *p == '?') metaflag = 1; else if (*p == '[') { q = p + 1; if (*q == '!' || *q == '^') q++; for (;;) { if (*q == CTLESC) q++; if (*q == '/' || *q == '\0') break; if (*++q == ']') { metaflag = 1; break; } } } else if (*p == '\0') break; else { if (*p == CTLESC) esc++; if (p[esc] == '/') { if (metaflag) break; start = p + esc + 1; } } } if (metaflag == 0) { /* we've reached the end of the file name */ if (enddir != expdir) metaflag++; for (p = name ; ; p++) { if (*p == CTLESC) p++; *enddir++ = *p; if (*p == '\0') break; if (enddir == expdir_end) return; } if (metaflag == 0 || lstat(expdir, &statb) >= 0) appendarglist(arglist, stsavestr(expdir)); return; } endname = name + (p - name); if (start != name) { p = name; while (p < start) { if (*p == CTLESC) p++; *enddir++ = *p++; if (enddir == expdir_end) return; } } if (enddir == expdir) { p = "."; } else if (enddir == expdir + 1 && *expdir == '/') { p = "/"; } else { p = expdir; enddir[-1] = '\0'; } if ((dirp = opendir(p)) == NULL) return; if (enddir != expdir) enddir[-1] = '/'; if (*endname == 0) { atend = 1; } else { atend = 0; *endname = '\0'; endname += esc + 1; } matchdot = 0; p = start; if (*p == CTLESC) p++; if (*p == '.') matchdot++; while (! int_pending() && (dp = readdir(dirp)) != NULL) { if (dp->d_name[0] == '.' && ! matchdot) continue; if (patmatch(start, dp->d_name)) { namlen = dp->d_namlen; if (enddir + namlen + 1 > expdir_end) continue; memcpy(enddir, dp->d_name, namlen + 1); if (atend) appendarglist(arglist, stsavestr(expdir)); else { if (dp->d_type != DT_UNKNOWN && dp->d_type != DT_DIR && dp->d_type != DT_LNK) continue; if (enddir + namlen + 2 > expdir_end) continue; enddir[namlen] = '/'; enddir[namlen + 1] = '\0'; expmeta(enddir + namlen + 1, endname, arglist); } } } closedir(dirp); if (! atend) endname[-esc - 1] = esc ? CTLESC : '/'; } static int expsortcmp(const void *p1, const void *p2) { const char *s1 = *(const char * const *)p1; const char *s2 = *(const char * const *)p2; return (strcmp(s1, s2)); } static wchar_t get_wc(const char **p) { wchar_t c; int chrlen; chrlen = mbtowc(&c, *p, 4); if (chrlen == 0) return 0; else if (chrlen == -1) c = 0; else *p += chrlen; return c; } /* * See if a character matches a character class, starting at the first colon * of "[:class:]". * If a valid character class is recognized, a pointer to the next character * after the final closing bracket is stored into *end, otherwise a null * pointer is stored into *end. */ static int match_charclass(const char *p, wchar_t chr, const char **end) { char name[20]; const char *nameend; wctype_t cclass; *end = NULL; p++; nameend = strstr(p, ":]"); if (nameend == NULL || (size_t)(nameend - p) >= sizeof(name) || nameend == p) return 0; memcpy(name, p, nameend - p); name[nameend - p] = '\0'; *end = nameend + 2; cclass = wctype(name); /* An unknown class matches nothing but is valid nevertheless. */ if (cclass == 0) return 0; return iswctype(chr, cclass); } /* * Returns true if the pattern matches the string. */ static int patmatch(const char *pattern, const char *string) { const char *p, *q, *end; const char *bt_p, *bt_q; char c; wchar_t wc, wc2; p = pattern; q = string; bt_p = NULL; bt_q = NULL; for (;;) { switch (c = *p++) { case '\0': if (*q != '\0') goto backtrack; return 1; case CTLESC: if (*q++ != *p++) goto backtrack; break; case '?': if (*q == '\0') return 0; if (localeisutf8) { wc = get_wc(&q); /* * A '?' does not match invalid UTF-8 but a * '*' does, so backtrack. */ if (wc == 0) goto backtrack; } else wc = (unsigned char)*q++; break; case '*': c = *p; while (c == '*') c = *++p; /* * If the pattern ends here, we know the string * matches without needing to look at the rest of it. */ if (c == '\0') return 1; /* * First try the shortest match for the '*' that * could work. We can forget any earlier '*' since * there is no way having it match more characters * can help us, given that we are already here. */ bt_p = p; bt_q = q; break; case '[': { const char *savep, *saveq; int invert, found; wchar_t chr; savep = p, saveq = q; invert = 0; if (*p == '!' || *p == '^') { invert++; p++; } found = 0; if (*q == '\0') return 0; if (localeisutf8) { chr = get_wc(&q); if (chr == 0) goto backtrack; } else chr = (unsigned char)*q++; c = *p++; do { if (c == '\0') { p = savep, q = saveq; c = '['; goto dft; } if (c == '[' && *p == ':') { found |= match_charclass(p, chr, &end); if (end != NULL) p = end; } if (c == CTLESC) c = *p++; if (localeisutf8 && c & 0x80) { p--; wc = get_wc(&p); if (wc == 0) /* bad utf-8 */ return 0; } else wc = (unsigned char)c; if (*p == '-' && p[1] != ']') { p++; if (*p == CTLESC) p++; if (localeisutf8) { wc2 = get_wc(&p); if (wc2 == 0) /* bad utf-8 */ return 0; } else wc2 = (unsigned char)*p++; if ( collate_range_cmp(chr, wc) >= 0 && collate_range_cmp(chr, wc2) <= 0 ) found = 1; } else { if (chr == wc) found = 1; } } while ((c = *p++) != ']'); if (found == invert) goto backtrack; break; } dft: default: if (*q == '\0') return 0; if (*q++ == c) break; backtrack: /* * If we have a mismatch (other than hitting the end * of the string), go back to the last '*' seen and * have it match one additional character. */ if (bt_p == NULL) return 0; if (*bt_q == '\0') return 0; bt_q++; p = bt_p; q = bt_q; break; } } } /* * Remove any CTLESC and CTLQUOTEMARK characters from a string. */ void rmescapes(char *str) { char *p, *q; p = str; while (*p != CTLESC && *p != CTLQUOTEMARK && *p != CTLQUOTEEND) { if (*p++ == '\0') return; } q = p; while (*p) { if (*p == CTLQUOTEMARK || *p == CTLQUOTEEND) { p++; continue; } if (*p == CTLESC) p++; *q++ = *p++; } *q = '\0'; } /* * See if a pattern matches in a case statement. */ int casematch(union node *pattern, const char *val) { struct stackmark smark; int result; char *p; setstackmark(&smark); argbackq = pattern->narg.backquote; STARTSTACKSTR(expdest); argstr(pattern->narg.text, EXP_TILDE | EXP_CASE, NULL); STPUTC('\0', expdest); p = grabstackstr(expdest); result = patmatch(p, val); popstackmark(&smark); return result; } /* * Our own itoa(). */ static void cvtnum(int num, char *buf) { char temp[32]; int neg = num < 0; char *p = temp + 31; temp[31] = '\0'; do { *--p = num % 10 + '0'; } while ((num /= 10) != 0); if (neg) *--p = '-'; memcpy(buf, p, temp + 32 - p); } /* * Do most of the work for wordexp(3). */ int wordexpcmd(int argc, char **argv) { size_t len; int i; out1fmt("%08x", argc - 1); for (i = 1, len = 0; i < argc; i++) len += strlen(argv[i]); out1fmt("%08x", (int)len); for (i = 1; i < argc; i++) outbin(argv[i], strlen(argv[i]) + 1, out1); return (0); } /* * Do most of the work for wordexp(3), new version. */ int freebsd_wordexpcmd(int argc __unused, char **argv __unused) { struct arglist arglist; union node *args, *n; size_t len; int ch; int protected = 0; int fd = -1; int i; while ((ch = nextopt("f:p")) != '\0') { switch (ch) { case 'f': fd = number(shoptarg); break; case 'p': protected = 1; break; } } if (*argptr != NULL) error("wrong number of arguments"); if (fd < 0) error("missing fd"); INTOFF; setinputfd(fd, 1); INTON; args = parsewordexp(); popfile(); /* will also close fd */ if (protected) for (n = args; n != NULL; n = n->narg.next) { if (n->narg.backquote != NULL) { outcslow('C', out1); error("command substitution disabled"); } } outcslow(' ', out1); emptyarglist(&arglist); for (n = args; n != NULL; n = n->narg.next) expandarg(n, &arglist, EXP_FULL | EXP_TILDE); for (i = 0, len = 0; i < arglist.count; i++) len += strlen(arglist.args[i]); out1fmt("%016x %016zx", arglist.count, len); for (i = 0; i < arglist.count; i++) outbin(arglist.args[i], strlen(arglist.args[i]) + 1, out1); return (0); } Index: projects/sendfile/bin/sh/options.c =================================================================== --- projects/sendfile/bin/sh/options.c (revision 293405) +++ projects/sendfile/bin/sh/options.c (revision 293406) @@ -1,595 +1,598 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Kenneth Almquist. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint #if 0 static char sccsid[] = "@(#)options.c 8.2 (Berkeley) 5/4/95"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include #include "shell.h" #define DEFINE_OPTIONS #include "options.h" #undef DEFINE_OPTIONS #include "nodes.h" /* for other header files */ #include "eval.h" #include "jobs.h" #include "input.h" #include "output.h" #include "trap.h" #include "var.h" #include "memalloc.h" #include "error.h" #include "mystring.h" #include "builtins.h" #ifndef NO_HISTORY #include "myhistedit.h" #endif char *arg0; /* value of $0 */ struct shparam shellparam; /* current positional parameters */ char **argptr; /* argument list for builtin commands */ char *shoptarg; /* set by nextopt (like getopt) */ char *nextopt_optptr; /* used by nextopt */ char *minusc; /* argument to -c option */ static void options(int); static void minus_o(char *, int); static void setoption(int, int); static void setoptionbyindex(int, int); static int getopts(char *, char *, char **, char ***, char **); /* * Process the shell command line arguments. */ void procargs(int argc, char **argv) { int i; char *scriptname; argptr = argv; if (argc > 0) argptr++; for (i = 0; i < NOPTS; i++) - optlist[i].val = 2; + optval[i] = 2; privileged = (getuid() != geteuid() || getgid() != getegid()); options(1); if (*argptr == NULL && minusc == NULL) sflag = 1; if (iflag != 0 && sflag == 1 && isatty(0) && isatty(1)) { iflag = 1; if (Eflag == 2) Eflag = 1; } if (mflag == 2) mflag = iflag; for (i = 0; i < NOPTS; i++) - if (optlist[i].val == 2) - optlist[i].val = 0; + if (optval[i] == 2) + optval[i] = 0; arg0 = argv[0]; if (sflag == 0 && minusc == NULL) { scriptname = *argptr++; setinputfile(scriptname, 0); commandname = arg0 = scriptname; } /* POSIX 1003.2: first arg after -c cmd is $0, remainder $1... */ if (argptr && minusc && *argptr) arg0 = *argptr++; shellparam.p = argptr; shellparam.reset = 1; /* assert(shellparam.malloc == 0 && shellparam.nparam == 0); */ while (*argptr) { shellparam.nparam++; argptr++; } optschanged(); } void optschanged(void) { setinteractive(iflag); #ifndef NO_HISTORY histedit(); #endif setjobctl(mflag); } /* * Process shell options. The global variable argptr contains a pointer * to the argument list; we advance it past the options. */ static void options(int cmdline) { char *kp, *p; int val; int c; if (cmdline) minusc = NULL; while ((p = *argptr) != NULL) { argptr++; if ((c = *p++) == '-') { val = 1; /* A "-" or "--" terminates options */ if (p[0] == '\0') goto end_options1; if (p[0] == '-' && p[1] == '\0') goto end_options2; /** * For the benefit of `#!' lines in shell scripts, * treat a string of '-- *#.*' the same as '--'. * This is needed so that a script starting with: * #!/bin/sh -- # -*- perl -*- * will continue to work after a change is made to * kern/imgact_shell.c to NOT token-ize the options * specified on a '#!' line. A bit of a kludge, * but that trick is recommended in documentation * for some scripting languages, and we might as * well continue to support it. */ if (p[0] == '-') { kp = p + 1; while (*kp == ' ' || *kp == '\t') kp++; if (*kp == '#' || *kp == '\0') goto end_options2; } } else if (c == '+') { val = 0; } else { argptr--; break; } while ((c = *p++) != '\0') { if (c == 'c' && cmdline) { char *q; #ifdef NOHACK /* removing this code allows sh -ce 'foo' for compat */ if (*p == '\0') #endif q = *argptr++; if (q == NULL || minusc != NULL) error("Bad -c option"); minusc = q; #ifdef NOHACK break; #endif } else if (c == 'o') { minus_o(*argptr, val); if (*argptr) argptr++; } else setoption(c, val); } } return; /* When processing `set', a single "-" means turn off -x and -v */ end_options1: if (!cmdline) { xflag = vflag = 0; return; } /* * When processing `set', a "--" means the remaining arguments * replace the positional parameters in the active shell. If * there are no remaining options, then all the positional * parameters are cleared (equivalent to doing ``shift $#''). */ end_options2: if (!cmdline) { if (*argptr == NULL) setparam(argptr); return; } /* * At this point we are processing options given to 'sh' on a command * line. If an end-of-options marker ("-" or "--") is followed by an * arg of "#", then skip over all remaining arguments. Some scripting * languages (e.g.: perl) document that /bin/sh will implement this * behavior, and they recommend that users take advantage of it to * solve certain issues that can come up when writing a perl script. * Yes, this feature is in /bin/sh to help users write perl scripts. */ p = *argptr; if (p != NULL && p[0] == '#' && p[1] == '\0') { while (*argptr != NULL) argptr++; /* We need to keep the final argument */ argptr--; } } static void minus_o(char *name, int val) { int i; + const unsigned char *on; + size_t len; if (name == NULL) { if (val) { /* "Pretty" output. */ out1str("Current option settings\n"); - for (i = 0; i < NOPTS; i++) - out1fmt("%-16s%s\n", optlist[i].name, - optlist[i].val ? "on" : "off"); + for (i = 0, on = optname; i < NOPTS; i++, on += *on + 1) + out1fmt("%-16.*s%s\n", *on, on + 1, + optval[i] ? "on" : "off"); } else { /* Output suitable for re-input to shell. */ - for (i = 0; i < NOPTS; i++) - out1fmt("%s %co %s%s", + for (i = 0, on = optname; i < NOPTS; i++, on += *on + 1) + out1fmt("%s %co %.*s%s", i % 6 == 0 ? "set" : "", - optlist[i].val ? '-' : '+', - optlist[i].name, + optval[i] ? '-' : '+', + *on, on + 1, i % 6 == 5 || i == NOPTS - 1 ? "\n" : ""); } } else { - for (i = 0; i < NOPTS; i++) - if (equal(name, optlist[i].name)) { + len = strlen(name); + for (i = 0, on = optname; i < NOPTS; i++, on += *on + 1) + if (*on == len && memcmp(on + 1, name, len) == 0) { setoptionbyindex(i, val); return; } error("Illegal option -o %s", name); } } static void setoptionbyindex(int idx, int val) { - if (optlist[idx].letter == 'p' && !val && privileged) { + if (optletter[idx] == 'p' && !val && privileged) { if (setgid(getgid()) == -1) error("setgid"); if (setuid(getuid()) == -1) error("setuid"); } - optlist[idx].val = val; + optval[idx] = val; if (val) { /* #%$ hack for ksh semantics */ - if (optlist[idx].letter == 'V') + if (optletter[idx] == 'V') Eflag = 0; - else if (optlist[idx].letter == 'E') + else if (optletter[idx] == 'E') Vflag = 0; } } static void setoption(int flag, int val) { int i; for (i = 0; i < NSHORTOPTS; i++) - if (optlist[i].letter == flag) { + if (optletter[i] == flag) { setoptionbyindex(i, val); return; } error("Illegal option -%c", flag); } /* * Set the shell parameters. */ void setparam(char **argv) { char **newparam; char **ap; int nparam; for (nparam = 0 ; argv[nparam] ; nparam++); ap = newparam = ckmalloc((nparam + 1) * sizeof *ap); while (*argv) { *ap++ = savestr(*argv++); } *ap = NULL; freeparam(&shellparam); shellparam.malloc = 1; shellparam.nparam = nparam; shellparam.p = newparam; shellparam.optp = NULL; shellparam.reset = 1; shellparam.optnext = NULL; } /* * Free the list of positional parameters. */ void freeparam(struct shparam *param) { char **ap; if (param->malloc) { for (ap = param->p ; *ap ; ap++) ckfree(*ap); ckfree(param->p); } if (param->optp) { for (ap = param->optp ; *ap ; ap++) ckfree(*ap); ckfree(param->optp); } } /* * The shift builtin command. */ int shiftcmd(int argc, char **argv) { int n; char **ap1, **ap2; n = 1; if (argc > 1) n = number(argv[1]); if (n > shellparam.nparam) return 1; INTOFF; shellparam.nparam -= n; for (ap1 = shellparam.p ; --n >= 0 ; ap1++) { if (shellparam.malloc) ckfree(*ap1); } ap2 = shellparam.p; while ((*ap2++ = *ap1++) != NULL); shellparam.reset = 1; INTON; return 0; } /* * The set command builtin. */ int setcmd(int argc, char **argv) { if (argc == 1) return showvarscmd(argc, argv); INTOFF; options(0); optschanged(); if (*argptr != NULL) { setparam(argptr); } INTON; return 0; } void getoptsreset(const char *value) { while (*value == '0') value++; if (strcmp(value, "1") == 0) shellparam.reset = 1; } /* * The getopts builtin. Shellparam.optnext points to the next argument * to be processed. Shellparam.optptr points to the next character to * be processed in the current argument. If shellparam.optnext is NULL, * then it's the first time getopts has been called. */ int getoptscmd(int argc, char **argv) { char **optbase = NULL, **ap; int i; if (argc < 3) error("usage: getopts optstring var [arg]"); if (shellparam.reset == 1) { INTOFF; if (shellparam.optp) { for (ap = shellparam.optp ; *ap ; ap++) ckfree(*ap); ckfree(shellparam.optp); shellparam.optp = NULL; } if (argc > 3) { shellparam.optp = ckmalloc((argc - 2) * sizeof *ap); memset(shellparam.optp, '\0', (argc - 2) * sizeof *ap); for (i = 0; i < argc - 3; i++) shellparam.optp[i] = savestr(argv[i + 3]); } INTON; optbase = argc == 3 ? shellparam.p : shellparam.optp; shellparam.optnext = optbase; shellparam.optptr = NULL; shellparam.reset = 0; } else optbase = shellparam.optp ? shellparam.optp : shellparam.p; return getopts(argv[1], argv[2], optbase, &shellparam.optnext, &shellparam.optptr); } static int getopts(char *optstr, char *optvar, char **optfirst, char ***optnext, char **optptr) { char *p, *q; char c = '?'; int done = 0; int ind = 0; int err = 0; char s[10]; const char *newoptarg = NULL; if ((p = *optptr) == NULL || *p == '\0') { /* Current word is done, advance */ if (*optnext == NULL) return 1; p = **optnext; if (p == NULL || *p != '-' || *++p == '\0') { atend: ind = *optnext - optfirst + 1; *optnext = NULL; p = NULL; done = 1; goto out; } (*optnext)++; if (p[0] == '-' && p[1] == '\0') /* check for "--" */ goto atend; } c = *p++; for (q = optstr; *q != c; ) { if (*q == '\0') { if (optstr[0] == ':') { s[0] = c; s[1] = '\0'; newoptarg = s; } else out2fmt_flush("Illegal option -%c\n", c); c = '?'; goto out; } if (*++q == ':') q++; } if (*++q == ':') { if (*p == '\0' && (p = **optnext) == NULL) { if (optstr[0] == ':') { s[0] = c; s[1] = '\0'; newoptarg = s; c = ':'; } else { out2fmt_flush("No arg for -%c option\n", c); c = '?'; } goto out; } if (p == **optnext) (*optnext)++; newoptarg = p; p = NULL; } out: if (*optnext != NULL) ind = *optnext - optfirst + 1; *optptr = p; if (newoptarg != NULL) err |= setvarsafe("OPTARG", newoptarg, 0); else { INTOFF; err |= unsetvar("OPTARG"); INTON; } fmtstr(s, sizeof(s), "%d", ind); err |= setvarsafe("OPTIND", s, VNOFUNC); s[0] = c; s[1] = '\0'; err |= setvarsafe(optvar, s, 0); if (err) { *optnext = NULL; *optptr = NULL; flushall(); exraise(EXERROR); } return done; } /* * Standard option processing (a la getopt) for builtin routines. The * only argument that is passed to nextopt is the option string; the * other arguments are unnecessary. It return the character, or '\0' on * end of input. */ int nextopt(const char *optstring) { char *p; const char *q; char c; if ((p = nextopt_optptr) == NULL || *p == '\0') { p = *argptr; if (p == NULL || *p != '-' || *++p == '\0') return '\0'; argptr++; if (p[0] == '-' && p[1] == '\0') /* check for "--" */ return '\0'; } c = *p++; for (q = optstring ; *q != c ; ) { if (*q == '\0') error("Illegal option -%c", c); if (*++q == ':') q++; } if (*++q == ':') { if (*p == '\0' && (p = *argptr++) == NULL) error("No arg for -%c option", c); shoptarg = p; p = NULL; } nextopt_optptr = p; return c; } Index: projects/sendfile/bin/sh/options.h =================================================================== --- projects/sendfile/bin/sh/options.h (revision 293405) +++ projects/sendfile/bin/sh/options.h (revision 293406) @@ -1,117 +1,114 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Kenneth Almquist. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)options.h 8.2 (Berkeley) 5/4/95 * $FreeBSD$ */ struct shparam { int nparam; /* # of positional parameters (without $0) */ unsigned char malloc; /* if parameter list dynamically allocated */ unsigned char reset; /* if getopts has been reset */ char **p; /* parameter list */ char **optp; /* parameter list for getopts */ char **optnext; /* next parameter to be processed by getopts */ char *optptr; /* used by getopts */ }; -#define eflag optlist[0].val -#define fflag optlist[1].val -#define Iflag optlist[2].val -#define iflag optlist[3].val -#define mflag optlist[4].val -#define nflag optlist[5].val -#define sflag optlist[6].val -#define xflag optlist[7].val -#define vflag optlist[8].val -#define Vflag optlist[9].val -#define Eflag optlist[10].val -#define Cflag optlist[11].val -#define aflag optlist[12].val -#define bflag optlist[13].val -#define uflag optlist[14].val -#define privileged optlist[15].val -#define Tflag optlist[16].val -#define Pflag optlist[17].val -#define hflag optlist[18].val -#define nologflag optlist[19].val +#define eflag optval[0] +#define fflag optval[1] +#define Iflag optval[2] +#define iflag optval[3] +#define mflag optval[4] +#define nflag optval[5] +#define sflag optval[6] +#define xflag optval[7] +#define vflag optval[8] +#define Vflag optval[9] +#define Eflag optval[10] +#define Cflag optval[11] +#define aflag optval[12] +#define bflag optval[13] +#define uflag optval[14] +#define privileged optval[15] +#define Tflag optval[16] +#define Pflag optval[17] +#define hflag optval[18] +#define nologflag optval[19] #define NSHORTOPTS 19 #define NOPTS 20 -struct optent { - const char *name; - const char letter; - char val; -}; - -extern struct optent optlist[NOPTS]; +extern char optval[NOPTS]; +extern const char optletter[NSHORTOPTS]; #ifdef DEFINE_OPTIONS -struct optent optlist[NOPTS] = { - { "errexit", 'e', 0 }, - { "noglob", 'f', 0 }, - { "ignoreeof", 'I', 0 }, - { "interactive",'i', 0 }, - { "monitor", 'm', 0 }, - { "noexec", 'n', 0 }, - { "stdin", 's', 0 }, - { "xtrace", 'x', 0 }, - { "verbose", 'v', 0 }, - { "vi", 'V', 0 }, - { "emacs", 'E', 0 }, - { "noclobber", 'C', 0 }, - { "allexport", 'a', 0 }, - { "notify", 'b', 0 }, - { "nounset", 'u', 0 }, - { "privileged", 'p', 0 }, - { "trapsasync", 'T', 0 }, - { "physical", 'P', 0 }, - { "trackall", 'h', 0 }, - { "nolog", '\0', 0 }, -}; +char optval[NOPTS]; +const char optletter[NSHORTOPTS] = "efIimnsxvVECabupTPh"; +static const unsigned char optname[] = + "\007errexit" + "\006noglob" + "\011ignoreeof" + "\013interactive" + "\007monitor" + "\006noexec" + "\005stdin" + "\006xtrace" + "\007verbose" + "\002vi" + "\005emacs" + "\011noclobber" + "\011allexport" + "\006notify" + "\007nounset" + "\012privileged" + "\012trapsasync" + "\010physical" + "\010trackall" + "\005nolog" +; #endif extern char *minusc; /* argument to -c option */ extern char *arg0; /* $0 */ extern struct shparam shellparam; /* $@ */ extern char **argptr; /* argument list for builtin commands */ extern char *shoptarg; /* set by nextopt */ extern char *nextopt_optptr; /* used by nextopt */ void procargs(int, char **); void optschanged(void); void setparam(char **); void freeparam(struct shparam *); int nextopt(const char *); void getoptsreset(const char *); Index: projects/sendfile/bin/sh/var.c =================================================================== --- projects/sendfile/bin/sh/var.c (revision 293405) +++ projects/sendfile/bin/sh/var.c (revision 293406) @@ -1,954 +1,954 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Kenneth Almquist. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint #if 0 static char sccsid[] = "@(#)var.c 8.3 (Berkeley) 5/4/95"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include /* * Shell variables. */ #include #include #include "shell.h" #include "output.h" #include "expand.h" #include "nodes.h" /* for other headers */ #include "eval.h" /* defines cmdenviron */ #include "exec.h" #include "syntax.h" #include "options.h" #include "mail.h" #include "var.h" #include "memalloc.h" #include "error.h" #include "mystring.h" #include "parser.h" #include "builtins.h" #ifndef NO_HISTORY #include "myhistedit.h" #endif #define VTABSIZE 39 struct varinit { struct var *var; int flags; const char *text; void (*func)(const char *); }; #ifndef NO_HISTORY struct var vhistsize; struct var vterm; #endif struct var vifs; struct var vmail; struct var vmpath; struct var vpath; struct var vps1; struct var vps2; struct var vps4; static struct var voptind; struct var vdisvfork; struct localvar *localvars; int forcelocal; static const struct varinit varinit[] = { #ifndef NO_HISTORY { &vhistsize, VUNSET, "HISTSIZE=", sethistsize }, #endif { &vifs, 0, "IFS= \t\n", NULL }, { &vmail, VUNSET, "MAIL=", NULL }, { &vmpath, VUNSET, "MAILPATH=", NULL }, { &vpath, 0, "PATH=" _PATH_DEFPATH, changepath }, /* * vps1 depends on uid */ { &vps2, 0, "PS2=> ", NULL }, { &vps4, 0, "PS4=+ ", NULL }, #ifndef NO_HISTORY { &vterm, VUNSET, "TERM=", setterm }, #endif { &voptind, 0, "OPTIND=1", getoptsreset }, { &vdisvfork, VUNSET, "SH_DISABLE_VFORK=", NULL }, { NULL, 0, NULL, NULL } }; static struct var *vartab[VTABSIZE]; static const char *const locale_names[7] = { "LC_COLLATE", "LC_CTYPE", "LC_MONETARY", "LC_NUMERIC", "LC_TIME", "LC_MESSAGES", NULL }; static const int locale_categories[7] = { LC_COLLATE, LC_CTYPE, LC_MONETARY, LC_NUMERIC, LC_TIME, LC_MESSAGES, 0 }; static int varequal(const char *, const char *); static struct var *find_var(const char *, struct var ***, int *); static int localevar(const char *); static void setvareq_const(const char *s, int flags); extern char **environ; /* * This routine initializes the builtin variables and imports the environment. * It is called when the shell is initialized. */ void initvar(void) { char ppid[20]; const struct varinit *ip; struct var *vp; struct var **vpp; char **envp; for (ip = varinit ; (vp = ip->var) != NULL ; ip++) { if (find_var(ip->text, &vpp, &vp->name_len) != NULL) continue; vp->next = *vpp; *vpp = vp; vp->text = __DECONST(char *, ip->text); vp->flags = ip->flags | VSTRFIXED | VTEXTFIXED; vp->func = ip->func; } /* * PS1 depends on uid */ if (find_var("PS1", &vpp, &vps1.name_len) == NULL) { vps1.next = *vpp; *vpp = &vps1; vps1.text = __DECONST(char *, geteuid() ? "PS1=$ " : "PS1=# "); vps1.flags = VSTRFIXED|VTEXTFIXED; } fmtstr(ppid, sizeof(ppid), "%d", (int)getppid()); setvarsafe("PPID", ppid, 0); for (envp = environ ; *envp ; envp++) { if (strchr(*envp, '=')) { setvareq(*envp, VEXPORT|VTEXTFIXED); } } setvareq_const("OPTIND=1", 0); } /* * Safe version of setvar, returns 1 on success 0 on failure. */ int setvarsafe(const char *name, const char *val, int flags) { struct jmploc jmploc; struct jmploc *const savehandler = handler; int err = 0; int inton; inton = is_int_on(); if (setjmp(jmploc.loc)) err = 1; else { handler = &jmploc; setvar(name, val, flags); } handler = savehandler; SETINTON(inton); return err; } /* * Set the value of a variable. The flags argument is stored with the * flags of the variable. If val is NULL, the variable is unset. */ void setvar(const char *name, const char *val, int flags) { const char *p; size_t len; size_t namelen; size_t vallen; char *nameeq; int isbad; isbad = 0; p = name; if (! is_name(*p)) isbad = 1; p++; for (;;) { if (! is_in_name(*p)) { if (*p == '\0' || *p == '=') break; isbad = 1; } p++; } namelen = p - name; if (isbad) error("%.*s: bad variable name", (int)namelen, name); len = namelen + 2; /* 2 is space for '=' and '\0' */ if (val == NULL) { flags |= VUNSET; vallen = 0; } else { vallen = strlen(val); len += vallen; } INTOFF; nameeq = ckmalloc(len); memcpy(nameeq, name, namelen); nameeq[namelen] = '='; if (val) memcpy(nameeq + namelen + 1, val, vallen + 1); else nameeq[namelen + 1] = '\0'; setvareq(nameeq, flags); INTON; } static int localevar(const char *s) { const char *const *ss; if (*s != 'L') return 0; if (varequal(s + 1, "ANG")) return 1; if (strncmp(s + 1, "C_", 2) != 0) return 0; if (varequal(s + 3, "ALL")) return 1; for (ss = locale_names; *ss ; ss++) if (varequal(s + 3, *ss + 3)) return 1; return 0; } /* * Sets/unsets an environment variable from a pointer that may actually be a * pointer into environ where the string should not be manipulated. */ static void change_env(const char *s, int set) { char *eqp; char *ss; INTOFF; ss = savestr(s); if ((eqp = strchr(ss, '=')) != NULL) *eqp = '\0'; if (set && eqp != NULL) (void) setenv(ss, eqp + 1, 1); else (void) unsetenv(ss); ckfree(ss); INTON; return; } /* * Same as setvar except that the variable and value are passed in * the first argument as name=value. Since the first argument will * be actually stored in the table, it should not be a string that * will go away. */ void setvareq(char *s, int flags) { struct var *vp, **vpp; int nlen; if (aflag) flags |= VEXPORT; if (forcelocal && !(flags & (VNOSET | VNOLOCAL))) mklocal(s); vp = find_var(s, &vpp, &nlen); if (vp != NULL) { if (vp->flags & VREADONLY) { if ((flags & (VTEXTFIXED|VSTACK)) == 0) ckfree(s); error("%.*s: is read only", vp->name_len, vp->text); } if (flags & VNOSET) { if ((flags & (VTEXTFIXED|VSTACK)) == 0) ckfree(s); return; } INTOFF; if (vp->func && (flags & VNOFUNC) == 0) (*vp->func)(s + vp->name_len + 1); if ((vp->flags & (VTEXTFIXED|VSTACK)) == 0) ckfree(vp->text); vp->flags &= ~(VTEXTFIXED|VSTACK|VUNSET); vp->flags |= flags; vp->text = s; /* * We could roll this to a function, to handle it as * a regular variable function callback, but why bother? * * Note: this assumes iflag is not set to 1 initially. * As part of initvar(), this is called before arguments * are looked at. */ if ((vp == &vmpath || (vp == &vmail && ! mpathset())) && iflag == 1) chkmail(1); if ((vp->flags & VEXPORT) && localevar(s)) { change_env(s, 1); (void) setlocale(LC_ALL, ""); updatecharset(); } INTON; return; } /* not found */ if (flags & VNOSET) { if ((flags & (VTEXTFIXED|VSTACK)) == 0) ckfree(s); return; } INTOFF; vp = ckmalloc(sizeof (*vp)); vp->flags = flags; vp->text = s; vp->name_len = nlen; vp->next = *vpp; vp->func = NULL; *vpp = vp; if ((vp->flags & VEXPORT) && localevar(s)) { change_env(s, 1); (void) setlocale(LC_ALL, ""); updatecharset(); } INTON; } static void setvareq_const(const char *s, int flags) { setvareq(__DECONST(char *, s), flags | VTEXTFIXED); } /* * Process a linked list of variable assignments. */ void listsetvar(struct arglist *list, int flags) { int i; INTOFF; for (i = 0; i < list->count; i++) setvareq(savestr(list->args[i]), flags); INTON; } /* * Find the value of a variable. Returns NULL if not set. */ char * lookupvar(const char *name) { struct var *v; v = find_var(name, NULL, NULL); if (v == NULL || v->flags & VUNSET) return NULL; return v->text + v->name_len + 1; } /* * Search the environment of a builtin command. If the second argument * is nonzero, return the value of a variable even if it hasn't been * exported. */ char * bltinlookup(const char *name, int doall) { struct var *v; char *result; int i; result = NULL; if (cmdenviron) for (i = 0; i < cmdenviron->count; i++) { if (varequal(cmdenviron->args[i], name)) result = strchr(cmdenviron->args[i], '=') + 1; } if (result != NULL) return result; v = find_var(name, NULL, NULL); if (v == NULL || v->flags & VUNSET || (!doall && (v->flags & VEXPORT) == 0)) return NULL; return v->text + v->name_len + 1; } /* * Set up locale for a builtin (LANG/LC_* assignments). */ void bltinsetlocale(void) { int act = 0; char *loc, *locdef; int i; if (cmdenviron) for (i = 0; i < cmdenviron->count; i++) { if (localevar(cmdenviron->args[i])) { act = 1; break; } } if (!act) return; loc = bltinlookup("LC_ALL", 0); INTOFF; if (loc != NULL) { setlocale(LC_ALL, loc); INTON; updatecharset(); return; } locdef = bltinlookup("LANG", 0); for (i = 0; locale_names[i] != NULL; i++) { loc = bltinlookup(locale_names[i], 0); if (loc == NULL) loc = locdef; if (loc != NULL) setlocale(locale_categories[i], loc); } INTON; updatecharset(); } /* * Undo the effect of bltinlocaleset(). */ void bltinunsetlocale(void) { int i; INTOFF; if (cmdenviron) for (i = 0; i < cmdenviron->count; i++) { if (localevar(cmdenviron->args[i])) { setlocale(LC_ALL, ""); updatecharset(); return; } } INTON; } /* * Update the localeisutf8 flag. */ void updatecharset(void) { char *charset; charset = nl_langinfo(CODESET); localeisutf8 = !strcmp(charset, "UTF-8"); } void initcharset(void) { updatecharset(); initial_localeisutf8 = localeisutf8; } /* * Generate a list of exported variables. This routine is used to construct * the third argument to execve when executing a program. */ char ** environment(void) { int nenv; struct var **vpp; struct var *vp; char **env, **ep; nenv = 0; for (vpp = vartab ; vpp < vartab + VTABSIZE ; vpp++) { for (vp = *vpp ; vp ; vp = vp->next) if (vp->flags & VEXPORT) nenv++; } ep = env = stalloc((nenv + 1) * sizeof *env); for (vpp = vartab ; vpp < vartab + VTABSIZE ; vpp++) { for (vp = *vpp ; vp ; vp = vp->next) if (vp->flags & VEXPORT) *ep++ = vp->text; } *ep = NULL; return env; } static int var_compare(const void *a, const void *b) { const char *const *sa, *const *sb; sa = a; sb = b; /* * This compares two var=value strings which creates a different * order from what you would probably expect. POSIX is somewhat * ambiguous on what should be sorted exactly. */ return strcoll(*sa, *sb); } /* * Command to list all variables which are set. This is invoked from the * set command when it is called without any options or operands. */ int showvarscmd(int argc __unused, char **argv __unused) { struct var **vpp; struct var *vp; const char *s; const char **vars; int i, n; /* * POSIX requires us to sort the variables. */ n = 0; for (vpp = vartab; vpp < vartab + VTABSIZE; vpp++) { for (vp = *vpp; vp; vp = vp->next) { if (!(vp->flags & VUNSET)) n++; } } INTOFF; vars = ckmalloc(n * sizeof(*vars)); i = 0; for (vpp = vartab; vpp < vartab + VTABSIZE; vpp++) { for (vp = *vpp; vp; vp = vp->next) { if (!(vp->flags & VUNSET)) vars[i++] = vp->text; } } qsort(vars, n, sizeof(*vars), var_compare); for (i = 0; i < n; i++) { /* * Skip improper variable names so the output remains usable as * shell input. */ if (!isassignment(vars[i])) continue; s = strchr(vars[i], '='); s++; outbin(vars[i], s - vars[i], out1); out1qstr(s); out1c('\n'); } ckfree(vars); INTON; return 0; } /* * The export and readonly commands. */ int exportcmd(int argc __unused, char **argv) { struct var **vpp; struct var *vp; char **ap; char *name; char *p; char *cmdname; int ch, values; int flag = argv[0][0] == 'r'? VREADONLY : VEXPORT; cmdname = argv[0]; values = 0; while ((ch = nextopt("p")) != '\0') { switch (ch) { case 'p': values = 1; break; } } if (values && *argptr != NULL) error("-p requires no arguments"); if (*argptr != NULL) { for (ap = argptr; (name = *ap) != NULL; ap++) { if ((p = strchr(name, '=')) != NULL) { p++; } else { vp = find_var(name, NULL, NULL); if (vp != NULL) { vp->flags |= flag; if ((vp->flags & VEXPORT) && localevar(vp->text)) { change_env(vp->text, 1); (void) setlocale(LC_ALL, ""); updatecharset(); } continue; } } setvar(name, p, flag); } } else { for (vpp = vartab ; vpp < vartab + VTABSIZE ; vpp++) { for (vp = *vpp ; vp ; vp = vp->next) { if (vp->flags & flag) { if (values) { /* * Skip improper variable names * so the output remains usable * as shell input. */ if (!isassignment(vp->text)) continue; out1str(cmdname); out1c(' '); } if (values && !(vp->flags & VUNSET)) { outbin(vp->text, vp->name_len + 1, out1); out1qstr(vp->text + vp->name_len + 1); } else outbin(vp->text, vp->name_len, out1); out1c('\n'); } } } } return 0; } /* * The "local" command. */ int localcmd(int argc __unused, char **argv __unused) { char *name; nextopt(""); if (! in_function()) error("Not in a function"); while ((name = *argptr++) != NULL) { mklocal(name); } return 0; } /* * Make a variable a local variable. When a variable is made local, it's * value and flags are saved in a localvar structure. The saved values * will be restored when the shell function returns. We handle the name * "-" as a special case. */ void mklocal(char *name) { struct localvar *lvp; struct var **vpp; struct var *vp; INTOFF; lvp = ckmalloc(sizeof (struct localvar)); if (name[0] == '-' && name[1] == '\0') { - lvp->text = ckmalloc(sizeof optlist); - memcpy(lvp->text, optlist, sizeof optlist); + lvp->text = ckmalloc(sizeof optval); + memcpy(lvp->text, optval, sizeof optval); vp = NULL; } else { vp = find_var(name, &vpp, NULL); if (vp == NULL) { if (strchr(name, '=')) setvareq(savestr(name), VSTRFIXED | VNOLOCAL); else setvar(name, NULL, VSTRFIXED | VNOLOCAL); vp = *vpp; /* the new variable */ lvp->text = NULL; lvp->flags = VUNSET; } else { lvp->text = vp->text; lvp->flags = vp->flags; vp->flags |= VSTRFIXED|VTEXTFIXED; if (name[vp->name_len] == '=') setvareq(savestr(name), VNOLOCAL); } } lvp->vp = vp; lvp->next = localvars; localvars = lvp; INTON; } /* * Called after a function returns. */ void poplocalvars(void) { struct localvar *lvp; struct var *vp; INTOFF; while ((lvp = localvars) != NULL) { localvars = lvp->next; vp = lvp->vp; if (vp == NULL) { /* $- saved */ - memcpy(optlist, lvp->text, sizeof optlist); + memcpy(optval, lvp->text, sizeof optval); ckfree(lvp->text); optschanged(); } else if ((lvp->flags & (VUNSET|VSTRFIXED)) == VUNSET) { (void)unsetvar(vp->text); } else { if ((vp->flags & VTEXTFIXED) == 0) ckfree(vp->text); vp->flags = lvp->flags; vp->text = lvp->text; } ckfree(lvp); } INTON; } int setvarcmd(int argc, char **argv) { if (argc <= 2) return unsetcmd(argc, argv); else if (argc == 3) setvar(argv[1], argv[2], 0); else error("too many arguments"); return 0; } /* * The unset builtin command. */ int unsetcmd(int argc __unused, char **argv __unused) { char **ap; int i; int flg_func = 0; int flg_var = 0; int ret = 0; while ((i = nextopt("vf")) != '\0') { if (i == 'f') flg_func = 1; else flg_var = 1; } if (flg_func == 0 && flg_var == 0) flg_var = 1; INTOFF; for (ap = argptr; *ap ; ap++) { if (flg_func) ret |= unsetfunc(*ap); if (flg_var) ret |= unsetvar(*ap); } INTON; return ret; } /* * Unset the specified variable. * Called with interrupts off. */ int unsetvar(const char *s) { struct var **vpp; struct var *vp; vp = find_var(s, &vpp, NULL); if (vp == NULL) return (0); if (vp->flags & VREADONLY) return (1); if (vp->text[vp->name_len + 1] != '\0') setvar(s, "", 0); if ((vp->flags & VEXPORT) && localevar(vp->text)) { change_env(s, 0); setlocale(LC_ALL, ""); updatecharset(); } vp->flags &= ~VEXPORT; vp->flags |= VUNSET; if ((vp->flags & VSTRFIXED) == 0) { if ((vp->flags & VTEXTFIXED) == 0) ckfree(vp->text); *vpp = vp->next; ckfree(vp); } return (0); } /* * Returns true if the two strings specify the same variable. The first * variable name is terminated by '='; the second may be terminated by * either '=' or '\0'. */ static int varequal(const char *p, const char *q) { while (*p == *q++) { if (*p++ == '=') return 1; } if (*p == '=' && *(q - 1) == '\0') return 1; return 0; } /* * Search for a variable. * 'name' may be terminated by '=' or a NUL. * vppp is set to the pointer to vp, or the list head if vp isn't found * lenp is set to the number of characters in 'name' */ static struct var * find_var(const char *name, struct var ***vppp, int *lenp) { unsigned int hashval; int len; struct var *vp, **vpp; const char *p = name; hashval = 0; while (*p && *p != '=') hashval = 2 * hashval + (unsigned char)*p++; len = p - name; if (lenp) *lenp = len; vpp = &vartab[hashval % VTABSIZE]; if (vppp) *vppp = vpp; for (vp = *vpp ; vp ; vpp = &vp->next, vp = *vpp) { if (vp->name_len != len) continue; if (memcmp(vp->text, name, len) != 0) continue; if (vppp) *vppp = vpp; return vp; } return NULL; } Index: projects/sendfile/libexec/rtld-elf/rtld.c =================================================================== --- projects/sendfile/libexec/rtld-elf/rtld.c (revision 293405) +++ projects/sendfile/libexec/rtld-elf/rtld.c (revision 293406) @@ -1,5119 +1,5117 @@ /*- * Copyright 1996, 1997, 1998, 1999, 2000 John D. Polstra. * Copyright 2003 Alexander Kabaev . * Copyright 2009-2012 Konstantin Belousov . * Copyright 2012 John Marino . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * Dynamic linker for ELF. * * John Polstra . */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "debug.h" #include "rtld.h" #include "libmap.h" #include "paths.h" #include "rtld_tls.h" #include "rtld_printf.h" #include "notes.h" /* Types. */ typedef void (*func_ptr_type)(); typedef void * (*path_enum_proc) (const char *path, size_t len, void *arg); /* * Function declarations. */ static const char *basename(const char *); static void digest_dynamic1(Obj_Entry *, int, const Elf_Dyn **, const Elf_Dyn **, const Elf_Dyn **); static void digest_dynamic2(Obj_Entry *, const Elf_Dyn *, const Elf_Dyn *, const Elf_Dyn *); static void digest_dynamic(Obj_Entry *, int); static Obj_Entry *digest_phdr(const Elf_Phdr *, int, caddr_t, const char *); static Obj_Entry *dlcheck(void *); static Obj_Entry *dlopen_object(const char *name, int fd, Obj_Entry *refobj, int lo_flags, int mode, RtldLockState *lockstate); static Obj_Entry *do_load_object(int, const char *, char *, struct stat *, int); static int do_search_info(const Obj_Entry *obj, int, struct dl_serinfo *); static bool donelist_check(DoneList *, const Obj_Entry *); static void errmsg_restore(char *); static char *errmsg_save(void); static void *fill_search_info(const char *, size_t, void *); static char *find_library(const char *, const Obj_Entry *, int *); static const char *gethints(bool); static void init_dag(Obj_Entry *); static void init_pagesizes(Elf_Auxinfo **aux_info); static void init_rtld(caddr_t, Elf_Auxinfo **); static void initlist_add_neededs(Needed_Entry *, Objlist *); static void initlist_add_objects(Obj_Entry *, Obj_Entry **, Objlist *); static void linkmap_add(Obj_Entry *); static void linkmap_delete(Obj_Entry *); static void load_filtees(Obj_Entry *, int flags, RtldLockState *); static void unload_filtees(Obj_Entry *); static int load_needed_objects(Obj_Entry *, int); static int load_preload_objects(void); static Obj_Entry *load_object(const char *, int fd, const Obj_Entry *, int); static void map_stacks_exec(RtldLockState *); static Obj_Entry *obj_from_addr(const void *); static void objlist_call_fini(Objlist *, Obj_Entry *, RtldLockState *); static void objlist_call_init(Objlist *, RtldLockState *); static void objlist_clear(Objlist *); static Objlist_Entry *objlist_find(Objlist *, const Obj_Entry *); static void objlist_init(Objlist *); static void objlist_push_head(Objlist *, Obj_Entry *); static void objlist_push_tail(Objlist *, Obj_Entry *); static void objlist_put_after(Objlist *, Obj_Entry *, Obj_Entry *); static void objlist_remove(Objlist *, Obj_Entry *); static int parse_libdir(const char *); static void *path_enumerate(const char *, path_enum_proc, void *); static int relocate_object_dag(Obj_Entry *root, bool bind_now, Obj_Entry *rtldobj, int flags, RtldLockState *lockstate); static int relocate_object(Obj_Entry *obj, bool bind_now, Obj_Entry *rtldobj, int flags, RtldLockState *lockstate); static int relocate_objects(Obj_Entry *, bool, Obj_Entry *, int, RtldLockState *); static int resolve_objects_ifunc(Obj_Entry *first, bool bind_now, int flags, RtldLockState *lockstate); static int rtld_dirname(const char *, char *); static int rtld_dirname_abs(const char *, char *); static void *rtld_dlopen(const char *name, int fd, int mode); static void rtld_exit(void); static char *search_library_path(const char *, const char *); static char *search_library_pathfds(const char *, const char *, int *); static const void **get_program_var_addr(const char *, RtldLockState *); static void set_program_var(const char *, const void *); static int symlook_default(SymLook *, const Obj_Entry *refobj); static int symlook_global(SymLook *, DoneList *); static void symlook_init_from_req(SymLook *, const SymLook *); static int symlook_list(SymLook *, const Objlist *, DoneList *); static int symlook_needed(SymLook *, const Needed_Entry *, DoneList *); static int symlook_obj1_sysv(SymLook *, const Obj_Entry *); static int symlook_obj1_gnu(SymLook *, const Obj_Entry *); static void trace_loaded_objects(Obj_Entry *); static void unlink_object(Obj_Entry *); static void unload_object(Obj_Entry *); static void unref_dag(Obj_Entry *); static void ref_dag(Obj_Entry *); static char *origin_subst_one(Obj_Entry *, char *, const char *, const char *, bool); static char *origin_subst(Obj_Entry *, char *); static bool obj_resolve_origin(Obj_Entry *obj); static void preinit_main(void); static int rtld_verify_versions(const Objlist *); static int rtld_verify_object_versions(Obj_Entry *); static void object_add_name(Obj_Entry *, const char *); static int object_match_name(const Obj_Entry *, const char *); static void ld_utrace_log(int, void *, void *, size_t, int, const char *); static void rtld_fill_dl_phdr_info(const Obj_Entry *obj, struct dl_phdr_info *phdr_info); static uint32_t gnu_hash(const char *); static bool matched_symbol(SymLook *, const Obj_Entry *, Sym_Match_Result *, const unsigned long); void r_debug_state(struct r_debug *, struct link_map *) __noinline __exported; void _r_debug_postinit(struct link_map *) __noinline __exported; int __sys_openat(int, const char *, int, ...); /* * Data declarations. */ static char *error_message; /* Message for dlerror(), or NULL */ struct r_debug r_debug __exported; /* for GDB; */ static bool libmap_disable; /* Disable libmap */ static bool ld_loadfltr; /* Immediate filters processing */ static char *libmap_override; /* Maps to use in addition to libmap.conf */ static bool trust; /* False for setuid and setgid programs */ static bool dangerous_ld_env; /* True if environment variables have been used to affect the libraries loaded */ static char *ld_bind_now; /* Environment variable for immediate binding */ static char *ld_debug; /* Environment variable for debugging */ static char *ld_library_path; /* Environment variable for search path */ static char *ld_library_dirs; /* Environment variable for library descriptors */ static char *ld_preload; /* Environment variable for libraries to load first */ static char *ld_elf_hints_path; /* Environment variable for alternative hints path */ static char *ld_tracing; /* Called from ldd to print libs */ static char *ld_utrace; /* Use utrace() to log events. */ static Obj_Entry *obj_list; /* Head of linked list of shared objects */ static Obj_Entry **obj_tail; /* Link field of last object in list */ static Obj_Entry *obj_main; /* The main program shared object */ static Obj_Entry obj_rtld; /* The dynamic linker shared object */ static unsigned int obj_count; /* Number of objects in obj_list */ static unsigned int obj_loads; /* Number of objects in obj_list */ static Objlist list_global = /* Objects dlopened with RTLD_GLOBAL */ STAILQ_HEAD_INITIALIZER(list_global); static Objlist list_main = /* Objects loaded at program startup */ STAILQ_HEAD_INITIALIZER(list_main); static Objlist list_fini = /* Objects needing fini() calls */ STAILQ_HEAD_INITIALIZER(list_fini); Elf_Sym sym_zero; /* For resolving undefined weak refs. */ #define GDB_STATE(s,m) r_debug.r_state = s; r_debug_state(&r_debug,m); extern Elf_Dyn _DYNAMIC; #pragma weak _DYNAMIC #ifndef RTLD_IS_DYNAMIC #define RTLD_IS_DYNAMIC() (&_DYNAMIC != NULL) #endif int dlclose(void *) __exported; char *dlerror(void) __exported; void *dlopen(const char *, int) __exported; void *fdlopen(int, int) __exported; void *dlsym(void *, const char *) __exported; dlfunc_t dlfunc(void *, const char *) __exported; void *dlvsym(void *, const char *, const char *) __exported; int dladdr(const void *, Dl_info *) __exported; void dllockinit(void *, void *(*)(void *), void (*)(void *), void (*)(void *), void (*)(void *), void (*)(void *), void (*)(void *)) __exported; int dlinfo(void *, int , void *) __exported; int dl_iterate_phdr(__dl_iterate_hdr_callback, void *) __exported; int _rtld_addr_phdr(const void *, struct dl_phdr_info *) __exported; int _rtld_get_stack_prot(void) __exported; int _rtld_is_dlopened(void *) __exported; void _rtld_error(const char *, ...) __exported; int npagesizes, osreldate; size_t *pagesizes; long __stack_chk_guard[8] = {0, 0, 0, 0, 0, 0, 0, 0}; static int stack_prot = PROT_READ | PROT_WRITE | RTLD_DEFAULT_STACK_EXEC; static int max_stack_flags; /* * Global declarations normally provided by crt1. The dynamic linker is * not built with crt1, so we have to provide them ourselves. */ char *__progname; char **environ; /* * Used to pass argc, argv to init functions. */ int main_argc; char **main_argv; /* * Globals to control TLS allocation. */ size_t tls_last_offset; /* Static TLS offset of last module */ size_t tls_last_size; /* Static TLS size of last module */ size_t tls_static_space; /* Static TLS space allocated */ size_t tls_static_max_align; int tls_dtv_generation = 1; /* Used to detect when dtv size changes */ int tls_max_index = 1; /* Largest module index allocated */ bool ld_library_path_rpath = false; /* * Globals for path names, and such */ char *ld_elf_hints_default = _PATH_ELF_HINTS; char *ld_path_libmap_conf = _PATH_LIBMAP_CONF; char *ld_path_rtld = _PATH_RTLD; char *ld_standard_library_path = STANDARD_LIBRARY_PATH; char *ld_env_prefix = LD_; /* * Fill in a DoneList with an allocation large enough to hold all of * the currently-loaded objects. Keep this as a macro since it calls * alloca and we want that to occur within the scope of the caller. */ #define donelist_init(dlp) \ ((dlp)->objs = alloca(obj_count * sizeof (dlp)->objs[0]), \ assert((dlp)->objs != NULL), \ (dlp)->num_alloc = obj_count, \ (dlp)->num_used = 0) #define UTRACE_DLOPEN_START 1 #define UTRACE_DLOPEN_STOP 2 #define UTRACE_DLCLOSE_START 3 #define UTRACE_DLCLOSE_STOP 4 #define UTRACE_LOAD_OBJECT 5 #define UTRACE_UNLOAD_OBJECT 6 #define UTRACE_ADD_RUNDEP 7 #define UTRACE_PRELOAD_FINISHED 8 #define UTRACE_INIT_CALL 9 #define UTRACE_FINI_CALL 10 #define UTRACE_DLSYM_START 11 #define UTRACE_DLSYM_STOP 12 struct utrace_rtld { char sig[4]; /* 'RTLD' */ int event; void *handle; void *mapbase; /* Used for 'parent' and 'init/fini' */ size_t mapsize; int refcnt; /* Used for 'mode' */ char name[MAXPATHLEN]; }; #define LD_UTRACE(e, h, mb, ms, r, n) do { \ if (ld_utrace != NULL) \ ld_utrace_log(e, h, mb, ms, r, n); \ } while (0) static void ld_utrace_log(int event, void *handle, void *mapbase, size_t mapsize, int refcnt, const char *name) { struct utrace_rtld ut; ut.sig[0] = 'R'; ut.sig[1] = 'T'; ut.sig[2] = 'L'; ut.sig[3] = 'D'; ut.event = event; ut.handle = handle; ut.mapbase = mapbase; ut.mapsize = mapsize; ut.refcnt = refcnt; bzero(ut.name, sizeof(ut.name)); if (name) strlcpy(ut.name, name, sizeof(ut.name)); utrace(&ut, sizeof(ut)); } #ifdef RTLD_VARIANT_ENV_NAMES /* * construct the env variable based on the type of binary that's * running. */ static inline const char * _LD(const char *var) { static char buffer[128]; strlcpy(buffer, ld_env_prefix, sizeof(buffer)); strlcat(buffer, var, sizeof(buffer)); return (buffer); } #else #define _LD(x) LD_ x #endif /* * Main entry point for dynamic linking. The first argument is the * stack pointer. The stack is expected to be laid out as described * in the SVR4 ABI specification, Intel 386 Processor Supplement. * Specifically, the stack pointer points to a word containing * ARGC. Following that in the stack is a null-terminated sequence * of pointers to argument strings. Then comes a null-terminated * sequence of pointers to environment strings. Finally, there is a * sequence of "auxiliary vector" entries. * * The second argument points to a place to store the dynamic linker's * exit procedure pointer and the third to a place to store the main * program's object. * * The return value is the main program's entry point. */ func_ptr_type _rtld(Elf_Addr *sp, func_ptr_type *exit_proc, Obj_Entry **objp) { Elf_Auxinfo *aux_info[AT_COUNT]; int i; int argc; char **argv; char **env; Elf_Auxinfo *aux; Elf_Auxinfo *auxp; const char *argv0; Objlist_Entry *entry; Obj_Entry *obj; Obj_Entry **preload_tail; Obj_Entry *last_interposer; Objlist initlist; RtldLockState lockstate; char *library_path_rpath; int mib[2]; size_t len; /* * On entry, the dynamic linker itself has not been relocated yet. * Be very careful not to reference any global data until after * init_rtld has returned. It is OK to reference file-scope statics * and string constants, and to call static and global functions. */ /* Find the auxiliary vector on the stack. */ argc = *sp++; argv = (char **) sp; sp += argc + 1; /* Skip over arguments and NULL terminator */ env = (char **) sp; while (*sp++ != 0) /* Skip over environment, and NULL terminator */ ; aux = (Elf_Auxinfo *) sp; /* Digest the auxiliary vector. */ for (i = 0; i < AT_COUNT; i++) aux_info[i] = NULL; for (auxp = aux; auxp->a_type != AT_NULL; auxp++) { if (auxp->a_type < AT_COUNT) aux_info[auxp->a_type] = auxp; } /* Initialize and relocate ourselves. */ assert(aux_info[AT_BASE] != NULL); init_rtld((caddr_t) aux_info[AT_BASE]->a_un.a_ptr, aux_info); __progname = obj_rtld.path; argv0 = argv[0] != NULL ? argv[0] : "(null)"; environ = env; main_argc = argc; main_argv = argv; if (aux_info[AT_CANARY] != NULL && aux_info[AT_CANARY]->a_un.a_ptr != NULL) { i = aux_info[AT_CANARYLEN]->a_un.a_val; if (i > sizeof(__stack_chk_guard)) i = sizeof(__stack_chk_guard); memcpy(__stack_chk_guard, aux_info[AT_CANARY]->a_un.a_ptr, i); } else { mib[0] = CTL_KERN; mib[1] = KERN_ARND; len = sizeof(__stack_chk_guard); if (sysctl(mib, 2, __stack_chk_guard, &len, NULL, 0) == -1 || len != sizeof(__stack_chk_guard)) { /* If sysctl was unsuccessful, use the "terminator canary". */ ((unsigned char *)(void *)__stack_chk_guard)[0] = 0; ((unsigned char *)(void *)__stack_chk_guard)[1] = 0; ((unsigned char *)(void *)__stack_chk_guard)[2] = '\n'; ((unsigned char *)(void *)__stack_chk_guard)[3] = 255; } } trust = !issetugid(); /* md_abi_variant_hook(aux_info); */ ld_bind_now = getenv(_LD("BIND_NOW")); /* * If the process is tainted, then we un-set the dangerous environment * variables. The process will be marked as tainted until setuid(2) * is called. If any child process calls setuid(2) we do not want any * future processes to honor the potentially un-safe variables. */ if (!trust) { if (unsetenv(_LD("PRELOAD")) || unsetenv(_LD("LIBMAP")) || unsetenv(_LD("LIBRARY_PATH")) || unsetenv(_LD("LIBRARY_PATH_FDS")) || unsetenv(_LD("LIBMAP_DISABLE")) || unsetenv(_LD("DEBUG")) || unsetenv(_LD("ELF_HINTS_PATH")) || unsetenv(_LD("LOADFLTR")) || unsetenv(_LD("LIBRARY_PATH_RPATH"))) { _rtld_error("environment corrupt; aborting"); rtld_die(); } } ld_debug = getenv(_LD("DEBUG")); libmap_disable = getenv(_LD("LIBMAP_DISABLE")) != NULL; libmap_override = getenv(_LD("LIBMAP")); ld_library_path = getenv(_LD("LIBRARY_PATH")); ld_library_dirs = getenv(_LD("LIBRARY_PATH_FDS")); ld_preload = getenv(_LD("PRELOAD")); ld_elf_hints_path = getenv(_LD("ELF_HINTS_PATH")); ld_loadfltr = getenv(_LD("LOADFLTR")) != NULL; library_path_rpath = getenv(_LD("LIBRARY_PATH_RPATH")); if (library_path_rpath != NULL) { if (library_path_rpath[0] == 'y' || library_path_rpath[0] == 'Y' || library_path_rpath[0] == '1') ld_library_path_rpath = true; else ld_library_path_rpath = false; } dangerous_ld_env = libmap_disable || (libmap_override != NULL) || (ld_library_path != NULL) || (ld_preload != NULL) || (ld_elf_hints_path != NULL) || ld_loadfltr; ld_tracing = getenv(_LD("TRACE_LOADED_OBJECTS")); ld_utrace = getenv(_LD("UTRACE")); if ((ld_elf_hints_path == NULL) || strlen(ld_elf_hints_path) == 0) ld_elf_hints_path = ld_elf_hints_default; if (ld_debug != NULL && *ld_debug != '\0') debug = 1; dbg("%s is initialized, base address = %p", __progname, (caddr_t) aux_info[AT_BASE]->a_un.a_ptr); dbg("RTLD dynamic = %p", obj_rtld.dynamic); dbg("RTLD pltgot = %p", obj_rtld.pltgot); dbg("initializing thread locks"); lockdflt_init(); /* * Load the main program, or process its program header if it is * already loaded. */ if (aux_info[AT_EXECFD] != NULL) { /* Load the main program. */ int fd = aux_info[AT_EXECFD]->a_un.a_val; dbg("loading main program"); obj_main = map_object(fd, argv0, NULL); close(fd); if (obj_main == NULL) rtld_die(); max_stack_flags = obj->stack_flags; } else { /* Main program already loaded. */ const Elf_Phdr *phdr; int phnum; caddr_t entry; dbg("processing main program's program header"); assert(aux_info[AT_PHDR] != NULL); phdr = (const Elf_Phdr *) aux_info[AT_PHDR]->a_un.a_ptr; assert(aux_info[AT_PHNUM] != NULL); phnum = aux_info[AT_PHNUM]->a_un.a_val; assert(aux_info[AT_PHENT] != NULL); assert(aux_info[AT_PHENT]->a_un.a_val == sizeof(Elf_Phdr)); assert(aux_info[AT_ENTRY] != NULL); entry = (caddr_t) aux_info[AT_ENTRY]->a_un.a_ptr; if ((obj_main = digest_phdr(phdr, phnum, entry, argv0)) == NULL) rtld_die(); } if (aux_info[AT_EXECPATH] != 0) { char *kexecpath; char buf[MAXPATHLEN]; kexecpath = aux_info[AT_EXECPATH]->a_un.a_ptr; dbg("AT_EXECPATH %p %s", kexecpath, kexecpath); if (kexecpath[0] == '/') obj_main->path = kexecpath; else if (getcwd(buf, sizeof(buf)) == NULL || strlcat(buf, "/", sizeof(buf)) >= sizeof(buf) || strlcat(buf, kexecpath, sizeof(buf)) >= sizeof(buf)) obj_main->path = xstrdup(argv0); else obj_main->path = xstrdup(buf); } else { dbg("No AT_EXECPATH"); obj_main->path = xstrdup(argv0); } dbg("obj_main path %s", obj_main->path); obj_main->mainprog = true; if (aux_info[AT_STACKPROT] != NULL && aux_info[AT_STACKPROT]->a_un.a_val != 0) stack_prot = aux_info[AT_STACKPROT]->a_un.a_val; #ifndef COMPAT_32BIT /* * Get the actual dynamic linker pathname from the executable if * possible. (It should always be possible.) That ensures that * gdb will find the right dynamic linker even if a non-standard * one is being used. */ if (obj_main->interp != NULL && strcmp(obj_main->interp, obj_rtld.path) != 0) { free(obj_rtld.path); obj_rtld.path = xstrdup(obj_main->interp); __progname = obj_rtld.path; } #endif digest_dynamic(obj_main, 0); dbg("%s valid_hash_sysv %d valid_hash_gnu %d dynsymcount %d", obj_main->path, obj_main->valid_hash_sysv, obj_main->valid_hash_gnu, obj_main->dynsymcount); linkmap_add(obj_main); linkmap_add(&obj_rtld); /* Link the main program into the list of objects. */ *obj_tail = obj_main; obj_tail = &obj_main->next; obj_count++; obj_loads++; /* Initialize a fake symbol for resolving undefined weak references. */ sym_zero.st_info = ELF_ST_INFO(STB_GLOBAL, STT_NOTYPE); sym_zero.st_shndx = SHN_UNDEF; sym_zero.st_value = -(uintptr_t)obj_main->relocbase; if (!libmap_disable) libmap_disable = (bool)lm_init(libmap_override); dbg("loading LD_PRELOAD libraries"); if (load_preload_objects() == -1) rtld_die(); preload_tail = obj_tail; dbg("loading needed objects"); if (load_needed_objects(obj_main, 0) == -1) rtld_die(); /* Make a list of all objects loaded at startup. */ last_interposer = obj_main; for (obj = obj_list; obj != NULL; obj = obj->next) { if (obj->z_interpose && obj != obj_main) { objlist_put_after(&list_main, last_interposer, obj); last_interposer = obj; } else { objlist_push_tail(&list_main, obj); } obj->refcount++; } dbg("checking for required versions"); if (rtld_verify_versions(&list_main) == -1 && !ld_tracing) rtld_die(); if (ld_tracing) { /* We're done */ trace_loaded_objects(obj_main); exit(0); } if (getenv(_LD("DUMP_REL_PRE")) != NULL) { dump_relocations(obj_main); exit (0); } /* * Processing tls relocations requires having the tls offsets * initialized. Prepare offsets before starting initial * relocation processing. */ dbg("initializing initial thread local storage offsets"); STAILQ_FOREACH(entry, &list_main, link) { /* * Allocate all the initial objects out of the static TLS * block even if they didn't ask for it. */ allocate_tls_offset(entry->obj); } if (relocate_objects(obj_main, ld_bind_now != NULL && *ld_bind_now != '\0', &obj_rtld, SYMLOOK_EARLY, NULL) == -1) rtld_die(); dbg("doing copy relocations"); if (do_copy_relocations(obj_main) == -1) rtld_die(); if (getenv(_LD("DUMP_REL_POST")) != NULL) { dump_relocations(obj_main); exit (0); } /* * Setup TLS for main thread. This must be done after the * relocations are processed, since tls initialization section * might be the subject for relocations. */ dbg("initializing initial thread local storage"); allocate_initial_tls(obj_list); dbg("initializing key program variables"); set_program_var("__progname", argv[0] != NULL ? basename(argv[0]) : ""); set_program_var("environ", env); set_program_var("__elf_aux_vector", aux); /* Make a list of init functions to call. */ objlist_init(&initlist); initlist_add_objects(obj_list, preload_tail, &initlist); r_debug_state(NULL, &obj_main->linkmap); /* say hello to gdb! */ map_stacks_exec(NULL); dbg("resolving ifuncs"); if (resolve_objects_ifunc(obj_main, ld_bind_now != NULL && *ld_bind_now != '\0', SYMLOOK_EARLY, NULL) == -1) rtld_die(); if (!obj_main->crt_no_init) { /* * Make sure we don't call the main program's init and fini * functions for binaries linked with old crt1 which calls * _init itself. */ obj_main->init = obj_main->fini = (Elf_Addr)NULL; obj_main->preinit_array = obj_main->init_array = obj_main->fini_array = (Elf_Addr)NULL; } wlock_acquire(rtld_bind_lock, &lockstate); if (obj_main->crt_no_init) preinit_main(); objlist_call_init(&initlist, &lockstate); _r_debug_postinit(&obj_main->linkmap); objlist_clear(&initlist); dbg("loading filtees"); for (obj = obj_list->next; obj != NULL; obj = obj->next) { if (ld_loadfltr || obj->z_loadfltr) load_filtees(obj, 0, &lockstate); } lock_release(rtld_bind_lock, &lockstate); dbg("transferring control to program entry point = %p", obj_main->entry); /* Return the exit procedure and the program entry point. */ *exit_proc = rtld_exit; *objp = obj_main; return (func_ptr_type) obj_main->entry; } void * rtld_resolve_ifunc(const Obj_Entry *obj, const Elf_Sym *def) { void *ptr; Elf_Addr target; ptr = (void *)make_function_pointer(def, obj); target = ((Elf_Addr (*)(void))ptr)(); return ((void *)target); } Elf_Addr _rtld_bind(Obj_Entry *obj, Elf_Size reloff) { const Elf_Rel *rel; const Elf_Sym *def; const Obj_Entry *defobj; Elf_Addr *where; Elf_Addr target; RtldLockState lockstate; rlock_acquire(rtld_bind_lock, &lockstate); if (sigsetjmp(lockstate.env, 0) != 0) lock_upgrade(rtld_bind_lock, &lockstate); if (obj->pltrel) rel = (const Elf_Rel *) ((caddr_t) obj->pltrel + reloff); else rel = (const Elf_Rel *) ((caddr_t) obj->pltrela + reloff); where = (Elf_Addr *) (obj->relocbase + rel->r_offset); def = find_symdef(ELF_R_SYM(rel->r_info), obj, &defobj, true, NULL, &lockstate); if (def == NULL) rtld_die(); if (ELF_ST_TYPE(def->st_info) == STT_GNU_IFUNC) target = (Elf_Addr)rtld_resolve_ifunc(defobj, def); else target = (Elf_Addr)(defobj->relocbase + def->st_value); dbg("\"%s\" in \"%s\" ==> %p in \"%s\"", defobj->strtab + def->st_name, basename(obj->path), (void *)target, basename(defobj->path)); /* * Write the new contents for the jmpslot. Note that depending on * architecture, the value which we need to return back to the * lazy binding trampoline may or may not be the target * address. The value returned from reloc_jmpslot() is the value * that the trampoline needs. */ target = reloc_jmpslot(where, target, defobj, obj, rel); lock_release(rtld_bind_lock, &lockstate); return target; } /* * Error reporting function. Use it like printf. If formats the message * into a buffer, and sets things up so that the next call to dlerror() * will return the message. */ void _rtld_error(const char *fmt, ...) { static char buf[512]; va_list ap; va_start(ap, fmt); rtld_vsnprintf(buf, sizeof buf, fmt, ap); error_message = buf; va_end(ap); } /* * Return a dynamically-allocated copy of the current error message, if any. */ static char * errmsg_save(void) { return error_message == NULL ? NULL : xstrdup(error_message); } /* * Restore the current error message from a copy which was previously saved * by errmsg_save(). The copy is freed. */ static void errmsg_restore(char *saved_msg) { if (saved_msg == NULL) error_message = NULL; else { _rtld_error("%s", saved_msg); free(saved_msg); } } static const char * basename(const char *name) { const char *p = strrchr(name, '/'); return p != NULL ? p + 1 : name; } static struct utsname uts; static char * origin_subst_one(Obj_Entry *obj, char *real, const char *kw, const char *subst, bool may_free) { char *p, *p1, *res, *resp; int subst_len, kw_len, subst_count, old_len, new_len; kw_len = strlen(kw); /* * First, count the number of the keyword occurences, to * preallocate the final string. */ for (p = real, subst_count = 0;; p = p1 + kw_len, subst_count++) { p1 = strstr(p, kw); if (p1 == NULL) break; } /* * If the keyword is not found, just return. * * Return non-substituted string if resolution failed. We * cannot do anything more reasonable, the failure mode of the * caller is unresolved library anyway. */ if (subst_count == 0 || (obj != NULL && !obj_resolve_origin(obj))) return (may_free ? real : xstrdup(real)); if (obj != NULL) subst = obj->origin_path; /* * There is indeed something to substitute. Calculate the * length of the resulting string, and allocate it. */ subst_len = strlen(subst); old_len = strlen(real); new_len = old_len + (subst_len - kw_len) * subst_count; res = xmalloc(new_len + 1); /* * Now, execute the substitution loop. */ for (p = real, resp = res, *resp = '\0';;) { p1 = strstr(p, kw); if (p1 != NULL) { /* Copy the prefix before keyword. */ memcpy(resp, p, p1 - p); resp += p1 - p; /* Keyword replacement. */ memcpy(resp, subst, subst_len); resp += subst_len; *resp = '\0'; p = p1 + kw_len; } else break; } /* Copy to the end of string and finish. */ strcat(resp, p); if (may_free) free(real); return (res); } static char * origin_subst(Obj_Entry *obj, char *real) { char *res1, *res2, *res3, *res4; if (obj == NULL || !trust) return (xstrdup(real)); if (uts.sysname[0] == '\0') { if (uname(&uts) != 0) { _rtld_error("utsname failed: %d", errno); return (NULL); } } res1 = origin_subst_one(obj, real, "$ORIGIN", NULL, false); res2 = origin_subst_one(NULL, res1, "$OSNAME", uts.sysname, true); res3 = origin_subst_one(NULL, res2, "$OSREL", uts.release, true); res4 = origin_subst_one(NULL, res3, "$PLATFORM", uts.machine, true); return (res4); } void rtld_die(void) { const char *msg = dlerror(); if (msg == NULL) msg = "Fatal error"; rtld_fdputstr(STDERR_FILENO, msg); rtld_fdputchar(STDERR_FILENO, '\n'); _exit(1); } /* * Process a shared object's DYNAMIC section, and save the important * information in its Obj_Entry structure. */ static void digest_dynamic1(Obj_Entry *obj, int early, const Elf_Dyn **dyn_rpath, const Elf_Dyn **dyn_soname, const Elf_Dyn **dyn_runpath) { const Elf_Dyn *dynp; Needed_Entry **needed_tail = &obj->needed; Needed_Entry **needed_filtees_tail = &obj->needed_filtees; Needed_Entry **needed_aux_filtees_tail = &obj->needed_aux_filtees; const Elf_Hashelt *hashtab; const Elf32_Word *hashval; Elf32_Word bkt, nmaskwords; int bloom_size32; int plttype = DT_REL; *dyn_rpath = NULL; *dyn_soname = NULL; *dyn_runpath = NULL; obj->bind_now = false; for (dynp = obj->dynamic; dynp->d_tag != DT_NULL; dynp++) { switch (dynp->d_tag) { case DT_REL: obj->rel = (const Elf_Rel *) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_RELSZ: obj->relsize = dynp->d_un.d_val; break; case DT_RELENT: assert(dynp->d_un.d_val == sizeof(Elf_Rel)); break; case DT_JMPREL: obj->pltrel = (const Elf_Rel *) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_PLTRELSZ: obj->pltrelsize = dynp->d_un.d_val; break; case DT_RELA: obj->rela = (const Elf_Rela *) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_RELASZ: obj->relasize = dynp->d_un.d_val; break; case DT_RELAENT: assert(dynp->d_un.d_val == sizeof(Elf_Rela)); break; case DT_PLTREL: plttype = dynp->d_un.d_val; assert(dynp->d_un.d_val == DT_REL || plttype == DT_RELA); break; case DT_SYMTAB: obj->symtab = (const Elf_Sym *) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_SYMENT: assert(dynp->d_un.d_val == sizeof(Elf_Sym)); break; case DT_STRTAB: obj->strtab = (const char *) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_STRSZ: obj->strsize = dynp->d_un.d_val; break; case DT_VERNEED: obj->verneed = (const Elf_Verneed *) (obj->relocbase + dynp->d_un.d_val); break; case DT_VERNEEDNUM: obj->verneednum = dynp->d_un.d_val; break; case DT_VERDEF: obj->verdef = (const Elf_Verdef *) (obj->relocbase + dynp->d_un.d_val); break; case DT_VERDEFNUM: obj->verdefnum = dynp->d_un.d_val; break; case DT_VERSYM: obj->versyms = (const Elf_Versym *)(obj->relocbase + dynp->d_un.d_val); break; case DT_HASH: { hashtab = (const Elf_Hashelt *)(obj->relocbase + dynp->d_un.d_ptr); obj->nbuckets = hashtab[0]; obj->nchains = hashtab[1]; obj->buckets = hashtab + 2; obj->chains = obj->buckets + obj->nbuckets; obj->valid_hash_sysv = obj->nbuckets > 0 && obj->nchains > 0 && obj->buckets != NULL; } break; case DT_GNU_HASH: { hashtab = (const Elf_Hashelt *)(obj->relocbase + dynp->d_un.d_ptr); obj->nbuckets_gnu = hashtab[0]; obj->symndx_gnu = hashtab[1]; nmaskwords = hashtab[2]; bloom_size32 = (__ELF_WORD_SIZE / 32) * nmaskwords; obj->maskwords_bm_gnu = nmaskwords - 1; obj->shift2_gnu = hashtab[3]; obj->bloom_gnu = (Elf_Addr *) (hashtab + 4); obj->buckets_gnu = hashtab + 4 + bloom_size32; obj->chain_zero_gnu = obj->buckets_gnu + obj->nbuckets_gnu - obj->symndx_gnu; /* Number of bitmask words is required to be power of 2 */ obj->valid_hash_gnu = powerof2(nmaskwords) && obj->nbuckets_gnu > 0 && obj->buckets_gnu != NULL; } break; case DT_NEEDED: if (!obj->rtld) { Needed_Entry *nep = NEW(Needed_Entry); nep->name = dynp->d_un.d_val; nep->obj = NULL; nep->next = NULL; *needed_tail = nep; needed_tail = &nep->next; } break; case DT_FILTER: if (!obj->rtld) { Needed_Entry *nep = NEW(Needed_Entry); nep->name = dynp->d_un.d_val; nep->obj = NULL; nep->next = NULL; *needed_filtees_tail = nep; needed_filtees_tail = &nep->next; } break; case DT_AUXILIARY: if (!obj->rtld) { Needed_Entry *nep = NEW(Needed_Entry); nep->name = dynp->d_un.d_val; nep->obj = NULL; nep->next = NULL; *needed_aux_filtees_tail = nep; needed_aux_filtees_tail = &nep->next; } break; case DT_PLTGOT: obj->pltgot = (Elf_Addr *) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_TEXTREL: obj->textrel = true; break; case DT_SYMBOLIC: obj->symbolic = true; break; case DT_RPATH: /* * We have to wait until later to process this, because we * might not have gotten the address of the string table yet. */ *dyn_rpath = dynp; break; case DT_SONAME: *dyn_soname = dynp; break; case DT_RUNPATH: *dyn_runpath = dynp; break; case DT_INIT: obj->init = (Elf_Addr) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_PREINIT_ARRAY: obj->preinit_array = (Elf_Addr)(obj->relocbase + dynp->d_un.d_ptr); break; case DT_PREINIT_ARRAYSZ: obj->preinit_array_num = dynp->d_un.d_val / sizeof(Elf_Addr); break; case DT_INIT_ARRAY: obj->init_array = (Elf_Addr)(obj->relocbase + dynp->d_un.d_ptr); break; case DT_INIT_ARRAYSZ: obj->init_array_num = dynp->d_un.d_val / sizeof(Elf_Addr); break; case DT_FINI: obj->fini = (Elf_Addr) (obj->relocbase + dynp->d_un.d_ptr); break; case DT_FINI_ARRAY: obj->fini_array = (Elf_Addr)(obj->relocbase + dynp->d_un.d_ptr); break; case DT_FINI_ARRAYSZ: obj->fini_array_num = dynp->d_un.d_val / sizeof(Elf_Addr); break; /* * Don't process DT_DEBUG on MIPS as the dynamic section * is mapped read-only. DT_MIPS_RLD_MAP is used instead. */ +#ifndef __mips__ case DT_DEBUG: - if (!obj->writable_dynamic) - break; if (!early) dbg("Filling in DT_DEBUG entry"); ((Elf_Dyn*)dynp)->d_un.d_ptr = (Elf_Addr) &r_debug; break; +#endif case DT_FLAGS: if (dynp->d_un.d_val & DF_ORIGIN) obj->z_origin = true; if (dynp->d_un.d_val & DF_SYMBOLIC) obj->symbolic = true; if (dynp->d_un.d_val & DF_TEXTREL) obj->textrel = true; if (dynp->d_un.d_val & DF_BIND_NOW) obj->bind_now = true; /*if (dynp->d_un.d_val & DF_STATIC_TLS) ;*/ break; #ifdef __mips__ case DT_MIPS_LOCAL_GOTNO: obj->local_gotno = dynp->d_un.d_val; break; case DT_MIPS_SYMTABNO: obj->symtabno = dynp->d_un.d_val; break; case DT_MIPS_GOTSYM: obj->gotsym = dynp->d_un.d_val; break; case DT_MIPS_RLD_MAP: *((Elf_Addr *)(dynp->d_un.d_ptr)) = (Elf_Addr) &r_debug; break; #endif #ifdef __powerpc64__ case DT_PPC64_GLINK: obj->glink = (Elf_Addr) (obj->relocbase + dynp->d_un.d_ptr); break; #endif case DT_FLAGS_1: if (dynp->d_un.d_val & DF_1_NOOPEN) obj->z_noopen = true; if (dynp->d_un.d_val & DF_1_ORIGIN) obj->z_origin = true; if (dynp->d_un.d_val & DF_1_GLOBAL) obj->z_global = true; if (dynp->d_un.d_val & DF_1_BIND_NOW) obj->bind_now = true; if (dynp->d_un.d_val & DF_1_NODELETE) obj->z_nodelete = true; if (dynp->d_un.d_val & DF_1_LOADFLTR) obj->z_loadfltr = true; if (dynp->d_un.d_val & DF_1_INTERPOSE) obj->z_interpose = true; if (dynp->d_un.d_val & DF_1_NODEFLIB) obj->z_nodeflib = true; break; default: if (!early) { dbg("Ignoring d_tag %ld = %#lx", (long)dynp->d_tag, (long)dynp->d_tag); } break; } } obj->traced = false; if (plttype == DT_RELA) { obj->pltrela = (const Elf_Rela *) obj->pltrel; obj->pltrel = NULL; obj->pltrelasize = obj->pltrelsize; obj->pltrelsize = 0; } /* Determine size of dynsym table (equal to nchains of sysv hash) */ if (obj->valid_hash_sysv) obj->dynsymcount = obj->nchains; else if (obj->valid_hash_gnu) { obj->dynsymcount = 0; for (bkt = 0; bkt < obj->nbuckets_gnu; bkt++) { if (obj->buckets_gnu[bkt] == 0) continue; hashval = &obj->chain_zero_gnu[obj->buckets_gnu[bkt]]; do obj->dynsymcount++; while ((*hashval++ & 1u) == 0); } obj->dynsymcount += obj->symndx_gnu; } } static bool obj_resolve_origin(Obj_Entry *obj) { if (obj->origin_path != NULL) return (true); obj->origin_path = xmalloc(PATH_MAX); return (rtld_dirname_abs(obj->path, obj->origin_path) != -1); } static void digest_dynamic2(Obj_Entry *obj, const Elf_Dyn *dyn_rpath, const Elf_Dyn *dyn_soname, const Elf_Dyn *dyn_runpath) { if (obj->z_origin && !obj_resolve_origin(obj)) rtld_die(); if (dyn_runpath != NULL) { obj->runpath = (char *)obj->strtab + dyn_runpath->d_un.d_val; obj->runpath = origin_subst(obj, obj->runpath); } else if (dyn_rpath != NULL) { obj->rpath = (char *)obj->strtab + dyn_rpath->d_un.d_val; obj->rpath = origin_subst(obj, obj->rpath); } if (dyn_soname != NULL) object_add_name(obj, obj->strtab + dyn_soname->d_un.d_val); } static void digest_dynamic(Obj_Entry *obj, int early) { const Elf_Dyn *dyn_rpath; const Elf_Dyn *dyn_soname; const Elf_Dyn *dyn_runpath; digest_dynamic1(obj, early, &dyn_rpath, &dyn_soname, &dyn_runpath); digest_dynamic2(obj, dyn_rpath, dyn_soname, dyn_runpath); } /* * Process a shared object's program header. This is used only for the * main program, when the kernel has already loaded the main program * into memory before calling the dynamic linker. It creates and * returns an Obj_Entry structure. */ static Obj_Entry * digest_phdr(const Elf_Phdr *phdr, int phnum, caddr_t entry, const char *path) { Obj_Entry *obj; const Elf_Phdr *phlimit = phdr + phnum; const Elf_Phdr *ph; Elf_Addr note_start, note_end; int nsegs = 0; obj = obj_new(); for (ph = phdr; ph < phlimit; ph++) { if (ph->p_type != PT_PHDR) continue; obj->phdr = phdr; obj->phsize = ph->p_memsz; obj->relocbase = (caddr_t)phdr - ph->p_vaddr; break; } obj->stack_flags = PF_X | PF_R | PF_W; for (ph = phdr; ph < phlimit; ph++) { switch (ph->p_type) { case PT_INTERP: obj->interp = (const char *)(ph->p_vaddr + obj->relocbase); break; case PT_LOAD: if (nsegs == 0) { /* First load segment */ obj->vaddrbase = trunc_page(ph->p_vaddr); obj->mapbase = obj->vaddrbase + obj->relocbase; obj->textsize = round_page(ph->p_vaddr + ph->p_memsz) - obj->vaddrbase; } else { /* Last load segment */ obj->mapsize = round_page(ph->p_vaddr + ph->p_memsz) - obj->vaddrbase; } nsegs++; break; case PT_DYNAMIC: - if (ph->p_flags & PROT_WRITE) - obj->writable_dynamic = true; obj->dynamic = (const Elf_Dyn *)(ph->p_vaddr + obj->relocbase); break; case PT_TLS: obj->tlsindex = 1; obj->tlssize = ph->p_memsz; obj->tlsalign = ph->p_align; obj->tlsinitsize = ph->p_filesz; obj->tlsinit = (void*)(ph->p_vaddr + obj->relocbase); break; case PT_GNU_STACK: obj->stack_flags = ph->p_flags; break; case PT_GNU_RELRO: obj->relro_page = obj->relocbase + trunc_page(ph->p_vaddr); obj->relro_size = round_page(ph->p_memsz); break; case PT_NOTE: note_start = (Elf_Addr)obj->relocbase + ph->p_vaddr; note_end = note_start + ph->p_filesz; digest_notes(obj, note_start, note_end); break; } } if (nsegs < 1) { _rtld_error("%s: too few PT_LOAD segments", path); return NULL; } obj->entry = entry; return obj; } void digest_notes(Obj_Entry *obj, Elf_Addr note_start, Elf_Addr note_end) { const Elf_Note *note; const char *note_name; uintptr_t p; for (note = (const Elf_Note *)note_start; (Elf_Addr)note < note_end; note = (const Elf_Note *)((const char *)(note + 1) + roundup2(note->n_namesz, sizeof(Elf32_Addr)) + roundup2(note->n_descsz, sizeof(Elf32_Addr)))) { if (note->n_namesz != sizeof(NOTE_FREEBSD_VENDOR) || note->n_descsz != sizeof(int32_t)) continue; if (note->n_type != NT_FREEBSD_ABI_TAG && note->n_type != NT_FREEBSD_NOINIT_TAG) continue; note_name = (const char *)(note + 1); if (strncmp(NOTE_FREEBSD_VENDOR, note_name, sizeof(NOTE_FREEBSD_VENDOR)) != 0) continue; switch (note->n_type) { case NT_FREEBSD_ABI_TAG: /* FreeBSD osrel note */ p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); obj->osrel = *(const int32_t *)(p); dbg("note osrel %d", obj->osrel); break; case NT_FREEBSD_NOINIT_TAG: /* FreeBSD 'crt does not call init' note */ obj->crt_no_init = true; dbg("note crt_no_init"); break; } } } static Obj_Entry * dlcheck(void *handle) { Obj_Entry *obj; for (obj = obj_list; obj != NULL; obj = obj->next) if (obj == (Obj_Entry *) handle) break; if (obj == NULL || obj->refcount == 0 || obj->dl_refcount == 0) { _rtld_error("Invalid shared object handle %p", handle); return NULL; } return obj; } /* * If the given object is already in the donelist, return true. Otherwise * add the object to the list and return false. */ static bool donelist_check(DoneList *dlp, const Obj_Entry *obj) { unsigned int i; for (i = 0; i < dlp->num_used; i++) if (dlp->objs[i] == obj) return true; /* * Our donelist allocation should always be sufficient. But if * our threads locking isn't working properly, more shared objects * could have been loaded since we allocated the list. That should * never happen, but we'll handle it properly just in case it does. */ if (dlp->num_used < dlp->num_alloc) dlp->objs[dlp->num_used++] = obj; return false; } /* * Hash function for symbol table lookup. Don't even think about changing * this. It is specified by the System V ABI. */ unsigned long elf_hash(const char *name) { const unsigned char *p = (const unsigned char *) name; unsigned long h = 0; unsigned long g; while (*p != '\0') { h = (h << 4) + *p++; if ((g = h & 0xf0000000) != 0) h ^= g >> 24; h &= ~g; } return h; } /* * The GNU hash function is the Daniel J. Bernstein hash clipped to 32 bits * unsigned in case it's implemented with a wider type. */ static uint32_t gnu_hash(const char *s) { uint32_t h; unsigned char c; h = 5381; for (c = *s; c != '\0'; c = *++s) h = h * 33 + c; return (h & 0xffffffff); } /* * Find the library with the given name, and return its full pathname. * The returned string is dynamically allocated. Generates an error * message and returns NULL if the library cannot be found. * * If the second argument is non-NULL, then it refers to an already- * loaded shared object, whose library search path will be searched. * * If a library is successfully located via LD_LIBRARY_PATH_FDS, its * descriptor (which is close-on-exec) will be passed out via the third * argument. * * The search order is: * DT_RPATH in the referencing file _unless_ DT_RUNPATH is present (1) * DT_RPATH of the main object if DSO without defined DT_RUNPATH (1) * LD_LIBRARY_PATH * DT_RUNPATH in the referencing file * ldconfig hints (if -z nodefaultlib, filter out default library directories * from list) * /lib:/usr/lib _unless_ the referencing file is linked with -z nodefaultlib * * (1) Handled in digest_dynamic2 - rpath left NULL if runpath defined. */ static char * find_library(const char *xname, const Obj_Entry *refobj, int *fdp) { char *pathname; char *name; bool nodeflib, objgiven; objgiven = refobj != NULL; if (strchr(xname, '/') != NULL) { /* Hard coded pathname */ if (xname[0] != '/' && !trust) { _rtld_error("Absolute pathname required for shared object \"%s\"", xname); return NULL; } return (origin_subst(__DECONST(Obj_Entry *, refobj), __DECONST(char *, xname))); } if (libmap_disable || !objgiven || (name = lm_find(refobj->path, xname)) == NULL) name = (char *)xname; dbg(" Searching for \"%s\"", name); /* * If refobj->rpath != NULL, then refobj->runpath is NULL. Fall * back to pre-conforming behaviour if user requested so with * LD_LIBRARY_PATH_RPATH environment variable and ignore -z * nodeflib. */ if (objgiven && refobj->rpath != NULL && ld_library_path_rpath) { if ((pathname = search_library_path(name, ld_library_path)) != NULL || (refobj != NULL && (pathname = search_library_path(name, refobj->rpath)) != NULL) || (pathname = search_library_pathfds(name, ld_library_dirs, fdp)) != NULL || (pathname = search_library_path(name, gethints(false))) != NULL || (pathname = search_library_path(name, ld_standard_library_path)) != NULL) return (pathname); } else { nodeflib = objgiven ? refobj->z_nodeflib : false; if ((objgiven && (pathname = search_library_path(name, refobj->rpath)) != NULL) || (objgiven && refobj->runpath == NULL && refobj != obj_main && (pathname = search_library_path(name, obj_main->rpath)) != NULL) || (pathname = search_library_path(name, ld_library_path)) != NULL || (objgiven && (pathname = search_library_path(name, refobj->runpath)) != NULL) || (pathname = search_library_pathfds(name, ld_library_dirs, fdp)) != NULL || (pathname = search_library_path(name, gethints(nodeflib))) != NULL || (objgiven && !nodeflib && (pathname = search_library_path(name, ld_standard_library_path)) != NULL)) return (pathname); } if (objgiven && refobj->path != NULL) { _rtld_error("Shared object \"%s\" not found, required by \"%s\"", name, basename(refobj->path)); } else { _rtld_error("Shared object \"%s\" not found", name); } return NULL; } /* * Given a symbol number in a referencing object, find the corresponding * definition of the symbol. Returns a pointer to the symbol, or NULL if * no definition was found. Returns a pointer to the Obj_Entry of the * defining object via the reference parameter DEFOBJ_OUT. */ const Elf_Sym * find_symdef(unsigned long symnum, const Obj_Entry *refobj, const Obj_Entry **defobj_out, int flags, SymCache *cache, RtldLockState *lockstate) { const Elf_Sym *ref; const Elf_Sym *def; const Obj_Entry *defobj; SymLook req; const char *name; int res; /* * If we have already found this symbol, get the information from * the cache. */ if (symnum >= refobj->dynsymcount) return NULL; /* Bad object */ if (cache != NULL && cache[symnum].sym != NULL) { *defobj_out = cache[symnum].obj; return cache[symnum].sym; } ref = refobj->symtab + symnum; name = refobj->strtab + ref->st_name; def = NULL; defobj = NULL; /* * We don't have to do a full scale lookup if the symbol is local. * We know it will bind to the instance in this load module; to * which we already have a pointer (ie ref). By not doing a lookup, * we not only improve performance, but it also avoids unresolvable * symbols when local symbols are not in the hash table. This has * been seen with the ia64 toolchain. */ if (ELF_ST_BIND(ref->st_info) != STB_LOCAL) { if (ELF_ST_TYPE(ref->st_info) == STT_SECTION) { _rtld_error("%s: Bogus symbol table entry %lu", refobj->path, symnum); } symlook_init(&req, name); req.flags = flags; req.ventry = fetch_ventry(refobj, symnum); req.lockstate = lockstate; res = symlook_default(&req, refobj); if (res == 0) { def = req.sym_out; defobj = req.defobj_out; } } else { def = ref; defobj = refobj; } /* * If we found no definition and the reference is weak, treat the * symbol as having the value zero. */ if (def == NULL && ELF_ST_BIND(ref->st_info) == STB_WEAK) { def = &sym_zero; defobj = obj_main; } if (def != NULL) { *defobj_out = defobj; /* Record the information in the cache to avoid subsequent lookups. */ if (cache != NULL) { cache[symnum].sym = def; cache[symnum].obj = defobj; } } else { if (refobj != &obj_rtld) _rtld_error("%s: Undefined symbol \"%s\"", refobj->path, name); } return def; } /* * Return the search path from the ldconfig hints file, reading it if * necessary. If nostdlib is true, then the default search paths are * not added to result. * * Returns NULL if there are problems with the hints file, * or if the search path there is empty. */ static const char * gethints(bool nostdlib) { static char *hints, *filtered_path; struct elfhints_hdr hdr; struct fill_search_info_args sargs, hargs; struct dl_serinfo smeta, hmeta, *SLPinfo, *hintinfo; struct dl_serpath *SLPpath, *hintpath; char *p; unsigned int SLPndx, hintndx, fndx, fcount; int fd; size_t flen; bool skip; /* First call, read the hints file */ if (hints == NULL) { /* Keep from trying again in case the hints file is bad. */ hints = ""; if ((fd = open(ld_elf_hints_path, O_RDONLY | O_CLOEXEC)) == -1) return (NULL); if (read(fd, &hdr, sizeof hdr) != sizeof hdr || hdr.magic != ELFHINTS_MAGIC || hdr.version != 1) { close(fd); return (NULL); } p = xmalloc(hdr.dirlistlen + 1); if (lseek(fd, hdr.strtab + hdr.dirlist, SEEK_SET) == -1 || read(fd, p, hdr.dirlistlen + 1) != (ssize_t)hdr.dirlistlen + 1) { free(p); close(fd); return (NULL); } hints = p; close(fd); } /* * If caller agreed to receive list which includes the default * paths, we are done. Otherwise, if we still did not * calculated filtered result, do it now. */ if (!nostdlib) return (hints[0] != '\0' ? hints : NULL); if (filtered_path != NULL) goto filt_ret; /* * Obtain the list of all configured search paths, and the * list of the default paths. * * First estimate the size of the results. */ smeta.dls_size = __offsetof(struct dl_serinfo, dls_serpath); smeta.dls_cnt = 0; hmeta.dls_size = __offsetof(struct dl_serinfo, dls_serpath); hmeta.dls_cnt = 0; sargs.request = RTLD_DI_SERINFOSIZE; sargs.serinfo = &smeta; hargs.request = RTLD_DI_SERINFOSIZE; hargs.serinfo = &hmeta; path_enumerate(ld_standard_library_path, fill_search_info, &sargs); path_enumerate(p, fill_search_info, &hargs); SLPinfo = xmalloc(smeta.dls_size); hintinfo = xmalloc(hmeta.dls_size); /* * Next fetch both sets of paths. */ sargs.request = RTLD_DI_SERINFO; sargs.serinfo = SLPinfo; sargs.serpath = &SLPinfo->dls_serpath[0]; sargs.strspace = (char *)&SLPinfo->dls_serpath[smeta.dls_cnt]; hargs.request = RTLD_DI_SERINFO; hargs.serinfo = hintinfo; hargs.serpath = &hintinfo->dls_serpath[0]; hargs.strspace = (char *)&hintinfo->dls_serpath[hmeta.dls_cnt]; path_enumerate(ld_standard_library_path, fill_search_info, &sargs); path_enumerate(p, fill_search_info, &hargs); /* * Now calculate the difference between two sets, by excluding * standard paths from the full set. */ fndx = 0; fcount = 0; filtered_path = xmalloc(hdr.dirlistlen + 1); hintpath = &hintinfo->dls_serpath[0]; for (hintndx = 0; hintndx < hmeta.dls_cnt; hintndx++, hintpath++) { skip = false; SLPpath = &SLPinfo->dls_serpath[0]; /* * Check each standard path against current. */ for (SLPndx = 0; SLPndx < smeta.dls_cnt; SLPndx++, SLPpath++) { /* matched, skip the path */ if (!strcmp(hintpath->dls_name, SLPpath->dls_name)) { skip = true; break; } } if (skip) continue; /* * Not matched against any standard path, add the path * to result. Separate consequtive paths with ':'. */ if (fcount > 0) { filtered_path[fndx] = ':'; fndx++; } fcount++; flen = strlen(hintpath->dls_name); strncpy((filtered_path + fndx), hintpath->dls_name, flen); fndx += flen; } filtered_path[fndx] = '\0'; free(SLPinfo); free(hintinfo); filt_ret: return (filtered_path[0] != '\0' ? filtered_path : NULL); } static void init_dag(Obj_Entry *root) { const Needed_Entry *needed; const Objlist_Entry *elm; DoneList donelist; if (root->dag_inited) return; donelist_init(&donelist); /* Root object belongs to own DAG. */ objlist_push_tail(&root->dldags, root); objlist_push_tail(&root->dagmembers, root); donelist_check(&donelist, root); /* * Add dependencies of root object to DAG in breadth order * by exploiting the fact that each new object get added * to the tail of the dagmembers list. */ STAILQ_FOREACH(elm, &root->dagmembers, link) { for (needed = elm->obj->needed; needed != NULL; needed = needed->next) { if (needed->obj == NULL || donelist_check(&donelist, needed->obj)) continue; objlist_push_tail(&needed->obj->dldags, root); objlist_push_tail(&root->dagmembers, needed->obj); } } root->dag_inited = true; } static void process_z(Obj_Entry *root) { const Objlist_Entry *elm; Obj_Entry *obj; /* * Walk over object DAG and process every dependent object * that is marked as DF_1_NODELETE or DF_1_GLOBAL. They need * to grow their own DAG. * * For DF_1_GLOBAL, DAG is required for symbol lookups in * symlook_global() to work. * * For DF_1_NODELETE, the DAG should have its reference upped. */ STAILQ_FOREACH(elm, &root->dagmembers, link) { obj = elm->obj; if (obj == NULL) continue; if (obj->z_nodelete && !obj->ref_nodel) { dbg("obj %s -z nodelete", obj->path); init_dag(obj); ref_dag(obj); obj->ref_nodel = true; } if (obj->z_global && objlist_find(&list_global, obj) == NULL) { dbg("obj %s -z global", obj->path); objlist_push_tail(&list_global, obj); init_dag(obj); } } } /* * Initialize the dynamic linker. The argument is the address at which * the dynamic linker has been mapped into memory. The primary task of * this function is to relocate the dynamic linker. */ static void init_rtld(caddr_t mapbase, Elf_Auxinfo **aux_info) { Obj_Entry objtmp; /* Temporary rtld object */ const Elf_Dyn *dyn_rpath; const Elf_Dyn *dyn_soname; const Elf_Dyn *dyn_runpath; #ifdef RTLD_INIT_PAGESIZES_EARLY /* The page size is required by the dynamic memory allocator. */ init_pagesizes(aux_info); #endif /* * Conjure up an Obj_Entry structure for the dynamic linker. * * The "path" member can't be initialized yet because string constants * cannot yet be accessed. Below we will set it correctly. */ memset(&objtmp, 0, sizeof(objtmp)); objtmp.path = NULL; objtmp.rtld = true; objtmp.mapbase = mapbase; #ifdef PIC objtmp.relocbase = mapbase; #endif if (RTLD_IS_DYNAMIC()) { objtmp.dynamic = rtld_dynamic(&objtmp); digest_dynamic1(&objtmp, 1, &dyn_rpath, &dyn_soname, &dyn_runpath); assert(objtmp.needed == NULL); #if !defined(__mips__) /* MIPS has a bogus DT_TEXTREL. */ assert(!objtmp.textrel); #endif /* * Temporarily put the dynamic linker entry into the object list, so * that symbols can be found. */ relocate_objects(&objtmp, true, &objtmp, 0, NULL); } /* Initialize the object list. */ obj_tail = &obj_list; /* Now that non-local variables can be accesses, copy out obj_rtld. */ memcpy(&obj_rtld, &objtmp, sizeof(obj_rtld)); #ifndef RTLD_INIT_PAGESIZES_EARLY /* The page size is required by the dynamic memory allocator. */ init_pagesizes(aux_info); #endif if (aux_info[AT_OSRELDATE] != NULL) osreldate = aux_info[AT_OSRELDATE]->a_un.a_val; digest_dynamic2(&obj_rtld, dyn_rpath, dyn_soname, dyn_runpath); /* Replace the path with a dynamically allocated copy. */ obj_rtld.path = xstrdup(ld_path_rtld); r_debug.r_brk = r_debug_state; r_debug.r_state = RT_CONSISTENT; } /* * Retrieve the array of supported page sizes. The kernel provides the page * sizes in increasing order. */ static void init_pagesizes(Elf_Auxinfo **aux_info) { static size_t psa[MAXPAGESIZES]; int mib[2]; size_t len, size; if (aux_info[AT_PAGESIZES] != NULL && aux_info[AT_PAGESIZESLEN] != NULL) { size = aux_info[AT_PAGESIZESLEN]->a_un.a_val; pagesizes = aux_info[AT_PAGESIZES]->a_un.a_ptr; } else { len = 2; if (sysctlnametomib("hw.pagesizes", mib, &len) == 0) size = sizeof(psa); else { /* As a fallback, retrieve the base page size. */ size = sizeof(psa[0]); if (aux_info[AT_PAGESZ] != NULL) { psa[0] = aux_info[AT_PAGESZ]->a_un.a_val; goto psa_filled; } else { mib[0] = CTL_HW; mib[1] = HW_PAGESIZE; len = 2; } } if (sysctl(mib, len, psa, &size, NULL, 0) == -1) { _rtld_error("sysctl for hw.pagesize(s) failed"); rtld_die(); } psa_filled: pagesizes = psa; } npagesizes = size / sizeof(pagesizes[0]); /* Discard any invalid entries at the end of the array. */ while (npagesizes > 0 && pagesizes[npagesizes - 1] == 0) npagesizes--; } /* * Add the init functions from a needed object list (and its recursive * needed objects) to "list". This is not used directly; it is a helper * function for initlist_add_objects(). The write lock must be held * when this function is called. */ static void initlist_add_neededs(Needed_Entry *needed, Objlist *list) { /* Recursively process the successor needed objects. */ if (needed->next != NULL) initlist_add_neededs(needed->next, list); /* Process the current needed object. */ if (needed->obj != NULL) initlist_add_objects(needed->obj, &needed->obj->next, list); } /* * Scan all of the DAGs rooted in the range of objects from "obj" to * "tail" and add their init functions to "list". This recurses over * the DAGs and ensure the proper init ordering such that each object's * needed libraries are initialized before the object itself. At the * same time, this function adds the objects to the global finalization * list "list_fini" in the opposite order. The write lock must be * held when this function is called. */ static void initlist_add_objects(Obj_Entry *obj, Obj_Entry **tail, Objlist *list) { if (obj->init_scanned || obj->init_done) return; obj->init_scanned = true; /* Recursively process the successor objects. */ if (&obj->next != tail) initlist_add_objects(obj->next, tail, list); /* Recursively process the needed objects. */ if (obj->needed != NULL) initlist_add_neededs(obj->needed, list); if (obj->needed_filtees != NULL) initlist_add_neededs(obj->needed_filtees, list); if (obj->needed_aux_filtees != NULL) initlist_add_neededs(obj->needed_aux_filtees, list); /* Add the object to the init list. */ if (obj->preinit_array != (Elf_Addr)NULL || obj->init != (Elf_Addr)NULL || obj->init_array != (Elf_Addr)NULL) objlist_push_tail(list, obj); /* Add the object to the global fini list in the reverse order. */ if ((obj->fini != (Elf_Addr)NULL || obj->fini_array != (Elf_Addr)NULL) && !obj->on_fini_list) { objlist_push_head(&list_fini, obj); obj->on_fini_list = true; } } #ifndef FPTR_TARGET #define FPTR_TARGET(f) ((Elf_Addr) (f)) #endif static void free_needed_filtees(Needed_Entry *n) { Needed_Entry *needed, *needed1; for (needed = n; needed != NULL; needed = needed->next) { if (needed->obj != NULL) { dlclose(needed->obj); needed->obj = NULL; } } for (needed = n; needed != NULL; needed = needed1) { needed1 = needed->next; free(needed); } } static void unload_filtees(Obj_Entry *obj) { free_needed_filtees(obj->needed_filtees); obj->needed_filtees = NULL; free_needed_filtees(obj->needed_aux_filtees); obj->needed_aux_filtees = NULL; obj->filtees_loaded = false; } static void load_filtee1(Obj_Entry *obj, Needed_Entry *needed, int flags, RtldLockState *lockstate) { for (; needed != NULL; needed = needed->next) { needed->obj = dlopen_object(obj->strtab + needed->name, -1, obj, flags, ((ld_loadfltr || obj->z_loadfltr) ? RTLD_NOW : RTLD_LAZY) | RTLD_LOCAL, lockstate); } } static void load_filtees(Obj_Entry *obj, int flags, RtldLockState *lockstate) { lock_restart_for_upgrade(lockstate); if (!obj->filtees_loaded) { load_filtee1(obj, obj->needed_filtees, flags, lockstate); load_filtee1(obj, obj->needed_aux_filtees, flags, lockstate); obj->filtees_loaded = true; } } static int process_needed(Obj_Entry *obj, Needed_Entry *needed, int flags) { Obj_Entry *obj1; for (; needed != NULL; needed = needed->next) { obj1 = needed->obj = load_object(obj->strtab + needed->name, -1, obj, flags & ~RTLD_LO_NOLOAD); if (obj1 == NULL && !ld_tracing && (flags & RTLD_LO_FILTEES) == 0) return (-1); } return (0); } /* * Given a shared object, traverse its list of needed objects, and load * each of them. Returns 0 on success. Generates an error message and * returns -1 on failure. */ static int load_needed_objects(Obj_Entry *first, int flags) { Obj_Entry *obj; for (obj = first; obj != NULL; obj = obj->next) { if (process_needed(obj, obj->needed, flags) == -1) return (-1); } return (0); } static int load_preload_objects(void) { char *p = ld_preload; Obj_Entry *obj; static const char delim[] = " \t:;"; if (p == NULL) return 0; p += strspn(p, delim); while (*p != '\0') { size_t len = strcspn(p, delim); char savech; savech = p[len]; p[len] = '\0'; obj = load_object(p, -1, NULL, 0); if (obj == NULL) return -1; /* XXX - cleanup */ obj->z_interpose = true; p[len] = savech; p += len; p += strspn(p, delim); } LD_UTRACE(UTRACE_PRELOAD_FINISHED, NULL, NULL, 0, 0, NULL); return 0; } static const char * printable_path(const char *path) { return (path == NULL ? "" : path); } /* * Load a shared object into memory, if it is not already loaded. The * object may be specified by name or by user-supplied file descriptor * fd_u. In the later case, the fd_u descriptor is not closed, but its * duplicate is. * * Returns a pointer to the Obj_Entry for the object. Returns NULL * on failure. */ static Obj_Entry * load_object(const char *name, int fd_u, const Obj_Entry *refobj, int flags) { Obj_Entry *obj; int fd; struct stat sb; char *path; fd = -1; if (name != NULL) { for (obj = obj_list->next; obj != NULL; obj = obj->next) { if (object_match_name(obj, name)) return (obj); } path = find_library(name, refobj, &fd); if (path == NULL) return (NULL); } else path = NULL; if (fd >= 0) { /* * search_library_pathfds() opens a fresh file descriptor for the * library, so there is no need to dup(). */ } else if (fd_u == -1) { /* * If we didn't find a match by pathname, or the name is not * supplied, open the file and check again by device and inode. * This avoids false mismatches caused by multiple links or ".." * in pathnames. * * To avoid a race, we open the file and use fstat() rather than * using stat(). */ if ((fd = open(path, O_RDONLY | O_CLOEXEC | O_VERIFY)) == -1) { _rtld_error("Cannot open \"%s\"", path); free(path); return (NULL); } } else { fd = fcntl(fd_u, F_DUPFD_CLOEXEC, 0); if (fd == -1) { _rtld_error("Cannot dup fd"); free(path); return (NULL); } } if (fstat(fd, &sb) == -1) { _rtld_error("Cannot fstat \"%s\"", printable_path(path)); close(fd); free(path); return NULL; } for (obj = obj_list->next; obj != NULL; obj = obj->next) if (obj->ino == sb.st_ino && obj->dev == sb.st_dev) break; if (obj != NULL && name != NULL) { object_add_name(obj, name); free(path); close(fd); return obj; } if (flags & RTLD_LO_NOLOAD) { free(path); close(fd); return (NULL); } /* First use of this object, so we must map it in */ obj = do_load_object(fd, name, path, &sb, flags); if (obj == NULL) free(path); close(fd); return obj; } static Obj_Entry * do_load_object(int fd, const char *name, char *path, struct stat *sbp, int flags) { Obj_Entry *obj; struct statfs fs; /* * but first, make sure that environment variables haven't been * used to circumvent the noexec flag on a filesystem. */ if (dangerous_ld_env) { if (fstatfs(fd, &fs) != 0) { _rtld_error("Cannot fstatfs \"%s\"", printable_path(path)); return NULL; } if (fs.f_flags & MNT_NOEXEC) { _rtld_error("Cannot execute objects on %s\n", fs.f_mntonname); return NULL; } } dbg("loading \"%s\"", printable_path(path)); obj = map_object(fd, printable_path(path), sbp); if (obj == NULL) return NULL; /* * If DT_SONAME is present in the object, digest_dynamic2 already * added it to the object names. */ if (name != NULL) object_add_name(obj, name); obj->path = path; digest_dynamic(obj, 0); dbg("%s valid_hash_sysv %d valid_hash_gnu %d dynsymcount %d", obj->path, obj->valid_hash_sysv, obj->valid_hash_gnu, obj->dynsymcount); if (obj->z_noopen && (flags & (RTLD_LO_DLOPEN | RTLD_LO_TRACE)) == RTLD_LO_DLOPEN) { dbg("refusing to load non-loadable \"%s\"", obj->path); _rtld_error("Cannot dlopen non-loadable %s", obj->path); munmap(obj->mapbase, obj->mapsize); obj_free(obj); return (NULL); } obj->dlopened = (flags & RTLD_LO_DLOPEN) != 0; *obj_tail = obj; obj_tail = &obj->next; obj_count++; obj_loads++; linkmap_add(obj); /* for GDB & dlinfo() */ max_stack_flags |= obj->stack_flags; dbg(" %p .. %p: %s", obj->mapbase, obj->mapbase + obj->mapsize - 1, obj->path); if (obj->textrel) dbg(" WARNING: %s has impure text", obj->path); LD_UTRACE(UTRACE_LOAD_OBJECT, obj, obj->mapbase, obj->mapsize, 0, obj->path); return obj; } static Obj_Entry * obj_from_addr(const void *addr) { Obj_Entry *obj; for (obj = obj_list; obj != NULL; obj = obj->next) { if (addr < (void *) obj->mapbase) continue; if (addr < (void *) (obj->mapbase + obj->mapsize)) return obj; } return NULL; } static void preinit_main(void) { Elf_Addr *preinit_addr; int index; preinit_addr = (Elf_Addr *)obj_main->preinit_array; if (preinit_addr == NULL) return; for (index = 0; index < obj_main->preinit_array_num; index++) { if (preinit_addr[index] != 0 && preinit_addr[index] != 1) { dbg("calling preinit function for %s at %p", obj_main->path, (void *)preinit_addr[index]); LD_UTRACE(UTRACE_INIT_CALL, obj_main, (void *)preinit_addr[index], 0, 0, obj_main->path); call_init_pointer(obj_main, preinit_addr[index]); } } } /* * Call the finalization functions for each of the objects in "list" * belonging to the DAG of "root" and referenced once. If NULL "root" * is specified, every finalization function will be called regardless * of the reference count and the list elements won't be freed. All of * the objects are expected to have non-NULL fini functions. */ static void objlist_call_fini(Objlist *list, Obj_Entry *root, RtldLockState *lockstate) { Objlist_Entry *elm; char *saved_msg; Elf_Addr *fini_addr; int index; assert(root == NULL || root->refcount == 1); /* * Preserve the current error message since a fini function might * call into the dynamic linker and overwrite it. */ saved_msg = errmsg_save(); do { STAILQ_FOREACH(elm, list, link) { if (root != NULL && (elm->obj->refcount != 1 || objlist_find(&root->dagmembers, elm->obj) == NULL)) continue; /* Remove object from fini list to prevent recursive invocation. */ STAILQ_REMOVE(list, elm, Struct_Objlist_Entry, link); /* * XXX: If a dlopen() call references an object while the * fini function is in progress, we might end up trying to * unload the referenced object in dlclose() or the object * won't be unloaded although its fini function has been * called. */ lock_release(rtld_bind_lock, lockstate); /* * It is legal to have both DT_FINI and DT_FINI_ARRAY defined. * When this happens, DT_FINI_ARRAY is processed first. */ fini_addr = (Elf_Addr *)elm->obj->fini_array; if (fini_addr != NULL && elm->obj->fini_array_num > 0) { for (index = elm->obj->fini_array_num - 1; index >= 0; index--) { if (fini_addr[index] != 0 && fini_addr[index] != 1) { dbg("calling fini function for %s at %p", elm->obj->path, (void *)fini_addr[index]); LD_UTRACE(UTRACE_FINI_CALL, elm->obj, (void *)fini_addr[index], 0, 0, elm->obj->path); call_initfini_pointer(elm->obj, fini_addr[index]); } } } if (elm->obj->fini != (Elf_Addr)NULL) { dbg("calling fini function for %s at %p", elm->obj->path, (void *)elm->obj->fini); LD_UTRACE(UTRACE_FINI_CALL, elm->obj, (void *)elm->obj->fini, 0, 0, elm->obj->path); call_initfini_pointer(elm->obj, elm->obj->fini); } wlock_acquire(rtld_bind_lock, lockstate); /* No need to free anything if process is going down. */ if (root != NULL) free(elm); /* * We must restart the list traversal after every fini call * because a dlclose() call from the fini function or from * another thread might have modified the reference counts. */ break; } } while (elm != NULL); errmsg_restore(saved_msg); } /* * Call the initialization functions for each of the objects in * "list". All of the objects are expected to have non-NULL init * functions. */ static void objlist_call_init(Objlist *list, RtldLockState *lockstate) { Objlist_Entry *elm; Obj_Entry *obj; char *saved_msg; Elf_Addr *init_addr; int index; /* * Clean init_scanned flag so that objects can be rechecked and * possibly initialized earlier if any of vectors called below * cause the change by using dlopen. */ for (obj = obj_list; obj != NULL; obj = obj->next) obj->init_scanned = false; /* * Preserve the current error message since an init function might * call into the dynamic linker and overwrite it. */ saved_msg = errmsg_save(); STAILQ_FOREACH(elm, list, link) { if (elm->obj->init_done) /* Initialized early. */ continue; /* * Race: other thread might try to use this object before current * one completes the initilization. Not much can be done here * without better locking. */ elm->obj->init_done = true; lock_release(rtld_bind_lock, lockstate); /* * It is legal to have both DT_INIT and DT_INIT_ARRAY defined. * When this happens, DT_INIT is processed first. */ if (elm->obj->init != (Elf_Addr)NULL) { dbg("calling init function for %s at %p", elm->obj->path, (void *)elm->obj->init); LD_UTRACE(UTRACE_INIT_CALL, elm->obj, (void *)elm->obj->init, 0, 0, elm->obj->path); call_initfini_pointer(elm->obj, elm->obj->init); } init_addr = (Elf_Addr *)elm->obj->init_array; if (init_addr != NULL) { for (index = 0; index < elm->obj->init_array_num; index++) { if (init_addr[index] != 0 && init_addr[index] != 1) { dbg("calling init function for %s at %p", elm->obj->path, (void *)init_addr[index]); LD_UTRACE(UTRACE_INIT_CALL, elm->obj, (void *)init_addr[index], 0, 0, elm->obj->path); call_init_pointer(elm->obj, init_addr[index]); } } } wlock_acquire(rtld_bind_lock, lockstate); } errmsg_restore(saved_msg); } static void objlist_clear(Objlist *list) { Objlist_Entry *elm; while (!STAILQ_EMPTY(list)) { elm = STAILQ_FIRST(list); STAILQ_REMOVE_HEAD(list, link); free(elm); } } static Objlist_Entry * objlist_find(Objlist *list, const Obj_Entry *obj) { Objlist_Entry *elm; STAILQ_FOREACH(elm, list, link) if (elm->obj == obj) return elm; return NULL; } static void objlist_init(Objlist *list) { STAILQ_INIT(list); } static void objlist_push_head(Objlist *list, Obj_Entry *obj) { Objlist_Entry *elm; elm = NEW(Objlist_Entry); elm->obj = obj; STAILQ_INSERT_HEAD(list, elm, link); } static void objlist_push_tail(Objlist *list, Obj_Entry *obj) { Objlist_Entry *elm; elm = NEW(Objlist_Entry); elm->obj = obj; STAILQ_INSERT_TAIL(list, elm, link); } static void objlist_put_after(Objlist *list, Obj_Entry *listobj, Obj_Entry *obj) { Objlist_Entry *elm, *listelm; STAILQ_FOREACH(listelm, list, link) { if (listelm->obj == listobj) break; } elm = NEW(Objlist_Entry); elm->obj = obj; if (listelm != NULL) STAILQ_INSERT_AFTER(list, listelm, elm, link); else STAILQ_INSERT_TAIL(list, elm, link); } static void objlist_remove(Objlist *list, Obj_Entry *obj) { Objlist_Entry *elm; if ((elm = objlist_find(list, obj)) != NULL) { STAILQ_REMOVE(list, elm, Struct_Objlist_Entry, link); free(elm); } } /* * Relocate dag rooted in the specified object. * Returns 0 on success, or -1 on failure. */ static int relocate_object_dag(Obj_Entry *root, bool bind_now, Obj_Entry *rtldobj, int flags, RtldLockState *lockstate) { Objlist_Entry *elm; int error; error = 0; STAILQ_FOREACH(elm, &root->dagmembers, link) { error = relocate_object(elm->obj, bind_now, rtldobj, flags, lockstate); if (error == -1) break; } return (error); } /* * Relocate single object. * Returns 0 on success, or -1 on failure. */ static int relocate_object(Obj_Entry *obj, bool bind_now, Obj_Entry *rtldobj, int flags, RtldLockState *lockstate) { if (obj->relocated) return (0); obj->relocated = true; if (obj != rtldobj) dbg("relocating \"%s\"", obj->path); if (obj->symtab == NULL || obj->strtab == NULL || !(obj->valid_hash_sysv || obj->valid_hash_gnu)) { _rtld_error("%s: Shared object has no run-time symbol table", obj->path); return (-1); } if (obj->textrel) { /* There are relocations to the write-protected text segment. */ if (mprotect(obj->mapbase, obj->textsize, PROT_READ|PROT_WRITE|PROT_EXEC) == -1) { _rtld_error("%s: Cannot write-enable text segment: %s", obj->path, rtld_strerror(errno)); return (-1); } } /* Process the non-PLT non-IFUNC relocations. */ if (reloc_non_plt(obj, rtldobj, flags, lockstate)) return (-1); if (obj->textrel) { /* Re-protected the text segment. */ if (mprotect(obj->mapbase, obj->textsize, PROT_READ|PROT_EXEC) == -1) { _rtld_error("%s: Cannot write-protect text segment: %s", obj->path, rtld_strerror(errno)); return (-1); } } /* Set the special PLT or GOT entries. */ init_pltgot(obj); /* Process the PLT relocations. */ if (reloc_plt(obj) == -1) return (-1); /* Relocate the jump slots if we are doing immediate binding. */ if (obj->bind_now || bind_now) if (reloc_jmpslots(obj, flags, lockstate) == -1) return (-1); /* * Process the non-PLT IFUNC relocations. The relocations are * processed in two phases, because IFUNC resolvers may * reference other symbols, which must be readily processed * before resolvers are called. */ if (obj->non_plt_gnu_ifunc && reloc_non_plt(obj, rtldobj, flags | SYMLOOK_IFUNC, lockstate)) return (-1); if (obj->relro_size > 0) { if (mprotect(obj->relro_page, obj->relro_size, PROT_READ) == -1) { _rtld_error("%s: Cannot enforce relro protection: %s", obj->path, rtld_strerror(errno)); return (-1); } } /* * Set up the magic number and version in the Obj_Entry. These * were checked in the crt1.o from the original ElfKit, so we * set them for backward compatibility. */ obj->magic = RTLD_MAGIC; obj->version = RTLD_VERSION; return (0); } /* * Relocate newly-loaded shared objects. The argument is a pointer to * the Obj_Entry for the first such object. All objects from the first * to the end of the list of objects are relocated. Returns 0 on success, * or -1 on failure. */ static int relocate_objects(Obj_Entry *first, bool bind_now, Obj_Entry *rtldobj, int flags, RtldLockState *lockstate) { Obj_Entry *obj; int error; for (error = 0, obj = first; obj != NULL; obj = obj->next) { error = relocate_object(obj, bind_now, rtldobj, flags, lockstate); if (error == -1) break; } return (error); } /* * The handling of R_MACHINE_IRELATIVE relocations and jumpslots * referencing STT_GNU_IFUNC symbols is postponed till the other * relocations are done. The indirect functions specified as * ifunc are allowed to call other symbols, so we need to have * objects relocated before asking for resolution from indirects. * * The R_MACHINE_IRELATIVE slots are resolved in greedy fashion, * instead of the usual lazy handling of PLT slots. It is * consistent with how GNU does it. */ static int resolve_object_ifunc(Obj_Entry *obj, bool bind_now, int flags, RtldLockState *lockstate) { if (obj->irelative && reloc_iresolve(obj, lockstate) == -1) return (-1); if ((obj->bind_now || bind_now) && obj->gnu_ifunc && reloc_gnu_ifunc(obj, flags, lockstate) == -1) return (-1); return (0); } static int resolve_objects_ifunc(Obj_Entry *first, bool bind_now, int flags, RtldLockState *lockstate) { Obj_Entry *obj; for (obj = first; obj != NULL; obj = obj->next) { if (resolve_object_ifunc(obj, bind_now, flags, lockstate) == -1) return (-1); } return (0); } static int initlist_objects_ifunc(Objlist *list, bool bind_now, int flags, RtldLockState *lockstate) { Objlist_Entry *elm; STAILQ_FOREACH(elm, list, link) { if (resolve_object_ifunc(elm->obj, bind_now, flags, lockstate) == -1) return (-1); } return (0); } /* * Cleanup procedure. It will be called (by the atexit mechanism) just * before the process exits. */ static void rtld_exit(void) { RtldLockState lockstate; wlock_acquire(rtld_bind_lock, &lockstate); dbg("rtld_exit()"); objlist_call_fini(&list_fini, NULL, &lockstate); /* No need to remove the items from the list, since we are exiting. */ if (!libmap_disable) lm_fini(); lock_release(rtld_bind_lock, &lockstate); } /* * Iterate over a search path, translate each element, and invoke the * callback on the result. */ static void * path_enumerate(const char *path, path_enum_proc callback, void *arg) { const char *trans; if (path == NULL) return (NULL); path += strspn(path, ":;"); while (*path != '\0') { size_t len; char *res; len = strcspn(path, ":;"); trans = lm_findn(NULL, path, len); if (trans) res = callback(trans, strlen(trans), arg); else res = callback(path, len, arg); if (res != NULL) return (res); path += len; path += strspn(path, ":;"); } return (NULL); } struct try_library_args { const char *name; size_t namelen; char *buffer; size_t buflen; }; static void * try_library_path(const char *dir, size_t dirlen, void *param) { struct try_library_args *arg; arg = param; if (*dir == '/' || trust) { char *pathname; if (dirlen + 1 + arg->namelen + 1 > arg->buflen) return (NULL); pathname = arg->buffer; strncpy(pathname, dir, dirlen); pathname[dirlen] = '/'; strcpy(pathname + dirlen + 1, arg->name); dbg(" Trying \"%s\"", pathname); if (access(pathname, F_OK) == 0) { /* We found it */ pathname = xmalloc(dirlen + 1 + arg->namelen + 1); strcpy(pathname, arg->buffer); return (pathname); } } return (NULL); } static char * search_library_path(const char *name, const char *path) { char *p; struct try_library_args arg; if (path == NULL) return NULL; arg.name = name; arg.namelen = strlen(name); arg.buffer = xmalloc(PATH_MAX); arg.buflen = PATH_MAX; p = path_enumerate(path, try_library_path, &arg); free(arg.buffer); return (p); } /* * Finds the library with the given name using the directory descriptors * listed in the LD_LIBRARY_PATH_FDS environment variable. * * Returns a freshly-opened close-on-exec file descriptor for the library, * or -1 if the library cannot be found. */ static char * search_library_pathfds(const char *name, const char *path, int *fdp) { char *envcopy, *fdstr, *found, *last_token; size_t len; int dirfd, fd; dbg("%s('%s', '%s', fdp)", __func__, name, path); /* Don't load from user-specified libdirs into setuid binaries. */ if (!trust) return (NULL); /* We can't do anything if LD_LIBRARY_PATH_FDS isn't set. */ if (path == NULL) return (NULL); /* LD_LIBRARY_PATH_FDS only works with relative paths. */ if (name[0] == '/') { dbg("Absolute path (%s) passed to %s", name, __func__); return (NULL); } /* * Use strtok_r() to walk the FD:FD:FD list. This requires a local * copy of the path, as strtok_r rewrites separator tokens * with '\0'. */ found = NULL; envcopy = xstrdup(path); for (fdstr = strtok_r(envcopy, ":", &last_token); fdstr != NULL; fdstr = strtok_r(NULL, ":", &last_token)) { dirfd = parse_libdir(fdstr); if (dirfd < 0) break; fd = __sys_openat(dirfd, name, O_RDONLY | O_CLOEXEC | O_VERIFY); if (fd >= 0) { *fdp = fd; len = strlen(fdstr) + strlen(name) + 3; found = xmalloc(len); if (rtld_snprintf(found, len, "#%d/%s", dirfd, name) < 0) { _rtld_error("error generating '%d/%s'", dirfd, name); rtld_die(); } dbg("open('%s') => %d", found, fd); break; } } free(envcopy); return (found); } int dlclose(void *handle) { Obj_Entry *root; RtldLockState lockstate; wlock_acquire(rtld_bind_lock, &lockstate); root = dlcheck(handle); if (root == NULL) { lock_release(rtld_bind_lock, &lockstate); return -1; } LD_UTRACE(UTRACE_DLCLOSE_START, handle, NULL, 0, root->dl_refcount, root->path); /* Unreference the object and its dependencies. */ root->dl_refcount--; if (root->refcount == 1) { /* * The object will be no longer referenced, so we must unload it. * First, call the fini functions. */ objlist_call_fini(&list_fini, root, &lockstate); unref_dag(root); /* Finish cleaning up the newly-unreferenced objects. */ GDB_STATE(RT_DELETE,&root->linkmap); unload_object(root); GDB_STATE(RT_CONSISTENT,NULL); } else unref_dag(root); LD_UTRACE(UTRACE_DLCLOSE_STOP, handle, NULL, 0, 0, NULL); lock_release(rtld_bind_lock, &lockstate); return 0; } char * dlerror(void) { char *msg = error_message; error_message = NULL; return msg; } /* * This function is deprecated and has no effect. */ void dllockinit(void *context, void *(*lock_create)(void *context), void (*rlock_acquire)(void *lock), void (*wlock_acquire)(void *lock), void (*lock_release)(void *lock), void (*lock_destroy)(void *lock), void (*context_destroy)(void *context)) { static void *cur_context; static void (*cur_context_destroy)(void *); /* Just destroy the context from the previous call, if necessary. */ if (cur_context_destroy != NULL) cur_context_destroy(cur_context); cur_context = context; cur_context_destroy = context_destroy; } void * dlopen(const char *name, int mode) { return (rtld_dlopen(name, -1, mode)); } void * fdlopen(int fd, int mode) { return (rtld_dlopen(NULL, fd, mode)); } static void * rtld_dlopen(const char *name, int fd, int mode) { RtldLockState lockstate; int lo_flags; LD_UTRACE(UTRACE_DLOPEN_START, NULL, NULL, 0, mode, name); ld_tracing = (mode & RTLD_TRACE) == 0 ? NULL : "1"; if (ld_tracing != NULL) { rlock_acquire(rtld_bind_lock, &lockstate); if (sigsetjmp(lockstate.env, 0) != 0) lock_upgrade(rtld_bind_lock, &lockstate); environ = (char **)*get_program_var_addr("environ", &lockstate); lock_release(rtld_bind_lock, &lockstate); } lo_flags = RTLD_LO_DLOPEN; if (mode & RTLD_NODELETE) lo_flags |= RTLD_LO_NODELETE; if (mode & RTLD_NOLOAD) lo_flags |= RTLD_LO_NOLOAD; if (ld_tracing != NULL) lo_flags |= RTLD_LO_TRACE; return (dlopen_object(name, fd, obj_main, lo_flags, mode & (RTLD_MODEMASK | RTLD_GLOBAL), NULL)); } static void dlopen_cleanup(Obj_Entry *obj) { obj->dl_refcount--; unref_dag(obj); if (obj->refcount == 0) unload_object(obj); } static Obj_Entry * dlopen_object(const char *name, int fd, Obj_Entry *refobj, int lo_flags, int mode, RtldLockState *lockstate) { Obj_Entry **old_obj_tail; Obj_Entry *obj; Objlist initlist; RtldLockState mlockstate; int result; objlist_init(&initlist); if (lockstate == NULL && !(lo_flags & RTLD_LO_EARLY)) { wlock_acquire(rtld_bind_lock, &mlockstate); lockstate = &mlockstate; } GDB_STATE(RT_ADD,NULL); old_obj_tail = obj_tail; obj = NULL; if (name == NULL && fd == -1) { obj = obj_main; obj->refcount++; } else { obj = load_object(name, fd, refobj, lo_flags); } if (obj) { obj->dl_refcount++; if (mode & RTLD_GLOBAL && objlist_find(&list_global, obj) == NULL) objlist_push_tail(&list_global, obj); if (*old_obj_tail != NULL) { /* We loaded something new. */ assert(*old_obj_tail == obj); result = load_needed_objects(obj, lo_flags & (RTLD_LO_DLOPEN | RTLD_LO_EARLY)); init_dag(obj); ref_dag(obj); if (result != -1) result = rtld_verify_versions(&obj->dagmembers); if (result != -1 && ld_tracing) goto trace; if (result == -1 || relocate_object_dag(obj, (mode & RTLD_MODEMASK) == RTLD_NOW, &obj_rtld, (lo_flags & RTLD_LO_EARLY) ? SYMLOOK_EARLY : 0, lockstate) == -1) { dlopen_cleanup(obj); obj = NULL; } else if (lo_flags & RTLD_LO_EARLY) { /* * Do not call the init functions for early loaded * filtees. The image is still not initialized enough * for them to work. * * Our object is found by the global object list and * will be ordered among all init calls done right * before transferring control to main. */ } else { /* Make list of init functions to call. */ initlist_add_objects(obj, &obj->next, &initlist); } /* * Process all no_delete or global objects here, given * them own DAGs to prevent their dependencies from being * unloaded. This has to be done after we have loaded all * of the dependencies, so that we do not miss any. */ if (obj != NULL) process_z(obj); } else { /* * Bump the reference counts for objects on this DAG. If * this is the first dlopen() call for the object that was * already loaded as a dependency, initialize the dag * starting at it. */ init_dag(obj); ref_dag(obj); if ((lo_flags & RTLD_LO_TRACE) != 0) goto trace; } if (obj != NULL && ((lo_flags & RTLD_LO_NODELETE) != 0 || obj->z_nodelete) && !obj->ref_nodel) { dbg("obj %s nodelete", obj->path); ref_dag(obj); obj->z_nodelete = obj->ref_nodel = true; } } LD_UTRACE(UTRACE_DLOPEN_STOP, obj, NULL, 0, obj ? obj->dl_refcount : 0, name); GDB_STATE(RT_CONSISTENT,obj ? &obj->linkmap : NULL); if (!(lo_flags & RTLD_LO_EARLY)) { map_stacks_exec(lockstate); } if (initlist_objects_ifunc(&initlist, (mode & RTLD_MODEMASK) == RTLD_NOW, (lo_flags & RTLD_LO_EARLY) ? SYMLOOK_EARLY : 0, lockstate) == -1) { objlist_clear(&initlist); dlopen_cleanup(obj); if (lockstate == &mlockstate) lock_release(rtld_bind_lock, lockstate); return (NULL); } if (!(lo_flags & RTLD_LO_EARLY)) { /* Call the init functions. */ objlist_call_init(&initlist, lockstate); } objlist_clear(&initlist); if (lockstate == &mlockstate) lock_release(rtld_bind_lock, lockstate); return obj; trace: trace_loaded_objects(obj); if (lockstate == &mlockstate) lock_release(rtld_bind_lock, lockstate); exit(0); } static void * do_dlsym(void *handle, const char *name, void *retaddr, const Ver_Entry *ve, int flags) { DoneList donelist; const Obj_Entry *obj, *defobj; const Elf_Sym *def; SymLook req; RtldLockState lockstate; tls_index ti; void *sym; int res; def = NULL; defobj = NULL; symlook_init(&req, name); req.ventry = ve; req.flags = flags | SYMLOOK_IN_PLT; req.lockstate = &lockstate; LD_UTRACE(UTRACE_DLSYM_START, handle, NULL, 0, 0, name); rlock_acquire(rtld_bind_lock, &lockstate); if (sigsetjmp(lockstate.env, 0) != 0) lock_upgrade(rtld_bind_lock, &lockstate); if (handle == NULL || handle == RTLD_NEXT || handle == RTLD_DEFAULT || handle == RTLD_SELF) { if ((obj = obj_from_addr(retaddr)) == NULL) { _rtld_error("Cannot determine caller's shared object"); lock_release(rtld_bind_lock, &lockstate); LD_UTRACE(UTRACE_DLSYM_STOP, handle, NULL, 0, 0, name); return NULL; } if (handle == NULL) { /* Just the caller's shared object. */ res = symlook_obj(&req, obj); if (res == 0) { def = req.sym_out; defobj = req.defobj_out; } } else if (handle == RTLD_NEXT || /* Objects after caller's */ handle == RTLD_SELF) { /* ... caller included */ if (handle == RTLD_NEXT) obj = obj->next; for (; obj != NULL; obj = obj->next) { res = symlook_obj(&req, obj); if (res == 0) { if (def == NULL || ELF_ST_BIND(req.sym_out->st_info) != STB_WEAK) { def = req.sym_out; defobj = req.defobj_out; if (ELF_ST_BIND(def->st_info) != STB_WEAK) break; } } } /* * Search the dynamic linker itself, and possibly resolve the * symbol from there. This is how the application links to * dynamic linker services such as dlopen. */ if (def == NULL || ELF_ST_BIND(def->st_info) == STB_WEAK) { res = symlook_obj(&req, &obj_rtld); if (res == 0) { def = req.sym_out; defobj = req.defobj_out; } } } else { assert(handle == RTLD_DEFAULT); res = symlook_default(&req, obj); if (res == 0) { defobj = req.defobj_out; def = req.sym_out; } } } else { if ((obj = dlcheck(handle)) == NULL) { lock_release(rtld_bind_lock, &lockstate); LD_UTRACE(UTRACE_DLSYM_STOP, handle, NULL, 0, 0, name); return NULL; } donelist_init(&donelist); if (obj->mainprog) { /* Handle obtained by dlopen(NULL, ...) implies global scope. */ res = symlook_global(&req, &donelist); if (res == 0) { def = req.sym_out; defobj = req.defobj_out; } /* * Search the dynamic linker itself, and possibly resolve the * symbol from there. This is how the application links to * dynamic linker services such as dlopen. */ if (def == NULL || ELF_ST_BIND(def->st_info) == STB_WEAK) { res = symlook_obj(&req, &obj_rtld); if (res == 0) { def = req.sym_out; defobj = req.defobj_out; } } } else { /* Search the whole DAG rooted at the given object. */ res = symlook_list(&req, &obj->dagmembers, &donelist); if (res == 0) { def = req.sym_out; defobj = req.defobj_out; } } } if (def != NULL) { lock_release(rtld_bind_lock, &lockstate); /* * The value required by the caller is derived from the value * of the symbol. this is simply the relocated value of the * symbol. */ if (ELF_ST_TYPE(def->st_info) == STT_FUNC) sym = make_function_pointer(def, defobj); else if (ELF_ST_TYPE(def->st_info) == STT_GNU_IFUNC) sym = rtld_resolve_ifunc(defobj, def); else if (ELF_ST_TYPE(def->st_info) == STT_TLS) { ti.ti_module = defobj->tlsindex; ti.ti_offset = def->st_value; sym = __tls_get_addr(&ti); } else sym = defobj->relocbase + def->st_value; LD_UTRACE(UTRACE_DLSYM_STOP, handle, sym, 0, 0, name); return (sym); } _rtld_error("Undefined symbol \"%s\"", name); lock_release(rtld_bind_lock, &lockstate); LD_UTRACE(UTRACE_DLSYM_STOP, handle, NULL, 0, 0, name); return NULL; } void * dlsym(void *handle, const char *name) { return do_dlsym(handle, name, __builtin_return_address(0), NULL, SYMLOOK_DLSYM); } dlfunc_t dlfunc(void *handle, const char *name) { union { void *d; dlfunc_t f; } rv; rv.d = do_dlsym(handle, name, __builtin_return_address(0), NULL, SYMLOOK_DLSYM); return (rv.f); } void * dlvsym(void *handle, const char *name, const char *version) { Ver_Entry ventry; ventry.name = version; ventry.file = NULL; ventry.hash = elf_hash(version); ventry.flags= 0; return do_dlsym(handle, name, __builtin_return_address(0), &ventry, SYMLOOK_DLSYM); } int _rtld_addr_phdr(const void *addr, struct dl_phdr_info *phdr_info) { const Obj_Entry *obj; RtldLockState lockstate; rlock_acquire(rtld_bind_lock, &lockstate); obj = obj_from_addr(addr); if (obj == NULL) { _rtld_error("No shared object contains address"); lock_release(rtld_bind_lock, &lockstate); return (0); } rtld_fill_dl_phdr_info(obj, phdr_info); lock_release(rtld_bind_lock, &lockstate); return (1); } int dladdr(const void *addr, Dl_info *info) { const Obj_Entry *obj; const Elf_Sym *def; void *symbol_addr; unsigned long symoffset; RtldLockState lockstate; rlock_acquire(rtld_bind_lock, &lockstate); obj = obj_from_addr(addr); if (obj == NULL) { _rtld_error("No shared object contains address"); lock_release(rtld_bind_lock, &lockstate); return 0; } info->dli_fname = obj->path; info->dli_fbase = obj->mapbase; info->dli_saddr = (void *)0; info->dli_sname = NULL; /* * Walk the symbol list looking for the symbol whose address is * closest to the address sent in. */ for (symoffset = 0; symoffset < obj->dynsymcount; symoffset++) { def = obj->symtab + symoffset; /* * For skip the symbol if st_shndx is either SHN_UNDEF or * SHN_COMMON. */ if (def->st_shndx == SHN_UNDEF || def->st_shndx == SHN_COMMON) continue; /* * If the symbol is greater than the specified address, or if it * is further away from addr than the current nearest symbol, * then reject it. */ symbol_addr = obj->relocbase + def->st_value; if (symbol_addr > addr || symbol_addr < info->dli_saddr) continue; /* Update our idea of the nearest symbol. */ info->dli_sname = obj->strtab + def->st_name; info->dli_saddr = symbol_addr; /* Exact match? */ if (info->dli_saddr == addr) break; } lock_release(rtld_bind_lock, &lockstate); return 1; } int dlinfo(void *handle, int request, void *p) { const Obj_Entry *obj; RtldLockState lockstate; int error; rlock_acquire(rtld_bind_lock, &lockstate); if (handle == NULL || handle == RTLD_SELF) { void *retaddr; retaddr = __builtin_return_address(0); /* __GNUC__ only */ if ((obj = obj_from_addr(retaddr)) == NULL) _rtld_error("Cannot determine caller's shared object"); } else obj = dlcheck(handle); if (obj == NULL) { lock_release(rtld_bind_lock, &lockstate); return (-1); } error = 0; switch (request) { case RTLD_DI_LINKMAP: *((struct link_map const **)p) = &obj->linkmap; break; case RTLD_DI_ORIGIN: error = rtld_dirname(obj->path, p); break; case RTLD_DI_SERINFOSIZE: case RTLD_DI_SERINFO: error = do_search_info(obj, request, (struct dl_serinfo *)p); break; default: _rtld_error("Invalid request %d passed to dlinfo()", request); error = -1; } lock_release(rtld_bind_lock, &lockstate); return (error); } static void rtld_fill_dl_phdr_info(const Obj_Entry *obj, struct dl_phdr_info *phdr_info) { phdr_info->dlpi_addr = (Elf_Addr)obj->relocbase; phdr_info->dlpi_name = obj->path; phdr_info->dlpi_phdr = obj->phdr; phdr_info->dlpi_phnum = obj->phsize / sizeof(obj->phdr[0]); phdr_info->dlpi_tls_modid = obj->tlsindex; phdr_info->dlpi_tls_data = obj->tlsinit; phdr_info->dlpi_adds = obj_loads; phdr_info->dlpi_subs = obj_loads - obj_count; } int dl_iterate_phdr(__dl_iterate_hdr_callback callback, void *param) { struct dl_phdr_info phdr_info; const Obj_Entry *obj; RtldLockState bind_lockstate, phdr_lockstate; int error; wlock_acquire(rtld_phdr_lock, &phdr_lockstate); rlock_acquire(rtld_bind_lock, &bind_lockstate); error = 0; for (obj = obj_list; obj != NULL; obj = obj->next) { rtld_fill_dl_phdr_info(obj, &phdr_info); if ((error = callback(&phdr_info, sizeof phdr_info, param)) != 0) break; } if (error == 0) { rtld_fill_dl_phdr_info(&obj_rtld, &phdr_info); error = callback(&phdr_info, sizeof(phdr_info), param); } lock_release(rtld_bind_lock, &bind_lockstate); lock_release(rtld_phdr_lock, &phdr_lockstate); return (error); } static void * fill_search_info(const char *dir, size_t dirlen, void *param) { struct fill_search_info_args *arg; arg = param; if (arg->request == RTLD_DI_SERINFOSIZE) { arg->serinfo->dls_cnt ++; arg->serinfo->dls_size += sizeof(struct dl_serpath) + dirlen + 1; } else { struct dl_serpath *s_entry; s_entry = arg->serpath; s_entry->dls_name = arg->strspace; s_entry->dls_flags = arg->flags; strncpy(arg->strspace, dir, dirlen); arg->strspace[dirlen] = '\0'; arg->strspace += dirlen + 1; arg->serpath++; } return (NULL); } static int do_search_info(const Obj_Entry *obj, int request, struct dl_serinfo *info) { struct dl_serinfo _info; struct fill_search_info_args args; args.request = RTLD_DI_SERINFOSIZE; args.serinfo = &_info; _info.dls_size = __offsetof(struct dl_serinfo, dls_serpath); _info.dls_cnt = 0; path_enumerate(obj->rpath, fill_search_info, &args); path_enumerate(ld_library_path, fill_search_info, &args); path_enumerate(obj->runpath, fill_search_info, &args); path_enumerate(gethints(obj->z_nodeflib), fill_search_info, &args); if (!obj->z_nodeflib) path_enumerate(ld_standard_library_path, fill_search_info, &args); if (request == RTLD_DI_SERINFOSIZE) { info->dls_size = _info.dls_size; info->dls_cnt = _info.dls_cnt; return (0); } if (info->dls_cnt != _info.dls_cnt || info->dls_size != _info.dls_size) { _rtld_error("Uninitialized Dl_serinfo struct passed to dlinfo()"); return (-1); } args.request = RTLD_DI_SERINFO; args.serinfo = info; args.serpath = &info->dls_serpath[0]; args.strspace = (char *)&info->dls_serpath[_info.dls_cnt]; args.flags = LA_SER_RUNPATH; if (path_enumerate(obj->rpath, fill_search_info, &args) != NULL) return (-1); args.flags = LA_SER_LIBPATH; if (path_enumerate(ld_library_path, fill_search_info, &args) != NULL) return (-1); args.flags = LA_SER_RUNPATH; if (path_enumerate(obj->runpath, fill_search_info, &args) != NULL) return (-1); args.flags = LA_SER_CONFIG; if (path_enumerate(gethints(obj->z_nodeflib), fill_search_info, &args) != NULL) return (-1); args.flags = LA_SER_DEFAULT; if (!obj->z_nodeflib && path_enumerate(ld_standard_library_path, fill_search_info, &args) != NULL) return (-1); return (0); } static int rtld_dirname(const char *path, char *bname) { const char *endp; /* Empty or NULL string gets treated as "." */ if (path == NULL || *path == '\0') { bname[0] = '.'; bname[1] = '\0'; return (0); } /* Strip trailing slashes */ endp = path + strlen(path) - 1; while (endp > path && *endp == '/') endp--; /* Find the start of the dir */ while (endp > path && *endp != '/') endp--; /* Either the dir is "/" or there are no slashes */ if (endp == path) { bname[0] = *endp == '/' ? '/' : '.'; bname[1] = '\0'; return (0); } else { do { endp--; } while (endp > path && *endp == '/'); } if (endp - path + 2 > PATH_MAX) { _rtld_error("Filename is too long: %s", path); return(-1); } strncpy(bname, path, endp - path + 1); bname[endp - path + 1] = '\0'; return (0); } static int rtld_dirname_abs(const char *path, char *base) { char *last; if (realpath(path, base) == NULL) return (-1); dbg("%s -> %s", path, base); last = strrchr(base, '/'); if (last == NULL) return (-1); if (last != base) *last = '\0'; return (0); } static void linkmap_add(Obj_Entry *obj) { struct link_map *l = &obj->linkmap; struct link_map *prev; obj->linkmap.l_name = obj->path; obj->linkmap.l_addr = obj->mapbase; obj->linkmap.l_ld = obj->dynamic; #ifdef __mips__ /* GDB needs load offset on MIPS to use the symbols */ obj->linkmap.l_offs = obj->relocbase; #endif if (r_debug.r_map == NULL) { r_debug.r_map = l; return; } /* * Scan to the end of the list, but not past the entry for the * dynamic linker, which we want to keep at the very end. */ for (prev = r_debug.r_map; prev->l_next != NULL && prev->l_next != &obj_rtld.linkmap; prev = prev->l_next) ; /* Link in the new entry. */ l->l_prev = prev; l->l_next = prev->l_next; if (l->l_next != NULL) l->l_next->l_prev = l; prev->l_next = l; } static void linkmap_delete(Obj_Entry *obj) { struct link_map *l = &obj->linkmap; if (l->l_prev == NULL) { if ((r_debug.r_map = l->l_next) != NULL) l->l_next->l_prev = NULL; return; } if ((l->l_prev->l_next = l->l_next) != NULL) l->l_next->l_prev = l->l_prev; } /* * Function for the debugger to set a breakpoint on to gain control. * * The two parameters allow the debugger to easily find and determine * what the runtime loader is doing and to whom it is doing it. * * When the loadhook trap is hit (r_debug_state, set at program * initialization), the arguments can be found on the stack: * * +8 struct link_map *m * +4 struct r_debug *rd * +0 RetAddr */ void r_debug_state(struct r_debug* rd, struct link_map *m) { /* * The following is a hack to force the compiler to emit calls to * this function, even when optimizing. If the function is empty, * the compiler is not obliged to emit any code for calls to it, * even when marked __noinline. However, gdb depends on those * calls being made. */ __compiler_membar(); } /* * A function called after init routines have completed. This can be used to * break before a program's entry routine is called, and can be used when * main is not available in the symbol table. */ void _r_debug_postinit(struct link_map *m) { /* See r_debug_state(). */ __compiler_membar(); } /* * Get address of the pointer variable in the main program. * Prefer non-weak symbol over the weak one. */ static const void ** get_program_var_addr(const char *name, RtldLockState *lockstate) { SymLook req; DoneList donelist; symlook_init(&req, name); req.lockstate = lockstate; donelist_init(&donelist); if (symlook_global(&req, &donelist) != 0) return (NULL); if (ELF_ST_TYPE(req.sym_out->st_info) == STT_FUNC) return ((const void **)make_function_pointer(req.sym_out, req.defobj_out)); else if (ELF_ST_TYPE(req.sym_out->st_info) == STT_GNU_IFUNC) return ((const void **)rtld_resolve_ifunc(req.defobj_out, req.sym_out)); else return ((const void **)(req.defobj_out->relocbase + req.sym_out->st_value)); } /* * Set a pointer variable in the main program to the given value. This * is used to set key variables such as "environ" before any of the * init functions are called. */ static void set_program_var(const char *name, const void *value) { const void **addr; if ((addr = get_program_var_addr(name, NULL)) != NULL) { dbg("\"%s\": *%p <-- %p", name, addr, value); *addr = value; } } /* * Search the global objects, including dependencies and main object, * for the given symbol. */ static int symlook_global(SymLook *req, DoneList *donelist) { SymLook req1; const Objlist_Entry *elm; int res; symlook_init_from_req(&req1, req); /* Search all objects loaded at program start up. */ if (req->defobj_out == NULL || ELF_ST_BIND(req->sym_out->st_info) == STB_WEAK) { res = symlook_list(&req1, &list_main, donelist); if (res == 0 && (req->defobj_out == NULL || ELF_ST_BIND(req1.sym_out->st_info) != STB_WEAK)) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; assert(req->defobj_out != NULL); } } /* Search all DAGs whose roots are RTLD_GLOBAL objects. */ STAILQ_FOREACH(elm, &list_global, link) { if (req->defobj_out != NULL && ELF_ST_BIND(req->sym_out->st_info) != STB_WEAK) break; res = symlook_list(&req1, &elm->obj->dagmembers, donelist); if (res == 0 && (req->defobj_out == NULL || ELF_ST_BIND(req1.sym_out->st_info) != STB_WEAK)) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; assert(req->defobj_out != NULL); } } return (req->sym_out != NULL ? 0 : ESRCH); } /* * Given a symbol name in a referencing object, find the corresponding * definition of the symbol. Returns a pointer to the symbol, or NULL if * no definition was found. Returns a pointer to the Obj_Entry of the * defining object via the reference parameter DEFOBJ_OUT. */ static int symlook_default(SymLook *req, const Obj_Entry *refobj) { DoneList donelist; const Objlist_Entry *elm; SymLook req1; int res; donelist_init(&donelist); symlook_init_from_req(&req1, req); /* Look first in the referencing object if linked symbolically. */ if (refobj->symbolic && !donelist_check(&donelist, refobj)) { res = symlook_obj(&req1, refobj); if (res == 0) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; assert(req->defobj_out != NULL); } } symlook_global(req, &donelist); /* Search all dlopened DAGs containing the referencing object. */ STAILQ_FOREACH(elm, &refobj->dldags, link) { if (req->sym_out != NULL && ELF_ST_BIND(req->sym_out->st_info) != STB_WEAK) break; res = symlook_list(&req1, &elm->obj->dagmembers, &donelist); if (res == 0 && (req->sym_out == NULL || ELF_ST_BIND(req1.sym_out->st_info) != STB_WEAK)) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; assert(req->defobj_out != NULL); } } /* * Search the dynamic linker itself, and possibly resolve the * symbol from there. This is how the application links to * dynamic linker services such as dlopen. */ if (req->sym_out == NULL || ELF_ST_BIND(req->sym_out->st_info) == STB_WEAK) { res = symlook_obj(&req1, &obj_rtld); if (res == 0) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; assert(req->defobj_out != NULL); } } return (req->sym_out != NULL ? 0 : ESRCH); } static int symlook_list(SymLook *req, const Objlist *objlist, DoneList *dlp) { const Elf_Sym *def; const Obj_Entry *defobj; const Objlist_Entry *elm; SymLook req1; int res; def = NULL; defobj = NULL; STAILQ_FOREACH(elm, objlist, link) { if (donelist_check(dlp, elm->obj)) continue; symlook_init_from_req(&req1, req); if ((res = symlook_obj(&req1, elm->obj)) == 0) { if (def == NULL || ELF_ST_BIND(req1.sym_out->st_info) != STB_WEAK) { def = req1.sym_out; defobj = req1.defobj_out; if (ELF_ST_BIND(def->st_info) != STB_WEAK) break; } } } if (def != NULL) { req->sym_out = def; req->defobj_out = defobj; return (0); } return (ESRCH); } /* * Search the chain of DAGS cointed to by the given Needed_Entry * for a symbol of the given name. Each DAG is scanned completely * before advancing to the next one. Returns a pointer to the symbol, * or NULL if no definition was found. */ static int symlook_needed(SymLook *req, const Needed_Entry *needed, DoneList *dlp) { const Elf_Sym *def; const Needed_Entry *n; const Obj_Entry *defobj; SymLook req1; int res; def = NULL; defobj = NULL; symlook_init_from_req(&req1, req); for (n = needed; n != NULL; n = n->next) { if (n->obj == NULL || (res = symlook_list(&req1, &n->obj->dagmembers, dlp)) != 0) continue; if (def == NULL || ELF_ST_BIND(req1.sym_out->st_info) != STB_WEAK) { def = req1.sym_out; defobj = req1.defobj_out; if (ELF_ST_BIND(def->st_info) != STB_WEAK) break; } } if (def != NULL) { req->sym_out = def; req->defobj_out = defobj; return (0); } return (ESRCH); } /* * Search the symbol table of a single shared object for a symbol of * the given name and version, if requested. Returns a pointer to the * symbol, or NULL if no definition was found. If the object is * filter, return filtered symbol from filtee. * * The symbol's hash value is passed in for efficiency reasons; that * eliminates many recomputations of the hash value. */ int symlook_obj(SymLook *req, const Obj_Entry *obj) { DoneList donelist; SymLook req1; int flags, res, mres; /* * If there is at least one valid hash at this point, we prefer to * use the faster GNU version if available. */ if (obj->valid_hash_gnu) mres = symlook_obj1_gnu(req, obj); else if (obj->valid_hash_sysv) mres = symlook_obj1_sysv(req, obj); else return (EINVAL); if (mres == 0) { if (obj->needed_filtees != NULL) { flags = (req->flags & SYMLOOK_EARLY) ? RTLD_LO_EARLY : 0; load_filtees(__DECONST(Obj_Entry *, obj), flags, req->lockstate); donelist_init(&donelist); symlook_init_from_req(&req1, req); res = symlook_needed(&req1, obj->needed_filtees, &donelist); if (res == 0) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; } return (res); } if (obj->needed_aux_filtees != NULL) { flags = (req->flags & SYMLOOK_EARLY) ? RTLD_LO_EARLY : 0; load_filtees(__DECONST(Obj_Entry *, obj), flags, req->lockstate); donelist_init(&donelist); symlook_init_from_req(&req1, req); res = symlook_needed(&req1, obj->needed_aux_filtees, &donelist); if (res == 0) { req->sym_out = req1.sym_out; req->defobj_out = req1.defobj_out; return (res); } } } return (mres); } /* Symbol match routine common to both hash functions */ static bool matched_symbol(SymLook *req, const Obj_Entry *obj, Sym_Match_Result *result, const unsigned long symnum) { Elf_Versym verndx; const Elf_Sym *symp; const char *strp; symp = obj->symtab + symnum; strp = obj->strtab + symp->st_name; switch (ELF_ST_TYPE(symp->st_info)) { case STT_FUNC: case STT_NOTYPE: case STT_OBJECT: case STT_COMMON: case STT_GNU_IFUNC: if (symp->st_value == 0) return (false); /* fallthrough */ case STT_TLS: if (symp->st_shndx != SHN_UNDEF) break; #ifndef __mips__ else if (((req->flags & SYMLOOK_IN_PLT) == 0) && (ELF_ST_TYPE(symp->st_info) == STT_FUNC)) break; /* fallthrough */ #endif default: return (false); } if (req->name[0] != strp[0] || strcmp(req->name, strp) != 0) return (false); if (req->ventry == NULL) { if (obj->versyms != NULL) { verndx = VER_NDX(obj->versyms[symnum]); if (verndx > obj->vernum) { _rtld_error( "%s: symbol %s references wrong version %d", obj->path, obj->strtab + symnum, verndx); return (false); } /* * If we are not called from dlsym (i.e. this * is a normal relocation from unversioned * binary), accept the symbol immediately if * it happens to have first version after this * shared object became versioned. Otherwise, * if symbol is versioned and not hidden, * remember it. If it is the only symbol with * this name exported by the shared object, it * will be returned as a match by the calling * function. If symbol is global (verndx < 2) * accept it unconditionally. */ if ((req->flags & SYMLOOK_DLSYM) == 0 && verndx == VER_NDX_GIVEN) { result->sym_out = symp; return (true); } else if (verndx >= VER_NDX_GIVEN) { if ((obj->versyms[symnum] & VER_NDX_HIDDEN) == 0) { if (result->vsymp == NULL) result->vsymp = symp; result->vcount++; } return (false); } } result->sym_out = symp; return (true); } if (obj->versyms == NULL) { if (object_match_name(obj, req->ventry->name)) { _rtld_error("%s: object %s should provide version %s " "for symbol %s", obj_rtld.path, obj->path, req->ventry->name, obj->strtab + symnum); return (false); } } else { verndx = VER_NDX(obj->versyms[symnum]); if (verndx > obj->vernum) { _rtld_error("%s: symbol %s references wrong version %d", obj->path, obj->strtab + symnum, verndx); return (false); } if (obj->vertab[verndx].hash != req->ventry->hash || strcmp(obj->vertab[verndx].name, req->ventry->name)) { /* * Version does not match. Look if this is a * global symbol and if it is not hidden. If * global symbol (verndx < 2) is available, * use it. Do not return symbol if we are * called by dlvsym, because dlvsym looks for * a specific version and default one is not * what dlvsym wants. */ if ((req->flags & SYMLOOK_DLSYM) || (verndx >= VER_NDX_GIVEN) || (obj->versyms[symnum] & VER_NDX_HIDDEN)) return (false); } } result->sym_out = symp; return (true); } /* * Search for symbol using SysV hash function. * obj->buckets is known not to be NULL at this point; the test for this was * performed with the obj->valid_hash_sysv assignment. */ static int symlook_obj1_sysv(SymLook *req, const Obj_Entry *obj) { unsigned long symnum; Sym_Match_Result matchres; matchres.sym_out = NULL; matchres.vsymp = NULL; matchres.vcount = 0; for (symnum = obj->buckets[req->hash % obj->nbuckets]; symnum != STN_UNDEF; symnum = obj->chains[symnum]) { if (symnum >= obj->nchains) return (ESRCH); /* Bad object */ if (matched_symbol(req, obj, &matchres, symnum)) { req->sym_out = matchres.sym_out; req->defobj_out = obj; return (0); } } if (matchres.vcount == 1) { req->sym_out = matchres.vsymp; req->defobj_out = obj; return (0); } return (ESRCH); } /* Search for symbol using GNU hash function */ static int symlook_obj1_gnu(SymLook *req, const Obj_Entry *obj) { Elf_Addr bloom_word; const Elf32_Word *hashval; Elf32_Word bucket; Sym_Match_Result matchres; unsigned int h1, h2; unsigned long symnum; matchres.sym_out = NULL; matchres.vsymp = NULL; matchres.vcount = 0; /* Pick right bitmask word from Bloom filter array */ bloom_word = obj->bloom_gnu[(req->hash_gnu / __ELF_WORD_SIZE) & obj->maskwords_bm_gnu]; /* Calculate modulus word size of gnu hash and its derivative */ h1 = req->hash_gnu & (__ELF_WORD_SIZE - 1); h2 = ((req->hash_gnu >> obj->shift2_gnu) & (__ELF_WORD_SIZE - 1)); /* Filter out the "definitely not in set" queries */ if (((bloom_word >> h1) & (bloom_word >> h2) & 1) == 0) return (ESRCH); /* Locate hash chain and corresponding value element*/ bucket = obj->buckets_gnu[req->hash_gnu % obj->nbuckets_gnu]; if (bucket == 0) return (ESRCH); hashval = &obj->chain_zero_gnu[bucket]; do { if (((*hashval ^ req->hash_gnu) >> 1) == 0) { symnum = hashval - obj->chain_zero_gnu; if (matched_symbol(req, obj, &matchres, symnum)) { req->sym_out = matchres.sym_out; req->defobj_out = obj; return (0); } } } while ((*hashval++ & 1) == 0); if (matchres.vcount == 1) { req->sym_out = matchres.vsymp; req->defobj_out = obj; return (0); } return (ESRCH); } static void trace_loaded_objects(Obj_Entry *obj) { char *fmt1, *fmt2, *fmt, *main_local, *list_containers; int c; if ((main_local = getenv(_LD("TRACE_LOADED_OBJECTS_PROGNAME"))) == NULL) main_local = ""; if ((fmt1 = getenv(_LD("TRACE_LOADED_OBJECTS_FMT1"))) == NULL) fmt1 = "\t%o => %p (%x)\n"; if ((fmt2 = getenv(_LD("TRACE_LOADED_OBJECTS_FMT2"))) == NULL) fmt2 = "\t%o (%x)\n"; list_containers = getenv(_LD("TRACE_LOADED_OBJECTS_ALL")); for (; obj; obj = obj->next) { Needed_Entry *needed; char *name, *path; bool is_lib; if (list_containers && obj->needed != NULL) rtld_printf("%s:\n", obj->path); for (needed = obj->needed; needed; needed = needed->next) { if (needed->obj != NULL) { if (needed->obj->traced && !list_containers) continue; needed->obj->traced = true; path = needed->obj->path; } else path = "not found"; name = (char *)obj->strtab + needed->name; is_lib = strncmp(name, "lib", 3) == 0; /* XXX - bogus */ fmt = is_lib ? fmt1 : fmt2; while ((c = *fmt++) != '\0') { switch (c) { default: rtld_putchar(c); continue; case '\\': switch (c = *fmt) { case '\0': continue; case 'n': rtld_putchar('\n'); break; case 't': rtld_putchar('\t'); break; } break; case '%': switch (c = *fmt) { case '\0': continue; case '%': default: rtld_putchar(c); break; case 'A': rtld_putstr(main_local); break; case 'a': rtld_putstr(obj_main->path); break; case 'o': rtld_putstr(name); break; #if 0 case 'm': rtld_printf("%d", sodp->sod_major); break; case 'n': rtld_printf("%d", sodp->sod_minor); break; #endif case 'p': rtld_putstr(path); break; case 'x': rtld_printf("%p", needed->obj ? needed->obj->mapbase : 0); break; } break; } ++fmt; } } } } /* * Unload a dlopened object and its dependencies from memory and from * our data structures. It is assumed that the DAG rooted in the * object has already been unreferenced, and that the object has a * reference count of 0. */ static void unload_object(Obj_Entry *root) { Obj_Entry *obj; Obj_Entry **linkp; assert(root->refcount == 0); /* * Pass over the DAG removing unreferenced objects from * appropriate lists. */ unlink_object(root); /* Unmap all objects that are no longer referenced. */ linkp = &obj_list->next; while ((obj = *linkp) != NULL) { if (obj->refcount == 0) { LD_UTRACE(UTRACE_UNLOAD_OBJECT, obj, obj->mapbase, obj->mapsize, 0, obj->path); dbg("unloading \"%s\"", obj->path); unload_filtees(root); munmap(obj->mapbase, obj->mapsize); linkmap_delete(obj); *linkp = obj->next; obj_count--; obj_free(obj); } else linkp = &obj->next; } obj_tail = linkp; } static void unlink_object(Obj_Entry *root) { Objlist_Entry *elm; if (root->refcount == 0) { /* Remove the object from the RTLD_GLOBAL list. */ objlist_remove(&list_global, root); /* Remove the object from all objects' DAG lists. */ STAILQ_FOREACH(elm, &root->dagmembers, link) { objlist_remove(&elm->obj->dldags, root); if (elm->obj != root) unlink_object(elm->obj); } } } static void ref_dag(Obj_Entry *root) { Objlist_Entry *elm; assert(root->dag_inited); STAILQ_FOREACH(elm, &root->dagmembers, link) elm->obj->refcount++; } static void unref_dag(Obj_Entry *root) { Objlist_Entry *elm; assert(root->dag_inited); STAILQ_FOREACH(elm, &root->dagmembers, link) elm->obj->refcount--; } /* * Common code for MD __tls_get_addr(). */ static void *tls_get_addr_slow(Elf_Addr **, int, size_t) __noinline; static void * tls_get_addr_slow(Elf_Addr **dtvp, int index, size_t offset) { Elf_Addr *newdtv, *dtv; RtldLockState lockstate; int to_copy; dtv = *dtvp; /* Check dtv generation in case new modules have arrived */ if (dtv[0] != tls_dtv_generation) { wlock_acquire(rtld_bind_lock, &lockstate); newdtv = xcalloc(tls_max_index + 2, sizeof(Elf_Addr)); to_copy = dtv[1]; if (to_copy > tls_max_index) to_copy = tls_max_index; memcpy(&newdtv[2], &dtv[2], to_copy * sizeof(Elf_Addr)); newdtv[0] = tls_dtv_generation; newdtv[1] = tls_max_index; free(dtv); lock_release(rtld_bind_lock, &lockstate); dtv = *dtvp = newdtv; } /* Dynamically allocate module TLS if necessary */ if (dtv[index + 1] == 0) { /* Signal safe, wlock will block out signals. */ wlock_acquire(rtld_bind_lock, &lockstate); if (!dtv[index + 1]) dtv[index + 1] = (Elf_Addr)allocate_module_tls(index); lock_release(rtld_bind_lock, &lockstate); } return ((void *)(dtv[index + 1] + offset)); } void * tls_get_addr_common(Elf_Addr **dtvp, int index, size_t offset) { Elf_Addr *dtv; dtv = *dtvp; /* Check dtv generation in case new modules have arrived */ if (__predict_true(dtv[0] == tls_dtv_generation && dtv[index + 1] != 0)) return ((void *)(dtv[index + 1] + offset)); return (tls_get_addr_slow(dtvp, index, offset)); } #if defined(__aarch64__) || defined(__arm__) || defined(__mips__) || \ defined(__powerpc__) || defined(__riscv__) /* * Allocate Static TLS using the Variant I method. */ void * allocate_tls(Obj_Entry *objs, void *oldtcb, size_t tcbsize, size_t tcbalign) { Obj_Entry *obj; char *tcb; Elf_Addr **tls; Elf_Addr *dtv; Elf_Addr addr; int i; if (oldtcb != NULL && tcbsize == TLS_TCB_SIZE) return (oldtcb); assert(tcbsize >= TLS_TCB_SIZE); tcb = xcalloc(1, tls_static_space - TLS_TCB_SIZE + tcbsize); tls = (Elf_Addr **)(tcb + tcbsize - TLS_TCB_SIZE); if (oldtcb != NULL) { memcpy(tls, oldtcb, tls_static_space); free(oldtcb); /* Adjust the DTV. */ dtv = tls[0]; for (i = 0; i < dtv[1]; i++) { if (dtv[i+2] >= (Elf_Addr)oldtcb && dtv[i+2] < (Elf_Addr)oldtcb + tls_static_space) { dtv[i+2] = dtv[i+2] - (Elf_Addr)oldtcb + (Elf_Addr)tls; } } } else { dtv = xcalloc(tls_max_index + 2, sizeof(Elf_Addr)); tls[0] = dtv; dtv[0] = tls_dtv_generation; dtv[1] = tls_max_index; for (obj = objs; obj; obj = obj->next) { if (obj->tlsoffset > 0) { addr = (Elf_Addr)tls + obj->tlsoffset; if (obj->tlsinitsize > 0) memcpy((void*) addr, obj->tlsinit, obj->tlsinitsize); if (obj->tlssize > obj->tlsinitsize) memset((void*) (addr + obj->tlsinitsize), 0, obj->tlssize - obj->tlsinitsize); dtv[obj->tlsindex + 1] = addr; } } } return (tcb); } void free_tls(void *tcb, size_t tcbsize, size_t tcbalign) { Elf_Addr *dtv; Elf_Addr tlsstart, tlsend; int dtvsize, i; assert(tcbsize >= TLS_TCB_SIZE); tlsstart = (Elf_Addr)tcb + tcbsize - TLS_TCB_SIZE; tlsend = tlsstart + tls_static_space; dtv = *(Elf_Addr **)tlsstart; dtvsize = dtv[1]; for (i = 0; i < dtvsize; i++) { if (dtv[i+2] && (dtv[i+2] < tlsstart || dtv[i+2] >= tlsend)) { free((void*)dtv[i+2]); } } free(dtv); free(tcb); } #endif #if defined(__i386__) || defined(__amd64__) || defined(__sparc64__) /* * Allocate Static TLS using the Variant II method. */ void * allocate_tls(Obj_Entry *objs, void *oldtls, size_t tcbsize, size_t tcbalign) { Obj_Entry *obj; size_t size, ralign; char *tls; Elf_Addr *dtv, *olddtv; Elf_Addr segbase, oldsegbase, addr; int i; ralign = tcbalign; if (tls_static_max_align > ralign) ralign = tls_static_max_align; size = round(tls_static_space, ralign) + round(tcbsize, ralign); assert(tcbsize >= 2*sizeof(Elf_Addr)); tls = malloc_aligned(size, ralign); dtv = xcalloc(tls_max_index + 2, sizeof(Elf_Addr)); segbase = (Elf_Addr)(tls + round(tls_static_space, ralign)); ((Elf_Addr*)segbase)[0] = segbase; ((Elf_Addr*)segbase)[1] = (Elf_Addr) dtv; dtv[0] = tls_dtv_generation; dtv[1] = tls_max_index; if (oldtls) { /* * Copy the static TLS block over whole. */ oldsegbase = (Elf_Addr) oldtls; memcpy((void *)(segbase - tls_static_space), (const void *)(oldsegbase - tls_static_space), tls_static_space); /* * If any dynamic TLS blocks have been created tls_get_addr(), * move them over. */ olddtv = ((Elf_Addr**)oldsegbase)[1]; for (i = 0; i < olddtv[1]; i++) { if (olddtv[i+2] < oldsegbase - size || olddtv[i+2] > oldsegbase) { dtv[i+2] = olddtv[i+2]; olddtv[i+2] = 0; } } /* * We assume that this block was the one we created with * allocate_initial_tls(). */ free_tls(oldtls, 2*sizeof(Elf_Addr), sizeof(Elf_Addr)); } else { for (obj = objs; obj; obj = obj->next) { if (obj->tlsoffset) { addr = segbase - obj->tlsoffset; memset((void*) (addr + obj->tlsinitsize), 0, obj->tlssize - obj->tlsinitsize); if (obj->tlsinit) memcpy((void*) addr, obj->tlsinit, obj->tlsinitsize); dtv[obj->tlsindex + 1] = addr; } } } return (void*) segbase; } void free_tls(void *tls, size_t tcbsize, size_t tcbalign) { Elf_Addr* dtv; size_t size, ralign; int dtvsize, i; Elf_Addr tlsstart, tlsend; /* * Figure out the size of the initial TLS block so that we can * find stuff which ___tls_get_addr() allocated dynamically. */ ralign = tcbalign; if (tls_static_max_align > ralign) ralign = tls_static_max_align; size = round(tls_static_space, ralign); dtv = ((Elf_Addr**)tls)[1]; dtvsize = dtv[1]; tlsend = (Elf_Addr) tls; tlsstart = tlsend - size; for (i = 0; i < dtvsize; i++) { if (dtv[i + 2] != 0 && (dtv[i + 2] < tlsstart || dtv[i + 2] > tlsend)) { free_aligned((void *)dtv[i + 2]); } } free_aligned((void *)tlsstart); free((void*) dtv); } #endif /* * Allocate TLS block for module with given index. */ void * allocate_module_tls(int index) { Obj_Entry* obj; char* p; for (obj = obj_list; obj; obj = obj->next) { if (obj->tlsindex == index) break; } if (!obj) { _rtld_error("Can't find module with TLS index %d", index); rtld_die(); } p = malloc_aligned(obj->tlssize, obj->tlsalign); memcpy(p, obj->tlsinit, obj->tlsinitsize); memset(p + obj->tlsinitsize, 0, obj->tlssize - obj->tlsinitsize); return p; } bool allocate_tls_offset(Obj_Entry *obj) { size_t off; if (obj->tls_done) return true; if (obj->tlssize == 0) { obj->tls_done = true; return true; } if (tls_last_offset == 0) off = calculate_first_tls_offset(obj->tlssize, obj->tlsalign); else off = calculate_tls_offset(tls_last_offset, tls_last_size, obj->tlssize, obj->tlsalign); /* * If we have already fixed the size of the static TLS block, we * must stay within that size. When allocating the static TLS, we * leave a small amount of space spare to be used for dynamically * loading modules which use static TLS. */ if (tls_static_space != 0) { if (calculate_tls_end(off, obj->tlssize) > tls_static_space) return false; } else if (obj->tlsalign > tls_static_max_align) { tls_static_max_align = obj->tlsalign; } tls_last_offset = obj->tlsoffset = off; tls_last_size = obj->tlssize; obj->tls_done = true; return true; } void free_tls_offset(Obj_Entry *obj) { /* * If we were the last thing to allocate out of the static TLS * block, we give our space back to the 'allocator'. This is a * simplistic workaround to allow libGL.so.1 to be loaded and * unloaded multiple times. */ if (calculate_tls_end(obj->tlsoffset, obj->tlssize) == calculate_tls_end(tls_last_offset, tls_last_size)) { tls_last_offset -= obj->tlssize; tls_last_size = 0; } } void * _rtld_allocate_tls(void *oldtls, size_t tcbsize, size_t tcbalign) { void *ret; RtldLockState lockstate; wlock_acquire(rtld_bind_lock, &lockstate); ret = allocate_tls(obj_list, oldtls, tcbsize, tcbalign); lock_release(rtld_bind_lock, &lockstate); return (ret); } void _rtld_free_tls(void *tcb, size_t tcbsize, size_t tcbalign) { RtldLockState lockstate; wlock_acquire(rtld_bind_lock, &lockstate); free_tls(tcb, tcbsize, tcbalign); lock_release(rtld_bind_lock, &lockstate); } static void object_add_name(Obj_Entry *obj, const char *name) { Name_Entry *entry; size_t len; len = strlen(name); entry = malloc(sizeof(Name_Entry) + len); if (entry != NULL) { strcpy(entry->name, name); STAILQ_INSERT_TAIL(&obj->names, entry, link); } } static int object_match_name(const Obj_Entry *obj, const char *name) { Name_Entry *entry; STAILQ_FOREACH(entry, &obj->names, link) { if (strcmp(name, entry->name) == 0) return (1); } return (0); } static Obj_Entry * locate_dependency(const Obj_Entry *obj, const char *name) { const Objlist_Entry *entry; const Needed_Entry *needed; STAILQ_FOREACH(entry, &list_main, link) { if (object_match_name(entry->obj, name)) return entry->obj; } for (needed = obj->needed; needed != NULL; needed = needed->next) { if (strcmp(obj->strtab + needed->name, name) == 0 || (needed->obj != NULL && object_match_name(needed->obj, name))) { /* * If there is DT_NEEDED for the name we are looking for, * we are all set. Note that object might not be found if * dependency was not loaded yet, so the function can * return NULL here. This is expected and handled * properly by the caller. */ return (needed->obj); } } _rtld_error("%s: Unexpected inconsistency: dependency %s not found", obj->path, name); rtld_die(); } static int check_object_provided_version(Obj_Entry *refobj, const Obj_Entry *depobj, const Elf_Vernaux *vna) { const Elf_Verdef *vd; const char *vername; vername = refobj->strtab + vna->vna_name; vd = depobj->verdef; if (vd == NULL) { _rtld_error("%s: version %s required by %s not defined", depobj->path, vername, refobj->path); return (-1); } for (;;) { if (vd->vd_version != VER_DEF_CURRENT) { _rtld_error("%s: Unsupported version %d of Elf_Verdef entry", depobj->path, vd->vd_version); return (-1); } if (vna->vna_hash == vd->vd_hash) { const Elf_Verdaux *aux = (const Elf_Verdaux *) ((char *)vd + vd->vd_aux); if (strcmp(vername, depobj->strtab + aux->vda_name) == 0) return (0); } if (vd->vd_next == 0) break; vd = (const Elf_Verdef *) ((char *)vd + vd->vd_next); } if (vna->vna_flags & VER_FLG_WEAK) return (0); _rtld_error("%s: version %s required by %s not found", depobj->path, vername, refobj->path); return (-1); } static int rtld_verify_object_versions(Obj_Entry *obj) { const Elf_Verneed *vn; const Elf_Verdef *vd; const Elf_Verdaux *vda; const Elf_Vernaux *vna; const Obj_Entry *depobj; int maxvernum, vernum; if (obj->ver_checked) return (0); obj->ver_checked = true; maxvernum = 0; /* * Walk over defined and required version records and figure out * max index used by any of them. Do very basic sanity checking * while there. */ vn = obj->verneed; while (vn != NULL) { if (vn->vn_version != VER_NEED_CURRENT) { _rtld_error("%s: Unsupported version %d of Elf_Verneed entry", obj->path, vn->vn_version); return (-1); } vna = (const Elf_Vernaux *) ((char *)vn + vn->vn_aux); for (;;) { vernum = VER_NEED_IDX(vna->vna_other); if (vernum > maxvernum) maxvernum = vernum; if (vna->vna_next == 0) break; vna = (const Elf_Vernaux *) ((char *)vna + vna->vna_next); } if (vn->vn_next == 0) break; vn = (const Elf_Verneed *) ((char *)vn + vn->vn_next); } vd = obj->verdef; while (vd != NULL) { if (vd->vd_version != VER_DEF_CURRENT) { _rtld_error("%s: Unsupported version %d of Elf_Verdef entry", obj->path, vd->vd_version); return (-1); } vernum = VER_DEF_IDX(vd->vd_ndx); if (vernum > maxvernum) maxvernum = vernum; if (vd->vd_next == 0) break; vd = (const Elf_Verdef *) ((char *)vd + vd->vd_next); } if (maxvernum == 0) return (0); /* * Store version information in array indexable by version index. * Verify that object version requirements are satisfied along the * way. */ obj->vernum = maxvernum + 1; obj->vertab = xcalloc(obj->vernum, sizeof(Ver_Entry)); vd = obj->verdef; while (vd != NULL) { if ((vd->vd_flags & VER_FLG_BASE) == 0) { vernum = VER_DEF_IDX(vd->vd_ndx); assert(vernum <= maxvernum); vda = (const Elf_Verdaux *)((char *)vd + vd->vd_aux); obj->vertab[vernum].hash = vd->vd_hash; obj->vertab[vernum].name = obj->strtab + vda->vda_name; obj->vertab[vernum].file = NULL; obj->vertab[vernum].flags = 0; } if (vd->vd_next == 0) break; vd = (const Elf_Verdef *) ((char *)vd + vd->vd_next); } vn = obj->verneed; while (vn != NULL) { depobj = locate_dependency(obj, obj->strtab + vn->vn_file); if (depobj == NULL) return (-1); vna = (const Elf_Vernaux *) ((char *)vn + vn->vn_aux); for (;;) { if (check_object_provided_version(obj, depobj, vna)) return (-1); vernum = VER_NEED_IDX(vna->vna_other); assert(vernum <= maxvernum); obj->vertab[vernum].hash = vna->vna_hash; obj->vertab[vernum].name = obj->strtab + vna->vna_name; obj->vertab[vernum].file = obj->strtab + vn->vn_file; obj->vertab[vernum].flags = (vna->vna_other & VER_NEED_HIDDEN) ? VER_INFO_HIDDEN : 0; if (vna->vna_next == 0) break; vna = (const Elf_Vernaux *) ((char *)vna + vna->vna_next); } if (vn->vn_next == 0) break; vn = (const Elf_Verneed *) ((char *)vn + vn->vn_next); } return 0; } static int rtld_verify_versions(const Objlist *objlist) { Objlist_Entry *entry; int rc; rc = 0; STAILQ_FOREACH(entry, objlist, link) { /* * Skip dummy objects or objects that have their version requirements * already checked. */ if (entry->obj->strtab == NULL || entry->obj->vertab != NULL) continue; if (rtld_verify_object_versions(entry->obj) == -1) { rc = -1; if (ld_tracing == NULL) break; } } if (rc == 0 || ld_tracing != NULL) rc = rtld_verify_object_versions(&obj_rtld); return rc; } const Ver_Entry * fetch_ventry(const Obj_Entry *obj, unsigned long symnum) { Elf_Versym vernum; if (obj->vertab) { vernum = VER_NDX(obj->versyms[symnum]); if (vernum >= obj->vernum) { _rtld_error("%s: symbol %s has wrong verneed value %d", obj->path, obj->strtab + symnum, vernum); } else if (obj->vertab[vernum].hash != 0) { return &obj->vertab[vernum]; } } return NULL; } int _rtld_get_stack_prot(void) { return (stack_prot); } int _rtld_is_dlopened(void *arg) { Obj_Entry *obj; RtldLockState lockstate; int res; rlock_acquire(rtld_bind_lock, &lockstate); obj = dlcheck(arg); if (obj == NULL) obj = obj_from_addr(arg); if (obj == NULL) { _rtld_error("No shared object contains address"); lock_release(rtld_bind_lock, &lockstate); return (-1); } res = obj->dlopened ? 1 : 0; lock_release(rtld_bind_lock, &lockstate); return (res); } static void map_stacks_exec(RtldLockState *lockstate) { void (*thr_map_stacks_exec)(void); if ((max_stack_flags & PF_X) == 0 || (stack_prot & PROT_EXEC) != 0) return; thr_map_stacks_exec = (void (*)(void))(uintptr_t) get_program_var_addr("__pthread_map_stacks_exec", lockstate); if (thr_map_stacks_exec != NULL) { stack_prot |= PROT_EXEC; thr_map_stacks_exec(); } } void symlook_init(SymLook *dst, const char *name) { bzero(dst, sizeof(*dst)); dst->name = name; dst->hash = elf_hash(name); dst->hash_gnu = gnu_hash(name); } static void symlook_init_from_req(SymLook *dst, const SymLook *src) { dst->name = src->name; dst->hash = src->hash; dst->hash_gnu = src->hash_gnu; dst->ventry = src->ventry; dst->flags = src->flags; dst->defobj_out = NULL; dst->sym_out = NULL; dst->lockstate = src->lockstate; } /* * Parse a file descriptor number without pulling in more of libc (e.g. atoi). */ static int parse_libdir(const char *str) { static const int RADIX = 10; /* XXXJA: possibly support hex? */ const char *orig; int fd; char c; orig = str; fd = 0; for (c = *str; c != '\0'; c = *++str) { if (c < '0' || c > '9') return (-1); fd *= RADIX; fd += c - '0'; } /* Make sure we actually parsed something. */ if (str == orig) { _rtld_error("failed to parse directory FD from '%s'", str); return (-1); } return (fd); } /* * Overrides for libc_pic-provided functions. */ int __getosreldate(void) { size_t len; int oid[2]; int error, osrel; if (osreldate != 0) return (osreldate); oid[0] = CTL_KERN; oid[1] = KERN_OSRELDATE; osrel = 0; len = sizeof(osrel); error = sysctl(oid, 2, &osrel, &len, NULL, 0); if (error == 0 && osrel > 0 && len == sizeof(osrel)) osreldate = osrel; return (osreldate); } void exit(int status) { _exit(status); } void (*__cleanup)(void); int __isthreaded = 0; int _thread_autoinit_dummy_decl = 1; /* * No unresolved symbols for rtld. */ void __pthread_cxa_finalize(struct dl_phdr_info *a) { } void __stack_chk_fail(void) { _rtld_error("stack overflow detected; terminated"); rtld_die(); } __weak_reference(__stack_chk_fail, __stack_chk_fail_local); void __chk_fail(void) { _rtld_error("buffer overflow detected; terminated"); rtld_die(); } const char * rtld_strerror(int errnum) { if (errnum < 0 || errnum >= sys_nerr) return ("Unknown error"); return (sys_errlist[errnum]); } Index: projects/sendfile/libexec/rtld-elf/rtld.h =================================================================== --- projects/sendfile/libexec/rtld-elf/rtld.h (revision 293405) +++ projects/sendfile/libexec/rtld-elf/rtld.h (revision 293406) @@ -1,397 +1,396 @@ /*- * Copyright 1996, 1997, 1998, 1999, 2000 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef RTLD_H /* { */ #define RTLD_H 1 #include #include #include #include #include #include #include #include #include "rtld_lock.h" #include "rtld_machdep.h" #define NEW(type) ((type *) xmalloc(sizeof(type))) #define CNEW(type) ((type *) xcalloc(1, sizeof(type))) /* We might as well do booleans like C++. */ typedef unsigned char bool; #define false 0 #define true 1 extern size_t tls_last_offset; extern size_t tls_last_size; extern size_t tls_static_space; extern int tls_dtv_generation; extern int tls_max_index; extern int npagesizes; extern size_t *pagesizes; extern int main_argc; extern char **main_argv; extern char **environ; struct stat; struct Struct_Obj_Entry; /* Lists of shared objects */ typedef struct Struct_Objlist_Entry { STAILQ_ENTRY(Struct_Objlist_Entry) link; struct Struct_Obj_Entry *obj; } Objlist_Entry; typedef STAILQ_HEAD(Struct_Objlist, Struct_Objlist_Entry) Objlist; /* Types of init and fini functions */ typedef void (*InitFunc)(void); typedef void (*InitArrFunc)(int, char **, char **); /* Lists of shared object dependencies */ typedef struct Struct_Needed_Entry { struct Struct_Needed_Entry *next; struct Struct_Obj_Entry *obj; unsigned long name; /* Offset of name in string table */ } Needed_Entry; typedef struct Struct_Name_Entry { STAILQ_ENTRY(Struct_Name_Entry) link; char name[1]; } Name_Entry; /* Lock object */ typedef struct Struct_LockInfo { void *context; /* Client context for creating locks */ void *thelock; /* The one big lock */ /* Debugging aids. */ volatile int rcount; /* Number of readers holding lock */ volatile int wcount; /* Number of writers holding lock */ /* Methods */ void *(*lock_create)(void *context); void (*rlock_acquire)(void *lock); void (*wlock_acquire)(void *lock); void (*rlock_release)(void *lock); void (*wlock_release)(void *lock); void (*lock_destroy)(void *lock); void (*context_destroy)(void *context); } LockInfo; typedef struct Struct_Ver_Entry { Elf_Word hash; unsigned int flags; const char *name; const char *file; } Ver_Entry; typedef struct Struct_Sym_Match_Result { const Elf_Sym *sym_out; const Elf_Sym *vsymp; int vcount; } Sym_Match_Result; #define VER_INFO_HIDDEN 0x01 /* * Shared object descriptor. * * Items marked with "(%)" are dynamically allocated, and must be freed * when the structure is destroyed. * * CAUTION: It appears that the JDK port peeks into these structures. * It looks at "next" and "mapbase" at least. Don't add new members * near the front, until this can be straightened out. */ typedef struct Struct_Obj_Entry { /* * These two items have to be set right for compatibility with the * original ElfKit crt1.o. */ Elf_Size magic; /* Magic number (sanity check) */ Elf_Size version; /* Version number of struct format */ struct Struct_Obj_Entry *next; char *path; /* Pathname of underlying file (%) */ char *origin_path; /* Directory path of origin file */ int refcount; int dl_refcount; /* Number of times loaded by dlopen */ /* These items are computed by map_object() or by digest_phdr(). */ caddr_t mapbase; /* Base address of mapped region */ size_t mapsize; /* Size of mapped region in bytes */ size_t textsize; /* Size of text segment in bytes */ Elf_Addr vaddrbase; /* Base address in shared object file */ caddr_t relocbase; /* Relocation constant = mapbase - vaddrbase */ const Elf_Dyn *dynamic; /* Dynamic section */ caddr_t entry; /* Entry point */ const Elf_Phdr *phdr; /* Program header if it is mapped, else NULL */ size_t phsize; /* Size of program header in bytes */ const char *interp; /* Pathname of the interpreter, if any */ Elf_Word stack_flags; /* TLS information */ int tlsindex; /* Index in DTV for this module */ void *tlsinit; /* Base address of TLS init block */ size_t tlsinitsize; /* Size of TLS init block for this module */ size_t tlssize; /* Size of TLS block for this module */ size_t tlsoffset; /* Offset of static TLS block for this module */ size_t tlsalign; /* Alignment of static TLS block */ caddr_t relro_page; size_t relro_size; /* Items from the dynamic section. */ Elf_Addr *pltgot; /* PLT or GOT, depending on architecture */ const Elf_Rel *rel; /* Relocation entries */ unsigned long relsize; /* Size in bytes of relocation info */ const Elf_Rela *rela; /* Relocation entries with addend */ unsigned long relasize; /* Size in bytes of addend relocation info */ const Elf_Rel *pltrel; /* PLT relocation entries */ unsigned long pltrelsize; /* Size in bytes of PLT relocation info */ const Elf_Rela *pltrela; /* PLT relocation entries with addend */ unsigned long pltrelasize; /* Size in bytes of PLT addend reloc info */ const Elf_Sym *symtab; /* Symbol table */ const char *strtab; /* String table */ unsigned long strsize; /* Size in bytes of string table */ #ifdef __mips__ Elf_Word local_gotno; /* Number of local GOT entries */ Elf_Word symtabno; /* Number of dynamic symbols */ Elf_Word gotsym; /* First dynamic symbol in GOT */ #endif #ifdef __powerpc64__ Elf_Addr glink; /* GLINK PLT call stub section */ #endif const Elf_Verneed *verneed; /* Required versions. */ Elf_Word verneednum; /* Number of entries in verneed table */ const Elf_Verdef *verdef; /* Provided versions. */ Elf_Word verdefnum; /* Number of entries in verdef table */ const Elf_Versym *versyms; /* Symbol versions table */ const Elf_Hashelt *buckets; /* Hash table buckets array */ unsigned long nbuckets; /* Number of buckets */ const Elf_Hashelt *chains; /* Hash table chain array */ unsigned long nchains; /* Number of entries in chain array */ Elf32_Word nbuckets_gnu; /* Number of GNU hash buckets*/ Elf32_Word symndx_gnu; /* 1st accessible symbol on dynsym table */ Elf32_Word maskwords_bm_gnu; /* Bloom filter words - 1 (bitmask) */ Elf32_Word shift2_gnu; /* Bloom filter shift count */ Elf32_Word dynsymcount; /* Total entries in dynsym table */ Elf_Addr *bloom_gnu; /* Bloom filter used by GNU hash func */ const Elf_Hashelt *buckets_gnu; /* GNU hash table bucket array */ const Elf_Hashelt *chain_zero_gnu; /* GNU hash table value array (Zeroed) */ char *rpath; /* Search path specified in object */ char *runpath; /* Search path with different priority */ Needed_Entry *needed; /* Shared objects needed by this one (%) */ Needed_Entry *needed_filtees; Needed_Entry *needed_aux_filtees; STAILQ_HEAD(, Struct_Name_Entry) names; /* List of names for this object we know about. */ Ver_Entry *vertab; /* Versions required /defined by this object */ int vernum; /* Number of entries in vertab */ Elf_Addr init; /* Initialization function to call */ Elf_Addr fini; /* Termination function to call */ Elf_Addr preinit_array; /* Pre-initialization array of functions */ Elf_Addr init_array; /* Initialization array of functions */ Elf_Addr fini_array; /* Termination array of functions */ int preinit_array_num; /* Number of entries in preinit_array */ int init_array_num; /* Number of entries in init_array */ int fini_array_num; /* Number of entries in fini_array */ int32_t osrel; /* OSREL note value */ bool mainprog : 1; /* True if this is the main program */ bool rtld : 1; /* True if this is the dynamic linker */ bool relocated : 1; /* True if processed by relocate_objects() */ bool ver_checked : 1; /* True if processed by rtld_verify_object_versions */ bool textrel : 1; /* True if there are relocations to text seg */ bool symbolic : 1; /* True if generated with "-Bsymbolic" */ bool bind_now : 1; /* True if all relocations should be made first */ bool traced : 1; /* Already printed in ldd trace output */ bool jmpslots_done : 1; /* Already have relocated the jump slots */ bool init_done : 1; /* Already have added object to init list */ bool tls_done : 1; /* Already allocated offset for static TLS */ bool phdr_alloc : 1; /* Phdr is allocated and needs to be freed. */ bool z_origin : 1; /* Process rpath and soname tokens */ bool z_nodelete : 1; /* Do not unload the object and dependencies */ bool z_noopen : 1; /* Do not load on dlopen */ bool z_loadfltr : 1; /* Immediately load filtees */ bool z_interpose : 1; /* Interpose all objects but main */ bool z_nodeflib : 1; /* Don't search default library path */ bool z_global : 1; /* Make the object global */ bool ref_nodel : 1; /* Refcount increased to prevent dlclose */ bool init_scanned: 1; /* Object is already on init list. */ bool on_fini_list: 1; /* Object is already on fini list. */ bool dag_inited : 1; /* Object has its DAG initialized. */ bool filtees_loaded : 1; /* Filtees loaded */ bool irelative : 1; /* Object has R_MACHDEP_IRELATIVE relocs */ bool gnu_ifunc : 1; /* Object has references to STT_GNU_IFUNC */ bool non_plt_gnu_ifunc : 1; /* Object has non-plt IFUNC references */ bool crt_no_init : 1; /* Object' crt does not call _init/_fini */ bool valid_hash_sysv : 1; /* A valid System V hash hash tag is available */ bool valid_hash_gnu : 1; /* A valid GNU hash tag is available */ bool dlopened : 1; /* dlopen()-ed (vs. load statically) */ - bool writable_dynamic : 1; /* PT_DYNAMIC is writable */ struct link_map linkmap; /* For GDB and dlinfo() */ Objlist dldags; /* Object belongs to these dlopened DAGs (%) */ Objlist dagmembers; /* DAG has these members (%) */ dev_t dev; /* Object's filesystem's device */ ino_t ino; /* Object's inode number */ void *priv; /* Platform-dependent */ } Obj_Entry; #define RTLD_MAGIC 0xd550b87a #define RTLD_VERSION 1 #define RTLD_STATIC_TLS_EXTRA 128 /* Flags to be passed into symlook_ family of functions. */ #define SYMLOOK_IN_PLT 0x01 /* Lookup for PLT symbol */ #define SYMLOOK_DLSYM 0x02 /* Return newest versioned symbol. Used by dlsym. */ #define SYMLOOK_EARLY 0x04 /* Symlook is done during initialization. */ #define SYMLOOK_IFUNC 0x08 /* Allow IFUNC processing in reloc_non_plt(). */ /* Flags for load_object(). */ #define RTLD_LO_NOLOAD 0x01 /* dlopen() specified RTLD_NOLOAD. */ #define RTLD_LO_DLOPEN 0x02 /* Load_object() called from dlopen(). */ #define RTLD_LO_TRACE 0x04 /* Only tracing. */ #define RTLD_LO_NODELETE 0x08 /* Loaded object cannot be closed. */ #define RTLD_LO_FILTEES 0x10 /* Loading filtee. */ #define RTLD_LO_EARLY 0x20 /* Do not call ctors, postpone it to the initialization during the image start. */ /* * Symbol cache entry used during relocation to avoid multiple lookups * of the same symbol. */ typedef struct Struct_SymCache { const Elf_Sym *sym; /* Symbol table entry */ const Obj_Entry *obj; /* Shared object which defines it */ } SymCache; /* * This structure provides a reentrant way to keep a list of objects and * check which ones have already been processed in some way. */ typedef struct Struct_DoneList { const Obj_Entry **objs; /* Array of object pointers */ unsigned int num_alloc; /* Allocated size of the array */ unsigned int num_used; /* Number of array slots used */ } DoneList; struct Struct_RtldLockState { int lockstate; sigjmp_buf env; }; struct fill_search_info_args { int request; unsigned int flags; struct dl_serinfo *serinfo; struct dl_serpath *serpath; char *strspace; }; /* * The pack of arguments and results for the symbol lookup functions. */ typedef struct Struct_SymLook { const char *name; unsigned long hash; uint32_t hash_gnu; const Ver_Entry *ventry; int flags; const Obj_Entry *defobj_out; const Elf_Sym *sym_out; struct Struct_RtldLockState *lockstate; } SymLook; void _rtld_error(const char *, ...) __printflike(1, 2) __exported; void rtld_die(void) __dead2; const char *rtld_strerror(int); Obj_Entry *map_object(int, const char *, const struct stat *); void *xcalloc(size_t, size_t); void *xmalloc(size_t); char *xstrdup(const char *); void *malloc_aligned(size_t size, size_t align); void free_aligned(void *ptr); extern Elf_Addr _GLOBAL_OFFSET_TABLE_[]; extern Elf_Sym sym_zero; /* For resolving undefined weak refs. */ void dump_relocations(Obj_Entry *); void dump_obj_relocations(Obj_Entry *); void dump_Elf_Rel(Obj_Entry *, const Elf_Rel *, u_long); void dump_Elf_Rela(Obj_Entry *, const Elf_Rela *, u_long); /* * Function declarations. */ unsigned long elf_hash(const char *); const Elf_Sym *find_symdef(unsigned long, const Obj_Entry *, const Obj_Entry **, int, SymCache *, struct Struct_RtldLockState *); void init_pltgot(Obj_Entry *); void lockdflt_init(void); void digest_notes(Obj_Entry *, Elf_Addr, Elf_Addr); void obj_free(Obj_Entry *); Obj_Entry *obj_new(void); void _rtld_bind_start(void); void *rtld_resolve_ifunc(const Obj_Entry *obj, const Elf_Sym *def); void symlook_init(SymLook *, const char *); int symlook_obj(SymLook *, const Obj_Entry *); void *tls_get_addr_common(Elf_Addr** dtvp, int index, size_t offset); void *allocate_tls(Obj_Entry *, void *, size_t, size_t); void free_tls(void *, size_t, size_t); void *allocate_module_tls(int index); bool allocate_tls_offset(Obj_Entry *obj); void free_tls_offset(Obj_Entry *obj); const Ver_Entry *fetch_ventry(const Obj_Entry *obj, unsigned long); /* * MD function declarations. */ int do_copy_relocations(Obj_Entry *); int reloc_non_plt(Obj_Entry *, Obj_Entry *, int flags, struct Struct_RtldLockState *); int reloc_plt(Obj_Entry *); int reloc_jmpslots(Obj_Entry *, int flags, struct Struct_RtldLockState *); int reloc_iresolve(Obj_Entry *, struct Struct_RtldLockState *); int reloc_gnu_ifunc(Obj_Entry *, int flags, struct Struct_RtldLockState *); void allocate_initial_tls(Obj_Entry *); #endif /* } */ Index: projects/sendfile/share/man/man4/ioat.4 =================================================================== --- projects/sendfile/share/man/man4/ioat.4 (revision 293405) +++ projects/sendfile/share/man/man4/ioat.4 (revision 293406) @@ -1,249 +1,259 @@ .\" Copyright (c) 2015 EMC / Isilon Storage Division .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd January 5, 2016 +.Dd January 7, 2016 .Dt IOAT 4 .Os .Sh NAME .Nm I/OAT .Nd Intel I/O Acceleration Technology .Sh SYNOPSIS To compile this driver into your kernel, place the following line in your kernel configuration file: .Bd -ragged -offset indent .Cd "device ioat" .Ed .Pp Or, to load the driver as a module at boot, place the following line in .Xr loader.conf 5 : .Bd -literal -offset indent ioat_load="YES" .Ed .Pp In .Xr loader.conf 5 : .Pp .Cd hw.ioat.force_legacy_interrupts=0 .Pp In .Xr loader.conf 5 or .Xr sysctl.conf 5 : .Pp .Cd hw.ioat.enable_ioat_test=0 .Cd hw.ioat.debug_level=0 (only critical errors; maximum of 3) .Pp .Ft typedef void .Fn (*bus_dmaengine_callback_t) "void *arg" "int error" .Pp .Ft bus_dmaengine_t .Fn ioat_get_dmaengine "uint32_t channel_index" .Ft void .Fn ioat_put_dmaengine "bus_dmaengine_t dmaengine" .Ft int .Fn ioat_get_hwversion "bus_dmaengine_t dmaengine" .Ft size_t .Fn ioat_get_max_io_size "bus_dmaengine_t dmaengine" .Ft int .Fn ioat_set_interrupt_coalesce "bus_dmaengine_t dmaengine" "uint16_t delay" .Ft uint16_t .Fn ioat_get_max_coalesce_period "bus_dmaengine_t dmaengine" .Ft void .Fn ioat_acquire "bus_dmaengine_t dmaengine" +.Ft int +.Fn ioat_acquire_reserve "bus_dmaengine_t dmaengine" "uint32_t n" "int mflags" .Ft void .Fn ioat_release "bus_dmaengine_t dmaengine" .Ft struct bus_dmadesc * .Fo ioat_copy .Fa "bus_dmaengine_t dmaengine" .Fa "bus_addr_t dst" .Fa "bus_addr_t src" .Fa "bus_size_t len" .Fa "bus_dmaengine_callback_t callback_fn" .Fa "void *callback_arg" .Fa "uint32_t flags" .Fc .Ft struct bus_dmadesc * .Fo ioat_copy_8k_aligned .Fa "bus_dmaengine_t dmaengine" .Fa "bus_addr_t dst1" .Fa "bus_addr_t dst2" .Fa "bus_addr_t src1" .Fa "bus_addr_t src2" .Fa "bus_dmaengine_callback_t callback_fn" .Fa "void *callback_arg" .Fa "uint32_t flags" .Fc .Ft struct bus_dmadesc * .Fo ioat_blockfill .Fa "bus_dmaengine_t dmaengine" .Fa "bus_addr_t dst" .Fa "uint64_t fillpattern" .Fa "bus_size_t len" .Fa "bus_dmaengine_callback_t callback_fn" .Fa "void *callback_arg" .Fa "uint32_t flags" .Fc .Ft struct bus_dmadesc * .Fo ioat_null .Fa "bus_dmaengine_t dmaengine" .Fa "bus_dmaengine_callback_t callback_fn" .Fa "void *callback_arg" .Fa "uint32_t flags" .Fc .Sh DESCRIPTION The .Nm driver provides a kernel API to a variety of DMA engines on some Intel server platforms. .Pp There is a number of DMA channels per CPU package. (Typically 4 or 8.) Each may be used independently. Operations on a single channel proceed sequentially. .Pp Blockfill operations can be used to write a 64-bit pattern to memory. .Pp Copy operations can be used to offload memory copies to the DMA engines. .Pp Null operations do nothing, but may be used to test the interrupt and callback mechanism. .Pp All operations can optionally trigger an interrupt at completion with the .Ar DMA_EN_INT flag. For example, a user might submit multiple operations to the same channel and only enable an interrupt and callback for the last operation. .Pp The hardware can delay and coalesce interrupts on a given channel for a configurable period of time, in microseconds. This may be desired to reduce the processing and interrupt overhead per descriptor, especially for workflows consisting of many small operations. Software can control this on a per-channel basis with the .Fn ioat_set_interrupt_coalesce API. The .Fn ioat_get_max_coalesce_period API can be used to determine the maximum coalescing period supported by the hardware, in microseconds. Current platforms support up to a 16.383 millisecond coalescing period. Optimal configuration will vary by workflow and desired operation latency. .Pp All operations are safe to use in a non-blocking context with the .Ar DMA_NO_WAIT flag. (Of course, allocations may fail and operations requested with .Ar DMA_NO_WAIT may return NULL.) .Pp All operations, as well as .Fn ioat_get_dmaengine , can return NULL in special circumstances. For example, if the .Nm driver is being unloaded, or the administrator has induced a hardware reset, or a usage error has resulted in a hardware error state that needs to be recovered from. .Pp It is invalid to attempt to submit new DMA operations in a .Fa bus_dmaengine_callback_t context. .Sh USAGE A typical user will lookup the DMA engine object for a given channel with .Fn ioat_get_dmaengine . When the user wants to offload a copy, they will first .Fn ioat_acquire the .Ar bus_dmaengine_t object for exclusive access to enqueue operations on that channel. +Optionally, the user can reserve space by using +.Fn ioat_acquire_reserve +instead. +If +.Fn ioat_acquire_reserve +succeeds, there is guaranteed to be room for +.Fa N +new operations in the internal ring buffer. Then, they will submit one or more operations using .Fn ioat_blockfill , .Fn ioat_copy , or .Fn ioat_null . After queuing one or more individual DMA operations, they will .Fn ioat_release the .Ar bus_dmaengine_t to drop their exclusive access to the channel. The routine they provided for the .Fa callback_fn argument will be invoked with the provided .Fa callback_arg when the operation is complete. When they are finished with the .Ar bus_dmaengine_t , the user should .Fn ioat_put_dmaengine . .Pp Users MUST NOT block between .Fn ioat_acquire and .Fn ioat_release . Users SHOULD NOT hold .Ar bus_dmaengine_t references for a very long time to enable fault recovery and kernel module unload. .Pp For an example of usage, see .Pa src/sys/dev/ioat/ioat_test.c . .Sh FILES .Bl -tag .It Pa /dev/ioat_test test device for .Xr ioatcontrol 8 .El .Sh SEE ALSO .Xr ioatcontrol 8 .Sh HISTORY The .Nm driver first appeared in .Fx 11.0 . .Sh AUTHORS The .Nm driver was developed by .An \&Jim Harris Aq Mt jimharris@FreeBSD.org , .An \&Carl Delsey Aq Mt carl.r.delsey@intel.com , and .An \&Conrad Meyer Aq Mt cem@FreeBSD.org . This manual page was written by .An \&Conrad Meyer Aq Mt cem@FreeBSD.org . .Sh CAVEATS Copy operation takes bus addresses as parameters, not virtual addresses. .Pp Buffers for individual copy operations must be physically contiguous. .Pp Copies larger than max transfer size (1MB, but may vary by hardware) are not supported. Future versions will likely support this by breaking up the transfer into smaller sizes. .Sh BUGS The .Nm driver only supports blockfill, copy, and null operations at this time. The driver does not yet support advanced DMA modes, such as XOR, that some I/OAT devices support. Index: projects/sendfile/share/man/man4 =================================================================== --- projects/sendfile/share/man/man4 (revision 293405) +++ projects/sendfile/share/man/man4 (revision 293406) Property changes on: projects/sendfile/share/man/man4 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/share/man/man4:r293383-293405 Index: projects/sendfile/share =================================================================== --- projects/sendfile/share (revision 293405) +++ projects/sendfile/share (revision 293406) Property changes on: projects/sendfile/share ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/share:r293383-293405 Index: projects/sendfile/sys/dev/ioat/ioat.c =================================================================== --- projects/sendfile/sys/dev/ioat/ioat.c (revision 293405) +++ projects/sendfile/sys/dev/ioat/ioat.c (revision 293406) @@ -1,1856 +1,1871 @@ /*- * Copyright (C) 2012 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ioat.h" #include "ioat_hw.h" #include "ioat_internal.h" #define IOAT_INTR_TIMO (hz / 10) #define IOAT_REFLK (&ioat->submit_lock) static int ioat_probe(device_t device); static int ioat_attach(device_t device); static int ioat_detach(device_t device); static int ioat_setup_intr(struct ioat_softc *ioat); static int ioat_teardown_intr(struct ioat_softc *ioat); static int ioat3_attach(device_t device); static int ioat_start_channel(struct ioat_softc *ioat); static int ioat_map_pci_bar(struct ioat_softc *ioat); static void ioat_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error); static void ioat_interrupt_handler(void *arg); static boolean_t ioat_model_resets_msix(struct ioat_softc *ioat); static int chanerr_to_errno(uint32_t); static void ioat_process_events(struct ioat_softc *ioat); static inline uint32_t ioat_get_active(struct ioat_softc *ioat); static inline uint32_t ioat_get_ring_space(struct ioat_softc *ioat); static void ioat_free_ring(struct ioat_softc *, uint32_t size, struct ioat_descriptor **); static void ioat_free_ring_entry(struct ioat_softc *ioat, struct ioat_descriptor *desc); static struct ioat_descriptor *ioat_alloc_ring_entry(struct ioat_softc *, int mflags); static int ioat_reserve_space(struct ioat_softc *, uint32_t, int mflags); static struct ioat_descriptor *ioat_get_ring_entry(struct ioat_softc *ioat, uint32_t index); static struct ioat_descriptor **ioat_prealloc_ring(struct ioat_softc *, uint32_t size, boolean_t need_dscr, int mflags); static int ring_grow(struct ioat_softc *, uint32_t oldorder, struct ioat_descriptor **); static int ring_shrink(struct ioat_softc *, uint32_t oldorder, struct ioat_descriptor **); static void ioat_halted_debug(struct ioat_softc *, uint32_t); static void ioat_timer_callback(void *arg); static void dump_descriptor(void *hw_desc); static void ioat_submit_single(struct ioat_softc *ioat); static void ioat_comp_update_map(void *arg, bus_dma_segment_t *seg, int nseg, int error); static int ioat_reset_hw(struct ioat_softc *ioat); static void ioat_setup_sysctl(device_t device); static int sysctl_handle_reset(SYSCTL_HANDLER_ARGS); static inline struct ioat_softc *ioat_get(struct ioat_softc *, enum ioat_ref_kind); static inline void ioat_put(struct ioat_softc *, enum ioat_ref_kind); static inline void _ioat_putn(struct ioat_softc *, uint32_t, enum ioat_ref_kind, boolean_t); static inline void ioat_putn(struct ioat_softc *, uint32_t, enum ioat_ref_kind); static inline void ioat_putn_locked(struct ioat_softc *, uint32_t, enum ioat_ref_kind); static void ioat_drain_locked(struct ioat_softc *); #define ioat_log_message(v, ...) do { \ if ((v) <= g_ioat_debug_level) { \ device_printf(ioat->device, __VA_ARGS__); \ } \ } while (0) MALLOC_DEFINE(M_IOAT, "ioat", "ioat driver memory allocations"); SYSCTL_NODE(_hw, OID_AUTO, ioat, CTLFLAG_RD, 0, "ioat node"); static int g_force_legacy_interrupts; SYSCTL_INT(_hw_ioat, OID_AUTO, force_legacy_interrupts, CTLFLAG_RDTUN, &g_force_legacy_interrupts, 0, "Set to non-zero to force MSI-X disabled"); int g_ioat_debug_level = 0; SYSCTL_INT(_hw_ioat, OID_AUTO, debug_level, CTLFLAG_RWTUN, &g_ioat_debug_level, 0, "Set log level (0-3) for ioat(4). Higher is more verbose."); /* * OS <-> Driver interface structures */ static device_method_t ioat_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ioat_probe), DEVMETHOD(device_attach, ioat_attach), DEVMETHOD(device_detach, ioat_detach), { 0, 0 } }; static driver_t ioat_pci_driver = { "ioat", ioat_pci_methods, sizeof(struct ioat_softc), }; static devclass_t ioat_devclass; DRIVER_MODULE(ioat, pci, ioat_pci_driver, ioat_devclass, 0, 0); MODULE_VERSION(ioat, 1); /* * Private data structures */ static struct ioat_softc *ioat_channel[IOAT_MAX_CHANNELS]; static int ioat_channel_index = 0; SYSCTL_INT(_hw_ioat, OID_AUTO, channels, CTLFLAG_RD, &ioat_channel_index, 0, "Number of IOAT channels attached"); static struct _pcsid { u_int32_t type; const char *desc; } pci_ids[] = { { 0x34308086, "TBG IOAT Ch0" }, { 0x34318086, "TBG IOAT Ch1" }, { 0x34328086, "TBG IOAT Ch2" }, { 0x34338086, "TBG IOAT Ch3" }, { 0x34298086, "TBG IOAT Ch4" }, { 0x342a8086, "TBG IOAT Ch5" }, { 0x342b8086, "TBG IOAT Ch6" }, { 0x342c8086, "TBG IOAT Ch7" }, { 0x37108086, "JSF IOAT Ch0" }, { 0x37118086, "JSF IOAT Ch1" }, { 0x37128086, "JSF IOAT Ch2" }, { 0x37138086, "JSF IOAT Ch3" }, { 0x37148086, "JSF IOAT Ch4" }, { 0x37158086, "JSF IOAT Ch5" }, { 0x37168086, "JSF IOAT Ch6" }, { 0x37178086, "JSF IOAT Ch7" }, { 0x37188086, "JSF IOAT Ch0 (RAID)" }, { 0x37198086, "JSF IOAT Ch1 (RAID)" }, { 0x3c208086, "SNB IOAT Ch0" }, { 0x3c218086, "SNB IOAT Ch1" }, { 0x3c228086, "SNB IOAT Ch2" }, { 0x3c238086, "SNB IOAT Ch3" }, { 0x3c248086, "SNB IOAT Ch4" }, { 0x3c258086, "SNB IOAT Ch5" }, { 0x3c268086, "SNB IOAT Ch6" }, { 0x3c278086, "SNB IOAT Ch7" }, { 0x3c2e8086, "SNB IOAT Ch0 (RAID)" }, { 0x3c2f8086, "SNB IOAT Ch1 (RAID)" }, { 0x0e208086, "IVB IOAT Ch0" }, { 0x0e218086, "IVB IOAT Ch1" }, { 0x0e228086, "IVB IOAT Ch2" }, { 0x0e238086, "IVB IOAT Ch3" }, { 0x0e248086, "IVB IOAT Ch4" }, { 0x0e258086, "IVB IOAT Ch5" }, { 0x0e268086, "IVB IOAT Ch6" }, { 0x0e278086, "IVB IOAT Ch7" }, { 0x0e2e8086, "IVB IOAT Ch0 (RAID)" }, { 0x0e2f8086, "IVB IOAT Ch1 (RAID)" }, { 0x2f208086, "HSW IOAT Ch0" }, { 0x2f218086, "HSW IOAT Ch1" }, { 0x2f228086, "HSW IOAT Ch2" }, { 0x2f238086, "HSW IOAT Ch3" }, { 0x2f248086, "HSW IOAT Ch4" }, { 0x2f258086, "HSW IOAT Ch5" }, { 0x2f268086, "HSW IOAT Ch6" }, { 0x2f278086, "HSW IOAT Ch7" }, { 0x2f2e8086, "HSW IOAT Ch0 (RAID)" }, { 0x2f2f8086, "HSW IOAT Ch1 (RAID)" }, { 0x0c508086, "BWD IOAT Ch0" }, { 0x0c518086, "BWD IOAT Ch1" }, { 0x0c528086, "BWD IOAT Ch2" }, { 0x0c538086, "BWD IOAT Ch3" }, { 0x6f508086, "BDXDE IOAT Ch0" }, { 0x6f518086, "BDXDE IOAT Ch1" }, { 0x6f528086, "BDXDE IOAT Ch2" }, { 0x6f538086, "BDXDE IOAT Ch3" }, { 0x6f208086, "BDX IOAT Ch0" }, { 0x6f218086, "BDX IOAT Ch1" }, { 0x6f228086, "BDX IOAT Ch2" }, { 0x6f238086, "BDX IOAT Ch3" }, { 0x6f248086, "BDX IOAT Ch4" }, { 0x6f258086, "BDX IOAT Ch5" }, { 0x6f268086, "BDX IOAT Ch6" }, { 0x6f278086, "BDX IOAT Ch7" }, { 0x6f2e8086, "BDX IOAT Ch0 (RAID)" }, { 0x6f2f8086, "BDX IOAT Ch1 (RAID)" }, { 0x00000000, NULL } }; /* * OS <-> Driver linkage functions */ static int ioat_probe(device_t device) { struct _pcsid *ep; u_int32_t type; type = pci_get_devid(device); for (ep = pci_ids; ep->type; ep++) { if (ep->type == type) { device_set_desc(device, ep->desc); return (0); } } return (ENXIO); } static int ioat_attach(device_t device) { struct ioat_softc *ioat; int error; ioat = DEVICE2SOFTC(device); ioat->device = device; error = ioat_map_pci_bar(ioat); if (error != 0) goto err; ioat->version = ioat_read_cbver(ioat); if (ioat->version < IOAT_VER_3_0) { error = ENODEV; goto err; } error = ioat3_attach(device); if (error != 0) goto err; error = pci_enable_busmaster(device); if (error != 0) goto err; error = ioat_setup_intr(ioat); if (error != 0) goto err; error = ioat_reset_hw(ioat); if (error != 0) goto err; ioat_process_events(ioat); ioat_setup_sysctl(device); ioat->chan_idx = ioat_channel_index; ioat_channel[ioat_channel_index++] = ioat; ioat_test_attach(); err: if (error != 0) ioat_detach(device); return (error); } static int ioat_detach(device_t device) { struct ioat_softc *ioat; ioat = DEVICE2SOFTC(device); ioat_test_detach(); mtx_lock(IOAT_REFLK); ioat->quiescing = TRUE; ioat_channel[ioat->chan_idx] = NULL; ioat_drain_locked(ioat); mtx_unlock(IOAT_REFLK); ioat_teardown_intr(ioat); callout_drain(&ioat->timer); pci_disable_busmaster(device); if (ioat->pci_resource != NULL) bus_release_resource(device, SYS_RES_MEMORY, ioat->pci_resource_id, ioat->pci_resource); if (ioat->ring != NULL) ioat_free_ring(ioat, 1 << ioat->ring_size_order, ioat->ring); if (ioat->comp_update != NULL) { bus_dmamap_unload(ioat->comp_update_tag, ioat->comp_update_map); bus_dmamem_free(ioat->comp_update_tag, ioat->comp_update, ioat->comp_update_map); bus_dma_tag_destroy(ioat->comp_update_tag); } bus_dma_tag_destroy(ioat->hw_desc_tag); return (0); } static int ioat_teardown_intr(struct ioat_softc *ioat) { if (ioat->tag != NULL) bus_teardown_intr(ioat->device, ioat->res, ioat->tag); if (ioat->res != NULL) bus_release_resource(ioat->device, SYS_RES_IRQ, rman_get_rid(ioat->res), ioat->res); pci_release_msi(ioat->device); return (0); } static int ioat_start_channel(struct ioat_softc *ioat) { uint64_t status; uint32_t chanerr; int i; ioat_acquire(&ioat->dmaengine); ioat_null(&ioat->dmaengine, NULL, NULL, 0); ioat_release(&ioat->dmaengine); for (i = 0; i < 100; i++) { DELAY(1); status = ioat_get_chansts(ioat); if (is_ioat_idle(status)) return (0); } chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET); ioat_log_message(0, "could not start channel: " "status = %#jx error = %b\n", (uintmax_t)status, (int)chanerr, IOAT_CHANERR_STR); return (ENXIO); } /* * Initialize Hardware */ static int ioat3_attach(device_t device) { struct ioat_softc *ioat; struct ioat_descriptor **ring; struct ioat_descriptor *next; struct ioat_dma_hw_descriptor *dma_hw_desc; int i, num_descriptors; int error; uint8_t xfercap; error = 0; ioat = DEVICE2SOFTC(device); ioat->capabilities = ioat_read_dmacapability(ioat); ioat_log_message(1, "Capabilities: %b\n", (int)ioat->capabilities, IOAT_DMACAP_STR); xfercap = ioat_read_xfercap(ioat); ioat->max_xfer_size = 1 << xfercap; ioat->intrdelay_supported = (ioat_read_2(ioat, IOAT_INTRDELAY_OFFSET) & IOAT_INTRDELAY_SUPPORTED) != 0; if (ioat->intrdelay_supported) ioat->intrdelay_max = IOAT_INTRDELAY_US_MASK; /* TODO: need to check DCA here if we ever do XOR/PQ */ mtx_init(&ioat->submit_lock, "ioat_submit", NULL, MTX_DEF); mtx_init(&ioat->cleanup_lock, "ioat_cleanup", NULL, MTX_DEF); callout_init(&ioat->timer, 1); /* Establish lock order for Witness */ mtx_lock(&ioat->submit_lock); mtx_lock(&ioat->cleanup_lock); mtx_unlock(&ioat->cleanup_lock); mtx_unlock(&ioat->submit_lock); ioat->is_resize_pending = FALSE; ioat->is_completion_pending = FALSE; ioat->is_reset_pending = FALSE; ioat->is_channel_running = FALSE; bus_dma_tag_create(bus_get_dma_tag(ioat->device), sizeof(uint64_t), 0x0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, sizeof(uint64_t), 1, sizeof(uint64_t), 0, NULL, NULL, &ioat->comp_update_tag); error = bus_dmamem_alloc(ioat->comp_update_tag, (void **)&ioat->comp_update, BUS_DMA_ZERO, &ioat->comp_update_map); if (ioat->comp_update == NULL) return (ENOMEM); error = bus_dmamap_load(ioat->comp_update_tag, ioat->comp_update_map, ioat->comp_update, sizeof(uint64_t), ioat_comp_update_map, ioat, 0); if (error != 0) return (error); ioat->ring_size_order = IOAT_MIN_ORDER; num_descriptors = 1 << ioat->ring_size_order; bus_dma_tag_create(bus_get_dma_tag(ioat->device), 0x40, 0x0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, sizeof(struct ioat_dma_hw_descriptor), 1, sizeof(struct ioat_dma_hw_descriptor), 0, NULL, NULL, &ioat->hw_desc_tag); ioat->ring = malloc(num_descriptors * sizeof(*ring), M_IOAT, M_ZERO | M_WAITOK); if (ioat->ring == NULL) return (ENOMEM); ring = ioat->ring; for (i = 0; i < num_descriptors; i++) { ring[i] = ioat_alloc_ring_entry(ioat, M_WAITOK); if (ring[i] == NULL) return (ENOMEM); ring[i]->id = i; } for (i = 0; i < num_descriptors - 1; i++) { next = ring[i + 1]; dma_hw_desc = ring[i]->u.dma; dma_hw_desc->next = next->hw_desc_bus_addr; } ring[i]->u.dma->next = ring[0]->hw_desc_bus_addr; ioat->head = ioat->hw_head = 0; ioat->tail = 0; ioat->last_seen = 0; return (0); } static int ioat_map_pci_bar(struct ioat_softc *ioat) { ioat->pci_resource_id = PCIR_BAR(0); ioat->pci_resource = bus_alloc_resource_any(ioat->device, SYS_RES_MEMORY, &ioat->pci_resource_id, RF_ACTIVE); if (ioat->pci_resource == NULL) { ioat_log_message(0, "unable to allocate pci resource\n"); return (ENODEV); } ioat->pci_bus_tag = rman_get_bustag(ioat->pci_resource); ioat->pci_bus_handle = rman_get_bushandle(ioat->pci_resource); return (0); } static void ioat_comp_update_map(void *arg, bus_dma_segment_t *seg, int nseg, int error) { struct ioat_softc *ioat = arg; KASSERT(error == 0, ("%s: error:%d", __func__, error)); ioat->comp_update_bus_addr = seg[0].ds_addr; } static void ioat_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *baddr; KASSERT(error == 0, ("%s: error:%d", __func__, error)); baddr = arg; *baddr = segs->ds_addr; } /* * Interrupt setup and handlers */ static int ioat_setup_intr(struct ioat_softc *ioat) { uint32_t num_vectors; int error; boolean_t use_msix; boolean_t force_legacy_interrupts; use_msix = FALSE; force_legacy_interrupts = FALSE; if (!g_force_legacy_interrupts && pci_msix_count(ioat->device) >= 1) { num_vectors = 1; pci_alloc_msix(ioat->device, &num_vectors); if (num_vectors == 1) use_msix = TRUE; } if (use_msix) { ioat->rid = 1; ioat->res = bus_alloc_resource_any(ioat->device, SYS_RES_IRQ, &ioat->rid, RF_ACTIVE); } else { ioat->rid = 0; ioat->res = bus_alloc_resource_any(ioat->device, SYS_RES_IRQ, &ioat->rid, RF_SHAREABLE | RF_ACTIVE); } if (ioat->res == NULL) { ioat_log_message(0, "bus_alloc_resource failed\n"); return (ENOMEM); } ioat->tag = NULL; error = bus_setup_intr(ioat->device, ioat->res, INTR_MPSAFE | INTR_TYPE_MISC, NULL, ioat_interrupt_handler, ioat, &ioat->tag); if (error != 0) { ioat_log_message(0, "bus_setup_intr failed\n"); return (error); } ioat_write_intrctrl(ioat, IOAT_INTRCTRL_MASTER_INT_EN); return (0); } static boolean_t ioat_model_resets_msix(struct ioat_softc *ioat) { u_int32_t pciid; pciid = pci_get_devid(ioat->device); switch (pciid) { /* BWD: */ case 0x0c508086: case 0x0c518086: case 0x0c528086: case 0x0c538086: /* BDXDE: */ case 0x6f508086: case 0x6f518086: case 0x6f528086: case 0x6f538086: return (TRUE); } return (FALSE); } static void ioat_interrupt_handler(void *arg) { struct ioat_softc *ioat = arg; ioat->stats.interrupts++; ioat_process_events(ioat); } static int chanerr_to_errno(uint32_t chanerr) { if (chanerr == 0) return (0); if ((chanerr & (IOAT_CHANERR_XSADDERR | IOAT_CHANERR_XDADDERR)) != 0) return (EFAULT); if ((chanerr & (IOAT_CHANERR_RDERR | IOAT_CHANERR_WDERR)) != 0) return (EIO); /* This one is probably our fault: */ if ((chanerr & IOAT_CHANERR_NDADDERR) != 0) return (EIO); return (EIO); } static void ioat_process_events(struct ioat_softc *ioat) { struct ioat_descriptor *desc; struct bus_dmadesc *dmadesc; uint64_t comp_update, status; uint32_t completed, chanerr; int error; mtx_lock(&ioat->cleanup_lock); completed = 0; comp_update = *ioat->comp_update; status = comp_update & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK; CTR0(KTR_IOAT, __func__); if (status == ioat->last_seen) goto out; while (1) { desc = ioat_get_ring_entry(ioat, ioat->tail); dmadesc = &desc->bus_dmadesc; CTR1(KTR_IOAT, "completing desc %d", ioat->tail); if (dmadesc->callback_fn != NULL) dmadesc->callback_fn(dmadesc->callback_arg, 0); completed++; ioat->tail++; if (desc->hw_desc_bus_addr == status) break; } ioat->last_seen = desc->hw_desc_bus_addr; if (ioat->head == ioat->tail) { ioat->is_completion_pending = FALSE; callout_reset(&ioat->timer, IOAT_INTR_TIMO, ioat_timer_callback, ioat); } ioat->stats.descriptors_processed += completed; out: ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN); mtx_unlock(&ioat->cleanup_lock); ioat_putn(ioat, completed, IOAT_ACTIVE_DESCR_REF); wakeup(&ioat->tail); if (!is_ioat_halted(comp_update)) return; ioat->stats.channel_halts++; /* * Fatal programming error on this DMA channel. Flush any outstanding * work with error status and restart the engine. */ ioat_log_message(0, "Channel halted due to fatal programming error\n"); mtx_lock(&ioat->submit_lock); mtx_lock(&ioat->cleanup_lock); ioat->quiescing = TRUE; chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET); ioat_halted_debug(ioat, chanerr); ioat->stats.last_halt_chanerr = chanerr; while (ioat_get_active(ioat) > 0) { desc = ioat_get_ring_entry(ioat, ioat->tail); dmadesc = &desc->bus_dmadesc; CTR1(KTR_IOAT, "completing err desc %d", ioat->tail); if (dmadesc->callback_fn != NULL) dmadesc->callback_fn(dmadesc->callback_arg, chanerr_to_errno(chanerr)); ioat_putn_locked(ioat, 1, IOAT_ACTIVE_DESCR_REF); ioat->tail++; ioat->stats.descriptors_processed++; ioat->stats.descriptors_error++; } /* Clear error status */ ioat_write_4(ioat, IOAT_CHANERR_OFFSET, chanerr); mtx_unlock(&ioat->cleanup_lock); mtx_unlock(&ioat->submit_lock); ioat_log_message(0, "Resetting channel to recover from error\n"); error = ioat_reset_hw(ioat); KASSERT(error == 0, ("%s: reset failed: %d", __func__, error)); } /* * User API functions */ bus_dmaengine_t ioat_get_dmaengine(uint32_t index) { struct ioat_softc *sc; if (index >= ioat_channel_index) return (NULL); sc = ioat_channel[index]; if (sc == NULL || sc->quiescing) return (NULL); return (&ioat_get(sc, IOAT_DMAENGINE_REF)->dmaengine); } void ioat_put_dmaengine(bus_dmaengine_t dmaengine) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); ioat_put(ioat, IOAT_DMAENGINE_REF); } int ioat_get_hwversion(bus_dmaengine_t dmaengine) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); return (ioat->version); } size_t ioat_get_max_io_size(bus_dmaengine_t dmaengine) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); return (ioat->max_xfer_size); } int ioat_set_interrupt_coalesce(bus_dmaengine_t dmaengine, uint16_t delay) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); if (!ioat->intrdelay_supported) return (ENODEV); if (delay > ioat->intrdelay_max) return (ERANGE); ioat_write_2(ioat, IOAT_INTRDELAY_OFFSET, delay); ioat->cached_intrdelay = ioat_read_2(ioat, IOAT_INTRDELAY_OFFSET) & IOAT_INTRDELAY_US_MASK; return (0); } uint16_t ioat_get_max_coalesce_period(bus_dmaengine_t dmaengine) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); return (ioat->intrdelay_max); } void ioat_acquire(bus_dmaengine_t dmaengine) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); mtx_lock(&ioat->submit_lock); CTR0(KTR_IOAT, __func__); } +int +ioat_acquire_reserve(bus_dmaengine_t dmaengine, unsigned n, int mflags) +{ + struct ioat_softc *ioat; + int error; + + ioat = to_ioat_softc(dmaengine); + ioat_acquire(dmaengine); + + error = ioat_reserve_space(ioat, n, mflags); + if (error != 0) + ioat_release(dmaengine); + return (error); +} + void ioat_release(bus_dmaengine_t dmaengine) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); CTR0(KTR_IOAT, __func__); ioat_write_2(ioat, IOAT_DMACOUNT_OFFSET, (uint16_t)ioat->hw_head); mtx_unlock(&ioat->submit_lock); } static struct ioat_descriptor * ioat_op_generic(struct ioat_softc *ioat, uint8_t op, uint32_t size, uint64_t src, uint64_t dst, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags) { struct ioat_generic_hw_descriptor *hw_desc; struct ioat_descriptor *desc; int mflags; mtx_assert(&ioat->submit_lock, MA_OWNED); KASSERT((flags & ~DMA_ALL_FLAGS) == 0, ("Unrecognized flag(s): %#x", flags & ~DMA_ALL_FLAGS)); if ((flags & DMA_NO_WAIT) != 0) mflags = M_NOWAIT; else mflags = M_WAITOK; if (size > ioat->max_xfer_size) { ioat_log_message(0, "%s: max_xfer_size = %d, requested = %u\n", __func__, ioat->max_xfer_size, (unsigned)size); return (NULL); } if (ioat_reserve_space(ioat, 1, mflags) != 0) return (NULL); desc = ioat_get_ring_entry(ioat, ioat->head); hw_desc = desc->u.generic; hw_desc->u.control_raw = 0; hw_desc->u.control_generic.op = op; hw_desc->u.control_generic.completion_update = 1; if ((flags & DMA_INT_EN) != 0) hw_desc->u.control_generic.int_enable = 1; hw_desc->size = size; hw_desc->src_addr = src; hw_desc->dest_addr = dst; desc->bus_dmadesc.callback_fn = callback_fn; desc->bus_dmadesc.callback_arg = callback_arg; return (desc); } struct bus_dmadesc * ioat_null(bus_dmaengine_t dmaengine, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags) { struct ioat_dma_hw_descriptor *hw_desc; struct ioat_descriptor *desc; struct ioat_softc *ioat; CTR0(KTR_IOAT, __func__); ioat = to_ioat_softc(dmaengine); desc = ioat_op_generic(ioat, IOAT_OP_COPY, 8, 0, 0, callback_fn, callback_arg, flags); if (desc == NULL) return (NULL); hw_desc = desc->u.dma; hw_desc->u.control.null = 1; ioat_submit_single(ioat); return (&desc->bus_dmadesc); } struct bus_dmadesc * ioat_copy(bus_dmaengine_t dmaengine, bus_addr_t dst, bus_addr_t src, bus_size_t len, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags) { struct ioat_dma_hw_descriptor *hw_desc; struct ioat_descriptor *desc; struct ioat_softc *ioat; CTR0(KTR_IOAT, __func__); ioat = to_ioat_softc(dmaengine); if (((src | dst) & (0xffffull << 48)) != 0) { ioat_log_message(0, "%s: High 16 bits of src/dst invalid\n", __func__); return (NULL); } desc = ioat_op_generic(ioat, IOAT_OP_COPY, len, src, dst, callback_fn, callback_arg, flags); if (desc == NULL) return (NULL); hw_desc = desc->u.dma; if (g_ioat_debug_level >= 3) dump_descriptor(hw_desc); ioat_submit_single(ioat); return (&desc->bus_dmadesc); } struct bus_dmadesc * ioat_copy_8k_aligned(bus_dmaengine_t dmaengine, bus_addr_t dst1, bus_addr_t dst2, bus_addr_t src1, bus_addr_t src2, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags) { struct ioat_dma_hw_descriptor *hw_desc; struct ioat_descriptor *desc; struct ioat_softc *ioat; CTR0(KTR_IOAT, __func__); ioat = to_ioat_softc(dmaengine); if (((src1 | src2 | dst1 | dst2) & (0xffffull << 48)) != 0) { ioat_log_message(0, "%s: High 16 bits of src/dst invalid\n", __func__); return (NULL); } if (((src1 | src2 | dst1 | dst2) & PAGE_MASK) != 0) { ioat_log_message(0, "%s: Addresses must be page-aligned\n", __func__); return (NULL); } desc = ioat_op_generic(ioat, IOAT_OP_COPY, 2 * PAGE_SIZE, src1, dst1, callback_fn, callback_arg, flags); if (desc == NULL) return (NULL); hw_desc = desc->u.dma; if (src2 != src1 + PAGE_SIZE) { hw_desc->u.control.src_page_break = 1; hw_desc->next_src_addr = src2; } if (dst2 != dst1 + PAGE_SIZE) { hw_desc->u.control.dest_page_break = 1; hw_desc->next_dest_addr = dst2; } if (g_ioat_debug_level >= 3) dump_descriptor(hw_desc); ioat_submit_single(ioat); return (&desc->bus_dmadesc); } struct bus_dmadesc * ioat_blockfill(bus_dmaengine_t dmaengine, bus_addr_t dst, uint64_t fillpattern, bus_size_t len, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags) { struct ioat_fill_hw_descriptor *hw_desc; struct ioat_descriptor *desc; struct ioat_softc *ioat; CTR0(KTR_IOAT, __func__); ioat = to_ioat_softc(dmaengine); if ((ioat->capabilities & IOAT_DMACAP_BFILL) == 0) { ioat_log_message(0, "%s: Device lacks BFILL capability\n", __func__); return (NULL); } if ((dst & (0xffffull << 48)) != 0) { ioat_log_message(0, "%s: High 16 bits of dst invalid\n", __func__); return (NULL); } desc = ioat_op_generic(ioat, IOAT_OP_FILL, len, fillpattern, dst, callback_fn, callback_arg, flags); if (desc == NULL) return (NULL); hw_desc = desc->u.fill; if (g_ioat_debug_level >= 3) dump_descriptor(hw_desc); ioat_submit_single(ioat); return (&desc->bus_dmadesc); } /* * Ring Management */ static inline uint32_t ioat_get_active(struct ioat_softc *ioat) { return ((ioat->head - ioat->tail) & ((1 << ioat->ring_size_order) - 1)); } static inline uint32_t ioat_get_ring_space(struct ioat_softc *ioat) { return ((1 << ioat->ring_size_order) - ioat_get_active(ioat) - 1); } static struct ioat_descriptor * ioat_alloc_ring_entry(struct ioat_softc *ioat, int mflags) { struct ioat_generic_hw_descriptor *hw_desc; struct ioat_descriptor *desc; int error, busdmaflag; error = ENOMEM; hw_desc = NULL; if ((mflags & M_WAITOK) != 0) busdmaflag = BUS_DMA_WAITOK; else busdmaflag = BUS_DMA_NOWAIT; desc = malloc(sizeof(*desc), M_IOAT, mflags); if (desc == NULL) goto out; bus_dmamem_alloc(ioat->hw_desc_tag, (void **)&hw_desc, BUS_DMA_ZERO | busdmaflag, &ioat->hw_desc_map); if (hw_desc == NULL) goto out; memset(&desc->bus_dmadesc, 0, sizeof(desc->bus_dmadesc)); desc->u.generic = hw_desc; error = bus_dmamap_load(ioat->hw_desc_tag, ioat->hw_desc_map, hw_desc, sizeof(*hw_desc), ioat_dmamap_cb, &desc->hw_desc_bus_addr, busdmaflag); if (error) goto out; out: if (error) { ioat_free_ring_entry(ioat, desc); return (NULL); } return (desc); } static void ioat_free_ring_entry(struct ioat_softc *ioat, struct ioat_descriptor *desc) { if (desc == NULL) return; if (desc->u.generic) bus_dmamem_free(ioat->hw_desc_tag, desc->u.generic, ioat->hw_desc_map); free(desc, M_IOAT); } /* * Reserves space in this IOAT descriptor ring by ensuring enough slots remain * for 'num_descs'. * * If mflags contains M_WAITOK, blocks until enough space is available. * * Returns zero on success, or an errno on error. If num_descs is beyond the * maximum ring size, returns EINVAl; if allocation would block and mflags * contains M_NOWAIT, returns EAGAIN. * * Must be called with the submit_lock held; returns with the lock held. The * lock may be dropped to allocate the ring. * * (The submit_lock is needed to add any entries to the ring, so callers are * assured enough room is available.) */ static int ioat_reserve_space(struct ioat_softc *ioat, uint32_t num_descs, int mflags) { struct ioat_descriptor **new_ring; uint32_t order; int error; mtx_assert(&ioat->submit_lock, MA_OWNED); error = 0; if (num_descs < 1 || num_descs > (1 << IOAT_MAX_ORDER)) { error = EINVAL; goto out; } if (ioat->quiescing) { error = ENXIO; goto out; } for (;;) { if (ioat_get_ring_space(ioat) >= num_descs) goto out; order = ioat->ring_size_order; if (ioat->is_resize_pending || order == IOAT_MAX_ORDER) { if ((mflags & M_WAITOK) != 0) { msleep(&ioat->tail, &ioat->submit_lock, 0, "ioat_rsz", 0); continue; } error = EAGAIN; break; } ioat->is_resize_pending = TRUE; for (;;) { mtx_unlock(&ioat->submit_lock); new_ring = ioat_prealloc_ring(ioat, 1 << (order + 1), TRUE, mflags); mtx_lock(&ioat->submit_lock); KASSERT(ioat->ring_size_order == order, ("is_resize_pending should protect order")); if (new_ring == NULL) { KASSERT((mflags & M_WAITOK) == 0, ("allocation failed")); error = EAGAIN; break; } error = ring_grow(ioat, order, new_ring); if (error == 0) break; } ioat->is_resize_pending = FALSE; wakeup(&ioat->tail); if (error) break; } out: mtx_assert(&ioat->submit_lock, MA_OWNED); return (error); } static struct ioat_descriptor ** ioat_prealloc_ring(struct ioat_softc *ioat, uint32_t size, boolean_t need_dscr, int mflags) { struct ioat_descriptor **ring; uint32_t i; int error; KASSERT(size > 0 && powerof2(size), ("bogus size")); ring = malloc(size * sizeof(*ring), M_IOAT, M_ZERO | mflags); if (ring == NULL) return (NULL); if (need_dscr) { error = ENOMEM; for (i = size / 2; i < size; i++) { ring[i] = ioat_alloc_ring_entry(ioat, mflags); if (ring[i] == NULL) goto out; ring[i]->id = i; } } error = 0; out: if (error != 0 && ring != NULL) { ioat_free_ring(ioat, size, ring); ring = NULL; } return (ring); } static void ioat_free_ring(struct ioat_softc *ioat, uint32_t size, struct ioat_descriptor **ring) { uint32_t i; for (i = 0; i < size; i++) { if (ring[i] != NULL) ioat_free_ring_entry(ioat, ring[i]); } free(ring, M_IOAT); } static struct ioat_descriptor * ioat_get_ring_entry(struct ioat_softc *ioat, uint32_t index) { return (ioat->ring[index % (1 << ioat->ring_size_order)]); } static int ring_grow(struct ioat_softc *ioat, uint32_t oldorder, struct ioat_descriptor **newring) { struct ioat_descriptor *tmp, *next; struct ioat_dma_hw_descriptor *hw; uint32_t oldsize, newsize, head, tail, i, end; int error; CTR0(KTR_IOAT, __func__); mtx_assert(&ioat->submit_lock, MA_OWNED); if (oldorder != ioat->ring_size_order || oldorder >= IOAT_MAX_ORDER) { error = EINVAL; goto out; } oldsize = (1 << oldorder); newsize = (1 << (oldorder + 1)); mtx_lock(&ioat->cleanup_lock); head = ioat->head & (oldsize - 1); tail = ioat->tail & (oldsize - 1); /* Copy old descriptors to new ring */ for (i = 0; i < oldsize; i++) newring[i] = ioat->ring[i]; /* * If head has wrapped but tail hasn't, we must swap some descriptors * around so that tail can increment directly to head. */ if (head < tail) { for (i = 0; i <= head; i++) { tmp = newring[oldsize + i]; newring[oldsize + i] = newring[i]; newring[oldsize + i]->id = oldsize + i; newring[i] = tmp; newring[i]->id = i; } head += oldsize; } KASSERT(head >= tail, ("invariants")); /* Head didn't wrap; we only need to link in oldsize..newsize */ if (head < oldsize) { i = oldsize - 1; end = newsize; } else { /* Head did wrap; link newhead..newsize and 0..oldhead */ i = head; end = newsize + (head - oldsize) + 1; } /* * Fix up hardware ring, being careful not to trample the active * section (tail -> head). */ for (; i < end; i++) { KASSERT((i & (newsize - 1)) < tail || (i & (newsize - 1)) >= head, ("trampling snake")); next = newring[(i + 1) & (newsize - 1)]; hw = newring[i & (newsize - 1)]->u.dma; hw->next = next->hw_desc_bus_addr; } free(ioat->ring, M_IOAT); ioat->ring = newring; ioat->ring_size_order = oldorder + 1; ioat->tail = tail; ioat->head = head; error = 0; mtx_unlock(&ioat->cleanup_lock); out: if (error) ioat_free_ring(ioat, (1 << (oldorder + 1)), newring); return (error); } static int ring_shrink(struct ioat_softc *ioat, uint32_t oldorder, struct ioat_descriptor **newring) { struct ioat_dma_hw_descriptor *hw; struct ioat_descriptor *ent, *next; uint32_t oldsize, newsize, current_idx, new_idx, i; int error; CTR0(KTR_IOAT, __func__); mtx_assert(&ioat->submit_lock, MA_OWNED); if (oldorder != ioat->ring_size_order || oldorder <= IOAT_MIN_ORDER) { error = EINVAL; goto out_unlocked; } oldsize = (1 << oldorder); newsize = (1 << (oldorder - 1)); mtx_lock(&ioat->cleanup_lock); /* Can't shrink below current active set! */ if (ioat_get_active(ioat) >= newsize) { error = ENOMEM; goto out; } /* * Copy current descriptors to the new ring, dropping the removed * descriptors. */ for (i = 0; i < newsize; i++) { current_idx = (ioat->tail + i) & (oldsize - 1); new_idx = (ioat->tail + i) & (newsize - 1); newring[new_idx] = ioat->ring[current_idx]; newring[new_idx]->id = new_idx; } /* Free deleted descriptors */ for (i = newsize; i < oldsize; i++) { ent = ioat_get_ring_entry(ioat, ioat->tail + i); ioat_free_ring_entry(ioat, ent); } /* Fix up hardware ring. */ hw = newring[(ioat->tail + newsize - 1) & (newsize - 1)]->u.dma; next = newring[(ioat->tail + newsize) & (newsize - 1)]; hw->next = next->hw_desc_bus_addr; free(ioat->ring, M_IOAT); ioat->ring = newring; ioat->ring_size_order = oldorder - 1; error = 0; out: mtx_unlock(&ioat->cleanup_lock); out_unlocked: if (error) ioat_free_ring(ioat, (1 << (oldorder - 1)), newring); return (error); } static void ioat_halted_debug(struct ioat_softc *ioat, uint32_t chanerr) { struct ioat_descriptor *desc; ioat_log_message(0, "Channel halted (%b)\n", (int)chanerr, IOAT_CHANERR_STR); if (chanerr == 0) return; mtx_assert(&ioat->cleanup_lock, MA_OWNED); desc = ioat_get_ring_entry(ioat, ioat->tail + 0); dump_descriptor(desc->u.raw); desc = ioat_get_ring_entry(ioat, ioat->tail + 1); dump_descriptor(desc->u.raw); } static void ioat_timer_callback(void *arg) { struct ioat_descriptor **newring; struct ioat_softc *ioat; uint32_t order; ioat = arg; ioat_log_message(1, "%s\n", __func__); if (ioat->is_completion_pending) { ioat_process_events(ioat); return; } /* Slowly scale the ring down if idle. */ mtx_lock(&ioat->submit_lock); order = ioat->ring_size_order; if (ioat->is_resize_pending || order == IOAT_MIN_ORDER) { mtx_unlock(&ioat->submit_lock); goto out; } ioat->is_resize_pending = TRUE; mtx_unlock(&ioat->submit_lock); newring = ioat_prealloc_ring(ioat, 1 << (order - 1), FALSE, M_NOWAIT); mtx_lock(&ioat->submit_lock); KASSERT(ioat->ring_size_order == order, ("resize_pending protects order")); if (newring != NULL) ring_shrink(ioat, order, newring); ioat->is_resize_pending = FALSE; mtx_unlock(&ioat->submit_lock); out: if (ioat->ring_size_order > IOAT_MIN_ORDER) callout_reset(&ioat->timer, 10 * hz, ioat_timer_callback, ioat); } /* * Support Functions */ static void ioat_submit_single(struct ioat_softc *ioat) { ioat_get(ioat, IOAT_ACTIVE_DESCR_REF); atomic_add_rel_int(&ioat->head, 1); atomic_add_rel_int(&ioat->hw_head, 1); if (!ioat->is_completion_pending) { ioat->is_completion_pending = TRUE; callout_reset(&ioat->timer, IOAT_INTR_TIMO, ioat_timer_callback, ioat); } ioat->stats.descriptors_submitted++; } static int ioat_reset_hw(struct ioat_softc *ioat) { uint64_t status; uint32_t chanerr; unsigned timeout; int error; mtx_lock(IOAT_REFLK); ioat->quiescing = TRUE; ioat_drain_locked(ioat); mtx_unlock(IOAT_REFLK); status = ioat_get_chansts(ioat); if (is_ioat_active(status) || is_ioat_idle(status)) ioat_suspend(ioat); /* Wait at most 20 ms */ for (timeout = 0; (is_ioat_active(status) || is_ioat_idle(status)) && timeout < 20; timeout++) { DELAY(1000); status = ioat_get_chansts(ioat); } if (timeout == 20) { error = ETIMEDOUT; goto out; } KASSERT(ioat_get_active(ioat) == 0, ("active after quiesce")); chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET); ioat_write_4(ioat, IOAT_CHANERR_OFFSET, chanerr); /* * IOAT v3 workaround - CHANERRMSK_INT with 3E07h to masks out errors * that can cause stability issues for IOAT v3. */ pci_write_config(ioat->device, IOAT_CFG_CHANERRMASK_INT_OFFSET, 0x3e07, 4); chanerr = pci_read_config(ioat->device, IOAT_CFG_CHANERR_INT_OFFSET, 4); pci_write_config(ioat->device, IOAT_CFG_CHANERR_INT_OFFSET, chanerr, 4); /* * BDXDE and BWD models reset MSI-X registers on device reset. * Save/restore their contents manually. */ if (ioat_model_resets_msix(ioat)) { ioat_log_message(1, "device resets MSI-X registers; saving\n"); pci_save_state(ioat->device); } ioat_reset(ioat); /* Wait at most 20 ms */ for (timeout = 0; ioat_reset_pending(ioat) && timeout < 20; timeout++) DELAY(1000); if (timeout == 20) { error = ETIMEDOUT; goto out; } if (ioat_model_resets_msix(ioat)) { ioat_log_message(1, "device resets registers; restored\n"); pci_restore_state(ioat->device); } /* Reset attempts to return the hardware to "halted." */ status = ioat_get_chansts(ioat); if (is_ioat_active(status) || is_ioat_idle(status)) { /* So this really shouldn't happen... */ ioat_log_message(0, "Device is active after a reset?\n"); ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN); error = 0; goto out; } chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET); if (chanerr != 0) { mtx_lock(&ioat->cleanup_lock); ioat_halted_debug(ioat, chanerr); mtx_unlock(&ioat->cleanup_lock); error = EIO; goto out; } /* * Bring device back online after reset. Writing CHAINADDR brings the * device back to active. * * The internal ring counter resets to zero, so we have to start over * at zero as well. */ ioat->tail = ioat->head = ioat->hw_head = 0; ioat->last_seen = 0; ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN); ioat_write_chancmp(ioat, ioat->comp_update_bus_addr); ioat_write_chainaddr(ioat, ioat->ring[0]->hw_desc_bus_addr); error = 0; out: mtx_lock(IOAT_REFLK); ioat->quiescing = FALSE; mtx_unlock(IOAT_REFLK); if (error == 0) error = ioat_start_channel(ioat); return (error); } static int sysctl_handle_chansts(SYSCTL_HANDLER_ARGS) { struct ioat_softc *ioat; struct sbuf sb; uint64_t status; int error; ioat = arg1; status = ioat_get_chansts(ioat) & IOAT_CHANSTS_STATUS; sbuf_new_for_sysctl(&sb, NULL, 256, req); switch (status) { case IOAT_CHANSTS_ACTIVE: sbuf_printf(&sb, "ACTIVE"); break; case IOAT_CHANSTS_IDLE: sbuf_printf(&sb, "IDLE"); break; case IOAT_CHANSTS_SUSPENDED: sbuf_printf(&sb, "SUSPENDED"); break; case IOAT_CHANSTS_HALTED: sbuf_printf(&sb, "HALTED"); break; case IOAT_CHANSTS_ARMED: sbuf_printf(&sb, "ARMED"); break; default: sbuf_printf(&sb, "UNKNOWN"); break; } error = sbuf_finish(&sb); sbuf_delete(&sb); if (error != 0 || req->newptr == NULL) return (error); return (EINVAL); } static int sysctl_handle_dpi(SYSCTL_HANDLER_ARGS) { struct ioat_softc *ioat; struct sbuf sb; #define PRECISION "1" const uintmax_t factor = 10; uintmax_t rate; int error; ioat = arg1; sbuf_new_for_sysctl(&sb, NULL, 16, req); if (ioat->stats.interrupts == 0) { sbuf_printf(&sb, "NaN"); goto out; } rate = ioat->stats.descriptors_processed * factor / ioat->stats.interrupts; sbuf_printf(&sb, "%ju.%." PRECISION "ju", rate / factor, rate % factor); #undef PRECISION out: error = sbuf_finish(&sb); sbuf_delete(&sb); if (error != 0 || req->newptr == NULL) return (error); return (EINVAL); } static int sysctl_handle_error(SYSCTL_HANDLER_ARGS) { struct ioat_descriptor *desc; struct ioat_softc *ioat; int error, arg; ioat = arg1; arg = 0; error = SYSCTL_OUT(req, &arg, sizeof(arg)); if (error != 0 || req->newptr == NULL) return (error); error = SYSCTL_IN(req, &arg, sizeof(arg)); if (error != 0) return (error); if (arg != 0) { ioat_acquire(&ioat->dmaengine); desc = ioat_op_generic(ioat, IOAT_OP_COPY, 1, 0xffff000000000000ull, 0xffff000000000000ull, NULL, NULL, 0); if (desc == NULL) error = ENOMEM; else ioat_submit_single(ioat); ioat_release(&ioat->dmaengine); } return (error); } static int sysctl_handle_reset(SYSCTL_HANDLER_ARGS) { struct ioat_softc *ioat; int error, arg; ioat = arg1; arg = 0; error = SYSCTL_OUT(req, &arg, sizeof(arg)); if (error != 0 || req->newptr == NULL) return (error); error = SYSCTL_IN(req, &arg, sizeof(arg)); if (error != 0) return (error); if (arg != 0) error = ioat_reset_hw(ioat); return (error); } static void dump_descriptor(void *hw_desc) { int i, j; for (i = 0; i < 2; i++) { for (j = 0; j < 8; j++) printf("%08x ", ((uint32_t *)hw_desc)[i * 8 + j]); printf("\n"); } } static void ioat_setup_sysctl(device_t device) { struct sysctl_oid_list *par, *statpar, *state, *hammer; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree, *tmp; struct ioat_softc *ioat; ioat = DEVICE2SOFTC(device); ctx = device_get_sysctl_ctx(device); tree = device_get_sysctl_tree(device); par = SYSCTL_CHILDREN(tree); SYSCTL_ADD_INT(ctx, par, OID_AUTO, "version", CTLFLAG_RD, &ioat->version, 0, "HW version (0xMM form)"); SYSCTL_ADD_UINT(ctx, par, OID_AUTO, "max_xfer_size", CTLFLAG_RD, &ioat->max_xfer_size, 0, "HW maximum transfer size"); SYSCTL_ADD_INT(ctx, par, OID_AUTO, "intrdelay_supported", CTLFLAG_RD, &ioat->intrdelay_supported, 0, "Is INTRDELAY supported"); SYSCTL_ADD_U16(ctx, par, OID_AUTO, "intrdelay_max", CTLFLAG_RD, &ioat->intrdelay_max, 0, "Maximum configurable INTRDELAY on this channel (microseconds)"); tmp = SYSCTL_ADD_NODE(ctx, par, OID_AUTO, "state", CTLFLAG_RD, NULL, "IOAT channel internal state"); state = SYSCTL_CHILDREN(tmp); SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "ring_size_order", CTLFLAG_RD, &ioat->ring_size_order, 0, "SW descriptor ring size order"); SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "head", CTLFLAG_RD, &ioat->head, 0, "SW descriptor head pointer index"); SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "tail", CTLFLAG_RD, &ioat->tail, 0, "SW descriptor tail pointer index"); SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "hw_head", CTLFLAG_RD, &ioat->hw_head, 0, "HW DMACOUNT"); SYSCTL_ADD_UQUAD(ctx, state, OID_AUTO, "last_completion", CTLFLAG_RD, ioat->comp_update, "HW addr of last completion"); SYSCTL_ADD_INT(ctx, state, OID_AUTO, "is_resize_pending", CTLFLAG_RD, &ioat->is_resize_pending, 0, "resize pending"); SYSCTL_ADD_INT(ctx, state, OID_AUTO, "is_completion_pending", CTLFLAG_RD, &ioat->is_completion_pending, 0, "completion pending"); SYSCTL_ADD_INT(ctx, state, OID_AUTO, "is_reset_pending", CTLFLAG_RD, &ioat->is_reset_pending, 0, "reset pending"); SYSCTL_ADD_INT(ctx, state, OID_AUTO, "is_channel_running", CTLFLAG_RD, &ioat->is_channel_running, 0, "channel running"); SYSCTL_ADD_PROC(ctx, state, OID_AUTO, "chansts", CTLTYPE_STRING | CTLFLAG_RD, ioat, 0, sysctl_handle_chansts, "A", "String of the channel status"); SYSCTL_ADD_U16(ctx, state, OID_AUTO, "intrdelay", CTLFLAG_RD, &ioat->cached_intrdelay, 0, "Current INTRDELAY on this channel (cached, microseconds)"); tmp = SYSCTL_ADD_NODE(ctx, par, OID_AUTO, "hammer", CTLFLAG_RD, NULL, "Big hammers (mostly for testing)"); hammer = SYSCTL_CHILDREN(tmp); SYSCTL_ADD_PROC(ctx, hammer, OID_AUTO, "force_hw_reset", CTLTYPE_INT | CTLFLAG_RW, ioat, 0, sysctl_handle_reset, "I", "Set to non-zero to reset the hardware"); SYSCTL_ADD_PROC(ctx, hammer, OID_AUTO, "force_hw_error", CTLTYPE_INT | CTLFLAG_RW, ioat, 0, sysctl_handle_error, "I", "Set to non-zero to inject a recoverable hardware error"); tmp = SYSCTL_ADD_NODE(ctx, par, OID_AUTO, "stats", CTLFLAG_RD, NULL, "IOAT channel statistics"); statpar = SYSCTL_CHILDREN(tmp); SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "interrupts", CTLFLAG_RW, &ioat->stats.interrupts, "Number of interrupts processed on this channel"); SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "descriptors", CTLFLAG_RW, &ioat->stats.descriptors_processed, "Number of descriptors processed on this channel"); SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "submitted", CTLFLAG_RW, &ioat->stats.descriptors_submitted, "Number of descriptors submitted to this channel"); SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "errored", CTLFLAG_RW, &ioat->stats.descriptors_error, "Number of descriptors failed by channel errors"); SYSCTL_ADD_U32(ctx, statpar, OID_AUTO, "halts", CTLFLAG_RW, &ioat->stats.channel_halts, 0, "Number of times the channel has halted"); SYSCTL_ADD_U32(ctx, statpar, OID_AUTO, "last_halt_chanerr", CTLFLAG_RW, &ioat->stats.last_halt_chanerr, 0, "The raw CHANERR when the channel was last halted"); SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "desc_per_interrupt", CTLTYPE_STRING | CTLFLAG_RD, ioat, 0, sysctl_handle_dpi, "A", "Descriptors per interrupt"); } static inline struct ioat_softc * ioat_get(struct ioat_softc *ioat, enum ioat_ref_kind kind) { uint32_t old; KASSERT(kind < IOAT_NUM_REF_KINDS, ("bogus")); old = atomic_fetchadd_32(&ioat->refcnt, 1); KASSERT(old < UINT32_MAX, ("refcnt overflow")); #ifdef INVARIANTS old = atomic_fetchadd_32(&ioat->refkinds[kind], 1); KASSERT(old < UINT32_MAX, ("refcnt kind overflow")); #endif return (ioat); } static inline void ioat_putn(struct ioat_softc *ioat, uint32_t n, enum ioat_ref_kind kind) { _ioat_putn(ioat, n, kind, FALSE); } static inline void ioat_putn_locked(struct ioat_softc *ioat, uint32_t n, enum ioat_ref_kind kind) { _ioat_putn(ioat, n, kind, TRUE); } static inline void _ioat_putn(struct ioat_softc *ioat, uint32_t n, enum ioat_ref_kind kind, boolean_t locked) { uint32_t old; KASSERT(kind < IOAT_NUM_REF_KINDS, ("bogus")); if (n == 0) return; #ifdef INVARIANTS old = atomic_fetchadd_32(&ioat->refkinds[kind], -n); KASSERT(old >= n, ("refcnt kind underflow")); #endif /* Skip acquiring the lock if resulting refcnt > 0. */ for (;;) { old = ioat->refcnt; if (old <= n) break; if (atomic_cmpset_32(&ioat->refcnt, old, old - n)) return; } if (locked) mtx_assert(IOAT_REFLK, MA_OWNED); else mtx_lock(IOAT_REFLK); old = atomic_fetchadd_32(&ioat->refcnt, -n); KASSERT(old >= n, ("refcnt error")); if (old == n) wakeup(IOAT_REFLK); if (!locked) mtx_unlock(IOAT_REFLK); } static inline void ioat_put(struct ioat_softc *ioat, enum ioat_ref_kind kind) { ioat_putn(ioat, 1, kind); } static void ioat_drain_locked(struct ioat_softc *ioat) { mtx_assert(IOAT_REFLK, MA_OWNED); while (ioat->refcnt > 0) msleep(IOAT_REFLK, IOAT_REFLK, 0, "ioat_drain", 0); } Index: projects/sendfile/sys/dev/ioat/ioat.h =================================================================== --- projects/sendfile/sys/dev/ioat/ioat.h (revision 293405) +++ projects/sendfile/sys/dev/ioat/ioat.h (revision 293406) @@ -1,142 +1,155 @@ /*- * Copyright (C) 2012 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ __FBSDID("$FreeBSD$"); #ifndef __IOAT_H__ #define __IOAT_H__ #include #include /* * This file defines the public interface to the IOAT driver. */ /* * Enables an interrupt for this operation. Typically, you would only enable * this on the last operation in a group */ #define DMA_INT_EN 0x1 /* * Like M_NOWAIT. Operations will return NULL if they cannot allocate a * descriptor without blocking. */ #define DMA_NO_WAIT 0x2 #define DMA_ALL_FLAGS (DMA_INT_EN | DMA_NO_WAIT) /* * Hardware revision number. Different hardware revisions support different * features. For example, 3.2 cannot read from MMIO space, while 3.3 can. */ #define IOAT_VER_3_0 0x30 #define IOAT_VER_3_2 0x32 #define IOAT_VER_3_3 0x33 typedef void *bus_dmaengine_t; struct bus_dmadesc; typedef void (*bus_dmaengine_callback_t)(void *arg, int error); /* * Called first to acquire a reference to the DMA channel */ bus_dmaengine_t ioat_get_dmaengine(uint32_t channel_index); /* Release the DMA channel */ void ioat_put_dmaengine(bus_dmaengine_t dmaengine); /* Check the DMA engine's HW version */ int ioat_get_hwversion(bus_dmaengine_t dmaengine); size_t ioat_get_max_io_size(bus_dmaengine_t dmaengine); /* * Set interrupt coalescing on a DMA channel. * * The argument is in microseconds. A zero value disables coalescing. Any * other value delays interrupt generation for N microseconds to provide * opportunity to coalesce multiple operations into a single interrupt. * * Returns an error status, or zero on success. * * - ERANGE if the given value exceeds the delay supported by the hardware. * (All current hardware supports a maximum of 0x3fff microseconds delay.) * - ENODEV if the hardware does not support interrupt coalescing. */ int ioat_set_interrupt_coalesce(bus_dmaengine_t dmaengine, uint16_t delay); /* * Return the maximum supported coalescing period, for use in * ioat_set_interrupt_coalesce(). If the hardware does not support coalescing, * returns zero. */ uint16_t ioat_get_max_coalesce_period(bus_dmaengine_t dmaengine); /* * Acquire must be called before issuing an operation to perform. Release is - * called after. Multiple operations can be issued within the context of one + * called after. Multiple operations can be issued within the context of one * acquire and release */ void ioat_acquire(bus_dmaengine_t dmaengine); void ioat_release(bus_dmaengine_t dmaengine); + +/* + * Acquire_reserve can be called to ensure there is room for N descriptors. If + * it succeeds, the next N valid operations will successfully enqueue. + * + * It may fail with: + * - ENXIO if the channel is in an errored state, or the driver is being + * unloaded + * - EAGAIN if mflags included M_NOWAIT + * + * On failure, the caller does not hold the dmaengine. + */ +int ioat_acquire_reserve(bus_dmaengine_t dmaengine, unsigned n, int mflags); /* * Issue a blockfill operation. The 64-bit pattern 'fillpattern' is written to * 'len' physically contiguous bytes at 'dst'. * * Only supported on devices with the BFILL capability. */ struct bus_dmadesc *ioat_blockfill(bus_dmaengine_t dmaengine, bus_addr_t dst, uint64_t fillpattern, bus_size_t len, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags); /* Issues the copy data operation */ struct bus_dmadesc *ioat_copy(bus_dmaengine_t dmaengine, bus_addr_t dst, bus_addr_t src, bus_size_t len, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags); /* * Issue a copy data operation, with constraints: * - src1, src2, dst1, dst2 are all page-aligned addresses * - The quantity to copy is exactly 2 pages; * - src1 -> dst1, src2 -> dst2 * * Why use this instead of normal _copy()? You can copy two non-contiguous * pages (src, dst, or both) with one descriptor. */ struct bus_dmadesc *ioat_copy_8k_aligned(bus_dmaengine_t dmaengine, bus_addr_t dst1, bus_addr_t dst2, bus_addr_t src1, bus_addr_t src2, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags); /* * Issues a null operation. This issues the operation to the hardware, but the * hardware doesn't do anything with it. */ struct bus_dmadesc *ioat_null(bus_dmaengine_t dmaengine, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags); #endif /* __IOAT_H__ */ Index: projects/sendfile/sys/kern/uipc_usrreq.c =================================================================== --- projects/sendfile/sys/kern/uipc_usrreq.c (revision 293405) +++ projects/sendfile/sys/kern/uipc_usrreq.c (revision 293406) @@ -1,2554 +1,2554 @@ /*- * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. * Copyright (c) 2004-2009 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 */ /* * UNIX Domain (Local) Sockets * * This is an implementation of UNIX (local) domain sockets. Each socket has * an associated struct unpcb (UNIX protocol control block). Stream sockets * may be connected to 0 or 1 other socket. Datagram sockets may be * connected to 0, 1, or many other sockets. Sockets may be created and * connected in pairs (socketpair(2)), or bound/connected to using the file * system name space. For most purposes, only the receive socket buffer is * used, as sending on one socket delivers directly to the receive socket * buffer of a second socket. * * The implementation is substantially complicated by the fact that * "ancillary data", such as file descriptors or credentials, may be passed * across UNIX domain sockets. The potential for passing UNIX domain sockets * over other UNIX domain sockets requires the implementation of a simple * garbage collector to find and tear down cycles of disconnected sockets. * * TODO: * RDM * rethink name space problems * need a proper out-of-band */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include /* XXX must be before */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #include #include MALLOC_DECLARE(M_FILECAPS); /* * Locking key: * (l) Locked using list lock * (g) Locked using linkage lock */ static uma_zone_t unp_zone; static unp_gen_t unp_gencnt; /* (l) */ static u_int unp_count; /* (l) Count of local sockets. */ static ino_t unp_ino; /* Prototype for fake inode numbers. */ static int unp_rights; /* (g) File descriptors in flight. */ static struct unp_head unp_shead; /* (l) List of stream sockets. */ static struct unp_head unp_dhead; /* (l) List of datagram sockets. */ static struct unp_head unp_sphead; /* (l) List of seqpacket sockets. */ struct unp_defer { SLIST_ENTRY(unp_defer) ud_link; struct file *ud_fp; }; static SLIST_HEAD(, unp_defer) unp_defers; static int unp_defers_count; static const struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; /* * Garbage collection of cyclic file descriptor/socket references occurs * asynchronously in a taskqueue context in order to avoid recursion and * reentrance in the UNIX domain socket, file descriptor, and socket layer * code. See unp_gc() for a full description. */ static struct timeout_task unp_gc_task; /* * The close of unix domain sockets attached as SCM_RIGHTS is * postponed to the taskqueue, to avoid arbitrary recursion depth. * The attached sockets might have another sockets attached. */ static struct task unp_defer_task; /* * Both send and receive buffers are allocated PIPSIZ bytes of buffering for * stream sockets, although the total for sender and receiver is actually * only PIPSIZ. * * Datagram sockets really use the sendspace as the maximum datagram size, * and don't really want to reserve the sendspace. Their recvspace should be * large enough for at least one max-size datagram plus address. */ #ifndef PIPSIZ #define PIPSIZ 8192 #endif static u_long unpst_sendspace = PIPSIZ; static u_long unpst_recvspace = PIPSIZ; static u_long unpdg_sendspace = 2*1024; /* really max datagram size */ static u_long unpdg_recvspace = 4*1024; static u_long unpsp_sendspace = PIPSIZ; /* really max datagram size */ static u_long unpsp_recvspace = PIPSIZ; static SYSCTL_NODE(_net, PF_LOCAL, local, CTLFLAG_RW, 0, "Local domain"); static SYSCTL_NODE(_net_local, SOCK_STREAM, stream, CTLFLAG_RW, 0, "SOCK_STREAM"); static SYSCTL_NODE(_net_local, SOCK_DGRAM, dgram, CTLFLAG_RW, 0, "SOCK_DGRAM"); static SYSCTL_NODE(_net_local, SOCK_SEQPACKET, seqpacket, CTLFLAG_RW, 0, "SOCK_SEQPACKET"); SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, &unpst_sendspace, 0, "Default stream send space."); SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW, &unpst_recvspace, 0, "Default stream receive space."); SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW, &unpdg_sendspace, 0, "Default datagram send space."); SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW, &unpdg_recvspace, 0, "Default datagram receive space."); SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, maxseqpacket, CTLFLAG_RW, &unpsp_sendspace, 0, "Default seqpacket send space."); SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, recvspace, CTLFLAG_RW, &unpsp_recvspace, 0, "Default seqpacket receive space."); SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "File descriptors in flight."); SYSCTL_INT(_net_local, OID_AUTO, deferred, CTLFLAG_RD, &unp_defers_count, 0, "File descriptors deferred to taskqueue for close."); /* * Locking and synchronization: * * Three types of locks exit in the local domain socket implementation: a * global list mutex, a global linkage rwlock, and per-unpcb mutexes. Of the * global locks, the list lock protects the socket count, global generation * number, and stream/datagram global lists. The linkage lock protects the * interconnection of unpcbs, the v_socket and unp_vnode pointers, and can be * held exclusively over the acquisition of multiple unpcb locks to prevent * deadlock. * * UNIX domain sockets each have an unpcb hung off of their so_pcb pointer, * allocated in pru_attach() and freed in pru_detach(). The validity of that * pointer is an invariant, so no lock is required to dereference the so_pcb * pointer if a valid socket reference is held by the caller. In practice, * this is always true during operations performed on a socket. Each unpcb * has a back-pointer to its socket, unp_socket, which will be stable under * the same circumstances. * * This pointer may only be safely dereferenced as long as a valid reference * to the unpcb is held. Typically, this reference will be from the socket, * or from another unpcb when the referring unpcb's lock is held (in order * that the reference not be invalidated during use). For example, to follow * unp->unp_conn->unp_socket, you need unlock the lock on unp, not unp_conn, * as unp_socket remains valid as long as the reference to unp_conn is valid. * * Fields of unpcbss are locked using a per-unpcb lock, unp_mtx. Individual * atomic reads without the lock may be performed "lockless", but more * complex reads and read-modify-writes require the mutex to be held. No * lock order is defined between unpcb locks -- multiple unpcb locks may be * acquired at the same time only when holding the linkage rwlock * exclusively, which prevents deadlocks. * * Blocking with UNIX domain sockets is a tricky issue: unlike most network * protocols, bind() is a non-atomic operation, and connect() requires * potential sleeping in the protocol, due to potentially waiting on local or * distributed file systems. We try to separate "lookup" operations, which * may sleep, and the IPC operations themselves, which typically can occur * with relative atomicity as locks can be held over the entire operation. * * Another tricky issue is simultaneous multi-threaded or multi-process * access to a single UNIX domain socket. These are handled by the flags * UNP_CONNECTING and UNP_BINDING, which prevent concurrent connecting or * binding, both of which involve dropping UNIX domain socket locks in order * to perform namei() and other file system operations. */ static struct rwlock unp_link_rwlock; static struct mtx unp_list_lock; static struct mtx unp_defers_lock; #define UNP_LINK_LOCK_INIT() rw_init(&unp_link_rwlock, \ "unp_link_rwlock") #define UNP_LINK_LOCK_ASSERT() rw_assert(&unp_link_rwlock, \ RA_LOCKED) #define UNP_LINK_UNLOCK_ASSERT() rw_assert(&unp_link_rwlock, \ RA_UNLOCKED) #define UNP_LINK_RLOCK() rw_rlock(&unp_link_rwlock) #define UNP_LINK_RUNLOCK() rw_runlock(&unp_link_rwlock) #define UNP_LINK_WLOCK() rw_wlock(&unp_link_rwlock) #define UNP_LINK_WUNLOCK() rw_wunlock(&unp_link_rwlock) #define UNP_LINK_WLOCK_ASSERT() rw_assert(&unp_link_rwlock, \ RA_WLOCKED) #define UNP_LIST_LOCK_INIT() mtx_init(&unp_list_lock, \ "unp_list_lock", NULL, MTX_DEF) #define UNP_LIST_LOCK() mtx_lock(&unp_list_lock) #define UNP_LIST_UNLOCK() mtx_unlock(&unp_list_lock) #define UNP_DEFERRED_LOCK_INIT() mtx_init(&unp_defers_lock, \ "unp_defer", NULL, MTX_DEF) #define UNP_DEFERRED_LOCK() mtx_lock(&unp_defers_lock) #define UNP_DEFERRED_UNLOCK() mtx_unlock(&unp_defers_lock) #define UNP_PCB_LOCK_INIT(unp) mtx_init(&(unp)->unp_mtx, \ "unp_mtx", "unp_mtx", \ MTX_DUPOK|MTX_DEF|MTX_RECURSE) #define UNP_PCB_LOCK_DESTROY(unp) mtx_destroy(&(unp)->unp_mtx) #define UNP_PCB_LOCK(unp) mtx_lock(&(unp)->unp_mtx) #define UNP_PCB_UNLOCK(unp) mtx_unlock(&(unp)->unp_mtx) #define UNP_PCB_LOCK_ASSERT(unp) mtx_assert(&(unp)->unp_mtx, MA_OWNED) static int uipc_connect2(struct socket *, struct socket *); static int uipc_ctloutput(struct socket *, struct sockopt *); static int unp_connect(struct socket *, struct sockaddr *, struct thread *); static int unp_connectat(int, struct socket *, struct sockaddr *, struct thread *); static int unp_connect2(struct socket *so, struct socket *so2, int); static void unp_disconnect(struct unpcb *unp, struct unpcb *unp2); static void unp_dispose(struct mbuf *); static void unp_dispose_so(struct socket *so); static void unp_shutdown(struct unpcb *); static void unp_drop(struct unpcb *, int); static void unp_gc(__unused void *, int); static void unp_scan(struct mbuf *, void (*)(struct filedescent **, int)); static void unp_discard(struct file *); static void unp_freerights(struct filedescent **, int); static void unp_init(void); static int unp_internalize(struct mbuf **, struct thread *); static void unp_internalize_fp(struct file *); static int unp_externalize(struct mbuf *, struct mbuf **, int); static int unp_externalize_fp(struct file *); static struct mbuf *unp_addsockcred(struct thread *, struct mbuf *); static void unp_process_defers(void * __unused, int); /* * Definitions of protocols supported in the LOCAL domain. */ static struct domain localdomain; static struct pr_usrreqs uipc_usrreqs_dgram, uipc_usrreqs_stream; static struct pr_usrreqs uipc_usrreqs_seqpacket; static struct protosw localsw[] = { { .pr_type = SOCK_STREAM, .pr_domain = &localdomain, .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS, .pr_ctloutput = &uipc_ctloutput, .pr_usrreqs = &uipc_usrreqs_stream }, { .pr_type = SOCK_DGRAM, .pr_domain = &localdomain, .pr_flags = PR_ATOMIC|PR_ADDR|PR_RIGHTS, .pr_ctloutput = &uipc_ctloutput, .pr_usrreqs = &uipc_usrreqs_dgram }, { .pr_type = SOCK_SEQPACKET, .pr_domain = &localdomain, /* * XXXRW: For now, PR_ADDR because soreceive will bump into them * due to our use of sbappendaddr. A new sbappend variants is needed * that supports both atomic record writes and control data. */ .pr_flags = PR_ADDR|PR_ATOMIC|PR_CONNREQUIRED|PR_WANTRCVD| PR_RIGHTS, .pr_ctloutput = &uipc_ctloutput, .pr_usrreqs = &uipc_usrreqs_seqpacket, }, }; static struct domain localdomain = { .dom_family = AF_LOCAL, .dom_name = "local", .dom_init = unp_init, .dom_externalize = unp_externalize, .dom_dispose = unp_dispose_so, .dom_protosw = localsw, .dom_protoswNPROTOSW = &localsw[sizeof(localsw)/sizeof(localsw[0])] }; DOMAIN_SET(local); static void uipc_abort(struct socket *so) { struct unpcb *unp, *unp2; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_abort: unp == NULL")); UNP_LINK_WLOCK(); UNP_PCB_LOCK(unp); unp2 = unp->unp_conn; if (unp2 != NULL) { UNP_PCB_LOCK(unp2); unp_drop(unp2, ECONNABORTED); UNP_PCB_UNLOCK(unp2); } UNP_PCB_UNLOCK(unp); UNP_LINK_WUNLOCK(); } static int uipc_accept(struct socket *so, struct sockaddr **nam) { struct unpcb *unp, *unp2; const struct sockaddr *sa; /* * Pass back name of connected socket, if it was bound and we are * still connected (our peer may have closed already!). */ unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_accept: unp == NULL")); *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); UNP_LINK_RLOCK(); unp2 = unp->unp_conn; if (unp2 != NULL && unp2->unp_addr != NULL) { UNP_PCB_LOCK(unp2); sa = (struct sockaddr *) unp2->unp_addr; bcopy(sa, *nam, sa->sa_len); UNP_PCB_UNLOCK(unp2); } else { sa = &sun_noname; bcopy(sa, *nam, sa->sa_len); } UNP_LINK_RUNLOCK(); return (0); } static int uipc_attach(struct socket *so, int proto, struct thread *td) { u_long sendspace, recvspace; struct unpcb *unp; int error; KASSERT(so->so_pcb == NULL, ("uipc_attach: so_pcb != NULL")); if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { switch (so->so_type) { case SOCK_STREAM: sendspace = unpst_sendspace; recvspace = unpst_recvspace; break; case SOCK_DGRAM: sendspace = unpdg_sendspace; recvspace = unpdg_recvspace; break; case SOCK_SEQPACKET: sendspace = unpsp_sendspace; recvspace = unpsp_recvspace; break; default: panic("uipc_attach"); } error = soreserve(so, sendspace, recvspace); if (error) return (error); } unp = uma_zalloc(unp_zone, M_NOWAIT | M_ZERO); if (unp == NULL) return (ENOBUFS); LIST_INIT(&unp->unp_refs); UNP_PCB_LOCK_INIT(unp); unp->unp_socket = so; so->so_pcb = unp; unp->unp_refcount = 1; UNP_LIST_LOCK(); unp->unp_gencnt = ++unp_gencnt; unp_count++; switch (so->so_type) { case SOCK_STREAM: LIST_INSERT_HEAD(&unp_shead, unp, unp_link); break; case SOCK_DGRAM: LIST_INSERT_HEAD(&unp_dhead, unp, unp_link); break; case SOCK_SEQPACKET: LIST_INSERT_HEAD(&unp_sphead, unp, unp_link); break; default: panic("uipc_attach"); } UNP_LIST_UNLOCK(); return (0); } static int uipc_bindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { struct sockaddr_un *soun = (struct sockaddr_un *)nam; struct vattr vattr; int error, namelen; struct nameidata nd; struct unpcb *unp; struct vnode *vp; struct mount *mp; cap_rights_t rights; char *buf; if (nam->sa_family != AF_UNIX) return (EAFNOSUPPORT); unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_bind: unp == NULL")); if (soun->sun_len > sizeof(struct sockaddr_un)) return (EINVAL); namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); if (namelen <= 0) return (EINVAL); /* * We don't allow simultaneous bind() calls on a single UNIX domain * socket, so flag in-progress operations, and return an error if an * operation is already in progress. * * Historically, we have not allowed a socket to be rebound, so this * also returns an error. Not allowing re-binding simplifies the * implementation and avoids a great many possible failure modes. */ UNP_PCB_LOCK(unp); if (unp->unp_vnode != NULL) { UNP_PCB_UNLOCK(unp); return (EINVAL); } if (unp->unp_flags & UNP_BINDING) { UNP_PCB_UNLOCK(unp); return (EALREADY); } unp->unp_flags |= UNP_BINDING; UNP_PCB_UNLOCK(unp); buf = malloc(namelen + 1, M_TEMP, M_WAITOK); bcopy(soun->sun_path, buf, namelen); buf[namelen] = 0; restart: NDINIT_ATRIGHTS(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME | NOCACHE, UIO_SYSSPACE, buf, fd, cap_rights_init(&rights, CAP_BINDAT), td); /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ error = namei(&nd); if (error) goto error; vp = nd.ni_vp; if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (vp != NULL) { vrele(vp); error = EADDRINUSE; goto error; } error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); if (error) goto error; goto restart; } VATTR_NULL(&vattr); vattr.va_type = VSOCK; vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask); #ifdef MAC error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); #endif if (error == 0) error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if (error) { vn_finished_write(mp); goto error; } vp = nd.ni_vp; ASSERT_VOP_ELOCKED(vp, "uipc_bind"); soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK); UNP_LINK_WLOCK(); UNP_PCB_LOCK(unp); VOP_UNP_BIND(vp, unp->unp_socket); unp->unp_vnode = vp; unp->unp_addr = soun; unp->unp_flags &= ~UNP_BINDING; UNP_PCB_UNLOCK(unp); UNP_LINK_WUNLOCK(); VOP_UNLOCK(vp, 0); vn_finished_write(mp); free(buf, M_TEMP); return (0); error: UNP_PCB_LOCK(unp); unp->unp_flags &= ~UNP_BINDING; UNP_PCB_UNLOCK(unp); free(buf, M_TEMP); return (error); } static int uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { return (uipc_bindat(AT_FDCWD, so, nam, td)); } static int uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { int error; KASSERT(td == curthread, ("uipc_connect: td != curthread")); UNP_LINK_WLOCK(); error = unp_connect(so, nam, td); UNP_LINK_WUNLOCK(); return (error); } static int uipc_connectat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { int error; KASSERT(td == curthread, ("uipc_connectat: td != curthread")); UNP_LINK_WLOCK(); error = unp_connectat(fd, so, nam, td); UNP_LINK_WUNLOCK(); return (error); } static void uipc_close(struct socket *so) { struct unpcb *unp, *unp2; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_close: unp == NULL")); UNP_LINK_WLOCK(); UNP_PCB_LOCK(unp); unp2 = unp->unp_conn; if (unp2 != NULL) { UNP_PCB_LOCK(unp2); unp_disconnect(unp, unp2); UNP_PCB_UNLOCK(unp2); } UNP_PCB_UNLOCK(unp); UNP_LINK_WUNLOCK(); } static int uipc_connect2(struct socket *so1, struct socket *so2) { struct unpcb *unp, *unp2; int error; UNP_LINK_WLOCK(); unp = so1->so_pcb; KASSERT(unp != NULL, ("uipc_connect2: unp == NULL")); UNP_PCB_LOCK(unp); unp2 = so2->so_pcb; KASSERT(unp2 != NULL, ("uipc_connect2: unp2 == NULL")); UNP_PCB_LOCK(unp2); error = unp_connect2(so1, so2, PRU_CONNECT2); UNP_PCB_UNLOCK(unp2); UNP_PCB_UNLOCK(unp); UNP_LINK_WUNLOCK(); return (error); } static void uipc_detach(struct socket *so) { struct unpcb *unp, *unp2; struct sockaddr_un *saved_unp_addr; struct vnode *vp; int freeunp, local_unp_rights; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_detach: unp == NULL")); UNP_LINK_WLOCK(); UNP_LIST_LOCK(); UNP_PCB_LOCK(unp); LIST_REMOVE(unp, unp_link); unp->unp_gencnt = ++unp_gencnt; --unp_count; UNP_LIST_UNLOCK(); /* * XXXRW: Should assert vp->v_socket == so. */ if ((vp = unp->unp_vnode) != NULL) { VOP_UNP_DETACH(vp); unp->unp_vnode = NULL; } unp2 = unp->unp_conn; if (unp2 != NULL) { UNP_PCB_LOCK(unp2); unp_disconnect(unp, unp2); UNP_PCB_UNLOCK(unp2); } /* * We hold the linkage lock exclusively, so it's OK to acquire * multiple pcb locks at a time. */ while (!LIST_EMPTY(&unp->unp_refs)) { struct unpcb *ref = LIST_FIRST(&unp->unp_refs); UNP_PCB_LOCK(ref); unp_drop(ref, ECONNRESET); UNP_PCB_UNLOCK(ref); } local_unp_rights = unp_rights; UNP_LINK_WUNLOCK(); unp->unp_socket->so_pcb = NULL; saved_unp_addr = unp->unp_addr; unp->unp_addr = NULL; unp->unp_refcount--; freeunp = (unp->unp_refcount == 0); if (saved_unp_addr != NULL) free(saved_unp_addr, M_SONAME); if (freeunp) { UNP_PCB_LOCK_DESTROY(unp); uma_zfree(unp_zone, unp); } else UNP_PCB_UNLOCK(unp); if (vp) vrele(vp); if (local_unp_rights) taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1); } static int uipc_disconnect(struct socket *so) { struct unpcb *unp, *unp2; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL")); UNP_LINK_WLOCK(); UNP_PCB_LOCK(unp); unp2 = unp->unp_conn; if (unp2 != NULL) { UNP_PCB_LOCK(unp2); unp_disconnect(unp, unp2); UNP_PCB_UNLOCK(unp2); } UNP_PCB_UNLOCK(unp); UNP_LINK_WUNLOCK(); return (0); } static int uipc_listen(struct socket *so, int backlog, struct thread *td) { struct unpcb *unp; int error; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_listen: unp == NULL")); UNP_PCB_LOCK(unp); if (unp->unp_vnode == NULL) { /* Already connected or not bound to an address. */ error = unp->unp_conn != NULL ? EINVAL : EDESTADDRREQ; UNP_PCB_UNLOCK(unp); return (error); } SOCK_LOCK(so); error = solisten_proto_check(so); if (error == 0) { cru2x(td->td_ucred, &unp->unp_peercred); unp->unp_flags |= UNP_HAVEPCCACHED; solisten_proto(so, backlog); } SOCK_UNLOCK(so); UNP_PCB_UNLOCK(unp); return (error); } static int uipc_peeraddr(struct socket *so, struct sockaddr **nam) { struct unpcb *unp, *unp2; const struct sockaddr *sa; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_peeraddr: unp == NULL")); *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); UNP_LINK_RLOCK(); /* * XXX: It seems that this test always fails even when connection is * established. So, this else clause is added as workaround to * return PF_LOCAL sockaddr. */ unp2 = unp->unp_conn; if (unp2 != NULL) { UNP_PCB_LOCK(unp2); if (unp2->unp_addr != NULL) sa = (struct sockaddr *) unp2->unp_addr; else sa = &sun_noname; bcopy(sa, *nam, sa->sa_len); UNP_PCB_UNLOCK(unp2); } else { sa = &sun_noname; bcopy(sa, *nam, sa->sa_len); } UNP_LINK_RUNLOCK(); return (0); } static int uipc_rcvd(struct socket *so, int flags) { struct unpcb *unp, *unp2; struct socket *so2; u_int mbcnt, sbcc; unp = sotounpcb(so); KASSERT(unp != NULL, ("%s: unp == NULL", __func__)); KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET, ("%s: socktype %d", __func__, so->so_type)); /* * Adjust backpressure on sender and wakeup any waiting to write. * * The unp lock is acquired to maintain the validity of the unp_conn * pointer; no lock on unp2 is required as unp2->unp_socket will be * static as long as we don't permit unp2 to disconnect from unp, * which is prevented by the lock on unp. We cache values from * so_rcv to avoid holding the so_rcv lock over the entire * transaction on the remote so_snd. */ SOCKBUF_LOCK(&so->so_rcv); mbcnt = so->so_rcv.sb_mbcnt; sbcc = sbavail(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_rcv); /* * There is a benign race condition at this point. If we're planning to * clear SB_STOP, but uipc_send is called on the connected socket at * this instant, it might add data to the sockbuf and set SB_STOP. Then * we would erroneously clear SB_STOP below, even though the sockbuf is * full. The race is benign because the only ill effect is to allow the * sockbuf to exceed its size limit, and the size limits are not * strictly guaranteed anyway. */ UNP_PCB_LOCK(unp); unp2 = unp->unp_conn; if (unp2 == NULL) { UNP_PCB_UNLOCK(unp); return (0); } so2 = unp2->unp_socket; SOCKBUF_LOCK(&so2->so_snd); if (sbcc < so2->so_snd.sb_hiwat && mbcnt < so2->so_snd.sb_mbmax) so2->so_snd.sb_flags &= ~SB_STOP; sowwakeup_locked(so2); UNP_PCB_UNLOCK(unp); return (0); } static int uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { struct unpcb *unp, *unp2; struct socket *so2; u_int mbcnt, sbcc; int error = 0; unp = sotounpcb(so); KASSERT(unp != NULL, ("%s: unp == NULL", __func__)); KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM || so->so_type == SOCK_SEQPACKET, ("%s: socktype %d", __func__, so->so_type)); if (flags & PRUS_OOB) { error = EOPNOTSUPP; goto release; } if (control != NULL && (error = unp_internalize(&control, td))) goto release; if ((nam != NULL) || (flags & PRUS_EOF)) UNP_LINK_WLOCK(); else UNP_LINK_RLOCK(); switch (so->so_type) { case SOCK_DGRAM: { const struct sockaddr *from; unp2 = unp->unp_conn; if (nam != NULL) { UNP_LINK_WLOCK_ASSERT(); if (unp2 != NULL) { error = EISCONN; break; } error = unp_connect(so, nam, td); if (error) break; unp2 = unp->unp_conn; } /* * Because connect() and send() are non-atomic in a sendto() * with a target address, it's possible that the socket will * have disconnected before the send() can run. In that case * return the slightly counter-intuitive but otherwise * correct error that the socket is not connected. */ if (unp2 == NULL) { error = ENOTCONN; break; } /* Lockless read. */ if (unp2->unp_flags & UNP_WANTCRED) control = unp_addsockcred(td, control); UNP_PCB_LOCK(unp); if (unp->unp_addr != NULL) from = (struct sockaddr *)unp->unp_addr; else from = &sun_noname; so2 = unp2->unp_socket; SOCKBUF_LOCK(&so2->so_rcv); if (sbappendaddr_locked(&so2->so_rcv, from, m, control)) { sorwakeup_locked(so2); m = NULL; control = NULL; } else { SOCKBUF_UNLOCK(&so2->so_rcv); error = ENOBUFS; } if (nam != NULL) { UNP_LINK_WLOCK_ASSERT(); UNP_PCB_LOCK(unp2); unp_disconnect(unp, unp2); UNP_PCB_UNLOCK(unp2); } UNP_PCB_UNLOCK(unp); break; } case SOCK_SEQPACKET: case SOCK_STREAM: if ((so->so_state & SS_ISCONNECTED) == 0) { if (nam != NULL) { UNP_LINK_WLOCK_ASSERT(); error = unp_connect(so, nam, td); if (error) break; /* XXX */ } else { error = ENOTCONN; break; } } /* Lockless read. */ if (so->so_snd.sb_state & SBS_CANTSENDMORE) { error = EPIPE; break; } /* * Because connect() and send() are non-atomic in a sendto() * with a target address, it's possible that the socket will * have disconnected before the send() can run. In that case * return the slightly counter-intuitive but otherwise * correct error that the socket is not connected. * * Locking here must be done carefully: the linkage lock * prevents interconnections between unpcbs from changing, so * we can traverse from unp to unp2 without acquiring unp's * lock. Socket buffer locks follow unpcb locks, so we can * acquire both remote and lock socket buffer locks. */ unp2 = unp->unp_conn; if (unp2 == NULL) { error = ENOTCONN; break; } so2 = unp2->unp_socket; UNP_PCB_LOCK(unp2); SOCKBUF_LOCK(&so2->so_rcv); if (unp2->unp_flags & UNP_WANTCRED) { /* * Credentials are passed only once on SOCK_STREAM * and SOCK_SEQPACKET. */ unp2->unp_flags &= ~UNP_WANTCRED; control = unp_addsockcred(td, control); } /* * Send to paired receive port, and then reduce send buffer * hiwater marks to maintain backpressure. Wake up readers. */ switch (so->so_type) { case SOCK_STREAM: if (control != NULL) { if (sbappendcontrol_locked(&so2->so_rcv, m, control)) control = NULL; } else - sbappend_locked(&so2->so_rcv, m); + sbappendstream_locked(&so2->so_rcv, m, flags); break; case SOCK_SEQPACKET: { const struct sockaddr *from; from = &sun_noname; /* * Don't check for space available in so2->so_rcv. * Unix domain sockets only check for space in the * sending sockbuf, and that check is performed one * level up the stack. */ if (sbappendaddr_nospacecheck_locked(&so2->so_rcv, from, m, control)) control = NULL; break; } } mbcnt = so2->so_rcv.sb_mbcnt; sbcc = sbavail(&so2->so_rcv); if (sbcc) sorwakeup_locked(so2); else SOCKBUF_UNLOCK(&so2->so_rcv); /* * The PCB lock on unp2 protects the SB_STOP flag. Without it, * it would be possible for uipc_rcvd to be called at this * point, drain the receiving sockbuf, clear SB_STOP, and then * we would set SB_STOP below. That could lead to an empty * sockbuf having SB_STOP set */ SOCKBUF_LOCK(&so->so_snd); if (sbcc >= so->so_snd.sb_hiwat || mbcnt >= so->so_snd.sb_mbmax) so->so_snd.sb_flags |= SB_STOP; SOCKBUF_UNLOCK(&so->so_snd); UNP_PCB_UNLOCK(unp2); m = NULL; break; } /* * PRUS_EOF is equivalent to pru_send followed by pru_shutdown. */ if (flags & PRUS_EOF) { UNP_PCB_LOCK(unp); socantsendmore(so); unp_shutdown(unp); UNP_PCB_UNLOCK(unp); } if ((nam != NULL) || (flags & PRUS_EOF)) UNP_LINK_WUNLOCK(); else UNP_LINK_RUNLOCK(); if (control != NULL && error != 0) unp_dispose(control); release: if (control != NULL) m_freem(control); if (m != NULL) m_freem(m); return (error); } static int uipc_ready(struct socket *so, struct mbuf *m, int count) { struct unpcb *unp, *unp2; struct socket *so2; int error; unp = sotounpcb(so); UNP_LINK_RLOCK(); unp2 = unp->unp_conn; UNP_PCB_LOCK(unp2); so2 = unp2->unp_socket; SOCKBUF_LOCK(&so2->so_rcv); if ((error = sbready(&so2->so_rcv, m, count)) == 0) sorwakeup_locked(so2); else SOCKBUF_UNLOCK(&so2->so_rcv); UNP_PCB_UNLOCK(unp2); UNP_LINK_RUNLOCK(); return (error); } static int uipc_sense(struct socket *so, struct stat *sb) { struct unpcb *unp; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_sense: unp == NULL")); sb->st_blksize = so->so_snd.sb_hiwat; UNP_PCB_LOCK(unp); sb->st_dev = NODEV; if (unp->unp_ino == 0) unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino; sb->st_ino = unp->unp_ino; UNP_PCB_UNLOCK(unp); return (0); } static int uipc_shutdown(struct socket *so) { struct unpcb *unp; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_shutdown: unp == NULL")); UNP_LINK_WLOCK(); UNP_PCB_LOCK(unp); socantsendmore(so); unp_shutdown(unp); UNP_PCB_UNLOCK(unp); UNP_LINK_WUNLOCK(); return (0); } static int uipc_sockaddr(struct socket *so, struct sockaddr **nam) { struct unpcb *unp; const struct sockaddr *sa; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_sockaddr: unp == NULL")); *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); UNP_PCB_LOCK(unp); if (unp->unp_addr != NULL) sa = (struct sockaddr *) unp->unp_addr; else sa = &sun_noname; bcopy(sa, *nam, sa->sa_len); UNP_PCB_UNLOCK(unp); return (0); } static struct pr_usrreqs uipc_usrreqs_dgram = { .pru_abort = uipc_abort, .pru_accept = uipc_accept, .pru_attach = uipc_attach, .pru_bind = uipc_bind, .pru_bindat = uipc_bindat, .pru_connect = uipc_connect, .pru_connectat = uipc_connectat, .pru_connect2 = uipc_connect2, .pru_detach = uipc_detach, .pru_disconnect = uipc_disconnect, .pru_listen = uipc_listen, .pru_peeraddr = uipc_peeraddr, .pru_rcvd = uipc_rcvd, .pru_send = uipc_send, .pru_sense = uipc_sense, .pru_shutdown = uipc_shutdown, .pru_sockaddr = uipc_sockaddr, .pru_soreceive = soreceive_dgram, .pru_close = uipc_close, }; static struct pr_usrreqs uipc_usrreqs_seqpacket = { .pru_abort = uipc_abort, .pru_accept = uipc_accept, .pru_attach = uipc_attach, .pru_bind = uipc_bind, .pru_bindat = uipc_bindat, .pru_connect = uipc_connect, .pru_connectat = uipc_connectat, .pru_connect2 = uipc_connect2, .pru_detach = uipc_detach, .pru_disconnect = uipc_disconnect, .pru_listen = uipc_listen, .pru_peeraddr = uipc_peeraddr, .pru_rcvd = uipc_rcvd, .pru_send = uipc_send, .pru_sense = uipc_sense, .pru_shutdown = uipc_shutdown, .pru_sockaddr = uipc_sockaddr, .pru_soreceive = soreceive_generic, /* XXX: or...? */ .pru_close = uipc_close, }; static struct pr_usrreqs uipc_usrreqs_stream = { .pru_abort = uipc_abort, .pru_accept = uipc_accept, .pru_attach = uipc_attach, .pru_bind = uipc_bind, .pru_bindat = uipc_bindat, .pru_connect = uipc_connect, .pru_connectat = uipc_connectat, .pru_connect2 = uipc_connect2, .pru_detach = uipc_detach, .pru_disconnect = uipc_disconnect, .pru_listen = uipc_listen, .pru_peeraddr = uipc_peeraddr, .pru_rcvd = uipc_rcvd, .pru_send = uipc_send, .pru_ready = uipc_ready, .pru_sense = uipc_sense, .pru_shutdown = uipc_shutdown, .pru_sockaddr = uipc_sockaddr, .pru_soreceive = soreceive_generic, .pru_close = uipc_close, }; static int uipc_ctloutput(struct socket *so, struct sockopt *sopt) { struct unpcb *unp; struct xucred xu; int error, optval; if (sopt->sopt_level != 0) return (EINVAL); unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_ctloutput: unp == NULL")); error = 0; switch (sopt->sopt_dir) { case SOPT_GET: switch (sopt->sopt_name) { case LOCAL_PEERCRED: UNP_PCB_LOCK(unp); if (unp->unp_flags & UNP_HAVEPC) xu = unp->unp_peercred; else { if (so->so_type == SOCK_STREAM) error = ENOTCONN; else error = EINVAL; } UNP_PCB_UNLOCK(unp); if (error == 0) error = sooptcopyout(sopt, &xu, sizeof(xu)); break; case LOCAL_CREDS: /* Unlocked read. */ optval = unp->unp_flags & UNP_WANTCRED ? 1 : 0; error = sooptcopyout(sopt, &optval, sizeof(optval)); break; case LOCAL_CONNWAIT: /* Unlocked read. */ optval = unp->unp_flags & UNP_CONNWAIT ? 1 : 0; error = sooptcopyout(sopt, &optval, sizeof(optval)); break; default: error = EOPNOTSUPP; break; } break; case SOPT_SET: switch (sopt->sopt_name) { case LOCAL_CREDS: case LOCAL_CONNWAIT: error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); if (error) break; #define OPTSET(bit) do { \ UNP_PCB_LOCK(unp); \ if (optval) \ unp->unp_flags |= bit; \ else \ unp->unp_flags &= ~bit; \ UNP_PCB_UNLOCK(unp); \ } while (0) switch (sopt->sopt_name) { case LOCAL_CREDS: OPTSET(UNP_WANTCRED); break; case LOCAL_CONNWAIT: OPTSET(UNP_CONNWAIT); break; default: break; } break; #undef OPTSET default: error = ENOPROTOOPT; break; } break; default: error = EOPNOTSUPP; break; } return (error); } static int unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { return (unp_connectat(AT_FDCWD, so, nam, td)); } static int unp_connectat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { struct sockaddr_un *soun = (struct sockaddr_un *)nam; struct vnode *vp; struct socket *so2, *so3; struct unpcb *unp, *unp2, *unp3; struct nameidata nd; char buf[SOCK_MAXADDRLEN]; struct sockaddr *sa; cap_rights_t rights; int error, len; if (nam->sa_family != AF_UNIX) return (EAFNOSUPPORT); UNP_LINK_WLOCK_ASSERT(); unp = sotounpcb(so); KASSERT(unp != NULL, ("unp_connect: unp == NULL")); if (nam->sa_len > sizeof(struct sockaddr_un)) return (EINVAL); len = nam->sa_len - offsetof(struct sockaddr_un, sun_path); if (len <= 0) return (EINVAL); bcopy(soun->sun_path, buf, len); buf[len] = 0; UNP_PCB_LOCK(unp); if (unp->unp_flags & UNP_CONNECTING) { UNP_PCB_UNLOCK(unp); return (EALREADY); } UNP_LINK_WUNLOCK(); unp->unp_flags |= UNP_CONNECTING; UNP_PCB_UNLOCK(unp); sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, buf, fd, cap_rights_init(&rights, CAP_CONNECTAT), td); error = namei(&nd); if (error) vp = NULL; else vp = nd.ni_vp; ASSERT_VOP_LOCKED(vp, "unp_connect"); NDFREE(&nd, NDF_ONLY_PNBUF); if (error) goto bad; if (vp->v_type != VSOCK) { error = ENOTSOCK; goto bad; } #ifdef MAC error = mac_vnode_check_open(td->td_ucred, vp, VWRITE | VREAD); if (error) goto bad; #endif error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td); if (error) goto bad; unp = sotounpcb(so); KASSERT(unp != NULL, ("unp_connect: unp == NULL")); /* * Lock linkage lock for two reasons: make sure v_socket is stable, * and to protect simultaneous locking of multiple pcbs. */ UNP_LINK_WLOCK(); VOP_UNP_CONNECT(vp, &so2); if (so2 == NULL) { error = ECONNREFUSED; goto bad2; } if (so->so_type != so2->so_type) { error = EPROTOTYPE; goto bad2; } if (so->so_proto->pr_flags & PR_CONNREQUIRED) { if (so2->so_options & SO_ACCEPTCONN) { CURVNET_SET(so2->so_vnet); so3 = sonewconn(so2, 0); CURVNET_RESTORE(); } else so3 = NULL; if (so3 == NULL) { error = ECONNREFUSED; goto bad2; } unp = sotounpcb(so); unp2 = sotounpcb(so2); unp3 = sotounpcb(so3); UNP_PCB_LOCK(unp); UNP_PCB_LOCK(unp2); UNP_PCB_LOCK(unp3); if (unp2->unp_addr != NULL) { bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len); unp3->unp_addr = (struct sockaddr_un *) sa; sa = NULL; } /* * The connector's (client's) credentials are copied from its * process structure at the time of connect() (which is now). */ cru2x(td->td_ucred, &unp3->unp_peercred); unp3->unp_flags |= UNP_HAVEPC; /* * The receiver's (server's) credentials are copied from the * unp_peercred member of socket on which the former called * listen(); uipc_listen() cached that process's credentials * at that time so we can use them now. */ KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED, ("unp_connect: listener without cached peercred")); memcpy(&unp->unp_peercred, &unp2->unp_peercred, sizeof(unp->unp_peercred)); unp->unp_flags |= UNP_HAVEPC; if (unp2->unp_flags & UNP_WANTCRED) unp3->unp_flags |= UNP_WANTCRED; UNP_PCB_UNLOCK(unp3); UNP_PCB_UNLOCK(unp2); UNP_PCB_UNLOCK(unp); #ifdef MAC mac_socketpeer_set_from_socket(so, so3); mac_socketpeer_set_from_socket(so3, so); #endif so2 = so3; } unp = sotounpcb(so); KASSERT(unp != NULL, ("unp_connect: unp == NULL")); unp2 = sotounpcb(so2); KASSERT(unp2 != NULL, ("unp_connect: unp2 == NULL")); UNP_PCB_LOCK(unp); UNP_PCB_LOCK(unp2); error = unp_connect2(so, so2, PRU_CONNECT); UNP_PCB_UNLOCK(unp2); UNP_PCB_UNLOCK(unp); bad2: UNP_LINK_WUNLOCK(); bad: if (vp != NULL) vput(vp); free(sa, M_SONAME); UNP_LINK_WLOCK(); UNP_PCB_LOCK(unp); unp->unp_flags &= ~UNP_CONNECTING; UNP_PCB_UNLOCK(unp); return (error); } static int unp_connect2(struct socket *so, struct socket *so2, int req) { struct unpcb *unp; struct unpcb *unp2; unp = sotounpcb(so); KASSERT(unp != NULL, ("unp_connect2: unp == NULL")); unp2 = sotounpcb(so2); KASSERT(unp2 != NULL, ("unp_connect2: unp2 == NULL")); UNP_LINK_WLOCK_ASSERT(); UNP_PCB_LOCK_ASSERT(unp); UNP_PCB_LOCK_ASSERT(unp2); if (so2->so_type != so->so_type) return (EPROTOTYPE); unp->unp_conn = unp2; switch (so->so_type) { case SOCK_DGRAM: LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); soisconnected(so); break; case SOCK_STREAM: case SOCK_SEQPACKET: unp2->unp_conn = unp; if (req == PRU_CONNECT && ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT)) soisconnecting(so); else soisconnected(so); soisconnected(so2); break; default: panic("unp_connect2"); } return (0); } static void unp_disconnect(struct unpcb *unp, struct unpcb *unp2) { struct socket *so; KASSERT(unp2 != NULL, ("unp_disconnect: unp2 == NULL")); UNP_LINK_WLOCK_ASSERT(); UNP_PCB_LOCK_ASSERT(unp); UNP_PCB_LOCK_ASSERT(unp2); unp->unp_conn = NULL; switch (unp->unp_socket->so_type) { case SOCK_DGRAM: LIST_REMOVE(unp, unp_reflink); so = unp->unp_socket; SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTED; SOCK_UNLOCK(so); break; case SOCK_STREAM: case SOCK_SEQPACKET: soisdisconnected(unp->unp_socket); unp2->unp_conn = NULL; soisdisconnected(unp2->unp_socket); break; } } /* * unp_pcblist() walks the global list of struct unpcb's to generate a * pointer list, bumping the refcount on each unpcb. It then copies them out * sequentially, validating the generation number on each to see if it has * been detached. All of this is necessary because copyout() may sleep on * disk I/O. */ static int unp_pcblist(SYSCTL_HANDLER_ARGS) { int error, i, n; int freeunp; struct unpcb *unp, **unp_list; unp_gen_t gencnt; struct xunpgen *xug; struct unp_head *head; struct xunpcb *xu; switch ((intptr_t)arg1) { case SOCK_STREAM: head = &unp_shead; break; case SOCK_DGRAM: head = &unp_dhead; break; case SOCK_SEQPACKET: head = &unp_sphead; break; default: panic("unp_pcblist: arg1 %d", (int)(intptr_t)arg1); } /* * The process of preparing the PCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ if (req->oldptr == NULL) { n = unp_count; req->oldidx = 2 * (sizeof *xug) + (n + n/8) * sizeof(struct xunpcb); return (0); } if (req->newptr != NULL) return (EPERM); /* * OK, now we're committed to doing something. */ xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK); UNP_LIST_LOCK(); gencnt = unp_gencnt; n = unp_count; UNP_LIST_UNLOCK(); xug->xug_len = sizeof *xug; xug->xug_count = n; xug->xug_gen = gencnt; xug->xug_sogen = so_gencnt; error = SYSCTL_OUT(req, xug, sizeof *xug); if (error) { free(xug, M_TEMP); return (error); } unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); UNP_LIST_LOCK(); for (unp = LIST_FIRST(head), i = 0; unp && i < n; unp = LIST_NEXT(unp, unp_link)) { UNP_PCB_LOCK(unp); if (unp->unp_gencnt <= gencnt) { if (cr_cansee(req->td->td_ucred, unp->unp_socket->so_cred)) { UNP_PCB_UNLOCK(unp); continue; } unp_list[i++] = unp; unp->unp_refcount++; } UNP_PCB_UNLOCK(unp); } UNP_LIST_UNLOCK(); n = i; /* In case we lost some during malloc. */ error = 0; xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO); for (i = 0; i < n; i++) { unp = unp_list[i]; UNP_PCB_LOCK(unp); unp->unp_refcount--; if (unp->unp_refcount != 0 && unp->unp_gencnt <= gencnt) { xu->xu_len = sizeof *xu; xu->xu_unpp = unp; /* * XXX - need more locking here to protect against * connect/disconnect races for SMP. */ if (unp->unp_addr != NULL) bcopy(unp->unp_addr, &xu->xu_addr, unp->unp_addr->sun_len); if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) bcopy(unp->unp_conn->unp_addr, &xu->xu_caddr, unp->unp_conn->unp_addr->sun_len); bcopy(unp, &xu->xu_unp, sizeof *unp); sotoxsocket(unp->unp_socket, &xu->xu_socket); UNP_PCB_UNLOCK(unp); error = SYSCTL_OUT(req, xu, sizeof *xu); } else { freeunp = (unp->unp_refcount == 0); UNP_PCB_UNLOCK(unp); if (freeunp) { UNP_PCB_LOCK_DESTROY(unp); uma_zfree(unp_zone, unp); } } } free(xu, M_TEMP); if (!error) { /* * Give the user an updated idea of our state. If the * generation differs from what we told her before, she knows * that something happened while we were processing this * request, and it might be necessary to retry. */ xug->xug_gen = unp_gencnt; xug->xug_sogen = so_gencnt; xug->xug_count = unp_count; error = SYSCTL_OUT(req, xug, sizeof *xug); } free(unp_list, M_TEMP); free(xug, M_TEMP); return (error); } SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD, (void *)(intptr_t)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", "List of active local datagram sockets"); SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD, (void *)(intptr_t)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", "List of active local stream sockets"); SYSCTL_PROC(_net_local_seqpacket, OID_AUTO, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD, (void *)(intptr_t)SOCK_SEQPACKET, 0, unp_pcblist, "S,xunpcb", "List of active local seqpacket sockets"); static void unp_shutdown(struct unpcb *unp) { struct unpcb *unp2; struct socket *so; UNP_LINK_WLOCK_ASSERT(); UNP_PCB_LOCK_ASSERT(unp); unp2 = unp->unp_conn; if ((unp->unp_socket->so_type == SOCK_STREAM || (unp->unp_socket->so_type == SOCK_SEQPACKET)) && unp2 != NULL) { so = unp2->unp_socket; if (so != NULL) socantrcvmore(so); } } static void unp_drop(struct unpcb *unp, int errno) { struct socket *so = unp->unp_socket; struct unpcb *unp2; UNP_LINK_WLOCK_ASSERT(); UNP_PCB_LOCK_ASSERT(unp); so->so_error = errno; unp2 = unp->unp_conn; if (unp2 == NULL) return; UNP_PCB_LOCK(unp2); unp_disconnect(unp, unp2); UNP_PCB_UNLOCK(unp2); } static void unp_freerights(struct filedescent **fdep, int fdcount) { struct file *fp; int i; KASSERT(fdcount > 0, ("%s: fdcount %d", __func__, fdcount)); for (i = 0; i < fdcount; i++) { fp = fdep[i]->fde_file; filecaps_free(&fdep[i]->fde_caps); unp_discard(fp); } free(fdep[0], M_FILECAPS); } static int unp_externalize(struct mbuf *control, struct mbuf **controlp, int flags) { struct thread *td = curthread; /* XXX */ struct cmsghdr *cm = mtod(control, struct cmsghdr *); int i; int *fdp; struct filedesc *fdesc = td->td_proc->p_fd; struct filedescent **fdep; void *data; socklen_t clen = control->m_len, datalen; int error, newfds; u_int newlen; UNP_LINK_UNLOCK_ASSERT(); error = 0; if (controlp != NULL) /* controlp == NULL => free control messages */ *controlp = NULL; while (cm != NULL) { if (sizeof(*cm) > clen || cm->cmsg_len > clen) { error = EINVAL; break; } data = CMSG_DATA(cm); datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS) { newfds = datalen / sizeof(*fdep); if (newfds == 0) goto next; fdep = data; /* If we're not outputting the descriptors free them. */ if (error || controlp == NULL) { unp_freerights(fdep, newfds); goto next; } FILEDESC_XLOCK(fdesc); /* * Now change each pointer to an fd in the global * table to an integer that is the index to the local * fd table entry that we set up to point to the * global one we are transferring. */ newlen = newfds * sizeof(int); *controlp = sbcreatecontrol(NULL, newlen, SCM_RIGHTS, SOL_SOCKET); if (*controlp == NULL) { FILEDESC_XUNLOCK(fdesc); error = E2BIG; unp_freerights(fdep, newfds); goto next; } fdp = (int *) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); if (fdallocn(td, 0, fdp, newfds) != 0) { FILEDESC_XUNLOCK(fdesc); error = EMSGSIZE; unp_freerights(fdep, newfds); m_freem(*controlp); *controlp = NULL; goto next; } for (i = 0; i < newfds; i++, fdp++) { _finstall(fdesc, fdep[i]->fde_file, *fdp, (flags & MSG_CMSG_CLOEXEC) != 0 ? UF_EXCLOSE : 0, &fdep[i]->fde_caps); unp_externalize_fp(fdep[i]->fde_file); } FILEDESC_XUNLOCK(fdesc); free(fdep[0], M_FILECAPS); } else { /* We can just copy anything else across. */ if (error || controlp == NULL) goto next; *controlp = sbcreatecontrol(NULL, datalen, cm->cmsg_type, cm->cmsg_level); if (*controlp == NULL) { error = ENOBUFS; goto next; } bcopy(data, CMSG_DATA(mtod(*controlp, struct cmsghdr *)), datalen); } controlp = &(*controlp)->m_next; next: if (CMSG_SPACE(datalen) < clen) { clen -= CMSG_SPACE(datalen); cm = (struct cmsghdr *) ((caddr_t)cm + CMSG_SPACE(datalen)); } else { clen = 0; cm = NULL; } } m_freem(control); return (error); } static void unp_zone_change(void *tag) { uma_zone_set_max(unp_zone, maxsockets); } static void unp_init(void) { #ifdef VIMAGE if (!IS_DEFAULT_VNET(curvnet)) return; #endif unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); if (unp_zone == NULL) panic("unp_init"); uma_zone_set_max(unp_zone, maxsockets); uma_zone_set_warning(unp_zone, "kern.ipc.maxsockets limit reached"); EVENTHANDLER_REGISTER(maxsockets_change, unp_zone_change, NULL, EVENTHANDLER_PRI_ANY); LIST_INIT(&unp_dhead); LIST_INIT(&unp_shead); LIST_INIT(&unp_sphead); SLIST_INIT(&unp_defers); TIMEOUT_TASK_INIT(taskqueue_thread, &unp_gc_task, 0, unp_gc, NULL); TASK_INIT(&unp_defer_task, 0, unp_process_defers, NULL); UNP_LINK_LOCK_INIT(); UNP_LIST_LOCK_INIT(); UNP_DEFERRED_LOCK_INIT(); } static int unp_internalize(struct mbuf **controlp, struct thread *td) { struct mbuf *control = *controlp; struct proc *p = td->td_proc; struct filedesc *fdesc = p->p_fd; struct bintime *bt; struct cmsghdr *cm = mtod(control, struct cmsghdr *); struct cmsgcred *cmcred; struct filedescent *fde, **fdep, *fdev; struct file *fp; struct timeval *tv; int i, *fdp; void *data; socklen_t clen = control->m_len, datalen; int error, oldfds; u_int newlen; UNP_LINK_UNLOCK_ASSERT(); error = 0; *controlp = NULL; while (cm != NULL) { if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET || cm->cmsg_len > clen || cm->cmsg_len < sizeof(*cm)) { error = EINVAL; goto out; } data = CMSG_DATA(cm); datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; switch (cm->cmsg_type) { /* * Fill in credential information. */ case SCM_CREDS: *controlp = sbcreatecontrol(NULL, sizeof(*cmcred), SCM_CREDS, SOL_SOCKET); if (*controlp == NULL) { error = ENOBUFS; goto out; } cmcred = (struct cmsgcred *) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); cmcred->cmcred_pid = p->p_pid; cmcred->cmcred_uid = td->td_ucred->cr_ruid; cmcred->cmcred_gid = td->td_ucred->cr_rgid; cmcred->cmcred_euid = td->td_ucred->cr_uid; cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups, CMGROUP_MAX); for (i = 0; i < cmcred->cmcred_ngroups; i++) cmcred->cmcred_groups[i] = td->td_ucred->cr_groups[i]; break; case SCM_RIGHTS: oldfds = datalen / sizeof (int); if (oldfds == 0) break; /* * Check that all the FDs passed in refer to legal * files. If not, reject the entire operation. */ fdp = data; FILEDESC_SLOCK(fdesc); for (i = 0; i < oldfds; i++, fdp++) { fp = fget_locked(fdesc, *fdp); if (fp == NULL) { FILEDESC_SUNLOCK(fdesc); error = EBADF; goto out; } if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) { FILEDESC_SUNLOCK(fdesc); error = EOPNOTSUPP; goto out; } } /* * Now replace the integer FDs with pointers to the * file structure and capability rights. */ newlen = oldfds * sizeof(fdep[0]); *controlp = sbcreatecontrol(NULL, newlen, SCM_RIGHTS, SOL_SOCKET); if (*controlp == NULL) { FILEDESC_SUNLOCK(fdesc); error = E2BIG; goto out; } fdp = data; fdep = (struct filedescent **) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); fdev = malloc(sizeof(*fdev) * oldfds, M_FILECAPS, M_WAITOK); for (i = 0; i < oldfds; i++, fdev++, fdp++) { fde = &fdesc->fd_ofiles[*fdp]; fdep[i] = fdev; fdep[i]->fde_file = fde->fde_file; filecaps_copy(&fde->fde_caps, &fdep[i]->fde_caps, true); unp_internalize_fp(fdep[i]->fde_file); } FILEDESC_SUNLOCK(fdesc); break; case SCM_TIMESTAMP: *controlp = sbcreatecontrol(NULL, sizeof(*tv), SCM_TIMESTAMP, SOL_SOCKET); if (*controlp == NULL) { error = ENOBUFS; goto out; } tv = (struct timeval *) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); microtime(tv); break; case SCM_BINTIME: *controlp = sbcreatecontrol(NULL, sizeof(*bt), SCM_BINTIME, SOL_SOCKET); if (*controlp == NULL) { error = ENOBUFS; goto out; } bt = (struct bintime *) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); bintime(bt); break; default: error = EINVAL; goto out; } controlp = &(*controlp)->m_next; if (CMSG_SPACE(datalen) < clen) { clen -= CMSG_SPACE(datalen); cm = (struct cmsghdr *) ((caddr_t)cm + CMSG_SPACE(datalen)); } else { clen = 0; cm = NULL; } } out: m_freem(control); return (error); } static struct mbuf * unp_addsockcred(struct thread *td, struct mbuf *control) { struct mbuf *m, *n, *n_prev; struct sockcred *sc; const struct cmsghdr *cm; int ngroups; int i; ngroups = MIN(td->td_ucred->cr_ngroups, CMGROUP_MAX); m = sbcreatecontrol(NULL, SOCKCREDSIZE(ngroups), SCM_CREDS, SOL_SOCKET); if (m == NULL) return (control); sc = (struct sockcred *) CMSG_DATA(mtod(m, struct cmsghdr *)); sc->sc_uid = td->td_ucred->cr_ruid; sc->sc_euid = td->td_ucred->cr_uid; sc->sc_gid = td->td_ucred->cr_rgid; sc->sc_egid = td->td_ucred->cr_gid; sc->sc_ngroups = ngroups; for (i = 0; i < sc->sc_ngroups; i++) sc->sc_groups[i] = td->td_ucred->cr_groups[i]; /* * Unlink SCM_CREDS control messages (struct cmsgcred), since just * created SCM_CREDS control message (struct sockcred) has another * format. */ if (control != NULL) for (n = control, n_prev = NULL; n != NULL;) { cm = mtod(n, struct cmsghdr *); if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_CREDS) { if (n_prev == NULL) control = n->m_next; else n_prev->m_next = n->m_next; n = m_free(n); } else { n_prev = n; n = n->m_next; } } /* Prepend it to the head. */ m->m_next = control; return (m); } static struct unpcb * fptounp(struct file *fp) { struct socket *so; if (fp->f_type != DTYPE_SOCKET) return (NULL); if ((so = fp->f_data) == NULL) return (NULL); if (so->so_proto->pr_domain != &localdomain) return (NULL); return sotounpcb(so); } static void unp_discard(struct file *fp) { struct unp_defer *dr; if (unp_externalize_fp(fp)) { dr = malloc(sizeof(*dr), M_TEMP, M_WAITOK); dr->ud_fp = fp; UNP_DEFERRED_LOCK(); SLIST_INSERT_HEAD(&unp_defers, dr, ud_link); UNP_DEFERRED_UNLOCK(); atomic_add_int(&unp_defers_count, 1); taskqueue_enqueue(taskqueue_thread, &unp_defer_task); } else (void) closef(fp, (struct thread *)NULL); } static void unp_process_defers(void *arg __unused, int pending) { struct unp_defer *dr; SLIST_HEAD(, unp_defer) drl; int count; SLIST_INIT(&drl); for (;;) { UNP_DEFERRED_LOCK(); if (SLIST_FIRST(&unp_defers) == NULL) { UNP_DEFERRED_UNLOCK(); break; } SLIST_SWAP(&unp_defers, &drl, unp_defer); UNP_DEFERRED_UNLOCK(); count = 0; while ((dr = SLIST_FIRST(&drl)) != NULL) { SLIST_REMOVE_HEAD(&drl, ud_link); closef(dr->ud_fp, NULL); free(dr, M_TEMP); count++; } atomic_add_int(&unp_defers_count, -count); } } static void unp_internalize_fp(struct file *fp) { struct unpcb *unp; UNP_LINK_WLOCK(); if ((unp = fptounp(fp)) != NULL) { unp->unp_file = fp; unp->unp_msgcount++; } fhold(fp); unp_rights++; UNP_LINK_WUNLOCK(); } static int unp_externalize_fp(struct file *fp) { struct unpcb *unp; int ret; UNP_LINK_WLOCK(); if ((unp = fptounp(fp)) != NULL) { unp->unp_msgcount--; ret = 1; } else ret = 0; unp_rights--; UNP_LINK_WUNLOCK(); return (ret); } /* * unp_defer indicates whether additional work has been defered for a future * pass through unp_gc(). It is thread local and does not require explicit * synchronization. */ static int unp_marked; static int unp_unreachable; static void unp_accessable(struct filedescent **fdep, int fdcount) { struct unpcb *unp; struct file *fp; int i; for (i = 0; i < fdcount; i++) { fp = fdep[i]->fde_file; if ((unp = fptounp(fp)) == NULL) continue; if (unp->unp_gcflag & UNPGC_REF) continue; unp->unp_gcflag &= ~UNPGC_DEAD; unp->unp_gcflag |= UNPGC_REF; unp_marked++; } } static void unp_gc_process(struct unpcb *unp) { struct socket *soa; struct socket *so; struct file *fp; /* Already processed. */ if (unp->unp_gcflag & UNPGC_SCANNED) return; fp = unp->unp_file; /* * Check for a socket potentially in a cycle. It must be in a * queue as indicated by msgcount, and this must equal the file * reference count. Note that when msgcount is 0 the file is NULL. */ if ((unp->unp_gcflag & UNPGC_REF) == 0 && fp && unp->unp_msgcount != 0 && fp->f_count == unp->unp_msgcount) { unp->unp_gcflag |= UNPGC_DEAD; unp_unreachable++; return; } /* * Mark all sockets we reference with RIGHTS. */ so = unp->unp_socket; if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) { SOCKBUF_LOCK(&so->so_rcv); unp_scan(so->so_rcv.sb_mb, unp_accessable); SOCKBUF_UNLOCK(&so->so_rcv); } /* * Mark all sockets in our accept queue. */ ACCEPT_LOCK(); TAILQ_FOREACH(soa, &so->so_comp, so_list) { if ((sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS) != 0) continue; SOCKBUF_LOCK(&soa->so_rcv); unp_scan(soa->so_rcv.sb_mb, unp_accessable); SOCKBUF_UNLOCK(&soa->so_rcv); } ACCEPT_UNLOCK(); unp->unp_gcflag |= UNPGC_SCANNED; } static int unp_recycled; SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, "Number of unreachable sockets claimed by the garbage collector."); static int unp_taskcount; SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, "Number of times the garbage collector has run."); static void unp_gc(__unused void *arg, int pending) { struct unp_head *heads[] = { &unp_dhead, &unp_shead, &unp_sphead, NULL }; struct unp_head **head; struct file *f, **unref; struct unpcb *unp; int i, total; unp_taskcount++; UNP_LIST_LOCK(); /* * First clear all gc flags from previous runs, apart from * UNPGC_IGNORE_RIGHTS. */ for (head = heads; *head != NULL; head++) LIST_FOREACH(unp, *head, unp_link) unp->unp_gcflag = (unp->unp_gcflag & UNPGC_IGNORE_RIGHTS); /* * Scan marking all reachable sockets with UNPGC_REF. Once a socket * is reachable all of the sockets it references are reachable. * Stop the scan once we do a complete loop without discovering * a new reachable socket. */ do { unp_unreachable = 0; unp_marked = 0; for (head = heads; *head != NULL; head++) LIST_FOREACH(unp, *head, unp_link) unp_gc_process(unp); } while (unp_marked); UNP_LIST_UNLOCK(); if (unp_unreachable == 0) return; /* * Allocate space for a local list of dead unpcbs. */ unref = malloc(unp_unreachable * sizeof(struct file *), M_TEMP, M_WAITOK); /* * Iterate looking for sockets which have been specifically marked * as as unreachable and store them locally. */ UNP_LINK_RLOCK(); UNP_LIST_LOCK(); for (total = 0, head = heads; *head != NULL; head++) LIST_FOREACH(unp, *head, unp_link) if ((unp->unp_gcflag & UNPGC_DEAD) != 0) { f = unp->unp_file; if (unp->unp_msgcount == 0 || f == NULL || f->f_count != unp->unp_msgcount) continue; unref[total++] = f; fhold(f); KASSERT(total <= unp_unreachable, ("unp_gc: incorrect unreachable count.")); } UNP_LIST_UNLOCK(); UNP_LINK_RUNLOCK(); /* * Now flush all sockets, free'ing rights. This will free the * struct files associated with these sockets but leave each socket * with one remaining ref. */ for (i = 0; i < total; i++) { struct socket *so; so = unref[i]->f_data; CURVNET_SET(so->so_vnet); sorflush(so); CURVNET_RESTORE(); } /* * And finally release the sockets so they can be reclaimed. */ for (i = 0; i < total; i++) fdrop(unref[i], NULL); unp_recycled += total; free(unref, M_TEMP); } static void unp_dispose(struct mbuf *m) { if (m) unp_scan(m, unp_freerights); } /* * Synchronize against unp_gc, which can trip over data as we are freeing it. */ static void unp_dispose_so(struct socket *so) { struct unpcb *unp; unp = sotounpcb(so); UNP_LIST_LOCK(); unp->unp_gcflag |= UNPGC_IGNORE_RIGHTS; UNP_LIST_UNLOCK(); unp_dispose(so->so_rcv.sb_mb); } static void unp_scan(struct mbuf *m0, void (*op)(struct filedescent **, int)) { struct mbuf *m; struct cmsghdr *cm; void *data; socklen_t clen, datalen; while (m0 != NULL) { for (m = m0; m; m = m->m_next) { if (m->m_type != MT_CONTROL) continue; cm = mtod(m, struct cmsghdr *); clen = m->m_len; while (cm != NULL) { if (sizeof(*cm) > clen || cm->cmsg_len > clen) break; data = CMSG_DATA(cm); datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS) { (*op)(data, datalen / sizeof(struct filedescent *)); } if (CMSG_SPACE(datalen) < clen) { clen -= CMSG_SPACE(datalen); cm = (struct cmsghdr *) ((caddr_t)cm + CMSG_SPACE(datalen)); } else { clen = 0; cm = NULL; } } } m0 = m0->m_nextpkt; } } /* * A helper function called by VFS before socket-type vnode reclamation. * For an active vnode it clears unp_vnode pointer and decrements unp_vnode * use count. */ void vfs_unp_reclaim(struct vnode *vp) { struct socket *so; struct unpcb *unp; int active; ASSERT_VOP_ELOCKED(vp, "vfs_unp_reclaim"); KASSERT(vp->v_type == VSOCK, ("vfs_unp_reclaim: vp->v_type != VSOCK")); active = 0; UNP_LINK_WLOCK(); VOP_UNP_CONNECT(vp, &so); if (so == NULL) goto done; unp = sotounpcb(so); if (unp == NULL) goto done; UNP_PCB_LOCK(unp); if (unp->unp_vnode == vp) { VOP_UNP_DETACH(vp); unp->unp_vnode = NULL; active = 1; } UNP_PCB_UNLOCK(unp); done: UNP_LINK_WUNLOCK(); if (active) vunref(vp); } #ifdef DDB static void db_print_indent(int indent) { int i; for (i = 0; i < indent; i++) db_printf(" "); } static void db_print_unpflags(int unp_flags) { int comma; comma = 0; if (unp_flags & UNP_HAVEPC) { db_printf("%sUNP_HAVEPC", comma ? ", " : ""); comma = 1; } if (unp_flags & UNP_HAVEPCCACHED) { db_printf("%sUNP_HAVEPCCACHED", comma ? ", " : ""); comma = 1; } if (unp_flags & UNP_WANTCRED) { db_printf("%sUNP_WANTCRED", comma ? ", " : ""); comma = 1; } if (unp_flags & UNP_CONNWAIT) { db_printf("%sUNP_CONNWAIT", comma ? ", " : ""); comma = 1; } if (unp_flags & UNP_CONNECTING) { db_printf("%sUNP_CONNECTING", comma ? ", " : ""); comma = 1; } if (unp_flags & UNP_BINDING) { db_printf("%sUNP_BINDING", comma ? ", " : ""); comma = 1; } } static void db_print_xucred(int indent, struct xucred *xu) { int comma, i; db_print_indent(indent); db_printf("cr_version: %u cr_uid: %u cr_ngroups: %d\n", xu->cr_version, xu->cr_uid, xu->cr_ngroups); db_print_indent(indent); db_printf("cr_groups: "); comma = 0; for (i = 0; i < xu->cr_ngroups; i++) { db_printf("%s%u", comma ? ", " : "", xu->cr_groups[i]); comma = 1; } db_printf("\n"); } static void db_print_unprefs(int indent, struct unp_head *uh) { struct unpcb *unp; int counter; counter = 0; LIST_FOREACH(unp, uh, unp_reflink) { if (counter % 4 == 0) db_print_indent(indent); db_printf("%p ", unp); if (counter % 4 == 3) db_printf("\n"); counter++; } if (counter != 0 && counter % 4 != 0) db_printf("\n"); } DB_SHOW_COMMAND(unpcb, db_show_unpcb) { struct unpcb *unp; if (!have_addr) { db_printf("usage: show unpcb \n"); return; } unp = (struct unpcb *)addr; db_printf("unp_socket: %p unp_vnode: %p\n", unp->unp_socket, unp->unp_vnode); db_printf("unp_ino: %ju unp_conn: %p\n", (uintmax_t)unp->unp_ino, unp->unp_conn); db_printf("unp_refs:\n"); db_print_unprefs(2, &unp->unp_refs); /* XXXRW: Would be nice to print the full address, if any. */ db_printf("unp_addr: %p\n", unp->unp_addr); db_printf("unp_gencnt: %llu\n", (unsigned long long)unp->unp_gencnt); db_printf("unp_flags: %x (", unp->unp_flags); db_print_unpflags(unp->unp_flags); db_printf(")\n"); db_printf("unp_peercred:\n"); db_print_xucred(2, &unp->unp_peercred); db_printf("unp_refcount: %u\n", unp->unp_refcount); } #endif Index: projects/sendfile/sys =================================================================== --- projects/sendfile/sys (revision 293405) +++ projects/sendfile/sys (revision 293406) Property changes on: projects/sendfile/sys ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys:r293383-293405 Index: projects/sendfile/tools/tools/nanobsd/defaults.sh =================================================================== --- projects/sendfile/tools/tools/nanobsd/defaults.sh (revision 293405) +++ projects/sendfile/tools/tools/nanobsd/defaults.sh (revision 293406) @@ -1,1074 +1,1073 @@ #!/bin/sh # # Copyright (c) 2005 Poul-Henning Kamp. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ # set -e ####################################################################### # # Setup default values for all controlling variables. # These values can be overridden from the config file(s) # ####################################################################### # Name of this NanoBSD build. (Used to construct workdir names) NANO_NAME=full # Source tree directory NANO_SRC=/usr/src # Where nanobsd additional files live under the source tree NANO_TOOLS=tools/tools/nanobsd # Where cust_pkgng() finds packages to install NANO_PACKAGE_DIR=${NANO_SRC}/${NANO_TOOLS}/Pkg NANO_PACKAGE_LIST="*" # where package metadata gets placed NANO_PKG_META_BASE=/var/db # Object tree directory # default is subdir of /usr/obj #NANO_OBJ="" # The directory to put the final images # default is ${NANO_OBJ} #NANO_DISKIMGDIR="" # Make & parallel Make NANO_MAKE="make" NANO_PMAKE="make -j 3" # The default name for any image we create. NANO_IMGNAME="_.disk.full" # Options to put in make.conf during buildworld only CONF_BUILD=' ' # Options to put in make.conf during installworld only CONF_INSTALL=' ' # Options to put in make.conf during both build- & installworld. CONF_WORLD=' ' # Kernel config file to use NANO_KERNEL=GENERIC # Kernel modules to install. If empty, no modules are installed. # Use "default" to install all built modules. NANO_MODULES= # Customize commands. NANO_CUSTOMIZE="" # Late customize commands. NANO_LATE_CUSTOMIZE="" # Newfs paramters to use NANO_NEWFS="-b 4096 -f 512 -i 8192 -U" # The drive name of the media at runtime NANO_DRIVE=ad0 # Target media size in 512 bytes sectors NANO_MEDIASIZE=2000000 # Number of code images on media (1 or 2) NANO_IMAGES=2 # 0 -> Leave second image all zeroes so it compresses better. # 1 -> Initialize second image with a copy of the first NANO_INIT_IMG2=1 # Size of code file system in 512 bytes sectors # If zero, size will be as large as possible. NANO_CODESIZE=0 # Size of configuration file system in 512 bytes sectors # Cannot be zero. NANO_CONFSIZE=2048 # Size of data file system in 512 bytes sectors # If zero: no partition configured. # If negative: max size possible NANO_DATASIZE=0 # Size of the /etc ramdisk in 512 bytes sectors NANO_RAM_ETCSIZE=10240 # Size of the /tmp+/var ramdisk in 512 bytes sectors NANO_RAM_TMPVARSIZE=10240 # Media geometry, only relevant if bios doesn't understand LBA. NANO_SECTS=63 NANO_HEADS=16 # boot0 flags/options and configuration NANO_BOOT0CFG="-o packet -s 1 -m 3" NANO_BOOTLOADER="boot/boot0sio" # boot2 flags/options # default force serial console NANO_BOOT2CFG="-h -S115200" # Backing type of md(4) device # Can be "file" or "swap" NANO_MD_BACKING="file" # for swap type md(4) backing, write out the mbr only NANO_IMAGE_MBRONLY=true # Progress Print level PPLEVEL=3 # Set NANO_LABEL to non-blank to form the basis for using /dev/ufs/label # in preference to /dev/${NANO_DRIVE} # Root partition will be ${NANO_LABEL}s{1,2} # /cfg partition will be ${NANO_LABEL}s3 # /data partition will be ${NANO_LABEL}s4 NANO_LABEL="" NANO_SLICE_ROOT=s1 NANO_SLICE_ALTROOT=s2 NANO_SLICE_CFG=s3 NANO_SLICE_DATA=s4 # Default ownwership for nopriv build NANO_DEF_UNAME=root NANO_DEF_GNAME=wheel ####################################################################### # Architecture to build. Corresponds to TARGET_ARCH in a buildworld. # Unfortunately, there's no way to set TARGET at this time, and it # conflates the two, so architectures where TARGET != TARGET_ARCH and # TARGET can't be guessed from TARGET_ARCH do not work. This defaults # to the arch of the current machine. NANO_ARCH=`uname -p` # CPUTYPE defaults to "" which is the default when CPUTYPE isn't # defined. NANO_CPUTYPE="" # Directory to populate /cfg from NANO_CFGDIR="" # Directory to populate /data from NANO_DATADIR="" # We don't need SRCCONF or SRC_ENV_CONF. NanoBSD puts everything we # need for the build in files included with __MAKE_CONF. Override in your # config file if you really must. We set them unconditionally here, though # in case they are stray in the build environment SRCCONF=/dev/null SRC_ENV_CONF=/dev/null ####################################################################### # # The functions which do the real work. # Can be overridden from the config file(s) # ####################################################################### # Export values into the shell. Must use { } instead of ( ) like # other functions to avoid a subshell. # We set __MAKE_CONF as a global since it is easier to get quoting # right for paths with spaces in them. make_export ( ) { # Similar to export_var, except puts the data out to stdout var=$1 eval val=\$$var echo "Setting variable: $var=\"$val\"" export $1 } nano_make_build_env ( ) { __MAKE_CONF="${NANO_MAKE_CONF_BUILD}" make_export __MAKE_CONF } nano_make_install_env ( ) { __MAKE_CONF="${NANO_MAKE_CONF_INSTALL}" make_export __MAKE_CONF } # Extra environment variables for kernel builds nano_make_kernel_env ( ) { if [ -f ${NANO_KERNEL} ] ; then KERNCONFDIR="$(realpath $(dirname ${NANO_KERNEL}))" KERNCONF="$(basename ${NANO_KERNEL})" make_export KERNCONFDIR make_export KERNCONF else export KERNCONF="${NANO_KERNEL}" make_export KERNCONF fi } nano_global_make_env ( ) ( # global settings for the make.conf file, if set [ -z "${NANO_ARCH}" ] || echo TARGET_ARCH="${NANO_ARCH}" [ -z "${NANO_CPUTYPE}" ] || echo TARGET_CPUTYPE="${NANO_CPUTYPE}" ) # rm doesn't know -x prior to FreeBSD 10, so cope with a variety of build # hosts for now. This will go away when support in the base goes away. rm ( ) { echo "NANO RM $*" case $(uname -r) in 7*|8*|9*) command rm $* ;; *) command rm -x $* ;; esac } # # Create empty files in the target tree, and record the fact. All paths # are relative to NANO_WORLDDIR. # tgt_touch ( ) ( cd "${NANO_WORLDDIR}" for i; do touch $i echo "./${i} type=file" >> ${NANO_METALOG} done ) # # Convert a directory into a symlink. Takes two arguments, the # current directory and what it should become a symlink to. The # directory is removed and a symlink is created. If we're doing # a nopriv build, then append this fact to the metalog # tgt_dir2symlink () ( dir=$1 symlink=$2 cd "${NANO_WORLDDIR}" rm -rf "$dir" ln -s "$symlink" "$dir" if [ -n "$NANO_METALOG" ]; then echo "./${dir} type=link mode=0777 link=${symlink}" >> ${NANO_METALOG} fi ) # run in the world chroot, errors fatal CR ( ) { chroot "${NANO_WORLDDIR}" /bin/sh -exc "$*" } # run in the world chroot, errors not fatal CR0 ( ) { chroot "${NANO_WORLDDIR}" /bin/sh -c "$*" || true } nano_cleanup ( ) ( [ $? -eq 0 ] || echo "Error encountered. Check for errors in last log file." 1>&2 exit $? ) clean_build ( ) ( pprint 2 "Clean and create object directory (${MAKEOBJDIRPREFIX})" if ! rm -rf ${MAKEOBJDIRPREFIX}/ > /dev/null 2>&1 ; then chflags -R noschg ${MAKEOBJDIRPREFIX}/ rm -r ${MAKEOBJDIRPREFIX}/ fi ) make_conf_build ( ) ( pprint 2 "Construct build make.conf ($NANO_MAKE_CONF_BUILD)" mkdir -p ${MAKEOBJDIRPREFIX} printenv > ${MAKEOBJDIRPREFIX}/_.env # Make sure we get all the global settings that NanoBSD wants # in addition to the user's global settings ( nano_global_make_env echo "${CONF_WORLD}" echo "${CONF_BUILD}" ) > ${NANO_MAKE_CONF_BUILD} ) build_world ( ) ( pprint 2 "run buildworld" pprint 3 "log: ${MAKEOBJDIRPREFIX}/_.bw" ( nano_make_build_env set -o xtrace cd "${NANO_SRC}" ${NANO_PMAKE} buildworld ) > ${MAKEOBJDIRPREFIX}/_.bw 2>&1 ) build_kernel ( ) ( local extra pprint 2 "build kernel ($NANO_KERNEL)" pprint 3 "log: ${MAKEOBJDIRPREFIX}/_.bk" ( nano_make_build_env nano_make_kernel_env # Note: We intentionally build all modules, not only the ones in # NANO_MODULES so the built world can be reused by multiple images. # Although MODULES_OVERRIDE can be defined in the kenrel config # file to override this behavior. Just set NANO_MODULES=default. set -o xtrace cd "${NANO_SRC}" ${NANO_PMAKE} buildkernel ) > ${MAKEOBJDIRPREFIX}/_.bk 2>&1 ) clean_world ( ) ( if [ "${NANO_OBJ}" != "${MAKEOBJDIRPREFIX}" ]; then pprint 2 "Clean and create object directory (${NANO_OBJ})" if ! rm -rf ${NANO_OBJ}/ > /dev/null 2>&1 ; then chflags -R noschg ${NANO_OBJ} rm -r ${NANO_OBJ}/ fi mkdir -p "${NANO_OBJ}" "${NANO_WORLDDIR}" printenv > ${NANO_OBJ}/_.env else pprint 2 "Clean and create world directory (${NANO_WORLDDIR})" if ! rm -rf "${NANO_WORLDDIR}/" > /dev/null 2>&1 ; then chflags -R noschg "${NANO_WORLDDIR}" rm -rf "${NANO_WORLDDIR}/" fi mkdir -p "${NANO_WORLDDIR}" fi ) make_conf_install ( ) ( pprint 2 "Construct install make.conf ($NANO_MAKE_CONF_INSTALL)" # Make sure we get all the global settings that NanoBSD wants # in addition to the user's global settings ( nano_global_make_env echo "${CONF_WORLD}" echo "${CONF_INSTALL}" if [ -n "${NANO_NOPRIV_BUILD}" ]; then echo NO_ROOT=t echo METALOG=${NANO_METALOG} fi ) > ${NANO_MAKE_CONF_INSTALL} ) install_world ( ) ( pprint 2 "installworld" pprint 3 "log: ${NANO_OBJ}/_.iw" ( nano_make_install_env set -o xtrace cd "${NANO_SRC}" ${NANO_MAKE} installworld DESTDIR="${NANO_WORLDDIR}" chflags -R noschg "${NANO_WORLDDIR}" ) > ${NANO_OBJ}/_.iw 2>&1 ) install_etc ( ) ( pprint 2 "install /etc" pprint 3 "log: ${NANO_OBJ}/_.etc" ( nano_make_install_env set -o xtrace cd "${NANO_SRC}" ${NANO_MAKE} distribution DESTDIR="${NANO_WORLDDIR}" # make.conf doesn't get created by default, but some ports need it # so they can spam it. cp /dev/null "${NANO_WORLDDIR}"/etc/make.conf ) > ${NANO_OBJ}/_.etc 2>&1 ) install_kernel ( ) ( local extra pprint 2 "install kernel ($NANO_KERNEL)" pprint 3 "log: ${NANO_OBJ}/_.ik" ( nano_make_install_env nano_make_kernel_env if [ "${NANO_MODULES}" != "default" ]; then MODULES_OVERRIDE="${NANO_MODULES}" make_export MODULES_OVERRIDE fi set -o xtrace cd "${NANO_SRC}" ${NANO_MAKE} installkernel DESTDIR="${NANO_WORLDDIR}" ) > ${NANO_OBJ}/_.ik 2>&1 ) native_xtools ( ) ( print 2 "Installing the optimized native build tools for cross env" pprint 3 "log: ${NANO_OBJ}/_.native_xtools" ( nano_make_install_env set -o xtrace cd "${NANO_SRC}" ${NANO_MAKE} native-xtools DESTDIR="${NANO_WORLDDIR}" ) > ${NANO_OBJ}/_.native_xtools 2>&1 ) # # Run the requested set of customization scripts, run after we've # done an installworld, installed the etc files, installed the kernel # and tweaked them in the standard way. # run_customize ( ) ( pprint 2 "run customize scripts" for c in $NANO_CUSTOMIZE do pprint 2 "customize \"$c\"" pprint 3 "log: ${NANO_OBJ}/_.cust.$c" pprint 4 "`type $c`" ( set -x ; $c ) > ${NANO_OBJ}/_.cust.$c 2>&1 done ) # # Run any last-minute customization commands after we've had a chance to # setup nanobsd, prune empty dirs from /usr, etc # run_late_customize ( ) ( pprint 2 "run late customize scripts" for c in $NANO_LATE_CUSTOMIZE do pprint 2 "late customize \"$c\"" pprint 3 "log: ${NANO_OBJ}/_.late_cust.$c" pprint 4 "`type $c`" ( set -x ; $c ) > ${NANO_OBJ}/_.late_cust.$c 2>&1 done ) # # Hook called after we run all the late customize commands, but # before we invoke the disk imager. The nopriv build uses it to # read in the meta log, apply the changes other parts of nanobsd # have been recording their actions. It's not anticipated that # a user's cfg file would override this. # fixup_before_diskimage ( ) ( # Run the deduplication script that takes the matalog journal and # combines multiple entries for the same file (see source for # details). We take the extra step of removing the size keywords. This # script, and many of the user scripts, copies, appeneds and otherwise # modifies files in the build, changing their sizes. These actions are # impossible to trap, so go ahead remove the size= keyword. For this # narrow use, it doesn't buy us any protection and just gets in the way. # The dedup tool's output must be sorted due to limitations in awk. if [ -n "${NANO_METALOG}" ]; then pprint 2 "Fixing metalog" cp ${NANO_METALOG} ${NANO_METALOG}.pre - (echo "/set uname=${NANO_DEF_UNAME} gname=${NANO_DEF_GNAME}" && - cat ${NANO_METALOG}.pre) | \ - ${NANO_TOOLS}/mtree-dedup.awk | \ - sed -e 's/ size=[0-9][0-9]*//' | sort > ${NANO_METALOG} + echo "/set uname=${NANO_DEF_UNAME} gname=${NANO_DEF_GNAME}" > ${NANO_METALOG} + cat ${NANO_METALOG}.pre | ${NANO_TOOLS}/mtree-dedup.awk | \ + sed -e 's/ size=[0-9][0-9]*//' | sort >> ${NANO_METALOG} fi ) setup_nanobsd ( ) ( pprint 2 "configure nanobsd setup" pprint 3 "log: ${NANO_OBJ}/_.dl" ( cd "${NANO_WORLDDIR}" # Move /usr/local/etc to /etc/local so that the /cfg stuff # can stomp on it. Otherwise packages like ipsec-tools which # have hardcoded paths under ${prefix}/etc are not tweakable. if [ -d usr/local/etc ] ; then ( mkdir -p etc/local cd usr/local/etc find . -print | cpio -dumpl ../../../etc/local cd .. rm -rf etc ln -s ../../etc/local etc ) fi for d in var etc do # link /$d under /conf # we use hard links so we have them both places. # the files in /$d will be hidden by the mount. mkdir -p conf/base/$d conf/default/$d find $d -print | cpio -dumpl conf/base/ done echo "$NANO_RAM_ETCSIZE" > conf/base/etc/md_size echo "$NANO_RAM_TMPVARSIZE" > conf/base/var/md_size # pick up config files from the special partition echo "mount -o ro /dev/${NANO_DRIVE}${NANO_SLICE_CFG}" > conf/default/etc/remount # Put /tmp on the /var ramdisk (could be symlink already) tgt_dir2symlink tmp var/tmp ) > ${NANO_OBJ}/_.dl 2>&1 ) setup_nanobsd_etc ( ) ( pprint 2 "configure nanobsd /etc" ( cd "${NANO_WORLDDIR}" # create diskless marker file touch etc/diskless [ -n "${NANO_NOPRIV_BUILD}" ] && chmod 666 etc/defaults/rc.conf # Make root filesystem R/O by default echo "root_rw_mount=NO" >> etc/defaults/rc.conf # Disable entropy file, since / is read-only /var/db/entropy should be enough? echo "entropy_file=NO" >> etc/defaults/rc.conf [ -n "${NANO_NOPRIV_BUILD}" ] && chmod 444 etc/defaults/rc.conf # save config file for scripts echo "NANO_DRIVE=${NANO_DRIVE}" > etc/nanobsd.conf echo "/dev/${NANO_DRIVE}${NANO_SLICE_ROOT}a / ufs ro 1 1" > etc/fstab echo "/dev/${NANO_DRIVE}${NANO_SLICE_CFG} /cfg ufs rw,noauto 2 2" >> etc/fstab mkdir -p cfg ) ) prune_usr ( ) ( # Remove all empty directories in /usr find "${NANO_WORLDDIR}"/usr -type d -depth -print | while read d do rmdir $d > /dev/null 2>&1 || true done ) newfs_part ( ) ( local dev mnt lbl dev=$1 mnt=$2 lbl=$3 echo newfs ${NANO_NEWFS} ${NANO_LABEL:+-L${NANO_LABEL}${lbl}} ${dev} newfs ${NANO_NEWFS} ${NANO_LABEL:+-L${NANO_LABEL}${lbl}} ${dev} mount -o async ${dev} ${mnt} ) # Convenient spot to work around any umount issues that your build environment # hits by overriding this method. nano_umount ( ) ( umount ${1} ) populate_slice ( ) ( local dev dir mnt lbl dev=$1 dir=$2 mnt=$3 lbl=$4 echo "Creating ${dev} (mounting on ${mnt})" newfs_part ${dev} ${mnt} ${lbl} if [ -n "${dir}" -a -d "${dir}" ]; then echo "Populating ${lbl} from ${dir}" cd "${dir}" find . -print | grep -Ev '/(CVS|\.svn|\.hg|\.git)' | cpio -dumpv ${mnt} fi df -i ${mnt} nano_umount ${mnt} ) populate_cfg_slice ( ) ( populate_slice "$1" "$2" "$3" "$4" ) populate_data_slice ( ) ( populate_slice "$1" "$2" "$3" "$4" ) create_diskimage ( ) ( pprint 2 "build diskimage" pprint 3 "log: ${NANO_OBJ}/_.di" ( echo $NANO_MEDIASIZE $NANO_IMAGES \ $NANO_SECTS $NANO_HEADS \ $NANO_CODESIZE $NANO_CONFSIZE $NANO_DATASIZE | awk ' { printf "# %s\n", $0 # size of cylinder in sectors cs = $3 * $4 # number of full cylinders on media cyl = int ($1 / cs) # output fdisk geometry spec, truncate cyls to 1023 if (cyl <= 1023) print "g c" cyl " h" $4 " s" $3 else print "g c" 1023 " h" $4 " s" $3 if ($7 > 0) { # size of data partition in full cylinders dsl = int (($7 + cs - 1) / cs) } else { dsl = 0; } # size of config partition in full cylinders csl = int (($6 + cs - 1) / cs) if ($5 == 0) { # size of image partition(s) in full cylinders isl = int ((cyl - dsl - csl) / $2) } else { isl = int (($5 + cs - 1) / cs) } # First image partition start at second track print "p 1 165 " $3, isl * cs - $3 c = isl * cs; # Second image partition (if any) also starts offset one # track to keep them identical. if ($2 > 1) { print "p 2 165 " $3 + c, isl * cs - $3 c += isl * cs; } # Config partition starts at cylinder boundary. print "p 3 165 " c, csl * cs c += csl * cs # Data partition (if any) starts at cylinder boundary. if ($7 > 0) { print "p 4 165 " c, dsl * cs } else if ($7 < 0 && $1 > c) { print "p 4 165 " c, $1 - c } else if ($1 < c) { print "Disk space overcommitted by", \ c - $1, "sectors" > "/dev/stderr" exit 2 } # Force slice 1 to be marked active. This is necessary # for booting the image from a USB device to work. print "a 1" } ' > ${NANO_OBJ}/_.fdisk IMG=${NANO_DISKIMGDIR}/${NANO_IMGNAME} MNT=${NANO_OBJ}/_.mnt mkdir -p ${MNT} if [ "${NANO_MD_BACKING}" = "swap" ] ; then MD=`mdconfig -a -t swap -s ${NANO_MEDIASIZE} -x ${NANO_SECTS} \ -y ${NANO_HEADS}` else echo "Creating md backing file..." rm -f ${IMG} dd if=/dev/zero of=${IMG} seek=${NANO_MEDIASIZE} count=0 MD=`mdconfig -a -t vnode -f ${IMG} -x ${NANO_SECTS} \ -y ${NANO_HEADS}` fi trap "echo 'Running exit trap code' ; df -i ${MNT} ; nano_umount ${MNT} || true ; mdconfig -d -u $MD" 1 2 15 EXIT fdisk -i -f ${NANO_OBJ}/_.fdisk ${MD} fdisk ${MD} # XXX: params # XXX: pick up cached boot* files, they may not be in image anymore. if [ -f ${NANO_WORLDDIR}/${NANO_BOOTLOADER} ]; then boot0cfg -B -b ${NANO_WORLDDIR}/${NANO_BOOTLOADER} ${NANO_BOOT0CFG} ${MD} fi if [ -f ${NANO_WORLDDIR}/boot/boot ]; then bsdlabel -w -B -b ${NANO_WORLDDIR}/boot/boot ${MD}${NANO_SLICE_ROOT} else bsdlabel -w ${MD}${NANO_SLICE_ROOT} fi bsdlabel ${MD}${NANO_SLICE_ROOT} # Create first image populate_slice /dev/${MD}${NANO_SLICE_ROOT}a ${NANO_WORLDDIR} ${MNT} "${NANO_SLICE_ROOT}a" mount /dev/${MD}${NANO_SLICE_ROOT}a ${MNT} echo "Generating mtree..." ( cd "${MNT}" && mtree -c ) > ${NANO_OBJ}/_.mtree ( cd "${MNT}" && du -k ) > ${NANO_OBJ}/_.du nano_umount "${MNT}" if [ $NANO_IMAGES -gt 1 -a $NANO_INIT_IMG2 -gt 0 ] ; then # Duplicate to second image (if present) echo "Duplicating to second image..." dd conv=sparse if=/dev/${MD}${NANO_SLICE_ROOT} of=/dev/${MD}${NANO_SLICE_ALTROOT} bs=64k mount /dev/${MD}${NANO_SLICE_ALTROOT}a ${MNT} for f in ${MNT}/etc/fstab ${MNT}/conf/base/etc/fstab do sed -i "" "s=${NANO_DRIVE}${NANO_SLICE_ROOT}=${NANO_DRIVE}${NANO_SLICE_ALTROOT}=g" $f done nano_umount ${MNT} # Override the label from the first partition so we # don't confuse glabel with duplicates. if [ -n "${NANO_LABEL}" ]; then tunefs -L ${NANO_LABEL}"${NANO_SLICE_ALTROOT}a" /dev/${MD}${NANO_SLICE_ALTROOT}a fi fi # Create Config slice populate_cfg_slice /dev/${MD}${NANO_SLICE_CFG} "${NANO_CFGDIR}" ${MNT} "${NANO_SLICE_CFG}" # Create Data slice, if any. if [ -n "$NANO_SLICE_DATA" -a "$NANO_SLICE_CFG" = "$NANO_SLICE_DATA" -a \ "$NANO_DATASIZE" -ne 0 ]; then pprint 2 "NANO_SLICE_DATA is the same as NANO_SLICE_CFG, fix." exit 2 fi if [ $NANO_DATASIZE -ne 0 -a -n "$NANO_SLICE_DATA" ] ; then populate_data_slice /dev/${MD}${NANO_SLICE_DATA} "${NANO_DATADIR}" ${MNT} "${NANO_SLICE_DATA}" fi if [ "${NANO_MD_BACKING}" = "swap" ] ; then if [ ${NANO_IMAGE_MBRONLY} ]; then echo "Writing out _.disk.mbr..." dd if=/dev/${MD} of=${NANO_DISKIMGDIR}/_.disk.mbr bs=512 count=1 else echo "Writing out ${NANO_IMGNAME}..." dd if=/dev/${MD} of=${IMG} bs=64k fi echo "Writing out ${NANO_IMGNAME}..." dd conv=sparse if=/dev/${MD} of=${IMG} bs=64k fi if ${do_copyout_partition} ; then echo "Writing out _.disk.image..." dd conv=sparse if=/dev/${MD}${NANO_SLICE_ROOT} of=${NANO_DISKIMGDIR}/_.disk.image bs=64k fi mdconfig -d -u $MD trap - 1 2 15 trap nano_cleanup EXIT ) > ${NANO_OBJ}/_.di 2>&1 ) last_orders ( ) ( # Redefine this function with any last orders you may have # after the build completed, for instance to copy the finished # image to a more convenient place: # cp ${NANO_DISKIMGDIR}/_.disk.image /home/ftp/pub/nanobsd.disk true ) ####################################################################### # # Optional convenience functions. # ####################################################################### ####################################################################### # Common Flash device geometries # FlashDevice ( ) { if [ -d ${NANO_TOOLS} ] ; then . ${NANO_TOOLS}/FlashDevice.sub else . ${NANO_SRC}/${NANO_TOOLS}/FlashDevice.sub fi sub_FlashDevice $1 $2 } ####################################################################### # USB device geometries # # Usage: # UsbDevice Generic 1000 # a generic flash key sold as having 1GB # # This function will set NANO_MEDIASIZE, NANO_HEADS and NANO_SECTS for you. # # Note that the capacity of a flash key is usually advertised in MB or # GB, *not* MiB/GiB. As such, the precise number of cylinders available # for C/H/S geometry may vary depending on the actual flash geometry. # # The following generic device layouts are understood: # generic An alias for generic-hdd. # generic-hdd 255H 63S/T xxxxC with no MBR restrictions. # generic-fdd 64H 32S/T xxxxC with no MBR restrictions. # # The generic-hdd device is preferred for flash devices larger than 1GB. # UsbDevice ( ) { a1=`echo $1 | tr '[:upper:]' '[:lower:]'` case $a1 in generic-fdd) NANO_HEADS=64 NANO_SECTS=32 NANO_MEDIASIZE=$(( $2 * 1000 * 1000 / 512 )) ;; generic|generic-hdd) NANO_HEADS=255 NANO_SECTS=63 NANO_MEDIASIZE=$(( $2 * 1000 * 1000 / 512 )) ;; *) echo "Unknown USB flash device" exit 2 ;; esac } ####################################################################### # Setup serial console cust_comconsole ( ) ( # Enable getty on console sed -i "" -e /tty[du]0/s/off/on/ ${NANO_WORLDDIR}/etc/ttys # Disable getty on syscons devices sed -i "" -e '/^ttyv[0-8]/s/ on/ off/' ${NANO_WORLDDIR}/etc/ttys # Tell loader to use serial console early. echo "${NANO_BOOT2CFG}" > ${NANO_WORLDDIR}/boot.config ) ####################################################################### # Allow root login via ssh cust_allow_ssh_root ( ) ( sed -i "" -e '/PermitRootLogin/s/.*/PermitRootLogin yes/' \ ${NANO_WORLDDIR}/etc/ssh/sshd_config ) ####################################################################### # Install the stuff under ./Files cust_install_files ( ) ( cd "${NANO_TOOLS}/Files" find . -print | grep -Ev '/(CVS|\.svn|\.hg|\.git)' | cpio -Ldumpv ${NANO_WORLDDIR} ) ####################################################################### # Install packages from ${NANO_PACKAGE_DIR} cust_pkgng ( ) ( # If the package directory doesn't exist, we're done. if [ ! -d ${NANO_PACKAGE_DIR} ]; then echo "DONE 0 packages" return 0 fi # Find a pkg-* package for x in `find -s ${NANO_PACKAGE_DIR} -iname 'pkg-*'`; do _NANO_PKG_PACKAGE=`basename "$x"` done if [ -z "${_NANO_PKG_PACKAGE}" -o ! -f "${NANO_PACKAGE_DIR}/${_NANO_PKG_PACKAGE}" ]; then echo "FAILED: need a pkg/ package for bootstrapping" exit 2 fi # Copy packages into chroot mkdir -p ${NANO_WORLDDIR}/Pkg ( cd "${NANO_PACKAGE_DIR}" find "${NANO_PACKAGE_LIST}" -print | cpio -Ldumpv ${NANO_WORLDDIR}/Pkg ) #Bootstrap pkg CR env ASSUME_ALWAYS_YES=YES SIGNATURE_TYPE=none /usr/sbin/pkg add /Pkg/${_NANO_PKG_PACKAGE} CR pkg -N >/dev/null 2>&1 if [ "$?" -ne "0" ]; then echo "FAILED: pkg bootstrapping faied" exit 2 fi rm -f ${NANO_WORLDDIR}/Pkg/pkg-* # Count & report how many we have to install todo=`ls ${NANO_WORLDDIR}/Pkg | /usr/bin/wc -l` todo=$(expr $todo + 1) # add one for pkg since it is installed already echo "=== TODO: $todo" ls ${NANO_WORLDDIR}/Pkg echo "===" while true do # Record how many we have now have=$(CR env ASSUME_ALWAYS_YES=YES /usr/sbin/pkg info | /usr/bin/wc -l) # Attempt to install more packages CR0 'ls 'Pkg/*txz' | xargs env ASSUME_ALWAYS_YES=YES /usr/sbin/pkg add' # See what that got us now=$(CR env ASSUME_ALWAYS_YES=YES /usr/sbin/pkg info | /usr/bin/wc -l) echo "=== NOW $now" CR env ASSUME_ALWAYS_YES=YES /usr/sbin/pkg info echo "===" if [ $now -eq $todo ] ; then echo "DONE $now packages" break elif [ $now -eq $have ] ; then echo "FAILED: Nothing happened on this pass" exit 2 fi done rm -rf ${NANO_WORLDDIR}/Pkg ) ####################################################################### # Convenience function: # Register all args as customize function. customize_cmd ( ) { NANO_CUSTOMIZE="$NANO_CUSTOMIZE $*" } ####################################################################### # Convenience function: # Register all args as late customize function to run just before # image creation. late_customize_cmd ( ) { NANO_LATE_CUSTOMIZE="$NANO_LATE_CUSTOMIZE $*" } ####################################################################### # # All set up to go... # ####################################################################### # Progress Print # Print $2 at level $1. pprint ( ) ( if [ "$1" -le $PPLEVEL ]; then runtime=$(( `date +%s` - $NANO_STARTTIME )) printf "%s %.${1}s %s\n" "`date -u -r $runtime +%H:%M:%S`" "#####" "$2" 1>&3 fi ) usage ( ) { ( echo "Usage: $0 [-bfiKknqvw] [-c config_file]" echo " -b suppress builds (both kernel and world)" echo " -c specify config file" echo " -f suppress code slice extraction" echo " -i suppress disk image build" echo " -K suppress installkernel" echo " -k suppress buildkernel" echo " -n add -DNO_CLEAN to buildworld, buildkernel, etc" echo " -q make output more quiet" echo " -v make output more verbose" echo " -w suppress buildworld" ) 1>&2 exit 2 } ####################################################################### # Setup and Export Internal variables # export_var ( ) { # Don't wawnt a subshell var=$1 # Lookup value of the variable. eval val=\$$var pprint 3 "Setting variable: $var=\"$val\"" export $1 } # Call this function to set defaults _after_ parsing options. # dont want a subshell otherwise variable setting is thrown away. set_defaults_and_export ( ) { : ${NANO_OBJ:=/usr/obj/nanobsd.${NANO_NAME}} : ${MAKEOBJDIRPREFIX:=${NANO_OBJ}} : ${NANO_DISKIMGDIR:=${NANO_OBJ}} NANO_WORLDDIR=${NANO_OBJ}/_.w NANO_MAKE_CONF_BUILD=${MAKEOBJDIRPREFIX}/make.conf.build NANO_MAKE_CONF_INSTALL=${NANO_OBJ}/make.conf.install # Override user's NANO_DRIVE if they specified a NANO_LABEL [ -n "${NANO_LABEL}" ] && NANO_DRIVE="ufs/${NANO_LABEL}" || true # Set a default NANO_TOOLS to NANO_SRC/NANO_TOOLS if it exists. [ ! -d "${NANO_TOOLS}" ] && [ -d "${NANO_SRC}/${NANO_TOOLS}" ] && \ NANO_TOOLS="${NANO_SRC}/${NANO_TOOLS}" || true [ -n "${NANO_NOPRIV_BUILD}" ] && [ -z "${NANO_METALOG}" ] && \ NANO_METALOG=${NANO_OBJ}/_.metalog || true NANO_STARTTIME=`date +%s` pprint 3 "Exporting NanoBSD variables" export_var MAKEOBJDIRPREFIX export_var NANO_ARCH export_var NANO_CODESIZE export_var NANO_CONFSIZE export_var NANO_CUSTOMIZE export_var NANO_DATASIZE export_var NANO_DRIVE export_var NANO_HEADS export_var NANO_IMAGES export_var NANO_IMGNAME export_var NANO_MAKE export_var NANO_MAKE_CONF_BUILD export_var NANO_MAKE_CONF_INSTALL export_var NANO_MEDIASIZE export_var NANO_NAME export_var NANO_NEWFS export_var NANO_OBJ export_var NANO_PMAKE export_var NANO_SECTS export_var NANO_SRC export_var NANO_TOOLS export_var NANO_WORLDDIR export_var NANO_BOOT0CFG export_var NANO_BOOTLOADER export_var NANO_LABEL export_var NANO_MODULES export_var NANO_NOPRIV_BUILD export_var NANO_METALOG export_var SRCCONF export_var SRC_ENV_CONF } Index: projects/sendfile/tools/tools/nanobsd/embedded/common =================================================================== --- projects/sendfile/tools/tools/nanobsd/embedded/common (revision 293405) +++ projects/sendfile/tools/tools/nanobsd/embedded/common (revision 293406) @@ -1,652 +1,674 @@ # $FreeBSD$ #- # Copyright (c) 2015 Warner Losh. All Rights Reserved. # Copyright (c) 2010-2011 iXsystems, Inc., All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL iXsystems, Inc OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # # This file is heavily derived from both Sam Leffler's Avilia config, # as well as the BSDRP project's config file. Neither of these have # an explicit copyright/license statement, but are implicitly BSDL. This # example has been taken from the FreeNAS project (an early version) and # simplified to meet the needs of the example. # # NB: You want the other file as the command line arg :) # Missing in base: # o mkimg setting active partition # o mkimg starting at arbitrary offset (needed for A10, et al) # o mtools still needed becuase we have no makefs -t msdos # o nanobsd doesn't record changes to WORLDTEMP in customization # scripts yet, so we have kludge to pick up all files # o easy way for pkg to grab files from other repos and put that # data on the image # o Need to have some way to create / resize the s4 slice to do ping # pong bouncing between s3 and s4 for an arbitrary image. we can resize # one slice, not two # o hints in the uboot ports for how to create the image # # Missing here # o documentation for how to run the qemu images # o msdos mtools fallback # o special boot for !x86 !arm platforms # o qemu image for arm # o qemu image for aarch64 # o qemu image for armv6/armv7 # o easy support for different image / vm formats # o need to promote much of this to nanobsd.sh and friends # o uses old kludge to build image w/o ownership being right # for many files. Should move to mtree-dedup.awk. # o support for EFI images # o support for GPT partitioning # # Long Term # o common tooling for creating images for odd-ball platforms # o support for boot loaders other than uboot in the image # or via special insturctions # o No pony support. Sadly, you cannot have a pony. # if [ -z $NANO_NAME ]; then echo "NANO_NAME not defined. Use foo.cfg instead." fi NANO_SLICE_FAT_SIZE=32m NANO_SLICE_CFG_SIZE=32m NANO_RAM_ETCSIZE=8192 NANO_RAM_TMPVARSIZE=8192 NANO_IMAGES=2 NANO_PMAKE="make -j $(sysctl -n hw.ncpu)" NANO_CFG_BASE=$(pwd) NANO_CFG_BASE=$(realpath ${NANO_CFG_BASE}/..) NANO_SRC=$(realpath ${NANO_CFG_BASE}/../../..) #### XXX share obj NANO_OBJ=${NANO_SRC}/../$NANO_NAME/obj # Where cust_pkg() finds packages to install #XXX: Is this the right place? #NANO_PORTS=$(realpath ${NANO_SRC}/../ports) NANO_PORTS=/usr/ports NANO_PACKAGE_DIR=${NANO_SRC}/${NANO_TOOLS}/Pkg NANO_INIT_IMG2=0 NANO_NOPRIV_BUILD=t unset MAKEOBJDIRPREFIX # this to go into nanobsd.sh NANO_PORTS=${NANO_PORTS:-/usr/ports} mkdir -p ${NANO_OBJ} NANO_OBJ=$(realpath ${NANO_OBJ}) NANO_FAT_DIR=${NANO_OBJ}/_.fat customize_cmd cust_allow_ssh_root add_etc_make_conf ( ) ( touch ${NANO_WORLDDIR}/etc/make.conf ) customize_cmd add_etc_make_conf cust_install_machine_files ( ) ( echo "cd ${NANO_CFG_BASE}/Files" cd ${NANO_CFG_BASE}/Files find . -print | grep -Ev '/(CVS|\.svn|\.hg|\.git)' | cpio -dumpv ${NANO_WORLDDIR} ) customize_cmd cust_install_files customize_cmd cust_install_machine_files # NB: leave c++ enabled so devd can be built CONF_BUILD=" WITHOUT_ACPI=true WITHOUT_ATM=true WITHOUT_AUDIT=true WITHOUT_BIND_DNSSEC=true WITHOUT_BIND_ETC=true WITHOUT_BIND_LIBS_LWRES=true WITHOUT_BLUETOOTH=true WITHOUT_CALENDAR=true WITHOUT_CVS=true WITHOUT_DICT=true WITHOUT_EXAMPLES=true WITHOUT_FORTRAN=true WITHOUT_GAMES=true WITHOUT_GCOV=true WITHOUT_GPIB=true WITHOUT_HTML=true WITHOUT_I4B=true WITHOUT_IPFILTER=true WITHOUT_IPX=true WITHOUT_LIBKSE=true WITHOUT_LOCALES=true WITHOUT_LPR=true WITHOUT_MAN=true WITHOUT_NETCAT=true WITHOUT_NIS=true WITHOUT_NLS=true WITHOUT_NS_CACHING=true WITHOUT_OBJC=true WITHOUT_PROFILE=true WITHOUT_RCMDS=true WITHOUT_SENDMAIL=true WITHOUT_SHAREDOCS=true WITHOUT_SYSCONS=true WITHOUT_LIB32=true WITHOUT_TESTS=true " CONF_INSTALL="$CONF_BUILD INSTALL_NODEBUG=t NOPORTDOCS=t NO_INSTALL_MANPAGES=t " # The following would help... # WITHOUT_TOOLCHAIN=true can't build ports # WITHOUT_INSTALLLIB=true libgcc.a # # from the build # WITHOUT_INFO=true makeinfo # WITHOUT_RCS=true PKG_ONLY_MAKE_CONF=" WITHOUT_TOOLCHAIN=true WITHOUT_INSTALLLIB=true WITHOUT_INFO=true WITHOUT_RCS=true " NANO_PACKAGE_ONLY=1 # install a package from a pre-built binary do_add_pkg ( ) ( # Need to create ${NANO_OBJ}/ports in this add_pkg_${port} function set -x mkdir -p ${NANO_OBJ}/ports/distfiles mkdir -p ${NANO_OBJ}/ports/packages mkdir -p ${NANO_WORLDDIR}/usr/ports/packages mkdir -p ${NANO_WORLDDIR}/usr/ports/distfiles mount -t nullfs -o noatime ${NANO_OBJ}/ports/packages \ ${NANO_WORLDDIR}/usr/ports/packages mount -t nullfs -o noatime ${NANO_OBJ}/ports/distfiles \ ${NANO_WORLDDIR}/usr/ports/distfiles CR env ASSUME_ALWAYS_YES=YES SIGNATURE_TYPE=none /usr/sbin/pkg add /usr/ports/packages/All/$1.txz umount ${NANO_WORLDDIR}/usr/ports/distfiles umount ${NANO_WORLDDIR}/usr/ports/packages rmdir ${NANO_WORLDDIR}/usr/ports/packages rmdir ${NANO_WORLDDIR}/usr/ports/distfiles rmdir ${NANO_WORLDDIR}/usr/ports set +x ) # Build a port (with the side effect of creating a package) do_add_port ( ) ( local port_path port_path=$1 shift set -x # Need to create ${NANO_OBJ}/ports in this add_port_${port} function mkdir -p ${NANO_OBJ}/ports/distfiles mkdir -p ${NANO_OBJ}/ports/packages mkdir -p ${NANO_PORTS}/packages mkdir -p ${NANO_PORTS}/distfiles mkdir -p ${NANO_WORLDDIR}/usr/src mkdir -p ${NANO_WORLDDIR}/usr/ports mount -t nullfs -o noatime ${NANO_SRC} ${NANO_WORLDDIR}/usr/src mount -t nullfs -o noatime ${NANO_PORTS} ${NANO_WORLDDIR}/usr/ports mount -t nullfs -o noatime ${NANO_OBJ}/ports/packages \ ${NANO_WORLDDIR}/usr/ports/packages mount -t nullfs -o noatime ${NANO_OBJ}/ports/distfiles \ ${NANO_WORLDDIR}/usr/ports/distfiles mkdir -p ${NANO_WORLDDIR}/dev mount -t devfs devfs ${NANO_WORLDDIR}/dev mkdir -p ${NANO_WORLDDIR}/usr/workdir cp /etc/resolv.conf ${NANO_WORLDDIR}/etc/resolv.conf # OK, a little inefficient, but likely not enough to worry about. CR ldconfig /lib /usr/lib /usr/local/lib CR ldconfig -R CR ldconfig -r # Improvement: Don't know why package-recursive don't works here CR "env UNAME_p=${NANO_ARCH} TARGET=${NANO_ARCH} \ TARGET_ARCH=${NANO_ARCH} PORTSDIR=${NANO_PORTS} make \ __MAKE_CONF=${NANO_MAKE_CONF_BUILD} \ WRKDIRPREFIX=/usr/workdir -C /usr/ports/$port_path \ package-recursive BATCH=yes $* clean FORCE_PKG_REGISTER=t" rm ${NANO_WORLDDIR}/etc/resolv.conf rm -rf ${NANO_WORLDDIR}/usr/obj rm -rf ${NANO_WORLDDIR}/usr/workdir umount ${NANO_WORLDDIR}/dev umount ${NANO_WORLDDIR}/usr/ports/packages umount ${NANO_WORLDDIR}/usr/ports/distfiles umount ${NANO_WORLDDIR}/usr/ports umount ${NANO_WORLDDIR}/usr/src set +x ) # Need to check if this function works with cross-compiling architecture!!!! # Recursive complex fonction: Generate one function for each ports # writes shell functions called later, so don't do in subshell. add_port ( ) { local port_path=$1 local port=`echo $1 | sed -e 's/\//_/'` shift # Check if package allready exist # Need to: # 1. check ARCH of this package! # 2. Add a trap cd ${NANO_PORTS}/${port_path} PKG_NAME=`env PORTSDIR=${NANO_PORTS} make __MAKE_CONF=${NANO_MAKE_CONF_BUILD} package-name` if [ -f ${NANO_OBJ}/ports/packages/All/${PKG_NAME}.txz ]; then # Pkg file found: Generate add_pkg_NAME function eval " add_pkg_${port} ( ) { do_add_pkg ${PKG_NAME} } customize_cmd add_pkg_${port} " else # No pkg file: Generate add_port_NAME function eval " add_port_${port} ( ) { do_add_port ${port_path} $* } customize_cmd add_port_${port} " NANO_PACKAGE_ONLY=0 fi } # Creates images for all the formats that use MBR create_diskimage_mbr ( ) ( pprint 2 "build diskimage ${NANO_NAME}" pprint 3 "log: ${NANO_OBJ}/_.di" ( local extra i sz fmt fmtarg bootmbr bootbsd skiparg set -o xtrace [ -z ${NANO_DISKIMAGE_FORMAT} ] || fmtarg="-f ${NANO_DISKIMAGE_FORMAT}" [ -z ${NANO_DISKIMAGE_FORMAT} ] || fmt=".${NANO_DISKIMAGE_FORMAT}" bootmbr=${NANO_BOOT_MBR:+-b ${NANO_BOOT_MBR}} bootbsd=${NANO_BOOT_BSD:+-b ${NANO_BOOT_BSD}} skiparg=${NANO_MBR_FIRST_SKIP:+-S ${NANO_MBR_FIRST_SKIP}} for i in s1 s2 s3 s4 empty; do rm -fr ${NANO_OBJ}/_.${i}* done # Populate the FAT partition, if needed if [ -n "${NANO_SLICE_FAT}" ]; then echo Creating MSDOS partition for kernel newfs_msdos -C ${NANO_SLICE_FAT_SIZE} -F 16 -L ${NANO_NAME} \ ${NANO_OBJ}/_.${NANO_SLICE_FAT} if [ -d ${NANO_FAT_DIR} ]; then # Need to copy files from ${NANO_FATDIR} with mtools, or use # makefs -t msdos once that's supported mcopy -i ${NANO_OBJ}/_.${NANO_SLICE_FAT} ${NANO_FAT_DIR}/* :: fi fi # Populate the Powerpc boot image, if needed if [ "${NANO_LAYOUT}" = powerpc64-ibm ]; then dd if=${NANO_WORLDDIR}/boot/boot1.elf of=${NANO_OBJ}/_.s1 bs=800k count=1 conv=sync fi # Populate the / partition, and place it into a slice with a # bsd label [ -z ${NANO_NOPRIV_BUILD} ] || extra="-F ${NANO_METALOG}" sz=${NANO_SLICE_ROOT_SIZE:+-s ${NANO_SLICE_ROOT_SIZE}} eval "${NANO_MAKEFS_UFS}" ${extra} $sz "${NANO_OBJ}/_.${NANO_SLICE_ROOT}a" \ "${NANO_WORLDDIR}" mkimg -s bsd ${bootbsd} -p freebsd-ufs:=${NANO_OBJ}/_.${NANO_SLICE_ROOT}a \ -o ${NANO_OBJ}/_.${NANO_SLICE_ROOT} # Populate the /cfg partition, empty if none given if [ -z "${NANO_CFGDIR}" ]; then echo "Faking cfg dir, it's empty" NANO_CFGDIR=${NANO_OBJ}/_.empty mkdir -p ${NANO_CFGDIR} fi # XXX -F cfg-mtree eval "${NANO_MAKEFS_UFS}" -s ${NANO_SLICE_CFG_SIZE} \ "${NANO_OBJ}/_.${NANO_SLICE_CFG}" "${NANO_CFGDIR}" # data slice not supported since we need the part for FAT for # booting # Now shuffle all the slices together into the proper layout if [ -n "$NANO_SLICE_FAT" ]; then eval $NANO_SLICE_FAT=fat16b fi eval $NANO_SLICE_CFG=freebsd eval $NANO_SLICE_ROOT=freebsd # below depends on https://reviews.freebsd.org/D4403 not yet in the tree # but there's problems: it marks all partitions as active, so you have to # boot off parittion 3 or 2 by hand if you're playing around with this WIP case ${NANO_LAYOUT} in std-embedded) mkimg -a 3 ${skiparg} ${fmtarg} ${bootmbr} -s mbr -p ${s1}:=${NANO_OBJ}/_.s1 \ -p ${s2}:=${NANO_OBJ}/_.s2 \ -p ${s3}:=${NANO_OBJ}/_.s3 \ -o ${NANO_OBJ}/_.disk.image.${NANO_NAME}${fmt} ;; std-x86) # s1 is cfg, s2 is /, not sure how to make that # boot (marked as active) with mkimg yet mkimg -a 2 ${fmtarg} ${bootmbr} -s mbr -p ${s1}:=${NANO_OBJ}/_.s1 \ -p ${s2}:=${NANO_OBJ}/_.s2 \ -o ${NANO_OBJ}/_.disk.image.${NANO_NAME}${fmt} ;; powerpc64-ibm) # A lie to make the boot loader work, it boots the first BSD partition # it finds, regardless of the active flag. s2=fat16b # boot image is on a special partition, ala std-embedded, but that # partition isn't FAT with special files, but a copy of the boot # loader itself. mkimg -a 1 ${fmtarg} -s mbr -p prepboot:=${NANO_OBJ}/_.s1 \ -p ${s2}:=${NANO_OBJ}/_.s2 \ -p ${s3}:=${NANO_OBJ}/_.s3a \ -o ${NANO_OBJ}/_.disk.image.${NANO_NAME}${fmt} ;; esac ) > ${NANO_OBJ}/_.di 2>&1 ) die( ) { echo "$*" exit 1 } # Automatically include the packaging port here so it is always first so it # builds the port and adds the package so we can add other packages. #XXX Doesn't work for cross build, so punting until I can integreate qemu-static #XXX or poudere, both of which require priv'd execution. Or qemu-system, which #XXX is super, super slow. #add_port ports-mgmt/pkg #add_port security/sudo #add_port ftp/curl rp=$(realpath ${NANO_OBJ}/) __a=`mount | grep ${rp} | awk '{print length($3), $3;}' | sort -rn | awk '{$1=""; print;}'` if [ -n "$__a" ]; then echo "unmounting $__a" umount $__a fi if [ "$DEBUG" = 1 ]; then DEBUG_BUILD=" DEBUG_FLAGS= -g " else DEBUG_INSTALL=" INSTALL_NODEBUG= t " fi CONF_INSTALL="$CONF_BUILD ${DEBUG_BUILD} " CONF_INSTALL="$CONF_INSTALL ${DEBUG_INSTALL} " if [ "${NANO_PACKAGE_ONLY}" -eq 1 ]; then CONF_INSTALL="${CONF_INSTALL} ${PKG_ONLY_MAKE_CONF} " echo "Automatically building a thin image with packages" else echo "Automatically building a * * F A T * * image so we can build ports" fi VARS="MASTER_SITE_BACKUP MASTER_SITE_OVERRIDE PACKAGEROOT PACKAGESITE" for var in $VARS; do val=$(eval echo "\$$var") if [ -n "$val" ]; then CONF_INSTALL="${CONF_INSTALL} $var=$val" fi done typical_embedded ( ) ( # Need to create rc.conf before we copy over /etc to /conf/base/etc # so now's a good time. local rc=${NANO_WORLDDIR}/etc/rc.conf echo "hostname=nanobsd-${NANO_NAME}" > $rc echo "growfs_enable=YES" >> $rc echo "growfs_type=nanobsd-pingpong" >> $rc echo "ntpdate_enable=YES" >> $rc echo "ifconfig_DEFAULT=DHCP" >> $rc echo "ntpdate_hosts='0.freebsd.pool.ntp.org 1.freebsd.pool.ntp.org'" >> $rc # Make sure that firstboot scripts run so growfs works. # Note: still some issues remvoing this XXX touch ${NANO_WORLDDIR}/firstboot ) customize_cmd typical_embedded +fix_pkg ( ) ( + chdir ${NANO_WORLDDIR} + mkdir -p pkg + mkdir -p pkg/db + mkdir -p pkg/cache + mkdir -p pkg/tmp # Needed for pkg bootstrap + mkdir -p usr/local/etc # Will get moved to local/etc + ( + echo 'PKG_DBDIR = "/pkg/db"' + echo 'PKG_CACHEDIR = "/pkg/cache"' + echo 'DEFAULT_ALWAYS_YES = "yes"' + echo 'ASSUME_ALWAYS_YES = "yes"' + ) >> usr/local/etc/pkg.conf + [ -z ${NANO_NOPRIV_BUILD} ] || ( + echo "./pkg type=dir uname=root gname=wheel mode=0755" + echo "./pkg/cache type=dir uname=root gname=wheel mode=0755" + echo "./pkg/db type=dir uname=root gname=wheel mode=0755" + echo "./pkg/tmp type=dir uname=root gname=wheel mode=0755" + ) >> ${NANO_METALOG} +) +customize_cmd fix_pkg + save_build ( ) ( VERSION_FILE=${NANO_WORLDDIR}/etc/version if [ "${SVNREVISION}" = "${REVISION}" ]; then echo "${NANO_NAME}" > "${VERSION_FILE}" else echo "${NANO_NAME} (${SVNREVISION})" > "${VERSION_FILE}" fi ) customize_cmd save_build shrink_md_fbsize ( ) ( # We have a lot of little files on our memory disks. Let's decrease # the block and frag size to fit more little files on them (this # halves our space requirement by ~50% on /etc and /var on 8.x -- # and gives us more back on 9.x as the default block and frag size # are 4 times larger). sed -i '' -e 's,-S -i 4096,-S -i 4096 -b 4096 -f 512,' \ ${NANO_WORLDDIR}/etc/rc.initdiskless ) customize_cmd shrink_md_fbsize customize_cmd cust_comconsole dos_boot_part ( ) ( local d=/usr/local/share/u-boot/${NANO_BOOT_PKG} local f=${NANO_FAT_DIR} # For now, just copy all the files. However, for iMX6 and Allwinner, # we'll need to put a special boot block at a fixed location # on the disk as well. rm -rf $f mkdir $f chdir $f cp ${d}/* . # Also copy ubldr. u-boot will load it and it will load the kernel # from the ufs partition cp ${NANO_WORLDDIR}/boot/ubldr . cp ${NANO_WORLDDIR}/boot/ubldr.bin . # We have to touch the saveenv file touch uEnv.txt # Now we need to copy over dtb files from the build. cp ${NANO_WORLDDIR}/boot/dtb/*.dtb . ) if [ -n "$NANO_BOOT_PKG" ]; then if [ ! -d ${d} ]; then echo ${NANO_BOOT_PKG} not installed. Sadly, it must be. exit 1 fi if [ ! -x /usr/local/bin/mcopy ]; then echo mtools not installed. Sadly, we gotta have it. exit 1 fi customize_cmd dos_boot_part fi product_custom ( ) ( # not quie ready to tweak these in nopriv build if [ -z ${NANO_NOPRIV_BUILD} ]; then # Last second tweaks -- generally not needed chown -R root:wheel ${NANO_WORLDDIR}/root chmod 0755 ${NANO_WORLDDIR}/root/* chmod 0755 ${NANO_WORLDDIR}/* chown -R root:wheel ${NANO_WORLDDIR}/etc chown -R root:wheel ${NANO_WORLDDIR}/boot chown root:wheel ${NANO_WORLDDIR}/ chown root:wheel ${NANO_WORLDDIR}/usr fi ) late_customize_cmd product_custom # # Convenience routines to bring up many settings for standard environments # # # For each architecture, we have a standard set of settings to the extent # it makes sense. For architectures that don't have a standard environment # cfg files need to either define a lot of settings, provide their own disk # imaging, or set which of the variants we support. No subshells, since we # set global variables # std_aarch64 ( ) { } std_amd64 ( ) { std_i386 } std_arm ( ) { } std_armeb ( ) { NANO_ENDIAN=big } std_armv6 ( ) { } std_i386 ( ) { # Default values, if not overridden in .cfg file : ${NANO_KERNEL:=GENERIC} : ${NANO_DRIVE:=ada0} : ${NANO_LAYOUT:=std-x86} : ${NANO_BOOT_MBR:=${NANO_WORLDDIR}/boot/boot0sio} : ${NANO_BOOT_BSD:=${NANO_WORLDDIR}/boot/boot} } std_mips ( ) { NANO_ENDIAN=big } std_mipsel ( ) { } std_mips64 ( ) { NANO_ENDIAN=big } std_mips64el ( ) { } std_powerpc ( ) { NANO_ENDIAN=big } std_powerpc64 ( ) { NANO_LAYOUT=powerpc64-ibm NANO_ENDIAN=big } std_sparc64 ( ) { NANO_ENDIAN=big } # # QEMU settings for the standard environments # qemu_env ( ) { NANO_DISKIMAGE_FORMAT=qcow2 } # other standard environments will go here eval std_${NANO_ARCH} # Slice 1: FAT for ROM loading bootstrap # Slice 2: Config partition # Slice 3: FreeBSD partition (sized exactly) # Slice 4: FreeBSD partition (empty) # on first boot, we resize slice 3 & 4 to be 1/2 of what's # left over on the SD card after slice 1 and 2 are taken # off the top. We also resize the 'a' partion on first boot # to the size of the partition for the ping/pong upgrade. # This feature needs support in the rc.d bootup script. : ${NANO_ENDIAN:=little} # make -V something to figure it out? : ${NANO_LAYOUT:=std-embedded} : ${NANO_MAKEFS_UFS:=makefs -t ffs -B ${NANO_ENDIAN}} : ${NANO_DISK_SCHEME:=mbr} # No others really supported ATM case ${NANO_LAYOUT} in std-embedded) NANO_SLICE_FAT=s1 NANO_SLICE_CFG=s2 NANO_SLICE_ROOT=s3 NANO_SLICE_ALTROOT=s4 ;; std-x86) NANO_SLICE_CFG=s1 NANO_SLICE_ROOT=s2 NANO_SLICE_ALTROOT=s3 ;; powerpc64-ibm) NANO_SLICE_PPCBOOT=s1 NANO_SLICE_CFG=s2 NANO_SLICE_ROOT=s3 NANO_SLICE_ALTROOT=s4 ;; powerpc64-apple) echo Not yet exit 1 ;; *) echo Unknown Layout ${NANO_LAYOUT} exit 1 ;; esac NANO_SLICE_DATA= # Not included # Each major disk scheme has its own routine. Generally # this is for mbr, gpt, etc. These are generally are widely # shared, but some specialized formats won't be shared. create_diskimage ( ) ( eval create_diskimage_${NANO_DISK_SCHEME} ) Index: projects/sendfile =================================================================== --- projects/sendfile (revision 293405) +++ projects/sendfile (revision 293406) Property changes on: projects/sendfile ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r293383-293405