Index: head/bin/sh/builtins.def =================================================================== --- head/bin/sh/builtins.def (revision 288429) +++ head/bin/sh/builtins.def (revision 288430) @@ -1,92 +1,93 @@ #!/bin/sh - #- # Copyright (c) 1991, 1993 # The Regents of the University of California. All rights reserved. # # This code is derived from software contributed to Berkeley by # Kenneth Almquist. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # 4. Neither the name of the University nor the names of its contributors # may be used to endorse or promote products derived from this software # without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # @(#)builtins.def 8.4 (Berkeley) 5/4/95 # $FreeBSD$ # # This file lists all the builtin commands. The first column is the name # of a C routine. # The -j flag specifies that this command is to be excluded from systems # without job control. # The -h flag specifies that this command is to be excluded from systems # based on the NO_HISTORY compile-time symbol. # The -s flag specifies that this is a POSIX 'special built-in' command. # The rest of the line specifies the command name or names used to run the # command. The entry for bltincmd, which is run when the user does not specify # a command, must come first. # # NOTE: bltincmd must come first! bltincmd builtin aliascmd alias bgcmd -j bg bindcmd bind breakcmd -s break -s continue cdcmd cd chdir commandcmd command dotcmd -s . echocmd echo evalcmd -s eval execcmd -s exec exitcmd -s exit letcmd let exportcmd -s export -s readonly #exprcmd expr falsecmd false fgcmd -j fg +freebsd_wordexpcmd freebsd_wordexp getoptscmd getopts hashcmd hash histcmd -h fc jobidcmd jobid jobscmd jobs killcmd kill localcmd local printfcmd printf pwdcmd pwd readcmd read returncmd -s return setcmd -s set setvarcmd setvar shiftcmd -s shift testcmd test [ timescmd -s times trapcmd -s trap truecmd -s : true typecmd type ulimitcmd ulimit umaskcmd umask unaliascmd unalias unsetcmd -s unset waitcmd wait wordexpcmd wordexp Index: head/bin/sh/expand.c =================================================================== --- head/bin/sh/expand.c (revision 288429) +++ head/bin/sh/expand.c (revision 288430) @@ -1,1658 +1,1712 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * Copyright (c) 1997-2005 * Herbert Xu . All rights reserved. * * This code is derived from software contributed to Berkeley by * Kenneth Almquist. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint #if 0 static char sccsid[] = "@(#)expand.c 8.5 (Berkeley) 5/15/95"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Routines to expand arguments to commands. We have to deal with * backquotes, shell variables, and file metacharacters. */ #include "shell.h" #include "main.h" #include "nodes.h" #include "eval.h" #include "expand.h" #include "syntax.h" #include "parser.h" #include "jobs.h" #include "options.h" #include "var.h" #include "input.h" #include "output.h" #include "memalloc.h" #include "error.h" #include "mystring.h" #include "arith.h" #include "show.h" #include "builtins.h" /* * Structure specifying which parts of the string should be searched * for IFS characters. */ struct ifsregion { struct ifsregion *next; /* next region in list */ int begoff; /* offset of start of region */ int endoff; /* offset of end of region */ int inquotes; /* search for nul bytes only */ }; static char *expdest; /* output of current string */ static struct nodelist *argbackq; /* list of back quote expressions */ static struct ifsregion ifsfirst; /* first struct in list of ifs regions */ static struct ifsregion *ifslastp; /* last struct in list */ static struct arglist exparg; /* holds expanded arg list */ static char *argstr(char *, int); static char *exptilde(char *, int); static char *expari(char *); static void expbackq(union node *, int, int); static int subevalvar(char *, char *, int, int, int, int, int); static char *evalvar(char *, int); static int varisset(const char *, int); static void strtodest(const char *, int, int, int); static void varvalue(const char *, int, int, int); static void recordregion(int, int, int); static void removerecordregions(int); static void ifsbreakup(char *, struct arglist *); static void expandmeta(struct strlist *); static void expmeta(char *, char *); static void addfname(char *); static struct strlist *expsort(struct strlist *); static struct strlist *msort(struct strlist *, int); static int patmatch(const char *, const char *, int); static char *cvtnum(int, char *); static int collate_range_cmp(wchar_t, wchar_t); static int collate_range_cmp(wchar_t c1, wchar_t c2) { static wchar_t s1[2], s2[2]; s1[0] = c1; s2[0] = c2; return (wcscoll(s1, s2)); } static char * stputs_quotes(const char *data, const char *syntax, char *p) { while (*data) { CHECKSTRSPACE(2, p); if (syntax[(int)*data] == CCTL) USTPUTC(CTLESC, p); USTPUTC(*data++, p); } return (p); } #define STPUTS_QUOTES(data, syntax, p) p = stputs_quotes((data), syntax, p) /* * Perform expansions on an argument, placing the resulting list of arguments * in arglist. Parameter expansion, command substitution and arithmetic * expansion are always performed; additional expansions can be requested * via flag (EXP_*). * The result is left in the stack string. * When arglist is NULL, perform here document expansion. * * Caution: this function uses global state and is not reentrant. * However, a new invocation after an interrupted invocation is safe * and will reset the global state for the new call. */ void expandarg(union node *arg, struct arglist *arglist, int flag) { struct strlist *sp; char *p; argbackq = arg->narg.backquote; STARTSTACKSTR(expdest); ifsfirst.next = NULL; ifslastp = NULL; argstr(arg->narg.text, flag); if (arglist == NULL) { STACKSTRNUL(expdest); return; /* here document expanded */ } STPUTC('\0', expdest); p = grabstackstr(expdest); exparg.lastp = &exparg.list; if (flag & EXP_FULL) { ifsbreakup(p, &exparg); *exparg.lastp = NULL; exparg.lastp = &exparg.list; expandmeta(exparg.list); } else { sp = (struct strlist *)stalloc(sizeof (struct strlist)); sp->text = p; *exparg.lastp = sp; exparg.lastp = &sp->next; } while (ifsfirst.next != NULL) { struct ifsregion *ifsp; INTOFF; ifsp = ifsfirst.next->next; ckfree(ifsfirst.next); ifsfirst.next = ifsp; INTON; } *exparg.lastp = NULL; if (exparg.list) { *arglist->lastp = exparg.list; arglist->lastp = exparg.lastp; } } /* * Perform parameter expansion, command substitution and arithmetic * expansion, and tilde expansion if requested via EXP_TILDE/EXP_VARTILDE. * Processing ends at a CTLENDVAR or CTLENDARI character as well as '\0'. * This is used to expand word in ${var+word} etc. * If EXP_FULL or EXP_CASE are set, keep and/or generate CTLESC * characters to allow for further processing. * If EXP_FULL is set, also preserve CTLQUOTEMARK characters. */ static char * argstr(char *p, int flag) { char c; int quotes = flag & (EXP_FULL | EXP_CASE); /* do CTLESC */ int firsteq = 1; int split_lit; int lit_quoted; split_lit = flag & EXP_SPLIT_LIT; lit_quoted = flag & EXP_LIT_QUOTED; flag &= ~(EXP_SPLIT_LIT | EXP_LIT_QUOTED); if (*p == '~' && (flag & (EXP_TILDE | EXP_VARTILDE))) p = exptilde(p, flag); for (;;) { CHECKSTRSPACE(2, expdest); switch (c = *p++) { case '\0': return (p - 1); case CTLENDVAR: case CTLENDARI: return (p); case CTLQUOTEMARK: lit_quoted = 1; /* "$@" syntax adherence hack */ if (p[0] == CTLVAR && p[2] == '@' && p[3] == '=') break; if ((flag & EXP_FULL) != 0) USTPUTC(c, expdest); break; case CTLQUOTEEND: lit_quoted = 0; break; case CTLESC: if (quotes) USTPUTC(c, expdest); c = *p++; USTPUTC(c, expdest); if (split_lit && !lit_quoted) recordregion(expdest - stackblock() - (quotes ? 2 : 1), expdest - stackblock(), 0); break; case CTLVAR: p = evalvar(p, flag); break; case CTLBACKQ: case CTLBACKQ|CTLQUOTE: expbackq(argbackq->n, c & CTLQUOTE, flag); argbackq = argbackq->next; break; case CTLARI: p = expari(p); break; case ':': case '=': /* * sort of a hack - expand tildes in variable * assignments (after the first '=' and after ':'s). */ USTPUTC(c, expdest); if (split_lit && !lit_quoted) recordregion(expdest - stackblock() - 1, expdest - stackblock(), 0); if (flag & EXP_VARTILDE && *p == '~' && (c != '=' || firsteq)) { if (c == '=') firsteq = 0; p = exptilde(p, flag); } break; default: USTPUTC(c, expdest); if (split_lit && !lit_quoted) recordregion(expdest - stackblock() - 1, expdest - stackblock(), 0); } } } /* * Perform tilde expansion, placing the result in the stack string and * returning the next position in the input string to process. */ static char * exptilde(char *p, int flag) { char c, *startp = p; struct passwd *pw; char *home; for (;;) { c = *p; switch(c) { case CTLESC: /* This means CTL* are always considered quoted. */ case CTLVAR: case CTLBACKQ: case CTLBACKQ | CTLQUOTE: case CTLARI: case CTLENDARI: case CTLQUOTEMARK: return (startp); case ':': if ((flag & EXP_VARTILDE) == 0) break; /* FALLTHROUGH */ case '\0': case '/': case CTLENDVAR: *p = '\0'; if (*(startp+1) == '\0') { home = lookupvar("HOME"); } else { pw = getpwnam(startp+1); home = pw != NULL ? pw->pw_dir : NULL; } *p = c; if (home == NULL || *home == '\0') return (startp); strtodest(home, flag, VSNORMAL, 1); return (p); } p++; } } static void removerecordregions(int endoff) { if (ifslastp == NULL) return; if (ifsfirst.endoff > endoff) { while (ifsfirst.next != NULL) { struct ifsregion *ifsp; INTOFF; ifsp = ifsfirst.next->next; ckfree(ifsfirst.next); ifsfirst.next = ifsp; INTON; } if (ifsfirst.begoff > endoff) ifslastp = NULL; else { ifslastp = &ifsfirst; ifsfirst.endoff = endoff; } return; } ifslastp = &ifsfirst; while (ifslastp->next && ifslastp->next->begoff < endoff) ifslastp=ifslastp->next; while (ifslastp->next != NULL) { struct ifsregion *ifsp; INTOFF; ifsp = ifslastp->next->next; ckfree(ifslastp->next); ifslastp->next = ifsp; INTON; } if (ifslastp->endoff > endoff) ifslastp->endoff = endoff; } /* * Expand arithmetic expression. * Note that flag is not required as digits never require CTLESC characters. */ static char * expari(char *p) { char *q, *start; arith_t result; int begoff; int quoted; int adj; quoted = *p++ == '"'; begoff = expdest - stackblock(); p = argstr(p, 0); removerecordregions(begoff); STPUTC('\0', expdest); start = stackblock() + begoff; q = grabstackstr(expdest); result = arith(start); ungrabstackstr(q, expdest); start = stackblock() + begoff; adj = start - expdest; STADJUST(adj, expdest); CHECKSTRSPACE((int)(DIGITS(result) + 1), expdest); fmtstr(expdest, DIGITS(result), ARITH_FORMAT_STR, result); adj = strlen(expdest); STADJUST(adj, expdest); if (!quoted) recordregion(begoff, expdest - stackblock(), 0); return p; } /* * Perform command substitution. */ static void expbackq(union node *cmd, int quoted, int flag) { struct backcmd in; int i; char buf[128]; char *p; char *dest = expdest; struct ifsregion saveifs, *savelastp; struct nodelist *saveargbackq; char lastc; int startloc = dest - stackblock(); char const *syntax = quoted? DQSYNTAX : BASESYNTAX; int quotes = flag & (EXP_FULL | EXP_CASE); size_t nnl; INTOFF; saveifs = ifsfirst; savelastp = ifslastp; saveargbackq = argbackq; p = grabstackstr(dest); evalbackcmd(cmd, &in); ungrabstackstr(p, dest); ifsfirst = saveifs; ifslastp = savelastp; argbackq = saveargbackq; p = in.buf; lastc = '\0'; nnl = 0; /* Don't copy trailing newlines */ for (;;) { if (--in.nleft < 0) { if (in.fd < 0) break; while ((i = read(in.fd, buf, sizeof buf)) < 0 && errno == EINTR); TRACE(("expbackq: read returns %d\n", i)); if (i <= 0) break; p = buf; in.nleft = i - 1; } lastc = *p++; if (lastc != '\0') { if (lastc == '\n') { nnl++; } else { CHECKSTRSPACE(nnl + 2, dest); while (nnl > 0) { nnl--; USTPUTC('\n', dest); } if (quotes && syntax[(int)lastc] == CCTL) USTPUTC(CTLESC, dest); USTPUTC(lastc, dest); } } } if (in.fd >= 0) close(in.fd); if (in.buf) ckfree(in.buf); if (in.jp) exitstatus = waitforjob(in.jp, (int *)NULL); if (quoted == 0) recordregion(startloc, dest - stackblock(), 0); TRACE(("expbackq: size=%td: \"%.*s\"\n", ((dest - stackblock()) - startloc), (int)((dest - stackblock()) - startloc), stackblock() + startloc)); expdest = dest; INTON; } static void recordleft(const char *str, const char *loc, char *startp) { int amount; amount = ((str - 1) - (loc - startp)) - expdest; STADJUST(amount, expdest); while (loc != str - 1) *startp++ = *loc++; } static int subevalvar(char *p, char *str, int strloc, int subtype, int startloc, int varflags, int quotes) { char *startp; char *loc = NULL; char *q; int c = 0; struct nodelist *saveargbackq = argbackq; int amount; argstr(p, (subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX || subtype == VSTRIMRIGHT || subtype == VSTRIMRIGHTMAX ? EXP_CASE : 0) | EXP_TILDE); STACKSTRNUL(expdest); argbackq = saveargbackq; startp = stackblock() + startloc; if (str == NULL) str = stackblock() + strloc; switch (subtype) { case VSASSIGN: setvar(str, startp, 0); amount = startp - expdest; STADJUST(amount, expdest); varflags &= ~VSNUL; return 1; case VSQUESTION: if (*p != CTLENDVAR) { outfmt(out2, "%s\n", startp); error((char *)NULL); } error("%.*s: parameter %snot set", (int)(p - str - 1), str, (varflags & VSNUL) ? "null or " : ""); return 0; case VSTRIMLEFT: for (loc = startp; loc < str; loc++) { c = *loc; *loc = '\0'; if (patmatch(str, startp, quotes)) { *loc = c; recordleft(str, loc, startp); return 1; } *loc = c; if (quotes && *loc == CTLESC) loc++; } return 0; case VSTRIMLEFTMAX: for (loc = str - 1; loc >= startp;) { c = *loc; *loc = '\0'; if (patmatch(str, startp, quotes)) { *loc = c; recordleft(str, loc, startp); return 1; } *loc = c; loc--; if (quotes && loc > startp && *(loc - 1) == CTLESC) { for (q = startp; q < loc; q++) if (*q == CTLESC) q++; if (q > loc) loc--; } } return 0; case VSTRIMRIGHT: for (loc = str - 1; loc >= startp;) { if (patmatch(str, loc, quotes)) { amount = loc - expdest; STADJUST(amount, expdest); return 1; } loc--; if (quotes && loc > startp && *(loc - 1) == CTLESC) { for (q = startp; q < loc; q++) if (*q == CTLESC) q++; if (q > loc) loc--; } } return 0; case VSTRIMRIGHTMAX: for (loc = startp; loc < str - 1; loc++) { if (patmatch(str, loc, quotes)) { amount = loc - expdest; STADJUST(amount, expdest); return 1; } if (quotes && *loc == CTLESC) loc++; } return 0; default: abort(); } } /* * Expand a variable, and return a pointer to the next character in the * input string. */ static char * evalvar(char *p, int flag) { int subtype; int varflags; char *var; const char *val; int patloc; int c; int set; int special; int startloc; int varlen; int varlenb; int easy; int quotes = flag & (EXP_FULL | EXP_CASE); int record = 0; varflags = (unsigned char)*p++; subtype = varflags & VSTYPE; var = p; special = 0; if (! is_name(*p)) special = 1; p = strchr(p, '=') + 1; again: /* jump here after setting a variable with ${var=text} */ if (varflags & VSLINENO) { set = 1; special = 1; val = NULL; } else if (special) { set = varisset(var, varflags & VSNUL); val = NULL; } else { val = bltinlookup(var, 1); if (val == NULL || ((varflags & VSNUL) && val[0] == '\0')) { val = NULL; set = 0; } else set = 1; } varlen = 0; startloc = expdest - stackblock(); if (!set && uflag && *var != '@' && *var != '*') { switch (subtype) { case VSNORMAL: case VSTRIMLEFT: case VSTRIMLEFTMAX: case VSTRIMRIGHT: case VSTRIMRIGHTMAX: case VSLENGTH: error("%.*s: parameter not set", (int)(p - var - 1), var); } } if (set && subtype != VSPLUS) { /* insert the value of the variable */ if (special) { if (varflags & VSLINENO) STPUTBIN(var, p - var - 1, expdest); else varvalue(var, varflags & VSQUOTE, subtype, flag); if (subtype == VSLENGTH) { varlenb = expdest - stackblock() - startloc; varlen = varlenb; if (localeisutf8) { val = stackblock() + startloc; for (;val != expdest; val++) if ((*val & 0xC0) == 0x80) varlen--; } STADJUST(-varlenb, expdest); } } else { if (subtype == VSLENGTH) { for (;*val; val++) if (!localeisutf8 || (*val & 0xC0) != 0x80) varlen++; } else strtodest(val, flag, subtype, varflags & VSQUOTE); } } if (subtype == VSPLUS) set = ! set; easy = ((varflags & VSQUOTE) == 0 || (*var == '@' && shellparam.nparam != 1)); switch (subtype) { case VSLENGTH: expdest = cvtnum(varlen, expdest); record = 1; break; case VSNORMAL: record = easy; break; case VSPLUS: case VSMINUS: if (!set) { argstr(p, flag | (flag & EXP_FULL ? EXP_SPLIT_LIT : 0) | (varflags & VSQUOTE ? EXP_LIT_QUOTED : 0)); break; } record = easy; break; case VSTRIMLEFT: case VSTRIMLEFTMAX: case VSTRIMRIGHT: case VSTRIMRIGHTMAX: if (!set) break; /* * Terminate the string and start recording the pattern * right after it */ STPUTC('\0', expdest); patloc = expdest - stackblock(); if (subevalvar(p, NULL, patloc, subtype, startloc, varflags, quotes) == 0) { int amount = (expdest - stackblock() - patloc) + 1; STADJUST(-amount, expdest); } /* Remove any recorded regions beyond start of variable */ removerecordregions(startloc); record = 1; break; case VSASSIGN: case VSQUESTION: if (!set) { if (subevalvar(p, var, 0, subtype, startloc, varflags, quotes)) { varflags &= ~VSNUL; /* * Remove any recorded regions beyond * start of variable */ removerecordregions(startloc); goto again; } break; } record = easy; break; case VSERROR: c = p - var - 1; error("${%.*s%s}: Bad substitution", c, var, (c > 0 && *p != CTLENDVAR) ? "..." : ""); default: abort(); } if (record) recordregion(startloc, expdest - stackblock(), varflags & VSQUOTE || (ifsset() && ifsval()[0] == '\0' && (*var == '@' || *var == '*'))); if (subtype != VSNORMAL) { /* skip to end of alternative */ int nesting = 1; for (;;) { if ((c = *p++) == CTLESC) p++; else if (c == CTLBACKQ || c == (CTLBACKQ|CTLQUOTE)) { if (set) argbackq = argbackq->next; } else if (c == CTLVAR) { if ((*p++ & VSTYPE) != VSNORMAL) nesting++; } else if (c == CTLENDVAR) { if (--nesting == 0) break; } } } return p; } /* * Test whether a specialized variable is set. */ static int varisset(const char *name, int nulok) { if (*name == '!') return backgndpidset(); else if (*name == '@' || *name == '*') { if (*shellparam.p == NULL) return 0; if (nulok) { char **av; for (av = shellparam.p; *av; av++) if (**av != '\0') return 1; return 0; } } else if (is_digit(*name)) { char *ap; long num; errno = 0; num = strtol(name, NULL, 10); if (errno != 0 || num > shellparam.nparam) return 0; if (num == 0) ap = arg0; else ap = shellparam.p[num - 1]; if (nulok && (ap == NULL || *ap == '\0')) return 0; } return 1; } static void strtodest(const char *p, int flag, int subtype, int quoted) { if (flag & (EXP_FULL | EXP_CASE) && subtype != VSLENGTH) STPUTS_QUOTES(p, quoted ? DQSYNTAX : BASESYNTAX, expdest); else STPUTS(p, expdest); } /* * Add the value of a specialized variable to the stack string. */ static void varvalue(const char *name, int quoted, int subtype, int flag) { int num; char *p; int i; char sep[2]; char **ap; switch (*name) { case '$': num = rootpid; break; case '?': num = oexitstatus; break; case '#': num = shellparam.nparam; break; case '!': num = backgndpidval(); break; case '-': for (i = 0 ; i < NSHORTOPTS ; i++) { if (optlist[i].val) STPUTC(optlist[i].letter, expdest); } return; case '@': if (flag & EXP_FULL && quoted) { for (ap = shellparam.p ; (p = *ap++) != NULL ; ) { strtodest(p, flag, subtype, quoted); if (*ap) STPUTC('\0', expdest); } return; } /* FALLTHROUGH */ case '*': if (ifsset()) sep[0] = ifsval()[0]; else sep[0] = ' '; sep[1] = '\0'; for (ap = shellparam.p ; (p = *ap++) != NULL ; ) { strtodest(p, flag, subtype, quoted); if (!*ap) break; if (sep[0]) strtodest(sep, flag, subtype, quoted); else if (flag & EXP_FULL && !quoted && **ap != '\0') STPUTC('\0', expdest); } return; default: if (is_digit(*name)) { num = atoi(name); if (num == 0) p = arg0; else if (num > 0 && num <= shellparam.nparam) p = shellparam.p[num - 1]; else return; strtodest(p, flag, subtype, quoted); } return; } expdest = cvtnum(num, expdest); } /* * Record the fact that we have to scan this region of the * string for IFS characters. */ static void recordregion(int start, int end, int inquotes) { struct ifsregion *ifsp; INTOFF; if (ifslastp == NULL) { ifsp = &ifsfirst; } else { if (ifslastp->endoff == start && ifslastp->inquotes == inquotes) { /* extend previous area */ ifslastp->endoff = end; INTON; return; } ifsp = (struct ifsregion *)ckmalloc(sizeof (struct ifsregion)); ifslastp->next = ifsp; } ifslastp = ifsp; ifslastp->next = NULL; ifslastp->begoff = start; ifslastp->endoff = end; ifslastp->inquotes = inquotes; INTON; } /* * Break the argument string into pieces based upon IFS and add the * strings to the argument list. The regions of the string to be * searched for IFS characters have been stored by recordregion. * CTLESC characters are preserved but have little effect in this pass * other than escaping CTL* characters. In particular, they do not escape * IFS characters: that should be done with the ifsregion mechanism. * CTLQUOTEMARK characters are used to preserve empty quoted strings. * This pass treats them as a regular character, making the string non-empty. * Later, they are removed along with the other CTL* characters. */ static void ifsbreakup(char *string, struct arglist *arglist) { struct ifsregion *ifsp; struct strlist *sp; char *start; char *p; char *q; const char *ifs; const char *ifsspc; int had_param_ch = 0; start = string; if (ifslastp == NULL) { /* Return entire argument, IFS doesn't apply to any of it */ sp = (struct strlist *)stalloc(sizeof *sp); sp->text = start; *arglist->lastp = sp; arglist->lastp = &sp->next; return; } ifs = ifsset() ? ifsval() : " \t\n"; for (ifsp = &ifsfirst; ifsp != NULL; ifsp = ifsp->next) { p = string + ifsp->begoff; while (p < string + ifsp->endoff) { q = p; if (*p == CTLESC) p++; if (ifsp->inquotes) { /* Only NULs (should be from "$@") end args */ had_param_ch = 1; if (*p != 0) { p++; continue; } ifsspc = NULL; } else { if (!strchr(ifs, *p)) { had_param_ch = 1; p++; continue; } ifsspc = strchr(" \t\n", *p); /* Ignore IFS whitespace at start */ if (q == start && ifsspc != NULL) { p++; start = p; continue; } had_param_ch = 0; } /* Save this argument... */ *q = '\0'; sp = (struct strlist *)stalloc(sizeof *sp); sp->text = start; *arglist->lastp = sp; arglist->lastp = &sp->next; p++; if (ifsspc != NULL) { /* Ignore further trailing IFS whitespace */ for (; p < string + ifsp->endoff; p++) { q = p; if (*p == CTLESC) p++; if (strchr(ifs, *p) == NULL) { p = q; break; } if (strchr(" \t\n", *p) == NULL) { p++; break; } } } start = p; } } /* * Save anything left as an argument. * Traditionally we have treated 'IFS=':'; set -- x$IFS' as * generating 2 arguments, the second of which is empty. * Some recent clarification of the Posix spec say that it * should only generate one.... */ if (had_param_ch || *start != 0) { sp = (struct strlist *)stalloc(sizeof *sp); sp->text = start; *arglist->lastp = sp; arglist->lastp = &sp->next; } } static char expdir[PATH_MAX]; #define expdir_end (expdir + sizeof(expdir)) /* * Perform pathname generation and remove control characters. * At this point, the only control characters should be CTLESC and CTLQUOTEMARK. * The results are stored in the list exparg. */ static void expandmeta(struct strlist *str) { char *p; struct strlist **savelastp; struct strlist *sp; char c; while (str) { savelastp = exparg.lastp; if (!fflag) { p = str->text; for (; (c = *p) != '\0'; p++) { /* fast check for meta chars */ if (c == '*' || c == '?' || c == '[') { INTOFF; expmeta(expdir, str->text); INTON; break; } } } if (exparg.lastp == savelastp) { /* * no matches */ *exparg.lastp = str; rmescapes(str->text); exparg.lastp = &str->next; } else { *exparg.lastp = NULL; *savelastp = sp = expsort(*savelastp); while (sp->next != NULL) sp = sp->next; exparg.lastp = &sp->next; } str = str->next; } } /* * Do metacharacter (i.e. *, ?, [...]) expansion. */ static void expmeta(char *enddir, char *name) { const char *p; const char *q; const char *start; char *endname; int metaflag; struct stat statb; DIR *dirp; struct dirent *dp; int atend; int matchdot; int esc; int namlen; metaflag = 0; start = name; for (p = name; esc = 0, *p; p += esc + 1) { if (*p == '*' || *p == '?') metaflag = 1; else if (*p == '[') { q = p + 1; if (*q == '!' || *q == '^') q++; for (;;) { while (*q == CTLQUOTEMARK) q++; if (*q == CTLESC) q++; if (*q == '/' || *q == '\0') break; if (*++q == ']') { metaflag = 1; break; } } } else if (*p == '\0') break; else if (*p == CTLQUOTEMARK) continue; else { if (*p == CTLESC) esc++; if (p[esc] == '/') { if (metaflag) break; start = p + esc + 1; } } } if (metaflag == 0) { /* we've reached the end of the file name */ if (enddir != expdir) metaflag++; for (p = name ; ; p++) { if (*p == CTLQUOTEMARK) continue; if (*p == CTLESC) p++; *enddir++ = *p; if (*p == '\0') break; if (enddir == expdir_end) return; } if (metaflag == 0 || lstat(expdir, &statb) >= 0) addfname(expdir); return; } endname = name + (p - name); if (start != name) { p = name; while (p < start) { while (*p == CTLQUOTEMARK) p++; if (*p == CTLESC) p++; *enddir++ = *p++; if (enddir == expdir_end) return; } } if (enddir == expdir) { p = "."; } else if (enddir == expdir + 1 && *expdir == '/') { p = "/"; } else { p = expdir; enddir[-1] = '\0'; } if ((dirp = opendir(p)) == NULL) return; if (enddir != expdir) enddir[-1] = '/'; if (*endname == 0) { atend = 1; } else { atend = 0; *endname = '\0'; endname += esc + 1; } matchdot = 0; p = start; while (*p == CTLQUOTEMARK) p++; if (*p == CTLESC) p++; if (*p == '.') matchdot++; while (! int_pending() && (dp = readdir(dirp)) != NULL) { if (dp->d_name[0] == '.' && ! matchdot) continue; if (patmatch(start, dp->d_name, 0)) { namlen = dp->d_namlen; if (enddir + namlen + 1 > expdir_end) continue; memcpy(enddir, dp->d_name, namlen + 1); if (atend) addfname(expdir); else { if (dp->d_type != DT_UNKNOWN && dp->d_type != DT_DIR && dp->d_type != DT_LNK) continue; if (enddir + namlen + 2 > expdir_end) continue; enddir[namlen] = '/'; enddir[namlen + 1] = '\0'; expmeta(enddir + namlen + 1, endname); } } } closedir(dirp); if (! atend) endname[-esc - 1] = esc ? CTLESC : '/'; } /* * Add a file name to the list. */ static void addfname(char *name) { char *p; struct strlist *sp; p = stsavestr(name); sp = (struct strlist *)stalloc(sizeof *sp); sp->text = p; *exparg.lastp = sp; exparg.lastp = &sp->next; } /* * Sort the results of file name expansion. It calculates the number of * strings to sort and then calls msort (short for merge sort) to do the * work. */ static struct strlist * expsort(struct strlist *str) { int len; struct strlist *sp; len = 0; for (sp = str ; sp ; sp = sp->next) len++; return msort(str, len); } static struct strlist * msort(struct strlist *list, int len) { struct strlist *p, *q = NULL; struct strlist **lpp; int half; int n; if (len <= 1) return list; half = len >> 1; p = list; for (n = half ; --n >= 0 ; ) { q = p; p = p->next; } q->next = NULL; /* terminate first half of list */ q = msort(list, half); /* sort first half of list */ p = msort(p, len - half); /* sort second half */ lpp = &list; for (;;) { if (strcmp(p->text, q->text) < 0) { *lpp = p; lpp = &p->next; if ((p = *lpp) == NULL) { *lpp = q; break; } } else { *lpp = q; lpp = &q->next; if ((q = *lpp) == NULL) { *lpp = p; break; } } } return list; } static wchar_t get_wc(const char **p) { wchar_t c; int chrlen; chrlen = mbtowc(&c, *p, 4); if (chrlen == 0) return 0; else if (chrlen == -1) c = 0; else *p += chrlen; return c; } /* * See if a character matches a character class, starting at the first colon * of "[:class:]". * If a valid character class is recognized, a pointer to the next character * after the final closing bracket is stored into *end, otherwise a null * pointer is stored into *end. */ static int match_charclass(const char *p, wchar_t chr, const char **end) { char name[20]; const char *nameend; wctype_t cclass; *end = NULL; p++; nameend = strstr(p, ":]"); if (nameend == NULL || (size_t)(nameend - p) >= sizeof(name) || nameend == p) return 0; memcpy(name, p, nameend - p); name[nameend - p] = '\0'; *end = nameend + 2; cclass = wctype(name); /* An unknown class matches nothing but is valid nevertheless. */ if (cclass == 0) return 0; return iswctype(chr, cclass); } /* * Returns true if the pattern matches the string. */ static int patmatch(const char *pattern, const char *string, int squoted) { const char *p, *q, *end; const char *bt_p, *bt_q; char c; wchar_t wc, wc2; p = pattern; q = string; bt_p = NULL; bt_q = NULL; for (;;) { switch (c = *p++) { case '\0': if (*q != '\0') goto backtrack; return 1; case CTLESC: if (squoted && *q == CTLESC) q++; if (*q++ != *p++) goto backtrack; break; case CTLQUOTEMARK: continue; case '?': if (squoted && *q == CTLESC) q++; if (*q == '\0') return 0; if (localeisutf8) { wc = get_wc(&q); /* * A '?' does not match invalid UTF-8 but a * '*' does, so backtrack. */ if (wc == 0) goto backtrack; } else wc = (unsigned char)*q++; break; case '*': c = *p; while (c == CTLQUOTEMARK || c == '*') c = *++p; /* * If the pattern ends here, we know the string * matches without needing to look at the rest of it. */ if (c == '\0') return 1; /* * First try the shortest match for the '*' that * could work. We can forget any earlier '*' since * there is no way having it match more characters * can help us, given that we are already here. */ bt_p = p; bt_q = q; break; case '[': { const char *savep, *saveq; int invert, found; wchar_t chr; savep = p, saveq = q; invert = 0; if (*p == '!' || *p == '^') { invert++; p++; } found = 0; if (squoted && *q == CTLESC) q++; if (*q == '\0') return 0; if (localeisutf8) { chr = get_wc(&q); if (chr == 0) goto backtrack; } else chr = (unsigned char)*q++; c = *p++; do { if (c == '\0') { p = savep, q = saveq; c = '['; goto dft; } if (c == CTLQUOTEMARK) continue; if (c == '[' && *p == ':') { found |= match_charclass(p, chr, &end); if (end != NULL) p = end; } if (c == CTLESC) c = *p++; if (localeisutf8 && c & 0x80) { p--; wc = get_wc(&p); if (wc == 0) /* bad utf-8 */ return 0; } else wc = (unsigned char)c; if (*p == '-' && p[1] != ']') { p++; while (*p == CTLQUOTEMARK) p++; if (*p == CTLESC) p++; if (localeisutf8) { wc2 = get_wc(&p); if (wc2 == 0) /* bad utf-8 */ return 0; } else wc2 = (unsigned char)*p++; if ( collate_range_cmp(chr, wc) >= 0 && collate_range_cmp(chr, wc2) <= 0 ) found = 1; } else { if (chr == wc) found = 1; } } while ((c = *p++) != ']'); if (found == invert) goto backtrack; break; } dft: default: if (squoted && *q == CTLESC) q++; if (*q == '\0') return 0; if (*q++ == c) break; backtrack: /* * If we have a mismatch (other than hitting the end * of the string), go back to the last '*' seen and * have it match one additional character. */ if (bt_p == NULL) return 0; if (squoted && *bt_q == CTLESC) bt_q++; if (*bt_q == '\0') return 0; bt_q++; p = bt_p; q = bt_q; break; } } } /* * Remove any CTLESC and CTLQUOTEMARK characters from a string. */ void rmescapes(char *str) { char *p, *q; p = str; while (*p != CTLESC && *p != CTLQUOTEMARK && *p != CTLQUOTEEND) { if (*p++ == '\0') return; } q = p; while (*p) { if (*p == CTLQUOTEMARK || *p == CTLQUOTEEND) { p++; continue; } if (*p == CTLESC) p++; *q++ = *p++; } *q = '\0'; } /* * See if a pattern matches in a case statement. */ int casematch(union node *pattern, const char *val) { struct stackmark smark; int result; char *p; setstackmark(&smark); argbackq = pattern->narg.backquote; STARTSTACKSTR(expdest); ifslastp = NULL; argstr(pattern->narg.text, EXP_TILDE | EXP_CASE); STPUTC('\0', expdest); p = grabstackstr(expdest); result = patmatch(p, val, 0); popstackmark(&smark); return result; } /* * Our own itoa(). */ static char * cvtnum(int num, char *buf) { char temp[32]; int neg = num < 0; char *p = temp + 31; temp[31] = '\0'; do { *--p = num % 10 + '0'; } while ((num /= 10) != 0); if (neg) *--p = '-'; STPUTS(p, buf); return buf; } /* * Do most of the work for wordexp(3). */ int wordexpcmd(int argc, char **argv) { size_t len; int i; out1fmt("%08x", argc - 1); for (i = 1, len = 0; i < argc; i++) len += strlen(argv[i]); out1fmt("%08x", (int)len); for (i = 1; i < argc; i++) outbin(argv[i], strlen(argv[i]) + 1, out1); return (0); } + +/* + * Do most of the work for wordexp(3), new version. + */ + +int +freebsd_wordexpcmd(int argc __unused, char **argv __unused) +{ + struct arglist arglist; + union node *args, *n; + struct strlist *sp; + size_t count, len; + int ch; + int protected = 0; + int fd = -1; + + while ((ch = nextopt("f:p")) != '\0') { + switch (ch) { + case 'f': + fd = number(shoptarg); + break; + case 'p': + protected = 1; + break; + } + } + if (*argptr != NULL) + error("wrong number of arguments"); + if (fd < 0) + error("missing fd"); + INTOFF; + setinputfd(fd, 1); + INTON; + args = parsewordexp(); + popfile(); /* will also close fd */ + if (protected) + for (n = args; n != NULL; n = n->narg.next) { + if (n->narg.backquote != NULL) { + outcslow('C', out1); + error("command substitution disabled"); + } + } + outcslow(' ', out1); + arglist.lastp = &arglist.list; + for (n = args; n != NULL; n = n->narg.next) + expandarg(n, &arglist, EXP_FULL | EXP_TILDE); + *arglist.lastp = NULL; + for (sp = arglist.list, count = len = 0; sp; sp = sp->next) + count++, len += strlen(sp->text); + out1fmt("%016zx %016zx", count, len); + for (sp = arglist.list; sp; sp = sp->next) + outbin(sp->text, strlen(sp->text) + 1, out1); + return (0); +} Index: head/bin/sh/parser.c =================================================================== --- head/bin/sh/parser.c (revision 288429) +++ head/bin/sh/parser.c (revision 288430) @@ -1,2080 +1,2113 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Kenneth Almquist. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint #if 0 static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include #include "shell.h" #include "parser.h" #include "nodes.h" #include "expand.h" /* defines rmescapes() */ #include "syntax.h" #include "options.h" #include "input.h" #include "output.h" #include "var.h" #include "error.h" #include "memalloc.h" #include "mystring.h" #include "alias.h" #include "show.h" #include "eval.h" #include "exec.h" /* to check for special builtins */ #ifndef NO_HISTORY #include "myhistedit.h" #endif /* * Shell command parser. */ #define PROMPTLEN 128 /* values of checkkwd variable */ #define CHKALIAS 0x1 #define CHKKWD 0x2 #define CHKNL 0x4 /* values returned by readtoken */ #include "token.h" struct heredoc { struct heredoc *next; /* next here document in list */ union node *here; /* redirection node */ char *eofmark; /* string indicating end of input */ int striptabs; /* if set, strip leading tabs */ }; struct parser_temp { struct parser_temp *next; void *data; }; static struct heredoc *heredoclist; /* list of here documents to read */ static int doprompt; /* if set, prompt the user */ static int needprompt; /* true if interactive and at start of line */ static int lasttoken; /* last token read */ static int tokpushback; /* last token pushed back */ static char *wordtext; /* text of last word returned by readtoken */ static int checkkwd; static struct nodelist *backquotelist; static union node *redirnode; static struct heredoc *heredoc; static int quoteflag; /* set if (part of) last token was quoted */ static int startlinno; /* line # where last token started */ static int funclinno; /* line # where the current function started */ static struct parser_temp *parser_temp; #define NOEOFMARK ((const char *)&heredoclist) static union node *list(int); static union node *andor(void); static union node *pipeline(void); static union node *command(void); static union node *simplecmd(union node **, union node *); static union node *makename(void); static union node *makebinary(int type, union node *n1, union node *n2); static void parsefname(void); static void parseheredoc(void); static int peektoken(void); static int readtoken(void); static int xxreadtoken(void); static int readtoken1(int, const char *, const char *, int); static int noexpand(char *); static void consumetoken(int); static void synexpect(int) __dead2; static void synerror(const char *) __dead2; static void setprompt(int); static int pgetc_linecont(void); static void * parser_temp_alloc(size_t len) { struct parser_temp *t; INTOFF; t = ckmalloc(sizeof(*t)); t->data = NULL; t->next = parser_temp; parser_temp = t; t->data = ckmalloc(len); INTON; return t->data; } static void * parser_temp_realloc(void *ptr, size_t len) { struct parser_temp *t; INTOFF; t = parser_temp; if (ptr != t->data) error("bug: parser_temp_realloc misused"); t->data = ckrealloc(t->data, len); INTON; return t->data; } static void parser_temp_free_upto(void *ptr) { struct parser_temp *t; int done = 0; INTOFF; while (parser_temp != NULL && !done) { t = parser_temp; parser_temp = t->next; done = t->data == ptr; ckfree(t->data); ckfree(t); } INTON; if (!done) error("bug: parser_temp_free_upto misused"); } static void parser_temp_free_all(void) { struct parser_temp *t; INTOFF; while (parser_temp != NULL) { t = parser_temp; parser_temp = t->next; ckfree(t->data); ckfree(t); } INTON; } /* * Read and parse a command. Returns NEOF on end of file. (NULL is a * valid parse tree indicating a blank line.) */ union node * parsecmd(int interact) { int t; /* This assumes the parser is not re-entered, * which could happen if we add command substitution on PS1/PS2. */ parser_temp_free_all(); heredoclist = NULL; tokpushback = 0; checkkwd = 0; doprompt = interact; if (doprompt) setprompt(1); else setprompt(0); needprompt = 0; t = readtoken(); if (t == TEOF) return NEOF; if (t == TNL) return NULL; tokpushback++; return list(1); } +/* + * Read and parse words for wordexp. + * Returns a list of NARG nodes; NULL if there are no words. + */ +union node * +parsewordexp(void) +{ + union node *n, *first = NULL, **pnext; + int t; + + /* This assumes the parser is not re-entered, + * which could happen if we add command substitution on PS1/PS2. + */ + parser_temp_free_all(); + heredoclist = NULL; + + tokpushback = 0; + checkkwd = 0; + doprompt = 0; + setprompt(0); + needprompt = 0; + pnext = &first; + while ((t = readtoken()) != TEOF) { + if (t != TWORD) + synexpect(TWORD); + n = makename(); + *pnext = n; + pnext = &n->narg.next; + } + return first; +} + + static union node * list(int nlflag) { union node *ntop, *n1, *n2, *n3; int tok; checkkwd = CHKNL | CHKKWD | CHKALIAS; if (!nlflag && tokendlist[peektoken()]) return NULL; ntop = n1 = NULL; for (;;) { n2 = andor(); tok = readtoken(); if (tok == TBACKGND) { if (n2 != NULL && n2->type == NPIPE) { n2->npipe.backgnd = 1; } else if (n2 != NULL && n2->type == NREDIR) { n2->type = NBACKGND; } else { n3 = (union node *)stalloc(sizeof (struct nredir)); n3->type = NBACKGND; n3->nredir.n = n2; n3->nredir.redirect = NULL; n2 = n3; } } if (ntop == NULL) ntop = n2; else if (n1 == NULL) { n1 = makebinary(NSEMI, ntop, n2); ntop = n1; } else { n3 = makebinary(NSEMI, n1->nbinary.ch2, n2); n1->nbinary.ch2 = n3; n1 = n3; } switch (tok) { case TBACKGND: case TSEMI: tok = readtoken(); /* FALLTHROUGH */ case TNL: if (tok == TNL) { parseheredoc(); if (nlflag) return ntop; } else if (tok == TEOF && nlflag) { parseheredoc(); return ntop; } else { tokpushback++; } checkkwd = CHKNL | CHKKWD | CHKALIAS; if (!nlflag && tokendlist[peektoken()]) return ntop; break; case TEOF: if (heredoclist) parseheredoc(); else pungetc(); /* push back EOF on input */ return ntop; default: if (nlflag) synexpect(-1); tokpushback++; return ntop; } } } static union node * andor(void) { union node *n; int t; n = pipeline(); for (;;) { if ((t = readtoken()) == TAND) { t = NAND; } else if (t == TOR) { t = NOR; } else { tokpushback++; return n; } n = makebinary(t, n, pipeline()); } } static union node * pipeline(void) { union node *n1, *n2, *pipenode; struct nodelist *lp, *prev; int negate, t; negate = 0; checkkwd = CHKNL | CHKKWD | CHKALIAS; TRACE(("pipeline: entered\n")); while (readtoken() == TNOT) negate = !negate; tokpushback++; n1 = command(); if (readtoken() == TPIPE) { pipenode = (union node *)stalloc(sizeof (struct npipe)); pipenode->type = NPIPE; pipenode->npipe.backgnd = 0; lp = (struct nodelist *)stalloc(sizeof (struct nodelist)); pipenode->npipe.cmdlist = lp; lp->n = n1; do { prev = lp; lp = (struct nodelist *)stalloc(sizeof (struct nodelist)); checkkwd = CHKNL | CHKKWD | CHKALIAS; t = readtoken(); tokpushback++; if (t == TNOT) lp->n = pipeline(); else lp->n = command(); prev->next = lp; } while (readtoken() == TPIPE); lp->next = NULL; n1 = pipenode; } tokpushback++; if (negate) { n2 = (union node *)stalloc(sizeof (struct nnot)); n2->type = NNOT; n2->nnot.com = n1; return n2; } else return n1; } static union node * command(void) { union node *n1, *n2; union node *ap, **app; union node *cp, **cpp; union node *redir, **rpp; int t; int is_subshell; checkkwd = CHKNL | CHKKWD | CHKALIAS; is_subshell = 0; redir = NULL; n1 = NULL; rpp = &redir; /* Check for redirection which may precede command */ while (readtoken() == TREDIR) { *rpp = n2 = redirnode; rpp = &n2->nfile.next; parsefname(); } tokpushback++; switch (readtoken()) { case TIF: n1 = (union node *)stalloc(sizeof (struct nif)); n1->type = NIF; if ((n1->nif.test = list(0)) == NULL) synexpect(-1); consumetoken(TTHEN); n1->nif.ifpart = list(0); n2 = n1; while (readtoken() == TELIF) { n2->nif.elsepart = (union node *)stalloc(sizeof (struct nif)); n2 = n2->nif.elsepart; n2->type = NIF; if ((n2->nif.test = list(0)) == NULL) synexpect(-1); consumetoken(TTHEN); n2->nif.ifpart = list(0); } if (lasttoken == TELSE) n2->nif.elsepart = list(0); else { n2->nif.elsepart = NULL; tokpushback++; } consumetoken(TFI); checkkwd = CHKKWD | CHKALIAS; break; case TWHILE: case TUNTIL: t = lasttoken; if ((n1 = list(0)) == NULL) synexpect(-1); consumetoken(TDO); n1 = makebinary((t == TWHILE)? NWHILE : NUNTIL, n1, list(0)); consumetoken(TDONE); checkkwd = CHKKWD | CHKALIAS; break; case TFOR: if (readtoken() != TWORD || quoteflag || ! goodname(wordtext)) synerror("Bad for loop variable"); n1 = (union node *)stalloc(sizeof (struct nfor)); n1->type = NFOR; n1->nfor.var = wordtext; while (readtoken() == TNL) ; if (lasttoken == TWORD && ! quoteflag && equal(wordtext, "in")) { app = ≈ while (readtoken() == TWORD) { n2 = makename(); *app = n2; app = &n2->narg.next; } *app = NULL; n1->nfor.args = ap; if (lasttoken != TNL && lasttoken != TSEMI) synexpect(-1); } else { static char argvars[5] = { CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0' }; n2 = (union node *)stalloc(sizeof (struct narg)); n2->type = NARG; n2->narg.text = argvars; n2->narg.backquote = NULL; n2->narg.next = NULL; n1->nfor.args = n2; /* * Newline or semicolon here is optional (but note * that the original Bourne shell only allowed NL). */ if (lasttoken != TNL && lasttoken != TSEMI) tokpushback++; } checkkwd = CHKNL | CHKKWD | CHKALIAS; if ((t = readtoken()) == TDO) t = TDONE; else if (t == TBEGIN) t = TEND; else synexpect(-1); n1->nfor.body = list(0); consumetoken(t); checkkwd = CHKKWD | CHKALIAS; break; case TCASE: n1 = (union node *)stalloc(sizeof (struct ncase)); n1->type = NCASE; consumetoken(TWORD); n1->ncase.expr = makename(); while (readtoken() == TNL); if (lasttoken != TWORD || ! equal(wordtext, "in")) synerror("expecting \"in\""); cpp = &n1->ncase.cases; checkkwd = CHKNL | CHKKWD, readtoken(); while (lasttoken != TESAC) { *cpp = cp = (union node *)stalloc(sizeof (struct nclist)); cp->type = NCLIST; app = &cp->nclist.pattern; if (lasttoken == TLP) readtoken(); for (;;) { *app = ap = makename(); checkkwd = CHKNL | CHKKWD; if (readtoken() != TPIPE) break; app = &ap->narg.next; readtoken(); } ap->narg.next = NULL; if (lasttoken != TRP) synexpect(TRP); cp->nclist.body = list(0); checkkwd = CHKNL | CHKKWD | CHKALIAS; if ((t = readtoken()) != TESAC) { if (t == TENDCASE) ; else if (t == TFALLTHRU) cp->type = NCLISTFALLTHRU; else synexpect(TENDCASE); checkkwd = CHKNL | CHKKWD, readtoken(); } cpp = &cp->nclist.next; } *cpp = NULL; checkkwd = CHKKWD | CHKALIAS; break; case TLP: n1 = (union node *)stalloc(sizeof (struct nredir)); n1->type = NSUBSHELL; n1->nredir.n = list(0); n1->nredir.redirect = NULL; consumetoken(TRP); checkkwd = CHKKWD | CHKALIAS; is_subshell = 1; break; case TBEGIN: n1 = list(0); consumetoken(TEND); checkkwd = CHKKWD | CHKALIAS; break; /* A simple command must have at least one redirection or word. */ case TBACKGND: case TSEMI: case TAND: case TOR: case TPIPE: case TENDCASE: case TFALLTHRU: case TEOF: case TNL: case TRP: if (!redir) synexpect(-1); case TWORD: tokpushback++; n1 = simplecmd(rpp, redir); return n1; default: synexpect(-1); } /* Now check for redirection which may follow command */ while (readtoken() == TREDIR) { *rpp = n2 = redirnode; rpp = &n2->nfile.next; parsefname(); } tokpushback++; *rpp = NULL; if (redir) { if (!is_subshell) { n2 = (union node *)stalloc(sizeof (struct nredir)); n2->type = NREDIR; n2->nredir.n = n1; n1 = n2; } n1->nredir.redirect = redir; } return n1; } static union node * simplecmd(union node **rpp, union node *redir) { union node *args, **app; union node **orig_rpp = rpp; union node *n = NULL; int special; int savecheckkwd; /* If we don't have any redirections already, then we must reset */ /* rpp to be the address of the local redir variable. */ if (redir == 0) rpp = &redir; args = NULL; app = &args; /* * We save the incoming value, because we need this for shell * functions. There can not be a redirect or an argument between * the function name and the open parenthesis. */ orig_rpp = rpp; savecheckkwd = CHKALIAS; for (;;) { checkkwd = savecheckkwd; if (readtoken() == TWORD) { n = makename(); *app = n; app = &n->narg.next; if (savecheckkwd != 0 && !isassignment(wordtext)) savecheckkwd = 0; } else if (lasttoken == TREDIR) { *rpp = n = redirnode; rpp = &n->nfile.next; parsefname(); /* read name of redirection file */ } else if (lasttoken == TLP && app == &args->narg.next && rpp == orig_rpp) { /* We have a function */ consumetoken(TRP); funclinno = plinno; /* * - Require plain text. * - Functions with '/' cannot be called. * - Reject name=(). * - Reject ksh extended glob patterns. */ if (!noexpand(n->narg.text) || quoteflag || strchr(n->narg.text, '/') || strchr("!%*+-=?@}~", n->narg.text[strlen(n->narg.text) - 1])) synerror("Bad function name"); rmescapes(n->narg.text); if (find_builtin(n->narg.text, &special) >= 0 && special) synerror("Cannot override a special builtin with a function"); n->type = NDEFUN; n->narg.next = command(); funclinno = 0; return n; } else { tokpushback++; break; } } *app = NULL; *rpp = NULL; n = (union node *)stalloc(sizeof (struct ncmd)); n->type = NCMD; n->ncmd.args = args; n->ncmd.redirect = redir; return n; } static union node * makename(void) { union node *n; n = (union node *)stalloc(sizeof (struct narg)); n->type = NARG; n->narg.next = NULL; n->narg.text = wordtext; n->narg.backquote = backquotelist; return n; } static union node * makebinary(int type, union node *n1, union node *n2) { union node *n; n = (union node *)stalloc(sizeof (struct nbinary)); n->type = type; n->nbinary.ch1 = n1; n->nbinary.ch2 = n2; return (n); } void forcealias(void) { checkkwd |= CHKALIAS; } void fixredir(union node *n, const char *text, int err) { TRACE(("Fix redir %s %d\n", text, err)); if (!err) n->ndup.vname = NULL; if (is_digit(text[0]) && text[1] == '\0') n->ndup.dupfd = digit_val(text[0]); else if (text[0] == '-' && text[1] == '\0') n->ndup.dupfd = -1; else { if (err) synerror("Bad fd number"); else n->ndup.vname = makename(); } } static void parsefname(void) { union node *n = redirnode; consumetoken(TWORD); if (n->type == NHERE) { struct heredoc *here = heredoc; struct heredoc *p; if (quoteflag == 0) n->type = NXHERE; TRACE(("Here document %d\n", n->type)); if (here->striptabs) { while (*wordtext == '\t') wordtext++; } if (! noexpand(wordtext)) synerror("Illegal eof marker for << redirection"); rmescapes(wordtext); here->eofmark = wordtext; here->next = NULL; if (heredoclist == NULL) heredoclist = here; else { for (p = heredoclist ; p->next ; p = p->next); p->next = here; } } else if (n->type == NTOFD || n->type == NFROMFD) { fixredir(n, wordtext, 0); } else { n->nfile.fname = makename(); } } /* * Input any here documents. */ static void parseheredoc(void) { struct heredoc *here; union node *n; while (heredoclist) { here = heredoclist; heredoclist = here->next; if (needprompt) { setprompt(2); needprompt = 0; } readtoken1(pgetc(), here->here->type == NHERE? SQSYNTAX : DQSYNTAX, here->eofmark, here->striptabs); n = makename(); here->here->nhere.doc = n; } } static int peektoken(void) { int t; t = readtoken(); tokpushback++; return (t); } static int readtoken(void) { int t; struct alias *ap; #ifdef DEBUG int alreadyseen = tokpushback; #endif top: t = xxreadtoken(); /* * eat newlines */ if (checkkwd & CHKNL) { while (t == TNL) { parseheredoc(); t = xxreadtoken(); } } /* * check for keywords and aliases */ if (t == TWORD && !quoteflag) { const char * const *pp; if (checkkwd & CHKKWD) for (pp = parsekwd; *pp; pp++) { if (**pp == *wordtext && equal(*pp, wordtext)) { lasttoken = t = pp - parsekwd + KWDOFFSET; TRACE(("keyword %s recognized\n", tokname[t])); goto out; } } if (checkkwd & CHKALIAS && (ap = lookupalias(wordtext, 1)) != NULL) { pushstring(ap->val, strlen(ap->val), ap); goto top; } } out: if (t != TNOT) checkkwd = 0; #ifdef DEBUG if (!alreadyseen) TRACE(("token %s %s\n", tokname[t], t == TWORD ? wordtext : "")); else TRACE(("reread token %s %s\n", tokname[t], t == TWORD ? wordtext : "")); #endif return (t); } /* * Read the next input token. * If the token is a word, we set backquotelist to the list of cmds in * backquotes. We set quoteflag to true if any part of the word was * quoted. * If the token is TREDIR, then we set redirnode to a structure containing * the redirection. * In all cases, the variable startlinno is set to the number of the line * on which the token starts. * * [Change comment: here documents and internal procedures] * [Readtoken shouldn't have any arguments. Perhaps we should make the * word parsing code into a separate routine. In this case, readtoken * doesn't need to have any internal procedures, but parseword does. * We could also make parseoperator in essence the main routine, and * have parseword (readtoken1?) handle both words and redirection.] */ #define RETURN(token) return lasttoken = token static int xxreadtoken(void) { int c; if (tokpushback) { tokpushback = 0; return lasttoken; } if (needprompt) { setprompt(2); needprompt = 0; } startlinno = plinno; for (;;) { /* until token or start of word found */ c = pgetc_macro(); switch (c) { case ' ': case '\t': continue; case '#': while ((c = pgetc()) != '\n' && c != PEOF); pungetc(); continue; case '\\': if (pgetc() == '\n') { startlinno = ++plinno; if (doprompt) setprompt(2); else setprompt(0); continue; } pungetc(); /* FALLTHROUGH */ default: return readtoken1(c, BASESYNTAX, (char *)NULL, 0); case '\n': plinno++; needprompt = doprompt; RETURN(TNL); case PEOF: RETURN(TEOF); case '&': if (pgetc_linecont() == '&') RETURN(TAND); pungetc(); RETURN(TBACKGND); case '|': if (pgetc_linecont() == '|') RETURN(TOR); pungetc(); RETURN(TPIPE); case ';': c = pgetc_linecont(); if (c == ';') RETURN(TENDCASE); else if (c == '&') RETURN(TFALLTHRU); pungetc(); RETURN(TSEMI); case '(': RETURN(TLP); case ')': RETURN(TRP); } } #undef RETURN } #define MAXNEST_static 8 struct tokenstate { const char *syntax; /* *SYNTAX */ int parenlevel; /* levels of parentheses in arithmetic */ enum tokenstate_category { TSTATE_TOP, TSTATE_VAR_OLD, /* ${var+-=?}, inherits dquotes */ TSTATE_VAR_NEW, /* other ${var...}, own dquote state */ TSTATE_ARITH } category; }; /* * Check to see whether we are at the end of the here document. When this * is called, c is set to the first character of the next input line. If * we are at the end of the here document, this routine sets the c to PEOF. * The new value of c is returned. */ static int checkend(int c, const char *eofmark, int striptabs) { if (striptabs) { while (c == '\t') c = pgetc(); } if (c == *eofmark) { int c2; const char *q; for (q = eofmark + 1; c2 = pgetc(), *q != '\0' && c2 == *q; q++) ; if ((c2 == PEOF || c2 == '\n') && *q == '\0') { c = PEOF; if (c2 == '\n') { plinno++; needprompt = doprompt; } } else { pungetc(); pushstring(eofmark + 1, q - (eofmark + 1), NULL); } } else if (c == '\n' && *eofmark == '\0') { c = PEOF; plinno++; needprompt = doprompt; } return (c); } /* * Parse a redirection operator. The variable "out" points to a string * specifying the fd to be redirected. The variable "c" contains the * first character of the redirection operator. */ static void parseredir(char *out, int c) { char fd = *out; union node *np; np = (union node *)stalloc(sizeof (struct nfile)); if (c == '>') { np->nfile.fd = 1; c = pgetc_linecont(); if (c == '>') np->type = NAPPEND; else if (c == '&') np->type = NTOFD; else if (c == '|') np->type = NCLOBBER; else { np->type = NTO; pungetc(); } } else { /* c == '<' */ np->nfile.fd = 0; c = pgetc_linecont(); if (c == '<') { if (sizeof (struct nfile) != sizeof (struct nhere)) { np = (union node *)stalloc(sizeof (struct nhere)); np->nfile.fd = 0; } np->type = NHERE; heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc)); heredoc->here = np; if ((c = pgetc_linecont()) == '-') { heredoc->striptabs = 1; } else { heredoc->striptabs = 0; pungetc(); } } else if (c == '&') np->type = NFROMFD; else if (c == '>') np->type = NFROMTO; else { np->type = NFROM; pungetc(); } } if (fd != '\0') np->nfile.fd = digit_val(fd); redirnode = np; } /* * Called to parse command substitutions. */ static char * parsebackq(char *out, struct nodelist **pbqlist, int oldstyle, int dblquote, int quoted) { struct nodelist **nlpp; union node *n; char *volatile str; struct jmploc jmploc; struct jmploc *const savehandler = handler; size_t savelen; int saveprompt; const int bq_startlinno = plinno; char *volatile ostr = NULL; struct parsefile *const savetopfile = getcurrentfile(); struct heredoc *const saveheredoclist = heredoclist; struct heredoc *here; str = NULL; if (setjmp(jmploc.loc)) { popfilesupto(savetopfile); if (str) ckfree(str); if (ostr) ckfree(ostr); heredoclist = saveheredoclist; handler = savehandler; if (exception == EXERROR) { startlinno = bq_startlinno; synerror("Error in command substitution"); } longjmp(handler->loc, 1); } INTOFF; savelen = out - stackblock(); if (savelen > 0) { str = ckmalloc(savelen); memcpy(str, stackblock(), savelen); } handler = &jmploc; heredoclist = NULL; INTON; if (oldstyle) { /* We must read until the closing backquote, giving special treatment to some slashes, and then push the string and reread it as input, interpreting it normally. */ char *oout; int c; int olen; STARTSTACKSTR(oout); for (;;) { if (needprompt) { setprompt(2); needprompt = 0; } CHECKSTRSPACE(2, oout); c = pgetc_linecont(); if (c == '`') break; switch (c) { case '\\': c = pgetc(); if (c != '\\' && c != '`' && c != '$' && (!dblquote || c != '"')) USTPUTC('\\', oout); break; case '\n': plinno++; needprompt = doprompt; break; case PEOF: startlinno = plinno; synerror("EOF in backquote substitution"); break; default: break; } USTPUTC(c, oout); } USTPUTC('\0', oout); olen = oout - stackblock(); INTOFF; ostr = ckmalloc(olen); memcpy(ostr, stackblock(), olen); setinputstring(ostr, 1); INTON; } nlpp = pbqlist; while (*nlpp) nlpp = &(*nlpp)->next; *nlpp = (struct nodelist *)stalloc(sizeof (struct nodelist)); (*nlpp)->next = NULL; if (oldstyle) { saveprompt = doprompt; doprompt = 0; } n = list(0); if (oldstyle) { if (peektoken() != TEOF) synexpect(-1); doprompt = saveprompt; } else consumetoken(TRP); (*nlpp)->n = n; if (oldstyle) { /* * Start reading from old file again, ignoring any pushed back * tokens left from the backquote parsing */ popfile(); tokpushback = 0; } STARTSTACKSTR(out); CHECKSTRSPACE(savelen + 1, out); INTOFF; if (str) { memcpy(out, str, savelen); STADJUST(savelen, out); ckfree(str); str = NULL; } if (ostr) { ckfree(ostr); ostr = NULL; } here = saveheredoclist; if (here != NULL) { while (here->next != NULL) here = here->next; here->next = heredoclist; heredoclist = saveheredoclist; } handler = savehandler; INTON; if (quoted) USTPUTC(CTLBACKQ | CTLQUOTE, out); else USTPUTC(CTLBACKQ, out); return out; } /* * Called to parse a backslash escape sequence inside $'...'. * The backslash has already been read. */ static char * readcstyleesc(char *out) { int c, vc, i, n; unsigned int v; c = pgetc(); switch (c) { case '\0': synerror("Unterminated quoted string"); case '\n': plinno++; if (doprompt) setprompt(2); else setprompt(0); return out; case '\\': case '\'': case '"': v = c; break; case 'a': v = '\a'; break; case 'b': v = '\b'; break; case 'e': v = '\033'; break; case 'f': v = '\f'; break; case 'n': v = '\n'; break; case 'r': v = '\r'; break; case 't': v = '\t'; break; case 'v': v = '\v'; break; case 'x': v = 0; for (;;) { c = pgetc(); if (c >= '0' && c <= '9') v = (v << 4) + c - '0'; else if (c >= 'A' && c <= 'F') v = (v << 4) + c - 'A' + 10; else if (c >= 'a' && c <= 'f') v = (v << 4) + c - 'a' + 10; else break; } pungetc(); break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': v = c - '0'; c = pgetc(); if (c >= '0' && c <= '7') { v <<= 3; v += c - '0'; c = pgetc(); if (c >= '0' && c <= '7') { v <<= 3; v += c - '0'; } else pungetc(); } else pungetc(); break; case 'c': c = pgetc(); if (c < 0x3f || c > 0x7a || c == 0x60) synerror("Bad escape sequence"); if (c == '\\' && pgetc() != '\\') synerror("Bad escape sequence"); if (c == '?') v = 127; else v = c & 0x1f; break; case 'u': case 'U': n = c == 'U' ? 8 : 4; v = 0; for (i = 0; i < n; i++) { c = pgetc(); if (c >= '0' && c <= '9') v = (v << 4) + c - '0'; else if (c >= 'A' && c <= 'F') v = (v << 4) + c - 'A' + 10; else if (c >= 'a' && c <= 'f') v = (v << 4) + c - 'a' + 10; else synerror("Bad escape sequence"); } if (v == 0 || (v >= 0xd800 && v <= 0xdfff)) synerror("Bad escape sequence"); /* We really need iconv here. */ if (initial_localeisutf8 && v > 127) { CHECKSTRSPACE(4, out); /* * We cannot use wctomb() as the locale may have * changed. */ if (v <= 0x7ff) { USTPUTC(0xc0 | v >> 6, out); USTPUTC(0x80 | (v & 0x3f), out); return out; } else if (v <= 0xffff) { USTPUTC(0xe0 | v >> 12, out); USTPUTC(0x80 | ((v >> 6) & 0x3f), out); USTPUTC(0x80 | (v & 0x3f), out); return out; } else if (v <= 0x10ffff) { USTPUTC(0xf0 | v >> 18, out); USTPUTC(0x80 | ((v >> 12) & 0x3f), out); USTPUTC(0x80 | ((v >> 6) & 0x3f), out); USTPUTC(0x80 | (v & 0x3f), out); return out; } } if (v > 127) v = '?'; break; default: synerror("Bad escape sequence"); } vc = (char)v; /* * We can't handle NUL bytes. * POSIX says we should skip till the closing quote. */ if (vc == '\0') { while ((c = pgetc()) != '\'') { if (c == '\\') c = pgetc(); if (c == PEOF) synerror("Unterminated quoted string"); if (c == '\n') { plinno++; if (doprompt) setprompt(2); else setprompt(0); } } pungetc(); return out; } if (SQSYNTAX[vc] == CCTL) USTPUTC(CTLESC, out); USTPUTC(vc, out); return out; } /* * If eofmark is NULL, read a word or a redirection symbol. If eofmark * is not NULL, read a here document. In the latter case, eofmark is the * word which marks the end of the document and striptabs is true if * leading tabs should be stripped from the document. The argument firstc * is the first character of the input token or document. * * Because C does not have internal subroutines, I have simulated them * using goto's to implement the subroutine linkage. The following macros * will run code that appears at the end of readtoken1. */ #define PARSESUB() {goto parsesub; parsesub_return:;} #define PARSEARITH() {goto parsearith; parsearith_return:;} static int readtoken1(int firstc, char const *initialsyntax, const char *eofmark, int striptabs) { int c = firstc; char *out; int len; struct nodelist *bqlist; int quotef; int newvarnest; int level; int synentry; struct tokenstate state_static[MAXNEST_static]; int maxnest = MAXNEST_static; struct tokenstate *state = state_static; int sqiscstyle = 0; startlinno = plinno; quotef = 0; bqlist = NULL; newvarnest = 0; level = 0; state[level].syntax = initialsyntax; state[level].parenlevel = 0; state[level].category = TSTATE_TOP; STARTSTACKSTR(out); loop: { /* for each line, until end of word */ if (eofmark && eofmark != NOEOFMARK) /* set c to PEOF if at end of here document */ c = checkend(c, eofmark, striptabs); for (;;) { /* until end of line or end of word */ CHECKSTRSPACE(4, out); /* permit 4 calls to USTPUTC */ synentry = state[level].syntax[c]; switch(synentry) { case CNL: /* '\n' */ if (state[level].syntax == BASESYNTAX) goto endword; /* exit outer loop */ USTPUTC(c, out); plinno++; if (doprompt) setprompt(2); else setprompt(0); c = pgetc(); goto loop; /* continue outer loop */ case CSBACK: if (sqiscstyle) { out = readcstyleesc(out); break; } /* FALLTHROUGH */ case CWORD: USTPUTC(c, out); break; case CCTL: if (eofmark == NULL || initialsyntax != SQSYNTAX) USTPUTC(CTLESC, out); USTPUTC(c, out); break; case CBACK: /* backslash */ c = pgetc(); if (c == PEOF) { USTPUTC('\\', out); pungetc(); } else if (c == '\n') { plinno++; if (doprompt) setprompt(2); else setprompt(0); } else { if (state[level].syntax == DQSYNTAX && c != '\\' && c != '`' && c != '$' && (c != '"' || (eofmark != NULL && newvarnest == 0)) && (c != '}' || state[level].category != TSTATE_VAR_OLD)) USTPUTC('\\', out); if ((eofmark == NULL || newvarnest > 0) && state[level].syntax == BASESYNTAX) USTPUTC(CTLQUOTEMARK, out); if (SQSYNTAX[c] == CCTL) USTPUTC(CTLESC, out); USTPUTC(c, out); if ((eofmark == NULL || newvarnest > 0) && state[level].syntax == BASESYNTAX && state[level].category == TSTATE_VAR_OLD) USTPUTC(CTLQUOTEEND, out); quotef++; } break; case CSQUOTE: USTPUTC(CTLQUOTEMARK, out); state[level].syntax = SQSYNTAX; sqiscstyle = 0; break; case CDQUOTE: USTPUTC(CTLQUOTEMARK, out); state[level].syntax = DQSYNTAX; break; case CENDQUOTE: if (eofmark != NULL && newvarnest == 0) USTPUTC(c, out); else { if (state[level].category == TSTATE_VAR_OLD) USTPUTC(CTLQUOTEEND, out); state[level].syntax = BASESYNTAX; quotef++; } break; case CVAR: /* '$' */ PARSESUB(); /* parse substitution */ break; case CENDVAR: /* '}' */ if (level > 0 && ((state[level].category == TSTATE_VAR_OLD && state[level].syntax == state[level - 1].syntax) || (state[level].category == TSTATE_VAR_NEW && state[level].syntax == BASESYNTAX))) { if (state[level].category == TSTATE_VAR_NEW) newvarnest--; level--; USTPUTC(CTLENDVAR, out); } else { USTPUTC(c, out); } break; case CLP: /* '(' in arithmetic */ state[level].parenlevel++; USTPUTC(c, out); break; case CRP: /* ')' in arithmetic */ if (state[level].parenlevel > 0) { USTPUTC(c, out); --state[level].parenlevel; } else { if (pgetc_linecont() == ')') { if (level > 0 && state[level].category == TSTATE_ARITH) { level--; USTPUTC(CTLENDARI, out); } else USTPUTC(')', out); } else { /* * unbalanced parens * (don't 2nd guess - no error) */ pungetc(); USTPUTC(')', out); } } break; case CBQUOTE: /* '`' */ out = parsebackq(out, &bqlist, 1, state[level].syntax == DQSYNTAX && (eofmark == NULL || newvarnest > 0), state[level].syntax == DQSYNTAX || state[level].syntax == ARISYNTAX); break; case CEOF: goto endword; /* exit outer loop */ case CIGN: break; default: if (level == 0) goto endword; /* exit outer loop */ USTPUTC(c, out); } c = pgetc_macro(); } } endword: if (state[level].syntax == ARISYNTAX) synerror("Missing '))'"); if (state[level].syntax != BASESYNTAX && eofmark == NULL) synerror("Unterminated quoted string"); if (state[level].category == TSTATE_VAR_OLD || state[level].category == TSTATE_VAR_NEW) { startlinno = plinno; synerror("Missing '}'"); } if (state != state_static) parser_temp_free_upto(state); USTPUTC('\0', out); len = out - stackblock(); out = stackblock(); if (eofmark == NULL) { if ((c == '>' || c == '<') && quotef == 0 && len <= 2 && (*out == '\0' || is_digit(*out))) { parseredir(out, c); return lasttoken = TREDIR; } else { pungetc(); } } quoteflag = quotef; backquotelist = bqlist; grabstackblock(len); wordtext = out; return lasttoken = TWORD; /* end of readtoken routine */ /* * Parse a substitution. At this point, we have read the dollar sign * and nothing else. */ parsesub: { char buf[10]; int subtype; int typeloc; int flags; char *p; static const char types[] = "}-+?="; int bracketed_name = 0; /* used to handle ${[0-9]*} variables */ int linno; int length; int c1; c = pgetc_linecont(); if (c == '(') { /* $(command) or $((arith)) */ if (pgetc_linecont() == '(') { PARSEARITH(); } else { pungetc(); out = parsebackq(out, &bqlist, 0, state[level].syntax == DQSYNTAX && (eofmark == NULL || newvarnest > 0), state[level].syntax == DQSYNTAX || state[level].syntax == ARISYNTAX); } } else if (c == '{' || is_name(c) || is_special(c)) { USTPUTC(CTLVAR, out); typeloc = out - stackblock(); USTPUTC(VSNORMAL, out); subtype = VSNORMAL; flags = 0; if (c == '{') { bracketed_name = 1; c = pgetc_linecont(); subtype = 0; } varname: if (!is_eof(c) && is_name(c)) { length = 0; do { STPUTC(c, out); c = pgetc_linecont(); length++; } while (!is_eof(c) && is_in_name(c)); if (length == 6 && strncmp(out - length, "LINENO", length) == 0) { /* Replace the variable name with the * current line number. */ linno = plinno; if (funclinno != 0) linno -= funclinno - 1; snprintf(buf, sizeof(buf), "%d", linno); STADJUST(-6, out); STPUTS(buf, out); flags |= VSLINENO; } } else if (is_digit(c)) { if (bracketed_name) { do { STPUTC(c, out); c = pgetc_linecont(); } while (is_digit(c)); } else { STPUTC(c, out); c = pgetc_linecont(); } } else if (is_special(c)) { c1 = c; c = pgetc_linecont(); if (subtype == 0 && c1 == '#') { subtype = VSLENGTH; if (strchr(types, c) == NULL && c != ':' && c != '#' && c != '%') goto varname; c1 = c; c = pgetc_linecont(); if (c1 != '}' && c == '}') { pungetc(); c = c1; goto varname; } pungetc(); c = c1; c1 = '#'; subtype = 0; } USTPUTC(c1, out); } else { subtype = VSERROR; if (c == '}') pungetc(); else if (c == '\n' || c == PEOF) synerror("Unexpected end of line in substitution"); else if (BASESYNTAX[c] != CCTL) USTPUTC(c, out); } if (subtype == 0) { switch (c) { case ':': flags |= VSNUL; c = pgetc_linecont(); /*FALLTHROUGH*/ default: p = strchr(types, c); if (p == NULL) { if (c == '\n' || c == PEOF) synerror("Unexpected end of line in substitution"); if (flags == VSNUL) STPUTC(':', out); if (BASESYNTAX[c] != CCTL) STPUTC(c, out); subtype = VSERROR; } else subtype = p - types + VSNORMAL; break; case '%': case '#': { int cc = c; subtype = c == '#' ? VSTRIMLEFT : VSTRIMRIGHT; c = pgetc_linecont(); if (c == cc) subtype++; else pungetc(); break; } } } else if (subtype != VSERROR) { if (subtype == VSLENGTH && c != '}') subtype = VSERROR; pungetc(); } STPUTC('=', out); if (state[level].syntax == DQSYNTAX || state[level].syntax == ARISYNTAX) flags |= VSQUOTE; *(stackblock() + typeloc) = subtype | flags; if (subtype != VSNORMAL) { if (level + 1 >= maxnest) { maxnest *= 2; if (state == state_static) { state = parser_temp_alloc( maxnest * sizeof(*state)); memcpy(state, state_static, MAXNEST_static * sizeof(*state)); } else state = parser_temp_realloc(state, maxnest * sizeof(*state)); } level++; state[level].parenlevel = 0; if (subtype == VSMINUS || subtype == VSPLUS || subtype == VSQUESTION || subtype == VSASSIGN) { /* * For operators that were in the Bourne shell, * inherit the double-quote state. */ state[level].syntax = state[level - 1].syntax; state[level].category = TSTATE_VAR_OLD; } else { /* * The other operators take a pattern, * so go to BASESYNTAX. * Also, ' and " are now special, even * in here documents. */ state[level].syntax = BASESYNTAX; state[level].category = TSTATE_VAR_NEW; newvarnest++; } } } else if (c == '\'' && state[level].syntax == BASESYNTAX) { /* $'cstylequotes' */ USTPUTC(CTLQUOTEMARK, out); state[level].syntax = SQSYNTAX; sqiscstyle = 1; } else { USTPUTC('$', out); pungetc(); } goto parsesub_return; } /* * Parse an arithmetic expansion (indicate start of one and set state) */ parsearith: { if (level + 1 >= maxnest) { maxnest *= 2; if (state == state_static) { state = parser_temp_alloc( maxnest * sizeof(*state)); memcpy(state, state_static, MAXNEST_static * sizeof(*state)); } else state = parser_temp_realloc(state, maxnest * sizeof(*state)); } level++; state[level].syntax = ARISYNTAX; state[level].parenlevel = 0; state[level].category = TSTATE_ARITH; USTPUTC(CTLARI, out); if (state[level - 1].syntax == DQSYNTAX) USTPUTC('"',out); else USTPUTC(' ',out); goto parsearith_return; } } /* end of readtoken */ /* * Returns true if the text contains nothing to expand (no dollar signs * or backquotes). */ static int noexpand(char *text) { char *p; char c; p = text; while ((c = *p++) != '\0') { if ( c == CTLQUOTEMARK) continue; if (c == CTLESC) p++; else if (BASESYNTAX[(int)c] == CCTL) return 0; } return 1; } /* * Return true if the argument is a legal variable name (a letter or * underscore followed by zero or more letters, underscores, and digits). */ int goodname(const char *name) { const char *p; p = name; if (! is_name(*p)) return 0; while (*++p) { if (! is_in_name(*p)) return 0; } return 1; } int isassignment(const char *p) { if (!is_name(*p)) return 0; p++; for (;;) { if (*p == '=') return 1; else if (!is_in_name(*p)) return 0; p++; } } static void consumetoken(int token) { if (readtoken() != token) synexpect(token); } /* * Called when an unexpected token is read during the parse. The argument * is the token that is expected, or -1 if more than one type of token can * occur at this point. */ static void synexpect(int token) { char msg[64]; if (token >= 0) { fmtstr(msg, 64, "%s unexpected (expecting %s)", tokname[lasttoken], tokname[token]); } else { fmtstr(msg, 64, "%s unexpected", tokname[lasttoken]); } synerror(msg); } static void synerror(const char *msg) { if (commandname) outfmt(out2, "%s: %d: ", commandname, startlinno); else if (arg0) outfmt(out2, "%s: ", arg0); outfmt(out2, "Syntax error: %s\n", msg); error((char *)NULL); } static void setprompt(int which) { whichprompt = which; #ifndef NO_HISTORY if (!el) #endif { out2str(getprompt(NULL)); flushout(out2); } } static int pgetc_linecont(void) { int c; while ((c = pgetc_macro()) == '\\') { c = pgetc(); if (c == '\n') { plinno++; if (doprompt) setprompt(2); else setprompt(0); } else { pungetc(); /* Allow the backslash to be pushed back. */ pushstring("\\", 1, NULL); return (pgetc()); } } return (c); } /* * called by editline -- any expansions to the prompt * should be added here. */ char * getprompt(void *unused __unused) { static char ps[PROMPTLEN]; const char *fmt; const char *pwd; int i, trim; static char internal_error[] = "??"; /* * Select prompt format. */ switch (whichprompt) { case 0: fmt = ""; break; case 1: fmt = ps1val(); break; case 2: fmt = ps2val(); break; default: return internal_error; } /* * Format prompt string. */ for (i = 0; (i < 127) && (*fmt != '\0'); i++, fmt++) if (*fmt == '\\') switch (*++fmt) { /* * Hostname. * * \h specifies just the local hostname, * \H specifies fully-qualified hostname. */ case 'h': case 'H': ps[i] = '\0'; gethostname(&ps[i], PROMPTLEN - i); /* Skip to end of hostname. */ trim = (*fmt == 'h') ? '.' : '\0'; while ((ps[i+1] != '\0') && (ps[i+1] != trim)) i++; break; /* * Working directory. * * \W specifies just the final component, * \w specifies the entire path. */ case 'W': case 'w': pwd = lookupvar("PWD"); if (pwd == NULL) pwd = "?"; if (*fmt == 'W' && *pwd == '/' && pwd[1] != '\0') strlcpy(&ps[i], strrchr(pwd, '/') + 1, PROMPTLEN - i); else strlcpy(&ps[i], pwd, PROMPTLEN - i); /* Skip to end of path. */ while (ps[i + 1] != '\0') i++; break; /* * Superuser status. * * '$' for normal users, '#' for root. */ case '$': ps[i] = (geteuid() != 0) ? '$' : '#'; break; /* * A literal \. */ case '\\': ps[i] = '\\'; break; /* * Emit unrecognized formats verbatim. */ default: ps[i++] = '\\'; ps[i] = *fmt; break; } else ps[i] = *fmt; ps[i] = '\0'; return (ps); } const char * expandstr(const char *ps) { union node n; struct jmploc jmploc; struct jmploc *const savehandler = handler; const int saveprompt = doprompt; struct parsefile *const savetopfile = getcurrentfile(); struct parser_temp *const saveparser_temp = parser_temp; const char *result = NULL; if (!setjmp(jmploc.loc)) { handler = &jmploc; parser_temp = NULL; setinputstring(ps, 1); doprompt = 0; readtoken1(pgetc(), DQSYNTAX, NOEOFMARK, 0); if (backquotelist != NULL) error("Command substitution not allowed here"); n.narg.type = NARG; n.narg.next = NULL; n.narg.text = wordtext; n.narg.backquote = backquotelist; expandarg(&n, NULL, 0); result = stackblock(); INTOFF; } handler = savehandler; doprompt = saveprompt; popfilesupto(savetopfile); if (parser_temp != saveparser_temp) { parser_temp_free_all(); parser_temp = saveparser_temp; } if (result != NULL) { INTON; } else if (exception == EXINT) raise(SIGINT); return result; } Index: head/bin/sh/parser.h =================================================================== --- head/bin/sh/parser.h (revision 288429) +++ head/bin/sh/parser.h (revision 288430) @@ -1,84 +1,85 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Kenneth Almquist. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)parser.h 8.3 (Berkeley) 5/4/95 * $FreeBSD$ */ /* control characters in argument strings */ #define CTLESC '\300' #define CTLVAR '\301' #define CTLENDVAR '\371' #define CTLBACKQ '\372' #define CTLQUOTE 01 /* ored with CTLBACKQ code if in quotes */ /* CTLBACKQ | CTLQUOTE == '\373' */ #define CTLARI '\374' #define CTLENDARI '\375' #define CTLQUOTEMARK '\376' #define CTLQUOTEEND '\377' /* only for ${v+-...} */ /* variable substitution byte (follows CTLVAR) */ #define VSTYPE 0x0f /* type of variable substitution */ #define VSNUL 0x10 /* colon--treat the empty string as unset */ #define VSLINENO 0x20 /* expansion of $LINENO, the line number \ follows immediately */ #define VSQUOTE 0x80 /* inside double quotes--suppress splitting */ /* values of VSTYPE field */ #define VSNORMAL 0x1 /* normal variable: $var or ${var} */ #define VSMINUS 0x2 /* ${var-text} */ #define VSPLUS 0x3 /* ${var+text} */ #define VSQUESTION 0x4 /* ${var?message} */ #define VSASSIGN 0x5 /* ${var=text} */ #define VSTRIMLEFT 0x6 /* ${var#pattern} */ #define VSTRIMLEFTMAX 0x7 /* ${var##pattern} */ #define VSTRIMRIGHT 0x8 /* ${var%pattern} */ #define VSTRIMRIGHTMAX 0x9 /* ${var%%pattern} */ #define VSLENGTH 0xa /* ${#var} */ #define VSERROR 0xb /* Syntax error, issue when expanded */ /* * NEOF is returned by parsecmd when it encounters an end of file. It * must be distinct from NULL. */ #define NEOF ((union node *)-1) extern int whichprompt; /* 1 == PS1, 2 == PS2 */ extern const char *const parsekwd[]; union node *parsecmd(int); +union node *parsewordexp(void); void forcealias(void); void fixredir(union node *, const char *, int); int goodname(const char *); int isassignment(const char *); char *getprompt(void *); const char *expandstr(const char *); Index: head/lib/libc/gen/wordexp.3 =================================================================== --- head/lib/libc/gen/wordexp.3 (revision 288429) +++ head/lib/libc/gen/wordexp.3 (revision 288430) @@ -1,208 +1,210 @@ .\" .\" Copyright (c) 2002 Tim J. Robbins .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd August 18, 2015 +.Dd September 30, 2015 .Dt WORDEXP 3 .Os .Sh NAME .Nm wordexp .Nd "perform shell-style word expansions" .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In wordexp.h .Ft int .Fn wordexp "const char * restrict words" "wordexp_t * restrict we" "int flags" .Ft void .Fn wordfree "wordexp_t *we" .Sh DESCRIPTION The .Fn wordexp function performs shell-style word expansion on .Fa words and places the list of words into the .Va we_wordv member of .Fa we , and the number of words into .Va we_wordc . .Pp The .Fa flags argument is the bitwise inclusive OR of any of the following constants: .Bl -tag -width ".Dv WRDE_SHOWERR" .It Dv WRDE_APPEND Append the words to those generated by a previous call to .Fn wordexp . .It Dv WRDE_DOOFFS As many .Dv NULL pointers as are specified by the .Va we_offs member of .Fa we are added to the front of .Va we_wordv . .It Dv WRDE_NOCMD Disallow command substitution in .Fa words . See the note in .Sx BUGS before using this. .It Dv WRDE_REUSE The .Fa we argument was passed to a previous successful call to .Fn wordexp but has not been passed to .Fn wordfree . The implementation may reuse the space allocated to it. .It Dv WRDE_SHOWERR Do not redirect shell error messages to .Pa /dev/null . .It Dv WRDE_UNDEF Report error on an attempt to expand an undefined shell variable. .El .Pp The .Vt wordexp_t structure is defined in .In wordexp.h as: .Bd -literal -offset indent typedef struct { size_t we_wordc; /* count of words matched */ char **we_wordv; /* pointer to list of words */ size_t we_offs; /* slots to reserve in we_wordv */ } wordexp_t; .Ed .Pp The .Fn wordfree function frees the memory allocated by .Fn wordexp . .Sh IMPLEMENTATION NOTES The .Fn wordexp -function is implemented by executing -.Xr sh 1 . +function is implemented using the undocumented +.Ic freebsd_wordexp +shell built-in command. .Sh RETURN VALUES The .Fn wordexp function returns zero if successful, otherwise it returns one of the following error codes: .Bl -tag -width ".Dv WRDE_NOSPACE" .It Dv WRDE_BADCHAR The .Fa words argument contains one of the following unquoted characters: .Aq newline , .Ql | , .Ql & , .Ql \&; , .Ql < , .Ql > , .Ql \&( , .Ql \&) , .Ql { , .Ql } . .It Dv WRDE_BADVAL An error after successful parsing, such as an attempt to expand an undefined shell variable with .Dv WRDE_UNDEF set in .Fa flags . .It Dv WRDE_CMDSUB An attempt was made to use command substitution and .Dv WRDE_NOCMD is set in .Fa flags . .It Dv WRDE_NOSPACE Not enough memory to store the result or an error during .Xr fork 2 . .It Dv WRDE_SYNTAX Shell syntax error in .Fa words . .El .Pp The .Fn wordfree function returns no value. .Sh ENVIRONMENT .Bl -tag -width ".Ev IFS" .It Ev IFS Field separator. .El .Sh EXAMPLES Invoke the editor on all .Pa .c files in the current directory and .Pa /etc/motd (error checking omitted): .Bd -literal -offset indent wordexp_t we; wordexp("${EDITOR:-vi} *.c /etc/motd", &we, 0); execvp(we.we_wordv[0], we.we_wordv); .Ed .Sh DIAGNOSTICS Diagnostic messages from the shell are written to the standard error output if .Dv WRDE_SHOWERR is set in .Fa flags . .Sh SEE ALSO .Xr sh 1 , .Xr fnmatch 3 , .Xr glob 3 , .Xr popen 3 , .Xr system 3 .Sh STANDARDS The .Fn wordexp and .Fn wordfree functions conform to .St -p1003.1-2001 . .Sh BUGS -Do not pass untrusted user data to -.Fn wordexp , -regardless of whether the -.Dv WRDE_NOCMD -flag is set. -The -.Fn wordexp -function attempts to detect input that would cause commands to be -executed before passing it to the shell -but it does not use the same parser so it may be fooled. -.Pp The current .Fn wordexp implementation does not recognize multibyte characters other than UTF-8, since the shell (which it invokes to perform expansions) does not. +.Sh SECURITY CONSIDERATIONS +Pathname generation may create output that is exponentially larger than the +input size. +.Pp +Although this implementation detects command substitution reliably for +.Dv WRDE_NOCMD , +the attack surface remains fairly large. +Also, some other implementations +(such as older versions of this one) +may execute command substitutions even if +.Dv WRDE_NOCMD +is set. Index: head/lib/libc/gen/wordexp.c =================================================================== --- head/lib/libc/gen/wordexp.c (revision 288429) +++ head/lib/libc/gen/wordexp.c (revision 288430) @@ -1,358 +1,410 @@ /*- * Copyright (c) 2002 Tim J. Robbins. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "namespace.h" #include #include #include #include #include #include #include +#include #include #include #include #include #include #include "un-namespace.h" #include "libc_private.h" __FBSDID("$FreeBSD$"); static int we_askshell(const char *, wordexp_t *, int); -static int we_check(const char *, int); +static int we_check(const char *); /* * wordexp -- * Perform shell word expansion on `words' and place the resulting list * of words in `we'. See wordexp(3). * * Specified by IEEE Std. 1003.1-2001. */ int wordexp(const char * __restrict words, wordexp_t * __restrict we, int flags) { int error; if (flags & WRDE_REUSE) wordfree(we); if ((flags & WRDE_APPEND) == 0) { we->we_wordc = 0; we->we_wordv = NULL; we->we_strings = NULL; we->we_nbytes = 0; } - if ((error = we_check(words, flags)) != 0) { + if ((error = we_check(words)) != 0) { wordfree(we); return (error); } if ((error = we_askshell(words, we, flags)) != 0) { wordfree(we); return (error); } return (0); } static size_t we_read_fully(int fd, char *buffer, size_t len) { size_t done; ssize_t nread; done = 0; do { nread = _read(fd, buffer + done, len - done); if (nread == -1 && errno == EINTR) continue; if (nread <= 0) break; done += nread; } while (done != len); return done; } +static bool +we_write_fully(int fd, const char *buffer, size_t len) +{ + size_t done; + ssize_t nwritten; + + done = 0; + do { + nwritten = _write(fd, buffer + done, len - done); + if (nwritten == -1 && errno == EINTR) + continue; + if (nwritten <= 0) + return (false); + done += nwritten; + } while (done != len); + return (true); +} + /* * we_askshell -- - * Use the `wordexp' /bin/sh builtin function to do most of the work - * in expanding the word string. This function is complicated by + * Use the `freebsd_wordexp' /bin/sh builtin function to do most of the + * work in expanding the word string. This function is complicated by * memory management. */ static int we_askshell(const char *words, wordexp_t *we, int flags) { - int pdes[2]; /* Pipe to child */ - char buf[18]; /* Buffer for byte and word count */ + int pdesw[2]; /* Pipe for writing words */ + int pdes[2]; /* Pipe for reading output */ + char wfdstr[sizeof(int) * 3 + 1]; + char buf[35]; /* Buffer for byte and word count */ long nwords, nbytes; /* Number of words, bytes from child */ long i; /* Handy integer */ size_t sofs; /* Offset into we->we_strings */ size_t vofs; /* Offset into we->we_wordv */ pid_t pid; /* Process ID of child */ pid_t wpid; /* waitpid return value */ int status; /* Child exit status */ int error; /* Our return value */ int serrno; /* errno to return */ char *np, *p; /* Handy pointers */ char *nstrings; /* Temporary for realloc() */ char **nwv; /* Temporary for realloc() */ sigset_t newsigblock, oldsigblock; const char *ifs; - char save; serrno = errno; ifs = getenv("IFS"); - if (pipe2(pdes, O_CLOEXEC) < 0) + if (pipe2(pdesw, O_CLOEXEC) < 0) return (WRDE_NOSPACE); /* XXX */ + snprintf(wfdstr, sizeof(wfdstr), "%d", pdesw[0]); + if (pipe2(pdes, O_CLOEXEC) < 0) { + _close(pdesw[0]); + _close(pdesw[1]); + return (WRDE_NOSPACE); /* XXX */ + } (void)sigemptyset(&newsigblock); (void)sigaddset(&newsigblock, SIGCHLD); (void)__libc_sigprocmask(SIG_BLOCK, &newsigblock, &oldsigblock); if ((pid = fork()) < 0) { serrno = errno; + _close(pdesw[0]); + _close(pdesw[1]); _close(pdes[0]); _close(pdes[1]); (void)__libc_sigprocmask(SIG_SETMASK, &oldsigblock, NULL); errno = serrno; return (WRDE_NOSPACE); /* XXX */ } else if (pid == 0) { /* * We are the child; make /bin/sh expand `words'. */ (void)__libc_sigprocmask(SIG_SETMASK, &oldsigblock, NULL); if ((pdes[1] != STDOUT_FILENO ? _dup2(pdes[1], STDOUT_FILENO) : _fcntl(pdes[1], F_SETFD, 0)) < 0) _exit(1); + if (_fcntl(pdesw[0], F_SETFD, 0) < 0) + _exit(1); execl(_PATH_BSHELL, "sh", flags & WRDE_UNDEF ? "-u" : "+u", - "-c", "IFS=$1;eval \"$2\";eval \"echo;set -- $3\";" - "IFS=;a=\"$*\";printf '%08x' \"$#\" \"${#a}\";" - "printf '%s\\0' \"$@\"", + "-c", "IFS=$1;eval \"$2\";" + "freebsd_wordexp -f \"$3\" ${4:+\"$4\"}", "", ifs != NULL ? ifs : " \t\n", - flags & WRDE_SHOWERR ? "" : "exec 2>/dev/null", words, + flags & WRDE_SHOWERR ? "" : "exec 2>/dev/null", + wfdstr, + flags & WRDE_NOCMD ? "-p" : "", (char *)NULL); _exit(1); } /* - * We are the parent; read the output of the shell wordexp function, - * which is a byte indicating that the words were parsed successfully, - * a 32-bit hexadecimal word count, a 32-bit hexadecimal byte count - * (not including terminating null bytes), followed by the expanded - * words separated by nulls. + * We are the parent; write the words. */ _close(pdes[1]); - switch (we_read_fully(pdes[0], buf, 17)) { + _close(pdesw[0]); + if (!we_write_fully(pdesw[1], words, strlen(words))) { + _close(pdesw[1]); + error = WRDE_SYNTAX; + goto cleanup; + } + _close(pdesw[1]); + /* + * Read the output of the shell wordexp function, + * which is a byte indicating that the words were parsed successfully, + * a 64-bit hexadecimal word count, a dummy byte, a 64-bit hexadecimal + * byte count (not including terminating null bytes), followed by the + * expanded words separated by nulls. + */ + switch (we_read_fully(pdes[0], buf, 34)) { case 1: - error = WRDE_BADVAL; + error = buf[0] == 'C' ? WRDE_CMDSUB : WRDE_BADVAL; serrno = errno; goto cleanup; - case 17: + case 34: break; default: error = WRDE_SYNTAX; serrno = errno; goto cleanup; } - save = buf[9]; - buf[9] = '\0'; - nwords = strtol(buf + 1, NULL, 16); - buf[9] = save; buf[17] = '\0'; - nbytes = strtol(buf + 9, NULL, 16) + nwords; + nwords = strtol(buf + 1, NULL, 16); + buf[34] = '\0'; + nbytes = strtol(buf + 18, NULL, 16) + nwords; /* * Allocate or reallocate (when flags & WRDE_APPEND) the word vector * and string storage buffers for the expanded words we're about to * read from the child. */ sofs = we->we_nbytes; vofs = we->we_wordc; if ((flags & (WRDE_DOOFFS|WRDE_APPEND)) == (WRDE_DOOFFS|WRDE_APPEND)) vofs += we->we_offs; we->we_wordc += nwords; we->we_nbytes += nbytes; if ((nwv = realloc(we->we_wordv, (we->we_wordc + 1 + (flags & WRDE_DOOFFS ? we->we_offs : 0)) * sizeof(char *))) == NULL) { error = WRDE_NOSPACE; goto cleanup; } we->we_wordv = nwv; if ((nstrings = realloc(we->we_strings, we->we_nbytes)) == NULL) { error = WRDE_NOSPACE; goto cleanup; } for (i = 0; i < vofs; i++) if (we->we_wordv[i] != NULL) we->we_wordv[i] += nstrings - we->we_strings; we->we_strings = nstrings; if (we_read_fully(pdes[0], we->we_strings + sofs, nbytes) != nbytes) { error = WRDE_NOSPACE; /* abort for unknown reason */ serrno = errno; goto cleanup; } error = 0; cleanup: _close(pdes[0]); do wpid = _waitpid(pid, &status, 0); while (wpid < 0 && errno == EINTR); (void)__libc_sigprocmask(SIG_SETMASK, &oldsigblock, NULL); if (error != 0) { errno = serrno; return (error); } if (wpid < 0 || !WIFEXITED(status) || WEXITSTATUS(status) != 0) return (WRDE_NOSPACE); /* abort for unknown reason */ /* * Break the null-terminated expanded word strings out into * the vector. */ if (vofs == 0 && flags & WRDE_DOOFFS) while (vofs < we->we_offs) we->we_wordv[vofs++] = NULL; p = we->we_strings + sofs; while (nwords-- != 0) { we->we_wordv[vofs++] = p; if ((np = memchr(p, '\0', nbytes)) == NULL) return (WRDE_NOSPACE); /* XXX */ nbytes -= np - p + 1; p = np + 1; } we->we_wordv[vofs] = NULL; return (0); } /* * we_check -- * Check that the string contains none of the following unquoted * special characters: |&;<>(){} - * or command substitutions when WRDE_NOCMD is set in flags. + * This mainly serves for {} which are normally legal in sh. + * It deliberately does not attempt to model full sh syntax. */ static int -we_check(const char *words, int flags) +we_check(const char *words) { char c; - int dquote, level, quote, squote; + /* Saw \ or $, possibly not special: */ + bool quote = false, dollar = false; + /* Saw ', ", ${, ` or $(, possibly not special: */ + bool have_sq = false, have_dq = false, have_par_begin = false; + bool have_cmd = false; + /* Definitely saw a ', ", ${, ` or $(, need a closing character: */ + bool need_sq = false, need_dq = false, need_par_end = false; + bool need_cmd_old = false, need_cmd_new = false; - quote = squote = dquote = 0; while ((c = *words++) != '\0') { switch (c) { case '\\': - if (squote == 0) - quote ^= 1; + quote = !quote; continue; + case '$': + if (quote) + quote = false; + else + dollar = !dollar; + continue; case '\'': - if (quote + dquote == 0) - squote ^= 1; + if (!quote && !have_sq && !have_dq) + need_sq = true; + else + need_sq = false; + have_sq = true; break; case '"': - if (quote + squote == 0) - dquote ^= 1; + if (!quote && !have_sq && !have_dq) + need_dq = true; + else + need_dq = false; + have_dq = true; break; case '`': - if (quote + squote == 0 && flags & WRDE_NOCMD) - return (WRDE_CMDSUB); - while ((c = *words++) != '\0' && c != '`') - if (c == '\\' && (c = *words++) == '\0') - break; - if (c == '\0') - return (WRDE_SYNTAX); + if (!quote && !have_sq && !have_cmd) + need_cmd_old = true; + else + need_cmd_old = false; + have_cmd = true; break; - case '|': case '&': case ';': case '<': case '>': - case '{': case '}': case '(': case ')': case '\n': - if (quote + squote + dquote == 0) + case '{': + if (!quote && !dollar && !have_sq && !have_dq && + !have_cmd) return (WRDE_BADCHAR); + if (dollar) { + if (!quote && !have_sq) + need_par_end = true; + have_par_begin = true; + } break; - case '$': - if ((c = *words++) == '\0') - break; - else if (quote + squote == 0 && c == '(') { - if (flags & WRDE_NOCMD && *words != '(') - return (WRDE_CMDSUB); - level = 1; - while ((c = *words++) != '\0') { - if (c == '\\') { - if ((c = *words++) == '\0') - break; - } else if (c == '(') - level++; - else if (c == ')' && --level == 0) - break; - } - if (c == '\0' || level != 0) - return (WRDE_SYNTAX); - } else if (quote + squote == 0 && c == '{') { - level = 1; - while ((c = *words++) != '\0') { - if (c == '\\') { - if ((c = *words++) == '\0') - break; - } else if (c == '{') - level++; - else if (c == '}' && --level == 0) - break; - } - if (c == '\0' || level != 0) - return (WRDE_SYNTAX); - } else - --words; + case '}': + if (!quote && !have_sq && !have_dq && !have_par_begin && + !have_cmd) + return (WRDE_BADCHAR); + need_par_end = false; break; + case '(': + if (!quote && !dollar && !have_sq && !have_dq && + !have_cmd) + return (WRDE_BADCHAR); + if (dollar) { + if (!quote && !have_sq) + need_cmd_new = true; + have_cmd = true; + } + break; + case ')': + if (!quote && !have_sq && !have_dq && !have_cmd) + return (WRDE_BADCHAR); + need_cmd_new = false; + break; + case '|': case '&': case ';': case '<': case '>': case '\n': + if (!quote && !have_sq && !have_dq && !have_cmd) + return (WRDE_BADCHAR); + break; default: break; } - quote = 0; + quote = dollar = false; } - if (quote + squote + dquote != 0) + if (quote || dollar || need_sq || need_dq || need_par_end || + need_cmd_old || need_cmd_new) return (WRDE_SYNTAX); return (0); } /* * wordfree -- * Free the result of wordexp(). See wordexp(3). * * Specified by IEEE Std. 1003.1-2001. */ void wordfree(wordexp_t *we) { if (we == NULL) return; free(we->we_wordv); free(we->we_strings); we->we_wordv = NULL; we->we_strings = NULL; we->we_nbytes = 0; we->we_wordc = 0; }