Changeset View
Standalone View
bin/sh/parser.c
| Show All 33 Lines | |||||
| #ifndef lint | #ifndef lint | ||||
| #if 0 | #if 0 | ||||
| static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95"; | static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95"; | ||||
| #endif | #endif | ||||
| #endif /* not lint */ | #endif /* not lint */ | ||||
| #include <sys/cdefs.h> | #include <sys/cdefs.h> | ||||
| #include <sys/param.h> | #include <sys/param.h> | ||||
| #include <ctype.h> | |||||
| #include <pwd.h> | #include <pwd.h> | ||||
| #include <stdbool.h> | |||||
| #include <stdlib.h> | #include <stdlib.h> | ||||
| #include <unistd.h> | #include <unistd.h> | ||||
| #include <stdio.h> | #include <stdio.h> | ||||
| #include <time.h> | #include <time.h> | ||||
| #include "shell.h" | #include "shell.h" | ||||
| #include "parser.h" | #include "parser.h" | ||||
| #include "nodes.h" | #include "nodes.h" | ||||
| ▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines | |||||
| static int readtoken1(int, const char *, const char *, int); | static int readtoken1(int, const char *, const char *, int); | ||||
| static int noexpand(char *); | static int noexpand(char *); | ||||
| static void consumetoken(int); | static void consumetoken(int); | ||||
| static void synexpect(int) __dead2; | static void synexpect(int) __dead2; | ||||
| static void synerror(const char *) __dead2; | static void synerror(const char *) __dead2; | ||||
| static void setprompt(int); | static void setprompt(int); | ||||
| static int pgetc_linecont(void); | static int pgetc_linecont(void); | ||||
| static void getusername(char *, size_t); | static void getusername(char *, size_t); | ||||
| static bool expandbrace(FILE *, const char *); | |||||
| static void * | static void * | ||||
| parser_temp_alloc(size_t len) | parser_temp_alloc(size_t len) | ||||
| { | { | ||||
| struct parser_temp *t; | struct parser_temp *t; | ||||
| INTOFF; | INTOFF; | ||||
| ▲ Show 20 Lines • Show All 672 Lines • ▼ Show 20 Lines | |||||
| { | { | ||||
| int t; | int t; | ||||
| t = readtoken(); | t = readtoken(); | ||||
| tokpushback++; | tokpushback++; | ||||
| return (t); | return (t); | ||||
| } | } | ||||
/*
 * expandsequence() and expandbrace() call each other, so expandbrace() is
 * declared ahead of the first definition.
 */
static bool expandbrace(FILE *, const char *);

/*
 * ASCII-only letter test.
 *
 * Review note (jilles): the parser should use classification macros from
 * syntax.c such as is_alpha and not the ones from ctype.h, so it is not
 * inappropriately affected by locales.  These range checks are
 * locale-independent and, unlike isalpha(), are well defined for negative
 * char values.
 * NOTE(review): switch to is_alpha/is_digit if syntax.h is in scope here.
 */
static bool
brace_isalpha(int c)
{

	return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'));
}

/* ASCII-only decimal digit test (see brace_isalpha()). */
static bool
brace_isdigit(int c)
{

	return (c >= '0' && c <= '9');
}

/*
 * Parse one numeric sequence endpoint of the form [+-]digits starting at s.
 *
 * On success the value is stored in *value, the number of padding zeroes in
 * *zeroes, and a pointer to the first character after the number is
 * returned.  NULL is returned when s does not start with a number; leading
 * white space is rejected because strtol() would accept it but brace
 * expansion does not.
 */
static const char *
brace_parsenum(const char *s, long *value, int *zeroes)
{
	const char *digits;
	char *endptr;
	size_t z;

	digits = s;
	if (*digits == '+' || *digits == '-')
		digits++;
	if (!brace_isdigit(*digits))
		return (NULL);

	/*
	 * A zero is padding only when another digit follows it; the final
	 * zero of "0" or "00" is the value itself.
	 */
	z = strspn(s, "0");
	if (z > 0 && !brace_isdigit(s[z]))
		z--;
	*zeroes = (int)z;

	*value = strtol(s, &endptr, 10);
	return (endptr);
}

/*
 * Try to expand a sequence expression: {x..y} or {x..y..step}, where x and y
 * are either both single ASCII letters or both integers.
 *
 * stream   receives the generated words, separated by single spaces.
 * prefix   is the start of the whole word.
 * pattern  points into that word, just past the opening brace.
 *
 * Every generated word is prefix text + term + text after the closing brace,
 * and is itself passed through expandbrace() so later brace expressions in
 * the suffix are expanded too.
 *
 * Returns true if the words were written.  Returns false, with nothing
 * written, when the text is not a valid sequence expression or the work
 * buffer cannot be allocated.
 */
static bool
expandsequence(FILE *stream, const char prefix[static 1], const char *pattern)
{
	const char *p, *suffix;
	char *portal, *term, *endptr;
	size_t prefixlen, suffixlen, termsize;
	unsigned long span, ustep;
	long start, end, step, value;
	int n, z, zeroes;
	bool alpha, ascending;

	prefixlen = (size_t)(pattern - prefix) - 1;
	zeroes = 0;

	/* Parse start value (letter or number). */
	alpha = brace_isalpha(pattern[0]);
	if (alpha) {
		start = pattern[0];
		p = pattern + 1;
	} else if ((p = brace_parsenum(pattern, &start, &zeroes)) == NULL)
		return (false);

	/* Verify ".." separator. */
	if (p[0] != '.' || p[1] != '.')
		return (false);
	p += 2;

	/* Parse end value; it must be of the same kind as the start value. */
	if (alpha) {
		if (!brace_isalpha(p[0]))
			return (false);
		end = *p++;
	} else {
		if ((p = brace_parsenum(p, &end, &z)) == NULL)
			return (false);
		if (z > zeroes)
			zeroes = z;
	}

	/* Parse optional step value. */
	step = 1;
	if (*p != '}') {
		if (p[0] != '.' || p[1] != '.')
			return (false);
		p += 2;
		/* Direction is determined by comparing start and end. */
		if (*p == '+' || *p == '-')
			p++;
		if (!brace_isdigit(*p))
			return (false);
		step = strtol(p, &endptr, 10);
		if (*endptr != '}')
			return (false);
		/* A zero step would never reach the end value. */
		if (step == 0)
			step = 1;
		p = endptr;
	}
	suffix = p + 1;
	suffixlen = strlen(suffix);

	/*
	 * Room for the prefix, the longest possible term (sign, padding
	 * zeroes and the digits of a long, plus its NUL) and the suffix.
	 *
	 * NOTE(review): jilles points out that out of memory should fail the
	 * parse and that the SIGINT handler may longjmp; the parser_temp
	 * mechanism was suggested instead of malloc().
	 */
	termsize = (size_t)zeroes + 32;
	portal = malloc(prefixlen + termsize + suffixlen);
	if (portal == NULL)
		return (false);
	memcpy(portal, prefix, prefixlen);
	term = portal + prefixlen;

	/*
	 * Generate the sequence.  The remaining distance to the end value is
	 * tracked as an unsigned quantity so that stepping can never overflow.
	 */
	ascending = start <= end;
	span = ascending ? (unsigned long)end - (unsigned long)start :
	    (unsigned long)start - (unsigned long)end;
	ustep = (unsigned long)step;
	for (value = start;;) {
		if (alpha)
			n = snprintf(term, termsize, "%c", (int)value);
		else
			n = snprintf(term, termsize, "%.*ld", zeroes + 1,
			    value);
		if (n < 0)
			n = 0;
		memcpy(term + n, suffix, suffixlen + 1);

		/* The string may be a brace expansion pattern too. */
		expandbrace(stream, portal);

		if (span < ustep)
			break;
		span -= ustep;
		value = ascending ? value + step : value - step;
		/* A letter range stops at the first non-letter. */
		if (alpha && !brace_isalpha((int)value))
			break;
		fputc(' ', stream);
	}

	free(portal);
	return (true);
}

/*
 * Perform brace expansion on one word.
 *
 * stream   receives the result.
 * pattern  is the NUL-terminated word to expand.
 *
 * The first brace expression that can be expanded is processed: either a
 * sequence ({1..3}, see expandsequence()) or a comma-separated list
 * ({a,b,c}).  Braces nest, so the commas and the closing brace are matched
 * at the nesting level of the opening brace.  A brace preceded by a
 * backslash is ignored and "${" skips to the nearest '}'.  A brace
 * expression that cannot be expanded is left alone and the search continues
 * with the next opening brace.
 *
 * Review note (jilles): readtoken1 has already replaced quoting characters
 * and dollar signs with CTL sequences at this point, so any backslashes and
 * dollar signs left are literals.
 * NOTE(review): the backslash and '$' tests below are kept from the original
 * code; confirm whether they should be removed.
 *
 * Returns true if the word was expanded; the generated words are written
 * separated by single spaces.  Returns false if the word contains nothing to
 * expand (or memory is exhausted); the word is then written unchanged.
 */
static bool
expandbrace(FILE *stream, const char pattern[static 1])
{
	const char *open, *close, *scan, *item, *suffix, *dots;
	char *portal;
	size_t prefixlen, suffixlen, itemlen;
	int depth;
	bool hascomma;

	/*
	 * Searching from the start of the word (rather than from the second
	 * character) keeps an empty word, which recursion produces for an
	 * empty alternative, from being read past its terminator.
	 */
	for (open = pattern; (open = strchr(open, '{')) != NULL; open++) {
		if (open > pattern && open[-1] == '\\')
			continue;
		if (open > pattern && open[-1] == '$') {
			/* '${' restarts the search after the closest '}'. */
			if ((open = strchr(open + 1, '}')) == NULL)
				break;
			continue;
		}

		/* A sequence never extends past the first closing brace. */
		dots = strpbrk(open + 1, ".}");
		if (dots != NULL && dots[0] == '.' && dots[1] == '.' &&
		    expandsequence(stream, pattern, open + 1))
			return (true);

		/* Find the matching brace and look for a top-level comma. */
		close = NULL;
		hascomma = false;
		depth = 0;
		for (scan = open; *scan != '\0'; scan++) {
			if (scan > open && scan[-1] == '\\')
				continue;
			if (*scan == '{')
				depth++;
			else if (*scan == '}') {
				if (--depth == 0) {
					close = scan;
					break;
				}
			} else if (*scan == ',' && depth == 1)
				hascomma = true;
		}
		if (close == NULL || !hascomma)
			continue;

		/*
		 * Each alternative is shorter than the text between the
		 * braces, so this is enough for prefix + alternative +
		 * suffix + NUL.
		 *
		 * NOTE(review): jilles points out that out of memory should
		 * fail the parse rather than skip brace expansion, and that
		 * the SIGINT handler may longjmp; the parser_temp mechanism
		 * was suggested instead of malloc().
		 */
		prefixlen = (size_t)(open - pattern);
		suffix = close + 1;
		suffixlen = strlen(suffix);
		portal = malloc(prefixlen + (size_t)(close - open) +
		    suffixlen + 1);
		if (portal == NULL)
			break;
		memcpy(portal, pattern, prefixlen);

		/* Generate alternatives. */
		depth = 0;
		for (scan = item = open + 1;; scan++) {
			if (scan != close) {
				if (scan[-1] == '\\')
					continue;
				if (*scan == '{') {
					depth++;
					continue;
				}
				if (*scan == '}') {
					depth--;
					continue;
				}
				if (*scan != ',' || depth != 0)
					continue;
			}

			/* Copy the generated alternative and the suffix. */
			itemlen = (size_t)(scan - item);
			memcpy(portal + prefixlen, item, itemlen);
			memcpy(portal + prefixlen + itemlen, suffix,
			    suffixlen + 1);

			/* The string may be a brace expansion pattern too. */
			expandbrace(stream, portal);

			if (scan == close)
				break;
			fputc(' ', stream);
			item = scan + 1;
		}

		free(portal);
		return (true);
	}

	/* Not a brace expansion pattern, treat literally. */
	fputs(pattern, stream);
	return (false);
}
| static int | static int | ||||
| readtoken(void) | readtoken(void) | ||||
| { | { | ||||
| int t; | int t; | ||||
| struct alias *ap; | struct alias *ap; | ||||
| #ifdef DEBUG | #ifdef DEBUG | ||||
| int alreadyseen = tokpushback; | int alreadyseen = tokpushback; | ||||
| #endif | #endif | ||||
| Show All 26 Lines | if (checkkwd & CHKKWD) | ||||
| TRACE(("keyword %s recognized\n", tokname[t])); | TRACE(("keyword %s recognized\n", tokname[t])); | ||||
| goto out; | goto out; | ||||
| } | } | ||||
| } | } | ||||
| if (checkkwd & CHKALIAS && | if (checkkwd & CHKALIAS && | ||||
| (ap = lookupalias(wordtext, 1)) != NULL) { | (ap = lookupalias(wordtext, 1)) != NULL) { | ||||
| pushstring(ap->val, strlen(ap->val), ap); | pushstring(ap->val, strlen(ap->val), ap); | ||||
| goto top; | goto top; | ||||
| } | |||||
| if (braceexpandflag && wordtext != NULL) { | |||||
Inline comment (jilles; unsubmitted, not done): The `!quoteflag` above means that the code will not do brace expansion if there are any quoting characters in the word, which does not match other shells. Fixing this may not be so easy.
Inline comment (jilles; unsubmitted, not done): `wordtext` can't be null for a `TWORD`, so it should not be checked. Instead, a check for a `{` seems appropriate, so as to avoid a memstream allocation for every word.
| FILE *memstream; | |||||
| char *buf; | |||||
| size_t len; | |||||
| bool expanded; | |||||
| if ((memstream = open_memstream(&buf, &len)) == NULL) | |||||
| goto out; | |||||
| expanded = expandbrace(memstream, wordtext); | |||||
| fclose(memstream); | |||||
| if (expanded) { | |||||
| pushstring(buf, len, NULL); | |||||
Inline comment (jilles; unsubmitted, not done): Reprocessing the intermediate representation seems difficult to get right, since there is no obvious way to go back to a shell language word from the intermediate representation. Also, the memory allocated by the memstream is leaked in both the expanded and !expanded cases.
| goto top; | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| out: | out: | ||||
| if (t != TNOT) | if (t != TNOT) | ||||
| checkkwd = 0; | checkkwd = 0; | ||||
| #ifdef DEBUG | #ifdef DEBUG | ||||
| if (!alreadyseen) | if (!alreadyseen) | ||||
| ▲ Show 20 Lines • Show All 1,408 Lines • Show Last 20 Lines | |||||
The parser should use classification macros from syntax.c such as is_alpha and not the ones from ctype.h, so it is not inappropriately affected by locales. Where the parser depends on the locale, this should be the locale that was in effect when the shell started (e.g. initial_localeisutf8), so there are no weird dependencies on the parse/execute split.
By the way, isalpha requires a char to be cast to unsigned char first, but is_alpha does not.