Index: args.c =================================================================== --- args.c +++ args.c @@ -74,8 +74,12 @@ static void scan_profile(FILE *); +#define KEY_FILE 5 /* only used for args */ + const char *option_source = "?"; +void add_typedefs_from_file(const char *str); + /* * N.B.: because of the way the table here is scanned, options whose names are * substrings of other options must occur later; that is, with -lp vs -l, -lp @@ -91,6 +95,7 @@ } pro[] = { {"T", PRO_SPECIAL, 0, KEY, 0}, + {"U", PRO_SPECIAL, 0, KEY_FILE, 0}, {"bacc", PRO_BOOL, false, ON, &blanklines_around_conditional_compilation}, {"badp", PRO_BOOL, false, ON, &blanklines_after_declarations_at_proctop}, {"bad", PRO_BOOL, false, ON, &blanklines_after_declarations}, @@ -147,6 +152,7 @@ {"npro", PRO_SPECIAL, 0, IGN, 0}, {"npsl", PRO_BOOL, true, OFF, &procnames_start_line}, {"nps", PRO_BOOL, false, OFF, &pointer_as_binop}, + {"nsac", PRO_BOOL, false, OFF, &space_after_cast}, {"nsc", PRO_BOOL, true, OFF, &star_comment_cont}, {"nsob", PRO_BOOL, false, OFF, &swallow_optional_blanklines}, {"nut", PRO_BOOL, true, OFF, &use_tabs}, @@ -154,6 +160,7 @@ {"pcs", PRO_BOOL, false, ON, &proc_calls_space}, {"psl", PRO_BOOL, true, ON, &procnames_start_line}, {"ps", PRO_BOOL, false, ON, &pointer_as_binop}, + {"sac", PRO_BOOL, false, ON, &space_after_cast}, {"sc", PRO_BOOL, true, ON, &star_comment_cont}, {"sob", PRO_BOOL, false, ON, &swallow_optional_blanklines}, {"st", PRO_SPECIAL, 0, STDIN, 0}, @@ -293,10 +300,16 @@ char *str = strdup(param_start); if (str == NULL) err(1, NULL); - addkey(str, 4); + add_typename(str); } break; + case KEY_FILE: + if (*param_start == 0) + goto need_param; + add_typedefs_from_file(param_start); + break; + default: errx(1, "set_option: internal error: p_special %d", p->p_special); } @@ -325,3 +338,34 @@ errx(1, "set_option: internal error: p_type %d", p->p_type); } } + + +/* + * Register the type names listed in the file named by str (the argument of + * the -U option). Each line supplies one name: the text before its first + * whitespace character. Lines that yield an empty name are skipped. Prints + * a message and exits with status 1 if the file cannot be opened. + */ +void +add_typedefs_from_file(const char *str) +{ + FILE *file; + char line[BUFSIZ]; + char *copy; + + if
((file = fopen(str, "r")) == NULL) { + fprintf(stderr, "indent: cannot open file %s\n", str); + exit(1); + } + while ((fgets(line, BUFSIZ, file)) != NULL) { + /* Cut the line at its first whitespace character */ + line[strcspn(line, " \t\n\r")] = '\0'; + if (line[0] == '\0') + continue; /* nothing to register on this line */ + if ((copy = strdup(line)) == NULL) { + err(1, NULL); + } + add_typename(copy); + } + fclose(file); +} Index: indent.h =================================================================== --- indent.h +++ indent.h @@ -28,10 +28,12 @@ __FBSDID("$FreeBSD: head/usr.bin/indent/indent.h 93440 2002-03-30 17:10:20Z dwmalone $"); #endif -void addkey(char *, int); +void add_typename(const char *); +void alloc_typenames(void); int compute_code_target(void); int compute_label_target(void); int count_spaces(int, char *); +int count_spaces_until(int, char *, char *); int lexi(void); void diag2(int, const char *); void diag3(int, const char *, int); Index: indent.c =================================================================== --- indent.c +++ indent.c @@ -63,6 +63,7 @@ #include "indent.h" static void bakcopy(void); +static void indent_declaration(int, int); const char *in_name = "Standard Input"; /* will always point to name of input * file */ @@ -118,6 +119,7 @@ tokenbuf = (char *) malloc(bufsize); if (tokenbuf == NULL) err(1, NULL); + alloc_typenames(); l_com = combuf + bufsize - 5; l_lab = labbuf + bufsize - 5; l_code = codebuf + bufsize - 5; @@ -508,28 +510,29 @@ ++ps.p_l_follow; /* count parens to make Healy happy */ if (ps.want_blank && *token != '[' && (ps.last_token != ident || proc_calls_space - || (ps.its_a_keyword && (!ps.sizeof_keyword || Bill_Shannon)))) + /* offsetof (1) is never allowed a space; sizeof (2) gets + * one iff -bs; all other keywords (>2) always get a space + * before lparen */ + || (ps.keyword + Bill_Shannon > 2))) *e_code++ = ' '; - if (ps.in_decl && !ps.block_init) - if (troff && !ps.dumped_decl_indent && !is_procname && ps.last_token == decl) { - ps.dumped_decl_indent = 1; + ps.want_blank = false; + if
(ps.in_decl && !ps.block_init && !ps.dumped_decl_indent && !is_procname) { + /* function pointer declarations */ + if (troff) { sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token); e_code += strlen(e_code); } else { - while ((e_code - s_code) < dec_ind) { - CHECK_SIZE_CODE; - *e_code++ = ' '; - } - *e_code++ = token[0]; + indent_declaration(dec_ind, tabs_to_var); } - else + ps.dumped_decl_indent = true; + } + if (!troff) *e_code++ = token[0]; ps.paren_indents[ps.p_l_follow - 1] = e_code - s_code; if (sp_sw && ps.p_l_follow == 1 && extra_expression_indent && ps.paren_indents[0] < 2 * ps.ind_size) ps.paren_indents[0] = 2 * ps.ind_size; - ps.want_blank = false; if (ps.in_or_st && *token == '(' && ps.tos <= 2) { /* * this is a kluge to make sure that declarations will be @@ -540,19 +543,21 @@ ps.in_or_st = false; /* turn off flag for structure decl or * initialization */ } - if (ps.sizeof_keyword) - ps.sizeof_mask |= 1 << ps.p_l_follow; + /* parenthesized type following sizeof or offsetof is not a cast */ + if (ps.keyword == 1 || ps.keyword == 2) + ps.not_cast_mask |= 1 << ps.p_l_follow; break; case rparen: /* got a ')' or ']' */ rparen_count--; - if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.sizeof_mask) { + if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.not_cast_mask) { ps.last_u_d = true; ps.cast_mask &= (1 << ps.p_l_follow) - 1; - ps.want_blank = false; - } else + ps.want_blank = space_after_cast; + } + else ps.want_blank = true; - ps.sizeof_mask &= (1 << ps.p_l_follow) - 1; + ps.not_cast_mask &= (1 << ps.p_l_follow) - 1; if (--ps.p_l_follow < 0) { ps.p_l_follow = 0; diag3(0, "Extra %c", *token); @@ -580,27 +585,28 @@ break; case unary_op: /* this could be any unary operation */ - if (ps.want_blank) - *e_code++ = ' '; - - if (troff && !ps.dumped_decl_indent && ps.in_decl && !is_procname) { - sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token); - ps.dumped_decl_indent = 1; - e_code += strlen(e_code); + if (!ps.dumped_decl_indent && ps.in_decl 
&& !is_procname && !ps.block_init) { + /* pointer declarations */ + if (troff) { + if (ps.want_blank) + *e_code++ = ' '; + sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token); + e_code += strlen(e_code); + } + else { + /* if this is a unary op in a declaration, we should indent + * this token */ + for (i = 0; token[i]; ++i) + /* find length of token */; + indent_declaration(dec_ind - i, tabs_to_var); + } + ps.dumped_decl_indent = true; } - else { + else if (ps.want_blank) + *e_code++ = ' '; + { const char *res = token; - if (ps.in_decl && !ps.block_init) { /* if this is a unary op - * in a declaration, we - * should indent this - * token */ - for (i = 0; token[i]; ++i); /* find length of token */ - while ((e_code - s_code) < (dec_ind - i)) { - CHECK_SIZE_CODE; - *e_code++ = ' '; /* pad it */ - } - } if (troff && token[0] == '-' && token[1] == '>') res = "\\(->"; for (t_ptr = res; *t_ptr; ++t_ptr) { @@ -699,23 +705,25 @@ break; case semicolon: /* got a ';' */ - ps.in_or_st = false;/* we are not in an initialization or - * structure declaration */ + if (ps.dec_nest == 0) { + /* we are not in an initialization or structure declaration */ + ps.in_or_st = false; + } scase = false; /* these will only need resetting in an error */ squest = 0; if (ps.last_token == rparen && rparen_count == 0) ps.in_parameter_declaration = 0; ps.cast_mask = 0; - ps.sizeof_mask = 0; + ps.not_cast_mask = 0; ps.block_init = 0; ps.block_init_level = 0; ps.just_saw_decl--; - if (ps.in_decl && s_code == e_code && !ps.block_init) - while ((e_code - s_code) < (dec_ind - 1)) { - CHECK_SIZE_CODE; - *e_code++ = ' '; - } + if (ps.in_decl && s_code == e_code && !ps.block_init && !ps.dumped_decl_indent) { + /* indent stray semicolons in declarations */ + indent_declaration(dec_ind - 1, tabs_to_var); + ps.dumped_decl_indent = true; + } ps.in_decl = (ps.dec_nest > 0); /* if we were in a first level * structure declaration, we @@ -929,58 +937,28 @@ if (ps.in_decl) { /* if we are in a 
declaration, we must indent * identifier */ if (is_procname == 0 || !procnames_start_line) { - if (!ps.block_init) { - if (troff && !ps.dumped_decl_indent) { + if (!ps.block_init && !ps.dumped_decl_indent) { + if (troff) { if (ps.want_blank) *e_code++ = ' '; - ps.want_blank = false; sprintf(e_code, "\n.De %dp+\200p\n", dec_ind * 7); - ps.dumped_decl_indent = 1; e_code += strlen(e_code); - } else { - int cur_dec_ind; - int pos, startpos; - - /* - * in order to get the tab math right for - * indentations that are not multiples of 8 we - * need to modify both startpos and dec_ind - * (cur_dec_ind) here by eight minus the - * remainder of the current starting column - * divided by eight. This seems to be a - * properly working fix - */ - startpos = e_code - s_code; - cur_dec_ind = dec_ind; - pos = startpos; - if ((ps.ind_level * ps.ind_size) % 8 != 0) { - pos += (ps.ind_level * ps.ind_size) % 8; - cur_dec_ind += (ps.ind_level * ps.ind_size) % 8; - } - - if (tabs_to_var) { - while ((pos & ~7) + 8 <= cur_dec_ind) { - CHECK_SIZE_CODE; - *e_code++ = '\t'; - pos = (pos & ~7) + 8; - } - } - while (pos < cur_dec_ind) { - CHECK_SIZE_CODE; - *e_code++ = ' '; - pos++; - } - if (ps.want_blank && e_code - s_code == startpos) - *e_code++ = ' '; - ps.want_blank = false; } + else { + indent_declaration(dec_ind, tabs_to_var); + } + ps.dumped_decl_indent = true; + ps.want_blank = false; } - } else { + } + else { if (ps.want_blank) *e_code++ = ' '; ps.want_blank = false; - if (dec_ind && s_code != e_code) + if (dec_ind && s_code != e_code) { + *e_code = '\0'; dump_line(); + } dec_ind = 0; } } @@ -994,7 +972,7 @@ copy_id: if (ps.want_blank) *e_code++ = ' '; - if (troff && ps.its_a_keyword) { + if (troff && ps.keyword) { e_code = chfont(&bodyf, &keywordf, e_code); for (t_ptr = token; *t_ptr; ++t_ptr) { CHECK_SIZE_CODE; @@ -1021,12 +999,11 @@ ps.want_blank = (s_code != e_code); /* only put blank after comma * if comma does not start the * line */ - if (ps.in_decl && is_procname == 0 && 
!ps.block_init) - while ((e_code - s_code) < (dec_ind - 1)) { - CHECK_SIZE_CODE; - *e_code++ = ' '; - } - + if (ps.in_decl && is_procname == 0 && !ps.block_init && !ps.dumped_decl_indent) { + /* indent leading commas and not the actual identifiers */ + indent_declaration(dec_ind - 1, tabs_to_var); + ps.dumped_decl_indent = true; + } *e_code++ = ','; if (ps.p_l_follow == 0) { if (ps.block_init_level <= 0) @@ -1126,13 +1103,7 @@ ps.pcase = false; } - if (strncmp(s_lab, "#if", 3) == 0) { - if (blanklines_around_conditional_compilation) { - int c; - prefix_blankline_requested++; - while ((c = getc(input)) == '\n'); - ungetc(c, input); - } + if (!strncmp(s_lab, "#if", 3)) { /* also ifdef, ifndef */ if ((size_t)ifdef_level < sizeof(state_stack)/sizeof(state_stack[0])) { match_state[ifdef_level].tos = -1; state_stack[ifdef_level++] = ps; @@ -1140,34 +1111,45 @@ else diag2(1, "#if stack overflow"); } - else if (strncmp(s_lab, "#else", 5) == 0) + else if (!strncmp(s_lab, "#el", 3)) { /* else, elif */ if (ifdef_level <= 0) - diag2(1, "Unmatched #else"); + diag2(1, s_lab[3] == 'i' ? 
"Unmatched #elif" : "Unmatched #else"); else { match_state[ifdef_level - 1] = ps; ps = state_stack[ifdef_level - 1]; } + } else if (strncmp(s_lab, "#endif", 6) == 0) { if (ifdef_level <= 0) diag2(1, "Unmatched #endif"); - else { + else ifdef_level--; - -#ifdef undef - /* - * This match needs to be more intelligent before the - * message is useful - */ - if (match_state[ifdef_level].tos >= 0 - && bcmp(&ps, &match_state[ifdef_level], sizeof ps)) - diag2(0, "Syntactically inconsistent #ifdef alternatives"); -#endif + } + else { + struct directives { + int size; char *string; } - if (blanklines_around_conditional_compilation) { - postfix_blankline_requested++; - n_real_blanklines = 0; + recognized[] = { + {7, "include"}, {6, "define"}, {5, "undef"}, + {4, "line"}, {5, "error"}, {6, "pragma"} + }; + int d = sizeof recognized / sizeof recognized[0]; + while (--d >= 0) + if (!strncmp(s_lab + 1, recognized[d].string, recognized[d].size)) + break; + if (d < 0) { + diag2(1, "Unrecognized cpp directive"); + break; } } + if (blanklines_around_conditional_compilation) { + postfix_blankline_requested++; + n_real_blanklines = 0; + } + else { + postfix_blankline_requested = 0; + prefix_blankline_requested = 0; + } break; /* subsequent processing of the newline * character will cause the line to be printed */ @@ -1232,3 +1214,34 @@ err(1, "%s", in_name); } } + +static void +indent_declaration(int cur_dec_ind, int tabs_to_var) +{ + int pos = e_code - s_code; + char *startpos = e_code; + + /* + * get the tab math right for indentations that are not multiples of 8 + */ + if ((ps.ind_level * ps.ind_size) % 8 != 0) { + pos += (ps.ind_level * ps.ind_size) % 8; + cur_dec_ind += (ps.ind_level * ps.ind_size) % 8; + } + if (tabs_to_var) { + while ((pos & ~7) + 8 <= cur_dec_ind) { + CHECK_SIZE_CODE; + *e_code++ = '\t'; + pos = (pos & ~7) + 8; + } + } + while (pos < cur_dec_ind) { + CHECK_SIZE_CODE; + *e_code++ = ' '; + pos++; + } + if (e_code == startpos && ps.want_blank) { + *e_code++ = ' '; 
+ ps.want_blank = false; + } +} Index: indent_globs.h =================================================================== --- indent_globs.h +++ indent_globs.h @@ -71,6 +71,7 @@ if (combuf == NULL) \ err(1, NULL); \ e_com = combuf + (e_com-s_com) + 1; \ + last_bl = combuf + (last_bl-s_com) + 1; \ l_com = combuf + nsize - 5; \ s_com = combuf + 1; \ } @@ -206,6 +207,7 @@ * false uses all spaces */ int auto_typedefs; /* set true to recognize identifiers * ending in "_t" like typedefs */ +int space_after_cast; /* "b = (int) a" vs "b = (int)a" */ /* -troff font state information */ @@ -238,10 +240,10 @@ * char should be lined up with the / in / followed by * */ int comment_delta, n_comment_delta; - int cast_mask; /* indicates which close parens close off - * casts */ - int sizeof_mask; /* indicates which close parens close off - * sizeof''s */ + int cast_mask; /* indicates which close parens potentially + * close off casts */ + int not_cast_mask; /* indicates which close parens definitely + * close off something else than casts */ int block_init; /* true iff inside a block initialization */ int block_init_level; /* The level of brace nesting in an * initialization */ @@ -311,8 +313,7 @@ * specially */ int decl_indent; /* column to indent declared identifiers to */ int local_decl_indent; /* like decl_indent but for locals */ - int its_a_keyword; - int sizeof_keyword; + int keyword; /* the type of a keyword or 0 */ int dumped_decl_indent; float case_indent; /* The distance to indent case labels from the * switch statement */ Index: io.c =================================================================== --- io.c +++ io.c @@ -116,6 +116,7 @@ } while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) e_lab--; + *e_lab = '\0'; cur_col = pad_output(1, compute_label_target()); if (s_lab[0] == '#' && (strncmp(s_lab, "#else", 5) == 0 || strncmp(s_lab, "#endif", 6) == 0)) { @@ -241,17 +242,8 @@ while (e_com > com_st && isspace(e_com[-1])) e_com--; cur_col = 
pad_output(cur_col, target); - if (!ps.box_com) { - if (star_comment_cont && (com_st[1] != '*' || e_com <= com_st + 1)) { - if (com_st[1] == ' ' && com_st[0] == ' ' && e_com > com_st + 1) - com_st[1] = '*'; - else - fwrite(" * ", com_st[0] == '\t' ? 2 : com_st[0] == '*' ? 1 : 3, 1, output); - } - } fwrite(com_st, e_com - com_st, 1, output); ps.comment_delta = ps.n_comment_delta; - cur_col = count_spaces(cur_col, com_st); ++ps.com_lines; /* count lines with comments */ } } @@ -281,10 +273,11 @@ ps.dumped_decl_indent = 0; *(e_lab = s_lab) = '\0'; /* reset buffers */ *(e_code = s_code) = '\0'; - *(e_com = s_com) = '\0'; + *(e_com = s_com = combuf + 1) = '\0'; ps.ind_level = ps.i_l_follow; ps.paren_level = ps.p_l_follow; - paren_target = -ps.paren_indents[ps.paren_level - 1]; + if (ps.paren_level > 0) + paren_target = -ps.paren_indents[ps.paren_level - 1]; not_first_line = 1; } @@ -377,7 +370,7 @@ } buf_ptr = in_buffer; buf_end = p; - if (p[-2] == '/' && p[-3] == '*') { + if (p - in_buffer > 2 && p[-2] == '/' && p[-3] == '*') { if (in_buffer[3] == 'I' && strncmp(in_buffer, "/**INDENT**", 11) == 0) fill_buffer(); /* flush indent error message */ else { @@ -505,18 +498,15 @@ * */ int -count_spaces(int current, char *buffer) +count_spaces_until(int cur, char *buffer, char *end) /* * this routine figures out where the character position will be after * printing the text in buffer starting at column "current" */ { char *buf; /* used to look thru buffer */ - int cur; /* current character counter */ - cur = current; - - for (buf = buffer; *buf != '\0'; ++buf) { + for (buf = buffer; *buf != '\0' && buf != end; ++buf) { switch (*buf) { case '\n': @@ -540,6 +530,12 @@ return (cur); } +int +count_spaces(int cur, char *buffer) +{ + return count_spaces_until(cur, buffer, NULL); +} + void diag4(int level, const char *msg, int a, int b) { Index: lexi.c =================================================================== --- lexi.c +++ lexi.c @@ -57,48 +57,58 @@ #include "indent.h" 
#define alphanum 1 +#ifdef undef #define opchar 3 +#endif struct templ { const char *rwd; int rwcode; }; -struct templ specials[1000] = +/* + * This table has to be sorted alphabetically, because it'll be used in binary + * search. For the same reason, string must be the first thing in struct templ. + */ +struct templ specials[] = { - {"switch", 1}, - {"case", 2}, - {"break", 0}, - {"struct", 3}, - {"union", 3}, - {"enum", 3}, - {"default", 2}, - {"int", 4}, + {"break", 9}, + {"case", 8}, {"char", 4}, - {"float", 4}, + {"const", 4}, + {"default", 8}, + {"do", 6}, {"double", 4}, + {"else", 6}, + {"enum", 3}, + {"extern", 4}, + {"float", 4}, + {"for", 5}, + {"global", 4}, + {"goto", 9}, + {"if", 5}, + {"int", 4}, {"long", 4}, - {"short", 4}, - {"typdef", 4}, - {"unsigned", 4}, + {"offsetof", 1}, {"register", 4}, + {"return", 9}, + {"short", 4}, + {"sizeof", 2}, {"static", 4}, - {"global", 4}, - {"extern", 4}, + {"struct", 3}, + {"switch", 7}, + {"typedef", 4}, + {"union", 3}, + {"unsigned", 4}, {"void", 4}, - {"const", 4}, {"volatile", 4}, - {"goto", 0}, - {"return", 0}, - {"if", 5}, - {"while", 5}, - {"for", 5}, - {"else", 6}, - {"do", 6}, - {"sizeof", 7}, - {0, 0} + {"while", 5} }; +const char **typenames; +int typename_count; +int typename_top = -1; + char chartype[128] = { /* this is used to facilitate the decision of * what type (alphanumeric, operator) each @@ -121,6 +131,12 @@ 1, 1, 1, 0, 3, 0, 3, 0 }; +static int +strcmp_type(const void *e1, const void *e2) +{ + return strcmp(e1, *(const char * const *) e2); +} + int lexi(void) { @@ -191,15 +207,13 @@ } } while (1) { - if (!(seensfx & 1) && - (*buf_ptr == 'U' || *buf_ptr == 'u')) { + if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) { CHECK_SIZE_TOKEN; *e_token++ = *buf_ptr++; seensfx |= 1; continue; } - if (!(seensfx & 2) && - (*buf_ptr == 'L' || *buf_ptr == 'l')) { + if (!(seensfx & 2) && strchr("fFlL", *buf_ptr)) { CHECK_SIZE_TOKEN; if (buf_ptr[1] == buf_ptr[0]) *e_token++ = *buf_ptr++; @@ -232,8 
+246,7 @@ if (++buf_ptr >= buf_end) fill_buffer(); } - ps.its_a_keyword = false; - ps.sizeof_keyword = false; + ps.keyword = 0; if (l_struct && !ps.p_l_follow) { /* if last token was 'struct' and we're not * in parentheses, then this token @@ -249,43 +262,33 @@ last_code = ident; /* Remember that this is the code we will * return */ - if (auto_typedefs) { - const char *q = s_token; - size_t q_len = strlen(q); - /* Check if we have an "_t" in the end */ - if (q_len > 2 && - (strcmp(q + q_len - 2, "_t") == 0)) { - ps.its_a_keyword = true; + p = bsearch(s_token, + specials, + sizeof specials / sizeof specials[0], + sizeof specials[0], + strcmp_type); + if (!p) { /* not a special keyword... */ + char *u; + + /* ... so maybe a type_t or a typedef */ + if ((auto_typedefs && (u = strrchr(s_token, '_')) && strcmp(u, "_t") == 0) + || (typename_top >= 0 && bsearch(s_token, + typenames, + typename_top + 1, + sizeof typenames[0], + strcmp_type))) { + ps.keyword = 4; /* a type name */ ps.last_u_d = true; - goto found_auto_typedef; + goto found_typename; } } - - /* - * This loop will check if the token is a keyword. - */ - for (p = specials; (j = p->rwd) != 0; p++) { - const char *q = s_token; /* point at scanned token */ - if (*j++ != *q++ || *j++ != *q++) - continue; /* This test depends on the fact that - * identifiers are always at least 1 character - * long (ie. the first two bytes of the - * identifier are always meaningful) */ - if (q[-1] == 0) - break; /* If its a one-character identifier */ - while (*q++ == *j) - if (*j++ == 0) - goto found_keyword; /* I wish that C had a multi-level - * break... 
*/ - } - if (p->rwd) { /* we have a keyword */ - found_keyword: - ps.its_a_keyword = true; + else { /* we have a keyword */ + ps.keyword = p->rwcode; ps.last_u_d = true; switch (p->rwcode) { - case 1: /* it is a switch */ + case 7: /* it is a switch */ return (swstmt); - case 2: /* a case or default */ + case 8: /* a case or default */ return (casestmt); case 3: /* a "struct" */ @@ -297,10 +300,11 @@ /* FALLTHROUGH */ case 4: /* one of the declaration keywords */ - found_auto_typedef: + found_typename: if (ps.p_l_follow) { - ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.sizeof_mask; - break; /* inside parens: cast, param list or sizeof */ + /* inside parens: cast, param list, offsetof or sizeof */ + ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.not_cast_mask; + break; } last_code = decl; return (decl); @@ -311,8 +315,6 @@ case 6: /* do, else */ return (sp_nparen); - case 7: - ps.sizeof_keyword = true; default: /* all others are treated like any other * identifier */ return (ident); @@ -339,7 +341,7 @@ && (ps.last_token == rparen || ps.last_token == semicolon || ps.last_token == decl || ps.last_token == lbrace || ps.last_token == rbrace)) { - ps.its_a_keyword = true; + ps.keyword = 4; /* a type name */ ps.last_u_d = true; last_code = decl; return decl; @@ -586,23 +588,47 @@ return (code); } -/* - * Add the given keyword to the keyword table, using val as the keyword type - */ +/* + * Allocate the initial 16-slot typenames table; err() exits on failure. + */ void -addkey(char *key, int val) +alloc_typenames(void) { - struct templ *p = specials; - while (p->rwd) - if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) + typenames = (const char **) malloc(sizeof typenames[0] * (typename_count = 16)); + if (typenames == NULL) + err(1, NULL); +} + +/* + * Insert key into the sorted typenames table unless it is already present. + * The pointer itself is stored, not a copy of the string. Appending is the + * fast path, so supplying names in ascending order avoids the linear scan. + */ +void +add_typename(const char *key) +{ + int comparison; + + if (typename_top + 1 >= typename_count) { + typenames = realloc((void *) typenames, sizeof typenames[0] * (typename_count *= 2)); + if (typenames == NULL) + err(1, NULL); + } + if (typename_top == -1) + typenames[++typename_top] = key; + else if ((comparison =
strcmp(key, typenames[typename_top])) >= 0) { + /* take advantage of sorted input */ + if (comparison != 0) /* remove duplicates */ + typenames[++typename_top] = key; + } + else { + int p; + + for (p = 0; (comparison = strcmp(key, typenames[p])) > 0; p++) + /* find place for the new key */; + if (comparison == 0) /* remove duplicates */ return; - else - p++; - if (p >= specials + sizeof specials / sizeof specials[0]) - return; /* For now, table overflows are silently - * ignored */ - p->rwd = key; - p->rwcode = val; - p[1].rwd = 0; - p[1].rwcode = 0; + memmove(&typenames[p + 1], &typenames[p], sizeof typenames[0] * (++typename_top - p)); + typenames[p] = key; + } } Index: parse.c =================================================================== --- parse.c +++ parse.c @@ -43,6 +43,7 @@ __FBSDID("$FreeBSD: head/usr.bin/indent/parse.c 116390 2003-06-15 09:28:17Z charnier $"); #include +#include #include "indent_globs.h" #include "indent_codes.h" #include "indent.h" @@ -200,6 +201,10 @@ } /* end of switch */ + if (ps.tos >= STACKSIZE) { + errx(1, "parser stack overflow"); + } + reduce(); /* see if any reduction can be done */ #ifdef debug Index: pr_comment.c =================================================================== --- pr_comment.c +++ pr_comment.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "indent_globs.h" #include "indent.h" /* @@ -87,33 +88,22 @@ char *last_bl; /* points to the last blank in the output * buffer */ char *t_ptr; /* used for moving string */ - int unix_comment; /* tri-state variable used to decide if it is - * a unix-style comment. 
0 means only blanks - * since /+*, 1 means regular style comment, 2 - * means unix style comment */ int break_delim = comment_delimiter_on_blankline; int l_just_saw_decl = ps.just_saw_decl; - /* - * int ps.last_nl = 0; true iff the last significant thing - * weve seen is a newline - */ - int one_liner = 1; /* true iff this comment is a one-liner */ adj_max_col = max_col; ps.just_saw_decl = 0; - last_bl = 0; /* no blanks found so far */ + last_bl = NULL; /* no blanks found so far */ ps.box_com = false; /* at first, assume that we are not in * a boxed comment or some other * comment that should not be touched */ ++ps.out_coms; /* keep track of number of comments */ - unix_comment = 1; /* set flag to let us figure out if there is a - * unix-style comment ** DISABLED: use 0 to - * reenable this hack! */ /* Figure where to align and how to treat the comment */ if (ps.col_1 && !format_col1_comments) { /* if comment starts in column * 1 it should not be touched */ ps.box_com = true; + break_delim = false; ps.com_col = 1; } else { @@ -126,7 +116,7 @@ * be a block comment and is treated as a * box comment unless format_block_comments * is nonzero (the default). 
*/ - break_delim = 0; + break_delim = false; } if ( /* ps.bl_line && */ (s_lab == e_lab) && (s_code == e_code)) { /* klg: check only if this line is blank */ @@ -141,7 +131,7 @@ } else { int target_col; - break_delim = 0; + break_delim = false; if (s_code != e_code) target_col = count_spaces(compute_code_target(), s_code); else { @@ -172,23 +162,36 @@ if (*buf_ptr != ' ' && !ps.box_com) *e_com++ = ' '; - *e_com = '\0'; + /* Don't put a break delimiter if this comment is a one-liner */ + for (t_ptr = buf_ptr; *t_ptr != '\0' && *t_ptr != '\n'; t_ptr++) { + if (t_ptr >= buf_end) + fill_buffer(); + if (t_ptr[0] == '*' && t_ptr[1] == '/') { + break_delim = false; + break; + } + } + + if (break_delim) { + char *t = e_com; + e_com = s_com + 2; + *e_com = 0; + if (blanklines_before_blockcomments) + prefix_blankline_requested = 1; + dump_line(); + e_com = s_com = t; + if (!ps.box_com && star_comment_cont) + *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; + } + if (troff) { - now_col = 1; adj_max_col = 80; } - else - now_col = count_spaces(ps.com_col, s_com); /* figure what column we - * would be in if we - * printed the comment - * now */ /* Start to copy the comment */ while (1) { /* this loop will go until the comment is * copied */ - if (*buf_ptr > 040 && *buf_ptr != '*') - ps.last_nl = 0; CHECK_SIZE_COM; switch (*buf_ptr) { /* this checks for various spcl cases */ case 014: /* check for a form feed */ @@ -196,11 +199,11 @@ ps.use_ff = true; /* fix so dump_line uses a form feed */ dump_line(); - last_bl = 0; - *e_com++ = ' '; - *e_com++ = '*'; - *e_com++ = ' '; - while (*++buf_ptr == ' ' || *buf_ptr == '\t'); + last_bl = NULL; + if (!ps.box_com && star_comment_cont) + *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; + while (*++buf_ptr == ' ' || *buf_ptr == '\t') + ; } else { if (++buf_ptr >= buf_end) @@ -212,69 +215,25 @@ case '\n': if (had_eof) { /* check for unexpected eof */ printf("Unterminated comment\n"); - *e_com = '\0'; dump_line(); return; } - one_liner = 0; + 
last_bl = NULL; if (ps.box_com || ps.last_nl) { /* if this is a boxed comment, * we dont ignore the newline */ - if (s_com == e_com) { + if (s_com == e_com) *e_com++ = ' '; - *e_com++ = ' '; - } - *e_com = '\0'; if (!ps.box_com && e_com - s_com > 3) { - if (break_delim == 1 && s_com[0] == '/' - && s_com[1] == '*' && s_com[2] == ' ') { - char *t = e_com; - break_delim = 2; - e_com = s_com + 2; - *e_com = 0; - if (blanklines_before_blockcomments) - prefix_blankline_requested = 1; - dump_line(); - e_com = t; - s_com[0] = s_com[1] = s_com[2] = ' '; - } dump_line(); - CHECK_SIZE_COM; - *e_com++ = ' '; - *e_com++ = ' '; + if (star_comment_cont) + *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; } dump_line(); - now_col = ps.com_col; + if (!ps.box_com && star_comment_cont) + *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; } else { ps.last_nl = 1; - if (unix_comment != 1) { /* we not are in unix_style - * comment */ - if (unix_comment == 0 && s_code == e_code) { - /* - * if it is a UNIX-style comment, ignore the - * requirement that previous line be blank for - * unindention - */ - ps.com_col = (ps.ind_level - ps.unindent_displace) * ps.ind_size + 1; - if (ps.com_col <= 1) - ps.com_col = 2; - } - unix_comment = 2; /* permanently remember that we are in - * this type of comment */ - dump_line(); - ++line_no; - now_col = ps.com_col; - *e_com++ = ' '; - /* - * fix so that the star at the start of the line will line - * up - */ - do /* flush leading white space */ - if (++buf_ptr >= buf_end) - fill_buffer(); - while (*buf_ptr == ' ' || *buf_ptr == '\t'); - break; - } if (*(e_com - 1) == ' ' || *(e_com - 1) == '\t') last_bl = e_com - 1; /* @@ -285,7 +244,6 @@ last_bl = e_com; CHECK_SIZE_COM; *e_com++ = ' '; - ++now_col; } } ++line_no; /* keep track of input line number */ @@ -312,116 +270,63 @@ if (++buf_ptr >= buf_end) /* get to next char after * */ fill_buffer(); - if (unix_comment == 0) /* set flag to show we are not in - * unix-style comment */ - unix_comment = 1; - if 
(*buf_ptr == '/') { /* it is the end!!! */ end_of_comment: if (++buf_ptr >= buf_end) fill_buffer(); - - if (*(e_com - 1) != ' ' && !ps.box_com) { /* insure blank before - * end */ - *e_com++ = ' '; - ++now_col; - } - if (break_delim == 1 && !one_liner && s_com[0] == '/' - && s_com[1] == '*' && s_com[2] == ' ') { - char *t = e_com; - break_delim = 2; - e_com = s_com + 2; - *e_com = 0; - if (blanklines_before_blockcomments) - prefix_blankline_requested = 1; - dump_line(); - e_com = t; - s_com[0] = s_com[1] = s_com[2] = ' '; - } - if (break_delim == 2 && e_com > s_com + 3 - /* now_col > adj_max_col - 2 && !ps.box_com */ ) { - *e_com = '\0'; - dump_line(); - now_col = ps.com_col; - } CHECK_SIZE_COM; - *e_com++ = '*'; - *e_com++ = '/'; - *e_com = '\0'; + if (break_delim) { + if (e_com > s_com + 3) { + dump_line(); + } + else + s_com = e_com; + *e_com++ = ' '; + } + if (e_com[-1] != ' ' && !ps.box_com) { + *e_com++ = ' '; /* ensure blank before end */ + } + *e_com++ = '*', *e_com++ = '/', *e_com = '\0'; ps.just_saw_decl = l_just_saw_decl; return; } else { /* handle isolated '*' */ *e_com++ = '*'; - ++now_col; } break; default: /* we have a random char */ - if (unix_comment == 0 && *buf_ptr != ' ' && *buf_ptr != '\t') - unix_comment = 1; /* we are not in unix-style comment */ - - *e_com = *buf_ptr++; - if (buf_ptr >= buf_end) - fill_buffer(); - - if (*e_com == '\t') /* keep track of column */ - now_col = ((now_col - 1) & tabmask) + tabsize + 1; - else if (*e_com == '\b') /* this is a backspace */ - --now_col; - else - ++now_col; - - if (*e_com == ' ' || *e_com == '\t') - last_bl = e_com; - /* remember we saw a blank */ - - ++e_com; - if (now_col > adj_max_col && !ps.box_com && unix_comment == 1 && e_com[-1] > ' ') { - /* - * the comment is too long, it must be broken up - */ - if (break_delim == 1 && s_com[0] == '/' - && s_com[1] == '*' && s_com[2] == ' ') { - char *t = e_com; - break_delim = 2; - e_com = s_com + 2; - *e_com = 0; - if (blanklines_before_blockcomments) - 
prefix_blankline_requested = 1; + now_col = count_spaces_until(ps.com_col, s_com, e_com); + do { + *e_com = *buf_ptr++; + if (buf_ptr >= buf_end) + fill_buffer(); + if (*e_com == ' ' || *e_com == '\t') + last_bl = e_com; /* remember we saw a blank */ + ++e_com; + now_col++; + } while (!memchr("*\n\r\b\t", *buf_ptr, 6) && (now_col <= adj_max_col || !last_bl)); + ps.last_nl = false; + if (now_col > adj_max_col && !ps.box_com && e_com[-1] > ' ') { + /* the comment is too long, it must be broken up */ + if (last_bl == NULL) { dump_line(); - e_com = t; - s_com[0] = s_com[1] = s_com[2] = ' '; + if (!ps.box_com && star_comment_cont) + *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; + break; } - if (last_bl == 0) { /* we have seen no blanks */ - last_bl = e_com; /* fake it */ - *e_com++ = ' '; - } - *e_com = '\0'; /* print what we have */ - *last_bl = '\0'; - while (last_bl > s_com && last_bl[-1] < 040) - *--last_bl = 0; + *e_com = '\0'; e_com = last_bl; dump_line(); - - *e_com++ = ' '; /* add blanks for continuation */ - *e_com++ = ' '; - *e_com++ = ' '; - - t_ptr = last_bl + 1; - last_bl = 0; - if (t_ptr >= e_com) { - while (*t_ptr == ' ' || *t_ptr == '\t') - t_ptr++; - while (*t_ptr != '\0') { /* move unprinted part of - * comment down in buffer */ - if (*t_ptr == ' ' || *t_ptr == '\t') - last_bl = e_com; - *e_com++ = *t_ptr++; - } + if (!ps.box_com && star_comment_cont) + *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; + for (t_ptr = last_bl + 1; *t_ptr == ' ' || *t_ptr == '\t'; t_ptr++) + ; + last_bl = NULL; + while (*t_ptr != '\0') { + if (*t_ptr == ' ' || *t_ptr == '\t') + last_bl = e_com; + *e_com++ = *t_ptr++; } - *e_com = '\0'; - now_col = count_spaces(ps.com_col, s_com); /* recompute current - * position */ } break; }