Index: head/usr.bin/indent/indent.c =================================================================== --- head/usr.bin/indent/indent.c (revision 303717) +++ head/usr.bin/indent/indent.c (revision 303718) @@ -1,1247 +1,1251 @@ /*- * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1976 Board of Trustees of the University of Illinois. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1985 Sun Microsystems, Inc.\n\ @(#) Copyright (c) 1976 Board of Trustees of the University of Illinois.\n\ @(#) Copyright (c) 1980, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #if 0 #ifndef lint static char sccsid[] = "@(#)indent.c 5.17 (Berkeley) 6/7/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include "indent_globs.h" #include "indent_codes.h" #include "indent.h" static void bakcopy(void); static void indent_declaration(int, int); const char *in_name = "Standard Input"; /* will always point to name of input * file */ const char *out_name = "Standard Output"; /* will always point to name * of output file */ char bakfile[MAXPATHLEN] = ""; int main(int argc, char **argv) { int dec_ind; /* current indentation for declarations */ int di_stack[20]; /* a stack of structure indentation levels */ int flushed_nl; /* used when buffering up comments to remember * that a newline was passed over */ int force_nl; /* when true, code must be broken */ int hd_type = 0; /* used to store type of stmt for if (...), * for (...), etc */ int i; /* local loop counter */ int scase; /* set to true when we see a case, so we will * know what to do with the following colon */ int sp_sw; /* when true, we are in the expression of * if(...), while(...), etc. */ int squest; /* when this is positive, we have seen a ? * without the matching : in a ?: * construct */ const char *t_ptr; /* used for copying tokens */ int tabs_to_var; /* true if using tabs to indent to var name */ int type_code; /* the type of token, returned by lexi */ int last_else = 0; /* true iff last keyword was an else */ /*-----------------------------------------------*\ | INITIALIZATION | \*-----------------------------------------------*/ found_err = 0; ps.p_stack[0] = stmt; /* this is the parser's stack */ ps.last_nl = true; /* this is true if the last thing scanned was * a newline */ ps.last_token = semicolon; combuf = (char *) malloc(bufsize); if (combuf == NULL) err(1, NULL); labbuf = (char *) malloc(bufsize); if (labbuf == NULL) err(1, NULL); codebuf = (char *) malloc(bufsize); if (codebuf == NULL) err(1, NULL); tokenbuf = (char *) malloc(bufsize); if (tokenbuf == NULL) err(1, NULL); l_com = combuf + bufsize - 5; l_lab = labbuf + bufsize - 5; l_code = codebuf + bufsize - 5; l_token = tokenbuf + bufsize - 5; combuf[0] = codebuf[0] = labbuf[0] = ' '; /* set up code, label, and * comment buffers */ combuf[1] = codebuf[1] = labbuf[1] = '\0'; ps.else_if = 1; /* Default else-if special processing to on */ s_lab = e_lab = labbuf + 1; s_code = e_code = codebuf + 1; s_com = e_com = combuf + 1; s_token = e_token = tokenbuf + 1; in_buffer = (char *) malloc(10); if (in_buffer == NULL) err(1, NULL); in_buffer_limit = in_buffer + 8; buf_ptr = buf_end = in_buffer; line_no = 1; had_eof = ps.in_decl = ps.decl_on_line = break_comma = false; sp_sw = force_nl = false; ps.in_or_st = false; ps.bl_line = true; dec_ind = 0; di_stack[ps.dec_nest = 0] = 0; ps.want_blank = ps.in_stmt = ps.ind_stmt = false; scase = ps.pcase = false; squest = 0; sc_end = NULL; bp_save = NULL; be_save = NULL; output = NULL; tabs_to_var = 0; /*--------------------------------------------------*\ | COMMAND LINE SCAN | \*--------------------------------------------------*/ #ifdef undef max_col = 78; /* -l78 */ lineup_to_parens = 1; /* -lp */ ps.ljust_decl = 0; /* -ndj */ ps.com_ind = 33; /* -c33 */ star_comment_cont = 1; /* -sc */ ps.ind_size = 8; /* -i8 */ verbose = 0; ps.decl_indent = 16; /* -di16 */ ps.local_decl_indent = -1; /* if this is not set to some nonnegative value * by an arg, we will set this equal to * ps.decl_ind */ ps.indent_parameters = 1; /* -ip */ ps.decl_com_ind = 0; /* if this is not set to some positive value * by an arg, we will set this equal to * ps.com_ind */ btype_2 = 1; /* -br */ cuddle_else = 1; /* -ce */ ps.unindent_displace = 0; /* -d0 */ ps.case_indent = 0; /* -cli0 */ format_block_comments = 1; /* -fcb */ format_col1_comments = 1; /* -fc1 */ procnames_start_line = 1; /* -psl */ proc_calls_space = 0; /* -npcs */ comment_delimiter_on_blankline = 1; /* -cdb */ ps.leave_comma = 1; /* -nbc */ #endif for (i = 1; i < argc; ++i) if (strcmp(argv[i], "-npro") == 0) break; set_defaults(); if (i >= argc) set_profile(); for (i = 1; i < argc; ++i) { /* * look thru args (if any) for changes to defaults */ if (argv[i][0] != '-') {/* no flag on parameter */ if (input == NULL) { /* we must have the input file */ in_name = argv[i]; /* remember name of input file */ input = fopen(in_name, "r"); if (input == NULL) /* check for open error */ err(1, "%s", in_name); continue; } else if (output == NULL) { /* we have the output file */ out_name = argv[i]; /* remember name of output file */ if (strcmp(in_name, out_name) == 0) { /* attempt to overwrite * the file */ errx(1, "input and output files must be different"); } output = fopen(out_name, "w"); if (output == NULL) /* check for create error */ err(1, "%s", out_name); continue; } errx(1, "unknown parameter: %s", argv[i]); } else set_option(argv[i]); } /* end of for */ if (input == NULL) input = stdin; if (output == NULL) { if (troff || input == stdin) output = stdout; else { out_name = in_name; bakcopy(); } } if (ps.com_ind <= 1) ps.com_ind = 2; /* dont put normal comments before column 2 */ if (troff) { if (bodyf.font[0] == 0) parsefont(&bodyf, "R"); if (scomf.font[0] == 0) parsefont(&scomf, "I"); if (blkcomf.font[0] == 0) blkcomf = scomf, blkcomf.size += 2; if (boxcomf.font[0] == 0) boxcomf = blkcomf; if (stringf.font[0] == 0) parsefont(&stringf, "L"); if (keywordf.font[0] == 0) parsefont(&keywordf, "B"); writefdef(&bodyf, 'B'); writefdef(&scomf, 'C'); writefdef(&blkcomf, 'L'); writefdef(&boxcomf, 'X'); writefdef(&stringf, 'S'); writefdef(&keywordf, 'K'); } if (block_comment_max_col <= 0) block_comment_max_col = max_col; if (ps.local_decl_indent < 0) /* if not specified by user, set this */ ps.local_decl_indent = ps.decl_indent; if (ps.decl_com_ind <= 0) /* if not specified by user, set this */ ps.decl_com_ind = ps.ljust_decl ? (ps.com_ind <= 10 ? 2 : ps.com_ind - 8) : ps.com_ind; if (continuation_indent == 0) continuation_indent = ps.ind_size; fill_buffer(); /* get first batch of stuff into input buffer */ parse(semicolon); { char *p = buf_ptr; int col = 1; while (1) { if (*p == ' ') col++; else if (*p == '\t') col = ((col - 1) & ~7) + 9; else break; p++; } if (col > ps.ind_size) ps.ind_level = ps.i_l_follow = col / ps.ind_size; } if (troff) { const char *p = in_name, *beg = in_name; while (*p) if (*p++ == '/') beg = p; fprintf(output, ".Fn \"%s\"\n", beg); } /* * START OF MAIN LOOP */ while (1) { /* this is the main loop. it will go until we * reach eof */ int is_procname; type_code = lexi(); /* lexi reads one token. The actual * characters read are stored in "token". lexi * returns a code indicating the type of token */ is_procname = ps.procname[0]; /* * The following code moves everything following an if (), while (), * else, etc. up to the start of the following stmt to a buffer. This * allows proper handling of both kinds of brace placement. */ flushed_nl = false; while (ps.search_brace) { /* if we scanned an if(), while(), * etc., we might need to copy stuff * into a buffer we must loop, copying * stuff into save_com, until we find * the start of the stmt which follows * the if, or whatever */ switch (type_code) { case newline: ++line_no; if (sc_end != NULL) goto sw_buffer; /* dump comment, if any */ flushed_nl = true; case form_feed: break; /* form feeds and newlines found here will be * ignored */ case lbrace: /* this is a brace that starts the compound * stmt */ if (sc_end == NULL) { /* ignore buffering if a comment wasn't * stored up */ ps.search_brace = false; goto check_type; } if (btype_2) { save_com[0] = '{'; /* we either want to put the brace * right after the if */ goto sw_buffer; /* go to common code to get out of * this loop */ } case comment: /* we have a comment, so we must copy it into * the buffer */ if (!flushed_nl || sc_end != NULL) { if (sc_end == NULL) { /* if this is the first comment, we * must set up the buffer */ save_com[0] = save_com[1] = ' '; sc_end = &(save_com[2]); } else { *sc_end++ = '\n'; /* add newline between * comments */ *sc_end++ = ' '; --line_no; } *sc_end++ = '/'; /* copy in start of comment */ *sc_end++ = '*'; for (;;) { /* loop until we get to the end of the comment */ *sc_end = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); if (*sc_end++ == '*' && *buf_ptr == '/') break; /* we are at end of comment */ if (sc_end >= &(save_com[sc_size])) { /* check for temp buffer * overflow */ diag2(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever"); fflush(output); exit(1); } } *sc_end++ = '/'; /* add ending slash */ if (++buf_ptr >= buf_end) /* get past / in buffer */ fill_buffer(); break; } default: /* it is the start of a normal statement */ if (flushed_nl) /* if we flushed a newline, make sure it is * put back */ force_nl = true; if ((type_code == sp_paren && *token == 'i' && last_else && ps.else_if) || (type_code == sp_nparen && *token == 'e' && e_code != s_code && e_code[-1] == '}')) force_nl = false; if (sc_end == NULL) { /* ignore buffering if comment wasn't * saved up */ ps.search_brace = false; goto check_type; } if (force_nl) { /* if we should insert a nl here, put it into * the buffer */ force_nl = false; --line_no; /* this will be re-increased when the nl is * read from the buffer */ *sc_end++ = '\n'; *sc_end++ = ' '; if (verbose && !flushed_nl) /* print error msg if the line * was not already broken */ diag2(0, "Line broken"); flushed_nl = false; } for (t_ptr = token; *t_ptr; ++t_ptr) *sc_end++ = *t_ptr; /* copy token into temp buffer */ ps.procname[0] = 0; sw_buffer: ps.search_brace = false; /* stop looking for start of * stmt */ bp_save = buf_ptr; /* save current input buffer */ be_save = buf_end; buf_ptr = save_com; /* fix so that subsequent calls to * lexi will take tokens out of * save_com */ *sc_end++ = ' ';/* add trailing blank, just in case */ buf_end = sc_end; sc_end = NULL; break; } /* end of switch */ if (type_code != 0) /* we must make this check, just in case there * was an unexpected EOF */ type_code = lexi(); /* read another token */ /* if (ps.search_brace) ps.procname[0] = 0; */ if ((is_procname = ps.procname[0]) && flushed_nl && !procnames_start_line && ps.in_decl && type_code == ident) flushed_nl = 0; } /* end of while (search_brace) */ last_else = 0; check_type: if (type_code == 0) { /* we got eof */ if (s_lab != e_lab || s_code != e_code || s_com != e_com) /* must dump end of line */ dump_line(); if (ps.tos > 1) /* check for balanced braces */ diag2(1, "Stuff missing from end of file"); if (verbose) { printf("There were %d output lines and %d comments\n", ps.out_lines, ps.out_coms); printf("(Lines with comments)/(Lines with code): %6.3f\n", (1.0 * ps.com_lines) / code_lines); } fflush(output); exit(found_err); } if ( (type_code != comment) && (type_code != newline) && (type_code != preesc) && (type_code != form_feed)) { if (force_nl && (type_code != semicolon) && (type_code != lbrace || !btype_2)) { /* we should force a broken line here */ if (verbose && !flushed_nl) diag2(0, "Line broken"); flushed_nl = false; dump_line(); ps.want_blank = false; /* dont insert blank at line start */ force_nl = false; } ps.in_stmt = true; /* turn on flag which causes an extra level of * indentation. this is turned off by a ; or * '}' */ if (s_com != e_com) { /* the turkey has embedded a comment * in a line. fix it */ *e_code++ = ' '; for (t_ptr = s_com; *t_ptr; ++t_ptr) { CHECK_SIZE_CODE; *e_code++ = *t_ptr; } *e_code++ = ' '; *e_code = '\0'; /* null terminate code sect */ ps.want_blank = false; e_com = s_com; } } else if (type_code != comment) /* preserve force_nl thru a comment */ force_nl = false; /* cancel forced newline after newline, form * feed, etc */ /*-----------------------------------------------------*\ | do switch on type of token scanned | \*-----------------------------------------------------*/ CHECK_SIZE_CODE; switch (type_code) { /* now, decide what to do with the token */ case form_feed: /* found a form feed in line */ ps.use_ff = true; /* a form feed is treated much like a newline */ dump_line(); ps.want_blank = false; break; case newline: if (ps.last_token != comma || ps.p_l_follow > 0 || !ps.leave_comma || ps.block_init || !break_comma || s_com != e_com) { dump_line(); ps.want_blank = false; } ++line_no; /* keep track of input line number */ break; case lparen: /* got a '(' or '[' */ ++ps.p_l_follow; /* count parens to make Healy happy */ if (ps.want_blank && *token != '[' && - (ps.last_token != ident || proc_calls_space - || (ps.its_a_keyword && (!ps.sizeof_keyword || Bill_Shannon)))) + (ps.last_token != ident || proc_calls_space || + /* offsetof (1) is never allowed a space; sizeof (2) gets + * one iff -bs; all other keywords (>2) always get a space + * before lparen */ + (ps.keyword + Bill_Shannon > 2))) *e_code++ = ' '; ps.want_blank = false; if (ps.in_decl && !ps.block_init && !ps.dumped_decl_indent && !is_procname) { /* function pointer declarations */ if (troff) { sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token); e_code += strlen(e_code); } else { indent_declaration(dec_ind, tabs_to_var); } ps.dumped_decl_indent = true; } if (!troff) *e_code++ = token[0]; ps.paren_indents[ps.p_l_follow - 1] = e_code - s_code; if (sp_sw && ps.p_l_follow == 1 && extra_expression_indent && ps.paren_indents[0] < 2 * ps.ind_size) ps.paren_indents[0] = 2 * ps.ind_size; if (ps.in_or_st && *token == '(' && ps.tos <= 2) { /* * this is a kluge to make sure that declarations will be * aligned right if proc decl has an explicit type on it, i.e. * "int a(x) {..." */ parse(semicolon); /* I said this was a kluge... */ ps.in_or_st = false; /* turn off flag for structure decl or * initialization */ } - if (ps.sizeof_keyword) - ps.sizeof_mask |= 1 << ps.p_l_follow; + /* parenthesized type following sizeof or offsetof is not a cast */ + if (ps.keyword == 1 || ps.keyword == 2) + ps.not_cast_mask |= 1 << ps.p_l_follow; break; case rparen: /* got a ')' or ']' */ rparen_count--; - if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.sizeof_mask) { + if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.not_cast_mask) { ps.last_u_d = true; ps.cast_mask &= (1 << ps.p_l_follow) - 1; ps.want_blank = false; } else ps.want_blank = true; - ps.sizeof_mask &= (1 << ps.p_l_follow) - 1; + ps.not_cast_mask &= (1 << ps.p_l_follow) - 1; if (--ps.p_l_follow < 0) { ps.p_l_follow = 0; diag3(0, "Extra %c", *token); } if (e_code == s_code) /* if the paren starts the line */ ps.paren_level = ps.p_l_follow; /* then indent it */ *e_code++ = token[0]; if (sp_sw && (ps.p_l_follow == 0)) { /* check for end of if * (...), or some such */ sp_sw = false; force_nl = true;/* must force newline after if */ ps.last_u_d = true; /* inform lexi that a following * operator is unary */ ps.in_stmt = false; /* dont use stmt continuation * indentation */ parse(hd_type); /* let parser worry about if, or whatever */ } ps.search_brace = btype_2; /* this should insure that constructs * such as main(){...} and int[]{...} * have their braces put in the right * place */ break; case unary_op: /* this could be any unary operation */ if (!ps.dumped_decl_indent && ps.in_decl && !is_procname && !ps.block_init) { /* pointer declarations */ if (troff) { if (ps.want_blank) *e_code++ = ' '; sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token); e_code += strlen(e_code); } else { /* if this is a unary op in a declaration, we should * indent this token */ for (i = 0; token[i]; ++i) /* find length of token */; indent_declaration(dec_ind - i, tabs_to_var); } ps.dumped_decl_indent = true; } else if (ps.want_blank) *e_code++ = ' '; { const char *res = token; if (troff && token[0] == '-' && token[1] == '>') res = "\\(->"; for (t_ptr = res; *t_ptr; ++t_ptr) { CHECK_SIZE_CODE; *e_code++ = *t_ptr; } } ps.want_blank = false; break; case binary_op: /* any binary operation */ if (ps.want_blank) *e_code++ = ' '; { const char *res = token; if (troff) switch (token[0]) { case '<': if (token[1] == '=') res = "\\(<="; break; case '>': if (token[1] == '=') res = "\\(>="; break; case '!': if (token[1] == '=') res = "\\(!="; break; case '|': if (token[1] == '|') res = "\\(br\\(br"; else if (token[1] == 0) res = "\\(br"; break; } for (t_ptr = res; *t_ptr; ++t_ptr) { CHECK_SIZE_CODE; *e_code++ = *t_ptr; /* move the operator */ } } ps.want_blank = true; break; case postop: /* got a trailing ++ or -- */ *e_code++ = token[0]; *e_code++ = token[1]; ps.want_blank = true; break; case question: /* got a ? */ squest++; /* this will be used when a later colon * appears so we can distinguish the * ?: construct */ if (ps.want_blank) *e_code++ = ' '; *e_code++ = '?'; ps.want_blank = true; break; case casestmt: /* got word 'case' or 'default' */ scase = true; /* so we can process the later colon properly */ goto copy_id; case colon: /* got a ':' */ if (squest > 0) { /* it is part of the ?: construct */ --squest; if (ps.want_blank) *e_code++ = ' '; *e_code++ = ':'; ps.want_blank = true; break; } if (ps.in_or_st) { *e_code++ = ':'; ps.want_blank = false; break; } ps.in_stmt = false; /* seeing a label does not imply we are in a * stmt */ for (t_ptr = s_code; *t_ptr; ++t_ptr) *e_lab++ = *t_ptr; /* turn everything so far into a label */ e_code = s_code; *e_lab++ = ':'; *e_lab++ = ' '; *e_lab = '\0'; force_nl = ps.pcase = scase; /* ps.pcase will be used by * dump_line to decide how to * indent the label. force_nl * will force a case n: to be * on a line by itself */ scase = false; ps.want_blank = false; break; case semicolon: /* got a ';' */ if (ps.dec_nest == 0) ps.in_or_st = false;/* we are not in an initialization or * structure declaration */ scase = false; /* these will only need resetting in an error */ squest = 0; if (ps.last_token == rparen && rparen_count == 0) ps.in_parameter_declaration = 0; ps.cast_mask = 0; - ps.sizeof_mask = 0; + ps.not_cast_mask = 0; ps.block_init = 0; ps.block_init_level = 0; ps.just_saw_decl--; if (ps.in_decl && s_code == e_code && !ps.block_init && !ps.dumped_decl_indent) { /* indent stray semicolons in declarations */ indent_declaration(dec_ind - 1, tabs_to_var); ps.dumped_decl_indent = true; } ps.in_decl = (ps.dec_nest > 0); /* if we were in a first level * structure declaration, we * arent any more */ if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) { /* * This should be true iff there were unbalanced parens in the * stmt. It is a bit complicated, because the semicolon might * be in a for stmt */ diag2(1, "Unbalanced parens"); ps.p_l_follow = 0; if (sp_sw) { /* this is a check for an if, while, etc. with * unbalanced parens */ sp_sw = false; parse(hd_type); /* dont lose the if, or whatever */ } } *e_code++ = ';'; ps.want_blank = true; ps.in_stmt = (ps.p_l_follow > 0); /* we are no longer in the * middle of a stmt */ if (!sp_sw) { /* if not if for (;;) */ parse(semicolon); /* let parser know about end of stmt */ force_nl = true;/* force newline after an end of stmt */ } break; case lbrace: /* got a '{' */ ps.in_stmt = false; /* dont indent the {} */ if (!ps.block_init) force_nl = true;/* force other stuff on same line as '{' onto * new line */ else if (ps.block_init_level <= 0) ps.block_init_level = 1; else ps.block_init_level++; if (s_code != e_code && !ps.block_init) { if (!btype_2) { dump_line(); ps.want_blank = false; } else if (ps.in_parameter_declaration && !ps.in_or_st) { ps.i_l_follow = 0; if (function_brace_split) { /* dump the line prior to the * brace ... */ dump_line(); ps.want_blank = false; } else /* add a space between the decl and brace */ ps.want_blank = true; } } if (ps.in_parameter_declaration) prefix_blankline_requested = 0; if (ps.p_l_follow > 0) { /* check for preceding unbalanced * parens */ diag2(1, "Unbalanced parens"); ps.p_l_follow = 0; if (sp_sw) { /* check for unclosed if, for, etc. */ sp_sw = false; parse(hd_type); ps.ind_level = ps.i_l_follow; } } if (s_code == e_code) ps.ind_stmt = false; /* dont put extra indentation on line * with '{' */ if (ps.in_decl && ps.in_or_st) { /* this is either a structure * declaration or an init */ di_stack[ps.dec_nest++] = dec_ind; /* ? dec_ind = 0; */ } else { ps.decl_on_line = false; /* we can't be in the middle of * a declaration, so don't do * special indentation of * comments */ if (blanklines_after_declarations_at_proctop && ps.in_parameter_declaration) postfix_blankline_requested = 1; ps.in_parameter_declaration = 0; } dec_ind = 0; parse(lbrace); /* let parser know about this */ if (ps.want_blank) /* put a blank before '{' if '{' is not at * start of line */ *e_code++ = ' '; ps.want_blank = false; *e_code++ = '{'; ps.just_saw_decl = 0; break; case rbrace: /* got a '}' */ if (ps.p_stack[ps.tos] == decl && !ps.block_init) /* semicolons can be * omitted in * declarations */ parse(semicolon); if (ps.p_l_follow) {/* check for unclosed if, for, else. */ diag2(1, "Unbalanced parens"); ps.p_l_follow = 0; sp_sw = false; } ps.just_saw_decl = 0; ps.block_init_level--; if (s_code != e_code && !ps.block_init) { /* '}' must be first on * line */ if (verbose) diag2(0, "Line broken"); dump_line(); } *e_code++ = '}'; ps.want_blank = true; ps.in_stmt = ps.ind_stmt = false; if (ps.dec_nest > 0) { /* we are in multi-level structure * declaration */ dec_ind = di_stack[--ps.dec_nest]; if (ps.dec_nest == 0 && !ps.in_parameter_declaration) ps.just_saw_decl = 2; ps.in_decl = true; } prefix_blankline_requested = 0; parse(rbrace); /* let parser know about this */ ps.search_brace = cuddle_else && ps.p_stack[ps.tos] == ifhead && ps.il[ps.tos] >= ps.ind_level; if (ps.tos <= 1 && blanklines_after_procs && ps.dec_nest <= 0) postfix_blankline_requested = 1; break; case swstmt: /* got keyword "switch" */ sp_sw = true; hd_type = swstmt; /* keep this for when we have seen the * expression */ goto copy_id; /* go move the token into buffer */ case sp_paren: /* token is if, while, for */ sp_sw = true; /* the interesting stuff is done after the * expression is scanned */ hd_type = (*token == 'i' ? ifstmt : (*token == 'w' ? whilestmt : forstmt)); /* * remember the type of header for later use by parser */ goto copy_id; /* copy the token into line */ case sp_nparen: /* got else, do */ ps.in_stmt = false; if (*token == 'e') { if (e_code != s_code && (!cuddle_else || e_code[-1] != '}')) { if (verbose) diag2(0, "Line broken"); dump_line();/* make sure this starts a line */ ps.want_blank = false; } force_nl = true;/* also, following stuff must go onto new line */ last_else = 1; parse(elselit); } else { if (e_code != s_code) { /* make sure this starts a line */ if (verbose) diag2(0, "Line broken"); dump_line(); ps.want_blank = false; } force_nl = true;/* also, following stuff must go onto new line */ last_else = 0; parse(dolit); } goto copy_id; /* move the token into line */ case decl: /* we have a declaration type (int, register, * etc.) */ parse(decl); /* let parser worry about indentation */ if (ps.last_token == rparen && ps.tos <= 1) { ps.in_parameter_declaration = 1; if (s_code != e_code) { dump_line(); ps.want_blank = 0; } } if (ps.in_parameter_declaration && ps.indent_parameters && ps.dec_nest == 0) { ps.ind_level = ps.i_l_follow = 1; ps.ind_stmt = 0; } ps.in_or_st = true; /* this might be a structure or initialization * declaration */ ps.in_decl = ps.decl_on_line = true; if ( /* !ps.in_or_st && */ ps.dec_nest <= 0) ps.just_saw_decl = 2; prefix_blankline_requested = 0; for (i = 0; token[i++];); /* get length of token */ if (ps.ind_level == 0 || ps.dec_nest > 0) { /* global variable or struct member in local variable */ dec_ind = ps.decl_indent > 0 ? ps.decl_indent : i; tabs_to_var = (use_tabs ? ps.decl_indent > 0 : 0); } else { /* local variable */ dec_ind = ps.local_decl_indent > 0 ? ps.local_decl_indent : i; tabs_to_var = (use_tabs ? ps.local_decl_indent > 0 : 0); } goto copy_id; case ident: /* got an identifier or constant */ if (ps.in_decl) { /* if we are in a declaration, we must indent * identifier */ if (is_procname == 0 || !procnames_start_line) { if (!ps.block_init && !ps.dumped_decl_indent) { if (troff) { if (ps.want_blank) *e_code++ = ' '; sprintf(e_code, "\n.De %dp+\200p\n", dec_ind * 7); e_code += strlen(e_code); } else indent_declaration(dec_ind, tabs_to_var); ps.dumped_decl_indent = true; ps.want_blank = false; } } else { if (ps.want_blank) *e_code++ = ' '; ps.want_blank = false; if (dec_ind && s_code != e_code) { *e_code = '\0'; dump_line(); } dec_ind = 0; } } else if (sp_sw && ps.p_l_follow == 0) { sp_sw = false; force_nl = true; ps.last_u_d = true; ps.in_stmt = false; parse(hd_type); } copy_id: if (ps.want_blank) *e_code++ = ' '; - if (troff && ps.its_a_keyword) { + if (troff && ps.keyword) { e_code = chfont(&bodyf, &keywordf, e_code); for (t_ptr = token; *t_ptr; ++t_ptr) { CHECK_SIZE_CODE; *e_code++ = keywordf.allcaps && islower(*t_ptr) ? toupper(*t_ptr) : *t_ptr; } e_code = chfont(&keywordf, &bodyf, e_code); } else for (t_ptr = token; *t_ptr; ++t_ptr) { CHECK_SIZE_CODE; *e_code++ = *t_ptr; } ps.want_blank = true; break; case period: /* treat a period kind of like a binary * operation */ *e_code++ = '.'; /* move the period into line */ ps.want_blank = false; /* dont put a blank after a period */ break; case comma: ps.want_blank = (s_code != e_code); /* only put blank after comma * if comma does not start the * line */ if (ps.in_decl && is_procname == 0 && !ps.block_init && !ps.dumped_decl_indent) { /* indent leading commas and not the actual identifiers */ indent_declaration(dec_ind - 1, tabs_to_var); ps.dumped_decl_indent = true; } *e_code++ = ','; if (ps.p_l_follow == 0) { if (ps.block_init_level <= 0) ps.block_init = 0; if (break_comma && (!ps.leave_comma || compute_code_target() + (e_code - s_code) > max_col - 8)) force_nl = true; } break; case preesc: /* got the character '#' */ if ((s_com != e_com) || (s_lab != e_lab) || (s_code != e_code)) dump_line(); *e_lab++ = '#'; /* move whole line to 'label' buffer */ { int in_comment = 0; int com_start = 0; char quote = 0; int com_end = 0; while (*buf_ptr == ' ' || *buf_ptr == '\t') { buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); } while (*buf_ptr != '\n' || (in_comment && !had_eof)) { CHECK_SIZE_LAB; *e_lab = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); switch (*e_lab++) { case BACKSLASH: if (troff) *e_lab++ = BACKSLASH; if (!in_comment) { *e_lab++ = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); } break; case '/': if (*buf_ptr == '*' && !in_comment && !quote) { in_comment = 1; *e_lab++ = *buf_ptr++; com_start = e_lab - s_lab - 2; } break; case '"': if (quote == '"') quote = 0; break; case '\'': if (quote == '\'') quote = 0; break; case '*': if (*buf_ptr == '/' && in_comment) { in_comment = 0; *e_lab++ = *buf_ptr++; com_end = e_lab - s_lab; } break; } } while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) e_lab--; if (e_lab - s_lab == com_end && bp_save == NULL) { /* comment on preprocessor line */ if (sc_end == NULL) /* if this is the first comment, we * must set up the buffer */ sc_end = &(save_com[0]); else { *sc_end++ = '\n'; /* add newline between * comments */ *sc_end++ = ' '; --line_no; } bcopy(s_lab + com_start, sc_end, com_end - com_start); sc_end += com_end - com_start; if (sc_end >= &save_com[sc_size]) abort(); e_lab = s_lab + com_start; while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) e_lab--; bp_save = buf_ptr; /* save current input buffer */ be_save = buf_end; buf_ptr = save_com; /* fix so that subsequent calls to * lexi will take tokens out of * save_com */ *sc_end++ = ' '; /* add trailing blank, just in case */ buf_end = sc_end; sc_end = NULL; } *e_lab = '\0'; /* null terminate line */ ps.pcase = false; } if (strncmp(s_lab, "#if", 3) == 0) { /* also ifdef, ifndef */ if ((size_t)ifdef_level < nitems(state_stack)) { match_state[ifdef_level].tos = -1; state_stack[ifdef_level++] = ps; } else diag2(1, "#if stack overflow"); } else if (strncmp(s_lab, "#el", 3) == 0) { /* else, elif */ if (ifdef_level <= 0) diag2(1, s_lab[3] == 'i' ? "Unmatched #elif" : "Unmatched #else"); else { match_state[ifdef_level - 1] = ps; ps = state_stack[ifdef_level - 1]; } } else if (strncmp(s_lab, "#endif", 6) == 0) { if (ifdef_level <= 0) diag2(1, "Unmatched #endif"); else ifdef_level--; } else { struct directives { int size; const char *string; } recognized[] = { {7, "include"}, {6, "define"}, {5, "undef"}, {4, "line"}, {5, "error"}, {6, "pragma"} }; int d = nitems(recognized); while (--d >= 0) if (strncmp(s_lab + 1, recognized[d].string, recognized[d].size) == 0) break; if (d < 0) { diag2(1, "Unrecognized cpp directive"); break; } } if (blanklines_around_conditional_compilation) { postfix_blankline_requested++; n_real_blanklines = 0; } else { postfix_blankline_requested = 0; prefix_blankline_requested = 0; } break; /* subsequent processing of the newline * character will cause the line to be printed */ case comment: /* we have gotten a / followed by * this is a biggie */ if (flushed_nl) { /* we should force a broken line here */ flushed_nl = false; dump_line(); ps.want_blank = false; /* dont insert blank at line start */ force_nl = false; } pr_comment(); break; } /* end of big switch stmt */ *e_code = '\0'; /* make sure code section is null terminated */ if (type_code != comment && type_code != newline && type_code != preesc) ps.last_token = type_code; } /* end of main while (1) loop */ } /* * copy input file to backup file if in_name is /blah/blah/blah/file, then * backup file will be ".Bfile" then make the backup file the input and * original input file the output */ static void bakcopy(void) { int n, bakchn; char buff[8 * 1024]; const char *p; /* construct file name .Bfile */ for (p = in_name; *p; p++); /* skip to end of string */ while (p > in_name && *p != '/') /* find last '/' */ p--; if (*p == '/') p++; sprintf(bakfile, "%s.BAK", p); /* copy in_name to backup file */ bakchn = creat(bakfile, 0600); if (bakchn < 0) err(1, "%s", bakfile); while ((n = read(fileno(input), buff, sizeof(buff))) > 0) if (write(bakchn, buff, n) != n) err(1, "%s", bakfile); if (n < 0) err(1, "%s", in_name); close(bakchn); fclose(input); /* re-open backup file as the input file */ input = fopen(bakfile, "r"); if (input == NULL) err(1, "%s", bakfile); /* now the original input file will be the output */ output = fopen(in_name, "w"); if (output == NULL) { unlink(bakfile); err(1, "%s", in_name); } } static void indent_declaration(int cur_dec_ind, int tabs_to_var) { int pos = e_code - s_code; char *startpos = e_code; /* * get the tab math right for indentations that are not multiples of 8 */ if ((ps.ind_level * ps.ind_size) % 8 != 0) { pos += (ps.ind_level * ps.ind_size) % 8; cur_dec_ind += (ps.ind_level * ps.ind_size) % 8; } if (tabs_to_var) while ((pos & ~7) + 8 <= cur_dec_ind) { CHECK_SIZE_CODE; *e_code++ = '\t'; pos = (pos & ~7) + 8; } while (pos < cur_dec_ind) { CHECK_SIZE_CODE; *e_code++ = ' '; pos++; } if (e_code == startpos && ps.want_blank) { *e_code++ = ' '; ps.want_blank = false; } } Index: head/usr.bin/indent/indent_globs.h =================================================================== --- head/usr.bin/indent/indent_globs.h (revision 303717) +++ head/usr.bin/indent/indent_globs.h (revision 303718) @@ -1,335 +1,334 @@ /*- * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)indent_globs.h 8.1 (Berkeley) 6/6/93 * $FreeBSD$ */ #define BACKSLASH '\\' #define bufsize 200 /* size of internal buffers */ #define sc_size 5000 /* size of save_com buffer */ #define label_offset 2 /* number of levels a label is placed to left * of code */ #define tabsize 8 /* the size of a tab */ #define tabmask 0177770 /* mask used when figuring length of lines * with tabs */ #define false 0 #define true 1 FILE *input; /* the fid for the input file */ FILE *output; /* the output file */ #define CHECK_SIZE_CODE \ if (e_code >= l_code) { \ int nsize = l_code-s_code+400; \ int code_len = e_code-s_code; \ codebuf = (char *) realloc(codebuf, nsize); \ if (codebuf == NULL) \ err(1, NULL); \ e_code = codebuf + code_len + 1; \ l_code = codebuf + nsize - 5; \ s_code = codebuf + 1; \ } #define CHECK_SIZE_COM \ if (e_com >= l_com) { \ int nsize = l_com-s_com+400; \ int com_len = e_com - s_com; \ int blank_pos = last_bl - s_com; \ combuf = (char *) realloc(combuf, nsize); \ if (combuf == NULL) \ err(1, NULL); \ e_com = combuf + com_len + 1; \ last_bl = combuf + blank_pos + 1; \ l_com = combuf + nsize - 5; \ s_com = combuf + 1; \ } #define CHECK_SIZE_LAB \ if (e_lab >= l_lab) { \ int nsize = l_lab-s_lab+400; \ int label_len = e_lab - s_lab; \ labbuf = (char *) realloc(labbuf, nsize); \ if (labbuf == NULL) \ err(1, NULL); \ e_lab = labbuf + label_len + 1; \ l_lab = labbuf + nsize - 5; \ s_lab = labbuf + 1; \ } #define CHECK_SIZE_TOKEN \ if (e_token >= l_token) { \ int nsize = l_token-s_token+400; \ int token_len = e_token - s_token; \ tokenbuf = (char *) realloc(tokenbuf, nsize); \ if (tokenbuf == NULL) \ err(1, NULL); \ e_token = tokenbuf + token_len + 1; \ l_token = tokenbuf + nsize - 5; \ s_token = tokenbuf + 1; \ } char *labbuf; /* buffer for label */ char *s_lab; /* start ... */ char *e_lab; /* .. and end of stored label */ char *l_lab; /* limit of label buffer */ char *codebuf; /* buffer for code section */ char *s_code; /* start ... */ char *e_code; /* .. and end of stored code */ char *l_code; /* limit of code section */ char *combuf; /* buffer for comments */ char *s_com; /* start ... */ char *e_com; /* ... and end of stored comments */ char *l_com; /* limit of comment buffer */ #define token s_token char *tokenbuf; /* the last token scanned */ char *s_token; char *e_token; char *l_token; char *in_buffer; /* input buffer */ char *in_buffer_limit; /* the end of the input buffer */ char *buf_ptr; /* ptr to next character to be taken from * in_buffer */ char *buf_end; /* ptr to first after last char in in_buffer */ char save_com[sc_size]; /* input text is saved here when looking for * the brace after an if, while, etc */ char *sc_end; /* pointer into save_com buffer */ char *bp_save; /* saved value of buf_ptr when taking input * from save_com */ char *be_save; /* similarly saved value of buf_end */ int found_err; int pointer_as_binop; int blanklines_after_declarations; int blanklines_before_blockcomments; int blanklines_after_procs; int blanklines_around_conditional_compilation; int swallow_optional_blanklines; int n_real_blanklines; int prefix_blankline_requested; int postfix_blankline_requested; int break_comma; /* when true and not in parens, break after a * comma */ int btype_2; /* when true, brace should be on same line as * if, while, etc */ float case_ind; /* indentation level to be used for a "case * n:" */ int code_lines; /* count of lines with code */ int had_eof; /* set to true when input is exhausted */ int line_no; /* the current line number. */ int max_col; /* the maximum allowable line length */ int verbose; /* when true, non-essential error messages are * printed */ int cuddle_else; /* true if else should cuddle up to '}' */ int star_comment_cont; /* true iff comment continuation lines should * have stars at the beginning of each line. */ int comment_delimiter_on_blankline; int troff; /* true iff were generating troff input */ int procnames_start_line; /* if true, the names of procedures * being defined get placed in column * 1 (ie. a newline is placed between * the type of the procedure and its * name) */ int proc_calls_space; /* If true, procedure calls look like: * foo(bar) rather than foo (bar) */ int format_block_comments; /* true if comments beginning with * `/ * \n' are to be reformatted */ int format_col1_comments; /* If comments which start in column 1 * are to be magically reformatted * (just like comments that begin in * later columns) */ int inhibit_formatting; /* true if INDENT OFF is in effect */ int suppress_blanklines;/* set iff following blanklines should be * suppressed */ int continuation_indent;/* set to the indentation between the edge of * code and continuation lines */ int lineup_to_parens; /* if true, continued code within parens will * be lined up to the open paren */ int Bill_Shannon; /* true iff a blank should always be inserted * after sizeof */ int blanklines_after_declarations_at_proctop; /* This is vaguely * similar to * blanklines_after_decla * rations except that * it only applies to * the first set of * declarations in a * procedure (just after * the first '{') and it * causes a blank line * to be generated even * if there are no * declarations */ int block_comment_max_col; int extra_expression_indent; /* true if continuation lines from the * expression part of "if(e)", * "while(e)", "for(e;e;e)" should be * indented an extra tab stop so that * they don't conflict with the code * that follows */ int function_brace_split; /* split function declaration and * brace onto separate lines */ int use_tabs; /* set true to use tabs for spacing, * false uses all spaces */ int auto_typedefs; /* set true to recognize identifiers * ending in "_t" like typedefs */ /* -troff font state information */ struct fstate { char font[4]; char size; int allcaps:1; } __aligned(sizeof(int)); char *chfont(struct fstate *, struct fstate *, char *); struct fstate keywordf, /* keyword font */ stringf, /* string font */ boxcomf, /* Box comment font */ blkcomf, /* Block comment font */ scomf, /* Same line comment font */ bodyf; /* major body font */ #define STACKSIZE 256 struct parser_state { int last_token; struct fstate cfont; /* Current font */ int p_stack[STACKSIZE]; /* this is the parsers stack */ int il[STACKSIZE]; /* this stack stores indentation levels */ float cstk[STACKSIZE];/* used to store case stmt indentation levels */ int box_com; /* set to true when we are in a "boxed" * comment. In that case, the first non-blank * char should be lined up with the / in / followed by * */ int comment_delta, n_comment_delta; - int cast_mask; /* indicates which close parens close off - * casts */ - int sizeof_mask; /* indicates which close parens close off - * sizeof''s */ + int cast_mask; /* indicates which close parens potentially + * close off casts */ + int not_cast_mask; /* indicates which close parens definitely + * close off something else than casts */ int block_init; /* true iff inside a block initialization */ int block_init_level; /* The level of brace nesting in an * initialization */ int last_nl; /* this is true if the last thing scanned was * a newline */ int in_or_st; /* Will be true iff there has been a * declarator (e.g. int or char) and no left * paren since the last semicolon. When true, * a '{' is starting a structure definition or * an initialization list */ int bl_line; /* set to 1 by dump_line if the line is blank */ int col_1; /* set to true if the last token started in * column 1 */ int com_col; /* this is the column in which the current * comment should start */ int com_ind; /* the column in which comments to the right * of code should start */ int com_lines; /* the number of lines with comments, set by * dump_line */ int dec_nest; /* current nesting level for structure or init */ int decl_com_ind; /* the column in which comments after * declarations should be put */ int decl_on_line; /* set to true if this line of code has part * of a declaration on it */ int i_l_follow; /* the level to which ind_level should be set * after the current line is printed */ int in_decl; /* set to true when we are in a declaration * stmt. The processing of braces is then * slightly different */ int in_stmt; /* set to 1 while in a stmt */ int ind_level; /* the current indentation level */ int ind_size; /* the size of one indentation level */ int ind_stmt; /* set to 1 if next line should have an extra * indentation level because we are in the * middle of a stmt */ int last_u_d; /* set to true after scanning a token which * forces a following operator to be unary */ int leave_comma; /* if true, never break declarations after * commas */ int ljust_decl; /* true if declarations should be left * justified */ int out_coms; /* the number of comments processed, set by * pr_comment */ int out_lines; /* the number of lines written, set by * dump_line */ int p_l_follow; /* used to remember how to indent following * statement */ int paren_level; /* parenthesization level. used to indent * within statements */ short paren_indents[20]; /* column positions of each paren */ int pcase; /* set to 1 if the current line label is a * case. It is printed differently from a * regular label */ int search_brace; /* set to true by parse when it is necessary * to buffer up all info up to the start of a * stmt after an if, while, etc */ int unindent_displace; /* comments not to the right of code * will be placed this many * indentation levels to the left of * code */ int use_ff; /* set to one if the current line should be * terminated with a form feed */ int want_blank; /* set to true when the following token should * be prefixed by a blank. (Said prefixing is * ignored in some cases.) */ int else_if; /* True iff else if pairs should be handled * specially */ int decl_indent; /* column to indent declared identifiers to */ int local_decl_indent; /* like decl_indent but for locals */ - int its_a_keyword; - int sizeof_keyword; + int keyword; /* the type of a keyword or 0 */ int dumped_decl_indent; float case_indent; /* The distance to indent case labels from the * switch statement */ int in_parameter_declaration; int indent_parameters; int tos; /* pointer to top of stack */ char procname[100]; /* The name of the current procedure */ int just_saw_decl; } ps; int ifdef_level; int rparen_count; struct parser_state state_stack[5]; struct parser_state match_state[5]; Index: head/usr.bin/indent/lexi.c =================================================================== --- head/usr.bin/indent/lexi.c (revision 303717) +++ head/usr.bin/indent/lexi.c (revision 303718) @@ -1,606 +1,605 @@ /*- * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); /* * Here we have the token scanner for indent. It scans off one token and puts * it in the global variable "token". It returns a code, indicating the type * of token scanned. */ #include #include #include #include #include #include "indent_globs.h" #include "indent_codes.h" #include "indent.h" #define alphanum 1 #define opchar 3 struct templ { const char *rwd; int rwcode; }; struct templ specials[1000] = { - {"switch", 1}, - {"case", 2}, - {"break", 0}, + {"switch", 7}, + {"case", 8}, + {"break", 9}, {"struct", 3}, {"union", 3}, {"enum", 3}, - {"default", 2}, + {"default", 8}, {"int", 4}, {"char", 4}, {"float", 4}, {"double", 4}, {"long", 4}, {"short", 4}, {"typedef", 4}, {"unsigned", 4}, {"register", 4}, {"static", 4}, {"global", 4}, {"extern", 4}, {"void", 4}, {"const", 4}, {"volatile", 4}, - {"goto", 0}, - {"return", 0}, + {"goto", 9}, + {"return", 9}, {"if", 5}, {"while", 5}, {"for", 5}, {"else", 6}, {"do", 6}, - {"sizeof", 7}, + {"sizeof", 2}, + {"offsetof", 1}, {0, 0} }; char chartype[128] = { /* this is used to facilitate the decision of * what type (alphanumeric, operator) each * character is */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0 }; int lexi(void) { int unary_delim; /* this is set to 1 if the current token * forces a following operator to be unary */ static int last_code; /* the last token type returned */ static int l_struct; /* set to 1 if the last token was 'struct' */ int code; /* internal code to be returned */ char qchar; /* the delimiter character for a string */ e_token = s_token; /* point to start of place to save token */ unary_delim = false; ps.col_1 = ps.last_nl; /* tell world that this token started in * column 1 iff the last thing scanned was nl */ ps.last_nl = false; while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ ps.col_1 = false; /* leading blanks imply token is not in column * 1 */ if (++buf_ptr >= buf_end) fill_buffer(); } /* Scan an alphanumeric token */ if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { /* * we have a character or number */ const char *j; /* used for searching thru list of * * reserved words */ struct templ *p; if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { int seendot = 0, seenexp = 0, seensfx = 0; if (*buf_ptr == '0' && (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { *e_token++ = *buf_ptr++; *e_token++ = *buf_ptr++; while (isxdigit(*buf_ptr)) { CHECK_SIZE_TOKEN; *e_token++ = *buf_ptr++; } } else while (1) { if (*buf_ptr == '.') { if (seendot) break; else seendot++; } CHECK_SIZE_TOKEN; *e_token++ = *buf_ptr++; if (!isdigit(*buf_ptr) && *buf_ptr != '.') { if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) break; else { seenexp++; seendot++; CHECK_SIZE_TOKEN; *e_token++ = *buf_ptr++; if (*buf_ptr == '+' || *buf_ptr == '-') *e_token++ = *buf_ptr++; } } } while (1) { if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) { CHECK_SIZE_TOKEN; *e_token++ = *buf_ptr++; seensfx |= 1; continue; } if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) { CHECK_SIZE_TOKEN; if (buf_ptr[1] == buf_ptr[0]) *e_token++ = *buf_ptr++; *e_token++ = *buf_ptr++; seensfx |= 2; continue; } break; } } else while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) { /* fill_buffer() terminates buffer with newline */ if (*buf_ptr == BACKSLASH) { if (*(buf_ptr + 1) == '\n') { buf_ptr += 2; if (buf_ptr >= buf_end) fill_buffer(); } else break; } CHECK_SIZE_TOKEN; /* copy it over */ *e_token++ = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); } *e_token++ = '\0'; while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ if (++buf_ptr >= buf_end) fill_buffer(); } - ps.its_a_keyword = false; - ps.sizeof_keyword = false; + ps.keyword = 0; if (l_struct && !ps.p_l_follow) { /* if last token was 'struct' and we're not * in parentheses, then this token * should be treated as a declaration */ l_struct = false; last_code = ident; ps.last_u_d = true; return (decl); } ps.last_u_d = l_struct; /* Operator after identifier is binary * unless last token was 'struct' */ l_struct = false; last_code = ident; /* Remember that this is the code we will * return */ if (auto_typedefs) { const char *q = s_token; size_t q_len = strlen(q); /* Check if we have an "_t" in the end */ if (q_len > 2 && (strcmp(q + q_len - 2, "_t") == 0)) { - ps.its_a_keyword = true; + ps.keyword = 4; /* a type name */ ps.last_u_d = true; goto found_auto_typedef; } } /* * This loop will check if the token is a keyword. */ for (p = specials; (j = p->rwd) != NULL; p++) { const char *q = s_token; /* point at scanned token */ if (*j++ != *q++ || *j++ != *q++) continue; /* This test depends on the fact that * identifiers are always at least 1 character * long (ie. the first two bytes of the * identifier are always meaningful) */ if (q[-1] == 0) break; /* If its a one-character identifier */ while (*q++ == *j) if (*j++ == 0) goto found_keyword; /* I wish that C had a multi-level * break... */ } if (p->rwd) { /* we have a keyword */ found_keyword: - ps.its_a_keyword = true; + ps.keyword = p->rwcode; ps.last_u_d = true; switch (p->rwcode) { - case 1: /* it is a switch */ + case 7: /* it is a switch */ return (swstmt); - case 2: /* a case or default */ + case 8: /* a case or default */ return (casestmt); case 3: /* a "struct" */ /* * Next time around, we will want to know that we have had a * 'struct' */ l_struct = true; /* FALLTHROUGH */ case 4: /* one of the declaration keywords */ found_auto_typedef: if (ps.p_l_follow) { - ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.sizeof_mask; - break; /* inside parens: cast, param list or sizeof */ + /* inside parens: cast, param list, offsetof or sizeof */ + ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.not_cast_mask; + break; } last_code = decl; return (decl); case 5: /* if, while, for */ return (sp_paren); case 6: /* do, else */ return (sp_nparen); - case 7: - ps.sizeof_keyword = true; default: /* all others are treated like any other * identifier */ return (ident); } /* end of switch */ } /* end of if (found_it) */ if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { char *tp = buf_ptr; while (tp < buf_end) if (*tp++ == ')' && (*tp == ';' || *tp == ',')) goto not_proc; strncpy(ps.procname, token, sizeof ps.procname - 1); ps.in_parameter_declaration = 1; rparen_count = 1; not_proc:; } /* * The following hack attempts to guess whether or not the current * token is in fact a declaration keyword -- one that has been * typedefd */ if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') && !ps.p_l_follow && !ps.block_init && (ps.last_token == rparen || ps.last_token == semicolon || ps.last_token == decl || ps.last_token == lbrace || ps.last_token == rbrace)) { - ps.its_a_keyword = true; + ps.keyword = 4; /* a type name */ ps.last_u_d = true; last_code = decl; return decl; } if (last_code == decl) /* if this is a declared variable, then * following sign is unary */ ps.last_u_d = true; /* will make "int a -1" work */ last_code = ident; return (ident); /* the ident is not in the list */ } /* end of procesing for alpanum character */ /* Scan a non-alphanumeric token */ *e_token++ = *buf_ptr; /* if it is only a one-character token, it is * moved here */ *e_token = '\0'; if (++buf_ptr >= buf_end) fill_buffer(); switch (*token) { case '\n': unary_delim = ps.last_u_d; ps.last_nl = true; /* remember that we just had a newline */ code = (had_eof ? 0 : newline); /* * if data has been exhausted, the newline is a dummy, and we should * return code to stop */ break; case '\'': /* start of quoted character */ case '"': /* start of string */ qchar = *token; if (troff) { e_token[-1] = '`'; if (qchar == '"') *e_token++ = '`'; e_token = chfont(&bodyf, &stringf, e_token); } do { /* copy the string */ while (1) { /* move one character or [/] */ if (*buf_ptr == '\n') { diag2(1, "Unterminated literal"); goto stop_lit; } CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, * since CHECK_SIZE guarantees that there * are at least 5 entries left */ *e_token = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); if (*e_token == BACKSLASH) { /* if escape, copy extra char */ if (*buf_ptr == '\n') /* check for escaped newline */ ++line_no; if (troff) { *++e_token = BACKSLASH; if (*buf_ptr == BACKSLASH) *++e_token = BACKSLASH; } *++e_token = *buf_ptr++; ++e_token; /* we must increment this again because we * copied two chars */ if (buf_ptr >= buf_end) fill_buffer(); } else break; /* we copied one character */ } /* end of while (1) */ } while (*e_token++ != qchar); if (troff) { e_token = chfont(&stringf, &bodyf, e_token - 1); if (qchar == '"') *e_token++ = '\''; } stop_lit: code = ident; break; case ('('): case ('['): unary_delim = true; code = lparen; break; case (')'): case (']'): code = rparen; break; case '#': unary_delim = ps.last_u_d; code = preesc; break; case '?': unary_delim = true; code = question; break; case (':'): code = colon; unary_delim = true; break; case (';'): unary_delim = true; code = semicolon; break; case ('{'): unary_delim = true; /* * if (ps.in_or_st) ps.block_init = 1; */ /* ? code = ps.block_init ? lparen : lbrace; */ code = lbrace; break; case ('}'): unary_delim = true; /* ? code = ps.block_init ? rparen : rbrace; */ code = rbrace; break; case 014: /* a form feed */ unary_delim = ps.last_u_d; ps.last_nl = true; /* remember this so we can set 'ps.col_1' * right */ code = form_feed; break; case (','): unary_delim = true; code = comma; break; case '.': unary_delim = false; code = period; break; case '-': case '+': /* check for -, +, --, ++ */ code = (ps.last_u_d ? unary_op : binary_op); unary_delim = true; if (*buf_ptr == token[0]) { /* check for doubled character */ *e_token++ = *buf_ptr++; /* buffer overflow will be checked at end of loop */ if (last_code == ident || last_code == rparen) { code = (ps.last_u_d ? unary_op : postop); /* check for following ++ or -- */ unary_delim = false; } } else if (*buf_ptr == '=') /* check for operator += */ *e_token++ = *buf_ptr++; else if (*buf_ptr == '>') { /* check for operator -> */ *e_token++ = *buf_ptr++; if (!pointer_as_binop) { unary_delim = false; code = unary_op; ps.want_blank = false; } } break; /* buffer overflow will be checked at end of * switch */ case '=': if (ps.in_or_st) ps.block_init = 1; #ifdef undef if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ e_token[-1] = *buf_ptr++; if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) *e_token++ = *buf_ptr++; *e_token++ = '='; /* Flip =+ to += */ *e_token = 0; } #else if (*buf_ptr == '=') {/* == */ *e_token++ = '='; /* Flip =+ to += */ buf_ptr++; *e_token = 0; } #endif code = binary_op; unary_delim = true; break; /* can drop thru!!! */ case '>': case '<': case '!': /* ops like <, <<, <=, !=, etc */ if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { *e_token++ = *buf_ptr; if (++buf_ptr >= buf_end) fill_buffer(); } if (*buf_ptr == '=') *e_token++ = *buf_ptr++; code = (ps.last_u_d ? unary_op : binary_op); unary_delim = true; break; default: if (token[0] == '/' && *buf_ptr == '*') { /* it is start of comment */ *e_token++ = '*'; if (++buf_ptr >= buf_end) fill_buffer(); code = comment; unary_delim = ps.last_u_d; break; } while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { /* * handle ||, &&, etc, and also things as in int *****i */ *e_token++ = *buf_ptr; if (++buf_ptr >= buf_end) fill_buffer(); } code = (ps.last_u_d ? unary_op : binary_op); unary_delim = true; } /* end of switch */ if (code != newline) { l_struct = false; last_code = code; } if (buf_ptr >= buf_end) /* check for input buffer empty */ fill_buffer(); ps.last_u_d = unary_delim; *e_token = '\0'; /* null terminate the token */ return (code); } /* * Add the given keyword to the keyword table, using val as the keyword type */ void addkey(char *key, int val) { struct templ *p = specials; while (p->rwd) if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) return; else p++; if (p >= specials + sizeof specials / sizeof specials[0]) return; /* For now, table overflows are silently * ignored */ p->rwd = key; p->rwcode = val; p[1].rwd = NULL; p[1].rwcode = 0; }