src/tools/pg_bsd_indent/lexi.c - cloudberry - Git at Google

 /*-
  * Copyright (c) 1985 Sun Microsystems, Inc.
  * Copyright (c) 1980, 1993
  *	The Regents of the University of California.  All rights reserved.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */

 #if 0
 #ifndef lint
 static char sccsid[] = "@(#)lexi.c	8.1 (Berkeley) 6/6/93";
 #endif /* not lint */
 #endif

 #include "c.h"

 /*
  * Here we have the token scanner for indent.  It scans off one token and puts
  * it in the global variable "token".  It returns a code, indicating the type
  * of token scanned.
  */

 #include <err.h>
 #include <stdio.h>
 #include <ctype.h>
 #include <stdlib.h>
 #include <string.h>
 #include "indent_globs.h"
 #include "indent_codes.h"
 #include "indent.h"

 #define alphanum 1
 #ifdef undef
 #define opchar 3
 #endif

 /*
  * One reserved word together with the classification code that lexi()
  * stores in state->keyword when it scans that word.
  */
 struct templ {
     const char *rwd;		/* keyword spelling; must remain the first
 				 * member (see the table comment below) */
     int         rwcode;		/* classification code */
 };

 /*
  * Reserved-word table, looked up with bsearch() in lexi().
  *
  * The entries have to stay in strcmp() order because of the binary search,
  * and the string has to be the first member of struct templ because the
  * comparison callback reads each element as a pointer to a string.
  *
  * How lexi() treats the codes:
  *    3      struct/union/enum      -> structure
  *    4      declaration keywords   -> decl (outside parentheses)
  *    5      if, while, for         -> sp_paren
  *    6      do, else               -> sp_nparen
  *    7      switch                 -> swstmt
  *    8      case, default          -> casestmt
  *   10      storage classes        -> storage
  *   11      typedef                -> type_def
  *   other   (1, 2, 9, 12)          -> ident
  */
 struct templ specials[] =
 {
     {.rwd = "_Bool", .rwcode = 4},
     {.rwd = "_Complex", .rwcode = 4},
     {.rwd = "_Imaginary", .rwcode = 4},
     {.rwd = "auto", .rwcode = 10},
     {.rwd = "bool", .rwcode = 4},
     {.rwd = "break", .rwcode = 9},
     {.rwd = "case", .rwcode = 8},
     {.rwd = "char", .rwcode = 4},
     {.rwd = "complex", .rwcode = 4},
     {.rwd = "const", .rwcode = 4},
     {.rwd = "continue", .rwcode = 12},
     {.rwd = "default", .rwcode = 8},
     {.rwd = "do", .rwcode = 6},
     {.rwd = "double", .rwcode = 4},
     {.rwd = "else", .rwcode = 6},
     {.rwd = "enum", .rwcode = 3},
     {.rwd = "extern", .rwcode = 10},
     {.rwd = "float", .rwcode = 4},
     {.rwd = "for", .rwcode = 5},
     {.rwd = "global", .rwcode = 4},
     {.rwd = "goto", .rwcode = 9},
     {.rwd = "if", .rwcode = 5},
     {.rwd = "imaginary", .rwcode = 4},
     {.rwd = "inline", .rwcode = 12},
     {.rwd = "int", .rwcode = 4},
     {.rwd = "long", .rwcode = 4},
     {.rwd = "offsetof", .rwcode = 1},
     {.rwd = "register", .rwcode = 10},
     {.rwd = "restrict", .rwcode = 12},
     {.rwd = "return", .rwcode = 9},
     {.rwd = "short", .rwcode = 4},
     {.rwd = "signed", .rwcode = 4},
     {.rwd = "sizeof", .rwcode = 2},
     {.rwd = "static", .rwcode = 10},
     {.rwd = "struct", .rwcode = 3},
     {.rwd = "switch", .rwcode = 7},
     {.rwd = "typedef", .rwcode = 11},
     {.rwd = "union", .rwcode = 3},
     {.rwd = "unsigned", .rwcode = 4},
     {.rwd = "void", .rwcode = 4},
     {.rwd = "volatile", .rwcode = 4},
     {.rwd = "while", .rwcode = 5}
 };

 /*
  * Table of additional type names, kept in strcmp() order by add_typename()
  * and searched with bsearch() in lexi().
  */
 const char **typenames;
 /* Number of slots currently allocated for typenames[]. */
 int         typename_count;
 /* Index of the last used slot of typenames[]; -1 while the table is empty. */
 int         typename_top = -1;

 /*
  * Character class for each 7-bit character code; lexi() indexes it with
  * (character & 127).  1 is "alphanum" (may be part of an identifier or
  * number), 3 is the operator class ("opchar", only referenced under
  * "#ifdef undef"), 0 is everything else.
  */
 char        chartype[128] =
 {				/* this is used to facilitate the decision of
 				 * what type (alphanumeric, operator) each
 				 * character is */
     0, 0, 0, 0, 0, 0, 0, 0,	/* 0x00 - 0x07: control characters */
     0, 0, 0, 0, 0, 0, 0, 0,	/* 0x08 - 0x0f: control characters */
     0, 0, 0, 0, 0, 0, 0, 0,	/* 0x10 - 0x17: control characters */
     0, 0, 0, 0, 0, 0, 0, 0,	/* 0x18 - 0x1f: control characters */
     0, 3, 0, 0, 1, 3, 3, 0,	/* space ! " # $ % & ' ('$' counts as alphanum) */
     0, 0, 3, 3, 0, 3, 0, 3,	/* ( ) star plus comma minus dot slash */
     1, 1, 1, 1, 1, 1, 1, 1,	/* 0 - 7 */
     1, 1, 0, 0, 3, 3, 3, 3,	/* 8 9 : ; < = > ? */
     0, 1, 1, 1, 1, 1, 1, 1,	/* @ A - G */
     1, 1, 1, 1, 1, 1, 1, 1,	/* H - O */
     1, 1, 1, 1, 1, 1, 1, 1,	/* P - W */
     1, 1, 1, 0, 0, 0, 3, 1,	/* X Y Z [ backslash ] ^ _ */
     0, 1, 1, 1, 1, 1, 1, 1,	/* ` a - g */
     1, 1, 1, 1, 1, 1, 1, 1,	/* h - o */
     1, 1, 1, 1, 1, 1, 1, 1,	/* p - w */
     1, 1, 1, 0, 3, 0, 3, 0	/* x y z { | } ~ DEL */
 };

 /*
  * bsearch() comparison callback.
  *
  * e1 is the search key, a NUL-terminated string.  e2 points at a table
  * element whose first bytes are a "const char *" (an entry of typenames[],
  * or a struct templ, whose first member is the keyword string).
  * Returns the strcmp() result of the key against that string.
  */
 static int
 strcmp_type(const void *e1, const void *e2)
 {
     const char *key = e1;
     const char *const *element = e2;

     return strcmp(key, *element);
 }

 /*
  * Decide whether "foo(..." is a function definition or declaration.
  *
  * tp points at the '(' that follows the candidate name.  Scan forward,
  * first through the rest of the current buffer and then through
  * lookahead(), for the first '{', ';' or ',' that is not within
  * parentheses or comments.  Returns true (a definition) if that character
  * is '{'; returns false for ';' or ',', for a ')' that closes more
  * parentheses than were opened, and when EOF is reached first.
  *
  * Note that this rule is fooled by K&R-style parameter declarations,
  * but telling the difference between those and function attributes
  * seems like more trouble than it's worth.  This code could also be
  * fooled by mismatched parens or apparent comment starts within string
  * literals, but that seems unlikely in the context it's used in.
  */
 static int
 is_func_definition(char *tp)
 {
     int		paren_depth = 0;
     int		in_comment = false;
     int		in_slash_comment = false;
     int		lastc = 0;

     /* We may need to look past the end of the current buffer. */
     lookahead_reset();
     for (;;) {
 	int	    c;

 	/* Fetch next character. */
 	if (tp < buf_end)
 	    c = *tp++;
 	else {
 	    c = lookahead();
 	    if (c == EOF)
 		break;
 	}
 	/*
 	 * Handle comments.  Once a two-character delimiter has been
 	 * recognized, forget its last character (c = 0) so that it cannot
 	 * also serve as the first character of another delimiter: the
 	 * sequence slash-star-slash must not close the comment it opens, and
 	 * a slash directly after a comment terminator must not start a new
 	 * comment.
 	 */
 	if (in_comment) {
 	    if (lastc == '*' && c == '/') {
 		in_comment = false;
 		c = 0;
 	    }
 	} else if (in_slash_comment) {
 	    if (c == '\n')
 		in_slash_comment = false;
 	} else if (lastc == '/' && c == '*') {
 	    in_comment = true;
 	    c = 0;
 	} else if (lastc == '/' && c == '/')
 	    in_slash_comment = true;
 	/* Count nested parens properly. */
 	else if (c == '(')
 	    paren_depth++;
 	else if (c == ')') {
 	    paren_depth--;
 	    /*
 	     * If we find unbalanced parens, we must have started inside a
 	     * declaration.
 	     */
 	    if (paren_depth < 0)
 		return false;
 	} else if (paren_depth == 0) {
 	    /* We are outside any parentheses or comments. */
 	    if (c == '{')
 		return true;
 	    else if (c == ';' || c == ',')
 		return false;
 	}
 	lastc = c;
     }
     /* Hit EOF --- for lack of anything better, assume "not a definition". */
     return false;
 }

 /*
  * lexi -- scan one token from the input buffer.
  *
  * The token text is copied to the token buffer starting at s_token, with
  * e_token left pointing at its terminating NUL, and buf_ptr is advanced
  * past the token (calling fill_buffer() when buf_end is reached).  The
  * return value is the token's code (ident, decl, lparen, binary_op, ...).
  *
  * Besides returning the code, this updates several parser_state fields:
  * col_1 and last_nl (column-1 tracking), last_u_d (whether a following
  * operator should be taken as unary), keyword (classification of a
  * reserved word or type name), and in some cases cast_mask, procname,
  * in_parameter_declaration, block_init and want_blank.
  *
  * Identifier/number tokens return directly from inside the first "if";
  * all other tokens go through the switch and the common exit at the end.
  */
 int
 lexi(struct parser_state *state)
 {
     int         unary_delim;	/* this is set to 1 if the current token
 				 * forces a following operator to be unary */
     int         code;		/* internal code to be returned */
     char        qchar;		/* the delimiter character for a string */

     e_token = s_token;		/* point to start of place to save token */
     unary_delim = false;
     state->col_1 = state->last_nl;	/* tell world that this token started
 					 * in column 1 iff the last thing
 					 * scanned was a newline */
     state->last_nl = false;

     while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
 	state->col_1 = false;	/* leading blanks imply token is not in column
 				 * 1 */
 	if (++buf_ptr >= buf_end)
 	    fill_buffer();
     }

     /* Scan an alphanumeric token */
     if (chartype[*buf_ptr & 127] == alphanum ||
 	(buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
 	/*
 	 * we have a character or number
 	 */
 	struct templ *p;

 	if (isdigit((unsigned char)*buf_ptr) ||
 	    (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
 	    /* numeric literal */
 	    int         seendot = 0,
 	                seenexp = 0,
 			seensfx = 0;

 	    /*
 	     * base 2, base 8, base 16:
 	     */
 	    if (buf_ptr[0] == '0' && buf_ptr[1] != '.') {
 		int len;

 		if (buf_ptr[1] == 'b' || buf_ptr[1] == 'B')
 		    len = strspn(buf_ptr + 2, "01") + 2;
 		else if (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')
 		    len = strspn(buf_ptr + 2, "0123456789ABCDEFabcdef") + 2;
 		else
 		    /*
 		     * NOTE(review): '8' is accepted here although it is not
 		     * an octal digit, while '9' is not -- confirm whether
 		     * this is intended.
 		     */
 		    len = strspn(buf_ptr + 1, "012345678") + 1;
 		/* len is at least 1 here, so the else branch is not taken */
 		if (len > 0) {
 		    CHECK_SIZE_TOKEN(len);
 		    memcpy(e_token, buf_ptr, len);
 		    e_token += len;
 		    buf_ptr += len;
 		}
 		else
 		    diag2(1, "Unterminated literal");
 	    }
 	    else		/* base 10: */
 		while (1) {
 		    if (*buf_ptr == '.') {
 			if (seendot)
 			    break;
 			else
 			    seendot++;
 		    }
 		    CHECK_SIZE_TOKEN(3);
 		    *e_token++ = *buf_ptr++;
 		    if (!isdigit((unsigned char)*buf_ptr) && *buf_ptr != '.') {
 			if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
 			    break;
 			else {
 			    /* exponent: no '.' may follow it */
 			    seenexp++;
 			    seendot++;
 			    *e_token++ = *buf_ptr++;
 			    if (*buf_ptr == '+' || *buf_ptr == '-')
 				*e_token++ = *buf_ptr++;
 			}
 		    }
 		}

 	    /*
 	     * Suffixes: at most one U/u (bit 1 of seensfx) and at most one
 	     * of f, F, l, L, possibly doubled as in "LL" (bit 2).
 	     */
 	    while (1) {
 		CHECK_SIZE_TOKEN(2);
 		if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) {
 		    *e_token++ = *buf_ptr++;
 		    seensfx |= 1;
 		    continue;
 		}
 		if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) {
 		    if (buf_ptr[1] == buf_ptr[0])
 		        *e_token++ = *buf_ptr++;
 		    *e_token++ = *buf_ptr++;
 		    seensfx |= 2;
 		    continue;
 		}
 		break;
 	    }
 	}
 	else
 	    /* identifier or keyword; backslash-newline inside it is skipped */
 	    while (chartype[*buf_ptr & 127] == alphanum || *buf_ptr == BACKSLASH) {
 		/* fill_buffer() terminates buffer with newline */
 		if (*buf_ptr == BACKSLASH) {
 		    if (*(buf_ptr + 1) == '\n') {
 			buf_ptr += 2;
 			if (buf_ptr >= buf_end)
 			    fill_buffer();
 			} else
 			    break;
 		}
 		CHECK_SIZE_TOKEN(1);
 		/* copy it over */
 		*e_token++ = *buf_ptr++;
 		if (buf_ptr >= buf_end)
 		    fill_buffer();
 	    }
 	*e_token = '\0';

 	/* a lone "L" directly before a quote is a literal prefix */
 	if (s_token[0] == 'L' && s_token[1] == '\0' &&
 	      (*buf_ptr == '"' || *buf_ptr == '\''))
 	    return (strpfx);

 	while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
 	    if (++buf_ptr >= buf_end)
 		fill_buffer();
 	}
 	state->keyword = 0;
 	if (state->last_token == structure && !state->p_l_follow) {
 				/* if last token was 'struct' and we're not
 				 * in parentheses, then this token
 				 * should be treated as a declaration */
 	    state->last_u_d = true;
 	    return (decl);
 	}
 	/*
 	 * Operator after identifier is binary unless last token was 'struct'
 	 */
 	state->last_u_d = (state->last_token == structure);

 	/* look the token up in the reserved-word table */
 	p = bsearch(s_token,
 	    specials,
 	    sizeof(specials) / sizeof(specials[0]),
 	    sizeof(specials[0]),
 	    strcmp_type);
 	if (p == NULL) {	/* not a special keyword... */
 	    char *u;

 	    /* ... so maybe a type_t or a typedef */
 	    if ((auto_typedefs && ((u = strrchr(s_token, '_')) != NULL) &&
 	        strcmp(u, "_t") == 0) || (typename_top >= 0 &&
 		  bsearch(s_token, typenames, typename_top + 1,
 		    sizeof(typenames[0]), strcmp_type))) {
 		state->keyword = 4;	/* a type name */
 		state->last_u_d = true;
 		/* jumps into the switch below with p still NULL */
 	        goto found_typename;
 	    }
 	} else {			/* we have a keyword */
 	    state->keyword = p->rwcode;
 	    state->last_u_d = true;
 	    switch (p->rwcode) {
 	    case 7:		/* it is a switch */
 		return (swstmt);
 	    case 8:		/* a case or default */
 		return (casestmt);

 	    case 3:		/* a "struct" */
 		/* FALLTHROUGH */
 	    case 4:		/* one of the declaration keywords */
 	    found_typename:
 		if (state->p_l_follow) {
 		    /* inside parens: cast, param list, offsetof or sizeof */
 		    state->cast_mask |= (1 << state->p_l_follow) & ~state->not_cast_mask;
 		}
 		if (state->last_token == period || state->last_token == unary_op) {
 		    state->keyword = 0;
 		    break;
 		}
 		/* p is NULL when we arrived via "goto found_typename" */
 		if (p != NULL && p->rwcode == 3)
 		    return (structure);
 		if (state->p_l_follow)
 		    break;
 		return (decl);

 	    case 5:		/* if, while, for */
 		return (sp_paren);

 	    case 6:		/* do, else */
 		return (sp_nparen);

 	    case 10:		/* storage class specifier */
 		return (storage);

 	    case 11:		/* typedef */
 		return (type_def);

 	    default:		/* all others are treated like any other
 				 * identifier */
 		return (ident);
 	    }			/* end of switch */
 	}			/* end of if (found_it) */
 	if (*buf_ptr == '(' && state->tos <= 1 && state->ind_level == 0 &&
 	    state->in_parameter_declaration == 0 && state->block_init == 0) {
 	    if (is_func_definition(buf_ptr)) {
 		/*
 		 * NOTE(review): strncpy() does not write a terminating NUL
 		 * when token fills the copied length, so this relies on the
 		 * last byte of procname already being zero -- confirm.
 		 */
 		strncpy(state->procname, token, sizeof state->procname - 1);
 		if (state->in_decl)
 		    state->in_parameter_declaration = 1;
 		return (funcname);
 	    }
 	}
 	/*
 	 * The following hack attempts to guess whether or not the current
 	 * token is in fact a declaration keyword -- one that has been
 	 * typedefd
 	 */
 	else if (!state->p_l_follow && !state->block_init &&
 	    !state->in_stmt &&
 	    ((*buf_ptr == '*' && buf_ptr[1] != '=') ||
 		isalpha((unsigned char)*buf_ptr)) &&
 	    (state->last_token == semicolon || state->last_token == lbrace ||
 		state->last_token == rbrace)) {
 	    state->keyword = 4;	/* a type name */
 	    state->last_u_d = true;
 	    return decl;
 	}
 	if (state->last_token == decl)	/* if this is a declared variable,
 					 * then following sign is unary */
 	    state->last_u_d = true;	/* will make "int a -1" work */
 	return (ident);		/* the ident is not in the list */
     }				/* end of processing for alphanum character */

     /* Scan a non-alphanumeric token */

     CHECK_SIZE_TOKEN(3);		/* things like "<<=" */
     *e_token++ = *buf_ptr;		/* if it is only a one-character token, it is
 				 * moved here */
     *e_token = '\0';
     if (++buf_ptr >= buf_end)
 	fill_buffer();

     /* dispatch on the first character of the token just stored */
     switch (*token) {
     case '\n':
 	unary_delim = state->last_u_d;
 	state->last_nl = true;	/* remember that we just had a newline */
 	code = (had_eof ? 0 : newline);

 	/*
 	 * if data has been exhausted, the newline is a dummy, and we should
 	 * return code to stop
 	 */
 	break;

     case '\'':			/* start of quoted character */
     case '"':			/* start of string */
 	qchar = *token;
 	do {			/* copy the string */
 	    while (1) {		/* move one character or [/<char>]<char> */
 		if (*buf_ptr == '\n') {
 		    diag2(1, "Unterminated literal");
 		    goto stop_lit;
 		}
 		CHECK_SIZE_TOKEN(2);
 		*e_token = *buf_ptr++;
 		if (buf_ptr >= buf_end)
 		    fill_buffer();
 		if (*e_token == BACKSLASH) {	/* if escape, copy extra char */
 		    if (*buf_ptr == '\n')	/* check for escaped newline */
 			++line_no;
 		    *++e_token = *buf_ptr++;
 		    ++e_token;	/* we must increment this again because we
 				 * copied two chars */
 		    if (buf_ptr >= buf_end)
 			fill_buffer();
 		}
 		else
 		    break;	/* we copied one character */
 	    }			/* end of while (1) */
 	} while (*e_token++ != qchar);	/* stop at the closing delimiter */
 stop_lit:
 	code = ident;
 	break;

     case ('('):
     case ('['):
 	unary_delim = true;
 	code = lparen;
 	break;

     case (')'):
     case (']'):
 	code = rparen;
 	break;

     case '#':
 	unary_delim = state->last_u_d;
 	code = preesc;
 	break;

     case '?':
 	unary_delim = true;
 	code = question;
 	break;

     case (':'):
 	code = colon;
 	unary_delim = true;
 	break;

     case (';'):
 	unary_delim = true;
 	code = semicolon;
 	break;

     case ('{'):
 	unary_delim = true;

 	/*
 	 * if (state->in_or_st) state->block_init = 1;
 	 */
 	/* ?	code = state->block_init ? lparen : lbrace; */
 	code = lbrace;
 	break;

     case ('}'):
 	unary_delim = true;
 	/* ?	code = state->block_init ? rparen : rbrace; */
 	code = rbrace;
 	break;

     case 014:			/* a form feed */
 	unary_delim = state->last_u_d;
 	state->last_nl = true;	/* remember this so we can set 'state->col_1'
 				 * right */
 	code = form_feed;
 	break;

     case (','):
 	unary_delim = true;
 	code = comma;
 	break;

     case '.':
 	unary_delim = false;
 	code = period;
 	break;

     case '-':
     case '+':			/* check for -, +, --, ++ */
 	code = (state->last_u_d ? unary_op : binary_op);
 	unary_delim = true;

 	if (*buf_ptr == token[0]) {
 	    /* check for doubled character */
 	    *e_token++ = *buf_ptr++;
 	    /* buffer overflow will be checked at end of loop */
 	    if (state->last_token == ident || state->last_token == rparen) {
 		code = (state->last_u_d ? unary_op : postop);
 		/* check for following ++ or -- */
 		unary_delim = false;
 	    }
 	}
 	else if (*buf_ptr == '=')
 	    /* check for operator += */
 	    *e_token++ = *buf_ptr++;
 	else if (*buf_ptr == '>') {
 	    /* check for operator -> */
 	    *e_token++ = *buf_ptr++;
 	    unary_delim = false;
 	    code = unary_op;
 	    state->want_blank = false;
 	}
 	break;			/* buffer overflow will be checked at end of
 				 * switch */

     case '=':
 	if (state->in_or_st)
 	    state->block_init = 1;
 #ifdef undef
 	if (chartype[*buf_ptr & 127] == opchar) {	/* we have two char assignment */
 	    e_token[-1] = *buf_ptr++;
 	    if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
 		*e_token++ = *buf_ptr++;
 	    *e_token++ = '=';	/* Flip =+ to += */
 	    *e_token = 0;
 	}
 #else
 	if (*buf_ptr == '=') {/* == */
 	    *e_token++ = '=';	/* append the second '=' of "==" */
 	    buf_ptr++;
 	    *e_token = 0;
 	}
 #endif
 	code = binary_op;
 	unary_delim = true;
 	break;
 	/* not reached: the break above ends this case, nothing drops through */

     case '>':
     case '<':
     case '!':			/* ops like <, <<, <=, !=, etc */
 	if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
 	    *e_token++ = *buf_ptr;
 	    if (++buf_ptr >= buf_end)
 		fill_buffer();
 	}
 	if (*buf_ptr == '=')
 	    *e_token++ = *buf_ptr++;
 	code = (state->last_u_d ? unary_op : binary_op);
 	unary_delim = true;
 	break;

     case '*':
 	unary_delim = true;
 	if (!state->last_u_d) {
 	    /* binary context: '*' or '*=' */
 	    if (*buf_ptr == '=')
 		*e_token++ = *buf_ptr++;
 	    code = binary_op;
 	    break;
 	}
 	/* unary context: gather a run of '*', dropping whitespace between */
 	while (*buf_ptr == '*' || isspace((unsigned char)*buf_ptr)) {
 	    if (*buf_ptr == '*') {
 		CHECK_SIZE_TOKEN(1);
 		*e_token++ = *buf_ptr;
 	    }
 	    if (++buf_ptr >= buf_end)
 		fill_buffer();
 	}
 	code = unary_op;
 	break;

     default:
 	if (token[0] == '/' && *buf_ptr == '*') {
 	    /* it is start of comment */
 	    *e_token++ = '*';

 	    if (++buf_ptr >= buf_end)
 		fill_buffer();

 	    code = comment;
 	    unary_delim = state->last_u_d;
 	    break;
 	}
 	while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
 	    /*
 	     * handle ||, &&, etc, and also things as in int *****i
 	     */
 	    CHECK_SIZE_TOKEN(1);
 	    *e_token++ = *buf_ptr;
 	    if (++buf_ptr >= buf_end)
 		fill_buffer();
 	}
 	code = (state->last_u_d ? unary_op : binary_op);
 	unary_delim = true;


     }				/* end of switch */
     if (buf_ptr >= buf_end)	/* check for input buffer empty */
 	fill_buffer();
     state->last_u_d = unary_delim;
     CHECK_SIZE_TOKEN(1);
     *e_token = '\0';		/* null terminate the token */
     return (code);
 }

 /*
  * Allocate the initial typenames[] array with room for 16 entries and
  * record that capacity in typename_count.  Exits through err() if the
  * allocation fails.
  */
 void
 alloc_typenames(void)
 {
     typename_count = 16;
     typenames = malloc(sizeof(*typenames) * typename_count);
     if (typenames == NULL)
 	err(1, NULL);
 }

 /*
  * Insert a copy of "key" into the sorted typenames[] table.
  *
  * The table is kept in strcmp() order so that it can be searched with
  * bsearch(); a key that is already present is ignored.  The array is grown
  * when it is full, and also when it has never been allocated
  * (typename_count == 0), in which case it starts at 16 slots instead of
  * requesting a zero-size block.  Exits through err() if memory cannot be
  * obtained.
  */
 void
 add_typename(const char *key)
 {
     int         pos;		/* slot the new entry will occupy */
     char       *copy;

     if (typename_top + 1 >= typename_count) {
 	int         new_count = (typename_count > 0) ? typename_count * 2 : 16;
 	const char **grown;

 	/* keep the old pointer until the new block is known to be valid */
 	grown = realloc((void *)typenames, sizeof(typenames[0]) * new_count);
 	if (grown == NULL)
 	    err(1, NULL);
 	typenames = grown;
 	typename_count = new_count;
     }

     if (typename_top == -1)
 	pos = 0;
     else {
 	int         comparison = strcmp(key, typenames[typename_top]);

 	if (comparison == 0)	/* remove duplicates */
 	    return;
 	if (comparison > 0)
 	    /* take advantage of sorted input: append at the end */
 	    pos = typename_top + 1;
 	else {
 	    /* the last entry is greater than key, so this scan terminates */
 	    for (pos = 0; (comparison = strcmp(key, typenames[pos])) > 0; pos++)
 		/* find place for the new key */;
 	    if (comparison == 0)	/* remove duplicates */
 		return;
 	}
     }

     copy = strdup(key);
     if (copy == NULL)
 	err(1, NULL);

     /* open a gap at pos unless we are appending */
     if (pos <= typename_top)
 	memmove(&typenames[pos + 1], &typenames[pos],
 	    sizeof(typenames[0]) * (typename_top + 1 - pos));
     typenames[pos] = copy;
     typename_top++;
 }
	/*-
	* Copyright (c) 1985 Sun Microsystems, Inc.
	* Copyright (c) 1980, 1993
	* The Regents of the University of California. All rights reserved.
	* All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* 3. Neither the name of the University nor the names of its contributors
	* may be used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	* SUCH DAMAGE.
	*/

	#if 0
	#ifndef lint
	static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93";
	#endif /* not lint */
	#endif

	#include "c.h"

	/*
	* Here we have the token scanner for indent. It scans off one token and puts
	* it in the global variable "token". It returns a code, indicating the type
	* of token scanned.
	*/

	#include <err.h>
	#include <stdio.h>
	#include <ctype.h>
	#include <stdlib.h>
	#include <string.h>
	#include "indent_globs.h"
	#include "indent_codes.h"
	#include "indent.h"

	#define alphanum 1
	#ifdef undef
	#define opchar 3
	#endif

	/*
	 * One reserved word together with the classification code that lexi()
	 * stores in state->keyword when it scans that word.
	 */
	struct templ {
	    const char *rwd;	/* keyword spelling; must remain the first member */
	    int rwcode;		/* classification code */
	};

	/*
	 * Reserved-word table, looked up with bsearch() in lexi().  The entries
	 * have to stay in strcmp() order because of the binary search, and the
	 * string has to be the first member of struct templ for the same reason.
	 */
	struct templ specials[] =
	{
	    {.rwd = "_Bool", .rwcode = 4},
	    {.rwd = "_Complex", .rwcode = 4},
	    {.rwd = "_Imaginary", .rwcode = 4},
	    {.rwd = "auto", .rwcode = 10},
	    {.rwd = "bool", .rwcode = 4},
	    {.rwd = "break", .rwcode = 9},
	    {.rwd = "case", .rwcode = 8},
	    {.rwd = "char", .rwcode = 4},
	    {.rwd = "complex", .rwcode = 4},
	    {.rwd = "const", .rwcode = 4},
	    {.rwd = "continue", .rwcode = 12},
	    {.rwd = "default", .rwcode = 8},
	    {.rwd = "do", .rwcode = 6},
	    {.rwd = "double", .rwcode = 4},
	    {.rwd = "else", .rwcode = 6},
	    {.rwd = "enum", .rwcode = 3},
	    {.rwd = "extern", .rwcode = 10},
	    {.rwd = "float", .rwcode = 4},
	    {.rwd = "for", .rwcode = 5},
	    {.rwd = "global", .rwcode = 4},
	    {.rwd = "goto", .rwcode = 9},
	    {.rwd = "if", .rwcode = 5},
	    {.rwd = "imaginary", .rwcode = 4},
	    {.rwd = "inline", .rwcode = 12},
	    {.rwd = "int", .rwcode = 4},
	    {.rwd = "long", .rwcode = 4},
	    {.rwd = "offsetof", .rwcode = 1},
	    {.rwd = "register", .rwcode = 10},
	    {.rwd = "restrict", .rwcode = 12},
	    {.rwd = "return", .rwcode = 9},
	    {.rwd = "short", .rwcode = 4},
	    {.rwd = "signed", .rwcode = 4},
	    {.rwd = "sizeof", .rwcode = 2},
	    {.rwd = "static", .rwcode = 10},
	    {.rwd = "struct", .rwcode = 3},
	    {.rwd = "switch", .rwcode = 7},
	    {.rwd = "typedef", .rwcode = 11},
	    {.rwd = "union", .rwcode = 3},
	    {.rwd = "unsigned", .rwcode = 4},
	    {.rwd = "void", .rwcode = 4},
	    {.rwd = "volatile", .rwcode = 4},
	    {.rwd = "while", .rwcode = 5}
	};

	/* Table of additional type names; lexi() searches it with bsearch(). */
	const char **typenames;
	/* Capacity counter for typenames[]; not assigned anywhere in this copy. */
	int typename_count;
	/* Index of the last used slot of typenames[]; lexi() skips the search while it is negative. */
	int typename_top = -1;

	/*
	 * Character class for each 7-bit character code: 1 is "alphanum",
	 * 3 is the operator class ("opchar"), 0 is everything else.
	 */
	char chartype[128] =
	{ /* this is used to facilitate the decision of
	* what type (alphanumeric, operator) each
	* character is */
	0, 0, 0, 0, 0, 0, 0, 0,	/* 0x00 - 0x07: control characters */
	0, 0, 0, 0, 0, 0, 0, 0,	/* 0x08 - 0x0f: control characters */
	0, 0, 0, 0, 0, 0, 0, 0,	/* 0x10 - 0x17: control characters */
	0, 0, 0, 0, 0, 0, 0, 0,	/* 0x18 - 0x1f: control characters */
	0, 3, 0, 0, 1, 3, 3, 0,	/* space ! " # $ % & ' */
	0, 0, 3, 3, 0, 3, 0, 3,	/* ( ) star plus comma minus dot slash */
	1, 1, 1, 1, 1, 1, 1, 1,	/* 0 - 7 */
	1, 1, 0, 0, 3, 3, 3, 3,	/* 8 9 : ; < = > ? */
	0, 1, 1, 1, 1, 1, 1, 1,	/* @ A - G */
	1, 1, 1, 1, 1, 1, 1, 1,	/* H - O */
	1, 1, 1, 1, 1, 1, 1, 1,	/* P - W */
	1, 1, 1, 0, 0, 0, 3, 1,	/* X Y Z [ backslash ] ^ _ */
	0, 1, 1, 1, 1, 1, 1, 1,	/* ` a - g */
	1, 1, 1, 1, 1, 1, 1, 1,	/* h - o */
	1, 1, 1, 1, 1, 1, 1, 1,	/* p - w */
	1, 1, 1, 0, 3, 0, 3, 0	/* x y z { | } ~ DEL */
	};

	/*
	 * bsearch() comparison callback.
	 *
	 * e1 is the search key, a NUL-terminated string.  e2 points at a table
	 * element whose first bytes are a "const char *".  Returns the strcmp()
	 * result of the key against that string.
	 */
	static int
	strcmp_type(const void *e1, const void *e2)
	{
	    return (strcmp(e1, *(const char * const *)e2));
	}

	/*
	 * Decide whether "foo(..." is a function definition or declaration.
	 *
	 * tp points at the '(' that follows the candidate name.  Scan forward,
	 * first through the rest of the current buffer and then through
	 * lookahead(), for the first '{', ';' or ',' that is not within
	 * parentheses or comments.  Returns true (a definition) if that
	 * character is '{'; returns false for ';' or ',', for a ')' that closes
	 * more parentheses than were opened, and when EOF is reached first.
	 *
	 * Note that this rule is fooled by K&R-style parameter declarations,
	 * but telling the difference between those and function attributes
	 * seems like more trouble than it's worth.  This code could also be
	 * fooled by mismatched parens or apparent comment starts within string
	 * literals, but that seems unlikely in the context it's used in.
	 */
	static int
	is_func_definition(char *tp)
	{
	    int paren_depth = 0;
	    int in_comment = false;
	    int in_slash_comment = false;
	    int lastc = 0;

	    /* We may need to look past the end of the current buffer. */
	    lookahead_reset();
	    for (;;) {
	        int c;

	        /* Fetch next character. */
	        if (tp < buf_end)
	            c = *tp++;
	        else {
	            c = lookahead();
	            if (c == EOF)
	                break;
	        }
	        /*
	         * Handle comments.  Once a two-character delimiter has been
	         * recognized, forget its last character (c = 0) so that it
	         * cannot also serve as the first character of another
	         * delimiter.
	         */
	        if (in_comment) {
	            if (lastc == '*' && c == '/') {
	                in_comment = false;
	                c = 0;
	            }
	        } else if (in_slash_comment) {
	            if (c == '\n')
	                in_slash_comment = false;
	        } else if (lastc == '/' && c == '*') {
	            in_comment = true;
	            c = 0;
	        } else if (lastc == '/' && c == '/')
	            in_slash_comment = true;
	        /* Count nested parens properly. */
	        else if (c == '(')
	            paren_depth++;
	        else if (c == ')') {
	            paren_depth--;
	            /*
	             * If we find unbalanced parens, we must have started inside
	             * a declaration.
	             */
	            if (paren_depth < 0)
	                return false;
	        } else if (paren_depth == 0) {
	            /* We are outside any parentheses or comments. */
	            if (c == '{')
	                return true;
	            else if (c == ';' || c == ',')
	                return false;
	        }
	        lastc = c;
	    }
	    /* Hit EOF --- for lack of anything better, assume "not a definition". */
	    return false;
	}

	int
	lexi(struct parser_state *state)
	{
	int unary_delim; /* this is set to 1 if the current token
	* forces a following operator to be unary */
	int code; /* internal code to be returned */
	char qchar; /* the delimiter character for a string */

	e_token = s_token; /* point to start of place to save token */
	unary_delim = false;
	state->col_1 = state->last_nl; /* tell world that this token started
	* in column 1 iff the last thing
	* scanned was a newline */
	state->last_nl = false;

	while (buf_ptr == ' ' \|\| buf_ptr == '\t') { /* get rid of blanks */
	state->col_1 = false; /* leading blanks imply token is not in column
	* 1 */
	if (++buf_ptr >= buf_end)
	fill_buffer();
	}

	/* Scan an alphanumeric token */
	if (chartype[*buf_ptr & 127] == alphanum \|\|
	(buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
	/*
	* we have a character or number
	*/
	struct templ *p;

	if (isdigit((unsigned char)*buf_ptr) \|\|
	(buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
	int seendot = 0,
	seenexp = 0,
	seensfx = 0;

	/*
	* base 2, base 8, base 16:
	*/
	if (buf_ptr[0] == '0' && buf_ptr[1] != '.') {
	int len;

	if (buf_ptr[1] == 'b' \|\| buf_ptr[1] == 'B')
	len = strspn(buf_ptr + 2, "01") + 2;
	else if (buf_ptr[1] == 'x' \|\| buf_ptr[1] == 'X')
	len = strspn(buf_ptr + 2, "0123456789ABCDEFabcdef") + 2;
	else
	len = strspn(buf_ptr + 1, "012345678") + 1;
	if (len > 0) {
	CHECK_SIZE_TOKEN(len);
	memcpy(e_token, buf_ptr, len);
	e_token += len;
	buf_ptr += len;
	}
	else
	diag2(1, "Unterminated literal");
	}
	else /* base 10: */
	while (1) {
	if (*buf_ptr == '.') {
	if (seendot)
	break;
	else
	seendot++;
	}
	CHECK_SIZE_TOKEN(3);
	e_token++ = buf_ptr++;
	if (!isdigit((unsigned char)buf_ptr) && buf_ptr != '.') {
	if ((buf_ptr != 'E' && buf_ptr != 'e') \|\| seenexp)
	break;
	else {
	seenexp++;
	seendot++;
	e_token++ = buf_ptr++;
	if (buf_ptr == '+' \|\| buf_ptr == '-')
	e_token++ = buf_ptr++;
	}
	}
	}

	while (1) {
	CHECK_SIZE_TOKEN(2);
	if (!(seensfx & 1) && (buf_ptr == 'U' \|\| buf_ptr == 'u')) {
	e_token++ = buf_ptr++;
	seensfx \|= 1;
	continue;
	}
	if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) {
	if (buf_ptr[1] == buf_ptr[0])
	e_token++ = buf_ptr++;
	e_token++ = buf_ptr++;
	seensfx \|= 2;
	continue;
	}
	break;
	}
	}
	else
	while (chartype[buf_ptr & 127] == alphanum \|\| buf_ptr == BACKSLASH) {
	/* fill_buffer() terminates buffer with newline */
	if (*buf_ptr == BACKSLASH) {
	if (*(buf_ptr + 1) == '\n') {
	buf_ptr += 2;
	if (buf_ptr >= buf_end)
	fill_buffer();
	} else
	break;
	}
	CHECK_SIZE_TOKEN(1);
	/* copy it over */
	e_token++ = buf_ptr++;
	if (buf_ptr >= buf_end)
	fill_buffer();
	}
	*e_token = '\0';

	if (s_token[0] == 'L' && s_token[1] == '\0' &&
	(buf_ptr == '"' \|\| buf_ptr == '\''))
	return (strpfx);

	while (buf_ptr == ' ' \|\| buf_ptr == '\t') { /* get rid of blanks */
	if (++buf_ptr >= buf_end)
	fill_buffer();
	}
	state->keyword = 0;
	if (state->last_token == structure && !state->p_l_follow) {
	/* if last token was 'struct' and we're not
	* in parentheses, then this token
	* should be treated as a declaration */
	state->last_u_d = true;
	return (decl);
	}
	/*
	* Operator after identifier is binary unless last token was 'struct'
	*/
	state->last_u_d = (state->last_token == structure);

	p = bsearch(s_token,
	specials,
	sizeof(specials) / sizeof(specials[0]),
	sizeof(specials[0]),
	strcmp_type);
	if (p == NULL) { /* not a special keyword... */
	char *u;

	/* ... so maybe a type_t or a typedef */
	if ((auto_typedefs && ((u = strrchr(s_token, '_')) != NULL) &&
	strcmp(u, "_t") == 0) \|\| (typename_top >= 0 &&
	bsearch(s_token, typenames, typename_top + 1,
	sizeof(typenames[0]), strcmp_type))) {
	state->keyword = 4; /* a type name */
	state->last_u_d = true;
	goto found_typename;
	}
	} else { /* we have a keyword */
	state->keyword = p->rwcode;
	state->last_u_d = true;
	switch (p->rwcode) {
	case 7: /* it is a switch */
	return (swstmt);
	case 8: /* a case or default */
	return (casestmt);

	case 3: /* a "struct" */
	/* FALLTHROUGH */
	case 4: /* one of the declaration keywords */
	found_typename:
	if (state->p_l_follow) {
	/* inside parens: cast, param list, offsetof or sizeof */
	state->cast_mask \|= (1 << state->p_l_follow) & ~state->not_cast_mask;
	}
	if (state->last_token == period \|\| state->last_token == unary_op) {
	state->keyword = 0;
	break;
	}
	if (p != NULL && p->rwcode == 3)
	return (structure);
	if (state->p_l_follow)
	break;
	return (decl);

	case 5: /* if, while, for */
	return (sp_paren);

	case 6: /* do, else */
	return (sp_nparen);

	case 10: /* storage class specifier */
	return (storage);

	case 11: /* typedef */
	return (type_def);

	default: /* all others are treated like any other
	* identifier */
	return (ident);
	} /* end of switch */
	} /* end of if (found_it) */
	if (*buf_ptr == '(' && state->tos <= 1 && state->ind_level == 0 &&
	state->in_parameter_declaration == 0 && state->block_init == 0) {
	if (is_func_definition(buf_ptr)) {
	strncpy(state->procname, token, sizeof state->procname - 1);
	if (state->in_decl)
	state->in_parameter_declaration = 1;
	return (funcname);
	}
	}
	/*
	* The following hack attempts to guess whether or not the current
	* token is in fact a declaration keyword -- one that has been
	* typedefd
	*/
	else if (!state->p_l_follow && !state->block_init &&
	!state->in_stmt &&
	((buf_ptr == '' && buf_ptr[1] != '=') \|\|
	isalpha((unsigned char)*buf_ptr)) &&
	(state->last_token == semicolon \|\| state->last_token == lbrace \|\|
	state->last_token == rbrace)) {
	state->keyword = 4; /* a type name */
	state->last_u_d = true;
	return decl;
	}
	if (state->last_token == decl) /* if this is a declared variable,
	* then following sign is unary */
	state->last_u_d = true; /* will make "int a -1" work */
	return (ident); /* the ident is not in the list */
	} /* end of processing for alphanum character */

	/* Scan a non-alphanumeric token */

	CHECK_SIZE_TOKEN(3); /* things like "<<=" */
	e_token++ = buf_ptr; /* if it is only a one-character token, it is
	* moved here */
	*e_token = '\0';
	if (++buf_ptr >= buf_end)
	fill_buffer();

	switch (*token) {
	case '\n':
	unary_delim = state->last_u_d;
	state->last_nl = true; /* remember that we just had a newline */
	code = (had_eof ? 0 : newline);

	/*
	* if data has been exhausted, the newline is a dummy, and we should
	* return code to stop
	*/
	break;

	case '\'': /* start of quoted character */
	case '"': /* start of string */
	qchar = *token;
	do { /* copy the string */
	while (1) { /* move one character or [/<char>]<char> */
	if (*buf_ptr == '\n') {
	diag2(1, "Unterminated literal");
	goto stop_lit;
	}
	CHECK_SIZE_TOKEN(2);
	e_token = buf_ptr++;
	if (buf_ptr >= buf_end)
	fill_buffer();
	if (e_token == BACKSLASH) { / if escape, copy extra char */
	if (buf_ptr == '\n') / check for escaped newline */
	++line_no;
	++e_token = buf_ptr++;
	++e_token; /* we must increment this again because we
	* copied two chars */
	if (buf_ptr >= buf_end)
	fill_buffer();
	}
	else
	break; /* we copied one character */
	} /* end of while (1) */
	} while (*e_token++ != qchar);
	stop_lit:
	code = ident;
	break;

	case ('('):
	case ('['):
	unary_delim = true;
	code = lparen;
	break;

	case (')'):
	case (']'):
	code = rparen;
	break;

	case '#':
	unary_delim = state->last_u_d;
	code = preesc;
	break;

	case '?':
	unary_delim = true;
	code = question;
	break;

	case (':'):
	code = colon;
	unary_delim = true;
	break;

	case (';'):
	unary_delim = true;
	code = semicolon;
	break;

	case ('{'):
	unary_delim = true;

	/*
	* if (state->in_or_st) state->block_init = 1;
	*/
	/* ? code = state->block_init ? lparen : lbrace; */
	code = lbrace;
	break;

	case ('}'):
	unary_delim = true;
	/* ? code = state->block_init ? rparen : rbrace; */
	code = rbrace;
	break;

	case 014: /* a form feed */
	unary_delim = state->last_u_d;
	state->last_nl = true; /* remember this so we can set 'state->col_1'
	* right */
	code = form_feed;
	break;

	case (','):
	unary_delim = true;
	code = comma;
	break;

	case '.':
	unary_delim = false;
	code = period;
	break;

	case '-':
	case '+': /* check for -, +, --, ++ */
	code = (state->last_u_d ? unary_op : binary_op);
	unary_delim = true;

	if (*buf_ptr == token[0]) {
	/* check for doubled character */
	e_token++ = buf_ptr++;
	/* buffer overflow will be checked at end of loop */
	if (state->last_token == ident \|\| state->last_token == rparen) {
	code = (state->last_u_d ? unary_op : postop);
	/* check for following ++ or -- */
	unary_delim = false;
	}
	}
	else if (*buf_ptr == '=')
	/* check for operator += */
	e_token++ = buf_ptr++;
	else if (*buf_ptr == '>') {
	/* check for operator -> */
	e_token++ = buf_ptr++;
	unary_delim = false;
	code = unary_op;
	state->want_blank = false;
	}
	break; /* buffer overflow will be checked at end of
	* switch */

	case '=':
	if (state->in_or_st)
	state->block_init = 1;
	#ifdef undef
	if (chartype[buf_ptr & 127] == opchar) { / we have two char assignment */
	e_token[-1] = *buf_ptr++;
	if ((e_token[-1] == '<' \|\| e_token[-1] == '>') && e_token[-1] == *buf_ptr)
	e_token++ = buf_ptr++;
	e_token++ = '='; / Flip =+ to += */
	*e_token = 0;
	}
	#else
	if (buf_ptr == '=') {/ == */
	e_token++ = '='; / Flip =+ to += */
	buf_ptr++;
	*e_token = 0;
	}
	#endif
	code = binary_op;
	unary_delim = true;
	break;
	/* can drop thru!!! */

	case '>':
	case '<':
	case '!': /* ops like <, <<, <=, !=, etc */
	if (buf_ptr == '>' \|\| buf_ptr == '<' \|\| *buf_ptr == '=') {
	e_token++ = buf_ptr;
	if (++buf_ptr >= buf_end)
	fill_buffer();
	}
	if (*buf_ptr == '=')
	e_token++ = buf_ptr++;
	code = (state->last_u_d ? unary_op : binary_op);
	unary_delim = true;
	break;

	case '*':
	unary_delim = true;
	if (!state->last_u_d) {
	if (*buf_ptr == '=')
	e_token++ = buf_ptr++;
	code = binary_op;
	break;
	}
	while (buf_ptr == '' \|\| isspace((unsigned char)*buf_ptr)) {
	if (buf_ptr == '') {
	CHECK_SIZE_TOKEN(1);
	e_token++ = buf_ptr;
	}
	if (++buf_ptr >= buf_end)
	fill_buffer();
	}
	code = unary_op;
	break;

	default:
	if (token[0] == '/' && buf_ptr == '') {
	/* it is start of comment */
	e_token++ = '';

	if (++buf_ptr >= buf_end)
	fill_buffer();

	code = comment;
	unary_delim = state->last_u_d;
	break;
	}
	while ((e_token - 1) == buf_ptr \|\| *buf_ptr == '=') {
	/*
	* handle \|\|, &&, etc, and also things as in int *****i
	*/
	CHECK_SIZE_TOKEN(1);
	e_token++ = buf_ptr;
	if (++buf_ptr >= buf_end)
	fill_buffer();
	}
	code = (state->last_u_d ? unary_op : binary_op);
	unary_delim = true;


	} /* end of switch */
	if (buf_ptr >= buf_end) /* check for input buffer empty */
	fill_buffer();
	state->last_u_d = unary_delim;
	CHECK_SIZE_TOKEN(1);
	e_token = '\0'; / null terminate the token */
	return (code);
	}

	/*
	 * Allocate the initial storage for the typenames table.
	 *
	 * Sets typename_count to 16 and points typenames at room for that many
	 * entries.  If the allocation fails, err(1, NULL) is called.
	 */
	void
	alloc_typenames(void)
	{
		typename_count = 16;
		/* size is expressed via the element type, so no cast is needed */
		typenames = malloc(sizeof(typenames[0]) * typename_count);
		if (typenames == NULL)
			err(1, NULL);
	}

	/*
	 * Insert a copy of "key" into the typenames table, keeping the table
	 * ordered by strcmp() and free of duplicates.
	 *
	 * The table's capacity is doubled first if it has no room for another
	 * entry.  A key that compares equal to an existing entry is ignored.
	 * Allocation failures (realloc or strdup) are reported through
	 * err(1, NULL).
	 */
	void
	add_typename(const char *key)
	{
		int			slot;	/* index the new entry will occupy */
		int			order;	/* latest strcmp() result */
		const char *entry;

		/* Grow the table before anything else, exactly when it is full */
		if (typename_top + 1 >= typename_count) {
			typename_count *= 2;
			typenames = realloc((void *)typenames,
				sizeof(typenames[0]) * typename_count);
			if (typenames == NULL)
				err(1, NULL);
		}

		if (typename_top == -1) {
			/* first entry ever */
			slot = 0;
		}
		else {
			/* input is usually sorted, so try the last entry first */
			order = strcmp(key, typenames[typename_top]);
			if (order == 0)
				return;		/* duplicate of the last entry */
			if (order > 0)
				slot = typename_top + 1;
			else {
				/* linear search for the first entry not less than key */
				slot = 0;
				while ((order = strcmp(key, typenames[slot])) > 0)
					slot++;
				if (order == 0)
					return;	/* duplicate found mid-table */
			}
		}

		/* Open a gap when inserting before existing entries */
		if (slot <= typename_top)
			memmove(&typenames[slot + 1], &typenames[slot],
				sizeof(typenames[0]) * (typename_top + 1 - slot));
		typename_top++;

		entry = strdup(key);
		typenames[slot] = entry;
		if (entry == NULL)
			err(1, NULL);
	}