mirror of
				git://git.code.sf.net/p/zsh/code
				synced 2025-10-30 17:50:58 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			2204 lines
		
	
	
	
		
			48 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			2204 lines
		
	
	
	
		
			48 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * lex.c - lexical analysis
 | |
|  *
 | |
|  * This file is part of zsh, the Z shell.
 | |
|  *
 | |
|  * Copyright (c) 1992-1997 Paul Falstad
 | |
|  * All rights reserved.
 | |
|  *
 | |
|  * Permission is hereby granted, without written agreement and without
 | |
|  * license or royalty fees, to use, copy, modify, and distribute this
 | |
|  * software and to distribute modified versions of this software for any
 | |
|  * purpose, provided that the above copyright notice and the following
 | |
|  * two paragraphs appear in all copies of this software.
 | |
|  *
 | |
|  * In no event shall Paul Falstad or the Zsh Development Group be liable
 | |
|  * to any party for direct, indirect, special, incidental, or consequential
 | |
|  * damages arising out of the use of this software and its documentation,
 | |
|  * even if Paul Falstad and the Zsh Development Group have been advised of
 | |
|  * the possibility of such damage.
 | |
|  *
 | |
|  * Paul Falstad and the Zsh Development Group specifically disclaim any
 | |
|  * warranties, including, but not limited to, the implied warranties of
 | |
|  * merchantability and fitness for a particular purpose.  The software
 | |
|  * provided hereunder is on an "as is" basis, and Paul Falstad and the
 | |
|  * Zsh Development Group have no obligation to provide maintenance,
 | |
|  * support, updates, enhancements, or modifications.
 | |
|  *
 | |
|  */
 | |
| 
 | |
| #include "zsh.mdh"
 | |
| #include "lex.pro"
 | |
| 
 | |
| #define LEX_HEAP_SIZE (32)
 | |
| 
 | |
| /* tokens */
 | |
| 
 | |
| /**/
 | |
| mod_export char ztokens[] = "#$^*(())$=|{}[]`<>>?~`,-!'\"\\\\";
 | |
| 
 | |
| /* parts of the current token */
 | |
| 
 | |
| /**/
 | |
| char *zshlextext;
 | |
| /**/
 | |
| mod_export char *tokstr;
 | |
| /**/
 | |
| mod_export enum lextok tok;
 | |
| /**/
 | |
| mod_export int tokfd;
 | |
| 
 | |
| /*
 | |
|  * Line number at which the first character of a token was found.
 | |
|  * We always set this in gettok(), which is always called from
 | |
|  * zshlex() unless we have reached an error.  So it is always
 | |
|  * valid when parsing.  It is not useful during execution
 | |
|  * of the parsed structure.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| zlong toklineno;
 | |
| 
 | |
| /* lexical analyzer error flag */
 | |
|  
 | |
| /**/
 | |
| mod_export int lexstop;
 | |
| 
 | |
| /* if != 0, this is the first line of the command */
 | |
|  
 | |
| /**/
 | |
| mod_export int isfirstln;
 | |
|  
 | |
| /* if != 0, this is the first char of the command (not including white space) */
 | |
|  
 | |
| /**/
 | |
| int isfirstch;
 | |
| 
 | |
| /* flag that an alias should be expanded after expansion ending in space */
 | |
| 
 | |
| /**/
 | |
| int inalmore;
 | |
| 
 | |
| /*
 | |
|  * Don't do spelling correction.
 | |
|  * Bit 1 is only valid for the current word.  It's
 | |
|  * set when we detect a lookahead that stops the word from
 | |
|  * needing correction.
 | |
|  */
 | |
|  
 | |
| /**/
 | |
| int nocorrect;
 | |
| 
 | |
| /*
 | |
|  * TBD: the following exported variables are part of the non-interface
 | |
|  * with ZLE for completion.  They are poorly named and the whole
 | |
|  * scheme is incredibly brittle.  One piece of robustness is applied:
 | |
|  * the variables are only set if LEXFLAGS_ZLE is set.  Improvements
 | |
|  * should therefore concentrate on areas with this flag set.
 | |
|  *
 | |
|  * Cursor position and line length in zle when the line is
 | |
|  * metafied for access from the main shell.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| mod_export int zlemetacs, zlemetall;
 | |
| 
 | |
| /* inwhat says what exactly we are in     *
 | |
|  * (its value is one of the IN_* things). */
 | |
| 
 | |
| /**/
 | |
| mod_export int inwhat;
 | |
| 
 | |
| /* 1 if x added to complete in a blank between words */
 | |
| 
 | |
| /**/
 | |
| mod_export int addedx;
 | |
| 
 | |
| /* wb and we hold the beginning/end position of the word we are completing. */
 | |
| 
 | |
| /**/
 | |
| mod_export int wb, we;
 | |
| 
 | |
/*
 * Value of inbufct recorded by gettok() for the first character of the
 * token it is reading (less one when qbang is set and that character is
 * bangchar).  Only updated when LEXFLAGS_ZLE is set and the input does
 * not have INP_ALIAS set.
 */
| /**/
 | |
| mod_export int wordbeg;
 | |
| 
 | |
/*
 * Value of inbufct stored by the SETPARBEGIN macro; SETPAREND puts it
 * back to -1 when its cursor test (zlemetacs against zlemetall and
 * inbufct) succeeds.
 */
| /**/
 | |
| mod_export int parbegin;
 | |
| 
 | |
/*
 * Value of inbufct stored by the SETPAREND macro when parbegin is set,
 * parend is still -1 and its cursor test fails.
 */
| /**/
 | |
| mod_export int parend;
 | |
| 
 | |
| 
 | |
| /* 1 if aliases should not be expanded */
 | |
| 
 | |
| /**/
 | |
| mod_export int noaliases;
 | |
| 
 | |
| /*
 | |
|  * If non-zero, we are parsing a line sent to us by the editor, or some
 | |
|  * other string that's not part of standard command input (e.g. eval is
 | |
|  * part of normal command input).
 | |
|  *
 | |
|  * Set of bits from LEXFLAGS_*.
 | |
|  *
 | |
|  * Note that although it is passed into the lexer as an input, the
 | |
|  * lexer can set it to zero after finding the word it's searching for.
 | |
|  * This only happens if the line being parsed actually does come from
 | |
|  * ZLE, and hence the bit LEXFLAGS_ZLE is set.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| mod_export int lexflags;
 | |
| 
 | |
| /* don't recognize comments */
 | |
| 
 | |
| /**/
 | |
| mod_export int nocomments;
 | |
| 
 | |
| /* add raw input characters while parsing command substitution */
 | |
| 
 | |
| /**/
 | |
| int lex_add_raw;
 | |
| 
 | |
| /* variables associated with the above */
 | |
| 
 | |
| static char *tokstr_raw;
 | |
| static struct lexbufstate lexbuf_raw;
 | |
| 
 | |
| /* text of punctuation tokens */
 | |
/*
 * NOTE(review): presumably indexed by enum lextok value, in the order
 * given by the token names and numbers in the per-entry comments --
 * confirm against the enum definition.
 *
 * The array is declared with WHILE + 1 elements but only the entries up
 * to and including SEMIBAR have initialisers, so every later element
 * is NULL.
 */
| 
 | |
| /**/
 | |
| mod_export char *tokstrings[WHILE + 1] = {
 | |
|     NULL,	/* NULLTOK	  0  */
 | |
|     ";",	/* SEPER	     */
 | |
|     "\\n",	/* NEWLIN	     */
 | |
|     ";",	/* SEMI		     */
 | |
|     ";;",	/* DSEMI	     */
 | |
|     "&",	/* AMPER	  5  */
 | |
|     "(",	/* INPAR	     */
 | |
|     ")",	/* OUTPAR	     */
 | |
|     "||",	/* DBAR		     */
 | |
|     "&&",	/* DAMPER	     */
 | |
|     ">",	/* OUTANG	  10 */
 | |
|     ">|",	/* OUTANGBANG	     */
 | |
|     ">>",	/* DOUTANG	     */
 | |
|     ">>|",	/* DOUTANGBANG	     */
 | |
|     "<",	/* INANG	     */
 | |
|     "<>",	/* INOUTANG	  15 */
 | |
|     "<<",	/* DINANG	     */
 | |
|     "<<-",	/* DINANGDASH	     */
 | |
|     "<&",	/* INANGAMP	     */
 | |
|     ">&",	/* OUTANGAMP	     */
 | |
|     "&>",	/* AMPOUTANG	  20 */
 | |
|     "&>|",	/* OUTANGAMPBANG     */
 | |
|     ">>&",	/* DOUTANGAMP	     */
 | |
|     ">>&|",	/* DOUTANGAMPBANG    */
 | |
|     "<<<",	/* TRINANG	     */
 | |
|     "|",	/* BAR		  25 */
 | |
|     "|&",	/* BARAMP	     */
 | |
|     "()",	/* INOUTPAR	     */
 | |
|     "((",	/* DINPAR	     */
 | |
|     "))",	/* DOUTPAR	     */
 | |
|     "&|",	/* AMPERBANG	  30 */
 | |
|     ";&",	/* SEMIAMP	     */
 | |
|     ";|",	/* SEMIBAR	     */
 | |
| };
 | |
| 
 | |
| /* lexical state */
 | |
| 
 | |
| static int dbparens;
 | |
| static struct lexbufstate lexbuf = { NULL, 256, 0 };
 | |
| 
 | |
| /* save lexical context */
 | |
| 
 | |
| /**/
 | |
| void
 | |
| lex_context_save(struct lex_stack *ls, int toplevel)
 | |
| {
 | |
|     (void)toplevel;
 | |
| 
 | |
|     ls->dbparens = dbparens;
 | |
|     ls->isfirstln = isfirstln;
 | |
|     ls->isfirstch = isfirstch;
 | |
|     ls->lexflags = lexflags;
 | |
| 
 | |
|     ls->tok = tok;
 | |
|     ls->tokstr = tokstr;
 | |
|     ls->zshlextext = zshlextext;
 | |
|     ls->lexbuf = lexbuf;
 | |
|     ls->lex_add_raw = lex_add_raw;
 | |
|     ls->tokstr_raw = tokstr_raw;
 | |
|     ls->lexbuf_raw = lexbuf_raw;
 | |
|     ls->lexstop = lexstop;
 | |
|     ls->toklineno = toklineno;
 | |
| 
 | |
|     tokstr = zshlextext = lexbuf.ptr = NULL;
 | |
|     lexbuf.siz = 256;
 | |
|     tokstr_raw = lexbuf_raw.ptr = NULL;
 | |
|     lexbuf_raw.siz = lexbuf_raw.len = lex_add_raw = 0;
 | |
| }
 | |
| 
 | |
| /* restore lexical context */
 | |
| 
 | |
| /**/
 | |
| mod_export void
 | |
| lex_context_restore(const struct lex_stack *ls, int toplevel)
 | |
| {
 | |
|     (void)toplevel;
 | |
| 
 | |
|     dbparens = ls->dbparens;
 | |
|     isfirstln = ls->isfirstln;
 | |
|     isfirstch = ls->isfirstch;
 | |
|     lexflags = ls->lexflags;
 | |
|     tok = ls->tok;
 | |
|     tokstr = ls->tokstr;
 | |
|     zshlextext = ls->zshlextext;
 | |
|     lexbuf = ls->lexbuf;
 | |
|     lex_add_raw = ls->lex_add_raw;
 | |
|     tokstr_raw = ls->tokstr_raw;
 | |
|     lexbuf_raw = ls->lexbuf_raw;
 | |
|     lexstop = ls->lexstop;
 | |
|     toklineno = ls->toklineno;
 | |
| }
 | |
| 
 | |
| /**/
 | |
| void
 | |
| zshlex(void)
 | |
| {
 | |
|     if (tok == LEXERR)
 | |
| 	return;
 | |
|     do {
 | |
| 	if (inrepeat_)
 | |
| 	    ++inrepeat_;
 | |
| 	if (inrepeat_ == 3 && isset(SHORTLOOPS))
 | |
| 	    incmdpos = 1;
 | |
| 	tok = gettok();
 | |
|     } while (tok != ENDINPUT && exalias());
 | |
|     nocorrect &= 1;
 | |
|     if (tok == NEWLIN || tok == ENDINPUT) {
 | |
| 	while (hdocs) {
 | |
| 	    struct heredocs *next = hdocs->next;
 | |
| 	    char *doc, *munged_term;
 | |
| 
 | |
| 	    hwbegin(0);
 | |
| 	    cmdpush(hdocs->type == REDIR_HEREDOC ? CS_HEREDOC : CS_HEREDOCD);
 | |
| 	    munged_term = dupstring(hdocs->str);
 | |
| 	    STOPHIST
 | |
| 	    doc = gethere(&munged_term, hdocs->type);
 | |
| 	    ALLOWHIST
 | |
| 	    cmdpop();
 | |
| 	    hwend();
 | |
| 	    if (!doc) {
 | |
| 		zerr("here document too large");
 | |
| 		while (hdocs) {
 | |
| 		    next = hdocs->next;
 | |
| 		    zfree(hdocs, sizeof(struct heredocs));
 | |
| 		    hdocs = next;
 | |
| 		}
 | |
| 		tok = LEXERR;
 | |
| 		break;
 | |
| 	    }
 | |
| 	    setheredoc(hdocs->pc, REDIR_HERESTR, doc, hdocs->str,
 | |
| 		       munged_term);
 | |
| 	    zfree(hdocs, sizeof(struct heredocs));
 | |
| 	    hdocs = next;
 | |
| 	}
 | |
|     }
 | |
|     if (tok != NEWLIN)
 | |
| 	isnewlin = 0;
 | |
|     else
 | |
| 	isnewlin = (inbufct) ? -1 : 1;
 | |
|     if (tok == SEMI || (tok == NEWLIN && !(lexflags & LEXFLAGS_NEWLINE)))
 | |
| 	tok = SEPER;
 | |
| }
 | |
| 
 | |
| /**/
 | |
| mod_export void
 | |
| ctxtlex(void)
 | |
| {
 | |
|     static int oldpos;
 | |
| 
 | |
|     zshlex();
 | |
|     switch (tok) {
 | |
|     case SEPER:
 | |
|     case NEWLIN:
 | |
|     case SEMI:
 | |
|     case DSEMI:
 | |
|     case SEMIAMP:
 | |
|     case SEMIBAR:
 | |
|     case AMPER:
 | |
|     case AMPERBANG:
 | |
|     case INPAR:
 | |
|     case INBRACE:
 | |
|     case DBAR:
 | |
|     case DAMPER:
 | |
|     case BAR:
 | |
|     case BARAMP:
 | |
|     case INOUTPAR:
 | |
|     case DOLOOP:
 | |
|     case THEN:
 | |
|     case ELIF:
 | |
|     case ELSE:
 | |
|     case DOUTBRACK:
 | |
| 	incmdpos = 1;
 | |
| 	break;
 | |
|     case STRING:
 | |
|     case TYPESET:
 | |
|  /* case ENVSTRING: */
 | |
|     case ENVARRAY:
 | |
|     case OUTPAR:
 | |
|     case CASE:
 | |
|     case DINBRACK:
 | |
| 	incmdpos = 0;
 | |
| 	break;
 | |
| 
 | |
|     default:
 | |
| 	/* nothing to do, keep compiler happy */
 | |
| 	break;
 | |
|     }
 | |
|     if (tok != DINPAR)
 | |
| 	infor = tok == FOR ? 2 : 0;
 | |
|     if (IS_REDIROP(tok) || tok == FOR || tok == FOREACH || tok == SELECT) {
 | |
| 	inredir = 1;
 | |
| 	oldpos = incmdpos;
 | |
| 	incmdpos = 0;
 | |
|     } else if (inredir) {
 | |
| 	incmdpos = oldpos;
 | |
| 	inredir = 0;
 | |
|     }
 | |
| }
 | |
| 
 | |
| #define LX1_BKSLASH 0
 | |
| #define LX1_COMMENT 1
 | |
| #define LX1_NEWLIN 2
 | |
| #define LX1_SEMI 3
 | |
| #define LX1_AMPER 5
 | |
| #define LX1_BAR 6
 | |
| #define LX1_INPAR 7
 | |
| #define LX1_OUTPAR 8
 | |
| #define LX1_INANG 13
 | |
| #define LX1_OUTANG 14
 | |
| #define LX1_OTHER 15
 | |
| 
 | |
| #define LX2_BREAK 0
 | |
| #define LX2_OUTPAR 1
 | |
| #define LX2_BAR 2
 | |
| #define LX2_STRING 3
 | |
| #define LX2_INBRACK 4
 | |
| #define LX2_OUTBRACK 5
 | |
| #define LX2_TILDE 6
 | |
| #define LX2_INPAR 7
 | |
| #define LX2_INBRACE 8
 | |
| #define LX2_OUTBRACE 9
 | |
| #define LX2_OUTANG 10
 | |
| #define LX2_INANG 11
 | |
| #define LX2_EQUALS 12
 | |
| #define LX2_BKSLASH 13
 | |
| #define LX2_QUOTE 14
 | |
| #define LX2_DQUOTE 15
 | |
| #define LX2_BQUOTE 16
 | |
| #define LX2_COMMA 17
 | |
| #define LX2_DASH 18
 | |
| #define LX2_BANG 19
 | |
| #define LX2_OTHER 20
 | |
| #define LX2_META 21
 | |
| 
 | |
| static unsigned char lexact1[256], lexact2[256], lextok2[256];
 | |
| 
 | |
| /**/
 | |
| void
 | |
| initlextabs(void)
 | |
| {
 | |
|     int t0;
 | |
|     static char *lx1 = "\\q\n;!&|(){}[]<>";
 | |
|     static char *lx2 = ";)|$[]~({}><=\\\'\"`,-!";
 | |
| 
 | |
|     for (t0 = 0; t0 != 256; t0++) {
 | |
|        lexact1[t0] = LX1_OTHER;
 | |
| 	lexact2[t0] = LX2_OTHER;
 | |
| 	lextok2[t0] = t0;
 | |
|     }
 | |
|     for (t0 = 0; lx1[t0]; t0++)
 | |
| 	lexact1[(int)lx1[t0]] = t0;
 | |
|     for (t0 = 0; lx2[t0]; t0++)
 | |
| 	lexact2[(int)lx2[t0]] = t0;
 | |
|     lexact2['&'] = LX2_BREAK;
 | |
|     lexact2[STOUC(Meta)] = LX2_META;
 | |
|     lextok2['*'] = Star;
 | |
|     lextok2['?'] = Quest;
 | |
|     lextok2['{'] = Inbrace;
 | |
|     lextok2['['] = Inbrack;
 | |
|     lextok2['$'] = String;
 | |
|     lextok2['~'] = Tilde;
 | |
|     lextok2['#'] = Pound;
 | |
|     lextok2['^'] = Hat;
 | |
| }
 | |
| 
 | |
| /* initialize lexical state */
 | |
| 
 | |
| /**/
 | |
| void
 | |
| lexinit(void)
 | |
| {
 | |
|     nocorrect = dbparens = lexstop = 0;
 | |
|     tok = ENDINPUT;
 | |
| }
 | |
| 
 | |
| /* add a char to the string buffer */
 | |
| 
 | |
| /**/
 | |
| void
 | |
| add(int c)
 | |
| {
 | |
|     *lexbuf.ptr++ = c;
 | |
|     if (lexbuf.siz == ++lexbuf.len) {
 | |
| 	int newbsiz = lexbuf.siz * 2;
 | |
| 
 | |
| 	if (newbsiz > inbufct && inbufct > lexbuf.siz)
 | |
| 	    newbsiz = inbufct;
 | |
| 
 | |
| 	tokstr = (char *)hrealloc(tokstr, lexbuf.siz, newbsiz);
 | |
| 	lexbuf.ptr = tokstr + lexbuf.len;
 | |
| 	/* len == bsiz, so bptr is at the start of newly allocated memory */
 | |
| 	memset(lexbuf.ptr, 0, newbsiz - lexbuf.siz);
 | |
| 	lexbuf.siz = newbsiz;
 | |
|     }
 | |
| }
 | |
| 
 | |
/*
 * SETPARBEGIN: when lexing for ZLE (LEXFLAGS_ZLE set in lexflags) and
 * INP_ALIAS is not set in inbufflags, store inbufct in parbegin provided
 * zlemetacs >= zlemetall + 1 - inbufct.
 *
 * SETPAREND: under the same two flag conditions, and only while parbegin
 * is set (not -1) and parend is unset (-1): if the same comparison holds,
 * reset parbegin to -1, otherwise store inbufct in parend.
 *
 * Both expand to a bare brace block, so they are written at the use
 * site without relying on a trailing semicolon.
 */
#define SETPARBEGIN { \
	if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS)) { \
	    if (zlemetacs >= zlemetall + 1 - inbufct) \
		parbegin = inbufct; \
	} \
    }
#define SETPAREND { \
	if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS) && \
	    parbegin != -1 && parend == -1) { \
	    if (zlemetacs < zlemetall + 1 - inbufct) \
		parend = inbufct; \
	    else \
		parbegin = -1; \
	} \
    }
 | |
| 
 | |
| enum {
 | |
|     CMD_OR_MATH_CMD,
 | |
|     CMD_OR_MATH_MATH,
 | |
|     CMD_OR_MATH_ERR
 | |
| };
 | |
| 
 | |
| /*
 | |
|  * Return one of the above.  If it couldn't be
 | |
|  * parsed as math, but there was no gross error, it's a command.
 | |
|  */
 | |
| 
 | |
| static int
 | |
| cmd_or_math(int cs_type)
 | |
| {
 | |
|     int oldlen = lexbuf.len;
 | |
|     int c;
 | |
|     int oinflags = inbufflags;
 | |
| 
 | |
|     cmdpush(cs_type);
 | |
|     inbufflags |= INP_APPEND;
 | |
|     c = dquote_parse(')', 0);
 | |
|     if (!(oinflags & INP_APPEND))
 | |
| 	inbufflags &= ~INP_APPEND;
 | |
|     cmdpop();
 | |
|     *lexbuf.ptr = '\0';
 | |
|     if (!c) {
 | |
| 	/* Successfully parsed, see if it was math */
 | |
| 	c = hgetc();
 | |
| 	if (c == ')')
 | |
| 	    return CMD_OR_MATH_MATH; /* yes */
 | |
| 	hungetc(c);
 | |
| 	lexstop = 0;
 | |
| 	c = ')';
 | |
|     } else if (lexstop) {
 | |
| 	/* we haven't got anything to unget */
 | |
| 	return CMD_OR_MATH_ERR;
 | |
|     }
 | |
|     /* else unsuccessful: unget the whole thing */
 | |
|     hungetc(c);
 | |
|     lexstop = 0;
 | |
|     while (lexbuf.len > oldlen && !(errflag & ERRFLAG_ERROR)) {
 | |
| 	lexbuf.len--;
 | |
| 	hungetc(itok(*--lexbuf.ptr) ?
 | |
| 		ztokens[*lexbuf.ptr - Pound] : *lexbuf.ptr);
 | |
|     }
 | |
|     if (errflag)
 | |
| 	return CMD_OR_MATH_ERR;
 | |
|     hungetc('(');
 | |
|     return errflag ? CMD_OR_MATH_ERR : CMD_OR_MATH_CMD;
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|  * Parse either a $(( ... )) or a $(...)
 | |
|  * Return the same as cmd_or_math().
 | |
|  */
 | |
| static int
 | |
| cmd_or_math_sub(void)
 | |
| {
 | |
|     int c = hgetc(), ret;
 | |
| 
 | |
|     if (c == '(') {
 | |
| 	int lexpos = (int)(lexbuf.ptr - tokstr);
 | |
| 	add(Inpar);
 | |
| 	add('(');
 | |
| 	if ((ret = cmd_or_math(CS_MATHSUBST)) == CMD_OR_MATH_MATH) {
 | |
| 	    tokstr[lexpos] = Inparmath;
 | |
| 	    add(')');
 | |
| 	    return CMD_OR_MATH_MATH;
 | |
| 	}
 | |
| 	if (ret == CMD_OR_MATH_ERR)
 | |
| 	    return CMD_OR_MATH_ERR;
 | |
| 	lexbuf.ptr -= 2;
 | |
| 	lexbuf.len -= 2;
 | |
|     } else {
 | |
| 	hungetc(c);
 | |
| 	lexstop = 0;
 | |
|     }
 | |
|     return skipcomm() ? CMD_OR_MATH_ERR : CMD_OR_MATH_CMD;
 | |
| }
 | |
| 
 | |
| /* Check whether we're looking at valid numeric globbing syntax      *
 | |
|  * (/\<[0-9]*-[0-9]*\>/).  Call pointing just after the opening "<". *
 | |
|  * Leaves the input in the same place, returning 0 or 1.             */
 | |
| 
 | |
| /**/
 | |
| static int
 | |
| isnumglob(void)
 | |
| {
 | |
|     int c, ec = '-', ret = 0;
 | |
|     int tbs = 256, n = 0;
 | |
|     char *tbuf = (char *)zalloc(tbs);
 | |
| 
 | |
|     while(1) {
 | |
| 	c = hgetc();
 | |
| 	if(lexstop) {
 | |
| 	    lexstop = 0;
 | |
| 	    break;
 | |
| 	}
 | |
| 	tbuf[n++] = c;
 | |
| 	if(!idigit(c)) {
 | |
| 	    if(c != ec)
 | |
| 		break;
 | |
| 	    if(ec == '>') {
 | |
| 		ret = 1;
 | |
| 		break;
 | |
| 	    }
 | |
| 	    ec = '>';
 | |
| 	}
 | |
| 	if(n == tbs)
 | |
| 	    tbuf = (char *)realloc(tbuf, tbs *= 2);
 | |
|     }
 | |
|     while(n--)
 | |
| 	hungetc(tbuf[n]);
 | |
|     zfree(tbuf, tbs);
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
/*
 * Read one token from the input and return its type.
 *
 * Leading blanks are skipped and toklineno is set to the current line
 * number.  At end of input the result is ENDINPUT, or LEXERR if errflag
 * is set.
 *
 * While dbparens is set the text up to the closing ")" (or up to ";"
 * while infor is non-zero) is read with dquote_parse() into tokstr and
 * DINPAR, DOUTPAR or LEXERR is returned.
 *
 * A digit directly followed by "<", ">" or "&>" is remembered in peekfd
 * and stored in tokfd when a redirection token is returned.
 *
 * A word starting with hashchar is treated as a comment when the option
 * and lexflags tests below allow it; the result is then STRING (with the
 * comment text in tokstr) if LEXFLAGS_COMMENTS_KEEP is set, otherwise
 * NEWLIN or ENDINPUT.
 *
 * Otherwise the first character is dispatched through lexact1[] to
 * recognise punctuation tokens; anything not returned from the switch
 * is passed to gettokstr() to be read as a string.
 */
| /**/
 | |
| static enum lextok
 | |
| gettok(void)
 | |
| {
 | |
|     int c, d;
 | |
|     int peekfd = -1;	/* fd digit seen before a redirection, else -1 */
 | |
|     enum lextok peek;
 | |
| 
 | |
|   beginning:	/* also re-entered after a backslash-newline */
 | |
|     tokstr = NULL;
 | |
|     while (iblank(c = hgetc()) && !lexstop);	/* skip leading blanks */
 | |
|     toklineno = lineno;
 | |
|     if (lexstop)
 | |
| 	return (errflag) ? LEXERR : ENDINPUT;
 | |
|     isfirstln = 0;
 | |
|     if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS))
 | |
| 	wordbeg = inbufct - (qbang && c == bangchar);
 | |
|     hwbegin(-1-(qbang && c == bangchar));
 | |
|     /* word includes the last character read and possibly \ before ! */
 | |
|     if (dbparens) {
 | |
| 	lexbuf.len = 0;
 | |
| 	lexbuf.ptr = tokstr = (char *) hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
 | |
| 	hungetc(c);
 | |
| 	cmdpush(CS_MATH);
 | |
| 	c = dquote_parse(infor ? ';' : ')', 0);
 | |
| 	cmdpop();
 | |
| 	*lexbuf.ptr = '\0';
 | |
| 	if (!c && infor) {
 | |
| 	    infor--;
 | |
| 	    return DINPAR;
 | |
| 	}
 | |
| 	if (c || (c = hgetc()) != ')') {
 | |
| 	    hungetc(c);
 | |
| 	    return LEXERR;
 | |
| 	}
 | |
| 	dbparens = 0;
 | |
| 	return DOUTPAR;
 | |
|     } else if (idigit(c)) {	/* handle 1< foo */
 | |
| 	d = hgetc();
 | |
| 	if(d == '&') {
 | |
| 	    d = hgetc();
 | |
| 	    if(d == '>') {
 | |
| 		/* digit then "&>": keep the fd and carry on from the '&' */
 | |
| 		peekfd = c - '0';
 | |
| 		hungetc('>');
 | |
| 		c = '&';
 | |
| 	    } else {
 | |
| 		hungetc(d);
 | |
| 		lexstop = 0;
 | |
| 		hungetc('&');
 | |
| 	    }
 | |
| 	} else if (d == '>' || d == '<') {
 | |
| 	    peekfd = c - '0';
 | |
| 	    c = d;
 | |
| 	} else {
 | |
| 	    hungetc(d);
 | |
| 	    lexstop = 0;
 | |
| 	}
 | |
|     }
 | |
| 
 | |
|     /* chars in initial position in word */
 | |
| 
 | |
|     /*
 | |
|      * Handle comments.  There are some special cases when this
 | |
|      * is not normal command input: lexflags implies we are examining
 | |
|      * a line lexically without it being used for normal command input.
 | |
|      */
 | |
|     if (c == hashchar && !nocomments &&
 | |
| 	(isset(INTERACTIVECOMMENTS) ||
 | |
| 	 ((!lexflags || (lexflags & LEXFLAGS_COMMENTS)) && !expanding &&
 | |
| 	  (!interact || unset(SHINSTDIN) || strin)))) {
 | |
| 	/* History is handled here to prevent extra  *
 | |
| 	 * newlines being inserted into the history. */
 | |
| 
 | |
| 	if (lexflags & LEXFLAGS_COMMENTS_KEEP) {
 | |
| 	    lexbuf.len = 0;
 | |
| 	    lexbuf.ptr = tokstr =
 | |
| 		(char *)hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
 | |
| 	    add(c);
 | |
| 	}
 | |
| 	hwabort();
 | |
| 	/* read to end of line, copying the text if it is being kept */
 | |
| 	while ((c = ingetc()) != '\n' && !lexstop) {
 | |
| 	    hwaddc(c);
 | |
| 	    addtoline(c);
 | |
| 	    if (lexflags & LEXFLAGS_COMMENTS_KEEP)
 | |
| 		add(c);
 | |
| 	}
 | |
| 
 | |
| 	if (errflag)
 | |
| 	    peek = LEXERR;
 | |
| 	else {
 | |
| 	    if (lexflags & LEXFLAGS_COMMENTS_KEEP) {
 | |
| 		*lexbuf.ptr = '\0';
 | |
| 		if (!lexstop)
 | |
| 		    hungetc(c);
 | |
| 		peek = STRING;
 | |
| 	    } else {
 | |
| 		hwend();
 | |
| 		hwbegin(0);
 | |
| 		hwaddc('\n');
 | |
| 		addtoline('\n');
 | |
| 		/*
 | |
| 		 * If splitting a line and removing comments,
 | |
| 		 * we don't want a newline token since it's
 | |
| 		 * treated specially.
 | |
| 		 */
 | |
| 		if ((lexflags & LEXFLAGS_COMMENTS_STRIP) && lexstop)
 | |
| 		    peek = ENDINPUT;
 | |
| 		else
 | |
| 		    peek = NEWLIN;
 | |
| 	    }
 | |
| 	}
 | |
| 	return peek;
 | |
|     }
 | |
|     switch (lexact1[STOUC(c)]) {
 | |
|     case LX1_BKSLASH:
 | |
| 	d = hgetc();
 | |
| 	if (d == '\n')
 | |
| 	    goto beginning;
 | |
| 	hungetc(d);
 | |
| 	lexstop = 0;
 | |
| 	break;
 | |
|     case LX1_NEWLIN:
 | |
| 	return NEWLIN;
 | |
|     case LX1_SEMI:
 | |
| 	d = hgetc();
 | |
| 	if(d == ';')
 | |
| 	    return DSEMI;
 | |
| 	else if(d == '&')
 | |
| 	    return SEMIAMP;
 | |
| 	else if (d == '|')
 | |
| 	    return SEMIBAR;
 | |
| 	hungetc(d);
 | |
| 	lexstop = 0;
 | |
| 	return SEMI;
 | |
|     case LX1_AMPER:
 | |
| 	d = hgetc();
 | |
| 	if (d == '&')
 | |
| 	    return DAMPER;
 | |
| 	else if (d == '!' || d == '|')
 | |
| 	    return AMPERBANG;
 | |
| 	else if (d == '>') {
 | |
| 	    tokfd = peekfd;
 | |
| 	    d = hgetc();
 | |
| 	    if (d == '!' || d == '|')
 | |
| 		return OUTANGAMPBANG;
 | |
| 	    else if (d == '>') {
 | |
| 		d = hgetc();
 | |
| 		if (d == '!' || d == '|')
 | |
| 		    return DOUTANGAMPBANG;
 | |
| 		hungetc(d);
 | |
| 		lexstop = 0;
 | |
| 		return DOUTANGAMP;
 | |
| 	    }
 | |
| 	    hungetc(d);
 | |
| 	    lexstop = 0;
 | |
| 	    return AMPOUTANG;
 | |
| 	}
 | |
| 	hungetc(d);
 | |
| 	lexstop = 0;
 | |
| 	return AMPER;
 | |
|     case LX1_BAR:
 | |
| 	d = hgetc();
 | |
| 	if (d == '|' && !incasepat)
 | |
| 	    return DBAR;
 | |
| 	else if (d == '&')
 | |
| 	    return BARAMP;
 | |
| 	hungetc(d);
 | |
| 	lexstop = 0;
 | |
| 	return BAR;
 | |
|     case LX1_INPAR:
 | |
| 	d = hgetc();
 | |
| 	if (d == '(') {
 | |
| 	    if (infor) {
 | |
| 		dbparens = 1;
 | |
| 		return DINPAR;
 | |
| 	    }
 | |
| 	    if (incmdpos || (isset(SHGLOB) && !isset(KSHGLOB))) {
 | |
| 		lexbuf.len = 0;
 | |
| 		lexbuf.ptr = tokstr = (char *)
 | |
| 		    hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
 | |
| 		switch (cmd_or_math(CS_MATH)) {
 | |
| 		case CMD_OR_MATH_MATH:
 | |
| 		    return DINPAR;
 | |
| 
 | |
| 		case CMD_OR_MATH_CMD:
 | |
| 		    /*
 | |
| 		     * Not math, so we don't return the contents
 | |
| 		     * as a string in this case.
 | |
| 		     */
 | |
| 		    tokstr = NULL;
 | |
| 		    return INPAR;
 | |
| 		    
 | |
| 		case CMD_OR_MATH_ERR:
 | |
| 		    /*
 | |
| 		     * LEXFLAGS_ACTIVE means we came from bufferwords(),
 | |
| 		     * so we treat as an incomplete math expression
 | |
| 		     */
 | |
| 		    if (lexflags & LEXFLAGS_ACTIVE)
 | |
| 			tokstr = dyncat("((", tokstr ? tokstr : "");
 | |
| 		    /* fall through */
 | |
| 
 | |
| 		default:
 | |
| 		    return LEXERR;
 | |
| 		}
 | |
| 	    }
 | |
| 	} else if (d == ')')
 | |
| 	    return INOUTPAR;
 | |
| 	hungetc(d);
 | |
| 	lexstop = 0;
 | |
| 	if (!(isset(SHGLOB) || incond == 1 || incmdpos))
 | |
| 	    break;
 | |
| 	return INPAR;
 | |
|     case LX1_OUTPAR:
 | |
| 	return OUTPAR;
 | |
|     case LX1_INANG:
 | |
| 	d = hgetc();
 | |
| 	if (d == '(') {
 | |
| 	    hungetc(d);
 | |
| 	    lexstop = 0;
 | |
| 	    unpeekfd:	/* not a redirection: the fd digit starts the word */
 | |
| 	    if(peekfd != -1) {
 | |
| 		hungetc(c);
 | |
| 		c = '0' + peekfd;
 | |
| 	    }
 | |
| 	    break;
 | |
| 	}
 | |
| 	if (d == '>') {
 | |
| 	    peek = INOUTANG;
 | |
| 	} else if (d == '<') {
 | |
| 	    int e = hgetc();
 | |
| 
 | |
| 	    if (e == '(') {
 | |
| 		hungetc(e);
 | |
| 		hungetc(d);
 | |
| 		peek = INANG;
 | |
| 	    } else if (e == '<')
 | |
| 		peek = TRINANG;
 | |
| 	    else if (e == '-')
 | |
| 		peek = DINANGDASH;
 | |
| 	    else {
 | |
| 		hungetc(e);
 | |
| 		lexstop = 0;
 | |
| 		peek = DINANG;
 | |
| 	    }
 | |
| 	} else if (d == '&') {
 | |
| 	    peek = INANGAMP;
 | |
| 	} else {
 | |
| 	    hungetc(d);
 | |
| 	    if(isnumglob())	/* numeric glob: read it as a word */
 | |
| 		goto unpeekfd;
 | |
| 	    peek = INANG;
 | |
| 	}
 | |
| 	tokfd = peekfd;
 | |
| 	return peek;
 | |
|     case LX1_OUTANG:
 | |
| 	d = hgetc();
 | |
| 	if (d == '(') {
 | |
| 	    hungetc(d);
 | |
| 	    goto unpeekfd;
 | |
| 	} else if (d == '&') {
 | |
| 	    d = hgetc();
 | |
| 	    if (d == '!' || d == '|')
 | |
| 		peek = OUTANGAMPBANG;
 | |
| 	    else {
 | |
| 		hungetc(d);
 | |
| 		lexstop = 0;
 | |
| 		peek = OUTANGAMP;
 | |
| 	    }
 | |
| 	} else if (d == '!' || d == '|')
 | |
| 	    peek = OUTANGBANG;
 | |
| 	else if (d == '>') {
 | |
| 	    d = hgetc();
 | |
| 	    if (d == '&') {
 | |
| 		d = hgetc();
 | |
| 		if (d == '!' || d == '|')
 | |
| 		    peek = DOUTANGAMPBANG;
 | |
| 		else {
 | |
| 		    hungetc(d);
 | |
| 		    lexstop = 0;
 | |
| 		    peek = DOUTANGAMP;
 | |
| 		}
 | |
| 	    } else if (d == '!' || d == '|')
 | |
| 		peek = DOUTANGBANG;
 | |
| 	    else if (d == '(') {
 | |
| 		hungetc(d);
 | |
| 		hungetc('>');
 | |
| 		peek = OUTANG;
 | |
| 	    } else {
 | |
| 		hungetc(d);
 | |
| 		lexstop = 0;
 | |
| 		peek = DOUTANG;
 | |
| 		if (isset(HISTALLOWCLOBBER))
 | |
| 		    hwaddc('|');
 | |
| 	    }
 | |
| 	} else {
 | |
| 	    hungetc(d);
 | |
| 	    lexstop = 0;
 | |
| 	    peek = OUTANG;
 | |
| 	    if (!incond && isset(HISTALLOWCLOBBER))
 | |
| 		hwaddc('|');
 | |
| 	}
 | |
| 	tokfd = peekfd;
 | |
| 	return peek;
 | |
|     }
 | |
| 
 | |
|     /* we've started a string, now get the *
 | |
|      * rest of it, performing tokenization */
 | |
|     return gettokstr(c, 0);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Get the remains of a token string.  This has two uses.
 | |
|  * When called from gettok(), with sub = 0, we have already identified
 | |
|  * any interesting initial character and want to get the rest of
 | |
|  * what we now know is a string.  However, the string may still include
 | |
|  * metacharacters and potentially substitutions.
 | |
|  *
 | |
|  * When called from parse_subst_string() with sub = 1, we are not
 | |
|  * fully parsing a command line, merely tokenizing a string.
 | |
|  * In this case we always add characters to the parsed string
 | |
|  * unless there is a parse error.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| static enum lextok
 | |
| gettokstr(int c, int sub)
 | |
| {
 | |
|     int bct = 0, pct = 0, brct = 0, seen_brct = 0, fdpar = 0;
 | |
|     int intpos = 1, in_brace_param = 0;
 | |
|     int inquote, unmatched = 0;
 | |
|     enum lextok peek;
 | |
| #ifdef DEBUG
 | |
|     int ocmdsp = cmdsp;
 | |
| #endif
 | |
| 
 | |
|     peek = STRING;
 | |
|     if (!sub) {
 | |
| 	lexbuf.len = 0;
 | |
| 	lexbuf.ptr = tokstr = (char *) hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
 | |
|     }
 | |
|     for (;;) {
 | |
| 	int act;
 | |
| 	int e;
 | |
| 	int inbl = inblank(c);
 | |
| 	
 | |
| 	if (fdpar && !inbl && c != ')')
 | |
| 	    fdpar = 0;
 | |
| 
 | |
| 	if (inbl && !in_brace_param && !pct)
 | |
| 	    act = LX2_BREAK;
 | |
| 	else {
 | |
| 	    act = lexact2[STOUC(c)];
 | |
| 	    c = lextok2[STOUC(c)];
 | |
| 	}
 | |
| 	switch (act) {
 | |
| 	case LX2_BREAK:
 | |
| 	    if (!in_brace_param && !sub)
 | |
| 		goto brk;
 | |
| 	    break;
 | |
| 	case LX2_META:
 | |
| 	    c = hgetc();
 | |
| #ifdef DEBUG
 | |
| 	    if (lexstop) {
 | |
| 		fputs("BUG: input terminated by Meta\n", stderr);
 | |
| 		fflush(stderr);
 | |
| 		goto brk;
 | |
| 	    }
 | |
| #endif
 | |
| 	    add(Meta);
 | |
| 	    break;
 | |
| 	case LX2_OUTPAR:
 | |
| 	    if (fdpar) {
 | |
| 		/* this is a single word `(   )', treat as INOUTPAR */
 | |
| 		add(c);
 | |
| 		*lexbuf.ptr = '\0';
 | |
| 		return INOUTPAR;
 | |
| 	    }
 | |
| 	    if ((sub || in_brace_param) && isset(SHGLOB))
 | |
| 		break;
 | |
| 	    if (!in_brace_param && !pct--) {
 | |
| 		if (sub) {
 | |
| 		    pct = 0;
 | |
| 		    break;
 | |
| 		} else
 | |
| 		    goto brk;
 | |
| 	    }
 | |
| 	    c = Outpar;
 | |
| 	    break;
 | |
| 	case LX2_BAR:
 | |
| 	    if (!pct && !in_brace_param) {
 | |
| 		if (sub)
 | |
| 		    break;
 | |
| 		else
 | |
| 		    goto brk;
 | |
| 	    }
 | |
| 	    if (unset(SHGLOB) || (!sub && !in_brace_param))
 | |
| 		c = Bar;
 | |
| 	    break;
 | |
| 	case LX2_STRING:
 | |
| 	    e = hgetc();
 | |
| 	    if (e == '[') {
 | |
| 		cmdpush(CS_MATHSUBST);
 | |
| 		add(String);
 | |
| 		add(Inbrack);
 | |
| 		c = dquote_parse(']', sub);
 | |
| 		cmdpop();
 | |
| 		if (c) {
 | |
| 		    peek = LEXERR;
 | |
| 		    goto brk;
 | |
| 		}
 | |
| 		c = Outbrack;
 | |
| 	    } else if (e == '(') {
 | |
| 		add(String);
 | |
| 		switch (cmd_or_math_sub()) {
 | |
| 		case CMD_OR_MATH_CMD:
 | |
| 		    c = Outpar;
 | |
| 		    break;
 | |
| 
 | |
| 		case CMD_OR_MATH_MATH:
 | |
| 		    c = Outparmath;
 | |
| 		    break;
 | |
| 
 | |
| 		default:
 | |
| 		    peek = LEXERR;
 | |
| 		    goto brk;
 | |
| 		}
 | |
| 	    } else {
 | |
| 		if (e == '{') {
 | |
| 		    add(c);
 | |
| 		    c = Inbrace;
 | |
| 		    ++bct;
 | |
| 		    cmdpush(CS_BRACEPAR);
 | |
| 		    if (!in_brace_param) {
 | |
| 			if ((in_brace_param = bct))
 | |
| 			    seen_brct = 0;
 | |
| 		    }
 | |
| 		} else {
 | |
| 		    hungetc(e);
 | |
| 		    lexstop = 0;
 | |
| 		}
 | |
| 	    }
 | |
| 	    break;
 | |
| 	case LX2_INBRACK:
 | |
| 	    if (!in_brace_param) {
 | |
| 		brct++;
 | |
| 		seen_brct = 1;
 | |
| 	    }
 | |
| 	    c = Inbrack;
 | |
| 	    break;
 | |
| 	case LX2_OUTBRACK:
 | |
| 	    if (!in_brace_param)
 | |
| 		brct--;
 | |
| 	    if (brct < 0)
 | |
| 		brct = 0;
 | |
| 	    c = Outbrack;
 | |
| 	    break;
 | |
| 	case LX2_INPAR:
 | |
| 	    if (isset(SHGLOB)) {
 | |
| 		if (sub || in_brace_param)
 | |
| 		    break;
 | |
| 		if (incasepat > 0 && !lexbuf.len)
 | |
| 		    return INPAR;
 | |
| 		if (!isset(KSHGLOB) && lexbuf.len)
 | |
| 		    goto brk;
 | |
| 	    }
 | |
| 	    if (!in_brace_param) {
 | |
| 		if (!sub) {
 | |
| 		    e = hgetc();
 | |
| 		    hungetc(e);
 | |
| 		    lexstop = 0;
 | |
| 		    /* For command words, parentheses are only
 | |
| 		     * special at the start.  But now we're tokenising
 | |
| 		     * the remaining string.  So I don't see what
 | |
| 		     * the old incmdpos test here is for.
 | |
| 		     *   pws 1999/6/8
 | |
| 		     *
 | |
| 		     * Oh, no.
 | |
| 		     *  func1(   )
 | |
| 		     * is a valid function definition in [k]sh.  The best
 | |
| 		     * thing we can do, without really nasty lookahead tricks,
 | |
| 		     * is break if we find a blank after a parenthesis.  At
 | |
| 		     * least this can't happen inside braces or brackets.  We
 | |
| 		     * only allow this with SHGLOB (set for both sh and ksh).
 | |
| 		     *
 | |
| 		     * Things like `print @( |foo)' should still
 | |
| 		     * work, because [k]sh don't allow multiple words
 | |
| 		     * in a function definition, so we only do this
 | |
| 		     * in command position.
 | |
| 		     *   pws 1999/6/14
 | |
| 		     */
 | |
| 		    if (e == ')' || (isset(SHGLOB) && inblank(e) && !bct &&
 | |
| 				     !brct && !intpos && incmdpos)) {
 | |
| 			/*
 | |
| 			 * Either a () token, or a command word with
 | |
| 			 * something suspiciously like a ksh function
 | |
| 			 * definition.
 | |
| 			 * The current word isn't spellcheckable.
 | |
| 			 */
 | |
| 			nocorrect |= 2;
 | |
| 			goto brk;
 | |
| 		    }
 | |
| 		}
 | |
| 		/*
 | |
| 		 * This also handles the [k]sh `foo( )' function definition.
 | |
| 		 * Maintain a variable fdpar, set as long as a single set of
 | |
| 		 * parentheses contains only space.  Then if we get to the
 | |
| 		 * closing parenthesis and it is still set, we can assume we
 | |
| 		 * have a function definition.  Only do this at the start of
 | |
| 		 * the word, since the (...) must be a separate token.
 | |
| 		 */
 | |
| 		if (!pct++ && isset(SHGLOB) && intpos && !bct && !brct)
 | |
| 		    fdpar = 1;
 | |
| 	    }
 | |
| 	    c = Inpar;
 | |
| 	    break;
 | |
| 	case LX2_INBRACE:
 | |
| 	    if (isset(IGNOREBRACES) || sub)
 | |
| 		c = '{';
 | |
| 	    else {
 | |
| 		if (!lexbuf.len && incmdpos) {
 | |
| 		    add('{');
 | |
| 		    *lexbuf.ptr = '\0';
 | |
| 		    return STRING;
 | |
| 		}
 | |
| 		if (in_brace_param) {
 | |
| 		    cmdpush(CS_BRACE);
 | |
| 		}
 | |
| 		bct++;
 | |
| 	    }
 | |
| 	    break;
 | |
| 	case LX2_OUTBRACE:
 | |
| 	    if ((isset(IGNOREBRACES) || sub) && !in_brace_param)
 | |
| 		break;
 | |
| 	    if (!bct)
 | |
| 		break;
 | |
| 	    if (in_brace_param) {
 | |
| 		cmdpop();
 | |
| 	    }
 | |
| 	    if (bct-- == in_brace_param)
 | |
| 		in_brace_param = 0;
 | |
| 	    c = Outbrace;
 | |
| 	    break;
 | |
| 	case LX2_COMMA:
 | |
| 	    if (unset(IGNOREBRACES) && !sub && bct > in_brace_param)
 | |
| 		c = Comma;
 | |
| 	    break;
 | |
| 	case LX2_OUTANG:
 | |
| 	    if (in_brace_param || sub)
 | |
| 		break;
 | |
| 	    e = hgetc();
 | |
| 	    if (e != '(') {
 | |
| 		hungetc(e);
 | |
| 		lexstop = 0;
 | |
| 		goto brk;
 | |
| 	    }
 | |
| 	    add(OutangProc);
 | |
| 	    if (skipcomm()) {
 | |
| 		peek = LEXERR;
 | |
| 		goto brk;
 | |
| 	    }
 | |
| 	    c = Outpar;
 | |
| 	    break;
 | |
| 	case LX2_INANG:
 | |
| 	    if (isset(SHGLOB) && sub)
 | |
| 		break;
 | |
| 	    e = hgetc();
 | |
| 	    if (!(in_brace_param || sub) && e == '(') {
 | |
| 		add(Inang);
 | |
| 		if (skipcomm()) {
 | |
| 		    peek = LEXERR;
 | |
| 		    goto brk;
 | |
| 		}
 | |
| 		c = Outpar;
 | |
| 		break;
 | |
| 	    }
 | |
| 	    hungetc(e);
 | |
| 	    if(isnumglob()) {
 | |
| 		add(Inang);
 | |
| 		while ((c = hgetc()) != '>')
 | |
| 		    add(c);
 | |
| 		c = Outang;
 | |
| 		break;
 | |
| 	    }
 | |
| 	    lexstop = 0;
 | |
| 	    if (in_brace_param || sub)
 | |
| 		break;
 | |
| 	    goto brk;
 | |
| 	case LX2_EQUALS:
 | |
| 	    if (!sub) {
 | |
| 		if (intpos) {
 | |
| 		    e = hgetc();
 | |
| 		    if (e != '(') {
 | |
| 			hungetc(e);
 | |
| 			lexstop = 0;
 | |
| 			c = Equals;
 | |
| 		    } else {
 | |
| 			add(Equals);
 | |
| 			if (skipcomm()) {
 | |
| 			    peek = LEXERR;
 | |
| 			    goto brk;
 | |
| 			}
 | |
| 			c = Outpar;
 | |
| 		    }
 | |
| 		} else if (peek != ENVSTRING &&
 | |
| 			   (incmdpos || intypeset) && !bct && !brct) {
 | |
| 		    char *t = tokstr;
 | |
| 		    if (idigit(*t))
 | |
| 			while (++t < lexbuf.ptr && idigit(*t));
 | |
| 		    else {
 | |
| 			int sav = *lexbuf.ptr;
 | |
| 			*lexbuf.ptr = '\0';
 | |
| 			t = itype_end(t, IIDENT, 0);
 | |
| 			if (t < lexbuf.ptr) {
 | |
| 			    skipparens(Inbrack, Outbrack, &t);
 | |
| 			} else {
 | |
| 			    *lexbuf.ptr = sav;
 | |
| 			}
 | |
| 		    }
 | |
| 		    if (*t == '+')
 | |
| 			t++;
 | |
| 		    if (t == lexbuf.ptr) {
 | |
| 			e = hgetc();
 | |
| 			if (e == '(') {
 | |
| 			    *lexbuf.ptr = '\0';
 | |
| 			    return ENVARRAY;
 | |
| 			}
 | |
| 			hungetc(e);
 | |
| 			lexstop = 0;
 | |
| 			peek = ENVSTRING;
 | |
| 			intpos = 2;
 | |
| 		    } else
 | |
| 			c = Equals;
 | |
| 		} else
 | |
| 		    c = Equals;
 | |
| 	    }
 | |
| 	    break;
 | |
| 	case LX2_BKSLASH:
 | |
| 	    c = hgetc();
 | |
| 	    if (c == '\n') {
 | |
| 		c = hgetc();
 | |
| 		if (!lexstop)
 | |
| 		    continue;
 | |
| 	    } else {
 | |
| 		add(Bnull);
 | |
| 		if (c == STOUC(Meta)) {
 | |
| 		    c = hgetc();
 | |
| #ifdef DEBUG
 | |
| 		    if (lexstop) {
 | |
| 			fputs("BUG: input terminated by Meta\n", stderr);
 | |
| 			fflush(stderr);
 | |
| 			goto brk;
 | |
| 		    }
 | |
| #endif
 | |
| 		    add(Meta);
 | |
| 		}
 | |
| 	    }
 | |
| 	    if (lexstop)
 | |
| 		goto brk;
 | |
| 	    break;
 | |
| 	case LX2_QUOTE: {
 | |
| 	    int strquote = (lexbuf.len && lexbuf.ptr[-1] == String);
 | |
| 
 | |
| 	    add(Snull);
 | |
| 	    cmdpush(CS_QUOTE);
 | |
| 	    for (;;) {
 | |
| 		STOPHIST
 | |
| 		while ((c = hgetc()) != '\'' && !lexstop) {
 | |
| 		    if (strquote && c == '\\') {
 | |
| 			c = hgetc();
 | |
| 			if (lexstop)
 | |
| 			    break;
 | |
| 			/*
 | |
| 			 * Mostly we don't need to do anything special
 | |
| 			 * with escape backslashes or closing quotes
 | |
| 			 * inside $'...'; however in completion we
 | |
| 			 * need to be able to strip multiple backslashes
 | |
| 			 * neatly.
 | |
| 			 */
 | |
| 			if (c == '\\' || c == '\'')
 | |
| 			    add(Bnull);
 | |
| 			else
 | |
| 			    add('\\');
 | |
| 		    } else if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') {
 | |
| 			if (lexbuf.ptr[-1] == '\\')
 | |
| 			    lexbuf.ptr--, lexbuf.len--;
 | |
| 			else
 | |
| 			    break;
 | |
| 		    }
 | |
| 		    add(c);
 | |
| 		}
 | |
| 		ALLOWHIST
 | |
| 		if (c != '\'') {
 | |
| 		    unmatched = '\'';
 | |
| 		    /* Not an error when called from bufferwords() */
 | |
| 		    if (!(lexflags & LEXFLAGS_ACTIVE))
 | |
| 			peek = LEXERR;
 | |
| 		    cmdpop();
 | |
| 		    goto brk;
 | |
| 		}
 | |
| 		e = hgetc();
 | |
| 		if (e != '\'' || unset(RCQUOTES) || strquote)
 | |
| 		    break;
 | |
| 		add(c);
 | |
| 	    }
 | |
| 	    cmdpop();
 | |
| 	    hungetc(e);
 | |
| 	    lexstop = 0;
 | |
| 	    c = Snull;
 | |
| 	    break;
 | |
| 	}
 | |
| 	case LX2_DQUOTE:
 | |
| 	    add(Dnull);
 | |
| 	    cmdpush(CS_DQUOTE);
 | |
| 	    c = dquote_parse('"', sub);
 | |
| 	    cmdpop();
 | |
| 	    if (c) {
 | |
| 		unmatched = '"';
 | |
| 		/* Not an error when called from bufferwords() */
 | |
| 		if (!(lexflags & LEXFLAGS_ACTIVE))
 | |
| 		    peek = LEXERR;
 | |
| 		goto brk;
 | |
| 	    }
 | |
| 	    c = Dnull;
 | |
| 	    break;
 | |
| 	case LX2_BQUOTE:
 | |
| 	    add(Tick);
 | |
| 	    cmdpush(CS_BQUOTE);
 | |
| 	    SETPARBEGIN
 | |
| 	    inquote = 0;
 | |
| 	    while ((c = hgetc()) != '`' && !lexstop) {
 | |
| 		if (c == '\\') {
 | |
| 		    c = hgetc();
 | |
| 		    if (c != '\n') {
 | |
| 			add(c == '`' || c == '\\' || c == '$' ? Bnull : '\\');
 | |
| 			add(c);
 | |
| 		    }
 | |
| 		    else if (!sub && isset(CSHJUNKIEQUOTES))
 | |
| 			add(c);
 | |
| 		} else {
 | |
| 		    if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') {
 | |
| 			break;
 | |
| 		    }
 | |
| 		    add(c);
 | |
| 		    if (c == '\'') {
 | |
| 			if ((inquote = !inquote))
 | |
| 			    STOPHIST
 | |
| 			else
 | |
| 			    ALLOWHIST
 | |
| 		    }
 | |
| 		}
 | |
| 	    }
 | |
| 	    if (inquote)
 | |
| 		ALLOWHIST
 | |
| 	    cmdpop();
 | |
| 	    if (c != '`') {
 | |
| 		unmatched = '`';
 | |
| 		/* Not an error when called from bufferwords() */
 | |
| 		if (!(lexflags & LEXFLAGS_ACTIVE))
 | |
| 		    peek = LEXERR;
 | |
| 		goto brk;
 | |
| 	    }
 | |
| 	    c = Tick;
 | |
| 	    SETPAREND
 | |
| 	    break;
 | |
| 	case LX2_DASH:
 | |
| 	    /*
 | |
| 	     * - shouldn't be treated as a special character unless
 | |
| 	     * we're in a pattern.  Unfortunately, working out for
 | |
| 	     * sure in complicated expressions whether we're in a
 | |
| 	     * pattern is tricky.  So we'll make it special and
 | |
| 	     * turn it back any time we don't need it special.
 | |
| 	     * This is not ideal as it's a lot of work.
 | |
| 	     */
 | |
| 	    c = Dash;
 | |
|            break;
 | |
|        case LX2_BANG:
 | |
|            /*
 | |
|             * Same logic as Dash, for ! to perform negation in range.
 | |
|             */
 | |
|            if (seen_brct)
 | |
|                c = Bang;
 | |
|            else
 | |
|                c = '!';
 | |
|        }
 | |
|        add(c);
 | |
|        c = hgetc();
 | |
| 	if (intpos)
 | |
| 	    intpos--;
 | |
| 	if (lexstop)
 | |
| 	    break;
 | |
|     }
 | |
|   brk:
 | |
|     if (errflag) {
 | |
| 	if (in_brace_param) {
 | |
| 	    while(bct-- >= in_brace_param)
 | |
| 		cmdpop();
 | |
| 	}
 | |
| 	return LEXERR;
 | |
|     }
 | |
|     hungetc(c);
 | |
|     if (unmatched && !(lexflags & LEXFLAGS_ACTIVE))
 | |
| 	zerr("unmatched %c", unmatched);
 | |
|     if (in_brace_param) {
 | |
| 	while(bct-- >= in_brace_param)
 | |
| 	    cmdpop();
 | |
| 	zerr("closing brace expected");
 | |
|     } else if (unset(IGNOREBRACES) && !sub && lexbuf.len > 1 &&
 | |
| 	       peek == STRING && lexbuf.ptr[-1] == '}' &&
 | |
| 	       lexbuf.ptr[-2] != Bnull) {
 | |
| 	/* hack to get {foo} command syntax work */
 | |
| 	lexbuf.ptr--;
 | |
| 	lexbuf.len--;
 | |
| 	lexstop = 0;
 | |
| 	hungetc('}');
 | |
|     }
 | |
|     *lexbuf.ptr = '\0';
 | |
|     DPUTS(cmdsp != ocmdsp, "BUG: gettok: cmdstack changed.");
 | |
|     return peek;
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|  * Parse input as if in double quotes.
 | |
|  * endchar is the end character to expect.
 | |
|  * sub has got something to do with whether we are doing quoted substitution.
 | |
|  * Return non-zero for error (character to unget), else zero
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| static int
 | |
| dquote_parse(char endchar, int sub)
 | |
| {
 | |
|     int pct = 0, brct = 0, bct = 0, intick = 0, err = 0;
 | |
|     int c;
 | |
|     int math = endchar == ')' || endchar == ']' || infor;
 | |
|     int zlemath = math && zlemetacs > zlemetall + addedx - inbufct;
 | |
| 
 | |
|     while (((c = hgetc()) != endchar || bct ||
 | |
| 	    (math && ((pct > 0) || (brct > 0))) ||
 | |
| 	    intick) && !lexstop) {
 | |
|       cont:
 | |
| 	switch (c) {
 | |
| 	case '\\':
 | |
| 	    c = hgetc();
 | |
| 	    if (c != '\n') {
 | |
| 		if (c == '$' || c == '\\' || (c == '}' && !intick && bct) ||
 | |
| 		    c == endchar || c == '`' ||
 | |
| 		    (endchar == ']' && (c == '[' || c == ']' ||
 | |
| 					c == '(' || c == ')' ||
 | |
| 					c == '{' || c == '}' ||
 | |
| 					(c == '"' && sub))))
 | |
| 		    add(Bnull);
 | |
| 		else {
 | |
| 		    /* lexstop is implicitly handled here */
 | |
| 		    add('\\');
 | |
| 		    goto cont;
 | |
| 		}
 | |
| 	    } else if (sub || unset(CSHJUNKIEQUOTES) || endchar != '"')
 | |
| 		continue;
 | |
| 	    break;
 | |
| 	case '\n':
 | |
| 	    err = !sub && isset(CSHJUNKIEQUOTES) && endchar == '"';
 | |
| 	    break;
 | |
| 	case '$':
 | |
| 	    if (intick)
 | |
| 		break;
 | |
| 	    c = hgetc();
 | |
| 	    if (c == '(') {
 | |
| 		add(Qstring);
 | |
| 		switch (cmd_or_math_sub()) {
 | |
| 		case CMD_OR_MATH_CMD:
 | |
| 		    c = Outpar;
 | |
| 		    break;
 | |
| 
 | |
| 		case CMD_OR_MATH_MATH:
 | |
| 		    c = Outparmath;
 | |
| 		    break;
 | |
| 
 | |
| 		default:
 | |
| 		    err = 1;
 | |
| 		    break;
 | |
| 		}
 | |
| 	    } else if (c == '[') {
 | |
| 		add(String);
 | |
| 		add(Inbrack);
 | |
| 		cmdpush(CS_MATHSUBST);
 | |
| 		err = dquote_parse(']', sub);
 | |
| 		cmdpop();
 | |
| 		c = Outbrack;
 | |
| 	    } else if (c == '{') {
 | |
| 		add(Qstring);
 | |
| 		c = Inbrace;
 | |
| 		cmdpush(CS_BRACEPAR);
 | |
| 		bct++;
 | |
| 	    } else if (c == '$')
 | |
| 		add(Qstring);
 | |
| 	    else {
 | |
| 		hungetc(c);
 | |
| 		lexstop = 0;
 | |
| 		c = Qstring;
 | |
| 	    }
 | |
| 	    break;
 | |
| 	case '}':
 | |
| 	    if (intick || !bct)
 | |
| 		break;
 | |
| 	    c = Outbrace;
 | |
| 	    bct--;
 | |
| 	    cmdpop();
 | |
| 	    break;
 | |
| 	case '`':
 | |
| 	    c = Qtick;
 | |
| 	    if (intick == 2)
 | |
| 		ALLOWHIST
 | |
| 	    if ((intick = !intick)) {
 | |
| 		SETPARBEGIN
 | |
| 		cmdpush(CS_BQUOTE);
 | |
| 	    } else {
 | |
| 		SETPAREND
 | |
| 	        cmdpop();
 | |
| 	    }
 | |
| 	    break;
 | |
| 	case '\'':
 | |
| 	    if (!intick)
 | |
| 		break;
 | |
| 	    if (intick == 1)
 | |
| 		intick = 2, STOPHIST
 | |
| 	    else
 | |
| 		intick = 1, ALLOWHIST
 | |
| 	    break;
 | |
| 	case '(':
 | |
| 	    if (!math || !bct)
 | |
| 		pct++;
 | |
| 	    break;
 | |
| 	case ')':
 | |
| 	    if (!math || !bct)
 | |
| 		err = (!pct-- && math);
 | |
| 	    break;
 | |
| 	case '[':
 | |
| 	    if (!math || !bct)
 | |
| 		brct++;
 | |
| 	    break;
 | |
| 	case ']':
 | |
| 	    if (!math || !bct)
 | |
| 		err = (!brct-- && math);
 | |
| 	    break;
 | |
| 	case '"':
 | |
| 	    if (intick || (endchar != '"' && !bct))
 | |
| 		break;
 | |
| 	    if (bct) {
 | |
| 		add(Dnull);
 | |
| 		cmdpush(CS_DQUOTE);
 | |
| 		err = dquote_parse('"', sub);
 | |
| 		cmdpop();
 | |
| 		c = Dnull;
 | |
| 	    } else
 | |
| 		err = 1;
 | |
| 	    break;
 | |
| 	}
 | |
| 	if (err || lexstop)
 | |
| 	    break;
 | |
| 	add(c);
 | |
|     }
 | |
|     if (intick == 2)
 | |
| 	ALLOWHIST
 | |
|     if (intick) {
 | |
| 	cmdpop();
 | |
|     }
 | |
|     while (bct--)
 | |
| 	cmdpop();
 | |
|     if (lexstop)
 | |
| 	err = intick || endchar || err;
 | |
|     else if (err == 1) {
 | |
| 	/*
 | |
| 	 * TODO: as far as I can see, this hack is used in gettokstr()
 | |
| 	 * to hungetc() a character on an error.  However, I don't
 | |
| 	 * understand what that actually gets us, and we can't guarantee
 | |
| 	 * it's a character anyway, because of the previous test.
 | |
| 	 *
 | |
| 	 * We use the same feature in cmd_or_math where we actually do
 | |
| 	 * need to unget if we decide it's really a command substitution.
 | |
| 	 * We try to handle the other case by testing for lexstop.
 | |
| 	 */
 | |
| 	err = c;
 | |
|     }
 | |
|     if (zlemath && zlemetacs <= zlemetall + 1 - inbufct)
 | |
| 	inwhat = IN_MATH;
 | |
|     return err;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Tokenize a string given in s. Parsing is done as in double
 | |
|  * quotes.  This is usually called before singsub().
 | |
|  *
 | |
|  * parsestr() is noisier, reporting an error if the parse failed.
 | |
|  *
 | |
|  * On entry, *s must point to a string allocated from the stack of
 | |
|  * exactly the right length, i.e. strlen(*s) + 1, as the string
 | |
|  * is used as the lexical token string whose memory management
 | |
|  * demands this.  Usually the input string will therefore be
 | |
|  * the result of an immediately preceding dupstring().
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| mod_export int
 | |
| parsestr(char **s)
 | |
| {
 | |
|     int err;
 | |
| 
 | |
|     if ((err = parsestrnoerr(s))) {
 | |
| 	untokenize(*s);
 | |
| 	if (!(errflag & ERRFLAG_INT)) {
 | |
| 	    if (err > 32 && err < 127)
 | |
| 		zerr("parse error near `%c'", err);
 | |
| 	    else
 | |
| 		zerr("parse error");
 | |
| 	    tok = LEXERR;
 | |
| 	}
 | |
|     }
 | |
|     return err;
 | |
| }
 | |
| 
 | |
| /**/
 | |
| mod_export int
 | |
| parsestrnoerr(char **s)
 | |
| {
 | |
|     int l = strlen(*s), err;
 | |
| 
 | |
|     zcontext_save();
 | |
|     untokenize(*s);
 | |
|     inpush(dupstring_wlen(*s, l), 0, NULL);
 | |
|     strinbeg(0);
 | |
|     lexbuf.len = 0;
 | |
|     lexbuf.ptr = tokstr = *s;
 | |
|     lexbuf.siz = l + 1;
 | |
|     err = dquote_parse('\0', 1);
 | |
|     if (tokstr)
 | |
| 	*s = tokstr;
 | |
|     *lexbuf.ptr = '\0';
 | |
|     strinend();
 | |
|     inpop();
 | |
|     DPUTS(cmdsp, "BUG: parsestr: cmdstack not empty.");
 | |
|     zcontext_restore();
 | |
|     return err;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Parse a subscript in string s.
 | |
|  * sub is passed down to dquote_parse().
 | |
|  * endchar is the final character.
 | |
|  * Return the next character, or NULL.
 | |
|  */
 | |
| /**/
 | |
| mod_export char *
 | |
| parse_subscript(char *s, int sub, int endchar)
 | |
| {
 | |
|     int l = strlen(s), err, toklen;
 | |
|     char *t;
 | |
| 
 | |
|     if (!*s || *s == endchar)
 | |
| 	return 0;
 | |
|     zcontext_save();
 | |
|     untokenize(t = dupstring_wlen(s, l));
 | |
|     inpush(t, 0, NULL);
 | |
|     strinbeg(0);
 | |
|     /*
 | |
|      * Warning to Future Generations:
 | |
|      *
 | |
|      * This way of passing the subscript through the lexer is brittle.
 | |
|      * Code above this for several layers assumes that when we tokenise
 | |
|      * the input it goes into the same place as the original string.
 | |
|      * However, the lexer may overwrite later bits of the string or
 | |
|      * reallocate it, in particular when expanding aliaes.  To get
 | |
|      * around this, we copy the string and then copy it back.  This is a
 | |
|      * bit more robust but still relies on the underlying assumption of
 | |
|      * length preservation.
 | |
|      */
 | |
|     lexbuf.len = 0;
 | |
|     lexbuf.ptr = tokstr = dupstring_wlen(s, l);
 | |
|     lexbuf.siz = l + 1;
 | |
|     err = dquote_parse(endchar, sub);
 | |
|     toklen = (int)(lexbuf.ptr - tokstr);
 | |
|     DPUTS(toklen > l, "Bad length for parsed subscript");
 | |
|     memcpy(s, tokstr, toklen);
 | |
|     if (err) {
 | |
| 	char *strend = s + toklen;
 | |
| 	err = *strend;
 | |
| 	*strend = '\0';
 | |
| 	untokenize(s);
 | |
| 	*strend = err;
 | |
| 	s = NULL;
 | |
|     } else {
 | |
| 	s += toklen;
 | |
|     }
 | |
|     strinend();
 | |
|     inpop();
 | |
|     DPUTS(cmdsp, "BUG: parse_subscript: cmdstack not empty.");
 | |
|     zcontext_restore();
 | |
|     return s;
 | |
| }
 | |
| 
 | |
| /* Tokenize a string given in s. Parsing is done as if s were a normal *
 | |
|  * command-line argument but it may contain separators.  This is used  *
 | |
|  * to parse the right-hand side of ${...%...} substitutions.           */
 | |
| 
 | |
| /**/
 | |
| mod_export int
 | |
| parse_subst_string(char *s)
 | |
| {
 | |
|     int c, l = strlen(s), err;
 | |
|     char *ptr;
 | |
|     enum lextok ctok;
 | |
| 
 | |
|     if (!*s || !strcmp(s, nulstring))
 | |
| 	return 0;
 | |
|     zcontext_save();
 | |
|     untokenize(s);
 | |
|     inpush(dupstring_wlen(s, l), 0, NULL);
 | |
|     strinbeg(0);
 | |
|     lexbuf.len = 0;
 | |
|     lexbuf.ptr = tokstr = s;
 | |
|     lexbuf.siz = l + 1;
 | |
|     c = hgetc();
 | |
|     ctok = gettokstr(c, 1);
 | |
|     err = errflag;
 | |
|     strinend();
 | |
|     inpop();
 | |
|     DPUTS(cmdsp, "BUG: parse_subst_string: cmdstack not empty.");
 | |
|     zcontext_restore();
 | |
|     /* Keep any interrupt error status */
 | |
|     errflag = err | (errflag & ERRFLAG_INT);
 | |
|     if (ctok == LEXERR) {
 | |
| 	untokenize(s);
 | |
| 	return 1;
 | |
|     }
 | |
| #ifdef DEBUG
 | |
|     /*
 | |
|      * Historical note: we used to check here for olen (the value of lexbuf.len
 | |
|      * before zcontext_restore()) == l, but that's not necessarily the case if
 | |
|      * we stripped an RCQUOTE.
 | |
|      */
 | |
|     if (ctok != STRING || (errflag && !noerrs)) {
 | |
| 	fprintf(stderr, "Oops. Bug in parse_subst_string: %s\n",
 | |
| 		errflag ? "errflag" : "ctok != STRING");
 | |
| 	fflush(stderr);
 | |
| 	untokenize(s);
 | |
| 	return 1;
 | |
|     }
 | |
| #endif
 | |
|     /* Check for $'...' quoting.  This needs special handling. */
 | |
|     for (ptr = s; *ptr; )
 | |
|     {
 | |
| 	if (*ptr == String && ptr[1] == Snull)
 | |
| 	{
 | |
| 	    char *t;
 | |
| 	    int len, tlen, diff;
 | |
| 	    t = getkeystring(ptr + 2, &len, GETKEYS_DOLLARS_QUOTE, NULL);
 | |
| 	    len += 2;
 | |
| 	    tlen = strlen(t);
 | |
| 	    diff = len - tlen;
 | |
| 	    /*
 | |
| 	     * Yuk.
 | |
| 	     * parse_subst_string() currently handles strings in-place.
 | |
| 	     * That's not so easy to fix without knowing whether
 | |
| 	     * additional memory should come off the heap or
 | |
| 	     * otherwise.  So we cheat by copying the unquoted string
 | |
| 	     * into place, unless it's too long.  That's not the
 | |
| 	     * normal case, but I'm worried there are pathological
 | |
| 	     * cases with converting metafied multibyte strings.
 | |
| 	     * If someone can prove there aren't I will be very happy.
 | |
| 	     */
 | |
| 	    if (diff < 0) {
 | |
| 		DPUTS(1, "$'...' subst too long: fix get_parse_string()");
 | |
| 		return 1;
 | |
| 	    }
 | |
| 	    memcpy(ptr, t, tlen);
 | |
| 	    ptr += tlen;
 | |
| 	    if (diff > 0) {
 | |
| 		char *dptr = ptr;
 | |
| 		char *sptr = ptr + diff;
 | |
| 		while ((*dptr++ = *sptr++))
 | |
| 		    ;
 | |
| 	    }
 | |
| 	} else
 | |
| 	    ptr++;
 | |
|     }
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| /* Called below to report word positions. */
 | |
| 
 | |
| /**/
 | |
| static void
 | |
| gotword(void)
 | |
| {
 | |
|     int nwe = zlemetall + 1 - inbufct + (addedx == 2 ? 1 : 0);
 | |
|     if (zlemetacs <= nwe) {
 | |
| 	int nwb = zlemetall - wordbeg + addedx;
 | |
| 	if (zlemetacs >= nwb) {
 | |
| 	    wb = nwb;
 | |
| 	    we = nwe;
 | |
| 	} else {
 | |
| 	    wb = zlemetacs + addedx;
 | |
| 	    if (we < wb)
 | |
| 		we = wb;
 | |
| 	}
 | |
| 	lexflags = 0;
 | |
|     }
 | |
| }
 | |
| 
 | |
| /* Check if current lex text matches an alias: 1 if so, else 0 */
 | |
| 
 | |
| static int
 | |
| checkalias(void)
 | |
| {
 | |
|     Alias an;
 | |
| 
 | |
|     if (!zshlextext)
 | |
| 	return 0;
 | |
| 
 | |
|     if (!noaliases && isset(ALIASESOPT) &&
 | |
| 	(!isset(POSIXALIASES) ||
 | |
| 	 (tok == STRING && !reswdtab->getnode(reswdtab, zshlextext)))) {
 | |
| 	char *suf;
 | |
| 
 | |
| 	an = (Alias) aliastab->getnode(aliastab, zshlextext);
 | |
| 	if (an && !an->inuse &&
 | |
| 	    ((an->node.flags & ALIAS_GLOBAL) ||
 | |
| 	     (incmdpos && tok == STRING) || inalmore)) {
 | |
| 	    if (!lexstop) {
 | |
| 		/*
 | |
| 		 * Tokens that don't require a space after, get one,
 | |
| 		 * because they are treated as if preceded by one.
 | |
| 		 */
 | |
| 		int c = hgetc();
 | |
| 		hungetc(c);
 | |
| 		if (!iblank(c))
 | |
| 		    inpush(" ", INP_ALIAS, 0);
 | |
| 	    }
 | |
| 	    inpush(an->text, INP_ALIAS, an);
 | |
| 	    if (an->text[0] == ' ' && !(an->node.flags & ALIAS_GLOBAL))
 | |
| 		aliasspaceflag = 1;
 | |
| 	    lexstop = 0;
 | |
| 	    return 1;
 | |
| 	}
 | |
| 	if ((suf = strrchr(zshlextext, '.')) && suf[1] &&
 | |
| 	    suf > zshlextext && suf[-1] != Meta &&
 | |
| 	    (an = (Alias)sufaliastab->getnode(sufaliastab, suf+1)) &&
 | |
| 	    !an->inuse && incmdpos) {
 | |
| 	    inpush(dupstring(zshlextext), INP_ALIAS, an);
 | |
| 	    inpush(" ", INP_ALIAS, NULL);
 | |
| 	    inpush(an->text, INP_ALIAS, NULL);
 | |
| 	    lexstop = 0;
 | |
| 	    return 1;
 | |
| 	}
 | |
|     }
 | |
| 
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| /* expand aliases and reserved words */
 | |
| 
 | |
| /**/
 | |
| int
 | |
| exalias(void)
 | |
| {
 | |
|     Reswd rw;
 | |
| 
 | |
|     hwend();
 | |
|     if (interact && isset(SHINSTDIN) && !strin && incasepat <= 0 &&
 | |
| 	tok == STRING && !nocorrect && !(inbufflags & INP_ALIAS) &&
 | |
| 	(isset(CORRECTALL) || (isset(CORRECT) && incmdpos)))
 | |
| 	spckword(&tokstr, 1, incmdpos, 1);
 | |
| 
 | |
|     if (!tokstr) {
 | |
| 	zshlextext = tokstrings[tok];
 | |
| 
 | |
| 	if (tok == NEWLIN)
 | |
| 	    return 0;
 | |
| 	return checkalias();
 | |
|     } else {
 | |
| 	VARARR(char, copy, (strlen(tokstr) + 1));
 | |
| 
 | |
| 	if (has_token(tokstr)) {
 | |
| 	    char *p, *t;
 | |
| 
 | |
| 	    zshlextext = p = copy;
 | |
| 	    for (t = tokstr;
 | |
| 		 (*p++ = itok(*t) ? ztokens[*t++ - Pound] : *t++););
 | |
| 	} else
 | |
| 	    zshlextext = tokstr;
 | |
| 
 | |
| 	if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS)) {
 | |
| 	    int zp = lexflags;
 | |
| 
 | |
| 	    gotword();
 | |
| 	    if ((zp & LEXFLAGS_ZLE) && !lexflags) {
 | |
| 		if (zshlextext == copy)
 | |
| 		    zshlextext = tokstr;
 | |
| 		return 0;
 | |
| 	    }
 | |
| 	}
 | |
| 
 | |
| 	if (tok == STRING) {
 | |
| 	    /* Check for an alias */
 | |
| 	    if ((zshlextext != copy || !isset(POSIXALIASES)) && checkalias()) {
 | |
| 		if (zshlextext == copy)
 | |
| 		    zshlextext = tokstr;
 | |
| 		return 1;
 | |
| 	    }
 | |
| 
 | |
| 	    /* Then check for a reserved word */
 | |
| 	    if ((incmdpos ||
 | |
| 		 (unset(IGNOREBRACES) && unset(IGNORECLOSEBRACES) &&
 | |
| 		  zshlextext[0] == '}' && !zshlextext[1])) &&
 | |
| 		(rw = (Reswd) reswdtab->getnode(reswdtab, zshlextext))) {
 | |
| 		tok = rw->token;
 | |
| 		inrepeat_ = (tok == REPEAT);
 | |
| 		if (tok == DINBRACK)
 | |
| 		    incond = 1;
 | |
| 	    } else if (incond && !strcmp(zshlextext, "]]")) {
 | |
| 		tok = DOUTBRACK;
 | |
| 		incond = 0;
 | |
| 	    } else if (incond == 1 && zshlextext[0] == '!' && !zshlextext[1])
 | |
| 		tok = BANG;
 | |
| 	}
 | |
| 	inalmore = 0;
 | |
| 	if (zshlextext == copy)
 | |
| 	    zshlextext = tokstr;
 | |
|     }
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| /**/
 | |
| void
 | |
| zshlex_raw_add(int c)
 | |
| {
 | |
|     if (!lex_add_raw)
 | |
| 	return;
 | |
| 
 | |
|     *lexbuf_raw.ptr++ = c;
 | |
|     if (lexbuf_raw.siz == ++lexbuf_raw.len) {
 | |
| 	int newbsiz = lexbuf_raw.siz * 2;
 | |
| 
 | |
| 	tokstr_raw = (char *)hrealloc(tokstr_raw, lexbuf_raw.siz, newbsiz);
 | |
| 	lexbuf_raw.ptr = tokstr_raw + lexbuf_raw.len;
 | |
| 	memset(lexbuf_raw.ptr, 0, newbsiz - lexbuf_raw.siz);
 | |
| 	lexbuf_raw.siz = newbsiz;
 | |
|     }
 | |
| }
 | |
| 
 | |
| /**/
 | |
| void
 | |
| zshlex_raw_back(void)
 | |
| {
 | |
|     if (!lex_add_raw)
 | |
| 	return;
 | |
|     lexbuf_raw.ptr--;
 | |
|     lexbuf_raw.len--;
 | |
| }
 | |
| 
 | |
| /**/
 | |
| int
 | |
| zshlex_raw_mark(int offset)
 | |
| {
 | |
|     if (!lex_add_raw)
 | |
| 	return 0;
 | |
|     return lexbuf_raw.len + offset;
 | |
| }
 | |
| 
 | |
| /**/
 | |
| void
 | |
| zshlex_raw_back_to_mark(int mark)
 | |
| {
 | |
|     if (!lex_add_raw)
 | |
| 	return;
 | |
|     lexbuf_raw.ptr = tokstr_raw + mark;
 | |
|     lexbuf_raw.len = mark;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Skip (...) for command-style substitutions: $(...), <(...), >(...)
 | |
|  *
 | |
|  * In order to ensure we don't stop at closing parentheses with
 | |
|  * some other syntactic significance, we'll parse the input until
 | |
|  * we find an unmatched closing parenthesis.  However, we'll throw
 | |
|  * away the result of the parsing and just keep the string we've built
 | |
|  * up on the way.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| static int
 | |
| skipcomm(void)
 | |
| {
 | |
| #ifdef ZSH_OLD_SKIPCOMM
 | |
|     int pct = 1, c, start = 1;
 | |
| 
 | |
|     cmdpush(CS_CMDSUBST);
 | |
|     SETPARBEGIN
 | |
|     c = Inpar;
 | |
|     do {
 | |
| 	int iswhite;
 | |
| 	add(c);
 | |
| 	c = hgetc();
 | |
| 	if (itok(c) || lexstop)
 | |
| 	    break;
 | |
| 	iswhite = inblank(c);
 | |
| 	switch (c) {
 | |
| 	case '(':
 | |
| 	    pct++;
 | |
| 	    break;
 | |
| 	case ')':
 | |
| 	    pct--;
 | |
| 	    break;
 | |
| 	case '\\':
 | |
| 	    add(c);
 | |
| 	    c = hgetc();
 | |
| 	    break;
 | |
| 	case '\'': {
 | |
| 	    int strquote = lexbuf.ptr[-1] == '$';
 | |
| 	    add(c);
 | |
| 	    STOPHIST
 | |
| 	    while ((c = hgetc()) != '\'' && !lexstop) {
 | |
| 		if (c == '\\' && strquote) {
 | |
| 		    add(c);
 | |
| 		    c = hgetc();
 | |
| 		}
 | |
| 		add(c);
 | |
| 	    }
 | |
| 	    ALLOWHIST
 | |
| 	    break;
 | |
| 	}
 | |
| 	case '\"':
 | |
| 	    add(c);
 | |
| 	    while ((c = hgetc()) != '\"' && !lexstop)
 | |
| 		if (c == '\\') {
 | |
| 		    add(c);
 | |
| 		    add(hgetc());
 | |
| 		} else
 | |
| 		    add(c);
 | |
| 	    break;
 | |
| 	case '`':
 | |
| 	    add(c);
 | |
| 	    while ((c = hgetc()) != '`' && !lexstop)
 | |
| 		if (c == '\\')
 | |
| 		    add(c), add(hgetc());
 | |
| 		else
 | |
| 		    add(c);
 | |
| 	    break;
 | |
| 	case '#':
 | |
| 	    if (start) {
 | |
| 		add(c);
 | |
| 		while ((c = hgetc()) != '\n' && !lexstop)
 | |
| 		    add(c);
 | |
| 		iswhite = 1;
 | |
| 	    }
 | |
| 	    break;
 | |
| 	}
 | |
| 	start = iswhite;
 | |
|     }
 | |
|     while (pct);
 | |
|     if (!lexstop)
 | |
| 	SETPAREND
 | |
|     cmdpop();
 | |
|     return lexstop;
 | |
| #else
 | |
|     char *new_tokstr;
 | |
|     int new_lexstop, new_lex_add_raw;
 | |
|     int save_infor = infor;
 | |
|     struct lexbufstate new_lexbuf;
 | |
| 
 | |
|     infor = 0;
 | |
|     cmdpush(CS_CMDSUBST);
 | |
|     SETPARBEGIN
 | |
|     add(Inpar);
 | |
| 
 | |
|     new_lex_add_raw = lex_add_raw + 1;
 | |
|     if (!lex_add_raw) {
 | |
| 	/*
 | |
| 	 * We'll combine the string so far with the input
 | |
| 	 * read in for the command substitution.  To do this
 | |
| 	 * we'll just propagate the current tokstr etc. as the
 | |
| 	 * variables used for adding raw input, and
 | |
| 	 * ensure we swap those for the real tokstr etc. at the end.
 | |
| 	 *
 | |
| 	 * However, we need to save and restore the rest of the
 | |
| 	 * lexical and parse state as we're effectively parsing
 | |
| 	 * an internal string.  Because we're still parsing it from
 | |
| 	 * the original input source (we have to --- we don't know
 | |
| 	 * when to stop inputting it otherwise and can't rely on
 | |
| 	 * the input being recoverable until we've read it) we need
 | |
| 	 * to keep the same history context.
 | |
| 	 */
 | |
| 	new_tokstr = tokstr;
 | |
| 	new_lexbuf = lexbuf;
 | |
| 
 | |
| 	/*
 | |
| 	 * If we're expanding an alias at this point, we need the whole
 | |
| 	 * remaining text as part of the string for the command in
 | |
| 	 * parentheses, so don't backtrack.  This is different from the
 | |
| 	 * usual case where the alias is fully within the command, where
 | |
| 	 * we want the unexpanded text so that it will be expanded
 | |
| 	 * again when the command in the parentheses is executed.
 | |
| 	 *
 | |
| 	 * I never wanted to be a software engineer, you know.
 | |
| 	 */
 | |
| 	if (inbufflags & INP_ALIAS)
 | |
| 	    inbufflags |= INP_RAW_KEEP;
 | |
| 	zcontext_save_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
 | |
| 	hist_in_word(1);
 | |
|     } else {
 | |
| 	/*
 | |
| 	 * Set up for nested command subsitution, however
 | |
| 	 * we don't actually need the string until we get
 | |
| 	 * back to the top level and recover the lot.
 | |
| 	 * The $() body just appears empty.
 | |
| 	 *
 | |
| 	 * We do need to propagate the raw variables which would
 | |
| 	 * otherwise by cleared, though.
 | |
| 	 */
 | |
| 	new_tokstr = tokstr_raw;
 | |
| 	new_lexbuf = lexbuf_raw;
 | |
| 
 | |
| 	zcontext_save_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
 | |
|     }
 | |
|     tokstr_raw = new_tokstr;
 | |
|     lexbuf_raw = new_lexbuf;
 | |
|     lex_add_raw = new_lex_add_raw;
 | |
|     /*
 | |
|      * Don't do any ZLE specials down here: they're only needed
 | |
|      * when we return the string from the recursive parse.
 | |
|      * (TBD: this probably means we should be initialising lexflags
 | |
|      * more consistently.)
 | |
|      *
 | |
|      * Note that in that case we're still using the ZLE line reading
 | |
|      * function at the history layer --- this is consistent with the
 | |
|      * intention of maintaining the history and input layers across
 | |
|      * the recursive parsing.
 | |
|      *
 | |
|      * Also turn off LEXFLAGS_NEWLINE because this is already skipping
 | |
|      * across the entire construct, and parse_event() needs embedded
 | |
|      * newlines to be "real" when looking for the OUTPAR token.
 | |
|      */
 | |
|     lexflags &= ~(LEXFLAGS_ZLE|LEXFLAGS_NEWLINE);
 | |
|     dbparens = 0;	/* restored by zcontext_restore_partial() */
 | |
| 
 | |
|     if (!parse_event(OUTPAR) || tok != OUTPAR) {
 | |
| 	if (strin) {
 | |
| 	    /*
 | |
| 	     * Get the rest of the string raw since we don't
 | |
| 	     * know where this token ends.
 | |
| 	     */
 | |
| 	    while (!lexstop)
 | |
| 		(void)ingetc();
 | |
| 	} else
 | |
| 	    lexstop = 1;
 | |
|     }
 | |
|      /* Outpar lexical token gets added in caller if present */
 | |
| 
 | |
|     /*
 | |
|      * We're going to keep the full raw input string
 | |
|      * as the current token string after popping the stack.
 | |
|      */
 | |
|     new_tokstr = tokstr_raw;
 | |
|     new_lexbuf = lexbuf_raw;
 | |
|     /*
 | |
|      * We're also going to propagate the lexical state:
 | |
|      * if we couldn't parse the command substitution we
 | |
|      * can't continue.
 | |
|      */
 | |
|     new_lexstop = lexstop;
 | |
| 
 | |
|     zcontext_restore_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
 | |
| 
 | |
|     if (lex_add_raw) {
 | |
| 	/*
 | |
| 	 * Keep going, so retain the raw variables.
 | |
| 	 */
 | |
| 	tokstr_raw = new_tokstr;
 | |
| 	lexbuf_raw = new_lexbuf;
 | |
|     } else {
 | |
| 	if (!new_lexstop) {
 | |
| 	    /* Ignore the ')' added on input */
 | |
| 	    new_lexbuf.len--;
 | |
| 	    *--new_lexbuf.ptr = '\0';
 | |
| 	}
 | |
| 
 | |
| 	/*
 | |
| 	 * Convince the rest of lex.c we were examining a string
 | |
| 	 * all along.
 | |
| 	 */
 | |
| 	tokstr = new_tokstr;
 | |
| 	lexbuf = new_lexbuf;
 | |
| 	lexstop = new_lexstop;
 | |
| 	hist_in_word(0);
 | |
|     }
 | |
| 
 | |
|     if (!lexstop)
 | |
| 	SETPAREND
 | |
|     cmdpop();
 | |
|     infor = save_infor;
 | |
| 
 | |
|     return lexstop;
 | |
| #endif
 | |
| }
 |