1
0
Fork 0
mirror of git://git.code.sf.net/p/zsh/code synced 2025-01-01 05:16:05 +01:00
zsh/Src/lex.c
Peter Stephenson 298919f43a users/28338: command substitution with alias edge case.
See added regression test.
2022-11-08 14:12:01 +00:00

2234 lines
49 KiB
C

/*
* lex.c - lexical analysis
*
* This file is part of zsh, the Z shell.
*
* Copyright (c) 1992-1997 Paul Falstad
* All rights reserved.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and to distribute modified versions of this software for any
* purpose, provided that the above copyright notice and the following
* two paragraphs appear in all copies of this software.
*
* In no event shall Paul Falstad or the Zsh Development Group be liable
* to any party for direct, indirect, special, incidental, or consequential
* damages arising out of the use of this software and its documentation,
* even if Paul Falstad and the Zsh Development Group have been advised of
* the possibility of such damage.
*
* Paul Falstad and the Zsh Development Group specifically disclaim any
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose. The software
* provided hereunder is on an "as is" basis, and Paul Falstad and the
* Zsh Development Group have no obligation to provide maintenance,
* support, updates, enhancements, or modifications.
*
*/
#include "zsh.mdh"
#include "lex.pro"
/*
 * Initial size, in bytes, of the heap buffer allocated for a new token
 * string (see the hcalloc() calls in gettok() and gettokstr()).  add()
 * grows the buffer when it fills up.
 */
#define LEX_HEAP_SIZE (32)
/*
 * tokens
 *
 * The ordinary characters that correspond to the internal token
 * characters.  cmd_or_math() indexes this array with (token - Pound)
 * to turn a token character back into the character it came from.
 */
/**/
mod_export char ztokens[] = "#$^*(())$=|{}[]`<>>?~`,-!'\"\\\\";
/* parts of the current token */
/*
 * NOTE(review): zshlextext is only saved, restored and reset to NULL in
 * the part of the file visible here; its users are presumably elsewhere.
 */
/**/
char *zshlextext;
/*
 * Text of the current token.  Points to the start of the buffer that
 * add() appends to; NULL for tokens that carry no string.
 */
/**/
mod_export char *tokstr;
/* Type of the current token, as set by zshlex(). */
/**/
mod_export enum lextok tok;
/*
 * File descriptor given as a digit in front of a redirection operator
 * (e.g. the 2 in "2>"), or -1 if none was given.  Set in gettok().
 */
/**/
mod_export int tokfd;
/*
 * Line number at which the first character of a token was found.
 * We always set this in gettok(), which is always called from
 * zshlex() unless we have reached an error. So it is always
 * valid when parsing. It is not useful during execution
 * of the parsed structure.
 */
/**/
zlong toklineno;
/* lexical analyzer error flag */
/**/
mod_export int lexstop;
/* if != 0, this is the first line of the command */
/**/
mod_export int isfirstln;
/* if != 0, this is the first char of the command (not including white space) */
/**/
int isfirstch;
/* flag that an alias should be expanded after expansion ending in space */
/**/
int inalmore;
/*
 * Don't do spelling correction.
 * Bit 1 is only valid for the current word. It's
 * set when we detect a lookahead that stops the word from
 * needing correction.  zshlex() clears it again with
 * "nocorrect &= 1" once the token has been read.
 */
/**/
int nocorrect;
/*
 * TBD: the following exported variables are part of the non-interface
 * with ZLE for completion. They are poorly named and the whole
 * scheme is incredibly brittle. One piece of robustness is applied:
 * the variables are only set if LEXFLAGS_ZLE is set. Improvements
 * should therefore concentrate on areas with this flag set.
 *
 * Cursor position and line length in zle when the line is
 * metafied for access from the main shell.
 */
/**/
mod_export int zlemetacs, zlemetall;
/* inwhat says what exactly we are in *
 * (its value is one of the IN_* things). */
/**/
mod_export int inwhat;
/* 1 if x added to complete in a blank between words */
/**/
mod_export int addedx;
/* wb and we hold the beginning/end position of the word we are completing. */
/**/
mod_export int wb, we;
/*
 * Value of inbufct at the start of the current word (adjusted by one
 * for a quoted history bang); set in gettok() when LEXFLAGS_ZLE is set
 * and the input is not coming from an alias.
 */
/**/
mod_export int wordbeg;
/*
 * Values of inbufct recorded by the SETPARBEGIN and SETPAREND macros
 * at the opening and closing backquote of a `...` substitution.
 * SETPAREND treats -1 as "not set".
 */
/**/
mod_export int parbegin;
/**/
mod_export int parend;
/* 1 if aliases should not be expanded */
/**/
mod_export int noaliases;
/*
 * If non-zero, we are parsing a line sent to us by the editor, or some
 * other string that's not part of standard command input (e.g. eval is
 * part of normal command input).
 *
 * Set of bits from LEXFLAGS_*.
 *
 * Note that although it is passed into the lexer as an input, the
 * lexer can set it to zero after finding the word it's searching for.
 * This only happens if the line being parsed actually does come from
 * ZLE, and hence the bit LEXFLAGS_ZLE is set.
 */
/**/
mod_export int lexflags;
/* don't recognize comments */
/**/
mod_export int nocomments;
/* add raw input characters while parsing command substitution */
/**/
int lex_add_raw;
/*
 * variables associated with the above: the raw token string and the
 * buffer state used to build it.  Both are saved and restored along
 * with the rest of the lexical context.
 */
static char *tokstr_raw;
static struct lexbufstate lexbuf_raw;
/*
 * text of punctuation tokens
 *
 * Indexed by token type; the number in each comment is the index of
 * that entry.  The array is declared with WHILE + 1 elements but only
 * the punctuation tokens are listed, so any remaining elements are
 * implicitly NULL.
 */
/**/
mod_export char *tokstrings[WHILE + 1] = {
NULL, /* NULLTOK 0 */
";", /* SEPER */
"\\n", /* NEWLIN */
";", /* SEMI */
";;", /* DSEMI */
"&", /* AMPER 5 */
"(", /* INPAR */
")", /* OUTPAR */
"||", /* DBAR */
"&&", /* DAMPER */
">", /* OUTANG 10 */
">|", /* OUTANGBANG */
">>", /* DOUTANG */
">>|", /* DOUTANGBANG */
"<", /* INANG */
"<>", /* INOUTANG 15 */
"<<", /* DINANG */
"<<-", /* DINANGDASH */
"<&", /* INANGAMP */
">&", /* OUTANGAMP */
"&>", /* AMPOUTANG 20 */
"&>|", /* OUTANGAMPBANG */
">>&", /* DOUTANGAMP */
">>&|", /* DOUTANGAMPBANG */
"<<<", /* TRINANG */
"|", /* BAR 25 */
"|&", /* BARAMP */
"()", /* INOUTPAR */
"((", /* DINPAR */
"))", /* DOUTPAR */
"&|", /* AMPERBANG 30 */
";&", /* SEMIAMP */
";|", /* SEMIBAR */
};
/* lexical state */
/*
 * dbparens is non-zero while we are inside a "((" found by gettok();
 * the following call to gettok() then reads the contents as math.
 */
static int dbparens;
/*
 * Buffer in which the current token string is built: ptr is the next
 * position to write to, siz the allocated size and len the number of
 * characters stored so far (see add()).
 */
static struct lexbufstate lexbuf = { NULL, 256, 0 };
/*
 * Save the lexical context.
 *
 * Copies the lexer's current state into *ls and then resets the token
 * string pointers and buffers so that a nested lexical analysis starts
 * with nothing allocated.  toplevel is accepted but not used.
 */
/**/
void
lex_context_save(struct lex_stack *ls, int toplevel)
{
    (void)toplevel;

    /* The token most recently read. */
    ls->tok = tok;
    ls->tokstr = tokstr;
    ls->zshlextext = zshlextext;
    ls->toklineno = toklineno;
    ls->lexstop = lexstop;

    /* Flags describing where we are in the input. */
    ls->lexflags = lexflags;
    ls->isfirstch = isfirstch;
    ls->isfirstln = isfirstln;
    ls->dbparens = dbparens;

    /* The token buffer and its raw counterpart. */
    ls->lexbuf = lexbuf;
    ls->lexbuf_raw = lexbuf_raw;
    ls->tokstr_raw = tokstr_raw;
    ls->lex_add_raw = lex_add_raw;

    /* Start again with no token string ... */
    lexbuf.ptr = NULL;
    zshlextext = NULL;
    tokstr = NULL;
    lexbuf.siz = 256;

    /* ... and with raw input collection turned off. */
    lexbuf_raw.ptr = NULL;
    tokstr_raw = NULL;
    lex_add_raw = 0;
    lexbuf_raw.len = 0;
    lexbuf_raw.siz = 0;
}
/*
 * Restore the lexical context.
 *
 * Copies back every value stored in *ls by lex_context_save().
 * toplevel is accepted but not used.
 */
/**/
mod_export void
lex_context_restore(const struct lex_stack *ls, int toplevel)
{
    (void)toplevel;

    /* The token most recently read. */
    tok = ls->tok;
    tokstr = ls->tokstr;
    zshlextext = ls->zshlextext;
    toklineno = ls->toklineno;
    lexstop = ls->lexstop;

    /* Flags describing where we are in the input. */
    lexflags = ls->lexflags;
    isfirstch = ls->isfirstch;
    isfirstln = ls->isfirstln;
    dbparens = ls->dbparens;

    /* The token buffer and its raw counterpart. */
    lexbuf = ls->lexbuf;
    lexbuf_raw = ls->lexbuf_raw;
    tokstr_raw = ls->tokstr_raw;
    lex_add_raw = ls->lex_add_raw;
}
/**/
void
zshlex(void)
{
if (tok == LEXERR)
return;
do {
if (inrepeat_)
++inrepeat_;
if (inrepeat_ == 3 && (isset(SHORTLOOPS) || isset(SHORTREPEAT)))
incmdpos = 1;
tok = gettok();
} while (tok != ENDINPUT && exalias());
nocorrect &= 1;
if (tok == NEWLIN || tok == ENDINPUT) {
while (hdocs) {
struct heredocs *next = hdocs->next;
char *doc, *munged_term;
hwbegin(0);
cmdpush(hdocs->type == REDIR_HEREDOC ? CS_HEREDOC : CS_HEREDOCD);
munged_term = dupstring(hdocs->str);
STOPHIST
doc = gethere(&munged_term, hdocs->type);
ALLOWHIST
cmdpop();
hwend();
if (!doc) {
zerr("here document too large");
while (hdocs) {
next = hdocs->next;
zfree(hdocs, sizeof(struct heredocs));
hdocs = next;
}
tok = LEXERR;
break;
}
setheredoc(hdocs->pc, REDIR_HERESTR, doc, hdocs->str,
munged_term);
zfree(hdocs, sizeof(struct heredocs));
hdocs = next;
}
}
if (tok != NEWLIN)
isnewlin = 0;
else
isnewlin = (inbufct) ? -1 : 1;
if (tok == SEMI || (tok == NEWLIN && !(lexflags & LEXFLAGS_NEWLINE)))
tok = SEPER;
}
/*
 * Read the next token with zshlex() and then update incmdpos, infor
 * and inredir to reflect the token that was found.
 */
/**/
mod_export void
ctxtlex(void)
{
    /* incmdpos as it was before the redirection or loop keyword */
    static int oldpos;

    zshlex();

    switch (tok) {
    /* After these tokens the next word is in command position. */
    case SEPER:
    case NEWLIN:
    case SEMI:
    case DSEMI:
    case SEMIAMP:
    case SEMIBAR:
    case AMPER:
    case AMPERBANG:
    case DBAR:
    case DAMPER:
    case BAR:
    case BARAMP:
    case INPAR:
    case INBRACE:
    case INOUTPAR:
    case DOLOOP:
    case THEN:
    case ELIF:
    case ELSE:
    case DOUTBRACK:
        incmdpos = 1;
        break;

    /* After these tokens it is not (ENVSTRING is deliberately absent). */
    case STRING:
    case TYPESET:
    case ENVARRAY:
    case OUTPAR:
    case CASE:
    case DINBRACK:
        incmdpos = 0;
        break;

    default:
        /* nothing to do, keep compiler happy */
        break;
    }

    if (tok != DINPAR) {
        if (tok == FOR)
            infor = 2;
        else
            infor = 0;
    }

    if (IS_REDIROP(tok) || tok == FOR || tok == FOREACH || tok == SELECT) {
        /* Remember incmdpos so it can be put back after the next word. */
        oldpos = incmdpos;
        incmdpos = 0;
        inredir = 1;
    } else if (inredir) {
        inredir = 0;
        incmdpos = oldpos;
    }
}
/*
 * Actions for the first character of a token, as stored in lexact1[].
 * Each value is the position of the corresponding character in the
 * string lx1 in initlextabs(); positions with no macro here (4 and
 * 9 to 12) have no case in the switch in gettok().
 */
#define LX1_BKSLASH 0
#define LX1_COMMENT 1
#define LX1_NEWLIN 2
#define LX1_SEMI 3
#define LX1_AMPER 5
#define LX1_BAR 6
#define LX1_INPAR 7
#define LX1_OUTPAR 8
#define LX1_INANG 13
#define LX1_OUTANG 14
#define LX1_OTHER 15
/*
 * Actions for the remaining characters of a token, as stored in
 * lexact2[].  Values 0 to 19 are positions in the string lx2 in
 * initlextabs(); LX2_OTHER is the default and LX2_META is assigned
 * explicitly to the Meta character.
 */
#define LX2_BREAK 0
#define LX2_OUTPAR 1
#define LX2_BAR 2
#define LX2_STRING 3
#define LX2_INBRACK 4
#define LX2_OUTBRACK 5
#define LX2_TILDE 6
#define LX2_INPAR 7
#define LX2_INBRACE 8
#define LX2_OUTBRACE 9
#define LX2_OUTANG 10
#define LX2_INANG 11
#define LX2_EQUALS 12
#define LX2_BKSLASH 13
#define LX2_QUOTE 14
#define LX2_DQUOTE 15
#define LX2_BQUOTE 16
#define LX2_COMMA 17
#define LX2_DASH 18
#define LX2_BANG 19
#define LX2_OTHER 20
#define LX2_META 21
/*
 * Tables indexed by unsigned character value, filled in by
 * initlextabs(): lexact1 and lexact2 give the LX1_ and LX2_ action for
 * the character, and lextok2 gives the character to store in the token
 * string in its place (a token character such as Star, or the
 * character itself).
 */
static unsigned char lexact1[256], lexact2[256], lextok2[256];
/*
 * Fill in the character tables lexact1[], lexact2[] and lextok2[].
 *
 * Every character starts out with the "other" action and is mapped to
 * itself.  The position of a character in lx1 or lx2 then becomes its
 * LX1_ or LX2_ action, and a few characters get explicit actions or
 * token equivalents.
 */
/**/
void
initlextabs(void)
{
    static char *lx1 = "\\q\n;!&|(){}[]<>";
    static char *lx2 = ";)|$[]~({}><=\\\'\"`,-!";
    int i;

    for (i = 0; i < 256; i++) {
        lextok2[i] = i;
        lexact2[i] = LX2_OTHER;
        lexact1[i] = LX1_OTHER;
    }

    for (i = 0; lx1[i] != '\0'; i++)
        lexact1[(int)lx1[i]] = i;

    for (i = 0; lx2[i] != '\0'; i++)
        lexact2[(int)lx2[i]] = i;

    /* Not in lx2: these need their action set by hand. */
    lexact2[STOUC(Meta)] = LX2_META;
    lexact2['&'] = LX2_BREAK;

    /* Characters stored in the token string as token characters. */
    lextok2['#'] = Pound;
    lextok2['$'] = String;
    lextok2['^'] = Hat;
    lextok2['*'] = Star;
    lextok2['?'] = Quest;
    lextok2['~'] = Tilde;
    lextok2['['] = Inbrack;
    lextok2['{'] = Inbrace;
}
/*
 * Initialize the lexical state: clear the error, double-parenthesis
 * and spelling-correction flags and set the current token to ENDINPUT.
 */
/**/
void
lexinit(void)
{
    lexstop = 0;
    dbparens = 0;
    nocorrect = 0;
    tok = ENDINPUT;
}
/*
 * Append the character c to the token string.
 *
 * When the buffer becomes full it is reallocated with hrealloc(),
 * normally to twice its size, and the new part is zeroed.
 */
/**/
void
add(int c)
{
    int grown;

    *lexbuf.ptr++ = c;
    lexbuf.len++;
    if (lexbuf.len != lexbuf.siz)
        return;

    /*
     * Double the size, unless the count of remaining input is larger
     * than the current size but smaller than double: then use that.
     */
    grown = 2 * lexbuf.siz;
    if (inbufct > lexbuf.siz && inbufct < grown)
        grown = inbufct;

    tokstr = (char *)hrealloc(tokstr, lexbuf.siz, grown);
    /* The buffer was exactly full, so this is the start of the new part. */
    lexbuf.ptr = tokstr + lexbuf.len;
    memset(lexbuf.ptr, 0, grown - lexbuf.siz);
    lexbuf.siz = grown;
}
/*
 * Record in parbegin the value of inbufct at the start of a `...`
 * substitution.  Only done when lexing for ZLE, when the input is not
 * coming from an alias, and when the comparison of zlemetacs with the
 * current input position below succeeds.
 */
#define SETPARBEGIN { \
if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS) && \
zlemetacs >= zlemetall+1-inbufct) \
parbegin = inbufct; \
}
/*
 * Counterpart of SETPARBEGIN for the end of the substitution.  If
 * parbegin is set and parend is not, either record inbufct in parend
 * or, if the same comparison with zlemetacs succeeds here too,
 * reset parbegin to -1.
 */
#define SETPAREND { \
if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS) && \
parbegin != -1 && parend == -1) { \
if (zlemetacs >= zlemetall + 1 - inbufct) \
parbegin = -1; \
else \
parend = inbufct; \
} \
}
/*
 * Results of cmd_or_math() and cmd_or_math_sub(): the parenthesised
 * text is a command, is a math expression, or could not be parsed.
 */
enum {
CMD_OR_MATH_CMD,
CMD_OR_MATH_MATH,
CMD_OR_MATH_ERR
};
/*
* Return one of the above. If it couldn't be
* parsed as math, but there was no gross error, it's a command.
*/
static int
cmd_or_math(int cs_type)
{
int oldlen = lexbuf.len;
int c;
int oinflags = inbufflags;
cmdpush(cs_type);
inbufflags |= INP_APPEND;
c = dquote_parse(')', 0);
if (!(oinflags & INP_APPEND))
inbufflags &= ~INP_APPEND;
cmdpop();
*lexbuf.ptr = '\0';
if (!c) {
/* Successfully parsed, see if it was math */
c = hgetc();
if (c == ')')
return CMD_OR_MATH_MATH; /* yes */
hungetc(c);
lexstop = 0;
c = ')';
} else if (lexstop) {
/* we haven't got anything to unget */
return CMD_OR_MATH_ERR;
}
/* else unsuccessful: unget the whole thing */
hungetc(c);
lexstop = 0;
while (lexbuf.len > oldlen && !(errflag & ERRFLAG_ERROR)) {
lexbuf.len--;
hungetc(itok(*--lexbuf.ptr) ?
ztokens[*lexbuf.ptr - Pound] : *lexbuf.ptr);
}
if (errflag)
return CMD_OR_MATH_ERR;
hungetc('(');
return errflag ? CMD_OR_MATH_ERR : CMD_OR_MATH_CMD;
}
/*
 * Parse either a $(( ... )) or a $(...), starting just after the "$(".
 * A backslash-newline immediately after the "$(" is skipped.
 *
 * Returns CMD_OR_MATH_MATH, CMD_OR_MATH_CMD or CMD_OR_MATH_ERR, as for
 * cmd_or_math().
 */
static int
cmd_or_math_sub(void)
{
    int c = hgetc();
    int ret;

    if (c == '\\') {
        c = hgetc();
        if (c == '\n') {
            /* Line continuation: carry on with the next character. */
            c = hgetc();
        } else {
            /* An ordinary backslash: put it back, this is a command. */
            hungetc(c);
            hungetc('\\');
            lexstop = 0;
            return skipcomm() ? CMD_OR_MATH_ERR : CMD_OR_MATH_CMD;
        }
    }

    if (c != '(') {
        hungetc(c);
        lexstop = 0;
    } else {
        /* Remember where the Inpar goes in case it turns out to be math. */
        int lexpos = (int)(lexbuf.ptr - tokstr);

        add(Inpar);
        add('(');
        ret = cmd_or_math(CS_MATHSUBST);
        if (ret == CMD_OR_MATH_MATH) {
            tokstr[lexpos] = Inparmath;
            add(')');
            return CMD_OR_MATH_MATH;
        }
        if (ret == CMD_OR_MATH_ERR)
            return CMD_OR_MATH_ERR;
        /* A command after all: drop the two characters added above. */
        lexbuf.ptr -= 2;
        lexbuf.len -= 2;
    }

    return skipcomm() ? CMD_OR_MATH_ERR : CMD_OR_MATH_CMD;
}
/* Check whether we're looking at valid numeric globbing syntax      *
 * (/\<[0-9]*-[0-9]*\>/).  Call pointing just after the opening "<". *
 * Leaves the input in the same place, returning 0 or 1.             *
 *                                                                   *
 * The characters read are kept in a temporary buffer so they can be *
 * pushed back afterwards.  If the buffer cannot be enlarged, we     *
 * stop looking, push back what we have and return 0.                */

/**/
static int
isnumglob(void)
{
    /* ec is the next non-digit we expect: first '-', then '>' */
    int c, ec = '-', ret = 0;
    int tbs = 256, n = 0;
    char *tbuf = (char *)zalloc(tbs);

    while (1) {
        c = hgetc();
        if (lexstop) {
            lexstop = 0;
            break;
        }
        tbuf[n++] = c;
        if (!idigit(c)) {
            if (c != ec)
                break;
            if (ec == '>') {
                ret = 1;
                break;
            }
            ec = '>';
        }
        if (n == tbs) {
            /*
             * Grow via a temporary so that a failed realloc() neither
             * loses the old buffer nor leaves tbs out of step with
             * the size passed to zfree() below.
             */
            char *grown = (char *)realloc(tbuf, tbs * 2);

            if (!grown)
                break;
            tbuf = grown;
            tbs *= 2;
        }
    }
    /* Put everything back in reverse order of reading. */
    while (n--)
        hungetc(tbuf[n]);
    zfree(tbuf, tbs);
    return ret;
}
/*
 * Read the next token from the input and return its type.
 *
 * Leading blanks are skipped and the line number is recorded in
 * toklineno.  At the end of input the result is ENDINPUT, or LEXERR
 * if errflag is set.  Comments and punctuation (separators, pipes,
 * parentheses, redirection operators) are recognised here from the
 * first character; anything else is handed to gettokstr() to be read
 * as a word.
 *
 * For redirection operators tokfd is set to the file descriptor given
 * as a single digit in front of the operator, or to -1 if there was
 * none.  tokstr is NULL unless the token carries a string.
 */
/**/
static enum lextok
gettok(void)
{
int c, d;
/* fd digit seen before a redirection operator, -1 if none */
int peekfd = -1;
enum lextok peek;
beginning:
tokstr = NULL;
while (iblank(c = hgetc()) && !lexstop);
toklineno = lineno;
if (lexstop)
return (errflag) ? LEXERR : ENDINPUT;
isfirstln = 0;
if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS))
wordbeg = inbufct - (qbang && c == bangchar);
hwbegin(-1-(qbang && c == bangchar));
/* word includes the last character read and possibly \ before ! */
if (dbparens) {
/* Inside (( ... )): read up to the terminator as math. */
lexbuf.len = 0;
lexbuf.ptr = tokstr = (char *) hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
hungetc(c);
cmdpush(CS_MATH);
/* in a for (( ; ; )) the sections end at a semicolon */
c = dquote_parse(infor ? ';' : ')', 0);
cmdpop();
*lexbuf.ptr = '\0';
if (!c && infor) {
infor--;
return DINPAR;
}
/* the closing parenthesis must be followed by a second one */
if (c || (c = hgetc()) != ')') {
hungetc(c);
return LEXERR;
}
dbparens = 0;
return DOUTPAR;
} else if (idigit(c)) { /* handle 1< foo */
d = hgetc();
if(d == '&') {
d = hgetc();
if(d == '>') {
/* digit followed by &>: carry on as if we had just read the & */
peekfd = c - '0';
hungetc('>');
c = '&';
} else {
hungetc(d);
lexstop = 0;
hungetc('&');
}
} else if (d == '>' || d == '<') {
/* digit followed by < or >: carry on with the operator */
peekfd = c - '0';
c = d;
} else {
hungetc(d);
lexstop = 0;
}
}
/* chars in initial position in word */
/*
 * Handle comments. There are some special cases when this
 * is not normal command input: lexflags implies we are examining
 * a line lexically without it being used for normal command input.
 */
if (c == hashchar && !nocomments &&
(isset(INTERACTIVECOMMENTS) ||
((!lexflags || (lexflags & LEXFLAGS_COMMENTS)) && !expanding &&
(!interact || unset(SHINSTDIN) || strin)))) {
/* History is handled here to prevent extra *
 * newlines being inserted into the history. */
if (lexflags & LEXFLAGS_COMMENTS_KEEP) {
/* the comment is to be returned as a STRING token */
lexbuf.len = 0;
lexbuf.ptr = tokstr =
(char *)hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
add(c);
}
hwabort();
/* read up to, but not including, the end of the line */
while ((c = ingetc()) != '\n' && !lexstop) {
hwaddc(c);
addtoline(c);
if (lexflags & LEXFLAGS_COMMENTS_KEEP)
add(c);
}
if (errflag)
peek = LEXERR;
else {
if (lexflags & LEXFLAGS_COMMENTS_KEEP) {
*lexbuf.ptr = '\0';
if (!lexstop)
hungetc(c);
peek = STRING;
} else {
hwend();
hwbegin(0);
hwaddc('\n');
addtoline('\n');
/*
 * If splitting a line and removing comments,
 * we don't want a newline token since it's
 * treated specially.
 */
if ((lexflags & LEXFLAGS_COMMENTS_STRIP) && lexstop)
peek = ENDINPUT;
else
peek = NEWLIN;
}
}
return peek;
}
/*
 * Dispatch on the first character.  Cases that turn out not to be
 * punctuation break out of the switch and the character is then
 * treated as the start of a word.
 */
switch (lexact1[STOUC(c)]) {
case LX1_BKSLASH:
/* backslash-newline: ignore it and start again */
d = hgetc();
if (d == '\n')
goto beginning;
hungetc(d);
lexstop = 0;
break;
case LX1_NEWLIN:
return NEWLIN;
case LX1_SEMI:
/* one of ; ;; ;& ;| */
d = hgetc();
if(d == ';')
return DSEMI;
else if(d == '&')
return SEMIAMP;
else if (d == '|')
return SEMIBAR;
hungetc(d);
lexstop = 0;
return SEMI;
case LX1_AMPER:
/* one of & && &! &| &> &>| &>! &>> &>>| &>>! */
d = hgetc();
if (d == '&')
return DAMPER;
else if (d == '!' || d == '|')
return AMPERBANG;
else if (d == '>') {
tokfd = peekfd;
d = hgetc();
if (d == '!' || d == '|')
return OUTANGAMPBANG;
else if (d == '>') {
d = hgetc();
if (d == '!' || d == '|')
return DOUTANGAMPBANG;
hungetc(d);
lexstop = 0;
return DOUTANGAMP;
}
hungetc(d);
lexstop = 0;
return AMPOUTANG;
}
hungetc(d);
lexstop = 0;
return AMPER;
case LX1_BAR:
/* one of | || |&; || is not recognised in a case pattern */
d = hgetc();
if (d == '|' && !incasepat)
return DBAR;
else if (d == '&')
return BARAMP;
hungetc(d);
lexstop = 0;
return BAR;
case LX1_INPAR:
d = hgetc();
if (d == '(') {
if (infor) {
/* for ((...)): the contents are read by the next call */
dbparens = 1;
return DINPAR;
}
if (incmdpos || (isset(SHGLOB) && !isset(KSHGLOB))) {
lexbuf.len = 0;
lexbuf.ptr = tokstr = (char *)
hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
switch (cmd_or_math(CS_MATH)) {
case CMD_OR_MATH_MATH:
return DINPAR;
case CMD_OR_MATH_CMD:
/*
 * Not math, so we don't return the contents
 * as a string in this case.
 */
tokstr = NULL;
return INPAR;
case CMD_OR_MATH_ERR:
/*
 * LEXFLAGS_ACTIVE means we came from bufferwords(),
 * so we treat as an incomplete math expression
 */
if (lexflags & LEXFLAGS_ACTIVE)
tokstr = dyncat("((", tokstr ? tokstr : "");
/* fall through */
default:
return LEXERR;
}
}
} else if (d == ')')
return INOUTPAR;
hungetc(d);
lexstop = 0;
/* otherwise "(" is only a token in these contexts, else part of a word */
if (!(isset(SHGLOB) || incond == 1 || incmdpos))
break;
return INPAR;
case LX1_OUTPAR:
return OUTPAR;
case LX1_INANG:
d = hgetc();
if (d == '(') {
/* <( is left to be read as part of a word */
hungetc(d);
lexstop = 0;
unpeekfd:
/*
 * This is a word after all: if a digit was taken to be a file
 * descriptor, make it the first character of the word again.
 */
if(peekfd != -1) {
hungetc(c);
c = '0' + peekfd;
}
break;
}
if (d == '>') {
peek = INOUTANG;
} else if (d == '<') {
int e = hgetc();
if (e == '(') {
/* <<( is an INANG; the second "<(" is put back for the next word */
hungetc(e);
hungetc(d);
peek = INANG;
} else if (e == '<')
peek = TRINANG;
else if (e == '-')
peek = DINANGDASH;
else {
hungetc(e);
lexstop = 0;
peek = DINANG;
}
} else if (d == '&') {
peek = INANGAMP;
} else {
hungetc(d);
/* <n-m> numeric glob is part of a word, not a redirection */
if(isnumglob())
goto unpeekfd;
peek = INANG;
}
tokfd = peekfd;
return peek;
case LX1_OUTANG:
d = hgetc();
if (d == '(') {
/* >( is left to be read as part of a word */
hungetc(d);
goto unpeekfd;
} else if (d == '&') {
d = hgetc();
if (d == '!' || d == '|')
peek = OUTANGAMPBANG;
else {
hungetc(d);
lexstop = 0;
peek = OUTANGAMP;
}
} else if (d == '!' || d == '|')
peek = OUTANGBANG;
else if (d == '>') {
d = hgetc();
if (d == '&') {
d = hgetc();
if (d == '!' || d == '|')
peek = DOUTANGAMPBANG;
else {
hungetc(d);
lexstop = 0;
peek = DOUTANGAMP;
}
} else if (d == '!' || d == '|')
peek = DOUTANGBANG;
else if (d == '(') {
/* >>( is an OUTANG; the second ">(" is put back for the next word */
hungetc(d);
hungetc('>');
peek = OUTANG;
} else {
hungetc(d);
lexstop = 0;
peek = DOUTANG;
/* with HISTALLOWCLOBBER a | is added to the history word */
if (isset(HISTALLOWCLOBBER))
hwaddc('|');
}
} else {
hungetc(d);
lexstop = 0;
peek = OUTANG;
if (!incond && isset(HISTALLOWCLOBBER))
hwaddc('|');
}
tokfd = peekfd;
return peek;
}
/* we've started a string, now get the *
 * rest of it, performing tokenization */
return gettokstr(c, 0);
}
/*
 * Get the remains of a token string. This has two uses.
 * When called from gettok(), with sub = 0, we have already identified
 * any interesting initial character and want to get the rest of
 * what we now know is a string. However, the string may still include
 * metacharacters and potentially substitutions.
 *
 * When called from parse_subst_string() with sub = 1, we are not
 * fully parsing a command line, merely tokenizing a string.
 * In this case we always add characters to the parsed string
 * unless there is a parse error.
 *
 * c is the first character of the string, already read by the caller.
 * With sub = 0 a new token buffer is allocated; with sub = 1 the caller
 * must already have set up tokstr and lexbuf.
 *
 * Returns the token type: normally STRING, but possibly ENVSTRING,
 * ENVARRAY, INPAR or INOUTPAR, or LEXERR on error.  The token text is
 * left in tokstr.
 */
/**/
static enum lextok
gettokstr(int c, int sub)
{
/*
 * bct, pct, brct: nesting depth of braces, parentheses and brackets.
 * seen_brct:      a [ has been seen outside a ${...}.
 * fdpar:          we are inside a ( at the start of the word that has
 *                 so far contained only blanks (sh function definition).
 */
int bct = 0, pct = 0, brct = 0, seen_brct = 0, fdpar = 0;
/*
 * intpos:         non-zero while we are in initial position; counted
 *                 down after each character and set to 2 after the =
 *                 of an assignment.
 * in_brace_param: value of bct at which the enclosing ${ was opened,
 *                 zero if we are not inside ${...}.
 */
int intpos = 1, in_brace_param = 0;
/* unmatched: the quote character that was left unclosed, if any */
int inquote, unmatched = 0;
enum lextok peek;
#ifdef DEBUG
int ocmdsp = cmdsp;
#endif
peek = STRING;
if (!sub) {
lexbuf.len = 0;
lexbuf.ptr = tokstr = (char *) hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
}
/*
 * One character is handled on each pass.  Each case either jumps to
 * brk to end the token or leaves in c the character to be added to
 * the token string after the switch.
 */
for (;;) {
int act;
int e;
int inbl = inblank(c);
if (fdpar && !inbl && c != ')')
fdpar = 0;
/* a blank ends the word unless we are inside ${...} or parentheses */
if (inbl && !in_brace_param && !pct)
act = LX2_BREAK;
else {
act = lexact2[STOUC(c)];
c = lextok2[STOUC(c)];
}
switch (act) {
case LX2_BREAK:
if (!in_brace_param && !sub)
goto brk;
break;
case LX2_META:
/* add the Meta here and the character it quotes after the switch */
c = hgetc();
#ifdef DEBUG
if (lexstop) {
fputs("BUG: input terminated by Meta\n", stderr);
fflush(stderr);
goto brk;
}
#endif
add(Meta);
break;
case LX2_OUTPAR:
if (fdpar) {
/* this is a single word `( )', treat as INOUTPAR */
add(c);
*lexbuf.ptr = '\0';
return INOUTPAR;
}
if ((sub || in_brace_param) && isset(SHGLOB))
break;
/* an unbalanced ) ends the word, except when just tokenizing */
if (!in_brace_param && !pct--) {
if (sub) {
pct = 0;
break;
} else
goto brk;
}
c = Outpar;
break;
case LX2_BAR:
/* | only belongs to the word inside parentheses or ${...} */
if (!pct && !in_brace_param) {
if (sub)
break;
else
goto brk;
}
if (unset(SHGLOB) || (!sub && !in_brace_param))
c = Bar;
break;
case LX2_STRING:
/* $: look at what follows, skipping a backslash-newline */
e = hgetc();
if (e == '\\') {
e = hgetc();
if (e != '\n') {
hungetc(e);
hungetc('\\');
lexstop = 0;
break;
}
e = hgetc();
}
if (e == '[') {
/* $[...] math substitution */
cmdpush(CS_MATHSUBST);
add(String);
add(Inbrack);
c = dquote_parse(']', sub);
cmdpop();
if (c) {
peek = LEXERR;
goto brk;
}
c = Outbrack;
} else if (e == '(') {
/* $(...) command substitution or $((...)) math substitution */
add(String);
switch (cmd_or_math_sub()) {
case CMD_OR_MATH_CMD:
c = Outpar;
break;
case CMD_OR_MATH_MATH:
c = Outparmath;
break;
default:
peek = LEXERR;
goto brk;
}
} else {
if (e == '{') {
/* ${...}: note the brace depth at which the parameter starts */
add(c);
c = Inbrace;
++bct;
cmdpush(CS_BRACEPAR);
if (!in_brace_param) {
if ((in_brace_param = bct))
seen_brct = 0;
}
} else {
hungetc(e);
lexstop = 0;
}
}
break;
case LX2_INBRACK:
if (!in_brace_param) {
brct++;
seen_brct = 1;
}
c = Inbrack;
break;
case LX2_OUTBRACK:
if (!in_brace_param)
brct--;
if (brct < 0)
brct = 0;
c = Outbrack;
break;
case LX2_INPAR:
if (isset(SHGLOB)) {
if (sub || in_brace_param)
break;
if (incasepat > 0 && !lexbuf.len)
return INPAR;
if (!isset(KSHGLOB) && lexbuf.len)
goto brk;
}
if (!in_brace_param) {
if (!sub) {
/* peek at the next character without consuming it */
e = hgetc();
hungetc(e);
lexstop = 0;
/* For command words, parentheses are only
 * special at the start. But now we're tokenising
 * the remaining string. So I don't see what
 * the old incmdpos test here is for.
 * pws 1999/6/8
 *
 * Oh, no.
 * func1( )
 * is a valid function definition in [k]sh. The best
 * thing we can do, without really nasty lookahead tricks,
 * is break if we find a blank after a parenthesis. At
 * least this can't happen inside braces or brackets. We
 * only allow this with SHGLOB (set for both sh and ksh).
 *
 * Things like `print @( |foo)' should still
 * work, because [k]sh don't allow multiple words
 * in a function definition, so we only do this
 * in command position.
 * pws 1999/6/14
 */
if (e == ')' || (isset(SHGLOB) && inblank(e) && !bct &&
!brct && !intpos && incmdpos)) {
/*
 * Either a () token, or a command word with
 * something suspiciously like a ksh function
 * definition.
 * The current word isn't spellcheckable.
 */
nocorrect |= 2;
goto brk;
}
}
/*
 * This also handles the [k]sh `foo( )' function definition.
 * Maintain a variable fdpar, set as long as a single set of
 * parentheses contains only space. Then if we get to the
 * closing parenthesis and it is still set, we can assume we
 * have a function definition. Only do this at the start of
 * the word, since the (...) must be a separate token.
 */
if (!pct++ && isset(SHGLOB) && intpos && !bct && !brct)
fdpar = 1;
}
c = Inpar;
break;
case LX2_INBRACE:
if (isset(IGNOREBRACES) || sub)
c = '{';
else {
/* a { at the start of a command word is a token on its own */
if (!lexbuf.len && incmdpos) {
add('{');
*lexbuf.ptr = '\0';
return STRING;
}
if (in_brace_param) {
cmdpush(CS_BRACE);
}
bct++;
}
break;
case LX2_OUTBRACE:
if ((isset(IGNOREBRACES) || sub) && !in_brace_param)
break;
if (!bct)
break;
if (in_brace_param) {
cmdpop();
}
/* leaving the brace that opened the ${ ends the parameter */
if (bct-- == in_brace_param)
in_brace_param = 0;
c = Outbrace;
break;
case LX2_COMMA:
/* only a token inside a brace that is not the ${ brace itself */
if (unset(IGNOREBRACES) && !sub && bct > in_brace_param)
c = Comma;
break;
case LX2_OUTANG:
if (in_brace_param || sub)
break;
/* >(...) process substitution; any other > ends the word */
e = hgetc();
if (e != '(') {
hungetc(e);
lexstop = 0;
goto brk;
}
add(OutangProc);
if (skipcomm()) {
peek = LEXERR;
goto brk;
}
c = Outpar;
break;
case LX2_INANG:
if (isset(SHGLOB) && sub)
break;
e = hgetc();
if (!(in_brace_param || sub) && e == '(') {
/* <(...) process substitution */
add(Inang);
if (skipcomm()) {
peek = LEXERR;
goto brk;
}
c = Outpar;
break;
}
hungetc(e);
if(isnumglob()) {
/* <n-m> numeric glob: copy it up to the closing > */
add(Inang);
while ((c = hgetc()) != '>')
add(c);
c = Outang;
break;
}
lexstop = 0;
if (in_brace_param || sub)
break;
goto brk;
case LX2_EQUALS:
if (!sub) {
if (intpos) {
/* = in initial position: plain Equals, or =(...) substitution */
e = hgetc();
if (e != '(') {
hungetc(e);
lexstop = 0;
c = Equals;
} else {
add(Equals);
if (skipcomm()) {
peek = LEXERR;
goto brk;
}
c = Outpar;
}
} else if (peek != ENVSTRING &&
(incmdpos || intypeset) && !bct && !brct) {
/*
 * Possible assignment: check that everything before the = is
 * a run of digits or an identifier with an optional subscript,
 * optionally followed by a +.
 */
char *t = tokstr;
if (idigit(*t))
while (++t < lexbuf.ptr && idigit(*t));
else {
int sav = *lexbuf.ptr;
*lexbuf.ptr = '\0';
t = itype_end(t, IIDENT, 0);
if (t < lexbuf.ptr) {
skipparens(Inbrack, Outbrack, &t);
} else {
*lexbuf.ptr = sav;
}
}
if (*t == '+')
t++;
if (t == lexbuf.ptr) {
e = hgetc();
if (e == '(') {
/* name=( starts an array assignment */
*lexbuf.ptr = '\0';
return ENVARRAY;
}
hungetc(e);
lexstop = 0;
peek = ENVSTRING;
/* the character after the = counts as initial position again */
intpos = 2;
} else
c = Equals;
} else
c = Equals;
}
break;
case LX2_BKSLASH:
c = hgetc();
if (c == '\n') {
/* backslash-newline: drop both and carry on with the next char */
c = hgetc();
if (!lexstop)
continue;
} else {
add(Bnull);
if (c == STOUC(Meta)) {
c = hgetc();
#ifdef DEBUG
if (lexstop) {
fputs("BUG: input terminated by Meta\n", stderr);
fflush(stderr);
goto brk;
}
#endif
add(Meta);
}
}
if (lexstop)
goto brk;
break;
case LX2_QUOTE: {
/* non-zero if this is a $'...' string */
int strquote = (lexbuf.len && lexbuf.ptr[-1] == String);
add(Snull);
cmdpush(CS_QUOTE);
/* loops only to continue after '' with RCQUOTES set */
for (;;) {
STOPHIST
while ((c = hgetc()) != '\'' && !lexstop) {
if (strquote && c == '\\') {
c = hgetc();
if (lexstop)
break;
/*
 * Mostly we don't need to do anything special
 * with escape backslashes or closing quotes
 * inside $'...'; however in completion we
 * need to be able to strip multiple backslashes
 * neatly.
 */
if (c == '\\' || c == '\'')
add(Bnull);
else
add('\\');
} else if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') {
/* with CSHJUNKIEQUOTES a newline must be escaped */
if (lexbuf.ptr[-1] == '\\')
lexbuf.ptr--, lexbuf.len--;
else
break;
}
add(c);
}
ALLOWHIST
if (c != '\'') {
unmatched = '\'';
/* Not an error when called from bufferwords() */
if (!(lexflags & LEXFLAGS_ACTIVE))
peek = LEXERR;
cmdpop();
goto brk;
}
e = hgetc();
if (e != '\'' || unset(RCQUOTES) || strquote)
break;
/* RCQUOTES: '' inside the quotes stands for a single ' */
add(c);
}
cmdpop();
hungetc(e);
lexstop = 0;
c = Snull;
break;
}
case LX2_DQUOTE:
add(Dnull);
cmdpush(CS_DQUOTE);
c = dquote_parse('"', sub);
cmdpop();
if (c) {
unmatched = '"';
/* Not an error when called from bufferwords() */
if (!(lexflags & LEXFLAGS_ACTIVE))
peek = LEXERR;
goto brk;
}
c = Dnull;
break;
case LX2_BQUOTE:
add(Tick);
cmdpush(CS_BQUOTE);
SETPARBEGIN
/* inquote: inside single quotes within the backquotes */
inquote = 0;
while ((c = hgetc()) != '`' && !lexstop) {
if (c == '\\') {
c = hgetc();
if (c != '\n') {
/* the backslash only quotes ` \ and $ here */
add(c == '`' || c == '\\' || c == '$' ? Bnull : '\\');
add(c);
}
else if (!sub && isset(CSHJUNKIEQUOTES))
add(c);
} else {
if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') {
break;
}
add(c);
if (c == '\'') {
if ((inquote = !inquote))
STOPHIST
else
ALLOWHIST
}
}
}
if (inquote)
ALLOWHIST
cmdpop();
if (c != '`') {
unmatched = '`';
/* Not an error when called from bufferwords() */
if (!(lexflags & LEXFLAGS_ACTIVE))
peek = LEXERR;
goto brk;
}
c = Tick;
SETPAREND
break;
case LX2_DASH:
/*
 * - shouldn't be treated as a special character unless
 * we're in a pattern. Unfortunately, working out for
 * sure in complicated expressions whether we're in a
 * pattern is tricky. So we'll make it special and
 * turn it back any time we don't need it special.
 * This is not ideal as it's a lot of work.
 */
c = Dash;
break;
case LX2_BANG:
/*
 * Same logic as Dash, for ! to perform negation in range.
 */
if (seen_brct)
c = Bang;
else
c = '!';
}
/* store the character and move on to the next */
add(c);
c = hgetc();
if (intpos)
intpos--;
if (lexstop)
break;
}
brk:
/* End of the token: c is the character that terminated it. */
if (errflag) {
if (in_brace_param) {
/* pop the command stack entries pushed for the open braces */
while(bct-- >= in_brace_param)
cmdpop();
}
return LEXERR;
}
hungetc(c);
if (unmatched && !(lexflags & LEXFLAGS_ACTIVE))
zerr("unmatched %c", unmatched);
if (in_brace_param) {
while(bct-- >= in_brace_param)
cmdpop();
zerr("closing brace expected");
} else if (unset(IGNOREBRACES) && !sub && lexbuf.len > 1 &&
peek == STRING && lexbuf.ptr[-1] == '}' &&
lexbuf.ptr[-2] != Bnull) {
/* hack to get {foo} command syntax work */
/*
 * Alias expansion when parsing command substitution means that
 * the case for raw lexical analysis may not be the same.
 * (Just go with it, OK?)
 */
/*
 * Remove the final } from the word and put it back on the input.
 * lex_add_raw is set temporarily from whether the raw buffer also
 * ends in }, then restored.
 */
int lar = lex_add_raw;
lex_add_raw = lexbuf_raw.len > 0 && lexbuf_raw.ptr[-1] == '}';
lexbuf.ptr--;
lexbuf.len--;
lexstop = 0;
hungetc('}');
lex_add_raw = lar;
}
*lexbuf.ptr = '\0';
DPUTS(cmdsp != ocmdsp, "BUG: gettok: cmdstack changed.");
return peek;
}
/*
 * Parse input as if in double quotes.
 * endchar is the end character to expect.
 * sub is non-zero when we are tokenizing a string rather than reading
 * command input (it is passed as 1 by parsestrnoerr() and passed on by
 * gettokstr() and parse_subscript()).  When it is set, a
 * backslash-newline is always dropped, the CSHJUNKIEQUOTES check for
 * an unescaped newline is not made, and \" inside [...] is quoted.
 * Return non-zero for error (character to unget), else zero
 *
 * The parsed text is appended to the token string with add(); the end
 * character itself is read but not added.
 */
/**/
static int
dquote_parse(char endchar, int sub)
{
/*
 * pct, brct, bct: nesting depth of parentheses, brackets and ${ braces.
 * intick:         0 outside backquotes, 1 inside them, 2 inside single
 *                 quotes within them.
 */
int pct = 0, brct = 0, bct = 0, intick = 0, err = 0;
int c;
/* in math mode parentheses and brackets must balance before we stop */
int math = endchar == ')' || endchar == ']' || infor;
/*
 * NOTE(review): zlemath presumably records whether the ZLE cursor was
 * at or beyond the start of this expression; it is used with a second
 * cursor comparison at the end to set inwhat to IN_MATH -- confirm.
 */
int zlemath = math && zlemetacs > zlemetall + addedx - inbufct;
/*
 * Carry on until we find the end character outside any ${...},
 * backquotes and (in math mode) parentheses or brackets, or until
 * the input runs out.
 */
while (((c = hgetc()) != endchar || bct ||
(math && ((pct > 0) || (brct > 0))) ||
intick) && !lexstop) {
cont:
switch (c) {
case '\\':
c = hgetc();
if (c != '\n') {
/* characters for which the backslash is a real quote */
if (c == '$' || c == '\\' || (c == '}' && !intick && bct) ||
c == endchar || c == '`' ||
(endchar == ']' && (c == '[' || c == ']' ||
c == '(' || c == ')' ||
c == '{' || c == '}' ||
(c == '"' && sub))))
add(Bnull);
else {
/* lexstop is implicitly handled here */
/* keep the backslash and process the next character normally */
add('\\');
goto cont;
}
} else if (sub || unset(CSHJUNKIEQUOTES) || endchar != '"')
/* backslash-newline is dropped */
continue;
break;
case '\n':
/* with CSHJUNKIEQUOTES an unescaped newline in "..." is an error */
err = !sub && isset(CSHJUNKIEQUOTES) && endchar == '"';
break;
case '$':
if (intick)
break;
c = hgetc();
if (c == '(') {
/* $(...) or $((...)) */
add(Qstring);
switch (cmd_or_math_sub()) {
case CMD_OR_MATH_CMD:
c = Outpar;
break;
case CMD_OR_MATH_MATH:
c = Outparmath;
break;
default:
err = 1;
break;
}
} else if (c == '[') {
/* $[...] */
add(String);
add(Inbrack);
cmdpush(CS_MATHSUBST);
err = dquote_parse(']', sub);
cmdpop();
c = Outbrack;
} else if (c == '{') {
/* ${...} */
add(Qstring);
c = Inbrace;
cmdpush(CS_BRACEPAR);
bct++;
} else if (c == '$')
add(Qstring);
else {
hungetc(c);
lexstop = 0;
c = Qstring;
}
break;
case '}':
/* closes a ${ if one is open, otherwise an ordinary character */
if (intick || !bct)
break;
c = Outbrace;
bct--;
cmdpop();
break;
case '`':
c = Qtick;
if (intick == 2)
ALLOWHIST
/* toggle between inside and outside backquotes */
if ((intick = !intick)) {
SETPARBEGIN
cmdpush(CS_BQUOTE);
} else {
SETPAREND
cmdpop();
}
break;
case '\'':
/* only significant inside backquotes, where it toggles history */
if (!intick)
break;
if (intick == 1)
intick = 2, STOPHIST
else
intick = 1, ALLOWHIST
break;
case '(':
if (!math || !bct)
pct++;
break;
case ')':
/* in math mode an unbalanced ) is an error */
if (!math || !bct)
err = (!pct-- && math);
break;
case '[':
if (!math || !bct)
brct++;
break;
case ']':
/* in math mode an unbalanced ] is an error */
if (!math || !bct)
err = (!brct-- && math);
break;
case '"':
if (intick || (endchar != '"' && !bct))
break;
if (bct) {
/* a nested quoted string inside ${...} */
add(Dnull);
cmdpush(CS_DQUOTE);
err = dquote_parse('"', sub);
cmdpop();
c = Dnull;
} else
err = 1;
break;
}
if (err || lexstop)
break;
add(c);
}
/* Undo the history and command stack state of anything left open. */
if (intick == 2)
ALLOWHIST
if (intick) {
cmdpop();
}
while (bct--)
cmdpop();
if (lexstop)
/* ran out of input: an error if we were still expecting something */
err = intick || endchar || err;
else if (err == 1) {
/*
 * TODO: as far as I can see, this hack is used in gettokstr()
 * to hungetc() a character on an error. However, I don't
 * understand what that actually gets us, and we can't guarantee
 * it's a character anyway, because of the previous test.
 *
 * We use the same feature in cmd_or_math where we actually do
 * need to unget if we decide it's really a command substitution.
 * We try to handle the other case by testing for lexstop.
 */
err = c;
}
if (zlemath && zlemetacs <= zlemetall + 1 - inbufct)
inwhat = IN_MATH;
return err;
}
/*
 * Tokenize the string *s as if it were inside double quotes, reporting
 * a parse error if it cannot be tokenized.  This is usually called
 * before singsub().
 *
 * All the work is done by parsestrnoerr(); this function adds the
 * error message.  Nothing is reported if the failure came with an
 * interrupt (ERRFLAG_INT).
 *
 * On entry, *s must point to a string allocated from the stack of
 * exactly the right length, i.e. strlen(*s) + 1, as the string
 * is used as the lexical token string whose memory management
 * demands this. Usually the input string will therefore be
 * the result of an immediately preceding dupstring().
 *
 * Returns zero on success, otherwise the non-zero value returned by
 * parsestrnoerr().
 */
/**/
mod_export int
parsestr(char **s)
{
    int err = parsestrnoerr(s);

    if (!err)
        return err;

    untokenize(*s);
    if (errflag & ERRFLAG_INT)
        return err;

    /* Show the offending character only if it is printable. */
    if (err > 32 && err < 127)
        zerr("parse error near `%c'", err);
    else
        zerr("parse error");
    tok = LEXERR;
    return err;
}
/*
 * Tokenize the string *s as if it were inside double quotes, without
 * reporting errors.
 *
 * A copy of the string is pushed onto the input and parsed with
 * dquote_parse(), using *s itself as the token buffer.  If the token
 * string has moved by the end of the parse, *s is updated to point to
 * it.  Returns the result of dquote_parse(): zero for success.
 */
/**/
mod_export int
parsestrnoerr(char **s)
{
    int len = strlen(*s);
    int err;

    zcontext_save();
    untokenize(*s);
    inpush(dupstring_wlen(*s, len), 0, NULL);
    strinbeg(0);

    /* The output is written over the original string. */
    tokstr = *s;
    lexbuf.ptr = tokstr;
    lexbuf.siz = len + 1;
    lexbuf.len = 0;

    err = dquote_parse('\0', 1);
    if (tokstr)
        *s = tokstr;
    *lexbuf.ptr = '\0';

    strinend();
    inpop();
    DPUTS(cmdsp, "BUG: parsestr: cmdstack not empty.");
    zcontext_restore();

    return err;
}
/*
 * Parse a subscript in string s, tokenizing it in place.
 * sub is passed down to dquote_parse().
 * endchar is the character that ends the subscript.
 *
 * Returns a pointer to the character after the parsed text, or NULL
 * if s is empty, starts with endchar, or could not be parsed.  After
 * a failed parse the part of s that was written is untokenized again.
 */
/**/
mod_export char *
parse_subscript(char *s, int sub, int endchar)
{
    int slen = strlen(s);
    int err, toklen;
    char *input;

    if (!*s || *s == endchar)
        return NULL;

    zcontext_save();
    input = dupstring_wlen(s, slen);
    untokenize(input);
    inpush(input, 0, NULL);
    strinbeg(0);

    /*
     * Warning to Future Generations:
     *
     * This way of passing the subscript through the lexer is brittle.
     * Code above this for several layers assumes that when we tokenise
     * the input it goes into the same place as the original string.
     * However, the lexer may overwrite later bits of the string or
     * reallocate it, in particular when expanding aliases.  To get
     * around this, we copy the string and then copy it back.  This is a
     * bit more robust but still relies on the underlying assumption of
     * length preservation.
     */
    tokstr = dupstring_wlen(s, slen);
    lexbuf.ptr = tokstr;
    lexbuf.siz = slen + 1;
    lexbuf.len = 0;

    err = dquote_parse(endchar, sub);
    toklen = (int)(lexbuf.ptr - tokstr);
    DPUTS(toklen > slen, "Bad length for parsed subscript");
    memcpy(s, tokstr, toklen);

    if (!err) {
        s += toklen;
    } else {
        /* Untokenize just the part we wrote, leaving the rest alone. */
        char *strend = s + toklen;
        char saved = *strend;

        *strend = '\0';
        untokenize(s);
        *strend = saved;
        s = NULL;
    }

    strinend();
    inpop();
    DPUTS(cmdsp, "BUG: parse_subscript: cmdstack not empty.");
    zcontext_restore();

    return s;
}
/* Tokenize a string given in s. Parsing is done as if s were a normal *
* command-line argument but it may contain separators. This is used *
* to parse the right-hand side of ${...%...} substitutions. */
/**/
mod_export int
parse_subst_string(char *s)
{
int c, l = strlen(s), err;
char *ptr;
enum lextok ctok;
if (!*s || !strcmp(s, nulstring))
return 0;
zcontext_save();
untokenize(s);
inpush(dupstring_wlen(s, l), 0, NULL);
strinbeg(0);
lexbuf.len = 0;
lexbuf.ptr = tokstr = s;
lexbuf.siz = l + 1;
c = hgetc();
ctok = gettokstr(c, 1);
err = errflag;
strinend();
inpop();
DPUTS(cmdsp, "BUG: parse_subst_string: cmdstack not empty.");
zcontext_restore();
/* Keep any interrupt error status */
errflag = err | (errflag & ERRFLAG_INT);
if (ctok == LEXERR) {
untokenize(s);
return 1;
}
#ifdef DEBUG
/*
* Historical note: we used to check here for olen (the value of lexbuf.len
* before zcontext_restore()) == l, but that's not necessarily the case if
* we stripped an RCQUOTE.
*/
if (ctok != STRING || (errflag && !noerrs)) {
fprintf(stderr, "Oops. Bug in parse_subst_string: %s\n",
errflag ? "errflag" : "ctok != STRING");
fflush(stderr);
untokenize(s);
return 1;
}
#endif
/* Check for $'...' quoting. This needs special handling. */
for (ptr = s; *ptr; )
{
if (*ptr == String && ptr[1] == Snull)
{
char *t;
int len, tlen, diff;
t = getkeystring(ptr + 2, &len, GETKEYS_DOLLARS_QUOTE, NULL);
len += 2;
tlen = strlen(t);
diff = len - tlen;
/*
* Yuk.
* parse_subst_string() currently handles strings in-place.
* That's not so easy to fix without knowing whether
* additional memory should come off the heap or
* otherwise. So we cheat by copying the unquoted string
* into place, unless it's too long. That's not the
* normal case, but I'm worried there are pathological
* cases with converting metafied multibyte strings.
* If someone can prove there aren't I will be very happy.
*/
if (diff < 0) {
DPUTS(1, "$'...' subst too long: fix get_parse_string()");
return 1;
}
memcpy(ptr, t, tlen);
ptr += tlen;
if (diff > 0) {
char *dptr = ptr;
char *sptr = ptr + diff;
while ((*dptr++ = *sptr++))
;
}
} else
ptr++;
}
return 0;
}
/* Called below to report word positions. */
/**/
static void
gotword(void)
{
int nwe = zlemetall + 1 - inbufct + (addedx == 2 ? 1 : 0);
if (zlemetacs <= nwe) {
int nwb = zlemetall - wordbeg + addedx;
if (zlemetacs >= nwb) {
wb = nwb;
we = nwe;
} else {
wb = zlemetacs + addedx;
if (we < wb)
we = wb;
}
lexflags = 0;
}
}
/* Check if current lex text matches an alias: 1 if so, else 0 */
static int
checkalias(void)
{
Alias an;
if (!zshlextext)
return 0;
if (!noaliases && isset(ALIASESOPT) &&
(!isset(POSIXALIASES) ||
(tok == STRING && !reswdtab->getnode(reswdtab, zshlextext)))) {
char *suf;
an = (Alias) aliastab->getnode(aliastab, zshlextext);
if (an && !an->inuse &&
((an->node.flags & ALIAS_GLOBAL) ||
(incmdpos && tok == STRING) || inalmore)) {
if (!lexstop) {
/*
* Tokens that don't require a space after, get one,
* because they are treated as if preceded by one.
*/
int c = hgetc();
hungetc(c);
if (!iblank(c))
inpush(" ", INP_ALIAS, 0);
}
inpush(an->text, INP_ALIAS, an);
if (an->text[0] == ' ' && !(an->node.flags & ALIAS_GLOBAL))
aliasspaceflag = 1;
lexstop = 0;
return 1;
}
if ((suf = strrchr(zshlextext, '.')) && suf[1] &&
suf > zshlextext && suf[-1] != Meta &&
(an = (Alias)sufaliastab->getnode(sufaliastab, suf+1)) &&
!an->inuse && incmdpos) {
inpush(dupstring(zshlextext), INP_ALIAS, an);
inpush(" ", INP_ALIAS, NULL);
inpush(an->text, INP_ALIAS, NULL);
lexstop = 0;
return 1;
}
}
return 0;
}
/* expand aliases and reserved words */
/**/
int
exalias(void)
{
/*
 * Sets zshlextext for the current token, then tries alias expansion
 * and, for STRING tokens, conversion to a reserved-word token.
 *
 * Returns 1 if checkalias() reported that an alias was expanded,
 * otherwise 0.
 */
Reswd rw;
hwend();
/*
 * NOTE(review): spckword() is presumably the spelling corrector,
 * given that it is gated on the CORRECT / CORRECTALL options --
 * confirm.  It is only called for STRING tokens that do not come
 * from alias text, and is passed &tokstr.
 */
if (interact && isset(SHINSTDIN) && !strin && incasepat <= 0 &&
tok == STRING && !nocorrect && !(inbufflags & INP_ALIAS) &&
!hist_is_in_word() &&
(isset(CORRECTALL) || (isset(CORRECT) && incmdpos)))
spckword(&tokstr, 1, incmdpos, 1);
if (!tokstr) {
/* No token string: use the fixed text for this token type. */
zshlextext = tokstrings[tok];
if (tok == NEWLIN)
return 0;
return checkalias();
} else {
/*
 * copy is declared in this block with room for tokstr plus its
 * terminator.  Every path that leaves the block with zshlextext
 * pointing at copy switches it back to tokstr first.
 */
VARARR(char, copy, (strlen(tokstr) + 1));
if (has_token(tokstr)) {
char *p, *t;
/*
 * Build in copy a version of tokstr in which each token
 * character is replaced by its entry in ztokens; the loop
 * stops after copying the terminating NUL.
 */
zshlextext = p = copy;
for (t = tokstr;
(*p++ = itok(*t) ? ztokens[*t++ - Pound] : *t++););
} else
zshlextext = tokstr;
if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS)) {
int zp = lexflags;
gotword();
/*
 * gotword() clears lexflags when it records word positions;
 * in that case stop here without any alias handling.
 */
if ((zp & LEXFLAGS_ZLE) && !lexflags) {
if (zshlextext == copy)
zshlextext = tokstr;
return 0;
}
}
if (tok == STRING) {
/*
 * Check for an alias.  With POSIXALIASES set this is skipped
 * when the text had to be rebuilt in copy, i.e. when tokstr
 * contained token characters.
 */
if ((zshlextext != copy || !isset(POSIXALIASES)) && checkalias()) {
if (zshlextext == copy)
zshlextext = tokstr;
return 1;
}
/*
 * Then check for a reserved word: either in command position,
 * or a lone "}" when neither IGNOREBRACES nor IGNORECLOSEBRACES
 * is set.
 */
if ((incmdpos ||
(unset(IGNOREBRACES) && unset(IGNORECLOSEBRACES) &&
zshlextext[0] == '}' && !zshlextext[1])) &&
(rw = (Reswd) reswdtab->getnode(reswdtab, zshlextext))) {
tok = rw->token;
inrepeat_ = (tok == REPEAT);
if (tok == DINBRACK)
incond = 1;
} else if (incond && !strcmp(zshlextext, "]]")) {
/* "]]" while incond is set ends the condition. */
tok = DOUTBRACK;
incond = 0;
} else if (incond == 1 && zshlextext[0] == '!' && !zshlextext[1])
/* A lone "!" while incond is exactly 1 becomes BANG. */
tok = BANG;
}
inalmore = 0;
if (zshlextext == copy)
zshlextext = tokstr;
}
return 0;
}
/**/
void
zshlex_raw_add(int c)
{
    /*
     * Append c to the raw token buffer, doubling the buffer when it
     * becomes full.  Does nothing unless lex_add_raw is set.
     */
    int grown;

    if (!lex_add_raw)
        return;

    *lexbuf_raw.ptr = c;
    lexbuf_raw.ptr++;
    lexbuf_raw.len++;

    /* Still room left after this character: done. */
    if (lexbuf_raw.len != lexbuf_raw.siz)
        return;

    grown = lexbuf_raw.siz * 2;
    tokstr_raw = (char *)hrealloc(tokstr_raw, lexbuf_raw.siz, grown);
    /* The buffer may have moved, so recompute the write position. */
    lexbuf_raw.ptr = tokstr_raw + lexbuf_raw.len;
    /* Zero the newly added half. */
    memset(lexbuf_raw.ptr, 0, grown - lexbuf_raw.siz);
    lexbuf_raw.siz = grown;
}
/**/
void
zshlex_raw_back(void)
{
    /*
     * Step the raw token buffer back by one character.  Does nothing
     * unless lex_add_raw is set.
     */
    if (lex_add_raw) {
        lexbuf_raw.len -= 1;
        lexbuf_raw.ptr -= 1;
    }
}
/**/
int
zshlex_raw_mark(int offset)
{
    /*
     * Return the current length of the raw token buffer adjusted by
     * offset, or 0 if lex_add_raw is not set.
     */
    return lex_add_raw ? lexbuf_raw.len + offset : 0;
}
/**/
void
zshlex_raw_back_to_mark(int mark)
{
    /*
     * Reset the raw token buffer so that its length is mark and its
     * write position is mark bytes into tokstr_raw.  Does nothing
     * unless lex_add_raw is set.
     */
    if (lex_add_raw) {
        lexbuf_raw.len = mark;
        lexbuf_raw.ptr = tokstr_raw + mark;
    }
}
/*
* Skip (...) for command-style substitutions: $(...), <(...), >(...)
*
* In order to ensure we don't stop at closing parentheses with
* some other syntactic significance, we'll parse the input until
* we find an unmatched closing parenthesis. However, we'll throw
* away the result of the parsing and just keep the string we've built
* up on the way.
*/
/**/
static int
skipcomm(void)
{
/*
 * Both variants below push CS_CMDSUBST on entry, pop it on exit, and
 * return the final value of lexstop.  Which one is compiled depends
 * on whether ZSH_OLD_SKIPCOMM is defined.
 */
#ifdef ZSH_OLD_SKIPCOMM
/*
 * Old variant: scan characters by hand.  pct counts parentheses that
 * are still open; start is non-zero when the previous character was
 * blank (it starts at 1), which is when '#' begins a comment.
 */
int pct = 1, c, start = 1;
cmdpush(CS_CMDSUBST);
SETPARBEGIN
c = Inpar;
do {
int iswhite;
add(c);
c = hgetc();
/* Stop on a token character or when the lexer has stopped. */
if (itok(c) || lexstop)
break;
iswhite = inblank(c);
switch (c) {
case '(':
pct++;
break;
case ')':
pct--;
break;
case '\\':
/* Add the backslash; the next character is added by the loop. */
add(c);
c = hgetc();
break;
case '\'': {
/* Non-zero if the quote directly follows a '$' in the buffer. */
int strquote = lexbuf.ptr[-1] == '$';
add(c);
STOPHIST
while ((c = hgetc()) != '\'' && !lexstop) {
/* Backslash escapes are only honoured in the '$' form. */
if (c == '\\' && strquote) {
add(c);
c = hgetc();
}
add(c);
}
ALLOWHIST
break;
}
case '\"':
add(c);
while ((c = hgetc()) != '\"' && !lexstop)
if (c == '\\') {
add(c);
add(hgetc());
} else
add(c);
break;
case '`':
add(c);
while ((c = hgetc()) != '`' && !lexstop)
if (c == '\\')
add(c), add(hgetc());
else
add(c);
break;
case '#':
if (start) {
/* Comment: copy everything up to the newline. */
add(c);
while ((c = hgetc()) != '\n' && !lexstop)
add(c);
iswhite = 1;
}
break;
}
start = iswhite;
}
while (pct);
if (!lexstop)
SETPAREND
cmdpop();
return lexstop;
#else
/*
 * Current variant: run the parser over the input up to the matching
 * OUTPAR and keep only the raw text that was read along the way.
 */
char *new_tokstr;
int new_lexstop, new_lex_add_raw;
/* infor is cleared for the nested parse and put back on exit. */
int save_infor = infor;
struct lexbufstate new_lexbuf;
infor = 0;
cmdpush(CS_CMDSUBST);
SETPARBEGIN
add(Inpar);
new_lex_add_raw = lex_add_raw + 1;
if (!lex_add_raw) {
/*
* We'll combine the string so far with the input
* read in for the command substitution. To do this
* we'll just propagate the current tokstr etc. as the
* variables used for adding raw input, and
* ensure we swap those for the real tokstr etc. at the end.
*
* However, we need to save and restore the rest of the
* lexical and parse state as we're effectively parsing
* an internal string. Because we're still parsing it from
* the original input source (we have to --- we don't know
* when to stop inputting it otherwise and can't rely on
* the input being recoverable until we've read it) we need
* to keep the same history context.
*/
new_tokstr = tokstr;
new_lexbuf = lexbuf;
/*
* If we're expanding an alias at this point, we need the whole
* remaining text as part of the string for the command in
* parentheses, so don't backtrack. This is different from the
* usual case where the alias is fully within the command, where
* we want the unexpanded text so that it will be expanded
* again when the command in the parentheses is executed.
*
* I never wanted to be a software engineer, you know.
*/
if (inbufflags & INP_ALIAS)
inbufflags |= INP_RAW_KEEP;
zcontext_save_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
hist_in_word(1);
} else {
/*
* Set up for nested command substitution, however
* we don't actually need the string until we get
* back to the top level and recover the lot.
* The $() body just appears empty.
*
* We do need to propagate the raw variables which would
* otherwise be cleared, though.
*/
new_tokstr = tokstr_raw;
new_lexbuf = lexbuf_raw;
zcontext_save_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
}
/* Install the raw-input variables chosen above, after the save. */
tokstr_raw = new_tokstr;
lexbuf_raw = new_lexbuf;
lex_add_raw = new_lex_add_raw;
/*
* Don't do any ZLE specials down here: they're only needed
* when we return the string from the recursive parse.
* (TBD: this probably means we should be initialising lexflags
* more consistently.)
*
* Note that in that case we're still using the ZLE line reading
* function at the history layer --- this is consistent with the
* intention of maintaining the history and input layers across
* the recursive parsing.
*
* Also turn off LEXFLAGS_NEWLINE because this is already skipping
* across the entire construct, and parse_event() needs embedded
* newlines to be "real" when looking for the OUTPAR token.
*/
lexflags &= ~(LEXFLAGS_ZLE|LEXFLAGS_NEWLINE);
dbparens = 0; /* restored by zcontext_restore_partial() */
if (!parse_event(OUTPAR) || tok != OUTPAR) {
/* The parse failed or did not finish on OUTPAR. */
if (strin) {
/*
* Get the rest of the string raw since we don't
* know where this token ends.
*/
while (!lexstop)
(void)ingetc();
} else
lexstop = 1;
}
/* Outpar lexical token gets added in caller if present */
/*
* We're going to keep the full raw input string
* as the current token string after popping the stack.
*/
new_tokstr = tokstr_raw;
new_lexbuf = lexbuf_raw;
/*
* We're also going to propagate the lexical state:
* if we couldn't parse the command substitution we
* can't continue.
*/
new_lexstop = lexstop;
zcontext_restore_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
/*
 * lex_add_raw is tested after the restore: non-zero here means this
 * call was nested inside another raw-collecting parse.
 */
if (lex_add_raw) {
/*
* Keep going, so retain the raw variables.
*/
tokstr_raw = new_tokstr;
lexbuf_raw = new_lexbuf;
} else {
if (!new_lexstop) {
/* Ignore the ')' added on input */
new_lexbuf.len--;
*--new_lexbuf.ptr = '\0';
}
/*
* Convince the rest of lex.c we were examining a string
* all along.
*/
tokstr = new_tokstr;
lexbuf = new_lexbuf;
lexstop = new_lexstop;
hist_in_word(0);
}
if (!lexstop)
SETPAREND
cmdpop();
infor = save_infor;
return lexstop;
#endif
}