1
0
Fork 0
mirror of git://git.code.sf.net/p/zsh/code synced 2025-09-07 11:41:16 +02:00

Fix command substitutions to parse contents as they are read in.

Do this by refactoring misnamed lexsave()/lexrestore() to allow
continuity of history and input.

Add test.
This commit is contained in:
Peter Stephenson 2015-01-06 17:05:17 +00:00
parent 93846edb0d
commit c0d01a6fe0
7 changed files with 410 additions and 194 deletions

View file

@ -1,3 +1,11 @@
2015-01-08 Peter Stephenson <p.stephenson@samsung.com>
* Src/init.c, Src/input.c, Src/lex.c, Src/parse.c, Src/zsh.h,
Test/D08cmdsubst.ztst: fix the problem that command and similar
substitutions weren't properly parsed so could end prematurely.
Use improved resolution in context save and restore to allow
parsing the substitution while tracking the string.
2015-01-07 Barton E. Schaefer <schaefer@zsh.org> 2015-01-07 Barton E. Schaefer <schaefer@zsh.org>
* 34154 (tweaked per 34155): Src/builtin.c: reorder bin_print() to * 34154 (tweaked per 34155): Src/builtin.c: reorder bin_print() to

View file

@ -142,7 +142,8 @@ loop(int toplevel, int justonce)
use_exit_printed = 0; use_exit_printed = 0;
intr(); /* interrupts on */ intr(); /* interrupts on */
lexinit(); /* initialize lexical state */ lexinit(); /* initialize lexical state */
if (!(prog = parse_event())) { /* if we couldn't parse a list */ if (!(prog = parse_event(ENDINPUT))) {
/* if we couldn't parse a list */
hend(NULL); hend(NULL);
if ((tok == ENDINPUT && !errflag) || if ((tok == ENDINPUT && !errflag) ||
(tok == LEXERR && (!isset(SHINSTDIN) || !toplevel)) || (tok == LEXERR && (!isset(SHINSTDIN) || !toplevel)) ||

View file

@ -179,12 +179,12 @@ shingetline(void)
/* Get the next character from the input. /* Get the next character from the input.
* Will call inputline() to get a new line where necessary. * Will call inputline() to get a new line where necessary.
*/ */
/**/ /**/
int int
ingetc(void) ingetc(void)
{ {
int lastc; int lastc = ' ';
if (lexstop) if (lexstop)
return ' '; return ' ';
@ -196,7 +196,7 @@ ingetc(void)
continue; continue;
if (((inbufflags & INP_LINENO) || !strin) && lastc == '\n') if (((inbufflags & INP_LINENO) || !strin) && lastc == '\n')
lineno++; lineno++;
return lastc; break;
} }
/* /*
@ -208,7 +208,7 @@ ingetc(void)
*/ */
if (!inbufct && (strin || errflag)) { if (!inbufct && (strin || errflag)) {
lexstop = 1; lexstop = 1;
return ' '; break;
} }
/* If the next element down the input stack is a continuation of /* If the next element down the input stack is a continuation of
* this, use it. * this, use it.
@ -219,8 +219,10 @@ ingetc(void)
} }
/* As a last resort, get some more input */ /* As a last resort, get some more input */
if (inputline()) if (inputline())
return ' '; break;
} }
zshlex_raw_add(lastc);
return lastc;
} }
/* Read a line from the current command stream and store it as input */ /* Read a line from the current command stream and store it as input */
@ -426,6 +428,7 @@ inungetc(int c)
inbufleft = 0; inbufleft = 0;
inbuf = inbufptr = ""; inbuf = inbufptr = "";
} }
zshlex_raw_back();
} }
} }

500
Src/lex.c
View file

@ -148,6 +148,16 @@ mod_export int parend;
/**/ /**/
mod_export int nocomments; mod_export int nocomments;
/* add raw input characters while parsing command substitution */
/**/
static int lex_add_raw;
/* variables associated with the above */
static char *tokstr_raw, *bptr_raw;
static int len_raw, bsiz_raw;
/* text of punctuation tokens */ /* text of punctuation tokens */
/**/ /**/
@ -216,6 +226,11 @@ struct lexstack {
char *bptr; char *bptr;
int bsiz; int bsiz;
int len; int len;
int lex_add_raw;
char *tokstr_raw;
char *bptr_raw;
int bsiz_raw;
int len_raw;
short *chwords; short *chwords;
int chwordlen; int chwordlen;
int chwordpos; int chwordpos;
@ -241,89 +256,121 @@ struct lexstack {
static struct lexstack *lstack = NULL; static struct lexstack *lstack = NULL;
/* save the lexical state */ /* save the context or parts thereof */
/* is this a hack or what? */ /* is this a hack or what? */
/**/ /**/
mod_export void mod_export void
lexsave(void) lexsave_partial(int parts)
{ {
struct lexstack *ls; struct lexstack *ls;
ls = (struct lexstack *)malloc(sizeof(struct lexstack)); ls = (struct lexstack *)malloc(sizeof(struct lexstack));
ls->incmdpos = incmdpos; if (parts & ZCONTEXT_LEX) {
ls->incond = incond; ls->incmdpos = incmdpos;
ls->incasepat = incasepat; ls->incond = incond;
ls->dbparens = dbparens; ls->incasepat = incasepat;
ls->isfirstln = isfirstln; ls->dbparens = dbparens;
ls->isfirstch = isfirstch; ls->isfirstln = isfirstln;
ls->histactive = histactive; ls->isfirstch = isfirstch;
ls->histdone = histdone; ls->lexflags = lexflags;
ls->lexflags = lexflags;
ls->stophist = stophist; ls->tok = tok;
stophist = 0; ls->isnewlin = isnewlin;
if (!lstack) { ls->tokstr = tokstr;
/* top level, make this version visible to ZLE */ ls->zshlextext = zshlextext;
zle_chline = chline; ls->bptr = bptr;
/* ensure line stored is NULL-terminated */ ls->bsiz = bsiz;
if (hptr) ls->len = len;
*hptr = '\0'; ls->lex_add_raw = lex_add_raw;
ls->tokstr_raw = tokstr_raw;
ls->bptr_raw = bptr_raw;
ls->bsiz_raw = bsiz_raw;
ls->len_raw = len_raw;
ls->lexstop = lexstop;
ls->toklineno = toklineno;
tokstr = zshlextext = bptr = NULL;
bsiz = 256;
tokstr_raw = bptr_raw = NULL;
bsiz_raw = len_raw = lex_add_raw = 0;
inredir = 0;
}
if (parts & ZCONTEXT_HIST) {
if (!lstack) {
/* top level, make this version visible to ZLE */
zle_chline = chline;
/* ensure line stored is NULL-terminated */
if (hptr)
*hptr = '\0';
}
ls->histactive = histactive;
ls->histdone = histdone;
ls->stophist = stophist;
ls->hline = chline;
ls->hptr = hptr;
ls->chwords = chwords;
ls->chwordlen = chwordlen;
ls->chwordpos = chwordpos;
ls->hwgetword = hwgetword;
ls->hgetc = hgetc;
ls->hungetc = hungetc;
ls->hwaddc = hwaddc;
ls->hwbegin = hwbegin;
ls->hwend = hwend;
ls->addtoline = addtoline;
ls->hlinesz = hlinesz;
/*
* We save and restore the command stack with history
* as it's visible to the user interactively, so if
* we're preserving history state we'll continue to
* show the current set of commands from input.
*/
ls->cstack = cmdstack;
ls->csp = cmdsp;
stophist = 0;
chline = NULL;
hptr = NULL;
histactive = 0;
cmdstack = (unsigned char *)zalloc(CMDSTACKSZ);
cmdsp = 0;
}
if (parts & ZCONTEXT_PARSE) {
ls->hdocs = hdocs;
ls->eclen = eclen;
ls->ecused = ecused;
ls->ecnpats = ecnpats;
ls->ecbuf = ecbuf;
ls->ecstrs = ecstrs;
ls->ecsoffs = ecsoffs;
ls->ecssub = ecssub;
ls->ecnfunc = ecnfunc;
ecbuf = NULL;
hdocs = NULL;
} }
ls->hline = chline;
chline = NULL;
ls->hptr = hptr;
hptr = NULL;
ls->hlinesz = hlinesz;
ls->cstack = cmdstack;
ls->csp = cmdsp;
cmdstack = (unsigned char *)zalloc(CMDSTACKSZ);
ls->tok = tok;
ls->isnewlin = isnewlin;
ls->tokstr = tokstr;
ls->zshlextext = zshlextext;
ls->bptr = bptr;
tokstr = zshlextext = bptr = NULL;
ls->bsiz = bsiz;
bsiz = 256;
ls->len = len;
ls->chwords = chwords;
ls->chwordlen = chwordlen;
ls->chwordpos = chwordpos;
ls->hwgetword = hwgetword;
ls->lexstop = lexstop;
ls->hdocs = hdocs;
ls->hgetc = hgetc;
ls->hungetc = hungetc;
ls->hwaddc = hwaddc;
ls->hwbegin = hwbegin;
ls->hwend = hwend;
ls->addtoline = addtoline;
ls->eclen = eclen;
ls->ecused = ecused;
ls->ecnpats = ecnpats;
ls->ecbuf = ecbuf;
ls->ecstrs = ecstrs;
ls->ecsoffs = ecsoffs;
ls->ecssub = ecssub;
ls->ecnfunc = ecnfunc;
ls->toklineno = toklineno;
cmdsp = 0;
inredir = 0;
hdocs = NULL;
histactive = 0;
ecbuf = NULL;
ls->next = lstack; ls->next = lstack;
lstack = ls; lstack = ls;
} }
/* restore lexical state */ /* save context in full */
/**/ /**/
mod_export void mod_export void
lexrestore(void) lexsave(void)
{
lexsave_partial(ZCONTEXT_HIST|ZCONTEXT_LEX|ZCONTEXT_PARSE);
}
/* restore context or part thereof */
/**/
mod_export void
lexrestore_partial(int parts)
{ {
struct lexstack *ln = lstack; struct lexstack *ln = lstack;
@ -332,65 +379,89 @@ lexrestore(void)
queue_signals(); queue_signals();
lstack = lstack->next; lstack = lstack->next;
if (!lstack) { if (parts & ZCONTEXT_LEX) {
/* Back to top level: don't need special ZLE value */ incmdpos = ln->incmdpos;
DPUTS(ln->hline != zle_chline, "BUG: Ouch, wrong chline for ZLE"); incond = ln->incond;
zle_chline = NULL; incasepat = ln->incasepat;
dbparens = ln->dbparens;
isfirstln = ln->isfirstln;
isfirstch = ln->isfirstch;
lexflags = ln->lexflags;
tok = ln->tok;
isnewlin = ln->isnewlin;
tokstr = ln->tokstr;
zshlextext = ln->zshlextext;
bptr = ln->bptr;
bsiz = ln->bsiz;
len = ln->len;
lex_add_raw = ln->lex_add_raw;
tokstr_raw = ln->tokstr_raw;
bptr_raw = ln->bptr_raw;
bsiz_raw = ln->bsiz_raw;
len_raw = ln->len_raw;
lexstop = ln->lexstop;
toklineno = ln->toklineno;
}
if (parts & ZCONTEXT_HIST) {
if (!lstack) {
/* Back to top level: don't need special ZLE value */
DPUTS(ln->hline != zle_chline, "BUG: Ouch, wrong chline for ZLE");
zle_chline = NULL;
}
histactive = ln->histactive;
histdone = ln->histdone;
stophist = ln->stophist;
chline = ln->hline;
hptr = ln->hptr;
chwords = ln->chwords;
chwordlen = ln->chwordlen;
chwordpos = ln->chwordpos;
hwgetword = ln->hwgetword;
hgetc = ln->hgetc;
hungetc = ln->hungetc;
hwaddc = ln->hwaddc;
hwbegin = ln->hwbegin;
hwend = ln->hwend;
addtoline = ln->addtoline;
hlinesz = ln->hlinesz;
if (cmdstack)
zfree(cmdstack, CMDSTACKSZ);
cmdstack = ln->cstack;
cmdsp = ln->csp;
}
if (parts & ZCONTEXT_PARSE) {
if (ecbuf)
zfree(ecbuf, eclen);
hdocs = ln->hdocs;
eclen = ln->eclen;
ecused = ln->ecused;
ecnpats = ln->ecnpats;
ecbuf = ln->ecbuf;
ecstrs = ln->ecstrs;
ecsoffs = ln->ecsoffs;
ecssub = ln->ecssub;
ecnfunc = ln->ecnfunc;
errflag &= ~ERRFLAG_ERROR;
} }
incmdpos = ln->incmdpos;
incond = ln->incond;
incasepat = ln->incasepat;
dbparens = ln->dbparens;
isfirstln = ln->isfirstln;
isfirstch = ln->isfirstch;
histactive = ln->histactive;
histdone = ln->histdone;
lexflags = ln->lexflags;
stophist = ln->stophist;
chline = ln->hline;
hptr = ln->hptr;
if (cmdstack)
zfree(cmdstack, CMDSTACKSZ);
cmdstack = ln->cstack;
cmdsp = ln->csp;
tok = ln->tok;
isnewlin = ln->isnewlin;
tokstr = ln->tokstr;
zshlextext = ln->zshlextext;
bptr = ln->bptr;
bsiz = ln->bsiz;
len = ln->len;
chwords = ln->chwords;
chwordlen = ln->chwordlen;
chwordpos = ln->chwordpos;
hwgetword = ln->hwgetword;
lexstop = ln->lexstop;
hdocs = ln->hdocs;
hgetc = ln->hgetc;
hungetc = ln->hungetc;
hwaddc = ln->hwaddc;
hwbegin = ln->hwbegin;
hwend = ln->hwend;
addtoline = ln->addtoline;
if (ecbuf)
zfree(ecbuf, eclen);
eclen = ln->eclen;
ecused = ln->ecused;
ecnpats = ln->ecnpats;
ecbuf = ln->ecbuf;
ecstrs = ln->ecstrs;
ecsoffs = ln->ecsoffs;
ecssub = ln->ecssub;
ecnfunc = ln->ecnfunc;
hlinesz = ln->hlinesz;
toklineno = ln->toklineno;
errflag &= ~ERRFLAG_ERROR;
free(ln); free(ln);
unqueue_signals(); unqueue_signals();
} }
/*
 * Restore the complete context.
 *
 * Convenience wrapper around lexrestore_partial() that asks for all
 * three parts -- history, lexical analyser and parser -- to be restored.
 */

/**/
mod_export void
lexrestore(void)
{
    const int all_parts = ZCONTEXT_PARSE | ZCONTEXT_LEX | ZCONTEXT_HIST;

    lexrestore_partial(all_parts);
}
/**/ /**/
void void
zshlex(void) zshlex(void)
@ -1905,80 +1976,151 @@ exalias(void)
return 0; return 0;
} }
/* skip (...) */ /**/
void
zshlex_raw_add(int c)
{
    int grown;

    /* Raw input is only recorded while lex_add_raw is non-zero. */
    if (lex_add_raw) {
        /* Store the character and advance the write position. */
        *bptr_raw = c;
        bptr_raw++;
        len_raw++;

        if (len_raw == bsiz_raw) {
            /*
             * The raw buffer is now full: double its size, point the
             * write position at the first unused byte of the new
             * buffer and zero the newly added part.
             */
            grown = 2 * bsiz_raw;
            tokstr_raw = (char *)hrealloc(tokstr_raw, bsiz_raw, grown);
            bptr_raw = tokstr_raw + len_raw;
            memset(bptr_raw, 0, grown - bsiz_raw);
            bsiz_raw = grown;
        }
    }
}
/*
 * Drop the most recently recorded raw input character by stepping the
 * raw write position and the raw length back by one.
 * Does nothing unless raw input is being recorded (lex_add_raw set).
 */

/**/
void
zshlex_raw_back(void)
{
    if (lex_add_raw) {
        --bptr_raw;
        --len_raw;
    }
}
/*
* Skip (...) for command-style substitutions: $(...), <(...), >(...)
*
* In order to ensure we don't stop at closing parentheses with
* some other syntactic significance, we'll parse the input until
* we find an unmatched closing parenthesis. However, we'll throw
* away the result of the parsing and just keep the string we've built
* up on the way.
*/
/**/ /**/
static int static int
skipcomm(void) skipcomm(void)
{ {
int pct = 1, c, start = 1; char *new_tokstr, *new_bptr = bptr_raw;
int new_len, new_bsiz, new_lexstop, new_lex_add_raw;
cmdpush(CS_CMDSUBST); cmdpush(CS_CMDSUBST);
SETPARBEGIN SETPARBEGIN
c = Inpar; add(Inpar);
do {
int iswhite; new_lex_add_raw = lex_add_raw + 1;
add(c); if (!lex_add_raw) {
c = hgetc(); /*
if (itok(c) || lexstop) * We'll combine the string so far with the input
break; * read in for the command substitution. To do this
iswhite = inblank(c); * we'll just propagate the current tokstr etc. as the
switch (c) { * variables used for adding raw input, and
case '(': * ensure we swap those for the real tokstr etc. at the end.
pct++; *
break; * However, we need to save and restore the rest of the
case ')': * lexical and parse state as we're effectively parsing
pct--; * an internal string. Because we're still parsing it from
break; * the original input source (we have to --- we don't know
case '\\': * when to stop inputting it otherwise and can't rely on
add(c); * the input being recoverable until we've read it) we need
c = hgetc(); * to keep the same history context.
break; */
case '\'': { new_tokstr = tokstr;
int strquote = bptr[-1] == '$'; new_bptr = bptr;
add(c); new_len = len;
STOPHIST new_bsiz = bsiz;
while ((c = hgetc()) != '\'' && !lexstop) {
if (c == '\\' && strquote) { lexsave_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
add(c); } else {
c = hgetc(); /*
} * Set up for nested command substitution, however
add(c); * we don't actually need the string until we get
} * back to the top level and recover the lot.
ALLOWHIST * The $() body just appears empty.
break; *
} * We do need to propagate the raw variables which would
case '\"': * otherwise be cleared, though.
add(c); */
while ((c = hgetc()) != '\"' && !lexstop) new_tokstr = tokstr_raw;
if (c == '\\') { new_bptr = bptr_raw;
add(c); new_len = len_raw;
add(hgetc()); new_bsiz = bsiz_raw;
} else
add(c); lexsave_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
break;
case '`':
add(c);
while ((c = hgetc()) != '`' && !lexstop)
if (c == '\\')
add(c), add(hgetc());
else
add(c);
break;
case '#':
if (start) {
add(c);
while ((c = hgetc()) != '\n' && !lexstop)
add(c);
iswhite = 1;
}
break;
}
start = iswhite;
} }
while (pct); tokstr_raw = new_tokstr;
bsiz_raw = new_bsiz;
len_raw = new_len;
bptr_raw = new_bptr;
lex_add_raw = new_lex_add_raw;
if (!parse_event(OUTPAR) || tok != OUTPAR)
lexstop = 1;
/* Outpar lexical token gets added in caller if present */
/*
* We're going to keep the full raw input string
* as the current token string after popping the stack.
*/
new_tokstr = tokstr_raw;
new_bptr = bptr_raw;
new_len = len_raw;
new_bsiz = bsiz_raw;
/*
* We're also going to propagate the lexical state:
* if we couldn't parse the command substitution we
* can't continue.
*/
new_lexstop = lexstop;
lexrestore_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
if (lex_add_raw) {
/*
* Keep going, so retain the raw variables.
*/
tokstr_raw = new_tokstr;
bptr_raw = new_bptr;
len_raw = new_len;
bsiz_raw = new_bsiz;
} else {
if (!new_lexstop) {
/* Ignore the ')' added on input */
new_len--;
*--new_bptr = '\0';
}
/*
* Convince the rest of lex.c we were examining a string
* all along.
*/
tokstr = new_tokstr;
bptr = new_bptr;
len = new_len;
bsiz = new_bsiz;
lexstop = new_lexstop;
}
if (!lexstop) if (!lexstop)
SETPAREND SETPAREND
cmdpop(); cmdpop();
return lexstop; return lexstop;
} }

View file

@ -361,7 +361,8 @@ ecstrcode(char *s)
/* Initialise wordcode buffer. */ /* Initialise wordcode buffer. */
static void /**/
void
init_parse(void) init_parse(void)
{ {
if (ecbuf) zfree(ecbuf, eclen); if (ecbuf) zfree(ecbuf, eclen);
@ -443,11 +444,15 @@ clear_hdocs()
* event : ENDINPUT * event : ENDINPUT
* | SEPER * | SEPER
* | sublist [ SEPER | AMPER | AMPERBANG ] * | sublist [ SEPER | AMPER | AMPERBANG ]
*
* endtok indicates our event is part of a command-style
* substitution terminated by the token indicated, usually a closing
* parenthesis. In other cases endtok is ENDINPUT.
*/ */
/**/ /**/
Eprog Eprog
parse_event(void) parse_event(int endtok)
{ {
tok = ENDINPUT; tok = ENDINPUT;
incmdpos = 1; incmdpos = 1;
@ -455,36 +460,42 @@ parse_event(void)
zshlex(); zshlex();
init_parse(); init_parse();
if (!par_event()) { if (!par_event(endtok)) {
clear_hdocs(); clear_hdocs();
return NULL; return NULL;
} }
if (endtok != ENDINPUT) {
/* don't need to build an eprog for this */
return &dummy_eprog;
}
return bld_eprog(1); return bld_eprog(1);
} }
/**/ /**/
static int int
par_event(void) par_event(int endtok)
{ {
int r = 0, p, c = 0; int r = 0, p, c = 0;
while (tok == SEPER) { while (tok == SEPER) {
if (isnewlin > 0) if (isnewlin > 0 && endtok == ENDINPUT)
return 0; return 0;
zshlex(); zshlex();
} }
if (tok == ENDINPUT) if (tok == ENDINPUT)
return 0; return 0;
if (tok == endtok)
return 0;
p = ecadd(0); p = ecadd(0);
if (par_sublist(&c)) { if (par_sublist(&c)) {
if (tok == ENDINPUT) { if (tok == ENDINPUT || tok == endtok) {
set_list_code(p, Z_SYNC, c); set_list_code(p, Z_SYNC, c);
r = 1; r = 1;
} else if (tok == SEPER) { } else if (tok == SEPER) {
set_list_code(p, Z_SYNC, c); set_list_code(p, Z_SYNC, c);
if (isnewlin <= 0) if (isnewlin <= 0 || endtok != ENDINPUT)
zshlex(); zshlex();
r = 1; r = 1;
} else if (tok == AMPER) { } else if (tok == AMPER) {
@ -513,7 +524,7 @@ par_event(void)
} else { } else {
int oec = ecused; int oec = ecused;
if (!par_event()) { if (!par_event(endtok)) {
ecused = oec; ecused = oec;
ecbuf[p] |= wc_bdata(Z_END); ecbuf[p] |= wc_bdata(Z_END);
} }

View file

@ -421,6 +421,15 @@ enum {
#define META_HEAPDUP 6 #define META_HEAPDUP 6
#define META_HREALLOC 7 #define META_HREALLOC 7
/*
 * Parts of the context that can be saved and restored.
 * Each value is a distinct bit, so several parts may be OR-ed together.
 */
enum {
    ZCONTEXT_HIST  = 1 << 0,    /* History mechanism */
    ZCONTEXT_LEX   = 1 << 1,    /* Lexical analyser */
    ZCONTEXT_PARSE = 1 << 2     /* Parser */
};
/**************************/ /**************************/
/* Abstract types for zsh */ /* Abstract types for zsh */

View file

@ -106,3 +106,45 @@
>34 >34
>" >"
>" OK >" OK
echo $(case foo in
foo)
echo This test worked.
;;
bar)
echo This test failed in a rather bizarre way.
;;
*)
echo This test failed.
;;
esac)
0:Parsing of command substitution with unmatched parentheses: case, basic
>This test worked.
echo "$(case bar in
foo)
echo This test spoobed.
;;
bar)
echo This test plurbled.
;;
*)
echo This test bzonked.
;;
esac)"
0:Parsing of command substitution with unmatched parentheses: case with quotes
>This test plurbled.
echo before $(
echo start; echo unpretentious |
while read line; do
case $line in
u*)
print Word began with u
print and ended with a crunch
;;
esac
done | sed -e 's/Word/Universe/'; echo end
) after
0:Parsing of command substitution with unmatched parentheses: with frills
>before start Universe began with u and ended with a crunch end after