52154, 52155: Implement, document, and test non-forking command substitution.

Comprises workers/51957, 51985, 51987, 51988, 51993, 52131, 52139, plus
fixes for return values, parse errors, and trailing newlines (which were
incorrectly removed) in ${ ... }
master
Bart Schaefer 8 months ago
parent 355cfc1b95
commit 3aaef16569

@ -1,5 +1,11 @@
2023-09-16 Bart Schaefer <schaefer@zsh.org>
* 52155: Test/D10nofork.ztst: Tests for non-forking substitution.
* 52154: Doc/Zsh/expn.yo, Src/lex.c, Src/subst.c: implement
and document non-forking command substitutions ${|...} and
${ ... }. Based on Sebastian Gniazdowski: 51702.
* 52153: Src/input.c, Src/Modules/mapfile.c: $mapfile[fname]
should not trim newlines (only applies when not HAVE_MMAP)

@ -1881,23 +1881,55 @@ sect(Command Substitution)
cindex(command substitution)
cindex(substitution, command)
A command enclosed in parentheses preceded by a dollar sign, like
`tt($LPAR())...tt(RPAR())', or quoted with grave
accents, like `tt(`)...tt(`)', is replaced with its standard output, with
any trailing newlines deleted.
If the substitution is not enclosed in double quotes, the
output is broken into words using the tt(IFS) parameter.
`tt($LPAR())...tt(RPAR())', or quoted with grave accents, like
`tt(`)...tt(`)', is executed in a subshell and replaced by its
standard output, with any trailing newlines deleted. If the
substitution is not enclosed in double quotes, the output is broken
into words using the tt(IFS) parameter.
vindex(IFS, use of)
The substitution `tt($LPAR()cat) var(foo)tt(RPAR())' may be replaced
by the faster `tt($LPAR()<)var(foo)tt(RPAR())'. In this case var(foo)
undergoes single word shell expansions (em(parameter expansion),
em(command substitution) and em(arithmetic expansion)), but not
filename generation.
filename generation. No subshell is created.
If the option tt(GLOB_SUBST) is set, the result of any unquoted command
substitution, including the special form just mentioned, is eligible for
filename generation.
A command with a leading pipe character, enclosed in braces prefixed by
a dollar sign, as in `tt(${|)...tt(})', is executed in the current shell
context, rather than in a subshell, and is replaced by the value of the
parameter tt(REPLY) at the end of the command. There em(must not) be
any whitespace between the opening brace and the pipe character. Any
prior value of tt($REPLY) is saved and restored around this substitution,
in the manner of a function local parameter. Other parameters declared
within the substitution also behave as locals, as if in a function,
unless `tt(typeset -g)' is used. Trailing newlines are em(not) deleted
from the final replacement in this case, and it is subject to filename
generation in the same way as `tt($LPAR())...tt(RPAR())' but is em(not)
split on tt(IFS) unless the tt(SH_WORD_SPLIT) option is set.
Substitutions of the form `tt(${|)var(param)tt(|)...tt(})' are similar,
except that the substitution is replaced by the value of the parameter
named by var(param). No implicit save or restore applies to var(param)
except as noted for tt(REPLY), and var(param) should em(not) be declared
within the command. If var(param) names an array, array expansion rules
apply.
A command enclosed in braces preceded by a dollar sign, and set off from
the braces by whitespace, like `tt(${ )...tt( })', is replaced by its
standard output. Like `tt(${|)...tt(})' and unlike
`tt($LPAR())...tt(RPAR())', the command executes in the current shell
context with function local behaviors and does not create a subshell.
Note that because the `tt(${|)...tt(})' and `tt(${ )...tt( })' forms
must be parsed at once as both string tokens and commands, all other
braces (`tt({)' or `tt(})') within the command either must be quoted,
or must appear in syntactically valid pairs, such as around complex
commands, function bodies, or parameter references.
texinode(Arithmetic Expansion)(Brace Expansion)(Command Substitution)(Expansion)
sect(Arithmetic Expansion)
cindex(arithmetic expansion)

@ -937,7 +937,7 @@ static enum lextok
gettokstr(int c, int sub)
{
int bct = 0, pct = 0, brct = 0, seen_brct = 0, fdpar = 0;
int intpos = 1, in_brace_param = 0;
int intpos = 1, in_brace_param = 0, cmdsubst = 0;
int inquote, unmatched = 0;
enum lextok peek;
#ifdef DEBUG
@ -1135,7 +1135,7 @@ gettokstr(int c, int sub)
c = Inpar;
break;
case LX2_INBRACE:
if (isset(IGNOREBRACES) || sub)
if ((isset(IGNOREBRACES) && !cmdsubst) || sub)
c = '{';
else {
if (!lexbuf.len && incmdpos) {
@ -1157,8 +1157,11 @@ gettokstr(int c, int sub)
if (in_brace_param) {
cmdpop();
}
if (bct-- == in_brace_param)
in_brace_param = 0;
if (bct-- == in_brace_param) {
if (cmdsubst)
cmdpop();
in_brace_param = cmdsubst = 0;
}
c = Outbrace;
break;
case LX2_COMMA:
@ -1405,16 +1408,24 @@ gettokstr(int c, int sub)
}
add(c);
c = hgetc();
if (intpos)
if (intpos)
intpos--;
if (lexstop)
if (lexstop)
break;
if (!cmdsubst && in_brace_param && act == LX2_STRING &&
(c == '|' || c == Bar || inblank(c))) {
cmdsubst = in_brace_param;
cmdpush(CS_CURSH);
}
}
brk:
if (errflag) {
if (in_brace_param) {
while(bct-- >= in_brace_param)
while(bct >= in_brace_param) {
if (bct-- == cmdsubst)
cmdpop();
cmdpop();
}
}
return LEXERR;
}
@ -1422,8 +1433,11 @@ gettokstr(int c, int sub)
if (unmatched && !(lexflags & LEXFLAGS_ACTIVE))
zerr("unmatched %c", unmatched);
if (in_brace_param) {
while(bct-- >= in_brace_param)
while(bct >= in_brace_param) {
if (bct-- == cmdsubst)
cmdpop();
cmdpop();
}
zerr("closing brace expected");
} else if (unset(IGNOREBRACES) && !sub && lexbuf.len > 1 &&
peek == STRING && lexbuf.ptr[-1] == '}' &&
@ -1459,8 +1473,8 @@ gettokstr(int c, int sub)
static int
dquote_parse(char endchar, int sub)
{
int pct = 0, brct = 0, bct = 0, intick = 0, err = 0;
int c;
int pct = 0, brct = 0, bct = 0, intick = 0, err = 0, cmdsubst = 0;
int c, bskip = 0;
int math = endchar == ')' || endchar == ']' || infor;
int zlemath = math && zlemetacs > zlemetall + addedx - inbufct;
@ -1529,11 +1543,25 @@ dquote_parse(char endchar, int sub)
c = Qstring;
}
break;
case '{':
if (cmdsubst && !intick) {
/* In nofork substitution, tokenize as if unquoted */
c = Inbrace;
bskip++;
}
break;
case '}':
if (intick || !bct)
break;
c = Outbrace;
bct--;
if (bskip) {
bskip--;
break;
}
if (bct-- == cmdsubst) {
cmdsubst = 0;
cmdpop();
}
cmdpop();
break;
case '`':
@ -1588,14 +1616,34 @@ dquote_parse(char endchar, int sub)
if (err || lexstop)
break;
add(c);
if (!cmdsubst && c == Inbrace) {
/* Check for ${|...} or ${ ... } nofork command substitution */
if ((c = hgetc()) && !lexstop) {
if (c == '|' || inblank(c)) {
cmdsubst = bct;
cmdpush(CS_CURSH);
}
hungetc(c);
}
}
}
if (intick == 2)
ALLOWHIST
if (intick) {
cmdpop();
}
while (bct--)
while (bct) {
if (bct-- == cmdsubst) {
/*
* You would think this is an error, but if we call it one,
* parsestrnoerr() returns nonzero to subst_parse_str() and
* subsequently "bad substitution" is not reported
*/
/* err = 1 */
cmdpop();
}
cmdpop();
}
if (lexstop)
err = intick || endchar || err;
else if (err == 1) {

@ -1867,6 +1867,10 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
* joining the array into a string (for compatibility with ksh/bash).
*/
int quoted_array_with_offset = 0;
/* Indicates ${|...;} */
char *rplyvar = NULL;
/* Indicates ${ ... ;} */
char *rplytmp = NULL;
*s++ = '\0';
/*
@ -1894,8 +1898,147 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
* flags in parentheses, but also one ksh hack.
*/
if (c == Inbrace) {
/* The command string to be run by ${|...;} */
char *cmdarg = NULL;
size_t slen = 0;
inbrace = 1;
s++;
/* Short-path for the nofork command substitution ${|cmd;}
* See other comments about kludges for why this is here.
*
* The command string is extracted and executed, and the
* substitution assigned. There's no (...)-flags processing,
* i.e. no ${|(U)cmd;}, because it looks quite awful and
* should not be part of command substitution in any case.
* Use ${(U)${|cmd;}} as you would for ${(U)$(cmd;)}.
*/
if (*s == '|' || *s == Bar || inblank(*s)) {
char *outbracep = s;
char sav = *s;
*s = Inbrace;
if (skipparens(Inbrace, Outbrace, &outbracep) == 0) {
slen = outbracep - s - 1;
if ((*s = sav) != Bar) {
sav = *outbracep;
*outbracep = '\0';
tokenize(s);
*outbracep = sav;
}
}
}
if (slen > 1) {
char *outbracep = s + slen;
if (*outbracep == Outbrace) {
if ((rplyvar = itype_end(s+1, INAMESPC, 0))) {
if (*rplyvar == Inbrack &&
(rplyvar = parse_subscript(++rplyvar, 1, ']')))
++rplyvar;
}
if (rplyvar == s+1 && *rplyvar == Bar) {
/* Is ${||...} a substitution error or a syntax error?
zerr("bad substitution");
return NULL;
*/
rplyvar = NULL;
}
if (rplyvar && *rplyvar == Bar) {
cmdarg = dupstrpfx(rplyvar+1, outbracep-rplyvar-1);
rplyvar = dupstrpfx(s+1,rplyvar-s-1);
} else {
cmdarg = dupstrpfx(s+1, outbracep-s-1);
rplyvar = "REPLY";
}
if (inblank(*s)) {
/*
* Admittedly a hack. Take advantage of the enforced
* locality of REPLY and the semantics of $(<file) to
* construct a command to write/read a temporary file.
* Then fall through to the regular handling of $REPLY
* to manage word splitting, expansion flags, etc.
*/
char *outfmt = ">| %s { %s ;}"; /* 13 */
if ((rplytmp = gettempname(NULL, 1))) {
/* Prevent shenanigans with $TMPPREFIX */
char *tmpfile = quotestring(rplytmp, QT_BACKSLASH);
char *dummy = zhalloc(strlen(cmdarg) +
strlen(tmpfile) +
13);
sprintf(dummy, outfmt, tmpfile, cmdarg);
cmdarg = dummy;
} else {
/* TMPPREFIX not writable? */
cmdoutval = lastval;
cmdarg = NULL;
}
}
s = outbracep;
}
}
if (rplyvar) {
Param pm;
/* char *rplyval = getsparam("REPLY"); */
startparamscope(); /* "local" behaves as if in a function */
pm = createparam("REPLY", PM_LOCAL|PM_UNSET);
if (pm) /* Shouldn't createparam() do this? */
pm->level = locallevel;
/* if (rplyval) setsparam("REPLY", ztrdup(rplyval)); */
}
if (rplyvar && cmdarg && *cmdarg) {
int obreaks = breaks;
Eprog cmdprog;
/* Execute the shell command */
untokenize(cmdarg);
cmdprog = parse_string(cmdarg, 0);
if (cmdprog) {
execode(cmdprog, 1, 0, "cmdsubst");
cmdoutval = lastval;
/* "return" behaves as if in a function */
if (retflag) {
retflag = 0;
breaks = obreaks; /* Is this ever not zero? */
}
} else /* parse error */
errflag |= ERRFLAG_ERROR;
if (rplytmp && !errflag) {
int onoerrs = noerrs;
noerrs = 2;
if ((cmdarg = ztuff(rplytmp)))
setsparam("REPLY", cmdarg);
noerrs = onoerrs;
}
}
if (rplytmp)
unlink(rplytmp);
if (rplyvar) {
if (strcmp(rplyvar, "REPLY") == 0) {
if ((val = dupstring(getsparam("REPLY"))))
vunset = 0;
else {
vunset = 1;
val = dupstring("");
}
} else {
s = dyncat(rplyvar, s);
rplyvar = NULL;
}
endparamscope();
if (exit_pending) {
if (mypid == getpid()) {
/*
* paranoia: don't check for jobs, but there
* shouldn't be any if not interactive.
*/
stopmsg = 1;
zexit(exit_val, ZEXIT_NORMAL);
} else
_exit(exit_val);
}
}
/*
* In ksh emulation a leading `!' is a special flag working
* sort of like our (k). This is true only for arrays or
@ -2590,14 +2733,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
* we let fetchvalue set the main string pointer s to
* the end of the bit it's fetched.
*/
if (!(v = fetchvalue(&vbuf, (subexp ? &ov : &s),
(wantt ? -1 :
((unset(KSHARRAYS) || inbrace) ? 1 : -1)),
scanflags)) ||
(v->pm && (v->pm->node.flags & PM_UNSET)) ||
(v->flags & VALFLAG_EMPTY))
if (!rplyvar &&
(!(v = fetchvalue(&vbuf, (subexp ? &ov : &s),
(wantt ? -1 :
((unset(KSHARRAYS) || inbrace) ? 1 : -1)),
scanflags)) ||
(v->pm && (v->pm->node.flags & PM_UNSET)) ||
(v->flags & VALFLAG_EMPTY)))
vunset = 1;
if (wantt) {
/*
* Handle the (t) flag: value now becomes the type

Loading…
Cancel
Save