40760: Always tokenize unquoted - to Dash.

This fixes use of pattern match character ranges in unusual contexts.

Attempt to detect a tokenized - in cases where we don't care.
This commit is contained in:
Peter Stephenson 2017-03-07 10:43:58 +00:00
parent a8345a40b1
commit f3f8537cfa
12 changed files with 116 additions and 67 deletions

View File

@ -1,3 +1,11 @@
2017-03-07 Peter Stephenson <p.stephenson@samsung.com>
* 40760: Src/cond.c, Src/exec.c, Src/glob.c, Src/lex.c,
Src/math.c, Src/parse.c, Src/pattern.c, Src/subst.c,
Src/utils.c, Src/zsh.h, Test/D02glob.ztst: Always tokenise '-'
to Dash to eliminate niggles with range matches in complicated
contexts. Match both - or Dash in contexts that don't care.
2017-03-07 Mikael Magnusson <mikachu@gmail.com>
* 40780: Completion/Unix/Command/_mount: Don't use =~ for simple

View File

@ -138,13 +138,13 @@ evalcond(Estate state, char *fromtest)
strs = arrdup(sbuf);
l = 2;
}
if (name && name[0] == '-')
if (name && IS_DASH(name[0]))
errname = name;
else if (strs[0] && *strs[0] == '-')
else if (strs[0] && IS_DASH(*strs[0]))
errname = strs[0];
else
errname = "<null>";
if (name && name[0] == '-' &&
if (name && IS_DASH(name[0]) &&
(cd = getconddef((ctype == COND_MODI), name + 1, 1))) {
if (ctype == COND_MOD &&
(l < cd->min || (cd->max >= 0 && l > cd->max))) {
@ -171,7 +171,7 @@ evalcond(Estate state, char *fromtest)
strs[0] = dupstring(name);
name = s;
if (name && name[0] == '-' &&
if (name && IS_DASH(name[0]) &&
(cd = getconddef(0, name + 1, 1))) {
if (l < cd->min || (cd->max >= 0 && l > cd->max)) {
zwarnnam(fromtest, "unknown condition: %s",

View File

@ -2779,9 +2779,10 @@ execcmd_exec(Estate state, Execcmd_params eparams,
char *argdata = (char *) getdata(argnode);
char *cmdopt;
int has_p = 0, has_vV = 0, has_other = 0;
while (*argdata == '-') {
while (IS_DASH(*argdata)) {
/* Just to be definite, stop on single "-", too, */
if (!argdata[1] || (argdata[1] == '-' && !argdata[2]))
if (!argdata[1] ||
(IS_DASH(argdata[1]) && !argdata[2]))
break;
for (cmdopt = argdata+1; *cmdopt; cmdopt++) {
switch (*cmdopt) {
@ -2835,7 +2836,7 @@ execcmd_exec(Estate state, Execcmd_params eparams,
* as if this is command [non-option-stuff]. This
* isn't a good place for standard option handling.
*/
if (!strcmp(argdata, "--"))
if (IS_DASH(argdata[0]) && IS_DASH(argdata[1]) && !argdata[2])
uremnode(args, firstnode(args));
}
if ((cflags & BINF_EXEC) && nextnode(firstnode(args))) {
@ -2855,7 +2856,7 @@ execcmd_exec(Estate state, Execcmd_params eparams,
* people aren't likely to mix the option style
* with the zsh style.
*/
while (next && *next == '-' && strlen(next) >= 2) {
while (next && IS_DASH(*next) && strlen(next) >= 2) {
if (!firstnode(args)) {
zerr("exec requires a command to execute");
lastval = 1;
@ -2863,7 +2864,7 @@ execcmd_exec(Estate state, Execcmd_params eparams,
goto done;
}
uremnode(args, firstnode(args));
if (!strcmp(next, "--"))
if (IS_DASH(next[0]) && IS_DASH(next[1]) && !next[2])
break;
for (cmdopt = &next[1]; *cmdopt; ++cmdopt) {
switch (*cmdopt) {

View File

@ -1314,6 +1314,7 @@ zglob(LinkList list, LinkNode np, int nountok)
sense ^= 1;
break;
case '-':
case Dash:
/* Toggle matching of symbolic links */
sense ^= 2;
break;
@ -1608,7 +1609,7 @@ zglob(LinkList list, LinkNode np, int nountok)
++s;
}
/* See if it's greater than, equal to, or less than */
if ((g_range = *s == '+' ? 1 : *s == '-' ? -1 : 0))
if ((g_range = *s == '+' ? 1 : IS_DASH(*s) ? -1 : 0))
++s;
data = qgetnum(&s);
break;
@ -2025,13 +2026,13 @@ hasbraces(char *str)
if (bracechardots(str-1, NULL, NULL))
return 1;
lbr = str - 1;
if (*str == '-')
if (IS_DASH(*str))
str++;
while (idigit(*str))
str++;
if (*str == '.' && str[1] == '.') {
str++; str++;
if (*str == '-')
if (IS_DASH(*str))
str++;
while (idigit(*str))
str++;
@ -2040,7 +2041,7 @@ hasbraces(char *str)
return 1;
else if (*str == '.' && str[1] == '.') {
str++; str++;
if (*str == '-')
if (IS_DASH(*str))
str++;
while (idigit(*str))
str++;
@ -2123,7 +2124,7 @@ xpandredir(struct redir *fn, LinkList redirtab)
fn->name = s;
untokenize(s);
if (fn->type == REDIR_MERGEIN || fn->type == REDIR_MERGEOUT) {
if (s[0] == '-' && !s[1])
if (IS_DASH(s[0]) && !s[1])
fn->type = REDIR_CLOSE;
else if (s[0] == 'p' && !s[1])
fn->fd2 = -2;
@ -2329,12 +2330,14 @@ xpandbraces(LinkList list, LinkNode *np)
* str+1 is the first number in the range, dots+2 the last,
* and dots2+2 is the increment if that's given. */
/* TODO: sorry about this */
int minw = (str[1] == '0' || (str[1] == '-' && str[2] == '0'))
int minw = (str[1] == '0' ||
(IS_DASH(str[1]) && str[2] == '0'))
? wid1
: (dots[2] == '0' || (dots[2] == '-' && dots[3] == '0'))
: (dots[2] == '0' ||
(IS_DASH(dots[2]) && dots[3] == '0'))
? wid2
: (dots2 && (dots2[2] == '0' ||
(dots2[2] == '-' && dots2[3] == '0')))
(IS_DASH(dots2[2]) && dots2[3] == '0')))
? wid3
: 0;
if (rincr < 0) {
@ -2392,7 +2395,7 @@ xpandbraces(LinkList list, LinkNode *np)
c2 = ztokens[c2 - STOUC(Pound)];
if ((char) c2 == Meta)
c2 = 32 ^ p[1];
if (c1 == '-' && lastch >= 0 && p < str2 && lastch <= (int)c2) {
if (IS_DASH(c1) && lastch >= 0 && p < str2 && lastch <= (int)c2) {
while (lastch < (int)c2)
ccl[lastch++] = 1;
lastch = -1;
@ -3528,7 +3531,7 @@ zshtokenize(char *s, int flags)
}
t = s;
while (idigit(*++s));
if (*s != '-')
if (!IS_DASH(*s))
goto cont;
while (idigit(*++s));
if (*s != '>')

View File

@ -1359,17 +1359,13 @@ gettokstr(int c, int sub)
case LX2_DASH:
/*
* - shouldn't be treated as a special character unless
* we're in a pattern. Howeve,simply counting "[" doesn't
* work as []a-z] is a valid expression and we don't know
* down here what this "[" is for as $foo[stuff] is valid
* in zsh. So just detect an opening [, which is enough
* to turn this into a pattern; the Dash will be harmlessly
* untokenised if not wanted.
* we're in a pattern. Unfortunately, working out for
* sure in complicated expressions whether we're in a
* pattern is tricky. So we'll make it special and
* turn it back any time we don't need it special.
* This is not ideal as it's a lot of work.
*/
if (seen_brct)
c = Dash;
else
c = '-';
c = Dash;
break;
case LX2_BANG:
/*

View File

@ -463,7 +463,7 @@ lexconstant(void)
char *nptr;
nptr = ptr;
if (*nptr == '-')
if (IS_DASH(*nptr))
nptr++;
if (*nptr == '0') {
@ -527,7 +527,7 @@ lexconstant(void)
}
if (*nptr == 'e' || *nptr == 'E') {
nptr++;
if (*nptr == '+' || *nptr == '-')
if (*nptr == '+' || IS_DASH(*nptr))
nptr++;
while (idigit(*nptr) || *nptr == '_')
nptr++;
@ -599,7 +599,8 @@ zzlex(void)
}
return (unary) ? UPLUS : PLUS;
case '-':
if (*ptr == '-') {
case Dash:
if (IS_DASH(*ptr)) {
ptr++;
return (unary) ? PREMINUS : POSTMINUS;
}

View File

@ -2316,6 +2316,19 @@ par_cond_1(void)
return r;
}
/*
 * Test whether a word is a dash followed by a given condition name.
 *
 * input is the word to test; its first character may be a literal '-'
 * or the Dash token.
 * cond is the text expected immediately after that leading dash.
 *
 * Returns 1 if input begins with a dash and the rest of the string is
 * exactly cond, otherwise 0.
 */
static int check_cond(const char *input, const char *cond)
{
    /* Short-circuit: input + 1 is only examined once a dash is seen. */
    return IS_DASH(input[0]) && strcmp(input + 1, cond) == 0;
}
/*
* cond_2 : BANG cond_2
| INPAR { SEPER } cond_2 { SEPER } OUTPAR
@ -2342,7 +2355,7 @@ par_cond_2(void)
s1 = tokstr;
condlex();
/* ksh behavior: [ -t ] means [ -t 1 ]; bash disagrees */
if (unset(POSIXBUILTINS) && !strcmp(s1, "-t"))
if (unset(POSIXBUILTINS) && check_cond(s1, "t"))
return par_cond_double(s1, dupstring("1"));
return par_cond_double(dupstring("-n"), s1);
}
@ -2352,7 +2365,7 @@ par_cond_2(void)
if (!strcmp(*testargs, "=") ||
!strcmp(*testargs, "==") ||
!strcmp(*testargs, "!=") ||
(**testargs == '-' && get_cond_num(*testargs + 1) >= 0)) {
(IS_DASH(**testargs) && get_cond_num(*testargs + 1) >= 0)) {
s1 = tokstr;
condlex();
s2 = tokstr;
@ -2374,8 +2387,8 @@ par_cond_2(void)
* In "test" compatibility mode, "! -a ..." and "! -o ..."
* are treated as "[string] [and] ..." and "[string] [or] ...".
*/
if (!(n_testargs > 1 &&
(!strcmp(*testargs, "-a") || !strcmp(*testargs, "-o"))))
if (!(n_testargs > 1 && (check_cond(*testargs, "a") ||
check_cond(*testargs, "o"))))
{
condlex();
ecadd(WCB_COND(COND_NOT, 0));
@ -2397,7 +2410,7 @@ par_cond_2(void)
return r;
}
s1 = tokstr;
dble = (s1 && *s1 == '-'
dble = (s1 && IS_DASH(*s1)
&& (!n_testargs
|| strspn(s1+1, "abcdefghknoprstuvwxzLONGS") == 1)
&& !s1[2]);
@ -2411,7 +2424,7 @@ par_cond_2(void)
YYERROR(ecused);
}
condlex();
if (n_testargs == 2 && tok != STRING && tokstr && s1[0] == '-') {
if (n_testargs == 2 && tok != STRING && tokstr && IS_DASH(s1[0])) {
/*
* Something like "test -z" followed by a token.
* We'll turn the token into a string (we've also
@ -2446,9 +2459,9 @@ par_cond_2(void)
} else
YYERROR(ecused);
}
s2 = tokstr;
s2 = tokstr;
if (!n_testargs)
dble = (s2 && *s2 == '-' && !s2[2]);
dble = (s2 && IS_DASH(*s2) && !s2[2]);
incond++; /* parentheses do globbing */
do condlex(); while (COND_SEP());
incond--; /* parentheses do grouping */
@ -2476,7 +2489,7 @@ par_cond_2(void)
static int
par_cond_double(char *a, char *b)
{
if (a[0] != '-' || !a[1])
if (!IS_DASH(a[0]) || !a[1])
COND_ERROR("parse error: condition expected: %s", a);
else if (!a[2] && strspn(a+1, "abcdefgknoprstuvwxzhLONGS") == 1) {
ecadd(WCB_COND(a[1], 0));
@ -2534,7 +2547,7 @@ par_cond_triple(char *a, char *b, char *c)
ecadd(WCB_COND(COND_REGEX, 0));
ecstr(a);
ecstr(c);
} else if (b[0] == '-') {
} else if (IS_DASH(b[0])) {
if ((t0 = get_cond_num(b + 1)) > -1) {
ecadd(WCB_COND(t0 + COND_NT, 0));
ecstr(a);
@ -2545,7 +2558,7 @@ par_cond_triple(char *a, char *b, char *c)
ecstr(a);
ecstr(c);
}
} else if (a[0] == '-' && a[1]) {
} else if (IS_DASH(a[0]) && a[1]) {
ecadd(WCB_COND(COND_MOD, 2));
ecstr(a);
ecstr(b);
@ -2560,7 +2573,7 @@ par_cond_triple(char *a, char *b, char *c)
static int
par_cond_multi(char *a, LinkList l)
{
if (a[0] != '-' || !a[1])
if (!IS_DASH(a[0]) || !a[1])
COND_ERROR("condition expected: %s", a);
else {
LinkNode n;
@ -3256,10 +3269,10 @@ build_dump(char *nam, char *dump, char **files, int ali, int map, int flags)
for (hlen = FD_PRELEN, tlen = 0; *files; files++) {
struct stat st;
if (!strcmp(*files, "-k")) {
if (check_cond(*files, "k")) {
flags = (flags & ~(FDHF_KSHLOAD | FDHF_ZSHLOAD)) | FDHF_KSHLOAD;
continue;
} else if (!strcmp(*files, "-z")) {
} else if (check_cond(*files, "z")) {
flags = (flags & ~(FDHF_KSHLOAD | FDHF_ZSHLOAD)) | FDHF_ZSHLOAD;
continue;
}

View File

@ -1521,7 +1521,7 @@ patcomppiece(int *flagp, int paren)
patparse = nptr;
len |= 1;
}
DPUTS(*patparse != '-', "BUG: - missing from numeric glob");
DPUTS(!IS_DASH(*patparse), "BUG: - missing from numeric glob");
patparse++;
if (idigit(*patparse)) {
to = (zrange_t) zstrtol((char *)patparse,

View File

@ -481,6 +481,8 @@ multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep,
for ( ; *x; x += l) {
int rawc = -1;
convchar_t c;
if (*x == Dash)
*x = '-';
if (itok(STOUC(*x))) {
/* token, can't be separator, must be single byte */
rawc = *x;
@ -1766,7 +1768,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
*/
c = *s;
if (itype_end(s, IIDENT, 1) == s && *s != '#' && c != Pound &&
c != '-' && c != '!' && c != '$' && c != String && c != Qstring &&
!IS_DASH(c) &&
c != '!' && c != '$' && c != String && c != Qstring &&
c != '?' && c != Quest &&
c != '*' && c != Star && c != '@' && c != '{' &&
c != Inbrace && c != '=' && c != Equals && c != Hat &&
@ -1895,13 +1898,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
if (quotetype == QT_DOLLARS ||
quotetype == QT_BACKSLASH_PATTERN)
goto flagerr;
if (s[1] == '-' || s[1] == '+') {
if (IS_DASH(s[1]) || s[1] == '+') {
if (quotemod)
goto flagerr;
s++;
quotemod = 1;
quotetype = (*s == '-') ? QT_SINGLE_OPTIONAL :
QT_QUOTEDZPUTS;
quotetype = (*s == '+') ? QT_QUOTEDZPUTS :
QT_SINGLE_OPTIONAL;
} else {
if (quotetype == QT_SINGLE_OPTIONAL) {
/* extra q's after '-' not allowed */
@ -2208,9 +2211,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
* properly in the first place we wouldn't
* have this nonsense.
*/
|| ((cc == '#' || cc == Pound) &&
s[2] == Outbrace)
|| cc == '-' || (cc == ':' && s[2] == '-')
|| ((cc == '#' || cc == Pound) && s[2] == Outbrace)
|| IS_DASH(cc)
|| (cc == ':' && IS_DASH(s[2]))
|| (isstring(cc) && (s[2] == Inbrace || s[2] == Inpar)))) {
getlen = 1 + whichlen, s++;
/*
@ -2605,14 +2608,17 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
* Again, this duplicates tests for characters we're about to
* examine properly later on.
*/
if (inbrace &&
(c = *s) != '-' && c != '+' && c != ':' && c != '%' && c != '/' &&
c != '=' && c != Equals &&
c != '#' && c != Pound &&
c != '?' && c != Quest &&
c != '}' && c != Outbrace) {
zerr("bad substitution");
return NULL;
if (inbrace) {
c = *s;
if (!IS_DASH(c) &&
c != '+' && c != ':' && c != '%' && c != '/' &&
c != '=' && c != Equals &&
c != '#' && c != Pound &&
c != '?' && c != Quest &&
c != '}' && c != Outbrace) {
zerr("bad substitution");
return NULL;
}
}
/*
* Join arrays up if we're in quotes and there isn't some
@ -2690,8 +2696,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
/* Check for ${..?..} or ${..=..} or one of those. *
* Only works if the name is in braces. */
if (inbrace && ((c = *s) == '-' ||
c == '+' ||
if (inbrace && ((c = *s) == '+' ||
IS_DASH(c) ||
c == ':' || /* i.e. a doubled colon */
c == '=' || c == Equals ||
c == '%' ||
@ -2802,6 +2808,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
vunset = 1;
/* Fall Through! */
case '-':
case Dash:
if (vunset) {
int split_flags;
val = dupstring(s);

View File

@ -2376,7 +2376,7 @@ zstrtol_underscore(const char *s, char **t, int base, int underscore)
while (inblank(*s))
s++;
if ((neg = (*s == '-')))
if ((neg = IS_DASH(*s)))
s++;
else if (*s == '+')
s++;
@ -6118,7 +6118,9 @@ quotedzputs(char const *s, FILE *stream)
} else
*ptr++ = '\'';
while(*s) {
if (*s == Meta)
if (*s == Dash)
c = '-';
else if (*s == Meta)
c = *++s ^ 32;
else
c = *s;
@ -6155,7 +6157,9 @@ quotedzputs(char const *s, FILE *stream)
} else {
/* use Bourne-style quoting, avoiding empty quoted strings */
while (*s) {
if (*s == Meta)
if (*s == Dash)
c = '-';
else if (*s == Meta)
c = *++s ^ 32;
else
c = *s;

View File

@ -237,6 +237,16 @@ struct mathfunc {
#define PATCHARS "#^*()|[]<>?~\\"
/*
 * Check for a possibly tokenized dash.
 *
 * A dash only needs to be a token in a character range, [a-z], but
 * it's difficult in general to ensure that. So it's turned into
 * a token at the usual point in the lexer. However, we need
 * to check for a literal dash at many points.
 *
 * Evaluates to non-zero if x is either the literal '-' or the Dash
 * token. The argument is expanded twice, so it must not have side
 * effects (e.g. do not pass *s++).
 */
#define IS_DASH(x) ((x) == '-' || (x) == Dash)
/*
* Types of quote. This is used in various places, so care needs
* to be taken when changing them. (Oooh, don't you look surprised.)

View File

@ -686,3 +686,9 @@
rm glob.tmp/link
0:modifier ':P' resolves symlinks before '..' components
*>*glob.tmp/hello/world
foo=a
value="ac"
print ${value//[${foo}b-z]/x}
0:handling of - range in complicated pattern context
>xx