37689: ! and ^ need to be tokenised in character sets

mikachu/badarrays
Peter Stephenson 8 years ago
parent 8eb9070d67
commit ad16356e19

@ -1,3 +1,9 @@
2016-01-19 Peter Stephenson <p.stephenson@samsung.com>
* 37689: README, Src/glob.c, Src/lex.c, Src/pattern.c,
Src/zsh.h, Test/D02glob.ztst: also ! and ^ need to be tokenised
in character set.
2016-01-18 Daniel Shahaf <d.s@daniel.shahaf.name>
* 37678: Src/glob.c, Src/lex.c, Src/pattern.c, Src/utils.c,

@ -29,17 +29,43 @@ Zsh is a shell with lots of features. For a list of some of these, see the
file FEATURES, and for the latest changes see NEWS. For more
details, see the documentation.
Incompatibilities between 5.1 and 5.2
Incompatibilities between 5.2 and 5.3
-------------------------------------
In character classes delimited by "[" and "]" within patterns, whether
used for filename generation (globbing) or other forms of pattern
matching, it used not to be possible to quote "-" when used for a range,
or "^" and "!" when used for negating a character set. The characters can
now be quoted by any of the standard shell means, but note that
the "[" and "]" must not be quoted. For example,
[[ $a = ['a-z'] ]]
matches if the variable a contains just one of the characters "a", "-"
or "z" only. Previously this would have matched any lower case ASCII
letter. Note therefore the useful fact that
[[ $a = ["$cset"] ]]
matches any character contained in the variable "cset". A consequence
of this change is that variables that should have active ranges need
(with default zsh options) to be indicated explicitly, e.g.
cset="a-z"
[[ b = [${~cset}] ]]
The "~" causes the "-" character to be active. In sh emulation the
"~" is unnecessary in this example and double quotes must be used to
suppress the range behaviour of the "-".
Incompatibilities between 5.0.8 and 5.2
---------------------------------------
The behaviour of the parameter flag (P) has changed when it appears
in a nested parameter group, in order to make it more useful in
such cases. A (P) in the outermost parameter group behaves as
before. See NEWS for more.
Incompatibilities between 5.0.8 and 5.1
---------------------------------------
The default behaviour when text is pasted into an X Windows terminal has
changed significantly (unless you are using a very old terminal emulator
that doesn't support this mode). Now, the new "bracketed paste mode"

@ -3476,7 +3476,7 @@ static void
zshtokenize(char *s, int flags)
{
char *t;
int bslash = 0, seen_brct = 0;
int bslash = 0;
for (; *s; s++) {
cont:
@ -3507,20 +3507,6 @@ zshtokenize(char *s, int flags)
*t = Inang;
*s = Outang;
break;
case '[':
if (bslash)
s[-1] = (flags & ZSHTOK_SUBST) ? Bnullkeep : Bnull;
else {
seen_brct = 1;
*s = Inbrack;
}
break;
case '-':
if (bslash)
s[-1] = (flags & ZSHTOK_SUBST) ? Bnullkeep : Bnull;
else if (seen_brct) /* see corresonding code in lex.c */
*s = Dash;
break;
case '(':
case '|':
case ')':
@ -3531,10 +3517,13 @@ zshtokenize(char *s, int flags)
case '^':
case '#':
case '~':
case '[':
case ']':
case '*':
case '?':
case '=':
case '-':
case '!':
for (t = ztokens; *t; t++) {
if (*t == *s) {
if (bslash)

@ -35,7 +35,7 @@
/* tokens */
/**/
mod_export char ztokens[] = "#$^*(())$=|{}[]`<>>?~`,-'\"\\\\";
mod_export char ztokens[] = "#$^*(())$=|{}[]`<>>?~`,-!'\"\\\\";
/* parts of the current token */
@ -395,8 +395,9 @@ ctxtlex(void)
#define LX2_BQUOTE 16
#define LX2_COMMA 17
#define LX2_DASH 18
#define LX2_OTHER 19
#define LX2_META 20
#define LX2_BANG 19
#define LX2_OTHER 20
#define LX2_META 21
static unsigned char lexact1[256], lexact2[256], lextok2[256];
@ -406,10 +407,10 @@ initlextabs(void)
{
int t0;
static char *lx1 = "\\q\n;!&|(){}[]<>";
static char *lx2 = ";)|$[]~({}><=\\\'\"`,-";
static char *lx2 = ";)|$[]~({}><=\\\'\"`,-!";
for (t0 = 0; t0 != 256; t0++) {
lexact1[t0] = LX1_OTHER;
lexact1[t0] = LX1_OTHER;
lexact2[t0] = LX2_OTHER;
lextok2[t0] = t0;
}
@ -1361,12 +1362,20 @@ gettokstr(int c, int sub)
*/
if (seen_brct)
c = Dash;
else
c = '-';
break;
}
add(c);
c = hgetc();
else
c = '-';
break;
case LX2_BANG:
/*
* Same logic as Dash, for ! to perform negation in range.
*/
if (seen_brct)
c = Bang;
else
c = '!';
}
add(c);
c = hgetc();
if (intpos)
intpos--;
if (lexstop)

@ -247,7 +247,7 @@ typedef unsigned long zrange_t;
*/
static const char zpc_chars[ZPC_COUNT] = {
'/', '\0', Bar, Outpar, Tilde, Inpar, Quest, Star, Inbrack, Inang,
Hat, Pound, Bnullkeep, Quest, Star, '+', '!', '@'
Hat, Pound, Bnullkeep, Quest, Star, '+', Bang, '!', '@'
};
/*
@ -257,7 +257,7 @@ static const char zpc_chars[ZPC_COUNT] = {
/**/
mod_export const char *zpc_strings[ZPC_COUNT] = {
NULL, NULL, "|", NULL, "~", "(", "?", "*", "[", "<",
"^", "#", NULL, "?(", "*(", "+(", "!(", "@("
"^", "#", NULL, "?(", "*(", "+(", "!(", "\\!(", "@("
};
/*
@ -481,7 +481,7 @@ patcompcharsset(void)
*/
zpc_special[ZPC_KSH_QUEST] = zpc_special[ZPC_KSH_STAR] =
zpc_special[ZPC_KSH_PLUS] = zpc_special[ZPC_KSH_BANG] =
zpc_special[ZPC_KSH_AT] = Marker;
zpc_special[ZPC_KSH_BANG2] = zpc_special[ZPC_KSH_AT] = Marker;
}
/*
* Note that if we are using KSHGLOB, then we test for a following
@ -1268,6 +1268,8 @@ patcomppiece(int *flagp, int paren)
kshchar = STOUC('+');
else if (*patparse == zpc_special[ZPC_KSH_BANG])
kshchar = STOUC('!');
else if (*patparse == zpc_special[ZPC_KSH_BANG2])
kshchar = STOUC('!');
else if (*patparse == zpc_special[ZPC_KSH_AT])
kshchar = STOUC('@');
else if (*patparse == zpc_special[ZPC_KSH_STAR])
@ -1424,7 +1426,7 @@ patcomppiece(int *flagp, int paren)
DPUTS(zpc_special[ZPC_INBRACK] == Marker,
"Treating '[' as pattern character although disabled");
flags |= P_SIMPLE;
if (*patparse == Hat || *patparse == '^' || *patparse == '!') {
if (*patparse == Hat || *patparse == Bang) {
patparse++;
starter = patnode(P_ANYBUT);
} else
@ -4245,7 +4247,8 @@ haswilds(char *str)
((str[-1] == Quest && !zpc_disables[ZPC_KSH_QUEST]) ||
(str[-1] == Star && !zpc_disables[ZPC_KSH_STAR]) ||
(str[-1] == '+' && !zpc_disables[ZPC_KSH_PLUS]) ||
(str[-1] == '!' && !zpc_disables[ZPC_KSH_BANG]) ||
(str[-1] == Bang && !zpc_disables[ZPC_KSH_BANG]) ||
(str[-1] == '!' && !zpc_disables[ZPC_KSH_BANG2]) ||
(str[-1] == '@' && !zpc_disables[ZPC_KSH_AT]))))
return 1;
break;

@ -193,29 +193,30 @@ struct mathfunc {
#define Qtick ((char) 0x99)
#define Comma ((char) 0x9a)
#define Dash ((char) 0x9b) /* Only in patterns */
#define Bang ((char) 0x9c) /* Only in patterns */
/*
* Marks the last of the group above.
* Remaining tokens are even more special.
*/
#define LAST_NORMAL_TOK Dash
#define LAST_NORMAL_TOK Bang
/*
* Null arguments: placeholders for single and double quotes
* and backslashes.
*/
#define Snull ((char) 0x9c)
#define Dnull ((char) 0x9d)
#define Bnull ((char) 0x9e)
#define Snull ((char) 0x9d)
#define Dnull ((char) 0x9e)
#define Bnull ((char) 0x9f)
/*
* Backslash which will be returned to "\" instead of being stripped
* when we turn the string into a printable format.
*/
#define Bnullkeep ((char) 0x9f)
#define Bnullkeep ((char) 0xa0)
/*
* Null argument that does not correspond to any character.
* This should be last as it does not appear in ztokens and
* is used to initialise the IMETA type in inittyptab().
*/
#define Nularg ((char) 0xa0)
#define Nularg ((char) 0xa1)
/*
* Take care to update the use of IMETA appropriately when adding
@ -226,7 +227,7 @@ struct mathfunc {
* Also used in pattern character arrays as guaranteed not to
* mark a character in a string.
*/
#define Marker ((char) 0xa1)
#define Marker ((char) 0xa2)
/* chars that need to be quoted if meant literally */
@ -1549,6 +1550,7 @@ enum zpc_chars {
ZPC_KSH_STAR, /* * for *(...) in KSH_GLOB */
ZPC_KSH_PLUS, /* + for +(...) in KSH_GLOB */
ZPC_KSH_BANG, /* ! for !(...) in KSH_GLOB */
ZPC_KSH_BANG2, /* ! for !(...) in KSH_GLOB, untokenised */
ZPC_KSH_AT, /* @ for @(...) in KSH_GLOB */
ZPC_COUNT /* Number of special chararacters */
};

@ -622,3 +622,36 @@
0:quoted - works in pattern in parameter
>bcdef
>cdef
[[ a != [^a] ]]
0:^ active in character class if not quoted
[[ a = ['^a'] ]]
0:^ not active in character class if quoted
[[ a != [!a] ]]
0:! active in character class if not quoted
[[ a = ['!a'] ]]
0:! not active in character class if quoted
# Actually, we don't need the quoting here,
# cf. the next test. This just makes it look
# more standard.
cset="^a-z"
[[ "^" = ["$cset"] ]] || print Fail 1
[[ "a" = ["$cset"] ]] || print Fail 2
[[ "-" = ["$cset"] ]] || print Fail 3
[[ "z" = ["$cset"] ]] || print Fail 4
[[ "1" != ["$cset"] ]] || print Fail 5
[[ "b" != ["$cset"] ]] || print Fail 6
0:character set specified as quoted variable
cset="^a-z"
[[ "^" = [$~cset] ]] || print Fail 1
[[ "a" != [$~cset] ]] || print Fail 2
[[ "-" = [$~cset] ]] || print Fail 3
[[ "z" != [$~cset] ]] || print Fail 4
[[ "1" = [$~cset] ]] || print Fail 5
[[ "b" != [$~cset] ]] || print Fail 6
0:character set specified as active variable

Loading…
Cancel
Save