mirror of
git://git.code.sf.net/p/zsh/code
synced 2025-10-29 05:21:00 +01:00
37689: ! and ^ need to be tokenised in character sets
This commit is contained in:
parent
8eb9070d67
commit
ad16356e19
7 changed files with 110 additions and 42 deletions
|
|
@ -1,3 +1,9 @@
|
|||
2016-01-19 Peter Stephenson <p.stephenson@samsung.com>
|
||||
|
||||
* 37689: README, Src/glob.c, Src/lex.c, Src/pattern.c,
|
||||
Src/zsh.h, Test/D02glob.ztst: also ! and ^ need to be tokenised
|
||||
in character set.
|
||||
|
||||
2016-01-18 Daniel Shahaf <d.s@daniel.shahaf.name>
|
||||
|
||||
* 37678: Src/glob.c, Src/lex.c, Src/pattern.c, Src/utils.c,
|
||||
|
|
|
|||
34
README
34
README
|
|
@ -29,17 +29,43 @@ Zsh is a shell with lots of features. For a list of some of these, see the
|
|||
file FEATURES, and for the latest changes see NEWS. For more
|
||||
details, see the documentation.
|
||||
|
||||
Incompatibilities between 5.1 and 5.2
|
||||
Incompatibilities between 5.2 and 5.3
|
||||
-------------------------------------
|
||||
|
||||
In character classes delimited by "[" and "]" within patterns, whether
|
||||
used for filename generation (globbing) or other forms of pattern
|
||||
matching, it used not to be possible to quote "-" when used for a range,
|
||||
or "^" and "!" when used for negating a character set. The characters can
|
||||
now be quoted by any of the standard shell means, but note that
|
||||
the "[" and "]" must not be quoted. For example,
|
||||
|
||||
[[ $a = ['a-z'] ]]
|
||||
|
||||
matches if the variable a contains just one of the characters "a", "-"
|
||||
or "z" only. Previously this would have matched any lower case ASCII
|
||||
letter. Note therefore the useful fact that
|
||||
|
||||
[[ $a = ["$cset"] ]]
|
||||
|
||||
matches any character contained in the variable "cset". A consequence
|
||||
of this change is that variables that should have active ranges need
|
||||
(with default zsh options) to be indicated explicitly, e.g.
|
||||
|
||||
cset="a-z"
|
||||
[[ b = [${~cset}] ]]
|
||||
|
||||
The "~" causes the "-" character to be active. In sh emulation the
|
||||
"~" is unnecessary in this example and double quotes must be used to
|
||||
suppress the range behaviour of the "-".
|
||||
|
||||
Incompatibilities between 5.0.8 and 5.2
|
||||
---------------------------------------
|
||||
|
||||
The behaviour of the parameter flag (P) has changed when it appears
|
||||
in a nested parameter group, in order to make it more useful in
|
||||
such cases. A (P) in the outermost parameter group behaves as
|
||||
before. See NEWS for more.
|
||||
|
||||
Incompatibilities between 5.0.8 and 5.1
|
||||
---------------------------------------
|
||||
|
||||
The default behaviour when text is pasted into an X Windows terminal has
|
||||
changed significantly (unless you are using a very old terminal emulator
|
||||
that doesn't support this mode). Now, the new "bracketed paste mode"
|
||||
|
|
|
|||
19
Src/glob.c
19
Src/glob.c
|
|
@ -3476,7 +3476,7 @@ static void
|
|||
zshtokenize(char *s, int flags)
|
||||
{
|
||||
char *t;
|
||||
int bslash = 0, seen_brct = 0;
|
||||
int bslash = 0;
|
||||
|
||||
for (; *s; s++) {
|
||||
cont:
|
||||
|
|
@ -3507,20 +3507,6 @@ zshtokenize(char *s, int flags)
|
|||
*t = Inang;
|
||||
*s = Outang;
|
||||
break;
|
||||
case '[':
|
||||
if (bslash)
|
||||
s[-1] = (flags & ZSHTOK_SUBST) ? Bnullkeep : Bnull;
|
||||
else {
|
||||
seen_brct = 1;
|
||||
*s = Inbrack;
|
||||
}
|
||||
break;
|
||||
case '-':
|
||||
if (bslash)
|
||||
s[-1] = (flags & ZSHTOK_SUBST) ? Bnullkeep : Bnull;
|
||||
else if (seen_brct) /* see corresonding code in lex.c */
|
||||
*s = Dash;
|
||||
break;
|
||||
case '(':
|
||||
case '|':
|
||||
case ')':
|
||||
|
|
@ -3531,10 +3517,13 @@ zshtokenize(char *s, int flags)
|
|||
case '^':
|
||||
case '#':
|
||||
case '~':
|
||||
case '[':
|
||||
case ']':
|
||||
case '*':
|
||||
case '?':
|
||||
case '=':
|
||||
case '-':
|
||||
case '!':
|
||||
for (t = ztokens; *t; t++) {
|
||||
if (*t == *s) {
|
||||
if (bslash)
|
||||
|
|
|
|||
17
Src/lex.c
17
Src/lex.c
|
|
@ -35,7 +35,7 @@
|
|||
/* tokens */
|
||||
|
||||
/**/
|
||||
mod_export char ztokens[] = "#$^*(())$=|{}[]`<>>?~`,-'\"\\\\";
|
||||
mod_export char ztokens[] = "#$^*(())$=|{}[]`<>>?~`,-!'\"\\\\";
|
||||
|
||||
/* parts of the current token */
|
||||
|
||||
|
|
@ -395,8 +395,9 @@ ctxtlex(void)
|
|||
#define LX2_BQUOTE 16
|
||||
#define LX2_COMMA 17
|
||||
#define LX2_DASH 18
|
||||
#define LX2_OTHER 19
|
||||
#define LX2_META 20
|
||||
#define LX2_BANG 19
|
||||
#define LX2_OTHER 20
|
||||
#define LX2_META 21
|
||||
|
||||
static unsigned char lexact1[256], lexact2[256], lextok2[256];
|
||||
|
||||
|
|
@ -406,7 +407,7 @@ initlextabs(void)
|
|||
{
|
||||
int t0;
|
||||
static char *lx1 = "\\q\n;!&|(){}[]<>";
|
||||
static char *lx2 = ";)|$[]~({}><=\\\'\"`,-";
|
||||
static char *lx2 = ";)|$[]~({}><=\\\'\"`,-!";
|
||||
|
||||
for (t0 = 0; t0 != 256; t0++) {
|
||||
lexact1[t0] = LX1_OTHER;
|
||||
|
|
@ -1364,6 +1365,14 @@ gettokstr(int c, int sub)
|
|||
else
|
||||
c = '-';
|
||||
break;
|
||||
case LX2_BANG:
|
||||
/*
|
||||
* Same logic as Dash, for ! to perform negation in range.
|
||||
*/
|
||||
if (seen_brct)
|
||||
c = Bang;
|
||||
else
|
||||
c = '!';
|
||||
}
|
||||
add(c);
|
||||
c = hgetc();
|
||||
|
|
|
|||
|
|
@ -247,7 +247,7 @@ typedef unsigned long zrange_t;
|
|||
*/
|
||||
static const char zpc_chars[ZPC_COUNT] = {
|
||||
'/', '\0', Bar, Outpar, Tilde, Inpar, Quest, Star, Inbrack, Inang,
|
||||
Hat, Pound, Bnullkeep, Quest, Star, '+', '!', '@'
|
||||
Hat, Pound, Bnullkeep, Quest, Star, '+', Bang, '!', '@'
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
@ -257,7 +257,7 @@ static const char zpc_chars[ZPC_COUNT] = {
|
|||
/**/
|
||||
mod_export const char *zpc_strings[ZPC_COUNT] = {
|
||||
NULL, NULL, "|", NULL, "~", "(", "?", "*", "[", "<",
|
||||
"^", "#", NULL, "?(", "*(", "+(", "!(", "@("
|
||||
"^", "#", NULL, "?(", "*(", "+(", "!(", "\\!(", "@("
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
@ -481,7 +481,7 @@ patcompcharsset(void)
|
|||
*/
|
||||
zpc_special[ZPC_KSH_QUEST] = zpc_special[ZPC_KSH_STAR] =
|
||||
zpc_special[ZPC_KSH_PLUS] = zpc_special[ZPC_KSH_BANG] =
|
||||
zpc_special[ZPC_KSH_AT] = Marker;
|
||||
zpc_special[ZPC_KSH_BANG2] = zpc_special[ZPC_KSH_AT] = Marker;
|
||||
}
|
||||
/*
|
||||
* Note that if we are using KSHGLOB, then we test for a following
|
||||
|
|
@ -1268,6 +1268,8 @@ patcomppiece(int *flagp, int paren)
|
|||
kshchar = STOUC('+');
|
||||
else if (*patparse == zpc_special[ZPC_KSH_BANG])
|
||||
kshchar = STOUC('!');
|
||||
else if (*patparse == zpc_special[ZPC_KSH_BANG2])
|
||||
kshchar = STOUC('!');
|
||||
else if (*patparse == zpc_special[ZPC_KSH_AT])
|
||||
kshchar = STOUC('@');
|
||||
else if (*patparse == zpc_special[ZPC_KSH_STAR])
|
||||
|
|
@ -1424,7 +1426,7 @@ patcomppiece(int *flagp, int paren)
|
|||
DPUTS(zpc_special[ZPC_INBRACK] == Marker,
|
||||
"Treating '[' as pattern character although disabled");
|
||||
flags |= P_SIMPLE;
|
||||
if (*patparse == Hat || *patparse == '^' || *patparse == '!') {
|
||||
if (*patparse == Hat || *patparse == Bang) {
|
||||
patparse++;
|
||||
starter = patnode(P_ANYBUT);
|
||||
} else
|
||||
|
|
@ -4245,7 +4247,8 @@ haswilds(char *str)
|
|||
((str[-1] == Quest && !zpc_disables[ZPC_KSH_QUEST]) ||
|
||||
(str[-1] == Star && !zpc_disables[ZPC_KSH_STAR]) ||
|
||||
(str[-1] == '+' && !zpc_disables[ZPC_KSH_PLUS]) ||
|
||||
(str[-1] == '!' && !zpc_disables[ZPC_KSH_BANG]) ||
|
||||
(str[-1] == Bang && !zpc_disables[ZPC_KSH_BANG]) ||
|
||||
(str[-1] == '!' && !zpc_disables[ZPC_KSH_BANG2]) ||
|
||||
(str[-1] == '@' && !zpc_disables[ZPC_KSH_AT]))))
|
||||
return 1;
|
||||
break;
|
||||
|
|
|
|||
16
Src/zsh.h
16
Src/zsh.h
|
|
@ -193,29 +193,30 @@ struct mathfunc {
|
|||
#define Qtick ((char) 0x99)
|
||||
#define Comma ((char) 0x9a)
|
||||
#define Dash ((char) 0x9b) /* Only in patterns */
|
||||
#define Bang ((char) 0x9c) /* Only in patterns */
|
||||
/*
|
||||
* Marks the last of the group above.
|
||||
* Remaining tokens are even more special.
|
||||
*/
|
||||
#define LAST_NORMAL_TOK Dash
|
||||
#define LAST_NORMAL_TOK Bang
|
||||
/*
|
||||
* Null arguments: placeholders for single and double quotes
|
||||
* and backslashes.
|
||||
*/
|
||||
#define Snull ((char) 0x9c)
|
||||
#define Dnull ((char) 0x9d)
|
||||
#define Bnull ((char) 0x9e)
|
||||
#define Snull ((char) 0x9d)
|
||||
#define Dnull ((char) 0x9e)
|
||||
#define Bnull ((char) 0x9f)
|
||||
/*
|
||||
* Backslash which will be returned to "\" instead of being stripped
|
||||
* when we turn the string into a printable format.
|
||||
*/
|
||||
#define Bnullkeep ((char) 0x9f)
|
||||
#define Bnullkeep ((char) 0xa0)
|
||||
/*
|
||||
* Null argument that does not correspond to any character.
|
||||
* This should be last as it does not appear in ztokens and
|
||||
* is used to initialise the IMETA type in inittyptab().
|
||||
*/
|
||||
#define Nularg ((char) 0xa0)
|
||||
#define Nularg ((char) 0xa1)
|
||||
|
||||
/*
|
||||
* Take care to update the use of IMETA appropriately when adding
|
||||
|
|
@ -226,7 +227,7 @@ struct mathfunc {
|
|||
* Also used in pattern character arrays as guaranteed not to
|
||||
* mark a character in a string.
|
||||
*/
|
||||
#define Marker ((char) 0xa1)
|
||||
#define Marker ((char) 0xa2)
|
||||
|
||||
/* chars that need to be quoted if meant literally */
|
||||
|
||||
|
|
@ -1549,6 +1550,7 @@ enum zpc_chars {
|
|||
ZPC_KSH_STAR, /* * for *(...) in KSH_GLOB */
|
||||
ZPC_KSH_PLUS, /* + for +(...) in KSH_GLOB */
|
||||
ZPC_KSH_BANG, /* ! for !(...) in KSH_GLOB */
|
||||
ZPC_KSH_BANG2, /* ! for !(...) in KSH_GLOB, untokenised */
|
||||
ZPC_KSH_AT, /* @ for @(...) in KSH_GLOB */
|
||||
ZPC_COUNT /* Number of special characters */
|
||||
};
|
||||
|
|
|
|||
|
|
@ -622,3 +622,36 @@
|
|||
0:quoted - works in pattern in parameter
|
||||
>bcdef
|
||||
>cdef
|
||||
|
||||
[[ a != [^a] ]]
|
||||
0:^ active in character class if not quoted
|
||||
|
||||
[[ a = ['^a'] ]]
|
||||
0:^ not active in character class if quoted
|
||||
|
||||
[[ a != [!a] ]]
|
||||
0:! active in character class if not quoted
|
||||
|
||||
[[ a = ['!a'] ]]
|
||||
0:! not active in character class if quoted
|
||||
|
||||
# Actually, we don't need the quoting here,
|
||||
# c.f. the next test. This just makes it look
|
||||
# more standard.
|
||||
cset="^a-z"
|
||||
[[ "^" = ["$cset"] ]] || print Fail 1
|
||||
[[ "a" = ["$cset"] ]] || print Fail 2
|
||||
[[ "-" = ["$cset"] ]] || print Fail 3
|
||||
[[ "z" = ["$cset"] ]] || print Fail 4
|
||||
[[ "1" != ["$cset"] ]] || print Fail 5
|
||||
[[ "b" != ["$cset"] ]] || print Fail 6
|
||||
0:character set specified as quoted variable
|
||||
|
||||
cset="^a-z"
|
||||
[[ "^" = [$~cset] ]] || print Fail 1
|
||||
[[ "a" != [$~cset] ]] || print Fail 2
|
||||
[[ "-" = [$~cset] ]] || print Fail 3
|
||||
[[ "z" != [$~cset] ]] || print Fail 4
|
||||
[[ "1" = [$~cset] ]] || print Fail 5
|
||||
[[ "b" != [$~cset] ]] || print Fail 6
|
||||
0:character set specified as active variable
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue