mirror of
git://git.code.sf.net/p/zsh/code
synced 2025-06-15 20:38:10 +02:00
51884: reset IFS if it contains invalid characters
This happens only if the MULTIBYTE option is on.
This commit is contained in:
parent
4345eed1fe
commit
1b9bc3441c
5 changed files with 61 additions and 18 deletions
|
@ -1,3 +1,9 @@
|
|||
2023-06-26 Jun-ichi Takimoto <takimoto-j@kba.biglobe.ne.jp>
|
||||
|
||||
* 51884: Doc/Zsh/params.yo, Src/params.c, Src/utils.c,
|
||||
Test/D04parameter.ztst: if MULTIBYTE option is on and IFS contains
|
||||
invalid bytes in current locale then reset it to default
|
||||
|
||||
2023-06-22 Bart Schaefer <schaefer@zsh.org>
|
||||
|
||||
* 51887: Src/math.c, Src/params.c, Test/K02parameter.ztst:
|
||||
|
|
|
@ -1325,15 +1325,18 @@ Internal field separators (by default space, tab, newline and NUL), that
|
|||
are used to separate words which result from
|
||||
command or parameter expansion and words read by
|
||||
the tt(read) builtin. Any characters from the set space, tab and
|
||||
newline that appear in the IFS are called em(IFS white space).
|
||||
newline that appear in the tt(IFS) are called em(IFS white space).
|
||||
One or more IFS white space characters or one non-IFS white space
|
||||
character together with any adjacent IFS white space character delimit
|
||||
a field. If an IFS white space character appears twice consecutively
|
||||
in the IFS, this character is treated as if it were not an IFS white
|
||||
in the tt(IFS), this character is treated as if it were not an IFS white
|
||||
space character.
|
||||
|
||||
If the parameter is unset, the default is used. Note this has
|
||||
a different effect from setting the parameter to an empty string.
|
||||
|
||||
If the tt(MULTIBYTE) option is on and tt(IFS) contains invalid characters in
|
||||
the current locale, it is reset to the default.
|
||||
)
|
||||
vindex(KEYBOARD_HACK)
|
||||
item(tt(KEYBOARD_HACK))(
|
||||
|
|
|
@ -4748,6 +4748,7 @@ setlang(char *x)
|
|||
if ((x = getsparam_u(ln->name)) && *x)
|
||||
setlocale(ln->category, x);
|
||||
unqueue_signals();
|
||||
inittyptab();
|
||||
}
|
||||
|
||||
/**/
|
||||
|
@ -4771,6 +4772,7 @@ lc_allsetfn(Param pm, char *x)
|
|||
else {
|
||||
setlocale(LC_ALL, unmeta(x));
|
||||
clear_mbstate();
|
||||
inittyptab();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4809,6 +4811,7 @@ lcsetfn(Param pm, char *x)
|
|||
}
|
||||
unqueue_signals();
|
||||
clear_mbstate(); /* LC_CTYPE may have changed */
|
||||
inittyptab();
|
||||
}
|
||||
#endif /* USE_LOCALE */
|
||||
|
||||
|
|
42
Src/utils.c
42
Src/utils.c
|
@ -74,9 +74,6 @@ set_widearray(char *mb_array, Widechar_array wca)
|
|||
}
|
||||
wca->len = 0;
|
||||
|
||||
if (!isset(MULTIBYTE))
|
||||
return;
|
||||
|
||||
if (mb_array) {
|
||||
VARARR(wchar_t, tmpwcs, strlen(mb_array));
|
||||
wchar_t *wcptr = tmpwcs;
|
||||
|
@ -87,8 +84,7 @@ set_widearray(char *mb_array, Widechar_array wca)
|
|||
int mblen;
|
||||
|
||||
if ((unsigned char) *mb_array <= 0x7f) {
|
||||
mb_array++;
|
||||
*wcptr++ = (wchar_t)*mb_array;
|
||||
*wcptr++ = (wchar_t)*mb_array++;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -4121,8 +4117,9 @@ inittyptab(void)
|
|||
* having IIDENT here is a good idea at all, but this code
|
||||
* should disappear into history...
|
||||
*/
|
||||
for (t0 = 0240; t0 != 0400; t0++)
|
||||
typtab[t0] = IALPHA | IALNUM | IIDENT | IUSER | IWORD;
|
||||
if isset(MULTIBYTE)
|
||||
for (t0 = 0240; t0 != 0400; t0++)
|
||||
typtab[t0] = IALPHA | IALNUM | IIDENT | IUSER | IWORD;
|
||||
#endif
|
||||
/* typtab['.'] |= IIDENT; */ /* Allow '.' in variable names - broken */
|
||||
typtab['_'] = IIDENT | IUSER;
|
||||
|
@ -4137,11 +4134,24 @@ inittyptab(void)
|
|||
typtab[t0] |= ITOK | IMETA;
|
||||
for (t0 = (int) (unsigned char) Snull; t0 <= (int) (unsigned char) Nularg; t0++)
|
||||
typtab[t0] |= ITOK | IMETA | INULL;
|
||||
for (s = ifs ? ifs : EMULATION(EMULATE_KSH|EMULATE_SH) ?
|
||||
DEFAULT_IFS_SH : DEFAULT_IFS; *s; s++) {
|
||||
/* ifs */
|
||||
#define CURRENT_DEFAULT_IFS (EMULATION(EMULATE_KSH|EMULATE_SH) ? \
|
||||
DEFAULT_IFS_SH : DEFAULT_IFS)
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
if (isset(MULTIBYTE)) {
|
||||
set_widearray(ifs ? ifs : CURRENT_DEFAULT_IFS, &ifs_wide);
|
||||
if (ifs && !ifs_wide.chars) {
|
||||
zwarn("IFS has an invalid character; resetting IFS to default");
|
||||
zsfree(ifs);
|
||||
ifs = ztrdup(CURRENT_DEFAULT_IFS);
|
||||
set_widearray(ifs, &ifs_wide);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
for (s = ifs ? ifs : CURRENT_DEFAULT_IFS; *s; s++) {
|
||||
int c = (unsigned char) (*s == Meta ? *++s ^ 32 : *s);
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
if (!isascii(c)) {
|
||||
if (isset(MULTIBYTE) && !isascii(c)) {
|
||||
/* see comment for wordchars below */
|
||||
continue;
|
||||
}
|
||||
|
@ -4154,10 +4164,15 @@ inittyptab(void)
|
|||
}
|
||||
typtab[c] |= ISEP;
|
||||
}
|
||||
/* wordchars */
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
if (isset(MULTIBYTE))
|
||||
set_widearray(wordchars, &wordchars_wide);
|
||||
#endif
|
||||
for (s = wordchars ? wordchars : DEFAULT_WORDCHARS; *s; s++) {
|
||||
int c = (unsigned char) (*s == Meta ? *++s ^ 32 : *s);
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
if (!isascii(c)) {
|
||||
if (isset(MULTIBYTE) && !isascii(c)) {
|
||||
/*
|
||||
* If we have support for multibyte characters, we don't
|
||||
* handle non-ASCII characters here; instead, we turn
|
||||
|
@ -4170,11 +4185,6 @@ inittyptab(void)
|
|||
#endif
|
||||
typtab[c] |= IWORD;
|
||||
}
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
set_widearray(wordchars, &wordchars_wide);
|
||||
set_widearray(ifs ? ifs : EMULATION(EMULATE_KSH|EMULATE_SH) ?
|
||||
DEFAULT_IFS_SH : DEFAULT_IFS, &ifs_wide);
|
||||
#endif
|
||||
for (s = SPECCHARS; *s; s++)
|
||||
typtab[(unsigned char) *s] |= ISPECIAL;
|
||||
if (typtab_flags & ZTF_SP_COMMA)
|
||||
|
|
|
@ -2280,6 +2280,27 @@ F:We do not care what $OLDPWD is, as long as it does not cause an error
|
|||
F:As of this writing, var=$@ and var="$@" with null IFS have unspecified
|
||||
F:behavior, see http://austingroupbugs.net/view.php?id=888
|
||||
|
||||
(
|
||||
IFS=$'\x80'
|
||||
if [[ $IFS = $' \t\n\0' ]]; then
|
||||
echo OK # if $'\x80' is illegal (e.g. Linux)
|
||||
else # otherwise (e.g. macOS), it should work as a separator
|
||||
s=$'foo\x80\bar'
|
||||
[[ ${${=s}[1]} = foo ]] && echo OK
|
||||
fi
|
||||
)
|
||||
0D:reset IFS to default if it contains illegal character
|
||||
>OK
|
||||
|
||||
(
|
||||
unsetopt multibyte
|
||||
IFS=$'\xc3\xa9'
|
||||
s=$'foo\xc3bar\xa9boo'
|
||||
echo ${${=s}[2]}
|
||||
)
|
||||
0:eight bit chars in IFS should work if multibyte option is off
|
||||
>bar
|
||||
|
||||
() {
|
||||
setopt localoptions extendedglob
|
||||
[[ $- = [[:alnum:]]## ]] || print Failed 1
|
||||
|
|
Loading…
Reference in a new issue