51884: reset IFS if it contains invalid characters

This happens only if the MULTIBYTE option is on.
master
Jun-ichi Takimoto 11 months ago
parent 4345eed1fe
commit 1b9bc3441c

@ -1,3 +1,9 @@
2023-06-26 Jun-ichi Takimoto <takimoto-j@kba.biglobe.ne.jp>
* 51884: Doc/Zsh/params.yo, Src/params.c, Src/utils.c,
Test/D04parameter.ztst: if MULTIBYTE option is on and IFS contains
invalid bytes in the current locale then reset it to default
2023-06-22 Bart Schaefer <schaefer@zsh.org>
* 51887: Src/math.c, Src/params.c, Test/K02parameter.ztst:

@ -1325,15 +1325,18 @@ Internal field separators (by default space, tab, newline and NUL), that
are used to separate words which result from
command or parameter expansion and words read by
the tt(read) builtin. Any characters from the set space, tab and
newline that appear in the IFS are called em(IFS white space).
newline that appear in the tt(IFS) are called em(IFS white space).
One or more IFS white space characters or one non-IFS white space
character together with any adjacent IFS white space character delimit
a field. If an IFS white space character appears twice consecutively
in the IFS, this character is treated as if it were not an IFS white
in the tt(IFS), this character is treated as if it were not an IFS white
space character.
If the parameter is unset, the default is used. Note this has
a different effect from setting the parameter to an empty string.
If the tt(MULTIBYTE) option is on and tt(IFS) contains characters that
are invalid in the current locale, it is reset to the default.
)
vindex(KEYBOARD_HACK)
item(tt(KEYBOARD_HACK))(

@ -4748,6 +4748,7 @@ setlang(char *x)
if ((x = getsparam_u(ln->name)) && *x)
setlocale(ln->category, x);
unqueue_signals();
inittyptab();
}
/**/
@ -4771,6 +4772,7 @@ lc_allsetfn(Param pm, char *x)
else {
setlocale(LC_ALL, unmeta(x));
clear_mbstate();
inittyptab();
}
}
@ -4809,6 +4811,7 @@ lcsetfn(Param pm, char *x)
}
unqueue_signals();
clear_mbstate(); /* LC_CTYPE may have changed */
inittyptab();
}
#endif /* USE_LOCALE */

@ -74,9 +74,6 @@ set_widearray(char *mb_array, Widechar_array wca)
}
wca->len = 0;
if (!isset(MULTIBYTE))
return;
if (mb_array) {
VARARR(wchar_t, tmpwcs, strlen(mb_array));
wchar_t *wcptr = tmpwcs;
@ -87,8 +84,7 @@ set_widearray(char *mb_array, Widechar_array wca)
int mblen;
if ((unsigned char) *mb_array <= 0x7f) {
mb_array++;
*wcptr++ = (wchar_t)*mb_array;
*wcptr++ = (wchar_t)*mb_array++;
continue;
}
@ -4121,8 +4117,9 @@ inittyptab(void)
* having IIDENT here is a good idea at all, but this code
* should disappear into history...
*/
for (t0 = 0240; t0 != 0400; t0++)
typtab[t0] = IALPHA | IALNUM | IIDENT | IUSER | IWORD;
if isset(MULTIBYTE)
for (t0 = 0240; t0 != 0400; t0++)
typtab[t0] = IALPHA | IALNUM | IIDENT | IUSER | IWORD;
#endif
/* typtab['.'] |= IIDENT; */ /* Allow '.' in variable names - broken */
typtab['_'] = IIDENT | IUSER;
@ -4137,11 +4134,24 @@ inittyptab(void)
typtab[t0] |= ITOK | IMETA;
for (t0 = (int) (unsigned char) Snull; t0 <= (int) (unsigned char) Nularg; t0++)
typtab[t0] |= ITOK | IMETA | INULL;
for (s = ifs ? ifs : EMULATION(EMULATE_KSH|EMULATE_SH) ?
DEFAULT_IFS_SH : DEFAULT_IFS; *s; s++) {
/* ifs */
#define CURRENT_DEFAULT_IFS (EMULATION(EMULATE_KSH|EMULATE_SH) ? \
DEFAULT_IFS_SH : DEFAULT_IFS)
#ifdef MULTIBYTE_SUPPORT
if (isset(MULTIBYTE)) {
set_widearray(ifs ? ifs : CURRENT_DEFAULT_IFS, &ifs_wide);
if (ifs && !ifs_wide.chars) {
zwarn("IFS has an invalid character; resetting IFS to default");
zsfree(ifs);
ifs = ztrdup(CURRENT_DEFAULT_IFS);
set_widearray(ifs, &ifs_wide);
}
}
#endif
for (s = ifs ? ifs : CURRENT_DEFAULT_IFS; *s; s++) {
int c = (unsigned char) (*s == Meta ? *++s ^ 32 : *s);
#ifdef MULTIBYTE_SUPPORT
if (!isascii(c)) {
if (isset(MULTIBYTE) && !isascii(c)) {
/* see comment for wordchars below */
continue;
}
@ -4154,10 +4164,15 @@ inittyptab(void)
}
typtab[c] |= ISEP;
}
/* wordchars */
#ifdef MULTIBYTE_SUPPORT
if (isset(MULTIBYTE))
set_widearray(wordchars, &wordchars_wide);
#endif
for (s = wordchars ? wordchars : DEFAULT_WORDCHARS; *s; s++) {
int c = (unsigned char) (*s == Meta ? *++s ^ 32 : *s);
#ifdef MULTIBYTE_SUPPORT
if (!isascii(c)) {
if (isset(MULTIBYTE) && !isascii(c)) {
/*
* If we have support for multibyte characters, we don't
* handle non-ASCII characters here; instead, we turn
@ -4170,11 +4185,6 @@ inittyptab(void)
#endif
typtab[c] |= IWORD;
}
#ifdef MULTIBYTE_SUPPORT
set_widearray(wordchars, &wordchars_wide);
set_widearray(ifs ? ifs : EMULATION(EMULATE_KSH|EMULATE_SH) ?
DEFAULT_IFS_SH : DEFAULT_IFS, &ifs_wide);
#endif
for (s = SPECCHARS; *s; s++)
typtab[(unsigned char) *s] |= ISPECIAL;
if (typtab_flags & ZTF_SP_COMMA)

@ -2280,6 +2280,27 @@ F:We do not care what $OLDPWD is, as long as it does not cause an error
F:As of this writing, var=$@ and var="$@" with null IFS have unspecified
F:behavior, see http://austingroupbugs.net/view.php?id=888
(
IFS=$'\x80'
if [[ $IFS = $' \t\n\0' ]]; then
echo OK # if $'\x80' is illegal (e.g. Linux)
else # otherwise (e.g. macOS), it should work as a separator
s=$'foo\x80\bar'
[[ ${${=s}[1]} = foo ]] && echo OK
fi
)
0D:reset IFS to default if it contains illegal character
>OK
(
unsetopt multibyte
IFS=$'\xc3\xa9'
s=$'foo\xc3bar\xa9boo'
echo ${${=s}[2]}
)
0:eight bit chars in IFS should work if multibyte option is off
>bar
() {
setopt localoptions extendedglob
[[ $- = [[:alnum:]]## ]] || print Failed 1

Loading…
Cancel
Save