mirror of
git://git.code.sf.net/p/zsh/code
synced 2024-12-28 16:15:02 +01:00
51214: handle read -d and a delimiter that can't be decoded into a character
Terminate input at the raw byte value of the delimiter. Also document and test the use of an empty string as a way to specify NUL as the delimiter.
This commit is contained in:
parent
2701ab161d
commit
35a2f155c3
5 changed files with 29 additions and 3 deletions
|
@ -1,5 +1,9 @@
|
||||||
2022-12-16 Oliver Kiddle <opk@zsh.org>
|
2022-12-16 Oliver Kiddle <opk@zsh.org>
|
||||||
|
|
||||||
|
* 51214: Doc/Zsh/builtins.yo, Src/builtin.c, Test/B04read.ztst,
|
||||||
|
Test/D07multibyte.ztst: with read -d and a delimiter that can't be
|
||||||
|
decoded into a character, terminate input at the raw byte value
|
||||||
|
|
||||||
* Jun T.: 51207: Src/builtin.c, Test/B04read.ztst:
|
* Jun T.: 51207: Src/builtin.c, Test/B04read.ztst:
|
||||||
fix for read -d when the delimiter is a byte >= 0x80
|
fix for read -d when the delimiter is a byte >= 0x80
|
||||||
|
|
||||||
|
|
|
@ -1589,7 +1589,8 @@ Input is read from the coprocess.
|
||||||
)
|
)
|
||||||
item(tt(-d) var(delim))(
|
item(tt(-d) var(delim))(
|
||||||
Input is terminated by the first character of var(delim) instead of
|
Input is terminated by the first character of var(delim) instead of
|
||||||
by newline.
|
by newline. For compatibility with other shells, if var(delim) is an
|
||||||
|
empty string, input is terminated at the first NUL.
|
||||||
)
|
)
|
||||||
item(tt(-t) [ var(num) ])(
|
item(tt(-t) [ var(num) ])(
|
||||||
Test if input is available before attempting to read. If var(num)
|
Test if input is available before attempting to read. If var(num)
|
||||||
|
|
|
@ -6282,6 +6282,7 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
|
||||||
long izle_timeout = 0;
|
long izle_timeout = 0;
|
||||||
#ifdef MULTIBYTE_SUPPORT
|
#ifdef MULTIBYTE_SUPPORT
|
||||||
wchar_t delim = L'\n', wc;
|
wchar_t delim = L'\n', wc;
|
||||||
|
int rawbyte = 0;
|
||||||
mbstate_t mbs;
|
mbstate_t mbs;
|
||||||
char *laststart;
|
char *laststart;
|
||||||
size_t ret;
|
size_t ret;
|
||||||
|
@ -6412,9 +6413,11 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
|
||||||
wi = WEOF;
|
wi = WEOF;
|
||||||
if (wi != WEOF)
|
if (wi != WEOF)
|
||||||
delim = (wchar_t)wi;
|
delim = (wchar_t)wi;
|
||||||
else
|
else {
|
||||||
delim = (wchar_t) (unsigned char) ((delimstr[0] == Meta) ?
|
delim = (wchar_t) (unsigned char) ((delimstr[0] == Meta) ?
|
||||||
delimstr[1] ^ 32 : delimstr[0]);
|
delimstr[1] ^ 32 : delimstr[0]);
|
||||||
|
rawbyte = 1;
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
delim = (unsigned char) ((delimstr[0] == Meta) ?
|
delim = (unsigned char) ((delimstr[0] == Meta) ?
|
||||||
delimstr[1] ^ 32 : delimstr[0]);
|
delimstr[1] ^ 32 : delimstr[0]);
|
||||||
|
@ -6842,7 +6845,7 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
*bptr = (char)c;
|
*bptr = (char)c;
|
||||||
if (isset(MULTIBYTE)) {
|
if (isset(MULTIBYTE) && !rawbyte) {
|
||||||
ret = mbrtowc(&wc, bptr, 1, &mbs);
|
ret = mbrtowc(&wc, bptr, 1, &mbs);
|
||||||
if (!ret) /* NULL */
|
if (!ret) /* NULL */
|
||||||
ret = 1;
|
ret = 1;
|
||||||
|
|
|
@ -82,6 +82,10 @@
|
||||||
>Testing the
|
>Testing the
|
||||||
>null hypothesis
|
>null hypothesis
|
||||||
|
|
||||||
|
read -ed '' <<<$'one\0two'
|
||||||
|
0:empty delimiter terminates at nulls
|
||||||
|
>one
|
||||||
|
|
||||||
print -n $'first line\x80second line\x80' |
|
print -n $'first line\x80second line\x80' |
|
||||||
while read -d $'\x80' line; do print $line; done
|
while read -d $'\x80' line; do print $line; done
|
||||||
0:read with a delimiter >= 0x80
|
0:read with a delimiter >= 0x80
|
||||||
|
|
|
@ -212,6 +212,20 @@
|
||||||
>first
|
>first
|
||||||
>second
|
>second
|
||||||
|
|
||||||
|
read -ed £
|
||||||
|
0:read with multibyte delimiter where bytes of delimiter also occur in input
|
||||||
|
<one¤twoãthree£four
|
||||||
|
>one¤twoãthree
|
||||||
|
|
||||||
|
read -ed $'\xa0' <<<$'first\xa0second'
|
||||||
|
0:read delimited by a byte that isn't a valid multibyte character
|
||||||
|
>first
|
||||||
|
|
||||||
|
read -ed $'\xc2'
|
||||||
|
0:read delimited by a single byte terminates if the byte is part of a multibyte character
|
||||||
|
<one£two
|
||||||
|
>one
|
||||||
|
|
||||||
(IFS=«
|
(IFS=«
|
||||||
read -d » -A array
|
read -d » -A array
|
||||||
print -l $array)
|
print -l $array)
|
||||||
|
|
Loading…
Reference in a new issue