mirror of
git://git.code.sf.net/p/zsh/code
synced 2025-06-11 07:08:07 +02:00
52169: a few more improvements of (#) flag
fix (#X) in C locale in FreeBSD, DragonFly, NetBSD. Negative values such as ${(#X):--1} are now an error. UCS4 is limited to < 0x8000_0000 (in OSes without __STDC_ISO_10646__).
This commit is contained in:
parent
e4e9afe373
commit
02e33c54d8
3 changed files with 126 additions and 125 deletions
|
@ -1,3 +1,7 @@
|
||||||
|
2023-09-27 Jun-ichi Takimoto <takimoto-j@kba.biglobe.ne.jp>
|
||||||
|
|
||||||
|
* 52169: Src/subst.c, Src/utils.c: a few more improvements of (#)
|
||||||
|
|
||||||
2023-09-23 Bart Schaefer <schaefer@BartMAC2014.local>
|
2023-09-23 Bart Schaefer <schaefer@BartMAC2014.local>
|
||||||
|
|
||||||
* 52180: Doc/Zsh/expn.yo: clarify array behavior of ${|var|...}
|
* 52180: Doc/Zsh/expn.yo: clarify array behavior of ${|var|...}
|
||||||
|
|
17
Src/subst.c
17
Src/subst.c
|
@ -1501,16 +1501,15 @@ substevalchar(char *ptr)
|
||||||
return noerrs ? dupstring(""): NULL;
|
return noerrs ? dupstring(""): NULL;
|
||||||
}
|
}
|
||||||
errflag |= saved_errflag;
|
errflag |= saved_errflag;
|
||||||
#ifdef MULTIBYTE_SUPPORT
|
if (ires < 0) {
|
||||||
if (isset(MULTIBYTE) && ires > 127) {
|
zerr("character not in range");
|
||||||
/* '\\' + 'U' + 8 bytes of character + '\0' */
|
|
||||||
char buf[11];
|
|
||||||
|
|
||||||
/* inefficient: should separate out \U handling from getkeystring */
|
|
||||||
sprintf(buf, "\\U%.8x", (unsigned int)ires & 0xFFFFFFFFu);
|
|
||||||
ptr = getkeystring(buf, &len, GETKEYS_BINDKEY, NULL);
|
|
||||||
}
|
}
|
||||||
if (len == 0)
|
#ifdef MULTIBYTE_SUPPORT
|
||||||
|
else if (isset(MULTIBYTE) && ires > 127) {
|
||||||
|
ptr = zhalloc(MB_CUR_MAX);
|
||||||
|
len = ucs4tomb((unsigned int)ires & 0xffffffff, ptr);
|
||||||
|
}
|
||||||
|
if (len <= 0)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
ptr = zhalloc(2);
|
ptr = zhalloc(2);
|
||||||
|
|
230
Src/utils.c
230
Src/utils.c
|
@ -6672,11 +6672,14 @@ dquotedzputs(char const *s, FILE *stream)
|
||||||
# if defined(HAVE_NL_LANGINFO) && defined(CODESET) && !defined(__STDC_ISO_10646__)
|
# if defined(HAVE_NL_LANGINFO) && defined(CODESET) && !defined(__STDC_ISO_10646__)
|
||||||
/* Convert a character from UCS4 encoding to UTF-8 */
|
/* Convert a character from UCS4 encoding to UTF-8 */
|
||||||
|
|
||||||
static size_t
|
static int
|
||||||
ucs4toutf8(char *dest, unsigned int wval)
|
ucs4toutf8(char *dest, unsigned int wval)
|
||||||
{
|
{
|
||||||
size_t len;
|
int len;
|
||||||
|
|
||||||
|
/* UCS4 is now equivalent to UTF-32 and limited to 0 - 0x10_FFFF.
|
||||||
|
* This function accepts 0 - 0x7FFF_FFFF (old range of UCS4) to be
|
||||||
|
* compatible with wctomb(3) (in UTF-8 locale) on Linux. */
|
||||||
if (wval < 0x80)
|
if (wval < 0x80)
|
||||||
len = 1;
|
len = 1;
|
||||||
else if (wval < 0x800)
|
else if (wval < 0x800)
|
||||||
|
@ -6687,8 +6690,12 @@ ucs4toutf8(char *dest, unsigned int wval)
|
||||||
len = 4;
|
len = 4;
|
||||||
else if (wval < 0x4000000)
|
else if (wval < 0x4000000)
|
||||||
len = 5;
|
len = 5;
|
||||||
else
|
else if (wval < 0x80000000)
|
||||||
len = 6;
|
len = 6;
|
||||||
|
else {
|
||||||
|
zerr("character not in range");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
switch (len) { /* falls through except to the last case */
|
switch (len) { /* falls through except to the last case */
|
||||||
case 6: dest[5] = (wval & 0x3f) | 0x80; wval >>= 6;
|
case 6: dest[5] = (wval & 0x3f) | 0x80; wval >>= 6;
|
||||||
|
@ -6705,30 +6712,89 @@ ucs4toutf8(char *dest, unsigned int wval)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Convert UCS4 to a multibyte character in current locale.
|
||||||
|
* Result is saved in buf (must be at least MB_CUR_MAX bytes long).
|
||||||
|
* Returns the number of bytes saved in buf, or -1 if conversion fails. */
|
||||||
|
|
||||||
/*
|
/**/
|
||||||
* The following only occurs once or twice in the code, but in different
|
int
|
||||||
* places depending how character set conversion is implemented.
|
ucs4tomb(unsigned int wval, char *buf)
|
||||||
*/
|
{
|
||||||
#define CHARSET_FAILED() \
|
#if defined(HAVE_WCHAR_H) && defined(HAVE_WCTOMB) && defined(__STDC_ISO_10646__)
|
||||||
if (how & GETKEY_DOLLAR_QUOTE) { \
|
int count = wctomb(buf, (wchar_t)wval);
|
||||||
while ((*tdest++ = *++s)) { \
|
if (count == -1)
|
||||||
if (how & GETKEY_UPDATE_OFFSET) { \
|
zerr("character not in range");
|
||||||
if (s - sstart > *misc) \
|
return count;
|
||||||
(*misc)++; \
|
#else /* !(HAVE_WCHAR_H && HAVE_WCTOMB && __STDC_ISO_10646__) */
|
||||||
} \
|
# if defined(HAVE_NL_LANGINFO) && defined(CODESET)
|
||||||
if (*s == Snull) { \
|
if (!strcmp(nl_langinfo(CODESET), "UTF-8")) {
|
||||||
*len = (s - sstart) + 1; \
|
return ucs4toutf8(buf, wval);
|
||||||
*tdest = '\0'; \
|
} else {
|
||||||
return buf; \
|
# ifdef HAVE_ICONV
|
||||||
} \
|
iconv_t cd;
|
||||||
} \
|
char inbuf[4], *bsave = buf;
|
||||||
*len = tdest - buf; \
|
ICONV_CONST char *inptr = inbuf;
|
||||||
return buf; \
|
size_t inbytes = 4, outbytes = 6;
|
||||||
} \
|
const char *codesetstr = nl_langinfo(CODESET);
|
||||||
*t = '\0'; \
|
size_t count;
|
||||||
*len = t - buf; \
|
int i;
|
||||||
return buf
|
|
||||||
|
/*
|
||||||
|
* If the code set isn't handled, we'd better assume it's US-ASCII
|
||||||
|
* rather than just failing hopelessly. Solaris has a weird habit
|
||||||
|
* of returning 646. This is handled by the native iconv(), but
|
||||||
|
* not by GNU iconv; what's more, some versions of the native iconv
|
||||||
|
* don't handle standard names like ASCII.
|
||||||
|
*
|
||||||
|
* This should only be a problem if there's a mismatch between the
|
||||||
|
* NLS and the iconv in use, which probably only means if libiconv
|
||||||
|
* is in use. We checked at configure time if our libraries pulled
|
||||||
|
* in _libiconv_version, which should be a good test.
|
||||||
|
*
|
||||||
|
* It shouldn't ever be NULL, but while we're being paranoid...
|
||||||
|
*/
|
||||||
|
# ifdef ICONV_FROM_LIBICONV
|
||||||
|
if (!codesetstr || !*codesetstr)
|
||||||
|
codesetstr = "US-ASCII";
|
||||||
|
# endif
|
||||||
|
cd = iconv_open(codesetstr, "UCS-4BE");
|
||||||
|
# ifdef ICONV_FROM_LIBICONV
|
||||||
|
if (cd == (iconv_t)-1 && !strcmp(codesetstr, "646")) {
|
||||||
|
codesetstr = "US-ASCII";
|
||||||
|
cd = iconv_open(codesetstr, "UCS-4BE");
|
||||||
|
}
|
||||||
|
# endif
|
||||||
|
if (cd == (iconv_t)-1) {
|
||||||
|
zerr("cannot do charset conversion (iconv failed)");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* store value in big endian form */
|
||||||
|
for (i=3; i>=0; i--) {
|
||||||
|
inbuf[i] = wval & 0xff;
|
||||||
|
wval >>= 8;
|
||||||
|
}
|
||||||
|
count = iconv(cd, &inptr, &inbytes, &buf, &outbytes);
|
||||||
|
iconv_close(cd);
|
||||||
|
if (count) {
|
||||||
|
/* -1 indicates error. Positive value means number of "invalid"
|
||||||
|
* (or "non-reversible") conversions, which we consider as
|
||||||
|
* "out-of-range" characters. */
|
||||||
|
zerr("character not in range");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return buf - bsave;
|
||||||
|
# else /* !HAVE_ICONV */
|
||||||
|
zerr("cannot do charset conversion (iconv not available)");
|
||||||
|
return -1;
|
||||||
|
# endif /* HAVE_ICONV */
|
||||||
|
}
|
||||||
|
# else /* !(HAVE_NL_LANGINFO && CODESET) */
|
||||||
|
zerr("cannot do charset conversion (NLS not supported)");
|
||||||
|
return -1;
|
||||||
|
# endif /* HAVE_NL_LANGINFO && CODESET */
|
||||||
|
#endif /* HAVE_WCHAR_H && HAVE_WCTOMB && __STDC_ISO_10646__ */
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Decode a key string, turning it into the literal characters.
|
* Decode a key string, turning it into the literal characters.
|
||||||
|
@ -6785,21 +6851,6 @@ getkeystring(char *s, int *len, int how, int *misc)
|
||||||
char *t, *tdest = NULL, *u = NULL, *sstart = s, *tbuf = NULL;
|
char *t, *tdest = NULL, *u = NULL, *sstart = s, *tbuf = NULL;
|
||||||
char svchar = '\0';
|
char svchar = '\0';
|
||||||
int meta = 0, control = 0, ignoring = 0;
|
int meta = 0, control = 0, ignoring = 0;
|
||||||
int i;
|
|
||||||
#if defined(HAVE_WCHAR_H) && defined(HAVE_WCTOMB) && defined(__STDC_ISO_10646__)
|
|
||||||
wint_t wval;
|
|
||||||
int count;
|
|
||||||
#else
|
|
||||||
unsigned int wval;
|
|
||||||
# if defined(HAVE_NL_LANGINFO) && defined(CODESET)
|
|
||||||
# if defined(HAVE_ICONV)
|
|
||||||
iconv_t cd;
|
|
||||||
char inbuf[4];
|
|
||||||
size_t inbytes, outbytes;
|
|
||||||
# endif
|
|
||||||
size_t count;
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
DPUTS((how & GETKEY_UPDATE_OFFSET) &&
|
DPUTS((how & GETKEY_UPDATE_OFFSET) &&
|
||||||
(how & ~(GETKEYS_DOLLARS_QUOTE|GETKEY_UPDATE_OFFSET)),
|
(how & ~(GETKEYS_DOLLARS_QUOTE|GETKEY_UPDATE_OFFSET)),
|
||||||
|
@ -6864,7 +6915,8 @@ getkeystring(char *s, int *len, int how, int *misc)
|
||||||
}
|
}
|
||||||
for (; *s; s++) {
|
for (; *s; s++) {
|
||||||
if (*s == '\\' && s[1]) {
|
if (*s == '\\' && s[1]) {
|
||||||
int miscadded;
|
int miscadded, count, i;
|
||||||
|
unsigned int wval;
|
||||||
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc) {
|
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc) {
|
||||||
(*misc)--;
|
(*misc)--;
|
||||||
miscadded = 1;
|
miscadded = 1;
|
||||||
|
@ -6979,86 +7031,32 @@ getkeystring(char *s, int *len, int how, int *misc)
|
||||||
*misc = wval;
|
*misc = wval;
|
||||||
return s+1;
|
return s+1;
|
||||||
}
|
}
|
||||||
#if defined(HAVE_WCHAR_H) && defined(HAVE_WCTOMB) && defined(__STDC_ISO_10646__)
|
count = ucs4tomb(wval, t);
|
||||||
count = wctomb(t, (wchar_t)wval);
|
|
||||||
if (count == -1) {
|
if (count == -1) {
|
||||||
zerr("character not in range");
|
if (how & GETKEY_DOLLAR_QUOTE) {
|
||||||
CHARSET_FAILED();
|
while ((*tdest++ = *++s)) {
|
||||||
|
if (how & GETKEY_UPDATE_OFFSET) {
|
||||||
|
if (s - sstart > *misc)
|
||||||
|
(*misc)++;
|
||||||
|
}
|
||||||
|
if (*s == Snull) {
|
||||||
|
*len = (s - sstart) + 1;
|
||||||
|
*tdest = '\0';
|
||||||
|
return buf;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*len = tdest - buf;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
*t = '\0';
|
||||||
|
*len = t - buf;
|
||||||
|
}
|
||||||
|
return buf;
|
||||||
}
|
}
|
||||||
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc)
|
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc)
|
||||||
(*misc) += count;
|
(*misc) += count;
|
||||||
t += count;
|
t += count;
|
||||||
# else
|
|
||||||
# if defined(HAVE_NL_LANGINFO) && defined(CODESET)
|
|
||||||
if (!strcmp(nl_langinfo(CODESET), "UTF-8")) {
|
|
||||||
count = ucs4toutf8(t, wval);
|
|
||||||
t += count;
|
|
||||||
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc)
|
|
||||||
(*misc) += count;
|
|
||||||
} else {
|
|
||||||
# ifdef HAVE_ICONV
|
|
||||||
ICONV_CONST char *inptr = inbuf;
|
|
||||||
const char *codesetstr = nl_langinfo(CODESET);
|
|
||||||
inbytes = 4;
|
|
||||||
outbytes = 6;
|
|
||||||
/* store value in big endian form */
|
|
||||||
for (i=3;i>=0;i--) {
|
|
||||||
inbuf[i] = wval & 0xff;
|
|
||||||
wval >>= 8;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If the code set isn't handled, we'd better
|
|
||||||
* assume it's US-ASCII rather than just failing
|
|
||||||
* hopelessly. Solaris has a weird habit of
|
|
||||||
* returning 646. This is handled by the
|
|
||||||
* native iconv(), but not by GNU iconv; what's
|
|
||||||
* more, some versions of the native iconv don't
|
|
||||||
* handle standard names like ASCII.
|
|
||||||
*
|
|
||||||
* This should only be a problem if there's a
|
|
||||||
* mismatch between the NLS and the iconv in use,
|
|
||||||
* which probably only means if libiconv is in use.
|
|
||||||
* We checked at configure time if our libraries
|
|
||||||
* pulled in _libiconv_version, which should be
|
|
||||||
* a good test.
|
|
||||||
*
|
|
||||||
* It shouldn't ever be NULL, but while we're
|
|
||||||
* being paranoid...
|
|
||||||
*/
|
|
||||||
#ifdef ICONV_FROM_LIBICONV
|
|
||||||
if (!codesetstr || !*codesetstr)
|
|
||||||
codesetstr = "US-ASCII";
|
|
||||||
#endif
|
|
||||||
cd = iconv_open(codesetstr, "UCS-4BE");
|
|
||||||
#ifdef ICONV_FROM_LIBICONV
|
|
||||||
if (cd == (iconv_t)-1 && !strcmp(codesetstr, "646")) {
|
|
||||||
codesetstr = "US-ASCII";
|
|
||||||
cd = iconv_open(codesetstr, "UCS-4BE");
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
if (cd == (iconv_t)-1) {
|
|
||||||
zerr("cannot do charset conversion (iconv failed)");
|
|
||||||
CHARSET_FAILED();
|
|
||||||
}
|
|
||||||
count = iconv(cd, &inptr, &inbytes, &t, &outbytes);
|
|
||||||
iconv_close(cd);
|
|
||||||
if (count == (size_t)-1) {
|
|
||||||
zerr("character not in range");
|
|
||||||
CHARSET_FAILED();
|
|
||||||
}
|
|
||||||
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc)
|
|
||||||
(*misc) += count;
|
|
||||||
# else
|
|
||||||
zerr("cannot do charset conversion (iconv not available)");
|
|
||||||
CHARSET_FAILED();
|
|
||||||
# endif
|
|
||||||
}
|
|
||||||
# else
|
|
||||||
zerr("cannot do charset conversion (NLS not supported)");
|
|
||||||
CHARSET_FAILED();
|
|
||||||
# endif
|
|
||||||
# endif
|
|
||||||
if (how & GETKEY_DOLLAR_QUOTE) {
|
if (how & GETKEY_DOLLAR_QUOTE) {
|
||||||
char *t2;
|
char *t2;
|
||||||
for (t2 = tbuf; t2 < t; t2++) {
|
for (t2 = tbuf; t2 < t; t2++) {
|
||||||
|
|
Loading…
Reference in a new issue