mirror of
git://git.code.sf.net/p/zsh/code
synced 2025-05-18 21:51:02 +02:00
52169: a few more improvements of (#) flag
Fix (#X) in the C locale on FreeBSD, DragonFly, and NetBSD. Negative values such as ${(#X):--1} are now an error. UCS4 is limited to < 0x8000_0000 (on OSes without __STDC_ISO_10646__).
This commit is contained in:
parent
e4e9afe373
commit
02e33c54d8
3 changed files with 126 additions and 125 deletions
|
@ -1,3 +1,7 @@
|
|||
2023-09-27 Jun-ichi Takimoto <takimoto-j@kba.biglobe.ne.jp>
|
||||
|
||||
* 52169: Src/subst.c, Src/utils.c: a few more improvements of (#)
|
||||
|
||||
2023-09-23 Bart Schaefer <schaefer@BartMAC2014.local>
|
||||
|
||||
* 52180: Doc/Zsh/expn.yo: clarify array behavior of ${|var|...}
|
||||
|
|
17
Src/subst.c
17
Src/subst.c
|
@ -1501,16 +1501,15 @@ substevalchar(char *ptr)
|
|||
return noerrs ? dupstring(""): NULL;
|
||||
}
|
||||
errflag |= saved_errflag;
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
if (isset(MULTIBYTE) && ires > 127) {
|
||||
/* '\\' + 'U' + 8 bytes of character + '\0' */
|
||||
char buf[11];
|
||||
|
||||
/* inefficient: should separate out \U handling from getkeystring */
|
||||
sprintf(buf, "\\U%.8x", (unsigned int)ires & 0xFFFFFFFFu);
|
||||
ptr = getkeystring(buf, &len, GETKEYS_BINDKEY, NULL);
|
||||
if (ires < 0) {
|
||||
zerr("character not in range");
|
||||
}
|
||||
if (len == 0)
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
else if (isset(MULTIBYTE) && ires > 127) {
|
||||
ptr = zhalloc(MB_CUR_MAX);
|
||||
len = ucs4tomb((unsigned int)ires & 0xffffffff, ptr);
|
||||
}
|
||||
if (len <= 0)
|
||||
#endif
|
||||
{
|
||||
ptr = zhalloc(2);
|
||||
|
|
230
Src/utils.c
230
Src/utils.c
|
@ -6672,11 +6672,14 @@ dquotedzputs(char const *s, FILE *stream)
|
|||
# if defined(HAVE_NL_LANGINFO) && defined(CODESET) && !defined(__STDC_ISO_10646__)
|
||||
/* Convert a character from UCS4 encoding to UTF-8 */
|
||||
|
||||
static size_t
|
||||
static int
|
||||
ucs4toutf8(char *dest, unsigned int wval)
|
||||
{
|
||||
size_t len;
|
||||
int len;
|
||||
|
||||
/* UCS4 is now equivalent to UTF-32 and limited to 0 - 0x10_FFFF.
|
||||
* This function accepts 0 - 0x7FFF_FFFF (old range of UCS4) to be
|
||||
* compatible with wctomb(3) (in UTF-8 locale) on Linux. */
|
||||
if (wval < 0x80)
|
||||
len = 1;
|
||||
else if (wval < 0x800)
|
||||
|
@ -6687,8 +6690,12 @@ ucs4toutf8(char *dest, unsigned int wval)
|
|||
len = 4;
|
||||
else if (wval < 0x4000000)
|
||||
len = 5;
|
||||
else
|
||||
else if (wval < 0x80000000)
|
||||
len = 6;
|
||||
else {
|
||||
zerr("character not in range");
|
||||
return -1;
|
||||
}
|
||||
|
||||
switch (len) { /* falls through except to the last case */
|
||||
case 6: dest[5] = (wval & 0x3f) | 0x80; wval >>= 6;
|
||||
|
@ -6705,30 +6712,89 @@ ucs4toutf8(char *dest, unsigned int wval)
|
|||
}
|
||||
#endif
|
||||
|
||||
/* Convert UCS4 to a multibyte character in current locale.
|
||||
* Result is saved in buf (must be at least MB_CUR_MAX bytes long).
|
||||
* Returns the number of bytes saved in buf, or -1 if conversion fails. */
|
||||
|
||||
/*
|
||||
* The following only occurs once or twice in the code, but in different
|
||||
* places depending how character set conversion is implemented.
|
||||
*/
|
||||
#define CHARSET_FAILED() \
|
||||
if (how & GETKEY_DOLLAR_QUOTE) { \
|
||||
while ((*tdest++ = *++s)) { \
|
||||
if (how & GETKEY_UPDATE_OFFSET) { \
|
||||
if (s - sstart > *misc) \
|
||||
(*misc)++; \
|
||||
} \
|
||||
if (*s == Snull) { \
|
||||
*len = (s - sstart) + 1; \
|
||||
*tdest = '\0'; \
|
||||
return buf; \
|
||||
} \
|
||||
} \
|
||||
*len = tdest - buf; \
|
||||
return buf; \
|
||||
} \
|
||||
*t = '\0'; \
|
||||
*len = t - buf; \
|
||||
return buf
|
||||
/**/
|
||||
int
|
||||
ucs4tomb(unsigned int wval, char *buf)
|
||||
{
|
||||
#if defined(HAVE_WCHAR_H) && defined(HAVE_WCTOMB) && defined(__STDC_ISO_10646__)
|
||||
int count = wctomb(buf, (wchar_t)wval);
|
||||
if (count == -1)
|
||||
zerr("character not in range");
|
||||
return count;
|
||||
#else /* !(HAVE_WCHAR_H && HAVE_WCTOMB && __STDC_ISO_10646__) */
|
||||
# if defined(HAVE_NL_LANGINFO) && defined(CODESET)
|
||||
if (!strcmp(nl_langinfo(CODESET), "UTF-8")) {
|
||||
return ucs4toutf8(buf, wval);
|
||||
} else {
|
||||
# ifdef HAVE_ICONV
|
||||
iconv_t cd;
|
||||
char inbuf[4], *bsave = buf;
|
||||
ICONV_CONST char *inptr = inbuf;
|
||||
size_t inbytes = 4, outbytes = 6;
|
||||
const char *codesetstr = nl_langinfo(CODESET);
|
||||
size_t count;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* If the code set isn't handled, we'd better assume it's US-ASCII
|
||||
* rather than just failing hopelessly. Solaris has a weird habit
|
||||
* of returning 646. This is handled by the native iconv(), but
|
||||
* not by GNU iconv; what's more, some versions of the native iconv
|
||||
* don't handle standard names like ASCII.
|
||||
*
|
||||
* This should only be a problem if there's a mismatch between the
|
||||
* NLS and the iconv in use, which probably only means if libiconv
|
||||
* is in use. We checked at configure time if our libraries pulled
|
||||
* in _libiconv_version, which should be a good test.
|
||||
*
|
||||
* It shouldn't ever be NULL, but while we're being paranoid...
|
||||
*/
|
||||
# ifdef ICONV_FROM_LIBICONV
|
||||
if (!codesetstr || !*codesetstr)
|
||||
codesetstr = "US-ASCII";
|
||||
# endif
|
||||
cd = iconv_open(codesetstr, "UCS-4BE");
|
||||
# ifdef ICONV_FROM_LIBICONV
|
||||
if (cd == (iconv_t)-1 && !strcmp(codesetstr, "646")) {
|
||||
codesetstr = "US-ASCII";
|
||||
cd = iconv_open(codesetstr, "UCS-4BE");
|
||||
}
|
||||
# endif
|
||||
if (cd == (iconv_t)-1) {
|
||||
zerr("cannot do charset conversion (iconv failed)");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* store value in big endian form */
|
||||
for (i=3; i>=0; i--) {
|
||||
inbuf[i] = wval & 0xff;
|
||||
wval >>= 8;
|
||||
}
|
||||
count = iconv(cd, &inptr, &inbytes, &buf, &outbytes);
|
||||
iconv_close(cd);
|
||||
if (count) {
|
||||
/* -1 indicates error. Positive value means number of "invalid"
|
||||
* (or "non-reversible") conversions, which we consider as
|
||||
* "out-of-range" characters. */
|
||||
zerr("character not in range");
|
||||
return -1;
|
||||
}
|
||||
return buf - bsave;
|
||||
# else /* !HAVE_ICONV */
|
||||
zerr("cannot do charset conversion (iconv not available)");
|
||||
return -1;
|
||||
# endif /* HAVE_ICONV */
|
||||
}
|
||||
# else /* !(HAVE_NL_LANGINFO && CODESET) */
|
||||
zerr("cannot do charset conversion (NLS not supported)");
|
||||
return -1;
|
||||
# endif /* HAVE_NL_LANGINFO && CODESET */
|
||||
#endif /* HAVE_WCHAR_H && HAVE_WCTOMB && __STDC_ISO_10646__ */
|
||||
}
|
||||
|
||||
/*
|
||||
* Decode a key string, turning it into the literal characters.
|
||||
|
@ -6785,21 +6851,6 @@ getkeystring(char *s, int *len, int how, int *misc)
|
|||
char *t, *tdest = NULL, *u = NULL, *sstart = s, *tbuf = NULL;
|
||||
char svchar = '\0';
|
||||
int meta = 0, control = 0, ignoring = 0;
|
||||
int i;
|
||||
#if defined(HAVE_WCHAR_H) && defined(HAVE_WCTOMB) && defined(__STDC_ISO_10646__)
|
||||
wint_t wval;
|
||||
int count;
|
||||
#else
|
||||
unsigned int wval;
|
||||
# if defined(HAVE_NL_LANGINFO) && defined(CODESET)
|
||||
# if defined(HAVE_ICONV)
|
||||
iconv_t cd;
|
||||
char inbuf[4];
|
||||
size_t inbytes, outbytes;
|
||||
# endif
|
||||
size_t count;
|
||||
# endif
|
||||
#endif
|
||||
|
||||
DPUTS((how & GETKEY_UPDATE_OFFSET) &&
|
||||
(how & ~(GETKEYS_DOLLARS_QUOTE|GETKEY_UPDATE_OFFSET)),
|
||||
|
@ -6864,7 +6915,8 @@ getkeystring(char *s, int *len, int how, int *misc)
|
|||
}
|
||||
for (; *s; s++) {
|
||||
if (*s == '\\' && s[1]) {
|
||||
int miscadded;
|
||||
int miscadded, count, i;
|
||||
unsigned int wval;
|
||||
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc) {
|
||||
(*misc)--;
|
||||
miscadded = 1;
|
||||
|
@ -6979,86 +7031,32 @@ getkeystring(char *s, int *len, int how, int *misc)
|
|||
*misc = wval;
|
||||
return s+1;
|
||||
}
|
||||
#if defined(HAVE_WCHAR_H) && defined(HAVE_WCTOMB) && defined(__STDC_ISO_10646__)
|
||||
count = wctomb(t, (wchar_t)wval);
|
||||
count = ucs4tomb(wval, t);
|
||||
if (count == -1) {
|
||||
zerr("character not in range");
|
||||
CHARSET_FAILED();
|
||||
if (how & GETKEY_DOLLAR_QUOTE) {
|
||||
while ((*tdest++ = *++s)) {
|
||||
if (how & GETKEY_UPDATE_OFFSET) {
|
||||
if (s - sstart > *misc)
|
||||
(*misc)++;
|
||||
}
|
||||
if (*s == Snull) {
|
||||
*len = (s - sstart) + 1;
|
||||
*tdest = '\0';
|
||||
return buf;
|
||||
}
|
||||
}
|
||||
*len = tdest - buf;
|
||||
}
|
||||
else {
|
||||
*t = '\0';
|
||||
*len = t - buf;
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc)
|
||||
(*misc) += count;
|
||||
t += count;
|
||||
# else
|
||||
# if defined(HAVE_NL_LANGINFO) && defined(CODESET)
|
||||
if (!strcmp(nl_langinfo(CODESET), "UTF-8")) {
|
||||
count = ucs4toutf8(t, wval);
|
||||
t += count;
|
||||
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc)
|
||||
(*misc) += count;
|
||||
} else {
|
||||
# ifdef HAVE_ICONV
|
||||
ICONV_CONST char *inptr = inbuf;
|
||||
const char *codesetstr = nl_langinfo(CODESET);
|
||||
inbytes = 4;
|
||||
outbytes = 6;
|
||||
/* store value in big endian form */
|
||||
for (i=3;i>=0;i--) {
|
||||
inbuf[i] = wval & 0xff;
|
||||
wval >>= 8;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the code set isn't handled, we'd better
|
||||
* assume it's US-ASCII rather than just failing
|
||||
* hopelessly. Solaris has a weird habit of
|
||||
* returning 646. This is handled by the
|
||||
* native iconv(), but not by GNU iconv; what's
|
||||
* more, some versions of the native iconv don't
|
||||
* handle standard names like ASCII.
|
||||
*
|
||||
* This should only be a problem if there's a
|
||||
* mismatch between the NLS and the iconv in use,
|
||||
* which probably only means if libiconv is in use.
|
||||
* We checked at configure time if our libraries
|
||||
* pulled in _libiconv_version, which should be
|
||||
* a good test.
|
||||
*
|
||||
* It shouldn't ever be NULL, but while we're
|
||||
* being paranoid...
|
||||
*/
|
||||
#ifdef ICONV_FROM_LIBICONV
|
||||
if (!codesetstr || !*codesetstr)
|
||||
codesetstr = "US-ASCII";
|
||||
#endif
|
||||
cd = iconv_open(codesetstr, "UCS-4BE");
|
||||
#ifdef ICONV_FROM_LIBICONV
|
||||
if (cd == (iconv_t)-1 && !strcmp(codesetstr, "646")) {
|
||||
codesetstr = "US-ASCII";
|
||||
cd = iconv_open(codesetstr, "UCS-4BE");
|
||||
}
|
||||
#endif
|
||||
if (cd == (iconv_t)-1) {
|
||||
zerr("cannot do charset conversion (iconv failed)");
|
||||
CHARSET_FAILED();
|
||||
}
|
||||
count = iconv(cd, &inptr, &inbytes, &t, &outbytes);
|
||||
iconv_close(cd);
|
||||
if (count == (size_t)-1) {
|
||||
zerr("character not in range");
|
||||
CHARSET_FAILED();
|
||||
}
|
||||
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc)
|
||||
(*misc) += count;
|
||||
# else
|
||||
zerr("cannot do charset conversion (iconv not available)");
|
||||
CHARSET_FAILED();
|
||||
# endif
|
||||
}
|
||||
# else
|
||||
zerr("cannot do charset conversion (NLS not supported)");
|
||||
CHARSET_FAILED();
|
||||
# endif
|
||||
# endif
|
||||
if (how & GETKEY_DOLLAR_QUOTE) {
|
||||
char *t2;
|
||||
for (t2 = tbuf; t2 < t; t2++) {
|
||||
|
|
Loading…
Reference in a new issue