1
0
Fork 0
mirror of git://git.code.sf.net/p/zsh/code synced 2025-10-29 05:21:00 +01:00

18525: add manual UTF-8 conversion so \u and \U should work on more systems

This commit is contained in:
Oliver Kiddle 2003-05-13 12:50:26 +00:00
parent 8ee83d44ff
commit da74eb6338
2 changed files with 107 additions and 43 deletions

View file

@ -30,13 +30,15 @@
#include "zsh.mdh"
#include "utils.pro"
#if defined(HAVE_WCHAR_H) && defined(HAVE_WCTOMB)
#include <wchar.h>
# ifndef __STDC_ISO_10646__
# if defined(HAVE_ICONV) || defined(HAVE_LIBICONV)
# include <iconv.h>
# endif
# endif
#if defined(HAVE_WCHAR_H) && defined(HAVE_WCTOMB) && defined (__STDC_ISO_10646__)
# include <wchar.h>
#else
# ifdef HAVE_LANGINFO_H
# include <langinfo.h>
# if defined(HAVE_ICONV) || defined(HAVE_LIBICONV)
# include <iconv.h>
# endif
# endif
#endif
/* name of script being sourced */
@ -3271,6 +3273,42 @@ dquotedzputs(char const *s, FILE *stream)
}
#endif
# if defined(HAVE_NL_LANGINFO) && defined(CODESET) && !defined(__STDC_ISO_10646__)
/* Convert a character from UCS4 encoding to UTF-8 */
size_t
ucs4toutf8(char *dest, unsigned int wval)
{
size_t len;
if (wval < 0x80)
len = 1;
else if (wval < 0x800)
len = 2;
else if (wval < 0x10000)
len = 3;
else if (wval < 0x200000)
len = 4;
else if (wval < 0x4000000)
len = 5;
else
len = 6;
switch (len) { /* falls through except to the last case */
case 6: dest[5] = (wval & 0x3f) | 0x80; wval >>= 6;
case 5: dest[4] = (wval & 0x3f) | 0x80; wval >>= 6;
case 4: dest[3] = (wval & 0x3f) | 0x80; wval >>= 6;
case 3: dest[2] = (wval & 0x3f) | 0x80; wval >>= 6;
case 2: dest[1] = (wval & 0x3f) | 0x80; wval >>= 6;
*dest = wval | (0xfc << (6 - len)) & 0xfc;
break;
case 1: *dest = wval;
}
return len;
}
#endif
/*
* Decode a key string, turning it into the literal characters.
* The length is returned in len.
@ -3299,18 +3337,18 @@ getkeystring(char *s, int *len, int fromwhere, int *misc)
char svchar = '\0';
int meta = 0, control = 0;
int i;
#if defined(HAVE_WCHAR_H) && defined(HAVE_WCTOMB)
# ifdef __STDC_ISO_10646__
#if defined(HAVE_WCHAR_H) && defined(HAVE_WCTOMB) && defined(__STDC_ISO_10646__)
wint_t wval;
# elif defined(HAVE_ICONV) || defined(HAVE_LIBICONV)
size_t count;
#else
unsigned int wval;
# if defined(HAVE_NL_LANGINFO) && defined(CODESET) && (defined(HAVE_ICONV) || defined(HAVE_LIBICONV))
iconv_t cd;
char inbuf[4];
wchar_t outbuf[1];
size_t inbytes, outbytes;
char *inptr, *outptr;
# endif
char *inptr;
size_t count;
# endif
#endif
if (fromwhere == 6)
@ -3387,8 +3425,6 @@ getkeystring(char *s, int *len, int fromwhere, int *misc)
*misc = 1;
break;
}
#if defined(HAVE_WCHAR_H) && defined(HAVE_WCTOMB)
#if defined(__STDC_ISO_10646__) || defined(HAVE_ICONV) || defined(HAVE_LIBICONV)
case 'u':
case 'U':
wval = 0;
@ -3407,33 +3443,8 @@ getkeystring(char *s, int *len, int fromwhere, int *misc)
*misc = wval;
return s+1;
}
#ifdef __STDC_ISO_10646__
#if defined(HAVE_WCHAR_H) && defined(HAVE_WCTOMB) && defined(__STDC_ISO_10646__)
count = wctomb(t, (wchar_t)wval);
#elif defined(HAVE_ICONV) || defined(HAVE_LIBICONV)
inbytes = outbytes = 4;
inptr = inbuf;
outptr = (char *)outbuf;
/* assume big endian convention for UCS-4 */
for (i=3;i>=0;i--) {
inbuf[i] = wval & 0xff;
wval >>= 8;
}
cd = iconv_open("WCHAR_T", "ISO-10646");
if (cd == (iconv_t)-1) {
zerr("cannot do charset conversion", NULL, 0);
if (fromwhere == 4) {
for (u = t; (*u++ = *++s););
return t;
}
*t = '\0';
*len = t - buf;
return buf;
}
iconv(cd, (const char **)&inptr, &inbytes, &outptr, &outbytes);
iconv_close(cd);
count = wctomb(t, *outbuf);
#endif
if (count == (size_t)-1) {
zerr("character not in range", NULL, 0);
if (fromwhere == 4) {
@ -3446,8 +3457,56 @@ getkeystring(char *s, int *len, int fromwhere, int *misc)
}
t += count;
continue;
#endif
#endif
# else
# if defined(HAVE_NL_LANGINFO) && defined(CODESET)
if (!strcmp(nl_langinfo(CODESET), "UTF-8")) {
t += ucs4toutf8(t, wval);
continue;
} else {
# if defined(HAVE_ICONV) || defined(HAVE_LIBICONV)
inbytes = 4;
outbytes = 6;
inptr = inbuf;
/* assume big endian convention for UCS-4 */
for (i=3;i>=0;i--) {
inbuf[i] = wval & 0xff;
wval >>= 8;
}
cd = iconv_open(nl_langinfo(CODESET), "ISO-10646");
if (cd == (iconv_t)-1) {
zerr("cannot do charset conversion", NULL, 0);
if (fromwhere == 4) {
for (u = t; (*u++ = *++s););
return t;
}
*t = '\0';
*len = t - buf;
return buf;
}
count = iconv(cd, (char **)&inptr, &inbytes, &t, &outbytes);
iconv_close(cd);
if (count == (size_t)-1) {
zerr("cannot do charset conversion", NULL, 0);
*t = '\0';
*len = t - buf;
return buf;
}
continue;
# else
zerr("cannot do charset conversion", NULL, 0);
*t = '\0';
*len = t - buf;
return buf;
# endif
}
# else
zerr("cannot do charset conversion", NULL, 0);
*t = '\0';
*len = t - buf;
return buf;
# endif
# endif
default:
def:
if ((idigit(*s) && *s < '8') || *s == 'x') {