1
0
Fork 0
mirror of git://git.code.sf.net/p/zsh/code synced 2025-11-01 18:30:55 +01:00

27812: display invalid bytes in multibyte characters specially

This commit is contained in:
Peter Stephenson 2010-03-22 19:46:53 +00:00
parent 08dec290d4
commit b16923b096
5 changed files with 82 additions and 14 deletions

View file

@ -1,3 +1,10 @@
2010-03-22 Peter Stephenson <p.w.stephenson@ntlworld.com>
* 27812: Doc/Zsh/zle.yo, Src/Zle/zle.h, Src/Zle/zle_refresh.c,
Src/Zle/zle_utils.c: when wchar_t contains Unicode code points,
use private area to put bytes that don't form characters for
special display.
2010-03-22 Peter Stephenson <pws@csr.com>
* 27822: Src/hist.c, Src/lex.c, Src/zle_params.c,
@ -12949,5 +12956,5 @@
*****************************************************
* This is used by the shell to define $ZSH_PATCHLEVEL
* $Revision: 1.4941 $
* $Revision: 1.4942 $
*****************************************************

View file

@ -2286,6 +2286,20 @@ angle brackets. The number is the code point of the character in the wide
character set; this may or may not be Unicode, depending on the operating
system.
)
item(Invalid multibyte characters)(
If the tt(MULTIBYTE) option is in effect, any sequence of one or more
bytes that does not form a valid character in the current character
set is treated as a series of bytes each shown as a special character.
This case can be distinguished from other unprintable characters
as the bytes are represented as two hexadecimal digits between angle
brackets, as distinct from the four or eight digits that are used for
unprintable characters that are nonetheless valid in the current
character set.
Not all systems support this: for it to work, the system's representation of
wide characters must be code values from the Universal Character Set,
as defined by ISO 10646 (also known as Unicode).
)
enditem()
If tt(zle_highlight) is not set or no value applies to a particular

View file

@ -419,6 +419,20 @@ typedef struct {
typedef REFRESH_ELEMENT *REFRESH_STRING;
#if defined(MULTIBYTE_SUPPORT) && defined(__STDC_ISO_10646__)
/*
 * Bytes that do not form a valid multibyte character are represented
 * in wide character strings as ZSH_INVALID_WCHAR_BASE plus the byte
 * value, i.e. by one of the 256 code points starting at 0xe000 (in the
 * private use area).  These macros are only available when wchar_t
 * holds ISO 10646 code points.
 */
#define ZSH_INVALID_WCHAR_BASE	(0xe000U)
/*
 * Non-zero if x lies in the range reserved for invalid bytes,
 * ZSH_INVALID_WCHAR_BASE to ZSH_INVALID_WCHAR_BASE + 255 inclusive.
 * The argument is evaluated exactly once: the unsigned subtraction
 * wraps round to a large value for anything below the base, so a
 * single comparison checks both ends of the range.
 */
#define ZSH_INVALID_WCHAR_TEST(x)			\
    (((unsigned)(x) - ZSH_INVALID_WCHAR_BASE) <= 255u)
/*
 * Offset of x from the base, converted to char.  Only meaningful for
 * a value accepted by ZSH_INVALID_WCHAR_TEST().
 */
#define ZSH_INVALID_WCHAR_TO_CHAR(x)			\
    ((char)((unsigned)(x) - ZSH_INVALID_WCHAR_BASE))
/*
 * Offset of x from the base, converted to int; 0 to 255 for a value
 * accepted by ZSH_INVALID_WCHAR_TEST().
 */
#define ZSH_INVALID_WCHAR_TO_INT(x)			\
    ((int)((unsigned)(x) - ZSH_INVALID_WCHAR_BASE))
/*
 * Wide character used to stand for the byte x.
 * NOTE(review): STOUC is defined elsewhere; presumably it converts
 * its argument to unsigned char so the offset is non-negative --
 * confirm against its definition.
 */
#define ZSH_CHAR_TO_INVALID_WCHAR(x)			\
    ((wchar_t)(STOUC(x) + ZSH_INVALID_WCHAR_BASE))
#endif
#ifdef DEBUG
#define METACHECK() \
DPUTS(zlemetaline == NULL, "line not metafied")

View file

@ -1263,7 +1263,11 @@ zrefresh(void)
}
}
#ifdef MULTIBYTE_SUPPORT
else if (iswprint(*t) && (width = WCWIDTH(*t)) > 0) {
else if (
#ifdef __STDC_ISO_10646__
!ZSH_INVALID_WCHAR_TEST(*t) &&
#endif
iswprint(*t) && (width = WCWIDTH(*t)) > 0) {
int ichars;
if (width > rpms.sen - rpms.s) {
int started = 0;
@ -1367,6 +1371,12 @@ zrefresh(void)
wchar_t wc;
int started = 0;
#ifdef __STDC_ISO_10646__
if (ZSH_INVALID_WCHAR_TEST(*t)) {
int c = ZSH_INVALID_WCHAR_TO_INT(*t);
sprintf(dispchars, "<%.02x>", c);
} else
#endif
if ((unsigned)*t > 0xffffU) {
sprintf(dispchars, "<%.08x>", (unsigned)*t);
} else {

View file

@ -120,11 +120,19 @@ zlecharasstring(ZLE_CHAR_T inchar, char *buf)
size_t ret;
char *ptr;
ret = wctomb(buf, inchar);
if (ret <= 0) {
/* Ick. */
buf[0] = '?';
return 1;
#ifdef __STDC_ISO_10646__
if (ZSH_INVALID_WCHAR_TEST(inchar)) {
buf[0] = ZSH_INVALID_WCHAR_TO_CHAR(inchar);
ret = 1;
} else
#endif
{
ret = wctomb(buf, inchar);
if (ret <= 0) {
/* Ick. */
buf[0] = '?';
return 1;
}
}
ptr = buf + ret - 1;
for (;;) {
@ -196,13 +204,20 @@ zlelineasstring(ZLE_STRING_T instr, int inll, int incs, int *outllp,
for (i=0; i < inll; i++, incs--) {
if (incs == 0)
outcs = mb_len;
j = wcrtomb(s + mb_len, instr[i], &mbs);
if (j == -1) {
/* invalid char; what to do? */
s[mb_len++] = ZWC('?');
memset(&mbs, 0, sizeof(mbs));
} else {
mb_len += j;
#ifdef __STDC_ISO_10646__
if (ZSH_INVALID_WCHAR_TEST(instr[i])) {
s[mb_len++] = ZSH_INVALID_WCHAR_TO_CHAR(instr[i]);
} else
#endif
{
j = wcrtomb(s + mb_len, instr[i], &mbs);
if (j == -1) {
/* invalid char */
s[mb_len++] = ZWC('?');
memset(&mbs, 0, sizeof(mbs));
} else {
mb_len += j;
}
}
}
if (incs == 0)
@ -332,6 +347,13 @@ stringaszleline(char *instr, int incs, int *outll, int *outsz, int *outcs)
while (ll > 0) {
size_t cnt = mbrtowc(outptr, inptr, ll, &mbs);
#ifdef __STDC_ISO_10646__
if (cnt == MB_INCOMPLETE || cnt == MB_INVALID) {
/* Use private encoding for invalid single byte */
*outptr = ZSH_CHAR_TO_INVALID_WCHAR(*inptr);
cnt = 1;
}
#else
/*
* At this point we don't handle either incomplete (-2) or
* invalid (-1) multibyte sequences. Use the current length
@ -339,6 +361,7 @@ stringaszleline(char *instr, int incs, int *outll, int *outsz, int *outcs)
*/
if (cnt == MB_INCOMPLETE || cnt == MB_INVALID)
break;
#endif
if (cnt == 0) {
/* Converting '\0' returns 0, but a '\0' is a real