mirror of
git://git.code.sf.net/p/zsh/code
synced 2025-11-01 18:30:55 +01:00
27812: display invalid bytes in multibyte characters specially
This commit is contained in:
parent
08dec290d4
commit
b16923b096
5 changed files with 82 additions and 14 deletions
|
|
@ -1,3 +1,10 @@
|
|||
2010-03-22 Peter Stephenson <p.w.stephenson@ntlworld.com>
|
||||
|
||||
* 27812: Doc/Zsh/zle.yo, Src/Zle/zle.h, Src/Zle/zle_refresh.c,
|
||||
Src/Zle/zle_utils.c: when wchar_t contains Unicode code points,
|
||||
use private area to put bytes that don't form characters for
|
||||
special display.
|
||||
|
||||
2010-03-22 Peter Stephenson <pws@csr.com>
|
||||
|
||||
* 27822: Src/hist.c, Src/lex.c, Src/zle_params.c,
|
||||
|
|
@ -12949,5 +12956,5 @@
|
|||
|
||||
*****************************************************
|
||||
* This is used by the shell to define $ZSH_PATCHLEVEL
|
||||
* $Revision: 1.4941 $
|
||||
* $Revision: 1.4942 $
|
||||
*****************************************************
|
||||
|
|
|
|||
|
|
@ -2286,6 +2286,20 @@ angle brackets. The number is the code point of the character in the wide
|
|||
character set; this may or may not be Unicode, depending on the operating
|
||||
system.
|
||||
)
|
||||
item(Invalid multibyte characters)(
|
||||
If the tt(MULTIBYTE) option is in effect, any sequence of one or more
|
||||
bytes that does not form a valid character in the current character
|
||||
set is treated as a series of bytes each shown as a special character.
|
||||
This case can be distinguished from other unprintable characters
|
||||
as the bytes are represented as two hexadecimal digits between angle
|
||||
brackets, as distinct from the four or eight digits that are used for
|
||||
unprintable characters that are nonetheless valid in the current
|
||||
character set.
|
||||
|
||||
Not all systems support this: for it to work, the system's representation of
|
||||
wide characters must be code values from the Universal Character Set,
|
||||
as defined by ISO 10646 (also known as Unicode).
|
||||
)
|
||||
enditem()
|
||||
|
||||
If tt(zle_highlight) is not set or no value applies to a particular
|
||||
|
|
|
|||
|
|
@ -419,6 +419,20 @@ typedef struct {
|
|||
typedef REFRESH_ELEMENT *REFRESH_STRING;
|
||||
|
||||
|
||||
#if defined(MULTIBYTE_SUPPORT) && defined(__STDC_ISO_10646__)
|
||||
#define ZSH_INVALID_WCHAR_BASE (0xe000U)
|
||||
#define ZSH_INVALID_WCHAR_TEST(x) \
|
||||
((unsigned)(x) >= ZSH_INVALID_WCHAR_BASE && \
|
||||
(unsigned)(x) <= (ZSH_INVALID_WCHAR_BASE + 255u))
|
||||
#define ZSH_INVALID_WCHAR_TO_CHAR(x) \
|
||||
((char)((unsigned)(x) - ZSH_INVALID_WCHAR_BASE))
|
||||
#define ZSH_INVALID_WCHAR_TO_INT(x) \
|
||||
((int)((unsigned)(x) - ZSH_INVALID_WCHAR_BASE))
|
||||
#define ZSH_CHAR_TO_INVALID_WCHAR(x) \
|
||||
((wchar_t)(STOUC(x) + ZSH_INVALID_WCHAR_BASE))
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef DEBUG
|
||||
#define METACHECK() \
|
||||
DPUTS(zlemetaline == NULL, "line not metafied")
|
||||
|
|
|
|||
|
|
@ -1263,7 +1263,11 @@ zrefresh(void)
|
|||
}
|
||||
}
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
else if (iswprint(*t) && (width = WCWIDTH(*t)) > 0) {
|
||||
else if (
|
||||
#ifdef __STDC_ISO_10646__
|
||||
!ZSH_INVALID_WCHAR_TEST(*t) &&
|
||||
#endif
|
||||
iswprint(*t) && (width = WCWIDTH(*t)) > 0) {
|
||||
int ichars;
|
||||
if (width > rpms.sen - rpms.s) {
|
||||
int started = 0;
|
||||
|
|
@ -1367,6 +1371,12 @@ zrefresh(void)
|
|||
wchar_t wc;
|
||||
int started = 0;
|
||||
|
||||
#ifdef __STDC_ISO_10646__
|
||||
if (ZSH_INVALID_WCHAR_TEST(*t)) {
|
||||
int c = ZSH_INVALID_WCHAR_TO_INT(*t);
|
||||
sprintf(dispchars, "<%.02x>", c);
|
||||
} else
|
||||
#endif
|
||||
if ((unsigned)*t > 0xffffU) {
|
||||
sprintf(dispchars, "<%.08x>", (unsigned)*t);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -120,11 +120,19 @@ zlecharasstring(ZLE_CHAR_T inchar, char *buf)
|
|||
size_t ret;
|
||||
char *ptr;
|
||||
|
||||
ret = wctomb(buf, inchar);
|
||||
if (ret <= 0) {
|
||||
/* Ick. */
|
||||
buf[0] = '?';
|
||||
return 1;
|
||||
#ifdef __STDC_ISO_10646__
|
||||
if (ZSH_INVALID_WCHAR_TEST(inchar)) {
|
||||
buf[0] = ZSH_INVALID_WCHAR_TO_CHAR(inchar);
|
||||
ret = 1;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
ret = wctomb(buf, inchar);
|
||||
if (ret <= 0) {
|
||||
/* Ick. */
|
||||
buf[0] = '?';
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
ptr = buf + ret - 1;
|
||||
for (;;) {
|
||||
|
|
@ -196,13 +204,20 @@ zlelineasstring(ZLE_STRING_T instr, int inll, int incs, int *outllp,
|
|||
for (i=0; i < inll; i++, incs--) {
|
||||
if (incs == 0)
|
||||
outcs = mb_len;
|
||||
j = wcrtomb(s + mb_len, instr[i], &mbs);
|
||||
if (j == -1) {
|
||||
/* invalid char; what to do? */
|
||||
s[mb_len++] = ZWC('?');
|
||||
memset(&mbs, 0, sizeof(mbs));
|
||||
} else {
|
||||
mb_len += j;
|
||||
#ifdef __STDC_ISO_10646__
|
||||
if (ZSH_INVALID_WCHAR_TEST(instr[i])) {
|
||||
s[mb_len++] = ZSH_INVALID_WCHAR_TO_CHAR(instr[i]);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
j = wcrtomb(s + mb_len, instr[i], &mbs);
|
||||
if (j == -1) {
|
||||
/* invalid char */
|
||||
s[mb_len++] = ZWC('?');
|
||||
memset(&mbs, 0, sizeof(mbs));
|
||||
} else {
|
||||
mb_len += j;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (incs == 0)
|
||||
|
|
@ -332,6 +347,13 @@ stringaszleline(char *instr, int incs, int *outll, int *outsz, int *outcs)
|
|||
while (ll > 0) {
|
||||
size_t cnt = mbrtowc(outptr, inptr, ll, &mbs);
|
||||
|
||||
#ifdef __STDC_ISO_10646__
|
||||
if (cnt == MB_INCOMPLETE || cnt == MB_INVALID) {
|
||||
/* Use private encoding for invalid single byte */
|
||||
*outptr = ZSH_CHAR_TO_INVALID_WCHAR(*inptr);
|
||||
cnt = 1;
|
||||
}
|
||||
#else
|
||||
/*
|
||||
* At this point we don't handle either incomplete (-2) or
|
||||
* invalid (-1) multibyte sequences. Use the current length
|
||||
|
|
@ -339,6 +361,7 @@ stringaszleline(char *instr, int incs, int *outll, int *outsz, int *outcs)
|
|||
*/
|
||||
if (cnt == MB_INCOMPLETE || cnt == MB_INVALID)
|
||||
break;
|
||||
#endif
|
||||
|
||||
if (cnt == 0) {
|
||||
/* Converting '\0' returns 0, but a '\0' is a real
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue