mirror of
git://git.code.sf.net/p/zsh/code
synced 2025-09-02 22:11:54 +02:00
21736: improve tests for word and identifier characters with multibyte input
This commit is contained in:
parent
ce43e4a22c
commit
409296e22f
7 changed files with 104 additions and 35 deletions
|
@ -1,3 +1,10 @@
|
|||
2005-09-20 Peter Stephenson <pws@csr.com>
|
||||
|
||||
* 21736: Src/init.c, Src/params.c, Src/pattern.c, Src/utils.c,
|
||||
Src/Zle/zle.h, Src/Zle/zle_main.c: Fix WORDCHARS to use multibyte
|
||||
characters; rationalise test for identifiers only to use ASCII
|
||||
characters; remove existing hack for 8-bit characters.
|
||||
|
||||
2005-09-19 Peter Stephenson <pws@csr.com>
|
||||
|
||||
* unposted, c.f. 21735: Doc/Zsh/grammar.yo: document
|
||||
|
|
|
@ -66,12 +66,7 @@ typedef wint_t ZLE_INT_T;
|
|||
|
||||
#define ZC_iblank iswspace
|
||||
#define ZC_icntrl iswcntrl
|
||||
/*
|
||||
* TODO: doesn't work on arguments with side effects.
|
||||
* Also YUK. Not even sure this is guaranteed to work.
|
||||
* Should be easy to do along the lines of wcsiword.
|
||||
*/
|
||||
#define ZC_iident(x) (x < 256 && iident((int)x))
|
||||
#define ZC_iident wcsiident
|
||||
|
||||
#define ZC_tolower towlower
|
||||
#define ZC_toupper towupper
|
||||
|
|
|
@ -106,11 +106,6 @@ mod_export ZLE_INT_T lastchar_wide;
|
|||
/**/
|
||||
mod_export int
|
||||
lastchar_wide_valid;
|
||||
|
||||
/**/
|
||||
mod_export ZLE_STRING_T zle_wordchars;
|
||||
#else
|
||||
# define zle_wordchars wordchars;
|
||||
#endif
|
||||
|
||||
/* the bindings for the previous and for this key */
|
||||
|
@ -1558,17 +1553,6 @@ trashzle(void)
|
|||
kungetct = 0;
|
||||
}
|
||||
|
||||
/**/
|
||||
mod_export void
wordcharstrigger(void)
{
    /*
     * Rebuild zle_wordchars, the wide-character copy of the multibyte
     * string wordchars.  Takes no arguments and returns nothing; the
     * result is left in the global zle_wordchars.  Does nothing unless
     * ZLE_UNICODE_SUPPORT is defined.
     */
#ifdef ZLE_UNICODE_SUPPORT
    /*
     * Convert from a local copy of the pointer: mbsrtowcs() updates the
     * source pointer it is given, so handing it &wordchars would
     * overwrite the global itself.
     */
    const char *src = wordchars ? wordchars : "";
    size_t maxwide = strlen(src);
    size_t nconv;
    mbstate_t mbs;

    memset(&mbs, 0, sizeof(mbs));

    /*
     * A multibyte string never yields more wide characters than it has
     * bytes, so maxwide elements plus a terminator are always enough.
     * The result of zrealloc() must be stored, as the block may move.
     */
    zle_wordchars = (ZLE_STRING_T)
        zrealloc(zle_wordchars, (maxwide + 1) * sizeof(*zle_wordchars));

    nconv = mbsrtowcs(zle_wordchars, &src, maxwide, &mbs);
    if (nconv == (size_t)-1) {
        /* Invalid multibyte sequence: fall back to an empty set. */
        nconv = 0;
    }
    /* mbsrtowcs() does not terminate the output when it stops at the limit. */
    zle_wordchars[nconv] = L'\0';
#endif
}
|
||||
|
||||
/* Hook functions. Used to allow access to zle parameters if zle is
|
||||
* active. */
|
||||
|
@ -1636,8 +1620,6 @@ setup_(UNUSED(Module m))
|
|||
kungetbuf = (char *) zalloc(kungetsz = 32);
|
||||
comprecursive = 0;
|
||||
rdstrs = NULL;
|
||||
wordcharstriggerptr = wordcharstrigger;
|
||||
wordcharstrigger();
|
||||
|
||||
/* initialise the keymap system */
|
||||
init_keymaps();
|
||||
|
@ -1712,7 +1694,6 @@ finish_(UNUSED(Module m))
|
|||
zlegetlineptr = NULL;
|
||||
zlereadptr = fallback_zleread;
|
||||
zlesetkeymapptr= noop_function_int;
|
||||
wordcharstriggerptr = noop_function;
|
||||
|
||||
getkeyptr = NULL;
|
||||
|
||||
|
|
|
@ -1179,9 +1179,6 @@ mod_export ZleVoidIntFn zlesetkeymapptr = noop_function_int;
|
|||
|
||||
#endif /* !LINKED_XMOD_zshQszle */
|
||||
|
||||
/**/
|
||||
mod_export ZleVoidFn wordcharstriggerptr = noop_function;
|
||||
|
||||
/**/
|
||||
unsigned char *
|
||||
autoload_zleread(char **lp, char **rp, int ha, int con)
|
||||
|
|
|
@ -3346,7 +3346,6 @@ wordcharssetfn(UNUSED(Param pm), char *x)
|
|||
zsfree(wordchars);
|
||||
wordchars = x;
|
||||
inittyptab();
|
||||
wordcharstriggerptr();
|
||||
}
|
||||
|
||||
/* Function to get value for special parameter `_' */
|
||||
|
|
|
@ -2749,6 +2749,10 @@ patmatchrange(char *range, int ch)
|
|||
return 1;
|
||||
break;
|
||||
case PP_WORD:
|
||||
/*
|
||||
* HERE: when we support multibyte characters,
|
||||
* this test needs to be wcsiword().
|
||||
*/
|
||||
if (iword(ch))
|
||||
return 1;
|
||||
break;
|
||||
|
|
98
Src/utils.c
98
Src/utils.c
|
@ -35,6 +35,16 @@
|
|||
/**/
|
||||
char *scriptname;
|
||||
|
||||
#ifdef ZLE_UNICODE_SUPPORT
|
||||
/*
|
||||
* The wordchars variable turned into a wide character array.
|
||||
* This is much more convenient for testing.
|
||||
*/
|
||||
|
||||
/**/
|
||||
mod_export wchar_t *wordchars_wide;
|
||||
#endif
|
||||
|
||||
/* Print an error */
|
||||
|
||||
/**/
|
||||
|
@ -2456,8 +2466,18 @@ inittyptab(void)
|
|||
typtab[t0] = IDIGIT | IALNUM | IWORD | IIDENT | IUSER;
|
||||
for (t0 = 'a'; t0 <= 'z'; t0++)
|
||||
typtab[t0] = typtab[t0 - 'a' + 'A'] = IALPHA | IALNUM | IIDENT | IUSER | IWORD;
|
||||
#ifndef ZLE_UNICODE_SUPPORT
|
||||
/*
|
||||
* This really doesn't seem to me the right thing to do when
|
||||
* we have multibyte character support... it was a hack to assume
|
||||
* eight bit characters `worked' for some values of work before
|
||||
* we could test for them properly. I'm not 100% convinced
|
||||
* having IIDENT here is a good idea at all, but this code
|
||||
* should disappear into history...
|
||||
*/
|
||||
for (t0 = 0240; t0 != 0400; t0++)
|
||||
typtab[t0] = IALPHA | IALNUM | IIDENT | IUSER | IWORD;
|
||||
#endif
|
||||
typtab['_'] = IIDENT | IUSER;
|
||||
typtab['-'] = IUSER;
|
||||
typtab[' '] |= IBLANK | INBLANK;
|
||||
|
@ -2477,8 +2497,44 @@ inittyptab(void)
|
|||
}
|
||||
typtab[STOUC(*s == Meta ? *++s ^ 32 : *s)] |= ISEP;
|
||||
}
|
||||
for (s = wordchars ? wordchars : DEFAULT_WORDCHARS; *s; s++)
|
||||
typtab[STOUC(*s == Meta ? *++s ^ 32 : *s)] |= IWORD;
|
||||
#ifdef ZLE_UNICODE_SUPPORT
|
||||
if (wordchars) {
|
||||
const char *wordchars_ptr = wordchars;
|
||||
mbstate_t mbs;
|
||||
int nchars;
|
||||
|
||||
memset(&mbs, 0, sizeof(mbs));
|
||||
wordchars_wide = (wchar_t *)
|
||||
zrealloc(wordchars_wide, (strlen(wordchars)+1)*sizeof(wchar_t));
|
||||
nchars = mbsrtowcs(wordchars_wide, &wordchars_ptr, strlen(wordchars),
|
||||
&mbs);
|
||||
if (nchars == -1) {
|
||||
/* Conversion state is undefined: better just set to null */
|
||||
*wordchars_wide = L'\0';
|
||||
} else {
|
||||
wordchars_wide[nchars] = L'\0';
|
||||
}
|
||||
} else {
|
||||
wordchars_wide = zrealloc(wordchars_wide, sizeof(wchar_t));
|
||||
*wordchars_wide = L'\0';
|
||||
}
|
||||
#endif
|
||||
for (s = wordchars ? wordchars : DEFAULT_WORDCHARS; *s; s++) {
|
||||
int c = STOUC(*s == Meta ? *++s ^ 32 : *s);
|
||||
#ifdef ZLE_UNICODE_SUPPORT
|
||||
if (!isascii(c)) {
|
||||
/*
|
||||
* If we have support for multibyte characters, we don't
|
||||
* handle non-ASCII characters here; instead, we turn
|
||||
* wordchars into a wide character array.
|
||||
* (We may actually have a single-byte 8-bit character set,
|
||||
* but it works the same way.)
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
typtab[c] |= IWORD;
|
||||
}
|
||||
for (s = SPECCHARS; *s; s++)
|
||||
typtab[STOUC(*s)] |= ISPECIAL;
|
||||
if (isset(BANGHIST) && bangchar && interact && isset(SHINSTDIN))
|
||||
|
@ -2503,9 +2559,6 @@ wcsiword(wchar_t c)
|
|||
* produces an ASCII character. If it does, use iword on that.
|
||||
* If it doesn't, use iswalnum on the original character. This
|
||||
* is pretty good most of the time.
|
||||
*
|
||||
* TODO: extend WORDCHARS to handle multibyte chars by some kind
|
||||
* of hierarchical list or hash table.
|
||||
*/
|
||||
len = wctomb(outstr, c);
|
||||
|
||||
|
@ -2515,7 +2568,40 @@ wcsiword(wchar_t c)
|
|||
} else if (len == 1 && isascii(*outstr)) {
|
||||
return iword(*outstr);
|
||||
} else {
|
||||
return iswalnum(c);
|
||||
return iswalnum(c) || wcschr(wordchars_wide, c);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* iident() macro extended to support wide characters.
|
||||
*
|
||||
* The macro is intended to test if a character is allowed in an
|
||||
* internal zsh identifier. Until the main shell handles multibyte
|
||||
* characters it's not a good idea to allow characters other than
|
||||
* ASCII characters; it would cause zle to allow characters that
|
||||
* the main shell would reject. Eventually we should be able
|
||||
* to allow all alphanumerics.
|
||||
*
|
||||
* Otherwise similar to wcsiword.
|
||||
*/
|
||||
|
||||
/**/
|
||||
mod_export int
|
||||
wcsiident(wchar_t c)
|
||||
{
|
||||
int len;
|
||||
VARARR(char, outstr, MB_CUR_MAX);
|
||||
|
||||
len = wctomb(outstr, c);
|
||||
|
||||
if (len == 0) {
|
||||
/* NULL is special */
|
||||
return 0;
|
||||
} else if (len == 1 && isascii(*outstr)) {
|
||||
return iword(*outstr);
|
||||
} else {
|
||||
/* not currently allowed, see above */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
Loading…
Reference in a new issue