1
0
Fork 0
mirror of git://git.code.sf.net/p/zsh/code synced 2025-09-02 22:11:54 +02:00

21736: improve tests for word and identifier characters with multibyte input

This commit is contained in:
Peter Stephenson 2005-09-20 15:10:26 +00:00
parent ce43e4a22c
commit 409296e22f
7 changed files with 104 additions and 35 deletions

View file

@ -1,3 +1,10 @@
2005-09-20 Peter Stephenson <pws@csr.com>
* 21736: Src/init.c, Src/params.c, Src/pattern.c, Src/utils.c,
Src/Zle/zle.h, Src/Zle/zle_main.c: Fix WORDCHARS to use multibyte
characters; rationalise test for identifiers only to use ASCII
characters; remove existing hack for 8-bit characters.
2005-09-19 Peter Stephenson <pws@csr.com>
* unposted, c.f. 21735: Doc/Zsh/grammar.yo: document

View file

@ -66,12 +66,7 @@ typedef wint_t ZLE_INT_T;
#define ZC_iblank iswspace
#define ZC_icntrl iswcntrl
/*
* TODO: doesn't work on arguments with side effects.
* Also YUK. Not even sure this is guaranteed to work.
* Should be easy to do along the lines of wcsiword.
*/
#define ZC_iident(x) (x < 256 && iident((int)x))
#define ZC_iident wcsiident
#define ZC_tolower towlower
#define ZC_toupper towupper

View file

@ -106,11 +106,6 @@ mod_export ZLE_INT_T lastchar_wide;
/**/
mod_export int
lastchar_wide_valid;
/**/
mod_export ZLE_STRING_T zle_wordchars;
#else
# define zle_wordchars wordchars;
#endif
/* the bindings for the previous and for this key */
@ -1558,17 +1553,6 @@ trashzle(void)
kungetct = 0;
}
/*
 * Rebuild zle_wordchars, the wide character copy of the multibyte
 * string wordchars.  Does nothing unless ZLE_UNICODE_SUPPORT is defined.
 *
 * If wordchars contains an invalid multibyte sequence the wide copy
 * is set to the empty string.
 */

/**/
mod_export void
wordcharstrigger(void)
{
#ifdef ZLE_UNICODE_SUPPORT
    /*
     * mbsrtowcs() updates the source pointer it is handed and sets it
     * to NULL once the whole string has been converted, so convert from
     * a local pointer instead of clobbering wordchars itself.
     */
    const char *src = wordchars ? wordchars : "";
    size_t len = strlen(src);
    size_t nconv;

    /*
     * Each wide character consumes at least one input byte, so len
     * elements is the worst case; add one for the terminator.
     * The (possibly moved) buffer must be stored back.
     */
    zle_wordchars = (ZLE_STRING_T)
        zrealloc(zle_wordchars, (len + 1) * sizeof(*zle_wordchars));

    nconv = mbsrtowcs(zle_wordchars, &src, len, NULL);
    if (nconv == (size_t)-1) {
        /* Invalid sequence: buffer contents are unreliable, use "" */
        nconv = 0;
    }
    /* mbsrtowcs() does not terminate when it stops at the length limit */
    zle_wordchars[nconv] = L'\0';
#endif
}
/* Hook functions. Used to allow access to zle parameters if zle is
* active. */
@ -1636,8 +1620,6 @@ setup_(UNUSED(Module m))
kungetbuf = (char *) zalloc(kungetsz = 32);
comprecursive = 0;
rdstrs = NULL;
wordcharstriggerptr = wordcharstrigger;
wordcharstrigger();
/* initialise the keymap system */
init_keymaps();
@ -1712,7 +1694,6 @@ finish_(UNUSED(Module m))
zlegetlineptr = NULL;
zlereadptr = fallback_zleread;
zlesetkeymapptr= noop_function_int;
wordcharstriggerptr = noop_function;
getkeyptr = NULL;

View file

@ -1179,9 +1179,6 @@ mod_export ZleVoidIntFn zlesetkeymapptr = noop_function_int;
#endif /* !LINKED_XMOD_zshQszle */
/**/
mod_export ZleVoidFn wordcharstriggerptr = noop_function;
/**/
unsigned char *
autoload_zleread(char **lp, char **rp, int ha, int con)

View file

@ -3346,7 +3346,6 @@ wordcharssetfn(UNUSED(Param pm), char *x)
zsfree(wordchars);
wordchars = x;
inittyptab();
wordcharstriggerptr();
}
/* Function to get value for special parameter `_' */

View file

@ -2749,6 +2749,10 @@ patmatchrange(char *range, int ch)
return 1;
break;
case PP_WORD:
/*
* HERE: when we support multibyte characters,
* this test needs to be wcsiword().
*/
if (iword(ch))
return 1;
break;

View file

@ -35,6 +35,16 @@
/**/
char *scriptname;
#ifdef ZLE_UNICODE_SUPPORT
/*
 * The value of wordchars converted to a null-terminated wide character
 * array; it is rebuilt by inittyptab().  This is much more convenient
 * for testing whether a wide character is a word character.
 */
/**/
mod_export wchar_t *wordchars_wide;
#endif
/* Print an error */
/**/
@ -2456,8 +2466,18 @@ inittyptab(void)
typtab[t0] = IDIGIT | IALNUM | IWORD | IIDENT | IUSER;
for (t0 = 'a'; t0 <= 'z'; t0++)
typtab[t0] = typtab[t0 - 'a' + 'A'] = IALPHA | IALNUM | IIDENT | IUSER | IWORD;
#ifndef ZLE_UNICODE_SUPPORT
/*
* This really doesn't seem to me the right thing to do when
* we have multibyte character support... it was a hack to assume
* eight bit characters `worked' for some values of work before
* we could test for them properly. I'm not 100% convinced
* having IIDENT here is a good idea at all, but this code
* should disappear into history...
*/
for (t0 = 0240; t0 != 0400; t0++)
typtab[t0] = IALPHA | IALNUM | IIDENT | IUSER | IWORD;
#endif
typtab['_'] = IIDENT | IUSER;
typtab['-'] = IUSER;
typtab[' '] |= IBLANK | INBLANK;
@ -2477,8 +2497,44 @@ inittyptab(void)
}
typtab[STOUC(*s == Meta ? *++s ^ 32 : *s)] |= ISEP;
}
for (s = wordchars ? wordchars : DEFAULT_WORDCHARS; *s; s++)
typtab[STOUC(*s == Meta ? *++s ^ 32 : *s)] |= IWORD;
#ifdef ZLE_UNICODE_SUPPORT
if (wordchars) {
const char *wordchars_ptr = wordchars;
mbstate_t mbs;
int nchars;
memset(&mbs, 0, sizeof(mbs));
wordchars_wide = (wchar_t *)
zrealloc(wordchars_wide, (strlen(wordchars)+1)*sizeof(wchar_t));
nchars = mbsrtowcs(wordchars_wide, &wordchars_ptr, strlen(wordchars),
&mbs);
if (nchars == -1) {
/* Conversion state is undefined: better just set to null */
*wordchars_wide = L'\0';
} else {
wordchars_wide[nchars] = L'\0';
}
} else {
wordchars_wide = zrealloc(wordchars_wide, sizeof(wchar_t));
*wordchars_wide = L'\0';
}
#endif
for (s = wordchars ? wordchars : DEFAULT_WORDCHARS; *s; s++) {
int c = STOUC(*s == Meta ? *++s ^ 32 : *s);
#ifdef ZLE_UNICODE_SUPPORT
if (!isascii(c)) {
/*
* If we have support for multibyte characters, we don't
* handle non-ASCII characters here; instead, we turn
* wordchars into a wide character array.
* (We may actually have a single-byte 8-bit character set,
* but it works the same way.)
*/
continue;
}
#endif
typtab[c] |= IWORD;
}
for (s = SPECCHARS; *s; s++)
typtab[STOUC(*s)] |= ISPECIAL;
if (isset(BANGHIST) && bangchar && interact && isset(SHINSTDIN))
@ -2503,9 +2559,6 @@ wcsiword(wchar_t c)
* produces an ASCII character. If it does, use iword on that.
* If it doesn't, use iswalnum on the original character. This
* is pretty good most of the time.
*
* TODO: extend WORDCHARS to handle multibyte chars by some kind
* of hierarchical list or hash table.
*/
len = wctomb(outstr, c);
@ -2515,7 +2568,40 @@ wcsiword(wchar_t c)
} else if (len == 1 && isascii(*outstr)) {
return iword(*outstr);
} else {
return iswalnum(c);
return iswalnum(c) || wcschr(wordchars_wide, c);
}
}
/*
 * iident() macro extended to support wide characters.
 *
 * The macro is intended to test if a character is allowed in an
 * internal zsh identifier.  Until the main shell handles multibyte
 * characters it's not a good idea to allow characters other than
 * ASCII characters; it would cause zle to allow characters that
 * the main shell would reject.  Eventually we should be able
 * to allow all alphanumerics.
 *
 * Returns non-zero if c converts to a single ASCII byte for which
 * iident() is true, otherwise zero.
 *
 * Otherwise similar to wcsiword, except that the ASCII test is
 * iident() rather than iword(): characters that are only word
 * characters because they appear in WORDCHARS must not be treated
 * as identifier characters.
 */

/**/
mod_export int
wcsiident(wchar_t c)
{
    int len;
    VARARR(char, outstr, MB_CUR_MAX);

    len = wctomb(outstr, c);

    if (len == 0) {
        /* NULL is special */
        return 0;
    } else if (len == 1 && isascii(*outstr)) {
        /* plain ASCII: defer to the byte-oriented identifier test */
        return iident(*outstr);
    } else {
        /*
         * Multibyte, non-ASCII or unconvertible (len == -1):
         * not currently allowed, see above
         */
        return 0;
    }
}
#endif