mirror of
git://git.code.sf.net/p/zsh/code
synced 2025-11-17 23:51:06 +01:00
24070: some \u fixes in getkeystring()
This commit is contained in:
parent
ea15ee8867
commit
1e836045b3
4 changed files with 160 additions and 73 deletions
|
|
@ -1,3 +1,9 @@
|
||||||
|
2007-11-06 Peter Stephenson <p.w.stephenson@ntlworld.com>
|
||||||
|
|
||||||
|
* 24070: Src/utils.c, Test/A03quoting.ztst,
|
||||||
|
Test/D07multibyte.ztst: Some fixes for \u handling in
|
||||||
|
getkeystring().
|
||||||
|
|
||||||
2007-11-06 Peter Stephenson <pws@csr.com>
|
2007-11-06 Peter Stephenson <pws@csr.com>
|
||||||
|
|
||||||
* 24069: Doc/Zsh/mod_curses.yo, Src/Modules/curses.c: add
|
* 24069: Doc/Zsh/mod_curses.yo, Src/Modules/curses.c: add
|
||||||
|
|
|
||||||
207
Src/utils.c
207
Src/utils.c
|
|
@ -4578,6 +4578,31 @@ ucs4toutf8(char *dest, unsigned int wval)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The following only occurs once or twice in the code, but in different
|
||||||
|
* places depending on how character set conversion is implemented.
|
||||||
|
*/
|
||||||
|
#define CHARSET_FAILED() \
|
||||||
|
if (how & GETKEY_DOLLAR_QUOTE) { \
|
||||||
|
while ((*tdest++ = *++s)) { \
|
||||||
|
if (how & GETKEY_UPDATE_OFFSET) { \
|
||||||
|
if (s - sstart > *misc) \
|
||||||
|
(*misc)++; \
|
||||||
|
} \
|
||||||
|
if (*s == Snull) { \
|
||||||
|
*len = (s - sstart) + 1; \
|
||||||
|
*tdest = '\0'; \
|
||||||
|
return buf; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
*len = tdest - buf; \
|
||||||
|
return buf; \
|
||||||
|
} \
|
||||||
|
*t = '\0'; \
|
||||||
|
*len = t - buf; \
|
||||||
|
return buf
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Decode a key string, turning it into the literal characters.
|
* Decode a key string, turning it into the literal characters.
|
||||||
* The value returned is a newly allocated string from the heap.
|
* The value returned is a newly allocated string from the heap.
|
||||||
|
|
@ -4622,7 +4647,7 @@ mod_export char *
|
||||||
getkeystring(char *s, int *len, int how, int *misc)
|
getkeystring(char *s, int *len, int how, int *misc)
|
||||||
{
|
{
|
||||||
char *buf, tmp[1];
|
char *buf, tmp[1];
|
||||||
char *t, *tdest = NULL, *u = NULL, *sstart = s;
|
char *t, *tdest = NULL, *u = NULL, *sstart = s, *tbuf;
|
||||||
char svchar = '\0';
|
char svchar = '\0';
|
||||||
int meta = 0, control = 0;
|
int meta = 0, control = 0;
|
||||||
int i;
|
int i;
|
||||||
|
|
@ -4642,38 +4667,69 @@ getkeystring(char *s, int *len, int how, int *misc)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
DPUTS((how & GETKEY_UPDATE_OFFSET) &&
|
DPUTS((how & GETKEY_UPDATE_OFFSET) &&
|
||||||
(how & ~(GETKEY_DOLLAR_QUOTE|GETKEY_UPDATE_OFFSET)),
|
(how & ~(GETKEYS_DOLLARS_QUOTE|GETKEY_UPDATE_OFFSET)),
|
||||||
"BUG: offset updating in getkeystring only supported with $'.");
|
"BUG: offset updating in getkeystring only supported with $'.");
|
||||||
|
DPUTS((how & (GETKEY_DOLLAR_QUOTE|GETKEY_SINGLE_CHAR)) ==
|
||||||
|
(GETKEY_DOLLAR_QUOTE|GETKEY_SINGLE_CHAR),
|
||||||
|
"BUG: incompatible options in getkeystring");
|
||||||
|
|
||||||
if (how & GETKEY_SINGLE_CHAR)
|
if (how & GETKEY_SINGLE_CHAR)
|
||||||
t = buf = tmp;
|
t = buf = tmp;
|
||||||
else
|
else {
|
||||||
t = buf = zhalloc(strlen(s) + 1);
|
/* Length including terminating NUL */
|
||||||
if (how & GETKEY_DOLLAR_QUOTE) {
|
int maxlen = 1;
|
||||||
/*
|
/*
|
||||||
* TODO: we're not necessarily guaranteed the output string will
|
* We're not necessarily guaranteed the output string will
|
||||||
* be no longer than the input with \u and \U when output
|
* be no longer than the input with \u and \U when output
|
||||||
* characters need to be metafied: should check the maximum
|
* characters need to be metafied. As this is the only
|
||||||
* length.
|
* case where the string can get longer (?I think),
|
||||||
*
|
* include it in the allocation length here but don't
|
||||||
* We're going to unmetafy into the original string, but
|
* bother taking account of other factors.
|
||||||
* to get a proper metafied input we're going to metafy
|
|
||||||
* into an allocated buffer. This is necessary if we have
|
|
||||||
* \u and \U's with multiple metafied bytes. We can't
|
|
||||||
* simply remetafy the entire string because there may
|
|
||||||
* be tokens (indeed, we know there are lexical nulls floating
|
|
||||||
* around), so we have to be aware character by character
|
|
||||||
* what we are converting.
|
|
||||||
*/
|
*/
|
||||||
tdest = t;
|
for (t = s; *t; t++) {
|
||||||
t = s;
|
if (*t == '\\') {
|
||||||
|
if (!t[1]) {
|
||||||
|
maxlen++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (t[1] == 'u' || t[1] == 'U')
|
||||||
|
maxlen += MB_CUR_MAX * 2;
|
||||||
|
else
|
||||||
|
maxlen += 2;
|
||||||
|
/* skip the backslash and the following character */
|
||||||
|
t++;
|
||||||
|
} else
|
||||||
|
maxlen++;
|
||||||
|
}
|
||||||
|
if (how & GETKEY_DOLLAR_QUOTE) {
|
||||||
|
/*
|
||||||
|
* We're going to unmetafy into a new string, but
|
||||||
|
* to get a proper metafied input we're going to metafy
|
||||||
|
* into an intermediate buffer. This is necessary if we have
|
||||||
|
* \u and \U's with multiple metafied bytes. We can't
|
||||||
|
* simply remetafy the entire string because there may
|
||||||
|
* be tokens (indeed, we know there are lexical nulls floating
|
||||||
|
* around), so we have to be aware character by character
|
||||||
|
* what we are converting.
|
||||||
|
*
|
||||||
|
* In this case, buf is the final buffer (as usual),
|
||||||
|
* but t points into a temporary buffer that just has
|
||||||
|
* to be long enough to hold the result of one escape
|
||||||
|
* code transformation. We count this as a full multibyte
|
||||||
|
* character (MB_CUR_MAX) with every character metafied
|
||||||
|
* (*2) plus a little bit of fuzz (for e.g. the odd backslash).
|
||||||
|
*/
|
||||||
|
buf = tdest = zhalloc(maxlen);
|
||||||
|
t = tbuf = zhalloc(MB_CUR_MAX * 3 + 1);
|
||||||
|
} else {
|
||||||
|
t = buf = zhalloc(maxlen);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
for (; *s; s++) {
|
for (; *s; s++) {
|
||||||
char *torig = t;
|
|
||||||
if (*s == '\\' && s[1]) {
|
if (*s == '\\' && s[1]) {
|
||||||
int miscadded;
|
int miscadded;
|
||||||
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart > *misc) {
|
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc) {
|
||||||
(*misc)++;
|
(*misc)--;
|
||||||
miscadded = 1;
|
miscadded = 1;
|
||||||
} else
|
} else
|
||||||
miscadded = 0;
|
miscadded = 0;
|
||||||
|
|
@ -4707,7 +4763,7 @@ getkeystring(char *s, int *len, int how, int *misc)
|
||||||
if (!(how & GETKEY_EMACS)) {
|
if (!(how & GETKEY_EMACS)) {
|
||||||
*t++ = '\\', s--;
|
*t++ = '\\', s--;
|
||||||
if (miscadded)
|
if (miscadded)
|
||||||
(*misc)--;
|
(*misc)++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
/* FALL THROUGH */
|
/* FALL THROUGH */
|
||||||
|
|
@ -4715,30 +4771,32 @@ getkeystring(char *s, int *len, int how, int *misc)
|
||||||
*t++ = '\033';
|
*t++ = '\033';
|
||||||
break;
|
break;
|
||||||
case 'M':
|
case 'M':
|
||||||
|
/* HERE: GETKEY_UPDATE_OFFSET */
|
||||||
if (how & GETKEY_EMACS) {
|
if (how & GETKEY_EMACS) {
|
||||||
if (s[1] == '-')
|
if (s[1] == '-')
|
||||||
s++;
|
s++;
|
||||||
meta = 1 + control; /* preserve the order of ^ and meta */
|
meta = 1 + control; /* preserve the order of ^ and meta */
|
||||||
} else {
|
} else {
|
||||||
if (miscadded)
|
if (miscadded)
|
||||||
(*misc)--;
|
(*misc)++;
|
||||||
*t++ = '\\', s--;
|
*t++ = '\\', s--;
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
case 'C':
|
case 'C':
|
||||||
|
/* HERE: GETKEY_UPDATE_OFFSET */
|
||||||
if (how & GETKEY_EMACS) {
|
if (how & GETKEY_EMACS) {
|
||||||
if (s[1] == '-')
|
if (s[1] == '-')
|
||||||
s++;
|
s++;
|
||||||
control = 1;
|
control = 1;
|
||||||
} else {
|
} else {
|
||||||
if (miscadded)
|
if (miscadded)
|
||||||
(*misc)--;
|
(*misc)++;
|
||||||
*t++ = '\\', s--;
|
*t++ = '\\', s--;
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
case Meta:
|
case Meta:
|
||||||
if (miscadded)
|
if (miscadded)
|
||||||
(*misc)--;
|
(*misc)++;
|
||||||
*t++ = '\\', s--;
|
*t++ = '\\', s--;
|
||||||
break;
|
break;
|
||||||
case '-':
|
case '-':
|
||||||
|
|
@ -4755,15 +4813,16 @@ getkeystring(char *s, int *len, int how, int *misc)
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
goto def;
|
goto def;
|
||||||
case 'u':
|
|
||||||
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart > *misc)
|
|
||||||
(*misc) += 4;
|
|
||||||
case 'U':
|
case 'U':
|
||||||
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart > *misc) {
|
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc)
|
||||||
(*misc) += 6;
|
(*misc) -= 4;
|
||||||
|
/* FALLTHROUGH */
|
||||||
|
case 'u':
|
||||||
|
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc) {
|
||||||
|
(*misc) -= 6; /* HERE don't really believe this */
|
||||||
/*
|
/*
|
||||||
* We've now adjusted the offset for all the input
|
* We've now adjusted the offset for all the input
|
||||||
* characters, so we need to subtract for each
|
* characters, so we need to add for each
|
||||||
* byte of output below.
|
* byte of output below.
|
||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
|
|
@ -4787,31 +4846,18 @@ getkeystring(char *s, int *len, int how, int *misc)
|
||||||
count = wctomb(t, (wchar_t)wval);
|
count = wctomb(t, (wchar_t)wval);
|
||||||
if (count == -1) {
|
if (count == -1) {
|
||||||
zerr("character not in range");
|
zerr("character not in range");
|
||||||
if (how & GETKEY_DOLLAR_QUOTE) {
|
CHARSET_FAILED();
|
||||||
/* HERE new convention */
|
|
||||||
for (u = t; (*u++ = *++s);) {
|
|
||||||
if ((how & GETKEY_UPDATE_OFFSET) &&
|
|
||||||
s - sstart > *misc)
|
|
||||||
(*misc)++;
|
|
||||||
}
|
|
||||||
return t;
|
|
||||||
}
|
|
||||||
*t = '\0';
|
|
||||||
*len = t - buf;
|
|
||||||
return buf;
|
|
||||||
}
|
}
|
||||||
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart > *misc)
|
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc)
|
||||||
(*misc) += count;
|
(*misc) += count;
|
||||||
t += count;
|
t += count;
|
||||||
continue;
|
|
||||||
# else
|
# else
|
||||||
# if defined(HAVE_NL_LANGINFO) && defined(CODESET)
|
# if defined(HAVE_NL_LANGINFO) && defined(CODESET)
|
||||||
if (!strcmp(nl_langinfo(CODESET), "UTF-8")) {
|
if (!strcmp(nl_langinfo(CODESET), "UTF-8")) {
|
||||||
count = ucs4toutf8(t, wval);
|
count = ucs4toutf8(t, wval);
|
||||||
t += count;
|
t += count;
|
||||||
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart > *misc)
|
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc)
|
||||||
(*misc) += count;
|
(*misc) += count;
|
||||||
continue;
|
|
||||||
} else {
|
} else {
|
||||||
# ifdef HAVE_ICONV
|
# ifdef HAVE_ICONV
|
||||||
ICONV_CONST char *inptr = inbuf;
|
ICONV_CONST char *inptr = inbuf;
|
||||||
|
|
@ -4826,46 +4872,55 @@ getkeystring(char *s, int *len, int how, int *misc)
|
||||||
cd = iconv_open(nl_langinfo(CODESET), "UCS-4BE");
|
cd = iconv_open(nl_langinfo(CODESET), "UCS-4BE");
|
||||||
if (cd == (iconv_t)-1) {
|
if (cd == (iconv_t)-1) {
|
||||||
zerr("cannot do charset conversion");
|
zerr("cannot do charset conversion");
|
||||||
if (how & GETKEY_DOLLAR_QUOTE) {
|
CHARSET_FAILED();
|
||||||
/* HERE: new convention */
|
|
||||||
for (u = t; (*u++ = *++s);) {
|
|
||||||
if ((how & GETKEY_UPDATE_OFFSET) &&
|
|
||||||
s - sstart > *misc)
|
|
||||||
(*misc)++;
|
|
||||||
}
|
|
||||||
return t;
|
|
||||||
}
|
|
||||||
*t = '\0';
|
|
||||||
*len = t - buf;
|
|
||||||
return buf;
|
|
||||||
}
|
}
|
||||||
count = iconv(cd, &inptr, &inbytes, &t, &outbytes);
|
count = iconv(cd, &inptr, &inbytes, &t, &outbytes);
|
||||||
iconv_close(cd);
|
iconv_close(cd);
|
||||||
if (count == (size_t)-1) {
|
if (count == (size_t)-1) {
|
||||||
zerr("character not in range");
|
zerr("character not in range");
|
||||||
*t = '\0';
|
CHARSET_FAILED();
|
||||||
*len = t - buf;
|
|
||||||
return buf;
|
|
||||||
}
|
}
|
||||||
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart > *misc)
|
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc)
|
||||||
(*misc) += count;
|
(*misc) += count;
|
||||||
continue;
|
|
||||||
# else
|
# else
|
||||||
zerr("cannot do charset conversion");
|
zerr("cannot do charset conversion");
|
||||||
*t = '\0';
|
CHARSET_FAILED();
|
||||||
*len = t - buf;
|
|
||||||
return buf;
|
|
||||||
# endif
|
# endif
|
||||||
}
|
}
|
||||||
# else
|
# else
|
||||||
zerr("cannot do charset conversion");
|
zerr("cannot do charset conversion");
|
||||||
*t = '\0';
|
CHARSET_FAILED();
|
||||||
*len = t - buf;
|
|
||||||
return buf;
|
|
||||||
# endif
|
# endif
|
||||||
# endif
|
# endif
|
||||||
|
if (how & GETKEY_DOLLAR_QUOTE) {
|
||||||
|
char *t2;
|
||||||
|
for (t2 = tbuf; t2 < t; t2++) {
|
||||||
|
if (imeta(*t2)) {
|
||||||
|
*tdest++ = Meta;
|
||||||
|
*tdest++ = *t2 ^ 32;
|
||||||
|
} else
|
||||||
|
*tdest++ = *t2;
|
||||||
|
}
|
||||||
|
/* reset temporary buffer after handling */
|
||||||
|
t = tbuf;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
case '\'':
|
||||||
|
case '\\':
|
||||||
|
if (how & GETKEY_DOLLAR_QUOTE) {
|
||||||
|
/*
|
||||||
|
* Usually \' and \\ will have the initial
|
||||||
|
* \ turned into a Bnull, however that's not
|
||||||
|
* necessarily the case when called from
|
||||||
|
* completion.
|
||||||
|
*/
|
||||||
|
*t++ = *s;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* FALLTHROUGH */
|
||||||
default:
|
default:
|
||||||
def:
|
def:
|
||||||
|
/* HERE: GETKEY_UPDATE_OFFSET? */
|
||||||
if ((idigit(*s) && *s < '8') || *s == 'x') {
|
if ((idigit(*s) && *s < '8') || *s == 'x') {
|
||||||
if (!(how & GETKEY_OCTAL_ESC)) {
|
if (!(how & GETKEY_OCTAL_ESC)) {
|
||||||
if (*s == '0')
|
if (*s == '0')
|
||||||
|
|
@ -4890,7 +4945,7 @@ getkeystring(char *s, int *len, int how, int *misc)
|
||||||
} else {
|
} else {
|
||||||
if (!(how & GETKEY_EMACS) && *s != '\\') {
|
if (!(how & GETKEY_EMACS) && *s != '\\') {
|
||||||
if (miscadded)
|
if (miscadded)
|
||||||
(*misc)--;
|
(*misc)++;
|
||||||
*t++ = '\\';
|
*t++ = '\\';
|
||||||
}
|
}
|
||||||
*t++ = *s;
|
*t++ = *s;
|
||||||
|
|
@ -4961,6 +5016,8 @@ getkeystring(char *s, int *len, int how, int *misc)
|
||||||
*/
|
*/
|
||||||
*tdest++ = *++s;
|
*tdest++ = *++s;
|
||||||
}
|
}
|
||||||
|
/* reset temporary buffer, now handled */
|
||||||
|
t = tbuf;
|
||||||
continue;
|
continue;
|
||||||
} else
|
} else
|
||||||
*t++ = *s;
|
*t++ = *s;
|
||||||
|
|
@ -4984,13 +5041,17 @@ getkeystring(char *s, int *len, int how, int *misc)
|
||||||
}
|
}
|
||||||
if (how & GETKEY_DOLLAR_QUOTE) {
|
if (how & GETKEY_DOLLAR_QUOTE) {
|
||||||
char *t2;
|
char *t2;
|
||||||
for (t2 = torig; t2 < t; t2++) {
|
for (t2 = tbuf; t2 < t; t2++) {
|
||||||
if (imeta(*t2)) {
|
if (imeta(*t2)) {
|
||||||
*tdest++ = Meta;
|
*tdest++ = Meta;
|
||||||
*tdest++ = *t2 ^ 32;
|
*tdest++ = *t2 ^ 32;
|
||||||
} else
|
} else
|
||||||
*tdest++ = *t2;
|
*tdest++ = *t2;
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
|
* Reset use of temporary buffer.
|
||||||
|
*/
|
||||||
|
t = tbuf;
|
||||||
}
|
}
|
||||||
if ((how & GETKEY_SINGLE_CHAR) && t != tmp) {
|
if ((how & GETKEY_SINGLE_CHAR) && t != tmp) {
|
||||||
*misc = STOUC(tmp[0]);
|
*misc = STOUC(tmp[0]);
|
||||||
|
|
|
||||||
|
|
@ -42,3 +42,13 @@
|
||||||
unsetopt rcquotes
|
unsetopt rcquotes
|
||||||
0:Yes RC_QUOTES with single quotes
|
0:Yes RC_QUOTES with single quotes
|
||||||
>'
|
>'
|
||||||
|
|
||||||
|
print '<\u0041>'
|
||||||
|
printf '%s\n' $'<\u0042>'
|
||||||
|
print '<\u0043>'
|
||||||
|
printf '%s\n' $'<\u0044>'
|
||||||
|
0:\u in both print and printf
|
||||||
|
><A>
|
||||||
|
><B>
|
||||||
|
><C>
|
||||||
|
><D>
|
||||||
|
|
|
||||||
|
|
@ -384,3 +384,13 @@
|
||||||
print -r ${(q)foo}
|
print -r ${(q)foo}
|
||||||
0:Backslash-quoting of unprintable/invalid characters uses $'...'
|
0:Backslash-quoting of unprintable/invalid characters uses $'...'
|
||||||
>X$'\300'Y$'\a'Z$'\177'T
|
>X$'\300'Y$'\a'Z$'\177'T
|
||||||
|
|
||||||
|
# This also isn't strictly multibyte and is here to reduce the
|
||||||
|
# likelihood of a "can't do character set conversion" error.
|
||||||
|
testfn() { (LC_ALL=C; print $'\u00e9') }
|
||||||
|
repeat 4 testfn
|
||||||
|
1:error handling in Unicode quoting
|
||||||
|
?testfn: character not in range
|
||||||
|
?testfn: character not in range
|
||||||
|
?testfn: character not in range
|
||||||
|
?testfn: character not in range
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue