1
0
Fork 0
mirror of git://git.code.sf.net/p/zsh/code synced 2025-11-17 23:51:06 +01:00

24070: some \u fixes in getkeystring()

This commit is contained in:
Peter Stephenson 2007-11-06 20:45:07 +00:00
parent ea15ee8867
commit 1e836045b3
4 changed files with 160 additions and 73 deletions

View file

@ -1,3 +1,9 @@
2007-11-06 Peter Stephenson <p.w.stephenson@ntlworld.com>
* 24070: Src/utils.c, Test/A03quoting.ztst,
Test/D07multibyte.ztst: Some fixes for \u handling in
getkeystring().
2007-11-06 Peter Stephenson <pws@csr.com> 2007-11-06 Peter Stephenson <pws@csr.com>
* 24069: Doc/Zsh/mod_curses.yo, Src/Modules/curses.c: add * 24069: Doc/Zsh/mod_curses.yo, Src/Modules/curses.c: add

View file

@ -4578,6 +4578,31 @@ ucs4toutf8(char *dest, unsigned int wval)
} }
#endif #endif
/*
 * Common failure exit for the \u and \U handling in getkeystring(),
 * used after zerr() has reported that the character could not be
 * converted. The following only occurs once or twice in the code,
 * but in different places depending how character set conversion is
 * implemented.
 *
 * This is not a self-contained statement. It expands in place, uses
 * the enclosing function's how, s, sstart, t, tdest, buf, len and misc,
 * and always returns buf from that function. The expansion ends
 * without a semicolon, so it has to be written as "CHARSET_FAILED();".
 *
 * With GETKEY_DOLLAR_QUOTE set, the rest of the input, starting with
 * the byte after s, is copied byte for byte to tdest. If
 * GETKEY_UPDATE_OFFSET is also set, *misc is incremented for each
 * copied byte whose input position is beyond *misc. If an Snull is
 * copied, the output is terminated there and *len is set to the input
 * length consumed, (s - sstart) + 1; if the terminating null is reached
 * first, *len is set to tdest - buf.
 *
 * Without GETKEY_DOLLAR_QUOTE, the output is terminated at t and *len
 * is set to t - buf.
 */
#define CHARSET_FAILED() \
if (how & GETKEY_DOLLAR_QUOTE) { \
while ((*tdest++ = *++s)) { \
if (how & GETKEY_UPDATE_OFFSET) { \
if (s - sstart > *misc) \
(*misc)++; \
} \
if (*s == Snull) { \
*len = (s - sstart) + 1; \
*tdest = '\0'; \
return buf; \
} \
} \
*len = tdest - buf; \
return buf; \
} \
*t = '\0'; \
*len = t - buf; \
return buf
/* /*
* Decode a key string, turning it into the literal characters. * Decode a key string, turning it into the literal characters.
* The value returned is a newly allocated string from the heap. * The value returned is a newly allocated string from the heap.
@ -4622,7 +4647,7 @@ mod_export char *
getkeystring(char *s, int *len, int how, int *misc) getkeystring(char *s, int *len, int how, int *misc)
{ {
char *buf, tmp[1]; char *buf, tmp[1];
char *t, *tdest = NULL, *u = NULL, *sstart = s; char *t, *tdest = NULL, *u = NULL, *sstart = s, *tbuf;
char svchar = '\0'; char svchar = '\0';
int meta = 0, control = 0; int meta = 0, control = 0;
int i; int i;
@ -4642,38 +4667,69 @@ getkeystring(char *s, int *len, int how, int *misc)
#endif #endif
DPUTS((how & GETKEY_UPDATE_OFFSET) && DPUTS((how & GETKEY_UPDATE_OFFSET) &&
(how & ~(GETKEY_DOLLAR_QUOTE|GETKEY_UPDATE_OFFSET)), (how & ~(GETKEY_DOLLAR_QUOTE|GETKEY_UPDATE_OFFSET)),
"BUG: offset updating in getkeystring only supported with $'."); "BUG: offset updating in getkeystring only supported with $'.");
DPUTS((how & (GETKEY_DOLLAR_QUOTE|GETKEY_SINGLE_CHAR)) ==
(GETKEY_DOLLAR_QUOTE|GETKEY_SINGLE_CHAR),
"BUG: incompatible options in getkeystring");
if (how & GETKEY_SINGLE_CHAR) if (how & GETKEY_SINGLE_CHAR)
t = buf = tmp; t = buf = tmp;
else else {
t = buf = zhalloc(strlen(s) + 1); /* Length including terminating NULL */
if (how & GETKEY_DOLLAR_QUOTE) { int maxlen = 1;
/* /*
* TODO: we're not necessarily guaranteed the output string will * We're not necessarily guaranteed the output string will
* be no longer than the input with \u and \U when output * be no longer than the input with \u and \U when output
* characters need to be metafied: should check the maximum * characters need to be metafied. As this is the only
* length. * case where the string can get longer (?I think),
* * include it in the allocation length here but don't
* We're going to unmetafy into the original string, but * bother taking account of other factors.
* to get a proper metafied input we're going to metafy
* into an allocated buffer. This is necessary if we have
* \u and \U's with multiple metafied bytes. We can't
* simply remetafy the entire string because there may
* be tokens (indeed, we know there are lexical nulls floating
* around), so we have to be aware character by character
* what we are converting.
*/ */
tdest = t; for (t = s; *t; t++) {
t = s; if (*t == '\\') {
if (!t[1]) {
maxlen++;
break;
}
if (t[1] == 'u' || t[1] == 'U')
maxlen += MB_CUR_MAX * 2;
else
maxlen += 2;
/* skip the backslash and the following character */
t++;
} else
maxlen++;
}
if (how & GETKEY_DOLLAR_QUOTE) {
/*
* We're going to unmetafy into a new string, but
* to get a proper metafied input we're going to metafy
* into an intermediate buffer. This is necessary if we have
* \u and \U's with multiple metafied bytes. We can't
* simply remetafy the entire string because there may
* be tokens (indeed, we know there are lexical nulls floating
* around), so we have to be aware character by character
* what we are converting.
*
* In this case, buf is the final buffer (as usual),
* but t points into a temporary buffer that just has
* to be long enough to hold the result of one escape
* code transformation. We count this as a full multibyte
* character (MB_CUR_MAX) with every character metafied
* (*2) plus a little bit of fuzz (for e.g. the odd backslash).
*/
buf = tdest = zhalloc(maxlen);
t = tbuf = zhalloc(MB_CUR_MAX * 3 + 1);
} else {
t = buf = zhalloc(maxlen);
}
} }
for (; *s; s++) { for (; *s; s++) {
char *torig = t;
if (*s == '\\' && s[1]) { if (*s == '\\' && s[1]) {
int miscadded; int miscadded;
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart > *misc) { if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc) {
(*misc)++; (*misc)--;
miscadded = 1; miscadded = 1;
} else } else
miscadded = 0; miscadded = 0;
@ -4707,7 +4763,7 @@ getkeystring(char *s, int *len, int how, int *misc)
if (!(how & GETKEY_EMACS)) { if (!(how & GETKEY_EMACS)) {
*t++ = '\\', s--; *t++ = '\\', s--;
if (miscadded) if (miscadded)
(*misc)--; (*misc)++;
continue; continue;
} }
/* FALL THROUGH */ /* FALL THROUGH */
@ -4715,30 +4771,32 @@ getkeystring(char *s, int *len, int how, int *misc)
*t++ = '\033'; *t++ = '\033';
break; break;
case 'M': case 'M':
/* HERE: GETKEY_UPDATE_OFFSET */
if (how & GETKEY_EMACS) { if (how & GETKEY_EMACS) {
if (s[1] == '-') if (s[1] == '-')
s++; s++;
meta = 1 + control; /* preserve the order of ^ and meta */ meta = 1 + control; /* preserve the order of ^ and meta */
} else { } else {
if (miscadded) if (miscadded)
(*misc)--; (*misc)++;
*t++ = '\\', s--; *t++ = '\\', s--;
} }
continue; continue;
case 'C': case 'C':
/* HERE: GETKEY_UPDATE_OFFSET */
if (how & GETKEY_EMACS) { if (how & GETKEY_EMACS) {
if (s[1] == '-') if (s[1] == '-')
s++; s++;
control = 1; control = 1;
} else { } else {
if (miscadded) if (miscadded)
(*misc)--; (*misc)++;
*t++ = '\\', s--; *t++ = '\\', s--;
} }
continue; continue;
case Meta: case Meta:
if (miscadded) if (miscadded)
(*misc)--; (*misc)++;
*t++ = '\\', s--; *t++ = '\\', s--;
break; break;
case '-': case '-':
@ -4755,15 +4813,16 @@ getkeystring(char *s, int *len, int how, int *misc)
return buf; return buf;
} }
goto def; goto def;
case 'u':
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart > *misc)
(*misc) += 4;
case 'U': case 'U':
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart > *misc) { if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc)
(*misc) += 6; (*misc) -= 4;
/* FALLTHROUGH */
case 'u':
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc) {
(*misc) -= 6; /* HERE don't really believe this */
/* /*
* We've now adjusted the offset for all the input * We've now adjusted the offset for all the input
* characters, so we need to subtract for each * characters, so we need to add for each
* byte of output below. * byte of output below.
*/ */
} }
@ -4787,31 +4846,18 @@ getkeystring(char *s, int *len, int how, int *misc)
count = wctomb(t, (wchar_t)wval); count = wctomb(t, (wchar_t)wval);
if (count == -1) { if (count == -1) {
zerr("character not in range"); zerr("character not in range");
if (how & GETKEY_DOLLAR_QUOTE) { CHARSET_FAILED();
/* HERE new convention */
for (u = t; (*u++ = *++s);) {
if ((how & GETKEY_UPDATE_OFFSET) &&
s - sstart > *misc)
(*misc)++;
}
return t;
}
*t = '\0';
*len = t - buf;
return buf;
} }
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart > *misc) if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc)
(*misc) += count; (*misc) += count;
t += count; t += count;
continue;
# else # else
# if defined(HAVE_NL_LANGINFO) && defined(CODESET) # if defined(HAVE_NL_LANGINFO) && defined(CODESET)
if (!strcmp(nl_langinfo(CODESET), "UTF-8")) { if (!strcmp(nl_langinfo(CODESET), "UTF-8")) {
count = ucs4toutf8(t, wval); count = ucs4toutf8(t, wval);
t += count; t += count;
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart > *misc) if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc)
(*misc) += count; (*misc) += count;
continue;
} else { } else {
# ifdef HAVE_ICONV # ifdef HAVE_ICONV
ICONV_CONST char *inptr = inbuf; ICONV_CONST char *inptr = inbuf;
@ -4826,46 +4872,55 @@ getkeystring(char *s, int *len, int how, int *misc)
cd = iconv_open(nl_langinfo(CODESET), "UCS-4BE"); cd = iconv_open(nl_langinfo(CODESET), "UCS-4BE");
if (cd == (iconv_t)-1) { if (cd == (iconv_t)-1) {
zerr("cannot do charset conversion"); zerr("cannot do charset conversion");
if (how & GETKEY_DOLLAR_QUOTE) { CHARSET_FAILED();
/* HERE: new convention */
for (u = t; (*u++ = *++s);) {
if ((how & GETKEY_UPDATE_OFFSET) &&
s - sstart > *misc)
(*misc)++;
}
return t;
}
*t = '\0';
*len = t - buf;
return buf;
} }
count = iconv(cd, &inptr, &inbytes, &t, &outbytes); count = iconv(cd, &inptr, &inbytes, &t, &outbytes);
iconv_close(cd); iconv_close(cd);
if (count == (size_t)-1) { if (count == (size_t)-1) {
zerr("character not in range"); zerr("character not in range");
*t = '\0'; CHARSET_FAILED();
*len = t - buf;
return buf;
} }
if ((how & GETKEY_UPDATE_OFFSET) && s - sstart > *misc) if ((how & GETKEY_UPDATE_OFFSET) && s - sstart < *misc)
(*misc) += count; (*misc) += count;
continue;
# else # else
zerr("cannot do charset conversion"); zerr("cannot do charset conversion");
*t = '\0'; CHARSET_FAILED();
*len = t - buf;
return buf;
# endif # endif
} }
# else # else
zerr("cannot do charset conversion"); zerr("cannot do charset conversion");
*t = '\0'; CHARSET_FAILED();
*len = t - buf;
return buf;
# endif # endif
# endif # endif
if (how & GETKEY_DOLLAR_QUOTE) {
char *t2;
for (t2 = tbuf; t2 < t; t2++) {
if (imeta(*t2)) {
*tdest++ = Meta;
*tdest++ = *t2 ^ 32;
} else
*tdest++ = *t2;
}
/* reset temporary buffer after handling */
t = tbuf;
}
continue;
case '\'':
case '\\':
if (how & GETKEY_DOLLAR_QUOTE) {
/*
* Usually \' and \\ will have the initial
* \ turned into a Bnull, however that's not
* necessarily the case when called from
* completion.
*/
*t++ = *s;
break;
}
/* FALLTHROUGH */
default: default:
def: def:
/* HERE: GETKEY_UPDATE_OFFSET? */
if ((idigit(*s) && *s < '8') || *s == 'x') { if ((idigit(*s) && *s < '8') || *s == 'x') {
if (!(how & GETKEY_OCTAL_ESC)) { if (!(how & GETKEY_OCTAL_ESC)) {
if (*s == '0') if (*s == '0')
@ -4890,7 +4945,7 @@ getkeystring(char *s, int *len, int how, int *misc)
} else { } else {
if (!(how & GETKEY_EMACS) && *s != '\\') { if (!(how & GETKEY_EMACS) && *s != '\\') {
if (miscadded) if (miscadded)
(*misc)--; (*misc)++;
*t++ = '\\'; *t++ = '\\';
} }
*t++ = *s; *t++ = *s;
@ -4961,6 +5016,8 @@ getkeystring(char *s, int *len, int how, int *misc)
*/ */
*tdest++ = *++s; *tdest++ = *++s;
} }
/* reset temporary buffer, now handled */
t = tbuf;
continue; continue;
} else } else
*t++ = *s; *t++ = *s;
@ -4984,13 +5041,17 @@ getkeystring(char *s, int *len, int how, int *misc)
} }
if (how & GETKEY_DOLLAR_QUOTE) { if (how & GETKEY_DOLLAR_QUOTE) {
char *t2; char *t2;
for (t2 = torig; t2 < t; t2++) { for (t2 = tbuf; t2 < t; t2++) {
if (imeta(*t2)) { if (imeta(*t2)) {
*tdest++ = Meta; *tdest++ = Meta;
*tdest++ = *t2 ^ 32; *tdest++ = *t2 ^ 32;
} else } else
*tdest++ = *t2; *tdest++ = *t2;
} }
/*
* Reset use of temporary buffer.
*/
t = tbuf;
} }
if ((how & GETKEY_SINGLE_CHAR) && t != tmp) { if ((how & GETKEY_SINGLE_CHAR) && t != tmp) {
*misc = STOUC(tmp[0]); *misc = STOUC(tmp[0]);

View file

@ -42,3 +42,13 @@
unsetopt rcquotes unsetopt rcquotes
0:Yes RC_QUOTES with single quotes 0:Yes RC_QUOTES with single quotes
>' >'
print '<\u0041>'
printf '%s\n' $'<\u0042>'
print '<\u0043>'
printf '%s\n' $'<\u0044>'
0:\u in both print and printf
><A>
><B>
><C>
><D>

View file

@ -384,3 +384,13 @@
print -r ${(q)foo} print -r ${(q)foo}
0:Backslash-quoting of unprintable/invalid characters uses $'...' 0:Backslash-quoting of unprintable/invalid characters uses $'...'
>X$'\300'Y$'\a'Z$'\177'T >X$'\300'Y$'\a'Z$'\177'T
# This also isn't strictly multibyte and is here to reduce the
# likelihood of a "can't do character set conversion" error.
testfn() { (LC_ALL=C; print $'\u00e9') }
repeat 4 testfn
1:error handling in Unicode quoting
?testfn: character not in range
?testfn: character not in range
?testfn: character not in range
?testfn: character not in range