mirror of
git://git.code.sf.net/p/zsh/code
synced 2025-11-01 18:30:55 +01:00
26047: convert lower levels of completion matching to use
multibyte strings and wide characters
This commit is contained in:
parent
ac38534728
commit
85c513894d
7 changed files with 404 additions and 317 deletions
13
ChangeLog
13
ChangeLog
|
|
@ -1,12 +1,17 @@
|
|||
2008-11-15 Peter Stephenson <p.w.stephenson@ntlworld.com>
|
||||
|
||||
* 26047: Src/pattern.c, Src/Zle/comp.h, Src/Zle/compmatch.c,
|
||||
Src/Zle/complete.c, Src/Zle/compmatch.c, Src/Zle/computil.c,
|
||||
Src/Zle/zle_utils.c: convert lower levels of completion
|
||||
matching to user multibyte strings / wide characters.
|
||||
|
||||
* Phil (unposted): README: another typo.
|
||||
|
||||
2008-11-15 Clint Adams <clint@zsh.org>
|
||||
|
||||
* 26046: Functions/TCP/tcp_send: return an error if session's fd is
|
||||
unusable.
|
||||
|
||||
2008-11-15 Peter Stephenson <p.w.stephenson@ntlworld.com>
|
||||
|
||||
* Phil (unposted): README: another typo.
|
||||
|
||||
2008-11-13 Peter Stephenson <p.w.stephenson@ntlworld.com>
|
||||
|
||||
* 26042 with some fixes from 26043 (Mikael): README,
|
||||
|
|
|
|||
|
|
@ -190,10 +190,7 @@ struct cpattern {
|
|||
* Note the allocated length may be longer
|
||||
* than the null-terminated string.
|
||||
*/
|
||||
int chr; /* if a single character, it
|
||||
* TODO: eventually should be a
|
||||
* convchar_t.
|
||||
*/
|
||||
convchar_t chr; /* if a single character, it */
|
||||
} u;
|
||||
};
|
||||
|
||||
|
|
@ -201,9 +198,17 @@ struct cpattern {
|
|||
* For now this just handles single-byte characters.
|
||||
* TODO: this will change.
|
||||
*/
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
#define PATMATCHRANGE(r, c, ip, mtp) mb_patmatchrange(r, c, ip, mtp)
|
||||
#define PATMATCHINDEX(r, i, cp, mtp) mb_patmatchindex(r, i, cp, mtp)
|
||||
#define CONVCAST(c) ((wchar_t)(c))
|
||||
#define CHR_INVALID (WEOF)
|
||||
#else
|
||||
#define PATMATCHRANGE(r, c, ip, mtp) patmatchrange(r, c, ip, mtp)
|
||||
#define PATMATCHINDEX(r, i, cp, mtp) patmatchindex(r, i, cp, mtp)
|
||||
#define CONVCAST(c) (c)
|
||||
#define CONVCAST(c) (c)
|
||||
#define CHR_INVALID (-1)
|
||||
#endif
|
||||
|
||||
/* This is a special return value for parse_cmatcher(), *
|
||||
* signalling an error. */
|
||||
|
|
|
|||
|
|
@ -381,11 +381,12 @@ parse_pattern(char *name, char **sp, int *lp, char e, int *err)
|
|||
{
|
||||
Cpattern ret = NULL, r = NULL, n;
|
||||
char *s = *sp;
|
||||
int inchar;
|
||||
int l = 0;
|
||||
convchar_t inchar;
|
||||
int l = 0, inlen;
|
||||
|
||||
*err = 0;
|
||||
|
||||
MB_METACHARINIT();
|
||||
while (*s && (e ? (*s != e) : !inblank(*s))) {
|
||||
n = (Cpattern) hcalloc(sizeof(*n));
|
||||
n->next = NULL;
|
||||
|
|
@ -409,11 +410,12 @@ parse_pattern(char *name, char **sp, int *lp, char e, int *err)
|
|||
if (*s == '\\' && s[1])
|
||||
s++;
|
||||
|
||||
if (*s == Meta)
|
||||
inchar = STOUC(*++s) ^ 32;
|
||||
else
|
||||
inchar = STOUC(*s);
|
||||
s++;
|
||||
inlen = MB_METACHARLENCONV(s, &inchar);
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
if (inchar == WEOF)
|
||||
inchar = (convchar_t)(*s == Meta ? s[1] ^ 32 : *s);
|
||||
#endif
|
||||
s += inlen;
|
||||
n->tp = CPAT_CHAR;
|
||||
n->u.chr = inchar;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1152,11 +1152,10 @@ comp_match(char *pfx, char *sfx, char *w, Patprog cp, Cline *clp, int qu,
|
|||
*/
|
||||
|
||||
/**/
|
||||
mod_export int
|
||||
pattern_match1(Cpattern p, int c, int *mtp)
|
||||
mod_export convchar_t
|
||||
pattern_match1(Cpattern p, convchar_t c, int *mtp)
|
||||
{
|
||||
/* TODO: should become convchar_t */
|
||||
int ind;
|
||||
convchar_t ind;
|
||||
|
||||
*mtp = 0;
|
||||
switch (p->tp) {
|
||||
|
|
@ -1193,29 +1192,31 @@ pattern_match1(Cpattern p, int c, int *mtp)
|
|||
* wind is the index returned by a pattern match on the word pattern,
|
||||
* with type wmtp.
|
||||
* wchr is the word character.
|
||||
* Return -1 if no matching character, else the character.
|
||||
* Return CHR_INVALID if no matching character, else the character.
|
||||
*
|
||||
* Only makes sense if lp->tp == CPAT_EQUIV and the (unseen) word
|
||||
* pattern also has that type.
|
||||
*/
|
||||
|
||||
/**/
|
||||
mod_export int
|
||||
pattern_match_equivalence(Cpattern lp, int wind, int wmtp, int wchr)
|
||||
mod_export convchar_t
|
||||
pattern_match_equivalence(Cpattern lp, convchar_t wind, int wmtp,
|
||||
convchar_t wchr)
|
||||
{
|
||||
int lchr, lmtp;
|
||||
convchar_t lchr;
|
||||
int lmtp;
|
||||
|
||||
if (!PATMATCHINDEX(lp->u.str, wind-1, &lchr, &lmtp)) {
|
||||
/*
|
||||
* No equivalent. No possible match; give up.
|
||||
*/
|
||||
return -1;
|
||||
return CHR_INVALID;
|
||||
}
|
||||
/*
|
||||
* If we matched an exact character rather than a range
|
||||
* type, return it.
|
||||
*/
|
||||
if (lchr != -1)
|
||||
if (lchr != CHR_INVALID)
|
||||
return lchr;
|
||||
|
||||
/*
|
||||
|
|
@ -1223,9 +1224,9 @@ pattern_match_equivalence(Cpattern lp, int wind, int wmtp, int wchr)
|
|||
* version of the word character.
|
||||
*/
|
||||
if (wmtp == PP_UPPER && lmtp == PP_LOWER)
|
||||
return tulower(wchr);
|
||||
return ZC_tolower(wchr);
|
||||
else if (wmtp == PP_LOWER && lmtp == PP_UPPER)
|
||||
return tuupper(wchr);
|
||||
return ZC_toupper(wchr);
|
||||
else if (wmtp == lmtp) {
|
||||
/*
|
||||
* Be lenient and allow identical replacements
|
||||
|
|
@ -1238,25 +1239,21 @@ pattern_match_equivalence(Cpattern lp, int wind, int wmtp, int wchr)
|
|||
/*
|
||||
* Non-matching generic types; this can't work.
|
||||
*/
|
||||
return -1;
|
||||
return CHR_INVALID;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if the given pattern matches the given string.
|
||||
* p and s are either anchor or line pattern and string;
|
||||
* wp and ws are word (candidate) pattern and string
|
||||
* p is either an anchor or line pattern and string;
|
||||
* wp and wsc are word (candidate) pattern and string
|
||||
*
|
||||
* If only one pattern is given, we just check if characters match.
|
||||
* If both line and word are given, we check that characters match
|
||||
* for {...} classes by comparing positions in the strings.
|
||||
* Check that characters match for {...} classes by comparing positions in the
|
||||
* strings.
|
||||
*
|
||||
* Patterns and strings are always passed in pairs, so it is enough
|
||||
* to check for non-NULL wp. p should always be present.
|
||||
*
|
||||
* If prestrict is not NULL, it is a chain of patterns at least as long
|
||||
* prestrict is a chain of patterns at least as long
|
||||
* as the line string. In this case we are still assembling the line at
|
||||
* s (which has been allocated but doesn't yet contain anything useful)
|
||||
* newline (which has been allocated but doesn't yet contain anything useful)
|
||||
* and must continue to do so as we go along; prestrict gives
|
||||
* restrictions on the line character to be applied along side the other
|
||||
* patterns. In the simple case a restriction is a character to be put
|
||||
|
|
@ -1264,27 +1261,22 @@ pattern_match_equivalence(Cpattern lp, int wind, int wmtp, int wchr)
|
|||
* deduce an actual matching character. Note prestrict is never an
|
||||
* equivalence class. In extreme cases we can't deduce a unique
|
||||
* character; then the match fails.
|
||||
*
|
||||
* If prestrict is not NULL, s will be NULL.
|
||||
*/
|
||||
|
||||
/**/
|
||||
mod_export int
|
||||
pattern_match_restrict(Cpattern p, char *s, Cpattern wp, char *ws,
|
||||
Cpattern prestrict)
|
||||
static int
|
||||
pattern_match_restrict(Cpattern p, Cpattern wp, convchar_t *wsc, int wsclen,
|
||||
Cpattern prestrict, ZLE_STRING_T newline)
|
||||
{
|
||||
int c, ind;
|
||||
int wc, wind;
|
||||
int len = 0, wlen, mt, wmt;
|
||||
convchar_t c;
|
||||
convchar_t ind, wind;
|
||||
int mt, wmt;
|
||||
|
||||
while (p && wp && (prestrict || *s) && *ws) {
|
||||
while (p && wp && wsclen && prestrict) {
|
||||
/* First test the word character */
|
||||
if (*ws == Meta) {
|
||||
wc = STOUC(ws[1]) ^ 32;
|
||||
wlen = 2;
|
||||
} else {
|
||||
wc = STOUC(*ws);
|
||||
wlen = 1;
|
||||
}
|
||||
wind = pattern_match1(wp, wc, &wmt);
|
||||
wind = pattern_match1(wp, *wsc, &wmt);
|
||||
if (!wind)
|
||||
return 0;
|
||||
|
||||
|
|
@ -1292,55 +1284,45 @@ pattern_match_restrict(Cpattern p, char *s, Cpattern wp, char *ws,
|
|||
* Now the line character; deal with the case where
|
||||
* we don't yet have it, only a restriction on it.
|
||||
*/
|
||||
if (prestrict) {
|
||||
if (prestrict->tp == CPAT_CHAR) {
|
||||
/*
|
||||
* Easy case: restricted to an exact character on
|
||||
* the line. Procede as normal.
|
||||
*/
|
||||
c = prestrict->u.chr;
|
||||
} else {
|
||||
if (p->tp == CPAT_CHAR) {
|
||||
/*
|
||||
* Normal line pattern is an exact character: as
|
||||
* long as this matches prestrict, we can proceed
|
||||
* as usual.
|
||||
*/
|
||||
c = p->u.chr;
|
||||
} else if (p->tp == CPAT_EQUIV) {
|
||||
/*
|
||||
* An equivalence, so we can deduce the character
|
||||
* backwards from the word pattern and see if it
|
||||
* matches prestrict.
|
||||
*/
|
||||
if ((c = pattern_match_equivalence(p, wind, wmt, wc)) == -1)
|
||||
return 0;
|
||||
} else {
|
||||
/*
|
||||
* Not an equivalence, so that means we must match
|
||||
* the word (not just the word pattern), so grab it
|
||||
* and make sure it fulfills our needs. I think.
|
||||
* Not 100% sure about that, but what else can
|
||||
* we do? We haven't actually been passed a string
|
||||
* from the command line.
|
||||
*/
|
||||
c = wc;
|
||||
}
|
||||
/* Character so deduced must match the restriction. */
|
||||
if (!pattern_match1(prestrict, c, &mt))
|
||||
return 0;
|
||||
}
|
||||
len = imeta(c) ? 2 : 1;
|
||||
if (prestrict->tp == CPAT_CHAR) {
|
||||
/*
|
||||
* Easy case: restricted to an exact character on
|
||||
* the line. Procede as normal.
|
||||
*/
|
||||
c = prestrict->u.chr;
|
||||
} else {
|
||||
/* We have the character itself. */
|
||||
if (*s == Meta) {
|
||||
c = STOUC(s[1]) ^ 32;
|
||||
len = 2;
|
||||
if (p->tp == CPAT_CHAR) {
|
||||
/*
|
||||
* Normal line pattern is an exact character: as
|
||||
* long as this matches prestrict, we can proceed
|
||||
* as usual.
|
||||
*/
|
||||
c = p->u.chr;
|
||||
} else if (p->tp == CPAT_EQUIV) {
|
||||
/*
|
||||
* An equivalence, so we can deduce the character
|
||||
* backwards from the word pattern and see if it
|
||||
* matches prestrict.
|
||||
*/
|
||||
if ((c = pattern_match_equivalence(p, wind, wmt, *wsc)) ==
|
||||
CHR_INVALID)
|
||||
return 0;
|
||||
} else {
|
||||
c = STOUC(*s);
|
||||
len = 1;
|
||||
/*
|
||||
* Not an equivalence, so that means we must match
|
||||
* the word (not just the word pattern), so grab it
|
||||
* and make sure it fulfills our needs. I think.
|
||||
* Not 100% sure about that, but what else can
|
||||
* we do? We haven't actually been passed a string
|
||||
* from the command line.
|
||||
*/
|
||||
c = *wsc;
|
||||
}
|
||||
/* Character so deduced must match the restriction. */
|
||||
if (!pattern_match1(prestrict, c, &mt))
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* If either is "?", they match each other; no further tests.
|
||||
* Apply this even if the character wasn't convertable;
|
||||
|
|
@ -1364,7 +1346,7 @@ pattern_match_restrict(Cpattern p, char *s, Cpattern wp, char *ws,
|
|||
*/
|
||||
if ((mt == PP_LOWER || mt == PP_UPPER) &&
|
||||
(wmt == PP_LOWER || wmt == PP_UPPER)) {
|
||||
if (tulower(c) != tulower(wc))
|
||||
if (ZC_tolower(c) != ZC_tolower(*wsc))
|
||||
return 0;
|
||||
} else {
|
||||
/* Other different classes can't match. */
|
||||
|
|
@ -1373,71 +1355,46 @@ pattern_match_restrict(Cpattern p, char *s, Cpattern wp, char *ws,
|
|||
}
|
||||
}
|
||||
|
||||
if (prestrict) {
|
||||
/* We need to assemble the line */
|
||||
if (imeta(c)) {
|
||||
*s++ = Meta;
|
||||
*s++ = c ^ 32;
|
||||
} else {
|
||||
*s++ = c;
|
||||
}
|
||||
prestrict = prestrict->next;
|
||||
} else
|
||||
s += len;
|
||||
ws += wlen;
|
||||
/* We need to assemble the line */
|
||||
*newline++ = (ZLE_CHAR_T)c;
|
||||
prestrict = prestrict->next;
|
||||
wsc++;
|
||||
wsclen--;
|
||||
p = p->next;
|
||||
wp = wp->next;
|
||||
}
|
||||
|
||||
while (p && (prestrict || *s)) {
|
||||
if (prestrict) {
|
||||
/*
|
||||
* As above, but with even less info to go on.
|
||||
* (Can this happen?) At least handle the cases where
|
||||
* one of our patterns has given us a specific character.
|
||||
*/
|
||||
if (prestrict->tp == CPAT_CHAR) {
|
||||
c = prestrict->u.chr;
|
||||
} else {
|
||||
if (p->tp == CPAT_CHAR) {
|
||||
c = p->u.chr;
|
||||
} else {
|
||||
/*
|
||||
* OK. Here we are in a function with just a line
|
||||
* pattern and another pattern to restrict the
|
||||
* characters that can go on the line, and no actual
|
||||
* characters. We're matching two patterns against
|
||||
* one another to generate a character to insert.
|
||||
* This is a bit too psychedelic, so I'm going to
|
||||
* bale out now. See you on the ground.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
if (!pattern_match1(prestrict, c, &mt))
|
||||
return 0;
|
||||
}
|
||||
while (p && prestrict) {
|
||||
/*
|
||||
* As above, but with even less info to go on.
|
||||
* (Can this happen?) At least handle the cases where
|
||||
* one of our patterns has given us a specific character.
|
||||
*/
|
||||
if (prestrict->tp == CPAT_CHAR) {
|
||||
c = prestrict->u.chr;
|
||||
} else {
|
||||
if (*s == Meta) {
|
||||
c = STOUC(s[1]) ^ 32;
|
||||
len = 2;
|
||||
if (p->tp == CPAT_CHAR) {
|
||||
c = p->u.chr;
|
||||
} else {
|
||||
c = STOUC(*s);
|
||||
len = 1;
|
||||
/*
|
||||
* OK. Here we are in a function with just a line
|
||||
* pattern and another pattern to restrict the
|
||||
* characters that can go on the line, and no actual
|
||||
* characters. We're matching two patterns against
|
||||
* one another to generate a character to insert.
|
||||
* This is a bit too psychedelic, so I'm going to
|
||||
* bale out now. See you on the ground.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
if (!pattern_match1(prestrict, c, &mt))
|
||||
return 0;
|
||||
}
|
||||
if (!pattern_match1(p, c, &mt))
|
||||
return 0;
|
||||
p = p->next;
|
||||
if (prestrict) {
|
||||
if (imeta(c)) {
|
||||
*s++ = Meta;
|
||||
*s++ = c ^ 32;
|
||||
} else {
|
||||
*s++ = c;
|
||||
}
|
||||
prestrict = prestrict->next;
|
||||
} else
|
||||
s += len;
|
||||
*newline++ = (ZLE_CHAR_T)c;
|
||||
prestrict = prestrict->next;
|
||||
}
|
||||
|
||||
if (prestrict) {
|
||||
|
|
@ -1445,8 +1402,53 @@ pattern_match_restrict(Cpattern p, char *s, Cpattern wp, char *ws,
|
|||
return 0;
|
||||
}
|
||||
|
||||
while (wp && *ws) {
|
||||
while (wp && wsclen) {
|
||||
/* No funny business when we only have the word pattern. */
|
||||
if (!pattern_match1(wp, *wsc, &wmt))
|
||||
return 0;
|
||||
wp = wp->next;
|
||||
wsc++;
|
||||
wsclen--;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* The usual version of pattern matching, without the line string
|
||||
* being handled by restriction.
|
||||
*
|
||||
* Check if the given pattern matches the given string.
|
||||
* p and s are either anchor or line pattern and string;
|
||||
* wp and ws are word (candidate) pattern and string
|
||||
*
|
||||
* If only one pattern is given, we just check if characters match.
|
||||
* If both line and word are given, we check that characters match
|
||||
* for {...} classes by comparing positions in the strings.
|
||||
*
|
||||
* Patterns and strings are always passed in pairs, so it is enough
|
||||
* to check for non-NULL wp. p should always be present.
|
||||
*/
|
||||
/**/
|
||||
mod_export int
|
||||
pattern_match(Cpattern p, char *s, Cpattern wp, char *ws)
|
||||
{
|
||||
convchar_t c, wc;
|
||||
convchar_t ind, wind;
|
||||
int len = 0, wlen, mt, wmt;
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
mbstate_t lstate, wstate;
|
||||
|
||||
memset(&lstate, 0, sizeof(lstate));
|
||||
memset(&wstate, 0, sizeof(wstate));
|
||||
#endif
|
||||
|
||||
while (p && wp && *s && *ws) {
|
||||
/* First test the word character */
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
wlen = mb_metacharlenconv_r(ws, &wc, &wstate);
|
||||
#else
|
||||
if (*ws == Meta) {
|
||||
wc = STOUC(ws[1]) ^ 32;
|
||||
wlen = 2;
|
||||
|
|
@ -1454,6 +1456,94 @@ pattern_match_restrict(Cpattern p, char *s, Cpattern wp, char *ws,
|
|||
wc = STOUC(*ws);
|
||||
wlen = 1;
|
||||
}
|
||||
#endif
|
||||
wind = pattern_match1(wp, wc, &wmt);
|
||||
if (!wind)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Now the line character.
|
||||
*/
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
len = mb_metacharlenconv_r(s, &c, &lstate);
|
||||
#else
|
||||
/* We have the character itself. */
|
||||
if (*s == Meta) {
|
||||
c = STOUC(s[1]) ^ 32;
|
||||
len = 2;
|
||||
} else {
|
||||
c = STOUC(*s);
|
||||
len = 1;
|
||||
}
|
||||
#endif
|
||||
/*
|
||||
* If either is "?", they match each other; no further tests.
|
||||
* Apply this even if the character wasn't convertable;
|
||||
* there's no point trying to be clever in that case.
|
||||
*/
|
||||
if (p->tp != CPAT_ANY || wp->tp != CPAT_ANY)
|
||||
{
|
||||
ind = pattern_match1(p, c, &mt);
|
||||
if (!ind)
|
||||
return 0;
|
||||
if (ind != wind)
|
||||
return 0;
|
||||
if (mt != wmt) {
|
||||
/*
|
||||
* Special case if matching lower vs. upper or
|
||||
* vice versa. The transformed characters must match.
|
||||
* We don't need to check the transformation is
|
||||
* the appropriate one for each character separately,
|
||||
* since that was done in pattern_match1(), so just
|
||||
* compare lower-cased versions of both.
|
||||
*/
|
||||
if ((mt == PP_LOWER || mt == PP_UPPER) &&
|
||||
(wmt == PP_LOWER || wmt == PP_UPPER)) {
|
||||
if (ZC_tolower(c) != ZC_tolower(wc))
|
||||
return 0;
|
||||
} else {
|
||||
/* Other different classes can't match. */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
s += len;
|
||||
ws += wlen;
|
||||
p = p->next;
|
||||
wp = wp->next;
|
||||
}
|
||||
|
||||
while (p && *s) {
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
len = mb_metacharlenconv_r(s, &c, &lstate);
|
||||
#else
|
||||
if (*s == Meta) {
|
||||
c = STOUC(s[1]) ^ 32;
|
||||
len = 2;
|
||||
} else {
|
||||
c = STOUC(*s);
|
||||
len = 1;
|
||||
}
|
||||
#endif
|
||||
if (!pattern_match1(p, c, &mt))
|
||||
return 0;
|
||||
p = p->next;
|
||||
s += len;
|
||||
}
|
||||
|
||||
while (wp && *ws) {
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
wlen = mb_metacharlenconv_r(ws, &wc, &wstate);
|
||||
#else
|
||||
if (*ws == Meta) {
|
||||
wc = STOUC(ws[1]) ^ 32;
|
||||
wlen = 2;
|
||||
} else {
|
||||
wc = STOUC(*ws);
|
||||
wlen = 1;
|
||||
}
|
||||
#endif
|
||||
if (!pattern_match1(wp, wc, &wmt))
|
||||
return 0;
|
||||
wp = wp->next;
|
||||
|
|
@ -1463,16 +1553,6 @@ pattern_match_restrict(Cpattern p, char *s, Cpattern wp, char *ws,
|
|||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* The usual version of pattern matching, without the line string
|
||||
* being handled by restriction.
|
||||
*/
|
||||
/**/
|
||||
mod_export int
|
||||
pattern_match(Cpattern p, char *s, Cpattern wp, char *ws)
|
||||
{
|
||||
return pattern_match_restrict(p, s, wp, ws, NULL);
|
||||
}
|
||||
|
||||
/* This splits the given string into a list of cline structs, separated
|
||||
* at those places where one of the anchors of an `*' pattern was found.
|
||||
|
|
@ -1575,30 +1655,45 @@ bld_parts(char *str, int len, int plen, Cline *lp, Cline *lprem)
|
|||
* buffer line. Then we test if this line matches the string given by
|
||||
* wlen and word.
|
||||
*
|
||||
* wpat contains pattern that matched previously
|
||||
* lpat contains the pattern for line we build
|
||||
* The matcher ) wpat, containing pattern that matched previously
|
||||
* mp gives ) lpat, containing the pattern for line we build
|
||||
* line is the line we are assembling; it is initially empty
|
||||
* mword is a string that matched wpat before
|
||||
* word is string that we try to match now
|
||||
*
|
||||
* The return value is the length of the string matched in the word, it
|
||||
* is zero if we couldn't build a line that matches the word.
|
||||
*
|
||||
* TODO: a lot of the nastiness associated with variable string
|
||||
* lengths can go when we switch to wide characters. (Why didn't
|
||||
* I just keep line unmetafied and metafy into place at the end? Er...)
|
||||
*/
|
||||
|
||||
/**/
|
||||
static int
|
||||
bld_line(Cmatcher mp, char **linep, char *mword, char *word, int wlen, int sfx)
|
||||
bld_line(Cmatcher mp, ZLE_STRING_T line, char *mword, char *word,
|
||||
int wlen, int sfx)
|
||||
{
|
||||
Cpattern lpat = mp->line;
|
||||
Cpattern wpat = mp->word;
|
||||
Cpattern curgenpat;
|
||||
VARARR(struct cpattern, genpatarr, mp->llen);
|
||||
Cmlist ms;
|
||||
int llen, rl;
|
||||
char *oword = word, *line = *linep;
|
||||
int llen, rl, l;
|
||||
convchar_t convchr, *wordcp;
|
||||
VARARR(convchar_t, wordchars, wlen);
|
||||
VARARR(struct cpattern, genpatarr, mp->llen);
|
||||
|
||||
/*
|
||||
* We may need to start the "word" array from the end. This
|
||||
* is much easier if we convert it to an array of (possibly wide)
|
||||
* characters.
|
||||
*/
|
||||
MB_METACHARINIT();
|
||||
for (l = wlen, wordcp = wordchars; l; l--) {
|
||||
int charlen = MB_METACHARLENCONV(word, &convchr);
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
if (convchr == WEOF)
|
||||
convchr = (*word == Meta) ? word[1] ^ 32 : *word;
|
||||
#endif
|
||||
*wordcp++ = convchr;
|
||||
word += charlen;
|
||||
}
|
||||
|
||||
/*
|
||||
* Loop over all characters. At this stage, line is an empty
|
||||
|
|
@ -1616,9 +1711,10 @@ bld_line(Cmatcher mp, char **linep, char *mword, char *word, int wlen, int sfx)
|
|||
* when we finally match the line against the set of matchers.
|
||||
*/
|
||||
curgenpat = genpatarr;
|
||||
MB_METACHARINIT();
|
||||
while (lpat) {
|
||||
int wchr = (*mword == Meta) ? STOUC(mword[1]) ^ 32 : STOUC(*mword);
|
||||
int wmtp, wind;
|
||||
convchar_t wchr, wind;
|
||||
int wmtp, mwordlen;
|
||||
/*
|
||||
* If the line pattern is an equivalence, query wpat to find the
|
||||
* word part of the equivalence. If we don't find one we don't try
|
||||
|
|
@ -1628,9 +1724,10 @@ bld_line(Cmatcher mp, char **linep, char *mword, char *word, int wlen, int sfx)
|
|||
* the behaviour of the old logic that this replaces.)
|
||||
*/
|
||||
if (lpat->tp == CPAT_EQUIV && wpat && *mword) {
|
||||
mwordlen = MB_METACHARLENCONV(mword, &wchr);
|
||||
wind = pattern_match1(wpat, wchr, &wmtp);
|
||||
wpat = wpat->next;
|
||||
mword += (*mword == Meta) ? 2 : 1;
|
||||
mword += mwordlen;
|
||||
} else
|
||||
wind = 0;
|
||||
if (wind) {
|
||||
|
|
@ -1638,9 +1735,9 @@ bld_line(Cmatcher mp, char **linep, char *mword, char *word, int wlen, int sfx)
|
|||
* Successful match for word side of equivalence.
|
||||
* Find the line equivalent.
|
||||
*/
|
||||
int lchr;
|
||||
convchar_t lchr;
|
||||
if ((lchr = pattern_match_equivalence(lpat, wind, wmtp, wchr))
|
||||
== -1) {
|
||||
== CHR_INVALID) {
|
||||
/*
|
||||
* No equivalent. No possible match; give up.
|
||||
*/
|
||||
|
|
@ -1694,50 +1791,40 @@ bld_line(Cmatcher mp, char **linep, char *mword, char *word, int wlen, int sfx)
|
|||
llen = mp->llen;
|
||||
rl = 0;
|
||||
|
||||
*line = '\0';
|
||||
if (sfx)
|
||||
{
|
||||
/*
|
||||
* We need to work backwards from the end of both the
|
||||
* word and the line strings.
|
||||
*
|
||||
* Position at the end of the word by counting characters.
|
||||
*/
|
||||
int l = wlen;
|
||||
while (l--)
|
||||
word += (*word == Meta) ? 2 : 1;
|
||||
wordcp = wordchars + wlen;
|
||||
|
||||
/*
|
||||
* We construct the line from the end. We've left
|
||||
* enough space for possible Meta's.
|
||||
* We construct the line from the end.
|
||||
*/
|
||||
line += 2 * llen;
|
||||
*line = '\0';
|
||||
line += llen;
|
||||
curgenpat = genpatarr + llen;
|
||||
} else
|
||||
} else {
|
||||
wordcp = wordchars;
|
||||
curgenpat = genpatarr;
|
||||
}
|
||||
|
||||
/* we now reuse mp, lpat, wpat for the global matchers */
|
||||
MB_METACHARINIT();
|
||||
while (llen && wlen) {
|
||||
int wchr, wmtp;
|
||||
char *wp;
|
||||
convchar_t wchr;
|
||||
int wmtp;
|
||||
convchar_t *wp;
|
||||
Cpattern tmpgenpat;
|
||||
|
||||
if (sfx) {
|
||||
if (word > oword + 1 && word[-2] == Meta)
|
||||
wp = word - 2;
|
||||
else
|
||||
wp = word - 1;
|
||||
wp = wordcp - 1;
|
||||
curgenpat--;
|
||||
} else
|
||||
wp = word;
|
||||
if (*wp == Meta)
|
||||
wchr = STOUC(wp[1]) ^ 32;
|
||||
else
|
||||
wchr = STOUC(*wp);
|
||||
if (pattern_match1(curgenpat, wchr, &wmtp))
|
||||
wp = wordcp;
|
||||
if (pattern_match1(curgenpat, *wp, &wmtp))
|
||||
{
|
||||
int lchr;
|
||||
convchar_t lchr;
|
||||
/*
|
||||
* We can match the line character directly with the word
|
||||
* character. If the line character is a fixed one,
|
||||
|
|
@ -1749,36 +1836,27 @@ bld_line(Cmatcher mp, char **linep, char *mword, char *word, int wlen, int sfx)
|
|||
lchr = curgenpat->u.chr;
|
||||
else
|
||||
lchr = wchr;
|
||||
if (imeta(lchr)) {
|
||||
if (sfx)
|
||||
line -= 2;
|
||||
line[0] = Meta;
|
||||
line[1] = lchr ^ 32;
|
||||
if (!sfx)
|
||||
line += 2;
|
||||
} else {
|
||||
if (sfx)
|
||||
line--;
|
||||
line[0] = lchr;
|
||||
if (!sfx)
|
||||
line++;
|
||||
}
|
||||
|
||||
if (sfx)
|
||||
*--line = lchr;
|
||||
else
|
||||
*line++ = lchr;
|
||||
|
||||
llen--;
|
||||
wlen--;
|
||||
rl++;
|
||||
|
||||
if (sfx)
|
||||
word = wp;
|
||||
wordcp = wp;
|
||||
else {
|
||||
if (llen)
|
||||
curgenpat++;
|
||||
word += (*word == Meta) ? 2 : 1;
|
||||
wordcp++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
char *lp;
|
||||
ZLE_CHAR_T *lp;
|
||||
/*
|
||||
* Need to loop over pattern matchers.
|
||||
*/
|
||||
|
|
@ -1794,66 +1872,31 @@ bld_line(Cmatcher mp, char **linep, char *mword, char *word, int wlen, int sfx)
|
|||
if (mp && !mp->flags && mp->wlen <= wlen &&
|
||||
mp->llen <= llen)
|
||||
{
|
||||
if (sfx) {
|
||||
/*
|
||||
* We haven't assembled the line yet, and with
|
||||
* Meta characters we don't yet know the length.
|
||||
* We'll fix this up later.
|
||||
*/
|
||||
lp = line - 2 * mp->llen;
|
||||
} else
|
||||
lp = line;
|
||||
wp = word;
|
||||
if (sfx) {
|
||||
int l = mp->wlen;
|
||||
while (l--) {
|
||||
if (wp > oword + 1 && wp[-2] == Meta)
|
||||
wp -= 2;
|
||||
else
|
||||
wp--;
|
||||
}
|
||||
lp = line;
|
||||
wp = wordcp;
|
||||
tmpgenpat = curgenpat;
|
||||
|
||||
tmpgenpat = curgenpat - mp->llen;
|
||||
} else
|
||||
tmpgenpat = curgenpat;
|
||||
if (pattern_match_restrict(mp->line, lp,
|
||||
mp->word, wp, tmpgenpat)) {
|
||||
if (sfx) {
|
||||
lp -= mp->llen;
|
||||
wp -= mp->wlen;
|
||||
tmpgenpat -= mp->llen;
|
||||
}
|
||||
|
||||
if (pattern_match_restrict(mp->line, mp->word, wp,
|
||||
wlen - (wp - wordchars),
|
||||
tmpgenpat, lp)) {
|
||||
/*
|
||||
* Matched: advance over as many characters
|
||||
* of the patterns and strings as
|
||||
* we've done matches.
|
||||
*/
|
||||
if (sfx) {
|
||||
int imove = mp->llen, nchar;
|
||||
char *pmove = lp;
|
||||
word = wp;
|
||||
|
||||
/* Close the gap we left in the line string */
|
||||
while (imove--)
|
||||
pmove += (*pmove == Meta) ? 2 : 1;
|
||||
/* Number of bytes to move */
|
||||
nchar = (int)(pmove - lp);
|
||||
/* The size of the gap */
|
||||
imove = 2 * mp->llen - nchar;
|
||||
if (imove) {
|
||||
lp = line - imove;
|
||||
/* Moving up, so start at the top */
|
||||
while (nchar--)
|
||||
*--line = *--lp;
|
||||
/* line is at the start of the moved text */
|
||||
}
|
||||
|
||||
line = lp;
|
||||
wordcp = wp;
|
||||
curgenpat = tmpgenpat;
|
||||
} else {
|
||||
int cnt = mp->llen;
|
||||
while (cnt--) {
|
||||
line += (*line == Meta) ? 2 : 1;
|
||||
}
|
||||
|
||||
cnt = mp->wlen;
|
||||
while (cnt--)
|
||||
word += (*word == Meta) ? 2 : 1;
|
||||
|
||||
line += mp->llen;
|
||||
wordcp += mp->wlen;
|
||||
curgenpat += mp->llen;
|
||||
}
|
||||
llen -= mp->llen;
|
||||
|
|
@ -1869,10 +1912,6 @@ bld_line(Cmatcher mp, char **linep, char *mword, char *word, int wlen, int sfx)
|
|||
}
|
||||
if (!llen) {
|
||||
/* Unmatched portion in the line built, return matched length. */
|
||||
if (sfx)
|
||||
*linep = line;
|
||||
else
|
||||
*line = '\0';
|
||||
return rl;
|
||||
}
|
||||
return 0;
|
||||
|
|
@ -1891,7 +1930,14 @@ join_strs(int la, char *sa, int lb, char *sb)
|
|||
|
||||
Cmlist ms;
|
||||
Cmatcher mp;
|
||||
int t, bl, rr = rl;
|
||||
int t, bl;
|
||||
/** rr is the remaining length already allocated in rs */
|
||||
int rr = rl;
|
||||
/*
|
||||
* convlen is the length we need for the string converted to
|
||||
* char * (possibly multibyte).
|
||||
*/
|
||||
int convlen;
|
||||
char *rp = rs;
|
||||
|
||||
while (la && lb) {
|
||||
|
|
@ -1906,35 +1952,49 @@ join_strs(int la, char *sa, int lb, char *sb)
|
|||
if ((t = pattern_match(mp->word, sa, NULL, NULL)) ||
|
||||
pattern_match(mp->word, sb, NULL, NULL)) {
|
||||
/* It matched one of the strings, t says which one. */
|
||||
/* TODO: double to allow Meta, not necessary
|
||||
when properly unmetafied */
|
||||
VARARR(char, linearr, 2*mp->llen + 1);
|
||||
char **ap, **bp, *line = linearr;
|
||||
VARARR(ZLE_CHAR_T, line, mp->llen);
|
||||
char **ap, **bp;
|
||||
int *alp, *blp;
|
||||
|
||||
if (t) {
|
||||
ap = &sa; alp = &la;
|
||||
bp = &sb; blp = &lb;
|
||||
ap = &sa;
|
||||
alp = &la;
|
||||
|
||||
bp = &sb;
|
||||
blp = &lb;
|
||||
} else {
|
||||
ap = &sb; alp = &lb;
|
||||
bp = &sa; blp = &la;
|
||||
ap = &sb;
|
||||
alp = &lb;
|
||||
|
||||
bp = &sa;
|
||||
blp = &la;
|
||||
}
|
||||
/* Now try to build a string that matches the other
|
||||
* string. */
|
||||
if ((bl = bld_line(mp, &line, *ap, *bp, *blp, 0))) {
|
||||
if ((bl = bld_line(mp, line, *ap, *bp, *blp, 0))) {
|
||||
/* Found one, put it into the return string. */
|
||||
if (rr <= mp->llen) {
|
||||
char *convstr =
|
||||
zlelineasstring(line, mp->llen, 0, &convlen,
|
||||
NULL, 0);
|
||||
if (rr <= convlen) {
|
||||
char *or = rs;
|
||||
int alloclen = (convlen > 20) ? convlen : 20;
|
||||
|
||||
rs = realloc(rs, (rl += 20));
|
||||
rr += 20;
|
||||
rs = realloc(rs, (rl += alloclen));
|
||||
rr += alloclen;
|
||||
rp += rs - or;
|
||||
}
|
||||
memcpy(rp, line, mp->llen);
|
||||
rp += mp->llen; rr -= mp->llen;
|
||||
*ap += mp->wlen; *alp -= mp->wlen;
|
||||
*bp += bl; *blp -= bl;
|
||||
memcpy(rp, convstr, convlen);
|
||||
rp += convlen;
|
||||
rr -= convlen;
|
||||
/* HERE: multibyte chars */
|
||||
*ap += mp->wlen;
|
||||
*alp -= mp->wlen;
|
||||
|
||||
*bp += bl;
|
||||
*blp -= bl;
|
||||
t = 1;
|
||||
free(convstr);
|
||||
} else
|
||||
t = 0;
|
||||
}
|
||||
|
|
@ -1944,16 +2004,20 @@ join_strs(int la, char *sa, int lb, char *sb)
|
|||
break;
|
||||
} else {
|
||||
/* Same character, just take it. */
|
||||
if (rr <= 1) {
|
||||
if (rr <= 1 /* HERE charlen */) {
|
||||
char *or = rs;
|
||||
|
||||
rs = realloc(rs, (rl += 20));
|
||||
rr += 20;
|
||||
rp += rs - or;
|
||||
}
|
||||
*rp++ = *sa; rr--;
|
||||
sa++; sb++;
|
||||
la--; lb--;
|
||||
/* HERE: multibyte char */
|
||||
*rp++ = *sa;
|
||||
rr--;
|
||||
sa++;
|
||||
sb++;
|
||||
la--;
|
||||
lb--;
|
||||
}
|
||||
}
|
||||
if (la || lb)
|
||||
|
|
@ -2035,9 +2099,11 @@ check_cmdata(Cmdata md, int sfx)
|
|||
} else {
|
||||
md->line = 0;
|
||||
md->len = md->olen = md->cl->wlen;
|
||||
/* HERE: multibyte */
|
||||
if ((md->str = md->cl->word) && sfx)
|
||||
md->str += md->len;
|
||||
md->alen = md->cl->llen;
|
||||
/* HERE: multibyte */
|
||||
if ((md->astr = md->cl->line) && sfx)
|
||||
md->astr += md->alen;
|
||||
}
|
||||
|
|
@ -2060,9 +2126,11 @@ undo_cmdata(Cmdata md, int sfx)
|
|||
r->wlen = 0;
|
||||
r->flags |= CLF_LINE;
|
||||
r->llen = md->len;
|
||||
/* HERE: multibyte */
|
||||
r->line = md->str - (sfx ? md->len : 0);
|
||||
} else if (md->len != md->olen) {
|
||||
r->wlen = md->len;
|
||||
/* HERE: multibyte */
|
||||
r->word = md->str - (sfx ? md->len : 0);
|
||||
DPUTS(r->wlen > 0 && !*r->word, "Bad word");
|
||||
}
|
||||
|
|
@ -2116,24 +2184,24 @@ join_sub(Cmdata md, char *str, int len, int *mlen, int sfx, int join)
|
|||
NULL, NULL)) ||
|
||||
pattern_match(mp->word, nw - (sfx ? mp->wlen : 0),
|
||||
NULL, NULL))) {
|
||||
/* TODO: doubled to allow Meta, not necessary
|
||||
* when properly unmetafied */
|
||||
VARARR(char, linearr, 2*mp->llen + 1);
|
||||
VARARR(ZLE_CHAR_T, line, mp->llen);
|
||||
int bl;
|
||||
char *mw, *line = linearr;
|
||||
char *mw;
|
||||
|
||||
/* Then build all the possible lines and see
|
||||
* if one of them matches the other string. */
|
||||
/* HERE: they're multibyte */
|
||||
if (t)
|
||||
mw = ow - (sfx ? mp->wlen : 0);
|
||||
else
|
||||
mw = nw - (sfx ? mp->wlen : 0);
|
||||
|
||||
if ((bl = bld_line(mp, &line, mw, (t ? nw : ow),
|
||||
if ((bl = bld_line(mp, line, mw, (t ? nw : ow),
|
||||
(t ? nl : ol), sfx))) {
|
||||
/* Yep, one of the lines matched the other
|
||||
* string. */
|
||||
|
||||
/* HERE: multibyte characters */
|
||||
if (t) {
|
||||
ol = mp->wlen; nl = bl;
|
||||
} else {
|
||||
|
|
@ -2146,8 +2214,10 @@ join_sub(Cmdata md, char *str, int len, int *mlen, int sfx, int join)
|
|||
md->len -= nl;
|
||||
*mlen = ol;
|
||||
|
||||
return get_cline(NULL, 0, dupstring(line), mp->llen,
|
||||
NULL, 0, CLF_JOIN);
|
||||
return get_cline(NULL, 0,
|
||||
zlelineasstring(line, mp->llen,
|
||||
0, NULL, NULL, 1),
|
||||
mp->llen, NULL, 0, CLF_JOIN);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4062,7 +4062,7 @@ cfp_matcher_range(Cmatcher *ms, char *add)
|
|||
len += addlen + 1;
|
||||
} else {
|
||||
/* The usual set of matcher possibilities. */
|
||||
int ind;
|
||||
convchar_t ind;
|
||||
if (m->line->tp == CPAT_EQUIV &&
|
||||
m->word->tp == CPAT_EQUIV) {
|
||||
/*
|
||||
|
|
@ -4086,7 +4086,7 @@ cfp_matcher_range(Cmatcher *ms, char *add)
|
|||
* word pattern.
|
||||
*/
|
||||
if ((ind = pattern_match_equivalence
|
||||
(m->word, ind, mt, addc)) != -1) {
|
||||
(m->word, ind, mt, addc)) != CHR_INVALID) {
|
||||
if (ret) {
|
||||
if (imeta(ind)) {
|
||||
*p++ = Meta;
|
||||
|
|
|
|||
|
|
@ -167,9 +167,10 @@ zlecharasstring(ZLE_CHAR_T inchar, char *buf)
|
|||
* instead of wide characters where appropriate and with the contents
|
||||
* metafied.
|
||||
*
|
||||
* If outll is non-NULL, assign the new length. If outcs is non-NULL,
|
||||
* assign the new character position. This is the conventional string
|
||||
* length, without the NULL byte.
|
||||
* If outllp is non-NULL, assign the new length. This is the conventional
|
||||
* string length, without the NULL byte.
|
||||
*
|
||||
* If outcsp is non-NULL, assign the new character position.
|
||||
*
|
||||
* If useheap is 1, memory is returned from the heap, else is allocated
|
||||
* for later freeing.
|
||||
|
|
|
|||
|
|
@ -3344,7 +3344,6 @@ mb_patmatchrange(char *range, wchar_t ch, wint_t *indptr, int *mtp)
|
|||
}
|
||||
|
||||
|
||||
#if 0
|
||||
/*
|
||||
* This is effectively the reverse of mb_patmatchrange().
|
||||
* Given a range descriptor of the same form, and an index into it,
|
||||
|
|
@ -3353,11 +3352,6 @@ mb_patmatchrange(char *range, wchar_t ch, wint_t *indptr, int *mtp)
|
|||
* return the type in mtp instead. Return 1 if successful, 0 if
|
||||
* there was no corresponding index. Note all pointer arguments
|
||||
* must be non-null.
|
||||
*
|
||||
* TODO: for now the completion matching code does not handle
|
||||
* multibyte. When it does, we will need either this, or
|
||||
* patmatchindex(), but not both---unlike user-initiated pattern
|
||||
* matching, multibyte mode in the line editor is always on when available.
|
||||
*/
|
||||
|
||||
/**/
|
||||
|
|
@ -3438,10 +3432,9 @@ mb_patmatchindex(char *range, wint_t ind, wint_t *chr, int *mtp)
|
|||
/* No corresponding index. */
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**/
|
||||
#endif
|
||||
#endif /* MULTIBYTE_SUPPORT */
|
||||
|
||||
/*
|
||||
* Identical function to mb_patmatchrange() above for single-byte
|
||||
|
|
@ -3572,9 +3565,17 @@ patmatchrange(char *range, int ch, int *indptr, int *mtp)
|
|||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/**/
|
||||
#ifndef MULTIBYTE_SUPPORT
|
||||
|
||||
/*
|
||||
* Identical function to mb_patmatchindex() above for single-byte
|
||||
* characters. Here -1 represents a character that needs a special type.
|
||||
*
|
||||
* Unlike patmatchrange, we only need this in ZLE, which always
|
||||
* uses MULTIBYTE_SUPPORT if compiled in; hence we don't use
|
||||
* this function in that case.
|
||||
*/
|
||||
|
||||
/**/
|
||||
|
|
@ -3658,6 +3659,9 @@ patmatchindex(char *range, int ind, int *chr, int *mtp)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/**/
|
||||
#endif /* MULTIBYTE_SUPPORT */
|
||||
|
||||
/*
|
||||
* Repeatedly match something simple and say how many times.
|
||||
* charstart is an array parallel to that starting at patinput
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue