26047: convert lower levels of completion matching to use

multibyte strings and wide characters
2025-11-01 18:30:55 +01:00 · 2008-11-15 21:27:45 +00:00 · 2008-11-15 21:27:45 +00:00 · 85c513894d
commit 85c513894d
parent ac38534728
7 changed files with 404 additions and 317 deletions
--- a/13
+++ b/13
@ -1,12 +1,17 @@
+2008-11-15  Peter Stephenson  <p.w.stephenson@ntlworld.com>
+
+	* 26047: Src/pattern.c, Src/Zle/comp.h, Src/Zle/compmatch.c,
+	Src/Zle/complete.c, Src/Zle/computil.c,
+	Src/Zle/zle_utils.c:  convert lower levels of completion
+	matching to use multibyte strings / wide characters.
+
+	* Phil (unposted): README: another typo.
+
 2008-11-15  Clint Adams  <clint@zsh.org>

 	* 26046: Functions/TCP/tcp_send: return an error if session's fd is
 	unusable.

-2008-11-15  Peter Stephenson  <p.w.stephenson@ntlworld.com>
-
-	* Phil (unposted): README: another typo.
-
 2008-11-13  Peter Stephenson  <p.w.stephenson@ntlworld.com>

 	* 26042 with some fixes from 26043 (Mikael): README,
--- a/Src/Zle/comp.h
+++ b/Src/Zle/comp.h
@ -190,10 +190,7 @@ struct cpattern {
 				 * Note the allocated length may be longer
 				 * than the null-terminated string.
 				 */
-	int chr;		/* if a single character, it
-				 * TODO: eventually should be a
-				 * convchar_t.
-				 */
+	convchar_t chr;		/* if a single character, it */
    } u;
 };

@ -201,9 +198,17 @@ struct cpattern {
 * For now this just handles single-byte characters.
 * TODO: this will change.
 */
+#ifdef MULTIBYTE_SUPPORT
+#define PATMATCHRANGE(r, c, ip, mtp)	mb_patmatchrange(r, c, ip, mtp)
+#define PATMATCHINDEX(r, i, cp, mtp)	mb_patmatchindex(r, i, cp, mtp)
+#define CONVCAST(c)			((wchar_t)(c))
+#define CHR_INVALID			(WEOF)
+#else
 #define PATMATCHRANGE(r, c, ip, mtp)	patmatchrange(r, c, ip, mtp)
 #define PATMATCHINDEX(r, i, cp, mtp)	patmatchindex(r, i, cp, mtp)
-#define CONVCAST(c)	(c)
+#define CONVCAST(c)			(c)
+#define CHR_INVALID			(-1)
+#endif

 /* This is a special return value for parse_cmatcher(), *
 * signalling an error. */
--- a/Src/Zle/complete.c
+++ b/Src/Zle/complete.c
@ -381,11 +381,12 @@ parse_pattern(char *name, char **sp, int *lp, char e, int *err)
 {
    Cpattern ret = NULL, r = NULL, n;
    char *s = *sp;
-    int inchar;
-    int l = 0;
+    convchar_t inchar;
+    int l = 0, inlen;

    *err = 0;

+    MB_METACHARINIT();
    while (*s && (e ? (*s != e) : !inblank(*s))) {
 	n = (Cpattern) hcalloc(sizeof(*n));
 	n->next = NULL;
@ -409,11 +410,12 @@ parse_pattern(char *name, char **sp, int *lp, char e, int *err)
 	    if (*s == '\\' && s[1])
 		s++;

-	    if (*s == Meta)
-		inchar = STOUC(*++s) ^ 32;
-	    else
-		inchar = STOUC(*s);
-	    s++;
+	    inlen = MB_METACHARLENCONV(s, &inchar);
+#ifdef MULTIBYTE_SUPPORT
+	    if (inchar == WEOF)
+		inchar = (convchar_t)(*s == Meta ? s[1] ^ 32 : *s);
+#endif
+	    s += inlen;
 	    n->tp = CPAT_CHAR;
 	    n->u.chr = inchar;
 	}
--- a/Src/Zle/compmatch.c
+++ b/Src/Zle/compmatch.c
@ -1152,11 +1152,10 @@ comp_match(char *pfx, char *sfx, char *w, Patprog cp, Cline *clp, int qu,
 */

 /**/
-mod_export int
-pattern_match1(Cpattern p, int c, int *mtp)
+mod_export convchar_t
+pattern_match1(Cpattern p, convchar_t c, int *mtp)
 {
-    /* TODO: should become convchar_t */
-    int ind;
+    convchar_t ind;

    *mtp = 0;
    switch (p->tp) {
@ -1193,29 +1192,31 @@ pattern_match1(Cpattern p, int c, int *mtp)
 * wind is the index returned by a pattern match on the word pattern,
 * with type wmtp.
 * wchr is the word character.
- * Return -1 if no matching character, else the character.
+ * Return CHR_INVALID if no matching character, else the character.
 *
 * Only makes sense if lp->tp == CPAT_EQUIV and the (unseen) word
 * pattern also has that type.
 */

 /**/
-mod_export int
-pattern_match_equivalence(Cpattern lp, int wind, int wmtp, int wchr)
+mod_export convchar_t
+pattern_match_equivalence(Cpattern lp, convchar_t wind, int wmtp,
+			  convchar_t wchr)
 {
-    int lchr, lmtp;
+    convchar_t lchr;
+    int lmtp;

    if (!PATMATCHINDEX(lp->u.str, wind-1, &lchr, &lmtp)) {
 	/*
 	 * No equivalent.  No possible match; give up.
 	 */
-	return -1;
+	return CHR_INVALID;
    }
    /*
     * If we matched an exact character rather than a range
     * type, return it.
     */
-    if (lchr != -1)
+    if (lchr != CHR_INVALID)
 	return lchr;

    /*
@ -1223,9 +1224,9 @@ pattern_match_equivalence(Cpattern lp, int wind, int wmtp, int wchr)
     * version of the word character.
     */
    if (wmtp == PP_UPPER && lmtp == PP_LOWER)
-	return tulower(wchr);
+	return ZC_tolower(wchr);
    else if (wmtp == PP_LOWER && lmtp == PP_UPPER)
-	return tuupper(wchr);
+	return ZC_toupper(wchr);
    else if (wmtp == lmtp) {
 	/*
 	 * Be lenient and allow identical replacements
@ -1238,25 +1239,21 @@ pattern_match_equivalence(Cpattern lp, int wind, int wmtp, int wchr)
 	/*
 	 * Non-matching generic types; this can't work.
 	 */
-	return -1;
+	return CHR_INVALID;
    }
 }

 /*
 * Check if the given pattern matches the given string.
- * p and  s are either anchor or line pattern and string;
- * wp and ws are word (candidate) pattern and string
+ * p is either an anchor or line pattern (no line string is passed in);
+ * wp and wsc are word (candidate) pattern and string
 *
- * If only one pattern is given, we just check if characters match.
- * If both line and word are given, we check that characters match
- * for {...} classes by comparing positions in the strings.
+ * Check that characters match for {...} classes by comparing positions in the
+ * strings.
 *
- * Patterns and strings are always passed in pairs, so it is enough
- * to check for non-NULL wp. p should always be present.
- *
- * If prestrict is not NULL, it is a chain of patterns at least as long
+ * prestrict is a chain of patterns at least as long
 * as the line string.  In this case we are still assembling the line at
- * s (which has been allocated but doesn't yet contain anything useful)
+ * newline (which has been allocated but doesn't yet contain anything useful)
 * and must continue to do so as we go along; prestrict gives
 * restrictions on the line character to be applied along side the other
 * patterns.  In the simple case a restriction is a character to be put
@ -1264,27 +1261,22 @@ pattern_match_equivalence(Cpattern lp, int wind, int wmtp, int wchr)
 * deduce an actual matching character.  Note prestrict is never an
 * equivalence class.  In extreme cases we can't deduce a unique
 * character; then the match fails.
+ *
+ * No line string is passed in; the line is assembled into newline.
 */

 /**/
-mod_export int
-pattern_match_restrict(Cpattern p, char *s, Cpattern wp, char *ws,
-		       Cpattern prestrict)
+static int
+pattern_match_restrict(Cpattern p, Cpattern wp, convchar_t *wsc, int wsclen,  
+		       Cpattern prestrict, ZLE_STRING_T newline)
 {
-    int c, ind;
-    int wc, wind;
-    int len = 0, wlen, mt, wmt;
+    convchar_t c;
+    convchar_t ind, wind;
+    int mt, wmt;

-    while (p && wp && (prestrict || *s) && *ws) {
+    while (p && wp && wsclen && prestrict) {
 	/* First test the word character */
-	if (*ws == Meta) {
-	    wc = STOUC(ws[1]) ^ 32;
-	    wlen = 2;
-	} else {
-	    wc = STOUC(*ws);
-	    wlen = 1;
-	}
-	wind = pattern_match1(wp, wc, &wmt);
+	wind = pattern_match1(wp, *wsc, &wmt);
 	if (!wind)
 	    return 0;

@ -1292,55 +1284,45 @@ pattern_match_restrict(Cpattern p, char *s, Cpattern wp, char *ws,
 	 * Now the line character; deal with the case where
 	 * we don't yet have it, only a restriction on it.
 	 */
-	if (prestrict) {
-	    if (prestrict->tp == CPAT_CHAR) {
-		/*
-		 * Easy case: restricted to an exact character on
-		 * the line.  Procede as normal.
-		 */
-		c = prestrict->u.chr;
-	    } else {
-		if (p->tp == CPAT_CHAR) {
-		    /*
-		     * Normal line pattern is an exact character:  as
-		     * long as this matches prestrict, we can proceed
-		     * as usual.
-		     */
-		    c = p->u.chr;
-		} else if (p->tp == CPAT_EQUIV) {
-		    /*
-		     * An equivalence, so we can deduce the character
-		     * backwards from the word pattern and see if it
-		     * matches prestrict.
-		     */
-		    if ((c = pattern_match_equivalence(p, wind, wmt, wc)) == -1)
-			return 0;
-		} else {
-		    /*
-		     * Not an equivalence, so that means we must match
-		     * the word (not just the word pattern), so grab it
-		     * and make sure it fulfills our needs.  I think.
-		     * Not 100% sure about that, but what else can
-		     * we do?  We haven't actually been passed a string
-		     * from the command line.
-		     */
-		    c = wc;
-		}
-		/* Character so deduced must match the restriction. */
-		if (!pattern_match1(prestrict, c, &mt))
-		    return 0;
-	    }
-	    len = imeta(c) ? 2 : 1;
+	if (prestrict->tp == CPAT_CHAR) {
+	    /*
+	     * Easy case: restricted to an exact character on
+	     * the line.  Proceed as normal.
+	     */
+	    c = prestrict->u.chr;
 	} else {
-	    /* We have the character itself. */
-	    if (*s == Meta) {
-		c = STOUC(s[1]) ^ 32;
-		len = 2;
+	    if (p->tp == CPAT_CHAR) {
+		/*
+		 * Normal line pattern is an exact character:  as
+		 * long as this matches prestrict, we can proceed
+		 * as usual.
+		 */
+		c = p->u.chr;
+	    } else if (p->tp == CPAT_EQUIV) {
+		/*
+		 * An equivalence, so we can deduce the character
+		 * backwards from the word pattern and see if it
+		 * matches prestrict.
+		 */
+		if ((c = pattern_match_equivalence(p, wind, wmt, *wsc)) ==
+		    CHR_INVALID)
+		    return 0;
 	    } else {
-		c = STOUC(*s);
-		len = 1;
+		/*
+		 * Not an equivalence, so that means we must match
+		 * the word (not just the word pattern), so grab it
+		 * and make sure it fulfills our needs.  I think.
+		 * Not 100% sure about that, but what else can
+		 * we do?  We haven't actually been passed a string
+		 * from the command line.
+		 */
+		c = *wsc;
 	    }
+	    /* Character so deduced must match the restriction. */
+	    if (!pattern_match1(prestrict, c, &mt))
+		return 0;
 	}
+
 	/*
 	 * If either is "?", they match each other; no further tests.
 	 * Apply this even if the character wasn't convertable;
@ -1364,7 +1346,7 @@ pattern_match_restrict(Cpattern p, char *s, Cpattern wp, char *ws,
 		 */
 		if ((mt == PP_LOWER || mt == PP_UPPER) &&
 		    (wmt == PP_LOWER || wmt == PP_UPPER)) {
-		    if (tulower(c) != tulower(wc))
+		    if (ZC_tolower(c) != ZC_tolower(*wsc))
 			return 0;
 		} else {
 		    /* Other different classes can't match. */
@ -1373,71 +1355,46 @@ pattern_match_restrict(Cpattern p, char *s, Cpattern wp, char *ws,
 	    }
 	}

-	if (prestrict) {
-	    /* We need to assemble the line */
-	    if (imeta(c)) {
-		*s++ = Meta;
-		*s++ = c ^ 32;
-	    } else {
-		*s++ = c;
-	    }
-	    prestrict = prestrict->next;
-	} else
-	    s += len;
-	ws += wlen;
+	/* We need to assemble the line */
+	*newline++ = (ZLE_CHAR_T)c;
+	prestrict = prestrict->next;
+	wsc++;
+	wsclen--;
 	p = p->next;
 	wp = wp->next;
    }

-    while (p && (prestrict || *s)) {
-	if (prestrict) {
-	    /*
-	     * As above, but with even less info to go on.
-	     * (Can this happen?)  At least handle the cases where
-	     * one of our patterns has given us a specific character.
-	     */
-	    if (prestrict->tp == CPAT_CHAR) {
-		c = prestrict->u.chr;
-	    } else {
-		if (p->tp == CPAT_CHAR) {
-		    c = p->u.chr;
-		} else {
-		    /*
-		     * OK.  Here we are in a function with just a line
-		     * pattern and another pattern to restrict the
-		     * characters that can go on the line, and no actual
-		     * characters.  We're matching two patterns against
-		     * one another to generate a character to insert.
-		     * This is a bit too psychedelic, so I'm going to
-		     * bale out now.  See you on the ground.
-		     */
-		    return 0;
-		}
-		if (!pattern_match1(prestrict, c, &mt))
-		    return 0;
-	    }
+    while (p && prestrict) {
+	/*
+	 * As above, but with even less info to go on.
+	 * (Can this happen?)  At least handle the cases where
+	 * one of our patterns has given us a specific character.
+	 */
+	if (prestrict->tp == CPAT_CHAR) {
+	    c = prestrict->u.chr;
 	} else {
-	    if (*s == Meta) {
-		c = STOUC(s[1]) ^ 32;
-		len = 2;
+	    if (p->tp == CPAT_CHAR) {
+		c = p->u.chr;
 	    } else {
-		c = STOUC(*s);
-		len = 1;
+		/*
+		 * OK.  Here we are in a function with just a line
+		 * pattern and another pattern to restrict the
+		 * characters that can go on the line, and no actual
+		 * characters.  We're matching two patterns against
+		 * one another to generate a character to insert.
+		 * This is a bit too psychedelic, so I'm going to
+		 * bale out now.  See you on the ground.
+		 */
+		return 0;
 	    }
+	    if (!pattern_match1(prestrict, c, &mt))
+		return 0;
 	}
 	if (!pattern_match1(p, c, &mt))
 	    return 0;
 	p = p->next;
-	if (prestrict) {
-	    if (imeta(c)) {
-		*s++ = Meta;
-		*s++ = c ^ 32;
-	    } else {
-		*s++ = c;
-	    }
-	    prestrict = prestrict->next;
-	} else
-	    s += len;
+	*newline++ = (ZLE_CHAR_T)c;
+	prestrict = prestrict->next;
    }

    if (prestrict) {
@ -1445,8 +1402,53 @@ pattern_match_restrict(Cpattern p, char *s, Cpattern wp, char *ws,
 	return 0;
    }

-    while (wp && *ws) {
+    while (wp && wsclen) {
 	/* No funny business when we only have the word pattern. */
+	if (!pattern_match1(wp, *wsc, &wmt))
+	    return 0;
+	wp = wp->next;
+	wsc++;
+	wsclen--;
+    }
+
+    return 1;
+}
+
+
+/*
+ * The usual version of pattern matching, without the line string
+ * being handled by restriction.
+ *
+ * Check if the given pattern matches the given string.
+ * p and  s are either anchor or line pattern and string;
+ * wp and ws are word (candidate) pattern and string
+ *
+ * If only one pattern is given, we just check if characters match.
+ * If both line and word are given, we check that characters match
+ * for {...} classes by comparing positions in the strings.
+ *
+ * Patterns and strings are always passed in pairs, so it is enough
+ * to check for non-NULL wp. p should always be present.
+ */
+/**/
+mod_export int
+pattern_match(Cpattern p, char *s, Cpattern wp, char *ws)
+{
+    convchar_t c, wc;
+    convchar_t ind, wind;
+    int len = 0, wlen, mt, wmt;
+#ifdef MULTIBYTE_SUPPORT
+    mbstate_t lstate, wstate;
+
+    memset(&lstate, 0, sizeof(lstate));
+    memset(&wstate, 0, sizeof(wstate));
+#endif
+
+    while (p && wp && *s && *ws) {
+	/* First test the word character */
+#ifdef MULTIBYTE_SUPPORT
+	wlen = mb_metacharlenconv_r(ws, &wc, &wstate);
+#else
 	if (*ws == Meta) {
 	    wc = STOUC(ws[1]) ^ 32;
 	    wlen = 2;
@ -1454,6 +1456,94 @@ pattern_match_restrict(Cpattern p, char *s, Cpattern wp, char *ws,
 	    wc = STOUC(*ws);
 	    wlen = 1;
 	}
+#endif
+	wind = pattern_match1(wp, wc, &wmt);
+	if (!wind)
+	    return 0;
+
+	/*
+	 * Now the line character.
+	 */
+#ifdef MULTIBYTE_SUPPORT
+	len = mb_metacharlenconv_r(s, &c, &lstate);
+#else
+	/* We have the character itself. */
+	if (*s == Meta) {
+	    c = STOUC(s[1]) ^ 32;
+	    len = 2;
+	} else {
+	    c = STOUC(*s);
+	    len = 1;
+	}
+#endif
+	/*
+	 * If either is "?", they match each other; no further tests.
+	 * Apply this even if the character wasn't convertible;
+	 * there's no point trying to be clever in that case.
+	 */
+	if (p->tp != CPAT_ANY || wp->tp != CPAT_ANY)
+	{
+	    ind = pattern_match1(p, c, &mt);
+	    if (!ind)
+		return 0;
+	    if (ind != wind)
+		return 0;
+	    if (mt != wmt) {
+		/*
+		 * Special case if matching lower vs. upper or
+		 * vice versa.  The transformed characters must match.
+		 * We don't need to check the transformation is
+		 * the appropriate one for each character separately,
+		 * since that was done in pattern_match1(), so just
+		 * compare lower-cased versions of both.
+		 */
+		if ((mt == PP_LOWER || mt == PP_UPPER) &&
+		    (wmt == PP_LOWER || wmt == PP_UPPER)) {
+		    if (ZC_tolower(c) != ZC_tolower(wc))
+			return 0;
+		} else {
+		    /* Other different classes can't match. */
+		    return 0;
+		}
+	    }
+	}
+
+	s += len;
+	ws += wlen;
+	p = p->next;
+	wp = wp->next;
+    }
+
+    while (p && *s) {
+#ifdef MULTIBYTE_SUPPORT
+	len = mb_metacharlenconv_r(s, &c, &lstate);
+#else
+	if (*s == Meta) {
+	    c = STOUC(s[1]) ^ 32;
+	    len = 2;
+	} else {
+	    c = STOUC(*s);
+	    len = 1;
+	}
+#endif
+	if (!pattern_match1(p, c, &mt))
+	    return 0;
+	p = p->next;
+	s += len;
+    }
+
+    while (wp && *ws) {
+#ifdef MULTIBYTE_SUPPORT
+	wlen = mb_metacharlenconv_r(ws, &wc, &wstate);
+#else
+	if (*ws == Meta) {
+	    wc = STOUC(ws[1]) ^ 32;
+	    wlen = 2;
+	} else {
+	    wc = STOUC(*ws);
+	    wlen = 1;
+	}
+#endif
 	if (!pattern_match1(wp, wc, &wmt))
 	    return 0;
 	wp = wp->next;
@ -1463,16 +1553,6 @@ pattern_match_restrict(Cpattern p, char *s, Cpattern wp, char *ws,
    return 1;
 }

-/*
- * The usual version of pattern matching, without the line string
- * being handled by restriction.
- */
-/**/
-mod_export int
-pattern_match(Cpattern p, char *s, Cpattern wp, char *ws)
-{
-    return pattern_match_restrict(p, s, wp, ws, NULL);
-}

 /* This splits the given string into a list of cline structs, separated
 * at those places where one of the anchors of an `*' pattern was found.
@ -1575,30 +1655,45 @@ bld_parts(char *str, int len, int plen, Cline *lp, Cline *lprem)
 * buffer line.  Then we test if this line matches the string given by
 * wlen and word.
 *
- * wpat contains pattern that matched previously
- * lpat contains the pattern for line we build
+ * The matcher  ) wpat, containing pattern that matched previously
+ *   mp gives   ) lpat, containing the pattern for line we build
+ * line is the line we are assembling; it is initially empty
 * mword is a string that matched wpat before
 * word is string that we try to match now
 *
 * The return value is the length of the string matched in the word, it
 * is zero if we couldn't build a line that matches the word.
- *
- * TODO: a lot of the nastiness associated with variable string
- * lengths can go when we switch to wide characters.  (Why didn't
- * I just keep line unmetafied and metafy into place at the end?  Er...)
 */

 /**/
 static int
-bld_line(Cmatcher mp, char **linep, char *mword, char *word, int wlen, int sfx)
+bld_line(Cmatcher mp, ZLE_STRING_T line, char *mword, char *word,
+	 int wlen, int sfx)
 {
    Cpattern lpat = mp->line;
    Cpattern wpat = mp->word;
    Cpattern curgenpat;
-    VARARR(struct cpattern, genpatarr, mp->llen);
    Cmlist ms;
-    int llen, rl;
-    char *oword = word, *line = *linep;
+    int llen, rl, l;
+    convchar_t convchr, *wordcp;
+    VARARR(convchar_t, wordchars, wlen);
+    VARARR(struct cpattern, genpatarr, mp->llen);
+
+    /*
+     * We may need to start the "word" array from the end.  This
+     * is much easier if we convert it to an array of (possibly wide)
+     * characters.
+     */
+    MB_METACHARINIT();
+    for (l = wlen, wordcp = wordchars; l; l--) {
+	int charlen = MB_METACHARLENCONV(word, &convchr);
+#ifdef MULTIBYTE_SUPPORT
+	if (convchr == WEOF)
+	    convchr = (*word == Meta) ? word[1] ^ 32 : *word;
+#endif
+	*wordcp++ = convchr;
+	word += charlen;
+    }

    /*
     * Loop over all characters.  At this stage, line is an empty
@ -1616,9 +1711,10 @@ bld_line(Cmatcher mp, char **linep, char *mword, char *word, int wlen, int sfx)
     * when we finally match the line against the set of matchers.
     */
    curgenpat = genpatarr;
+    MB_METACHARINIT();
    while (lpat) {
-	int wchr = (*mword == Meta) ? STOUC(mword[1]) ^ 32 : STOUC(*mword);
-	int wmtp, wind;
+	convchar_t wchr, wind;
+	int wmtp, mwordlen;
 	/*
 	 * If the line pattern is an equivalence, query wpat to find the
 	 * word part of the equivalence.  If we don't find one we don't try
@ -1628,9 +1724,10 @@ bld_line(Cmatcher mp, char **linep, char *mword, char *word, int wlen, int sfx)
 	 * the behaviour of the old logic that this replaces.)
 	 */
 	if (lpat->tp == CPAT_EQUIV && wpat && *mword) {
+	    mwordlen = MB_METACHARLENCONV(mword, &wchr);
 	    wind = pattern_match1(wpat, wchr, &wmtp);
 	    wpat = wpat->next;
-	    mword += (*mword == Meta) ? 2 : 1;
+	    mword += mwordlen;
 	} else
 	    wind = 0;
 	if (wind) {
@ -1638,9 +1735,9 @@ bld_line(Cmatcher mp, char **linep, char *mword, char *word, int wlen, int sfx)
 	     * Successful match for word side of equivalence.
 	     * Find the line equivalent.
 	     */
-	    int lchr;
+	    convchar_t lchr;
 	    if ((lchr = pattern_match_equivalence(lpat, wind, wmtp, wchr))
-		== -1) {
+		== CHR_INVALID) {
 		/*
 		 * No equivalent.  No possible match; give up.
 		 */
@ -1694,50 +1791,40 @@ bld_line(Cmatcher mp, char **linep, char *mword, char *word, int wlen, int sfx)
    llen = mp->llen;
    rl = 0;

-    *line = '\0';
    if (sfx)
    {
 	/*
 	 * We need to work backwards from the end of both the
 	 * word and the line strings.
-	 *
-	 * Position at the end of the word by counting characters.
 	 */
-	int l = wlen;
-	while (l--)
-	    word += (*word == Meta) ? 2 : 1;
+	wordcp = wordchars + wlen;

 	/*
-	 * We construct the line from the end.  We've left
-	 * enough space for possible Meta's.
+	 * We construct the line from the end.
 	 */
-	line += 2 * llen;
-	*line = '\0';
+	line += llen;
 	curgenpat = genpatarr + llen;
-    } else
+    } else {
+	wordcp = wordchars;
 	curgenpat = genpatarr;
+    }

    /* we now reuse mp, lpat, wpat for the global matchers */
+    MB_METACHARINIT();
    while (llen && wlen) {
-	int wchr, wmtp;
-	char *wp;
+	convchar_t wchr;
+	int wmtp;
+	convchar_t *wp;
 	Cpattern tmpgenpat;

 	if (sfx) {
-	    if (word > oword + 1 && word[-2] == Meta)
-		wp = word - 2;
-	    else
-		wp = word - 1;
+	    wp = wordcp - 1;
 	    curgenpat--;
 	} else
-	    wp = word;
-	if (*wp == Meta)
-	    wchr = STOUC(wp[1]) ^ 32;
-	else
-	    wchr = STOUC(*wp);
-	if (pattern_match1(curgenpat, wchr, &wmtp))
+	    wp = wordcp;
+	if (pattern_match1(curgenpat, *wp, &wmtp))
 	{
-	    int lchr;
+	    convchar_t lchr;
 	    /*
 	     * We can match the line character directly with the word
 	     * character.  If the line character is a fixed one,
@ -1749,36 +1836,27 @@ bld_line(Cmatcher mp, char **linep, char *mword, char *word, int wlen, int sfx)
 		lchr = curgenpat->u.chr;
 	    else
 		lchr = wchr;
-	    if (imeta(lchr)) {
-		if (sfx)
-		    line -= 2;
-		line[0] = Meta;
-		line[1] = lchr ^ 32;
-		if (!sfx)
-		    line += 2;
-	    } else {
-		if (sfx)
-		    line--;
-		line[0] = lchr;
-		if (!sfx)
-		    line++;
-	    }
+
+	    if (sfx)
+		*--line = lchr;
+	    else
+		*line++ = lchr;

 	    llen--;
 	    wlen--;
 	    rl++;

 	    if (sfx)
-		word = wp;
+		wordcp = wp;
 	    else {
 		if (llen)
 		    curgenpat++;
-		word += (*word == Meta) ? 2 : 1;
+		wordcp++;
 	    }
 	}
 	else
 	{
-	    char *lp;
+	    ZLE_CHAR_T *lp;
 	    /*
 	     * Need to loop over pattern matchers.
 	     */
@ -1794,66 +1872,31 @@ bld_line(Cmatcher mp, char **linep, char *mword, char *word, int wlen, int sfx)
 		if (mp && !mp->flags && mp->wlen <= wlen &&
 		    mp->llen <= llen)
 		{
-		    if (sfx) {
-			/*
-			 * We haven't assembled the line yet, and with
-			 * Meta characters we don't yet know the length.
-			 * We'll fix this up later.
-			 */
-			lp = line - 2 * mp->llen;
-		    } else
-			lp = line;
-		    wp = word;
-		    if (sfx) {
-			int l = mp->wlen;
-			while (l--) {
-			    if (wp > oword + 1 && wp[-2] == Meta)
-				wp -= 2;
-			    else
-				wp--;
-			}
+		    lp = line;
+		    wp = wordcp;
+		    tmpgenpat = curgenpat;

-			tmpgenpat = curgenpat - mp->llen;
-		    } else
-			tmpgenpat = curgenpat;
-		    if (pattern_match_restrict(mp->line, lp,
-					       mp->word, wp, tmpgenpat)) {
+		    if (sfx) {
+			lp -= mp->llen;
+			wp -= mp->wlen;
+			tmpgenpat -= mp->llen;
+		    }
+
+		    if (pattern_match_restrict(mp->line, mp->word, wp,
+					       wlen - (wp - wordchars),
+					       tmpgenpat, lp)) {
 			/*
 			 * Matched: advance over as many characters
 			 * of the patterns and strings as
 			 * we've done matches.
 			 */
 			if (sfx) {
-			    int imove = mp->llen, nchar;
-			    char *pmove = lp;
-			    word = wp;
-			    
-			    /* Close the gap we left in the line string */
-			    while (imove--)
-				pmove += (*pmove == Meta) ? 2 : 1;
-			    /* Number of bytes to move */
-			    nchar = (int)(pmove - lp);
-			    /* The size of the gap */
-			    imove = 2 * mp->llen - nchar;
-			    if (imove) {
-				lp = line - imove;
-				/* Moving up, so start at the top */
-				while (nchar--)
-				    *--line = *--lp;
-				/* line is at the start of the moved text */
-			    }
-
+			    line = lp;
+			    wordcp = wp;
 			    curgenpat = tmpgenpat;
 			} else {
-			    int cnt = mp->llen;
-			    while (cnt--) {
-				line += (*line == Meta) ? 2 : 1;
-			    }
-
-			    cnt = mp->wlen;
-			    while (cnt--)
-				word += (*word == Meta) ? 2 : 1;
-
+			    line += mp->llen;
+			    wordcp += mp->wlen;
 			    curgenpat += mp->llen;
 			}
 			llen -= mp->llen;
@ -1869,10 +1912,6 @@ bld_line(Cmatcher mp, char **linep, char *mword, char *word, int wlen, int sfx)
    }
    if (!llen) {
 	/* Unmatched portion in the line built, return matched length. */
-	if (sfx)
-	    *linep = line;
-	else
-	    *line = '\0';
 	return rl;
    }
    return 0;
@ -1891,7 +1930,14 @@ join_strs(int la, char *sa, int lb, char *sb)

    Cmlist ms;
    Cmatcher mp;
-    int t, bl, rr = rl;
+    int t, bl;
+    /** rr is the remaining length already allocated in rs */
+    int rr = rl;
+    /*
+     * convlen is the length we need for the string converted to
+     * char * (possibly multibyte).
+     */
+    int convlen;
    char *rp = rs;

    while (la && lb) {
@ -1906,35 +1952,49 @@ join_strs(int la, char *sa, int lb, char *sb)
 		    if ((t = pattern_match(mp->word, sa, NULL, NULL)) ||
 			pattern_match(mp->word, sb, NULL, NULL)) {
 			/* It matched one of the strings, t says which one. */
-			/* TODO: double to allow Meta, not necessary
-			   when properly unmetafied */
-			VARARR(char, linearr, 2*mp->llen + 1);
-			char **ap, **bp, *line = linearr;
+			VARARR(ZLE_CHAR_T, line, mp->llen);
+			char **ap, **bp;
 			int *alp, *blp;

 			if (t) {
-			    ap = &sa; alp = &la;
-			    bp = &sb; blp = &lb;
+			    ap = &sa;
+			    alp = &la;
+
+			    bp = &sb;
+			    blp = &lb;
 			} else {
-			    ap = &sb; alp = &lb;
-			    bp = &sa; blp = &la;
+			    ap = &sb;
+			    alp = &lb;
+
+			    bp = &sa;
+			    blp = &la;
 			}
 			/* Now try to build a string that matches the other
 			 * string. */
-			if ((bl = bld_line(mp, &line, *ap, *bp, *blp, 0))) {
+			if ((bl = bld_line(mp, line, *ap, *bp, *blp, 0))) {
 			    /* Found one, put it into the return string. */
-			    if (rr <= mp->llen) {
+			    char *convstr =
+				zlelineasstring(line, mp->llen, 0, &convlen,
+						NULL, 0);
+			    if (rr <= convlen) {
 				char *or = rs;
+				int alloclen = (convlen > 20) ? convlen : 20;

-				rs = realloc(rs, (rl += 20));
-				rr += 20;
+				rs = realloc(rs, (rl += alloclen));
+				rr += alloclen;
 				rp += rs - or;
 			    }
-			    memcpy(rp, line, mp->llen);
-			    rp += mp->llen; rr -= mp->llen;
-			    *ap += mp->wlen; *alp -= mp->wlen;
-			    *bp += bl; *blp -= bl;
+			    memcpy(rp, convstr, convlen);
+			    rp += convlen;
+			    rr -= convlen;
+			    /* HERE: multibyte chars */
+			    *ap += mp->wlen;
+			    *alp -= mp->wlen;
+
+			    *bp += bl;
+			    *blp -= bl;
 			    t = 1;
+			    free(convstr);
 			} else
 			    t = 0;
 		    }
@ -1944,16 +2004,20 @@ join_strs(int la, char *sa, int lb, char *sb)
 		break;
 	} else {
 	    /* Same character, just take it. */
-	    if (rr <= 1) {
+	    if (rr <= 1 /* HERE charlen */) {
 		char *or = rs;

 		rs = realloc(rs, (rl += 20));
 		rr += 20;
 		rp += rs - or;
 	    }
-	    *rp++ = *sa; rr--;
-	    sa++; sb++;
-	    la--; lb--;
+	    /* HERE: multibyte char */
+	    *rp++ = *sa;
+	    rr--;
+	    sa++;
+	    sb++;
+	    la--;
+	    lb--;
 	}
    }
    if (la || lb)
@ -2035,9 +2099,11 @@ check_cmdata(Cmdata md, int sfx)
 	} else {
 	    md->line = 0;
 	    md->len = md->olen = md->cl->wlen;
+	    /* HERE: multibyte */
 	    if ((md->str = md->cl->word) && sfx)
 		md->str += md->len;
 	    md->alen = md->cl->llen;
+	    /* HERE: multibyte */
 	    if ((md->astr = md->cl->line) && sfx)
 		md->astr += md->alen;
 	}
@ -2060,9 +2126,11 @@ undo_cmdata(Cmdata md, int sfx)
 	r->wlen = 0;
 	r->flags |= CLF_LINE;
 	r->llen = md->len;
+	/* HERE: multibyte */
 	r->line = md->str - (sfx ? md->len : 0);
    } else if (md->len != md->olen) {
 	r->wlen = md->len;
+	/* HERE: multibyte */
 	r->word = md->str - (sfx ? md->len : 0);
 	DPUTS(r->wlen > 0 && !*r->word, "Bad word");
    }
@ -2116,24 +2184,24 @@ join_sub(Cmdata md, char *str, int len, int *mlen, int sfx, int join)
 				       NULL, NULL)) ||
 		     pattern_match(mp->word, nw - (sfx ? mp->wlen : 0),
 				   NULL, NULL))) {
-		    /* TODO: doubled to allow Meta, not necessary
-		     * when properly unmetafied */
-		    VARARR(char, linearr, 2*mp->llen + 1);
+		    VARARR(ZLE_CHAR_T, line, mp->llen);
 		    int bl;
-		    char *mw, *line = linearr;
+		    char *mw;

 		    /* Then build all the possible lines and see
 		     * if one of them matches the other string. */
+		    /* HERE: they're multibyte */
 		    if (t)
 			mw = ow - (sfx ? mp->wlen : 0);
 		    else
 			mw = nw - (sfx ? mp->wlen : 0);

-		    if ((bl = bld_line(mp, &line, mw, (t ? nw : ow),
+		    if ((bl = bld_line(mp, line, mw, (t ? nw : ow),
 				       (t ? nl : ol), sfx)))  {
 			/* Yep, one of the lines matched the other
 			 * string. */

+			/* HERE: multibyte characters */
 			if (t) {
 			    ol = mp->wlen; nl = bl;
 			} else {
@ -2146,8 +2214,10 @@ join_sub(Cmdata md, char *str, int len, int *mlen, int sfx, int join)
 			md->len -= nl;
 			*mlen = ol;

-			return get_cline(NULL, 0, dupstring(line), mp->llen,
-					 NULL, 0, CLF_JOIN);
+			return get_cline(NULL, 0,
+					 zlelineasstring(line, mp->llen,
+							 0, NULL, NULL, 1),
+					 mp->llen, NULL, 0, CLF_JOIN);
 		    }
 		}
 	    }
--- a/Src/Zle/computil.c
+++ b/Src/Zle/computil.c
@ -4062,7 +4062,7 @@ cfp_matcher_range(Cmatcher *ms, char *add)
 		    len += addlen + 1;
 	    } else {
 		/* The usual set of matcher possibilities. */
-		int ind;
+		convchar_t ind;
 		if (m->line->tp == CPAT_EQUIV &&
 		    m->word->tp == CPAT_EQUIV) {
 		    /*
@ -4086,7 +4086,7 @@ cfp_matcher_range(Cmatcher *ms, char *add)
 			 * word pattern.
 			 */
 			if ((ind = pattern_match_equivalence
-			     (m->word, ind, mt, addc)) != -1) {
+			     (m->word, ind, mt, addc)) != CHR_INVALID) {
 			    if (ret) {
 				if (imeta(ind)) {
 				    *p++ = Meta;
--- a/Src/Zle/zle_utils.c
+++ b/Src/Zle/zle_utils.c
@ -167,9 +167,10 @@ zlecharasstring(ZLE_CHAR_T inchar, char *buf)
 * instead of wide characters where appropriate and with the contents
 * metafied.
 *
- * If outll is non-NULL, assign the new length.  If outcs is non-NULL,
- * assign the new character position.  This is the conventional string
- * length, without the NULL byte.
+ * If outllp is non-NULL, assign the new length.  This is the conventional
+ * string length, without the terminating NUL byte.
+ *
+ * If outcsp is non-NULL, assign the new character position.
 *
 * If useheap is 1, memory is returned from the heap, else is allocated
 * for later freeing.
--- a/Src/pattern.c
+++ b/Src/pattern.c
@ -3344,7 +3344,6 @@ mb_patmatchrange(char *range, wchar_t ch, wint_t *indptr, int *mtp)
 }


-#if 0
 /*
 * This is effectively the reverse of mb_patmatchrange().
 * Given a range descriptor of the same form, and an index into it,
@ -3353,11 +3352,6 @@ mb_patmatchrange(char *range, wchar_t ch, wint_t *indptr, int *mtp)
 * return the type in mtp instead.  Return 1 if successful, 0 if
 * there was no corresponding index.  Note all pointer arguments
 * must be non-null.
- *
- * TODO: for now the completion matching code does not handle
- * multibyte.  When it does, we will need either this, or
- * patmatchindex(), but not both---unlike user-initiated pattern
- * matching, multibyte mode in the line editor is always on when available.
 */

 /**/
@ -3438,10 +3432,9 @@ mb_patmatchindex(char *range, wint_t ind, wint_t *chr, int *mtp)
    /* No corresponding index. */
    return 0;
 }
-#endif

 /**/
-#endif
+#endif /* MULTIBYTE_SUPPORT */

 /*
 * Identical function to mb_patmatchrange() above for single-byte
@ -3572,9 +3565,17 @@ patmatchrange(char *range, int ch, int *indptr, int *mtp)
    return 0;
 }

+
+/**/
+#ifndef MULTIBYTE_SUPPORT
+
 /*
 * Identical function to mb_patmatchindex() above for single-byte
 * characters.  Here -1 represents a character that needs a special type.
+ *
+ * Unlike patmatchrange, we only need this in ZLE, which always
+ * uses MULTIBYTE_SUPPORT if compiled in; hence we don't use
+ * this function in that case.
 */

 /**/
@ -3658,6 +3659,9 @@ patmatchindex(char *range, int ind, int *chr, int *mtp)
    return 0;
 }

+/**/
+#endif /* MULTIBYTE_SUPPORT */
+
 /*
 * Repeatedly match something simple and say how many times.
 * charstart is an array parallel to that starting at patinput