users/12987: add subword capability to word-style

2025-01-21 00:01:26 +01:00 · 2008-06-24 11:18:39 +00:00 · 2008-06-24 11:18:39 +00:00 · 86b8b5eaa3
commit 86b8b5eaa3
parent a440669201
4 changed files with 84 additions and 14 deletions
--- a/5
+++ b/5
@ -1,5 +1,10 @@
 2008-06-24  Peter Stephenson  <pws@csr.com>

+	* users/12987: Doc/Zsh/contrib.yo,
+	Functions/Zle/match-words-by-style,
+	Functions/Zle/select-word-style: add subword matching to
+	word-style capabilities.
+
 	* 25242: Mikael: Doc/Zsh/prompt.yo, Src/prompt.c: add V prompt
 	test for contents of psvar.

--- a/Doc/Zsh/contrib.yo
+++ b/Doc/Zsh/contrib.yo
@ -459,6 +459,12 @@ Restore the default settings; this is usually the same as `tt(normal)'.
 )
 enditem()

+All but `tt(default)' can be input as an upper case character, which has
+the same effect but with subword matching turned on.  In this case, words
+with upper case characters are treated specially: each separate run of
+upper case characters, or an upper case character followed by any number of
+other characters, is considered a word.
+
 More control can be obtained using the tt(zstyle) command, as described in
 ifzman(zmanref(zshmodules))\
 ifnzman(noderef(The zsh/zutil Module)).  Each style is looked up in the
@ -507,6 +513,9 @@ alphanumerics plus the characters `tt(-)' and `tt(:)'.  Be careful
 including `tt(])', `tt(^)' and `tt(-)' as these are special inside
 character classes.

+tt(word-style) may also have `tt(-subword)' appended to its value to
+turn on subword matching, as described above.
+
 The style tt(skip-chars) is mostly useful for
 tt(transpose-words) and similar functions.  If set, it gives a count of
 characters starting at the cursor position which will not be considered
--- a/Functions/Zle/match-words-by-style
+++ b/Functions/Zle/match-words-by-style
@ -105,7 +105,7 @@ done
 [[ -z $skip ]] && skip=0

 case $wordstyle in
-  (shell) local bufwords
+  (*shell*) local bufwords
 	  # This splits the line into words as the shell understands them.
 	  bufwords=(${(z)LBUFFER})
 	  nwords=${#bufwords}
@ -133,7 +133,7 @@ case $wordstyle in
 	    wordpat2=${(q)wordpat2}
 	  fi
 	  ;;
-  (*space) spacepat='[[:space:]]#'
+  (*space*) spacepat='[[:space:]]#'
           wordpat1='[^[:space:]]##'
 	   wordpat2=$wordpat1
 	   ;;
@ -160,8 +160,8 @@ case $wordstyle in
      fi
      # Quote $wc where necessary, because we don't want those
      # characters to be considered as pattern characters later on.
-      if [[ $wordstyle = *specified ]]; then
-        if [[ $wordstyle != un* ]]; then
+      if [[ $wordstyle = *specified* ]]; then
+        if [[ $wordstyle != *unspecified* ]]; then
 	  # The given set of characters are the word characters, nothing else
 	  wordpat1="[${wc}]##"
 	  # anything else is a space.
@ -189,6 +189,28 @@ eval pat1='${LBUFFER%%(#b)('${wordpat1}')('${spacepat}')}'
 word1=$match[1]
 ws1=$match[2]

+if [[ $wordstyle = *subword* ]]; then
+  # The rule here is that a word boundary may be an upper case letter
+  # followed by a lower case letter, or an upper case letter at
+  # the start of a group of upper case letters.  To make
+  # it easier to be consistent, we just use anything that
+  # isn't an upper case character instead of a lower case
+  # character.
+  # Here the initial "*" will match greedily, so we get the
+  # last such match, as we want.
+  integer epos
+  if [[ $word1 = (#b)(*)([[:upper:]][^[:upper:]]*) ]]; then
+    (( epos = ${#match[1]} ))
+  fi
+  if [[ $word1 = (#b)(*[^[:upper:]])([[:upper:]]*) ]]; then
+    (( ${#match[1]} > epos ))  &&  (( epos = ${#match[1]} ))
+  fi
+  if (( epos > 0 )); then
+    pat1+=$word1[1,epos]
+    word1=$word1[epos+1,-1]
+  fi
+fi
+
 match=()
 charskip=
 repeat $skip charskip+=\?
@ -200,4 +222,30 @@ ws2=$match[1]
 word2=$match[2]
 ws3=$match[3]

+if [[ $wordstyle = *subword* ]]; then
+  # Do we have a group of upper case characters at the start
+  # of word2 (that don't form the entire word)?
+  # Again, rely on greedy matching of first pattern.
+  if [[ $word2 = (#b)([[:upper:]][[:upper:]]##)(*) && -n $match[2] ]]; then
+    # Yes, so the last one is a new word boundary.
+    (( epos = ${#match[1]} - 1 ))
+    # Otherwise, do we have upper followed by non-upper not
+    # at the start?  Ignore the initial character, we already
+    # know it's a word boundary so it can be an upper case character
+    # if it wants.
+  elif [[ $word2 = (#b)(?[^[:upper:]]##)[[:upper:]]* ]]; then
+    (( epos = ${#match[1]} ))
+  else
+    (( epos = 0 ))
+  fi
+  if (( epos )); then
+    # Careful: if we matched a subword there's no whitespace immediately
+    # after the matched word, so ws3 should be empty and any existing
+    # value tacked onto pat2.
+    pat2="${word2[epos+1,-1]}$ws3$pat2"
+    ws3=
+    word2=$word2[1,epos]
+  fi
+fi
+
 matched_words=("$pat1" "$word1" "$ws1" "$ws2" "$word2" "$ws3" "$pat2")
--- a/Functions/Zle/select-word-style
+++ b/Functions/Zle/select-word-style
@ -10,7 +10,7 @@ word_functions=(backward-kill-word backward-word

 [[ -z $1 ]] && autoload read-from-minibuffer

-local REPLY detail f
+local REPLY detail f wordstyle

 if ! zle -l $word_functions[1]; then
    for f in $word_functions; do
@ -25,6 +25,7 @@ while true; do
    if [[ -n $WIDGET && -z $1 ]]; then
 	read-from-minibuffer -k1 "Word styles (hit return for more detail):
 (b)ash (n)ormal (s)hell (w)hitespace (d)efault (q)uit
+(B), (N), (S), (W) as above with subword matching
 ${detail}? " || return 1
    else
 	REPLY=$1
@ -33,31 +34,31 @@ ${detail}? " || return 1
    detail=

    case $REPLY in
-	(b*)
+	([bB]*)
 	# bash style
-	zstyle ':zle:*' word-style standard
+	wordstyle=standard
 	zstyle ':zle:*' word-chars ''
 	;;

-	(n*)
+	([nN]*)
 	# normal zsh style
-	zstyle ':zle:*' word-style standard
+	wordstyle=standard
 	zstyle ':zle:*' word-chars "$WORDCHARS"
 	;;

-	(s*)
+	([sS]*)
 	# shell command arguments or special tokens
-	zstyle ':zle:*' word-style shell
+	wordstyle=shell
 	;;

-	(w*)
+	([wW]*)
 	# whitespace-delimited
-	zstyle ':zle:*' word-style space
+	wordstyle=space
 	;;

 	(d*)
 	# default: could also return widgets to builtins here
-	zstyle -d ':zle:*' word-style
+	wordstyle=
 	zstyle -d ':zle:*' word-chars
 	;;

@ -84,5 +85,12 @@ $detail" >&2
 	continue
 	;;
    esac
+
+    if [[ -n $wordstyle ]]; then
+      if [[ $REPLY = [[:upper:]]* ]]; then
+	wordstyle+=-subword
+      fi
+      zstyle ':zle:*' word-style $wordstyle
+    fi
    return
 done