Merge of 22606: word-context style for word matching.

2025-10-27 16:50:58 +01:00 · 2007-10-29 03:06:09 +00:00 · 2007-10-29 03:06:09 +00:00 · 2e89ebbdc7
commit 2e89ebbdc7
parent 6c39ff7b81
3 changed files with 284 additions and 3 deletions
--- a/Doc/Zsh/contrib.yo
+++ b/Doc/Zsh/contrib.yo
@ -383,12 +383,13 @@ tindex(capitalize-word-match)
 tindex(up-case-word-match)
 tindex(down-case-word-match)
 tindex(select-word-style)
+tindex(match-word-context)
 tindex(match-words-by-style)
 xitem(tt(forward-word-match), tt(backward-word-match))
 xitem(tt(kill-word-match), tt(backward-kill-word-match))
 xitem(tt(transpose-words-match), tt(capitalize-word-match))
 xitem(tt(up-case-word-match), tt(down-case-word-match))
-item(tt(select-word-style), tt(match-words-by-style))(
+item(tt(select-word-style), tt(match-word-context), tt(match-words-by-style))(
 The eight `tt(-match)' functions are drop-in replacements for the
 builtin widgets without the suffix.  By default they behave in a similar
 way.  However, by the use of styles and the function tt(select-word-style),
@ -462,7 +463,7 @@ Words are whitespace-delimited strings of characters.
 )
 enditem()

-The first three of those styles usually use tt($WORDCHARS), but the value
+The first three of those rules usually use tt($WORDCHARS), but the value
 in the parameter can be overridden by the style tt(word-chars), which works
 in exactly the same way as tt($WORDCHARS).  In addition, the style
 tt(word-class) uses character class syntax to group characters and takes
@ -473,7 +474,7 @@ alphanumerics plus the characters `tt(-)' and `tt(:)'.  Be careful
 including `tt(])', `tt(^)' and `tt(-)' as these are special inside
 character classes.

-The final style is tt(skip-chars).  This is mostly useful for
+The style tt(skip-chars) is mostly useful for
 tt(transpose-words) and similar functions.  If set, it gives a count of
 characters starting at the cursor position which will not be considered
 part of the word and are treated as space, regardless of what they actually
@ -485,6 +486,16 @@ has been set, and tt(transpose-words-match) is called with the cursor on
 the var(X) of tt(foo)var(X)tt(bar), where var(X) can be any character, then
 the resulting expression is tt(bar)var(X)tt(foo).

+Finer grained control can be obtained by setting the style tt(word-context)
+to an array of pairs of entries.  Each pair of entries consists of a
+var(pattern) and a var(subcontext).  The shell argument the cursor is on is
+matched against each var(pattern) in turn until one matches; if it does,
+the context is extended by a colon and the corresponding var(subcontext).
+Note that the test is made against the original word on the line, with no
+stripping of quotes.  If the cursor is at the end of the line the test is
+performed against an empty string; if it is on whitespace between words the
+test is made against a single space.  Some examples are given below.
+
 Here are some examples of use of the styles, actually taken from the
 simplified interface in tt(select-word-style):

@ -500,6 +511,21 @@ example(style ':zle:*kill*' word-style space)
 Uses space-delimited words for widgets with the word `kill' in the name.
 Neither of the styles tt(word-chars) nor tt(word-class) is used in this case.

+Here are some examples of use of the tt(word-context) style to extend
+the context.
+
+example(zstyle ':zle:*' word-context "*/*" filename "[[:space:]]" whitespace
+zstyle ':zle:transpose-words:whitespace' word-style shell
+zstyle ':zle:transpose-words:filename' word-style normal
+zstyle ':zle:transpose-words:filename' word-chars '')
+
+This provides two different ways of using tt(transpose-words) depending on
+whether the cursor is on whitespace between words or on a filename, here
+any word containing a tt(/).  On whitespace, complete arguments as defined
+by standard shell rules will be transposed.  In a filename, only
+alphanumerics will be transposed.  Elsewhere, words will be transposed
+using the default style for tt(:zle:transpose-words).
+
 The word matching and all the handling of tt(zstyle) settings is actually
 implemented by the function tt(match-words-by-style).  This can be used to
 create new user-defined widgets.  The calling function should set the local
@ -526,6 +552,10 @@ endsitem()

 For example, tt(match-words-by-style -w shell -c 0) may be used to
 extract the command argument around the cursor.
+
+The tt(word-context) style is implemented by the function
+tt(match-word-context).  This should not usually need to be called
+directly.
 )
 tindex(delete-whole-word-match)
 item(tt(delete-whole-word-match))(
--- a/Functions/Zle/match-word-context
+++ b/Functions/Zle/match-word-context
@ -0,0 +1,48 @@
+# See if we can extend the word context to something more specific.
+# curcontext must be set to the base context by this point; it
+# will be appended to directly.
+
+emulate -L zsh  # zsh default options, local to this function
+setopt extendedglob
+
+local -a worcon bufwords
+local pat tag lastword word
+integer iword
+
+zstyle -a $curcontext word-context worcon || return 0  # style not set: leave curcontext untouched
+
+if (( ${#worcon} % 2 )); then  # odd length: not a list of pattern/subcontext pairs
+  zle -M "Bad word-context style in context $curcontext"
+  return
+fi
+
+bufwords=(${(z)LBUFFER})  # shell words up to the cursor
+iword=${#bufwords}  # index of the last of those words
+lastword=${bufwords[-1]}  # that word, possibly cut short by the cursor
+bufwords=(${(z)BUFFER})  # from here on: shell words of the whole line
+
+if [[ $lastword = ${bufwords[iword]} ]]; then
+  # If the word immediately left of the cursor is complete,
+  # we're not on it.  Either we're on unquoted whitespace, or
+  # the start of a new word.  Test the latter.
+  if [[ -z $RBUFFER ]]; then
+    # Nothing there, so not in a word.
+      word=''
+  elif [[ $RBUFFER[1] = [[:space:]] ]]; then
+    # Whitespace, so not in a word.
+    word=' '
+  else
+    # We want the next word along.
+    word=${bufwords[iword+1]}
+  fi
+else
+  # We're on a word.
+  word=${bufwords[iword]}
+fi
+
+for pat tag in "${worcon[@]}"; do  # consume the array two elements at a time
+  if [[ $word = ${~pat} ]]; then  # ${~pat}: the value of pat is used as a pattern
+    curcontext+=":$tag"  # curcontext is not local here, so the caller sees the change
+    return  # first matching pattern wins
+  fi
+done
--- a/Functions/Zle/match-words-by-style
+++ b/Functions/Zle/match-words-by-style
@ -0,0 +1,203 @@
+# Match words by the style given below.  The matching depends on the
+# cursor position.  The matched_words array is set to the matched portions
+# separately.  These look like:
+#    <stuff-at-start> <word-before-cursor> <whitespace-before-cursor>
+#    <whitespace-after-cursor> <word-after-cursor> <whitespace-after-word>
+#    <stuff-at-end>
+# where the cursor position is always after the third item and `after'
+# is to be interpreted as `after or on'.  Some
+# of the array elements will be empty; this depends on the style.
+# For example
+#    foo bar  rod stick
+#            ^
+# with the cursor where indicated will with typical settings produce the
+# elements `foo ', `bar', ` ', ` ', `rod', ` ' and `stick'.
+#
+# The style word-style can be set to indicate what a word is.
+# The five possibilities are:
+#
+#  shell	Words are shell words, i.e. elements of a command line.
+#  whitespace	Words are space delimited words; only space or tab characters
+#               are considered to terminate a word.
+#  normal       (the default): the usual zle logic is applied, with all
+#		alphanumeric characters plus any characters in $WORDCHARS
+#		considered parts of a word.  The style word-chars overrides
+#		the parameter.  (Any currently undefined value will be
+#		treated as `normal', but this should not be relied upon.)
+#  specified    Similar to normal, except that only the characters given
+#               in the string (and not also alphanumeric characters)
+#               are to be considered parts of words.
+#  unspecified  The negation of `specified': the characters given
+#               are those that aren't to be considered parts of a word.
+#               They should probably include white space.
+#
+# In the case of the `normal' or `(un)specified', more control on the
+# behaviour can be obtained by setting the style `word-chars' for the
+# current context.  The value is used to override $WORDCHARS locally.
+# Hence,
+#   zstyle ':zle:transpose-words*' word-style normal
+#   zstyle ':zle:transpose-words*' word-chars ''
+# will force bash-style word recognition, i.e. only alphanumeric characters
+# are considered parts of a word.  It is up to the function which calls
+# match-words-by-style to set the context in the variable curcontext,
+# else a default context will be used (not recommended).
+#
+# You can override the use of word-chars with the style word-class.
+# This specifies the same information, but as a character class.
+# The surrounding square brackets shouldn't be given, but anything
+# which can appear inside is allowed.  For example,
+#   zstyle ':zle:*' word-class '-:[:alnum:]'
+# is valid.  Note the usual care with `]' , `^' and `-' must be taken if
+# they need to appear as individual characters rather than for grouping.
+#
+# The final style is `skip-chars'.  This is an integer; that many
+# characters counting the one under the cursor will be treated as
+# whitespace regardless and added to the front of the fourth element of
+# matched_words.  The default is zero, i.e. the character under the cursor
+# will appear in <whitespace-after-cursor> if it is whitespace, else in
+# <word-after-cursor>.  This style is mostly useful for forcing
+# transposition to ignore the current character.
+#
+# The values of the styles can be overridden by options to the function:
+#  -w <word-style>
+#  -s <skip-chars>
+#  -c <word-class>
+#  -C <word-chars>
+
+emulate -L zsh  # zsh default options, local to this function
+setopt extendedglob  # the patterns below use (#b), (#m), `#' and `##'
+
+local wordstyle spacepat wordpat1 wordpat2 opt charskip wordchars wordclass
+local match mbegin mend pat1 pat2 word1 word2 ws1 ws2 ws3 skip
+local nwords MATCH MBEGIN MEND
+
+local curcontext=${curcontext:-:zle:match-words-by-style}  # local copy with a fallback context
+
+autoload -U match-word-context
+match-word-context  # may append a subcontext to the local curcontext
+
+while getopts "w:s:c:C:" opt; do  # every option takes an argument
+  case $opt in
+    (w)
+    wordstyle=$OPTARG  # takes precedence over the word-style style
+    ;;
+
+    (s)
+    skip=$OPTARG  # takes precedence over the skip-chars style
+    ;;
+
+    (c)
+    wordclass=$OPTARG  # takes precedence over the word-class style
+    ;;
+
+    (C)
+    wordchars=$OPTARG  # takes precedence over the word-chars style
+    ;;
+
+    (*)
+    return 1  # unrecognised option
+    ;;
+  esac
+done
+
+[[ -z $wordstyle ]] && zstyle -s $curcontext word-style wordstyle  # no -w given: look up the style
+[[ -z $skip ]] && zstyle -s $curcontext skip-chars skip  # no -s given: look up the style
+[[ -z $skip ]] && skip=0  # neither option nor style: skip nothing
+
+case $wordstyle in
+  (shell) local bufwords  # words as split by the (z) parameter flag
+	  # This splits the line into words as the shell understands them.
+	  bufwords=(${(z)LBUFFER})
+	  nwords=${#bufwords}  # index of the word ending at or cut by the cursor
+	  wordpat1="${(q)bufwords[-1]}"  # quoted: must stay a literal string through the later eval
+
+	  # Take substring of RBUFFER to skip over $skip characters
+	  # from the cursor position.
+	  bufwords=(${(z)RBUFFER[1+$skip,-1]})
+	  wordpat2="${(q)bufwords[1]}"  # quoted for the same reason as wordpat1
+	  spacepat='[[:space:]]#'
+
+	  # Assume the words are at the top level, i.e. if we are inside
+	  # 'something with spaces' then we need to ignore the embedded
+	  # spaces and consider the whole word.
+	  bufwords=(${(z)BUFFER})
+	  if (( ${#bufwords[$nwords]} > ${#wordpat1} )); then
+	    # Yes, we're in the middle of a shell word.
+	    # Find out what's in front.
+	    eval pat1='${LBUFFER%%(#b)('${wordpat1}')('${spacepat}')}'
+	    # Now everything from ${#pat1}+1 is wordy
+	    wordpat1=${LBUFFER[${#pat1}+1,-1]}
+	    wordpat2=${RBUFFER[1,${#bufwords[$nwords]}-${#wordpat1}+1]}
+
+	    wordpat1=${(q)wordpat1}
+	    wordpat2=${(q)wordpat2}
+	  fi
+	  ;;
+  (*space) spacepat='[[:space:]]#'  # any style name ending in `space'
+           wordpat1='[^[:space:]]##'
+	   wordpat2=$wordpat1
+	   ;;
+  (*) local wc  # every other style name, including an empty one
+      # See if there is a character class.
+      wc=$wordclass
+      if [[ -n $wc ]] || zstyle -s $curcontext word-class wc; then
+	# Treat as a character class: do minimal quoting.
+	wc=${wc//(#m)[\'\"\`\$\(\)\^]/\\$MATCH}
+      else
+	# See if there is a local version of $WORDCHARS.
+	wc=$wordchars
+	if [[ -z $wc ]]; then
+	  zstyle -s $curcontext word-chars wc ||
+	  wc=$WORDCHARS
+	fi
+	if [[ $wc = (#b)(?*)-(*) ]]; then
+	  # We need to bring any `-' to the front to avoid confusing
+	  # character classes... we get away with `]' since in zsh
+          # this isn't a pattern character if it's quoted.
+	  wc=-$match[1]$match[2]
+	fi
+	wc="${(q)wc}"
+      fi
+      # Quote $wc where necessary, because we don't want those
+      # characters to be considered as pattern characters later on.
+      if [[ $wordstyle = *specified ]]; then  # matches both specified and unspecified
+        if [[ $wordstyle != un* ]]; then
+	  # The given set of characters are the word characters, nothing else
+	  wordpat1="[${wc}]##"
+	  # anything else is a space.
+	  spacepat="[^${wc}]#"
+	else
+	  # The other way round.
+	  wordpat1="[^${wc}]##"
+	  spacepat="[${wc}]#"
+    	fi
+      else
+        # Normal: similar, but add alphanumerics.
+	wordpat1="[${wc}[:alnum:]]##"
+	spacepat="[^${wc}[:alnum:]]#"
+      fi
+      wordpat2=$wordpat1  # same word pattern on both sides of the cursor
+      ;;
+esac
+
+# The eval makes any special characters in the parameters active.
+# In particular, we need the surrounding `[' s to be `real'.
+# This is why we quoted the wordpats in the `shell' option, where
+# they have to be treated as literal strings at this point.
+match=()  # start empty so a failed match leaves word1 and ws1 empty
+eval pat1='${LBUFFER%%(#b)('${wordpat1}')('${spacepat}')}'
+word1=$match[1]  # word before the cursor
+ws1=$match[2]  # whitespace between that word and the cursor
+
+match=()  # reset before matching the right-hand side
+charskip=
+repeat $skip charskip+=\?  # one `?' per character to be skipped
+
+eval pat2='${RBUFFER##(#b)('${charskip}${spacepat}')('\
+${wordpat2}')('${spacepat}')}'
+
+ws2=$match[1]  # skipped characters plus whitespace after the cursor
+word2=$match[2]  # word after (or on) the cursor
+ws3=$match[3]  # whitespace following that word
+
+matched_words=("$pat1" "$word1" "$ws1" "$ws2" "$word2" "$ws3" "$pat2")  # not declared local here; presumably the caller declares it