mirror of
				git://git.code.sf.net/p/zsh/code
				synced 2025-10-31 06:00:54 +01:00 
			
		
		
		
	Add keyword retrieval of words. Improve test for start of word in subwords for use in delete-whole-word. If line after cursor is empty, white space is treated as ws-after-cursor.
		
			
				
	
	
		
			296 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			Text
		
	
	
	
	
	
			
		
		
	
	
			296 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			Text
		
	
	
	
	
	
# Match words by the style given below.  The matching depends on the
# cursor position.  The matched_words array is set to the matched portions
# separately.  These look like:
#    <stuff-at-start> <word-before-cursor> <whitespace-before-cursor>
#    <whitespace-after-cursor> <word-after-cursor> <whitespace-after-word>
#    <stuff-at-end>
# where the cursor position is always after the third item and `after'
# is to be interpreted as `after or on'.
#
# matched_words may be an associative array, in which case the
# values above are now given by the elements named start, word-before-cursor,
# ws-before-cursor, ws-after-cursor, word-after-cursor, ws-after-word,
# end.  In addition, the element is-word-start is 1 if the cursor
# is on the start of a word; this is non-trivial in the case of subword
# (camel case) matching as there may be no white space to test.
#
# Some of the array elements will be empty; this depends on the style.
# For example
#    foo bar  rod stick
#            ^
# with the cursor where indicated will, with typical settings, produce the
# elements `foo ', `bar', ` ', ` ', `rod', ` ' and `stick'.
#
# The style word-style can be set to indicate what a word is.
# The possibilities are:
#
#  shell        Words are shell words, i.e. elements of a command line.
#  whitespace   Words are space delimited words; only white space characters
#               are considered to terminate a word.
#  normal       (the default): the usual zle logic is applied, with all
#               alphanumeric characters plus any characters in $WORDCHARS
#               considered parts of a word.  The style word-chars overrides
#               the parameter.  (Any currently undefined value will be
#               treated as `normal', but this should not be relied upon.)
#  specified    Similar to normal, except that only the characters given
#               in the string (and not also alphanumeric characters)
#               are to be considered parts of words.
#  unspecified  The negation of `specified': the characters given
#               are those that aren't to be considered parts of a word.
#               They should probably include white space.
#
# If the value of word-style also contains the string `subword', the words
# found are further divided at characters in the class given by the style
# subword-range (default `[:upper:]'), e.g. for camel case identifiers.
#
# In the case of the `normal' or `(un)specified', more control on the
# behaviour can be obtained by setting the style `word-chars' for the
# current context.  The value is used to override $WORDCHARS locally.
# Hence,
#   zstyle ':zle:transpose-words*' word-style normal
#   zstyle ':zle:transpose-words*' word-chars ''
# will force bash-style word recognition, i.e. only alphanumeric characters
# are considered parts of a word.  It is up to the function which calls
# match-words-by-style to set the context in the variable curcontext,
# else a default context will be used (not recommended).
#
# You can override the use of word-chars with the style word-class.
# This specifies the same information, but as a character class.
# The surrounding square brackets shouldn't be given, but anything
# which can appear inside is allowed.  For example,
#   zstyle ':zle:*' word-class '-:[:alnum:]'
# is valid.  Note the usual care with `]', `^' and `-' must be taken if
# they need to appear as individual characters rather than for grouping.
#
# The final style is `skip-chars'.  This is an integer; that many
# characters counting the one under the cursor will be treated as
# whitespace regardless and added to the front of the fourth element of
# matched_words.  The default is zero, i.e. the character under the cursor
# will appear in <whitespace-after-cursor> if it is whitespace, else in
# <word-after-cursor>.  This style is mostly useful for forcing
# transposition to ignore the current character.
#
# The values of the styles can be overridden by options to the function:
#  -w <word-style>
#  -s <skip-chars>
#  -c <word-class>
#  -C <word-chars>
#  -r <subword-range>

# Use native zsh behaviour with options local to this function, and turn
# on the extended glob operators (`#', `##', `(#b)', `(#m)') used below.
emulate -L zsh
setopt extendedglob

# Working variables.  match/mbegin/mend and MATCH/MBEGIN/MEND are declared
# local so the pattern matching done here does not alter the caller's copies.
local wordstyle spacepat wordpat1 wordpat2 opt charskip wordchars wordclass
local match mbegin mend pat1 pat2 word1 word2 ws1 ws2 ws3 skip
local nwords MATCH MBEGIN MEND subwordrange

# Use the caller's context if there is one, else a default.
local curcontext=${curcontext:-:zle:match-words-by-style}

# match-word-context is defined outside this file.
# NOTE(review): presumably it refines curcontext from the cursor
# position -- confirm against that function.
autoload -Uz match-word-context
match-word-context

# Options given on the command line; each one takes precedence over the
# style of the corresponding name.  Any unrecognised option is an error.
while getopts "w:s:c:C:r:" opt; do
  case $opt in
    (w) wordstyle=$OPTARG ;;
    (s) skip=$OPTARG ;;
    (c) wordclass=$OPTARG ;;
    (C) wordchars=$OPTARG ;;
    (r) subwordrange=$OPTARG ;;
    (*) return 1 ;;
  esac
done

# Whatever was not supplied as an option is looked up as a style in the
# current context; skip-chars finally defaults to zero.
[[ -n $wordstyle ]] || zstyle -s $curcontext word-style wordstyle
[[ -n $skip ]] || zstyle -s $curcontext skip-chars skip
[[ -n $skip ]] || skip=0

| case $wordstyle in
 | |
|   (*shell*) local bufwords
 | |
| 	  # This splits the line into words as the shell understands them.
 | |
| 	  bufwords=(${(Z:n:)LBUFFER})
 | |
| 	  nwords=${#bufwords}
 | |
| 	  wordpat1="${(q)bufwords[-1]}"
 | |
| 
 | |
| 	  # Take substring of RBUFFER to skip over $skip characters
 | |
| 	  # from the cursor position.
 | |
| 	  bufwords=(${(Z:n:)RBUFFER[1+$skip,-1]})
 | |
| 	  wordpat2="${(q)bufwords[1]}"
 | |
| 	  spacepat='[[:space:]]#'
 | |
| 
 | |
| 	  # Assume the words are at the top level, i.e. if we are inside
 | |
| 	  # 'something with spaces' then we need to ignore the embedded
 | |
| 	  # spaces and consider the whole word.
 | |
| 	  bufwords=(${(Z:n:)BUFFER})
 | |
| 	  if (( ${#bufwords[$nwords]} > ${#wordpat1} )); then
 | |
| 	    # Yes, we're in the middle of a shell word.
 | |
| 	    # Find out what's in front.
 | |
| 	    eval pat1='${LBUFFER%%(#b)('${wordpat1}')('${spacepat}')}'
 | |
| 	    # Now everything from ${#pat1}+1 is wordy
 | |
| 	    wordpat1=${LBUFFER[${#pat1}+1,-1]}
 | |
| 	    wordpat2=${RBUFFER[1,${#bufwords[$nwords]}-${#wordpat1}+1]}
 | |
| 
 | |
| 	    wordpat1=${(q)wordpat1}
 | |
| 	    wordpat2=${(q)wordpat2}
 | |
| 	  fi
 | |
| 	  ;;
 | |
|   (*space*) spacepat='[[:space:]]#'
 | |
|            wordpat1='[^[:space:]]##'
 | |
| 	   wordpat2=$wordpat1
 | |
| 	   ;;
 | |
|   (*) local wc
 | |
|       # See if there is a character class.
 | |
|       wc=$wordclass
 | |
|       if [[ -n $wc ]] || zstyle -s $curcontext word-class wc; then
 | |
| 	# Treat as a character class: do minimal quoting.
 | |
| 	wc=${wc//(#m)[\'\"\`\$\(\)\^]/\\$MATCH}
 | |
|       else
 | |
| 	# See if there is a local version of $WORDCHARS.
 | |
| 	wc=$wordchars
 | |
| 	if [[ -z $wc ]]; then
 | |
| 	  zstyle -s $curcontext word-chars wc ||
 | |
| 	  wc=$WORDCHARS
 | |
| 	fi
 | |
| 	if [[ $wc = (#b)(?*)-(*) ]]; then
 | |
| 	  # We need to bring any `-' to the front to avoid confusing
 | |
| 	  # character classes... we get away with `]' since in zsh
 | |
|           # this isn't a pattern character if it's quoted.
 | |
| 	  wc=-$match[1]$match[2]
 | |
| 	fi
 | |
| 	wc="${(q)wc}"
 | |
|       fi
 | |
|       # Quote $wc where necessary, because we don't want those
 | |
|       # characters to be considered as pattern characters later on.
 | |
|       if [[ $wordstyle = *specified* ]]; then
 | |
|         if [[ $wordstyle != *unspecified* ]]; then
 | |
| 	  # The given set of characters are the word characters, nothing else
 | |
| 	  wordpat1="[${wc}]##"
 | |
| 	  # anything else is a space.
 | |
| 	  spacepat="[^${wc}]#"
 | |
| 	else
 | |
| 	  # The other way round.
 | |
| 	  wordpat1="[^${wc}]##"
 | |
| 	  spacepat="[${wc}]#"
 | |
|     	fi
 | |
|       else
 | |
|         # Normal: similar, but add alphanumerics.
 | |
| 	wordpat1="[${wc}[:alnum:]]##"
 | |
| 	spacepat="[^${wc}[:alnum:]]#"
 | |
|       fi
 | |
|       wordpat2=$wordpat1
 | |
|       ;;
 | |
| esac
 | |
| 
 | |
# Split the text before the cursor.  The longest trailing match of
# <word><space> is removed from LBUFFER: what is left becomes pat1 and the
# two parenthesised groups become word1 and ws1.
#
# The eval makes any special characters in the parameters active.
# In particular, we need the surrounding `['s to be `real'.
# This is why we quoted the wordpats in the `shell' option, where
# they have to be treated as literal strings at this point.
match=()
eval pat1='${LBUFFER%%(#b)('${wordpat1}')('${spacepat}')}'
word1=$match[1]
ws1=$match[2]

| if [[ $wordstyle = *subword* ]]; then
 | |
|   if [[ -z $subwordrange ]] &&
 | |
|     ! zstyle -s $curcontext subword-range subwordrange; then
 | |
|     subwordrange='[:upper:]'
 | |
|   fi
 | |
|   # The rule here is that a word boundary may be an upper case letter
 | |
|   # followed by a lower case letter, or an upper case letter at
 | |
|   # the start of a group of upper case letters.  To make
 | |
|   # it easier to be consistent, we just use anything that
 | |
|   # isn't an upper case character instead of a lower case
 | |
|   # character.
 | |
|   # Here the initial "*" will match greedily, so we get the
 | |
|   # last such match, as we want.
 | |
|   integer epos
 | |
|   if [[ $word1 = (#b)(*)([${~subwordrange}][^${~subwordrange}]*) ]]; then
 | |
|     (( epos = ${#match[1]} ))
 | |
|   fi
 | |
|   if [[ $word1 = (#b)(*[^${~subwordrange}])([${~subwordrange}]*) ]]; then
 | |
|     (( ${#match[1]} > epos ))  &&  (( epos = ${#match[1]} ))
 | |
|   fi
 | |
|   if (( epos > 0 )); then
 | |
|     pat1+=$word1[1,epos]
 | |
|     word1=$word1[epos+1,-1]
 | |
|   fi
 | |
| fi
 | |
| 
 | |
# Split the text after the cursor.  charskip is a run of $skip `?'
# characters, so that many characters are taken into the first
# (whitespace) group whatever they are.
match=()
charskip=${(l:skip::?:)}

# The longest leading match of <space><word><space> is removed from RBUFFER;
# what is left becomes pat2.
eval pat2='${RBUFFER##(#b)('${charskip}${spacepat}')('${wordpat2}')('${spacepat}')}'
if [[ -n $match[2] ]]; then
  ws2=$match[1]
  word2=$match[2]
  ws3=$match[3]
else
  # No more words, so anything left is white space after cursor.
  ws2=$RBUFFER
  pat2=
fi

| integer wordstart
 | |
| [[ ( -n $ws1 || -n $ws2 ) && -n $word2 ]] && wordstart=1
 | |
| if [[ $wordstyle = *subword* ]]; then
 | |
|   # Do we have a group of upper case characters at the start
 | |
|   # of word2 (that don't form the entire word)?
 | |
|   # Again, rely on greedy matching of first pattern.
 | |
|   if [[ $word2 = (#b)([${~subwordrange}][${~subwordrange}]##)(*) &&
 | |
| 	  -n $match[2] ]]; then
 | |
|     # Yes, so the last one is new word boundary.
 | |
|     (( epos = ${#match[1]} - 1 ))
 | |
|     # Otherwise, are we in the middle of a word?
 | |
|     # In other, er, words, we've got something on the left with no
 | |
|     # white space following and something that doesn't start a word here.
 | |
|   elif [[ -n $word1 && -z $ws1 && -z $ws2 && \
 | |
|     $word2 = (#b)([^${~subwordrange}]##)* ]]; then
 | |
|     (( epos = ${#match[1]} ))
 | |
|     # Otherwise, do we have upper followed by non-upper not
 | |
|     # at the start?  Ignore the initial character, we already
 | |
|     # know it's a word boundary so it can be an upper case character
 | |
|     # if it wants.
 | |
|   elif [[ $word2 = (#b)(?[^${~subwordrange}]##)[${~subwordrange}]* ]]; then
 | |
|     (( epos = ${#match[1]} ))
 | |
|     (( wordstart = 1 ))
 | |
|   else
 | |
|     (( epos = 0 ))
 | |
|   fi
 | |
|   if (( epos )); then
 | |
|     # Careful: if we matched a subword there's no whitespace immediately
 | |
|     # after the matched word, so ws3 should be empty and any existing
 | |
|     # value tacked onto pat2.
 | |
|     pat2="${word2[epos+1,-1]}$ws3$pat2"
 | |
|     ws3=
 | |
|     word2=$word2[1,epos]
 | |
|   fi
 | |
| fi
 | |
| 
 | |
# Hand the result back in matched_words, which is not declared here:
# it should be local to the caller.  The caller picks the form by the
# type it declared: an associative array gets named elements (including
# is-word-start), anything else gets the seven pieces in order.
if [[ ${(t)matched_words} = *association* ]]; then
  matched_words=(
    start              "$pat1"
    word-before-cursor "$word1"
    ws-before-cursor   "$ws1"
    ws-after-cursor    "$ws2"
    word-after-cursor  "$word2"
    ws-after-word      "$ws3"
    end                "$pat2"
    is-word-start      $wordstart
  )
else
  matched_words=("$pat1" "$word1" "$ws1" "$ws2" "$word2" "$ws3" "$pat2")
fi