mirror of
git://git.code.sf.net/p/zsh/code
synced 2025-01-01 05:16:05 +01:00
26361e438b
Add keyword retrieval of words. Improve test for start of word in subwords for use in delete-whole-word. If line after cursor is empty, white space is treated as ws-after-cursor.
296 lines
10 KiB
Text
296 lines
10 KiB
Text
# Match words by the style given below.  The matching depends on the
# cursor position.  The matched_words array is set to the matched portions
# separately.  These look like:
#    <stuff-at-start> <word-before-cursor> <whitespace-before-cursor>
#    <whitespace-after-cursor> <word-after-cursor> <whitespace-after-word>
#    <stuff-at-end>
# where the cursor position is always after the third item and `after'
# is to be interpreted as `after or on'.
#
# matched_words may be an associative array, in which case the
# values above are instead given by the elements named start,
# word-before-cursor, ws-before-cursor, ws-after-cursor, word-after-cursor,
# ws-after-word and end.  In addition, the element is-word-start is 1 if
# the cursor is on the start of a word and 0 otherwise; this is non-trivial
# in the case of subword (camel case) matching as there may be no white
# space to test.
#
# Some of the array elements will be empty; this depends on the style.
# For example
#    foo bar  rod stick
#            ^
# with the cursor where indicated, typical settings will produce the
# elements `foo ', `bar', ` ', ` ', `rod', ` ' and `stick'.
#
# The style word-style can be set to indicate what a word is.
# The possibilities are:
#
#  shell        Words are shell words, i.e. elements of a command line.
#  whitespace   Words are space-delimited words; only space or tab characters
#               are considered to terminate a word.
#  normal       (the default): the usual zle logic is applied, with all
#               alphanumeric characters plus any characters in $WORDCHARS
#               considered parts of a word.  The style word-chars overrides
#               the parameter.  (Any currently undefined value will be
#               treated as `normal', but this should not be relied upon.)
#  specified    Similar to normal, except that only the characters given
#               in the string (and not also alphanumeric characters)
#               are to be considered parts of words.
#  unspecified  The negation of `specified': the characters given
#               are those that aren't to be considered parts of a word.
#               They should probably include white space.
#
# If the value also contains the string `subword', words are further
# divided at subword (camel case) boundaries.  The characters that start a
# subword are given, in the form used inside a [...] character class, by
# the style subword-range or the option -r; the default is `[:upper:]'.
#
# In the case of `normal' or `(un)specified', more control over the
# behaviour can be obtained by setting the style `word-chars' for the
# current context.  The value is used to override $WORDCHARS locally.
# Hence,
#   zstyle ':zle:transpose-words*' word-style normal
#   zstyle ':zle:transpose-words*' word-chars ''
# will force bash-style word recognition, i.e. only alphanumeric characters
# are considered parts of a word.  It is up to the function which calls
# match-words-by-style to set the context in the variable curcontext,
# else a default context will be used (not recommended).
#
# You can override the use of word-chars with the style word-class.
# This specifies the same information, but as a character class.
# The surrounding square brackets shouldn't be given, but anything
# which can appear inside is allowed.  For example,
#   zstyle ':zle:*' word-class '-:[:alnum:]'
# is valid.  Note the usual care with `]', `^' and `-' must be taken if
# they need to appear as individual characters rather than for grouping.
#
# The final style is `skip-chars'.  This is an integer; that many
# characters, counting the one under the cursor, will be treated as
# whitespace regardless and added to the front of the fourth element of
# matched_words.  The default is zero, i.e. the character under the cursor
# will appear in <whitespace-after-cursor> if it is whitespace, else in
# <word-after-cursor>.  This style is mostly useful for forcing
# transposition to ignore the current character.
#
# The values of the styles can be overridden by options to the function:
#  -w <word-style>
#  -s <skip-chars>
#  -c <word-class>
#  -C <word-chars>
#  -r <subword-range>

# Run with native zsh option settings, restored on return.  extendedglob
# is needed for the `#' / `##' repetition operators and the (#b) / (#m)
# globbing flags used in the patterns below.
emulate -L zsh
setopt extendedglob

# Style values (possibly overridden by options) and the patterns built
# from them.
local wordstyle spacepat wordpat1 wordpat2 opt charskip wordchars wordclass
# Pieces of the result, plus match/mbegin/mend, which are set by (#b)
# pattern matches; they are made local so the caller's values survive.
local match mbegin mend pat1 pat2 word1 word2 ws1 ws2 ws3 skip
# MATCH/MBEGIN/MEND are set by (#m) pattern matches; likewise localised.
local nwords MATCH MBEGIN MEND subwordrange

# Use the caller's context if one is set, else a default one.
local curcontext=${curcontext:-:zle:match-words-by-style}

# match-word-context is defined outside this file.
# NOTE(review): presumably it refines $curcontext for the style lookups
# below -- confirm against that function.
autoload -Uz match-word-context
match-word-context

# Options given on the command line take precedence over the styles
# looked up afterwards.  Every option takes an argument.
while getopts "w:s:c:C:r:" opt; do
  case $opt in
    (w)
    # word-style
    wordstyle=$OPTARG
    ;;

    (s)
    # skip-chars
    skip=$OPTARG
    ;;

    (c)
    # word-class
    wordclass=$OPTARG
    ;;

    (C)
    # word-chars
    wordchars=$OPTARG
    ;;

    (r)
    # subword-range
    subwordrange=$OPTARG
    ;;

    (*)
    # Anything else getopts reports (e.g. an unrecognised option):
    # give up with a failure status.
    return 1
    ;;
  esac
done

# Fall back to the styles for values not supplied as options;
# skip-chars defaults to zero if it is set nowhere.
[[ -z $wordstyle ]] && zstyle -s $curcontext word-style wordstyle
[[ -z $skip ]] && zstyle -s $curcontext skip-chars skip
[[ -z $skip ]] && skip=0

case $wordstyle in
|
|
(*shell*) local bufwords
|
|
# This splits the line into words as the shell understands them.
|
|
bufwords=(${(Z:n:)LBUFFER})
|
|
nwords=${#bufwords}
|
|
wordpat1="${(q)bufwords[-1]}"
|
|
|
|
# Take substring of RBUFFER to skip over $skip characters
|
|
# from the cursor position.
|
|
bufwords=(${(Z:n:)RBUFFER[1+$skip,-1]})
|
|
wordpat2="${(q)bufwords[1]}"
|
|
spacepat='[[:space:]]#'
|
|
|
|
# Assume the words are at the top level, i.e. if we are inside
|
|
# 'something with spaces' then we need to ignore the embedded
|
|
# spaces and consider the whole word.
|
|
bufwords=(${(Z:n:)BUFFER})
|
|
if (( ${#bufwords[$nwords]} > ${#wordpat1} )); then
|
|
# Yes, we're in the middle of a shell word.
|
|
# Find out what's in front.
|
|
eval pat1='${LBUFFER%%(#b)('${wordpat1}')('${spacepat}')}'
|
|
# Now everything from ${#pat1}+1 is wordy
|
|
wordpat1=${LBUFFER[${#pat1}+1,-1]}
|
|
wordpat2=${RBUFFER[1,${#bufwords[$nwords]}-${#wordpat1}+1]}
|
|
|
|
wordpat1=${(q)wordpat1}
|
|
wordpat2=${(q)wordpat2}
|
|
fi
|
|
;;
|
|
(*space*) spacepat='[[:space:]]#'
|
|
wordpat1='[^[:space:]]##'
|
|
wordpat2=$wordpat1
|
|
;;
|
|
(*) local wc
|
|
# See if there is a character class.
|
|
wc=$wordclass
|
|
if [[ -n $wc ]] || zstyle -s $curcontext word-class wc; then
|
|
# Treat as a character class: do minimal quoting.
|
|
wc=${wc//(#m)[\'\"\`\$\(\)\^]/\\$MATCH}
|
|
else
|
|
# See if there is a local version of $WORDCHARS.
|
|
wc=$wordchars
|
|
if [[ -z $wc ]]; then
|
|
zstyle -s $curcontext word-chars wc ||
|
|
wc=$WORDCHARS
|
|
fi
|
|
if [[ $wc = (#b)(?*)-(*) ]]; then
|
|
# We need to bring any `-' to the front to avoid confusing
|
|
# character classes... we get away with `]' since in zsh
|
|
# this isn't a pattern character if it's quoted.
|
|
wc=-$match[1]$match[2]
|
|
fi
|
|
wc="${(q)wc}"
|
|
fi
|
|
# Quote $wc where necessary, because we don't want those
|
|
# characters to be considered as pattern characters later on.
|
|
if [[ $wordstyle = *specified* ]]; then
|
|
if [[ $wordstyle != *unspecified* ]]; then
|
|
# The given set of characters are the word characters, nothing else
|
|
wordpat1="[${wc}]##"
|
|
# anything else is a space.
|
|
spacepat="[^${wc}]#"
|
|
else
|
|
# The other way round.
|
|
wordpat1="[^${wc}]##"
|
|
spacepat="[${wc}]#"
|
|
fi
|
|
else
|
|
# Normal: similar, but add alphanumerics.
|
|
wordpat1="[${wc}[:alnum:]]##"
|
|
spacepat="[^${wc}[:alnum:]]#"
|
|
fi
|
|
wordpat2=$wordpat1
|
|
;;
|
|
esac
|
|
|
|
# The eval makes any special characters in the parameters active.
|
|
# In particular, we need the surrounding `[' s to be `real'.
|
|
# This is why we quoted the wordpats in the `shell' option, where
|
|
# they have to be treated as literal strings at this point.
|
|
match=()
|
|
eval pat1='${LBUFFER%%(#b)('${wordpat1}')('${spacepat}')}'
|
|
word1=$match[1]
|
|
ws1=$match[2]
|
|
|
|
if [[ $wordstyle = *subword* ]]; then
|
|
if [[ -z $subwordrange ]] &&
|
|
! zstyle -s $curcontext subword-range subwordrange; then
|
|
subwordrange='[:upper:]'
|
|
fi
|
|
# The rule here is that a word boundary may be an upper case letter
|
|
# followed by a lower case letter, or an upper case letter at
|
|
# the start of a group of upper case letters. To make
|
|
# it easier to be consistent, we just use anything that
|
|
# isn't an upper case character instead of a lower case
|
|
# character.
|
|
# Here the initial "*" will match greedily, so we get the
|
|
# last such match, as we want.
|
|
integer epos
|
|
if [[ $word1 = (#b)(*)([${~subwordrange}][^${~subwordrange}]*) ]]; then
|
|
(( epos = ${#match[1]} ))
|
|
fi
|
|
if [[ $word1 = (#b)(*[^${~subwordrange}])([${~subwordrange}]*) ]]; then
|
|
(( ${#match[1]} > epos )) && (( epos = ${#match[1]} ))
|
|
fi
|
|
if (( epos > 0 )); then
|
|
pat1+=$word1[1,epos]
|
|
word1=$word1[epos+1,-1]
|
|
fi
|
|
fi
|
|
|
|
# Split RBUFFER (the text from the cursor onwards) into
#   ws2    skipped characters plus separator after the cursor
#   word2  the first word after (or on) the cursor
#   ws3    the separator following that word
#   pat2   everything left over
match=()
# A string of $skip `?' characters: as a pattern this swallows exactly
# that many characters, whatever they are, at the start of RBUFFER.
charskip=${(l:skip::?:)}

# The continuation line must not be indented: the pieces have to join
# up into a single word for eval.
eval pat2='${RBUFFER##(#b)('${charskip}${spacepat}')('\
${wordpat2}')('${spacepat}')}'
if [[ -n $match[2] ]]; then
  ws2=$match[1]
  word2=$match[2]
  ws3=$match[3]
else
  # No more words, so anything left is white space after cursor.
  ws2=$RBUFFER
  pat2=
fi

integer wordstart
|
|
[[ ( -n $ws1 || -n $ws2 ) && -n $word2 ]] && wordstart=1
|
|
if [[ $wordstyle = *subword* ]]; then
|
|
# Do we have a group of upper case characters at the start
|
|
# of word2 (that don't form the entire word)?
|
|
# Again, rely on greedy matching of first pattern.
|
|
if [[ $word2 = (#b)([${~subwordrange}][${~subwordrange}]##)(*) &&
|
|
-n $match[2] ]]; then
|
|
# Yes, so the last one is new word boundary.
|
|
(( epos = ${#match[1]} - 1 ))
|
|
# Otherwise, are we in the middle of a word?
|
|
# In other, er, words, we've got something on the left with no
|
|
# white space following and something that doesn't start a word here.
|
|
elif [[ -n $word1 && -z $ws1 && -z $ws2 && \
|
|
$word2 = (#b)([^${~subwordrange}]##)* ]]; then
|
|
(( epos = ${#match[1]} ))
|
|
# Otherwise, do we have upper followed by non-upper not
|
|
# at the start? Ignore the initial character, we already
|
|
# know it's a word boundary so it can be an upper case character
|
|
# if it wants.
|
|
elif [[ $word2 = (#b)(?[^${~subwordrange}]##)[${~subwordrange}]* ]]; then
|
|
(( epos = ${#match[1]} ))
|
|
(( wordstart = 1 ))
|
|
else
|
|
(( epos = 0 ))
|
|
fi
|
|
if (( epos )); then
|
|
# Careful: if we matched a subword there's no whitespace immediately
|
|
# after the matched word, so ws3 should be empty and any existing
|
|
# value tacked onto pat2.
|
|
pat2="${word2[epos+1,-1]}$ws3$pat2"
|
|
ws3=
|
|
word2=$word2[1,epos]
|
|
fi
|
|
fi
|
|
|
|
# Hand the result back in matched_words, which is not declared here:
# it should be local to the caller.  Its existing type decides the form
# of the result -- named elements if it is an associative array, else
# the seven pieces in order as an ordinary array.
if [[ ${(t)matched_words} = *association* ]]; then
  matched_words=(
    start              "$pat1"
    word-before-cursor "$word1"
    ws-before-cursor   "$ws1"
    ws-after-cursor    "$ws2"
    word-after-cursor  "$word2"
    ws-after-word      "$ws3"
    end                "$pat2"
    is-word-start      $wordstart
  )
else
  # The quotes keep empty pieces as (empty) array elements.
  matched_words=("$pat1" "$word1" "$ws1" "$ws2" "$word2" "$ws3" "$pat2")
fi