1
0
Fork 0
mirror of git://git.code.sf.net/p/zsh/code synced 2025-11-01 18:30:55 +01:00

Unposted, c.f. 21752: Expand insert-composed-char.

This commit is contained in:
Peter Stephenson 2005-09-22 22:23:44 +00:00
parent fdd0397411
commit f13252c310
5 changed files with 410 additions and 255 deletions

View file

@ -1,3 +1,10 @@
2005-09-22 Peter Stephenson <pws@pwstephenson.fsnet.co.uk>
* unposted, c.f. 21752: Doc/Zsh/contrib.yo,
Functions/Zle/.distfiles, Functions/Zle/define-composed-chars,
Functions/Zle/insert-composed-char: add some alphabets to
insert-composed-char.
2005-09-22 Clint Adams <clint@zsh.org>
* Stephen Rueger: 21744: unconditionally assume that

View file

@ -687,9 +687,10 @@ The function may be run outside zle in which case it prints the character
keystrokes.
The set of accented characters is reasonably complete up to Unicode
character U+0180, the set of special characters less so. However, it
mostly gives up at that point. Adding new characters is easy, however.
Please send any additions to tt(zsh-workers@sunsite.dk).
character U+0180, the set of special characters less so. However, it
is very sporadic from that point. Adding new characters is easy,
however; see the function tt(define-composed-chars). Please send any
additions to tt(zsh-workers@sunsite.dk).
The codes for the second character when used to accent the first are as
follows. Note that not every character can take every accent.
@ -709,14 +710,15 @@ sitem(tt(_))(Underline, however there are currently no underlined characters.)
sitem(tt(/))(Stroke through the base character.)
sitem(tt("))(Double acute (only supported on a few letters).)
sitem(tt(;))(Ogonek. (A little forward facing hook at the bottom right
of the character. The "g" stands for "Ogonek" but another
mnemonic is that g has a squiggle below the line.))
of the character.))
sitem(tt(<))(Caron. (A little v over the letter.))
sitem(tt(0))(Circle over the base character.)
sitem(tt(2))(Hook over the base character.)
sitem(tt(9))(Horn over the base character.)
endsitem()
The most common characters from the Arabic, Cyrillic, Greek and Hebrew
alphabets are available; consult RFC 1345 for the appropriate sequences.
The following other two-character sequences are understood.
startitem()
@ -772,6 +774,14 @@ sitem(tt(PI))(Pilcrow (paragraph))
sitem(tt(-o))(Spanish masculine ordinal indicator)
sitem(tt(>>))(Right guillemet)
sitem(tt(?I))(Inverted question mark)
sitem(tt(-1))(Hyphen)
sitem(tt(-N))(En dash)
sitem(tt(-M))(Em dash)
sitem(tt(-3))(Horizontal bar)
sitem(tt(:3))(Vertical ellipsis)
sitem(tt(.3))(Horizontal midline ellipsis)
sitem(tt(!2))(Double vertical line)
sitem(tt(=2))(Double low line)
sitem(tt(RQUOTE()6))(Left single quote)
sitem(tt(RQUOTE()9))(Right single quote)
sitem(tt(.9))("Right" low quote)
@ -798,6 +808,7 @@ sitem(tt(12))(Half)
sitem(tt(34))(Three quarters)
sitem(tt(*X))(Multiplication)
sitem(tt(-:))(Division)
sitem(tt(%0))(Per mille)
endsitem()
)
item(Accents on their own)(

View file

@ -2,7 +2,8 @@ DISTFILES_SRC='
.distfiles
backward-kill-word-match backward-word-match
capitalize-word-match copy-earlier-word
cycle-completion-positions delete-whole-word-match
cycle-completion-positions
define-composed-chars delete-whole-word-match
down-case-word-match down-line-or-beginning-search
edit-command-line forward-word-match
history-pattern-search history-search-end

View file

@ -0,0 +1,371 @@
# This is not a widget function, it is only a helper for insert-composed-char
# to cut down on resident memory use.
# The associative array zsh_accented_chars is indexed by the
# accent. The values are sets of character / Unicode pairs for
# the character with the given accent. The Unicode value is
# a hex index with no base discriminator; essentially a UCS-4 index
# with the leading zeroes suppressed.
typeset -gA zsh_accented_chars
# Save quite a lot of space by using short names internally.
# z is the local working table; it is copied into zsh_accented_chars
# at the end of this file.
local -A z
# a holds the current accent key; it is used for keys that need quoting.
local a
# grave
a=\!
z[$a]="\
A C0 E C8 I CC O D2 U D9 a E0 e E8 i EC o F2 u F9 N 1F8 n 1F9 \
"
# acute
# Note: i with acute is U+00ED.  (U+00EC is i with grave and is already
# listed in the grave table; it must not be repeated here.)
a=\'
z[$a]="\
A C1 E C9 I CD O D3 U DA Y DD a E1 e E9 i ED o F3 u FA y FD C 106 c 107 \
L 139 l 13A N 143 n 144 R 154 r 155 S 15A s 15B Z 179 z 17A \
"
# circumflex
a=\>
z[$a]="\
A C2 E CA I CE O D4 U DB a E2 e EA i EE o F4 u FB C 108 c 109 G 11C g 11d \
H 124 h 125 J 134 j 135 S 15C s 15D W 174 w 175 Y 176 y 177 \
"
# tilde
# E and e with tilde are U+1EBC and U+1EBD.  U+00CB is E with diaeresis
# and is listed in the diaeresis table below, not here.
a=\?
z[$a]="\
A C3 E 1EBC N D1 O D5 a E3 e 1EBD n F1 o F5 I 128 i 129 U 168 u 169 \
"
# macron (d-, D- give eth)
a=-
z[$a]="\
A 100 a 101 d F0 D D0 E 112 e 113 I 12a i 12b O 14C o 14D U 16A u 16B \
"
# breve
a=\(
z[$a]="\
A 102 a 103 E 114 e 115 G 11E g 11F I 12C i 12D O 14E o 14F U 16C u 16D \
"
# dot above, small i with no dot, or l with middle dot
a=.
z[$a]="\
C 10A c 10b E 116 e 117 G 120 g 121 I 130 i 131 L 13F l 140 Z 17B z 17C \
"
# diaeresis / Umlaut
# (includes E CB, which was previously misfiled under tilde)
a=:
z[$a]="\
A C4 E CB I CF O D6 U DC a E4 e EB i EF o F6 u FC y FF Y 178 \
"
# cedilla
a=,
z[$a]="\
C C7 c E7 G 122 g 123 K 136 k 137 L 13B l 13C N 145 n 146 R 156 r 157 \
S 15E s 15F T 162 t 163 \
"
# underline (_) would go here
# stroke through
a=/
z[$a]="\
O D8 o F8 D 110 d 111 H 126 h 127 L 141 l 142 T 166 t 167 b 180 \
"
# double acute
a=\"
z[$a]="\
O 150 o 151 U 170 u 171\
"
# ogonek
a=\;
z[$a]="\
A 104 a 105 E 118 e 119 I 12E i 12F U 172 u 173 \
"
# caron
a=\<
z[$a]="\
C 10C c 10D D 10E d 10F E 11A e 11B L 13D l 13E N 147 n 148 R 158 r 159 \
S 160 s 161 T 164 t 165 Z 17D z 17E \
"
# ring above
a=0
z[$a]="\
A C5 a E5 U 16E u 16F \
"
# hook above
# Upper and lower case are adjacent code points: E is U+1EBA, e is U+1EBB.
a=2
z[$a]="\
A 1EA2 a 1EA3 E 1EBA e 1EBB \
"
# horn, also right quotation marks
a=9
z[$a]="\
O 1A0 o 1A1 U 1Af u 1b0 ' 2019 . 201A \" 201D : 201E \
"
# left quotation marks
a=6
z[$a]="\
' 2018 \" 201C \
"
# reversed quotation marks for convenience
a=\'
z[$a]+=" \
9 201B \
"
a=\"
z[$a]+=" \
9 201F \
"
# ligature with E
# The table is keyed by the second character typed, so the upper case
# ligatures AE and OE belong under key E (not e, where they would be
# overwritten by the lower case assignment that follows).
a=E
z[$a]="\
A C6 O 152 \
"
# ligature with e
a=e
z[$a]="\
a E6 o 153 \
"
# ligature with J
a=J
z[$a]="\
I 132 \
"
# ligature with j
a=j
z[$a]="\
i 133 \
"
# eszett
a=s
z[$a]="\
s DF \
"
# upper case thorn
a=H
z[$a]="\
T DE \
"
# lower case thorn
a=h
z[$a]="\
t FE \
"
# Arabic characters
a=\+
z[$a]+=" \
, 60C ; 61B ? 61F a 627 b 628 t 62A g 62C x 62E d 62F r 631 z 632 s 633 \
c 635 e 639 i 63A + 640 f 641 q 642 k 643 l 644 m 645 n 646 h 647 w 648 \
j 649 y 64A : 64B \" 64C = 64D / 64E ' 64F 1 650 3 651 0 652 p 67E v 6A4 \
"
a=\'
z[$a]+=" H 621"
z[a]+=" \
0 6F0 1 6F1 2 6F2 3 6F3 4 6F4 5 6F5 6 6F6 7 6F7 8 6F8 9 6F9 \
"
z[d]+=" d 636"
z[f]+=" g 6AF"
z[H]+=" a 623 w 624 y 626 z 638"
z[h]+=" a 625"
z[j]+=" t 637"
z[k]+=" t 62B h 62D d 630"
z[M]+=" a 622"
z[m]+=" t 629"
z[n]+=" s 634"
z[S]+=" a 670"
# Cyrillic characters
a=\=
z[$a]+=" \
A 410 B 411 V 412 G 413 D 414 E 415 Z 417 I 418 J 419 K 41A L 41B \
M 41C N 41D O 41E P 41F R 420 S 421 T 422 U 423 F 424 H 425 C 426 \
Y 42B
a 430 b 431 v 432 g 433 d 434 e 435 z 437 i 438 j 439 k 43A l 43B \
m 43C n 43D o 43E p 43F r 440 s 441 t 442 u 443 f 444 h 445 c 446 \
y 44B
"
z[%]+=" \
D 402 G 403 J 408 V 40E Z 416 C 427 S 428 z 436 c 447 s 448
d 452 g 453 j 458 v 45E \
"
z[A]+=" J 42F"
z[a]+=" j 44F"
z[c]+=" S 429 s 449"
z[E]+=" I 404 J 42D"
z[e]+=" j 44D i 454"
z[I]+=" I 406 Y 407"
z[i]+=" i 456 y 457"
z[J]+=" L 409 N 40A K 40C"
z[j]+=" l 459 n 45A k 45C"
z[O]+=" I 401"
z[o]+=" i 451"
z[S]+=" D 405"
z[s]+=" T 40B d 455 t 45B"
z[U]+=" J 42E"
z[u]+=" j 44E"
z[Z]+=" D 40F"
z[z]+=" d 45F"
a=\"
z[$a]+=" = 42A % 42C"
a=\'
z[$a]+=" = 44A % 44C"
z[3]+=" \
Y 462 y 463 O 46A o 46B F 472 f 473 V 474 v 475 C 480 c 481 \
G 490 g 491 \
"
# Greek characters
a=%
z[$a]+=" \
A 386 E 388 Y 389 I 38A O 38C U 38E W 38F \
a 3Ac e 3Ad y 3Ae i 3AF \
o 3CC u 3CD w 3CE ' 3F4 \
"
a=\*
z[$a]+=" \
A 391 B 392 G 393 D 394 E 395 Z 396 Y 397 H 398 I 399 K 39A L 39B \
M 39C N 39D C 39E O 39F P 3A0 R 3A1 S 3A3 T 3A4 U 3A5 F 3A6 X 3A7 \
Q 3A8 W 3A9 J 3AA V 3Ab \
a 3B1 b 3B2 g 3B3 d 3B4 e 3B5 z 3B6 y 3b7 h 3B8 i 3B9 k 3Ba l 3BB \
m 3BC n 3BD c 3BE o 3BF p 3C0 r 3C1 s 3C3 t 3C4 u 3C5 f 3C6 x 3C7 \
q 3C8 w 3C9 j 3CA v 3CB \
"
a=3
z[$a]+=" \
i 390 u 3B0 T 3DA t 3DB M 3DC m 3DD K 3DE k 3DF P 3E0 p 3E1 j 3F5 \
"
z[s]+=" * 3C2"
z[G]+=" ' 3D8 , 3D9"
# Hebrew characters
a=+
z[$a]+=" \
A 5D0 B 5D1 G 5D2 D 5D3 H 5D4 W 5D5 Z 5D6 X 5D7 J 5D9 K 5DB L 5Dc M 5dE \
N 5E0 S 5E1 E 5E2 P 5E4 Q 5E7 R 5E8 T 5EA \
"
a=j
z[$a]+=" T 5D8 Z 5E5"
a=%
z[$a]+=" K 5DA M 5DD N 5DF P 5E3 "
a=J
z[$a]+=" Z 5e6"
a=h
z[$a]+=" S 5e9"
# Remaining characters are handled as separate pairs.
# We need to remember that the assoc array is keyed by the second character.
# Left square bracket
a=\(
z[$a]+=" < 5B"
# Reverse solidus (backslash to you and me).
a=/
z[$a]+=" / 5C"
# Right square bracket, circumflex
a=\>
z[$a]+=" ) 5D ' 5E"
# Grave accent
a=\!
z[$a]+=" ' 60"
# digraphs for (usually) standard characters {, |, }, ~
a=\!
z[$a]+=" ( 7B"
z[$a]+=" ! 7C"
a=\)
z[$a]+=" ! 7D"
a=\?
z[$a]+=" ' 7E"
# non-breaking space
z[S]+=" N A0"
# inverted exclamation mark
z[I]+=" ! A1"
# cent
z[t]+=" C A2"
# pound sterling
z[d]+=" P A3"
# currency
z[u]+=" C A4"
# yen
z[e]+=" Y A5"
# broken bar
z[B]+=" B A6"
# section
z[E]+=" S A7"
# lonely diaeresis
z[:]+=" ' A8"
# copyright
z[o]+=" C A9"
# spanish feminine ordinal
z[a]+=" - AA"
# left guillemet
a=\<
z[$a]+=" < AB"
# not sign
z[O]+=" N AC"
# soft hyphen
z[-]+=" - AD"
# registered
z[g]+=" R AE"
# lonely macron
z[m]+=" ' AF"
# degree
z[G]+=" D B0"
# +/-
z[-]+=" + B1"
# superscripts
z[S]+=" 2 B2 3 B3"
# lonely acute
a=\'
z[$a]+=" ' B4"
# micro
z[y]+=" M B5"
# pilcrow (paragraph)
z[I]+=" P B6"
# Middle dot
z[M]+=" . B7"
# Lonely cedilla
z[,]+=" ' B8"
# Superscript one
z[S]+=" 1 B9"
# spanish masculine ordinal
z[o]+=" - BA"
# right guillemet
a=\>
z[$a]+=" > BB"
# fractions
z[4]+=" 1 BC 3 BE"
z[2]+=" 1 BD"
# inverted question mark
z[I]+=" ? BF"
# multiplication
z[X]+=" * D7"
# division
z[:]+=" - F7"
# kra
z[k]+=" k 138"
# apostrophe n
z[n]+=" ' 149"
# Lappish ng
z[G]+=" N 14A"
z[g]+=" n 14B"
# OI
z[I]+=" O 1A2"
z[i]+=" o 1A3"
# yr
z[r]+=" y 1A6"
# ezh
z[D]+=" E 1B7"
# euro (I invented this but it's logical)
z[u]+=" E 20AC"
# hyphen
z[1]+=" - 2010"
# en dash
z[N]+=" - 2013"
# em dash
z[M]+=" - 2014"
# horizontal bar, vertical and horizontal ellipsis
z[3]+=" - 2015 : 22EE . 22EF"
# double vertical line, double low line
z[2]+=" ! 2016 = 2017"
# dagger and double dagger
z[-]+=" / 2020"
z[=]+=" / 2021"
# per mille
z[0]+=" % 2030"
zsh_accented_chars=("${(kv)z[@]}")

View file

@ -86,6 +86,14 @@
# -o Spanish masculine ordinal indicator
# >> Right guillemet
# ?I Inverted question mark
# -1 Hyphen
# -N en dash
# -M em dash
# -3 horizontal bar
# :3 vertical ellipsis
# .3 horizontal midline ellipsis
# !2 double vertical line
# =2 double low line
# '6 Left single quote
# '9 Right single quote
# .9 "Right" low quote
@ -110,6 +118,7 @@
# 34 Three quarters
# *X Multiplication
# -: Division
# %0 Per mille
#
# Accents with no base character
# '> Circumflex (caret)
@ -131,255 +140,11 @@ else
fi
if (( ${+zsh_accented_chars} == 0 )); then
# The associative array zsh_accent_chars is indexed by the
# accent. The values are sets of character / Unicode pairs for
# the character with the given accent. The Unicode value is
# a hex index with no base discriminator; essentially a UCS-4 index
# with the leading zeroes suppressed.
typeset -gA zsh_accented_chars
# grave
accent=\!
zsh_accented_chars[$accent]="\
A C0 E C8 I CC O D2 U D9 a E0 e E8 i EC o F2 u F9 N 1F8 n 1F9 \
"
# acute
accent=\'
zsh_accented_chars[$accent]="\
A C1 E C9 I CD O D3 U DA Y DD a E1 e E9 i EC o F3 u FA y FD C 106 c 107 \
L 139 l 13A N 143 n 144 R 154 r 155 S 15A s 15B Z 179 z 17A \
"
# circumflex
accent=\>
zsh_accented_chars[$accent]="\
A C2 E CA I CE O D4 U DB a E2 e EA i EE o F4 u FB C 108 c 109 G 11C g 11d \
H 124 h 125 J 134 j 135 S 15C s 15D W 174 w 175 Y 176 y 177 \
"
# tilde
accent=\?
zsh_accented_chars[$accent]="\
A C3 E CB N D1 O D5 a E3 n F1 o F5 I 128 i 129 U 168 u 169 \
"
# macron (d-, D- give eth)
accent=-
zsh_accented_chars[$accent]="\
A 100 a 101 d F0 D D0 E 112 e 113 I 12a i 12b O 14C o 14D U 16A u 16B \
"
# breve
accent=\(
zsh_accented_chars[$accent]="\
A 102 a 103 E 114 e 115 G 11E g 11F I 12C i 12D O 14E o 14F U 16C u 16D \
"
# dot above, small i with no dot, or l with middle dot
accent=.
zsh_accented_chars[$accent]="\
C 10A c 10b E 116 e 117 G 120 g 121 I 130 i 131 L 13F l 140 Z 17B z 17C \
"
# diaeresis / Umlaut
accent=:
zsh_accented_chars[$accent]="\
A C4 I CF O D6 U DC a E4 e EB i EF o F6 u FC y FF Y 178 \
"
# cedilla
accent=,
zsh_accented_chars[$accent]="\
C C7 c E7 G 122 g 123 K 136 k 137 L 13B l 13C N 145 n 146 R 156 r 157 \
S 15E s 15F T 162 t 163 \
"
# underline (_) would go here
# stroke through
accent=/
zsh_accented_chars[$accent]="\
O D8 o F8 D 110 d 111 H 126 h 127 L 141 l 142 T 166 t 167 b 180 \
"
# double acute
accent=\"
zsh_accented_chars[$accent]="\
O 150 o 151 U 170 u 171\
"
# ogonek
accent=\;
zsh_accented_chars[$accent]="\
A 104 a 105 E 118 e 119 I 12E i 12F U 172 u 173 \
"
# caron
accent=\<
zsh_accented_chars[$accent]="\
C 10C c 10D D 10E d 10F E 11A e 11B L 13D l 13E N 147 n 148 R 158 r 159 \
S 160 s 161 T 164 t 165 Z 17D z 17E \
"
# ring above
accent=0
zsh_accented_chars[$accent]="\
A C5 a E5 U 16E u 16F \
"
# hook above
accent=2
zsh_accented_chars[$accent]="\
A 1EA2 a 1EA3 E 1EBA e 1EBA \
"
# horn, also right quotation marks
accent=9
zsh_accented_chars[$accent]="\
O 1A0 o 1A1 U 1Af u 1b0 ' 2019 . 201A \" 201D : 201E \
"
# left quotation marks
accent=6
zsh_accented_chars[$accent]="\
' 2018 \" 201C \
"
# reversed quotation marks for convenience
accent=\'
zsh_accented_chars[$accent]+=" \
9 201B \
"
accent=\"
zsh_accented_chars[$accent]+=" \
9 201F \
"
# ligature with E
accent=e
zsh_accented_chars[$accent]="\
A C6 O 152 \
"
# ligature with e
accent=e
zsh_accented_chars[$accent]="\
a E6 o 153 \
"
# ligature with J
accent=J
zsh_accented_chars[$accent]="\
I 132 \
"
# ligature with j
accent=j
zsh_accented_chars[$accent]="\
i 133 \
"
# eszett
accent=s
zsh_accented_chars[$accent]="\
s DF \
"
# upper case thorn
accent=H
zsh_accented_chars[$accent]="\
T DE \
"
# lower case thorn
accent=h
zsh_accented_chars[$accent]="\
t FE \
"
# Remaining characters are handled as separate pairs.
# We need to remember that the assoc array is keyed by the second character.
# Left square bracket
accent=\(
zsh_accented_chars[$accent]+=" < 5B"
# Reverse solidus (backslash to you and me).
accent=/
zsh_accented_chars[$accent]+=" / 5C"
# Right square bracket, circumflex
accent=\>
zsh_accented_chars[$accent]+=" ) 5D ' 5E"
# Grave accent
accent=\!
zsh_accented_chars[$accent]+=" ' 60"
# diglyphys for (usually) standard characters {, |, }, ~
accent=\!
zsh_accented_chars[$accent]+=" ( 7B"
zsh_accented_chars[$accent]+=" ! 7C"
accent=\)
zsh_accented_chars[$accent]+=" ! 7D"
accent=\?
zsh_accented_chars[$accent]+=" ' 7E"
# non-breaking space
zsh_accented_chars[S]+=" N A0"
# inverted exclamation mark
zsh_accented_chars[I]+=" ! A1"
# cent
zsh_accented_chars[t]+=" C A2"
# pound sterling
zsh_accented_chars[d]+=" P A3"
# currency
zsh_accented_chars[u]+=" C A4"
# yen
zsh_accented_chars[e]+=" Y A5"
# broken bar
zsh_accented_chars[B]+=" B A6"
# section
zsh_accented_chars[E]+=" S A7"
# lonely diaeresis
zsh_accented_chars[:]+=" ' A8"
# copyright
zsh_accented_chars[o]+=" C A9"
# spanish feminine ordinal
zsh_accented_chars[a]+=" - AA"
# left guillemet
accent=\<
zsh_accented_chars[$accent]+=" < AB"
zsh_accented_chars[O]+=" N AC"
# soft hyphen
zsh_accented_chars[-]+=" - AD"
# registered
zsh_accented_chars[g]+=" R AE"
# lonely macron
zsh_accented_chars[m]+=" ' AF"
# degree
zsh_accented_chars[G]+=" D B0"
# +/-
zsh_accented_chars[-]+=" + B1"
# superscripts
zsh_accented_chars[S]+=" 2 B2 3 B3"
# lonely acute
accent=\'
zsh_accented_chars[$accent]+=" ' B4"
# micro
zsh_accented_chars[y]+=" M B5"
# pilcrow (paragraph)
zsh_accented_chars[I]+=" P B6"
# Middle dot
zsh_accented_chars[M]+=" . B7"
# Lonely cedilla
zsh_accented_chars[,]+=" ' B8"
# Superscript one
zsh_accented_chars[S]+=" 1 B9"
# spanish masculine ordinal
zsh_accented_chars[o]+=" - BA"
# right guillemet
accent=\>
zsh_accented_chars[$accent]+=" > BB"
# fractions
zsh_accented_chars[4]+=" 1 BC 3 BE"
zsh_accented_chars[2]+=" 1 BD"
# inverted question mark
zsh_accented_chars[I]+=" ? BF"
# multiplication
zsh_accented_chars[X]+=" * D7"
# division
zsh_accented_chars[:]+=" - F7"
# kra
zsh_accented_chars[k]+=" k 138"
# apostrophe n
zsh_accented_chars[n]+=" ' 149"
# Lappish ng
zsh_accented_chars[G]+=" N 14A"
zsh_accented_chars[g]+=" n 14B"
# OI
zsh_accented_chars[I]+=" O 1A2"
zsh_accented_chars[i]+=" o 1A3"
# yr
zsh_accented_chars[r]+=" y 1A6"
# ezh
zsh_accented_chars[D]+=" E 1B7"
# euro (I invented this but it's logical)
zsh_accented_chars[u]+=" E 20AC"
# dagger and double dagger
zsh_accented_chars[-]+=" / 2020"
zsh_accented_chars[=]+=" / 2021"
# Save quite a lot of memory by running and then erasing
# the function that defines the characters.
autoload -U define-composed-chars
define-composed-chars
unfunction define-composed-chars
fi
read -k basechar || return 1