mirror of
git://git.code.sf.net/p/zsh/code
synced 2025-11-27 15:01:00 +01:00
407 lines
10 KiB
Text
407 lines
10 KiB
Text
# Accented characters. Inputs two keys. There are two types: those
|
|
# with a base character followed by an accent (see below for codes for
|
|
# accents), and those with a two-character mnemonic for the composed
|
|
# character. These are (with the exception of the Euro) the codes
|
|
# given by RFC 1345. Note that some codes in RFC 1345 require three
|
|
# characters to be input; none of these are handled.
|
|
#
|
|
# For best results zsh should have been built with support for
|
|
# multibyte characters (--enable-multibyte), but single character sets
|
|
# also work.
|
|
#
|
|
# Outputs the character converted from Unicode into the local representation.
|
|
# (The conversion is done within the shell, using whatever facilities
|
|
# the C library provides.)
|
|
#
|
|
# When used as a zle widget, the character is inserted at the cursor
|
|
# position. With a numeric argument, preview in status line; outside zle,
|
|
# print character (and newline) to standard output.
|
|
#
|
|
# The set of accented characters is reasonably complete up to U+0180, the
|
|
# set of special characters less so. However, it mostly gives up at that
|
|
# point. Adding new Unicode characters is easy, however. Please send any
|
|
# additions to zsh-workers@sunsite.dk .
|
|
#
|
|
# Some of the accent codes are a little more obscure than others.
|
|
# ! Grave
|
|
# ' Acute
|
|
# > Circumflex
|
|
# ? Tilde
|
|
# - Macron. (A horizontal bar over the letter.)
|
|
# ( Breve. (A shallow dish shape over the letter.)
|
|
# . Dot above, or no dot with lower case i, or dot in the middle of L or l.
|
|
# : Diaeresis (Umlaut)
|
|
# , Cedilla
|
|
# _ Underline (none of these currently)
|
|
# / Stroke through character
|
|
# " Double acute
|
|
# ; Ogonek. (A little forward facing hook at the bottom right
|
|
# of the character.)
|
|
# < Caron. (A little v over the letter.)
|
|
# 0 Circle
|
|
# 2 Hook
|
|
# 9 Horn
|
|
# Hence A! is upper case A with a grave, c, is lower case c with cedilla.
|
|
#
|
|
# Some other composed characters:
|
|
# Various ligatures:
|
|
# AE ae OE oe IJ ij
|
|
#
|
|
# ASCII characters not on all keyboards:
|
|
# <( [
|
|
# // \
|
|
# )> ]
|
|
# (! {
|
|
# !! |
|
|
# !) }
|
|
# '? ~
|
|
#
|
|
# Special letters:
|
|
# ss Eszett (scharfes S)
|
|
# D- d- TH th Eth and thorn
|
|
# kk kra
|
|
# 'n 'n
|
|
# NG ng ng
|
|
# OI oi OI
|
|
# yr yr
|
|
# ED ezh
|
|
#
|
|
# Currency symbols:
|
|
# Ct Cent
|
|
# Pd Pound sterling
|
|
# Cu Currency
|
|
# Ye Yen
|
|
# Eu Euro (not in RFC 1345 but logical)
|
|
#
|
|
# Punctuation
|
|
# !I Inverted !
|
|
# BB Broken vertical bar
|
|
# SE Section
|
|
# Co Copyright
|
|
# -a Spanish feminine ordinal indicator
|
|
# << Left guillemet
|
|
# -- Soft hyphen
|
|
# Rg Registered trade mark
|
|
# PI Pilcrow (paragraph)
|
|
# -o Spanish masculine ordinal indicator
|
|
# >> Right guillemet
|
|
# ?I Inverted question mark
|
|
# '6 Left single quote
|
|
# '9 Right single quote
|
|
# .9 "Right" low quote
|
|
# 9' Reversed "right" quote
|
|
# "6 Left double quote
|
|
# "9 Right double quote
|
|
# :9 "Right" low double quote
|
|
# 9" Reversed "right" double quote
|
|
# /- Dagger
|
|
# /= Double dagger
|
|
#
|
|
# Mathematical
|
|
# DG Degree
|
|
# +- +/-
|
|
# 2S Superscript 2
|
|
# 3S Superscript 3
|
|
# My Micro
|
|
# .M Middle dot
|
|
# 1S Superscript 1
|
|
# 14 Quarter
|
|
# 12 Half
|
|
# 34 Three quarters
|
|
# *X Multiplication
|
|
# -: Division
|
|
#
|
|
# Accents with no base character
|
|
# '> Circumflex (caret)
|
|
# '! Grave (backtick)
|
|
# ', Cedilla
|
|
# ': Diaeresis (Umlaut)
|
|
# 'm Macron
|
|
# '' Acute
|
|
|
|
# Start from zsh's own option defaults, scoped to this function, then
# switch on the extra options the rest of the function is written for.
emulate -LR zsh
setopt cbases extendedglob printeightbit

# accent/basechar hold the two keys read from the user, ochar the
# converted character.  error holds the command (plus any leading
# arguments) that is run with a message when something needs reporting.
local accent basechar ochar
local -a error

# Outside zle (WIDGET empty) messages are simply printed; when running
# as a widget they are shown with "zle -M" instead.
if [[ -z $WIDGET ]]; then
  error=(print)
else
  error=(zle -M)
fi
|
|
|
|
# Build the lookup table only if it does not exist yet.  It is created
# as a global (typeset -g), so it survives after this function returns
# and later invocations skip this whole section.
if [[ -z ${zsh_accented_chars+set} ]]; then
  # The associative array zsh_accented_chars is indexed by the second
  # key typed: the accent, or the second character of a two-character
  # mnemonic.  Each value is a whitespace-separated list of
  # "first-key codepoint" pairs for that second key.  The code point is
  # a hex index with no base discriminator; essentially a UCS-4 index
  # with the leading zeroes suppressed.
  #
  # Note that a plain "=" assignment replaces whatever is already stored
  # for that key, so every key must be assigned with "=" at most once
  # and extended with "+=" afterwards.
  typeset -gA zsh_accented_chars

  # grave
  accent=\!
  zsh_accented_chars[$accent]="\
A C0 E C8 I CC O D2 U D9 a E0 e E8 i EC o F2 u F9 N 1F8 n 1F9 \
"
  # acute (i is U+00ED; U+00EC is i with grave)
  accent=\'
  zsh_accented_chars[$accent]="\
A C1 E C9 I CD O D3 U DA Y DD a E1 e E9 i ED o F3 u FA y FD C 106 c 107 \
L 139 l 13A N 143 n 144 R 154 r 155 S 15A s 15B Z 179 z 17A \
"
  # circumflex
  accent=\>
  zsh_accented_chars[$accent]="\
A C2 E CA I CE O D4 U DB a E2 e EA i EE o F4 u FB C 108 c 109 G 11C g 11d \
H 124 h 125 J 134 j 135 S 15C s 15D W 174 w 175 Y 176 y 177 \
"
  # tilde (E/e with tilde live at U+1EBC/U+1EBD; U+00CB is E with
  # diaeresis and belongs in the diaeresis list below)
  accent=\?
  zsh_accented_chars[$accent]="\
A C3 N D1 O D5 a E3 n F1 o F5 I 128 i 129 U 168 u 169 E 1EBC e 1EBD \
"
  # macron (d-, D- give eth)
  accent=-
  zsh_accented_chars[$accent]="\
A 100 a 101 d F0 D D0 E 112 e 113 I 12a i 12b O 14C o 14D U 16A u 16B \
"
  # breve
  accent=\(
  zsh_accented_chars[$accent]="\
A 102 a 103 E 114 e 115 G 11E g 11F I 12C i 12D O 14E o 14F U 16C u 16D \
"
  # dot above, small i with no dot, or l with middle dot
  accent=.
  zsh_accented_chars[$accent]="\
C 10A c 10b E 116 e 117 G 120 g 121 I 130 i 131 L 13F l 140 Z 17B z 17C \
"
  # diaeresis / Umlaut
  accent=:
  zsh_accented_chars[$accent]="\
A C4 E CB I CF O D6 U DC a E4 e EB i EF o F6 u FC y FF Y 178 \
"
  # cedilla
  accent=,
  zsh_accented_chars[$accent]="\
C C7 c E7 G 122 g 123 K 136 k 137 L 13B l 13C N 145 n 146 R 156 r 157 \
S 15E s 15F T 162 t 163 \
"
  # underline (_) would go here
  # stroke through
  accent=/
  zsh_accented_chars[$accent]="\
O D8 o F8 D 110 d 111 H 126 h 127 L 141 l 142 T 166 t 167 b 180 \
"
  # double acute
  accent=\"
  zsh_accented_chars[$accent]="\
O 150 o 151 U 170 u 171\
"
  # ogonek
  accent=\;
  zsh_accented_chars[$accent]="\
A 104 a 105 E 118 e 119 I 12E i 12F U 172 u 173 \
"
  # caron
  accent=\<
  zsh_accented_chars[$accent]="\
C 10C c 10D D 10E d 10F E 11A e 11B L 13D l 13E N 147 n 148 R 158 r 159 \
S 160 s 161 T 164 t 165 Z 17D z 17E \
"
  # ring above
  accent=0
  zsh_accented_chars[$accent]="\
A C5 a E5 U 16E u 16F \
"
  # hook above (small e is U+1EBB; U+1EBA is the capital)
  accent=2
  zsh_accented_chars[$accent]="\
A 1EA2 a 1EA3 E 1EBA e 1EBB \
"
  # horn, also right quotation marks
  accent=9
  zsh_accented_chars[$accent]="\
O 1A0 o 1A1 U 1Af u 1b0 ' 2019 . 201A \" 201D : 201E \
"
  # left quotation marks
  accent=6
  zsh_accented_chars[$accent]="\
' 2018 \" 201C \
"
  # reversed quotation marks for convenience
  accent=\'
  zsh_accented_chars[$accent]+=" \
9 201B \
"
  accent=\"
  zsh_accented_chars[$accent]+=" \
9 201F \
"

  # ligature with E (key is upper case E, so that the lower case e
  # assignment below does not overwrite it)
  accent=E
  zsh_accented_chars[$accent]="\
A C6 O 152 \
"
  # ligature with e
  accent=e
  zsh_accented_chars[$accent]="\
a E6 o 153 \
"
  # ligature with J
  accent=J
  zsh_accented_chars[$accent]="\
I 132 \
"
  # ligature with j
  accent=j
  zsh_accented_chars[$accent]="\
i 133 \
"
  # eszett
  accent=s
  zsh_accented_chars[$accent]="\
s DF \
"
  # upper case thorn
  accent=H
  zsh_accented_chars[$accent]="\
T DE \
"
  # lower case thorn
  accent=h
  zsh_accented_chars[$accent]="\
t FE \
"

  # Remaining characters are handled as separate pairs.
  # We need to remember that the assoc array is keyed by the second character.
  # Left square bracket
  accent=\(
  zsh_accented_chars[$accent]+=" < 5B"
  # Reverse solidus (backslash to you and me).
  accent=/
  zsh_accented_chars[$accent]+=" / 5C"
  # Right square bracket, circumflex
  accent=\>
  zsh_accented_chars[$accent]+=" ) 5D ' 5E"
  # Grave accent
  accent=\!
  zsh_accented_chars[$accent]+=" ' 60"
  # digraphs for (usually) standard characters {, |, }, ~
  accent=\!
  zsh_accented_chars[$accent]+=" ( 7B"
  zsh_accented_chars[$accent]+=" ! 7C"
  accent=\)
  zsh_accented_chars[$accent]+=" ! 7D"
  accent=\?
  zsh_accented_chars[$accent]+=" ' 7E"
  # non-breaking space
  zsh_accented_chars[S]+=" N A0"
  # inverted exclamation mark
  zsh_accented_chars[I]+=" ! A1"
  # cent
  zsh_accented_chars[t]+=" C A2"
  # pound sterling
  zsh_accented_chars[d]+=" P A3"
  # currency
  zsh_accented_chars[u]+=" C A4"
  # yen
  zsh_accented_chars[e]+=" Y A5"
  # broken bar
  zsh_accented_chars[B]+=" B A6"
  # section
  zsh_accented_chars[E]+=" S A7"
  # lonely diaeresis
  zsh_accented_chars[:]+=" ' A8"
  # copyright
  zsh_accented_chars[o]+=" C A9"
  # spanish feminine ordinal
  zsh_accented_chars[a]+=" - AA"
  # left guillemet
  accent=\<
  zsh_accented_chars[$accent]+=" < AB"
  # not sign
  zsh_accented_chars[O]+=" N AC"
  # soft hyphen
  zsh_accented_chars[-]+=" - AD"
  # registered
  zsh_accented_chars[g]+=" R AE"
  # lonely macron
  zsh_accented_chars[m]+=" ' AF"
  # degree
  zsh_accented_chars[G]+=" D B0"
  # +/-
  zsh_accented_chars[-]+=" + B1"
  # superscripts
  zsh_accented_chars[S]+=" 2 B2 3 B3"
  # lonely acute
  accent=\'
  zsh_accented_chars[$accent]+=" ' B4"
  # micro
  zsh_accented_chars[y]+=" M B5"
  # pilcrow (paragraph)
  zsh_accented_chars[I]+=" P B6"
  # Middle dot
  zsh_accented_chars[M]+=" . B7"
  # Lonely cedilla
  zsh_accented_chars[,]+=" ' B8"
  # Superscript one
  zsh_accented_chars[S]+=" 1 B9"
  # spanish masculine ordinal
  zsh_accented_chars[o]+=" - BA"
  # right guillemet
  accent=\>
  zsh_accented_chars[$accent]+=" > BB"
  # fractions
  zsh_accented_chars[4]+=" 1 BC 3 BE"
  zsh_accented_chars[2]+=" 1 BD"
  # inverted question mark
  zsh_accented_chars[I]+=" ? BF"
  # multiplication
  zsh_accented_chars[X]+=" * D7"
  # division
  zsh_accented_chars[:]+=" - F7"
  # kra
  zsh_accented_chars[k]+=" k 138"
  # apostrophe n
  zsh_accented_chars[n]+=" ' 149"
  # Lappish ng
  zsh_accented_chars[G]+=" N 14A"
  zsh_accented_chars[g]+=" n 14B"
  # OI
  zsh_accented_chars[I]+=" O 1A2"
  zsh_accented_chars[i]+=" o 1A3"
  # yr
  zsh_accented_chars[r]+=" y 1A6"
  # ezh
  zsh_accented_chars[D]+=" E 1B7"
  # euro (I invented this but it's logical)
  zsh_accented_chars[u]+=" E 20AC"
  # dagger and double dagger
  zsh_accented_chars[-]+=" / 2020"
  zsh_accented_chars[=]+=" / 2021"
fi
|
|
|
|
# Fetch the two keystrokes: the base (first) character, then the accent
# or second mnemonic character.  Give up if either read fails.
read -k basechar || return 1
read -k accent || return 1

# Split the "first-key codepoint ..." list stored for this second key
# into a map from first key to hex code point.
local -A codepoints
codepoints=(${=zsh_accented_chars[$accent]})

# Nothing stored for this second key, or no entry for this first key.
if (( ${#codepoints} == 0 )) || [[ -z $codepoints[$basechar] ]]; then
  $error "Combination ${basechar}${accent} is not available."
  return 1
fi

# The hex code point, left-padded with zeroes to eight digits so it can
# follow a \U escape.
local padded
padded=${(l.8..0.)codepoints[$basechar]}

if [[ -n $WIDGET ]]; then
  # Running as a zle widget: convert the character first.
  ochar="$(print -n "\U${padded}")"

  if (( ! ${+NUMERIC} )); then
    # No numeric argument: insert at the cursor.
    LBUFFER+=$ochar
  else
    # Numeric argument given: only report the character.
    $error "Character ${padded}: $ochar"
  fi
else
  # Not a widget: write the character to standard output, preceded by
  # an empty line when standard output is a terminal.
  if [[ -t 1 ]]; then
    print
  fi
  print "\U${padded}"
fi
|