mirror of
git://git.code.sf.net/p/zsh/code
synced 2025-08-07 01:30:59 +02:00
users/9788: add (oN) glob qualifier for no sorting
22076: more documentation for multibyte handling
This commit is contained in:
parent
174ad4a80f
commit
b5a83cc754
5 changed files with 240 additions and 24 deletions
|
@ -1,3 +1,11 @@
|
||||||
|
2005-12-15 Peter Stephenson <pws@csr.com>
|
||||||
|
|
||||||
|
* 22076: INSTALL, Etc/FAQ.yo: more information on multibyte
|
||||||
|
handling.
|
||||||
|
|
||||||
|
* users/9788: Doc/Zsh/expn.yo, Src/glob.c: add (oN) qualifier
|
||||||
|
for no sorting.
|
||||||
|
|
||||||
2005-12-14 Bart Schaefer <schaefer@zsh.org>
|
2005-12-14 Bart Schaefer <schaefer@zsh.org>
|
||||||
|
|
||||||
* 21814: Src/loop.c, Src/signals.c: if an error occurs in an
|
* 21814: Src/loop.c, Src/signals.c: if an error occurs in an
|
||||||
|
|
|
@ -1958,11 +1958,13 @@ they are sorted by the time of the last access, modification, or
|
||||||
inode change respectively; if tt(d), files in subdirectories appear before
|
inode change respectively; if tt(d), files in subdirectories appear before
|
||||||
those in the current directory at each level of the search DASH()- this is best
|
those in the current directory at each level of the search DASH()- this is best
|
||||||
combined with other criteria, for example `tt(odon)' to sort on names for
|
combined with other criteria, for example `tt(odon)' to sort on names for
|
||||||
files within the same directory. Note that tt(a), tt(m), and tt(c) compare
|
files within the same directory; if tt(N), no sorting is performed.
|
||||||
|
Note that tt(a), tt(m), and tt(c) compare
|
||||||
the age against the current time, hence the first name in the list is the
|
the age against the current time, hence the first name in the list is the
|
||||||
youngest file. Also note that the modifiers tt(^) and tt(-) are used,
|
youngest file. Also note that the modifiers tt(^) and tt(-) are used,
|
||||||
so `tt(*(^-oL))' gives a list of all files sorted by file size in descending
|
so `tt(*(^-oL))' gives a list of all files sorted by file size in descending
|
||||||
order, following any symbolic links.
|
order, following any symbolic links. Unless tt(oN) is used, multiple order
|
||||||
|
specifiers may occur to resolve ties.
|
||||||
)
|
)
|
||||||
item(tt(O)var(c))(
|
item(tt(O)var(c))(
|
||||||
like `tt(o)', but sorts in descending order; i.e. `tt(*(^oc))' is the
|
like `tt(o)', but sorts in descending order; i.e. `tt(*(^oc))' is the
|
||||||
|
|
190
Etc/FAQ.yo
190
Etc/FAQ.yo
|
@ -43,11 +43,11 @@ whenlatex(report(ARG1)(ARG2)(ARG3))\
|
||||||
whenman(report(ARG1)(ARG2)(ARG3))\
|
whenman(report(ARG1)(ARG2)(ARG3))\
|
||||||
whenms(report(ARG1)(ARG2)(ARG3))\
|
whenms(report(ARG1)(ARG2)(ARG3))\
|
||||||
whensgml(report(ARG1)(ARG2)(ARG3)))
|
whensgml(report(ARG1)(ARG2)(ARG3)))
|
||||||
myreport(Z-Shell Frequently-Asked Questions)(Peter Stephenson)(2005/07/18)
|
myreport(Z-Shell Frequently-Asked Questions)(Peter Stephenson)(2005/12/14)
|
||||||
COMMENT(-- the following are for Usenet and must appear first)\
|
COMMENT(-- the following are for Usenet and must appear first)\
|
||||||
description(\
|
description(\
|
||||||
mydit(Archive-Name:) unix-faq/shell/zsh
|
mydit(Archive-Name:) unix-faq/shell/zsh
|
||||||
mydit(Last-Modified:) 2005/07/18
|
mydit(Last-Modified:) 2005/12/14
|
||||||
mydit(Submitted-By:) email(pws@pwstephenson.fsnet.co.uk (Peter Stephenson))
|
mydit(Submitted-By:) email(pws@pwstephenson.fsnet.co.uk (Peter Stephenson))
|
||||||
mydit(Posting-Frequency:) Monthly
|
mydit(Posting-Frequency:) Monthly
|
||||||
mydit(Copyright:) (C) P.W. Stephenson, 1995--2005 (see end of document)
|
mydit(Copyright:) (C) P.W. Stephenson, 1995--2005 (see end of document)
|
||||||
|
@ -126,11 +126,18 @@ Chapter 4: The mysteries of completion
|
||||||
4.5. How do I get started with programmable completion?
|
4.5. How do I get started with programmable completion?
|
||||||
4.6. Suppose I want to complete all files during a special completion?
|
4.6. Suppose I want to complete all files during a special completion?
|
||||||
|
|
||||||
Chapter 5: The future of zsh
|
Chapter 5: Multibyte input
|
||||||
5.1. What bugs are currently known and unfixed? (Plus recent important changes)
|
|
||||||
5.2. Where do I report bugs, get more info / who's working on zsh?
|
5.1. What is multibyte input?
|
||||||
5.3. What's on the wish-list?
|
5.2. How does zsh handle multibyte input?
|
||||||
5.4. Did zsh have problems in the year 2000?
|
5.3. How do I ensure multibyte input works on my system?
|
||||||
|
5.4. How can I input characters that aren't on my keyboard?
|
||||||
|
|
||||||
|
Chapter 6: The future of zsh
|
||||||
|
6.1. What bugs are currently known and unfixed? (Plus recent important changes)
|
||||||
|
6.2. Where do I report bugs, get more info / who's working on zsh?
|
||||||
|
6.3. What's on the wish-list?
|
||||||
|
6.4. Did zsh have problems in the year 2000?
|
||||||
|
|
||||||
Acknowledgments
|
Acknowledgments
|
||||||
|
|
||||||
|
@ -1945,6 +1952,175 @@ sect(Suppose I want to complete all files during a special completion?)
|
||||||
such as expansion or approximate completion.
|
such as expansion or approximate completion.
|
||||||
|
|
||||||
|
|
||||||
|
chapter(Multibyte input)
|
||||||
|
|
||||||
|
sect(What is multibyte input?)
|
||||||
|
|
||||||
|
For a long time computers had a simple idea of a character: each octet
|
||||||
|
(8-bit byte) of text contained one character. This meant an application
|
||||||
|
could only use 256 characters at once. The first 128 characters (0 to
|
||||||
|
127) on Unix and similar systems usually corresponded to the ASCII
|
||||||
|
character set, as they still do. So all other possibilities had to be
|
||||||
|
crammed into the remaining 128. This was done by picking the appropriate
|
||||||
|
character set for the use you were making. For example, ISO 8859
|
||||||
|
specified a set of extensions to ASCII for various alphabets.
|
||||||
|
|
||||||
|
This was fine for simple extensions and certain short enough relatives of
|
||||||
|
the Latin alphabet (with no more than a few dozen alphabetic characters),
|
||||||
|
but useless for complex alphabets. Also, having a different character
|
||||||
|
set for each language is inconvenient: you have to start a new terminal
|
||||||
|
to run the shell with each character set. So the character set had to be
|
||||||
|
extended. To cut a long story short, the world has mostly standardised
|
||||||
|
on a character set called Unicode, related to the international standard
|
||||||
|
ISO 10646. The intention is that this will contain every single
|
||||||
|
character used in all the languages of the world.
|
||||||
|
|
||||||
|
This has far too many characters to fit into a single octet. What's
|
||||||
|
more, UNIX utilities such as zsh are so used to dealing with ASCII that
|
||||||
|
removing it would cause no end of trouble. So what happens is this: the
|
||||||
|
128 ASCII characters are kept exactly the same (and they're the same as
|
||||||
|
the first 128 characters of Unicode), but the remaining 128 characters
|
||||||
|
are used to build up any other Unicode character by combining multiple
|
||||||
|
octets together. The shell doesn't need to interpret these directly; it
|
||||||
|
just needs to ask the system library how many octets form the next
|
||||||
|
character, and if there's a valid character there at all. (It can also
|
||||||
|
ask the system what width the character takes up on the screen, so that
|
||||||
|
characters no longer need to be exactly one position wide.)
|
||||||
|
|
||||||
|
The way this is done is called UTF-8. Multibyte encodings of other
|
||||||
|
character sets exist (you might encounter them for Asian character sets);
|
||||||
|
zsh will be able to use any such encoding as long as it contains ASCII as
|
||||||
|
a single-octet subset and the system can provide information about other
|
||||||
|
characters. However, in the case of Unicode, UTF-8 is the only one you
|
||||||
|
are likely to encounter.
|
||||||
|
|
||||||
|
(In case you're confused: Unicode is the character set, while UTF-8 is
|
||||||
|
an encoding of it. You might hear about other encodings, such as UCS-2
|
||||||
|
and UCS-4 which are basically the character's index in the character set
|
||||||
|
as a two-octet or four-octet integer. You might see files encoded this
|
||||||
|
way, for example on Windows, but the shell can't deal directly with text
|
||||||
|
in those formats.)
|
||||||
|
|
||||||
|
|
||||||
|
sect(How does zsh handle multibyte input?)
|
||||||
|
|
||||||
|
Until version 4.3, zsh didn't handle multibyte input properly at all.
|
||||||
|
Each octet in a multibyte character would look to the shell like a
|
||||||
|
separate character. If your terminal handled the character set,
|
||||||
|
characters might appear correct on screen, but trying to edit them would
|
||||||
|
cause all sorts of odd effects. (It was possible to edit in zsh using
|
||||||
|
single-byte extensions of ASCII such as the ISO 8859 family, however.)
|
||||||
|
|
||||||
|
From version 4.3, multibyte input is handled in the line editor if zsh
|
||||||
|
has been compiled with the appropriate definitions. This will happen
|
||||||
|
automatically if the compiler defines __STDC_ISO_10646__, which is true
|
||||||
|
for many recent GNU-based systems. On other systems you must configure
|
||||||
|
zsh by passing the argument --enable-multibyte to configure. (The reason for
|
||||||
|
this is that the presence of __STDC_ISO_10646__ ensures all the required
|
||||||
|
library support is present, short-circuiting a large number of
|
||||||
|
configuration tests.) Explicit use of --enable-multibyte should work on
|
||||||
|
many other recent UNIX systems; if it works on yours, and that's not
|
||||||
|
mentioned in the shell documentation, please report this to
|
||||||
|
zsh-workers@sunsite.dk, and if it doesn't but you can work out why not
|
||||||
|
we'd also be interested in hearing.
|
||||||
|
|
||||||
|
You can test if multibyte handling is compiled into your version of the
|
||||||
|
shell by running:
|
||||||
|
verb(
|
||||||
|
(bindkey -m)
|
||||||
|
)
|
||||||
|
which should output a warning:
|
||||||
|
verb(
|
||||||
|
bindkey: warning: `bindkey -m' disables multibyte support
|
||||||
|
)
|
||||||
|
If it doesn't, you don't have multibyte support in your shell. The
|
||||||
|
parentheses are there to run the command in a subshell, which protects
|
||||||
|
your interactive shell from the effects being warned about.
|
||||||
|
|
||||||
|
Multibyte strings are not yet handled anywhere else in the shell. This
|
||||||
|
means, for example, patterns treat multibyte characters as a set of single
|
||||||
|
octets and the ${#var} syntax counts octets, not characters. There will
|
||||||
|
probably be new syntax to ensure that zsh can work both in its traditional
|
||||||
|
way as well as when interpreting multibyte characters.
|
||||||
|
|
||||||
|
|
||||||
|
sect(How do I ensure multibyte input works on my system?)
|
||||||
|
|
||||||
|
Once you have a version of zsh with multibyte support, you need to
|
||||||
|
ensure the environment is correct. We'll assume you're using UTF-8.
|
||||||
|
Many modern systems may come set up correctly already. Try one of
|
||||||
|
the editing widgets described in the next section to see.
|
||||||
|
|
||||||
|
There are basically three components.
|
||||||
|
|
||||||
|
itemize(
|
||||||
|
it() The locale. This describes a whole series of features specific
|
||||||
|
to countries or regions of which the character set is one. Usually
|
||||||
|
it is controlled by the environment variable tt(LANG) (there are
|
||||||
|
others but this is the one to start with). You need to find a
|
||||||
|
locale whose name contains mytt(UTF-8). This will be a variant on
|
||||||
|
your usual locale, which typically indicates the language and
|
||||||
|
country; for example, mine is mytt(en_GB.UTF-8). Luckily, zsh can
|
||||||
|
complete locale names, so if you have the new completion system
|
||||||
|
loaded you can type mytt(export LANG=) and attempt to complete a
|
||||||
|
suitable locale. It's the locale that tells the shell to expect the
|
||||||
|
right form of multibyte input. (However, there's no guarantee that
|
||||||
|
the shell is actually going to get this input: for example, if you
|
||||||
|
edit file names that have been created using a different character
|
||||||
|
set it won't work properly.)
|
||||||
|
it() The terminal emulator. Those that are supplied with a recent
|
||||||
|
desktop environment, such as gnome-terminal, are likely to have
|
||||||
|
extensive support for localization and may work correctly as soon
|
||||||
|
as they know the locale.
|
||||||
|
it() The font. If you selected this from a menu in your terminal
|
||||||
|
emulator, there's a good chance it already selected the right
|
||||||
|
character set to go with it. If you hand-picked an old-fashioned
|
||||||
|
X font with a lot of dashes, you need to make sure it ends with
|
||||||
|
the right character encoding, mytt(iso10646-1) (and not, for
|
||||||
|
example, mytt(iso8859-1)). Not all characters will be available
|
||||||
|
in any font, and some fonts may have a more restricted range of
|
||||||
|
Unicode characters than others.
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
sect(How can I input characters that aren't on my keyboard?)
|
||||||
|
|
||||||
|
Two functions are provided with zsh that help you input characters.
|
||||||
|
As with all editing widgets implemented by functions, you need to
|
||||||
|
mark the function for autoload, create the widget, and, if you are
|
||||||
|
going to use it frequently, bind it to a key sequence. The
|
||||||
|
following binds tt(insert-composed-char) to F5 on my keyboard:
|
||||||
|
verb(
|
||||||
|
autoload -Uz insert-composed-char
|
||||||
|
zle -N insert-composed-char
|
||||||
|
bindkey '\e[15~' insert-composed-char
|
||||||
|
)
|
||||||
|
|
||||||
|
The two widgets are described in the tt(zshcontrib(1)) manual
|
||||||
|
page, but here is a brief summary:
|
||||||
|
|
||||||
|
tt(insert-composed-char) is followed by two characters that
|
||||||
|
are a mnemonic for a multibyte character. For example mytt(a:)
|
||||||
|
is a with an umlaut; mytt(cH) is the symbol for hearts on a playing
|
||||||
|
card. Various accented characters, European and related alphabets,
|
||||||
|
and punctuation and mathematical symbols are available. The
|
||||||
|
mnemonics are mostly those given by RFC 1345, see
|
||||||
|
url(http://www.faqs.org/rfcs/rfc1345.html)\
|
||||||
|
(http://www.faqs.org/rfcs/rfc1345.html).
|
||||||
|
|
||||||
|
tt(insert-unicode-char) is used to input a Unicode character by
|
||||||
|
its hexadecimal number. This is the number given in the Unicode
|
||||||
|
character charts, see for example \
|
||||||
|
url(http://www.unicode.org/charts/)(http://www.unicode.org/charts/).
|
||||||
|
You need to execute the function, then type the hexadecimal number
|
||||||
|
(you can omit any leading zeroes), then execute the function again.
|
||||||
|
|
||||||
|
Both functions can be used without multibyte mode, provided the locale is
|
||||||
|
correct and the character selected exists in the current character set;
|
||||||
|
however, using UTF-8 massively extends the number of valid characters
|
||||||
|
that can be produced.
|
||||||
|
|
||||||
|
|
||||||
chapter(The future of zsh)
|
chapter(The future of zsh)
|
||||||
|
|
||||||
sect(What bugs are currently known and unfixed? (Plus recent \
|
sect(What bugs are currently known and unfixed? (Plus recent \
|
||||||
|
|
13
INSTALL
13
INSTALL
|
@ -272,7 +272,16 @@ The support can be explicitly enabled or disable with --enable-multibyte or
|
||||||
--disable-multibyte. Reports of systems where multibyte support was not
|
--disable-multibyte. Reports of systems where multibyte support was not
|
||||||
enabled by default but --enable-multibyte resulted in a usable shell would
|
enabled by default but --enable-multibyte resulted in a usable shell would
|
||||||
be appreciated. The developers are not aware of any need to use
|
be appreciated. The developers are not aware of any need to use
|
||||||
--disable-multibyte and this should be reported as a bug.
|
--disable-multibyte and this should be reported as a bug. Currently
|
||||||
|
multibyte mode is believed to work automatically on:
|
||||||
|
|
||||||
|
- All(?) current GNU/Linux distributions
|
||||||
|
- All(?) current BSD variants
|
||||||
|
- OS X 10.4.3
|
||||||
|
|
||||||
|
and to work when configured with --enable-multibyte on:
|
||||||
|
|
||||||
|
- Solaris 8 and later
|
||||||
|
|
||||||
The main shell is not yet aware of multibyte characters, so for example the
|
The main shell is not yet aware of multibyte characters, so for example the
|
||||||
length of a scalar parameter will return the number of bytes, not
|
length of a scalar parameter will return the number of bytes, not
|
||||||
|
@ -281,6 +290,8 @@ characters. This means that pattern tests such as ? and [[:alpha:]] do not
|
||||||
work correctly with characters in multibyte character sets beyond the ASCII
|
work correctly with characters in multibyte character sets beyond the ASCII
|
||||||
subset.
|
subset.
|
||||||
|
|
||||||
|
See chapter 5 in the FAQ for some notes on multibyte input.
|
||||||
|
|
||||||
Memory Routines
|
Memory Routines
|
||||||
---------------
|
---------------
|
||||||
|
|
||||||
|
|
47
Src/glob.c
47
Src/glob.c
|
@ -56,11 +56,14 @@ struct gmatch {
|
||||||
|
|
||||||
#define GS_NAME 1
|
#define GS_NAME 1
|
||||||
#define GS_DEPTH 2
|
#define GS_DEPTH 2
|
||||||
#define GS_SIZE 4
|
|
||||||
#define GS_ATIME 8
|
#define GS_SHIFT_BASE 4
|
||||||
#define GS_MTIME 16
|
|
||||||
#define GS_CTIME 32
|
#define GS_SIZE (GS_SHIFT_BASE)
|
||||||
#define GS_LINKS 64
|
#define GS_ATIME (GS_SHIFT_BASE << 1)
|
||||||
|
#define GS_MTIME (GS_SHIFT_BASE << 2)
|
||||||
|
#define GS_CTIME (GS_SHIFT_BASE << 3)
|
||||||
|
#define GS_LINKS (GS_SHIFT_BASE << 4)
|
||||||
|
|
||||||
#define GS_SHIFT 5
|
#define GS_SHIFT 5
|
||||||
#define GS__SIZE (GS_SIZE << GS_SHIFT)
|
#define GS__SIZE (GS_SIZE << GS_SHIFT)
|
||||||
|
@ -69,7 +72,8 @@ struct gmatch {
|
||||||
#define GS__CTIME (GS_CTIME << GS_SHIFT)
|
#define GS__CTIME (GS_CTIME << GS_SHIFT)
|
||||||
#define GS__LINKS (GS_LINKS << GS_SHIFT)
|
#define GS__LINKS (GS_LINKS << GS_SHIFT)
|
||||||
|
|
||||||
#define GS_DESC 4096
|
#define GS_DESC (GS_SHIFT_BASE << (2*GS_SHIFT))
|
||||||
|
#define GS_NONE (GS_SHIFT_BASE << (2*GS_SHIFT+1))
|
||||||
|
|
||||||
#define GS_NORMAL (GS_SIZE | GS_ATIME | GS_MTIME | GS_CTIME | GS_LINKS)
|
#define GS_NORMAL (GS_SIZE | GS_ATIME | GS_MTIME | GS_CTIME | GS_LINKS)
|
||||||
#define GS_LINKED (GS_NORMAL << GS_SHIFT)
|
#define GS_LINKED (GS_NORMAL << GS_SHIFT)
|
||||||
|
@ -1414,6 +1418,7 @@ zglob(LinkList list, LinkNode np, int nountok)
|
||||||
case 'm': t = GS_MTIME; break;
|
case 'm': t = GS_MTIME; break;
|
||||||
case 'c': t = GS_CTIME; break;
|
case 'c': t = GS_CTIME; break;
|
||||||
case 'd': t = GS_DEPTH; break;
|
case 'd': t = GS_DEPTH; break;
|
||||||
|
case 'N': t = GS_NONE; break;
|
||||||
default:
|
default:
|
||||||
zerr("unknown sort specifier", NULL, 0);
|
zerr("unknown sort specifier", NULL, 0);
|
||||||
restore_globstate(saved);
|
restore_globstate(saved);
|
||||||
|
@ -1622,10 +1627,13 @@ zglob(LinkList list, LinkNode np, int nountok)
|
||||||
matchct = 1;
|
matchct = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* Sort arguments in to lexical (and possibly numeric) order. *
|
|
||||||
* This is reversed to facilitate insertion into the list. */
|
if (!(gf_sortlist[0] & GS_NONE)) {
|
||||||
qsort((void *) & matchbuf[0], matchct, sizeof(struct gmatch),
|
/* Sort arguments in to lexical (and possibly numeric) order. *
|
||||||
(int (*) _((const void *, const void *)))gmatchcmp);
|
* This is reversed to facilitate insertion into the list. */
|
||||||
|
qsort((void *) & matchbuf[0], matchct, sizeof(struct gmatch),
|
||||||
|
(int (*) _((const void *, const void *)))gmatchcmp);
|
||||||
|
}
|
||||||
|
|
||||||
if (first < 0) {
|
if (first < 0) {
|
||||||
first += matchct;
|
first += matchct;
|
||||||
|
@ -1637,10 +1645,21 @@ zglob(LinkList list, LinkNode np, int nountok)
|
||||||
else if (end > matchct)
|
else if (end > matchct)
|
||||||
end = matchct;
|
end = matchct;
|
||||||
if ((end -= first) > 0) {
|
if ((end -= first) > 0) {
|
||||||
matchptr = matchbuf + matchct - first - end;
|
if (gf_sortlist[0] & GS_NONE) {
|
||||||
while (end-- > 0) { /* insert matches in the arg list */
|
/* Match list was never reversed, so insert back to front. */
|
||||||
insertlinknode(list, node, matchptr->name);
|
matchptr = matchbuf + matchct - first - 1;
|
||||||
matchptr++;
|
while (end-- > 0) {
|
||||||
|
/* insert matches in the arg list */
|
||||||
|
insertlinknode(list, node, matchptr->name);
|
||||||
|
matchptr--;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
matchptr = matchbuf + matchct - first - end;
|
||||||
|
while (end-- > 0) {
|
||||||
|
/* insert matches in the arg list */
|
||||||
|
insertlinknode(list, node, matchptr->name);
|
||||||
|
matchptr++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
free(matchbuf);
|
free(matchbuf);
|
||||||
|
|
Loading…
Reference in a new issue