mirror of
git://git.code.sf.net/p/zsh/code
synced 2025-11-01 18:30:55 +01:00
23118: improve sorting to make it work with locales
This commit is contained in:
parent
f3bf48149a
commit
553e011320
15 changed files with 497 additions and 221 deletions
|
|
@ -1,3 +1,12 @@
|
|||
2007-01-21 Peter Stephenson <p.w.stephenson@ntlworld.com>
|
||||
|
||||
* 23118: Doc/Zsh/expn.yo, Src/builtin.c, Src/glob.c, Src/jobs.c,
|
||||
Src/sort.c, Src/subst.c, Src/utils.c, Src/zsh.h, Src/zsh.mdd,
|
||||
Src/Zle/compcore.c, Src/Zle/computil.c, Src/Zle/zle_tricky.c,
|
||||
Test/B03print.ztst, Test/D04parameter.ztst: improve sorting,
|
||||
making it work properly with locales and handling embedded
|
||||
nulls consistently.
|
||||
|
||||
2007-01-21 Clint Adams <clint@zsh.org>
|
||||
|
||||
* 23117: arno: Completion/Unix/Command/_yafc:
|
||||
|
|
|
|||
|
|
@ -723,9 +723,10 @@ example by using `tt(${(AA)=)var(name)tt(=)...tt(})' to activate
|
|||
field splitting, when creating an associative array.
|
||||
)
|
||||
item(tt(a))(
|
||||
With tt(o) or tt(O), sort in array index order. Note that `tt(oa)' is
|
||||
therefore equivalent to the default but `tt(Oa)' is useful for
|
||||
obtaining an array's elements in reverse order.
|
||||
Sort in array index order; when combined with `tt(O)' sort in reverse
|
||||
array index order. Note that `tt(a)' is therefore equivalent to the
|
||||
default but `tt(Oa)' is useful for obtaining an array's elements in reverse
|
||||
order.
|
||||
)
|
||||
item(tt(c))(
|
||||
With tt(${#)var(name)tt(}), count the total number of characters in an array,
|
||||
|
|
@ -750,7 +751,7 @@ Join the words of arrays together using newline as a separator.
|
|||
This is a shorthand for `tt(pj:\n:)'.
|
||||
)
|
||||
item(tt(i))(
|
||||
With tt(o) or tt(O), sort case-independently.
|
||||
Sort case-insensitively. May be combined with `tt(n)' or `tt(O)'.
|
||||
)
|
||||
item(tt(k))(
|
||||
If var(name) refers to an associative array, substitute the em(keys)
|
||||
|
|
@ -763,13 +764,25 @@ item(tt(L))(
|
|||
Convert all letters in the result to lower case.
|
||||
)
|
||||
item(tt(n))(
|
||||
With tt(o) or tt(O), sort numerically.
|
||||
Sort decimal numbers numerically; if the first differing
|
||||
characters of two test strings are not digits, sorting
|
||||
is lexical. Numbers with initial zeroes
|
||||
are sorted before those without. Hence the array `tt(foo1 foo02
|
||||
foo2 foo3 foo20 foo23)' is sorted into the order shown. Trailing
|
||||
non-digits are not sorted; the order of `tt(2foo)' and `tt(2bar)'
|
||||
is not defined. May be combined with `tt(i)' or `tt(O)'.
|
||||
)
|
||||
item(tt(o))(
|
||||
Sort the resulting words in ascending order.
|
||||
Sort the resulting words in ascending order; if this appears on its
|
||||
own the sorting is lexical and case-sensitive (unless the locale
|
||||
renders it case-insensitive). Sorting in ascending order is the
|
||||
default for other forms of sorting, so this is ignored if combined
|
||||
with `tt(a)', `tt(i)' or `tt(n)'.
|
||||
)
|
||||
item(tt(O))(
|
||||
Sort the resulting words in descending order.
|
||||
Sort the resulting words in descending order; `tt(O)' without `tt(a)',
|
||||
`tt(i)' or `tt(n)' sorts in reverse lexical order. May be combined
|
||||
with `tt(a)', `tt(i)' or `tt(n)' to reverse the order of sorting.
|
||||
)
|
||||
item(tt(P))(
|
||||
This forces the value of the parameter var(name) to be interpreted as a
|
||||
|
|
|
|||
|
|
@ -3028,7 +3028,7 @@ matchcmp(Cmatch *a, Cmatch *b)
|
|||
if ((*b)->disp && !((*b)->flags & CMF_MORDER))
|
||||
return 1;
|
||||
|
||||
return strbpcmp(&((*a)->str), &((*b)->str));
|
||||
return zstrbcmp((*a)->str, (*b)->str);
|
||||
}
|
||||
|
||||
/* This tests whether two matches are equal (would produce the same
|
||||
|
|
@ -3077,8 +3077,9 @@ makearray(LinkList l, int type, int flags, int *np, int *nlp, int *llp)
|
|||
char **ap, **bp, **cp;
|
||||
|
||||
/* Now sort the array (it contains strings). */
|
||||
qsort((void *) rp, n, sizeof(char *),
|
||||
(int (*) _((const void *, const void *)))strbpcmp);
|
||||
strmetasort((char **)rp, SORTIT_IGNORING_BACKSLASHES |
|
||||
(isset(NUMERICGLOBSORT) ? SORTIT_NUMERICALLY : 0),
|
||||
NULL);
|
||||
|
||||
/* And delete the ones that occur more than once. */
|
||||
for (ap = cp = (char **) rp; *ap; ap++) {
|
||||
|
|
|
|||
|
|
@ -213,7 +213,7 @@ cd_calc()
|
|||
static int
|
||||
cd_sort(const void *a, const void *b)
|
||||
{
|
||||
return strpcmp(&(*((Cdstr *) a))->sortstr, &(*((Cdstr *) b))->sortstr);
|
||||
return zstrpcmp((*((Cdstr *) a))->sortstr, (*((Cdstr *) b))->sortstr, 0);
|
||||
}
|
||||
|
||||
static int
|
||||
|
|
|
|||
|
|
@ -2100,28 +2100,26 @@ sfxlen(char *s, char *t)
|
|||
}
|
||||
#endif
|
||||
|
||||
/* This is strcmp with ignoring backslashes. */
|
||||
/* This is zstrcmp with ignoring backslashes. */
|
||||
|
||||
/**/
|
||||
mod_export int
|
||||
strbpcmp(char **aa, char **bb)
|
||||
zstrbcmp(const char *a, const char *b)
|
||||
{
|
||||
char *a = *aa, *b = *bb;
|
||||
const char *astart = a;
|
||||
|
||||
while (*a && *b) {
|
||||
if (*a == '\\')
|
||||
a++;
|
||||
if (*b == '\\')
|
||||
b++;
|
||||
if (*a != *b)
|
||||
if (*a != *b || !*a)
|
||||
break;
|
||||
if (*a)
|
||||
a++;
|
||||
if (*b)
|
||||
b++;
|
||||
a++;
|
||||
b++;
|
||||
}
|
||||
if (isset(NUMERICGLOBSORT) && (idigit(*a) || idigit(*b))) {
|
||||
for (; a > *aa && idigit(a[-1]); a--, b--);
|
||||
for (; a > astart && idigit(a[-1]); a--, b--);
|
||||
if (idigit(*a) && idigit(*b)) {
|
||||
while (*a == '0')
|
||||
a++;
|
||||
|
|
@ -2310,8 +2308,8 @@ listlist(LinkList l)
|
|||
*p = (char *) getdata(node);
|
||||
*p = NULL;
|
||||
|
||||
qsort((void *) data, num, sizeof(char *),
|
||||
(int (*) _((const void *, const void *))) strbpcmp);
|
||||
strmetasort((char **)data, SORTIT_IGNORING_BACKSLASHES |
|
||||
(isset(NUMERICGLOBSORT) ? SORTIT_NUMERICALLY : 0), NULL);
|
||||
|
||||
for (p = data, lenp = lens; *p; p++, lenp++) {
|
||||
len = *lenp = ZMB_nicewidth(*p) + 2;
|
||||
|
|
|
|||
|
|
@ -617,8 +617,7 @@ bin_set(char *nam, char **args, UNUSED(Options ops), UNUSED(int func))
|
|||
}
|
||||
}
|
||||
if (sort)
|
||||
qsort(args, arrlen(args), sizeof(char *),
|
||||
sort > 0 ? strpcmp : invstrpcmp);
|
||||
strmetasort(args, sort < 0 ? SORTIT_BACKWARDS : 0, NULL);
|
||||
if (array) {
|
||||
/* create an array with the specified elements */
|
||||
char **a = NULL, **y;
|
||||
|
|
@ -3603,30 +3602,16 @@ bin_print(char *name, char **args, Options ops, int func)
|
|||
}
|
||||
|
||||
/* -o and -O -- sort the arguments */
|
||||
/*
|
||||
* TODO: this appears to be yet another of the endless
|
||||
* chunks of code that didn't get fixed up properly
|
||||
* to reflect the fact that args contains unmetafied
|
||||
* strings that may contain NULs with the lengths in
|
||||
* len.
|
||||
*/
|
||||
if (OPT_ISSET(ops,'o')) {
|
||||
if (fmt && !*args) return 0;
|
||||
if (OPT_ISSET(ops,'i'))
|
||||
qsort(args, arrlen(args), sizeof(char *), cstrpcmp);
|
||||
else
|
||||
qsort(args, arrlen(args), sizeof(char *), strpcmp);
|
||||
} else if (OPT_ISSET(ops,'O')) {
|
||||
if (fmt && !*args) return 0;
|
||||
if (OPT_ISSET(ops,'i'))
|
||||
qsort(args, arrlen(args), sizeof(char *), invcstrpcmp);
|
||||
else
|
||||
qsort(args, arrlen(args), sizeof(char *), invstrpcmp);
|
||||
if (OPT_ISSET(ops,'o') || OPT_ISSET(ops,'O')) {
|
||||
int flags;
|
||||
|
||||
if (fmt && !*args)
|
||||
return 0;
|
||||
flags = OPT_ISSET(ops,'i') ? SORTIT_IGNORING_CASE : 0;
|
||||
if (OPT_ISSET(ops,'O'))
|
||||
flags |= SORTIT_BACKWARDS;
|
||||
strmetasort(args, flags, len);
|
||||
}
|
||||
/* after sorting arguments, recalculate lengths */
|
||||
if(OPT_ISSET(ops,'o') || OPT_ISSET(ops,'O'))
|
||||
for(n = 0; n < argc; n++)
|
||||
len[n] = strlen(args[n]);
|
||||
|
||||
/* -c -- output in columns */
|
||||
if (!fmt && (OPT_ISSET(ops,'c') || OPT_ISSET(ops,'C'))) {
|
||||
|
|
|
|||
|
|
@ -855,10 +855,7 @@ gmatchcmp(Gmatch a, Gmatch b)
|
|||
for (i = gf_nsorts, s = gf_sortlist; i; i--, s++) {
|
||||
switch (*s & ~GS_DESC) {
|
||||
case GS_NAME:
|
||||
if (gf_numsort)
|
||||
r = nstrpcmp(&b->name, &a->name);
|
||||
else
|
||||
r = strpcmp(&b->name, &a->name);
|
||||
r = zstrcmp(b->name, a->name, gf_numsort ? SORTIT_NUMERICALLY : 0);
|
||||
break;
|
||||
case GS_DEPTH:
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1967,13 +1967,13 @@ bin_kill(char *nam, char **argv, UNUSED(Options ops), UNUSED(int func))
|
|||
if (!strncmp(signame, "SIG", 3))
|
||||
signame += 3;
|
||||
for (sig = 1; sig <= SIGCOUNT; sig++)
|
||||
if (!cstrpcmp(sigs + sig, &signame))
|
||||
if (!strcasecmp(sigs[sig], signame))
|
||||
break;
|
||||
if (sig > SIGCOUNT) {
|
||||
int i;
|
||||
|
||||
for (i = 0; alt_sigs[i].name; i++)
|
||||
if (!cstrpcmp(&alt_sigs[i].name, &signame))
|
||||
if (!strcasecmp(alt_sigs[i].name, signame))
|
||||
{
|
||||
sig = alt_sigs[i].num;
|
||||
break;
|
||||
|
|
|
|||
332
Src/sort.c
Normal file
332
Src/sort.c
Normal file
|
|
@ -0,0 +1,332 @@
|
|||
/*
|
||||
* sort.c - comparison and sorting of strings
|
||||
*
|
||||
* This file is part of zsh, the Z shell.
|
||||
*
|
||||
* Copyright (c) 1992-2007 Paul Falstad
|
||||
* All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, without written agreement and without
|
||||
* license or royalty fees, to use, copy, modify, and distribute this
|
||||
* software and to distribute modified versions of this software for any
|
||||
* purpose, provided that the above copyright notice and the following
|
||||
* two paragraphs appear in all copies of this software.
|
||||
*
|
||||
* In no event shall Paul Falstad or the Zsh Development Group be liable
|
||||
* to any party for direct, indirect, special, incidental, or consequential
|
||||
* damages arising out of the use of this software and its documentation,
|
||||
* even if Paul Falstad and the Zsh Development Group have been advised of
|
||||
* the possibility of such damage.
|
||||
*
|
||||
* Paul Falstad and the Zsh Development Group specifically disclaim any
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose. The software
|
||||
* provided hereunder is on an "as is" basis, and Paul Falstad and the
|
||||
* Zsh Development Group have no obligation to provide maintenance,
|
||||
* support, updates, enhancements, or modifications.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "zsh.mdh"
|
||||
#include "sort.pro"
|
||||
|
||||
/* Flag for direction of sort: 1 forwards, -1 reverse */
|
||||
static int sortdir;
|
||||
|
||||
/* Flag that sort is numeric */
|
||||
static int sortnumeric;
|
||||
|
||||
/**/
|
||||
static int
|
||||
eltpcmp(const void *a, const void *b)
|
||||
{
|
||||
const SortElt ae = *(const SortElt *)a;
|
||||
const SortElt be = *(const SortElt *)b;
|
||||
const char *as = ae->cmp;
|
||||
const char *bs = be->cmp;
|
||||
int cmp;
|
||||
|
||||
if (ae->len != -1 || be->len != -1) {
|
||||
/*
|
||||
* Length recorded. We only do that if there are embedded
|
||||
* nulls we need to treat as regular characters.
|
||||
*
|
||||
* Since we don't know where multibyte characters start,
|
||||
* but do know that a null character can't occur inside
|
||||
* one (we are relying on multibyte characters being extensions
|
||||
* of ASCII), we can compare starting from after the last
|
||||
* null character that occurred in both strings.
|
||||
*/
|
||||
const char *cmpa, *cmpb;
|
||||
const char *laststarta = as;
|
||||
int len;
|
||||
if (ae->len != -1) {
|
||||
len = ae->len;
|
||||
if (be->len != -1 && len > be->len)
|
||||
len = be->len;
|
||||
}
|
||||
else
|
||||
len = be->len;
|
||||
|
||||
for (cmpa = as, cmpb = bs; *cmpa == *cmpb && len--; cmpa++, cmpb++)
|
||||
if (!*cmpa)
|
||||
laststarta = cmpa + 1;
|
||||
if (*cmpa == *cmpb && ae->len != be->len) {
|
||||
/*
|
||||
* Special case: one of the strings has finished, but
|
||||
* another one continues after the NULL. The string
|
||||
* that's finished sorts below the other. We need
|
||||
* to handle this here since stroll() or strcmp()
|
||||
* will just compare the strings as equal.
|
||||
*/
|
||||
if (ae->len != -1) {
|
||||
if (be->len != -1) {
|
||||
/*
|
||||
* if a is shorter it's earlier, so return -1 and
|
||||
* vice versa
|
||||
*/
|
||||
return (ae->len - be->len) * sortdir;
|
||||
} else {
|
||||
/*
|
||||
* a has a length and so continues, hence
|
||||
* b sorts lower.
|
||||
*/
|
||||
return sortdir;
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* b must continue because it has a recorded length,
|
||||
* so a sorts lower.
|
||||
*/
|
||||
return - sortdir;
|
||||
}
|
||||
}
|
||||
|
||||
bs += (laststarta - as);
|
||||
as += (laststarta - as);
|
||||
}
|
||||
#ifdef HAVE_STRCOLL
|
||||
cmp = strcoll(as, bs);
|
||||
#endif
|
||||
if (sortnumeric) {
|
||||
for (; *as == *bs && *as; as++, bs++);
|
||||
#ifndef HAVE_STRCOLL
|
||||
cmp = (int)STOUC(*as) - (int)STOUC(*bs);
|
||||
#endif
|
||||
if (idigit(*as) || idigit(*bs)) {
|
||||
for (; as > *(char **)a && idigit(as[-1]); as--, bs--);
|
||||
if (idigit(*as) && idigit(*bs)) {
|
||||
while (*as == '0')
|
||||
as++;
|
||||
while (*bs == '0')
|
||||
bs++;
|
||||
for (; idigit(*as) && *as == *bs; as++, bs++);
|
||||
if (idigit(*as) || idigit(*bs)) {
|
||||
cmp = (int)STOUC(*as) - (int)STOUC(*bs);
|
||||
while (idigit(*as) && idigit(*bs))
|
||||
as++, bs++;
|
||||
if (idigit(*as) && !idigit(*bs))
|
||||
return 1;
|
||||
if (idigit(*bs) && !idigit(*as))
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifndef HAVE_STRCOLL
|
||||
else
|
||||
cmp = strcmp(as, bs);
|
||||
#endif
|
||||
|
||||
return sortdir * cmp;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Front-end to eltpcmp() to compare strings.
|
||||
* TODO: it would be better to eliminate this altogether by
|
||||
* making the calling function call into the sort code
|
||||
* at a higher level.
|
||||
*/
|
||||
|
||||
/**/
|
||||
mod_export int
|
||||
zstrcmp(const char *as, const char *bs, int sortflags)
|
||||
{
|
||||
struct sortelt ae, be, *aeptr, *beptr;
|
||||
int oldsortdir = sortdir, oldsortnumeric = sortnumeric, ret;
|
||||
|
||||
ae.cmp = as;
|
||||
be.cmp = bs;
|
||||
ae.len = -1;
|
||||
be.len = -1;
|
||||
|
||||
aeptr = &ae;
|
||||
beptr = &be;
|
||||
|
||||
sortdir = 1;
|
||||
sortnumeric = (sortflags & SORTIT_NUMERICALLY) ? 1 : 0;
|
||||
|
||||
ret = eltpcmp(&aeptr, &beptr);
|
||||
|
||||
/* Paranoia: I don't think we ever need to restore these. */
|
||||
sortnumeric = oldsortnumeric;
|
||||
sortdir = oldsortdir;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Sort an array of metafied strings. Use an "or" of bit flags
|
||||
* to decide how to sort. See the SORTIT_* flags in zsh.h.
|
||||
*
|
||||
* If unmetalenp is not NULL, the strings in array are already
|
||||
* unmetafied and unmetalenp is an array containing the corresponding
|
||||
* lengths.
|
||||
*/
|
||||
|
||||
/**/
|
||||
void
|
||||
strmetasort(char **array, int sortwhat, int *unmetalenp)
|
||||
{
|
||||
char **arrptr;
|
||||
/*
|
||||
* Array of elements containing stuff to sort. Note sortptrarr
|
||||
* is an array of pointers, since that's more efficient
|
||||
* for qsort() to manipulate. sortarr is the array of
|
||||
* structures.
|
||||
*/
|
||||
SortElt *sortptrarr, *sortptrarrptr;
|
||||
SortElt sortarr, sortarrptr;
|
||||
int oldsortdir, oldsortnumeric, nsort;
|
||||
|
||||
nsort = arrlen(array);
|
||||
if (nsort < 2)
|
||||
return;
|
||||
|
||||
pushheap();
|
||||
|
||||
sortptrarr = (SortElt *) zhalloc(nsort * sizeof(SortElt));
|
||||
sortarr = (SortElt) zhalloc(nsort * sizeof(struct sortelt));
|
||||
for (arrptr = array, sortptrarrptr = sortptrarr, sortarrptr = sortarr;
|
||||
*arrptr; arrptr++, sortptrarrptr++, sortarrptr++) {
|
||||
char *metaptr;
|
||||
int needlen, needalloc;
|
||||
*sortptrarrptr = sortarrptr;
|
||||
sortarrptr->orig = *arrptr;
|
||||
|
||||
if (unmetalenp) {
|
||||
/*
|
||||
* Already unmetafied. We just need to check for
|
||||
* embededded nulls.
|
||||
*/
|
||||
int count = unmetalenp[arrptr-array];
|
||||
/* Remember this length for sorted array */
|
||||
sortarrptr->origlen = count;
|
||||
for (metaptr = *arrptr; *metaptr != '\0' && count--; metaptr++)
|
||||
;
|
||||
/* *metaptr must now be \0, even if we reached the end */
|
||||
needlen = (count != 0);
|
||||
} else {
|
||||
/*
|
||||
* Not yet unmetafied. See if it needs unmetafying.
|
||||
* If it doesn't, there can't be any embedded nulls,
|
||||
* since these are metafied.
|
||||
*/
|
||||
needlen = 0;
|
||||
for (metaptr = *arrptr; *metaptr && *metaptr != Meta;
|
||||
metaptr++);
|
||||
}
|
||||
/*
|
||||
* See if we need to do some special checking.
|
||||
* Either we're going to need to copy it to transform it,
|
||||
* or we need to unmetafy it.
|
||||
*/
|
||||
if ((needalloc = (sortwhat &
|
||||
(SORTIT_IGNORING_CASE|SORTIT_IGNORING_BACKSLASHES)))
|
||||
|| *metaptr == Meta) {
|
||||
char *s, *t, *src = *arrptr, *dst;
|
||||
int len;
|
||||
sortarrptr->cmp = dst = (char *)zhalloc(strlen(src) + 1);
|
||||
|
||||
if (unmetalenp) {
|
||||
/* Already unmetafied and we have the length. */
|
||||
len = unmetalenp[arrptr-array];
|
||||
} else if (*metaptr != '\0') {
|
||||
/*
|
||||
* Needs unmetafying. We need to check for
|
||||
* embedded nulls while we do this.
|
||||
*/
|
||||
char *t = dst + (metaptr - src);
|
||||
|
||||
if (metaptr != src)
|
||||
memcpy(dst, src, metaptr - src);
|
||||
while ((*t = *metaptr++)) {
|
||||
if (*t++ == Meta) {
|
||||
if ((t[-1] = *metaptr++ ^ 32) == '\0')
|
||||
needlen = 1;
|
||||
}
|
||||
}
|
||||
len = t - dst;
|
||||
src = dst;
|
||||
} else {
|
||||
/*
|
||||
* Doesn't need unmetafying.
|
||||
* This means metaptr is the NULL at the
|
||||
* end of the string, so we have the length, and
|
||||
* there are no embedded nulls, so we don't
|
||||
* need the length later.
|
||||
* We're only copying the string to transform it
|
||||
* below.
|
||||
*/
|
||||
len = metaptr - src;
|
||||
}
|
||||
if (sortwhat & SORTIT_IGNORING_CASE) {
|
||||
for (s = src, t = dst; s - src != len; )
|
||||
*t++ = tulower(*s++);
|
||||
src = dst;
|
||||
}
|
||||
if (sortwhat & SORTIT_IGNORING_BACKSLASHES) {
|
||||
/* copy null byte, so increment length */
|
||||
for (s = src, t = dst; s - src != len+1; ) {
|
||||
if (*s == '\\') {
|
||||
s++;
|
||||
len--;
|
||||
}
|
||||
*t++ = *s++;
|
||||
}
|
||||
}
|
||||
/* Do we need to store the length (embedded null)? */
|
||||
sortarrptr->len = needlen ? len : -1;
|
||||
} else {
|
||||
/*
|
||||
* We can use the string as is, although it's possible
|
||||
* we still need to take account of an embedded null.
|
||||
*/
|
||||
sortarrptr->cmp = *arrptr;
|
||||
sortarrptr->len = needlen ? unmetalenp[arrptr-array] : -1;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* We need to restore sortdir so that calls to
|
||||
* [n]strcmp work when
|
||||
*/
|
||||
oldsortdir = sortdir;
|
||||
oldsortnumeric = sortnumeric;
|
||||
|
||||
sortdir = (sortwhat & SORTIT_BACKWARDS) ? -1 : 1;
|
||||
sortnumeric = (sortwhat & SORTIT_NUMERICALLY) ? 1 : 0;
|
||||
|
||||
qsort(sortptrarr, nsort, sizeof(SortElt *), eltpcmp);
|
||||
|
||||
sortnumeric = oldsortnumeric;
|
||||
sortdir = oldsortdir;
|
||||
for (arrptr = array, sortptrarrptr = sortptrarr; nsort--; ) {
|
||||
if (unmetalenp)
|
||||
unmetalenp[arrptr-array] = (*sortptrarrptr)->origlen;
|
||||
*arrptr++ = (*sortptrarrptr++)->orig;
|
||||
}
|
||||
|
||||
popheap();
|
||||
}
|
||||
185
Src/subst.c
185
Src/subst.c
|
|
@ -618,146 +618,6 @@ strcatsub(char **d, char *pb, char *pe, char *src, int l, char *s, int glbsub,
|
|||
return dest;
|
||||
}
|
||||
|
||||
typedef int (*CompareFn) _((const void *, const void *));
|
||||
|
||||
/**/
|
||||
mod_export int
|
||||
strpcmp(const void *a, const void *b)
|
||||
{
|
||||
#ifdef HAVE_STRCOLL
|
||||
return strcoll(*(char **)a, *(char **)b);
|
||||
#else
|
||||
return strcmp(*(char **)a, *(char **)b);
|
||||
#endif
|
||||
}
|
||||
|
||||
/**/
|
||||
int
|
||||
invstrpcmp(const void *a, const void *b)
|
||||
{
|
||||
#ifdef HAVE_STRCOLL
|
||||
return -strcoll(*(char **)a, *(char **)b);
|
||||
#else
|
||||
return -strcmp(*(char **)a, *(char **)b);
|
||||
#endif
|
||||
}
|
||||
|
||||
/**/
|
||||
int
|
||||
cstrpcmp(const void *a, const void *b)
|
||||
{
|
||||
#ifdef HAVE_STRCOLL
|
||||
VARARR(char, c, strlen(*(char **) a) + 1);
|
||||
VARARR(char, d, strlen(*(char **) b) + 1);
|
||||
char *s, *t;
|
||||
int cmp;
|
||||
|
||||
for (s = *(char **) a, t = c; (*t++ = tulower(*s++)););
|
||||
for (s = *(char **) b, t = d; (*t++ = tulower(*s++)););
|
||||
|
||||
cmp = strcoll(c, d);
|
||||
|
||||
return cmp;
|
||||
#else
|
||||
char *c = *(char **)a, *d = *(char **)b;
|
||||
|
||||
for (; *c && tulower(*c) == tulower(*d); c++, d++);
|
||||
|
||||
return (int)STOUC(tulower(*c)) - (int)STOUC(tulower(*d));
|
||||
#endif
|
||||
}
|
||||
|
||||
/**/
|
||||
int
|
||||
invcstrpcmp(const void *a, const void *b)
|
||||
{
|
||||
#ifdef HAVE_STRCOLL
|
||||
VARARR(char, c, strlen(*(char **) a) + 1);
|
||||
VARARR(char, d, strlen(*(char **) b) + 1);
|
||||
char *s, *t;
|
||||
int cmp;
|
||||
|
||||
for (s = *(char **) a, t = c; (*t++ = tulower(*s++)););
|
||||
for (s = *(char **) b, t = d; (*t++ = tulower(*s++)););
|
||||
|
||||
cmp = strcoll(c, d);
|
||||
|
||||
return -cmp;
|
||||
#else
|
||||
char *c = *(char **)a, *d = *(char **)b;
|
||||
|
||||
for (; *c && tulower(*c) == tulower(*d); c++, d++);
|
||||
|
||||
return (int)STOUC(tulower(*d)) - (int)STOUC(tulower(*c));
|
||||
#endif
|
||||
}
|
||||
|
||||
/**/
|
||||
int
|
||||
nstrpcmp(const void *a, const void *b)
|
||||
{
|
||||
char *c = *(char **)a, *d = *(char **)b;
|
||||
int cmp;
|
||||
|
||||
#ifdef HAVE_STRCOLL
|
||||
cmp = strcoll(c, d);
|
||||
#endif
|
||||
for (; *c == *d && *c; c++, d++);
|
||||
#ifndef HAVE_STRCOLL
|
||||
cmp = (int)STOUC(*c) - (int)STOUC(*d);
|
||||
#endif
|
||||
if (idigit(*c) || idigit(*d)) {
|
||||
for (; c > *(char **)a && idigit(c[-1]); c--, d--);
|
||||
if (idigit(*c) && idigit(*d)) {
|
||||
while (*c == '0')
|
||||
c++;
|
||||
while (*d == '0')
|
||||
d++;
|
||||
for (; idigit(*c) && *c == *d; c++, d++);
|
||||
if (idigit(*c) || idigit(*d)) {
|
||||
cmp = (int)STOUC(*c) - (int)STOUC(*d);
|
||||
while (idigit(*c) && idigit(*d))
|
||||
c++, d++;
|
||||
if (idigit(*c) && !idigit(*d))
|
||||
return 1;
|
||||
if (idigit(*d) && !idigit(*c))
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return cmp;
|
||||
}
|
||||
|
||||
/**/
|
||||
int
|
||||
invnstrpcmp(const void *a, const void *b)
|
||||
{
|
||||
return -nstrpcmp(a, b);
|
||||
}
|
||||
|
||||
/**/
|
||||
int
|
||||
instrpcmp(const void *a, const void *b)
|
||||
{
|
||||
VARARR(char, c, strlen(*(char **) a) + 1);
|
||||
VARARR(char, d, strlen(*(char **) b) + 1);
|
||||
char **e = (char **)&c;
|
||||
char **f = (char **)&d;
|
||||
char *s, *t;
|
||||
|
||||
for (s = *(char **) a, t = c; (*t++ = tulower(*s++)););
|
||||
for (s = *(char **) b, t = d; (*t++ = tulower(*s++)););
|
||||
|
||||
return nstrpcmp(&e, &f);
|
||||
}
|
||||
|
||||
/**/
|
||||
int
|
||||
invinstrpcmp(const void *a, const void *b)
|
||||
{
|
||||
return -instrpcmp(a, b);
|
||||
}
|
||||
|
||||
/*
|
||||
* Pad the string str, returning a result from the heap (or str itself,
|
||||
* if it didn't need padding). If str is too large, it will be truncated.
|
||||
|
|
@ -1470,13 +1330,11 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
/* Value from (I) flag, used for ditto. */
|
||||
int flnum = 0;
|
||||
/*
|
||||
* sortit is an obscure combination of the settings for (o), (O),
|
||||
* (i) and (n). casind is (i) and numord is (n); these are
|
||||
* separate so we can have fun doing the obscure combinatorics later.
|
||||
* sortit is to be passed to strmetasort().
|
||||
* indord is the (a) flag, which for consistency doesn't get
|
||||
* combined into sortit.
|
||||
*/
|
||||
int sortit = 0, casind = 0, numord = 0, indord = 0;
|
||||
int sortit = SORTIT_ANYOLDHOW, indord = 0;
|
||||
/* (u): straightforward. */
|
||||
int unique = 0;
|
||||
/* combination of (L), (U) and (C) flags. */
|
||||
|
|
@ -1693,18 +1551,20 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
break;
|
||||
|
||||
case 'o':
|
||||
sortit = 1;
|
||||
if (!sortit)
|
||||
sortit |= SORTIT_SOMEHOW; /* sort, no modifiers */
|
||||
break;
|
||||
case 'O':
|
||||
sortit = 2;
|
||||
sortit |= SORTIT_BACKWARDS;
|
||||
break;
|
||||
case 'i':
|
||||
casind = 1;
|
||||
sortit |= SORTIT_IGNORING_CASE;
|
||||
break;
|
||||
case 'n':
|
||||
numord = 1;
|
||||
sortit |= SORTIT_NUMERICALLY;
|
||||
break;
|
||||
case 'a':
|
||||
sortit |= SORTIT_SOMEHOW;
|
||||
indord = 1;
|
||||
break;
|
||||
|
||||
|
|
@ -1870,15 +1730,6 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
s++;
|
||||
}
|
||||
}
|
||||
/* Sort is done by indexing on sortit-1:
|
||||
* bit 1: ascending (o)/descending (O)
|
||||
* bit 2: case sensitive/independent (i)
|
||||
* bit 3: strict order/numeric (n)
|
||||
* unless indord (a) is set set, in which case only test for
|
||||
* descending by assuming only (O) is possible (not verified).
|
||||
*/
|
||||
if (sortit)
|
||||
sortit += (casind << 1) + (numord << 2);
|
||||
|
||||
/*
|
||||
* premul, postmul specify the padding character to be used
|
||||
|
|
@ -3171,11 +3022,11 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
return n;
|
||||
}
|
||||
/* Handle (o) and (O) and their variants */
|
||||
if (sortit) {
|
||||
if (sortit != SORTIT_ANYOLDHOW) {
|
||||
if (!copied)
|
||||
aval = arrdup(aval);
|
||||
if (indord) {
|
||||
if (sortit & 2) {
|
||||
if (sortit & SORTIT_BACKWARDS) {
|
||||
char *copy;
|
||||
char **end = aval + arrlen(aval) - 1, **start = aval;
|
||||
|
||||
|
|
@ -3187,14 +3038,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
}
|
||||
}
|
||||
} else {
|
||||
static CompareFn sortfn[] = {
|
||||
strpcmp, invstrpcmp, cstrpcmp, invcstrpcmp,
|
||||
nstrpcmp, invnstrpcmp, instrpcmp, invinstrpcmp
|
||||
};
|
||||
|
||||
i = arrlen(aval);
|
||||
if (i && (*aval[i-1] || --i))
|
||||
qsort(aval, i, sizeof(char *), sortfn[sortit-1]);
|
||||
/*
|
||||
* HERE: we tested if the last element of the array
|
||||
* was not a NULL string. Why the last element?
|
||||
* Why didn't we expect NULL strings to work?
|
||||
* Was it just a clumsy way of testing whether there
|
||||
* was enough in the array to sort?
|
||||
*/
|
||||
strmetasort(aval, sortit, NULL);
|
||||
}
|
||||
}
|
||||
if (plan9) {
|
||||
|
|
|
|||
11
Src/utils.c
11
Src/utils.c
|
|
@ -3625,6 +3625,17 @@ metafy(char *buf, int len, int heap)
|
|||
return buf;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Take a null-terminated, metafied string in s into a literal
|
||||
* representation by converting in place. The length is in *len
|
||||
* len is non-NULL; if len is NULL, you don't know the length of
|
||||
* the final string, but if it's to be supplied to some system
|
||||
* routine that always uses NULL termination, such as a filename
|
||||
* interpreter, that doesn't matter. Note the NULL termination
|
||||
* is always copied for purposes of that kind.
|
||||
*/
|
||||
|
||||
/**/
|
||||
mod_export char *
|
||||
unmetafy(char *s, int *len)
|
||||
|
|
|
|||
45
Src/zsh.h
45
Src/zsh.h
|
|
@ -1944,6 +1944,51 @@ struct heap {
|
|||
#define ZSIG_ALIAS (1<<3) /* Trap is stored under an alias */
|
||||
#define ZSIG_SHIFT 4
|
||||
|
||||
/***********/
|
||||
/* Sorting */
|
||||
/***********/
|
||||
|
||||
typedef int (*CompareFn) _((const void *, const void *));
|
||||
|
||||
enum {
|
||||
SORTIT_ANYOLDHOW = 0, /* Defaults */
|
||||
SORTIT_IGNORING_CASE = 1,
|
||||
SORTIT_NUMERICALLY = 2,
|
||||
SORTIT_BACKWARDS = 4,
|
||||
/*
|
||||
* Ignore backslashes that quote another character---which may
|
||||
* be another backslash; the second backslash is active.
|
||||
*/
|
||||
SORTIT_IGNORING_BACKSLASHES = 8,
|
||||
/*
|
||||
* Ignored by strmetasort(); used by paramsubst() to indicate
|
||||
* there is some sorting to do.
|
||||
*/
|
||||
SORTIT_SOMEHOW = 16,
|
||||
};
|
||||
|
||||
/*
|
||||
* Element of array passed to qsort().
|
||||
*/
|
||||
struct sortelt {
|
||||
/* The original string. */
|
||||
char *orig;
|
||||
/* The string used for comparison. */
|
||||
const char *cmp;
|
||||
/*
|
||||
* The length of the string if passed down to the sort algorithm.
|
||||
* Used to sort the lengths together with the strings.
|
||||
*/
|
||||
int origlen;
|
||||
/*
|
||||
* The length of the string, if needed, else -1.
|
||||
* The length is only needed if there are embededded nulls.
|
||||
*/
|
||||
int len;
|
||||
};
|
||||
|
||||
typedef struct sortelt *SortElt;
|
||||
|
||||
/************************/
|
||||
/* Flags to casemodifiy */
|
||||
/************************/
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ alwayslink=1
|
|||
objects="builtin.o compat.o cond.o exec.o glob.o hashtable.o \
|
||||
hist.o init.o input.o jobs.o lex.o linklist.o loop.o math.o \
|
||||
mem.o module.o options.o params.o parse.o pattern.o prompt.o signals.o \
|
||||
signames.o string.o subst.o text.o utils.o watch.o"
|
||||
signames.o sort.o string.o subst.o text.o utils.o watch.o"
|
||||
|
||||
headers="../config.h system.h zsh.h sigcount.h signals.h \
|
||||
prototypes.h hashtable.h ztype.h"
|
||||
|
|
|
|||
|
|
@ -263,3 +263,20 @@
|
|||
printf '%b\n' '\0123'
|
||||
0:printf handles \0... octal escapes in replacement text
|
||||
>S
|
||||
|
||||
print -lO $'a' $'a\0' $'a\0b' $'a\0b\0' $'a\0b\0a' $'a\0b\0b' $'a\0c' |
|
||||
while read -r line; do
|
||||
for (( i = 1; i <= ${#line}; i++ )); do
|
||||
foo=$line[i]
|
||||
printf "%02x" $(( #foo ))
|
||||
done
|
||||
print
|
||||
done
|
||||
0:sorting with embedded nulls
|
||||
>610063
|
||||
>6100620062
|
||||
>6100620061
|
||||
>61006200
|
||||
>610062
|
||||
>6100
|
||||
>61
|
||||
|
|
|
|||
|
|
@ -886,3 +886,20 @@
|
|||
>/one
|
||||
>/
|
||||
>/
|
||||
|
||||
nully=($'a\0c' $'a\0b\0b' $'a\0b\0a' $'a\0b\0' $'a\0b' $'a\0' $'a')
|
||||
for string in ${(o)nully}; do
|
||||
for (( i = 1; i <= ${#string}; i++ )); do
|
||||
foo=$string[i]
|
||||
printf "%02x" $(( #foo ))
|
||||
done
|
||||
print
|
||||
done
|
||||
0:Sorting arrays with embedded nulls
|
||||
>61
|
||||
>6100
|
||||
>610062
|
||||
>61006200
|
||||
>6100620061
|
||||
>6100620062
|
||||
>610063
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue