mirror of
				git://git.code.sf.net/p/zsh/code
				synced 2025-10-27 04:40:59 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			382 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			382 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * sort.c - comparison and sorting of strings
 | |
|  *
 | |
|  * This file is part of zsh, the Z shell.
 | |
|  *
 | |
|  * Copyright (c) 1992-2007 Paul Falstad
 | |
|  * All rights reserved.
 | |
|  *
 | |
|  * Permission is hereby granted, without written agreement and without
 | |
|  * license or royalty fees, to use, copy, modify, and distribute this
 | |
|  * software and to distribute modified versions of this software for any
 | |
|  * purpose, provided that the above copyright notice and the following
 | |
|  * two paragraphs appear in all copies of this software.
 | |
|  *
 | |
|  * In no event shall Paul Falstad or the Zsh Development Group be liable
 | |
|  * to any party for direct, indirect, special, incidental, or consequential
 | |
|  * damages arising out of the use of this software and its documentation,
 | |
|  * even if Paul Falstad and the Zsh Development Group have been advised of
 | |
|  * the possibility of such damage.
 | |
|  *
 | |
|  * Paul Falstad and the Zsh Development Group specifically disclaim any
 | |
|  * warranties, including, but not limited to, the implied warranties of
 | |
|  * merchantability and fitness for a particular purpose.  The software
 | |
|  * provided hereunder is on an "as is" basis, and Paul Falstad and the
 | |
|  * Zsh Development Group have no obligation to provide maintenance,
 | |
|  * support, updates, enhancements, or modifications.
 | |
|  *
 | |
|  */
 | |
| 
 | |
| #include "zsh.mdh"
 | |
| #include "sort.pro"
 | |
| 
 | |
| /* Flag for direction of sort: 1 forwards, -1 reverse */
 | |
| static int sortdir;
 | |
| 
 | |
| /* Flag that sort is numeric */
 | |
| static int sortnumeric;
 | |
| 
 | |
| /**/
 | |
| static int
 | |
| eltpcmp(const void *a, const void *b)
 | |
| {
 | |
|     const SortElt ae = *(const SortElt *)a;
 | |
|     const SortElt be = *(const SortElt *)b;
 | |
|     const char *as = ae->cmp, *bs = be->cmp;
 | |
|     const char *ao = as;
 | |
|     int cmp;
 | |
| 
 | |
|     if (ae->len != -1 || be->len != -1) {
 | |
| 	/*
 | |
| 	 * Length recorded.  We only do that if there are embedded
 | |
| 	 * nulls we need to treat as regular characters.
 | |
| 	 *
 | |
| 	 * Since we don't know where multibyte characters start,
 | |
| 	 * but do know that a null character can't occur inside
 | |
| 	 * one (we are relying on multibyte characters being extensions
 | |
| 	 * of ASCII), we can compare starting from after the last
 | |
| 	 * null character that occurred in both strings.
 | |
| 	 */
 | |
| 	const char *cmpa, *cmpb;
 | |
| 	const char *laststarta = as;
 | |
| 	int len;
 | |
| 	if (ae->len != -1) {
 | |
| 	    len = ae->len;
 | |
| 	    if (be->len != -1 && len > be->len)
 | |
| 		len = be->len;
 | |
| 	}
 | |
| 	else
 | |
| 	    len = be->len;
 | |
| 
 | |
| 	for (cmpa = as, cmpb = bs; *cmpa == *cmpb && len--; cmpa++, cmpb++) {
 | |
| 	    if (!*cmpa) {
 | |
| 		/*
 | |
| 		 * If either string doesn't have a length, we've reached
 | |
| 		 * the end.  This is covered in the test below.
 | |
| 		 */
 | |
| 		if (ae->len == -1 || be->len == -1)
 | |
| 		    break;
 | |
| 		laststarta = cmpa + 1;
 | |
| 	    }
 | |
| 	}
 | |
| 	if (*cmpa == *cmpb && ae->len != be->len) {
 | |
| 	    /*
 | |
| 	     * Special case: one of the strings has finished, but
 | |
| 	     * another one continues after the NULL.  The string
 | |
| 	     * that's finished sorts below the other.  We need
 | |
| 	     * to handle this here since strcoll() or strcmp()
 | |
| 	     * will just compare the strings as equal.
 | |
| 	     */
 | |
| 	    if (ae->len != -1) {
 | |
| 		if (be->len != -1) {
 | |
| 		    /*
 | |
| 		     * if a is shorter it's earlier, so return -1 and
 | |
| 		     * vice versa 
 | |
| 		     */
 | |
| 		    return (ae->len - be->len) * sortdir;
 | |
| 		} else {
 | |
| 		    /*
 | |
| 		     * a has a length and so continues, hence
 | |
| 		     * b sorts lower.
 | |
| 		     */
 | |
| 		    return sortdir;
 | |
| 		}
 | |
| 	    } else {
 | |
| 		/*
 | |
| 		 * b must continue because it has a recorded length,
 | |
| 		 * so a sorts lower.
 | |
| 		 */
 | |
| 		return - sortdir;
 | |
| 	    }
 | |
| 	}
 | |
| 
 | |
| 	bs += (laststarta - as);
 | |
| 	as += (laststarta - as);
 | |
|     }
 | |
| #ifdef HAVE_STRCOLL
 | |
|     cmp = strcoll(as, bs);
 | |
| #endif
 | |
|     if (sortnumeric) {
 | |
| 	for (; *as == *bs && *as; as++, bs++);
 | |
| #ifndef HAVE_STRCOLL
 | |
| 	cmp = (int)STOUC(*as) - (int)STOUC(*bs);
 | |
| #endif
 | |
| 	if (idigit(*as) || idigit(*bs)) {
 | |
| 	    for (; as > ao && idigit(as[-1]); as--, bs--);
 | |
| 	    if (idigit(*as) && idigit(*bs)) {
 | |
| 		while (*as == '0')
 | |
| 		    as++;
 | |
| 		while (*bs == '0')
 | |
| 		    bs++;
 | |
| 		for (; idigit(*as) && *as == *bs; as++, bs++);
 | |
| 		if (idigit(*as) || idigit(*bs)) {
 | |
| 		    cmp = (int)STOUC(*as) - (int)STOUC(*bs);
 | |
| 		    while (idigit(*as) && idigit(*bs))
 | |
| 			as++, bs++;
 | |
| 		    if (idigit(*as) && !idigit(*bs))
 | |
| 			return sortdir;
 | |
| 		    if (idigit(*bs) && !idigit(*as))
 | |
| 			return -sortdir;
 | |
| 		}
 | |
| 	    }
 | |
| 	}
 | |
|     }
 | |
| #ifndef HAVE_STRCOLL
 | |
|     else
 | |
| 	cmp = strcmp(as, bs);
 | |
| #endif
 | |
| 
 | |
|     return sortdir * cmp;
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|  * Front-end to eltpcmp() to compare strings.
 | |
|  * TODO: it would be better to eliminate this altogether by
 | |
|  * making the calling function call into the sort code
 | |
|  * at a higher level.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| mod_export int
 | |
| zstrcmp(const char *as, const char *bs, int sortflags)
 | |
| {
 | |
|     struct sortelt ae, be, *aeptr, *beptr;
 | |
|     int oldsortdir = sortdir, oldsortnumeric = sortnumeric, ret;
 | |
| 
 | |
|     ae.cmp = as;
 | |
|     be.cmp = bs;
 | |
|     ae.len = -1;
 | |
|     be.len = -1;
 | |
| 
 | |
|     aeptr = &ae;
 | |
|     beptr = &be;
 | |
| 
 | |
|     sortdir = 1;
 | |
|     sortnumeric = (sortflags & SORTIT_NUMERICALLY) ? 1 : 0;
 | |
| 
 | |
|     ret = eltpcmp(&aeptr, &beptr);
 | |
| 
 | |
|     /* Paranoia: I don't think we ever need to restore these. */
 | |
|     sortnumeric = oldsortnumeric;
 | |
|     sortdir = oldsortdir;
 | |
| 
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|  * Sort an array of metafied strings.  Use an "or" of bit flags
 | |
|  * to decide how to sort.  See the SORTIT_* flags in zsh.h.
 | |
|  *
 | |
|  * If unmetalenp is not NULL, the strings in array are already
 | |
|  * unmetafied and unmetalenp is an array containing the corresponding
 | |
|  * lengths.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| mod_export void
 | |
| strmetasort(char **array, int sortwhat, int *unmetalenp)
 | |
| {
 | |
|     char **arrptr;
 | |
|     /*
 | |
|      * Array of elements containing stuff to sort.  Note sortptrarr
 | |
|      * is an array of pointers, since that's more efficient
 | |
|      * for qsort() to manipulate.  sortarr is the array of
 | |
|      * structures.
 | |
|      */
 | |
|     SortElt *sortptrarr, *sortptrarrptr;
 | |
|     SortElt sortarr, sortarrptr;
 | |
|     int oldsortdir, oldsortnumeric, nsort;
 | |
| 
 | |
|     nsort = arrlen(array);
 | |
|     if (nsort < 2)
 | |
| 	return;
 | |
| 
 | |
|     pushheap();
 | |
| 
 | |
|     sortptrarr = (SortElt *) zhalloc(nsort * sizeof(SortElt));
 | |
|     sortarr = (SortElt) zhalloc(nsort * sizeof(struct sortelt));
 | |
|     for (arrptr = array, sortptrarrptr = sortptrarr, sortarrptr = sortarr;
 | |
| 	 *arrptr; arrptr++, sortptrarrptr++, sortarrptr++) {
 | |
| 	char *metaptr;
 | |
| 	int needlen, needalloc;
 | |
| 	*sortptrarrptr = sortarrptr;
 | |
| 	sortarrptr->orig = *arrptr;
 | |
| 
 | |
| 	if (unmetalenp) {
 | |
| 	    /*
 | |
| 	     * Already unmetafied.  We just need to check for
 | |
| 	     * embededded nulls.
 | |
| 	     */
 | |
| 	    int count = unmetalenp[arrptr-array];
 | |
| 	    /* Remember this length for sorted array */
 | |
| 	    sortarrptr->origlen = count;
 | |
| 	    for (metaptr = *arrptr; *metaptr != '\0' && count--; metaptr++)
 | |
| 		;
 | |
| 	    /* *metaptr must now be \0, even if we reached the end */
 | |
| 	    needlen = (count != 0);
 | |
| 	} else {
 | |
| 	    /*
 | |
| 	     * Not yet unmetafied.  See if it needs unmetafying.
 | |
| 	     * If it doesn't, there can't be any embedded nulls,
 | |
| 	     * since these are metafied.
 | |
| 	     */
 | |
| 	    needlen = 0;
 | |
| 	    for (metaptr = *arrptr; *metaptr && *metaptr != Meta;
 | |
| 		 metaptr++);
 | |
| 	}
 | |
| 	/*
 | |
| 	 * See if we need to do some special checking.
 | |
| 	 * Either we're going to need to copy it to transform it,
 | |
| 	 * or we need to unmetafy it.
 | |
| 	 */
 | |
| 	if ((needalloc = (sortwhat &
 | |
| 			  (SORTIT_IGNORING_CASE|SORTIT_IGNORING_BACKSLASHES)))
 | |
| 	    || *metaptr == Meta) {
 | |
| 	    char *s, *t, *src = *arrptr, *dst;
 | |
| 	    int len;
 | |
| 	    sortarrptr->cmp = dst =
 | |
| 		(char *)zhalloc(((sortwhat & SORTIT_IGNORING_CASE)?2:1)*strlen(src)+1);
 | |
| 
 | |
| 	    if (unmetalenp) {
 | |
| 		/* Already unmetafied and we have the length. */
 | |
| 		len = unmetalenp[arrptr-array];
 | |
| 	    } else if (*metaptr != '\0') {
 | |
| 		/*
 | |
| 		 * Needs unmetafying.  We need to check for
 | |
| 		 * embedded nulls while we do this.
 | |
| 		 */
 | |
| 		char *t = dst + (metaptr - src);
 | |
| 
 | |
| 		if (metaptr != src)
 | |
| 		    memcpy(dst, src, metaptr - src);
 | |
| 		while ((*t = *metaptr++)) {
 | |
| 		    if (*t++ == Meta) {
 | |
| 			if ((t[-1] = *metaptr++ ^ 32) == '\0')
 | |
| 			    needlen = 1;
 | |
| 		    }
 | |
| 		}
 | |
| 		len = t - dst;
 | |
| 		src = dst;
 | |
| 	    } else {
 | |
| 		/*
 | |
| 		 * Doesn't need unmetafying.
 | |
| 		 * This means metaptr is the NULL at the
 | |
| 		 * end of the string, so we have the length, and
 | |
| 		 * there are no embedded nulls, so we don't
 | |
| 		 * need the length later.
 | |
| 		 * We're only copying the string to transform it
 | |
| 		 * below.
 | |
| 		 */
 | |
| 		len = metaptr - src;
 | |
| 	    }
 | |
| 	    if (sortwhat & SORTIT_IGNORING_CASE) {
 | |
| 		char *send = src + len;
 | |
| #ifdef MULTIBYTE_SUPPORT
 | |
| 		if (isset(MULTIBYTE)) {
 | |
| 		    /*
 | |
| 		     * Lower the case the hard way.  Convert to a wide
 | |
| 		     * character, process that, and convert back.  We
 | |
| 		     * don't assume the characters have the same
 | |
| 		     * multibyte length.  We can't use casemodify()
 | |
| 		     * because we have unmetafied data, which may have
 | |
| 		     * been passed down to use.
 | |
| 		     */
 | |
| 		    mbstate_t mbsin, mbsout;
 | |
| 		    int clen;
 | |
| 		    wchar_t wc;
 | |
| 		    memset(&mbsin, 0, sizeof(mbstate_t));
 | |
| 		    memset(&mbsout, 0, sizeof(mbstate_t));
 | |
| 
 | |
| 		    for (s = src, t = dst; s < send; ) {
 | |
| 			clen = mbrtowc(&wc, s, send-s, &mbsin);
 | |
| 			if (clen < 0) {
 | |
| 			    /* invalid or unfinished: treat as single bytes */
 | |
| 			    while (s < send)
 | |
| 				*t++ = tulower(*s++);
 | |
| 			    break;
 | |
| 			}
 | |
| 			if (clen == 0) {
 | |
| 			    /* embedded null */
 | |
| 			    *t++ = '\0';
 | |
| 			    s++;
 | |
| 			    continue;
 | |
| 			}
 | |
| 			s += clen;
 | |
| 			wc = towlower(wc);
 | |
| 			clen = wcrtomb(t, wc, &mbsout);
 | |
| 			t += clen;
 | |
| 			DPUTS(clen < 0, "Bad conversion when lowering case");
 | |
| 		    }
 | |
| 		    *t = '\0';
 | |
| 		    len = t - dst;
 | |
| 		} else
 | |
| #endif
 | |
| 		    for (s = src, t = dst; s < send; )
 | |
| 			*t++ = tulower(*s++);
 | |
| 		src = dst;
 | |
| 	    }
 | |
| 	    if (sortwhat & SORTIT_IGNORING_BACKSLASHES) {
 | |
|                 char *end = src + len + 1;
 | |
| 		/* copy null byte, so increment length */
 | |
| 		for (s = src, t = dst; s < end; ) {
 | |
| 		    if (*s == '\\') {
 | |
| 			s++;
 | |
| 			len--;
 | |
| 		    }
 | |
| 		    *t++ = *s++;
 | |
| 		}
 | |
| 	    }
 | |
| 	    /* Do we need to store the length (embedded null)? */
 | |
| 	    sortarrptr->len = needlen ? len : -1;
 | |
| 	} else {
 | |
| 	    /*
 | |
| 	     * We can use the string as is, although it's possible
 | |
| 	     * we still need to take account of an embedded null.
 | |
| 	     */
 | |
| 	    sortarrptr->cmp = *arrptr;
 | |
| 	    sortarrptr->len = needlen ? unmetalenp[arrptr-array] : -1;
 | |
| 	}
 | |
|     }
 | |
|     /*
 | |
|      * We probably don't need to restore the following, but it's pretty cheap.
 | |
|      */
 | |
|     oldsortdir = sortdir;
 | |
|     oldsortnumeric = sortnumeric;
 | |
| 
 | |
|     sortdir = (sortwhat & SORTIT_BACKWARDS) ? -1 : 1;
 | |
|     sortnumeric = (sortwhat & SORTIT_NUMERICALLY) ? 1 : 0;
 | |
| 
 | |
|     qsort(sortptrarr, nsort, sizeof(SortElt), eltpcmp);
 | |
| 
 | |
|     sortnumeric = oldsortnumeric;
 | |
|     sortdir = oldsortdir;
 | |
|     for (arrptr = array, sortptrarrptr = sortptrarr; nsort--; ) {
 | |
| 	if (unmetalenp)
 | |
| 	    unmetalenp[arrptr-array] = (*sortptrarrptr)->origlen;
 | |
| 	*arrptr++ = (*sortptrarrptr++)->orig;
 | |
|     }
 | |
| 
 | |
|     popheap();
 | |
| }
 |