mirror of
				git://git.code.sf.net/p/zsh/code
				synced 2025-10-31 06:00:54 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			4338 lines
		
	
	
	
		
			115 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			4338 lines
		
	
	
	
		
			115 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * pattern.c - pattern matching
 | |
|  *
 | |
|  * This file is part of zsh, the Z shell.
 | |
|  *
 | |
|  * Copyright (c) 1999 Peter Stephenson
 | |
|  * All rights reserved.
 | |
|  *
 | |
|  * Permission is hereby granted, without written agreement and without
 | |
|  * license or royalty fees, to use, copy, modify, and distribute this
 | |
|  * software and to distribute modified versions of this software for any
 | |
|  * purpose, provided that the above copyright notice and the following
 | |
|  * two paragraphs appear in all copies of this software.
 | |
|  *
 | |
|  * In no event shall Peter Stephenson or the Zsh Development Group be liable
 | |
|  * to any party for direct, indirect, special, incidental, or consequential
 | |
|  * damages arising out of the use of this software and its documentation,
 | |
|  * even if Peter Stephenson and the Zsh Development Group have been advised of
 | |
|  * the possibility of such damage.
 | |
|  *
 | |
|  * Peter Stephenson and the Zsh Development Group specifically disclaim any
 | |
|  * warranties, including, but not limited to, the implied warranties of
 | |
|  * merchantability and fitness for a particular purpose.  The software
 | |
|  * provided hereunder is on an "as is" basis, and Peter Stephenson and the
 | |
|  * Zsh Development Group have no obligation to provide maintenance,
 | |
|  * support, updates, enhancements, or modifications.
 | |
|  *
 | |
|  * Pattern matching code derived from the regexp library by Henry
 | |
|  * Spencer, which has the following copyright.
 | |
|  *
 | |
|  *	Copyright (c) 1986 by University of Toronto.
 | |
|  *	Written by Henry Spencer.  Not derived from licensed software.
 | |
|  *
 | |
|  *	Permission is granted to anyone to use this software for any
 | |
|  *	purpose on any computer system, and to redistribute it freely,
 | |
|  *	subject to the following restrictions:
 | |
|  *
 | |
|  *	1. The author is not responsible for the consequences of use of
 | |
|  *		this software, no matter how awful, even if they arise
 | |
|  *		from defects in it.
 | |
|  *
 | |
|  *	2. The origin of this software must not be misrepresented, either
 | |
|  *		by explicit claim or by omission.
 | |
|  *
 | |
|  *	3. Altered versions must be plainly marked as such, and must not
 | |
|  *		be misrepresented as being the original software.
 | |
|  *
 | |
|  * Eagle-eyed readers will notice this is an altered version.  Incredibly
 | |
|  * sharp-eyed readers might even find bits that weren't altered.
 | |
|  *
 | |
|  *
 | |
|  *      And I experienced a sense that, like certain regular
 | |
|  *      expressions, seemed to match the day from beginning to end, so
 | |
|  *      that I did not need to identify the parenthesised subexpression
 | |
|  *      that told of dawn, nor the group of characters that indicated
 | |
|  *      the moment when my grandfather returned home with news of
 | |
|  *      Swann's departure for Paris; and the whole length of the month
 | |
|  *      of May, as if matched by a closure, fitted into the buffer of my
 | |
|  *      life with no sign of overflowing, turning the days, like a
 | |
|  *      procession of insects that could consist of this or that
 | |
|  *      species, into a random and unstructured repetition of different
 | |
|  *      sequences, anchored from the first day of the month to the last
 | |
|  *      in the same fashion as the weeks when I knew I would not see
 | |
|  *      Gilberte and would search in vain for any occurrences of the
 | |
|  *      string in the avenue of hawthorns by Tansonville, without my
 | |
|  *      having to delimit explicitly the start or finish of the pattern.
 | |
|  *
 | |
|  *                                 M. Proust, "In Search of Lost Files",
 | |
|  *                                 bk I, "The Walk by Bourne's Place".
 | |
|  */
 | |
| 
 | |
| #include "zsh.mdh"
 | |
| 
 | |
| /*
 | |
|  * The following union is used mostly for alignment purposes.
 | |
|  * Normal nodes are longs, while certain nodes take a char * as an argument;
 | |
|  * here we make sure that they both work out to the same length.
 | |
|  * The compiled regexp we construct consists of upats stuck together;
 | |
|  * anything else to be added (strings, numbers) is stuck after and
 | |
|  * then aligned to a whole number of upat units.
 | |
|  *
 | |
|  * Note also that offsets are in terms of the sizes of these things.
 | |
|  */
 | |
| union upat {
 | |
|     long l;
 | |
|     unsigned char *p;
 | |
| };
 | |
| 
 | |
| typedef union upat *Upat;
 | |
| 
 | |
| #include "pattern.pro"
 | |
| 
 | |
| /* Number of active parenthesized expressions allowed in backreferencing */
 | |
| #define NSUBEXP  9
 | |
| 
 | |
| /* definition	number	opnd?	meaning */
 | |
| #define	P_END	  0x00	/* no	End of program. */
 | |
| #define P_EXCSYNC 0x01	/* no   Test if following exclude already failed */
 | |
| #define P_EXCEND  0x02	/* no   Test if exclude matched orig branch */
 | |
| #define	P_BACK	  0x03	/* no	Match "", "next" ptr points backward. */
 | |
| #define	P_EXACTLY 0x04	/* lstr	Match this string. */
 | |
| #define	P_NOTHING 0x05	/* no	Match empty string. */
 | |
| #define	P_ONEHASH 0x06	/* node	Match this (simple) thing 0 or more times. */
 | |
| #define	P_TWOHASH 0x07	/* node	Match this (simple) thing 1 or more times. */
 | |
| #define P_GFLAGS  0x08	/* long Match nothing and set globbing flags */
 | |
| #define P_ISSTART 0x09  /* no   Match start of string. */
 | |
| #define P_ISEND   0x0a  /* no   Match end of string. */
 | |
| #define P_COUNTSTART 0x0b /* no Initialise P_COUNT */
 | |
| #define P_COUNT   0x0c  /* 3*long uc* node Match a number of repetitions */
 | |
| /* numbered so we can test bit 5 for a branch */
 | |
| #define	P_BRANCH  0x20	/* node	Match this alternative, or the next... */
 | |
| #define	P_WBRANCH 0x21	/* uc* node P_BRANCH, but match at least 1 char */
 | |
| /* excludes are also branches, but have bit 4 set, too */
 | |
| #define P_EXCLUDE 0x30	/* uc* node Exclude this from previous branch */
 | |
| #define P_EXCLUDP 0x31	/* uc* node Exclude, using full file path so far */
 | |
| /* numbered so we can test bit 6 so as not to match initial '.' */
 | |
| #define	P_ANY	  0x40	/* no	Match any one character. */
 | |
| #define	P_ANYOF	  0x41	/* str  Match any character in this string. */
 | |
| #define	P_ANYBUT  0x42	/* str  Match any character not in this string. */
 | |
| #define P_STAR    0x43	/* no   Match any set of characters. */
 | |
| #define P_NUMRNG  0x44	/* zr, zr Match a numeric range. */
 | |
| #define P_NUMFROM 0x45	/* zr   Match a number >= X */
 | |
| #define P_NUMTO   0x46	/* zr   Match a number <= X */
 | |
| #define P_NUMANY  0x47	/* no   Match any set of decimal digits */
 | |
| /* spaces left for P_OPEN+n,... for backreferences */
 | |
| #define	P_OPEN	  0x80	/* no	Mark this point in input as start of n. */
 | |
| #define	P_CLOSE	  0x90	/* no	Analogous to OPEN. */
 | |
| /*
 | |
|  * no    no argument
 | |
|  * zr    the range type zrange_t:  may be zlong or unsigned long
 | |
|  * char  a single char
 | |
|  * uc*   a pointer to unsigned char, used at run time and initialised
 | |
|  *       to NULL.
 | |
|  * str   null-terminated, metafied string
 | |
|  * lstr  length as long then string, not null-terminated, unmetafied.
 | |
|  */
 | |
| 
 | |
| /*
 | |
|  * Notes on usage:
 | |
|  * P_WBRANCH:  This works like a branch and is used in complex closures,
 | |
|  *    to ensure we don't succeed on a zero-length match of the pattern,
 | |
|  *    since that would cause an infinite loop.  We do this by recording
 | |
|  *    the positions where we have already tried to match.   See the
 | |
|  *    P_WBRANCH test in patmatch().
 | |
|  *
 | |
|  *  P_ANY, P_ANYOF:  the operand is a null terminated
 | |
|  *    string.  Normal characters match as expected.  Characters
 | |
|  *    in the range Meta+PP_ALPHA..Meta+PP_UNKWN do the appropriate
 | |
|  *    Posix range tests.  This relies on imeta returning true for these
 | |
|  *    characters.  We treat unknown POSIX ranges as never matching.
 | |
|  *    PP_RANGE means the next two (possibly metafied) characters form
 | |
|  *    the limits of a range to test; it's too much like hard work to
 | |
|  *    expand the range.
 | |
|  *
 | |
|  *  P_EXCLUDE, P_EXCSYNC, PEXCEND:  P_EXCLUDE appears in the pattern like
 | |
|  *    P_BRANCH, but applies to the immediately preceding branch.  The code in
 | |
|  *    the corresponding branch is followed by a P_EXCSYNC, which simply
 | |
|  *    acts as a marker that a P_EXCLUDE comes next.  The P_EXCLUDE
 | |
|  *    has a pointer to char embedded in it, which works
 | |
|  *    like P_WBRANCH:  if we get to the P_EXCSYNC, and we already matched
 | |
|  *    up to the same position, fail.  Thus we are forced to backtrack
 | |
|  *    on closures in the P_BRANCH if the first attempt was excluded.
 | |
|  *    Corresponding to P_EXCSYNC in the original branch, there is a
 | |
|  *    P_EXCEND in the exclusion.  If we get to this point, and we did
 | |
|  *    *not* match in the original branch, the exclusion itself fails,
 | |
|  *    otherwise it succeeds since we know the tail already matches,
 | |
|  *    so P_EXCEND is the end of the exclusion test.
 | |
|  *    The whole sorry mess looks like this, where the upper lines
 | |
|  *    show the linkage of the branches, and the lower shows the linkage
 | |
|  *    of their pattern arguments.
 | |
|  *
 | |
|  *     	        ---------------------      ----------------------
 | |
|  *              ^      	       	     v    ^      	         v
 | |
|  *      ( <BRANCH>:apat-><EXCSYNC> <EXCLUDE>:excpat-><EXCEND> ) tail
 | |
|  *                               	                         ^
 | |
|  *		       	  |                                      |
 | |
|  *			   --------------------------------------
 | |
|  *
 | |
|  * P_EXCLUDP: this behaves exactly like P_EXCLUDE, with the sole exception
 | |
|  *   that we prepend the path so far to the exclude pattern.   This is
 | |
|  *   for top level file globs, e.g. ** / *.c~*foo.c
 | |
|  *                                    ^ I had to leave this space
 | |
|  * P_NUM*: zl is a zlong if that is 64-bit, else an unsigned long.
 | |
|  *
 | |
|  * P_COUNTSTART, P_COUNT: a P_COUNTSTART flags the start of a quantified
 | |
|  * closure (#cN,M) and is used to initialise the count.  Executing
 | |
|  * the pattern leads back to the P_COUNT, while the next links of the
 | |
|  * P_COUNTSTART and P_COUNT lead to the tail of the pattern:
 | |
|  *
 | |
|  *	       	        ----------------
 | |
|  *     	       	       v       	        ^
 | |
|  *        <COUNTSTART><COUNT>pattern<BACK> tail
 | |
|  *	     	    v      v  	  	    ^
 | |
|  *	            ------------------------
 | |
|  */
 | |
| 
 | |
| #define	P_OP(p)		((p)->l & 0xff)
 | |
| #define	P_NEXT(p)	((p)->l >> 8)
 | |
| #define	P_OPERAND(p)	((p) + 1)
 | |
| #define P_ISBRANCH(p)   ((p)->l & 0x20)
 | |
| #define P_ISEXCLUDE(p)	(((p)->l & 0x30) == 0x30)
 | |
| #define P_NOTDOT(p)	((p)->l & 0x40)
 | |
| 
 | |
| /* Specific to lstr type, i.e. P_EXACTLY. */
 | |
| #define P_LS_LEN(p)	((p)[1].l) /* can be used as lvalue */
 | |
| #define P_LS_STR(p)	((char *)((p) + 2))
 | |
| 
 | |
| /* Specific to P_COUNT: arguments as offset in nodes from operator */
 | |
| #define P_CT_CURRENT	(1)	/* Current count */
 | |
| #define P_CT_MIN	(2)     /* Minimum count */
 | |
| #define P_CT_MAX	(3)	/* Maximum count, -1 for none */
 | |
| #define P_CT_PTR	(4)	/* Pointer to last match start */
 | |
| #define P_CT_OPERAND	(5)	/* Operand of P_COUNT */
 | |
| 
 | |
| /* Flags needed when pattern is executed */
 | |
| #define P_SIMPLE        0x01	/* Simple enough to be #/## operand. */
 | |
| #define P_HSTART        0x02	/* Starts with # or ##'d pattern. */
 | |
| #define P_PURESTR	0x04	/* Can be matched with a strcmp */
 | |
| 
 | |
| #if defined(ZSH_64_BIT_TYPE) || defined(LONG_IS_64_BIT)
 | |
| typedef zlong zrange_t;
 | |
| #define ZRANGE_T_IS_SIGNED	(1)
 | |
| #define ZRANGE_MAX ZLONG_MAX
 | |
| #else
 | |
| typedef unsigned long zrange_t;
 | |
| #define ZRANGE_MAX ULONG_MAX
 | |
| #endif
 | |
| 
 | |
| #ifdef MULTIBYTE_SUPPORT
 | |
| /*
 | |
|  * Handle a byte that's not part of a valid character.
 | |
|  *
 | |
|  * This range in Unicode is recommended for purposes of this
 | |
|  * kind as it corresponds to invalid characters.
 | |
|  *
 | |
|  * Note that this strictly only works if wchar_t represents
 | |
|  * Unicode code points, which isn't necessarily true; however,
 | |
|  * converting an invalid character into an unknown format is
 | |
|  * a bit tricky...
 | |
|  */
 | |
| #define WCHAR_INVALID(ch)			\
 | |
|     ((wchar_t) (0xDC00 + STOUC(ch)))
 | |
| #endif /* MULTIBYTE_SUPPORT */
 | |
| 
 | |
| /*
 | |
|  * Array of characters corresponding to zpc_chars enum, which it must match.
 | |
|  */
 | |
| static const char zpc_chars[ZPC_COUNT] = {
 | |
|     '/', '\0', Bar, Outpar, Tilde, Inpar, Quest, Star, Inbrack, Inang,
 | |
|     Hat, Pound, Bnullkeep, Quest, Star, '+', Bang, '!', '@'
 | |
| };
 | |
| 
 | |
| /*
 | |
|  * Corresponding strings used in enable/disable -p.
 | |
|  * NULL means no way of turning this on or off.
 | |
|  */
 | |
| /**/
 | |
| mod_export const char *zpc_strings[ZPC_COUNT] = {
 | |
|    NULL, NULL, "|", NULL, "~", "(", "?", "*", "[", "<",
 | |
|    "^", "#", NULL, "?(", "*(", "+(", "!(", "\\!(", "@("
 | |
| };
 | |
| 
 | |
| /*
 | |
|  * Corresponding array of pattern disables as set by the user
 | |
|  * using "disable -p".
 | |
|  */
 | |
| /**/
 | |
| mod_export char zpc_disables[ZPC_COUNT];
 | |
| 
 | |
| /*
 | |
|  * Stack of saved (compressed) zpc_disables for function scope.
 | |
|  */
 | |
| 
 | |
| static struct zpc_disables_save *zpc_disables_stack;
 | |
| 
 | |
| /*
 | |
|  * Characters which terminate a simple string (ZPC_COUNT) or
 | |
|  * an entire pattern segment (the first ZPC_SEG_COUNT).
 | |
|  * Each entry is either the corresponding character in zpc_chars
 | |
|  * or Marker which is guaranteed not to match a character in a
 | |
|  * pattern we are compiling.
 | |
|  *
 | |
|  * The complete list indicates characters that are special, so e.g.
 | |
|  * (testchar == special[ZPC_TILDE]) succeeds only if testchar is a Tilde
 | |
|  * *and* Tilde is currently special.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| char zpc_special[ZPC_COUNT];
 | |
| 
 | |
| /* Default size for pattern buffer */
 | |
| #define P_DEF_ALLOC 256
 | |
| 
 | |
| /* Flags used in compilation */
 | |
| static char *patstart, *patparse;	/* input pointers */
 | |
| static int patnpar;		/* () count */
 | |
| static char *patcode;		/* point of code emission */
 | |
| static long patsize;		/* size of code */
 | |
| static char *patout;		/* start of code emission string */
 | |
| static long patalloc;		/* size allocated for same */
 | |
| 
 | |
| /* Flags used in both compilation and execution */
 | |
| static int patflags;		    /* flags passed down to patcompile */
 | |
| static int patglobflags;  /* globbing flags & approx */
 | |
| 
 | |
| /*
 | |
|  * Increment pointer to metafied multibyte string.
 | |
|  */
 | |
| #ifdef MULTIBYTE_SUPPORT
 | |
| typedef wint_t patint_t;
 | |
| 
 | |
| #define PEOF WEOF
 | |
| 
 | |
| #define METACHARINC(x) ((void)metacharinc(&x))
 | |
| 
 | |
| /*
 | |
|  * TODO: the shiftstate isn't well handled; we don't guarantee
 | |
|  * to maintain it properly between characters.  If we don't
 | |
|  * need it we should use mbtowc() instead.
 | |
|  */
 | |
| static mbstate_t shiftstate;
 | |
| 
 | |
| /*
 | |
|  * Multibyte version: it's (almost) as easy to return the
 | |
|  * value as not, so do so since we sometimes need it..
 | |
|  */
 | |
| static wchar_t
 | |
| metacharinc(char **x)
 | |
| {
 | |
|     char *inptr = *x;
 | |
|     char inchar;
 | |
|     size_t ret = MB_INVALID;
 | |
|     wchar_t wc;
 | |
| 
 | |
|     /*
 | |
|      * Cheat if the top bit isn't set.  This is second-guessing
 | |
|      * the library, but we know for sure that if the character
 | |
|      * set doesn't have the property that all bytes with the 8th
 | |
|      * bit clear are single characters then we are stuffed.
 | |
|      */
 | |
|     if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*inptr) & 0x80))
 | |
|     {
 | |
| 	if (itok(*inptr))
 | |
| 	    inchar = ztokens[*inptr++ - Pound];
 | |
| 	else if (*inptr == Meta) {
 | |
| 	    inptr++;
 | |
| 	    inchar = *inptr++ ^ 32;
 | |
| 	} else {
 | |
| 	    inchar = *inptr++;
 | |
| 	}
 | |
| 	*x = inptr;
 | |
| 	return (wchar_t)STOUC(inchar);
 | |
|     }
 | |
| 
 | |
|     while (*inptr) {
 | |
| 	if (itok(*inptr))
 | |
| 	    inchar = ztokens[*inptr++ - Pound];
 | |
| 	else if (*inptr == Meta) {
 | |
| 	    inptr++;
 | |
| 	    inchar = *inptr++ ^ 32;
 | |
| 	} else {
 | |
| 	    inchar = *inptr++;
 | |
| 	}
 | |
| 	ret = mbrtowc(&wc, &inchar, 1, &shiftstate);
 | |
| 
 | |
| 	if (ret == MB_INVALID)
 | |
| 	    break;
 | |
| 	if (ret == MB_INCOMPLETE)
 | |
| 	    continue;
 | |
| 	*x = inptr;
 | |
| 	return wc;
 | |
|     }
 | |
| 
 | |
|     /* Error. */
 | |
|     /* Reset the shift state for next time. */
 | |
|     memset(&shiftstate, 0, sizeof(shiftstate));
 | |
|     return WCHAR_INVALID(*(*x)++);
 | |
| }
 | |
| 
 | |
| #else
 | |
| typedef int patint_t;
 | |
| 
 | |
| #define PEOF EOF
 | |
| 
 | |
| #define METACHARINC(x)	((void)((x) += (*(x) == Meta) ? 2 : 1))
 | |
| #endif
 | |
| 
 | |
| /*
 | |
|  * Return unmetafied char from string (x is any char *).
 | |
|  * Used with MULTIBYTE_SUPPORT if the GF_MULTIBYTE is not
 | |
|  * in effect.
 | |
|  */
 | |
| #define UNMETA(x)	(*(x) == Meta ? (x)[1] ^ 32 : *(x))
 | |
| 
 | |
| /* Add n more characters, ensuring there is enough space. */
 | |
| 
 | |
| enum {
 | |
|     PA_NOALIGN = 1,
 | |
|     PA_UNMETA  = 2
 | |
| };
 | |
| 
 | |
| /**/
 | |
| static void
 | |
| patadd(char *add, int ch, long n, int paflags)
 | |
| {
 | |
|     /* Make sure everything gets aligned unless we get PA_NOALIGN. */
 | |
|     long newpatsize = patsize + n;
 | |
|     if (!(paflags & PA_NOALIGN))
 | |
| 	newpatsize = (newpatsize + sizeof(union upat) - 1) &
 | |
| 		      ~(sizeof(union upat) - 1);
 | |
|     if (patalloc < newpatsize) {
 | |
| 	long newpatalloc =
 | |
| 	    2*(newpatsize > patalloc ? newpatsize : patalloc);
 | |
| 	patout = (char *)zrealloc((char *)patout, newpatalloc);
 | |
| 	patcode = patout + patsize;
 | |
| 	patalloc = newpatalloc;
 | |
|     }
 | |
|     patsize = newpatsize;
 | |
|     if (add) {
 | |
| 	if (paflags & PA_UNMETA) {
 | |
| 	    /*
 | |
| 	     * Unmetafy and untokenize the string as we go.
 | |
| 	     * The Meta characters in add aren't counted in n.
 | |
| 	     */
 | |
| 	    while (n--) {
 | |
| 		if (itok(*add))
 | |
| 		    *patcode++ = ztokens[*add++ - Pound];
 | |
| 		else if (*add == Meta) {
 | |
| 		    add++;
 | |
| 		    *patcode++ = *add++ ^ 32;
 | |
| 		} else {
 | |
| 		    *patcode++ = *add++;
 | |
| 		}
 | |
| 	    }
 | |
| 	} else {
 | |
| 	    while (n--)
 | |
| 		*patcode++ = *add++;
 | |
| 	}
 | |
|     } else
 | |
| 	*patcode++ = ch;
 | |
|     patcode = patout + patsize;
 | |
| }
 | |
| 
 | |
| static long rn_offs;
 | |
| /* operates on pointers to union upat, returns a pointer */
 | |
| #define PATNEXT(p) ((rn_offs = P_NEXT(p)) ? \
 | |
| 		    (P_OP(p) == P_BACK) ? \
 | |
| 		    ((p)-rn_offs) : ((p)+rn_offs) : NULL)
 | |
| 
 | |
| /*
 | |
|  * Set up zpc_special with characters that end a string segment.
 | |
|  * "Marker" cannot occur in the pattern we are compiling so
 | |
|  * is used to mark "invalid".
 | |
|  */
 | |
| static void
 | |
| patcompcharsset(void)
 | |
| {
 | |
|     char *spp, *disp;
 | |
|     int i;
 | |
| 
 | |
|     /* Initialise enabled special characters */
 | |
|     memcpy(zpc_special, zpc_chars, ZPC_COUNT);
 | |
|     /* Apply user disables from disable -p */
 | |
|     for (i = 0, spp = zpc_special, disp = zpc_disables;
 | |
| 	 i < ZPC_COUNT;
 | |
| 	 i++, spp++, disp++) {
 | |
| 	if (*disp)
 | |
| 	    *spp = Marker;
 | |
|     }
 | |
| 
 | |
|     if (!isset(EXTENDEDGLOB)) {
 | |
| 	/* Extended glob characters are not active */
 | |
| 	zpc_special[ZPC_TILDE] = zpc_special[ZPC_HAT] =
 | |
| 	    zpc_special[ZPC_HASH] = Marker;
 | |
|     }
 | |
|     if (!isset(KSHGLOB)) {
 | |
| 	/*
 | |
| 	 * Ksh glob characters are not active.
 | |
| 	 * * and ? are shared with normal globbing, but for their
 | |
| 	 * use here we are looking for a following Inpar.
 | |
| 	 */
 | |
| 	zpc_special[ZPC_KSH_QUEST] = zpc_special[ZPC_KSH_STAR] =
 | |
| 	    zpc_special[ZPC_KSH_PLUS] = zpc_special[ZPC_KSH_BANG] =
 | |
| 	    zpc_special[ZPC_KSH_BANG2] = zpc_special[ZPC_KSH_AT] = Marker;
 | |
|     }
 | |
|     /*
 | |
|      * Note that if we are using KSHGLOB, then we test for a following
 | |
|      * Inpar, not zpc_special[ZPC_INPAR]:  the latter makes an Inpar on
 | |
|      * its own active.  The zpc_special[ZPC_KSH_*] followed by any old Inpar
 | |
|      * discriminate ksh globbing.
 | |
|      */
 | |
|     if (isset(SHGLOB)) {
 | |
| 	/*
 | |
| 	 * Grouping and numeric ranges are not valid.
 | |
| 	 * We do allow alternation, however; it's needed for
 | |
| 	 * "case".  This may not be entirely consistent.
 | |
| 	 *
 | |
| 	 * Don't disable Outpar: we may need to match the end of KSHGLOB
 | |
| 	 * parentheses and it would be difficult to tell them apart.
 | |
| 	 */
 | |
| 	zpc_special[ZPC_INPAR] = zpc_special[ZPC_INANG] = Marker;
 | |
|     }
 | |
| }
 | |
| 
 | |
| /* Called before parsing a set of file matches to initialize flags */
 | |
| 
 | |
| /**/
 | |
| void
 | |
| patcompstart(void)
 | |
| {
 | |
|     patcompcharsset();
 | |
|     if (isset(CASEGLOB))
 | |
| 	patglobflags = 0;
 | |
|     else
 | |
| 	patglobflags = GF_IGNCASE;
 | |
|     if (isset(MULTIBYTE))
 | |
| 	patglobflags |= GF_MULTIBYTE;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Top level pattern compilation subroutine
 | |
|  * exp is a null-terminated, metafied string.
 | |
|  * inflags is an or of some PAT_* flags.
 | |
|  * endexp, if non-null, is set to a pointer to the end of the
 | |
|  *   part of exp which was compiled.  This is used when
 | |
|  *   compiling patterns for directories which must be
 | |
|  *   matched recursively.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| mod_export Patprog
 | |
| patcompile(char *exp, int inflags, char **endexp)
 | |
| {
 | |
|     int flags = 0;
 | |
|     long len = 0;
 | |
|     long startoff;
 | |
|     Upat pscan;
 | |
|     char *lng, *strp = NULL;
 | |
|     Patprog p;
 | |
| 
 | |
|     queue_signals();
 | |
| 
 | |
|     startoff = sizeof(struct patprog);
 | |
|     /* Ensure alignment of start of program string */
 | |
|     startoff = (startoff + sizeof(union upat) - 1) & ~(sizeof(union upat) - 1);
 | |
| 
 | |
|     /* Allocate reasonable sized chunk if none, reduce size if too big */
 | |
|     if (patalloc != P_DEF_ALLOC)
 | |
| 	patout = (char *)zrealloc(patout, patalloc = P_DEF_ALLOC);
 | |
|     patcode = patout + startoff;
 | |
|     patsize = patcode - patout;
 | |
|     patstart = patparse = exp;
 | |
|     /*
 | |
|      * Note global patnpar numbers parentheses 1..9, while patnpar
 | |
|      * in struct is actual count of parentheses.
 | |
|      */
 | |
|     patnpar = 1;
 | |
|     patflags = inflags & ~(PAT_PURES|PAT_HAS_EXCLUDP);
 | |
| 
 | |
|     if (!(patflags & PAT_FILE)) {
 | |
| 	patcompcharsset();
 | |
| 	zpc_special[ZPC_SLASH] = Marker;
 | |
| 	remnulargs(patparse);
 | |
| 	if (isset(MULTIBYTE))
 | |
| 	    patglobflags = GF_MULTIBYTE;
 | |
| 	else
 | |
| 	    patglobflags = 0;
 | |
|     }
 | |
|     if (patflags & PAT_LCMATCHUC)
 | |
| 	patglobflags |= GF_LCMATCHUC;
 | |
|     /*
 | |
|      * Have to be set now, since they get updated during compilation.
 | |
|      */
 | |
|     ((Patprog)patout)->globflags = patglobflags;
 | |
| 
 | |
|     if (!(patflags & PAT_ANY)) {
 | |
| 	/* Look for a really pure string, with no tokens at all. */
 | |
| 	if (!(patglobflags & ~GF_MULTIBYTE)
 | |
| #ifdef __CYGWIN__
 | |
| 	    /*
 | |
| 	     * If the OS treats files case-insensitively and we
 | |
| 	     * are looking at files, we don't need to use pattern
 | |
| 	     * matching to find the file.
 | |
| 	     */
 | |
| 	    || (!(patglobflags & ~GF_IGNCASE) && (patflags & PAT_FILE))
 | |
| #endif
 | |
| 	    )
 | |
| 	{
 | |
| 	    /*
 | |
| 	     * Waah!  I wish I understood this.
 | |
| 	     * Empty metafied strings have an initial Nularg.
 | |
| 	     * This never corresponds to a real character in
 | |
| 	     * a glob pattern or string, so skip it.
 | |
| 	     */
 | |
| 	    if (*exp == Nularg)
 | |
| 		exp++;
 | |
| 	    for (strp = exp; *strp &&
 | |
| 		     (!(patflags & PAT_FILE) || *strp != '/') && !itok(*strp);
 | |
| 		 strp++)
 | |
| 		;
 | |
| 	}
 | |
| 	if (!strp || (*strp && *strp != '/')) {
 | |
| 	    /* No, do normal compilation. */
 | |
| 	    strp = NULL;
 | |
| 	    if (patcompswitch(0, &flags) == 0) {
 | |
| 		unqueue_signals();
 | |
| 		return NULL;
 | |
| 	    }
 | |
| 	} else {
 | |
| 	    /*
 | |
| 	     * Yes, copy the string, and skip compilation altogether.
 | |
| 	     * Null terminate for the benefit of globbing.
 | |
| 	     * Leave metafied both for globbing and for our own
 | |
| 	     * efficiency.
 | |
| 	     */
 | |
| 	    patparse = strp;
 | |
| 	    len = strp - exp;
 | |
| 	    patadd(exp, 0, len + 1, 0);
 | |
| 	    patout[startoff + len] = '\0';
 | |
| 	    patflags |= PAT_PURES;
 | |
| 	}
 | |
|     }
 | |
| 
 | |
|     /* end of compilation: safe to use pointers */
 | |
|     p = (Patprog)patout;
 | |
|     p->startoff = startoff;
 | |
|     p->patstartch = '\0';
 | |
|     p->globend = patglobflags;
 | |
|     p->flags = patflags;
 | |
|     p->mustoff = 0;
 | |
|     p->size = patsize;
 | |
|     p->patmlen = len;
 | |
|     p->patnpar = patnpar-1;
 | |
| 
 | |
|     if (!strp) {
 | |
| 	pscan = (Upat)(patout + startoff);
 | |
| 
 | |
| 	if (!(patflags & PAT_ANY) && P_OP(PATNEXT(pscan)) == P_END) {
 | |
| 	    /* only one top level choice */
 | |
| 	    pscan = P_OPERAND(pscan);
 | |
| 
 | |
| 	    if (flags & P_PURESTR) {
 | |
| 		/*
 | |
| 		 * The pattern can be matched with a simple strncmp/strcmp.
 | |
| 		 * Careful in case we've overwritten the node for the next ptr.
 | |
| 		 */
 | |
| 		char *dst = patout + startoff;
 | |
| 		Upat next;
 | |
| 		p->flags |= PAT_PURES;
 | |
| 		for (; pscan; pscan = next) {
 | |
| 		    next = PATNEXT(pscan);
 | |
| 		    if (P_OP(pscan) == P_EXACTLY) {
 | |
| 			char *opnd = P_LS_STR(pscan), *mtest;
 | |
| 			long oplen = P_LS_LEN(pscan), ilen;
 | |
| 			int nmeta = 0;
 | |
| 			/*
 | |
| 			 * Unfortunately we unmetafied the string
 | |
| 			 * and we need to put any metacharacters
 | |
| 			 * back now we know it's a pure string.
 | |
| 			 * This shouldn't happen too often, it's
 | |
| 			 * just that there are some cases such
 | |
| 			 * as . and .. in files where we really
 | |
| 			 * need a pure string even if there are
 | |
| 			 * pattern characters flying around.
 | |
| 			 */
 | |
| 			for (mtest = opnd, ilen = oplen; ilen;
 | |
| 			     mtest++, ilen--)
 | |
| 			    if (imeta(*mtest))
 | |
| 				nmeta++;
 | |
| 			if (nmeta) {
 | |
| 			    patadd(NULL, 0, nmeta, 0);
 | |
| 			    p = (Patprog)patout;
 | |
| 			    opnd = dupstring_wlen(opnd, oplen);
 | |
| 			    dst = patout + startoff;
 | |
| 			}
 | |
| 
 | |
| 			while (oplen--) {
 | |
| 			    if (imeta(*opnd)) {
 | |
| 				*dst++ = Meta;
 | |
| 				*dst++ = *opnd++ ^ 32;
 | |
| 			    } else {
 | |
| 				*dst++ = *opnd++;
 | |
| 			    }
 | |
| 			}
 | |
| 			/* Only one string in a PAT_PURES, so now done. */
 | |
| 			break;
 | |
| 		    }
 | |
| 		}
 | |
| 		p->size = dst - patout;
 | |
| 		/* patmlen is really strlen.  We don't need a null. */
 | |
| 		p->patmlen = p->size - startoff;
 | |
| 	    } else {
 | |
| 		/* starting point info */
 | |
| 		if (P_OP(pscan) == P_EXACTLY && !p->globflags &&
 | |
| 		    P_LS_LEN(pscan))
 | |
| 		    p->patstartch = *P_LS_STR(pscan);
 | |
| 		/*
 | |
| 		 * Find the longest literal string in something expensive.
 | |
| 		 * This is itself not all that cheap if we have
 | |
| 		 * case-insensitive matching or approximation, so don't.
 | |
| 		 */
 | |
| 		if ((flags & P_HSTART) && !p->globflags) {
 | |
| 		    lng = NULL;
 | |
| 		    len = 0;
 | |
| 		    for (; pscan; pscan = PATNEXT(pscan))
 | |
| 			if (P_OP(pscan) == P_EXACTLY &&
 | |
| 			    P_LS_LEN(pscan) >= len) {
 | |
| 			    lng = P_LS_STR(pscan);
 | |
| 			    len = P_LS_LEN(pscan);
 | |
| 			}
 | |
| 		    if (lng) {
 | |
| 			p->mustoff = lng - patout;
 | |
| 			p->patmlen = len;
 | |
| 		    }
 | |
| 		}
 | |
| 	    }
 | |
| 	}
 | |
|     }
 | |
| 
 | |
|     /*
 | |
|      * The pattern was compiled in a fixed buffer:  unless told otherwise,
 | |
|      * we stick the compiled pattern on the heap.  This is necessary
 | |
|      * for files where we will often be compiling multiple segments at once.
 | |
|      * But if we get the ZDUP flag we always put it in zalloc()ed memory.
 | |
|      */
 | |
|     if (patflags & PAT_ZDUP) {
 | |
| 	Patprog newp = (Patprog)zalloc(patsize);
 | |
| 	memcpy((char *)newp, (char *)p, patsize);
 | |
| 	p = newp;
 | |
|     } else if (!(patflags & PAT_STATIC)) {
 | |
| 	Patprog newp = (Patprog)zhalloc(patsize);
 | |
| 	memcpy((char *)newp, (char *)p, patsize);
 | |
| 	p = newp;
 | |
|     }
 | |
| 
 | |
|     if (endexp)
 | |
| 	*endexp = patparse;
 | |
| 
 | |
|     unqueue_signals();
 | |
|     return p;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Main body or parenthesized subexpression in pattern
 | |
|  * Parenthesis (and any ksh_glob gubbins) will have been removed.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| static long
 | |
| patcompswitch(int paren, int *flagp)
 | |
| {
 | |
|     long starter, br, ender, excsync = 0;
 | |
|     int parno = 0;
 | |
|     int flags, gfchanged = 0;
 | |
|     long savglobflags = (long)patglobflags;
 | |
|     Upat ptr;
 | |
| 
 | |
|     *flagp = 0;
 | |
| 
 | |
|     if (paren && (patglobflags & GF_BACKREF) && patnpar <= NSUBEXP) {
 | |
| 	/*
 | |
| 	 * parenthesized:  make an open node.
 | |
| 	 * We can only refer to the first nine parentheses.
 | |
| 	 * For any others, we just use P_OPEN on its own; there's
 | |
| 	 * no gain in arbitrarily limiting the number of parentheses.
 | |
| 	 */
 | |
| 	parno = patnpar++;
 | |
| 	starter = patnode(P_OPEN + parno);
 | |
|     } else
 | |
| 	starter = 0;
 | |
| 
 | |
|     br = patnode(P_BRANCH);
 | |
|     if (!patcompbranch(&flags, paren))
 | |
| 	return 0;
 | |
|     if (patglobflags != (int)savglobflags)
 | |
| 	gfchanged++;
 | |
|     if (starter)
 | |
| 	pattail(starter, br);
 | |
|     else
 | |
| 	starter = br;
 | |
| 
 | |
|     *flagp |= flags & (P_HSTART|P_PURESTR);
 | |
| 
 | |
|     while (*patparse == zpc_chars[ZPC_BAR] ||
 | |
| 	   (*patparse == zpc_special[ZPC_TILDE] &&
 | |
| 	    (patparse[1] == '/' ||
 | |
| 	     !memchr(zpc_special, patparse[1], ZPC_SEG_COUNT)))) {
 | |
| 	int tilde = *patparse++ == zpc_special[ZPC_TILDE];
 | |
| 	long gfnode = 0, newbr;
 | |
| 
 | |
| 	*flagp &= ~P_PURESTR;
 | |
| 
 | |
| 	if (tilde) {
 | |
| 	    union upat up;
 | |
| 	    /* excsync remembers the P_EXCSYNC node before a chain of
 | |
| 	     * exclusions:  all point back to this.  only the
 | |
| 	     * original (non-excluded) branch gets a trailing P_EXCSYNC.
 | |
| 	     */
 | |
| 	    if (!excsync) {
 | |
| 		excsync = patnode(P_EXCSYNC);
 | |
| 		patoptail(br, excsync);
 | |
| 	    }
 | |
| 	    /*
 | |
| 	     * By default, approximations are turned off in exclusions:
 | |
| 	     * we need to do this here as otherwise the code compiling
 | |
| 	     * the exclusion doesn't know if the flags have really
 | |
| 	     * changed if the error count gets restored.
 | |
| 	     */
 | |
| 	    patglobflags &= ~0xff;
 | |
| 	    if (!(patflags & PAT_FILET) || paren) {
 | |
| 		br = patnode(P_EXCLUDE);
 | |
| 	    } else {
 | |
| 		/*
 | |
| 		 * At top level (paren == 0) in a file glob !(patflags
 | |
| 		 * &PAT_FILET) do the exclusion prepending the file path
 | |
| 		 * so far.  We need to flag this to avoid unnecessarily
 | |
| 		 * copying the path.
 | |
| 		 */
 | |
| 		br = patnode(P_EXCLUDP);
 | |
| 		patflags |= PAT_HAS_EXCLUDP;
 | |
| 	    }
 | |
| 	    up.p = NULL;
 | |
| 	    patadd((char *)&up, 0, sizeof(up), 0);
 | |
| 	    /* / is not treated as special if we are at top level */
 | |
| 	    if (!paren && zpc_special[ZPC_SLASH] == '/') {
 | |
| 		tilde++;
 | |
| 		zpc_special[ZPC_SLASH] = Marker;
 | |
| 	    }
 | |
| 	} else {
 | |
| 	    excsync = 0;
 | |
| 	    br = patnode(P_BRANCH);
 | |
| 	    /*
 | |
| 	     * The position of the following statements means globflags
 | |
| 	     * set in the main branch carry over to the exclusion.
 | |
| 	     */
 | |
| 	    if (!paren) {
 | |
| 		patglobflags = 0;
 | |
| 		if (((Patprog)patout)->globflags) {
 | |
| 		    /*
 | |
| 		     * If at top level, we need to reinitialize flags to zero,
 | |
| 		     * since (#i)foo|bar only applies to foo and we stuck
 | |
| 		     * the #i into the global flags.
 | |
| 		     * We could have done it so that they only got set in the
 | |
| 		     * first branch, but it's quite convenient having any
 | |
| 		     * global flags set in the header and not buried in the
 | |
| 		     * pattern.  (Or maybe it isn't and we should
 | |
| 		     * forget this bit and always stick in an explicit GFLAGS
 | |
| 		     * statement instead of using the header.)
 | |
| 		     * Also, this can't happen for file globs where there are
 | |
| 		     * no top-level |'s.
 | |
| 		     *
 | |
| 		     * No gfchanged, as nothing to follow branch at top
 | |
| 		     * level.
 | |
| 		     */
 | |
| 		    union upat up;
 | |
| 		    gfnode = patnode(P_GFLAGS);
 | |
| 		    up.l = patglobflags;
 | |
| 		    patadd((char *)&up, 0, sizeof(union upat), 0);
 | |
| 		}
 | |
| 	    } else {
 | |
| 		patglobflags = (int)savglobflags;
 | |
| 	    }
 | |
| 	}
 | |
| 	newbr = patcompbranch(&flags, paren);
 | |
| 	if (tilde == 2) {
 | |
| 	    /* restore special treatment of / */
 | |
| 	    zpc_special[ZPC_SLASH] = '/';
 | |
| 	}
 | |
| 	if (!newbr)
 | |
| 	    return 0;
 | |
| 	if (gfnode)
 | |
| 	    pattail(gfnode, newbr);
 | |
| 	if (!tilde && patglobflags != (int)savglobflags)
 | |
| 	    gfchanged++;
 | |
| 	pattail(starter, br);
 | |
| 	if (excsync)
 | |
| 	    patoptail(br, patnode(P_EXCEND));
 | |
| 	*flagp |= flags & P_HSTART;
 | |
|     }
 | |
| 
 | |
|     /*
 | |
|      * Make a closing node, hooking it to the end.
 | |
|      * Note that we can't optimize P_NOTHING out here, since another
 | |
|      * branch at that point would indicate the current choices continue,
 | |
|      * which they don't.
 | |
|      */
 | |
|     ender = patnode(paren ? parno ? P_CLOSE+parno : P_NOTHING : P_END);
 | |
|     pattail(starter, ender);
 | |
| 
 | |
|     /*
 | |
|      * Hook the tails of the branches to the closing node,
 | |
|      * except for exclusions which terminate where they are.
 | |
|      */
 | |
|     for (ptr = (Upat)patout + starter; ptr; ptr = PATNEXT(ptr))
 | |
| 	if (!P_ISEXCLUDE(ptr))
 | |
| 	    patoptail(ptr-(Upat)patout, ender);
 | |
| 
 | |
|     /* check for proper termination */
 | |
|     if ((paren && *patparse++ != Outpar) ||
 | |
| 	(!paren && *patparse &&
 | |
| 	 !((patflags & PAT_FILE) && *patparse == '/')))
 | |
| 	return 0;
 | |
| 
 | |
|     if (paren && gfchanged) {
 | |
| 	/*
 | |
| 	 * Restore old values of flags when leaving parentheses.
 | |
| 	 * gfchanged detects a change in any branch (except exclusions
 | |
| 	 * which are separate), since we need to emit this even if
 | |
| 	 * a later branch happened to put the flags back.
 | |
| 	 */
 | |
| 	pattail(ender, patnode(P_GFLAGS));
 | |
| 	patglobflags = (int)savglobflags;
 | |
| 	patadd((char *)&savglobflags, 0, sizeof(long), 0);
 | |
|     }
 | |
| 
 | |
|     return starter;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Compile something ended by Bar, Outpar, Tilde, or end of string.
 | |
|  * Note the BRANCH or EXCLUDE tag must already have been omitted:
 | |
|  * this returns the position of the operand of that.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| static long
 | |
| patcompbranch(int *flagp, int paren)
 | |
| {
 | |
|     long chain, latest = 0, starter;
 | |
|     int flags = 0;
 | |
| 
 | |
|     *flagp = P_PURESTR;
 | |
| 
 | |
|     starter = chain = 0;
 | |
|     while (!memchr(zpc_special, *patparse, ZPC_SEG_COUNT) ||
 | |
| 	   (*patparse == zpc_special[ZPC_TILDE] && patparse[1] != '/' &&
 | |
| 	    memchr(zpc_special, patparse[1], ZPC_SEG_COUNT))) {
 | |
| 	if ((*patparse == zpc_special[ZPC_INPAR] &&
 | |
| 	     patparse[1] == zpc_special[ZPC_HASH]) ||
 | |
| 	    (*patparse == zpc_special[ZPC_KSH_AT] && patparse[1] == Inpar &&
 | |
| 	     patparse[2] == zpc_special[ZPC_HASH])) {
 | |
| 	    /* Globbing flags. */
 | |
| 	    char *pp1 = patparse;
 | |
| 	    int oldglobflags = patglobflags, ignore;
 | |
| 	    long assert;
 | |
| 	    patparse += (*patparse == '@') ? 3 : 2;
 | |
| 	    if (!patgetglobflags(&patparse, &assert, &ignore))
 | |
| 		return 0;
 | |
| 	    if (!ignore) {
 | |
| 		if (assert) {
 | |
| 		    /*
 | |
| 		     * Start/end assertion looking like flags, but
 | |
| 		     * actually handled as a normal node
 | |
| 		     */
 | |
| 		    latest = patnode(assert);
 | |
| 		    flags = 0;
 | |
| 		} else {
 | |
| 		    if (pp1 == patstart) {
 | |
| 			/* Right at start of pattern, the simplest case.
 | |
| 			 * Put them into the flags and don't emit anything.
 | |
| 			 */
 | |
| 			((Patprog)patout)->globflags = patglobflags;
 | |
| 			continue;
 | |
| 		    } else if (!*patparse) {
 | |
| 			/* Right at the end, so just leave the flags for
 | |
| 			 * the next Patprog in the chain to pick up.
 | |
| 			 */
 | |
| 			break;
 | |
| 		    }
 | |
| 		    /*
 | |
| 		     * Otherwise, we have to stick them in as a pattern
 | |
| 		     * matching nothing.
 | |
| 		     */
 | |
| 		    if (oldglobflags != patglobflags) {
 | |
| 			/* Flags changed */
 | |
| 			union upat up;
 | |
| 			latest = patnode(P_GFLAGS);
 | |
| 			up.l = patglobflags;
 | |
| 			patadd((char *)&up, 0, sizeof(union upat), 0);
 | |
| 		    } else {
 | |
| 			/* No effect. */
 | |
| 			continue;
 | |
| 		    }
 | |
| 		}
 | |
| 	    } else if (!*patparse)
 | |
| 		break;
 | |
| 	    else
 | |
| 		continue;
 | |
| 	} else if (*patparse == zpc_special[ZPC_HAT]) {
 | |
| 	    /*
 | |
| 	     * ^pat:  anything but pat.  For proper backtracking,
 | |
| 	     * etc., we turn this into (*~pat), except without the
 | |
| 	     * parentheses.
 | |
| 	     */
 | |
| 	    patparse++;
 | |
| 	    latest = patcompnot(0, &flags);
 | |
| 	} else
 | |
| 	    latest = patcomppiece(&flags, paren);
 | |
| 	if (!latest)
 | |
| 	    return 0;
 | |
| 	if (!starter)
 | |
| 	    starter = latest;
 | |
| 	if (!(flags & P_PURESTR))
 | |
| 	    *flagp &= ~P_PURESTR;
 | |
| 	if (!chain)
 | |
| 	    *flagp |= flags & P_HSTART;
 | |
| 	else
 | |
| 	    pattail(chain, latest);
 | |
| 	chain = latest;
 | |
|     }
 | |
|     /* check if there was nothing in the loop, i.e. () */
 | |
|     if (!chain)
 | |
| 	starter = patnode(P_NOTHING);
 | |
| 
 | |
|     return starter;
 | |
| }
 | |
| 
 | |
| /* get glob flags, return 1 for success, 0 for failure */
 | |
| 
 | |
| /**/
 | |
| int
 | |
| patgetglobflags(char **strp, long *assertp, int *ignore)
 | |
| {
 | |
|     char *nptr, *ptr = *strp;
 | |
|     zlong ret;
 | |
| 
 | |
|     *assertp = 0;
 | |
|     *ignore = 1;
 | |
|     /* (#X): assumes we are still positioned on the first X */
 | |
|     for (; *ptr && *ptr != Outpar; ptr++) {
 | |
| 	if (*ptr == 'q') {
 | |
| 	    /* Glob qualifiers, ignored in pattern code */
 | |
| 	    while (*ptr && *ptr != Outpar)
 | |
| 		ptr++;
 | |
| 	    break;
 | |
| 	} else {
 | |
| 	    *ignore = 0;
 | |
| 	    switch (*ptr) {
 | |
| 	    case 'a':
 | |
| 		/* Approximate matching, max no. of errors follows */
 | |
| 		ret = zstrtol(++ptr, &nptr, 10);
 | |
| 		/*
 | |
| 		 * We can't have more than 254, because we need 255 to
 | |
| 		 * mark 254 errors in wbranch and exclude sync strings
 | |
| 		 * (hypothetically --- hope no-one tries it).
 | |
| 		 */
 | |
| 		if (ret < 0 || ret > 254 || ptr == nptr)
 | |
| 		    return 0;
 | |
| 		patglobflags = (patglobflags & ~0xff) | (ret & 0xff);
 | |
| 		ptr = nptr-1;
 | |
| 		break;
 | |
| 
 | |
| 	    case 'l':
 | |
| 		/* Lowercase in pattern matches lower or upper in target */
 | |
| 		patglobflags = (patglobflags & ~GF_IGNCASE) | GF_LCMATCHUC;
 | |
| 		break;
 | |
| 
 | |
| 	    case 'i':
 | |
| 		/* Fully case insensitive */
 | |
| 		patglobflags = (patglobflags & ~GF_LCMATCHUC) | GF_IGNCASE;
 | |
| 		break;
 | |
| 
 | |
| 	    case 'I':
 | |
| 		/* Restore case sensitivity */
 | |
| 		patglobflags &= ~(GF_LCMATCHUC|GF_IGNCASE);
 | |
| 		break;
 | |
| 
 | |
| 	    case 'b':
 | |
| 		/* Make backreferences */
 | |
| 		patglobflags |= GF_BACKREF;
 | |
| 		break;
 | |
| 
 | |
| 	    case 'B':
 | |
| 		/* Don't make backreferences */
 | |
| 		patglobflags &= ~GF_BACKREF;
 | |
| 		break;
 | |
| 
 | |
| 	    case 'm':
 | |
| 		/* Make references to complete match */
 | |
| 		patglobflags |= GF_MATCHREF;
 | |
| 		break;
 | |
| 
 | |
| 	    case 'M':
 | |
| 		/* Don't */
 | |
| 		patglobflags &= ~GF_MATCHREF;
 | |
| 		break;
 | |
| 
 | |
| 	    case 's':
 | |
| 		*assertp = P_ISSTART;
 | |
| 		break;
 | |
| 
 | |
| 	    case 'e':
 | |
| 		*assertp = P_ISEND;
 | |
| 		break;
 | |
| 
 | |
| 	    case 'u':
 | |
| 		patglobflags |= GF_MULTIBYTE;
 | |
| 		break;
 | |
| 
 | |
| 	    case 'U':
 | |
| 		patglobflags &= ~GF_MULTIBYTE;
 | |
| 		break;
 | |
| 
 | |
| 	    default:
 | |
| 		return 0;
 | |
| 	    }
 | |
| 	}
 | |
|     }
 | |
|     if (*ptr != Outpar)
 | |
| 	return 0;
 | |
|     /* Start/end assertions must appear on their own. */
 | |
|     if (*assertp && (*strp)[1] != Outpar)
 | |
| 	return 0;
 | |
|     *strp = ptr + 1;
 | |
|     return 1;
 | |
| }
 | |
| 
 | |
| 
 | |
| static const char *colon_stuffs[]  = {
 | |
|     "alpha", "alnum", "ascii", "blank", "cntrl", "digit", "graph", 
 | |
|     "lower", "print", "punct", "space", "upper", "xdigit", "IDENT",
 | |
|     "IFS", "IFSSPACE", "WORD", "INCOMPLETE", "INVALID", NULL
 | |
| };
 | |
| 
 | |
| /*
 | |
|  * Handle the guts of a [:stuff:] character class element.
 | |
|  * start is the beginning of "stuff" and len is its length.
 | |
|  * This code is exported for the benefit of completion matching.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| mod_export int
 | |
| range_type(char *start, int len)
 | |
| {
 | |
|     const char **csp;
 | |
| 
 | |
|     for (csp = colon_stuffs; *csp; csp++) {
 | |
| 	if (strlen(*csp) == len && !strncmp(start, *csp, len))
 | |
| 		return (csp - colon_stuffs) + PP_FIRST;
 | |
|     }
 | |
| 
 | |
|     return PP_UNKWN;
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|  * Convert the contents of a [...] or [^...] expression (just the
 | |
|  * ... part) back into a string.  This is used by compfiles -p/-P
 | |
|  * for some reason.  The compiled form (a metafied string) is
 | |
|  * passed in rangestr.
 | |
|  *
 | |
|  * If outstr is non-NULL the compiled form is placed there.  It
 | |
|  * must be sufficiently long.  A terminating NULL is appended.
 | |
|  *
 | |
|  * Return the length required, not including the terminating NULL.
 | |
|  *
 | |
|  * TODO: this is non-multibyte for now.  It will need to be defined
 | |
|  * appropriately with MULTIBYTE_SUPPORT when the completion matching
 | |
|  * code catches up.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| mod_export int
 | |
| pattern_range_to_string(char *rangestr, char *outstr)
 | |
| {
 | |
|     int len = 0;
 | |
| 
 | |
|     while (*rangestr) {
 | |
| 	if (imeta(STOUC(*rangestr))) {
 | |
| 	    int swtype = STOUC(*rangestr) - STOUC(Meta);
 | |
| 
 | |
| 	    if (swtype == 0) {
 | |
| 		/* Ordindary metafied character */
 | |
| 		if (outstr)
 | |
| 		{
 | |
| 		    *outstr++ = Meta;
 | |
| 		    *outstr++ = rangestr[1] ^ 32;
 | |
| 		}
 | |
| 		len += 2;
 | |
| 		rangestr += 2;
 | |
| 	    } else if (swtype == PP_RANGE) {
 | |
| 		/* X-Y range */
 | |
| 		int i;
 | |
| 
 | |
| 		for (i = 0; i < 2; i++) {
 | |
| 		    if (*rangestr == Meta) {
 | |
| 			if (outstr) {
 | |
| 			    *outstr++ = Meta;
 | |
| 			    *outstr++ = rangestr[1];
 | |
| 			}
 | |
| 			len += 2;
 | |
| 			rangestr += 2;
 | |
| 		    } else {
 | |
| 			if (outstr)
 | |
| 			    *outstr++ = *rangestr;
 | |
| 			len++;
 | |
| 			rangestr++;
 | |
| 		    }
 | |
| 
 | |
| 		    if (i == 0) {
 | |
| 			if (outstr)
 | |
| 			    *outstr++ = '-';
 | |
| 			len++;
 | |
| 		    }
 | |
| 		}
 | |
| 	    } else if (swtype >= PP_FIRST && swtype <= PP_LAST) {
 | |
| 		/* [:stuff:]; we need to output [: and :] */
 | |
| 		const char *found = colon_stuffs[swtype - PP_FIRST];
 | |
| 		int newlen = strlen(found);
 | |
| 		if (outstr) {
 | |
| 		    strcpy(outstr, "[:");
 | |
| 		    outstr += 2;
 | |
| 		    memcpy(outstr, found, newlen);
 | |
| 		    outstr += newlen;
 | |
| 		    strcpy(outstr, ":]");
 | |
| 		    outstr += 2;
 | |
| 		}
 | |
| 		len += newlen + 4;
 | |
| 		rangestr++;
 | |
| 	    } else {
 | |
| 		/* shouldn't happen */
 | |
| 		DPUTS(1, "BUG: unknown PP_ code in pattern range");
 | |
| 		rangestr++;
 | |
| 	    }
 | |
| 	} else {
 | |
| 	    /* ordinary character, guaranteed no Meta handling needed */
 | |
| 	    if (outstr)
 | |
| 		*outstr++ = *rangestr;
 | |
| 	    len++;
 | |
| 	    rangestr++;
 | |
| 	}
 | |
|     }
 | |
| 
 | |
|     if (outstr)
 | |
| 	*outstr = '\0';
 | |
|     return len;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * compile a chunk such as a literal string or a [...] followed
 | |
|  * by a possible hash operator
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| static long
 | |
| patcomppiece(int *flagp, int paren)
 | |
| {
 | |
|     long starter = 0, next, op, opnd;
 | |
|     int flags, flags2, kshchar, len, ch, patch, nmeta;
 | |
|     int hash, count;
 | |
|     union upat up;
 | |
|     char *nptr, *str0, *ptr, *patprev;
 | |
|     zrange_t from, to;
 | |
|     char *charstart;
 | |
| 
 | |
|     flags = 0;
 | |
|     str0 = patprev = patparse;
 | |
|     for (;;) {
 | |
| 	/*
 | |
| 	 * Check if we have a string. First, we need to make sure
 | |
| 	 * the string doesn't introduce a ksh-like parenthesized expression.
 | |
| 	 */
 | |
| 	kshchar = '\0';
 | |
| 	if (*patparse && patparse[1] == Inpar) {
 | |
| 	    if (*patparse == zpc_special[ZPC_KSH_PLUS])
 | |
| 		kshchar = STOUC('+');
 | |
| 	    else if (*patparse == zpc_special[ZPC_KSH_BANG])
 | |
| 		kshchar = STOUC('!');
 | |
| 	    else if (*patparse == zpc_special[ZPC_KSH_BANG2])
 | |
| 		kshchar = STOUC('!');
 | |
| 	    else if (*patparse == zpc_special[ZPC_KSH_AT])
 | |
| 		kshchar = STOUC('@');
 | |
| 	    else if (*patparse == zpc_special[ZPC_KSH_STAR])
 | |
| 		kshchar = STOUC('*');
 | |
| 	    else if (*patparse == zpc_special[ZPC_KSH_QUEST])
 | |
| 		kshchar = STOUC('?');
 | |
| 	}
 | |
| 
 | |
| 	/*
 | |
| 	 * If '(' is disabled as a pattern char, allow ')' as
 | |
| 	 * an ordinary string character if there are no parentheses to
 | |
| 	 * close.  Don't allow it otherwise, it changes the syntax.
 | |
| 	 */
 | |
| 	if (zpc_special[ZPC_INPAR] != Marker || *patparse != Outpar ||
 | |
| 	    paren) {
 | |
| 	    /*
 | |
| 	     * End of string (or no string at all) if ksh-type parentheses,
 | |
| 	     * or special character, unless that character is a tilde and
 | |
| 	     * the character following is an end-of-segment character.  Thus
 | |
| 	     * tildes are not special if there is nothing following to
 | |
| 	     * be excluded.
 | |
| 	     *
 | |
| 	     * Don't look for X()-style kshglobs at this point; we've
 | |
| 	     * checked above for the case with parentheses and we don't
 | |
| 	     * want to match without parentheses.
 | |
| 	     */
 | |
| 	    if (kshchar ||
 | |
| 		(memchr(zpc_special, *patparse, ZPC_NO_KSH_GLOB) &&
 | |
| 		 (*patparse != zpc_special[ZPC_TILDE] ||
 | |
| 		  patparse[1] == '/' ||
 | |
| 		  !memchr(zpc_special, patparse[1], ZPC_SEG_COUNT)))) {
 | |
| 		break;
 | |
| 	    }
 | |
|     	}
 | |
| 
 | |
| 	/* Remember the previous character for backtracking */
 | |
| 	patprev = patparse;
 | |
| 	METACHARINC(patparse);
 | |
|     }
 | |
| 
 | |
|     if (patparse > str0) {
 | |
| 	long slen = patparse - str0;
 | |
| 	int morelen;
 | |
| 
 | |
| 	/* Ordinary string: cancel kshchar lookahead */
 | |
| 	kshchar = '\0';
 | |
| 	/*
 | |
| 	 * Assume it matches a simple string until we find otherwise.
 | |
| 	 */
 | |
| 	flags |= P_PURESTR;
 | |
| 	DPUTS(patparse == str0, "BUG: matched nothing in patcomppiece.");
 | |
| 	/* more than one character matched? */
 | |
| 	morelen = (patprev > str0);
 | |
| 	/*
 | |
| 	 * If we have more than one character, a following hash
 | |
| 	 * or (#c...) only applies to the last, so backtrack one character.
 | |
| 	 */
 | |
| 	if ((*patparse == zpc_special[ZPC_HASH] ||
 | |
| 	     (*patparse == zpc_special[ZPC_INPAR] &&
 | |
| 	      patparse[1] == zpc_special[ZPC_HASH] &&
 | |
| 	      patparse[2] == 'c') ||
 | |
| 	     (*patparse == zpc_special[ZPC_KSH_AT] &&
 | |
| 	      patparse[1] == Inpar &&
 | |
| 	      patparse[2] == zpc_special[ZPC_HASH] &&
 | |
| 	      patparse[3] == 'c')) && morelen)
 | |
| 	    patparse = patprev;
 | |
| 	/*
 | |
| 	 * If len is 1, we can't have an active # following, so doesn't
 | |
| 	 * matter that we don't make X in `XX#' simple.
 | |
| 	 */
 | |
| 	if (!morelen)
 | |
| 	    flags |= P_SIMPLE;
 | |
| 	starter = patnode(P_EXACTLY);
 | |
| 
 | |
| 	/* Get length of string without metafication. */
 | |
| 	nmeta = 0;
 | |
| 	/* inherited from domatch, but why, exactly? */
 | |
| 	if (*str0 == Nularg)
 | |
| 	    str0++;
 | |
| 	for (ptr = str0; ptr < patparse; ptr++) {
 | |
| 	    if (*ptr == Meta) {
 | |
| 		nmeta++;
 | |
| 		ptr++;
 | |
| 	    }
 | |
| 	}
 | |
| 	slen = (patparse - str0) - nmeta;
 | |
| 	/* First add length, which is a long */
 | |
| 	patadd((char *)&slen, 0, sizeof(long), 0);
 | |
| 	/*
 | |
| 	 * Then the string, not null terminated.
 | |
| 	 * Unmetafy and untokenize; pass the final length,
 | |
| 	 * which is what we need to allocate, i.e. not including
 | |
| 	 * a count for each Meta in the string.
 | |
| 	 */
 | |
| 	patadd(str0, 0, slen, PA_UNMETA);
 | |
| 	nptr = P_LS_STR((Upat)patout + starter);
 | |
| 	/*
 | |
| 	 * It's much simpler to turn off pure string mode for
 | |
| 	 * any case-insensitive or approximate matching; usually,
 | |
| 	 * that is correct, or they wouldn't have been turned on.
 | |
| 	 * However, we need to make sure we match a "." or ".."
 | |
| 	 * in a file name as a pure string.  There's a minor bug
 | |
| 	 * that this will also apply to something like
 | |
| 	 * ..(#a1).. (i.e. the (#a1) has no effect), but if you're
 | |
| 	 * going to write funny patterns, you get no sympathy from me.
 | |
| 	 */
 | |
| 	if (patglobflags &
 | |
| #ifdef __CYGWIN__
 | |
| 	    /*
 | |
| 	     * As above: don't use pattern matching for files
 | |
| 	     * just because of case insensitivity if file system
 | |
| 	     * is known to be case insensitive.
 | |
| 	     *
 | |
| 	     * This is known to be necessary in at least one case:
 | |
| 	     * if "mount -c /" is in effect, so that drives appear
 | |
| 	     * directly under / instead of the usual /cygdrive, they
 | |
| 	     * aren't shown by readdir().  So it's vital we don't use
 | |
| 	     * globbing to find "/c", since that'll fail.
 | |
| 	     */
 | |
| 	    ((patflags & PAT_FILE) ?
 | |
| 	    (0xFF|GF_LCMATCHUC) :
 | |
| 	    (0xFF|GF_LCMATCHUC|GF_IGNCASE))
 | |
| #else
 | |
| 	    (0xFF|GF_LCMATCHUC|GF_IGNCASE)
 | |
| #endif
 | |
| 	    ) {
 | |
| 	    if (!(patflags & PAT_FILE))
 | |
| 		flags &= ~P_PURESTR;
 | |
| 	    else if (!(nptr[0] == '.' &&
 | |
| 		       (slen == 1 || (nptr[1] == '.' && slen == 2))))
 | |
| 		flags &= ~P_PURESTR;
 | |
| 	}
 | |
|     } else {
 | |
| 	if (kshchar)
 | |
| 	    patparse++;
 | |
| 
 | |
| 	patch = *patparse;
 | |
| 	METACHARINC(patparse);
 | |
| 	switch(patch) {
 | |
| 	case Quest:
 | |
| 	    DPUTS(zpc_special[ZPC_QUEST] == Marker,
 | |
| 		  "Treating '?' as pattern character although disabled");
 | |
| 	    flags |= P_SIMPLE;
 | |
| 	    starter = patnode(P_ANY);
 | |
| 	    break;
 | |
| 	case Star:
 | |
| 	    DPUTS(zpc_special[ZPC_STAR] == Marker,
 | |
| 		  "Treating '*' as pattern character although disabled");
 | |
| 	    /* kshchar is used as a sign that we can't have #'s. */
 | |
| 	    kshchar = -1;
 | |
| 	    starter = patnode(P_STAR);
 | |
| 	    break;
 | |
| 	case Inbrack:
 | |
| 	    DPUTS(zpc_special[ZPC_INBRACK] == Marker,
 | |
| 		  "Treating '[' as pattern character although disabled");
 | |
| 	    flags |= P_SIMPLE;
 | |
| 	    if (*patparse == Hat || *patparse == Bang) {
 | |
| 		patparse++;
 | |
| 		starter = patnode(P_ANYBUT);
 | |
| 	    } else
 | |
| 		starter = patnode(P_ANYOF);
 | |
| 	    /*
 | |
| 	     * []...] means match a "]" or other included characters.
 | |
| 	     * However, to be a bit helpful and for compatibility
 | |
| 	     * with other shells, don't take in that sense if
 | |
| 	     * there's no further "]".  That's still imperfect,
 | |
| 	     * but it's all we can do --- we're required to
 | |
| 	     * treat [$var]*[$var]with empty var as [ ... ]
 | |
| 	     * containing "]*[".
 | |
| 	     */
 | |
| 	    if (*patparse == Outbrack && strchr(patparse+1, Outbrack)) {
 | |
| 		patparse++;
 | |
| 		patadd(NULL, ']', 1, PA_NOALIGN);
 | |
| 	    }
 | |
| 	    while (*patparse && *patparse != Outbrack) {
 | |
| 		/* Meta is not a token */
 | |
| 		if (*patparse == Inbrack && patparse[1] == ':' &&
 | |
| 			(nptr = strchr(patparse+2, ':')) &&
 | |
| 			nptr[1] == Outbrack) {
 | |
| 			/* Posix range. */
 | |
| 			patparse += 2;
 | |
| 			len = nptr - patparse;
 | |
| 			ch = range_type(patparse, len);
 | |
| 			patparse = nptr + 2;
 | |
| 			if (ch != PP_UNKWN)
 | |
| 			    patadd(NULL, STOUC(Meta) + ch, 1, PA_NOALIGN);
 | |
| 			continue;
 | |
| 		}
 | |
| 		charstart = patparse;
 | |
| 		METACHARINC(patparse);
 | |
| 
 | |
| 		if (*patparse == Dash && patparse[1] &&
 | |
| 		    patparse[1] != Outbrack) {
 | |
| 		    patadd(NULL, STOUC(Meta)+PP_RANGE, 1, PA_NOALIGN);
 | |
| 		    if (itok(*charstart)) {
 | |
| 			patadd(0, STOUC(ztokens[*charstart - Pound]), 1,
 | |
| 			       PA_NOALIGN);
 | |
| 		    } else {
 | |
| 			patadd(charstart, 0, patparse-charstart, PA_NOALIGN);
 | |
| 		    }
 | |
| 		    charstart = ++patparse;	/* skip Dash token */
 | |
| 		    METACHARINC(patparse);
 | |
| 		}
 | |
| 		if (itok(*charstart)) {
 | |
| 		    patadd(0, STOUC(ztokens[*charstart - Pound]), 1,
 | |
| 			   PA_NOALIGN);
 | |
| 		} else {
 | |
| 		    patadd(charstart, 0, patparse-charstart, PA_NOALIGN);
 | |
| 		}
 | |
| 	    }
 | |
| 	    if (*patparse != Outbrack)
 | |
| 		return 0;
 | |
| 	    patparse++;
 | |
| 	    /* terminate null string and fix alignment */
 | |
| 	    patadd(NULL, 0, 1, 0);
 | |
| 	    break;
 | |
| 	case Inpar:
 | |
| 	    DPUTS(!kshchar && zpc_special[ZPC_INPAR] == Marker,
 | |
| 		  "Treating '(' as pattern character although disabled");
 | |
| 	    DPUTS(isset(SHGLOB) && !kshchar,
 | |
| 		  "Treating bare '(' as pattern character with SHGLOB");
 | |
| 	    if (kshchar == '!') {
 | |
| 		/* This is nasty, we should really either handle all
 | |
| 		 * kshglobbing below or here.  But most of the
 | |
| 		 * others look like non-ksh patterns, while this one
 | |
| 		 * doesn't, so we handle it here and leave the rest.
 | |
| 		 * We treat it like an extendedglob ^, except that
 | |
| 		 * it goes into parentheses.
 | |
| 		 *
 | |
| 		 * If we did do kshglob here, we could support
 | |
| 		 * the old behaviour that things like !(foo)##
 | |
| 		 * work, but it makes the code more complicated at
 | |
| 		 * the expense of allowing the user to do things
 | |
| 		 * they shouldn't.
 | |
| 		 */
 | |
| 		if (!(starter = patcompnot(1, &flags2)))
 | |
| 		    return 0;
 | |
| 	    } else if (!(starter = patcompswitch(1, &flags2)))
 | |
| 		return 0;
 | |
| 	    flags |= flags2 & P_HSTART;
 | |
| 	    break;
 | |
| 	case Inang:
 | |
| 	    /* Numeric glob */
 | |
| 	    DPUTS(zpc_special[ZPC_INANG] == Marker,
 | |
| 		  "Treating '<' as pattern character although disabled");
 | |
| 	    DPUTS(isset(SHGLOB), "Treating <..> as numeric range with SHGLOB");
 | |
| 	    len = 0;		/* beginning present 1, end present 2 */
 | |
| 	    if (idigit(*patparse)) {
 | |
| 		from = (zrange_t) zstrtol((char *)patparse,
 | |
| 					 (char **)&nptr, 10);
 | |
| 		patparse = nptr;
 | |
| 		len |= 1;
 | |
| 	    }
 | |
| 	    DPUTS(!IS_DASH(*patparse), "BUG: - missing from numeric glob");
 | |
| 	    patparse++;
 | |
| 	    if (idigit(*patparse)) {
 | |
| 		to = (zrange_t) zstrtol((char *)patparse,
 | |
| 					  (char **)&nptr, 10);
 | |
| 		patparse = nptr;
 | |
| 		len |= 2;
 | |
| 	    }
 | |
| 	    if (*patparse != Outang)
 | |
| 		return 0;
 | |
| 	    patparse++;
 | |
| 	    switch(len) {
 | |
| 	    case 3:
 | |
| 		starter = patnode(P_NUMRNG);
 | |
| 		patadd((char *)&from, 0, sizeof(from), 0);
 | |
| 		patadd((char *)&to, 0, sizeof(to), 0);
 | |
| 		break;
 | |
| 	    case 2:
 | |
| 		starter = patnode(P_NUMTO);
 | |
| 		patadd((char *)&to, 0, sizeof(to), 0);
 | |
| 		break;
 | |
| 	    case 1:
 | |
| 		starter = patnode(P_NUMFROM);
 | |
| 		patadd((char *)&from, 0, sizeof(from), 0);
 | |
| 		break;
 | |
| 	    case 0:
 | |
| 		starter = patnode(P_NUMANY);
 | |
| 		break;
 | |
| 	    }
 | |
| 	    /* This can't be simple, because it isn't.
 | |
| 	     * Mention in manual that matching digits with [...]
 | |
| 	     * is more efficient.
 | |
| 	     */
 | |
| 	    break;
 | |
| 	case Pound:
 | |
| 	    DPUTS(zpc_special[ZPC_HASH] == Marker,
 | |
| 		  "Treating '#' as pattern character although disabled");
 | |
| 	    DPUTS(!isset(EXTENDEDGLOB), "BUG: # not treated as string");
 | |
| 	    /*
 | |
| 	     * A hash here is an error; it should follow something
 | |
| 	     * repeatable.
 | |
| 	     */
 | |
| 	    return 0;
 | |
| 	    break;
 | |
| 	case Bnullkeep:
 | |
| 	    /*
 | |
| 	     * Marker for restoring a backslash in output:
 | |
| 	     * does not match a character.
 | |
| 	     */
 | |
| 	    next = patcomppiece(flagp, paren);
 | |
| 	    /*
 | |
| 	     * Can't match a pure string since we need to do this
 | |
| 	     * as multiple chunks.
 | |
| 	     */
 | |
| 	    *flagp &= ~P_PURESTR;
 | |
| 	    return next;
 | |
| 	    break;
 | |
| #ifdef DEBUG
 | |
| 	default:
 | |
| 	    dputs("BUG: character not handled in patcomppiece");
 | |
| 	    return 0;
 | |
| 	    break;
 | |
| #endif
 | |
| 	}
 | |
|     }
 | |
| 
 | |
|     count = 0;
 | |
|     if (!(hash = (*patparse == zpc_special[ZPC_HASH])) &&
 | |
| 	!(count = ((*patparse == zpc_special[ZPC_INPAR] &&
 | |
| 		    patparse[1] == zpc_special[ZPC_HASH] &&
 | |
| 		    patparse[2] == 'c') ||
 | |
| 		   (*patparse == zpc_special[ZPC_KSH_AT] &&
 | |
| 		    patparse[1] == Inpar &&
 | |
| 		    patparse[2] == zpc_special[ZPC_HASH] &&
 | |
| 		    patparse[3] == 'c'))) &&
 | |
| 	(kshchar <= 0 || kshchar == '@' || kshchar == '!')) {
 | |
| 	*flagp = flags;
 | |
| 	return starter;
 | |
|     }
 | |
| 
 | |
|     /* too much at once doesn't currently work */
 | |
|     if (kshchar && (hash || count))
 | |
| 	return 0;
 | |
| 
 | |
|     if (kshchar == '*') {
 | |
| 	op = P_ONEHASH;
 | |
| 	*flagp = P_HSTART;
 | |
|     } else if (kshchar == '+') {
 | |
| 	op = P_TWOHASH;
 | |
| 	*flagp = P_HSTART;
 | |
|     } else if (kshchar == '?') {
 | |
| 	op = 0;
 | |
| 	*flagp = 0;
 | |
|     } else if (count) {
 | |
| 	op = P_COUNT;
 | |
| 	patparse += 3;
 | |
| 	*flagp = P_HSTART;
 | |
|     } else if (*++patparse == zpc_special[ZPC_HASH]) {
 | |
| 	op = P_TWOHASH;
 | |
| 	patparse++;
 | |
| 	*flagp = P_HSTART;
 | |
|     } else {
 | |
| 	op = P_ONEHASH;
 | |
| 	*flagp = P_HSTART;
 | |
|     }
 | |
| 
 | |
|     /*
 | |
|      * Note optimizations with pointers into P_NOTHING branches:  some
 | |
|      * should logically point to next node after current piece.
 | |
|      *
 | |
|      * Backtracking is also encoded in a slightly obscure way:  the
 | |
|      * code emitted ensures we test the non-empty branch of complex
 | |
|      * patterns before the empty branch on each repetition.  Hence
 | |
|      * each time we fail on a non-empty branch, we try the empty branch,
 | |
|      * which is equivalent to backtracking.
 | |
|      */
 | |
|     if (op == P_COUNT) {
 | |
| 	/* (#cN,M) */
 | |
| 	union upat countargs[P_CT_OPERAND];
 | |
| 	char *opp = patparse;
 | |
| 
 | |
| 	countargs[0].l = P_COUNT;
 | |
| 	countargs[P_CT_CURRENT].l = 0L;
 | |
| 	countargs[P_CT_MIN].l = (long)zstrtol(patparse, &patparse, 10);
 | |
| 	if (patparse == opp) {
 | |
| 	    /* missing number treated as zero */
 | |
| 	    countargs[P_CT_MIN].l = 0L;
 | |
| 	}
 | |
| 	if (*patparse != ',' && *patparse != Comma) {
 | |
| 	    /* either max = min or error */
 | |
| 	    if (*patparse != Outpar)
 | |
| 		return 0;
 | |
| 	    countargs[P_CT_MAX].l = countargs[P_CT_MIN].l;
 | |
| 	} else {
 | |
| 	    opp = ++patparse;
 | |
| 	    countargs[P_CT_MAX].l = (long)zstrtol(patparse, &patparse, 10);
 | |
| 	    if (*patparse != Outpar)
 | |
| 		return 0;
 | |
| 	    if (patparse == opp) {
 | |
| 		/* missing number treated as infinity: record as -1 */
 | |
| 		countargs[P_CT_MAX].l = -1L;
 | |
| 	    }
 | |
| 	}
 | |
| 	patparse++;
 | |
| 	countargs[P_CT_PTR].p = NULL;
 | |
| 	/* Mark this chain as a min/max count... */
 | |
| 	patinsert(P_COUNTSTART, starter, (char *)countargs, sizeof(countargs));
 | |
| 	/*
 | |
| 	 * The next of the operand is a loop back to the P_COUNT.  This is
 | |
| 	 * how we get recursion for the count.  We don't loop back to
 | |
| 	 * the P_COUNTSTART; that's used for initialising the count
 | |
| 	 * and saving and restoring the count for any enclosing use
 | |
| 	 * of the match.
 | |
| 	 */
 | |
| 	opnd = P_OPERAND(starter) + P_CT_OPERAND;
 | |
| 	pattail(opnd, patnode(P_BACK));
 | |
| 	pattail(opnd, P_OPERAND(starter));
 | |
| 	/*
 | |
| 	 * The next of the counter operators is what follows the
 | |
| 	 * closure.
 | |
| 	 * This handles matching of the tail.
 | |
| 	 */
 | |
| 	next = patnode(P_NOTHING);
 | |
| 	pattail(starter, next);
 | |
| 	pattail(P_OPERAND(starter), next);
 | |
|     } else if ((flags & P_SIMPLE) && (op == P_ONEHASH || op == P_TWOHASH) &&
 | |
| 	P_OP((Upat)patout+starter) == P_ANY) {
 | |
| 	/* Optimize ?# to *.  Silly thing to do, since who would use
 | |
| 	 * use ?# ? But it makes the later code shorter.
 | |
| 	 */
 | |
| 	Upat uptr = (Upat)patout + starter;
 | |
| 	if (op == P_TWOHASH) {
 | |
| 	    /* ?## becomes ?* */
 | |
| 	    uptr->l = (uptr->l & ~0xff) | P_ANY;
 | |
| 	    pattail(starter, patnode(P_STAR));
 | |
| 	} else {
 | |
| 	    uptr->l = (uptr->l & ~0xff) | P_STAR;
 | |
| 	}
 | |
|     } else if ((flags & P_SIMPLE) && op && !(patglobflags & 0xff)) {
 | |
| 	/* Simplify, but not if we need to look for approximations. */
 | |
| 	patinsert(op, starter, NULL, 0);
 | |
|     } else if (op == P_ONEHASH) {
 | |
| 	/* Emit x# as (x&|), where & means "self". */
 | |
| 	up.p = NULL;
 | |
| 	patinsert(P_WBRANCH, starter, (char *)&up, sizeof(up));
 | |
| 	                                      /* Either x */
 | |
| 	patoptail(starter, patnode(P_BACK));  /* and loop */
 | |
| 	patoptail(starter, starter);	      /* back */
 | |
| 	pattail(starter, patnode(P_BRANCH));  /* or */
 | |
| 	pattail(starter, patnode(P_NOTHING)); /* null. */
 | |
|     } else if (op == P_TWOHASH) {
 | |
| 	/* Emit x## as x(&|) where & means "self". */
 | |
| 	next = patnode(P_WBRANCH);	      /* Either */
 | |
| 	up.p = NULL;
 | |
| 	patadd((char *)&up, 0, sizeof(up), 0);
 | |
| 	pattail(starter, next);
 | |
| 	pattail(patnode(P_BACK), starter);    /* loop back */
 | |
| 	pattail(next, patnode(P_BRANCH));     /* or */
 | |
| 	pattail(starter, patnode(P_NOTHING)); /* null. */
 | |
|     } else if (kshchar == '?') {
 | |
| 	/* Emit ?(x) as (x|) */
 | |
| 	patinsert(P_BRANCH, starter, NULL, 0); /* Either x */
 | |
| 	pattail(starter, patnode(P_BRANCH));   /* or */
 | |
| 	next = patnode(P_NOTHING);	       /* null */
 | |
| 	pattail(starter, next);
 | |
| 	patoptail(starter, next);
 | |
|     }
 | |
|     if (*patparse == zpc_special[ZPC_HASH])
 | |
| 	return 0;
 | |
| 
 | |
|     return starter;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Turn a ^foo (paren = 0) or !(foo) (paren = 1) into *~foo with
 | |
|  * parentheses if necessary.   As you see, that's really quite easy.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| static long
 | |
| patcompnot(int paren, int *flagsp)
 | |
| {
 | |
|     union upat up;
 | |
|     long excsync, br, excl, n, starter;
 | |
|     int dummy;
 | |
| 
 | |
|     /* Here, we're matching a star at the start. */
 | |
|     *flagsp = P_HSTART;
 | |
| 
 | |
|     starter = patnode(P_BRANCH);
 | |
|     br = patnode(P_STAR);
 | |
|     excsync = patnode(P_EXCSYNC);
 | |
|     pattail(br, excsync);
 | |
|     pattail(starter, excl = patnode(P_EXCLUDE));
 | |
|     up.p = NULL;
 | |
|     patadd((char *)&up, 0, sizeof(up), 0);
 | |
|     if (!(br = (paren ? patcompswitch(1, &dummy) : patcompbranch(&dummy, 0))))
 | |
| 	return 0;
 | |
|     pattail(br, patnode(P_EXCEND));
 | |
|     n = patnode(P_NOTHING); /* just so much easier */
 | |
|     pattail(excsync, n);
 | |
|     pattail(excl, n);
 | |
| 
 | |
|     return starter;
 | |
| }
 | |
| 
 | |
| /* Emit a node */
 | |
| 
 | |
| /**/
 | |
| static long
 | |
| patnode(long op)
 | |
| {
 | |
|     long starter = (Upat)patcode - (Upat)patout;
 | |
|     union upat up;
 | |
| 
 | |
|     up.l = op;
 | |
|     patadd((char *)&up, 0, sizeof(union upat), 0);
 | |
|     return starter;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * insert an operator in front of an already emitted operand:
 | |
|  * we relocate the operand.  there had better be nothing else after.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| static void
 | |
| patinsert(long op, int opnd, char *xtra, int sz)
 | |
| {
 | |
|     char *src, *dst, *opdst;
 | |
|     union upat buf, *lptr;
 | |
| 
 | |
|     buf.l = 0;
 | |
|     patadd((char *)&buf, 0, sizeof(buf), 0);
 | |
|     if (sz)
 | |
| 	patadd(xtra, 0, sz, 0);
 | |
|     src = patcode - sizeof(union upat) - sz;
 | |
|     dst = patcode;
 | |
|     opdst = patout + opnd * sizeof(union upat);
 | |
|     while (src > opdst)
 | |
| 	*--dst = *--src;
 | |
| 
 | |
|     /* A cast can't be an lvalue */
 | |
|     lptr = (Upat)opdst;
 | |
|     lptr->l = op;
 | |
|     opdst += sizeof(union upat);
 | |
|     while (sz--)
 | |
| 	*opdst++ = *xtra++;
 | |
| }
 | |
| 
 | |
| /* set the 'next' pointer at the end of a node chain */
 | |
| 
 | |
| /**/
 | |
| static void
 | |
| pattail(long p, long val)
 | |
| {
 | |
|     Upat scan, temp;
 | |
|     long offset;
 | |
| 
 | |
|     scan = (Upat)patout + p;
 | |
|     for (;;) {
 | |
| 	if (!(temp = PATNEXT(scan)))
 | |
| 	    break;
 | |
| 	scan = temp;
 | |
|     }
 | |
| 
 | |
|     offset = (P_OP(scan) == P_BACK)
 | |
| 	? (scan - (Upat)patout) - val : val - (scan - (Upat)patout);
 | |
| 
 | |
|     scan->l |= offset << 8;
 | |
| }
 | |
| 
 | |
| /* do pattail, but on operand of first argument; nop if operandless */
 | |
| 
 | |
| /**/
 | |
| static void
 | |
| patoptail(long p, long val)
 | |
| {
 | |
|     Upat ptr = (Upat)patout + p;
 | |
|     int op = P_OP(ptr);
 | |
|     if (!p || !P_ISBRANCH(ptr))
 | |
| 	return;
 | |
|     if (op == P_BRANCH)
 | |
| 	pattail(P_OPERAND(p), val);
 | |
|     else
 | |
| 	pattail(P_OPERAND(p) + 1, val);
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|  * Run a pattern.
 | |
|  */
 | |
| struct rpat {
 | |
|     char *patinstart;		/* Start of input string */
 | |
|     char *patinend;		/* End of input string */
 | |
|     char *patinput;		/* String input pointer */
 | |
|     char *patinpath;		/* Full path for use with ~ exclusions */
 | |
|     int   patinlen;		/* Length of last successful match.
 | |
| 				 * Includes count of Meta characters.
 | |
| 				 */
 | |
| 
 | |
|     char *patbeginp[NSUBEXP];	/* Pointer to backref beginnings */
 | |
|     char *patendp[NSUBEXP];	/* Pointer to backref ends */
 | |
|     int parsfound;		/* parentheses (with backrefs) found */
 | |
| 
 | |
|     int globdots;		/* Glob initial dots? */
 | |
| };
 | |
| 
 | |
| static struct rpat pattrystate;
 | |
| 
 | |
| #define patinstart	(pattrystate.patinstart)
 | |
| #define patinend	(pattrystate.patinend)
 | |
| #define patinput	(pattrystate.patinput)
 | |
| #define patinpath	(pattrystate.patinpath)
 | |
| #define patinlen	(pattrystate.patinlen)
 | |
| #define patbeginp	(pattrystate.patbeginp)
 | |
| #define patendp		(pattrystate.patendp)
 | |
| #define parsfound	(pattrystate.parsfound)
 | |
| #define globdots	(pattrystate.globdots)
 | |
| 
 | |
| 
 | |
| /*
 | |
|  * Character functions operating on unmetafied strings.
 | |
|  */
 | |
| #ifdef MULTIBYTE_SUPPORT
 | |
| 
 | |
| /* Get a character from the start point in a string */
 | |
| #define CHARREF(x, y)	charref((x), (y), (int *)NULL)
 | |
| static wchar_t
 | |
| charref(char *x, char *y, int *zmb_ind)
 | |
| {
 | |
|     wchar_t wc;
 | |
|     size_t ret;
 | |
| 
 | |
|     if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*x) & 0x80))
 | |
| 	return (wchar_t) STOUC(*x);
 | |
| 
 | |
|     ret = mbrtowc(&wc, x, y-x, &shiftstate);
 | |
| 
 | |
|     if (ret == MB_INVALID || ret == MB_INCOMPLETE) {
 | |
| 	/* Error. */
 | |
| 	/* Reset the shift state for next time. */
 | |
| 	memset(&shiftstate, 0, sizeof(shiftstate));
 | |
| 	if (zmb_ind)
 | |
| 	    *zmb_ind = (ret == MB_INVALID) ? ZMB_INVALID : ZMB_INCOMPLETE;
 | |
| 	return WCHAR_INVALID(*x);
 | |
|     }
 | |
| 
 | |
|     if (zmb_ind)
 | |
| 	*zmb_ind = ZMB_VALID;
 | |
|     return wc;
 | |
| }
 | |
| 
 | |
| /* Get  a pointer to the next character */
 | |
| #define CHARNEXT(x, y)	charnext((x), (y))
 | |
| static char *
 | |
| charnext(char *x, char *y)
 | |
| {
 | |
|     wchar_t wc;
 | |
|     size_t ret;
 | |
| 
 | |
|     if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*x) & 0x80))
 | |
| 	return x + 1;
 | |
| 
 | |
|     ret = mbrtowc(&wc, x, y-x, &shiftstate);
 | |
| 
 | |
|     if (ret == MB_INVALID || ret == MB_INCOMPLETE) {
 | |
| 	/* Error.  Treat as single byte. */
 | |
| 	/* Reset the shift state for next time. */
 | |
| 	memset(&shiftstate, 0, sizeof(shiftstate));
 | |
| 	return x + 1;
 | |
|     }
 | |
| 
 | |
|     /* Nulls here are normal characters */
 | |
|     return x + (ret ? ret : 1);
 | |
| }
 | |
| 
 | |
| /* Increment a pointer past the current character. */
 | |
| #define CHARINC(x, y)	((x) = charnext((x), (y)))
 | |
| 
 | |
| 
 | |
| /* Get a character and increment */
 | |
| #define CHARREFINC(x, y, z)	charrefinc(&(x), (y), (z))
 | |
| static wchar_t
 | |
| charrefinc(char **x, char *y, int *z)
 | |
| {
 | |
|     wchar_t wc;
 | |
|     size_t ret;
 | |
| 
 | |
|     if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(**x) & 0x80))
 | |
| 	return (wchar_t) STOUC(*(*x)++);
 | |
| 
 | |
|     ret = mbrtowc(&wc, *x, y-*x, &shiftstate);
 | |
| 
 | |
|     if (ret == MB_INVALID || ret == MB_INCOMPLETE) {
 | |
| 	/* Error.  Treat as single byte, but flag. */
 | |
| 	*z = 1;
 | |
| 	/* Reset the shift state for next time. */
 | |
| 	memset(&shiftstate, 0, sizeof(shiftstate));
 | |
| 	return WCHAR_INVALID(*(*x)++);
 | |
|     }
 | |
| 
 | |
|     /* Nulls here are normal characters */
 | |
|     *x += ret ? ret : 1;
 | |
| 
 | |
|     return wc;
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|  * Counter the number of characters between two pointers, smaller first
 | |
|  *
 | |
|  * This is used when setting values in parameters, so we obey
 | |
|  * the MULTIBYTE option (even if it's been overridden locally).
 | |
|  */
 | |
| #define CHARSUB(x,y)	charsub(x, y)
 | |
| static ptrdiff_t
 | |
| charsub(char *x, char *y)
 | |
| {
 | |
|     ptrdiff_t res = 0;
 | |
|     size_t ret;
 | |
|     wchar_t wc;
 | |
| 
 | |
|     if (!isset(MULTIBYTE))
 | |
| 	return y - x;
 | |
| 
 | |
|     while (x < y) {
 | |
| 	ret = mbrtowc(&wc, x, y-x, &shiftstate);
 | |
| 
 | |
| 	if (ret == MB_INVALID || ret == MB_INCOMPLETE) {
 | |
| 	    /* Error.  Treat remainder as single characters */
 | |
| 	    return res + (y - x);
 | |
| 	}
 | |
| 
 | |
| 	/* Treat nulls as normal characters */
 | |
| 	if (!ret)
 | |
| 	    ret = 1;
 | |
| 	res++;
 | |
| 	x += ret;
 | |
|     }
 | |
| 
 | |
|     return res;
 | |
| }
 | |
| 
 | |
| #else /* no MULTIBYTE_SUPPORT */
 | |
| 
 | |
| /* Get a character from the start point in a string */
 | |
| #define CHARREF(x, y)	(STOUC(*(x)))
 | |
| /* Get  a pointer to the next character */
 | |
| #define CHARNEXT(x, y)	((x)+1)
 | |
| /* Increment a pointer past the current character. */
 | |
| #define CHARINC(x, y)	((x)++)
 | |
| /* Get a character and increment */
 | |
| #define CHARREFINC(x, y, z)	(STOUC(*(x)++))
 | |
| /* Counter the number of characters between two pointers, smaller first */
 | |
| #define CHARSUB(x,y)	((y) - (x))
 | |
| 
 | |
| #endif /* MULTIBYTE_SUPPORT */
 | |
| 
 | |
| /*
 | |
|  * The following need to be accessed in the globbing scanner for
 | |
|  * a multi-component file path.  See horror story in glob.c.
 | |
|  */
 | |
| /**/
 | |
| int errsfound;				/* Total error count so far */
 | |
| 
 | |
| /**/
 | |
| int forceerrs;				/* Forced maximum error count */
 | |
| 
 | |
| /*
 | |
|  * exactpos is used to remember how far down an exact string we have
 | |
|  * matched, if we are doing approximation and can therefore redo from
 | |
|  * the same point; we never need to otherwise.
 | |
|  *
 | |
|  * exactend is a pointer to the end of the string, which isn't
 | |
|  * null-terminated.
 | |
|  */
 | |
| static char *exactpos, *exactend;
 | |
| 
 | |
| /**/
 | |
| void
 | |
| pattrystart(void)
 | |
| {
 | |
|     forceerrs = -1;
 | |
|     errsfound = 0;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Fix up string length stuff.
 | |
|  *
 | |
|  * If we call patallocstr() with "force" to set things up early, it's
 | |
|  * done there, else it's done in pattryrefs().  The reason for the
 | |
|  * difference is in the latter case we may not be relying on
 | |
|  * patallocstr() having an effect.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| static void
 | |
| patmungestring(char **string, int *stringlen, int *unmetalenin)
 | |
| {
 | |
|     /*
 | |
|      * Special signalling of empty tokenised string.
 | |
|      */
 | |
|     if (*stringlen > 0 && **string == Nularg) {
 | |
| 	(*string)++;
 | |
| 	/*
 | |
| 	 * If we don't have an unmetafied length
 | |
| 	 * and need it (we may not) we'll get it later.
 | |
| 	 */
 | |
| 	if (*unmetalenin > 0)
 | |
| 	    (*unmetalenin)--;
 | |
| 	if (*stringlen > 0)
 | |
| 	    (*stringlen)--;
 | |
|     }
 | |
| 
 | |
|     /* Ensure we have a metafied length */
 | |
|     if (*stringlen < 0)
 | |
| 	*stringlen = strlen(*string);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Allocate memory for pattern match.  Note this is specific to use
 | |
|  * of pattern *and* trial string.
 | |
|  *
 | |
|  * Unmetafy a trial string for use in pattern matching, if needed.
 | |
|  *
 | |
|  * If it is needed, returns a heap allocated string; if not needed,
 | |
|  * returns NULL.
 | |
|  *
 | |
|  * prog is the pattern to be executed.
 | |
|  * string is the metafied trial string.
 | |
|  * stringlen is it's length; it will be calculated if it's negative
 | |
|  *   (this is a simple strlen()).
 | |
|  * unmetalen is the unmetafied length of the string, may be -1.
 | |
|  * force is 1 if we always unmetafy: this is useful if we are going
 | |
|  *   to try again with different versions of the string.  If this is
 | |
|  *   called from pattryrefs() we don't force unmetafication as it won't
 | |
|  *   be optimal.  This option should be used if the resulting
 | |
|  *   patstralloc is going to be passed to pattrylen() / pattryrefs().
 | |
|  * In patstralloc (supplied by caller, must last until last pattry is done)
 | |
|  *  unmetalen is the unmetafied length of the string; it will be
 | |
|  *    calculated if the input value is negative.
 | |
|  *  unmetalenp is the umetafied length of a path segment preceding
 | |
|  *    the trial string needed for file mananagement; it is calculated as
 | |
|  *    needed so does not need to be initialised.
 | |
|  *  alloced is the memory allocated on the heap --- same as return value from
 | |
|  *    function.
 | |
|  */
 | |
| /**/
 | |
| mod_export
 | |
| char *patallocstr(Patprog prog, char *string, int stringlen, int unmetalen,
 | |
| 		  int force, Patstralloc patstralloc)
 | |
| {
 | |
|     int needfullpath;
 | |
| 
 | |
|     if (force)
 | |
| 	patmungestring(&string, &stringlen, &unmetalen);
 | |
| 
 | |
|     /*
 | |
|      * For a top-level ~-exclusion, we will need the full
 | |
|      * path to exclude, so copy the path so far and append the
 | |
|      * current test string.
 | |
|      */
 | |
|     needfullpath = (prog->flags & PAT_HAS_EXCLUDP) && pathpos;
 | |
| 
 | |
|     /* Get the length of the full string when unmetafied. */
 | |
|     if (unmetalen < 0)
 | |
| 	patstralloc->unmetalen = ztrsub(string + stringlen, string);
 | |
|     else
 | |
| 	patstralloc->unmetalen = unmetalen;
 | |
|     if (needfullpath) {
 | |
| 	patstralloc->unmetalenp = ztrsub(pathbuf + pathpos, pathbuf);
 | |
| 	if (!patstralloc->unmetalenp)
 | |
| 	    needfullpath = 0;
 | |
|     } else
 | |
| 	patstralloc->unmetalenp = 0;
 | |
|     /* Initialise cache area */
 | |
|     patstralloc->progstrunmeta = NULL;
 | |
|     patstralloc->progstrunmetalen = 0;
 | |
| 
 | |
|     DPUTS(needfullpath && (prog->flags & (PAT_PURES|PAT_ANY)),
 | |
| 	  "rum sort of file exclusion");
 | |
|     /*
 | |
|      * Partly for efficiency, and partly for the convenience of
 | |
|      * globbing, we don't unmetafy pure string patterns, and
 | |
|      * there's no reason to if the pattern is just a *.
 | |
|      */
 | |
|     if (force ||
 | |
| 	(!(prog->flags & (PAT_PURES|PAT_ANY))
 | |
| 	 && (needfullpath || patstralloc->unmetalen != stringlen))) {
 | |
| 	/*
 | |
| 	 * We need to copy if we need to prepend the path so far
 | |
| 	 * (in which case we copy both chunks), or if we have
 | |
| 	 * Meta characters.
 | |
| 	 */
 | |
| 	char *dst, *ptr;
 | |
| 	int i, icopy, ncopy;
 | |
| 
 | |
| 	dst = patstralloc->alloced =
 | |
| 	    zhalloc(patstralloc->unmetalen + patstralloc->unmetalenp);
 | |
| 
 | |
| 	if (needfullpath) {
 | |
| 	    /* loop twice, copy path buffer first time */
 | |
| 	    ptr = pathbuf;
 | |
| 	    ncopy = patstralloc->unmetalenp;
 | |
| 	} else {
 | |
| 	    /* just loop once, copy string with unmetafication */
 | |
| 	    ptr = string;
 | |
| 	    ncopy = patstralloc->unmetalen;
 | |
| 	}
 | |
| 	for (icopy = 0; icopy < 2; icopy++) {
 | |
| 	    for (i = 0; i < ncopy; i++) {
 | |
| 		if (*ptr == Meta) {
 | |
| 		    ptr++;
 | |
| 		    *dst++ = *ptr++ ^ 32;
 | |
| 		} else {
 | |
| 		    *dst++ = *ptr++;
 | |
| 		}
 | |
| 	    }
 | |
| 	    if (!needfullpath)
 | |
| 		break;
 | |
| 	    /* next time append test string to path so far */
 | |
| 	    ptr = string;
 | |
| 	    ncopy = patstralloc->unmetalen;
 | |
| 	}
 | |
|     }
 | |
|     else
 | |
|     {
 | |
| 	patstralloc->alloced = NULL;
 | |
|     }
 | |
| 
 | |
|     return patstralloc->alloced;
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|  * Test prog against null-terminated, metafied string.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| mod_export int
 | |
| pattry(Patprog prog, char *string)
 | |
| {
 | |
|     return pattryrefs(prog, string, -1, -1, NULL, 0, NULL, NULL, NULL);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Test prog against string of given length, no null termination
 | |
|  * but still metafied at this point.  offset gives an offset
 | |
|  * to include in reported match indices
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| mod_export int
 | |
| pattrylen(Patprog prog, char *string, int len, int unmetalen,
 | |
| 	  Patstralloc patstralloc, int offset)
 | |
| {
 | |
|     return pattryrefs(prog, string, len, unmetalen, patstralloc, offset,
 | |
| 		      NULL, NULL, NULL);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Test prog against string with given lengths.  The input
 | |
|  * string is metafied; stringlen is the raw string length, and
 | |
|  * unmetalen the number of characters in the original string (some
 | |
|  * of which may now be metafied).  Either value may be -1
 | |
|  * to indicate a null-terminated string which will be counted.  Note
 | |
|  * there may be a severe penalty for this if a lot of matching is done
 | |
|  * on one string.
 | |
|  *
 | |
|  * If patstralloc is not NULL it is used to optimise unmetafication
 | |
|  * of a trial string that may be passed (or any substring may be passed) to
 | |
|  * pattryrefs multiple times or the same pattern (N.B. so patstralloc
 | |
|  * depends on both prog *and* the trial string).  This should only be
 | |
|  * done if there is no path prefix (pathpos == 0) as otherwise the path
 | |
|  * buffer and unmetafied string may not match.  To do this,
 | |
|  * patallocstr() is called (use force = 1 to ensure it is always
 | |
|  * unmetafied); paststralloc points to existing storage. Memory is
 | |
|  * on the heap.
 | |
|  *
 | |
|  * patstralloc->alloced and patstralloc->unmetalen contain the
 | |
|  * unmetafied string and its length.  In that case, the rules for the
 | |
|  * earlier arguments change:
 | |
|  * - string is an unmetafied string
 | |
|  * - stringlen is its unmetafied (i.e. actual) length
 | |
|  * - unmetalenin is not used.
 | |
|  * string and stringlen may refer to arbitrary substrings of
 | |
|  * patstralloc->alloced without any internal modification to patstralloc.
 | |
|  *
 | |
|  * patoffset is the position in the original string (not seen by
 | |
|  * the pattern module) at which we are trying to match.
 | |
|  * This is added in to the positions recorded in patbeginp and patendp
 | |
|  * when we are looking for substrings.  Currently this only happens
 | |
|  * in the parameter substitution code.  It refers to a real character
 | |
|  * offset, i.e. is already in the form ready for presentation to the
 | |
|  * general public --- this is necessary as we don't have the
 | |
|  * information to convert it down here.
 | |
|  *
 | |
|  * Note this is a character offset, i.e. a single possibly metafied and
 | |
|  * possibly multibyte character counts as 1.
 | |
|  *
 | |
|  * The last three arguments are used to report the positions for the
 | |
|  * backreferences. On entry, *nump should contain the maximum number
 | |
|  * of positions to report.  In this case the match, mbegin, mend
 | |
|  * arrays are not altered.
 | |
|  *
 | |
|  * If nump is NULL but endp is not NULL, then *endp is set to the
 | |
|  * end position of the match, taking into account patinstart.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| mod_export int
 | |
| pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
 | |
| 	   Patstralloc patstralloc, int patoffset,
 | |
| 	   int *nump, int *begp, int *endp)
 | |
| {
 | |
|     int i, maxnpos = 0, ret;
 | |
|     int origlen;
 | |
|     char **sp, **ep, *ptr;
 | |
|     char *progstr = (char *)prog + prog->startoff;
 | |
|     struct patstralloc patstralloc_struct;
 | |
| 
 | |
|     if (nump) {
 | |
| 	maxnpos = *nump;
 | |
| 	*nump = 0;
 | |
|     }
 | |
| 
 | |
|     if (!patstralloc)
 | |
| 	patmungestring(&string, &stringlen, &unmetalenin);
 | |
|     origlen = stringlen;
 | |
| 
 | |
|     if (patstralloc) {
 | |
| 	DPUTS(!patstralloc->alloced,
 | |
| 	      "External unmetafy didn't actually unmetafy.");
 | |
| 	DPUTS(patstralloc->unmetalenp,
 | |
| 	      "Ooh-err: pathpos with external unmetafy. I have bad vibes.");
 | |
| 	patinpath = NULL;
 | |
| 	patinstart = string;
 | |
| 	/* stringlen is unmetafied length; unmetalenin is ignored */
 | |
|     } else {
 | |
| 	patstralloc = &patstralloc_struct;
 | |
| 	if (patallocstr(prog, string, stringlen, unmetalenin, 0, patstralloc)) {
 | |
| 	    patinstart = patstralloc->alloced + patstralloc->unmetalenp;
 | |
| 	    stringlen = patstralloc->unmetalen;
 | |
| 	} else
 | |
| 	    patinstart = string;
 | |
| 	if (patstralloc->unmetalenp)
 | |
| 	    patinpath = patstralloc->alloced;
 | |
| 	else
 | |
| 	    patinpath = NULL;
 | |
|     }
 | |
| 
 | |
|     patflags = prog->flags;
 | |
|     patinend = patinstart + stringlen;
 | |
|     /*
 | |
|      * From now on we do not require NULL termination of
 | |
|      * the test string.  There should also be no more references
 | |
|      * to the variable string.
 | |
|      */
 | |
| 
 | |
|     if (prog->flags & (PAT_PURES|PAT_ANY)) {
 | |
| 	/*
 | |
| 	 * Either we are testing against a pure string,
 | |
| 	 * or we can match anything at all.
 | |
| 	 */
 | |
| 	int pstrlen;
 | |
| 	char *pstr;
 | |
| 	if (patstralloc->alloced)
 | |
| 	{
 | |
| 	    /*
 | |
| 	     * Unmetafied; we need pattern string that's also unmetafied.
 | |
| 	     * We'll cache it in the patstralloc structure.
 | |
| 	     * Note it's on the heap.
 | |
| 	     */
 | |
| 	    if (!patstralloc->progstrunmeta)
 | |
| 	    {
 | |
| 		patstralloc->progstrunmeta =
 | |
| 		    dupstrpfx(progstr, (int)prog->patmlen);
 | |
| 		unmetafy(patstralloc->progstrunmeta,
 | |
| 			 &patstralloc->progstrunmetalen);
 | |
| 	    }
 | |
| 	    pstr = patstralloc->progstrunmeta;
 | |
| 	    pstrlen = patstralloc->progstrunmetalen;
 | |
| 	}
 | |
| 	else
 | |
| 	{
 | |
| 	    /* Metafied. */
 | |
| 	    pstr = progstr;
 | |
| 	    pstrlen = (int)prog->patmlen;
 | |
| 	}
 | |
| 	if (prog->flags & PAT_ANY) {
 | |
| 	    /*
 | |
| 	     * Optimisation for a single "*": always matches
 | |
| 	     * (except for no_glob_dots, see below).
 | |
| 	     */
 | |
| 	    ret = 1;
 | |
| 	} else {
 | |
| 	    /*
 | |
| 	     * Testing a pure string.  See if initial
 | |
| 	     * components match.
 | |
| 	     */
 | |
| 	    int lendiff = stringlen - pstrlen;
 | |
| 	    if (lendiff < 0) {
 | |
| 		/* No, the pattern string is too long. */
 | |
| 		ret = 0;
 | |
| 	    } else if (!memcmp(pstr, patinstart, pstrlen)) {
 | |
| 		/*
 | |
| 		 * Initial component matches.  Matches either
 | |
| 		 * if lengths are the same or we are not anchored
 | |
| 		 * to the end of the string.
 | |
| 		 */
 | |
| 		ret = !lendiff || (prog->flags & PAT_NOANCH);
 | |
| 	    } else {
 | |
| 		/* No match. */
 | |
| 		ret = 0;
 | |
| 	    }
 | |
| 	}
 | |
| 	if (ret) {
 | |
| 	    /*
 | |
| 	     * For files, we won't match initial "."s unless
 | |
| 	     * glob_dots is set.
 | |
| 	     */
 | |
| 	    if ((prog->flags & PAT_NOGLD) && *patinstart == '.') {
 | |
| 		ret = 0;
 | |
| 	    } else {
 | |
| 		/*
 | |
| 		 * Remember the length in case used for ${..#..} etc.
 | |
| 		 * In this case, we didn't unmetafy the pattern string
 | |
| 		 * in the original structure, but it might be unmetafied
 | |
| 		 * for use with an unmetafied test string.
 | |
| 		 */
 | |
| 		patinlen = pstrlen;
 | |
| 		/* if matching files, must update globbing flags */
 | |
| 		patglobflags = prog->globend;
 | |
| 
 | |
| 		if ((patglobflags & GF_MATCHREF) &&
 | |
| 		    !(patflags & PAT_FILE)) {
 | |
| 		    char *str;
 | |
| 		    int mlen;
 | |
| 
 | |
| 		    if (patstralloc->alloced) {
 | |
| 			/*
 | |
| 			 * Unmetafied: pstrlen contains unmetafied
 | |
| 			 * length in bytes.
 | |
| 			 */
 | |
| 			str = metafy(patinstart, pstrlen, META_DUP);
 | |
| 			mlen = CHARSUB(patinstart, patinstart + pstrlen);
 | |
| 		    } else {
 | |
| 			str = ztrduppfx(patinstart, patinlen);
 | |
| 			/*
 | |
| 			 * Count the characters.  We're not using CHARSUB()
 | |
| 			 * because the string is still metafied.
 | |
| 			 */
 | |
| 			MB_METACHARINIT();
 | |
| 			mlen = MB_METASTRLEN2END(patinstart, 0,
 | |
| 						 patinstart + patinlen);
 | |
| 		    }
 | |
| 
 | |
| 		    setsparam("MATCH", str);
 | |
| 		    setiparam("MBEGIN",
 | |
| 			      (zlong)(patoffset + !isset(KSHARRAYS)));
 | |
| 		    setiparam("MEND",
 | |
| 			      (zlong)(mlen + patoffset +
 | |
| 				      !isset(KSHARRAYS) - 1));
 | |
| 		}
 | |
| 	    }
 | |
| 	}
 | |
|     } else {
 | |
| 	/*
 | |
| 	 * Test for a `must match' string, unless we're scanning for a match
 | |
| 	 * in which case we don't need to do this each time.
 | |
| 	 */
 | |
| 	ret = 1;
 | |
| 	if (!(prog->flags & PAT_SCAN) && prog->mustoff)
 | |
| 	{
 | |
| 	    char *testptr;	/* start pointer into test string */
 | |
| 	    char *teststop;	/* last point from which we can match */
 | |
| 	    char *patptr = (char *)prog + prog->mustoff;
 | |
| 	    int patlen = prog->patmlen;
 | |
| 	    int found = 0;
 | |
| 
 | |
| 	    if (patlen > stringlen) {
 | |
| 		/* Too long, can't match. */
 | |
| 		ret = 0;
 | |
| 	    } else {
 | |
| 		teststop = patinend - patlen;
 | |
| 
 | |
| 		for (testptr = patinstart; testptr <= teststop; testptr++)
 | |
| 		{
 | |
| 		    if (!memcmp(testptr, patptr, patlen)) {
 | |
| 			found = 1;
 | |
| 			break;
 | |
| 		    }
 | |
| 		}
 | |
| 
 | |
| 		if (!found)
 | |
| 		    ret = 0;
 | |
| 	    }
 | |
| 	}
 | |
| 	if (!ret)
 | |
| 	    return 0;
 | |
| 
 | |
| 	patglobflags = prog->globflags;
 | |
| 	if (!(patflags & PAT_FILE)) {
 | |
| 	    forceerrs = -1;
 | |
| 	    errsfound = 0;
 | |
| 	}
 | |
| 	globdots = !(patflags & PAT_NOGLD);
 | |
| 	parsfound = 0;
 | |
| 
 | |
| 	patinput = patinstart;
 | |
| 
 | |
| 	exactpos = exactend = NULL;
 | |
| 	/* The only external call to patmatch --- all others are recursive */
 | |
| 	if (patmatch((Upat)progstr)) {
 | |
| 	    /*
 | |
| 	     * we were lazy and didn't save the globflags if an exclusion
 | |
| 	     * failed, so set it now
 | |
| 	     */
 | |
| 	    patglobflags = prog->globend;
 | |
| 
 | |
| 	    /*
 | |
| 	     * Record length of successful match, including Meta
 | |
| 	     * characters.  Do it here so that patmatchlen() can return
 | |
| 	     * it even if we delete the pattern strings.
 | |
| 	     */
 | |
| 	    patinlen = patinput - patinstart;
 | |
| 	    /*
 | |
| 	     * Optimization: if we didn't find any Meta characters
 | |
| 	     * to begin with, we don't need to look for them now.
 | |
| 	     *
 | |
| 	     * For patstralloc passed in, we want the unmetafied length.
 | |
| 	     */
 | |
| 	    if (patstralloc == &patstralloc_struct &&
 | |
| 		patstralloc->unmetalen != origlen) {
 | |
| 		for (ptr = patinstart; ptr < patinput; ptr++)
 | |
| 		    if (imeta(*ptr))
 | |
| 			patinlen++;
 | |
| 	    }
 | |
| 
 | |
| 	    /*
 | |
| 	     * Should we clear backreferences and matches on a failed
 | |
| 	     * match?
 | |
| 	     */
 | |
| 	    if ((patglobflags & GF_MATCHREF) && !(patflags & PAT_FILE)) {
 | |
| 		/*
 | |
| 		 * m flag: for global match.  This carries no overhead
 | |
| 		 * in the pattern matching part.
 | |
| 		 *
 | |
| 		 * Remember the test pattern is already unmetafied.
 | |
| 		 */
 | |
| 		char *str;
 | |
| 		int mlen = CHARSUB(patinstart, patinput);
 | |
| 
 | |
| 		str = metafy(patinstart, patinput - patinstart, META_DUP);
 | |
| 		setsparam("MATCH", str);
 | |
| 		setiparam("MBEGIN", (zlong)(patoffset + !isset(KSHARRAYS)));
 | |
| 		setiparam("MEND",
 | |
| 			  (zlong)(mlen + patoffset +
 | |
| 				  !isset(KSHARRAYS) - 1));
 | |
| 	    }
 | |
| 	    if (prog->patnpar && nump) {
 | |
| 		/*
 | |
| 		 * b flag: for backreferences using parentheses. Reported
 | |
| 		 * directly.
 | |
| 		 */
 | |
| 		*nump = prog->patnpar;
 | |
| 
 | |
| 		sp = patbeginp;
 | |
| 		ep = patendp;
 | |
| 
 | |
| 		for (i = 0; i < prog->patnpar && i < maxnpos; i++) {
 | |
| 		    if (parsfound & (1 << i)) {
 | |
| 			if (begp)
 | |
| 			    *begp++ = CHARSUB(patinstart, *sp) + patoffset;
 | |
| 			if (endp)
 | |
| 			    *endp++ = CHARSUB(patinstart, *ep) + patoffset
 | |
| 				- 1;
 | |
| 		    } else {
 | |
| 			if (begp)
 | |
| 			    *begp++ = -1;
 | |
| 			if (endp)
 | |
| 			    *endp++ = -1;
 | |
| 		    }
 | |
| 
 | |
| 		    sp++;
 | |
| 		    ep++;
 | |
| 		}
 | |
| 	    } else if (prog->patnpar && !(patflags & PAT_FILE)) {
 | |
| 		/*
 | |
| 		 * b flag: for backreferences using parentheses.
 | |
| 		 */
 | |
| 		int palen = prog->patnpar+1;
 | |
| 		char **matcharr, **mbeginarr, **mendarr;
 | |
| 		char numbuf[DIGBUFSIZE];
 | |
| 
 | |
| 		matcharr = zshcalloc(palen*sizeof(char *));
 | |
| 		mbeginarr = zshcalloc(palen*sizeof(char *));
 | |
| 		mendarr = zshcalloc(palen*sizeof(char *));
 | |
| 
 | |
| 		sp = patbeginp;
 | |
| 		ep = patendp;
 | |
| 
 | |
| 		for (i = 0; i < prog->patnpar; i++) {
 | |
| 		    if (parsfound & (1 << i)) {
 | |
| 			matcharr[i] = metafy(*sp, *ep - *sp, META_DUP);
 | |
| 			/*
 | |
| 			 * mbegin and mend give indexes into the string
 | |
| 			 * in the standard notation, i.e. respecting
 | |
| 			 * KSHARRAYS, and with the end index giving
 | |
| 			 * the last character, not one beyond.
 | |
| 			 * For example, foo=foo; [[ $foo = (f)oo ]] gives
 | |
| 			 * (without KSHARRAYS) indexes 1 and 1, which
 | |
| 			 * corresponds to indexing as ${foo[1,1]}.
 | |
| 			 */
 | |
| 			sprintf(numbuf, "%ld",
 | |
| 				(long)(CHARSUB(patinstart, *sp) +
 | |
| 				       patoffset +
 | |
| 				       !isset(KSHARRAYS)));
 | |
| 			mbeginarr[i] = ztrdup(numbuf);
 | |
| 			sprintf(numbuf, "%ld",
 | |
| 				(long)(CHARSUB(patinstart, *ep) +
 | |
| 				       patoffset +
 | |
| 				       !isset(KSHARRAYS) - 1));
 | |
| 			mendarr[i] = ztrdup(numbuf);
 | |
| 		    } else {
 | |
| 			/* Pattern wasn't set: either it was in an
 | |
| 			 * unmatched branch, or a hashed parenthesis
 | |
| 			 * that didn't match at all.
 | |
| 			 */
 | |
| 			matcharr[i] = ztrdup("");
 | |
| 			mbeginarr[i] = ztrdup("-1");
 | |
| 			mendarr[i] = ztrdup("-1");
 | |
| 		    }
 | |
| 		    sp++;
 | |
| 		    ep++;
 | |
| 		}
 | |
| 		setaparam("match", matcharr);
 | |
| 		setaparam("mbegin", mbeginarr);
 | |
| 		setaparam("mend", mendarr);
 | |
| 	    }
 | |
| 
 | |
| 	    if (!nump && endp) {
 | |
| 		/*
 | |
| 		 * We just need the overall end position.
 | |
| 		 */
 | |
| 		*endp = CHARSUB(patinstart, patinput) + patoffset;
 | |
| 	    }
 | |
| 
 | |
| 	    ret = 1;
 | |
| 	} else
 | |
| 	    ret = 0;
 | |
|     }
 | |
| 
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Return length of previous successful match.  This is
 | |
|  * in metafied bytes, i.e. includes a count of Meta characters,
 | |
|  * unless the match was done on an unmetafied string using
 | |
|  * a patstralloc struct, in which case it too is unmetafied.
 | |
|  * Unusual and futile attempt at modular encapsulation.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| int
 | |
| patmatchlen(void)
 | |
| {
 | |
|     return patinlen;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Match literal characters with case insensitivity test:  the first
 | |
|  * comes from the input string, the second the current pattern.
 | |
|  */
 | |
| #ifdef MULTIBYTE_SUPPORT
 | |
| #define ISUPPER(x)	iswupper(x)
 | |
| #define ISLOWER(x)	iswlower(x)
 | |
| #define TOUPPER(x)	towupper(x)
 | |
| #define TOLOWER(x)	towlower(x)
 | |
| #define ISDIGIT(x)	iswdigit(x)
 | |
| #else
 | |
| #define ISUPPER(x)	isupper(x)
 | |
| #define ISLOWER(x)	islower(x)
 | |
| #define TOUPPER(x)	toupper(x)
 | |
| #define TOLOWER(x)	tolower(x)
 | |
| #define ISDIGIT(x)	idigit(x)
 | |
| #endif
 | |
| #define CHARMATCH(chin, chpa) (chin == chpa || \
 | |
|         ((patglobflags & GF_IGNCASE) ? \
 | |
| 	 ((ISUPPER(chin) ? TOLOWER(chin) : chin) == \
 | |
| 	  (ISUPPER(chpa) ? TOLOWER(chpa) : chpa)) : \
 | |
| 	 (patglobflags & GF_LCMATCHUC) ? \
 | |
| 	 (ISLOWER(chpa) && TOUPPER(chpa) == chin) : 0))
 | |
| 
 | |
| /*
 | |
|  * The same but caching an expression from the first argument,
 | |
|  * Requires local charmatch_cache definition.
 | |
|  */
 | |
| #define CHARMATCH_EXPR(expr, chpa) \
 | |
| 	(charmatch_cache = (expr), CHARMATCH(charmatch_cache, chpa))
 | |
| 
 | |
| /*
 | |
|  * Main matching routine.
 | |
|  *
 | |
|  * Testing the tail end of a match is usually done by recursion, but
 | |
|  * we try to eliminate that in favour of looping for simple cases.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| static int
 | |
| patmatch(Upat prog)
 | |
| {
 | |
|     /* Current and next nodes */
 | |
|     Upat scan = prog, next, opnd;
 | |
|     char *start, *save, *chrop, *chrend, *compend;
 | |
|     int savglobflags, op, no, min, fail = 0, saverrsfound;
 | |
|     zrange_t from, to, comp;
 | |
|     patint_t nextch;
 | |
|     int q = queue_signal_level();
 | |
| 
 | |
|     /*
 | |
|      * To avoid overhead of saving state if there are no queued signals
 | |
|      * waiting, we pierce the signals.h veil and examine queue state.
 | |
|      */
 | |
| #define check_for_signals() do if (queue_front != queue_rear) { \
 | |
| 	    int savpatflags = patflags, savpatglobflags = patglobflags; \
 | |
|             char *savexactpos = exactpos, *savexactend = exactend; \
 | |
| 	    struct rpat savpattrystate = pattrystate; \
 | |
| 	    dont_queue_signals(); \
 | |
| 	    restore_queue_signals(q); \
 | |
| 	    exactpos = savexactpos; \
 | |
| 	    exactend = savexactend; \
 | |
| 	    patflags = savpatflags; \
 | |
| 	    patglobflags = savpatglobflags; \
 | |
| 	    pattrystate = savpattrystate; \
 | |
| 	} while (0)
 | |
| 
 | |
|     check_for_signals();
 | |
| 
 | |
|     while  (scan && !errflag) {
 | |
| 	next = PATNEXT(scan);
 | |
| 
 | |
| 	if (!globdots && P_NOTDOT(scan) && patinput == patinstart &&
 | |
| 	    patinput < patinend && *patinput == '.')
 | |
| 	    return 0;
 | |
| 
 | |
| 	switch (P_OP(scan)) {
 | |
| 	case P_ANY:
 | |
| 	    if (patinput == patinend)
 | |
| 		fail = 1;
 | |
| 	    else
 | |
| 		CHARINC(patinput, patinend);
 | |
| 	    break;
 | |
| 	case P_EXACTLY:
 | |
| 	    /*
 | |
| 	     * acts as nothing if *chrop is null:  this is used by
 | |
| 	     * approx code.
 | |
| 	     */
 | |
| 	    if (exactpos) {
 | |
| 		chrop = exactpos;
 | |
| 		chrend = exactend;
 | |
| 	    } else {
 | |
| 		chrop = P_LS_STR(scan);
 | |
| 		chrend = chrop + P_LS_LEN(scan);
 | |
| 	    }
 | |
| 	    exactpos = NULL;
 | |
| 	    while (chrop < chrend && patinput < patinend) {
 | |
| 		char *savpatinput = patinput;
 | |
| 		char *savchrop = chrop;
 | |
| 		int badin = 0, badpa = 0;
 | |
| 		/*
 | |
| 		 * Care with character matching:
 | |
| 		 * We do need to convert the character to wide
 | |
| 		 * representation if possible, because we may need
 | |
| 		 * to do case transformation.  However, we should
 | |
| 		 * be careful in case one, but not the other, wasn't
 | |
| 		 * representable in the current locale---in that
 | |
| 		 * case they don't match even if the returned
 | |
| 		 * values (one properly converted, one raw) are
 | |
| 		 * the same.
 | |
| 		 */
 | |
| 		patint_t chin = CHARREFINC(patinput, patinend, &badin);
 | |
| 		patint_t chpa = CHARREFINC(chrop, chrend, &badpa);
 | |
| 		if (!CHARMATCH(chin, chpa) || badin != badpa) {
 | |
| 		    fail = 1;
 | |
| 		    patinput = savpatinput;
 | |
| 		    chrop = savchrop;
 | |
| 		    break;
 | |
| 		}
 | |
| 	    }
 | |
| 	    if (chrop < chrend) {
 | |
| 		exactpos = chrop;
 | |
| 		exactend = chrend;
 | |
| 		fail = 1;
 | |
| 	    }
 | |
| 	    break;
 | |
| 	case P_ANYOF:
 | |
| 	case P_ANYBUT:
 | |
| 	    if (patinput == patinend)
 | |
| 		fail = 1;
 | |
| 	    else {
 | |
| #ifdef MULTIBYTE_SUPPORT
 | |
| 		int zmb_ind;
 | |
| 		wchar_t cr = charref(patinput, patinend, &zmb_ind);
 | |
| 		char *scanop = (char *)P_OPERAND(scan);
 | |
| 		if (patglobflags & GF_MULTIBYTE) {
 | |
| 		    if (mb_patmatchrange(scanop, cr, zmb_ind, NULL, NULL) ^
 | |
| 			(P_OP(scan) == P_ANYOF))
 | |
| 			fail = 1;
 | |
| 		    else
 | |
| 			CHARINC(patinput, patinend);
 | |
| 		} else if (patmatchrange(scanop, (int)cr, NULL, NULL) ^
 | |
| 			   (P_OP(scan) == P_ANYOF))
 | |
| 		    fail = 1;
 | |
| 		else
 | |
| 		    CHARINC(patinput, patinend);
 | |
| #else
 | |
| 		if (patmatchrange((char *)P_OPERAND(scan),
 | |
| 				  CHARREF(patinput, patinend), NULL, NULL) ^
 | |
| 		    (P_OP(scan) == P_ANYOF))
 | |
| 		    fail = 1;
 | |
| 		else
 | |
| 		    CHARINC(patinput, patinend);
 | |
| #endif
 | |
| 	    }
 | |
| 	    break;
 | |
| 	case P_NUMRNG:
 | |
| 	case P_NUMFROM:
 | |
| 	case P_NUMTO:
 | |
| 	    /*
 | |
| 	     * To do this properly, we really have to treat numbers as
 | |
| 	     * closures:  that's so things like <1-1000>33 will
 | |
| 	     * match 633 (they didn't up to 3.1.6).  To avoid making this
 | |
| 	     * too inefficient, we see if there's an exact match next:
 | |
| 	     * if there is, and it's not a digit, we return 1 after
 | |
| 	     * the first attempt.
 | |
| 	     */
 | |
| 	    op = P_OP(scan);
 | |
| 	    start = (char *)P_OPERAND(scan);
 | |
| 	    from = to = 0;
 | |
| 	    if (op != P_NUMTO) {
 | |
| #ifdef ZSH_64_BIT_TYPE
 | |
| 		/* We can't rely on pointer alignment being good enough. */
 | |
| 		memcpy((char *)&from, start, sizeof(zrange_t));
 | |
| #else
 | |
| 		from = *((zrange_t *) start);
 | |
| #endif
 | |
| 		start += sizeof(zrange_t);
 | |
| 	    }
 | |
| 	    if (op != P_NUMFROM) {
 | |
| #ifdef ZSH_64_BIT_TYPE
 | |
| 		memcpy((char *)&to, start, sizeof(zrange_t));
 | |
| #else
 | |
| 		to = *((zrange_t *) start);
 | |
| #endif
 | |
| 	    }
 | |
| 	    start = compend = patinput;
 | |
| 	    comp = 0;
 | |
| 	    while (patinput < patinend && idigit(*patinput)) {
 | |
| 		int out_of_range = 0;
 | |
| 		int digit = *patinput - '0';
 | |
| 		if (comp > ZRANGE_MAX / (zlong)10) {
 | |
| 		    out_of_range = 1;
 | |
| 		} else {
 | |
| 		    zrange_t c10 = comp ? comp * 10 : 0;
 | |
| 		    if (ZRANGE_MAX - c10 < digit) {
 | |
| 			out_of_range = 1;
 | |
| 		    } else {
 | |
| 			comp = c10;
 | |
| 			comp += digit;
 | |
| 		    }
 | |
| 		}
 | |
| 		patinput++;
 | |
| 		compend++;
 | |
| 
 | |
| 		if (out_of_range ||
 | |
| 		    (comp & ((zrange_t)1 << (sizeof(comp)*8 -
 | |
| #ifdef ZRANGE_T_IS_SIGNED
 | |
| 					    2
 | |
| #else
 | |
| 					    1
 | |
| #endif
 | |
| 				)))) {
 | |
| 		    /*
 | |
| 		     * Out of range (allowing for signedness, which
 | |
| 		     * we need if we are using zlongs).
 | |
| 		     * This is as far as we can go.
 | |
| 		     * If we're doing a range "from", skip all the
 | |
| 		     * remaining numbers.  Otherwise, we can't
 | |
| 		     * match beyond the previous point anyway.
 | |
| 		     * Leave the pointer to the last calculated
 | |
| 		     * position (compend) where it was before.
 | |
| 		     */
 | |
| 		    if (op == P_NUMFROM) {
 | |
| 			while (patinput < patinend && idigit(*patinput))
 | |
| 			    patinput++;
 | |
| 		    }
 | |
| 		}
 | |
| 	    }
 | |
| 	    save = patinput;
 | |
| 	    no = 0;
 | |
| 	    while (patinput > start) {
 | |
| 		/* if already too small, no power on earth can save it */
 | |
| 		if (comp < from && patinput <= compend)
 | |
| 		    break;
 | |
| 		if ((op == P_NUMFROM || comp <= to) && patmatch(next)) {
 | |
| 		    return 1;
 | |
| 		}
 | |
| 		if (!no && P_OP(next) == P_EXACTLY &&
 | |
| 		    (!P_LS_LEN(next) ||
 | |
| 		     !idigit(STOUC(*P_LS_STR(next)))) &&
 | |
| 		    !(patglobflags & 0xff))
 | |
| 		    return 0;
 | |
| 		patinput = --save;
 | |
| 		no++;
 | |
| 		/*
 | |
| 		 * With a range start and an unrepresentable test
 | |
| 		 * number, we just back down the test string without
 | |
| 		 * changing the number until we get to a representable
 | |
| 		 * one.
 | |
| 		 */
 | |
| 		if (patinput < compend)
 | |
| 		    comp /= 10;
 | |
| 	    }
 | |
| 	    patinput = start;
 | |
| 	    fail = 1;
 | |
| 	    break;
 | |
| 	case P_NUMANY:
 | |
| 	    /* This is <->: any old set of digits, don't bother comparing */
 | |
| 	    start = patinput;
 | |
| 	    while (patinput < patinend && idigit(*patinput))
 | |
| 		patinput++;
 | |
| 	    save = patinput;
 | |
| 	    no = 0;
 | |
| 	    while (patinput > start) {
 | |
| 		if (patmatch(next))
 | |
| 		    return 1;
 | |
| 		if (!no && P_OP(next) == P_EXACTLY &&
 | |
| 		    (!P_LS_LEN(next) ||
 | |
| 		     !idigit(*P_LS_STR(next))) &&
 | |
| 		    !(patglobflags & 0xff))
 | |
| 		    return 0;
 | |
| 		patinput = --save;
 | |
| 		no++;
 | |
| 	    }
 | |
| 	    patinput = start;
 | |
| 	    fail = 1;
 | |
| 	    break;
 | |
| 	case P_NOTHING:
 | |
| 	    break;
 | |
| 	case P_BACK:
 | |
| 	    break;
 | |
| 	case P_GFLAGS:
 | |
| 	    patglobflags = P_OPERAND(scan)->l;
 | |
| 	    break;
 | |
| 	case P_OPEN:
 | |
| 	case P_OPEN+1:
 | |
| 	case P_OPEN+2:
 | |
| 	case P_OPEN+3:
 | |
| 	case P_OPEN+4:
 | |
| 	case P_OPEN+5:
 | |
| 	case P_OPEN+6:
 | |
| 	case P_OPEN+7:
 | |
| 	case P_OPEN+8:
 | |
| 	case P_OPEN+9:
 | |
| 	    no = P_OP(scan) - P_OPEN;
 | |
| 	    save = patinput;
 | |
| 
 | |
| 	    if (patmatch(next)) {
 | |
| 		/*
 | |
| 		 * Don't set patbeginp if some later invocation of
 | |
| 		 * the same parentheses already has.
 | |
| 		 */
 | |
| 		if (no && !(parsfound & (1 << (no - 1)))) {
 | |
| 		    patbeginp[no-1] = save;
 | |
| 		    parsfound |= 1 << (no - 1);
 | |
| 		}
 | |
| 		return 1;
 | |
| 	    } else
 | |
| 		return 0;
 | |
| 	    break;
 | |
| 	case P_CLOSE:
 | |
| 	case P_CLOSE+1:
 | |
| 	case P_CLOSE+2:
 | |
| 	case P_CLOSE+3:
 | |
| 	case P_CLOSE+4:
 | |
| 	case P_CLOSE+5:
 | |
| 	case P_CLOSE+6:
 | |
| 	case P_CLOSE+7:
 | |
| 	case P_CLOSE+8:
 | |
| 	case P_CLOSE+9:
 | |
| 	    no = P_OP(scan) - P_CLOSE;
 | |
| 	    save = patinput;
 | |
| 
 | |
| 	    if (patmatch(next)) {
 | |
| 		if (no && !(parsfound & (1 << (no + 15)))) {
 | |
| 		    patendp[no-1] = save;
 | |
| 		    parsfound |= 1 << (no + 15);
 | |
| 		}
 | |
| 		return 1;
 | |
| 	    } else
 | |
| 		return 0;
 | |
| 	    break;
 | |
| 	case P_EXCSYNC:
 | |
| 	    /* See the P_EXCLUDE code below for where syncptr comes from */
 | |
| 	    {
 | |
| 		unsigned char *syncptr;
 | |
| 		Upat after;
 | |
| 		after = P_OPERAND(scan);
 | |
| 		DPUTS(!P_ISEXCLUDE(after),
 | |
| 		      "BUG: EXCSYNC not followed by EXCLUDE.");
 | |
| 		DPUTS(!P_OPERAND(after)->p,
 | |
| 		      "BUG: EXCSYNC not handled by EXCLUDE");
 | |
| 		syncptr = P_OPERAND(after)->p + (patinput - patinstart);
 | |
| 		/*
 | |
| 		 * If we already matched from here, this time we fail.
 | |
| 		 * See WBRANCH code for story about error count.
 | |
| 		 */
 | |
| 		if (*syncptr && errsfound + 1 >= *syncptr)
 | |
| 		    return 0;
 | |
| 		/*
 | |
| 		 * Else record that we (possibly) matched this time.
 | |
| 		 * No harm if we don't:  then the previous test will just
 | |
| 		 * short cut the attempted match that is bound to fail.
 | |
| 		 * We never try to exclude something that has already
 | |
| 		 * failed anyway.
 | |
| 		 */
 | |
| 		*syncptr = errsfound + 1;
 | |
| 	    }
 | |
| 	    break;
 | |
| 	case P_EXCEND:
 | |
| 	    /*
 | |
| 	     * This is followed by a P_EXCSYNC, but only in the P_EXCLUDE
 | |
| 	     * branch.  Actually, we don't bother following it:  all we
 | |
| 	     * need to know is that we successfully matched so far up
 | |
| 	     * to the end of the asserted pattern; the endpoint
 | |
| 	     * in the target string is nulled out.
 | |
| 	     */
 | |
| 	    if (!(fail = (patinput < patinend)))
 | |
| 		return 1;
 | |
| 	    break;
 | |
| 	case P_BRANCH:
 | |
| 	case P_WBRANCH:
 | |
| 	    /* P_EXCLUDE shouldn't occur without a P_BRANCH */
 | |
| 	    if (!P_ISBRANCH(next)) {
 | |
| 		/* no choice, avoid recursion */
 | |
| 		DPUTS(P_OP(scan) == P_WBRANCH,
 | |
| 		      "BUG: WBRANCH with no alternative.");
 | |
| 		next = P_OPERAND(scan);
 | |
| 	    } else {
 | |
| 		do {
 | |
| 		    save = patinput;
 | |
| 		    savglobflags = patglobflags;
 | |
| 		    saverrsfound = errsfound;
 | |
| 		    if (P_ISEXCLUDE(next)) {
 | |
| 			/*
 | |
| 			 * The strategy is to test the asserted pattern,
 | |
| 			 * recording via P_EXCSYNC how far the part to
 | |
| 			 * be excluded matched.  We then set the
 | |
| 			 * length of the test string to that
 | |
| 			 * point and see if the exclusion as far as
 | |
| 			 * P_EXCEND also matches that string.
 | |
| 			 * We need to keep testing the asserted pattern
 | |
| 			 * by backtracking, since the first attempt
 | |
| 			 * may be excluded while a later attempt may not.
 | |
| 			 * For this we keep a pointer just after
 | |
| 			 * the P_EXCLUDE which is tested by the P_EXCSYNC
 | |
| 			 * to see if we matched there last time, in which
 | |
| 			 * case we fail.  If there is nothing to backtrack
 | |
| 			 * over, that doesn't matter:  we should fail anyway.
 | |
| 			 * The pointer also tells us where the asserted
 | |
| 			 * pattern matched for use by the exclusion.
 | |
| 			 *
 | |
| 			 * It's hard to allocate space for this
 | |
| 			 * beforehand since we may need to do it
 | |
| 			 * recursively.
 | |
| 			 *
 | |
| 			 * P.S. in case you were wondering, this code
 | |
| 			 * is horrible.
 | |
| 			 */
 | |
| 			Upat syncstrp;
 | |
| 			char *origpatinend;
 | |
| 			unsigned char *oldsyncstr;
 | |
| 			char *matchpt = NULL;
 | |
| 			int ret, savglobdots, matchederrs = 0;
 | |
| 			int savparsfound = parsfound;
 | |
| 			DPUTS(P_OP(scan) == P_WBRANCH,
 | |
| 			      "BUG: excluded WBRANCH");
 | |
| 			syncstrp = P_OPERAND(next);
 | |
| 			/*
 | |
| 			 * Unlike WBRANCH, each test at the same exclude
 | |
| 			 * sync point (due to an external loop) is separate,
 | |
| 			 * i.e testing (foo~bar)# is no different from
 | |
| 			 * (foo~bar)(foo~bar)... from the exclusion point
 | |
| 			 * of view, so we use a different sync string.
 | |
| 			 */
 | |
| 			oldsyncstr = syncstrp->p;
 | |
| 			syncstrp->p = (unsigned char *)
 | |
| 			    zshcalloc((patinend - patinstart) + 1);
 | |
| 			origpatinend = patinend;
 | |
| 			while ((ret = patmatch(P_OPERAND(scan)))) {
 | |
| 			    unsigned char *syncpt;
 | |
| 			    char *savpatinstart;
 | |
| 			    int savforce = forceerrs;
 | |
| 			    int savpatflags = patflags, synclen;
 | |
| 			    forceerrs = -1;
 | |
| 			    savglobdots = globdots;
 | |
| 			    matchederrs = errsfound;
 | |
| 			    matchpt = patinput;    /* may not be end */
 | |
| 			    globdots = 1;	   /* OK to match . first */
 | |
| 			    /* Find the point where the scan
 | |
| 			     * matched the part to be excluded: because
 | |
| 			     * of backtracking, the one
 | |
| 			     * most recently matched will be the first.
 | |
| 			     * (Luckily, backtracking is done after all
 | |
| 			     * possibilities for approximation have been
 | |
| 			     * checked.)
 | |
| 			     */
 | |
| 			    for (syncpt = syncstrp->p; !*syncpt; syncpt++)
 | |
| 				;
 | |
| 			    synclen = syncpt - syncstrp->p;
 | |
| 			    if (patinstart + synclen != patinend) {
 | |
| 				/*
 | |
| 				 * Temporarily mark the string as
 | |
| 				 * ending at this point.
 | |
| 				 */
 | |
| 				DPUTS(patinstart + synclen > matchpt,
 | |
| 				      "BUG: EXCSYNC failed");
 | |
| 
 | |
| 				patinend = patinstart + synclen;
 | |
| 				/*
 | |
| 				 * If this isn't really the end of the string,
 | |
| 				 * remember this for the (#e) assertion.
 | |
| 				 */
 | |
| 				patflags |= PAT_NOTEND;
 | |
| 			    }
 | |
| 			    savpatinstart = patinstart;
 | |
| 			    next = PATNEXT(scan);
 | |
| 			    while (next && P_ISEXCLUDE(next)) {
 | |
| 				patinput = save;
 | |
| 				/*
 | |
| 				 * turn off approximations in exclusions:
 | |
| 				 * note we keep remaining patglobflags
 | |
| 				 * set by asserted branch (or previous
 | |
| 				 * excluded branches, for consistency).
 | |
| 				 */
 | |
| 				patglobflags &= ~0xff;
 | |
| 				errsfound = 0;
 | |
| 				opnd = P_OPERAND(next) + 1;
 | |
| 				if (P_OP(next) == P_EXCLUDP && patinpath) {
 | |
| 				    /*
 | |
| 				     * Top level exclusion with a file,
 | |
| 				     * applies to whole path so add the
 | |
| 				     * segments already matched.
 | |
| 				     * We copied these in front of the
 | |
| 				     * test pattern, so patinend doesn't
 | |
| 				     * need moving.
 | |
| 				     */
 | |
| 				    DPUTS(patinput != patinstart,
 | |
| 					  "BUG: not at start excluding path");
 | |
| 				    patinput = patinstart = patinpath;
 | |
| 				}
 | |
| 				if (patmatch(opnd)) {
 | |
| 				    ret = 0;
 | |
| 				    /*
 | |
| 				     * Another subtlety: if we exclude the
 | |
| 				     * match, any parentheses just found
 | |
| 				     * become invalidated.
 | |
| 				     */
 | |
| 				    parsfound = savparsfound;
 | |
| 				}
 | |
| 				if (patinpath) {
 | |
| 				    patinput = savpatinstart +
 | |
| 					(patinput - patinstart);
 | |
| 				    patinstart = savpatinstart;
 | |
| 				}
 | |
| 				if (!ret)
 | |
| 				    break;
 | |
| 				next = PATNEXT(next);
 | |
| 			    }
 | |
| 			    /*
 | |
| 			     * Restore original end position.
 | |
| 			     */
 | |
| 			    patinend = origpatinend;
 | |
| 			    patflags = savpatflags;
 | |
| 			    globdots = savglobdots;
 | |
| 			    forceerrs = savforce;
 | |
| 			    if (ret)
 | |
| 				break;
 | |
| 			    patinput = save;
 | |
| 			    patglobflags = savglobflags;
 | |
| 			    errsfound = saverrsfound;
 | |
| 			}
 | |
| 			zfree((char *)syncstrp->p,
 | |
| 			      (patinend - patinstart) + 1);
 | |
| 			syncstrp->p = oldsyncstr;
 | |
| 			if (ret) {
 | |
| 			    patinput = matchpt;
 | |
| 			    errsfound = matchederrs;
 | |
| 			    return 1;
 | |
| 			}
 | |
| 			while ((scan = PATNEXT(scan)) &&
 | |
| 			       P_ISEXCLUDE(scan))
 | |
| 			    ;
 | |
| 		    } else {
 | |
| 			int ret = 1, pfree = 0;
 | |
| 			Upat ptrp = NULL;
 | |
| 			unsigned char *ptr;
 | |
| 			if (P_OP(scan) == P_WBRANCH) {
 | |
| 			    /*
 | |
| 			     * This is where we make sure that we are not
 | |
| 			     * repeatedly matching zero-length strings in
 | |
| 			     * a closure, which would cause an infinite loop,
 | |
| 			     * and also remove exponential behaviour in
 | |
| 			     * backtracking nested closures.
 | |
| 			     * The P_WBRANCH operator leaves a space for a
 | |
| 			     * uchar *, initialized to NULL, which is
 | |
| 			     * turned into a string the same length as the
 | |
| 			     * target string.  Every time we match from a
 | |
| 			     * particular point in the target string, we
 | |
| 			     * stick a 1 at the corresponding point here.
 | |
| 			     * If we come round to the same branch again, and
 | |
| 			     * there is already a 1, then the test fails.
 | |
| 			     */
 | |
| 			    opnd = P_OPERAND(scan);
 | |
| 			    ptrp = opnd++;
 | |
| 			    if (!ptrp->p) {
 | |
| 				ptrp->p = (unsigned char *)
 | |
| 				    zshcalloc((patinend - patinstart) + 1);
 | |
| 				pfree = 1;
 | |
| 			    }
 | |
| 			    ptr = ptrp->p + (patinput - patinstart);
 | |
| 
 | |
| 			    /*
 | |
| 			     * Without approximation, this is just a
 | |
| 			     * single bit test.  With approximation, we
 | |
| 			     * need to know how many errors there were
 | |
| 			     * last time we made the test.  If errsfound
 | |
| 			     * is now smaller than it was, hence we can
 | |
| 			     * make more approximations in the remaining
 | |
| 			     * code, we continue with the test.
 | |
| 			     * (This is why the max number of errors is
 | |
| 			     * 254, not 255.)
 | |
| 			     */
 | |
| 			    if (*ptr && errsfound + 1 >= *ptr)
 | |
| 				ret = 0;
 | |
| 			    *ptr = errsfound + 1;
 | |
| 			} else
 | |
| 			    opnd = P_OPERAND(scan);
 | |
| 			if (ret)
 | |
| 			    ret = patmatch(opnd);
 | |
| 			if (pfree) {
 | |
| 			    zfree((char *)ptrp->p,
 | |
| 				  (patinend - patinstart) + 1);
 | |
| 			    ptrp->p = NULL;
 | |
| 			}
 | |
| 			if (ret)
 | |
| 			    return 1;
 | |
| 			scan = PATNEXT(scan);
 | |
| 		    }
 | |
| 		    patinput = save;
 | |
| 		    patglobflags = savglobflags;
 | |
| 		    errsfound = saverrsfound;
 | |
| 		    DPUTS(P_OP(scan) == P_WBRANCH,
 | |
| 			  "BUG: WBRANCH not first choice.");
 | |
| 		    next = PATNEXT(scan);
 | |
| 		} while (scan && P_ISBRANCH(scan));
 | |
| 		return 0;
 | |
| 	    }
 | |
| 	    break;
 | |
| 	case P_STAR:
 | |
| 	    /* Handle specially for speed, although really P_ONEHASH+P_ANY */
 | |
| 	    while (P_OP(next) == P_STAR) {
 | |
| 		/*
 | |
| 		 * If there's another * following we can optimise it
 | |
| 		 * out.  Chains of *'s can give pathologically bad
 | |
| 		 * performance.
 | |
| 		 */
 | |
| 		scan = next;
 | |
| 		next = PATNEXT(scan);
 | |
| 	    }
 | |
| 	    /*FALLTHROUGH*/
 | |
| 	case P_ONEHASH:
 | |
| 	case P_TWOHASH:
 | |
| 	    /*
 | |
| 	     * This is just simple cases, matching one character.
 | |
| 	     * With approximations, we still handle * this way, since
 | |
| 	     * no approximation is ever necessary, but other closures
 | |
| 	     * are handled by the more complicated branching method
 | |
| 	     */
 | |
| 	    op = P_OP(scan);
 | |
| 	    /* Note that no counts possibly metafied characters */
 | |
| 	    start = patinput;
 | |
| 	    {
 | |
| 		char *lastcharstart;
 | |
| 		/*
 | |
| 		 * Array to record the start of characters for
 | |
| 		 * backtracking.
 | |
| 		 */
 | |
| 		VARARR(char, charstart, patinend-patinput);
 | |
| 		memset(charstart, 0, patinend-patinput);
 | |
| 
 | |
| 		if (op == P_STAR) {
 | |
| 		    for (no = 0; patinput < patinend;
 | |
| 			 CHARINC(patinput, patinend))
 | |
| 		    {
 | |
| 			charstart[patinput-start] = 1;
 | |
| 			no++;
 | |
| 		    }
 | |
| 		    /* simple optimization for reasonably common case */
 | |
| 		    if (P_OP(next) == P_END)
 | |
| 			return 1;
 | |
| 		} else {
 | |
| 		    DPUTS(patglobflags & 0xff,
 | |
| 			  "BUG: wrong backtracking with approximation.");
 | |
| 		    if (!globdots && P_NOTDOT(P_OPERAND(scan)) &&
 | |
| 			patinput == patinstart && patinput < patinend &&
 | |
| 			CHARREF(patinput, patinend) == ZWC('.'))
 | |
| 			return 0;
 | |
| 		    no = patrepeat(P_OPERAND(scan), charstart);
 | |
| 		}
 | |
| 		min = (op == P_TWOHASH) ? 1 : 0;
 | |
| 		/*
 | |
| 		 * Lookahead to avoid useless matches. This is not possible
 | |
| 		 * with approximation.
 | |
| 		 */
 | |
| 		if (P_OP(next) == P_EXACTLY && P_LS_LEN(next) &&
 | |
| 		    !(patglobflags & 0xff)) {
 | |
| 		    char *nextop = P_LS_STR(next);
 | |
| #ifdef MULTIBYTE_SUPPORT
 | |
| 		    /* else second argument of CHARREF isn't used */
 | |
| 		    int nextlen = P_LS_LEN(next);
 | |
| #endif
 | |
| 		    /*
 | |
| 		     * If that P_EXACTLY is last (common in simple patterns,
 | |
| 		     * such as *.c), then it can be only be matched at one
 | |
| 		     * point in the test string, so record that.
 | |
| 		     */
 | |
| 		    if (P_OP(PATNEXT(next)) == P_END &&
 | |
| 			!(patflags & PAT_NOANCH)) {
 | |
| 			int ptlen = patinend - patinput;
 | |
| 			int lenmatch = patinend -
 | |
| 			    (min ? CHARNEXT(start, patinend) : start);
 | |
| 			/* Are we in the right range? */
 | |
| 			if (P_LS_LEN(next) > lenmatch ||
 | |
| 			    P_LS_LEN(next) < ptlen)
 | |
| 			    return 0;
 | |
| 			/* Yes, just position appropriately and test. */
 | |
| 			patinput += ptlen - P_LS_LEN(next);
 | |
| 			/*
 | |
| 			 * Here we will need to be careful that patinput is not
 | |
| 			 * in the middle of a multibyte character.
 | |
| 			 */
 | |
| 			/* Continue loop with P_EXACTLY test. */
 | |
| 			break;
 | |
| 		    }
 | |
| 		    nextch = CHARREF(nextop, nextop + nextlen);
 | |
| 		} else
 | |
| 		    nextch = PEOF;
 | |
| 		savglobflags = patglobflags;
 | |
| 		saverrsfound = errsfound;
 | |
| 		lastcharstart = charstart + (patinput - start);
 | |
| 		if (no >= min) {
 | |
| 		    for (;;) {
 | |
| 			patint_t charmatch_cache;
 | |
| 			if (nextch == PEOF ||
 | |
| 			    (patinput < patinend &&
 | |
| 			     CHARMATCH_EXPR(CHARREF(patinput, patinend),
 | |
| 					    nextch))) {
 | |
| 			    if (patmatch(next))
 | |
| 				return 1;
 | |
| 			}
 | |
| 			if (--no < min)
 | |
| 			    break;
 | |
| 			/* find start of previous full character */
 | |
| 			while (!*--lastcharstart)
 | |
| 			    DPUTS(lastcharstart < charstart,
 | |
| 				  "lastcharstart invalid");
 | |
| 			patinput = start + (lastcharstart-charstart);
 | |
| 			patglobflags = savglobflags;
 | |
| 			errsfound = saverrsfound;
 | |
| 		    }
 | |
| 		}
 | |
| 	    }
 | |
| 	    /*
 | |
| 	     * As with branches, the patmatch(next) stuff for *
 | |
| 	     * handles approximation, so we don't need to try
 | |
| 	     * anything here.
 | |
| 	     */
 | |
| 	    return 0;
 | |
| 	case P_ISSTART:
 | |
| 	    if (patinput != patinstart || (patflags & PAT_NOTSTART))
 | |
| 		fail = 1;
 | |
| 	    break;
 | |
| 	case P_ISEND:
 | |
| 	    if (patinput < patinend || (patflags & PAT_NOTEND))
 | |
| 		fail = 1;
 | |
| 	    break;
 | |
| 	case P_COUNTSTART:
 | |
| 	    {
 | |
| 		/*
 | |
| 		 * Save and restore the current count and the
 | |
| 		 * start pointer in case the pattern has been
 | |
| 		 * executed by a previous repetition of a
 | |
| 		 * closure.
 | |
| 		 */
 | |
| 		long *curptr = &P_OPERAND(scan)[P_CT_CURRENT].l;
 | |
| 		long savecount = *curptr;
 | |
| 		unsigned char *saveptr = scan[P_CT_PTR].p;
 | |
| 		int ret;
 | |
| 
 | |
| 		*curptr = 0L;
 | |
| 		ret = patmatch(P_OPERAND(scan));
 | |
| 		*curptr = savecount;
 | |
| 		scan[P_CT_PTR].p = saveptr;
 | |
| 		return ret;
 | |
| 	    }
 | |
| 	case P_COUNT:
 | |
| 	    {
 | |
| 		/* (#cN,M): execution is relatively straightforward */
 | |
| 		long cur = scan[P_CT_CURRENT].l;
 | |
| 		long min = scan[P_CT_MIN].l;
 | |
| 		long max = scan[P_CT_MAX].l;
 | |
| 
 | |
| 		if (cur && cur >= min &&
 | |
| 		    (unsigned char *)patinput == scan[P_CT_PTR].p) {
 | |
| 		    /*
 | |
| 		     * Not at the first attempt to match so
 | |
| 		     * the previous attempt managed zero length.
 | |
| 		     * We can do this indefinitely so there's
 | |
| 		     * no point in going on.  Simply try to
 | |
| 		     * match the remainder of the pattern.
 | |
| 		     */
 | |
| 		    return patmatch(next);
 | |
| 		}
 | |
| 		scan[P_CT_PTR].p = (unsigned char *)patinput;
 | |
| 
 | |
| 		if (max < 0 || cur < max) {
 | |
| 		    char *patinput_thistime = patinput;
 | |
| 		    scan[P_CT_CURRENT].l = cur + 1;
 | |
| 		    if (patmatch(scan + P_CT_OPERAND))
 | |
| 			return 1;
 | |
| 		    scan[P_CT_CURRENT].l = cur;
 | |
| 		    patinput = patinput_thistime;
 | |
| 		}
 | |
| 		if (cur < min)
 | |
| 		    return 0;
 | |
| 		return patmatch(next);
 | |
| 	    }
 | |
| 	case P_END:
 | |
| 	    if (!(fail = (patinput < patinend && !(patflags & PAT_NOANCH))))
 | |
| 		return 1;
 | |
| 	    break;
 | |
| #ifdef DEBUG
 | |
| 	default:
 | |
| 	    dputs("BUG: bad operand in patmatch.");
 | |
| 	    return 0;
 | |
| 	    break;
 | |
| #endif
 | |
| 	}
 | |
| 
 | |
| 	if (fail) {
 | |
| 	    if (errsfound < (patglobflags & 0xff) &&
 | |
| 		(forceerrs == -1 || errsfound < forceerrs)) {
 | |
| 		/*
 | |
| 		 * Approximation code.  There are four possibilities
 | |
| 		 *
 | |
| 		 * 1. omit character from input string
 | |
| 		 * 2. transpose characters in input and pattern strings
 | |
| 		 * 3. omit character in both input and pattern strings
 | |
| 		 * 4. omit character from pattern string.
 | |
| 		 *
 | |
| 		 * which we try in that order.
 | |
| 		 *
 | |
| 		 * Of these, 2, 3 and 4 require an exact match string
 | |
| 		 * (P_EXACTLY) while 1, 2 and 3 require that we not
 | |
| 		 * have reached the end of the input string.
 | |
| 		 *
 | |
| 		 * Note in each case after making the approximation we
 | |
| 		 * need to retry the *same* pattern; this is what
 | |
| 		 * requires exactpos, a slightly doleful way of
 | |
| 		 * communicating with the exact character matcher.
 | |
| 		 */
 | |
| 		char *savexact = exactpos;
 | |
| 		save = patinput;
 | |
| 		savglobflags = patglobflags;
 | |
| 		saverrsfound = ++errsfound;
 | |
| 		fail = 0;
 | |
| 
 | |
| 		DPUTS(P_OP(scan) != P_EXACTLY && exactpos,
 | |
| 		      "BUG: non-exact match has set exactpos");
 | |
| 
 | |
| 		/* Try omitting a character from the input string */
 | |
| 		if (patinput < patinend) {
 | |
| 		    CHARINC(patinput, patinend);
 | |
| 		    /* If we are not on an exact match, then this is
 | |
| 		     * our last gasp effort, so we can optimize out
 | |
| 		     * the recursive call.
 | |
| 		     */
 | |
| 		    if (P_OP(scan) != P_EXACTLY)
 | |
| 			continue;
 | |
| 		    if (patmatch(scan))
 | |
| 			return 1;
 | |
| 		}
 | |
| 
 | |
| 		if (P_OP(scan) == P_EXACTLY) {
 | |
| 		    char *nextexact = savexact;
 | |
| 		    DPUTS(!savexact,
 | |
| 			  "BUG: exact match has not set exactpos");
 | |
| 		    CHARINC(nextexact, exactend);
 | |
| 
 | |
| 		    if (save < patinend) {
 | |
| 			char *nextin = save;
 | |
| 			CHARINC(nextin, patinend);
 | |
| 			patglobflags = savglobflags;
 | |
| 			errsfound = saverrsfound;
 | |
| 			exactpos = savexact;
 | |
| 
 | |
| 			/*
 | |
| 			 * Try swapping two characters in patinput and
 | |
| 			 * exactpos
 | |
| 			 */
 | |
| 			if (save < patinend && nextin < patinend &&
 | |
| 			    nextexact < exactend) {
 | |
| 			    patint_t cin0 = CHARREF(save, patinend);
 | |
| 			    patint_t cpa0 = CHARREF(exactpos, exactend);
 | |
| 			    patint_t cin1 = CHARREF(nextin, patinend);
 | |
| 			    patint_t cpa1 = CHARREF(nextexact, exactend);
 | |
| 
 | |
| 			    if (CHARMATCH(cin0, cpa1) &&
 | |
| 				CHARMATCH(cin1, cpa0)) {
 | |
| 				patinput = nextin;
 | |
| 				CHARINC(patinput, patinend);
 | |
| 				exactpos = nextexact;
 | |
| 				CHARINC(exactpos, exactend);
 | |
| 				if (patmatch(scan))
 | |
| 				    return 1;
 | |
| 
 | |
| 				patglobflags = savglobflags;
 | |
| 				errsfound = saverrsfound;
 | |
| 			    }
 | |
| 			}
 | |
| 
 | |
| 			/*
 | |
| 			 * Try moving up both strings.
 | |
| 			 */
 | |
| 			patinput = nextin;
 | |
| 			exactpos = nextexact;
 | |
| 			if (patmatch(scan))
 | |
| 			    return 1;
 | |
| 
 | |
| 			patinput = save;
 | |
| 			patglobflags = savglobflags;
 | |
| 			errsfound = saverrsfound;
 | |
| 			exactpos = savexact;
 | |
| 		    }
 | |
| 
 | |
| 		    DPUTS(exactpos == exactend, "approximating too far");
 | |
| 		    /*
 | |
| 		     * Try moving up the exact match pattern.
 | |
| 		     * This must be the last attempt, so just loop
 | |
| 		     * instead of calling recursively.
 | |
| 		     */
 | |
| 		    CHARINC(exactpos, exactend);
 | |
| 		    continue;
 | |
| 		}
 | |
| 	    }
 | |
| 	    exactpos = NULL;
 | |
| 	    return 0;
 | |
| 	}
 | |
| 
 | |
| 	scan = next;
 | |
| 
 | |
| 	/* Allow handlers to run once per loop */
 | |
| 	check_for_signals();
 | |
|     }
 | |
| 
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| /**/
 | |
| #ifdef MULTIBYTE_SUPPORT
 | |
| 
 | |
| /*
 | |
|  * See if character ch matches a pattern range specification.
 | |
|  * The null-terminated specification is in range; the test
 | |
|  * character is in ch.
 | |
|  *
 | |
|  * zmb is one of the enum defined above charref(), for indicating
 | |
|  * incomplete or invalid multibyte characters.
 | |
|  *
 | |
|  * indptr is used by completion matching, which is why this
 | |
|  * function is exported.  If indptr is not NULL we set *indptr
 | |
|  * to the index of the character in the range string, adjusted
 | |
|  * in the case of "A-B" ranges such that A would count as its
 | |
|  * normal index (say IA), B would count as IA + (B-A), and any
 | |
|  * character within the range as appropriate.  We're not strictly
 | |
|  * guaranteed this fits within a wint_t, but if this is Unicode
 | |
|  * in 32 bits we have a fair amount of distance left over.
 | |
|  *
 | |
|  * mtp is used in the same circumstances.  *mtp returns the match type:
 | |
|  * 0 for a standard character, else the PP_ index.  It's not
 | |
|  * useful if the match failed.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| mod_export int
 | |
| mb_patmatchrange(char *range, wchar_t ch, int zmb_ind, wint_t *indptr, int *mtp)
 | |
| {
 | |
|     wchar_t r1, r2;
 | |
| 
 | |
|     if (indptr)
 | |
| 	*indptr = 0;
 | |
|     /*
 | |
|      * Careful here: unlike other strings, range is a NULL-terminated,
 | |
|      * metafied string, because we need to treat the Posix and hyphenated
 | |
|      * ranges specially.
 | |
|      */
 | |
|     while (*range) {
 | |
| 	if (imeta(STOUC(*range))) {
 | |
| 	    int swtype = STOUC(*range++) - STOUC(Meta);
 | |
| 	    if (mtp)
 | |
| 		*mtp = swtype;
 | |
| 	    switch (swtype) {
 | |
| 	    case 0:
 | |
| 		/* ordinary metafied character */
 | |
| 		range--;
 | |
| 		if (metacharinc(&range) == ch)
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_ALPHA:
 | |
| 		if (iswalpha(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_ALNUM:
 | |
| 		if (iswalnum(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_ASCII:
 | |
| 		if ((ch & ~0x7f) == 0)
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_BLANK:
 | |
| #if !defined(HAVE_ISWBLANK) && !defined(iswblank)
 | |
| /*
 | |
|  * iswblank() is GNU and C99. There's a remote chance that some
 | |
|  * systems still don't support it (but would support the other ones
 | |
|  * if MULTIBYTE_SUPPORT is enabled).
 | |
|  */
 | |
| #define iswblank(c) (c == L' ' || c == L'\t')
 | |
| #endif
 | |
| 		if (iswblank(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_CNTRL:
 | |
| 		if (iswcntrl(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_DIGIT:
 | |
| 		if (iswdigit(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_GRAPH:
 | |
| 		if (iswgraph(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_LOWER:
 | |
| 		if (iswlower(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_PRINT:
 | |
| 		if (WC_ISPRINT(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_PUNCT:
 | |
| 		if (iswpunct(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_SPACE:
 | |
| 		if (iswspace(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_UPPER:
 | |
| 		if (iswupper(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_XDIGIT:
 | |
| 		if (iswxdigit(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_IDENT:
 | |
| 		if (wcsitype(ch, IIDENT))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_IFS:
 | |
| 		if (wcsitype(ch, ISEP))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_IFSSPACE:
 | |
| 		/* must be ASCII space character */
 | |
| 		if (ch < 128 && iwsep((int)ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_WORD:
 | |
| 		if (wcsitype(ch, IWORD))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_RANGE:
 | |
| 		r1 = metacharinc(&range);
 | |
| 		r2 = metacharinc(&range);
 | |
| 		if (r1 <= ch && ch <= r2) {
 | |
| 		    if (indptr)
 | |
| 			*indptr += ch - r1;
 | |
| 		    return 1;
 | |
| 		}
 | |
| 		/* Careful not to screw up counting with bogus range */
 | |
| 		if (indptr && r1 < r2) {
 | |
| 		    /*
 | |
| 		     * This gets incremented again below to get
 | |
| 		     * us past the range end.  This is correct.
 | |
| 		     */
 | |
| 		    *indptr += r2 - r1;
 | |
| 		}
 | |
| 		break;
 | |
| 	    case PP_INCOMPLETE:
 | |
| 		if (zmb_ind == ZMB_INCOMPLETE)
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_INVALID:
 | |
| 		if (zmb_ind == ZMB_INVALID)
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_UNKWN:
 | |
| 		DPUTS(1, "BUG: unknown posix range passed through.\n");
 | |
| 		break;
 | |
| 	    default:
 | |
| 		DPUTS(1, "BUG: unknown metacharacter in range.");
 | |
| 		break;
 | |
| 	    }
 | |
| 	} else if (metacharinc(&range) == ch) {
 | |
| 	    if (mtp)
 | |
| 		*mtp = 0;
 | |
| 	    return 1;
 | |
| 	}
 | |
| 	if (indptr)
 | |
| 	    (*indptr)++;
 | |
|     }
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|  * This is effectively the reverse of mb_patmatchrange().
 | |
|  * Given a range descriptor of the same form, and an index into it,
 | |
|  * try to determine the character that is matched.  If the index
 | |
|  * points to a [:...:] generic style match, set chr to WEOF and
 | |
|  * return the type in mtp instead.  Return 1 if successful, 0 if
 | |
|  * there was no corresponding index.  Note all pointer arguments
 | |
|  * must be non-null.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| mod_export int
 | |
| mb_patmatchindex(char *range, wint_t ind, wint_t *chr, int *mtp)
 | |
| {
 | |
|     wchar_t r1, r2, rchr;
 | |
|     wint_t rdiff;
 | |
| 
 | |
|     *chr = WEOF;
 | |
|     *mtp = 0;
 | |
| 
 | |
|     while (*range) {
 | |
| 	if (imeta(STOUC(*range))) {
 | |
| 	    int swtype = STOUC(*range++) - STOUC(Meta);
 | |
| 	    switch (swtype) {
 | |
| 	    case 0:
 | |
| 		range--;
 | |
| 		rchr = metacharinc(&range);
 | |
| 		if (!ind) {
 | |
| 		    *chr = (wint_t) rchr;
 | |
| 		    return 1;
 | |
| 		}
 | |
| 		break;
 | |
| 
 | |
| 	    case PP_ALPHA:
 | |
| 	    case PP_ALNUM:
 | |
| 	    case PP_ASCII:
 | |
| 	    case PP_BLANK:
 | |
| 	    case PP_CNTRL:
 | |
| 	    case PP_DIGIT:
 | |
| 	    case PP_GRAPH:
 | |
| 	    case PP_LOWER:
 | |
| 	    case PP_PRINT:
 | |
| 	    case PP_PUNCT:
 | |
| 	    case PP_SPACE:
 | |
| 	    case PP_UPPER:
 | |
| 	    case PP_XDIGIT:
 | |
| 	    case PP_IDENT:
 | |
| 	    case PP_IFS:
 | |
| 	    case PP_IFSSPACE:
 | |
| 	    case PP_WORD:
 | |
| 	    case PP_INCOMPLETE:
 | |
| 	    case PP_INVALID:
 | |
| 		if (!ind) {
 | |
| 		    *mtp = swtype;
 | |
| 		    return 1;
 | |
| 		}
 | |
| 		break;
 | |
| 
 | |
| 	    case PP_RANGE:
 | |
| 		r1 = metacharinc(&range);
 | |
| 		r2 = metacharinc(&range);
 | |
| 		rdiff = (wint_t)r2 - (wint_t)r1; 
 | |
| 		if (rdiff >= ind) {
 | |
| 		    *chr = (wint_t)r1 + ind;
 | |
| 		    return 1;
 | |
| 		}
 | |
| 		/* note the extra decrement to ind below */
 | |
| 		ind -= rdiff;
 | |
| 		break;
 | |
| 	    case PP_UNKWN:
 | |
| 		DPUTS(1, "BUG: unknown posix range passed through.\n");
 | |
| 		break;
 | |
| 	    default:
 | |
| 		DPUTS(1, "BUG: unknown metacharacter in range.");
 | |
| 		break;
 | |
| 	    }
 | |
| 	} else {
 | |
| 	    rchr = metacharinc(&range);
 | |
| 	    if (!ind) {
 | |
| 		*chr = (wint_t)rchr;
 | |
| 		return 1;
 | |
| 	    }
 | |
| 	}
 | |
| 	if (!ind--)
 | |
| 	    break;
 | |
|     }
 | |
| 
 | |
|     /* No corresponding index. */
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| /**/
 | |
| #endif /* MULTIBYTE_SUPPORT */
 | |
| 
 | |
| /*
 | |
|  * Identical function to mb_patmatchrange() above for single-byte
 | |
|  * characters.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| mod_export int
 | |
| patmatchrange(char *range, int ch, int *indptr, int *mtp)
 | |
| {
 | |
|     int r1, r2;
 | |
| 
 | |
|     if (indptr)
 | |
| 	*indptr = 0;
 | |
|     /*
 | |
|      * Careful here: unlike other strings, range is a NULL-terminated,
 | |
|      * metafied string, because we need to treat the Posix and hyphenated
 | |
|      * ranges specially.
 | |
|      */
 | |
|     for (; *range; range++) {
 | |
| 	if (imeta(STOUC(*range))) {
 | |
| 	    int swtype = STOUC(*range) - STOUC(Meta);
 | |
| 	    if (mtp)
 | |
| 		*mtp = swtype;
 | |
| 	    switch (swtype) {
 | |
| 	    case 0:
 | |
| 		if (STOUC(*++range ^ 32) == ch)
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_ALPHA:
 | |
| 		if (isalpha(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_ALNUM:
 | |
| 		if (isalnum(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_ASCII:
 | |
| 		if ((ch & ~0x7f) == 0)
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_BLANK:
 | |
| #if !defined(HAVE_ISBLANK) && !defined(isblank)
 | |
| /*
 | |
|  * isblank() is GNU and C99. There's a remote chance that some
 | |
|  * systems still don't support it.
 | |
|  */
 | |
| #define isblank(c) (c == ' ' || c == '\t')
 | |
| #endif
 | |
| 		if (isblank(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_CNTRL:
 | |
| 		if (iscntrl(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_DIGIT:
 | |
| 		if (isdigit(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_GRAPH:
 | |
| 		if (isgraph(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_LOWER:
 | |
| 		if (islower(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_PRINT:
 | |
| 		if (ZISPRINT(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_PUNCT:
 | |
| 		if (ispunct(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_SPACE:
 | |
| 		if (isspace(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_UPPER:
 | |
| 		if (isupper(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_XDIGIT:
 | |
| 		if (isxdigit(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_IDENT:
 | |
| 		if (iident(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_IFS:
 | |
| 		if (isep(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_IFSSPACE:
 | |
| 		if (iwsep(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_WORD:
 | |
| 		if (iword(ch))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	    case PP_RANGE:
 | |
| 		range++;
 | |
| 		r1 = STOUC(UNMETA(range));
 | |
| 		METACHARINC(range);
 | |
| 		r2 = STOUC(UNMETA(range));
 | |
| 		if (*range == Meta)
 | |
| 		    range++;
 | |
| 		if (r1 <= ch && ch <= r2) {
 | |
| 		    if (indptr)
 | |
| 			*indptr += ch - r1;
 | |
| 		    return 1;
 | |
| 		}
 | |
| 		if (indptr && r1 < r2)
 | |
| 		    *indptr += r2 - r1;
 | |
| 		break;
 | |
| 	    case PP_INCOMPLETE:
 | |
| 	    case PP_INVALID:
 | |
| 		/* Never true if not in multibyte mode */
 | |
| 		break;
 | |
| 	    case PP_UNKWN:
 | |
| 		DPUTS(1, "BUG: unknown posix range passed through.\n");
 | |
| 		break;
 | |
| 	    default:
 | |
| 		DPUTS(1, "BUG: unknown metacharacter in range.");
 | |
| 		break;
 | |
| 	    }
 | |
| 	} else if (STOUC(*range) == ch) {
 | |
| 	    if (mtp)
 | |
| 		*mtp = 0;
 | |
| 	    return 1;
 | |
| 	}
 | |
| 	if (indptr)
 | |
| 	    (*indptr)++;
 | |
|     }
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| /**/
 | |
| #ifndef MULTIBYTE_SUPPORT
 | |
| 
 | |
| /*
 | |
|  * Identical function to mb_patmatchindex() above for single-byte
 | |
|  * characters.  Here -1 represents a character that needs a special type.
 | |
|  *
 | |
|  * Unlike patmatchrange, we only need this in ZLE, which always
 | |
|  * uses MULTIBYTE_SUPPORT if compiled in; hence we don't use
 | |
|  * this function in that case.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| mod_export int
 | |
| patmatchindex(char *range, int ind, int *chr, int *mtp)
 | |
| {
 | |
|     int r1, r2, rdiff, rchr;
 | |
| 
 | |
|     *chr = -1;
 | |
|     *mtp = 0;
 | |
| 
 | |
|     for (; *range; range++) {
 | |
| 	if (imeta(STOUC(*range))) {
 | |
| 	    int swtype = STOUC(*range) - STOUC(Meta);
 | |
| 	    switch (swtype) {
 | |
| 	    case 0:
 | |
| 		/* ordinary metafied character */
 | |
| 		rchr = STOUC(*++range) ^ 32;
 | |
| 		if (!ind) {
 | |
| 		    *chr = rchr;
 | |
| 		    return 1;
 | |
| 		}
 | |
| 		break;
 | |
| 
 | |
| 	    case PP_ALPHA:
 | |
| 	    case PP_ALNUM:
 | |
| 	    case PP_ASCII:
 | |
| 	    case PP_BLANK:
 | |
| 	    case PP_CNTRL:
 | |
| 	    case PP_DIGIT:
 | |
| 	    case PP_GRAPH:
 | |
| 	    case PP_LOWER:
 | |
| 	    case PP_PRINT:
 | |
| 	    case PP_PUNCT:
 | |
| 	    case PP_SPACE:
 | |
| 	    case PP_UPPER:
 | |
| 	    case PP_XDIGIT:
 | |
| 	    case PP_IDENT:
 | |
| 	    case PP_IFS:
 | |
| 	    case PP_IFSSPACE:
 | |
| 	    case PP_WORD:
 | |
| 	    case PP_INCOMPLETE:
 | |
| 	    case PP_INVALID:
 | |
| 		if (!ind) {
 | |
| 		    *mtp = swtype;
 | |
| 		    return 1;
 | |
| 		}
 | |
| 		break;
 | |
| 
 | |
| 	    case PP_RANGE:
 | |
| 		range++;
 | |
| 		r1 = STOUC(UNMETA(range));
 | |
| 		METACHARINC(range);
 | |
| 		r2 = STOUC(UNMETA(range));
 | |
| 		if (*range == Meta)
 | |
| 		    range++;
 | |
| 		rdiff = r2 - r1; 
 | |
| 		if (rdiff >= ind) {
 | |
| 		    *chr = r1 + ind;
 | |
| 		    return 1;
 | |
| 		}
 | |
| 		/* note the extra decrement to ind below */
 | |
| 		ind -= rdiff;
 | |
| 		break;
 | |
| 	    case PP_UNKWN:
 | |
| 		DPUTS(1, "BUG: unknown posix range passed through.\n");
 | |
| 		break;
 | |
| 	    default:
 | |
| 		DPUTS(1, "BUG: unknown metacharacter in range.");
 | |
| 		break;
 | |
| 	    }
 | |
| 	} else {
 | |
| 	    if (!ind) {
 | |
| 		*chr = STOUC(*range);
 | |
| 		return 1;
 | |
| 	    }
 | |
| 	}
 | |
| 	if (!ind--)
 | |
| 	    break;
 | |
|     }
 | |
| 
 | |
|     /* No corresponding index. */
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| /**/
 | |
| #endif /* MULTIBYTE_SUPPORT */
 | |
| 
 | |
| /*
 | |
|  * Repeatedly match something simple and say how many times.
 | |
|  * charstart is an array parallel to that starting at patinput
 | |
|  * and records the start of (possibly multibyte) characters
 | |
|  * to aid in later backtracking.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| static int patrepeat(Upat p, char *charstart)
 | |
| {
 | |
|     int count = 0;
 | |
|     patint_t tch, charmatch_cache;
 | |
|     char *scan, *opnd;
 | |
| 
 | |
|     scan = patinput;
 | |
|     opnd = (char *)P_OPERAND(p);
 | |
| 
 | |
|     switch(P_OP(p)) {
 | |
| #ifdef DEBUG
 | |
|     case P_ANY:
 | |
| 	dputs("BUG: ?# did not get optimized to *");
 | |
| 	return 0;
 | |
| 	break;
 | |
| #endif
 | |
|     case P_EXACTLY:
 | |
| 	DPUTS(P_LS_LEN(p) != 1, "closure following more than one character");
 | |
| 	tch = CHARREF(P_LS_STR(p), P_LS_STR(p) + P_LS_LEN(p));
 | |
| 	while (scan < patinend &&
 | |
| 	       CHARMATCH_EXPR(CHARREF(scan, patinend), tch)) {
 | |
| 	    charstart[scan-patinput] = 1;
 | |
| 	    count++;
 | |
| 	    CHARINC(scan, patinend);
 | |
| 	}
 | |
| 	break;
 | |
|     case P_ANYOF:
 | |
|     case P_ANYBUT:
 | |
| 	while (scan < patinend) {
 | |
| #ifdef MULTIBYTE_SUPPORT
 | |
| 	    int zmb_ind;
 | |
| 	    wchar_t cr = charref(scan, patinend, &zmb_ind);
 | |
| 	    if (patglobflags & GF_MULTIBYTE) {
 | |
| 		if (mb_patmatchrange(opnd, cr, zmb_ind, NULL, NULL) ^
 | |
| 		    (P_OP(p) == P_ANYOF))
 | |
| 		    break;
 | |
| 	    } else if (patmatchrange(opnd, (int)cr, NULL, NULL) ^
 | |
| 		       (P_OP(p) == P_ANYOF))
 | |
| 		break;
 | |
| #else
 | |
| 	    if (patmatchrange(opnd, CHARREF(scan, patinend), NULL, NULL) ^
 | |
| 		(P_OP(p) == P_ANYOF))
 | |
| 		break;
 | |
| #endif
 | |
| 	    charstart[scan-patinput] = 1;
 | |
| 	    count++;
 | |
| 	    CHARINC(scan, patinend);
 | |
| 	}
 | |
| 	break;
 | |
| #ifdef DEBUG
 | |
|     default:
 | |
| 	dputs("BUG: something very strange is happening in patrepeat");
 | |
| 	return 0;
 | |
| 	break;
 | |
| #endif
 | |
|     }
 | |
| 
 | |
|     patinput = scan;
 | |
|     return count;
 | |
| }
 | |
| 
 | |
| /* Free a patprog. */
 | |
| 
 | |
| /**/
 | |
| mod_export void
 | |
| freepatprog(Patprog prog)
 | |
| {
 | |
|     if (prog && prog != dummy_patprog1 && prog != dummy_patprog2)
 | |
| 	zfree(prog, prog->size);
 | |
| }
 | |
| 
 | |
| /* Disable or reenable a pattern character */
 | |
| 
 | |
| /**/
 | |
| int
 | |
| pat_enables(const char *cmd, char **patp, int enable)
 | |
| {
 | |
|     int ret = 0;
 | |
|     const char **stringp;
 | |
|     char *disp;
 | |
| 
 | |
|     if (!*patp) {
 | |
| 	int done = 0;
 | |
| 	for (stringp = zpc_strings, disp = zpc_disables;
 | |
| 	     stringp < zpc_strings + ZPC_COUNT;
 | |
| 	     stringp++, disp++) {
 | |
| 	    if (!*stringp)
 | |
| 		continue;
 | |
| 	    if (enable ? *disp : !*disp)
 | |
| 		continue;
 | |
| 	    if (done)
 | |
| 		putc(' ', stdout);
 | |
| 	    printf("'%s'", *stringp);
 | |
| 	    done = 1;
 | |
| 	}
 | |
| 	if (done)
 | |
| 	    putc('\n', stdout);
 | |
| 	return 0;
 | |
|     }
 | |
| 
 | |
|     for (; *patp; patp++) {
 | |
| 	for (stringp = zpc_strings, disp = zpc_disables;
 | |
| 	     stringp < zpc_strings + ZPC_COUNT;
 | |
| 	     stringp++, disp++) {
 | |
| 	    if (*stringp && !strcmp(*stringp, *patp)) {
 | |
| 		*disp = (char)!enable;
 | |
| 		break;
 | |
| 	    }
 | |
| 	}
 | |
| 	if (stringp == zpc_strings + ZPC_COUNT) {
 | |
| 	    zerrnam(cmd, "invalid pattern: %s", *patp);
 | |
| 	    ret = 1;
 | |
| 	}
 | |
|     }
 | |
| 
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Save the current state of pattern disables, returning the saved value.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| unsigned int
 | |
| savepatterndisables(void)
 | |
| {
 | |
|     unsigned int disables, bit;
 | |
|     char *disp;
 | |
| 
 | |
|     disables = 0;
 | |
|     for (bit = 1, disp = zpc_disables;
 | |
| 	 disp < zpc_disables + ZPC_COUNT;
 | |
| 	 bit <<= 1, disp++) {
 | |
| 	if (*disp)
 | |
| 	    disables |= bit;
 | |
|     }
 | |
|     return disables;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Function scope saving pattern enables.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| void
 | |
| startpatternscope(void)
 | |
| {
 | |
|     Zpc_disables_save newdis;
 | |
| 
 | |
|     newdis = (Zpc_disables_save)zalloc(sizeof(*newdis));
 | |
|     newdis->next = zpc_disables_stack;
 | |
|     newdis->disables = savepatterndisables();
 | |
| 
 | |
|     zpc_disables_stack = newdis;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Restore completely the state of pattern disables.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| void
 | |
| restorepatterndisables(unsigned int disables)
 | |
| {
 | |
|     char *disp;
 | |
|     unsigned int bit;
 | |
| 
 | |
|     for (bit = 1, disp = zpc_disables;
 | |
| 	 disp < zpc_disables + ZPC_COUNT;
 | |
| 	 bit <<= 1, disp++) {
 | |
| 	if (disables & bit)
 | |
| 	    *disp = 1;
 | |
| 	else
 | |
| 	    *disp = 0;
 | |
|     }
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Function scope to restore pattern enables if localpatterns is turned on.
 | |
|  */
 | |
| 
 | |
| /**/
 | |
| void
 | |
| endpatternscope(void)
 | |
| {
 | |
|     Zpc_disables_save olddis;
 | |
| 
 | |
|     olddis = zpc_disables_stack;
 | |
|     zpc_disables_stack = olddis->next;
 | |
| 
 | |
|     if (isset(LOCALPATTERNS))
 | |
| 	restorepatterndisables(olddis->disables);
 | |
| 
 | |
|     zfree(olddis, sizeof(*olddis));
 | |
| }
 | |
| 
 | |
| /* Reinitialise pattern disables */
 | |
| 
 | |
| /**/
 | |
| void
 | |
| clearpatterndisables(void)
 | |
| {
 | |
|     memset(zpc_disables, 0, ZPC_COUNT);
 | |
| }
 | |
| 
 | |
| 
 | |
| /* Check to see if str is eligible for filename generation. */
 | |
| 
 | |
| /**/
 | |
| mod_export int
 | |
| haswilds(char *str)
 | |
| {
 | |
|     char *start;
 | |
| 
 | |
|     /* `[' and `]' are legal even if bad patterns are usually not. */
 | |
|     if ((*str == Inbrack || *str == Outbrack) && !str[1])
 | |
| 	return 0;
 | |
| 
 | |
|     /* If % is immediately followed by ?, then that ? is     *
 | |
|      * not treated as a wildcard.  This is so you don't have *
 | |
|      * to escape job references such as %?foo.               */
 | |
|     if (str[0] == '%' && str[1] == Quest)
 | |
| 	str[1] = '?';
 | |
| 
 | |
|     /*
 | |
|      * Note that at this point zpc_special has not been set up.
 | |
|      */
 | |
|     start = str;
 | |
|     for (; *str; str++) {
 | |
| 	switch (*str) {
 | |
| 	    case Inpar:
 | |
| 		if ((!isset(SHGLOB) && !zpc_disables[ZPC_INPAR]) ||
 | |
| 		    (str > start && isset(KSHGLOB) &&
 | |
| 		     ((str[-1] == Quest && !zpc_disables[ZPC_KSH_QUEST]) ||
 | |
| 		      (str[-1] == Star && !zpc_disables[ZPC_KSH_STAR]) ||
 | |
| 		      (str[-1] == '+' && !zpc_disables[ZPC_KSH_PLUS]) ||
 | |
| 		      (str[-1] == Bang && !zpc_disables[ZPC_KSH_BANG]) ||
 | |
| 		      (str[-1] == '!' && !zpc_disables[ZPC_KSH_BANG2]) ||
 | |
| 		      (str[-1] == '@' && !zpc_disables[ZPC_KSH_AT]))))
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 
 | |
| 	    case Bar:
 | |
| 		if (!zpc_disables[ZPC_BAR])
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 
 | |
| 	    case Star:
 | |
| 		if (!zpc_disables[ZPC_STAR])
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 
 | |
| 	    case Inbrack:
 | |
| 		if (!zpc_disables[ZPC_INBRACK])
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 
 | |
| 	    case Inang:
 | |
| 		if (!zpc_disables[ZPC_INANG])
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 
 | |
| 	    case Quest:
 | |
| 		if (!zpc_disables[ZPC_QUEST])
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 
 | |
| 	    case Pound:
 | |
| 		if (isset(EXTENDEDGLOB) && !zpc_disables[ZPC_HASH])
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 
 | |
| 	    case Hat:
 | |
| 		if (isset(EXTENDEDGLOB) && !zpc_disables[ZPC_HAT])
 | |
| 		    return 1;
 | |
| 		break;
 | |
| 	}
 | |
|     }
 | |
|     return 0;
 | |
| }
 |