mirror of
				git://git.code.sf.net/p/zsh/code
				synced 2025-10-26 16:40:29 +01:00 
			
		
		
		
	add (#cN,M) globbing flag
This commit is contained in:
		
							parent
							
								
									60a05cf85a
								
							
						
					
					
						commit
						a81e5a609e
					
				
					 5 changed files with 178 additions and 8 deletions
				
			
		|  | @ -1,3 +1,9 @@ | |||
| 2007-07-27  Peter Stephenson  <p.w.stephenson@ntlworld.com> | ||||
| 
 | ||||
| 	* 23713: Doc/Zsh/expn.yo, Misc/globtests, Src/pattern.c, | ||||
| 	Test/D02glob.ztst: add (#cN,M) globbing flag to work like | ||||
| 	{N,M} in regular expressions. | ||||
| 
 | ||||
| 2007-07-27  Clint Adams  <clint@zsh.org> | ||||
| 
 | ||||
| 	* 23712: Completion/Unix/Command/_dvi: handle dvips -j. | ||||
|  |  | |||
|  | @ -1636,6 +1636,18 @@ item(tt(B))( | |||
| Deactivate backreferences, negating the effect of the tt(b) flag from that | ||||
| point on. | ||||
| ) | ||||
| item(tt(c)var(N)tt(,)var(M))( | ||||
| The flag tt(LPAR()#c)var(N)tt(,)var(M)tt(RPAR()) can be used anywhere | ||||
| that the tt(#) or tt(##) operators can be used; it cannot be combined | ||||
| with other globbing flags and a bad pattern error occurs if it is | ||||
| misplaced.  It is equivalent to the form tt({)var(N)tt(,)var(M)tt(}) in | ||||
| regular expressions.  The previous character or group is required to | ||||
| match between var(N) and var(M) times, inclusive.  The form | ||||
| tt(LPAR()#c)var(N)tt(RPAR()) requires exactly tt(N) matches; | ||||
| tt(LPAR()#c,)var(M)tt(RPAR()) is equivalent to specifying var(N) as 0; | ||||
| tt(LPAR()#c)var(N)tt(,RPAR()) specifies that there is no maximum limit | ||||
| on the number of matches. | ||||
| ) | ||||
| item(tt(m))( | ||||
| Set references to the match data for the entire string matched; this is | ||||
| similar to backreferencing and does not work in filename generation.  The | ||||
|  |  | |||
|  | @ -182,5 +182,15 @@ f atest/path    *((#s)|/)test((#e)|/)* | |||
| f path/testy    *((#s)|/)test((#e)|/)* | ||||
| f path/testy/ohyes *((#s)|/)test((#e)|/)* | ||||
| f path/atest/ohyes *((#s)|/)test((#e)|/)* | ||||
| t XabcdabcY        X(ab|c|d)(#c5)Y | ||||
| t XabcdabcY        X(ab|c|d)(#c1,5)Y | ||||
| t XabcdabcY        X(ab|c|d)(#c5,8)Y | ||||
| t XabcdabcY        X(ab|c|d)(#c4,)Y | ||||
| f XabcdabcY        X(ab|c|d)(#c6,)Y | ||||
| f XabcdabcY        X(ab|c|d)(#c1,4)Y | ||||
| t ZX               Z(|)(#c1)X | ||||
| t froofroo         (fro(#c2))(#c2) | ||||
| f froofroofroo     (fro(#c2))(#c2) | ||||
| f froofro          (fro(#c2))(#c2) | ||||
| EOT | ||||
| print "$failed tests failed." | ||||
|  |  | |||
							
								
								
									
										144
									
								
								Src/pattern.c
									
										
									
									
									
								
							
							
						
						
									
										144
									
								
								Src/pattern.c
									
										
									
									
									
								
							|  | @ -105,6 +105,8 @@ typedef union upat *Upat; | |||
| #define P_GFLAGS  0x08	/* long Match nothing and set globbing flags */ | ||||
| #define P_ISSTART 0x09  /* no   Match start of string. */ | ||||
| #define P_ISEND   0x0a  /* no   Match end of string. */ | ||||
| #define P_COUNTSTART 0x0b /* no Initialise P_COUNT */ | ||||
| #define P_COUNT   0x0c  /* 3*long uc* node Match a number of repetitions */ | ||||
| /* numbered so we can test bit 5 for a branch */ | ||||
| #define	P_BRANCH  0x20	/* node	Match this alternative, or the next... */ | ||||
| #define	P_WBRANCH 0x21	/* uc* node P_BRANCH, but match at least 1 char */ | ||||
|  | @ -179,6 +181,17 @@ typedef union upat *Upat; | |||
|  *   for top level file globs, e.g. ** / *.c~*foo.c | ||||
|  *                                    ^ I had to leave this space | ||||
|  * P_NUM*: zl is a zlong if that is 64-bit, else an unsigned long. | ||||
|  * | ||||
|  * P_COUNTSTART, P_COUNT: a P_COUNTSTART flags the start of a quantified | ||||
|  * closure (#cN,M) and is used to initialise the count.  Executing | ||||
|  * the pattern leads back to the P_COUNT, while the next links of the | ||||
|  * P_COUNTSTART and P_COUNT lead to the tail of the pattern: | ||||
|  * | ||||
|  *	       	        ---------------- | ||||
|  *     	       	       v       	        ^ | ||||
|  *        <COUNTSTART><COUNT>pattern<BACK> tail | ||||
|  *	     	    v      v  	  	    ^ | ||||
|  *	            ------------------------ | ||||
|  */ | ||||
| #define PP_ALPHA  1 | ||||
| #define PP_ALNUM  2 | ||||
|  | @ -211,6 +224,13 @@ typedef union upat *Upat; | |||
| #define P_LS_LEN(p)	((p)[1].l) /* can be used as lvalue */ | ||||
| #define P_LS_STR(p)	((char *)((p) + 2)) | ||||
| 
 | ||||
| /* Specific to P_COUNT: arguments as offset in nodes from operator */ | ||||
| #define P_CT_CURRENT	(1)	/* Current count */ | ||||
| #define P_CT_MIN	(2)     /* Minimum count */ | ||||
| #define P_CT_MAX	(3)	/* Maximum count, -1 for none */ | ||||
| #define P_CT_PTR	(4)	/* Pointer to last match start */ | ||||
| #define P_CT_OPERAND	(5)	/* Operand of P_COUNT */ | ||||
| 
 | ||||
| /* Flags needed when pattern is executed */ | ||||
| #define P_SIMPLE        0x01	/* Simple enough to be #/## operand. */ | ||||
| #define P_HSTART        0x02	/* Starts with # or ##'d pattern. */ | ||||
|  | @ -766,7 +786,7 @@ patcompswitch(int paren, int *flagp) | |||
| 		     * no top-level |'s. | ||||
| 		     * | ||||
| 		     * No gfchanged, as nothing to follow branch at top | ||||
| 		     * level.  | ||||
| 		     * level. | ||||
| 		     */ | ||||
| 		    union upat up; | ||||
| 		    gfnode = patnode(P_GFLAGS); | ||||
|  | @ -948,7 +968,7 @@ patgetglobflags(char **strp, long *assertp, int *ignore) | |||
|     *ignore = 1; | ||||
|     /* (#X): assumes we are still positioned on the first X */ | ||||
|     for (; *ptr && *ptr != Outpar; ptr++) { | ||||
| 	if (*ptr == 'q') {  | ||||
| 	if (*ptr == 'q') { | ||||
| 	    /* Glob qualifiers, ignored in pattern code */ | ||||
| 	    while (*ptr && *ptr != Outpar) | ||||
| 		ptr++; | ||||
|  | @ -1044,8 +1064,9 @@ patgetglobflags(char **strp, long *assertp, int *ignore) | |||
| static long | ||||
| patcomppiece(int *flagp) | ||||
| { | ||||
|     long starter = 0, next, pound, op; | ||||
|     long starter = 0, next, op, opnd; | ||||
|     int flags, flags2, kshchar, len, ch, patch, nmeta; | ||||
|     int pound, count; | ||||
|     union upat up; | ||||
|     char *nptr, *str0, *ptr, *patprev; | ||||
|     zrange_t from, to; | ||||
|  | @ -1098,10 +1119,13 @@ patcomppiece(int *flagp) | |||
| 	/* more than one character matched? */ | ||||
| 	morelen = (patprev > str0); | ||||
| 	/*
 | ||||
| 	 * If we have more than one character, a following hash only | ||||
| 	 * applies to the last, so backtrack one character. | ||||
| 	 * If we have more than one character, a following hash | ||||
| 	 * or (#c...) only applies to the last, so backtrack one character. | ||||
| 	 */ | ||||
| 	if (isset(EXTENDEDGLOB) && *patparse == Pound && morelen) | ||||
| 	if (isset(EXTENDEDGLOB) && | ||||
| 	    (*patparse == Pound || | ||||
| 	     (*patparse == Inpar && patparse[1] == Pound && | ||||
| 	      patparse[2] == 'c')) && morelen) | ||||
| 	    patparse = patprev; | ||||
| 	/*
 | ||||
| 	 * If len is 1, we can't have an active # following, so doesn't | ||||
|  | @ -1345,7 +1369,10 @@ patcomppiece(int *flagp) | |||
| 	} | ||||
|     } | ||||
| 
 | ||||
|     count = 0; | ||||
|     if (!(pound = (*patparse == Pound && isset(EXTENDEDGLOB))) && | ||||
| 	!(count = (isset(EXTENDEDGLOB) && *patparse == Inpar && | ||||
| 		   patparse[1] == Pound && patparse[2] == 'c')) && | ||||
| 	(kshchar <= 0 || kshchar == '@' || kshchar == '!')) { | ||||
| 	*flagp = flags; | ||||
| 	return starter; | ||||
|  | @ -1364,6 +1391,10 @@ patcomppiece(int *flagp) | |||
|     } else if (kshchar == '?') { | ||||
| 	op = 0; | ||||
| 	*flagp = 0; | ||||
|     } else if (count) { | ||||
| 	op = P_COUNT; | ||||
| 	patparse += 3; | ||||
| 	*flagp = P_HSTART; | ||||
|     } else if (*++patparse == Pound) { | ||||
| 	op = P_TWOHASH; | ||||
| 	patparse++; | ||||
|  | @ -1383,7 +1414,56 @@ patcomppiece(int *flagp) | |||
|      * each time we fail on a non-empty branch, we try the empty branch, | ||||
|      * which is equivalent to backtracking. | ||||
|      */ | ||||
|     if ((flags & P_SIMPLE) && (op == P_ONEHASH || op == P_TWOHASH) && | ||||
|     if (op == P_COUNT) { | ||||
| 	/* (#cN,M) */ | ||||
| 	union upat countargs[P_CT_OPERAND]; | ||||
| 	char *opp = patparse; | ||||
| 
 | ||||
| 	countargs[0].l = P_COUNT; | ||||
| 	countargs[P_CT_CURRENT].l = 0L; | ||||
| 	countargs[P_CT_MIN].l = (long)zstrtol(patparse, &patparse, 10); | ||||
| 	if (patparse == opp) { | ||||
| 	    /* missing number treated as zero */ | ||||
| 	    countargs[P_CT_MIN].l = 0L; | ||||
| 	} | ||||
| 	if (*patparse != ',' && *patparse != Comma) { | ||||
| 	    /* either max = min or error */ | ||||
| 	    if (*patparse != Outpar) | ||||
| 		return 0; | ||||
| 	    countargs[P_CT_MAX].l = countargs[P_CT_MIN].l; | ||||
| 	} else { | ||||
| 	    opp = ++patparse; | ||||
| 	    countargs[P_CT_MAX].l = (long)zstrtol(patparse, &patparse, 10); | ||||
| 	    if (*patparse != Outpar) | ||||
| 		return 0; | ||||
| 	    if (patparse == opp) { | ||||
| 		/* missing number treated as infinity: record as -1 */ | ||||
| 		countargs[P_CT_MAX].l = -1L; | ||||
| 	    } | ||||
| 	} | ||||
| 	patparse++; | ||||
| 	countargs[P_CT_PTR].p = NULL; | ||||
| 	/* Mark this chain as a min/max count... */ | ||||
| 	patinsert(P_COUNTSTART, starter, (char *)countargs, sizeof(countargs)); | ||||
| 	/*
 | ||||
| 	 * The next of the operand is a loop back to the P_COUNT.  This is | ||||
| 	 * how we get recursion for the count.  We don't loop back to | ||||
| 	 * the P_COUNTSTART; that's used for initialising the count | ||||
| 	 * and saving and restoring the count for any enclosing use | ||||
| 	 * of the match. | ||||
| 	 */ | ||||
| 	opnd = P_OPERAND(starter) + P_CT_OPERAND; | ||||
| 	pattail(opnd, patnode(P_BACK)); | ||||
| 	pattail(opnd, P_OPERAND(starter)); | ||||
| 	/*
 | ||||
| 	 * The next of the counter operators is what follows the | ||||
| 	 * closure. | ||||
| 	 * This handles matching of the tail. | ||||
| 	 */ | ||||
| 	next = patnode(P_NOTHING); | ||||
| 	pattail(starter, next); | ||||
| 	pattail(P_OPERAND(starter), next); | ||||
|     } else if ((flags & P_SIMPLE) && (op == P_ONEHASH || op == P_TWOHASH) && | ||||
| 	P_OP((Upat)patout+starter) == P_ANY) { | ||||
| 	/* Optimize ?# to *.  Silly thing to do, since who would use
 | ||||
| 	 * use ?# ? But it makes the later code shorter. | ||||
|  | @ -1397,7 +1477,7 @@ patcomppiece(int *flagp) | |||
| 	    uptr->l = (uptr->l & ~0xff) | P_STAR; | ||||
| 	} | ||||
|     } else if ((flags & P_SIMPLE) && op && !(patglobflags & 0xff)) { | ||||
| 	/* Don't simplify if we need to look for approximations. */ | ||||
| 	/* Simplify, but not if we need to look for approximations. */ | ||||
| 	patinsert(op, starter, NULL, 0); | ||||
|     } else if (op == P_ONEHASH) { | ||||
| 	/* Emit x# as (x&|), where & means "self". */ | ||||
|  | @ -2830,6 +2910,54 @@ patmatch(Upat prog) | |||
| 	    if (patinput < patinend || (patflags & PAT_NOTEND)) | ||||
| 		fail = 1; | ||||
| 	    break; | ||||
| 	case P_COUNTSTART: | ||||
| 	    { | ||||
| 		/*
 | ||||
| 		 * Save and restore the current count and the | ||||
| 		 * start pointer in case the pattern has been | ||||
| 		 * executed by a previous repetition of a | ||||
| 		 * closure. | ||||
| 		 */ | ||||
| 		long *curptr = &P_OPERAND(scan)[P_CT_CURRENT].l; | ||||
| 		long savecount = *curptr; | ||||
| 		unsigned char *saveptr = scan[P_CT_PTR].p; | ||||
| 		int ret; | ||||
| 
 | ||||
| 		*curptr = 0L; | ||||
| 		ret = patmatch(P_OPERAND(scan)); | ||||
| 		*curptr = savecount; | ||||
| 		scan[P_CT_PTR].p = saveptr; | ||||
| 		return ret; | ||||
| 	    } | ||||
| 	case P_COUNT: | ||||
| 	    { | ||||
| 		/* (#cN,M): execution is relatively straightforward */ | ||||
| 		long cur = scan[P_CT_CURRENT].l; | ||||
| 		long min = scan[P_CT_MIN].l; | ||||
| 		long max = scan[P_CT_MAX].l; | ||||
| 
 | ||||
| 		if (cur && cur >= min && | ||||
| 		    (unsigned char *)patinput == scan[P_CT_PTR].p) { | ||||
| 		    /*
 | ||||
| 		     * Not at the first attempt to match so | ||||
| 		     * the previous attempt managed zero length. | ||||
| 		     * We can do this indefinitely so there's | ||||
| 		     * no point in going on.  Simply try to | ||||
| 		     * match the remainder of the pattern. | ||||
| 		     */ | ||||
| 		    return patmatch(next); | ||||
| 		} | ||||
| 		scan[P_CT_PTR].p = (unsigned char *)patinput; | ||||
| 
 | ||||
| 		if (max < 0 || cur < max) { | ||||
| 		    scan[P_CT_CURRENT].l = cur + 1; | ||||
| 		    if (patmatch(scan + P_CT_OPERAND)) | ||||
| 			return 1; | ||||
| 		} | ||||
| 		if (cur < min) | ||||
| 		    return 0; | ||||
| 		return patmatch(next); | ||||
| 	    } | ||||
| 	case P_END: | ||||
| 	    if (!(fail = (patinput < patinend && !(patflags & PAT_NOANCH)))) | ||||
| 		return 1; | ||||
|  |  | |||
|  | @ -177,6 +177,16 @@ | |||
| >1:  [[ path/testy = *((#s)|/)test((#e)|/)* ]] | ||||
| >1:  [[ path/testy/ohyes = *((#s)|/)test((#e)|/)* ]] | ||||
| >1:  [[ path/atest/ohyes = *((#s)|/)test((#e)|/)* ]] | ||||
| >0:  [[ XabcdabcY = X(ab|c|d)(#c5)Y ]] | ||||
| >0:  [[ XabcdabcY = X(ab|c|d)(#c1,5)Y ]] | ||||
| >0:  [[ XabcdabcY = X(ab|c|d)(#c5,8)Y ]] | ||||
| >0:  [[ XabcdabcY = X(ab|c|d)(#c4,)Y ]] | ||||
| >1:  [[ XabcdabcY = X(ab|c|d)(#c6,)Y ]] | ||||
| >1:  [[ XabcdabcY = X(ab|c|d)(#c1,4)Y ]] | ||||
| >0:  [[ ZX = Z(|)(#c1)X ]] | ||||
| >0:  [[ froofroo = (fro(#c2))(#c2) ]] | ||||
| >1:  [[ froofroofroo = (fro(#c2))(#c2) ]] | ||||
| >1:  [[ froofro = (fro(#c2))(#c2) ]] | ||||
| >0 tests failed. | ||||
| 
 | ||||
|   globtest globtests.ksh | ||||
|  | @ -357,3 +367,7 @@ | |||
| >  [[:IFSSPACE:]] yes | ||||
| >/ [[:WORD:]] no | ||||
| >/ [[:WORD:]] yes | ||||
| 
 | ||||
|  [[ foo = (#c0)foo ]] | ||||
| 1:Misplaced (#c...) flag | ||||
| ?(eval):1: bad pattern: (#c0)foo | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue