10547: (#s) and (#e) pattern assertions

2025-07-16 18:31:28 +02:00 · 2000-04-06 18:44:01 +00:00 · 2000-04-06 18:44:01 +00:00 · d2330ba055
commit d2330ba055
parent 181811bf80
7 changed files with 875 additions and 276 deletions
--- a/6
+++ b/6
@ -1,3 +1,9 @@
+2000-04-06  Peter Stephenson  <pws@pwstephenson.fsnet.co.uk>
+
+	* 10547: Doc/Zsh/expn.yo, Misc/globtests, Src/pattern.c,
+	Src/subst.c, Test/11glob.ztst, Test/ztst.zsh: add
+	(#s) and (#e) to match at start and end of string.
+
 2000-04-06  Andrew Main  <zefram@zsh.org>

 	* zefram2: Src/lex.c: Support "3&> foo" etc.
--- a/Doc/Zsh/expn.yo
+++ b/Doc/Zsh/expn.yo
@ -1299,6 +1299,17 @@ item(tt(a)var(num))(
 Approximate matching: var(num) errors are allowed in the string matched by
 the pattern.  The rules for this are described in the next subsection.
 )
+item(tt(s), tt(e))(
+Unlike the other flags, these have only a local effect, and each must
+appear on its own:  `tt((#s))' and `tt((#e))' are the only valid forms.
+The `tt((#s))' flag succeeds only at the start of the test string, and the
+`tt((#e))' flag succeeds only at the end of the test string; they
+correspond to `tt(^)' and `tt($)' in standard regular expressions.  They
+are useful for matching path segments in patterns.  For example,
+`tt(*((#s)|/)test((#e)|/)*)' matches a path segment `tt(test)' in any of
+the following strings: tt(test), tt(test/at/start), tt(at/end/test),
+tt(in/test/middle).
+)
 enditem()

 For example, the test string tt(fooxx) can be matched by the pattern
--- a/Misc/globtests
+++ b/Misc/globtests
@ -14,6 +14,13 @@ while read res str pat; do
    (( failed++ ))
  fi
 done <<EOT
+# a few simple things certain nameless idiots have been known to mess up
+t foo~                foo~
+t foo~                (foo~)
+t foo~                (foo~|)
+t foo.c               *.c~boo*
+f foo.c               *.c~boo*~foo*
+# closures
 t fofo                (fo#)#
 t ffo                 (fo#)#
 t foooofo             (fo#)#
@ -75,6 +82,7 @@ f mad.moo.cow   (*~*.*).(*~*.*)
 t moo.cow       (^*.*).(^*.*)
 f sane.moo.cow  (^*.*).(^*.*)
 f mucca.pazza   mu(^c#)?.pa(^z#)?
+f _foo~         _(|*[^~])
 t fff           ((^f))
 t fff           ((^f)#)
 t fff           ((^f)##)
@ -94,6 +102,8 @@ t zoox          (^z*|*x)
 t foo           (^foo)#
 f foob          (^foo)b*
 t foobb         (^foo)b*
+f foob          (*~foo)b*
+t foobb         (*~foo)b*
 f zsh           ^z*
 t a%1X          [[:alpha:][:punct:]]#[[:digit:]][^[:lower:]]
 f a%1           [[:alpha:][:punct:]]#[[:digit:]][^[:lower:]]
@ -103,5 +113,71 @@ t :]            [:]]#
 t [             [[]
 t ]             []]
 t []            [^]]]
+# Case insensitive matching
+t fooxx         (#i)FOOXX
+f fooxx         (#l)FOOXX
+t FOOXX         (#l)fooxx
+f fooxx         (#i)FOO(#I)X(#i)X
+t fooXx         (#i)FOO(#I)X(#i)X
+t fooxx         ((#i)FOOX)x
+f fooxx         ((#i)FOOX)X
+f BAR           (bar|(#i)foo)
+t FOO           (bar|(#i)foo)
+t Modules       (#i)*m*
+t fooGRUD       (#i)(bar|(#I)foo|(#i)rod)grud
+f FOOGRUD       (#i)(bar|(#I)foo|(#i)rod)grud
+t readme        (#i)readme~README|readme
+# the readme doesn't get excluded the second time...
+t readme        (#i)readme~README|readme~README
+# Ranges with backtracking
+t 633           <1-1000>33
+t 633           <-1000>33
+t 633           <1->33
+t 633           <->33
+# Approximate matching
+t READ.ME       (#ia1)readme
+f READ..ME      (#ia1)readme
+t README        (#ia1)readm
+t READM         (#ia1)readme
+t README        (#ia1)eadme
+t EADME         (#ia1)readme
+t READEM        (#ia1)readme
+f ADME          (#ia1)readme
+f README        (#ia1)read
+t bob           (#a1)[b][b]
+f bob           (#a1)[b][b]a
+t bob           (#a1)[b]o[b]a
+f bob           (#a1)[c]o[b]
+t abcd          (#a2)XbcX
+t abcd          (#a2)ad
+t ad            (#a2)abcd
+t abcd          (#a2)bd
+t bd            (#a2)abcd
+t badc          (#a2)abcd
+# This next one is a little tricky: a[d]bc[] = a[]bc[d]
+t adbc          (#a2)abcd
+f dcba          (#a2)abcd
+# the next one is [d][cb][a] = [a][bc][d] with a transposition
+t dcba          (#a3)abcd
+t aabaXaaabY    (#a1)(a#b)#Y
+t aabaXaaabY    (#a1)(a#b)(a#b)Y
+t aaXaaaaabY    (#a1)(a#b)(a#b)Y
+t aaaXaaabY     (#a1)(a##b)##Y
+t aaaXbaabY     (#a1)(a##b)##Y
+f read.me       (#ia1)README~READ.ME
+t read.me       (#ia1)README~READ_ME
+f read.me       (#ia1)README~(#a1)READ_ME
+t test          *((#s)|/)test((#e)|/)*
+t test/path     *((#s)|/)test((#e)|/)*
+t path/test     *((#s)|/)test((#e)|/)*
+t path/test/ohyes *((#s)|/)test((#e)|/)*
+f atest         *((#s)|/)test((#e)|/)*
+f testy         *((#s)|/)test((#e)|/)*
+f testy/path    *((#s)|/)test((#e)|/)*
+f path/atest    *((#s)|/)test((#e)|/)*
+f atest/path    *((#s)|/)test((#e)|/)*
+f path/testy    *((#s)|/)test((#e)|/)*
+f path/testy/ohyes *((#s)|/)test((#e)|/)*
+f path/atest/ohyes *((#s)|/)test((#e)|/)*
 EOT
 print "$failed tests failed."
--- a/Src/pattern.c
+++ b/Src/pattern.c
@ -83,6 +83,8 @@ typedef union upat *Upat;
 #define	P_ONEHASH 0x06	/* node	Match this (simple) thing 0 or more times. */
 #define	P_TWOHASH 0x07	/* node	Match this (simple) thing 1 or more times. */
 #define P_GFLAGS  0x08	/* long Match nothing and set globbing flags */
+#define P_ISSTART 0x09  /* no   Match start of string. */
+#define P_ISEND   0x0a  /* no   Match end of string. */
 /* numbered so we can test bit 5 for a branch */
 #define	P_BRANCH  0x20	/* node	Match this alternative, or the next... */
 #define	P_WBRANCH 0x21	/* uc* node P_BRANCH, but match at least 1 char */
@ -645,34 +647,44 @@ patcompbranch(int *flagp)
 	    /* Globbing flags. */
 	    char *pp1 = patparse;
 	    int oldglobflags = patglobflags;
+	    long assert;
 	    patparse += (*patparse == '@') ? 3 : 2;
-	    if (!patgetglobflags(&patparse))
-		return 0;	    
-	    if (pp1 == patstart) {
-		/* Right at start of pattern, the simplest case.
-		 * Put them into the flags and don't emit anything.
+	    if (!patgetglobflags(&patparse, &assert))
+		return 0;
+	    if (assert) {
+		/*
+		 * Start/end assertion: written like a globbing flag,
+		 * but compiled as an ordinary pattern node.
 		 */
-		((Patprog)patout)->globflags = patglobflags;
-		continue;
-	    } else if (!*patparse) {
-		/* Right at the end, so just leave the flags for
-		 * the next Patprog in the chain to pick up.
-		 */
-		break;
-	    }
-	    /*
-	     * Otherwise, we have to stick them in as a pattern
-	     * matching nothing.
-	     */
-	    if (oldglobflags != patglobflags) {
-		/* Flags changed */
-		union upat up;
-		latest = patnode(P_GFLAGS);
-		up.l = patglobflags;
-		patadd((char *)&up, 0, sizeof(union upat), 0);
+		latest = patnode(assert);
+		flags = 0;
 	    } else {
-		/* No effect. */
-		continue;
+		if (pp1 == patstart) {
+		    /* Right at start of pattern, the simplest case.
+		     * Put them into the flags and don't emit anything.
+		     */
+		    ((Patprog)patout)->globflags = patglobflags;
+		    continue;
+		} else if (!*patparse) {
+		    /* Right at the end, so just leave the flags for
+		     * the next Patprog in the chain to pick up.
+		     */
+		    break;
+		}
+		/*
+		 * Otherwise, we have to stick them in as a pattern
+		 * matching nothing.
+		 */
+		if (oldglobflags != patglobflags) {
+		    /* Flags changed */
+		    union upat up;
+		    latest = patnode(P_GFLAGS);
+		    up.l = patglobflags;
+		    patadd((char *)&up, 0, sizeof(union upat), 0);
+		} else {
+		    /* No effect. */
+		    continue;
+		}
 	    }
 	} else if (isset(EXTENDEDGLOB) && *patparse == Hat) {
 	    /*
@ -707,10 +719,12 @@ patcompbranch(int *flagp)

 /**/
 int
-patgetglobflags(char **strp)
+patgetglobflags(char **strp, long *assertp)
 {
    char *nptr, *ptr = *strp;
    zlong ret;
+
+    *assertp = 0;
    /* (#X): assumes we are still positioned on the first X */
    for (; *ptr && *ptr != Outpar; ptr++) {
 	switch (*ptr) {
@ -763,12 +777,23 @@ patgetglobflags(char **strp)
 	    patglobflags &= ~GF_MATCHREF;
 	    break;

+	case 's':
+	    *assertp = P_ISSTART;
+	    break;
+
+	case 'e':
+	    *assertp = P_ISEND;
+	    break;
+
 	default:
 	    return 0;
 	}
    }
    if (*ptr != Outpar)
 	return 0;
+    /* Start/end assertions must appear on their own. */
+    if (*assertp && (*strp)[1] != Outpar)
+	return 0;
    *strp = ptr + 1;
    return 1;
 }
@ -1989,6 +2014,14 @@ patmatch(Upat prog)
 	     * anything here.
 	     */
 	    return 0;
+	case P_ISSTART:
+	    if (patinput != patinstart)
+		fail = 1;
+	    break;
+	case P_ISEND:
+	    if (*patinput)
+		fail = 1;
+	    break;
 	case P_END:
 	    if (!(fail = (*patinput && !(patflags & PAT_NOANCH))))
 		return 1;
@ -2387,6 +2420,12 @@ patprop(Upat op)
    case P_GFLAGS:
 	p = "GFLAGS";
 	break;
+    case P_ISSTART:
+	p = "ISSTART";
+	break;
+    case P_ISEND:
+	p = "ISEND";
+	break;
    case P_NOTHING:
 	p = "NOTHING";
 	break;
--- a/Src/subst.c
+++ b/Src/subst.c
--- a/Test/11glob.ztst
+++ b/Test/11glob.ztst
@ -162,6 +162,18 @@
 >1:  [[ read.me = (#ia1)README~READ.ME ]]
 >0:  [[ read.me = (#ia1)README~READ_ME ]]
 >1:  [[ read.me = (#ia1)README~(#a1)READ_ME ]]
+>0:  [[ test = *((#s)|/)test((#e)|/)* ]]
+>0:  [[ test/path = *((#s)|/)test((#e)|/)* ]]
+>0:  [[ path/test = *((#s)|/)test((#e)|/)* ]]
+>0:  [[ path/test/ohyes = *((#s)|/)test((#e)|/)* ]]
+>1:  [[ atest = *((#s)|/)test((#e)|/)* ]]
+>1:  [[ testy = *((#s)|/)test((#e)|/)* ]]
+>1:  [[ testy/path = *((#s)|/)test((#e)|/)* ]]
+>1:  [[ path/atest = *((#s)|/)test((#e)|/)* ]]
+>1:  [[ atest/path = *((#s)|/)test((#e)|/)* ]]
+>1:  [[ path/testy = *((#s)|/)test((#e)|/)* ]]
+>1:  [[ path/testy/ohyes = *((#s)|/)test((#e)|/)* ]]
+>1:  [[ path/atest/ohyes = *((#s)|/)test((#e)|/)* ]]
 >0 tests failed.

  globtest globtests.ksh
--- a/Test/ztst.zsh
+++ b/Test/ztst.zsh
@ -14,16 +14,21 @@

 # Produce verbose messages if non-zero.
 # If 1, produce reports of tests executed; if 2, also report on progress.
-ZTST_verbose=0
+# Assigned only if unset or empty, so a value from the environment is kept.
+: ${ZTST_verbose:=0}

 # We require all options to be reset, not just emulation options.
 # Unfortunately, due to the crud which may be in /etc/zshenv this might
 # still not be good enough.  Maybe we should trick it somehow.
 emulate -R zsh

+# Set the module load path to correspond to this build of zsh.
+# This Modules directory should have been created by "make check".
+[[ -d Modules/zsh ]] && module_path=( $PWD/Modules )
+
 # We need to be able to save and restore the options used in the test.
 # We use the $options variable of the parameter module for this.
-zmodload -i parameter
+zmodload -i zsh/parameter

 # Note that both the following are regular arrays, since we only use them
 # in whole array assignments to/from $options.
@ -42,18 +47,31 @@ ZTST_mainopts=(${(kv)options})
 ZTST_testdir=$PWD
 ZTST_testname=$1

+# The source directory is not necessarily the current directory,
+# but if $0 doesn't contain a `/' assume it is.
+if [[ $0 = */* ]]; then
+  ZTST_srcdir=${0%/*}
+else
+  ZTST_srcdir=$PWD
+fi
+[[ $ZTST_srcdir = /* ]] || ZTST_srcdir="$ZTST_testdir/$ZTST_srcdir"
+
+# Set the function autoload paths to correspond to this build of zsh.
+fpath=( $ZTST_srcdir/../(Completion|Functions)/*~*/CVS(/) )
+
+: ${TMPPREFIX:=/tmp/zsh}
 # Temporary files for redirection inside tests.
-ZTST_in=${TMPPREFIX-:/tmp/zsh}.ztst.in.$$
+ZTST_in=${TMPPREFIX}.ztst.in.$$
 # hold the expected output
-ZTST_out=${TMPPREFIX-:/tmp/zsh}.ztst.out.$$
-ZTST_err=${TMPPREFIX-:/tmp/zsh}.ztst.err.$$
+ZTST_out=${TMPPREFIX}.ztst.out.$$
+ZTST_err=${TMPPREFIX}.ztst.err.$$
 # hold the actual output from the test
-ZTST_tout=${TMPPREFIX-:/tmp/zsh}.ztst.tout.$$
-ZTST_terr=${TMPPREFIX-:/tmp/zsh}.ztst.terr.$$
+ZTST_tout=${TMPPREFIX}.ztst.tout.$$
+ZTST_terr=${TMPPREFIX}.ztst.terr.$$

 ZTST_cleanup() {
-  rm -rf $ZTST_testdir/dummy.tmp $ZTST_testdir/*.tmp \
-         $ZTST_in $ZTST_out $ZTST_err $ZTST_tout $ZTST_terr
+  cd $ZTST_testdir
+  rm -rf $ZTST_testdir/dummy.tmp $ZTST_testdir/*.tmp ${TMPPREFIX}.ztst*$$
 }

 # This cleanup always gets performed, even if we abort.  Later,
@ -67,10 +85,11 @@ rm -rf dummy.tmp *.tmp
 # Report failure.  Note that all output regarding the tests goes to stdout.
 # That saves an unpleasant mixture of stdout and stderr to sort out.
 ZTST_testfailed() {
-  print "Test $ZTST_testname failed: $1"
+  print -r "Test $ZTST_testname failed: $1"
  if [[ -n $ZTST_message ]]; then
-    print "Was testing: $ZTST_message"
+    print -r "Was testing: $ZTST_message"
  fi
+  print -r "$ZTST_testname: test failed."
  ZTST_cleanup
  exit 1
 }
@ -79,7 +98,7 @@ ZTST_testfailed() {
 ZTST_verbose() {
  local lev=$1
  shift
-  [[ -n $ZTST_verbose && $ZTST_verbose -ge $lev ]] && print $* >&8
+  [[ -n $ZTST_verbose && $ZTST_verbose -ge $lev ]] && print -- $* >&8
 }

 [[ ! -r $ZTST_testname ]] && ZTST_testfailed "can't read test file."
@ -97,7 +116,7 @@ ZTST_cursect=''
 ZTST_getline() {
  local IFS=
  while true; do
-    read ZTST_curline <&9 || return 1
+    read -r ZTST_curline <&9 || return 1
    [[ $ZTST_curline == \#* ]] || return 0
  done
 }
@ -144,7 +163,7 @@ $ZTST_code"

 # Read in a piece for redirection.
 ZTST_getredir() {
-  local char=${ZTST_curline[1]}
+  local char=${ZTST_curline[1]} fn
  ZTST_redir=${ZTST_curline[2,-1]}
  while ZTST_getline; do
    [[ $ZTST_curline[1] = $char ]] || break
@ -153,6 +172,22 @@ ${ZTST_curline[2,-1]}"
  done
  ZTST_verbose 2 "ZTST_getredir: read redir for '$char':
 $ZTST_redir"
+
+case $char in
+  '<') fn=$ZTST_in
+       ;;
+  '>') fn=$ZTST_out
+       ;;
+  '?') fn=$ZTST_err
+       ;;
+   *)  ZTST_testfailed "bad redir operator: $char"
+       ;;
+esac
+if [[ $ZTST_flags = *q* ]]; then
+  print -r -- "${(e)ZTST_redir}" >>$fn
+else
+  print -r -- "$ZTST_redir" >>$fn
+fi
 }

 # Execute an indented chunk.  Redirections will already have
@ -209,27 +244,24 @@ $ZTST_curline"
 	    fi
 	    ;;
 	[[:space:]]##[^[:space:]]*) ZTST_getchunk
-	  [[ $ZTST_curline != [-0-9]* ]] &&
-	  ZTST_testfailed "expecting test status at:
-$ZTST_curline"
-          ZTST_xstatus=$ZTST_curline
-	  if [[ $ZTST_curline == (#b)([^:]##):(*) ]]; then
+	  if [[ $ZTST_curline == (#b)([-0-9]##)([[:alpha:]]#)(:*)# ]]; then
 	    ZTST_xstatus=$match[1]
-	    ZTST_message=$match[2]
+	    ZTST_flags=$match[2]
+	    ZTST_message=${match[3]:+${match[3][2,-1]}}
+	  else
+	    ZTST_testfailed "expecting test status at:
+$ZTST_curline"
 	  fi
 	  ZTST_getline
 	  found=1
 	  ;;
 	'<'*) ZTST_getredir
-	  print -r "${(e)ZTST_redir}" >>$ZTST_in
 	  found=1
 	  ;;
 	'>'*) ZTST_getredir
-          print -r "${(e)ZTST_redir}" >>$ZTST_out
 	  found=1
 	  ;;
 	'?'*) ZTST_getredir
-	  print -r "${(e)ZTST_redir}" >>$ZTST_err
 	  found=1
 	  ;;
 	*) ZTST_testfailed "bad line in test block:
@ -240,8 +272,7 @@ $ZTST_curline"

    # If we found some code to execute...
    if [[ -n $ZTST_code ]]; then
-      ZTST_verbose 1 "Running test:
-$ZTST_message"
+      ZTST_verbose 1 "Running test: $ZTST_message"
      ZTST_verbose 2 "ZTST_test: expecting status: $ZTST_xstatus"

      ZTST_execchunk <$ZTST_in >$ZTST_tout 2>$ZTST_terr
@ -249,7 +280,9 @@ $ZTST_message"
      # First check we got the right status, if specified.
      if [[ $ZTST_xstatus != - && $ZTST_xstatus != $ZTST_status ]]; then
 	ZTST_testfailed "bad status $ZTST_status, expected $ZTST_xstatus from:
-$ZTST_code"
+$ZTST_code${$(<$ZTST_terr):+
+Error output:
+$(<$ZTST_terr)}"
      fi

      ZTST_verbose 2 "ZTST_test: test produced standard output:
@ -258,11 +291,13 @@ ZTST_test: and standard error:
 $(<$ZTST_terr)"

      # Now check output and error.
-      if ! diff -c $ZTST_out $ZTST_tout; then
+      if [[ $ZTST_flags != *d* ]] && ! diff -c $ZTST_out $ZTST_tout; then
 	ZTST_testfailed "output differs from expected as shown above for:
-$ZTST_code"
+$ZTST_code${$(<$ZTST_terr):+
+Error output:
+$(<$ZTST_terr)}"
      fi
-      if ! diff -c $ZTST_err $ZTST_terr; then
+      if [[ $ZTST_flags != *D* ]] && ! diff -c $ZTST_err $ZTST_terr; then
 	ZTST_testfailed "error output differs from expected as shown above for:
 $ZTST_code"
      fi