10547: (#s) and (#e) pattern assertions

2025-09-11 13:01:28 +02:00 · 2000-04-06 18:44:01 +00:00 · 2000-04-06 18:44:01 +00:00 · d2330ba055
commit d2330ba055
parent 181811bf80
7 changed files with 875 additions and 276 deletions
--- a/6
+++ b/6
@ -1,3 +1,9 @@
 2000-04-06  Peter Stephenson  <pws@pwstephenson.fsnet.co.uk>
 	* 10547: Doc/Zsh/expn.yo, Misc/globtests, Src/pattern.c,
 	Src/subst.c, Test/11glob.ztst, Test/ztst.zsh: add
 	(#s) and (#e) to match at start and end of string.
 2000-04-06  Andrew Main  <zefram@zsh.org>
 	* zefram2: Src/lex.c: Support "3&> foo" etc.
--- a/Doc/Zsh/expn.yo
+++ b/Doc/Zsh/expn.yo
@ -1299,6 +1299,17 @@ item(tt(a)var(num))(
 Approximate matching: var(num) errors are allowed in the string matched by
 the pattern.  The rules for this are described in the next subsection.
 )
 item(tt(s), tt(e))(
 Unlike the other flags, these do not change how the rest of the pattern
 is matched; each is tested only at the point where it appears.  Each must
 appear on its own:  `tt((#s))' and `tt((#e))' are the only valid forms.
 The `tt((#s))' flag succeeds only at the start of the test string, and the
 `tt((#e))' flag succeeds only at the end of the test string; they
 correspond to `tt(^)' and `tt($)' in standard regular expressions.  They
 are useful for matching path segments in patterns.  For example,
 `tt(*((#s)|/)test((#e)|/)*)' matches a path segment `tt(test)' in any of
 the following strings: tt(test), tt(test/at/start), tt(at/end/test),
 tt(in/test/middle).
 )
 enditem()
 For example, the test string tt(fooxx) can be matched by the pattern
--- a/Misc/globtests
+++ b/Misc/globtests
@ -14,6 +14,13 @@ while read res str pat; do
    (( failed++ ))
  fi
 done <<EOT
 # a few simple things certain nameless idiots have been known to mess up
 t foo~                foo~
 t foo~                (foo~)
 t foo~                (foo~|)
 t foo.c               *.c~boo*
 f foo.c               *.c~boo*~foo*
 # closures
 t fofo                (fo#)#
 t ffo                 (fo#)#
 t foooofo             (fo#)#
@ -75,6 +82,7 @@ f mad.moo.cow   (*~*.*).(*~*.*)
 t moo.cow       (^*.*).(^*.*)
 f sane.moo.cow  (^*.*).(^*.*)
 f mucca.pazza   mu(^c#)?.pa(^z#)?
 f _foo~         _(|*[^~])
 t fff           ((^f))
 t fff           ((^f)#)
 t fff           ((^f)##)
@ -94,6 +102,8 @@ t zoox          (^z*|*x)
 t foo           (^foo)#
 f foob          (^foo)b*
 t foobb         (^foo)b*
 f foob          (*~foo)b*
 t foobb         (*~foo)b*
 f zsh           ^z*
 t a%1X          [[:alpha:][:punct:]]#[[:digit:]][^[:lower:]]
 f a%1           [[:alpha:][:punct:]]#[[:digit:]][^[:lower:]]
@ -103,5 +113,71 @@ t :]            [:]]#
 t [             [[]
 t ]             []]
 t []            [^]]]
 # Case insensitive matching
 t fooxx         (#i)FOOXX
 f fooxx         (#l)FOOXX
 t FOOXX         (#l)fooxx
 f fooxx         (#i)FOO(#I)X(#i)X
 t fooXx         (#i)FOO(#I)X(#i)X
 t fooxx         ((#i)FOOX)x
 f fooxx         ((#i)FOOX)X
 f BAR           (bar|(#i)foo)
 t FOO           (bar|(#i)foo)
 t Modules       (#i)*m*
 t fooGRUD       (#i)(bar|(#I)foo|(#i)rod)grud
 f FOOGRUD       (#i)(bar|(#I)foo|(#i)rod)grud
 t readme        (#i)readme~README|readme
 # the readme doesn't get excluded the second time...
 t readme        (#i)readme~README|readme~README
 # Ranges with backtracking
 t 633           <1-1000>33
 t 633           <-1000>33
 t 633           <1->33
 t 633           <->33
 # Approximate matching
 t READ.ME       (#ia1)readme
 f READ..ME      (#ia1)readme
 t README        (#ia1)readm
 t READM         (#ia1)readme
 t README        (#ia1)eadme
 t EADME         (#ia1)readme
 t READEM        (#ia1)readme
 f ADME          (#ia1)readme
 f README        (#ia1)read
 t bob           (#a1)[b][b]
 f bob           (#a1)[b][b]a
 t bob           (#a1)[b]o[b]a
 f bob           (#a1)[c]o[b]
 t abcd          (#a2)XbcX
 t abcd          (#a2)ad
 t ad            (#a2)abcd
 t abcd          (#a2)bd
 t bd            (#a2)abcd
 t badc          (#a2)abcd
 # This next one is a little tricky: a[d]bc[] = a[]bc[d]
 t adbc          (#a2)abcd
 f dcba          (#a2)abcd
 # the next one is [d][cb][a] = [a][bc][d] with a transposition
 t dcba          (#a3)abcd
 t aabaXaaabY    (#a1)(a#b)#Y
 t aabaXaaabY    (#a1)(a#b)(a#b)Y
 t aaXaaaaabY    (#a1)(a#b)(a#b)Y
 t aaaXaaabY     (#a1)(a##b)##Y
 t aaaXbaabY     (#a1)(a##b)##Y
 f read.me       (#ia1)README~READ.ME
 t read.me       (#ia1)README~READ_ME
 f read.me       (#ia1)README~(#a1)READ_ME
 t test          *((#s)|/)test((#e)|/)*
 t test/path     *((#s)|/)test((#e)|/)*
 t path/test     *((#s)|/)test((#e)|/)*
 t path/test/ohyes *((#s)|/)test((#e)|/)*
 f atest         *((#s)|/)test((#e)|/)*
 f testy         *((#s)|/)test((#e)|/)*
 f testy/path    *((#s)|/)test((#e)|/)*
 f path/atest    *((#s)|/)test((#e)|/)*
 f atest/path    *((#s)|/)test((#e)|/)*
 f path/testy    *((#s)|/)test((#e)|/)*
 f path/testy/ohyes *((#s)|/)test((#e)|/)*
 f path/atest/ohyes *((#s)|/)test((#e)|/)*
 EOT
 print "$failed tests failed."
--- a/Src/pattern.c
+++ b/Src/pattern.c
@ -83,6 +83,8 @@ typedef union upat *Upat;
 #define	P_ONEHASH 0x06	/* node	Match this (simple) thing 0 or more times. */
 #define	P_TWOHASH 0x07	/* node	Match this (simple) thing 1 or more times. */
 #define P_GFLAGS  0x08	/* long Match nothing and set globbing flags */
 #define P_ISSTART 0x09  /* no   Match start of string. */
 #define P_ISEND   0x0a  /* no   Match end of string. */
 /* numbered so we can test bit 5 for a branch */
 #define	P_BRANCH  0x20	/* node	Match this alternative, or the next... */
 #define	P_WBRANCH 0x21	/* uc* node P_BRANCH, but match at least 1 char */
@ -645,9 +647,18 @@ patcompbranch(int *flagp)
 	    /* Globbing flags. */
 	    char *pp1 = patparse;
 	    int oldglobflags = patglobflags;
 	    long assert;
 	    patparse += (*patparse == '@') ? 3 : 2;
-	    if (!patgetglobflags(&patparse))
+	    if (!patgetglobflags(&patparse, &assert))
 		return 0;
 	    if (assert) {
 		/*
 		 * Start/end assertion: written like a globbing flag, but
 		 * actually compiled as an ordinary pattern node.
 		 */
 		latest = patnode(assert);
 		flags = 0;
 	    } else {
 		if (pp1 == patstart) {
 		    /* Right at start of pattern, the simplest case.
 		     * Put them into the flags and don't emit anything.
@ -674,6 +685,7 @@ patcompbranch(int *flagp)
 		    /* No effect. */
 		    continue;
 		}
 	    }
 	} else if (isset(EXTENDEDGLOB) && *patparse == Hat) {
 	    /*
 	     * ^pat:  anything but pat.  For proper backtracking,
@ -707,10 +719,12 @@ patcompbranch(int *flagp)
 /**/
 int
-patgetglobflags(char **strp)
+patgetglobflags(char **strp, long *assertp)
 {
    char *nptr, *ptr = *strp;
    zlong ret;
    *assertp = 0;
    /* (#X): assumes we are still positioned on the first X */
    for (; *ptr && *ptr != Outpar; ptr++) {
 	switch (*ptr) {
@ -763,12 +777,23 @@ patgetglobflags(char **strp)
 	    patglobflags &= ~GF_MATCHREF;
 	    break;
 	case 's':
 	    *assertp = P_ISSTART;
 	    break;
 	case 'e':
 	    *assertp = P_ISEND;
 	    break;
 	default:
 	    return 0;
 	}
    }
    if (*ptr != Outpar)
 	return 0;
    /* Start/end assertions must appear on their own. */
    if (*assertp && (*strp)[1] != Outpar)
 	return 0;
    *strp = ptr + 1;
    return 1;
 }
@ -1989,6 +2014,14 @@ patmatch(Upat prog)
 	     * anything here.
 	     */
 	    return 0;
 	case P_ISSTART:
 	    if (patinput != patinstart)
 		fail = 1;
 	    break;
 	case P_ISEND:
 	    if (*patinput)
 		fail = 1;
 	    break;
 	case P_END:
 	    if (!(fail = (*patinput && !(patflags & PAT_NOANCH))))
 		return 1;
@ -2387,6 +2420,12 @@ patprop(Upat op)
    case P_GFLAGS:
 	p = "GFLAGS";
 	break;
    case P_ISSTART:
 	p = "ISSTART";
 	break;
    case P_ISEND:
 	p = "ISEND";
 	break;
    case P_NOTHING:
 	p = "NOTHING";
 	break;
--- a/Src/subst.c
+++ b/Src/subst.c
--- a/Test/11glob.ztst
+++ b/Test/11glob.ztst
@ -162,6 +162,18 @@
 >1:  [[ read.me = (#ia1)README~READ.ME ]]
 >0:  [[ read.me = (#ia1)README~READ_ME ]]
 >1:  [[ read.me = (#ia1)README~(#a1)READ_ME ]]
 >0:  [[ test = *((#s)|/)test((#e)|/)* ]]
 >0:  [[ test/path = *((#s)|/)test((#e)|/)* ]]
 >0:  [[ path/test = *((#s)|/)test((#e)|/)* ]]
 >0:  [[ path/test/ohyes = *((#s)|/)test((#e)|/)* ]]
 >1:  [[ atest = *((#s)|/)test((#e)|/)* ]]
 >1:  [[ testy = *((#s)|/)test((#e)|/)* ]]
 >1:  [[ testy/path = *((#s)|/)test((#e)|/)* ]]
 >1:  [[ path/atest = *((#s)|/)test((#e)|/)* ]]
 >1:  [[ atest/path = *((#s)|/)test((#e)|/)* ]]
 >1:  [[ path/testy = *((#s)|/)test((#e)|/)* ]]
 >1:  [[ path/testy/ohyes = *((#s)|/)test((#e)|/)* ]]
 >1:  [[ path/atest/ohyes = *((#s)|/)test((#e)|/)* ]]
 >0 tests failed.
  globtest globtests.ksh
--- a/Test/ztst.zsh
+++ b/Test/ztst.zsh
@ -14,16 +14,21 @@
 # Produce verbose messages if non-zero.
 # If 1, produce reports of tests executed; if 2, also report on progress.
-ZTST_verbose=0
+# Only a default is assigned here, so a value from the environment is kept.
 : ${ZTST_verbose:=0}
 # We require all options to be reset, not just emulation options.
 # Unfortunately, due to the crud which may be in /etc/zshenv this might
 # still not be good enough.  Maybe we should trick it somehow.
 emulate -R zsh
 # Set the module load path to correspond to this build of zsh.
 # This Modules directory should have been created by "make check".
 [[ -d Modules/zsh ]] && module_path=( $PWD/Modules )
 # We need to be able to save and restore the options used in the test.
 # We use the $options variable of the parameter module for this.
-zmodload -i parameter
+zmodload -i zsh/parameter
 # Note that both the following are regular arrays, since we only use them
 # in whole array assignments to/from $options.
@ -42,18 +47,31 @@ ZTST_mainopts=(${(kv)options})
 ZTST_testdir=$PWD
 ZTST_testname=$1
 # The source directory is not necessarily the current directory,
 # but if $0 doesn't contain a `/' assume it is.
 if [[ $0 = */* ]]; then
  ZTST_srcdir=${0%/*}
 else
  ZTST_srcdir=$PWD
 fi
 [[ $ZTST_srcdir = /* ]] || ZTST_srcdir="$ZTST_testdir/$ZTST_srcdir"
 # Set the function autoload paths to correspond to this build of zsh.
 fpath=( $ZTST_srcdir/../(Completion|Functions)/*~*/CVS(/) )
 : ${TMPPREFIX:=/tmp/zsh}
 # Temporary files for redirection inside tests.
-ZTST_in=${TMPPREFIX-:/tmp/zsh}.ztst.in.$$
+ZTST_in=${TMPPREFIX}.ztst.in.$$
 # hold the expected output
-ZTST_out=${TMPPREFIX-:/tmp/zsh}.ztst.out.$$
+ZTST_out=${TMPPREFIX}.ztst.out.$$
-ZTST_err=${TMPPREFIX-:/tmp/zsh}.ztst.err.$$
+ZTST_err=${TMPPREFIX}.ztst.err.$$
 # hold the actual output from the test
-ZTST_tout=${TMPPREFIX-:/tmp/zsh}.ztst.tout.$$
+ZTST_tout=${TMPPREFIX}.ztst.tout.$$
-ZTST_terr=${TMPPREFIX-:/tmp/zsh}.ztst.terr.$$
+ZTST_terr=${TMPPREFIX}.ztst.terr.$$
 ZTST_cleanup() {
-  rm -rf $ZTST_testdir/dummy.tmp $ZTST_testdir/*.tmp \
+  cd $ZTST_testdir
-         $ZTST_in $ZTST_out $ZTST_err $ZTST_tout $ZTST_terr
+  rm -rf $ZTST_testdir/dummy.tmp $ZTST_testdir/*.tmp ${TMPPREFIX}.ztst*$$
 }
 # This cleanup always gets performed, even if we abort.  Later,
@ -67,10 +85,11 @@ rm -rf dummy.tmp *.tmp
 # Report failure.  Note that all output regarding the tests goes to stdout.
 # That saves an unpleasant mixture of stdout and stderr to sort out.
 ZTST_testfailed() {
-  print "Test $ZTST_testname failed: $1"
+  print -r "Test $ZTST_testname failed: $1"
  if [[ -n $ZTST_message ]]; then
-    print "Was testing: $ZTST_message"
+    print -r "Was testing: $ZTST_message"
  fi
  print -r "$ZTST_testname: test failed."
  ZTST_cleanup
  exit 1
 }
@ -79,7 +98,7 @@ ZTST_testfailed() {
 ZTST_verbose() {
  local lev=$1
  shift
-  [[ -n $ZTST_verbose && $ZTST_verbose -ge $lev ]] && print $* >&8
+  [[ -n $ZTST_verbose && $ZTST_verbose -ge $lev ]] && print -- $* >&8
 }
 [[ ! -r $ZTST_testname ]] && ZTST_testfailed "can't read test file."
@ -97,7 +116,7 @@ ZTST_cursect=''
 ZTST_getline() {
  local IFS=
  while true; do
-    read ZTST_curline <&9 || return 1
+    read -r ZTST_curline <&9 || return 1
    [[ $ZTST_curline == \#* ]] || return 0
  done
 }
@ -144,7 +163,7 @@ $ZTST_code"
 # Read in a piece for redirection.
 ZTST_getredir() {
-  local char=${ZTST_curline[1]}
+  local char=${ZTST_curline[1]} fn
  ZTST_redir=${ZTST_curline[2,-1]}
  while ZTST_getline; do
    [[ $ZTST_curline[1] = $char ]] || break
@ -153,6 +172,22 @@ ${ZTST_curline[2,-1]}"
  done
  ZTST_verbose 2 "ZTST_getredir: read redir for '$char':
 $ZTST_redir"
 case $char in
  '<') fn=$ZTST_in
       ;;
  '>') fn=$ZTST_out
       ;;
  '?') fn=$ZTST_err
       ;;
   *)  ZTST_testfailed "bad redir operator: $char"
       ;;
 esac
 if [[ $ZTST_flags = *q* ]]; then
  print -r -- "${(e)ZTST_redir}" >>$fn
 else
  print -r -- "$ZTST_redir" >>$fn
 fi
 }
 # Execute an indented chunk.  Redirections will already have
@ -209,27 +244,24 @@ $ZTST_curline"
 	    fi
 	    ;;
 	[[:space:]]##[^[:space:]]*) ZTST_getchunk
-	  [[ $ZTST_curline != [-0-9]* ]] &&
+	  if [[ $ZTST_curline == (#b)([-0-9]##)([[:alpha:]]#)(:*)# ]]; then
 	    ZTST_xstatus=$match[1]
 	    ZTST_flags=$match[2]
 	    ZTST_message=${match[3]:+${match[3][2,-1]}}
 	  else
 	    ZTST_testfailed "expecting test status at:
 $ZTST_curline"
          ZTST_xstatus=$ZTST_curline
 	  if [[ $ZTST_curline == (#b)([^:]##):(*) ]]; then
 	    ZTST_xstatus=$match[1]
 	    ZTST_message=$match[2]
 	  fi
 	  ZTST_getline
 	  found=1
 	  ;;
 	'<'*) ZTST_getredir
 	  print -r "${(e)ZTST_redir}" >>$ZTST_in
 	  found=1
 	  ;;
 	'>'*) ZTST_getredir
          print -r "${(e)ZTST_redir}" >>$ZTST_out
 	  found=1
 	  ;;
 	'?'*) ZTST_getredir
 	  print -r "${(e)ZTST_redir}" >>$ZTST_err
 	  found=1
 	  ;;
 	*) ZTST_testfailed "bad line in test block:
@ -240,8 +272,7 @@ $ZTST_curline"
    # If we found some code to execute...
    if [[ -n $ZTST_code ]]; then
-      ZTST_verbose 1 "Running test:
+      ZTST_verbose 1 "Running test: $ZTST_message"
 $ZTST_message"
      ZTST_verbose 2 "ZTST_test: expecting status: $ZTST_xstatus"
      ZTST_execchunk <$ZTST_in >$ZTST_tout 2>$ZTST_terr
@ -249,7 +280,9 @@ $ZTST_message"
      # First check we got the right status, if specified.
      if [[ $ZTST_xstatus != - && $ZTST_xstatus != $ZTST_status ]]; then
 	ZTST_testfailed "bad status $ZTST_status, expected $ZTST_xstatus from:
-$ZTST_code"
+$ZTST_code${$(<$ZTST_terr):+
 Error output:
 $(<$ZTST_terr)}"
      fi
      ZTST_verbose 2 "ZTST_test: test produced standard output:
@ -258,11 +291,13 @@ ZTST_test: and standard error:
 $(<$ZTST_terr)"
      # Now check output and error.
-      if ! diff -c $ZTST_out $ZTST_tout; then
+      if [[ $ZTST_flags != *d* ]] && ! diff -c $ZTST_out $ZTST_tout; then
 	ZTST_testfailed "output differs from expected as shown above for:
-$ZTST_code"
+$ZTST_code${$(<$ZTST_terr):+
 Error output:
 $(<$ZTST_terr)}"
      fi
-      if ! diff -c $ZTST_err $ZTST_terr; then
+      if [[ $ZTST_flags != *D* ]] && ! diff -c $ZTST_err $ZTST_terr; then
 	ZTST_testfailed "error output differs from expected as shown above for:
 $ZTST_code"
      fi