diff --git a/ChangeLog b/ChangeLog index 2203b9dc8..774d7044b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +2001-04-19 Bart Schaefer + + * 14008, 1405x: Src/lex.c, Src/math.c, Src/params.c, + Test/D06subscript.ztst: Improve parsing of subscripts so that + many forms that failed before, work now, particularly for assoc + array keys; create a test suite. + + * 14008: Src/hist.c: Don't getsparam("HISTFILE") until it is + actually needed, because other parameter expansions reset the + history mechanism for subscript parsing. + + * 14008: Src/builtin.c: Keep track of the new Param created when + typeset_single() calls setsparam() in some circumstances; drop a + redundant isident() test. + 2001-04-19 Peter Stephenson * 14046: Doc/Zle/zle.yo, Src/Zle/zle_keymap.c: bindkey -rp removes diff --git a/Src/builtin.c b/Src/builtin.c index 627a3b82c..0f02ae0a9 100644 --- a/Src/builtin.c +++ b/Src/builtin.c @@ -1690,8 +1690,8 @@ typeset_single(char *cname, char *pname, Param pm, int func, delenv(pm->env); pm->env = NULL; } - if (value) - setsparam(pname, ztrdup(value)); + if (value && !(pm = setsparam(pname, ztrdup(value)))) + return 0; } else if (value) { zwarnnam(cname, "can't assign new value for array %s", pname, 0); return NULL; @@ -1807,9 +1807,10 @@ typeset_single(char *cname, char *pname, Param pm, int func, pm->level = keeplocal; else if (on & PM_LOCAL) pm->level = locallevel; - if (value && !(pm->flags & (PM_ARRAY|PM_HASHED))) - setsparam(pname, ztrdup(value)); - else if (newspecial && !(pm->old->flags & PM_NORESTORE)) { + if (value && !(pm->flags & (PM_ARRAY|PM_HASHED))) { + if (!(pm = setsparam(pname, ztrdup(value)))) + return 0; + } else if (newspecial && !(pm->old->flags & PM_NORESTORE)) { /* * We need to use the special setting function to re-initialise * the special parameter to empty. @@ -2061,12 +2062,6 @@ bin_typeset(char *name, char **argv, char *ops, int func) /* Take arguments literally. Don't glob */ while ((asg = getasg(*argv++))) { - /* check if argument is a valid identifier */ - if (!isident(asg->name)) { - zerr("not an identifier: %s", asg->name, 0); - returnval = 1; - continue; - } if (!typeset_single(name, asg->name, (Param) (paramtab == realparamtab ? gethashnode2(paramtab, asg->name) : diff --git a/Src/hist.c b/Src/hist.c index a80a21967..58fe748be 100644 --- a/Src/hist.c +++ b/Src/hist.c @@ -1011,7 +1011,6 @@ hend(Eprog prog) DPUTS(stophist != 2 && !(inbufflags & INP_ALIAS) && !chline, "BUG: chline is NULL in hend()"); queue_signals(); - hf = getsparam("HISTFILE"); if (histdone & HISTFLAG_SETTY) settyinfo(&shttyinfo); if (!(histactive & HA_NOINC)) @@ -1028,6 +1027,7 @@ hend(Eprog prog) && (hist_ignore_all_dups = isset(HISTIGNOREALLDUPS)) != 0) histremovedups(); /* For history sharing, lock history file once for both read and write */ + hf = getsparam("HISTFILE"); if (isset(SHAREHISTORY) && lockhistfile(hf, 0)) { readhistfile(hf, 0, HFILE_USE_OPTIONS | HFILE_FAST); curline.histnum = curhist+1; diff --git a/Src/lex.c b/Src/lex.c index de58ade7a..46d83cb21 100644 --- a/Src/lex.c +++ b/Src/lex.c @@ -1302,10 +1302,13 @@ dquote_parse(char endchar, int sub) c = hgetc(); if (c != '\n') { if (c == '$' || c == '\\' || (c == '}' && !intick && bct) || - c == endchar || c == '`') + c == endchar || c == '`' || + (math && (c == '[' || c == ']' || + c == '(' || c == ')' || + c == '{' || c == '}'))) add(Bnull); else { - /* lexstop is implicitely handled here */ + /* lexstop is implicitly handled here */ add('\\'); goto cont; } @@ -1458,6 +1461,38 @@ parsestrnoerr(char *s) return err; } +/**/ +mod_export char * +parse_subscript(char *s) +{ + int l = strlen(s), err; + char *t; + + if (!*s || *s == ']') + return 0; + lexsave(); + untokenize(t = dupstring(s)); + inpush(t, 0, NULL); + strinbeg(0); + len = 0; + bptr = tokstr = s; + bsiz = l + 1; + err = dquote_parse(']', 1); + if (err) { + err = *bptr; + *bptr = 0; + untokenize(s); + *bptr = err; + s = 0; + } else + s = bptr; + strinend(); + inpop(); + DPUTS(cmdsp, "BUG: parse_subscript: cmdstack not empty."); + lexrestore(); + return s; +} + /* Tokenize a string given in s. Parsing is done as if s were a normal * * command-line argument but it may contain separators. This is used * * to parse the right-hand side of ${...%...} substitutions. */ diff --git a/Src/math.c b/Src/math.c index bad958243..0ea16e7ba 100644 --- a/Src/math.c +++ b/Src/math.c @@ -517,6 +517,7 @@ setvar(char *s, mnumber v) } if (noeval) return v; + untokenize(s); setnparam(s, v); return v; } diff --git a/Src/params.c b/Src/params.c index 90dd5f3c7..aaeebce76 100644 --- a/Src/params.c +++ b/Src/params.c @@ -770,38 +770,18 @@ isident(char *s) if (!iident(*ss)) break; -#if 0 - /* If this exhaust `s' or the next two characters * - * are [(, then it is a valid identifier. */ - if (!*ss || (*ss == '[' && ss[1] == '(')) - return 1; - - /* Else if the next character is not [, then it is * - * definitely not a valid identifier. */ - if (*ss != '[') - return 0; - - noeval = 1; - (void)mathevalarg(++ss, &ss); - if (*ss == ',') - (void)mathevalarg(++ss, &ss); - noeval = ne; /* restore the value of noeval */ - if (*ss != ']' || ss[1]) - return 0; - return 1; -#else /* If the next character is not [, then it is * - * definitely not a valid identifier. */ + * definitely not a valid identifier. */ if (!*ss) return 1; if (*ss != '[') return 0; - /* Require balanced [ ] pairs */ - if (skipparens('[', ']', &ss)) + /* Require balanced [ ] pairs with something between */ + if (!(ss = parse_subscript(++ss))) return 0; - return !*ss; -#endif + untokenize(s); + return !ss[1]; } /**/ @@ -933,8 +913,21 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w) } for (t = s, i = 0; - (c = *t) && ((c != ']' && c != Outbrack && + (c = *t) && ((c != Outbrack && (ishash || c != ',')) || i); t++) { + /* Untokenize INULL() except before brackets, for parsestr() */ + if (INULL(c)) { + if (t[1] == '[' || t[1] == ']') { + /* This test handles nested subscripts in hash keys */ + if (ishash && i) + *t = ztokens[c - Pound]; + needtok = 1; + ++t; + } else + *t = ztokens[c - Pound]; + continue; + } + /* Inbrack and Outbrack are probably never found here ... */ if (c == '[' || c == Inbrack) i++; else if (c == ']' || c == Outbrack) @@ -946,11 +939,18 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w) return 0; s = dupstrpfx(s, t - s); *str = tt = t; + /* If we're NOT reverse subscripting, strip the INULL()s so brackets * + * are not backslashed after parsestr(). Otherwise leave them alone * + * so that the brackets will be escaped when we patcompile() or when * + * subscript arithmetic is performed (for nested subscripts). */ + if (ishash && !rev) + remnulargs(s); if (needtok) { if (parsestr(s)) return 0; singsub(&s); - } + } else if (rev) + remnulargs(s); /* This is probably always a no-op, but ... */ if (!rev) { if (ishash) { HashTable ht = v->pm->gets.hfn(v->pm); @@ -1019,7 +1019,7 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w) s = d; } } else { - if (!l || s[l - 1] != '*') { + if (!l || s[l - 1] != '*' || (l > 1 && s[l - 2] == '\\')) { d = (char *) hcalloc(l + 2); strcpy(d, s); strcat(d, "*"); @@ -1028,6 +1028,7 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w) } } tokenize(s); + remnulargs(s); if (keymatch || (pprog = patcompile(s, 0, NULL))) { int len; @@ -1156,7 +1157,7 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w) return r; } } - return 0; + return down ? 0 : len + 1; } } } @@ -1170,13 +1171,27 @@ getindex(char **pptr, Value v) int start, end, inv = 0; char *s = *pptr, *tbrack; - *s++ = '['; - for (tbrack = s; *tbrack && *tbrack != ']' && *tbrack != Outbrack; tbrack++) + *s++ = Inbrack; + s = parse_subscript(s); /* Error handled after untokenizing */ + /* Now we untokenize everthing except INULL() markers so we can check * + * for the '*' and '@' special subscripts. The INULL()s are removed * + * in getarg() after we know whether we're doing reverse indexing. */ + for (tbrack = *pptr + 1; *tbrack && tbrack != s; tbrack++) { + if (INULL(*tbrack) && !*++tbrack) + break; if (itok(*tbrack)) *tbrack = ztokens[*tbrack - Pound]; - if (*tbrack == Outbrack) - *tbrack = ']'; - if ((s[0] == '*' || s[0] == '@') && s[1] == ']') { + } + /* If we reached the end of the string (s == NULL) we have an error */ + if (*tbrack) + *tbrack = Outbrack; + else { + zerr("invalid subscript", NULL, 0); + *pptr = tbrack; + return 1; + } + s = *pptr + 1; + if ((s[0] == '*' || s[0] == '@') && s[1] == Outbrack) { if ((v->isarr || IS_UNSET_VALUE(v)) && s[0] == '@') v->isarr |= SCANPM_ISVAR_AT; v->start = 0; @@ -1208,12 +1223,12 @@ getindex(char **pptr, Value v) } if (*s == ',') { zerr("invalid subscript", NULL, 0); - while (*s != ']' && *s != Outbrack) + while (*s && *s != Outbrack) s++; *pptr = s; return 1; } - if (*s == ']' || *s == Outbrack) + if (*s == Outbrack) s++; } else { int com; @@ -1228,7 +1243,7 @@ getindex(char **pptr, Value v) start--; else if (start == 0 && end == 0) end++; - if (*s == ']' || *s == Outbrack) { + if (*s == Outbrack) { s++; if (v->isarr && start == end-1 && !com && (!(v->isarr & SCANPM_MATCHMANY) || diff --git a/Test/D06subscript.ztst b/Test/D06subscript.ztst new file mode 100644 index 000000000..6438dc040 --- /dev/null +++ b/Test/D06subscript.ztst @@ -0,0 +1,130 @@ +# Test parameter subscripting. + +%prep + + s='Twinkle, twinkle, little *, [how] I [wonder] what? You are!' + a=('1' ']' '?' '\2' '\]' '\?' '\\3' '\\]' '\\?' '\\\4' '\\\]' '\\\?') + typeset -g -A A + A=($a) + +%test + + x=',' + print $s[(i)winkle] $s[(I)winkle] + print ${s[(i)You are]} $#s + print ${s[(r)$x,(R)$x]} +0:Scalar pattern subscripts without wildcards +>2 11 +>53 60 +>, twinkle, little *, + + x='*' + print $s[(i)*] $s[(i)\*] $s[(i)$x*] $s[(i)${(q)x}*] $s[(I)$x\*] + print $s[(r)?,(R)\?] $s[(r)\?,(R)?] + print $s[(r)\*,(R)*] + print $s[(r)\],(R)\[] +0:Scalar pattern subscripts with wildcards +>1 26 1 26 26 +>Twinkle, twinkle, little *, [how] I [wonder] what? ? You are! +>*, [how] I [wonder] what? You are! +>] I [ + + # $s[(R)x] actually is $s[0], but zsh treats 0 as 1 for subscripting. + print $s[(i)x] : $s[(I)x] + print $s[(r)x] : $s[(R)x] +0:Scalar pattern subscripts that do not match +>61 : 0 +>: T + + print -R $s[$s[(i)\[]] $s[(i)$s[(r)\*]] $s[(i)${(q)s[(r)\]]}] +0:Scalar subscripting using a pattern subscript to get the index +>[ 1 33 + + print -R $a[(r)?] $a[(R)?] + print $a[(n:2:i)?] $a[(n:2:I)?] + print $a[(i)\?] $a[(I)\?] + print $a[(i)*] $a[(i)\*] +0:Array pattern subscripts +>1 ? +>2 2 +>3 3 +>1 13 + + # It'd be nice to do some of the following with (r), but we run into + # limitations of the ztst script parsing of backslashes in the output. + print -R $a[(i)\\\\?] $a[(i)\\\\\?] + print -R $a[(i)\\\\\\\\?] $a[(i)\\\\\\\\\?] + print -R ${a[(i)\\\\\\\\?]} ${a[(i)\\\\\\\\\?]} + print -R "$a[(i)\\\\\\\\?] $a[(i)\\\\\\\\\?]" + print -R $a[(i)\]] $a[(i)\\\\\]] $a[(i)\\\\\\\\\]] $a[(i)\\\\\\\\\\\\\]] + print -R $a[(i)${(q)a[5]}] $a[(i)${(q)a[8]}] $a[(i)${(q)a[11]}] + print -R $a[(i)${a[3]}] $a[(i)${a[6]}] $a[(i)${a[9]}] $a[(i)${a[12]}] +0:Array pattern subscripts with multiple backslashes +>4 6 +>7 9 +>7 9 +>7 9 +>2 5 8 11 +>5 8 11 +>1 3 4 6 + + print -R $A[1] $A[?] $A[\\\\3] $A[\\\]] + print -R $A[$a[11]] + print -R $A[${(q)a[5]}] +0:Associative array lookup (direct subscripting) +>] \2 \\] \? +>\\\? +>\\\? + + # The (o) is necessary here for predictable output ordering + print -R $A[(I)\?] ${(o)A[(I)?]} + print -R $A[(i)\\\\\\\\3] + print -R $A[(I)\\\\\\\\\?] ${(o)A[(I)\\\\\\\\?]} +0:Associative array lookup (pattern subscripting) +>? 1 ? +>\\3 +>\\? \\3 \\? + + print -R $A[(R)\?] : ${(o)A[(R)?]} + print -R $A[(R)\\\\\?] ${(o)A[(R)\\\\?]} ${(o)A[(R)\\\\\?]} + print -R ${(o)A[(R)\\\\\\\\\]]} +0:Associative array lookup (reverse subscripting) +>: ] +>\? \2 \? \? +>\\] + + x='*' + A[$x]=xstar + A[${(q)x}]=qxstar + print -R ${(k)A[(r)xstar]} $A[$x] + print -R ${(k)A[(r)qxstar]} $A[${(q)x}] + # A[*] is interpreted specially, assignment to it fails silently (oops) + A[*]=star + A[\*]=backstar + print -R ${(k)A[(r)star]} $A[$x] + print -R ${(k)A[(r)backstar]} $A[\*] +0:Associative array assignment +>* xstar +>\* qxstar +>xstar +>\* backstar + + o='[' + c=']' + A[\]]=cbrack + A[\[]=obrack + A[\\\[]=backobrack + A[\\\]]=backcbrack + print -R $A[$o] $A[$c] $A[\[] $A[\]] $A[\\\[] $A[\\\]] + print -R $A[(i)\[] $A[(i)\]] $A[(i)\\\\\[] $A[(i)\\\\\]] +0:Associative array keys with open and close brackets +>obrack cbrack obrack cbrack backobrack backcbrack +>[ ] \[ \] + + print -R $A[$o] $A[$s[(r)\[]] + print -R $A[(r)$c] $A[(r)$s[(r)\]]] + print -R $A[$A[(i)\\\\\]]] +0:Associative array lookup using a pattern subscript to get the key +>obrack obrack +>] ] +>backcbrack