/*
 * Mirror of git://git.code.sf.net/p/zsh/code
 * (synced 2025-01-04 06:14:50 +01:00; 2130 lines, 46 KiB, C)
 */
/*
 * lex.c - lexical analysis
 *
 * This file is part of zsh, the Z shell.
 *
 * Copyright (c) 1992-1997 Paul Falstad
 * All rights reserved.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and to distribute modified versions of this software for any
 * purpose, provided that the above copyright notice and the following
 * two paragraphs appear in all copies of this software.
 *
 * In no event shall Paul Falstad or the Zsh Development Group be liable
 * to any party for direct, indirect, special, incidental, or consequential
 * damages arising out of the use of this software and its documentation,
 * even if Paul Falstad and the Zsh Development Group have been advised of
 * the possibility of such damage.
 *
 * Paul Falstad and the Zsh Development Group specifically disclaim any
 * warranties, including, but not limited to, the implied warranties of
 * merchantability and fitness for a particular purpose.  The software
 * provided hereunder is on an "as is" basis, and Paul Falstad and the
 * Zsh Development Group have no obligation to provide maintenance,
 * support, updates, enhancements, or modifications.
 *
 */
|
|
|
|
#include "zsh.mdh"
|
|
#include "lex.pro"
|
|
|
|
#define LEX_HEAP_SIZE (32)
|
|
|
|
/* tokens */
|
|
|
|
/**/
|
|
mod_export char ztokens[] = "#$^*(())$=|{}[]`<>>?~`,'\"\\\\";
|
|
|
|
/* parts of the current token */
|
|
|
|
/**/
|
|
char *zshlextext;
|
|
/**/
|
|
mod_export char *tokstr;
|
|
/**/
|
|
mod_export enum lextok tok;
|
|
/**/
|
|
mod_export int tokfd;
|
|
|
|
/*
|
|
* Line number at which the first character of a token was found.
|
|
* We always set this in gettok(), which is always called from
|
|
* zshlex() unless we have reached an error. So it is always
|
|
* valid when parsing. It is not useful during execution
|
|
* of the parsed structure.
|
|
*/
|
|
|
|
/**/
|
|
zlong toklineno;
|
|
|
|
/* lexical analyzer error flag */
|
|
|
|
/**/
|
|
mod_export int lexstop;
|
|
|
|
/* if != 0, this is the first line of the command */
|
|
|
|
/**/
|
|
mod_export int isfirstln;
|
|
|
|
/* if != 0, this is the first char of the command (not including white space) */
|
|
|
|
/**/
|
|
int isfirstch;
|
|
|
|
/* flag that an alias should be expanded after expansion ending in space */
|
|
|
|
/**/
|
|
int inalmore;
|
|
|
|
/*
|
|
* Don't do spelling correction.
|
|
* Bit 1 is only valid for the current word. It's
|
|
* set when we detect a lookahead that stops the word from
|
|
* needing correction.
|
|
*/
|
|
|
|
/**/
|
|
int nocorrect;
|
|
|
|
/*
|
|
* TBD: the following exported variables are part of the non-interface
|
|
* with ZLE for completion. They are poorly named and the whole
|
|
* scheme is incredibly brittle. One piece of robustness is applied:
|
|
* the variables are only set if LEXFLAGS_ZLE is set. Improvements
|
|
* should therefore concentrate on areas with this flag set.
|
|
*
|
|
* Cursor position and line length in zle when the line is
|
|
* metafied for access from the main shell.
|
|
*/
|
|
|
|
/**/
|
|
mod_export int zlemetacs, zlemetall;
|
|
|
|
/* inwhat says what exactly we are in *
|
|
* (its value is one of the IN_* things). */
|
|
|
|
/**/
|
|
mod_export int inwhat;
|
|
|
|
/* 1 if x added to complete in a blank between words */
|
|
|
|
/**/
|
|
mod_export int addedx;
|
|
|
|
/* wb and we hold the beginning/end position of the word we are completing. */
|
|
|
|
/**/
|
|
mod_export int wb, we;
|
|
|
|
/**/
|
|
mod_export int wordbeg;
|
|
|
|
/**/
|
|
mod_export int parbegin;
|
|
|
|
/**/
|
|
mod_export int parend;
|
|
|
|
|
|
/* 1 if aliases should not be expanded */
|
|
|
|
/**/
|
|
mod_export int noaliases;
|
|
|
|
/*
 * If non-zero, we are parsing a line sent to us by the editor, or some
 * other string that's not part of standard command input (e.g. eval is
 * part of normal command input).
 *
 * Set of bits from LEXFLAGS_*.
 *
 * Note that although it is passed into the lexer as an input, the
 * lexer can set it to zero after finding the word it's searching for.
 * This only happens if the line being parsed actually does come from
 * ZLE, and hence the bit LEXFLAGS_ZLE is set.
 */
|
|
|
|
/**/
|
|
mod_export int lexflags;
|
|
|
|
/* don't recognize comments */
|
|
|
|
/**/
|
|
mod_export int nocomments;
|
|
|
|
/* add raw input characters while parsing command substitution */
|
|
|
|
/**/
|
|
static int lex_add_raw;
|
|
|
|
/* variables associated with the above */
|
|
|
|
static char *tokstr_raw;
|
|
static struct lexbufstate lexbuf_raw;
|
|
|
|
/* text of punctuation tokens */
|
|
|
|
/**/
|
|
mod_export char *tokstrings[WHILE + 1] = {
|
|
NULL, /* NULLTOK 0 */
|
|
";", /* SEPER */
|
|
"\\n", /* NEWLIN */
|
|
";", /* SEMI */
|
|
";;", /* DSEMI */
|
|
"&", /* AMPER 5 */
|
|
"(", /* INPAR */
|
|
")", /* OUTPAR */
|
|
"||", /* DBAR */
|
|
"&&", /* DAMPER */
|
|
">", /* OUTANG 10 */
|
|
">|", /* OUTANGBANG */
|
|
">>", /* DOUTANG */
|
|
">>|", /* DOUTANGBANG */
|
|
"<", /* INANG */
|
|
"<>", /* INOUTANG 15 */
|
|
"<<", /* DINANG */
|
|
"<<-", /* DINANGDASH */
|
|
"<&", /* INANGAMP */
|
|
">&", /* OUTANGAMP */
|
|
"&>", /* AMPOUTANG 20 */
|
|
"&>|", /* OUTANGAMPBANG */
|
|
">>&", /* DOUTANGAMP */
|
|
">>&|", /* DOUTANGAMPBANG */
|
|
"<<<", /* TRINANG */
|
|
"|", /* BAR 25 */
|
|
"|&", /* BARAMP */
|
|
"()", /* INOUTPAR */
|
|
"((", /* DINPAR */
|
|
"))", /* DOUTPAR */
|
|
"&|", /* AMPERBANG 30 */
|
|
";&", /* SEMIAMP */
|
|
";|", /* SEMIBAR */
|
|
};
|
|
|
|
/* lexical state */
|
|
|
|
static int dbparens;
|
|
static struct lexbufstate lexbuf = { NULL, 256, 0 };
|
|
|
|
/* save lexical context */
|
|
|
|
/**/
|
|
void
|
|
lex_context_save(struct lex_stack *ls, int toplevel)
|
|
{
|
|
(void)toplevel;
|
|
|
|
ls->dbparens = dbparens;
|
|
ls->isfirstln = isfirstln;
|
|
ls->isfirstch = isfirstch;
|
|
ls->lexflags = lexflags;
|
|
|
|
ls->tok = tok;
|
|
ls->tokstr = tokstr;
|
|
ls->zshlextext = zshlextext;
|
|
ls->lexbuf = lexbuf;
|
|
ls->lex_add_raw = lex_add_raw;
|
|
ls->tokstr_raw = tokstr_raw;
|
|
ls->lexbuf_raw = lexbuf_raw;
|
|
ls->lexstop = lexstop;
|
|
ls->toklineno = toklineno;
|
|
|
|
tokstr = zshlextext = lexbuf.ptr = NULL;
|
|
lexbuf.siz = 256;
|
|
tokstr_raw = lexbuf_raw.ptr = NULL;
|
|
lexbuf_raw.siz = lexbuf_raw.len = lex_add_raw = 0;
|
|
}
|
|
|
|
/* restore lexical context */
|
|
|
|
/**/
|
|
mod_export void
|
|
lex_context_restore(const struct lex_stack *ls, int toplevel)
|
|
{
|
|
(void)toplevel;
|
|
|
|
dbparens = ls->dbparens;
|
|
isfirstln = ls->isfirstln;
|
|
isfirstch = ls->isfirstch;
|
|
lexflags = ls->lexflags;
|
|
tok = ls->tok;
|
|
tokstr = ls->tokstr;
|
|
zshlextext = ls->zshlextext;
|
|
lexbuf = ls->lexbuf;
|
|
lex_add_raw = ls->lex_add_raw;
|
|
tokstr_raw = ls->tokstr_raw;
|
|
lexbuf_raw = ls->lexbuf_raw;
|
|
lexstop = ls->lexstop;
|
|
toklineno = ls->toklineno;
|
|
}
|
|
|
|
/**/
|
|
void
|
|
zshlex(void)
|
|
{
|
|
if (tok == LEXERR)
|
|
return;
|
|
do
|
|
tok = gettok();
|
|
while (tok != ENDINPUT && exalias());
|
|
nocorrect &= 1;
|
|
if (tok == NEWLIN || tok == ENDINPUT) {
|
|
while (hdocs) {
|
|
struct heredocs *next = hdocs->next;
|
|
char *doc, *munged_term;
|
|
|
|
hwbegin(0);
|
|
cmdpush(hdocs->type == REDIR_HEREDOC ? CS_HEREDOC : CS_HEREDOCD);
|
|
munged_term = dupstring(hdocs->str);
|
|
STOPHIST
|
|
doc = gethere(&munged_term, hdocs->type);
|
|
ALLOWHIST
|
|
cmdpop();
|
|
hwend();
|
|
if (!doc) {
|
|
zerr("here document too large");
|
|
while (hdocs) {
|
|
next = hdocs->next;
|
|
zfree(hdocs, sizeof(struct heredocs));
|
|
hdocs = next;
|
|
}
|
|
tok = LEXERR;
|
|
break;
|
|
}
|
|
setheredoc(hdocs->pc, REDIR_HERESTR, doc, hdocs->str,
|
|
munged_term);
|
|
zfree(hdocs, sizeof(struct heredocs));
|
|
hdocs = next;
|
|
}
|
|
}
|
|
if (tok != NEWLIN)
|
|
isnewlin = 0;
|
|
else
|
|
isnewlin = (inbufct) ? -1 : 1;
|
|
if (tok == SEMI || (tok == NEWLIN && !(lexflags & LEXFLAGS_NEWLINE)))
|
|
tok = SEPER;
|
|
}
|
|
|
|
/**/
|
|
mod_export void
|
|
ctxtlex(void)
|
|
{
|
|
static int oldpos;
|
|
|
|
zshlex();
|
|
switch (tok) {
|
|
case SEPER:
|
|
case NEWLIN:
|
|
case SEMI:
|
|
case DSEMI:
|
|
case SEMIAMP:
|
|
case SEMIBAR:
|
|
case AMPER:
|
|
case AMPERBANG:
|
|
case INPAR:
|
|
case INBRACE:
|
|
case DBAR:
|
|
case DAMPER:
|
|
case BAR:
|
|
case BARAMP:
|
|
case INOUTPAR:
|
|
case DOLOOP:
|
|
case THEN:
|
|
case ELIF:
|
|
case ELSE:
|
|
case DOUTBRACK:
|
|
incmdpos = 1;
|
|
break;
|
|
case STRING:
|
|
case TYPESET:
|
|
/* case ENVSTRING: */
|
|
case ENVARRAY:
|
|
case OUTPAR:
|
|
case CASE:
|
|
case DINBRACK:
|
|
incmdpos = 0;
|
|
break;
|
|
|
|
default:
|
|
/* nothing to do, keep compiler happy */
|
|
break;
|
|
}
|
|
if (tok != DINPAR)
|
|
infor = tok == FOR ? 2 : 0;
|
|
if (IS_REDIROP(tok) || tok == FOR || tok == FOREACH || tok == SELECT) {
|
|
inredir = 1;
|
|
oldpos = incmdpos;
|
|
incmdpos = 0;
|
|
} else if (inredir) {
|
|
incmdpos = oldpos;
|
|
inredir = 0;
|
|
}
|
|
}
|
|
|
|
#define LX1_BKSLASH 0
|
|
#define LX1_COMMENT 1
|
|
#define LX1_NEWLIN 2
|
|
#define LX1_SEMI 3
|
|
#define LX1_AMPER 5
|
|
#define LX1_BAR 6
|
|
#define LX1_INPAR 7
|
|
#define LX1_OUTPAR 8
|
|
#define LX1_INANG 13
|
|
#define LX1_OUTANG 14
|
|
#define LX1_OTHER 15
|
|
|
|
#define LX2_BREAK 0
|
|
#define LX2_OUTPAR 1
|
|
#define LX2_BAR 2
|
|
#define LX2_STRING 3
|
|
#define LX2_INBRACK 4
|
|
#define LX2_OUTBRACK 5
|
|
#define LX2_TILDE 6
|
|
#define LX2_INPAR 7
|
|
#define LX2_INBRACE 8
|
|
#define LX2_OUTBRACE 9
|
|
#define LX2_OUTANG 10
|
|
#define LX2_INANG 11
|
|
#define LX2_EQUALS 12
|
|
#define LX2_BKSLASH 13
|
|
#define LX2_QUOTE 14
|
|
#define LX2_DQUOTE 15
|
|
#define LX2_BQUOTE 16
|
|
#define LX2_COMMA 17
|
|
#define LX2_OTHER 18
|
|
#define LX2_META 19
|
|
|
|
static unsigned char lexact1[256], lexact2[256], lextok2[256];
|
|
|
|
/**/
|
|
void
|
|
initlextabs(void)
|
|
{
|
|
int t0;
|
|
static char *lx1 = "\\q\n;!&|(){}[]<>";
|
|
static char *lx2 = ";)|$[]~({}><=\\\'\"`,";
|
|
|
|
for (t0 = 0; t0 != 256; t0++) {
|
|
lexact1[t0] = LX1_OTHER;
|
|
lexact2[t0] = LX2_OTHER;
|
|
lextok2[t0] = t0;
|
|
}
|
|
for (t0 = 0; lx1[t0]; t0++)
|
|
lexact1[(int)lx1[t0]] = t0;
|
|
for (t0 = 0; lx2[t0]; t0++)
|
|
lexact2[(int)lx2[t0]] = t0;
|
|
lexact2['&'] = LX2_BREAK;
|
|
lexact2[STOUC(Meta)] = LX2_META;
|
|
lextok2['*'] = Star;
|
|
lextok2['?'] = Quest;
|
|
lextok2['{'] = Inbrace;
|
|
lextok2['['] = Inbrack;
|
|
lextok2['$'] = String;
|
|
lextok2['~'] = Tilde;
|
|
lextok2['#'] = Pound;
|
|
lextok2['^'] = Hat;
|
|
}
|
|
|
|
/* initialize lexical state */
|
|
|
|
/**/
|
|
void
|
|
lexinit(void)
|
|
{
|
|
nocorrect = dbparens = lexstop = 0;
|
|
tok = ENDINPUT;
|
|
}
|
|
|
|
/* add a char to the string buffer */
|
|
|
|
/**/
|
|
void
|
|
add(int c)
|
|
{
|
|
*lexbuf.ptr++ = c;
|
|
if (lexbuf.siz == ++lexbuf.len) {
|
|
int newbsiz = lexbuf.siz * 2;
|
|
|
|
if (newbsiz > inbufct && inbufct > lexbuf.siz)
|
|
newbsiz = inbufct;
|
|
|
|
tokstr = (char *)hrealloc(tokstr, lexbuf.siz, newbsiz);
|
|
lexbuf.ptr = tokstr + lexbuf.len;
|
|
/* len == bsiz, so bptr is at the start of newly allocated memory */
|
|
memset(lexbuf.ptr, 0, newbsiz - lexbuf.siz);
|
|
lexbuf.siz = newbsiz;
|
|
}
|
|
}
|
|
|
|
/*
 * Record in parbegin/parend where a backquoted command substitution
 * starts and ends.  Both macros act only when LEXFLAGS_ZLE is set and
 * the input does not come from an alias, and both compare zlemetacs
 * against the current input position.
 * They expand to a brace block and are used with no trailing semicolon.
 */
#define SETPARBEGIN { \
        if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS)) { \
            if (zlemetacs >= zlemetall + 1 - inbufct) \
                parbegin = inbufct; \
        } \
    }
#define SETPAREND { \
        if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS)) { \
            if (parbegin != -1 && parend == -1) { \
                if (zlemetacs >= zlemetall + 1 - inbufct) \
                    parbegin = -1; \
                else \
                    parend = inbufct; \
            } \
        } \
    }
|
|
|
|
enum {
|
|
CMD_OR_MATH_CMD,
|
|
CMD_OR_MATH_MATH,
|
|
CMD_OR_MATH_ERR
|
|
};
|
|
|
|
/*
|
|
* Return one of the above. If it couldn't be
|
|
* parsed as math, but there was no gross error, it's a command.
|
|
*/
|
|
|
|
static int
|
|
cmd_or_math(int cs_type)
|
|
{
|
|
int oldlen = lexbuf.len;
|
|
int c;
|
|
int oinflags = inbufflags;
|
|
|
|
cmdpush(cs_type);
|
|
inbufflags |= INP_APPEND;
|
|
c = dquote_parse(')', 0);
|
|
if (!(oinflags & INP_APPEND))
|
|
inbufflags &= ~INP_APPEND;
|
|
cmdpop();
|
|
*lexbuf.ptr = '\0';
|
|
if (!c) {
|
|
/* Successfully parsed, see if it was math */
|
|
c = hgetc();
|
|
if (c == ')')
|
|
return CMD_OR_MATH_MATH; /* yes */
|
|
hungetc(c);
|
|
lexstop = 0;
|
|
c = ')';
|
|
} else if (lexstop) {
|
|
/* we haven't got anything to unget */
|
|
return CMD_OR_MATH_ERR;
|
|
}
|
|
/* else unsuccessful: unget the whole thing */
|
|
hungetc(c);
|
|
lexstop = 0;
|
|
while (lexbuf.len > oldlen && !(errflag & ERRFLAG_ERROR)) {
|
|
lexbuf.len--;
|
|
hungetc(itok(*--lexbuf.ptr) ?
|
|
ztokens[*lexbuf.ptr - Pound] : *lexbuf.ptr);
|
|
}
|
|
if (errflag)
|
|
return CMD_OR_MATH_ERR;
|
|
hungetc('(');
|
|
return errflag ? CMD_OR_MATH_ERR : CMD_OR_MATH_CMD;
|
|
}
|
|
|
|
|
|
/*
|
|
* Parse either a $(( ... )) or a $(...)
|
|
* Return the same as cmd_or_math().
|
|
*/
|
|
static int
|
|
cmd_or_math_sub(void)
|
|
{
|
|
int c = hgetc(), ret;
|
|
|
|
if (c == '(') {
|
|
int lexpos = (int)(lexbuf.ptr - tokstr);
|
|
add(Inpar);
|
|
add('(');
|
|
if ((ret = cmd_or_math(CS_MATHSUBST)) == CMD_OR_MATH_MATH) {
|
|
tokstr[lexpos] = Inparmath;
|
|
add(')');
|
|
return CMD_OR_MATH_MATH;
|
|
}
|
|
if (ret == CMD_OR_MATH_ERR)
|
|
return CMD_OR_MATH_ERR;
|
|
lexbuf.ptr -= 2;
|
|
lexbuf.len -= 2;
|
|
} else {
|
|
hungetc(c);
|
|
lexstop = 0;
|
|
}
|
|
return skipcomm() ? CMD_OR_MATH_ERR : CMD_OR_MATH_CMD;
|
|
}
|
|
|
|
/* Check whether we're looking at valid numeric globbing syntax *
|
|
* (/\<[0-9]*-[0-9]*\>/). Call pointing just after the opening "<". *
|
|
* Leaves the input in the same place, returning 0 or 1. */
|
|
|
|
/**/
|
|
static int
|
|
isnumglob(void)
|
|
{
|
|
int c, ec = '-', ret = 0;
|
|
int tbs = 256, n = 0;
|
|
char *tbuf = (char *)zalloc(tbs);
|
|
|
|
while(1) {
|
|
c = hgetc();
|
|
if(lexstop) {
|
|
lexstop = 0;
|
|
break;
|
|
}
|
|
tbuf[n++] = c;
|
|
if(!idigit(c)) {
|
|
if(c != ec)
|
|
break;
|
|
if(ec == '>') {
|
|
ret = 1;
|
|
break;
|
|
}
|
|
ec = '>';
|
|
}
|
|
if(n == tbs)
|
|
tbuf = (char *)realloc(tbuf, tbs *= 2);
|
|
}
|
|
while(n--)
|
|
hungetc(tbuf[n]);
|
|
zfree(tbuf, tbs);
|
|
return ret;
|
|
}
|
|
|
|
/**/
|
|
static enum lextok
|
|
gettok(void)
|
|
{
|
|
int c, d;
|
|
int peekfd = -1;
|
|
enum lextok peek;
|
|
|
|
beginning:
|
|
tokstr = NULL;
|
|
while (iblank(c = hgetc()) && !lexstop);
|
|
toklineno = lineno;
|
|
if (lexstop)
|
|
return (errflag) ? LEXERR : ENDINPUT;
|
|
isfirstln = 0;
|
|
if ((lexflags & LEXFLAGS_ZLE))
|
|
wordbeg = inbufct - (qbang && c == bangchar);
|
|
hwbegin(-1-(qbang && c == bangchar));
|
|
/* word includes the last character read and possibly \ before ! */
|
|
if (dbparens) {
|
|
lexbuf.len = 0;
|
|
lexbuf.ptr = tokstr = (char *) hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
|
|
hungetc(c);
|
|
cmdpush(CS_MATH);
|
|
c = dquote_parse(infor ? ';' : ')', 0);
|
|
cmdpop();
|
|
*lexbuf.ptr = '\0';
|
|
if (!c && infor) {
|
|
infor--;
|
|
return DINPAR;
|
|
}
|
|
if (c || (c = hgetc()) != ')') {
|
|
hungetc(c);
|
|
return LEXERR;
|
|
}
|
|
dbparens = 0;
|
|
return DOUTPAR;
|
|
} else if (idigit(c)) { /* handle 1< foo */
|
|
d = hgetc();
|
|
if(d == '&') {
|
|
d = hgetc();
|
|
if(d == '>') {
|
|
peekfd = c - '0';
|
|
hungetc('>');
|
|
c = '&';
|
|
} else {
|
|
hungetc(d);
|
|
lexstop = 0;
|
|
hungetc('&');
|
|
}
|
|
} else if (d == '>' || d == '<') {
|
|
peekfd = c - '0';
|
|
c = d;
|
|
} else {
|
|
hungetc(d);
|
|
lexstop = 0;
|
|
}
|
|
}
|
|
|
|
/* chars in initial position in word */
|
|
|
|
/*
|
|
* Handle comments. There are some special cases when this
|
|
* is not normal command input: lexflags implies we are examining
|
|
* a line lexically without it being used for normal command input.
|
|
*/
|
|
if (c == hashchar && !nocomments &&
|
|
(isset(INTERACTIVECOMMENTS) ||
|
|
((!lexflags || (lexflags & LEXFLAGS_COMMENTS)) && !expanding &&
|
|
(!interact || unset(SHINSTDIN) || strin)))) {
|
|
/* History is handled here to prevent extra *
|
|
* newlines being inserted into the history. */
|
|
|
|
if (lexflags & LEXFLAGS_COMMENTS_KEEP) {
|
|
lexbuf.len = 0;
|
|
lexbuf.ptr = tokstr =
|
|
(char *)hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
|
|
add(c);
|
|
}
|
|
hwend();
|
|
while ((c = ingetc()) != '\n' && !lexstop) {
|
|
hwaddc(c);
|
|
addtoline(c);
|
|
if (lexflags & LEXFLAGS_COMMENTS_KEEP)
|
|
add(c);
|
|
}
|
|
|
|
if (errflag)
|
|
peek = LEXERR;
|
|
else {
|
|
if (lexflags & LEXFLAGS_COMMENTS_KEEP) {
|
|
*lexbuf.ptr = '\0';
|
|
if (!lexstop)
|
|
hungetc(c);
|
|
peek = STRING;
|
|
} else {
|
|
hwend();
|
|
hwbegin(0);
|
|
hwaddc('\n');
|
|
addtoline('\n');
|
|
/*
|
|
* If splitting a line and removing comments,
|
|
* we don't want a newline token since it's
|
|
* treated specially.
|
|
*/
|
|
if ((lexflags & LEXFLAGS_COMMENTS_STRIP) && lexstop)
|
|
peek = ENDINPUT;
|
|
else
|
|
peek = NEWLIN;
|
|
}
|
|
}
|
|
return peek;
|
|
}
|
|
switch (lexact1[STOUC(c)]) {
|
|
case LX1_BKSLASH:
|
|
d = hgetc();
|
|
if (d == '\n')
|
|
goto beginning;
|
|
hungetc(d);
|
|
lexstop = 0;
|
|
break;
|
|
case LX1_NEWLIN:
|
|
return NEWLIN;
|
|
case LX1_SEMI:
|
|
d = hgetc();
|
|
if(d == ';')
|
|
return DSEMI;
|
|
else if(d == '&')
|
|
return SEMIAMP;
|
|
else if (d == '|')
|
|
return SEMIBAR;
|
|
hungetc(d);
|
|
lexstop = 0;
|
|
return SEMI;
|
|
case LX1_AMPER:
|
|
d = hgetc();
|
|
if (d == '&')
|
|
return DAMPER;
|
|
else if (d == '!' || d == '|')
|
|
return AMPERBANG;
|
|
else if (d == '>') {
|
|
tokfd = peekfd;
|
|
d = hgetc();
|
|
if (d == '!' || d == '|')
|
|
return OUTANGAMPBANG;
|
|
else if (d == '>') {
|
|
d = hgetc();
|
|
if (d == '!' || d == '|')
|
|
return DOUTANGAMPBANG;
|
|
hungetc(d);
|
|
lexstop = 0;
|
|
return DOUTANGAMP;
|
|
}
|
|
hungetc(d);
|
|
lexstop = 0;
|
|
return AMPOUTANG;
|
|
}
|
|
hungetc(d);
|
|
lexstop = 0;
|
|
return AMPER;
|
|
case LX1_BAR:
|
|
d = hgetc();
|
|
if (d == '|')
|
|
return DBAR;
|
|
else if (d == '&')
|
|
return BARAMP;
|
|
hungetc(d);
|
|
lexstop = 0;
|
|
return BAR;
|
|
case LX1_INPAR:
|
|
d = hgetc();
|
|
if (d == '(') {
|
|
if (infor) {
|
|
dbparens = 1;
|
|
return DINPAR;
|
|
}
|
|
if (incmdpos || (isset(SHGLOB) && !isset(KSHGLOB))) {
|
|
lexbuf.len = 0;
|
|
lexbuf.ptr = tokstr = (char *)
|
|
hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
|
|
switch (cmd_or_math(CS_MATH)) {
|
|
case CMD_OR_MATH_MATH:
|
|
return DINPAR;
|
|
|
|
case CMD_OR_MATH_CMD:
|
|
/*
|
|
* Not math, so we don't return the contents
|
|
* as a string in this case.
|
|
*/
|
|
tokstr = NULL;
|
|
return INPAR;
|
|
|
|
case CMD_OR_MATH_ERR:
|
|
/*
|
|
* LEXFLAGS_ACTIVE means we came from bufferwords(),
|
|
* so we treat as an incomplete math expression
|
|
*/
|
|
if (lexflags & LEXFLAGS_ACTIVE)
|
|
tokstr = dyncat("((", tokstr ? tokstr : "");
|
|
/* fall through */
|
|
|
|
default:
|
|
return LEXERR;
|
|
}
|
|
}
|
|
} else if (d == ')')
|
|
return INOUTPAR;
|
|
hungetc(d);
|
|
lexstop = 0;
|
|
if (!(incond == 1 || incmdpos))
|
|
break;
|
|
return INPAR;
|
|
case LX1_OUTPAR:
|
|
return OUTPAR;
|
|
case LX1_INANG:
|
|
d = hgetc();
|
|
if (d == '(') {
|
|
hungetc(d);
|
|
lexstop = 0;
|
|
unpeekfd:
|
|
if(peekfd != -1) {
|
|
hungetc(c);
|
|
c = '0' + peekfd;
|
|
}
|
|
break;
|
|
}
|
|
if (d == '>') {
|
|
peek = INOUTANG;
|
|
} else if (d == '<') {
|
|
int e = hgetc();
|
|
|
|
if (e == '(') {
|
|
hungetc(e);
|
|
hungetc(d);
|
|
peek = INANG;
|
|
} else if (e == '<')
|
|
peek = TRINANG;
|
|
else if (e == '-')
|
|
peek = DINANGDASH;
|
|
else {
|
|
hungetc(e);
|
|
lexstop = 0;
|
|
peek = DINANG;
|
|
}
|
|
} else if (d == '&') {
|
|
peek = INANGAMP;
|
|
} else {
|
|
hungetc(d);
|
|
if(isnumglob())
|
|
goto unpeekfd;
|
|
peek = INANG;
|
|
}
|
|
tokfd = peekfd;
|
|
return peek;
|
|
case LX1_OUTANG:
|
|
d = hgetc();
|
|
if (d == '(') {
|
|
hungetc(d);
|
|
goto unpeekfd;
|
|
} else if (d == '&') {
|
|
d = hgetc();
|
|
if (d == '!' || d == '|')
|
|
peek = OUTANGAMPBANG;
|
|
else {
|
|
hungetc(d);
|
|
lexstop = 0;
|
|
peek = OUTANGAMP;
|
|
}
|
|
} else if (d == '!' || d == '|')
|
|
peek = OUTANGBANG;
|
|
else if (d == '>') {
|
|
d = hgetc();
|
|
if (d == '&') {
|
|
d = hgetc();
|
|
if (d == '!' || d == '|')
|
|
peek = DOUTANGAMPBANG;
|
|
else {
|
|
hungetc(d);
|
|
lexstop = 0;
|
|
peek = DOUTANGAMP;
|
|
}
|
|
} else if (d == '!' || d == '|')
|
|
peek = DOUTANGBANG;
|
|
else if (d == '(') {
|
|
hungetc(d);
|
|
hungetc('>');
|
|
peek = OUTANG;
|
|
} else {
|
|
hungetc(d);
|
|
lexstop = 0;
|
|
peek = DOUTANG;
|
|
if (isset(HISTALLOWCLOBBER))
|
|
hwaddc('|');
|
|
}
|
|
} else {
|
|
hungetc(d);
|
|
lexstop = 0;
|
|
peek = OUTANG;
|
|
if (!incond && isset(HISTALLOWCLOBBER))
|
|
hwaddc('|');
|
|
}
|
|
tokfd = peekfd;
|
|
return peek;
|
|
}
|
|
|
|
/* we've started a string, now get the *
|
|
* rest of it, performing tokenization */
|
|
return gettokstr(c, 0);
|
|
}
|
|
|
|
/*
|
|
* Get the remains of a token string. This has two uses.
|
|
* When called from gettok(), with sub = 0, we have already identified
|
|
* any interesting initial character and want to get the rest of
|
|
* what we now know is a string. However, the string may still include
|
|
* metacharacters and potentially substitutions.
|
|
*
|
|
* When called from parse_subst_string() with sub = 1, we are not
|
|
* fully parsing a command line, merely tokenizing a string.
|
|
* In this case we always add characters to the parsed string
|
|
* unless there is a parse error.
|
|
*/
|
|
|
|
/**/
|
|
static enum lextok
|
|
gettokstr(int c, int sub)
|
|
{
|
|
int bct = 0, pct = 0, brct = 0, fdpar = 0;
|
|
int intpos = 1, in_brace_param = 0;
|
|
int inquote, unmatched = 0;
|
|
enum lextok peek;
|
|
#ifdef DEBUG
|
|
int ocmdsp = cmdsp;
|
|
#endif
|
|
|
|
peek = STRING;
|
|
if (!sub) {
|
|
lexbuf.len = 0;
|
|
lexbuf.ptr = tokstr = (char *) hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
|
|
}
|
|
for (;;) {
|
|
int act;
|
|
int e;
|
|
int inbl = inblank(c);
|
|
|
|
if (fdpar && !inbl && c != ')')
|
|
fdpar = 0;
|
|
|
|
if (inbl && !in_brace_param && !pct)
|
|
act = LX2_BREAK;
|
|
else {
|
|
act = lexact2[STOUC(c)];
|
|
c = lextok2[STOUC(c)];
|
|
}
|
|
switch (act) {
|
|
case LX2_BREAK:
|
|
if (!in_brace_param && !sub)
|
|
goto brk;
|
|
break;
|
|
case LX2_META:
|
|
c = hgetc();
|
|
#ifdef DEBUG
|
|
if (lexstop) {
|
|
fputs("BUG: input terminated by Meta\n", stderr);
|
|
fflush(stderr);
|
|
goto brk;
|
|
}
|
|
#endif
|
|
add(Meta);
|
|
break;
|
|
case LX2_OUTPAR:
|
|
if (fdpar) {
|
|
/* this is a single word `( )', treat as INOUTPAR */
|
|
add(c);
|
|
*lexbuf.ptr = '\0';
|
|
return INOUTPAR;
|
|
}
|
|
if ((sub || in_brace_param) && isset(SHGLOB))
|
|
break;
|
|
if (!in_brace_param && !pct--) {
|
|
if (sub) {
|
|
pct = 0;
|
|
break;
|
|
} else
|
|
goto brk;
|
|
}
|
|
c = Outpar;
|
|
break;
|
|
case LX2_BAR:
|
|
if (!pct && !in_brace_param) {
|
|
if (sub)
|
|
break;
|
|
else
|
|
goto brk;
|
|
}
|
|
if (unset(SHGLOB) || (!sub && !in_brace_param))
|
|
c = Bar;
|
|
break;
|
|
case LX2_STRING:
|
|
e = hgetc();
|
|
if (e == '[') {
|
|
cmdpush(CS_MATHSUBST);
|
|
add(String);
|
|
add(Inbrack);
|
|
c = dquote_parse(']', sub);
|
|
cmdpop();
|
|
if (c) {
|
|
peek = LEXERR;
|
|
goto brk;
|
|
}
|
|
c = Outbrack;
|
|
} else if (e == '(') {
|
|
add(String);
|
|
switch (cmd_or_math_sub()) {
|
|
case CMD_OR_MATH_CMD:
|
|
c = Outpar;
|
|
break;
|
|
|
|
case CMD_OR_MATH_MATH:
|
|
c = Outparmath;
|
|
break;
|
|
|
|
default:
|
|
peek = LEXERR;
|
|
goto brk;
|
|
}
|
|
} else {
|
|
if (e == '{') {
|
|
add(c);
|
|
c = Inbrace;
|
|
++bct;
|
|
cmdpush(CS_BRACEPAR);
|
|
if (!in_brace_param)
|
|
in_brace_param = bct;
|
|
} else {
|
|
hungetc(e);
|
|
lexstop = 0;
|
|
}
|
|
}
|
|
break;
|
|
case LX2_INBRACK:
|
|
if (!in_brace_param)
|
|
brct++;
|
|
c = Inbrack;
|
|
break;
|
|
case LX2_OUTBRACK:
|
|
if (!in_brace_param)
|
|
brct--;
|
|
if (brct < 0)
|
|
brct = 0;
|
|
c = Outbrack;
|
|
break;
|
|
case LX2_INPAR:
|
|
if (isset(SHGLOB)) {
|
|
if (sub || in_brace_param)
|
|
break;
|
|
if (incasepat && !lexbuf.len)
|
|
return INPAR;
|
|
if (!isset(KSHGLOB) && lexbuf.len)
|
|
goto brk;
|
|
}
|
|
if (!in_brace_param) {
|
|
if (!sub) {
|
|
e = hgetc();
|
|
hungetc(e);
|
|
lexstop = 0;
|
|
/* For command words, parentheses are only
|
|
* special at the start. But now we're tokenising
|
|
* the remaining string. So I don't see what
|
|
* the old incmdpos test here is for.
|
|
* pws 1999/6/8
|
|
*
|
|
* Oh, no.
|
|
* func1( )
|
|
* is a valid function definition in [k]sh. The best
|
|
* thing we can do, without really nasty lookahead tricks,
|
|
* is break if we find a blank after a parenthesis. At
|
|
* least this can't happen inside braces or brackets. We
|
|
* only allow this with SHGLOB (set for both sh and ksh).
|
|
*
|
|
* Things like `print @( |foo)' should still
|
|
* work, because [k]sh don't allow multiple words
|
|
* in a function definition, so we only do this
|
|
* in command position.
|
|
* pws 1999/6/14
|
|
*/
|
|
if (e == ')' || (isset(SHGLOB) && inblank(e) && !bct &&
|
|
!brct && !intpos && incmdpos)) {
|
|
/*
|
|
* Either a () token, or a command word with
|
|
* something suspiciously like a ksh function
|
|
* definition.
|
|
* The current word isn't spellcheckable.
|
|
*/
|
|
nocorrect |= 2;
|
|
goto brk;
|
|
}
|
|
}
|
|
/*
|
|
* This also handles the [k]sh `foo( )' function definition.
|
|
* Maintain a variable fdpar, set as long as a single set of
|
|
* parentheses contains only space. Then if we get to the
|
|
* closing parenthesis and it is still set, we can assume we
|
|
* have a function definition. Only do this at the start of
|
|
* the word, since the (...) must be a separate token.
|
|
*/
|
|
if (!pct++ && isset(SHGLOB) && intpos && !bct && !brct)
|
|
fdpar = 1;
|
|
}
|
|
c = Inpar;
|
|
break;
|
|
case LX2_INBRACE:
|
|
if (isset(IGNOREBRACES) || sub)
|
|
c = '{';
|
|
else {
|
|
if (!lexbuf.len && incmdpos) {
|
|
add('{');
|
|
*lexbuf.ptr = '\0';
|
|
return STRING;
|
|
}
|
|
if (in_brace_param) {
|
|
cmdpush(CS_BRACE);
|
|
}
|
|
bct++;
|
|
}
|
|
break;
|
|
case LX2_OUTBRACE:
|
|
if ((isset(IGNOREBRACES) || sub) && !in_brace_param)
|
|
break;
|
|
if (!bct)
|
|
break;
|
|
if (in_brace_param) {
|
|
cmdpop();
|
|
}
|
|
if (bct-- == in_brace_param)
|
|
in_brace_param = 0;
|
|
c = Outbrace;
|
|
break;
|
|
case LX2_COMMA:
|
|
if (unset(IGNOREBRACES) && !sub && bct > in_brace_param)
|
|
c = Comma;
|
|
break;
|
|
case LX2_OUTANG:
|
|
if (in_brace_param || sub)
|
|
break;
|
|
e = hgetc();
|
|
if (e != '(') {
|
|
hungetc(e);
|
|
lexstop = 0;
|
|
goto brk;
|
|
}
|
|
add(OutangProc);
|
|
if (skipcomm()) {
|
|
peek = LEXERR;
|
|
goto brk;
|
|
}
|
|
c = Outpar;
|
|
break;
|
|
case LX2_INANG:
|
|
if (isset(SHGLOB) && sub)
|
|
break;
|
|
e = hgetc();
|
|
if (!(in_brace_param || sub) && e == '(') {
|
|
add(Inang);
|
|
if (skipcomm()) {
|
|
peek = LEXERR;
|
|
goto brk;
|
|
}
|
|
c = Outpar;
|
|
break;
|
|
}
|
|
hungetc(e);
|
|
if(isnumglob()) {
|
|
add(Inang);
|
|
while ((c = hgetc()) != '>')
|
|
add(c);
|
|
c = Outang;
|
|
break;
|
|
}
|
|
lexstop = 0;
|
|
if (in_brace_param || sub)
|
|
break;
|
|
goto brk;
|
|
case LX2_EQUALS:
|
|
if (!sub) {
|
|
if (intpos) {
|
|
e = hgetc();
|
|
if (e != '(') {
|
|
hungetc(e);
|
|
lexstop = 0;
|
|
c = Equals;
|
|
} else {
|
|
add(Equals);
|
|
if (skipcomm()) {
|
|
peek = LEXERR;
|
|
goto brk;
|
|
}
|
|
c = Outpar;
|
|
}
|
|
} else if (peek != ENVSTRING &&
|
|
(incmdpos || intypeset) && !bct && !brct) {
|
|
char *t = tokstr;
|
|
if (idigit(*t))
|
|
while (++t < lexbuf.ptr && idigit(*t));
|
|
else {
|
|
int sav = *lexbuf.ptr;
|
|
*lexbuf.ptr = '\0';
|
|
t = itype_end(t, IIDENT, 0);
|
|
if (t < lexbuf.ptr) {
|
|
skipparens(Inbrack, Outbrack, &t);
|
|
} else {
|
|
*lexbuf.ptr = sav;
|
|
}
|
|
}
|
|
if (*t == '+')
|
|
t++;
|
|
if (t == lexbuf.ptr) {
|
|
e = hgetc();
|
|
if (e == '(') {
|
|
*lexbuf.ptr = '\0';
|
|
return ENVARRAY;
|
|
}
|
|
hungetc(e);
|
|
lexstop = 0;
|
|
peek = ENVSTRING;
|
|
intpos = 2;
|
|
} else
|
|
c = Equals;
|
|
} else
|
|
c = Equals;
|
|
}
|
|
break;
|
|
case LX2_BKSLASH:
|
|
c = hgetc();
|
|
if (c == '\n') {
|
|
c = hgetc();
|
|
if (!lexstop)
|
|
continue;
|
|
} else {
|
|
add(Bnull);
|
|
if (c == STOUC(Meta)) {
|
|
c = hgetc();
|
|
#ifdef DEBUG
|
|
if (lexstop) {
|
|
fputs("BUG: input terminated by Meta\n", stderr);
|
|
fflush(stderr);
|
|
goto brk;
|
|
}
|
|
#endif
|
|
add(Meta);
|
|
}
|
|
}
|
|
if (lexstop)
|
|
goto brk;
|
|
break;
|
|
case LX2_QUOTE: {
|
|
int strquote = (lexbuf.len && lexbuf.ptr[-1] == String);
|
|
|
|
add(Snull);
|
|
cmdpush(CS_QUOTE);
|
|
for (;;) {
|
|
STOPHIST
|
|
while ((c = hgetc()) != '\'' && !lexstop) {
|
|
if (strquote && c == '\\') {
|
|
c = hgetc();
|
|
if (lexstop)
|
|
break;
|
|
/*
|
|
* Mostly we don't need to do anything special
|
|
* with escape backslashes or closing quotes
|
|
* inside $'...'; however in completion we
|
|
* need to be able to strip multiple backslashes
|
|
* neatly.
|
|
*/
|
|
if (c == '\\' || c == '\'')
|
|
add(Bnull);
|
|
else
|
|
add('\\');
|
|
} else if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') {
|
|
if (lexbuf.ptr[-1] == '\\')
|
|
lexbuf.ptr--, lexbuf.len--;
|
|
else
|
|
break;
|
|
}
|
|
add(c);
|
|
}
|
|
ALLOWHIST
|
|
if (c != '\'') {
|
|
unmatched = '\'';
|
|
peek = LEXERR;
|
|
cmdpop();
|
|
goto brk;
|
|
}
|
|
e = hgetc();
|
|
if (e != '\'' || unset(RCQUOTES) || strquote)
|
|
break;
|
|
add(c);
|
|
}
|
|
cmdpop();
|
|
hungetc(e);
|
|
lexstop = 0;
|
|
c = Snull;
|
|
break;
|
|
}
|
|
case LX2_DQUOTE:
|
|
add(Dnull);
|
|
cmdpush(CS_DQUOTE);
|
|
c = dquote_parse('"', sub);
|
|
cmdpop();
|
|
if (c) {
|
|
unmatched = '"';
|
|
peek = LEXERR;
|
|
goto brk;
|
|
}
|
|
c = Dnull;
|
|
break;
|
|
case LX2_BQUOTE:
|
|
add(Tick);
|
|
cmdpush(CS_BQUOTE);
|
|
SETPARBEGIN
|
|
inquote = 0;
|
|
while ((c = hgetc()) != '`' && !lexstop) {
|
|
if (c == '\\') {
|
|
c = hgetc();
|
|
if (c != '\n') {
|
|
add(c == '`' || c == '\\' || c == '$' ? Bnull : '\\');
|
|
add(c);
|
|
}
|
|
else if (!sub && isset(CSHJUNKIEQUOTES))
|
|
add(c);
|
|
} else {
|
|
if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') {
|
|
break;
|
|
}
|
|
add(c);
|
|
if (c == '\'') {
|
|
if ((inquote = !inquote))
|
|
STOPHIST
|
|
else
|
|
ALLOWHIST
|
|
}
|
|
}
|
|
}
|
|
if (inquote)
|
|
ALLOWHIST
|
|
cmdpop();
|
|
if (c != '`') {
|
|
unmatched = '`';
|
|
peek = LEXERR;
|
|
goto brk;
|
|
}
|
|
c = Tick;
|
|
SETPAREND
|
|
break;
|
|
}
|
|
add(c);
|
|
c = hgetc();
|
|
if (intpos)
|
|
intpos--;
|
|
if (lexstop)
|
|
break;
|
|
}
|
|
brk:
|
|
if (errflag) {
|
|
if (in_brace_param) {
|
|
while(bct-- >= in_brace_param)
|
|
cmdpop();
|
|
}
|
|
return LEXERR;
|
|
}
|
|
hungetc(c);
|
|
if (unmatched)
|
|
zerr("unmatched %c", unmatched);
|
|
if (in_brace_param) {
|
|
while(bct-- >= in_brace_param)
|
|
cmdpop();
|
|
zerr("closing brace expected");
|
|
} else if (unset(IGNOREBRACES) && !sub && lexbuf.len > 1 &&
|
|
peek == STRING && lexbuf.ptr[-1] == '}' &&
|
|
lexbuf.ptr[-2] != Bnull) {
|
|
/* hack to get {foo} command syntax work */
|
|
lexbuf.ptr--;
|
|
lexbuf.len--;
|
|
lexstop = 0;
|
|
hungetc('}');
|
|
}
|
|
*lexbuf.ptr = '\0';
|
|
DPUTS(cmdsp != ocmdsp, "BUG: gettok: cmdstack changed.");
|
|
return peek;
|
|
}
|
|
|
|
|
|
/*
|
|
* Parse input as if in double quotes.
|
|
* endchar is the end character to expect.
|
|
* sub has got something to do with whether we are doing quoted substitution.
|
|
* Return non-zero for error (character to unget), else zero
|
|
*/
|
|
|
|
/**/
|
|
static int
|
|
dquote_parse(char endchar, int sub)
|
|
{
|
|
int pct = 0, brct = 0, bct = 0, intick = 0, err = 0;
|
|
int c;
|
|
int math = endchar == ')' || endchar == ']' || infor;
|
|
int zlemath = math && zlemetacs > zlemetall + addedx - inbufct;
|
|
|
|
while (((c = hgetc()) != endchar || bct ||
|
|
(math && ((pct > 0) || (brct > 0))) ||
|
|
intick) && !lexstop) {
|
|
cont:
|
|
switch (c) {
|
|
case '\\':
|
|
c = hgetc();
|
|
if (c != '\n') {
|
|
if (c == '$' || c == '\\' || (c == '}' && !intick && bct) ||
|
|
c == endchar || c == '`' ||
|
|
(endchar == ']' && (c == '[' || c == ']' ||
|
|
c == '(' || c == ')' ||
|
|
c == '{' || c == '}' ||
|
|
(c == '"' && sub))))
|
|
add(Bnull);
|
|
else {
|
|
/* lexstop is implicitly handled here */
|
|
add('\\');
|
|
goto cont;
|
|
}
|
|
} else if (sub || unset(CSHJUNKIEQUOTES) || endchar != '"')
|
|
continue;
|
|
break;
|
|
case '\n':
|
|
err = !sub && isset(CSHJUNKIEQUOTES) && endchar == '"';
|
|
break;
|
|
case '$':
|
|
if (intick)
|
|
break;
|
|
c = hgetc();
|
|
if (c == '(') {
|
|
add(Qstring);
|
|
switch (cmd_or_math_sub()) {
|
|
case CMD_OR_MATH_CMD:
|
|
c = Outpar;
|
|
break;
|
|
|
|
case CMD_OR_MATH_MATH:
|
|
c = Outparmath;
|
|
break;
|
|
|
|
default:
|
|
err = 1;
|
|
break;
|
|
}
|
|
} else if (c == '[') {
|
|
add(String);
|
|
add(Inbrack);
|
|
cmdpush(CS_MATHSUBST);
|
|
err = dquote_parse(']', sub);
|
|
cmdpop();
|
|
c = Outbrack;
|
|
} else if (c == '{') {
|
|
add(Qstring);
|
|
c = Inbrace;
|
|
cmdpush(CS_BRACEPAR);
|
|
bct++;
|
|
} else if (c == '$')
|
|
add(Qstring);
|
|
else {
|
|
hungetc(c);
|
|
lexstop = 0;
|
|
c = Qstring;
|
|
}
|
|
break;
|
|
case '}':
|
|
if (intick || !bct)
|
|
break;
|
|
c = Outbrace;
|
|
bct--;
|
|
cmdpop();
|
|
break;
|
|
case '`':
|
|
c = Qtick;
|
|
if (intick == 2)
|
|
ALLOWHIST
|
|
if ((intick = !intick)) {
|
|
SETPARBEGIN
|
|
cmdpush(CS_BQUOTE);
|
|
} else {
|
|
SETPAREND
|
|
cmdpop();
|
|
}
|
|
break;
|
|
case '\'':
|
|
if (!intick)
|
|
break;
|
|
if (intick == 1)
|
|
intick = 2, STOPHIST
|
|
else
|
|
intick = 1, ALLOWHIST
|
|
break;
|
|
case '(':
|
|
if (!math || !bct)
|
|
pct++;
|
|
break;
|
|
case ')':
|
|
if (!math || !bct)
|
|
err = (!pct-- && math);
|
|
break;
|
|
case '[':
|
|
if (!math || !bct)
|
|
brct++;
|
|
break;
|
|
case ']':
|
|
if (!math || !bct)
|
|
err = (!brct-- && math);
|
|
break;
|
|
case '"':
|
|
if (intick || (endchar != '"' && !bct))
|
|
break;
|
|
if (bct) {
|
|
add(Dnull);
|
|
cmdpush(CS_DQUOTE);
|
|
err = dquote_parse('"', sub);
|
|
cmdpop();
|
|
c = Dnull;
|
|
} else
|
|
err = 1;
|
|
break;
|
|
}
|
|
if (err || lexstop)
|
|
break;
|
|
add(c);
|
|
}
|
|
if (intick == 2)
|
|
ALLOWHIST
|
|
if (intick) {
|
|
cmdpop();
|
|
}
|
|
while (bct--)
|
|
cmdpop();
|
|
if (lexstop)
|
|
err = intick || endchar || err;
|
|
else if (err == 1) {
|
|
/*
|
|
* TODO: as far as I can see, this hack is used in gettokstr()
|
|
* to hungetc() a character on an error. However, I don't
|
|
* understand what that actually gets us, and we can't guarantee
|
|
* it's a character anyway, because of the previous test.
|
|
*
|
|
* We use the same feature in cmd_or_math where we actually do
|
|
* need to unget if we decide it's really a command substitution.
|
|
* We try to handle the other case by testing for lexstop.
|
|
*/
|
|
err = c;
|
|
}
|
|
if (zlemath && zlemetacs <= zlemetall + 1 - inbufct)
|
|
inwhat = IN_MATH;
|
|
return err;
|
|
}
|
|
|
|
/*
|
|
* Tokenize a string given in s. Parsing is done as in double
|
|
* quotes. This is usually called before singsub().
|
|
*
|
|
* parsestr() is noisier, reporting an error if the parse failed.
|
|
*
|
|
* On entry, *s must point to a string allocated from the stack of
|
|
* exactly the right length, i.e. strlen(*s) + 1, as the string
|
|
* is used as the lexical token string whose memory management
|
|
* demands this. Usually the input string will therefore be
|
|
* the result of an immediately preceding dupstring().
|
|
*/
|
|
|
|
/**/
|
|
mod_export int
|
|
parsestr(char **s)
|
|
{
|
|
int err;
|
|
|
|
if ((err = parsestrnoerr(s))) {
|
|
untokenize(*s);
|
|
if (!(errflag & ERRFLAG_INT)) {
|
|
if (err > 32 && err < 127)
|
|
zerr("parse error near `%c'", err);
|
|
else
|
|
zerr("parse error");
|
|
}
|
|
}
|
|
return err;
|
|
}
|
|
|
|
/**/
|
|
mod_export int
|
|
parsestrnoerr(char **s)
|
|
{
|
|
int l = strlen(*s), err;
|
|
|
|
zcontext_save();
|
|
untokenize(*s);
|
|
inpush(dupstring(*s), 0, NULL);
|
|
strinbeg(0);
|
|
lexbuf.len = 0;
|
|
lexbuf.ptr = tokstr = *s;
|
|
lexbuf.siz = l + 1;
|
|
err = dquote_parse('\0', 1);
|
|
if (tokstr)
|
|
*s = tokstr;
|
|
*lexbuf.ptr = '\0';
|
|
strinend();
|
|
inpop();
|
|
DPUTS(cmdsp, "BUG: parsestr: cmdstack not empty.");
|
|
zcontext_restore();
|
|
return err;
|
|
}
|
|
|
|
/*
|
|
* Parse a subscript in string s.
|
|
* sub is passed down to dquote_parse().
|
|
* endchar is the final character.
|
|
* Return the next character, or NULL.
|
|
*/
|
|
/**/
|
|
mod_export char *
|
|
parse_subscript(char *s, int sub, int endchar)
|
|
{
|
|
int l = strlen(s), err;
|
|
char *t;
|
|
|
|
if (!*s || *s == endchar)
|
|
return 0;
|
|
zcontext_save();
|
|
untokenize(t = dupstring(s));
|
|
inpush(t, 0, NULL);
|
|
strinbeg(0);
|
|
lexbuf.len = 0;
|
|
lexbuf.ptr = tokstr = s;
|
|
lexbuf.siz = l + 1;
|
|
err = dquote_parse(endchar, sub);
|
|
if (err) {
|
|
err = *lexbuf.ptr;
|
|
*lexbuf.ptr = '\0';
|
|
untokenize(s);
|
|
*lexbuf.ptr = err;
|
|
s = NULL;
|
|
} else {
|
|
s = lexbuf.ptr;
|
|
}
|
|
strinend();
|
|
inpop();
|
|
DPUTS(cmdsp, "BUG: parse_subscript: cmdstack not empty.");
|
|
zcontext_restore();
|
|
return s;
|
|
}
|
|
|
|
/* Tokenize a string given in s. Parsing is done as if s were a normal *
|
|
* command-line argument but it may contain separators. This is used *
|
|
* to parse the right-hand side of ${...%...} substitutions. */
|
|
|
|
/**/
|
|
mod_export int
|
|
parse_subst_string(char *s)
|
|
{
|
|
int c, l = strlen(s), err;
|
|
char *ptr;
|
|
enum lextok ctok;
|
|
|
|
if (!*s || !strcmp(s, nulstring))
|
|
return 0;
|
|
zcontext_save();
|
|
untokenize(s);
|
|
inpush(dupstring(s), 0, NULL);
|
|
strinbeg(0);
|
|
lexbuf.len = 0;
|
|
lexbuf.ptr = tokstr = s;
|
|
lexbuf.siz = l + 1;
|
|
c = hgetc();
|
|
ctok = gettokstr(c, 1);
|
|
err = errflag;
|
|
strinend();
|
|
inpop();
|
|
DPUTS(cmdsp, "BUG: parse_subst_string: cmdstack not empty.");
|
|
zcontext_restore();
|
|
/* Keep any interrupt error status */
|
|
errflag = err | (errflag & ERRFLAG_INT);
|
|
if (ctok == LEXERR) {
|
|
untokenize(s);
|
|
return 1;
|
|
}
|
|
#ifdef DEBUG
|
|
/*
|
|
* Historical note: we used to check here for olen (the value of lexbuf.len
|
|
* before zcontext_restore()) == l, but that's not necessarily the case if
|
|
* we stripped an RCQUOTE.
|
|
*/
|
|
if (ctok != STRING || (errflag && !noerrs)) {
|
|
fprintf(stderr, "Oops. Bug in parse_subst_string: %s\n",
|
|
errflag ? "errflag" : "ctok != STRING");
|
|
fflush(stderr);
|
|
untokenize(s);
|
|
return 1;
|
|
}
|
|
#endif
|
|
/* Check for $'...' quoting. This needs special handling. */
|
|
for (ptr = s; *ptr; )
|
|
{
|
|
if (*ptr == String && ptr[1] == Snull)
|
|
{
|
|
char *t;
|
|
int len, tlen, diff;
|
|
t = getkeystring(ptr + 2, &len, GETKEYS_DOLLARS_QUOTE, NULL);
|
|
len += 2;
|
|
tlen = strlen(t);
|
|
diff = len - tlen;
|
|
/*
|
|
* Yuk.
|
|
* parse_subst_string() currently handles strings in-place.
|
|
* That's not so easy to fix without knowing whether
|
|
* additional memory should come off the heap or
|
|
* otherwise. So we cheat by copying the unquoted string
|
|
* into place, unless it's too long. That's not the
|
|
* normal case, but I'm worried there are pathological
|
|
* cases with converting metafied multibyte strings.
|
|
* If someone can prove there aren't I will be very happy.
|
|
*/
|
|
if (diff < 0) {
|
|
DPUTS(1, "$'...' subst too long: fix get_parse_string()");
|
|
return 1;
|
|
}
|
|
memcpy(ptr, t, tlen);
|
|
ptr += tlen;
|
|
if (diff > 0) {
|
|
char *dptr = ptr;
|
|
char *sptr = ptr + diff;
|
|
while ((*dptr++ = *sptr++))
|
|
;
|
|
}
|
|
} else
|
|
ptr++;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* Called below to report word positions. */
|
|
|
|
/**/
|
|
static void
|
|
gotword(void)
|
|
{
|
|
we = zlemetall + 1 - inbufct + (addedx == 2 ? 1 : 0);
|
|
if (zlemetacs <= we) {
|
|
wb = zlemetall - wordbeg + addedx;
|
|
lexflags = 0;
|
|
}
|
|
}
|
|
|
|
/* Check if current lex text matches an alias: 1 if so, else 0 */
|
|
|
|
static int
|
|
checkalias(void)
|
|
{
|
|
Alias an;
|
|
|
|
if (!zshlextext)
|
|
return 0;
|
|
|
|
if (!noaliases && isset(ALIASESOPT) &&
|
|
(!isset(POSIXALIASES) ||
|
|
(tok == STRING && !reswdtab->getnode(reswdtab, zshlextext)))) {
|
|
char *suf;
|
|
|
|
an = (Alias) aliastab->getnode(aliastab, zshlextext);
|
|
if (an && !an->inuse &&
|
|
((an->node.flags & ALIAS_GLOBAL) ||
|
|
(incmdpos && tok == STRING) || inalmore)) {
|
|
if (!lexstop) {
|
|
/*
|
|
* Tokens that don't require a space after, get one,
|
|
* because they are treated as if preceded by one.
|
|
*/
|
|
int c = hgetc();
|
|
hungetc(c);
|
|
if (!iblank(c))
|
|
inpush(" ", INP_ALIAS, 0);
|
|
}
|
|
inpush(an->text, INP_ALIAS, an);
|
|
if (an->text[0] == ' ' && !(an->node.flags & ALIAS_GLOBAL))
|
|
aliasspaceflag = 1;
|
|
lexstop = 0;
|
|
return 1;
|
|
}
|
|
if ((suf = strrchr(zshlextext, '.')) && suf[1] &&
|
|
suf > zshlextext && suf[-1] != Meta &&
|
|
(an = (Alias)sufaliastab->getnode(sufaliastab, suf+1)) &&
|
|
!an->inuse && incmdpos) {
|
|
inpush(dupstring(zshlextext), INP_ALIAS, NULL);
|
|
inpush(" ", INP_ALIAS, NULL);
|
|
inpush(an->text, INP_ALIAS, an);
|
|
lexstop = 0;
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* expand aliases and reserved words */
|
|
|
|
/**/
|
|
int
|
|
exalias(void)
|
|
{
|
|
Reswd rw;
|
|
|
|
hwend();
|
|
if (interact && isset(SHINSTDIN) && !strin && !incasepat &&
|
|
tok == STRING && !nocorrect && !(inbufflags & INP_ALIAS) &&
|
|
(isset(CORRECTALL) || (isset(CORRECT) && incmdpos)))
|
|
spckword(&tokstr, 1, incmdpos, 1);
|
|
|
|
if (!tokstr) {
|
|
zshlextext = tokstrings[tok];
|
|
|
|
if (tok == NEWLIN)
|
|
return 0;
|
|
return checkalias();
|
|
} else {
|
|
VARARR(char, copy, (strlen(tokstr) + 1));
|
|
|
|
if (has_token(tokstr)) {
|
|
char *p, *t;
|
|
|
|
zshlextext = p = copy;
|
|
for (t = tokstr;
|
|
(*p++ = itok(*t) ? ztokens[*t++ - Pound] : *t++););
|
|
} else
|
|
zshlextext = tokstr;
|
|
|
|
if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS)) {
|
|
int zp = lexflags;
|
|
|
|
gotword();
|
|
if ((zp & LEXFLAGS_ZLE) && !lexflags) {
|
|
if (zshlextext == copy)
|
|
zshlextext = tokstr;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
if (tok == STRING) {
|
|
/* Check for an alias */
|
|
if ((zshlextext != copy || !isset(POSIXALIASES)) && checkalias()) {
|
|
if (zshlextext == copy)
|
|
zshlextext = tokstr;
|
|
return 1;
|
|
}
|
|
|
|
/* Then check for a reserved word */
|
|
if ((incmdpos ||
|
|
(unset(IGNOREBRACES) && unset(IGNORECLOSEBRACES) &&
|
|
zshlextext[0] == '}' && !zshlextext[1])) &&
|
|
(rw = (Reswd) reswdtab->getnode(reswdtab, zshlextext))) {
|
|
tok = rw->token;
|
|
if (tok == DINBRACK)
|
|
incond = 1;
|
|
} else if (incond && !strcmp(zshlextext, "]]")) {
|
|
tok = DOUTBRACK;
|
|
incond = 0;
|
|
} else if (incond == 1 && zshlextext[0] == '!' && !zshlextext[1])
|
|
tok = BANG;
|
|
}
|
|
inalmore = 0;
|
|
if (zshlextext == copy)
|
|
zshlextext = tokstr;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**/
|
|
void
|
|
zshlex_raw_add(int c)
|
|
{
|
|
if (!lex_add_raw)
|
|
return;
|
|
|
|
*lexbuf_raw.ptr++ = c;
|
|
if (lexbuf_raw.siz == ++lexbuf_raw.len) {
|
|
int newbsiz = lexbuf_raw.siz * 2;
|
|
|
|
tokstr_raw = (char *)hrealloc(tokstr_raw, lexbuf_raw.siz, newbsiz);
|
|
lexbuf_raw.ptr = tokstr_raw + lexbuf_raw.len;
|
|
memset(lexbuf_raw.ptr, 0, newbsiz - lexbuf_raw.siz);
|
|
lexbuf_raw.siz = newbsiz;
|
|
}
|
|
}
|
|
|
|
/**/
|
|
void
|
|
zshlex_raw_back(void)
|
|
{
|
|
if (!lex_add_raw)
|
|
return;
|
|
lexbuf_raw.ptr--;
|
|
lexbuf_raw.len--;
|
|
}
|
|
|
|
/**/
|
|
int
|
|
zshlex_raw_mark(int offset)
|
|
{
|
|
if (!lex_add_raw)
|
|
return 0;
|
|
return lexbuf_raw.len + offset;
|
|
}
|
|
|
|
/**/
|
|
void
|
|
zshlex_raw_back_to_mark(int mark)
|
|
{
|
|
if (!lex_add_raw)
|
|
return;
|
|
lexbuf_raw.ptr = tokstr_raw + mark;
|
|
lexbuf_raw.len = mark;
|
|
}
|
|
|
|
/*
|
|
* Skip (...) for command-style substitutions: $(...), <(...), >(...)
|
|
*
|
|
* In order to ensure we don't stop at closing parentheses with
|
|
* some other syntactic significance, we'll parse the input until
|
|
* we find an unmatched closing parenthesis. However, we'll throw
|
|
* away the result of the parsing and just keep the string we've built
|
|
* up on the way.
|
|
*/
|
|
|
|
/**/
|
|
static int
|
|
skipcomm(void)
|
|
{
|
|
#ifdef ZSH_OLD_SKIPCOMM
|
|
int pct = 1, c, start = 1;
|
|
|
|
cmdpush(CS_CMDSUBST);
|
|
SETPARBEGIN
|
|
c = Inpar;
|
|
do {
|
|
int iswhite;
|
|
add(c);
|
|
c = hgetc();
|
|
if (itok(c) || lexstop)
|
|
break;
|
|
iswhite = inblank(c);
|
|
switch (c) {
|
|
case '(':
|
|
pct++;
|
|
break;
|
|
case ')':
|
|
pct--;
|
|
break;
|
|
case '\\':
|
|
add(c);
|
|
c = hgetc();
|
|
break;
|
|
case '\'': {
|
|
int strquote = lexbuf.ptr[-1] == '$';
|
|
add(c);
|
|
STOPHIST
|
|
while ((c = hgetc()) != '\'' && !lexstop) {
|
|
if (c == '\\' && strquote) {
|
|
add(c);
|
|
c = hgetc();
|
|
}
|
|
add(c);
|
|
}
|
|
ALLOWHIST
|
|
break;
|
|
}
|
|
case '\"':
|
|
add(c);
|
|
while ((c = hgetc()) != '\"' && !lexstop)
|
|
if (c == '\\') {
|
|
add(c);
|
|
add(hgetc());
|
|
} else
|
|
add(c);
|
|
break;
|
|
case '`':
|
|
add(c);
|
|
while ((c = hgetc()) != '`' && !lexstop)
|
|
if (c == '\\')
|
|
add(c), add(hgetc());
|
|
else
|
|
add(c);
|
|
break;
|
|
case '#':
|
|
if (start) {
|
|
add(c);
|
|
while ((c = hgetc()) != '\n' && !lexstop)
|
|
add(c);
|
|
iswhite = 1;
|
|
}
|
|
break;
|
|
}
|
|
start = iswhite;
|
|
}
|
|
while (pct);
|
|
if (!lexstop)
|
|
SETPAREND
|
|
cmdpop();
|
|
return lexstop;
|
|
#else
|
|
char *new_tokstr;
|
|
int new_lexstop, new_lex_add_raw;
|
|
int save_infor = infor;
|
|
struct lexbufstate new_lexbuf;
|
|
|
|
infor = 0;
|
|
cmdpush(CS_CMDSUBST);
|
|
SETPARBEGIN
|
|
add(Inpar);
|
|
|
|
new_lex_add_raw = lex_add_raw + 1;
|
|
if (!lex_add_raw) {
|
|
/*
|
|
* We'll combine the string so far with the input
|
|
* read in for the command substitution. To do this
|
|
* we'll just propagate the current tokstr etc. as the
|
|
* variables used for adding raw input, and
|
|
* ensure we swap those for the real tokstr etc. at the end.
|
|
*
|
|
* However, we need to save and restore the rest of the
|
|
* lexical and parse state as we're effectively parsing
|
|
* an internal string. Because we're still parsing it from
|
|
* the original input source (we have to --- we don't know
|
|
* when to stop inputting it otherwise and can't rely on
|
|
* the input being recoverable until we've read it) we need
|
|
* to keep the same history context.
|
|
*/
|
|
new_tokstr = tokstr;
|
|
new_lexbuf = lexbuf;
|
|
|
|
/*
|
|
* If we're expanding an alias at this point, we need the whole
|
|
* remaining text as part of the string for the command in
|
|
* parentheses, so don't backtrack. This is different from the
|
|
* usual case where the alias is fully within the command, where
|
|
* we want the unexpanded text so that it will be expanded
|
|
* again when the command in the parentheses is executed.
|
|
*
|
|
* I never wanted to be a software engineer, you know.
|
|
*/
|
|
if (inbufflags & INP_ALIAS)
|
|
inbufflags |= INP_RAW_KEEP;
|
|
zcontext_save_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
|
|
hist_in_word(1);
|
|
} else {
|
|
/*
|
|
* Set up for nested command subsitution, however
|
|
* we don't actually need the string until we get
|
|
* back to the top level and recover the lot.
|
|
* The $() body just appears empty.
|
|
*
|
|
* We do need to propagate the raw variables which would
|
|
* otherwise by cleared, though.
|
|
*/
|
|
new_tokstr = tokstr_raw;
|
|
new_lexbuf = lexbuf_raw;
|
|
|
|
zcontext_save_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
|
|
}
|
|
tokstr_raw = new_tokstr;
|
|
lexbuf_raw = new_lexbuf;
|
|
lex_add_raw = new_lex_add_raw;
|
|
/*
|
|
* Don't do any ZLE specials down here: they're only needed
|
|
* when we return the string from the recursive parse.
|
|
* (TBD: this probably means we should be initialising lexflags
|
|
* more consistently.)
|
|
*
|
|
* Note that in that case we're still using the ZLE line reading
|
|
* function at the history layer --- this is consistent with the
|
|
* intention of maintaining the history and input layers across
|
|
* the recursive parsing.
|
|
*/
|
|
lexflags &= ~LEXFLAGS_ZLE;
|
|
dbparens = 0; /* restored by zcontext_restore_partial() */
|
|
|
|
if (!parse_event(OUTPAR) || tok != OUTPAR)
|
|
lexstop = 1;
|
|
/* Outpar lexical token gets added in caller if present */
|
|
|
|
/*
|
|
* We're going to keep the full raw input string
|
|
* as the current token string after popping the stack.
|
|
*/
|
|
new_tokstr = tokstr_raw;
|
|
new_lexbuf = lexbuf_raw;
|
|
/*
|
|
* We're also going to propagate the lexical state:
|
|
* if we couldn't parse the command substitution we
|
|
* can't continue.
|
|
*/
|
|
new_lexstop = lexstop;
|
|
|
|
zcontext_restore_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
|
|
|
|
if (lex_add_raw) {
|
|
/*
|
|
* Keep going, so retain the raw variables.
|
|
*/
|
|
tokstr_raw = new_tokstr;
|
|
lexbuf_raw = new_lexbuf;
|
|
} else {
|
|
if (!new_lexstop) {
|
|
/* Ignore the ')' added on input */
|
|
new_lexbuf.len--;
|
|
*--new_lexbuf.ptr = '\0';
|
|
}
|
|
|
|
/*
|
|
* Convince the rest of lex.c we were examining a string
|
|
* all along.
|
|
*/
|
|
tokstr = new_tokstr;
|
|
lexbuf = new_lexbuf;
|
|
lexstop = new_lexstop;
|
|
hist_in_word(0);
|
|
}
|
|
|
|
if (!lexstop)
|
|
SETPAREND
|
|
cmdpop();
|
|
infor = save_infor;
|
|
|
|
return lexstop;
|
|
#endif
|
|
}
|