mirror of
git://git.code.sf.net/p/zsh/code
synced 2025-10-23 16:40:24 +02:00
23375: Phil Pennock: =~, zsh/regex etc. etc.
This commit is contained in:
parent
eb4c3d4bf2
commit
7f03c3d851
14 changed files with 392 additions and 51 deletions
|
@ -3,7 +3,7 @@
|
|||
*
|
||||
* This file is part of zsh, the Z shell.
|
||||
*
|
||||
* Copyright (c) 2001, 2002, 2003, 2004 Clint Adams
|
||||
* Copyright (c) 2001, 2002, 2003, 2004, 2007 Clint Adams
|
||||
* All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, without written agreement and without
|
||||
|
@ -40,6 +40,37 @@
|
|||
static pcre *pcre_pattern;
|
||||
static pcre_extra *pcre_hints;
|
||||
|
||||
/**/
|
||||
static int
zpcre_utf8_enabled(void)
{
#if defined(MULTIBYTE_SUPPORT) && defined(HAVE_NL_LANGINFO) && defined(CODESET)
    /*
     * Cached answer from pcre_config(PCRE_CONFIG_UTF8):
     *   -1  not asked yet
     *   -2  asking failed; never ask again
     *  >=0  value stored by pcre_config()
     */
    static int have_utf8_pcre = -1;

    /*
     * A failed probe is permanent.  The MULTIBYTE option is checked on
     * every call rather than cached, because it can be toggled.
     */
    if (have_utf8_pcre < -1 || !isset(MULTIBYTE))
        return 0;

    /*
     * Only query the library while the answer is still unknown and the
     * current codeset is UTF-8; a non-zero return from pcre_config()
     * is recorded as a failed probe.
     */
    if (have_utf8_pcre == -1 &&
        strcmp(nl_langinfo(CODESET), "UTF-8") == 0 &&
        pcre_config(PCRE_CONFIG_UTF8, &have_utf8_pcre) != 0)
        have_utf8_pcre = -2;    /* erk, failed to ask */

    return (have_utf8_pcre < 0) ? 0 : have_utf8_pcre;
#else
    /* Without multibyte support or nl_langinfo(CODESET), UTF-8 is never used. */
    return 0;
#endif
}
|
||||
|
||||
/**/
|
||||
static int
|
||||
bin_pcre_compile(char *nam, char **args, Options ops, UNUSED(int func))
|
||||
|
@ -52,8 +83,14 @@ bin_pcre_compile(char *nam, char **args, Options ops, UNUSED(int func))
|
|||
if(OPT_ISSET(ops,'m')) pcre_opts |= PCRE_MULTILINE;
|
||||
if(OPT_ISSET(ops,'x')) pcre_opts |= PCRE_EXTENDED;
|
||||
|
||||
if (zpcre_utf8_enabled())
|
||||
pcre_opts |= PCRE_UTF8;
|
||||
|
||||
pcre_hints = NULL; /* Is this necessary? */
|
||||
|
||||
if (pcre_pattern)
|
||||
pcre_free(pcre_pattern);
|
||||
|
||||
pcre_pattern = pcre_compile(*args, pcre_opts, &pcre_error, &pcre_errptr, NULL);
|
||||
|
||||
if (pcre_pattern == NULL)
|
||||
|
@ -100,37 +137,52 @@ bin_pcre_study(char *nam, UNUSED(char **args), UNUSED(Options ops), UNUSED(int f
|
|||
|
||||
/**/
|
||||
static int
zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar, char *substravar, int matchedinarr)
{
    /*
     * Publish the result of a successful pcre_exec() as shell parameters.
     *
     *   arg           subject string that was matched
     *   ovec, ret     offset vector and return value from pcre_exec()
     *   matchvar      scalar that receives the whole matched text;
     *                 NULL selects "MATCH"
     *   substravar    array that receives the captures; NULL selects "match"
     *   matchedinarr  non-zero: the array also starts with the whole match
     *                 (element 0 of the capture list) instead of starting
     *                 at the first parenthesised capture
     *
     * Always returns 0; if pcre_get_substring_list() fails, no parameter
     * is set.
     */
    char **captures, *match_all, **matches;   /* match_all is one string, not an array */
    int capture_start = 1;

    if (matchedinarr)
        capture_start = 0;
    if (matchvar == NULL)
        matchvar = "MATCH";
    if (substravar == NULL)
        substravar = "match";

    /* captures[0] will be entire matched string, [1] first substring */
    if(!pcre_get_substring_list(arg, ovec, ret, (const char ***)&captures)) {
        match_all = ztrdup(captures[0]);
        setsparam(matchvar, match_all);
        matches = zarrdup(&captures[capture_start]);
        setaparam(substravar, matches);
        /* the list itself belongs to PCRE; the copies above were handed to the parameters */
        pcre_free_substring_list((const char **)captures);
    }

    return 0;
}
|
||||
|
||||
/**/
|
||||
static int
|
||||
bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func))
|
||||
{
|
||||
int ret, capcount, *ovec, ovecsize;
|
||||
int ret, capcount, *ovec, ovecsize, c;
|
||||
char *matched_portion = NULL;
|
||||
char *receptacle = NULL;
|
||||
int return_value = 1;
|
||||
|
||||
if (pcre_pattern == NULL) {
|
||||
zwarnnam(nam, "no pattern has been compiled");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if(OPT_ISSET(ops,'a')) {
|
||||
receptacle = *args++;
|
||||
if(!*args) {
|
||||
zwarnnam(nam, "not enough arguments");
|
||||
return 1;
|
||||
}
|
||||
if(OPT_HASARG(ops,c='a')) {
|
||||
receptacle = OPT_ARG(ops,c);
|
||||
}
|
||||
if(OPT_HASARG(ops,c='v')) {
|
||||
matched_portion = OPT_ARG(ops,c);
|
||||
}
|
||||
if(!*args) {
|
||||
zwarnnam(nam, "not enough arguments");
|
||||
}
|
||||
|
||||
if ((ret = pcre_fullinfo(pcre_pattern, pcre_hints, PCRE_INFO_CAPTURECOUNT, &capcount)))
|
||||
|
@ -144,18 +196,20 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func))
|
|||
|
||||
ret = pcre_exec(pcre_pattern, pcre_hints, *args, strlen(*args), 0, 0, ovec, ovecsize);
|
||||
|
||||
if (ret==0) return 0;
|
||||
else if (ret==PCRE_ERROR_NOMATCH) return 1; /* no match */
|
||||
if (ret==0) return_value = 0;
|
||||
else if (ret==PCRE_ERROR_NOMATCH) /* no match */;
|
||||
else if (ret>0) {
|
||||
zpcre_get_substrings(*args, ovec, ret, receptacle);
|
||||
return 0;
|
||||
zpcre_get_substrings(*args, ovec, ret, matched_portion, receptacle, 0);
|
||||
return_value = 0;
|
||||
}
|
||||
else {
|
||||
zwarnnam(nam, "error in pcre_exec");
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 1;
|
||||
if (ovec)
|
||||
zfree(ovec, ovecsize*sizeof(int));
|
||||
|
||||
return return_value;
|
||||
}
|
||||
|
||||
/**/
|
||||
|
@ -164,33 +218,63 @@ cond_pcre_match(char **a, int id)
|
|||
{
|
||||
pcre *pcre_pat;
|
||||
const char *pcre_err;
|
||||
char *lhstr, *rhre;
|
||||
char *lhstr, *rhre, *avar=NULL;
|
||||
int r = 0, pcre_opts = 0, pcre_errptr, capcnt, *ov, ovsize;
|
||||
int return_value = 0;
|
||||
|
||||
if (zpcre_utf8_enabled())
|
||||
pcre_opts |= PCRE_UTF8;
|
||||
|
||||
lhstr = cond_str(a,0,0);
|
||||
rhre = cond_str(a,1,0);
|
||||
pcre_pat = ov = NULL;
|
||||
|
||||
if (isset(BASHREMATCH))
|
||||
avar="BASH_REMATCH";
|
||||
|
||||
switch(id) {
|
||||
case CPCRE_PLAIN:
|
||||
pcre_pat = pcre_compile(rhre, pcre_opts, &pcre_err, &pcre_errptr, NULL);
|
||||
pcre_fullinfo(pcre_pat, NULL, PCRE_INFO_CAPTURECOUNT, &capcnt);
|
||||
ovsize = (capcnt+1)*3;
|
||||
ov = zalloc(ovsize*sizeof(int));
|
||||
r = pcre_exec(pcre_pat, NULL, lhstr, strlen(lhstr), 0, 0, ov, ovsize);
|
||||
if (r==0) return 1;
|
||||
pcre_pat = pcre_compile(rhre, pcre_opts, &pcre_err, &pcre_errptr, NULL);
|
||||
if (pcre_pat == NULL) {
|
||||
zwarn("failed to compile regexp /%s/: %s", rhre, pcre_err);
|
||||
break;
|
||||
}
|
||||
pcre_fullinfo(pcre_pat, NULL, PCRE_INFO_CAPTURECOUNT, &capcnt);
|
||||
ovsize = (capcnt+1)*3;
|
||||
ov = zalloc(ovsize*sizeof(int));
|
||||
r = pcre_exec(pcre_pat, NULL, lhstr, strlen(lhstr), 0, 0, ov, ovsize);
|
||||
/* r < 0 => error; r==0 match but not enough size in ov
|
||||
* r > 0 => (r-1) substrings found; r==1 => no substrings
|
||||
*/
|
||||
if (r==0) {
|
||||
zwarn("reportable zsh problem: pcre_exec() returned 0");
|
||||
return_value = 1;
|
||||
break;
|
||||
}
|
||||
else if (r==PCRE_ERROR_NOMATCH) return 0; /* no match */
|
||||
else if (r<0) {
|
||||
zwarn("pcre_exec() error: %d", r);
|
||||
break;
|
||||
}
|
||||
else if (r>0) {
|
||||
zpcre_get_substrings(lhstr, ov, r, NULL);
|
||||
return 1;
|
||||
zpcre_get_substrings(lhstr, ov, r, NULL, avar, isset(BASHREMATCH));
|
||||
return_value = 1;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
if (pcre_pat)
|
||||
pcre_free(pcre_pat);
|
||||
if (ov)
|
||||
zfree(ov, ovsize*sizeof(int));
|
||||
|
||||
return return_value;
|
||||
}
|
||||
|
||||
/*
 * Condition table for this module: a single infix condition named
 * "pcre-match", dispatched to cond_pcre_match() with id CPCRE_PLAIN.
 */
static struct conddef cotab[] = {
|
||||
CONDDEF("pcre-match", CONDF_INFIX, cond_pcre_match, 0, 0, CPCRE_PLAIN)
|
||||
/* CONDDEF can register =~ but it won't be found */
|
||||
};
|
||||
|
||||
/**/
|
||||
|
@ -206,7 +290,7 @@ static struct conddef cotab[] = {
|
|||
static struct builtin bintab[] = {
|
||||
BUILTIN("pcre_compile", 0, bin_pcre_compile, 1, 1, 0, "aimx", NULL),
|
||||
BUILTIN("pcre_study", 0, bin_pcre_study, 0, 0, 0, NULL, NULL),
|
||||
BUILTIN("pcre_match", 0, bin_pcre_match, 1, 2, 0, "a", NULL)
|
||||
BUILTIN("pcre_match", 0, bin_pcre_match, 1, 1, 0, "a:v:", NULL)
|
||||
};
|
||||
|
||||
|
||||
|
|
161
Src/Modules/regex.c
Normal file
161
Src/Modules/regex.c
Normal file
|
@ -0,0 +1,161 @@
|
|||
/*
|
||||
* regex.c
|
||||
*
|
||||
* This file is part of zsh, the Z shell.
|
||||
*
|
||||
* Copyright (c) 2007 Phil Pennock
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, without written agreement and without
|
||||
* license or royalty fees, to use, copy, modify, and distribute this
|
||||
* software and to distribute modified versions of this software for any
|
||||
* purpose, provided that the above copyright notice and the following
|
||||
* two paragraphs appear in all copies of this software.
|
||||
*
|
||||
* In no event shall Phil Pennock or the Zsh Development Group be liable
|
||||
* to any party for direct, indirect, special, incidental, or consequential
|
||||
* damages arising out of the use of this software and its documentation,
|
||||
* even if Phil Pennock and the Zsh Development Group have been advised of
|
||||
* the possibility of such damage.
|
||||
*
|
||||
* Phil Pennock and the Zsh Development Group specifically disclaim any
|
||||
* warranties, including, but not limited to, the implied warranties of
|
||||
* merchantability and fitness for a particular purpose. The software
|
||||
* provided hereunder is on an "as is" basis, and Phil Pennock and the
|
||||
* Zsh Development Group have no obligation to provide maintenance,
|
||||
* support, updates, enhancements, or modifications.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "regex.mdh"
|
||||
#include "regex.pro"
|
||||
|
||||
#include <regex.h>
|
||||
|
||||
/* we default to a vaguely modern syntax and set of capabilities */
|
||||
#define ZREGEX_EXTENDED 0
|
||||
/* if you want Basic syntax, make it an alternative option */
|
||||
|
||||
/*
 * Emit a zsh warning of the form "<msg>: <regex library error text>" for
 * the regcomp()/regexec() error code r obtained with compiled pattern re.
 */
static void
zregex_regerrwarn(int r, regex_t *re, char *msg)
{
    /* a first call with no buffer only reports how much space is needed */
    size_t needed = regerror(r, re, NULL, 0);
    char *text = zalloc(needed * sizeof(char));

    regerror(r, re, text, needed);
    zwarn("%s: %s", msg, text);
    zfree(text, needed);
}
|
||||
|
||||
/**/
|
||||
static int
|
||||
zcond_regex_match(char **a, int id)
|
||||
{
|
||||
regex_t re;
|
||||
regmatch_t *m, *matches = NULL;
|
||||
size_t matchessz;
|
||||
char *lhstr, *rhre, *s, **arr, **x;
|
||||
int r, n, return_value, rcflags, reflags, nelem, start;
|
||||
|
||||
lhstr = cond_str(a,0,0);
|
||||
rhre = cond_str(a,1,0);
|
||||
rcflags = reflags = 0;
|
||||
return_value = 0; /* 1 => matched successfully */
|
||||
|
||||
switch(id) {
|
||||
case ZREGEX_EXTENDED:
|
||||
rcflags |= REG_EXTENDED;
|
||||
if (!isset(CASEMATCH))
|
||||
rcflags |= REG_ICASE;
|
||||
r = regcomp(&re, rhre, rcflags);
|
||||
if (r) {
|
||||
zregex_regerrwarn(r, &re, "failed to compile regex");
|
||||
break;
|
||||
}
|
||||
/* re.re_nsub is number of parenthesized groups, we also need
|
||||
* 1 for the 0 offset, which is the entire matched portion
|
||||
*/
|
||||
if (re.re_nsub < 0) {
|
||||
zwarn("INTERNAL ERROR: regcomp() returned "
|
||||
"negative subpattern count %d", re.re_nsub);
|
||||
break;
|
||||
}
|
||||
matchessz = (re.re_nsub + 1) * sizeof(regmatch_t);
|
||||
matches = zalloc(matchessz);
|
||||
r = regexec(&re, lhstr, re.re_nsub+1, matches, reflags);
|
||||
if (r == REG_NOMATCH) /**/;
|
||||
else if (r == 0) {
|
||||
return_value = 1;
|
||||
if (isset(BASHREMATCH)) {
|
||||
start = 0;
|
||||
nelem = re.re_nsub + 1;
|
||||
} else {
|
||||
start = 1;
|
||||
nelem = re.re_nsub;
|
||||
}
|
||||
arr = NULL; /* bogus gcc warning of used uninitialised */
|
||||
/* entire matched portion + re_nsub substrings + NULL */
|
||||
if (nelem) {
|
||||
arr = x = (char **) zalloc(sizeof(char *) * (nelem + 1));
|
||||
for (m = matches + start, n = start; n <= re.re_nsub; ++n, ++m, ++x) {
|
||||
*x = ztrduppfx(lhstr + m->rm_so, m->rm_eo - m->rm_so);
|
||||
}
|
||||
*x = NULL;
|
||||
}
|
||||
if (isset(BASHREMATCH)) {
|
||||
setaparam("BASH_REMATCH", arr);
|
||||
} else {
|
||||
m = matches;
|
||||
s = ztrduppfx(lhstr + m->rm_so, m->rm_eo - m->rm_so);
|
||||
setsparam("MATCH", s);
|
||||
if (nelem)
|
||||
setaparam("match", arr);
|
||||
}
|
||||
}
|
||||
else zregex_regerrwarn(r, &re, "regex matching error");
|
||||
break;
|
||||
default:
|
||||
DPUTS(1, "bad regex option");
|
||||
break;
|
||||
}
|
||||
|
||||
if (matches)
|
||||
zfree(matches, matchessz);
|
||||
regfree(&re);
|
||||
return return_value;
|
||||
}
|
||||
|
||||
/*
 * Condition table for this module: a single infix condition named
 * "regex-match", dispatched to zcond_regex_match() with id ZREGEX_EXTENDED.
 */
static struct conddef cotab[] = {
|
||||
CONDDEF("regex-match", CONDF_INFIX, zcond_regex_match, 0, 0, ZREGEX_EXTENDED)
|
||||
};
|
||||
|
||||
/**/
|
||||
int
|
||||
setup_(UNUSED(Module m))
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**/
|
||||
int
|
||||
boot_(Module m)
|
||||
{
|
||||
return !addconddefs(m->nam, cotab, sizeof(cotab)/sizeof(*cotab));
|
||||
}
|
||||
|
||||
/**/
|
||||
int
|
||||
cleanup_(Module m)
|
||||
{
|
||||
deleteconddefs(m->nam, cotab, sizeof(cotab)/sizeof(*cotab));
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**/
|
||||
int
|
||||
finish_(UNUSED(Module m))
|
||||
{
|
||||
return 0;
|
||||
}
|
10
Src/Modules/regex.mdd
Normal file
10
Src/Modules/regex.mdd
Normal file
|
@ -0,0 +1,10 @@
|
|||
name=zsh/regex
|
||||
link=`if test x$ac_cv_func_regcomp = xyes && \
|
||||
test x$ac_cv_func_regexec = xyes && \
|
||||
test x$ac_cv_func_regerror = xyes && \
|
||||
test x$ac_cv_func_regfree = xyes; then echo dynamic; else echo no; fi`
|
||||
load=no
|
||||
|
||||
autobins=""
|
||||
|
||||
objects="regex.o"
|
Loading…
Add table
Add a link
Reference in a new issue