1
0
Fork 0
mirror of git://git.code.sf.net/p/zsh/code synced 2025-10-23 16:40:24 +02:00

23375: Phil Pennock: =~, zsh/regex etc. etc.

This commit is contained in:
Peter Stephenson 2007-05-01 22:05:03 +00:00
parent eb4c3d4bf2
commit 7f03c3d851
14 changed files with 392 additions and 51 deletions

View file

@ -3,7 +3,7 @@
*
* This file is part of zsh, the Z shell.
*
* Copyright (c) 2001, 2002, 2003, 2004 Clint Adams
* Copyright (c) 2001, 2002, 2003, 2004, 2007 Clint Adams
* All rights reserved.
*
* Permission is hereby granted, without written agreement and without
@ -40,6 +40,37 @@
static pcre *pcre_pattern;
static pcre_extra *pcre_hints;
/*
 * Report whether patterns should be compiled with PCRE_UTF8.
 *
 * Returns 0 unless all of the following hold: the shell was built with
 * MULTIBYTE_SUPPORT, HAVE_NL_LANGINFO and CODESET; the MULTIBYTE option
 * is currently set; nl_langinfo(CODESET) is "UTF-8"; and pcre_config()
 * could be queried.  In that case the value pcre_config() stored for
 * PCRE_CONFIG_UTF8 is returned.
 */

/**/
static int
zpcre_utf8_enabled(void)
{
#if defined(MULTIBYTE_SUPPORT) && defined(HAVE_NL_LANGINFO) && defined(CODESET)
    /*
     * -1: PCRE has not been asked yet
     * -2: asking PCRE failed, never ask again
     * otherwise: the answer pcre_config() gave us
     */
    static int have_utf8_pcre = -1;

    /*
     * MULTIBYTE can be toggled at any time, so it is tested on every
     * call instead of being folded into the cached value.
     */
    if (have_utf8_pcre < -1 || !isset(MULTIBYTE))
        return 0;

    /* Only query PCRE once we are actually in a UTF-8 locale. */
    if (have_utf8_pcre == -1 &&
        strcmp(nl_langinfo(CODESET), "UTF-8") == 0 &&
        pcre_config(PCRE_CONFIG_UTF8, &have_utf8_pcre) != 0)
        have_utf8_pcre = -2; /* erk, failed to ask */

    return (have_utf8_pcre < 0) ? 0 : have_utf8_pcre;
#else
    return 0;
#endif
}
/**/
static int
bin_pcre_compile(char *nam, char **args, Options ops, UNUSED(int func))
@ -52,8 +83,14 @@ bin_pcre_compile(char *nam, char **args, Options ops, UNUSED(int func))
if(OPT_ISSET(ops,'m')) pcre_opts |= PCRE_MULTILINE;
if(OPT_ISSET(ops,'x')) pcre_opts |= PCRE_EXTENDED;
if (zpcre_utf8_enabled())
pcre_opts |= PCRE_UTF8;
pcre_hints = NULL; /* Is this necessary? */
if (pcre_pattern)
pcre_free(pcre_pattern);
pcre_pattern = pcre_compile(*args, pcre_opts, &pcre_error, &pcre_errptr, NULL);
if (pcre_pattern == NULL)
@ -100,37 +137,52 @@ bin_pcre_study(char *nam, UNUSED(char **args), UNUSED(Options ops), UNUSED(int f
/**/
static int
zpcre_get_substrings(char *arg, int *ovec, int ret, char *receptacle)
zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar, char *substravar, int matchedinarr)
{
char **captures, **matches;
char **captures, **match_all, **matches;
int capture_start = 1;
if(!pcre_get_substring_list(arg, ovec, ret, (const char ***)&captures)) {
matches = zarrdup(&captures[1]); /* first one would be entire string */
if (receptacle == NULL)
setaparam("match", matches);
else
setaparam(receptacle, matches);
pcre_free_substring_list((const char **)captures);
}
if (matchedinarr)
capture_start = 0;
if (matchvar == NULL)
matchvar = "MATCH";
if (substravar == NULL)
substravar = "match";
return 0;
/* captures[0] will be entire matched string, [1] first substring */
if(!pcre_get_substring_list(arg, ovec, ret, (const char ***)&captures)) {
match_all = ztrdup(captures[0]);
setsparam(matchvar, match_all);
matches = zarrdup(&captures[capture_start]);
setaparam(substravar, matches);
pcre_free_substring_list((const char **)captures);
}
return 0;
}
/**/
static int
bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func))
{
int ret, capcount, *ovec, ovecsize;
int ret, capcount, *ovec, ovecsize, c;
char *matched_portion = NULL;
char *receptacle = NULL;
int return_value = 1;
if (pcre_pattern == NULL) {
zwarnnam(nam, "no pattern has been compiled");
return 1;
}
if(OPT_ISSET(ops,'a')) {
receptacle = *args++;
if(!*args) {
zwarnnam(nam, "not enough arguments");
return 1;
}
if(OPT_HASARG(ops,c='a')) {
receptacle = OPT_ARG(ops,c);
}
if(OPT_HASARG(ops,c='v')) {
matched_portion = OPT_ARG(ops,c);
}
if(!*args) {
zwarnnam(nam, "not enough arguments");
}
if ((ret = pcre_fullinfo(pcre_pattern, pcre_hints, PCRE_INFO_CAPTURECOUNT, &capcount)))
@ -144,18 +196,20 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func))
ret = pcre_exec(pcre_pattern, pcre_hints, *args, strlen(*args), 0, 0, ovec, ovecsize);
if (ret==0) return 0;
else if (ret==PCRE_ERROR_NOMATCH) return 1; /* no match */
if (ret==0) return_value = 0;
else if (ret==PCRE_ERROR_NOMATCH) /* no match */;
else if (ret>0) {
zpcre_get_substrings(*args, ovec, ret, receptacle);
return 0;
zpcre_get_substrings(*args, ovec, ret, matched_portion, receptacle, 0);
return_value = 0;
}
else {
zwarnnam(nam, "error in pcre_exec");
return 1;
}
return 1;
if (ovec)
zfree(ovec, ovecsize*sizeof(int));
return return_value;
}
/**/
@ -164,33 +218,63 @@ cond_pcre_match(char **a, int id)
{
pcre *pcre_pat;
const char *pcre_err;
char *lhstr, *rhre;
char *lhstr, *rhre, *avar=NULL;
int r = 0, pcre_opts = 0, pcre_errptr, capcnt, *ov, ovsize;
int return_value = 0;
if (zpcre_utf8_enabled())
pcre_opts |= PCRE_UTF8;
lhstr = cond_str(a,0,0);
rhre = cond_str(a,1,0);
pcre_pat = ov = NULL;
if (isset(BASHREMATCH))
avar="BASH_REMATCH";
switch(id) {
case CPCRE_PLAIN:
pcre_pat = pcre_compile(rhre, pcre_opts, &pcre_err, &pcre_errptr, NULL);
pcre_fullinfo(pcre_pat, NULL, PCRE_INFO_CAPTURECOUNT, &capcnt);
ovsize = (capcnt+1)*3;
ov = zalloc(ovsize*sizeof(int));
r = pcre_exec(pcre_pat, NULL, lhstr, strlen(lhstr), 0, 0, ov, ovsize);
if (r==0) return 1;
pcre_pat = pcre_compile(rhre, pcre_opts, &pcre_err, &pcre_errptr, NULL);
if (pcre_pat == NULL) {
zwarn("failed to compile regexp /%s/: %s", rhre, pcre_err);
break;
}
pcre_fullinfo(pcre_pat, NULL, PCRE_INFO_CAPTURECOUNT, &capcnt);
ovsize = (capcnt+1)*3;
ov = zalloc(ovsize*sizeof(int));
r = pcre_exec(pcre_pat, NULL, lhstr, strlen(lhstr), 0, 0, ov, ovsize);
/* r < 0 => error; r==0 match but not enough size in ov
* r > 0 => (r-1) substrings found; r==1 => no substrings
*/
if (r==0) {
zwarn("reportable zsh problem: pcre_exec() returned 0");
return_value = 1;
break;
}
else if (r==PCRE_ERROR_NOMATCH) return 0; /* no match */
else if (r<0) {
zwarn("pcre_exec() error: %d", r);
break;
}
else if (r>0) {
zpcre_get_substrings(lhstr, ov, r, NULL);
return 1;
zpcre_get_substrings(lhstr, ov, r, NULL, avar, isset(BASHREMATCH));
return_value = 1;
break;
}
break;
}
return 0;
if (pcre_pat)
pcre_free(pcre_pat);
if (ov)
zfree(ov, ovsize*sizeof(int));
return return_value;
}
static struct conddef cotab[] = {
CONDDEF("pcre-match", CONDF_INFIX, cond_pcre_match, 0, 0, CPCRE_PLAIN)
/* CONDDEF can register =~ but it won't be found */
};
/**/
@ -206,7 +290,7 @@ static struct conddef cotab[] = {
static struct builtin bintab[] = {
BUILTIN("pcre_compile", 0, bin_pcre_compile, 1, 1, 0, "aimx", NULL),
BUILTIN("pcre_study", 0, bin_pcre_study, 0, 0, 0, NULL, NULL),
BUILTIN("pcre_match", 0, bin_pcre_match, 1, 2, 0, "a", NULL)
BUILTIN("pcre_match", 0, bin_pcre_match, 1, 1, 0, "a:v:", NULL)
};

161
Src/Modules/regex.c Normal file
View file

@ -0,0 +1,161 @@
/*
* regex.c
*
* This file is part of zsh, the Z shell.
*
* Copyright (c) 2007 Phil Pennock
* All Rights Reserved.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and to distribute modified versions of this software for any
* purpose, provided that the above copyright notice and the following
* two paragraphs appear in all copies of this software.
*
* In no event shall Phil Pennock or the Zsh Development Group be liable
* to any party for direct, indirect, special, incidental, or consequential
* damages arising out of the use of this software and its documentation,
* even if Phil Pennock and the Zsh Development Group have been advised of
* the possibility of such damage.
*
* Phil Pennock and the Zsh Development Group specifically disclaim any
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose. The software
* provided hereunder is on an "as is" basis, and Phil Pennock and the
* Zsh Development Group have no obligation to provide maintenance,
* support, updates, enhancements, or modifications.
*
*/
#include "regex.mdh"
#include "regex.pro"
#include <regex.h>
/* we default to a vaguely modern syntax and set of capabilities */
#define ZREGEX_EXTENDED 0
/* if you want Basic syntax, add it as an alternative option */
/*
 * Emit a warning of the form "<msg>: <regerror text>" for the regex
 * error code r.
 *
 *   r    error code returned by regcomp() or regexec()
 *   re   the regex_t the error relates to, handed on to regerror()
 *   msg  caller-supplied prefix for the warning
 */
static void
zregex_regerrwarn(int r, regex_t *re, char *msg)
{
    /* A zero-sized query makes regerror() report the buffer size needed. */
    size_t needed = regerror(r, re, NULL, 0);
    char *text = zalloc(needed*sizeof(char));

    regerror(r, re, text, needed);
    zwarn("%s: %s", msg, text);
    zfree(text, needed);
}
/*
 * Condition handler for the "regex-match" infix condition.
 *
 *   a   condition arguments: cond_str(a,0,0) is the string to test and
 *       cond_str(a,1,0) is the regular expression
 *   id  selects the regex flavour; only ZREGEX_EXTENDED is handled
 *
 * Returns 1 if the string matched, 0 otherwise (no match, compile
 * failure, matching error or unknown id).
 *
 * On a successful match:
 *   - with BASHREMATCH set, the array BASH_REMATCH receives the whole
 *     matched portion followed by each parenthesised subgroup;
 *   - otherwise the scalar MATCH receives the whole matched portion and,
 *     if the pattern has any subgroups, the array "match" receives them.
 *     A pattern without subgroups leaves "match" untouched.
 * A subgroup that took no part in the match is stored as an empty string.
 */

/**/
static int
zcond_regex_match(char **a, int id)
{
    regex_t re;
    regmatch_t *m, *matches = NULL;
    size_t matchessz = 0;
    char *lhstr, *rhre, *s, **arr, **x;
    int r, n, return_value, rcflags, reflags, nelem, start, nsub;
    int compiled = 0; /* 1 => regcomp() succeeded, so re must be freed */

    lhstr = cond_str(a,0,0);
    rhre = cond_str(a,1,0);
    rcflags = reflags = 0;
    return_value = 0; /* 1 => matched successfully */

    switch(id) {
    case ZREGEX_EXTENDED:
        rcflags |= REG_EXTENDED;
        if (!isset(CASEMATCH))
            rcflags |= REG_ICASE;
        r = regcomp(&re, rhre, rcflags);
        if (r) {
            zregex_regerrwarn(r, &re, "failed to compile regex");
            break;
        }
        compiled = 1;
        /* re.re_nsub is number of parenthesized groups, we also need
         * 1 for the 0 offset, which is the entire matched portion.
         * re_nsub is a size_t, so it can never compare below zero
         * itself; convert it to int once and reject any count that
         * does not survive the conversion, since the element counts
         * below are ints.
         */
        nsub = (int)re.re_nsub;
        if (nsub < 0 || (size_t)nsub != re.re_nsub) {
            zwarn("INTERNAL ERROR: regcomp() returned "
                  "negative subpattern count %d", nsub);
            break;
        }
        matchessz = ((size_t)nsub + 1) * sizeof(regmatch_t);
        matches = zalloc(matchessz);
        r = regexec(&re, lhstr, (size_t)nsub + 1, matches, reflags);
        if (r == REG_NOMATCH)
            /* not an error; return_value stays 0 */;
        else if (r == 0) {
            return_value = 1;
            if (isset(BASHREMATCH)) {
                start = 0;
                nelem = nsub + 1;
            } else {
                start = 1;
                nelem = nsub;
            }
            arr = NULL; /* bogus gcc warning of used uninitialised */
            /* entire matched portion + re_nsub substrings + NULL */
            if (nelem) {
                arr = x = (char **) zalloc(sizeof(char *) * (nelem + 1));
                for (m = matches + start, n = start; n <= nsub; ++n, ++m, ++x) {
                    if (m->rm_so < 0) {
                        /* subgroup did not participate: offsets are -1,
                         * so do not form a pointer in front of lhstr */
                        *x = ztrduppfx(lhstr, 0);
                    } else {
                        *x = ztrduppfx(lhstr + m->rm_so, m->rm_eo - m->rm_so);
                    }
                }
                *x = NULL;
            }
            if (isset(BASHREMATCH)) {
                setaparam("BASH_REMATCH", arr);
            } else {
                m = matches;
                s = ztrduppfx(lhstr + m->rm_so, m->rm_eo - m->rm_so);
                setsparam("MATCH", s);
                if (nelem)
                    setaparam("match", arr);
            }
        }
        else zregex_regerrwarn(r, &re, "regex matching error");
        break;
    default:
        DPUTS(1, "bad regex option");
        break;
    }

    if (matches)
        zfree(matches, matchessz);
    /* regfree() is only valid on a successfully compiled regex_t */
    if (compiled)
        regfree(&re);
    return return_value;
}
/*
 * Condition definitions provided by this module: a single infix
 * condition named "regex-match", handled by zcond_regex_match() with
 * ZREGEX_EXTENDED as its id.  boot_() and cleanup_() pass this table to
 * addconddefs() and deleteconddefs() respectively.
 */
static struct conddef cotab[] = {
CONDDEF("regex-match", CONDF_INFIX, zcond_regex_match, 0, 0, ZREGEX_EXTENDED)
};
/*
 * setup_: ignores its Module argument and always returns 0.
 * NOTE(review): presumably one of the entry points the zsh module
 * loader calls -- confirm against the module API documentation.
 */

/**/
int
setup_(UNUSED(Module m))
{
return 0;
}
/*
 * boot_: hand every entry of cotab to addconddefs() under the module's
 * name and return the logical negation of its result.
 */

/**/
int
boot_(Module m)
{
    return !addconddefs(m->nam, cotab, sizeof(cotab)/sizeof(cotab[0]));
}
/*
 * cleanup_: hand every entry of cotab to deleteconddefs() under the
 * module's name; always returns 0.
 */

/**/
int
cleanup_(Module m)
{
    deleteconddefs(m->nam, cotab, sizeof(cotab)/sizeof(cotab[0]));

    return 0;
}
/*
 * finish_: ignores its Module argument and always returns 0.
 * NOTE(review): presumably the final entry point the zsh module loader
 * calls when the module is unloaded -- confirm against the module API
 * documentation.
 */

/**/
int
finish_(UNUSED(Module m))
{
return 0;
}

10
Src/Modules/regex.mdd Normal file
View file

@ -0,0 +1,10 @@
name=zsh/regex
link=`if test x$ac_cv_func_regcomp = xyes && \
test x$ac_cv_func_regexec = xyes && \
test x$ac_cv_func_regerror = xyes && \
test x$ac_cv_func_regfree = xyes; then echo dynamic; else echo no; fi`
load=no
autobins=""
objects="regex.o"