mirror of
				git://git.code.sf.net/p/zsh/code
				synced 2025-10-31 06:00:54 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			486 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			486 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * pcre.c - interface to the PCRE library
 | |
|  *
 | |
|  * This file is part of zsh, the Z shell.
 | |
|  *
 | |
|  * Copyright (c) 2001, 2002, 2003, 2004, 2007 Clint Adams
 | |
|  * All rights reserved.
 | |
|  *
 | |
|  * Permission is hereby granted, without written agreement and without
 | |
|  * license or royalty fees, to use, copy, modify, and distribute this
 | |
|  * software and to distribute modified versions of this software for any
 | |
|  * purpose, provided that the above copyright notice and the following
 | |
|  * two paragraphs appear in all copies of this software.
 | |
|  *
 | |
|  * In no event shall Clint Adams or the Zsh Development Group be liable
 | |
|  * to any party for direct, indirect, special, incidental, or consequential
 | |
|  * damages arising out of the use of this software and its documentation,
 | |
|  * even if Andrew Main and the Zsh Development Group have been advised of
 | |
|  * the possibility of such damage.
 | |
|  *
 | |
|  * Clint Adams and the Zsh Development Group specifically disclaim any
 | |
|  * warranties, including, but not limited to, the implied warranties of
 | |
|  * merchantability and fitness for a particular purpose.  The software
 | |
|  * provided hereunder is on an "as is" basis, and Andrew Main and the
 | |
|  * Zsh Development Group have no obligation to provide maintenance,
 | |
|  * support, updates, enhancements, or modifications.
 | |
|  *
 | |
|  */
 | |
| 
 | |
| 
 | |
| #include "pcre.mdh"
 | |
| #include "pcre.pro"
 | |
| 
 | |
/* Condition id handed to cond_pcre_match() for the "pcre-match" operator. */
#define CPCRE_PLAIN 0
 | |
| 
 | |
| /**/
 | |
| #if defined(HAVE_PCRE_COMPILE) && defined(HAVE_PCRE_EXEC)
 | |
| #include <pcre.h>
 | |
| 
 | |
| static pcre *pcre_pattern;
 | |
| static pcre_extra *pcre_hints;
 | |
| 
 | |
/*
 * Report whether patterns should be compiled with PCRE_UTF8.
 *
 * Returns 0 when the MULTIBYTE option is unset, when the locale codeset
 * is not "UTF-8", or when pcre_config() could not be queried; otherwise
 * returns the value pcre_config() stored for PCRE_CONFIG_UTF8.
 *
 * The library answer is cached in a static:
 *   -1  not asked yet,
 *   -2  asking failed (never retried),
 *  >=0  value reported by pcre_config().
 * MULTIBYTE is deliberately re-tested on every call because it can be
 * toggled at run time.
 */
/**/
static int
zpcre_utf8_enabled(void)
{
#if defined(MULTIBYTE_SUPPORT) && defined(HAVE_NL_LANGINFO) && defined(CODESET)
    static int utf8_support = -1;

    if (utf8_support < -1 || !isset(MULTIBYTE))
	return 0;

    /* Only query the library the first time we see a UTF-8 locale. */
    if (utf8_support == -1 &&
	strcmp(nl_langinfo(CODESET), "UTF-8") == 0 &&
	pcre_config(PCRE_CONFIG_UTF8, &utf8_support) != 0)
	utf8_support = -2; /* erk, failed to ask */

    return (utf8_support < 0) ? 0 : utf8_support;

#else
    return 0;
#endif
}
 | |
| 
 | |
| /**/
 | |
| static int
 | |
| bin_pcre_compile(char *nam, char **args, Options ops, UNUSED(int func))
 | |
| {
 | |
|     int pcre_opts = 0, pcre_errptr;
 | |
|     const char *pcre_error;
 | |
|     char *target;
 | |
|     
 | |
|     if(OPT_ISSET(ops,'a')) pcre_opts |= PCRE_ANCHORED;
 | |
|     if(OPT_ISSET(ops,'i')) pcre_opts |= PCRE_CASELESS;
 | |
|     if(OPT_ISSET(ops,'m')) pcre_opts |= PCRE_MULTILINE;
 | |
|     if(OPT_ISSET(ops,'x')) pcre_opts |= PCRE_EXTENDED;
 | |
|     if(OPT_ISSET(ops,'s')) pcre_opts |= PCRE_DOTALL;
 | |
|     
 | |
|     if (zpcre_utf8_enabled())
 | |
| 	pcre_opts |= PCRE_UTF8;
 | |
| 
 | |
|     pcre_hints = NULL;  /* Is this necessary? */
 | |
|     
 | |
|     if (pcre_pattern)
 | |
| 	pcre_free(pcre_pattern);
 | |
| 
 | |
|     target = ztrdup(*args);
 | |
|     unmetafy(target, NULL);
 | |
| 
 | |
|     pcre_pattern = pcre_compile(target, pcre_opts, &pcre_error, &pcre_errptr, NULL);
 | |
|     
 | |
|     free(target);
 | |
| 
 | |
|     if (pcre_pattern == NULL)
 | |
|     {
 | |
| 	zwarnnam(nam, "error in regex: %s", pcre_error);
 | |
| 	return 1;
 | |
|     }
 | |
|     
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| /**/
 | |
| #ifdef HAVE_PCRE_STUDY
 | |
| 
 | |
| /**/
 | |
| static int
 | |
| bin_pcre_study(char *nam, UNUSED(char **args), UNUSED(Options ops), UNUSED(int func))
 | |
| {
 | |
|     const char *pcre_error;
 | |
| 
 | |
|     if (pcre_pattern == NULL)
 | |
|     {
 | |
| 	zwarnnam(nam, "no pattern has been compiled for study");
 | |
| 	return 1;
 | |
|     }
 | |
|     
 | |
|     pcre_hints = pcre_study(pcre_pattern, 0, &pcre_error);
 | |
|     if (pcre_error != NULL)
 | |
|     {
 | |
| 	zwarnnam(nam, "error while studying regex: %s", pcre_error);
 | |
| 	return 1;
 | |
|     }
 | |
|     
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| /**/
 | |
| #else /* !HAVE_PCRE_STUDY */
 | |
| 
 | |
/* No pcre_study() in this build: route the builtin to bin_notavail. */
# define bin_pcre_study bin_notavail
 | |
| 
 | |
| /**/
 | |
| #endif /* !HAVE_PCRE_STUDY */
 | |
| 
 | |
| /**/
 | |
| static int
 | |
| zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar,
 | |
| 		     char *substravar, int want_offset_pair, int matchedinarr,
 | |
| 		     int want_begin_end)
 | |
| {
 | |
|     char **captures, *match_all, **matches;
 | |
|     char offset_all[50];
 | |
|     int capture_start = 1;
 | |
| 
 | |
|     if (matchedinarr)
 | |
| 	capture_start = 0;
 | |
|     if (matchvar == NULL)
 | |
| 	matchvar = "MATCH";
 | |
|     if (substravar == NULL)
 | |
| 	substravar = "match";
 | |
|     
 | |
|     /* captures[0] will be entire matched string, [1] first substring */
 | |
|     if (!pcre_get_substring_list(arg, ovec, ret, (const char ***)&captures)) {
 | |
| 	int nelem = arrlen(captures)-1;
 | |
| 	/* Set to the offsets of the complete match */
 | |
| 	if (want_offset_pair) {
 | |
| 	    sprintf(offset_all, "%d %d", ovec[0], ovec[1]);
 | |
| 	    setsparam("ZPCRE_OP", ztrdup(offset_all));
 | |
| 	}
 | |
| 	match_all = metafy(captures[0], -1, META_DUP);
 | |
| 	setsparam(matchvar, match_all);
 | |
| 	/*
 | |
| 	 * If we're setting match, mbegin, mend we only do
 | |
| 	 * so if there were parenthesised matches, for consistency
 | |
| 	 * (c.f. regex.c).
 | |
| 	 */
 | |
| 	if (!want_begin_end || nelem) {
 | |
| 	    char **x, **y;
 | |
| 	    y = &captures[capture_start];
 | |
| 	    matches = x = (char **) zalloc(sizeof(char *) * (arrlen(y) + 1));
 | |
| 	    do {
 | |
| 		if (*y)
 | |
| 		    *x++ = metafy(*y, -1, META_DUP);
 | |
| 		else
 | |
| 		    *x++ = NULL;
 | |
| 	    } while (*y++);
 | |
| 	    setaparam(substravar, matches);
 | |
| 	}
 | |
| 
 | |
| 	if (want_begin_end) {
 | |
| 	    char *ptr = arg;
 | |
| 	    zlong offs = 0;
 | |
| 
 | |
| 	    /* Count the characters before the match */
 | |
| 	    MB_METACHARINIT();
 | |
| 	    while (ptr < arg + ovec[0]) {
 | |
| 		offs++;
 | |
| 		ptr += MB_METACHARLEN(ptr);
 | |
| 	    }
 | |
| 	    setiparam("MBEGIN", offs + !isset(KSHARRAYS));
 | |
| 	    /* Add on the characters in the match */
 | |
| 	    while (ptr < arg + ovec[1]) {
 | |
| 		offs++;
 | |
| 		ptr += MB_METACHARLEN(ptr);
 | |
| 	    }
 | |
| 	    setiparam("MEND", offs + !isset(KSHARRAYS) - 1);
 | |
| 	    if (nelem) {
 | |
| 		char **mbegin, **mend, **bptr, **eptr;
 | |
| 		int i, *ipair;
 | |
| 
 | |
| 		bptr = mbegin = zalloc(sizeof(char*)*(nelem+1));
 | |
| 		eptr = mend = zalloc(sizeof(char*)*(nelem+1));
 | |
| 
 | |
| 		for (ipair = ovec + 2, i = 0;
 | |
| 		     i < nelem;
 | |
| 		     ipair += 2, i++, bptr++, eptr++)
 | |
| 		{
 | |
| 		    char buf[DIGBUFSIZE];
 | |
| 		    ptr = arg;
 | |
| 		    offs = 0;
 | |
| 		    /* Find the start offset */
 | |
| 		    MB_METACHARINIT();
 | |
| 		    while (ptr < arg + ipair[0]) {
 | |
| 			offs++;
 | |
| 			ptr += MB_METACHARLEN(ptr);
 | |
| 		    }
 | |
| 		    convbase(buf, offs + !isset(KSHARRAYS), 10);
 | |
| 		    *bptr = ztrdup(buf);
 | |
| 		    /* Continue to the end offset */
 | |
| 		    while (ptr < arg + ipair[1]) {
 | |
| 			offs++;
 | |
| 			ptr += MB_METACHARLEN(ptr);
 | |
| 		    }
 | |
| 		    convbase(buf, offs + !isset(KSHARRAYS) - 1, 10);
 | |
| 		    *eptr = ztrdup(buf);
 | |
| 		}
 | |
| 		*bptr = *eptr = NULL;
 | |
| 
 | |
| 		setaparam("mbegin", mbegin);
 | |
| 		setaparam("mend", mend);
 | |
| 	    }
 | |
| 	}
 | |
| 
 | |
| 	pcre_free_substring_list((const char **)captures);
 | |
|     }
 | |
| 
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| /**/
 | |
| static int
 | |
| getposint(char *instr, char *nam)
 | |
| {
 | |
|     char *eptr;
 | |
|     int ret;
 | |
| 
 | |
|     ret = (int)zstrtol(instr, &eptr, 10);
 | |
|     if (*eptr || ret < 0) {
 | |
| 	zwarnnam(nam, "integer expected: %s", instr);
 | |
| 	return -1;
 | |
|     }
 | |
| 
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| /**/
 | |
| static int
 | |
| bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func))
 | |
| {
 | |
|     int ret, capcount, *ovec, ovecsize, c;
 | |
|     char *matched_portion = NULL;
 | |
|     char *plaintext = NULL;
 | |
|     char *receptacle = NULL;
 | |
|     int return_value = 1;
 | |
|     /* The subject length and offset start are both int values in pcre_exec */
 | |
|     int subject_len;
 | |
|     int offset_start = 0;
 | |
|     int want_offset_pair = 0;
 | |
| 
 | |
|     if (pcre_pattern == NULL) {
 | |
| 	zwarnnam(nam, "no pattern has been compiled");
 | |
| 	return 1;
 | |
|     }
 | |
|     
 | |
|     if(OPT_HASARG(ops,c='a')) {
 | |
| 	receptacle = OPT_ARG(ops,c);
 | |
|     }
 | |
|     if(OPT_HASARG(ops,c='v')) {
 | |
| 	matched_portion = OPT_ARG(ops,c);
 | |
|     }
 | |
|     if(OPT_HASARG(ops,c='n')) { /* The offset position to start the search, in bytes. */
 | |
| 	if ((offset_start = getposint(OPT_ARG(ops,c), nam) < 0))
 | |
| 	    return 1;
 | |
|     }
 | |
|     /* For the entire match, 'Return' the offset byte positions instead of the matched string */
 | |
|     if(OPT_ISSET(ops,'b')) want_offset_pair = 1; 
 | |
|     
 | |
|     if ((ret = pcre_fullinfo(pcre_pattern, pcre_hints, PCRE_INFO_CAPTURECOUNT, &capcount)))
 | |
|     {
 | |
| 	zwarnnam(nam, "error %d in fullinfo", ret);
 | |
| 	return 1;
 | |
|     }
 | |
| 
 | |
|     ovecsize = (capcount+1)*3;
 | |
|     ovec = zalloc(ovecsize*sizeof(int));
 | |
| 
 | |
|     plaintext = ztrdup(*args);
 | |
|     unmetafy(plaintext, NULL);
 | |
|     subject_len = (int)strlen(plaintext);
 | |
| 
 | |
|     if (offset_start > 0 && offset_start >= subject_len)
 | |
| 	ret = PCRE_ERROR_NOMATCH;
 | |
|     else
 | |
| 	ret = pcre_exec(pcre_pattern, pcre_hints, plaintext, subject_len, offset_start, 0, ovec, ovecsize);
 | |
| 
 | |
|     if (ret==0) return_value = 0;
 | |
|     else if (ret==PCRE_ERROR_NOMATCH) /* no match */;
 | |
|     else if (ret>0) {
 | |
| 	zpcre_get_substrings(plaintext, ovec, ret, matched_portion, receptacle,
 | |
| 			     want_offset_pair, 0, 0);
 | |
| 	return_value = 0;
 | |
|     }
 | |
|     else {
 | |
| 	zwarnnam(nam, "error in pcre_exec [%d]", ret);
 | |
|     }
 | |
|     
 | |
|     if (ovec)
 | |
| 	zfree(ovec, ovecsize*sizeof(int));
 | |
| 
 | |
|     return return_value;
 | |
| }
 | |
| 
 | |
| /**/
 | |
| static int
 | |
| cond_pcre_match(char **a, int id)
 | |
| {
 | |
|     pcre *pcre_pat;
 | |
|     const char *pcre_err;
 | |
|     char *lhstr, *rhre, *lhstr_plain, *rhre_plain, *avar=NULL;
 | |
|     int r = 0, pcre_opts = 0, pcre_errptr, capcnt, *ov, ovsize;
 | |
|     int return_value = 0;
 | |
| 
 | |
|     if (zpcre_utf8_enabled())
 | |
| 	pcre_opts |= PCRE_UTF8;
 | |
|     if (isset(REMATCHPCRE) && !isset(CASEMATCH))
 | |
| 	pcre_opts |= PCRE_CASELESS;
 | |
| 
 | |
|     lhstr = cond_str(a,0,0);
 | |
|     rhre = cond_str(a,1,0);
 | |
|     lhstr_plain = ztrdup(lhstr);
 | |
|     rhre_plain = ztrdup(rhre);
 | |
|     unmetafy(lhstr_plain, NULL);
 | |
|     unmetafy(rhre_plain, NULL);
 | |
|     pcre_pat = NULL;
 | |
|     ov = NULL;
 | |
|     ovsize = 0;
 | |
| 
 | |
|     if (isset(BASHREMATCH))
 | |
| 	avar="BASH_REMATCH";
 | |
| 
 | |
|     switch(id) {
 | |
| 	 case CPCRE_PLAIN:
 | |
| 		pcre_pat = pcre_compile(rhre_plain, pcre_opts, &pcre_err, &pcre_errptr, NULL);
 | |
| 		if (pcre_pat == NULL) {
 | |
| 		    zwarn("failed to compile regexp /%s/: %s", rhre, pcre_err);
 | |
| 		    break;
 | |
| 		}
 | |
|                 pcre_fullinfo(pcre_pat, NULL, PCRE_INFO_CAPTURECOUNT, &capcnt);
 | |
|     		ovsize = (capcnt+1)*3;
 | |
| 		ov = zalloc(ovsize*sizeof(int));
 | |
|     		r = pcre_exec(pcre_pat, NULL, lhstr_plain, strlen(lhstr_plain), 0, 0, ov, ovsize);
 | |
| 		/* r < 0 => error; r==0 match but not enough size in ov
 | |
| 		 * r > 0 => (r-1) substrings found; r==1 => no substrings
 | |
| 		 */
 | |
|     		if (r==0) {
 | |
| 		    zwarn("reportable zsh problem: pcre_exec() returned 0");
 | |
| 		    return_value = 1;
 | |
| 		    break;
 | |
| 		}
 | |
| 	        else if (r==PCRE_ERROR_NOMATCH) {
 | |
| 		    return_value = 0; /* no match */
 | |
| 		    break;
 | |
| 		}
 | |
| 		else if (r<0) {
 | |
| 		    zwarn("pcre_exec() error [%d]", r);
 | |
| 		    break;
 | |
| 		}
 | |
|                 else if (r>0) {
 | |
| 		    zpcre_get_substrings(lhstr_plain, ov, r, NULL, avar, 0,
 | |
| 					 isset(BASHREMATCH),
 | |
| 					 !isset(BASHREMATCH));
 | |
| 		    return_value = 1;
 | |
| 		    break;
 | |
| 		}
 | |
| 		break;
 | |
|     }
 | |
| 
 | |
|     if (lhstr_plain)
 | |
| 	free(lhstr_plain);
 | |
|     if(rhre_plain)
 | |
| 	free(rhre_plain);
 | |
|     if (pcre_pat)
 | |
| 	pcre_free(pcre_pat);
 | |
|     if (ov)
 | |
| 	zfree(ov, ovsize*sizeof(int));
 | |
| 
 | |
|     return return_value;
 | |
| }
 | |
| 
 | |
| static struct conddef cotab[] = {
 | |
|     CONDDEF("pcre-match", CONDF_INFIX, cond_pcre_match, 0, 0, CPCRE_PLAIN)
 | |
|     /* CONDDEF can register =~ but it won't be found */
 | |
| };
 | |
| 
 | |
| /**/
 | |
| #else /* !(HAVE_PCRE_COMPILE && HAVE_PCRE_EXEC) */
 | |
| 
 | |
/*
 * Built without pcre_compile()/pcre_exec(): every builtin of this
 * module is routed to bin_notavail.
 */
# define bin_pcre_compile bin_notavail
# define bin_pcre_study bin_notavail
# define bin_pcre_match bin_notavail
 | |
| 
 | |
| /**/
 | |
| #endif /* !(HAVE_PCRE_COMPILE && HAVE_PCRE_EXEC) */
 | |
| 
 | |
| static struct builtin bintab[] = {
 | |
|     BUILTIN("pcre_compile", 0, bin_pcre_compile, 1, 1, 0, "aimxs",  NULL),
 | |
|     BUILTIN("pcre_match",   0, bin_pcre_match,   1, 1, 0, "a:v:n:b",    NULL),
 | |
|     BUILTIN("pcre_study",   0, bin_pcre_study,   0, 0, 0, NULL,    NULL)
 | |
| };
 | |
| 
 | |
| 
 | |
| static struct features module_features = {
 | |
|     bintab, sizeof(bintab)/sizeof(*bintab),
 | |
| #if defined(HAVE_PCRE_COMPILE) && defined(HAVE_PCRE_EXEC)
 | |
|     cotab, sizeof(cotab)/sizeof(*cotab),
 | |
| #else /* !(HAVE_PCRE_COMPILE && HAVE_PCRE_EXEC) */
 | |
|     NULL, 0,
 | |
| #endif /* !(HAVE_PCRE_COMPILE && HAVE_PCRE_EXEC) */
 | |
|     NULL, 0,
 | |
|     NULL, 0,
 | |
|     0
 | |
| };
 | |
| 
 | |
| 
 | |
| /**/
 | |
| int
 | |
| setup_(UNUSED(Module m))
 | |
| {
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| /**/
 | |
| int
 | |
| features_(Module m, char ***features)
 | |
| {
 | |
|     *features = featuresarray(m, &module_features);
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| /**/
 | |
| int
 | |
| enables_(Module m, int **enables)
 | |
| {
 | |
|     return handlefeatures(m, &module_features, enables);
 | |
| }
 | |
| 
 | |
| /**/
 | |
| int
 | |
| boot_(Module m)
 | |
| {
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| /**/
 | |
| int
 | |
| cleanup_(Module m)
 | |
| {
 | |
|     return setfeatureenables(m, &module_features, NULL);
 | |
| }
 | |
| 
 | |
| /**/
 | |
| int
 | |
| finish_(UNUSED(Module m))
 | |
| {
 | |
|     return 0;
 | |
| }
 |